@promptbook/pdf 0.92.0-23 → 0.92.0-25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +71 -47
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +0 -2
- package/esm/typings/src/collection/PipelineCollection.d.ts +0 -2
- package/esm/typings/src/collection/SimplePipelineCollection.d.ts +1 -1
- package/esm/typings/src/commands/FORMFACTOR/formfactorCommandParser.d.ts +1 -1
- package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
- package/esm/typings/src/config.d.ts +8 -4
- package/esm/typings/src/constants.d.ts +2 -2
- package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
- package/esm/typings/src/execution/CommonToolsOptions.d.ts +3 -3
- package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +10 -10
- package/esm/typings/src/formfactors/_boilerplate/BoilerplateFormfactorDefinition.d.ts +3 -2
- package/esm/typings/src/formfactors/_common/string_formfactor_name.d.ts +2 -1
- package/esm/typings/src/formfactors/index.d.ts +1 -1
- package/esm/typings/src/formfactors/sheets/SheetsFormfactorDefinition.d.ts +3 -2
- package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +5 -5
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
- package/esm/typings/src/llm-providers/_common/utils/cache/cacheLlmTools.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
- package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
- package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
- package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
- package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
- package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +2 -2
- package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
- package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
- package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
- package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
- package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
- package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
- package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
- package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
- package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
- package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
- package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
- package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
- package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
- package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
- package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
- package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
- package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
- package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
- package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
- package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
- package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
- package/esm/typings/src/types/ModelVariant.d.ts +5 -5
- package/esm/typings/src/types/typeAliases.d.ts +8 -6
- package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
- package/esm/typings/src/utils/environment/$getGlobalScope.d.ts +2 -1
- package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
- package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
- package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
- package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
- package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
- package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
- package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
- package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
- package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
- package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
- package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
- package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
- package/esm/typings/src/version.d.ts +2 -1
- package/package.json +2 -2
- package/umd/index.umd.js +71 -47
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
26
26
|
* @generated
|
|
27
27
|
* @see https://github.com/webgptorg/promptbook
|
|
28
28
|
*/
|
|
29
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-23';
|
|
29
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-25';
|
|
30
30
|
/**
|
|
31
31
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
32
32
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -103,11 +103,20 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
|
|
|
103
103
|
*/
|
|
104
104
|
const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
|
|
105
105
|
/**
|
|
106
|
-
*
|
|
106
|
+
* Threshold value that determines when a dataset is considered "big"
|
|
107
|
+
* and may require special handling or optimizations
|
|
108
|
+
*
|
|
109
|
+
* For example, when error occurs in one item of the big dataset, it will not fail the whole pipeline
|
|
107
110
|
*
|
|
108
111
|
* @public exported from `@promptbook/core`
|
|
109
112
|
*/
|
|
110
113
|
const BIG_DATASET_TRESHOLD = 50;
|
|
114
|
+
/**
|
|
115
|
+
* Placeholder text used to represent a placeholder value of failed operation
|
|
116
|
+
*
|
|
117
|
+
* @public exported from `@promptbook/core`
|
|
118
|
+
*/
|
|
119
|
+
const FAILED_VALUE_PLACEHOLDER = '!?';
|
|
111
120
|
// <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
|
|
112
121
|
/**
|
|
113
122
|
* The maximum number of iterations for a loops
|
|
@@ -361,7 +370,8 @@ async function isFileExisting(filename, fs) {
|
|
|
361
370
|
*/
|
|
362
371
|
|
|
363
372
|
/**
|
|
364
|
-
*
|
|
373
|
+
* Converts a name to a properly formatted subfolder path for cache storage.
|
|
374
|
+
* Handles normalization and path formatting to create consistent cache directory structures.
|
|
365
375
|
*
|
|
366
376
|
* @private for `FileCacheStorage`
|
|
367
377
|
*/
|
|
@@ -614,10 +624,10 @@ for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
|
|
|
614
624
|
*/
|
|
615
625
|
|
|
616
626
|
/**
|
|
617
|
-
*
|
|
627
|
+
* Removes diacritic marks (accents) from characters in a string.
|
|
618
628
|
*
|
|
619
|
-
* @param input
|
|
620
|
-
* @returns
|
|
629
|
+
* @param input The string containing diacritics to be normalized.
|
|
630
|
+
* @returns The string with diacritics removed or normalized.
|
|
621
631
|
* @public exported from `@promptbook/utils`
|
|
622
632
|
*/
|
|
623
633
|
function removeDiacritics(input) {
|
|
@@ -631,10 +641,10 @@ function removeDiacritics(input) {
|
|
|
631
641
|
*/
|
|
632
642
|
|
|
633
643
|
/**
|
|
634
|
-
*
|
|
644
|
+
* Converts a given text to kebab-case format.
|
|
635
645
|
*
|
|
636
|
-
* @param text
|
|
637
|
-
* @returns
|
|
646
|
+
* @param text The text to be converted.
|
|
647
|
+
* @returns The kebab-case formatted string.
|
|
638
648
|
* @example 'hello-world'
|
|
639
649
|
* @example 'i-love-promptbook'
|
|
640
650
|
* @public exported from `@promptbook/utils`
|
|
@@ -776,11 +786,11 @@ function isValidUrl(url) {
|
|
|
776
786
|
}
|
|
777
787
|
|
|
778
788
|
/**
|
|
779
|
-
*
|
|
789
|
+
* Converts a title string into a normalized name.
|
|
780
790
|
*
|
|
781
|
-
* @param value
|
|
782
|
-
* @returns
|
|
783
|
-
* @example
|
|
791
|
+
* @param value The title string to be converted to a name.
|
|
792
|
+
* @returns A normalized name derived from the input title.
|
|
793
|
+
* @example 'Hello World!' -> 'hello-world'
|
|
784
794
|
* @public exported from `@promptbook/utils`
|
|
785
795
|
*/
|
|
786
796
|
function titleToName(value) {
|
|
@@ -813,9 +823,8 @@ function TODO_USE(...value) {
|
|
|
813
823
|
}
|
|
814
824
|
|
|
815
825
|
/**
|
|
816
|
-
*
|
|
817
|
-
*
|
|
818
|
-
* Note: It also checks if directory exists and creates it if not
|
|
826
|
+
* Retrieves an intermediate source for a scraper based on the knowledge source.
|
|
827
|
+
* Manages the caching and retrieval of intermediate scraper results for optimized performance.
|
|
819
828
|
*
|
|
820
829
|
* @private as internal utility for scrapers
|
|
821
830
|
*/
|
|
@@ -1449,13 +1458,13 @@ const ORDER_OF_PIPELINE_JSON = [
|
|
|
1449
1458
|
*/
|
|
1450
1459
|
const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
|
|
1451
1460
|
/**
|
|
1452
|
-
*
|
|
1461
|
+
* Placeholder value indicating a parameter is missing its value.
|
|
1453
1462
|
*
|
|
1454
1463
|
* @private within the repository
|
|
1455
1464
|
*/
|
|
1456
1465
|
const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
|
|
1457
1466
|
/**
|
|
1458
|
-
*
|
|
1467
|
+
* Placeholder value indicating a parameter is restricted and cannot be used directly.
|
|
1459
1468
|
*
|
|
1460
1469
|
* @private within the repository
|
|
1461
1470
|
*/
|
|
@@ -1954,7 +1963,7 @@ class SimplePipelineCollection {
|
|
|
1954
1963
|
/**
|
|
1955
1964
|
* Constructs a pipeline collection from pipelines
|
|
1956
1965
|
*
|
|
1957
|
-
* @param pipelines
|
|
1966
|
+
* @param pipelines Array of pipeline JSON objects to include in the collection
|
|
1958
1967
|
*
|
|
1959
1968
|
* Note: During the construction logic of all pipelines are validated
|
|
1960
1969
|
* Note: It is not recommended to use this constructor directly, use `createCollectionFromJson` *(or other variant)* instead
|
|
@@ -2118,8 +2127,8 @@ class PipelineExecutionError extends Error {
|
|
|
2118
2127
|
* @public exported from `@promptbook/core`
|
|
2119
2128
|
*/
|
|
2120
2129
|
function isPipelinePrepared(pipeline) {
|
|
2121
|
-
// Note: Ignoring `pipeline.preparations`
|
|
2122
|
-
// Note: Ignoring `pipeline.knowledgePieces`
|
|
2130
|
+
// Note: Ignoring `pipeline.preparations`
|
|
2131
|
+
// Note: Ignoring `pipeline.knowledgePieces`
|
|
2123
2132
|
if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
|
|
2124
2133
|
// TODO: !!! Comment this out
|
|
2125
2134
|
console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
|
|
@@ -3091,7 +3100,8 @@ async function preparePersona(personaDescription, tools, options) {
|
|
|
3091
3100
|
*/
|
|
3092
3101
|
|
|
3093
3102
|
/**
|
|
3094
|
-
*
|
|
3103
|
+
* Safely retrieves the global scope object (window in browser, global in Node.js)
|
|
3104
|
+
* regardless of the JavaScript environment in which the code is running
|
|
3095
3105
|
*
|
|
3096
3106
|
* Note: `$` is used to indicate that this function is not a pure function - it access global scope
|
|
3097
3107
|
*
|
|
@@ -3102,10 +3112,10 @@ function $getGlobalScope() {
|
|
|
3102
3112
|
}
|
|
3103
3113
|
|
|
3104
3114
|
/**
|
|
3105
|
-
*
|
|
3115
|
+
* Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
|
|
3106
3116
|
*
|
|
3107
|
-
* @param text
|
|
3108
|
-
* @returns
|
|
3117
|
+
* @param text The text string to be converted to SCREAMING_CASE format.
|
|
3118
|
+
* @returns The normalized text in SCREAMING_CASE format.
|
|
3109
3119
|
* @example 'HELLO_WORLD'
|
|
3110
3120
|
* @example 'I_LOVE_PROMPTBOOK'
|
|
3111
3121
|
* @public exported from `@promptbook/utils`
|
|
@@ -3157,10 +3167,10 @@ function normalizeTo_SCREAMING_CASE(text) {
|
|
|
3157
3167
|
*/
|
|
3158
3168
|
|
|
3159
3169
|
/**
|
|
3160
|
-
*
|
|
3170
|
+
* Normalizes a text string to snake_case format.
|
|
3161
3171
|
*
|
|
3162
|
-
* @param text
|
|
3163
|
-
* @returns
|
|
3172
|
+
* @param text The text string to be converted to snake_case format.
|
|
3173
|
+
* @returns The normalized text in snake_case format.
|
|
3164
3174
|
* @example 'hello_world'
|
|
3165
3175
|
* @example 'i_love_promptbook'
|
|
3166
3176
|
* @public exported from `@promptbook/utils`
|
|
@@ -3230,10 +3240,11 @@ const $scrapersMetadataRegister = new $Register('scrapers_metadata');
|
|
|
3230
3240
|
*/
|
|
3231
3241
|
|
|
3232
3242
|
/**
|
|
3233
|
-
*
|
|
3243
|
+
* Registry for all available scrapers in the system.
|
|
3244
|
+
* Central point for registering and accessing different types of content scrapers.
|
|
3234
3245
|
*
|
|
3235
3246
|
* Note: `$` is used to indicate that this interacts with the global scope
|
|
3236
|
-
* @singleton Only one instance of each register is created per build, but
|
|
3247
|
+
* @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
|
|
3237
3248
|
* @public exported from `@promptbook/core`
|
|
3238
3249
|
*/
|
|
3239
3250
|
const $scrapersRegister = new $Register('scraper_constructors');
|
|
@@ -3411,7 +3422,9 @@ const promptbookFetch = async (urlOrRequest, init) => {
|
|
|
3411
3422
|
*/
|
|
3412
3423
|
|
|
3413
3424
|
/**
|
|
3414
|
-
*
|
|
3425
|
+
* Factory function that creates a handler for processing knowledge sources.
|
|
3426
|
+
* Provides standardized processing of different types of knowledge sources
|
|
3427
|
+
* across various scraper implementations.
|
|
3415
3428
|
*
|
|
3416
3429
|
* @public exported from `@promptbook/core`
|
|
3417
3430
|
*/
|
|
@@ -3652,9 +3665,12 @@ TODO: [🧊] This is how it can look in future
|
|
|
3652
3665
|
*/
|
|
3653
3666
|
|
|
3654
3667
|
/**
|
|
3655
|
-
*
|
|
3668
|
+
* Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
|
|
3656
3669
|
*
|
|
3657
|
-
* @
|
|
3670
|
+
* @param tasks Sequence of tasks that are chained together to form a pipeline
|
|
3671
|
+
* @returns A promise that resolves to the prepared tasks.
|
|
3672
|
+
*
|
|
3673
|
+
* @private internal utility of `preparePipeline`
|
|
3658
3674
|
*/
|
|
3659
3675
|
async function prepareTasks(pipeline, tools, options) {
|
|
3660
3676
|
const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
|
|
@@ -4192,6 +4208,15 @@ const CsvFormatParser = {
|
|
|
4192
4208
|
mappedData.push(mappedRow);
|
|
4193
4209
|
if (onProgress) {
|
|
4194
4210
|
// Note: Report the CSV with all rows mapped so far
|
|
4211
|
+
/*
|
|
4212
|
+
!!!!
|
|
4213
|
+
// Report progress with updated value
|
|
4214
|
+
const progressData = mappedData.map((row, i) =>
|
|
4215
|
+
i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
|
|
4216
|
+
);
|
|
4217
|
+
|
|
4218
|
+
|
|
4219
|
+
*/
|
|
4195
4220
|
await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
|
|
4196
4221
|
}
|
|
4197
4222
|
}
|
|
@@ -4408,7 +4433,7 @@ function mapAvailableToExpectedParameters(options) {
|
|
|
4408
4433
|
else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
|
|
4409
4434
|
}
|
|
4410
4435
|
if (expectedParameterNames.size === 0) {
|
|
4411
|
-
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent
|
|
4436
|
+
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
|
|
4412
4437
|
Object.freeze(mappedParameters);
|
|
4413
4438
|
return mappedParameters;
|
|
4414
4439
|
}
|
|
@@ -4439,7 +4464,7 @@ function mapAvailableToExpectedParameters(options) {
|
|
|
4439
4464
|
for (let i = 0; i < expectedParameterNames.size; i++) {
|
|
4440
4465
|
mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
|
|
4441
4466
|
}
|
|
4442
|
-
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent
|
|
4467
|
+
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
|
|
4443
4468
|
Object.freeze(mappedParameters);
|
|
4444
4469
|
return mappedParameters;
|
|
4445
4470
|
}
|
|
@@ -5192,15 +5217,14 @@ async function executeFormatSubvalues(options) {
|
|
|
5192
5217
|
const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
|
|
5193
5218
|
${error.message}
|
|
5194
5219
|
|
|
5195
|
-
This is error in FOREACH command when mapping data
|
|
5220
|
+
This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
|
|
5196
5221
|
You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
|
|
5197
5222
|
|
|
5198
5223
|
${block(pipelineIdentification)}
|
|
5199
|
-
Subparameter index: ${index}
|
|
5200
5224
|
`));
|
|
5201
5225
|
if (length > BIG_DATASET_TRESHOLD) {
|
|
5202
5226
|
console.error(highLevelError);
|
|
5203
|
-
return
|
|
5227
|
+
return FAILED_VALUE_PLACEHOLDER;
|
|
5204
5228
|
}
|
|
5205
5229
|
throw highLevelError;
|
|
5206
5230
|
}
|
|
@@ -5224,14 +5248,13 @@ async function executeFormatSubvalues(options) {
|
|
|
5224
5248
|
catch (error) {
|
|
5225
5249
|
if (length > BIG_DATASET_TRESHOLD) {
|
|
5226
5250
|
console.error(spaceTrim((block) => `
|
|
5227
|
-
|
|
5251
|
+
${error.message}
|
|
5228
5252
|
|
|
5229
|
-
${
|
|
5253
|
+
This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
|
|
5230
5254
|
|
|
5231
5255
|
${block(pipelineIdentification)}
|
|
5232
|
-
Subparameter index: ${index}
|
|
5233
5256
|
`));
|
|
5234
|
-
return
|
|
5257
|
+
return FAILED_VALUE_PLACEHOLDER;
|
|
5235
5258
|
}
|
|
5236
5259
|
throw error;
|
|
5237
5260
|
}
|
|
@@ -6147,8 +6170,8 @@ class MarkitdownScraper {
|
|
|
6147
6170
|
extension: 'md',
|
|
6148
6171
|
isVerbose,
|
|
6149
6172
|
});
|
|
6150
|
-
// TODO:
|
|
6151
|
-
// Note: Running
|
|
6173
|
+
// TODO: Determine if Markitdown conversion should run only if the cache file doesn't exist, or always.
|
|
6174
|
+
// Note: Running Markitdown conversion ONLY if the file in the cache does not exist
|
|
6152
6175
|
if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
|
|
6153
6176
|
const src = source.filename || source.url || null;
|
|
6154
6177
|
// console.log('!!', { src, source, cacheFilehandler });
|
|
@@ -6170,11 +6193,11 @@ class MarkitdownScraper {
|
|
|
6170
6193
|
return cacheFilehandler;
|
|
6171
6194
|
}
|
|
6172
6195
|
/**
|
|
6173
|
-
* Scrapes the
|
|
6196
|
+
* Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
|
|
6174
6197
|
*/
|
|
6175
6198
|
async scrape(source) {
|
|
6176
6199
|
const cacheFilehandler = await this.$convert(source);
|
|
6177
|
-
// TODO:
|
|
6200
|
+
// TODO: Ensure this correctly creates the source object for the internal MarkdownScraper using the converted file.
|
|
6178
6201
|
const markdownSource = {
|
|
6179
6202
|
source: source.source,
|
|
6180
6203
|
filename: cacheFilehandler.filename,
|
|
@@ -6318,7 +6341,8 @@ class PdfScraper {
|
|
|
6318
6341
|
*/
|
|
6319
6342
|
|
|
6320
6343
|
/**
|
|
6321
|
-
*
|
|
6344
|
+
* Factory function to create an instance of PdfScraper.
|
|
6345
|
+
* It bundles the scraper class with its metadata.
|
|
6322
6346
|
*
|
|
6323
6347
|
* @public exported from `@promptbook/pdf`
|
|
6324
6348
|
*/
|