@promptbook/pdf 0.92.0-24 → 0.92.0-25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +47 -42
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +0 -2
- package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
- package/esm/typings/src/constants.d.ts +2 -2
- package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
- package/esm/typings/src/execution/CommonToolsOptions.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +5 -5
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
- package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
- package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
- package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
- package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
- package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
- package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +2 -2
- package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
- package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
- package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
- package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
- package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
- package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
- package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
- package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
- package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
- package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
- package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
- package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
- package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
- package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
- package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
- package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
- package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
- package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
- package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
- package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
- package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
- package/esm/typings/src/types/ModelVariant.d.ts +5 -5
- package/esm/typings/src/types/typeAliases.d.ts +8 -6
- package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
- package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
- package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
- package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
- package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
- package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
- package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
- package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
- package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
- package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
- package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
- package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
- package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
- package/esm/typings/src/version.d.ts +2 -1
- package/package.json +2 -2
- package/umd/index.umd.js +47 -42
- package/umd/index.umd.js.map +1 -1
|
@@ -6,10 +6,10 @@
|
|
|
6
6
|
*/
|
|
7
7
|
export type string_snake_case = string;
|
|
8
8
|
/**
|
|
9
|
-
*
|
|
9
|
+
* Normalizes a text string to snake_case format.
|
|
10
10
|
*
|
|
11
|
-
* @param text
|
|
12
|
-
* @returns
|
|
11
|
+
* @param text The text string to be converted to snake_case format.
|
|
12
|
+
* @returns The normalized text in snake_case format.
|
|
13
13
|
* @example 'hello_world'
|
|
14
14
|
* @example 'i_love_promptbook'
|
|
15
15
|
* @public exported from `@promptbook/utils`
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Removes diacritic marks (accents) from characters in a string.
|
|
3
3
|
*
|
|
4
|
-
* @param input
|
|
5
|
-
* @returns
|
|
4
|
+
* @param input The string containing diacritics to be normalized.
|
|
5
|
+
* @returns The string with diacritics removed or normalized.
|
|
6
6
|
* @public exported from `@promptbook/utils`
|
|
7
7
|
*/
|
|
8
8
|
export declare function removeDiacritics(input: string): string;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { Keywords } from './IKeywords';
|
|
2
2
|
/**
|
|
3
|
-
*
|
|
3
|
+
* Searches set of keywords for a specific keyword
|
|
4
4
|
*
|
|
5
5
|
* @param haystack
|
|
6
6
|
* @param needle
|
|
@@ -8,3 +8,6 @@ import type { Keywords } from './IKeywords';
|
|
|
8
8
|
* @public exported from `@promptbook/utils`
|
|
9
9
|
*/
|
|
10
10
|
export declare function searchKeywords(haystack: Keywords, needle: Keywords): boolean;
|
|
11
|
+
/**
|
|
12
|
+
* TODO: Rename to `isKeywordInKeywords`
|
|
13
|
+
*/
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Converts a title string into a normalized name.
|
|
3
3
|
*
|
|
4
|
-
* @param value
|
|
5
|
-
* @returns
|
|
6
|
-
* @example
|
|
4
|
+
* @param value The title string to be converted to a name.
|
|
5
|
+
* @returns A normalized name derived from the input title.
|
|
6
|
+
* @example 'Hello World!' -> 'hello-world'
|
|
7
7
|
* @public exported from `@promptbook/utils`
|
|
8
8
|
*/
|
|
9
9
|
export declare function titleToName(value: string): string;
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
* Organizational helper to better mark the place where to really use empty object `{}`
|
|
3
3
|
*
|
|
4
4
|
* Note: There are 2 similar types>
|
|
5
|
-
* - `empty_object`
|
|
6
|
-
* - `just_empty_object`
|
|
5
|
+
* - `empty_object` Type used for empty data objects allowing for potential future extensions
|
|
6
|
+
* - `just_empty_object` Type used when an object must remain permanently empty
|
|
7
7
|
*
|
|
8
8
|
* @private within the repository
|
|
9
9
|
*/
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Organizational helper to better
|
|
2
|
+
* Organizational helper to better distinguish different empty object use cases.
|
|
3
3
|
*
|
|
4
|
-
* Note: There are 2 similar types
|
|
5
|
-
* - `empty_object`
|
|
6
|
-
* - `just_empty_object`
|
|
4
|
+
* Note: There are 2 similar types:
|
|
5
|
+
* - `empty_object` Type used for empty data objects with potential extensions
|
|
6
|
+
* - `just_empty_object` Type used specifically for objects that must remain empty
|
|
7
7
|
*
|
|
8
8
|
* Note: In most cases, you should use `empty_object`
|
|
9
9
|
*
|
|
@@ -14,7 +14,8 @@ export declare const BOOK_LANGUAGE_VERSION: string_semantic_version;
|
|
|
14
14
|
*/
|
|
15
15
|
export declare const PROMPTBOOK_ENGINE_VERSION: string_promptbook_version;
|
|
16
16
|
/**
|
|
17
|
-
*
|
|
17
|
+
* Represents the version string of the Promptbook engine.
|
|
18
|
+
* It follows semantic versioning (e.g., `0.92.0-24`).
|
|
18
19
|
*
|
|
19
20
|
* @generated
|
|
20
21
|
*/
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@promptbook/pdf",
|
|
3
|
-
"version": "0.92.0-24",
|
|
3
|
+
"version": "0.92.0-25",
|
|
4
4
|
"description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
|
|
5
5
|
"private": false,
|
|
6
6
|
"sideEffects": false,
|
|
@@ -51,7 +51,7 @@
|
|
|
51
51
|
"module": "./esm/index.es.js",
|
|
52
52
|
"typings": "./esm/typings/src/_packages/pdf.index.d.ts",
|
|
53
53
|
"peerDependencies": {
|
|
54
|
-
"@promptbook/core": "0.92.0-24"
|
|
54
|
+
"@promptbook/core": "0.92.0-25"
|
|
55
55
|
},
|
|
56
56
|
"dependencies": {
|
|
57
57
|
"crypto": "1.0.1",
|
package/umd/index.umd.js
CHANGED
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
* @generated
|
|
26
26
|
* @see https://github.com/webgptorg/promptbook
|
|
27
27
|
*/
|
|
28
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-24';
|
|
28
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-25';
|
|
29
29
|
/**
|
|
30
30
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
31
31
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -369,7 +369,8 @@
|
|
|
369
369
|
*/
|
|
370
370
|
|
|
371
371
|
/**
|
|
372
|
-
*
|
|
372
|
+
* Converts a name to a properly formatted subfolder path for cache storage.
|
|
373
|
+
* Handles normalization and path formatting to create consistent cache directory structures.
|
|
373
374
|
*
|
|
374
375
|
* @private for `FileCacheStorage`
|
|
375
376
|
*/
|
|
@@ -622,10 +623,10 @@
|
|
|
622
623
|
*/
|
|
623
624
|
|
|
624
625
|
/**
|
|
625
|
-
*
|
|
626
|
+
* Removes diacritic marks (accents) from characters in a string.
|
|
626
627
|
*
|
|
627
|
-
* @param input
|
|
628
|
-
* @returns
|
|
628
|
+
* @param input The string containing diacritics to be normalized.
|
|
629
|
+
* @returns The string with diacritics removed or normalized.
|
|
629
630
|
* @public exported from `@promptbook/utils`
|
|
630
631
|
*/
|
|
631
632
|
function removeDiacritics(input) {
|
|
@@ -639,10 +640,10 @@
|
|
|
639
640
|
*/
|
|
640
641
|
|
|
641
642
|
/**
|
|
642
|
-
*
|
|
643
|
+
* Converts a given text to kebab-case format.
|
|
643
644
|
*
|
|
644
|
-
* @param text
|
|
645
|
-
* @returns
|
|
645
|
+
* @param text The text to be converted.
|
|
646
|
+
* @returns The kebab-case formatted string.
|
|
646
647
|
* @example 'hello-world'
|
|
647
648
|
* @example 'i-love-promptbook'
|
|
648
649
|
* @public exported from `@promptbook/utils`
|
|
@@ -784,11 +785,11 @@
|
|
|
784
785
|
}
|
|
785
786
|
|
|
786
787
|
/**
|
|
787
|
-
*
|
|
788
|
+
* Converts a title string into a normalized name.
|
|
788
789
|
*
|
|
789
|
-
* @param value
|
|
790
|
-
* @returns
|
|
791
|
-
* @example
|
|
790
|
+
* @param value The title string to be converted to a name.
|
|
791
|
+
* @returns A normalized name derived from the input title.
|
|
792
|
+
* @example 'Hello World!' -> 'hello-world'
|
|
792
793
|
* @public exported from `@promptbook/utils`
|
|
793
794
|
*/
|
|
794
795
|
function titleToName(value) {
|
|
@@ -821,9 +822,8 @@
|
|
|
821
822
|
}
|
|
822
823
|
|
|
823
824
|
/**
|
|
824
|
-
*
|
|
825
|
-
*
|
|
826
|
-
* Note: It also checks if directory exists and creates it if not
|
|
825
|
+
* Retrieves an intermediate source for a scraper based on the knowledge source.
|
|
826
|
+
* Manages the caching and retrieval of intermediate scraper results for optimized performance.
|
|
827
827
|
*
|
|
828
828
|
* @private as internal utility for scrapers
|
|
829
829
|
*/
|
|
@@ -1457,13 +1457,13 @@
|
|
|
1457
1457
|
*/
|
|
1458
1458
|
const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
|
|
1459
1459
|
/**
|
|
1460
|
-
*
|
|
1460
|
+
* Placeholder value indicating a parameter is missing its value.
|
|
1461
1461
|
*
|
|
1462
1462
|
* @private within the repository
|
|
1463
1463
|
*/
|
|
1464
1464
|
const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
|
|
1465
1465
|
/**
|
|
1466
|
-
*
|
|
1466
|
+
* Placeholder value indicating a parameter is restricted and cannot be used directly.
|
|
1467
1467
|
*
|
|
1468
1468
|
* @private within the repository
|
|
1469
1469
|
*/
|
|
@@ -2126,8 +2126,8 @@
|
|
|
2126
2126
|
* @public exported from `@promptbook/core`
|
|
2127
2127
|
*/
|
|
2128
2128
|
function isPipelinePrepared(pipeline) {
|
|
2129
|
-
// Note: Ignoring `pipeline.preparations`
|
|
2130
|
-
// Note: Ignoring `pipeline.knowledgePieces`
|
|
2129
|
+
// Note: Ignoring `pipeline.preparations`
|
|
2130
|
+
// Note: Ignoring `pipeline.knowledgePieces`
|
|
2131
2131
|
if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
|
|
2132
2132
|
// TODO: !!! Comment this out
|
|
2133
2133
|
console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
|
|
@@ -3111,10 +3111,10 @@
|
|
|
3111
3111
|
}
|
|
3112
3112
|
|
|
3113
3113
|
/**
|
|
3114
|
-
*
|
|
3114
|
+
* Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
|
|
3115
3115
|
*
|
|
3116
|
-
* @param text
|
|
3117
|
-
* @returns
|
|
3116
|
+
* @param text The text string to be converted to SCREAMING_CASE format.
|
|
3117
|
+
* @returns The normalized text in SCREAMING_CASE format.
|
|
3118
3118
|
* @example 'HELLO_WORLD'
|
|
3119
3119
|
* @example 'I_LOVE_PROMPTBOOK'
|
|
3120
3120
|
* @public exported from `@promptbook/utils`
|
|
@@ -3166,10 +3166,10 @@
|
|
|
3166
3166
|
*/
|
|
3167
3167
|
|
|
3168
3168
|
/**
|
|
3169
|
-
*
|
|
3169
|
+
* Normalizes a text string to snake_case format.
|
|
3170
3170
|
*
|
|
3171
|
-
* @param text
|
|
3172
|
-
* @returns
|
|
3171
|
+
* @param text The text string to be converted to snake_case format.
|
|
3172
|
+
* @returns The normalized text in snake_case format.
|
|
3173
3173
|
* @example 'hello_world'
|
|
3174
3174
|
* @example 'i_love_promptbook'
|
|
3175
3175
|
* @public exported from `@promptbook/utils`
|
|
@@ -3239,10 +3239,11 @@
|
|
|
3239
3239
|
*/
|
|
3240
3240
|
|
|
3241
3241
|
/**
|
|
3242
|
-
*
|
|
3242
|
+
* Registry for all available scrapers in the system.
|
|
3243
|
+
* Central point for registering and accessing different types of content scrapers.
|
|
3243
3244
|
*
|
|
3244
3245
|
* Note: `$` is used to indicate that this interacts with the global scope
|
|
3245
|
-
* @singleton Only one instance of each register is created per build, but
|
|
3246
|
+
* @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
|
|
3246
3247
|
* @public exported from `@promptbook/core`
|
|
3247
3248
|
*/
|
|
3248
3249
|
const $scrapersRegister = new $Register('scraper_constructors');
|
|
@@ -3420,7 +3421,9 @@
|
|
|
3420
3421
|
*/
|
|
3421
3422
|
|
|
3422
3423
|
/**
|
|
3423
|
-
*
|
|
3424
|
+
* Factory function that creates a handler for processing knowledge sources.
|
|
3425
|
+
* Provides standardized processing of different types of knowledge sources
|
|
3426
|
+
* across various scraper implementations.
|
|
3424
3427
|
*
|
|
3425
3428
|
* @public exported from `@promptbook/core`
|
|
3426
3429
|
*/
|
|
@@ -3661,9 +3664,12 @@
|
|
|
3661
3664
|
*/
|
|
3662
3665
|
|
|
3663
3666
|
/**
|
|
3664
|
-
*
|
|
3667
|
+
* Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
|
|
3665
3668
|
*
|
|
3666
|
-
* @
|
|
3669
|
+
* @param tasks Sequence of tasks that are chained together to form a pipeline
|
|
3670
|
+
* @returns A promise that resolves to the prepared tasks.
|
|
3671
|
+
*
|
|
3672
|
+
* @private internal utility of `preparePipeline`
|
|
3667
3673
|
*/
|
|
3668
3674
|
async function prepareTasks(pipeline, tools, options) {
|
|
3669
3675
|
const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
|
|
@@ -4426,7 +4432,7 @@
|
|
|
4426
4432
|
else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
|
|
4427
4433
|
}
|
|
4428
4434
|
if (expectedParameterNames.size === 0) {
|
|
4429
|
-
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent
|
|
4435
|
+
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
|
|
4430
4436
|
Object.freeze(mappedParameters);
|
|
4431
4437
|
return mappedParameters;
|
|
4432
4438
|
}
|
|
@@ -4457,7 +4463,7 @@
|
|
|
4457
4463
|
for (let i = 0; i < expectedParameterNames.size; i++) {
|
|
4458
4464
|
mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
|
|
4459
4465
|
}
|
|
4460
|
-
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent
|
|
4466
|
+
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
|
|
4461
4467
|
Object.freeze(mappedParameters);
|
|
4462
4468
|
return mappedParameters;
|
|
4463
4469
|
}
|
|
@@ -5210,11 +5216,10 @@
|
|
|
5210
5216
|
const highLevelError = new PipelineExecutionError(spaceTrim__default["default"]((block) => `
|
|
5211
5217
|
${error.message}
|
|
5212
5218
|
|
|
5213
|
-
This is error in FOREACH command when mapping data
|
|
5219
|
+
This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
|
|
5214
5220
|
You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
|
|
5215
5221
|
|
|
5216
5222
|
${block(pipelineIdentification)}
|
|
5217
|
-
Subparameter index: ${index}
|
|
5218
5223
|
`));
|
|
5219
5224
|
if (length > BIG_DATASET_TRESHOLD) {
|
|
5220
5225
|
console.error(highLevelError);
|
|
@@ -5242,12 +5247,11 @@
|
|
|
5242
5247
|
catch (error) {
|
|
5243
5248
|
if (length > BIG_DATASET_TRESHOLD) {
|
|
5244
5249
|
console.error(spaceTrim__default["default"]((block) => `
|
|
5245
|
-
|
|
5250
|
+
${error.message}
|
|
5246
5251
|
|
|
5247
|
-
${
|
|
5252
|
+
This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
|
|
5248
5253
|
|
|
5249
5254
|
${block(pipelineIdentification)}
|
|
5250
|
-
Subparameter index: ${index}
|
|
5251
5255
|
`));
|
|
5252
5256
|
return FAILED_VALUE_PLACEHOLDER;
|
|
5253
5257
|
}
|
|
@@ -6165,8 +6169,8 @@
|
|
|
6165
6169
|
extension: 'md',
|
|
6166
6170
|
isVerbose,
|
|
6167
6171
|
});
|
|
6168
|
-
// TODO:
|
|
6169
|
-
// Note: Running
|
|
6172
|
+
// TODO: Determine if Markitdown conversion should run only if the cache file doesn't exist, or always.
|
|
6173
|
+
// Note: Running Markitdown conversion ONLY if the file in the cache does not exist
|
|
6170
6174
|
if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
|
|
6171
6175
|
const src = source.filename || source.url || null;
|
|
6172
6176
|
// console.log('!!', { src, source, cacheFilehandler });
|
|
@@ -6188,11 +6192,11 @@
|
|
|
6188
6192
|
return cacheFilehandler;
|
|
6189
6193
|
}
|
|
6190
6194
|
/**
|
|
6191
|
-
* Scrapes the
|
|
6195
|
+
* Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
|
|
6192
6196
|
*/
|
|
6193
6197
|
async scrape(source) {
|
|
6194
6198
|
const cacheFilehandler = await this.$convert(source);
|
|
6195
|
-
// TODO:
|
|
6199
|
+
// TODO: Ensure this correctly creates the source object for the internal MarkdownScraper using the converted file.
|
|
6196
6200
|
const markdownSource = {
|
|
6197
6201
|
source: source.source,
|
|
6198
6202
|
filename: cacheFilehandler.filename,
|
|
@@ -6336,7 +6340,8 @@
|
|
|
6336
6340
|
*/
|
|
6337
6341
|
|
|
6338
6342
|
/**
|
|
6339
|
-
*
|
|
6343
|
+
* Factory function to create an instance of PdfScraper.
|
|
6344
|
+
* It bundles the scraper class with its metadata.
|
|
6340
6345
|
*
|
|
6341
6346
|
* @public exported from `@promptbook/pdf`
|
|
6342
6347
|
*/
|