@promptbook/markitdown 0.92.0-24 → 0.92.0-26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +45 -41
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +0 -2
- package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
- package/esm/typings/src/constants.d.ts +8 -2
- package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
- package/esm/typings/src/execution/CommonToolsOptions.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +5 -5
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
- package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
- package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
- package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
- package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
- package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
- package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
- package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +2 -2
- package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
- package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
- package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
- package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
- package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
- package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
- package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
- package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
- package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
- package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
- package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
- package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
- package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
- package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
- package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
- package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
- package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
- package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
- package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
- package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
- package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
- package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
- package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
- package/esm/typings/src/types/ModelVariant.d.ts +5 -5
- package/esm/typings/src/types/typeAliases.d.ts +8 -6
- package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
- package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
- package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
- package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
- package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
- package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
- package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
- package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
- package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
- package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
- package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
- package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
- package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
- package/esm/typings/src/version.d.ts +2 -1
- package/package.json +2 -2
- package/umd/index.umd.js +45 -41
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
26
26
|
* @generated
|
|
27
27
|
* @see https://github.com/webgptorg/promptbook
|
|
28
28
|
*/
|
|
29
|
-
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-24';
|
|
29
|
+
const PROMPTBOOK_ENGINE_VERSION = '0.92.0-26';
|
|
30
30
|
/**
|
|
31
31
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
32
32
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -370,7 +370,8 @@ async function isFileExisting(filename, fs) {
|
|
|
370
370
|
*/
|
|
371
371
|
|
|
372
372
|
/**
|
|
373
|
-
*
|
|
373
|
+
* Converts a name to a properly formatted subfolder path for cache storage.
|
|
374
|
+
* Handles normalization and path formatting to create consistent cache directory structures.
|
|
374
375
|
*
|
|
375
376
|
* @private for `FileCacheStorage`
|
|
376
377
|
*/
|
|
@@ -623,10 +624,10 @@ for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
|
|
|
623
624
|
*/
|
|
624
625
|
|
|
625
626
|
/**
|
|
626
|
-
*
|
|
627
|
+
* Removes diacritic marks (accents) from characters in a string.
|
|
627
628
|
*
|
|
628
|
-
* @param input
|
|
629
|
-
* @returns
|
|
629
|
+
* @param input The string containing diacritics to be normalized.
|
|
630
|
+
* @returns The string with diacritics removed or normalized.
|
|
630
631
|
* @public exported from `@promptbook/utils`
|
|
631
632
|
*/
|
|
632
633
|
function removeDiacritics(input) {
|
|
@@ -640,10 +641,10 @@ function removeDiacritics(input) {
|
|
|
640
641
|
*/
|
|
641
642
|
|
|
642
643
|
/**
|
|
643
|
-
*
|
|
644
|
+
* Converts a given text to kebab-case format.
|
|
644
645
|
*
|
|
645
|
-
* @param text
|
|
646
|
-
* @returns
|
|
646
|
+
* @param text The text to be converted.
|
|
647
|
+
* @returns The kebab-case formatted string.
|
|
647
648
|
* @example 'hello-world'
|
|
648
649
|
* @example 'i-love-promptbook'
|
|
649
650
|
* @public exported from `@promptbook/utils`
|
|
@@ -785,11 +786,11 @@ function isValidUrl(url) {
|
|
|
785
786
|
}
|
|
786
787
|
|
|
787
788
|
/**
|
|
788
|
-
*
|
|
789
|
+
* Converts a title string into a normalized name.
|
|
789
790
|
*
|
|
790
|
-
* @param value
|
|
791
|
-
* @returns
|
|
792
|
-
* @example
|
|
791
|
+
* @param value The title string to be converted to a name.
|
|
792
|
+
* @returns A normalized name derived from the input title.
|
|
793
|
+
* @example 'Hello World!' -> 'hello-world'
|
|
793
794
|
* @public exported from `@promptbook/utils`
|
|
794
795
|
*/
|
|
795
796
|
function titleToName(value) {
|
|
@@ -809,9 +810,8 @@ function titleToName(value) {
|
|
|
809
810
|
}
|
|
810
811
|
|
|
811
812
|
/**
|
|
812
|
-
*
|
|
813
|
-
*
|
|
814
|
-
* Note: It also checks if directory exists and creates it if not
|
|
813
|
+
* Retrieves an intermediate source for a scraper based on the knowledge source.
|
|
814
|
+
* Manages the caching and retrieval of intermediate scraper results for optimized performance.
|
|
815
815
|
*
|
|
816
816
|
* @private as internal utility for scrapers
|
|
817
817
|
*/
|
|
@@ -1445,13 +1445,13 @@ const ORDER_OF_PIPELINE_JSON = [
|
|
|
1445
1445
|
*/
|
|
1446
1446
|
const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
|
|
1447
1447
|
/**
|
|
1448
|
-
*
|
|
1448
|
+
* Placeholder value indicating a parameter is missing its value.
|
|
1449
1449
|
*
|
|
1450
1450
|
* @private within the repository
|
|
1451
1451
|
*/
|
|
1452
1452
|
const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
|
|
1453
1453
|
/**
|
|
1454
|
-
*
|
|
1454
|
+
* Placeholder value indicating a parameter is restricted and cannot be used directly.
|
|
1455
1455
|
*
|
|
1456
1456
|
* @private within the repository
|
|
1457
1457
|
*/
|
|
@@ -2114,8 +2114,8 @@ class PipelineExecutionError extends Error {
|
|
|
2114
2114
|
* @public exported from `@promptbook/core`
|
|
2115
2115
|
*/
|
|
2116
2116
|
function isPipelinePrepared(pipeline) {
|
|
2117
|
-
// Note: Ignoring `pipeline.preparations`
|
|
2118
|
-
// Note: Ignoring `pipeline.knowledgePieces`
|
|
2117
|
+
// Note: Ignoring `pipeline.preparations`
|
|
2118
|
+
// Note: Ignoring `pipeline.knowledgePieces`
|
|
2119
2119
|
if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
|
|
2120
2120
|
// TODO: !!! Comment this out
|
|
2121
2121
|
console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
|
|
@@ -3099,10 +3099,10 @@ function $getGlobalScope() {
|
|
|
3099
3099
|
}
|
|
3100
3100
|
|
|
3101
3101
|
/**
|
|
3102
|
-
*
|
|
3102
|
+
* Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
|
|
3103
3103
|
*
|
|
3104
|
-
* @param text
|
|
3105
|
-
* @returns
|
|
3104
|
+
* @param text The text string to be converted to SCREAMING_CASE format.
|
|
3105
|
+
* @returns The normalized text in SCREAMING_CASE format.
|
|
3106
3106
|
* @example 'HELLO_WORLD'
|
|
3107
3107
|
* @example 'I_LOVE_PROMPTBOOK'
|
|
3108
3108
|
* @public exported from `@promptbook/utils`
|
|
@@ -3154,10 +3154,10 @@ function normalizeTo_SCREAMING_CASE(text) {
|
|
|
3154
3154
|
*/
|
|
3155
3155
|
|
|
3156
3156
|
/**
|
|
3157
|
-
*
|
|
3157
|
+
* Normalizes a text string to snake_case format.
|
|
3158
3158
|
*
|
|
3159
|
-
* @param text
|
|
3160
|
-
* @returns
|
|
3159
|
+
* @param text The text string to be converted to snake_case format.
|
|
3160
|
+
* @returns The normalized text in snake_case format.
|
|
3161
3161
|
* @example 'hello_world'
|
|
3162
3162
|
* @example 'i_love_promptbook'
|
|
3163
3163
|
* @public exported from `@promptbook/utils`
|
|
@@ -3227,10 +3227,11 @@ const $scrapersMetadataRegister = new $Register('scrapers_metadata');
|
|
|
3227
3227
|
*/
|
|
3228
3228
|
|
|
3229
3229
|
/**
|
|
3230
|
-
*
|
|
3230
|
+
* Registry for all available scrapers in the system.
|
|
3231
|
+
* Central point for registering and accessing different types of content scrapers.
|
|
3231
3232
|
*
|
|
3232
3233
|
* Note: `$` is used to indicate that this interacts with the global scope
|
|
3233
|
-
* @singleton Only one instance of each register is created per build, but
|
|
3234
|
+
* @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
|
|
3234
3235
|
* @public exported from `@promptbook/core`
|
|
3235
3236
|
*/
|
|
3236
3237
|
const $scrapersRegister = new $Register('scraper_constructors');
|
|
@@ -3408,7 +3409,9 @@ const promptbookFetch = async (urlOrRequest, init) => {
|
|
|
3408
3409
|
*/
|
|
3409
3410
|
|
|
3410
3411
|
/**
|
|
3411
|
-
*
|
|
3412
|
+
* Factory function that creates a handler for processing knowledge sources.
|
|
3413
|
+
* Provides standardized processing of different types of knowledge sources
|
|
3414
|
+
* across various scraper implementations.
|
|
3412
3415
|
*
|
|
3413
3416
|
* @public exported from `@promptbook/core`
|
|
3414
3417
|
*/
|
|
@@ -3649,9 +3652,12 @@ TODO: [🧊] This is how it can look in future
|
|
|
3649
3652
|
*/
|
|
3650
3653
|
|
|
3651
3654
|
/**
|
|
3652
|
-
*
|
|
3655
|
+
* Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
|
|
3653
3656
|
*
|
|
3654
|
-
* @
|
|
3657
|
+
* @param tasks Sequence of tasks that are chained together to form a pipeline
|
|
3658
|
+
* @returns A promise that resolves to the prepared tasks.
|
|
3659
|
+
*
|
|
3660
|
+
* @private internal utility of `preparePipeline`
|
|
3655
3661
|
*/
|
|
3656
3662
|
async function prepareTasks(pipeline, tools, options) {
|
|
3657
3663
|
const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
|
|
@@ -4414,7 +4420,7 @@ function mapAvailableToExpectedParameters(options) {
|
|
|
4414
4420
|
else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
|
|
4415
4421
|
}
|
|
4416
4422
|
if (expectedParameterNames.size === 0) {
|
|
4417
|
-
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent
|
|
4423
|
+
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
|
|
4418
4424
|
Object.freeze(mappedParameters);
|
|
4419
4425
|
return mappedParameters;
|
|
4420
4426
|
}
|
|
@@ -4445,7 +4451,7 @@ function mapAvailableToExpectedParameters(options) {
|
|
|
4445
4451
|
for (let i = 0; i < expectedParameterNames.size; i++) {
|
|
4446
4452
|
mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
|
|
4447
4453
|
}
|
|
4448
|
-
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent
|
|
4454
|
+
// Note: [👨👨👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
|
|
4449
4455
|
Object.freeze(mappedParameters);
|
|
4450
4456
|
return mappedParameters;
|
|
4451
4457
|
}
|
|
@@ -5198,11 +5204,10 @@ async function executeFormatSubvalues(options) {
|
|
|
5198
5204
|
const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
|
|
5199
5205
|
${error.message}
|
|
5200
5206
|
|
|
5201
|
-
This is error in FOREACH command when mapping data
|
|
5207
|
+
This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
|
|
5202
5208
|
You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
|
|
5203
5209
|
|
|
5204
5210
|
${block(pipelineIdentification)}
|
|
5205
|
-
Subparameter index: ${index}
|
|
5206
5211
|
`));
|
|
5207
5212
|
if (length > BIG_DATASET_TRESHOLD) {
|
|
5208
5213
|
console.error(highLevelError);
|
|
@@ -5230,12 +5235,11 @@ async function executeFormatSubvalues(options) {
|
|
|
5230
5235
|
catch (error) {
|
|
5231
5236
|
if (length > BIG_DATASET_TRESHOLD) {
|
|
5232
5237
|
console.error(spaceTrim((block) => `
|
|
5233
|
-
|
|
5238
|
+
${error.message}
|
|
5234
5239
|
|
|
5235
|
-
${
|
|
5240
|
+
This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
|
|
5236
5241
|
|
|
5237
5242
|
${block(pipelineIdentification)}
|
|
5238
|
-
Subparameter index: ${index}
|
|
5239
5243
|
`));
|
|
5240
5244
|
return FAILED_VALUE_PLACEHOLDER;
|
|
5241
5245
|
}
|
|
@@ -6153,8 +6157,8 @@ class MarkitdownScraper {
|
|
|
6153
6157
|
extension: 'md',
|
|
6154
6158
|
isVerbose,
|
|
6155
6159
|
});
|
|
6156
|
-
// TODO:
|
|
6157
|
-
// Note: Running
|
|
6160
|
+
// TODO: Determine if Markitdown conversion should run only if the cache file doesn't exist, or always.
|
|
6161
|
+
// Note: Running Markitdown conversion ONLY if the file in the cache does not exist
|
|
6158
6162
|
if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
|
|
6159
6163
|
const src = source.filename || source.url || null;
|
|
6160
6164
|
// console.log('!!', { src, source, cacheFilehandler });
|
|
@@ -6176,11 +6180,11 @@ class MarkitdownScraper {
|
|
|
6176
6180
|
return cacheFilehandler;
|
|
6177
6181
|
}
|
|
6178
6182
|
/**
|
|
6179
|
-
* Scrapes the
|
|
6183
|
+
* Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
|
|
6180
6184
|
*/
|
|
6181
6185
|
async scrape(source) {
|
|
6182
6186
|
const cacheFilehandler = await this.$convert(source);
|
|
6183
|
-
// TODO:
|
|
6187
|
+
// TODO: Ensure this correctly creates the source object for the internal MarkdownScraper using the converted file.
|
|
6184
6188
|
const markdownSource = {
|
|
6185
6189
|
source: source.source,
|
|
6186
6190
|
filename: cacheFilehandler.filename,
|