@promptbook/markitdown 0.92.0-24 → 0.92.0-26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/esm/index.es.js +45 -41
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/core.index.d.ts +0 -2
  4. package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
  5. package/esm/typings/src/constants.d.ts +8 -2
  6. package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
  7. package/esm/typings/src/execution/CommonToolsOptions.d.ts +3 -3
  8. package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
  9. package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
  10. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
  11. package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +5 -5
  12. package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
  13. package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
  14. package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
  15. package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
  16. package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
  17. package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
  18. package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
  19. package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
  20. package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
  21. package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +2 -2
  22. package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
  23. package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
  24. package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
  25. package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
  26. package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
  27. package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
  28. package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
  29. package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
  30. package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
  31. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
  32. package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
  33. package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
  34. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
  35. package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
  36. package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
  37. package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
  38. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
  39. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
  40. package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
  41. package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
  42. package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
  43. package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
  44. package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
  45. package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
  46. package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
  47. package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
  48. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
  49. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
  50. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
  51. package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
  52. package/esm/typings/src/types/ModelVariant.d.ts +5 -5
  53. package/esm/typings/src/types/typeAliases.d.ts +8 -6
  54. package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
  55. package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
  56. package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
  57. package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
  58. package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
  59. package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
  60. package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
  61. package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
  62. package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
  63. package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
  64. package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
  65. package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
  66. package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
  67. package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
  68. package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
  69. package/esm/typings/src/version.d.ts +2 -1
  70. package/package.json +2 -2
  71. package/umd/index.umd.js +45 -41
  72. package/umd/index.umd.js.map +1 -1
package/esm/index.es.js CHANGED
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-24';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-26';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -370,7 +370,8 @@ async function isFileExisting(filename, fs) {
370
370
  */
371
371
 
372
372
  /**
373
- * @@@
373
+ * Converts a name to a properly formatted subfolder path for cache storage.
374
+ * Handles normalization and path formatting to create consistent cache directory structures.
374
375
  *
375
376
  * @private for `FileCacheStorage`
376
377
  */
@@ -623,10 +624,10 @@ for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
623
624
  */
624
625
 
625
626
  /**
626
- * @@@
627
+ * Removes diacritic marks (accents) from characters in a string.
627
628
  *
628
- * @param input @@@
629
- * @returns @@@
629
+ * @param input The string containing diacritics to be normalized.
630
+ * @returns The string with diacritics removed or normalized.
630
631
  * @public exported from `@promptbook/utils`
631
632
  */
632
633
  function removeDiacritics(input) {
@@ -640,10 +641,10 @@ function removeDiacritics(input) {
640
641
  */
641
642
 
642
643
  /**
643
- * @@@
644
+ * Converts a given text to kebab-case format.
644
645
  *
645
- * @param text @@@
646
- * @returns @@@
646
+ * @param text The text to be converted.
647
+ * @returns The kebab-case formatted string.
647
648
  * @example 'hello-world'
648
649
  * @example 'i-love-promptbook'
649
650
  * @public exported from `@promptbook/utils`
@@ -785,11 +786,11 @@ function isValidUrl(url) {
785
786
  }
786
787
 
787
788
  /**
788
- * @@@
789
+ * Converts a title string into a normalized name.
789
790
  *
790
- * @param value @@@
791
- * @returns @@@
792
- * @example @@@
791
+ * @param value The title string to be converted to a name.
792
+ * @returns A normalized name derived from the input title.
793
+ * @example 'Hello World!' -> 'hello-world'
793
794
  * @public exported from `@promptbook/utils`
794
795
  */
795
796
  function titleToName(value) {
@@ -809,9 +810,8 @@ function titleToName(value) {
809
810
  }
810
811
 
811
812
  /**
812
- * Create a filename for intermediate cache for scrapers
813
- *
814
- * Note: It also checks if directory exists and creates it if not
813
+ * Retrieves an intermediate source for a scraper based on the knowledge source.
814
+ * Manages the caching and retrieval of intermediate scraper results for optimized performance.
815
815
  *
816
816
  * @private as internal utility for scrapers
817
817
  */
@@ -1445,13 +1445,13 @@ const ORDER_OF_PIPELINE_JSON = [
1445
1445
  */
1446
1446
  const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
1447
1447
  /**
1448
- * @@@
1448
+ * Placeholder value indicating a parameter is missing its value.
1449
1449
  *
1450
1450
  * @private within the repository
1451
1451
  */
1452
1452
  const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
1453
1453
  /**
1454
- * @@@
1454
+ * Placeholder value indicating a parameter is restricted and cannot be used directly.
1455
1455
  *
1456
1456
  * @private within the repository
1457
1457
  */
@@ -2114,8 +2114,8 @@ class PipelineExecutionError extends Error {
2114
2114
  * @public exported from `@promptbook/core`
2115
2115
  */
2116
2116
  function isPipelinePrepared(pipeline) {
2117
- // Note: Ignoring `pipeline.preparations` @@@
2118
- // Note: Ignoring `pipeline.knowledgePieces` @@@
2117
+ // Note: Ignoring `pipeline.preparations`
2118
+ // Note: Ignoring `pipeline.knowledgePieces`
2119
2119
  if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
2120
2120
  // TODO: !!! Comment this out
2121
2121
  console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
@@ -3099,10 +3099,10 @@ function $getGlobalScope() {
3099
3099
  }
3100
3100
 
3101
3101
  /**
3102
- * @@@
3102
+ * Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
3103
3103
  *
3104
- * @param text @@@
3105
- * @returns @@@
3104
+ * @param text The text string to be converted to SCREAMING_CASE format.
3105
+ * @returns The normalized text in SCREAMING_CASE format.
3106
3106
  * @example 'HELLO_WORLD'
3107
3107
  * @example 'I_LOVE_PROMPTBOOK'
3108
3108
  * @public exported from `@promptbook/utils`
@@ -3154,10 +3154,10 @@ function normalizeTo_SCREAMING_CASE(text) {
3154
3154
  */
3155
3155
 
3156
3156
  /**
3157
- * @@@
3157
+ * Normalizes a text string to snake_case format.
3158
3158
  *
3159
- * @param text @@@
3160
- * @returns @@@
3159
+ * @param text The text string to be converted to snake_case format.
3160
+ * @returns The normalized text in snake_case format.
3161
3161
  * @example 'hello_world'
3162
3162
  * @example 'i_love_promptbook'
3163
3163
  * @public exported from `@promptbook/utils`
@@ -3227,10 +3227,11 @@ const $scrapersMetadataRegister = new $Register('scrapers_metadata');
3227
3227
  */
3228
3228
 
3229
3229
  /**
3230
- * @@@
3230
+ * Registry for all available scrapers in the system.
3231
+ * Central point for registering and accessing different types of content scrapers.
3231
3232
  *
3232
3233
  * Note: `$` is used to indicate that this interacts with the global scope
3233
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3234
+ * @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
3234
3235
  * @public exported from `@promptbook/core`
3235
3236
  */
3236
3237
  const $scrapersRegister = new $Register('scraper_constructors');
@@ -3408,7 +3409,9 @@ const promptbookFetch = async (urlOrRequest, init) => {
3408
3409
  */
3409
3410
 
3410
3411
  /**
3411
- * @@@
3412
+ * Factory function that creates a handler for processing knowledge sources.
3413
+ * Provides standardized processing of different types of knowledge sources
3414
+ * across various scraper implementations.
3412
3415
  *
3413
3416
  * @public exported from `@promptbook/core`
3414
3417
  */
@@ -3649,9 +3652,12 @@ TODO: [🧊] This is how it can look in future
3649
3652
  */
3650
3653
 
3651
3654
  /**
3652
- * @@@
3655
+ * Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
3653
3656
  *
3654
- * @public exported from `@promptbook/core`
3657
+ * @param tasks Sequence of tasks that are chained together to form a pipeline
3658
+ * @returns A promise that resolves to the prepared tasks.
3659
+ *
3660
+ * @private internal utility of `preparePipeline`
3655
3661
  */
3656
3662
  async function prepareTasks(pipeline, tools, options) {
3657
3663
  const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
@@ -4414,7 +4420,7 @@ function mapAvailableToExpectedParameters(options) {
4414
4420
  else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
4415
4421
  }
4416
4422
  if (expectedParameterNames.size === 0) {
4417
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4423
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4418
4424
  Object.freeze(mappedParameters);
4419
4425
  return mappedParameters;
4420
4426
  }
@@ -4445,7 +4451,7 @@ function mapAvailableToExpectedParameters(options) {
4445
4451
  for (let i = 0; i < expectedParameterNames.size; i++) {
4446
4452
  mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
4447
4453
  }
4448
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4454
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4449
4455
  Object.freeze(mappedParameters);
4450
4456
  return mappedParameters;
4451
4457
  }
@@ -5198,11 +5204,10 @@ async function executeFormatSubvalues(options) {
5198
5204
  const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
5199
5205
  ${error.message}
5200
5206
 
5201
- This is error in FOREACH command when mapping data
5207
+ This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5202
5208
  You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5203
5209
 
5204
5210
  ${block(pipelineIdentification)}
5205
- Subparameter index: ${index}
5206
5211
  `));
5207
5212
  if (length > BIG_DATASET_TRESHOLD) {
5208
5213
  console.error(highLevelError);
@@ -5230,12 +5235,11 @@ async function executeFormatSubvalues(options) {
5230
5235
  catch (error) {
5231
5236
  if (length > BIG_DATASET_TRESHOLD) {
5232
5237
  console.error(spaceTrim((block) => `
5233
- Error in FOREACH command:
5238
+ ${error.message}
5234
5239
 
5235
- ${block(pipelineIdentification)}
5240
+ This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5236
5241
 
5237
5242
  ${block(pipelineIdentification)}
5238
- Subparameter index: ${index}
5239
5243
  `));
5240
5244
  return FAILED_VALUE_PLACEHOLDER;
5241
5245
  }
@@ -6153,8 +6157,8 @@ class MarkitdownScraper {
6153
6157
  extension: 'md',
6154
6158
  isVerbose,
6155
6159
  });
6156
- // TODO: @@@ Preserve, delete or modify
6157
- // Note: Running Pandoc ONLY if the file in the cache does not exist
6160
+ // TODO: Determine if Markitdown conversion should run only if the cache file doesn't exist, or always.
6161
+ // Note: Running Markitdown conversion ONLY if the file in the cache does not exist
6158
6162
  if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
6159
6163
  const src = source.filename || source.url || null;
6160
6164
  // console.log('!!', { src, source, cacheFilehandler });
@@ -6176,11 +6180,11 @@ class MarkitdownScraper {
6176
6180
  return cacheFilehandler;
6177
6181
  }
6178
6182
  /**
6179
- * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
6183
+ * Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
6180
6184
  */
6181
6185
  async scrape(source) {
6182
6186
  const cacheFilehandler = await this.$convert(source);
6183
- // TODO: @@@ Preserve, delete or modify
6187
+ // TODO: Ensure this correctly creates the source object for the internal MarkdownScraper using the converted file.
6184
6188
  const markdownSource = {
6185
6189
  source: source.source,
6186
6190
  filename: cacheFilehandler.filename,