@promptbook/pdf 0.92.0-23 → 0.92.0-25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/esm/index.es.js +71 -47
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/core.index.d.ts +0 -2
  4. package/esm/typings/src/collection/PipelineCollection.d.ts +0 -2
  5. package/esm/typings/src/collection/SimplePipelineCollection.d.ts +1 -1
  6. package/esm/typings/src/commands/FORMFACTOR/formfactorCommandParser.d.ts +1 -1
  7. package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
  8. package/esm/typings/src/config.d.ts +8 -4
  9. package/esm/typings/src/constants.d.ts +2 -2
  10. package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
  11. package/esm/typings/src/execution/CommonToolsOptions.d.ts +3 -3
  12. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +10 -10
  13. package/esm/typings/src/formfactors/_boilerplate/BoilerplateFormfactorDefinition.d.ts +3 -2
  14. package/esm/typings/src/formfactors/_common/string_formfactor_name.d.ts +2 -1
  15. package/esm/typings/src/formfactors/index.d.ts +1 -1
  16. package/esm/typings/src/formfactors/sheets/SheetsFormfactorDefinition.d.ts +3 -2
  17. package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
  18. package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
  19. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
  20. package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +5 -5
  21. package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
  22. package/esm/typings/src/llm-providers/_common/utils/cache/cacheLlmTools.d.ts +3 -3
  23. package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
  24. package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
  25. package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
  26. package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
  27. package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
  28. package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
  29. package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
  30. package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
  31. package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +2 -2
  32. package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
  33. package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
  34. package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
  35. package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
  36. package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
  37. package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
  38. package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
  39. package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
  40. package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
  41. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
  42. package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
  43. package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
  44. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
  45. package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
  46. package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
  47. package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
  48. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
  49. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
  50. package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
  51. package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
  52. package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
  53. package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
  54. package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
  55. package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
  56. package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
  57. package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
  58. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
  59. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
  60. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
  61. package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
  62. package/esm/typings/src/types/ModelVariant.d.ts +5 -5
  63. package/esm/typings/src/types/typeAliases.d.ts +8 -6
  64. package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
  65. package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
  66. package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
  67. package/esm/typings/src/utils/environment/$getGlobalScope.d.ts +2 -1
  68. package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
  69. package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
  70. package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
  71. package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
  72. package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
  73. package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
  74. package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
  75. package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
  76. package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
  77. package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
  78. package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
  79. package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
  80. package/esm/typings/src/version.d.ts +2 -1
  81. package/package.json +2 -2
  82. package/umd/index.umd.js +71 -47
  83. package/umd/index.umd.js.map +1 -1
package/esm/index.es.js CHANGED
@@ -26,7 +26,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-23';
29
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-25';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -103,11 +103,20 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
103
103
  */
104
104
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
105
105
  /**
106
- * @@@
106
+ * Threshold value that determines when a dataset is considered "big"
107
+ * and may require special handling or optimizations
108
+ *
109
+ * For example, when an error occurs in one item of a big dataset, it will not fail the whole pipeline
107
110
  *
108
111
  * @public exported from `@promptbook/core`
109
112
  */
110
113
  const BIG_DATASET_TRESHOLD = 50;
114
+ /**
115
+ * Placeholder text used to represent the value of a failed operation
116
+ *
117
+ * @public exported from `@promptbook/core`
118
+ */
119
+ const FAILED_VALUE_PLACEHOLDER = '!?';
111
120
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
112
121
  /**
113
122
  * The maximum number of iterations for loops
@@ -361,7 +370,8 @@ async function isFileExisting(filename, fs) {
361
370
  */
362
371
 
363
372
  /**
364
- * @@@
373
+ * Converts a name to a properly formatted subfolder path for cache storage.
374
+ * Handles normalization and path formatting to create consistent cache directory structures.
365
375
  *
366
376
  * @private for `FileCacheStorage`
367
377
  */
@@ -614,10 +624,10 @@ for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
614
624
  */
615
625
 
616
626
  /**
617
- * @@@
627
+ * Removes diacritic marks (accents) from characters in a string.
618
628
  *
619
- * @param input @@@
620
- * @returns @@@
629
+ * @param input The string containing diacritics to be normalized.
630
+ * @returns The string with diacritics removed or normalized.
621
631
  * @public exported from `@promptbook/utils`
622
632
  */
623
633
  function removeDiacritics(input) {
@@ -631,10 +641,10 @@ function removeDiacritics(input) {
631
641
  */
632
642
 
633
643
  /**
634
- * @@@
644
+ * Converts a given text to kebab-case format.
635
645
  *
636
- * @param text @@@
637
- * @returns @@@
646
+ * @param text The text to be converted.
647
+ * @returns The kebab-case formatted string.
638
648
  * @example 'hello-world'
639
649
  * @example 'i-love-promptbook'
640
650
  * @public exported from `@promptbook/utils`
@@ -776,11 +786,11 @@ function isValidUrl(url) {
776
786
  }
777
787
 
778
788
  /**
779
- * @@@
789
+ * Converts a title string into a normalized name.
780
790
  *
781
- * @param value @@@
782
- * @returns @@@
783
- * @example @@@
791
+ * @param value The title string to be converted to a name.
792
+ * @returns A normalized name derived from the input title.
793
+ * @example 'Hello World!' -> 'hello-world'
784
794
  * @public exported from `@promptbook/utils`
785
795
  */
786
796
  function titleToName(value) {
@@ -813,9 +823,8 @@ function TODO_USE(...value) {
813
823
  }
814
824
 
815
825
  /**
816
- * Create a filename for intermediate cache for scrapers
817
- *
818
- * Note: It also checks if directory exists and creates it if not
826
+ * Retrieves an intermediate source for a scraper based on the knowledge source.
827
+ * Manages the caching and retrieval of intermediate scraper results for optimized performance.
819
828
  *
820
829
  * @private as internal utility for scrapers
821
830
  */
@@ -1449,13 +1458,13 @@ const ORDER_OF_PIPELINE_JSON = [
1449
1458
  */
1450
1459
  const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
1451
1460
  /**
1452
- * @@@
1461
+ * Placeholder value indicating a parameter is missing its value.
1453
1462
  *
1454
1463
  * @private within the repository
1455
1464
  */
1456
1465
  const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
1457
1466
  /**
1458
- * @@@
1467
+ * Placeholder value indicating a parameter is restricted and cannot be used directly.
1459
1468
  *
1460
1469
  * @private within the repository
1461
1470
  */
@@ -1954,7 +1963,7 @@ class SimplePipelineCollection {
1954
1963
  /**
1955
1964
  * Constructs a pipeline collection from pipelines
1956
1965
  *
1957
- * @param pipelines @@@
1966
+ * @param pipelines Array of pipeline JSON objects to include in the collection
1958
1967
  *
1959
1968
  * Note: During construction, the logic of all pipelines is validated
1960
1969
  * Note: It is not recommended to use this constructor directly, use `createCollectionFromJson` *(or other variant)* instead
@@ -2118,8 +2127,8 @@ class PipelineExecutionError extends Error {
2118
2127
  * @public exported from `@promptbook/core`
2119
2128
  */
2120
2129
  function isPipelinePrepared(pipeline) {
2121
- // Note: Ignoring `pipeline.preparations` @@@
2122
- // Note: Ignoring `pipeline.knowledgePieces` @@@
2130
+ // Note: Ignoring `pipeline.preparations`
2131
+ // Note: Ignoring `pipeline.knowledgePieces`
2123
2132
  if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
2124
2133
  // TODO: !!! Comment this out
2125
2134
  console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
@@ -3091,7 +3100,8 @@ async function preparePersona(personaDescription, tools, options) {
3091
3100
  */
3092
3101
 
3093
3102
  /**
3094
- * @@@
3103
+ * Safely retrieves the global scope object (window in browser, global in Node.js)
3104
+ * regardless of the JavaScript environment in which the code is running
3095
3105
  *
3096
3106
  * Note: `$` is used to indicate that this function is not a pure function - it accesses the global scope
3097
3107
  *
@@ -3102,10 +3112,10 @@ function $getGlobalScope() {
3102
3112
  }
3103
3113
 
3104
3114
  /**
3105
- * @@@
3115
+ * Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
3106
3116
  *
3107
- * @param text @@@
3108
- * @returns @@@
3117
+ * @param text The text string to be converted to SCREAMING_CASE format.
3118
+ * @returns The normalized text in SCREAMING_CASE format.
3109
3119
  * @example 'HELLO_WORLD'
3110
3120
  * @example 'I_LOVE_PROMPTBOOK'
3111
3121
  * @public exported from `@promptbook/utils`
@@ -3157,10 +3167,10 @@ function normalizeTo_SCREAMING_CASE(text) {
3157
3167
  */
3158
3168
 
3159
3169
  /**
3160
- * @@@
3170
+ * Normalizes a text string to snake_case format.
3161
3171
  *
3162
- * @param text @@@
3163
- * @returns @@@
3172
+ * @param text The text string to be converted to snake_case format.
3173
+ * @returns The normalized text in snake_case format.
3164
3174
  * @example 'hello_world'
3165
3175
  * @example 'i_love_promptbook'
3166
3176
  * @public exported from `@promptbook/utils`
@@ -3230,10 +3240,11 @@ const $scrapersMetadataRegister = new $Register('scrapers_metadata');
3230
3240
  */
3231
3241
 
3232
3242
  /**
3233
- * @@@
3243
+ * Registry for all available scrapers in the system.
3244
+ * Central point for registering and accessing different types of content scrapers.
3234
3245
  *
3235
3246
  * Note: `$` is used to indicate that this interacts with the global scope
3236
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3247
+ * @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
3237
3248
  * @public exported from `@promptbook/core`
3238
3249
  */
3239
3250
  const $scrapersRegister = new $Register('scraper_constructors');
@@ -3411,7 +3422,9 @@ const promptbookFetch = async (urlOrRequest, init) => {
3411
3422
  */
3412
3423
 
3413
3424
  /**
3414
- * @@@
3425
+ * Factory function that creates a handler for processing knowledge sources.
3426
+ * Provides standardized processing of different types of knowledge sources
3427
+ * across various scraper implementations.
3415
3428
  *
3416
3429
  * @public exported from `@promptbook/core`
3417
3430
  */
@@ -3652,9 +3665,12 @@ TODO: [🧊] This is how it can look in future
3652
3665
  */
3653
3666
 
3654
3667
  /**
3655
- * @@@
3668
+ * Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
3656
3669
  *
3657
- * @public exported from `@promptbook/core`
3670
+ * @param tasks Sequence of tasks that are chained together to form a pipeline
3671
+ * @returns A promise that resolves to the prepared tasks.
3672
+ *
3673
+ * @private internal utility of `preparePipeline`
3658
3674
  */
3659
3675
  async function prepareTasks(pipeline, tools, options) {
3660
3676
  const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
@@ -4192,6 +4208,15 @@ const CsvFormatParser = {
4192
4208
  mappedData.push(mappedRow);
4193
4209
  if (onProgress) {
4194
4210
  // Note: Report the CSV with all rows mapped so far
4211
+ /*
4212
+ !!!!
4213
+ // Report progress with updated value
4214
+ const progressData = mappedData.map((row, i) =>
4215
+ i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
4216
+ );
4217
+
4218
+
4219
+ */
4195
4220
  await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4196
4221
  }
4197
4222
  }
@@ -4408,7 +4433,7 @@ function mapAvailableToExpectedParameters(options) {
4408
4433
  else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
4409
4434
  }
4410
4435
  if (expectedParameterNames.size === 0) {
4411
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4436
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4412
4437
  Object.freeze(mappedParameters);
4413
4438
  return mappedParameters;
4414
4439
  }
@@ -4439,7 +4464,7 @@ function mapAvailableToExpectedParameters(options) {
4439
4464
  for (let i = 0; i < expectedParameterNames.size; i++) {
4440
4465
  mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
4441
4466
  }
4442
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4467
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4443
4468
  Object.freeze(mappedParameters);
4444
4469
  return mappedParameters;
4445
4470
  }
@@ -5192,15 +5217,14 @@ async function executeFormatSubvalues(options) {
5192
5217
  const highLevelError = new PipelineExecutionError(spaceTrim((block) => `
5193
5218
  ${error.message}
5194
5219
 
5195
- This is error in FOREACH command when mapping data
5220
+ This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5196
5221
  You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5197
5222
 
5198
5223
  ${block(pipelineIdentification)}
5199
- Subparameter index: ${index}
5200
5224
  `));
5201
5225
  if (length > BIG_DATASET_TRESHOLD) {
5202
5226
  console.error(highLevelError);
5203
- return '~';
5227
+ return FAILED_VALUE_PLACEHOLDER;
5204
5228
  }
5205
5229
  throw highLevelError;
5206
5230
  }
@@ -5224,14 +5248,13 @@ async function executeFormatSubvalues(options) {
5224
5248
  catch (error) {
5225
5249
  if (length > BIG_DATASET_TRESHOLD) {
5226
5250
  console.error(spaceTrim((block) => `
5227
- Error in FOREACH command:
5251
+ ${error.message}
5228
5252
 
5229
- ${block(pipelineIdentification)}
5253
+ This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5230
5254
 
5231
5255
  ${block(pipelineIdentification)}
5232
- Subparameter index: ${index}
5233
5256
  `));
5234
- return '~';
5257
+ return FAILED_VALUE_PLACEHOLDER;
5235
5258
  }
5236
5259
  throw error;
5237
5260
  }
@@ -6147,8 +6170,8 @@ class MarkitdownScraper {
6147
6170
  extension: 'md',
6148
6171
  isVerbose,
6149
6172
  });
6150
- // TODO: @@@ Preserve, delete or modify
6151
- // Note: Running Pandoc ONLY if the file in the cache does not exist
6173
+ // TODO: Determine if Markitdown conversion should run only if the cache file doesn't exist, or always.
6174
+ // Note: Running Markitdown conversion ONLY if the file in the cache does not exist
6152
6175
  if (!(await isFileExisting(cacheFilehandler.filename, this.tools.fs))) {
6153
6176
  const src = source.filename || source.url || null;
6154
6177
  // console.log('!!', { src, source, cacheFilehandler });
@@ -6170,11 +6193,11 @@ class MarkitdownScraper {
6170
6193
  return cacheFilehandler;
6171
6194
  }
6172
6195
  /**
6173
- * Scrapes the docx file and returns the knowledge pieces or `null` if it can't scrape it
6196
+ * Scrapes the source document (PDF, DOCX, etc.) and returns the knowledge pieces or `null` if it can't scrape it.
6174
6197
  */
6175
6198
  async scrape(source) {
6176
6199
  const cacheFilehandler = await this.$convert(source);
6177
- // TODO: @@@ Preserve, delete or modify
6200
+ // TODO: Ensure this correctly creates the source object for the internal MarkdownScraper using the converted file.
6178
6201
  const markdownSource = {
6179
6202
  source: source.source,
6180
6203
  filename: cacheFilehandler.filename,
@@ -6318,7 +6341,8 @@ class PdfScraper {
6318
6341
  */
6319
6342
 
6320
6343
  /**
6321
- * @@@
6344
+ * Factory function to create an instance of PdfScraper.
6345
+ * It bundles the scraper class with its metadata.
6322
6346
  *
6323
6347
  * @public exported from `@promptbook/pdf`
6324
6348
  */