@promptbook/website-crawler 0.92.0-23 → 0.92.0-25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. package/esm/index.es.js +67 -43
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/core.index.d.ts +0 -2
  4. package/esm/typings/src/collection/PipelineCollection.d.ts +0 -2
  5. package/esm/typings/src/collection/SimplePipelineCollection.d.ts +1 -1
  6. package/esm/typings/src/commands/FORMFACTOR/formfactorCommandParser.d.ts +1 -1
  7. package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
  8. package/esm/typings/src/config.d.ts +8 -4
  9. package/esm/typings/src/constants.d.ts +2 -2
  10. package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
  11. package/esm/typings/src/execution/CommonToolsOptions.d.ts +3 -3
  12. package/esm/typings/src/formats/_common/FormatSubvalueParser.d.ts +10 -10
  13. package/esm/typings/src/formfactors/_boilerplate/BoilerplateFormfactorDefinition.d.ts +3 -2
  14. package/esm/typings/src/formfactors/_common/string_formfactor_name.d.ts +2 -1
  15. package/esm/typings/src/formfactors/index.d.ts +1 -1
  16. package/esm/typings/src/formfactors/sheets/SheetsFormfactorDefinition.d.ts +3 -2
  17. package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
  18. package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
  19. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
  20. package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +5 -5
  21. package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
  22. package/esm/typings/src/llm-providers/_common/utils/cache/cacheLlmTools.d.ts +3 -3
  23. package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
  24. package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
  25. package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
  26. package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
  27. package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
  28. package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
  29. package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
  30. package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
  31. package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +2 -2
  32. package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
  33. package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
  34. package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
  35. package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
  36. package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
  37. package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
  38. package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
  39. package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
  40. package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
  41. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
  42. package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
  43. package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
  44. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
  45. package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
  46. package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
  47. package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
  48. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
  49. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
  50. package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
  51. package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
  52. package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
  53. package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
  54. package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
  55. package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
  56. package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
  57. package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
  58. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
  59. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
  60. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
  61. package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
  62. package/esm/typings/src/types/ModelVariant.d.ts +5 -5
  63. package/esm/typings/src/types/typeAliases.d.ts +8 -6
  64. package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
  65. package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
  66. package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
  67. package/esm/typings/src/utils/environment/$getGlobalScope.d.ts +2 -1
  68. package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
  69. package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
  70. package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
  71. package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
  72. package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
  73. package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
  74. package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
  75. package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
  76. package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
  77. package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
  78. package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
  79. package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
  80. package/esm/typings/src/version.d.ts +2 -1
  81. package/package.json +2 -2
  82. package/umd/index.umd.js +67 -43
  83. package/umd/index.umd.js.map +1 -1
package/esm/index.es.js CHANGED
@@ -29,7 +29,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
29
29
  * @generated
30
30
  * @see https://github.com/webgptorg/promptbook
31
31
  */
32
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-23';
32
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-25';
33
33
  /**
34
34
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
35
35
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -133,11 +133,20 @@ const DEFAULT_BOOK_TITLE = `✨ Untitled Book`;
133
133
  */
134
134
  const DEFAULT_MAX_FILE_SIZE = 100 * 1024 * 1024; // 100MB
135
135
  /**
136
- * @@@
136
+ * Threshold value that determines when a dataset is considered "big"
137
+ * and may require special handling or optimizations
138
+ *
139
+ * For example, when an error occurs in one item of a big dataset, it will not fail the whole pipeline
137
140
  *
138
141
  * @public exported from `@promptbook/core`
139
142
  */
140
143
  const BIG_DATASET_TRESHOLD = 50;
144
+ /**
145
+ * Placeholder text used to represent the value of a failed operation
146
+ *
147
+ * @public exported from `@promptbook/core`
148
+ */
149
+ const FAILED_VALUE_PLACEHOLDER = '!?';
141
150
  // <- TODO: [🧠] Better system for generator warnings - not always "code" and "by `@promptbook/cli`"
142
151
  /**
143
152
  * The maximum number of iterations for loops
@@ -345,7 +354,8 @@ class UnexpectedError extends Error {
345
354
  }
346
355
 
347
356
  /**
348
- * @@@
357
+ * Safely retrieves the global scope object (window in browser, global in Node.js)
358
+ * regardless of the JavaScript environment in which the code is running
349
359
  *
350
360
  * Note: `$` is used to indicate that this function is not a pure function - it accesses the global scope
351
361
  *
@@ -356,10 +366,10 @@ function $getGlobalScope() {
356
366
  }
357
367
 
358
368
  /**
359
- * @@@
369
+ * Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
360
370
  *
361
- * @param text @@@
362
- * @returns @@@
371
+ * @param text The text string to be converted to SCREAMING_CASE format.
372
+ * @returns The normalized text in SCREAMING_CASE format.
363
373
  * @example 'HELLO_WORLD'
364
374
  * @example 'I_LOVE_PROMPTBOOK'
365
375
  * @public exported from `@promptbook/utils`
@@ -411,10 +421,10 @@ function normalizeTo_SCREAMING_CASE(text) {
411
421
  */
412
422
 
413
423
  /**
414
- * @@@
424
+ * Normalizes a text string to snake_case format.
415
425
  *
416
- * @param text @@@
417
- * @returns @@@
426
+ * @param text The text string to be converted to snake_case format.
427
+ * @returns The normalized text in snake_case format.
418
428
  * @example 'hello_world'
419
429
  * @example 'i_love_promptbook'
420
430
  * @public exported from `@promptbook/utils`
@@ -539,7 +549,8 @@ class KnowledgeScrapeError extends Error {
539
549
  }
540
550
 
541
551
  /**
542
- * @@@
552
+ * Converts a name to a properly formatted subfolder path for cache storage.
553
+ * Handles normalization and path formatting to create consistent cache directory structures.
543
554
  *
544
555
  * @private for `FileCacheStorage`
545
556
  */
@@ -792,10 +803,10 @@ for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
792
803
  */
793
804
 
794
805
  /**
795
- * @@@
806
+ * Removes diacritic marks (accents) from characters in a string.
796
807
  *
797
- * @param input @@@
798
- * @returns @@@
808
+ * @param input The string containing diacritics to be normalized.
809
+ * @returns The string with diacritics removed or normalized.
799
810
  * @public exported from `@promptbook/utils`
800
811
  */
801
812
  function removeDiacritics(input) {
@@ -809,10 +820,10 @@ function removeDiacritics(input) {
809
820
  */
810
821
 
811
822
  /**
812
- * @@@
823
+ * Converts a given text to kebab-case format.
813
824
  *
814
- * @param text @@@
815
- * @returns @@@
825
+ * @param text The text to be converted.
826
+ * @returns The kebab-case formatted string.
816
827
  * @example 'hello-world'
817
828
  * @example 'i-love-promptbook'
818
829
  * @public exported from `@promptbook/utils`
@@ -954,11 +965,11 @@ function isValidUrl(url) {
954
965
  }
955
966
 
956
967
  /**
957
- * @@@
968
+ * Converts a title string into a normalized name.
958
969
  *
959
- * @param value @@@
960
- * @returns @@@
961
- * @example @@@
970
+ * @param value The title string to be converted to a name.
971
+ * @returns A normalized name derived from the input title.
972
+ * @example 'Hello World!' -> 'hello-world'
962
973
  * @public exported from `@promptbook/utils`
963
974
  */
964
975
  function titleToName(value) {
@@ -978,9 +989,8 @@ function titleToName(value) {
978
989
  }
979
990
 
980
991
  /**
981
- * Create a filename for intermediate cache for scrapers
982
- *
983
- * Note: It also checks if directory exists and creates it if not
992
+ * Retrieves an intermediate source for a scraper based on the knowledge source.
993
+ * Manages the caching and retrieval of intermediate scraper results for optimized performance.
984
994
  *
985
995
  * @private as internal utility for scrapers
986
996
  */
@@ -1587,13 +1597,13 @@ const ORDER_OF_PIPELINE_JSON = [
1587
1597
  */
1588
1598
  const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
1589
1599
  /**
1590
- * @@@
1600
+ * Placeholder value indicating a parameter is missing its value.
1591
1601
  *
1592
1602
  * @private within the repository
1593
1603
  */
1594
1604
  const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
1595
1605
  /**
1596
- * @@@
1606
+ * Placeholder value indicating a parameter is restricted and cannot be used directly.
1597
1607
  *
1598
1608
  * @private within the repository
1599
1609
  */
@@ -2092,7 +2102,7 @@ class SimplePipelineCollection {
2092
2102
  /**
2093
2103
  * Constructs a pipeline collection from pipelines
2094
2104
  *
2095
- * @param pipelines @@@
2105
+ * @param pipelines Array of pipeline JSON objects to include in the collection
2096
2106
  *
2097
2107
  * Note: During construction, the logic of all pipelines is validated
2098
2108
  * Note: It is not recommended to use this constructor directly, use `createCollectionFromJson` *(or other variant)* instead
@@ -2256,8 +2266,8 @@ class PipelineExecutionError extends Error {
2256
2266
  * @public exported from `@promptbook/core`
2257
2267
  */
2258
2268
  function isPipelinePrepared(pipeline) {
2259
- // Note: Ignoring `pipeline.preparations` @@@
2260
- // Note: Ignoring `pipeline.knowledgePieces` @@@
2269
+ // Note: Ignoring `pipeline.preparations`
2270
+ // Note: Ignoring `pipeline.knowledgePieces`
2261
2271
  if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
2262
2272
  // TODO: !!! Comment this out
2263
2273
  console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
@@ -3206,10 +3216,11 @@ async function preparePersona(personaDescription, tools, options) {
3206
3216
  */
3207
3217
 
3208
3218
  /**
3209
- * @@@
3219
+ * Registry for all available scrapers in the system.
3220
+ * Central point for registering and accessing different types of content scrapers.
3210
3221
  *
3211
3222
  * Note: `$` is used to indicate that this interacts with the global scope
3212
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3223
+ * @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
3213
3224
  * @public exported from `@promptbook/core`
3214
3225
  */
3215
3226
  const $scrapersRegister = new $Register('scraper_constructors');
@@ -3412,7 +3423,9 @@ const promptbookFetch = async (urlOrRequest, init) => {
3412
3423
  */
3413
3424
 
3414
3425
  /**
3415
- * @@@
3426
+ * Factory function that creates a handler for processing knowledge sources.
3427
+ * Provides standardized processing of different types of knowledge sources
3428
+ * across various scraper implementations.
3416
3429
  *
3417
3430
  * @public exported from `@promptbook/core`
3418
3431
  */
@@ -3653,9 +3666,12 @@ TODO: [🧊] This is how it can look in future
3653
3666
  */
3654
3667
 
3655
3668
  /**
3656
- * @@@
3669
+ * Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
3657
3670
  *
3658
- * @public exported from `@promptbook/core`
3671
+ * @param tasks Sequence of tasks that are chained together to form a pipeline
3672
+ * @returns A promise that resolves to the prepared tasks.
3673
+ *
3674
+ * @private internal utility of `preparePipeline`
3659
3675
  */
3660
3676
  async function prepareTasks(pipeline, tools, options) {
3661
3677
  const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
@@ -4193,6 +4209,15 @@ const CsvFormatParser = {
4193
4209
  mappedData.push(mappedRow);
4194
4210
  if (onProgress) {
4195
4211
  // Note: Report the CSV with all rows mapped so far
4212
+ /*
4213
+ !!!!
4214
+ // Report progress with updated value
4215
+ const progressData = mappedData.map((row, i) =>
4216
+ i > index ? { ...row, [outputParameterName]: PENDING_VALUE_PLACEHOLDER } : row,
4217
+ );
4218
+
4219
+
4220
+ */
4196
4221
  await onProgress(unparse(mappedData, { ...settings, ...MANDATORY_CSV_SETTINGS }));
4197
4222
  }
4198
4223
  }
@@ -4409,7 +4434,7 @@ function mapAvailableToExpectedParameters(options) {
4409
4434
  else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
4410
4435
  }
4411
4436
  if (expectedParameterNames.size === 0) {
4412
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4437
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4413
4438
  Object.freeze(mappedParameters);
4414
4439
  return mappedParameters;
4415
4440
  }
@@ -4440,7 +4465,7 @@ function mapAvailableToExpectedParameters(options) {
4440
4465
  for (let i = 0; i < expectedParameterNames.size; i++) {
4441
4466
  mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
4442
4467
  }
4443
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4468
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4444
4469
  Object.freeze(mappedParameters);
4445
4470
  return mappedParameters;
4446
4471
  }
@@ -5193,15 +5218,14 @@ async function executeFormatSubvalues(options) {
5193
5218
  const highLevelError = new PipelineExecutionError(spaceTrim$1((block) => `
5194
5219
  ${error.message}
5195
5220
 
5196
- This is error in FOREACH command when mapping data
5221
+ This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5197
5222
  You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5198
5223
 
5199
5224
  ${block(pipelineIdentification)}
5200
- Subparameter index: ${index}
5201
5225
  `));
5202
5226
  if (length > BIG_DATASET_TRESHOLD) {
5203
5227
  console.error(highLevelError);
5204
- return '~';
5228
+ return FAILED_VALUE_PLACEHOLDER;
5205
5229
  }
5206
5230
  throw highLevelError;
5207
5231
  }
@@ -5225,14 +5249,13 @@ async function executeFormatSubvalues(options) {
5225
5249
  catch (error) {
5226
5250
  if (length > BIG_DATASET_TRESHOLD) {
5227
5251
  console.error(spaceTrim$1((block) => `
5228
- Error in FOREACH command:
5252
+ ${error.message}
5229
5253
 
5230
- ${block(pipelineIdentification)}
5254
+ This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5231
5255
 
5232
5256
  ${block(pipelineIdentification)}
5233
- Subparameter index: ${index}
5234
5257
  `));
5235
- return '~';
5258
+ return FAILED_VALUE_PLACEHOLDER;
5236
5259
  }
5237
5260
  throw error;
5238
5261
  }
@@ -6189,7 +6212,8 @@ class WebsiteScraper {
6189
6212
  */
6190
6213
 
6191
6214
  /**
6192
- * @@@
6215
+ * Factory function to create an instance of WebsiteScraper.
6216
+ * It bundles the scraper class with its metadata.
6193
6217
  *
6194
6218
  * @public exported from `@promptbook/website-crawler`
6195
6219
  */