@promptbook/website-crawler 0.92.0-24 → 0.92.0-25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. package/esm/index.es.js +43 -38
  2. package/esm/index.es.js.map +1 -1
  3. package/esm/typings/src/_packages/core.index.d.ts +0 -2
  4. package/esm/typings/src/commands/_common/types/CommandParser.d.ts +36 -28
  5. package/esm/typings/src/constants.d.ts +2 -2
  6. package/esm/typings/src/errors/0-BoilerplateError.d.ts +2 -2
  7. package/esm/typings/src/execution/CommonToolsOptions.d.ts +3 -3
  8. package/esm/typings/src/llm-providers/_common/register/$llmToolsMetadataRegister.d.ts +3 -3
  9. package/esm/typings/src/llm-providers/_common/register/$llmToolsRegister.d.ts +3 -3
  10. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsConfigurationFromEnv.d.ts +4 -4
  11. package/esm/typings/src/llm-providers/_common/utils/cache/CacheItem.d.ts +5 -5
  12. package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +5 -3
  13. package/esm/typings/src/llm-providers/_common/utils/count-total-usage/limitTotalUsage.d.ts +5 -5
  14. package/esm/typings/src/other/templates/getBookTemplates.d.ts +2 -2
  15. package/esm/typings/src/pipeline/PipelineInterface/PipelineInterface.d.ts +3 -3
  16. package/esm/typings/src/pipeline/PipelineInterface/constants.d.ts +1 -1
  17. package/esm/typings/src/pipeline/PipelineInterface/getPipelineInterface.d.ts +1 -1
  18. package/esm/typings/src/pipeline/PipelineInterface/isPipelineImplementingInterface.d.ts +5 -4
  19. package/esm/typings/src/pipeline/PipelineInterface/isPipelineInterfacesEqual.d.ts +1 -1
  20. package/esm/typings/src/pipeline/PipelineJson/CommonTaskJson.d.ts +9 -6
  21. package/esm/typings/src/pipeline/PipelineJson/PipelineJson.d.ts +2 -2
  22. package/esm/typings/src/pipeline/PipelineString.d.ts +3 -1
  23. package/esm/typings/src/pipeline/book-notation.d.ts +2 -2
  24. package/esm/typings/src/prepare/prepareTasks.d.ts +7 -4
  25. package/esm/typings/src/remote-server/types/RemoteServerOptions.d.ts +2 -1
  26. package/esm/typings/src/scrapers/_boilerplate/BoilerplateScraper.d.ts +3 -3
  27. package/esm/typings/src/scrapers/_boilerplate/createBoilerplateScraper.d.ts +1 -1
  28. package/esm/typings/src/scrapers/_boilerplate/register-metadata.d.ts +1 -1
  29. package/esm/typings/src/scrapers/_common/Converter.d.ts +3 -1
  30. package/esm/typings/src/scrapers/_common/Scraper.d.ts +4 -3
  31. package/esm/typings/src/scrapers/_common/ScraperIntermediateSource.d.ts +4 -2
  32. package/esm/typings/src/scrapers/_common/register/$provideFilesystemForNode.d.ts +2 -1
  33. package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +6 -3
  34. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +3 -5
  35. package/esm/typings/src/scrapers/_common/register/$scrapersRegister.d.ts +3 -2
  36. package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +8 -5
  37. package/esm/typings/src/scrapers/_common/register/ScraperConstructor.d.ts +2 -1
  38. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +6 -5
  39. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +3 -1
  40. package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
  41. package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +2 -1
  42. package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +4 -1
  43. package/esm/typings/src/scrapers/markitdown/MarkitdownScraper.d.ts +1 -1
  44. package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +2 -1
  45. package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +3 -4
  46. package/esm/typings/src/scripting/javascript/postprocessing-functions.d.ts +5 -1
  47. package/esm/typings/src/storage/file-cache-storage/FileCacheStorage.d.ts +12 -5
  48. package/esm/typings/src/storage/file-cache-storage/FileCacheStorageOptions.d.ts +4 -2
  49. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +2 -1
  50. package/esm/typings/src/storage/local-storage/utils/makePromptbookStorageFromWebStorage.d.ts +2 -1
  51. package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +2 -1
  52. package/esm/typings/src/types/ModelVariant.d.ts +5 -5
  53. package/esm/typings/src/types/typeAliases.d.ts +8 -6
  54. package/esm/typings/src/utils/editable/edit-pipeline-string/addPipelineCommand.d.ts +2 -2
  55. package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.d.ts +4 -1
  56. package/esm/typings/src/utils/editable/utils/isFlatPipeline.d.ts +2 -1
  57. package/esm/typings/src/utils/markdown/extractAllListItemsFromMarkdown.d.ts +1 -1
  58. package/esm/typings/src/utils/normalization/nameToUriPart.d.ts +4 -4
  59. package/esm/typings/src/utils/normalization/nameToUriParts.d.ts +4 -4
  60. package/esm/typings/src/utils/normalization/normalize-to-kebab-case.d.ts +3 -3
  61. package/esm/typings/src/utils/normalization/normalizeTo_SCREAMING_CASE.d.ts +3 -3
  62. package/esm/typings/src/utils/normalization/normalizeTo_camelCase.d.ts +4 -4
  63. package/esm/typings/src/utils/normalization/normalizeTo_snake_case.d.ts +3 -3
  64. package/esm/typings/src/utils/normalization/removeDiacritics.d.ts +3 -3
  65. package/esm/typings/src/utils/normalization/searchKeywords.d.ts +4 -1
  66. package/esm/typings/src/utils/normalization/titleToName.d.ts +4 -4
  67. package/esm/typings/src/utils/organization/empty_object.d.ts +2 -2
  68. package/esm/typings/src/utils/organization/just_empty_object.d.ts +4 -4
  69. package/esm/typings/src/version.d.ts +2 -1
  70. package/package.json +2 -2
  71. package/umd/index.umd.js +43 -38
  72. package/umd/index.umd.js.map +1 -1
package/esm/index.es.js CHANGED
@@ -29,7 +29,7 @@ const BOOK_LANGUAGE_VERSION = '1.0.0';
29
29
  * @generated
30
30
  * @see https://github.com/webgptorg/promptbook
31
31
  */
32
- const PROMPTBOOK_ENGINE_VERSION = '0.92.0-24';
32
+ const PROMPTBOOK_ENGINE_VERSION = '0.92.0-25';
33
33
  /**
34
34
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
35
35
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -366,10 +366,10 @@ function $getGlobalScope() {
366
366
  }
367
367
 
368
368
  /**
369
- * @@@
369
+ * Normalizes a text string to SCREAMING_CASE (all uppercase with underscores).
370
370
  *
371
- * @param text @@@
372
- * @returns @@@
371
+ * @param text The text string to be converted to SCREAMING_CASE format.
372
+ * @returns The normalized text in SCREAMING_CASE format.
373
373
  * @example 'HELLO_WORLD'
374
374
  * @example 'I_LOVE_PROMPTBOOK'
375
375
  * @public exported from `@promptbook/utils`
@@ -421,10 +421,10 @@ function normalizeTo_SCREAMING_CASE(text) {
421
421
  */
422
422
 
423
423
  /**
424
- * @@@
424
+ * Normalizes a text string to snake_case format.
425
425
  *
426
- * @param text @@@
427
- * @returns @@@
426
+ * @param text The text string to be converted to snake_case format.
427
+ * @returns The normalized text in snake_case format.
428
428
  * @example 'hello_world'
429
429
  * @example 'i_love_promptbook'
430
430
  * @public exported from `@promptbook/utils`
@@ -549,7 +549,8 @@ class KnowledgeScrapeError extends Error {
549
549
  }
550
550
 
551
551
  /**
552
- * @@@
552
+ * Converts a name to a properly formatted subfolder path for cache storage.
553
+ * Handles normalization and path formatting to create consistent cache directory structures.
553
554
  *
554
555
  * @private for `FileCacheStorage`
555
556
  */
@@ -802,10 +803,10 @@ for (let i = 0; i < defaultDiacriticsRemovalMap.length; i++) {
802
803
  */
803
804
 
804
805
  /**
805
- * @@@
806
+ * Removes diacritic marks (accents) from characters in a string.
806
807
  *
807
- * @param input @@@
808
- * @returns @@@
808
+ * @param input The string containing diacritics to be normalized.
809
+ * @returns The string with diacritics removed or normalized.
809
810
  * @public exported from `@promptbook/utils`
810
811
  */
811
812
  function removeDiacritics(input) {
@@ -819,10 +820,10 @@ function removeDiacritics(input) {
819
820
  */
820
821
 
821
822
  /**
822
- * @@@
823
+ * Converts a given text to kebab-case format.
823
824
  *
824
- * @param text @@@
825
- * @returns @@@
825
+ * @param text The text to be converted.
826
+ * @returns The kebab-case formatted string.
826
827
  * @example 'hello-world'
827
828
  * @example 'i-love-promptbook'
828
829
  * @public exported from `@promptbook/utils`
@@ -964,11 +965,11 @@ function isValidUrl(url) {
964
965
  }
965
966
 
966
967
  /**
967
- * @@@
968
+ * Converts a title string into a normalized name.
968
969
  *
969
- * @param value @@@
970
- * @returns @@@
971
- * @example @@@
970
+ * @param value The title string to be converted to a name.
971
+ * @returns A normalized name derived from the input title.
972
+ * @example 'Hello World!' -> 'hello-world'
972
973
  * @public exported from `@promptbook/utils`
973
974
  */
974
975
  function titleToName(value) {
@@ -988,9 +989,8 @@ function titleToName(value) {
988
989
  }
989
990
 
990
991
  /**
991
- * Create a filename for intermediate cache for scrapers
992
- *
993
- * Note: It also checks if directory exists and creates it if not
992
+ * Retrieves an intermediate source for a scraper based on the knowledge source.
993
+ * Manages the caching and retrieval of intermediate scraper results for optimized performance.
994
994
  *
995
995
  * @private as internal utility for scrapers
996
996
  */
@@ -1597,13 +1597,13 @@ const ORDER_OF_PIPELINE_JSON = [
1597
1597
  */
1598
1598
  const REPLACING_NONCE = 'ptbkauk42kV2dzao34faw7FudQUHYPtW';
1599
1599
  /**
1600
- * @@@
1600
+ * Placeholder value indicating a parameter is missing its value.
1601
1601
  *
1602
1602
  * @private within the repository
1603
1603
  */
1604
1604
  const RESERVED_PARAMETER_MISSING_VALUE = 'MISSING-' + REPLACING_NONCE;
1605
1605
  /**
1606
- * @@@
1606
+ * Placeholder value indicating a parameter is restricted and cannot be used directly.
1607
1607
  *
1608
1608
  * @private within the repository
1609
1609
  */
@@ -2266,8 +2266,8 @@ class PipelineExecutionError extends Error {
2266
2266
  * @public exported from `@promptbook/core`
2267
2267
  */
2268
2268
  function isPipelinePrepared(pipeline) {
2269
- // Note: Ignoring `pipeline.preparations` @@@
2270
- // Note: Ignoring `pipeline.knowledgePieces` @@@
2269
+ // Note: Ignoring `pipeline.preparations`
2270
+ // Note: Ignoring `pipeline.knowledgePieces`
2271
2271
  if (pipeline.title === undefined || pipeline.title === '' || pipeline.title === DEFAULT_BOOK_TITLE) {
2272
2272
  // TODO: !!! Comment this out
2273
2273
  console.log('Pipeline is not prepared because title is undefined or empty', pipeline);
@@ -3216,10 +3216,11 @@ async function preparePersona(personaDescription, tools, options) {
3216
3216
  */
3217
3217
 
3218
3218
  /**
3219
- * @@@
3219
+ * Registry for all available scrapers in the system.
3220
+ * Central point for registering and accessing different types of content scrapers.
3220
3221
  *
3221
3222
  * Note: `$` is used to indicate that this interacts with the global scope
3222
- * @singleton Only one instance of each register is created per build, but thare can be more @@@
3223
+ * @singleton Only one instance of each register is created per build, but there can be more than one in different build modules
3223
3224
  * @public exported from `@promptbook/core`
3224
3225
  */
3225
3226
  const $scrapersRegister = new $Register('scraper_constructors');
@@ -3422,7 +3423,9 @@ const promptbookFetch = async (urlOrRequest, init) => {
3422
3423
  */
3423
3424
 
3424
3425
  /**
3425
- * @@@
3426
+ * Factory function that creates a handler for processing knowledge sources.
3427
+ * Provides standardized processing of different types of knowledge sources
3428
+ * across various scraper implementations.
3426
3429
  *
3427
3430
  * @public exported from `@promptbook/core`
3428
3431
  */
@@ -3663,9 +3666,12 @@ TODO: [🧊] This is how it can look in future
3663
3666
  */
3664
3667
 
3665
3668
  /**
3666
- * @@@
3669
+ * Prepares tasks by adding knowledge to the prompt and ensuring all necessary parameters are included.
3667
3670
  *
3668
- * @public exported from `@promptbook/core`
3671
+ * @param tasks Sequence of tasks that are chained together to form a pipeline
3672
+ * @returns A promise that resolves to the prepared tasks.
3673
+ *
3674
+ * @private internal utility of `preparePipeline`
3669
3675
  */
3670
3676
  async function prepareTasks(pipeline, tools, options) {
3671
3677
  const { maxParallelCount = DEFAULT_MAX_PARALLEL_COUNT } = options;
@@ -4428,7 +4434,7 @@ function mapAvailableToExpectedParameters(options) {
4428
4434
  else if (!availableParametersNames.has(parameterName) && expectedParameterNames.has(parameterName)) ;
4429
4435
  }
4430
4436
  if (expectedParameterNames.size === 0) {
4431
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4437
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4432
4438
  Object.freeze(mappedParameters);
4433
4439
  return mappedParameters;
4434
4440
  }
@@ -4459,7 +4465,7 @@ function mapAvailableToExpectedParameters(options) {
4459
4465
  for (let i = 0; i < expectedParameterNames.size; i++) {
4460
4466
  mappedParameters[expectedParameterNamesArray[i]] = availableParameters[availableParametersNamesArray[i]];
4461
4467
  }
4462
- // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent @@@
4468
+ // Note: [👨‍👨‍👧] Now we can freeze `mappedParameters` to prevent accidental modifications after mapping
4463
4469
  Object.freeze(mappedParameters);
4464
4470
  return mappedParameters;
4465
4471
  }
@@ -5212,11 +5218,10 @@ async function executeFormatSubvalues(options) {
5212
5218
  const highLevelError = new PipelineExecutionError(spaceTrim$1((block) => `
5213
5219
  ${error.message}
5214
5220
 
5215
- This is error in FOREACH command when mapping data
5221
+ This is error in FOREACH command when mapping ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5216
5222
  You have probbably passed wrong data to pipeline or wrong data was generated which are processed by FOREACH command
5217
5223
 
5218
5224
  ${block(pipelineIdentification)}
5219
- Subparameter index: ${index}
5220
5225
  `));
5221
5226
  if (length > BIG_DATASET_TRESHOLD) {
5222
5227
  console.error(highLevelError);
@@ -5244,12 +5249,11 @@ async function executeFormatSubvalues(options) {
5244
5249
  catch (error) {
5245
5250
  if (length > BIG_DATASET_TRESHOLD) {
5246
5251
  console.error(spaceTrim$1((block) => `
5247
- Error in FOREACH command:
5252
+ ${error.message}
5248
5253
 
5249
- ${block(pipelineIdentification)}
5254
+ This is error in FOREACH command when processing ${formatDefinition.formatName} ${subvalueParser.subvalueName} data (${index + 1}/${length})
5250
5255
 
5251
5256
  ${block(pipelineIdentification)}
5252
- Subparameter index: ${index}
5253
5257
  `));
5254
5258
  return FAILED_VALUE_PLACEHOLDER;
5255
5259
  }
@@ -6208,7 +6212,8 @@ class WebsiteScraper {
6208
6212
  */
6209
6213
 
6210
6214
  /**
6211
- * @@@
6215
+ * Factory function to create an instance of WebsiteScraper.
6216
+ * It bundles the scraper class with its metadata.
6212
6217
  *
6213
6218
  * @public exported from `@promptbook/website-crawler`
6214
6219
  */