@promptbook/website-crawler 0.71.0-17 → 0.71.0-19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/README.md +2 -4
  2. package/esm/index.es.js +99 -115
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/src/_packages/core.index.d.ts +24 -18
  5. package/esm/typings/src/_packages/node.index.d.ts +2 -0
  6. package/esm/typings/src/_packages/types.index.d.ts +10 -10
  7. package/esm/typings/src/_packages/utils.index.d.ts +2 -2
  8. package/esm/typings/src/collection/PipelineCollection.d.ts +1 -1
  9. package/esm/typings/src/collection/SimplePipelineCollection.d.ts +2 -2
  10. package/esm/typings/src/collection/collectionToJson.d.ts +1 -1
  11. package/esm/typings/src/collection/constructors/createCollectionFromJson.d.ts +1 -1
  12. package/esm/typings/src/collection/constructors/createCollectionFromPromise.d.ts +1 -1
  13. package/esm/typings/src/commands/_common/types/CommandParser.d.ts +5 -5
  14. package/esm/typings/src/config.d.ts +21 -14
  15. package/esm/typings/src/execution/EmbeddingVector.d.ts +1 -1
  16. package/esm/typings/src/execution/Executables.d.ts +18 -0
  17. package/esm/typings/src/execution/ExecutionTools.d.ts +9 -3
  18. package/esm/typings/src/execution/LlmExecutionTools.d.ts +1 -1
  19. package/esm/typings/src/execution/PipelineExecutorResult.d.ts +2 -2
  20. package/esm/typings/src/execution/assertsExecutionSuccessful.d.ts +1 -0
  21. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorOptions.d.ts +29 -6
  22. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +2 -11
  23. package/esm/typings/src/execution/createPipelineExecutor/20-executeTemplate.d.ts +4 -13
  24. package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +9 -14
  25. package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +11 -3
  26. package/esm/typings/src/execution/utils/addUsage.d.ts +1 -1
  27. package/esm/typings/src/execution/utils/forEachAsync.d.ts +1 -1
  28. package/esm/typings/src/formats/_common/FormatDefinition.d.ts +2 -2
  29. package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +2 -2
  30. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForCli.d.ts +2 -2
  31. package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +1 -1
  32. package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +4 -1
  33. package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionTools.d.ts +1 -1
  34. package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
  35. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +1 -1
  36. package/esm/typings/src/llm-providers/mocked/$fakeTextToExpectations.d.ts +1 -1
  37. package/esm/typings/src/llm-providers/mocked/MockedEchoLlmExecutionTools.d.ts +1 -1
  38. package/esm/typings/src/llm-providers/mocked/MockedFackedLlmExecutionTools.d.ts +1 -1
  39. package/esm/typings/src/llm-providers/multiple/MultipleLlmExecutionTools.d.ts +4 -5
  40. package/esm/typings/src/llm-providers/multiple/joinLlmExecutionTools.d.ts +1 -1
  41. package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +1 -1
  42. package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
  43. package/esm/typings/src/llm-providers/remote/RemoteLlmExecutionTools.d.ts +1 -1
  44. package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_ListModels_Response.d.ts +4 -4
  45. package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_Prompt_Response.d.ts +3 -3
  46. package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +4 -23
  47. package/esm/typings/src/prepare/prepareTemplates.d.ts +1 -1
  48. package/esm/typings/src/scrapers/_common/Scraper.d.ts +2 -2
  49. package/esm/typings/src/scrapers/_common/prepareKnowledgePieces.d.ts +1 -1
  50. package/esm/typings/src/scrapers/_common/register/$provideExecutablesForNode.d.ts +12 -0
  51. package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +2 -2
  52. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +2 -2
  53. package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +2 -2
  54. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +2 -2
  55. package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +1 -1
  56. package/esm/typings/src/scrapers/document/DocumentScraper.d.ts +2 -2
  57. package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
  58. package/esm/typings/src/scrapers/document/register-metadata.d.ts +1 -1
  59. package/esm/typings/src/scrapers/document-legacy/LegacyDocumentScraper.d.ts +3 -3
  60. package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +1 -1
  61. package/esm/typings/src/scrapers/document-legacy/register-metadata.d.ts +1 -1
  62. package/esm/typings/src/scrapers/markdown/MarkdownScraper.d.ts +1 -1
  63. package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +1 -1
  64. package/esm/typings/src/scrapers/markdown/register-metadata.d.ts +1 -1
  65. package/esm/typings/src/scrapers/pdf/PdfScraper.d.ts +2 -2
  66. package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +1 -1
  67. package/esm/typings/src/scrapers/pdf/register-metadata.d.ts +1 -1
  68. package/esm/typings/src/scrapers/website/WebsiteScraper.d.ts +6 -3
  69. package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +1 -1
  70. package/esm/typings/src/scrapers/website/register-metadata.d.ts +1 -1
  71. package/esm/typings/src/scrapers/website/utils/createShowdownConverter.d.ts +7 -0
  72. package/esm/typings/src/scripting/javascript/utils/preserve.d.ts +1 -1
  73. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +1 -1
  74. package/esm/typings/src/types/Arrayable.d.ts +1 -1
  75. package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +7 -0
  76. package/esm/typings/src/types/PipelineJson/KnowledgePieceJson.d.ts +4 -4
  77. package/esm/typings/src/types/PipelineJson/KnowledgeSourceJson.d.ts +1 -1
  78. package/esm/typings/src/types/PipelineJson/PersonaJson.d.ts +1 -1
  79. package/esm/typings/src/types/PipelineJson/TemplateJsonCommon.d.ts +2 -2
  80. package/esm/typings/src/types/Prompt.d.ts +1 -1
  81. package/esm/typings/src/types/execution-report/ExecutionReportJson.d.ts +1 -1
  82. package/esm/typings/src/utils/$Register.d.ts +1 -1
  83. package/esm/typings/src/utils/FromtoItems.d.ts +1 -1
  84. package/esm/typings/src/utils/arrayableToArray.d.ts +1 -1
  85. package/esm/typings/src/utils/emojis.d.ts +1 -1
  86. package/esm/typings/src/utils/execCommand/$execCommand.d.ts +2 -2
  87. package/esm/typings/src/utils/execCommand/{IExecCommandOptions.d.ts → ExecCommandOptions.d.ts} +2 -6
  88. package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.d.ts +3 -3
  89. package/esm/typings/src/utils/expectation-counters/countSentences.d.ts +1 -1
  90. package/esm/typings/src/utils/markdown/extractAllBlocksFromMarkdown.d.ts +1 -1
  91. package/esm/typings/src/utils/markdown/splitMarkdownIntoSections.d.ts +1 -1
  92. package/esm/typings/src/utils/normalization/IKeywords.d.ts +2 -2
  93. package/esm/typings/src/utils/normalization/parseKeywords.d.ts +2 -2
  94. package/esm/typings/src/utils/normalization/parseKeywordsFromString.d.ts +2 -2
  95. package/esm/typings/src/utils/normalization/searchKeywords.d.ts +2 -2
  96. package/esm/typings/src/utils/organization/TODO_USE.d.ts +1 -1
  97. package/esm/typings/src/utils/organization/keepUnused.d.ts +1 -1
  98. package/esm/typings/src/utils/random/$randomSeed.d.ts +1 -1
  99. package/esm/typings/src/utils/sets/intersection.d.ts +1 -1
  100. package/esm/typings/src/utils/sets/union.d.ts +1 -1
  101. package/esm/typings/src/utils/unwrapResult.d.ts +4 -4
  102. package/package.json +4 -3
  103. package/umd/index.umd.js +102 -118
  104. package/umd/index.umd.js.map +1 -1
  105. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorSettings.d.ts +0 -29
  106. package/esm/typings/src/scrapers/website/utils/markdownConverter.d.ts +0 -12
  107. package/esm/typings/src/scrapers/website/utils/{markdownConverter.test.d.ts → createShowdownConverter.test.d.ts} +0 -0
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  # ![Promptbook logo - cube with letters P and B](./other/design/logo-h1.png) Promptbook
4
4
 
5
- Supercharge your use of large language models
5
+ Build responsible, controlled and transparent applications on top of LLM models!
6
6
 
7
7
 
8
8
 
@@ -21,11 +21,9 @@ Supercharge your use of large language models
21
21
  - ✨ **Support of [OpenAI o1 model](https://openai.com/o1/)**
22
22
 
23
23
  <blockquote style="color: #ff8811">
24
- <b>⚠ Warning:</b> This is a pre-release version of the library. It is not yet ready for production use. Please look at <a href="https://www.npmjs.com/package/@promptbook/core?activeTab=versions">latest stable release</a>.
24
+ <b>⚠ Warning:</b> This is a pre-release version of the library. It is not yet ready for production use. Please look at <a href="https://www.npmjs.com/package/@promptbook/core?activeTab=versions">latest stable release</a>.
25
25
  </blockquote>
26
26
 
27
-
28
-
29
27
  ## 📦 Package `@promptbook/website-crawler`
30
28
 
31
29
  - Promptbooks are [divided into several](#-packages) packages, all are published from [single monorepo](https://github.com/webgptorg/promptbook).
package/esm/index.es.js CHANGED
@@ -2,12 +2,12 @@ import spaceTrim$1, { spaceTrim } from 'spacetrim';
2
2
  import { Readability } from '@mozilla/readability';
3
3
  import { mkdir, rm, writeFile } from 'fs/promises';
4
4
  import { JSDOM } from 'jsdom';
5
- import { forTime } from 'waitasecond';
6
5
  import { SHA256 } from 'crypto-js';
7
6
  import hexEncoder from 'crypto-js/enc-hex';
8
7
  import { basename, join, dirname } from 'path';
9
8
  import { format } from 'prettier';
10
9
  import parserHtml from 'prettier/parser-html';
10
+ import { forTime } from 'waitasecond';
11
11
  import { lookup } from 'mime-types';
12
12
  import { unparse, parse } from 'papaparse';
13
13
  import { Converter } from 'showdown';
@@ -16,7 +16,7 @@ import { Converter } from 'showdown';
16
16
  /**
17
17
  * The version of the Promptbook library
18
18
  */
19
- var PROMPTBOOK_VERSION = '0.71.0-16';
19
+ var PROMPTBOOK_VERSION = '0.71.0-18';
20
20
  // TODO: [main] !!!! List here all the versions and annotate + put into script
21
21
 
22
22
  /*! *****************************************************************************
@@ -364,18 +364,25 @@ var LOOP_LIMIT = 1000;
364
364
  * @private within the repository - too low-level in comparison with other `MAX_...`
365
365
  */
366
366
  var IMMEDIATE_TIME = 10;
367
+ /**
368
+ * Strategy for caching the intermediate results for knowledge sources
369
+ *
370
+ * @public exported from `@promptbook/core`
371
+ */
372
+ var DEFAULT_INTERMEDIATE_FILES_STRATEGY = 'HIDE_AND_KEEP';
373
+ // <- TODO: [😡] Change to 'VISIBLE'
367
374
  /**
368
375
  * The maximum number of (LLM) tasks running in parallel
369
376
  *
370
377
  * @public exported from `@promptbook/core`
371
378
  */
372
- var MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
379
+ var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
373
380
  /**
374
381
  * The maximum number of attempts to execute LLM task before giving up
375
382
  *
376
383
  * @public exported from `@promptbook/core`
377
384
  */
378
- var MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
385
+ var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
379
386
  /**
380
387
  * Where to store the scrape cache
381
388
  *
@@ -383,7 +390,7 @@ var MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
383
390
  *
384
391
  * @public exported from `@promptbook/core`
385
392
  */
386
- var SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
393
+ var DEFAULT_SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
387
394
  /**
388
395
  * Nonce which is used for replacing things in strings
389
396
  *
@@ -435,7 +442,7 @@ var DEFAULT_CSV_SETTINGS = Object.freeze({
435
442
  *
436
443
  * @public exported from `@promptbook/core`
437
444
  */
438
- var IS_VERBOSE = false;
445
+ var DEFAULT_IS_VERBOSE = false;
439
446
  /**
440
447
  * @@@
441
448
  *
@@ -576,7 +583,7 @@ var $Register = /** @class */ (function () {
576
583
  this.storage = globalScope[storageName];
577
584
  }
578
585
  $Register.prototype.list = function () {
579
- // <- TODO: ReadonlyDeep<Array<TRegistered>>
586
+ // <- TODO: ReadonlyDeep<ReadonlyArray<TRegistered>>
580
587
  return this.storage;
581
588
  };
582
589
  $Register.prototype.register = function (registered) {
@@ -628,7 +635,7 @@ var websiteScraperMetadata = $deepFreeze({
628
635
  mimeTypes: ['text/html'],
629
636
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
630
637
  isAvilableInBrowser: false,
631
- requiredExecutables: ['!!!!!!'],
638
+ requiredExecutables: [],
632
639
  }); /* <- TODO: [🤛] */
633
640
  /**
634
641
  * Registration of known scraper metadata
@@ -1106,12 +1113,12 @@ function TODO_USE() {
1106
1113
  */
1107
1114
  function getScraperIntermediateSource(source, options) {
1108
1115
  return __awaiter(this, void 0, void 0, function () {
1109
- var sourceFilename, url, rootDirname, cacheDirname, isCacheCleaned, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
1116
+ var sourceFilename, url, rootDirname, cacheDirname, intermediateFilesStrategy, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
1110
1117
  return __generator(this, function (_a) {
1111
1118
  switch (_a.label) {
1112
1119
  case 0:
1113
1120
  sourceFilename = source.filename, url = source.url;
1114
- rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, isCacheCleaned = options.isCacheCleaned, extension = options.extension, isVerbose = options.isVerbose;
1121
+ rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, intermediateFilesStrategy = options.intermediateFilesStrategy, extension = options.extension, isVerbose = options.isVerbose;
1115
1122
  hash = SHA256(
1116
1123
  // <- TODO: [🥬] Encapsulate sha256 to some private utility function
1117
1124
  hexEncoder.parse(sourceFilename || url || 'untitled'))
@@ -1121,7 +1128,7 @@ function getScraperIntermediateSource(source, options) {
1121
1128
  pieces = ['intermediate', semanticName, hash].filter(function (piece) { return piece !== ''; });
1122
1129
  name = pieces.join('-').split('--').join('-');
1123
1130
  // <- TODO: Use MAX_FILENAME_LENGTH
1124
- TODO_USE(rootDirname); // <- TODO: !!!!!!
1131
+ TODO_USE(rootDirname); // <- TODO: [😡]
1125
1132
  cacheFilename = join.apply(void 0, __spreadArray(__spreadArray([process.cwd(),
1126
1133
  cacheDirname], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), [name], false)).split('\\')
1127
1134
  .join('/') +
@@ -1141,7 +1148,7 @@ function getScraperIntermediateSource(source, options) {
1141
1148
  return __generator(this, function (_a) {
1142
1149
  switch (_a.label) {
1143
1150
  case 0:
1144
- if (!isCacheCleaned) return [3 /*break*/, 2];
1151
+ if (!(intermediateFilesStrategy === 'HIDE_AND_CLEAN')) return [3 /*break*/, 2];
1145
1152
  if (isVerbose) {
1146
1153
  console.info('legacyDocumentScraper: Clening cache');
1147
1154
  }
@@ -1165,7 +1172,7 @@ function getScraperIntermediateSource(source, options) {
1165
1172
  /**
1166
1173
  * Note: Not using `FileCacheStorage` for two reasons:
1167
1174
  * 1) Need to store more than serialized JSONs
1168
- * 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: !!!!
1175
+ * 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: [😡]
1169
1176
  * TODO: [🐱‍🐉][🧠] Make some smart crop
1170
1177
  * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
1171
1178
  */
@@ -1230,7 +1237,6 @@ function pipelineJsonToString(pipelineJson) {
1230
1237
  pipelineString += '\n\n';
1231
1238
  pipelineString += description;
1232
1239
  }
1233
- // TODO:> const commands: Array<Command>
1234
1240
  var commands = [];
1235
1241
  if (pipelineUrl) {
1236
1242
  commands.push("PIPELINE URL ".concat(pipelineUrl));
@@ -1286,7 +1292,6 @@ function pipelineJsonToString(pipelineJson) {
1286
1292
  pipelineString += '\n\n';
1287
1293
  pipelineString += description_1;
1288
1294
  }
1289
- // TODO:> const commands: Array<Command>
1290
1295
  var commands_1 = [];
1291
1296
  var contentLanguage = 'text';
1292
1297
  if (templateType === 'PROMPT_TEMPLATE') {
@@ -2187,6 +2192,7 @@ function assertsExecutionSuccessful(executionResult) {
2187
2192
  }
2188
2193
  }
2189
2194
  /**
2195
+ * TODO: [🐚] This function should be removed OR changed OR be completely rewritten
2190
2196
  * TODO: [🧠] Can this return type be better typed than void
2191
2197
  */
2192
2198
 
@@ -2527,8 +2533,7 @@ function countTotalUsage(llmTools) {
2527
2533
  * Multiple LLM Execution Tools is a proxy server that uses multiple execution tools internally and exposes the executor interface externally.
2528
2534
  *
2529
2535
  * Note: Internal utility of `joinLlmExecutionTools` but exposed type
2530
- * @public exported from `@promptbook/types`
2531
- * TODO: !!!!!! Export as runtime class not just type
2536
+ * @public exported from `@promptbook/core`
2532
2537
  */
2533
2538
  var MultipleLlmExecutionTools = /** @class */ (function () {
2534
2539
  /**
@@ -2848,7 +2853,7 @@ function preparePersona(personaDescription, tools, options) {
2848
2853
  return __generator(this, function (_d) {
2849
2854
  switch (_d.label) {
2850
2855
  case 0:
2851
- _a = options.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
2856
+ _a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
2852
2857
  if (tools === undefined || tools.llm === undefined) {
2853
2858
  throw new MissingToolsError('LLM tools are required for preparing persona');
2854
2859
  }
@@ -2915,7 +2920,7 @@ var $scrapersRegister = new $Register('scraper_constructors');
2915
2920
  * TODO: [®] DRY Register logic
2916
2921
  */
2917
2922
 
2918
- // TODO: !!!!!! Maybe delete this function
2923
+ // TODO: !!!!!!last - Maybe delete this function
2919
2924
  /**
2920
2925
  * Creates a message with all registered scrapers
2921
2926
  *
@@ -3023,7 +3028,6 @@ function $registeredScrapersMessage() {
3023
3028
  * @private within the repository
3024
3029
  */
3025
3030
  function sourceContentToName(sourceContent) {
3026
- // TODO: !!!!!! Better name for source than gibberish hash
3027
3031
  var hash = SHA256(hexEncoder.parse(JSON.stringify(sourceContent)))
3028
3032
  // <- TODO: [🥬] Encapsulate sha256 to some private utility function
3029
3033
  .toString( /* hex */)
@@ -3097,7 +3101,7 @@ function isFileExisting(filename, fs) {
3097
3101
  /**
3098
3102
  * @@@
3099
3103
  *
3100
- * @private for scraper utilities
3104
+ * @public exported from `@promptbook/core`
3101
3105
  */
3102
3106
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3103
3107
  var _a;
@@ -3110,7 +3114,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3110
3114
  case 0:
3111
3115
  sourceContent = knowledgeSource.sourceContent;
3112
3116
  name = knowledgeSource.name;
3113
- _b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? IS_VERBOSE : _d;
3117
+ _b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d;
3114
3118
  TODO_USE(isVerbose);
3115
3119
  if (!name) {
3116
3120
  name = sourceContentToName(sourceContent);
@@ -3200,7 +3204,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3200
3204
  content = _a.sent();
3201
3205
  return [2 /*return*/, new Blob([
3202
3206
  content,
3203
- // <- TODO: !!!!!! Maybe not working
3207
+ // <- TODO: !!!!!! Test that this is working
3204
3208
  ], { type: mimeType_1 })];
3205
3209
  }
3206
3210
  });
@@ -3263,7 +3267,7 @@ function prepareKnowledgePieces(knowledgeSources, tools, options) {
3263
3267
  return __generator(this, function (_c) {
3264
3268
  switch (_c.label) {
3265
3269
  case 0:
3266
- _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
3270
+ _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
3267
3271
  knowledgePreparedUnflatten = new Array(knowledgeSources.length);
3268
3272
  return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
3269
3273
  var partialPieces, sourceHandler, _a, _b, scraper, partialPiecesUnchecked, e_1_1, pieces;
@@ -3292,7 +3296,8 @@ function prepareKnowledgePieces(knowledgeSources, tools, options) {
3292
3296
  case 4:
3293
3297
  partialPiecesUnchecked = _d.sent();
3294
3298
  if (partialPiecesUnchecked !== null) {
3295
- partialPieces = partialPiecesUnchecked;
3299
+ partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
3300
+ // <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
3296
3301
  return [3 /*break*/, 6];
3297
3302
  }
3298
3303
  _d.label = 5;
@@ -3340,7 +3345,7 @@ TODO: [🧊] This is how it can look in future
3340
3345
  > /**
3341
3346
  > * Unprepared knowledge
3342
3347
  > * /
3343
- > readonly knowledgeSources: Array<KnowledgeSourceJson>;
3348
+ > readonly knowledgeSources: ReadonlyArray<KnowledgeSourceJson>;
3344
3349
  > };
3345
3350
  >
3346
3351
  > export async function prepareKnowledgePieces(
@@ -3398,7 +3403,7 @@ function prepareTemplates(pipeline, tools, options) {
3398
3403
  return __generator(this, function (_b) {
3399
3404
  switch (_b.label) {
3400
3405
  case 0:
3401
- _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a;
3406
+ _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a;
3402
3407
  templates = pipeline.templates, parameters = pipeline.parameters, knowledgePiecesCount = pipeline.knowledgePiecesCount;
3403
3408
  // TODO: [main] !!!!! Apply samples to each template (if missing and is for the template defined)
3404
3409
  TODO_USE(parameters);
@@ -3460,7 +3465,7 @@ function preparePipeline(pipeline, tools, options) {
3460
3465
  if (isPipelinePrepared(pipeline)) {
3461
3466
  return [2 /*return*/, pipeline];
3462
3467
  }
3463
- rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
3468
+ rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
3464
3469
  parameters = pipeline.parameters, templates = pipeline.templates, knowledgeSources = pipeline.knowledgeSources, personas = pipeline.personas;
3465
3470
  if (tools === undefined || tools.llm === undefined) {
3466
3471
  throw new MissingToolsError('LLM tools are required for preparing the pipeline');
@@ -3518,7 +3523,9 @@ function preparePipeline(pipeline, tools, options) {
3518
3523
  // ----- /Templates preparation -----
3519
3524
  // Note: Count total usage
3520
3525
  currentPreparation.usage = llmToolsWithUsage.getTotalUsage();
3521
- return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates: templatesPrepared, knowledgeSources: knowledgeSourcesPrepared, knowledgePieces: knowledgePiecesPrepared, personas: preparedPersonas, preparations: preparations }))];
3526
+ return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates: __spreadArray([], __read(templatesPrepared), false),
3527
+ // <- TODO: [🪓] Here should be no need for spreading new array, just ` templates: templatesPrepared`
3528
+ knowledgeSources: knowledgeSourcesPrepared, knowledgePieces: knowledgePiecesPrepared, personas: preparedPersonas, preparations: __spreadArray([], __read(preparations), false) }))];
3522
3529
  }
3523
3530
  });
3524
3531
  });
@@ -4489,12 +4496,11 @@ function checkExpectations(expectations, value) {
4489
4496
  */
4490
4497
  function executeAttempts(options) {
4491
4498
  return __awaiter(this, void 0, void 0, function () {
4492
- var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools, settings, $executionReport, pipelineIdentification, maxExecutionAttempts, $ongoingTemplateResult, _llms, llmTools, _loop_1, attempt, state_1;
4499
+ var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools, $executionReport, pipelineIdentification, maxExecutionAttempts, $ongoingTemplateResult, _llms, llmTools, _loop_1, attempt, state_1;
4493
4500
  return __generator(this, function (_a) {
4494
4501
  switch (_a.label) {
4495
4502
  case 0:
4496
- jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools, settings = options.settings, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification;
4497
- maxExecutionAttempts = settings.maxExecutionAttempts;
4503
+ jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, maxExecutionAttempts = options.maxExecutionAttempts;
4498
4504
  $ongoingTemplateResult = {
4499
4505
  $result: null,
4500
4506
  $resultString: null,
@@ -4860,12 +4866,12 @@ function executeAttempts(options) {
4860
4866
  */
4861
4867
  function executeFormatSubvalues(options) {
4862
4868
  return __awaiter(this, void 0, void 0, function () {
4863
- var template, jokerParameterNames, parameters, priority, pipelineIdentification, settings, parameterValue, formatDefinition, subvalueDefinition, formatSettings, resultString;
4869
+ var template, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification, parameterValue, formatDefinition, subvalueDefinition, formatSettings, resultString;
4864
4870
  var _this = this;
4865
4871
  return __generator(this, function (_a) {
4866
4872
  switch (_a.label) {
4867
4873
  case 0:
4868
- template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority, pipelineIdentification = options.pipelineIdentification, settings = options.settings;
4874
+ template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority, csvSettings = options.csvSettings, pipelineIdentification = options.pipelineIdentification;
4869
4875
  if (template.foreach === undefined) {
4870
4876
  return [2 /*return*/, /* not await */ executeAttempts(options)];
4871
4877
  }
@@ -4895,7 +4901,7 @@ function executeFormatSubvalues(options) {
4895
4901
  .join('\n')), "\n\n [\u26F7] This should never happen because subformat name should be validated during parsing\n\n ").concat(block(pipelineIdentification), "\n "); }));
4896
4902
  }
4897
4903
  if (formatDefinition.formatName === 'CSV') {
4898
- formatSettings = settings.csvSettings;
4904
+ formatSettings = csvSettings;
4899
4905
  // <- TODO: [🤹‍♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
4900
4906
  }
4901
4907
  return [4 /*yield*/, subvalueDefinition.mapValues(parameterValue, template.foreach.outputSubparameterName, formatSettings, function (subparameters, index) { return __awaiter(_this, void 0, void 0, function () {
@@ -5048,13 +5054,12 @@ function getReservedParametersForTemplate(options) {
5048
5054
  */
5049
5055
  function executeTemplate(options) {
5050
5056
  return __awaiter(this, void 0, void 0, function () {
5051
- var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress, settings, $executionReport, pipelineIdentification, maxExecutionAttempts, name, title, priority, usedParameterNames, dependentParameterNames, definedParameters, _a, _b, _c, definedParameterNames, parameters, _loop_1, _d, _e, parameterName, maxAttempts, jokerParameterNames, preparedContent, resultString;
5052
- var e_1, _f, _g;
5053
- return __generator(this, function (_h) {
5054
- switch (_h.label) {
5057
+ var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, _a, maxExecutionAttempts, name, title, priority, usedParameterNames, dependentParameterNames, definedParameters, _b, _c, _d, definedParameterNames, parameters, _loop_1, _e, _f, parameterName, maxAttempts, jokerParameterNames, preparedContent, resultString;
5058
+ var e_1, _g, _h;
5059
+ return __generator(this, function (_j) {
5060
+ switch (_j.label) {
5055
5061
  case 0:
5056
- currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress, settings = options.settings, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification;
5057
- maxExecutionAttempts = settings.maxExecutionAttempts;
5062
+ currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a;
5058
5063
  name = "pipeline-executor-frame-".concat(currentTemplate.name);
5059
5064
  title = currentTemplate.title;
5060
5065
  priority = preparedPipeline.templates.length - preparedPipeline.templates.indexOf(currentTemplate);
@@ -5069,7 +5074,7 @@ function executeTemplate(options) {
5069
5074
  // <- [🍸]
5070
5075
  })];
5071
5076
  case 1:
5072
- _h.sent();
5077
+ _j.sent();
5073
5078
  usedParameterNames = extractParameterNamesFromTemplate(currentTemplate);
5074
5079
  dependentParameterNames = new Set(currentTemplate.dependentParameterNames);
5075
5080
  // TODO: [👩🏾‍🤝‍👩🏻] Use here `mapAvailableToExpectedParameters`
@@ -5080,15 +5085,15 @@ function executeTemplate(options) {
5080
5085
  .map(function (name) { return "{".concat(name, "}"); })
5081
5086
  .join(', '), "\n\n ").concat(block(pipelineIdentification), "\n\n "); }));
5082
5087
  }
5083
- _b = (_a = Object).freeze;
5084
- _c = [{}];
5088
+ _c = (_b = Object).freeze;
5089
+ _d = [{}];
5085
5090
  return [4 /*yield*/, getReservedParametersForTemplate({
5086
5091
  preparedPipeline: preparedPipeline,
5087
5092
  template: currentTemplate,
5088
5093
  pipelineIdentification: pipelineIdentification,
5089
5094
  })];
5090
5095
  case 2:
5091
- definedParameters = _b.apply(_a, [__assign.apply(void 0, [__assign.apply(void 0, _c.concat([(_h.sent())])), parametersToPass])]);
5096
+ definedParameters = _c.apply(_b, [__assign.apply(void 0, [__assign.apply(void 0, _d.concat([(_j.sent())])), parametersToPass])]);
5092
5097
  definedParameterNames = new Set(Object.keys(definedParameters));
5093
5098
  parameters = {};
5094
5099
  _loop_1 = function (parameterName) {
@@ -5108,15 +5113,15 @@ function executeTemplate(options) {
5108
5113
  try {
5109
5114
  // Note: [2] Check that all used parameters are defined and removing unused parameters for this template
5110
5115
  // TODO: [👩🏾‍🤝‍👩🏻] Use here `mapAvailableToExpectedParameters`
5111
- for (_d = __values(Array.from(union(definedParameterNames, usedParameterNames, dependentParameterNames))), _e = _d.next(); !_e.done; _e = _d.next()) {
5112
- parameterName = _e.value;
5116
+ for (_e = __values(Array.from(union(definedParameterNames, usedParameterNames, dependentParameterNames))), _f = _e.next(); !_f.done; _f = _e.next()) {
5117
+ parameterName = _f.value;
5113
5118
  _loop_1(parameterName);
5114
5119
  }
5115
5120
  }
5116
5121
  catch (e_1_1) { e_1 = { error: e_1_1 }; }
5117
5122
  finally {
5118
5123
  try {
5119
- if (_e && !_e.done && (_f = _d.return)) _f.call(_d);
5124
+ if (_f && !_f.done && (_g = _e.return)) _g.call(_e);
5120
5125
  }
5121
5126
  finally { if (e_1) throw e_1.error; }
5122
5127
  }
@@ -5136,12 +5141,11 @@ function executeTemplate(options) {
5136
5141
  template: currentTemplate,
5137
5142
  preparedPipeline: preparedPipeline,
5138
5143
  tools: tools,
5139
- settings: settings,
5140
5144
  $executionReport: $executionReport,
5141
5145
  pipelineIdentification: pipelineIdentification,
5142
5146
  })];
5143
5147
  case 3:
5144
- resultString = _h.sent();
5148
+ resultString = _j.sent();
5145
5149
  return [4 /*yield*/, onProgress({
5146
5150
  name: name,
5147
5151
  title: title,
@@ -5153,12 +5157,12 @@ function executeTemplate(options) {
5153
5157
  // <- [🍸]
5154
5158
  })];
5155
5159
  case 4:
5156
- _h.sent();
5157
- return [2 /*return*/, Object.freeze((_g = {},
5158
- _g[currentTemplate.resultingParameterName] =
5160
+ _j.sent();
5161
+ return [2 /*return*/, Object.freeze((_h = {},
5162
+ _h[currentTemplate.resultingParameterName] =
5159
5163
  // <- Note: [👩‍👩‍👧] No need to detect parameter collision here because pipeline checks logic consistency during construction
5160
5164
  resultString,
5161
- _g))];
5165
+ _h))];
5162
5166
  }
5163
5167
  });
5164
5168
  });
@@ -5217,13 +5221,12 @@ function filterJustOutputParameters(options) {
5217
5221
  */
5218
5222
  function executePipeline(options) {
5219
5223
  return __awaiter(this, void 0, void 0, function () {
5220
- var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, settings, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
5224
+ var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
5221
5225
  var e_1, _f, e_2, _g;
5222
5226
  return __generator(this, function (_h) {
5223
5227
  switch (_h.label) {
5224
5228
  case 0:
5225
- inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, settings = options.settings;
5226
- maxParallelCount = settings.maxParallelCount, rootDirname = settings.rootDirname, _a = settings.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
5229
+ inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, maxParallelCount = options.maxParallelCount, rootDirname = options.rootDirname, _a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
5227
5230
  preparedPipeline = options.preparedPipeline;
5228
5231
  if (!(preparedPipeline === undefined)) return [3 /*break*/, 2];
5229
5232
  return [4 /*yield*/, preparePipeline(pipeline, tools, {
@@ -5408,12 +5411,7 @@ function executePipeline(options) {
5408
5411
  return [3 /*break*/, 4];
5409
5412
  case 3:
5410
5413
  unresovedTemplates_1 = unresovedTemplates_1.filter(function (template) { return template !== currentTemplate; });
5411
- work_1 = executeTemplate({
5412
- currentTemplate: currentTemplate,
5413
- preparedPipeline: preparedPipeline,
5414
- parametersToPass: parametersToPass,
5415
- tools: tools,
5416
- onProgress: function (progress) {
5414
+ work_1 = executeTemplate(__assign(__assign({}, options), { currentTemplate: currentTemplate, preparedPipeline: preparedPipeline, parametersToPass: parametersToPass, tools: tools, onProgress: function (progress) {
5417
5415
  if (isReturned) {
5418
5416
  throw new UnexpectedError(spaceTrim(function (block) { return "\n Can not call `onProgress` after pipeline execution is finished\n\n ".concat(block(pipelineIdentification), "\n\n ").concat(block(JSON.stringify(progress, null, 4)
5419
5417
  .split('\n')
@@ -5423,11 +5421,7 @@ function executePipeline(options) {
5423
5421
  if (onProgress) {
5424
5422
  onProgress(progress);
5425
5423
  }
5426
- },
5427
- settings: settings,
5428
- $executionReport: executionReport,
5429
- pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }),
5430
- })
5424
+ }, $executionReport: executionReport, pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }) }))
5431
5425
  .then(function (newParametersToPass) {
5432
5426
  parametersToPass = __assign(__assign({}, newParametersToPass), parametersToPass);
5433
5427
  resovedParameterNames_1 = __spreadArray(__spreadArray([], __read(resovedParameterNames_1), false), [currentTemplate.resultingParameterName], false);
@@ -5531,8 +5525,7 @@ function executePipeline(options) {
5531
5525
  */
5532
5526
  function createPipelineExecutor(options) {
5533
5527
  var _this = this;
5534
- var pipeline = options.pipeline, tools = options.tools, _a = options.settings, settings = _a === void 0 ? {} : _a;
5535
- var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f, _g = settings.rootDirname, rootDirname = _g === void 0 ? null : _g;
5528
+ var pipeline = options.pipeline, tools = options.tools, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a, _b = options.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = options.csvSettings, csvSettings = _c === void 0 ? DEFAULT_CSV_SETTINGS : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d, _e = options.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _e === void 0 ? false : _e, _f = options.rootDirname, rootDirname = _f === void 0 ? null : _f;
5536
5529
  validatePipeline(pipeline);
5537
5530
  var pipelineIdentification = (function () {
5538
5531
  // Note: This is a 😐 implementation of [🚞]
@@ -5566,14 +5559,12 @@ function createPipelineExecutor(options) {
5566
5559
  tools: tools,
5567
5560
  onProgress: onProgress,
5568
5561
  pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
5569
- settings: {
5570
- maxExecutionAttempts: maxExecutionAttempts,
5571
- maxParallelCount: maxParallelCount,
5572
- csvSettings: csvSettings,
5573
- isVerbose: isVerbose,
5574
- isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
5575
- rootDirname: rootDirname,
5576
- },
5562
+ maxExecutionAttempts: maxExecutionAttempts,
5563
+ maxParallelCount: maxParallelCount,
5564
+ csvSettings: csvSettings,
5565
+ isVerbose: isVerbose,
5566
+ isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
5567
+ rootDirname: rootDirname,
5577
5568
  })];
5578
5569
  });
5579
5570
  }); };
@@ -5595,7 +5586,7 @@ var markdownScraperMetadata = $deepFreeze({
5595
5586
  mimeTypes: ['text/markdown', 'text/plain'],
5596
5587
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5597
5588
  isAvilableInBrowser: true,
5598
- requiredExecutables: ['!!!!!!'],
5589
+ requiredExecutables: [],
5599
5590
  }); /* <- TODO: [🤛] */
5600
5591
  /**
5601
5592
  * Registration of known scraper metadata
@@ -5639,7 +5630,7 @@ var MarkdownScraper = /** @class */ (function () {
5639
5630
  return __generator(this, function (_k) {
5640
5631
  switch (_k.label) {
5641
5632
  case 0:
5642
- _a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ? MAX_PARALLEL_COUNT : _b, _c = _a.isVerbose, isVerbose = _c === void 0 ? IS_VERBOSE : _c;
5633
+ _a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = _a.isVerbose, isVerbose = _c === void 0 ? DEFAULT_IS_VERBOSE : _c;
5643
5634
  llm = this.tools.llm;
5644
5635
  if (llm === undefined) {
5645
5636
  throw new MissingToolsError('LLM tools are required for scraping external files');
@@ -5738,7 +5729,8 @@ var MarkdownScraper = /** @class */ (function () {
5738
5729
  embeddingResult = _c.sent();
5739
5730
  index.push({
5740
5731
  modelName: embeddingResult.modelName,
5741
- position: embeddingResult.content,
5732
+ position: __spreadArray([], __read(embeddingResult.content), false),
5733
+ // <- TODO: [🪓] Here should be no need for spreading new array, just `position: embeddingResult.content`
5742
5734
  });
5743
5735
  _c.label = 6;
5744
5736
  case 6: return [3 /*break*/, 8];
@@ -5778,32 +5770,29 @@ var MarkdownScraper = /** @class */ (function () {
5778
5770
  */
5779
5771
 
5780
5772
  /**
5781
- * A converter instance that uses showdown and highlight extensions
5773
+ * Create a new showdown converter instance
5782
5774
  *
5783
- * @type {Converter}
5784
- * @private for markdown and html knowledge scrapers
5785
- */
5786
- var markdownConverter = new Converter({
5787
- flavor: 'github', // <- TODO: !!!!!! Explicitly specify the flavor of promptbook markdown
5788
- /*
5789
- > import showdownHighlight from 'showdown-highlight';
5790
- > extensions: [
5791
- > showdownHighlight({
5792
- > // Whether to add the classes to the <pre> tag, default is false
5793
- > pre: true,
5794
- > // Whether to use hljs' auto language detection, default is true
5795
- > auto_detection: true,
5796
- > }),
5797
- > ],
5798
- */
5799
- });
5800
- /**
5801
- * TODO: !!!!!! Figure out better name not to confuse with `Converter`
5802
- * TODO: !!!!!! Lazy-make converter
5775
+ * @private utility of `WebsiteScraper`
5803
5776
  */
5777
+ function createShowdownConverter() {
5778
+ return new Converter({
5779
+ flavor: 'github',
5780
+ /*
5781
+ > import showdownHighlight from 'showdown-highlight';
5782
+ > extensions: [
5783
+ > showdownHighlight({
5784
+ > // Whether to add the classes to the <pre> tag, default is false
5785
+ > pre: true,
5786
+ > // Whether to use hljs' auto language detection, default is true
5787
+ > auto_detection: true,
5788
+ > }),
5789
+ > ],
5790
+ */
5791
+ });
5792
+ }
5804
5793
 
5805
5794
  /**
5806
- * Scraper for .docx files
5795
+ * Scraper for websites
5807
5796
  *
5808
5797
  * @see `documentationUrl` for more details
5809
5798
  * @public exported from `@promptbook/website-crawler`
@@ -5813,6 +5802,7 @@ var WebsiteScraper = /** @class */ (function () {
5813
5802
  this.tools = tools;
5814
5803
  this.options = options;
5815
5804
  this.markdownScraper = new MarkdownScraper(tools, options);
5805
+ this.showdownConverter = createShowdownConverter();
5816
5806
  }
5817
5807
  Object.defineProperty(WebsiteScraper.prototype, "metadata", {
5818
5808
  /**
@@ -5833,12 +5823,11 @@ var WebsiteScraper = /** @class */ (function () {
5833
5823
  return __awaiter(this, void 0, void 0, function () {
5834
5824
  var _a, _b,
5835
5825
  // TODO: [🧠] Maybe in node use headless browser not just JSDOM
5836
- // externalProgramsPaths = {},
5837
- rootDirname, _c, cacheDirname, _d, isCacheCleaned, _e, isVerbose, jsdom, _f, reader, article, html, i, cacheFilehandler, markdown;
5826
+ rootDirname, _c, cacheDirname, _d, intermediateFilesStrategy, _e, isVerbose, jsdom, _f, reader, article, html, i, cacheFilehandler, markdown;
5838
5827
  return __generator(this, function (_g) {
5839
5828
  switch (_g.label) {
5840
5829
  case 0:
5841
- _a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ? SCRAPE_CACHE_DIRNAME : _c, _d = _a.isCacheCleaned, isCacheCleaned = _d === void 0 ? false : _d, _e = _a.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e;
5830
+ _a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _c, _d = _a.intermediateFilesStrategy, intermediateFilesStrategy = _d === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _d, _e = _a.isVerbose, isVerbose = _e === void 0 ? DEFAULT_IS_VERBOSE : _e;
5842
5831
  // TODO: !!!!!! Does this work in browser? Make it work.
5843
5832
  if (source.url === null) {
5844
5833
  throw new KnowledgeScrapeError('Website scraper requires URL');
@@ -5851,10 +5840,6 @@ var WebsiteScraper = /** @class */ (function () {
5851
5840
  }]))();
5852
5841
  reader = new Readability(jsdom.window.document);
5853
5842
  article = reader.parse();
5854
- console.log(article);
5855
- return [4 /*yield*/, forTime(10000)];
5856
- case 2:
5857
- _g.sent();
5858
5843
  html = (article === null || article === void 0 ? void 0 : article.content) || (article === null || article === void 0 ? void 0 : article.textContent) || jsdom.window.document.body.innerHTML;
5859
5844
  // Note: Unwrap html such as it is convertable by `markdownConverter`
5860
5845
  for (i = 0; i < 2; i++) {
@@ -5866,16 +5851,16 @@ var WebsiteScraper = /** @class */ (function () {
5866
5851
  return [4 /*yield*/, getScraperIntermediateSource(source, {
5867
5852
  rootDirname: rootDirname,
5868
5853
  cacheDirname: cacheDirname,
5869
- isCacheCleaned: isCacheCleaned,
5854
+ intermediateFilesStrategy: intermediateFilesStrategy,
5870
5855
  extension: 'html',
5871
5856
  isVerbose: isVerbose,
5872
5857
  })];
5873
- case 3:
5858
+ case 2:
5874
5859
  cacheFilehandler = _g.sent();
5875
5860
  return [4 /*yield*/, writeFile(cacheFilehandler.filename, html, 'utf-8')];
5876
- case 4:
5861
+ case 3:
5877
5862
  _g.sent();
5878
- markdown = markdownConverter.makeMarkdown(html, jsdom.window.document);
5863
+ markdown = this.showdownConverter.makeMarkdown(html, jsdom.window.document);
5879
5864
  return [2 /*return*/, __assign(__assign({}, cacheFilehandler), { markdown: markdown })];
5880
5865
  }
5881
5866
  });
@@ -5919,7 +5904,6 @@ var WebsiteScraper = /** @class */ (function () {
5919
5904
  return WebsiteScraper;
5920
5905
  }());
5921
5906
  /**
5922
- * TODO: !!!!!! Put into separate package
5923
5907
  * TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
5924
5908
  * TODO: [🪂] Do it in parallel 11:11
5925
5909
  * Note: No need to aggregate usage here, it is done by intercepting the llmTools