@promptbook/website-crawler 0.72.0-13 → 0.72.0-15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/README.md +1 -1
  2. package/esm/index.es.js +98 -114
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/src/_packages/core.index.d.ts +22 -18
  5. package/esm/typings/src/_packages/node.index.d.ts +2 -0
  6. package/esm/typings/src/_packages/types.index.d.ts +10 -10
  7. package/esm/typings/src/_packages/utils.index.d.ts +2 -2
  8. package/esm/typings/src/collection/PipelineCollection.d.ts +1 -1
  9. package/esm/typings/src/collection/SimplePipelineCollection.d.ts +2 -2
  10. package/esm/typings/src/collection/collectionToJson.d.ts +1 -1
  11. package/esm/typings/src/collection/constructors/createCollectionFromJson.d.ts +1 -1
  12. package/esm/typings/src/collection/constructors/createCollectionFromPromise.d.ts +1 -1
  13. package/esm/typings/src/commands/_common/types/CommandParser.d.ts +5 -5
  14. package/esm/typings/src/config.d.ts +21 -14
  15. package/esm/typings/src/execution/EmbeddingVector.d.ts +1 -1
  16. package/esm/typings/src/execution/Executables.d.ts +18 -0
  17. package/esm/typings/src/execution/ExecutionTools.d.ts +9 -3
  18. package/esm/typings/src/execution/LlmExecutionTools.d.ts +1 -1
  19. package/esm/typings/src/execution/PipelineExecutorResult.d.ts +2 -2
  20. package/esm/typings/src/execution/assertsExecutionSuccessful.d.ts +1 -0
  21. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorOptions.d.ts +29 -6
  22. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +2 -11
  23. package/esm/typings/src/execution/createPipelineExecutor/20-executeTemplate.d.ts +4 -13
  24. package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +9 -14
  25. package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +11 -3
  26. package/esm/typings/src/execution/utils/addUsage.d.ts +1 -1
  27. package/esm/typings/src/execution/utils/forEachAsync.d.ts +1 -1
  28. package/esm/typings/src/formats/_common/FormatDefinition.d.ts +2 -2
  29. package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +2 -2
  30. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForCli.d.ts +2 -2
  31. package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +1 -1
  32. package/esm/typings/src/llm-providers/_common/register/createLlmToolsFromConfiguration.d.ts +7 -0
  33. package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +4 -1
  34. package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionTools.d.ts +1 -1
  35. package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
  36. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +1 -1
  37. package/esm/typings/src/llm-providers/mocked/$fakeTextToExpectations.d.ts +1 -1
  38. package/esm/typings/src/llm-providers/mocked/MockedEchoLlmExecutionTools.d.ts +1 -1
  39. package/esm/typings/src/llm-providers/mocked/MockedFackedLlmExecutionTools.d.ts +1 -1
  40. package/esm/typings/src/llm-providers/multiple/MultipleLlmExecutionTools.d.ts +4 -5
  41. package/esm/typings/src/llm-providers/multiple/joinLlmExecutionTools.d.ts +1 -1
  42. package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +1 -1
  43. package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
  44. package/esm/typings/src/llm-providers/remote/RemoteLlmExecutionTools.d.ts +1 -1
  45. package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_ListModels_Response.d.ts +3 -3
  46. package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_Prompt_Response.d.ts +2 -2
  47. package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +4 -23
  48. package/esm/typings/src/prepare/prepareTemplates.d.ts +1 -1
  49. package/esm/typings/src/scrapers/_common/Scraper.d.ts +1 -5
  50. package/esm/typings/src/scrapers/_common/prepareKnowledgePieces.d.ts +1 -1
  51. package/esm/typings/src/scrapers/_common/register/$provideExecutablesForNode.d.ts +12 -0
  52. package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +2 -2
  53. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +2 -2
  54. package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +2 -2
  55. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +2 -2
  56. package/esm/typings/src/scrapers/document/DocumentScraper.d.ts +2 -2
  57. package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
  58. package/esm/typings/src/scrapers/document/register-metadata.d.ts +1 -1
  59. package/esm/typings/src/scrapers/document-legacy/LegacyDocumentScraper.d.ts +3 -3
  60. package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +1 -1
  61. package/esm/typings/src/scrapers/document-legacy/register-metadata.d.ts +1 -1
  62. package/esm/typings/src/scrapers/markdown/MarkdownScraper.d.ts +1 -1
  63. package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +1 -1
  64. package/esm/typings/src/scrapers/markdown/register-metadata.d.ts +1 -1
  65. package/esm/typings/src/scrapers/pdf/PdfScraper.d.ts +2 -2
  66. package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +1 -1
  67. package/esm/typings/src/scrapers/pdf/register-metadata.d.ts +1 -1
  68. package/esm/typings/src/scrapers/website/WebsiteScraper.d.ts +6 -3
  69. package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +1 -1
  70. package/esm/typings/src/scrapers/website/register-metadata.d.ts +1 -1
  71. package/esm/typings/src/scrapers/website/utils/createShowdownConverter.d.ts +7 -0
  72. package/esm/typings/src/scripting/javascript/utils/preserve.d.ts +1 -1
  73. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +1 -1
  74. package/esm/typings/src/types/Arrayable.d.ts +1 -1
  75. package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +7 -0
  76. package/esm/typings/src/types/PipelineJson/KnowledgePieceJson.d.ts +4 -4
  77. package/esm/typings/src/types/PipelineJson/KnowledgeSourceJson.d.ts +1 -1
  78. package/esm/typings/src/types/PipelineJson/PersonaJson.d.ts +1 -1
  79. package/esm/typings/src/types/PipelineJson/TemplateJsonCommon.d.ts +2 -2
  80. package/esm/typings/src/types/Prompt.d.ts +1 -1
  81. package/esm/typings/src/types/execution-report/ExecutionReportJson.d.ts +1 -1
  82. package/esm/typings/src/utils/$Register.d.ts +1 -1
  83. package/esm/typings/src/utils/FromtoItems.d.ts +1 -1
  84. package/esm/typings/src/utils/arrayableToArray.d.ts +1 -1
  85. package/esm/typings/src/utils/emojis.d.ts +1 -1
  86. package/esm/typings/src/utils/execCommand/$execCommand.d.ts +2 -2
  87. package/esm/typings/src/utils/execCommand/{IExecCommandOptions.d.ts → ExecCommandOptions.d.ts} +2 -6
  88. package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.d.ts +3 -3
  89. package/esm/typings/src/utils/expectation-counters/countSentences.d.ts +1 -1
  90. package/esm/typings/src/utils/markdown/extractAllBlocksFromMarkdown.d.ts +1 -1
  91. package/esm/typings/src/utils/markdown/splitMarkdownIntoSections.d.ts +1 -1
  92. package/esm/typings/src/utils/normalization/IKeywords.d.ts +2 -2
  93. package/esm/typings/src/utils/normalization/parseKeywords.d.ts +2 -2
  94. package/esm/typings/src/utils/normalization/parseKeywordsFromString.d.ts +2 -2
  95. package/esm/typings/src/utils/normalization/searchKeywords.d.ts +2 -2
  96. package/esm/typings/src/utils/organization/TODO_USE.d.ts +1 -1
  97. package/esm/typings/src/utils/organization/keepUnused.d.ts +1 -1
  98. package/esm/typings/src/utils/random/$randomSeed.d.ts +1 -1
  99. package/esm/typings/src/utils/sets/intersection.d.ts +1 -1
  100. package/esm/typings/src/utils/sets/union.d.ts +1 -1
  101. package/esm/typings/src/utils/unwrapResult.d.ts +4 -4
  102. package/package.json +4 -3
  103. package/umd/index.umd.js +101 -117
  104. package/umd/index.umd.js.map +1 -1
  105. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorSettings.d.ts +0 -29
  106. package/esm/typings/src/scrapers/website/utils/markdownConverter.d.ts +0 -12
  107. /package/esm/typings/src/scrapers/website/utils/{markdownConverter.test.d.ts → createShowdownConverter.test.d.ts} +0 -0
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  # ![Promptbook logo - cube with letters P and B](./other/design/logo-h1.png) Promptbook
4
4
 
5
- Supercharge your use of large language models
5
+ Build responsible, controlled and transparent applications on top of LLM models!
6
6
 
7
7
 
8
8
 
package/esm/index.es.js CHANGED
@@ -2,12 +2,12 @@ import spaceTrim$1, { spaceTrim } from 'spacetrim';
2
2
  import { Readability } from '@mozilla/readability';
3
3
  import { mkdir, rm, writeFile } from 'fs/promises';
4
4
  import { JSDOM } from 'jsdom';
5
- import { forTime } from 'waitasecond';
6
5
  import { SHA256 } from 'crypto-js';
7
6
  import hexEncoder from 'crypto-js/enc-hex';
8
7
  import { basename, join, dirname } from 'path';
9
8
  import { format } from 'prettier';
10
9
  import parserHtml from 'prettier/parser-html';
10
+ import { forTime } from 'waitasecond';
11
11
  import { lookup } from 'mime-types';
12
12
  import { unparse, parse } from 'papaparse';
13
13
  import { Converter } from 'showdown';
@@ -16,7 +16,7 @@ import { Converter } from 'showdown';
16
16
  /**
17
17
  * The version of the Promptbook library
18
18
  */
19
- var PROMPTBOOK_VERSION = '0.72.0-12';
19
+ var PROMPTBOOK_VERSION = '0.72.0-14';
20
20
  // TODO: [main] !!!! List here all the versions and annotate + put into script
21
21
 
22
22
  /*! *****************************************************************************
@@ -364,18 +364,25 @@ var LOOP_LIMIT = 1000;
364
364
  * @private within the repository - too low-level in comparison with other `MAX_...`
365
365
  */
366
366
  var IMMEDIATE_TIME = 10;
367
+ /**
368
+ * Strategy for caching the intermediate results for knowledge sources
369
+ *
370
+ * @public exported from `@promptbook/core`
371
+ */
372
+ var DEFAULT_INTERMEDIATE_FILES_STRATEGY = 'HIDE_AND_KEEP';
373
+ // <- TODO: [😡] Change to 'VISIBLE'
367
374
  /**
368
375
  * The maximum number of (LLM) tasks running in parallel
369
376
  *
370
377
  * @public exported from `@promptbook/core`
371
378
  */
372
- var MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
379
+ var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
373
380
  /**
374
381
  * The maximum number of attempts to execute LLM task before giving up
375
382
  *
376
383
  * @public exported from `@promptbook/core`
377
384
  */
378
- var MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
385
+ var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
379
386
  /**
380
387
  * Where to store the scrape cache
381
388
  *
@@ -383,7 +390,7 @@ var MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
383
390
  *
384
391
  * @public exported from `@promptbook/core`
385
392
  */
386
- var SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
393
+ var DEFAULT_SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
387
394
  /**
388
395
  * Nonce which is used for replacing things in strings
389
396
  *
@@ -435,7 +442,7 @@ var DEFAULT_CSV_SETTINGS = Object.freeze({
435
442
  *
436
443
  * @public exported from `@promptbook/core`
437
444
  */
438
- var IS_VERBOSE = false;
445
+ var DEFAULT_IS_VERBOSE = false;
439
446
  /**
440
447
  * @@@
441
448
  *
@@ -576,7 +583,7 @@ var $Register = /** @class */ (function () {
576
583
  this.storage = globalScope[storageName];
577
584
  }
578
585
  $Register.prototype.list = function () {
579
- // <- TODO: ReadonlyDeep<Array<TRegistered>>
586
+ // <- TODO: ReadonlyDeep<ReadonlyArray<TRegistered>>
580
587
  return this.storage;
581
588
  };
582
589
  $Register.prototype.register = function (registered) {
@@ -628,7 +635,7 @@ var websiteScraperMetadata = $deepFreeze({
628
635
  mimeTypes: ['text/html'],
629
636
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
630
637
  isAvilableInBrowser: false,
631
- requiredExecutables: ['!!!!!!'],
638
+ requiredExecutables: [],
632
639
  }); /* <- TODO: [🤛] */
633
640
  /**
634
641
  * Registration of known scraper metadata
@@ -1106,12 +1113,12 @@ function TODO_USE() {
1106
1113
  */
1107
1114
  function getScraperIntermediateSource(source, options) {
1108
1115
  return __awaiter(this, void 0, void 0, function () {
1109
- var sourceFilename, url, rootDirname, cacheDirname, isCacheCleaned, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
1116
+ var sourceFilename, url, rootDirname, cacheDirname, intermediateFilesStrategy, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
1110
1117
  return __generator(this, function (_a) {
1111
1118
  switch (_a.label) {
1112
1119
  case 0:
1113
1120
  sourceFilename = source.filename, url = source.url;
1114
- rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, isCacheCleaned = options.isCacheCleaned, extension = options.extension, isVerbose = options.isVerbose;
1121
+ rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, intermediateFilesStrategy = options.intermediateFilesStrategy, extension = options.extension, isVerbose = options.isVerbose;
1115
1122
  hash = SHA256(
1116
1123
  // <- TODO: [🥬] Encapsulate sha256 to some private utility function
1117
1124
  hexEncoder.parse(sourceFilename || url || 'untitled'))
@@ -1121,7 +1128,7 @@ function getScraperIntermediateSource(source, options) {
1121
1128
  pieces = ['intermediate', semanticName, hash].filter(function (piece) { return piece !== ''; });
1122
1129
  name = pieces.join('-').split('--').join('-');
1123
1130
  // <- TODO: Use MAX_FILENAME_LENGTH
1124
- TODO_USE(rootDirname); // <- TODO: !!!!!!
1131
+ TODO_USE(rootDirname); // <- TODO: [😡]
1125
1132
  cacheFilename = join.apply(void 0, __spreadArray(__spreadArray([process.cwd(),
1126
1133
  cacheDirname], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), [name], false)).split('\\')
1127
1134
  .join('/') +
@@ -1141,7 +1148,7 @@ function getScraperIntermediateSource(source, options) {
1141
1148
  return __generator(this, function (_a) {
1142
1149
  switch (_a.label) {
1143
1150
  case 0:
1144
- if (!isCacheCleaned) return [3 /*break*/, 2];
1151
+ if (!(intermediateFilesStrategy === 'HIDE_AND_CLEAN')) return [3 /*break*/, 2];
1145
1152
  if (isVerbose) {
1146
1153
  console.info('legacyDocumentScraper: Clening cache');
1147
1154
  }
@@ -1165,7 +1172,7 @@ function getScraperIntermediateSource(source, options) {
1165
1172
  /**
1166
1173
  * Note: Not using `FileCacheStorage` for two reasons:
1167
1174
  * 1) Need to store more than serialized JSONs
1168
- * 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: !!!!
1175
+ * 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: [😡]
1169
1176
  * TODO: [🐱‍🐉][🧠] Make some smart crop
1170
1177
  * Note: [🟢] Code in this file should never be released in packages that could be imported into browser environment
1171
1178
  */
@@ -1230,7 +1237,6 @@ function pipelineJsonToString(pipelineJson) {
1230
1237
  pipelineString += '\n\n';
1231
1238
  pipelineString += description;
1232
1239
  }
1233
- // TODO:> const commands: Array<Command>
1234
1240
  var commands = [];
1235
1241
  if (pipelineUrl) {
1236
1242
  commands.push("PIPELINE URL ".concat(pipelineUrl));
@@ -1286,7 +1292,6 @@ function pipelineJsonToString(pipelineJson) {
1286
1292
  pipelineString += '\n\n';
1287
1293
  pipelineString += description_1;
1288
1294
  }
1289
- // TODO:> const commands: Array<Command>
1290
1295
  var commands_1 = [];
1291
1296
  var contentLanguage = 'text';
1292
1297
  if (templateType === 'PROMPT_TEMPLATE') {
@@ -2187,6 +2192,7 @@ function assertsExecutionSuccessful(executionResult) {
2187
2192
  }
2188
2193
  }
2189
2194
  /**
2195
+ * TODO: [🐚] This function should be removed OR changed OR be completely rewritten
2190
2196
  * TODO: [🧠] Can this return type be better typed than void
2191
2197
  */
2192
2198
 
@@ -2372,8 +2378,7 @@ $deepFreeze({
2372
2378
  * Multiple LLM Execution Tools is a proxy server that uses multiple execution tools internally and exposes the executor interface externally.
2373
2379
  *
2374
2380
  * Note: Internal utility of `joinLlmExecutionTools` but exposed type
2375
- * @public exported from `@promptbook/types`
2376
- * TODO: !!!!!! Export as runtime class not just type
2381
+ * @public exported from `@promptbook/core`
2377
2382
  */
2378
2383
  var MultipleLlmExecutionTools = /** @class */ (function () {
2379
2384
  /**
@@ -2875,7 +2880,7 @@ function preparePersona(personaDescription, tools, options) {
2875
2880
  return __generator(this, function (_d) {
2876
2881
  switch (_d.label) {
2877
2882
  case 0:
2878
- _a = options.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
2883
+ _a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
2879
2884
  if (tools === undefined || tools.llm === undefined) {
2880
2885
  throw new MissingToolsError('LLM tools are required for preparing persona');
2881
2886
  }
@@ -2942,7 +2947,7 @@ var $scrapersRegister = new $Register('scraper_constructors');
2942
2947
  * TODO: [®] DRY Register logic
2943
2948
  */
2944
2949
 
2945
- // TODO: !!!!!! Maybe delete this function
2950
+ // TODO: !!!!!!last - Maybe delete this function
2946
2951
  /**
2947
2952
  * Creates a message with all registered scrapers
2948
2953
  *
@@ -3050,7 +3055,6 @@ function $registeredScrapersMessage() {
3050
3055
  * @private within the repository
3051
3056
  */
3052
3057
  function sourceContentToName(sourceContent) {
3053
- // TODO: !!!!!! Better name for source than gibberish hash
3054
3058
  var hash = SHA256(hexEncoder.parse(JSON.stringify(sourceContent)))
3055
3059
  // <- TODO: [🥬] Encapsulate sha256 to some private utility function
3056
3060
  .toString( /* hex */)
@@ -3137,7 +3141,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3137
3141
  case 0:
3138
3142
  sourceContent = knowledgeSource.sourceContent;
3139
3143
  name = knowledgeSource.name;
3140
- _b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? IS_VERBOSE : _d;
3144
+ _b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d;
3141
3145
  TODO_USE(isVerbose);
3142
3146
  if (!name) {
3143
3147
  name = sourceContentToName(sourceContent);
@@ -3227,7 +3231,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3227
3231
  content = _a.sent();
3228
3232
  return [2 /*return*/, new Blob([
3229
3233
  content,
3230
- // <- TODO: !!!!!! Maybe not working
3234
+ // <- TODO: !!!!!! Test that this is working
3231
3235
  ], { type: mimeType_1 })];
3232
3236
  }
3233
3237
  });
@@ -3290,7 +3294,7 @@ function prepareKnowledgePieces(knowledgeSources, tools, options) {
3290
3294
  return __generator(this, function (_c) {
3291
3295
  switch (_c.label) {
3292
3296
  case 0:
3293
- _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
3297
+ _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
3294
3298
  knowledgePreparedUnflatten = new Array(knowledgeSources.length);
3295
3299
  return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
3296
3300
  var partialPieces, sourceHandler, _a, _b, scraper, partialPiecesUnchecked, e_1_1, pieces;
@@ -3319,7 +3323,8 @@ function prepareKnowledgePieces(knowledgeSources, tools, options) {
3319
3323
  case 4:
3320
3324
  partialPiecesUnchecked = _d.sent();
3321
3325
  if (partialPiecesUnchecked !== null) {
3322
- partialPieces = partialPiecesUnchecked;
3326
+ partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
3327
+ // <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
3323
3328
  return [3 /*break*/, 6];
3324
3329
  }
3325
3330
  _d.label = 5;
@@ -3367,7 +3372,7 @@ TODO: [🧊] This is how it can look in future
3367
3372
  > /**
3368
3373
  > * Unprepared knowledge
3369
3374
  > * /
3370
- > readonly knowledgeSources: Array<KnowledgeSourceJson>;
3375
+ > readonly knowledgeSources: ReadonlyArray<KnowledgeSourceJson>;
3371
3376
  > };
3372
3377
  >
3373
3378
  > export async function prepareKnowledgePieces(
@@ -3425,7 +3430,7 @@ function prepareTemplates(pipeline, tools, options) {
3425
3430
  return __generator(this, function (_b) {
3426
3431
  switch (_b.label) {
3427
3432
  case 0:
3428
- _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a;
3433
+ _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a;
3429
3434
  templates = pipeline.templates, parameters = pipeline.parameters, knowledgePiecesCount = pipeline.knowledgePiecesCount;
3430
3435
  // TODO: [main] !!!!! Apply samples to each template (if missing and is for the template defined)
3431
3436
  TODO_USE(parameters);
@@ -3487,7 +3492,7 @@ function preparePipeline(pipeline, tools, options) {
3487
3492
  if (isPipelinePrepared(pipeline)) {
3488
3493
  return [2 /*return*/, pipeline];
3489
3494
  }
3490
- rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
3495
+ rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
3491
3496
  parameters = pipeline.parameters, templates = pipeline.templates, knowledgeSources = pipeline.knowledgeSources, personas = pipeline.personas;
3492
3497
  if (tools === undefined || tools.llm === undefined) {
3493
3498
  throw new MissingToolsError('LLM tools are required for preparing the pipeline');
@@ -3545,7 +3550,9 @@ function preparePipeline(pipeline, tools, options) {
3545
3550
  // ----- /Templates preparation -----
3546
3551
  // Note: Count total usage
3547
3552
  currentPreparation.usage = llmToolsWithUsage.getTotalUsage();
3548
- return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates: templatesPrepared, knowledgeSources: knowledgeSourcesPrepared, knowledgePieces: knowledgePiecesPrepared, personas: preparedPersonas, preparations: preparations }))];
3553
+ return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates: __spreadArray([], __read(templatesPrepared), false),
3554
+ // <- TODO: [🪓] Here should be no need for spreading new array, just ` templates: templatesPrepared`
3555
+ knowledgeSources: knowledgeSourcesPrepared, knowledgePieces: knowledgePiecesPrepared, personas: preparedPersonas, preparations: __spreadArray([], __read(preparations), false) }))];
3549
3556
  }
3550
3557
  });
3551
3558
  });
@@ -4516,12 +4523,11 @@ function checkExpectations(expectations, value) {
4516
4523
  */
4517
4524
  function executeAttempts(options) {
4518
4525
  return __awaiter(this, void 0, void 0, function () {
4519
- var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools, settings, $executionReport, pipelineIdentification, maxExecutionAttempts, $ongoingTemplateResult, _llms, llmTools, _loop_1, attempt, state_1;
4526
+ var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools, $executionReport, pipelineIdentification, maxExecutionAttempts, $ongoingTemplateResult, _llms, llmTools, _loop_1, attempt, state_1;
4520
4527
  return __generator(this, function (_a) {
4521
4528
  switch (_a.label) {
4522
4529
  case 0:
4523
- jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools, settings = options.settings, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification;
4524
- maxExecutionAttempts = settings.maxExecutionAttempts;
4530
+ jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, maxExecutionAttempts = options.maxExecutionAttempts;
4525
4531
  $ongoingTemplateResult = {
4526
4532
  $result: null,
4527
4533
  $resultString: null,
@@ -4887,12 +4893,12 @@ function executeAttempts(options) {
4887
4893
  */
4888
4894
  function executeFormatSubvalues(options) {
4889
4895
  return __awaiter(this, void 0, void 0, function () {
4890
- var template, jokerParameterNames, parameters, priority, pipelineIdentification, settings, parameterValue, formatDefinition, subvalueDefinition, formatSettings, resultString;
4896
+ var template, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification, parameterValue, formatDefinition, subvalueDefinition, formatSettings, resultString;
4891
4897
  var _this = this;
4892
4898
  return __generator(this, function (_a) {
4893
4899
  switch (_a.label) {
4894
4900
  case 0:
4895
- template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority, pipelineIdentification = options.pipelineIdentification, settings = options.settings;
4901
+ template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority, csvSettings = options.csvSettings, pipelineIdentification = options.pipelineIdentification;
4896
4902
  if (template.foreach === undefined) {
4897
4903
  return [2 /*return*/, /* not await */ executeAttempts(options)];
4898
4904
  }
@@ -4922,7 +4928,7 @@ function executeFormatSubvalues(options) {
4922
4928
  .join('\n')), "\n\n [\u26F7] This should never happen because subformat name should be validated during parsing\n\n ").concat(block(pipelineIdentification), "\n "); }));
4923
4929
  }
4924
4930
  if (formatDefinition.formatName === 'CSV') {
4925
- formatSettings = settings.csvSettings;
4931
+ formatSettings = csvSettings;
4926
4932
  // <- TODO: [🤹‍♂️] More universal, make similar pattern for other formats for example \n vs \r\n in text
4927
4933
  }
4928
4934
  return [4 /*yield*/, subvalueDefinition.mapValues(parameterValue, template.foreach.outputSubparameterName, formatSettings, function (subparameters, index) { return __awaiter(_this, void 0, void 0, function () {
@@ -5075,13 +5081,12 @@ function getReservedParametersForTemplate(options) {
5075
5081
  */
5076
5082
  function executeTemplate(options) {
5077
5083
  return __awaiter(this, void 0, void 0, function () {
5078
- var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress, settings, $executionReport, pipelineIdentification, maxExecutionAttempts, name, title, priority, usedParameterNames, dependentParameterNames, definedParameters, _a, _b, _c, definedParameterNames, parameters, _loop_1, _d, _e, parameterName, maxAttempts, jokerParameterNames, preparedContent, resultString;
5079
- var e_1, _f, _g;
5080
- return __generator(this, function (_h) {
5081
- switch (_h.label) {
5084
+ var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, _a, maxExecutionAttempts, name, title, priority, usedParameterNames, dependentParameterNames, definedParameters, _b, _c, _d, definedParameterNames, parameters, _loop_1, _e, _f, parameterName, maxAttempts, jokerParameterNames, preparedContent, resultString;
5085
+ var e_1, _g, _h;
5086
+ return __generator(this, function (_j) {
5087
+ switch (_j.label) {
5082
5088
  case 0:
5083
- currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress, settings = options.settings, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification;
5084
- maxExecutionAttempts = settings.maxExecutionAttempts;
5089
+ currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a;
5085
5090
  name = "pipeline-executor-frame-".concat(currentTemplate.name);
5086
5091
  title = currentTemplate.title;
5087
5092
  priority = preparedPipeline.templates.length - preparedPipeline.templates.indexOf(currentTemplate);
@@ -5096,7 +5101,7 @@ function executeTemplate(options) {
5096
5101
  // <- [🍸]
5097
5102
  })];
5098
5103
  case 1:
5099
- _h.sent();
5104
+ _j.sent();
5100
5105
  usedParameterNames = extractParameterNamesFromTemplate(currentTemplate);
5101
5106
  dependentParameterNames = new Set(currentTemplate.dependentParameterNames);
5102
5107
  // TODO: [👩🏾‍🤝‍👩🏻] Use here `mapAvailableToExpectedParameters`
@@ -5107,15 +5112,15 @@ function executeTemplate(options) {
5107
5112
  .map(function (name) { return "{".concat(name, "}"); })
5108
5113
  .join(', '), "\n\n ").concat(block(pipelineIdentification), "\n\n "); }));
5109
5114
  }
5110
- _b = (_a = Object).freeze;
5111
- _c = [{}];
5115
+ _c = (_b = Object).freeze;
5116
+ _d = [{}];
5112
5117
  return [4 /*yield*/, getReservedParametersForTemplate({
5113
5118
  preparedPipeline: preparedPipeline,
5114
5119
  template: currentTemplate,
5115
5120
  pipelineIdentification: pipelineIdentification,
5116
5121
  })];
5117
5122
  case 2:
5118
- definedParameters = _b.apply(_a, [__assign.apply(void 0, [__assign.apply(void 0, _c.concat([(_h.sent())])), parametersToPass])]);
5123
+ definedParameters = _c.apply(_b, [__assign.apply(void 0, [__assign.apply(void 0, _d.concat([(_j.sent())])), parametersToPass])]);
5119
5124
  definedParameterNames = new Set(Object.keys(definedParameters));
5120
5125
  parameters = {};
5121
5126
  _loop_1 = function (parameterName) {
@@ -5135,15 +5140,15 @@ function executeTemplate(options) {
5135
5140
  try {
5136
5141
  // Note: [2] Check that all used parameters are defined and removing unused parameters for this template
5137
5142
  // TODO: [👩🏾‍🤝‍👩🏻] Use here `mapAvailableToExpectedParameters`
5138
- for (_d = __values(Array.from(union(definedParameterNames, usedParameterNames, dependentParameterNames))), _e = _d.next(); !_e.done; _e = _d.next()) {
5139
- parameterName = _e.value;
5143
+ for (_e = __values(Array.from(union(definedParameterNames, usedParameterNames, dependentParameterNames))), _f = _e.next(); !_f.done; _f = _e.next()) {
5144
+ parameterName = _f.value;
5140
5145
  _loop_1(parameterName);
5141
5146
  }
5142
5147
  }
5143
5148
  catch (e_1_1) { e_1 = { error: e_1_1 }; }
5144
5149
  finally {
5145
5150
  try {
5146
- if (_e && !_e.done && (_f = _d.return)) _f.call(_d);
5151
+ if (_f && !_f.done && (_g = _e.return)) _g.call(_e);
5147
5152
  }
5148
5153
  finally { if (e_1) throw e_1.error; }
5149
5154
  }
@@ -5163,12 +5168,11 @@ function executeTemplate(options) {
5163
5168
  template: currentTemplate,
5164
5169
  preparedPipeline: preparedPipeline,
5165
5170
  tools: tools,
5166
- settings: settings,
5167
5171
  $executionReport: $executionReport,
5168
5172
  pipelineIdentification: pipelineIdentification,
5169
5173
  })];
5170
5174
  case 3:
5171
- resultString = _h.sent();
5175
+ resultString = _j.sent();
5172
5176
  return [4 /*yield*/, onProgress({
5173
5177
  name: name,
5174
5178
  title: title,
@@ -5180,12 +5184,12 @@ function executeTemplate(options) {
5180
5184
  // <- [🍸]
5181
5185
  })];
5182
5186
  case 4:
5183
- _h.sent();
5184
- return [2 /*return*/, Object.freeze((_g = {},
5185
- _g[currentTemplate.resultingParameterName] =
5187
+ _j.sent();
5188
+ return [2 /*return*/, Object.freeze((_h = {},
5189
+ _h[currentTemplate.resultingParameterName] =
5186
5190
  // <- Note: [👩‍👩‍👧] No need to detect parameter collision here because pipeline checks logic consistency during construction
5187
5191
  resultString,
5188
- _g))];
5192
+ _h))];
5189
5193
  }
5190
5194
  });
5191
5195
  });
@@ -5244,13 +5248,12 @@ function filterJustOutputParameters(options) {
5244
5248
  */
5245
5249
  function executePipeline(options) {
5246
5250
  return __awaiter(this, void 0, void 0, function () {
5247
- var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, settings, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
5251
+ var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
5248
5252
  var e_1, _f, e_2, _g;
5249
5253
  return __generator(this, function (_h) {
5250
5254
  switch (_h.label) {
5251
5255
  case 0:
5252
- inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, settings = options.settings;
5253
- maxParallelCount = settings.maxParallelCount, rootDirname = settings.rootDirname, _a = settings.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
5256
+ inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, maxParallelCount = options.maxParallelCount, rootDirname = options.rootDirname, _a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
5254
5257
  preparedPipeline = options.preparedPipeline;
5255
5258
  if (!(preparedPipeline === undefined)) return [3 /*break*/, 2];
5256
5259
  return [4 /*yield*/, preparePipeline(pipeline, tools, {
@@ -5435,12 +5438,7 @@ function executePipeline(options) {
5435
5438
  return [3 /*break*/, 4];
5436
5439
  case 3:
5437
5440
  unresovedTemplates_1 = unresovedTemplates_1.filter(function (template) { return template !== currentTemplate; });
5438
- work_1 = executeTemplate({
5439
- currentTemplate: currentTemplate,
5440
- preparedPipeline: preparedPipeline,
5441
- parametersToPass: parametersToPass,
5442
- tools: tools,
5443
- onProgress: function (progress) {
5441
+ work_1 = executeTemplate(__assign(__assign({}, options), { currentTemplate: currentTemplate, preparedPipeline: preparedPipeline, parametersToPass: parametersToPass, tools: tools, onProgress: function (progress) {
5444
5442
  if (isReturned) {
5445
5443
  throw new UnexpectedError(spaceTrim(function (block) { return "\n Can not call `onProgress` after pipeline execution is finished\n\n ".concat(block(pipelineIdentification), "\n\n ").concat(block(JSON.stringify(progress, null, 4)
5446
5444
  .split('\n')
@@ -5450,11 +5448,7 @@ function executePipeline(options) {
5450
5448
  if (onProgress) {
5451
5449
  onProgress(progress);
5452
5450
  }
5453
- },
5454
- settings: settings,
5455
- $executionReport: executionReport,
5456
- pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }),
5457
- })
5451
+ }, $executionReport: executionReport, pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }) }))
5458
5452
  .then(function (newParametersToPass) {
5459
5453
  parametersToPass = __assign(__assign({}, newParametersToPass), parametersToPass);
5460
5454
  resovedParameterNames_1 = __spreadArray(__spreadArray([], __read(resovedParameterNames_1), false), [currentTemplate.resultingParameterName], false);
@@ -5558,8 +5552,7 @@ function executePipeline(options) {
5558
5552
  */
5559
5553
  function createPipelineExecutor(options) {
5560
5554
  var _this = this;
5561
- var pipeline = options.pipeline, tools = options.tools, _a = options.settings, settings = _a === void 0 ? {} : _a;
5562
- var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f, _g = settings.rootDirname, rootDirname = _g === void 0 ? null : _g;
5555
+ var pipeline = options.pipeline, tools = options.tools, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a, _b = options.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = options.csvSettings, csvSettings = _c === void 0 ? DEFAULT_CSV_SETTINGS : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d, _e = options.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _e === void 0 ? false : _e, _f = options.rootDirname, rootDirname = _f === void 0 ? null : _f;
5563
5556
  validatePipeline(pipeline);
5564
5557
  var pipelineIdentification = (function () {
5565
5558
  // Note: This is a 😐 implementation of [🚞]
@@ -5593,14 +5586,12 @@ function createPipelineExecutor(options) {
5593
5586
  tools: tools,
5594
5587
  onProgress: onProgress,
5595
5588
  pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
5596
- settings: {
5597
- maxExecutionAttempts: maxExecutionAttempts,
5598
- maxParallelCount: maxParallelCount,
5599
- csvSettings: csvSettings,
5600
- isVerbose: isVerbose,
5601
- isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
5602
- rootDirname: rootDirname,
5603
- },
5589
+ maxExecutionAttempts: maxExecutionAttempts,
5590
+ maxParallelCount: maxParallelCount,
5591
+ csvSettings: csvSettings,
5592
+ isVerbose: isVerbose,
5593
+ isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
5594
+ rootDirname: rootDirname,
5604
5595
  })];
5605
5596
  });
5606
5597
  }); };
@@ -5622,7 +5613,7 @@ var markdownScraperMetadata = $deepFreeze({
5622
5613
  mimeTypes: ['text/markdown', 'text/plain'],
5623
5614
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5624
5615
  isAvilableInBrowser: true,
5625
- requiredExecutables: ['!!!!!!'],
5616
+ requiredExecutables: [],
5626
5617
  }); /* <- TODO: [🤛] */
5627
5618
  /**
5628
5619
  * Registration of known scraper metadata
@@ -5666,7 +5657,7 @@ var MarkdownScraper = /** @class */ (function () {
5666
5657
  return __generator(this, function (_k) {
5667
5658
  switch (_k.label) {
5668
5659
  case 0:
5669
- _a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ? MAX_PARALLEL_COUNT : _b, _c = _a.isVerbose, isVerbose = _c === void 0 ? IS_VERBOSE : _c;
5660
+ _a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = _a.isVerbose, isVerbose = _c === void 0 ? DEFAULT_IS_VERBOSE : _c;
5670
5661
  llm = this.tools.llm;
5671
5662
  if (llm === undefined) {
5672
5663
  throw new MissingToolsError('LLM tools are required for scraping external files');
@@ -5765,7 +5756,8 @@ var MarkdownScraper = /** @class */ (function () {
5765
5756
  embeddingResult = _c.sent();
5766
5757
  index.push({
5767
5758
  modelName: embeddingResult.modelName,
5768
- position: embeddingResult.content,
5759
+ position: __spreadArray([], __read(embeddingResult.content), false),
5760
+ // <- TODO: [🪓] Here should be no need for spreading new array, just `position: embeddingResult.content`
5769
5761
  });
5770
5762
  _c.label = 6;
5771
5763
  case 6: return [3 /*break*/, 8];
@@ -5805,32 +5797,29 @@ var MarkdownScraper = /** @class */ (function () {
5805
5797
  */
5806
5798
 
5807
5799
  /**
5808
- * A converter instance that uses showdown and highlight extensions
5800
+ * Create a new showdown converter instance
5809
5801
  *
5810
- * @type {Converter}
5811
- * @private for markdown and html knowledge scrapers
5812
- */
5813
- var markdownConverter = new Converter({
5814
- flavor: 'github', // <- TODO: !!!!!! Explicitly specify the flavor of promptbook markdown
5815
- /*
5816
- > import showdownHighlight from 'showdown-highlight';
5817
- > extensions: [
5818
- > showdownHighlight({
5819
- > // Whether to add the classes to the <pre> tag, default is false
5820
- > pre: true,
5821
- > // Whether to use hljs' auto language detection, default is true
5822
- > auto_detection: true,
5823
- > }),
5824
- > ],
5825
- */
5826
- });
5827
- /**
5828
- * TODO: !!!!!! Figure out better name not to confuse with `Converter`
5829
- * TODO: !!!!!! Lazy-make converter
5802
+ * @private utility of `WebsiteScraper`
5830
5803
  */
5804
+ function createShowdownConverter() {
5805
+ return new Converter({
5806
+ flavor: 'github',
5807
+ /*
5808
+ > import showdownHighlight from 'showdown-highlight';
5809
+ > extensions: [
5810
+ > showdownHighlight({
5811
+ > // Whether to add the classes to the <pre> tag, default is false
5812
+ > pre: true,
5813
+ > // Whether to use hljs' auto language detection, default is true
5814
+ > auto_detection: true,
5815
+ > }),
5816
+ > ],
5817
+ */
5818
+ });
5819
+ }
5831
5820
 
5832
5821
  /**
5833
- * Scraper for .docx files
5822
+ * Scraper for websites
5834
5823
  *
5835
5824
  * @see `documentationUrl` for more details
5836
5825
  * @public exported from `@promptbook/website-crawler`
@@ -5840,6 +5829,7 @@ var WebsiteScraper = /** @class */ (function () {
5840
5829
  this.tools = tools;
5841
5830
  this.options = options;
5842
5831
  this.markdownScraper = new MarkdownScraper(tools, options);
5832
+ this.showdownConverter = createShowdownConverter();
5843
5833
  }
5844
5834
  Object.defineProperty(WebsiteScraper.prototype, "metadata", {
5845
5835
  /**
@@ -5860,12 +5850,11 @@ var WebsiteScraper = /** @class */ (function () {
5860
5850
  return __awaiter(this, void 0, void 0, function () {
5861
5851
  var _a, _b,
5862
5852
  // TODO: [🧠] Maybe in node use headless browser not just JSDOM
5863
- // externalProgramsPaths = {},
5864
- rootDirname, _c, cacheDirname, _d, isCacheCleaned, _e, isVerbose, jsdom, _f, reader, article, html, i, cacheFilehandler, markdown;
5853
+ rootDirname, _c, cacheDirname, _d, intermediateFilesStrategy, _e, isVerbose, jsdom, _f, reader, article, html, i, cacheFilehandler, markdown;
5865
5854
  return __generator(this, function (_g) {
5866
5855
  switch (_g.label) {
5867
5856
  case 0:
5868
- _a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ? SCRAPE_CACHE_DIRNAME : _c, _d = _a.isCacheCleaned, isCacheCleaned = _d === void 0 ? false : _d, _e = _a.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e;
5857
+ _a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _c, _d = _a.intermediateFilesStrategy, intermediateFilesStrategy = _d === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _d, _e = _a.isVerbose, isVerbose = _e === void 0 ? DEFAULT_IS_VERBOSE : _e;
5869
5858
  // TODO: !!!!!! Does this work in browser? Make it work.
5870
5859
  if (source.url === null) {
5871
5860
  throw new KnowledgeScrapeError('Website scraper requires URL');
@@ -5878,10 +5867,6 @@ var WebsiteScraper = /** @class */ (function () {
5878
5867
  }]))();
5879
5868
  reader = new Readability(jsdom.window.document);
5880
5869
  article = reader.parse();
5881
- console.log(article);
5882
- return [4 /*yield*/, forTime(10000)];
5883
- case 2:
5884
- _g.sent();
5885
5870
  html = (article === null || article === void 0 ? void 0 : article.content) || (article === null || article === void 0 ? void 0 : article.textContent) || jsdom.window.document.body.innerHTML;
5886
5871
  // Note: Unwrap html such as it is convertable by `markdownConverter`
5887
5872
  for (i = 0; i < 2; i++) {
@@ -5893,16 +5878,16 @@ var WebsiteScraper = /** @class */ (function () {
5893
5878
  return [4 /*yield*/, getScraperIntermediateSource(source, {
5894
5879
  rootDirname: rootDirname,
5895
5880
  cacheDirname: cacheDirname,
5896
- isCacheCleaned: isCacheCleaned,
5881
+ intermediateFilesStrategy: intermediateFilesStrategy,
5897
5882
  extension: 'html',
5898
5883
  isVerbose: isVerbose,
5899
5884
  })];
5900
- case 3:
5885
+ case 2:
5901
5886
  cacheFilehandler = _g.sent();
5902
5887
  return [4 /*yield*/, writeFile(cacheFilehandler.filename, html, 'utf-8')];
5903
- case 4:
5888
+ case 3:
5904
5889
  _g.sent();
5905
- markdown = markdownConverter.makeMarkdown(html, jsdom.window.document);
5890
+ markdown = this.showdownConverter.makeMarkdown(html, jsdom.window.document);
5906
5891
  return [2 /*return*/, __assign(__assign({}, cacheFilehandler), { markdown: markdown })];
5907
5892
  }
5908
5893
  });
@@ -5946,7 +5931,6 @@ var WebsiteScraper = /** @class */ (function () {
5946
5931
  return WebsiteScraper;
5947
5932
  }());
5948
5933
  /**
5949
- * TODO: !!!!!! Put into separate package
5950
5934
  * TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
5951
5935
  * TODO: [🪂] Do it in parallel 11:11
5952
5936
  * Note: No need to aggregate usage here, it is done by intercepting the llmTools