@promptbook/legacy-documents 0.72.0-13 → 0.72.0-15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/README.md +1 -1
  2. package/esm/index.es.js +89 -95
  3. package/esm/index.es.js.map +1 -1
  4. package/esm/typings/src/_packages/core.index.d.ts +22 -18
  5. package/esm/typings/src/_packages/node.index.d.ts +2 -0
  6. package/esm/typings/src/_packages/types.index.d.ts +10 -10
  7. package/esm/typings/src/_packages/utils.index.d.ts +2 -2
  8. package/esm/typings/src/collection/PipelineCollection.d.ts +1 -1
  9. package/esm/typings/src/collection/SimplePipelineCollection.d.ts +2 -2
  10. package/esm/typings/src/collection/collectionToJson.d.ts +1 -1
  11. package/esm/typings/src/collection/constructors/createCollectionFromJson.d.ts +1 -1
  12. package/esm/typings/src/collection/constructors/createCollectionFromPromise.d.ts +1 -1
  13. package/esm/typings/src/commands/_common/types/CommandParser.d.ts +5 -5
  14. package/esm/typings/src/config.d.ts +21 -14
  15. package/esm/typings/src/execution/EmbeddingVector.d.ts +1 -1
  16. package/esm/typings/src/execution/Executables.d.ts +18 -0
  17. package/esm/typings/src/execution/ExecutionTools.d.ts +9 -3
  18. package/esm/typings/src/execution/LlmExecutionTools.d.ts +1 -1
  19. package/esm/typings/src/execution/PipelineExecutorResult.d.ts +2 -2
  20. package/esm/typings/src/execution/assertsExecutionSuccessful.d.ts +1 -0
  21. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorOptions.d.ts +29 -6
  22. package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +2 -11
  23. package/esm/typings/src/execution/createPipelineExecutor/20-executeTemplate.d.ts +4 -13
  24. package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +9 -14
  25. package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +11 -3
  26. package/esm/typings/src/execution/utils/addUsage.d.ts +1 -1
  27. package/esm/typings/src/execution/utils/forEachAsync.d.ts +1 -1
  28. package/esm/typings/src/formats/_common/FormatDefinition.d.ts +2 -2
  29. package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +2 -2
  30. package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForCli.d.ts +2 -2
  31. package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +1 -1
  32. package/esm/typings/src/llm-providers/_common/register/createLlmToolsFromConfiguration.d.ts +7 -0
  33. package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +4 -1
  34. package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionTools.d.ts +1 -1
  35. package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
  36. package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +1 -1
  37. package/esm/typings/src/llm-providers/mocked/$fakeTextToExpectations.d.ts +1 -1
  38. package/esm/typings/src/llm-providers/mocked/MockedEchoLlmExecutionTools.d.ts +1 -1
  39. package/esm/typings/src/llm-providers/mocked/MockedFackedLlmExecutionTools.d.ts +1 -1
  40. package/esm/typings/src/llm-providers/multiple/MultipleLlmExecutionTools.d.ts +4 -5
  41. package/esm/typings/src/llm-providers/multiple/joinLlmExecutionTools.d.ts +1 -1
  42. package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +1 -1
  43. package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
  44. package/esm/typings/src/llm-providers/remote/RemoteLlmExecutionTools.d.ts +1 -1
  45. package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_ListModels_Response.d.ts +3 -3
  46. package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_Prompt_Response.d.ts +2 -2
  47. package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +4 -23
  48. package/esm/typings/src/prepare/prepareTemplates.d.ts +1 -1
  49. package/esm/typings/src/scrapers/_common/Scraper.d.ts +1 -5
  50. package/esm/typings/src/scrapers/_common/prepareKnowledgePieces.d.ts +1 -1
  51. package/esm/typings/src/scrapers/_common/register/$provideExecutablesForNode.d.ts +12 -0
  52. package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +2 -2
  53. package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +2 -2
  54. package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +2 -2
  55. package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +2 -2
  56. package/esm/typings/src/scrapers/document/DocumentScraper.d.ts +2 -2
  57. package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
  58. package/esm/typings/src/scrapers/document/register-metadata.d.ts +1 -1
  59. package/esm/typings/src/scrapers/document-legacy/LegacyDocumentScraper.d.ts +3 -3
  60. package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +1 -1
  61. package/esm/typings/src/scrapers/document-legacy/register-metadata.d.ts +1 -1
  62. package/esm/typings/src/scrapers/markdown/MarkdownScraper.d.ts +1 -1
  63. package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +1 -1
  64. package/esm/typings/src/scrapers/markdown/register-metadata.d.ts +1 -1
  65. package/esm/typings/src/scrapers/pdf/PdfScraper.d.ts +2 -2
  66. package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +1 -1
  67. package/esm/typings/src/scrapers/pdf/register-metadata.d.ts +1 -1
  68. package/esm/typings/src/scrapers/website/WebsiteScraper.d.ts +6 -3
  69. package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +1 -1
  70. package/esm/typings/src/scrapers/website/register-metadata.d.ts +1 -1
  71. package/esm/typings/src/scrapers/website/utils/createShowdownConverter.d.ts +7 -0
  72. package/esm/typings/src/scripting/javascript/utils/preserve.d.ts +1 -1
  73. package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +1 -1
  74. package/esm/typings/src/types/Arrayable.d.ts +1 -1
  75. package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +7 -0
  76. package/esm/typings/src/types/PipelineJson/KnowledgePieceJson.d.ts +4 -4
  77. package/esm/typings/src/types/PipelineJson/KnowledgeSourceJson.d.ts +1 -1
  78. package/esm/typings/src/types/PipelineJson/PersonaJson.d.ts +1 -1
  79. package/esm/typings/src/types/PipelineJson/TemplateJsonCommon.d.ts +2 -2
  80. package/esm/typings/src/types/Prompt.d.ts +1 -1
  81. package/esm/typings/src/types/execution-report/ExecutionReportJson.d.ts +1 -1
  82. package/esm/typings/src/utils/$Register.d.ts +1 -1
  83. package/esm/typings/src/utils/FromtoItems.d.ts +1 -1
  84. package/esm/typings/src/utils/arrayableToArray.d.ts +1 -1
  85. package/esm/typings/src/utils/emojis.d.ts +1 -1
  86. package/esm/typings/src/utils/execCommand/$execCommand.d.ts +2 -2
  87. package/esm/typings/src/utils/execCommand/{IExecCommandOptions.d.ts → ExecCommandOptions.d.ts} +2 -6
  88. package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.d.ts +3 -3
  89. package/esm/typings/src/utils/expectation-counters/countSentences.d.ts +1 -1
  90. package/esm/typings/src/utils/markdown/extractAllBlocksFromMarkdown.d.ts +1 -1
  91. package/esm/typings/src/utils/markdown/splitMarkdownIntoSections.d.ts +1 -1
  92. package/esm/typings/src/utils/normalization/IKeywords.d.ts +2 -2
  93. package/esm/typings/src/utils/normalization/parseKeywords.d.ts +2 -2
  94. package/esm/typings/src/utils/normalization/parseKeywordsFromString.d.ts +2 -2
  95. package/esm/typings/src/utils/normalization/searchKeywords.d.ts +2 -2
  96. package/esm/typings/src/utils/organization/TODO_USE.d.ts +1 -1
  97. package/esm/typings/src/utils/organization/keepUnused.d.ts +1 -1
  98. package/esm/typings/src/utils/random/$randomSeed.d.ts +1 -1
  99. package/esm/typings/src/utils/sets/intersection.d.ts +1 -1
  100. package/esm/typings/src/utils/sets/union.d.ts +1 -1
  101. package/esm/typings/src/utils/unwrapResult.d.ts +4 -4
  102. package/package.json +4 -3
  103. package/umd/index.umd.js +89 -95
  104. package/umd/index.umd.js.map +1 -1
  105. package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorSettings.d.ts +0 -29
  106. package/esm/typings/src/scrapers/website/utils/markdownConverter.d.ts +0 -12
  107. package/esm/typings/src/scrapers/website/utils/{markdownConverter.test.d.ts → createShowdownConverter.test.d.ts} +0 -0
package/umd/index.umd.js CHANGED
@@ -15,7 +15,7 @@
15
15
  /**
16
16
  * The version of the Promptbook library
17
17
  */
18
- var PROMPTBOOK_VERSION = '0.72.0-12';
18
+ var PROMPTBOOK_VERSION = '0.72.0-14';
19
19
  // TODO: [main] !!!! List here all the versions and annotate + put into script
20
20
 
21
21
  /*! *****************************************************************************
@@ -363,18 +363,25 @@
363
363
  * @private within the repository - too low-level in comparison with other `MAX_...`
364
364
  */
365
365
  var IMMEDIATE_TIME = 10;
366
+ /**
367
+ * Strategy for caching the intermediate results for knowledge sources
368
+ *
369
+ * @public exported from `@promptbook/core`
370
+ */
371
+ var DEFAULT_INTERMEDIATE_FILES_STRATEGY = 'HIDE_AND_KEEP';
372
+ // <- TODO: [😡] Change to 'VISIBLE'
366
373
  /**
367
374
  * The maximum number of (LLM) tasks running in parallel
368
375
  *
369
376
  * @public exported from `@promptbook/core`
370
377
  */
371
- var MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
378
+ var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
372
379
  /**
373
380
  * The maximum number of attempts to execute LLM task before giving up
374
381
  *
375
382
  * @public exported from `@promptbook/core`
376
383
  */
377
- var MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
384
+ var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
378
385
  /**
379
386
  * Where to store the scrape cache
380
387
  *
@@ -382,7 +389,7 @@
382
389
  *
383
390
  * @public exported from `@promptbook/core`
384
391
  */
385
- var SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
392
+ var DEFAULT_SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
386
393
  /**
387
394
  * Nonce which is used for replacing things in strings
388
395
  *
@@ -434,7 +441,7 @@
434
441
  *
435
442
  * @public exported from `@promptbook/core`
436
443
  */
437
- var IS_VERBOSE = false;
444
+ var DEFAULT_IS_VERBOSE = false;
438
445
  /**
439
446
  * @@@
440
447
  *
@@ -1150,12 +1157,12 @@
1150
1157
  */
1151
1158
  function getScraperIntermediateSource(source, options) {
1152
1159
  return __awaiter(this, void 0, void 0, function () {
1153
- var sourceFilename, url, rootDirname, cacheDirname, isCacheCleaned, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
1160
+ var sourceFilename, url, rootDirname, cacheDirname, intermediateFilesStrategy, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
1154
1161
  return __generator(this, function (_a) {
1155
1162
  switch (_a.label) {
1156
1163
  case 0:
1157
1164
  sourceFilename = source.filename, url = source.url;
1158
- rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, isCacheCleaned = options.isCacheCleaned, extension = options.extension, isVerbose = options.isVerbose;
1165
+ rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, intermediateFilesStrategy = options.intermediateFilesStrategy, extension = options.extension, isVerbose = options.isVerbose;
1159
1166
  hash = cryptoJs.SHA256(
1160
1167
  // <- TODO: [🥬] Encapsulate sha256 to some private utility function
1161
1168
  hexEncoder__default["default"].parse(sourceFilename || url || 'untitled'))
@@ -1165,7 +1172,7 @@
1165
1172
  pieces = ['intermediate', semanticName, hash].filter(function (piece) { return piece !== ''; });
1166
1173
  name = pieces.join('-').split('--').join('-');
1167
1174
  // <- TODO: Use MAX_FILENAME_LENGTH
1168
- TODO_USE(rootDirname); // <- TODO: !!!!!!
1175
+ TODO_USE(rootDirname); // <- TODO: [😡]
1169
1176
  cacheFilename = path.join.apply(void 0, __spreadArray(__spreadArray([process.cwd(),
1170
1177
  cacheDirname], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), [name], false)).split('\\')
1171
1178
  .join('/') +
@@ -1185,7 +1192,7 @@
1185
1192
  return __generator(this, function (_a) {
1186
1193
  switch (_a.label) {
1187
1194
  case 0:
1188
- if (!isCacheCleaned) return [3 /*break*/, 2];
1195
+ if (!(intermediateFilesStrategy === 'HIDE_AND_CLEAN')) return [3 /*break*/, 2];
1189
1196
  if (isVerbose) {
1190
1197
  console.info('legacyDocumentScraper: Clening cache');
1191
1198
  }
@@ -1209,7 +1216,7 @@
1209
1216
  /**
1210
1217
  * Note: Not using `FileCacheStorage` for two reasons:
1211
1218
  * 1) Need to store more than serialized JSONs
1212
- * 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: !!!!
1219
+ * 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: [😡]
1213
1220
  * TODO: [🐱‍🐉][🧠] Make some smart crop
1214
1221
  * Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
1215
1222
  */
@@ -1274,7 +1281,6 @@
1274
1281
  pipelineString += '\n\n';
1275
1282
  pipelineString += description;
1276
1283
  }
1277
- // TODO:> const commands: Array<Command>
1278
1284
  var commands = [];
1279
1285
  if (pipelineUrl) {
1280
1286
  commands.push("PIPELINE URL ".concat(pipelineUrl));
@@ -1330,7 +1336,6 @@
1330
1336
  pipelineString += '\n\n';
1331
1337
  pipelineString += description_1;
1332
1338
  }
1333
- // TODO:> const commands: Array<Command>
1334
1339
  var commands_1 = [];
1335
1340
  var contentLanguage = 'text';
1336
1341
  if (templateType === 'PROMPT_TEMPLATE') {
@@ -2215,6 +2220,7 @@
2215
2220
  }
2216
2221
  }
2217
2222
  /**
2223
+ * TODO: [🐚] This function should be removed OR changed OR be completely rewritten
2218
2224
  * TODO: [🧠] Can this return type be better typed than void
2219
2225
  */
2220
2226
 
@@ -2400,8 +2406,7 @@
2400
2406
  * Multiple LLM Execution Tools is a proxy server that uses multiple execution tools internally and exposes the executor interface externally.
2401
2407
  *
2402
2408
  * Note: Internal utility of `joinLlmExecutionTools` but exposed type
2403
- * @public exported from `@promptbook/types`
2404
- * TODO: !!!!!! Export as runtime class not just type
2409
+ * @public exported from `@promptbook/core`
2405
2410
  */
2406
2411
  var MultipleLlmExecutionTools = /** @class */ (function () {
2407
2412
  /**
@@ -2903,7 +2908,7 @@
2903
2908
  return __generator(this, function (_d) {
2904
2909
  switch (_d.label) {
2905
2910
  case 0:
2906
- _a = options.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
2911
+ _a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
2907
2912
  if (tools === undefined || tools.llm === undefined) {
2908
2913
  throw new MissingToolsError('LLM tools are required for preparing persona');
2909
2914
  }
@@ -3069,7 +3074,7 @@
3069
3074
  this.storage = globalScope[storageName];
3070
3075
  }
3071
3076
  $Register.prototype.list = function () {
3072
- // <- TODO: ReadonlyDeep<Array<TRegistered>>
3077
+ // <- TODO: ReadonlyDeep<ReadonlyArray<TRegistered>>
3073
3078
  return this.storage;
3074
3079
  };
3075
3080
  $Register.prototype.register = function (registered) {
@@ -3121,7 +3126,7 @@
3121
3126
  * TODO: [®] DRY Register logic
3122
3127
  */
3123
3128
 
3124
- // TODO: !!!!!! Maybe delete this function
3129
+ // TODO: !!!!!!last - Maybe delete this function
3125
3130
  /**
3126
3131
  * Creates a message with all registered scrapers
3127
3132
  *
@@ -3229,7 +3234,6 @@
3229
3234
  * @private within the repository
3230
3235
  */
3231
3236
  function sourceContentToName(sourceContent) {
3232
- // TODO: !!!!!! Better name for source than gibberish hash
3233
3237
  var hash = cryptoJs.SHA256(hexEncoder__default["default"].parse(JSON.stringify(sourceContent)))
3234
3238
  // <- TODO: [🥬] Encapsulate sha256 to some private utility function
3235
3239
  .toString( /* hex */)
@@ -3270,7 +3274,7 @@
3270
3274
  case 0:
3271
3275
  sourceContent = knowledgeSource.sourceContent;
3272
3276
  name = knowledgeSource.name;
3273
- _b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? IS_VERBOSE : _d;
3277
+ _b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d;
3274
3278
  TODO_USE(isVerbose);
3275
3279
  if (!name) {
3276
3280
  name = sourceContentToName(sourceContent);
@@ -3360,7 +3364,7 @@
3360
3364
  content = _a.sent();
3361
3365
  return [2 /*return*/, new Blob([
3362
3366
  content,
3363
- // <- TODO: !!!!!! Maybe not working
3367
+ // <- TODO: !!!!!! Test that this is working
3364
3368
  ], { type: mimeType_1 })];
3365
3369
  }
3366
3370
  });
@@ -3423,7 +3427,7 @@
3423
3427
  return __generator(this, function (_c) {
3424
3428
  switch (_c.label) {
3425
3429
  case 0:
3426
- _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
3430
+ _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
3427
3431
  knowledgePreparedUnflatten = new Array(knowledgeSources.length);
3428
3432
  return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
3429
3433
  var partialPieces, sourceHandler, _a, _b, scraper, partialPiecesUnchecked, e_1_1, pieces;
@@ -3452,7 +3456,8 @@
3452
3456
  case 4:
3453
3457
  partialPiecesUnchecked = _d.sent();
3454
3458
  if (partialPiecesUnchecked !== null) {
3455
- partialPieces = partialPiecesUnchecked;
3459
+ partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
3460
+ // <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
3456
3461
  return [3 /*break*/, 6];
3457
3462
  }
3458
3463
  _d.label = 5;
@@ -3500,7 +3505,7 @@
3500
3505
  > /**
3501
3506
  > * Unprepared knowledge
3502
3507
  > * /
3503
- > readonly knowledgeSources: Array<KnowledgeSourceJson>;
3508
+ > readonly knowledgeSources: ReadonlyArray<KnowledgeSourceJson>;
3504
3509
  > };
3505
3510
  >
3506
3511
  > export async function prepareKnowledgePieces(
@@ -3558,7 +3563,7 @@
3558
3563
  return __generator(this, function (_b) {
3559
3564
  switch (_b.label) {
3560
3565
  case 0:
3561
- _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a;
3566
+ _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a;
3562
3567
  templates = pipeline.templates, parameters = pipeline.parameters, knowledgePiecesCount = pipeline.knowledgePiecesCount;
3563
3568
  // TODO: [main] !!!!! Apply samples to each template (if missing and is for the template defined)
3564
3569
  TODO_USE(parameters);
@@ -3620,7 +3625,7 @@
3620
3625
  if (isPipelinePrepared(pipeline)) {
3621
3626
  return [2 /*return*/, pipeline];
3622
3627
  }
3623
- rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? IS_VERBOSE : _b;
3628
+ rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
3624
3629
  parameters = pipeline.parameters, templates = pipeline.templates, knowledgeSources = pipeline.knowledgeSources, personas = pipeline.personas;
3625
3630
  if (tools === undefined || tools.llm === undefined) {
3626
3631
  throw new MissingToolsError('LLM tools are required for preparing the pipeline');
@@ -3678,7 +3683,9 @@
3678
3683
  // ----- /Templates preparation -----
3679
3684
  // Note: Count total usage
3680
3685
  currentPreparation.usage = llmToolsWithUsage.getTotalUsage();
3681
- return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates: templatesPrepared, knowledgeSources: knowledgeSourcesPrepared, knowledgePieces: knowledgePiecesPrepared, personas: preparedPersonas, preparations: preparations }))];
3686
+ return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates: __spreadArray([], __read(templatesPrepared), false),
3687
+ // <- TODO: [🪓] Here should be no need for spreading new array, just ` templates: templatesPrepared`
3688
+ knowledgeSources: knowledgeSourcesPrepared, knowledgePieces: knowledgePiecesPrepared, personas: preparedPersonas, preparations: __spreadArray([], __read(preparations), false) }))];
3682
3689
  }
3683
3690
  });
3684
3691
  });
@@ -4649,12 +4656,11 @@
4649
4656
  */
4650
4657
  function executeAttempts(options) {
4651
4658
  return __awaiter(this, void 0, void 0, function () {
4652
- var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools, settings, $executionReport, pipelineIdentification, maxExecutionAttempts, $ongoingTemplateResult, _llms, llmTools, _loop_1, attempt, state_1;
4659
+ var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools, $executionReport, pipelineIdentification, maxExecutionAttempts, $ongoingTemplateResult, _llms, llmTools, _loop_1, attempt, state_1;
4653
4660
  return __generator(this, function (_a) {
4654
4661
  switch (_a.label) {
4655
4662
  case 0:
4656
- jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools, settings = options.settings, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification;
4657
- maxExecutionAttempts = settings.maxExecutionAttempts;
4663
+ jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, maxExecutionAttempts = options.maxExecutionAttempts;
4658
4664
  $ongoingTemplateResult = {
4659
4665
  $result: null,
4660
4666
  $resultString: null,
@@ -5020,12 +5026,12 @@
5020
5026
  */
5021
5027
  function executeFormatSubvalues(options) {
5022
5028
  return __awaiter(this, void 0, void 0, function () {
5023
- var template, jokerParameterNames, parameters, priority, pipelineIdentification, settings, parameterValue, formatDefinition, subvalueDefinition, formatSettings, resultString;
5029
+ var template, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification, parameterValue, formatDefinition, subvalueDefinition, formatSettings, resultString;
5024
5030
  var _this = this;
5025
5031
  return __generator(this, function (_a) {
5026
5032
  switch (_a.label) {
5027
5033
  case 0:
5028
- template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority, pipelineIdentification = options.pipelineIdentification, settings = options.settings;
5034
+ template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority, csvSettings = options.csvSettings, pipelineIdentification = options.pipelineIdentification;
5029
5035
  if (template.foreach === undefined) {
5030
5036
  return [2 /*return*/, /* not await */ executeAttempts(options)];
5031
5037
  }
@@ -5055,7 +5061,7 @@
5055
5061
  .join('\n')), "\n\n [\u26F7] This should never happen because subformat name should be validated during parsing\n\n ").concat(block(pipelineIdentification), "\n "); }));
5056
5062
  }
5057
5063
  if (formatDefinition.formatName === 'CSV') {
5058
- formatSettings = settings.csvSettings;
5064
+ formatSettings = csvSettings;
5059
5065
  // <- TODO: [🤹‍♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
5060
5066
  }
5061
5067
  return [4 /*yield*/, subvalueDefinition.mapValues(parameterValue, template.foreach.outputSubparameterName, formatSettings, function (subparameters, index) { return __awaiter(_this, void 0, void 0, function () {
@@ -5208,13 +5214,12 @@
5208
5214
  */
5209
5215
  function executeTemplate(options) {
5210
5216
  return __awaiter(this, void 0, void 0, function () {
5211
- var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress, settings, $executionReport, pipelineIdentification, maxExecutionAttempts, name, title, priority, usedParameterNames, dependentParameterNames, definedParameters, _a, _b, _c, definedParameterNames, parameters, _loop_1, _d, _e, parameterName, maxAttempts, jokerParameterNames, preparedContent, resultString;
5212
- var e_1, _f, _g;
5213
- return __generator(this, function (_h) {
5214
- switch (_h.label) {
5217
+ var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, _a, maxExecutionAttempts, name, title, priority, usedParameterNames, dependentParameterNames, definedParameters, _b, _c, _d, definedParameterNames, parameters, _loop_1, _e, _f, parameterName, maxAttempts, jokerParameterNames, preparedContent, resultString;
5218
+ var e_1, _g, _h;
5219
+ return __generator(this, function (_j) {
5220
+ switch (_j.label) {
5215
5221
  case 0:
5216
- currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress, settings = options.settings, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification;
5217
- maxExecutionAttempts = settings.maxExecutionAttempts;
5222
+ currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a;
5218
5223
  name = "pipeline-executor-frame-".concat(currentTemplate.name);
5219
5224
  title = currentTemplate.title;
5220
5225
  priority = preparedPipeline.templates.length - preparedPipeline.templates.indexOf(currentTemplate);
@@ -5229,7 +5234,7 @@
5229
5234
  // <- [🍸]
5230
5235
  })];
5231
5236
  case 1:
5232
- _h.sent();
5237
+ _j.sent();
5233
5238
  usedParameterNames = extractParameterNamesFromTemplate(currentTemplate);
5234
5239
  dependentParameterNames = new Set(currentTemplate.dependentParameterNames);
5235
5240
  // TODO: [👩🏾‍🤝‍👩🏻] Use here `mapAvailableToExpectedParameters`
@@ -5240,15 +5245,15 @@
5240
5245
  .map(function (name) { return "{".concat(name, "}"); })
5241
5246
  .join(', '), "\n\n ").concat(block(pipelineIdentification), "\n\n "); }));
5242
5247
  }
5243
- _b = (_a = Object).freeze;
5244
- _c = [{}];
5248
+ _c = (_b = Object).freeze;
5249
+ _d = [{}];
5245
5250
  return [4 /*yield*/, getReservedParametersForTemplate({
5246
5251
  preparedPipeline: preparedPipeline,
5247
5252
  template: currentTemplate,
5248
5253
  pipelineIdentification: pipelineIdentification,
5249
5254
  })];
5250
5255
  case 2:
5251
- definedParameters = _b.apply(_a, [__assign.apply(void 0, [__assign.apply(void 0, _c.concat([(_h.sent())])), parametersToPass])]);
5256
+ definedParameters = _c.apply(_b, [__assign.apply(void 0, [__assign.apply(void 0, _d.concat([(_j.sent())])), parametersToPass])]);
5252
5257
  definedParameterNames = new Set(Object.keys(definedParameters));
5253
5258
  parameters = {};
5254
5259
  _loop_1 = function (parameterName) {
@@ -5268,15 +5273,15 @@
5268
5273
  try {
5269
5274
  // Note: [2] Check that all used parameters are defined and removing unused parameters for this template
5270
5275
  // TODO: [👩🏾‍🤝‍👩🏻] Use here `mapAvailableToExpectedParameters`
5271
- for (_d = __values(Array.from(union(definedParameterNames, usedParameterNames, dependentParameterNames))), _e = _d.next(); !_e.done; _e = _d.next()) {
5272
- parameterName = _e.value;
5276
+ for (_e = __values(Array.from(union(definedParameterNames, usedParameterNames, dependentParameterNames))), _f = _e.next(); !_f.done; _f = _e.next()) {
5277
+ parameterName = _f.value;
5273
5278
  _loop_1(parameterName);
5274
5279
  }
5275
5280
  }
5276
5281
  catch (e_1_1) { e_1 = { error: e_1_1 }; }
5277
5282
  finally {
5278
5283
  try {
5279
- if (_e && !_e.done && (_f = _d.return)) _f.call(_d);
5284
+ if (_f && !_f.done && (_g = _e.return)) _g.call(_e);
5280
5285
  }
5281
5286
  finally { if (e_1) throw e_1.error; }
5282
5287
  }
@@ -5296,12 +5301,11 @@
5296
5301
  template: currentTemplate,
5297
5302
  preparedPipeline: preparedPipeline,
5298
5303
  tools: tools,
5299
- settings: settings,
5300
5304
  $executionReport: $executionReport,
5301
5305
  pipelineIdentification: pipelineIdentification,
5302
5306
  })];
5303
5307
  case 3:
5304
- resultString = _h.sent();
5308
+ resultString = _j.sent();
5305
5309
  return [4 /*yield*/, onProgress({
5306
5310
  name: name,
5307
5311
  title: title,
@@ -5313,12 +5317,12 @@
5313
5317
  // <- [🍸]
5314
5318
  })];
5315
5319
  case 4:
5316
- _h.sent();
5317
- return [2 /*return*/, Object.freeze((_g = {},
5318
- _g[currentTemplate.resultingParameterName] =
5320
+ _j.sent();
5321
+ return [2 /*return*/, Object.freeze((_h = {},
5322
+ _h[currentTemplate.resultingParameterName] =
5319
5323
  // <- Note: [👩‍👩‍👧] No need to detect parameter collision here because pipeline checks logic consistency during construction
5320
5324
  resultString,
5321
- _g))];
5325
+ _h))];
5322
5326
  }
5323
5327
  });
5324
5328
  });
@@ -5377,13 +5381,12 @@
5377
5381
  */
5378
5382
  function executePipeline(options) {
5379
5383
  return __awaiter(this, void 0, void 0, function () {
5380
- var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, settings, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
5384
+ var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
5381
5385
  var e_1, _f, e_2, _g;
5382
5386
  return __generator(this, function (_h) {
5383
5387
  switch (_h.label) {
5384
5388
  case 0:
5385
- inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, settings = options.settings;
5386
- maxParallelCount = settings.maxParallelCount, rootDirname = settings.rootDirname, _a = settings.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
5389
+ inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, maxParallelCount = options.maxParallelCount, rootDirname = options.rootDirname, _a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
5387
5390
  preparedPipeline = options.preparedPipeline;
5388
5391
  if (!(preparedPipeline === undefined)) return [3 /*break*/, 2];
5389
5392
  return [4 /*yield*/, preparePipeline(pipeline, tools, {
@@ -5568,12 +5571,7 @@
5568
5571
  return [3 /*break*/, 4];
5569
5572
  case 3:
5570
5573
  unresovedTemplates_1 = unresovedTemplates_1.filter(function (template) { return template !== currentTemplate; });
5571
- work_1 = executeTemplate({
5572
- currentTemplate: currentTemplate,
5573
- preparedPipeline: preparedPipeline,
5574
- parametersToPass: parametersToPass,
5575
- tools: tools,
5576
- onProgress: function (progress) {
5574
+ work_1 = executeTemplate(__assign(__assign({}, options), { currentTemplate: currentTemplate, preparedPipeline: preparedPipeline, parametersToPass: parametersToPass, tools: tools, onProgress: function (progress) {
5577
5575
  if (isReturned) {
5578
5576
  throw new UnexpectedError(spaceTrim.spaceTrim(function (block) { return "\n Can not call `onProgress` after pipeline execution is finished\n\n ".concat(block(pipelineIdentification), "\n\n ").concat(block(JSON.stringify(progress, null, 4)
5579
5577
  .split('\n')
@@ -5583,11 +5581,7 @@
5583
5581
  if (onProgress) {
5584
5582
  onProgress(progress);
5585
5583
  }
5586
- },
5587
- settings: settings,
5588
- $executionReport: executionReport,
5589
- pipelineIdentification: spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }),
5590
- })
5584
+ }, $executionReport: executionReport, pipelineIdentification: spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }) }))
5591
5585
  .then(function (newParametersToPass) {
5592
5586
  parametersToPass = __assign(__assign({}, newParametersToPass), parametersToPass);
5593
5587
  resovedParameterNames_1 = __spreadArray(__spreadArray([], __read(resovedParameterNames_1), false), [currentTemplate.resultingParameterName], false);
@@ -5691,8 +5685,7 @@
5691
5685
  */
5692
5686
  function createPipelineExecutor(options) {
5693
5687
  var _this = this;
5694
- var pipeline = options.pipeline, tools = options.tools, _a = options.settings, settings = _a === void 0 ? {} : _a;
5695
- var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f, _g = settings.rootDirname, rootDirname = _g === void 0 ? null : _g;
5688
+ var pipeline = options.pipeline, tools = options.tools, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a, _b = options.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = options.csvSettings, csvSettings = _c === void 0 ? DEFAULT_CSV_SETTINGS : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d, _e = options.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _e === void 0 ? false : _e, _f = options.rootDirname, rootDirname = _f === void 0 ? null : _f;
5696
5689
  validatePipeline(pipeline);
5697
5690
  var pipelineIdentification = (function () {
5698
5691
  // Note: This is a 😐 implementation of [🚞]
@@ -5726,14 +5719,12 @@
5726
5719
  tools: tools,
5727
5720
  onProgress: onProgress,
5728
5721
  pipelineIdentification: spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
5729
- settings: {
5730
- maxExecutionAttempts: maxExecutionAttempts,
5731
- maxParallelCount: maxParallelCount,
5732
- csvSettings: csvSettings,
5733
- isVerbose: isVerbose,
5734
- isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
5735
- rootDirname: rootDirname,
5736
- },
5722
+ maxExecutionAttempts: maxExecutionAttempts,
5723
+ maxParallelCount: maxParallelCount,
5724
+ csvSettings: csvSettings,
5725
+ isVerbose: isVerbose,
5726
+ isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
5727
+ rootDirname: rootDirname,
5737
5728
  })];
5738
5729
  });
5739
5730
  }); };
@@ -5755,7 +5746,7 @@
5755
5746
  mimeTypes: ['text/markdown', 'text/plain'],
5756
5747
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5757
5748
  isAvilableInBrowser: true,
5758
- requiredExecutables: ['!!!!!!'],
5749
+ requiredExecutables: [],
5759
5750
  }); /* <- TODO: [🤛] */
5760
5751
  /**
5761
5752
  * Registration of known scraper metadata
@@ -5799,7 +5790,7 @@
5799
5790
  return __generator(this, function (_k) {
5800
5791
  switch (_k.label) {
5801
5792
  case 0:
5802
- _a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ? MAX_PARALLEL_COUNT : _b, _c = _a.isVerbose, isVerbose = _c === void 0 ? IS_VERBOSE : _c;
5793
+ _a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = _a.isVerbose, isVerbose = _c === void 0 ? DEFAULT_IS_VERBOSE : _c;
5803
5794
  llm = this.tools.llm;
5804
5795
  if (llm === undefined) {
5805
5796
  throw new MissingToolsError('LLM tools are required for scraping external files');
@@ -5898,7 +5889,8 @@
5898
5889
  embeddingResult = _c.sent();
5899
5890
  index.push({
5900
5891
  modelName: embeddingResult.modelName,
5901
- position: embeddingResult.content,
5892
+ position: __spreadArray([], __read(embeddingResult.content), false),
5893
+ // <- TODO: [🪓] Here should be no need for spreading new array, just `position: embeddingResult.content`
5902
5894
  });
5903
5895
  _c.label = 6;
5904
5896
  case 6: return [3 /*break*/, 8];
@@ -5949,7 +5941,7 @@
5949
5941
  mimeTypes: ['application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
5950
5942
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
5951
5943
  isAvilableInBrowser: false,
5952
- requiredExecutables: ['!!!!!!'],
5944
+ requiredExecutables: ['Pandoc'],
5953
5945
  }); /* <- TODO: [🤛] */
5954
5946
  /**
5955
5947
  * Registration of known scraper metadata
@@ -5989,12 +5981,13 @@
5989
5981
  * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
5990
5982
  */
5991
5983
  DocumentScraper.prototype.$convert = function (source) {
5984
+ var _a;
5992
5985
  return __awaiter(this, void 0, void 0, function () {
5993
- var _a, _b, externalProgramsPaths, _c, rootDirname, _d, cacheDirname, _e, isCacheCleaned, _f, isVerbose, extension, cacheFilehandler, command_1;
5986
+ var _b, _c, rootDirname, _d, cacheDirname, _e, intermediateFilesStrategy, _f, isVerbose, extension, cacheFilehandler, command_1;
5994
5987
  return __generator(this, function (_g) {
5995
5988
  switch (_g.label) {
5996
5989
  case 0:
5997
- _a = this.options, _b = _a.externalProgramsPaths, externalProgramsPaths = _b === void 0 ? {} : _b, _c = _a.rootDirname, rootDirname = _c === void 0 ? process.cwd() : _c, _d = _a.cacheDirname, cacheDirname = _d === void 0 ? SCRAPE_CACHE_DIRNAME : _d, _e = _a.isCacheCleaned, isCacheCleaned = _e === void 0 ? false : _e, _f = _a.isVerbose, isVerbose = _f === void 0 ? IS_VERBOSE : _f;
5990
+ _b = this.options, _c = _b.rootDirname, rootDirname = _c === void 0 ? process.cwd() : _c, _d = _b.cacheDirname, cacheDirname = _d === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _d, _e = _b.intermediateFilesStrategy, intermediateFilesStrategy = _e === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _e, _f = _b.isVerbose, isVerbose = _f === void 0 ? DEFAULT_IS_VERBOSE : _f;
5998
5991
  if (!$isRunningInNode()) {
5999
5992
  throw new KnowledgeScrapeError('Scraping .docx files is only supported in Node environment');
6000
5993
  }
@@ -6002,7 +5995,7 @@
6002
5995
  throw new EnvironmentMismatchError('Can not scrape documents without filesystem tools');
6003
5996
  // <- TODO: [🧠] What is the best error type here`
6004
5997
  }
6005
- if (externalProgramsPaths.pandocPath === undefined) {
5998
+ if (((_a = this.tools.executables) === null || _a === void 0 ? void 0 : _a.pandocPath) === undefined) {
6006
5999
  throw new MissingToolsError('Pandoc is required for scraping .docx files');
6007
6000
  }
6008
6001
  if (source.filename === null) {
@@ -6013,7 +6006,7 @@
6013
6006
  return [4 /*yield*/, getScraperIntermediateSource(source, {
6014
6007
  rootDirname: rootDirname,
6015
6008
  cacheDirname: cacheDirname,
6016
- isCacheCleaned: isCacheCleaned,
6009
+ intermediateFilesStrategy: intermediateFilesStrategy,
6017
6010
  extension: 'md',
6018
6011
  isVerbose: isVerbose,
6019
6012
  })];
@@ -6022,11 +6015,9 @@
6022
6015
  return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
6023
6016
  case 2:
6024
6017
  if (!!(_g.sent())) return [3 /*break*/, 5];
6025
- command_1 = "\"".concat(externalProgramsPaths.pandocPath, "\" -f ").concat(extension, " -t markdown \"").concat(source.filename, "\" -o \"").concat(cacheFilehandler.filename, "\"");
6026
- // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
6018
+ command_1 = "\"".concat(this.tools.executables.pandocPath, "\" -f ").concat(extension, " -t markdown \"").concat(source.filename, "\" -o \"").concat(cacheFilehandler.filename, "\"");
6027
6019
  return [4 /*yield*/, $execCommand(command_1)];
6028
6020
  case 3:
6029
- // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
6030
6021
  _g.sent();
6031
6022
  return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
6032
6023
  case 4:
@@ -6105,7 +6096,11 @@
6105
6096
  mimeTypes: ['application/msword', 'text/rtf'],
6106
6097
  documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
6107
6098
  isAvilableInBrowser: false,
6108
- requiredExecutables: ['!!!!!!'],
6099
+ requiredExecutables: [
6100
+ 'Pandoc',
6101
+ 'LibreOffice',
6102
+ // <- TODO: [🧠] Should be 'LibreOffice' here, its dependency of dependency
6103
+ ],
6109
6104
  }); /* <- TODO: [🤛] */
6110
6105
  /**
6111
6106
  * Registration of known scraper metadata
@@ -6118,7 +6113,7 @@
6118
6113
  $scrapersMetadataRegister.register(legacyDocumentScraperMetadata);
6119
6114
 
6120
6115
  /**
6121
- * Scraper for .docx files
6116
+ * Scraper for old document files (like .doc and .rtf)
6122
6117
  *
6123
6118
  * @see `documentationUrl` for more details
6124
6119
  * @public exported from `@promptbook/legacy-documents`
@@ -6145,12 +6140,13 @@
6145
6140
  * Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
6146
6141
  */
6147
6142
  LegacyDocumentScraper.prototype.$convert = function (source) {
6143
+ var _a;
6148
6144
  return __awaiter(this, void 0, void 0, function () {
6149
- var _a, _b, externalProgramsPaths, _c, rootDirname, _d, cacheDirname, _e, isCacheCleaned, _f, isVerbose, extension, cacheFilehandler, documentSourceOutdirPathForLibreOffice_1, command_1, files_1, file;
6145
+ var _b, _c, rootDirname, _d, cacheDirname, _e, intermediateFilesStrategy, _f, isVerbose, extension, cacheFilehandler, documentSourceOutdirPathForLibreOffice_1, command_1, files_1, file;
6150
6146
  return __generator(this, function (_g) {
6151
6147
  switch (_g.label) {
6152
6148
  case 0:
6153
- _a = this.options, _b = _a.externalProgramsPaths, externalProgramsPaths = _b === void 0 ? {} : _b, _c = _a.rootDirname, rootDirname = _c === void 0 ? process.cwd() : _c, _d = _a.cacheDirname, cacheDirname = _d === void 0 ? SCRAPE_CACHE_DIRNAME : _d, _e = _a.isCacheCleaned, isCacheCleaned = _e === void 0 ? false : _e, _f = _a.isVerbose, isVerbose = _f === void 0 ? IS_VERBOSE : _f;
6149
+ _b = this.options, _c = _b.rootDirname, rootDirname = _c === void 0 ? process.cwd() : _c, _d = _b.cacheDirname, cacheDirname = _d === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _d, _e = _b.intermediateFilesStrategy, intermediateFilesStrategy = _e === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _e, _f = _b.isVerbose, isVerbose = _f === void 0 ? DEFAULT_IS_VERBOSE : _f;
6154
6150
  if (!$isRunningInNode()) {
6155
6151
  throw new KnowledgeScrapeError('Scraping .doc files is only supported in Node environment');
6156
6152
  }
@@ -6158,7 +6154,7 @@
6158
6154
  throw new EnvironmentMismatchError('Can not scrape (legacy) documents without filesystem tools');
6159
6155
  // <- TODO: [🧠] What is the best error type here`
6160
6156
  }
6161
- if (externalProgramsPaths.libreOfficePath === undefined) {
6157
+ if (((_a = this.tools.executables) === null || _a === void 0 ? void 0 : _a.libreOfficePath) === undefined) {
6162
6158
  throw new MissingToolsError('LibreOffice is required for scraping .doc and .rtf files');
6163
6159
  }
6164
6160
  if (source.filename === null) {
@@ -6169,7 +6165,7 @@
6169
6165
  return [4 /*yield*/, getScraperIntermediateSource(source, {
6170
6166
  rootDirname: rootDirname,
6171
6167
  cacheDirname: cacheDirname,
6172
- isCacheCleaned: isCacheCleaned,
6168
+ intermediateFilesStrategy: intermediateFilesStrategy,
6173
6169
  extension: 'docx',
6174
6170
  isVerbose: isVerbose,
6175
6171
  })];
@@ -6184,11 +6180,9 @@
6184
6180
  documentSourceOutdirPathForLibreOffice_1 = path.join(path.dirname(cacheFilehandler.filename), 'libreoffice')
6185
6181
  .split('\\')
6186
6182
  .join('/');
6187
- command_1 = "\"".concat(externalProgramsPaths.libreOfficePath, "\" --headless --convert-to docx \"").concat(source.filename, "\" --outdir \"").concat(documentSourceOutdirPathForLibreOffice_1, "\"");
6188
- // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build polution error
6183
+ command_1 = "\"".concat(this.tools.executables.libreOfficePath, "\" --headless --convert-to docx \"").concat(source.filename, "\" --outdir \"").concat(documentSourceOutdirPathForLibreOffice_1, "\"");
6189
6184
  return [4 /*yield*/, $execCommand(command_1)];
6190
6185
  case 3:
6191
- // TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build polution error
6192
6186
  _g.sent();
6193
6187
  return [4 /*yield*/, promises.readdir(documentSourceOutdirPathForLibreOffice_1)];
6194
6188
  case 4: