@promptbook/legacy-documents 0.72.0-13 → 0.72.0-15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/esm/index.es.js +89 -95
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +22 -18
- package/esm/typings/src/_packages/node.index.d.ts +2 -0
- package/esm/typings/src/_packages/types.index.d.ts +10 -10
- package/esm/typings/src/_packages/utils.index.d.ts +2 -2
- package/esm/typings/src/collection/PipelineCollection.d.ts +1 -1
- package/esm/typings/src/collection/SimplePipelineCollection.d.ts +2 -2
- package/esm/typings/src/collection/collectionToJson.d.ts +1 -1
- package/esm/typings/src/collection/constructors/createCollectionFromJson.d.ts +1 -1
- package/esm/typings/src/collection/constructors/createCollectionFromPromise.d.ts +1 -1
- package/esm/typings/src/commands/_common/types/CommandParser.d.ts +5 -5
- package/esm/typings/src/config.d.ts +21 -14
- package/esm/typings/src/execution/EmbeddingVector.d.ts +1 -1
- package/esm/typings/src/execution/Executables.d.ts +18 -0
- package/esm/typings/src/execution/ExecutionTools.d.ts +9 -3
- package/esm/typings/src/execution/LlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/execution/PipelineExecutorResult.d.ts +2 -2
- package/esm/typings/src/execution/assertsExecutionSuccessful.d.ts +1 -0
- package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorOptions.d.ts +29 -6
- package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +2 -11
- package/esm/typings/src/execution/createPipelineExecutor/20-executeTemplate.d.ts +4 -13
- package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +9 -14
- package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +11 -3
- package/esm/typings/src/execution/utils/addUsage.d.ts +1 -1
- package/esm/typings/src/execution/utils/forEachAsync.d.ts +1 -1
- package/esm/typings/src/formats/_common/FormatDefinition.d.ts +2 -2
- package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +2 -2
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForCli.d.ts +2 -2
- package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +1 -1
- package/esm/typings/src/llm-providers/_common/register/createLlmToolsFromConfiguration.d.ts +7 -0
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +4 -1
- package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/$fakeTextToExpectations.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/MockedEchoLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/MockedFackedLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/multiple/MultipleLlmExecutionTools.d.ts +4 -5
- package/esm/typings/src/llm-providers/multiple/joinLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/remote/RemoteLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_ListModels_Response.d.ts +3 -3
- package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_Prompt_Response.d.ts +2 -2
- package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +4 -23
- package/esm/typings/src/prepare/prepareTemplates.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/Scraper.d.ts +1 -5
- package/esm/typings/src/scrapers/_common/prepareKnowledgePieces.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/register/$provideExecutablesForNode.d.ts +12 -0
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +2 -2
- package/esm/typings/src/scrapers/document/DocumentScraper.d.ts +2 -2
- package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/LegacyDocumentScraper.d.ts +3 -3
- package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/MarkdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/PdfScraper.d.ts +2 -2
- package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/website/WebsiteScraper.d.ts +6 -3
- package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/website/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/website/utils/createShowdownConverter.d.ts +7 -0
- package/esm/typings/src/scripting/javascript/utils/preserve.d.ts +1 -1
- package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +1 -1
- package/esm/typings/src/types/Arrayable.d.ts +1 -1
- package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +7 -0
- package/esm/typings/src/types/PipelineJson/KnowledgePieceJson.d.ts +4 -4
- package/esm/typings/src/types/PipelineJson/KnowledgeSourceJson.d.ts +1 -1
- package/esm/typings/src/types/PipelineJson/PersonaJson.d.ts +1 -1
- package/esm/typings/src/types/PipelineJson/TemplateJsonCommon.d.ts +2 -2
- package/esm/typings/src/types/Prompt.d.ts +1 -1
- package/esm/typings/src/types/execution-report/ExecutionReportJson.d.ts +1 -1
- package/esm/typings/src/utils/$Register.d.ts +1 -1
- package/esm/typings/src/utils/FromtoItems.d.ts +1 -1
- package/esm/typings/src/utils/arrayableToArray.d.ts +1 -1
- package/esm/typings/src/utils/emojis.d.ts +1 -1
- package/esm/typings/src/utils/execCommand/$execCommand.d.ts +2 -2
- package/esm/typings/src/utils/execCommand/{IExecCommandOptions.d.ts → ExecCommandOptions.d.ts} +2 -6
- package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.d.ts +3 -3
- package/esm/typings/src/utils/expectation-counters/countSentences.d.ts +1 -1
- package/esm/typings/src/utils/markdown/extractAllBlocksFromMarkdown.d.ts +1 -1
- package/esm/typings/src/utils/markdown/splitMarkdownIntoSections.d.ts +1 -1
- package/esm/typings/src/utils/normalization/IKeywords.d.ts +2 -2
- package/esm/typings/src/utils/normalization/parseKeywords.d.ts +2 -2
- package/esm/typings/src/utils/normalization/parseKeywordsFromString.d.ts +2 -2
- package/esm/typings/src/utils/normalization/searchKeywords.d.ts +2 -2
- package/esm/typings/src/utils/organization/TODO_USE.d.ts +1 -1
- package/esm/typings/src/utils/organization/keepUnused.d.ts +1 -1
- package/esm/typings/src/utils/random/$randomSeed.d.ts +1 -1
- package/esm/typings/src/utils/sets/intersection.d.ts +1 -1
- package/esm/typings/src/utils/sets/union.d.ts +1 -1
- package/esm/typings/src/utils/unwrapResult.d.ts +4 -4
- package/package.json +4 -3
- package/umd/index.umd.js +89 -95
- package/umd/index.umd.js.map +1 -1
- package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorSettings.d.ts +0 -29
- package/esm/typings/src/scrapers/website/utils/markdownConverter.d.ts +0 -12
- /package/esm/typings/src/scrapers/website/utils/{markdownConverter.test.d.ts → createShowdownConverter.test.d.ts} +0 -0
package/umd/index.umd.js
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
/**
|
|
16
16
|
* The version of the Promptbook library
|
|
17
17
|
*/
|
|
18
|
-
var PROMPTBOOK_VERSION = '0.72.0-
|
|
18
|
+
var PROMPTBOOK_VERSION = '0.72.0-14';
|
|
19
19
|
// TODO: [main] !!!! List here all the versions and annotate + put into script
|
|
20
20
|
|
|
21
21
|
/*! *****************************************************************************
|
|
@@ -363,18 +363,25 @@
|
|
|
363
363
|
* @private within the repository - too low-level in comparison with other `MAX_...`
|
|
364
364
|
*/
|
|
365
365
|
var IMMEDIATE_TIME = 10;
|
|
366
|
+
/**
|
|
367
|
+
* Strategy for caching the intermediate results for knowledge sources
|
|
368
|
+
*
|
|
369
|
+
* @public exported from `@promptbook/core`
|
|
370
|
+
*/
|
|
371
|
+
var DEFAULT_INTERMEDIATE_FILES_STRATEGY = 'HIDE_AND_KEEP';
|
|
372
|
+
// <- TODO: [😡] Change to 'VISIBLE'
|
|
366
373
|
/**
|
|
367
374
|
* The maximum number of (LLM) tasks running in parallel
|
|
368
375
|
*
|
|
369
376
|
* @public exported from `@promptbook/core`
|
|
370
377
|
*/
|
|
371
|
-
var
|
|
378
|
+
var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹♂️]
|
|
372
379
|
/**
|
|
373
380
|
* The maximum number of attempts to execute LLM task before giving up
|
|
374
381
|
*
|
|
375
382
|
* @public exported from `@promptbook/core`
|
|
376
383
|
*/
|
|
377
|
-
var
|
|
384
|
+
var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹♂️]
|
|
378
385
|
/**
|
|
379
386
|
* Where to store the scrape cache
|
|
380
387
|
*
|
|
@@ -382,7 +389,7 @@
|
|
|
382
389
|
*
|
|
383
390
|
* @public exported from `@promptbook/core`
|
|
384
391
|
*/
|
|
385
|
-
var
|
|
392
|
+
var DEFAULT_SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
|
|
386
393
|
/**
|
|
387
394
|
* Nonce which is used for replacing things in strings
|
|
388
395
|
*
|
|
@@ -434,7 +441,7 @@
|
|
|
434
441
|
*
|
|
435
442
|
* @public exported from `@promptbook/core`
|
|
436
443
|
*/
|
|
437
|
-
var
|
|
444
|
+
var DEFAULT_IS_VERBOSE = false;
|
|
438
445
|
/**
|
|
439
446
|
* @@@
|
|
440
447
|
*
|
|
@@ -1150,12 +1157,12 @@
|
|
|
1150
1157
|
*/
|
|
1151
1158
|
function getScraperIntermediateSource(source, options) {
|
|
1152
1159
|
return __awaiter(this, void 0, void 0, function () {
|
|
1153
|
-
var sourceFilename, url, rootDirname, cacheDirname,
|
|
1160
|
+
var sourceFilename, url, rootDirname, cacheDirname, intermediateFilesStrategy, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
|
|
1154
1161
|
return __generator(this, function (_a) {
|
|
1155
1162
|
switch (_a.label) {
|
|
1156
1163
|
case 0:
|
|
1157
1164
|
sourceFilename = source.filename, url = source.url;
|
|
1158
|
-
rootDirname = options.rootDirname, cacheDirname = options.cacheDirname,
|
|
1165
|
+
rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, intermediateFilesStrategy = options.intermediateFilesStrategy, extension = options.extension, isVerbose = options.isVerbose;
|
|
1159
1166
|
hash = cryptoJs.SHA256(
|
|
1160
1167
|
// <- TODO: [🥬] Encapsulate sha256 to some private utility function
|
|
1161
1168
|
hexEncoder__default["default"].parse(sourceFilename || url || 'untitled'))
|
|
@@ -1165,7 +1172,7 @@
|
|
|
1165
1172
|
pieces = ['intermediate', semanticName, hash].filter(function (piece) { return piece !== ''; });
|
|
1166
1173
|
name = pieces.join('-').split('--').join('-');
|
|
1167
1174
|
// <- TODO: Use MAX_FILENAME_LENGTH
|
|
1168
|
-
TODO_USE(rootDirname); // <- TODO:
|
|
1175
|
+
TODO_USE(rootDirname); // <- TODO: [😡]
|
|
1169
1176
|
cacheFilename = path.join.apply(void 0, __spreadArray(__spreadArray([process.cwd(),
|
|
1170
1177
|
cacheDirname], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), [name], false)).split('\\')
|
|
1171
1178
|
.join('/') +
|
|
@@ -1185,7 +1192,7 @@
|
|
|
1185
1192
|
return __generator(this, function (_a) {
|
|
1186
1193
|
switch (_a.label) {
|
|
1187
1194
|
case 0:
|
|
1188
|
-
if (!
|
|
1195
|
+
if (!(intermediateFilesStrategy === 'HIDE_AND_CLEAN')) return [3 /*break*/, 2];
|
|
1189
1196
|
if (isVerbose) {
|
|
1190
1197
|
console.info('legacyDocumentScraper: Clening cache');
|
|
1191
1198
|
}
|
|
@@ -1209,7 +1216,7 @@
|
|
|
1209
1216
|
/**
|
|
1210
1217
|
* Note: Not using `FileCacheStorage` for two reasons:
|
|
1211
1218
|
* 1) Need to store more than serialized JSONs
|
|
1212
|
-
* 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO:
|
|
1219
|
+
* 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: [😡]
|
|
1213
1220
|
* TODO: [🐱🐉][🧠] Make some smart crop
|
|
1214
1221
|
* Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
|
|
1215
1222
|
*/
|
|
@@ -1274,7 +1281,6 @@
|
|
|
1274
1281
|
pipelineString += '\n\n';
|
|
1275
1282
|
pipelineString += description;
|
|
1276
1283
|
}
|
|
1277
|
-
// TODO:> const commands: Array<Command>
|
|
1278
1284
|
var commands = [];
|
|
1279
1285
|
if (pipelineUrl) {
|
|
1280
1286
|
commands.push("PIPELINE URL ".concat(pipelineUrl));
|
|
@@ -1330,7 +1336,6 @@
|
|
|
1330
1336
|
pipelineString += '\n\n';
|
|
1331
1337
|
pipelineString += description_1;
|
|
1332
1338
|
}
|
|
1333
|
-
// TODO:> const commands: Array<Command>
|
|
1334
1339
|
var commands_1 = [];
|
|
1335
1340
|
var contentLanguage = 'text';
|
|
1336
1341
|
if (templateType === 'PROMPT_TEMPLATE') {
|
|
@@ -2215,6 +2220,7 @@
|
|
|
2215
2220
|
}
|
|
2216
2221
|
}
|
|
2217
2222
|
/**
|
|
2223
|
+
* TODO: [🐚] This function should be removed OR changed OR be completely rewritten
|
|
2218
2224
|
* TODO: [🧠] Can this return type be better typed than void
|
|
2219
2225
|
*/
|
|
2220
2226
|
|
|
@@ -2400,8 +2406,7 @@
|
|
|
2400
2406
|
* Multiple LLM Execution Tools is a proxy server that uses multiple execution tools internally and exposes the executor interface externally.
|
|
2401
2407
|
*
|
|
2402
2408
|
* Note: Internal utility of `joinLlmExecutionTools` but exposed type
|
|
2403
|
-
* @public exported from `@promptbook/
|
|
2404
|
-
* TODO: !!!!!! Export as runtime class not just type
|
|
2409
|
+
* @public exported from `@promptbook/core`
|
|
2405
2410
|
*/
|
|
2406
2411
|
var MultipleLlmExecutionTools = /** @class */ (function () {
|
|
2407
2412
|
/**
|
|
@@ -2903,7 +2908,7 @@
|
|
|
2903
2908
|
return __generator(this, function (_d) {
|
|
2904
2909
|
switch (_d.label) {
|
|
2905
2910
|
case 0:
|
|
2906
|
-
_a = options.isVerbose, isVerbose = _a === void 0 ?
|
|
2911
|
+
_a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
|
|
2907
2912
|
if (tools === undefined || tools.llm === undefined) {
|
|
2908
2913
|
throw new MissingToolsError('LLM tools are required for preparing persona');
|
|
2909
2914
|
}
|
|
@@ -3069,7 +3074,7 @@
|
|
|
3069
3074
|
this.storage = globalScope[storageName];
|
|
3070
3075
|
}
|
|
3071
3076
|
$Register.prototype.list = function () {
|
|
3072
|
-
// <- TODO: ReadonlyDeep<
|
|
3077
|
+
// <- TODO: ReadonlyDeep<ReadonlyArray<TRegistered>>
|
|
3073
3078
|
return this.storage;
|
|
3074
3079
|
};
|
|
3075
3080
|
$Register.prototype.register = function (registered) {
|
|
@@ -3121,7 +3126,7 @@
|
|
|
3121
3126
|
* TODO: [®] DRY Register logic
|
|
3122
3127
|
*/
|
|
3123
3128
|
|
|
3124
|
-
// TODO: !!!!!! Maybe delete this function
|
|
3129
|
+
// TODO: !!!!!!last - Maybe delete this function
|
|
3125
3130
|
/**
|
|
3126
3131
|
* Creates a message with all registered scrapers
|
|
3127
3132
|
*
|
|
@@ -3229,7 +3234,6 @@
|
|
|
3229
3234
|
* @private within the repository
|
|
3230
3235
|
*/
|
|
3231
3236
|
function sourceContentToName(sourceContent) {
|
|
3232
|
-
// TODO: !!!!!! Better name for source than gibberish hash
|
|
3233
3237
|
var hash = cryptoJs.SHA256(hexEncoder__default["default"].parse(JSON.stringify(sourceContent)))
|
|
3234
3238
|
// <- TODO: [🥬] Encapsulate sha256 to some private utility function
|
|
3235
3239
|
.toString( /* hex */)
|
|
@@ -3270,7 +3274,7 @@
|
|
|
3270
3274
|
case 0:
|
|
3271
3275
|
sourceContent = knowledgeSource.sourceContent;
|
|
3272
3276
|
name = knowledgeSource.name;
|
|
3273
|
-
_b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ?
|
|
3277
|
+
_b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d;
|
|
3274
3278
|
TODO_USE(isVerbose);
|
|
3275
3279
|
if (!name) {
|
|
3276
3280
|
name = sourceContentToName(sourceContent);
|
|
@@ -3360,7 +3364,7 @@
|
|
|
3360
3364
|
content = _a.sent();
|
|
3361
3365
|
return [2 /*return*/, new Blob([
|
|
3362
3366
|
content,
|
|
3363
|
-
// <- TODO: !!!!!!
|
|
3367
|
+
// <- TODO: !!!!!! Test that this is working
|
|
3364
3368
|
], { type: mimeType_1 })];
|
|
3365
3369
|
}
|
|
3366
3370
|
});
|
|
@@ -3423,7 +3427,7 @@
|
|
|
3423
3427
|
return __generator(this, function (_c) {
|
|
3424
3428
|
switch (_c.label) {
|
|
3425
3429
|
case 0:
|
|
3426
|
-
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3430
|
+
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
|
|
3427
3431
|
knowledgePreparedUnflatten = new Array(knowledgeSources.length);
|
|
3428
3432
|
return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
3429
3433
|
var partialPieces, sourceHandler, _a, _b, scraper, partialPiecesUnchecked, e_1_1, pieces;
|
|
@@ -3452,7 +3456,8 @@
|
|
|
3452
3456
|
case 4:
|
|
3453
3457
|
partialPiecesUnchecked = _d.sent();
|
|
3454
3458
|
if (partialPiecesUnchecked !== null) {
|
|
3455
|
-
partialPieces = partialPiecesUnchecked;
|
|
3459
|
+
partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
|
|
3460
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
|
|
3456
3461
|
return [3 /*break*/, 6];
|
|
3457
3462
|
}
|
|
3458
3463
|
_d.label = 5;
|
|
@@ -3500,7 +3505,7 @@
|
|
|
3500
3505
|
> /**
|
|
3501
3506
|
> * Unprepared knowledge
|
|
3502
3507
|
> * /
|
|
3503
|
-
> readonly knowledgeSources:
|
|
3508
|
+
> readonly knowledgeSources: ReadonlyArray<KnowledgeSourceJson>;
|
|
3504
3509
|
> };
|
|
3505
3510
|
>
|
|
3506
3511
|
> export async function prepareKnowledgePieces(
|
|
@@ -3558,7 +3563,7 @@
|
|
|
3558
3563
|
return __generator(this, function (_b) {
|
|
3559
3564
|
switch (_b.label) {
|
|
3560
3565
|
case 0:
|
|
3561
|
-
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3566
|
+
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a;
|
|
3562
3567
|
templates = pipeline.templates, parameters = pipeline.parameters, knowledgePiecesCount = pipeline.knowledgePiecesCount;
|
|
3563
3568
|
// TODO: [main] !!!!! Apply samples to each template (if missing and is for the template defined)
|
|
3564
3569
|
TODO_USE(parameters);
|
|
@@ -3620,7 +3625,7 @@
|
|
|
3620
3625
|
if (isPipelinePrepared(pipeline)) {
|
|
3621
3626
|
return [2 /*return*/, pipeline];
|
|
3622
3627
|
}
|
|
3623
|
-
rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3628
|
+
rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
|
|
3624
3629
|
parameters = pipeline.parameters, templates = pipeline.templates, knowledgeSources = pipeline.knowledgeSources, personas = pipeline.personas;
|
|
3625
3630
|
if (tools === undefined || tools.llm === undefined) {
|
|
3626
3631
|
throw new MissingToolsError('LLM tools are required for preparing the pipeline');
|
|
@@ -3678,7 +3683,9 @@
|
|
|
3678
3683
|
// ----- /Templates preparation -----
|
|
3679
3684
|
// Note: Count total usage
|
|
3680
3685
|
currentPreparation.usage = llmToolsWithUsage.getTotalUsage();
|
|
3681
|
-
return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates:
|
|
3686
|
+
return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates: __spreadArray([], __read(templatesPrepared), false),
|
|
3687
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just ` templates: templatesPrepared`
|
|
3688
|
+
knowledgeSources: knowledgeSourcesPrepared, knowledgePieces: knowledgePiecesPrepared, personas: preparedPersonas, preparations: __spreadArray([], __read(preparations), false) }))];
|
|
3682
3689
|
}
|
|
3683
3690
|
});
|
|
3684
3691
|
});
|
|
@@ -4649,12 +4656,11 @@
|
|
|
4649
4656
|
*/
|
|
4650
4657
|
function executeAttempts(options) {
|
|
4651
4658
|
return __awaiter(this, void 0, void 0, function () {
|
|
4652
|
-
var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools,
|
|
4659
|
+
var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools, $executionReport, pipelineIdentification, maxExecutionAttempts, $ongoingTemplateResult, _llms, llmTools, _loop_1, attempt, state_1;
|
|
4653
4660
|
return __generator(this, function (_a) {
|
|
4654
4661
|
switch (_a.label) {
|
|
4655
4662
|
case 0:
|
|
4656
|
-
jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools,
|
|
4657
|
-
maxExecutionAttempts = settings.maxExecutionAttempts;
|
|
4663
|
+
jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, maxExecutionAttempts = options.maxExecutionAttempts;
|
|
4658
4664
|
$ongoingTemplateResult = {
|
|
4659
4665
|
$result: null,
|
|
4660
4666
|
$resultString: null,
|
|
@@ -5020,12 +5026,12 @@
|
|
|
5020
5026
|
*/
|
|
5021
5027
|
function executeFormatSubvalues(options) {
|
|
5022
5028
|
return __awaiter(this, void 0, void 0, function () {
|
|
5023
|
-
var template, jokerParameterNames, parameters, priority,
|
|
5029
|
+
var template, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification, parameterValue, formatDefinition, subvalueDefinition, formatSettings, resultString;
|
|
5024
5030
|
var _this = this;
|
|
5025
5031
|
return __generator(this, function (_a) {
|
|
5026
5032
|
switch (_a.label) {
|
|
5027
5033
|
case 0:
|
|
5028
|
-
template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority,
|
|
5034
|
+
template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority, csvSettings = options.csvSettings, pipelineIdentification = options.pipelineIdentification;
|
|
5029
5035
|
if (template.foreach === undefined) {
|
|
5030
5036
|
return [2 /*return*/, /* not await */ executeAttempts(options)];
|
|
5031
5037
|
}
|
|
@@ -5055,7 +5061,7 @@
|
|
|
5055
5061
|
.join('\n')), "\n\n [\u26F7] This should never happen because subformat name should be validated during parsing\n\n ").concat(block(pipelineIdentification), "\n "); }));
|
|
5056
5062
|
}
|
|
5057
5063
|
if (formatDefinition.formatName === 'CSV') {
|
|
5058
|
-
formatSettings =
|
|
5064
|
+
formatSettings = csvSettings;
|
|
5059
5065
|
// <- TODO: [🤹♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
|
|
5060
5066
|
}
|
|
5061
5067
|
return [4 /*yield*/, subvalueDefinition.mapValues(parameterValue, template.foreach.outputSubparameterName, formatSettings, function (subparameters, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
@@ -5208,13 +5214,12 @@
|
|
|
5208
5214
|
*/
|
|
5209
5215
|
function executeTemplate(options) {
|
|
5210
5216
|
return __awaiter(this, void 0, void 0, function () {
|
|
5211
|
-
var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress,
|
|
5212
|
-
var e_1,
|
|
5213
|
-
return __generator(this, function (
|
|
5214
|
-
switch (
|
|
5217
|
+
var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, _a, maxExecutionAttempts, name, title, priority, usedParameterNames, dependentParameterNames, definedParameters, _b, _c, _d, definedParameterNames, parameters, _loop_1, _e, _f, parameterName, maxAttempts, jokerParameterNames, preparedContent, resultString;
|
|
5218
|
+
var e_1, _g, _h;
|
|
5219
|
+
return __generator(this, function (_j) {
|
|
5220
|
+
switch (_j.label) {
|
|
5215
5221
|
case 0:
|
|
5216
|
-
currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress,
|
|
5217
|
-
maxExecutionAttempts = settings.maxExecutionAttempts;
|
|
5222
|
+
currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a;
|
|
5218
5223
|
name = "pipeline-executor-frame-".concat(currentTemplate.name);
|
|
5219
5224
|
title = currentTemplate.title;
|
|
5220
5225
|
priority = preparedPipeline.templates.length - preparedPipeline.templates.indexOf(currentTemplate);
|
|
@@ -5229,7 +5234,7 @@
|
|
|
5229
5234
|
// <- [🍸]
|
|
5230
5235
|
})];
|
|
5231
5236
|
case 1:
|
|
5232
|
-
|
|
5237
|
+
_j.sent();
|
|
5233
5238
|
usedParameterNames = extractParameterNamesFromTemplate(currentTemplate);
|
|
5234
5239
|
dependentParameterNames = new Set(currentTemplate.dependentParameterNames);
|
|
5235
5240
|
// TODO: [👩🏾🤝👩🏻] Use here `mapAvailableToExpectedParameters`
|
|
@@ -5240,15 +5245,15 @@
|
|
|
5240
5245
|
.map(function (name) { return "{".concat(name, "}"); })
|
|
5241
5246
|
.join(', '), "\n\n ").concat(block(pipelineIdentification), "\n\n "); }));
|
|
5242
5247
|
}
|
|
5243
|
-
|
|
5244
|
-
|
|
5248
|
+
_c = (_b = Object).freeze;
|
|
5249
|
+
_d = [{}];
|
|
5245
5250
|
return [4 /*yield*/, getReservedParametersForTemplate({
|
|
5246
5251
|
preparedPipeline: preparedPipeline,
|
|
5247
5252
|
template: currentTemplate,
|
|
5248
5253
|
pipelineIdentification: pipelineIdentification,
|
|
5249
5254
|
})];
|
|
5250
5255
|
case 2:
|
|
5251
|
-
definedParameters =
|
|
5256
|
+
definedParameters = _c.apply(_b, [__assign.apply(void 0, [__assign.apply(void 0, _d.concat([(_j.sent())])), parametersToPass])]);
|
|
5252
5257
|
definedParameterNames = new Set(Object.keys(definedParameters));
|
|
5253
5258
|
parameters = {};
|
|
5254
5259
|
_loop_1 = function (parameterName) {
|
|
@@ -5268,15 +5273,15 @@
|
|
|
5268
5273
|
try {
|
|
5269
5274
|
// Note: [2] Check that all used parameters are defined and removing unused parameters for this template
|
|
5270
5275
|
// TODO: [👩🏾🤝👩🏻] Use here `mapAvailableToExpectedParameters`
|
|
5271
|
-
for (
|
|
5272
|
-
parameterName =
|
|
5276
|
+
for (_e = __values(Array.from(union(definedParameterNames, usedParameterNames, dependentParameterNames))), _f = _e.next(); !_f.done; _f = _e.next()) {
|
|
5277
|
+
parameterName = _f.value;
|
|
5273
5278
|
_loop_1(parameterName);
|
|
5274
5279
|
}
|
|
5275
5280
|
}
|
|
5276
5281
|
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
5277
5282
|
finally {
|
|
5278
5283
|
try {
|
|
5279
|
-
if (
|
|
5284
|
+
if (_f && !_f.done && (_g = _e.return)) _g.call(_e);
|
|
5280
5285
|
}
|
|
5281
5286
|
finally { if (e_1) throw e_1.error; }
|
|
5282
5287
|
}
|
|
@@ -5296,12 +5301,11 @@
|
|
|
5296
5301
|
template: currentTemplate,
|
|
5297
5302
|
preparedPipeline: preparedPipeline,
|
|
5298
5303
|
tools: tools,
|
|
5299
|
-
settings: settings,
|
|
5300
5304
|
$executionReport: $executionReport,
|
|
5301
5305
|
pipelineIdentification: pipelineIdentification,
|
|
5302
5306
|
})];
|
|
5303
5307
|
case 3:
|
|
5304
|
-
resultString =
|
|
5308
|
+
resultString = _j.sent();
|
|
5305
5309
|
return [4 /*yield*/, onProgress({
|
|
5306
5310
|
name: name,
|
|
5307
5311
|
title: title,
|
|
@@ -5313,12 +5317,12 @@
|
|
|
5313
5317
|
// <- [🍸]
|
|
5314
5318
|
})];
|
|
5315
5319
|
case 4:
|
|
5316
|
-
|
|
5317
|
-
return [2 /*return*/, Object.freeze((
|
|
5318
|
-
|
|
5320
|
+
_j.sent();
|
|
5321
|
+
return [2 /*return*/, Object.freeze((_h = {},
|
|
5322
|
+
_h[currentTemplate.resultingParameterName] =
|
|
5319
5323
|
// <- Note: [👩👩👧] No need to detect parameter collision here because pipeline checks logic consistency during construction
|
|
5320
5324
|
resultString,
|
|
5321
|
-
|
|
5325
|
+
_h))];
|
|
5322
5326
|
}
|
|
5323
5327
|
});
|
|
5324
5328
|
});
|
|
@@ -5377,13 +5381,12 @@
|
|
|
5377
5381
|
*/
|
|
5378
5382
|
function executePipeline(options) {
|
|
5379
5383
|
return __awaiter(this, void 0, void 0, function () {
|
|
5380
|
-
var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification,
|
|
5384
|
+
var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
|
|
5381
5385
|
var e_1, _f, e_2, _g;
|
|
5382
5386
|
return __generator(this, function (_h) {
|
|
5383
5387
|
switch (_h.label) {
|
|
5384
5388
|
case 0:
|
|
5385
|
-
inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification,
|
|
5386
|
-
maxParallelCount = settings.maxParallelCount, rootDirname = settings.rootDirname, _a = settings.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
|
|
5389
|
+
inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, maxParallelCount = options.maxParallelCount, rootDirname = options.rootDirname, _a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
|
|
5387
5390
|
preparedPipeline = options.preparedPipeline;
|
|
5388
5391
|
if (!(preparedPipeline === undefined)) return [3 /*break*/, 2];
|
|
5389
5392
|
return [4 /*yield*/, preparePipeline(pipeline, tools, {
|
|
@@ -5568,12 +5571,7 @@
|
|
|
5568
5571
|
return [3 /*break*/, 4];
|
|
5569
5572
|
case 3:
|
|
5570
5573
|
unresovedTemplates_1 = unresovedTemplates_1.filter(function (template) { return template !== currentTemplate; });
|
|
5571
|
-
work_1 = executeTemplate({
|
|
5572
|
-
currentTemplate: currentTemplate,
|
|
5573
|
-
preparedPipeline: preparedPipeline,
|
|
5574
|
-
parametersToPass: parametersToPass,
|
|
5575
|
-
tools: tools,
|
|
5576
|
-
onProgress: function (progress) {
|
|
5574
|
+
work_1 = executeTemplate(__assign(__assign({}, options), { currentTemplate: currentTemplate, preparedPipeline: preparedPipeline, parametersToPass: parametersToPass, tools: tools, onProgress: function (progress) {
|
|
5577
5575
|
if (isReturned) {
|
|
5578
5576
|
throw new UnexpectedError(spaceTrim.spaceTrim(function (block) { return "\n Can not call `onProgress` after pipeline execution is finished\n\n ".concat(block(pipelineIdentification), "\n\n ").concat(block(JSON.stringify(progress, null, 4)
|
|
5579
5577
|
.split('\n')
|
|
@@ -5583,11 +5581,7 @@
|
|
|
5583
5581
|
if (onProgress) {
|
|
5584
5582
|
onProgress(progress);
|
|
5585
5583
|
}
|
|
5586
|
-
},
|
|
5587
|
-
settings: settings,
|
|
5588
|
-
$executionReport: executionReport,
|
|
5589
|
-
pipelineIdentification: spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }),
|
|
5590
|
-
})
|
|
5584
|
+
}, $executionReport: executionReport, pipelineIdentification: spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }) }))
|
|
5591
5585
|
.then(function (newParametersToPass) {
|
|
5592
5586
|
parametersToPass = __assign(__assign({}, newParametersToPass), parametersToPass);
|
|
5593
5587
|
resovedParameterNames_1 = __spreadArray(__spreadArray([], __read(resovedParameterNames_1), false), [currentTemplate.resultingParameterName], false);
|
|
@@ -5691,8 +5685,7 @@
|
|
|
5691
5685
|
*/
|
|
5692
5686
|
function createPipelineExecutor(options) {
|
|
5693
5687
|
var _this = this;
|
|
5694
|
-
var pipeline = options.pipeline, tools = options.tools, _a = options.
|
|
5695
|
-
var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f, _g = settings.rootDirname, rootDirname = _g === void 0 ? null : _g;
|
|
5688
|
+
var pipeline = options.pipeline, tools = options.tools, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a, _b = options.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = options.csvSettings, csvSettings = _c === void 0 ? DEFAULT_CSV_SETTINGS : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d, _e = options.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _e === void 0 ? false : _e, _f = options.rootDirname, rootDirname = _f === void 0 ? null : _f;
|
|
5696
5689
|
validatePipeline(pipeline);
|
|
5697
5690
|
var pipelineIdentification = (function () {
|
|
5698
5691
|
// Note: This is a 😐 implementation of [🚞]
|
|
@@ -5726,14 +5719,12 @@
|
|
|
5726
5719
|
tools: tools,
|
|
5727
5720
|
onProgress: onProgress,
|
|
5728
5721
|
pipelineIdentification: spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
|
|
5729
|
-
|
|
5730
|
-
|
|
5731
|
-
|
|
5732
|
-
|
|
5733
|
-
|
|
5734
|
-
|
|
5735
|
-
rootDirname: rootDirname,
|
|
5736
|
-
},
|
|
5722
|
+
maxExecutionAttempts: maxExecutionAttempts,
|
|
5723
|
+
maxParallelCount: maxParallelCount,
|
|
5724
|
+
csvSettings: csvSettings,
|
|
5725
|
+
isVerbose: isVerbose,
|
|
5726
|
+
isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
|
|
5727
|
+
rootDirname: rootDirname,
|
|
5737
5728
|
})];
|
|
5738
5729
|
});
|
|
5739
5730
|
}); };
|
|
@@ -5755,7 +5746,7 @@
|
|
|
5755
5746
|
mimeTypes: ['text/markdown', 'text/plain'],
|
|
5756
5747
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
5757
5748
|
isAvilableInBrowser: true,
|
|
5758
|
-
requiredExecutables: [
|
|
5749
|
+
requiredExecutables: [],
|
|
5759
5750
|
}); /* <- TODO: [🤛] */
|
|
5760
5751
|
/**
|
|
5761
5752
|
* Registration of known scraper metadata
|
|
@@ -5799,7 +5790,7 @@
|
|
|
5799
5790
|
return __generator(this, function (_k) {
|
|
5800
5791
|
switch (_k.label) {
|
|
5801
5792
|
case 0:
|
|
5802
|
-
_a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ?
|
|
5793
|
+
_a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = _a.isVerbose, isVerbose = _c === void 0 ? DEFAULT_IS_VERBOSE : _c;
|
|
5803
5794
|
llm = this.tools.llm;
|
|
5804
5795
|
if (llm === undefined) {
|
|
5805
5796
|
throw new MissingToolsError('LLM tools are required for scraping external files');
|
|
@@ -5898,7 +5889,8 @@
|
|
|
5898
5889
|
embeddingResult = _c.sent();
|
|
5899
5890
|
index.push({
|
|
5900
5891
|
modelName: embeddingResult.modelName,
|
|
5901
|
-
position: embeddingResult.content,
|
|
5892
|
+
position: __spreadArray([], __read(embeddingResult.content), false),
|
|
5893
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just `position: embeddingResult.content`
|
|
5902
5894
|
});
|
|
5903
5895
|
_c.label = 6;
|
|
5904
5896
|
case 6: return [3 /*break*/, 8];
|
|
@@ -5949,7 +5941,7 @@
|
|
|
5949
5941
|
mimeTypes: ['application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
|
|
5950
5942
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
5951
5943
|
isAvilableInBrowser: false,
|
|
5952
|
-
requiredExecutables: ['
|
|
5944
|
+
requiredExecutables: ['Pandoc'],
|
|
5953
5945
|
}); /* <- TODO: [🤛] */
|
|
5954
5946
|
/**
|
|
5955
5947
|
* Registration of known scraper metadata
|
|
@@ -5989,12 +5981,13 @@
|
|
|
5989
5981
|
* Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
|
|
5990
5982
|
*/
|
|
5991
5983
|
DocumentScraper.prototype.$convert = function (source) {
|
|
5984
|
+
var _a;
|
|
5992
5985
|
return __awaiter(this, void 0, void 0, function () {
|
|
5993
|
-
var
|
|
5986
|
+
var _b, _c, rootDirname, _d, cacheDirname, _e, intermediateFilesStrategy, _f, isVerbose, extension, cacheFilehandler, command_1;
|
|
5994
5987
|
return __generator(this, function (_g) {
|
|
5995
5988
|
switch (_g.label) {
|
|
5996
5989
|
case 0:
|
|
5997
|
-
|
|
5990
|
+
_b = this.options, _c = _b.rootDirname, rootDirname = _c === void 0 ? process.cwd() : _c, _d = _b.cacheDirname, cacheDirname = _d === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _d, _e = _b.intermediateFilesStrategy, intermediateFilesStrategy = _e === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _e, _f = _b.isVerbose, isVerbose = _f === void 0 ? DEFAULT_IS_VERBOSE : _f;
|
|
5998
5991
|
if (!$isRunningInNode()) {
|
|
5999
5992
|
throw new KnowledgeScrapeError('Scraping .docx files is only supported in Node environment');
|
|
6000
5993
|
}
|
|
@@ -6002,7 +5995,7 @@
|
|
|
6002
5995
|
throw new EnvironmentMismatchError('Can not scrape documents without filesystem tools');
|
|
6003
5996
|
// <- TODO: [🧠] What is the best error type here`
|
|
6004
5997
|
}
|
|
6005
|
-
if (
|
|
5998
|
+
if (((_a = this.tools.executables) === null || _a === void 0 ? void 0 : _a.pandocPath) === undefined) {
|
|
6006
5999
|
throw new MissingToolsError('Pandoc is required for scraping .docx files');
|
|
6007
6000
|
}
|
|
6008
6001
|
if (source.filename === null) {
|
|
@@ -6013,7 +6006,7 @@
|
|
|
6013
6006
|
return [4 /*yield*/, getScraperIntermediateSource(source, {
|
|
6014
6007
|
rootDirname: rootDirname,
|
|
6015
6008
|
cacheDirname: cacheDirname,
|
|
6016
|
-
|
|
6009
|
+
intermediateFilesStrategy: intermediateFilesStrategy,
|
|
6017
6010
|
extension: 'md',
|
|
6018
6011
|
isVerbose: isVerbose,
|
|
6019
6012
|
})];
|
|
@@ -6022,11 +6015,9 @@
|
|
|
6022
6015
|
return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
|
|
6023
6016
|
case 2:
|
|
6024
6017
|
if (!!(_g.sent())) return [3 /*break*/, 5];
|
|
6025
|
-
command_1 = "\"".concat(
|
|
6026
|
-
// TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
|
|
6018
|
+
command_1 = "\"".concat(this.tools.executables.pandocPath, "\" -f ").concat(extension, " -t markdown \"").concat(source.filename, "\" -o \"").concat(cacheFilehandler.filename, "\"");
|
|
6027
6019
|
return [4 /*yield*/, $execCommand(command_1)];
|
|
6028
6020
|
case 3:
|
|
6029
|
-
// TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
|
|
6030
6021
|
_g.sent();
|
|
6031
6022
|
return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
|
|
6032
6023
|
case 4:
|
|
@@ -6105,7 +6096,11 @@
|
|
|
6105
6096
|
mimeTypes: ['application/msword', 'text/rtf'],
|
|
6106
6097
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
6107
6098
|
isAvilableInBrowser: false,
|
|
6108
|
-
requiredExecutables: [
|
|
6099
|
+
requiredExecutables: [
|
|
6100
|
+
'Pandoc',
|
|
6101
|
+
'LibreOffice',
|
|
6102
|
+
// <- TODO: [🧠] Should be 'LibreOffice' here, its dependency of dependency
|
|
6103
|
+
],
|
|
6109
6104
|
}); /* <- TODO: [🤛] */
|
|
6110
6105
|
/**
|
|
6111
6106
|
* Registration of known scraper metadata
|
|
@@ -6118,7 +6113,7 @@
|
|
|
6118
6113
|
$scrapersMetadataRegister.register(legacyDocumentScraperMetadata);
|
|
6119
6114
|
|
|
6120
6115
|
/**
|
|
6121
|
-
* Scraper for .
|
|
6116
|
+
* Scraper for old document files (like .doc and .rtf)
|
|
6122
6117
|
*
|
|
6123
6118
|
* @see `documentationUrl` for more details
|
|
6124
6119
|
* @public exported from `@promptbook/legacy-documents`
|
|
@@ -6145,12 +6140,13 @@
|
|
|
6145
6140
|
* Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
|
|
6146
6141
|
*/
|
|
6147
6142
|
LegacyDocumentScraper.prototype.$convert = function (source) {
|
|
6143
|
+
var _a;
|
|
6148
6144
|
return __awaiter(this, void 0, void 0, function () {
|
|
6149
|
-
var
|
|
6145
|
+
var _b, _c, rootDirname, _d, cacheDirname, _e, intermediateFilesStrategy, _f, isVerbose, extension, cacheFilehandler, documentSourceOutdirPathForLibreOffice_1, command_1, files_1, file;
|
|
6150
6146
|
return __generator(this, function (_g) {
|
|
6151
6147
|
switch (_g.label) {
|
|
6152
6148
|
case 0:
|
|
6153
|
-
|
|
6149
|
+
_b = this.options, _c = _b.rootDirname, rootDirname = _c === void 0 ? process.cwd() : _c, _d = _b.cacheDirname, cacheDirname = _d === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _d, _e = _b.intermediateFilesStrategy, intermediateFilesStrategy = _e === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _e, _f = _b.isVerbose, isVerbose = _f === void 0 ? DEFAULT_IS_VERBOSE : _f;
|
|
6154
6150
|
if (!$isRunningInNode()) {
|
|
6155
6151
|
throw new KnowledgeScrapeError('Scraping .doc files is only supported in Node environment');
|
|
6156
6152
|
}
|
|
@@ -6158,7 +6154,7 @@
|
|
|
6158
6154
|
throw new EnvironmentMismatchError('Can not scrape (legacy) documents without filesystem tools');
|
|
6159
6155
|
// <- TODO: [🧠] What is the best error type here`
|
|
6160
6156
|
}
|
|
6161
|
-
if (
|
|
6157
|
+
if (((_a = this.tools.executables) === null || _a === void 0 ? void 0 : _a.libreOfficePath) === undefined) {
|
|
6162
6158
|
throw new MissingToolsError('LibreOffice is required for scraping .doc and .rtf files');
|
|
6163
6159
|
}
|
|
6164
6160
|
if (source.filename === null) {
|
|
@@ -6169,7 +6165,7 @@
|
|
|
6169
6165
|
return [4 /*yield*/, getScraperIntermediateSource(source, {
|
|
6170
6166
|
rootDirname: rootDirname,
|
|
6171
6167
|
cacheDirname: cacheDirname,
|
|
6172
|
-
|
|
6168
|
+
intermediateFilesStrategy: intermediateFilesStrategy,
|
|
6173
6169
|
extension: 'docx',
|
|
6174
6170
|
isVerbose: isVerbose,
|
|
6175
6171
|
})];
|
|
@@ -6184,11 +6180,9 @@
|
|
|
6184
6180
|
documentSourceOutdirPathForLibreOffice_1 = path.join(path.dirname(cacheFilehandler.filename), 'libreoffice')
|
|
6185
6181
|
.split('\\')
|
|
6186
6182
|
.join('/');
|
|
6187
|
-
command_1 = "\"".concat(
|
|
6188
|
-
// TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build polution error
|
|
6183
|
+
command_1 = "\"".concat(this.tools.executables.libreOfficePath, "\" --headless --convert-to docx \"").concat(source.filename, "\" --outdir \"").concat(documentSourceOutdirPathForLibreOffice_1, "\"");
|
|
6189
6184
|
return [4 /*yield*/, $execCommand(command_1)];
|
|
6190
6185
|
case 3:
|
|
6191
|
-
// TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build polution error
|
|
6192
6186
|
_g.sent();
|
|
6193
6187
|
return [4 /*yield*/, promises.readdir(documentSourceOutdirPathForLibreOffice_1)];
|
|
6194
6188
|
case 4:
|