@promptbook/legacy-documents 0.72.0-14 → 0.72.0-23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/esm/index.es.js +136 -136
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/cli.index.d.ts +0 -4
- package/esm/typings/src/_packages/core.index.d.ts +22 -20
- package/esm/typings/src/_packages/node.index.d.ts +2 -0
- package/esm/typings/src/_packages/types.index.d.ts +12 -10
- package/esm/typings/src/_packages/utils.index.d.ts +2 -2
- package/esm/typings/src/collection/PipelineCollection.d.ts +1 -1
- package/esm/typings/src/collection/SimplePipelineCollection.d.ts +2 -2
- package/esm/typings/src/collection/collectionToJson.d.ts +1 -1
- package/esm/typings/src/collection/constructors/createCollectionFromJson.d.ts +1 -1
- package/esm/typings/src/collection/constructors/createCollectionFromPromise.d.ts +1 -1
- package/esm/typings/src/commands/_common/types/CommandParser.d.ts +5 -5
- package/esm/typings/src/config.d.ts +21 -14
- package/esm/typings/src/executables/$provideExecutablesForNode.d.ts +12 -0
- package/esm/typings/src/executables/apps/locateLibreoffice.d.ts +11 -0
- package/esm/typings/src/executables/apps/locatePandoc.d.ts +11 -0
- package/esm/typings/src/executables/locateApp.d.ts +33 -0
- package/esm/typings/src/executables/locateApp.test.d.ts +1 -0
- package/esm/typings/src/executables/platforms/locateAppOnLinux.d.ts +12 -0
- package/esm/typings/src/executables/platforms/locateAppOnMacOs.d.ts +12 -0
- package/esm/typings/src/executables/platforms/locateAppOnWindows.d.ts +12 -0
- package/esm/typings/src/execution/EmbeddingVector.d.ts +1 -1
- package/esm/typings/src/execution/Executables.d.ts +18 -0
- package/esm/typings/src/execution/ExecutionTools.d.ts +9 -3
- package/esm/typings/src/execution/LlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/execution/PipelineExecutorResult.d.ts +2 -2
- package/esm/typings/src/execution/assertsExecutionSuccessful.d.ts +1 -0
- package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorOptions.d.ts +29 -6
- package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +2 -11
- package/esm/typings/src/execution/createPipelineExecutor/20-executeTemplate.d.ts +4 -13
- package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +9 -14
- package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +11 -3
- package/esm/typings/src/execution/utils/addUsage.d.ts +1 -1
- package/esm/typings/src/execution/utils/forEachAsync.d.ts +1 -1
- package/esm/typings/src/formats/_common/FormatDefinition.d.ts +2 -2
- package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +2 -2
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForCli.d.ts +2 -2
- package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +1 -1
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +4 -1
- package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/$fakeTextToExpectations.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/MockedEchoLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/MockedFackedLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/multiple/MultipleLlmExecutionTools.d.ts +4 -5
- package/esm/typings/src/llm-providers/multiple/joinLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/OpenAiAssistantExecutionTools.d.ts +0 -1
- package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/remote/RemoteLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_ListModels_Response.d.ts +3 -3
- package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_Prompt_Response.d.ts +2 -2
- package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +4 -23
- package/esm/typings/src/prepare/prepareTemplates.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/Scraper.d.ts +2 -9
- package/esm/typings/src/scrapers/_common/prepareKnowledgePieces.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +2 -2
- package/esm/typings/src/scrapers/document/DocumentScraper.d.ts +2 -2
- package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/LegacyDocumentScraper.d.ts +3 -3
- package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/MarkdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/PdfScraper.d.ts +2 -2
- package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/register-metadata.d.ts +1 -1
- package/esm/typings/src/scripting/javascript/utils/preserve.d.ts +1 -1
- package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +1 -1
- package/esm/typings/src/types/Arrayable.d.ts +1 -1
- package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +7 -0
- package/esm/typings/src/types/PipelineJson/KnowledgePieceJson.d.ts +4 -4
- package/esm/typings/src/types/PipelineJson/KnowledgeSourceJson.d.ts +1 -1
- package/esm/typings/src/types/PipelineJson/PersonaJson.d.ts +1 -1
- package/esm/typings/src/types/PipelineJson/TemplateJsonCommon.d.ts +2 -2
- package/esm/typings/src/types/Prompt.d.ts +1 -1
- package/esm/typings/src/types/execution-report/ExecutionReportJson.d.ts +1 -1
- package/esm/typings/src/utils/$Register.d.ts +1 -1
- package/esm/typings/src/utils/FromtoItems.d.ts +1 -1
- package/esm/typings/src/utils/arrayableToArray.d.ts +1 -1
- package/esm/typings/src/utils/emojis.d.ts +1 -1
- package/esm/typings/src/utils/execCommand/$execCommand.d.ts +2 -2
- package/esm/typings/src/utils/execCommand/{IExecCommandOptions.d.ts → ExecCommandOptions.d.ts} +2 -6
- package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.d.ts +3 -3
- package/esm/typings/src/utils/expectation-counters/countSentences.d.ts +1 -1
- package/esm/typings/src/utils/files/isExecutable.d.ts +11 -0
- package/esm/typings/src/utils/markdown/extractAllBlocksFromMarkdown.d.ts +1 -1
- package/esm/typings/src/utils/markdown/splitMarkdownIntoSections.d.ts +1 -1
- package/esm/typings/src/utils/normalization/IKeywords.d.ts +2 -2
- package/esm/typings/src/utils/normalization/parseKeywords.d.ts +2 -2
- package/esm/typings/src/utils/normalization/parseKeywordsFromString.d.ts +2 -2
- package/esm/typings/src/utils/normalization/searchKeywords.d.ts +2 -2
- package/esm/typings/src/utils/organization/TODO_USE.d.ts +1 -1
- package/esm/typings/src/utils/organization/keepUnused.d.ts +1 -1
- package/esm/typings/src/utils/random/$randomSeed.d.ts +1 -1
- package/esm/typings/src/utils/sets/intersection.d.ts +1 -1
- package/esm/typings/src/utils/sets/union.d.ts +1 -1
- package/esm/typings/src/utils/unwrapResult.d.ts +4 -4
- package/package.json +4 -3
- package/umd/index.umd.js +136 -136
- package/umd/index.umd.js.map +1 -1
- package/esm/typings/src/_packages/website-crawler.index.d.ts +0 -8
- package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorSettings.d.ts +0 -29
- package/esm/typings/src/scrapers/website/WebsiteScraper.d.ts +0 -47
- package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +0 -20
- package/esm/typings/src/scrapers/website/playground/website-scraper-playground.d.ts +0 -5
- package/esm/typings/src/scrapers/website/register-constructor.d.ts +0 -13
- package/esm/typings/src/scrapers/website/register-metadata.d.ts +0 -24
- package/esm/typings/src/scrapers/website/utils/markdownConverter.d.ts +0 -12
- /package/esm/typings/src/{scrapers/website/WebsiteScraper.test.d.ts → executables/apps/locateLibreoffice.test.d.ts} +0 -0
- /package/esm/typings/src/{scrapers/website/utils/markdownConverter.test.d.ts → executables/apps/locatePandoc.test.d.ts} +0 -0
package/umd/index.umd.js
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
/**
|
|
16
16
|
* The version of the Promptbook library
|
|
17
17
|
*/
|
|
18
|
-
var PROMPTBOOK_VERSION = '0.72.0-
|
|
18
|
+
var PROMPTBOOK_VERSION = '0.72.0-22';
|
|
19
19
|
// TODO: [main] !!!! List here all the versions and annotate + put into script
|
|
20
20
|
|
|
21
21
|
/*! *****************************************************************************
|
|
@@ -363,18 +363,25 @@
|
|
|
363
363
|
* @private within the repository - too low-level in comparison with other `MAX_...`
|
|
364
364
|
*/
|
|
365
365
|
var IMMEDIATE_TIME = 10;
|
|
366
|
+
/**
|
|
367
|
+
* Strategy for caching the intermediate results for knowledge sources
|
|
368
|
+
*
|
|
369
|
+
* @public exported from `@promptbook/core`
|
|
370
|
+
*/
|
|
371
|
+
var DEFAULT_INTERMEDIATE_FILES_STRATEGY = 'HIDE_AND_KEEP';
|
|
372
|
+
// <- TODO: [😡] Change to 'VISIBLE'
|
|
366
373
|
/**
|
|
367
374
|
* The maximum number of (LLM) tasks running in parallel
|
|
368
375
|
*
|
|
369
376
|
* @public exported from `@promptbook/core`
|
|
370
377
|
*/
|
|
371
|
-
var
|
|
378
|
+
var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹♂️]
|
|
372
379
|
/**
|
|
373
380
|
* The maximum number of attempts to execute LLM task before giving up
|
|
374
381
|
*
|
|
375
382
|
* @public exported from `@promptbook/core`
|
|
376
383
|
*/
|
|
377
|
-
var
|
|
384
|
+
var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹♂️]
|
|
378
385
|
/**
|
|
379
386
|
* Where to store the scrape cache
|
|
380
387
|
*
|
|
@@ -382,7 +389,7 @@
|
|
|
382
389
|
*
|
|
383
390
|
* @public exported from `@promptbook/core`
|
|
384
391
|
*/
|
|
385
|
-
var
|
|
392
|
+
var DEFAULT_SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
|
|
386
393
|
/**
|
|
387
394
|
* Nonce which is used for replacing things in strings
|
|
388
395
|
*
|
|
@@ -434,7 +441,7 @@
|
|
|
434
441
|
*
|
|
435
442
|
* @public exported from `@promptbook/core`
|
|
436
443
|
*/
|
|
437
|
-
var
|
|
444
|
+
var DEFAULT_IS_VERBOSE = false;
|
|
438
445
|
/**
|
|
439
446
|
* @@@
|
|
440
447
|
*
|
|
@@ -1150,12 +1157,12 @@
|
|
|
1150
1157
|
*/
|
|
1151
1158
|
function getScraperIntermediateSource(source, options) {
|
|
1152
1159
|
return __awaiter(this, void 0, void 0, function () {
|
|
1153
|
-
var sourceFilename, url, rootDirname, cacheDirname,
|
|
1160
|
+
var sourceFilename, url, rootDirname, cacheDirname, intermediateFilesStrategy, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
|
|
1154
1161
|
return __generator(this, function (_a) {
|
|
1155
1162
|
switch (_a.label) {
|
|
1156
1163
|
case 0:
|
|
1157
1164
|
sourceFilename = source.filename, url = source.url;
|
|
1158
|
-
rootDirname = options.rootDirname, cacheDirname = options.cacheDirname,
|
|
1165
|
+
rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, intermediateFilesStrategy = options.intermediateFilesStrategy, extension = options.extension, isVerbose = options.isVerbose;
|
|
1159
1166
|
hash = cryptoJs.SHA256(
|
|
1160
1167
|
// <- TODO: [🥬] Encapsulate sha256 to some private utility function
|
|
1161
1168
|
hexEncoder__default["default"].parse(sourceFilename || url || 'untitled'))
|
|
@@ -1165,7 +1172,7 @@
|
|
|
1165
1172
|
pieces = ['intermediate', semanticName, hash].filter(function (piece) { return piece !== ''; });
|
|
1166
1173
|
name = pieces.join('-').split('--').join('-');
|
|
1167
1174
|
// <- TODO: Use MAX_FILENAME_LENGTH
|
|
1168
|
-
TODO_USE(rootDirname); // <- TODO:
|
|
1175
|
+
TODO_USE(rootDirname); // <- TODO: [😡]
|
|
1169
1176
|
cacheFilename = path.join.apply(void 0, __spreadArray(__spreadArray([process.cwd(),
|
|
1170
1177
|
cacheDirname], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), [name], false)).split('\\')
|
|
1171
1178
|
.join('/') +
|
|
@@ -1185,7 +1192,7 @@
|
|
|
1185
1192
|
return __generator(this, function (_a) {
|
|
1186
1193
|
switch (_a.label) {
|
|
1187
1194
|
case 0:
|
|
1188
|
-
if (!
|
|
1195
|
+
if (!(intermediateFilesStrategy === 'HIDE_AND_CLEAN')) return [3 /*break*/, 2];
|
|
1189
1196
|
if (isVerbose) {
|
|
1190
1197
|
console.info('legacyDocumentScraper: Clening cache');
|
|
1191
1198
|
}
|
|
@@ -1209,7 +1216,7 @@
|
|
|
1209
1216
|
/**
|
|
1210
1217
|
* Note: Not using `FileCacheStorage` for two reasons:
|
|
1211
1218
|
* 1) Need to store more than serialized JSONs
|
|
1212
|
-
* 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO:
|
|
1219
|
+
* 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: [😡]
|
|
1213
1220
|
* TODO: [🐱🐉][🧠] Make some smart crop
|
|
1214
1221
|
* Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
|
|
1215
1222
|
*/
|
|
@@ -1274,7 +1281,6 @@
|
|
|
1274
1281
|
pipelineString += '\n\n';
|
|
1275
1282
|
pipelineString += description;
|
|
1276
1283
|
}
|
|
1277
|
-
// TODO:> const commands: Array<Command>
|
|
1278
1284
|
var commands = [];
|
|
1279
1285
|
if (pipelineUrl) {
|
|
1280
1286
|
commands.push("PIPELINE URL ".concat(pipelineUrl));
|
|
@@ -1330,7 +1336,6 @@
|
|
|
1330
1336
|
pipelineString += '\n\n';
|
|
1331
1337
|
pipelineString += description_1;
|
|
1332
1338
|
}
|
|
1333
|
-
// TODO:> const commands: Array<Command>
|
|
1334
1339
|
var commands_1 = [];
|
|
1335
1340
|
var contentLanguage = 'text';
|
|
1336
1341
|
if (templateType === 'PROMPT_TEMPLATE') {
|
|
@@ -2215,6 +2220,7 @@
|
|
|
2215
2220
|
}
|
|
2216
2221
|
}
|
|
2217
2222
|
/**
|
|
2223
|
+
* TODO: [🐚] This function should be removed OR changed OR be completely rewritten
|
|
2218
2224
|
* TODO: [🧠] Can this return type be better typed than void
|
|
2219
2225
|
*/
|
|
2220
2226
|
|
|
@@ -2400,8 +2406,7 @@
|
|
|
2400
2406
|
* Multiple LLM Execution Tools is a proxy server that uses multiple execution tools internally and exposes the executor interface externally.
|
|
2401
2407
|
*
|
|
2402
2408
|
* Note: Internal utility of `joinLlmExecutionTools` but exposed type
|
|
2403
|
-
* @public exported from `@promptbook/
|
|
2404
|
-
* TODO: !!!!!! Export as runtime class not just type
|
|
2409
|
+
* @public exported from `@promptbook/core`
|
|
2405
2410
|
*/
|
|
2406
2411
|
var MultipleLlmExecutionTools = /** @class */ (function () {
|
|
2407
2412
|
/**
|
|
@@ -2903,7 +2908,7 @@
|
|
|
2903
2908
|
return __generator(this, function (_d) {
|
|
2904
2909
|
switch (_d.label) {
|
|
2905
2910
|
case 0:
|
|
2906
|
-
_a = options.isVerbose, isVerbose = _a === void 0 ?
|
|
2911
|
+
_a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
|
|
2907
2912
|
if (tools === undefined || tools.llm === undefined) {
|
|
2908
2913
|
throw new MissingToolsError('LLM tools are required for preparing persona');
|
|
2909
2914
|
}
|
|
@@ -3069,7 +3074,7 @@
|
|
|
3069
3074
|
this.storage = globalScope[storageName];
|
|
3070
3075
|
}
|
|
3071
3076
|
$Register.prototype.list = function () {
|
|
3072
|
-
// <- TODO: ReadonlyDeep<
|
|
3077
|
+
// <- TODO: ReadonlyDeep<ReadonlyArray<TRegistered>>
|
|
3073
3078
|
return this.storage;
|
|
3074
3079
|
};
|
|
3075
3080
|
$Register.prototype.register = function (registered) {
|
|
@@ -3121,7 +3126,6 @@
|
|
|
3121
3126
|
* TODO: [®] DRY Register logic
|
|
3122
3127
|
*/
|
|
3123
3128
|
|
|
3124
|
-
// TODO: !!!!!! Maybe delete this function
|
|
3125
3129
|
/**
|
|
3126
3130
|
* Creates a message with all registered scrapers
|
|
3127
3131
|
*
|
|
@@ -3229,7 +3233,6 @@
|
|
|
3229
3233
|
* @private within the repository
|
|
3230
3234
|
*/
|
|
3231
3235
|
function sourceContentToName(sourceContent) {
|
|
3232
|
-
// TODO: !!!!!! Better name for source than gibberish hash
|
|
3233
3236
|
var hash = cryptoJs.SHA256(hexEncoder__default["default"].parse(JSON.stringify(sourceContent)))
|
|
3234
3237
|
// <- TODO: [🥬] Encapsulate sha256 to some private utility function
|
|
3235
3238
|
.toString( /* hex */)
|
|
@@ -3264,13 +3267,13 @@
|
|
|
3264
3267
|
return __awaiter(this, void 0, void 0, function () {
|
|
3265
3268
|
var sourceContent, name, _b, _c, rootDirname, _d,
|
|
3266
3269
|
// <- TODO: process.cwd() if running in Node.js
|
|
3267
|
-
isVerbose, url, response_1, mimeType, filename_1, fileExtension,
|
|
3270
|
+
isVerbose, url, response_1, mimeType, filename_1, fileExtension, mimeType;
|
|
3268
3271
|
return __generator(this, function (_e) {
|
|
3269
3272
|
switch (_e.label) {
|
|
3270
3273
|
case 0:
|
|
3271
3274
|
sourceContent = knowledgeSource.sourceContent;
|
|
3272
3275
|
name = knowledgeSource.name;
|
|
3273
|
-
_b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ?
|
|
3276
|
+
_b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d;
|
|
3274
3277
|
TODO_USE(isVerbose);
|
|
3275
3278
|
if (!name) {
|
|
3276
3279
|
name = sourceContentToName(sourceContent);
|
|
@@ -3286,19 +3289,14 @@
|
|
|
3286
3289
|
filename: null,
|
|
3287
3290
|
url: url,
|
|
3288
3291
|
mimeType: mimeType,
|
|
3289
|
-
|
|
3290
|
-
|
|
3291
|
-
|
|
3292
|
-
|
|
3293
|
-
|
|
3294
|
-
|
|
3295
|
-
|
|
3296
|
-
|
|
3297
|
-
return [2 /*return*/, content];
|
|
3298
|
-
}
|
|
3299
|
-
});
|
|
3300
|
-
});
|
|
3301
|
-
},
|
|
3292
|
+
/*
|
|
3293
|
+
TODO: [🥽]
|
|
3294
|
+
> async asBlob() {
|
|
3295
|
+
> // TODO: [👨🏻🤝👨🏻] This can be called multiple times BUT when called second time, response in already consumed
|
|
3296
|
+
> const content = await response.blob();
|
|
3297
|
+
> return content;
|
|
3298
|
+
> },
|
|
3299
|
+
*/
|
|
3302
3300
|
asJson: function () {
|
|
3303
3301
|
return __awaiter(this, void 0, void 0, function () {
|
|
3304
3302
|
var content;
|
|
@@ -3338,34 +3336,31 @@
|
|
|
3338
3336
|
}
|
|
3339
3337
|
filename_1 = path.join(rootDirname, sourceContent).split('\\').join('/');
|
|
3340
3338
|
fileExtension = getFileExtension(filename_1);
|
|
3341
|
-
|
|
3339
|
+
mimeType = extensionToMimeType(fileExtension || '');
|
|
3342
3340
|
return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
|
|
3343
3341
|
case 3:
|
|
3344
3342
|
if (!(_e.sent())) {
|
|
3345
3343
|
throw new NotFoundError(spaceTrim__default["default"](function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(filename_1), "\n "); }));
|
|
3346
3344
|
}
|
|
3347
|
-
// TODO:
|
|
3345
|
+
// TODO: [🧠][😿] Test security file - file is scoped to the project (BUT maybe do this in `filesystemTools`)
|
|
3348
3346
|
return [2 /*return*/, {
|
|
3349
3347
|
source: name,
|
|
3350
3348
|
filename: filename_1,
|
|
3351
3349
|
url: null,
|
|
3352
|
-
mimeType:
|
|
3353
|
-
|
|
3354
|
-
|
|
3355
|
-
|
|
3356
|
-
|
|
3357
|
-
|
|
3358
|
-
|
|
3359
|
-
|
|
3360
|
-
|
|
3361
|
-
|
|
3362
|
-
|
|
3363
|
-
|
|
3364
|
-
|
|
3365
|
-
|
|
3366
|
-
});
|
|
3367
|
-
});
|
|
3368
|
-
},
|
|
3350
|
+
mimeType: mimeType,
|
|
3351
|
+
/*
|
|
3352
|
+
TODO: [🥽]
|
|
3353
|
+
> async asBlob() {
|
|
3354
|
+
> const content = await tools.fs!.readFile(filename);
|
|
3355
|
+
> return new Blob(
|
|
3356
|
+
> [
|
|
3357
|
+
> content,
|
|
3358
|
+
> // <- TODO: [🥽] This is NOT tested, test it
|
|
3359
|
+
> ],
|
|
3360
|
+
> { type: mimeType },
|
|
3361
|
+
> );
|
|
3362
|
+
> },
|
|
3363
|
+
*/
|
|
3369
3364
|
asJson: function () {
|
|
3370
3365
|
return __awaiter(this, void 0, void 0, function () {
|
|
3371
3366
|
var _a, _b;
|
|
@@ -3401,9 +3396,14 @@
|
|
|
3401
3396
|
asJson: function () {
|
|
3402
3397
|
throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
|
|
3403
3398
|
},
|
|
3404
|
-
|
|
3405
|
-
|
|
3406
|
-
|
|
3399
|
+
/*
|
|
3400
|
+
TODO: [🥽]
|
|
3401
|
+
> asBlob() {
|
|
3402
|
+
> throw new UnexpectedError(
|
|
3403
|
+
> 'Did not expect that `markdownScraper` would need to get the content `asBlob`',
|
|
3404
|
+
> );
|
|
3405
|
+
> },
|
|
3406
|
+
*/
|
|
3407
3407
|
}];
|
|
3408
3408
|
}
|
|
3409
3409
|
});
|
|
@@ -3423,7 +3423,7 @@
|
|
|
3423
3423
|
return __generator(this, function (_c) {
|
|
3424
3424
|
switch (_c.label) {
|
|
3425
3425
|
case 0:
|
|
3426
|
-
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3426
|
+
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
|
|
3427
3427
|
knowledgePreparedUnflatten = new Array(knowledgeSources.length);
|
|
3428
3428
|
return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
3429
3429
|
var partialPieces, sourceHandler, _a, _b, scraper, partialPiecesUnchecked, e_1_1, pieces;
|
|
@@ -3452,7 +3452,8 @@
|
|
|
3452
3452
|
case 4:
|
|
3453
3453
|
partialPiecesUnchecked = _d.sent();
|
|
3454
3454
|
if (partialPiecesUnchecked !== null) {
|
|
3455
|
-
partialPieces = partialPiecesUnchecked;
|
|
3455
|
+
partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
|
|
3456
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
|
|
3456
3457
|
return [3 /*break*/, 6];
|
|
3457
3458
|
}
|
|
3458
3459
|
_d.label = 5;
|
|
@@ -3500,7 +3501,7 @@
|
|
|
3500
3501
|
> /**
|
|
3501
3502
|
> * Unprepared knowledge
|
|
3502
3503
|
> * /
|
|
3503
|
-
> readonly knowledgeSources:
|
|
3504
|
+
> readonly knowledgeSources: ReadonlyArray<KnowledgeSourceJson>;
|
|
3504
3505
|
> };
|
|
3505
3506
|
>
|
|
3506
3507
|
> export async function prepareKnowledgePieces(
|
|
@@ -3558,7 +3559,7 @@
|
|
|
3558
3559
|
return __generator(this, function (_b) {
|
|
3559
3560
|
switch (_b.label) {
|
|
3560
3561
|
case 0:
|
|
3561
|
-
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3562
|
+
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a;
|
|
3562
3563
|
templates = pipeline.templates, parameters = pipeline.parameters, knowledgePiecesCount = pipeline.knowledgePiecesCount;
|
|
3563
3564
|
// TODO: [main] !!!!! Apply samples to each template (if missing and is for the template defined)
|
|
3564
3565
|
TODO_USE(parameters);
|
|
@@ -3620,7 +3621,7 @@
|
|
|
3620
3621
|
if (isPipelinePrepared(pipeline)) {
|
|
3621
3622
|
return [2 /*return*/, pipeline];
|
|
3622
3623
|
}
|
|
3623
|
-
rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3624
|
+
rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
|
|
3624
3625
|
parameters = pipeline.parameters, templates = pipeline.templates, knowledgeSources = pipeline.knowledgeSources, personas = pipeline.personas;
|
|
3625
3626
|
if (tools === undefined || tools.llm === undefined) {
|
|
3626
3627
|
throw new MissingToolsError('LLM tools are required for preparing the pipeline');
|
|
@@ -3678,7 +3679,9 @@
|
|
|
3678
3679
|
// ----- /Templates preparation -----
|
|
3679
3680
|
// Note: Count total usage
|
|
3680
3681
|
currentPreparation.usage = llmToolsWithUsage.getTotalUsage();
|
|
3681
|
-
return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates:
|
|
3682
|
+
return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates: __spreadArray([], __read(templatesPrepared), false),
|
|
3683
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just ` templates: templatesPrepared`
|
|
3684
|
+
knowledgeSources: knowledgeSourcesPrepared, knowledgePieces: knowledgePiecesPrepared, personas: preparedPersonas, preparations: __spreadArray([], __read(preparations), false) }))];
|
|
3682
3685
|
}
|
|
3683
3686
|
});
|
|
3684
3687
|
});
|
|
@@ -4649,12 +4652,11 @@
|
|
|
4649
4652
|
*/
|
|
4650
4653
|
function executeAttempts(options) {
|
|
4651
4654
|
return __awaiter(this, void 0, void 0, function () {
|
|
4652
|
-
var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools,
|
|
4655
|
+
var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools, $executionReport, pipelineIdentification, maxExecutionAttempts, $ongoingTemplateResult, _llms, llmTools, _loop_1, attempt, state_1;
|
|
4653
4656
|
return __generator(this, function (_a) {
|
|
4654
4657
|
switch (_a.label) {
|
|
4655
4658
|
case 0:
|
|
4656
|
-
jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools,
|
|
4657
|
-
maxExecutionAttempts = settings.maxExecutionAttempts;
|
|
4659
|
+
jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, maxExecutionAttempts = options.maxExecutionAttempts;
|
|
4658
4660
|
$ongoingTemplateResult = {
|
|
4659
4661
|
$result: null,
|
|
4660
4662
|
$resultString: null,
|
|
@@ -5020,12 +5022,12 @@
|
|
|
5020
5022
|
*/
|
|
5021
5023
|
function executeFormatSubvalues(options) {
|
|
5022
5024
|
return __awaiter(this, void 0, void 0, function () {
|
|
5023
|
-
var template, jokerParameterNames, parameters, priority,
|
|
5025
|
+
var template, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification, parameterValue, formatDefinition, subvalueDefinition, formatSettings, resultString;
|
|
5024
5026
|
var _this = this;
|
|
5025
5027
|
return __generator(this, function (_a) {
|
|
5026
5028
|
switch (_a.label) {
|
|
5027
5029
|
case 0:
|
|
5028
|
-
template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority,
|
|
5030
|
+
template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority, csvSettings = options.csvSettings, pipelineIdentification = options.pipelineIdentification;
|
|
5029
5031
|
if (template.foreach === undefined) {
|
|
5030
5032
|
return [2 /*return*/, /* not await */ executeAttempts(options)];
|
|
5031
5033
|
}
|
|
@@ -5055,7 +5057,7 @@
|
|
|
5055
5057
|
.join('\n')), "\n\n [\u26F7] This should never happen because subformat name should be validated during parsing\n\n ").concat(block(pipelineIdentification), "\n "); }));
|
|
5056
5058
|
}
|
|
5057
5059
|
if (formatDefinition.formatName === 'CSV') {
|
|
5058
|
-
formatSettings =
|
|
5060
|
+
formatSettings = csvSettings;
|
|
5059
5061
|
// <- TODO: [🤹♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
|
|
5060
5062
|
}
|
|
5061
5063
|
return [4 /*yield*/, subvalueDefinition.mapValues(parameterValue, template.foreach.outputSubparameterName, formatSettings, function (subparameters, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
@@ -5208,13 +5210,12 @@
|
|
|
5208
5210
|
*/
|
|
5209
5211
|
function executeTemplate(options) {
|
|
5210
5212
|
return __awaiter(this, void 0, void 0, function () {
|
|
5211
|
-
var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress,
|
|
5212
|
-
var e_1,
|
|
5213
|
-
return __generator(this, function (
|
|
5214
|
-
switch (
|
|
5213
|
+
var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, _a, maxExecutionAttempts, name, title, priority, usedParameterNames, dependentParameterNames, definedParameters, _b, _c, _d, definedParameterNames, parameters, _loop_1, _e, _f, parameterName, maxAttempts, jokerParameterNames, preparedContent, resultString;
|
|
5214
|
+
var e_1, _g, _h;
|
|
5215
|
+
return __generator(this, function (_j) {
|
|
5216
|
+
switch (_j.label) {
|
|
5215
5217
|
case 0:
|
|
5216
|
-
currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress,
|
|
5217
|
-
maxExecutionAttempts = settings.maxExecutionAttempts;
|
|
5218
|
+
currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a;
|
|
5218
5219
|
name = "pipeline-executor-frame-".concat(currentTemplate.name);
|
|
5219
5220
|
title = currentTemplate.title;
|
|
5220
5221
|
priority = preparedPipeline.templates.length - preparedPipeline.templates.indexOf(currentTemplate);
|
|
@@ -5229,7 +5230,7 @@
|
|
|
5229
5230
|
// <- [🍸]
|
|
5230
5231
|
})];
|
|
5231
5232
|
case 1:
|
|
5232
|
-
|
|
5233
|
+
_j.sent();
|
|
5233
5234
|
usedParameterNames = extractParameterNamesFromTemplate(currentTemplate);
|
|
5234
5235
|
dependentParameterNames = new Set(currentTemplate.dependentParameterNames);
|
|
5235
5236
|
// TODO: [👩🏾🤝👩🏻] Use here `mapAvailableToExpectedParameters`
|
|
@@ -5240,15 +5241,15 @@
|
|
|
5240
5241
|
.map(function (name) { return "{".concat(name, "}"); })
|
|
5241
5242
|
.join(', '), "\n\n ").concat(block(pipelineIdentification), "\n\n "); }));
|
|
5242
5243
|
}
|
|
5243
|
-
|
|
5244
|
-
|
|
5244
|
+
_c = (_b = Object).freeze;
|
|
5245
|
+
_d = [{}];
|
|
5245
5246
|
return [4 /*yield*/, getReservedParametersForTemplate({
|
|
5246
5247
|
preparedPipeline: preparedPipeline,
|
|
5247
5248
|
template: currentTemplate,
|
|
5248
5249
|
pipelineIdentification: pipelineIdentification,
|
|
5249
5250
|
})];
|
|
5250
5251
|
case 2:
|
|
5251
|
-
definedParameters =
|
|
5252
|
+
definedParameters = _c.apply(_b, [__assign.apply(void 0, [__assign.apply(void 0, _d.concat([(_j.sent())])), parametersToPass])]);
|
|
5252
5253
|
definedParameterNames = new Set(Object.keys(definedParameters));
|
|
5253
5254
|
parameters = {};
|
|
5254
5255
|
_loop_1 = function (parameterName) {
|
|
@@ -5268,15 +5269,15 @@
|
|
|
5268
5269
|
try {
|
|
5269
5270
|
// Note: [2] Check that all used parameters are defined and removing unused parameters for this template
|
|
5270
5271
|
// TODO: [👩🏾🤝👩🏻] Use here `mapAvailableToExpectedParameters`
|
|
5271
|
-
for (
|
|
5272
|
-
parameterName =
|
|
5272
|
+
for (_e = __values(Array.from(union(definedParameterNames, usedParameterNames, dependentParameterNames))), _f = _e.next(); !_f.done; _f = _e.next()) {
|
|
5273
|
+
parameterName = _f.value;
|
|
5273
5274
|
_loop_1(parameterName);
|
|
5274
5275
|
}
|
|
5275
5276
|
}
|
|
5276
5277
|
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
5277
5278
|
finally {
|
|
5278
5279
|
try {
|
|
5279
|
-
if (
|
|
5280
|
+
if (_f && !_f.done && (_g = _e.return)) _g.call(_e);
|
|
5280
5281
|
}
|
|
5281
5282
|
finally { if (e_1) throw e_1.error; }
|
|
5282
5283
|
}
|
|
@@ -5296,12 +5297,11 @@
|
|
|
5296
5297
|
template: currentTemplate,
|
|
5297
5298
|
preparedPipeline: preparedPipeline,
|
|
5298
5299
|
tools: tools,
|
|
5299
|
-
settings: settings,
|
|
5300
5300
|
$executionReport: $executionReport,
|
|
5301
5301
|
pipelineIdentification: pipelineIdentification,
|
|
5302
5302
|
})];
|
|
5303
5303
|
case 3:
|
|
5304
|
-
resultString =
|
|
5304
|
+
resultString = _j.sent();
|
|
5305
5305
|
return [4 /*yield*/, onProgress({
|
|
5306
5306
|
name: name,
|
|
5307
5307
|
title: title,
|
|
@@ -5313,12 +5313,12 @@
|
|
|
5313
5313
|
// <- [🍸]
|
|
5314
5314
|
})];
|
|
5315
5315
|
case 4:
|
|
5316
|
-
|
|
5317
|
-
return [2 /*return*/, Object.freeze((
|
|
5318
|
-
|
|
5316
|
+
_j.sent();
|
|
5317
|
+
return [2 /*return*/, Object.freeze((_h = {},
|
|
5318
|
+
_h[currentTemplate.resultingParameterName] =
|
|
5319
5319
|
// <- Note: [👩👩👧] No need to detect parameter collision here because pipeline checks logic consistency during construction
|
|
5320
5320
|
resultString,
|
|
5321
|
-
|
|
5321
|
+
_h))];
|
|
5322
5322
|
}
|
|
5323
5323
|
});
|
|
5324
5324
|
});
|
|
@@ -5377,13 +5377,12 @@
|
|
|
5377
5377
|
*/
|
|
5378
5378
|
function executePipeline(options) {
|
|
5379
5379
|
return __awaiter(this, void 0, void 0, function () {
|
|
5380
|
-
var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification,
|
|
5380
|
+
var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
|
|
5381
5381
|
var e_1, _f, e_2, _g;
|
|
5382
5382
|
return __generator(this, function (_h) {
|
|
5383
5383
|
switch (_h.label) {
|
|
5384
5384
|
case 0:
|
|
5385
|
-
inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification,
|
|
5386
|
-
maxParallelCount = settings.maxParallelCount, rootDirname = settings.rootDirname, _a = settings.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
|
|
5385
|
+
inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, maxParallelCount = options.maxParallelCount, rootDirname = options.rootDirname, _a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
|
|
5387
5386
|
preparedPipeline = options.preparedPipeline;
|
|
5388
5387
|
if (!(preparedPipeline === undefined)) return [3 /*break*/, 2];
|
|
5389
5388
|
return [4 /*yield*/, preparePipeline(pipeline, tools, {
|
|
@@ -5568,12 +5567,7 @@
|
|
|
5568
5567
|
return [3 /*break*/, 4];
|
|
5569
5568
|
case 3:
|
|
5570
5569
|
unresovedTemplates_1 = unresovedTemplates_1.filter(function (template) { return template !== currentTemplate; });
|
|
5571
|
-
work_1 = executeTemplate({
|
|
5572
|
-
currentTemplate: currentTemplate,
|
|
5573
|
-
preparedPipeline: preparedPipeline,
|
|
5574
|
-
parametersToPass: parametersToPass,
|
|
5575
|
-
tools: tools,
|
|
5576
|
-
onProgress: function (progress) {
|
|
5570
|
+
work_1 = executeTemplate(__assign(__assign({}, options), { currentTemplate: currentTemplate, preparedPipeline: preparedPipeline, parametersToPass: parametersToPass, tools: tools, onProgress: function (progress) {
|
|
5577
5571
|
if (isReturned) {
|
|
5578
5572
|
throw new UnexpectedError(spaceTrim.spaceTrim(function (block) { return "\n Can not call `onProgress` after pipeline execution is finished\n\n ".concat(block(pipelineIdentification), "\n\n ").concat(block(JSON.stringify(progress, null, 4)
|
|
5579
5573
|
.split('\n')
|
|
@@ -5583,11 +5577,7 @@
|
|
|
5583
5577
|
if (onProgress) {
|
|
5584
5578
|
onProgress(progress);
|
|
5585
5579
|
}
|
|
5586
|
-
},
|
|
5587
|
-
settings: settings,
|
|
5588
|
-
$executionReport: executionReport,
|
|
5589
|
-
pipelineIdentification: spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }),
|
|
5590
|
-
})
|
|
5580
|
+
}, $executionReport: executionReport, pipelineIdentification: spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }) }))
|
|
5591
5581
|
.then(function (newParametersToPass) {
|
|
5592
5582
|
parametersToPass = __assign(__assign({}, newParametersToPass), parametersToPass);
|
|
5593
5583
|
resovedParameterNames_1 = __spreadArray(__spreadArray([], __read(resovedParameterNames_1), false), [currentTemplate.resultingParameterName], false);
|
|
@@ -5691,8 +5681,7 @@
|
|
|
5691
5681
|
*/
|
|
5692
5682
|
function createPipelineExecutor(options) {
|
|
5693
5683
|
var _this = this;
|
|
5694
|
-
var pipeline = options.pipeline, tools = options.tools, _a = options.
|
|
5695
|
-
var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f, _g = settings.rootDirname, rootDirname = _g === void 0 ? null : _g;
|
|
5684
|
+
var pipeline = options.pipeline, tools = options.tools, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a, _b = options.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = options.csvSettings, csvSettings = _c === void 0 ? DEFAULT_CSV_SETTINGS : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d, _e = options.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _e === void 0 ? false : _e, _f = options.rootDirname, rootDirname = _f === void 0 ? null : _f;
|
|
5696
5685
|
validatePipeline(pipeline);
|
|
5697
5686
|
var pipelineIdentification = (function () {
|
|
5698
5687
|
// Note: This is a 😐 implementation of [🚞]
|
|
@@ -5726,14 +5715,12 @@
|
|
|
5726
5715
|
tools: tools,
|
|
5727
5716
|
onProgress: onProgress,
|
|
5728
5717
|
pipelineIdentification: spaceTrim.spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
|
|
5729
|
-
|
|
5730
|
-
|
|
5731
|
-
|
|
5732
|
-
|
|
5733
|
-
|
|
5734
|
-
|
|
5735
|
-
rootDirname: rootDirname,
|
|
5736
|
-
},
|
|
5718
|
+
maxExecutionAttempts: maxExecutionAttempts,
|
|
5719
|
+
maxParallelCount: maxParallelCount,
|
|
5720
|
+
csvSettings: csvSettings,
|
|
5721
|
+
isVerbose: isVerbose,
|
|
5722
|
+
isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
|
|
5723
|
+
rootDirname: rootDirname,
|
|
5737
5724
|
})];
|
|
5738
5725
|
});
|
|
5739
5726
|
}); };
|
|
@@ -5755,7 +5742,7 @@
|
|
|
5755
5742
|
mimeTypes: ['text/markdown', 'text/plain'],
|
|
5756
5743
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
5757
5744
|
isAvilableInBrowser: true,
|
|
5758
|
-
requiredExecutables: [
|
|
5745
|
+
requiredExecutables: [],
|
|
5759
5746
|
}); /* <- TODO: [🤛] */
|
|
5760
5747
|
/**
|
|
5761
5748
|
* Registration of known scraper metadata
|
|
@@ -5799,7 +5786,7 @@
|
|
|
5799
5786
|
return __generator(this, function (_k) {
|
|
5800
5787
|
switch (_k.label) {
|
|
5801
5788
|
case 0:
|
|
5802
|
-
_a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ?
|
|
5789
|
+
_a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = _a.isVerbose, isVerbose = _c === void 0 ? DEFAULT_IS_VERBOSE : _c;
|
|
5803
5790
|
llm = this.tools.llm;
|
|
5804
5791
|
if (llm === undefined) {
|
|
5805
5792
|
throw new MissingToolsError('LLM tools are required for scraping external files');
|
|
@@ -5898,7 +5885,8 @@
|
|
|
5898
5885
|
embeddingResult = _c.sent();
|
|
5899
5886
|
index.push({
|
|
5900
5887
|
modelName: embeddingResult.modelName,
|
|
5901
|
-
position: embeddingResult.content,
|
|
5888
|
+
position: __spreadArray([], __read(embeddingResult.content), false),
|
|
5889
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just `position: embeddingResult.content`
|
|
5902
5890
|
});
|
|
5903
5891
|
_c.label = 6;
|
|
5904
5892
|
case 6: return [3 /*break*/, 8];
|
|
@@ -5949,7 +5937,7 @@
|
|
|
5949
5937
|
mimeTypes: ['application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
|
|
5950
5938
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
5951
5939
|
isAvilableInBrowser: false,
|
|
5952
|
-
requiredExecutables: ['
|
|
5940
|
+
requiredExecutables: ['Pandoc'],
|
|
5953
5941
|
}); /* <- TODO: [🤛] */
|
|
5954
5942
|
/**
|
|
5955
5943
|
* Registration of known scraper metadata
|
|
@@ -5989,12 +5977,13 @@
|
|
|
5989
5977
|
* Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
|
|
5990
5978
|
*/
|
|
5991
5979
|
DocumentScraper.prototype.$convert = function (source) {
|
|
5980
|
+
var _a;
|
|
5992
5981
|
return __awaiter(this, void 0, void 0, function () {
|
|
5993
|
-
var
|
|
5982
|
+
var _b, _c, rootDirname, _d, cacheDirname, _e, intermediateFilesStrategy, _f, isVerbose, extension, cacheFilehandler, command_1;
|
|
5994
5983
|
return __generator(this, function (_g) {
|
|
5995
5984
|
switch (_g.label) {
|
|
5996
5985
|
case 0:
|
|
5997
|
-
|
|
5986
|
+
_b = this.options, _c = _b.rootDirname, rootDirname = _c === void 0 ? process.cwd() : _c, _d = _b.cacheDirname, cacheDirname = _d === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _d, _e = _b.intermediateFilesStrategy, intermediateFilesStrategy = _e === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _e, _f = _b.isVerbose, isVerbose = _f === void 0 ? DEFAULT_IS_VERBOSE : _f;
|
|
5998
5987
|
if (!$isRunningInNode()) {
|
|
5999
5988
|
throw new KnowledgeScrapeError('Scraping .docx files is only supported in Node environment');
|
|
6000
5989
|
}
|
|
@@ -6002,7 +5991,7 @@
|
|
|
6002
5991
|
throw new EnvironmentMismatchError('Can not scrape documents without filesystem tools');
|
|
6003
5992
|
// <- TODO: [🧠] What is the best error type here`
|
|
6004
5993
|
}
|
|
6005
|
-
if (
|
|
5994
|
+
if (((_a = this.tools.executables) === null || _a === void 0 ? void 0 : _a.pandocPath) === undefined) {
|
|
6006
5995
|
throw new MissingToolsError('Pandoc is required for scraping .docx files');
|
|
6007
5996
|
}
|
|
6008
5997
|
if (source.filename === null) {
|
|
@@ -6013,7 +6002,7 @@
|
|
|
6013
6002
|
return [4 /*yield*/, getScraperIntermediateSource(source, {
|
|
6014
6003
|
rootDirname: rootDirname,
|
|
6015
6004
|
cacheDirname: cacheDirname,
|
|
6016
|
-
|
|
6005
|
+
intermediateFilesStrategy: intermediateFilesStrategy,
|
|
6017
6006
|
extension: 'md',
|
|
6018
6007
|
isVerbose: isVerbose,
|
|
6019
6008
|
})];
|
|
@@ -6022,11 +6011,9 @@
|
|
|
6022
6011
|
return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
|
|
6023
6012
|
case 2:
|
|
6024
6013
|
if (!!(_g.sent())) return [3 /*break*/, 5];
|
|
6025
|
-
command_1 = "\"".concat(
|
|
6026
|
-
// TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
|
|
6014
|
+
command_1 = "\"".concat(this.tools.executables.pandocPath, "\" -f ").concat(extension, " -t markdown \"").concat(source.filename, "\" -o \"").concat(cacheFilehandler.filename, "\"");
|
|
6027
6015
|
return [4 /*yield*/, $execCommand(command_1)];
|
|
6028
6016
|
case 3:
|
|
6029
|
-
// TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
|
|
6030
6017
|
_g.sent();
|
|
6031
6018
|
return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
|
|
6032
6019
|
case 4:
|
|
@@ -6071,9 +6058,14 @@
|
|
|
6071
6058
|
asJson: function () {
|
|
6072
6059
|
throw new UnexpectedError('Did not expect that `markdownScraper` would need to get the content `asJson`');
|
|
6073
6060
|
},
|
|
6074
|
-
|
|
6075
|
-
|
|
6076
|
-
|
|
6061
|
+
/*
|
|
6062
|
+
TODO: [🥽]
|
|
6063
|
+
> asBlob() {
|
|
6064
|
+
> throw new UnexpectedError(
|
|
6065
|
+
> 'Did not expect that `markdownScraper` would need to get the content `asBlob`',
|
|
6066
|
+
> );
|
|
6067
|
+
> },
|
|
6068
|
+
*/
|
|
6077
6069
|
};
|
|
6078
6070
|
knowledge = this.markdownScraper.scrape(markdownSource);
|
|
6079
6071
|
return [4 /*yield*/, cacheFilehandler.destroy()];
|
|
@@ -6105,7 +6097,11 @@
|
|
|
6105
6097
|
mimeTypes: ['application/msword', 'text/rtf'],
|
|
6106
6098
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
6107
6099
|
isAvilableInBrowser: false,
|
|
6108
|
-
requiredExecutables: [
|
|
6100
|
+
requiredExecutables: [
|
|
6101
|
+
'Pandoc',
|
|
6102
|
+
'LibreOffice',
|
|
6103
|
+
// <- TODO: [🧠] Should be 'LibreOffice' here, its dependency of dependency
|
|
6104
|
+
],
|
|
6109
6105
|
}); /* <- TODO: [🤛] */
|
|
6110
6106
|
/**
|
|
6111
6107
|
* Registration of known scraper metadata
|
|
@@ -6118,7 +6114,7 @@
|
|
|
6118
6114
|
$scrapersMetadataRegister.register(legacyDocumentScraperMetadata);
|
|
6119
6115
|
|
|
6120
6116
|
/**
|
|
6121
|
-
* Scraper for .
|
|
6117
|
+
* Scraper for old document files (like .doc and .rtf)
|
|
6122
6118
|
*
|
|
6123
6119
|
* @see `documentationUrl` for more details
|
|
6124
6120
|
* @public exported from `@promptbook/legacy-documents`
|
|
@@ -6145,12 +6141,13 @@
|
|
|
6145
6141
|
* Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
|
|
6146
6142
|
*/
|
|
6147
6143
|
LegacyDocumentScraper.prototype.$convert = function (source) {
|
|
6144
|
+
var _a;
|
|
6148
6145
|
return __awaiter(this, void 0, void 0, function () {
|
|
6149
|
-
var
|
|
6146
|
+
var _b, _c, rootDirname, _d, cacheDirname, _e, intermediateFilesStrategy, _f, isVerbose, extension, cacheFilehandler, documentSourceOutdirPathForLibreOffice_1, command_1, files_1, file;
|
|
6150
6147
|
return __generator(this, function (_g) {
|
|
6151
6148
|
switch (_g.label) {
|
|
6152
6149
|
case 0:
|
|
6153
|
-
|
|
6150
|
+
_b = this.options, _c = _b.rootDirname, rootDirname = _c === void 0 ? process.cwd() : _c, _d = _b.cacheDirname, cacheDirname = _d === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _d, _e = _b.intermediateFilesStrategy, intermediateFilesStrategy = _e === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _e, _f = _b.isVerbose, isVerbose = _f === void 0 ? DEFAULT_IS_VERBOSE : _f;
|
|
6154
6151
|
if (!$isRunningInNode()) {
|
|
6155
6152
|
throw new KnowledgeScrapeError('Scraping .doc files is only supported in Node environment');
|
|
6156
6153
|
}
|
|
@@ -6158,7 +6155,7 @@
|
|
|
6158
6155
|
throw new EnvironmentMismatchError('Can not scrape (legacy) documents without filesystem tools');
|
|
6159
6156
|
// <- TODO: [🧠] What is the best error type here`
|
|
6160
6157
|
}
|
|
6161
|
-
if (
|
|
6158
|
+
if (((_a = this.tools.executables) === null || _a === void 0 ? void 0 : _a.libreOfficePath) === undefined) {
|
|
6162
6159
|
throw new MissingToolsError('LibreOffice is required for scraping .doc and .rtf files');
|
|
6163
6160
|
}
|
|
6164
6161
|
if (source.filename === null) {
|
|
@@ -6169,7 +6166,7 @@
|
|
|
6169
6166
|
return [4 /*yield*/, getScraperIntermediateSource(source, {
|
|
6170
6167
|
rootDirname: rootDirname,
|
|
6171
6168
|
cacheDirname: cacheDirname,
|
|
6172
|
-
|
|
6169
|
+
intermediateFilesStrategy: intermediateFilesStrategy,
|
|
6173
6170
|
extension: 'docx',
|
|
6174
6171
|
isVerbose: isVerbose,
|
|
6175
6172
|
})];
|
|
@@ -6184,11 +6181,9 @@
|
|
|
6184
6181
|
documentSourceOutdirPathForLibreOffice_1 = path.join(path.dirname(cacheFilehandler.filename), 'libreoffice')
|
|
6185
6182
|
.split('\\')
|
|
6186
6183
|
.join('/');
|
|
6187
|
-
command_1 = "\"".concat(
|
|
6188
|
-
// TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build polution error
|
|
6184
|
+
command_1 = "\"".concat(this.tools.executables.libreOfficePath, "\" --headless --convert-to docx \"").concat(source.filename, "\" --outdir \"").concat(documentSourceOutdirPathForLibreOffice_1, "\"");
|
|
6189
6185
|
return [4 /*yield*/, $execCommand(command_1)];
|
|
6190
6186
|
case 3:
|
|
6191
|
-
// TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build polution error
|
|
6192
6187
|
_g.sent();
|
|
6193
6188
|
return [4 /*yield*/, promises.readdir(documentSourceOutdirPathForLibreOffice_1)];
|
|
6194
6189
|
case 4:
|
|
@@ -6236,9 +6231,14 @@
|
|
|
6236
6231
|
asJson: function () {
|
|
6237
6232
|
throw new UnexpectedError('Did not expect that `documentScraper` would need to get the content `asJson`');
|
|
6238
6233
|
},
|
|
6239
|
-
|
|
6240
|
-
|
|
6241
|
-
|
|
6234
|
+
/*
|
|
6235
|
+
TODO: [🥽]
|
|
6236
|
+
> asBlob() {
|
|
6237
|
+
> throw new UnexpectedError(
|
|
6238
|
+
> 'Did not expect that `documentScraper` would need to get the content `asBlob`',
|
|
6239
|
+
> );
|
|
6240
|
+
> },
|
|
6241
|
+
*/
|
|
6242
6242
|
};
|
|
6243
6243
|
knowledge = this.documentScraper.scrape(markdownSource);
|
|
6244
6244
|
return [4 /*yield*/, cacheFilehandler.destroy()];
|