@promptbook/legacy-documents 0.71.0-17 → 0.71.0-19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -4
- package/esm/index.es.js +90 -96
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +24 -18
- package/esm/typings/src/_packages/node.index.d.ts +2 -0
- package/esm/typings/src/_packages/types.index.d.ts +10 -10
- package/esm/typings/src/_packages/utils.index.d.ts +2 -2
- package/esm/typings/src/collection/PipelineCollection.d.ts +1 -1
- package/esm/typings/src/collection/SimplePipelineCollection.d.ts +2 -2
- package/esm/typings/src/collection/collectionToJson.d.ts +1 -1
- package/esm/typings/src/collection/constructors/createCollectionFromJson.d.ts +1 -1
- package/esm/typings/src/collection/constructors/createCollectionFromPromise.d.ts +1 -1
- package/esm/typings/src/commands/_common/types/CommandParser.d.ts +5 -5
- package/esm/typings/src/config.d.ts +21 -14
- package/esm/typings/src/execution/EmbeddingVector.d.ts +1 -1
- package/esm/typings/src/execution/Executables.d.ts +18 -0
- package/esm/typings/src/execution/ExecutionTools.d.ts +9 -3
- package/esm/typings/src/execution/LlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/execution/PipelineExecutorResult.d.ts +2 -2
- package/esm/typings/src/execution/assertsExecutionSuccessful.d.ts +1 -0
- package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorOptions.d.ts +29 -6
- package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +2 -11
- package/esm/typings/src/execution/createPipelineExecutor/20-executeTemplate.d.ts +4 -13
- package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +9 -14
- package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +11 -3
- package/esm/typings/src/execution/utils/addUsage.d.ts +1 -1
- package/esm/typings/src/execution/utils/forEachAsync.d.ts +1 -1
- package/esm/typings/src/formats/_common/FormatDefinition.d.ts +2 -2
- package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +2 -2
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForCli.d.ts +2 -2
- package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +1 -1
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +4 -1
- package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/$fakeTextToExpectations.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/MockedEchoLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/MockedFackedLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/multiple/MultipleLlmExecutionTools.d.ts +4 -5
- package/esm/typings/src/llm-providers/multiple/joinLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/remote/RemoteLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_ListModels_Response.d.ts +4 -4
- package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_Prompt_Response.d.ts +3 -3
- package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +4 -23
- package/esm/typings/src/prepare/prepareTemplates.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/Scraper.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/prepareKnowledgePieces.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/register/$provideExecutablesForNode.d.ts +12 -0
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +1 -1
- package/esm/typings/src/scrapers/document/DocumentScraper.d.ts +2 -2
- package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/LegacyDocumentScraper.d.ts +3 -3
- package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/MarkdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/PdfScraper.d.ts +2 -2
- package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/website/WebsiteScraper.d.ts +6 -3
- package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/website/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/website/utils/createShowdownConverter.d.ts +7 -0
- package/esm/typings/src/scripting/javascript/utils/preserve.d.ts +1 -1
- package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +1 -1
- package/esm/typings/src/types/Arrayable.d.ts +1 -1
- package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +7 -0
- package/esm/typings/src/types/PipelineJson/KnowledgePieceJson.d.ts +4 -4
- package/esm/typings/src/types/PipelineJson/KnowledgeSourceJson.d.ts +1 -1
- package/esm/typings/src/types/PipelineJson/PersonaJson.d.ts +1 -1
- package/esm/typings/src/types/PipelineJson/TemplateJsonCommon.d.ts +2 -2
- package/esm/typings/src/types/Prompt.d.ts +1 -1
- package/esm/typings/src/types/execution-report/ExecutionReportJson.d.ts +1 -1
- package/esm/typings/src/utils/$Register.d.ts +1 -1
- package/esm/typings/src/utils/FromtoItems.d.ts +1 -1
- package/esm/typings/src/utils/arrayableToArray.d.ts +1 -1
- package/esm/typings/src/utils/emojis.d.ts +1 -1
- package/esm/typings/src/utils/execCommand/$execCommand.d.ts +2 -2
- package/esm/typings/src/utils/execCommand/{IExecCommandOptions.d.ts → ExecCommandOptions.d.ts} +2 -6
- package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.d.ts +3 -3
- package/esm/typings/src/utils/expectation-counters/countSentences.d.ts +1 -1
- package/esm/typings/src/utils/markdown/extractAllBlocksFromMarkdown.d.ts +1 -1
- package/esm/typings/src/utils/markdown/splitMarkdownIntoSections.d.ts +1 -1
- package/esm/typings/src/utils/normalization/IKeywords.d.ts +2 -2
- package/esm/typings/src/utils/normalization/parseKeywords.d.ts +2 -2
- package/esm/typings/src/utils/normalization/parseKeywordsFromString.d.ts +2 -2
- package/esm/typings/src/utils/normalization/searchKeywords.d.ts +2 -2
- package/esm/typings/src/utils/organization/TODO_USE.d.ts +1 -1
- package/esm/typings/src/utils/organization/keepUnused.d.ts +1 -1
- package/esm/typings/src/utils/random/$randomSeed.d.ts +1 -1
- package/esm/typings/src/utils/sets/intersection.d.ts +1 -1
- package/esm/typings/src/utils/sets/union.d.ts +1 -1
- package/esm/typings/src/utils/unwrapResult.d.ts +4 -4
- package/package.json +4 -3
- package/umd/index.umd.js +90 -96
- package/umd/index.umd.js.map +1 -1
- package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorSettings.d.ts +0 -29
- package/esm/typings/src/scrapers/website/utils/markdownConverter.d.ts +0 -12
- /package/esm/typings/src/scrapers/website/utils/{markdownConverter.test.d.ts → createShowdownConverter.test.d.ts} +0 -0
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
#  Promptbook
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Build responsible, controlled and transparent applications on top of LLM models!
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
|
|
@@ -21,11 +21,9 @@ Supercharge your use of large language models
|
|
|
21
21
|
- ✨ **Support of [OpenAI o1 model](https://openai.com/o1/)**
|
|
22
22
|
|
|
23
23
|
<blockquote style="color: #ff8811">
|
|
24
|
-
<b>⚠ Warning:</b> This is a pre-release version of the library. It is not yet ready for production use. Please look at <a href="https://www.npmjs.com/package/@promptbook/core?activeTab=versions">latest stable release</a>.
|
|
24
|
+
<b>⚠ Warning:</b> This is a pre-release version of the library. It is not yet ready for production use. Please look at <a href="https://www.npmjs.com/package/@promptbook/core?activeTab=versions">latest stable release</a>.
|
|
25
25
|
</blockquote>
|
|
26
26
|
|
|
27
|
-
|
|
28
|
-
|
|
29
27
|
## 📦 Package `@promptbook/legacy-documents`
|
|
30
28
|
|
|
31
29
|
- Promptbooks are [divided into several](#-packages) packages, all are published from [single monorepo](https://github.com/webgptorg/promptbook).
|
package/esm/index.es.js
CHANGED
|
@@ -15,7 +15,7 @@ import { unparse, parse } from 'papaparse';
|
|
|
15
15
|
/**
|
|
16
16
|
* The version of the Promptbook library
|
|
17
17
|
*/
|
|
18
|
-
var PROMPTBOOK_VERSION = '0.71.0-
|
|
18
|
+
var PROMPTBOOK_VERSION = '0.71.0-18';
|
|
19
19
|
// TODO: [main] !!!! List here all the versions and annotate + put into script
|
|
20
20
|
|
|
21
21
|
/*! *****************************************************************************
|
|
@@ -363,18 +363,25 @@ var LOOP_LIMIT = 1000;
|
|
|
363
363
|
* @private within the repository - too low-level in comparison with other `MAX_...`
|
|
364
364
|
*/
|
|
365
365
|
var IMMEDIATE_TIME = 10;
|
|
366
|
+
/**
|
|
367
|
+
* Strategy for caching the intermediate results for knowledge sources
|
|
368
|
+
*
|
|
369
|
+
* @public exported from `@promptbook/core`
|
|
370
|
+
*/
|
|
371
|
+
var DEFAULT_INTERMEDIATE_FILES_STRATEGY = 'HIDE_AND_KEEP';
|
|
372
|
+
// <- TODO: [😡] Change to 'VISIBLE'
|
|
366
373
|
/**
|
|
367
374
|
* The maximum number of (LLM) tasks running in parallel
|
|
368
375
|
*
|
|
369
376
|
* @public exported from `@promptbook/core`
|
|
370
377
|
*/
|
|
371
|
-
var
|
|
378
|
+
var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹♂️]
|
|
372
379
|
/**
|
|
373
380
|
* The maximum number of attempts to execute LLM task before giving up
|
|
374
381
|
*
|
|
375
382
|
* @public exported from `@promptbook/core`
|
|
376
383
|
*/
|
|
377
|
-
var
|
|
384
|
+
var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹♂️]
|
|
378
385
|
/**
|
|
379
386
|
* Where to store the scrape cache
|
|
380
387
|
*
|
|
@@ -382,7 +389,7 @@ var MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹♂️]
|
|
|
382
389
|
*
|
|
383
390
|
* @public exported from `@promptbook/core`
|
|
384
391
|
*/
|
|
385
|
-
var
|
|
392
|
+
var DEFAULT_SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
|
|
386
393
|
/**
|
|
387
394
|
* Nonce which is used for replacing things in strings
|
|
388
395
|
*
|
|
@@ -434,7 +441,7 @@ var DEFAULT_CSV_SETTINGS = Object.freeze({
|
|
|
434
441
|
*
|
|
435
442
|
* @public exported from `@promptbook/core`
|
|
436
443
|
*/
|
|
437
|
-
var
|
|
444
|
+
var DEFAULT_IS_VERBOSE = false;
|
|
438
445
|
/**
|
|
439
446
|
* @@@
|
|
440
447
|
*
|
|
@@ -1150,12 +1157,12 @@ function TODO_USE() {
|
|
|
1150
1157
|
*/
|
|
1151
1158
|
function getScraperIntermediateSource(source, options) {
|
|
1152
1159
|
return __awaiter(this, void 0, void 0, function () {
|
|
1153
|
-
var sourceFilename, url, rootDirname, cacheDirname,
|
|
1160
|
+
var sourceFilename, url, rootDirname, cacheDirname, intermediateFilesStrategy, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
|
|
1154
1161
|
return __generator(this, function (_a) {
|
|
1155
1162
|
switch (_a.label) {
|
|
1156
1163
|
case 0:
|
|
1157
1164
|
sourceFilename = source.filename, url = source.url;
|
|
1158
|
-
rootDirname = options.rootDirname, cacheDirname = options.cacheDirname,
|
|
1165
|
+
rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, intermediateFilesStrategy = options.intermediateFilesStrategy, extension = options.extension, isVerbose = options.isVerbose;
|
|
1159
1166
|
hash = SHA256(
|
|
1160
1167
|
// <- TODO: [🥬] Encapsulate sha256 to some private utility function
|
|
1161
1168
|
hexEncoder.parse(sourceFilename || url || 'untitled'))
|
|
@@ -1165,7 +1172,7 @@ function getScraperIntermediateSource(source, options) {
|
|
|
1165
1172
|
pieces = ['intermediate', semanticName, hash].filter(function (piece) { return piece !== ''; });
|
|
1166
1173
|
name = pieces.join('-').split('--').join('-');
|
|
1167
1174
|
// <- TODO: Use MAX_FILENAME_LENGTH
|
|
1168
|
-
TODO_USE(rootDirname); // <- TODO:
|
|
1175
|
+
TODO_USE(rootDirname); // <- TODO: [😡]
|
|
1169
1176
|
cacheFilename = join.apply(void 0, __spreadArray(__spreadArray([process.cwd(),
|
|
1170
1177
|
cacheDirname], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), [name], false)).split('\\')
|
|
1171
1178
|
.join('/') +
|
|
@@ -1185,7 +1192,7 @@ function getScraperIntermediateSource(source, options) {
|
|
|
1185
1192
|
return __generator(this, function (_a) {
|
|
1186
1193
|
switch (_a.label) {
|
|
1187
1194
|
case 0:
|
|
1188
|
-
if (!
|
|
1195
|
+
if (!(intermediateFilesStrategy === 'HIDE_AND_CLEAN')) return [3 /*break*/, 2];
|
|
1189
1196
|
if (isVerbose) {
|
|
1190
1197
|
console.info('legacyDocumentScraper: Clening cache');
|
|
1191
1198
|
}
|
|
@@ -1209,7 +1216,7 @@ function getScraperIntermediateSource(source, options) {
|
|
|
1209
1216
|
/**
|
|
1210
1217
|
* Note: Not using `FileCacheStorage` for two reasons:
|
|
1211
1218
|
* 1) Need to store more than serialized JSONs
|
|
1212
|
-
* 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO:
|
|
1219
|
+
* 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: [😡]
|
|
1213
1220
|
* TODO: [🐱🐉][🧠] Make some smart crop
|
|
1214
1221
|
* Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
|
|
1215
1222
|
*/
|
|
@@ -1274,7 +1281,6 @@ function pipelineJsonToString(pipelineJson) {
|
|
|
1274
1281
|
pipelineString += '\n\n';
|
|
1275
1282
|
pipelineString += description;
|
|
1276
1283
|
}
|
|
1277
|
-
// TODO:> const commands: Array<Command>
|
|
1278
1284
|
var commands = [];
|
|
1279
1285
|
if (pipelineUrl) {
|
|
1280
1286
|
commands.push("PIPELINE URL ".concat(pipelineUrl));
|
|
@@ -1330,7 +1336,6 @@ function pipelineJsonToString(pipelineJson) {
|
|
|
1330
1336
|
pipelineString += '\n\n';
|
|
1331
1337
|
pipelineString += description_1;
|
|
1332
1338
|
}
|
|
1333
|
-
// TODO:> const commands: Array<Command>
|
|
1334
1339
|
var commands_1 = [];
|
|
1335
1340
|
var contentLanguage = 'text';
|
|
1336
1341
|
if (templateType === 'PROMPT_TEMPLATE') {
|
|
@@ -2215,6 +2220,7 @@ function assertsExecutionSuccessful(executionResult) {
|
|
|
2215
2220
|
}
|
|
2216
2221
|
}
|
|
2217
2222
|
/**
|
|
2223
|
+
* TODO: [🐚] This function should be removed OR changed OR be completely rewritten
|
|
2218
2224
|
* TODO: [🧠] Can this return type be better typed than void
|
|
2219
2225
|
*/
|
|
2220
2226
|
|
|
@@ -2555,8 +2561,7 @@ function countTotalUsage(llmTools) {
|
|
|
2555
2561
|
* Multiple LLM Execution Tools is a proxy server that uses multiple execution tools internally and exposes the executor interface externally.
|
|
2556
2562
|
*
|
|
2557
2563
|
* Note: Internal utility of `joinLlmExecutionTools` but exposed type
|
|
2558
|
-
* @public exported from `@promptbook/
|
|
2559
|
-
* TODO: !!!!!! Export as runtime class not just type
|
|
2564
|
+
* @public exported from `@promptbook/core`
|
|
2560
2565
|
*/
|
|
2561
2566
|
var MultipleLlmExecutionTools = /** @class */ (function () {
|
|
2562
2567
|
/**
|
|
@@ -2876,7 +2881,7 @@ function preparePersona(personaDescription, tools, options) {
|
|
|
2876
2881
|
return __generator(this, function (_d) {
|
|
2877
2882
|
switch (_d.label) {
|
|
2878
2883
|
case 0:
|
|
2879
|
-
_a = options.isVerbose, isVerbose = _a === void 0 ?
|
|
2884
|
+
_a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
|
|
2880
2885
|
if (tools === undefined || tools.llm === undefined) {
|
|
2881
2886
|
throw new MissingToolsError('LLM tools are required for preparing persona');
|
|
2882
2887
|
}
|
|
@@ -3042,7 +3047,7 @@ var $Register = /** @class */ (function () {
|
|
|
3042
3047
|
this.storage = globalScope[storageName];
|
|
3043
3048
|
}
|
|
3044
3049
|
$Register.prototype.list = function () {
|
|
3045
|
-
// <- TODO: ReadonlyDeep<
|
|
3050
|
+
// <- TODO: ReadonlyDeep<ReadonlyArray<TRegistered>>
|
|
3046
3051
|
return this.storage;
|
|
3047
3052
|
};
|
|
3048
3053
|
$Register.prototype.register = function (registered) {
|
|
@@ -3094,7 +3099,7 @@ var $scrapersRegister = new $Register('scraper_constructors');
|
|
|
3094
3099
|
* TODO: [®] DRY Register logic
|
|
3095
3100
|
*/
|
|
3096
3101
|
|
|
3097
|
-
// TODO: !!!!!! Maybe delete this function
|
|
3102
|
+
// TODO: !!!!!!last - Maybe delete this function
|
|
3098
3103
|
/**
|
|
3099
3104
|
* Creates a message with all registered scrapers
|
|
3100
3105
|
*
|
|
@@ -3202,7 +3207,6 @@ function $registeredScrapersMessage() {
|
|
|
3202
3207
|
* @private within the repository
|
|
3203
3208
|
*/
|
|
3204
3209
|
function sourceContentToName(sourceContent) {
|
|
3205
|
-
// TODO: !!!!!! Better name for source than gibberish hash
|
|
3206
3210
|
var hash = SHA256(hexEncoder.parse(JSON.stringify(sourceContent)))
|
|
3207
3211
|
// <- TODO: [🥬] Encapsulate sha256 to some private utility function
|
|
3208
3212
|
.toString( /* hex */)
|
|
@@ -3230,7 +3234,7 @@ function extensionToMimeType(value) {
|
|
|
3230
3234
|
/**
|
|
3231
3235
|
* @@@
|
|
3232
3236
|
*
|
|
3233
|
-
* @
|
|
3237
|
+
* @public exported from `@promptbook/core`
|
|
3234
3238
|
*/
|
|
3235
3239
|
function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
3236
3240
|
var _a;
|
|
@@ -3243,7 +3247,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3243
3247
|
case 0:
|
|
3244
3248
|
sourceContent = knowledgeSource.sourceContent;
|
|
3245
3249
|
name = knowledgeSource.name;
|
|
3246
|
-
_b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ?
|
|
3250
|
+
_b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d;
|
|
3247
3251
|
TODO_USE(isVerbose);
|
|
3248
3252
|
if (!name) {
|
|
3249
3253
|
name = sourceContentToName(sourceContent);
|
|
@@ -3333,7 +3337,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3333
3337
|
content = _a.sent();
|
|
3334
3338
|
return [2 /*return*/, new Blob([
|
|
3335
3339
|
content,
|
|
3336
|
-
// <- TODO: !!!!!!
|
|
3340
|
+
// <- TODO: !!!!!! Test that this is working
|
|
3337
3341
|
], { type: mimeType_1 })];
|
|
3338
3342
|
}
|
|
3339
3343
|
});
|
|
@@ -3396,7 +3400,7 @@ function prepareKnowledgePieces(knowledgeSources, tools, options) {
|
|
|
3396
3400
|
return __generator(this, function (_c) {
|
|
3397
3401
|
switch (_c.label) {
|
|
3398
3402
|
case 0:
|
|
3399
|
-
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3403
|
+
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
|
|
3400
3404
|
knowledgePreparedUnflatten = new Array(knowledgeSources.length);
|
|
3401
3405
|
return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
3402
3406
|
var partialPieces, sourceHandler, _a, _b, scraper, partialPiecesUnchecked, e_1_1, pieces;
|
|
@@ -3425,7 +3429,8 @@ function prepareKnowledgePieces(knowledgeSources, tools, options) {
|
|
|
3425
3429
|
case 4:
|
|
3426
3430
|
partialPiecesUnchecked = _d.sent();
|
|
3427
3431
|
if (partialPiecesUnchecked !== null) {
|
|
3428
|
-
partialPieces = partialPiecesUnchecked;
|
|
3432
|
+
partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
|
|
3433
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
|
|
3429
3434
|
return [3 /*break*/, 6];
|
|
3430
3435
|
}
|
|
3431
3436
|
_d.label = 5;
|
|
@@ -3473,7 +3478,7 @@ TODO: [🧊] This is how it can look in future
|
|
|
3473
3478
|
> /**
|
|
3474
3479
|
> * Unprepared knowledge
|
|
3475
3480
|
> * /
|
|
3476
|
-
> readonly knowledgeSources:
|
|
3481
|
+
> readonly knowledgeSources: ReadonlyArray<KnowledgeSourceJson>;
|
|
3477
3482
|
> };
|
|
3478
3483
|
>
|
|
3479
3484
|
> export async function prepareKnowledgePieces(
|
|
@@ -3531,7 +3536,7 @@ function prepareTemplates(pipeline, tools, options) {
|
|
|
3531
3536
|
return __generator(this, function (_b) {
|
|
3532
3537
|
switch (_b.label) {
|
|
3533
3538
|
case 0:
|
|
3534
|
-
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3539
|
+
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a;
|
|
3535
3540
|
templates = pipeline.templates, parameters = pipeline.parameters, knowledgePiecesCount = pipeline.knowledgePiecesCount;
|
|
3536
3541
|
// TODO: [main] !!!!! Apply samples to each template (if missing and is for the template defined)
|
|
3537
3542
|
TODO_USE(parameters);
|
|
@@ -3593,7 +3598,7 @@ function preparePipeline(pipeline, tools, options) {
|
|
|
3593
3598
|
if (isPipelinePrepared(pipeline)) {
|
|
3594
3599
|
return [2 /*return*/, pipeline];
|
|
3595
3600
|
}
|
|
3596
|
-
rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3601
|
+
rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
|
|
3597
3602
|
parameters = pipeline.parameters, templates = pipeline.templates, knowledgeSources = pipeline.knowledgeSources, personas = pipeline.personas;
|
|
3598
3603
|
if (tools === undefined || tools.llm === undefined) {
|
|
3599
3604
|
throw new MissingToolsError('LLM tools are required for preparing the pipeline');
|
|
@@ -3651,7 +3656,9 @@ function preparePipeline(pipeline, tools, options) {
|
|
|
3651
3656
|
// ----- /Templates preparation -----
|
|
3652
3657
|
// Note: Count total usage
|
|
3653
3658
|
currentPreparation.usage = llmToolsWithUsage.getTotalUsage();
|
|
3654
|
-
return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates:
|
|
3659
|
+
return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates: __spreadArray([], __read(templatesPrepared), false),
|
|
3660
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just ` templates: templatesPrepared`
|
|
3661
|
+
knowledgeSources: knowledgeSourcesPrepared, knowledgePieces: knowledgePiecesPrepared, personas: preparedPersonas, preparations: __spreadArray([], __read(preparations), false) }))];
|
|
3655
3662
|
}
|
|
3656
3663
|
});
|
|
3657
3664
|
});
|
|
@@ -4622,12 +4629,11 @@ function checkExpectations(expectations, value) {
|
|
|
4622
4629
|
*/
|
|
4623
4630
|
function executeAttempts(options) {
|
|
4624
4631
|
return __awaiter(this, void 0, void 0, function () {
|
|
4625
|
-
var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools,
|
|
4632
|
+
var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools, $executionReport, pipelineIdentification, maxExecutionAttempts, $ongoingTemplateResult, _llms, llmTools, _loop_1, attempt, state_1;
|
|
4626
4633
|
return __generator(this, function (_a) {
|
|
4627
4634
|
switch (_a.label) {
|
|
4628
4635
|
case 0:
|
|
4629
|
-
jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools,
|
|
4630
|
-
maxExecutionAttempts = settings.maxExecutionAttempts;
|
|
4636
|
+
jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, maxExecutionAttempts = options.maxExecutionAttempts;
|
|
4631
4637
|
$ongoingTemplateResult = {
|
|
4632
4638
|
$result: null,
|
|
4633
4639
|
$resultString: null,
|
|
@@ -4993,12 +4999,12 @@ function executeAttempts(options) {
|
|
|
4993
4999
|
*/
|
|
4994
5000
|
function executeFormatSubvalues(options) {
|
|
4995
5001
|
return __awaiter(this, void 0, void 0, function () {
|
|
4996
|
-
var template, jokerParameterNames, parameters, priority,
|
|
5002
|
+
var template, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification, parameterValue, formatDefinition, subvalueDefinition, formatSettings, resultString;
|
|
4997
5003
|
var _this = this;
|
|
4998
5004
|
return __generator(this, function (_a) {
|
|
4999
5005
|
switch (_a.label) {
|
|
5000
5006
|
case 0:
|
|
5001
|
-
template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority,
|
|
5007
|
+
template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority, csvSettings = options.csvSettings, pipelineIdentification = options.pipelineIdentification;
|
|
5002
5008
|
if (template.foreach === undefined) {
|
|
5003
5009
|
return [2 /*return*/, /* not await */ executeAttempts(options)];
|
|
5004
5010
|
}
|
|
@@ -5028,7 +5034,7 @@ function executeFormatSubvalues(options) {
|
|
|
5028
5034
|
.join('\n')), "\n\n [\u26F7] This should never happen because subformat name should be validated during parsing\n\n ").concat(block(pipelineIdentification), "\n "); }));
|
|
5029
5035
|
}
|
|
5030
5036
|
if (formatDefinition.formatName === 'CSV') {
|
|
5031
|
-
formatSettings =
|
|
5037
|
+
formatSettings = csvSettings;
|
|
5032
5038
|
// <- TODO: [🤹♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
|
|
5033
5039
|
}
|
|
5034
5040
|
return [4 /*yield*/, subvalueDefinition.mapValues(parameterValue, template.foreach.outputSubparameterName, formatSettings, function (subparameters, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
@@ -5181,13 +5187,12 @@ function getReservedParametersForTemplate(options) {
|
|
|
5181
5187
|
*/
|
|
5182
5188
|
function executeTemplate(options) {
|
|
5183
5189
|
return __awaiter(this, void 0, void 0, function () {
|
|
5184
|
-
var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress,
|
|
5185
|
-
var e_1,
|
|
5186
|
-
return __generator(this, function (
|
|
5187
|
-
switch (
|
|
5190
|
+
var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, _a, maxExecutionAttempts, name, title, priority, usedParameterNames, dependentParameterNames, definedParameters, _b, _c, _d, definedParameterNames, parameters, _loop_1, _e, _f, parameterName, maxAttempts, jokerParameterNames, preparedContent, resultString;
|
|
5191
|
+
var e_1, _g, _h;
|
|
5192
|
+
return __generator(this, function (_j) {
|
|
5193
|
+
switch (_j.label) {
|
|
5188
5194
|
case 0:
|
|
5189
|
-
currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress,
|
|
5190
|
-
maxExecutionAttempts = settings.maxExecutionAttempts;
|
|
5195
|
+
currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a;
|
|
5191
5196
|
name = "pipeline-executor-frame-".concat(currentTemplate.name);
|
|
5192
5197
|
title = currentTemplate.title;
|
|
5193
5198
|
priority = preparedPipeline.templates.length - preparedPipeline.templates.indexOf(currentTemplate);
|
|
@@ -5202,7 +5207,7 @@ function executeTemplate(options) {
|
|
|
5202
5207
|
// <- [🍸]
|
|
5203
5208
|
})];
|
|
5204
5209
|
case 1:
|
|
5205
|
-
|
|
5210
|
+
_j.sent();
|
|
5206
5211
|
usedParameterNames = extractParameterNamesFromTemplate(currentTemplate);
|
|
5207
5212
|
dependentParameterNames = new Set(currentTemplate.dependentParameterNames);
|
|
5208
5213
|
// TODO: [👩🏾🤝👩🏻] Use here `mapAvailableToExpectedParameters`
|
|
@@ -5213,15 +5218,15 @@ function executeTemplate(options) {
|
|
|
5213
5218
|
.map(function (name) { return "{".concat(name, "}"); })
|
|
5214
5219
|
.join(', '), "\n\n ").concat(block(pipelineIdentification), "\n\n "); }));
|
|
5215
5220
|
}
|
|
5216
|
-
|
|
5217
|
-
|
|
5221
|
+
_c = (_b = Object).freeze;
|
|
5222
|
+
_d = [{}];
|
|
5218
5223
|
return [4 /*yield*/, getReservedParametersForTemplate({
|
|
5219
5224
|
preparedPipeline: preparedPipeline,
|
|
5220
5225
|
template: currentTemplate,
|
|
5221
5226
|
pipelineIdentification: pipelineIdentification,
|
|
5222
5227
|
})];
|
|
5223
5228
|
case 2:
|
|
5224
|
-
definedParameters =
|
|
5229
|
+
definedParameters = _c.apply(_b, [__assign.apply(void 0, [__assign.apply(void 0, _d.concat([(_j.sent())])), parametersToPass])]);
|
|
5225
5230
|
definedParameterNames = new Set(Object.keys(definedParameters));
|
|
5226
5231
|
parameters = {};
|
|
5227
5232
|
_loop_1 = function (parameterName) {
|
|
@@ -5241,15 +5246,15 @@ function executeTemplate(options) {
|
|
|
5241
5246
|
try {
|
|
5242
5247
|
// Note: [2] Check that all used parameters are defined and removing unused parameters for this template
|
|
5243
5248
|
// TODO: [👩🏾🤝👩🏻] Use here `mapAvailableToExpectedParameters`
|
|
5244
|
-
for (
|
|
5245
|
-
parameterName =
|
|
5249
|
+
for (_e = __values(Array.from(union(definedParameterNames, usedParameterNames, dependentParameterNames))), _f = _e.next(); !_f.done; _f = _e.next()) {
|
|
5250
|
+
parameterName = _f.value;
|
|
5246
5251
|
_loop_1(parameterName);
|
|
5247
5252
|
}
|
|
5248
5253
|
}
|
|
5249
5254
|
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
5250
5255
|
finally {
|
|
5251
5256
|
try {
|
|
5252
|
-
if (
|
|
5257
|
+
if (_f && !_f.done && (_g = _e.return)) _g.call(_e);
|
|
5253
5258
|
}
|
|
5254
5259
|
finally { if (e_1) throw e_1.error; }
|
|
5255
5260
|
}
|
|
@@ -5269,12 +5274,11 @@ function executeTemplate(options) {
|
|
|
5269
5274
|
template: currentTemplate,
|
|
5270
5275
|
preparedPipeline: preparedPipeline,
|
|
5271
5276
|
tools: tools,
|
|
5272
|
-
settings: settings,
|
|
5273
5277
|
$executionReport: $executionReport,
|
|
5274
5278
|
pipelineIdentification: pipelineIdentification,
|
|
5275
5279
|
})];
|
|
5276
5280
|
case 3:
|
|
5277
|
-
resultString =
|
|
5281
|
+
resultString = _j.sent();
|
|
5278
5282
|
return [4 /*yield*/, onProgress({
|
|
5279
5283
|
name: name,
|
|
5280
5284
|
title: title,
|
|
@@ -5286,12 +5290,12 @@ function executeTemplate(options) {
|
|
|
5286
5290
|
// <- [🍸]
|
|
5287
5291
|
})];
|
|
5288
5292
|
case 4:
|
|
5289
|
-
|
|
5290
|
-
return [2 /*return*/, Object.freeze((
|
|
5291
|
-
|
|
5293
|
+
_j.sent();
|
|
5294
|
+
return [2 /*return*/, Object.freeze((_h = {},
|
|
5295
|
+
_h[currentTemplate.resultingParameterName] =
|
|
5292
5296
|
// <- Note: [👩👩👧] No need to detect parameter collision here because pipeline checks logic consistency during construction
|
|
5293
5297
|
resultString,
|
|
5294
|
-
|
|
5298
|
+
_h))];
|
|
5295
5299
|
}
|
|
5296
5300
|
});
|
|
5297
5301
|
});
|
|
@@ -5350,13 +5354,12 @@ function filterJustOutputParameters(options) {
|
|
|
5350
5354
|
*/
|
|
5351
5355
|
function executePipeline(options) {
|
|
5352
5356
|
return __awaiter(this, void 0, void 0, function () {
|
|
5353
|
-
var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification,
|
|
5357
|
+
var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
|
|
5354
5358
|
var e_1, _f, e_2, _g;
|
|
5355
5359
|
return __generator(this, function (_h) {
|
|
5356
5360
|
switch (_h.label) {
|
|
5357
5361
|
case 0:
|
|
5358
|
-
inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification,
|
|
5359
|
-
maxParallelCount = settings.maxParallelCount, rootDirname = settings.rootDirname, _a = settings.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
|
|
5362
|
+
inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, maxParallelCount = options.maxParallelCount, rootDirname = options.rootDirname, _a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
|
|
5360
5363
|
preparedPipeline = options.preparedPipeline;
|
|
5361
5364
|
if (!(preparedPipeline === undefined)) return [3 /*break*/, 2];
|
|
5362
5365
|
return [4 /*yield*/, preparePipeline(pipeline, tools, {
|
|
@@ -5541,12 +5544,7 @@ function executePipeline(options) {
|
|
|
5541
5544
|
return [3 /*break*/, 4];
|
|
5542
5545
|
case 3:
|
|
5543
5546
|
unresovedTemplates_1 = unresovedTemplates_1.filter(function (template) { return template !== currentTemplate; });
|
|
5544
|
-
work_1 = executeTemplate({
|
|
5545
|
-
currentTemplate: currentTemplate,
|
|
5546
|
-
preparedPipeline: preparedPipeline,
|
|
5547
|
-
parametersToPass: parametersToPass,
|
|
5548
|
-
tools: tools,
|
|
5549
|
-
onProgress: function (progress) {
|
|
5547
|
+
work_1 = executeTemplate(__assign(__assign({}, options), { currentTemplate: currentTemplate, preparedPipeline: preparedPipeline, parametersToPass: parametersToPass, tools: tools, onProgress: function (progress) {
|
|
5550
5548
|
if (isReturned) {
|
|
5551
5549
|
throw new UnexpectedError(spaceTrim(function (block) { return "\n Can not call `onProgress` after pipeline execution is finished\n\n ".concat(block(pipelineIdentification), "\n\n ").concat(block(JSON.stringify(progress, null, 4)
|
|
5552
5550
|
.split('\n')
|
|
@@ -5556,11 +5554,7 @@ function executePipeline(options) {
|
|
|
5556
5554
|
if (onProgress) {
|
|
5557
5555
|
onProgress(progress);
|
|
5558
5556
|
}
|
|
5559
|
-
},
|
|
5560
|
-
settings: settings,
|
|
5561
|
-
$executionReport: executionReport,
|
|
5562
|
-
pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }),
|
|
5563
|
-
})
|
|
5557
|
+
}, $executionReport: executionReport, pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }) }))
|
|
5564
5558
|
.then(function (newParametersToPass) {
|
|
5565
5559
|
parametersToPass = __assign(__assign({}, newParametersToPass), parametersToPass);
|
|
5566
5560
|
resovedParameterNames_1 = __spreadArray(__spreadArray([], __read(resovedParameterNames_1), false), [currentTemplate.resultingParameterName], false);
|
|
@@ -5664,8 +5658,7 @@ function executePipeline(options) {
|
|
|
5664
5658
|
*/
|
|
5665
5659
|
function createPipelineExecutor(options) {
|
|
5666
5660
|
var _this = this;
|
|
5667
|
-
var pipeline = options.pipeline, tools = options.tools, _a = options.
|
|
5668
|
-
var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f, _g = settings.rootDirname, rootDirname = _g === void 0 ? null : _g;
|
|
5661
|
+
var pipeline = options.pipeline, tools = options.tools, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a, _b = options.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = options.csvSettings, csvSettings = _c === void 0 ? DEFAULT_CSV_SETTINGS : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d, _e = options.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _e === void 0 ? false : _e, _f = options.rootDirname, rootDirname = _f === void 0 ? null : _f;
|
|
5669
5662
|
validatePipeline(pipeline);
|
|
5670
5663
|
var pipelineIdentification = (function () {
|
|
5671
5664
|
// Note: This is a 😐 implementation of [🚞]
|
|
@@ -5699,14 +5692,12 @@ function createPipelineExecutor(options) {
|
|
|
5699
5692
|
tools: tools,
|
|
5700
5693
|
onProgress: onProgress,
|
|
5701
5694
|
pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
|
|
5702
|
-
|
|
5703
|
-
|
|
5704
|
-
|
|
5705
|
-
|
|
5706
|
-
|
|
5707
|
-
|
|
5708
|
-
rootDirname: rootDirname,
|
|
5709
|
-
},
|
|
5695
|
+
maxExecutionAttempts: maxExecutionAttempts,
|
|
5696
|
+
maxParallelCount: maxParallelCount,
|
|
5697
|
+
csvSettings: csvSettings,
|
|
5698
|
+
isVerbose: isVerbose,
|
|
5699
|
+
isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
|
|
5700
|
+
rootDirname: rootDirname,
|
|
5710
5701
|
})];
|
|
5711
5702
|
});
|
|
5712
5703
|
}); };
|
|
@@ -5728,7 +5719,7 @@ var markdownScraperMetadata = $deepFreeze({
|
|
|
5728
5719
|
mimeTypes: ['text/markdown', 'text/plain'],
|
|
5729
5720
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
5730
5721
|
isAvilableInBrowser: true,
|
|
5731
|
-
requiredExecutables: [
|
|
5722
|
+
requiredExecutables: [],
|
|
5732
5723
|
}); /* <- TODO: [🤛] */
|
|
5733
5724
|
/**
|
|
5734
5725
|
* Registration of known scraper metadata
|
|
@@ -5772,7 +5763,7 @@ var MarkdownScraper = /** @class */ (function () {
|
|
|
5772
5763
|
return __generator(this, function (_k) {
|
|
5773
5764
|
switch (_k.label) {
|
|
5774
5765
|
case 0:
|
|
5775
|
-
_a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ?
|
|
5766
|
+
_a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = _a.isVerbose, isVerbose = _c === void 0 ? DEFAULT_IS_VERBOSE : _c;
|
|
5776
5767
|
llm = this.tools.llm;
|
|
5777
5768
|
if (llm === undefined) {
|
|
5778
5769
|
throw new MissingToolsError('LLM tools are required for scraping external files');
|
|
@@ -5871,7 +5862,8 @@ var MarkdownScraper = /** @class */ (function () {
|
|
|
5871
5862
|
embeddingResult = _c.sent();
|
|
5872
5863
|
index.push({
|
|
5873
5864
|
modelName: embeddingResult.modelName,
|
|
5874
|
-
position: embeddingResult.content,
|
|
5865
|
+
position: __spreadArray([], __read(embeddingResult.content), false),
|
|
5866
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just `position: embeddingResult.content`
|
|
5875
5867
|
});
|
|
5876
5868
|
_c.label = 6;
|
|
5877
5869
|
case 6: return [3 /*break*/, 8];
|
|
@@ -5922,7 +5914,7 @@ var documentScraperMetadata = $deepFreeze({
|
|
|
5922
5914
|
mimeTypes: ['application/vnd.openxmlformats-officedocument.wordprocessingml.document'],
|
|
5923
5915
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
5924
5916
|
isAvilableInBrowser: false,
|
|
5925
|
-
requiredExecutables: ['
|
|
5917
|
+
requiredExecutables: ['Pandoc'],
|
|
5926
5918
|
}); /* <- TODO: [🤛] */
|
|
5927
5919
|
/**
|
|
5928
5920
|
* Registration of known scraper metadata
|
|
@@ -5962,12 +5954,13 @@ var DocumentScraper = /** @class */ (function () {
|
|
|
5962
5954
|
* Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
|
|
5963
5955
|
*/
|
|
5964
5956
|
DocumentScraper.prototype.$convert = function (source) {
|
|
5957
|
+
var _a;
|
|
5965
5958
|
return __awaiter(this, void 0, void 0, function () {
|
|
5966
|
-
var
|
|
5959
|
+
var _b, _c, rootDirname, _d, cacheDirname, _e, intermediateFilesStrategy, _f, isVerbose, extension, cacheFilehandler, command_1;
|
|
5967
5960
|
return __generator(this, function (_g) {
|
|
5968
5961
|
switch (_g.label) {
|
|
5969
5962
|
case 0:
|
|
5970
|
-
|
|
5963
|
+
_b = this.options, _c = _b.rootDirname, rootDirname = _c === void 0 ? process.cwd() : _c, _d = _b.cacheDirname, cacheDirname = _d === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _d, _e = _b.intermediateFilesStrategy, intermediateFilesStrategy = _e === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _e, _f = _b.isVerbose, isVerbose = _f === void 0 ? DEFAULT_IS_VERBOSE : _f;
|
|
5971
5964
|
if (!$isRunningInNode()) {
|
|
5972
5965
|
throw new KnowledgeScrapeError('Scraping .docx files is only supported in Node environment');
|
|
5973
5966
|
}
|
|
@@ -5975,7 +5968,7 @@ var DocumentScraper = /** @class */ (function () {
|
|
|
5975
5968
|
throw new EnvironmentMismatchError('Can not scrape documents without filesystem tools');
|
|
5976
5969
|
// <- TODO: [🧠] What is the best error type here`
|
|
5977
5970
|
}
|
|
5978
|
-
if (
|
|
5971
|
+
if (((_a = this.tools.executables) === null || _a === void 0 ? void 0 : _a.pandocPath) === undefined) {
|
|
5979
5972
|
throw new MissingToolsError('Pandoc is required for scraping .docx files');
|
|
5980
5973
|
}
|
|
5981
5974
|
if (source.filename === null) {
|
|
@@ -5986,7 +5979,7 @@ var DocumentScraper = /** @class */ (function () {
|
|
|
5986
5979
|
return [4 /*yield*/, getScraperIntermediateSource(source, {
|
|
5987
5980
|
rootDirname: rootDirname,
|
|
5988
5981
|
cacheDirname: cacheDirname,
|
|
5989
|
-
|
|
5982
|
+
intermediateFilesStrategy: intermediateFilesStrategy,
|
|
5990
5983
|
extension: 'md',
|
|
5991
5984
|
isVerbose: isVerbose,
|
|
5992
5985
|
})];
|
|
@@ -5995,11 +5988,9 @@ var DocumentScraper = /** @class */ (function () {
|
|
|
5995
5988
|
return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
|
|
5996
5989
|
case 2:
|
|
5997
5990
|
if (!!(_g.sent())) return [3 /*break*/, 5];
|
|
5998
|
-
command_1 = "\"".concat(
|
|
5999
|
-
// TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
|
|
5991
|
+
command_1 = "\"".concat(this.tools.executables.pandocPath, "\" -f ").concat(extension, " -t markdown \"").concat(source.filename, "\" -o \"").concat(cacheFilehandler.filename, "\"");
|
|
6000
5992
|
return [4 /*yield*/, $execCommand(command_1)];
|
|
6001
5993
|
case 3:
|
|
6002
|
-
// TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook
|
|
6003
5994
|
_g.sent();
|
|
6004
5995
|
return [4 /*yield*/, isFileExisting(cacheFilehandler.filename, this.tools.fs)];
|
|
6005
5996
|
case 4:
|
|
@@ -6078,7 +6069,11 @@ var legacyDocumentScraperMetadata = $deepFreeze({
|
|
|
6078
6069
|
mimeTypes: ['application/msword', 'text/rtf'],
|
|
6079
6070
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
6080
6071
|
isAvilableInBrowser: false,
|
|
6081
|
-
requiredExecutables: [
|
|
6072
|
+
requiredExecutables: [
|
|
6073
|
+
'Pandoc',
|
|
6074
|
+
'LibreOffice',
|
|
6075
|
+
// <- TODO: [🧠] Should be 'LibreOffice' here, its dependency of dependency
|
|
6076
|
+
],
|
|
6082
6077
|
}); /* <- TODO: [🤛] */
|
|
6083
6078
|
/**
|
|
6084
6079
|
* Registration of known scraper metadata
|
|
@@ -6091,7 +6086,7 @@ var legacyDocumentScraperMetadata = $deepFreeze({
|
|
|
6091
6086
|
$scrapersMetadataRegister.register(legacyDocumentScraperMetadata);
|
|
6092
6087
|
|
|
6093
6088
|
/**
|
|
6094
|
-
* Scraper for .
|
|
6089
|
+
* Scraper for old document files (like .doc and .rtf)
|
|
6095
6090
|
*
|
|
6096
6091
|
* @see `documentationUrl` for more details
|
|
6097
6092
|
* @public exported from `@promptbook/legacy-documents`
|
|
@@ -6118,12 +6113,13 @@ var LegacyDocumentScraper = /** @class */ (function () {
|
|
|
6118
6113
|
* Note: `$` is used to indicate that this function is not a pure function - it leaves files on the disk and you are responsible for cleaning them by calling `destroy` method of returned object
|
|
6119
6114
|
*/
|
|
6120
6115
|
LegacyDocumentScraper.prototype.$convert = function (source) {
|
|
6116
|
+
var _a;
|
|
6121
6117
|
return __awaiter(this, void 0, void 0, function () {
|
|
6122
|
-
var
|
|
6118
|
+
var _b, _c, rootDirname, _d, cacheDirname, _e, intermediateFilesStrategy, _f, isVerbose, extension, cacheFilehandler, documentSourceOutdirPathForLibreOffice_1, command_1, files_1, file;
|
|
6123
6119
|
return __generator(this, function (_g) {
|
|
6124
6120
|
switch (_g.label) {
|
|
6125
6121
|
case 0:
|
|
6126
|
-
|
|
6122
|
+
_b = this.options, _c = _b.rootDirname, rootDirname = _c === void 0 ? process.cwd() : _c, _d = _b.cacheDirname, cacheDirname = _d === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _d, _e = _b.intermediateFilesStrategy, intermediateFilesStrategy = _e === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _e, _f = _b.isVerbose, isVerbose = _f === void 0 ? DEFAULT_IS_VERBOSE : _f;
|
|
6127
6123
|
if (!$isRunningInNode()) {
|
|
6128
6124
|
throw new KnowledgeScrapeError('Scraping .doc files is only supported in Node environment');
|
|
6129
6125
|
}
|
|
@@ -6131,7 +6127,7 @@ var LegacyDocumentScraper = /** @class */ (function () {
|
|
|
6131
6127
|
throw new EnvironmentMismatchError('Can not scrape (legacy) documents without filesystem tools');
|
|
6132
6128
|
// <- TODO: [🧠] What is the best error type here`
|
|
6133
6129
|
}
|
|
6134
|
-
if (
|
|
6130
|
+
if (((_a = this.tools.executables) === null || _a === void 0 ? void 0 : _a.libreOfficePath) === undefined) {
|
|
6135
6131
|
throw new MissingToolsError('LibreOffice is required for scraping .doc and .rtf files');
|
|
6136
6132
|
}
|
|
6137
6133
|
if (source.filename === null) {
|
|
@@ -6142,7 +6138,7 @@ var LegacyDocumentScraper = /** @class */ (function () {
|
|
|
6142
6138
|
return [4 /*yield*/, getScraperIntermediateSource(source, {
|
|
6143
6139
|
rootDirname: rootDirname,
|
|
6144
6140
|
cacheDirname: cacheDirname,
|
|
6145
|
-
|
|
6141
|
+
intermediateFilesStrategy: intermediateFilesStrategy,
|
|
6146
6142
|
extension: 'docx',
|
|
6147
6143
|
isVerbose: isVerbose,
|
|
6148
6144
|
})];
|
|
@@ -6157,11 +6153,9 @@ var LegacyDocumentScraper = /** @class */ (function () {
|
|
|
6157
6153
|
documentSourceOutdirPathForLibreOffice_1 = join(dirname(cacheFilehandler.filename), 'libreoffice')
|
|
6158
6154
|
.split('\\')
|
|
6159
6155
|
.join('/');
|
|
6160
|
-
command_1 = "\"".concat(
|
|
6161
|
-
// TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build polution error
|
|
6156
|
+
command_1 = "\"".concat(this.tools.executables.libreOfficePath, "\" --headless --convert-to docx \"").concat(source.filename, "\" --outdir \"").concat(documentSourceOutdirPathForLibreOffice_1, "\"");
|
|
6162
6157
|
return [4 /*yield*/, $execCommand(command_1)];
|
|
6163
6158
|
case 3:
|
|
6164
|
-
// TODO: !!!!!! [🕊] Make execCommand standard (?node-)util of the promptbook - this should trigger build polution error
|
|
6165
6159
|
_g.sent();
|
|
6166
6160
|
return [4 /*yield*/, readdir(documentSourceOutdirPathForLibreOffice_1)];
|
|
6167
6161
|
case 4:
|