@promptbook/website-crawler 0.71.0-17 → 0.71.0-19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -4
- package/esm/index.es.js +99 -115
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +24 -18
- package/esm/typings/src/_packages/node.index.d.ts +2 -0
- package/esm/typings/src/_packages/types.index.d.ts +10 -10
- package/esm/typings/src/_packages/utils.index.d.ts +2 -2
- package/esm/typings/src/collection/PipelineCollection.d.ts +1 -1
- package/esm/typings/src/collection/SimplePipelineCollection.d.ts +2 -2
- package/esm/typings/src/collection/collectionToJson.d.ts +1 -1
- package/esm/typings/src/collection/constructors/createCollectionFromJson.d.ts +1 -1
- package/esm/typings/src/collection/constructors/createCollectionFromPromise.d.ts +1 -1
- package/esm/typings/src/commands/_common/types/CommandParser.d.ts +5 -5
- package/esm/typings/src/config.d.ts +21 -14
- package/esm/typings/src/execution/EmbeddingVector.d.ts +1 -1
- package/esm/typings/src/execution/Executables.d.ts +18 -0
- package/esm/typings/src/execution/ExecutionTools.d.ts +9 -3
- package/esm/typings/src/execution/LlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/execution/PipelineExecutorResult.d.ts +2 -2
- package/esm/typings/src/execution/assertsExecutionSuccessful.d.ts +1 -0
- package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorOptions.d.ts +29 -6
- package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +2 -11
- package/esm/typings/src/execution/createPipelineExecutor/20-executeTemplate.d.ts +4 -13
- package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +9 -14
- package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +11 -3
- package/esm/typings/src/execution/utils/addUsage.d.ts +1 -1
- package/esm/typings/src/execution/utils/forEachAsync.d.ts +1 -1
- package/esm/typings/src/formats/_common/FormatDefinition.d.ts +2 -2
- package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +2 -2
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForCli.d.ts +2 -2
- package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +1 -1
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +4 -1
- package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/$fakeTextToExpectations.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/MockedEchoLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/MockedFackedLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/multiple/MultipleLlmExecutionTools.d.ts +4 -5
- package/esm/typings/src/llm-providers/multiple/joinLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/remote/RemoteLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_ListModels_Response.d.ts +4 -4
- package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_Prompt_Response.d.ts +3 -3
- package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +4 -23
- package/esm/typings/src/prepare/prepareTemplates.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/Scraper.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/prepareKnowledgePieces.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/register/$provideExecutablesForNode.d.ts +12 -0
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/utils/makeKnowledgeSourceHandler.d.ts +1 -1
- package/esm/typings/src/scrapers/document/DocumentScraper.d.ts +2 -2
- package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/LegacyDocumentScraper.d.ts +3 -3
- package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/MarkdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/PdfScraper.d.ts +2 -2
- package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/website/WebsiteScraper.d.ts +6 -3
- package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/website/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/website/utils/createShowdownConverter.d.ts +7 -0
- package/esm/typings/src/scripting/javascript/utils/preserve.d.ts +1 -1
- package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +1 -1
- package/esm/typings/src/types/Arrayable.d.ts +1 -1
- package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +7 -0
- package/esm/typings/src/types/PipelineJson/KnowledgePieceJson.d.ts +4 -4
- package/esm/typings/src/types/PipelineJson/KnowledgeSourceJson.d.ts +1 -1
- package/esm/typings/src/types/PipelineJson/PersonaJson.d.ts +1 -1
- package/esm/typings/src/types/PipelineJson/TemplateJsonCommon.d.ts +2 -2
- package/esm/typings/src/types/Prompt.d.ts +1 -1
- package/esm/typings/src/types/execution-report/ExecutionReportJson.d.ts +1 -1
- package/esm/typings/src/utils/$Register.d.ts +1 -1
- package/esm/typings/src/utils/FromtoItems.d.ts +1 -1
- package/esm/typings/src/utils/arrayableToArray.d.ts +1 -1
- package/esm/typings/src/utils/emojis.d.ts +1 -1
- package/esm/typings/src/utils/execCommand/$execCommand.d.ts +2 -2
- package/esm/typings/src/utils/execCommand/{IExecCommandOptions.d.ts → ExecCommandOptions.d.ts} +2 -6
- package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.d.ts +3 -3
- package/esm/typings/src/utils/expectation-counters/countSentences.d.ts +1 -1
- package/esm/typings/src/utils/markdown/extractAllBlocksFromMarkdown.d.ts +1 -1
- package/esm/typings/src/utils/markdown/splitMarkdownIntoSections.d.ts +1 -1
- package/esm/typings/src/utils/normalization/IKeywords.d.ts +2 -2
- package/esm/typings/src/utils/normalization/parseKeywords.d.ts +2 -2
- package/esm/typings/src/utils/normalization/parseKeywordsFromString.d.ts +2 -2
- package/esm/typings/src/utils/normalization/searchKeywords.d.ts +2 -2
- package/esm/typings/src/utils/organization/TODO_USE.d.ts +1 -1
- package/esm/typings/src/utils/organization/keepUnused.d.ts +1 -1
- package/esm/typings/src/utils/random/$randomSeed.d.ts +1 -1
- package/esm/typings/src/utils/sets/intersection.d.ts +1 -1
- package/esm/typings/src/utils/sets/union.d.ts +1 -1
- package/esm/typings/src/utils/unwrapResult.d.ts +4 -4
- package/package.json +4 -3
- package/umd/index.umd.js +102 -118
- package/umd/index.umd.js.map +1 -1
- package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorSettings.d.ts +0 -29
- package/esm/typings/src/scrapers/website/utils/markdownConverter.d.ts +0 -12
- /package/esm/typings/src/scrapers/website/utils/{markdownConverter.test.d.ts → createShowdownConverter.test.d.ts} +0 -0
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
#  Promptbook
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
Build responsible, controlled and transparent applications on top of LLM models!
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
|
|
@@ -21,11 +21,9 @@ Supercharge your use of large language models
|
|
|
21
21
|
- ✨ **Support of [OpenAI o1 model](https://openai.com/o1/)**
|
|
22
22
|
|
|
23
23
|
<blockquote style="color: #ff8811">
|
|
24
|
-
<b>⚠ Warning:</b> This is a pre-release version of the library. It is not yet ready for production use. Please look at <a href="https://www.npmjs.com/package/@promptbook/core?activeTab=versions">latest stable release</a>.
|
|
24
|
+
<b>⚠ Warning:</b> This is a pre-release version of the library. It is not yet ready for production use. Please look at <a href="https://www.npmjs.com/package/@promptbook/core?activeTab=versions">latest stable release</a>.
|
|
25
25
|
</blockquote>
|
|
26
26
|
|
|
27
|
-
|
|
28
|
-
|
|
29
27
|
## 📦 Package `@promptbook/website-crawler`
|
|
30
28
|
|
|
31
29
|
- Promptbooks are [divided into several](#-packages) packages, all are published from [single monorepo](https://github.com/webgptorg/promptbook).
|
package/esm/index.es.js
CHANGED
|
@@ -2,12 +2,12 @@ import spaceTrim$1, { spaceTrim } from 'spacetrim';
|
|
|
2
2
|
import { Readability } from '@mozilla/readability';
|
|
3
3
|
import { mkdir, rm, writeFile } from 'fs/promises';
|
|
4
4
|
import { JSDOM } from 'jsdom';
|
|
5
|
-
import { forTime } from 'waitasecond';
|
|
6
5
|
import { SHA256 } from 'crypto-js';
|
|
7
6
|
import hexEncoder from 'crypto-js/enc-hex';
|
|
8
7
|
import { basename, join, dirname } from 'path';
|
|
9
8
|
import { format } from 'prettier';
|
|
10
9
|
import parserHtml from 'prettier/parser-html';
|
|
10
|
+
import { forTime } from 'waitasecond';
|
|
11
11
|
import { lookup } from 'mime-types';
|
|
12
12
|
import { unparse, parse } from 'papaparse';
|
|
13
13
|
import { Converter } from 'showdown';
|
|
@@ -16,7 +16,7 @@ import { Converter } from 'showdown';
|
|
|
16
16
|
/**
|
|
17
17
|
* The version of the Promptbook library
|
|
18
18
|
*/
|
|
19
|
-
var PROMPTBOOK_VERSION = '0.71.0-
|
|
19
|
+
var PROMPTBOOK_VERSION = '0.71.0-18';
|
|
20
20
|
// TODO: [main] !!!! List here all the versions and annotate + put into script
|
|
21
21
|
|
|
22
22
|
/*! *****************************************************************************
|
|
@@ -364,18 +364,25 @@ var LOOP_LIMIT = 1000;
|
|
|
364
364
|
* @private within the repository - too low-level in comparison with other `MAX_...`
|
|
365
365
|
*/
|
|
366
366
|
var IMMEDIATE_TIME = 10;
|
|
367
|
+
/**
|
|
368
|
+
* Strategy for caching the intermediate results for knowledge sources
|
|
369
|
+
*
|
|
370
|
+
* @public exported from `@promptbook/core`
|
|
371
|
+
*/
|
|
372
|
+
var DEFAULT_INTERMEDIATE_FILES_STRATEGY = 'HIDE_AND_KEEP';
|
|
373
|
+
// <- TODO: [😡] Change to 'VISIBLE'
|
|
367
374
|
/**
|
|
368
375
|
* The maximum number of (LLM) tasks running in parallel
|
|
369
376
|
*
|
|
370
377
|
* @public exported from `@promptbook/core`
|
|
371
378
|
*/
|
|
372
|
-
var
|
|
379
|
+
var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹♂️]
|
|
373
380
|
/**
|
|
374
381
|
* The maximum number of attempts to execute LLM task before giving up
|
|
375
382
|
*
|
|
376
383
|
* @public exported from `@promptbook/core`
|
|
377
384
|
*/
|
|
378
|
-
var
|
|
385
|
+
var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹♂️]
|
|
379
386
|
/**
|
|
380
387
|
* Where to store the scrape cache
|
|
381
388
|
*
|
|
@@ -383,7 +390,7 @@ var MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹♂️]
|
|
|
383
390
|
*
|
|
384
391
|
* @public exported from `@promptbook/core`
|
|
385
392
|
*/
|
|
386
|
-
var
|
|
393
|
+
var DEFAULT_SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
|
|
387
394
|
/**
|
|
388
395
|
* Nonce which is used for replacing things in strings
|
|
389
396
|
*
|
|
@@ -435,7 +442,7 @@ var DEFAULT_CSV_SETTINGS = Object.freeze({
|
|
|
435
442
|
*
|
|
436
443
|
* @public exported from `@promptbook/core`
|
|
437
444
|
*/
|
|
438
|
-
var
|
|
445
|
+
var DEFAULT_IS_VERBOSE = false;
|
|
439
446
|
/**
|
|
440
447
|
* @@@
|
|
441
448
|
*
|
|
@@ -576,7 +583,7 @@ var $Register = /** @class */ (function () {
|
|
|
576
583
|
this.storage = globalScope[storageName];
|
|
577
584
|
}
|
|
578
585
|
$Register.prototype.list = function () {
|
|
579
|
-
// <- TODO: ReadonlyDeep<
|
|
586
|
+
// <- TODO: ReadonlyDeep<ReadonlyArray<TRegistered>>
|
|
580
587
|
return this.storage;
|
|
581
588
|
};
|
|
582
589
|
$Register.prototype.register = function (registered) {
|
|
@@ -628,7 +635,7 @@ var websiteScraperMetadata = $deepFreeze({
|
|
|
628
635
|
mimeTypes: ['text/html'],
|
|
629
636
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
630
637
|
isAvilableInBrowser: false,
|
|
631
|
-
requiredExecutables: [
|
|
638
|
+
requiredExecutables: [],
|
|
632
639
|
}); /* <- TODO: [🤛] */
|
|
633
640
|
/**
|
|
634
641
|
* Registration of known scraper metadata
|
|
@@ -1106,12 +1113,12 @@ function TODO_USE() {
|
|
|
1106
1113
|
*/
|
|
1107
1114
|
function getScraperIntermediateSource(source, options) {
|
|
1108
1115
|
return __awaiter(this, void 0, void 0, function () {
|
|
1109
|
-
var sourceFilename, url, rootDirname, cacheDirname,
|
|
1116
|
+
var sourceFilename, url, rootDirname, cacheDirname, intermediateFilesStrategy, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
|
|
1110
1117
|
return __generator(this, function (_a) {
|
|
1111
1118
|
switch (_a.label) {
|
|
1112
1119
|
case 0:
|
|
1113
1120
|
sourceFilename = source.filename, url = source.url;
|
|
1114
|
-
rootDirname = options.rootDirname, cacheDirname = options.cacheDirname,
|
|
1121
|
+
rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, intermediateFilesStrategy = options.intermediateFilesStrategy, extension = options.extension, isVerbose = options.isVerbose;
|
|
1115
1122
|
hash = SHA256(
|
|
1116
1123
|
// <- TODO: [🥬] Encapsulate sha256 to some private utility function
|
|
1117
1124
|
hexEncoder.parse(sourceFilename || url || 'untitled'))
|
|
@@ -1121,7 +1128,7 @@ function getScraperIntermediateSource(source, options) {
|
|
|
1121
1128
|
pieces = ['intermediate', semanticName, hash].filter(function (piece) { return piece !== ''; });
|
|
1122
1129
|
name = pieces.join('-').split('--').join('-');
|
|
1123
1130
|
// <- TODO: Use MAX_FILENAME_LENGTH
|
|
1124
|
-
TODO_USE(rootDirname); // <- TODO:
|
|
1131
|
+
TODO_USE(rootDirname); // <- TODO: [😡]
|
|
1125
1132
|
cacheFilename = join.apply(void 0, __spreadArray(__spreadArray([process.cwd(),
|
|
1126
1133
|
cacheDirname], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), [name], false)).split('\\')
|
|
1127
1134
|
.join('/') +
|
|
@@ -1141,7 +1148,7 @@ function getScraperIntermediateSource(source, options) {
|
|
|
1141
1148
|
return __generator(this, function (_a) {
|
|
1142
1149
|
switch (_a.label) {
|
|
1143
1150
|
case 0:
|
|
1144
|
-
if (!
|
|
1151
|
+
if (!(intermediateFilesStrategy === 'HIDE_AND_CLEAN')) return [3 /*break*/, 2];
|
|
1145
1152
|
if (isVerbose) {
|
|
1146
1153
|
console.info('legacyDocumentScraper: Clening cache');
|
|
1147
1154
|
}
|
|
@@ -1165,7 +1172,7 @@ function getScraperIntermediateSource(source, options) {
|
|
|
1165
1172
|
/**
|
|
1166
1173
|
* Note: Not using `FileCacheStorage` for two reasons:
|
|
1167
1174
|
* 1) Need to store more than serialized JSONs
|
|
1168
|
-
* 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO:
|
|
1175
|
+
* 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: [😡]
|
|
1169
1176
|
* TODO: [🐱🐉][🧠] Make some smart crop
|
|
1170
1177
|
* Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
|
|
1171
1178
|
*/
|
|
@@ -1230,7 +1237,6 @@ function pipelineJsonToString(pipelineJson) {
|
|
|
1230
1237
|
pipelineString += '\n\n';
|
|
1231
1238
|
pipelineString += description;
|
|
1232
1239
|
}
|
|
1233
|
-
// TODO:> const commands: Array<Command>
|
|
1234
1240
|
var commands = [];
|
|
1235
1241
|
if (pipelineUrl) {
|
|
1236
1242
|
commands.push("PIPELINE URL ".concat(pipelineUrl));
|
|
@@ -1286,7 +1292,6 @@ function pipelineJsonToString(pipelineJson) {
|
|
|
1286
1292
|
pipelineString += '\n\n';
|
|
1287
1293
|
pipelineString += description_1;
|
|
1288
1294
|
}
|
|
1289
|
-
// TODO:> const commands: Array<Command>
|
|
1290
1295
|
var commands_1 = [];
|
|
1291
1296
|
var contentLanguage = 'text';
|
|
1292
1297
|
if (templateType === 'PROMPT_TEMPLATE') {
|
|
@@ -2187,6 +2192,7 @@ function assertsExecutionSuccessful(executionResult) {
|
|
|
2187
2192
|
}
|
|
2188
2193
|
}
|
|
2189
2194
|
/**
|
|
2195
|
+
* TODO: [🐚] This function should be removed OR changed OR be completely rewritten
|
|
2190
2196
|
* TODO: [🧠] Can this return type be better typed than void
|
|
2191
2197
|
*/
|
|
2192
2198
|
|
|
@@ -2527,8 +2533,7 @@ function countTotalUsage(llmTools) {
|
|
|
2527
2533
|
* Multiple LLM Execution Tools is a proxy server that uses multiple execution tools internally and exposes the executor interface externally.
|
|
2528
2534
|
*
|
|
2529
2535
|
* Note: Internal utility of `joinLlmExecutionTools` but exposed type
|
|
2530
|
-
* @public exported from `@promptbook/
|
|
2531
|
-
* TODO: !!!!!! Export as runtime class not just type
|
|
2536
|
+
* @public exported from `@promptbook/core`
|
|
2532
2537
|
*/
|
|
2533
2538
|
var MultipleLlmExecutionTools = /** @class */ (function () {
|
|
2534
2539
|
/**
|
|
@@ -2848,7 +2853,7 @@ function preparePersona(personaDescription, tools, options) {
|
|
|
2848
2853
|
return __generator(this, function (_d) {
|
|
2849
2854
|
switch (_d.label) {
|
|
2850
2855
|
case 0:
|
|
2851
|
-
_a = options.isVerbose, isVerbose = _a === void 0 ?
|
|
2856
|
+
_a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
|
|
2852
2857
|
if (tools === undefined || tools.llm === undefined) {
|
|
2853
2858
|
throw new MissingToolsError('LLM tools are required for preparing persona');
|
|
2854
2859
|
}
|
|
@@ -2915,7 +2920,7 @@ var $scrapersRegister = new $Register('scraper_constructors');
|
|
|
2915
2920
|
* TODO: [®] DRY Register logic
|
|
2916
2921
|
*/
|
|
2917
2922
|
|
|
2918
|
-
// TODO: !!!!!! Maybe delete this function
|
|
2923
|
+
// TODO: !!!!!!last - Maybe delete this function
|
|
2919
2924
|
/**
|
|
2920
2925
|
* Creates a message with all registered scrapers
|
|
2921
2926
|
*
|
|
@@ -3023,7 +3028,6 @@ function $registeredScrapersMessage() {
|
|
|
3023
3028
|
* @private within the repository
|
|
3024
3029
|
*/
|
|
3025
3030
|
function sourceContentToName(sourceContent) {
|
|
3026
|
-
// TODO: !!!!!! Better name for source than gibberish hash
|
|
3027
3031
|
var hash = SHA256(hexEncoder.parse(JSON.stringify(sourceContent)))
|
|
3028
3032
|
// <- TODO: [🥬] Encapsulate sha256 to some private utility function
|
|
3029
3033
|
.toString( /* hex */)
|
|
@@ -3097,7 +3101,7 @@ function isFileExisting(filename, fs) {
|
|
|
3097
3101
|
/**
|
|
3098
3102
|
* @@@
|
|
3099
3103
|
*
|
|
3100
|
-
* @
|
|
3104
|
+
* @public exported from `@promptbook/core`
|
|
3101
3105
|
*/
|
|
3102
3106
|
function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
3103
3107
|
var _a;
|
|
@@ -3110,7 +3114,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3110
3114
|
case 0:
|
|
3111
3115
|
sourceContent = knowledgeSource.sourceContent;
|
|
3112
3116
|
name = knowledgeSource.name;
|
|
3113
|
-
_b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ?
|
|
3117
|
+
_b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d;
|
|
3114
3118
|
TODO_USE(isVerbose);
|
|
3115
3119
|
if (!name) {
|
|
3116
3120
|
name = sourceContentToName(sourceContent);
|
|
@@ -3200,7 +3204,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3200
3204
|
content = _a.sent();
|
|
3201
3205
|
return [2 /*return*/, new Blob([
|
|
3202
3206
|
content,
|
|
3203
|
-
// <- TODO: !!!!!!
|
|
3207
|
+
// <- TODO: !!!!!! Test that this is working
|
|
3204
3208
|
], { type: mimeType_1 })];
|
|
3205
3209
|
}
|
|
3206
3210
|
});
|
|
@@ -3263,7 +3267,7 @@ function prepareKnowledgePieces(knowledgeSources, tools, options) {
|
|
|
3263
3267
|
return __generator(this, function (_c) {
|
|
3264
3268
|
switch (_c.label) {
|
|
3265
3269
|
case 0:
|
|
3266
|
-
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3270
|
+
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
|
|
3267
3271
|
knowledgePreparedUnflatten = new Array(knowledgeSources.length);
|
|
3268
3272
|
return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
3269
3273
|
var partialPieces, sourceHandler, _a, _b, scraper, partialPiecesUnchecked, e_1_1, pieces;
|
|
@@ -3292,7 +3296,8 @@ function prepareKnowledgePieces(knowledgeSources, tools, options) {
|
|
|
3292
3296
|
case 4:
|
|
3293
3297
|
partialPiecesUnchecked = _d.sent();
|
|
3294
3298
|
if (partialPiecesUnchecked !== null) {
|
|
3295
|
-
partialPieces = partialPiecesUnchecked;
|
|
3299
|
+
partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
|
|
3300
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
|
|
3296
3301
|
return [3 /*break*/, 6];
|
|
3297
3302
|
}
|
|
3298
3303
|
_d.label = 5;
|
|
@@ -3340,7 +3345,7 @@ TODO: [🧊] This is how it can look in future
|
|
|
3340
3345
|
> /**
|
|
3341
3346
|
> * Unprepared knowledge
|
|
3342
3347
|
> * /
|
|
3343
|
-
> readonly knowledgeSources:
|
|
3348
|
+
> readonly knowledgeSources: ReadonlyArray<KnowledgeSourceJson>;
|
|
3344
3349
|
> };
|
|
3345
3350
|
>
|
|
3346
3351
|
> export async function prepareKnowledgePieces(
|
|
@@ -3398,7 +3403,7 @@ function prepareTemplates(pipeline, tools, options) {
|
|
|
3398
3403
|
return __generator(this, function (_b) {
|
|
3399
3404
|
switch (_b.label) {
|
|
3400
3405
|
case 0:
|
|
3401
|
-
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3406
|
+
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a;
|
|
3402
3407
|
templates = pipeline.templates, parameters = pipeline.parameters, knowledgePiecesCount = pipeline.knowledgePiecesCount;
|
|
3403
3408
|
// TODO: [main] !!!!! Apply samples to each template (if missing and is for the template defined)
|
|
3404
3409
|
TODO_USE(parameters);
|
|
@@ -3460,7 +3465,7 @@ function preparePipeline(pipeline, tools, options) {
|
|
|
3460
3465
|
if (isPipelinePrepared(pipeline)) {
|
|
3461
3466
|
return [2 /*return*/, pipeline];
|
|
3462
3467
|
}
|
|
3463
|
-
rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3468
|
+
rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
|
|
3464
3469
|
parameters = pipeline.parameters, templates = pipeline.templates, knowledgeSources = pipeline.knowledgeSources, personas = pipeline.personas;
|
|
3465
3470
|
if (tools === undefined || tools.llm === undefined) {
|
|
3466
3471
|
throw new MissingToolsError('LLM tools are required for preparing the pipeline');
|
|
@@ -3518,7 +3523,9 @@ function preparePipeline(pipeline, tools, options) {
|
|
|
3518
3523
|
// ----- /Templates preparation -----
|
|
3519
3524
|
// Note: Count total usage
|
|
3520
3525
|
currentPreparation.usage = llmToolsWithUsage.getTotalUsage();
|
|
3521
|
-
return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates:
|
|
3526
|
+
return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates: __spreadArray([], __read(templatesPrepared), false),
|
|
3527
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just ` templates: templatesPrepared`
|
|
3528
|
+
knowledgeSources: knowledgeSourcesPrepared, knowledgePieces: knowledgePiecesPrepared, personas: preparedPersonas, preparations: __spreadArray([], __read(preparations), false) }))];
|
|
3522
3529
|
}
|
|
3523
3530
|
});
|
|
3524
3531
|
});
|
|
@@ -4489,12 +4496,11 @@ function checkExpectations(expectations, value) {
|
|
|
4489
4496
|
*/
|
|
4490
4497
|
function executeAttempts(options) {
|
|
4491
4498
|
return __awaiter(this, void 0, void 0, function () {
|
|
4492
|
-
var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools,
|
|
4499
|
+
var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools, $executionReport, pipelineIdentification, maxExecutionAttempts, $ongoingTemplateResult, _llms, llmTools, _loop_1, attempt, state_1;
|
|
4493
4500
|
return __generator(this, function (_a) {
|
|
4494
4501
|
switch (_a.label) {
|
|
4495
4502
|
case 0:
|
|
4496
|
-
jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools,
|
|
4497
|
-
maxExecutionAttempts = settings.maxExecutionAttempts;
|
|
4503
|
+
jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, maxExecutionAttempts = options.maxExecutionAttempts;
|
|
4498
4504
|
$ongoingTemplateResult = {
|
|
4499
4505
|
$result: null,
|
|
4500
4506
|
$resultString: null,
|
|
@@ -4860,12 +4866,12 @@ function executeAttempts(options) {
|
|
|
4860
4866
|
*/
|
|
4861
4867
|
function executeFormatSubvalues(options) {
|
|
4862
4868
|
return __awaiter(this, void 0, void 0, function () {
|
|
4863
|
-
var template, jokerParameterNames, parameters, priority,
|
|
4869
|
+
var template, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification, parameterValue, formatDefinition, subvalueDefinition, formatSettings, resultString;
|
|
4864
4870
|
var _this = this;
|
|
4865
4871
|
return __generator(this, function (_a) {
|
|
4866
4872
|
switch (_a.label) {
|
|
4867
4873
|
case 0:
|
|
4868
|
-
template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority,
|
|
4874
|
+
template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority, csvSettings = options.csvSettings, pipelineIdentification = options.pipelineIdentification;
|
|
4869
4875
|
if (template.foreach === undefined) {
|
|
4870
4876
|
return [2 /*return*/, /* not await */ executeAttempts(options)];
|
|
4871
4877
|
}
|
|
@@ -4895,7 +4901,7 @@ function executeFormatSubvalues(options) {
|
|
|
4895
4901
|
.join('\n')), "\n\n [\u26F7] This should never happen because subformat name should be validated during parsing\n\n ").concat(block(pipelineIdentification), "\n "); }));
|
|
4896
4902
|
}
|
|
4897
4903
|
if (formatDefinition.formatName === 'CSV') {
|
|
4898
|
-
formatSettings =
|
|
4904
|
+
formatSettings = csvSettings;
|
|
4899
4905
|
// <- TODO: [🤹♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
|
|
4900
4906
|
}
|
|
4901
4907
|
return [4 /*yield*/, subvalueDefinition.mapValues(parameterValue, template.foreach.outputSubparameterName, formatSettings, function (subparameters, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
@@ -5048,13 +5054,12 @@ function getReservedParametersForTemplate(options) {
|
|
|
5048
5054
|
*/
|
|
5049
5055
|
function executeTemplate(options) {
|
|
5050
5056
|
return __awaiter(this, void 0, void 0, function () {
|
|
5051
|
-
var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress,
|
|
5052
|
-
var e_1,
|
|
5053
|
-
return __generator(this, function (
|
|
5054
|
-
switch (
|
|
5057
|
+
var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, _a, maxExecutionAttempts, name, title, priority, usedParameterNames, dependentParameterNames, definedParameters, _b, _c, _d, definedParameterNames, parameters, _loop_1, _e, _f, parameterName, maxAttempts, jokerParameterNames, preparedContent, resultString;
|
|
5058
|
+
var e_1, _g, _h;
|
|
5059
|
+
return __generator(this, function (_j) {
|
|
5060
|
+
switch (_j.label) {
|
|
5055
5061
|
case 0:
|
|
5056
|
-
currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress,
|
|
5057
|
-
maxExecutionAttempts = settings.maxExecutionAttempts;
|
|
5062
|
+
currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a;
|
|
5058
5063
|
name = "pipeline-executor-frame-".concat(currentTemplate.name);
|
|
5059
5064
|
title = currentTemplate.title;
|
|
5060
5065
|
priority = preparedPipeline.templates.length - preparedPipeline.templates.indexOf(currentTemplate);
|
|
@@ -5069,7 +5074,7 @@ function executeTemplate(options) {
|
|
|
5069
5074
|
// <- [🍸]
|
|
5070
5075
|
})];
|
|
5071
5076
|
case 1:
|
|
5072
|
-
|
|
5077
|
+
_j.sent();
|
|
5073
5078
|
usedParameterNames = extractParameterNamesFromTemplate(currentTemplate);
|
|
5074
5079
|
dependentParameterNames = new Set(currentTemplate.dependentParameterNames);
|
|
5075
5080
|
// TODO: [👩🏾🤝👩🏻] Use here `mapAvailableToExpectedParameters`
|
|
@@ -5080,15 +5085,15 @@ function executeTemplate(options) {
|
|
|
5080
5085
|
.map(function (name) { return "{".concat(name, "}"); })
|
|
5081
5086
|
.join(', '), "\n\n ").concat(block(pipelineIdentification), "\n\n "); }));
|
|
5082
5087
|
}
|
|
5083
|
-
|
|
5084
|
-
|
|
5088
|
+
_c = (_b = Object).freeze;
|
|
5089
|
+
_d = [{}];
|
|
5085
5090
|
return [4 /*yield*/, getReservedParametersForTemplate({
|
|
5086
5091
|
preparedPipeline: preparedPipeline,
|
|
5087
5092
|
template: currentTemplate,
|
|
5088
5093
|
pipelineIdentification: pipelineIdentification,
|
|
5089
5094
|
})];
|
|
5090
5095
|
case 2:
|
|
5091
|
-
definedParameters =
|
|
5096
|
+
definedParameters = _c.apply(_b, [__assign.apply(void 0, [__assign.apply(void 0, _d.concat([(_j.sent())])), parametersToPass])]);
|
|
5092
5097
|
definedParameterNames = new Set(Object.keys(definedParameters));
|
|
5093
5098
|
parameters = {};
|
|
5094
5099
|
_loop_1 = function (parameterName) {
|
|
@@ -5108,15 +5113,15 @@ function executeTemplate(options) {
|
|
|
5108
5113
|
try {
|
|
5109
5114
|
// Note: [2] Check that all used parameters are defined and removing unused parameters for this template
|
|
5110
5115
|
// TODO: [👩🏾🤝👩🏻] Use here `mapAvailableToExpectedParameters`
|
|
5111
|
-
for (
|
|
5112
|
-
parameterName =
|
|
5116
|
+
for (_e = __values(Array.from(union(definedParameterNames, usedParameterNames, dependentParameterNames))), _f = _e.next(); !_f.done; _f = _e.next()) {
|
|
5117
|
+
parameterName = _f.value;
|
|
5113
5118
|
_loop_1(parameterName);
|
|
5114
5119
|
}
|
|
5115
5120
|
}
|
|
5116
5121
|
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
5117
5122
|
finally {
|
|
5118
5123
|
try {
|
|
5119
|
-
if (
|
|
5124
|
+
if (_f && !_f.done && (_g = _e.return)) _g.call(_e);
|
|
5120
5125
|
}
|
|
5121
5126
|
finally { if (e_1) throw e_1.error; }
|
|
5122
5127
|
}
|
|
@@ -5136,12 +5141,11 @@ function executeTemplate(options) {
|
|
|
5136
5141
|
template: currentTemplate,
|
|
5137
5142
|
preparedPipeline: preparedPipeline,
|
|
5138
5143
|
tools: tools,
|
|
5139
|
-
settings: settings,
|
|
5140
5144
|
$executionReport: $executionReport,
|
|
5141
5145
|
pipelineIdentification: pipelineIdentification,
|
|
5142
5146
|
})];
|
|
5143
5147
|
case 3:
|
|
5144
|
-
resultString =
|
|
5148
|
+
resultString = _j.sent();
|
|
5145
5149
|
return [4 /*yield*/, onProgress({
|
|
5146
5150
|
name: name,
|
|
5147
5151
|
title: title,
|
|
@@ -5153,12 +5157,12 @@ function executeTemplate(options) {
|
|
|
5153
5157
|
// <- [🍸]
|
|
5154
5158
|
})];
|
|
5155
5159
|
case 4:
|
|
5156
|
-
|
|
5157
|
-
return [2 /*return*/, Object.freeze((
|
|
5158
|
-
|
|
5160
|
+
_j.sent();
|
|
5161
|
+
return [2 /*return*/, Object.freeze((_h = {},
|
|
5162
|
+
_h[currentTemplate.resultingParameterName] =
|
|
5159
5163
|
// <- Note: [👩👩👧] No need to detect parameter collision here because pipeline checks logic consistency during construction
|
|
5160
5164
|
resultString,
|
|
5161
|
-
|
|
5165
|
+
_h))];
|
|
5162
5166
|
}
|
|
5163
5167
|
});
|
|
5164
5168
|
});
|
|
@@ -5217,13 +5221,12 @@ function filterJustOutputParameters(options) {
|
|
|
5217
5221
|
*/
|
|
5218
5222
|
function executePipeline(options) {
|
|
5219
5223
|
return __awaiter(this, void 0, void 0, function () {
|
|
5220
|
-
var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification,
|
|
5224
|
+
var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
|
|
5221
5225
|
var e_1, _f, e_2, _g;
|
|
5222
5226
|
return __generator(this, function (_h) {
|
|
5223
5227
|
switch (_h.label) {
|
|
5224
5228
|
case 0:
|
|
5225
|
-
inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification,
|
|
5226
|
-
maxParallelCount = settings.maxParallelCount, rootDirname = settings.rootDirname, _a = settings.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
|
|
5229
|
+
inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, maxParallelCount = options.maxParallelCount, rootDirname = options.rootDirname, _a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
|
|
5227
5230
|
preparedPipeline = options.preparedPipeline;
|
|
5228
5231
|
if (!(preparedPipeline === undefined)) return [3 /*break*/, 2];
|
|
5229
5232
|
return [4 /*yield*/, preparePipeline(pipeline, tools, {
|
|
@@ -5408,12 +5411,7 @@ function executePipeline(options) {
|
|
|
5408
5411
|
return [3 /*break*/, 4];
|
|
5409
5412
|
case 3:
|
|
5410
5413
|
unresovedTemplates_1 = unresovedTemplates_1.filter(function (template) { return template !== currentTemplate; });
|
|
5411
|
-
work_1 = executeTemplate({
|
|
5412
|
-
currentTemplate: currentTemplate,
|
|
5413
|
-
preparedPipeline: preparedPipeline,
|
|
5414
|
-
parametersToPass: parametersToPass,
|
|
5415
|
-
tools: tools,
|
|
5416
|
-
onProgress: function (progress) {
|
|
5414
|
+
work_1 = executeTemplate(__assign(__assign({}, options), { currentTemplate: currentTemplate, preparedPipeline: preparedPipeline, parametersToPass: parametersToPass, tools: tools, onProgress: function (progress) {
|
|
5417
5415
|
if (isReturned) {
|
|
5418
5416
|
throw new UnexpectedError(spaceTrim(function (block) { return "\n Can not call `onProgress` after pipeline execution is finished\n\n ".concat(block(pipelineIdentification), "\n\n ").concat(block(JSON.stringify(progress, null, 4)
|
|
5419
5417
|
.split('\n')
|
|
@@ -5423,11 +5421,7 @@ function executePipeline(options) {
|
|
|
5423
5421
|
if (onProgress) {
|
|
5424
5422
|
onProgress(progress);
|
|
5425
5423
|
}
|
|
5426
|
-
},
|
|
5427
|
-
settings: settings,
|
|
5428
|
-
$executionReport: executionReport,
|
|
5429
|
-
pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }),
|
|
5430
|
-
})
|
|
5424
|
+
}, $executionReport: executionReport, pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }) }))
|
|
5431
5425
|
.then(function (newParametersToPass) {
|
|
5432
5426
|
parametersToPass = __assign(__assign({}, newParametersToPass), parametersToPass);
|
|
5433
5427
|
resovedParameterNames_1 = __spreadArray(__spreadArray([], __read(resovedParameterNames_1), false), [currentTemplate.resultingParameterName], false);
|
|
@@ -5531,8 +5525,7 @@ function executePipeline(options) {
|
|
|
5531
5525
|
*/
|
|
5532
5526
|
function createPipelineExecutor(options) {
|
|
5533
5527
|
var _this = this;
|
|
5534
|
-
var pipeline = options.pipeline, tools = options.tools, _a = options.
|
|
5535
|
-
var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f, _g = settings.rootDirname, rootDirname = _g === void 0 ? null : _g;
|
|
5528
|
+
var pipeline = options.pipeline, tools = options.tools, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a, _b = options.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = options.csvSettings, csvSettings = _c === void 0 ? DEFAULT_CSV_SETTINGS : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d, _e = options.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _e === void 0 ? false : _e, _f = options.rootDirname, rootDirname = _f === void 0 ? null : _f;
|
|
5536
5529
|
validatePipeline(pipeline);
|
|
5537
5530
|
var pipelineIdentification = (function () {
|
|
5538
5531
|
// Note: This is a 😐 implementation of [🚞]
|
|
@@ -5566,14 +5559,12 @@ function createPipelineExecutor(options) {
|
|
|
5566
5559
|
tools: tools,
|
|
5567
5560
|
onProgress: onProgress,
|
|
5568
5561
|
pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
|
|
5569
|
-
|
|
5570
|
-
|
|
5571
|
-
|
|
5572
|
-
|
|
5573
|
-
|
|
5574
|
-
|
|
5575
|
-
rootDirname: rootDirname,
|
|
5576
|
-
},
|
|
5562
|
+
maxExecutionAttempts: maxExecutionAttempts,
|
|
5563
|
+
maxParallelCount: maxParallelCount,
|
|
5564
|
+
csvSettings: csvSettings,
|
|
5565
|
+
isVerbose: isVerbose,
|
|
5566
|
+
isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
|
|
5567
|
+
rootDirname: rootDirname,
|
|
5577
5568
|
})];
|
|
5578
5569
|
});
|
|
5579
5570
|
}); };
|
|
@@ -5595,7 +5586,7 @@ var markdownScraperMetadata = $deepFreeze({
|
|
|
5595
5586
|
mimeTypes: ['text/markdown', 'text/plain'],
|
|
5596
5587
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
5597
5588
|
isAvilableInBrowser: true,
|
|
5598
|
-
requiredExecutables: [
|
|
5589
|
+
requiredExecutables: [],
|
|
5599
5590
|
}); /* <- TODO: [🤛] */
|
|
5600
5591
|
/**
|
|
5601
5592
|
* Registration of known scraper metadata
|
|
@@ -5639,7 +5630,7 @@ var MarkdownScraper = /** @class */ (function () {
|
|
|
5639
5630
|
return __generator(this, function (_k) {
|
|
5640
5631
|
switch (_k.label) {
|
|
5641
5632
|
case 0:
|
|
5642
|
-
_a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ?
|
|
5633
|
+
_a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = _a.isVerbose, isVerbose = _c === void 0 ? DEFAULT_IS_VERBOSE : _c;
|
|
5643
5634
|
llm = this.tools.llm;
|
|
5644
5635
|
if (llm === undefined) {
|
|
5645
5636
|
throw new MissingToolsError('LLM tools are required for scraping external files');
|
|
@@ -5738,7 +5729,8 @@ var MarkdownScraper = /** @class */ (function () {
|
|
|
5738
5729
|
embeddingResult = _c.sent();
|
|
5739
5730
|
index.push({
|
|
5740
5731
|
modelName: embeddingResult.modelName,
|
|
5741
|
-
position: embeddingResult.content,
|
|
5732
|
+
position: __spreadArray([], __read(embeddingResult.content), false),
|
|
5733
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just `position: embeddingResult.content`
|
|
5742
5734
|
});
|
|
5743
5735
|
_c.label = 6;
|
|
5744
5736
|
case 6: return [3 /*break*/, 8];
|
|
@@ -5778,32 +5770,29 @@ var MarkdownScraper = /** @class */ (function () {
|
|
|
5778
5770
|
*/
|
|
5779
5771
|
|
|
5780
5772
|
/**
|
|
5781
|
-
*
|
|
5773
|
+
* Create a new showdown converter instance
|
|
5782
5774
|
*
|
|
5783
|
-
* @
|
|
5784
|
-
* @private for markdown and html knowledge scrapers
|
|
5785
|
-
*/
|
|
5786
|
-
var markdownConverter = new Converter({
|
|
5787
|
-
flavor: 'github', // <- TODO: !!!!!! Explicitly specify the flavor of promptbook markdown
|
|
5788
|
-
/*
|
|
5789
|
-
> import showdownHighlight from 'showdown-highlight';
|
|
5790
|
-
> extensions: [
|
|
5791
|
-
> showdownHighlight({
|
|
5792
|
-
> // Whether to add the classes to the <pre> tag, default is false
|
|
5793
|
-
> pre: true,
|
|
5794
|
-
> // Whether to use hljs' auto language detection, default is true
|
|
5795
|
-
> auto_detection: true,
|
|
5796
|
-
> }),
|
|
5797
|
-
> ],
|
|
5798
|
-
*/
|
|
5799
|
-
});
|
|
5800
|
-
/**
|
|
5801
|
-
* TODO: !!!!!! Figure out better name not to confuse with `Converter`
|
|
5802
|
-
* TODO: !!!!!! Lazy-make converter
|
|
5775
|
+
* @private utility of `WebsiteScraper`
|
|
5803
5776
|
*/
|
|
5777
|
+
function createShowdownConverter() {
|
|
5778
|
+
return new Converter({
|
|
5779
|
+
flavor: 'github',
|
|
5780
|
+
/*
|
|
5781
|
+
> import showdownHighlight from 'showdown-highlight';
|
|
5782
|
+
> extensions: [
|
|
5783
|
+
> showdownHighlight({
|
|
5784
|
+
> // Whether to add the classes to the <pre> tag, default is false
|
|
5785
|
+
> pre: true,
|
|
5786
|
+
> // Whether to use hljs' auto language detection, default is true
|
|
5787
|
+
> auto_detection: true,
|
|
5788
|
+
> }),
|
|
5789
|
+
> ],
|
|
5790
|
+
*/
|
|
5791
|
+
});
|
|
5792
|
+
}
|
|
5804
5793
|
|
|
5805
5794
|
/**
|
|
5806
|
-
* Scraper for
|
|
5795
|
+
* Scraper for websites
|
|
5807
5796
|
*
|
|
5808
5797
|
* @see `documentationUrl` for more details
|
|
5809
5798
|
* @public exported from `@promptbook/website-crawler`
|
|
@@ -5813,6 +5802,7 @@ var WebsiteScraper = /** @class */ (function () {
|
|
|
5813
5802
|
this.tools = tools;
|
|
5814
5803
|
this.options = options;
|
|
5815
5804
|
this.markdownScraper = new MarkdownScraper(tools, options);
|
|
5805
|
+
this.showdownConverter = createShowdownConverter();
|
|
5816
5806
|
}
|
|
5817
5807
|
Object.defineProperty(WebsiteScraper.prototype, "metadata", {
|
|
5818
5808
|
/**
|
|
@@ -5833,12 +5823,11 @@ var WebsiteScraper = /** @class */ (function () {
|
|
|
5833
5823
|
return __awaiter(this, void 0, void 0, function () {
|
|
5834
5824
|
var _a, _b,
|
|
5835
5825
|
// TODO: [🧠] Maybe in node use headless browser not just JSDOM
|
|
5836
|
-
|
|
5837
|
-
rootDirname, _c, cacheDirname, _d, isCacheCleaned, _e, isVerbose, jsdom, _f, reader, article, html, i, cacheFilehandler, markdown;
|
|
5826
|
+
rootDirname, _c, cacheDirname, _d, intermediateFilesStrategy, _e, isVerbose, jsdom, _f, reader, article, html, i, cacheFilehandler, markdown;
|
|
5838
5827
|
return __generator(this, function (_g) {
|
|
5839
5828
|
switch (_g.label) {
|
|
5840
5829
|
case 0:
|
|
5841
|
-
_a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ?
|
|
5830
|
+
_a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _c, _d = _a.intermediateFilesStrategy, intermediateFilesStrategy = _d === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _d, _e = _a.isVerbose, isVerbose = _e === void 0 ? DEFAULT_IS_VERBOSE : _e;
|
|
5842
5831
|
// TODO: !!!!!! Does this work in browser? Make it work.
|
|
5843
5832
|
if (source.url === null) {
|
|
5844
5833
|
throw new KnowledgeScrapeError('Website scraper requires URL');
|
|
@@ -5851,10 +5840,6 @@ var WebsiteScraper = /** @class */ (function () {
|
|
|
5851
5840
|
}]))();
|
|
5852
5841
|
reader = new Readability(jsdom.window.document);
|
|
5853
5842
|
article = reader.parse();
|
|
5854
|
-
console.log(article);
|
|
5855
|
-
return [4 /*yield*/, forTime(10000)];
|
|
5856
|
-
case 2:
|
|
5857
|
-
_g.sent();
|
|
5858
5843
|
html = (article === null || article === void 0 ? void 0 : article.content) || (article === null || article === void 0 ? void 0 : article.textContent) || jsdom.window.document.body.innerHTML;
|
|
5859
5844
|
// Note: Unwrap html such as it is convertable by `markdownConverter`
|
|
5860
5845
|
for (i = 0; i < 2; i++) {
|
|
@@ -5866,16 +5851,16 @@ var WebsiteScraper = /** @class */ (function () {
|
|
|
5866
5851
|
return [4 /*yield*/, getScraperIntermediateSource(source, {
|
|
5867
5852
|
rootDirname: rootDirname,
|
|
5868
5853
|
cacheDirname: cacheDirname,
|
|
5869
|
-
|
|
5854
|
+
intermediateFilesStrategy: intermediateFilesStrategy,
|
|
5870
5855
|
extension: 'html',
|
|
5871
5856
|
isVerbose: isVerbose,
|
|
5872
5857
|
})];
|
|
5873
|
-
case
|
|
5858
|
+
case 2:
|
|
5874
5859
|
cacheFilehandler = _g.sent();
|
|
5875
5860
|
return [4 /*yield*/, writeFile(cacheFilehandler.filename, html, 'utf-8')];
|
|
5876
|
-
case
|
|
5861
|
+
case 3:
|
|
5877
5862
|
_g.sent();
|
|
5878
|
-
markdown =
|
|
5863
|
+
markdown = this.showdownConverter.makeMarkdown(html, jsdom.window.document);
|
|
5879
5864
|
return [2 /*return*/, __assign(__assign({}, cacheFilehandler), { markdown: markdown })];
|
|
5880
5865
|
}
|
|
5881
5866
|
});
|
|
@@ -5919,7 +5904,6 @@ var WebsiteScraper = /** @class */ (function () {
|
|
|
5919
5904
|
return WebsiteScraper;
|
|
5920
5905
|
}());
|
|
5921
5906
|
/**
|
|
5922
|
-
* TODO: !!!!!! Put into separate package
|
|
5923
5907
|
* TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
|
|
5924
5908
|
* TODO: [🪂] Do it in parallel 11:11
|
|
5925
5909
|
* Note: No need to aggregate usage here, it is done by intercepting the llmTools
|