@promptbook/website-crawler 0.72.0-13 → 0.72.0-15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/esm/index.es.js +98 -114
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +22 -18
- package/esm/typings/src/_packages/node.index.d.ts +2 -0
- package/esm/typings/src/_packages/types.index.d.ts +10 -10
- package/esm/typings/src/_packages/utils.index.d.ts +2 -2
- package/esm/typings/src/collection/PipelineCollection.d.ts +1 -1
- package/esm/typings/src/collection/SimplePipelineCollection.d.ts +2 -2
- package/esm/typings/src/collection/collectionToJson.d.ts +1 -1
- package/esm/typings/src/collection/constructors/createCollectionFromJson.d.ts +1 -1
- package/esm/typings/src/collection/constructors/createCollectionFromPromise.d.ts +1 -1
- package/esm/typings/src/commands/_common/types/CommandParser.d.ts +5 -5
- package/esm/typings/src/config.d.ts +21 -14
- package/esm/typings/src/execution/EmbeddingVector.d.ts +1 -1
- package/esm/typings/src/execution/Executables.d.ts +18 -0
- package/esm/typings/src/execution/ExecutionTools.d.ts +9 -3
- package/esm/typings/src/execution/LlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/execution/PipelineExecutorResult.d.ts +2 -2
- package/esm/typings/src/execution/assertsExecutionSuccessful.d.ts +1 -0
- package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorOptions.d.ts +29 -6
- package/esm/typings/src/execution/createPipelineExecutor/10-executePipeline.d.ts +2 -11
- package/esm/typings/src/execution/createPipelineExecutor/20-executeTemplate.d.ts +4 -13
- package/esm/typings/src/execution/createPipelineExecutor/40-executeAttempts.d.ts +9 -14
- package/esm/typings/src/execution/translation/automatic-translate/automatic-translators/LindatAutomaticTranslator.d.ts +11 -3
- package/esm/typings/src/execution/utils/addUsage.d.ts +1 -1
- package/esm/typings/src/execution/utils/forEachAsync.d.ts +1 -1
- package/esm/typings/src/formats/_common/FormatDefinition.d.ts +2 -2
- package/esm/typings/src/formats/_common/FormatSubvalueDefinition.d.ts +2 -2
- package/esm/typings/src/llm-providers/_common/register/$provideLlmToolsForCli.d.ts +2 -2
- package/esm/typings/src/llm-providers/_common/register/LlmToolsConfiguration.d.ts +1 -1
- package/esm/typings/src/llm-providers/_common/register/createLlmToolsFromConfiguration.d.ts +7 -0
- package/esm/typings/src/llm-providers/_common/utils/cache/CacheLlmToolsOptions.d.ts +4 -1
- package/esm/typings/src/llm-providers/anthropic-claude/AnthropicClaudeExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/anthropic-claude/anthropic-claude-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/azure-openai/AzureOpenAiExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/$fakeTextToExpectations.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/MockedEchoLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/mocked/MockedFackedLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/multiple/MultipleLlmExecutionTools.d.ts +4 -5
- package/esm/typings/src/llm-providers/multiple/joinLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/OpenAiExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/openai/openai-models.d.ts +1 -1
- package/esm/typings/src/llm-providers/remote/RemoteLlmExecutionTools.d.ts +1 -1
- package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_ListModels_Response.d.ts +3 -3
- package/esm/typings/src/llm-providers/remote/interfaces/PromptbookServer_Prompt_Response.d.ts +2 -2
- package/esm/typings/src/prepare/PrepareAndScrapeOptions.d.ts +4 -23
- package/esm/typings/src/prepare/prepareTemplates.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/Scraper.d.ts +1 -5
- package/esm/typings/src/scrapers/_common/prepareKnowledgePieces.d.ts +1 -1
- package/esm/typings/src/scrapers/_common/register/$provideExecutablesForNode.d.ts +12 -0
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForBrowser.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/register/$provideScrapersForNode.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/register/ScraperAndConverterMetadata.d.ts +2 -2
- package/esm/typings/src/scrapers/_common/utils/getScraperIntermediateSource.d.ts +2 -2
- package/esm/typings/src/scrapers/document/DocumentScraper.d.ts +2 -2
- package/esm/typings/src/scrapers/document/createDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/LegacyDocumentScraper.d.ts +3 -3
- package/esm/typings/src/scrapers/document-legacy/createLegacyDocumentScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/document-legacy/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/MarkdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/createMarkdownScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/markdown/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/PdfScraper.d.ts +2 -2
- package/esm/typings/src/scrapers/pdf/createPdfScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/pdf/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/website/WebsiteScraper.d.ts +6 -3
- package/esm/typings/src/scrapers/website/createWebsiteScraper.d.ts +1 -1
- package/esm/typings/src/scrapers/website/register-metadata.d.ts +1 -1
- package/esm/typings/src/scrapers/website/utils/createShowdownConverter.d.ts +7 -0
- package/esm/typings/src/scripting/javascript/utils/preserve.d.ts +1 -1
- package/esm/typings/src/storage/file-cache-storage/utils/nameToSubfolderPath.d.ts +1 -1
- package/esm/typings/src/types/Arrayable.d.ts +1 -1
- package/esm/typings/src/types/IntermediateFilesStrategy.d.ts +7 -0
- package/esm/typings/src/types/PipelineJson/KnowledgePieceJson.d.ts +4 -4
- package/esm/typings/src/types/PipelineJson/KnowledgeSourceJson.d.ts +1 -1
- package/esm/typings/src/types/PipelineJson/PersonaJson.d.ts +1 -1
- package/esm/typings/src/types/PipelineJson/TemplateJsonCommon.d.ts +2 -2
- package/esm/typings/src/types/Prompt.d.ts +1 -1
- package/esm/typings/src/types/execution-report/ExecutionReportJson.d.ts +1 -1
- package/esm/typings/src/utils/$Register.d.ts +1 -1
- package/esm/typings/src/utils/FromtoItems.d.ts +1 -1
- package/esm/typings/src/utils/arrayableToArray.d.ts +1 -1
- package/esm/typings/src/utils/emojis.d.ts +1 -1
- package/esm/typings/src/utils/execCommand/$execCommand.d.ts +2 -2
- package/esm/typings/src/utils/execCommand/{IExecCommandOptions.d.ts → ExecCommandOptions.d.ts} +2 -6
- package/esm/typings/src/utils/execCommand/execCommandNormalizeOptions.d.ts +3 -3
- package/esm/typings/src/utils/expectation-counters/countSentences.d.ts +1 -1
- package/esm/typings/src/utils/markdown/extractAllBlocksFromMarkdown.d.ts +1 -1
- package/esm/typings/src/utils/markdown/splitMarkdownIntoSections.d.ts +1 -1
- package/esm/typings/src/utils/normalization/IKeywords.d.ts +2 -2
- package/esm/typings/src/utils/normalization/parseKeywords.d.ts +2 -2
- package/esm/typings/src/utils/normalization/parseKeywordsFromString.d.ts +2 -2
- package/esm/typings/src/utils/normalization/searchKeywords.d.ts +2 -2
- package/esm/typings/src/utils/organization/TODO_USE.d.ts +1 -1
- package/esm/typings/src/utils/organization/keepUnused.d.ts +1 -1
- package/esm/typings/src/utils/random/$randomSeed.d.ts +1 -1
- package/esm/typings/src/utils/sets/intersection.d.ts +1 -1
- package/esm/typings/src/utils/sets/union.d.ts +1 -1
- package/esm/typings/src/utils/unwrapResult.d.ts +4 -4
- package/package.json +4 -3
- package/umd/index.umd.js +101 -117
- package/umd/index.umd.js.map +1 -1
- package/esm/typings/src/execution/createPipelineExecutor/00-CreatePipelineExecutorSettings.d.ts +0 -29
- package/esm/typings/src/scrapers/website/utils/markdownConverter.d.ts +0 -12
- /package/esm/typings/src/scrapers/website/utils/{markdownConverter.test.d.ts → createShowdownConverter.test.d.ts} +0 -0
package/README.md
CHANGED
package/esm/index.es.js
CHANGED
|
@@ -2,12 +2,12 @@ import spaceTrim$1, { spaceTrim } from 'spacetrim';
|
|
|
2
2
|
import { Readability } from '@mozilla/readability';
|
|
3
3
|
import { mkdir, rm, writeFile } from 'fs/promises';
|
|
4
4
|
import { JSDOM } from 'jsdom';
|
|
5
|
-
import { forTime } from 'waitasecond';
|
|
6
5
|
import { SHA256 } from 'crypto-js';
|
|
7
6
|
import hexEncoder from 'crypto-js/enc-hex';
|
|
8
7
|
import { basename, join, dirname } from 'path';
|
|
9
8
|
import { format } from 'prettier';
|
|
10
9
|
import parserHtml from 'prettier/parser-html';
|
|
10
|
+
import { forTime } from 'waitasecond';
|
|
11
11
|
import { lookup } from 'mime-types';
|
|
12
12
|
import { unparse, parse } from 'papaparse';
|
|
13
13
|
import { Converter } from 'showdown';
|
|
@@ -16,7 +16,7 @@ import { Converter } from 'showdown';
|
|
|
16
16
|
/**
|
|
17
17
|
* The version of the Promptbook library
|
|
18
18
|
*/
|
|
19
|
-
var PROMPTBOOK_VERSION = '0.72.0-
|
|
19
|
+
var PROMPTBOOK_VERSION = '0.72.0-14';
|
|
20
20
|
// TODO: [main] !!!! List here all the versions and annotate + put into script
|
|
21
21
|
|
|
22
22
|
/*! *****************************************************************************
|
|
@@ -364,18 +364,25 @@ var LOOP_LIMIT = 1000;
|
|
|
364
364
|
* @private within the repository - too low-level in comparison with other `MAX_...`
|
|
365
365
|
*/
|
|
366
366
|
var IMMEDIATE_TIME = 10;
|
|
367
|
+
/**
|
|
368
|
+
* Strategy for caching the intermediate results for knowledge sources
|
|
369
|
+
*
|
|
370
|
+
* @public exported from `@promptbook/core`
|
|
371
|
+
*/
|
|
372
|
+
var DEFAULT_INTERMEDIATE_FILES_STRATEGY = 'HIDE_AND_KEEP';
|
|
373
|
+
// <- TODO: [😡] Change to 'VISIBLE'
|
|
367
374
|
/**
|
|
368
375
|
* The maximum number of (LLM) tasks running in parallel
|
|
369
376
|
*
|
|
370
377
|
* @public exported from `@promptbook/core`
|
|
371
378
|
*/
|
|
372
|
-
var
|
|
379
|
+
var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹♂️]
|
|
373
380
|
/**
|
|
374
381
|
* The maximum number of attempts to execute LLM task before giving up
|
|
375
382
|
*
|
|
376
383
|
* @public exported from `@promptbook/core`
|
|
377
384
|
*/
|
|
378
|
-
var
|
|
385
|
+
var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹♂️]
|
|
379
386
|
/**
|
|
380
387
|
* Where to store the scrape cache
|
|
381
388
|
*
|
|
@@ -383,7 +390,7 @@ var MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹♂️]
|
|
|
383
390
|
*
|
|
384
391
|
* @public exported from `@promptbook/core`
|
|
385
392
|
*/
|
|
386
|
-
var
|
|
393
|
+
var DEFAULT_SCRAPE_CACHE_DIRNAME = '/.promptbook/scrape-cache';
|
|
387
394
|
/**
|
|
388
395
|
* Nonce which is used for replacing things in strings
|
|
389
396
|
*
|
|
@@ -435,7 +442,7 @@ var DEFAULT_CSV_SETTINGS = Object.freeze({
|
|
|
435
442
|
*
|
|
436
443
|
* @public exported from `@promptbook/core`
|
|
437
444
|
*/
|
|
438
|
-
var
|
|
445
|
+
var DEFAULT_IS_VERBOSE = false;
|
|
439
446
|
/**
|
|
440
447
|
* @@@
|
|
441
448
|
*
|
|
@@ -576,7 +583,7 @@ var $Register = /** @class */ (function () {
|
|
|
576
583
|
this.storage = globalScope[storageName];
|
|
577
584
|
}
|
|
578
585
|
$Register.prototype.list = function () {
|
|
579
|
-
// <- TODO: ReadonlyDeep<
|
|
586
|
+
// <- TODO: ReadonlyDeep<ReadonlyArray<TRegistered>>
|
|
580
587
|
return this.storage;
|
|
581
588
|
};
|
|
582
589
|
$Register.prototype.register = function (registered) {
|
|
@@ -628,7 +635,7 @@ var websiteScraperMetadata = $deepFreeze({
|
|
|
628
635
|
mimeTypes: ['text/html'],
|
|
629
636
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
630
637
|
isAvilableInBrowser: false,
|
|
631
|
-
requiredExecutables: [
|
|
638
|
+
requiredExecutables: [],
|
|
632
639
|
}); /* <- TODO: [🤛] */
|
|
633
640
|
/**
|
|
634
641
|
* Registration of known scraper metadata
|
|
@@ -1106,12 +1113,12 @@ function TODO_USE() {
|
|
|
1106
1113
|
*/
|
|
1107
1114
|
function getScraperIntermediateSource(source, options) {
|
|
1108
1115
|
return __awaiter(this, void 0, void 0, function () {
|
|
1109
|
-
var sourceFilename, url, rootDirname, cacheDirname,
|
|
1116
|
+
var sourceFilename, url, rootDirname, cacheDirname, intermediateFilesStrategy, extension, isVerbose, hash, semanticName, pieces, name, cacheFilename, isDestroyed, fileHandler;
|
|
1110
1117
|
return __generator(this, function (_a) {
|
|
1111
1118
|
switch (_a.label) {
|
|
1112
1119
|
case 0:
|
|
1113
1120
|
sourceFilename = source.filename, url = source.url;
|
|
1114
|
-
rootDirname = options.rootDirname, cacheDirname = options.cacheDirname,
|
|
1121
|
+
rootDirname = options.rootDirname, cacheDirname = options.cacheDirname, intermediateFilesStrategy = options.intermediateFilesStrategy, extension = options.extension, isVerbose = options.isVerbose;
|
|
1115
1122
|
hash = SHA256(
|
|
1116
1123
|
// <- TODO: [🥬] Encapsulate sha256 to some private utility function
|
|
1117
1124
|
hexEncoder.parse(sourceFilename || url || 'untitled'))
|
|
@@ -1121,7 +1128,7 @@ function getScraperIntermediateSource(source, options) {
|
|
|
1121
1128
|
pieces = ['intermediate', semanticName, hash].filter(function (piece) { return piece !== ''; });
|
|
1122
1129
|
name = pieces.join('-').split('--').join('-');
|
|
1123
1130
|
// <- TODO: Use MAX_FILENAME_LENGTH
|
|
1124
|
-
TODO_USE(rootDirname); // <- TODO:
|
|
1131
|
+
TODO_USE(rootDirname); // <- TODO: [😡]
|
|
1125
1132
|
cacheFilename = join.apply(void 0, __spreadArray(__spreadArray([process.cwd(),
|
|
1126
1133
|
cacheDirname], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), [name], false)).split('\\')
|
|
1127
1134
|
.join('/') +
|
|
@@ -1141,7 +1148,7 @@ function getScraperIntermediateSource(source, options) {
|
|
|
1141
1148
|
return __generator(this, function (_a) {
|
|
1142
1149
|
switch (_a.label) {
|
|
1143
1150
|
case 0:
|
|
1144
|
-
if (!
|
|
1151
|
+
if (!(intermediateFilesStrategy === 'HIDE_AND_CLEAN')) return [3 /*break*/, 2];
|
|
1145
1152
|
if (isVerbose) {
|
|
1146
1153
|
console.info('legacyDocumentScraper: Clening cache');
|
|
1147
1154
|
}
|
|
@@ -1165,7 +1172,7 @@ function getScraperIntermediateSource(source, options) {
|
|
|
1165
1172
|
/**
|
|
1166
1173
|
* Note: Not using `FileCacheStorage` for two reasons:
|
|
1167
1174
|
* 1) Need to store more than serialized JSONs
|
|
1168
|
-
* 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO:
|
|
1175
|
+
* 2) Need to switch between a `rootDirname` and `cacheDirname` <- TODO: [😡]
|
|
1169
1176
|
* TODO: [🐱🐉][🧠] Make some smart crop
|
|
1170
1177
|
* Note: [🟢] Code in this file should never be never released in packages that could be imported into browser environment
|
|
1171
1178
|
*/
|
|
@@ -1230,7 +1237,6 @@ function pipelineJsonToString(pipelineJson) {
|
|
|
1230
1237
|
pipelineString += '\n\n';
|
|
1231
1238
|
pipelineString += description;
|
|
1232
1239
|
}
|
|
1233
|
-
// TODO:> const commands: Array<Command>
|
|
1234
1240
|
var commands = [];
|
|
1235
1241
|
if (pipelineUrl) {
|
|
1236
1242
|
commands.push("PIPELINE URL ".concat(pipelineUrl));
|
|
@@ -1286,7 +1292,6 @@ function pipelineJsonToString(pipelineJson) {
|
|
|
1286
1292
|
pipelineString += '\n\n';
|
|
1287
1293
|
pipelineString += description_1;
|
|
1288
1294
|
}
|
|
1289
|
-
// TODO:> const commands: Array<Command>
|
|
1290
1295
|
var commands_1 = [];
|
|
1291
1296
|
var contentLanguage = 'text';
|
|
1292
1297
|
if (templateType === 'PROMPT_TEMPLATE') {
|
|
@@ -2187,6 +2192,7 @@ function assertsExecutionSuccessful(executionResult) {
|
|
|
2187
2192
|
}
|
|
2188
2193
|
}
|
|
2189
2194
|
/**
|
|
2195
|
+
* TODO: [🐚] This function should be removed OR changed OR be completely rewritten
|
|
2190
2196
|
* TODO: [🧠] Can this return type be better typed than void
|
|
2191
2197
|
*/
|
|
2192
2198
|
|
|
@@ -2372,8 +2378,7 @@ $deepFreeze({
|
|
|
2372
2378
|
* Multiple LLM Execution Tools is a proxy server that uses multiple execution tools internally and exposes the executor interface externally.
|
|
2373
2379
|
*
|
|
2374
2380
|
* Note: Internal utility of `joinLlmExecutionTools` but exposed type
|
|
2375
|
-
* @public exported from `@promptbook/
|
|
2376
|
-
* TODO: !!!!!! Export as runtime class not just type
|
|
2381
|
+
* @public exported from `@promptbook/core`
|
|
2377
2382
|
*/
|
|
2378
2383
|
var MultipleLlmExecutionTools = /** @class */ (function () {
|
|
2379
2384
|
/**
|
|
@@ -2875,7 +2880,7 @@ function preparePersona(personaDescription, tools, options) {
|
|
|
2875
2880
|
return __generator(this, function (_d) {
|
|
2876
2881
|
switch (_d.label) {
|
|
2877
2882
|
case 0:
|
|
2878
|
-
_a = options.isVerbose, isVerbose = _a === void 0 ?
|
|
2883
|
+
_a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
|
|
2879
2884
|
if (tools === undefined || tools.llm === undefined) {
|
|
2880
2885
|
throw new MissingToolsError('LLM tools are required for preparing persona');
|
|
2881
2886
|
}
|
|
@@ -2942,7 +2947,7 @@ var $scrapersRegister = new $Register('scraper_constructors');
|
|
|
2942
2947
|
* TODO: [®] DRY Register logic
|
|
2943
2948
|
*/
|
|
2944
2949
|
|
|
2945
|
-
// TODO: !!!!!! Maybe delete this function
|
|
2950
|
+
// TODO: !!!!!!last - Maybe delete this function
|
|
2946
2951
|
/**
|
|
2947
2952
|
* Creates a message with all registered scrapers
|
|
2948
2953
|
*
|
|
@@ -3050,7 +3055,6 @@ function $registeredScrapersMessage() {
|
|
|
3050
3055
|
* @private within the repository
|
|
3051
3056
|
*/
|
|
3052
3057
|
function sourceContentToName(sourceContent) {
|
|
3053
|
-
// TODO: !!!!!! Better name for source than gibberish hash
|
|
3054
3058
|
var hash = SHA256(hexEncoder.parse(JSON.stringify(sourceContent)))
|
|
3055
3059
|
// <- TODO: [🥬] Encapsulate sha256 to some private utility function
|
|
3056
3060
|
.toString( /* hex */)
|
|
@@ -3137,7 +3141,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3137
3141
|
case 0:
|
|
3138
3142
|
sourceContent = knowledgeSource.sourceContent;
|
|
3139
3143
|
name = knowledgeSource.name;
|
|
3140
|
-
_b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ?
|
|
3144
|
+
_b = options || {}, _c = _b.rootDirname, rootDirname = _c === void 0 ? null : _c, _d = _b.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d;
|
|
3141
3145
|
TODO_USE(isVerbose);
|
|
3142
3146
|
if (!name) {
|
|
3143
3147
|
name = sourceContentToName(sourceContent);
|
|
@@ -3227,7 +3231,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3227
3231
|
content = _a.sent();
|
|
3228
3232
|
return [2 /*return*/, new Blob([
|
|
3229
3233
|
content,
|
|
3230
|
-
// <- TODO: !!!!!!
|
|
3234
|
+
// <- TODO: !!!!!! Test that this is working
|
|
3231
3235
|
], { type: mimeType_1 })];
|
|
3232
3236
|
}
|
|
3233
3237
|
});
|
|
@@ -3290,7 +3294,7 @@ function prepareKnowledgePieces(knowledgeSources, tools, options) {
|
|
|
3290
3294
|
return __generator(this, function (_c) {
|
|
3291
3295
|
switch (_c.label) {
|
|
3292
3296
|
case 0:
|
|
3293
|
-
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3297
|
+
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, rootDirname = options.rootDirname, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
|
|
3294
3298
|
knowledgePreparedUnflatten = new Array(knowledgeSources.length);
|
|
3295
3299
|
return [4 /*yield*/, forEachAsync(knowledgeSources, { maxParallelCount: maxParallelCount }, function (knowledgeSource, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
3296
3300
|
var partialPieces, sourceHandler, _a, _b, scraper, partialPiecesUnchecked, e_1_1, pieces;
|
|
@@ -3319,7 +3323,8 @@ function prepareKnowledgePieces(knowledgeSources, tools, options) {
|
|
|
3319
3323
|
case 4:
|
|
3320
3324
|
partialPiecesUnchecked = _d.sent();
|
|
3321
3325
|
if (partialPiecesUnchecked !== null) {
|
|
3322
|
-
partialPieces = partialPiecesUnchecked;
|
|
3326
|
+
partialPieces = __spreadArray([], __read(partialPiecesUnchecked), false);
|
|
3327
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just `partialPieces = partialPiecesUnchecked`
|
|
3323
3328
|
return [3 /*break*/, 6];
|
|
3324
3329
|
}
|
|
3325
3330
|
_d.label = 5;
|
|
@@ -3367,7 +3372,7 @@ TODO: [🧊] This is how it can look in future
|
|
|
3367
3372
|
> /**
|
|
3368
3373
|
> * Unprepared knowledge
|
|
3369
3374
|
> * /
|
|
3370
|
-
> readonly knowledgeSources:
|
|
3375
|
+
> readonly knowledgeSources: ReadonlyArray<KnowledgeSourceJson>;
|
|
3371
3376
|
> };
|
|
3372
3377
|
>
|
|
3373
3378
|
> export async function prepareKnowledgePieces(
|
|
@@ -3425,7 +3430,7 @@ function prepareTemplates(pipeline, tools, options) {
|
|
|
3425
3430
|
return __generator(this, function (_b) {
|
|
3426
3431
|
switch (_b.label) {
|
|
3427
3432
|
case 0:
|
|
3428
|
-
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3433
|
+
_a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a;
|
|
3429
3434
|
templates = pipeline.templates, parameters = pipeline.parameters, knowledgePiecesCount = pipeline.knowledgePiecesCount;
|
|
3430
3435
|
// TODO: [main] !!!!! Apply samples to each template (if missing and is for the template defined)
|
|
3431
3436
|
TODO_USE(parameters);
|
|
@@ -3487,7 +3492,7 @@ function preparePipeline(pipeline, tools, options) {
|
|
|
3487
3492
|
if (isPipelinePrepared(pipeline)) {
|
|
3488
3493
|
return [2 /*return*/, pipeline];
|
|
3489
3494
|
}
|
|
3490
|
-
rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ?
|
|
3495
|
+
rootDirname = options.rootDirname, _a = options.maxParallelCount, maxParallelCount = _a === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _a, _b = options.isVerbose, isVerbose = _b === void 0 ? DEFAULT_IS_VERBOSE : _b;
|
|
3491
3496
|
parameters = pipeline.parameters, templates = pipeline.templates, knowledgeSources = pipeline.knowledgeSources, personas = pipeline.personas;
|
|
3492
3497
|
if (tools === undefined || tools.llm === undefined) {
|
|
3493
3498
|
throw new MissingToolsError('LLM tools are required for preparing the pipeline');
|
|
@@ -3545,7 +3550,9 @@ function preparePipeline(pipeline, tools, options) {
|
|
|
3545
3550
|
// ----- /Templates preparation -----
|
|
3546
3551
|
// Note: Count total usage
|
|
3547
3552
|
currentPreparation.usage = llmToolsWithUsage.getTotalUsage();
|
|
3548
|
-
return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates:
|
|
3553
|
+
return [2 /*return*/, $asDeeplyFrozenSerializableJson('Prepared PipelineJson', __assign(__assign({}, clonePipeline(pipeline)), { templates: __spreadArray([], __read(templatesPrepared), false),
|
|
3554
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just ` templates: templatesPrepared`
|
|
3555
|
+
knowledgeSources: knowledgeSourcesPrepared, knowledgePieces: knowledgePiecesPrepared, personas: preparedPersonas, preparations: __spreadArray([], __read(preparations), false) }))];
|
|
3549
3556
|
}
|
|
3550
3557
|
});
|
|
3551
3558
|
});
|
|
@@ -4516,12 +4523,11 @@ function checkExpectations(expectations, value) {
|
|
|
4516
4523
|
*/
|
|
4517
4524
|
function executeAttempts(options) {
|
|
4518
4525
|
return __awaiter(this, void 0, void 0, function () {
|
|
4519
|
-
var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools,
|
|
4526
|
+
var jokerParameterNames, priority, maxAttempts, preparedContent, parameters, template, preparedPipeline, tools, $executionReport, pipelineIdentification, maxExecutionAttempts, $ongoingTemplateResult, _llms, llmTools, _loop_1, attempt, state_1;
|
|
4520
4527
|
return __generator(this, function (_a) {
|
|
4521
4528
|
switch (_a.label) {
|
|
4522
4529
|
case 0:
|
|
4523
|
-
jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools,
|
|
4524
|
-
maxExecutionAttempts = settings.maxExecutionAttempts;
|
|
4530
|
+
jokerParameterNames = options.jokerParameterNames, priority = options.priority, maxAttempts = options.maxAttempts, preparedContent = options.preparedContent, parameters = options.parameters, template = options.template, preparedPipeline = options.preparedPipeline, tools = options.tools, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, maxExecutionAttempts = options.maxExecutionAttempts;
|
|
4525
4531
|
$ongoingTemplateResult = {
|
|
4526
4532
|
$result: null,
|
|
4527
4533
|
$resultString: null,
|
|
@@ -4887,12 +4893,12 @@ function executeAttempts(options) {
|
|
|
4887
4893
|
*/
|
|
4888
4894
|
function executeFormatSubvalues(options) {
|
|
4889
4895
|
return __awaiter(this, void 0, void 0, function () {
|
|
4890
|
-
var template, jokerParameterNames, parameters, priority,
|
|
4896
|
+
var template, jokerParameterNames, parameters, priority, csvSettings, pipelineIdentification, parameterValue, formatDefinition, subvalueDefinition, formatSettings, resultString;
|
|
4891
4897
|
var _this = this;
|
|
4892
4898
|
return __generator(this, function (_a) {
|
|
4893
4899
|
switch (_a.label) {
|
|
4894
4900
|
case 0:
|
|
4895
|
-
template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority,
|
|
4901
|
+
template = options.template, jokerParameterNames = options.jokerParameterNames, parameters = options.parameters, priority = options.priority, csvSettings = options.csvSettings, pipelineIdentification = options.pipelineIdentification;
|
|
4896
4902
|
if (template.foreach === undefined) {
|
|
4897
4903
|
return [2 /*return*/, /* not await */ executeAttempts(options)];
|
|
4898
4904
|
}
|
|
@@ -4922,7 +4928,7 @@ function executeFormatSubvalues(options) {
|
|
|
4922
4928
|
.join('\n')), "\n\n [\u26F7] This should never happen because subformat name should be validated during parsing\n\n ").concat(block(pipelineIdentification), "\n "); }));
|
|
4923
4929
|
}
|
|
4924
4930
|
if (formatDefinition.formatName === 'CSV') {
|
|
4925
|
-
formatSettings =
|
|
4931
|
+
formatSettings = csvSettings;
|
|
4926
4932
|
// <- TODO: [🤹♂️] More universal, make simmilar pattern for other formats for example \n vs \r\n in text
|
|
4927
4933
|
}
|
|
4928
4934
|
return [4 /*yield*/, subvalueDefinition.mapValues(parameterValue, template.foreach.outputSubparameterName, formatSettings, function (subparameters, index) { return __awaiter(_this, void 0, void 0, function () {
|
|
@@ -5075,13 +5081,12 @@ function getReservedParametersForTemplate(options) {
|
|
|
5075
5081
|
*/
|
|
5076
5082
|
function executeTemplate(options) {
|
|
5077
5083
|
return __awaiter(this, void 0, void 0, function () {
|
|
5078
|
-
var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress,
|
|
5079
|
-
var e_1,
|
|
5080
|
-
return __generator(this, function (
|
|
5081
|
-
switch (
|
|
5084
|
+
var currentTemplate, preparedPipeline, parametersToPass, tools, onProgress, $executionReport, pipelineIdentification, _a, maxExecutionAttempts, name, title, priority, usedParameterNames, dependentParameterNames, definedParameters, _b, _c, _d, definedParameterNames, parameters, _loop_1, _e, _f, parameterName, maxAttempts, jokerParameterNames, preparedContent, resultString;
|
|
5085
|
+
var e_1, _g, _h;
|
|
5086
|
+
return __generator(this, function (_j) {
|
|
5087
|
+
switch (_j.label) {
|
|
5082
5088
|
case 0:
|
|
5083
|
-
currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress,
|
|
5084
|
-
maxExecutionAttempts = settings.maxExecutionAttempts;
|
|
5089
|
+
currentTemplate = options.currentTemplate, preparedPipeline = options.preparedPipeline, parametersToPass = options.parametersToPass, tools = options.tools, onProgress = options.onProgress, $executionReport = options.$executionReport, pipelineIdentification = options.pipelineIdentification, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a;
|
|
5085
5090
|
name = "pipeline-executor-frame-".concat(currentTemplate.name);
|
|
5086
5091
|
title = currentTemplate.title;
|
|
5087
5092
|
priority = preparedPipeline.templates.length - preparedPipeline.templates.indexOf(currentTemplate);
|
|
@@ -5096,7 +5101,7 @@ function executeTemplate(options) {
|
|
|
5096
5101
|
// <- [🍸]
|
|
5097
5102
|
})];
|
|
5098
5103
|
case 1:
|
|
5099
|
-
|
|
5104
|
+
_j.sent();
|
|
5100
5105
|
usedParameterNames = extractParameterNamesFromTemplate(currentTemplate);
|
|
5101
5106
|
dependentParameterNames = new Set(currentTemplate.dependentParameterNames);
|
|
5102
5107
|
// TODO: [👩🏾🤝👩🏻] Use here `mapAvailableToExpectedParameters`
|
|
@@ -5107,15 +5112,15 @@ function executeTemplate(options) {
|
|
|
5107
5112
|
.map(function (name) { return "{".concat(name, "}"); })
|
|
5108
5113
|
.join(', '), "\n\n ").concat(block(pipelineIdentification), "\n\n "); }));
|
|
5109
5114
|
}
|
|
5110
|
-
|
|
5111
|
-
|
|
5115
|
+
_c = (_b = Object).freeze;
|
|
5116
|
+
_d = [{}];
|
|
5112
5117
|
return [4 /*yield*/, getReservedParametersForTemplate({
|
|
5113
5118
|
preparedPipeline: preparedPipeline,
|
|
5114
5119
|
template: currentTemplate,
|
|
5115
5120
|
pipelineIdentification: pipelineIdentification,
|
|
5116
5121
|
})];
|
|
5117
5122
|
case 2:
|
|
5118
|
-
definedParameters =
|
|
5123
|
+
definedParameters = _c.apply(_b, [__assign.apply(void 0, [__assign.apply(void 0, _d.concat([(_j.sent())])), parametersToPass])]);
|
|
5119
5124
|
definedParameterNames = new Set(Object.keys(definedParameters));
|
|
5120
5125
|
parameters = {};
|
|
5121
5126
|
_loop_1 = function (parameterName) {
|
|
@@ -5135,15 +5140,15 @@ function executeTemplate(options) {
|
|
|
5135
5140
|
try {
|
|
5136
5141
|
// Note: [2] Check that all used parameters are defined and removing unused parameters for this template
|
|
5137
5142
|
// TODO: [👩🏾🤝👩🏻] Use here `mapAvailableToExpectedParameters`
|
|
5138
|
-
for (
|
|
5139
|
-
parameterName =
|
|
5143
|
+
for (_e = __values(Array.from(union(definedParameterNames, usedParameterNames, dependentParameterNames))), _f = _e.next(); !_f.done; _f = _e.next()) {
|
|
5144
|
+
parameterName = _f.value;
|
|
5140
5145
|
_loop_1(parameterName);
|
|
5141
5146
|
}
|
|
5142
5147
|
}
|
|
5143
5148
|
catch (e_1_1) { e_1 = { error: e_1_1 }; }
|
|
5144
5149
|
finally {
|
|
5145
5150
|
try {
|
|
5146
|
-
if (
|
|
5151
|
+
if (_f && !_f.done && (_g = _e.return)) _g.call(_e);
|
|
5147
5152
|
}
|
|
5148
5153
|
finally { if (e_1) throw e_1.error; }
|
|
5149
5154
|
}
|
|
@@ -5163,12 +5168,11 @@ function executeTemplate(options) {
|
|
|
5163
5168
|
template: currentTemplate,
|
|
5164
5169
|
preparedPipeline: preparedPipeline,
|
|
5165
5170
|
tools: tools,
|
|
5166
|
-
settings: settings,
|
|
5167
5171
|
$executionReport: $executionReport,
|
|
5168
5172
|
pipelineIdentification: pipelineIdentification,
|
|
5169
5173
|
})];
|
|
5170
5174
|
case 3:
|
|
5171
|
-
resultString =
|
|
5175
|
+
resultString = _j.sent();
|
|
5172
5176
|
return [4 /*yield*/, onProgress({
|
|
5173
5177
|
name: name,
|
|
5174
5178
|
title: title,
|
|
@@ -5180,12 +5184,12 @@ function executeTemplate(options) {
|
|
|
5180
5184
|
// <- [🍸]
|
|
5181
5185
|
})];
|
|
5182
5186
|
case 4:
|
|
5183
|
-
|
|
5184
|
-
return [2 /*return*/, Object.freeze((
|
|
5185
|
-
|
|
5187
|
+
_j.sent();
|
|
5188
|
+
return [2 /*return*/, Object.freeze((_h = {},
|
|
5189
|
+
_h[currentTemplate.resultingParameterName] =
|
|
5186
5190
|
// <- Note: [👩👩👧] No need to detect parameter collision here because pipeline checks logic consistency during construction
|
|
5187
5191
|
resultString,
|
|
5188
|
-
|
|
5192
|
+
_h))];
|
|
5189
5193
|
}
|
|
5190
5194
|
});
|
|
5191
5195
|
});
|
|
@@ -5244,13 +5248,12 @@ function filterJustOutputParameters(options) {
|
|
|
5244
5248
|
*/
|
|
5245
5249
|
function executePipeline(options) {
|
|
5246
5250
|
return __awaiter(this, void 0, void 0, function () {
|
|
5247
|
-
var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification,
|
|
5251
|
+
var inputParameters, tools, onProgress, pipeline, setPreparedPipeline, pipelineIdentification, maxParallelCount, rootDirname, _a, isVerbose, preparedPipeline, errors, warnings, executionReport, isReturned, _b, _c, parameter, e_1_1, _loop_1, _d, _e, parameterName, state_1, e_2_1, parametersToPass, resovedParameterNames_1, unresovedTemplates_1, resolving_1, loopLimit, _loop_2, error_1, usage_1, outputParameters_1, usage, outputParameters;
|
|
5248
5252
|
var e_1, _f, e_2, _g;
|
|
5249
5253
|
return __generator(this, function (_h) {
|
|
5250
5254
|
switch (_h.label) {
|
|
5251
5255
|
case 0:
|
|
5252
|
-
inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification,
|
|
5253
|
-
maxParallelCount = settings.maxParallelCount, rootDirname = settings.rootDirname, _a = settings.isVerbose, isVerbose = _a === void 0 ? IS_VERBOSE : _a;
|
|
5256
|
+
inputParameters = options.inputParameters, tools = options.tools, onProgress = options.onProgress, pipeline = options.pipeline, setPreparedPipeline = options.setPreparedPipeline, pipelineIdentification = options.pipelineIdentification, maxParallelCount = options.maxParallelCount, rootDirname = options.rootDirname, _a = options.isVerbose, isVerbose = _a === void 0 ? DEFAULT_IS_VERBOSE : _a;
|
|
5254
5257
|
preparedPipeline = options.preparedPipeline;
|
|
5255
5258
|
if (!(preparedPipeline === undefined)) return [3 /*break*/, 2];
|
|
5256
5259
|
return [4 /*yield*/, preparePipeline(pipeline, tools, {
|
|
@@ -5435,12 +5438,7 @@ function executePipeline(options) {
|
|
|
5435
5438
|
return [3 /*break*/, 4];
|
|
5436
5439
|
case 3:
|
|
5437
5440
|
unresovedTemplates_1 = unresovedTemplates_1.filter(function (template) { return template !== currentTemplate; });
|
|
5438
|
-
work_1 = executeTemplate({
|
|
5439
|
-
currentTemplate: currentTemplate,
|
|
5440
|
-
preparedPipeline: preparedPipeline,
|
|
5441
|
-
parametersToPass: parametersToPass,
|
|
5442
|
-
tools: tools,
|
|
5443
|
-
onProgress: function (progress) {
|
|
5441
|
+
work_1 = executeTemplate(__assign(__assign({}, options), { currentTemplate: currentTemplate, preparedPipeline: preparedPipeline, parametersToPass: parametersToPass, tools: tools, onProgress: function (progress) {
|
|
5444
5442
|
if (isReturned) {
|
|
5445
5443
|
throw new UnexpectedError(spaceTrim(function (block) { return "\n Can not call `onProgress` after pipeline execution is finished\n\n ".concat(block(pipelineIdentification), "\n\n ").concat(block(JSON.stringify(progress, null, 4)
|
|
5446
5444
|
.split('\n')
|
|
@@ -5450,11 +5448,7 @@ function executePipeline(options) {
|
|
|
5450
5448
|
if (onProgress) {
|
|
5451
5449
|
onProgress(progress);
|
|
5452
5450
|
}
|
|
5453
|
-
},
|
|
5454
|
-
settings: settings,
|
|
5455
|
-
$executionReport: executionReport,
|
|
5456
|
-
pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }),
|
|
5457
|
-
})
|
|
5451
|
+
}, $executionReport: executionReport, pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n Template name: ").concat(currentTemplate.name, "\n Template title: ").concat(currentTemplate.title, "\n "); }) }))
|
|
5458
5452
|
.then(function (newParametersToPass) {
|
|
5459
5453
|
parametersToPass = __assign(__assign({}, newParametersToPass), parametersToPass);
|
|
5460
5454
|
resovedParameterNames_1 = __spreadArray(__spreadArray([], __read(resovedParameterNames_1), false), [currentTemplate.resultingParameterName], false);
|
|
@@ -5558,8 +5552,7 @@ function executePipeline(options) {
|
|
|
5558
5552
|
*/
|
|
5559
5553
|
function createPipelineExecutor(options) {
|
|
5560
5554
|
var _this = this;
|
|
5561
|
-
var pipeline = options.pipeline, tools = options.tools, _a = options.
|
|
5562
|
-
var _b = settings.maxExecutionAttempts, maxExecutionAttempts = _b === void 0 ? MAX_EXECUTION_ATTEMPTS : _b, _c = settings.maxParallelCount, maxParallelCount = _c === void 0 ? MAX_PARALLEL_COUNT : _c, _d = settings.csvSettings, csvSettings = _d === void 0 ? DEFAULT_CSV_SETTINGS : _d, _e = settings.isVerbose, isVerbose = _e === void 0 ? IS_VERBOSE : _e, _f = settings.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _f === void 0 ? false : _f, _g = settings.rootDirname, rootDirname = _g === void 0 ? null : _g;
|
|
5555
|
+
var pipeline = options.pipeline, tools = options.tools, _a = options.maxExecutionAttempts, maxExecutionAttempts = _a === void 0 ? DEFAULT_MAX_EXECUTION_ATTEMPTS : _a, _b = options.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = options.csvSettings, csvSettings = _c === void 0 ? DEFAULT_CSV_SETTINGS : _c, _d = options.isVerbose, isVerbose = _d === void 0 ? DEFAULT_IS_VERBOSE : _d, _e = options.isNotPreparedWarningSupressed, isNotPreparedWarningSupressed = _e === void 0 ? false : _e, _f = options.rootDirname, rootDirname = _f === void 0 ? null : _f;
|
|
5563
5556
|
validatePipeline(pipeline);
|
|
5564
5557
|
var pipelineIdentification = (function () {
|
|
5565
5558
|
// Note: This is a 😐 implementation of [🚞]
|
|
@@ -5593,14 +5586,12 @@ function createPipelineExecutor(options) {
|
|
|
5593
5586
|
tools: tools,
|
|
5594
5587
|
onProgress: onProgress,
|
|
5595
5588
|
pipelineIdentification: spaceTrim(function (block) { return "\n ".concat(block(pipelineIdentification), "\n ").concat(runCount === 1 ? '' : "Run #".concat(runCount), "\n "); }),
|
|
5596
|
-
|
|
5597
|
-
|
|
5598
|
-
|
|
5599
|
-
|
|
5600
|
-
|
|
5601
|
-
|
|
5602
|
-
rootDirname: rootDirname,
|
|
5603
|
-
},
|
|
5589
|
+
maxExecutionAttempts: maxExecutionAttempts,
|
|
5590
|
+
maxParallelCount: maxParallelCount,
|
|
5591
|
+
csvSettings: csvSettings,
|
|
5592
|
+
isVerbose: isVerbose,
|
|
5593
|
+
isNotPreparedWarningSupressed: isNotPreparedWarningSupressed,
|
|
5594
|
+
rootDirname: rootDirname,
|
|
5604
5595
|
})];
|
|
5605
5596
|
});
|
|
5606
5597
|
}); };
|
|
@@ -5622,7 +5613,7 @@ var markdownScraperMetadata = $deepFreeze({
|
|
|
5622
5613
|
mimeTypes: ['text/markdown', 'text/plain'],
|
|
5623
5614
|
documentationUrl: 'https://github.com/webgptorg/promptbook/discussions/@@',
|
|
5624
5615
|
isAvilableInBrowser: true,
|
|
5625
|
-
requiredExecutables: [
|
|
5616
|
+
requiredExecutables: [],
|
|
5626
5617
|
}); /* <- TODO: [🤛] */
|
|
5627
5618
|
/**
|
|
5628
5619
|
* Registration of known scraper metadata
|
|
@@ -5666,7 +5657,7 @@ var MarkdownScraper = /** @class */ (function () {
|
|
|
5666
5657
|
return __generator(this, function (_k) {
|
|
5667
5658
|
switch (_k.label) {
|
|
5668
5659
|
case 0:
|
|
5669
|
-
_a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ?
|
|
5660
|
+
_a = this.options, _b = _a.maxParallelCount, maxParallelCount = _b === void 0 ? DEFAULT_MAX_PARALLEL_COUNT : _b, _c = _a.isVerbose, isVerbose = _c === void 0 ? DEFAULT_IS_VERBOSE : _c;
|
|
5670
5661
|
llm = this.tools.llm;
|
|
5671
5662
|
if (llm === undefined) {
|
|
5672
5663
|
throw new MissingToolsError('LLM tools are required for scraping external files');
|
|
@@ -5765,7 +5756,8 @@ var MarkdownScraper = /** @class */ (function () {
|
|
|
5765
5756
|
embeddingResult = _c.sent();
|
|
5766
5757
|
index.push({
|
|
5767
5758
|
modelName: embeddingResult.modelName,
|
|
5768
|
-
position: embeddingResult.content,
|
|
5759
|
+
position: __spreadArray([], __read(embeddingResult.content), false),
|
|
5760
|
+
// <- TODO: [🪓] Here should be no need for spreading new array, just `position: embeddingResult.content`
|
|
5769
5761
|
});
|
|
5770
5762
|
_c.label = 6;
|
|
5771
5763
|
case 6: return [3 /*break*/, 8];
|
|
@@ -5805,32 +5797,29 @@ var MarkdownScraper = /** @class */ (function () {
|
|
|
5805
5797
|
*/
|
|
5806
5798
|
|
|
5807
5799
|
/**
|
|
5808
|
-
*
|
|
5800
|
+
* Create a new showdown converter instance
|
|
5809
5801
|
*
|
|
5810
|
-
* @
|
|
5811
|
-
* @private for markdown and html knowledge scrapers
|
|
5812
|
-
*/
|
|
5813
|
-
var markdownConverter = new Converter({
|
|
5814
|
-
flavor: 'github', // <- TODO: !!!!!! Explicitly specify the flavor of promptbook markdown
|
|
5815
|
-
/*
|
|
5816
|
-
> import showdownHighlight from 'showdown-highlight';
|
|
5817
|
-
> extensions: [
|
|
5818
|
-
> showdownHighlight({
|
|
5819
|
-
> // Whether to add the classes to the <pre> tag, default is false
|
|
5820
|
-
> pre: true,
|
|
5821
|
-
> // Whether to use hljs' auto language detection, default is true
|
|
5822
|
-
> auto_detection: true,
|
|
5823
|
-
> }),
|
|
5824
|
-
> ],
|
|
5825
|
-
*/
|
|
5826
|
-
});
|
|
5827
|
-
/**
|
|
5828
|
-
* TODO: !!!!!! Figure out better name not to confuse with `Converter`
|
|
5829
|
-
* TODO: !!!!!! Lazy-make converter
|
|
5802
|
+
* @private utility of `WebsiteScraper`
|
|
5830
5803
|
*/
|
|
5804
|
+
function createShowdownConverter() {
|
|
5805
|
+
return new Converter({
|
|
5806
|
+
flavor: 'github',
|
|
5807
|
+
/*
|
|
5808
|
+
> import showdownHighlight from 'showdown-highlight';
|
|
5809
|
+
> extensions: [
|
|
5810
|
+
> showdownHighlight({
|
|
5811
|
+
> // Whether to add the classes to the <pre> tag, default is false
|
|
5812
|
+
> pre: true,
|
|
5813
|
+
> // Whether to use hljs' auto language detection, default is true
|
|
5814
|
+
> auto_detection: true,
|
|
5815
|
+
> }),
|
|
5816
|
+
> ],
|
|
5817
|
+
*/
|
|
5818
|
+
});
|
|
5819
|
+
}
|
|
5831
5820
|
|
|
5832
5821
|
/**
|
|
5833
|
-
* Scraper for
|
|
5822
|
+
* Scraper for websites
|
|
5834
5823
|
*
|
|
5835
5824
|
* @see `documentationUrl` for more details
|
|
5836
5825
|
* @public exported from `@promptbook/website-crawler`
|
|
@@ -5840,6 +5829,7 @@ var WebsiteScraper = /** @class */ (function () {
|
|
|
5840
5829
|
this.tools = tools;
|
|
5841
5830
|
this.options = options;
|
|
5842
5831
|
this.markdownScraper = new MarkdownScraper(tools, options);
|
|
5832
|
+
this.showdownConverter = createShowdownConverter();
|
|
5843
5833
|
}
|
|
5844
5834
|
Object.defineProperty(WebsiteScraper.prototype, "metadata", {
|
|
5845
5835
|
/**
|
|
@@ -5860,12 +5850,11 @@ var WebsiteScraper = /** @class */ (function () {
|
|
|
5860
5850
|
return __awaiter(this, void 0, void 0, function () {
|
|
5861
5851
|
var _a, _b,
|
|
5862
5852
|
// TODO: [🧠] Maybe in node use headless browser not just JSDOM
|
|
5863
|
-
|
|
5864
|
-
rootDirname, _c, cacheDirname, _d, isCacheCleaned, _e, isVerbose, jsdom, _f, reader, article, html, i, cacheFilehandler, markdown;
|
|
5853
|
+
rootDirname, _c, cacheDirname, _d, intermediateFilesStrategy, _e, isVerbose, jsdom, _f, reader, article, html, i, cacheFilehandler, markdown;
|
|
5865
5854
|
return __generator(this, function (_g) {
|
|
5866
5855
|
switch (_g.label) {
|
|
5867
5856
|
case 0:
|
|
5868
|
-
_a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ?
|
|
5857
|
+
_a = this.options, _b = _a.rootDirname, rootDirname = _b === void 0 ? process.cwd() : _b, _c = _a.cacheDirname, cacheDirname = _c === void 0 ? DEFAULT_SCRAPE_CACHE_DIRNAME : _c, _d = _a.intermediateFilesStrategy, intermediateFilesStrategy = _d === void 0 ? DEFAULT_INTERMEDIATE_FILES_STRATEGY : _d, _e = _a.isVerbose, isVerbose = _e === void 0 ? DEFAULT_IS_VERBOSE : _e;
|
|
5869
5858
|
// TODO: !!!!!! Does this work in browser? Make it work.
|
|
5870
5859
|
if (source.url === null) {
|
|
5871
5860
|
throw new KnowledgeScrapeError('Website scraper requires URL');
|
|
@@ -5878,10 +5867,6 @@ var WebsiteScraper = /** @class */ (function () {
|
|
|
5878
5867
|
}]))();
|
|
5879
5868
|
reader = new Readability(jsdom.window.document);
|
|
5880
5869
|
article = reader.parse();
|
|
5881
|
-
console.log(article);
|
|
5882
|
-
return [4 /*yield*/, forTime(10000)];
|
|
5883
|
-
case 2:
|
|
5884
|
-
_g.sent();
|
|
5885
5870
|
html = (article === null || article === void 0 ? void 0 : article.content) || (article === null || article === void 0 ? void 0 : article.textContent) || jsdom.window.document.body.innerHTML;
|
|
5886
5871
|
// Note: Unwrap html such as it is convertable by `markdownConverter`
|
|
5887
5872
|
for (i = 0; i < 2; i++) {
|
|
@@ -5893,16 +5878,16 @@ var WebsiteScraper = /** @class */ (function () {
|
|
|
5893
5878
|
return [4 /*yield*/, getScraperIntermediateSource(source, {
|
|
5894
5879
|
rootDirname: rootDirname,
|
|
5895
5880
|
cacheDirname: cacheDirname,
|
|
5896
|
-
|
|
5881
|
+
intermediateFilesStrategy: intermediateFilesStrategy,
|
|
5897
5882
|
extension: 'html',
|
|
5898
5883
|
isVerbose: isVerbose,
|
|
5899
5884
|
})];
|
|
5900
|
-
case
|
|
5885
|
+
case 2:
|
|
5901
5886
|
cacheFilehandler = _g.sent();
|
|
5902
5887
|
return [4 /*yield*/, writeFile(cacheFilehandler.filename, html, 'utf-8')];
|
|
5903
|
-
case
|
|
5888
|
+
case 3:
|
|
5904
5889
|
_g.sent();
|
|
5905
|
-
markdown =
|
|
5890
|
+
markdown = this.showdownConverter.makeMarkdown(html, jsdom.window.document);
|
|
5906
5891
|
return [2 /*return*/, __assign(__assign({}, cacheFilehandler), { markdown: markdown })];
|
|
5907
5892
|
}
|
|
5908
5893
|
});
|
|
@@ -5946,7 +5931,6 @@ var WebsiteScraper = /** @class */ (function () {
|
|
|
5946
5931
|
return WebsiteScraper;
|
|
5947
5932
|
}());
|
|
5948
5933
|
/**
|
|
5949
|
-
* TODO: !!!!!! Put into separate package
|
|
5950
5934
|
* TODO: [👣] Scraped website in .md can act as cache item - there is no need to run conversion each time
|
|
5951
5935
|
* TODO: [🪂] Do it in parallel 11:11
|
|
5952
5936
|
* Note: No need to aggregate usage here, it is done by intercepting the llmTools
|