@promptbook/node 0.84.0-11 → 0.84.0-13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +131 -100
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +4 -2
- package/esm/typings/src/config.d.ts +9 -1
- package/esm/typings/src/execution/FilesystemTools.d.ts +1 -1
- package/esm/typings/src/wizzard/wizzard.d.ts +7 -1
- package/package.json +2 -2
- package/umd/index.umd.js +133 -102
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
import colors from 'colors';
|
|
2
2
|
import { stat, access, constants, readFile, writeFile, readdir, mkdir, unlink } from 'fs/promises';
|
|
3
|
-
import {
|
|
3
|
+
import { basename, join, dirname } from 'path';
|
|
4
4
|
import spaceTrim, { spaceTrim as spaceTrim$1 } from 'spacetrim';
|
|
5
5
|
import { format } from 'prettier';
|
|
6
6
|
import parserHtml from 'prettier/parser-html';
|
|
7
7
|
import { forTime } from 'waitasecond';
|
|
8
8
|
import { unparse, parse } from 'papaparse';
|
|
9
|
-
import { SHA256 } from 'crypto-js';
|
|
10
9
|
import hexEncoder from 'crypto-js/enc-hex';
|
|
10
|
+
import sha256 from 'crypto-js/sha256';
|
|
11
|
+
import { SHA256 } from 'crypto-js';
|
|
11
12
|
import { lookup } from 'mime-types';
|
|
12
13
|
import { spawn } from 'child_process';
|
|
13
14
|
import * as dotenv from 'dotenv';
|
|
14
|
-
import sha256 from 'crypto-js/sha256';
|
|
15
15
|
|
|
16
16
|
// ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
|
|
17
17
|
/**
|
|
@@ -27,7 +27,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
27
27
|
* @generated
|
|
28
28
|
* @see https://github.com/webgptorg/promptbook
|
|
29
29
|
*/
|
|
30
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-
|
|
30
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
|
|
31
31
|
/**
|
|
32
32
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
33
33
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -262,6 +262,15 @@ var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹♂️]
|
|
|
262
262
|
* @public exported from `@promptbook/core`
|
|
263
263
|
*/
|
|
264
264
|
var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹♂️]
|
|
265
|
+
// <- TODO: [🕝] Make also `BOOKS_DIRNAME_ALTERNATIVES`
|
|
266
|
+
/**
|
|
267
|
+
* Where to store the temporary downloads
|
|
268
|
+
*
|
|
269
|
+
* Note: When the folder does not exist, it is created recursively
|
|
270
|
+
*
|
|
271
|
+
* @public exported from `@promptbook/core`
|
|
272
|
+
*/
|
|
273
|
+
var DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
|
|
265
274
|
/**
|
|
266
275
|
* Where to store the scrape cache
|
|
267
276
|
*
|
|
@@ -5396,6 +5405,15 @@ function knowledgeSourceContentToName(knowledgeSourceContent) {
|
|
|
5396
5405
|
* TODO: [🐱🐉][🧠] Make some smart crop NOT source-i-m-pavol-a-develop-... BUT source-i-m-pavol-a-developer-...
|
|
5397
5406
|
*/
|
|
5398
5407
|
|
|
5408
|
+
/**
|
|
5409
|
+
* @@@
|
|
5410
|
+
*
|
|
5411
|
+
* @private for `FileCacheStorage`
|
|
5412
|
+
*/
|
|
5413
|
+
function nameToSubfolderPath(name) {
|
|
5414
|
+
return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()];
|
|
5415
|
+
}
|
|
5416
|
+
|
|
5399
5417
|
/**
|
|
5400
5418
|
* Convert file extension to mime type
|
|
5401
5419
|
*
|
|
@@ -5451,6 +5469,46 @@ function isFileExisting(filename, fs) {
|
|
|
5451
5469
|
* TODO: [🖇] What about symlinks?
|
|
5452
5470
|
*/
|
|
5453
5471
|
|
|
5472
|
+
/**
|
|
5473
|
+
* Removes emojis from a string and fix whitespaces
|
|
5474
|
+
*
|
|
5475
|
+
* @param text with emojis
|
|
5476
|
+
* @returns text without emojis
|
|
5477
|
+
* @public exported from `@promptbook/utils`
|
|
5478
|
+
*/
|
|
5479
|
+
function removeEmojis(text) {
|
|
5480
|
+
// Replace emojis (and also ZWJ sequence) with hyphens
|
|
5481
|
+
text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
|
|
5482
|
+
text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
|
|
5483
|
+
text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
|
|
5484
|
+
text = text.replace(/\p{Extended_Pictographic}/gu, '');
|
|
5485
|
+
return text;
|
|
5486
|
+
}
|
|
5487
|
+
|
|
5488
|
+
/**
|
|
5489
|
+
* @@@
|
|
5490
|
+
*
|
|
5491
|
+
* @param value @@@
|
|
5492
|
+
* @returns @@@
|
|
5493
|
+
* @example @@@
|
|
5494
|
+
* @public exported from `@promptbook/utils`
|
|
5495
|
+
*/
|
|
5496
|
+
function titleToName(value) {
|
|
5497
|
+
if (isValidUrl(value)) {
|
|
5498
|
+
value = value.replace(/^https?:\/\//, '');
|
|
5499
|
+
value = value.replace(/\.html$/, '');
|
|
5500
|
+
}
|
|
5501
|
+
else if (isValidFilePath(value)) {
|
|
5502
|
+
value = basename(value);
|
|
5503
|
+
// Note: Keeping extension in the name
|
|
5504
|
+
}
|
|
5505
|
+
value = value.split('/').join('-');
|
|
5506
|
+
value = removeEmojis(value);
|
|
5507
|
+
value = normalizeToKebabCase(value);
|
|
5508
|
+
// TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
|
|
5509
|
+
return value;
|
|
5510
|
+
}
|
|
5511
|
+
|
|
5454
5512
|
/**
|
|
5455
5513
|
* The built-in `fetch' function with a lightweight error handling wrapper as default fetch function used in Promptbook scrapers
|
|
5456
5514
|
*
|
|
@@ -5486,10 +5544,11 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
|
|
|
5486
5544
|
function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
5487
5545
|
var _a;
|
|
5488
5546
|
return __awaiter(this, void 0, void 0, function () {
|
|
5489
|
-
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, filename_1, fileExtension, mimeType;
|
|
5490
|
-
return __generator(this, function (
|
|
5491
|
-
switch (
|
|
5547
|
+
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
|
|
5548
|
+
return __generator(this, function (_l) {
|
|
5549
|
+
switch (_l.label) {
|
|
5492
5550
|
case 0:
|
|
5551
|
+
console.log('!!! makeKnowledgeSourceHandler', knowledgeSource);
|
|
5493
5552
|
_b = tools.fetch, fetch = _b === void 0 ? scraperFetch : _b;
|
|
5494
5553
|
knowledgeSourceContent = knowledgeSource.knowledgeSourceContent;
|
|
5495
5554
|
name = knowledgeSource.name;
|
|
@@ -5497,54 +5556,74 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
5497
5556
|
if (!name) {
|
|
5498
5557
|
name = knowledgeSourceContentToName(knowledgeSourceContent);
|
|
5499
5558
|
}
|
|
5500
|
-
if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/,
|
|
5559
|
+
if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 5];
|
|
5501
5560
|
url = knowledgeSourceContent;
|
|
5502
5561
|
return [4 /*yield*/, fetch(url)];
|
|
5503
5562
|
case 1:
|
|
5504
|
-
response_1 =
|
|
5563
|
+
response_1 = _l.sent();
|
|
5505
5564
|
mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
5506
|
-
|
|
5507
|
-
|
|
5508
|
-
|
|
5509
|
-
|
|
5510
|
-
|
|
5511
|
-
|
|
5512
|
-
|
|
5513
|
-
|
|
5514
|
-
|
|
5515
|
-
|
|
5516
|
-
|
|
5517
|
-
|
|
5518
|
-
|
|
5519
|
-
|
|
5520
|
-
|
|
5521
|
-
|
|
5522
|
-
|
|
5523
|
-
|
|
5524
|
-
|
|
5525
|
-
|
|
5526
|
-
|
|
5527
|
-
|
|
5528
|
-
|
|
5565
|
+
if (tools.fs === undefined || !url.endsWith('.pdf')) {
|
|
5566
|
+
return [2 /*return*/, {
|
|
5567
|
+
source: name,
|
|
5568
|
+
filename: null,
|
|
5569
|
+
url: url,
|
|
5570
|
+
mimeType: mimeType,
|
|
5571
|
+
/*
|
|
5572
|
+
TODO: [🥽]
|
|
5573
|
+
> async asBlob() {
|
|
5574
|
+
> // TODO: [👨🏻🤝👨🏻] This can be called multiple times BUT when called second time, response in already consumed
|
|
5575
|
+
> const content = await response.blob();
|
|
5576
|
+
> return content;
|
|
5577
|
+
> },
|
|
5578
|
+
*/
|
|
5579
|
+
asJson: function () {
|
|
5580
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
5581
|
+
var content;
|
|
5582
|
+
return __generator(this, function (_a) {
|
|
5583
|
+
switch (_a.label) {
|
|
5584
|
+
case 0: return [4 /*yield*/, response_1.json()];
|
|
5585
|
+
case 1:
|
|
5586
|
+
content = _a.sent();
|
|
5587
|
+
return [2 /*return*/, content];
|
|
5588
|
+
}
|
|
5589
|
+
});
|
|
5529
5590
|
});
|
|
5530
|
-
}
|
|
5531
|
-
|
|
5532
|
-
|
|
5533
|
-
|
|
5534
|
-
|
|
5535
|
-
|
|
5536
|
-
|
|
5537
|
-
|
|
5538
|
-
|
|
5539
|
-
|
|
5540
|
-
|
|
5541
|
-
}
|
|
5591
|
+
},
|
|
5592
|
+
asText: function () {
|
|
5593
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
5594
|
+
var content;
|
|
5595
|
+
return __generator(this, function (_a) {
|
|
5596
|
+
switch (_a.label) {
|
|
5597
|
+
case 0: return [4 /*yield*/, response_1.text()];
|
|
5598
|
+
case 1:
|
|
5599
|
+
content = _a.sent();
|
|
5600
|
+
return [2 /*return*/, content];
|
|
5601
|
+
}
|
|
5602
|
+
});
|
|
5542
5603
|
});
|
|
5543
|
-
}
|
|
5544
|
-
}
|
|
5545
|
-
|
|
5604
|
+
},
|
|
5605
|
+
}];
|
|
5606
|
+
}
|
|
5607
|
+
basename = url.split('/').pop() || titleToName(url);
|
|
5608
|
+
hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
|
|
5609
|
+
rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
5610
|
+
filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
|
|
5611
|
+
return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
|
|
5546
5612
|
case 2:
|
|
5547
|
-
|
|
5613
|
+
_l.sent();
|
|
5614
|
+
_g = (_f = tools.fs).writeFile;
|
|
5615
|
+
_h = [join(rootDirname_1, filepath)];
|
|
5616
|
+
_k = (_j = Buffer).from;
|
|
5617
|
+
return [4 /*yield*/, response_1.arrayBuffer()];
|
|
5618
|
+
case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
|
|
5619
|
+
case 4:
|
|
5620
|
+
_l.sent();
|
|
5621
|
+
// TODO: !!!!!!!! Check the file security
|
|
5622
|
+
// TODO: !!!!!!!! Check the file size (if it is not too big)
|
|
5623
|
+
// TODO: !!!!!!!! Delete the file
|
|
5624
|
+
return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
|
|
5625
|
+
case 5:
|
|
5626
|
+
if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
|
|
5548
5627
|
if (tools.fs === undefined) {
|
|
5549
5628
|
throw new EnvironmentMismatchError('Can not import file knowledge without filesystem tools');
|
|
5550
5629
|
// <- TODO: [🧠] What is the best error type here`
|
|
@@ -5557,8 +5636,8 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
5557
5636
|
fileExtension = getFileExtension(filename_1);
|
|
5558
5637
|
mimeType = extensionToMimeType(fileExtension || '');
|
|
5559
5638
|
return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
|
|
5560
|
-
case
|
|
5561
|
-
if (!(
|
|
5639
|
+
case 6:
|
|
5640
|
+
if (!(_l.sent())) {
|
|
5562
5641
|
throw new NotFoundError(spaceTrim(function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(knowledgeSourceContent), "\n\n Full file path:\n ").concat(block(filename_1), "\n "); }));
|
|
5563
5642
|
}
|
|
5564
5643
|
// TODO: [🧠][😿] Test security file - file is scoped to the project (BUT maybe do this in `filesystemTools`)
|
|
@@ -5604,7 +5683,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
5604
5683
|
});
|
|
5605
5684
|
},
|
|
5606
5685
|
}];
|
|
5607
|
-
case
|
|
5686
|
+
case 7: return [2 /*return*/, {
|
|
5608
5687
|
source: name,
|
|
5609
5688
|
filename: null,
|
|
5610
5689
|
url: null,
|
|
@@ -6715,22 +6794,6 @@ function normalizeTo_camelCase(text, _isFirstLetterCapital) {
|
|
|
6715
6794
|
* TODO: [🌺] Use some intermediate util splitWords
|
|
6716
6795
|
*/
|
|
6717
6796
|
|
|
6718
|
-
/**
|
|
6719
|
-
* Removes emojis from a string and fix whitespaces
|
|
6720
|
-
*
|
|
6721
|
-
* @param text with emojis
|
|
6722
|
-
* @returns text without emojis
|
|
6723
|
-
* @public exported from `@promptbook/utils`
|
|
6724
|
-
*/
|
|
6725
|
-
function removeEmojis(text) {
|
|
6726
|
-
// Replace emojis (and also ZWJ sequence) with hyphens
|
|
6727
|
-
text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
|
|
6728
|
-
text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
|
|
6729
|
-
text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
|
|
6730
|
-
text = text.replace(/\p{Extended_Pictographic}/gu, '');
|
|
6731
|
-
return text;
|
|
6732
|
-
}
|
|
6733
|
-
|
|
6734
6797
|
/**
|
|
6735
6798
|
* Removes quotes from a string
|
|
6736
6799
|
*
|
|
@@ -8922,30 +8985,6 @@ function flattenMarkdown(markdown) {
|
|
|
8922
8985
|
* NOW we are working just with markdown string and its good enough
|
|
8923
8986
|
*/
|
|
8924
8987
|
|
|
8925
|
-
/**
|
|
8926
|
-
* @@@
|
|
8927
|
-
*
|
|
8928
|
-
* @param value @@@
|
|
8929
|
-
* @returns @@@
|
|
8930
|
-
* @example @@@
|
|
8931
|
-
* @public exported from `@promptbook/utils`
|
|
8932
|
-
*/
|
|
8933
|
-
function titleToName(value) {
|
|
8934
|
-
if (isValidUrl(value)) {
|
|
8935
|
-
value = value.replace(/^https?:\/\//, '');
|
|
8936
|
-
value = value.replace(/\.html$/, '');
|
|
8937
|
-
}
|
|
8938
|
-
else if (isValidFilePath(value)) {
|
|
8939
|
-
value = basename(value);
|
|
8940
|
-
// Note: Keeping extension in the name
|
|
8941
|
-
}
|
|
8942
|
-
value = value.split('/').join('-');
|
|
8943
|
-
value = removeEmojis(value);
|
|
8944
|
-
value = normalizeToKebabCase(value);
|
|
8945
|
-
// TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
|
|
8946
|
-
return value;
|
|
8947
|
-
}
|
|
8948
|
-
|
|
8949
8988
|
/**
|
|
8950
8989
|
* Compile pipeline from string (markdown) format to JSON format synchronously
|
|
8951
8990
|
*
|
|
@@ -9678,6 +9717,7 @@ function $provideFilesystemForNode(options) {
|
|
|
9678
9717
|
readFile: readFile,
|
|
9679
9718
|
writeFile: writeFile,
|
|
9680
9719
|
readdir: readdir,
|
|
9720
|
+
mkdir: mkdir,
|
|
9681
9721
|
};
|
|
9682
9722
|
}
|
|
9683
9723
|
/**
|
|
@@ -11296,15 +11336,6 @@ function stringifyPipelineJson(pipeline) {
|
|
|
11296
11336
|
* TODO: [🍙] Make some standard order of json properties
|
|
11297
11337
|
*/
|
|
11298
11338
|
|
|
11299
|
-
/**
|
|
11300
|
-
* @@@
|
|
11301
|
-
*
|
|
11302
|
-
* @private for `FileCacheStorage`
|
|
11303
|
-
*/
|
|
11304
|
-
function nameToSubfolderPath(name) {
|
|
11305
|
-
return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()];
|
|
11306
|
-
}
|
|
11307
|
-
|
|
11308
11339
|
/**
|
|
11309
11340
|
* @@@
|
|
11310
11341
|
*
|