@promptbook/markdown-utils 0.84.0-11 → 0.84.0-13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +136 -90
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +4 -2
- package/esm/typings/src/config.d.ts +9 -1
- package/esm/typings/src/execution/FilesystemTools.d.ts +1 -1
- package/esm/typings/src/wizzard/wizzard.d.ts +7 -1
- package/package.json +1 -1
- package/umd/index.umd.js +138 -92
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -2,9 +2,10 @@ import spaceTrim, { spaceTrim as spaceTrim$1 } from 'spacetrim';
|
|
|
2
2
|
import { format } from 'prettier';
|
|
3
3
|
import parserHtml from 'prettier/parser-html';
|
|
4
4
|
import { forTime } from 'waitasecond';
|
|
5
|
-
import { join, basename } from 'path';
|
|
6
|
-
import { SHA256 } from 'crypto-js';
|
|
7
5
|
import hexEncoder from 'crypto-js/enc-hex';
|
|
6
|
+
import sha256 from 'crypto-js/sha256';
|
|
7
|
+
import { basename, join, dirname } from 'path';
|
|
8
|
+
import { SHA256 } from 'crypto-js';
|
|
8
9
|
import { lookup } from 'mime-types';
|
|
9
10
|
import { unparse, parse } from 'papaparse';
|
|
10
11
|
|
|
@@ -22,7 +23,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
22
23
|
* @generated
|
|
23
24
|
* @see https://github.com/webgptorg/promptbook
|
|
24
25
|
*/
|
|
25
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-
|
|
26
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
|
|
26
27
|
/**
|
|
27
28
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
28
29
|
* Note: [๐] Ignore a discrepancy between file name and entity name
|
|
@@ -808,6 +809,12 @@ var SMALL_NUMBER = 0.001;
|
|
|
808
809
|
* @private within the repository - too low-level in comparison with other `MAX_...`
|
|
809
810
|
*/
|
|
810
811
|
var IMMEDIATE_TIME = 10;
|
|
812
|
+
/**
|
|
813
|
+
* The maximum length of the (generated) filename
|
|
814
|
+
*
|
|
815
|
+
* @public exported from `@promptbook/core`
|
|
816
|
+
*/
|
|
817
|
+
var MAX_FILENAME_LENGTH = 30;
|
|
811
818
|
/**
|
|
812
819
|
* Strategy for caching the intermediate results for knowledge sources
|
|
813
820
|
*
|
|
@@ -827,6 +834,15 @@ var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
|
|
|
827
834
|
* @public exported from `@promptbook/core`
|
|
828
835
|
*/
|
|
829
836
|
var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
|
|
837
|
+
// <- TODO: [๐] Make also `BOOKS_DIRNAME_ALTERNATIVES`
|
|
838
|
+
/**
|
|
839
|
+
* Where to store the temporary downloads
|
|
840
|
+
*
|
|
841
|
+
* Note: When the folder does not exist, it is created recursively
|
|
842
|
+
*
|
|
843
|
+
* @public exported from `@promptbook/core`
|
|
844
|
+
*/
|
|
845
|
+
var DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
|
|
830
846
|
/**
|
|
831
847
|
* Where to store the scrape cache
|
|
832
848
|
*
|
|
@@ -3534,6 +3550,15 @@ function knowledgeSourceContentToName(knowledgeSourceContent) {
|
|
|
3534
3550
|
* TODO: [๐ฑโ๐][๐ง ] Make some smart crop NOT source-i-m-pavol-a-develop-... BUT source-i-m-pavol-a-developer-...
|
|
3535
3551
|
*/
|
|
3536
3552
|
|
|
3553
|
+
/**
|
|
3554
|
+
* @@@
|
|
3555
|
+
*
|
|
3556
|
+
* @private for `FileCacheStorage`
|
|
3557
|
+
*/
|
|
3558
|
+
function nameToSubfolderPath(name) {
|
|
3559
|
+
return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()];
|
|
3560
|
+
}
|
|
3561
|
+
|
|
3537
3562
|
/**
|
|
3538
3563
|
* Convert file extension to mime type
|
|
3539
3564
|
*
|
|
@@ -3589,6 +3614,46 @@ function isFileExisting(filename, fs) {
|
|
|
3589
3614
|
* TODO: [๐] What about symlinks?
|
|
3590
3615
|
*/
|
|
3591
3616
|
|
|
3617
|
+
/**
|
|
3618
|
+
* Removes emojis from a string and fix whitespaces
|
|
3619
|
+
*
|
|
3620
|
+
* @param text with emojis
|
|
3621
|
+
* @returns text without emojis
|
|
3622
|
+
* @public exported from `@promptbook/utils`
|
|
3623
|
+
*/
|
|
3624
|
+
function removeEmojis(text) {
|
|
3625
|
+
// Replace emojis (and also ZWJ sequence) with hyphens
|
|
3626
|
+
text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
|
|
3627
|
+
text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
|
|
3628
|
+
text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
|
|
3629
|
+
text = text.replace(/\p{Extended_Pictographic}/gu, '');
|
|
3630
|
+
return text;
|
|
3631
|
+
}
|
|
3632
|
+
|
|
3633
|
+
/**
|
|
3634
|
+
* @@@
|
|
3635
|
+
*
|
|
3636
|
+
* @param value @@@
|
|
3637
|
+
* @returns @@@
|
|
3638
|
+
* @example @@@
|
|
3639
|
+
* @public exported from `@promptbook/utils`
|
|
3640
|
+
*/
|
|
3641
|
+
function titleToName(value) {
|
|
3642
|
+
if (isValidUrl(value)) {
|
|
3643
|
+
value = value.replace(/^https?:\/\//, '');
|
|
3644
|
+
value = value.replace(/\.html$/, '');
|
|
3645
|
+
}
|
|
3646
|
+
else if (isValidFilePath(value)) {
|
|
3647
|
+
value = basename(value);
|
|
3648
|
+
// Note: Keeping extension in the name
|
|
3649
|
+
}
|
|
3650
|
+
value = value.split('/').join('-');
|
|
3651
|
+
value = removeEmojis(value);
|
|
3652
|
+
value = normalizeToKebabCase(value);
|
|
3653
|
+
// TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
|
|
3654
|
+
return value;
|
|
3655
|
+
}
|
|
3656
|
+
|
|
3592
3657
|
/**
|
|
3593
3658
|
* The built-in `fetch' function with a lightweight error handling wrapper as default fetch function used in Promptbook scrapers
|
|
3594
3659
|
*
|
|
@@ -3624,10 +3689,11 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
|
|
|
3624
3689
|
function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
3625
3690
|
var _a;
|
|
3626
3691
|
return __awaiter(this, void 0, void 0, function () {
|
|
3627
|
-
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, filename_1, fileExtension, mimeType;
|
|
3628
|
-
return __generator(this, function (
|
|
3629
|
-
switch (
|
|
3692
|
+
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
|
|
3693
|
+
return __generator(this, function (_l) {
|
|
3694
|
+
switch (_l.label) {
|
|
3630
3695
|
case 0:
|
|
3696
|
+
console.log('!!! makeKnowledgeSourceHandler', knowledgeSource);
|
|
3631
3697
|
_b = tools.fetch, fetch = _b === void 0 ? scraperFetch : _b;
|
|
3632
3698
|
knowledgeSourceContent = knowledgeSource.knowledgeSourceContent;
|
|
3633
3699
|
name = knowledgeSource.name;
|
|
@@ -3635,54 +3701,74 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3635
3701
|
if (!name) {
|
|
3636
3702
|
name = knowledgeSourceContentToName(knowledgeSourceContent);
|
|
3637
3703
|
}
|
|
3638
|
-
if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/,
|
|
3704
|
+
if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 5];
|
|
3639
3705
|
url = knowledgeSourceContent;
|
|
3640
3706
|
return [4 /*yield*/, fetch(url)];
|
|
3641
3707
|
case 1:
|
|
3642
|
-
response_1 =
|
|
3708
|
+
response_1 = _l.sent();
|
|
3643
3709
|
mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
3644
|
-
|
|
3645
|
-
|
|
3646
|
-
|
|
3647
|
-
|
|
3648
|
-
|
|
3649
|
-
|
|
3650
|
-
|
|
3651
|
-
|
|
3652
|
-
|
|
3653
|
-
|
|
3654
|
-
|
|
3655
|
-
|
|
3656
|
-
|
|
3657
|
-
|
|
3658
|
-
|
|
3659
|
-
|
|
3660
|
-
|
|
3661
|
-
|
|
3662
|
-
|
|
3663
|
-
|
|
3664
|
-
|
|
3665
|
-
|
|
3666
|
-
|
|
3710
|
+
if (tools.fs === undefined || !url.endsWith('.pdf')) {
|
|
3711
|
+
return [2 /*return*/, {
|
|
3712
|
+
source: name,
|
|
3713
|
+
filename: null,
|
|
3714
|
+
url: url,
|
|
3715
|
+
mimeType: mimeType,
|
|
3716
|
+
/*
|
|
3717
|
+
TODO: [🥽]
|
|
3718
|
+
> async asBlob() {
|
|
3719
|
+
> // TODO: [๐จ๐ปโ๐คโ๐จ๐ป] This can be called multiple times BUT when called second time, response in already consumed
|
|
3720
|
+
> const content = await response.blob();
|
|
3721
|
+
> return content;
|
|
3722
|
+
> },
|
|
3723
|
+
*/
|
|
3724
|
+
asJson: function () {
|
|
3725
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3726
|
+
var content;
|
|
3727
|
+
return __generator(this, function (_a) {
|
|
3728
|
+
switch (_a.label) {
|
|
3729
|
+
case 0: return [4 /*yield*/, response_1.json()];
|
|
3730
|
+
case 1:
|
|
3731
|
+
content = _a.sent();
|
|
3732
|
+
return [2 /*return*/, content];
|
|
3733
|
+
}
|
|
3734
|
+
});
|
|
3667
3735
|
});
|
|
3668
|
-
}
|
|
3669
|
-
|
|
3670
|
-
|
|
3671
|
-
|
|
3672
|
-
|
|
3673
|
-
|
|
3674
|
-
|
|
3675
|
-
|
|
3676
|
-
|
|
3677
|
-
|
|
3678
|
-
|
|
3679
|
-
}
|
|
3736
|
+
},
|
|
3737
|
+
asText: function () {
|
|
3738
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3739
|
+
var content;
|
|
3740
|
+
return __generator(this, function (_a) {
|
|
3741
|
+
switch (_a.label) {
|
|
3742
|
+
case 0: return [4 /*yield*/, response_1.text()];
|
|
3743
|
+
case 1:
|
|
3744
|
+
content = _a.sent();
|
|
3745
|
+
return [2 /*return*/, content];
|
|
3746
|
+
}
|
|
3747
|
+
});
|
|
3680
3748
|
});
|
|
3681
|
-
}
|
|
3682
|
-
}
|
|
3683
|
-
|
|
3749
|
+
},
|
|
3750
|
+
}];
|
|
3751
|
+
}
|
|
3752
|
+
basename = url.split('/').pop() || titleToName(url);
|
|
3753
|
+
hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
|
|
3754
|
+
rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
3755
|
+
filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [๐] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
|
|
3756
|
+
return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
|
|
3684
3757
|
case 2:
|
|
3685
|
-
|
|
3758
|
+
_l.sent();
|
|
3759
|
+
_g = (_f = tools.fs).writeFile;
|
|
3760
|
+
_h = [join(rootDirname_1, filepath)];
|
|
3761
|
+
_k = (_j = Buffer).from;
|
|
3762
|
+
return [4 /*yield*/, response_1.arrayBuffer()];
|
|
3763
|
+
case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
|
|
3764
|
+
case 4:
|
|
3765
|
+
_l.sent();
|
|
3766
|
+
// TODO: !!!!!!!! Check the file security
|
|
3767
|
+
// TODO: !!!!!!!! Check the file size (if it is not too big)
|
|
3768
|
+
// TODO: !!!!!!!! Delete the file
|
|
3769
|
+
return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
|
|
3770
|
+
case 5:
|
|
3771
|
+
if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
|
|
3686
3772
|
if (tools.fs === undefined) {
|
|
3687
3773
|
throw new EnvironmentMismatchError('Can not import file knowledge without filesystem tools');
|
|
3688
3774
|
// <- TODO: [🧠] What is the best error type here`
|
|
@@ -3695,8 +3781,8 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3695
3781
|
fileExtension = getFileExtension(filename_1);
|
|
3696
3782
|
mimeType = extensionToMimeType(fileExtension || '');
|
|
3697
3783
|
return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
|
|
3698
|
-
case
|
|
3699
|
-
if (!(
|
|
3784
|
+
case 6:
|
|
3785
|
+
if (!(_l.sent())) {
|
|
3700
3786
|
throw new NotFoundError(spaceTrim(function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(knowledgeSourceContent), "\n\n Full file path:\n ").concat(block(filename_1), "\n "); }));
|
|
3701
3787
|
}
|
|
3702
3788
|
// TODO: [๐ง ][๐ฟ] Test security file - file is scoped to the project (BUT maybe do this in `filesystemTools`)
|
|
@@ -3742,7 +3828,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3742
3828
|
});
|
|
3743
3829
|
},
|
|
3744
3830
|
}];
|
|
3745
|
-
case
|
|
3831
|
+
case 7: return [2 /*return*/, {
|
|
3746
3832
|
source: name,
|
|
3747
3833
|
filename: null,
|
|
3748
3834
|
url: null,
|
|
@@ -5972,46 +6058,6 @@ function createPipelineExecutor(options) {
|
|
|
5972
6058
|
* TODO: [๐] Change onProgress to object that represents the running execution, can be subscribed via RxJS to and also awaited
|
|
5973
6059
|
*/
|
|
5974
6060
|
|
|
5975
|
-
/**
|
|
5976
|
-
* Removes emojis from a string and fix whitespaces
|
|
5977
|
-
*
|
|
5978
|
-
* @param text with emojis
|
|
5979
|
-
* @returns text without emojis
|
|
5980
|
-
* @public exported from `@promptbook/utils`
|
|
5981
|
-
*/
|
|
5982
|
-
function removeEmojis(text) {
|
|
5983
|
-
// Replace emojis (and also ZWJ sequence) with hyphens
|
|
5984
|
-
text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
|
|
5985
|
-
text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
|
|
5986
|
-
text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
|
|
5987
|
-
text = text.replace(/\p{Extended_Pictographic}/gu, '');
|
|
5988
|
-
return text;
|
|
5989
|
-
}
|
|
5990
|
-
|
|
5991
|
-
/**
|
|
5992
|
-
* @@@
|
|
5993
|
-
*
|
|
5994
|
-
* @param value @@@
|
|
5995
|
-
* @returns @@@
|
|
5996
|
-
* @example @@@
|
|
5997
|
-
* @public exported from `@promptbook/utils`
|
|
5998
|
-
*/
|
|
5999
|
-
function titleToName(value) {
|
|
6000
|
-
if (isValidUrl(value)) {
|
|
6001
|
-
value = value.replace(/^https?:\/\//, '');
|
|
6002
|
-
value = value.replace(/\.html$/, '');
|
|
6003
|
-
}
|
|
6004
|
-
else if (isValidFilePath(value)) {
|
|
6005
|
-
value = basename(value);
|
|
6006
|
-
// Note: Keeping extension in the name
|
|
6007
|
-
}
|
|
6008
|
-
value = value.split('/').join('-');
|
|
6009
|
-
value = removeEmojis(value);
|
|
6010
|
-
value = normalizeToKebabCase(value);
|
|
6011
|
-
// TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
|
|
6012
|
-
return value;
|
|
6013
|
-
}
|
|
6014
|
-
|
|
6015
6061
|
/**
|
|
6016
6062
|
* Metadata of the scraper
|
|
6017
6063
|
*
|