@promptbook/markdown-utils 0.84.0-12 → 0.84.0-14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +103 -50
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +4 -0
- package/esm/typings/src/config.d.ts +12 -0
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.d.ts +10 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.test.d.ts +1 -0
- package/package.json +1 -1
- package/umd/index.umd.js +102 -49
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -6,7 +6,7 @@ import hexEncoder from 'crypto-js/enc-hex';
|
|
|
6
6
|
import sha256 from 'crypto-js/sha256';
|
|
7
7
|
import { basename, join, dirname } from 'path';
|
|
8
8
|
import { SHA256 } from 'crypto-js';
|
|
9
|
-
import { lookup } from 'mime-types';
|
|
9
|
+
import { lookup, extension } from 'mime-types';
|
|
10
10
|
import { unparse, parse } from 'papaparse';
|
|
11
11
|
|
|
12
12
|
// ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
|
|
@@ -23,7 +23,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
23
23
|
* @generated
|
|
24
24
|
* @see https://github.com/webgptorg/promptbook
|
|
25
25
|
*/
|
|
26
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-
|
|
26
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-13';
|
|
27
27
|
/**
|
|
28
28
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
29
29
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -3206,22 +3206,6 @@ function $registeredScrapersMessage(availableScrapers) {
|
|
|
3206
3206
|
* TODO: [®] DRY Register logic
|
|
3207
3207
|
*/
|
|
3208
3208
|
|
|
3209
|
-
/**
|
|
3210
|
-
* Removes emojis from a string and fix whitespaces
|
|
3211
|
-
*
|
|
3212
|
-
* @param text with emojis
|
|
3213
|
-
* @returns text without emojis
|
|
3214
|
-
* @public exported from `@promptbook/utils`
|
|
3215
|
-
*/
|
|
3216
|
-
function removeEmojis(text) {
|
|
3217
|
-
// Replace emojis (and also ZWJ sequence) with hyphens
|
|
3218
|
-
text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
|
|
3219
|
-
text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
|
|
3220
|
-
text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
|
|
3221
|
-
text = text.replace(/\p{Extended_Pictographic}/gu, '');
|
|
3222
|
-
return text;
|
|
3223
|
-
}
|
|
3224
|
-
|
|
3225
3209
|
var defaultDiacriticsRemovalMap = [
|
|
3226
3210
|
{
|
|
3227
3211
|
base: 'A',
|
|
@@ -3545,30 +3529,6 @@ function normalizeToKebabCase(text) {
|
|
|
3545
3529
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
3546
3530
|
*/
|
|
3547
3531
|
|
|
3548
|
-
/**
|
|
3549
|
-
* @@@
|
|
3550
|
-
*
|
|
3551
|
-
* @param value @@@
|
|
3552
|
-
* @returns @@@
|
|
3553
|
-
* @example @@@
|
|
3554
|
-
* @public exported from `@promptbook/utils`
|
|
3555
|
-
*/
|
|
3556
|
-
function titleToName(value) {
|
|
3557
|
-
if (isValidUrl(value)) {
|
|
3558
|
-
value = value.replace(/^https?:\/\//, '');
|
|
3559
|
-
value = value.replace(/\.html$/, '');
|
|
3560
|
-
}
|
|
3561
|
-
else if (isValidFilePath(value)) {
|
|
3562
|
-
value = basename(value);
|
|
3563
|
-
// Note: Keeping extension in the name
|
|
3564
|
-
}
|
|
3565
|
-
value = value.split('/').join('-');
|
|
3566
|
-
value = removeEmojis(value);
|
|
3567
|
-
value = normalizeToKebabCase(value);
|
|
3568
|
-
// TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
|
|
3569
|
-
return value;
|
|
3570
|
-
}
|
|
3571
|
-
|
|
3572
3532
|
/**
|
|
3573
3533
|
* Creates unique name for the source
|
|
3574
3534
|
*
|
|
@@ -3654,6 +3614,57 @@ function isFileExisting(filename, fs) {
|
|
|
3654
3614
|
* TODO: [🖇] What about symlinks?
|
|
3655
3615
|
*/
|
|
3656
3616
|
|
|
3617
|
+
/**
|
|
3618
|
+
* Convert mime type to file extension
|
|
3619
|
+
*
|
|
3620
|
+
* Note: If the mime type is invalid, `null` is returned
|
|
3621
|
+
*
|
|
3622
|
+
* @private within the repository
|
|
3623
|
+
*/
|
|
3624
|
+
function mimeTypeToExtension(value) {
|
|
3625
|
+
return extension(value) || null;
|
|
3626
|
+
}
|
|
3627
|
+
|
|
3628
|
+
/**
|
|
3629
|
+
* Removes emojis from a string and fix whitespaces
|
|
3630
|
+
*
|
|
3631
|
+
* @param text with emojis
|
|
3632
|
+
* @returns text without emojis
|
|
3633
|
+
* @public exported from `@promptbook/utils`
|
|
3634
|
+
*/
|
|
3635
|
+
function removeEmojis(text) {
|
|
3636
|
+
// Replace emojis (and also ZWJ sequence) with hyphens
|
|
3637
|
+
text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
|
|
3638
|
+
text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
|
|
3639
|
+
text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
|
|
3640
|
+
text = text.replace(/\p{Extended_Pictographic}/gu, '');
|
|
3641
|
+
return text;
|
|
3642
|
+
}
|
|
3643
|
+
|
|
3644
|
+
/**
|
|
3645
|
+
* @@@
|
|
3646
|
+
*
|
|
3647
|
+
* @param value @@@
|
|
3648
|
+
* @returns @@@
|
|
3649
|
+
* @example @@@
|
|
3650
|
+
* @public exported from `@promptbook/utils`
|
|
3651
|
+
*/
|
|
3652
|
+
function titleToName(value) {
|
|
3653
|
+
if (isValidUrl(value)) {
|
|
3654
|
+
value = value.replace(/^https?:\/\//, '');
|
|
3655
|
+
value = value.replace(/\.html$/, '');
|
|
3656
|
+
}
|
|
3657
|
+
else if (isValidFilePath(value)) {
|
|
3658
|
+
value = basename(value);
|
|
3659
|
+
// Note: Keeping extension in the name
|
|
3660
|
+
}
|
|
3661
|
+
value = value.split('/').join('-');
|
|
3662
|
+
value = removeEmojis(value);
|
|
3663
|
+
value = normalizeToKebabCase(value);
|
|
3664
|
+
// TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
|
|
3665
|
+
return value;
|
|
3666
|
+
}
|
|
3667
|
+
|
|
3657
3668
|
/**
|
|
3658
3669
|
* The built-in `fetch' function with a lightweight error handling wrapper as default fetch function used in Promptbook scrapers
|
|
3659
3670
|
*
|
|
@@ -3689,7 +3700,7 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
|
|
|
3689
3700
|
function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
3690
3701
|
var _a;
|
|
3691
3702
|
return __awaiter(this, void 0, void 0, function () {
|
|
3692
|
-
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url,
|
|
3703
|
+
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
|
|
3693
3704
|
return __generator(this, function (_l) {
|
|
3694
3705
|
switch (_l.label) {
|
|
3695
3706
|
case 0:
|
|
@@ -3705,25 +3716,67 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3705
3716
|
url = knowledgeSourceContent;
|
|
3706
3717
|
return [4 /*yield*/, fetch(url)];
|
|
3707
3718
|
case 1:
|
|
3708
|
-
|
|
3709
|
-
mimeType = ((_a =
|
|
3710
|
-
|
|
3719
|
+
response_1 = _l.sent();
|
|
3720
|
+
mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
3721
|
+
if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
|
|
3722
|
+
return [2 /*return*/, {
|
|
3723
|
+
source: name,
|
|
3724
|
+
filename: null,
|
|
3725
|
+
url: url,
|
|
3726
|
+
mimeType: mimeType,
|
|
3727
|
+
/*
|
|
3728
|
+
TODO: [🥽]
|
|
3729
|
+
> async asBlob() {
|
|
3730
|
+
> // TODO: [👨🏻🤝👨🏻] This can be called multiple times BUT when called second time, response in already consumed
|
|
3731
|
+
> const content = await response.blob();
|
|
3732
|
+
> return content;
|
|
3733
|
+
> },
|
|
3734
|
+
*/
|
|
3735
|
+
asJson: function () {
|
|
3736
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3737
|
+
var content;
|
|
3738
|
+
return __generator(this, function (_a) {
|
|
3739
|
+
switch (_a.label) {
|
|
3740
|
+
case 0: return [4 /*yield*/, response_1.json()];
|
|
3741
|
+
case 1:
|
|
3742
|
+
content = _a.sent();
|
|
3743
|
+
return [2 /*return*/, content];
|
|
3744
|
+
}
|
|
3745
|
+
});
|
|
3746
|
+
});
|
|
3747
|
+
},
|
|
3748
|
+
asText: function () {
|
|
3749
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3750
|
+
var content;
|
|
3751
|
+
return __generator(this, function (_a) {
|
|
3752
|
+
switch (_a.label) {
|
|
3753
|
+
case 0: return [4 /*yield*/, response_1.text()];
|
|
3754
|
+
case 1:
|
|
3755
|
+
content = _a.sent();
|
|
3756
|
+
return [2 /*return*/, content];
|
|
3757
|
+
}
|
|
3758
|
+
});
|
|
3759
|
+
});
|
|
3760
|
+
},
|
|
3761
|
+
}];
|
|
3762
|
+
}
|
|
3763
|
+
basename = url.split('/').pop() || titleToName(url);
|
|
3711
3764
|
hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
|
|
3712
3765
|
rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
3713
|
-
filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(
|
|
3766
|
+
filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
|
|
3714
3767
|
return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
|
|
3715
3768
|
case 2:
|
|
3716
3769
|
_l.sent();
|
|
3717
3770
|
_g = (_f = tools.fs).writeFile;
|
|
3718
3771
|
_h = [join(rootDirname_1, filepath)];
|
|
3719
3772
|
_k = (_j = Buffer).from;
|
|
3720
|
-
return [4 /*yield*/,
|
|
3773
|
+
return [4 /*yield*/, response_1.arrayBuffer()];
|
|
3721
3774
|
case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
|
|
3722
3775
|
case 4:
|
|
3723
3776
|
_l.sent();
|
|
3724
|
-
// TODO:
|
|
3777
|
+
// TODO: [💵] Check the file security
|
|
3725
3778
|
// TODO: !!!!!!!! Check the file size (if it is not too big)
|
|
3726
|
-
// TODO: !!!!!!!! Delete the file
|
|
3779
|
+
// TODO: !!!!!!!! Delete the file after the scraping is done
|
|
3727
3780
|
return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
|
|
3728
3781
|
case 5:
|
|
3729
3782
|
if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
|