@promptbook/markdown-utils 0.84.0-12 → 0.84.0-13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +89 -47
- package/esm/index.es.js.map +1 -1
- package/package.json +1 -1
- package/umd/index.umd.js +89 -47
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -23,7 +23,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
23
23
|
* @generated
|
|
24
24
|
* @see https://github.com/webgptorg/promptbook
|
|
25
25
|
*/
|
|
26
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-
|
|
26
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
|
|
27
27
|
/**
|
|
28
28
|
* TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
|
|
29
29
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -3206,22 +3206,6 @@ function $registeredScrapersMessage(availableScrapers) {
|
|
|
3206
3206
|
* TODO: [®] DRY Register logic
|
|
3207
3207
|
*/
|
|
3208
3208
|
|
|
3209
|
-
/**
|
|
3210
|
-
* Removes emojis from a string and fix whitespaces
|
|
3211
|
-
*
|
|
3212
|
-
* @param text with emojis
|
|
3213
|
-
* @returns text without emojis
|
|
3214
|
-
* @public exported from `@promptbook/utils`
|
|
3215
|
-
*/
|
|
3216
|
-
function removeEmojis(text) {
|
|
3217
|
-
// Replace emojis (and also ZWJ sequence) with hyphens
|
|
3218
|
-
text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
|
|
3219
|
-
text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
|
|
3220
|
-
text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
|
|
3221
|
-
text = text.replace(/\p{Extended_Pictographic}/gu, '');
|
|
3222
|
-
return text;
|
|
3223
|
-
}
|
|
3224
|
-
|
|
3225
3209
|
var defaultDiacriticsRemovalMap = [
|
|
3226
3210
|
{
|
|
3227
3211
|
base: 'A',
|
|
@@ -3545,30 +3529,6 @@ function normalizeToKebabCase(text) {
|
|
|
3545
3529
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
3546
3530
|
*/
|
|
3547
3531
|
|
|
3548
|
-
/**
|
|
3549
|
-
* @@@
|
|
3550
|
-
*
|
|
3551
|
-
* @param value @@@
|
|
3552
|
-
* @returns @@@
|
|
3553
|
-
* @example @@@
|
|
3554
|
-
* @public exported from `@promptbook/utils`
|
|
3555
|
-
*/
|
|
3556
|
-
function titleToName(value) {
|
|
3557
|
-
if (isValidUrl(value)) {
|
|
3558
|
-
value = value.replace(/^https?:\/\//, '');
|
|
3559
|
-
value = value.replace(/\.html$/, '');
|
|
3560
|
-
}
|
|
3561
|
-
else if (isValidFilePath(value)) {
|
|
3562
|
-
value = basename(value);
|
|
3563
|
-
// Note: Keeping extension in the name
|
|
3564
|
-
}
|
|
3565
|
-
value = value.split('/').join('-');
|
|
3566
|
-
value = removeEmojis(value);
|
|
3567
|
-
value = normalizeToKebabCase(value);
|
|
3568
|
-
// TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
|
|
3569
|
-
return value;
|
|
3570
|
-
}
|
|
3571
|
-
|
|
3572
3532
|
/**
|
|
3573
3533
|
* Creates unique name for the source
|
|
3574
3534
|
*
|
|
@@ -3654,6 +3614,46 @@ function isFileExisting(filename, fs) {
|
|
|
3654
3614
|
* TODO: [🖇] What about symlinks?
|
|
3655
3615
|
*/
|
|
3656
3616
|
|
|
3617
|
+
/**
|
|
3618
|
+
* Removes emojis from a string and fix whitespaces
|
|
3619
|
+
*
|
|
3620
|
+
* @param text with emojis
|
|
3621
|
+
* @returns text without emojis
|
|
3622
|
+
* @public exported from `@promptbook/utils`
|
|
3623
|
+
*/
|
|
3624
|
+
function removeEmojis(text) {
|
|
3625
|
+
// Replace emojis (and also ZWJ sequence) with hyphens
|
|
3626
|
+
text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
|
|
3627
|
+
text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
|
|
3628
|
+
text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
|
|
3629
|
+
text = text.replace(/\p{Extended_Pictographic}/gu, '');
|
|
3630
|
+
return text;
|
|
3631
|
+
}
|
|
3632
|
+
|
|
3633
|
+
/**
|
|
3634
|
+
* @@@
|
|
3635
|
+
*
|
|
3636
|
+
* @param value @@@
|
|
3637
|
+
* @returns @@@
|
|
3638
|
+
* @example @@@
|
|
3639
|
+
* @public exported from `@promptbook/utils`
|
|
3640
|
+
*/
|
|
3641
|
+
function titleToName(value) {
|
|
3642
|
+
if (isValidUrl(value)) {
|
|
3643
|
+
value = value.replace(/^https?:\/\//, '');
|
|
3644
|
+
value = value.replace(/\.html$/, '');
|
|
3645
|
+
}
|
|
3646
|
+
else if (isValidFilePath(value)) {
|
|
3647
|
+
value = basename(value);
|
|
3648
|
+
// Note: Keeping extension in the name
|
|
3649
|
+
}
|
|
3650
|
+
value = value.split('/').join('-');
|
|
3651
|
+
value = removeEmojis(value);
|
|
3652
|
+
value = normalizeToKebabCase(value);
|
|
3653
|
+
// TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
|
|
3654
|
+
return value;
|
|
3655
|
+
}
|
|
3656
|
+
|
|
3657
3657
|
/**
|
|
3658
3658
|
* The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
|
|
3659
3659
|
*
|
|
@@ -3689,7 +3689,7 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
|
|
|
3689
3689
|
function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
3690
3690
|
var _a;
|
|
3691
3691
|
return __awaiter(this, void 0, void 0, function () {
|
|
3692
|
-
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url,
|
|
3692
|
+
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
|
|
3693
3693
|
return __generator(this, function (_l) {
|
|
3694
3694
|
switch (_l.label) {
|
|
3695
3695
|
case 0:
|
|
@@ -3705,19 +3705,61 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3705
3705
|
url = knowledgeSourceContent;
|
|
3706
3706
|
return [4 /*yield*/, fetch(url)];
|
|
3707
3707
|
case 1:
|
|
3708
|
-
|
|
3709
|
-
mimeType = ((_a =
|
|
3710
|
-
|
|
3708
|
+
response_1 = _l.sent();
|
|
3709
|
+
mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
3710
|
+
if (tools.fs === undefined || !url.endsWith('.pdf')) {
|
|
3711
|
+
return [2 /*return*/, {
|
|
3712
|
+
source: name,
|
|
3713
|
+
filename: null,
|
|
3714
|
+
url: url,
|
|
3715
|
+
mimeType: mimeType,
|
|
3716
|
+
/*
|
|
3717
|
+
TODO: [🥽]
|
|
3718
|
+
> async asBlob() {
|
|
3719
|
+
> // TODO: [👨🏻🤝👨🏻] This can be called multiple times, BUT when called a second time, the response is already consumed
|
|
3720
|
+
> const content = await response.blob();
|
|
3721
|
+
> return content;
|
|
3722
|
+
> },
|
|
3723
|
+
*/
|
|
3724
|
+
asJson: function () {
|
|
3725
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3726
|
+
var content;
|
|
3727
|
+
return __generator(this, function (_a) {
|
|
3728
|
+
switch (_a.label) {
|
|
3729
|
+
case 0: return [4 /*yield*/, response_1.json()];
|
|
3730
|
+
case 1:
|
|
3731
|
+
content = _a.sent();
|
|
3732
|
+
return [2 /*return*/, content];
|
|
3733
|
+
}
|
|
3734
|
+
});
|
|
3735
|
+
});
|
|
3736
|
+
},
|
|
3737
|
+
asText: function () {
|
|
3738
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3739
|
+
var content;
|
|
3740
|
+
return __generator(this, function (_a) {
|
|
3741
|
+
switch (_a.label) {
|
|
3742
|
+
case 0: return [4 /*yield*/, response_1.text()];
|
|
3743
|
+
case 1:
|
|
3744
|
+
content = _a.sent();
|
|
3745
|
+
return [2 /*return*/, content];
|
|
3746
|
+
}
|
|
3747
|
+
});
|
|
3748
|
+
});
|
|
3749
|
+
},
|
|
3750
|
+
}];
|
|
3751
|
+
}
|
|
3752
|
+
basename = url.split('/').pop() || titleToName(url);
|
|
3711
3753
|
hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
|
|
3712
3754
|
rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
3713
|
-
filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(
|
|
3755
|
+
filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
|
|
3714
3756
|
return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
|
|
3715
3757
|
case 2:
|
|
3716
3758
|
_l.sent();
|
|
3717
3759
|
_g = (_f = tools.fs).writeFile;
|
|
3718
3760
|
_h = [join(rootDirname_1, filepath)];
|
|
3719
3761
|
_k = (_j = Buffer).from;
|
|
3720
|
-
return [4 /*yield*/,
|
|
3762
|
+
return [4 /*yield*/, response_1.arrayBuffer()];
|
|
3721
3763
|
case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
|
|
3722
3764
|
case 4:
|
|
3723
3765
|
_l.sent();
|