@promptbook/markdown-utils 0.84.0-12 → 0.84.0-14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +103 -50
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +4 -0
- package/esm/typings/src/config.d.ts +12 -0
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.d.ts +10 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.test.d.ts +1 -0
- package/package.json +1 -1
- package/umd/index.umd.js +102 -49
- package/umd/index.umd.js.map +1 -1
|
@@ -12,6 +12,8 @@ import { LOGO_LIGHT_SRC } from '../config';
|
|
|
12
12
|
import { LOGO_DARK_SRC } from '../config';
|
|
13
13
|
import { DEFAULT_BOOK_TITLE } from '../config';
|
|
14
14
|
import { DEFAULT_TASK_TITLE } from '../config';
|
|
15
|
+
import { DEFAULT_PROMPT_TASK_TITLE } from '../config';
|
|
16
|
+
import { DEFAULT_BOOK_OUTPUT_PARAMETER_NAME } from '../config';
|
|
15
17
|
import { MAX_FILENAME_LENGTH } from '../config';
|
|
16
18
|
import { DEFAULT_INTERMEDIATE_FILES_STRATEGY } from '../config';
|
|
17
19
|
import { DEFAULT_MAX_PARALLEL_COUNT } from '../config';
|
|
@@ -139,6 +141,8 @@ export { LOGO_LIGHT_SRC };
|
|
|
139
141
|
export { LOGO_DARK_SRC };
|
|
140
142
|
export { DEFAULT_BOOK_TITLE };
|
|
141
143
|
export { DEFAULT_TASK_TITLE };
|
|
144
|
+
export { DEFAULT_PROMPT_TASK_TITLE };
|
|
145
|
+
export { DEFAULT_BOOK_OUTPUT_PARAMETER_NAME };
|
|
142
146
|
export { MAX_FILENAME_LENGTH };
|
|
143
147
|
export { DEFAULT_INTERMEDIATE_FILES_STRATEGY };
|
|
144
148
|
export { DEFAULT_MAX_PARALLEL_COUNT };
|
|
@@ -65,6 +65,18 @@ export declare const DEFAULT_BOOK_TITLE = "\u2728 Untitled Book";
|
|
|
65
65
|
* @public exported from `@promptbook/core`
|
|
66
66
|
*/
|
|
67
67
|
export declare const DEFAULT_TASK_TITLE = "Task";
|
|
68
|
+
/**
|
|
69
|
+
* When the title of the prompt task is not provided, the default title is used
|
|
70
|
+
*
|
|
71
|
+
* @public exported from `@promptbook/core`
|
|
72
|
+
*/
|
|
73
|
+
export declare const DEFAULT_PROMPT_TASK_TITLE = "Prompt";
|
|
74
|
+
/**
|
|
75
|
+
* When the pipeline is flat and no name of return parameter is provided, this name is used
|
|
76
|
+
*
|
|
77
|
+
* @public exported from `@promptbook/core`
|
|
78
|
+
*/
|
|
79
|
+
export declare const DEFAULT_BOOK_OUTPUT_PARAMETER_NAME = "result";
|
|
68
80
|
/**
|
|
69
81
|
* Warning message for the generated sections and files
|
|
70
82
|
*
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { string_file_extension } from '../../types/typeAliases';
|
|
2
|
+
import type { string_mime_type } from '../../types/typeAliases';
|
|
3
|
+
/**
|
|
4
|
+
* Convert mime type to file extension
|
|
5
|
+
*
|
|
6
|
+
* Note: If the mime type is invalid, `null` is returned
|
|
7
|
+
*
|
|
8
|
+
* @private within the repository
|
|
9
|
+
*/
|
|
10
|
+
export declare function mimeTypeToExtension(value: string_mime_type): string_file_extension | null;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
CHANGED
package/umd/index.umd.js
CHANGED
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
* @generated
|
|
26
26
|
* @see https://github.com/webgptorg/promptbook
|
|
27
27
|
*/
|
|
28
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-
|
|
28
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-13';
|
|
29
29
|
/**
|
|
30
30
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
31
31
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -3208,22 +3208,6 @@
|
|
|
3208
3208
|
* TODO: [®] DRY Register logic
|
|
3209
3209
|
*/
|
|
3210
3210
|
|
|
3211
|
-
/**
|
|
3212
|
-
* Removes emojis from a string and fix whitespaces
|
|
3213
|
-
*
|
|
3214
|
-
* @param text with emojis
|
|
3215
|
-
* @returns text without emojis
|
|
3216
|
-
* @public exported from `@promptbook/utils`
|
|
3217
|
-
*/
|
|
3218
|
-
function removeEmojis(text) {
|
|
3219
|
-
// Remove emojis (including modifier symbols, variation selectors and ZWJ sequences)
|
|
3220
|
-
text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
|
|
3221
|
-
text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
|
|
3222
|
-
text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
|
|
3223
|
-
text = text.replace(/\p{Extended_Pictographic}/gu, '');
|
|
3224
|
-
return text;
|
|
3225
|
-
}
|
|
3226
|
-
|
|
3227
3211
|
var defaultDiacriticsRemovalMap = [
|
|
3228
3212
|
{
|
|
3229
3213
|
base: 'A',
|
|
@@ -3547,30 +3531,6 @@
|
|
|
3547
3531
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
3548
3532
|
*/
|
|
3549
3533
|
|
|
3550
|
-
/**
|
|
3551
|
-
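* Converts a title, URL, or file path into a name: strips the `http(s)://` prefix and `.html` suffix from URLs, keeps only the basename of file paths, replaces `/` with `-`, removes emojis, and applies `normalizeToKebabCase`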
|
|
3552
|
-
*
|
|
3553
|
-
* @param value @@@
|
|
3554
|
-
* @returns @@@
|
|
3555
|
-
* @example @@@
|
|
3556
|
-
* @public exported from `@promptbook/utils`
|
|
3557
|
-
*/
|
|
3558
|
-
function titleToName(value) {
|
|
3559
|
-
if (isValidUrl(value)) {
|
|
3560
|
-
value = value.replace(/^https?:\/\//, '');
|
|
3561
|
-
value = value.replace(/\.html$/, '');
|
|
3562
|
-
}
|
|
3563
|
-
else if (isValidFilePath(value)) {
|
|
3564
|
-
value = path.basename(value);
|
|
3565
|
-
// Note: Keeping extension in the name
|
|
3566
|
-
}
|
|
3567
|
-
value = value.split('/').join('-');
|
|
3568
|
-
value = removeEmojis(value);
|
|
3569
|
-
value = normalizeToKebabCase(value);
|
|
3570
|
-
// TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
|
|
3571
|
-
return value;
|
|
3572
|
-
}
|
|
3573
|
-
|
|
3574
3534
|
/**
|
|
3575
3535
|
* Creates unique name for the source
|
|
3576
3536
|
*
|
|
@@ -3656,6 +3616,57 @@
|
|
|
3656
3616
|
* TODO: [🖇] What about symlinks?
|
|
3657
3617
|
*/
|
|
3658
3618
|
|
|
3619
|
+
/**
|
|
3620
|
+
* Convert mime type to file extension
|
|
3621
|
+
*
|
|
3622
|
+
* Note: If the mime type is invalid, `null` is returned
|
|
3623
|
+
*
|
|
3624
|
+
* @private within the repository
|
|
3625
|
+
*/
|
|
3626
|
+
function mimeTypeToExtension(value) {
|
|
3627
|
+
return mimeTypes.extension(value) || null;
|
|
3628
|
+
}
|
|
3629
|
+
|
|
3630
|
+
/**
|
|
3631
|
+
* Removes emojis from a string and fix whitespaces
|
|
3632
|
+
*
|
|
3633
|
+
* @param text with emojis
|
|
3634
|
+
* @returns text without emojis
|
|
3635
|
+
* @public exported from `@promptbook/utils`
|
|
3636
|
+
*/
|
|
3637
|
+
function removeEmojis(text) {
|
|
3638
|
+
// Remove emojis (including modifier symbols, variation selectors and ZWJ sequences)
|
|
3639
|
+
text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
|
|
3640
|
+
text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
|
|
3641
|
+
text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
|
|
3642
|
+
text = text.replace(/\p{Extended_Pictographic}/gu, '');
|
|
3643
|
+
return text;
|
|
3644
|
+
}
|
|
3645
|
+
|
|
3646
|
+
/**
|
|
3647
|
+
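* Converts a title, URL, or file path into a name: strips the `http(s)://` prefix and `.html` suffix from URLs, keeps only the basename of file paths, replaces `/` with `-`, removes emojis, and applies `normalizeToKebabCase`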
|
|
3648
|
+
*
|
|
3649
|
+
* @param value @@@
|
|
3650
|
+
* @returns @@@
|
|
3651
|
+
* @example @@@
|
|
3652
|
+
* @public exported from `@promptbook/utils`
|
|
3653
|
+
*/
|
|
3654
|
+
function titleToName(value) {
|
|
3655
|
+
if (isValidUrl(value)) {
|
|
3656
|
+
value = value.replace(/^https?:\/\//, '');
|
|
3657
|
+
value = value.replace(/\.html$/, '');
|
|
3658
|
+
}
|
|
3659
|
+
else if (isValidFilePath(value)) {
|
|
3660
|
+
value = path.basename(value);
|
|
3661
|
+
// Note: Keeping extension in the name
|
|
3662
|
+
}
|
|
3663
|
+
value = value.split('/').join('-');
|
|
3664
|
+
value = removeEmojis(value);
|
|
3665
|
+
value = normalizeToKebabCase(value);
|
|
3666
|
+
// TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
|
|
3667
|
+
return value;
|
|
3668
|
+
}
|
|
3669
|
+
|
|
3659
3670
|
/**
|
|
3660
3671
|
* The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
|
|
3661
3672
|
*
|
|
@@ -3691,7 +3702,7 @@
|
|
|
3691
3702
|
function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
3692
3703
|
var _a;
|
|
3693
3704
|
return __awaiter(this, void 0, void 0, function () {
|
|
3694
|
-
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url,
|
|
3705
|
+
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
|
|
3695
3706
|
return __generator(this, function (_l) {
|
|
3696
3707
|
switch (_l.label) {
|
|
3697
3708
|
case 0:
|
|
@@ -3707,25 +3718,67 @@
|
|
|
3707
3718
|
url = knowledgeSourceContent;
|
|
3708
3719
|
return [4 /*yield*/, fetch(url)];
|
|
3709
3720
|
case 1:
|
|
3710
|
-
|
|
3711
|
-
mimeType = ((_a =
|
|
3712
|
-
|
|
3721
|
+
response_1 = _l.sent();
|
|
3722
|
+
mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
3723
|
+
if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
|
|
3724
|
+
return [2 /*return*/, {
|
|
3725
|
+
source: name,
|
|
3726
|
+
filename: null,
|
|
3727
|
+
url: url,
|
|
3728
|
+
mimeType: mimeType,
|
|
3729
|
+
/*
|
|
3730
|
+
TODO: [🥽]
|
|
3731
|
+
> async asBlob() {
|
|
3732
|
+
> // TODO: [👨🏻🤝👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
|
|
3733
|
+
> const content = await response.blob();
|
|
3734
|
+
> return content;
|
|
3735
|
+
> },
|
|
3736
|
+
*/
|
|
3737
|
+
asJson: function () {
|
|
3738
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3739
|
+
var content;
|
|
3740
|
+
return __generator(this, function (_a) {
|
|
3741
|
+
switch (_a.label) {
|
|
3742
|
+
case 0: return [4 /*yield*/, response_1.json()];
|
|
3743
|
+
case 1:
|
|
3744
|
+
content = _a.sent();
|
|
3745
|
+
return [2 /*return*/, content];
|
|
3746
|
+
}
|
|
3747
|
+
});
|
|
3748
|
+
});
|
|
3749
|
+
},
|
|
3750
|
+
asText: function () {
|
|
3751
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3752
|
+
var content;
|
|
3753
|
+
return __generator(this, function (_a) {
|
|
3754
|
+
switch (_a.label) {
|
|
3755
|
+
case 0: return [4 /*yield*/, response_1.text()];
|
|
3756
|
+
case 1:
|
|
3757
|
+
content = _a.sent();
|
|
3758
|
+
return [2 /*return*/, content];
|
|
3759
|
+
}
|
|
3760
|
+
});
|
|
3761
|
+
});
|
|
3762
|
+
},
|
|
3763
|
+
}];
|
|
3764
|
+
}
|
|
3765
|
+
basename = url.split('/').pop() || titleToName(url);
|
|
3713
3766
|
hash = sha256__default["default"](hexEncoder__default["default"].parse(url)).toString( /* hex */);
|
|
3714
3767
|
rootDirname_1 = path.join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
3715
|
-
filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(
|
|
3768
|
+
filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
|
|
3716
3769
|
return [4 /*yield*/, tools.fs.mkdir(path.dirname(path.join(rootDirname_1, filepath)), { recursive: true })];
|
|
3717
3770
|
case 2:
|
|
3718
3771
|
_l.sent();
|
|
3719
3772
|
_g = (_f = tools.fs).writeFile;
|
|
3720
3773
|
_h = [path.join(rootDirname_1, filepath)];
|
|
3721
3774
|
_k = (_j = Buffer).from;
|
|
3722
|
-
return [4 /*yield*/,
|
|
3775
|
+
return [4 /*yield*/, response_1.arrayBuffer()];
|
|
3723
3776
|
case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
|
|
3724
3777
|
case 4:
|
|
3725
3778
|
_l.sent();
|
|
3726
|
-
// TODO:
|
|
3779
|
+
// TODO: [💵] Check the file security
|
|
3727
3780
|
// TODO: !!!!!!!! Check the file size (if it is not too big)
|
|
3728
|
-
// TODO: !!!!!!!! Delete the file
|
|
3781
|
+
// TODO: !!!!!!!! Delete the file after the scraping is done
|
|
3729
3782
|
return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
|
|
3730
3783
|
case 5:
|
|
3731
3784
|
if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
|