@promptbook/markdown-utils 0.84.0-12 → 0.84.0-14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,8 @@ import { LOGO_LIGHT_SRC } from '../config';
12
12
  import { LOGO_DARK_SRC } from '../config';
13
13
  import { DEFAULT_BOOK_TITLE } from '../config';
14
14
  import { DEFAULT_TASK_TITLE } from '../config';
15
+ import { DEFAULT_PROMPT_TASK_TITLE } from '../config';
16
+ import { DEFAULT_BOOK_OUTPUT_PARAMETER_NAME } from '../config';
15
17
  import { MAX_FILENAME_LENGTH } from '../config';
16
18
  import { DEFAULT_INTERMEDIATE_FILES_STRATEGY } from '../config';
17
19
  import { DEFAULT_MAX_PARALLEL_COUNT } from '../config';
@@ -139,6 +141,8 @@ export { LOGO_LIGHT_SRC };
139
141
  export { LOGO_DARK_SRC };
140
142
  export { DEFAULT_BOOK_TITLE };
141
143
  export { DEFAULT_TASK_TITLE };
144
+ export { DEFAULT_PROMPT_TASK_TITLE };
145
+ export { DEFAULT_BOOK_OUTPUT_PARAMETER_NAME };
142
146
  export { MAX_FILENAME_LENGTH };
143
147
  export { DEFAULT_INTERMEDIATE_FILES_STRATEGY };
144
148
  export { DEFAULT_MAX_PARALLEL_COUNT };
@@ -65,6 +65,18 @@ export declare const DEFAULT_BOOK_TITLE = "\u2728 Untitled Book";
65
65
  * @public exported from `@promptbook/core`
66
66
  */
67
67
  export declare const DEFAULT_TASK_TITLE = "Task";
68
+ /**
69
+ * When the title of the prompt task is not provided, the default title is used
70
+ *
71
+ * @public exported from `@promptbook/core`
72
+ */
73
+ export declare const DEFAULT_PROMPT_TASK_TITLE = "Prompt";
74
+ /**
75
+ * When the pipeline is flat and no name of return parameter is provided, this name is used
76
+ *
77
+ * @public exported from `@promptbook/core`
78
+ */
79
+ export declare const DEFAULT_BOOK_OUTPUT_PARAMETER_NAME = "result";
68
80
  /**
69
81
  * Warning message for the generated sections and files
70
82
  *
@@ -0,0 +1,10 @@
1
+ import type { string_file_extension } from '../../types/typeAliases';
2
+ import type { string_mime_type } from '../../types/typeAliases';
3
+ /**
4
+ * Convert mime type to file extension
5
+ *
6
+ * Note: If the mime type is invalid, `null` is returned
7
+ *
8
+ * @private within the repository
9
+ */
10
+ export declare function mimeTypeToExtension(value: string_mime_type): string_file_extension | null;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@promptbook/markdown-utils",
3
- "version": "0.84.0-12",
3
+ "version": "0.84.0-14",
4
4
  "description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
5
5
  "--note-0": " <- [🐊]",
6
6
  "private": false,
package/umd/index.umd.js CHANGED
@@ -25,7 +25,7 @@
25
25
  * @generated
26
26
  * @see https://github.com/webgptorg/promptbook
27
27
  */
28
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
28
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-13';
29
29
  /**
30
30
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
31
31
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -3208,22 +3208,6 @@
3208
3208
  * TODO: [®] DRY Register logic
3209
3209
  */
3210
3210
 
3211
- /**
3212
- * Removes emojis from a string and fix whitespaces
3213
- *
3214
- * @param text with emojis
3215
- * @returns text without emojis
3216
- * @public exported from `@promptbook/utils`
3217
- */
3218
- function removeEmojis(text) {
3219
- // Strip emojis (including modifier, variation-selector and ZWJ sequences) from the text
3220
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
3221
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
3222
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
3223
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
3224
- return text;
3225
- }
3226
-
3227
3211
  var defaultDiacriticsRemovalMap = [
3228
3212
  {
3229
3213
  base: 'A',
@@ -3547,30 +3531,6 @@
3547
3531
  * Note: [💞] Ignore a discrepancy between file name and entity name
3548
3532
  */
3549
3533
 
3550
- /**
3551
- * @@@
3552
- *
3553
- * @param value @@@
3554
- * @returns @@@
3555
- * @example @@@
3556
- * @public exported from `@promptbook/utils`
3557
- */
3558
- function titleToName(value) {
3559
- if (isValidUrl(value)) {
3560
- value = value.replace(/^https?:\/\//, '');
3561
- value = value.replace(/\.html$/, '');
3562
- }
3563
- else if (isValidFilePath(value)) {
3564
- value = path.basename(value);
3565
- // Note: Keeping extension in the name
3566
- }
3567
- value = value.split('/').join('-');
3568
- value = removeEmojis(value);
3569
- value = normalizeToKebabCase(value);
3570
- // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
3571
- return value;
3572
- }
3573
-
3574
3534
  /**
3575
3535
  * Creates unique name for the source
3576
3536
  *
@@ -3656,6 +3616,57 @@
3656
3616
  * TODO: [🖇] What about symlinks?
3657
3617
  */
3658
3618
 
3619
+ /**
3620
+ * Convert mime type to file extension
3621
+ *
3622
+ * Note: If the mime type is invalid, `null` is returned
3623
+ *
3624
+ * @private within the repository
3625
+ */
3626
+ function mimeTypeToExtension(value) {
3627
+ return mimeTypes.extension(value) || null;
3628
+ }
3629
+
3630
+ /**
3631
+ * Removes emojis from a string and fix whitespaces
3632
+ *
3633
+ * @param text with emojis
3634
+ * @returns text without emojis
3635
+ * @public exported from `@promptbook/utils`
3636
+ */
3637
+ function removeEmojis(text) {
3638
+ // Strip emojis (including modifier, variation-selector and ZWJ sequences) from the text
3639
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
3640
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
3641
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
3642
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
3643
+ return text;
3644
+ }
3645
+
3646
+ /**
3647
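+ * Derives a name from a title, URL or file path: strips the URL protocol and `.html` suffix (or takes the file basename), replaces slashes with hyphens, removes emojis and normalizes the result via `normalizeToKebabCase`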
3648
+ *
3649
+ * @param value @@@
3650
+ * @returns @@@
3651
+ * @example @@@
3652
+ * @public exported from `@promptbook/utils`
3653
+ */
3654
+ function titleToName(value) {
3655
+ if (isValidUrl(value)) {
3656
+ value = value.replace(/^https?:\/\//, '');
3657
+ value = value.replace(/\.html$/, '');
3658
+ }
3659
+ else if (isValidFilePath(value)) {
3660
+ value = path.basename(value);
3661
+ // Note: Keeping extension in the name
3662
+ }
3663
+ value = value.split('/').join('-');
3664
+ value = removeEmojis(value);
3665
+ value = normalizeToKebabCase(value);
3666
+ // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
3667
+ return value;
3668
+ }
3669
+
3659
3670
  /**
3660
3671
  * The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
3661
3672
  *
@@ -3691,7 +3702,7 @@
3691
3702
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3692
3703
  var _a;
3693
3704
  return __awaiter(this, void 0, void 0, function () {
3694
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3705
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3695
3706
  return __generator(this, function (_l) {
3696
3707
  switch (_l.label) {
3697
3708
  case 0:
@@ -3707,25 +3718,67 @@
3707
3718
  url = knowledgeSourceContent;
3708
3719
  return [4 /*yield*/, fetch(url)];
3709
3720
  case 1:
3710
- response = _l.sent();
3711
- mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3712
- filename = url.split('/').pop() || titleToName(url);
3721
+ response_1 = _l.sent();
3722
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3723
+ if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
3724
+ return [2 /*return*/, {
3725
+ source: name,
3726
+ filename: null,
3727
+ url: url,
3728
+ mimeType: mimeType,
3729
+ /*
3730
+ TODO: [🥽]
3731
+ > async asBlob() {
3732
+ > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
3733
+ > const content = await response.blob();
3734
+ > return content;
3735
+ > },
3736
+ */
3737
+ asJson: function () {
3738
+ return __awaiter(this, void 0, void 0, function () {
3739
+ var content;
3740
+ return __generator(this, function (_a) {
3741
+ switch (_a.label) {
3742
+ case 0: return [4 /*yield*/, response_1.json()];
3743
+ case 1:
3744
+ content = _a.sent();
3745
+ return [2 /*return*/, content];
3746
+ }
3747
+ });
3748
+ });
3749
+ },
3750
+ asText: function () {
3751
+ return __awaiter(this, void 0, void 0, function () {
3752
+ var content;
3753
+ return __generator(this, function (_a) {
3754
+ switch (_a.label) {
3755
+ case 0: return [4 /*yield*/, response_1.text()];
3756
+ case 1:
3757
+ content = _a.sent();
3758
+ return [2 /*return*/, content];
3759
+ }
3760
+ });
3761
+ });
3762
+ },
3763
+ }];
3764
+ }
3765
+ basename = url.split('/').pop() || titleToName(url);
3713
3766
  hash = sha256__default["default"](hexEncoder__default["default"].parse(url)).toString( /* hex */);
3714
3767
  rootDirname_1 = path.join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
3715
- filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3768
+ filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
3716
3769
  return [4 /*yield*/, tools.fs.mkdir(path.dirname(path.join(rootDirname_1, filepath)), { recursive: true })];
3717
3770
  case 2:
3718
3771
  _l.sent();
3719
3772
  _g = (_f = tools.fs).writeFile;
3720
3773
  _h = [path.join(rootDirname_1, filepath)];
3721
3774
  _k = (_j = Buffer).from;
3722
- return [4 /*yield*/, response.arrayBuffer()];
3775
+ return [4 /*yield*/, response_1.arrayBuffer()];
3723
3776
  case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
3724
3777
  case 4:
3725
3778
  _l.sent();
3726
- // TODO: !!!!!!!! Check the file security
3779
+ // TODO: [💵] Check the file security
3727
3780
  // TODO: !!!!!!!! Check the file size (if it is not too big)
3728
- // TODO: !!!!!!!! Delete the file
3781
+ // TODO: !!!!!!!! Delete the file after the scraping is done
3729
3782
  return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
3730
3783
  case 5:
3731
3784
  if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];