@promptbook/markdown-utils 0.84.0-12 → 0.84.0-13

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@promptbook/markdown-utils",
3
- "version": "0.84.0-12",
3
+ "version": "0.84.0-13",
4
4
  "description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
5
5
  "--note-0": " <- [🐊]",
6
6
  "private": false,
package/umd/index.umd.js CHANGED
@@ -25,7 +25,7 @@
25
25
  * @generated
26
26
  * @see https://github.com/webgptorg/promptbook
27
27
  */
28
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
28
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
29
29
  /**
30
30
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
31
31
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -3208,22 +3208,6 @@
3208
3208
  * TODO: [®] DRY Register logic
3209
3209
  */
3210
3210
 
3211
- /**
3212
- * Removes emojis from a string and fix whitespaces
3213
- *
3214
- * @param text with emojis
3215
- * @returns text without emojis
3216
- * @public exported from `@promptbook/utils`
3217
- */
3218
- function removeEmojis(text) {
3219
- // Replace emojis (and also ZWJ sequence) with hyphens
3220
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
3221
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
3222
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
3223
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
3224
- return text;
3225
- }
3226
-
3227
3211
  var defaultDiacriticsRemovalMap = [
3228
3212
  {
3229
3213
  base: 'A',
@@ -3547,30 +3531,6 @@
3547
3531
  * Note: [💞] Ignore a discrepancy between file name and entity name
3548
3532
  */
3549
3533
 
3550
- /**
3551
- * @@@
3552
- *
3553
- * @param value @@@
3554
- * @returns @@@
3555
- * @example @@@
3556
- * @public exported from `@promptbook/utils`
3557
- */
3558
- function titleToName(value) {
3559
- if (isValidUrl(value)) {
3560
- value = value.replace(/^https?:\/\//, '');
3561
- value = value.replace(/\.html$/, '');
3562
- }
3563
- else if (isValidFilePath(value)) {
3564
- value = path.basename(value);
3565
- // Note: Keeping extension in the name
3566
- }
3567
- value = value.split('/').join('-');
3568
- value = removeEmojis(value);
3569
- value = normalizeToKebabCase(value);
3570
- // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
3571
- return value;
3572
- }
3573
-
3574
3534
  /**
3575
3535
  * Creates unique name for the source
3576
3536
  *
@@ -3656,6 +3616,46 @@
3656
3616
  * TODO: [🖇] What about symlinks?
3657
3617
  */
3658
3618
 
3619
+ /**
3620
+ * Removes emojis from a string and fix whitespaces
3621
+ *
3622
+ * @param text with emojis
3623
+ * @returns text without emojis
3624
+ * @public exported from `@promptbook/utils`
3625
+ */
3626
+ function removeEmojis(text) {
3627
+ // Replace emojis (and also ZWJ sequence) with hyphens
3628
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
3629
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
3630
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
3631
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
3632
+ return text;
3633
+ }
3634
+
3635
+ /**
3636
+ * @@@
3637
+ *
3638
+ * @param value @@@
3639
+ * @returns @@@
3640
+ * @example @@@
3641
+ * @public exported from `@promptbook/utils`
3642
+ */
3643
+ function titleToName(value) {
3644
+ if (isValidUrl(value)) {
3645
+ value = value.replace(/^https?:\/\//, '');
3646
+ value = value.replace(/\.html$/, '');
3647
+ }
3648
+ else if (isValidFilePath(value)) {
3649
+ value = path.basename(value);
3650
+ // Note: Keeping extension in the name
3651
+ }
3652
+ value = value.split('/').join('-');
3653
+ value = removeEmojis(value);
3654
+ value = normalizeToKebabCase(value);
3655
+ // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
3656
+ return value;
3657
+ }
3658
+
3659
3659
  /**
3660
3660
  * The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
3661
3661
  *
@@ -3691,7 +3691,7 @@
3691
3691
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3692
3692
  var _a;
3693
3693
  return __awaiter(this, void 0, void 0, function () {
3694
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3694
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3695
3695
  return __generator(this, function (_l) {
3696
3696
  switch (_l.label) {
3697
3697
  case 0:
@@ -3707,19 +3707,61 @@
3707
3707
  url = knowledgeSourceContent;
3708
3708
  return [4 /*yield*/, fetch(url)];
3709
3709
  case 1:
3710
- response = _l.sent();
3711
- mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3712
- filename = url.split('/').pop() || titleToName(url);
3710
+ response_1 = _l.sent();
3711
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3712
+ if (tools.fs === undefined || !url.endsWith('.pdf')) {
3713
+ return [2 /*return*/, {
3714
+ source: name,
3715
+ filename: null,
3716
+ url: url,
3717
+ mimeType: mimeType,
3718
+ /*
3719
+ TODO: [🥽]
3720
+ > async asBlob() {
3721
+ > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
3722
+ > const content = await response.blob();
3723
+ > return content;
3724
+ > },
3725
+ */
3726
+ asJson: function () {
3727
+ return __awaiter(this, void 0, void 0, function () {
3728
+ var content;
3729
+ return __generator(this, function (_a) {
3730
+ switch (_a.label) {
3731
+ case 0: return [4 /*yield*/, response_1.json()];
3732
+ case 1:
3733
+ content = _a.sent();
3734
+ return [2 /*return*/, content];
3735
+ }
3736
+ });
3737
+ });
3738
+ },
3739
+ asText: function () {
3740
+ return __awaiter(this, void 0, void 0, function () {
3741
+ var content;
3742
+ return __generator(this, function (_a) {
3743
+ switch (_a.label) {
3744
+ case 0: return [4 /*yield*/, response_1.text()];
3745
+ case 1:
3746
+ content = _a.sent();
3747
+ return [2 /*return*/, content];
3748
+ }
3749
+ });
3750
+ });
3751
+ },
3752
+ }];
3753
+ }
3754
+ basename = url.split('/').pop() || titleToName(url);
3713
3755
  hash = sha256__default["default"](hexEncoder__default["default"].parse(url)).toString( /* hex */);
3714
3756
  rootDirname_1 = path.join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
3715
- filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3757
+ filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3716
3758
  return [4 /*yield*/, tools.fs.mkdir(path.dirname(path.join(rootDirname_1, filepath)), { recursive: true })];
3717
3759
  case 2:
3718
3760
  _l.sent();
3719
3761
  _g = (_f = tools.fs).writeFile;
3720
3762
  _h = [path.join(rootDirname_1, filepath)];
3721
3763
  _k = (_j = Buffer).from;
3722
- return [4 /*yield*/, response.arrayBuffer()];
3764
+ return [4 /*yield*/, response_1.arrayBuffer()];
3723
3765
  case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
3724
3766
  case 4:
3725
3767
  _l.sent();