@promptbook/markdown-utils 0.84.0-12 → 0.84.0-13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -23,7 +23,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
23
23
  * @generated
24
24
  * @see https://github.com/webgptorg/promptbook
25
25
  */
26
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
26
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
27
27
  /**
28
28
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
29
29
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -3206,22 +3206,6 @@ function $registeredScrapersMessage(availableScrapers) {
3206
3206
  * TODO: [®] DRY Register logic
3207
3207
  */
3208
3208
 
3209
- /**
3210
- * Removes emojis from a string and fix whitespaces
3211
- *
3212
- * @param text with emojis
3213
- * @returns text without emojis
3214
- * @public exported from `@promptbook/utils`
3215
- */
3216
- function removeEmojis(text) {
3217
- // Strip emoji modifiers and variation selectors, collapse ZWJ sequences, then remove the remaining emojis
3218
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
3219
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
3220
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
3221
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
3222
- return text;
3223
- }
3224
-
3225
3209
  var defaultDiacriticsRemovalMap = [
3226
3210
  {
3227
3211
  base: 'A',
@@ -3545,30 +3529,6 @@ function normalizeToKebabCase(text) {
3545
3529
  * Note: [💞] Ignore a discrepancy between file name and entity name
3546
3530
  */
3547
3531
 
3548
- /**
3549
- * @@@
3550
- *
3551
- * @param value @@@
3552
- * @returns @@@
3553
- * @example @@@
3554
- * @public exported from `@promptbook/utils`
3555
- */
3556
- function titleToName(value) {
3557
- if (isValidUrl(value)) {
3558
- value = value.replace(/^https?:\/\//, '');
3559
- value = value.replace(/\.html$/, '');
3560
- }
3561
- else if (isValidFilePath(value)) {
3562
- value = basename(value);
3563
- // Note: Keeping extension in the name
3564
- }
3565
- value = value.split('/').join('-');
3566
- value = removeEmojis(value);
3567
- value = normalizeToKebabCase(value);
3568
- // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
3569
- return value;
3570
- }
3571
-
3572
3532
  /**
3573
3533
  * Creates unique name for the source
3574
3534
  *
@@ -3654,6 +3614,46 @@ function isFileExisting(filename, fs) {
3654
3614
  * TODO: [🖇] What about symlinks?
3655
3615
  */
3656
3616
 
3617
+ /**
3618
+ * Removes emojis from a string and fix whitespaces
3619
+ *
3620
+ * @param text with emojis
3621
+ * @returns text without emojis
3622
+ * @public exported from `@promptbook/utils`
3623
+ */
3624
+ function removeEmojis(text) {
3625
+ // Strip emoji modifiers and variation selectors, collapse ZWJ sequences, then remove the remaining emojis
3626
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
3627
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
3628
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
3629
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
3630
+ return text;
3631
+ }
3632
+
3633
+ /**
3634
+ * @@@
3635
+ *
3636
+ * @param value @@@
3637
+ * @returns @@@
3638
+ * @example @@@
3639
+ * @public exported from `@promptbook/utils`
3640
+ */
3641
+ function titleToName(value) {
3642
+ if (isValidUrl(value)) {
3643
+ value = value.replace(/^https?:\/\//, '');
3644
+ value = value.replace(/\.html$/, '');
3645
+ }
3646
+ else if (isValidFilePath(value)) {
3647
+ value = basename(value);
3648
+ // Note: Keeping extension in the name
3649
+ }
3650
+ value = value.split('/').join('-');
3651
+ value = removeEmojis(value);
3652
+ value = normalizeToKebabCase(value);
3653
+ // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
3654
+ return value;
3655
+ }
3656
+
3657
3657
  /**
3658
3658
  * The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
3659
3659
  *
@@ -3689,7 +3689,7 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
3689
3689
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3690
3690
  var _a;
3691
3691
  return __awaiter(this, void 0, void 0, function () {
3692
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3692
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3693
3693
  return __generator(this, function (_l) {
3694
3694
  switch (_l.label) {
3695
3695
  case 0:
@@ -3705,19 +3705,61 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3705
3705
  url = knowledgeSourceContent;
3706
3706
  return [4 /*yield*/, fetch(url)];
3707
3707
  case 1:
3708
- response = _l.sent();
3709
- mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3710
- filename = url.split('/').pop() || titleToName(url);
3708
+ response_1 = _l.sent();
3709
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3710
+ if (tools.fs === undefined || !url.endsWith('.pdf')) {
3711
+ return [2 /*return*/, {
3712
+ source: name,
3713
+ filename: null,
3714
+ url: url,
3715
+ mimeType: mimeType,
3716
+ /*
3717
+ TODO: [🥽]
3718
+ > async asBlob() {
3719
+ > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
3720
+ > const content = await response.blob();
3721
+ > return content;
3722
+ > },
3723
+ */
3724
+ asJson: function () {
3725
+ return __awaiter(this, void 0, void 0, function () {
3726
+ var content;
3727
+ return __generator(this, function (_a) {
3728
+ switch (_a.label) {
3729
+ case 0: return [4 /*yield*/, response_1.json()];
3730
+ case 1:
3731
+ content = _a.sent();
3732
+ return [2 /*return*/, content];
3733
+ }
3734
+ });
3735
+ });
3736
+ },
3737
+ asText: function () {
3738
+ return __awaiter(this, void 0, void 0, function () {
3739
+ var content;
3740
+ return __generator(this, function (_a) {
3741
+ switch (_a.label) {
3742
+ case 0: return [4 /*yield*/, response_1.text()];
3743
+ case 1:
3744
+ content = _a.sent();
3745
+ return [2 /*return*/, content];
3746
+ }
3747
+ });
3748
+ });
3749
+ },
3750
+ }];
3751
+ }
3752
+ basename = url.split('/').pop() || titleToName(url);
3711
3753
  hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
3712
3754
  rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
3713
- filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3755
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3714
3756
  return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
3715
3757
  case 2:
3716
3758
  _l.sent();
3717
3759
  _g = (_f = tools.fs).writeFile;
3718
3760
  _h = [join(rootDirname_1, filepath)];
3719
3761
  _k = (_j = Buffer).from;
3720
- return [4 /*yield*/, response.arrayBuffer()];
3762
+ return [4 /*yield*/, response_1.arrayBuffer()];
3721
3763
  case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
3722
3764
  case 4:
3723
3765
  _l.sent();