@promptbook/node 0.84.0-12 → 0.84.0-13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -27,7 +27,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
27
27
  * @generated
28
28
  * @see https://github.com/webgptorg/promptbook
29
29
  */
30
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
30
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
31
31
  /**
32
32
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
33
33
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -5322,22 +5322,6 @@ function $registeredScrapersMessage(availableScrapers) {
5322
5322
  * TODO: [®] DRY Register logic
5323
5323
  */
5324
5324
 
5325
- /**
5326
- * Removes emojis from a string and fix whitespaces
5327
- *
5328
- * @param text with emojis
5329
- * @returns text without emojis
5330
- * @public exported from `@promptbook/utils`
5331
- */
5332
- function removeEmojis(text) {
5333
- // Replace emojis (and also ZWJ sequence) with hyphens
5334
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
5335
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
5336
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
5337
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
5338
- return text;
5339
- }
5340
-
5341
5325
  /**
5342
5326
  * @@@
5343
5327
  *
@@ -5400,30 +5384,6 @@ function normalizeToKebabCase(text) {
5400
5384
  * Note: [💞] Ignore a discrepancy between file name and entity name
5401
5385
  */
5402
5386
 
5403
- /**
5404
- * @@@
5405
- *
5406
- * @param value @@@
5407
- * @returns @@@
5408
- * @example @@@
5409
- * @public exported from `@promptbook/utils`
5410
- */
5411
- function titleToName(value) {
5412
- if (isValidUrl(value)) {
5413
- value = value.replace(/^https?:\/\//, '');
5414
- value = value.replace(/\.html$/, '');
5415
- }
5416
- else if (isValidFilePath(value)) {
5417
- value = basename(value);
5418
- // Note: Keeping extension in the name
5419
- }
5420
- value = value.split('/').join('-');
5421
- value = removeEmojis(value);
5422
- value = normalizeToKebabCase(value);
5423
- // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
5424
- return value;
5425
- }
5426
-
5427
5387
  /**
5428
5388
  * Creates unique name for the source
5429
5389
  *
@@ -5509,6 +5469,46 @@ function isFileExisting(filename, fs) {
5509
5469
  * TODO: [🖇] What about symlinks?
5510
5470
  */
5511
5471
 
5472
+ /**
5473
+ * Removes emojis from a string (surrounding whitespace is left untouched)
5474
+ *
5475
+ * @param text with emojis
5476
+ * @returns text without emojis
5477
+ * @public exported from `@promptbook/utils`
5478
+ */
5479
+ function removeEmojis(text) {
5480
+ // Strip emoji modifiers and variation selectors, collapse ZWJ sequences, then remove the remaining emojis
5481
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
5482
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
5483
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
5484
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
5485
+ return text;
5486
+ }
5487
+
5488
+ /**
5489
+ * @@@
5490
+ *
5491
+ * @param value @@@
5492
+ * @returns @@@
5493
+ * @example @@@
5494
+ * @public exported from `@promptbook/utils`
5495
+ */
5496
+ function titleToName(value) {
5497
+ if (isValidUrl(value)) {
5498
+ value = value.replace(/^https?:\/\//, '');
5499
+ value = value.replace(/\.html$/, '');
5500
+ }
5501
+ else if (isValidFilePath(value)) {
5502
+ value = basename(value);
5503
+ // Note: Keeping extension in the name
5504
+ }
5505
+ value = value.split('/').join('-');
5506
+ value = removeEmojis(value);
5507
+ value = normalizeToKebabCase(value);
5508
+ // TODO: [🧠] Maybe warn or add some padding to short names which are not good identifiers
5509
+ return value;
5510
+ }
5511
+
5512
5512
  /**
5513
5513
  * The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
5514
5514
  *
@@ -5544,7 +5544,7 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
5544
5544
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5545
5545
  var _a;
5546
5546
  return __awaiter(this, void 0, void 0, function () {
5547
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
5547
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
5548
5548
  return __generator(this, function (_l) {
5549
5549
  switch (_l.label) {
5550
5550
  case 0:
@@ -5560,19 +5560,61 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5560
5560
  url = knowledgeSourceContent;
5561
5561
  return [4 /*yield*/, fetch(url)];
5562
5562
  case 1:
5563
- response = _l.sent();
5564
- mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5565
- filename = url.split('/').pop() || titleToName(url);
5563
+ response_1 = _l.sent();
5564
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5565
+ if (tools.fs === undefined || !url.endsWith('.pdf')) {
5566
+ return [2 /*return*/, {
5567
+ source: name,
5568
+ filename: null,
5569
+ url: url,
5570
+ mimeType: mimeType,
5571
+ /*
5572
+ TODO: [🥽]
5573
+ > async asBlob() {
5574
+ > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
5575
+ > const content = await response.blob();
5576
+ > return content;
5577
+ > },
5578
+ */
5579
+ asJson: function () {
5580
+ return __awaiter(this, void 0, void 0, function () {
5581
+ var content;
5582
+ return __generator(this, function (_a) {
5583
+ switch (_a.label) {
5584
+ case 0: return [4 /*yield*/, response_1.json()];
5585
+ case 1:
5586
+ content = _a.sent();
5587
+ return [2 /*return*/, content];
5588
+ }
5589
+ });
5590
+ });
5591
+ },
5592
+ asText: function () {
5593
+ return __awaiter(this, void 0, void 0, function () {
5594
+ var content;
5595
+ return __generator(this, function (_a) {
5596
+ switch (_a.label) {
5597
+ case 0: return [4 /*yield*/, response_1.text()];
5598
+ case 1:
5599
+ content = _a.sent();
5600
+ return [2 /*return*/, content];
5601
+ }
5602
+ });
5603
+ });
5604
+ },
5605
+ }];
5606
+ }
5607
+ basename = url.split('/').pop() || titleToName(url);
5566
5608
  hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
5567
5609
  rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
5568
- filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
5610
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
5569
5611
  return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
5570
5612
  case 2:
5571
5613
  _l.sent();
5572
5614
  _g = (_f = tools.fs).writeFile;
5573
5615
  _h = [join(rootDirname_1, filepath)];
5574
5616
  _k = (_j = Buffer).from;
5575
- return [4 /*yield*/, response.arrayBuffer()];
5617
+ return [4 /*yield*/, response_1.arrayBuffer()];
5576
5618
  case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
5577
5619
  case 4:
5578
5620
  _l.sent();