@promptbook/node 0.84.0-11 → 0.84.0-13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -1,17 +1,17 @@
1
1
  import colors from 'colors';
2
2
  import { stat, access, constants, readFile, writeFile, readdir, mkdir, unlink } from 'fs/promises';
3
- import { join, basename, dirname } from 'path';
3
+ import { basename, join, dirname } from 'path';
4
4
  import spaceTrim, { spaceTrim as spaceTrim$1 } from 'spacetrim';
5
5
  import { format } from 'prettier';
6
6
  import parserHtml from 'prettier/parser-html';
7
7
  import { forTime } from 'waitasecond';
8
8
  import { unparse, parse } from 'papaparse';
9
- import { SHA256 } from 'crypto-js';
10
9
  import hexEncoder from 'crypto-js/enc-hex';
10
+ import sha256 from 'crypto-js/sha256';
11
+ import { SHA256 } from 'crypto-js';
11
12
  import { lookup } from 'mime-types';
12
13
  import { spawn } from 'child_process';
13
14
  import * as dotenv from 'dotenv';
14
- import sha256 from 'crypto-js/sha256';
15
15
 
16
16
  // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
17
17
  /**
@@ -27,7 +27,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
27
27
  * @generated
28
28
  * @see https://github.com/webgptorg/promptbook
29
29
  */
30
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-10';
30
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
31
31
  /**
32
32
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
33
33
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -262,6 +262,15 @@ var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
262
262
  * @public exported from `@promptbook/core`
263
263
  */
264
264
  var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
265
+ // <- TODO: [🕝] Make also `BOOKS_DIRNAME_ALTERNATIVES`
266
+ /**
267
+ * Where to store the temporary downloads
268
+ *
269
+ * Note: When the folder does not exist, it is created recursively
270
+ *
271
+ * @public exported from `@promptbook/core`
272
+ */
273
+ var DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
265
274
  /**
266
275
  * Where to store the scrape cache
267
276
  *
@@ -5396,6 +5405,15 @@ function knowledgeSourceContentToName(knowledgeSourceContent) {
5396
5405
  * TODO: [🐱‍🐉][🧠] Make some smart crop NOT source-i-m-pavol-a-develop-... BUT source-i-m-pavol-a-developer-...
5397
5406
  */
5398
5407
 
5408
+ /**
5409
+ * @@@
5410
+ *
5411
+ * @private for `FileCacheStorage`
5412
+ */
5413
+ function nameToSubfolderPath(name) {
5414
+ return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()];
5415
+ }
5416
+
5399
5417
  /**
5400
5418
  * Convert file extension to mime type
5401
5419
  *
@@ -5451,6 +5469,46 @@ function isFileExisting(filename, fs) {
5451
5469
  * TODO: [🖇] What about symlinks?
5452
5470
  */
5453
5471
 
5472
+ /**
5473
+ * Removes emojis from a string and fix whitespaces
5474
+ *
5475
+ * @param text with emojis
5476
+ * @returns text without emojis
5477
+ * @public exported from `@promptbook/utils`
5478
+ */
5479
+ function removeEmojis(text) {
5480
+ // Replace emojis (and also ZWJ sequence) with hyphens
5481
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
5482
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
5483
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
5484
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
5485
+ return text;
5486
+ }
5487
+
5488
+ /**
5489
+ * @@@
5490
+ *
5491
+ * @param value @@@
5492
+ * @returns @@@
5493
+ * @example @@@
5494
+ * @public exported from `@promptbook/utils`
5495
+ */
5496
+ function titleToName(value) {
5497
+ if (isValidUrl(value)) {
5498
+ value = value.replace(/^https?:\/\//, '');
5499
+ value = value.replace(/\.html$/, '');
5500
+ }
5501
+ else if (isValidFilePath(value)) {
5502
+ value = basename(value);
5503
+ // Note: Keeping extension in the name
5504
+ }
5505
+ value = value.split('/').join('-');
5506
+ value = removeEmojis(value);
5507
+ value = normalizeToKebabCase(value);
5508
+ // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
5509
+ return value;
5510
+ }
5511
+
5454
5512
  /**
5455
5513
  * The built-in `fetch' function with a lightweight error handling wrapper as default fetch function used in Promptbook scrapers
5456
5514
  *
@@ -5486,10 +5544,11 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
5486
5544
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5487
5545
  var _a;
5488
5546
  return __awaiter(this, void 0, void 0, function () {
5489
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, filename_1, fileExtension, mimeType;
5490
- return __generator(this, function (_f) {
5491
- switch (_f.label) {
5547
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
5548
+ return __generator(this, function (_l) {
5549
+ switch (_l.label) {
5492
5550
  case 0:
5551
+ console.log('!!! makeKnowledgeSourceHandler', knowledgeSource);
5493
5552
  _b = tools.fetch, fetch = _b === void 0 ? scraperFetch : _b;
5494
5553
  knowledgeSourceContent = knowledgeSource.knowledgeSourceContent;
5495
5554
  name = knowledgeSource.name;
@@ -5497,54 +5556,74 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5497
5556
  if (!name) {
5498
5557
  name = knowledgeSourceContentToName(knowledgeSourceContent);
5499
5558
  }
5500
- if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 2];
5559
+ if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 5];
5501
5560
  url = knowledgeSourceContent;
5502
5561
  return [4 /*yield*/, fetch(url)];
5503
5562
  case 1:
5504
- response_1 = _f.sent();
5563
+ response_1 = _l.sent();
5505
5564
  mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5506
- return [2 /*return*/, {
5507
- source: name,
5508
- filename: null,
5509
- url: url,
5510
- mimeType: mimeType,
5511
- /*
5512
- TODO: [🥽]
5513
- > async asBlob() {
5514
- > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called second time, response in already consumed
5515
- > const content = await response.blob();
5516
- > return content;
5517
- > },
5518
- */
5519
- asJson: function () {
5520
- return __awaiter(this, void 0, void 0, function () {
5521
- var content;
5522
- return __generator(this, function (_a) {
5523
- switch (_a.label) {
5524
- case 0: return [4 /*yield*/, response_1.json()];
5525
- case 1:
5526
- content = _a.sent();
5527
- return [2 /*return*/, content];
5528
- }
5565
+ if (tools.fs === undefined || !url.endsWith('.pdf')) {
5566
+ return [2 /*return*/, {
5567
+ source: name,
5568
+ filename: null,
5569
+ url: url,
5570
+ mimeType: mimeType,
5571
+ /*
5572
+ TODO: [🥽]
5573
+ > async asBlob() {
5574
+ > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called second time, response in already consumed
5575
+ > const content = await response.blob();
5576
+ > return content;
5577
+ > },
5578
+ */
5579
+ asJson: function () {
5580
+ return __awaiter(this, void 0, void 0, function () {
5581
+ var content;
5582
+ return __generator(this, function (_a) {
5583
+ switch (_a.label) {
5584
+ case 0: return [4 /*yield*/, response_1.json()];
5585
+ case 1:
5586
+ content = _a.sent();
5587
+ return [2 /*return*/, content];
5588
+ }
5589
+ });
5529
5590
  });
5530
- });
5531
- },
5532
- asText: function () {
5533
- return __awaiter(this, void 0, void 0, function () {
5534
- var content;
5535
- return __generator(this, function (_a) {
5536
- switch (_a.label) {
5537
- case 0: return [4 /*yield*/, response_1.text()];
5538
- case 1:
5539
- content = _a.sent();
5540
- return [2 /*return*/, content];
5541
- }
5591
+ },
5592
+ asText: function () {
5593
+ return __awaiter(this, void 0, void 0, function () {
5594
+ var content;
5595
+ return __generator(this, function (_a) {
5596
+ switch (_a.label) {
5597
+ case 0: return [4 /*yield*/, response_1.text()];
5598
+ case 1:
5599
+ content = _a.sent();
5600
+ return [2 /*return*/, content];
5601
+ }
5602
+ });
5542
5603
  });
5543
- });
5544
- },
5545
- }];
5604
+ },
5605
+ }];
5606
+ }
5607
+ basename = url.split('/').pop() || titleToName(url);
5608
+ hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
5609
+ rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
5610
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
5611
+ return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
5546
5612
  case 2:
5547
- if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 4];
5613
+ _l.sent();
5614
+ _g = (_f = tools.fs).writeFile;
5615
+ _h = [join(rootDirname_1, filepath)];
5616
+ _k = (_j = Buffer).from;
5617
+ return [4 /*yield*/, response_1.arrayBuffer()];
5618
+ case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
5619
+ case 4:
5620
+ _l.sent();
5621
+ // TODO: !!!!!!!! Check the file security
5622
+ // TODO: !!!!!!!! Check the file size (if it is not too big)
5623
+ // TODO: !!!!!!!! Delete the file
5624
+ return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
5625
+ case 5:
5626
+ if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
5548
5627
  if (tools.fs === undefined) {
5549
5628
  throw new EnvironmentMismatchError('Can not import file knowledge without filesystem tools');
5550
5629
  // <- TODO: [🧠] What is the best error type here`
@@ -5557,8 +5636,8 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5557
5636
  fileExtension = getFileExtension(filename_1);
5558
5637
  mimeType = extensionToMimeType(fileExtension || '');
5559
5638
  return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
5560
- case 3:
5561
- if (!(_f.sent())) {
5639
+ case 6:
5640
+ if (!(_l.sent())) {
5562
5641
  throw new NotFoundError(spaceTrim(function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(knowledgeSourceContent), "\n\n Full file path:\n ").concat(block(filename_1), "\n "); }));
5563
5642
  }
5564
5643
  // TODO: [🧠][😿] Test security file - file is scoped to the project (BUT maybe do this in `filesystemTools`)
@@ -5604,7 +5683,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5604
5683
  });
5605
5684
  },
5606
5685
  }];
5607
- case 4: return [2 /*return*/, {
5686
+ case 7: return [2 /*return*/, {
5608
5687
  source: name,
5609
5688
  filename: null,
5610
5689
  url: null,
@@ -6715,22 +6794,6 @@ function normalizeTo_camelCase(text, _isFirstLetterCapital) {
6715
6794
  * TODO: [🌺] Use some intermediate util splitWords
6716
6795
  */
6717
6796
 
6718
- /**
6719
- * Removes emojis from a string and fix whitespaces
6720
- *
6721
- * @param text with emojis
6722
- * @returns text without emojis
6723
- * @public exported from `@promptbook/utils`
6724
- */
6725
- function removeEmojis(text) {
6726
- // Replace emojis (and also ZWJ sequence) with hyphens
6727
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
6728
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
6729
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
6730
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
6731
- return text;
6732
- }
6733
-
6734
6797
  /**
6735
6798
  * Removes quotes from a string
6736
6799
  *
@@ -8922,30 +8985,6 @@ function flattenMarkdown(markdown) {
8922
8985
  * NOW we are working just with markdown string and its good enough
8923
8986
  */
8924
8987
 
8925
- /**
8926
- * @@@
8927
- *
8928
- * @param value @@@
8929
- * @returns @@@
8930
- * @example @@@
8931
- * @public exported from `@promptbook/utils`
8932
- */
8933
- function titleToName(value) {
8934
- if (isValidUrl(value)) {
8935
- value = value.replace(/^https?:\/\//, '');
8936
- value = value.replace(/\.html$/, '');
8937
- }
8938
- else if (isValidFilePath(value)) {
8939
- value = basename(value);
8940
- // Note: Keeping extension in the name
8941
- }
8942
- value = value.split('/').join('-');
8943
- value = removeEmojis(value);
8944
- value = normalizeToKebabCase(value);
8945
- // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
8946
- return value;
8947
- }
8948
-
8949
8988
  /**
8950
8989
  * Compile pipeline from string (markdown) format to JSON format synchronously
8951
8990
  *
@@ -9678,6 +9717,7 @@ function $provideFilesystemForNode(options) {
9678
9717
  readFile: readFile,
9679
9718
  writeFile: writeFile,
9680
9719
  readdir: readdir,
9720
+ mkdir: mkdir,
9681
9721
  };
9682
9722
  }
9683
9723
  /**
@@ -11296,15 +11336,6 @@ function stringifyPipelineJson(pipeline) {
11296
11336
  * TODO: [🍙] Make some standard order of json properties
11297
11337
  */
11298
11338
 
11299
- /**
11300
- * @@@
11301
- *
11302
- * @private for `FileCacheStorage`
11303
- */
11304
- function nameToSubfolderPath(name) {
11305
- return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()];
11306
- }
11307
-
11308
11339
  /**
11309
11340
  * @@@
11310
11341
  *