@promptbook/node 0.84.0-11 → 0.84.0-12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -1,17 +1,17 @@
1
1
  import colors from 'colors';
2
2
  import { stat, access, constants, readFile, writeFile, readdir, mkdir, unlink } from 'fs/promises';
3
- import { join, basename, dirname } from 'path';
3
+ import { basename, join, dirname } from 'path';
4
4
  import spaceTrim, { spaceTrim as spaceTrim$1 } from 'spacetrim';
5
5
  import { format } from 'prettier';
6
6
  import parserHtml from 'prettier/parser-html';
7
7
  import { forTime } from 'waitasecond';
8
8
  import { unparse, parse } from 'papaparse';
9
- import { SHA256 } from 'crypto-js';
10
9
  import hexEncoder from 'crypto-js/enc-hex';
10
+ import sha256 from 'crypto-js/sha256';
11
+ import { SHA256 } from 'crypto-js';
11
12
  import { lookup } from 'mime-types';
12
13
  import { spawn } from 'child_process';
13
14
  import * as dotenv from 'dotenv';
14
- import sha256 from 'crypto-js/sha256';
15
15
 
16
16
  // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
17
17
  /**
@@ -27,7 +27,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
27
27
  * @generated
28
28
  * @see https://github.com/webgptorg/promptbook
29
29
  */
30
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-10';
30
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
31
31
  /**
32
32
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
33
33
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -262,6 +262,15 @@ var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
262
262
  * @public exported from `@promptbook/core`
263
263
  */
264
264
  var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
265
+ // <- TODO: [🕝] Make also `BOOKS_DIRNAME_ALTERNATIVES`
266
+ /**
267
+ * Where to store the temporary downloads
268
+ *
269
+ * Note: When the folder does not exist, it is created recursively
270
+ *
271
+ * @public exported from `@promptbook/core`
272
+ */
273
+ var DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
265
274
  /**
266
275
  * Where to store the scrape cache
267
276
  *
@@ -5313,6 +5322,22 @@ function $registeredScrapersMessage(availableScrapers) {
5313
5322
  * TODO: [®] DRY Register logic
5314
5323
  */
5315
5324
 
5325
+ /**
5326
+ * Removes emojis from a string and fix whitespaces
5327
+ *
5328
+ * @param text with emojis
5329
+ * @returns text without emojis
5330
+ * @public exported from `@promptbook/utils`
5331
+ */
5332
+ function removeEmojis(text) {
5333
+ // Replace emojis (and also ZWJ sequence) with hyphens
5334
+ text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
5335
+ text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
5336
+ text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
5337
+ text = text.replace(/\p{Extended_Pictographic}/gu, '');
5338
+ return text;
5339
+ }
5340
+
5316
5341
  /**
5317
5342
  * @@@
5318
5343
  *
@@ -5375,6 +5400,30 @@ function normalizeToKebabCase(text) {
5375
5400
  * Note: [💞] Ignore a discrepancy between file name and entity name
5376
5401
  */
5377
5402
 
5403
+ /**
5404
+ * @@@
5405
+ *
5406
+ * @param value @@@
5407
+ * @returns @@@
5408
+ * @example @@@
5409
+ * @public exported from `@promptbook/utils`
5410
+ */
5411
+ function titleToName(value) {
5412
+ if (isValidUrl(value)) {
5413
+ value = value.replace(/^https?:\/\//, '');
5414
+ value = value.replace(/\.html$/, '');
5415
+ }
5416
+ else if (isValidFilePath(value)) {
5417
+ value = basename(value);
5418
+ // Note: Keeping extension in the name
5419
+ }
5420
+ value = value.split('/').join('-');
5421
+ value = removeEmojis(value);
5422
+ value = normalizeToKebabCase(value);
5423
+ // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
5424
+ return value;
5425
+ }
5426
+
5378
5427
  /**
5379
5428
  * Creates unique name for the source
5380
5429
  *
@@ -5396,6 +5445,15 @@ function knowledgeSourceContentToName(knowledgeSourceContent) {
5396
5445
  * TODO: [🐱‍🐉][🧠] Make some smart crop NOT source-i-m-pavol-a-develop-... BUT source-i-m-pavol-a-developer-...
5397
5446
  */
5398
5447
 
5448
+ /**
5449
+ * @@@
5450
+ *
5451
+ * @private for `FileCacheStorage`
5452
+ */
5453
+ function nameToSubfolderPath(name) {
5454
+ return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()];
5455
+ }
5456
+
5399
5457
  /**
5400
5458
  * Convert file extension to mime type
5401
5459
  *
@@ -5486,10 +5544,11 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
5486
5544
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5487
5545
  var _a;
5488
5546
  return __awaiter(this, void 0, void 0, function () {
5489
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, filename_1, fileExtension, mimeType;
5490
- return __generator(this, function (_f) {
5491
- switch (_f.label) {
5547
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
5548
+ return __generator(this, function (_l) {
5549
+ switch (_l.label) {
5492
5550
  case 0:
5551
+ console.log('!!! makeKnowledgeSourceHandler', knowledgeSource);
5493
5552
  _b = tools.fetch, fetch = _b === void 0 ? scraperFetch : _b;
5494
5553
  knowledgeSourceContent = knowledgeSource.knowledgeSourceContent;
5495
5554
  name = knowledgeSource.name;
@@ -5497,54 +5556,32 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5497
5556
  if (!name) {
5498
5557
  name = knowledgeSourceContentToName(knowledgeSourceContent);
5499
5558
  }
5500
- if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 2];
5559
+ if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 5];
5501
5560
  url = knowledgeSourceContent;
5502
5561
  return [4 /*yield*/, fetch(url)];
5503
5562
  case 1:
5504
- response_1 = _f.sent();
5505
- mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5506
- return [2 /*return*/, {
5507
- source: name,
5508
- filename: null,
5509
- url: url,
5510
- mimeType: mimeType,
5511
- /*
5512
- TODO: [🥽]
5513
- > async asBlob() {
5514
- > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called second time, response in already consumed
5515
- > const content = await response.blob();
5516
- > return content;
5517
- > },
5518
- */
5519
- asJson: function () {
5520
- return __awaiter(this, void 0, void 0, function () {
5521
- var content;
5522
- return __generator(this, function (_a) {
5523
- switch (_a.label) {
5524
- case 0: return [4 /*yield*/, response_1.json()];
5525
- case 1:
5526
- content = _a.sent();
5527
- return [2 /*return*/, content];
5528
- }
5529
- });
5530
- });
5531
- },
5532
- asText: function () {
5533
- return __awaiter(this, void 0, void 0, function () {
5534
- var content;
5535
- return __generator(this, function (_a) {
5536
- switch (_a.label) {
5537
- case 0: return [4 /*yield*/, response_1.text()];
5538
- case 1:
5539
- content = _a.sent();
5540
- return [2 /*return*/, content];
5541
- }
5542
- });
5543
- });
5544
- },
5545
- }];
5563
+ response = _l.sent();
5564
+ mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
5565
+ filename = url.split('/').pop() || titleToName(url);
5566
+ hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
5567
+ rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
5568
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
5569
+ return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
5546
5570
  case 2:
5547
- if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 4];
5571
+ _l.sent();
5572
+ _g = (_f = tools.fs).writeFile;
5573
+ _h = [join(rootDirname_1, filepath)];
5574
+ _k = (_j = Buffer).from;
5575
+ return [4 /*yield*/, response.arrayBuffer()];
5576
+ case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
5577
+ case 4:
5578
+ _l.sent();
5579
+ // TODO: !!!!!!!! Check the file security
5580
+ // TODO: !!!!!!!! Check the file size (if it is not too big)
5581
+ // TODO: !!!!!!!! Delete the file
5582
+ return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
5583
+ case 5:
5584
+ if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
5548
5585
  if (tools.fs === undefined) {
5549
5586
  throw new EnvironmentMismatchError('Can not import file knowledge without filesystem tools');
5550
5587
  // <- TODO: [🧠] What is the best error type here`
@@ -5557,8 +5594,8 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5557
5594
  fileExtension = getFileExtension(filename_1);
5558
5595
  mimeType = extensionToMimeType(fileExtension || '');
5559
5596
  return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
5560
- case 3:
5561
- if (!(_f.sent())) {
5597
+ case 6:
5598
+ if (!(_l.sent())) {
5562
5599
  throw new NotFoundError(spaceTrim(function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(knowledgeSourceContent), "\n\n Full file path:\n ").concat(block(filename_1), "\n "); }));
5563
5600
  }
5564
5601
  // TODO: [🧠][😿] Test security file - file is scoped to the project (BUT maybe do this in `filesystemTools`)
@@ -5604,7 +5641,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
5604
5641
  });
5605
5642
  },
5606
5643
  }];
5607
- case 4: return [2 /*return*/, {
5644
+ case 7: return [2 /*return*/, {
5608
5645
  source: name,
5609
5646
  filename: null,
5610
5647
  url: null,
@@ -6715,22 +6752,6 @@ function normalizeTo_camelCase(text, _isFirstLetterCapital) {
6715
6752
  * TODO: [🌺] Use some intermediate util splitWords
6716
6753
  */
6717
6754
 
6718
- /**
6719
- * Removes emojis from a string and fix whitespaces
6720
- *
6721
- * @param text with emojis
6722
- * @returns text without emojis
6723
- * @public exported from `@promptbook/utils`
6724
- */
6725
- function removeEmojis(text) {
6726
- // Replace emojis (and also ZWJ sequence) with hyphens
6727
- text = text.replace(/(\p{Extended_Pictographic})\p{Modifier_Symbol}/gu, '$1');
6728
- text = text.replace(/(\p{Extended_Pictographic})[\u{FE00}-\u{FE0F}]/gu, '$1');
6729
- text = text.replace(/(\p{Extended_Pictographic})(\u{200D}\p{Extended_Pictographic})*/gu, '$1');
6730
- text = text.replace(/\p{Extended_Pictographic}/gu, '');
6731
- return text;
6732
- }
6733
-
6734
6755
  /**
6735
6756
  * Removes quotes from a string
6736
6757
  *
@@ -8922,30 +8943,6 @@ function flattenMarkdown(markdown) {
8922
8943
  * NOW we are working just with markdown string and its good enough
8923
8944
  */
8924
8945
 
8925
- /**
8926
- * @@@
8927
- *
8928
- * @param value @@@
8929
- * @returns @@@
8930
- * @example @@@
8931
- * @public exported from `@promptbook/utils`
8932
- */
8933
- function titleToName(value) {
8934
- if (isValidUrl(value)) {
8935
- value = value.replace(/^https?:\/\//, '');
8936
- value = value.replace(/\.html$/, '');
8937
- }
8938
- else if (isValidFilePath(value)) {
8939
- value = basename(value);
8940
- // Note: Keeping extension in the name
8941
- }
8942
- value = value.split('/').join('-');
8943
- value = removeEmojis(value);
8944
- value = normalizeToKebabCase(value);
8945
- // TODO: [🧠] Maybe warn or add some padding to short name which are not good identifiers
8946
- return value;
8947
- }
8948
-
8949
8946
  /**
8950
8947
  * Compile pipeline from string (markdown) format to JSON format synchronously
8951
8948
  *
@@ -9678,6 +9675,7 @@ function $provideFilesystemForNode(options) {
9678
9675
  readFile: readFile,
9679
9676
  writeFile: writeFile,
9680
9677
  readdir: readdir,
9678
+ mkdir: mkdir,
9681
9679
  };
9682
9680
  }
9683
9681
  /**
@@ -11296,15 +11294,6 @@ function stringifyPipelineJson(pipeline) {
11296
11294
  * TODO: [🍙] Make some standard order of json properties
11297
11295
  */
11298
11296
 
11299
- /**
11300
- * @@@
11301
- *
11302
- * @private for `FileCacheStorage`
11303
- */
11304
- function nameToSubfolderPath(name) {
11305
- return [name.substr(0, 1).toLowerCase(), name.substr(1, 1).toLowerCase()];
11306
- }
11307
-
11308
11297
  /**
11309
11298
  * @@@
11310
11299
  *