@promptbook/pdf 0.84.0-11 → 0.84.0-13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -6,6 +6,7 @@ import { basename, join, dirname } from 'path';
6
6
  import { format } from 'prettier';
7
7
  import parserHtml from 'prettier/parser-html';
8
8
  import { forTime } from 'waitasecond';
9
+ import sha256 from 'crypto-js/sha256';
9
10
  import { lookup } from 'mime-types';
10
11
  import { unparse, parse } from 'papaparse';
11
12
 
@@ -23,7 +24,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
23
24
  * @generated
24
25
  * @see https://github.com/webgptorg/promptbook
25
26
  */
26
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-10';
27
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
27
28
  /**
28
29
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
29
30
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -227,6 +228,12 @@ var SMALL_NUMBER = 0.001;
227
228
  * @private within the repository - too low-level in comparison with other `MAX_...`
228
229
  */
229
230
  var IMMEDIATE_TIME = 10;
231
+ /**
232
+ * The maximum length of the (generated) filename
233
+ *
234
+ * @public exported from `@promptbook/core`
235
+ */
236
+ var MAX_FILENAME_LENGTH = 30;
230
237
  /**
231
238
  * Strategy for caching the intermediate results for knowledge sources
232
239
  *
@@ -246,6 +253,15 @@ var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
246
253
  * @public exported from `@promptbook/core`
247
254
  */
248
255
  var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
256
+ // <- TODO: [๐Ÿ•] Make also `BOOKS_DIRNAME_ALTERNATIVES`
257
+ /**
258
+ * Where to store the temporary downloads
259
+ *
260
+ * Note: When the folder does not exist, it is created recursively
261
+ *
262
+ * @public exported from `@promptbook/core`
263
+ */
264
+ var DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
249
265
  /**
250
266
  * Where to store the scrape cache
251
267
  *
@@ -3599,10 +3615,11 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
3599
3615
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3600
3616
  var _a;
3601
3617
  return __awaiter(this, void 0, void 0, function () {
3602
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, filename_1, fileExtension, mimeType;
3603
- return __generator(this, function (_f) {
3604
- switch (_f.label) {
3618
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3619
+ return __generator(this, function (_l) {
3620
+ switch (_l.label) {
3605
3621
  case 0:
3622
+ console.log('!!! makeKnowledgeSourceHandler', knowledgeSource);
3606
3623
  _b = tools.fetch, fetch = _b === void 0 ? scraperFetch : _b;
3607
3624
  knowledgeSourceContent = knowledgeSource.knowledgeSourceContent;
3608
3625
  name = knowledgeSource.name;
@@ -3610,54 +3627,74 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3610
3627
  if (!name) {
3611
3628
  name = knowledgeSourceContentToName(knowledgeSourceContent);
3612
3629
  }
3613
- if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 2];
3630
+ if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 5];
3614
3631
  url = knowledgeSourceContent;
3615
3632
  return [4 /*yield*/, fetch(url)];
3616
3633
  case 1:
3617
- response_1 = _f.sent();
3634
+ response_1 = _l.sent();
3618
3635
  mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3619
- return [2 /*return*/, {
3620
- source: name,
3621
- filename: null,
3622
- url: url,
3623
- mimeType: mimeType,
3624
- /*
3625
- TODO: [🥽]
3626
- > async asBlob() {
3627
- > // TODO: [๐Ÿ‘จ๐Ÿปโ€๐Ÿคโ€๐Ÿ‘จ๐Ÿป] This can be called multiple times BUT when called second time, response in already consumed
3628
- > const content = await response.blob();
3629
- > return content;
3630
- > },
3631
- */
3632
- asJson: function () {
3633
- return __awaiter(this, void 0, void 0, function () {
3634
- var content;
3635
- return __generator(this, function (_a) {
3636
- switch (_a.label) {
3637
- case 0: return [4 /*yield*/, response_1.json()];
3638
- case 1:
3639
- content = _a.sent();
3640
- return [2 /*return*/, content];
3641
- }
3636
+ if (tools.fs === undefined || !url.endsWith('.pdf')) {
3637
+ return [2 /*return*/, {
3638
+ source: name,
3639
+ filename: null,
3640
+ url: url,
3641
+ mimeType: mimeType,
3642
+ /*
3643
+ TODO: [🥽]
3644
+ > async asBlob() {
3645
+ > // TODO: [๐Ÿ‘จ๐Ÿปโ€๐Ÿคโ€๐Ÿ‘จ๐Ÿป] This can be called multiple times BUT when called second time, response in already consumed
3646
+ > const content = await response.blob();
3647
+ > return content;
3648
+ > },
3649
+ */
3650
+ asJson: function () {
3651
+ return __awaiter(this, void 0, void 0, function () {
3652
+ var content;
3653
+ return __generator(this, function (_a) {
3654
+ switch (_a.label) {
3655
+ case 0: return [4 /*yield*/, response_1.json()];
3656
+ case 1:
3657
+ content = _a.sent();
3658
+ return [2 /*return*/, content];
3659
+ }
3660
+ });
3642
3661
  });
3643
- });
3644
- },
3645
- asText: function () {
3646
- return __awaiter(this, void 0, void 0, function () {
3647
- var content;
3648
- return __generator(this, function (_a) {
3649
- switch (_a.label) {
3650
- case 0: return [4 /*yield*/, response_1.text()];
3651
- case 1:
3652
- content = _a.sent();
3653
- return [2 /*return*/, content];
3654
- }
3662
+ },
3663
+ asText: function () {
3664
+ return __awaiter(this, void 0, void 0, function () {
3665
+ var content;
3666
+ return __generator(this, function (_a) {
3667
+ switch (_a.label) {
3668
+ case 0: return [4 /*yield*/, response_1.text()];
3669
+ case 1:
3670
+ content = _a.sent();
3671
+ return [2 /*return*/, content];
3672
+ }
3673
+ });
3655
3674
  });
3656
- });
3657
- },
3658
- }];
3675
+ },
3676
+ }];
3677
+ }
3678
+ basename = url.split('/').pop() || titleToName(url);
3679
+ hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
3680
+ rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
3681
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [๐ŸŽŽ] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3682
+ return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
3659
3683
  case 2:
3660
- if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 4];
3684
+ _l.sent();
3685
+ _g = (_f = tools.fs).writeFile;
3686
+ _h = [join(rootDirname_1, filepath)];
3687
+ _k = (_j = Buffer).from;
3688
+ return [4 /*yield*/, response_1.arrayBuffer()];
3689
+ case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
3690
+ case 4:
3691
+ _l.sent();
3692
+ // TODO: !!!!!!!! Check the file security
3693
+ // TODO: !!!!!!!! Check the file size (if it is not too big)
3694
+ // TODO: !!!!!!!! Delete the file
3695
+ return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
3696
+ case 5:
3697
+ if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
3661
3698
  if (tools.fs === undefined) {
3662
3699
  throw new EnvironmentMismatchError('Can not import file knowledge without filesystem tools');
3663
3700
  // <- TODO: [🧠] What is the best error type here`
@@ -3670,8 +3707,8 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3670
3707
  fileExtension = getFileExtension(filename_1);
3671
3708
  mimeType = extensionToMimeType(fileExtension || '');
3672
3709
  return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
3673
- case 3:
3674
- if (!(_f.sent())) {
3710
+ case 6:
3711
+ if (!(_l.sent())) {
3675
3712
  throw new NotFoundError(spaceTrim(function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(knowledgeSourceContent), "\n\n Full file path:\n ").concat(block(filename_1), "\n "); }));
3676
3713
  }
3677
3714
  // TODO: [🧠][😿] Test security file - file is scoped to the project (BUT maybe do this in `filesystemTools`)
@@ -3717,7 +3754,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3717
3754
  });
3718
3755
  },
3719
3756
  }];
3720
- case 4: return [2 /*return*/, {
3757
+ case 7: return [2 /*return*/, {
3721
3758
  source: name,
3722
3759
  filename: null,
3723
3760
  url: null,