@promptbook/documents 0.84.0-11 → 0.84.0-12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -8,6 +8,7 @@ import hexEncoder from 'crypto-js/enc-hex';
8
8
  import { basename, join, dirname } from 'path';
9
9
  import { format } from 'prettier';
10
10
  import parserHtml from 'prettier/parser-html';
11
+ import sha256 from 'crypto-js/sha256';
11
12
  import { lookup } from 'mime-types';
12
13
  import { unparse, parse } from 'papaparse';
13
14
 
@@ -25,7 +26,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
25
26
  * @generated
26
27
  * @see https://github.com/webgptorg/promptbook
27
28
  */
28
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-10';
29
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
29
30
  /**
30
31
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
31
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -229,6 +230,12 @@ var SMALL_NUMBER = 0.001;
229
230
  * @private within the repository - too low-level in comparison with other `MAX_...`
230
231
  */
231
232
  var IMMEDIATE_TIME = 10;
233
+ /**
234
+ * The maximum length of the (generated) filename
235
+ *
236
+ * @public exported from `@promptbook/core`
237
+ */
238
+ var MAX_FILENAME_LENGTH = 30;
232
239
  /**
233
240
  * Strategy for caching the intermediate results for knowledge sources
234
241
  *
@@ -248,6 +255,15 @@ var DEFAULT_MAX_PARALLEL_COUNT = 5; // <- TODO: [🤹‍♂️]
248
255
  * @public exported from `@promptbook/core`
249
256
  */
250
257
  var DEFAULT_MAX_EXECUTION_ATTEMPTS = 3; // <- TODO: [🤹‍♂️]
258
+ // <- TODO: [๐Ÿ•] Make also `BOOKS_DIRNAME_ALTERNATIVES`
259
+ /**
260
+ * Where to store the temporary downloads
261
+ *
262
+ * Note: When the folder does not exist, it is created recursively
263
+ *
264
+ * @public exported from `@promptbook/core`
265
+ */
266
+ var DEFAULT_DOWNLOAD_CACHE_DIRNAME = './.promptbook/download-cache';
251
267
  /**
252
268
  * Where to store the scrape cache
253
269
  *
@@ -3750,10 +3766,11 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
3750
3766
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3751
3767
  var _a;
3752
3768
  return __awaiter(this, void 0, void 0, function () {
3753
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, filename_1, fileExtension, mimeType;
3754
- return __generator(this, function (_f) {
3755
- switch (_f.label) {
3769
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3770
+ return __generator(this, function (_l) {
3771
+ switch (_l.label) {
3756
3772
  case 0:
3773
+ console.log('!!! makeKnowledgeSourceHandler', knowledgeSource);
3757
3774
  _b = tools.fetch, fetch = _b === void 0 ? scraperFetch : _b;
3758
3775
  knowledgeSourceContent = knowledgeSource.knowledgeSourceContent;
3759
3776
  name = knowledgeSource.name;
@@ -3761,54 +3778,32 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3761
3778
  if (!name) {
3762
3779
  name = knowledgeSourceContentToName(knowledgeSourceContent);
3763
3780
  }
3764
- if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 2];
3781
+ if (!isValidUrl(knowledgeSourceContent)) return [3 /*break*/, 5];
3765
3782
  url = knowledgeSourceContent;
3766
3783
  return [4 /*yield*/, fetch(url)];
3767
3784
  case 1:
3768
- response_1 = _f.sent();
3769
- mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3770
- return [2 /*return*/, {
3771
- source: name,
3772
- filename: null,
3773
- url: url,
3774
- mimeType: mimeType,
3775
- /*
3776
- TODO: [🥽]
3777
- > async asBlob() {
3778
- > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called second time, response in already consumed
3779
- > const content = await response.blob();
3780
- > return content;
3781
- > },
3782
- */
3783
- asJson: function () {
3784
- return __awaiter(this, void 0, void 0, function () {
3785
- var content;
3786
- return __generator(this, function (_a) {
3787
- switch (_a.label) {
3788
- case 0: return [4 /*yield*/, response_1.json()];
3789
- case 1:
3790
- content = _a.sent();
3791
- return [2 /*return*/, content];
3792
- }
3793
- });
3794
- });
3795
- },
3796
- asText: function () {
3797
- return __awaiter(this, void 0, void 0, function () {
3798
- var content;
3799
- return __generator(this, function (_a) {
3800
- switch (_a.label) {
3801
- case 0: return [4 /*yield*/, response_1.text()];
3802
- case 1:
3803
- content = _a.sent();
3804
- return [2 /*return*/, content];
3805
- }
3806
- });
3807
- });
3808
- },
3809
- }];
3785
+ response = _l.sent();
3786
+ mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3787
+ filename = url.split('/').pop() || titleToName(url);
3788
+ hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
3789
+ rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
3790
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3791
+ return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
3810
3792
  case 2:
3811
- if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 4];
3793
+ _l.sent();
3794
+ _g = (_f = tools.fs).writeFile;
3795
+ _h = [join(rootDirname_1, filepath)];
3796
+ _k = (_j = Buffer).from;
3797
+ return [4 /*yield*/, response.arrayBuffer()];
3798
+ case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
3799
+ case 4:
3800
+ _l.sent();
3801
+ // TODO: !!!!!!!! Check the file security
3802
+ // TODO: !!!!!!!! Check the file size (if it is not too big)
3803
+ // TODO: !!!!!!!! Delete the file
3804
+ return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
3805
+ case 5:
3806
+ if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
3812
3807
  if (tools.fs === undefined) {
3813
3808
  throw new EnvironmentMismatchError('Can not import file knowledge without filesystem tools');
3814
3809
  // <- TODO: [🧠] What is the best error type here`
@@ -3821,8 +3816,8 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3821
3816
  fileExtension = getFileExtension(filename_1);
3822
3817
  mimeType = extensionToMimeType(fileExtension || '');
3823
3818
  return [4 /*yield*/, isFileExisting(filename_1, tools.fs)];
3824
- case 3:
3825
- if (!(_f.sent())) {
3819
+ case 6:
3820
+ if (!(_l.sent())) {
3826
3821
  throw new NotFoundError(spaceTrim$1(function (block) { return "\n Can not make source handler for file which does not exist:\n\n File:\n ".concat(block(knowledgeSourceContent), "\n\n Full file path:\n ").concat(block(filename_1), "\n "); }));
3827
3822
  }
3828
3823
  // TODO: [🧠][😿] Test security file - file is scoped to the project (BUT maybe do this in `filesystemTools`)
@@ -3868,7 +3863,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3868
3863
  });
3869
3864
  },
3870
3865
  }];
3871
- case 4: return [2 /*return*/, {
3866
+ case 7: return [2 /*return*/, {
3872
3867
  source: name,
3873
3868
  filename: null,
3874
3869
  url: null,