@promptbook/documents 0.84.0-12 → 0.84.0-14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -9,7 +9,7 @@ import { basename, join, dirname } from 'path';
9
9
  import { format } from 'prettier';
10
10
  import parserHtml from 'prettier/parser-html';
11
11
  import sha256 from 'crypto-js/sha256';
12
- import { lookup } from 'mime-types';
12
+ import { lookup, extension } from 'mime-types';
13
13
  import { unparse, parse } from 'papaparse';
14
14
 
15
15
  // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
@@ -26,7 +26,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
26
26
  * @generated
27
27
  * @see https://github.com/webgptorg/promptbook
28
28
  */
29
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
29
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-13';
30
30
  /**
31
31
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
32
32
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -3731,6 +3731,17 @@ function extensionToMimeType(value) {
3731
3731
  return lookup(value) || 'application/octet-stream';
3732
3732
  }
3733
3733
 
3734
+ /**
3735
+ * Convert mime type to file extension
3736
+ *
3737
+ * Note: If the mime type is invalid, `null` is returned
3738
+ *
3739
+ * @private within the repository
3740
+ */
3741
+ function mimeTypeToExtension(value) {
3742
+ return extension(value) || null;
3743
+ }
3744
+
3734
3745
  /**
3735
3746
  * The built-in `fetch` function with a lightweight error handling wrapper as default fetch function used in Promptbook scrapers
3736
3747
  *
@@ -3766,7 +3777,7 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
3766
3777
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3767
3778
  var _a;
3768
3779
  return __awaiter(this, void 0, void 0, function () {
3769
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3780
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3770
3781
  return __generator(this, function (_l) {
3771
3782
  switch (_l.label) {
3772
3783
  case 0:
@@ -3782,25 +3793,67 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3782
3793
  url = knowledgeSourceContent;
3783
3794
  return [4 /*yield*/, fetch(url)];
3784
3795
  case 1:
3785
- response = _l.sent();
3786
- mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3787
- filename = url.split('/').pop() || titleToName(url);
3796
+ response_1 = _l.sent();
3797
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3798
+ if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
3799
+ return [2 /*return*/, {
3800
+ source: name,
3801
+ filename: null,
3802
+ url: url,
3803
+ mimeType: mimeType,
3804
+ /*
3805
+ TODO: [🥽]
3806
+ > async asBlob() {
3807
+ > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called second time, response is already consumed
3808
+ > const content = await response.blob();
3809
+ > return content;
3810
+ > },
3811
+ */
3812
+ asJson: function () {
3813
+ return __awaiter(this, void 0, void 0, function () {
3814
+ var content;
3815
+ return __generator(this, function (_a) {
3816
+ switch (_a.label) {
3817
+ case 0: return [4 /*yield*/, response_1.json()];
3818
+ case 1:
3819
+ content = _a.sent();
3820
+ return [2 /*return*/, content];
3821
+ }
3822
+ });
3823
+ });
3824
+ },
3825
+ asText: function () {
3826
+ return __awaiter(this, void 0, void 0, function () {
3827
+ var content;
3828
+ return __generator(this, function (_a) {
3829
+ switch (_a.label) {
3830
+ case 0: return [4 /*yield*/, response_1.text()];
3831
+ case 1:
3832
+ content = _a.sent();
3833
+ return [2 /*return*/, content];
3834
+ }
3835
+ });
3836
+ });
3837
+ },
3838
+ }];
3839
+ }
3840
+ basename = url.split('/').pop() || titleToName(url);
3788
3841
  hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
3789
3842
  rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
3790
- filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3843
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
3791
3844
  return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
3792
3845
  case 2:
3793
3846
  _l.sent();
3794
3847
  _g = (_f = tools.fs).writeFile;
3795
3848
  _h = [join(rootDirname_1, filepath)];
3796
3849
  _k = (_j = Buffer).from;
3797
- return [4 /*yield*/, response.arrayBuffer()];
3850
+ return [4 /*yield*/, response_1.arrayBuffer()];
3798
3851
  case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
3799
3852
  case 4:
3800
3853
  _l.sent();
3801
- // TODO: !!!!!!!! Check the file security
3854
+ // TODO: [💵] Check the file security
3802
3855
  // TODO: !!!!!!!! Check the file size (if it is not too big)
3803
- // TODO: !!!!!!!! Delete the file
3856
+ // TODO: !!!!!!!! Delete the file after the scraping is done
3804
3857
  return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
3805
3858
  case 5:
3806
3859
  if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];