@promptbook/website-crawler 0.84.0-12 → 0.84.0-13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/esm/index.es.js CHANGED
@@ -27,7 +27,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
27
27
  * @generated
28
28
  * @see https://github.com/webgptorg/promptbook
29
29
  */
30
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
30
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
31
31
  /**
32
32
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
33
33
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -3625,7 +3625,7 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
3625
3625
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3626
3626
  var _a;
3627
3627
  return __awaiter(this, void 0, void 0, function () {
3628
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3628
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3629
3629
  return __generator(this, function (_l) {
3630
3630
  switch (_l.label) {
3631
3631
  case 0:
@@ -3641,19 +3641,61 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3641
3641
  url = knowledgeSourceContent;
3642
3642
  return [4 /*yield*/, fetch(url)];
3643
3643
  case 1:
3644
- response = _l.sent();
3645
- mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3646
- filename = url.split('/').pop() || titleToName(url);
3644
+ response_1 = _l.sent();
3645
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3646
+ if (tools.fs === undefined || !url.endsWith('.pdf')) {
3647
+ return [2 /*return*/, {
3648
+ source: name,
3649
+ filename: null,
3650
+ url: url,
3651
+ mimeType: mimeType,
3652
+ /*
3653
+ TODO: [🥽]
3654
+ > async asBlob() {
3655
+ > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
3656
+ > const content = await response.blob();
3657
+ > return content;
3658
+ > },
3659
+ */
3660
+ asJson: function () {
3661
+ return __awaiter(this, void 0, void 0, function () {
3662
+ var content;
3663
+ return __generator(this, function (_a) {
3664
+ switch (_a.label) {
3665
+ case 0: return [4 /*yield*/, response_1.json()];
3666
+ case 1:
3667
+ content = _a.sent();
3668
+ return [2 /*return*/, content];
3669
+ }
3670
+ });
3671
+ });
3672
+ },
3673
+ asText: function () {
3674
+ return __awaiter(this, void 0, void 0, function () {
3675
+ var content;
3676
+ return __generator(this, function (_a) {
3677
+ switch (_a.label) {
3678
+ case 0: return [4 /*yield*/, response_1.text()];
3679
+ case 1:
3680
+ content = _a.sent();
3681
+ return [2 /*return*/, content];
3682
+ }
3683
+ });
3684
+ });
3685
+ },
3686
+ }];
3687
+ }
3688
+ basename = url.split('/').pop() || titleToName(url);
3647
3689
  hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
3648
3690
  rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
3649
- filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3691
+ filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3650
3692
  return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
3651
3693
  case 2:
3652
3694
  _l.sent();
3653
3695
  _g = (_f = tools.fs).writeFile;
3654
3696
  _h = [join(rootDirname_1, filepath)];
3655
3697
  _k = (_j = Buffer).from;
3656
- return [4 /*yield*/, response.arrayBuffer()];
3698
+ return [4 /*yield*/, response_1.arrayBuffer()];
3657
3699
  case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
3658
3700
  case 4:
3659
3701
  _l.sent();