@promptbook/website-crawler 0.84.0-12 → 0.84.0-13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@promptbook/website-crawler",
3
- "version": "0.84.0-12",
3
+ "version": "0.84.0-13",
4
4
  "description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
5
5
  "--note-0": " <- [🐊]",
6
6
  "private": false,
@@ -54,7 +54,7 @@
54
54
  "module": "./esm/index.es.js",
55
55
  "typings": "./esm/typings/src/_packages/website-crawler.index.d.ts",
56
56
  "peerDependencies": {
57
- "@promptbook/core": "0.84.0-12"
57
+ "@promptbook/core": "0.84.0-13"
58
58
  },
59
59
  "dependencies": {
60
60
  "@mozilla/readability": "0.5.0",
package/umd/index.umd.js CHANGED
@@ -25,7 +25,7 @@
25
25
  * @generated
26
26
  * @see https://github.com/webgptorg/promptbook
27
27
  */
28
- var PROMPTBOOK_ENGINE_VERSION = '0.84.0-11';
28
+ var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
29
29
  /**
30
30
  * TODO: string_promptbook_version should be constrained to all versions of the Promptbook engine
31
31
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -3623,7 +3623,7 @@
3623
3623
  function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
3624
3624
  var _a;
3625
3625
  return __awaiter(this, void 0, void 0, function () {
3626
- var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response, mimeType, filename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3626
+ var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
3627
3627
  return __generator(this, function (_l) {
3628
3628
  switch (_l.label) {
3629
3629
  case 0:
@@ -3639,19 +3639,61 @@
3639
3639
  url = knowledgeSourceContent;
3640
3640
  return [4 /*yield*/, fetch(url)];
3641
3641
  case 1:
3642
- response = _l.sent();
3643
- mimeType = ((_a = response.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3644
- filename = url.split('/').pop() || titleToName(url);
3642
+ response_1 = _l.sent();
3643
+ mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
3644
+ if (tools.fs === undefined || !url.endsWith('.pdf')) {
3645
+ return [2 /*return*/, {
3646
+ source: name,
3647
+ filename: null,
3648
+ url: url,
3649
+ mimeType: mimeType,
3650
+ /*
3651
+ TODO: [🥽]
3652
+ > async asBlob() {
3653
+ > // TODO: [👨🏻‍🤝‍👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
3654
+ > const content = await response.blob();
3655
+ > return content;
3656
+ > },
3657
+ */
3658
+ asJson: function () {
3659
+ return __awaiter(this, void 0, void 0, function () {
3660
+ var content;
3661
+ return __generator(this, function (_a) {
3662
+ switch (_a.label) {
3663
+ case 0: return [4 /*yield*/, response_1.json()];
3664
+ case 1:
3665
+ content = _a.sent();
3666
+ return [2 /*return*/, content];
3667
+ }
3668
+ });
3669
+ });
3670
+ },
3671
+ asText: function () {
3672
+ return __awaiter(this, void 0, void 0, function () {
3673
+ var content;
3674
+ return __generator(this, function (_a) {
3675
+ switch (_a.label) {
3676
+ case 0: return [4 /*yield*/, response_1.text()];
3677
+ case 1:
3678
+ content = _a.sent();
3679
+ return [2 /*return*/, content];
3680
+ }
3681
+ });
3682
+ });
3683
+ },
3684
+ }];
3685
+ }
3686
+ basename = url.split('/').pop() || titleToName(url);
3645
3687
  hash = sha256__default["default"](hexEncoder__default["default"].parse(url)).toString( /* hex */);
3646
3688
  rootDirname_1 = path.join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
3647
- filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(filename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3689
+ filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
3648
3690
  return [4 /*yield*/, tools.fs.mkdir(path.dirname(path.join(rootDirname_1, filepath)), { recursive: true })];
3649
3691
  case 2:
3650
3692
  _l.sent();
3651
3693
  _g = (_f = tools.fs).writeFile;
3652
3694
  _h = [path.join(rootDirname_1, filepath)];
3653
3695
  _k = (_j = Buffer).from;
3654
- return [4 /*yield*/, response.arrayBuffer()];
3696
+ return [4 /*yield*/, response_1.arrayBuffer()];
3655
3697
  case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
3656
3698
  case 4:
3657
3699
  _l.sent();