@promptbook/website-crawler 0.84.0-12 → 0.84.0-13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +49 -7
- package/esm/index.es.js.map +1 -1
- package/package.json +2 -2
- package/umd/index.umd.js +49 -7
- package/umd/index.umd.js.map +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@promptbook/website-crawler",
|
|
3
|
-
"version": "0.84.0-12",
|
|
3
|
+
"version": "0.84.0-13",
|
|
4
4
|
"description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
|
|
5
5
|
"--note-0": " <- [🐊]",
|
|
6
6
|
"private": false,
|
|
@@ -54,7 +54,7 @@
|
|
|
54
54
|
"module": "./esm/index.es.js",
|
|
55
55
|
"typings": "./esm/typings/src/_packages/website-crawler.index.d.ts",
|
|
56
56
|
"peerDependencies": {
|
|
57
|
-
"@promptbook/core": "0.84.0-12"
|
|
57
|
+
"@promptbook/core": "0.84.0-13"
|
|
58
58
|
},
|
|
59
59
|
"dependencies": {
|
|
60
60
|
"@mozilla/readability": "0.5.0",
|
package/umd/index.umd.js
CHANGED
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
* @generated
|
|
26
26
|
* @see https://github.com/webgptorg/promptbook
|
|
27
27
|
*/
|
|
28
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-
|
|
28
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
|
|
29
29
|
/**
|
|
30
30
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
31
31
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -3623,7 +3623,7 @@
|
|
|
3623
3623
|
function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
3624
3624
|
var _a;
|
|
3625
3625
|
return __awaiter(this, void 0, void 0, function () {
|
|
3626
|
-
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url,
|
|
3626
|
+
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
|
|
3627
3627
|
return __generator(this, function (_l) {
|
|
3628
3628
|
switch (_l.label) {
|
|
3629
3629
|
case 0:
|
|
@@ -3639,19 +3639,61 @@
|
|
|
3639
3639
|
url = knowledgeSourceContent;
|
|
3640
3640
|
return [4 /*yield*/, fetch(url)];
|
|
3641
3641
|
case 1:
|
|
3642
|
-
|
|
3643
|
-
mimeType = ((_a =
|
|
3644
|
-
|
|
3642
|
+
response_1 = _l.sent();
|
|
3643
|
+
mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
3644
|
+
if (tools.fs === undefined || !url.endsWith('.pdf')) {
|
|
3645
|
+
return [2 /*return*/, {
|
|
3646
|
+
source: name,
|
|
3647
|
+
filename: null,
|
|
3648
|
+
url: url,
|
|
3649
|
+
mimeType: mimeType,
|
|
3650
|
+
/*
|
|
3651
|
+
TODO: [🥽]
|
|
3652
|
+
> async asBlob() {
|
|
3653
|
+
> // TODO: [👨🏻🤝👨🏻] This can be called multiple times BUT when called second time, response in already consumed
|
|
3654
|
+
> const content = await response.blob();
|
|
3655
|
+
> return content;
|
|
3656
|
+
> },
|
|
3657
|
+
*/
|
|
3658
|
+
asJson: function () {
|
|
3659
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3660
|
+
var content;
|
|
3661
|
+
return __generator(this, function (_a) {
|
|
3662
|
+
switch (_a.label) {
|
|
3663
|
+
case 0: return [4 /*yield*/, response_1.json()];
|
|
3664
|
+
case 1:
|
|
3665
|
+
content = _a.sent();
|
|
3666
|
+
return [2 /*return*/, content];
|
|
3667
|
+
}
|
|
3668
|
+
});
|
|
3669
|
+
});
|
|
3670
|
+
},
|
|
3671
|
+
asText: function () {
|
|
3672
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3673
|
+
var content;
|
|
3674
|
+
return __generator(this, function (_a) {
|
|
3675
|
+
switch (_a.label) {
|
|
3676
|
+
case 0: return [4 /*yield*/, response_1.text()];
|
|
3677
|
+
case 1:
|
|
3678
|
+
content = _a.sent();
|
|
3679
|
+
return [2 /*return*/, content];
|
|
3680
|
+
}
|
|
3681
|
+
});
|
|
3682
|
+
});
|
|
3683
|
+
},
|
|
3684
|
+
}];
|
|
3685
|
+
}
|
|
3686
|
+
basename = url.split('/').pop() || titleToName(url);
|
|
3645
3687
|
hash = sha256__default["default"](hexEncoder__default["default"].parse(url)).toString( /* hex */);
|
|
3646
3688
|
rootDirname_1 = path.join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
3647
|
-
filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(
|
|
3689
|
+
filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
|
|
3648
3690
|
return [4 /*yield*/, tools.fs.mkdir(path.dirname(path.join(rootDirname_1, filepath)), { recursive: true })];
|
|
3649
3691
|
case 2:
|
|
3650
3692
|
_l.sent();
|
|
3651
3693
|
_g = (_f = tools.fs).writeFile;
|
|
3652
3694
|
_h = [path.join(rootDirname_1, filepath)];
|
|
3653
3695
|
_k = (_j = Buffer).from;
|
|
3654
|
-
return [4 /*yield*/,
|
|
3696
|
+
return [4 /*yield*/, response_1.arrayBuffer()];
|
|
3655
3697
|
case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
|
|
3656
3698
|
case 4:
|
|
3657
3699
|
_l.sent();
|