@promptbook/pdf 0.84.0-12 → 0.84.0-14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +64 -11
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +4 -0
- package/esm/typings/src/config.d.ts +12 -0
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.d.ts +10 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.test.d.ts +1 -0
- package/package.json +2 -2
- package/umd/index.umd.js +63 -10
- package/umd/index.umd.js.map +1 -1
package/esm/index.es.js
CHANGED
|
@@ -7,7 +7,7 @@ import { format } from 'prettier';
|
|
|
7
7
|
import parserHtml from 'prettier/parser-html';
|
|
8
8
|
import { forTime } from 'waitasecond';
|
|
9
9
|
import sha256 from 'crypto-js/sha256';
|
|
10
|
-
import { lookup } from 'mime-types';
|
|
10
|
+
import { lookup, extension } from 'mime-types';
|
|
11
11
|
import { unparse, parse } from 'papaparse';
|
|
12
12
|
|
|
13
13
|
// ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
|
|
@@ -24,7 +24,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
24
24
|
* @generated
|
|
25
25
|
* @see https://github.com/webgptorg/promptbook
|
|
26
26
|
*/
|
|
27
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-
|
|
27
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-13';
|
|
28
28
|
/**
|
|
29
29
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
30
30
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -3580,6 +3580,17 @@ function getFileExtension(value) {
|
|
|
3580
3580
|
return match ? match[1].toLowerCase() : null;
|
|
3581
3581
|
}
|
|
3582
3582
|
|
|
3583
|
+
/**
|
|
3584
|
+
* Convert mime type to file extension
|
|
3585
|
+
*
|
|
3586
|
+
* Note: If the mime type is invalid, `null` is returned
|
|
3587
|
+
*
|
|
3588
|
+
* @private within the repository
|
|
3589
|
+
*/
|
|
3590
|
+
function mimeTypeToExtension(value) {
|
|
3591
|
+
return extension(value) || null;
|
|
3592
|
+
}
|
|
3593
|
+
|
|
3583
3594
|
/**
|
|
3584
3595
|
* The built-in `fetch` function with a lightweight error handling wrapper as the default fetch function used in Promptbook scrapers
|
|
3585
3596
|
*
|
|
@@ -3615,7 +3626,7 @@ var scraperFetch = function (url, init) { return __awaiter(void 0, void 0, void
|
|
|
3615
3626
|
function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
3616
3627
|
var _a;
|
|
3617
3628
|
return __awaiter(this, void 0, void 0, function () {
|
|
3618
|
-
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url,
|
|
3629
|
+
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
|
|
3619
3630
|
return __generator(this, function (_l) {
|
|
3620
3631
|
switch (_l.label) {
|
|
3621
3632
|
case 0:
|
|
@@ -3631,25 +3642,67 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3631
3642
|
url = knowledgeSourceContent;
|
|
3632
3643
|
return [4 /*yield*/, fetch(url)];
|
|
3633
3644
|
case 1:
|
|
3634
|
-
|
|
3635
|
-
mimeType = ((_a =
|
|
3636
|
-
|
|
3645
|
+
response_1 = _l.sent();
|
|
3646
|
+
mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
3647
|
+
if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
|
|
3648
|
+
return [2 /*return*/, {
|
|
3649
|
+
source: name,
|
|
3650
|
+
filename: null,
|
|
3651
|
+
url: url,
|
|
3652
|
+
mimeType: mimeType,
|
|
3653
|
+
/*
|
|
3654
|
+
TODO: [🥽]
|
|
3655
|
+
> async asBlob() {
|
|
3656
|
+
> // TODO: [👨🏻🤝👨🏻] This can be called multiple times BUT when called second time, response is already consumed
|
|
3657
|
+
> const content = await response.blob();
|
|
3658
|
+
> return content;
|
|
3659
|
+
> },
|
|
3660
|
+
*/
|
|
3661
|
+
asJson: function () {
|
|
3662
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3663
|
+
var content;
|
|
3664
|
+
return __generator(this, function (_a) {
|
|
3665
|
+
switch (_a.label) {
|
|
3666
|
+
case 0: return [4 /*yield*/, response_1.json()];
|
|
3667
|
+
case 1:
|
|
3668
|
+
content = _a.sent();
|
|
3669
|
+
return [2 /*return*/, content];
|
|
3670
|
+
}
|
|
3671
|
+
});
|
|
3672
|
+
});
|
|
3673
|
+
},
|
|
3674
|
+
asText: function () {
|
|
3675
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3676
|
+
var content;
|
|
3677
|
+
return __generator(this, function (_a) {
|
|
3678
|
+
switch (_a.label) {
|
|
3679
|
+
case 0: return [4 /*yield*/, response_1.text()];
|
|
3680
|
+
case 1:
|
|
3681
|
+
content = _a.sent();
|
|
3682
|
+
return [2 /*return*/, content];
|
|
3683
|
+
}
|
|
3684
|
+
});
|
|
3685
|
+
});
|
|
3686
|
+
},
|
|
3687
|
+
}];
|
|
3688
|
+
}
|
|
3689
|
+
basename = url.split('/').pop() || titleToName(url);
|
|
3637
3690
|
hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
|
|
3638
3691
|
rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
3639
|
-
filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(
|
|
3692
|
+
filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
|
|
3640
3693
|
return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
|
|
3641
3694
|
case 2:
|
|
3642
3695
|
_l.sent();
|
|
3643
3696
|
_g = (_f = tools.fs).writeFile;
|
|
3644
3697
|
_h = [join(rootDirname_1, filepath)];
|
|
3645
3698
|
_k = (_j = Buffer).from;
|
|
3646
|
-
return [4 /*yield*/,
|
|
3699
|
+
return [4 /*yield*/, response_1.arrayBuffer()];
|
|
3647
3700
|
case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
|
|
3648
3701
|
case 4:
|
|
3649
3702
|
_l.sent();
|
|
3650
|
-
// TODO:
|
|
3703
|
+
// TODO: [💵] Check the file security
|
|
3651
3704
|
// TODO: !!!!!!!! Check the file size (if it is not too big)
|
|
3652
|
-
// TODO: !!!!!!!! Delete the file
|
|
3705
|
+
// TODO: !!!!!!!! Delete the file after the scraping is done
|
|
3653
3706
|
return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
|
|
3654
3707
|
case 5:
|
|
3655
3708
|
if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
|
|
@@ -6268,7 +6321,7 @@ var markitdownScraperMetadata = $deepFreeze({
|
|
|
6268
6321
|
className: 'MarkitdownScraper',
|
|
6269
6322
|
mimeTypes: [
|
|
6270
6323
|
'application/pdf',
|
|
6271
|
-
// TODO: Make priority for scrapers and than allow all mime types here:
|
|
6324
|
+
// TODO: [💵] Make priority for scrapers and then analyze which mime-types Markitdown can scrape and allow all mime types here:
|
|
6272
6325
|
// 'text/html',
|
|
6273
6326
|
// 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
6274
6327
|
],
|