@promptbook/pdf 0.84.0-12 → 0.84.0-14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/esm/index.es.js +64 -11
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +4 -0
- package/esm/typings/src/config.d.ts +12 -0
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.d.ts +10 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.test.d.ts +1 -0
- package/package.json +2 -2
- package/umd/index.umd.js +63 -10
- package/umd/index.umd.js.map +1 -1
|
@@ -12,6 +12,8 @@ import { LOGO_LIGHT_SRC } from '../config';
|
|
|
12
12
|
import { LOGO_DARK_SRC } from '../config';
|
|
13
13
|
import { DEFAULT_BOOK_TITLE } from '../config';
|
|
14
14
|
import { DEFAULT_TASK_TITLE } from '../config';
|
|
15
|
+
import { DEFAULT_PROMPT_TASK_TITLE } from '../config';
|
|
16
|
+
import { DEFAULT_BOOK_OUTPUT_PARAMETER_NAME } from '../config';
|
|
15
17
|
import { MAX_FILENAME_LENGTH } from '../config';
|
|
16
18
|
import { DEFAULT_INTERMEDIATE_FILES_STRATEGY } from '../config';
|
|
17
19
|
import { DEFAULT_MAX_PARALLEL_COUNT } from '../config';
|
|
@@ -139,6 +141,8 @@ export { LOGO_LIGHT_SRC };
|
|
|
139
141
|
export { LOGO_DARK_SRC };
|
|
140
142
|
export { DEFAULT_BOOK_TITLE };
|
|
141
143
|
export { DEFAULT_TASK_TITLE };
|
|
144
|
+
export { DEFAULT_PROMPT_TASK_TITLE };
|
|
145
|
+
export { DEFAULT_BOOK_OUTPUT_PARAMETER_NAME };
|
|
142
146
|
export { MAX_FILENAME_LENGTH };
|
|
143
147
|
export { DEFAULT_INTERMEDIATE_FILES_STRATEGY };
|
|
144
148
|
export { DEFAULT_MAX_PARALLEL_COUNT };
|
|
@@ -65,6 +65,18 @@ export declare const DEFAULT_BOOK_TITLE = "\u2728 Untitled Book";
|
|
|
65
65
|
* @public exported from `@promptbook/core`
|
|
66
66
|
*/
|
|
67
67
|
export declare const DEFAULT_TASK_TITLE = "Task";
|
|
68
|
+
/**
|
|
69
|
+
* When the title of the prompt task is not provided, the default title is used
|
|
70
|
+
*
|
|
71
|
+
* @public exported from `@promptbook/core`
|
|
72
|
+
*/
|
|
73
|
+
export declare const DEFAULT_PROMPT_TASK_TITLE = "Prompt";
|
|
74
|
+
/**
|
|
75
|
+
* When the pipeline is flat and no name of return parameter is provided, this name is used
|
|
76
|
+
*
|
|
77
|
+
* @public exported from `@promptbook/core`
|
|
78
|
+
*/
|
|
79
|
+
export declare const DEFAULT_BOOK_OUTPUT_PARAMETER_NAME = "result";
|
|
68
80
|
/**
|
|
69
81
|
* Warning message for the generated sections and files
|
|
70
82
|
*
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { string_file_extension } from '../../types/typeAliases';
|
|
2
|
+
import type { string_mime_type } from '../../types/typeAliases';
|
|
3
|
+
/**
|
|
4
|
+
* Convert mime type to file extension
|
|
5
|
+
*
|
|
6
|
+
* Note: If the mime type is invalid, `null` is returned
|
|
7
|
+
*
|
|
8
|
+
* @private within the repository
|
|
9
|
+
*/
|
|
10
|
+
export declare function mimeTypeToExtension(value: string_mime_type): string_file_extension | null;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@promptbook/pdf",
|
|
3
|
-
"version": "0.84.0-12",
|
|
3
|
+
"version": "0.84.0-14",
|
|
4
4
|
"description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
|
|
5
5
|
"--note-0": " <- [🐊]",
|
|
6
6
|
"private": false,
|
|
@@ -54,7 +54,7 @@
|
|
|
54
54
|
"module": "./esm/index.es.js",
|
|
55
55
|
"typings": "./esm/typings/src/_packages/pdf.index.d.ts",
|
|
56
56
|
"peerDependencies": {
|
|
57
|
-
"@promptbook/core": "0.84.0-
|
|
57
|
+
"@promptbook/core": "0.84.0-14"
|
|
58
58
|
},
|
|
59
59
|
"dependencies": {
|
|
60
60
|
"crypto-js": "4.2.0",
|
package/umd/index.umd.js
CHANGED
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
* @generated
|
|
26
26
|
* @see https://github.com/webgptorg/promptbook
|
|
27
27
|
*/
|
|
28
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-
|
|
28
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-13';
|
|
29
29
|
/**
|
|
30
30
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
31
31
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -3581,6 +3581,17 @@
|
|
|
3581
3581
|
return match ? match[1].toLowerCase() : null;
|
|
3582
3582
|
}
|
|
3583
3583
|
|
|
3584
|
+
/**
|
|
3585
|
+
* Convert mime type to file extension
|
|
3586
|
+
*
|
|
3587
|
+
* Note: If the mime type is invalid, `null` is returned
|
|
3588
|
+
*
|
|
3589
|
+
* @private within the repository
|
|
3590
|
+
*/
|
|
3591
|
+
function mimeTypeToExtension(value) {
|
|
3592
|
+
return mimeTypes.extension(value) || null;
|
|
3593
|
+
}
|
|
3594
|
+
|
|
3584
3595
|
/**
|
|
3585
3596
|
* The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
|
|
3586
3597
|
*
|
|
@@ -3616,7 +3627,7 @@
|
|
|
3616
3627
|
function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
3617
3628
|
var _a;
|
|
3618
3629
|
return __awaiter(this, void 0, void 0, function () {
|
|
3619
|
-
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url,
|
|
3630
|
+
var _b, fetch, knowledgeSourceContent, name, _c, _d, rootDirname, url, response_1, mimeType, basename, hash, rootDirname_1, filepath, _f, _g, _h, _j, _k, filename_1, fileExtension, mimeType;
|
|
3620
3631
|
return __generator(this, function (_l) {
|
|
3621
3632
|
switch (_l.label) {
|
|
3622
3633
|
case 0:
|
|
@@ -3632,25 +3643,67 @@
|
|
|
3632
3643
|
url = knowledgeSourceContent;
|
|
3633
3644
|
return [4 /*yield*/, fetch(url)];
|
|
3634
3645
|
case 1:
|
|
3635
|
-
|
|
3636
|
-
mimeType = ((_a =
|
|
3637
|
-
|
|
3646
|
+
response_1 = _l.sent();
|
|
3647
|
+
mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
3648
|
+
if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
|
|
3649
|
+
return [2 /*return*/, {
|
|
3650
|
+
source: name,
|
|
3651
|
+
filename: null,
|
|
3652
|
+
url: url,
|
|
3653
|
+
mimeType: mimeType,
|
|
3654
|
+
/*
|
|
3655
|
+
TODO: [🥽]
|
|
3656
|
+
> async asBlob() {
|
|
3657
|
+
> // TODO: [👨🏻🤝👨🏻] This can be called multiple times BUT when called a second time, the response is already consumed
|
|
3658
|
+
> const content = await response.blob();
|
|
3659
|
+
> return content;
|
|
3660
|
+
> },
|
|
3661
|
+
*/
|
|
3662
|
+
asJson: function () {
|
|
3663
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3664
|
+
var content;
|
|
3665
|
+
return __generator(this, function (_a) {
|
|
3666
|
+
switch (_a.label) {
|
|
3667
|
+
case 0: return [4 /*yield*/, response_1.json()];
|
|
3668
|
+
case 1:
|
|
3669
|
+
content = _a.sent();
|
|
3670
|
+
return [2 /*return*/, content];
|
|
3671
|
+
}
|
|
3672
|
+
});
|
|
3673
|
+
});
|
|
3674
|
+
},
|
|
3675
|
+
asText: function () {
|
|
3676
|
+
return __awaiter(this, void 0, void 0, function () {
|
|
3677
|
+
var content;
|
|
3678
|
+
return __generator(this, function (_a) {
|
|
3679
|
+
switch (_a.label) {
|
|
3680
|
+
case 0: return [4 /*yield*/, response_1.text()];
|
|
3681
|
+
case 1:
|
|
3682
|
+
content = _a.sent();
|
|
3683
|
+
return [2 /*return*/, content];
|
|
3684
|
+
}
|
|
3685
|
+
});
|
|
3686
|
+
});
|
|
3687
|
+
},
|
|
3688
|
+
}];
|
|
3689
|
+
}
|
|
3690
|
+
basename = url.split('/').pop() || titleToName(url);
|
|
3638
3691
|
hash = sha256__default["default"](hexEncoder__default["default"].parse(url)).toString( /* hex */);
|
|
3639
3692
|
rootDirname_1 = path.join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
3640
|
-
filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(
|
|
3693
|
+
filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
|
|
3641
3694
|
return [4 /*yield*/, tools.fs.mkdir(path.dirname(path.join(rootDirname_1, filepath)), { recursive: true })];
|
|
3642
3695
|
case 2:
|
|
3643
3696
|
_l.sent();
|
|
3644
3697
|
_g = (_f = tools.fs).writeFile;
|
|
3645
3698
|
_h = [path.join(rootDirname_1, filepath)];
|
|
3646
3699
|
_k = (_j = Buffer).from;
|
|
3647
|
-
return [4 /*yield*/,
|
|
3700
|
+
return [4 /*yield*/, response_1.arrayBuffer()];
|
|
3648
3701
|
case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
|
|
3649
3702
|
case 4:
|
|
3650
3703
|
_l.sent();
|
|
3651
|
-
// TODO:
|
|
3704
|
+
// TODO: [💵] Check the file security
|
|
3652
3705
|
// TODO: !!!!!!!! Check the file size (if it is not too big)
|
|
3653
|
-
// TODO: !!!!!!!! Delete the file
|
|
3706
|
+
// TODO: !!!!!!!! Delete the file after the scraping is done
|
|
3654
3707
|
return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
|
|
3655
3708
|
case 5:
|
|
3656
3709
|
if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
|
|
@@ -6269,7 +6322,7 @@
|
|
|
6269
6322
|
className: 'MarkitdownScraper',
|
|
6270
6323
|
mimeTypes: [
|
|
6271
6324
|
'application/pdf',
|
|
6272
|
-
// TODO: Make priority for scrapers and than allow all mime types here:
|
|
6325
|
+
// TODO: [💵] Make priority for scrapers and then analyze which mime types Markitdown can scrape and allow all mime types here:
|
|
6273
6326
|
// 'text/html',
|
|
6274
6327
|
// 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
6275
6328
|
],
|