@promptbook/pdf 0.84.0-13 → 0.84.0-15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +1 -0
- package/README.md +3 -3
- package/esm/index.es.js +24 -12
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +4 -0
- package/esm/typings/src/config.d.ts +12 -0
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.d.ts +10 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.test.d.ts +1 -0
- package/package.json +3 -15
- package/umd/index.umd.js +23 -11
- package/umd/index.umd.js.map +1 -1
|
@@ -12,6 +12,8 @@ import { LOGO_LIGHT_SRC } from '../config';
|
|
|
12
12
|
import { LOGO_DARK_SRC } from '../config';
|
|
13
13
|
import { DEFAULT_BOOK_TITLE } from '../config';
|
|
14
14
|
import { DEFAULT_TASK_TITLE } from '../config';
|
|
15
|
+
import { DEFAULT_PROMPT_TASK_TITLE } from '../config';
|
|
16
|
+
import { DEFAULT_BOOK_OUTPUT_PARAMETER_NAME } from '../config';
|
|
15
17
|
import { MAX_FILENAME_LENGTH } from '../config';
|
|
16
18
|
import { DEFAULT_INTERMEDIATE_FILES_STRATEGY } from '../config';
|
|
17
19
|
import { DEFAULT_MAX_PARALLEL_COUNT } from '../config';
|
|
@@ -139,6 +141,8 @@ export { LOGO_LIGHT_SRC };
|
|
|
139
141
|
export { LOGO_DARK_SRC };
|
|
140
142
|
export { DEFAULT_BOOK_TITLE };
|
|
141
143
|
export { DEFAULT_TASK_TITLE };
|
|
144
|
+
export { DEFAULT_PROMPT_TASK_TITLE };
|
|
145
|
+
export { DEFAULT_BOOK_OUTPUT_PARAMETER_NAME };
|
|
142
146
|
export { MAX_FILENAME_LENGTH };
|
|
143
147
|
export { DEFAULT_INTERMEDIATE_FILES_STRATEGY };
|
|
144
148
|
export { DEFAULT_MAX_PARALLEL_COUNT };
|
|
@@ -65,6 +65,18 @@ export declare const DEFAULT_BOOK_TITLE = "\u2728 Untitled Book";
|
|
|
65
65
|
* @public exported from `@promptbook/core`
|
|
66
66
|
*/
|
|
67
67
|
export declare const DEFAULT_TASK_TITLE = "Task";
|
|
68
|
+
/**
|
|
69
|
+
* When the title of the prompt task is not provided, the default title is used
|
|
70
|
+
*
|
|
71
|
+
* @public exported from `@promptbook/core`
|
|
72
|
+
*/
|
|
73
|
+
export declare const DEFAULT_PROMPT_TASK_TITLE = "Prompt";
|
|
74
|
+
/**
|
|
75
|
+
* When the pipeline is flat and no name of return parameter is provided, this name is used
|
|
76
|
+
*
|
|
77
|
+
* @public exported from `@promptbook/core`
|
|
78
|
+
*/
|
|
79
|
+
export declare const DEFAULT_BOOK_OUTPUT_PARAMETER_NAME = "result";
|
|
68
80
|
/**
|
|
69
81
|
* Warning message for the generated sections and files files
|
|
70
82
|
*
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { string_file_extension } from '../../types/typeAliases';
|
|
2
|
+
import type { string_mime_type } from '../../types/typeAliases';
|
|
3
|
+
/**
|
|
4
|
+
* Convert mime type to file extension
|
|
5
|
+
*
|
|
6
|
+
* Note: If the mime type is invalid, `null` is returned
|
|
7
|
+
*
|
|
8
|
+
* @private within the repository
|
|
9
|
+
*/
|
|
10
|
+
export declare function mimeTypeToExtension(value: string_mime_type): string_file_extension | null;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/package.json
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@promptbook/pdf",
|
|
3
|
-
"version": "0.84.0-
|
|
3
|
+
"version": "0.84.0-15",
|
|
4
4
|
"description": "It's time for a paradigm shift. The future of software in plain English, French or Latin",
|
|
5
|
-
"--note-0": " <- [π]",
|
|
6
5
|
"private": false,
|
|
7
6
|
"sideEffects": false,
|
|
8
7
|
"repository": {
|
|
@@ -12,7 +11,6 @@
|
|
|
12
11
|
"contributors": [
|
|
13
12
|
"Pavol HejnΓ½ <pavol@ptbk.io> (https://www.pavolhejny.com/)"
|
|
14
13
|
],
|
|
15
|
-
"--todo-0": "TODO: [βοΈ] Make better list of keywords",
|
|
16
14
|
"keywords": [
|
|
17
15
|
"ai",
|
|
18
16
|
"llm",
|
|
@@ -35,26 +33,16 @@
|
|
|
35
33
|
"anthropic",
|
|
36
34
|
"LLMOps"
|
|
37
35
|
],
|
|
38
|
-
"license": "
|
|
36
|
+
"license": "SEE LICENSE IN LICENSE.md",
|
|
39
37
|
"bugs": {
|
|
40
38
|
"url": "https://github.com/webgptorg/promptbook/issues"
|
|
41
39
|
},
|
|
42
40
|
"homepage": "https://www.npmjs.com/package/@promptbook/core",
|
|
43
|
-
"funding": [
|
|
44
|
-
{
|
|
45
|
-
"type": "individual",
|
|
46
|
-
"url": "https://buymeacoffee.com/hejny"
|
|
47
|
-
},
|
|
48
|
-
{
|
|
49
|
-
"type": "github",
|
|
50
|
-
"url": "https://github.com/webgptorg/promptbook/blob/main/README.md#%EF%B8%8F-contributing"
|
|
51
|
-
}
|
|
52
|
-
],
|
|
53
41
|
"main": "./umd/index.umd.js",
|
|
54
42
|
"module": "./esm/index.es.js",
|
|
55
43
|
"typings": "./esm/typings/src/_packages/pdf.index.d.ts",
|
|
56
44
|
"peerDependencies": {
|
|
57
|
-
"@promptbook/core": "0.84.0-
|
|
45
|
+
"@promptbook/core": "0.84.0-15"
|
|
58
46
|
},
|
|
59
47
|
"dependencies": {
|
|
60
48
|
"crypto-js": "4.2.0",
|
package/umd/index.umd.js
CHANGED
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
* @generated
|
|
26
26
|
* @see https://github.com/webgptorg/promptbook
|
|
27
27
|
*/
|
|
28
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-
|
|
28
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-14';
|
|
29
29
|
/**
|
|
30
30
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
31
31
|
* Note: [π] Ignore a discrepancy between file name and entity name
|
|
@@ -3581,6 +3581,17 @@
|
|
|
3581
3581
|
return match ? match[1].toLowerCase() : null;
|
|
3582
3582
|
}
|
|
3583
3583
|
|
|
3584
|
+
/**
|
|
3585
|
+
* Convert mime type to file extension
|
|
3586
|
+
*
|
|
3587
|
+
* Note: If the mime type is invalid, `null` is returned
|
|
3588
|
+
*
|
|
3589
|
+
* @private within the repository
|
|
3590
|
+
*/
|
|
3591
|
+
function mimeTypeToExtension(value) {
|
|
3592
|
+
return mimeTypes.extension(value) || null;
|
|
3593
|
+
}
|
|
3594
|
+
|
|
3584
3595
|
/**
|
|
3585
3596
|
* The built-in `fetch' function with a lightweight error handling wrapper as default fetch function used in Promptbook scrapers
|
|
3586
3597
|
*
|
|
@@ -3634,7 +3645,7 @@
|
|
|
3634
3645
|
case 1:
|
|
3635
3646
|
response_1 = _l.sent();
|
|
3636
3647
|
mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
3637
|
-
if (tools.fs === undefined || !url.endsWith('.pdf')) {
|
|
3648
|
+
if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [π΅] */)) {
|
|
3638
3649
|
return [2 /*return*/, {
|
|
3639
3650
|
source: name,
|
|
3640
3651
|
filename: null,
|
|
@@ -3679,7 +3690,7 @@
|
|
|
3679
3690
|
basename = url.split('/').pop() || titleToName(url);
|
|
3680
3691
|
hash = sha256__default["default"](hexEncoder__default["default"].parse(url)).toString( /* hex */);
|
|
3681
3692
|
rootDirname_1 = path.join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
3682
|
-
filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [π] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".
|
|
3693
|
+
filepath = path.join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [π] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
|
|
3683
3694
|
return [4 /*yield*/, tools.fs.mkdir(path.dirname(path.join(rootDirname_1, filepath)), { recursive: true })];
|
|
3684
3695
|
case 2:
|
|
3685
3696
|
_l.sent();
|
|
@@ -3690,9 +3701,9 @@
|
|
|
3690
3701
|
case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
|
|
3691
3702
|
case 4:
|
|
3692
3703
|
_l.sent();
|
|
3693
|
-
// TODO:
|
|
3704
|
+
// TODO: [π΅] Check the file security
|
|
3694
3705
|
// TODO: !!!!!!!! Check the file size (if it is not too big)
|
|
3695
|
-
// TODO: !!!!!!!! Delete the file
|
|
3706
|
+
// TODO: !!!!!!!! Delete the file after the scraping is done
|
|
3696
3707
|
return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
|
|
3697
3708
|
case 5:
|
|
3698
3709
|
if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
|
|
@@ -6311,7 +6322,7 @@
|
|
|
6311
6322
|
className: 'MarkitdownScraper',
|
|
6312
6323
|
mimeTypes: [
|
|
6313
6324
|
'application/pdf',
|
|
6314
|
-
// TODO: Make priority for scrapers and than allow all mime types here:
|
|
6325
|
+
// TODO: [π΅] Make priority for scrapers and than analyze which mime-types can Markitdown scrape and allow all mime types here:
|
|
6315
6326
|
// 'text/html',
|
|
6316
6327
|
// 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
6317
6328
|
],
|
|
@@ -6347,10 +6358,10 @@
|
|
|
6347
6358
|
this.tools = tools;
|
|
6348
6359
|
this.options = options;
|
|
6349
6360
|
this.markdownScraper = new MarkdownScraper(tools, options);
|
|
6361
|
+
// Note: Module `markitdown-ts` has no types available, so it is imported using `require`
|
|
6350
6362
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
6351
6363
|
var MarkItDown = require('markitdown-ts').MarkItDown;
|
|
6352
|
-
// <- TODO:
|
|
6353
|
-
// <- Note: !!!!!!!
|
|
6364
|
+
// <- TODO: 'Use Markitdown directly not through this package
|
|
6354
6365
|
this.markitdown = new MarkItDown();
|
|
6355
6366
|
}
|
|
6356
6367
|
Object.defineProperty(MarkitdownScraper.prototype, "metadata", {
|
|
@@ -6395,12 +6406,12 @@
|
|
|
6395
6406
|
case 2:
|
|
6396
6407
|
if (!!(_f.sent())) return [3 /*break*/, 5];
|
|
6397
6408
|
src = source.filename || source.url || null;
|
|
6398
|
-
console.log('!!!', { src
|
|
6409
|
+
// console.log('!!!', { src, source, cacheFilehandler });
|
|
6399
6410
|
if (src === null) {
|
|
6400
6411
|
throw new UnexpectedError('Source has no filename or url');
|
|
6401
6412
|
}
|
|
6402
6413
|
return [4 /*yield*/, this.markitdown.convert(src, {
|
|
6403
|
-
// TODO:
|
|
6414
|
+
// TODO: Pass when sacraping Youtube
|
|
6404
6415
|
// enableYoutubeTranscript: true,
|
|
6405
6416
|
// youtubeTranscriptLanguage: 'en',
|
|
6406
6417
|
})];
|
|
@@ -6410,9 +6421,10 @@
|
|
|
6410
6421
|
throw new Error("Markitdown could not convert the \"".concat(source.source, "\""));
|
|
6411
6422
|
// <- TODO: !!! Make MarkitdownError
|
|
6412
6423
|
}
|
|
6413
|
-
console.log('!!!', { result
|
|
6424
|
+
// console.log('!!!', { result, cacheFilehandler });
|
|
6414
6425
|
return [4 /*yield*/, this.tools.fs.writeFile(cacheFilehandler.filename, result.text_content)];
|
|
6415
6426
|
case 4:
|
|
6427
|
+
// console.log('!!!', { result, cacheFilehandler });
|
|
6416
6428
|
_f.sent();
|
|
6417
6429
|
_f.label = 5;
|
|
6418
6430
|
case 5: return [2 /*return*/, cacheFilehandler];
|