@promptbook/markitdown 0.84.0-13 → 0.84.0-15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +1 -0
- package/README.md +3 -3
- package/esm/index.es.js +24 -12
- package/esm/index.es.js.map +1 -1
- package/esm/typings/src/_packages/core.index.d.ts +4 -0
- package/esm/typings/src/config.d.ts +12 -0
- package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/editable/utils/isFlatPipeline.test.d.ts +1 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.d.ts +10 -0
- package/esm/typings/src/utils/files/mimeTypeToExtension.test.d.ts +1 -0
- package/package.json +3 -15
- package/umd/index.umd.js +23 -11
- package/umd/index.umd.js.map +1 -1
package/LICENSE.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
[Functional Source License, Version 1.1, ALv2 Future License](https://github.com/getsentry/fsl.software/blob/main/FSL-1.1-ALv2.template.md)
|
package/README.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<!-- ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten -->
|
|
2
2
|
|
|
3
|
-
#
|
|
3
|
+
# ✨ Promptbook
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
|
|
@@ -16,10 +16,10 @@
|
|
|
16
16
|
|
|
17
17
|
## ❄ New Features
|
|
18
18
|
|
|
19
|
+
- 🐋 **Support of [DeepSeek models](https://www.deepseek.com/)**
|
|
19
20
|
- 💙 Working [the **Book** language v1.0.0](https://github.com/webgptorg/book)
|
|
20
21
|
- 🖤 Run books from CLI - `npx ptbk run path/to/your/book`
|
|
21
|
-
- 📚 Support of `.docx`, `.doc` and `.pdf` documents
|
|
22
|
-
- ✨ **Support of [OpenAI o1 model](https://openai.com/o1/)**
|
|
22
|
+
- 📚 Support of `.docx`, `.doc` and `.pdf` documents as knowledge
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
|
package/esm/index.es.js
CHANGED
|
@@ -7,7 +7,7 @@ import { format } from 'prettier';
|
|
|
7
7
|
import parserHtml from 'prettier/parser-html';
|
|
8
8
|
import { forTime } from 'waitasecond';
|
|
9
9
|
import sha256 from 'crypto-js/sha256';
|
|
10
|
-
import { lookup } from 'mime-types';
|
|
10
|
+
import { lookup, extension } from 'mime-types';
|
|
11
11
|
import { unparse, parse } from 'papaparse';
|
|
12
12
|
|
|
13
13
|
// ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
|
|
@@ -24,7 +24,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
|
|
|
24
24
|
* @generated
|
|
25
25
|
* @see https://github.com/webgptorg/promptbook
|
|
26
26
|
*/
|
|
27
|
-
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-
|
|
27
|
+
var PROMPTBOOK_ENGINE_VERSION = '0.84.0-14';
|
|
28
28
|
/**
|
|
29
29
|
* TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
|
|
30
30
|
* Note: [💞] Ignore a discrepancy between file name and entity name
|
|
@@ -3567,6 +3567,17 @@ function getFileExtension(value) {
|
|
|
3567
3567
|
return match ? match[1].toLowerCase() : null;
|
|
3568
3568
|
}
|
|
3569
3569
|
|
|
3570
|
+
/**
|
|
3571
|
+
* Convert mime type to file extension
|
|
3572
|
+
*
|
|
3573
|
+
* Note: If the mime type is invalid, `null` is returned
|
|
3574
|
+
*
|
|
3575
|
+
* @private within the repository
|
|
3576
|
+
*/
|
|
3577
|
+
function mimeTypeToExtension(value) {
|
|
3578
|
+
return extension(value) || null;
|
|
3579
|
+
}
|
|
3580
|
+
|
|
3570
3581
|
/**
|
|
3571
3582
|
* The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
|
|
3572
3583
|
*
|
|
@@ -3620,7 +3631,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3620
3631
|
case 1:
|
|
3621
3632
|
response_1 = _l.sent();
|
|
3622
3633
|
mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
|
|
3623
|
-
if (tools.fs === undefined || !url.endsWith('.pdf')) {
|
|
3634
|
+
if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
|
|
3624
3635
|
return [2 /*return*/, {
|
|
3625
3636
|
source: name,
|
|
3626
3637
|
filename: null,
|
|
@@ -3665,7 +3676,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3665
3676
|
basename = url.split('/').pop() || titleToName(url);
|
|
3666
3677
|
hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
|
|
3667
3678
|
rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
|
|
3668
|
-
filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".
|
|
3679
|
+
filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
|
|
3669
3680
|
return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
|
|
3670
3681
|
case 2:
|
|
3671
3682
|
_l.sent();
|
|
@@ -3676,9 +3687,9 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
|
|
|
3676
3687
|
case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
|
|
3677
3688
|
case 4:
|
|
3678
3689
|
_l.sent();
|
|
3679
|
-
// TODO:
|
|
3690
|
+
// TODO: [💵] Check the file security
|
|
3680
3691
|
// TODO: !!!!!!!! Check the file size (if it is not too big)
|
|
3681
|
-
// TODO: !!!!!!!! Delete the file
|
|
3692
|
+
// TODO: !!!!!!!! Delete the file after the scraping is done
|
|
3682
3693
|
return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
|
|
3683
3694
|
case 5:
|
|
3684
3695
|
if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
|
|
@@ -6297,7 +6308,7 @@ var markitdownScraperMetadata = $deepFreeze({
|
|
|
6297
6308
|
className: 'MarkitdownScraper',
|
|
6298
6309
|
mimeTypes: [
|
|
6299
6310
|
'application/pdf',
|
|
6300
|
-
// TODO: Make priority for scrapers and than allow all mime types here:
|
|
6311
|
+
// TODO: [💵] Make priority for scrapers, then analyze which mime types Markitdown can scrape and allow all of those mime types here:
|
|
6301
6312
|
// 'text/html',
|
|
6302
6313
|
// 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
|
6303
6314
|
],
|
|
@@ -6333,10 +6344,10 @@ var MarkitdownScraper = /** @class */ (function () {
|
|
|
6333
6344
|
this.tools = tools;
|
|
6334
6345
|
this.options = options;
|
|
6335
6346
|
this.markdownScraper = new MarkdownScraper(tools, options);
|
|
6347
|
+
// Note: Module `markitdown-ts` has no types available, so it is imported using `require`
|
|
6336
6348
|
// eslint-disable-next-line @typescript-eslint/no-var-requires
|
|
6337
6349
|
var MarkItDown = require('markitdown-ts').MarkItDown;
|
|
6338
|
-
// <- TODO:
|
|
6339
|
-
// <- Note: !!!!!!!
|
|
6350
|
+
// <- TODO: Use Markitdown directly, not through this package
|
|
6340
6351
|
this.markitdown = new MarkItDown();
|
|
6341
6352
|
}
|
|
6342
6353
|
Object.defineProperty(MarkitdownScraper.prototype, "metadata", {
|
|
@@ -6381,12 +6392,12 @@ var MarkitdownScraper = /** @class */ (function () {
|
|
|
6381
6392
|
case 2:
|
|
6382
6393
|
if (!!(_f.sent())) return [3 /*break*/, 5];
|
|
6383
6394
|
src = source.filename || source.url || null;
|
|
6384
|
-
console.log('!!!', { src
|
|
6395
|
+
// console.log('!!!', { src, source, cacheFilehandler });
|
|
6385
6396
|
if (src === null) {
|
|
6386
6397
|
throw new UnexpectedError('Source has no filename or url');
|
|
6387
6398
|
}
|
|
6388
6399
|
return [4 /*yield*/, this.markitdown.convert(src, {
|
|
6389
|
-
// TODO:
|
|
6400
|
+
// TODO: Pass when scraping YouTube
|
|
6390
6401
|
// enableYoutubeTranscript: true,
|
|
6391
6402
|
// youtubeTranscriptLanguage: 'en',
|
|
6392
6403
|
})];
|
|
@@ -6396,9 +6407,10 @@ var MarkitdownScraper = /** @class */ (function () {
|
|
|
6396
6407
|
throw new Error("Markitdown could not convert the \"".concat(source.source, "\""));
|
|
6397
6408
|
// <- TODO: !!! Make MarkitdownError
|
|
6398
6409
|
}
|
|
6399
|
-
console.log('!!!', { result
|
|
6410
|
+
// console.log('!!!', { result, cacheFilehandler });
|
|
6400
6411
|
return [4 /*yield*/, this.tools.fs.writeFile(cacheFilehandler.filename, result.text_content)];
|
|
6401
6412
|
case 4:
|
|
6413
|
+
// console.log('!!!', { result, cacheFilehandler });
|
|
6402
6414
|
_f.sent();
|
|
6403
6415
|
_f.label = 5;
|
|
6404
6416
|
case 5: return [2 /*return*/, cacheFilehandler];
|