npm - @promptbook/markitdown - Versions diffs - 0.84.0-13 → 0.84.0-15 - Mend

@promptbook/markitdown 0.84.0-13 → 0.84.0-15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/LICENSE.md +1 -0
package/README.md +3 -3
package/esm/index.es.js +24 -12
package/esm/index.es.js.map +1 -1
package/esm/typings/src/_packages/core.index.d.ts +4 -0
package/esm/typings/src/config.d.ts +12 -0
package/esm/typings/src/utils/editable/edit-pipeline-string/deflatePipeline.test.d.ts +1 -0
package/esm/typings/src/utils/editable/utils/isFlatPipeline.test.d.ts +1 -0
package/esm/typings/src/utils/files/mimeTypeToExtension.d.ts +10 -0
package/esm/typings/src/utils/files/mimeTypeToExtension.test.d.ts +1 -0
package/package.json +3 -15
package/umd/index.umd.js +23 -11
package/umd/index.umd.js.map +1 -1

package/LICENSE.md ADDED Viewed

	@@ -0,0 +1 @@
1	+ [Functional Source License, Version 1.1, ALv2 Future License](https://github.com/getsentry/fsl.software/blob/main/FSL-1.1-ALv2.template.md)

package/README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 <!-- ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten -->
-# ❄ Promptbook
+# ✨ Promptbook
@@ -16,10 +16,10 @@
 ## ❄ New Features
+-   🐋 **Support of [DeepSeek models](https://www.deepseek.com/)**
 -   💙 Working [the **Book** language v1.0.0](https://github.com/webgptorg/book)
 -   🖤 Run books from CLI - `npx ptbk run path/to/your/book`
--   📚 Support of `.docx`, `.doc` and `.pdf` documents
--   ✨ **Support of [OpenAI o1 model](https://openai.com/o1/)**
+-   📚 Support of `.docx`, `.doc` and `.pdf` documents as knowledge

package/esm/index.es.js CHANGED Viewed

@@ -7,7 +7,7 @@ import { format } from 'prettier';
 import parserHtml from 'prettier/parser-html';
 import { forTime } from 'waitasecond';
 import sha256 from 'crypto-js/sha256';
-import { lookup } from 'mime-types';
+import { lookup, extension } from 'mime-types';
 import { unparse, parse } from 'papaparse';
 // ⚠️ WARNING: This code has been generated so that any manual changes will be overwritten
@@ -24,7 +24,7 @@ var BOOK_LANGUAGE_VERSION = '1.0.0';
  * @generated
  * @see https://github.com/webgptorg/promptbook
  */
-var PROMPTBOOK_ENGINE_VERSION = '0.84.0-12';
+var PROMPTBOOK_ENGINE_VERSION = '0.84.0-14';
 /**
  * TODO: string_promptbook_version should be constrained to the all versions of Promptbook engine
  * Note: [💞] Ignore a discrepancy between file name and entity name
@@ -3567,6 +3567,17 @@ function getFileExtension(value) {
     return match ? match[1].toLowerCase() : null;
 }
+/**
+ * Convert mime type to file extension
+ *
+ * Note: If the mime type is invalid, `null` is returned
+ *
+ * @private within the repository
+ */
+function mimeTypeToExtension(value) {
+    return extension(value) || null;
+}
 /**
  * The built-in `fetch` function with a lightweight error-handling wrapper, used as the default fetch function in Promptbook scrapers
  *
@@ -3620,7 +3631,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
                 case 1:
                     response_1 = _l.sent();
                     mimeType = ((_a = response_1.headers.get('content-type')) === null || _a === void 0 ? void 0 : _a.split(';')[0]) || 'text/html';
-                    if (tools.fs === undefined || !url.endsWith('.pdf')) {
+                    if (tools.fs === undefined || !url.endsWith('.pdf' /* <- TODO: [💵] */)) {
                         return [2 /*return*/, {
                                 source: name,
                                 filename: null,
@@ -3665,7 +3676,7 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
                     basename = url.split('/').pop() || titleToName(url);
                     hash = sha256(hexEncoder.parse(url)).toString( /* hex */);
                     rootDirname_1 = join(process.cwd(), DEFAULT_DOWNLOAD_CACHE_DIRNAME);
-                    filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".pdf")], false));
+                    filepath = join.apply(void 0, __spreadArray(__spreadArray([], __read(nameToSubfolderPath(hash /* <- TODO: [🎎] Maybe add some SHA256 prefix */)), false), ["".concat(basename.substring(0, MAX_FILENAME_LENGTH), ".").concat(mimeTypeToExtension(mimeType))], false));
                     return [4 /*yield*/, tools.fs.mkdir(dirname(join(rootDirname_1, filepath)), { recursive: true })];
                 case 2:
                     _l.sent();
@@ -3676,9 +3687,9 @@ function makeKnowledgeSourceHandler(knowledgeSource, tools, options) {
                 case 3: return [4 /*yield*/, _g.apply(_f, _h.concat([_k.apply(_j, [_l.sent()])]))];
                 case 4:
                     _l.sent();
-                    // TODO: !!!!!!!! Check the file security
+                    // TODO: [💵] Check the file security
                     // TODO: !!!!!!!! Check the file size (if it is not too big)
-                    // TODO: !!!!!!!! Delete the file
+                    // TODO: !!!!!!!! Delete the file after the scraping is done
                     return [2 /*return*/, makeKnowledgeSourceHandler({ name: name, knowledgeSourceContent: filepath }, tools, __assign(__assign({}, options), { rootDirname: rootDirname_1 }))];
                 case 5:
                     if (!isValidFilePath(knowledgeSourceContent)) return [3 /*break*/, 7];
@@ -6297,7 +6308,7 @@ var markitdownScraperMetadata = $deepFreeze({
     className: 'MarkitdownScraper',
     mimeTypes: [
         'application/pdf',
-        // TODO: Make priority for scrapers and then allow all mime types here:
+        // TODO: [💵] Make priority for scrapers and then analyze which mime types Markitdown can scrape and allow all mime types here:
         // 'text/html',
         // 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
     ],
@@ -6333,10 +6344,10 @@ var MarkitdownScraper = /** @class */ (function () {
         this.tools = tools;
         this.options = options;
         this.markdownScraper = new MarkdownScraper(tools, options);
+        // Note: Module `markitdown-ts` has no types available, so it is imported using `require`
         // eslint-disable-next-line @typescript-eslint/no-var-requires
         var MarkItDown = require('markitdown-ts').MarkItDown;
-        // <- TODO: !!! Use Markitdown directly not through this package
-        // <- Note: !!!!!!!
+        // <- TODO: Use Markitdown directly, not through this package
         this.markitdown = new MarkItDown();
     }
     Object.defineProperty(MarkitdownScraper.prototype, "metadata", {
@@ -6381,12 +6392,12 @@ var MarkitdownScraper = /** @class */ (function () {
                     case 2:
                         if (!!(_f.sent())) return [3 /*break*/, 5];
                         src = source.filename || source.url || null;
-                        console.log('!!!', { src: src, source: source, cacheFilehandler: cacheFilehandler });
+                        // console.log('!!!', { src, source, cacheFilehandler });
                         if (src === null) {
                             throw new UnexpectedError('Source has no filename or url');
                         }
                         return [4 /*yield*/, this.markitdown.convert(src, {
-                            // TODO: !!!!!! Pass when scraping Youtube
+                            // TODO: Pass when scraping Youtube
                             // enableYoutubeTranscript: true,
                             // youtubeTranscriptLanguage: 'en',
                             })];
@@ -6396,9 +6407,10 @@ var MarkitdownScraper = /** @class */ (function () {
                             throw new Error("Markitdown could not convert the \"".concat(source.source, "\""));
                             // <- TODO: !!! Make MarkitdownError
                         }
-                        console.log('!!!', { result: result, cacheFilehandler: cacheFilehandler });
+                        // console.log('!!!', { result, cacheFilehandler });
                         return [4 /*yield*/, this.tools.fs.writeFile(cacheFilehandler.filename, result.text_content)];
                     case 4:
+                        // console.log('!!!', { result, cacheFilehandler });
                         _f.sent();
                         _f.label = 5;
                     case 5: return [2 /*return*/, cacheFilehandler];