npm - markitdown-ts - Versions diffs - 0.0.6 → 0.0.7 - Mend

markitdown-ts 0.0.6 → 0.0.7

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (7)

package/README.md CHANGED Viewed

@@ -50,7 +50,7 @@ try {
   const result = await markitdown.convert("https://arxiv.org/pdf/2308.08155v2.pdf");
   if (result) {
-    console.log(result.text_content);
+    console.log(result.markdown);
   }
 } catch (error) {
   console.error("Conversion failed:", error);
@@ -158,6 +158,8 @@ class MarkItDown {
 export type ConverterResult =
   | {
       title: string | null;
+      markdown: string;
+      /** @deprecated Use `markdown` instead. */
       text_content: string;
     }
   | null

package/dist/index.cjs CHANGED Viewed

@@ -63,6 +63,7 @@ class PlainTextConverter {
     }
     return {
       title: null,
+      markdown: content,
       text_content: content
     };
   }
@@ -176,6 +177,7 @@ class HtmlConverter {
     }
     return {
       title: doc.title,
+      markdown: webpageText,
       text_content: webpageText
     };
   }
@@ -239,10 +241,7 @@ class RSSConverter {
           mdText += this._parseContent(entryContent);
         }
       }
-      return {
-        title,
-        text_content: mdText
-      };
+      return { title, markdown: mdText, text_content: mdText };
     } catch (error) {
       console.error("Atom Parsing Error:", error);
       return null;
@@ -290,10 +289,7 @@ class RSSConverter {
           mdText += this._parseContent(content);
         }
       }
-      return {
-        title: channelTitle,
-        text_content: mdText
-      };
+      return { title: channelTitle, markdown: mdText, text_content: mdText };
     } catch (error) {
       console.error("RSS Parsing Error:", error);
       return null;
@@ -363,10 +359,7 @@ class WikipediaConverter {
     } else {
       webpageText = new CustomTurnDown().convert_soup(doc);
     }
-    return {
-      title: mainTitle,
-      text_content: webpageText
-    };
+    return { title: mainTitle, markdown: webpageText, text_content: webpageText };
   }
 }
@@ -492,10 +485,7 @@ ${transcriptText}
       }
     }
     const finalTitle = title ? title : doc.title;
-    return {
-      title: finalTitle,
-      text_content: webpageText
-    };
+    return { title: finalTitle, markdown: webpageText, text_content: webpageText };
   }
   _get(metadata, keys, default_value) {
     for (const k of keys) {
@@ -573,10 +563,7 @@ ${sourceLines.join("")}
       }
       const mdText = mdOutput.join("\n\n");
       title = notebookContent.metadata?.title || title;
-      return {
-        title,
-        text_content: mdText
-      };
+      return { title, markdown: mdText, text_content: mdText };
     } catch (e) {
       console.error("Error converting .ipynb file:", e);
       throw new Error(`Error converting .ipynb file: ${e}`);
@@ -637,10 +624,7 @@ class BingSerpConverter {
     const webpageText = `## A Bing search for '${query}' found the following results:
 ${results.join("\n\n")}`;
-    return {
-      title: doc.title,
-      text_content: webpageText
-    };
+    return { title: doc.title, markdown: webpageText, text_content: webpageText };
   }
   _decodeBase64Url(encodedUrl) {
     let u = encodedUrl.slice(2).trim() + "==";
@@ -671,10 +655,7 @@ class PdfConverter {
   async _convert(pdfContent) {
     try {
       const textContent = await pdfTs.pdfToText(pdfContent);
-      return {
-        title: null,
-        text_content: textContent
-      };
+      return { title: null, markdown: textContent, text_content: textContent };
     } catch (error) {
       console.error("PDF Parsing Error:", error);
       return null;
@@ -730,12 +711,9 @@ class XlsxConverter extends HtmlConverter {
         mdContent += `## ${sheetName}
 `;
         let htmlContent = XLSX__namespace.utils.sheet_to_html(workbook.Sheets[sheetName]);
-        mdContent += (await this._convert(htmlContent))?.text_content.trim() + "\n\n";
+        mdContent += (await this._convert(htmlContent))?.markdown.trim() + "\n\n";
       }
-      return {
-        title: workbook?.Props?.Title || "Untitled",
-        text_content: mdContent
-      };
+      return { title: workbook?.Props?.Title || "Untitled", markdown: mdContent, text_content: mdContent };
     } catch (e) {
       console.error(e);
       return null;
@@ -825,10 +803,7 @@ ${transcript === "" ? "[No speech detected]" : transcript}`;
     } else {
       mdContent += "\n\n### Audio Transcript:\n[Audio transcription is not supported for Buffer inputs in this version.]";
     }
-    return {
-      title: null,
-      text_content: mdContent.trim()
-    };
+    return { title: null, markdown: mdContent.trim(), text_content: mdContent.trim() };
   }
   // TODO: Add speech to text
   async _transcribeAudio(_) {
@@ -895,10 +870,7 @@ ${transcript == "" ? "[No speech detected]" : transcript}`;
     } else {
       mdContent += "\n\n### Audio Transcript:\n[Audio conversion and transcription are not supported for Buffer inputs.]";
     }
-    return {
-      title: null,
-      text_content: mdContent.trim()
-    };
+    return { title: null, markdown: mdContent.trim(), text_content: mdContent.trim() };
   }
 }
@@ -950,10 +922,7 @@ class ImageConverter extends MediaConverter {
 ${(await this._getLLMDescription(imageBuffer, options)).trim()}
 `;
     }
-    return {
-      title: null,
-      text_content: mdContent.trim()
-    };
+    return { title: null, markdown: mdContent.trim(), text_content: mdContent.trim() };
   }
   async _getLLMDescription(imageBuffer, options) {
     if (!options.llmPrompt || options.llmPrompt.trim() === "") {
@@ -989,6 +958,7 @@ class ZipConverter {
     if (!parentConverters) {
       return {
         title: null,
+        markdown: `[ERROR] No converters available to process zip contents from: ${source}`,
         text_content: `[ERROR] No converters available to process zip contents from: ${source}`
       };
     }
@@ -1026,7 +996,7 @@ class ZipConverter {
               mdResults.push(`
 ## File: ${relativePath}
-${result.text_content}
+${result.markdown}
 `);
               break;
@@ -1050,19 +1020,18 @@ ${result.text_content}
         inputStream.pipe(parser);
       });
       mdContent += mdResults.join("");
-      return {
-        title: null,
-        text_content: mdContent.trim()
-      };
+      return { title: null, markdown: mdContent.trim(), text_content: mdContent.trim() };
     } catch (error) {
       if (error.message.includes("invalid signature")) {
         return {
           title: null,
+          markdown: `[ERROR] Invalid or corrupted zip file: ${source}`,
           text_content: `[ERROR] Invalid or corrupted zip file: ${source}`
         };
       }
       return {
         title: null,
+        markdown: `[ERROR] Failed to process zip file ${source}: ${String(error)}`,
         text_content: `[ERROR] Failed to process zip file ${source}: ${String(error)}`
       };
     }
@@ -1180,8 +1149,8 @@ class MarkItDown {
           error = e;
         }
         if (res != null) {
-          res.text_content = res.text_content.replace(/(?:\r\n|\r|\n)/g, "\n").trim();
-          res.text_content = res.text_content.replace(/\n{3,}/g, "\n\n");
+          res.markdown = res.markdown.replace(/(?:\r\n|\r|\n)/g, "\n").trim();
+          res.markdown = res.markdown.replace(/\n{3,}/g, "\n\n");
           return res;
         }
       }

package/dist/index.d.cts CHANGED Viewed

@@ -3,6 +3,8 @@ import mammoth from 'mammoth';
 type ConverterResult = {
     title: string | null;
+    markdown: string;
+    /** @deprecated Use `markdown` instead. */
     text_content: string;
 } | null | undefined;
 type ConverterOptions = {

package/dist/index.d.mts CHANGED Viewed

@@ -3,6 +3,8 @@ import mammoth from 'mammoth';
 type ConverterResult = {
     title: string | null;
+    markdown: string;
+    /** @deprecated Use `markdown` instead. */
     text_content: string;
 } | null | undefined;
 type ConverterOptions = {

package/dist/index.d.ts CHANGED Viewed

@@ -3,6 +3,8 @@ import mammoth from 'mammoth';
 type ConverterResult = {
     title: string | null;
+    markdown: string;
+    /** @deprecated Use `markdown` instead. */
     text_content: string;
 } | null | undefined;
 type ConverterOptions = {

package/dist/index.mjs CHANGED Viewed

@@ -35,6 +35,7 @@ class PlainTextConverter {
     }
     return {
       title: null,
+      markdown: content,
       text_content: content
     };
   }
@@ -148,6 +149,7 @@ class HtmlConverter {
     }
     return {
       title: doc.title,
+      markdown: webpageText,
       text_content: webpageText
     };
   }
@@ -211,10 +213,7 @@ class RSSConverter {
           mdText += this._parseContent(entryContent);
         }
       }
-      return {
-        title,
-        text_content: mdText
-      };
+      return { title, markdown: mdText, text_content: mdText };
     } catch (error) {
       console.error("Atom Parsing Error:", error);
       return null;
@@ -262,10 +261,7 @@ class RSSConverter {
           mdText += this._parseContent(content);
         }
       }
-      return {
-        title: channelTitle,
-        text_content: mdText
-      };
+      return { title: channelTitle, markdown: mdText, text_content: mdText };
     } catch (error) {
       console.error("RSS Parsing Error:", error);
       return null;
@@ -335,10 +331,7 @@ class WikipediaConverter {
     } else {
       webpageText = new CustomTurnDown().convert_soup(doc);
     }
-    return {
-      title: mainTitle,
-      text_content: webpageText
-    };
+    return { title: mainTitle, markdown: webpageText, text_content: webpageText };
   }
 }
@@ -464,10 +457,7 @@ ${transcriptText}
       }
     }
     const finalTitle = title ? title : doc.title;
-    return {
-      title: finalTitle,
-      text_content: webpageText
-    };
+    return { title: finalTitle, markdown: webpageText, text_content: webpageText };
   }
   _get(metadata, keys, default_value) {
     for (const k of keys) {
@@ -545,10 +535,7 @@ ${sourceLines.join("")}
       }
       const mdText = mdOutput.join("\n\n");
       title = notebookContent.metadata?.title || title;
-      return {
-        title,
-        text_content: mdText
-      };
+      return { title, markdown: mdText, text_content: mdText };
     } catch (e) {
       console.error("Error converting .ipynb file:", e);
       throw new Error(`Error converting .ipynb file: ${e}`);
@@ -609,10 +596,7 @@ class BingSerpConverter {
     const webpageText = `## A Bing search for '${query}' found the following results:
 ${results.join("\n\n")}`;
-    return {
-      title: doc.title,
-      text_content: webpageText
-    };
+    return { title: doc.title, markdown: webpageText, text_content: webpageText };
   }
   _decodeBase64Url(encodedUrl) {
     let u = encodedUrl.slice(2).trim() + "==";
@@ -643,10 +627,7 @@ class PdfConverter {
   async _convert(pdfContent) {
     try {
       const textContent = await pdfToText(pdfContent);
-      return {
-        title: null,
-        text_content: textContent
-      };
+      return { title: null, markdown: textContent, text_content: textContent };
     } catch (error) {
       console.error("PDF Parsing Error:", error);
       return null;
@@ -702,12 +683,9 @@ class XlsxConverter extends HtmlConverter {
         mdContent += `## ${sheetName}
 `;
         let htmlContent = XLSX.utils.sheet_to_html(workbook.Sheets[sheetName]);
-        mdContent += (await this._convert(htmlContent))?.text_content.trim() + "\n\n";
+        mdContent += (await this._convert(htmlContent))?.markdown.trim() + "\n\n";
       }
-      return {
-        title: workbook?.Props?.Title || "Untitled",
-        text_content: mdContent
-      };
+      return { title: workbook?.Props?.Title || "Untitled", markdown: mdContent, text_content: mdContent };
     } catch (e) {
       console.error(e);
       return null;
@@ -797,10 +775,7 @@ ${transcript === "" ? "[No speech detected]" : transcript}`;
     } else {
       mdContent += "\n\n### Audio Transcript:\n[Audio transcription is not supported for Buffer inputs in this version.]";
     }
-    return {
-      title: null,
-      text_content: mdContent.trim()
-    };
+    return { title: null, markdown: mdContent.trim(), text_content: mdContent.trim() };
   }
   // TODO: Add speech to text
   async _transcribeAudio(_) {
@@ -867,10 +842,7 @@ ${transcript == "" ? "[No speech detected]" : transcript}`;
     } else {
       mdContent += "\n\n### Audio Transcript:\n[Audio conversion and transcription are not supported for Buffer inputs.]";
     }
-    return {
-      title: null,
-      text_content: mdContent.trim()
-    };
+    return { title: null, markdown: mdContent.trim(), text_content: mdContent.trim() };
   }
 }
@@ -922,10 +894,7 @@ class ImageConverter extends MediaConverter {
 ${(await this._getLLMDescription(imageBuffer, options)).trim()}
 `;
     }
-    return {
-      title: null,
-      text_content: mdContent.trim()
-    };
+    return { title: null, markdown: mdContent.trim(), text_content: mdContent.trim() };
   }
   async _getLLMDescription(imageBuffer, options) {
     if (!options.llmPrompt || options.llmPrompt.trim() === "") {
@@ -961,6 +930,7 @@ class ZipConverter {
     if (!parentConverters) {
       return {
         title: null,
+        markdown: `[ERROR] No converters available to process zip contents from: ${source}`,
         text_content: `[ERROR] No converters available to process zip contents from: ${source}`
       };
     }
@@ -998,7 +968,7 @@ class ZipConverter {
               mdResults.push(`
 ## File: ${relativePath}
-${result.text_content}
+${result.markdown}
 `);
               break;
@@ -1022,19 +992,18 @@ ${result.text_content}
         inputStream.pipe(parser);
       });
       mdContent += mdResults.join("");
-      return {
-        title: null,
-        text_content: mdContent.trim()
-      };
+      return { title: null, markdown: mdContent.trim(), text_content: mdContent.trim() };
     } catch (error) {
       if (error.message.includes("invalid signature")) {
         return {
           title: null,
+          markdown: `[ERROR] Invalid or corrupted zip file: ${source}`,
           text_content: `[ERROR] Invalid or corrupted zip file: ${source}`
         };
       }
       return {
         title: null,
+        markdown: `[ERROR] Failed to process zip file ${source}: ${String(error)}`,
         text_content: `[ERROR] Failed to process zip file ${source}: ${String(error)}`
       };
     }
@@ -1152,8 +1121,8 @@ class MarkItDown {
           error = e;
         }
         if (res != null) {
-          res.text_content = res.text_content.replace(/(?:\r\n|\r|\n)/g, "\n").trim();
-          res.text_content = res.text_content.replace(/\n{3,}/g, "\n\n");
+          res.markdown = res.markdown.replace(/(?:\r\n|\r|\n)/g, "\n").trim();
+          res.markdown = res.markdown.replace(/\n{3,}/g, "\n\n");
           return res;
         }
       }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "markitdown-ts",
-  "version": "0.0.6",
+  "version": "0.0.7",
   "description": "",
   "keywords": [],
   "homepage": "https://github.com/dead8309/markitdown-ts#readme",
@@ -34,6 +34,7 @@
     "@types/node": "^22.10.2",
     "@types/turndown": "^5.0.5",
     "@types/unzipper": "^0.10.10",
+    "zod": "^4.1.8",
     "bumpp": "^9.9.1",
     "is-ci": "^4.1.0",
     "prettier": "^3.4.2",