markitdown-ts 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -8,7 +8,7 @@ const TurndownService = require('turndown');
8
8
  const turndownPluginGfm = require('@joplin/turndown-plugin-gfm');
9
9
  const xmldom = require('@xmldom/xmldom');
10
10
  const url = require('url');
11
- const pdfTs = require('pdf-ts');
11
+ const pdfParse = require('pdf-parse');
12
12
  const Mammoth = require('mammoth');
13
13
  const XLSX = require('xlsx');
14
14
  const childProcess = require('child_process');
@@ -654,8 +654,10 @@ class PdfConverter {
654
654
  }
655
655
  async _convert(pdfContent) {
656
656
  try {
657
- const textContent = await pdfTs.pdfToText(pdfContent);
658
- return { title: null, markdown: textContent, text_content: textContent };
657
+ const parser = new pdfParse.PDFParse({ data: pdfContent });
658
+ const result = await parser.getText();
659
+ await parser.destroy();
660
+ return { title: null, markdown: result.text, text_content: result.text };
659
661
  } catch (error) {
660
662
  console.error("PDF Parsing Error:", error);
661
663
  return null;
package/dist/index.mjs CHANGED
@@ -8,7 +8,7 @@ import TurndownService from 'turndown';
8
8
  import turndownPluginGfm from '@joplin/turndown-plugin-gfm';
9
9
  import { DOMParser } from '@xmldom/xmldom';
10
10
  import { URL as URL$1 } from 'url';
11
- import { pdfToText } from 'pdf-ts';
11
+ import { PDFParse } from 'pdf-parse';
12
12
  import Mammoth from 'mammoth';
13
13
  import * as XLSX from 'xlsx';
14
14
  import * as childProcess from 'child_process';
@@ -626,8 +626,10 @@ class PdfConverter {
626
626
  }
627
627
  async _convert(pdfContent) {
628
628
  try {
629
- const textContent = await pdfToText(pdfContent);
630
- return { title: null, markdown: textContent, text_content: textContent };
629
+ const parser = new PDFParse({ data: pdfContent });
630
+ const result = await parser.getText();
631
+ await parser.destroy();
632
+ return { title: null, markdown: result.text, text_content: result.text };
631
633
  } catch (error) {
632
634
  console.error("PDF Parsing Error:", error);
633
635
  return null;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "markitdown-ts",
3
- "version": "0.0.7",
3
+ "version": "0.0.8",
4
4
  "description": "",
5
5
  "keywords": [],
6
6
  "homepage": "https://github.com/dead8309/markitdown-ts#readme",
@@ -34,14 +34,14 @@
34
34
  "@types/node": "^22.10.2",
35
35
  "@types/turndown": "^5.0.5",
36
36
  "@types/unzipper": "^0.10.10",
37
- "zod": "^4.1.8",
38
37
  "bumpp": "^9.9.1",
39
38
  "is-ci": "^4.1.0",
40
39
  "prettier": "^3.4.2",
41
40
  "typescript": "^5.7.2",
42
41
  "unbuild": "^3.0.1",
43
42
  "vite": "^6.0.4",
44
- "vitest": "^2.1.8"
43
+ "vitest": "^2.1.8",
44
+ "zod": "^4.1.8"
45
45
  },
46
46
  "dependencies": {
47
47
  "@joplin/turndown-plugin-gfm": "^1.0.60",
@@ -50,7 +50,7 @@
50
50
  "jsdom": "^25.0.1",
51
51
  "mammoth": "^1.8.0",
52
52
  "mime-types": "^2.1.35",
53
- "pdf-ts": "^0.0.2",
53
+ "pdf-parse": "^2.4.5",
54
54
  "turndown": "^7.2.0",
55
55
  "xlsx": "^0.18.5"
56
56
  },