markitdown-ts 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +5 -3
- package/dist/index.mjs +5 -3
- package/package.json +4 -4
package/dist/index.cjs
CHANGED
|
@@ -8,7 +8,7 @@ const TurndownService = require('turndown');
|
|
|
8
8
|
const turndownPluginGfm = require('@joplin/turndown-plugin-gfm');
|
|
9
9
|
const xmldom = require('@xmldom/xmldom');
|
|
10
10
|
const url = require('url');
|
|
11
|
-
const
|
|
11
|
+
const pdfParse = require('pdf-parse');
|
|
12
12
|
const Mammoth = require('mammoth');
|
|
13
13
|
const XLSX = require('xlsx');
|
|
14
14
|
const childProcess = require('child_process');
|
|
@@ -654,8 +654,10 @@ class PdfConverter {
|
|
|
654
654
|
}
|
|
655
655
|
async _convert(pdfContent) {
|
|
656
656
|
try {
|
|
657
|
-
const
|
|
658
|
-
|
|
657
|
+
const parser = new pdfParse.PDFParse({ data: pdfContent });
|
|
658
|
+
const result = await parser.getText();
|
|
659
|
+
await parser.destroy();
|
|
660
|
+
return { title: null, markdown: result.text, text_content: result.text };
|
|
659
661
|
} catch (error) {
|
|
660
662
|
console.error("PDF Parsing Error:", error);
|
|
661
663
|
return null;
|
package/dist/index.mjs
CHANGED
|
@@ -8,7 +8,7 @@ import TurndownService from 'turndown';
|
|
|
8
8
|
import turndownPluginGfm from '@joplin/turndown-plugin-gfm';
|
|
9
9
|
import { DOMParser } from '@xmldom/xmldom';
|
|
10
10
|
import { URL as URL$1 } from 'url';
|
|
11
|
-
import {
|
|
11
|
+
import { PDFParse } from 'pdf-parse';
|
|
12
12
|
import Mammoth from 'mammoth';
|
|
13
13
|
import * as XLSX from 'xlsx';
|
|
14
14
|
import * as childProcess from 'child_process';
|
|
@@ -626,8 +626,10 @@ class PdfConverter {
|
|
|
626
626
|
}
|
|
627
627
|
async _convert(pdfContent) {
|
|
628
628
|
try {
|
|
629
|
-
const
|
|
630
|
-
|
|
629
|
+
const parser = new PDFParse({ data: pdfContent });
|
|
630
|
+
const result = await parser.getText();
|
|
631
|
+
await parser.destroy();
|
|
632
|
+
return { title: null, markdown: result.text, text_content: result.text };
|
|
631
633
|
} catch (error) {
|
|
632
634
|
console.error("PDF Parsing Error:", error);
|
|
633
635
|
return null;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "markitdown-ts",
|
|
3
|
-
"version": "0.0.7",
|
|
3
|
+
"version": "0.0.8",
|
|
4
4
|
"description": "",
|
|
5
5
|
"keywords": [],
|
|
6
6
|
"homepage": "https://github.com/dead8309/markitdown-ts#readme",
|
|
@@ -34,14 +34,14 @@
|
|
|
34
34
|
"@types/node": "^22.10.2",
|
|
35
35
|
"@types/turndown": "^5.0.5",
|
|
36
36
|
"@types/unzipper": "^0.10.10",
|
|
37
|
-
"zod": "^4.1.8",
|
|
38
37
|
"bumpp": "^9.9.1",
|
|
39
38
|
"is-ci": "^4.1.0",
|
|
40
39
|
"prettier": "^3.4.2",
|
|
41
40
|
"typescript": "^5.7.2",
|
|
42
41
|
"unbuild": "^3.0.1",
|
|
43
42
|
"vite": "^6.0.4",
|
|
44
|
-
"vitest": "^2.1.8"
|
|
43
|
+
"vitest": "^2.1.8",
|
|
44
|
+
"zod": "^4.1.8"
|
|
45
45
|
},
|
|
46
46
|
"dependencies": {
|
|
47
47
|
"@joplin/turndown-plugin-gfm": "^1.0.60",
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"jsdom": "^25.0.1",
|
|
51
51
|
"mammoth": "^1.8.0",
|
|
52
52
|
"mime-types": "^2.1.35",
|
|
53
|
-
"pdf-
|
|
53
|
+
"pdf-parse": "^2.4.5",
|
|
54
54
|
"turndown": "^7.2.0",
|
|
55
55
|
"xlsx": "^0.18.5"
|
|
56
56
|
},
|