@wsqc2026/markitdown-typescript 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +191 -0
- package/README.md +157 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +3589 -0
- package/dist/converter-utils/docx/latex-dict.d.ts +34 -0
- package/dist/converter-utils/docx/omml.d.ts +25 -0
- package/dist/converter-utils/docx/pre-process.d.ts +9 -0
- package/dist/converter.d.ts +23 -0
- package/dist/converters/audio.d.ts +1 -0
- package/dist/converters/bing-serp.d.ts +1 -0
- package/dist/converters/csv.d.ts +1 -0
- package/dist/converters/docx.d.ts +1 -0
- package/dist/converters/epub.d.ts +1 -0
- package/dist/converters/exiftool.d.ts +1 -0
- package/dist/converters/html.d.ts +1 -0
- package/dist/converters/image.d.ts +1 -0
- package/dist/converters/index.d.ts +17 -0
- package/dist/converters/ipynb.d.ts +1 -0
- package/dist/converters/outlook-msg.d.ts +1 -0
- package/dist/converters/pdf.d.ts +1 -0
- package/dist/converters/plain-text.d.ts +1 -0
- package/dist/converters/pptx.d.ts +1 -0
- package/dist/converters/rss.d.ts +1 -0
- package/dist/converters/wikipedia.d.ts +1 -0
- package/dist/converters/xlsx.d.ts +2 -0
- package/dist/converters/youtube.d.ts +1 -0
- package/dist/converters/zip.d.ts +8 -0
- package/dist/exceptions.d.ts +18 -0
- package/dist/exit-codes.d.ts +8 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.js +3184 -0
- package/dist/markitdown.d.ts +16 -0
- package/dist/stream-info.d.ts +14 -0
- package/dist/transforms/decode-text.d.ts +6 -0
- package/dist/transforms/html-to-markdown.d.ts +5 -0
- package/dist/types.d.ts +26 -0
- package/dist/uri-utils.d.ts +9 -0
- package/package.json +49 -0
package/dist/markitdown.d.ts
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { Converter } from "./converter.js";
|
|
2
|
+
import type { ConvertOptions, ConvertResult, StreamInfo } from "./types.js";
|
|
3
|
+
/**
 * Options accepted by `createMarkItDown`.
 *
 * Declares no members of its own; every field is inherited from `ConvertOptions`.
 */
export interface MarkItDownOptions extends ConvertOptions {
|
|
4
|
+
}
|
|
5
|
+
/**
 * Optional second argument of the `convert` member returned by `createMarkItDown`.
 */
export interface ConvertInput {
|
|
6
|
+
/**
 * Optional `StreamInfo` passed alongside the source.
 * NOTE(review): presumably caller-supplied hints that take part in format detection —
 * confirm against the implementation.
 */
streamInfo?: StreamInfo;
|
|
7
|
+
}
|
|
8
|
+
/**
 * Factory for a converter object.
 *
 * @param options - Optional `MarkItDownOptions`.
 * @returns An object with five members:
 *   - `convert(source, input?)`: takes a `string` or `Buffer` source and an optional
 *     `ConvertInput` (which may also carry a numeric `_zipDepth`); resolves to a `ConvertResult`.
 *   - `convertLocal(filePath, streamInfo?)`: resolves to a `ConvertResult`.
 *   - `convertUri(uri, streamInfo?)`: resolves to a `ConvertResult`.
 *   - `convertUrl(url, streamInfo?)`: resolves to a `ConvertResult`.
 *   - `registerConverter(conv, priority?)`: takes a `Converter` and an optional numeric
 *     priority; returns nothing.
 *
 * NOTE(review): this declaration does not show how a string `source` is interpreted
 * (path, URI or raw text), nor whether a lower or higher `priority` wins — confirm
 * against the implementation.
 * NOTE(review): `_zipDepth` is presumably an internal nesting counter for archives; the
 * underscore suggests callers should not set it — TODO confirm.
 */
export declare function createMarkItDown(options?: MarkItDownOptions): {
|
|
9
|
+
convert: (source: string | Buffer, input?: ConvertInput & {
|
|
10
|
+
_zipDepth?: number;
|
|
11
|
+
}) => Promise<ConvertResult>;
|
|
12
|
+
convertLocal: (filePath: string, streamInfo?: StreamInfo) => Promise<ConvertResult>;
|
|
13
|
+
convertUri: (uri: string, streamInfo?: StreamInfo) => Promise<ConvertResult>;
|
|
14
|
+
convertUrl: (url: string, streamInfo?: StreamInfo) => Promise<ConvertResult>;
|
|
15
|
+
registerConverter: (conv: Converter, priority?: number) => void;
|
|
16
|
+
};
|
package/dist/stream-info.d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { StreamInfo } from "./types.js";
|
|
2
|
+
/**
|
|
3
|
+
* Merge multiple StreamInfo objects. Later values override earlier ones,
|
|
4
|
+
* but only for non-undefined fields.
*
* @param base - The StreamInfo the merge starts from.
* @param overrides - Further StreamInfo values, applied in argument order. Entries may
*   themselves be `undefined` (presumably skipped — confirm against the implementation).
* @returns The merged StreamInfo.
|
|
5
|
+
*/
|
|
6
|
+
export declare function mergeStreamInfo(base: StreamInfo, ...overrides: (StreamInfo | undefined)[]): StreamInfo;
|
|
7
|
+
/**
|
|
8
|
+
* Guess mimetype from extension using a built-in lookup table.
*
* @param ext - File extension to look up. NOTE(review): whether a leading dot or
*   upper-case input is accepted is not visible in this declaration — confirm.
* @returns The guessed mimetype, or `undefined` (presumably when the table has no
*   entry for `ext` — confirm).
|
|
9
|
+
*/
|
|
10
|
+
export declare function guessMimeFromExtension(ext: string): string | undefined;
|
|
11
|
+
/**
|
|
12
|
+
* Guess extension from mimetype.
*
* @param mime - Mimetype to look up. NOTE(review): whether parameters such as
*   `; charset=...` are tolerated is not visible in this declaration — confirm.
* @returns The guessed extension, or `undefined` (presumably when no mapping exists —
*   confirm). Whether the result carries a leading dot is also not shown here.
|
|
13
|
+
*/
|
|
14
|
+
export declare function guessExtensionFromMime(mime: string): string | undefined;
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import { z } from "zod/v4";
|
|
2
|
+
/**
 * Zod object schema for stream metadata.
 *
 * All six fields (`mimetype`, `extension`, `charset`, `filename`, `localPath`, `url`)
 * are optional strings. The `z.core.$strip` parameter is the Zod 4 object config under
 * which unrecognized keys are dropped when parsing (per the Zod documentation).
 */
export declare const StreamInfoSchema: z.ZodObject<{
|
|
3
|
+
mimetype: z.ZodOptional<z.ZodString>;
|
|
4
|
+
extension: z.ZodOptional<z.ZodString>;
|
|
5
|
+
charset: z.ZodOptional<z.ZodString>;
|
|
6
|
+
filename: z.ZodOptional<z.ZodString>;
|
|
7
|
+
localPath: z.ZodOptional<z.ZodString>;
|
|
8
|
+
url: z.ZodOptional<z.ZodString>;
|
|
9
|
+
}, z.core.$strip>;
|
|
10
|
+
/** Static type inferred from `StreamInfoSchema`: an object whose fields are all optional strings. */
export type StreamInfo = z.infer<typeof StreamInfoSchema>;
|
|
11
|
+
/**
 * Zod object schema for a conversion result.
 *
 * `markdown` is a required string; `title` is an optional string.
 */
export declare const ConvertResultSchema: z.ZodObject<{
|
|
12
|
+
markdown: z.ZodString;
|
|
13
|
+
title: z.ZodOptional<z.ZodString>;
|
|
14
|
+
}, z.core.$strip>;
|
|
15
|
+
/** Static type inferred from `ConvertResultSchema`: `{ markdown: string; title?: string }`. */
export type ConvertResult = z.infer<typeof ConvertResultSchema>;
|
|
16
|
+
/**
 * Zod object schema for conversion options. Every field is optional.
 *
 * Field types as declared:
 *   - `llmClient`: `ZodAny`, so the schema places no constraint on its shape.
 *   - `llmModel`, `llmPrompt`, `exiftoolPath`, `styleMap`: strings.
 *   - `keepDataUris`: boolean.
 *   - `youtubeTranscriptLanguages`: array of strings.
 *   - `_zipDepth`: number.
 *
 * NOTE(review): what each option controls is not visible in this declaration. The names
 * suggest per-converter settings, and `_zipDepth` is presumably internal — confirm
 * against the implementation.
 */
export declare const ConvertOptionsSchema: z.ZodObject<{
|
|
17
|
+
llmClient: z.ZodOptional<z.ZodAny>;
|
|
18
|
+
llmModel: z.ZodOptional<z.ZodString>;
|
|
19
|
+
llmPrompt: z.ZodOptional<z.ZodString>;
|
|
20
|
+
exiftoolPath: z.ZodOptional<z.ZodString>;
|
|
21
|
+
styleMap: z.ZodOptional<z.ZodString>;
|
|
22
|
+
keepDataUris: z.ZodOptional<z.ZodBoolean>;
|
|
23
|
+
youtubeTranscriptLanguages: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
24
|
+
_zipDepth: z.ZodOptional<z.ZodNumber>;
|
|
25
|
+
}, z.core.$strip>;
|
|
26
|
+
/** Static type inferred from `ConvertOptionsSchema`; `MarkItDownOptions` extends it. */
export type ConvertOptions = z.infer<typeof ConvertOptionsSchema>;
|
package/package.json
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@wsqc2026/markitdown-typescript",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "A TypeScript port of Microsoft's markitdown Python library — converts documents to Markdown",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"bin": {
|
|
9
|
+
"markitdown": "dist/cli.js"
|
|
10
|
+
},
|
|
11
|
+
"files": [
|
|
12
|
+
"dist",
|
|
13
|
+
"LICENSE"
|
|
14
|
+
],
|
|
15
|
+
"scripts": {
|
|
16
|
+
"build": "bun build src/index.ts src/cli.ts --outdir dist --target node --packages external && tsc --emitDeclarationOnly --outDir dist",
|
|
17
|
+
"compile": "bun build src/cli.ts --compile --outfile markitdown",
|
|
18
|
+
"test": "bun test",
|
|
19
|
+
"test:coverage": "bun test --coverage --coverage-reporter=lcov --coverage-reporter=text",
|
|
20
|
+
"lint": "biome check src/ tests/",
|
|
21
|
+
"lint:fix": "biome check --write src/ tests/",
|
|
22
|
+
"typecheck": "tsc --noEmit",
|
|
23
|
+
"publish:dry": "./scripts/publish.sh --dry-run",
|
|
24
|
+
"publish:npm": "./scripts/publish.sh"
|
|
25
|
+
},
|
|
26
|
+
"dependencies": {
|
|
27
|
+
"cfb": "^1.2.2",
|
|
28
|
+
"cheerio": "^1.2.0",
|
|
29
|
+
"fast-xml-parser": "^5.4.2",
|
|
30
|
+
"file-type": "^19.6.0",
|
|
31
|
+
"iconv-lite": "^0.6.3",
|
|
32
|
+
"jszip": "^3.10.1",
|
|
33
|
+
"mammoth": "^1.11.0",
|
|
34
|
+
"pdf-parse": "^2.4.5",
|
|
35
|
+
"pdfplumber-wasm": "^0.2.0",
|
|
36
|
+
"turndown": "^7.2.2",
|
|
37
|
+
"turndown-plugin-gfm": "^1.0.2",
|
|
38
|
+
"xlsx": "^0.18.5",
|
|
39
|
+
"youtube-transcript": "^1.2.1",
|
|
40
|
+
"zod": "^4.0.0-beta.20250505"
|
|
41
|
+
},
|
|
42
|
+
"devDependencies": {
|
|
43
|
+
"@biomejs/biome": "^2.4.10",
|
|
44
|
+
"@types/bun": "latest",
|
|
45
|
+
"@types/turndown": "^5.0.6",
|
|
46
|
+
"typescript": "^5.7.0"
|
|
47
|
+
},
|
|
48
|
+
"license": "Apache-2.0"
|
|
49
|
+
}
|