@wsqc2026/markitdown-typescript 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/LICENSE +191 -0
  2. package/README.md +157 -0
  3. package/dist/cli.d.ts +2 -0
  4. package/dist/cli.js +3589 -0
  5. package/dist/converter-utils/docx/latex-dict.d.ts +34 -0
  6. package/dist/converter-utils/docx/omml.d.ts +25 -0
  7. package/dist/converter-utils/docx/pre-process.d.ts +9 -0
  8. package/dist/converter.d.ts +23 -0
  9. package/dist/converters/audio.d.ts +1 -0
  10. package/dist/converters/bing-serp.d.ts +1 -0
  11. package/dist/converters/csv.d.ts +1 -0
  12. package/dist/converters/docx.d.ts +1 -0
  13. package/dist/converters/epub.d.ts +1 -0
  14. package/dist/converters/exiftool.d.ts +1 -0
  15. package/dist/converters/html.d.ts +1 -0
  16. package/dist/converters/image.d.ts +1 -0
  17. package/dist/converters/index.d.ts +17 -0
  18. package/dist/converters/ipynb.d.ts +1 -0
  19. package/dist/converters/outlook-msg.d.ts +1 -0
  20. package/dist/converters/pdf.d.ts +1 -0
  21. package/dist/converters/plain-text.d.ts +1 -0
  22. package/dist/converters/pptx.d.ts +1 -0
  23. package/dist/converters/rss.d.ts +1 -0
  24. package/dist/converters/wikipedia.d.ts +1 -0
  25. package/dist/converters/xlsx.d.ts +2 -0
  26. package/dist/converters/youtube.d.ts +1 -0
  27. package/dist/converters/zip.d.ts +8 -0
  28. package/dist/exceptions.d.ts +18 -0
  29. package/dist/exit-codes.d.ts +8 -0
  30. package/dist/index.d.ts +27 -0
  31. package/dist/index.js +3184 -0
  32. package/dist/markitdown.d.ts +16 -0
  33. package/dist/stream-info.d.ts +14 -0
  34. package/dist/transforms/decode-text.d.ts +6 -0
  35. package/dist/transforms/html-to-markdown.d.ts +5 -0
  36. package/dist/types.d.ts +26 -0
  37. package/dist/uri-utils.d.ts +9 -0
  38. package/package.json +49 -0
package/dist/markitdown.d.ts ADDED
@@ -0,0 +1,16 @@
1
+ import type { Converter } from "./converter.js";
2
+ import type { ConvertOptions, ConvertResult, StreamInfo } from "./types.js";
3
+ export interface MarkItDownOptions extends ConvertOptions {
4
+ }
5
+ export interface ConvertInput {
6
+ streamInfo?: StreamInfo;
7
+ }
8
+ export declare function createMarkItDown(options?: MarkItDownOptions): {
9
+ convert: (source: string | Buffer, input?: ConvertInput & {
10
+ _zipDepth?: number;
11
+ }) => Promise<ConvertResult>;
12
+ convertLocal: (filePath: string, streamInfo?: StreamInfo) => Promise<ConvertResult>;
13
+ convertUri: (uri: string, streamInfo?: StreamInfo) => Promise<ConvertResult>;
14
+ convertUrl: (url: string, streamInfo?: StreamInfo) => Promise<ConvertResult>;
15
+ registerConverter: (conv: Converter, priority?: number) => void;
16
+ };
package/dist/stream-info.d.ts ADDED
@@ -0,0 +1,14 @@
1
+ import type { StreamInfo } from "./types.js";
2
+ /**
3
+ * Merge multiple StreamInfo objects. Later values override earlier ones,
4
+ * but only for non-undefined fields.
5
+ */
6
+ export declare function mergeStreamInfo(base: StreamInfo, ...overrides: (StreamInfo | undefined)[]): StreamInfo;
7
+ /**
8
+ * Guess mimetype from extension using a built-in lookup table.
9
+ */
10
+ export declare function guessMimeFromExtension(ext: string): string | undefined;
11
+ /**
12
+ * Guess extension from mimetype.
13
+ */
14
+ export declare function guessExtensionFromMime(mime: string): string | undefined;
package/dist/transforms/decode-text.d.ts ADDED
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Decode a buffer to string using the given charset.
3
+ * Falls back to utf-8 if no charset is specified.
4
+ * Uses iconv-lite for non-standard encodings.
5
+ */
6
+ export declare function decodeBuffer(buffer: Buffer, charset?: string): string;
package/dist/transforms/html-to-markdown.d.ts ADDED
@@ -0,0 +1,5 @@
1
+ import type { ConvertOptions } from "../types.js";
2
+ export declare function htmlToMarkdown(html: string, opts?: ConvertOptions): {
3
+ markdown: string;
4
+ title?: string;
5
+ };
package/dist/types.d.ts ADDED
@@ -0,0 +1,26 @@
1
+ import { z } from "zod/v4";
2
+ export declare const StreamInfoSchema: z.ZodObject<{
3
+ mimetype: z.ZodOptional<z.ZodString>;
4
+ extension: z.ZodOptional<z.ZodString>;
5
+ charset: z.ZodOptional<z.ZodString>;
6
+ filename: z.ZodOptional<z.ZodString>;
7
+ localPath: z.ZodOptional<z.ZodString>;
8
+ url: z.ZodOptional<z.ZodString>;
9
+ }, z.core.$strip>;
10
+ export type StreamInfo = z.infer<typeof StreamInfoSchema>;
11
+ export declare const ConvertResultSchema: z.ZodObject<{
12
+ markdown: z.ZodString;
13
+ title: z.ZodOptional<z.ZodString>;
14
+ }, z.core.$strip>;
15
+ export type ConvertResult = z.infer<typeof ConvertResultSchema>;
16
+ export declare const ConvertOptionsSchema: z.ZodObject<{
17
+ llmClient: z.ZodOptional<z.ZodAny>;
18
+ llmModel: z.ZodOptional<z.ZodString>;
19
+ llmPrompt: z.ZodOptional<z.ZodString>;
20
+ exiftoolPath: z.ZodOptional<z.ZodString>;
21
+ styleMap: z.ZodOptional<z.ZodString>;
22
+ keepDataUris: z.ZodOptional<z.ZodBoolean>;
23
+ youtubeTranscriptLanguages: z.ZodOptional<z.ZodArray<z.ZodString>>;
24
+ _zipDepth: z.ZodOptional<z.ZodNumber>;
25
+ }, z.core.$strip>;
26
+ export type ConvertOptions = z.infer<typeof ConvertOptionsSchema>;
package/dist/uri-utils.d.ts ADDED
@@ -0,0 +1,9 @@
1
+ export declare function fileUriToPath(fileUri: string): {
2
+ netloc: string | null;
3
+ path: string;
4
+ };
5
+ export declare function parseDataUri(uri: string): {
6
+ mimeType: string | null;
7
+ attributes: Record<string, string>;
8
+ data: Buffer;
9
+ };
package/package.json ADDED
@@ -0,0 +1,49 @@
1
+ {
2
+ "name": "@wsqc2026/markitdown-typescript",
3
+ "version": "0.1.0",
4
+ "description": "A TypeScript port of Microsoft's markitdown Python library — converts documents to Markdown",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "bin": {
9
+ "markitdown": "dist/cli.js"
10
+ },
11
+ "files": [
12
+ "dist",
13
+ "LICENSE"
14
+ ],
15
+ "scripts": {
16
+ "build": "bun build src/index.ts src/cli.ts --outdir dist --target node --packages external && tsc --emitDeclarationOnly --outDir dist",
17
+ "compile": "bun build src/cli.ts --compile --outfile markitdown",
18
+ "test": "bun test",
19
+ "test:coverage": "bun test --coverage --coverage-reporter=lcov --coverage-reporter=text",
20
+ "lint": "biome check src/ tests/",
21
+ "lint:fix": "biome check --write src/ tests/",
22
+ "typecheck": "tsc --noEmit",
23
+ "publish:dry": "./scripts/publish.sh --dry-run",
24
+ "publish:npm": "./scripts/publish.sh"
25
+ },
26
+ "dependencies": {
27
+ "cfb": "^1.2.2",
28
+ "cheerio": "^1.2.0",
29
+ "fast-xml-parser": "^5.4.2",
30
+ "file-type": "^19.6.0",
31
+ "iconv-lite": "^0.6.3",
32
+ "jszip": "^3.10.1",
33
+ "mammoth": "^1.11.0",
34
+ "pdf-parse": "^2.4.5",
35
+ "pdfplumber-wasm": "^0.2.0",
36
+ "turndown": "^7.2.2",
37
+ "turndown-plugin-gfm": "^1.0.2",
38
+ "xlsx": "^0.18.5",
39
+ "youtube-transcript": "^1.2.1",
40
+ "zod": "^4.0.0-beta.20250505"
41
+ },
42
+ "devDependencies": {
43
+ "@biomejs/biome": "^2.4.10",
44
+ "@types/bun": "latest",
45
+ "@types/turndown": "^5.0.6",
46
+ "typescript": "^5.7.0"
47
+ },
48
+ "license": "Apache-2.0"
49
+ }