@wsqc2026/markitdown-typescript 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +191 -0
- package/README.md +157 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +3589 -0
- package/dist/converter-utils/docx/latex-dict.d.ts +34 -0
- package/dist/converter-utils/docx/omml.d.ts +25 -0
- package/dist/converter-utils/docx/pre-process.d.ts +9 -0
- package/dist/converter.d.ts +23 -0
- package/dist/converters/audio.d.ts +1 -0
- package/dist/converters/bing-serp.d.ts +1 -0
- package/dist/converters/csv.d.ts +1 -0
- package/dist/converters/docx.d.ts +1 -0
- package/dist/converters/epub.d.ts +1 -0
- package/dist/converters/exiftool.d.ts +1 -0
- package/dist/converters/html.d.ts +1 -0
- package/dist/converters/image.d.ts +1 -0
- package/dist/converters/index.d.ts +17 -0
- package/dist/converters/ipynb.d.ts +1 -0
- package/dist/converters/outlook-msg.d.ts +1 -0
- package/dist/converters/pdf.d.ts +1 -0
- package/dist/converters/plain-text.d.ts +1 -0
- package/dist/converters/pptx.d.ts +1 -0
- package/dist/converters/rss.d.ts +1 -0
- package/dist/converters/wikipedia.d.ts +1 -0
- package/dist/converters/xlsx.d.ts +2 -0
- package/dist/converters/youtube.d.ts +1 -0
- package/dist/converters/zip.d.ts +8 -0
- package/dist/exceptions.d.ts +18 -0
- package/dist/exit-codes.d.ts +8 -0
- package/dist/index.d.ts +27 -0
- package/dist/index.js +3184 -0
- package/dist/markitdown.d.ts +16 -0
- package/dist/stream-info.d.ts +14 -0
- package/dist/transforms/decode-text.d.ts +6 -0
- package/dist/transforms/html-to-markdown.d.ts +5 -0
- package/dist/types.d.ts +26 -0
- package/dist/uri-utils.d.ts +9 -0
- package/package.json +49 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LaTeX dictionary for OMML to LaTeX conversion.
|
|
3
|
+
* Ported from Python markitdown latex_dict.py
|
|
4
|
+
*/
|
|
5
|
+
export declare const CHARS: Set<string>;
|
|
6
|
+
export declare const BLANK = "";
|
|
7
|
+
export declare const BACKSLASH = "\\";
|
|
8
|
+
export declare const ALN = "&";
|
|
9
|
+
export declare const BRK = "\\\\";
|
|
10
|
+
export declare const FUNC_PLACE = "{fe}";
|
|
11
|
+
/** Unicode → LaTeX accent mapping */
|
|
12
|
+
export declare const CHR: Record<string, string>;
|
|
13
|
+
/** Big operators */
|
|
14
|
+
export declare const CHR_BO: Record<string, string>;
|
|
15
|
+
/** Text symbol mapping (Unicode → LaTeX) */
|
|
16
|
+
export declare const T: Record<string, string>;
|
|
17
|
+
/** Function name templates */
|
|
18
|
+
export declare const FUNC: Record<string, string>;
|
|
19
|
+
export declare const CHR_DEFAULT: Record<string, string>;
|
|
20
|
+
export declare const POS: Record<string, string>;
|
|
21
|
+
export declare const POS_DEFAULT: Record<string, string>;
|
|
22
|
+
/**
 * Subscript (`SUB`) and superscript (`SUP`) template strings.
 * NOTE(review): the doubled braces and the `{0}` placeholder look like Python
 * `str.format` syntax carried over from the ported module — confirm how the
 * implementation substitutes `{0}` and whether the doubled braces are collapsed.
 */
export declare const SUB = "_{{{0}}}";
|
|
23
|
+
export declare const SUP = "^{{{0}}}";
|
|
24
|
+
export declare const F: Record<string, string>;
|
|
25
|
+
export declare const F_DEFAULT = "\\frac{{{num}}}{{{den}}}";
|
|
26
|
+
export declare const D = "\\left{left}{text}\\right{right}";
|
|
27
|
+
export declare const D_DEFAULT: Record<string, string>;
|
|
28
|
+
export declare const RAD = "\\sqrt[{deg}]{{{text}}}";
|
|
29
|
+
export declare const RAD_DEFAULT = "\\sqrt{{{text}}}";
|
|
30
|
+
export declare const ARR = "\\begin{{array}}{{c}}{text}\\end{{array}}";
|
|
31
|
+
export declare const LIM_FUNC: Record<string, string>;
|
|
32
|
+
export declare const LIM_TO: [string, string];
|
|
33
|
+
export declare const LIM_UPP = "\\overset{{{lim}}}{{{text}}}";
|
|
34
|
+
export declare const M = "\\begin{{matrix}}{text}\\end{{matrix}}";
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Office Math Markup Language (OMML) to LaTeX converter.
|
|
3
|
+
* Ported from Python markitdown omml.py (adapted from dwml)
|
|
4
|
+
*/
|
|
5
|
+
export declare const OMML_NS = "http://schemas.openxmlformats.org/officeDocument/2006/math";
|
|
6
|
+
/**
 * Element node of the tree that `parseOmmlXml` returns and that `find`,
 * `findAll` and `oMathToLatex` accept.
 *
 * - `tag`: element name; per the parser's own comment this is presumably in the
 *   ElementTree-style `{namespace}tag` form (TODO confirm against the implementation).
 * - `attrib`: string-to-string record (presumably the element's attributes).
 * - `text`: a string, or `null`; the declaration allows both.
 * - `children`: nested `XmlNode` values.
 */
interface XmlNode {
|
|
7
|
+
tag: string;
|
|
8
|
+
attrib: Record<string, string>;
|
|
9
|
+
text: string | null;
|
|
10
|
+
children: XmlNode[];
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Parse an XML string into an XmlNode tree with ElementTree-style {namespace}tag format.
|
|
14
|
+
* This is a minimal parser specifically for OMML math XML.
|
|
15
|
+
*/
|
|
16
|
+
declare function parseOmmlXml(xmlStr: string): XmlNode;
|
|
17
|
+
/** Find child elements matching a tag */
|
|
18
|
+
declare function findAll(node: XmlNode, tag: string): XmlNode[];
|
|
19
|
+
/** Find the first child element matching the tag; returns null if none is found */
|
|
20
|
+
declare function find(node: XmlNode, tag: string): XmlNode | null;
|
|
21
|
+
/** Convert an oMath XmlNode element to a LaTeX string */
|
|
22
|
+
export declare function oMathToLatex(element: XmlNode): string;
|
|
23
|
+
/** Parse an XML string, find all oMath elements, and yield the LaTeX for each */
|
|
24
|
+
export declare function loadString(xmlString: string): Generator<string>;
|
|
25
|
+
export { find, findAll, parseOmmlXml, type XmlNode };
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DOCX pre-processing: converts OMML math equations to LaTeX before mammoth.
|
|
3
|
+
* Ported from Python markitdown pre_process.py
|
|
4
|
+
*/
|
|
5
|
+
/**
|
|
6
|
+
* Pre-process a DOCX buffer to convert OMML math equations to LaTeX.
|
|
7
|
+
* Returns a new buffer with equations replaced.
|
|
8
|
+
*/
|
|
9
|
+
export declare function preProcessDocx(inputBuffer: Buffer): Promise<Buffer>;
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { ConvertOptions, ConvertResult, StreamInfo } from "./types.js";
|
|
2
|
+
/**
 * Argument bundle received by every `Matcher` and `TransformStep`.
 *
 * - `buffer`: the input as a `Buffer`.
 * - `info`: the `StreamInfo` value accompanying the input (type imported from `./types.js`).
 * - `opts`: the `ConvertOptions` for this call (type imported from `./types.js`).
 */
export type ConverterContext = {
|
|
3
|
+
buffer: Buffer;
|
|
4
|
+
info: StreamInfo;
|
|
5
|
+
opts: ConvertOptions;
|
|
6
|
+
};
|
|
7
|
+
/** Synchronous predicate over a `ConverterContext`; used as the `match` member of `Converter`. */
export type Matcher = (ctx: ConverterContext) => boolean;
|
|
8
|
+
/** Asynchronous function from a `ConverterContext` to a `ConvertResult`; used as the `convert` member of `Converter`. */
export type TransformStep = (ctx: ConverterContext) => Promise<ConvertResult>;
|
|
9
|
+
/**
 * A named pairing of a matcher with a conversion function; this is the shape
 * returned by `converter(name, match, convert)`.
 *
 * - `name`: string label for the converter.
 * - `match`: predicate over the conversion context.
 * - `convert`: async function producing a `ConvertResult` from the context.
 *
 * NOTE(review): presumably `convert` is only invoked when `match` returns true —
 * confirm against the dispatching code, which is not part of this declaration.
 */
export type Converter = {
|
|
10
|
+
name: string;
|
|
11
|
+
match: Matcher;
|
|
12
|
+
convert: TransformStep;
|
|
13
|
+
};
|
|
14
|
+
/**
 * Factory functions that each return a `Matcher`.
 *
 * - `byMime` / `byExt`: take a variadic list of strings.
 * - `byUrl`: takes a single `RegExp`.
 * - `anyOf` / `allOf`: take a variadic list of other matchers.
 * - `hasCharset`: takes no arguments.
 *
 * NOTE(review): the names suggest matching on MIME type, file extension and URL,
 * and OR / AND composition for `anyOf` / `allOf`; the declarations alone do not
 * show which `ConverterContext` fields are inspected — confirm in the implementation.
 */
export declare function byMime(...mimes: string[]): Matcher;
|
|
15
|
+
export declare function byExt(...exts: string[]): Matcher;
|
|
16
|
+
export declare function byUrl(pattern: RegExp): Matcher;
|
|
17
|
+
export declare function anyOf(...matchers: Matcher[]): Matcher;
|
|
18
|
+
export declare function allOf(...matchers: Matcher[]): Matcher;
|
|
19
|
+
export declare function hasCharset(): Matcher;
|
|
20
|
+
/**
|
|
21
|
+
* Create a converter from a name, matcher, and a conversion function.
|
|
22
|
+
*/
|
|
23
|
+
export declare function converter(name: string, match: Matcher, convert: TransformStep): Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const audioConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const bingSerpConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const csvConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const docxConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const epubConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Resolves to a string-to-string record for the given buffer; `exiftoolPath` is optional.
 * NOTE(review): presumably runs an external exiftool binary located at `exiftoolPath` —
 * confirm the default path and the failure behavior in the implementation.
 */
export declare function exiftoolMetadata(buffer: Buffer, exiftoolPath?: string): Promise<Record<string, string>>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const htmlConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const imageConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
export { audioConverter } from "./audio.js";
|
|
2
|
+
export { bingSerpConverter } from "./bing-serp.js";
|
|
3
|
+
export { csvConverter } from "./csv.js";
|
|
4
|
+
export { docxConverter } from "./docx.js";
|
|
5
|
+
export { epubConverter } from "./epub.js";
|
|
6
|
+
export { htmlConverter } from "./html.js";
|
|
7
|
+
export { imageConverter } from "./image.js";
|
|
8
|
+
export { ipynbConverter } from "./ipynb.js";
|
|
9
|
+
export { outlookMsgConverter } from "./outlook-msg.js";
|
|
10
|
+
export { pdfConverter } from "./pdf.js";
|
|
11
|
+
export { plainTextConverter } from "./plain-text.js";
|
|
12
|
+
export { pptxConverter } from "./pptx.js";
|
|
13
|
+
export { rssConverter } from "./rss.js";
|
|
14
|
+
export { wikipediaConverter } from "./wikipedia.js";
|
|
15
|
+
export { xlsConverter, xlsxConverter } from "./xlsx.js";
|
|
16
|
+
export { youtubeConverter } from "./youtube.js";
|
|
17
|
+
export { createZipConverter } from "./zip.js";
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const ipynbConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const outlookMsgConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const pdfConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const plainTextConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const pptxConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const rssConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const wikipediaConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const youtubeConverter: import("../converter.js").Converter;
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import type { Converter } from "../converter.js";
|
|
2
|
+
import type { ConvertResult, StreamInfo } from "../types.js";
|
|
3
|
+
/**
 * Callback type accepted by `createZipConverter`: converts a `Buffer` to a
 * `ConvertResult`, with an optional second argument carrying an optional
 * `streamInfo` and an optional numeric `_zipDepth`.
 * NOTE(review): `_zipDepth` presumably tracks how deeply nested the current
 * archive is — confirm, including any depth limit, in the implementation.
 */
type ConvertBufferFn = (buffer: Buffer, input?: {
|
|
4
|
+
streamInfo?: StreamInfo;
|
|
5
|
+
_zipDepth?: number;
|
|
6
|
+
}) => Promise<ConvertResult>;
|
|
7
|
+
/**
 * Builds a `Converter` from the supplied buffer-conversion callback.
 * NOTE(review): presumably `convertFn` is applied to the archive's entries — confirm.
 */
export declare function createZipConverter(convertFn: ConvertBufferFn): Converter;
|
|
8
|
+
export {};
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * Error subclass that the other error classes declared in this file extend.
 * The constructor takes an optional message.
 */
export declare class MarkItDownError extends Error {
|
|
2
|
+
constructor(message?: string);
|
|
3
|
+
}
|
|
4
|
+
/**
 * `MarkItDownError` subclass with an optional message.
 * NOTE(review): presumably thrown when an optional dependency needed by a
 * converter is unavailable — confirm at the throw sites.
 */
export declare class MissingDependencyError extends MarkItDownError {
|
|
5
|
+
constructor(message?: string);
|
|
6
|
+
}
|
|
7
|
+
/**
 * `MarkItDownError` subclass with an optional message.
 * NOTE(review): presumably thrown when no converter accepts the input — confirm
 * at the throw sites.
 */
export declare class UnsupportedFormatError extends MarkItDownError {
|
|
8
|
+
constructor(message?: string);
|
|
9
|
+
}
|
|
10
|
+
/**
 * Element type of `FileConversionError.attempts`.
 *
 * - `converterName`: a converter's name as a string.
 * - `error`: optional `Error` associated with the attempt.
 */
export interface FailedConversionAttempt {
|
|
11
|
+
converterName: string;
|
|
12
|
+
error?: Error;
|
|
13
|
+
}
|
|
14
|
+
/**
 * Returns a string for the given error value, or `undefined`.
 * NOTE(review): presumably a user-facing hint for known error kinds — confirm
 * which errors produce a suggestion.
 */
export declare function getSuggestion(error: unknown): string | undefined;
|
|
15
|
+
/**
 * `MarkItDownError` subclass that can carry a list of failed conversion attempts.
 * Both constructor arguments are optional, and `attempts` may be absent on the instance.
 */
export declare class FileConversionError extends MarkItDownError {
|
|
16
|
+
attempts?: FailedConversionAttempt[];
|
|
17
|
+
constructor(message?: string, attempts?: FailedConversionAttempt[]);
|
|
18
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
 * Exit-code constants, declared with the literal values 0 through 5.
 * NOTE(review): presumably these are the values `getExitCode` returns and the
 * CLI exits with — confirm in the implementation.
 */
export declare const EXIT_SUCCESS = 0;
|
|
2
|
+
export declare const EXIT_CONVERSION_FAILURE = 1;
|
|
3
|
+
export declare const EXIT_BAD_ARGUMENTS = 2;
|
|
4
|
+
export declare const EXIT_FILE_NOT_FOUND = 3;
|
|
5
|
+
export declare const EXIT_PERMISSION_DENIED = 4;
|
|
6
|
+
export declare const EXIT_UNSUPPORTED_FORMAT = 5;
|
|
7
|
+
/**
 * Record from number to string.
 * NOTE(review): presumably keyed by the exit-code values with a description for
 * each — confirm that every code has an entry.
 */
export declare const EXIT_CODE_DESCRIPTIONS: Record<number, string>;
|
|
8
|
+
/**
 * Maps an arbitrary error value to a numeric exit code.
 * NOTE(review): presumably returns one of the `EXIT_*` constants based on the
 * error's class or code — confirm the mapping and the fallback value.
 */
export declare function getExitCode(error: unknown): number;
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
export type { Converter, ConverterContext, Matcher, TransformStep } from "./converter.js";
|
|
2
|
+
export { allOf, anyOf, byExt, byMime, byUrl, converter, hasCharset } from "./converter.js";
|
|
3
|
+
export { audioConverter } from "./converters/audio.js";
|
|
4
|
+
export { bingSerpConverter } from "./converters/bing-serp.js";
|
|
5
|
+
export { csvConverter } from "./converters/csv.js";
|
|
6
|
+
export { docxConverter } from "./converters/docx.js";
|
|
7
|
+
export { epubConverter } from "./converters/epub.js";
|
|
8
|
+
export { htmlConverter } from "./converters/html.js";
|
|
9
|
+
export { imageConverter } from "./converters/image.js";
|
|
10
|
+
export { ipynbConverter } from "./converters/ipynb.js";
|
|
11
|
+
export { outlookMsgConverter } from "./converters/outlook-msg.js";
|
|
12
|
+
export { pdfConverter } from "./converters/pdf.js";
|
|
13
|
+
export { plainTextConverter } from "./converters/plain-text.js";
|
|
14
|
+
export { pptxConverter } from "./converters/pptx.js";
|
|
15
|
+
export { rssConverter } from "./converters/rss.js";
|
|
16
|
+
export { wikipediaConverter } from "./converters/wikipedia.js";
|
|
17
|
+
export { xlsConverter, xlsxConverter } from "./converters/xlsx.js";
|
|
18
|
+
export { youtubeConverter } from "./converters/youtube.js";
|
|
19
|
+
export { createZipConverter } from "./converters/zip.js";
|
|
20
|
+
export { FileConversionError, MarkItDownError, MissingDependencyError, UnsupportedFormatError, } from "./exceptions.js";
|
|
21
|
+
export type { ConvertInput, MarkItDownOptions } from "./markitdown.js";
|
|
22
|
+
export { createMarkItDown } from "./markitdown.js";
|
|
23
|
+
export { mergeStreamInfo } from "./stream-info.js";
|
|
24
|
+
export { decodeBuffer } from "./transforms/decode-text.js";
|
|
25
|
+
export { htmlToMarkdown } from "./transforms/html-to-markdown.js";
|
|
26
|
+
export type { ConvertOptions, ConvertResult, StreamInfo } from "./types.js";
|
|
27
|
+
export { ConvertOptionsSchema, ConvertResultSchema, StreamInfoSchema } from "./types.js";
|