markit-ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +333 -0
- package/dist/commands/config.d.ts +4 -0
- package/dist/commands/config.js +133 -0
- package/dist/commands/convert.d.ts +5 -0
- package/dist/commands/convert.js +110 -0
- package/dist/commands/formats.d.ts +2 -0
- package/dist/commands/formats.js +56 -0
- package/dist/commands/init.d.ts +2 -0
- package/dist/commands/init.js +29 -0
- package/dist/commands/onboard.d.ts +2 -0
- package/dist/commands/onboard.js +61 -0
- package/dist/commands/plugin.d.ts +4 -0
- package/dist/commands/plugin.js +58 -0
- package/dist/config.d.ts +26 -0
- package/dist/config.js +42 -0
- package/dist/converters/audio.d.ts +7 -0
- package/dist/converters/audio.js +87 -0
- package/dist/converters/csv.d.ts +7 -0
- package/dist/converters/csv.js +83 -0
- package/dist/converters/docx.d.ts +6 -0
- package/dist/converters/docx.js +28 -0
- package/dist/converters/epub.d.ts +8 -0
- package/dist/converters/epub.js +110 -0
- package/dist/converters/html.d.ts +6 -0
- package/dist/converters/html.js +33 -0
- package/dist/converters/image.d.ts +6 -0
- package/dist/converters/image.js +94 -0
- package/dist/converters/ipynb.d.ts +6 -0
- package/dist/converters/ipynb.js +72 -0
- package/dist/converters/json.d.ts +6 -0
- package/dist/converters/json.js +21 -0
- package/dist/converters/pdf.d.ts +6 -0
- package/dist/converters/pdf.js +29 -0
- package/dist/converters/plain-text.d.ts +6 -0
- package/dist/converters/plain-text.js +41 -0
- package/dist/converters/pptx.d.ts +8 -0
- package/dist/converters/pptx.js +189 -0
- package/dist/converters/rss.d.ts +11 -0
- package/dist/converters/rss.js +134 -0
- package/dist/converters/wikipedia.d.ts +6 -0
- package/dist/converters/wikipedia.js +35 -0
- package/dist/converters/xlsx.d.ts +8 -0
- package/dist/converters/xlsx.js +139 -0
- package/dist/converters/xml.d.ts +6 -0
- package/dist/converters/xml.js +17 -0
- package/dist/converters/yaml.d.ts +6 -0
- package/dist/converters/yaml.js +16 -0
- package/dist/converters/zip.d.ts +8 -0
- package/dist/converters/zip.js +56 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.js +24 -0
- package/dist/llm.d.ts +10 -0
- package/dist/llm.js +139 -0
- package/dist/main.d.ts +2 -0
- package/dist/main.js +182 -0
- package/dist/markit.d.ts +19 -0
- package/dist/markit.js +124 -0
- package/dist/mill.d.ts +18 -0
- package/dist/mill.js +123 -0
- package/dist/plugins/api.d.ts +7 -0
- package/dist/plugins/api.js +44 -0
- package/dist/plugins/index.d.ts +4 -0
- package/dist/plugins/index.js +3 -0
- package/dist/plugins/installer.d.ts +25 -0
- package/dist/plugins/installer.js +176 -0
- package/dist/plugins/loader.d.ts +6 -0
- package/dist/plugins/loader.js +61 -0
- package/dist/plugins/types.d.ts +25 -0
- package/dist/plugins/types.js +1 -0
- package/dist/providers/anthropic.d.ts +2 -0
- package/dist/providers/anthropic.js +47 -0
- package/dist/providers/index.d.ts +21 -0
- package/dist/providers/index.js +58 -0
- package/dist/providers/openai.d.ts +2 -0
- package/dist/providers/openai.js +65 -0
- package/dist/providers/types.d.ts +26 -0
- package/dist/providers/types.js +1 -0
- package/dist/types.d.ts +28 -0
- package/dist/types.js +1 -0
- package/dist/utils/exit-codes.d.ts +4 -0
- package/dist/utils/exit-codes.js +4 -0
- package/dist/utils/output.d.ts +22 -0
- package/dist/utils/output.js +31 -0
- package/package.json +70 -0
package/dist/main.js
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { createRequire } from "node:module";
|
|
3
|
+
import { Command } from "commander";
|
|
4
|
+
import { convert } from "./commands/convert.js";
|
|
5
|
+
import { onboard } from "./commands/onboard.js";
|
|
6
|
+
import { formats } from "./commands/formats.js";
|
|
7
|
+
import { init } from "./commands/init.js";
|
|
8
|
+
import { configShow, configGet, configSet } from "./commands/config.js";
|
|
9
|
+
import { pluginInstall, pluginRemove, pluginList } from "./commands/plugin.js";
|
|
10
|
+
// ES modules have no built-in require; create one so package.json can be loaded for the version string.
const require = createRequire(import.meta.url);
const version = require("../package.json").version;

// Root command. The options declared here are the "global" flags that the
// subcommand actions read back through cmd.optsWithGlobals().
const program = new Command();
program.name("markit");
program.description("Convert anything to markdown.");
program.version(`markit ${version}`, "-V, --version");
program.option("--json", "Output as JSON");
program.option("-q, --quiet", "Raw markdown only, no decoration");
program.option("-p, --prompt <text>", "Extra instructions for image description");
program.option("-o, --output <file>", "Write to file instead of stdout");
// Appended after the generated --help output.
program.addHelpText("after", `
Examples:
$ markit report.pdf Convert a PDF to markdown
$ markit document.docx -o doc.md Convert DOCX, write to file
$ markit https://example.com Convert a web page
$ markit photo.jpg Extract EXIF + AI description
$ markit recording.mp3 Metadata + transcription
$ cat file.pdf | markit - Read from stdin
$ markit init Create .markit/ config
$ markit config show Show LLM settings

Docs: https://github.com/Michaelliv/markit`);
|
|
33
|
+
// `convert` (alias `c`): explicit spelling of the default "markit <source>" behavior.
program
    .command("convert")
    .alias("c")
    .description("Convert a file or URL to markdown")
    .argument("<source>", "File path, URL, or - for stdin")
    .option("-o, --output <file>", "Write to file instead of stdout")
    .action(async (source, opts, cmd) => {
        // Merged view of this command's options and the root program's options.
        const globals = cmd.optsWithGlobals();
        await convert(source, {
            json: globals.json,
            quiet: globals.quiet,
            // -o is declared on both the root program and this subcommand, so the
            // value may have been captured at either level: prefer the local one,
            // fall back to the merged view.
            output: opts.output ?? globals.output,
            // -p/--prompt is declared only on the root program; it never appears in
            // the subcommand-local `opts`, so it has to come from the merged view.
            prompt: globals.prompt,
        });
    });
|
|
48
|
+
/**
 * Pick the two output-related flags (--json, --quiet) out of the merged
 * local + root option set for a command.
 */
function outputFlags(cmd) {
    const { json, quiet } = cmd.optsWithGlobals();
    return { json, quiet };
}

// markit init
program
    .command("init")
    .description("Create .markit/ config directory")
    .action(async (_opts, cmd) => {
        await init([], outputFlags(cmd));
    });

// markit config show | get <key> | set <key> [value]
const configCmd = program
    .command("config")
    .description("Manage markit configuration");
configCmd
    .command("show")
    .description("Show current configuration")
    .action(async (_opts, cmd) => {
        await configShow([], outputFlags(cmd));
    });
configCmd
    .command("get <key>")
    .description("Get a config value")
    .action(async (key, _opts, cmd) => {
        await configGet(key, outputFlags(cmd));
    });
configCmd
    .command("set <key> [value]")
    .description("Set a config value (secrets read from stdin if no value given)")
    .action(async (key, value, _opts, cmd) => {
        await configSet(key, value, outputFlags(cmd));
    });

// markit plugin install <source> | remove <name> | list
const pluginCmd = program
    .command("plugin")
    .description("Manage plugins");
pluginCmd
    .command("install <source>")
    .description("Install a plugin (npm:pkg, git:url, or local path)")
    .action(async (source, _opts, cmd) => {
        await pluginInstall(source, outputFlags(cmd));
    });
pluginCmd
    .command("remove <name>")
    .description("Remove an installed plugin")
    .action(async (name, _opts, cmd) => {
        await pluginRemove(name, outputFlags(cmd));
    });
pluginCmd
    .command("list")
    .description("List installed plugins")
    .action(async (_opts, cmd) => {
        await pluginList(outputFlags(cmd));
    });

// markit formats
program
    .command("formats")
    .description("List supported formats")
    .action(async (_opts, cmd) => {
        await formats([], outputFlags(cmd));
    });

// markit onboard
program
    .command("onboard")
    .description("Add markit instructions to CLAUDE.md or AGENTS.md")
    .action(async (_opts, cmd) => {
        await onboard([], outputFlags(cmd));
    });
|
|
117
|
+
// Fallback for anything that is not a registered subcommand: the first word is
// treated as the source to convert, unless it looks like a mistyped command name.
program.on("command:*", async (args) => {
    const [source] = args;
    if (!source) {
        program.help();
        return;
    }
    // A word counts as a probable typo only when it cannot plausibly be a path or a URL.
    const looksLikeFileOrUrl = source.includes("/") ||
        source.includes(".") ||
        source.startsWith("http");
    if (!looksLikeFileOrUrl) {
        const knownCommands = ["convert", "formats", "onboard", "help", "init", "config", "plugin"];
        // First known command within edit distance 2 of what was typed.
        const suggestion = knownCommands.find((candidate) => source !== candidate && levenshtein(source, candidate) <= 2);
        if (suggestion !== undefined) {
            const { error } = await import("./utils/output.js");
            error(`Unknown command '${source}'. Did you mean '${suggestion}'?`);
            process.exit(1);
        }
    }
    // Not a typo: convert it using the root program's flags.
    const { json, quiet, output, prompt } = program.opts();
    await convert(source, { json, quiet, output, prompt });
});
|
|
143
|
+
// Short usage banner printed when markit is started without any arguments.
const QUICK_HELP = `markit — convert anything to markdown

Usage: markit <file-or-url> [options]

Examples:
$ markit report.pdf
$ markit document.docx -o doc.md
$ markit https://example.com

Commands:
markit init Create .markit/ config directory
markit config Manage settings (LLM, API keys)
markit formats List supported formats
markit onboard Add instructions to CLAUDE.md

Run markit --help for all options.
Docs: https://github.com/Michaelliv/markit`;

// argv holds only the runtime and script path → nothing to parse, show the banner.
if (process.argv.length <= 2) {
    console.log(QUICK_HELP);
    process.exit(0);
}

// Report a rejected parse/action and exit with status 1.
const reportFatal = (err) => {
    console.error("Fatal error:", err.message);
    process.exit(1);
};
program.parseAsync(process.argv).catch(reportFatal);
|
|
168
|
+
/**
 * Levenshtein edit distance between two strings: the minimum number of
 * single-character insertions, deletions and substitutions turning `a` into `b`.
 * Used by the CLI to suggest a command when the typed word is close to one.
 */
function levenshtein(a, b) {
    // Distances from the empty prefix of `a` to every prefix of `b`.
    let previousRow = Array.from({ length: b.length + 1 }, (_, j) => j);
    for (let i = 1; i <= a.length; i++) {
        // First cell: deleting all i characters of a's prefix.
        const currentRow = [i];
        for (let j = 1; j <= b.length; j++) {
            const deletion = previousRow[j] + 1;
            const insertion = currentRow[j - 1] + 1;
            const substitution = previousRow[j - 1] + (a[i - 1] === b[j - 1] ? 0 : 1);
            currentRow.push(Math.min(deletion, insertion, substitution));
        }
        previousRow = currentRow;
    }
    return previousRow[b.length];
}
|
package/dist/markit.d.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { ConversionResult, StreamInfo, MarkitOptions } from "./types.js";
|
|
2
|
+
import type { PluginDef } from "./plugins/types.js";
|
|
3
|
+
/**
 * Converts files, URLs and raw buffers to markdown by dispatching to an
 * ordered list of converters (plugin converters first, then built-ins).
 */
export declare class Markit {
    private converters;
    private options;
    /**
     * @param options Options handed to each converter's `convert` call.
     * @param plugins Plugin definitions whose converters are tried before the built-in ones.
     */
    constructor(options?: MarkitOptions, plugins?: PluginDef[]);
    /**
     * Read a local file and convert it; the extension and file name are derived from `path`.
     */
    convertFile(path: string): Promise<ConversionResult>;
    /**
     * Fetch `url` and convert the response body.
     * @throws Error when the HTTP response status is not OK.
     */
    convertUrl(url: string): Promise<ConversionResult>;
    /**
     * Convert an in-memory buffer using the first converter that accepts `streamInfo`.
     * @throws Error when every accepting converter fails, or when none accepts the input.
     */
    convert(input: Buffer, streamInfo: StreamInfo): Promise<ConversionResult>;
}
|
package/dist/markit.js
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { extname, basename } from "node:path";
|
|
3
|
+
import { PdfConverter } from "./converters/pdf.js";
|
|
4
|
+
import { DocxConverter } from "./converters/docx.js";
|
|
5
|
+
import { PptxConverter } from "./converters/pptx.js";
|
|
6
|
+
import { XlsxConverter } from "./converters/xlsx.js";
|
|
7
|
+
import { EpubConverter } from "./converters/epub.js";
|
|
8
|
+
import { IpynbConverter } from "./converters/ipynb.js";
|
|
9
|
+
import { HtmlConverter } from "./converters/html.js";
|
|
10
|
+
import { WikipediaConverter } from "./converters/wikipedia.js";
|
|
11
|
+
import { RssConverter } from "./converters/rss.js";
|
|
12
|
+
import { CsvConverter } from "./converters/csv.js";
|
|
13
|
+
import { JsonConverter } from "./converters/json.js";
|
|
14
|
+
import { YamlConverter } from "./converters/yaml.js";
|
|
15
|
+
import { XmlConverter } from "./converters/xml.js";
|
|
16
|
+
import { ZipConverter } from "./converters/zip.js";
|
|
17
|
+
import { ImageConverter } from "./converters/image.js";
|
|
18
|
+
import { AudioConverter } from "./converters/audio.js";
|
|
19
|
+
import { PlainTextConverter } from "./converters/plain-text.js";
|
|
20
|
+
/**
 * Format dispatcher: keeps an ordered list of converters and hands each input
 * to the first converter that accepts it.
 */
export class Markit {
    converters = [];
    options;
    /**
     * @param options Passed unchanged as the third argument of every converter's `convert`.
     * @param plugins Plugin definitions; their converters are tried before the built-ins.
     */
    constructor(options = {}, plugins = []) {
        this.options = options;
        // Plugin converters go first — they override builtins for the same format.
        // A plugin definition without a `converters` list contributes nothing
        // (instead of putting `undefined` into the dispatch list).
        const pluginConverters = plugins.flatMap((p) => p.converters ?? []);
        // Built-in converters: specific formats first, generic last.
        const specific = [
            new PdfConverter(),
            new DocxConverter(),
            new PptxConverter(),
            new XlsxConverter(),
            new EpubConverter(),
            new IpynbConverter(),
            new WikipediaConverter(),
            new RssConverter(),
            new CsvConverter(),
            new JsonConverter(),
            new YamlConverter(),
            new ImageConverter(),
            new AudioConverter(),
        ];
        const generic = [
            new XmlConverter(),
            new HtmlConverter(),
        ];
        // ZIP gets all converters (plugin + builtin) for recursive extraction
        const allNonZip = [...pluginConverters, ...specific, ...generic];
        const zipConverter = new ZipConverter(allNonZip);
        // Plugin converters first, then builtins, plain text last
        this.converters = [
            ...pluginConverters,
            ...specific,
            zipConverter,
            ...generic,
            new PlainTextConverter(),
        ];
    }
    /**
     * Convert a local file to markdown.
     * The file is read synchronously; the lower-cased extension and the base
     * name of `path` are passed along as stream info.
     */
    async convertFile(path) {
        const buffer = readFileSync(path);
        const streamInfo = {
            localPath: path,
            extension: extname(path).toLowerCase(),
            filename: basename(path),
        };
        return this.convert(buffer, streamInfo);
    }
    /**
     * Convert a URL to markdown.
     * @throws Error when the response status is not OK.
     */
    async convertUrl(url) {
        const response = await fetch(url, {
            headers: {
                Accept: "text/markdown, text/html;q=0.9, text/plain;q=0.8, */*;q=0.1",
                // Identify as this package (the value previously carried the old "mill" name).
                "User-Agent": "markit/0.1.0",
            },
        });
        if (!response.ok) {
            throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
        }
        // Keep only the media type: drop parameters such as "; charset=utf-8" and
        // lower-case it, since media types are case-insensitive.
        const contentType = response.headers.get("content-type") || "";
        const mimetype = contentType.split(";")[0].trim().toLowerCase();
        // Derive extension from URL path
        const urlPath = new URL(url).pathname;
        const ext = extname(urlPath).toLowerCase();
        const buffer = Buffer.from(await response.arrayBuffer());
        const streamInfo = {
            url,
            // Absent header → undefined, same convention as extension/filename below.
            mimetype: mimetype || undefined,
            extension: ext || undefined,
            filename: basename(urlPath) || undefined,
        };
        return this.convert(buffer, streamInfo);
    }
    /**
     * Convert a buffer with stream info to markdown.
     * Converters are tried in order; the first accepting converter that
     * succeeds wins. A converter that throws is recorded and the next is tried.
     * @throws Error listing every failure when at least one converter accepted
     *   the input but all of them threw; "Unsupported format" when none accepted it.
     */
    async convert(input, streamInfo) {
        const errors = [];
        for (const converter of this.converters) {
            if (!converter.accepts(streamInfo))
                continue;
            try {
                return await converter.convert(input, streamInfo, this.options);
            }
            catch (err) {
                errors.push({
                    converter: converter.name,
                    error: err instanceof Error ? err : new Error(String(err)),
                });
            }
        }
        if (errors.length > 0) {
            const details = errors
                .map((e) => ` ${e.converter}: ${e.error.message}`)
                .join("\n");
            throw new Error(`Conversion failed:\n${details}`);
        }
        throw new Error(`Unsupported format: ${streamInfo.extension || streamInfo.mimetype || "unknown"}`);
    }
}
|
package/dist/mill.d.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import type { ConversionResult, StreamInfo, ConvertOptions } from "./types.js";
|
|
2
|
+
/**
 * Converts files, URLs and raw buffers to markdown using a fixed,
 * ordered list of built-in converters.
 */
export declare class Mill {
    private converters;
    private options;
    /**
     * @param options Options handed to each converter's `convert` call.
     */
    constructor(options?: ConvertOptions);
    /**
     * Read a local file and convert it; the extension and file name are derived from `path`.
     */
    convertFile(path: string): Promise<ConversionResult>;
    /**
     * Fetch `url` and convert the response body.
     * @throws Error when the HTTP response status is not OK.
     */
    convertUrl(url: string): Promise<ConversionResult>;
    /**
     * Convert an in-memory buffer using the first converter that accepts `streamInfo`.
     * @throws Error when every accepting converter fails, or when none accepts the input.
     */
    convert(input: Buffer, streamInfo: StreamInfo): Promise<ConversionResult>;
}
|
package/dist/mill.js
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import { readFileSync } from "node:fs";
|
|
2
|
+
import { extname, basename } from "node:path";
|
|
3
|
+
import { PdfConverter } from "./converters/pdf.js";
|
|
4
|
+
import { DocxConverter } from "./converters/docx.js";
|
|
5
|
+
import { PptxConverter } from "./converters/pptx.js";
|
|
6
|
+
import { XlsxConverter } from "./converters/xlsx.js";
|
|
7
|
+
import { EpubConverter } from "./converters/epub.js";
|
|
8
|
+
import { IpynbConverter } from "./converters/ipynb.js";
|
|
9
|
+
import { HtmlConverter } from "./converters/html.js";
|
|
10
|
+
import { WikipediaConverter } from "./converters/wikipedia.js";
|
|
11
|
+
import { RssConverter } from "./converters/rss.js";
|
|
12
|
+
import { CsvConverter } from "./converters/csv.js";
|
|
13
|
+
import { JsonConverter } from "./converters/json.js";
|
|
14
|
+
import { YamlConverter } from "./converters/yaml.js";
|
|
15
|
+
import { XmlConverter } from "./converters/xml.js";
|
|
16
|
+
import { ZipConverter } from "./converters/zip.js";
|
|
17
|
+
import { ImageConverter } from "./converters/image.js";
|
|
18
|
+
import { AudioConverter } from "./converters/audio.js";
|
|
19
|
+
import { PlainTextConverter } from "./converters/plain-text.js";
|
|
20
|
+
/**
 * Built-in-only format dispatcher: tries an ordered list of converters and
 * returns the result of the first one that accepts the input and succeeds.
 */
export class Mill {
    converters = [];
    options;
    /**
     * @param options Passed unchanged as the third argument of every converter's `convert`.
     */
    constructor(options = {}) {
        this.options = options;
        // Order matters: format-specific converters come before the generic markup
        // ones, and the URL-specific Wikipedia converter precedes generic HTML.
        const formatSpecific = [
            new PdfConverter(),
            new DocxConverter(),
            new PptxConverter(),
            new XlsxConverter(),
            new EpubConverter(),
            new IpynbConverter(),
            new WikipediaConverter(),
            new RssConverter(),
            new CsvConverter(),
            new JsonConverter(),
            new YamlConverter(),
            new ImageConverter(),
            new AudioConverter(),
        ];
        const genericMarkup = [new XmlConverter(), new HtmlConverter()];
        // The ZIP converter receives every other converter so it can convert archive entries recursively.
        const zip = new ZipConverter([...formatSpecific, ...genericMarkup]);
        // Plain text is the final catch-all.
        this.converters = [...formatSpecific, zip, ...genericMarkup, new PlainTextConverter()];
    }
    /**
     * Convert a local file to markdown. The file is read synchronously.
     */
    async convertFile(path) {
        const contents = readFileSync(path);
        return this.convert(contents, {
            localPath: path,
            extension: extname(path).toLowerCase(),
            filename: basename(path),
        });
    }
    /**
     * Convert a URL to markdown.
     * @throws Error when the response status is not OK.
     */
    async convertUrl(url) {
        const requestHeaders = {
            Accept: "text/markdown, text/html;q=0.9, text/plain;q=0.8, */*;q=0.1",
            "User-Agent": "mill/0.1.0",
        };
        const res = await fetch(url, { headers: requestHeaders });
        if (!res.ok) {
            throw new Error(`Failed to fetch ${url}: ${res.status} ${res.statusText}`);
        }
        // Media type without parameters such as "; charset=...".
        const mediaType = (res.headers.get("content-type") || "").split(";")[0];
        // Extension and file name come from the URL's path component.
        const { pathname } = new URL(url);
        const extension = extname(pathname).toLowerCase();
        const body = Buffer.from(await res.arrayBuffer());
        return this.convert(body, {
            url,
            mimetype: mediaType.trim(),
            extension: extension || undefined,
            filename: basename(pathname) || undefined,
        });
    }
    /**
     * Convert a buffer with stream info to markdown.
     * @throws Error listing each failure when accepting converters all threw;
     *   "Unsupported format" when no converter accepted the input.
     */
    async convert(input, streamInfo) {
        // One formatted line per converter that accepted the input but threw.
        const failureLines = [];
        for (const candidate of this.converters) {
            if (candidate.accepts(streamInfo)) {
                try {
                    return await candidate.convert(input, streamInfo, this.options);
                }
                catch (err) {
                    const reason = err instanceof Error ? err.message : String(err);
                    failureLines.push(` ${candidate.name}: ${reason}`);
                }
            }
        }
        if (failureLines.length === 0) {
            throw new Error(`Unsupported format: ${streamInfo.extension || streamInfo.mimetype || "unknown"}`);
        }
        throw new Error(`Conversion failed:\n${failureLines.join("\n")}`);
    }
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { MarkitPluginAPI, PluginDef, PluginFunction } from "./types.js";
|
|
2
|
+
/**
 * Create a registration API for one plugin.
 * @param pluginId Identifier used as the plugin's name until the plugin sets one itself.
 * @returns `api` for the plugin to register against, and `resolve` to read
 *   back the resulting plugin definition.
 */
export declare function createPluginAPI(pluginId: string): {
    api: MarkitPluginAPI;
    resolve: () => PluginDef;
};
/**
 * Type guard: true when `val` is a function.
 * Accepts `unknown` rather than `any` so the argument stays type-checked until narrowed.
 */
export declare function isPluginFunction(val: unknown): val is PluginFunction;
/**
 * Normalise a plugin module's export into a plugin definition.
 * @param exported Either a plugin function or an already-built definition object.
 * @param pluginId Identifier used for a function plugin's default name and in error messages.
 * @throws Error when `exported` is neither a function nor an object with `converters`.
 */
export declare function resolvePluginExport(exported: PluginFunction | PluginDef, pluginId: string): PluginDef;
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
 * Build the registration API given to a function-style plugin.
 *
 * Returns `api`, whose methods record what the plugin registers, and
 * `resolve`, which returns a new definition object on each call containing
 * the current name/version and the same converter, provider and format
 * arrays that registration appends to.
 *
 * @param pluginId Used as the plugin name until `api.setName` is called.
 */
export function createPluginAPI(pluginId) {
    // Defaults until the plugin overrides them.
    let name = pluginId;
    let version = "0.0.0";
    const converters = [];
    const providers = [];
    const formats = [];
    const api = {
        setName: (n) => {
            name = n;
        },
        setVersion: (v) => {
            version = v;
        },
        registerConverter: (converter, format) => {
            converters.push(converter);
            // The format descriptor is optional; record it only when given.
            if (!format) {
                return;
            }
            formats.push(format);
        },
        registerProvider: (provider) => {
            providers.push(provider);
        },
    };
    const resolve = () => ({ name, version, converters, providers, formats });
    return { api, resolve };
}
|
|
29
|
+
/**
 * Report whether a plugin module's export is a function
 * (as opposed to a ready-made definition object).
 */
export function isPluginFunction(val) {
    const kind = typeof val;
    return kind === "function";
}
|
|
32
|
+
/**
 * Turn whatever a plugin module exported into a plugin definition.
 *
 * - A function is called once with a fresh registration API, and the
 *   definition it built is returned.
 * - An object that has a `converters` property is returned as-is.
 *
 * @param exported The plugin module's export.
 * @param pluginId Default plugin name for function plugins; also quoted in the error message.
 * @throws Error when `exported` matches neither shape.
 */
export function resolvePluginExport(exported, pluginId) {
    if (isPluginFunction(exported)) {
        const registration = createPluginAPI(pluginId);
        exported(registration.api);
        return registration.resolve();
    }
    // Only the presence of `converters` is checked for object exports.
    const isDefinitionObject = Boolean(exported) &&
        typeof exported === "object" &&
        "converters" in exported;
    if (!isDefinitionObject) {
        throw new Error(`Invalid plugin export from "${pluginId}": expected a function or { name, converters } object`);
    }
    return exported;
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export type { MarkitPluginAPI, PluginFunction, PluginDef } from "./types.js";
|
|
2
|
+
export { createPluginAPI, resolvePluginExport } from "./api.js";
|
|
3
|
+
export { loadPluginFromPath, loadAllPlugins } from "./loader.js";
|
|
4
|
+
export { installPlugin, removePlugin, listInstalled, parsePluginSource, } from "./installer.js";
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
/**
 * Parsed form of a plugin source string.
 * NOTE(review): field meanings below are inferred from names; confirm against installer.js.
 */
export interface PluginSource {
    /** Where the plugin comes from. */
    type: "npm" | "git" | "local";
    name: string;
    /** Presumably a version or git ref — TODO confirm. */
    ref?: string;
    url?: string;
    path?: string;
    subpath?: string;
}
/** Record describing one installed plugin. */
export interface InstalledPlugin {
    source: string;
    path: string;
    name?: string;
}
/** Parse a plugin source string into a `PluginSource`. */
export declare function parsePluginSource(source: string): PluginSource;
/**
 * Install the plugin identified by `source`.
 * @returns The installed plugin's path and name.
 */
export declare function installPlugin(source: string): Promise<{
    path: string;
    name: string;
}>;
/**
 * Remove the installed plugin called `name`.
 * @returns A boolean — presumably whether a plugin was removed; verify against the implementation.
 */
export declare function removePlugin(name: string): boolean;
/** List installed plugins with their name, type, source and path. */
export declare function listInstalled(): Array<{
    name: string;
    type: string;
    source: string;
    path: string;
}>;
|