markit-ai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +333 -0
  3. package/dist/commands/config.d.ts +4 -0
  4. package/dist/commands/config.js +133 -0
  5. package/dist/commands/convert.d.ts +5 -0
  6. package/dist/commands/convert.js +110 -0
  7. package/dist/commands/formats.d.ts +2 -0
  8. package/dist/commands/formats.js +56 -0
  9. package/dist/commands/init.d.ts +2 -0
  10. package/dist/commands/init.js +29 -0
  11. package/dist/commands/onboard.d.ts +2 -0
  12. package/dist/commands/onboard.js +61 -0
  13. package/dist/commands/plugin.d.ts +4 -0
  14. package/dist/commands/plugin.js +58 -0
  15. package/dist/config.d.ts +26 -0
  16. package/dist/config.js +42 -0
  17. package/dist/converters/audio.d.ts +7 -0
  18. package/dist/converters/audio.js +87 -0
  19. package/dist/converters/csv.d.ts +7 -0
  20. package/dist/converters/csv.js +83 -0
  21. package/dist/converters/docx.d.ts +6 -0
  22. package/dist/converters/docx.js +28 -0
  23. package/dist/converters/epub.d.ts +8 -0
  24. package/dist/converters/epub.js +110 -0
  25. package/dist/converters/html.d.ts +6 -0
  26. package/dist/converters/html.js +33 -0
  27. package/dist/converters/image.d.ts +6 -0
  28. package/dist/converters/image.js +94 -0
  29. package/dist/converters/ipynb.d.ts +6 -0
  30. package/dist/converters/ipynb.js +72 -0
  31. package/dist/converters/json.d.ts +6 -0
  32. package/dist/converters/json.js +21 -0
  33. package/dist/converters/pdf.d.ts +6 -0
  34. package/dist/converters/pdf.js +29 -0
  35. package/dist/converters/plain-text.d.ts +6 -0
  36. package/dist/converters/plain-text.js +41 -0
  37. package/dist/converters/pptx.d.ts +8 -0
  38. package/dist/converters/pptx.js +189 -0
  39. package/dist/converters/rss.d.ts +11 -0
  40. package/dist/converters/rss.js +134 -0
  41. package/dist/converters/wikipedia.d.ts +6 -0
  42. package/dist/converters/wikipedia.js +35 -0
  43. package/dist/converters/xlsx.d.ts +8 -0
  44. package/dist/converters/xlsx.js +139 -0
  45. package/dist/converters/xml.d.ts +6 -0
  46. package/dist/converters/xml.js +17 -0
  47. package/dist/converters/yaml.d.ts +6 -0
  48. package/dist/converters/yaml.js +16 -0
  49. package/dist/converters/zip.d.ts +8 -0
  50. package/dist/converters/zip.js +56 -0
  51. package/dist/index.d.ts +28 -0
  52. package/dist/index.js +24 -0
  53. package/dist/llm.d.ts +10 -0
  54. package/dist/llm.js +139 -0
  55. package/dist/main.d.ts +2 -0
  56. package/dist/main.js +182 -0
  57. package/dist/markit.d.ts +19 -0
  58. package/dist/markit.js +124 -0
  59. package/dist/mill.d.ts +18 -0
  60. package/dist/mill.js +123 -0
  61. package/dist/plugins/api.d.ts +7 -0
  62. package/dist/plugins/api.js +44 -0
  63. package/dist/plugins/index.d.ts +4 -0
  64. package/dist/plugins/index.js +3 -0
  65. package/dist/plugins/installer.d.ts +25 -0
  66. package/dist/plugins/installer.js +176 -0
  67. package/dist/plugins/loader.d.ts +6 -0
  68. package/dist/plugins/loader.js +61 -0
  69. package/dist/plugins/types.d.ts +25 -0
  70. package/dist/plugins/types.js +1 -0
  71. package/dist/providers/anthropic.d.ts +2 -0
  72. package/dist/providers/anthropic.js +47 -0
  73. package/dist/providers/index.d.ts +21 -0
  74. package/dist/providers/index.js +58 -0
  75. package/dist/providers/openai.d.ts +2 -0
  76. package/dist/providers/openai.js +65 -0
  77. package/dist/providers/types.d.ts +26 -0
  78. package/dist/providers/types.js +1 -0
  79. package/dist/types.d.ts +28 -0
  80. package/dist/types.js +1 -0
  81. package/dist/utils/exit-codes.d.ts +4 -0
  82. package/dist/utils/exit-codes.js +4 -0
  83. package/dist/utils/output.d.ts +22 -0
  84. package/dist/utils/output.js +31 -0
  85. package/package.json +70 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Michael Liv
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,333 @@
1
+ # markit
2
+
3
+ Convert anything to markdown. PDF, DOCX, PPTX, XLSX, HTML, EPUB, Jupyter, RSS, images, audio, URLs, and more. Pluggable converters, built-in LLM providers for image description and audio transcription. Works as a CLI and as a library.
4
+
5
+ ```bash
6
+ npm install -g markit-ai
7
+ ```
8
+
9
+ ---
10
+
11
+ ## Quick Start
12
+
13
+ ```bash
14
+ # Documents
15
+ markit report.pdf
16
+ markit document.docx
17
+ markit slides.pptx
18
+
19
+ # Data
20
+ markit data.csv
21
+ markit config.json
22
+ markit schema.yaml
23
+
24
+ # Web
25
+ markit https://example.com/article
26
+ markit https://en.wikipedia.org/wiki/Markdown
27
+
28
+ # Media (via LLMs; set OPENAI_API_KEY or ANTHROPIC_API_KEY)
29
+ markit photo.jpg # EXIF metadata + AI description
30
+ markit recording.mp3 # Audio metadata + transcription
31
+ markit photo.jpg -p "Extract all text" # Custom instructions
32
+
33
+ # Write to file
34
+ markit report.pdf -o report.md
35
+
36
+ # Pipe it
37
+ markit report.pdf | pbcopy
38
+ markit data.xlsx -q | napkin create "Imported Data"
39
+ ```
40
+
41
+ ---
42
+
43
+ ## Supported Formats
44
+
45
+ | Format | Extensions | How |
46
+ |--------|-----------|-----|
47
+ | PDF | `.pdf` | Text extraction via unpdf |
48
+ | Word | `.docx` | mammoth → turndown, preserves headings/tables |
49
+ | PowerPoint | `.pptx` | XML parsing, slides + notes + tables |
50
+ | Excel | `.xlsx` | Each sheet → markdown table |
51
+ | HTML | `.html` `.htm` | turndown, scripts/styles stripped |
52
+ | EPUB | `.epub` | Spine-ordered chapters, metadata header |
53
+ | Jupyter | `.ipynb` | Markdown cells + code + outputs |
54
+ | RSS/Atom | `.rss` `.atom` `.xml` | Feed items with dates and content |
55
+ | CSV/TSV | `.csv` `.tsv` | Markdown tables |
56
+ | JSON | `.json` | Pretty-printed code block |
57
+ | YAML | `.yaml` `.yml` | Code block |
58
+ | XML/SVG | `.xml` `.svg` | Code block |
59
+ | Images | `.jpg` `.png` `.gif` `.webp` | EXIF metadata + optional AI description |
60
+ | Audio | `.mp3` `.wav` `.m4a` `.flac` | Metadata + optional AI transcription |
61
+ | ZIP | `.zip` | Recursive — converts each file inside |
62
+ | URLs | `http://` `https://` | Fetches with `Accept: text/markdown` |
63
+ | Wikipedia | `*.wikipedia.org` | Main content extraction |
64
+ | Code | `.py` `.ts` `.go` `.rs` ... | Fenced code block |
65
+ | Plain text | `.txt` `.md` `.rst` `.log` | Pass-through |
66
+
67
+ Need more? [Write a plugin.](#plugins)
68
+
69
+ ---
70
+
71
+ ## AI Features
72
+
73
+ Images and audio get metadata extraction for free. For AI-powered descriptions and transcription, set an API key:
74
+
75
+ ```bash
76
+ # OpenAI (default provider)
77
+ export OPENAI_API_KEY=sk-...
78
+ markit photo.jpg
79
+
80
+ # Anthropic
81
+ markit config set llm.provider anthropic
82
+ export ANTHROPIC_API_KEY=sk-ant-...
83
+ markit photo.jpg
84
+
85
+ # Any OpenAI-compatible API (Ollama, Groq, Together, etc.)
86
+ markit config set llm.apiBase http://localhost:11434/v1
87
+ ```
88
+
89
+ Focus the AI on what matters:
90
+
91
+ ```bash
92
+ markit receipt.jpg -p "List all line items with prices as a table"
93
+ markit diagram.png -p "Describe the architecture and data flow"
94
+ markit whiteboard.jpg -p "Extract all text verbatim"
95
+ ```
96
+
97
+ ---
98
+
99
+ ## Plugins
100
+
101
+ Extend markit with new formats, override builtins, or add LLM providers.
102
+
103
+ ### Install
104
+
105
+ ```bash
106
+ markit plugin install npm:markit-plugin-dwg
107
+ markit plugin install git:github.com/user/markit-plugin-ocr
108
+ markit plugin install ./my-plugin.ts
109
+ markit plugin list
110
+ markit plugin remove dwg
111
+ ```
112
+
113
+ ### Write a Plugin
114
+
115
+ A plugin is a function that receives an API and registers converters and/or providers:
116
+
117
+ ```typescript
118
+ import type { MarkitPluginAPI } from "markit-ai";
119
+
120
+ export default function(api: MarkitPluginAPI) {
121
+ api.setName("cad");
122
+ api.setVersion("1.0.0");
123
+
124
+ // Register a converter for a new format
125
+ api.registerConverter(
126
+ {
127
+ name: "dwg",
128
+ accepts: (info) => [".dwg", ".dxf"].includes(info.extension || ""),
129
+ convert: async (input, info) => {
130
+ // Your conversion logic
131
+ return { markdown: "..." };
132
+ },
133
+ },
134
+ // Optional: declare the format so it shows in `markit formats`
135
+ { name: "AutoCAD", extensions: [".dwg", ".dxf"] },
136
+ );
137
+ }
138
+ ```
139
+
140
+ Plugin converters run **before** builtins, so you can override any format:
141
+
142
+ ```typescript
143
+ export default function(api: MarkitPluginAPI) {
144
+ api.setName("better-pdf");
145
+
146
+ // This replaces the built-in PDF converter
147
+ api.registerConverter({
148
+ name: "pdf",
149
+ accepts: (info) => info.extension === ".pdf",
150
+ convert: async (input, info) => {
151
+ // Your superior PDF extraction
152
+ return { markdown: "..." };
153
+ },
154
+ });
155
+ }
156
+ ```
157
+
158
+ Plugins can also register LLM providers:
159
+
160
+ ```typescript
161
+ api.registerProvider({
162
+ name: "gemini",
163
+ envKeys: ["GOOGLE_API_KEY"],
164
+ defaultBase: "https://generativelanguage.googleapis.com/v1beta",
165
+ defaultModel: "gemini-2.0-flash",
166
+ create: (config, prompt) => ({
167
+ describe: async (image, mime) => { /* ... */ },
168
+ }),
169
+ });
170
+ ```
171
+
172
+ ---
173
+
174
+ ## For Agents
175
+
176
+ Every command supports `--json`. Raw markdown with `-q`.
177
+
178
+ ```bash
179
+ markit report.pdf --json # Structured output for parsing
180
+ markit report.pdf -q # Raw markdown, nothing else
181
+ markit onboard # Add instructions to CLAUDE.md
182
+ ```
183
+
184
+ ---
185
+
186
+ ## SDK
187
+
188
+ markit is also a library:
189
+
190
+ ```typescript
191
+ import { Markit } from "markit-ai";
192
+
193
+ const markit = new Markit();
194
+ const { markdown } = await markit.convertFile("report.pdf");
195
+ const { markdown: page } = await markit.convertUrl("https://example.com");
196
+ const { markdown: doc } = await markit.convert(buffer, { extension: ".docx" });
197
+ ```
198
+
199
+ With AI features — pass plain functions, use any provider:
200
+
201
+ ```typescript
202
+ import OpenAI from "openai";
203
+ import { Markit } from "markit-ai";
204
+
205
+ const openai = new OpenAI();
206
+
207
+ const markit = new Markit({
208
+ describe: async (image, mime) => {
209
+ const res = await openai.chat.completions.create({
210
+ model: "gpt-4.1-nano",
211
+ messages: [{ role: "user", content: [
212
+ { type: "text", text: "Describe this image." },
213
+ { type: "image_url", image_url: { url: `data:${mime};base64,${image.toString("base64")}` } },
214
+ ]}],
215
+ });
216
+ return res.choices[0].message.content ?? "";
217
+ },
218
+ transcribe: async (audio, mime) => {
219
+ const res = await openai.audio.transcriptions.create({
220
+ model: "gpt-4o-mini-transcribe",
221
+ file: new File([audio], "audio.mp3", { type: mime }),
222
+ });
223
+ return res.text;
224
+ },
225
+ });
226
+ ```
227
+
228
+ Mix providers — Claude for vision, OpenAI for audio, whatever:
229
+
230
+ ```typescript
231
+ const markit = new Markit({
232
+ describe: async (image, mime) => {
233
+ const res = await anthropic.messages.create({
234
+ model: "claude-haiku-4-5",
235
+ messages: [{ role: "user", content: [
236
+ { type: "image", source: { type: "base64", media_type: mime, data: image.toString("base64") } },
237
+ { type: "text", text: "Describe this image." },
238
+ ]}],
239
+ });
240
+ return res.content[0].text;
241
+ },
242
+ transcribe: async (audio, mime) => { /* Whisper, Deepgram, AssemblyAI, ... */ },
243
+ });
244
+ ```
245
+
246
+ Or use the built-in providers — no SDK needed:
247
+
248
+ ```typescript
249
+ import { Markit, createLlmFunctions, loadConfig } from "markit-ai";
250
+
251
+ const config = loadConfig(); // reads .markit/config.json + env vars
252
+ const markit = new Markit(createLlmFunctions(config));
253
+ ```
254
+
255
+ With plugins:
256
+
257
+ ```typescript
258
+ import { Markit, createLlmFunctions, loadConfig, loadAllPlugins } from "markit-ai";
259
+
260
+ const config = loadConfig();
261
+ const plugins = await loadAllPlugins();
262
+ const markit = new Markit(createLlmFunctions(config), plugins);
263
+ ```
264
+
265
+ ---
266
+
267
+ ## Configuration
268
+
269
+ ```bash
270
+ markit init # Create .markit/config.json
271
+ markit config show # Show resolved settings
272
+ markit config get llm.model # Get a value
273
+ markit config set llm.provider anthropic # Switch provider
274
+ markit config set llm.apiKey sk-... # Set a value
275
+ ```
276
+
277
+ `.markit/config.json`:
278
+
279
+ ```json
280
+ {
281
+ "llm": {
282
+ "provider": "openai",
283
+ "apiBase": "https://api.openai.com/v1",
284
+ "apiKey": "sk-...",
285
+ "model": "gpt-4.1-nano",
286
+ "transcriptionModel": "gpt-4o-mini-transcribe"
287
+ }
288
+ }
289
+ ```
290
+
291
+ Env vars override config. Each provider checks its own env vars first:
292
+
293
+ | Provider | Env vars | Default model |
294
+ |----------|---------|---------------|
295
+ | `openai` | `OPENAI_API_KEY`, `MARKIT_API_KEY` | `gpt-4.1-nano` |
296
+ | `anthropic` | `ANTHROPIC_API_KEY`, `MARKIT_API_KEY` | `claude-haiku-4-5` |
297
+
298
+ ---
299
+
300
+ ## CLI Reference
301
+
302
+ ```bash
303
+ markit <source> # Convert file or URL
304
+ markit <source> -o output.md # Write to file
305
+ markit <source> -p "instructions" # Custom AI prompt
306
+ markit <source> --json # JSON output
307
+ markit <source> -q # Raw markdown only
308
+ cat file.pdf | markit - # Read from stdin
309
+ markit formats # List supported formats
310
+ markit init # Create .markit/ config
311
+ markit config show # Show settings
312
+ markit config get <key> # Get config value
313
+ markit config set <key> <value> # Set config value
314
+ markit plugin install <source> # Install plugin
315
+ markit plugin list # List plugins
316
+ markit plugin remove <name> # Remove plugin
317
+ markit onboard # Add to CLAUDE.md
318
+ ```
319
+
320
+ ---
321
+
322
+ ## Development
323
+
324
+ ```bash
325
+ bun install
326
+ bun run dev -- report.pdf
327
+ bun test
328
+ bun run check
329
+ ```
330
+
331
+ ## License
332
+
333
+ MIT
@@ -0,0 +1,4 @@
1
+ import type { OutputOptions } from "../utils/output.js";
2
/** Show the loaded configuration, the config directory, and the settings of the selected LLM provider. `_args` is unused. */
+ export declare function configShow(_args: string[], options: OutputOptions): Promise<void>;
3
/** Print the config value stored at the dot-separated `key`; exits with the user-error code when the key has no value. */
+ export declare function configGet(key: string, options: OutputOptions): Promise<void>;
4
/**
 * Store `value` at the dot-separated `key` and save the config. Requires an existing config directory.
 * When `value` is omitted and the key name contains "key", "secret" or "token", the value is read from stdin.
 */
+ export declare function configSet(key: string, value: string | undefined, options: OutputOptions): Promise<void>;
@@ -0,0 +1,133 @@
1
+ import { output, success, error, dim, bold } from "../utils/output.js";
2
+ import { loadConfig, saveConfig, findConfigDir } from "../config.js";
3
+ import { getProvider, listProviders } from "../providers/index.js";
4
+ import { EXIT_ERROR, EXIT_USER_ERROR } from "../utils/exit-codes.js";
5
/**
 * Show the loaded configuration and the LLM settings derived from it.
 *
 * The `json` handler returns the config directory, the raw config object and
 * the list of registered provider names. The `human` handler prints a summary:
 * config file location, selected provider, a masked API key with its source,
 * API base, model, optional transcription model and the provider's env vars.
 *
 * @param _args   Unused positional arguments.
 * @param options Output options forwarded to `output()`.
 */
export async function configShow(_args, options) {
    const config = loadConfig();
    const configDir = findConfigDir();
    const providerName = config.llm?.provider || "openai";
    const provider = getProvider(providerName);
    output(options, {
        json: () => ({
            configDir,
            config,
            providers: listProviders(),
        }),
        human: () => {
            console.log();
            console.log(bold("Configuration"));
            console.log();
            if (configDir) {
                console.log(` ${dim("config:")} ${configDir}/config.json`);
            }
            else {
                console.log(` ${dim("config:")} none (run 'markit init')`);
            }
            console.log();
            console.log(bold("LLM Settings"));
            console.log();
            console.log(` ${dim("provider:")} ${providerName}`);
            if (provider) {
                // The first provider env var holding a value wins; otherwise fall back to the config file.
                const envKey = provider.envKeys.find((k) => process.env[k]);
                const rawKey = (envKey ? process.env[envKey] : undefined) || config.llm?.apiKey;
                const keySource = envKey || (config.llm?.apiKey ? "config" : undefined);
                // A key stored in config.json may not be a string (configSet turns
                // digit-only values into numbers), so coerce before slicing.
                const apiKey = rawKey ? String(rawKey) : undefined;
                console.log(` ${dim("api key:")} ${apiKey ? `***${apiKey.slice(-4)} (${keySource})` : dim("not set")}`);
                console.log(` ${dim("api base:")} ${config.llm?.apiBase || provider.defaultBase}`);
                console.log(` ${dim("model:")} ${config.llm?.model || provider.defaultModel}`);
                if (provider.defaultTranscriptionModel) {
                    console.log(` ${dim("transcription:")} ${config.llm?.transcriptionModel || provider.defaultTranscriptionModel}`);
                }
                console.log(` ${dim("env vars:")} ${provider.envKeys.join(", ")}`);
            }
            else {
                console.log(` ${dim("(unknown provider)")}`);
            }
            console.log();
            console.log(dim(` Available providers: ${listProviders().join(", ")}`));
            console.log();
        },
    });
}
51
/**
 * Print a single configuration value addressed by a dot-separated key.
 *
 * When the key resolves to `undefined`, an error is reported and the process
 * exits with `EXIT_USER_ERROR`. Objects and arrays (e.g. the whole `llm`
 * section) are printed as indented JSON; other values are printed via `String()`.
 *
 * @param key     Dot-separated path into the config, e.g. `llm.model`.
 * @param options Output options forwarded to `output()`.
 */
export async function configGet(key, options) {
    const config = loadConfig();
    const value = getNestedValue(config, key);
    if (value === undefined) {
        output(options, {
            json: () => ({ key, value: null }),
            human: () => error(`Key '${key}' not found`),
        });
        process.exit(EXIT_USER_ERROR);
    }
    // String() on an object yields "[object Object]", which is useless to the
    // user; serialise structured values instead.
    const isStructured = value !== null && typeof value === "object";
    const text = isStructured ? JSON.stringify(value, null, 2) : String(value);
    output(options, {
        json: () => ({ key, value }),
        quiet: () => console.log(text),
        human: () => console.log(text),
    });
}
67
/**
 * Set a configuration value addressed by a dot-separated key and save it.
 *
 * Requires an existing config directory; otherwise exits with `EXIT_ERROR`.
 * Keys whose name contains "key", "secret" or "token" are treated as secrets:
 * when no value is given they are read from stdin, and when a value is passed
 * as an argument a hint about shell history is printed to stderr.
 *
 * The literal strings "true"/"false" are stored as booleans. Digit-only
 * strings are stored as numbers only when the number converts back to exactly
 * the same text, so long numeric tokens and values with leading zeros are
 * kept as strings instead of being silently altered.
 *
 * @param key     Dot-separated path into the config, e.g. `llm.provider`.
 * @param value   New value, or `undefined` to read a secret from stdin.
 * @param options Output options forwarded to `output()`.
 */
export async function configSet(key, value, options) {
    if (!findConfigDir()) {
        output(options, {
            json: () => ({ success: false, error: "No .markit/ directory. Run 'markit init'" }),
            human: () => error("No .markit/ directory. Run 'markit init' first."),
        });
        process.exit(EXIT_ERROR);
    }
    // Report a usage problem through output() so --json callers get structured errors too.
    const failUsage = (message) => {
        output(options, {
            json: () => ({ success: false, error: message }),
            human: () => error(message),
        });
        process.exit(EXIT_USER_ERROR);
    };
    // Secrets: read from stdin instead of args (avoids shell history)
    const loweredKey = key.toLowerCase();
    const isSecret = loweredKey.includes("key") || loweredKey.includes("secret") || loweredKey.includes("token");
    let resolved;
    if (isSecret && !value) {
        // Prompt on stderr so the prompt never mixes with stdout output.
        if (process.stdin.isTTY) {
            process.stderr.write(`Enter value for ${key}: `);
        }
        const chunks = [];
        for await (const chunk of process.stdin) {
            chunks.push(chunk);
        }
        resolved = Buffer.concat(chunks).toString("utf-8").trim();
        if (!resolved) {
            failUsage("No value provided");
        }
    }
    else if (value === undefined) {
        failUsage("Missing value. Usage: markit config set <key> <value>");
    }
    else {
        if (isSecret) {
            // Warn if secret passed as arg
            console.error(dim(" hint: secrets in args leak to shell history. Use: markit config set llm.apiKey < keyfile"));
        }
        resolved = value;
    }
    const config = loadConfig();
    let parsed = resolved;
    if (resolved === "true") {
        parsed = true;
    }
    else if (resolved === "false") {
        parsed = false;
    }
    else if (/^\d+$/.test(resolved)) {
        const asNumber = Number.parseInt(resolved, 10);
        // Only coerce when nothing is lost: rejects values beyond the exact
        // integer range and values with leading zeros such as "007".
        if (String(asNumber) === resolved) {
            parsed = asNumber;
        }
    }
    setNestedValue(config, key, parsed);
    saveConfig(config);
    output(options, {
        json: () => ({ success: true, key, value: parsed }),
        human: () => success(`${key} = ${JSON.stringify(parsed)}`),
    });
}
120
+ function getNestedValue(obj, path) {
121
+ return path.split(".").reduce((o, k) => o?.[k], obj);
122
+ }
123
+ function setNestedValue(obj, path, value) {
124
+ const keys = path.split(".");
125
+ let current = obj;
126
+ for (let i = 0; i < keys.length - 1; i++) {
127
+ if (!current[keys[i]] || typeof current[keys[i]] !== "object") {
128
+ current[keys[i]] = {};
129
+ }
130
+ current = current[keys[i]];
131
+ }
132
+ current[keys[keys.length - 1]] = value;
133
+ }
@@ -0,0 +1,5 @@
1
+ import type { OutputOptions } from "../utils/output.js";
2
/**
 * Convert `source` to markdown and emit the result.
 * `source` is `-` for stdin, a string starting with `http:`, `https:` or `file:` for a URL,
 * and is otherwise treated as a file path.
 * `output`, when set, is the path the markdown is written to instead of stdout.
 * `prompt` is forwarded to `createLlmFunctions`.
 */
+ export declare function convert(source: string, options: OutputOptions & {
3
+ output?: string;
4
+ prompt?: string;
5
+ }): Promise<void>;
@@ -0,0 +1,110 @@
1
+ import { writeFileSync } from "node:fs";
2
+ import { Markit } from "../markit.js";
3
+ import { loadConfig } from "../config.js";
4
+ import { createLlmFunctions } from "../providers/index.js";
5
+ import { loadAllPlugins } from "../plugins/loader.js";
6
+ import { registerProvider } from "../providers/index.js";
7
+ import { output, success, error, dim } from "../utils/output.js";
8
+ import { EXIT_ERROR, EXIT_UNSUPPORTED } from "../utils/exit-codes.js";
9
/**
 * Read `process.stdin` until it ends.
 *
 * @returns A single Buffer holding every chunk read, in arrival order.
 */
async function readStdin() {
    const reader = process.stdin[Symbol.asyncIterator]();
    const pieces = [];
    for (let step = await reader.next(); !step.done; step = await reader.next()) {
        pieces.push(step.value);
    }
    return Buffer.concat(pieces);
}
16
/**
 * CLI entry point for converting one source to markdown.
 *
 * Loads config and plugins, registers plugin-supplied providers, then converts
 * `source`: `-` reads stdin, a string starting with `http:`, `https:` or
 * `file:` goes through `convertUrl`, anything else through `convertFile`.
 * The markdown is written to `options.output` when set, otherwise to stdout.
 * Any failure is reported through `output()` and ends the process with a
 * non-zero exit code.
 *
 * @param source  `-`, a URL, or a file path.
 * @param options Output options plus optional `output` path and `prompt`.
 */
export async function convert(source, options) {
    const config = loadConfig();
    const plugins = await loadAllPlugins();
    // Make plugin-supplied providers available before the LLM functions are built.
    for (const { providers } of plugins) {
        for (const provider of providers) {
            registerProvider(provider);
        }
    }
    const llmFunctions = createLlmFunctions(config, options.prompt);
    const markit = new Markit(llmFunctions, plugins);
    try {
        const fromStdin = source === "-";
        const looksLikeUrl = ["http:", "https:", "file:"].some((scheme) => source.startsWith(scheme));
        let result;
        if (fromStdin) {
            // An interactive terminal means nothing was piped in.
            if (process.stdin.isTTY) {
                error("No input on stdin. Pipe a file: cat report.pdf | markit -");
                process.exit(EXIT_ERROR);
            }
            const piped = await readStdin();
            result = await markit.convert(piped, {});
        }
        else if (looksLikeUrl) {
            // Progress goes to stderr so piped stdout stays clean.
            const showProgress = !options.json && !options.quiet;
            if (showProgress) {
                process.stderr.write(`ℹ Fetching ${source}...\n`);
            }
            result = await markit.convertUrl(source);
        }
        else {
            result = await markit.convertFile(source);
        }
        const label = fromStdin ? "stdin" : source;
        if (!options.output) {
            // No target file: the markdown itself is the output.
            const emitMarkdown = () => process.stdout.write(result.markdown);
            output(options, {
                json: () => ({
                    success: true,
                    source: label,
                    title: result.title,
                    markdown: result.markdown,
                }),
                quiet: emitMarkdown,
                human: emitMarkdown,
            });
        }
        else {
            writeFileSync(options.output, result.markdown);
            output(options, {
                json: () => ({
                    success: true,
                    source: label,
                    output: options.output,
                    title: result.title,
                    length: result.markdown.length,
                }),
                human: () => {
                    success(`Converted → ${options.output}`);
                    if (result.title) {
                        console.log(dim(` title: ${result.title}`));
                    }
                    console.log(dim(` ${result.markdown.length} chars`));
                },
            });
        }
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        // Report a failure in the active output mode, then terminate.
        const bail = (reported, humanPrinter, exitCode) => {
            output(options, {
                json: () => ({ success: false, error: reported }),
                human: humanPrinter,
            });
            process.exit(exitCode);
        };
        if (msg.includes("Unsupported format")) {
            bail(msg, () => {
                error(msg);
                console.log(dim(" Run 'markit formats' to see supported formats."));
            }, EXIT_UNSUPPORTED);
        }
        if (msg.includes("ENOENT") || msg.includes("no such file")) {
            const notFound = `File not found: ${source}`;
            bail(notFound, () => error(notFound), EXIT_ERROR);
        }
        bail(msg, () => error(msg), EXIT_ERROR);
    }
}
@@ -0,0 +1,2 @@
1
+ import type { OutputOptions } from "../utils/output.js";
2
/** List the built-in formats together with formats declared by loaded plugins. `_args` is unused. */
+ export declare function formats(_args: string[], options: OutputOptions): Promise<void>;
@@ -0,0 +1,56 @@
1
+ import { output, bold, dim } from "../utils/output.js";
2
+ import { loadAllPlugins } from "../plugins/loader.js";
3
+ const BUILTIN_FORMATS = [
4
+ { name: "PDF", extensions: [".pdf"], builtin: true },
5
+ { name: "Word", extensions: [".docx"], builtin: true },
6
+ { name: "PowerPoint", extensions: [".pptx"], builtin: true },
7
+ { name: "Excel", extensions: [".xlsx"], builtin: true },
8
+ { name: "HTML", extensions: [".html", ".htm"], builtin: true },
9
+ { name: "EPUB", extensions: [".epub"], builtin: true },
10
+ { name: "Jupyter", extensions: [".ipynb"], builtin: true },
11
+ { name: "RSS/Atom", extensions: [".rss", ".atom", ".xml"], builtin: true },
12
+ { name: "CSV", extensions: [".csv", ".tsv"], builtin: true },
13
+ { name: "JSON", extensions: [".json"], builtin: true },
14
+ { name: "YAML", extensions: [".yaml", ".yml"], builtin: true },
15
+ { name: "XML", extensions: [".xml", ".svg"], builtin: true },
16
+ { name: "Images", extensions: [".jpg", ".png", ".gif", ".webp"], builtin: true },
17
+ { name: "Audio", extensions: [".mp3", ".wav", ".m4a", ".flac"], builtin: true },
18
+ { name: "ZIP", extensions: [".zip"], builtin: true },
19
+ { name: "Plain text", extensions: [".txt", ".md", ".rst", ".log"], builtin: true },
20
+ { name: "Code", extensions: [".py", ".js", ".ts", ".go", ".rs", "..."], builtin: true },
21
+ { name: "URLs", extensions: ["http://", "https://"], builtin: true },
22
+ { name: "Wikipedia", extensions: ["*.wikipedia.org"], builtin: true },
23
+ ];
24
/**
 * List every supported format: the built-in table followed by formats
 * declared by loaded plugins.
 *
 * The `json` handler returns `{ formats }` with both groups combined; the
 * `human` handler prints the built-ins and, when any plugin declares formats,
 * a separate "Plugin formats" section naming the owning plugin.
 *
 * @param _args   Unused positional arguments.
 * @param options Output options forwarded to `output()`.
 */
export async function formats(_args, options) {
    const plugins = await loadAllPlugins();
    // Flatten each plugin's declared formats, tagging them with the plugin name.
    const pluginFormats = [];
    for (const plugin of plugins) {
        const declared = plugin.formats.map((declaredFormat) => ({
            name: declaredFormat.name,
            extensions: declaredFormat.extensions,
            builtin: false,
            plugin: plugin.name,
        }));
        pluginFormats.push(...declared);
    }
    const allFormats = BUILTIN_FORMATS.concat(pluginFormats);
    const printHuman = () => {
        console.log();
        console.log(bold("Supported formats"));
        console.log();
        BUILTIN_FORMATS.forEach((fmt) => {
            const exts = fmt.extensions.join(", ");
            let note = "";
            if (fmt.dep) {
                note = dim(` (requires: npm i ${fmt.dep})`);
            }
            console.log(` ${fmt.name.padEnd(14)} ${dim(exts)}${note}`);
        });
        if (pluginFormats.length > 0) {
            console.log();
            console.log(bold("Plugin formats"));
            console.log();
            pluginFormats.forEach((fmt) => {
                const exts = fmt.extensions.join(", ");
                console.log(` ${fmt.name.padEnd(14)} ${dim(exts)} ${dim(`(${fmt.plugin})`)}`);
            });
        }
        console.log();
    };
    output(options, {
        json: () => ({ formats: allFormats }),
        human: printHuman,
    });
}
@@ -0,0 +1,2 @@
1
+ import type { OutputOptions } from "../utils/output.js";
2
+ export declare function init(_args: string[], options: OutputOptions): Promise<void>;