markit-ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +333 -0
- package/dist/commands/config.d.ts +4 -0
- package/dist/commands/config.js +133 -0
- package/dist/commands/convert.d.ts +5 -0
- package/dist/commands/convert.js +110 -0
- package/dist/commands/formats.d.ts +2 -0
- package/dist/commands/formats.js +56 -0
- package/dist/commands/init.d.ts +2 -0
- package/dist/commands/init.js +29 -0
- package/dist/commands/onboard.d.ts +2 -0
- package/dist/commands/onboard.js +61 -0
- package/dist/commands/plugin.d.ts +4 -0
- package/dist/commands/plugin.js +58 -0
- package/dist/config.d.ts +26 -0
- package/dist/config.js +42 -0
- package/dist/converters/audio.d.ts +7 -0
- package/dist/converters/audio.js +87 -0
- package/dist/converters/csv.d.ts +7 -0
- package/dist/converters/csv.js +83 -0
- package/dist/converters/docx.d.ts +6 -0
- package/dist/converters/docx.js +28 -0
- package/dist/converters/epub.d.ts +8 -0
- package/dist/converters/epub.js +110 -0
- package/dist/converters/html.d.ts +6 -0
- package/dist/converters/html.js +33 -0
- package/dist/converters/image.d.ts +6 -0
- package/dist/converters/image.js +94 -0
- package/dist/converters/ipynb.d.ts +6 -0
- package/dist/converters/ipynb.js +72 -0
- package/dist/converters/json.d.ts +6 -0
- package/dist/converters/json.js +21 -0
- package/dist/converters/pdf.d.ts +6 -0
- package/dist/converters/pdf.js +29 -0
- package/dist/converters/plain-text.d.ts +6 -0
- package/dist/converters/plain-text.js +41 -0
- package/dist/converters/pptx.d.ts +8 -0
- package/dist/converters/pptx.js +189 -0
- package/dist/converters/rss.d.ts +11 -0
- package/dist/converters/rss.js +134 -0
- package/dist/converters/wikipedia.d.ts +6 -0
- package/dist/converters/wikipedia.js +35 -0
- package/dist/converters/xlsx.d.ts +8 -0
- package/dist/converters/xlsx.js +139 -0
- package/dist/converters/xml.d.ts +6 -0
- package/dist/converters/xml.js +17 -0
- package/dist/converters/yaml.d.ts +6 -0
- package/dist/converters/yaml.js +16 -0
- package/dist/converters/zip.d.ts +8 -0
- package/dist/converters/zip.js +56 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.js +24 -0
- package/dist/llm.d.ts +10 -0
- package/dist/llm.js +139 -0
- package/dist/main.d.ts +2 -0
- package/dist/main.js +182 -0
- package/dist/markit.d.ts +19 -0
- package/dist/markit.js +124 -0
- package/dist/mill.d.ts +18 -0
- package/dist/mill.js +123 -0
- package/dist/plugins/api.d.ts +7 -0
- package/dist/plugins/api.js +44 -0
- package/dist/plugins/index.d.ts +4 -0
- package/dist/plugins/index.js +3 -0
- package/dist/plugins/installer.d.ts +25 -0
- package/dist/plugins/installer.js +176 -0
- package/dist/plugins/loader.d.ts +6 -0
- package/dist/plugins/loader.js +61 -0
- package/dist/plugins/types.d.ts +25 -0
- package/dist/plugins/types.js +1 -0
- package/dist/providers/anthropic.d.ts +2 -0
- package/dist/providers/anthropic.js +47 -0
- package/dist/providers/index.d.ts +21 -0
- package/dist/providers/index.js +58 -0
- package/dist/providers/openai.d.ts +2 -0
- package/dist/providers/openai.js +65 -0
- package/dist/providers/types.d.ts +26 -0
- package/dist/providers/types.js +1 -0
- package/dist/types.d.ts +28 -0
- package/dist/types.js +1 -0
- package/dist/utils/exit-codes.d.ts +4 -0
- package/dist/utils/exit-codes.js +4 -0
- package/dist/utils/output.d.ts +22 -0
- package/dist/utils/output.js +31 -0
- package/package.json +70 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Michael Liv
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
# markit
|
|
2
|
+
|
|
3
|
+
Convert anything to markdown. PDF, DOCX, PPTX, XLSX, HTML, EPUB, Jupyter, RSS, images, audio, URLs, and more. Pluggable converters, built-in LLM providers for image description and audio transcription. Works as a CLI and as a library.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
npm install -g markit-ai
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
# Documents
|
|
15
|
+
markit report.pdf
|
|
16
|
+
markit document.docx
|
|
17
|
+
markit slides.pptx
|
|
18
|
+
|
|
19
|
+
# Data
|
|
20
|
+
markit data.csv
|
|
21
|
+
markit config.json
|
|
22
|
+
markit schema.yaml
|
|
23
|
+
|
|
24
|
+
# Web
|
|
25
|
+
markit https://example.com/article
|
|
26
|
+
markit https://en.wikipedia.org/wiki/Markdown
|
|
27
|
+
|
|
28
|
+
# Media (via LLMs; set OPENAI_API_KEY or ANTHROPIC_API_KEY)
|
|
29
|
+
markit photo.jpg # EXIF metadata + AI description
|
|
30
|
+
markit recording.mp3 # Audio metadata + transcription
|
|
31
|
+
markit photo.jpg -p "Extract all text" # Custom instructions
|
|
32
|
+
|
|
33
|
+
# Write to file
|
|
34
|
+
markit report.pdf -o report.md
|
|
35
|
+
|
|
36
|
+
# Pipe it
|
|
37
|
+
markit report.pdf | pbcopy
|
|
38
|
+
markit data.xlsx -q | napkin create "Imported Data"
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
---
|
|
42
|
+
|
|
43
|
+
## Supported Formats
|
|
44
|
+
|
|
45
|
+
| Format | Extensions | How |
|
|
46
|
+
|--------|-----------|-----|
|
|
47
|
+
| PDF | `.pdf` | Text extraction via unpdf |
|
|
48
|
+
| Word | `.docx` | mammoth → turndown, preserves headings/tables |
|
|
49
|
+
| PowerPoint | `.pptx` | XML parsing, slides + notes + tables |
|
|
50
|
+
| Excel | `.xlsx` | Each sheet → markdown table |
|
|
51
|
+
| HTML | `.html` `.htm` | turndown, scripts/styles stripped |
|
|
52
|
+
| EPUB | `.epub` | Spine-ordered chapters, metadata header |
|
|
53
|
+
| Jupyter | `.ipynb` | Markdown cells + code + outputs |
|
|
54
|
+
| RSS/Atom | `.rss` `.atom` `.xml` | Feed items with dates and content |
|
|
55
|
+
| CSV/TSV | `.csv` `.tsv` | Markdown tables |
|
|
56
|
+
| JSON | `.json` | Pretty-printed code block |
|
|
57
|
+
| YAML | `.yaml` `.yml` | Code block |
|
|
58
|
+
| XML/SVG | `.xml` `.svg` | Code block |
|
|
59
|
+
| Images | `.jpg` `.png` `.gif` `.webp` | EXIF metadata + optional AI description |
|
|
60
|
+
| Audio | `.mp3` `.wav` `.m4a` `.flac` | Metadata + optional AI transcription |
|
|
61
|
+
| ZIP | `.zip` | Recursive; converts each file inside |
|
|
62
|
+
| URLs | `http://` `https://` | Fetches with `Accept: text/markdown` |
|
|
63
|
+
| Wikipedia | `*.wikipedia.org` | Main content extraction |
|
|
64
|
+
| Code | `.py` `.ts` `.go` `.rs` ... | Fenced code block |
|
|
65
|
+
| Plain text | `.txt` `.md` `.rst` `.log` | Pass-through |
|
|
66
|
+
|
|
67
|
+
Need more? [Write a plugin.](#plugins)
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## AI Features
|
|
72
|
+
|
|
73
|
+
Images and audio get metadata extraction for free. For AI-powered descriptions and transcription, set an API key:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# OpenAI (default provider)
|
|
77
|
+
export OPENAI_API_KEY=sk-...
|
|
78
|
+
markit photo.jpg
|
|
79
|
+
|
|
80
|
+
# Anthropic
|
|
81
|
+
markit config set llm.provider anthropic
|
|
82
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
83
|
+
markit photo.jpg
|
|
84
|
+
|
|
85
|
+
# Any OpenAI-compatible API (Ollama, Groq, Together, etc.)
|
|
86
|
+
markit config set llm.apiBase http://localhost:11434/v1
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Focus the AI on what matters:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
markit receipt.jpg -p "List all line items with prices as a table"
|
|
93
|
+
markit diagram.png -p "Describe the architecture and data flow"
|
|
94
|
+
markit whiteboard.jpg -p "Extract all text verbatim"
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
---
|
|
98
|
+
|
|
99
|
+
## Plugins
|
|
100
|
+
|
|
101
|
+
Extend markit with new formats, override builtins, or add LLM providers.
|
|
102
|
+
|
|
103
|
+
### Install
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
markit plugin install npm:markit-plugin-dwg
|
|
107
|
+
markit plugin install git:github.com/user/markit-plugin-ocr
|
|
108
|
+
markit plugin install ./my-plugin.ts
|
|
109
|
+
markit plugin list
|
|
110
|
+
markit plugin remove dwg
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Write a Plugin
|
|
114
|
+
|
|
115
|
+
A plugin is a function that receives an API and registers converters and/or providers:
|
|
116
|
+
|
|
117
|
+
```typescript
|
|
118
|
+
import type { MarkitPluginAPI } from "markit-ai";
|
|
119
|
+
|
|
120
|
+
export default function(api: MarkitPluginAPI) {
|
|
121
|
+
api.setName("cad");
|
|
122
|
+
api.setVersion("1.0.0");
|
|
123
|
+
|
|
124
|
+
// Register a converter for a new format
|
|
125
|
+
api.registerConverter(
|
|
126
|
+
{
|
|
127
|
+
name: "dwg",
|
|
128
|
+
accepts: (info) => [".dwg", ".dxf"].includes(info.extension || ""),
|
|
129
|
+
convert: async (input, info) => {
|
|
130
|
+
// Your conversion logic
|
|
131
|
+
return { markdown: "..." };
|
|
132
|
+
},
|
|
133
|
+
},
|
|
134
|
+
// Optional: declare the format so it shows in `markit formats`
|
|
135
|
+
{ name: "AutoCAD", extensions: [".dwg", ".dxf"] },
|
|
136
|
+
);
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Plugin converters run **before** builtins, so you can override any format:
|
|
141
|
+
|
|
142
|
+
```typescript
|
|
143
|
+
export default function(api: MarkitPluginAPI) {
|
|
144
|
+
api.setName("better-pdf");
|
|
145
|
+
|
|
146
|
+
// This replaces the built-in PDF converter
|
|
147
|
+
api.registerConverter({
|
|
148
|
+
name: "pdf",
|
|
149
|
+
accepts: (info) => info.extension === ".pdf",
|
|
150
|
+
convert: async (input, info) => {
|
|
151
|
+
// Your superior PDF extraction
|
|
152
|
+
return { markdown: "..." };
|
|
153
|
+
},
|
|
154
|
+
});
|
|
155
|
+
}
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Plugins can also register LLM providers:
|
|
159
|
+
|
|
160
|
+
```typescript
|
|
161
|
+
api.registerProvider({
|
|
162
|
+
name: "gemini",
|
|
163
|
+
envKeys: ["GOOGLE_API_KEY"],
|
|
164
|
+
defaultBase: "https://generativelanguage.googleapis.com/v1beta",
|
|
165
|
+
defaultModel: "gemini-2.0-flash",
|
|
166
|
+
create: (config, prompt) => ({
|
|
167
|
+
describe: async (image, mime) => { /* ... */ },
|
|
168
|
+
}),
|
|
169
|
+
});
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## For Agents
|
|
175
|
+
|
|
176
|
+
Every command supports `--json`. Raw markdown with `-q`.
|
|
177
|
+
|
|
178
|
+
```bash
|
|
179
|
+
markit report.pdf --json # Structured output for parsing
|
|
180
|
+
markit report.pdf -q # Raw markdown, nothing else
|
|
181
|
+
markit onboard # Add instructions to CLAUDE.md
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
---
|
|
185
|
+
|
|
186
|
+
## SDK
|
|
187
|
+
|
|
188
|
+
markit is also a library:
|
|
189
|
+
|
|
190
|
+
```typescript
|
|
191
|
+
import { Markit } from "markit-ai";
|
|
192
|
+
|
|
193
|
+
const markit = new Markit();
|
|
194
|
+
const { markdown } = await markit.convertFile("report.pdf");
|
|
195
|
+
const { markdown: fromUrl } = await markit.convertUrl("https://example.com");
|
|
196
|
+
const { markdown: fromBuffer } = await markit.convert(buffer, { extension: ".docx" });
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
With AI features — pass plain functions, use any provider:
|
|
200
|
+
|
|
201
|
+
```typescript
|
|
202
|
+
import OpenAI from "openai";
|
|
203
|
+
import { Markit } from "markit-ai";
|
|
204
|
+
|
|
205
|
+
const openai = new OpenAI();
|
|
206
|
+
|
|
207
|
+
const markit = new Markit({
|
|
208
|
+
describe: async (image, mime) => {
|
|
209
|
+
const res = await openai.chat.completions.create({
|
|
210
|
+
model: "gpt-4.1-nano",
|
|
211
|
+
messages: [{ role: "user", content: [
|
|
212
|
+
{ type: "text", text: "Describe this image." },
|
|
213
|
+
{ type: "image_url", image_url: { url: `data:${mime};base64,${image.toString("base64")}` } },
|
|
214
|
+
]}],
|
|
215
|
+
});
|
|
216
|
+
return res.choices[0].message.content ?? "";
|
|
217
|
+
},
|
|
218
|
+
transcribe: async (audio, mime) => {
|
|
219
|
+
const res = await openai.audio.transcriptions.create({
|
|
220
|
+
model: "gpt-4o-mini-transcribe",
|
|
221
|
+
file: new File([audio], "audio.mp3", { type: mime }),
|
|
222
|
+
});
|
|
223
|
+
return res.text;
|
|
224
|
+
},
|
|
225
|
+
});
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
Mix providers. Claude for vision, OpenAI for audio, whatever:
|
|
229
|
+
|
|
230
|
+
```typescript
|
|
231
|
+
const markit = new Markit({
|
|
232
|
+
describe: async (image, mime) => {
|
|
233
|
+
const res = await anthropic.messages.create({
|
|
234
|
+
model: "claude-haiku-4-5",
|
|
235
|
+
messages: [{ role: "user", content: [
|
|
236
|
+
{ type: "image", source: { type: "base64", media_type: mime, data: image.toString("base64") } },
|
|
237
|
+
{ type: "text", text: "Describe this image." },
|
|
238
|
+
]}],
|
|
239
|
+
});
|
|
240
|
+
return res.content[0].text;
|
|
241
|
+
},
|
|
242
|
+
transcribe: async (audio, mime) => { /* Whisper, Deepgram, AssemblyAI, ... */ },
|
|
243
|
+
});
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
Or use the built-in providers — no SDK needed:
|
|
247
|
+
|
|
248
|
+
```typescript
|
|
249
|
+
import { Markit, createLlmFunctions, loadConfig } from "markit-ai";
|
|
250
|
+
|
|
251
|
+
const config = loadConfig(); // reads .markit/config.json + env vars
|
|
252
|
+
const markit = new Markit(createLlmFunctions(config));
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
With plugins:
|
|
256
|
+
|
|
257
|
+
```typescript
|
|
258
|
+
import { Markit, createLlmFunctions, loadConfig, loadAllPlugins } from "markit-ai";
|
|
259
|
+
|
|
260
|
+
const config = loadConfig();
|
|
261
|
+
const plugins = await loadAllPlugins();
|
|
262
|
+
const markit = new Markit(createLlmFunctions(config), plugins);
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
---
|
|
266
|
+
|
|
267
|
+
## Configuration
|
|
268
|
+
|
|
269
|
+
```bash
|
|
270
|
+
markit init # Create .markit/config.json
|
|
271
|
+
markit config show # Show resolved settings
|
|
272
|
+
markit config get llm.model # Get a value
|
|
273
|
+
markit config set llm.provider anthropic # Switch provider
|
|
274
|
+
markit config set llm.apiKey sk-... # Set a value
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
`.markit/config.json`:
|
|
278
|
+
|
|
279
|
+
```json
|
|
280
|
+
{
|
|
281
|
+
"llm": {
|
|
282
|
+
"provider": "openai",
|
|
283
|
+
"apiBase": "https://api.openai.com/v1",
|
|
284
|
+
"apiKey": "sk-...",
|
|
285
|
+
"model": "gpt-4.1-nano",
|
|
286
|
+
"transcriptionModel": "gpt-4o-mini-transcribe"
|
|
287
|
+
}
|
|
288
|
+
}
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
Env vars override config. Each provider checks its own env vars first:
|
|
292
|
+
|
|
293
|
+
| Provider | Env vars | Default model |
|
|
294
|
+
|----------|---------|---------------|
|
|
295
|
+
| `openai` | `OPENAI_API_KEY`, `MARKIT_API_KEY` | `gpt-4.1-nano` |
|
|
296
|
+
| `anthropic` | `ANTHROPIC_API_KEY`, `MARKIT_API_KEY` | `claude-haiku-4-5` |
|
|
297
|
+
|
|
298
|
+
---
|
|
299
|
+
|
|
300
|
+
## CLI Reference
|
|
301
|
+
|
|
302
|
+
```bash
|
|
303
|
+
markit <source> # Convert file or URL
|
|
304
|
+
markit <source> -o output.md # Write to file
|
|
305
|
+
markit <source> -p "instructions" # Custom AI prompt
|
|
306
|
+
markit <source> --json # JSON output
|
|
307
|
+
markit <source> -q # Raw markdown only
|
|
308
|
+
cat file.pdf | markit - # Read from stdin
|
|
309
|
+
markit formats # List supported formats
|
|
310
|
+
markit init # Create .markit/ config
|
|
311
|
+
markit config show # Show settings
|
|
312
|
+
markit config get <key> # Get config value
|
|
313
|
+
markit config set <key> <value> # Set config value
|
|
314
|
+
markit plugin install <source> # Install plugin
|
|
315
|
+
markit plugin list # List plugins
|
|
316
|
+
markit plugin remove <name> # Remove plugin
|
|
317
|
+
markit onboard # Add to CLAUDE.md
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
---
|
|
321
|
+
|
|
322
|
+
## Development
|
|
323
|
+
|
|
324
|
+
```bash
|
|
325
|
+
bun install
|
|
326
|
+
bun run dev -- report.pdf
|
|
327
|
+
bun test
|
|
328
|
+
bun run check
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
## License
|
|
332
|
+
|
|
333
|
+
MIT
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { OutputOptions } from "../utils/output.js";
|
|
2
|
+
/**
 * Show the loaded configuration: as a JSON payload in JSON mode, or as a
 * human-readable summary of the config location and LLM settings otherwise.
 * The `_args` parameter is not used by the implementation.
 */
export declare function configShow(_args: string[], options: OutputOptions): Promise<void>;
|
|
3
|
+
/**
 * Print the configuration value found at the dot-separated `key`
 * (for example `llm.model`). Exits the process with a user-error code when
 * the key does not resolve to a value.
 */
export declare function configGet(key: string, options: OutputOptions): Promise<void>;
|
|
4
|
+
/**
 * Store `value` at the dot-separated `key` and save the configuration.
 * When `value` is omitted for a key whose name contains "key", "secret" or
 * "token", the value is read from stdin instead.
 */
export declare function configSet(key: string, value: string | undefined, options: OutputOptions): Promise<void>;
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
import { output, success, error, dim, bold } from "../utils/output.js";
|
|
2
|
+
import { loadConfig, saveConfig, findConfigDir } from "../config.js";
|
|
3
|
+
import { getProvider, listProviders } from "../providers/index.js";
|
|
4
|
+
import { EXIT_ERROR, EXIT_USER_ERROR } from "../utils/exit-codes.js";
|
|
5
|
+
/**
 * Print the loaded markit configuration.
 *
 * JSON mode emits `{ configDir, config, providers }`. Human mode prints the
 * config file location followed by the effective LLM settings of the selected
 * provider (falling back to "openai" when none is configured), with the API
 * key masked down to its last four characters.
 *
 * @param {string[]} _args - Unused positional arguments.
 * @param {object} options - Output options forwarded to `output()`.
 * @returns {Promise<void>}
 */
export async function configShow(_args, options) {
    const config = loadConfig();
    const configDir = findConfigDir();
    const providerName = config.llm?.provider || "openai";
    const provider = getProvider(providerName);
    output(options, {
        json: () => ({
            configDir,
            config,
            providers: listProviders(),
        }),
        human: () => {
            console.log();
            console.log(bold("Configuration"));
            console.log();
            if (configDir) {
                console.log(` ${dim("config:")} ${configDir}/config.json`);
            }
            else {
                console.log(` ${dim("config:")} none (run 'markit init')`);
            }
            console.log();
            console.log(bold("LLM Settings"));
            console.log();
            console.log(` ${dim("provider:")} ${providerName}`);
            if (provider) {
                // Resolve API key: the first populated provider env var wins,
                // otherwise fall back to the value stored in the config file.
                const envKeyName = provider.envKeys.find((k) => process.env[k]);
                const rawKey = envKeyName ? process.env[envKeyName] : config.llm?.apiKey;
                const keySource = envKeyName || (config.llm?.apiKey ? "config" : undefined);
                // A hand-edited or numerically-coerced config value may not be a
                // string; coerce before slicing so masking can never throw.
                const apiKey = rawKey ? String(rawKey) : "";
                console.log(` ${dim("api key:")} ${apiKey ? `***${apiKey.slice(-4)} (${keySource})` : dim("not set")}`);
                console.log(` ${dim("api base:")} ${config.llm?.apiBase || provider.defaultBase}`);
                console.log(` ${dim("model:")} ${config.llm?.model || provider.defaultModel}`);
                if (provider.defaultTranscriptionModel) {
                    console.log(` ${dim("transcription:")} ${config.llm?.transcriptionModel || provider.defaultTranscriptionModel}`);
                }
                console.log(` ${dim("env vars:")} ${provider.envKeys.join(", ")}`);
            }
            else {
                console.log(` ${dim("(unknown provider)")}`);
            }
            console.log();
            console.log(dim(` Available providers: ${listProviders().join(", ")}`));
            console.log();
        },
    });
}
|
|
51
|
+
/**
 * Print a single configuration value addressed by a dot-separated key.
 *
 * Exits with `EXIT_USER_ERROR` when the key does not resolve to a value.
 * Nested sections (objects/arrays) are printed as indented JSON rather than
 * the uninformative `[object Object]` that `String()` would produce.
 *
 * @param {string} key - Dot-separated path, e.g. `llm.model`.
 * @param {object} options - Output options forwarded to `output()`.
 * @returns {Promise<void>}
 */
export async function configGet(key, options) {
    const config = loadConfig();
    const value = getNestedValue(config, key);
    if (value === undefined) {
        output(options, {
            json: () => ({ key, value: null }),
            human: () => error(`Key '${key}' not found`),
        });
        process.exit(EXIT_USER_ERROR);
    }
    const isStructured = value !== null && typeof value === "object";
    const rendered = isStructured ? JSON.stringify(value, null, 2) : String(value);
    output(options, {
        json: () => ({ key, value }),
        quiet: () => console.log(rendered),
        human: () => console.log(rendered),
    });
}
|
|
67
|
+
/**
 * Store a configuration value under a dot-separated key and save the config.
 *
 * Keys whose name contains "key", "secret" or "token" are treated as secrets:
 * when no value is given on the command line the value is read from stdin,
 * and when one is given a hint about shell-history leakage is printed.
 *
 * Non-secret values are coerced: "true"/"false" become booleans and
 * all-digit strings become numbers (only when they fit a safe integer).
 * Secrets are always stored verbatim as strings, so a digits-only API key is
 * not turned into a (possibly imprecise) number.
 *
 * @param {string} key - Dot-separated path, e.g. `llm.provider`.
 * @param {string | undefined} value - Value from the command line, if any.
 * @param {object} options - Output options forwarded to `output()`.
 * @returns {Promise<void>}
 */
export async function configSet(key, value, options) {
    if (!findConfigDir()) {
        output(options, {
            json: () => ({ success: false, error: "No .markit/ directory. Run 'markit init'" }),
            human: () => error("No .markit/ directory. Run 'markit init' first."),
        });
        process.exit(EXIT_ERROR);
    }
    // Secrets: read from stdin instead of args (avoids shell history)
    const loweredKey = key.toLowerCase();
    const isSecret = ["key", "secret", "token"].some((marker) => loweredKey.includes(marker));
    let resolved;
    if (isSecret && !value) {
        // Prompt from stdin
        if (process.stdin.isTTY) {
            process.stderr.write(`Enter value for ${key}: `);
        }
        const chunks = [];
        for await (const chunk of process.stdin) {
            chunks.push(chunk);
        }
        resolved = Buffer.concat(chunks).toString("utf-8").trim();
        if (!resolved) {
            error("No value provided");
            process.exit(EXIT_USER_ERROR);
        }
    }
    else if (isSecret) {
        // Warn if secret passed as arg
        console.error(dim(" hint: secrets in args leak to shell history. Use: markit config set llm.apiKey < keyfile"));
        resolved = value;
    }
    else if (value === undefined) {
        error("Missing value. Usage: markit config set <key> <value>");
        process.exit(EXIT_USER_ERROR);
    }
    else {
        resolved = value;
    }
    const config = loadConfig();
    let parsed = resolved;
    if (!isSecret) {
        if (resolved === "true") {
            parsed = true;
        }
        else if (resolved === "false") {
            parsed = false;
        }
        else if (/^\d+$/.test(resolved)) {
            const asNumber = Number.parseInt(resolved, 10);
            // Keep the original string when the number cannot be represented exactly.
            if (Number.isSafeInteger(asNumber)) {
                parsed = asNumber;
            }
        }
    }
    setNestedValue(config, key, parsed);
    saveConfig(config);
    // Never echo a full secret back to the terminal; show only its tail.
    // The JSON payload is left unchanged for callers that parse it.
    const shown = isSecret ? `***${String(parsed).slice(-4)}` : parsed;
    output(options, {
        json: () => ({ success: true, key, value: parsed }),
        human: () => success(`${key} = ${JSON.stringify(shown)}`),
    });
}
|
|
120
|
+
/**
 * Look up a value in a nested object by a dot-separated path.
 *
 * Only own properties of objects are followed, so inherited members
 * (`toString`, `constructor`, ...) and properties of primitives
 * (`"text".length`) are never reported as configuration values.
 *
 * @param {object} obj - Root object to read from.
 * @param {string} path - Dot-separated path, e.g. `llm.model`.
 * @returns {*} The value at the path, or `undefined` when any segment is missing.
 */
function getNestedValue(obj, path) {
    const hasOwn = Object.prototype.hasOwnProperty;
    return path.split(".").reduce((node, segment) => {
        const canDescend = node !== null && typeof node === "object" && hasOwn.call(node, segment);
        return canDescend ? node[segment] : undefined;
    }, obj);
}
|
|
123
|
+
/**
 * Assign a value inside a nested object at a dot-separated path, creating
 * intermediate objects as needed (non-object intermediates are replaced).
 *
 * Path segments that could reach or replace an object's prototype
 * (`__proto__`, `constructor`, `prototype`) are rejected, because the path
 * comes straight from user input and would otherwise allow writing onto
 * `Object.prototype`.
 *
 * @param {object} obj - Root object to mutate.
 * @param {string} path - Dot-separated path, e.g. `llm.model`.
 * @param {*} value - Value to store at the path.
 * @throws {Error} When the path contains a prototype-related segment.
 */
function setNestedValue(obj, path, value) {
    const keys = path.split(".");
    const unsafe = keys.find((segment) => segment === "__proto__" || segment === "constructor" || segment === "prototype");
    if (unsafe !== undefined) {
        throw new Error(`Invalid config key segment '${unsafe}'`);
    }
    let current = obj;
    for (const segment of keys.slice(0, -1)) {
        if (!current[segment] || typeof current[segment] !== "object") {
            current[segment] = {};
        }
        current = current[segment];
    }
    current[keys[keys.length - 1]] = value;
}
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import { writeFileSync } from "node:fs";
|
|
2
|
+
import { Markit } from "../markit.js";
|
|
3
|
+
import { loadConfig } from "../config.js";
|
|
4
|
+
import { createLlmFunctions } from "../providers/index.js";
|
|
5
|
+
import { loadAllPlugins } from "../plugins/loader.js";
|
|
6
|
+
import { registerProvider } from "../providers/index.js";
|
|
7
|
+
import { output, success, error, dim } from "../utils/output.js";
|
|
8
|
+
import { EXIT_ERROR, EXIT_UNSUPPORTED } from "../utils/exit-codes.js";
|
|
9
|
+
/**
 * Drain standard input and return everything that was read as one Buffer.
 *
 * @returns {Promise<Buffer>} Concatenation of all chunks read from stdin.
 */
async function readStdin() {
    const pieces = [];
    for await (const piece of process.stdin) {
        pieces.push(piece);
    }
    const whole = Buffer.concat(pieces);
    return whole;
}
|
|
16
|
+
/**
 * CLI entry point for converting a single source to markdown.
 *
 * `source` is one of: "-" (read the document from stdin), a string starting
 * with `http:`, `https:` or `file:` (passed to `convertUrl`), or anything
 * else (passed to `convertFile`). The markdown is written to
 * `options.output` when set, otherwise to stdout.
 *
 * Providers declared by loaded plugins are registered before the LLM
 * functions are created.
 *
 * Failures are reported through `output()` and terminate the process:
 * `EXIT_UNSUPPORTED` for unsupported formats, `EXIT_ERROR` otherwise. A
 * missing-file error names the path the system actually failed on (which may
 * be the `-o` target rather than the input) when the error carries one.
 *
 * @param {string} source - File path, URL, or "-" for stdin.
 * @param {object} options - CLI options (`output`, `prompt`, `json`, `quiet`).
 * @returns {Promise<void>}
 */
export async function convert(source, options) {
    const config = loadConfig();
    const plugins = await loadAllPlugins();
    // Register any providers from plugins
    for (const plugin of plugins) {
        for (const provider of plugin.providers) {
            registerProvider(provider);
        }
    }
    const llmFunctions = createLlmFunctions(config, options.prompt);
    const markit = new Markit(llmFunctions, plugins);
    try {
        let result;
        const isStdin = source === "-";
        const isUrl = source.startsWith("http:") ||
            source.startsWith("https:") ||
            source.startsWith("file:");
        if (isStdin) {
            // Check if stdin is a TTY (no piped input)
            if (process.stdin.isTTY) {
                error("No input on stdin. Pipe a file: cat report.pdf | markit -");
                process.exit(EXIT_ERROR);
            }
            const buffer = await readStdin();
            result = await markit.convert(buffer, {});
        }
        else if (isUrl) {
            // Progress hint for URL fetches (stderr so it doesn't pollute piped output)
            if (!options.json && !options.quiet) {
                process.stderr.write(`ℹ Fetching ${source}...\n`);
            }
            result = await markit.convertUrl(source);
        }
        else {
            result = await markit.convertFile(source);
        }
        const label = isStdin ? "stdin" : source;
        // Write to file or stdout
        if (options.output) {
            writeFileSync(options.output, result.markdown);
            output(options, {
                json: () => ({
                    success: true,
                    source: label,
                    output: options.output,
                    title: result.title,
                    length: result.markdown.length,
                }),
                human: () => {
                    success(`Converted → ${options.output}`);
                    if (result.title) {
                        console.log(dim(` title: ${result.title}`));
                    }
                    console.log(dim(` ${result.markdown.length} chars`));
                },
            });
        }
        else {
            output(options, {
                json: () => ({
                    success: true,
                    source: label,
                    title: result.title,
                    markdown: result.markdown,
                }),
                quiet: () => process.stdout.write(result.markdown),
                human: () => process.stdout.write(result.markdown),
            });
        }
    }
    catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        if (msg.includes("Unsupported format")) {
            output(options, {
                json: () => ({ success: false, error: msg }),
                human: () => {
                    error(msg);
                    console.log(dim(" Run 'markit formats' to see supported formats."));
                },
            });
            process.exit(EXIT_UNSUPPORTED);
        }
        // Prefer the structured error code; keep the message match as a
        // fallback for errors that were re-wrapped and lost their code.
        const isMissingFile = err?.code === "ENOENT" || msg.includes("ENOENT") || msg.includes("no such file");
        if (isMissingFile) {
            // Report the path the system call failed on when available, so a
            // missing output directory is not blamed on the input file.
            const missingPath = typeof err?.path === "string" && err.path ? err.path : source;
            output(options, {
                json: () => ({ success: false, error: `File not found: ${missingPath}` }),
                human: () => error(`File not found: ${missingPath}`),
            });
            process.exit(EXIT_ERROR);
        }
        output(options, {
            json: () => ({ success: false, error: msg }),
            human: () => error(msg),
        });
        process.exit(EXIT_ERROR);
    }
}
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import { output, bold, dim } from "../utils/output.js";
|
|
2
|
+
import { loadAllPlugins } from "../plugins/loader.js";
|
|
3
|
+
// Formats handled without plugins, in display order. Each entry is a
// [display name, extensions-or-patterns] pair that is expanded into a
// `{ name, extensions, builtin: true }` record.
const BUILTIN_FORMATS = [
    ["PDF", [".pdf"]],
    ["Word", [".docx"]],
    ["PowerPoint", [".pptx"]],
    ["Excel", [".xlsx"]],
    ["HTML", [".html", ".htm"]],
    ["EPUB", [".epub"]],
    ["Jupyter", [".ipynb"]],
    ["RSS/Atom", [".rss", ".atom", ".xml"]],
    ["CSV", [".csv", ".tsv"]],
    ["JSON", [".json"]],
    ["YAML", [".yaml", ".yml"]],
    ["XML", [".xml", ".svg"]],
    ["Images", [".jpg", ".png", ".gif", ".webp"]],
    ["Audio", [".mp3", ".wav", ".m4a", ".flac"]],
    ["ZIP", [".zip"]],
    ["Plain text", [".txt", ".md", ".rst", ".log"]],
    ["Code", [".py", ".js", ".ts", ".go", ".rs", "..."]],
    ["URLs", ["http://", "https://"]],
    ["Wikipedia", ["*.wikipedia.org"]],
].map(([name, extensions]) => ({ name, extensions, builtin: true }));
|
|
24
|
+
/**
 * List the formats markit can convert: the built-in ones plus any formats
 * declared by loaded plugins.
 *
 * JSON mode emits `{ formats }` with built-in and plugin entries combined;
 * human mode prints the built-in formats, then a separate "Plugin formats"
 * section when at least one plugin declares a format.
 *
 * @param {string[]} _args - Unused positional arguments.
 * @param {object} options - Output options forwarded to `output()`.
 * @returns {Promise<void>}
 */
export async function formats(_args, options) {
    const loadedPlugins = await loadAllPlugins();
    const pluginFormats = [];
    for (const plugin of loadedPlugins) {
        for (const declared of plugin.formats) {
            pluginFormats.push({
                name: declared.name,
                extensions: declared.extensions,
                builtin: false,
                plugin: plugin.name,
            });
        }
    }
    const allFormats = BUILTIN_FORMATS.concat(pluginFormats);
    const printHuman = () => {
        console.log();
        console.log(bold("Supported formats"));
        console.log();
        BUILTIN_FORMATS.forEach((entry) => {
            const extensionList = entry.extensions.join(", ");
            let requirement = "";
            if (entry.dep) {
                requirement = dim(` (requires: npm i ${entry.dep})`);
            }
            console.log(` ${entry.name.padEnd(14)} ${dim(extensionList)}${requirement}`);
        });
        if (pluginFormats.length > 0) {
            console.log();
            console.log(bold("Plugin formats"));
            console.log();
            pluginFormats.forEach((entry) => {
                const extensionList = entry.extensions.join(", ");
                console.log(` ${entry.name.padEnd(14)} ${dim(extensionList)} ${dim(`(${entry.plugin})`)}`);
            });
        }
        console.log();
    };
    output(options, {
        json: () => ({ formats: allFormats }),
        human: printHuman,
    });
}
|