markit-ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +333 -0
- package/dist/commands/config.d.ts +4 -0
- package/dist/commands/config.js +133 -0
- package/dist/commands/convert.d.ts +5 -0
- package/dist/commands/convert.js +110 -0
- package/dist/commands/formats.d.ts +2 -0
- package/dist/commands/formats.js +56 -0
- package/dist/commands/init.d.ts +2 -0
- package/dist/commands/init.js +29 -0
- package/dist/commands/onboard.d.ts +2 -0
- package/dist/commands/onboard.js +61 -0
- package/dist/commands/plugin.d.ts +4 -0
- package/dist/commands/plugin.js +58 -0
- package/dist/config.d.ts +26 -0
- package/dist/config.js +42 -0
- package/dist/converters/audio.d.ts +7 -0
- package/dist/converters/audio.js +87 -0
- package/dist/converters/csv.d.ts +7 -0
- package/dist/converters/csv.js +83 -0
- package/dist/converters/docx.d.ts +6 -0
- package/dist/converters/docx.js +28 -0
- package/dist/converters/epub.d.ts +8 -0
- package/dist/converters/epub.js +110 -0
- package/dist/converters/html.d.ts +6 -0
- package/dist/converters/html.js +33 -0
- package/dist/converters/image.d.ts +6 -0
- package/dist/converters/image.js +94 -0
- package/dist/converters/ipynb.d.ts +6 -0
- package/dist/converters/ipynb.js +72 -0
- package/dist/converters/json.d.ts +6 -0
- package/dist/converters/json.js +21 -0
- package/dist/converters/pdf.d.ts +6 -0
- package/dist/converters/pdf.js +29 -0
- package/dist/converters/plain-text.d.ts +6 -0
- package/dist/converters/plain-text.js +41 -0
- package/dist/converters/pptx.d.ts +8 -0
- package/dist/converters/pptx.js +189 -0
- package/dist/converters/rss.d.ts +11 -0
- package/dist/converters/rss.js +134 -0
- package/dist/converters/wikipedia.d.ts +6 -0
- package/dist/converters/wikipedia.js +35 -0
- package/dist/converters/xlsx.d.ts +8 -0
- package/dist/converters/xlsx.js +139 -0
- package/dist/converters/xml.d.ts +6 -0
- package/dist/converters/xml.js +17 -0
- package/dist/converters/yaml.d.ts +6 -0
- package/dist/converters/yaml.js +16 -0
- package/dist/converters/zip.d.ts +8 -0
- package/dist/converters/zip.js +56 -0
- package/dist/index.d.ts +28 -0
- package/dist/index.js +24 -0
- package/dist/llm.d.ts +10 -0
- package/dist/llm.js +139 -0
- package/dist/main.d.ts +2 -0
- package/dist/main.js +182 -0
- package/dist/markit.d.ts +19 -0
- package/dist/markit.js +124 -0
- package/dist/mill.d.ts +18 -0
- package/dist/mill.js +123 -0
- package/dist/plugins/api.d.ts +7 -0
- package/dist/plugins/api.js +44 -0
- package/dist/plugins/index.d.ts +4 -0
- package/dist/plugins/index.js +3 -0
- package/dist/plugins/installer.d.ts +25 -0
- package/dist/plugins/installer.js +176 -0
- package/dist/plugins/loader.d.ts +6 -0
- package/dist/plugins/loader.js +61 -0
- package/dist/plugins/types.d.ts +25 -0
- package/dist/plugins/types.js +1 -0
- package/dist/providers/anthropic.d.ts +2 -0
- package/dist/providers/anthropic.js +47 -0
- package/dist/providers/index.d.ts +21 -0
- package/dist/providers/index.js +58 -0
- package/dist/providers/openai.d.ts +2 -0
- package/dist/providers/openai.js +65 -0
- package/dist/providers/types.d.ts +26 -0
- package/dist/providers/types.js +1 -0
- package/dist/types.d.ts +28 -0
- package/dist/types.js +1 -0
- package/dist/utils/exit-codes.d.ts +4 -0
- package/dist/utils/exit-codes.js +4 -0
- package/dist/utils/output.d.ts +22 -0
- package/dist/utils/output.js +31 -0
- package/package.json +70 -0
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
import { execFileSync, execSync } from "node:child_process";
|
|
2
|
+
import { existsSync, mkdirSync, readFileSync, rmSync, writeFileSync, } from "node:fs";
|
|
3
|
+
import { basename, join, resolve } from "node:path";
|
|
4
|
+
import { findConfigDir } from "../config.js";
|
|
5
|
+
const PLUGINS_FILE = "plugins.json";
|
|
6
|
+
/**
 * Classify a plugin source string and split it into its parts.
 *
 * Recognised forms:
 *  - `npm:<name>[@<ref>]` (a leading `@` is treated as part of a scoped name)
 *  - `git:<url>`, `https://…`, `http://…` or `ssh://…`, optionally followed by
 *    `@<ref>` and/or `#<subpath>`
 *  - anything else is resolved as a local filesystem path
 *
 * @param {string} source Source string as given by the user.
 * @returns {{type: "npm", name: string, ref: (string|undefined)}
 *   | {type: "git", name: string, url: string, ref: (string|undefined), subpath: (string|undefined)}
 *   | {type: "local", name: string, path: string}}
 */
export function parsePluginSource(source) {
    if (source.startsWith("npm:")) {
        const spec = source.slice(4);
        // In a scoped spec the first "@" (index 0) belongs to the name, so
        // only a later "@" can separate the ref.
        const splitAt = spec.startsWith("@")
            ? spec.lastIndexOf("@")
            : spec.indexOf("@");
        if (splitAt > 0) {
            return {
                type: "npm",
                name: spec.slice(0, splitAt),
                ref: spec.slice(splitAt + 1),
            };
        }
        return { type: "npm", name: spec, ref: undefined };
    }

    const gitPrefixes = ["git:", "https://", "http://", "ssh://"];
    if (gitPrefixes.some((prefix) => source.startsWith(prefix))) {
        // "git://host/repo" also starts with "git:". Drop the slashes left
        // behind by the prefix removal so the URL built below is
        // "https://host/repo.git" rather than "https:////host/repo.git".
        let remainder = source.startsWith("git:")
            ? source.slice(4).replace(/^\/+/, "")
            : source;

        let subpath;
        const hashIdx = remainder.indexOf("#");
        if (hashIdx > 0) {
            subpath = remainder.slice(hashIdx + 1);
            remainder = remainder.slice(0, hashIdx);
        }

        // The last "@" is a ref separator only when no "/" follows it;
        // otherwise it is the user part of the authority (e.g. "git@host/…").
        let ref;
        const atIdx = remainder.lastIndexOf("@");
        if (atIdx > 0 && !remainder.slice(atIdx).includes("/")) {
            ref = remainder.slice(atIdx + 1);
            remainder = remainder.slice(0, atIdx);
        }

        const hasScheme = ["http://", "https://", "ssh://"].some((scheme) => remainder.startsWith(scheme));
        let url = hasScheme ? remainder : `https://${remainder}`;
        if (!url.endsWith(".git")) {
            url += ".git";
        }
        const name = subpath ? basename(subpath) : basename(url, ".git");
        return { type: "git", name, url, ref, subpath };
    }

    // Local path: the name is the last path segment without a .ts/.js suffix.
    const absPath = resolve(source);
    return {
        type: "local",
        name: basename(absPath).replace(/\.(ts|js)$/, ""),
        path: absPath,
    };
}
|
|
70
|
+
/**
 * Return the directory that holds installed plugins, creating it when missing.
 *
 * The directory is `<configDir>/plugins` when findConfigDir() returns a
 * directory, otherwise `<cwd>/.markit/plugins`.
 *
 * @returns {string} Path of the plugins directory.
 */
function getPluginsDir() {
    const baseDir = findConfigDir() || join(process.cwd(), ".markit");
    const pluginsDir = join(baseDir, "plugins");
    mkdirSync(pluginsDir, { recursive: true });
    return pluginsDir;
}
|
|
78
|
+
/**
 * Return the path of the plugin registry file (PLUGINS_FILE).
 *
 * The file lives in the directory returned by findConfigDir(), or in
 * `<cwd>/.markit` when no config directory is found. Nothing is created.
 *
 * @returns {string} Path of the registry file.
 */
function getPluginsJsonPath() {
    const baseDir = findConfigDir() || join(process.cwd(), ".markit");
    return join(baseDir, PLUGINS_FILE);
}
|
|
84
|
+
/**
 * Read the plugin registry from disk.
 *
 * A missing file yields an empty registry. The result always has a `plugins`
 * array, so callers can use `data.plugins.findIndex(...)` and similar without
 * further checks.
 *
 * @returns {{plugins: Array}} Parsed registry contents.
 * @throws {SyntaxError} When the file is not valid JSON (message names the file).
 * @throws {Error} When the JSON is not an object or `plugins` is not an array.
 */
function readPluginsJson() {
    const path = getPluginsJsonPath();
    if (!existsSync(path)) {
        return { plugins: [] };
    }
    const text = readFileSync(path, "utf-8");
    let data;
    try {
        data = JSON.parse(text);
    }
    catch (err) {
        // Keep the SyntaxError type but say which file is broken.
        throw new SyntaxError(`Invalid JSON in ${path}: ${err.message}`, { cause: err });
    }
    if (data === null || typeof data !== "object" || Array.isArray(data)) {
        throw new Error(`Expected a JSON object in ${path}`);
    }
    // A registry without a "plugins" key is treated as empty.
    data.plugins ??= [];
    if (!Array.isArray(data.plugins)) {
        throw new Error(`"plugins" must be an array in ${path}`);
    }
    return data;
}
|
|
90
|
+
/**
 * Persist the plugin registry as pretty-printed JSON with a trailing newline.
 *
 * The parent directory of the registry file is created first if needed.
 *
 * @param {object} data Registry contents to serialise.
 */
function writePluginsJson(data) {
    const target = getPluginsJsonPath();
    const parentDir = join(target, "..");
    mkdirSync(parentDir, { recursive: true });
    const serialized = `${JSON.stringify(data, null, 2)}\n`;
    writeFileSync(target, serialized);
}
|
|
95
|
+
/**
 * Install a plugin from an npm, git or local source and record it in the
 * plugin registry.
 *
 *  - npm: runs `npm install <spec>` inside `<pluginsDir>/npm`.
 *  - git: clones into `<pluginsDir>/git/<host>/<path>` (or runs `git pull`
 *    when that directory already exists), then runs `npm install` there if
 *    the checkout has a package.json.
 *  - local: uses the resolved path as-is; nothing is copied.
 *
 * An existing registry entry with the same `source` is replaced.
 *
 * @param {string} source Plugin source, in any form parsePluginSource accepts.
 * @returns {Promise<{path: string, name: string}>} Install location and name.
 * @throws {Error} On an invalid npm spec, git ref or subpath, on a missing
 *   local path, or when an invoked command fails.
 */
export async function installPlugin(source) {
    const parsed = parsePluginSource(source);
    const pluginsDir = getPluginsDir();
    let installPath;
    switch (parsed.type) {
        case "npm": {
            const spec = parsed.ref ? `${parsed.name}@${parsed.ref}` : parsed.name;
            // The spec ends up on a shell command line, so accept only a plain
            // "[@scope/]name[@version-or-tag]" and reject anything containing
            // shell metacharacters or starting with "-".
            const safeSpec = /^(?:@[a-z0-9][\w.-]*\/)?[a-z0-9][\w.-]*(?:@[\w.^~*+-]+)?$/i;
            if (!safeSpec.test(spec)) {
                throw new Error(`Invalid npm package spec: ${spec}`);
            }
            const npmDir = join(pluginsDir, "npm");
            mkdirSync(npmDir, { recursive: true });
            // npm stays on execSync: on Windows it is a .cmd shim that needs a shell.
            execSync(`npm install ${spec}`, { cwd: npmDir, stdio: "pipe" });
            installPath = join(npmDir, "node_modules", parsed.name);
            break;
        }
        case "git": {
            // A ref starting with "-" would be read by git as an option.
            if (parsed.ref?.startsWith("-")) {
                throw new Error(`Invalid git ref: ${parsed.ref}`);
            }
            // The subpath becomes the registered plugin path, which
            // removePlugin later deletes recursively; keep it inside the clone.
            if (parsed.subpath && parsed.subpath.split(/[\\/]/).includes("..")) {
                throw new Error(`Plugin subpath must stay inside the repository: ${parsed.subpath}`);
            }
            const url = new URL(parsed.url);
            const gitDir = join(pluginsDir, "git", url.hostname, url.pathname.replace(/\.git$/, ""));
            if (existsSync(gitDir)) {
                execFileSync("git", ["pull"], { cwd: gitDir, stdio: "pipe" });
            }
            else {
                mkdirSync(join(gitDir, ".."), { recursive: true });
                // Arguments go to git as an array: no shell parses them, and
                // paths containing spaces work.
                const cloneArgs = ["clone"];
                if (parsed.ref) {
                    cloneArgs.push("--branch", parsed.ref);
                }
                cloneArgs.push("--", parsed.url, gitDir);
                execFileSync("git", cloneArgs, { stdio: "pipe" });
            }
            if (existsSync(join(gitDir, "package.json"))) {
                execSync("npm install", { cwd: gitDir, stdio: "pipe" });
            }
            installPath = parsed.subpath ? join(gitDir, parsed.subpath) : gitDir;
            break;
        }
        case "local": {
            if (!existsSync(parsed.path)) {
                throw new Error(`Path does not exist: ${parsed.path}`);
            }
            installPath = parsed.path;
            break;
        }
    }
    // Replace the entry for this exact source string, or append a new one.
    const data = readPluginsJson();
    const entry = { source, path: installPath, name: parsed.name };
    const existing = data.plugins.findIndex((p) => p.source === source);
    if (existing >= 0) {
        data.plugins[existing] = entry;
    }
    else {
        data.plugins.push(entry);
    }
    writePluginsJson(data);
    return { path: installPath, name: parsed.name };
}
|
|
151
|
+
/**
 * Remove a plugin from the registry and, for non-local plugins, delete its
 * installed files.
 *
 * An entry whose `name` or `source` equals `name` is preferred. Only when
 * there is none does the first entry whose `source` contains `name` match, so
 * a short name cannot remove an unrelated plugin listed earlier.
 *
 * @param {string} name Plugin name, full source string, or part of a source.
 * @returns {boolean} True when an entry was removed, false when nothing matched.
 */
export function removePlugin(name) {
    // Every string contains "", so an empty name would match the first plugin.
    if (!name) {
        return false;
    }
    const data = readPluginsJson();
    // Skip entries without a string `source` (e.g. bare path strings).
    const hasSource = (p) => typeof p?.source === "string";
    let idx = data.plugins.findIndex((p) => p?.name === name || p?.source === name);
    if (idx < 0) {
        idx = data.plugins.findIndex((p) => hasSource(p) && p.source.includes(name));
    }
    if (idx < 0) {
        return false;
    }
    const plugin = data.plugins[idx];
    // Local plugins point at the user's own files and are never deleted. An
    // entry without a source is treated the same way.
    const type = hasSource(plugin) ? parsePluginSource(plugin.source).type : "local";
    if (type !== "local" && typeof plugin.path === "string" && existsSync(plugin.path)) {
        rmSync(plugin.path, { recursive: true, force: true });
    }
    data.plugins.splice(idx, 1);
    writePluginsJson(data);
    return true;
}
|
|
165
|
+
/**
 * List the plugins recorded in the registry.
 *
 * Each entry's source string is re-parsed to obtain its type. The stored name
 * is used when present, otherwise the name derived from the source.
 *
 * @returns {Array<{name: string, type: string, source: string, path: string}>}
 */
export function listInstalled() {
    const { plugins } = readPluginsJson();
    const described = [];
    for (const entry of plugins) {
        const { type, name: derivedName } = parsePluginSource(entry.source);
        described.push({
            name: entry.name || derivedName,
            type,
            source: entry.source,
            path: entry.path,
        });
    }
    return described;
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { existsSync, readFileSync, statSync } from "node:fs";
|
|
2
|
+
import { join, resolve } from "node:path";
|
|
3
|
+
import { pathToFileURL } from "node:url";
|
|
4
|
+
import { findConfigDir } from "../config.js";
|
|
5
|
+
import { resolvePluginExport } from "./api.js";
|
|
6
|
+
/**
 * Import a plugin module from a file or directory and resolve its default
 * export through resolvePluginExport.
 *
 * For a directory, the entry point is the first existing file among the
 * package.json `main` (when present), `src/index.ts`, `src/index.js`,
 * `index.ts` and `index.js`.
 *
 * @param {string} path File or directory path; resolved against the cwd.
 * @returns {Promise<*>} Whatever resolvePluginExport returns for the module.
 * @throws {Error} When a directory contains no usable entry point, or when
 *   the import itself fails.
 */
export async function loadPluginFromPath(path) {
    let absPath = resolve(path);
    if (existsSync(absPath) && statSync(absPath).isDirectory()) {
        const candidates = [
            join(absPath, "src", "index.ts"),
            join(absPath, "src", "index.js"),
            join(absPath, "index.ts"),
            join(absPath, "index.js"),
        ];
        const pkgPath = join(absPath, "package.json");
        if (existsSync(pkgPath)) {
            try {
                const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
                // A declared "main" takes priority over the conventional names.
                if (pkg.main) {
                    candidates.unshift(join(absPath, pkg.main));
                }
            }
            catch {
                // A malformed package.json is not fatal: fall back to the
                // conventional entry points above.
            }
        }
        const found = candidates.find((c) => existsSync(c));
        if (!found) {
            throw new Error(`No entry point found in ${absPath}`);
        }
        absPath = found;
    }
    const mod = await import(pathToFileURL(absPath).href);
    // resolve() returns backslash-separated paths on Windows. Stripping only
    // up to the last "/" would leave the whole absolute path as the id there.
    const dirPrefix = process.platform === "win32" ? /.*[\\/]/ : /.*\//;
    const pluginId = absPath.replace(dirPrefix, "").replace(/\.(ts|js)$/, "");
    return resolvePluginExport(mod.default, pluginId);
}
|
|
37
|
+
/**
 * Load every plugin listed in `<configDir>/plugins.json`.
 *
 * Loading is best-effort: a missing config directory or registry file yields
 * an empty list, a plugin that fails to load is skipped, and an unreadable
 * registry ends loading with whatever was collected so far.
 *
 * @returns {Promise<Array>} The successfully loaded plugins, in registry order.
 */
export async function loadAllPlugins() {
    const loaded = [];
    const configDir = findConfigDir();
    if (!configDir) {
        return loaded;
    }
    const registryPath = join(configDir, "plugins.json");
    if (!existsSync(registryPath)) {
        return loaded;
    }
    try {
        const registry = JSON.parse(readFileSync(registryPath, "utf-8"));
        for (const entry of registry.plugins ?? []) {
            // An entry is either a bare path string or an object with `path`.
            const pluginPath = typeof entry === "string" ? entry : entry.path;
            try {
                const plugin = await loadPluginFromPath(pluginPath);
                loaded.push(plugin);
            }
            catch { }
        }
    }
    catch { }
    return loaded;
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import type { Converter } from "../types.js";
|
|
2
|
+
import type { Provider } from "../providers/types.js";
|
|
3
|
+
/** A named file format together with its file extensions. */
export interface FormatDef {
    /** Name of the format. */
    name: string;
    /** File extensions belonging to the format (NOTE(review): leading-dot convention not visible here; confirm). */
    extensions: string[];
}
|
|
7
|
+
/** API object handed to a plugin function so it can describe and register itself. */
export interface MarkitPluginAPI {
    /** Set the plugin's name. */
    setName(name: string): void;
    /** Set the plugin's version. */
    setVersion(version: string): void;
    /** Register a converter, optionally with the format definition it belongs to. */
    registerConverter(converter: Converter, format?: FormatDef): void;
    /** Register an LLM provider. */
    registerProvider(provider: Provider): void;
}
|
|
13
|
+
/** Function form of a plugin: it receives the plugin API and returns nothing. */
export type PluginFunction = (api: MarkitPluginAPI) => void;
|
|
14
|
+
/** A plugin definition: its identity plus the converters, providers and formats it contributes. */
export interface PluginDef {
    /** Plugin name. */
    name: string;
    /** Plugin version. */
    version: string;
    /** Converters contributed by the plugin. */
    converters: Converter[];
    /** Providers contributed by the plugin. */
    providers: Provider[];
    /** Format definitions contributed by the plugin. */
    formats: FormatDef[];
}
|
|
21
|
+
/** Record describing one installed plugin. */
export interface InstalledPlugin {
    /** Source string the plugin was installed from. */
    source: string;
    /** Filesystem path of the installed plugin. */
    path: string;
    /** Optional plugin name. */
    name?: string;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
 * Anthropic provider definition.
 *
 * `create` returns only a `describe` function; no `transcribe` is provided
 * because Anthropic has no transcription API.
 */
export const anthropic = {
    name: "anthropic",
    envKeys: ["ANTHROPIC_API_KEY", "MARKIT_API_KEY"],
    defaultBase: "https://api.anthropic.com",
    defaultModel: "claude-haiku-4-5",
    /**
     * Build the describe function for a resolved configuration.
     *
     * @param {{apiKey: string, apiBase: string, model: string}} config
     * @param {string} prompt Text sent alongside the image.
     * @returns {{describe: function(Buffer, string): Promise<string>}}
     */
    create(config, prompt) {
        const endpoint = `${config.apiBase}/v1/messages`;

        // Sends the image as a base64 block followed by the text prompt and
        // returns the text of the first content block ("" when absent).
        const describe = async (image, mimetype) => {
            const imagePart = {
                type: "image",
                source: {
                    type: "base64",
                    media_type: mimetype,
                    data: image.toString("base64"),
                },
            };
            const textPart = { type: "text", text: prompt };
            const payload = {
                model: config.model,
                max_tokens: 1024,
                messages: [{ role: "user", content: [imagePart, textPart] }],
            };
            const response = await fetch(endpoint, {
                method: "POST",
                headers: {
                    "Content-Type": "application/json",
                    "x-api-key": config.apiKey,
                    "anthropic-version": "2023-06-01",
                },
                body: JSON.stringify(payload),
            });
            if (response.ok) {
                const parsed = await response.json();
                return parsed.content?.[0]?.text ?? "";
            }
            const detail = await response.text();
            throw new Error(`Anthropic API error ${response.status}: ${detail}`);
        };

        return { describe };
    },
};
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { Provider } from "./types.js";
|
|
2
|
+
import type { MarkitOptions } from "../types.js";
|
|
3
|
+
import type { MarkitConfig } from "../config.js";
|
|
4
|
+
export type { Provider, ProviderConfig, ResolvedConfig } from "./types.js";
|
|
5
|
+
/**
 * Register a custom provider.
 *
 * The provider is stored under `provider.name`; registering a name that
 * already exists replaces the earlier provider.
 */
export declare function registerProvider(provider: Provider): void;
|
|
9
|
+
/**
 * Get a provider by name.
 *
 * @returns The registered provider, or `undefined` when none has that name.
 */
export declare function getProvider(name: string): Provider | undefined;
|
|
13
|
+
/**
 * List all registered provider names.
 *
 * @returns The names under which providers are currently registered.
 */
export declare function listProviders(): string[];
|
|
17
|
+
/**
 * Build describe/transcribe functions from config.
 * Resolves provider, API key, model, and base URL automatically.
 *
 * @param config Configuration whose `llm` section selects and configures the provider.
 * @param prompt Optional extra instructions for the image description prompt.
 * @returns The functions created by the selected provider.
 */
export declare function createLlmFunctions(config: MarkitConfig, prompt?: string): MarkitOptions;
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { openai } from "./openai.js";
|
|
2
|
+
import { anthropic } from "./anthropic.js";
|
|
3
|
+
const providers = {
|
|
4
|
+
openai,
|
|
5
|
+
anthropic,
|
|
6
|
+
};
|
|
7
|
+
/**
 * Register a custom provider under its own `name`.
 *
 * A provider already registered with that name is replaced.
 *
 * @param {{name: string}} provider Provider definition to add.
 */
export function registerProvider(provider) {
    const { name } = provider;
    providers[name] = provider;
}
|
|
13
|
+
/**
 * Get a provider by name.
 *
 * Only providers that were actually registered are returned. Names that
 * merely exist on every object (such as "constructor" or "toString") yield
 * `undefined` instead of an inherited member.
 *
 * @param {string} name Provider name.
 * @returns {object|undefined} The provider, or undefined when not registered.
 */
export function getProvider(name) {
    return Object.hasOwn(providers, name) ? providers[name] : undefined;
}
|
|
19
|
+
/**
 * List the names of all registered providers.
 *
 * @returns {string[]} Provider names, in registration order.
 */
export function listProviders() {
    return Object.entries(providers).map(([providerName]) => providerName);
}
|
|
25
|
+
/**
 * Combine environment variables, the config file and provider defaults into
 * the settings a provider needs.
 *
 * Precedence:
 *  - API key: first non-empty env var from `provider.envKeys`, then `config.llm.apiKey`
 *  - base URL: `config.llm.apiBase`, then the provider default (trailing slashes removed)
 *  - model: `MARKIT_MODEL` env var, then `config.llm.model`, then the provider default
 *  - transcription model: `config.llm.transcriptionModel`, then the provider default
 *
 * @param {object} provider Provider definition.
 * @param {object} config Loaded configuration.
 * @returns {{apiKey: string, apiBase: string, model: string, transcriptionModel: (string|undefined)}|null}
 *   The resolved settings, or null when no API key is available.
 */
function resolve(provider, config) {
    const keyFromEnv = provider.envKeys
        .map((envName) => process.env[envName])
        .find(Boolean);
    const apiKey = keyFromEnv || config.llm?.apiKey;
    if (!apiKey) {
        return null;
    }
    const llm = config.llm;
    const base = llm?.apiBase || provider.defaultBase;
    const model = process.env.MARKIT_MODEL || llm?.model || provider.defaultModel;
    const transcriptionModel = llm?.transcriptionModel || provider.defaultTranscriptionModel;
    return {
        apiKey,
        apiBase: base.replace(/\/+$/, ""),
        model,
        transcriptionModel,
    };
}
|
|
40
|
+
// Prompt sent with every image; caller-supplied instructions are appended to it.
const BASE_PROMPT = "Describe this image in detail.";
/**
 * Build describe/transcribe functions from config.
 * Resolves provider, API key, model, and base URL automatically.
 *
 * The provider is `config.llm.provider`, defaulting to "openai". When no API
 * key can be resolved an empty object is returned, so no LLM functions are
 * available.
 *
 * @param {object} config Loaded configuration.
 * @param {string} [prompt] Extra instructions appended to the base prompt.
 * @returns {object} The functions created by the provider, or `{}` without an API key.
 * @throws {Error} When the configured provider name is not registered.
 */
export function createLlmFunctions(config, prompt) {
    const providerName = config.llm?.provider || "openai";
    // Own-property lookup: a name such as "constructor" or "toString" must be
    // reported as unknown rather than picking up an inherited object member
    // and failing later with an unrelated TypeError.
    const provider = Object.hasOwn(providers, providerName)
        ? providers[providerName]
        : undefined;
    if (!provider) {
        throw new Error(`Unknown provider '${providerName}'. Available: ${Object.keys(providers).join(", ")}`);
    }
    const resolved = resolve(provider, config);
    if (!resolved) {
        return {};
    }
    const fullPrompt = prompt
        ? `${BASE_PROMPT}\n\n${prompt}`
        : BASE_PROMPT;
    return provider.create(resolved, fullPrompt);
}
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
/**
 * OpenAI provider definition: image description through chat completions and
 * audio transcription through the transcriptions endpoint.
 */
export const openai = {
    name: "openai",
    envKeys: ["OPENAI_API_KEY", "MARKIT_API_KEY"],
    defaultBase: "https://api.openai.com/v1",
    defaultModel: "gpt-4.1-nano",
    defaultTranscriptionModel: "gpt-4o-mini-transcribe",
    /**
     * Build the describe/transcribe functions for a resolved configuration.
     *
     * @param {{apiKey: string, apiBase: string, model: string, transcriptionModel: (string|undefined)}} config
     * @param {string} prompt Text sent alongside the image.
     * @returns {{describe: function(Buffer, string): Promise<string>, transcribe: function(Buffer, string): Promise<string>}}
     */
    create(config, prompt) {
        const bearer = `Bearer ${config.apiKey}`;

        // Sends the prompt plus the image as a data: URL and returns the
        // first choice's message content ("" when absent).
        const describe = async (image, mimetype) => {
            const dataUrl = `data:${mimetype};base64,${image.toString("base64")}`;
            const payload = {
                model: config.model,
                messages: [
                    {
                        role: "user",
                        content: [
                            { type: "text", text: prompt },
                            { type: "image_url", image_url: { url: dataUrl } },
                        ],
                    },
                ],
                max_tokens: 1024,
            };
            const response = await fetch(`${config.apiBase}/chat/completions`, {
                method: "POST",
                headers: {
                    "Content-Type": "application/json",
                    Authorization: bearer,
                },
                body: JSON.stringify(payload),
            });
            if (response.ok) {
                const parsed = await response.json();
                return parsed.choices?.[0]?.message?.content ?? "";
            }
            const detail = await response.text();
            throw new Error(`OpenAI API error ${response.status}: ${detail}`);
        };

        // Uploads the audio as a multipart form; the file name carries an
        // extension derived from the MIME type.
        const transcribe = async (audio, mimetype) => {
            const upload = new File([audio], `audio${mimeToExt(mimetype)}`, { type: mimetype });
            const form = new FormData();
            form.append("model", config.transcriptionModel || "gpt-4o-mini-transcribe");
            form.append("file", upload);
            const response = await fetch(`${config.apiBase}/audio/transcriptions`, {
                method: "POST",
                headers: { Authorization: bearer },
                body: form,
            });
            if (response.ok) {
                const parsed = await response.json();
                return parsed.text ?? "";
            }
            const detail = await response.text();
            throw new Error(`Transcription API error ${response.status}: ${detail}`);
        };

        return { describe, transcribe };
    },
};
|
|
58
|
+
/**
 * Map an audio/video MIME type to the file extension used for the upload
 * file name.
 *
 * The lookup ignores case and any parameters after ";" (for example
 * "Audio/WAV; codecs=1" maps to ".wav"). Unknown or missing types fall back
 * to ".mp3".
 *
 * @param {string} mime MIME type of the audio data.
 * @returns {string} File extension including the leading dot.
 */
function mimeToExt(mime) {
    // A Map avoids matching inherited object members such as "constructor".
    const extensions = new Map([
        ["audio/mpeg", ".mp3"],
        ["audio/wav", ".wav"],
        ["audio/mp4", ".m4a"],
        ["video/mp4", ".mp4"],
        ["audio/ogg", ".ogg"],
        ["audio/flac", ".flac"],
        ["audio/aac", ".aac"],
    ]);
    const essence = String(mime ?? "").split(";", 1)[0].trim().toLowerCase();
    return extensions.get(essence) ?? ".mp3";
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { MarkitOptions } from "../types.js";
|
|
2
|
+
/** Provider settings before resolution; every field is optional. */
export interface ProviderConfig {
    /** API key for the provider. */
    apiKey?: string;
    /** Base URL of the provider's API. */
    apiBase?: string;
    /** Model name. */
    model?: string;
    /** Model name for audio transcription. */
    transcriptionModel?: string;
}
|
|
8
|
+
/** Definition of an LLM provider: its name, defaults, and a factory for its functions. */
export interface Provider {
    /** Provider name. */
    name: string;
    /** Env vars to check for API key, in priority order */
    envKeys: string[];
    /** Default API base URL */
    defaultBase: string;
    /** Default model for image description */
    defaultModel: string;
    /** Default model for audio transcription (if supported) */
    defaultTranscriptionModel?: string;
    /** Build describe/transcribe functions from resolved config */
    create(config: ResolvedConfig, prompt: string): MarkitOptions;
}
|
|
21
|
+
/** Fully resolved provider settings, as passed to `Provider.create`. */
export interface ResolvedConfig {
    /** API key to authenticate with. */
    apiKey: string;
    /** Base URL of the provider's API. */
    apiBase: string;
    /** Model name. */
    model: string;
    /** Model name for audio transcription, when one is available. */
    transcriptionModel?: string;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/** Optional metadata describing an input; it is what `Converter.accepts` and `Converter.convert` receive. */
export interface StreamInfo {
    /** MIME type of the content. */
    mimetype?: string;
    /** File extension (NOTE(review): leading-dot convention not visible here; confirm). */
    extension?: string;
    /** Character set of the content. */
    charset?: string;
    /** File name of the input. */
    filename?: string;
    /** Local filesystem path of the input. */
    localPath?: string;
    /** URL of the input. */
    url?: string;
}
|
|
9
|
+
/** Result of a conversion. */
export interface ConversionResult {
    /** The converted content as markdown. */
    markdown: string;
    /** Optional title for the converted content. */
    title?: string;
}
|
|
13
|
+
/** Optional LLM-backed hooks and settings passed to converters. */
export interface MarkitOptions {
    /** Describe an image, return markdown. Receives raw bytes and mimetype. */
    describe?: (image: Buffer, mimetype: string) => Promise<string>;
    /** Transcribe audio, return text. Receives raw bytes and mimetype. */
    transcribe?: (audio: Buffer, mimetype: string) => Promise<string>;
    /** Extra instructions appended to the image description prompt. */
    prompt?: string;
}
|
|
21
|
+
/** A converter that turns one kind of input into markdown. */
export interface Converter {
    /** Human-readable name for error messages */
    name: string;
    /** Quick check: can this converter handle the given stream? */
    accepts(streamInfo: StreamInfo): boolean;
    /** Convert the source to markdown */
    convert(input: Buffer, streamInfo: StreamInfo, options?: MarkitOptions): Promise<ConversionResult>;
}
|
package/dist/types.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/** Flags selecting the output mode used by `output`. */
export interface OutputOptions {
    /** Request JSON output. */
    json?: boolean;
    /** Request quiet output. */
    quiet?: boolean;
}
|
|
5
|
+
/** Print a success message. */
export declare const success: (msg: string) => void;
/** Print an informational message. */
export declare const info: (msg: string) => void;
/** Print a warning message. */
export declare const warn: (msg: string) => void;
/** Print an error message. */
export declare const error: (msg: string) => void;
/** Return `s` formatted as bold text. */
export declare const bold: (s: string) => string;
/** Return `s` formatted as dimmed text. */
export declare const dim: (s: string) => string;
/** Return `s` formatted as a command. */
export declare const cmd: (s: string) => string;
/** Print a bulleted message. */
export declare const bullet: (msg: string) => void;
/** Print a bulleted message with a dimmed bullet. */
export declare const bulletDim: (msg: string) => void;
/** Print a hint. */
export declare const hint: (msg: string) => void;
/** Print a suggested next command. */
export declare const nextStep: (command: string) => void;
/** Print a section header. */
export declare const header: (title: string) => void;
|
|
17
|
+
/** Print `data` as JSON. */
export declare function jsonOutput(data: object): void;
|
|
18
|
+
/**
 * Run the handler matching the requested output mode.
 *
 * `human` is required; `json` and `quiet` are optional handlers for the
 * corresponding `OutputOptions` flags.
 */
export declare function output(options: OutputOptions, handlers: {
    json?: () => object;
    quiet?: () => void;
    human: () => void;
}): void;
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import chalk from "chalk";
|
|
2
|
+
// Status-line helpers: each prints a coloured marker followed by the message.
// All write with console.log except `error`, which uses console.error.
export const success = (msg) => console.log(chalk.green("✓"), msg);
export const info = (msg) => console.log(chalk.blue("ℹ"), msg);
export const warn = (msg) => console.log(chalk.yellow("⚠"), msg);
export const error = (msg) => console.error(chalk.red("✗"), msg);
// Text-styling helpers: return the styled string without printing it.
export const bold = (s) => chalk.bold(s);
export const dim = (s) => chalk.dim(s);
export const cmd = (s) => chalk.cyan(s);
// Bulleted lines with a green or dimmed bullet.
export const bullet = (msg) => console.log(chalk.green("●"), msg);
export const bulletDim = (msg) => console.log(chalk.dim("●"), msg);
// Indented, dimmed hint text.
export const hint = (msg) => console.log(chalk.dim(` ${msg}`));
// Indented command suggestion, styled with `cmd`.
export const nextStep = (command) => console.log(` ${cmd(command)}`);
// Bold title surrounded by blank lines.
export const header = (title) => {
    console.log();
    console.log(chalk.bold(title));
    console.log();
};
|
|
18
|
+
/**
 * Print a value to stdout as JSON indented by two spaces.
 *
 * @param {object} data Value to serialise.
 */
export function jsonOutput(data) {
    const pretty = JSON.stringify(data, null, 2);
    console.log(pretty);
}
|
|
21
|
+
/**
 * Dispatch to the handler for the requested output mode.
 *
 * JSON mode wins over quiet mode. A mode is used only when both its flag and
 * its handler are present; otherwise the `human` handler runs.
 *
 * @param {{json?: boolean, quiet?: boolean}} options Output flags.
 * @param {{json?: function(): object, quiet?: function(): void, human: function(): void}} handlers
 */
export function output(options, handlers) {
    if (options.json && handlers.json) {
        jsonOutput(handlers.json());
        return;
    }
    if (options.quiet && handlers.quiet) {
        handlers.quiet();
        return;
    }
    handlers.human();
}
|
package/package.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "markit-ai",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Convert anything to markdown. PDFs, DOCX, HTML, URLs — everything gets milled.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"bin": {
|
|
15
|
+
"markit": "./dist/main.js"
|
|
16
|
+
},
|
|
17
|
+
"files": [
|
|
18
|
+
"dist"
|
|
19
|
+
],
|
|
20
|
+
"scripts": {
|
|
21
|
+
"build": "tsc",
|
|
22
|
+
"prepublishOnly": "npm run build",
|
|
23
|
+
"dev": "bun run src/main.ts",
|
|
24
|
+
"build:bun": "bun build --compile src/main.ts --outfile mill",
|
|
25
|
+
"build:bun-linux": "bun build --compile --target=bun-linux-x64 src/main.ts --outfile mill-linux-x64",
|
|
26
|
+
"build:bun-mac-arm": "bun build --compile --target=bun-darwin-arm64 src/main.ts --outfile mill-darwin-arm64",
|
|
27
|
+
"build:bun-mac-x64": "bun build --compile --target=bun-darwin-x64 src/main.ts --outfile mill-darwin-x64",
|
|
28
|
+
"test": "bun test",
|
|
29
|
+
"format": "bunx biome format --write src/",
|
|
30
|
+
"lint": "bunx biome lint src/",
|
|
31
|
+
"check": "bunx biome check src/"
|
|
32
|
+
},
|
|
33
|
+
"repository": {
|
|
34
|
+
"type": "git",
|
|
35
|
+
"url": "git+https://github.com/Michaelliv/markit.git"
|
|
36
|
+
},
|
|
37
|
+
"homepage": "https://github.com/Michaelliv/markit#readme",
|
|
38
|
+
"bugs": {
|
|
39
|
+
"url": "https://github.com/Michaelliv/markit/issues"
|
|
40
|
+
},
|
|
41
|
+
"keywords": [
|
|
42
|
+
"markdown",
|
|
43
|
+
"pdf",
|
|
44
|
+
"docx",
|
|
45
|
+
"html",
|
|
46
|
+
"converter",
|
|
47
|
+
"ai",
|
|
48
|
+
"llm",
|
|
49
|
+
"agents"
|
|
50
|
+
],
|
|
51
|
+
"license": "MIT",
|
|
52
|
+
"devDependencies": {
|
|
53
|
+
"@biomejs/biome": "^2.3.14",
|
|
54
|
+
"@types/jszip": "^3.4.1",
|
|
55
|
+
"@types/turndown": "^5.0.5",
|
|
56
|
+
"typescript": "^5.8.0"
|
|
57
|
+
},
|
|
58
|
+
"dependencies": {
|
|
59
|
+
"chalk": "^5.6.2",
|
|
60
|
+
"commander": "^14.0.3",
|
|
61
|
+
"exifr": "^7.1.3",
|
|
62
|
+
"fast-xml-parser": "^5.5.9",
|
|
63
|
+
"jszip": "^3.10.1",
|
|
64
|
+
"mammoth": "^1.9.0",
|
|
65
|
+
"music-metadata": "^11.12.3",
|
|
66
|
+
"rss-parser": "^3.13.0",
|
|
67
|
+
"turndown": "^7.2.0",
|
|
68
|
+
"unpdf": "^1.4.0"
|
|
69
|
+
}
|
|
70
|
+
}
|