@manfred-kunze-dev/backbone-cli 2.9.0-dev.2 → 2.10.0-dev.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/config.js +7 -2
- package/dist/commands/convert.js +146 -20
- package/dist/commands/extractions.js +4 -4
- package/package.json +1 -1
package/dist/commands/config.js
CHANGED
|
@@ -2,11 +2,16 @@ import { Command } from "commander";
|
|
|
2
2
|
import chalk from "chalk";
|
|
3
3
|
import { store, isJsonOutput } from "../lib/config.js";
|
|
4
4
|
const ALLOWED_KEYS = ["apiKey", "baseUrl", "project"];
|
|
5
|
+
const KEY_ALIASES = {
|
|
6
|
+
"api-key": "apiKey",
|
|
7
|
+
"base-url": "baseUrl",
|
|
8
|
+
};
|
|
5
9
|
/**
 * Resolve a user-supplied config key to its canonical name.
 *
 * The key is first looked up in KEY_ALIASES; when no alias exists the key is
 * used as given. The resolved name must be one of ALLOWED_KEYS.
 *
 * @param {string} key Key as typed by the user.
 * @returns {string} The canonical key.
 * @throws {Error} When the resolved key is not in ALLOWED_KEYS.
 */
function validateKey(key) {
    const alias = KEY_ALIASES[key];
    const canonical = alias ?? key;
    if (ALLOWED_KEYS.includes(canonical)) {
        return canonical;
    }
    // The message echoes the key exactly as the user typed it, not the resolved form.
    throw new Error(`Unknown config key: "${key}". Allowed keys: ${ALLOWED_KEYS.join(", ")}`);
}
|
|
11
16
|
export function makeConfigCommand() {
|
|
12
17
|
const cmd = new Command("config").description("Manage CLI configuration");
|
package/dist/commands/convert.js
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import { Command } from "commander";
|
|
2
|
-
import { readFileSync } from "node:fs";
|
|
2
|
+
import { readFileSync, writeFileSync } from "node:fs";
|
|
3
3
|
import { basename } from "node:path";
|
|
4
|
+
import chalk from "chalk";
|
|
4
5
|
import { getClient, runAction } from "../lib/client.js";
|
|
6
|
+
import { isJsonOutput } from "../lib/config.js";
|
|
5
7
|
import { formatDetail, withSpinner } from "../lib/output.js";
|
|
6
8
|
import { getMimeType } from "../lib/multipart.js";
|
|
7
9
|
function mapFormat(format) {
|
|
@@ -15,30 +17,130 @@ function mapFormat(format) {
|
|
|
15
17
|
};
|
|
16
18
|
return map[format.toLowerCase()] ?? format.toUpperCase();
|
|
17
19
|
}
|
|
20
|
+
/**
 * Pick the primary textual content of a converted document.
 *
 * A requested format wins in the fixed order markdown > text > html, but only
 * when the document carries non-empty content for it. When no requested format
 * has content, the first non-empty field (same order) is returned instead.
 *
 * @param {{ mdContent?: string, textContent?: string, htmlContent?: string }} doc
 *   Converted document; only these three fields are read.
 * @param {string[]} formats Requested formats, compared case-insensitively
 *   against "MD", "TEXT" and "HTML". Other values are ignored.
 * @returns {string} The selected content, or "" when the document has none.
 */
function getPrimaryContent(doc, formats) {
    const requested = new Set(formats.map((f) => f.toUpperCase()));
    const candidates = [
        ["MD", doc.mdContent],
        ["TEXT", doc.textContent],
        ["HTML", doc.htmlContent],
    ];
    for (const [format, content] of candidates) {
        if (requested.has(format) && content) {
            return content;
        }
    }
    // Fall back with `||` rather than `??`: an empty-string field must not
    // shadow a later field that holds real content.
    return doc.mdContent || doc.textContent || doc.htmlContent || "";
}
|
|
34
|
+
/**
 * Format a byte count for display using binary units (1 KB = 1024 B).
 *
 * @param {number} bytes Size in bytes.
 * @returns {string} "<n> B" below 1024, otherwise one decimal in KB or MB.
 */
function formatBytes(bytes) {
    if (bytes < 1024) {
        return `${bytes} B`;
    }
    const kilobytes = (bytes / 1024).toFixed(1);
    // toFixed rounds, so sizes just under 1 MiB would otherwise be shown as
    // "1024.0 KB"; promote those to the MB branch instead.
    if (Number(kilobytes) < 1024) {
        return `${kilobytes} KB`;
    }
    return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
}
|
|
41
|
+
/**
 * Print or persist the result of a conversion.
 *
 * Modes, checked in this order:
 *  1. JSON output enabled for `command` (per isJsonOutput): pretty-print `data`
 *     to stdout and stop.
 *  2. `opts.output` set: write the primary content of every document to that
 *     file, separated by a `---` rule, then report the size and any per-file
 *     warnings on stderr.
 *  3. Otherwise: print a summary line, warnings and each document's content to
 *     stdout.
 *
 * @param {object} data Conversion result; reads `documents`, `errors` and
 *   `processingTime`.
 * @param {object} command Command instance passed on to isJsonOutput.
 * @param {{ format: string[], output?: string }} opts Parsed CLI options.
 * @throws {Error} In the non-JSON modes when `data` is null or undefined.
 */
function outputConvertResult(data, command, opts) {
    if (isJsonOutput(command)) {
        console.log(JSON.stringify(data, null, 2));
        return;
    }
    if (data == null) {
        // Fail with a readable message instead of a TypeError on `.documents`.
        throw new Error("Conversion returned no result");
    }
    const result = data;
    const docs = result.documents ?? [];
    const formats = opts.format.map(mapFormat);
    const warnings = (result.errors ?? []).map((err) => chalk.yellow(`  Warning: ${err.filename ?? "unknown"} — ${err.errorMessage ?? "error"}`));
    // --output: write content to file
    if (opts.output) {
        const content = docs
            .map((doc) => getPrimaryContent(doc, formats))
            .join("\n\n---\n\n");
        writeFileSync(opts.output, content, "utf-8");
        const size = Buffer.byteLength(content, "utf-8");
        console.error(chalk.green(`Wrote ${formatBytes(size)} to ${opts.output}`));
        // Status goes to stderr in file mode, so report partial failures there
        // too rather than dropping them.
        for (const warning of warnings) {
            console.error(warning);
        }
        return;
    }
    // Human-readable output
    const timeStr = result.processingTime
        ? ` (${result.processingTime.toFixed(1)}s)`
        : "";
    const count = docs.length;
    console.log(chalk.green(`Converted ${count} document${count !== 1 ? "s" : ""}${timeStr}`));
    for (const warning of warnings) {
        console.log(warning);
    }
    for (const doc of docs) {
        const content = getPrimaryContent(doc, formats);
        const size = formatBytes(Buffer.byteLength(content, "utf-8"));
        console.log(chalk.dim(`\n--- ${doc.filename ?? "document"} (${size}) ---\n`));
        console.log(content);
    }
}
|
|
79
|
+
/**
 * Poll an async conversion task until it reaches a terminal status, then fetch
 * and return its result.
 *
 * @param {object} client API client exposing `GET(path, { params })` that
 *   resolves to an object with `data` and, on failure, `error`
 *   (NOTE(review): assumes an openapi-fetch style response; confirm in lib/client).
 * @param {string} taskId Identifier of the task to poll.
 * @param {number} waitSeconds Forwarded as the `wait` query parameter of every poll.
 * @returns {Promise<object>} The `data` payload of the task's result endpoint.
 * @throws {Error} When a poll or the result request reports an error.
 */
async function waitForTask(client, taskId, waitSeconds) {
    const terminalStatuses = new Set(["SUCCESS", "PARTIAL_SUCCESS", "FAILURE", "COMPLETED", "FAILED"]);
    const describe = (error) => (typeof error === "string" ? error : error?.message ?? JSON.stringify(error));
    // Poll until done
    for (;;) {
        const { data: status, error } = await client.GET("/v1/convert/tasks/{taskId}", {
            params: {
                path: { taskId },
                query: { wait: waitSeconds },
            },
        });
        if (error) {
            // Without this guard a failing poll never yields a status, so the
            // loop would re-request forever.
            throw new Error(`Failed to poll conversion task ${taskId}: ${describe(error)}`, { cause: error });
        }
        if (status?.taskStatus && terminalStatuses.has(status.taskStatus.toUpperCase())) {
            break;
        }
    }
    // Fetch final result
    const { data, error } = await client.GET("/v1/convert/tasks/{taskId}/result", {
        params: { path: { taskId } },
    });
    if (error) {
        throw new Error(`Failed to fetch result of conversion task ${taskId}: ${describe(error)}`, { cause: error });
    }
    return data;
}
|
|
105
|
+
/**
 * Turn CLI path arguments into base64 source descriptors for the convert request.
 *
 * The special path "-" reads the document from stdin (file descriptor 0) and
 * names it `filename`, or "document" when no filename was given. Every other
 * path is read from disk and named after its basename.
 *
 * @param {string[]} paths File paths, or "-" for stdin.
 * @param {string} [filename] Name used for the stdin document; also drives its
 *   MIME type lookup via getMimeType.
 * @returns {{ kind: "base64", content: string, filename: string, mimeType: * }[]}
 *   One descriptor per entry of `paths`, in order.
 */
function buildSources(paths, filename) {
    let stdinBuffer;
    const readStdin = () => {
        // stdin can only be consumed once; cache it so a repeated "-" does not
        // produce an empty document.
        if (stdinBuffer === undefined) {
            stdinBuffer = readFileSync(0);
        }
        return stdinBuffer;
    };
    const toSource = (buffer, name) => ({
        kind: "base64",
        content: buffer.toString("base64"),
        filename: name,
        mimeType: getMimeType(name),
    });
    return paths.map((p) => (p === "-"
        ? toSource(readStdin(), filename ?? "document")
        : toSource(readFileSync(p), basename(p))));
}
|
|
18
127
|
export function makeConvertCommand() {
|
|
19
128
|
const cmd = new Command("convert").description("Convert documents");
|
|
20
129
|
cmd
|
|
21
130
|
.command("file")
|
|
22
|
-
.description("Convert local files")
|
|
23
|
-
.argument("<paths...>", "File paths to convert")
|
|
131
|
+
.description("Convert local files (use - to read from stdin)")
|
|
132
|
+
.argument("<paths...>", "File paths to convert (use - for stdin)")
|
|
24
133
|
.option("--format <formats...>", "Output formats (md, text, html, json)", ["md"])
|
|
25
134
|
.option("--pipeline <name>", "Pipeline: fast (default), ocr, vlm. For compound files (MSG/EML), applies to attachments; email body is always plain text")
|
|
26
135
|
.option("--image-mode <mode>", "Image export mode (placeholder, embedded)")
|
|
136
|
+
.option("-o, --output <path>", "Write content to file instead of stdout")
|
|
137
|
+
.option("--filename <name>", "Filename for MIME detection when reading from stdin")
|
|
27
138
|
.option("--async", "Run asynchronously")
|
|
139
|
+
.option("-w, --wait [seconds]", "With --async: wait for completion (default: 30s)", parseFloat)
|
|
28
140
|
.action(async (paths, opts, command) => {
|
|
29
141
|
await runAction(command, async () => {
|
|
30
142
|
const client = getClient(command);
|
|
31
|
-
|
|
32
|
-
const sources = paths.map((p) => {
|
|
33
|
-
const buffer = readFileSync(p);
|
|
34
|
-
const filename = basename(p);
|
|
35
|
-
return {
|
|
36
|
-
kind: "base64",
|
|
37
|
-
content: buffer.toString("base64"),
|
|
38
|
-
filename,
|
|
39
|
-
mimeType: getMimeType(filename),
|
|
40
|
-
};
|
|
41
|
-
});
|
|
143
|
+
const sources = buildSources(paths, opts.filename);
|
|
42
144
|
const outputFormats = opts.format.map(mapFormat);
|
|
43
145
|
const body = {
|
|
44
146
|
sources,
|
|
@@ -52,11 +154,21 @@ export function makeConvertCommand() {
|
|
|
52
154
|
};
|
|
53
155
|
if (opts.async) {
|
|
54
156
|
const { data } = await withSpinner("Submitting conversion...", () => client.POST("/v1/convert/source/async", { body }));
|
|
55
|
-
|
|
157
|
+
if (opts.wait !== undefined) {
|
|
158
|
+
const waitSeconds = opts.wait === true ? 30 : opts.wait;
|
|
159
|
+
const taskId = data?.taskId;
|
|
160
|
+
if (!taskId)
|
|
161
|
+
throw new Error("No taskId returned from async submission");
|
|
162
|
+
const result = await withSpinner("Waiting for completion...", () => waitForTask(client, taskId, waitSeconds));
|
|
163
|
+
outputConvertResult(result, command, opts);
|
|
164
|
+
}
|
|
165
|
+
else {
|
|
166
|
+
formatDetail(data, command);
|
|
167
|
+
}
|
|
56
168
|
}
|
|
57
169
|
else {
|
|
58
170
|
const { data } = await withSpinner("Converting...", () => client.POST("/v1/convert/source", { body }));
|
|
59
|
-
|
|
171
|
+
outputConvertResult(data, command, opts);
|
|
60
172
|
}
|
|
61
173
|
});
|
|
62
174
|
});
|
|
@@ -67,7 +179,9 @@ export function makeConvertCommand() {
|
|
|
67
179
|
.option("--format <formats...>", "Output formats (md, text, html, json)", ["md"])
|
|
68
180
|
.option("--pipeline <name>", "Pipeline: fast (default), ocr, vlm. For compound files (MSG/EML), applies to attachments; email body is always plain text")
|
|
69
181
|
.option("--image-mode <mode>", "Image export mode (placeholder, embedded)")
|
|
182
|
+
.option("-o, --output <path>", "Write content to file instead of stdout")
|
|
70
183
|
.option("--async", "Run asynchronously")
|
|
184
|
+
.option("-w, --wait [seconds]", "With --async: wait for completion (default: 30s)", parseFloat)
|
|
71
185
|
.action(async (urls, opts, command) => {
|
|
72
186
|
await runAction(command, async () => {
|
|
73
187
|
const client = getClient(command);
|
|
@@ -88,11 +202,21 @@ export function makeConvertCommand() {
|
|
|
88
202
|
};
|
|
89
203
|
if (opts.async) {
|
|
90
204
|
const { data } = await withSpinner("Submitting conversion...", () => client.POST("/v1/convert/source/async", { body }));
|
|
91
|
-
|
|
205
|
+
if (opts.wait !== undefined) {
|
|
206
|
+
const waitSeconds = opts.wait === true ? 30 : opts.wait;
|
|
207
|
+
const taskId = data?.taskId;
|
|
208
|
+
if (!taskId)
|
|
209
|
+
throw new Error("No taskId returned from async submission");
|
|
210
|
+
const result = await withSpinner("Waiting for completion...", () => waitForTask(client, taskId, waitSeconds));
|
|
211
|
+
outputConvertResult(result, command, opts);
|
|
212
|
+
}
|
|
213
|
+
else {
|
|
214
|
+
formatDetail(data, command);
|
|
215
|
+
}
|
|
92
216
|
}
|
|
93
217
|
else {
|
|
94
218
|
const { data } = await withSpinner("Converting...", () => client.POST("/v1/convert/source", { body }));
|
|
95
|
-
|
|
219
|
+
outputConvertResult(data, command, opts);
|
|
96
220
|
}
|
|
97
221
|
});
|
|
98
222
|
});
|
|
@@ -118,13 +242,15 @@ export function makeConvertCommand() {
|
|
|
118
242
|
.command("result")
|
|
119
243
|
.description("Get async conversion task result")
|
|
120
244
|
.argument("<taskId>", "Task ID")
|
|
121
|
-
.
|
|
245
|
+
.option("-o, --output <path>", "Write content to file instead of stdout")
|
|
246
|
+
.option("--format <formats...>", "Preferred format for --output (md, text, html)", ["md"])
|
|
247
|
+
.action(async (taskId, opts, command) => {
|
|
122
248
|
await runAction(command, async () => {
|
|
123
249
|
const client = getClient(command);
|
|
124
250
|
const { data } = await client.GET("/v1/convert/tasks/{taskId}/result", {
|
|
125
251
|
params: { path: { taskId } },
|
|
126
252
|
});
|
|
127
|
-
|
|
253
|
+
outputConvertResult(data, command, opts);
|
|
128
254
|
});
|
|
129
255
|
});
|
|
130
256
|
return cmd;
|
package/dist/commands/extractions.js
CHANGED
|
@@ -29,8 +29,8 @@ export function makeExtractionsCommand() {
|
|
|
29
29
|
.description("Create a new extraction")
|
|
30
30
|
.requiredOption("--schema <id>", "Schema ID")
|
|
31
31
|
.requiredOption("-m, --model <model>", "Model to use (provider/model)")
|
|
32
|
-
.option("-t, --text <text>", "Input text")
|
|
33
|
-
.option("-f, --file <path>", "Read input text from file")
|
|
32
|
+
.option("-t, --text <text>", "Input text (use - for stdin)")
|
|
33
|
+
.option("-f, --file <path>", "Read input text from file (use - for stdin)")
|
|
34
34
|
.option("--images <paths...>", "Image file paths")
|
|
35
35
|
.option("--version-id <id>", "Schema version ID")
|
|
36
36
|
.option("-l, --label <name>", "Schema label")
|
|
@@ -39,9 +39,9 @@ export function makeExtractionsCommand() {
|
|
|
39
39
|
await runAction(command, async () => {
|
|
40
40
|
const projectId = requireProjectId(command);
|
|
41
41
|
const client = getClient(command);
|
|
42
|
-
let text = opts.text;
|
|
42
|
+
let text = opts.text === "-" ? readFileSync(0, "utf-8") : opts.text;
|
|
43
43
|
if (opts.file) {
|
|
44
|
-
text = readFileSync(opts.file, "utf-8");
|
|
44
|
+
text = opts.file === "-" ? readFileSync(0, "utf-8") : readFileSync(opts.file, "utf-8");
|
|
45
45
|
}
|
|
46
46
|
const images = opts.images ? readImages(opts.images) : undefined;
|
|
47
47
|
const body = {
|