@robin7331/papyrus-cli 0.1.6 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -30
- package/dist/cli.js +29 -27
- package/dist/cliHelpers.d.ts +4 -7
- package/dist/cliHelpers.js +20 -27
- package/dist/openaiPdfToMarkdown.d.ts +1 -1
- package/dist/openaiPdfToMarkdown.js +25 -29
- package/package.json +1 -1
- package/src/cli.ts +30 -28
- package/src/cliHelpers.ts +21 -33
- package/src/openaiPdfToMarkdown.ts +35 -37
- package/test/cliHelpers.test.ts +32 -32
package/README.md
CHANGED
|
@@ -27,20 +27,20 @@ papyrus --help
|
|
|
27
27
|
# Show installed CLI version
|
|
28
28
|
papyrus --version
|
|
29
29
|
|
|
30
|
-
# Single file (
|
|
30
|
+
# Single file (default behavior; if no API key is found, Papyrus prompts you to paste one)
|
|
31
31
|
papyrus ./path/to/input.pdf
|
|
32
32
|
|
|
33
|
-
# Single file with explicit
|
|
33
|
+
# Single file with explicit output extension/output/model
|
|
34
34
|
papyrus ./path/to/input.pdf --format md --output ./out/result.md --model gpt-4o-mini
|
|
35
35
|
|
|
36
|
-
#
|
|
36
|
+
# Default conversion with extra instructions
|
|
37
37
|
papyrus ./path/to/input.pdf --instructions "Prioritize table accuracy." --format txt
|
|
38
38
|
|
|
39
|
-
# Prompt
|
|
40
|
-
papyrus ./path/to/input.pdf --
|
|
39
|
+
# Prompt conversion (inline prompt)
|
|
40
|
+
papyrus ./path/to/input.pdf --prompt "Extract all invoice line items as bullet points." --format md
|
|
41
41
|
|
|
42
|
-
# Prompt
|
|
43
|
-
papyrus ./path/to/input.pdf --
|
|
42
|
+
# Prompt conversion (prompt file)
|
|
43
|
+
papyrus ./path/to/input.pdf --prompt-file ./my-prompt.txt --format txt
|
|
44
44
|
|
|
45
45
|
# Folder mode (recursive scan, asks for confirmation)
|
|
46
46
|
papyrus ./path/to/folder
|
|
@@ -110,14 +110,14 @@ papyrus --version
|
|
|
110
110
|
|
|
111
111
|
### `--format <format>`
|
|
112
112
|
|
|
113
|
-
Output
|
|
114
|
-
|
|
115
|
-
|
|
113
|
+
Output file extension override. Any extension is allowed (for example `md`, `txt`, `csv`, `json`).
|
|
114
|
+
This flag controls the output filename extension only.
|
|
115
|
+
When provided, Papyrus also passes the extension as a guidance hint to the model.
|
|
116
116
|
|
|
117
117
|
Example:
|
|
118
118
|
|
|
119
119
|
```bash
|
|
120
|
-
papyrus ./docs/invoice.pdf --format
|
|
120
|
+
papyrus ./docs/invoice.pdf --format csv
|
|
121
121
|
```
|
|
122
122
|
|
|
123
123
|
### `-o, --output <path>`
|
|
@@ -132,46 +132,34 @@ Example:
|
|
|
132
132
|
papyrus ./docs --output ./converted
|
|
133
133
|
```
|
|
134
134
|
|
|
135
|
-
### `--mode <mode>`
|
|
136
|
-
|
|
137
|
-
Conversion mode:
|
|
138
|
-
- `auto` (default): built-in conversion behavior.
|
|
139
|
-
- `prompt`: use your own prompt via `--prompt` or `--prompt-file`.
|
|
140
|
-
|
|
141
|
-
Example:
|
|
142
|
-
|
|
143
|
-
```bash
|
|
144
|
-
papyrus ./docs/invoice.pdf --mode prompt --prompt "Extract all line items."
|
|
145
|
-
```
|
|
146
|
-
|
|
147
135
|
### `--instructions <text>`
|
|
148
136
|
|
|
149
|
-
Additional conversion instructions
|
|
137
|
+
Additional conversion instructions for default conversion behavior. Cannot be combined with `--prompt` or `--prompt-file`.
|
|
150
138
|
|
|
151
139
|
Example:
|
|
152
140
|
|
|
153
141
|
```bash
|
|
154
|
-
papyrus ./docs/invoice.pdf --
|
|
142
|
+
papyrus ./docs/invoice.pdf --instructions "Keep table columns aligned."
|
|
155
143
|
```
|
|
156
144
|
|
|
157
145
|
### `--prompt <text>`
|
|
158
146
|
|
|
159
|
-
Inline prompt text for
|
|
147
|
+
Inline prompt text for prompt-based conversion. Must be non-empty. Use exactly one of `--prompt` or `--prompt-file`.
|
|
160
148
|
|
|
161
149
|
Example:
|
|
162
150
|
|
|
163
151
|
```bash
|
|
164
|
-
papyrus ./docs/invoice.pdf --
|
|
152
|
+
papyrus ./docs/invoice.pdf --prompt "Summarize payment terms."
|
|
165
153
|
```
|
|
166
154
|
|
|
167
155
|
### `--prompt-file <path>`
|
|
168
156
|
|
|
169
|
-
Path to a text file containing the prompt for
|
|
157
|
+
Path to a text file containing the prompt for prompt-based conversion. File must contain non-empty text. Use exactly one of `--prompt` or `--prompt-file`.
|
|
170
158
|
|
|
171
159
|
Example:
|
|
172
160
|
|
|
173
161
|
```bash
|
|
174
|
-
papyrus ./docs/invoice.pdf --
|
|
162
|
+
papyrus ./docs/invoice.pdf --prompt-file ./my-prompt.txt
|
|
175
163
|
```
|
|
176
164
|
|
|
177
165
|
### `-m, --model <model>`
|
|
@@ -206,7 +194,9 @@ papyrus ./docs --yes
|
|
|
206
194
|
|
|
207
195
|
## Notes
|
|
208
196
|
|
|
209
|
-
- In
|
|
197
|
+
- In default conversion (without `--prompt`/`--prompt-file`), the model returns structured JSON with `format` + `content`.
|
|
198
|
+
- Without `--format`, output extension follows model-selected content format (`.md` or `.txt`).
|
|
199
|
+
- With `--format`, only the output extension changes.
|
|
210
200
|
- Single-file input now also shows a live worker lane (spinner in TTY) while conversion is running.
|
|
211
201
|
- Folder input is scanned recursively for `.pdf` files and processed in parallel.
|
|
212
202
|
- In folder mode, `--output` must be a directory path and mirrored subfolders are preserved.
|
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ import { dirname, join, relative, resolve } from "node:path";
|
|
|
6
6
|
import { Command } from "commander";
|
|
7
7
|
import { clearStoredApiKey, getConfigFilePath, getStoredApiKey, maskApiKey, setStoredApiKey } from "./config.js";
|
|
8
8
|
import { convertPdf } from "./openaiPdfToMarkdown.js";
|
|
9
|
-
import { defaultOutputPath, formatDurationMs, isPdfPath, looksLikeFileOutput, parseConcurrency, parseFormat,
|
|
9
|
+
import { defaultOutputPath, formatDurationMs, isPdfPath, looksLikeFileOutput, parseConcurrency, parseFormat, resolveFolderOutputPath, truncate, validateOptionCombination } from "./cliHelpers.js";
|
|
10
10
|
const program = new Command();
|
|
11
11
|
const configFilePath = getConfigFilePath();
|
|
12
12
|
const OPENAI_API_KEYS_URL = "https://platform.openai.com/settings/organization/api-keys";
|
|
@@ -20,24 +20,24 @@ program
|
|
|
20
20
|
.option("-m, --model <model>", "OpenAI model to use", "gpt-4o-mini")
|
|
21
21
|
.option("--concurrency <n>", "Max parallel workers for folder input (default: 10)", parseConcurrency)
|
|
22
22
|
.option("-y, --yes", "Skip confirmation prompt in folder mode")
|
|
23
|
-
.option("--
|
|
24
|
-
.option("--
|
|
25
|
-
.option("--
|
|
26
|
-
.option("--prompt <
|
|
27
|
-
.option("--prompt-file <path>", "Path to file containing prompt text for prompt mode")
|
|
23
|
+
.option("--format <format>", "Output file extension override (for example: md, txt, csv, json)", parseFormat)
|
|
24
|
+
.option("--instructions <text>", "Additional conversion instructions (only when not using --prompt/--prompt-file)")
|
|
25
|
+
.option("--prompt <text>", "Custom prompt text (enables prompt mode)")
|
|
26
|
+
.option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
|
|
28
27
|
.action(async (input, options) => {
|
|
29
28
|
const inputPath = resolve(input);
|
|
30
29
|
const startedAt = Date.now();
|
|
31
30
|
try {
|
|
32
31
|
validateOptionCombination(options);
|
|
33
32
|
const promptText = await resolvePromptText(options);
|
|
33
|
+
const conversionMode = resolveConversionMode(promptText);
|
|
34
34
|
const inputKind = await detectInputKind(inputPath);
|
|
35
35
|
let usageTotals = emptyUsage();
|
|
36
36
|
if (inputKind === "file") {
|
|
37
|
-
usageTotals = await processSingleFile(inputPath, options, promptText);
|
|
37
|
+
usageTotals = await processSingleFile(inputPath, options, conversionMode, promptText);
|
|
38
38
|
}
|
|
39
39
|
else {
|
|
40
|
-
const summary = await processFolder(inputPath, options, promptText);
|
|
40
|
+
const summary = await processFolder(inputPath, options, conversionMode, promptText);
|
|
41
41
|
usageTotals = summary.usage;
|
|
42
42
|
if (!summary.cancelled && summary.failed > 0) {
|
|
43
43
|
process.exitCode = 1;
|
|
@@ -112,7 +112,7 @@ program.parseAsync(process.argv).catch((error) => {
|
|
|
112
112
|
console.error(`Command failed: ${message}`);
|
|
113
113
|
process.exitCode = 1;
|
|
114
114
|
});
|
|
115
|
-
async function processSingleFile(inputPath, options, promptText) {
|
|
115
|
+
async function processSingleFile(inputPath, options, mode, promptText) {
|
|
116
116
|
if (!isPdfPath(inputPath)) {
|
|
117
117
|
throw new Error("Input file must have a .pdf extension.");
|
|
118
118
|
}
|
|
@@ -131,22 +131,23 @@ async function processSingleFile(inputPath, options, promptText) {
|
|
|
131
131
|
const result = await convertPdf({
|
|
132
132
|
inputPath,
|
|
133
133
|
model: options.model,
|
|
134
|
-
mode
|
|
135
|
-
format: options.format,
|
|
134
|
+
mode,
|
|
136
135
|
instructions: options.instructions,
|
|
137
|
-
promptText
|
|
136
|
+
promptText,
|
|
137
|
+
outputExtensionHint: options.format
|
|
138
138
|
});
|
|
139
|
-
const
|
|
139
|
+
const outputExtension = options.format ?? result.format;
|
|
140
|
+
const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, outputExtension));
|
|
140
141
|
await mkdir(dirname(outputPath), { recursive: true });
|
|
141
142
|
await writeFile(outputPath, result.content, "utf8");
|
|
142
143
|
if (workerDashboard) {
|
|
143
|
-
workerDashboard.setWorkerDone(0, displayInput, `${
|
|
144
|
+
workerDashboard.setWorkerDone(0, displayInput, `${outputExtension} in ${formatDurationMs(Date.now() - startedAt)}`);
|
|
144
145
|
workerDashboard.setSummary(1, 0);
|
|
145
146
|
}
|
|
146
147
|
else {
|
|
147
|
-
console.log(`[worker-1] Done ${displayInput} -> ${outputPath} (${
|
|
148
|
+
console.log(`[worker-1] Done ${displayInput} -> ${outputPath} (${outputExtension}, ${formatDurationMs(Date.now() - startedAt)})`);
|
|
148
149
|
}
|
|
149
|
-
console.log(`Output (
|
|
150
|
+
console.log(`Output (.${outputExtension}) written to: ${outputPath}`);
|
|
150
151
|
return result.usage;
|
|
151
152
|
}
|
|
152
153
|
catch (error) {
|
|
@@ -164,9 +165,9 @@ async function processSingleFile(inputPath, options, promptText) {
|
|
|
164
165
|
workerDashboard?.stop();
|
|
165
166
|
}
|
|
166
167
|
}
|
|
167
|
-
async function processFolder(inputDir, options, promptText) {
|
|
168
|
+
async function processFolder(inputDir, options, mode, promptText) {
|
|
168
169
|
if (options.output && looksLikeFileOutput(options.output)) {
|
|
169
|
-
throw new Error("In folder mode, --output must be a directory path
|
|
170
|
+
throw new Error("In folder mode, --output must be a directory path.");
|
|
170
171
|
}
|
|
171
172
|
const files = await collectPdfFiles(inputDir);
|
|
172
173
|
if (files.length === 0) {
|
|
@@ -200,21 +201,22 @@ async function processFolder(inputDir, options, promptText) {
|
|
|
200
201
|
const result = await convertPdf({
|
|
201
202
|
inputPath: filePath,
|
|
202
203
|
model: options.model,
|
|
203
|
-
mode
|
|
204
|
-
format: options.format,
|
|
204
|
+
mode,
|
|
205
205
|
instructions: options.instructions,
|
|
206
|
-
promptText
|
|
206
|
+
promptText,
|
|
207
|
+
outputExtensionHint: options.format
|
|
207
208
|
});
|
|
208
|
-
const
|
|
209
|
+
const outputExtension = options.format ?? result.format;
|
|
210
|
+
const outputPath = resolveFolderOutputPath(filePath, inputDir, outputRoot, outputExtension);
|
|
209
211
|
await mkdir(dirname(outputPath), { recursive: true });
|
|
210
212
|
await writeFile(outputPath, result.content, "utf8");
|
|
211
213
|
succeeded += 1;
|
|
212
214
|
mergeUsage(usage, result.usage);
|
|
213
215
|
if (workerDashboard) {
|
|
214
|
-
workerDashboard.setWorkerDone(workerId, relativeInput, `${
|
|
216
|
+
workerDashboard.setWorkerDone(workerId, relativeInput, `${outputExtension} in ${formatDurationMs(Date.now() - startedAt)}`);
|
|
215
217
|
}
|
|
216
218
|
else {
|
|
217
|
-
console.log(`[worker-${workerId + 1}] Done ${relativeInput} -> ${outputPath} (${
|
|
219
|
+
console.log(`[worker-${workerId + 1}] Done ${relativeInput} -> ${outputPath} (${outputExtension}, ${formatDurationMs(Date.now() - startedAt)})`);
|
|
218
220
|
}
|
|
219
221
|
}
|
|
220
222
|
catch (error) {
|
|
@@ -250,9 +252,6 @@ async function processFolder(inputDir, options, promptText) {
|
|
|
250
252
|
return { total: files.length, succeeded, failed, cancelled: false, usage };
|
|
251
253
|
}
|
|
252
254
|
async function resolvePromptText(options) {
|
|
253
|
-
if (options.mode !== "prompt") {
|
|
254
|
-
return undefined;
|
|
255
|
-
}
|
|
256
255
|
if (options.prompt) {
|
|
257
256
|
const prompt = options.prompt.trim();
|
|
258
257
|
if (!prompt) {
|
|
@@ -270,6 +269,9 @@ async function resolvePromptText(options) {
|
|
|
270
269
|
}
|
|
271
270
|
return promptFromFile;
|
|
272
271
|
}
|
|
272
|
+
function resolveConversionMode(promptText) {
|
|
273
|
+
return promptText ? "prompt" : "auto";
|
|
274
|
+
}
|
|
273
275
|
async function handleConfigInit(options) {
|
|
274
276
|
const existingKey = await getStoredApiKey();
|
|
275
277
|
if (existingKey && !options.force) {
|
package/dist/cliHelpers.d.ts
CHANGED
|
@@ -1,21 +1,18 @@
|
|
|
1
|
-
import { type ConversionMode, type OutputFormat } from "./openaiPdfToMarkdown.js";
|
|
2
1
|
export type CliOptions = {
|
|
3
2
|
output?: string;
|
|
4
3
|
model: string;
|
|
5
4
|
concurrency?: number;
|
|
6
5
|
yes?: boolean;
|
|
7
|
-
|
|
8
|
-
format?: OutputFormat;
|
|
6
|
+
format?: string;
|
|
9
7
|
instructions?: string;
|
|
10
8
|
prompt?: string;
|
|
11
9
|
promptFile?: string;
|
|
12
10
|
};
|
|
13
|
-
export declare function
|
|
14
|
-
export declare function parseFormat(value: string): OutputFormat;
|
|
11
|
+
export declare function parseFormat(value: string): string;
|
|
15
12
|
export declare function parseConcurrency(value: string): number;
|
|
16
13
|
export declare function validateOptionCombination(options: CliOptions): void;
|
|
17
|
-
export declare function defaultOutputPath(inputPath: string,
|
|
18
|
-
export declare function resolveFolderOutputPath(inputPath: string, inputRoot: string, outputRoot: string | undefined,
|
|
14
|
+
export declare function defaultOutputPath(inputPath: string, extension: string): string;
|
|
15
|
+
export declare function resolveFolderOutputPath(inputPath: string, inputRoot: string, outputRoot: string | undefined, extension: string): string;
|
|
19
16
|
export declare function isPdfPath(inputPath: string): boolean;
|
|
20
17
|
export declare function looksLikeFileOutput(outputPath: string): boolean;
|
|
21
18
|
export declare function truncate(value: string, maxLength: number): string;
|
package/dist/cliHelpers.js
CHANGED
|
@@ -1,16 +1,14 @@
|
|
|
1
1
|
import { InvalidArgumentError } from "commander";
|
|
2
2
|
import { basename, dirname, extname, join, relative } from "node:path";
|
|
3
|
-
export function parseMode(value) {
|
|
4
|
-
if (value === "auto" || value === "prompt") {
|
|
5
|
-
return value;
|
|
6
|
-
}
|
|
7
|
-
throw new InvalidArgumentError("Mode must be either 'auto' or 'prompt'.");
|
|
8
|
-
}
|
|
9
3
|
export function parseFormat(value) {
|
|
10
|
-
|
|
11
|
-
|
|
4
|
+
const normalized = value.trim().replace(/^\.+/, "");
|
|
5
|
+
if (!normalized) {
|
|
6
|
+
throw new InvalidArgumentError("Format must be a non-empty file extension.");
|
|
7
|
+
}
|
|
8
|
+
if (normalized.includes("/") || normalized.includes("\\")) {
|
|
9
|
+
throw new InvalidArgumentError("Format must be a file extension, not a path.");
|
|
12
10
|
}
|
|
13
|
-
|
|
11
|
+
return normalized;
|
|
14
12
|
}
|
|
15
13
|
export function parseConcurrency(value) {
|
|
16
14
|
const parsed = Number(value);
|
|
@@ -20,35 +18,30 @@ export function parseConcurrency(value) {
|
|
|
20
18
|
return parsed;
|
|
21
19
|
}
|
|
22
20
|
export function validateOptionCombination(options) {
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
throw new Error("Prompt mode requires exactly one of --prompt or --prompt-file.");
|
|
27
|
-
}
|
|
28
|
-
if (options.instructions) {
|
|
29
|
-
throw new Error("--instructions is only supported in auto mode.");
|
|
30
|
-
}
|
|
31
|
-
return;
|
|
21
|
+
const promptSourceCount = Number(Boolean(options.prompt)) + Number(Boolean(options.promptFile));
|
|
22
|
+
if (promptSourceCount > 1) {
|
|
23
|
+
throw new Error("Use exactly one of --prompt or --prompt-file.");
|
|
32
24
|
}
|
|
33
|
-
if (
|
|
34
|
-
throw new Error("--
|
|
25
|
+
if (promptSourceCount === 1 && options.instructions) {
|
|
26
|
+
throw new Error("--instructions cannot be combined with --prompt or --prompt-file.");
|
|
35
27
|
}
|
|
36
28
|
}
|
|
37
|
-
export function defaultOutputPath(inputPath,
|
|
38
|
-
const
|
|
29
|
+
export function defaultOutputPath(inputPath, extension) {
|
|
30
|
+
const normalizedExtension = extension.startsWith(".") ? extension : `.${extension}`;
|
|
39
31
|
if (extname(inputPath).toLowerCase() === ".pdf") {
|
|
40
|
-
return inputPath.slice(0, -4) +
|
|
32
|
+
return inputPath.slice(0, -4) + normalizedExtension;
|
|
41
33
|
}
|
|
42
|
-
return inputPath +
|
|
34
|
+
return inputPath + normalizedExtension;
|
|
43
35
|
}
|
|
44
|
-
export function resolveFolderOutputPath(inputPath, inputRoot, outputRoot,
|
|
36
|
+
export function resolveFolderOutputPath(inputPath, inputRoot, outputRoot, extension) {
|
|
45
37
|
if (!outputRoot) {
|
|
46
|
-
return defaultOutputPath(inputPath,
|
|
38
|
+
return defaultOutputPath(inputPath, extension);
|
|
47
39
|
}
|
|
48
40
|
const relativePath = relative(inputRoot, inputPath);
|
|
49
41
|
const relativeDir = dirname(relativePath);
|
|
50
42
|
const base = basename(relativePath, extname(relativePath));
|
|
51
|
-
const
|
|
43
|
+
const normalizedExtension = extension.startsWith(".") ? extension.slice(1) : extension;
|
|
44
|
+
const filename = `${base}.${normalizedExtension}`;
|
|
52
45
|
if (relativeDir === ".") {
|
|
53
46
|
return join(outputRoot, filename);
|
|
54
47
|
}
|
|
@@ -2,9 +2,9 @@ export type ConvertOptions = {
|
|
|
2
2
|
inputPath: string;
|
|
3
3
|
model: string;
|
|
4
4
|
mode: ConversionMode;
|
|
5
|
-
format?: OutputFormat;
|
|
6
5
|
instructions?: string;
|
|
7
6
|
promptText?: string;
|
|
7
|
+
outputExtensionHint?: string;
|
|
8
8
|
};
|
|
9
9
|
export type ConversionMode = "auto" | "prompt";
|
|
10
10
|
export type OutputFormat = "md" | "txt";
|
|
@@ -54,13 +54,13 @@ export async function convertPdf(options) {
|
|
|
54
54
|
outputTokens: result.state.usage.outputTokens,
|
|
55
55
|
totalTokens: result.state.usage.totalTokens
|
|
56
56
|
};
|
|
57
|
-
if (options.mode === "auto"
|
|
57
|
+
if (options.mode === "auto") {
|
|
58
58
|
return { ...parseAutoResponse(rawOutput), usage };
|
|
59
59
|
}
|
|
60
|
-
|
|
61
|
-
return { format, content: rawOutput, usage };
|
|
60
|
+
return { format: "txt", content: rawOutput, usage };
|
|
62
61
|
}
|
|
63
62
|
function buildPromptText(options) {
|
|
63
|
+
const outputExtensionHint = normalizeExtensionHint(options.outputExtensionHint);
|
|
64
64
|
if (options.mode === "prompt") {
|
|
65
65
|
if (!options.promptText) {
|
|
66
66
|
throw new Error("promptText is required when mode is 'prompt'.");
|
|
@@ -70,35 +70,16 @@ function buildPromptText(options) {
|
|
|
70
70
|
"Return only the final converted content.",
|
|
71
71
|
`User prompt:\n${options.promptText}`
|
|
72
72
|
];
|
|
73
|
-
if (
|
|
74
|
-
promptModeParts.push(
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
else {
|
|
80
|
-
promptModeParts.push("If the prompt does not enforce a format, prefer plain text without Markdown syntax.");
|
|
73
|
+
if (outputExtensionHint) {
|
|
74
|
+
promptModeParts.push([
|
|
75
|
+
`Output file extension hint: .${outputExtensionHint}.`,
|
|
76
|
+
"Prefer content that is practical for saving under this extension.",
|
|
77
|
+
"Treat this as guidance and still follow the user prompt exactly."
|
|
78
|
+
].join(" "));
|
|
81
79
|
}
|
|
82
80
|
return promptModeParts.join("\n\n");
|
|
83
81
|
}
|
|
84
|
-
|
|
85
|
-
return withAdditionalInstructions([
|
|
86
|
-
"Convert this PDF into clean GitHub-flavored Markdown.",
|
|
87
|
-
"Preserve headings, paragraphs, lists, and tables.",
|
|
88
|
-
"Render tables as Markdown pipe tables with header separators.",
|
|
89
|
-
"If cells are empty due to merged cells, keep the table readable and consistent.",
|
|
90
|
-
"Return only Markdown without code fences."
|
|
91
|
-
].join(" "), options.instructions);
|
|
92
|
-
}
|
|
93
|
-
if (options.format === "txt") {
|
|
94
|
-
return withAdditionalInstructions([
|
|
95
|
-
"Convert this PDF into clean plain text.",
|
|
96
|
-
"Preserve reading order and paragraph boundaries.",
|
|
97
|
-
"Represent tables in readable plain text (no Markdown syntax).",
|
|
98
|
-
"Return plain text only and do not use Markdown syntax or code fences."
|
|
99
|
-
].join(" "), options.instructions);
|
|
100
|
-
}
|
|
101
|
-
return withAdditionalInstructions([
|
|
82
|
+
let autoPrompt = withAdditionalInstructions([
|
|
102
83
|
"Decide the best output format for this PDF: Markdown ('md') or plain text ('txt').",
|
|
103
84
|
"Choose 'md' for documents with meaningful headings, lists, and tables that benefit from Markdown.",
|
|
104
85
|
"Choose 'txt' for mostly linear text where Markdown adds little value.",
|
|
@@ -108,6 +89,14 @@ function buildPromptText(options) {
|
|
|
108
89
|
"If format is 'txt', output plain text only and do not use Markdown syntax.",
|
|
109
90
|
"Do not wrap the JSON in code fences."
|
|
110
91
|
].join("\n"), options.instructions);
|
|
92
|
+
if (outputExtensionHint) {
|
|
93
|
+
autoPrompt = `${autoPrompt}\n\n${[
|
|
94
|
+
`Output file extension hint: .${outputExtensionHint}.`,
|
|
95
|
+
"Prefer content that is practical for that extension while still returning JSON with format='md' or 'txt'.",
|
|
96
|
+
"This is guidance only and should not break the required JSON schema."
|
|
97
|
+
].join(" ")}`;
|
|
98
|
+
}
|
|
99
|
+
return autoPrompt;
|
|
111
100
|
}
|
|
112
101
|
function withAdditionalInstructions(base, additional) {
|
|
113
102
|
if (!additional) {
|
|
@@ -115,6 +104,13 @@ function withAdditionalInstructions(base, additional) {
|
|
|
115
104
|
}
|
|
116
105
|
return `${base}\n\nAdditional user instructions:\n${additional}`;
|
|
117
106
|
}
|
|
107
|
+
function normalizeExtensionHint(extension) {
|
|
108
|
+
if (!extension) {
|
|
109
|
+
return undefined;
|
|
110
|
+
}
|
|
111
|
+
const normalized = extension.trim().replace(/^\.+/, "");
|
|
112
|
+
return normalized || undefined;
|
|
113
|
+
}
|
|
118
114
|
function parseAutoResponse(rawOutput) {
|
|
119
115
|
let candidate = rawOutput.trim();
|
|
120
116
|
const fencedMatch = candidate.match(/```(?:json)?\s*([\s\S]*?)```/i);
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
} from "./config.js";
|
|
15
15
|
import {
|
|
16
16
|
convertPdf,
|
|
17
|
+
type ConversionMode,
|
|
17
18
|
type ConvertUsage
|
|
18
19
|
} from "./openaiPdfToMarkdown.js";
|
|
19
20
|
import {
|
|
@@ -23,7 +24,6 @@ import {
|
|
|
23
24
|
looksLikeFileOutput,
|
|
24
25
|
parseConcurrency,
|
|
25
26
|
parseFormat,
|
|
26
|
-
parseMode,
|
|
27
27
|
resolveFolderOutputPath,
|
|
28
28
|
truncate,
|
|
29
29
|
type CliOptions,
|
|
@@ -52,14 +52,13 @@ program
|
|
|
52
52
|
parseConcurrency
|
|
53
53
|
)
|
|
54
54
|
.option("-y, --yes", "Skip confirmation prompt in folder mode")
|
|
55
|
-
.option("--
|
|
56
|
-
.option("--format <format>", "Output format override: md or txt", parseFormat)
|
|
55
|
+
.option("--format <format>", "Output file extension override (for example: md, txt, csv, json)", parseFormat)
|
|
57
56
|
.option(
|
|
58
57
|
"--instructions <text>",
|
|
59
|
-
"Additional conversion instructions
|
|
58
|
+
"Additional conversion instructions (only when not using --prompt/--prompt-file)"
|
|
60
59
|
)
|
|
61
|
-
.option("--prompt <text>", "Custom prompt text
|
|
62
|
-
.option("--prompt-file <path>", "Path to file containing prompt text
|
|
60
|
+
.option("--prompt <text>", "Custom prompt text (enables prompt mode)")
|
|
61
|
+
.option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
|
|
63
62
|
.action(async (input: string, options: CliOptions) => {
|
|
64
63
|
const inputPath = resolve(input);
|
|
65
64
|
const startedAt = Date.now();
|
|
@@ -68,13 +67,14 @@ program
|
|
|
68
67
|
validateOptionCombination(options);
|
|
69
68
|
|
|
70
69
|
const promptText = await resolvePromptText(options);
|
|
70
|
+
const conversionMode = resolveConversionMode(promptText);
|
|
71
71
|
const inputKind = await detectInputKind(inputPath);
|
|
72
72
|
let usageTotals: ConvertUsage = emptyUsage();
|
|
73
73
|
|
|
74
74
|
if (inputKind === "file") {
|
|
75
|
-
usageTotals = await processSingleFile(inputPath, options, promptText);
|
|
75
|
+
usageTotals = await processSingleFile(inputPath, options, conversionMode, promptText);
|
|
76
76
|
} else {
|
|
77
|
-
const summary = await processFolder(inputPath, options, promptText);
|
|
77
|
+
const summary = await processFolder(inputPath, options, conversionMode, promptText);
|
|
78
78
|
usageTotals = summary.usage;
|
|
79
79
|
if (!summary.cancelled && summary.failed > 0) {
|
|
80
80
|
process.exitCode = 1;
|
|
@@ -157,6 +157,7 @@ program.parseAsync(process.argv).catch((error: unknown) => {
|
|
|
157
157
|
async function processSingleFile(
|
|
158
158
|
inputPath: string,
|
|
159
159
|
options: CliOptions,
|
|
160
|
+
mode: ConversionMode,
|
|
160
161
|
promptText?: string
|
|
161
162
|
): Promise<ConvertUsage> {
|
|
162
163
|
if (!isPdfPath(inputPath)) {
|
|
@@ -180,13 +181,14 @@ async function processSingleFile(
|
|
|
180
181
|
const result = await convertPdf({
|
|
181
182
|
inputPath,
|
|
182
183
|
model: options.model,
|
|
183
|
-
mode
|
|
184
|
-
format: options.format,
|
|
184
|
+
mode,
|
|
185
185
|
instructions: options.instructions,
|
|
186
|
-
promptText
|
|
186
|
+
promptText,
|
|
187
|
+
outputExtensionHint: options.format
|
|
187
188
|
});
|
|
188
189
|
|
|
189
|
-
const
|
|
190
|
+
const outputExtension = options.format ?? result.format;
|
|
191
|
+
const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, outputExtension));
|
|
190
192
|
await mkdir(dirname(outputPath), { recursive: true });
|
|
191
193
|
await writeFile(outputPath, result.content, "utf8");
|
|
192
194
|
|
|
@@ -194,16 +196,16 @@ async function processSingleFile(
|
|
|
194
196
|
workerDashboard.setWorkerDone(
|
|
195
197
|
0,
|
|
196
198
|
displayInput,
|
|
197
|
-
`${
|
|
199
|
+
`${outputExtension} in ${formatDurationMs(Date.now() - startedAt)}`
|
|
198
200
|
);
|
|
199
201
|
workerDashboard.setSummary(1, 0);
|
|
200
202
|
} else {
|
|
201
203
|
console.log(
|
|
202
|
-
`[worker-1] Done ${displayInput} -> ${outputPath} (${
|
|
204
|
+
`[worker-1] Done ${displayInput} -> ${outputPath} (${outputExtension}, ${formatDurationMs(Date.now() - startedAt)})`
|
|
203
205
|
);
|
|
204
206
|
}
|
|
205
207
|
|
|
206
|
-
console.log(`Output (
|
|
208
|
+
console.log(`Output (.${outputExtension}) written to: ${outputPath}`);
|
|
207
209
|
return result.usage;
|
|
208
210
|
} catch (error) {
|
|
209
211
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -237,12 +239,11 @@ type FolderSummary = {
|
|
|
237
239
|
async function processFolder(
|
|
238
240
|
inputDir: string,
|
|
239
241
|
options: CliOptions,
|
|
242
|
+
mode: ConversionMode,
|
|
240
243
|
promptText?: string
|
|
241
244
|
): Promise<FolderSummary> {
|
|
242
245
|
if (options.output && looksLikeFileOutput(options.output)) {
|
|
243
|
-
throw new Error(
|
|
244
|
-
"In folder mode, --output must be a directory path (not a .md/.txt file path)."
|
|
245
|
-
);
|
|
246
|
+
throw new Error("In folder mode, --output must be a directory path.");
|
|
246
247
|
}
|
|
247
248
|
|
|
248
249
|
const files = await collectPdfFiles(inputDir);
|
|
@@ -282,13 +283,14 @@ async function processFolder(
|
|
|
282
283
|
const result = await convertPdf({
|
|
283
284
|
inputPath: filePath,
|
|
284
285
|
model: options.model,
|
|
285
|
-
mode
|
|
286
|
-
format: options.format,
|
|
286
|
+
mode,
|
|
287
287
|
instructions: options.instructions,
|
|
288
|
-
promptText
|
|
288
|
+
promptText,
|
|
289
|
+
outputExtensionHint: options.format
|
|
289
290
|
});
|
|
290
291
|
|
|
291
|
-
const
|
|
292
|
+
const outputExtension = options.format ?? result.format;
|
|
293
|
+
const outputPath = resolveFolderOutputPath(filePath, inputDir, outputRoot, outputExtension);
|
|
292
294
|
await mkdir(dirname(outputPath), { recursive: true });
|
|
293
295
|
await writeFile(outputPath, result.content, "utf8");
|
|
294
296
|
succeeded += 1;
|
|
@@ -298,11 +300,11 @@ async function processFolder(
|
|
|
298
300
|
workerDashboard.setWorkerDone(
|
|
299
301
|
workerId,
|
|
300
302
|
relativeInput,
|
|
301
|
-
`${
|
|
303
|
+
`${outputExtension} in ${formatDurationMs(Date.now() - startedAt)}`
|
|
302
304
|
);
|
|
303
305
|
} else {
|
|
304
306
|
console.log(
|
|
305
|
-
`[worker-${workerId + 1}] Done ${relativeInput} -> ${outputPath} (${
|
|
307
|
+
`[worker-${workerId + 1}] Done ${relativeInput} -> ${outputPath} (${outputExtension}, ${formatDurationMs(Date.now() - startedAt)})`
|
|
306
308
|
);
|
|
307
309
|
}
|
|
308
310
|
} catch (error) {
|
|
@@ -347,10 +349,6 @@ async function processFolder(
|
|
|
347
349
|
}
|
|
348
350
|
|
|
349
351
|
async function resolvePromptText(options: CliOptions): Promise<string | undefined> {
|
|
350
|
-
if (options.mode !== "prompt") {
|
|
351
|
-
return undefined;
|
|
352
|
-
}
|
|
353
|
-
|
|
354
352
|
if (options.prompt) {
|
|
355
353
|
const prompt = options.prompt.trim();
|
|
356
354
|
if (!prompt) {
|
|
@@ -373,6 +371,10 @@ async function resolvePromptText(options: CliOptions): Promise<string | undefine
|
|
|
373
371
|
return promptFromFile;
|
|
374
372
|
}
|
|
375
373
|
|
|
374
|
+
function resolveConversionMode(promptText: string | undefined): ConversionMode {
|
|
375
|
+
return promptText ? "prompt" : "auto";
|
|
376
|
+
}
|
|
377
|
+
|
|
376
378
|
async function handleConfigInit(options: ConfigInitOptions): Promise<void> {
|
|
377
379
|
const existingKey = await getStoredApiKey();
|
|
378
380
|
if (existingKey && !options.force) {
|
package/src/cliHelpers.ts
CHANGED
|
@@ -1,33 +1,28 @@
|
|
|
1
1
|
import { InvalidArgumentError } from "commander";
|
|
2
2
|
import { basename, dirname, extname, join, relative } from "node:path";
|
|
3
|
-
import { type ConversionMode, type OutputFormat } from "./openaiPdfToMarkdown.js";
|
|
4
3
|
|
|
5
4
|
export type CliOptions = {
|
|
6
5
|
output?: string;
|
|
7
6
|
model: string;
|
|
8
7
|
concurrency?: number;
|
|
9
8
|
yes?: boolean;
|
|
10
|
-
|
|
11
|
-
format?: OutputFormat;
|
|
9
|
+
format?: string;
|
|
12
10
|
instructions?: string;
|
|
13
11
|
prompt?: string;
|
|
14
12
|
promptFile?: string;
|
|
15
13
|
};
|
|
16
14
|
|
|
17
|
-
export function
|
|
18
|
-
|
|
19
|
-
|
|
15
|
+
export function parseFormat(value: string): string {
|
|
16
|
+
const normalized = value.trim().replace(/^\.+/, "");
|
|
17
|
+
if (!normalized) {
|
|
18
|
+
throw new InvalidArgumentError("Format must be a non-empty file extension.");
|
|
20
19
|
}
|
|
21
20
|
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
export function parseFormat(value: string): OutputFormat {
|
|
26
|
-
if (value === "md" || value === "txt") {
|
|
27
|
-
return value;
|
|
21
|
+
if (normalized.includes("/") || normalized.includes("\\")) {
|
|
22
|
+
throw new InvalidArgumentError("Format must be a file extension, not a path.");
|
|
28
23
|
}
|
|
29
24
|
|
|
30
|
-
|
|
25
|
+
return normalized;
|
|
31
26
|
}
|
|
32
27
|
|
|
33
28
|
export function parseConcurrency(value: string): number {
|
|
@@ -40,48 +35,41 @@ export function parseConcurrency(value: string): number {
|
|
|
40
35
|
}
|
|
41
36
|
|
|
42
37
|
export function validateOptionCombination(options: CliOptions): void {
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
throw new Error("Prompt mode requires exactly one of --prompt or --prompt-file.");
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
if (options.instructions) {
|
|
50
|
-
throw new Error("--instructions is only supported in auto mode.");
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
return;
|
|
38
|
+
const promptSourceCount = Number(Boolean(options.prompt)) + Number(Boolean(options.promptFile));
|
|
39
|
+
if (promptSourceCount > 1) {
|
|
40
|
+
throw new Error("Use exactly one of --prompt or --prompt-file.");
|
|
54
41
|
}
|
|
55
42
|
|
|
56
|
-
if (
|
|
57
|
-
throw new Error("--
|
|
43
|
+
if (promptSourceCount === 1 && options.instructions) {
|
|
44
|
+
throw new Error("--instructions cannot be combined with --prompt or --prompt-file.");
|
|
58
45
|
}
|
|
59
46
|
}
|
|
60
47
|
|
|
61
|
-
export function defaultOutputPath(inputPath: string,
|
|
62
|
-
const
|
|
48
|
+
export function defaultOutputPath(inputPath: string, extension: string): string {
|
|
49
|
+
const normalizedExtension = extension.startsWith(".") ? extension : `.${extension}`;
|
|
63
50
|
|
|
64
51
|
if (extname(inputPath).toLowerCase() === ".pdf") {
|
|
65
|
-
return inputPath.slice(0, -4) +
|
|
52
|
+
return inputPath.slice(0, -4) + normalizedExtension;
|
|
66
53
|
}
|
|
67
54
|
|
|
68
|
-
return inputPath +
|
|
55
|
+
return inputPath + normalizedExtension;
|
|
69
56
|
}
|
|
70
57
|
|
|
71
58
|
export function resolveFolderOutputPath(
|
|
72
59
|
inputPath: string,
|
|
73
60
|
inputRoot: string,
|
|
74
61
|
outputRoot: string | undefined,
|
|
75
|
-
|
|
62
|
+
extension: string
|
|
76
63
|
): string {
|
|
77
64
|
if (!outputRoot) {
|
|
78
|
-
return defaultOutputPath(inputPath,
|
|
65
|
+
return defaultOutputPath(inputPath, extension);
|
|
79
66
|
}
|
|
80
67
|
|
|
81
68
|
const relativePath = relative(inputRoot, inputPath);
|
|
82
69
|
const relativeDir = dirname(relativePath);
|
|
83
70
|
const base = basename(relativePath, extname(relativePath));
|
|
84
|
-
const
|
|
71
|
+
const normalizedExtension = extension.startsWith(".") ? extension.slice(1) : extension;
|
|
72
|
+
const filename = `${base}.${normalizedExtension}`;
|
|
85
73
|
|
|
86
74
|
if (relativeDir === ".") {
|
|
87
75
|
return join(outputRoot, filename);
|
|
@@ -9,9 +9,9 @@ export type ConvertOptions = {
|
|
|
9
9
|
inputPath: string;
|
|
10
10
|
model: string;
|
|
11
11
|
mode: ConversionMode;
|
|
12
|
-
format?: OutputFormat;
|
|
13
12
|
instructions?: string;
|
|
14
13
|
promptText?: string;
|
|
14
|
+
outputExtensionHint?: string;
|
|
15
15
|
};
|
|
16
16
|
|
|
17
17
|
export type ConversionMode = "auto" | "prompt";
|
|
@@ -94,63 +94,40 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
|
|
|
94
94
|
totalTokens: result.state.usage.totalTokens
|
|
95
95
|
};
|
|
96
96
|
|
|
97
|
-
if (options.mode === "auto"
|
|
97
|
+
if (options.mode === "auto") {
|
|
98
98
|
return { ...parseAutoResponse(rawOutput), usage };
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
-
|
|
102
|
-
return { format, content: rawOutput, usage };
|
|
101
|
+
return { format: "txt", content: rawOutput, usage };
|
|
103
102
|
}
|
|
104
103
|
|
|
105
104
|
function buildPromptText(options: ConvertOptions): string {
|
|
105
|
+
const outputExtensionHint = normalizeExtensionHint(options.outputExtensionHint);
|
|
106
106
|
if (options.mode === "prompt") {
|
|
107
107
|
if (!options.promptText) {
|
|
108
108
|
throw new Error("promptText is required when mode is 'prompt'.");
|
|
109
109
|
}
|
|
110
110
|
|
|
111
|
-
const promptModeParts = [
|
|
111
|
+
const promptModeParts: string[] = [
|
|
112
112
|
"Apply the following user prompt to the PDF.",
|
|
113
113
|
"Return only the final converted content.",
|
|
114
114
|
`User prompt:\n${options.promptText}`
|
|
115
115
|
];
|
|
116
116
|
|
|
117
|
-
if (
|
|
118
|
-
promptModeParts.push(
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
117
|
+
if (outputExtensionHint) {
|
|
118
|
+
promptModeParts.push(
|
|
119
|
+
[
|
|
120
|
+
`Output file extension hint: .${outputExtensionHint}.`,
|
|
121
|
+
"Prefer content that is practical for saving under this extension.",
|
|
122
|
+
"Treat this as guidance and still follow the user prompt exactly."
|
|
123
|
+
].join(" ")
|
|
124
|
+
);
|
|
123
125
|
}
|
|
124
126
|
|
|
125
127
|
return promptModeParts.join("\n\n");
|
|
126
128
|
}
|
|
127
129
|
|
|
128
|
-
|
|
129
|
-
return withAdditionalInstructions(
|
|
130
|
-
[
|
|
131
|
-
"Convert this PDF into clean GitHub-flavored Markdown.",
|
|
132
|
-
"Preserve headings, paragraphs, lists, and tables.",
|
|
133
|
-
"Render tables as Markdown pipe tables with header separators.",
|
|
134
|
-
"If cells are empty due to merged cells, keep the table readable and consistent.",
|
|
135
|
-
"Return only Markdown without code fences."
|
|
136
|
-
].join(" "),
|
|
137
|
-
options.instructions
|
|
138
|
-
);
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
if (options.format === "txt") {
|
|
142
|
-
return withAdditionalInstructions(
|
|
143
|
-
[
|
|
144
|
-
"Convert this PDF into clean plain text.",
|
|
145
|
-
"Preserve reading order and paragraph boundaries.",
|
|
146
|
-
"Represent tables in readable plain text (no Markdown syntax).",
|
|
147
|
-
"Return plain text only and do not use Markdown syntax or code fences."
|
|
148
|
-
].join(" "),
|
|
149
|
-
options.instructions
|
|
150
|
-
);
|
|
151
|
-
}
|
|
152
|
-
|
|
153
|
-
return withAdditionalInstructions(
|
|
130
|
+
let autoPrompt = withAdditionalInstructions(
|
|
154
131
|
[
|
|
155
132
|
"Decide the best output format for this PDF: Markdown ('md') or plain text ('txt').",
|
|
156
133
|
"Choose 'md' for documents with meaningful headings, lists, and tables that benefit from Markdown.",
|
|
@@ -163,6 +140,18 @@ function buildPromptText(options: ConvertOptions): string {
|
|
|
163
140
|
].join("\n"),
|
|
164
141
|
options.instructions
|
|
165
142
|
);
|
|
143
|
+
|
|
144
|
+
if (outputExtensionHint) {
|
|
145
|
+
autoPrompt = `${autoPrompt}\n\n${
|
|
146
|
+
[
|
|
147
|
+
`Output file extension hint: .${outputExtensionHint}.`,
|
|
148
|
+
"Prefer content that is practical for that extension while still returning JSON with format='md' or 'txt'.",
|
|
149
|
+
"This is guidance only and should not break the required JSON schema."
|
|
150
|
+
].join(" ")
|
|
151
|
+
}`;
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
return autoPrompt;
|
|
166
155
|
}
|
|
167
156
|
|
|
168
157
|
function withAdditionalInstructions(base: string, additional?: string): string {
|
|
@@ -173,6 +162,15 @@ function withAdditionalInstructions(base: string, additional?: string): string {
|
|
|
173
162
|
return `${base}\n\nAdditional user instructions:\n${additional}`;
|
|
174
163
|
}
|
|
175
164
|
|
|
165
|
+
function normalizeExtensionHint(extension: string | undefined): string | undefined {
|
|
166
|
+
if (!extension) {
|
|
167
|
+
return undefined;
|
|
168
|
+
}
|
|
169
|
+
|
|
170
|
+
const normalized = extension.trim().replace(/^\.+/, "");
|
|
171
|
+
return normalized || undefined;
|
|
172
|
+
}
|
|
173
|
+
|
|
176
174
|
function parseAutoResponse(rawOutput: string): Omit<ConvertResult, "usage"> {
|
|
177
175
|
let candidate = rawOutput.trim();
|
|
178
176
|
|
package/test/cliHelpers.test.ts
CHANGED
|
@@ -8,29 +8,25 @@ import {
|
|
|
8
8
|
looksLikeFileOutput,
|
|
9
9
|
parseConcurrency,
|
|
10
10
|
parseFormat,
|
|
11
|
-
parseMode,
|
|
12
11
|
resolveFolderOutputPath,
|
|
13
12
|
truncate,
|
|
14
13
|
validateOptionCombination,
|
|
15
14
|
type CliOptions
|
|
16
15
|
} from "../src/cliHelpers.js";
|
|
17
16
|
|
|
18
|
-
test("parseMode accepts valid values", () => {
|
|
19
|
-
assert.equal(parseMode("auto"), "auto");
|
|
20
|
-
assert.equal(parseMode("prompt"), "prompt");
|
|
21
|
-
});
|
|
22
|
-
|
|
23
|
-
test("parseMode rejects invalid values", () => {
|
|
24
|
-
assert.throws(() => parseMode("invalid"), InvalidArgumentError);
|
|
25
|
-
});
|
|
26
|
-
|
|
27
17
|
test("parseFormat accepts valid values", () => {
|
|
28
18
|
assert.equal(parseFormat("md"), "md");
|
|
29
19
|
assert.equal(parseFormat("txt"), "txt");
|
|
20
|
+
assert.equal(parseFormat("csv"), "csv");
|
|
21
|
+
assert.equal(parseFormat(".json"), "json");
|
|
22
|
+
assert.equal(parseFormat("tar.gz"), "tar.gz");
|
|
30
23
|
});
|
|
31
24
|
|
|
32
25
|
test("parseFormat rejects invalid values", () => {
|
|
33
|
-
assert.throws(() => parseFormat("
|
|
26
|
+
assert.throws(() => parseFormat(""), InvalidArgumentError);
|
|
27
|
+
assert.throws(() => parseFormat(" "), InvalidArgumentError);
|
|
28
|
+
assert.throws(() => parseFormat("../json"), InvalidArgumentError);
|
|
29
|
+
assert.throws(() => parseFormat("a/b"), InvalidArgumentError);
|
|
34
30
|
});
|
|
35
31
|
|
|
36
32
|
test("parseConcurrency accepts in-range integers", () => {
|
|
@@ -45,48 +41,47 @@ test("parseConcurrency rejects invalid values", () => {
|
|
|
45
41
|
assert.throws(() => parseConcurrency("abc"), InvalidArgumentError);
|
|
46
42
|
});
|
|
47
43
|
|
|
48
|
-
test("validateOptionCombination
|
|
44
|
+
test("validateOptionCombination allows default auto behavior without prompt flags", () => {
|
|
49
45
|
const base: CliOptions = {
|
|
50
|
-
model: "gpt-4o-mini"
|
|
51
|
-
mode: "prompt"
|
|
46
|
+
model: "gpt-4o-mini"
|
|
52
47
|
};
|
|
53
48
|
|
|
54
|
-
assert.
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
49
|
+
assert.doesNotThrow(() => validateOptionCombination(base));
|
|
50
|
+
assert.doesNotThrow(() => validateOptionCombination({ ...base, instructions: "Extra formatting rules" }));
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
test("validateOptionCombination treats --prompt and --prompt-file as mutually exclusive", () => {
|
|
54
|
+
const base: CliOptions = {
|
|
55
|
+
model: "gpt-4o-mini"
|
|
56
|
+
};
|
|
57
|
+
|
|
58
|
+
assert.doesNotThrow(() => validateOptionCombination({ ...base, prompt: "Convert" }));
|
|
59
59
|
assert.doesNotThrow(() => validateOptionCombination({ ...base, promptFile: "./prompt.txt" }));
|
|
60
60
|
assert.throws(
|
|
61
61
|
() => validateOptionCombination({ ...base, prompt: "x", promptFile: "./prompt.txt" }),
|
|
62
|
-
/
|
|
63
|
-
);
|
|
64
|
-
assert.throws(
|
|
65
|
-
() => validateOptionCombination({ ...base, prompt: "x", instructions: "Extra" }),
|
|
66
|
-
/--instructions is only supported in auto mode\./
|
|
62
|
+
/Use exactly one of --prompt or --prompt-file\./
|
|
67
63
|
);
|
|
68
64
|
});
|
|
69
65
|
|
|
70
|
-
test("validateOptionCombination rejects prompt flags
|
|
66
|
+
test("validateOptionCombination rejects --instructions with prompt flags", () => {
|
|
71
67
|
const base: CliOptions = {
|
|
72
|
-
model: "gpt-4o-mini"
|
|
73
|
-
mode: "auto"
|
|
68
|
+
model: "gpt-4o-mini"
|
|
74
69
|
};
|
|
75
70
|
|
|
76
|
-
assert.doesNotThrow(() => validateOptionCombination(base));
|
|
77
71
|
assert.throws(
|
|
78
|
-
() => validateOptionCombination({ ...base, prompt: "
|
|
79
|
-
/--
|
|
72
|
+
() => validateOptionCombination({ ...base, prompt: "x", instructions: "Extra" }),
|
|
73
|
+
/--instructions cannot be combined with --prompt or --prompt-file\./
|
|
80
74
|
);
|
|
81
75
|
assert.throws(
|
|
82
|
-
() => validateOptionCombination({ ...base, promptFile: "./prompt.txt" }),
|
|
83
|
-
/--
|
|
76
|
+
() => validateOptionCombination({ ...base, promptFile: "./prompt.txt", instructions: "Extra" }),
|
|
77
|
+
/--instructions cannot be combined with --prompt or --prompt-file\./
|
|
84
78
|
);
|
|
85
79
|
});
|
|
86
80
|
|
|
87
81
|
test("defaultOutputPath replaces .pdf extension and appends for other files", () => {
|
|
88
82
|
assert.equal(defaultOutputPath("/tmp/input.pdf", "md"), "/tmp/input.md");
|
|
89
83
|
assert.equal(defaultOutputPath("/tmp/input.PDF", "txt"), "/tmp/input.txt");
|
|
84
|
+
assert.equal(defaultOutputPath("/tmp/input.pdf", ".csv"), "/tmp/input.csv");
|
|
90
85
|
assert.equal(defaultOutputPath("/tmp/input", "md"), "/tmp/input.md");
|
|
91
86
|
});
|
|
92
87
|
|
|
@@ -105,6 +100,11 @@ test("resolveFolderOutputPath preserves nested structure when output root is set
|
|
|
105
100
|
resolveFolderOutputPath("/data/invoices/file.pdf", "/data/invoices", "/exports", "txt"),
|
|
106
101
|
"/exports/file.txt"
|
|
107
102
|
);
|
|
103
|
+
|
|
104
|
+
assert.equal(
|
|
105
|
+
resolveFolderOutputPath("/data/invoices/file.pdf", "/data/invoices", "/exports", ".csv"),
|
|
106
|
+
"/exports/file.csv"
|
|
107
|
+
);
|
|
108
108
|
});
|
|
109
109
|
|
|
110
110
|
test("resolveFolderOutputPath falls back to default path when no output root", () => {
|