plasalid 0.8.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli/commands/scan.js +69 -10
- package/dist/cli/commands/status.js +15 -2
- package/dist/cli/ink/ScanDashboard.d.ts +1 -1
- package/dist/cli/ink/ScanDashboard.js +11 -2
- package/dist/cli/setup.js +8 -2
- package/dist/config.js +1 -1
- package/dist/scanner/clarifier.d.ts +2 -0
- package/dist/scanner/clarifier.js +1 -0
- package/dist/scanner/concurrency.d.ts +9 -2
- package/dist/scanner/concurrency.js +3 -1
- package/dist/scanner/engine.d.ts +2 -1
- package/dist/scanner/engine.js +21 -3
- package/dist/scanner/hooks.d.ts +6 -0
- package/dist/scanner/parse.js +28 -16
- package/dist/scanner/worker.d.ts +6 -0
- package/dist/scanner/worker.js +12 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -157,7 +157,7 @@ OPENAI_MODEL= # default: gpt-5.4-mini
|
|
|
157
157
|
|
|
158
158
|
# Google Gemini
|
|
159
159
|
GEMINI_API_KEY= # required when provider is gemini
|
|
160
|
-
GEMINI_MODEL= # default: gemini-
|
|
160
|
+
GEMINI_MODEL= # default: gemini-2.5-pro
|
|
161
161
|
|
|
162
162
|
# OpenAI-compatible (LM Studio, Ollama, vLLM, etc.)
|
|
163
163
|
OPENAI_COMPAT_BASE_URL= # e.g. http://localhost:1234/v1
|
|
package/dist/cli/commands/scan.js
CHANGED
|
@@ -3,6 +3,12 @@ import { getDb } from "../../db/connection.js";
|
|
|
3
3
|
import { runScan } from "../../scanner/engine.js";
|
|
4
4
|
import { getActiveModel } from "../../config.js";
|
|
5
5
|
import { getProvider } from "../../ai/providers/index.js";
|
|
6
|
+
import { AbortedError } from "../../ai/errors.js";
|
|
7
|
+
/** Show the cursor — always safe; mirrors the TTY mount-time hide. */
|
|
8
|
+
function restoreTerminal() {
|
|
9
|
+
if (process.stdout.isTTY)
|
|
10
|
+
process.stdout.write("\x1b[?25h");
|
|
11
|
+
}
|
|
6
12
|
export async function runScanCommand(opts) {
|
|
7
13
|
if (opts.regex) {
|
|
8
14
|
try {
|
|
@@ -16,17 +22,46 @@ export async function runScanCommand(opts) {
|
|
|
16
22
|
}
|
|
17
23
|
const parallel = opts.parallel ?? 5;
|
|
18
24
|
const isTTY = !!process.stdout.isTTY;
|
|
19
|
-
const
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
const controller = new AbortController();
|
|
26
|
+
let sigintCount = 0;
|
|
27
|
+
const onSigint = () => {
|
|
28
|
+
sigintCount++;
|
|
29
|
+
if (sigintCount === 1) {
|
|
30
|
+
controller.abort();
|
|
31
|
+
return;
|
|
32
|
+
}
|
|
33
|
+
restoreTerminal();
|
|
34
|
+
process.exit(130);
|
|
35
|
+
};
|
|
36
|
+
process.on("SIGINT", onSigint);
|
|
37
|
+
const hooks = isTTY
|
|
38
|
+
? await buildTtyHooks(controller.signal)
|
|
39
|
+
: buildPlainHooks(controller.signal);
|
|
40
|
+
try {
|
|
41
|
+
const result = await runScan(getDb(), {
|
|
42
|
+
regex: opts.regex,
|
|
43
|
+
force: opts.force,
|
|
44
|
+
interactive: true,
|
|
45
|
+
maxFileWorkers: parallel,
|
|
46
|
+
}, hooks, controller.signal);
|
|
47
|
+
renderSummary(result.state);
|
|
48
|
+
}
|
|
49
|
+
catch (err) {
|
|
50
|
+
if (err instanceof AbortedError) {
|
|
51
|
+
restoreTerminal();
|
|
52
|
+
console.log("");
|
|
53
|
+
console.log(chalk.yellow("scan cancelled. anything committed before cancel stays in the database (run `scan --force` or `revert`)."));
|
|
54
|
+
process.exitCode = 130;
|
|
55
|
+
return;
|
|
56
|
+
}
|
|
57
|
+
throw err;
|
|
58
|
+
}
|
|
59
|
+
finally {
|
|
60
|
+
process.removeListener("SIGINT", onSigint);
|
|
61
|
+
}
|
|
27
62
|
}
|
|
28
63
|
/* TTY mode — Ink dashboard with one in-place row per file. */
|
|
29
|
-
async function buildTtyHooks() {
|
|
64
|
+
async function buildTtyHooks(signal) {
|
|
30
65
|
const { render } = await import("ink");
|
|
31
66
|
const { createElement } = await import("react");
|
|
32
67
|
const { ScanDashboard, createScanDashboardController } = await import("../ink/ScanDashboard.js");
|
|
@@ -34,6 +69,14 @@ async function buildTtyHooks() {
|
|
|
34
69
|
let inkInstance = null;
|
|
35
70
|
let unsubscribeProgress = null;
|
|
36
71
|
const chunkLookup = new Map();
|
|
72
|
+
// Surface cancellation through Ink's controller, not raw stdout — writing
|
|
73
|
+
// to stdout while Ink is rendering corrupts its frame tracking and leaves
|
|
74
|
+
// a phantom copy of the header in scrollback. once:true so the listener
|
|
75
|
+
// self-removes without leaking past the scan run.
|
|
76
|
+
const onAbortEvt = () => {
|
|
77
|
+
controller.publish({ type: "phase-set", phase: "cancelling" });
|
|
78
|
+
};
|
|
79
|
+
signal.addEventListener("abort", onAbortEvt, { once: true });
|
|
37
80
|
return {
|
|
38
81
|
afterDecrypt: (s) => {
|
|
39
82
|
const total = s.decrypted.length + s.skipped.length + s.failed.length;
|
|
@@ -117,6 +160,18 @@ async function buildTtyHooks() {
|
|
|
117
160
|
inkInstance = null;
|
|
118
161
|
process.stdout.write("\x1b[?25h");
|
|
119
162
|
},
|
|
163
|
+
/**
|
|
164
|
+
* Cancellation lands here when the user hits Ctrl+C mid-scan. Drop the
|
|
165
|
+
* Ink dashboard immediately so subsequent stderr lines aren't trapped
|
|
166
|
+
* under its render, and restore the cursor we hid at mount time.
|
|
167
|
+
*/
|
|
168
|
+
onAbort: () => {
|
|
169
|
+
unsubscribeProgress?.();
|
|
170
|
+
unsubscribeProgress = null;
|
|
171
|
+
inkInstance?.unmount();
|
|
172
|
+
inkInstance = null;
|
|
173
|
+
process.stdout.write("\x1b[?25h");
|
|
174
|
+
},
|
|
120
175
|
};
|
|
121
176
|
}
|
|
122
177
|
const FINALIZE_RULES = [
|
|
@@ -135,10 +190,14 @@ function classifyFinalize(t) {
|
|
|
135
190
|
return r.kind;
|
|
136
191
|
return "partial";
|
|
137
192
|
}
|
|
138
|
-
function buildPlainHooks() {
|
|
193
|
+
function buildPlainHooks(signal) {
|
|
139
194
|
const tallies = new Map();
|
|
140
195
|
const fileIdByChunkId = new Map();
|
|
141
196
|
let unsubscribeProgress = null;
|
|
197
|
+
// No Ink in this mode, so writing one dim line directly is safe.
|
|
198
|
+
signal.addEventListener("abort", () => {
|
|
199
|
+
console.log(chalk.dim("Cancelling… waiting for in-flight work."));
|
|
200
|
+
}, { once: true });
|
|
142
201
|
const finalize = (fileId) => {
|
|
143
202
|
const t = tallies.get(fileId);
|
|
144
203
|
if (!t || t.completedChunks + t.failedChunks < t.totalChunks)
|
|
package/dist/cli/commands/status.js
CHANGED
|
@@ -6,6 +6,7 @@ import { getRecurringSummary } from "../../db/queries/recurrences.js";
|
|
|
6
6
|
import { countScannedFiles } from "../../db/queries/files.js";
|
|
7
7
|
import { countQuestions } from "../../db/queries/questions.js";
|
|
8
8
|
import { countMemories } from "../../ai/memory.js";
|
|
9
|
+
import { config, getActiveModel } from "../../config.js";
|
|
9
10
|
import { formatAmount } from "../../currency.js";
|
|
10
11
|
import { visibleLength } from "../format.js";
|
|
11
12
|
const LABEL_WIDTH = 18;
|
|
@@ -14,6 +15,8 @@ export function showStatus() {
|
|
|
14
15
|
printSection("Financial", financialRows(db));
|
|
15
16
|
console.log("");
|
|
16
17
|
printSection("System", systemRows(db));
|
|
18
|
+
console.log("");
|
|
19
|
+
printSection("Model", modelRows(), { align: "left" });
|
|
17
20
|
}
|
|
18
21
|
function financialRows(db) {
|
|
19
22
|
const nw = getNetWorth(db);
|
|
@@ -72,7 +75,14 @@ function systemRows(db) {
|
|
|
72
75
|
}
|
|
73
76
|
return rows;
|
|
74
77
|
}
|
|
75
|
-
function
|
|
78
|
+
function modelRows() {
|
|
79
|
+
return [
|
|
80
|
+
{ label: "Provider", value: config.providerType },
|
|
81
|
+
{ label: "Model", value: getActiveModel() },
|
|
82
|
+
];
|
|
83
|
+
}
|
|
84
|
+
function printSection(title, rows, opts) {
|
|
85
|
+
const align = opts?.align ?? "right";
|
|
76
86
|
console.log(chalk.bold(title));
|
|
77
87
|
console.log(chalk.dim("─".repeat(title.length)));
|
|
78
88
|
const valueWidth = Math.max(0, ...rows.map((r) => visibleLength(r.value)));
|
|
@@ -80,7 +90,10 @@ function printSection(title, rows) {
|
|
|
80
90
|
const label = row.label.padEnd(LABEL_WIDTH);
|
|
81
91
|
const valuePad = " ".repeat(Math.max(0, valueWidth - visibleLength(row.value)));
|
|
82
92
|
const suffix = row.suffix ? ` ${row.suffix}` : "";
|
|
83
|
-
|
|
93
|
+
const body = align === "left"
|
|
94
|
+
? `${row.value}${valuePad}${suffix}`
|
|
95
|
+
: `${valuePad}${row.value}${suffix}`;
|
|
96
|
+
console.log(` ${label}${body}`);
|
|
84
97
|
}
|
|
85
98
|
}
|
|
86
99
|
function formatInteger(n) {
|
|
package/dist/cli/ink/ScanDashboard.d.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Events the CLI publishes into the dashboard. The CLI subscribes to the
|
|
3
3
|
* scanner's ScanProgress sink and routes per-chunk ticks here via chunkLookup.
|
|
4
4
|
*/
|
|
5
|
-
export type CurrentPhase = "parse" | "clarify" | "done";
|
|
5
|
+
export type CurrentPhase = "parse" | "clarify" | "cancelling" | "done";
|
|
6
6
|
export type DashboardEvent = {
|
|
7
7
|
type: "chunk-start";
|
|
8
8
|
fileId: string;
|
|
package/dist/cli/ink/ScanDashboard.js
CHANGED
|
@@ -34,11 +34,14 @@ export function ScanDashboard(props) {
|
|
|
34
34
|
const rows = useFileGroups(props.controller, props.files);
|
|
35
35
|
const phase = usePhase(props.controller);
|
|
36
36
|
const ruleWidth = useRuleWidth();
|
|
37
|
-
return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Header, { phase: phase }), _jsx(AttachmentLine, { info: props.attachment }), _jsx(Box, { marginTop: 1, children: _jsx(ColumnHeader, {}) }), _jsx(Divider, { width: ruleWidth }), Array.from(rows.entries()).map(([fileId, group]) => (_jsx(FileGroupView, { group: group }, fileId))), _jsx(Divider, { width: ruleWidth })] }));
|
|
37
|
+
return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Header, { phase: phase }), _jsx(AttachmentLine, { info: props.attachment }), _jsx(Box, { marginTop: 1, children: _jsx(ColumnHeader, {}) }), _jsx(Divider, { width: ruleWidth }), Array.from(rows.entries()).map(([fileId, group]) => (_jsx(FileGroupView, { group: group }, fileId))), _jsx(Divider, { width: ruleWidth }), phase !== "done" && _jsx(Footnote, {})] }));
|
|
38
|
+
}
|
|
39
|
+
function Footnote() {
|
|
40
|
+
return (_jsxs(Box, { flexDirection: "column", children: [_jsx(Text, { dimColor: true, children: "output accuracy depends on the model's VL capability." }), _jsxs(Text, { children: [_jsx(Text, { dimColor: true, children: "we also provide " }), _jsx(Text, { color: "cyan", children: "clarify" }), _jsx(Text, { dimColor: true, children: ", " }), _jsx(Text, { color: "cyan", children: "record" }), _jsx(Text, { dimColor: true, children: ", and " }), _jsx(Text, { color: "cyan", children: "chat" }), _jsx(Text, { dimColor: true, children: " to rectify the data later." })] })] }));
|
|
38
41
|
}
|
|
39
42
|
function AttachmentLine({ info }) {
|
|
40
43
|
const detail = info.format === "pdf" ? "pdf (native)" : "png (rasterized)";
|
|
41
|
-
return (_jsxs(Text, { dimColor: true, children: ["sending: ", detail, "
|
|
44
|
+
return (_jsxs(Text, { dimColor: true, children: ["sending: ", detail, " (", info.providerName, "/", info.modelName, ")"] }));
|
|
42
45
|
}
|
|
43
46
|
function usePhase(controller) {
|
|
44
47
|
const [phase, setPhase] = useState("parse");
|
|
@@ -78,6 +81,12 @@ function phaseStateOf(label, current) {
|
|
|
78
81
|
return "pending";
|
|
79
82
|
}
|
|
80
83
|
function Header({ phase }) {
|
|
84
|
+
// Cancellation collapses the parse/clarify segments — neither is still
|
|
85
|
+
// running once the user hits Ctrl+C, and showing them as "pending" would
|
|
86
|
+
// be misleading. The single "cancelling…" label communicates the wind-down.
|
|
87
|
+
if (phase === "cancelling") {
|
|
88
|
+
return (_jsxs(Text, { children: [_jsx(Text, { bold: true, children: "Scanner" }), _jsx(Text, { dimColor: true, children: " · " }), _jsx(Text, { color: "green", children: "\u2713 decrypt" }), _jsx(Text, { dimColor: true, children: " -> " }), _jsx(Text, { color: "green", children: "\u2713 chunk" }), _jsx(Text, { dimColor: true, children: " -> " }), _jsxs(Text, { color: "red", children: [_jsx(Spinner, { type: "dots" }), " cancelling\u2026"] })] }));
|
|
89
|
+
}
|
|
81
90
|
return (_jsxs(Text, { children: [_jsx(Text, { bold: true, children: "Scanner" }), _jsx(Text, { dimColor: true, children: " · " }), _jsx(Text, { color: "green", children: "\u2713 decrypt" }), _jsx(Text, { dimColor: true, children: " -> " }), _jsx(Text, { color: "green", children: "\u2713 chunk" }), _jsx(Text, { dimColor: true, children: " -> " }), PHASE_RENDER[phaseStateOf("parse", phase)]("parse"), _jsx(Text, { dimColor: true, children: " -> " }), PHASE_RENDER[phaseStateOf("clarify", phase)]("clarify")] }));
|
|
82
91
|
}
|
|
83
92
|
function ColumnHeader() {
|
package/dist/cli/setup.js
CHANGED
|
@@ -11,7 +11,7 @@ const DEFAULT_LOCAL_OPENAI_BASE_URL = "http://localhost:11434/v1";
|
|
|
11
11
|
const RECOMMENDED_MODEL = {
|
|
12
12
|
anthropic: "claude-sonnet-4-6",
|
|
13
13
|
openai: "gpt-5.4-mini",
|
|
14
|
-
gemini: "gemini-
|
|
14
|
+
gemini: "gemini-2.5-pro",
|
|
15
15
|
};
|
|
16
16
|
function ensureDir(p) {
|
|
17
17
|
if (!existsSync(p))
|
|
@@ -127,9 +127,15 @@ async function promptModelInput(vendor) {
|
|
|
127
127
|
const carriedOver = savedModelFor(vendor);
|
|
128
128
|
const recommended = vendor === "openai-compat" ? "" : RECOMMENDED_MODEL[vendor];
|
|
129
129
|
const defaultValue = carriedOver || recommended;
|
|
130
|
+
// openai-compat has no single recommended model — the scanner rasterizes
|
|
131
|
+
// PDFs to PNG on this path, so any non-vision model will fail on scan. Steer
|
|
132
|
+
// the user toward a vision-language model in the prompt.
|
|
133
|
+
const message = vendor === "openai-compat"
|
|
134
|
+
? "Which AI model? (use a vision-language model)"
|
|
135
|
+
: `Which AI model? (recommended: ${RECOMMENDED_MODEL[vendor]})`;
|
|
130
136
|
return inputPrompt({
|
|
131
137
|
name: "model",
|
|
132
|
-
message
|
|
138
|
+
message,
|
|
133
139
|
default: defaultValue || undefined,
|
|
134
140
|
validate: (v) => v.trim().length > 0 || "Required",
|
|
135
141
|
});
|
package/dist/config.js
CHANGED
|
@@ -39,7 +39,7 @@ function buildConfig() {
|
|
|
39
39
|
openaiCompatBaseURL: process.env.OPENAI_COMPAT_BASE_URL || file.openaiCompatBaseURL || "",
|
|
40
40
|
openaiCompatModel: process.env.OPENAI_COMPAT_MODEL || file.openaiCompatModel || "",
|
|
41
41
|
geminiKey: process.env.GEMINI_API_KEY || file.geminiKey || "",
|
|
42
|
-
geminiModel: process.env.GEMINI_MODEL || file.geminiModel || "gemini-
|
|
42
|
+
geminiModel: process.env.GEMINI_MODEL || file.geminiModel || "gemini-2.5-pro",
|
|
43
43
|
displayLocale: file.displayLocale || "th-TH",
|
|
44
44
|
displayCurrency: file.displayCurrency || "THB",
|
|
45
45
|
dbPath: process.env.PLASALID_DB_PATH ||
|
|
package/dist/scanner/clarifier.d.ts
CHANGED
|
@@ -28,6 +28,8 @@ export interface RunClarifyOpts {
|
|
|
28
28
|
toolCount: number;
|
|
29
29
|
elapsedMs: number;
|
|
30
30
|
}) => void;
|
|
31
|
+
/** When set and aborted, runClarify stops between passes/questions. */
|
|
32
|
+
signal?: AbortSignal;
|
|
31
33
|
}
|
|
32
34
|
export declare const CLARIFIER_PASSES: readonly ClarifierPass[];
|
|
33
35
|
/**
|
|
package/dist/scanner/concurrency.d.ts
CHANGED
|
@@ -4,8 +4,15 @@
|
|
|
4
4
|
* never aborts the rest — its slot settles as `{ ok: false, error }` and the
|
|
5
5
|
* caller decides what to do.
|
|
6
6
|
*
|
|
7
|
+
* Pass `signal` to make the pool cancellation-aware: when it aborts, no new
|
|
8
|
+
* task is claimed (tasks already running aren't interrupted — their own
|
|
9
|
+
* signal-aware work is expected to react). Unclaimed slots stay `undefined`
|
|
10
|
+
* in the returned array; the caller can spot them by checking length vs the
|
|
11
|
+
* filled entries.
|
|
12
|
+
*
|
|
7
13
|
* No new dependency. Simple worker-pool: kicks off up to `n` tasks, then each
|
|
8
|
-
* worker pulls the next index from a shared cursor until the queue is drained
|
|
14
|
+
* worker pulls the next index from a shared cursor until the queue is drained
|
|
15
|
+
* or the signal aborts.
|
|
9
16
|
*/
|
|
10
17
|
export type Settled<T> = {
|
|
11
18
|
ok: true;
|
|
@@ -14,4 +21,4 @@ export type Settled<T> = {
|
|
|
14
21
|
ok: false;
|
|
15
22
|
error: unknown;
|
|
16
23
|
};
|
|
17
|
-
export declare function runWithConcurrency<T>(tasks: Array<() => Promise<T>>, n: number): Promise<Settled<T>[]>;
|
|
24
|
+
export declare function runWithConcurrency<T>(tasks: Array<() => Promise<T>>, n: number, signal?: AbortSignal): Promise<Settled<T>[]>;
|
|
package/dist/scanner/concurrency.js
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
|
-
export async function runWithConcurrency(tasks, n) {
|
|
1
|
+
export async function runWithConcurrency(tasks, n, signal) {
|
|
2
2
|
const results = new Array(tasks.length);
|
|
3
3
|
const workerCount = Math.max(1, Math.min(n, tasks.length));
|
|
4
4
|
let cursor = 0;
|
|
5
5
|
async function worker() {
|
|
6
6
|
while (cursor < tasks.length) {
|
|
7
|
+
if (signal?.aborted)
|
|
8
|
+
return;
|
|
7
9
|
const index = cursor++;
|
|
8
10
|
try {
|
|
9
11
|
results[index] = { ok: true, value: await tasks[index]() };
|
package/dist/scanner/engine.d.ts
CHANGED
|
@@ -65,6 +65,7 @@ export interface ScanState {
|
|
|
65
65
|
readonly startedAt: number;
|
|
66
66
|
readonly options: RunScanOptions;
|
|
67
67
|
readonly progress: ScanProgress;
|
|
68
|
+
readonly signal: AbortSignal;
|
|
68
69
|
files: ScannedFile[];
|
|
69
70
|
decrypted: DecryptedFile[];
|
|
70
71
|
skipped: SkippedFile[];
|
|
@@ -87,4 +88,4 @@ export declare const DEFAULT_PHASES: readonly {
|
|
|
87
88
|
* through ScanState, then runs the phase chain. Nothing survives between
|
|
88
89
|
* scans.
|
|
89
90
|
*/
|
|
90
|
-
export declare function runScan(db: Database.Database, opts?: RunScanOptions, hooks?: ScanHooks): Promise<ScanResult>;
|
|
91
|
+
export declare function runScan(db: Database.Database, opts?: RunScanOptions, hooks?: ScanHooks, signal?: AbortSignal): Promise<ScanResult>;
|
package/dist/scanner/engine.js
CHANGED
|
@@ -5,6 +5,9 @@ import { parsePhase } from "./parse.js";
|
|
|
5
5
|
import { chunkPdf } from "./pdf/chunker.js";
|
|
6
6
|
import { runClarify } from "./clarifier.js";
|
|
7
7
|
import { errorMessage } from "./result.js";
|
|
8
|
+
import { AbortedError } from "../ai/errors.js";
|
|
9
|
+
/** A signal that never aborts. Used when callers don't pass one. */
|
|
10
|
+
const NEVER_ABORTS = new AbortController().signal;
|
|
8
11
|
const chunkPhase = async (_db, state, hooks) => {
|
|
9
12
|
await hooks.beforeChunk?.(state);
|
|
10
13
|
for (const file of state.decrypted)
|
|
@@ -17,6 +20,7 @@ const clarifyPhase = async (db, state, hooks) => {
|
|
|
17
20
|
db,
|
|
18
21
|
scanId: state.scanId,
|
|
19
22
|
interactive: state.options.interactive ?? true,
|
|
23
|
+
signal: state.signal,
|
|
20
24
|
});
|
|
21
25
|
state.clarifySummary = summary;
|
|
22
26
|
await hooks.afterClarify?.(state, summary);
|
|
@@ -32,7 +36,7 @@ export const DEFAULT_PHASES = [
|
|
|
32
36
|
* through ScanState, then runs the phase chain. Nothing survives between
|
|
33
37
|
* scans.
|
|
34
38
|
*/
|
|
35
|
-
export async function runScan(db, opts = {}, hooks = {}) {
|
|
39
|
+
export async function runScan(db, opts = {}, hooks = {}, signal = NEVER_ABORTS) {
|
|
36
40
|
const scanId = `sc:${randomUUID()}`;
|
|
37
41
|
const progress = createProgress();
|
|
38
42
|
const state = {
|
|
@@ -40,6 +44,7 @@ export async function runScan(db, opts = {}, hooks = {}) {
|
|
|
40
44
|
startedAt: Date.now(),
|
|
41
45
|
options: opts,
|
|
42
46
|
progress,
|
|
47
|
+
signal,
|
|
43
48
|
files: [],
|
|
44
49
|
decrypted: [],
|
|
45
50
|
skipped: [],
|
|
@@ -50,8 +55,19 @@ export async function runScan(db, opts = {}, hooks = {}) {
|
|
|
50
55
|
};
|
|
51
56
|
await fire(hooks.onStart, state);
|
|
52
57
|
const phases = opts.phases ?? DEFAULT_PHASES;
|
|
53
|
-
|
|
54
|
-
|
|
58
|
+
try {
|
|
59
|
+
await runPhaseChain(db, state, hooks, phases);
|
|
60
|
+
if (state.signal.aborted)
|
|
61
|
+
throw new AbortedError();
|
|
62
|
+
}
|
|
63
|
+
catch (err) {
|
|
64
|
+
if (err instanceof AbortedError)
|
|
65
|
+
await fire(hooks.onAbort, state);
|
|
66
|
+
throw err;
|
|
67
|
+
}
|
|
68
|
+
finally {
|
|
69
|
+
await fire(hooks.onFinish, state);
|
|
70
|
+
}
|
|
55
71
|
return { scanId, state };
|
|
56
72
|
}
|
|
57
73
|
async function runPhaseChain(db, state, hooks, phases) {
|
|
@@ -67,6 +83,8 @@ async function tryPhase(db, state, hooks, name, phase) {
|
|
|
67
83
|
return false;
|
|
68
84
|
}
|
|
69
85
|
catch (err) {
|
|
86
|
+
if (err instanceof AbortedError)
|
|
87
|
+
throw err;
|
|
70
88
|
state.errors.push({ phase: name, error: err });
|
|
71
89
|
await fire(hooks.onError, err, name, state);
|
|
72
90
|
return true;
|
package/dist/scanner/hooks.d.ts
CHANGED
|
@@ -19,5 +19,11 @@ export interface ScanHooks {
|
|
|
19
19
|
beforeClarify?(s: Readonly<ScanState>): MaybePromise<void>;
|
|
20
20
|
afterClarify?(s: Readonly<ScanState>, summary: ClarifySummary): MaybePromise<void>;
|
|
21
21
|
onError?(err: unknown, phase: PhaseName, s: Readonly<ScanState>): MaybePromise<void>;
|
|
22
|
+
/**
|
|
23
|
+
* Fired when an AbortSignal trip propagates out of any phase. The CLI uses
|
|
24
|
+
* this to unmount Ink and restore the cursor before runScan's promise
|
|
25
|
+
* settles. onFinish still fires after onAbort.
|
|
26
|
+
*/
|
|
27
|
+
onAbort?(s: Readonly<ScanState>): MaybePromise<void>;
|
|
22
28
|
onFinish?(s: Readonly<ScanState>): MaybePromise<void>;
|
|
23
29
|
}
|
package/dist/scanner/parse.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { runWithConcurrency } from "./concurrency.js";
|
|
2
2
|
import { runScanWorker } from "./worker.js";
|
|
3
3
|
import { errorMessage } from "./result.js";
|
|
4
|
-
const
|
|
5
|
-
const
|
|
4
|
+
const MAX_FILE_WORKERS = 5;
|
|
5
|
+
const MAX_SCAN_WORKERS_PER_FILE = 5;
|
|
6
6
|
const HARD_CAP = 8;
|
|
7
7
|
const clamp = (n, fallback) => Math.min(HARD_CAP, Math.max(1, n ?? fallback));
|
|
8
8
|
/**
|
|
@@ -13,35 +13,47 @@ const clamp = (n, fallback) => Math.min(HARD_CAP, Math.max(1, n ?? fallback));
|
|
|
13
13
|
*/
|
|
14
14
|
export async function parsePhase(db, state, hooks) {
|
|
15
15
|
await hooks.beforeParse?.(state);
|
|
16
|
-
const maxFile = clamp(state.options.maxFileWorkers,
|
|
17
|
-
const maxChunk = clamp(state.options.maxScanWorkersPerFile,
|
|
16
|
+
const maxFile = clamp(state.options.maxFileWorkers, MAX_FILE_WORKERS);
|
|
17
|
+
const maxChunk = clamp(state.options.maxScanWorkersPerFile, MAX_SCAN_WORKERS_PER_FILE);
|
|
18
18
|
const fileGroups = state.decrypted
|
|
19
|
-
.map(file => ({
|
|
19
|
+
.map((file) => ({
|
|
20
20
|
fileId: file.path,
|
|
21
21
|
scannedFileId: file.scannedFileId,
|
|
22
|
-
chunks: state.chunks.filter(c => c.fileId === file.path),
|
|
22
|
+
chunks: state.chunks.filter((c) => c.fileId === file.path),
|
|
23
23
|
}))
|
|
24
|
-
.filter(g => g.chunks.length > 0);
|
|
25
|
-
const fileTasks = fileGroups.map(group => () => {
|
|
26
|
-
const chunkTasks = group.chunks.map(chunk => () => runScanWorker({
|
|
24
|
+
.filter((g) => g.chunks.length > 0);
|
|
25
|
+
const fileTasks = fileGroups.map((group) => () => {
|
|
26
|
+
const chunkTasks = group.chunks.map((chunk) => () => runScanWorker({
|
|
27
27
|
db,
|
|
28
28
|
scanId: state.scanId,
|
|
29
29
|
scannedFileId: group.scannedFileId,
|
|
30
30
|
progress: state.progress,
|
|
31
31
|
chunk,
|
|
32
|
+
signal: state.signal,
|
|
32
33
|
}, hooks));
|
|
33
|
-
return runWithConcurrency(chunkTasks, maxChunk);
|
|
34
|
+
return runWithConcurrency(chunkTasks, maxChunk, state.signal);
|
|
34
35
|
});
|
|
35
|
-
const settled = await runWithConcurrency(fileTasks, maxFile);
|
|
36
|
+
const settled = await runWithConcurrency(fileTasks, maxFile, state.signal);
|
|
36
37
|
for (let i = 0; i < settled.length; i++) {
|
|
37
38
|
const r = settled[i];
|
|
38
|
-
if (!r.ok)
|
|
39
|
-
state.errors.push({
|
|
39
|
+
if (r && !r.ok)
|
|
40
|
+
state.errors.push({
|
|
41
|
+
phase: "parse",
|
|
42
|
+
target: fileGroups[i].fileId,
|
|
43
|
+
error: errorMessage(r.error),
|
|
44
|
+
});
|
|
40
45
|
}
|
|
41
|
-
for
|
|
42
|
-
|
|
46
|
+
// Only flip files to "scanned" for groups that actually completed. On abort
|
|
47
|
+
// the pool leaves later groups unclaimed (their settled slot is undefined);
|
|
48
|
+
// those rows stay `pending` so a future re-scan can pick them up. Partial
|
|
49
|
+
// transactions already committed during the run stay (scanner is DB-direct).
|
|
50
|
+
for (let i = 0; i < fileGroups.length; i++) {
|
|
51
|
+
if (!settled[i])
|
|
43
52
|
continue;
|
|
44
|
-
|
|
53
|
+
const sfId = fileGroups[i].scannedFileId;
|
|
54
|
+
if (!sfId)
|
|
55
|
+
continue;
|
|
56
|
+
db.prepare(`UPDATE scanned_files SET status = 'scanned', scanned_at = datetime('now') WHERE id = ?`).run(sfId);
|
|
45
57
|
}
|
|
46
58
|
await hooks.afterParse?.(state);
|
|
47
59
|
}
|
package/dist/scanner/worker.d.ts
CHANGED
|
@@ -8,6 +8,7 @@ export interface ScanWorkerDeps {
|
|
|
8
8
|
readonly scannedFileId: string | undefined;
|
|
9
9
|
readonly progress: ScanProgress;
|
|
10
10
|
readonly chunk: Chunk;
|
|
11
|
+
readonly signal: AbortSignal;
|
|
11
12
|
}
|
|
12
13
|
/**
|
|
13
14
|
* Process one chunk: run the LLM scan agent over a single-page PDF blob with
|
|
@@ -15,5 +16,10 @@ export interface ScanWorkerDeps {
|
|
|
15
16
|
* context. Agent's record_transactions / note_question calls write directly to
|
|
16
17
|
* the DB; per-row ticks fan out via `progress.emit`. Failures land in the DB
|
|
17
18
|
* as a `chunk_failed` question so the clarifier can pick them up.
|
|
19
|
+
*
|
|
20
|
+
* Cancellation entry point: the worker pool stops claiming new chunks when
|
|
21
|
+
* `signal` aborts; in-flight provider calls abort natively via the SDK and
|
|
22
|
+
* surface as a failed tryExecute outcome — we suppress the chunk_failed row
|
|
23
|
+
* in that case (see below) since cancellation isn't a real failure.
|
|
18
24
|
*/
|
|
19
25
|
export declare function runScanWorker(deps: ScanWorkerDeps, hooks: ScanHooks): Promise<void>;
|
package/dist/scanner/worker.js
CHANGED
|
@@ -10,6 +10,11 @@ import { tryExecute } from "./result.js";
|
|
|
10
10
|
* context. Agent's record_transactions / note_question calls write directly to
|
|
11
11
|
* the DB; per-row ticks fan out via `progress.emit`. Failures land in the DB
|
|
12
12
|
* as a `chunk_failed` question so the clarifier can pick them up.
|
|
13
|
+
*
|
|
14
|
+
* Cancellation entry point: the worker pool stops claiming new chunks when
|
|
15
|
+
* `signal` aborts; in-flight provider calls abort natively via the SDK and
|
|
16
|
+
* surface as a failed tryExecute outcome — we suppress the chunk_failed row
|
|
17
|
+
* in that case (see below) since cancellation isn't a real failure.
|
|
13
18
|
*/
|
|
14
19
|
export async function runScanWorker(deps, hooks) {
|
|
15
20
|
const workerId = `cw:${randomUUID()}`;
|
|
@@ -34,10 +39,16 @@ export async function runScanWorker(deps, hooks) {
|
|
|
34
39
|
chunkId: deps.chunk.chunkId,
|
|
35
40
|
progress: deps.progress,
|
|
36
41
|
},
|
|
42
|
+
signal: deps.signal,
|
|
37
43
|
}));
|
|
38
44
|
hooks.onWorkerEnd?.(workerId, deps.chunk, outcome.ok);
|
|
39
|
-
if (!outcome.ok)
|
|
45
|
+
if (!outcome.ok) {
|
|
46
|
+
// A worker whose in-flight call was cancelled by Ctrl+C is not a real
|
|
47
|
+
// failure — don't pollute the questions table with chunk_failed rows.
|
|
48
|
+
if (deps.signal.aborted)
|
|
49
|
+
return;
|
|
40
50
|
recordChunkFailure(deps, outcome.error);
|
|
51
|
+
}
|
|
41
52
|
}
|
|
42
53
|
function recordChunkFailure(deps, error) {
|
|
43
54
|
try {
|