plasalid 0.7.9 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +22 -6
- package/dist/ai/agent.d.ts +1 -0
- package/dist/ai/agent.js +25 -10
- package/dist/ai/provider.d.ts +21 -1
- package/dist/ai/providers/anthropic.d.ts +0 -1
- package/dist/ai/providers/anthropic.js +2 -3
- package/dist/ai/providers/gemini.d.ts +14 -0
- package/dist/ai/providers/gemini.js +188 -0
- package/dist/ai/providers/index.d.ts +2 -1
- package/dist/ai/providers/index.js +23 -8
- package/dist/ai/providers/openai-compat.d.ts +6 -1
- package/dist/ai/providers/openai-compat.js +48 -104
- package/dist/ai/providers/openai-shared.d.ts +26 -0
- package/dist/ai/providers/openai-shared.js +118 -0
- package/dist/ai/providers/openai.d.ts +27 -3
- package/dist/ai/providers/openai.js +142 -91
- package/dist/cli/commands/scan.js +78 -10
- package/dist/cli/commands/status.js +15 -2
- package/dist/cli/ink/ScanDashboard.d.ts +7 -6
- package/dist/cli/ink/ScanDashboard.js +14 -6
- package/dist/cli/setup.js +175 -119
- package/dist/config.d.ts +10 -4
- package/dist/config.js +40 -11
- package/dist/scanner/clarifier.d.ts +2 -0
- package/dist/scanner/clarifier.js +1 -0
- package/dist/scanner/concurrency.d.ts +9 -2
- package/dist/scanner/concurrency.js +3 -1
- package/dist/scanner/engine.d.ts +2 -1
- package/dist/scanner/engine.js +21 -3
- package/dist/scanner/hooks.d.ts +6 -0
- package/dist/scanner/parse.js +28 -16
- package/dist/scanner/pdf/pdf.d.ts +3 -2
- package/dist/scanner/pdf/pdf.js +11 -1
- package/dist/scanner/pdf/rasterize.d.ts +6 -0
- package/dist/scanner/pdf/rasterize.js +36 -0
- package/dist/scanner/worker.d.ts +6 -0
- package/dist/scanner/worker.js +16 -3
- package/package.json +2 -1
package/dist/scanner/hooks.d.ts
CHANGED
|
@@ -19,5 +19,11 @@ export interface ScanHooks {
|
|
|
19
19
|
beforeClarify?(s: Readonly<ScanState>): MaybePromise<void>;
|
|
20
20
|
afterClarify?(s: Readonly<ScanState>, summary: ClarifySummary): MaybePromise<void>;
|
|
21
21
|
onError?(err: unknown, phase: PhaseName, s: Readonly<ScanState>): MaybePromise<void>;
|
|
22
|
+
/**
|
|
23
|
+
* Fired when an AbortSignal trip propagates out of any phase. The CLI uses
|
|
24
|
+
* this to unmount Ink and restore the cursor before runScan's promise
|
|
25
|
+
* settles. onFinish still fires after onAbort.
|
|
26
|
+
*/
|
|
27
|
+
onAbort?(s: Readonly<ScanState>): MaybePromise<void>;
|
|
22
28
|
onFinish?(s: Readonly<ScanState>): MaybePromise<void>;
|
|
23
29
|
}
|
package/dist/scanner/parse.js
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { runWithConcurrency } from "./concurrency.js";
|
|
2
2
|
import { runScanWorker } from "./worker.js";
|
|
3
3
|
import { errorMessage } from "./result.js";
|
|
4
|
-
const
|
|
5
|
-
const
|
|
4
|
+
const MAX_FILE_WORKERS = 5;
|
|
5
|
+
const MAX_SCAN_WORKERS_PER_FILE = 5;
|
|
6
6
|
const HARD_CAP = 8;
|
|
7
7
|
const clamp = (n, fallback) => Math.min(HARD_CAP, Math.max(1, n ?? fallback));
|
|
8
8
|
/**
|
|
@@ -13,35 +13,47 @@ const clamp = (n, fallback) => Math.min(HARD_CAP, Math.max(1, n ?? fallback));
|
|
|
13
13
|
*/
|
|
14
14
|
export async function parsePhase(db, state, hooks) {
|
|
15
15
|
await hooks.beforeParse?.(state);
|
|
16
|
-
const maxFile = clamp(state.options.maxFileWorkers,
|
|
17
|
-
const maxChunk = clamp(state.options.maxScanWorkersPerFile,
|
|
16
|
+
const maxFile = clamp(state.options.maxFileWorkers, MAX_FILE_WORKERS);
|
|
17
|
+
const maxChunk = clamp(state.options.maxScanWorkersPerFile, MAX_SCAN_WORKERS_PER_FILE);
|
|
18
18
|
const fileGroups = state.decrypted
|
|
19
|
-
.map(file => ({
|
|
19
|
+
.map((file) => ({
|
|
20
20
|
fileId: file.path,
|
|
21
21
|
scannedFileId: file.scannedFileId,
|
|
22
|
-
chunks: state.chunks.filter(c => c.fileId === file.path),
|
|
22
|
+
chunks: state.chunks.filter((c) => c.fileId === file.path),
|
|
23
23
|
}))
|
|
24
|
-
.filter(g => g.chunks.length > 0);
|
|
25
|
-
const fileTasks = fileGroups.map(group => () => {
|
|
26
|
-
const chunkTasks = group.chunks.map(chunk => () => runScanWorker({
|
|
24
|
+
.filter((g) => g.chunks.length > 0);
|
|
25
|
+
const fileTasks = fileGroups.map((group) => () => {
|
|
26
|
+
const chunkTasks = group.chunks.map((chunk) => () => runScanWorker({
|
|
27
27
|
db,
|
|
28
28
|
scanId: state.scanId,
|
|
29
29
|
scannedFileId: group.scannedFileId,
|
|
30
30
|
progress: state.progress,
|
|
31
31
|
chunk,
|
|
32
|
+
signal: state.signal,
|
|
32
33
|
}, hooks));
|
|
33
|
-
return runWithConcurrency(chunkTasks, maxChunk);
|
|
34
|
+
return runWithConcurrency(chunkTasks, maxChunk, state.signal);
|
|
34
35
|
});
|
|
35
|
-
const settled = await runWithConcurrency(fileTasks, maxFile);
|
|
36
|
+
const settled = await runWithConcurrency(fileTasks, maxFile, state.signal);
|
|
36
37
|
for (let i = 0; i < settled.length; i++) {
|
|
37
38
|
const r = settled[i];
|
|
38
|
-
if (!r.ok)
|
|
39
|
-
state.errors.push({
|
|
39
|
+
if (r && !r.ok)
|
|
40
|
+
state.errors.push({
|
|
41
|
+
phase: "parse",
|
|
42
|
+
target: fileGroups[i].fileId,
|
|
43
|
+
error: errorMessage(r.error),
|
|
44
|
+
});
|
|
40
45
|
}
|
|
41
|
-
for
|
|
42
|
-
|
|
46
|
+
// Only flip files to "scanned" for groups that actually completed. On abort
|
|
47
|
+
// the pool leaves later groups unclaimed (their settled slot is undefined);
|
|
48
|
+
// those rows stay `pending` so a future re-scan can pick them up. Partial
|
|
49
|
+
// transactions already committed during the run stay (scanner is DB-direct).
|
|
50
|
+
for (let i = 0; i < fileGroups.length; i++) {
|
|
51
|
+
if (!settled[i])
|
|
43
52
|
continue;
|
|
44
|
-
|
|
53
|
+
const sfId = fileGroups[i].scannedFileId;
|
|
54
|
+
if (!sfId)
|
|
55
|
+
continue;
|
|
56
|
+
db.prepare(`UPDATE scanned_files SET status = 'scanned', scanned_at = datetime('now') WHERE id = ?`).run(sfId);
|
|
45
57
|
}
|
|
46
58
|
await hooks.afterParse?.(state);
|
|
47
59
|
}
|
package/dist/scanner/pdf/pdf.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import type { DocumentBlock } from "../../ai/provider.js";
|
|
1
|
+
import type { DocumentBlock, ImageBlock, Provider } from "../../ai/provider.js";
|
|
2
|
+
import type { Chunk } from "../engine.js";
|
|
2
3
|
export interface LoadedFile {
|
|
3
4
|
bytes: Buffer;
|
|
4
5
|
hash: string;
|
|
@@ -13,5 +14,5 @@ export interface LoadedFile {
|
|
|
13
14
|
* recognize the same file across re-scans regardless of unlock state.
|
|
14
15
|
*/
|
|
15
16
|
export declare function readPdf(path: string): LoadedFile;
|
|
16
|
-
/** Build an Anthropic-compatible document content block from PDF bytes. */
|
|
17
17
|
export declare function buildDocumentBlock(bytes: Buffer, fileName: string, mime?: string): DocumentBlock;
|
|
18
|
+
export declare function buildScanAttachment(chunk: Chunk, provider: Provider): Promise<DocumentBlock | ImageBlock>;
|
package/dist/scanner/pdf/pdf.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { readFileSync, statSync } from "fs";
|
|
2
2
|
import { createHash } from "crypto";
|
|
3
3
|
import { basename, extname } from "path";
|
|
4
|
+
import { rasterizePage } from "./rasterize.js";
|
|
4
5
|
const MIME_BY_EXT = {
|
|
5
6
|
".pdf": "application/pdf",
|
|
6
7
|
};
|
|
@@ -26,7 +27,6 @@ export function readPdf(path) {
|
|
|
26
27
|
const hash = createHash("sha256").update(bytes).digest("hex");
|
|
27
28
|
return { bytes, hash, mime, fileName: basename(path) };
|
|
28
29
|
}
|
|
29
|
-
/** Build an Anthropic-compatible document content block from PDF bytes. */
|
|
30
30
|
export function buildDocumentBlock(bytes, fileName, mime = "application/pdf") {
|
|
31
31
|
return {
|
|
32
32
|
type: "document",
|
|
@@ -34,3 +34,13 @@ export function buildDocumentBlock(bytes, fileName, mime = "application/pdf") {
|
|
|
34
34
|
title: fileName,
|
|
35
35
|
};
|
|
36
36
|
}
|
|
37
|
+
export async function buildScanAttachment(chunk, provider) {
|
|
38
|
+
if (provider.acceptsDocuments) {
|
|
39
|
+
return buildDocumentBlock(chunk.bytes, chunk.fileName, chunk.mime);
|
|
40
|
+
}
|
|
41
|
+
const { bytes, mime } = await rasterizePage(chunk.bytes);
|
|
42
|
+
return {
|
|
43
|
+
type: "image",
|
|
44
|
+
source: { type: "base64", media_type: mime, data: bytes.toString("base64") },
|
|
45
|
+
};
|
|
46
|
+
}
|
package/dist/scanner/pdf/rasterize.js
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
let mupdfPromise = null;
|
|
2
|
+
function getMupdf() {
|
|
3
|
+
if (!mupdfPromise)
|
|
4
|
+
mupdfPromise = import("mupdf");
|
|
5
|
+
return mupdfPromise;
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* 150 DPI keeps statement numerals readable to a VL model without blowing
|
|
9
|
+
* up the token bill on a dense page.
|
|
10
|
+
*/
|
|
11
|
+
const DEFAULT_DPI = 150;
|
|
12
|
+
export async function rasterizePage(pdfBytes, opts = {}) {
|
|
13
|
+
const mupdf = await getMupdf();
|
|
14
|
+
const dpi = opts.dpi ?? DEFAULT_DPI;
|
|
15
|
+
const scale = dpi / 72;
|
|
16
|
+
const doc = mupdf.Document.openDocument(pdfBytes, "application/pdf");
|
|
17
|
+
try {
|
|
18
|
+
const page = doc.loadPage(0);
|
|
19
|
+
try {
|
|
20
|
+
const pixmap = page.toPixmap(mupdf.Matrix.scale(scale, scale), mupdf.ColorSpace.DeviceRGB, false);
|
|
21
|
+
try {
|
|
22
|
+
const png = pixmap.asPNG();
|
|
23
|
+
return { bytes: Buffer.from(png), mime: "image/png" };
|
|
24
|
+
}
|
|
25
|
+
finally {
|
|
26
|
+
pixmap.destroy();
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
finally {
|
|
30
|
+
page.destroy();
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
finally {
|
|
34
|
+
doc.destroy();
|
|
35
|
+
}
|
|
36
|
+
}
|
package/dist/scanner/worker.d.ts
CHANGED
|
@@ -8,6 +8,7 @@ export interface ScanWorkerDeps {
|
|
|
8
8
|
readonly scannedFileId: string | undefined;
|
|
9
9
|
readonly progress: ScanProgress;
|
|
10
10
|
readonly chunk: Chunk;
|
|
11
|
+
readonly signal: AbortSignal;
|
|
11
12
|
}
|
|
12
13
|
/**
|
|
13
14
|
* Process one chunk: run the LLM scan agent over a single-page PDF blob with
|
|
@@ -15,5 +16,10 @@ export interface ScanWorkerDeps {
|
|
|
15
16
|
* context. Agent's record_transactions / note_question calls write directly to
|
|
16
17
|
* the DB; per-row ticks fan out via `progress.emit`. Failures land in the DB
|
|
17
18
|
* as a `chunk_failed` question so the clarifier can pick them up.
|
|
19
|
+
*
|
|
20
|
+
* Cancellation entry point: the worker pool stops claiming new chunks when
|
|
21
|
+
* `signal` aborts; in-flight provider calls abort natively via the SDK and
|
|
22
|
+
* surface as a failed tryExecute outcome — we suppress the chunk_failed row
|
|
23
|
+
* in that case (see below) since cancellation isn't a real failure.
|
|
18
24
|
*/
|
|
19
25
|
export declare function runScanWorker(deps: ScanWorkerDeps, hooks: ScanHooks): Promise<void>;
|
package/dist/scanner/worker.js
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { randomUUID } from "crypto";
|
|
2
2
|
import { runScanAgent } from "../ai/agent.js";
|
|
3
|
+
import { getProvider } from "../ai/providers/index.js";
|
|
3
4
|
import { recordQuestion } from "../db/queries/questions.js";
|
|
4
|
-
import {
|
|
5
|
+
import { buildScanAttachment } from "./pdf/pdf.js";
|
|
5
6
|
import { tryExecute } from "./result.js";
|
|
6
7
|
/**
|
|
7
8
|
* Process one chunk: run the LLM scan agent over a single-page PDF blob with
|
|
@@ -9,17 +10,23 @@ import { tryExecute } from "./result.js";
|
|
|
9
10
|
* context. Agent's record_transactions / note_question calls write directly to
|
|
10
11
|
* the DB; per-row ticks fan out via `progress.emit`. Failures land in the DB
|
|
11
12
|
* as a `chunk_failed` question so the clarifier can pick them up.
|
|
13
|
+
*
|
|
14
|
+
* Cancellation entry point: the worker pool stops claiming new chunks when
|
|
15
|
+
* `signal` aborts; in-flight provider calls abort natively via the SDK and
|
|
16
|
+
* surface as a failed tryExecute outcome — we suppress the chunk_failed row
|
|
17
|
+
* in that case (see below) since cancellation isn't a real failure.
|
|
12
18
|
*/
|
|
13
19
|
export async function runScanWorker(deps, hooks) {
|
|
14
20
|
const workerId = `cw:${randomUUID()}`;
|
|
15
21
|
hooks.onWorkerStart?.(workerId, deps.chunk);
|
|
22
|
+
const attachment = await buildScanAttachment(deps.chunk, getProvider());
|
|
16
23
|
const outcome = await tryExecute(() => runScanAgent({
|
|
17
24
|
db: deps.db,
|
|
18
25
|
initialMessages: [
|
|
19
26
|
{
|
|
20
27
|
role: "user",
|
|
21
28
|
content: [
|
|
22
|
-
|
|
29
|
+
attachment,
|
|
23
30
|
{ type: "text", text: buildChunkPrompt(deps.chunk) },
|
|
24
31
|
],
|
|
25
32
|
},
|
|
@@ -32,10 +39,16 @@ export async function runScanWorker(deps, hooks) {
|
|
|
32
39
|
chunkId: deps.chunk.chunkId,
|
|
33
40
|
progress: deps.progress,
|
|
34
41
|
},
|
|
42
|
+
signal: deps.signal,
|
|
35
43
|
}));
|
|
36
44
|
hooks.onWorkerEnd?.(workerId, deps.chunk, outcome.ok);
|
|
37
|
-
if (!outcome.ok)
|
|
45
|
+
if (!outcome.ok) {
|
|
46
|
+
// A worker whose in-flight call was cancelled by Ctrl+C is not a real
|
|
47
|
+
// failure — don't pollute the questions table with chunk_failed rows.
|
|
48
|
+
if (deps.signal.aborted)
|
|
49
|
+
return;
|
|
38
50
|
recordChunkFailure(deps, outcome.error);
|
|
51
|
+
}
|
|
39
52
|
}
|
|
40
53
|
function recordChunkFailure(deps, error) {
|
|
41
54
|
try {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "plasalid",
|
|
3
|
-
"version": "0.7.9",
|
|
3
|
+
"version": "0.8.1",
|
|
4
4
|
"description": "Plasalid — The Harness Layer for Personal Finance",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"finance",
|
|
@@ -41,6 +41,7 @@
|
|
|
41
41
|
},
|
|
42
42
|
"dependencies": {
|
|
43
43
|
"@anthropic-ai/sdk": "^0.74.0",
|
|
44
|
+
"@google/genai": "^2.6.0",
|
|
44
45
|
"chalk": "^5.3.0",
|
|
45
46
|
"commander": "^13.0.0",
|
|
46
47
|
"dotenv": "^16.4.0",
|