plasalid 0.7.9 → 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. package/README.md +22 -6
  2. package/dist/ai/agent.d.ts +1 -0
  3. package/dist/ai/agent.js +25 -10
  4. package/dist/ai/provider.d.ts +21 -1
  5. package/dist/ai/providers/anthropic.d.ts +0 -1
  6. package/dist/ai/providers/anthropic.js +2 -3
  7. package/dist/ai/providers/gemini.d.ts +14 -0
  8. package/dist/ai/providers/gemini.js +188 -0
  9. package/dist/ai/providers/index.d.ts +2 -1
  10. package/dist/ai/providers/index.js +23 -8
  11. package/dist/ai/providers/openai-compat.d.ts +6 -1
  12. package/dist/ai/providers/openai-compat.js +48 -104
  13. package/dist/ai/providers/openai-shared.d.ts +26 -0
  14. package/dist/ai/providers/openai-shared.js +118 -0
  15. package/dist/ai/providers/openai.d.ts +27 -3
  16. package/dist/ai/providers/openai.js +142 -91
  17. package/dist/cli/commands/scan.js +78 -10
  18. package/dist/cli/commands/status.js +15 -2
  19. package/dist/cli/ink/ScanDashboard.d.ts +7 -6
  20. package/dist/cli/ink/ScanDashboard.js +14 -6
  21. package/dist/cli/setup.js +175 -119
  22. package/dist/config.d.ts +10 -4
  23. package/dist/config.js +40 -11
  24. package/dist/scanner/clarifier.d.ts +2 -0
  25. package/dist/scanner/clarifier.js +1 -0
  26. package/dist/scanner/concurrency.d.ts +9 -2
  27. package/dist/scanner/concurrency.js +3 -1
  28. package/dist/scanner/engine.d.ts +2 -1
  29. package/dist/scanner/engine.js +21 -3
  30. package/dist/scanner/hooks.d.ts +6 -0
  31. package/dist/scanner/parse.js +28 -16
  32. package/dist/scanner/pdf/pdf.d.ts +3 -2
  33. package/dist/scanner/pdf/pdf.js +11 -1
  34. package/dist/scanner/pdf/rasterize.d.ts +6 -0
  35. package/dist/scanner/pdf/rasterize.js +36 -0
  36. package/dist/scanner/worker.d.ts +6 -0
  37. package/dist/scanner/worker.js +16 -3
  38. package/package.json +2 -1
@@ -19,5 +19,11 @@ export interface ScanHooks {
19
19
  beforeClarify?(s: Readonly<ScanState>): MaybePromise<void>;
20
20
  afterClarify?(s: Readonly<ScanState>, summary: ClarifySummary): MaybePromise<void>;
21
21
  onError?(err: unknown, phase: PhaseName, s: Readonly<ScanState>): MaybePromise<void>;
22
+ /**
23
+ * Fired when an AbortSignal trip propagates out of any phase. The CLI uses
24
+ * this to unmount Ink and restore the cursor before runScan's promise
25
+ * settles. onFinish still fires after onAbort.
26
+ */
27
+ onAbort?(s: Readonly<ScanState>): MaybePromise<void>;
22
28
  onFinish?(s: Readonly<ScanState>): MaybePromise<void>;
23
29
  }
@@ -1,8 +1,8 @@
1
1
  import { runWithConcurrency } from "./concurrency.js";
2
2
  import { runScanWorker } from "./worker.js";
3
3
  import { errorMessage } from "./result.js";
4
- const DEFAULT_MAX_FILE_WORKERS = 5;
5
- const DEFAULT_MAX_SCAN_WORKERS_PER_FILE = 5;
4
+ const MAX_FILE_WORKERS = 5;
5
+ const MAX_SCAN_WORKERS_PER_FILE = 5;
6
6
  const HARD_CAP = 8;
7
7
  const clamp = (n, fallback) => Math.min(HARD_CAP, Math.max(1, n ?? fallback));
8
8
  /**
@@ -13,35 +13,47 @@ const clamp = (n, fallback) => Math.min(HARD_CAP, Math.max(1, n ?? fallback));
13
13
  */
14
14
  export async function parsePhase(db, state, hooks) {
15
15
  await hooks.beforeParse?.(state);
16
- const maxFile = clamp(state.options.maxFileWorkers, DEFAULT_MAX_FILE_WORKERS);
17
- const maxChunk = clamp(state.options.maxScanWorkersPerFile, DEFAULT_MAX_SCAN_WORKERS_PER_FILE);
16
+ const maxFile = clamp(state.options.maxFileWorkers, MAX_FILE_WORKERS);
17
+ const maxChunk = clamp(state.options.maxScanWorkersPerFile, MAX_SCAN_WORKERS_PER_FILE);
18
18
  const fileGroups = state.decrypted
19
- .map(file => ({
19
+ .map((file) => ({
20
20
  fileId: file.path,
21
21
  scannedFileId: file.scannedFileId,
22
- chunks: state.chunks.filter(c => c.fileId === file.path),
22
+ chunks: state.chunks.filter((c) => c.fileId === file.path),
23
23
  }))
24
- .filter(g => g.chunks.length > 0);
25
- const fileTasks = fileGroups.map(group => () => {
26
- const chunkTasks = group.chunks.map(chunk => () => runScanWorker({
24
+ .filter((g) => g.chunks.length > 0);
25
+ const fileTasks = fileGroups.map((group) => () => {
26
+ const chunkTasks = group.chunks.map((chunk) => () => runScanWorker({
27
27
  db,
28
28
  scanId: state.scanId,
29
29
  scannedFileId: group.scannedFileId,
30
30
  progress: state.progress,
31
31
  chunk,
32
+ signal: state.signal,
32
33
  }, hooks));
33
- return runWithConcurrency(chunkTasks, maxChunk);
34
+ return runWithConcurrency(chunkTasks, maxChunk, state.signal);
34
35
  });
35
- const settled = await runWithConcurrency(fileTasks, maxFile);
36
+ const settled = await runWithConcurrency(fileTasks, maxFile, state.signal);
36
37
  for (let i = 0; i < settled.length; i++) {
37
38
  const r = settled[i];
38
- if (!r.ok)
39
- state.errors.push({ phase: "parse", target: fileGroups[i].fileId, error: errorMessage(r.error) });
39
+ if (r && !r.ok)
40
+ state.errors.push({
41
+ phase: "parse",
42
+ target: fileGroups[i].fileId,
43
+ error: errorMessage(r.error),
44
+ });
40
45
  }
41
- for (const file of state.decrypted) {
42
- if (!file.scannedFileId)
46
+ // Only flip files to "scanned" for groups that actually completed. On abort
47
+ // the pool leaves later groups unclaimed (their settled slot is undefined);
48
+ // those rows stay `pending` so a future re-scan can pick them up. Partial
49
+ // transactions already committed during the run stay (scanner is DB-direct).
50
+ for (let i = 0; i < fileGroups.length; i++) {
51
+ if (!settled[i])
43
52
  continue;
44
- db.prepare(`UPDATE scanned_files SET status = 'scanned', scanned_at = datetime('now') WHERE id = ?`).run(file.scannedFileId);
53
+ const sfId = fileGroups[i].scannedFileId;
54
+ if (!sfId)
55
+ continue;
56
+ db.prepare(`UPDATE scanned_files SET status = 'scanned', scanned_at = datetime('now') WHERE id = ?`).run(sfId);
45
57
  }
46
58
  await hooks.afterParse?.(state);
47
59
  }
@@ -1,4 +1,5 @@
1
- import type { DocumentBlock } from "../../ai/provider.js";
1
+ import type { DocumentBlock, ImageBlock, Provider } from "../../ai/provider.js";
2
+ import type { Chunk } from "../engine.js";
2
3
  export interface LoadedFile {
3
4
  bytes: Buffer;
4
5
  hash: string;
@@ -13,5 +14,5 @@ export interface LoadedFile {
13
14
  * recognize the same file across re-scans regardless of unlock state.
14
15
  */
15
16
  export declare function readPdf(path: string): LoadedFile;
16
- /** Build an Anthropic-compatible document content block from PDF bytes. */
17
17
  export declare function buildDocumentBlock(bytes: Buffer, fileName: string, mime?: string): DocumentBlock;
18
+ export declare function buildScanAttachment(chunk: Chunk, provider: Provider): Promise<DocumentBlock | ImageBlock>;
@@ -1,6 +1,7 @@
1
1
  import { readFileSync, statSync } from "fs";
2
2
  import { createHash } from "crypto";
3
3
  import { basename, extname } from "path";
4
+ import { rasterizePage } from "./rasterize.js";
4
5
  const MIME_BY_EXT = {
5
6
  ".pdf": "application/pdf",
6
7
  };
@@ -26,7 +27,6 @@ export function readPdf(path) {
26
27
  const hash = createHash("sha256").update(bytes).digest("hex");
27
28
  return { bytes, hash, mime, fileName: basename(path) };
28
29
  }
29
- /** Build an Anthropic-compatible document content block from PDF bytes. */
30
30
  export function buildDocumentBlock(bytes, fileName, mime = "application/pdf") {
31
31
  return {
32
32
  type: "document",
@@ -34,3 +34,13 @@ export function buildDocumentBlock(bytes, fileName, mime = "application/pdf") {
34
34
  title: fileName,
35
35
  };
36
36
  }
37
+ export async function buildScanAttachment(chunk, provider) {
38
+ if (provider.acceptsDocuments) {
39
+ return buildDocumentBlock(chunk.bytes, chunk.fileName, chunk.mime);
40
+ }
41
+ const { bytes, mime } = await rasterizePage(chunk.bytes);
42
+ return {
43
+ type: "image",
44
+ source: { type: "base64", media_type: mime, data: bytes.toString("base64") },
45
+ };
46
+ }
@@ -0,0 +1,6 @@
1
+ export declare function rasterizePage(pdfBytes: Buffer, opts?: {
2
+ dpi?: number;
3
+ }): Promise<{
4
+ bytes: Buffer;
5
+ mime: "image/png";
6
+ }>;
@@ -0,0 +1,36 @@
1
+ let mupdfPromise = null;
2
+ function getMupdf() {
3
+ if (!mupdfPromise)
4
+ mupdfPromise = import("mupdf");
5
+ return mupdfPromise;
6
+ }
7
+ /**
8
+ * 150 DPI keeps statement numerals readable to a VL model without blowing
9
+ * up the token bill on a dense page.
10
+ */
11
+ const DEFAULT_DPI = 150;
12
+ export async function rasterizePage(pdfBytes, opts = {}) {
13
+ const mupdf = await getMupdf();
14
+ const dpi = opts.dpi ?? DEFAULT_DPI;
15
+ const scale = dpi / 72;
16
+ const doc = mupdf.Document.openDocument(pdfBytes, "application/pdf");
17
+ try {
18
+ const page = doc.loadPage(0);
19
+ try {
20
+ const pixmap = page.toPixmap(mupdf.Matrix.scale(scale, scale), mupdf.ColorSpace.DeviceRGB, false);
21
+ try {
22
+ const png = pixmap.asPNG();
23
+ return { bytes: Buffer.from(png), mime: "image/png" };
24
+ }
25
+ finally {
26
+ pixmap.destroy();
27
+ }
28
+ }
29
+ finally {
30
+ page.destroy();
31
+ }
32
+ }
33
+ finally {
34
+ doc.destroy();
35
+ }
36
+ }
@@ -8,6 +8,7 @@ export interface ScanWorkerDeps {
8
8
  readonly scannedFileId: string | undefined;
9
9
  readonly progress: ScanProgress;
10
10
  readonly chunk: Chunk;
11
+ readonly signal: AbortSignal;
11
12
  }
12
13
  /**
13
14
  * Process one chunk: run the LLM scan agent over a single-page PDF blob with
@@ -15,5 +16,10 @@ export interface ScanWorkerDeps {
15
16
  * context. Agent's record_transactions / note_question calls write directly to
16
17
  * the DB; per-row ticks fan out via `progress.emit`. Failures land in the DB
17
18
  * as a `chunk_failed` question so the clarifier can pick them up.
19
+ *
20
+ * Cancellation entry point: the worker pool stops claiming new chunks when
21
+ * `signal` aborts; in-flight provider calls abort natively via the SDK and
22
+ * surface as a failed tryExecute outcome — we suppress the chunk_failed row
23
+ * in that case (see below) since cancellation isn't a real failure.
18
24
  */
19
25
  export declare function runScanWorker(deps: ScanWorkerDeps, hooks: ScanHooks): Promise<void>;
@@ -1,7 +1,8 @@
1
1
  import { randomUUID } from "crypto";
2
2
  import { runScanAgent } from "../ai/agent.js";
3
+ import { getProvider } from "../ai/providers/index.js";
3
4
  import { recordQuestion } from "../db/queries/questions.js";
4
- import { buildDocumentBlock } from "./pdf/pdf.js";
5
+ import { buildScanAttachment } from "./pdf/pdf.js";
5
6
  import { tryExecute } from "./result.js";
6
7
  /**
7
8
  * Process one chunk: run the LLM scan agent over a single-page PDF blob with
@@ -9,17 +10,23 @@ import { tryExecute } from "./result.js";
9
10
  * context. Agent's record_transactions / note_question calls write directly to
10
11
  * the DB; per-row ticks fan out via `progress.emit`. Failures land in the DB
11
12
  * as a `chunk_failed` question so the clarifier can pick them up.
13
+ *
14
+ * Cancellation entry point: the worker pool stops claiming new chunks when
15
+ * `signal` aborts; in-flight provider calls abort natively via the SDK and
16
+ * surface as a failed tryExecute outcome — we suppress the chunk_failed row
17
+ * in that case (see below) since cancellation isn't a real failure.
12
18
  */
13
19
  export async function runScanWorker(deps, hooks) {
14
20
  const workerId = `cw:${randomUUID()}`;
15
21
  hooks.onWorkerStart?.(workerId, deps.chunk);
22
+ const attachment = await buildScanAttachment(deps.chunk, getProvider());
16
23
  const outcome = await tryExecute(() => runScanAgent({
17
24
  db: deps.db,
18
25
  initialMessages: [
19
26
  {
20
27
  role: "user",
21
28
  content: [
22
- buildDocumentBlock(deps.chunk.bytes, deps.chunk.fileName, deps.chunk.mime),
29
+ attachment,
23
30
  { type: "text", text: buildChunkPrompt(deps.chunk) },
24
31
  ],
25
32
  },
@@ -32,10 +39,16 @@ export async function runScanWorker(deps, hooks) {
32
39
  chunkId: deps.chunk.chunkId,
33
40
  progress: deps.progress,
34
41
  },
42
+ signal: deps.signal,
35
43
  }));
36
44
  hooks.onWorkerEnd?.(workerId, deps.chunk, outcome.ok);
37
- if (!outcome.ok)
45
+ if (!outcome.ok) {
46
+ // A worker whose in-flight call was cancelled by Ctrl+C is not a real
47
+ // failure — don't pollute the questions table with chunk_failed rows.
48
+ if (deps.signal.aborted)
49
+ return;
38
50
  recordChunkFailure(deps, outcome.error);
51
+ }
39
52
  }
40
53
  function recordChunkFailure(deps, error) {
41
54
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "plasalid",
3
- "version": "0.7.9",
3
+ "version": "0.8.1",
4
4
  "description": "Plasalid — The Harness Layer for Personal Finance",
5
5
  "keywords": [
6
6
  "finance",
@@ -41,6 +41,7 @@
41
41
  },
42
42
  "dependencies": {
43
43
  "@anthropic-ai/sdk": "^0.74.0",
44
+ "@google/genai": "^2.6.0",
44
45
  "chalk": "^5.3.0",
45
46
  "commander": "^13.0.0",
46
47
  "dotenv": "^16.4.0",