plasalid 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +213 -0
- package/README.md +176 -0
- package/dist/accounts/taxonomy.d.ts +31 -0
- package/dist/accounts/taxonomy.js +189 -0
- package/dist/ai/agent.d.ts +43 -0
- package/dist/ai/agent.js +155 -0
- package/dist/ai/context.d.ts +4 -0
- package/dist/ai/context.js +33 -0
- package/dist/ai/memory.d.ts +14 -0
- package/dist/ai/memory.js +12 -0
- package/dist/ai/provider.d.ts +67 -0
- package/dist/ai/provider.js +5 -0
- package/dist/ai/providers/anthropic.d.ts +5 -0
- package/dist/ai/providers/anthropic.js +49 -0
- package/dist/ai/providers/index.d.ts +2 -0
- package/dist/ai/providers/index.js +12 -0
- package/dist/ai/providers/openai-compat.d.ts +5 -0
- package/dist/ai/providers/openai-compat.js +147 -0
- package/dist/ai/providers/openai.d.ts +5 -0
- package/dist/ai/providers/openai.js +147 -0
- package/dist/ai/redactor.d.ts +2 -0
- package/dist/ai/redactor.js +91 -0
- package/dist/ai/sanitize.d.ts +14 -0
- package/dist/ai/sanitize.js +25 -0
- package/dist/ai/system-prompt.d.ts +13 -0
- package/dist/ai/system-prompt.js +174 -0
- package/dist/ai/thai-taxonomy-hint.d.ts +8 -0
- package/dist/ai/thai-taxonomy-hint.js +22 -0
- package/dist/ai/thinking-phrases.d.ts +7 -0
- package/dist/ai/thinking-phrases.js +15 -0
- package/dist/ai/thinking.d.ts +7 -0
- package/dist/ai/thinking.js +15 -0
- package/dist/ai/tools/common.d.ts +2 -0
- package/dist/ai/tools/common.js +83 -0
- package/dist/ai/tools/index.d.ts +8 -0
- package/dist/ai/tools/index.js +34 -0
- package/dist/ai/tools/ingest.d.ts +2 -0
- package/dist/ai/tools/ingest.js +202 -0
- package/dist/ai/tools/read.d.ts +2 -0
- package/dist/ai/tools/read.js +123 -0
- package/dist/ai/tools/reconcile.d.ts +2 -0
- package/dist/ai/tools/reconcile.js +227 -0
- package/dist/ai/tools/scan.d.ts +2 -0
- package/dist/ai/tools/scan.js +24 -0
- package/dist/ai/tools/types.d.ts +26 -0
- package/dist/ai/tools/types.js +1 -0
- package/dist/ai/tools.d.ts +18 -0
- package/dist/ai/tools.js +402 -0
- package/dist/cli/chat.d.ts +1 -0
- package/dist/cli/chat.js +28 -0
- package/dist/cli/commands/accounts.d.ts +1 -0
- package/dist/cli/commands/accounts.js +86 -0
- package/dist/cli/commands/data.d.ts +1 -0
- package/dist/cli/commands/data.js +28 -0
- package/dist/cli/commands/reconcile.d.ts +2 -0
- package/dist/cli/commands/reconcile.js +15 -0
- package/dist/cli/commands/revert.d.ts +1 -0
- package/dist/cli/commands/revert.js +68 -0
- package/dist/cli/commands/scan.d.ts +4 -0
- package/dist/cli/commands/scan.js +45 -0
- package/dist/cli/commands/status.d.ts +1 -0
- package/dist/cli/commands/status.js +22 -0
- package/dist/cli/commands/transactions.d.ts +8 -0
- package/dist/cli/commands/transactions.js +92 -0
- package/dist/cli/commands/undo.d.ts +1 -0
- package/dist/cli/commands/undo.js +38 -0
- package/dist/cli/commands.d.ts +14 -0
- package/dist/cli/commands.js +196 -0
- package/dist/cli/format.d.ts +8 -0
- package/dist/cli/format.js +109 -0
- package/dist/cli/index.d.ts +2 -0
- package/dist/cli/index.js +126 -0
- package/dist/cli/ink/ChatApp.d.ts +8 -0
- package/dist/cli/ink/ChatApp.js +94 -0
- package/dist/cli/ink/PromptFrame.d.ts +10 -0
- package/dist/cli/ink/PromptFrame.js +11 -0
- package/dist/cli/ink/TextInput.d.ts +13 -0
- package/dist/cli/ink/TextInput.js +24 -0
- package/dist/cli/ink/hooks/useAgent.d.ts +27 -0
- package/dist/cli/ink/hooks/useAgent.js +65 -0
- package/dist/cli/ink/hooks/useCtrlCExit.d.ts +16 -0
- package/dist/cli/ink/hooks/useCtrlCExit.js +43 -0
- package/dist/cli/ink/hooks/useFooterText.d.ts +2 -0
- package/dist/cli/ink/hooks/useFooterText.js +43 -0
- package/dist/cli/ink/hooks/useTextInput.d.ts +32 -0
- package/dist/cli/ink/hooks/useTextInput.js +356 -0
- package/dist/cli/ink/messages/AssistantMessage.d.ts +3 -0
- package/dist/cli/ink/messages/AssistantMessage.js +6 -0
- package/dist/cli/ink/messages/ErrorMessage.d.ts +4 -0
- package/dist/cli/ink/messages/ErrorMessage.js +6 -0
- package/dist/cli/ink/messages/InterruptedMessage.d.ts +1 -0
- package/dist/cli/ink/messages/InterruptedMessage.js +6 -0
- package/dist/cli/ink/messages/ThinkingLine.d.ts +12 -0
- package/dist/cli/ink/messages/ThinkingLine.js +23 -0
- package/dist/cli/ink/messages/UserMessage.d.ts +4 -0
- package/dist/cli/ink/messages/UserMessage.js +15 -0
- package/dist/cli/ink/mount.d.ts +6 -0
- package/dist/cli/ink/mount.js +12 -0
- package/dist/cli/logo.d.ts +1 -0
- package/dist/cli/logo.js +20 -0
- package/dist/cli/setup.d.ts +2 -0
- package/dist/cli/setup.js +210 -0
- package/dist/cli/ux.d.ts +38 -0
- package/dist/cli/ux.js +104 -0
- package/dist/config.d.ts +21 -0
- package/dist/config.js +66 -0
- package/dist/currency.d.ts +6 -0
- package/dist/currency.js +19 -0
- package/dist/db/connection.d.ts +5 -0
- package/dist/db/connection.js +45 -0
- package/dist/db/encryption.d.ts +11 -0
- package/dist/db/encryption.js +45 -0
- package/dist/db/helpers.d.ts +16 -0
- package/dist/db/helpers.js +45 -0
- package/dist/db/queries/account_balance.d.ts +61 -0
- package/dist/db/queries/account_balance.js +146 -0
- package/dist/db/queries/journal.d.ts +95 -0
- package/dist/db/queries/journal.js +204 -0
- package/dist/db/queries/search.d.ts +7 -0
- package/dist/db/queries/search.js +19 -0
- package/dist/db/schema.d.ts +2 -0
- package/dist/db/schema.js +95 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.js +1 -0
- package/dist/parser/pdf.d.ts +14 -0
- package/dist/parser/pdf.js +40 -0
- package/dist/parser/pipeline.d.ts +44 -0
- package/dist/parser/pipeline.js +160 -0
- package/dist/parser/prompts.d.ts +8 -0
- package/dist/parser/prompts.js +20 -0
- package/dist/parser/walker.d.ts +8 -0
- package/dist/parser/walker.js +42 -0
- package/dist/reconciler/pipeline.d.ts +17 -0
- package/dist/reconciler/pipeline.js +45 -0
- package/dist/reconciler/prompts.d.ts +12 -0
- package/dist/reconciler/prompts.js +22 -0
- package/dist/scanner/password-store.d.ts +34 -0
- package/dist/scanner/password-store.js +83 -0
- package/dist/scanner/pdf-unlock.d.ts +17 -0
- package/dist/scanner/pdf-unlock.js +48 -0
- package/dist/scanner/pdf.d.ts +17 -0
- package/dist/scanner/pdf.js +36 -0
- package/dist/scanner/pipeline.d.ts +32 -0
- package/dist/scanner/pipeline.js +137 -0
- package/dist/scanner/prompts.d.ts +8 -0
- package/dist/scanner/prompts.js +20 -0
- package/dist/scanner/state-machine.d.ts +60 -0
- package/dist/scanner/state-machine.js +64 -0
- package/dist/scanner/unlock.d.ts +24 -0
- package/dist/scanner/unlock.js +122 -0
- package/dist/scanner/walker.d.ts +8 -0
- package/dist/scanner/walker.js +42 -0
- package/package.json +65 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { readFileSync, statSync } from "fs";
|
|
2
|
+
import { createHash } from "crypto";
|
|
3
|
+
import { basename, extname } from "path";
|
|
4
|
+
/** Supported file extensions (lower-case, leading dot included) mapped to their MIME type. */
const MIME_BY_EXT = {
    ".pdf": "application/pdf",
};
/** Largest on-disk size, in bytes, that `loadDocument` accepts (30 MiB). */
const MAX_BYTES = 30 * 1024 * 1024;
/**
 * Load a local file and wrap it in a base64 `document` content block.
 *
 * The extension is matched case-insensitively against `MIME_BY_EXT`, and the
 * size reported by `statSync` is checked against `MAX_BYTES` before the file
 * is read. The returned `hash` is the hex sha256 digest of the raw bytes.
 *
 * @param path Path of the file to load.
 * @returns `{ block, hash, mime, byteLength, fileName }`, where `byteLength`
 *   is the size reported by `statSync` and `fileName` is the basename of `path`.
 * @throws Error when the extension is unsupported or the file exceeds
 *   `MAX_BYTES`; errors from `statSync` / `readFileSync` propagate unchanged.
 */
export function loadDocument(path) {
    const extension = extname(path).toLowerCase();
    const mediaType = MIME_BY_EXT[extension];
    if (!mediaType) {
        throw new Error(`Unsupported file extension: ${extension}. Plasalid v1 only ingests PDFs.`);
    }
    const { size } = statSync(path);
    if (size > MAX_BYTES) {
        throw new Error(`File too large (${size} bytes). Limit is ${MAX_BYTES} bytes.`);
    }
    const contents = readFileSync(path);
    const digest = createHash("sha256").update(contents).digest("hex");
    const displayName = basename(path);
    const source = {
        type: "base64",
        media_type: mediaType,
        data: contents.toString("base64"),
    };
    return {
        block: { type: "document", source, title: displayName },
        hash: digest,
        mime: mediaType,
        byteLength: size,
        fileName: displayName,
    };
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
/** Version tag of the parser. */
export declare const PARSER_VERSION = "plasalid-parser-v1";
/** Outcome of parsing one file. */
export interface ParseFileResult {
    /** Id of the `parsed_files` row the result refers to. */
    fileId: string;
    /** Final state of the file after the parse attempt. */
    status: "parsed" | "needs_input" | "failed" | "skipped" | "replaced";
    /** Short summary of the parse, when one was produced. */
    summary?: string;
    /** Error message, set when `status` is `"failed"`. */
    error?: string;
    /** Number of unresolved pending questions attached to the file. */
    pendingQuestions: number;
}
/** Per-file parse options. */
export interface ParseOptions {
    /** Whether the agent may prompt the user. */
    interactive?: boolean;
    /** Re-parse a file even when its hash is already recorded. */
    force?: boolean;
    /** Receives human-readable progress messages. */
    onProgress?: (msg: string) => void;
}
/** Parse a single file and report what happened to it. */
export declare function parseFile(filePath: string, opts?: ParseOptions): Promise<ParseFileResult>;
/** Aggregate result of a batch parse: one counter per status plus per-file details. */
export interface ParseSummary {
    total: number;
    parsed: number;
    replaced: number;
    skipped: number;
    needsInput: number;
    failed: number;
    details: Array<{
        name: string;
        relPath: string;
        result: ParseFileResult;
    }>;
}
/** Options for a batch parse. */
export interface RunParseOptions extends ParseOptions {
    /** Optional regex (string). Matches partial, case-insensitive, against the relative path under the data dir. */
    regex?: string;
}
/** Compile a pattern string into the matcher used to select files. */
export declare function compileMatcher(input: string): RegExp;
/** Parse every matching file under the data dir and summarise the outcomes. */
export declare function runParse(opts?: RunParseOptions): Promise<ParseSummary>;
/** A `parsed_files` row selected for undo. */
export interface UndoMatch {
    id: string;
    path: string;
    relPath: string;
    parsedAt: string | null;
}
/** Find parsed_files rows whose relative-to-data-dir path matches the regex. */
export declare function findUndoMatches(db: Database.Database, regex: string): UndoMatch[];
/** Cascade-delete the supplied parsed_files rows. Returns the number deleted. */
export declare function deleteMatches(db: Database.Database, ids: string[]): number;
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import { randomUUID } from "crypto";
|
|
2
|
+
import inquirer from "inquirer";
|
|
3
|
+
import { getDb } from "../db/connection.js";
|
|
4
|
+
import { runParseAgent } from "../ai/agent.js";
|
|
5
|
+
import { loadDocument } from "./pdf.js";
|
|
6
|
+
import { buildParseUserMessage } from "./prompts.js";
|
|
7
|
+
import { scanDataDir } from "./walker.js";
|
|
8
|
+
import { getDataDir } from "../config.js";
|
|
9
|
+
import { relative, sep } from "path";
|
|
10
|
+
export const PARSER_VERSION = "plasalid-parser-v1";
|
|
11
|
+
function findParsedByHash(db, hash) {
|
|
12
|
+
return db
|
|
13
|
+
.prepare(`SELECT id, status FROM parsed_files WHERE file_hash = ?`)
|
|
14
|
+
.get(hash) ?? null;
|
|
15
|
+
}
|
|
16
|
+
function deleteParsedFile(db, id) {
|
|
17
|
+
db.prepare(`DELETE FROM parsed_files WHERE id = ?`).run(id);
|
|
18
|
+
}
|
|
19
|
+
/**
 * Insert a new `parsed_files` row in the `'pending'` state.
 *
 * @param db Database handle exposing `prepare(sql).run(...)`.
 * @param args `{ path, hash, mime }` describing the file.
 * @returns The generated row id (`pf:` followed by a random UUID).
 */
function insertParsedFile(db, args) {
    const { path, hash, mime } = args;
    const newId = `pf:${randomUUID()}`;
    const insert = db.prepare(`INSERT INTO parsed_files (id, path, file_hash, mime, status, parser_version)
VALUES (?, ?, ?, ?, 'pending', ?)`);
    insert.run(newId, path, hash, mime, PARSER_VERSION);
    return newId;
}
|
|
25
|
+
function countPendingQuestions(db, fileId) {
|
|
26
|
+
const row = db
|
|
27
|
+
.prepare(`SELECT COUNT(*) as n FROM pending_questions WHERE file_id = ? AND resolved_at IS NULL`)
|
|
28
|
+
.get(fileId);
|
|
29
|
+
return row.n;
|
|
30
|
+
}
|
|
31
|
+
function setFileStatus(db, id, status, fields = {}) {
|
|
32
|
+
db.prepare(`UPDATE parsed_files
|
|
33
|
+
SET status = ?, parsed_at = datetime('now'), error = ?, raw_text = COALESCE(?, raw_text)
|
|
34
|
+
WHERE id = ?`).run(status, fields.error ?? null, fields.raw_text ?? null, id);
|
|
35
|
+
}
|
|
36
|
+
/**
 * Ask the user a question through inquirer.
 *
 * A non-empty `options` array produces a `list` prompt with those choices;
 * otherwise a free-text `input` prompt is shown.
 *
 * @param prompt Message displayed to the user.
 * @param options Optional choices.
 * @returns The answer, coerced to a string.
 */
async function promptUserViaInquirer(prompt, options) {
    const hasChoices = Boolean(options) && options.length > 0;
    const question = hasChoices
        ? { type: "list", name: "answer", message: prompt, choices: options }
        : { type: "input", name: "answer", message: prompt };
    const reply = await inquirer.prompt([question]);
    return String(reply.answer);
}
|
|
48
|
+
/**
 * Parse one file with the parse agent and record the outcome in `parsed_files`.
 *
 * Flow:
 *  1. Load the document and look its hash up in `parsed_files`.
 *  2. If a row already exists, return `"skipped"` unless `opts.force` is set,
 *     in which case the old row is deleted first.
 *  3. Insert a fresh row, run the agent, then mark the row `"needs_input"`
 *     (unresolved questions remain), `"parsed"`, or `"failed"`.
 *
 * Errors raised before the agent starts (opening the database, loading the
 * document, the hash lookup, the insert) propagate to the caller; errors
 * raised while the agent runs are captured and reported as `"failed"`.
 *
 * @param filePath Path of the file to parse.
 * @param opts `{ interactive, force, onProgress }`; `interactive` defaults to true.
 * @returns `{ fileId, status, summary?, error?, pendingQuestions }`. `status`
 *   is `"replaced"` instead of `"parsed"` when a forced re-parse succeeded.
 */
export async function parseFile(filePath, opts = {}) {
    const db = getDb();
    const loaded = loadDocument(filePath);
    const existing = findParsedByHash(db, loaded.hash);
    if (existing) {
        if (!opts.force) {
            return {
                fileId: existing.id,
                status: "skipped",
                pendingQuestions: countPendingQuestions(db, existing.id),
            };
        }
        deleteParsedFile(db, existing.id);
    }
    const fileId = insertParsedFile(db, {
        path: filePath,
        hash: loaded.hash,
        mime: loaded.mime,
    });
    // Filled in by the agent through the onMarkParsed callback.
    let summary = "";
    const messages = [
        {
            role: "user",
            content: [
                loaded.block,
                { type: "text", text: buildParseUserMessage({ fileName: loaded.fileName }) },
            ],
        },
    ];
    try {
        opts.onProgress?.(`Parsing ${loaded.fileName}...`);
        const text = await runParseAgent({
            db,
            initialMessages: messages,
            prompt: { fileName: loaded.fileName },
            parseCtx: {
                fileId,
                parserVersion: PARSER_VERSION,
                interactive: opts.interactive ?? true,
                promptUser: opts.interactive === false ? undefined : promptUserViaInquirer,
                onMarkParsed: (s) => { summary = s; },
            },
        });
        const stillPending = countPendingQuestions(db, fileId);
        if (stillPending > 0) {
            setFileStatus(db, fileId, "needs_input", { raw_text: text });
            return { fileId, status: "needs_input", summary: summary || text, pendingQuestions: stillPending };
        }
        setFileStatus(db, fileId, "parsed", { raw_text: text });
        const status = existing ? "replaced" : "parsed";
        return { fileId, status, summary: summary || text, pendingQuestions: 0 };
    }
    catch (err) {
        // A thrown value is not guaranteed to be an Error; reading `.message`
        // blindly would store and report `undefined` for such values.
        const message = err instanceof Error ? err.message : String(err);
        setFileStatus(db, fileId, "failed", { error: message });
        return { fileId, status: "failed", error: message, pendingQuestions: countPendingQuestions(db, fileId) };
    }
}
|
|
105
|
+
/**
 * Compile a pattern string into a case-insensitive regular expression.
 *
 * @param input Regex source text.
 * @returns A `RegExp` built with the `i` flag.
 * @throws SyntaxError (from the `RegExp` constructor) when `input` is not a valid pattern.
 */
export function compileMatcher(input) {
    const flags = "i";
    return new RegExp(input, flags);
}
|
|
108
|
+
/**
 * Parse every supported file under the data dir (optionally filtered by
 * `opts.regex`, matched case-insensitively against each file's relative path)
 * and tally the outcomes.
 *
 * Files are processed one at a time, in the order `scanDataDir` returns them.
 * A file whose `parseFile` call throws (for example because the document is
 * too large or unreadable) is recorded as `"failed"` with an empty `fileId`,
 * so a single bad file no longer aborts the rest of the batch.
 *
 * @param opts Parse options plus an optional `regex` filter.
 * @returns Counters per status and a `details` entry for every file.
 * @throws SyntaxError when `opts.regex` is not a valid pattern.
 */
export async function runParse(opts = {}) {
    const matcher = opts.regex ? compileMatcher(opts.regex) : null;
    const files = scanDataDir().filter(f => (matcher ? matcher.test(f.relPath) : true));
    const summary = {
        total: files.length,
        parsed: 0,
        replaced: 0,
        skipped: 0,
        needsInput: 0,
        failed: 0,
        details: [],
    };
    // Result status -> name of the counter it increments.
    const counterFor = new Map([
        ["parsed", "parsed"],
        ["replaced", "replaced"],
        ["skipped", "skipped"],
        ["needs_input", "needsInput"],
        ["failed", "failed"],
    ]);
    for (const f of files) {
        let result;
        try {
            result = await parseFile(f.path, opts);
        }
        catch (err) {
            // parseFile only captures errors raised while the agent runs; failures
            // before that point (loading the document, database access) land here.
            result = {
                fileId: "",
                status: "failed",
                error: err instanceof Error ? err.message : String(err),
                pendingQuestions: 0,
            };
        }
        summary.details.push({ name: f.name, relPath: f.relPath, result });
        const counter = counterFor.get(result.status);
        if (counter !== undefined)
            summary[counter]++;
    }
    return summary;
}
|
|
136
|
+
/**
 * Express a path relative to the data dir, using forward slashes.
 *
 * @param absolutePath Path to convert.
 * @returns The relative path with the platform separator replaced by `/`.
 */
function pathToRelPath(absolutePath) {
    const segments = relative(getDataDir(), absolutePath).split(sep);
    return segments.join("/");
}
|
|
139
|
+
/**
 * Select the `parsed_files` rows whose data-dir-relative path matches `regex`.
 *
 * Rows are read in `parsed_at DESC, created_at DESC` order, and that order is
 * preserved in the result.
 *
 * @param db Database handle exposing `prepare(sql).all()`.
 * @param regex Pattern source; compiled case-insensitively.
 * @returns `{ id, path, relPath, parsedAt }` for every matching row.
 */
export function findUndoMatches(db, regex) {
    const matcher = compileMatcher(regex);
    const rows = db
        .prepare(`SELECT id, path, parsed_at FROM parsed_files ORDER BY parsed_at DESC, created_at DESC`)
        .all();
    const matches = [];
    for (const row of rows) {
        const relPath = pathToRelPath(row.path);
        if (matcher.test(relPath)) {
            matches.push({ id: row.id, path: row.path, relPath, parsedAt: row.parsed_at });
        }
    }
    return matches;
}
|
|
149
|
+
/** Cascade-delete the supplied parsed_files rows. Returns the number deleted. */
|
|
150
|
+
export function deleteMatches(db, ids) {
|
|
151
|
+
if (ids.length === 0)
|
|
152
|
+
return 0;
|
|
153
|
+
const stmt = db.prepare(`DELETE FROM parsed_files WHERE id = ?`);
|
|
154
|
+
const tx = db.transaction(() => {
|
|
155
|
+
for (const id of ids)
|
|
156
|
+
stmt.run(id);
|
|
157
|
+
});
|
|
158
|
+
tx();
|
|
159
|
+
return ids.length;
|
|
160
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
 * Build the per-file instruction text sent in the user message alongside the
 * PDF document block. The persona and rules live in the parse system prompt
 * (src/ai/system-prompt.ts).
 *
 * @param opts.fileName File name quoted in the message.
 * @returns The instruction text.
 */
export declare function buildParseUserMessage(opts: { fileName: string }): string;
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
 * Build the per-file instruction text sent in the user message alongside the
 * PDF document block. The persona and rules live in the parse system prompt
 * (src/ai/system-prompt.ts); this text only names the file and lists the
 * steps to follow.
 *
 * @param opts `{ fileName }` - the name quoted on the `File:` line.
 * @returns The message lines joined with `\n`.
 */
export function buildParseUserMessage(opts) {
    const intro = [
        `Please parse the attached document.`,
        `File: ${opts.fileName}`,
    ];
    const steps = [
        `1. Call list_accounts to see what already exists.`,
        `2. Infer the primary account type (asset / liability / income / expense) from the document's header, account type field, and transaction patterns.`,
        `3. If this document references an account that isn't yet in the chart, call create_account once. Mask the account number to the last 4 digits.`,
        `4. Persist any document-level metadata you find (statement_day, due_day, points_balance, etc.) using update_account_metadata.`,
        `5. For every transaction in the document, call record_journal_entry with balanced debit/credit lines. Use existing accounts where possible; create expense/income accounts as needed.`,
        `6. If a row is ambiguous, call ask_user before guessing.`,
        `7. When you are done, call mark_file_parsed with a short summary.`,
    ];
    return [...intro, ``, `Steps:`, ...steps].join("\n");
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/** One supported file discovered under the data dir. */
export interface ScannedFile {
    /** Resolved path of the file. */
    path: string;
    /** Basename of the file. */
    name: string;
    /** Path relative to the data dir, forward-slashed. */
    relPath: string;
}
/** Recursively walk the data directory and collect every supported file. */
export declare function scanDataDir(): ScannedFile[];
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { readdirSync, statSync } from "fs";
|
|
2
|
+
import { resolve, basename, relative, sep } from "path";
|
|
3
|
+
import { getDataDir } from "../config.js";
|
|
4
|
+
const SUPPORTED_EXTS = new Set([".pdf"]);
|
|
5
|
+
function walk(dir, root, out) {
|
|
6
|
+
let entries;
|
|
7
|
+
try {
|
|
8
|
+
entries = readdirSync(dir);
|
|
9
|
+
}
|
|
10
|
+
catch {
|
|
11
|
+
return;
|
|
12
|
+
}
|
|
13
|
+
for (const entry of entries) {
|
|
14
|
+
if (entry.startsWith("."))
|
|
15
|
+
continue;
|
|
16
|
+
const full = resolve(dir, entry);
|
|
17
|
+
let s;
|
|
18
|
+
try {
|
|
19
|
+
s = statSync(full);
|
|
20
|
+
}
|
|
21
|
+
catch {
|
|
22
|
+
continue;
|
|
23
|
+
}
|
|
24
|
+
if (s.isDirectory()) {
|
|
25
|
+
walk(full, root, out);
|
|
26
|
+
}
|
|
27
|
+
else if (s.isFile()) {
|
|
28
|
+
const ext = entry.slice(entry.lastIndexOf(".")).toLowerCase();
|
|
29
|
+
if (!SUPPORTED_EXTS.has(ext))
|
|
30
|
+
continue;
|
|
31
|
+
const rel = relative(root, full).split(sep).join("/");
|
|
32
|
+
out.push({ path: full, name: basename(full), relPath: rel });
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
/**
 * Collect every supported file found anywhere beneath the data directory.
 *
 * @returns `{ path, name, relPath }` records, with `relPath` relative to the data dir.
 */
export function scanDataDir() {
    const dataDir = getDataDir();
    const found = [];
    walk(dataDir, dataDir, found);
    return found;
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/** Options accepted by `runReconcile`. */
export interface ReconcileOptions {
    /** Restrict the scope to one account. */
    accountId?: string;
    /** Lower bound of the scope. */
    from?: string;
    /** Upper bound of the scope. */
    to?: string;
    /** When set, the run is reported as a dry run. */
    dryRun?: boolean;
    /** Whether the agent may prompt the user. */
    interactive?: boolean;
}
/** Result of a reconcile run. */
export interface ReconcileSummary {
    /** Summary text reported by the agent. */
    summary: string;
    /** Whether the run was a dry run. */
    dryRun: boolean;
}
/**
 * Run the reconcile-profile agent over the existing journal: it looks for
 * duplicate entries, similar accounts, and unused accounts, proposes fixes,
 * and applies them after the user confirms (or prints "would do X" stubs
 * when dryRun is on).
 */
export declare function runReconcile(opts?: ReconcileOptions): Promise<ReconcileSummary>;
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { getDb } from "../db/connection.js";
|
|
2
|
+
import { runReconcileAgent } from "../ai/agent.js";
|
|
3
|
+
import { statusSpinner, makePromptUser, makeAgentOnProgress, } from "../cli/ux.js";
|
|
4
|
+
import { buildReconcileUserMessage } from "./prompts.js";
|
|
5
|
+
/**
 * Walk the existing journal with the reconcile-profile agent: detect duplicate
 * entries, similar accounts, and unused accounts; propose fixes; apply them
 * (or print "would do X" stubs when dryRun is on) after the user confirms.
 *
 * A status spinner is shown for the duration of the run and is resolved with
 * `succeed` or `fail` depending on the outcome.
 *
 * @param opts `{ accountId, from, to, dryRun, interactive }`; `interactive`
 *   defaults to true and `dryRun` is coerced to a boolean.
 * @returns `{ summary, dryRun }`, where `summary` is whatever the agent passed
 *   to its `onComplete` callback (empty string if it never called it).
 * @throws Re-throws, unchanged, any value thrown by the agent.
 */
export async function runReconcile(opts = {}) {
    const db = getDb();
    const interactive = opts.interactive ?? true;
    const dryRun = !!opts.dryRun;
    const scope = {
        accountId: opts.accountId,
        from: opts.from,
        to: opts.to,
        dryRun,
    };
    const spinner = statusSpinner(`Reconciling${dryRun ? " (dry-run)" : ""}...`);
    // No prompt callback is supplied when running non-interactively.
    const promptUser = interactive ? makePromptUser(spinner) : undefined;
    let summary = "";
    try {
        await runReconcileAgent({
            db,
            prompt: scope,
            initialMessages: [
                { role: "user", content: buildReconcileUserMessage(scope) },
            ],
            agentCtx: {
                interactive,
                dryRun,
                promptUser,
                onComplete: (s) => { summary = s; },
            },
            onProgress: makeAgentOnProgress(spinner),
        });
        spinner.succeed(dryRun ? "Reconcile complete (dry-run — no writes)." : "Reconcile complete.");
    }
    catch (err) {
        // A thrown value is not guaranteed to be an Error; avoid printing
        // "Reconcile failed: undefined" for such values.
        const message = err instanceof Error ? err.message : String(err);
        spinner.fail(`Reconcile failed: ${message}`);
        throw err;
    }
    return { summary, dryRun };
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/** Scope of a reconcile session, as quoted in the kickoff message. */
export interface ReconcileScope {
    /** Account to reconcile; omitted means all accounts. */
    accountId?: string;
    /** Start of the range; omitted means all time. */
    from?: string;
    /** End of the range; omitted means now. */
    to?: string;
    /** Whether the session is a dry run. */
    dryRun: boolean;
}
/**
 * Build the kickoff message handed to the reconcile agent. The persona and the
 * chart-of-accounts snapshot live in the system prompt
 * (`buildReconcileSystemPrompt`); this is the per-session instruction.
 */
export declare function buildReconcileUserMessage(scope: ReconcileScope): string;
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
 * Build the kickoff message handed to the reconcile agent. The persona and the
 * chart-of-accounts snapshot live in the system prompt
 * (`buildReconcileSystemPrompt`); this text states the session scope and the
 * steps to follow.
 *
 * @param scope `{ accountId, from, to, dryRun }`; missing `accountId`, `from`
 *   and `to` are rendered as "all", "all time" and "now".
 * @returns The message lines joined with `\n`.
 */
export function buildReconcileUserMessage(scope) {
    const account = scope.accountId ?? "all";
    const rangeStart = scope.from ?? "all time";
    const rangeEnd = scope.to ?? "now";
    let dryRunNote;
    if (scope.dryRun) {
        dryRunNote = "yes — write tools are no-ops";
    }
    else {
        dryRunNote = "no — writes commit after confirmation";
    }
    const scopeLines = [
        `Scope:`,
        `- account: ${account}`,
        `- from: ${rangeStart}`,
        `- to: ${rangeEnd}`,
        `- dry run: ${dryRunNote}`,
    ];
    const stepLines = [
        `Steps:`,
        `1. Survey: list_accounts, get_net_worth, find_duplicate_entries, find_similar_accounts, find_unused_accounts.`,
        `2. For each candidate, call ask_user with concrete options ("merge X into Y", "delete entry Z", "leave as is").`,
        `3. Apply the chosen action only after the user confirms.`,
        `4. When you're done, call mark_reconcile_done with a short summary.`,
    ];
    return [`Reconcile the local Plasalid journal.`, ``, ...scopeLines, ``, ...stepLines].join("\n");
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type Database from "libsql";
|
|
2
|
+
/** A saved password together with the filename pattern it applies to. */
export interface StoredPassword {
    /** Id of the `file_passwords` row. */
    id: string;
    /** Regex source matched against file basenames. */
    pattern: string;
    /** Decrypted password. */
    password: string;
    /** Number of recorded uses. */
    useCount: number;
    /** Timestamp of the last recorded use, or null if never used. */
    lastUsedAt: string | null;
}
/**
 * Derive a regex from a filename. Strategy: take the leading alphabetic-ish
 * prefix (up to the first separator: underscore, hyphen, whitespace, or dot)
 * and wildcard everything after it. Looser than a literal match, so
 * `AcctSt_May26.pdf` and `AcctSt_Jun26.pdf` share the same pattern.
 *
 * Falls back to the older digit-collapse strategy when the prefix is too short
 * (<3 chars) or doesn't start with a letter, so we don't end up with overly
 * generic patterns like `^a.*` or `^\d+.*`.
 *
 * Examples:
 *   `AcctSt_May26.pdf`           → `^acctst.*`
 *   `KBank-Savings-2026-01.pdf`  → `^kbank.*`
 *   `statement.pdf`              → `^statement.*`
 *   `1234567890.pdf`             → `^\d+\.pdf$` (fallback)
 *   `e-statement.pdf`            → `^e-statement\.pdf$` (fallback, prefix too short)
 */
export declare function suggestPattern(filename: string): string;
/** Stored passwords whose pattern matches the basename of `filePath`. */
export declare function findCandidates(db: Database.Database, filePath: string, dbKey: string): StoredPassword[];
/**
 * Upsert by pattern. If the pattern already exists its row is updated in
 * place, which is useful when the bank rotates the password for a recurring
 * statement series.
 */
export declare function savePassword(db: Database.Database, pattern: string, password: string, dbKey: string): string;
/** Record one more use of the stored password with the given id. */
export declare function recordUse(db: Database.Database, id: string): void;
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { randomUUID } from "crypto";
|
|
2
|
+
import { basename } from "path";
|
|
3
|
+
import { encryptSecret, decryptSecret } from "../db/encryption.js";
|
|
4
|
+
/** Characters that carry special meaning in a regex and must be backslash-escaped. */
const REGEX_META = /[.*+?^${}()|[\]\\]/g;
/** Characters that end the filename prefix: underscore, hyphen, whitespace, dot. */
const SEPARATORS = /[_\-\s.]/;
/** Shortest prefix that is considered specific enough to wildcard on. */
const MIN_PREFIX_LEN = 3;
/**
 * Derive a regex source from a filename.
 *
 * The basename is lower-cased and cut at the first separator. If the resulting
 * prefix has at least `MIN_PREFIX_LEN` characters and starts with a letter
 * (a-z), the pattern is that prefix (escaped) followed by a wildcard, so
 * `AcctSt_May26.pdf` and `AcctSt_Jun26.pdf` share one pattern.
 *
 * Otherwise the whole name is escaped, every run of digits is collapsed to
 * `\d+`, and the pattern is anchored at both ends. This avoids overly generic
 * patterns like `^a.*` or `^\d+.*`.
 *
 * Examples:
 *   `AcctSt_May26.pdf`           → `^acctst.*`
 *   `KBank-Savings-2026-01.pdf`  → `^kbank.*`
 *   `statement.pdf`              → `^statement.*`
 *   `1234567890.pdf`             → `^\d+\.pdf$` (fallback)
 *   `e-statement.pdf`            → `^e-statement\.pdf$` (fallback, prefix too short)
 *
 * @param filename File name or path; only the basename is used.
 * @returns The regex source string.
 */
export function suggestPattern(filename) {
    const lowered = basename(filename).toLowerCase();
    const [head] = lowered.split(SEPARATORS);
    const prefixIsUsable = head.length >= MIN_PREFIX_LEN && /^[a-z]/.test(head);
    if (!prefixIsUsable) {
        // Escape first, then collapse digits, so the inserted `\d+` is not itself escaped.
        const literal = lowered.replace(REGEX_META, "\\$&").replace(/\d+/g, "\\d+");
        return `^${literal}$`;
    }
    return `^${head.replace(REGEX_META, "\\$&")}.*`;
}
|
|
34
|
+
/**
 * Return the stored passwords whose pattern matches the basename of `filePath`.
 *
 * Rows are read most-used first (then most recently used, then oldest
 * created), and that order is kept. Only matching rows are decrypted.
 *
 * @param db Database handle exposing `prepare(sql).all()`.
 * @param filePath File whose basename is tested against each stored pattern.
 * @param dbKey Key passed to `decryptSecret`.
 * @returns `{ id, pattern, password, useCount, lastUsedAt }` per match.
 */
export function findCandidates(db, filePath, dbKey) {
    const target = basename(filePath);
    const rows = db
        .prepare(`SELECT id, pattern, password_encrypted, use_count, last_used_at
FROM file_passwords
ORDER BY use_count DESC, last_used_at DESC NULLS LAST, created_at ASC`)
        .all();
    const candidates = [];
    for (const row of rows) {
        if (!safeTest(row.pattern, target))
            continue;
        candidates.push({
            id: row.id,
            pattern: row.pattern,
            password: decryptSecret(row.password_encrypted, dbKey),
            useCount: row.use_count,
            lastUsedAt: row.last_used_at,
        });
    }
    return candidates;
}
|
|
52
|
+
/**
 * Test `target` against `pattern` case-insensitively, treating any error
 * (such as an invalid pattern) as "no match".
 *
 * @param pattern Regex source.
 * @param target String to test.
 * @returns true when the pattern matches; false otherwise or on error.
 */
function safeTest(pattern, target) {
    let matched = false;
    try {
        matched = new RegExp(pattern, "i").test(target);
    }
    catch {
        // Leave `matched` as false.
    }
    return matched;
}
|
|
60
|
+
/**
 * Store a password for a filename pattern, upserting by pattern.
 *
 * When a row with the same pattern already exists, its encrypted password is
 * overwritten and its usage statistics are reset (`use_count = 0`,
 * `last_used_at = NULL`). This is useful when the bank rotates the password
 * for a recurring statement series. Otherwise a new row is inserted.
 *
 * @param db Database handle exposing `prepare(sql).get/run`.
 * @param pattern Regex source the password applies to.
 * @param password Plaintext password; encrypted with `encryptSecret` before storage.
 * @param dbKey Key passed to `encryptSecret`.
 * @returns The id of the updated or newly inserted row (`fp:` + UUID for new rows).
 */
export function savePassword(db, pattern, password, dbKey) {
    const ciphertext = encryptSecret(password, dbKey);
    const current = db
        .prepare(`SELECT id FROM file_passwords WHERE pattern = ?`)
        .get(pattern);
    if (!current) {
        const newId = `fp:${randomUUID()}`;
        db.prepare(`INSERT INTO file_passwords (id, pattern, password_encrypted) VALUES (?, ?, ?)`).run(newId, pattern, ciphertext);
        return newId;
    }
    db.prepare(`UPDATE file_passwords
SET password_encrypted = ?, use_count = 0, last_used_at = NULL
WHERE id = ?`).run(ciphertext, current.id);
    return current.id;
}
|
|
79
|
+
export function recordUse(db, id) {
|
|
80
|
+
db.prepare(`UPDATE file_passwords
|
|
81
|
+
SET use_count = use_count + 1, last_used_at = datetime('now')
|
|
82
|
+
WHERE id = ?`).run(id);
|
|
83
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
 * Thin wrapper around the mupdf WASM library. The library is imported lazily
 * on first call, so the WASM module isn't loaded for data dirs that contain
 * only plaintext PDFs.
 */
/** Report whether the PDF in `bytes` needs a password to open. */
export declare function isEncrypted(bytes: Buffer): Promise<boolean>;
/** Outcome of an unlock attempt. */
export interface UnlockResult {
    /** Whether usable PDF bytes were obtained. */
    ok: boolean;
    /** Set when `ok === true`. Plaintext (decrypted) PDF bytes ready to forward. */
    decrypted?: Buffer;
}
/**
 * Try to unlock `bytes` with `password` and re-save it as an unencrypted PDF.
 *
 * Resolves to `{ ok: false }` on a wrong password or non-PDF input, and to
 * `{ ok: true, decrypted }` on success. Input that was not encrypted in the
 * first place is returned unchanged as `{ ok: true, decrypted: bytes }`.
 */
export declare function unlock(bytes: Buffer, password: string): Promise<UnlockResult>;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Thin wrapper around the mupdf WASM library. Lazy-imported on first call so
|
|
3
|
+
* the WASM module isn't loaded for data dirs that contain only plaintext PDFs.
|
|
4
|
+
*/
|
|
5
|
+
// Memoized promise for the dynamically imported "mupdf" module; null until first use.
let mupdfPromise = null;
/**
 * Return the shared promise for the `mupdf` module.
 *
 * The dynamic import is started on the first call only; every later call
 * gets the very same promise back.
 */
function getMupdf() {
    mupdfPromise = mupdfPromise || import("mupdf");
    return mupdfPromise;
}
|
|
12
|
+
/**
 * Report whether the PDF held in `bytes` needs a password before it can be read.
 *
 * @param bytes Contents of the file; opened with the `application/pdf` type hint.
 * @returns Whatever mupdf's `needsPassword()` reports for the opened document.
 */
export async function isEncrypted(bytes) {
    const lib = await getMupdf();
    const pdf = lib.Document.openDocument(bytes, "application/pdf");
    try {
        const locked = pdf.needsPassword();
        return locked;
    }
    finally {
        // Release the mupdf document whether or not the check above threw.
        pdf.destroy();
    }
}
|
|
22
|
+
/**
 * Attempt to unlock and re-save `bytes` as an unencrypted PDF using `password`.
 *
 * @param bytes Contents of the PDF file, possibly password-protected.
 * @param password Candidate password to authenticate with.
 * @returns
 *   - `{ ok: false }` when mupdf cannot open the bytes, the opened document is
 *     not a `PDFDocument`, or the password is rejected;
 *   - `{ ok: true, decrypted: bytes }` (the same buffer, unchanged) when the
 *     document does not need a password;
 *   - `{ ok: true, decrypted }` with a fresh Node `Buffer` holding the
 *     re-saved, decrypted PDF otherwise.
 */
export async function unlock(bytes, password) {
    const mupdf = await getMupdf();
    let doc;
    try {
        doc = mupdf.Document.openDocument(bytes, "application/pdf");
    }
    catch (_err) {
        // Input mupdf cannot open is reported as { ok: false }, matching the
        // documented contract for non-PDF input, instead of escaping as a throw.
        return { ok: false };
    }
    try {
        if (!(doc instanceof mupdf.PDFDocument)) {
            return { ok: false };
        }
        if (!doc.needsPassword()) {
            return { ok: true, decrypted: bytes };
        }
        // A result of 0 from authenticatePassword is treated as "password rejected".
        if (doc.authenticatePassword(password) === 0) {
            return { ok: false };
        }
        const out = doc.saveToBuffer("decrypt");
        try {
            // Buffer.from copies the bytes, so the result stays valid after `out` is released.
            return { ok: true, decrypted: Buffer.from(out.asUint8Array()) };
        }
        finally {
            // Release the mupdf buffer explicitly, the same way `doc` is released below.
            out.destroy();
        }
    }
    finally {
        doc.destroy();
    }
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import type { DocumentBlock } from "../ai/provider.js";
|
|
2
|
+
/** Everything `readPdf` returns about a file it has loaded from disk. */
export interface LoadedFile {
    /** File contents exactly as read from disk. */
    bytes: Buffer;
    /** Hex-encoded sha256 digest of `bytes`. */
    hash: string;
    /** MIME type chosen from the file extension. */
    mime: string;
    /** Base name of the path the file was read from. */
    fileName: string;
}
|
|
8
|
+
/**
|
|
9
|
+
* Read a local PDF, hash its bytes, and return everything the scan pipeline
|
|
10
|
+
* needs to decide whether to skip / re-scan / unlock the file. The hash is
|
|
11
|
+
* sha256 of the original on-disk bytes (still encrypted if the PDF is
|
|
12
|
+
* password-protected) — that's what the dedup contract relies on, so we can
|
|
13
|
+
* recognize the same file across re-scans regardless of unlock state.
|
|
14
|
+
*/
|
|
15
|
+
/**
 * Read the PDF at `path` and return its bytes, sha256 hash, MIME type and base name.
 *
 * @param path Location of the file on disk; its extension must be `.pdf` (case-insensitive).
 * @returns The loaded file, hashed over the on-disk bytes as they are.
 * @throws Error when the extension is unsupported or the file is larger than 30 MiB.
 */
export declare function readPdf(path: string): LoadedFile;
|
|
16
|
+
/** Build an Anthropic-compatible document content block from PDF bytes. */
|
|
17
|
+
/**
 * Build a `document` content block carrying `bytes` as base64.
 *
 * @param bytes PDF contents to embed.
 * @param fileName Used as the block's `title`.
 * @param mime Media type recorded on the block; the implementation defaults it to `application/pdf`.
 * @returns The assembled document block.
 */
export declare function buildDocumentBlock(bytes: Buffer, fileName: string, mime?: string): DocumentBlock;
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { readFileSync, statSync } from "fs";
|
|
2
|
+
import { createHash } from "crypto";
|
|
3
|
+
import { basename, extname } from "path";
|
|
4
|
+
const MIME_BY_EXT = {
|
|
5
|
+
".pdf": "application/pdf",
|
|
6
|
+
};
|
|
7
|
+
const MAX_BYTES = 30 * 1024 * 1024;
|
|
8
|
+
/**
|
|
9
|
+
* Read a local PDF, hash its bytes, and return everything the scan pipeline
|
|
10
|
+
* needs to decide whether to skip / re-scan / unlock the file. The hash is
|
|
11
|
+
* sha256 of the original on-disk bytes (still encrypted if the PDF is
|
|
12
|
+
* password-protected) — that's what the dedup contract relies on, so we can
|
|
13
|
+
* recognize the same file across re-scans regardless of unlock state.
|
|
14
|
+
*/
|
|
15
|
+
export function readPdf(path) {
|
|
16
|
+
const ext = extname(path).toLowerCase();
|
|
17
|
+
const mime = MIME_BY_EXT[ext];
|
|
18
|
+
if (!mime) {
|
|
19
|
+
throw new Error(`Unsupported file extension: ${ext}. Plasalid v1 only ingests PDFs.`);
|
|
20
|
+
}
|
|
21
|
+
const stat = statSync(path);
|
|
22
|
+
if (stat.size > MAX_BYTES) {
|
|
23
|
+
throw new Error(`File too large (${stat.size} bytes). Limit is ${MAX_BYTES} bytes.`);
|
|
24
|
+
}
|
|
25
|
+
const bytes = readFileSync(path);
|
|
26
|
+
const hash = createHash("sha256").update(bytes).digest("hex");
|
|
27
|
+
return { bytes, hash, mime, fileName: basename(path) };
|
|
28
|
+
}
|
|
29
|
+
/**
 * Wrap PDF bytes in an Anthropic-compatible `document` content block.
 *
 * @param bytes PDF contents; embedded as a base64 string.
 * @param fileName Becomes the block's `title`.
 * @param mime Media type recorded on the source; defaults to `application/pdf`.
 * @returns `{ type: "document", source: { type: "base64", media_type, data }, title }`.
 */
export function buildDocumentBlock(bytes, fileName, mime = "application/pdf") {
    const data = bytes.toString("base64");
    const source = { type: "base64", media_type: mime, data };
    return { type: "document", source, title: fileName };
}
|