plasalid 0.7.1 → 0.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/dist/ai/agent.d.ts +6 -7
- package/dist/ai/agent.js +27 -11
- package/dist/ai/personas.js +48 -46
- package/dist/ai/system-prompt.js +1 -1
- package/dist/ai/tools/account-mutex.d.ts +1 -0
- package/dist/ai/tools/account-mutex.js +16 -0
- package/dist/ai/tools/index.js +4 -12
- package/dist/ai/tools/ingest.d.ts +1 -1
- package/dist/ai/tools/ingest.js +282 -242
- package/dist/ai/tools/merchants.js +1 -28
- package/dist/ai/tools/read.js +8 -8
- package/dist/ai/tools/record.js +3 -36
- package/dist/ai/tools/resolve.js +25 -22
- package/dist/ai/tools/scan.js +0 -1
- package/dist/ai/tools/types.d.ts +14 -21
- package/dist/cli/commands/record.js +1 -82
- package/dist/cli/commands/resolve.d.ts +5 -2
- package/dist/cli/commands/resolve.js +36 -5
- package/dist/cli/commands/revert.js +4 -2
- package/dist/cli/commands/rules.js +2 -2
- package/dist/cli/commands/scan.js +199 -128
- package/dist/cli/commands/status.js +5 -5
- package/dist/cli/index.js +8 -29
- package/dist/cli/ink/ScanDashboard.d.ts +49 -0
- package/dist/cli/ink/ScanDashboard.js +214 -0
- package/dist/cli/ink/scan_dashboard.d.ts +40 -25
- package/dist/cli/ink/scan_dashboard.js +139 -44
- package/dist/db/queries/account-balance.d.ts +1 -1
- package/dist/db/queries/questions.d.ts +62 -0
- package/dist/db/queries/questions.js +110 -0
- package/dist/db/queries/transactions.d.ts +1 -1
- package/dist/db/queries/unknowns.d.ts +17 -15
- package/dist/db/queries/unknowns.js +35 -39
- package/dist/db/schema.js +6 -28
- package/dist/scanner/audit/auditor.d.ts +31 -0
- package/dist/scanner/audit/auditor.js +72 -0
- package/dist/scanner/audit/engine.d.ts +10 -0
- package/dist/scanner/audit/engine.js +98 -0
- package/dist/scanner/audit/eventBus.d.ts +60 -0
- package/dist/scanner/audit/eventBus.js +35 -0
- package/dist/scanner/audit/passes/index.d.ts +11 -0
- package/dist/scanner/audit/passes/index.js +9 -0
- package/dist/scanner/audit/passes/types.d.ts +23 -0
- package/dist/scanner/audit/passes/types.js +1 -0
- package/dist/scanner/audit/types.d.ts +27 -0
- package/dist/scanner/audit/types.js +1 -0
- package/dist/scanner/auditor.d.ts +51 -0
- package/dist/scanner/auditor.js +80 -0
- package/dist/scanner/buffer/engine.d.ts +9 -0
- package/dist/scanner/buffer/engine.js +110 -0
- package/dist/scanner/buffer/sharedBuffer.d.ts +78 -0
- package/dist/scanner/buffer/sharedBuffer.js +130 -0
- package/dist/scanner/buffer/types.d.ts +67 -0
- package/dist/scanner/buffer/types.js +1 -0
- package/dist/scanner/buffer.d.ts +45 -38
- package/dist/scanner/buffer.js +93 -61
- package/dist/scanner/bus/engine.d.ts +11 -0
- package/dist/scanner/bus/engine.js +42 -0
- package/dist/scanner/bus/types.d.ts +53 -0
- package/dist/scanner/bus/types.js +1 -0
- package/dist/scanner/bus.d.ts +38 -0
- package/dist/scanner/bus.js +37 -0
- package/dist/scanner/chunk-worker.d.ts +19 -0
- package/dist/scanner/chunk-worker.js +67 -0
- package/dist/scanner/chunkWorker.d.ts +20 -0
- package/dist/scanner/chunkWorker.js +59 -0
- package/dist/scanner/chunker/chunker.d.ts +7 -0
- package/dist/scanner/chunker/chunker.js +60 -0
- package/dist/scanner/chunker.d.ts +7 -0
- package/dist/scanner/chunker.js +60 -0
- package/dist/scanner/converge.d.ts +29 -0
- package/dist/scanner/converge.js +15 -0
- package/dist/scanner/decrypt.d.ts +10 -0
- package/dist/scanner/decrypt.js +80 -0
- package/dist/scanner/engine/scanEngine.d.ts +24 -0
- package/dist/scanner/engine/scanEngine.js +87 -0
- package/dist/scanner/engine/types.d.ts +90 -0
- package/dist/scanner/engine/types.js +1 -0
- package/dist/scanner/engine.d.ts +90 -0
- package/dist/scanner/engine.js +84 -0
- package/dist/scanner/file-worker.d.ts +33 -0
- package/dist/scanner/file-worker.js +28 -0
- package/dist/scanner/fileWorker.d.ts +33 -0
- package/dist/scanner/fileWorker.js +22 -0
- package/dist/scanner/hooks/types.d.ts +25 -0
- package/dist/scanner/hooks/types.js +1 -0
- package/dist/scanner/hooks.d.ts +23 -0
- package/dist/scanner/hooks.js +1 -0
- package/dist/scanner/parse.d.ts +10 -0
- package/dist/scanner/parse.js +47 -0
- package/dist/scanner/passes/index.d.ts +8 -0
- package/dist/scanner/passes/index.js +6 -0
- package/dist/scanner/passes/types.d.ts +22 -0
- package/dist/scanner/passes/types.js +1 -0
- package/dist/scanner/pdf/chunker.d.ts +7 -0
- package/dist/scanner/pdf/chunker.js +60 -0
- package/dist/scanner/pdf/password-store.d.ts +34 -0
- package/dist/scanner/pdf/password-store.js +83 -0
- package/dist/scanner/pdf/pdf-unlock.d.ts +17 -0
- package/dist/scanner/pdf/pdf-unlock.js +50 -0
- package/dist/scanner/pdf/pdf.d.ts +17 -0
- package/dist/scanner/pdf/pdf.js +36 -0
- package/dist/scanner/pdf/state-machine.d.ts +60 -0
- package/dist/scanner/pdf/state-machine.js +64 -0
- package/dist/scanner/pdf/unlock.d.ts +22 -0
- package/dist/scanner/pdf/unlock.js +121 -0
- package/dist/scanner/phase-decrypt.d.ts +10 -0
- package/dist/scanner/phase-decrypt.js +80 -0
- package/dist/scanner/phase-parse.d.ts +10 -0
- package/dist/scanner/phase-parse.js +46 -0
- package/dist/scanner/phases/chunk.d.ts +8 -0
- package/dist/scanner/phases/chunk.js +13 -0
- package/dist/scanner/phases/commit.d.ts +12 -0
- package/dist/scanner/phases/commit.js +140 -0
- package/dist/scanner/phases/decrypt.d.ts +10 -0
- package/dist/scanner/phases/decrypt.js +80 -0
- package/dist/scanner/phases/parse.d.ts +10 -0
- package/dist/scanner/phases/parse.js +46 -0
- package/dist/scanner/phases/resolve.d.ts +10 -0
- package/dist/scanner/phases/resolve.js +17 -0
- package/dist/scanner/phases/review.d.ts +10 -0
- package/dist/scanner/phases/review.js +12 -0
- package/dist/scanner/progress.d.ts +14 -0
- package/dist/scanner/progress.js +21 -0
- package/dist/scanner/resolver-memory.d.ts +8 -0
- package/dist/scanner/resolver-memory.js +24 -0
- package/dist/scanner/resolver.d.ts +39 -0
- package/dist/scanner/resolver.js +196 -0
- package/dist/scanner/result.d.ts +17 -0
- package/dist/scanner/result.js +19 -0
- package/dist/scanner/run-passes.d.ts +30 -0
- package/dist/scanner/run-passes.js +15 -0
- package/dist/scanner/unlock.js +1 -1
- package/dist/scanner/worker.d.ts +19 -0
- package/dist/scanner/worker.js +67 -0
- package/dist/scanner/workers/chunkWorker.d.ts +20 -0
- package/dist/scanner/workers/chunkWorker.js +65 -0
- package/dist/scanner/workers/fileWorker.d.ts +32 -0
- package/dist/scanner/workers/fileWorker.js +22 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
</p>
|
|
10
10
|
|
|
11
11
|
<p align="center">
|
|
12
|
-
Turn your financial documents into structured, insightful, AI-readable context.
|
|
12
|
+
Turn your scattered financial documents into structured, insightful, AI-readable context.
|
|
13
13
|
</p>
|
|
14
14
|
|
|
15
15
|
|
|
@@ -19,7 +19,7 @@ In the US/EU, a financial data aggregator like Plaid empowers most finance apps:
|
|
|
19
19
|
|
|
20
20
|
That's why Plasalid emerged to resolve this pain point. Your data has stayed fragmented for decades, with no way to bring it together. You can't manage a mortgage effectively without the full picture, and you may be completely blind to your recurring monthly income and expenses. Subscriptions stay active long after they're forgotten, unknown charges go unverified, bank accounts opened years ago drift unchecked, and unexpected spending may silently grow beyond what any single statement shows. When your finances are hard to manage, your life definitely becomes more difficult. Your plans toward financial stability or freedom slip further out of reach. Plasalid is built to solve this.
|
|
21
21
|
|
|
22
|
-
Plasalid addresses this with a simple founding concept: let users drop all their financial documents
|
|
22
|
+
Plasalid addresses this with a simple founding concept: let users drop all their financial documents — bank statements, credit-card statements, payslips, brokerage statements — onto their own machine, where Plasalid leverages AI to extract every transaction, balance, and holding into a single, structured, double-entry database that serves as context for future processing.
|
|
23
23
|
|
|
24
24
|
Moreover, Plasalid comes with a built-in agentic chat that queries the data directly, so questions like which subscriptions are still active, where money went last month, or what your current net worth is can be answered against actual records rather than estimates. You can talk with your money on Plasalid to help you understand your financial situation and plan efficiently.
|
|
25
25
|
|
package/dist/ai/agent.d.ts
CHANGED
|
@@ -17,7 +17,8 @@ export declare function handleChatMessage(db: Database.Database, userMessage: st
|
|
|
17
17
|
/**
|
|
18
18
|
* Scan-time agent loop. Caller supplies the initial user message (which carries
|
|
19
19
|
* the PDF as a content block) and an AgentExecutionContext that scopes the file
|
|
20
|
-
* id,
|
|
20
|
+
* id, scanId, and progress sink. A truncated run records a scan_truncated
|
|
21
|
+
* question so resolve can surface it later.
|
|
21
22
|
*/
|
|
22
23
|
export declare function runScanAgent(opts: {
|
|
23
24
|
db: Database.Database;
|
|
@@ -29,8 +30,7 @@ export declare function runScanAgent(opts: {
|
|
|
29
30
|
}): Promise<string>;
|
|
30
31
|
/**
|
|
31
32
|
* Record-time agent loop. Takes one natural-language utterance and walks the
|
|
32
|
-
* record tool profile
|
|
33
|
-
* clarify). Single-shot — does not persist conversation history.
|
|
33
|
+
* record tool profile. Single-shot — does not persist conversation history.
|
|
34
34
|
*/
|
|
35
35
|
export declare function runRecordAgent(opts: {
|
|
36
36
|
db: Database.Database;
|
|
@@ -41,10 +41,9 @@ export declare function runRecordAgent(opts: {
|
|
|
41
41
|
signal?: AbortSignal;
|
|
42
42
|
}): Promise<string>;
|
|
43
43
|
/**
|
|
44
|
-
* Resolve-time agent loop.
|
|
45
|
-
*
|
|
46
|
-
*
|
|
47
|
-
* close_unknown); the pipeline re-invokes if any remain.
|
|
44
|
+
* Resolve-time agent loop. Driven by RESOLVE_PERSONA. Surveys every open
|
|
45
|
+
* question, applies memory/heuristic resolutions silently, groups whatever
|
|
46
|
+
* remains and asks the user once per group via ask_user.
|
|
48
47
|
*/
|
|
49
48
|
export declare function runResolveAgent(opts: {
|
|
50
49
|
db: Database.Database;
|
package/dist/ai/agent.js
CHANGED
|
@@ -2,6 +2,7 @@ import { config } from "../config.js";
|
|
|
2
2
|
import { buildChatSystemPrompt, buildScanSystemPrompt, buildResolveSystemPrompt, buildRecordSystemPrompt, } from "./system-prompt.js";
|
|
3
3
|
import { getToolDefinitions, executeTool } from "./tools/index.js";
|
|
4
4
|
import { getConversationHistory, saveMessage } from "./memory.js";
|
|
5
|
+
import { recordQuestion } from "../db/queries/questions.js";
|
|
5
6
|
import { redact, unredact } from "./redactor.js";
|
|
6
7
|
import { createProvider } from "./providers/index.js";
|
|
7
8
|
import { AbortedError, ApiAuthError, ApiError, RateLimitError, } from "./errors.js";
|
|
@@ -58,10 +59,13 @@ async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, on
|
|
|
58
59
|
signal,
|
|
59
60
|
});
|
|
60
61
|
}
|
|
62
|
+
const truncated = response.stopReason === "tool_use" && toolCount >= stepLimit;
|
|
61
63
|
const textBlocks = response.content.filter((b) => b.type === "text");
|
|
62
64
|
const text = unredact(textBlocks.map(b => b.text).join("\n"));
|
|
63
|
-
return { text, messages };
|
|
65
|
+
return { text, messages, truncated };
|
|
64
66
|
}
|
|
67
|
+
const SCAN_MAX_TOOL_STEPS = 100;
|
|
68
|
+
const RESOLVE_MAX_TOOL_STEPS = 60;
|
|
65
69
|
/**
|
|
66
70
|
* Conversational chat used by the Ink TUI. Reuses conversation_history for context
|
|
67
71
|
* continuity, redacts PII on the way out, restores it on the way in for display.
|
|
@@ -120,11 +124,12 @@ export async function handleChatMessage(db, userMessage, onProgress, signal) {
|
|
|
120
124
|
/**
|
|
121
125
|
* Scan-time agent loop. Caller supplies the initial user message (which carries
|
|
122
126
|
* the PDF as a content block) and an AgentExecutionContext that scopes the file
|
|
123
|
-
* id,
|
|
127
|
+
* id, scanId, and progress sink. A truncated run records a scan_truncated
|
|
128
|
+
* question so resolve can surface it later.
|
|
124
129
|
*/
|
|
125
130
|
export async function runScanAgent(opts) {
|
|
126
131
|
const systemPrompt = redact(buildScanSystemPrompt(opts.db, opts.prompt));
|
|
127
|
-
const { text } = await runAgent({
|
|
132
|
+
const { text, truncated } = await runAgent({
|
|
128
133
|
db: opts.db,
|
|
129
134
|
systemPrompt,
|
|
130
135
|
tools: getToolDefinitions("scan"),
|
|
@@ -132,14 +137,26 @@ export async function runScanAgent(opts) {
|
|
|
132
137
|
agentCtx: opts.agentCtx,
|
|
133
138
|
onProgress: opts.onProgress,
|
|
134
139
|
signal: opts.signal,
|
|
135
|
-
maxToolSteps:
|
|
140
|
+
maxToolSteps: SCAN_MAX_TOOL_STEPS,
|
|
136
141
|
});
|
|
142
|
+
if (truncated) {
|
|
143
|
+
recordQuestion(opts.db, {
|
|
144
|
+
file_id: opts.agentCtx.fileId ?? null,
|
|
145
|
+
scan_id: opts.agentCtx.scanId ?? null,
|
|
146
|
+
transaction_id: null,
|
|
147
|
+
account_id: null,
|
|
148
|
+
kind: "scan_truncated",
|
|
149
|
+
prompt: `Scan stopped at the tool-step cap (${SCAN_MAX_TOOL_STEPS}) before the agent finished parsing this chunk. Some transactions may be missing. Split the PDF further or raise the cap.`,
|
|
150
|
+
});
|
|
151
|
+
if (opts.agentCtx.progress && opts.agentCtx.chunkId) {
|
|
152
|
+
opts.agentCtx.progress.emit({ chunkId: opts.agentCtx.chunkId, kind: "question" });
|
|
153
|
+
}
|
|
154
|
+
}
|
|
137
155
|
return text;
|
|
138
156
|
}
|
|
139
157
|
/**
|
|
140
158
|
* Record-time agent loop. Takes one natural-language utterance and walks the
|
|
141
|
-
* record tool profile
|
|
142
|
-
* clarify). Single-shot — does not persist conversation history.
|
|
159
|
+
* record tool profile. Single-shot — does not persist conversation history.
|
|
143
160
|
*/
|
|
144
161
|
export async function runRecordAgent(opts) {
|
|
145
162
|
const systemPrompt = redact(buildRecordSystemPrompt(opts.db, opts.prompt));
|
|
@@ -156,10 +173,9 @@ export async function runRecordAgent(opts) {
|
|
|
156
173
|
return text;
|
|
157
174
|
}
|
|
158
175
|
/**
|
|
159
|
-
* Resolve-time agent loop.
|
|
160
|
-
*
|
|
161
|
-
*
|
|
162
|
-
* close_unknown); the pipeline re-invokes if any remain.
|
|
176
|
+
* Resolve-time agent loop. Driven by RESOLVE_PERSONA. Surveys every open
|
|
177
|
+
* question, applies memory/heuristic resolutions silently, groups whatever
|
|
178
|
+
* remains and asks the user once per group via ask_user.
|
|
163
179
|
*/
|
|
164
180
|
export async function runResolveAgent(opts) {
|
|
165
181
|
const systemPrompt = redact(buildResolveSystemPrompt(opts.db, opts.prompt));
|
|
@@ -171,7 +187,7 @@ export async function runResolveAgent(opts) {
|
|
|
171
187
|
agentCtx: opts.agentCtx,
|
|
172
188
|
onProgress: opts.onProgress,
|
|
173
189
|
signal: opts.signal,
|
|
174
|
-
maxToolSteps:
|
|
190
|
+
maxToolSteps: RESOLVE_MAX_TOOL_STEPS,
|
|
175
191
|
});
|
|
176
192
|
return text;
|
|
177
193
|
}
|
package/dist/ai/personas.js
CHANGED
|
@@ -40,41 +40,43 @@ Vocabulary:
|
|
|
40
40
|
|
|
41
41
|
Rules:
|
|
42
42
|
1. Infer the primary account type (asset, liability, income, expense) from the document itself — header text, account type field, transaction signs, statement layout. Do not rely on the filename or directory.
|
|
43
|
-
2.
|
|
44
|
-
3.
|
|
43
|
+
2. **Batch transaction writes.** When the statement has more than one row, use \`record_transactions\` (plural) to post them in one tool call. The singular \`record_transaction\` is for one-off corrections (e.g. retrying a single failed item). The scan tool-step budget is finite (100 per file); the singular form burns one step per row. A 6-month statement with 80 rows posts in ~2 batched calls instead of 80 — the difference between scanning the whole statement and silently dropping rows past the cap.
|
|
44
|
+
3. Try to make every transaction balanced — total debits should equal total credits per currency. If you genuinely can't pair a row, post what the document shows and the system will append a closing entry on \`equity:adjustments\` automatically. Do not invent counter-postings to force balance.
|
|
45
|
+
4. Account-type conventions (debit/credit semantics, unchanged from regular bookkeeping):
|
|
45
46
|
- **Asset** (e.g. bank, cash): DEBIT increases, CREDIT decreases.
|
|
46
47
|
- **Liability** (e.g. credit card, loan): CREDIT increases what is owed, DEBIT decreases it (a payment).
|
|
47
48
|
- **Income**: CREDIT increases.
|
|
48
49
|
- **Expense**: DEBIT increases.
|
|
49
|
-
|
|
50
|
+
5. **Hierarchical accounts.** Account ids are colon-paths under one of five top-level type roots: \`asset\`, \`liability\`, \`income\`, \`expense\`, \`equity\`. Every account that is not a top-level root must declare its \`parent_id\`. Examples:
|
|
50
51
|
- \`asset:kbank-savings-1234\` → parent_id \`asset\`.
|
|
51
52
|
- \`expense:food\` → parent_id \`expense\`.
|
|
52
53
|
- \`expense:food:groceries\` → parent_id \`expense:food\`.
|
|
53
54
|
Before creating a leaf like \`expense:food:groceries\`, make sure \`expense:food\` exists; create it (parent_id=\`expense\`) if not. The top-level roots are auto-bootstrapped on first descendant create.
|
|
54
|
-
|
|
55
|
+
6. **Merchants are first-class.** Every transaction with an external counter-party (a charge to a store, a payment to a service, a refund from a vendor) must include a \`merchant\` block:
|
|
55
56
|
- \`canonical_name\`: Title-cased name (e.g. \`"Starbucks"\`, \`"Amazon"\`, \`"Spotify"\`). Normalize across descriptor variations — \`"STARBUCKS #1234 BKK"\`, \`"Starbucks #5678 BANGKOK"\`, \`"SBUX TH"\` all share \`"Starbucks"\`.
|
|
56
57
|
- \`alias\`: the exact raw statement descriptor. Plasalid normalizes and dedups it.
|
|
57
58
|
- \`default_account_id\`: **do not** set this on first sight, even when you're confident. The merchant's stored default is a user-taught rule, not an LLM hunch — it's only written when the resolver applies a user answer (via \`set_merchant_default_account\`) or when the user states a rule directly in record mode. Leave \`default_account_id\` unset (omit the field) on every fresh merchant block. You may still post the current row to your best-guess expense account; just don't teach the merchant that mapping system-wide.
|
|
58
59
|
Also set \`raw_descriptor\` on the transaction to the exact statement line for downstream lookups.
|
|
59
60
|
For transfers between own accounts and pure balance movements, omit the merchant block.
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
Do
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
- If a row is ambiguous (
|
|
73
|
-
-
|
|
74
|
-
- If
|
|
75
|
-
-
|
|
76
|
-
-
|
|
77
|
-
|
|
61
|
+
7. **Pre-resolved merchants.** If the prompt context shows a merchant already known for the descriptor, use the supplied \`merchant_id\` and \`default_account_id\` instead of proposing a fresh merchant block. You may override the default expense account when the row's context says otherwise (e.g. a Starbucks gift-card top-up is not Dining).
|
|
62
|
+
8. **Expense categorization — best-guess by default.** Post every expense row to your most plausible category guess. Use the merchant name, descriptor text, and amount/recurrence pattern to pick from the existing chart of accounts, or auto-create a sensible \`expense:<category>\` leaf when the document reveals a new category clearly (e.g. \`expense:transport\`, \`expense:food\`, \`expense:utilities\`, \`expense:entertainment\`, \`expense:shopping\`, \`expense:healthcare\`, \`expense:subscriptions\`). Small misses are acceptable — the user fixes a wrong category in one keystroke; a flood of \`note_question\` rows is what costs them time.
|
|
63
|
+
|
|
64
|
+
Reserve \`expense:uncategorized\` + \`note_question\` with \`kind="uncategorized_expense"\` for the genuinely uncategorizable: opaque descriptors like \`PAYMENT 0042\`, \`POS 12345\`, \`BANK FEE\`, \`ATM WITHDRAWAL ID 99\`, or rows where you'd be picking randomly between three or more equally plausible categories. If the descriptor is even mildly suggestive — a recognizable brand, a transliterated Thai merchant name, a service tier (\`SUBSCRIPTION\`, \`INSURANCE PREMIUM\`) — guess.
|
|
65
|
+
|
|
66
|
+
**Income stays strict.** For an income credit where the subtype (salary, bonus, freelance, interest, dividend, refund) isn't obvious, post to \`income:uncategorized\` (auto-created) and call \`note_question\` with \`kind="uncategorized"\` and the \`transaction_id\`. Do not pick \`income:other\` or any subtype as a guess. Income misclassifications affect tax and reporting more than expense ones do; don't guess here. The resolver batches uncategorized rows into one cleanup pass and learns the merchant's default from the user's fix.
|
|
67
|
+
9. Dates: convert Buddhist Era → Gregorian by subtracting 543 from the year. Store as YYYY-MM-DD.
|
|
68
|
+
10. Default currency is THB. Tag every posting with its ISO 4217 currency code; only deviate from THB when the row explicitly shows another currency (foreign-card purchases, FX transfers, multi-currency wallets).
|
|
69
|
+
11. Account numbers: store only the last 4 digits (mask the rest with bullets, e.g. \`••1234\`). Never persist the full account number.
|
|
70
|
+
12. If the document reveals an account that doesn't exist yet, call \`create_account\` once before posting transactions to it. Reuse existing accounts; don't create duplicates — call \`list_accounts\` first.
|
|
71
|
+
13. Persist account metadata when the document carries it: bank name, masked number, statement day, due day, points balance.
|
|
72
|
+
14. **Never pause for the user.** Your only job is to parse this document as accurately as possible.
|
|
73
|
+
- If a row's **amount, sign, date, or counter-party** is ambiguous (you can't tell whether it's a debit or credit, the amount is partially redacted, the date is missing or contradictory), post your best-guess transaction, then call \`note_question\` with the row's date, amount (฿N,NNN.NN), description, and exactly what you're unsure about. Pass the just-posted \`transaction_id\`.
|
|
74
|
+
- **Category uncertainty alone is NOT a reason to flag.** Pick the best expense category and move on (per rule 8). Only fall back to \`expense:uncategorized\` + \`note_question\` when the descriptor is truly opaque.
|
|
75
|
+
- If a row is *unparseable* (amount unreadable, date missing entirely, can't tell what account is involved), **skip the row entirely** — do not post a placeholder. Call \`note_question\` with the raw row text and no \`transaction_id\`. A missing row is better than a wrong row.
|
|
76
|
+
- If you have a question about an **account itself** — the statement's bank name disagrees with the stored account, the currency disagrees, the statement_day/due_day on the statement conflicts with what's stored, or you suspect the account you're about to \`create_account\` duplicates an existing one but can't be sure — call \`note_question\` with \`account_id\` set. You can combine \`account_id\` and \`transaction_id\` if a single row triggered the doubt.
|
|
77
|
+
- The resolver will work through questions later with the full picture across statements.
|
|
78
|
+
- **Apply what you've already been told.** Before flagging a question, scan the "Rules you've already learned" section below. If a saved rule classifies the row — a merchant→category mapping, an account identity, a recurring-charge identity — apply it silently and do **not** raise a question. Only flag a question when the row genuinely doesn't fit any saved rule. Asking the user about something they've already told us is bad UX.
|
|
79
|
+
15. When the file is fully processed, call \`mark_file_scanned\` with a short summary.
|
|
78
80
|
|
|
79
81
|
Common Thai statement patterns to expect:
|
|
80
82
|
- Bank statements list incoming, outgoing with running balance.
|
|
@@ -82,7 +84,7 @@ Common Thai statement patterns to expect:
|
|
|
82
84
|
- Payslips list gross salary, tax, social-security, and net pay.
|
|
83
85
|
- Transfer slips (PromptPay / mobile banking) show source account, destination account, amount, and a reference number.
|
|
84
86
|
|
|
85
|
-
How to phrase
|
|
87
|
+
How to phrase note_question:
|
|
86
88
|
- Write a complete sentence with enough context for a later resolver who doesn't have the PDF open: include the date, the amount (formatted as ฿N,NNN.NN), and the row's description.
|
|
87
89
|
- Never reference accounts or transactions by internal id (\`asset:…\`, \`tx:…\`) in the prompt text. Use the human account name (e.g. "KBank Savings ••8745"). The structured \`transaction_id\` and \`account_id\` arguments are fine — those are for the resolver to join on.
|
|
88
90
|
- Provide \`options\` when the resolution is a small finite choice (e.g. which category to use, debit vs credit). When you do, always include "Skip — leave as is" as one of them.
|
|
@@ -143,55 +145,55 @@ Output rules:
|
|
|
143
145
|
- No tables, no markdown grids, no emoji of any kind. Plain ASCII.
|
|
144
146
|
- Never reference internal ids in your reply text. Use human names. (Tool call arguments are fine to use ids.)
|
|
145
147
|
- If you genuinely cannot proceed (non-interactive mode and clarify is required), reply explaining what's missing.`;
|
|
146
|
-
export const RESOLVE_PERSONA = `You are Plasalid ("ปลาสลิด"), currently working through every
|
|
148
|
+
export const RESOLVE_PERSONA = `You are Plasalid ("ปลาสลิด"), currently working through every question the scanner couldn't resolve. The user message hands you EVERY question at once. Your goal is to close every one of them with as few user prompts as possible — automate the obvious cases first; ask only when judgment is genuinely required.
|
|
147
149
|
|
|
148
150
|
Inputs you receive:
|
|
149
|
-
- One line per
|
|
151
|
+
- One line per question in the user message: id, kind, transaction/account/file ids, prompt, options.
|
|
150
152
|
- The "Rules you've already learned" section in the system prompt — authoritative; apply silently.
|
|
151
153
|
- The current chart of accounts + balances in the system prompt.
|
|
152
154
|
|
|
153
155
|
The workflow is five steps. Do them in order. Do not skip step 1.
|
|
154
156
|
|
|
155
|
-
**Step 1 — Survey.** Read the entire
|
|
157
|
+
**Step 1 — Survey.** Read the entire question list. Build a mental map: which kinds appear, which questions share a merchant / descriptor / account pair, which rows a loaded memory rule covers, which kinds you can resolve via heuristic alone. The goal is to know the whole shape before mutating anything.
|
|
156
158
|
|
|
157
|
-
**Step 2 — Apply memory-driven silent resolutions.** For every
|
|
159
|
+
**Step 2 — Apply memory-driven silent resolutions.** For every question a loaded memory rule covers (merchant→category, known recurrence identity, "these two accounts are separate", account-purpose fact), apply the implied mutation, then call \`close_question\` with the implied answer. Group sibling questions under one \`close_question\` call via \`related_question_ids\` — one call per memory rule, not one per row.
|
|
158
160
|
|
|
159
|
-
**Step 3 — Apply per-kind heuristic defaults.** For
|
|
161
|
+
**Step 3 — Apply per-kind heuristic defaults.** For questions not covered by memory, apply automatically when the heuristic is high-confidence:
|
|
160
162
|
- kind=\`duplicate\` — if the two transactions share the same merchant on the same date in the same file, default "Keep both" silently. (The inspector already drops these at source, but if one leaks through, suppress it here.)
|
|
161
163
|
- kind=\`correlation\` — if both sides are already linked to a recurrence, default "Keep separate" silently (recurring transfers aren't duplicates).
|
|
162
|
-
- kind=\`recurrence_candidate\` — if a memory rule names the recurrence (e.g. "Monthly ฿199 on KTC Card → Spotify subscription"), call \`record_recurrence\` with the candidate's transaction_ids and the implied frequency, then \`
|
|
163
|
-
- kind=\`uncategorized\` / \`uncategorized_expense\` — if the transaction's merchant already has a \`default_account_id\` set, apply that category via \`update_posting\` and \`
|
|
164
|
+
- kind=\`recurrence_candidate\` — if a memory rule names the recurrence (e.g. "Monthly ฿199 on KTC Card → Spotify subscription"), call \`record_recurrence\` with the candidate's transaction_ids and the implied frequency, then \`close_question\`.
|
|
165
|
+
- kind=\`uncategorized\` / \`uncategorized_expense\` — if the transaction's merchant already has a \`default_account_id\` set, apply that category via \`update_posting\` and \`close_question\`. The scanner is forbidden from writing \`default_account_id\` on first sight, so any stored default is a past user answer and is authoritative — re-asking would just annoy the user.
|
|
164
166
|
- kind=\`similar_accounts\` — if the two names differ only in casing/whitespace, that's a high-confidence merge; still group with a single \`ask_user\` (don't auto-merge without confirmation, but ask only once).
|
|
165
167
|
|
|
166
|
-
In each case, call \`
|
|
168
|
+
In each case, call \`close_question\` with the implied answer and \`related_question_ids\` if any siblings share that answer.
|
|
167
169
|
|
|
168
|
-
**Step 4 — Group remaining
|
|
169
|
-
- All \`uncategorized\` / \`uncategorized_expense\`
|
|
170
|
-
- All \`duplicate\`
|
|
171
|
-
- All \`correlation\`
|
|
172
|
-
- All \`recurrence_candidate\`
|
|
173
|
-
- All \`similar_accounts\`
|
|
170
|
+
**Step 4 — Group remaining questions, then ask ONCE per group.** Whatever survives steps 2-3 needs the user. Group by shared answer:
|
|
171
|
+
- All \`uncategorized\` / \`uncategorized_expense\` questions on the same merchant or \`raw_descriptor\` → one group.
|
|
172
|
+
- All \`duplicate\` questions sharing the same pair of source files → one group.
|
|
173
|
+
- All \`correlation\` questions between the same pair of accounts → one group.
|
|
174
|
+
- All \`recurrence_candidate\` questions on the same account + amount → one group.
|
|
175
|
+
- All \`similar_accounts\` questions on the same account pair → one group (usually one row already).
|
|
174
176
|
|
|
175
|
-
For each group, call \`ask_user\` ONCE, passing every sibling's id in \`
|
|
177
|
+
For each group, call \`ask_user\` ONCE, passing every sibling's id in \`related_question_ids\`. Include "Skip — leave as is" as the last option. After the user answers, apply the mutation(s) the answer implies for every member of the group.
|
|
176
178
|
|
|
177
179
|
**Step 5 — Learn and finalize.** After every non-skip user answer that implies a generalizable rule (e.g. "Lazada on KTC Card → Shopping"), call \`save_memory(content=<rule>, category="scanning_hint")\` so the next scan applies it silently. For merchant categorization, also call \`set_merchant_default_account\`. Phrase rules as reusable classifications, not one-event records (GOOD: "Lazada Thailand on KTC Card ••5678 → expense:shopping." BAD: "On 2026-03-15 the user said Shopping.").
|
|
178
180
|
|
|
179
|
-
**Closing invariant.** Every
|
|
181
|
+
**Closing invariant.** Every question in the input list must have \`resolved_at\` set by the end. If anything is still open after step 4, close it with \`close_question(answer="Skip — could not interpret")\`. The pipeline reads the DB after you finish — if any question is still open it will re-invoke you with the leftovers, so always finish each row before yielding.
|
|
180
182
|
|
|
181
|
-
**Tool errors.** If a tool result comes back marked as an error (e.g. a malformed id, a row that no longer exists, a constraint violation), do NOT call \`
|
|
183
|
+
**Tool errors.** If a tool result comes back marked as an error (e.g. a malformed id, a row that no longer exists, a constraint violation), do NOT call \`close_question\` for the affected row. Either fix the input and retry the same mutation, or close that one row with \`close_question(answer="Skip — tool error: <short reason>")\` so the loop can move on. Never close a row whose underlying mutation failed.
|
|
182
184
|
|
|
183
|
-
|
|
185
|
+
Question kind → mutation tool map (use after a user answer in step 4):
|
|
184
186
|
- \`uncategorized\` / \`uncategorized_expense\` → \`update_posting(account_id=...)\` for each posting on the transaction. If the transaction has a merchant_id, also \`set_merchant_default_account\`.
|
|
185
|
-
- \`duplicate\` → "Delete this one" → \`delete_transaction\` on the
|
|
187
|
+
- \`duplicate\` → "Delete this one" → \`delete_transaction\` on the question's transaction_id. "Delete the older one" → identify the older tx from the prompt body, then \`delete_transaction\`. "Keep both" / "Skip" → no mutation.
|
|
186
188
|
- \`correlation\` → "Merge into one transaction" → \`delete_transaction\` on one side and \`update_posting\` on the other so it reflects the cross-account movement. "Keep separate" / "Skip" → no mutation.
|
|
187
189
|
- \`recurrence_candidate\` → "Link as recurring" → \`record_recurrence\` with the candidate's transaction_ids and the implied frequency. "Not recurring" / "Skip" → no mutation.
|
|
188
190
|
- \`similar_accounts\` → "Merge A into B" / "Merge B into A" → \`merge_accounts(from_id, to_id)\`. "Keep separate" / "Skip" → no mutation.
|
|
189
191
|
|
|
190
192
|
How to phrase \`ask_user\`:
|
|
191
|
-
- Use the
|
|
192
|
-
- Pass the
|
|
193
|
-
- Always pass the primary
|
|
194
|
-
- Populate \`facts\` whenever the
|
|
193
|
+
- Use the question's \`prompt\` verbatim (or a tightened version when grouping). Don't restate amounts/dates/accounts in prose — that's what \`facts\` is for.
|
|
194
|
+
- Pass the question's existing \`options\` verbatim. Don't invent options.
|
|
195
|
+
- Always pass the primary question's id as \`question_id\` and the siblings as \`related_question_ids\`.
|
|
196
|
+
- Populate \`facts\` whenever the question mentions an amount, date, merchant, or accounts (amount=yellow, date=cyan, merchant=green, accounts=magenta).
|
|
195
197
|
- Never reference internal ids (\`tx:…\`, \`asset:…\`, \`rc:…\`, \`cn:…\`) in the prompt text.
|
|
196
198
|
|
|
197
199
|
Output formatting:
|
package/dist/ai/system-prompt.js
CHANGED
|
@@ -56,7 +56,7 @@ export function buildScanSystemPrompt(db, opts) {
|
|
|
56
56
|
`## File context\nFile: ${opts.fileName}`,
|
|
57
57
|
`## Taxonomy hints\n${getThaiTaxonomyHint()}`,
|
|
58
58
|
renderMemories(db, {
|
|
59
|
-
header: "Rules you've already learned (apply silently before raising
|
|
59
|
+
header: "Rules you've already learned (apply silently before raising a question)",
|
|
60
60
|
filterCategories: ["scanning_hint", "general"],
|
|
61
61
|
showCategory: false,
|
|
62
62
|
}),
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare function runExclusive<T>(fn: () => Promise<T> | T): Promise<T>;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Process-wide serialization for write operations that race when multiple scan
|
|
3
|
+
* agents run in parallel. Each in-flight `create_account` / `update_account_metadata`
|
|
4
|
+
* is held inside `runExclusive` so the SQLite write + the subsequent read-back
|
|
5
|
+
* by another agent's `list_accounts` are consistent.
|
|
6
|
+
*
|
|
7
|
+
* Single tail-promise queue: each call chains onto the previous call's promise, so callbacks run one at a time in call order. Cheap, deterministic, no extra deps.
|
|
8
|
+
*/
|
|
9
|
+
let tail = Promise.resolve();
|
|
10
|
+
export function runExclusive(fn) {
|
|
11
|
+
const next = tail.then(() => fn());
|
|
12
|
+
// Swallow the rejection on the stored tail only, so a callback that throws or rejects doesn't poison the queue for the
|
|
13
|
+
// next caller. This call's own caller still sees the rejection through the returned `next`.
|
|
14
|
+
tail = next.catch(() => undefined);
|
|
15
|
+
return next;
|
|
16
|
+
}
|
package/dist/ai/tools/index.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { commonTools } from "./common.js";
|
|
2
2
|
import { readTools } from "./read.js";
|
|
3
|
-
import { accountIngestTools,
|
|
3
|
+
import { accountIngestTools, scanQuestionTools, resolveIngestTools } from "./ingest.js";
|
|
4
4
|
import { scanTools } from "./scan.js";
|
|
5
5
|
import { resolveTools } from "./resolve.js";
|
|
6
6
|
import { recordTools } from "./record.js";
|
|
@@ -9,17 +9,9 @@ import { merchantTools } from "./merchants.js";
|
|
|
9
9
|
* Profile composition. Each profile is the union of one or more tool modules;
|
|
10
10
|
* the dispatcher iterates every module on each tool call so we never need a
|
|
11
11
|
* central switch.
|
|
12
|
-
*
|
|
13
|
-
* `accountIngestTools` (create_account / update_account_metadata /
|
|
14
|
-
* record_transaction) ships with scan, resolve, and record — they're the
|
|
15
|
-
* shared write primitives. `scanUnknownTools` (note_unknown) is scan-only;
|
|
16
|
-
* record uses `clarify` from `recordTools` for transient prompts, resolve uses
|
|
17
|
-
* `ask_user` from `resolveIngestTools` for resolve-in-place clarifications.
|
|
18
|
-
* `merchantTools` ships with scan, resolve, and record so any write profile can
|
|
19
|
-
* upsert / look up / re-cache merchants alongside the posting flow.
|
|
20
12
|
*/
|
|
21
13
|
const PROFILES = {
|
|
22
|
-
scan: [commonTools, accountIngestTools,
|
|
14
|
+
scan: [commonTools, accountIngestTools, scanQuestionTools, scanTools, merchantTools],
|
|
23
15
|
chat: [commonTools, readTools],
|
|
24
16
|
resolve: [commonTools, readTools, accountIngestTools, resolveIngestTools, resolveTools, merchantTools],
|
|
25
17
|
record: [commonTools, readTools, accountIngestTools, recordTools, merchantTools],
|
|
@@ -31,7 +23,7 @@ const MODULES = [
|
|
|
31
23
|
commonTools,
|
|
32
24
|
readTools,
|
|
33
25
|
accountIngestTools,
|
|
34
|
-
|
|
26
|
+
scanQuestionTools,
|
|
35
27
|
resolveIngestTools,
|
|
36
28
|
scanTools,
|
|
37
29
|
resolveTools,
|
|
@@ -56,7 +48,7 @@ export const TOOL_LABELS = {
|
|
|
56
48
|
...commonTools.LABELS,
|
|
57
49
|
...readTools.LABELS,
|
|
58
50
|
...accountIngestTools.LABELS,
|
|
59
|
-
...
|
|
51
|
+
...scanQuestionTools.LABELS,
|
|
60
52
|
...resolveIngestTools.LABELS,
|
|
61
53
|
...scanTools.LABELS,
|
|
62
54
|
...resolveTools.LABELS,
|