plasalid 0.5.7 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/ai/agent.d.ts +1 -3
- package/dist/ai/agent.js +12 -12
- package/dist/ai/errors.d.ts +16 -0
- package/dist/ai/errors.js +47 -0
- package/dist/ai/providers/anthropic.js +10 -4
- package/dist/ai/providers/openai.js +70 -56
- package/dist/ai/redactor.js +77 -51
- package/dist/cli/ink/hooks/useTextInput.js +60 -66
- package/dist/scanner/concurrency.d.ts +10 -7
- package/dist/scanner/concurrency.js +3 -16
- package/dist/scanner/decrypt_queue.js +56 -38
- package/dist/scanner/pdf-unlock.js +3 -1
- package/dist/scanner/pipeline.js +14 -16
- package/package.json +1 -1
package/dist/ai/agent.d.ts
CHANGED
|
@@ -2,15 +2,13 @@ import type Database from "libsql";
|
|
|
2
2
|
import { type ScanPromptOptions, type ReviewPromptOptions, type RecordPromptOptions } from "./system-prompt.js";
|
|
3
3
|
import { type AgentExecutionContext } from "./tools/index.js";
|
|
4
4
|
import type { NormalizedMessage } from "./provider.js";
|
|
5
|
+
export { AbortedError } from "./errors.js";
|
|
5
6
|
export type ProgressCallback = (event: {
|
|
6
7
|
phase: "tool" | "responding";
|
|
7
8
|
toolName?: string;
|
|
8
9
|
toolCount: number;
|
|
9
10
|
elapsedMs: number;
|
|
10
11
|
}) => void;
|
|
11
|
-
export declare class AbortedError extends Error {
|
|
12
|
-
constructor();
|
|
13
|
-
}
|
|
14
12
|
/**
|
|
15
13
|
* Conversational chat used by the Ink TUI. Reuses conversation_history for context
|
|
16
14
|
* continuity, redacts PII on the way out, restores it on the way in for display.
|
package/dist/ai/agent.js
CHANGED
|
@@ -4,14 +4,10 @@ import { getToolDefinitions, executeTool } from "./tools/index.js";
|
|
|
4
4
|
import { getConversationHistory, saveMessage } from "./memory.js";
|
|
5
5
|
import { redact, unredact } from "./redactor.js";
|
|
6
6
|
import { createProvider } from "./providers/index.js";
|
|
7
|
+
import { AbortedError, ApiAuthError, ApiError, RateLimitError, } from "./errors.js";
|
|
8
|
+
export { AbortedError } from "./errors.js";
|
|
7
9
|
const provider = createProvider();
|
|
8
10
|
const MAX_TOOL_STEPS = 20;
|
|
9
|
-
export class AbortedError extends Error {
|
|
10
|
-
constructor() {
|
|
11
|
-
super("aborted");
|
|
12
|
-
this.name = "AbortedError";
|
|
13
|
-
}
|
|
14
|
-
}
|
|
15
11
|
async function runAgent({ db, systemPrompt, tools, initialMessages, agentCtx, onProgress, signal, maxToolSteps, }) {
|
|
16
12
|
const messages = [...initialMessages];
|
|
17
13
|
const useThinking = config.thinkingBudget > 0 && provider.supportsThinking;
|
|
@@ -102,17 +98,21 @@ export async function handleChatMessage(db, userMessage, onProgress, signal) {
|
|
|
102
98
|
return text || "I couldn't formulate a response. Could you rephrase?";
|
|
103
99
|
}
|
|
104
100
|
catch (error) {
|
|
105
|
-
if (error instanceof AbortedError
|
|
101
|
+
if (error instanceof AbortedError)
|
|
102
|
+
throw error;
|
|
103
|
+
if (signal?.aborted)
|
|
106
104
|
throw new AbortedError();
|
|
107
|
-
|
|
108
|
-
if (error.status === 401 || error.status === 403) {
|
|
105
|
+
if (error instanceof ApiAuthError) {
|
|
109
106
|
return "API key was rejected. Run `plasalid setup` to reconfigure your credentials.";
|
|
110
107
|
}
|
|
111
|
-
if (error
|
|
108
|
+
if (error instanceof RateLimitError) {
|
|
112
109
|
return "Rate limited. Wait a moment and try again.";
|
|
113
110
|
}
|
|
114
|
-
|
|
115
|
-
|
|
111
|
+
if (error instanceof ApiError) {
|
|
112
|
+
console.error("AI error:", `API error (${error.status ?? "?"}): ${error.message}`);
|
|
113
|
+
return "Sorry, I had trouble processing that. Could you try again?";
|
|
114
|
+
}
|
|
115
|
+
console.error("AI error:", error.message || "internal error");
|
|
116
116
|
return "Sorry, I had trouble processing that. Could you try again?";
|
|
117
117
|
}
|
|
118
118
|
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
export declare class AbortedError extends Error {
|
|
2
|
+
constructor();
|
|
3
|
+
}
|
|
4
|
+
export declare class ApiAuthError extends Error {
|
|
5
|
+
readonly status: number;
|
|
6
|
+
constructor(status: number);
|
|
7
|
+
}
|
|
8
|
+
export declare class RateLimitError extends Error {
|
|
9
|
+
readonly status = 429;
|
|
10
|
+
constructor();
|
|
11
|
+
}
|
|
12
|
+
export declare class ApiError extends Error {
|
|
13
|
+
readonly status: number | undefined;
|
|
14
|
+
constructor(status: number | undefined, message: string);
|
|
15
|
+
}
|
|
16
|
+
export declare function classifyProviderError(err: unknown, signal?: AbortSignal): never;
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
export class AbortedError extends Error {
|
|
2
|
+
constructor() {
|
|
3
|
+
super("aborted");
|
|
4
|
+
this.name = "AbortedError";
|
|
5
|
+
}
|
|
6
|
+
}
|
|
7
|
+
export class ApiAuthError extends Error {
|
|
8
|
+
status;
|
|
9
|
+
constructor(status) {
|
|
10
|
+
super(`auth ${status}`);
|
|
11
|
+
this.status = status;
|
|
12
|
+
this.name = "ApiAuthError";
|
|
13
|
+
}
|
|
14
|
+
}
|
|
15
|
+
export class RateLimitError extends Error {
|
|
16
|
+
status = 429;
|
|
17
|
+
constructor() {
|
|
18
|
+
super("rate limited");
|
|
19
|
+
this.name = "RateLimitError";
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
export class ApiError extends Error {
|
|
23
|
+
status;
|
|
24
|
+
constructor(status, message) {
|
|
25
|
+
super(message);
|
|
26
|
+
this.status = status;
|
|
27
|
+
this.name = "ApiError";
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
export function classifyProviderError(err, signal) {
|
|
31
|
+
if (err instanceof AbortedError ||
|
|
32
|
+
err instanceof ApiAuthError ||
|
|
33
|
+
err instanceof RateLimitError ||
|
|
34
|
+
err instanceof ApiError) {
|
|
35
|
+
throw err;
|
|
36
|
+
}
|
|
37
|
+
const e = (err ?? {});
|
|
38
|
+
if (signal?.aborted || e.name === "AbortError")
|
|
39
|
+
throw new AbortedError();
|
|
40
|
+
if (e.status === 401 || e.status === 403)
|
|
41
|
+
throw new ApiAuthError(e.status);
|
|
42
|
+
if (e.status === 429)
|
|
43
|
+
throw new RateLimitError();
|
|
44
|
+
if (typeof e.status === "number")
|
|
45
|
+
throw new ApiError(e.status, e.message ?? "");
|
|
46
|
+
throw new ApiError(undefined, e.message ?? "internal error");
|
|
47
|
+
}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import Anthropic from "@anthropic-ai/sdk";
|
|
2
|
+
import { classifyProviderError } from "../errors.js";
|
|
2
3
|
export function createAnthropicProvider(opts) {
|
|
3
4
|
const client = new Anthropic(opts.baseURL
|
|
4
5
|
? { apiKey: opts.apiKey, baseURL: opts.baseURL }
|
|
@@ -17,10 +18,15 @@ export function createAnthropicProvider(opts) {
|
|
|
17
18
|
if (params.thinking) {
|
|
18
19
|
apiParams.thinking = params.thinking;
|
|
19
20
|
}
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
21
|
+
let response;
|
|
22
|
+
try {
|
|
23
|
+
response = await client.messages.create(apiParams, {
|
|
24
|
+
signal: params.signal,
|
|
25
|
+
});
|
|
26
|
+
}
|
|
27
|
+
catch (e) {
|
|
28
|
+
classifyProviderError(e, params.signal);
|
|
29
|
+
}
|
|
24
30
|
const content = [];
|
|
25
31
|
for (const block of response.content) {
|
|
26
32
|
if (block.type === "thinking")
|
|
@@ -1,4 +1,30 @@
|
|
|
1
1
|
import OpenAI from "openai";
|
|
2
|
+
import { classifyProviderError } from "../errors.js";
|
|
3
|
+
function isMaxTokensRejection(e) {
|
|
4
|
+
const err = e;
|
|
5
|
+
return err.status === 400 && (err.message?.includes("max_tokens") ?? false);
|
|
6
|
+
}
|
|
7
|
+
/**
|
|
8
|
+
* Some OpenAI-compatible endpoints (older models, Ollama, vLLM) accept `max_tokens`;
|
|
9
|
+
* newer OpenAI models require `max_completion_tokens`. Try the former, fall back on a
|
|
10
|
+
* 400 that explicitly names the parameter.
|
|
11
|
+
*/
|
|
12
|
+
async function createCompletionWithTokenFallback(client, body, options) {
|
|
13
|
+
const base = {
|
|
14
|
+
model: body.model,
|
|
15
|
+
messages: body.messages,
|
|
16
|
+
tools: body.tools,
|
|
17
|
+
};
|
|
18
|
+
try {
|
|
19
|
+
return await client.chat.completions.create({ ...base, max_tokens: body.maxTokens }, options);
|
|
20
|
+
}
|
|
21
|
+
catch (e) {
|
|
22
|
+
if (isMaxTokensRejection(e)) {
|
|
23
|
+
return await client.chat.completions.create({ ...base, max_completion_tokens: body.maxTokens }, options);
|
|
24
|
+
}
|
|
25
|
+
throw e;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
2
28
|
export function createOpenAICompatibleProvider(opts) {
|
|
3
29
|
const client = new OpenAI({
|
|
4
30
|
apiKey: opts.apiKey,
|
|
@@ -8,63 +34,54 @@ export function createOpenAICompatibleProvider(opts) {
|
|
|
8
34
|
name: "openai-compatible",
|
|
9
35
|
supportsThinking: false,
|
|
10
36
|
async sendMessage(params) {
|
|
11
|
-
const messages = convertMessages(params.system, params.messages);
|
|
12
37
|
const tools = convertTools(params.tools);
|
|
13
|
-
|
|
14
|
-
|
|
38
|
+
const body = {
|
|
39
|
+
model: params.model,
|
|
40
|
+
maxTokens: params.maxTokens,
|
|
41
|
+
messages: convertMessages(params.system, params.messages),
|
|
42
|
+
tools: tools.length > 0 ? tools : undefined,
|
|
43
|
+
};
|
|
15
44
|
let response;
|
|
16
45
|
try {
|
|
17
|
-
response = await client.
|
|
18
|
-
model: params.model,
|
|
19
|
-
max_tokens: params.maxTokens,
|
|
20
|
-
messages,
|
|
21
|
-
tools: tools.length > 0 ? tools : undefined,
|
|
22
|
-
}, { signal: params.signal });
|
|
46
|
+
response = await createCompletionWithTokenFallback(client, body, { signal: params.signal });
|
|
23
47
|
}
|
|
24
48
|
catch (e) {
|
|
25
|
-
|
|
26
|
-
response = await client.chat.completions.create({
|
|
27
|
-
model: params.model,
|
|
28
|
-
max_completion_tokens: params.maxTokens,
|
|
29
|
-
messages,
|
|
30
|
-
tools: tools.length > 0 ? tools : undefined,
|
|
31
|
-
}, { signal: params.signal });
|
|
32
|
-
}
|
|
33
|
-
else {
|
|
34
|
-
throw e;
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
const choice = response.choices[0];
|
|
38
|
-
if (!choice) {
|
|
39
|
-
return { content: [], stopReason: "end_turn" };
|
|
40
|
-
}
|
|
41
|
-
const content = [];
|
|
42
|
-
if (choice.message.content) {
|
|
43
|
-
content.push({ type: "text", text: choice.message.content });
|
|
49
|
+
classifyProviderError(e, params.signal);
|
|
44
50
|
}
|
|
45
|
-
|
|
46
|
-
for (const tc of choice.message.tool_calls) {
|
|
47
|
-
if (tc.type !== "function")
|
|
48
|
-
continue;
|
|
49
|
-
content.push({
|
|
50
|
-
type: "tool_use",
|
|
51
|
-
id: tc.id,
|
|
52
|
-
name: tc.function.name,
|
|
53
|
-
input: parseArguments(tc.function.arguments),
|
|
54
|
-
});
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
const hasToolCalls = content.some((b) => b.type === "tool_use");
|
|
58
|
-
return {
|
|
59
|
-
content,
|
|
60
|
-
stopReason: hasToolCalls ? "tool_use" : "end_turn",
|
|
61
|
-
usage: response.usage
|
|
62
|
-
? { input_tokens: response.usage.prompt_tokens, output_tokens: response.usage.completion_tokens }
|
|
63
|
-
: undefined,
|
|
64
|
-
};
|
|
51
|
+
return normalizeResponse(response);
|
|
65
52
|
},
|
|
66
53
|
};
|
|
67
54
|
}
|
|
55
|
+
function normalizeResponse(response) {
|
|
56
|
+
const choice = response.choices[0];
|
|
57
|
+
if (!choice) {
|
|
58
|
+
return { content: [], stopReason: "end_turn" };
|
|
59
|
+
}
|
|
60
|
+
const content = [];
|
|
61
|
+
if (choice.message.content) {
|
|
62
|
+
content.push({ type: "text", text: choice.message.content });
|
|
63
|
+
}
|
|
64
|
+
if (choice.message.tool_calls) {
|
|
65
|
+
for (const tc of choice.message.tool_calls) {
|
|
66
|
+
if (tc.type !== "function")
|
|
67
|
+
continue;
|
|
68
|
+
content.push({
|
|
69
|
+
type: "tool_use",
|
|
70
|
+
id: tc.id,
|
|
71
|
+
name: tc.function.name,
|
|
72
|
+
input: parseArguments(tc.function.arguments),
|
|
73
|
+
});
|
|
74
|
+
}
|
|
75
|
+
}
|
|
76
|
+
const hasToolCalls = content.some((b) => b.type === "tool_use");
|
|
77
|
+
return {
|
|
78
|
+
content,
|
|
79
|
+
stopReason: hasToolCalls ? "tool_use" : "end_turn",
|
|
80
|
+
usage: response.usage
|
|
81
|
+
? { input_tokens: response.usage.prompt_tokens, output_tokens: response.usage.completion_tokens }
|
|
82
|
+
: undefined,
|
|
83
|
+
};
|
|
84
|
+
}
|
|
68
85
|
function convertMessages(system, messages) {
|
|
69
86
|
const result = [
|
|
70
87
|
{ role: "system", content: system },
|
|
@@ -104,14 +121,11 @@ function convertMessages(system, messages) {
|
|
|
104
121
|
.join("\n");
|
|
105
122
|
const toolCalls = blocks
|
|
106
123
|
.filter((b) => b.type === "tool_use")
|
|
107
|
-
.map((
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
function: { name: tu.name, arguments: JSON.stringify(tu.input) },
|
|
113
|
-
};
|
|
114
|
-
});
|
|
124
|
+
.map((tu) => ({
|
|
125
|
+
id: tu.id,
|
|
126
|
+
type: "function",
|
|
127
|
+
function: { name: tu.name, arguments: JSON.stringify(tu.input) },
|
|
128
|
+
}));
|
|
115
129
|
result.push({
|
|
116
130
|
role: "assistant",
|
|
117
131
|
content: textParts || null,
|
package/dist/ai/redactor.js
CHANGED
|
@@ -1,75 +1,101 @@
|
|
|
1
1
|
import { config } from "../config.js";
|
|
2
2
|
import { readContext } from "./context.js";
|
|
3
|
+
const SECTION_RULES = [
|
|
4
|
+
{
|
|
5
|
+
heading: "Family",
|
|
6
|
+
token: "[PARTNER]",
|
|
7
|
+
stripParen: true,
|
|
8
|
+
skipIfUser: true,
|
|
9
|
+
patterns: [
|
|
10
|
+
/^(?:partner|spouse|wife|husband|child|kid|son|daughter|dependent)[:\s]+(.+)/i,
|
|
11
|
+
/^([\p{Lu}\p{Lo}][\p{L}\s]+)/u,
|
|
12
|
+
],
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
heading: "Income",
|
|
16
|
+
token: "[EMPLOYER]",
|
|
17
|
+
patterns: [
|
|
18
|
+
/(?:employer|works? (?:at|for)|employed (?:at|by))[:\s]+([A-Z][\w\s&.,-]+?)(?:\s*[-–—|,;(\n]|$)/i,
|
|
19
|
+
/\bfrom ([A-Z][A-Za-z\s&.,-]+?)(?:\s*[-–—|,;(\n]|$)/,
|
|
20
|
+
/\bat ([A-Z][A-Za-z\s&.,-]+?)(?:\s*[-–—|,;(\n]|$)/,
|
|
21
|
+
],
|
|
22
|
+
},
|
|
23
|
+
];
|
|
24
|
+
// Patterns for numeric / identifier PII commonly found in Thai financial data.
|
|
25
|
+
const NUMERIC_PII_PATTERNS = [
|
|
26
|
+
// Thai national ID with dashes: 1-2345-67890-12-3
|
|
27
|
+
[/\b\d-\d{4}-\d{5}-\d{2}-\d\b/g, "[NATID]"],
|
|
28
|
+
// Thai national ID without dashes (13 digits) — must precede the generic ACCT pattern.
|
|
29
|
+
[/\b\d{13}\b/g, "[NATID]"],
|
|
30
|
+
// Thai mobile numbers: 0[689]xxxxxxxx (10 digits starting 06/08/09)
|
|
31
|
+
[/\b0[689]\d{8}\b/g, "[PHONE]"],
|
|
32
|
+
// 16-digit credit card (with optional separators)
|
|
33
|
+
[/\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, "[CARD]"],
|
|
34
|
+
// 10–12 digit account / routing numbers at a word boundary
|
|
35
|
+
[/\b\d{10,12}\b(?=\s|$|[,.])/g, "[ACCT]"],
|
|
36
|
+
];
|
|
37
|
+
function extractSectionLines(context, heading) {
|
|
38
|
+
const re = new RegExp(`## ${heading}\\n([\\s\\S]*?)(?=\\n##|$)`);
|
|
39
|
+
const match = context.match(re);
|
|
40
|
+
if (!match)
|
|
41
|
+
return [];
|
|
42
|
+
return match[1]
|
|
43
|
+
.split("\n")
|
|
44
|
+
.filter((l) => l.trim().startsWith("-"))
|
|
45
|
+
.map((l) => l.replace(/^-\s*/, "").trim())
|
|
46
|
+
.filter((text) => text.length > 0 && !text.startsWith("("));
|
|
47
|
+
}
|
|
48
|
+
function applyRule(rule, context, userName, push) {
|
|
49
|
+
for (const line of extractSectionLines(context, rule.heading)) {
|
|
50
|
+
if (rule.skipIfUser && line.toLowerCase() === userName.toLowerCase())
|
|
51
|
+
continue;
|
|
52
|
+
for (const pattern of rule.patterns) {
|
|
53
|
+
const match = line.match(pattern);
|
|
54
|
+
if (!match)
|
|
55
|
+
continue;
|
|
56
|
+
let name = match[1].trim();
|
|
57
|
+
if (rule.stripParen)
|
|
58
|
+
name = name.replace(/\s*\(.*\)/, "").trim();
|
|
59
|
+
if (!name)
|
|
60
|
+
break;
|
|
61
|
+
if (rule.skipIfUser && name.toLowerCase() === userName.toLowerCase())
|
|
62
|
+
break;
|
|
63
|
+
push(name, rule.token);
|
|
64
|
+
break;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
}
|
|
3
68
|
function buildRedactions() {
|
|
4
69
|
const entries = [];
|
|
5
70
|
const seen = new Set();
|
|
6
|
-
|
|
71
|
+
const push = (real, token) => {
|
|
7
72
|
const trimmed = real.trim();
|
|
8
|
-
if (trimmed.length < 2
|
|
73
|
+
if (trimmed.length < 2)
|
|
74
|
+
return;
|
|
75
|
+
const key = trimmed.toLowerCase();
|
|
76
|
+
if (seen.has(key))
|
|
9
77
|
return;
|
|
10
|
-
seen.add(
|
|
78
|
+
seen.add(key);
|
|
11
79
|
entries.push({ real: trimmed, token });
|
|
12
|
-
}
|
|
80
|
+
};
|
|
13
81
|
const userName = config.userName;
|
|
14
82
|
if (userName && userName !== "User") {
|
|
15
|
-
|
|
83
|
+
push(userName, "[USER]");
|
|
16
84
|
const parts = userName.split(/\s+/);
|
|
17
85
|
if (parts.length > 1) {
|
|
18
|
-
|
|
19
|
-
|
|
86
|
+
push(parts[0], "[USER_FIRST]");
|
|
87
|
+
push(parts[parts.length - 1], "[USER_LAST]");
|
|
20
88
|
}
|
|
21
89
|
}
|
|
22
90
|
const context = readContext();
|
|
23
91
|
if (context) {
|
|
24
|
-
const
|
|
25
|
-
|
|
26
|
-
const lines = familyMatch[1].split("\n").filter(l => l.trim().startsWith("-"));
|
|
27
|
-
for (const line of lines) {
|
|
28
|
-
const text = line.replace(/^-\s*/, "").trim();
|
|
29
|
-
if (!text || text.startsWith("(") || text.toLowerCase() === userName.toLowerCase())
|
|
30
|
-
continue;
|
|
31
|
-
const nameMatch = text.match(/^(?:partner|spouse|wife|husband|child|kid|son|daughter|dependent)[:\s]+(.+)/i)
|
|
32
|
-
|| text.match(/^([\p{Lu}\p{Lo}][\p{L}\s]+)/u);
|
|
33
|
-
if (nameMatch) {
|
|
34
|
-
const name = nameMatch[1].replace(/\s*\(.*\)/, "").trim();
|
|
35
|
-
if (name && name.toLowerCase() !== userName.toLowerCase()) {
|
|
36
|
-
add(name, "[PARTNER]");
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
const incomeMatch = context.match(/## Income\n([\s\S]*?)(?=\n##|$)/);
|
|
42
|
-
if (incomeMatch) {
|
|
43
|
-
const lines = incomeMatch[1].split("\n").filter(l => l.trim().startsWith("-"));
|
|
44
|
-
for (const line of lines) {
|
|
45
|
-
const text = line.replace(/^-\s*/, "").trim();
|
|
46
|
-
if (!text || text.startsWith("("))
|
|
47
|
-
continue;
|
|
48
|
-
const employerMatch = text.match(/(?:employer|works? (?:at|for)|employed (?:at|by))[:\s]+([A-Z][\w\s&.,-]+?)(?:\s*[-–—|,;(\n]|$)/i)
|
|
49
|
-
|| text.match(/\bfrom ([A-Z][A-Za-z\s&.,-]+?)(?:\s*[-–—|,;(\n]|$)/)
|
|
50
|
-
|| text.match(/\bat ([A-Z][A-Za-z\s&.,-]+?)(?:\s*[-–—|,;(\n]|$)/);
|
|
51
|
-
if (employerMatch) {
|
|
52
|
-
add(employerMatch[1].trim(), "[EMPLOYER]");
|
|
53
|
-
}
|
|
54
|
-
}
|
|
92
|
+
for (const rule of SECTION_RULES) {
|
|
93
|
+
applyRule(rule, context, userName, push);
|
|
55
94
|
}
|
|
56
95
|
}
|
|
57
96
|
entries.sort((a, b) => b.real.length - a.real.length);
|
|
58
97
|
return entries;
|
|
59
98
|
}
|
|
60
|
-
// Patterns for numeric / identifier PII commonly found in Thai financial data.
|
|
61
|
-
const NUMERIC_PII_PATTERNS = [
|
|
62
|
-
// Thai national ID with dashes: 1-2345-67890-12-3
|
|
63
|
-
[/\b\d-\d{4}-\d{5}-\d{2}-\d\b/g, "[NATID]"],
|
|
64
|
-
// Thai national ID without dashes (13 digits) — must precede the generic ACCT pattern.
|
|
65
|
-
[/\b\d{13}\b/g, "[NATID]"],
|
|
66
|
-
// Thai mobile numbers: 0[689]xxxxxxxx (10 digits starting 06/08/09)
|
|
67
|
-
[/\b0[689]\d{8}\b/g, "[PHONE]"],
|
|
68
|
-
// 16-digit credit card (with optional separators)
|
|
69
|
-
[/\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b/g, "[CARD]"],
|
|
70
|
-
// 10–12 digit account / routing numbers at a word boundary
|
|
71
|
-
[/\b\d{10,12}\b(?=\s|$|[,.])/g, "[ACCT]"],
|
|
72
|
-
];
|
|
73
99
|
export function redact(text) {
|
|
74
100
|
const redactions = buildRedactions();
|
|
75
101
|
let result = text;
|
|
@@ -58,6 +58,22 @@ function insertText(buf, text) {
|
|
|
58
58
|
col: last.length,
|
|
59
59
|
};
|
|
60
60
|
}
|
|
61
|
+
function moveToBol(buf) {
|
|
62
|
+
return { ...buf, col: 0 };
|
|
63
|
+
}
|
|
64
|
+
function moveToEol(buf) {
|
|
65
|
+
return { ...buf, col: buf.lines[buf.row].length };
|
|
66
|
+
}
|
|
67
|
+
function killToEol(buf) {
|
|
68
|
+
const lines = [...buf.lines];
|
|
69
|
+
lines[buf.row] = lines[buf.row].slice(0, buf.col);
|
|
70
|
+
return { lines, row: buf.row, col: buf.col };
|
|
71
|
+
}
|
|
72
|
+
function killToBol(buf) {
|
|
73
|
+
const lines = [...buf.lines];
|
|
74
|
+
lines[buf.row] = lines[buf.row].slice(buf.col);
|
|
75
|
+
return { lines, row: buf.row, col: 0 };
|
|
76
|
+
}
|
|
61
77
|
function backspace(buf) {
|
|
62
78
|
if (buf.col > 0) {
|
|
63
79
|
const lines = [...buf.lines];
|
|
@@ -128,6 +144,43 @@ function moveWordRight(buf) {
|
|
|
128
144
|
function toString(buf) {
|
|
129
145
|
return buf.lines.join("\n");
|
|
130
146
|
}
|
|
147
|
+
/** Pure mutators dispatched by single keycode. Side-effecting keys (Ctrl+C, Ctrl+D,
|
|
148
|
+
* Enter, ESC) stay inline in handleChunk because they call host callbacks or open
|
|
149
|
+
* a sub-state-machine for escape sequences. */
|
|
150
|
+
const CTRL_HANDLERS = {
|
|
151
|
+
[CTRL_A]: moveToBol,
|
|
152
|
+
[CTRL_E]: moveToEol,
|
|
153
|
+
[CTRL_K]: killToEol,
|
|
154
|
+
[CTRL_U]: killToBol,
|
|
155
|
+
[CTRL_W]: deleteWordLeft,
|
|
156
|
+
[BACKSPACE]: backspace,
|
|
157
|
+
[BACKSPACE_ALT]: backspace,
|
|
158
|
+
};
|
|
159
|
+
/** CSI sequences: ESC [ ... <final>. `wordMod` runs when the parameter is one of
|
|
160
|
+
* the word-step modifiers (Option/Ctrl/Cmd) — `1;3`, `1;5`, `1;9`. */
|
|
161
|
+
const CSI_HANDLERS = {
|
|
162
|
+
D: { plain: moveLeft, wordMod: moveWordLeft },
|
|
163
|
+
C: { plain: moveRight, wordMod: moveWordRight },
|
|
164
|
+
A: { plain: moveUp, wordMod: moveUp },
|
|
165
|
+
B: { plain: moveDown, wordMod: moveDown },
|
|
166
|
+
H: { plain: moveToBol, wordMod: moveToBol },
|
|
167
|
+
F: { plain: moveToEol, wordMod: moveToEol },
|
|
168
|
+
};
|
|
169
|
+
/** Kitty keyboard protocol: ESC [ codepoint ; modifier u */
|
|
170
|
+
function handleKittyKey(seq, apply) {
|
|
171
|
+
const parts = seq.split(";");
|
|
172
|
+
const codepoint = parseInt(parts[0], 10);
|
|
173
|
+
const mod = parts.length > 1 ? parseInt(parts[1], 10) : 1;
|
|
174
|
+
const hasShift = ((mod - 1) & 1) !== 0;
|
|
175
|
+
const hasCtrl = ((mod - 1) & 4) !== 0;
|
|
176
|
+
const hasCmd = ((mod - 1) & 8) !== 0;
|
|
177
|
+
if (codepoint === 13 && hasShift) {
|
|
178
|
+
apply((b) => insertText(b, "\n"));
|
|
179
|
+
}
|
|
180
|
+
else if (codepoint === 127 && (hasCmd || hasCtrl)) {
|
|
181
|
+
apply(killToBol);
|
|
182
|
+
}
|
|
183
|
+
}
|
|
131
184
|
/**
|
|
132
185
|
* Raw-stdin driven keystroke state machine that owns a multiline buffer and
|
|
133
186
|
* exposes its current state plus reset/insert helpers. Purely stateful — Ink
|
|
@@ -221,42 +274,15 @@ export function useTextInput(opts) {
|
|
|
221
274
|
}
|
|
222
275
|
continue;
|
|
223
276
|
}
|
|
224
|
-
if (code === CTRL_A) {
|
|
225
|
-
apply(b => ({ ...b, col: 0 }));
|
|
226
|
-
continue;
|
|
227
|
-
}
|
|
228
|
-
if (code === CTRL_E) {
|
|
229
|
-
apply(b => ({ ...b, col: b.lines[b.row].length }));
|
|
230
|
-
continue;
|
|
231
|
-
}
|
|
232
|
-
if (code === CTRL_K) {
|
|
233
|
-
apply(b => {
|
|
234
|
-
const lines = [...b.lines];
|
|
235
|
-
lines[b.row] = lines[b.row].slice(0, b.col);
|
|
236
|
-
return { lines, row: b.row, col: b.col };
|
|
237
|
-
});
|
|
238
|
-
continue;
|
|
239
|
-
}
|
|
240
|
-
if (code === CTRL_U) {
|
|
241
|
-
apply(b => {
|
|
242
|
-
const lines = [...b.lines];
|
|
243
|
-
lines[b.row] = lines[b.row].slice(b.col);
|
|
244
|
-
return { lines, row: b.row, col: 0 };
|
|
245
|
-
});
|
|
246
|
-
continue;
|
|
247
|
-
}
|
|
248
|
-
if (code === CTRL_W) {
|
|
249
|
-
apply(deleteWordLeft);
|
|
250
|
-
continue;
|
|
251
|
-
}
|
|
252
277
|
if (code === ENTER) {
|
|
253
278
|
optsRef.current.onSubmit(toString(bufferRef.current));
|
|
254
279
|
setBuffer(EMPTY_BUFFER);
|
|
255
280
|
optsRef.current.onChange?.(EMPTY_BUFFER);
|
|
256
281
|
continue;
|
|
257
282
|
}
|
|
258
|
-
|
|
259
|
-
|
|
283
|
+
const ctrlHandler = CTRL_HANDLERS[code];
|
|
284
|
+
if (ctrlHandler) {
|
|
285
|
+
apply(ctrlHandler);
|
|
260
286
|
continue;
|
|
261
287
|
}
|
|
262
288
|
if (code === ESC) {
|
|
@@ -288,44 +314,12 @@ export function useTextInput(opts) {
|
|
|
288
314
|
if (i < chunk.length) {
|
|
289
315
|
const final = chunk[i];
|
|
290
316
|
const isWordMod = seq === "1;3" || seq === "1;5" || seq === "1;9";
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
else if (final === "C") {
|
|
295
|
-
apply(isWordMod ? moveWordRight : moveRight);
|
|
296
|
-
}
|
|
297
|
-
else if (final === "A") {
|
|
298
|
-
apply(moveUp);
|
|
299
|
-
}
|
|
300
|
-
else if (final === "B") {
|
|
301
|
-
apply(moveDown);
|
|
302
|
-
}
|
|
303
|
-
else if (final === "H") {
|
|
304
|
-
apply(b => ({ ...b, col: 0 }));
|
|
305
|
-
}
|
|
306
|
-
else if (final === "F") {
|
|
307
|
-
apply(b => ({ ...b, col: b.lines[b.row].length }));
|
|
317
|
+
const csi = CSI_HANDLERS[final];
|
|
318
|
+
if (csi) {
|
|
319
|
+
apply(isWordMod ? csi.wordMod : csi.plain);
|
|
308
320
|
}
|
|
309
321
|
else if (final === "u") {
|
|
310
|
-
|
|
311
|
-
const parts = seq.split(";");
|
|
312
|
-
const codepoint = parseInt(parts[0], 10);
|
|
313
|
-
const mod = parts.length > 1 ? parseInt(parts[1], 10) : 1;
|
|
314
|
-
const hasShift = ((mod - 1) & 1) !== 0;
|
|
315
|
-
const hasCtrl = ((mod - 1) & 4) !== 0;
|
|
316
|
-
const hasCmd = ((mod - 1) & 8) !== 0;
|
|
317
|
-
if (codepoint === 13 && hasShift) {
|
|
318
|
-
// Shift+Enter → insert newline
|
|
319
|
-
apply(b => insertText(b, "\n"));
|
|
320
|
-
}
|
|
321
|
-
else if (codepoint === 127 && (hasCmd || hasCtrl)) {
|
|
322
|
-
// Cmd/Ctrl+Backspace → delete to line start
|
|
323
|
-
apply(b => {
|
|
324
|
-
const lines = [...b.lines];
|
|
325
|
-
lines[b.row] = lines[b.row].slice(b.col);
|
|
326
|
-
return { lines, row: b.row, col: 0 };
|
|
327
|
-
});
|
|
328
|
-
}
|
|
322
|
+
handleKittyKey(seq, apply);
|
|
329
323
|
}
|
|
330
324
|
}
|
|
331
325
|
continue;
|
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Run an array of async task factories with a fixed concurrency bound. Resolves
|
|
3
|
-
* to
|
|
4
|
-
*
|
|
5
|
-
* caller
|
|
6
|
-
* in `Promise.resolve()` and pushed through `try/catch`, one task throwing
|
|
7
|
-
* never aborts the rest of the run.
|
|
3
|
+
* to a `Settled<T>[]` in the same order as the input tasks. One task throwing
|
|
4
|
+
* never aborts the rest — its slot settles as `{ ok: false, error }` and the
|
|
5
|
+
* caller decides what to do.
|
|
8
6
|
*
|
|
9
7
|
* No new dependency. Simple worker-pool: kicks off up to `n` tasks, then each
|
|
10
8
|
* worker pulls the next index from a shared cursor until the queue is drained.
|
|
11
9
|
*/
|
|
12
|
-
export
|
|
10
|
+
export type Settled<T> = {
|
|
11
|
+
ok: true;
|
|
12
|
+
value: T;
|
|
13
|
+
} | {
|
|
14
|
+
ok: false;
|
|
13
15
|
error: unknown;
|
|
14
|
-
}
|
|
16
|
+
};
|
|
17
|
+
export declare function runWithConcurrency<T>(tasks: Array<() => Promise<T>>, n: number): Promise<Settled<T>[]>;
|
|
@@ -1,28 +1,15 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Run an array of async task factories with a fixed concurrency bound. Resolves
|
|
3
|
-
* to an array of results in the same order as the input tasks (regardless of
|
|
4
|
-
* completion order). Any rejection settles that slot with `undefined` and the
|
|
5
|
-
* caller is responsible for tracking failures — but since each task is wrapped
|
|
6
|
-
* in `Promise.resolve()` and pushed through `try/catch`, one task throwing
|
|
7
|
-
* never aborts the rest of the run.
|
|
8
|
-
*
|
|
9
|
-
* No new dependency. Simple worker-pool: kicks off up to `n` tasks, then each
|
|
10
|
-
* worker pulls the next index from a shared cursor until the queue is drained.
|
|
11
|
-
*/
|
|
12
1
|
export async function runWithConcurrency(tasks, n) {
|
|
13
2
|
const results = new Array(tasks.length);
|
|
14
3
|
const workerCount = Math.max(1, Math.min(n, tasks.length));
|
|
15
4
|
let cursor = 0;
|
|
16
5
|
async function worker() {
|
|
17
|
-
while (
|
|
6
|
+
while (cursor < tasks.length) {
|
|
18
7
|
const index = cursor++;
|
|
19
|
-
if (index >= tasks.length)
|
|
20
|
-
return;
|
|
21
8
|
try {
|
|
22
|
-
results[index] = await tasks[index]();
|
|
9
|
+
results[index] = { ok: true, value: await tasks[index]() };
|
|
23
10
|
}
|
|
24
11
|
catch (err) {
|
|
25
|
-
results[index] = { error: err };
|
|
12
|
+
results[index] = { ok: false, error: err };
|
|
26
13
|
}
|
|
27
14
|
}
|
|
28
15
|
}
|
|
@@ -2,6 +2,46 @@ import chalk from "chalk";
|
|
|
2
2
|
import inquirer from "inquirer";
|
|
3
3
|
import { readPdf } from "./pdf.js";
|
|
4
4
|
import { unlockIfNeeded, persistUnlockOutcome } from "./unlock.js";
|
|
5
|
+
async function decryptOne(db, file, opts) {
|
|
6
|
+
let pdf;
|
|
7
|
+
try {
|
|
8
|
+
pdf = readPdf(file.path);
|
|
9
|
+
}
|
|
10
|
+
catch (err) {
|
|
11
|
+
return { kind: "failed", error: `read failed: ${errorMessage(err)}` };
|
|
12
|
+
}
|
|
13
|
+
const existing = findScannedByHash(db, pdf.hash);
|
|
14
|
+
if (existing && !opts.force) {
|
|
15
|
+
return { kind: "skipped", existingScannedFileId: existing.id };
|
|
16
|
+
}
|
|
17
|
+
try {
|
|
18
|
+
const unlocked = await unlockIfNeeded({
|
|
19
|
+
db,
|
|
20
|
+
filePath: file.path,
|
|
21
|
+
bytes: pdf.bytes,
|
|
22
|
+
interactive: opts.interactive,
|
|
23
|
+
});
|
|
24
|
+
persistUnlockOutcome(db, file.path, unlocked.outcome);
|
|
25
|
+
return {
|
|
26
|
+
kind: "decrypted",
|
|
27
|
+
file: {
|
|
28
|
+
path: file.path,
|
|
29
|
+
fileName: file.name,
|
|
30
|
+
relPath: file.relPath,
|
|
31
|
+
hash: pdf.hash,
|
|
32
|
+
mime: pdf.mime,
|
|
33
|
+
decryptedBytes: unlocked.decrypted,
|
|
34
|
+
replacesPriorScannedFileId: existing?.id,
|
|
35
|
+
},
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
catch (err) {
|
|
39
|
+
return { kind: "failed", error: errorMessage(err) || "unlock failed" };
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
function errorMessage(err) {
|
|
43
|
+
return err instanceof Error ? err.message : String(err);
|
|
44
|
+
}
|
|
5
45
|
/**
|
|
6
46
|
* Phase 1 of scan: walk every file in the queue, decrypt any that need it,
|
|
7
47
|
* and return a partition (decrypted / skipped / failed). The actual agent
|
|
@@ -15,44 +55,22 @@ export async function decryptQueue(db, files, opts) {
|
|
|
15
55
|
const skipped = [];
|
|
16
56
|
const failed = [];
|
|
17
57
|
for (let i = 0; i < files.length; i++) {
|
|
18
|
-
const
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
try {
|
|
35
|
-
const unlocked = await unlockIfNeeded({
|
|
36
|
-
db,
|
|
37
|
-
filePath: f.path,
|
|
38
|
-
bytes: pdf.bytes,
|
|
39
|
-
interactive: opts.interactive,
|
|
40
|
-
});
|
|
41
|
-
persistUnlockOutcome(db, f.path, unlocked.outcome);
|
|
42
|
-
decrypted.push({
|
|
43
|
-
path: f.path,
|
|
44
|
-
fileName: f.name,
|
|
45
|
-
relPath: f.relPath,
|
|
46
|
-
hash: pdf.hash,
|
|
47
|
-
mime: pdf.mime,
|
|
48
|
-
decryptedBytes: unlocked.decrypted,
|
|
49
|
-
replacesPriorScannedFileId: existing?.id,
|
|
50
|
-
});
|
|
51
|
-
opts.onProgress?.({ index: i, total: files.length, fileName: f.name, outcome: "decrypted" });
|
|
52
|
-
}
|
|
53
|
-
catch (err) {
|
|
54
|
-
failed.push({ file: f, error: err.message ?? "unlock failed" });
|
|
55
|
-
opts.onProgress?.({ index: i, total: files.length, fileName: f.name, outcome: "failed" });
|
|
58
|
+
const file = files[i];
|
|
59
|
+
const outcome = await decryptOne(db, file, opts);
|
|
60
|
+
const progress = (kind) => opts.onProgress?.({ index: i, total: files.length, fileName: file.name, outcome: kind });
|
|
61
|
+
switch (outcome.kind) {
|
|
62
|
+
case "decrypted":
|
|
63
|
+
decrypted.push(outcome.file);
|
|
64
|
+
progress("decrypted");
|
|
65
|
+
break;
|
|
66
|
+
case "skipped":
|
|
67
|
+
skipped.push({ file, existingScannedFileId: outcome.existingScannedFileId });
|
|
68
|
+
progress("skipped");
|
|
69
|
+
break;
|
|
70
|
+
case "failed":
|
|
71
|
+
failed.push({ file, error: outcome.error });
|
|
72
|
+
progress("failed");
|
|
73
|
+
break;
|
|
56
74
|
}
|
|
57
75
|
}
|
|
58
76
|
return { decrypted, skipped, failed };
|
|
@@ -3,6 +3,8 @@
|
|
|
3
3
|
* the WASM module isn't loaded for data dirs that contain only plaintext PDFs.
|
|
4
4
|
*/
|
|
5
5
|
let mupdfPromise = null;
|
|
6
|
+
/** mupdf's authenticatePassword returns 0 on a wrong password, non-zero on success. */
|
|
7
|
+
const MUPDF_AUTH_FAILED = 0;
|
|
6
8
|
function getMupdf() {
|
|
7
9
|
if (!mupdfPromise) {
|
|
8
10
|
mupdfPromise = import("mupdf");
|
|
@@ -36,7 +38,7 @@ export async function unlock(bytes, password) {
|
|
|
36
38
|
return { ok: true, decrypted: bytes };
|
|
37
39
|
}
|
|
38
40
|
const result = doc.authenticatePassword(password);
|
|
39
|
-
if (result ===
|
|
41
|
+
if (result === MUPDF_AUTH_FAILED) {
|
|
40
42
|
return { ok: false };
|
|
41
43
|
}
|
|
42
44
|
const out = doc.saveToBuffer("decrypt");
|
package/dist/scanner/pipeline.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import { randomUUID } from "crypto";
|
|
2
2
|
import { getDb } from "../db/connection.js";
|
|
3
|
-
import { countOpenConcerns
|
|
3
|
+
import { countOpenConcerns } from "../db/queries/concerns.js";
|
|
4
4
|
import { correlatePairs } from "../db/queries/transactions.js";
|
|
5
5
|
import { runScanAgent } from "../ai/agent.js";
|
|
6
6
|
import { buildDocumentBlock } from "./pdf.js";
|
|
@@ -44,24 +44,22 @@ export async function runScan(opts = {}) {
|
|
|
44
44
|
// Phase 4 — per-file commit
|
|
45
45
|
events?.committing?.();
|
|
46
46
|
const fileResults = commitAll(db, decryptResult, scanResults);
|
|
47
|
-
return buildSummary(allFiles.length, fileResults
|
|
47
|
+
return buildSummary(allFiles.length, fileResults);
|
|
48
48
|
}
|
|
49
49
|
async function scanInParallel(db, files, opts) {
|
|
50
50
|
const tasks = files.map(f => () => scanOneFile(db, f, opts.events));
|
|
51
51
|
const settled = await runWithConcurrency(tasks, opts.concurrency);
|
|
52
|
-
//
|
|
53
|
-
//
|
|
54
|
-
// so the `{error}` branch only fires for truly unexpected throws.
|
|
52
|
+
// scanOneFile catches LLM errors and returns ScanWorkResult with `error` set,
|
|
53
|
+
// so a !r.ok slot here only fires for truly unexpected throws.
|
|
55
54
|
return settled.map((r, i) => {
|
|
56
|
-
if (r
|
|
57
|
-
return
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
}
|
|
64
|
-
return r;
|
|
55
|
+
if (r.ok)
|
|
56
|
+
return r.value;
|
|
57
|
+
return {
|
|
58
|
+
decryptedFile: files[i],
|
|
59
|
+
buffer: new BufferedWriteContext(files[i].fileName),
|
|
60
|
+
error: String(r.error),
|
|
61
|
+
agentText: "",
|
|
62
|
+
};
|
|
65
63
|
});
|
|
66
64
|
}
|
|
67
65
|
async function scanOneFile(db, file, events) {
|
|
@@ -243,7 +241,7 @@ function commitAll(db, decryptResult, scanResults) {
|
|
|
243
241
|
return out;
|
|
244
242
|
}
|
|
245
243
|
/** Summary assembly */
|
|
246
|
-
function buildSummary(total, details
|
|
244
|
+
function buildSummary(total, details) {
|
|
247
245
|
const summary = {
|
|
248
246
|
total,
|
|
249
247
|
scanned: 0,
|
|
@@ -268,7 +266,7 @@ function buildAbortedSummary(total, decrypt) {
|
|
|
268
266
|
name: f.file.name, relPath: f.file.relPath, status: "failed", transactions: 0, concerns: 0, error: f.error,
|
|
269
267
|
})),
|
|
270
268
|
];
|
|
271
|
-
return buildSummary(total, details
|
|
269
|
+
return buildSummary(total, details);
|
|
272
270
|
}
|
|
273
271
|
/** Low-level DB helpers */
|
|
274
272
|
function deleteScannedFile(db, id) {
|