@kansei-link/bantou 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +110 -0
  3. package/data/exclusion-rules/README.md +104 -0
  4. package/data/exclusion-rules/jp-tax-baseline-v1.json +185 -0
  5. package/data/exclusion-rules-schema.json +109 -0
  6. package/data/keyword-dict/README.md +91 -0
  7. package/data/keyword-dict/jp-tax-baseline-v1.json +398 -0
  8. package/data/keyword-dict-schema.json +117 -0
  9. package/data/tax-rules/jp-tax-rules-v1.json +170 -0
  10. package/dist/adapters/csv-parser.d.ts +11 -0
  11. package/dist/adapters/csv-parser.js +133 -0
  12. package/dist/adapters/freee-csv-adapter.d.ts +14 -0
  13. package/dist/adapters/freee-csv-adapter.js +67 -0
  14. package/dist/adapters/generic-adapter.d.ts +20 -0
  15. package/dist/adapters/generic-adapter.js +73 -0
  16. package/dist/adapters/index.d.ts +23 -0
  17. package/dist/adapters/index.js +386 -0
  18. package/dist/adapters/types.d.ts +111 -0
  19. package/dist/adapters/types.js +9 -0
  20. package/dist/adapters/yayoi-adapter.d.ts +46 -0
  21. package/dist/adapters/yayoi-adapter.js +181 -0
  22. package/dist/bin/freee-doctor.d.ts +3 -0
  23. package/dist/bin/freee-doctor.js +15 -0
  24. package/dist/classifier/claude-classifier.d.ts +24 -0
  25. package/dist/classifier/claude-classifier.js +154 -0
  26. package/dist/classifier/keyword-classifier.d.ts +22 -0
  27. package/dist/classifier/keyword-classifier.js +124 -0
  28. package/dist/classifier/keyword-match.d.ts +21 -0
  29. package/dist/classifier/keyword-match.js +57 -0
  30. package/dist/classifier/normalize.d.ts +3 -0
  31. package/dist/classifier/normalize.js +27 -0
  32. package/dist/classifier/two-stage-classifier.d.ts +21 -0
  33. package/dist/classifier/two-stage-classifier.js +51 -0
  34. package/dist/classifier/types.d.ts +31 -0
  35. package/dist/classifier/types.js +3 -0
  36. package/dist/connectors/freee.d.ts +115 -0
  37. package/dist/connectors/freee.js +177 -0
  38. package/dist/exclusion/exclusion-checker.d.ts +10 -0
  39. package/dist/exclusion/exclusion-checker.js +162 -0
  40. package/dist/freee-doctor.d.ts +26 -0
  41. package/dist/freee-doctor.js +82 -0
  42. package/dist/index.d.ts +3 -0
  43. package/dist/index.js +656 -0
  44. package/dist/memory/cockpit-memory.d.ts +73 -0
  45. package/dist/memory/cockpit-memory.js +473 -0
  46. package/dist/memory/types.d.ts +114 -0
  47. package/dist/memory/types.js +11 -0
  48. package/dist/pipeline/confidence-router.d.ts +38 -0
  49. package/dist/pipeline/confidence-router.js +129 -0
  50. package/dist/pipeline/nightly-pipeline.d.ts +44 -0
  51. package/dist/pipeline/nightly-pipeline.js +497 -0
  52. package/dist/pipeline/types.d.ts +84 -0
  53. package/dist/pipeline/types.js +12 -0
  54. package/dist/reports/monthly-report.d.ts +64 -0
  55. package/dist/reports/monthly-report.js +230 -0
  56. package/dist/secrets.d.ts +14 -0
  57. package/dist/secrets.js +86 -0
  58. package/dist/tax-rules/tax-rule-engine.d.ts +103 -0
  59. package/dist/tax-rules/tax-rule-engine.js +449 -0
  60. package/dist/tax-rules/types.d.ts +103 -0
  61. package/dist/tax-rules/types.js +7 -0
  62. package/package.json +74 -0
@@ -0,0 +1,181 @@
1
+ // 弥生会計 (Yayoi) CSV adapter.
2
+ //
3
+ // Parses the 仕訳日記帳 CSV export format.
4
+ //
5
+ // 弥生 exports several CSV formats. This adapter handles the most common:
6
+ // Format A: 仕訳日記帳 (Journal Ledger) — full double-entry bookkeeping
7
+ // Format B: 簡易帳簿 (Simple Bookkeeping) — single-entry (個人事業主向け)
8
+ //
9
+ // Both formats are supported via header auto-detection.
10
+ //
11
+ // ⚠ Important: 弥生 defaults to Shift-JIS encoding.
12
+ // Users must select UTF-8 when exporting:
13
+ // 弥生 → ファイル → エクスポート → 文字コード → UTF-8
14
+ /**
15
+ * 弥生 仕訳日記帳 Format A headers (full double-entry).
16
+ *
17
+ * Typical columns (order may vary):
18
+ * 識別フラグ, 伝票No., 決算, 取引日付, 借方勘定科目, 借方補助科目,
19
+ * 借方部門, 借方税区分, 借方金額, 借方税金額, 貸方勘定科目, 貸方補助科目,
20
+ * 貸方部門, 貸方税区分, 貸方金額, 貸方税金額, 摘要, 番号, 期日, タイプ, 生成元, 仕訳メモ
21
+ */
22
+ const YAYOI_FULL_REQUIRED = ['取引日付', '借方勘定科目', '借方金額', '摘要'];
23
+ const YAYOI_FULL_OPTIONAL = ['貸方勘定科目', '貸方金額', '借方税区分', '貸方税区分', '伝票No.', '仕訳メモ'];
24
+ /**
25
+ * 弥生 簡易帳簿 Format B headers (single-entry, 個人事業主).
26
+ *
27
+ * Typical columns:
28
+ * 日付, 科目, 金額, 摘要, 取引先
29
+ */
30
+ const YAYOI_SIMPLE_REQUIRED = ['日付', '科目', '金額', '摘要'];
31
+ export class YayoiAdapter {
32
+ source = 'yayoi';
33
+ label = '弥生会計 CSV';
34
+ format = null;
35
+ detectFormat(headers) {
36
+ // Normalize: trim whitespace
37
+ const h = headers.map(s => s.trim());
38
+ // Check Format A (full double-entry)
39
+ if (YAYOI_FULL_REQUIRED.every(req => h.includes(req))) {
40
+ this.format = 'full';
41
+ return true;
42
+ }
43
+ // Check Format B (simple bookkeeping)
44
+ if (YAYOI_SIMPLE_REQUIRED.every(req => h.includes(req))) {
45
+ this.format = 'simple';
46
+ return true;
47
+ }
48
+ return false;
49
+ }
50
+ parseRow(row, rowNumber) {
51
+ if (this.format === 'full') {
52
+ return this.parseFullRow(row, rowNumber);
53
+ }
54
+ else if (this.format === 'simple') {
55
+ return this.parseSimpleRow(row, rowNumber);
56
+ }
57
+ return { transaction: null, skip_reason: 'Unknown 弥生 format' };
58
+ }
59
+ /**
60
+ * Parse Format A: 仕訳日記帳 (double-entry).
61
+ *
62
+ * Logic:
63
+ * - Uses 借方金額 as amount (expense side). If 0, uses 貸方金額 (income side).
64
+ * - Combines 摘要 + 仕訳メモ for the memo field.
65
+ * - Skips rows where 識別フラグ indicates non-transaction lines (headers, totals).
66
+ */
67
+ parseFullRow(row, rowNumber) {
68
+ // Skip non-transaction rows (識別フラグ: 2000=通常仕訳, 2100=決算仕訳)
69
+ const flag = row['識別フラグ']?.trim();
70
+ if (flag && !['2000', '2100', ''].includes(flag)) {
71
+ return { transaction: null, skip_reason: `識別フラグ=${flag} (非仕訳行)` };
72
+ }
73
+ // Parse date
74
+ const dateStr = row['取引日付']?.trim();
75
+ const date = this.parseDate(dateStr);
76
+ if (!date) {
77
+ return { transaction: null, skip_reason: `日付パース失敗: "${dateStr}"` };
78
+ }
79
+ // Parse amount: prefer 借方金額 (expense), fall back to 貸方金額 (income)
80
+ const debitAmount = this.parseAmount(row['借方金額']);
81
+ const creditAmount = this.parseAmount(row['貸方金額']);
82
+ const amount = debitAmount || creditAmount;
83
+ if (!amount || amount === 0) {
84
+ return { transaction: null, skip_reason: '金額が0またはパース失敗' };
85
+ }
86
+ // Build memo: 摘要 + 仕訳メモ (if present)
87
+ const tekiyou = row['摘要']?.trim() || '';
88
+ const memo_note = row['仕訳メモ']?.trim() || '';
89
+ const memo = memo_note ? `${tekiyou} ${memo_note}` : tekiyou;
90
+ if (!memo) {
91
+ return { transaction: null, skip_reason: '摘要が空' };
92
+ }
93
+ // Extract partner name from 摘要 if it contains a known pattern
94
+ // (弥生 doesn't have a dedicated partner column in Format A)
95
+ const partner_name = this.extractPartner(row);
96
+ return {
97
+ transaction: { amount, memo, date, partner_name },
98
+ skip_reason: null,
99
+ };
100
+ }
101
+ /**
102
+ * Parse Format B: 簡易帳簿 (single-entry).
103
+ */
104
+ parseSimpleRow(row, rowNumber) {
105
+ const dateStr = row['日付']?.trim();
106
+ const date = this.parseDate(dateStr);
107
+ if (!date) {
108
+ return { transaction: null, skip_reason: `日付パース失敗: "${dateStr}"` };
109
+ }
110
+ const amount = this.parseAmount(row['金額']);
111
+ if (!amount || amount === 0) {
112
+ return { transaction: null, skip_reason: '金額が0またはパース失敗' };
113
+ }
114
+ const memo = row['摘要']?.trim() || '';
115
+ if (!memo) {
116
+ return { transaction: null, skip_reason: '摘要が空' };
117
+ }
118
+ const partner_name = row['取引先']?.trim() || undefined;
119
+ return {
120
+ transaction: { amount, memo, date, partner_name },
121
+ skip_reason: null,
122
+ };
123
+ }
124
+ /**
125
+ * Parse date from various 弥生 formats:
126
+ * - "2026/05/01" (Western calendar)
127
+ * - "2026-05-01" (ISO)
128
+ * - "R08/05/01" (和暦 令和)
129
+ * - "H28/05/01" (和暦 平成)
130
+ * Returns ISO format "YYYY-MM-DD" or null.
131
+ */
132
+ parseDate(dateStr) {
133
+ if (!dateStr)
134
+ return null;
135
+ const s = dateStr.trim();
136
+ // Western calendar: "2026/05/01" or "2026-05-01"
137
+ const westernMatch = s.match(/^(\d{4})[\/\-](\d{1,2})[\/\-](\d{1,2})$/);
138
+ if (westernMatch) {
139
+ const [, y, m, d] = westernMatch;
140
+ return `${y}-${m.padStart(2, '0')}-${d.padStart(2, '0')}`;
141
+ }
142
+ // 和暦: "R08/05/01" (令和), "H28/05/01" (平成)
143
+ const warekiMatch = s.match(/^([RrHh])(\d{1,2})[\/\-](\d{1,2})[\/\-](\d{1,2})$/);
144
+ if (warekiMatch) {
145
+ const [, era, ey, m, d] = warekiMatch;
146
+ const eraYear = parseInt(ey);
147
+ let year;
148
+ if (era === 'R' || era === 'r') {
149
+ year = 2018 + eraYear; // 令和1年 = 2019
150
+ }
151
+ else {
152
+ year = 1988 + eraYear; // 平成1年 = 1989
153
+ }
154
+ return `${year}-${m.padStart(2, '0')}-${d.padStart(2, '0')}`;
155
+ }
156
+ return null;
157
+ }
158
+ /**
159
+ * Parse amount string, removing commas and handling negative values.
160
+ * "12,000" → 12000, "-3,000" → 3000 (absolute value for classification).
161
+ */
162
+ parseAmount(amountStr) {
163
+ if (!amountStr)
164
+ return 0;
165
+ const cleaned = amountStr.replace(/[,、¥¥\s]/g, '');
166
+ const num = parseInt(cleaned, 10);
167
+ return isNaN(num) ? 0 : Math.abs(num);
168
+ }
169
+ /**
170
+ * Try to extract partner name from 弥生 Format A.
171
+ * Format A doesn't have a dedicated partner column, but some users
172
+ * put the partner name in 借方補助科目 or 貸方補助科目.
173
+ */
174
+ extractPartner(row) {
175
+ // Check 借方補助科目 / 貸方補助科目
176
+ const debitSub = row['借方補助科目']?.trim();
177
+ const creditSub = row['貸方補助科目']?.trim();
178
+ return debitSub || creditSub || undefined;
179
+ }
180
+ }
181
+ //# sourceMappingURL=yayoi-adapter.js.map
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env node
2
+ export {};
3
+ //# sourceMappingURL=freee-doctor.d.ts.map
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env node
2
+ // CLI entry for the freee connection doctor.
3
+ // npm run doctor:freee (or: npx tsx src/bin/freee-doctor.ts)
4
+ // Prints the diagnostic as JSON. NEVER prints the access token.
5
+ import { runFreeeDoctor } from '../freee-doctor.js';
6
// Success path: print the report as pretty JSON, then exit 0 when the report
// is healthy and 1 otherwise.
const emitReport = (report) => {
    const exitCode = report.ok ? 0 : 1;
    console.log(JSON.stringify(report, null, 2));
    process.exit(exitCode);
};
// Failure path: print a minimal JSON error envelope to stderr and exit 1.
const emitFailure = (err) => {
    const payload = { ok: false, error: err?.message ?? String(err) };
    console.error(JSON.stringify(payload, null, 2));
    process.exit(1);
};
runFreeeDoctor().then(emitReport).catch(emitFailure);
15
+ //# sourceMappingURL=freee-doctor.js.map
@@ -0,0 +1,24 @@
1
+ import { Transaction, ClassificationResult } from './types.js';
2
/**
 * Category metadata handed to ClaudeClassifier.
 *
 * `id`, `name_ja` and `description` are rendered into the classifier's system
 * prompt; `freee_account_code` and `default_tax_code` are copied into the
 * classification result for the matched category.
 */
+ export interface KeywordCategoryMeta {
3
+ id: string;
4
+ name_ja: string;
5
+ name_en?: string;
6
+ freee_account_code: number;
7
+ default_tax_code: number;
8
+ description?: string;
9
+ }
10
/**
 * Stage 2 classifier backed by the Anthropic Messages API.
 *
 * The constructor throws when `apiKey` is empty. `classify()` does not reject
 * on API or parse failures; it resolves to an unclassified result instead.
 * `model` defaults to the CLAUDE_MODEL env var or a built-in Haiku model id.
 */
+ export declare class ClaudeClassifier {
11
+ private client;
12
+ private categories;
13
+ private categoryById;
14
+ private systemPrompt;
15
+ private model;
16
+ constructor(apiKey: string, categories: KeywordCategoryMeta[], model?: string);
17
+ classify(tx: Transaction): Promise<ClassificationResult>;
18
+ private buildSystemPrompt;
19
+ private buildUserPrompt;
20
+ private parseResponse;
21
+ private fallback;
22
+ getModel(): string;
23
+ }
24
+ //# sourceMappingURL=claude-classifier.d.ts.map
@@ -0,0 +1,154 @@
1
+ // Stage 2 classifier: Claude API fallback.
2
+ //
3
+ // Used when Stage 1 keyword classifier returns no match. Sends transaction
4
+ // to Claude Haiku (= cheap + fast) with the 14-category system prompt and
5
+ // returns a classification with confidence (high/medium/low).
6
+ //
7
+ // Cost optimization:
8
+ // - System prompt (= 14 categories + rules) uses prompt caching → ~90% cost
9
+ // reduction on repeated requests with same category context.
10
+ // - Model defaults to Haiku 4.5 (cheap, fast, sufficient for classification).
11
+ // Override via CLAUDE_MODEL env var.
12
+ // - max_tokens = 200 (= classification response is short JSON).
13
+ //
14
+ // Falls back to unclassified if API errors, rate limits, or response is invalid.
15
+ import Anthropic from '@anthropic-ai/sdk';
16
+ const DEFAULT_MODEL = process.env.CLAUDE_MODEL || 'claude-haiku-4-5';
17
+ export class ClaudeClassifier {
18
+ client;
19
+ categories;
20
+ categoryById;
21
+ systemPrompt;
22
+ model;
23
+ constructor(apiKey, categories, model = DEFAULT_MODEL) {
24
+ if (!apiKey || apiKey.trim() === '') {
25
+ throw new Error('ANTHROPIC_API_KEY is required for Stage 2 classifier');
26
+ }
27
+ this.client = new Anthropic({ apiKey });
28
+ this.categories = categories;
29
+ this.categoryById = new Map(categories.map(c => [c.id, c]));
30
+ this.systemPrompt = this.buildSystemPrompt();
31
+ this.model = model;
32
+ }
33
+ async classify(tx) {
34
+ try {
35
+ const response = await this.client.messages.create({
36
+ model: this.model,
37
+ max_tokens: 200,
38
+ system: [
39
+ {
40
+ type: 'text',
41
+ text: this.systemPrompt,
42
+ cache_control: { type: 'ephemeral' }, // 90% cost reduction on warm cache
43
+ },
44
+ ],
45
+ messages: [
46
+ {
47
+ role: 'user',
48
+ content: this.buildUserPrompt(tx),
49
+ },
50
+ ],
51
+ });
52
+ const text = response.content
53
+ .filter((block) => block.type === 'text')
54
+ .map(block => block.text)
55
+ .join('');
56
+ return this.parseResponse(text);
57
+ }
58
+ catch (err) {
59
+ return this.fallback(`Stage 2 API error: ${err?.message || String(err)}`);
60
+ }
61
+ }
62
+ buildSystemPrompt() {
63
+ const categoryList = this.categories.map(c => `- ${c.id} (${c.name_ja}): ${c.description || 'no description'}`).join('\n');
64
+ return `You are a Japanese tax accounting classifier. Classify business transactions into 1 of 14 categories.
65
+
66
+ # Categories
67
+
68
+ ${categoryList}
69
+
70
+ # Output format
71
+
72
+ Return JSON only. No markdown, no explanation outside JSON:
73
+
74
+ {
75
+ "category_id": "<one of the category ids above>",
76
+ "confidence": "high|medium|low",
77
+ "reasoning": "<one short sentence in Japanese, 50 chars max>"
78
+ }
79
+
80
+ # Confidence rules
81
+
82
+ - **high**: Clear unambiguous match (e.g., "楽天モバイル ¥5,500" → communications obvious)
83
+ - **medium**: Leans toward one but other plausible (e.g., 海外 SaaS amount that could be utilities OR communications)
84
+ - **low**: 2+ categories equally plausible OR insufficient info
85
+
86
+ # Japanese tax accounting rules
87
+
88
+ - 飲食 ≤¥10,000 → meeting_meal (会議費)
89
+ - 飲食 >¥10,000 → entertainment (交際費)
90
+ - 海外 SaaS (Anthropic / OpenAI / GitHub / AWS / Cloudflare / etc.) → communications + tax_code 0 (国外取引)
91
+ - 軽減税率 (8%): 食品 / 新聞定期購読 / 持ち帰り飲食
92
+ - 標準税率 (10%): 店内飲食 / 通常物販 / 国内サービス
93
+ - 給与 / 借入 / 社保 / 投資 / ATM出金 / 公共料金 → これらは別途 exclusion check で escalate されるので、 通常分類すべきではない (= ただし keyword match で対象外な場合のみ、 確信あれば salary / loan etc. でもOK)`;
94
+ }
95
+ buildUserPrompt(tx) {
96
+ return `Transaction:
97
+ - amount: ${tx.amount} JPY
98
+ - memo: ${tx.memo}
99
+ - date: ${tx.date}
100
+ - partner: ${tx.partner_name || '(unknown)'}
101
+
102
+ Classify into 1 category. JSON only.`;
103
+ }
104
+ parseResponse(text) {
105
+ try {
106
+ // Extract JSON (= LLM sometimes wraps in markdown despite instructions)
107
+ const jsonMatch = text.match(/\{[\s\S]*?\}/);
108
+ if (!jsonMatch) {
109
+ return this.fallback('Stage 2 returned no JSON');
110
+ }
111
+ const parsed = JSON.parse(jsonMatch[0]);
112
+ if (typeof parsed.category_id !== 'string') {
113
+ return this.fallback('Stage 2 response missing category_id');
114
+ }
115
+ const cat = this.categoryById.get(parsed.category_id);
116
+ if (!cat) {
117
+ return this.fallback(`Stage 2 returned unknown category: ${parsed.category_id}`);
118
+ }
119
+ const confidence = parsed.confidence === 'high'
120
+ ? 'high'
121
+ : parsed.confidence === 'medium'
122
+ ? 'medium'
123
+ : 'low';
124
+ const reasoning = typeof parsed.reasoning === 'string'
125
+ ? parsed.reasoning.slice(0, 100)
126
+ : 'AI classification';
127
+ return {
128
+ classified: true,
129
+ category_id: cat.id,
130
+ category_name_ja: cat.name_ja,
131
+ freee_account_code: cat.freee_account_code,
132
+ tax_code: cat.default_tax_code,
133
+ confidence,
134
+ match_reason: `Stage 2 (${this.model}): ${reasoning}`,
135
+ classifier_version: `jp-tax-baseline-v1.0.0+claude/${this.model}`,
136
+ };
137
+ }
138
+ catch (err) {
139
+ return this.fallback(`Stage 2 parse error: ${err?.message || String(err)}`);
140
+ }
141
+ }
142
+ fallback(reason) {
143
+ return {
144
+ classified: false,
145
+ confidence: 'none',
146
+ match_reason: reason,
147
+ classifier_version: `jp-tax-baseline-v1.0.0+claude/${this.model}`,
148
+ };
149
+ }
150
+ getModel() {
151
+ return this.model;
152
+ }
153
+ }
154
+ //# sourceMappingURL=claude-classifier.js.map
@@ -0,0 +1,22 @@
1
+ import { Transaction, ClassificationResult } from './types.js';
2
/**
 * Stage 1 classifier: matches a transaction memo against a JSON keyword
 * dictionary.
 *
 * The constructor loads the dictionary synchronously and throws when the file
 * does not exist. When `dictFile` is omitted the path is built from `dataDir`
 * (or the default data directory) as keyword-dict/jp-tax-baseline-v1.json.
 * `classify()` returns `classified: false` when no category matches.
 */
+ export declare class KeywordClassifier {
3
+ private dict;
4
+ private dictFile;
5
+ constructor(dictFile?: string, dataDir?: string);
6
+ classify(tx: Transaction): ClassificationResult;
7
+ getVersion(): string;
8
+ getCategoriesCount(): number;
9
+ getKeywordsCount(): number;
10
+ /**
11
+ * Returns category metadata for Stage 2 Claude classifier construction.
12
+ */
13
+ getCategoriesMeta(): {
14
+ id: string;
15
+ name_ja: string;
16
+ name_en: string | undefined;
17
+ freee_account_code: number;
18
+ default_tax_code: number;
19
+ description: string | undefined;
20
+ }[];
21
+ }
22
+ //# sourceMappingURL=keyword-classifier.d.ts.map
@@ -0,0 +1,124 @@
1
+ // Stage 1 keyword classifier.
2
+ //
3
+ // Reads jp-tax-baseline-v1.json keyword dictionary and matches a transaction
4
+ // against the 14 categories using substring search after normalization.
5
+ //
6
+ // Match algorithm:
7
+ // 1. Normalize memo (= 全角→半角, lowercase, trim)
8
+ // 2. For each category (top to bottom):
9
+ // a. Check amount threshold (min/max)
10
+ // b. Iterate keywords; first substring match wins category
11
+ // c. If matched but amount exceeds threshold_max, redirect to amount_overflow_category
12
+ // 3. No match → return classified: false (= proceed to Stage 2)
13
+ import fs from 'node:fs';
14
+ import path from 'node:path';
15
+ import { fileURLToPath } from 'node:url';
16
+ import { normalizeMemo } from './normalize.js';
17
+ import { findFirstMatchingKeyword } from './keyword-match.js';
18
+ const __filename = fileURLToPath(import.meta.url);
19
+ const __dirname = path.dirname(__filename);
20
/**
 * Locate the directory that holds the bundled data files.
 *
 * Resolution order:
 *   1. COCKPIT_DATA_DIR env var, returned as-is when set.
 *   2. `../../../../data` relative to this module (the historical default).
 *   3. `../../data` relative to this module, which is where `data/` sits
 *      next to `dist/classifier/` in the published package.
 *
 * A candidate is accepted only when it contains a `keyword-dict` folder, so
 * an unrelated directory that happens to be called "data" is not picked up.
 * When neither candidate qualifies the historical path is returned, so the
 * caller's "not found" error reports the same location as before.
 *
 * @returns Path of the data directory (not guaranteed to exist).
 */
function defaultDataDir() {
    const envDir = process.env.COCKPIT_DATA_DIR;
    if (envDir)
        return envDir;
    const legacyDir = path.resolve(__dirname, '../../../../data');
    const packageDir = path.resolve(__dirname, '../../data');
    const holdsKeywordDict = (dir) => fs.existsSync(path.join(dir, 'keyword-dict'));
    return [legacyDir, packageDir].find(holdsKeywordDict) ?? legacyDir;
}
29
/**
 * Stage 1 classifier: walks the dictionary's categories in file order and
 * returns the first one whose keyword list hits the normalized memo.
 */
export class KeywordClassifier {
    dict;
    dictFile;
    /**
     * Load the keyword dictionary from disk.
     *
     * @param dictFile Explicit path to the dictionary JSON (optional).
     * @param dataDir Data directory used to build the default path (optional).
     * @throws Error when the dictionary file does not exist.
     */
    constructor(dictFile, dataDir) {
        const baseDir = dataDir || defaultDataDir();
        const fallbackPath = path.join(baseDir, 'keyword-dict', 'jp-tax-baseline-v1.json');
        this.dictFile = dictFile || fallbackPath;
        const dictionaryExists = fs.existsSync(this.dictFile);
        if (!dictionaryExists) {
            throw new Error(`Keyword dictionary not found at ${this.dictFile}. ` +
                `Set COCKPIT_DATA_DIR env var or place data files at the expected path.`);
        }
        this.dict = JSON.parse(fs.readFileSync(this.dictFile, 'utf8'));
        // Normalize every keyword once up front so classify() only normalizes the memo.
        this.dict.categories.forEach((category) => {
            category._normalized_keywords = category.keywords.map(normalizeMemo);
        });
    }
    /**
     * Classify a transaction by keyword.
     *
     * A category is skipped when the amount is below its minimum threshold or
     * none of its keywords match. When the amount is above the category's
     * maximum and an overflow category is configured and found, the result
     * points at the overflow category instead.
     *
     * @param tx Transaction with `memo` and `amount`.
     * @returns A classified result, or `classified: false` when nothing matched.
     */
    classify(tx) {
        const memo = normalizeMemo(tx.memo);
        const version = this.dict.version;
        for (const category of this.dict.categories) {
            const minimum = category.amount_threshold_min;
            if (minimum !== undefined && tx.amount < minimum) {
                continue;
            }
            const hitIndex = findFirstMatchingKeyword(memo, category._normalized_keywords || []);
            // Report the keyword as written in the dictionary, not its normalized form.
            const hitKeyword = hitIndex >= 0 ? category.keywords[hitIndex] : undefined;
            if (hitIndex === -1) {
                continue;
            }
            const maximum = category.amount_threshold_max;
            const overMaximum = maximum !== undefined && tx.amount > maximum;
            const overflowTarget = overMaximum && category.amount_overflow_category
                ? this.dict.categories.find(c => c.id === category.amount_overflow_category)
                : undefined;
            if (overflowTarget) {
                return {
                    classified: true,
                    category_id: overflowTarget.id,
                    category_name_ja: overflowTarget.name_ja,
                    freee_account_code: overflowTarget.freee_account_code,
                    tax_code: overflowTarget.default_tax_code,
                    confidence: 'high', // the redirect is rule-based, not a guess
                    matched_keyword: hitKeyword,
                    match_reason: `Matched "${hitKeyword}" in "${category.id}" but amount ${tx.amount} > ${maximum}, redirected to "${overflowTarget.id}"`,
                    classifier_version: version,
                    amount_override_redirect: category.id,
                    special_pattern: overflowTarget.special_pattern,
                };
            }
            // No redirect applies: report the category that matched.
            return {
                classified: true,
                category_id: category.id,
                category_name_ja: category.name_ja,
                freee_account_code: category.freee_account_code,
                tax_code: category.default_tax_code,
                confidence: 'high',
                matched_keyword: hitKeyword,
                match_reason: `Matched keyword "${hitKeyword}" in category "${category.id}"`,
                classifier_version: version,
                special_pattern: category.special_pattern,
            };
        }
        return {
            classified: false,
            confidence: 'none',
            match_reason: 'No keyword match in any category',
            classifier_version: version,
        };
    }
    /** @returns The `version` field of the loaded dictionary. */
    getVersion() {
        return this.dict.version;
    }
    /** @returns Number of categories in the loaded dictionary. */
    getCategoriesCount() {
        return this.dict.categories.length;
    }
    /** @returns Total number of keywords across all categories. */
    getKeywordsCount() {
        let total = 0;
        for (const category of this.dict.categories) {
            total += category.keywords.length;
        }
        return total;
    }
    /**
     * Returns category metadata for Stage 2 Claude classifier construction.
     */
    getCategoriesMeta() {
        return this.dict.categories.map(({ id, name_ja, name_en, freee_account_code, default_tax_code, description }) => ({
            id,
            name_ja,
            name_en,
            freee_account_code,
            default_tax_code,
            description,
        }));
    }
}
124
+ //# sourceMappingURL=keyword-classifier.js.map
@@ -0,0 +1,21 @@
1
+ /**
 * Test whether a keyword occurs in a normalized memo.
 *
 * ASCII-only keywords ("ANA", "Suica", "Amazon") must not be embedded in a
 * longer ASCII word:
 *   - "ANA 機内食"  → match (space after the keyword)
 *   - "analytics"  → no match (keyword sits inside a word)
 *   - "ANA"        → match (start/end of text)
 *
 * Keywords containing any non-ASCII character ("新幹線", "コーヒー") use plain
 * substring search:
 *   - "新幹線のぞみ" → match
 *
 * @param normalizedMemo Memo text that has already been normalized.
 * @param normalizedKeyword Keyword that has already been normalized.
 * @returns true when the keyword matches; always false for an empty keyword.
 */
13
+ export declare function keywordMatches(normalizedMemo: string, normalizedKeyword: string): boolean;
14
+ /**
 * Find the first keyword in a pre-normalized list that matches a
 * pre-normalized memo.
 *
 * @param normalizedMemo Memo text that has already been normalized.
 * @param normalizedKeywords Keywords that have already been normalized, in priority order.
 * @returns Index of the first matching keyword, or -1 when none match.
 */
20
+ export declare function findFirstMatchingKeyword(normalizedMemo: string, normalizedKeywords: string[]): number;
21
+ //# sourceMappingURL=keyword-match.d.ts.map
@@ -0,0 +1,57 @@
1
+ // Smart keyword matcher that prevents English-keyword false positives.
2
+ //
3
+ // Problem (= discovered 2026-05-12):
4
+ // keyword "ANA" → substring match → "Posthog Cloud (= an analytics SaaS)"
5
+ // の "ana" に誤発火、 travel category に分類される。
6
+ //
7
+ // Fix:
8
+ // - ASCII-only keywords → word-boundary regex match
9
+ // - Japanese / CJK / mixed keywords → substring match (= 既存挙動)
10
+ //
11
+ // Word boundary regex (\b) only works for word chars (= [A-Za-z0-9_]),
12
+ // not CJK. So we split by charset and use appropriate strategy.
13
/** Matches strings made only of ASCII characters. */
const ASCII_ONLY_RE = /^[\x00-\x7F]+$/;
/** Characters that must be escaped before a string is embedded in a RegExp. */
const REGEX_SPECIAL_RE = /[.*+?^${}()|[\]\\]/g;
/** A single ASCII word character, used to inspect the edges of a keyword. */
const WORD_CHAR_RE = /\w/;
/** Compiled matcher per ASCII keyword, so each pattern is built only once. */
const ASCII_KEYWORD_RE_CACHE = new Map();
/** Escape `s` so it matches literally inside a RegExp. */
function escapeRegExp(s) {
    return s.replace(REGEX_SPECIAL_RE, '\\$&');
}
/**
 * Build (or fetch from cache) the matcher for an ASCII keyword.
 *
 * A "not preceded / not followed by a word character" assertion is added only
 * on the sides where the keyword itself starts or ends with a word character.
 * For keywords such as "ana" this is equivalent to `\bana\b`. For keywords
 * with punctuation at an edge ("c++", ".com") the punctuation already
 * delimits the keyword; `\b` on that side would demand an adjacent word
 * character and make "c++ 入門" impossible to match.
 */
function asciiKeywordRegExp(keyword) {
    let re = ASCII_KEYWORD_RE_CACHE.get(keyword);
    if (!re) {
        const head = WORD_CHAR_RE.test(keyword[0]) ? '(?<!\\w)' : '';
        const tail = WORD_CHAR_RE.test(keyword[keyword.length - 1]) ? '(?!\\w)' : '';
        // No "g" flag: test() must stay stateless because the RegExp is shared.
        re = new RegExp(`${head}${escapeRegExp(keyword)}${tail}`, 'i');
        ASCII_KEYWORD_RE_CACHE.set(keyword, re);
    }
    return re;
}
/**
 * Test if a keyword matches in a normalized memo.
 *
 * ASCII-only keywords ("ANA", "Suica", "Amazon") must stand alone as a word:
 *   - "ANA 機内食" → matches (space boundary)
 *   - "analytics" → no match (mid-word)
 *   - "ANA" → matches (start/end boundary)
 *
 * Keywords with any non-ASCII character ("新幹線", "コーヒー") use substring match:
 *   - "新幹線のぞみ" → matches
 *
 * @param normalizedMemo Memo text, already normalized.
 * @param normalizedKeyword Keyword, already normalized.
 * @returns true on match; false for an empty keyword.
 */
export function keywordMatches(normalizedMemo, normalizedKeyword) {
    if (!normalizedKeyword)
        return false;
    if (ASCII_ONLY_RE.test(normalizedKeyword)) {
        return asciiKeywordRegExp(normalizedKeyword).test(normalizedMemo);
    }
    // CJK / mixed keyword: \w-based boundaries are meaningless here.
    return normalizedMemo.includes(normalizedKeyword);
}
43
/**
 * Locate the first keyword that matches the memo.
 *
 * Both arguments must already be normalized. Keywords are tried in list
 * order, so earlier entries take priority.
 *
 * @param normalizedMemo Normalized memo text.
 * @param normalizedKeywords Normalized keywords in priority order.
 * @returns Index of the first matching keyword, or -1 if none match.
 */
export function findFirstMatchingKeyword(normalizedMemo, normalizedKeywords) {
    return normalizedKeywords.findIndex((keyword) => keywordMatches(normalizedMemo, keyword));
}
57
+ //# sourceMappingURL=keyword-match.js.map
@@ -0,0 +1,3 @@
1
/**
 * Normalize memo text for keyword matching: full-width alphanumerics and
 * spaces become half-width, text is lower-cased, whitespace runs collapse to
 * one space and the ends are trimmed. Empty input yields ''.
 */
+ export declare function normalizeMemo(input: string): string;
2
/** Apply normalizeMemo to every keyword and return the results as a new array. */
+ export declare function normalizeKeywordList(keywords: string[]): string[];
3
+ //# sourceMappingURL=normalize.d.ts.map
@@ -0,0 +1,27 @@
1
+ // Memo string normalization for keyword matching.
2
+ //
3
+ // Steps:
4
+ // 1. 全角英数 → 半角
5
+ // 2. 全角カナ → 半角カナ (optional, configurable)
6
+ // 3. 大文字 → 小文字
7
+ // 4. trim whitespace
8
+ //
9
+ // Used by both classifier and exclusion checker for consistent matching.
10
/**
 * Normalize a memo (or keyword) so that matching is insensitive to character
 * width, letter case and spacing.
 *
 * Steps, in order:
 *   1. Full-width digits and Latin letters (U+FF10-FF19, U+FF21-FF3A,
 *      U+FF41-FF5A) → their ASCII equivalents (code point minus 0xFEE0).
 *   2. Ideographic space (U+3000) → ASCII space.
 *   3. Lower-case.
 *   4. Collapse whitespace runs to one space and trim both ends.
 *
 * The character classes use \u escapes on purpose: written as literal
 * characters they are easily folded to plain ASCII by tooling, at which point
 * step 1 would subtract 0xFEE0 from ordinary ASCII letters and corrupt them.
 *
 * @param input Raw text; falsy input yields ''.
 * @returns The normalized text.
 */
export function normalizeMemo(input) {
    if (!input)
        return '';
    return input
        .replace(/[\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A]/g, (ch) => String.fromCharCode(ch.charCodeAt(0) - 0xFEE0))
        .replace(/\u3000/g, ' ')
        .toLowerCase()
        .replace(/\s+/g, ' ')
        .trim();
}
24
/**
 * Normalize every keyword in a list.
 *
 * @param keywords Raw keywords.
 * @returns A new array holding the normalized form of each keyword, in the same order.
 */
export function normalizeKeywordList(keywords) {
    const toNormalized = (keyword) => normalizeMemo(keyword);
    return keywords.map(toNormalized);
}
27
+ //# sourceMappingURL=normalize.js.map
@@ -0,0 +1,21 @@
1
+ import { KeywordClassifier } from './keyword-classifier.js';
2
+ import { ClaudeClassifier, KeywordCategoryMeta } from './claude-classifier.js';
3
+ import { Transaction, ClassificationResult } from './types.js';
4
/**
 * Classification result tagged with the stage that produced it.
 * NOTE(review): presumably 1 = keyword classifier, 2 = Claude classifier and
 * 'unclassified' = neither produced a match — confirm against the implementation.
 */
+ export interface TwoStageResult extends ClassificationResult {
5
+ stage: 1 | 2 | 'unclassified';
6
+ }
7
/**
 * Combines a KeywordClassifier (stage 1) with an optional ClaudeClassifier
 * (stage 2); `stage2` may be omitted or null.
 * NOTE(review): the implementation is not visible here — presumably stage 2 is
 * consulted only when stage 1 finds no match; confirm before relying on it.
 */
+ export declare class TwoStageClassifier {
8
+ private stage1;
9
+ private stage2;
10
+ constructor(stage1: KeywordClassifier, stage2?: ClaudeClassifier | null);
11
+ classify(tx: Transaction): Promise<TwoStageResult>;
12
+ hasStage2(): boolean;
13
+ getStage1(): KeywordClassifier;
14
+ getStage2(): ClaudeClassifier | null;
15
+ }
16
+ /**
 * Helper: extract category metadata from a KeywordClassifier so it can be
 * passed to the ClaudeClassifier constructor.
 *
 * NOTE(review): KeywordClassifier declares getCategoriesMeta() (there is no
 * getCategories() method); presumably this helper delegates to it — confirm
 * against the implementation.
 *
 * @param classifier Keyword classifier whose dictionary supplies the categories.
 * @returns One KeywordCategoryMeta per category.
 */
20
+ export declare function extractCategoryMeta(classifier: KeywordClassifier): KeywordCategoryMeta[];
21
+ //# sourceMappingURL=two-stage-classifier.d.ts.map