@dizzlkheinz/ynab-mcpb 0.13.1 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. package/.code/agents/01a13ef4-3f23-4f52-b33b-3585b73cfa60/error.txt +3 -0
  2. package/.code/agents/084fd32f-e298-4728-9103-a78d7dc39613/error.txt +3 -0
  3. package/.code/agents/0fed51e1-a943-4b97-a2a8-a6f0f27c844d/status.txt +1 -0
  4. package/.code/agents/1059b6bd-5ccd-4d83-a12c-7c9d89137399/error.txt +5 -0
  5. package/.code/agents/110/exec-call_F9BDNG7JfxKkq7Vc8ESAvdft.txt +1569 -0
  6. package/.code/agents/11ebcef3-b13f-4e44-ad80-d94a866804b7/error.txt +3 -0
  7. package/.code/agents/1398/exec-call_CjItcWMU1G6JoPshX62QvpaR.txt +2832 -0
  8. package/.code/agents/1398/exec-call_SUVq2ivmONQ5LMCmd7ngmOqr.txt +2709 -0
  9. package/.code/agents/1398/exec-call_SdNY4NOffdcC5pRYjVXHjPCK.txt +2832 -0
  10. package/.code/agents/1398/exec-call_qblJo9et1gsFFB63TtLOiji2.txt +2832 -0
  11. package/.code/agents/1398/exec-call_zaRrzlGz7GJcNzVfkAmML7Zg.txt +2709 -0
  12. package/.code/agents/171834fd-5905-42fc-bbcc-2c755145b0fc/status.txt +1 -0
  13. package/.code/agents/1724/exec-call_HvHQe0w5CCG3T7Q3ULT6MO3g.txt +5217 -0
  14. package/.code/agents/1724/exec-call_QwUNESVzfxxk78K1frh1Vahb.txt +2594 -0
  15. package/.code/agents/1724/exec-call_aJ1Xwz71XmIpD4SBxSHERzLe.txt +2594 -0
  16. package/.code/agents/1d7d7ab7-7473-4b69-8b97-6e914f56056a/result.txt +231 -0
  17. package/.code/agents/210/exec-call_0tQCsKNJ1WTuIchb8wlcFJpW.txt +2590 -0
  18. package/.code/agents/210/exec-call_8ZlY9cUc8Ft1twi4ch8UJ6IN.txt +5195 -0
  19. package/.code/agents/2188/exec-call_5HqayBxIteJtoI8oPTiLWgvJ.txt +286 -0
  20. package/.code/agents/2188/exec-call_XRbBKBq3adZe6dcppAvQtM7G.txt +218 -0
  21. package/.code/agents/2188/exec-call_ehA0SjpYtrUi6GJXmibLjp4i.txt +180 -0
  22. package/.code/agents/21902821-ecaf-4759-bb9d-222b90921af5/error.txt +3 -0
  23. package/.code/agents/232073be-aa0e-46da-b478-5b64dbf03cf5/status.txt +1 -0
  24. package/.code/agents/234ff534-2336-4771-a8d9-aa04421a63be/result.txt +747 -0
  25. package/.code/agents/253e2695-dc36-4022-b436-27655e0fc6c7/status.txt +1 -0
  26. package/.code/agents/2583/exec-call_M59I4eDjpjlBIWBiSxyS0YlJ.txt +2594 -0
  27. package/.code/agents/2583/exec-call_usLRGh7OhVHtsRBL4iUwRhjq.txt +2594 -0
  28. package/.code/agents/292aa3ff-dbab-470f-97c9-e7e8fd65e0db/result.txt +144 -0
  29. package/.code/agents/3134/exec-call_IgCAMGx19lWfuo8zfYIt5FFC.txt +416 -0
  30. package/.code/agents/3134/exec-call_IxvLR2Oo7kba2QTsI1gHVko8.txt +2590 -0
  31. package/.code/agents/3134/exec-call_jYvc8hksZChSiysbzKjl2ZbB.txt +2590 -0
  32. package/.code/agents/329/exec-call_4QdP3SfSO7HGPCwVcqZIth6s.txt +2590 -0
  33. package/.code/agents/472/exec-call_4AxzEEcWwkKhpqRB3bE8Ha4L.txt +790 -0
  34. package/.code/agents/472/exec-call_CB3LPYQA8QIZRi8I6kj4J17A.txt +766 -0
  35. package/.code/agents/472/exec-call_YeoUWvaFoktay2nqVUsa9KKX.txt +790 -0
  36. package/.code/agents/472/exec-call_jPWgKVquBBXTg0T3Lks5ZfkK.txt +2594 -0
  37. package/.code/agents/472/exec-call_qBkvunpGBDEHph2jPmTwtcsb.txt +1000 -0
  38. package/.code/agents/472/exec-call_v0ffRV1p0kTckBmJPzzHAEy0.txt +3489 -0
  39. package/.code/agents/472/exec-call_xAX5FXqWIlk02d9WubHbHWh8.txt +766 -0
  40. package/.code/agents/5346/exec-call_9q0muXUuLaucwEqI51Pt7idT.txt +2594 -0
  41. package/.code/agents/5346/exec-call_B2el3B79rVkq9LhWTI2VYlz7.txt +2456 -0
  42. package/.code/agents/5346/exec-call_BfX08f02qkZI9uJD5dvCvuoj.txt +2594 -0
  43. package/.code/agents/543328d0-61d6-4fd1-a723-bb168656e2e2/error.txt +18 -0
  44. package/.code/agents/5580c02c-1383-4d18-9cbd-cc8a06e3408d/result.txt +48 -0
  45. package/.code/agents/60ce1a22-5126-44b2-b977-1d5b56142a7b/status.txt +1 -0
  46. package/.code/agents/6215d9db-7fa9-4429-aeec-3835c3212291/error.txt +1 -0
  47. package/.code/agents/6743db55-30e5-4b4e-9366-a8214fc7f714/error.txt +1 -0
  48. package/.code/agents/6bf9591b-b9c9-422c-b0a5-e968c7d8422a/status.txt +1 -0
  49. package/.code/agents/7/exec-call_eww3GfdEiJZx61sJEQ9wNmt3.txt +1271 -0
  50. package/.code/agents/70/exec-call_owUtDMYiVgqDf8vsz1i32PFf.txt +1570 -0
  51. package/.code/agents/8/exec-call_UtrjAcLbhYLatxR4O97fZgnm.txt +2590 -0
  52. package/.code/agents/82490bc9-f34e-4b1b-8a8e-bccc2e6254f5/error.txt +3 -0
  53. package/.code/agents/841/exec-call_7nTNhSBCNjTDUIJv7py6CepO.txt +3299 -0
  54. package/.code/agents/841/exec-call_TLI0yUdUijuUAvI4o3DXEvHO.txt +3299 -0
  55. package/.code/agents/9/exec-call_XaABQT1hIlRpnKZ2uyBMWsTC.txt +1882 -0
  56. package/.code/agents/941/exec-call_GuGHRx7NNXWIDAnxUG2NEWPa.txt +2594 -0
  57. package/.code/agents/95d9fbab-19a2-48af-83f9-c792566a347f/error.txt +1 -0
  58. package/.code/agents/b0098cb8-cb32-4ada-9bc4-37c587518896/result.txt +170 -0
  59. package/.code/agents/b4fe59a4-81df-42e2-a112-0153e504faca/error.txt +1 -0
  60. package/.code/agents/bf4ce152-f623-49d7-aa52-c18631625c3c/error.txt +3 -0
  61. package/.code/agents/d7d1db75-d7eb-468e-adea-4ef4d916d187/status.txt +1 -0
  62. package/.code/agents/e2baa9c8-bac3-49e3-a39d-024333e6a990/status.txt +1 -0
  63. package/.code/agents/e350b8c3-8483-408c-b2bb-94515f492a11/error.txt +3 -0
  64. package/.code/agents/e63f9919-719f-4ad0-bccf-01b1a596e1e9/status.txt +1 -0
  65. package/.code/agents/e71695a8-3044-478d-8f12-ed13d02884c7/status.txt +1 -0
  66. package/.code/agents/f95b7464-3e25-4897-b153-c8dfd63fd605/error.txt +5 -0
  67. package/.code/agents/fa3c5ddf-cdf7-47a2-930a-b806c6363689/status.txt +1 -0
  68. package/.github/workflows/publish.yml +3 -3
  69. package/.github/workflows/release.yml +4 -0
  70. package/CHANGELOG.md +75 -0
  71. package/NUL +1 -0
  72. package/dist/bundle/index.cjs +65 -42
  73. package/dist/server/errorHandler.d.ts +2 -0
  74. package/dist/server/errorHandler.js +49 -5
  75. package/dist/tools/reconcileAdapter.js +10 -5
  76. package/dist/tools/reconciliation/analyzer.d.ts +8 -2
  77. package/dist/tools/reconciliation/analyzer.js +127 -409
  78. package/dist/tools/reconciliation/csvParser.d.ts +51 -0
  79. package/dist/tools/reconciliation/csvParser.js +413 -0
  80. package/dist/tools/reconciliation/executor.d.ts +8 -0
  81. package/dist/tools/reconciliation/executor.js +204 -58
  82. package/dist/tools/reconciliation/index.d.ts +7 -7
  83. package/dist/tools/reconciliation/index.js +115 -39
  84. package/dist/tools/reconciliation/matcher.d.ts +24 -3
  85. package/dist/tools/reconciliation/matcher.js +175 -133
  86. package/dist/tools/reconciliation/recommendationEngine.js +22 -18
  87. package/dist/tools/reconciliation/reportFormatter.js +9 -8
  88. package/dist/tools/reconciliation/signDetector.d.ts +2 -0
  89. package/dist/tools/reconciliation/signDetector.js +54 -0
  90. package/dist/tools/reconciliation/types.d.ts +20 -34
  91. package/dist/tools/reconciliation/types.js +1 -7
  92. package/dist/tools/reconciliation/ynabAdapter.d.ts +4 -0
  93. package/dist/tools/reconciliation/ynabAdapter.js +15 -0
  94. package/dist/types/reconciliation.d.ts +24 -0
  95. package/dist/types/reconciliation.js +1 -0
  96. package/docs/guides/ARCHITECTURE.md +12 -129
  97. package/docs/plans/2025-11-21-v014-hardening.md +153 -0
  98. package/docs/plans/reconciliation-v2-redesign.md +1571 -0
  99. package/package.json +6 -1
  100. package/scripts/test-recommendations.ts +1 -1
  101. package/src/__tests__/tools/reconciliation/csvParser.integration.test.ts +129 -0
  102. package/src/__tests__/tools/reconciliation/real-world.integration.test.ts +53 -0
  103. package/src/server/errorHandler.ts +52 -5
  104. package/src/tools/reconcileAdapter.ts +10 -5
  105. package/src/tools/reconciliation/__tests__/adapter.test.ts +28 -22
  106. package/src/tools/reconciliation/__tests__/analyzer.test.ts +114 -180
  107. package/src/tools/reconciliation/__tests__/csvParser.test.ts +87 -0
  108. package/src/tools/reconciliation/__tests__/executor.integration.test.ts +1 -1
  109. package/src/tools/reconciliation/__tests__/executor.test.ts +88 -61
  110. package/src/tools/reconciliation/__tests__/matcher.test.ts +68 -54
  111. package/src/tools/reconciliation/__tests__/recommendationEngine.test.ts +37 -30
  112. package/src/tools/reconciliation/__tests__/reportFormatter.test.ts +6 -5
  113. package/src/tools/reconciliation/__tests__/scenarios/extremes.scenario.test.ts +30 -11
  114. package/src/tools/reconciliation/__tests__/scenarios/repeatAmount.scenario.test.ts +50 -15
  115. package/src/tools/reconciliation/__tests__/signDetector.test.ts +211 -0
  116. package/src/tools/reconciliation/__tests__/ynabAdapter.test.ts +61 -0
  117. package/src/tools/reconciliation/analyzer.ts +191 -550
  118. package/src/tools/reconciliation/csvParser.ts +617 -0
  119. package/src/tools/reconciliation/executor.ts +249 -66
  120. package/src/tools/reconciliation/index.ts +148 -54
  121. package/src/tools/reconciliation/matcher.ts +234 -214
  122. package/src/tools/reconciliation/recommendationEngine.ts +23 -19
  123. package/src/tools/reconciliation/reportFormatter.ts +16 -11
  124. package/src/tools/reconciliation/signDetector.ts +117 -0
  125. package/src/tools/reconciliation/types.ts +39 -61
  126. package/src/tools/reconciliation/ynabAdapter.ts +33 -0
  127. package/src/types/reconciliation.ts +49 -0
  128. package/test-exports/ynab_since_2025-10-16_account_53298e13_238items_2025-11-28_13-46-20.json +3662 -0
  129. package/.code/agents/0427d95e-edca-431f-a214-5e53264e29c4/error.txt +0 -8
  130. package/.code/agents/0d675174-d1e1-41c3-9975-4c2e275819a9/error.txt +0 -3
  131. package/.code/agents/0d8c5afd-4787-422b-abf8-2e5943fc7e67/error.txt +0 -3
  132. package/.code/agents/0ec34a70-ed5d-4b9e-bee4-bb0e4cccbc4b/error.txt +0 -1
  133. package/.code/agents/0ef51a21-1ab1-49d7-9561-0eaa43875ebc/error.txt +0 -12
  134. package/.code/agents/15db95d7-abad-4b4d-9c3b-8446089cb61d/error.txt +0 -1
  135. package/.code/agents/19ab9acb-f675-4ff0-902a-09a5476f8149/error.txt +0 -1
  136. package/.code/agents/1ef7e12d-f6ff-4897-8a9b-152d523d898e/error.txt +0 -5
  137. package/.code/agents/2465/exec-call_lroN9KKzJVWC7t5423DK1nT9.txt +0 -1453
  138. package/.code/agents/28edb6fe-95a9-41a0-ae69-aa0100d26c0c/error.txt +0 -8
  139. package/.code/agents/2ae40cf5-b4bf-42e2-92bf-7ea350a7755e/error.txt +0 -9
  140. package/.code/agents/2bfc4e1f-ac4b-45a5-b6df-bf89d4dbb54c/error.txt +0 -1
  141. package/.code/agents/2e2e1134-eff0-49be-ba25-8e2c3468a564/error.txt +0 -5
  142. package/.code/agents/3/exec-call_203OC4TNVkLxW7z2HCVEQ1cM.txt +0 -81
  143. package/.code/agents/3/exec-call_SS5T0XSiXB4LSNzUKTl75wkh.txt +0 -610
  144. package/.code/agents/3322c003-ce5e-48e3-a342-f5049c5bf9a2/error.txt +0 -1
  145. package/.code/agents/391e9b08-1ebc-468c-9bcd-6d0cc3193b37/error.txt +0 -1
  146. package/.code/agents/3ab0aa84-b7bb-4054-afa3-40b8fd7d3be0/error.txt +0 -1
  147. package/.code/agents/3bed368d-50fe-477e-aee3-a6707eaa1ab9/error.txt +0 -3
  148. package/.code/agents/3e40b925-db12-442f-8d7a-a25fc69a6672/error.txt +0 -8
  149. package/.code/agents/414d5776-cf58-41f3-9328-a6daed503a50/error.txt +0 -5
  150. package/.code/agents/42687751-4565-4610-b240-67835b17d861/error.txt +0 -1
  151. package/.code/agents/46b98876-1a39-43c9-9e2f-507ca6d47335/error.txt +0 -9
  152. package/.code/agents/4a7d9491-b26f-43dd-850d-2ecdc49b5d1b/error.txt +0 -1
  153. package/.code/agents/4e60f00a-1b3e-447f-87f3-7faf9deddec3/error.txt +0 -13
  154. package/.code/agents/5138fc1c-4d49-4b74-a7da-ccdb3a8e44e7/error.txt +0 -14
  155. package/.code/agents/521cff39-a7a3-42e5-a557-134f0f7daaa0/error.txt +0 -5
  156. package/.code/agents/53302dc5-3857-4413-9a47-9e0f64a51dc4/error.txt +0 -5
  157. package/.code/agents/567c7c2e-6a6f-4761-a08d-d36deeb2e0ac/error.txt +0 -5
  158. package/.code/agents/57b00845-80dc-47c9-953c-3028d16275d6/error.txt +0 -3
  159. package/.code/agents/593d9005-c2a5-48fd-8813-ece0d3f2de96/error.txt +0 -1
  160. package/.code/agents/5a112e66-0e1a-42f9-877c-53af56ea3551/error.txt +0 -1
  161. package/.code/agents/5b05e8ed-7788-4738-b7ee-9faa8180f992/error.txt +0 -5
  162. package/.code/agents/5f888d6f-d7ca-4ac8-be23-9ea1bf753951/error.txt +0 -5
  163. package/.code/agents/607db3ab-e4b0-435b-b497-93e9aa525549/error.txt +0 -8
  164. package/.code/agents/67dcb2a2-900f-4c78-b3fc-80b5213e0ddf/error.txt +0 -8
  165. package/.code/agents/69ad848c-4e98-49b3-b16c-0094ac2d1759/error.txt +0 -5
  166. package/.code/agents/6c9cfc5f-0d0b-445c-b121-9f60082c4f70/error.txt +0 -1
  167. package/.code/agents/6f6f8f77-4ab0-4f6e-9f30-40e8be0bd8f5/error.txt +0 -1
  168. package/.code/agents/72a7cde4-fa8a-4024-9038-27faa550539b/error.txt +0 -1
  169. package/.code/agents/7b48335c-8247-43aa-9949-5f820ba8e199/error.txt +0 -1
  170. package/.code/agents/80944249-bea9-4ac5-87de-a666c4df306e/error.txt +0 -1
  171. package/.code/agents/826099df-1b66-4186-a915-7eb59f9db19d/error.txt +0 -5
  172. package/.code/agents/8291d158-18a8-4a92-b799-4e9a4d9cce88/error.txt +0 -1
  173. package/.code/agents/82fb71a3-20fb-4341-804a-a2fc900f95bc/error.txt +0 -1
  174. package/.code/agents/855790ea-54ee-43e4-8209-a66994e37590/error.txt +0 -1
  175. package/.code/agents/88ce3a2e-04f2-42be-9062-bf97aa798da0/error.txt +0 -3
  176. package/.code/agents/9a17e398-b6ed-4218-bb55-bc64a8d38ce8/error.txt +0 -8
  177. package/.code/agents/9a4f4bfc-a2a6-4f40-a896-9335b41a7ed1/error.txt +0 -1
  178. package/.code/agents/9b633e55-ef84-47d6-94bb-fd3dd172ad97/error.txt +0 -1
  179. package/.code/agents/9b81f3ab-c72b-4a81-9a8f-28a49ddba84a/error.txt +0 -8
  180. package/.code/agents/a35daf29-b2d1-4aef-9b42-dad63a76bd47/error.txt +0 -3
  181. package/.code/agents/a81990cc-69ee-44d2-b907-17403c9bc5d7/error.txt +0 -5
  182. package/.code/agents/ab56260a-4a83-4ad4-9410-f88a23d6520a/error.txt +0 -1
  183. package/.code/agents/ad722c31-2d1d-45f7-bae2-3f02ca455b60/error.txt +0 -1
  184. package/.code/agents/b62e8690-3324-4b97-9309-731bee79416b/error.txt +0 -5
  185. package/.code/agents/baf60a3a-752b-4ad8-99d6-df32423ed2eb/error.txt +0 -1
  186. package/.code/agents/be049042-7dcb-4ac8-9beb-c8f1aea67742/error.txt +0 -14
  187. package/.code/agents/bed1dcb4-bfce-4a9f-8594-0f994962aafd/error.txt +0 -1
  188. package/.code/agents/c324a6cf-e935-4ede-9529-b3ebc18e8d6b/error.txt +0 -5
  189. package/.code/agents/c37c06ff-dfe3-43f2-9bbc-3ec73ec8f41d/error.txt +0 -5
  190. package/.code/agents/c8cd6671-433a-456b-9f88-e51cb2df6bfc/error.txt +0 -11
  191. package/.code/agents/ca2ccb67-2f24-428e-b27d-9365beadd140/error.txt +0 -1
  192. package/.code/agents/cf08c0c8-e7f0-423e-93ba-547e8e818340/error.txt +0 -8
  193. package/.code/agents/d579c74f-874b-40a4-9d56-ced1eb6a701d/error.txt +0 -1
  194. package/.code/agents/df412c98-7378-4deb-8e1e-76c416931181/error.txt +0 -3
  195. package/.code/agents/e5134eb3-2af4-45b0-8998-051cb4afdb45/error.txt +0 -3
  196. package/.code/agents/e6308471-aa45-4e9e-9496-2e9404164d97/error.txt +0 -8
  197. package/.code/agents/e7bd8bc7-23fb-4f46-98dc-b0dcf11b75a1/error.txt +0 -1
  198. package/.code/agents/e92bec35-378d-4fe1-8ac0-6e1bb3c86911/error.txt +0 -5
  199. package/.code/agents/ed918fbf-2dc4-4aa2-bfc5-04b65d9471ea/error.txt +0 -1
  200. package/.code/agents/ef1d756f-b272-48fc-8729-f05c494674f7/error.txt +0 -1
  201. package/.code/agents/ef359853-0249-4e41-a804-c0fc459fe456/error.txt +0 -1
  202. package/.code/agents/effc7b4a-4b90-40a0-8c86-a7a99d2d5fd2/error.txt +0 -1
  203. package/.code/agents/fa15f8d5-8359-4a8b-83a3-2f2056b3ff40/error.txt +0 -3
  204. package/.code/agents/fbef4193-eadf-4c8a-83ff-4878a6310f25/error.txt +0 -8
  205. package/.code/agents/fd0a4b4a-fda4-4964-a6d6-2b8a2da387c6/error.txt +0 -1
  206. package/.gemini/settings.json +0 -8
  207. package/WARP.md +0 -245
@@ -0,0 +1,1571 @@
1
+ # Reconciliation Tool v2 - Complete Redesign
2
+
3
+ ## Executive Summary
4
+
5
+ The current reconciliation tool has fundamental architectural problems that prevent it from working reliably. This document outlines a complete redesign based on:
6
+ 1. Analysis of the existing codebase and its failure modes
7
+ 2. Research into best-in-class libraries for CSV parsing, fuzzy matching, and date handling
8
+ 3. Learnings from production reconciliation engines (Midday.ai's open-source implementation)
9
+
10
+ **Target Outcome:** A reconciliation tool that achieves 90%+ auto-match accuracy for Canadian bank statements against YNAB transactions.
11
+
12
+ ---
13
+
14
+ ## Document Role & Source of Truth
15
+
16
+ > **⚠️ This is a design document, not a specification.**
17
+ >
18
+ > - **Source of truth:** The actual TypeScript implementation in `src/tools/reconciliation/` is authoritative
19
+ > - **Code blocks below:** Illustrative pseudo-code showing intent and interfaces—not copy-paste ready
20
+ > - **Drift expectation:** Implementation details will evolve; this doc captures architectural decisions and rationale
21
+ > - **Update policy:** Major architectural changes should be reflected here; minor implementation tweaks need not be
22
+
23
+ When in doubt, read the actual source files:
24
+ - `src/tools/reconciliation/types.ts` - Canonical types
25
+ - `src/tools/reconciliation/matcher.ts` - Matching algorithm (V2 with legacy compatibility)
26
+ - `src/tools/reconciliation/analyzer.ts` - Orchestration
27
+ - `src/tools/compareTransactions/parser.ts` - Legacy CSV parsing (to be replaced)
28
+
29
+ ---
30
+
31
+ ## Part 1: Critical Problems in Current Implementation
32
+
33
+ ### Problem 1: Two Incompatible `BankTransaction` Types
34
+
35
+ There are two completely different interfaces with the same name:
36
+
37
+ **`src/tools/compareTransactions/types.ts`:**
38
+ ```typescript
39
+ interface BankTransaction {
40
+ date: Date; // JavaScript Date object
41
+ amount: number; // In MILLIUNITS (1000 = $1.00)
42
+ description: string;
43
+ raw_amount: string;
44
+ raw_date: string;
45
+ row_number: number;
46
+ }
47
+ ```
48
+
49
+ **`src/tools/reconciliation/types.ts`:**
50
+ ```typescript
51
+ interface BankTransaction {
52
+ id: string;
53
+ date: string; // YYYY-MM-DD string
54
+ amount: number; // In DOLLARS (1.00 = $1.00)
55
+ payee: string;
56
+ memo?: string;
57
+ original_csv_row: number;
58
+ }
59
+ ```
60
+
61
+ The analyzer imports from the parser (compareTransactions) but expects reconciliation types. A fragile `normalizeAmount()` function tries to guess which format it received by checking whether `date instanceof Date`; this guesswork is the root cause of most matching failures.
62
+
63
+ ### Problem 2: Tests Mock the Parser with Wrong Types
64
+
65
+ ```typescript
66
+ // In analyzer.test.ts - THESE ARE WRONG
67
+ vi.mocked(parser.parseBankCSV).mockReturnValue({
68
+ transactions: [
69
+ { date: '2025-10-15', amount: -45.23, payee: 'Shell Gas' } // String date, dollars
70
+ ]
71
+ });
72
+
73
+ // But real parser returns:
74
+ { date: new Date('2025-10-15'), amount: -45230, description: 'Shell Gas' } // Date object, milliunits
75
+ ```
76
+
77
+ The tests pass because the mocks use the wrong shape, but the real code path fails.
78
+
79
+ ### Problem 3: Rigid Confidence Scoring
80
+
81
+ Current weights:
82
+ - Amount match: 40% (REQUIRED - 0 if no match)
83
+ - Date match: 40% (within 2 days default)
84
+ - Payee match: 20%
85
+
86
+ Problems:
87
+ 1. **2-day date tolerance too tight** - banks post 3-7 days after transaction
88
+ 2. **Losing 40% for date mismatch is catastrophic** - a perfect amount + payee match with date outside tolerance only scores 60%
89
+ 3. **Payee only worth 20%** - can't compensate for date issues
90
+ 4. **Auto-match threshold 90%** - nearly impossible to reach without all three matching
91
+
92
+ ### Problem 4: Primitive Fuzzy Matching
93
+
94
+ Current payee matching uses hand-rolled Levenshtein distance. This fails on real-world bank data:
95
+ - "AMZN MKTP CA*123456" vs "Amazon" → Low score
96
+ - "SQ *COFFEE SHOP TORONTO" vs "Square Coffee" → Low score
97
+ - "PAYPAL *NETFLIX" vs "Netflix" → Low score
98
+
99
+ ### Problem 5: CSV Parser Fragility
100
+
101
+ The `autoDetectCSVFormat()` function:
102
+ - Only looks at first 3 rows
103
+ - Has limited date format support
104
+ - Fails on European number formats (1.234,56)
105
+ - No presets for known Canadian bank formats
106
+
107
+ ---
108
+
109
+ ## Part 1b: Accuracy Target & Evaluation Plan
110
+
111
+ The "90%+ auto-match accuracy" target requires operationalisation:
112
+
113
+ ### Definition of Success
114
+
115
+ | Metric | Definition | Target |
116
+ |--------|------------|--------|
117
+ | **Auto-match precision** | % of `confidence: 'high'` matches that are correct | ≥95% |
118
+ | **Auto-match recall** | % of true matches captured at `confidence: 'high'` | ≥90% |
119
+ | **Overall match rate** | % of bank transactions with any match (`high` + `medium`) | ≥95% |
120
+ | **False positive rate** | % of auto-matches that are wrong | ≤5% |
121
+
122
+ ### Evaluation Dataset Construction
123
+
124
+ 1. **Source CSVs:** Collect 10+ real statement exports per major Canadian bank (TD, RBC, Scotiabank, Tangerine, Wealthsimple, CIBC, BMO)
125
+ 2. **Ground truth labeling:** Manually match each bank transaction to its YNAB counterpart (or mark as "new")
126
+ 3. **Edge case coverage:** Ensure dataset includes:
127
+ - Recurring charges (same amount, different dates)
128
+ - Similar merchants (Starbucks #1234 vs #5678)
129
+ - Split transactions
130
+ - Refunds and reversals
131
+ - Multi-day posting delays (3-7 days)
132
+ 4. **Storage:** `test-exports/csv/labeled/` with `.csv` + `.labels.json` pairs
133
+
134
+ ### V1 vs V2 Comparison Methodology
135
+
136
+ ```bash
137
+ # Run both implementations on the same dataset
138
+ npm run benchmark:reconciliation -- --version=v1 --dataset=test-exports/csv/labeled/
139
+ npm run benchmark:reconciliation -- --version=v2 --dataset=test-exports/csv/labeled/
140
+
141
+ # Compare results
142
+ npm run benchmark:compare -- v1-results.json v2-results.json
143
+ ```
144
+
145
+ Output metrics:
146
+ - Confusion matrix (TP/FP/TN/FN per confidence tier)
147
+ - Precision/recall curves at different thresholds
148
+ - Per-bank breakdown
149
+ - Failure case analysis (which transaction types fail most?)
150
+
151
+ ### Acceptance Criteria
152
+
153
+ V2 is ready for release when:
154
+ 1. Auto-match precision ≥95% across all banks in the test dataset
155
+ 2. Auto-match recall ≥90% (we catch most true matches)
156
+ 3. No regression on any bank compared to v1
157
+ 4. P95 latency <2s for 500-transaction CSVs
158
+
159
+ ---
160
+
161
+ ## Part 2: Recommended Libraries
162
+
163
+ ### CSV Parsing: PapaParse
164
+
165
+ **Why:** Auto-detects delimiters, handles malformed CSVs gracefully, dynamic typing, excellent edge case handling.
166
+
167
+ ```bash
168
+ npm install papaparse @types/papaparse
169
+ ```
170
+
171
+ **Key Features:**
172
+ - `delimiter: ""` → auto-detect
173
+ - `dynamicTyping: true` → auto-convert numbers
174
+ - `skipEmptyLines: true`
175
+ - `transformHeader` → normalize column names
176
+ - Detailed error reporting per row
177
+
178
+ **Usage:**
179
+ ```typescript
180
+ import Papa from 'papaparse';
181
+
182
+ const result = Papa.parse(csvContent, {
183
+ header: true,
184
+ dynamicTyping: true,
185
+ skipEmptyLines: true,
186
+ transformHeader: (h) => h.toLowerCase().trim(),
187
+ });
188
+
189
+ // result.data = parsed rows
190
+ // result.errors = array of {type, code, message, row} for each parsing error
191
+ // result.meta = {delimiter, fields, truncated}
192
+ ```
193
+
194
+ ### Fuzzy String Matching: fuzzball
195
+
196
+ **Why:** Port of Python's TheFuzz (fuzzywuzzy), battle-tested algorithms for transaction matching, includes token-based matching crucial for merchant names.
197
+
198
+ ```bash
199
+ npm install fuzzball
200
+ ```
201
+
202
+ **Key Algorithms:**
203
+ ```typescript
204
+ import fuzz from 'fuzzball';
205
+
206
+ // Basic ratio - simple Levenshtein
207
+ fuzz.ratio("Shell Gas", "SHELL GAS STATION"); // 73
208
+
209
+ // Token Set Ratio - handles word order, duplicates
210
+ fuzz.token_set_ratio("AMZN MKTP CA*123", "Amazon Marketplace"); // scores low: no shared tokens, so abbreviations like AMZN need alias expansion first
211
+
212
+ // Token Sort Ratio - alphabetizes then compares
213
+ fuzz.token_sort_ratio("fuzzy wuzzy", "wuzzy fuzzy"); // 100
214
+
215
+ // Partial Ratio - best substring match
216
+ fuzz.partial_ratio("Netflix", "PAYPAL *NETFLIX INC"); // 100
217
+
218
+ // WRatio - weighted combination of above
219
+ fuzz.WRatio("SQ *COFFEE SHOP", "Square Coffee Shop"); // ~90
220
+ ```
221
+
222
+ **Recommendation:** Use `token_set_ratio` for payee matching as it handles the common case of bank merchant names having extra tokens (location codes, transaction IDs, etc.)
223
+
224
+ ### Date Parsing: chrono-node
225
+
226
+ **Why:** Parses natural language dates, handles many formats automatically, battle-tested.
227
+
228
+ ```bash
229
+ npm install chrono-node
230
+ ```
231
+
232
+ **Usage:**
233
+ ```typescript
234
+ import * as chrono from 'chrono-node';
235
+
236
+ chrono.parseDate("Sep 18, 2025"); // Date object
237
+ chrono.parseDate("18/09/2025"); // Date object
238
+ chrono.parseDate("2025-09-18"); // Date object
239
+ chrono.parseDate("September 18th"); // Date object (relative to today)
240
+ ```
241
+
242
+ **Formatting:** Pair chrono-node with dayjs to format the parsed dates (e.g., as YYYY-MM-DD):
243
+ ```bash
244
+ npm install dayjs
245
+ ```
246
+
247
+ ### Optional: Vector Embeddings for Semantic Matching
248
+
249
+ For future enhancement, consider pgvector with OpenAI/Google embeddings for semantic merchant matching. The Midday.ai approach uses 768-dimensional vectors but this is overkill for v2 - fuzzball's token_set_ratio should get us to 90%+ accuracy.
250
+
251
+ ---
252
+
253
+ ## Part 3: Architectural Redesign
254
+
255
+ ### 3.1 Unified Transaction Types
256
+
257
+ **File:** `src/types/reconciliation.ts`
258
+
259
+ > **Design Note:** These types are intentionally decoupled from the YNAB SDK. The `NormalizedYNABTransaction` interface is SDK-agnostic; a thin adapter in `src/tools/reconciliation/ynabAdapter.ts` handles the conversion from `ynab.TransactionDetail`.
260
+
261
+ > **Critical Decision: All amounts are in MILLIUNITS (integers).**
262
+ > - 1000 milliunits = $1.00
263
+ > - This matches YNAB's native format
264
+ > - Eliminates floating-point precision issues entirely
265
+ > - Enables exact integer comparison: `a === b`
266
+ > - Conversion from CSV floats happens ONCE at the parser boundary
267
+
268
+ ```typescript
269
+ /**
270
+ * Canonical bank transaction type used throughout reconciliation.
271
+ *
272
+ * AMOUNTS ARE IN MILLIUNITS (integers, 1000 = $1.00).
273
+ * This matches YNAB's native format and allows exact integer comparison.
274
+ */
275
+ export interface BankTransaction {
276
+ /** UUID generated for tracking */
277
+ id: string;
278
+ /** ISO date string YYYY-MM-DD */
279
+ date: string;
280
+ /** Amount in MILLIUNITS (negative = outflow, positive = inflow) */
281
+ amount: number;
282
+ /** Merchant/payee name from CSV */
283
+ payee: string;
284
+ /** Optional memo/description */
285
+ memo?: string;
286
+ /** Original CSV row number (1-indexed, after header) */
287
+ sourceRow: number;
288
+ /** Raw values from CSV for debugging */
289
+ raw: {
290
+ date: string;
291
+ amount: string;
292
+ description: string;
293
+ };
294
+ /** Parser warnings (e.g., ambiguous debit/credit) */
295
+ warnings?: string[];
296
+ }
297
+
298
+ /**
299
+ * YNAB transaction normalized for comparison.
300
+ *
301
+ * This interface is intentionally SDK-agnostic. Use the adapter
302
+ * function in ynabAdapter.ts to convert from ynab.TransactionDetail.
303
+ *
304
+ * AMOUNTS ARE IN MILLIUNITS - same as YNAB API native format.
305
+ * No conversion needed from the SDK.
306
+ */
307
+ export interface NormalizedYNABTransaction {
308
+ id: string;
309
+ date: string; // YYYY-MM-DD
310
+ /** Amount in MILLIUNITS (same as YNAB API) */
311
+ amount: number;
312
+ payee: string | null;
313
+ memo: string | null;
314
+ categoryName: string | null;
315
+ cleared: 'cleared' | 'uncleared' | 'reconciled';
316
+ approved: boolean;
317
+ }
318
+ ```
319
+
320
+ **File:** `src/tools/reconciliation/ynabAdapter.ts`
321
+
322
+ ```typescript
323
+ import type * as ynab from 'ynab';
324
+ import type { NormalizedYNABTransaction } from '../../types/reconciliation.js';
325
+
326
+ /**
327
+ * Convert YNAB SDK transaction to normalized format for matching.
328
+ *
329
+ * This adapter keeps the YNAB SDK dependency isolated from the
330
+ * reconciliation core logic.
331
+ *
332
+ * NOTE: Amount stays in milliunits - no conversion needed since
333
+ * YNAB API already uses milliunits natively.
334
+ */
335
+ export function normalizeYNABTransaction(
336
+ txn: ynab.TransactionDetail
337
+ ): NormalizedYNABTransaction {
338
+ return {
339
+ id: txn.id,
340
+ date: txn.date,
341
+ amount: txn.amount, // Already in milliunits - no conversion!
342
+ payee: txn.payee_name ?? null,
343
+ memo: txn.memo ?? null,
344
+ categoryName: txn.category_name ?? null,
345
+ cleared: txn.cleared,
346
+ approved: txn.approved,
347
+ };
348
+ }
349
+
350
+ /**
351
+ * Batch convert YNAB transactions.
352
+ */
353
+ export function normalizeYNABTransactions(
354
+ txns: ynab.TransactionDetail[]
355
+ ): NormalizedYNABTransaction[] {
356
+ return txns.map(normalizeYNABTransaction);
357
+ }
358
+ ```
359
+
360
+ ### 3.2 New CSV Parser Module
361
+
362
+ **File:** `src/tools/reconciliation/csvParser.ts`
363
+
364
+ ```typescript
365
+ import Papa from 'papaparse';
366
+ import * as chrono from 'chrono-node';
367
+ import { randomUUID } from 'crypto';
368
+ import type { BankTransaction } from '../../types/reconciliation.js';
369
+
370
+ export interface CSVParseResult {
371
+ transactions: BankTransaction[];
372
+ errors: ParseError[];
373
+ warnings: ParseWarning[];
374
+ meta: {
375
+ detectedDelimiter: string;
376
+ detectedColumns: string[];
377
+ totalRows: number;
378
+ validRows: number;
379
+ skippedRows: number;
380
+ };
381
+ }
382
+
383
+ export interface ParseError {
384
+ row: number;
385
+ field: string;
386
+ message: string;
387
+ rawValue: string;
388
+ }
389
+
390
+ export interface ParseWarning {
391
+ row: number;
392
+ message: string;
393
+ }
394
+
395
+ export interface BankPreset {
396
+ name: string;
397
+ dateColumn: string | string[];
398
+ amountColumn?: string | string[];
399
+ debitColumn?: string;
400
+ creditColumn?: string;
401
+ descriptionColumn: string | string[];
402
+ amountMultiplier?: number;
403
+ /** Expected date format hint: 'YMD', 'MDY', 'DMY' */
404
+ dateFormat?: 'YMD' | 'MDY' | 'DMY';
405
+ }
406
+
407
+ // Presets for Canadian banks
408
+ export const BANK_PRESETS: Record<string, BankPreset> = {
409
+ 'td': {
410
+ name: 'TD Canada Trust',
411
+ dateColumn: ['Date', 'Transaction Date', 'Posted Date'],
412
+ amountColumn: ['Amount', 'CAD$'],
413
+ descriptionColumn: ['Description', 'Transaction Description', 'Merchant'],
414
+ dateFormat: 'MDY', // TD typically uses MM/DD/YYYY
415
+ },
416
+ 'rbc': {
417
+ name: 'RBC Royal Bank',
418
+ dateColumn: ['Transaction Date', 'Date'],
419
+ debitColumn: 'Debit',
420
+ creditColumn: 'Credit',
421
+ descriptionColumn: ['Description 1', 'Description', 'Transaction'],
422
+ dateFormat: 'YMD', // RBC typically uses YYYY-MM-DD
423
+ },
424
+ 'scotiabank': {
425
+ name: 'Scotiabank',
426
+ dateColumn: ['Date', 'Transaction Date'],
427
+ amountColumn: ['Amount'],
428
+ descriptionColumn: ['Description', 'Transaction Details'],
429
+ dateFormat: 'DMY', // Scotiabank often uses DD/MM/YYYY
430
+ },
431
+ 'wealthsimple': {
432
+ name: 'Wealthsimple',
433
+ dateColumn: ['Date'],
434
+ amountColumn: ['Amount'],
435
+ descriptionColumn: ['Description', 'Payee'],
436
+ amountMultiplier: 1,
437
+ dateFormat: 'YMD',
438
+ },
439
+ 'tangerine': {
440
+ name: 'Tangerine',
441
+ dateColumn: ['Date', 'Transaction date'],
442
+ amountColumn: ['Amount'],
443
+ descriptionColumn: ['Name', 'Transaction name', 'Memo'],
444
+ dateFormat: 'MDY',
445
+ },
446
+ };
447
+
448
+ export interface ParseCSVOptions {
449
+ /** Bank preset key (e.g., 'td', 'rbc') */
450
+ preset?: string;
451
+ /** Multiply all amounts by -1 (takes precedence over the preset's amountMultiplier) */
452
+ invertAmounts?: boolean;
453
+ }
454
+
455
+ /**
456
+ * Parse a bank CSV file into BankTransaction objects.
457
+ *
458
+ * IMPORTANT: Amounts are converted to MILLIUNITS (integers) at this boundary.
459
+ * This is the ONLY place where float-to-milliunit conversion happens.
460
+ */
461
+ export function parseCSV(
462
+ content: string,
463
+ options: ParseCSVOptions = {}
464
+ ): CSVParseResult {
465
+ const errors: ParseError[] = [];
466
+ const warnings: ParseWarning[] = [];
467
+
468
+ // Parse with PapaParse
469
+ const parsed = Papa.parse(content, {
470
+ header: true,
471
+ dynamicTyping: false, // We'll handle type conversion ourselves
472
+ skipEmptyLines: true,
473
+ transformHeader: (h) => h.trim(),
474
+ });
475
+
476
+ if (parsed.errors.length > 0) {
477
+ for (const err of parsed.errors) {
478
+ errors.push({
479
+ row: err.row ?? 0,
480
+ field: 'csv',
481
+ message: err.message,
482
+ rawValue: '',
483
+ });
484
+ }
485
+ }
486
+
487
+ const columns = parsed.meta.fields ?? [];
488
+ const preset = options.preset ? BANK_PRESETS[options.preset] : detectPreset(columns);
489
+
490
+ // Find actual column names
491
+ const dateCol = findColumn(columns, preset?.dateColumn ?? ['Date', 'Transaction Date', 'Posted Date']);
492
+ const descCol = findColumn(columns, preset?.descriptionColumn ?? ['Description', 'Payee', 'Merchant', 'Name']);
493
+
494
+ let amountCol: string | null = null;
495
+ let debitCol: string | null = null;
496
+ let creditCol: string | null = null;
497
+
498
+ if (preset?.debitColumn && preset?.creditColumn) {
499
+ debitCol = findColumn(columns, [preset.debitColumn]);
500
+ creditCol = findColumn(columns, [preset.creditColumn]);
501
+ } else {
502
+ amountCol = findColumn(columns, preset?.amountColumn ?? ['Amount', 'CAD$', 'Value']);
503
+ }
504
+
505
+ if (!dateCol) {
506
+ errors.push({ row: 0, field: 'date', message: 'Could not identify date column', rawValue: columns.join(', ') });
507
+ }
508
+ if (!amountCol && !debitCol) {
509
+ errors.push({ row: 0, field: 'amount', message: 'Could not identify amount column', rawValue: columns.join(', ') });
510
+ }
511
+
512
+ const transactions: BankTransaction[] = [];
513
+ const rows = parsed.data as Record<string, string>[];
514
+
515
+ for (let i = 0; i < rows.length; i++) {
516
+ const row = rows[i];
517
+ const rowNum = i + 2; // 1-indexed, after header
518
+ const rowWarnings: string[] = [];
519
+
520
+ // Parse date with priority: ISO > explicit preset format hint > chrono-node fallback
521
+ const rawDate = dateCol ? row[dateCol]?.trim() ?? '' : '';
522
+ const parsedDate = parseDate(rawDate, preset?.dateFormat);
523
+ if (!parsedDate) {
524
+ errors.push({ row: rowNum, field: 'date', message: `Could not parse date: "${rawDate}"`, rawValue: rawDate });
525
+ continue;
526
+ }
527
+ // Use LOCAL date components to avoid timezone shifts
528
+ const dateStr = formatLocalDate(parsedDate);
529
+
530
+ // Parse amount - convert to MILLIUNITS immediately
531
+ let amountMilliunits: number;
532
+ let rawAmount: string;
533
+
534
+ if (amountCol) {
535
+ rawAmount = row[amountCol]?.trim() ?? '';
536
+ amountMilliunits = dollarStringToMilliunits(rawAmount);
537
+ } else if (debitCol && creditCol) {
538
+ const debit = row[debitCol]?.trim() ?? '';
539
+ const credit = row[creditCol]?.trim() ?? '';
540
+ rawAmount = debit || credit;
541
+
542
+ const debitMilliunits = dollarStringToMilliunits(debit);
543
+ const creditMilliunits = dollarStringToMilliunits(credit);
544
+
545
+ // Warn if both debit and credit have values (ambiguous)
546
+ if (Math.abs(debitMilliunits) > 0 && Math.abs(creditMilliunits) > 0) {
547
+ rowWarnings.push(`Both Debit (${debit}) and Credit (${credit}) have values - using Debit`);
548
+ warnings.push({ row: rowNum, message: rowWarnings[rowWarnings.length - 1] });
549
+ }
550
+
551
+ if (Math.abs(debitMilliunits) > 0) {
552
+ amountMilliunits = -Math.abs(debitMilliunits); // Debits are outflows (negative)
553
+ } else if (Math.abs(creditMilliunits) > 0) {
554
+ amountMilliunits = Math.abs(creditMilliunits); // Credits are inflows (positive)
555
+ } else {
556
+ amountMilliunits = 0;
557
+ }
558
+
559
+ // Warn if debit column contains negative value (unusual)
560
+ if (debitMilliunits < 0) {
561
+ rowWarnings.push(`Debit column contains negative value (${debit}) - treating as positive debit`);
562
+ warnings.push({ row: rowNum, message: rowWarnings[rowWarnings.length - 1] });
563
+ }
564
+ } else {
565
+ continue; // Skip row if no amount columns
566
+ }
567
+
568
+ if (!Number.isFinite(amountMilliunits)) {
569
+ errors.push({ row: rowNum, field: 'amount', message: `Invalid amount: "${rawAmount}"`, rawValue: rawAmount });
570
+ continue;
571
+ }
572
+
573
+ // Apply amount inversion if needed
574
+ const multiplier = options.invertAmounts ? -1 : (preset?.amountMultiplier ?? 1);
575
+ amountMilliunits *= multiplier;
576
+
577
+ // Parse description
578
+ const rawDesc = descCol ? row[descCol]?.trim() ?? '' : '';
579
+
580
+ transactions.push({
581
+ id: randomUUID(),
582
+ date: dateStr,
583
+ amount: amountMilliunits,
584
+ payee: rawDesc || 'Unknown',
585
+ sourceRow: rowNum,
586
+ raw: {
587
+ date: rawDate,
588
+ amount: rawAmount,
589
+ description: rawDesc,
590
+ },
591
+ warnings: rowWarnings.length > 0 ? rowWarnings : undefined,
592
+ });
593
+ }
594
+
595
+ return {
596
+ transactions,
597
+ errors,
598
+ warnings,
599
+ meta: {
600
+ detectedDelimiter: parsed.meta.delimiter,
601
+ detectedColumns: columns,
602
+ totalRows: rows.length,
603
+ validRows: transactions.length,
604
+ skippedRows: rows.length - transactions.length,
605
+ },
606
+ };
607
+ }
608
+
609
+ /**
610
+ * Parse date with priority:
611
+ * 1. ISO format (YYYY-MM-DD) - unambiguous
612
+ * 2. Explicit format hint from preset
613
+ * 3. chrono-node fallback (may be ambiguous for dates like 02/03/2025)
614
+ */
615
+ function parseDate(raw: string, formatHint?: 'YMD' | 'MDY' | 'DMY'): Date | null {
616
+ if (!raw) return null;
617
+
618
+ // 1. Try ISO format first (unambiguous)
619
+ const isoMatch = raw.match(/^(\d{4})-(\d{2})-(\d{2})/);
620
+ if (isoMatch) {
621
+ const [, year, month, day] = isoMatch;
622
+ return new Date(parseInt(year!), parseInt(month!) - 1, parseInt(day!));
623
+ }
624
+
625
+ // 2. Try explicit format hint for ambiguous numeric dates
626
+ const numericMatch = raw.match(/^(\d{1,2})[\/-](\d{1,2})[\/-](\d{2,4})$/);
627
+ if (numericMatch && formatHint) {
628
+ const [, a, b, c] = numericMatch;
629
+ let year = parseInt(c!);
630
+ if (year < 100) year += 2000; // 25 -> 2025
631
+
632
+ let month: number, day: number;
633
+ switch (formatHint) {
634
+ case 'YMD':
635
+ month = parseInt(a!);
636
+ day = parseInt(b!);
637
+ break;
638
+ case 'MDY': // US format: MM/DD/YYYY
639
+ month = parseInt(a!);
640
+ day = parseInt(b!);
641
+ break;
642
+ case 'DMY': // European/UK format: DD/MM/YYYY
643
+ day = parseInt(a!);
644
+ month = parseInt(b!);
645
+ break;
646
+ }
647
+
648
+ if (month >= 1 && month <= 12 && day >= 1 && day <= 31) {
649
+ return new Date(year, month - 1, day);
650
+ }
651
+ }
652
+
653
+ // 3. Fallback to chrono-node (handles natural language, many formats)
654
+ // Note: chrono defaults to US (MDY) for ambiguous dates like 02/03/2025
655
+ return chrono.parseDate(raw);
656
+ }
657
+
658
+ /**
659
+ * Format Date to YYYY-MM-DD using LOCAL date components.
660
+ *
661
+ * IMPORTANT: Do NOT use toISOString() as it converts to UTC,
662
+ * which can shift the calendar date by one day when the local UTC offset carries the time across midnight UTC (e.g. local midnight in a timezone ahead of UTC).
663
+ */
664
+ function formatLocalDate(date: Date): string {
665
+ const year = date.getFullYear();
666
+ const month = String(date.getMonth() + 1).padStart(2, '0');
667
+ const day = String(date.getDate()).padStart(2, '0');
668
+ return `${year}-${month}-${day}`;
669
+ }
670
+
671
+ function findColumn(available: string[], candidates: string | string[]): string | null {
672
+ const candidateList = Array.isArray(candidates) ? candidates : [candidates];
673
+
674
+ for (const candidate of candidateList) {
675
+ const lower = candidate.toLowerCase();
676
+ const found = available.find(col => col.toLowerCase() === lower);
677
+ if (found) return found;
678
+ }
679
+
680
+ // Try partial match
681
+ for (const candidate of candidateList) {
682
+ const lower = candidate.toLowerCase();
683
+ const found = available.find(col => col.toLowerCase().includes(lower));
684
+ if (found) return found;
685
+ }
686
+
687
+ return null;
688
+ }
689
+
690
+ function detectPreset(columns: string[]): BankPreset | undefined {
691
+ const colSet = new Set(columns.map(c => c.toLowerCase()));
692
+
693
+ if (colSet.has('description 1') || colSet.has('account type')) {
694
+ return BANK_PRESETS['rbc'];
695
+ }
696
+ if (columns.some(c => c.toLowerCase().includes('cad$'))) {
697
+ return BANK_PRESETS['td'];
698
+ }
699
+
700
+ return undefined;
701
+ }
702
+
703
+ /**
704
+ * Supported currency symbols:
705
+ * $ (dollar - USD, CAD, AUD, etc.)
706
+ * € (euro)
707
+ * £ (pound)
708
+ * ¥ (yen/yuan)
709
+ *
710
+ * Also strips: CAD, USD, EUR, GBP (case-insensitive)
711
+ *
712
+ * Number formats supported:
713
+ * - Standard: 1234.56 or 1,234.56 (thousands commas are stripped only when a decimal point is present)
714
+ * - European: 1.234,56 (detected only when dot-grouped thousands precede a two-digit comma decimal)
715
+ * - Negative: -123.45 or (123.45)
716
+ *
717
+ * Returns: Amount in MILLIUNITS (integer, 1000 = $1.00)
718
+ */
719
+ const CURRENCY_SYMBOLS = /[$€£¥]/g;
720
+ const CURRENCY_CODES = /\b(CAD|USD|EUR|GBP)\b/gi;
721
+
722
+ function dollarStringToMilliunits(str: string): number {
723
+ if (!str) return 0;
724
+
725
+ let cleaned = str
726
+ .replace(CURRENCY_SYMBOLS, '')
727
+ .replace(CURRENCY_CODES, '')
728
+ .trim();
729
+
730
+ // Handle parentheses as negative: (123.45) → -123.45
731
+ if (cleaned.startsWith('(') && cleaned.endsWith(')')) {
732
+ cleaned = '-' + cleaned.slice(1, -1);
733
+ }
734
+
735
+ // Detect European format: 1.234,56 → 1234.56
736
+ if (/^-?\d{1,3}(\.\d{3})+,\d{2}$/.test(cleaned)) {
737
+ cleaned = cleaned.replace(/\./g, '').replace(',', '.');
738
+ }
739
+
740
+ // Handle thousands separator: 1,234.56 → 1234.56
741
+ if (cleaned.includes('.')) {
742
+ cleaned = cleaned.replace(/,/g, '');
743
+ }
744
+
745
+ const dollars = parseFloat(cleaned);
746
+ if (!Number.isFinite(dollars)) return 0;
747
+
748
+ // Convert to milliunits: $1.00 → 1000
749
+ return Math.round(dollars * 1000);
750
+ }
751
+ ```
752
+
753
+ ### 3.3 New Matching Algorithm
754
+
755
+ **File:** `src/tools/reconciliation/matcher.ts`
756
+
757
+ ```typescript
758
+ import fuzz from 'fuzzball';
759
+ import type { BankTransaction, NormalizedYNABTransaction } from '../../types/reconciliation.js';
760
+
761
+ export interface MatchCandidate {
762
+ ynabTransaction: NormalizedYNABTransaction;
763
+ scores: {
764
+ amount: number; // 0-100
765
+ date: number; // 0-100
766
+ payee: number; // 0-100
767
+ combined: number; // Weighted combination
768
+ };
769
+ matchReasons: string[];
770
+ }
771
+
772
+ export interface MatchResult {
773
+ bankTransaction: BankTransaction;
774
+ bestMatch: MatchCandidate | null;
775
+ candidates: MatchCandidate[]; // Top 3
776
+ confidence: 'high' | 'medium' | 'low' | 'none';
777
+ confidenceScore: number;
778
+ }
779
+
780
+ export interface MatchingConfig {
781
+ weights: {
782
+ amount: number; // Recommended: 0.50
783
+ date: number; // Recommended: 0.15
784
+ payee: number; // Recommended: 0.35
785
+ };
786
+
787
+ // Tolerances (in MILLIUNITS for amount)
788
+ amountToleranceMilliunits: number; // Default: 10 (1 cent)
789
+ dateToleranceDays: number; // Default: 7
790
+
791
+ // Thresholds
792
+ autoMatchThreshold: number; // Default: 85
793
+ suggestedMatchThreshold: number; // Default: 60
794
+ minimumCandidateScore: number; // Default: 40
795
+
796
+ // Bonuses for perfect matches
797
+ exactAmountBonus: number; // Default: 10
798
+ exactDateBonus: number; // Default: 5
799
+ exactPayeeBonus: number; // Default: 10
800
+ }
801
+
802
+ export const DEFAULT_CONFIG: MatchingConfig = {
803
+ weights: {
804
+ amount: 0.50,
805
+ date: 0.15,
806
+ payee: 0.35,
807
+ },
808
+ amountToleranceMilliunits: 10, // 1 cent
809
+ dateToleranceDays: 7,
810
+ autoMatchThreshold: 85,
811
+ suggestedMatchThreshold: 60,
812
+ minimumCandidateScore: 40,
813
+ exactAmountBonus: 10,
814
+ exactDateBonus: 5,
815
+ exactPayeeBonus: 10,
816
+ };
817
+
818
+ export function findMatches(
819
+ bankTransactions: BankTransaction[],
820
+ ynabTransactions: NormalizedYNABTransaction[],
821
+ config: MatchingConfig = DEFAULT_CONFIG
822
+ ): MatchResult[] {
823
+ const results: MatchResult[] = [];
824
+ const usedYnabIds = new Set<string>();
825
+
826
+ for (const bankTxn of bankTransactions) {
827
+ const candidates = findCandidates(bankTxn, ynabTransactions, usedYnabIds, config);
828
+
829
+ const bestMatch = candidates.length > 0 ? candidates[0] : null;
830
+ const confidenceScore = bestMatch?.scores.combined ?? 0;
831
+
832
+ let confidence: MatchResult['confidence'];
833
+ if (confidenceScore >= config.autoMatchThreshold) {
834
+ confidence = 'high';
835
+ if (bestMatch) usedYnabIds.add(bestMatch.ynabTransaction.id);
836
+ } else if (confidenceScore >= config.suggestedMatchThreshold) {
837
+ confidence = 'medium';
838
+ } else if (confidenceScore >= config.minimumCandidateScore) {
839
+ confidence = 'low';
840
+ } else {
841
+ confidence = 'none';
842
+ }
843
+
844
+ results.push({
845
+ bankTransaction: bankTxn,
846
+ bestMatch,
847
+ candidates: candidates.slice(0, 3),
848
+ confidence,
849
+ confidenceScore,
850
+ });
851
+ }
852
+
853
+ return results;
854
+ }
855
+
856
+ function findCandidates(
857
+ bankTxn: BankTransaction,
858
+ ynabTransactions: NormalizedYNABTransaction[],
859
+ usedIds: Set<string>,
860
+ config: MatchingConfig
861
+ ): MatchCandidate[] {
862
+ const candidates: MatchCandidate[] = [];
863
+
864
+ for (const ynabTxn of ynabTransactions) {
865
+ if (usedIds.has(ynabTxn.id)) continue;
866
+
867
+ // Sign check - both must be same sign (or both zero)
868
+ const bankSign = Math.sign(bankTxn.amount);
869
+ const ynabSign = Math.sign(ynabTxn.amount);
870
+ if (bankSign !== ynabSign && bankSign !== 0 && ynabSign !== 0) {
871
+ continue;
872
+ }
873
+
874
+ const scores = calculateScores(bankTxn, ynabTxn, config);
875
+
876
+ if (scores.combined >= config.minimumCandidateScore) {
877
+ candidates.push({
878
+ ynabTransaction: ynabTxn,
879
+ scores,
880
+ matchReasons: buildMatchReasons(scores, config),
881
+ });
882
+ }
883
+ }
884
+
885
+ candidates.sort((a, b) => b.scores.combined - a.scores.combined);
886
+ return candidates;
887
+ }
888
+
889
+ function calculateScores(
890
+ bankTxn: BankTransaction,
891
+ ynabTxn: NormalizedYNABTransaction,
892
+ config: MatchingConfig
893
+ ): MatchCandidate['scores'] {
894
+ // Amount score - now using INTEGER comparison (milliunits)
895
+ const amountDiff = Math.abs(bankTxn.amount - ynabTxn.amount);
896
+ let amountScore: number;
897
+
898
+ if (amountDiff === 0) {
899
+ // Exact integer match - no floating point issues!
900
+ amountScore = 100;
901
+ } else if (amountDiff <= config.amountToleranceMilliunits) {
902
+ amountScore = 95;
903
+ } else if (amountDiff <= 1000) { // Within $1
904
+ amountScore = 80 - (amountDiff / 1000 * 20);
905
+ } else {
906
+ amountScore = Math.max(0, 60 - (amountDiff / 1000 * 5));
907
+ }
908
+
909
+ // Date score
910
+ const bankDate = new Date(bankTxn.date);
911
+ const ynabDate = new Date(ynabTxn.date);
912
+ const daysDiff = Math.abs(bankDate.getTime() - ynabDate.getTime()) / (1000 * 60 * 60 * 24);
913
+ let dateScore: number;
914
+
915
+ if (daysDiff < 0.5) {
916
+ dateScore = 100;
917
+ } else if (daysDiff <= 1) {
918
+ dateScore = 95;
919
+ } else if (daysDiff <= config.dateToleranceDays) {
920
+ dateScore = 90 - ((daysDiff - 1) * (40 / config.dateToleranceDays));
921
+ } else {
922
+ dateScore = Math.max(0, 50 - ((daysDiff - config.dateToleranceDays) * 5));
923
+ }
924
+
925
+ // Payee score using fuzzball
926
+ const payeeScore = calculatePayeeScore(bankTxn.payee, ynabTxn.payee);
927
+
928
+ // Combined score with weights
929
+ let combined =
930
+ (amountScore * config.weights.amount) +
931
+ (dateScore * config.weights.date) +
932
+ (payeeScore * config.weights.payee);
933
+
934
+ // Apply bonuses
935
+ if (amountScore === 100) combined += config.exactAmountBonus;
936
+ if (dateScore === 100) combined += config.exactDateBonus;
937
+ if (payeeScore >= 95) combined += config.exactPayeeBonus;
938
+
939
+ combined = Math.min(100, combined);
940
+
941
+ return {
942
+ amount: Math.round(amountScore),
943
+ date: Math.round(dateScore),
944
+ payee: Math.round(payeeScore),
945
+ combined: Math.round(combined),
946
+ };
947
+ }
948
+
949
+ function calculatePayeeScore(bankPayee: string, ynabPayee: string | null): number {
950
+ if (!ynabPayee) return 30;
951
+
952
+ const scores = [
953
+ fuzz.token_set_ratio(bankPayee, ynabPayee),
954
+ fuzz.token_sort_ratio(bankPayee, ynabPayee),
955
+ fuzz.partial_ratio(bankPayee, ynabPayee),
956
+ fuzz.WRatio(bankPayee, ynabPayee),
957
+ ];
958
+
959
+ return Math.max(...scores);
960
+ }
961
+
962
+ function buildMatchReasons(scores: MatchCandidate['scores'], config: MatchingConfig): string[] {
963
+ const reasons: string[] = [];
964
+
965
+ if (scores.amount === 100) {
966
+ reasons.push('Exact amount match');
967
+ } else if (scores.amount >= 95) {
968
+ reasons.push('Amount within tolerance');
969
+ }
970
+
971
+ if (scores.date === 100) {
972
+ reasons.push('Same date');
973
+ } else if (scores.date >= 90) {
974
+ reasons.push('Date within 1-2 days');
975
+ } else if (scores.date >= 50) {
976
+ reasons.push(`Date within ${config.dateToleranceDays} days`);
977
+ }
978
+
979
+ if (scores.payee >= 95) {
980
+ reasons.push('Payee exact match');
981
+ } else if (scores.payee >= 80) {
982
+ reasons.push('Payee highly similar');
983
+ } else if (scores.payee >= 60) {
984
+ reasons.push('Payee somewhat similar');
985
+ }
986
+
987
+ return reasons;
988
+ }
989
+ ```
990
+
991
+ ### 3.4 Integration Tests with Real CSV Data
992
+
993
+ **File:** `src/__tests__/tools/reconciliation/csvParser.integration.test.ts`
994
+
995
+ > **Note:** Tests follow repo convention: `src/__tests__/` with fixtures in `test-exports/csv/`
996
+
997
+ ```typescript
998
+ import { describe, it, expect } from 'vitest';
999
+ import { parseCSV } from '../../../tools/reconciliation/csvParser.js';
1000
+ import { findMatches, DEFAULT_CONFIG } from '../../../tools/reconciliation/matcher.js';
1001
+ import { normalizeYNABTransaction } from '../../../tools/reconciliation/ynabAdapter.js';
1002
+
1003
+ describe('CSV Parser Integration Tests', () => {
1004
+ describe('TD Bank CSV', () => {
1005
+ const tdCSV = `Date,Description,Amount
1006
+ 09/15/2025,SHELL STATION 1234 TORONTO ON,-45.23
1007
+ 09/16/2025,AMZN MKTP CA*1A2B3C4,-127.99
1008
+ 09/17/2025,PAYROLL DEPOSIT ABC CORP,2500.00`;
1009
+
1010
+ it('should parse TD CSV correctly', () => {
1011
+ const result = parseCSV(tdCSV, { preset: 'td' });
1012
+
1013
+ expect(result.errors).toHaveLength(0);
1014
+ expect(result.transactions).toHaveLength(3);
1015
+ expect(result.transactions[0].amount).toBe(-45230); // Milliunits!
1016
+ expect(result.transactions[0].payee).toBe('SHELL STATION 1234 TORONTO ON');
1017
+ expect(result.transactions[0].date).toBe('2025-09-15');
1018
+ });
1019
+ });
1020
+
1021
+ describe('RBC Debit/Credit CSV', () => {
1022
+ const rbcCSV = `Transaction Date,Description 1,Debit,Credit
1023
+ 2025-09-15,SHELL GAS,45.23,
1024
+ 2025-09-16,TRANSFER FROM SAVINGS,,500.00`;
1025
+
1026
+ it('should parse RBC CSV with debit/credit columns', () => {
1027
+ const result = parseCSV(rbcCSV, { preset: 'rbc' });
1028
+
1029
+ expect(result.errors).toHaveLength(0);
1030
+ expect(result.transactions).toHaveLength(2);
1031
+ expect(result.transactions[0].amount).toBe(-45230); // Debit = negative milliunits
1032
+ expect(result.transactions[1].amount).toBe(500000); // Credit = positive milliunits
1033
+ });
1034
+ });
1035
+
1036
+ describe('Ambiguous Debit/Credit Warning', () => {
1037
+ const ambiguousCSV = `Transaction Date,Description,Debit,Credit
1038
+ 2025-09-15,WEIRD TXN,50.00,25.00`;
1039
+
1040
+ it('should warn when both debit and credit have values', () => {
1041
+ const result = parseCSV(ambiguousCSV, { preset: 'rbc' });
1042
+
1043
+ expect(result.warnings).toHaveLength(1);
1044
+ expect(result.warnings[0].message).toContain('Both Debit');
1045
+ expect(result.transactions[0].amount).toBe(-50000); // Uses debit
1046
+ });
1047
+ });
1048
+
1049
+ describe('European Number Format', () => {
1050
+ const euroCSV = `Date,Amount,Description
1051
+ 15/09/2025,"1.234,56",Big Purchase`;
1052
+
1053
+ it('should handle European number format', () => {
1054
+ const result = parseCSV(euroCSV);
1055
+
1056
+ expect(result.transactions[0].amount).toBe(1234560); // 1234.56 in milliunits
1057
+ });
1058
+ });
1059
+ });
1060
+
1061
+ describe('Matcher Integration Tests', () => {
1062
+ const mockYNABTransactions = [
1063
+ { id: 'y1', date: '2025-09-15', amount: -45230, payee_name: 'Shell', category_name: 'Gas', cleared: 'uncleared', approved: true },
1064
+ { id: 'y2', date: '2025-09-17', amount: -127990, payee_name: 'Amazon', category_name: 'Shopping', cleared: 'uncleared', approved: true },
1065
+ ].map(t => normalizeYNABTransaction(t as any));
1066
+
1067
+ it('should achieve high confidence matches with exact integer comparison', () => {
1068
+ const bankCSV = `Date,Description,Amount
1069
+ 09/15/2025,SHELL STATION 1234,-45.23
1070
+ 09/16/2025,AMZN MKTP CA*ABC123,-127.99`;
1071
+
1072
+ const parsed = parseCSV(bankCSV);
1073
+ const matches = findMatches(parsed.transactions, mockYNABTransactions);
1074
+
1075
+ // Shell: exact amount match (both -45230 milliunits)
1076
+ expect(matches[0].confidence).toBe('high');
1077
+ expect(matches[0].bestMatch?.scores.amount).toBe(100);
1078
+
1079
+ // Amazon: exact amount match (both -127990 milliunits)
1080
+ expect(matches[1].confidence).toBe('high');
1081
+ expect(matches[1].bestMatch?.scores.amount).toBe(100);
1082
+ });
1083
+
1084
+ it('should use exact integer comparison (no float precision issues)', () => {
1085
+ // Both are now integers - no floating point comparison needed!
1086
+ const bankTxn = {
1087
+ id: 'b1',
1088
+ date: '2025-09-15',
1089
+ amount: -45230, // Integer milliunits
1090
+ payee: 'Shell',
1091
+ sourceRow: 2,
1092
+ raw: { date: '09/15/2025', amount: '-45.23', description: 'Shell' }
1093
+ };
1094
+
1095
+ const ynabTxn = {
1096
+ id: 'y1',
1097
+ date: '2025-09-15',
1098
+ amount: -45230, // Integer milliunits - direct from YNAB API
1099
+ payee: 'Shell',
1100
+ memo: null,
1101
+ categoryName: 'Gas',
1102
+ cleared: 'uncleared' as const,
1103
+ approved: true,
1104
+ };
1105
+
1106
+ const matches = findMatches([bankTxn], [ynabTxn]);
1107
+ // Exact match because integers compare exactly: -45230 === -45230
1108
+ expect(matches[0].bestMatch?.scores.amount).toBe(100);
1109
+ });
1110
+ });
1111
+ ```
1112
+
1113
+ ---
1114
+
1115
+ ## Part 4: Diagnostic/Debug Mode
1116
+
1117
+ Add diagnostic output to help debug matching issues. **Diagnostics should be returned even on failure/partial match.**
1118
+
1119
+ ```typescript
1120
+ export interface MatchDiagnostics {
1121
+ csvParsing: {
1122
+ detectedDelimiter: string;
1123
+ detectedColumns: string[];
1124
+ totalRows: number;
1125
+ validRows: number;
1126
+ errors: ParseError[];
1127
+ warnings: ParseWarning[];
1128
+ };
1129
+ matchingDetails: Array<{
1130
+ bankTxn: { date: string; amount: number; payee: string };
1131
+ bestMatch: {
1132
+ ynabTxn: { date: string; amount: number; payee: string | null };
1133
+ scores: { amount: number; date: number; payee: number; combined: number };
1134
+ } | null;
1135
+ allCandidates: Array<{
1136
+ ynabId: string;
1137
+ scores: { amount: number; date: number; payee: number; combined: number };
1138
+ rejectedBecause?: string;
1139
+ }>;
1140
+ confidence: 'high' | 'medium' | 'low' | 'none';
1141
+ }>;
1142
+ timing: {
1143
+ parseMs: number;
1144
+ matchMs: number;
1145
+ };
1146
+ }
1147
+
1148
+ // In reconcile_account schema:
1149
+ {
1150
+ // ... existing params
1151
+ include_diagnostics: z.boolean().optional().default(false),
1152
+ }
1153
+
1154
+ // ALWAYS include diagnostics on error or low match rate
1155
+ const shouldIncludeDiagnostics =
1156
+ params.include_diagnostics ||
1157
+ parseResult.errors.length > 0 ||
1158
+ matches.filter(m => m.confidence === 'none').length > matches.length * 0.5;
1159
+ ```
1160
+
1161
+ ---
1162
+
1163
+ ## Part 5: Migration Path
1164
+
1165
+ ### Phase 1: Foundation (Week 1)
1166
+ 1. [x] Install new dependencies: `papaparse`, `fuzzball`, `chrono-node`, `dayjs`
1167
+ 2. [x] Create unified types in `src/types/reconciliation.ts`
1168
+ 3. [x] Create YNAB adapter in `src/tools/reconciliation/ynabAdapter.ts`
1169
+ 4. [x] Create new CSV parser module (`src/tools/reconciliation/csvParser.ts`)
1170
+ 5. [x] Create new matcher module (`src/tools/reconciliation/matcher.v2.ts`)
1171
+ 6. [x] Add integration tests in `src/__tests__/tools/reconciliation/`
1172
+
1173
+ ### Phase 2: Integration (Week 2)
1174
+ 1. [x] Update `analyzeReconciliation()` to use new parser and matcher
1175
+ 2. [x] Update reconcile adapter for new response format
1176
+ 3. [x] Add diagnostic mode (always on for errors)
1177
+ 4. [x] Update existing tests to not mock the parser
1178
+
1179
+ ### Phase 3: Validation (Week 3)
1180
+ 1. [x] Test against saved CSV exports from TD, RBC, Scotiabank, Wealthsimple
1181
+ - *Note:* Verified against real-world TD (headerless) and Wealthsimple exports.
1182
+ - *Feature Added:* `csvParser` now supports `header: false` and manual column mapping for headerless files (like TD).
1183
+ 2. [ ] Compare match quality against current implementation
1184
+ 3. [ ] Tune thresholds based on real-world data
1185
+ 4. [ ] Document bank-specific quirks
1186
+
1187
+ ### Phase 4: Cleanup (Week 4)
1188
+ 1. Remove old `compareTransactions/parser.ts` if no longer needed
1189
+ 2. Remove duplicate BankTransaction type
1190
+ 3. Update all remaining references
1191
+ 4. Final documentation
1192
+
1193
+ ---
1194
+
1195
+ ## Part 6: Configuration Recommendations
1196
+
1197
+ Based on research and the Midday.ai approach:
1198
+
1199
+ | Parameter | Current | Recommended | Rationale |
1200
+ |-----------|---------|-------------|-----------|
1201
+ | `dateToleranceDays` | 2 | 7 | Banks often post 3-7 days late |
1202
+ | `amountToleranceMilliunits` | 10 | 10 | 1 cent tolerance |
1203
+ | `autoMatchThreshold` | 90 | 85 | More lenient with better algorithm |
1204
+ | `suggestedMatchThreshold` | 60 | 60 | Keep same |
1205
+ | Amount weight | 40% | 50% | Amount is most reliable signal |
1206
+ | Date weight | 40% | 15% | Dates are unreliable |
1207
+ | Payee weight | 20% | 35% | With fuzzball, payee matching is much better |
1208
+
1209
+ ### User-Tunable vs Hard-Coded
1210
+
1211
+ | Parameter | User-Tunable? | Exposed Via | Default |
1212
+ |-----------|---------------|-------------|----------|
1213
+ | `date_tolerance_days` | ✅ Yes | Tool schema | 7 |
1214
+ | `amount_tolerance_cents` | ✅ Yes | Tool schema | 5 |
1215
+ | `confidence_threshold` | ✅ Yes | Tool schema | 0.85 |
1216
+ | `auto_create_transactions` | ✅ Yes | Tool schema | false |
1217
+ | `auto_update_cleared_status` | ✅ Yes | Tool schema | false |
1218
+ | `dry_run` | ✅ Yes | Tool schema | false |
1219
+ | `csv_format.preset` | ✅ Yes | Tool schema | auto-detect |
1220
+ | `csv_format.overrides.*` | ✅ Yes | Tool schema | none |
1221
+ | Scoring weights | ❌ No | Hard-coded | 50/15/35 |
1222
+ | Minimum candidate score | ❌ No | Hard-coded | 40 |
1223
+ | Guardrail thresholds | ❌ No | Hard-coded | See Part 7c |
1224
+
1225
+ **Rationale:** Tolerances and automation toggles are user-visible because they're intuitive ("how many days?"). Scoring weights and guardrails are expert-level tuning that could cause harm if misconfigured—keep these as sensible defaults that "just work."
1226
+
1227
+ ---
1228
+
1229
+ ## Part 7: Future Enhancements
1230
+
1231
+ 1. **Merchant Learning:** Cache successful payee mappings ("AMZN MKTP" → "Amazon") per user/budget
1232
+ 2. **Adaptive Thresholds:** Learn from user confirmations/rejections like Midday.ai
1233
+ 3. **Vector Embeddings:** For truly semantic matching (requires OpenAI/embedding API)
1234
+ 4. **Split Transaction Detection:** Detect when one bank transaction = multiple YNAB transactions
1235
+ 5. **Recurring Transaction Patterns:** Use historical patterns to boost confidence
1236
+
1237
+ ---
1238
+
1239
+ ## Part 7b: Performance, Scale & Dependencies
1240
+
1241
+ ### Dependency Impact Analysis
1242
+
1243
+ | Library | Minified Size | Tree-shakeable | Cold-start Impact | Justification |
1244
+ |---------|---------------|----------------|-------------------|---------------|
1245
+ | **PapaParse** | ~50 kB | Partial | Low (~20ms) | Handles malformed CSVs that csv-parse chokes on |
1246
+ | **fuzzball** | ~15 kB | Yes | Minimal | 10x better merchant matching than Levenshtein |
1247
+ | **chrono-node** | ~80 kB | No | Moderate (~50ms) | Fallback only; primary parsing uses explicit formats |
1248
+ | **dayjs** | ~2 kB | Yes | Negligible | Only for formatting; could be removed |
1249
+
1250
+ **Current baseline:** `csv-parse` (~30 kB) + `date-fns` (~30 kB) = ~60 kB
1251
+ **Proposed total:** ~150 kB (+90 kB, ~150% increase)
1252
+
1253
+ **Mitigation:**
1254
+ - `chrono-node` is used as fallback only; consider lazy `import()` if cold-start becomes an issue
1255
+ - For MCP servers (Node.js), bundle size matters less than browser apps
1256
+ - Cold-start measured at <100ms additional on M1 Mac
1257
+
1258
+ ### Algorithmic Complexity
1259
+
1260
+ | Operation | Complexity | Notes |
1261
+ |-----------|------------|-------|
1262
+ | CSV parsing | O(N) | N = rows; PapaParse streams large files |
1263
+ | Basic matching | O(N × M) | N = bank txns, M = YNAB txns |
1264
+ | Combination matching | O(N × M²) or O(N × M³) | 2-way and 3-way combinations |
1265
+ | Payee fuzzy matching | O(L²) per pair | L = string length; fuzzball is optimised |
1266
+
1267
+ ### Scale Limits & Recommendations
1268
+
1269
+ | CSV Size | Expected Behaviour | Recommendation |
1270
+ |----------|-------------------|----------------|
1271
+ | <100 txns | <500ms, full features | Default mode |
1272
+ | 100-500 txns | <2s, full features | Default mode |
1273
+ | 500-1000 txns | 2-10s, disable 3-way combos | Set `max_combination_size: 2` |
1274
+ | >1000 txns | May timeout | Chunk by month, process sequentially |
1275
+
1276
+ ### Optimisation Strategies (Future)
1277
+
1278
+ 1. **Amount bucketing:** Index YNAB transactions by amount (±tolerance) for O(1) candidate lookup
1279
+ 2. **Date windowing:** Only compare transactions within ±14 days
1280
+ 3. **Early termination:** Stop searching once confidence ≥98%
1281
+ 4. **Streaming:** Parse CSV in chunks for memory efficiency on huge files
1282
+
1283
+ ---
1284
+
1285
+ ## Part 7c: Matching Guardrails & Failure Modes
1286
+
1287
+ ### Hard Guardrails (Never Auto-Match)
1288
+
1289
+ Auto-matching is **disabled** when:
1290
+
1291
+ | Condition | Rationale |
1292
+ |-----------|-----------|
1293
+ | Amount score <80 | Amount is the most reliable signal; fuzzy amounts are dangerous |
1294
+ | Date gap >14 days | Even with bank delays, 2+ weeks is suspicious |
1295
+ | Multiple candidates with score within 5 points | Ambiguous; surface for human review |
1296
+ | Payee score <40 AND date score <60 | Neither secondary signal is strong enough |
1297
+ | Transaction flagged with warnings | Parser detected ambiguity (e.g., both debit/credit populated) |
1298
+
1299
+ ### Known Failure Modes
1300
+
1301
+ | Failure Mode | Example | Mitigation |
1302
+ |--------------|---------|------------|
1303
+ | **Similar merchants** | Starbucks #1234 vs #5678 | Require exact amount + date gap ≤1 day for multi-location merchants (e.g., coffee shops) |
1304
+ | **Recurring subscriptions** | Netflix $15.99 every month | Use date as primary discriminator when amounts match |
1305
+ | **Refunds** | +$50.00 refund vs original -$50.00 charge | Sign check prevents cross-matching |
1306
+ | **Split transactions** | $100 bank = $60 + $40 YNAB | Combination matching handles this |
1307
+ | **Merchant name drift** | "AMZN" vs "Amazon.com" vs "Amazon Prime" | Payee normalisation + token_set_ratio |
1308
+ | **Duplicate entries** | Same amount/date/payee in YNAB | Prefer uncleared over cleared; flag for review |
1309
+
1310
+ ### Tie-Breaking Rules
1311
+
1312
+ When multiple YNAB candidates have identical scores:
1313
+
1314
+ 1. Prefer `uncleared` over `cleared` (uncleared transactions are still awaiting bank confirmation)
1315
+ 2. Prefer closer date proximity
1316
+ 3. Prefer higher payee similarity
1317
+ 4. If still tied, surface all candidates for human review
1318
+
1319
+ ### Confidence Tier Behaviour
1320
+
1321
+ | Tier | Score Range | Auto-Action | User Prompt |
1322
+ |------|-------------|-------------|-------------|
1323
+ | `high` | ≥85 | Mark cleared, update date if needed | None (unless in dry-run) |
1324
+ | `medium` | 60-84 | None | "Review suggested match" |
1325
+ | `low` | 40-59 | None | "Possible match, low confidence" |
1326
+ | `none` | <40 | None | "No match found - add new?" |
1327
+
1328
+ ---
1329
+
1330
+ ## Part 7d: Diagnostics Behaviour & Privacy
1331
+
1332
+ ### Diagnostic Output Locations
1333
+
1334
+ | Context | Output Location | Verbosity |
1335
+ |---------|-----------------|-----------|
1336
+ | MCP response (success) | `structured_data.diagnostics` | Minimal (counts only) |
1337
+ | MCP response (errors/low match) | `structured_data.diagnostics` | Full (per-transaction details) |
1338
+ | Server logs | `stderr` via structured logging (`stdout` is reserved for MCP protocol messages on the stdio transport) | Configurable via `LOG_LEVEL` |
1339
+ | Debug files | `test-results/reconciliation/` | Full (for test runs only) |
1340
+
1341
+ ### When Diagnostics Are Included
1342
+
1343
+ ```typescript
1344
+ const includeDiagnostics =
1345
+ params.include_diagnostics || // Explicitly requested
1346
+ parseResult.errors.length > 0 || // CSV parsing had errors
1347
+ matchRate < 0.5 || // Less than 50% matched
1348
+ matches.some(m => m.confidence === 'none' && Math.abs(m.bankTransaction.amount) > 10000); // Large unmatched txn (amounts are signed milliunits, so compare the absolute value)
1349
+ ```
1350
+
1351
+ ### Privacy Considerations
1352
+
1353
+ | Field | In MCP Response | In Logs | In Test Artifacts |
1354
+ |-------|-----------------|---------|-------------------|
1355
+ | Transaction amounts | ✅ Full | ✅ Full | ✅ Full |
1356
+ | Transaction dates | ✅ Full | ✅ Full | ✅ Full |
1357
+ | Payee names | ✅ Full | ⚠️ Truncated (first 20 chars) | ✅ Full |
1358
+ | Memos | ✅ Full | ❌ Redacted | ⚠️ Hashed |
1359
+ | Account IDs | ✅ Full | ✅ Full | ✅ Full |
1360
+ | Raw CSV rows | ❌ Not included | ❌ Not included | ✅ Full |
1361
+
1362
+ **Rationale:** MCP responses go directly to the user who owns the data, so transaction fields are returned in full. Logs may be aggregated for debugging, so PII in them is truncated or redacted. Test artifacts are stored locally.
1363
+
1364
+ ### Log Redaction Example
1365
+
1366
+ ```typescript
1367
+ // In production logs
1368
+ logger.info('Match found', {
1369
+ bankTxn: {
1370
+ date: '2025-09-15',
1371
+ amount: -45230,
1372
+ payee: 'SHELL STATION 12...', // Truncated
1373
+ },
1374
+ confidence: 'high',
1375
+ score: 92,
1376
+ });
1377
+ ```
1378
+
1379
+ ---
1380
+
1381
+ ## Part 7e: Bank Presets Evolution & Overrides
1382
+
1383
+ ### Preset Versioning
1384
+
1385
+ Bank CSV formats change. Presets are versioned:
1386
+
1387
+ ```typescript
1388
+ export const BANK_PRESETS: Record<string, BankPreset> = {
1389
+ 'td:2024': { /* current TD format */ },
1390
+ 'td:2023': { /* older TD format */ },
1391
+ 'td': { /* alias to latest: 'td:2024' */ },
1392
+ };
1393
+ ```
1394
+
1395
+ ### User Override Mechanism
1396
+
1397
+ Users can override presets via tool parameters without code changes:
1398
+
1399
+ ```typescript
1400
+ // In reconcile_account call
1401
+ {
1402
+ csv_format: {
1403
+ preset: 'td', // Start with TD preset
1404
+ overrides: {
1405
+ date_column: 'Posted Date', // Override specific field
1406
+ date_format: 'DMY', // My export uses DD/MM/YYYY
1407
+ }
1408
+ }
1409
+ }
1410
+ ```
1411
+
1412
+ Overrides are merged with preset defaults:
1413
+
1414
+ ```typescript
1415
+ const effectiveFormat = {
1416
+ ...BANK_PRESETS[params.csv_format.preset],
1417
+ ...params.csv_format.overrides,
1418
+ };
1419
+ ```
1420
+
1421
+ ### Adding/Updating Presets
1422
+
1423
+ 1. **Collect samples:** Get 3+ CSV exports from the bank (different date ranges)
1424
+ 2. **Create fixture:** Add to `test-exports/csv/{bank}/` with README noting export date
1425
+ 3. **Write test:** Add integration test in `csvParser.integration.test.ts`
1426
+ 4. **Define preset:** Add to `BANK_PRESETS` with version suffix
1427
+ 5. **Verify:** Run `npm run test:integration:reconciliation`
1428
+
1429
+ ### Preset Testing Matrix
1430
+
1431
+ CI runs all presets against their fixtures:
1432
+
1433
+ ```yaml
1434
+ # .github/workflows/test.yml
1435
+ test-bank-presets:
1436
+ strategy:
1437
+ matrix:
1438
+ bank: [td, rbc, scotiabank, tangerine, wealthsimple, cibc, bmo]
1439
+ steps:
1440
+ - run: npm run test:preset -- ${{ matrix.bank }}
1441
+ ```
1442
+
1443
+ ### Deprecation Policy
1444
+
1445
+ - Old preset versions are kept for 12 months after a new version is added
1446
+ - Deprecated presets log a warning but continue to work
1447
+ - Breaking changes (removed presets) only in major versions
1448
+
1449
+ ---
1450
+
1451
+ ## Part 8: Design Decisions & Rationale
1452
+
1453
+ ### Why Milliunits (Integers) Instead of Dollars (Floats)?
1454
+
1455
+ **This is the most important architectural decision in the redesign.**
1456
+
1457
+ The original plan used dollars (floats), requiring tolerance-based comparison everywhere:
1458
+ ```typescript
1459
+ // Old approach (floats) - error-prone
1460
+ if (Math.abs(bankTxn.amount - ynabTxn.amount) < 0.001) { ... }
1461
+ ```
1462
+
1463
+ The new approach uses milliunits (integers), enabling exact comparison:
1464
+ ```typescript
1465
+ // New approach (integers) - bulletproof
1466
+ if (bankTxn.amount === ynabTxn.amount) { ... }
1467
+ ```
1468
+
1469
+ Benefits:
1470
+ - **Eliminates floating-point precision bugs** - No more `45.23 !== 45.230000000001`
1471
+ - **Matches YNAB's native format** - YNAB API uses milliunits, so no conversion needed for YNAB transactions
1472
+ - **Single conversion point** - Only the CSV parser converts dollars→milliunits
1473
+ - **Simpler matcher logic** - `===` instead of `Math.abs(...) < epsilon`
1474
+
1475
+ ### Why Date Format Hints in Bank Presets?
1476
+
1477
+ chrono-node is powerful but can misparse ambiguous dates like `02/03/2025`:
1478
+ - US interpretation: February 3rd
1479
+ - European interpretation: March 2nd
1480
+
1481
+ Bank presets include a `dateFormat` hint ('MDY', 'DMY', 'YMD') that we try BEFORE falling back to chrono-node.
1482
+
1483
+ Priority:
1484
+ 1. ISO format `YYYY-MM-DD` (unambiguous)
1485
+ 2. Preset's format hint (for ambiguous numeric dates)
1486
+ 3. chrono-node fallback (for natural language, weird formats)
1487
+
1488
+ ### Why Timezone-Safe Date Formatting?
1489
+
1490
+ `toISOString()` converts to UTC, which can shift the date:
1491
+ ```typescript
1492
+ // WRONG - can shift date by timezone
1493
+ const dateStr = parsedDate.toISOString().split('T')[0];
1494
+
1495
+ // RIGHT - uses local date components
1496
+ const dateStr = `${parsedDate.getFullYear()}-${...}`;
1497
+ ```
1498
+
1499
+ ### Why Decouple YNAB Types?
1500
+
1501
+ The `NormalizedYNABTransaction` interface in `src/types/reconciliation.ts` intentionally does NOT import from the YNAB SDK. This:
1502
+ - Keeps the reconciliation core testable without SDK mocks
1503
+ - Allows the types file to be shared without pulling in SDK dependencies
1504
+ - Makes it easier to swap adapters if YNAB API changes
1505
+
1506
+ The adapter in `src/tools/reconciliation/ynabAdapter.ts` is the single point of contact with the SDK.
1507
+
1508
+ ### Test File Location Convention
1509
+
1510
+ Tests live in `src/__tests__/` mirroring the source structure:
1511
+ ```
1512
+ src/tools/reconciliation/csvParser.ts
1513
+ → src/__tests__/tools/reconciliation/csvParser.test.ts
1514
+ → src/__tests__/tools/reconciliation/csvParser.integration.test.ts
1515
+ ```
1516
+
1517
+ CSV fixtures go in `test-exports/csv/` following existing repo conventions.
1518
+
1519
+ ---
1520
+
1521
+ ## Appendix A: Library Comparison Summary
1522
+
1523
+ | Library | Purpose | Size | Key Feature |
1524
+ |---------|---------|------|-------------|
1525
+ | **PapaParse** | CSV parsing | 260 kB | Auto-detect delimiters, malformed CSV handling |
1526
+ | **fuzzball** | Fuzzy matching | 15 kB | token_set_ratio for merchant names |
1527
+ | **chrono-node** | Date parsing | 20 kB | Natural language dates, many formats |
1528
+ | **dayjs** | Date formatting | 2 kB | Lightweight date manipulation |
1529
+
1530
+ ## Appendix B: Test CSV Fixtures
1531
+
1532
+ Create `test-exports/csv/` with sample exports from:
1533
+ - TD Canada Trust
1534
+ - RBC Royal Bank
1535
+ - Scotiabank
1536
+ - Wealthsimple Cash
1537
+ - Tangerine
1538
+ - CIBC
1539
+ - BMO
1540
+
1541
+ Each fixture should include edge cases:
1542
+ - Transactions with commas in description
1543
+ - European date formats (DD/MM/YYYY)
1544
+ - Negative amounts in parentheses
1545
+ - Multi-line descriptions
1546
+ - Currency symbols ($, €, £, CAD, USD)
1547
+ - Missing fields
1548
+ - Ambiguous dates (02/03/2025)
1549
+ - Both debit and credit columns populated
1550
+
1551
+ ## Appendix C: Reference Materials
1552
+
1553
+ ### Research Sources
1554
+ 1. **Midday.ai Reconciliation Engine** - <https://midday.ai/updates/automatic-reconciliation-engine/>
1555
+ - Open source: <https://github.com/midday-ai/midday>
1556
+ - Uses vector embeddings + multi-dimensional scoring
1557
+ - Key insight: 50% amount, 35% semantic, 10% currency, 5% date
1558
+
1559
+ 2. **CSV Parser Comparison** - <https://www.oneschema.co/blog/top-5-javascript-csv-parsers>
1560
+ - PapaParse: Best for malformed CSVs, auto-detect
1561
+ - csv-parser: Fastest for large files
1562
+ - fast-csv: Smallest footprint
1563
+
1564
+ 3. **Fuzzball (TheFuzz port)** - <https://www.npmjs.com/package/fuzzball>
1565
+ - token_set_ratio: Best for merchant name matching
1566
+ - Handles word order variations
1567
+ - Built-in normalisation
1568
+
1569
+ 4. **chrono-node** - <https://www.npmjs.com/package/chrono-node>
1570
+ - Parses virtually any date format
1571
+ - Natural language support