dinary 0.1.0__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. dinary-0.2.0/.env.example +5 -0
  2. {dinary-0.1.0 → dinary-0.2.0}/.github/workflows/ci.yml +23 -1
  3. {dinary-0.1.0 → dinary-0.2.0}/.gitignore +5 -0
  4. {dinary-0.1.0 → dinary-0.2.0}/.plans/architecture.md +97 -32
  5. dinary-0.2.0/.plans/deploy-oracle-no-db.md +168 -0
  6. {dinary-0.1.0 → dinary-0.2.0}/.plans/frontend-evaluation.md +4 -4
  7. dinary-0.2.0/.plans/phase0.md +252 -0
  8. dinary-0.2.0/.plans/phase1.md +655 -0
  9. dinary-0.2.0/.plans/sql-vs-ibis-comparison.md +118 -0
  10. {dinary-0.1.0 → dinary-0.2.0}/PKG-INFO +21 -8
  11. {dinary-0.1.0 → dinary-0.2.0}/README.md +18 -7
  12. {dinary-0.1.0 → dinary-0.2.0}/docker-compose.yml +2 -2
  13. {dinary-0.1.0 → dinary-0.2.0}/docs/mkdocs.yml +2 -2
  14. {dinary-0.1.0 → dinary-0.2.0}/docs/src/en/cloudflare-setup.md +30 -22
  15. dinary-0.2.0/docs/src/en/deploy-oracle.md +156 -0
  16. dinary-0.2.0/docs/src/en/deploy-selfhost.md +94 -0
  17. {dinary-0.1.0 → dinary-0.2.0}/docs/src/en/google-sheets-setup.md +7 -4
  18. {dinary-0.1.0 → dinary-0.2.0}/docs/src/en/index.md +2 -3
  19. {dinary-0.1.0 → dinary-0.2.0}/docs/src/en/installation.md +6 -7
  20. dinary-0.2.0/docs/src/en/operations.md +87 -0
  21. {dinary-0.1.0 → dinary-0.2.0}/docs/src/en/pwa-install.md +27 -6
  22. {dinary-0.1.0 → dinary-0.2.0}/docs/src/ru/cloudflare-setup.md +30 -22
  23. dinary-0.2.0/docs/src/ru/deploy-oracle.md +156 -0
  24. dinary-0.2.0/docs/src/ru/deploy-selfhost.md +94 -0
  25. {dinary-0.1.0 → dinary-0.2.0}/docs/src/ru/google-sheets-setup.md +7 -4
  26. {dinary-0.1.0 → dinary-0.2.0}/docs/src/ru/index.md +2 -3
  27. {dinary-0.1.0 → dinary-0.2.0}/docs/src/ru/installation.md +6 -7
  28. dinary-0.2.0/docs/src/ru/operations.md +87 -0
  29. {dinary-0.1.0 → dinary-0.2.0}/docs/src/ru/pwa-install.md +27 -6
  30. dinary-0.2.0/package-lock.json +1567 -0
  31. dinary-0.2.0/package.json +14 -0
  32. {dinary-0.1.0 → dinary-0.2.0}/pyproject.toml +2 -0
  33. dinary-0.2.0/src/dinary/__about__.py +1 -0
  34. {dinary-0.1.0 → dinary-0.2.0}/src/dinary/api/categories.py +11 -5
  35. dinary-0.2.0/src/dinary/api/expenses.py +108 -0
  36. {dinary-0.1.0 → dinary-0.2.0}/src/dinary/config.py +5 -2
  37. dinary-0.2.0/src/dinary/main.py +106 -0
  38. dinary-0.2.0/src/dinary/migrations/README.md +15 -0
  39. dinary-0.2.0/src/dinary/migrations/budget/0001_initial_schema.rollback.sql +3 -0
  40. dinary-0.2.0/src/dinary/migrations/budget/0001_initial_schema.sql +25 -0
  41. dinary-0.2.0/src/dinary/migrations/config/0001_initial_schema.rollback.sql +8 -0
  42. dinary-0.2.0/src/dinary/migrations/config/0001_initial_schema.sql +54 -0
  43. {dinary-0.1.0 → dinary-0.2.0}/src/dinary/services/category_store.py +3 -0
  44. dinary-0.2.0/src/dinary/services/db_migrations.py +107 -0
  45. dinary-0.2.0/src/dinary/services/duckdb_repo.py +407 -0
  46. dinary-0.2.0/src/dinary/services/import_sheet.py +229 -0
  47. dinary-0.2.0/src/dinary/services/seed_config.py +206 -0
  48. dinary-0.2.0/src/dinary/services/sheets.py +357 -0
  49. dinary-0.2.0/src/dinary/services/sql_loader.py +70 -0
  50. dinary-0.2.0/src/dinary/services/sync.py +362 -0
  51. dinary-0.2.0/src/dinary/sql/__init__.py +0 -0
  52. dinary-0.2.0/src/dinary/sql/find_travel_event.sql +3 -0
  53. dinary-0.2.0/src/dinary/sql/get_existing_expense.sql +3 -0
  54. dinary-0.2.0/src/dinary/sql/get_month_expenses.sql +14 -0
  55. dinary-0.2.0/src/dinary/sql/insert_expense.sql +5 -0
  56. dinary-0.2.0/src/dinary/sql/list_sheet_categories.sql +3 -0
  57. dinary-0.2.0/src/dinary/sql/resolve_mapping.sql +3 -0
  58. dinary-0.2.0/src/dinary/sql/reverse_lookup_5d.sql +6 -0
  59. dinary-0.2.0/src/dinary/sql/reverse_lookup_travel.sql +3 -0
  60. dinary-0.2.0/src/dinary/sql/seed_load_categories.sql +1 -0
  61. dinary-0.2.0/src/dinary/sql/seed_load_groups.sql +1 -0
  62. dinary-0.2.0/src/dinary/sql/seed_load_members.sql +1 -0
  63. dinary-0.2.0/src/dinary/sql/seed_load_tags.sql +1 -0
  64. {dinary-0.1.0 → dinary-0.2.0}/static/css/style.css +72 -12
  65. dinary-0.2.0/static/icons/icon-180.png +0 -0
  66. dinary-0.2.0/static/icons/icon-192.png +0 -0
  67. dinary-0.2.0/static/icons/icon-512.png +0 -0
  68. dinary-0.2.0/static/index.html +80 -0
  69. {dinary-0.1.0 → dinary-0.2.0}/static/js/api.js +25 -10
  70. dinary-0.2.0/static/js/app.js +325 -0
  71. dinary-0.2.0/static/js/categories.js +66 -0
  72. {dinary-0.1.0 → dinary-0.2.0}/static/js/offline-queue.js +16 -1
  73. dinary-0.2.0/static/js/qr-scanner-lib.js +31 -0
  74. dinary-0.2.0/static/js/qr-scanner-worker.min.js +98 -0
  75. dinary-0.2.0/static/js/qr-scanner.js +108 -0
  76. {dinary-0.1.0 → dinary-0.2.0}/static/manifest.json +10 -2
  77. {dinary-0.1.0 → dinary-0.2.0}/static/sw.js +16 -10
  78. dinary-0.2.0/tasks.py +427 -0
  79. {dinary-0.1.0 → dinary-0.2.0}/tests/conftest.py +2 -1
  80. dinary-0.2.0/tests/js/no-data-loss.test.js +451 -0
  81. dinary-0.2.0/tests/js/offline-queue.test.js +168 -0
  82. dinary-0.2.0/tests/js/setup.js +1 -0
  83. dinary-0.2.0/tests/test_api.py +349 -0
  84. {dinary-0.1.0 → dinary-0.2.0}/tests/test_dinary.py +4 -0
  85. dinary-0.2.0/tests/test_duckdb.py +599 -0
  86. dinary-0.2.0/tests/test_migrations.py +124 -0
  87. dinary-0.2.0/tests/test_seed_config.py +157 -0
  88. {dinary-0.1.0 → dinary-0.2.0}/tests/test_services.py +20 -0
  89. dinary-0.2.0/tests/test_sheets.py +754 -0
  90. dinary-0.2.0/tests/test_sql_loader.py +134 -0
  91. dinary-0.2.0/tests/test_sync.py +476 -0
  92. {dinary-0.1.0 → dinary-0.2.0}/uv.lock +79 -1
  93. dinary-0.2.0/vitest.config.js +11 -0
  94. dinary-0.1.0/.env.example +0 -5
  95. dinary-0.1.0/.plans/phase0.md +0 -214
  96. dinary-0.1.0/docs/src/en/deploy-oracle.md +0 -100
  97. dinary-0.1.0/docs/src/en/deploy-railway.md +0 -75
  98. dinary-0.1.0/docs/src/en/deploy-render.md +0 -73
  99. dinary-0.1.0/docs/src/ru/deploy-oracle.md +0 -100
  100. dinary-0.1.0/docs/src/ru/deploy-railway.md +0 -75
  101. dinary-0.1.0/docs/src/ru/deploy-render.md +0 -71
  102. dinary-0.1.0/src/dinary/__about__.py +0 -1
  103. dinary-0.1.0/src/dinary/api/expenses.py +0 -51
  104. dinary-0.1.0/src/dinary/main.py +0 -68
  105. dinary-0.1.0/src/dinary/services/sheets.py +0 -284
  106. dinary-0.1.0/static/icons/icon-192.png +0 -0
  107. dinary-0.1.0/static/icons/icon-512.png +0 -0
  108. dinary-0.1.0/static/index.html +0 -66
  109. dinary-0.1.0/static/js/app.js +0 -208
  110. dinary-0.1.0/static/js/categories.js +0 -48
  111. dinary-0.1.0/static/js/qr-scanner.js +0 -36
  112. dinary-0.1.0/tasks.py +0 -84
  113. dinary-0.1.0/tests/test_api.py +0 -133
  114. dinary-0.1.0/tests/test_sheets.py +0 -183
  115. {dinary-0.1.0 → dinary-0.2.0}/.coveragerc +0 -0
  116. {dinary-0.1.0 → dinary-0.2.0}/.github/workflows/docs.yml +0 -0
  117. {dinary-0.1.0 → dinary-0.2.0}/.github/workflows/pip_publish.yml +0 -0
  118. {dinary-0.1.0 → dinary-0.2.0}/.github/workflows/static.yml +0 -0
  119. {dinary-0.1.0 → dinary-0.2.0}/.plans/task.md +0 -0
  120. {dinary-0.1.0 → dinary-0.2.0}/.pre-commit-config.yaml +0 -0
  121. {dinary-0.1.0 → dinary-0.2.0}/Dockerfile +0 -0
  122. {dinary-0.1.0 → dinary-0.2.0}/LICENSE.txt +0 -0
  123. {dinary-0.1.0 → dinary-0.2.0}/activate.sh +0 -0
  124. {dinary-0.1.0 → dinary-0.2.0}/docs/includes/install_pipx_macos.sh +0 -0
  125. {dinary-0.1.0 → dinary-0.2.0}/docs/src/en/images/about.jpg +0 -0
  126. {dinary-0.1.0 → dinary-0.2.0}/docs/src/en/reference.md +0 -0
  127. {dinary-0.1.0 → dinary-0.2.0}/invoke.yml +0 -0
  128. {dinary-0.1.0 → dinary-0.2.0}/pytest.ini +0 -0
  129. {dinary-0.1.0 → dinary-0.2.0}/scripts/__init__.py +0 -0
  130. {dinary-0.1.0 → dinary-0.2.0}/scripts/build-docs.sh +0 -0
  131. {dinary-0.1.0 → dinary-0.2.0}/scripts/build.sh +0 -0
  132. {dinary-0.1.0 → dinary-0.2.0}/scripts/docs-render-config.sh +0 -0
  133. {dinary-0.1.0 → dinary-0.2.0}/scripts/upload.sh +0 -0
  134. {dinary-0.1.0 → dinary-0.2.0}/scripts/verup.sh +0 -0
  135. {dinary-0.1.0 → dinary-0.2.0}/scripts/verup_action.sh +0 -0
  136. {dinary-0.1.0 → dinary-0.2.0}/src/dinary/__init__.py +0 -0
  137. {dinary-0.1.0 → dinary-0.2.0}/src/dinary/api/__init__.py +0 -0
  138. {dinary-0.1.0 → dinary-0.2.0}/src/dinary/api/qr.py +0 -0
  139. {dinary-0.1.0 → dinary-0.2.0}/src/dinary/services/__init__.py +0 -0
  140. {dinary-0.1.0 → dinary-0.2.0}/src/dinary/services/exchange_rate.py +0 -0
  141. {dinary-0.1.0 → dinary-0.2.0}/src/dinary/services/qr_parser.py +0 -0
@@ -0,0 +1,5 @@
1
+ # Copy to .env and fill in your values (.env is gitignored)
2
+ # cp .env.example .env
3
+ DINARY_GOOGLE_SHEETS_SPREADSHEET_ID=your-spreadsheet-id-here
4
+ DINARY_DEPLOY_HOST=ubuntu@<PUBLIC_IP>
5
+ # DINARY_TUNNEL=tailscale # tailscale (default) | cloudflare | none
@@ -29,7 +29,7 @@ jobs:
29
29
  matrix-build:
30
30
  strategy:
31
31
  matrix:
32
- python-version: [3.13]
32
+ python-version: [3.13, 3.14]
33
33
  platform: [ubuntu-latest, macos-latest, windows-latest]
34
34
  runs-on: ${{ matrix.platform }}
35
35
 
@@ -48,6 +48,17 @@ jobs:
48
48
  - name: Install dependencies
49
49
  run: uv sync --frozen
50
50
 
51
+ - name: Set up Node.js
52
+ uses: actions/setup-node@v4
53
+ with:
54
+ node-version: 22
55
+
56
+ - name: Install JS dependencies
57
+ run: npm ci
58
+
59
+ - name: Run JS tests
60
+ run: npm test
61
+
51
62
  - name: Test with pytest
52
63
  run: ${{ env.PYTEST_CMD }}
53
64
 
@@ -73,6 +84,17 @@ jobs:
73
84
  - name: Install dependencies
74
85
  run: uv sync --frozen
75
86
 
87
+ - name: Set up Node.js
88
+ uses: actions/setup-node@v4
89
+ with:
90
+ node-version: 22
91
+
92
+ - name: Install JS dependencies
93
+ run: npm ci
94
+
95
+ - name: Run JS tests with Allure
96
+ run: npm test
97
+
76
98
  - name: Test with pytest and Allure report
77
99
  run: "${{ env.PYTEST_CMD }} --alluredir=./allure-results"
78
100
 
@@ -16,6 +16,11 @@ pytest-coverage.txt
16
16
  **/ru/reference.md
17
17
  **/.setup-scripts/
18
18
  allure-results/
19
+ allure-report/
19
20
  dist/
20
21
  credentials.json
21
22
  .env
23
+ node_modules/
24
+ data/
25
+ _static/
26
+ backups/
@@ -33,6 +33,21 @@ prioritizing clean data model and scriptability over UI polish.
33
33
  - Python-native: `import duckdb` — no server, no driver, no ORM needed.
34
34
  - At the expected scale (~30K item rows/year), every query completes in milliseconds.
35
35
 
36
+ ### Server Memory Constraint
37
+
38
+ The production design must fit on an always-on VPS with **1 OCPU / 1 GB RAM**.
39
+ This is a hard architectural constraint, not just a deployment preference.
40
+
41
+ Implications:
42
+
43
+ - Prefer embedded/local components over additional server daemons. DuckDB is acceptable precisely because it runs in-process and avoids a separate database service.
44
+ - The backend must remain a **small FastAPI + DuckDB process**, not a multi-service stack.
45
+ - Do not require Docker in production on the 1 GB instance.
46
+ - Do not run AI/LLM workloads, heavy batch classification, or other memory-hungry jobs on the server. Those stay on the laptop-side `dinary` agent.
47
+ - Keep background work serialized and bounded: no fan-out workers, no parallel sync pipelines, no large in-memory queues.
48
+ - Google Sheets sync should operate on **dirty months / targeted aggregates**, not full-sheet or full-history recomputation on every request.
49
+ - Caches must stay small and optional. Correctness must not depend on large resident in-memory datasets.
50
+
36
51
  ### Partitioning Strategy
37
52
 
38
53
  One DuckDB file per year:
@@ -324,9 +339,10 @@ The architecture is agnostic — the input layer is a thin client that sends str
324
339
 
325
340
  **Phase 0 (MVP) requirements:**
326
341
 
327
- - Camera access for QR scanning (extract URL, send to backend for total + date extraction).
328
- - Fast manual entry: amount + category selector + optional comment, one tap to submit.
329
- - Offline data persistence with sync-on-reconnect.
342
+ - Camera access for QR scanning. In the implemented MVP the browser decodes the Serbian fiscal QR locally with `zbar-wasm`, and the client can extract amount/date from the QR URL path without waiting for a backend roundtrip.
343
+ - Fast manual entry: amount + group selector + category selector + optional comment, one tap to submit. Entry saves instantly to IndexedDB first; network send happens only after local persistence is secured.
344
+ - Offline data persistence via IndexedDB (reliable for installed PWAs — iOS Safari eviction only affects non-installed sites). `navigator.storage.persist()` for additional protection.
345
+ - QR scan with parallel processing: while user selects group/category, the app finishes local QR parsing and can still fall back to backend parsing when needed.
330
346
 
331
347
  **Full requirements (Phase 3 target):**
332
348
 
@@ -335,13 +351,9 @@ The architecture is agnostic — the input layer is a thin client that sends str
335
351
  - Event selector: if the expense date falls within an active event's date range, auto-suggest it. If multiple active events overlap, show a dropdown. Allow manual assignment/removal.
336
352
  - Beneficiary selector: defaults to "семья", quick switch to a specific family member.
337
353
 
338
- #### Frontend Tool Evaluation (Phase 3 prerequisite)
339
-
340
- Before building the mobile input layer, evaluate the candidate tools listed below **and research whether other tools exist** that may fit better.
341
- The list is a starting point, not exhaustive — the no-code/low-code landscape changes rapidly and there may be newer or niche tools
342
- that satisfy the requirements better than any of these.
354
+ #### Frontend Tool Evaluation
343
355
 
344
- Build a minimal MVP with the most promising 1-2 candidates to compare real-world UX before committing.
356
+ **Evaluation result**: .plans/frontend-evaluation.md
345
357
 
346
358
  **Initial candidate list:**
347
359
 
@@ -381,9 +393,7 @@ Nice-to-have:
381
393
 
382
394
  ### Three-tier Classification
383
395
 
384
- **Tier 1: Rule-based (instant, free).** `category_rules` table contains patterns (substrings or regexes) matched against item names.
385
- Example: pattern `MLEKO` matches category "Dairy", pattern `SREDSTVO ZA` matches "Household chemicals".
386
- Rules are applied immediately when items are ingested. This handles the majority of repeat purchases after an initial learning period.
396
+ **Tier 1: Fuzzy ML-based classification.** Same approach as in other personal expense tracking apps.
387
397
 
388
398
  **Tier 2: AI batch classification (deferred, economical).** Unclassified items (`classification_status = 'pending'`) accumulate on dinary-server throughout the day.
389
399
  When the user runs dinary (manually or via scheduler), it fetches pending items from the server API and classifies them using `claude -p`:
@@ -546,9 +556,24 @@ The local agent is stateless — it fetches tasks, processes them, and pushes re
546
556
  **What it does NOT do:**
547
557
  - Any AI/LLM calls. All AI work is delegated to dinary.
548
558
 
549
- **Hosting:** Oracle Cloud Free Tier (free ARM VM, 4 cores, 24 GB RAM — permanent free tier). Alternative: any cheap VPS, or even a Raspberry Pi at home with Cloudflare Tunnel for external access.
559
+ **Hosting (free, always-on options):**
560
+
561
+ - **Oracle Cloud Free Tier** — AMD Micro VM (1 OCPU, 1 GB RAM, always available) is recommended for reliability. ARM A1 Flex (up to 4 OCPU, 24 GB RAM) is more powerful but often unavailable due to shared capacity pool. Run directly with uvicorn as a systemd service (no Docker — saves RAM on 1 GB instances). Docker available for local development.
562
+ - **Self-hosted (Mac/PC)** — run locally, expose via Tailscale Serve (tailnet-only) or Cloudflare Tunnel (custom domain + Cloudflare Access). Aligns with Phase 4 architecture (dinary desktop app on the same machine).
550
563
 
551
- **Accessibility:** Dashboard and API served via Cloudflare Tunnel (free, no public IP needed) or directly from the VPS.
564
+ **Important:** sleeping/serverless hosting (Render free tier, AWS Lambda, etc.) is **not suitable** — the PWA on iOS cannot run background sync, so the server must respond within 1-2 seconds while the user still has the app open.
565
+
566
+ **Accessibility:** API served via Cloudflare Tunnel or Tailscale Serve. For the current MVP, Tailscale Serve is the preferred default because it avoids public internet exposure.
567
+
568
+ #### 1 GB Server Rules
569
+
570
+ Because the reference production target is the Oracle AMD Micro instance, the server-side implementation must follow these rules:
571
+
572
+ - Run a single app process by default. Do not scale by adding multiple uvicorn workers on the 1 GB host.
573
+ - Avoid colocating extra infrastructure on the VPS: no separate Postgres, Redis, Celery, message broker, or background analytics service in Phase 1.
574
+ - Treat Google Sheets sync as lightweight projection work, not as a second analytics engine.
575
+ - Prefer on-demand or dirty-month scoped recomputation over broad periodic rebuilds.
576
+ - Any future feature that materially increases steady-state RAM use must be designed to run off-box (for example on the laptop-side agent) or be explicitly deferred until a larger host is available.
552
577
 
553
578
  ### dinary (User's Laptop)
554
579
 
@@ -612,27 +637,28 @@ daemon lifecycle management) depends on the GUI framework choice and will be det
612
637
 
613
638
  ### Security
614
639
 
615
- - dinary-server API protected by API key or mutual TLS (single user, no need for full auth system).
616
- - Cloudflare Tunnel provides HTTPS without exposing the VPS directly.
640
+ - dinary-server API protected by Cloudflare Access (if using Cloudflare Tunnel) or by tailnet membership (if using Tailscale Serve). Single user, no need for an in-app auth system.
641
+ - Cloudflare Tunnel or Tailscale Serve provides HTTPS without exposing the application port directly to the internet.
617
642
  - DuckDB files are not accessible from the internet — only through the dinary-server API.
618
643
 
619
644
  ---
620
645
 
621
646
  ## Build Plan (Incremental Phases)
622
647
 
623
- ### Phase 0: MVP — Manual Entry + QR Total → Google Sheets (no DuckDB, no line parsing, no AI)
648
+ ### Phase 0: MVP — Manual Entry + QR Total → Google Sheets (completed)
624
649
 
625
650
  The fastest path to replacing manual spreadsheet editing, with early validation of QR scanning.
626
651
  No new database, no line-item parsing — just a mobile frontend that writes directly to the existing Google Sheets structure.
627
652
 
628
653
  **Scope:**
629
654
 
630
- - A mobile frontend (chosen from the non-disqualified candidates in the evaluation table) with a simple form: amount (RSD) + category (dropdown from the existing ~33 categories) + category group (auto-filled from category) + optional comment.
631
- - **QR scanning:** the user scans a Serbian fiscal receipt QR code on the phone. The backend fetches the receipt page from SUF PURS, extracts only the **total amount** and **date** — no line-item parsing, no store extraction. The amount and date are pre-filled into the entry form; the user picks a category and submits. This is treated as a single expense entry (one row), same as manual entry.
632
- - A lightweight backend (Python script or serverless function) that receives the entry and writes it to the existing Google Sheets spreadsheet via the Sheets API.
633
- - **Auto-month creation:** if the backend detects that rows for the current month don't exist yet in the sheet, it automatically creates the full block of category rows for the new month (copying the category/group structure from the previous month). This eliminates the most tedious manual step.
634
- - **Currency conversion:** the EUR/RSD exchange rate is stored in the sheet itself, in a cell to the right of the first row of each month block. When the backend creates a new month or writes the first expense of the month, it checks the rate cell: if empty, it fetches the current NBS middle rate from `kurs.resenje.org` (same API as `ibkr-porez-py`) and writes it. Each month thus has its own visible rate. The EUR amount is derived as `amount_rsd / rate`.
635
- - **Offline queue:** when the device has no internet, completed entries (manual or QR-based) are persisted locally on the device (e.g., IndexedDB for PWA, local storage for the chosen tool). When connectivity is restored, the queue is flushed to the backend automatically. The user must never lose an entry due to network unavailability. The specific storage mechanism depends on the chosen frontend tool — this is a key evaluation criterion.
655
+ - A mobile frontend (implemented as a PWA) with a simple form: amount (RSD) + group dropdown + category dropdown + optional comment. This matches the existing spreadsheet model better than a single huge selector.
656
+ - **QR scanning with parallel processing:** the user scans a Serbian fiscal receipt QR code on the phone. The QR code is decoded on the device (fully offline, client-side image processing) using `zbar-wasm`. The client extracts amount/date from the receipt URL immediately and shows the form without waiting for the backend. Backend QR parsing remains as a fallback/API capability. No line-item parsing, no store extraction in Phase 0.
657
+ - A FastAPI backend that receives the entry and writes it to the existing Google Sheets spreadsheet via the Sheets API. FastAPI (not serverless) because it carries forward into Phase 1 (DuckDB) and Phase 4 (AI agent API) without rewriting.
658
+ - **Auto-month creation:** if the backend detects that rows for the current month don't exist yet in the sheet, it automatically creates the full block of category rows for the new month by copying the previous block, preserving spreadsheet formulas, zeroing RSD values, and inserting the new month at the top of the yearly sheet.
659
+ - **Currency conversion:** the EUR/RSD exchange rate is stored in the sheet itself, on the first row of each month block. When the backend creates a new month or writes the first expense of the month, it checks that month's header row and writes the rate there only if it is missing.
660
+ - **Offline queue:** entries are stored in IndexedDB on the device before any network call. When connectivity is restored, the queue is flushed automatically on app open, on `online`, and after successful user actions when pending items exist. The user must never lose an entry due to network or server failure.
661
+ - **Always-on server required:** PWA on iOS cannot run background sync — sync only happens while the app is open. The server must respond within 1-2 seconds. Sleeping/serverless hosting (Render free tier, Lambda) is not suitable. Use Oracle Cloud Free Tier (AMD Micro, always on) or self-hosted Mac/PC with Tailscale Serve / Cloudflare Tunnel.
636
662
  - No line-item parsing, no store extraction, no DuckDB, no AI. The user picks the category manually, just as they do now — but from a phone instead of editing a spreadsheet. QR scanning only extracts the receipt total amount and date, not individual items or store.
637
663
 
638
664
  **What this validates:**
@@ -647,24 +673,57 @@ No new database, no line-item parsing — just a mobile frontend that writes dir
647
673
  - PWA frontend (in `static/`), backend, manuals, deployment scripts — all in the dinary-server repo
648
674
  - The `dinary` repo is not used in Phase 0 (reserved for the Rust desktop app, Phase 4+)
649
675
 
676
+ **Operational conventions introduced by the completed MVP:**
677
+
678
+ - Local/CI regression entry point is `inv test`, which runs both pytest and Vitest and writes a shared `allure-results/` directory.
679
+ - New tests must preserve the existing Allure taxonomy unless there is an explicit architecture-level reason to extend it.
680
+ - Phase 0 approved Allure epics are: `Data Safety`, `Google Sheets`, `API`, `Services`, `Build`.
681
+ - Phase 0 approved features are:
682
+ - `Data Safety`: `Formula Preservation`, `Comment Preservation`, `Column Protection`, `Offline Queue`, `No Data Loss`
683
+ - `Google Sheets`: `Read Categories`, `Write Expense`, `Exchange Rate`, `Month Creation`, `Helpers`
684
+ - `API`: `Health`, `Categories`, `Expenses`, `QR Parse`
685
+ - `Services`: `Category Store`, `Exchange Rate`, `QR Parser`
686
+ - `Build`: `Version`
687
+
650
688
  **Exit criteria for Phase 0:**
651
689
  - The user has used the system daily for 2+ weeks and no longer opens the spreadsheet to enter data manually.
652
690
  - QR scanning has been used successfully on real receipts (camera → URL extraction → total + date pre-fill) and is confirmed to work reliably with the chosen frontend tool.
653
691
 
654
- ### Phase 1: Data Foundation & Backend Deployment (dinary-server)
655
- - Set up DuckDB schema (config.duckdb + budget_2026.duckdb) on VPS (Oracle Cloud Free Tier).
656
- - Deploy dinary-server (FastAPI) with basic REST API for expense ingestion.
657
- - Migrate existing Google Sheets data into DuckDB.
658
- - Write basic SQL queries for monthly aggregates.
659
- - Backend now writes to both DuckDB (primary) and Google Sheets (view layer).
660
- - Set up Cloudflare Tunnel or direct HTTPS access to the backend.
692
+ ### Phase 1: Data Foundation & Idempotent Ingestion (dinary-server) ✓ IMPLEMENTED
693
+
694
+ Detailed plan: [phase1.md](phase1.md)
695
+
696
+ - DuckDB with the **full 5-dimensional classification schema** (category, beneficiary, event, tags, store) from day one.
697
+ - **sheet-to-5D mapping table** (`sheet_category_mapping`) decomposes the current Google Sheet's flat `(Расходы, Конверт)` pairs into proper 5D assignments.
698
+ - **PWA unchanged** -- sends `(category, group)` as in Phase 0; server resolves to 5D via mapping table.
699
+ - DuckDB-backed expense ingestion with idempotent deduplication via `expenses.id PRIMARY KEY`.
700
+ - Google Sheets is a derived read-only view: sync layer projects 5D DuckDB data back into sheet format.
701
+ - Client generates `expense_id = crypto.randomUUID()` at enqueue time; server returns `200 created`, `200 duplicate`, or `409 Conflict`.
702
+ - Allure test suite covers: `Data Safety / Deduplication` (Python + JS), `DuckDB / Bootstrap`, `DuckDB / Mapping`, `DuckDB / Travel Events`, `DuckDB / Reverse Mapping`, `DuckDB / Year Boundary`.
703
+
704
+ **Historical data migration is NOT part of Phase 1.** After Phase 1 cutover, DuckDB holds only new expenses; historical data remains in Google Sheets until Phase 1.5.
705
+
706
+ ### Phase 1.5: Historical Data Migration
707
+
708
+ The existing Google Sheets contain ~10 years of data. Nearly every year used a slightly different category system (different category names, different envelope groupings) and even different column layouts. This makes bulk import impractical -- each year requires individual analysis and its own mapping.
709
+
710
+ - Analyze each yearly Google Sheets tab individually: identify that year's category/envelope structure, column layout, and how it differs from other years.
711
+ - Build per-year mapping from that year's flat `(category, envelope)` pairs to the 5D classification model, handling cases where the same category name meant different things in different years.
712
+ - For "путешествия" envelopes: create a per-year synthetic event "отпуск-YYYY" (`date_from = YYYY-01-01`, `date_to = YYYY-12-31`) and map all travel rows to it (same approach as Phase 1 uses for the current year). Once the PWA switches to native 5D input (Phase 2+), set `date_to` of the last synthetic travel event to the release date of the 5D PWA. From that date forward, the user creates specific named trips instead of a per-year umbrella, and the auto-attach rule for `sheet_group = "путешествия"` is retired.
713
+ - Build per-year import scripts that create synthetic expense rows in `budget_YYYY.duckdb` with `source = 'legacy_import'`.
714
+ - Reconcile imported totals against original sheet totals.
715
+ - After successful import, run DuckDB -> Google Sheets sync to verify the rebuilt sheet matches legacy data.
661
716
 
662
717
  ### Phase 2: Receipt Parser
663
718
  - Integrate or adapt sr-invoice-parser for fetching and parsing Serbian fiscal receipts from SUF PURS URLs.
664
719
  - Build the ingestion pipeline: URL → fetch HTML → parse line items → insert into `expenses` table in DuckDB.
665
- - Implement rule-based auto-classification.
720
+ - Implement fuzzy ML / AI auto-classification that produces 5D classification directly (category, beneficiary, event, tags, store).
721
+ - Change the PWA so it no longer works in Google Sheets terms for new receipt/manual flows. From Phase 2 onward, the PWA should use the native 5D classification model directly instead of asking the user for `(Расходы, Конверт)` from the spreadsheet.
722
+ - Google Sheets sync uses the `sheet_category_mapping` table in reverse: from the 5D classification produced by AI/rules, determine the target `(Расходы, Конверт)` row in the sheet.
666
723
 
667
724
  ### Phase 3: Mobile Input — Full Version (dinary-app)
725
+ **Done as part of MVP**
726
+
668
727
  - **3a: Frontend tool evaluation.**
669
728
 
670
729
  - Research the candidate tools from the evaluation table (see "Frontend Tool Evaluation" section) **and any other tools discovered during research**.
@@ -693,6 +752,7 @@ No new database, no line-item parsing — just a mobile frontend that writes dir
693
752
  - Implement the task queue API on dinary-server (`/api/tasks/*`).
694
753
  - Build the batch classification flow: fetch pending → `claude -p` → push results.
695
754
  - GUI: interactive AI API calls for responsive receipt extraction (paste text/PDF → AI API → extract amount, date, items → store via server API).
755
+ - After AI classification of receipt line items is available, change the PWA receipt flow so scanning a receipt submits it immediately without waiting for a manual `Save` press. The scan should create the receipt/import job right away; later user interaction is only for review/correction, not for the initial submission.
696
756
  - Implement the review/confirm flow (via GUI or CLI).
697
757
  - Wire up rule learning (confirmed classifications → new rules in `category_rules`).
698
758
 
@@ -709,8 +769,13 @@ No new database, no line-item parsing — just a mobile frontend that writes dir
709
769
  Each phase is independently useful.
710
770
 
711
771
  - Phase 0 alone eliminates manual spreadsheet editing, validates QR scanning, and validates the mobile input tool.
712
- - Phase 1 establishes the proper data foundation.
772
+ - Phase 1 establishes the proper data foundation with idempotent ingestion and deduplication.
773
+ - Phase 1.5 migrates historical Google Sheets data into DuckDB (complex, per-year analysis required).
713
774
  - Phase 2 solves the supermarket opacity problem.
714
775
  - Phase 3 adds full line-item QR flow and complete mobile input.
715
776
  - Phase 4 builds the desktop app (daemon + GUI) with AI classification and responsive receipt extraction.
716
777
  - Phases 5-6 add dashboards, AI analysis, and Google Sheets sync.
778
+
779
+ ## Open questions
780
+
781
+ - **Cross-year events**: events (e.g. a trip) can span a year boundary (start in December, end in January). Since `expenses` are partitioned into yearly `budget_YYYY.duckdb` files but `events` live in the shared `config.duckdb`, this works at the data level -- expenses in both years reference the same `event_id`. However, reporting and sync need to handle the case where a single event's expenses are split across two yearly DB files. Decide whether to query both years when summarizing an event, or accept per-year totals as sufficient.
@@ -0,0 +1,168 @@
1
+ # Deploy to Oracle Cloud (replace existing, no DB)
2
+
3
+ ## Context
4
+
5
+ - Oracle Cloud Free Tier VM is already running a previous version of dinary-server
6
+ - Tailscale tunnel is configured, systemd service `dinary` is active
7
+ - There is no existing DuckDB data to preserve (no `config.duckdb`, no `budget_*.duckdb`)
8
+ - New version on `main` uses `yoyo` migrations instead of inline DDL, dropped `ibis`/`pandas`
9
+
10
+ ## Pre-deploy checks (on laptop)
11
+
12
+ ```bash
13
+ # 1. Ensure .env is configured
14
+ cat .env
15
+ # Should show DINARY_DEPLOY_HOST, DINARY_GOOGLE_SHEETS_SPREADSHEET_ID
16
+
17
+ # 2. Verify SSH access
18
+ inv ssh
19
+ # Ctrl-D to exit
20
+
21
+ # 3. Verify local tests pass
22
+ uv run pytest tests/ -q
23
+
24
+ # 4. Verify main is pushed
25
+ git log --oneline -3 origin/main
26
+ # Should show "yoyo migrations" as latest
27
+ ```
28
+
29
+ ## Deploy
30
+
31
+ ```bash
32
+ inv deploy
33
+ ```
34
+
35
+ This runs the following steps automatically via SSH:
36
+
37
+ 1. **Pre-deploy backup** of remote `data/` (will be empty or missing — that is expected)
38
+ 2. `git pull` on the server to get latest `main`
39
+ 3. `uv sync --no-dev` to install/remove dependencies (installs `yoyo-migrations`, removes `ibis`/`pandas`/`pyarrow` if present)
40
+ 4. `mkdir -p data/`
41
+ 5. **Apply config migrations** — creates fresh `config.duckdb` with `yoyo` version tracking
42
+ 6. Render `__VERSION__` into static assets
43
+ 7. `systemctl restart dinary`
44
+ 8. Health check: `curl localhost:8000/api/health`
45
+
46
+ ## Post-deploy verification
47
+
48
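+ Run these from the laptop, in order. In blocks that start with `inv ssh`, the remaining commands are run inside that SSH session on the server.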
49
+
50
+ ### Step 1: Service health
51
+
52
+ ```bash
53
+ inv status
54
+ ```
55
+
56
+ Expected: `dinary.service` active (running), Tailscale serve active.
57
+
58
+ ### Step 2: Health endpoint
59
+
60
+ ```bash
61
+ inv ssh
62
+ curl -s http://localhost:8000/api/health | python3 -m json.tool
63
+ ```
64
+
65
+ Expected: `{"status": "ok", "version": "<short git hash>"}`.
66
+
67
+ ### Step 3: Logs — no errors on startup
68
+
69
+ ```bash
70
+ inv logs --lines=30
71
+ ```
72
+
73
+ Look for:
74
+ - no Python tracebacks
75
+ - no import errors (especially no `ibis` / `pandas` references)
76
+ - migration log line if present
77
+
78
+ ### Step 4: Seed config from Google Sheets
79
+
80
+ Since there is no existing DB, seed reference data:
81
+
82
+ ```bash
83
+ inv seed-config
84
+ ```
85
+
86
+ Expected: JSON summary with `category_groups > 0`, `categories > 0`, `mappings_created > 0`.
87
+
88
+ ### Step 5: Verify categories API
89
+
90
+ ```bash
91
+ inv ssh
92
+ curl -s http://localhost:8000/api/categories | python3 -m json.tool
93
+ ```
94
+
95
+ Expected: list of category objects from seeded data.
96
+
97
+ ### Step 6: Test expense creation (from phone or curl)
98
+
99
+ ```bash
100
+ inv ssh
101
+ curl -s -X POST http://localhost:8000/api/expenses \
102
+ -H 'Content-Type: application/json' \
103
+ -d '{
104
+ "expense_id": "deploy-test-1",
105
+ "amount": 100,
106
+ "currency": "RSD",
107
+ "category": "<a known category from step 5>",
108
+ "group": "<its group>",
109
+ "date": "2026-04-16",
110
+ "comment": "deploy smoke test"
111
+ }' | python3 -m json.tool
112
+ ```
113
+
114
+ Expected: `{"status": "created", ...}`.
115
+
116
+ ### Step 7: Verify budget DB was created
117
+
118
+ ```bash
119
+ inv ssh
120
+ ls -la ~/dinary-server/data/
121
+ ```
122
+
123
+ Expected: `config.duckdb` and `budget_2026.duckdb` present.
124
+
125
+ ### Step 8: Verify yoyo tracking tables
126
+
127
+ ```bash
128
+ inv ssh
129
+ cd ~/dinary-server && source ~/.local/bin/env
130
+ uv run python -c "
131
+ import duckdb
132
+ for f in ['data/config.duckdb', 'data/budget_2026.duckdb']:
133
+     con = duckdb.connect(f, read_only=True)
134
+     rows = con.execute('SELECT * FROM _yoyo_migration').fetchall()
135
+     print(f'{f}: {len(rows)} migration(s) applied')
136
+     con.close()
137
+ "
138
+ ```
139
+
140
+ Expected: 1 migration applied in each file.
141
+
142
+ ### Step 9: PWA smoke test
143
+
144
+ Open the Tailscale URL on the phone. Verify the app loads, categories are visible, and a test expense can be submitted through the UI.
145
+
146
+ ### Step 10: Verify sync (optional)
147
+
148
+ ```bash
149
+ inv sync
150
+ ```
151
+
152
+ Expected: `Synced 1 months` (or however many dirty months exist after the test expense).
153
+
154
+ ## Rollback
155
+
156
+ If something goes wrong:
157
+
158
+ ```bash
159
+ # Deploy the previous known-good commit
160
+ inv deploy --ref=<previous commit hash>
161
+ ```
162
+
163
+ Since there is no data to lose (no pre-existing DB), rollback is just redeploying the old code. The `data/` directory can be safely deleted and recreated.
164
+
165
+ ## After successful deploy
166
+
167
+ - Delete the test expense row from Google Sheets if sync wrote it
168
+ - Or leave it as a smoke test record
@@ -34,8 +34,8 @@
34
34
 
35
35
  ### Tech stack
36
36
 
37
- - **html5-qrcode** (MIT) for QR scanning via rear camera
38
- - **IndexedDB** for offline entry queue
39
- - **Service Worker** for caching and background sync
37
+ - **zbar-wasm** for live QR scanning in the browser. Earlier MVP experiments with `html5-qrcode` and other JS scanners were not reliable enough on dense Serbian fiscal QR codes, especially on iOS.
38
+ - **IndexedDB** for the offline entry queue
39
+ - **Service Worker** for caching and installability
40
40
  - Vanilla HTML/CSS/JS — no build step, no framework
41
- - Served by FastAPI `StaticFiles` (same origin, same Cloudflare Tunnel)
41
+ - Served by FastAPI `StaticFiles` (same origin; typically exposed through Tailscale Serve, optionally Cloudflare Tunnel)