@realtimex/folio 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +20 -0
- package/README.md +63 -0
- package/api/server.ts +130 -0
- package/api/src/config/index.ts +96 -0
- package/api/src/middleware/auth.ts +128 -0
- package/api/src/middleware/errorHandler.ts +88 -0
- package/api/src/middleware/index.ts +4 -0
- package/api/src/middleware/rateLimit.ts +71 -0
- package/api/src/middleware/validation.ts +58 -0
- package/api/src/routes/accounts.ts +142 -0
- package/api/src/routes/baseline-config.ts +124 -0
- package/api/src/routes/chat.ts +154 -0
- package/api/src/routes/health.ts +61 -0
- package/api/src/routes/index.ts +35 -0
- package/api/src/routes/ingestions.ts +275 -0
- package/api/src/routes/migrate.ts +112 -0
- package/api/src/routes/policies.ts +121 -0
- package/api/src/routes/processing.ts +90 -0
- package/api/src/routes/rules.ts +11 -0
- package/api/src/routes/sdk.ts +100 -0
- package/api/src/routes/settings.ts +80 -0
- package/api/src/routes/setup.ts +389 -0
- package/api/src/routes/stats.ts +81 -0
- package/api/src/routes/tts.ts +190 -0
- package/api/src/services/BaselineConfigService.ts +208 -0
- package/api/src/services/ChatService.ts +204 -0
- package/api/src/services/GoogleDriveService.ts +331 -0
- package/api/src/services/GoogleSheetsService.ts +1107 -0
- package/api/src/services/IngestionService.ts +1187 -0
- package/api/src/services/ModelCapabilityService.ts +248 -0
- package/api/src/services/PolicyEngine.ts +1625 -0
- package/api/src/services/PolicyLearningService.ts +527 -0
- package/api/src/services/PolicyLoader.ts +249 -0
- package/api/src/services/RAGService.ts +391 -0
- package/api/src/services/SDKService.ts +249 -0
- package/api/src/services/supabase.ts +113 -0
- package/api/src/utils/Actuator.ts +284 -0
- package/api/src/utils/actions/ActionHandler.ts +34 -0
- package/api/src/utils/actions/AppendToGSheetAction.ts +260 -0
- package/api/src/utils/actions/AutoRenameAction.ts +58 -0
- package/api/src/utils/actions/CopyAction.ts +120 -0
- package/api/src/utils/actions/CopyToGDriveAction.ts +64 -0
- package/api/src/utils/actions/LogCsvAction.ts +48 -0
- package/api/src/utils/actions/NotifyAction.ts +39 -0
- package/api/src/utils/actions/RenameAction.ts +57 -0
- package/api/src/utils/actions/WebhookAction.ts +58 -0
- package/api/src/utils/actions/utils.ts +293 -0
- package/api/src/utils/llmResponse.ts +61 -0
- package/api/src/utils/logger.ts +67 -0
- package/bin/folio-deploy.js +12 -0
- package/bin/folio-setup.js +45 -0
- package/bin/folio.js +65 -0
- package/dist/api/server.js +106 -0
- package/dist/api/src/config/index.js +81 -0
- package/dist/api/src/middleware/auth.js +93 -0
- package/dist/api/src/middleware/errorHandler.js +73 -0
- package/dist/api/src/middleware/index.js +4 -0
- package/dist/api/src/middleware/rateLimit.js +43 -0
- package/dist/api/src/middleware/validation.js +54 -0
- package/dist/api/src/routes/accounts.js +110 -0
- package/dist/api/src/routes/baseline-config.js +91 -0
- package/dist/api/src/routes/chat.js +114 -0
- package/dist/api/src/routes/health.js +52 -0
- package/dist/api/src/routes/index.js +31 -0
- package/dist/api/src/routes/ingestions.js +207 -0
- package/dist/api/src/routes/migrate.js +91 -0
- package/dist/api/src/routes/policies.js +86 -0
- package/dist/api/src/routes/processing.js +75 -0
- package/dist/api/src/routes/rules.js +8 -0
- package/dist/api/src/routes/sdk.js +80 -0
- package/dist/api/src/routes/settings.js +68 -0
- package/dist/api/src/routes/setup.js +315 -0
- package/dist/api/src/routes/stats.js +62 -0
- package/dist/api/src/routes/tts.js +178 -0
- package/dist/api/src/services/BaselineConfigService.js +168 -0
- package/dist/api/src/services/ChatService.js +166 -0
- package/dist/api/src/services/GoogleDriveService.js +280 -0
- package/dist/api/src/services/GoogleSheetsService.js +795 -0
- package/dist/api/src/services/IngestionService.js +990 -0
- package/dist/api/src/services/ModelCapabilityService.js +179 -0
- package/dist/api/src/services/PolicyEngine.js +1353 -0
- package/dist/api/src/services/PolicyLearningService.js +397 -0
- package/dist/api/src/services/PolicyLoader.js +159 -0
- package/dist/api/src/services/RAGService.js +295 -0
- package/dist/api/src/services/SDKService.js +212 -0
- package/dist/api/src/services/supabase.js +72 -0
- package/dist/api/src/utils/Actuator.js +225 -0
- package/dist/api/src/utils/actions/ActionHandler.js +1 -0
- package/dist/api/src/utils/actions/AppendToGSheetAction.js +191 -0
- package/dist/api/src/utils/actions/AutoRenameAction.js +49 -0
- package/dist/api/src/utils/actions/CopyAction.js +112 -0
- package/dist/api/src/utils/actions/CopyToGDriveAction.js +55 -0
- package/dist/api/src/utils/actions/LogCsvAction.js +42 -0
- package/dist/api/src/utils/actions/NotifyAction.js +32 -0
- package/dist/api/src/utils/actions/RenameAction.js +51 -0
- package/dist/api/src/utils/actions/WebhookAction.js +51 -0
- package/dist/api/src/utils/actions/utils.js +237 -0
- package/dist/api/src/utils/llmResponse.js +63 -0
- package/dist/api/src/utils/logger.js +51 -0
- package/dist/assets/index-DzN8-j-e.css +1 -0
- package/dist/assets/index-Uy-ai3Dh.js +113 -0
- package/dist/favicon.svg +31 -0
- package/dist/folio-logo.svg +46 -0
- package/dist/index.html +14 -0
- package/docs-dev/FPE-spec.md +196 -0
- package/docs-dev/folio-prd.md +47 -0
- package/docs-dev/foundation-checklist.md +30 -0
- package/docs-dev/hybrid-routing-architecture.md +205 -0
- package/docs-dev/ingestion-engine.md +69 -0
- package/docs-dev/port-from-email-automator.md +32 -0
- package/docs-dev/tech-spec.md +98 -0
- package/index.html +13 -0
- package/package.json +101 -0
- package/public/favicon.svg +31 -0
- package/public/folio-logo.svg +46 -0
- package/scripts/dev-task.mjs +51 -0
- package/scripts/get-latest-migration-timestamp.mjs +34 -0
- package/scripts/migrate.sh +91 -0
- package/supabase/.temp/cli-latest +1 -0
- package/supabase/.temp/gotrue-version +1 -0
- package/supabase/.temp/pooler-url +1 -0
- package/supabase/.temp/postgres-version +1 -0
- package/supabase/.temp/project-ref +1 -0
- package/supabase/.temp/rest-version +1 -0
- package/supabase/.temp/storage-migration +1 -0
- package/supabase/.temp/storage-version +1 -0
- package/supabase/config.toml +64 -0
- package/supabase/functions/_shared/auth.ts +35 -0
- package/supabase/functions/_shared/cors.ts +12 -0
- package/supabase/functions/_shared/supabaseAdmin.ts +17 -0
- package/supabase/functions/api-v1-settings/index.ts +66 -0
- package/supabase/functions/setup/index.ts +91 -0
- package/supabase/migrations/20260223000000_initial_foundation.sql +136 -0
- package/supabase/migrations/20260223000001_add_migration_rpc.sql +10 -0
- package/supabase/migrations/20260224000002_add_init_state_view.sql +20 -0
- package/supabase/migrations/20260224000003_port_user_creation_parity.sql +139 -0
- package/supabase/migrations/20260224000004_add_avatars_storage.sql +26 -0
- package/supabase/migrations/20260224000005_add_tts_and_embed_settings.sql +24 -0
- package/supabase/migrations/20260224000006_add_policies_table.sql +48 -0
- package/supabase/migrations/20260224000007_fix_migration_rpc.sql +9 -0
- package/supabase/migrations/20260224000008_add_ingestions_table.sql +42 -0
- package/supabase/migrations/20260225000000_setup_compatible_mode.sql +119 -0
- package/supabase/migrations/20260225000001_restore_ingestions.sql +49 -0
- package/supabase/migrations/20260225000002_add_ingestion_trace.sql +2 -0
- package/supabase/migrations/20260225000003_add_baseline_configs.sql +35 -0
- package/supabase/migrations/20260226000000_add_processing_events.sql +26 -0
- package/supabase/migrations/20260226000001_add_ingestion_file_hash.sql +10 -0
- package/supabase/migrations/20260226000002_add_dynamic_rag.sql +150 -0
- package/supabase/migrations/20260226000003_add_ingestion_summary.sql +4 -0
- package/supabase/migrations/20260226000004_add_ingestion_tags.sql +7 -0
- package/supabase/migrations/20260226000005_add_chat_tables.sql +60 -0
- package/supabase/migrations/20260227000000_harden_chat_messages_rls.sql +25 -0
- package/supabase/migrations/20260228000000_add_vision_model_capabilities.sql +8 -0
- package/supabase/migrations/20260228000001_add_policy_match_feedback.sql +51 -0
- package/supabase/migrations/29991231235959_test_migration.sql +0 -0
- package/supabase/templates/confirmation.html +76 -0
- package/supabase/templates/email-change.html +76 -0
- package/supabase/templates/invite.html +72 -0
- package/supabase/templates/magic-link.html +68 -0
- package/supabase/templates/recovery.html +82 -0
- package/tsconfig.api.json +16 -0
- package/tsconfig.json +25 -0
- package/vite.config.ts +146 -0
package/dist/favicon.svg
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
<svg width="64" height="64" viewBox="0 0 64 64" fill="none" xmlns="http://www.w3.org/2000/svg">
|
|
2
|
+
<!--
|
|
3
|
+
FAVICON STRATEGY:
|
|
4
|
+
1. High Contrast: Uses the brand Indigo against transparent/white.
|
|
5
|
+
2. Simplified Geometry: Removed shadows and subtle opacity layers.
|
|
6
|
+
3. Scalable: This vector will look crisp on Retina displays.
|
|
7
|
+
-->
|
|
8
|
+
|
|
9
|
+
<defs>
|
|
10
|
+
<linearGradient id="faviconGradient" x1="0" y1="0" x2="64" y2="64" gradientUnits="userSpaceOnUse">
|
|
11
|
+
<stop offset="0%" stop-color="#6366F1" /> <!-- Indigo-500 -->
|
|
12
|
+
<stop offset="100%" stop-color="#4338CA" /> <!-- Indigo-700 -->
|
|
13
|
+
</linearGradient>
|
|
14
|
+
</defs>
|
|
15
|
+
|
|
16
|
+
<!-- The "Back" Page (Input) - Lighter/Translucent representation -->
|
|
17
|
+
<!-- Positioned to create the right stem of the abstract 'F' -->
|
|
18
|
+
<path d="M28 12 H 48 C 50.2 12 52 13.8 52 16 V 48 L 40 36 V 12 Z"
|
|
19
|
+
fill="url(#faviconGradient)"
|
|
20
|
+
fill-opacity="0.5" />
|
|
21
|
+
|
|
22
|
+
<!-- The "Front" Page (Output) - Solid representation -->
|
|
23
|
+
<!-- Positioned to create the main body of the 'F' -->
|
|
24
|
+
<path d="M12 12 H 36 C 38.2 12 40 13.8 40 16 V 52 C 40 54.2 38.2 56 36 56 H 12 V 12 Z"
|
|
25
|
+
fill="url(#faviconGradient)" />
|
|
26
|
+
|
|
27
|
+
<!-- The "Fold" (Action) - The distinctive dog-ear -->
|
|
28
|
+
<path d="M40 16 L 36 16 C 34.9 16 34 15.1 34 14 L 34 12"
|
|
29
|
+
fill="#FFFFFF"
|
|
30
|
+
fill-opacity="0.6"/>
|
|
31
|
+
</svg>
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
<svg width="512" height="512" viewBox="0 0 512 512" fill="none" xmlns="http://www.w3.org/2000/svg">
|
|
2
|
+
<defs>
|
|
3
|
+
<!-- Main Gradient: Gives a subtle 'tech' feel and depth -->
|
|
4
|
+
<linearGradient id="folioGradient" x1="0" y1="0" x2="512" y2="512" gradientUnits="userSpaceOnUse">
|
|
5
|
+
<stop offset="0%" stop-color="#6366F1" /> <!-- Indigo-500 -->
|
|
6
|
+
<stop offset="100%" stop-color="#4338CA" /> <!-- Indigo-700 -->
|
|
7
|
+
</linearGradient>
|
|
8
|
+
|
|
9
|
+
<!-- Shadow for depth between the pages -->
|
|
10
|
+
<filter id="dropShadow" x="-20%" y="-20%" width="140%" height="140%">
|
|
11
|
+
<feGaussianBlur in="SourceAlpha" stdDeviation="8"/>
|
|
12
|
+
<feOffset dx="4" dy="8" result="offsetblur"/>
|
|
13
|
+
<feComponentTransfer>
|
|
14
|
+
<feFuncA type="linear" slope="0.3"/>
|
|
15
|
+
</feComponentTransfer>
|
|
16
|
+
<feMerge>
|
|
17
|
+
<feMergeNode/>
|
|
18
|
+
<feMergeNode in="SourceGraphic"/>
|
|
19
|
+
</feMerge>
|
|
20
|
+
</filter>
|
|
21
|
+
</defs>
|
|
22
|
+
|
|
23
|
+
<!-- BACKGROUND CONTAINER: Ensures visibility on both Dark and Light modes -->
|
|
24
|
+
<rect x="32" y="32" width="448" height="448" rx="112" fill="url(#folioGradient)" />
|
|
25
|
+
|
|
26
|
+
<!-- ICON ELEMENTS -->
|
|
27
|
+
<g filter="url(#dropShadow)">
|
|
28
|
+
<!-- The "Back" Page (The Foundation) -->
|
|
29
|
+
<!-- Representing the raw data/input -->
|
|
30
|
+
<path d="M180 140 H 330 C 352.09 140 370 157.91 370 180 V 370 C 370 370 370 370 370 370 H 260 L 180 290 V 140 Z"
|
|
31
|
+
fill="#FFFFFF"
|
|
32
|
+
fill-opacity="0.4" />
|
|
33
|
+
|
|
34
|
+
<!-- The "Front" Page (The Organization) -->
|
|
35
|
+
<!-- Representing the processed, clean output.
|
|
36
|
+
Notice how it creates an abstract 'F' shape with the back page. -->
|
|
37
|
+
<path d="M140 140 H 280 C 302.09 140 320 157.91 320 180 V 332 C 320 354.09 302.09 372 280 372 H 140 V 140 Z"
|
|
38
|
+
fill="#FFFFFF" />
|
|
39
|
+
|
|
40
|
+
<!-- The "Action" Accent (The Dog Ear / Fold) -->
|
|
41
|
+
<!-- Adds a tactile feel, implying this is a document being handled. -->
|
|
42
|
+
<path d="M320 180 L 280 180 C 268.95 180 260 171.05 260 160 L 260 140"
|
|
43
|
+
fill="#C7D2FE"
|
|
44
|
+
fill-opacity="0.5"/>
|
|
45
|
+
</g>
|
|
46
|
+
</svg>
|
package/dist/index.html
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
<!doctype html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8" />
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
6
|
+
<link rel="icon" type="image/svg+xml" href="/favicon.svg" />
|
|
7
|
+
<title>Folio</title>
|
|
8
|
+
<script type="module" crossorigin src="/assets/index-Uy-ai3Dh.js"></script>
|
|
9
|
+
<link rel="stylesheet" crossorigin href="/assets/index-DzN8-j-e.css">
|
|
10
|
+
</head>
|
|
11
|
+
<body>
|
|
12
|
+
<div id="root"></div>
|
|
13
|
+
</body>
|
|
14
|
+
</html>
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
# Folio Policy Engine (FPE) Specification
|
|
2
|
+
**Version:** 1.0.0
|
|
3
|
+
**Status:** Final
|
|
4
|
+
**Scope:** Core Logic & Schema Definition
|
|
5
|
+
|
|
6
|
+
## 1. System Architecture
|
|
7
|
+
The Policy Engine is a pipeline execution environment. It does not "guess"; it evaluates each document against a prioritized list of user-defined rules (Policies).
|
|
8
|
+
|
|
9
|
+
### The 4-Stage Pipeline
|
|
10
|
+
1. **Ingest & Normalize:** Convert incoming file (PDF/Img) into a standardized `Document Object` (Text + Vector Embeddings).
|
|
11
|
+
2. **Match (The Router):** Iterate through active Policies to find the *Best Fit*.
|
|
12
|
+
3. **Extract (The Parser):** Use the specific schema defined in the matched Policy to query the LLM.
|
|
13
|
+
4. **Execute (The Actuator):** Perform the side-effects defined in the Policy (Move, Rename, API Call).
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## 2. The Policy Definition Schema (YAML)
|
|
18
|
+
All policies must adhere to the `folio/v1` standard.
|
|
19
|
+
|
|
20
|
+
### 2.1 Header & Metadata
|
|
21
|
+
Defines the identity and priority of the policy.
|
|
22
|
+
|
|
23
|
+
```yaml
|
|
24
|
+
apiVersion: folio/v1
|
|
25
|
+
kind: Policy
|
|
26
|
+
metadata:
|
|
27
|
+
id: "pge-residential-bill"
|
|
28
|
+
name: "PG&E Utility Bill"
|
|
29
|
+
version: "1.0.0"
|
|
30
|
+
description: "Handles residential electricity statements from Pacific Gas & Electric."
|
|
31
|
+
priority: 100 # Higher numbers are evaluated first (e.g., the "Garbage Collector" junk filter = 999).
|
|
32
|
+
tags: ["utility", "household", "tax-deductible"]
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
### 2.2 The Matcher (Logic Gate)
|
|
36
|
+
Defines *if* this policy applies to the document. Supports Boolean logic.
|
|
37
|
+
|
|
38
|
+
```yaml
|
|
39
|
+
spec:
|
|
40
|
+
match:
|
|
41
|
+
strategy: "ALL" # Options: ALL (AND), ANY (OR)
|
|
42
|
+
conditions:
|
|
43
|
+
- type: "keyword"
|
|
44
|
+
value: ["Pacific Gas and Electric Company", "PG&E"]
|
|
45
|
+
case_sensitive: false
|
|
46
|
+
|
|
47
|
+
- type: "keyword"
|
|
48
|
+
value: ["Service For:", "Account Number"]
|
|
49
|
+
|
|
50
|
+
- type: "llm_verify"
|
|
51
|
+
prompt: "Is this a monthly utility bill statement?"
|
|
52
|
+
confidence_threshold: 0.85
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
### 2.3 The Extractor (Data Schema)
|
|
56
|
+
Defines *what* data points to pull. This generates the prompt for the LLM.
|
|
57
|
+
|
|
58
|
+
```yaml
|
|
59
|
+
extract:
|
|
60
|
+
- key: "total_amount"
|
|
61
|
+
type: "currency"
|
|
62
|
+
description: "The 'Total Amount Due' for this billing period. Exclude past due balances."
|
|
63
|
+
required: true
|
|
64
|
+
|
|
65
|
+
- key: "due_date"
|
|
66
|
+
type: "date"
|
|
67
|
+
format: "YYYY-MM-DD"
|
|
68
|
+
description: "The date payment must be received to avoid penalties."
|
|
69
|
+
required: true
|
|
70
|
+
|
|
71
|
+
- key: "usage_kwh"
|
|
72
|
+
type: "number"
|
|
73
|
+
description: "Total electricity usage in kWh."
|
|
74
|
+
required: false
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 2.4 The Actions (Workflow)
|
|
78
|
+
Defines *what happens* after data is extracted.
|
|
79
|
+
|
|
80
|
+
```yaml
|
|
81
|
+
actions:
|
|
82
|
+
# 1. Rename the file using extracted variables
|
|
83
|
+
- type: "rename"
|
|
84
|
+
pattern: "{due_date}_PGE_Bill_{total_amount}.pdf"
|
|
85
|
+
|
|
86
|
+
# 2. Move to structured directory
|
|
87
|
+
- type: "move"
|
|
88
|
+
destination: "/Documents/Home/Utilities/Electricity/{year}/"
|
|
89
|
+
|
|
90
|
+
# 3. Append to CSV Ledger
|
|
91
|
+
- type: "log_csv"
|
|
92
|
+
path: "/Finance/2024_Household_Expenses.csv"
|
|
93
|
+
columns: ["due_date", "PGE", "total_amount", "usage_kwh"]
|
|
94
|
+
|
|
95
|
+
# 4. Create Calendar Event (Optional)
|
|
96
|
+
- type: "integration/calendar"
|
|
97
|
+
provider: "google_calendar"
|
|
98
|
+
title: "Pay PG&E Bill: ${total_amount}"
|
|
99
|
+
date: "{due_date}"
|
|
100
|
+
description: "Link to file: {folio_link}"
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
---
|
|
104
|
+
|
|
105
|
+
## 3. The "Global" Policies (Standard Pack)
|
|
106
|
+
Folio ships with 3 fundamental policies that handle the lifecycle of most documents.
|
|
107
|
+
|
|
108
|
+
### Policy A: The "Garbage Collector" (Priority: 999)
|
|
109
|
+
* **Match:** Keywords ["Presorted Standard", "Current Resident", "Apply Now", "0% APR"].
|
|
110
|
+
* **Extract:** None.
|
|
111
|
+
* **Action:** Move to `/Trash/Auto_Delete_30Days`. Log "Blocked Junk Mail".
|
|
112
|
+
|
|
113
|
+
### Policy B: The "Inbox Zero" Fallback (Priority: 0)
|
|
114
|
+
* **Match:** `*` (Matches everything that failed previous policies).
|
|
115
|
+
* **Extract:** `Summary` (1-sentence description).
|
|
116
|
+
* **Action:** Move to `/_Needs_Review`. Notify User: "Unrecognized document found."
|
|
117
|
+
|
|
118
|
+
### Policy C: The "Tax Dragnet" (Priority: 500)
|
|
119
|
+
* **Match:** Keywords ["Form W-2", "1099-INT", "1098-T", "Internal Revenue Service"].
|
|
120
|
+
* **Extract:** Tax Year, Form Type, Issuer, SSN (Last 4).
|
|
121
|
+
* **Action:** Move to `/Financial/Taxes/{tax_year}/Raw_Docs/`. Alert User: "Tax Document Detected."
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
125
|
+
## 4. Execution Logic (The Engine Code)
|
|
126
|
+
|
|
127
|
+
When a document enters the system, the Engine runs this exact logic loop:
|
|
128
|
+
|
|
129
|
+
1. **Load Policies:** Fetch all active YAML files (Local + Imported Packs).
|
|
130
|
+
2. **Sort Policies:** Order by `metadata.priority` (Descending).
|
|
131
|
+
3. **Iterate:**
|
|
132
|
+
* Run `match.conditions` for Policy 1.
|
|
133
|
+
* If **Match = True**:
|
|
134
|
+
* Stop iteration (First Match Wins strategy).
|
|
135
|
+
* Execute `extract`.
|
|
136
|
+
* **Derive Variables:** Run computed transformers (e.g., extract `year` from `due_date`).
|
|
137
|
+
* Validate `required` fields are present.
|
|
138
|
+
* Execute `actions`.
|
|
139
|
+
* Generate `manifest.json` sidecar file.
|
|
140
|
+
* If **Match = False**:
|
|
141
|
+
* Proceed to Policy 2.
|
|
142
|
+
4. **Fallback:** If no match found by end of list $\rightarrow$ Execute **Policy B (Inbox Zero)**.
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## 5. Advanced Features
|
|
147
|
+
|
|
148
|
+
### 5.1 Computed Variables (Transformers)
|
|
149
|
+
To keep paths clean, Folio automatically derives common variables from extracted data.
|
|
150
|
+
|
|
151
|
+
```yaml
|
|
152
|
+
# In section 2.3
|
|
153
|
+
extract:
|
|
154
|
+
- key: "bill_date"
|
|
155
|
+
type: "date"
|
|
156
|
+
transformers:
|
|
157
|
+
- name: "get_year"
|
|
158
|
+
as: "year" # Creates {year} for use in paths
|
|
159
|
+
- name: "get_month_name"
|
|
160
|
+
as: "month" # Creates {month} (e.g., "January")
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### 5.2 Multi-Page Splitting
|
|
164
|
+
If a policy is marked as `kind: Splitter`, it identifies page boundaries.
|
|
165
|
+
* **Strategy:** "LLM-Boundary" (LLM looks at first/last lines of pages to find new document headers).
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## 6. UX & Distribution Patterns
|
|
170
|
+
|
|
171
|
+
### 6.1 Policy Packs (The "Packs" System)
|
|
172
|
+
Policies can be bundled into JSON/YAML collections for easy sharing.
|
|
173
|
+
* **Discovery:** Users can "Search for Packs" (e.g., "Sunnyvale Utilities").
|
|
174
|
+
* **Pre-Configured:** Packs include verified regex/keywords for specific localized entities.
|
|
175
|
+
|
|
176
|
+
### 6.2 Natural Language Generation (AI-to-YAML)
|
|
177
|
+
The Folio UI provides a "Chat-to-Policy" interface.
|
|
178
|
+
* **Input:** "Put my Tesla invoices in a Car folder."
|
|
179
|
+
* **Logic:** Folio uses an internal LLM agent to:
|
|
180
|
+
1. Draft the `metadata.id` (e.g., `tesla-invoice`).
|
|
181
|
+
2. Select `match.conditions` (keyword: "Tesla").
|
|
182
|
+
3. Define `actions.move` (destination: "/Car/").
|
|
183
|
+
4. Persist the policy to the Supabase `policies` table (user-scoped via RLS).
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## 7. Error Handling
|
|
188
|
+
* **Validation Failure:** If `total_amount` is required but not found $\rightarrow$ Trigger "Human Review" workflow.
|
|
189
|
+
* **Hallucination Check:** If extracted date is `2025` but document says `2023` $\rightarrow$ Flag as "Date Mismatch".
|
|
190
|
+
|
|
191
|
+
---
|
|
192
|
+
|
|
193
|
+
## 8. Development Guidelines for Policies
|
|
194
|
+
* **Idempotency:** Running a policy twice on the same file should result in the same outcome (no duplicate CSV rows).
|
|
195
|
+
* **Modularity:** Policies are self-contained. Deleting a policy simply stops Folio from recognizing that document type.
|
|
196
|
+
* **Readability:** Keys defined in `extract` (e.g., `total_amount`) must exactly match the placeholders used in `actions` (e.g., `pattern: ...{total_amount}...`).
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Product Requirements Document (PRD)
|
|
2
|
+
|
|
3
|
+
**Project Name:** Folio
|
|
4
|
+
**Version:** 1.0
|
|
5
|
+
**Status:** Draft
|
|
6
|
+
**Owner:** Product Lead (You)
|
|
7
|
+
|
|
8
|
+
## 1. Executive Summary
|
|
9
|
+
Folio is an AI-powered "Chief of Staff" for personal documentation. It automates the lifecycle of physical and digital documents—from ingestion to data extraction—eliminating the manual labor of sorting mail, renaming files, and typing data into spreadsheets.
|
|
10
|
+
|
|
11
|
+
## 2. Problem Statement
|
|
12
|
+
Residents and professionals receive high volumes of physical mail and digital invoices.
|
|
13
|
+
* **The Bottleneck:** Manual digitization (scanning) is disconnected from organization (filing).
|
|
14
|
+
* **The Risk:** Critical documents (Tax forms, legal notices) are lost in "Downloads" folders or physical piles.
|
|
15
|
+
* **The Waste:** Valuable data (medical expenses, tax deductions) remains trapped on paper, requiring manual entry during tax season.
|
|
16
|
+
|
|
17
|
+
## 3. User Personas
|
|
18
|
+
* **The "Sunnyvale Local":** High net worth, complex taxes (RSUs, Investments), receives 20+ pieces of mail/week. Values time over money. Privacy-conscious.
|
|
19
|
+
* **The Expat/Immigrant:** Deals with visa documents, government notices, and international banking. Needs precise record-keeping.
|
|
20
|
+
|
|
21
|
+
## 4. Functional Requirements
|
|
22
|
+
|
|
23
|
+
### 4.1 Ingestion (The Funnel)
|
|
24
|
+
* **FR-01: Network Scanner Watch:** System must poll a local network folder (SMB/FTP) designated for a physical scanner (e.g., Fujitsu ScanSnap).
|
|
25
|
+
* **FR-02: Email Watch:** System must monitor a specific email alias (e.g., `docs@folio.local`) for digital invoices.
|
|
26
|
+
* **FR-03: Cloud Watch:** System must monitor a "Drop Zone" in Google Drive/Dropbox.
|
|
27
|
+
|
|
28
|
+
### 4.2 Classification (The Brain)
|
|
29
|
+
* **FR-04: Doc Type Detection:** AI must classify documents into user-defined categories: *Tax, Legal, Medical, Utility, Insurance, Personal, Junk.*
|
|
30
|
+
* **FR-05: Junk Filtering:** System must identify "Marketing/Spam" with >99% accuracy and move it to a `/Trash` folder for auto-deletion after 30 days.
|
|
31
|
+
* **FR-06: Multi-Page Splitting:** If a 20-page PDF contains 3 different letters, the system must split them into 3 separate files.
|
|
32
|
+
|
|
33
|
+
### 4.3 Organization (The Librarian)
|
|
34
|
+
* **FR-07: Smart Renaming:** Files must be renamed using a consistent convention: `YYYY-MM-DD_[Entity]_[Type]_[Amount/Ref].pdf`.
|
|
35
|
+
* **FR-08: Dynamic Directory Routing:** Files must be moved to a directory structure based on content:
|
|
36
|
+
* *Input:* PG&E Bill $\rightarrow$ *Output:* `/Utilities/Electricity/2024/`
|
|
37
|
+
* *Input:* W-2 Form $\rightarrow$ *Output:* `/Financial/Taxes/2024/Income/`
|
|
38
|
+
|
|
39
|
+
### 4.4 Data Extraction (The Analyst)
|
|
40
|
+
* **FR-09: Entity Extraction:** System must extract specific fields based on document type (Amount, Due Date, Tax Year, Box 1 Wages).
|
|
41
|
+
* **FR-10: CSV/Spreadsheet Append:** Extracted data must be appended to a "Master Ledger" (CSV or Google Sheet).
|
|
42
|
+
* **FR-11: Calendar Integration:** If a document has a future "Due Date" or "Appearance Date," create a calendar event (via `.ics` file or API).
|
|
43
|
+
|
|
44
|
+
## 5. Non-Functional Requirements
|
|
45
|
+
* **NFR-01: Privacy:** No data may be sent to public LLM APIs unless PII has been scrubbed first or a Zero-Retention enterprise API is used.
|
|
46
|
+
* **NFR-02: Latency:** Document processing time < 60 seconds per page.
|
|
47
|
+
* **NFR-03: Reliability:** Documents with low classification confidence (<80%) must be routed to a "Human-in-the-loop" folder for manual review.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# Foundation Checklist
|
|
2
|
+
|
|
3
|
+
## Local App
|
|
4
|
+
|
|
5
|
+
- [x] Vite React frontend shell
|
|
6
|
+
- [x] Local Express API server
|
|
7
|
+
- [x] Dev and build scripts
|
|
8
|
+
- [x] CLI wrappers (`folio`, `folio-setup`, `folio-deploy`)
|
|
9
|
+
|
|
10
|
+
## Supabase
|
|
11
|
+
|
|
12
|
+
- [x] Supabase config
|
|
13
|
+
- [x] Initial schema migration
|
|
14
|
+
- [x] Migration timestamp RPC
|
|
15
|
+
- [x] RLS policies for all baseline tables
|
|
16
|
+
- [x] Minimal edge function (`api-v1-settings`)
|
|
17
|
+
|
|
18
|
+
## RealTimeX SDK
|
|
19
|
+
|
|
20
|
+
- [x] SDK initialization service
|
|
21
|
+
- [x] availability checks
|
|
22
|
+
- [x] default provider selection
|
|
23
|
+
- [x] local processing dispatch stub route
|
|
24
|
+
|
|
25
|
+
## Gaps Before Feature Work
|
|
26
|
+
|
|
27
|
+
- [ ] Add typed DB client generation
|
|
28
|
+
- [x] Add CI workflow (typecheck/test/build/lint)
|
|
29
|
+
- [x] Add auth/session wiring in frontend
|
|
30
|
+
- [x] Add setup wizard UX parity with email-automator
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
# Folio Hybrid Routing Architecture
|
|
2
|
+
|
|
3
|
+
This document describes Folio's document ingestion pipeline — from raw file upload through triage, AI extraction, policy matching, and final actuation. The architecture is designed around a single principle: **spend the minimum compute necessary to make a fully-informed decision.**
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Architecture Diagram
|
|
8
|
+
|
|
9
|
+
```mermaid
|
|
10
|
+
graph TD
|
|
11
|
+
classDef folio fill:#4f46e5,stroke:#312e81,color:white;
|
|
12
|
+
classDef realtimex fill:#ea580c,stroke:#9a3412,color:white;
|
|
13
|
+
classDef db fill:#059669,stroke:#064e3b,color:white;
|
|
14
|
+
classDef llm fill:#7c3aed,stroke:#4c1d95,color:white;
|
|
15
|
+
|
|
16
|
+
A["New File Ingestion (UI Upload / Watcher)"] --> B{Extension Gate}
|
|
17
|
+
|
|
18
|
+
subgraph FolioFast["Folio — Local Node.js (Fast Path)"]
|
|
19
|
+
B -->|"txt md csv json"| TE["Extract Text from Buffer"]
|
|
20
|
+
B -->|"pdf"| ST{"Smart PDF Triage\n4-Signal Classifier"}
|
|
21
|
+
ST -->|"All 4 signals pass"| TE
|
|
22
|
+
|
|
23
|
+
TE --> S1["① Baseline Extraction · LLM Call 1 of max 2\ndocument_type · issuer · date · amount\nsubject · tags[ ] · _uncertain_fields[ ]"]
|
|
24
|
+
|
|
25
|
+
S1 --> PS{"② Policy Scoring\nZero LLM calls — deterministic\nMatch baseline entities vs. conditions"}
|
|
26
|
+
|
|
27
|
+
PS -->|"score > 0.75\nDEFINITE MATCH"| DA["Augment with policy-specific\nfields not already in baseline"]
|
|
28
|
+
PS -->|"0.3 ≤ score ≤ 0.75\nUNCERTAIN"| DC["③ Targeted Deep Call · LLM Call 2 of max 2\nResolves only the blocking fields\nfor the top candidate policy"]
|
|
29
|
+
PS -->|"score < 0.3\nDEFINITE NO-MATCH"| NM["No Match\nBaseline entities persisted"]
|
|
30
|
+
|
|
31
|
+
DC --> RS{"Re-score candidate\nafter deep extraction"}
|
|
32
|
+
RS -->|"Match confirmed"| DA
|
|
33
|
+
RS -->|"Still no match"| NM
|
|
34
|
+
|
|
35
|
+
DA --> ACT["Actuator\nmove · rename · log_csv · notify"]
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
subgraph HeavyPath["Local Dropzone (Heavy Path)"]
|
|
39
|
+
B -->|"images / binaries"| DZ["Write to Physical\nDropzone Folder"]
|
|
40
|
+
ST -->|"Any signal fails\nscanned / encrypted PDF"| DZ
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
subgraph DBLayer["Supabase Database"]
|
|
44
|
+
DZ --> RTA[("rtx_activities\nstatus: pending")]
|
|
45
|
+
ACT --> ING[("ingestions\nstatus: matched\nextracted: full entities")]
|
|
46
|
+
NM --> ING2[("ingestions\nstatus: no_match\nextracted: baseline entities")]
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
subgraph RTXWorker["RealTimeX Desktop — GPU Worker"]
|
|
50
|
+
RTA -->|"Claim Task"| WQ["Worker Queue"]
|
|
51
|
+
WQ --> OCR["Docling OCR / VLM Processing"]
|
|
52
|
+
OCR --> SO["Structured JSON Output"]
|
|
53
|
+
SO -->|"RPC: Complete Task"| RTA
|
|
54
|
+
RTA -.->|"Realtime subscription"| RACT["Folio Actuator\nPolicy Match + Execute"]
|
|
55
|
+
RACT --> ING
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
class A,B,TE,ST,PS,DA,RS,NM,ACT folio;
|
|
59
|
+
class S1,DC llm;
|
|
60
|
+
class WQ,OCR,SO,RACT realtimex;
|
|
61
|
+
class RTA,ING,ING2 db;
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
## Stage 0 — Triage
|
|
67
|
+
|
|
68
|
+
Before any AI work is done, the file is routed by two cheap, zero-LLM checks.
|
|
69
|
+
|
|
70
|
+
### Extension Gate
|
|
71
|
+
|
|
72
|
+
| Extension | Route |
|
|
73
|
+
|---|---|
|
|
74
|
+
| `.txt` `.md` `.csv` `.json` | Fast Path — text extracted directly from the upload buffer |
|
|
75
|
+
| `.pdf` | Passes to Smart PDF Triage |
|
|
76
|
+
| Everything else (images, `.docx`, binaries) | Heavy Path — written to the physical Dropzone |
|
|
77
|
+
|
|
78
|
+
### Smart PDF Triage — 4-Signal Classifier
|
|
79
|
+
|
|
80
|
+
A PDF hits the 4-signal classifier before any LLM is invoked. **All four signals must pass** for the document to take the Fast Path. A single failure routes to Heavy Path.
|
|
81
|
+
|
|
82
|
+
| Signal | Threshold | What it catches |
|
|
83
|
+
|---|---|---|
|
|
84
|
+
| **1. Normalized content length** | ≥ 100 chars after whitespace collapse | Truly empty or near-empty extractions |
|
|
85
|
+
| **2. Word count** (Unicode `\p{L}`) | ≥ 20 word-like tokens | Digit-only / symbol-only garbage; also handles non-Latin scripts without bias |
|
|
86
|
+
| **3. Garbage character ratio** | < 2% control chars + `U+FFFD` | Image bytes mis-decoded as text — the fingerprint of scanned or font-subset PDFs |
|
|
87
|
+
| **4. Page coverage** (multi-page only) | ≥ 40% of pages yield > 30 non-whitespace chars | Mixed scan+digital docs (e.g. a scanned contract with a text cover page) |
|
|
88
|
+
|
|
89
|
+
PDFs that fail triage (scanned images, encrypted files, or font-subset PDFs whose text decodes to garbage) are written to the Dropzone and delegated to the RealTimeX GPU worker for full OCR.
|
|
90
|
+
|
|
91
|
+
---
|
|
92
|
+
|
|
93
|
+
## Stage 1 — Baseline Extraction (LLM Call 1 of max 2)
|
|
94
|
+
|
|
95
|
+
Once a document is on the Fast Path, **the first thing that always happens is a single LLM call** to extract a fixed baseline schema from the raw text. This runs unconditionally — before any policy is evaluated.
|
|
96
|
+
|
|
97
|
+
```ts
|
|
98
|
+
interface BaselineEntities {
|
|
99
|
+
document_type: string; // "invoice" | "contract" | "receipt" | "report" | ...
|
|
100
|
+
issuer: string | null; // who sent / issued the document
|
|
101
|
+
recipient: string | null; // who it is addressed to
|
|
102
|
+
date: string | null; // primary date (ISO 8601)
|
|
103
|
+
amount: number | null; // monetary value if present
|
|
104
|
+
currency: string | null; // "USD" | "EUR" | ...
|
|
105
|
+
subject: string; // one-line description of the document
|
|
106
|
+
tags: string[]; // semantic labels: ["subscription", "renewal", "tax", ...]
|
|
107
|
+
_uncertain_fields: string[]; // fields the model flagged as ambiguous or missing
|
|
108
|
+
}
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
The `tags[]` array is the most powerful field for policy matching — it lets the model assign semantic labels during extraction so that downstream matching can be deterministic rather than requiring additional LLM calls.
|
|
112
|
+
|
|
113
|
+
`_uncertain_fields` is the signal the engine uses in Stage 3 to decide whether a second call is justified.
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Stage 2 — Policy Scoring (Zero LLM calls)
|
|
118
|
+
|
|
119
|
+
After baseline extraction, each enabled user policy is scored deterministically against the extracted entities. **No LLM is invoked at this stage.**
|
|
120
|
+
|
|
121
|
+
Policy conditions are evaluated as follows:
|
|
122
|
+
|
|
123
|
+
| Condition type | Evaluation against baseline |
|
|
124
|
+
|---|---|
|
|
125
|
+
| `keyword` | Text scan on raw document text (unchanged) |
|
|
126
|
+
| `entity_equals` | Exact match on a baseline field (`document_type === "invoice"`) |
|
|
127
|
+
| `entity_contains` | Substring / array membership check (`tags.includes("renewal")`) |
|
|
128
|
+
| `llm_verify` | Cannot be resolved from baseline — treated as an open question, lowers the policy score |
|
|
129
|
+
|
|
130
|
+
Each policy receives a score in the range `[0, 1]` based on how many of its conditions are satisfied and how many are unresolvable.
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## Stage 3 — Confidence Decision
|
|
135
|
+
|
|
136
|
+
The highest-scoring candidate policy falls into one of three zones:
|
|
137
|
+
|
|
138
|
+
```
|
|
139
|
+
DEFINITE NO UNCERTAIN ZONE DEFINITE YES
|
|
140
|
+
score < 0.3 0.3 ≤ score ≤ 0.75 score > 0.75
|
|
141
|
+
│ │ │
|
|
142
|
+
No match. Fire Targeted Direct match.
|
|
143
|
+
Save baseline. Deep Call (Stage 3b) Augment & execute.
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Stage 3b — Targeted Deep Call (LLM Call 2 of max 2, conditional)
|
|
147
|
+
|
|
148
|
+
The deep call only fires when **all three conditions hold**:
|
|
149
|
+
|
|
150
|
+
1. A candidate policy exists with score in the uncertain zone
|
|
151
|
+
2. Its score is higher than the current best confirmed match (if any)
|
|
152
|
+
3. The blocking fields are plausibly present in the document (no point asking for `invoice_number` if `document_type` came back as `"presentation"`)
|
|
153
|
+
|
|
154
|
+
The prompt for the deep call is surgically constructed from the candidate's open questions — not a repeat of baseline:
|
|
155
|
+
|
|
156
|
+
> *"We believe this may be a [Stripe Invoice] (confidence: 0.61). To confirm, extract only the following fields that remain unresolved: `subscription_plan`, `invoice_number`, `billing_period`. Return null for fields not present."*
|
|
157
|
+
|
|
158
|
+
After the deep call, the candidate policy is re-scored. It either confirms to a definite match or drops to a definite no-match. The LLM budget is exhausted after this call — no further inference occurs.
|
|
159
|
+
|
|
160
|
+
**LLM call budget per document: max 2.**
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
## Stage 4 — Actuation
|
|
165
|
+
|
|
166
|
+
Regardless of outcome, **baseline entities are always persisted** on the `ingestions` record. A document is never left with an empty `extracted` field.
|
|
167
|
+
|
|
168
|
+
| Outcome | `ingestions.status` | `ingestions.extracted` |
|
|
169
|
+
|---|---|---|
|
|
170
|
+
| Definite match (Stage 3) | `matched` | Baseline + policy-specific fields |
|
|
171
|
+
| Match after deep call (Stage 3b) | `matched` | Baseline + deep call fields |
|
|
172
|
+
| No match | `no_match` | Baseline entities only |
|
|
173
|
+
|
|
174
|
+
For matched documents, the **Actuator** executes the policy's declared actions: `move`, `rename`, `log_csv`, `notify`.
|
|
175
|
+
|
|
176
|
+
For Heavy Path documents processed by RealTimeX, the same Actuation stage runs after the GPU worker completes OCR and returns structured JSON on the `rtx_activities` record via the `rtx_fn_complete_task` RPC.
|
|
177
|
+
|
|
178
|
+
---
|
|
179
|
+
|
|
180
|
+
## Heavy Path — RealTimeX GPU Worker
|
|
181
|
+
|
|
182
|
+
Documents that fail triage (scanned PDFs, images, encrypted files) are written to the user's configured Dropzone folder and a `rtx_activities` record is inserted with `status: pending`. The RealTimeX Desktop app:
|
|
183
|
+
|
|
184
|
+
1. Listens for pending tasks via a Supabase Realtime subscription
|
|
185
|
+
2. Claims the task (`rtx_fn_claim_task`) with a machine ID lock
|
|
186
|
+
3. Reads the file from the physical `file_path` in the payload
|
|
187
|
+
4. Runs it through the Docling OCR / VLM pipeline
|
|
188
|
+
5. Calls `rtx_fn_complete_task` with the structured JSON result
|
|
189
|
+
6. Folio's Realtime subscription picks up the completion and routes it through the same Actuation stage as the Fast Path
|
|
190
|
+
|
|
191
|
+
Stale locks (tasks claimed but not completed within 5 minutes) are automatically released by a `pg_cron` job every minute.
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## Design Principles
|
|
196
|
+
|
|
197
|
+
**Extract-first, match-second.** Baseline extraction runs before any policy is evaluated. This ensures matching operates on structured entities, not raw text, eliminating redundant LLM calls for questions the baseline already answers.
|
|
198
|
+
|
|
199
|
+
**Spend compute where decisions are uncertain.** The second LLM call is gated behind a confidence check. Definite matches and definite non-matches never need it. Only genuinely ambiguous documents pay for deeper inference.
|
|
200
|
+
|
|
201
|
+
**Entities are always the output.** Every document that touches the Fast Path emerges with structured baseline entities — whether it matches a policy or not. This supports future search, audit, and retrospective policy creation.
|
|
202
|
+
|
|
203
|
+
**GPU resources are reserved for what requires them.** The 4-signal PDF classifier prevents digital text documents from ever reaching the GPU worker. OCR is expensive; it only runs when the document is genuinely image-based.
|
|
204
|
+
|
|
205
|
+
**LLM budget is bounded and explicit.** Maximum 2 LLM calls per document. The pipeline never recurses or speculates beyond a single deep call.
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
### 1. The Ingestion Architecture: "Local Microservices" Pattern
|
|
2
|
+
|
|
3
|
+
Think of the `realtimex.ai` desktop app as your backend API, even though it's running on the same machine.
|
|
4
|
+
|
|
5
|
+
* **Folio (The Client):** Watcher script + UI + Database connector.
|
|
6
|
+
* **RealTimeX (The Server):** The GPU-accelerated runtime hosting Docling and the VLM.
|
|
7
|
+
|
|
8
|
+
### 2. The Ingestion Workflow (Step-by-Step)
|
|
9
|
+
|
|
10
|
+
#### Step 0: The Dropzone
|
|
11
|
+
* Folio does **not** store raw document bytes in the database.
|
|
12
|
+
* Folio creates a physical "Dropzone" folder on the local machine (e.g., `~/.realtimex/folio/dropzone` or a configured `storage_path`).
|
|
13
|
+
|
|
14
|
+
#### Step 1: Ingestion (Folio)
|
|
15
|
+
* Folio detects or receives `scan_001.pdf` via the UI or Watcher.
|
|
16
|
+
* Folio saves the physical file into the **Dropzone**.
|
|
17
|
+
* Folio generates a `task_id` and inserts a task into the `rtx_activities` table (Compatible Mode).
|
|
18
|
+
* **Crucially**: The `raw_data` JSON only contains a metadata pointer to the physical file: `{ "file_path": "/Users/local/.realtimex/folio/dropzone/scan_001.pdf" }`.
|
|
19
|
+
|
|
20
|
+
#### Step 2: The Handshake (Folio → RealTimeX SDK)
|
|
21
|
+
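Folio uses the RealTimeX SDK's Compatible Mode to queue the work: the `rtx_activities` row inserted in Step 1 is the queue entry, and the RealTimeX worker later claims it via `rtx_fn_claim_task`. The extraction schema (derived from the Policy) is also provided so the LLM output is strictly typed JSON.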
|
|
22
|
+
|
|
23
|
+
#### Step 3: Execution (RealTimeX Runtime)
|
|
24
|
+
* The standalone RealTimeX Desktop app picks up the `rtx_activities` task.
|
|
25
|
+
* **Layer 1 (Docling):** RealTimeX reads the physical file from the Dropzone pointer and runs Docling to convert it into structured Markdown.
|
|
26
|
+
* **Layer 2 (LLM Inference):** RealTimeX feeds that Markdown into the LLM with the JSON Schema.
|
|
27
|
+
* **Return:** RealTimeX returns the unified extraction and updates `rtx_activities.result` (via `rtx_fn_complete_task`).
|
|
28
|
+
|
|
29
|
+
#### Step 4: Persistence & Actuation (Folio)
|
|
30
|
+
Folio detects the completed task.
|
|
31
|
+
|
|
32
|
+
* `rtx_activities.result` contains the extracted JSON.
|
|
33
|
+
* Folio matches the JSON data against the active Policy.
|
|
34
|
+
* Folio executes the Policy Actions (e.g., moving the physical file out of the Dropzone into the final `Taxes/2026` folder).
|
|
35
|
+
* `entities` table (JSONB column): Stores the extracted data (`{ amount: 145.20 }`).
|
|
36
|
+
* **Vector Embeddings:** If RealTimeX supports it, ask for embeddings of the Markdown during Step 3. Save these to Supabase `pgvector` for semantic search later ("Show me all bills from last winter").
|
|
37
|
+
|
|
38
|
+
### 3. Critical Considerations for this Setup
|
|
39
|
+
|
|
40
|
+
#### A. Asynchronous Queuing
|
|
41
|
+
Docling + LLM is **slow** (comparatively). It might take 5–15 seconds per page depending on the GPU.
|
|
42
|
+
* **Do not block the Folio UI.**
|
|
43
|
+
* Folio should display a "Processing..." state.
|
|
44
|
+
* RealTimeX SDK likely supports streams or callbacks. Use them to show a progress bar.
|
|
45
|
+
|
|
46
|
+
#### B. Error Handling (The "Retry" Loop)
|
|
47
|
+
* If RealTimeX returns `null` or a hallucination (e.g., date is "tomorrow"), Folio needs a UI for the user to fix it.
|
|
48
|
+
* **The "Human-in-the-Loop":** The UI should show the PDF on the left and the extracted Form on the right. If confidence is low, highlight the field in Red.
|
|
49
|
+
|
|
50
|
+
### 4. Revised Architecture Diagram
|
|
51
|
+
|
|
52
|
+
```mermaid
|
|
53
|
+
graph LR
|
|
54
|
+
A[Upload/Watcher] -->|Save File| B(Local Dropzone)
|
|
55
|
+
A -->|Queue Task| C[(rtx_activities)]
|
|
56
|
+
|
|
57
|
+
subgraph "RealTimeX Runtime (Localhost)"
|
|
58
|
+
C -->|Claim Task| D[RealTimeX Worker]
|
|
59
|
+
D -->|Read File| B
|
|
60
|
+
D -->|PDF -> MD| E[Docling Engine]
|
|
61
|
+
E -->|MD + Schema| F[Local LLM]
|
|
62
|
+
F -->|Extracted JSON| G[Task Result]
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
G -->|Update Task| C
|
|
66
|
+
C -->|Trigger Action| H(Folio Actuator)
|
|
67
|
+
H -->|Move File| I[Organized Final Folder]
|
|
68
|
+
```
|
|
69
|
+
|