fin-infra 0.1.62__py3-none-any.whl → 0.1.82__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fin_infra/__init__.py +53 -3
- fin_infra/analytics/__init__.py +13 -2
- fin_infra/analytics/add.py +30 -32
- fin_infra/analytics/cash_flow.py +6 -5
- fin_infra/analytics/ease.py +19 -20
- fin_infra/analytics/portfolio.py +19 -26
- fin_infra/analytics/projections.py +1 -3
- fin_infra/analytics/rebalancing.py +2 -4
- fin_infra/analytics/savings.py +1 -1
- fin_infra/analytics/spending.py +15 -11
- fin_infra/banking/__init__.py +33 -31
- fin_infra/banking/history.py +11 -12
- fin_infra/banking/utils.py +116 -110
- fin_infra/brokerage/__init__.py +27 -27
- fin_infra/budgets/__init__.py +3 -3
- fin_infra/budgets/add.py +16 -17
- fin_infra/budgets/alerts.py +3 -3
- fin_infra/budgets/tracker.py +4 -5
- fin_infra/cashflows/__init__.py +8 -10
- fin_infra/cashflows/core.py +1 -1
- fin_infra/categorization/__init__.py +1 -1
- fin_infra/categorization/add.py +17 -19
- fin_infra/categorization/ease.py +3 -4
- fin_infra/categorization/engine.py +21 -18
- fin_infra/categorization/llm_layer.py +10 -10
- fin_infra/categorization/models.py +1 -1
- fin_infra/categorization/rules.py +2 -4
- fin_infra/categorization/taxonomy.py +2 -2
- fin_infra/chat/__init__.py +13 -22
- fin_infra/chat/planning.py +57 -1
- fin_infra/cli/cmds/scaffold_cmds.py +11 -12
- fin_infra/clients/__init__.py +23 -1
- fin_infra/clients/base.py +1 -1
- fin_infra/clients/plaid.py +2 -2
- fin_infra/compliance/__init__.py +7 -6
- fin_infra/credit/add.py +7 -7
- fin_infra/credit/experian/auth.py +3 -2
- fin_infra/credit/experian/client.py +2 -2
- fin_infra/credit/experian/provider.py +19 -19
- fin_infra/crypto/__init__.py +8 -10
- fin_infra/crypto/insights.py +5 -6
- fin_infra/documents/add.py +11 -13
- fin_infra/documents/analysis.py +9 -9
- fin_infra/documents/ease.py +18 -17
- fin_infra/documents/models.py +7 -7
- fin_infra/documents/ocr.py +8 -8
- fin_infra/documents/storage.py +23 -14
- fin_infra/exceptions.py +1 -2
- fin_infra/goals/__init__.py +8 -8
- fin_infra/goals/add.py +36 -36
- fin_infra/goals/funding.py +4 -6
- fin_infra/goals/management.py +6 -7
- fin_infra/goals/milestones.py +2 -3
- fin_infra/goals/models.py +7 -11
- fin_infra/insights/__init__.py +12 -10
- fin_infra/insights/aggregator.py +1 -1
- fin_infra/investments/__init__.py +14 -9
- fin_infra/investments/add.py +53 -73
- fin_infra/investments/ease.py +16 -13
- fin_infra/investments/models.py +135 -69
- fin_infra/investments/providers/base.py +9 -15
- fin_infra/investments/providers/plaid.py +70 -55
- fin_infra/investments/providers/snaptrade.py +35 -53
- fin_infra/markets/__init__.py +16 -11
- fin_infra/models/__init__.py +10 -10
- fin_infra/models/accounts.py +2 -1
- fin_infra/models/brokerage.py +2 -1
- fin_infra/models/candle.py +1 -0
- fin_infra/models/money.py +1 -0
- fin_infra/models/quotes.py +4 -3
- fin_infra/models/tax.py +2 -1
- fin_infra/models/transactions.py +4 -4
- fin_infra/net_worth/__init__.py +7 -0
- fin_infra/net_worth/add.py +8 -5
- fin_infra/net_worth/aggregator.py +9 -6
- fin_infra/net_worth/calculator.py +8 -6
- fin_infra/net_worth/ease.py +36 -15
- fin_infra/net_worth/insights.py +4 -5
- fin_infra/net_worth/models.py +237 -116
- fin_infra/normalization/__init__.py +17 -15
- fin_infra/normalization/providers/exchangerate.py +5 -5
- fin_infra/obs/classifier.py +3 -3
- fin_infra/providers/banking/plaid_client.py +23 -22
- fin_infra/providers/banking/teller_client.py +14 -7
- fin_infra/providers/base.py +131 -14
- fin_infra/providers/brokerage/alpaca.py +7 -7
- fin_infra/providers/credit/experian.py +5 -0
- fin_infra/providers/market/alphavantage.py +6 -11
- fin_infra/providers/market/ccxt_crypto.py +25 -4
- fin_infra/providers/market/coingecko.py +5 -6
- fin_infra/providers/market/yahoo.py +23 -8
- fin_infra/providers/tax/__init__.py +1 -1
- fin_infra/providers/tax/irs.py +1 -1
- fin_infra/providers/tax/mock.py +8 -8
- fin_infra/providers/tax/taxbit.py +1 -1
- fin_infra/recurring/__init__.py +6 -6
- fin_infra/recurring/add.py +24 -12
- fin_infra/recurring/detector.py +8 -8
- fin_infra/recurring/detectors_llm.py +14 -13
- fin_infra/recurring/ease.py +3 -5
- fin_infra/recurring/insights.py +20 -19
- fin_infra/recurring/models.py +3 -3
- fin_infra/recurring/normalizer.py +3 -2
- fin_infra/recurring/normalizers.py +11 -10
- fin_infra/recurring/summary.py +13 -15
- fin_infra/scaffold/__init__.py +1 -1
- fin_infra/scaffold/budgets.py +9 -9
- fin_infra/scaffold/goals.py +5 -5
- fin_infra/security/__init__.py +8 -8
- fin_infra/security/encryption.py +6 -6
- fin_infra/security/models.py +7 -7
- fin_infra/security/pii_filter.py +6 -6
- fin_infra/security/pii_patterns.py +1 -1
- fin_infra/security/token_store.py +3 -1
- fin_infra/settings.py +2 -1
- fin_infra/tax/__init__.py +2 -2
- fin_infra/tax/add.py +3 -2
- fin_infra/tax/tlh.py +5 -5
- fin_infra/utils/http.py +5 -3
- fin_infra/utils/retry.py +2 -1
- {fin_infra-0.1.62.dist-info → fin_infra-0.1.82.dist-info}/METADATA +14 -9
- fin_infra-0.1.82.dist-info/RECORD +180 -0
- fin_infra-0.1.62.dist-info/RECORD +0 -180
- {fin_infra-0.1.62.dist-info → fin_infra-0.1.82.dist-info}/LICENSE +0 -0
- {fin_infra-0.1.62.dist-info → fin_infra-0.1.82.dist-info}/WHEEL +0 -0
- {fin_infra-0.1.62.dist-info → fin_infra-0.1.82.dist-info}/entry_points.txt +0 -0
|
@@ -30,8 +30,8 @@ Example:
|
|
|
30
30
|
"""
|
|
31
31
|
|
|
32
32
|
import logging
|
|
33
|
-
from datetime import
|
|
34
|
-
from typing import Literal
|
|
33
|
+
from datetime import UTC, datetime
|
|
34
|
+
from typing import Literal, cast
|
|
35
35
|
|
|
36
36
|
from fin_infra.credit.experian.auth import ExperianAuthManager
|
|
37
37
|
from fin_infra.credit.experian.client import ExperianClient
|
|
@@ -174,10 +174,10 @@ class ExperianProvider(CreditProvider):
|
|
|
174
174
|
permissible_purpose = kwargs.get("permissible_purpose", "account_review")
|
|
175
175
|
requester_ip = kwargs.get("requester_ip", "unknown")
|
|
176
176
|
requester_user_id = kwargs.get("requester_user_id", "unknown")
|
|
177
|
-
|
|
177
|
+
|
|
178
178
|
# FCRA Audit Log - REQUIRED for regulatory compliance (15 USC § 1681b)
|
|
179
179
|
# This log must be retained for at least 2 years per FCRA requirements
|
|
180
|
-
timestamp = datetime.now(
|
|
180
|
+
timestamp = datetime.now(UTC).isoformat()
|
|
181
181
|
fcra_audit_logger.info(
|
|
182
182
|
"FCRA_CREDIT_PULL",
|
|
183
183
|
extra={
|
|
@@ -190,7 +190,7 @@ class ExperianProvider(CreditProvider):
|
|
|
190
190
|
"environment": self.environment,
|
|
191
191
|
"timestamp": timestamp,
|
|
192
192
|
"result": "pending",
|
|
193
|
-
}
|
|
193
|
+
},
|
|
194
194
|
)
|
|
195
195
|
|
|
196
196
|
try:
|
|
@@ -202,7 +202,7 @@ class ExperianProvider(CreditProvider):
|
|
|
202
202
|
|
|
203
203
|
# Parse response to CreditScore model
|
|
204
204
|
result = parse_credit_score(data, user_id=user_id)
|
|
205
|
-
|
|
205
|
+
|
|
206
206
|
# Log successful pull
|
|
207
207
|
fcra_audit_logger.info(
|
|
208
208
|
"FCRA_CREDIT_PULL_SUCCESS",
|
|
@@ -213,11 +213,11 @@ class ExperianProvider(CreditProvider):
|
|
|
213
213
|
"timestamp": timestamp,
|
|
214
214
|
"result": "success",
|
|
215
215
|
"score_returned": result.score is not None,
|
|
216
|
-
}
|
|
216
|
+
},
|
|
217
217
|
)
|
|
218
|
-
|
|
218
|
+
|
|
219
219
|
return result
|
|
220
|
-
|
|
220
|
+
|
|
221
221
|
except Exception as e:
|
|
222
222
|
# Log failed pull - still required for FCRA audit trail
|
|
223
223
|
fcra_audit_logger.warning(
|
|
@@ -229,7 +229,7 @@ class ExperianProvider(CreditProvider):
|
|
|
229
229
|
"timestamp": timestamp,
|
|
230
230
|
"result": "error",
|
|
231
231
|
"error_type": type(e).__name__,
|
|
232
|
-
}
|
|
232
|
+
},
|
|
233
233
|
)
|
|
234
234
|
raise
|
|
235
235
|
|
|
@@ -262,11 +262,11 @@ class ExperianProvider(CreditProvider):
|
|
|
262
262
|
permissible_purpose = kwargs.get("permissible_purpose", "account_review")
|
|
263
263
|
requester_ip = kwargs.get("requester_ip", "unknown")
|
|
264
264
|
requester_user_id = kwargs.get("requester_user_id", "unknown")
|
|
265
|
-
|
|
265
|
+
|
|
266
266
|
# FCRA Audit Log - REQUIRED for regulatory compliance (15 USC § 1681b)
|
|
267
267
|
# Full credit report pulls have stricter requirements than score-only pulls
|
|
268
268
|
# This log must be retained for at least 2 years per FCRA requirements
|
|
269
|
-
timestamp = datetime.now(
|
|
269
|
+
timestamp = datetime.now(UTC).isoformat()
|
|
270
270
|
fcra_audit_logger.info(
|
|
271
271
|
"FCRA_CREDIT_PULL",
|
|
272
272
|
extra={
|
|
@@ -280,7 +280,7 @@ class ExperianProvider(CreditProvider):
|
|
|
280
280
|
"timestamp": timestamp,
|
|
281
281
|
"result": "pending",
|
|
282
282
|
"report_type": "full",
|
|
283
|
-
}
|
|
283
|
+
},
|
|
284
284
|
)
|
|
285
285
|
|
|
286
286
|
try:
|
|
@@ -292,7 +292,7 @@ class ExperianProvider(CreditProvider):
|
|
|
292
292
|
|
|
293
293
|
# Parse response to CreditReport model
|
|
294
294
|
result = parse_credit_report(data, user_id=user_id)
|
|
295
|
-
|
|
295
|
+
|
|
296
296
|
# Log successful pull
|
|
297
297
|
fcra_audit_logger.info(
|
|
298
298
|
"FCRA_CREDIT_PULL_SUCCESS",
|
|
@@ -304,11 +304,11 @@ class ExperianProvider(CreditProvider):
|
|
|
304
304
|
"result": "success",
|
|
305
305
|
"accounts_returned": len(result.accounts) if result.accounts else 0,
|
|
306
306
|
"inquiries_returned": len(result.inquiries) if result.inquiries else 0,
|
|
307
|
-
}
|
|
307
|
+
},
|
|
308
308
|
)
|
|
309
|
-
|
|
309
|
+
|
|
310
310
|
return result
|
|
311
|
-
|
|
311
|
+
|
|
312
312
|
except Exception as e:
|
|
313
313
|
# Log failed pull - still required for FCRA audit trail
|
|
314
314
|
fcra_audit_logger.warning(
|
|
@@ -320,7 +320,7 @@ class ExperianProvider(CreditProvider):
|
|
|
320
320
|
"timestamp": timestamp,
|
|
321
321
|
"result": "error",
|
|
322
322
|
"error_type": type(e).__name__,
|
|
323
|
-
}
|
|
323
|
+
},
|
|
324
324
|
)
|
|
325
325
|
raise
|
|
326
326
|
|
|
@@ -360,4 +360,4 @@ class ExperianProvider(CreditProvider):
|
|
|
360
360
|
signature_key=signature_key,
|
|
361
361
|
)
|
|
362
362
|
|
|
363
|
-
return data.get("subscriptionId", "unknown")
|
|
363
|
+
return cast("str", data.get("subscriptionId", "unknown"))
|
fin_infra/crypto/__init__.py
CHANGED
|
@@ -13,7 +13,7 @@ Quick start:
|
|
|
13
13
|
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
|
-
from datetime import
|
|
16
|
+
from datetime import UTC, datetime
|
|
17
17
|
from typing import TYPE_CHECKING, Literal
|
|
18
18
|
|
|
19
19
|
if TYPE_CHECKING:
|
|
@@ -70,11 +70,11 @@ def easy_crypto(
|
|
|
70
70
|
return CoinGeckoCryptoData()
|
|
71
71
|
|
|
72
72
|
else:
|
|
73
|
-
raise ValueError(f"Unknown crypto data provider: {provider_name}.
|
|
73
|
+
raise ValueError(f"Unknown crypto data provider: {provider_name}. Supported: coingecko")
|
|
74
74
|
|
|
75
75
|
|
|
76
76
|
def add_crypto_data(
|
|
77
|
-
app:
|
|
77
|
+
app: FastAPI,
|
|
78
78
|
*,
|
|
79
79
|
provider: str | CryptoDataProvider | None = None,
|
|
80
80
|
prefix: str = "/crypto",
|
|
@@ -131,9 +131,9 @@ def add_crypto_data(
|
|
|
131
131
|
>>> add_observability(app)
|
|
132
132
|
>>> crypto = add_crypto_data(app)
|
|
133
133
|
"""
|
|
134
|
-
from svc_infra.api.fastapi.dual.public import public_router
|
|
135
|
-
from svc_infra.api.fastapi.docs.scoped import add_prefixed_docs
|
|
136
134
|
from fastapi import HTTPException, Query
|
|
135
|
+
from svc_infra.api.fastapi.docs.scoped import add_prefixed_docs
|
|
136
|
+
from svc_infra.api.fastapi.dual.public import public_router
|
|
137
137
|
|
|
138
138
|
# Initialize provider if string or None
|
|
139
139
|
if isinstance(provider, str):
|
|
@@ -168,11 +168,11 @@ def add_crypto_data(
|
|
|
168
168
|
"price": float(ticker.price),
|
|
169
169
|
"as_of": ticker.as_of.isoformat()
|
|
170
170
|
if ticker.as_of
|
|
171
|
-
else datetime.now(
|
|
171
|
+
else datetime.now(UTC).isoformat(),
|
|
172
172
|
}
|
|
173
173
|
except Exception as e:
|
|
174
174
|
raise HTTPException(
|
|
175
|
-
status_code=400, detail=f"Error fetching ticker for {symbol}: {
|
|
175
|
+
status_code=400, detail=f"Error fetching ticker for {symbol}: {e!s}"
|
|
176
176
|
)
|
|
177
177
|
|
|
178
178
|
@router.get("/ohlcv/{symbol}")
|
|
@@ -216,9 +216,7 @@ def add_crypto_data(
|
|
|
216
216
|
],
|
|
217
217
|
}
|
|
218
218
|
except Exception as e:
|
|
219
|
-
raise HTTPException(
|
|
220
|
-
status_code=400, detail=f"Error fetching OHLCV for {symbol}: {str(e)}"
|
|
221
|
-
)
|
|
219
|
+
raise HTTPException(status_code=400, detail=f"Error fetching OHLCV for {symbol}: {e!s}")
|
|
222
220
|
|
|
223
221
|
# Mount router
|
|
224
222
|
app.include_router(router, include_in_schema=True)
|
fin_infra/crypto/insights.py
CHANGED
|
@@ -8,6 +8,7 @@ CRITICAL: Uses ai-infra.llm.LLM (NEVER custom LLM clients).
|
|
|
8
8
|
|
|
9
9
|
from __future__ import annotations
|
|
10
10
|
|
|
11
|
+
import logging
|
|
11
12
|
from datetime import datetime
|
|
12
13
|
from decimal import Decimal
|
|
13
14
|
from typing import TYPE_CHECKING
|
|
@@ -17,6 +18,8 @@ from pydantic import BaseModel, Field
|
|
|
17
18
|
if TYPE_CHECKING:
|
|
18
19
|
from ai_infra.llm import LLM
|
|
19
20
|
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
20
23
|
|
|
21
24
|
class CryptoInsight(BaseModel):
|
|
22
25
|
"""Personalized cryptocurrency insight.
|
|
@@ -258,10 +261,8 @@ Provide your insight:"""
|
|
|
258
261
|
|
|
259
262
|
try:
|
|
260
263
|
# Use natural language conversation (no output_schema)
|
|
261
|
-
# Note: In tests, achat is mocked with messages= parameter
|
|
262
|
-
# In production, this should use user_msg, provider, model_name parameters
|
|
263
264
|
response = await llm.achat(
|
|
264
|
-
|
|
265
|
+
user_msg=prompt,
|
|
265
266
|
)
|
|
266
267
|
|
|
267
268
|
# Parse response text
|
|
@@ -286,8 +287,6 @@ Provide your insight:"""
|
|
|
286
287
|
)
|
|
287
288
|
)
|
|
288
289
|
except Exception as e:
|
|
289
|
-
|
|
290
|
-
# In production, use svc-infra logging
|
|
291
|
-
print(f"Warning: LLM insight generation failed: {e}")
|
|
290
|
+
logger.warning("LLM insight generation failed: %s", e)
|
|
292
291
|
|
|
293
292
|
return insights
|
fin_infra/documents/add.py
CHANGED
|
@@ -23,23 +23,22 @@ Quick Start:
|
|
|
23
23
|
|
|
24
24
|
from __future__ import annotations
|
|
25
25
|
|
|
26
|
-
from typing import TYPE_CHECKING
|
|
26
|
+
from typing import TYPE_CHECKING
|
|
27
27
|
|
|
28
28
|
if TYPE_CHECKING:
|
|
29
29
|
from fastapi import FastAPI
|
|
30
|
-
|
|
31
30
|
from svc_infra.storage.base import StorageBackend
|
|
32
31
|
|
|
33
32
|
from .ease import FinancialDocumentManager
|
|
34
33
|
|
|
35
34
|
|
|
36
35
|
def add_documents(
|
|
37
|
-
app:
|
|
38
|
-
storage:
|
|
36
|
+
app: FastAPI,
|
|
37
|
+
storage: StorageBackend | None = None,
|
|
39
38
|
default_ocr_provider: str = "tesseract",
|
|
40
39
|
prefix: str = "/documents",
|
|
41
|
-
tags:
|
|
42
|
-
) ->
|
|
40
|
+
tags: list[str] | None = None,
|
|
41
|
+
) -> FinancialDocumentManager:
|
|
43
42
|
"""
|
|
44
43
|
Add financial document management endpoints to FastAPI app.
|
|
45
44
|
|
|
@@ -87,12 +86,12 @@ def add_documents(
|
|
|
87
86
|
- Stores manager on app.state.financial_documents
|
|
88
87
|
"""
|
|
89
88
|
from fastapi import HTTPException
|
|
90
|
-
|
|
91
89
|
from svc_infra.api.fastapi.dual.protected import user_router
|
|
92
90
|
|
|
93
91
|
# Import svc-infra base function to mount base endpoints (with fallback)
|
|
94
92
|
try:
|
|
95
93
|
from svc_infra.documents import add_documents as add_base_documents
|
|
94
|
+
|
|
96
95
|
HAS_SVC_INFRA_DOCUMENTS = True
|
|
97
96
|
except ImportError:
|
|
98
97
|
# Fallback for older svc-infra versions - skip base endpoints
|
|
@@ -104,16 +103,17 @@ def add_documents(
|
|
|
104
103
|
|
|
105
104
|
# Step 1: Mount base endpoints (upload, list, get, delete) via svc-infra
|
|
106
105
|
# This returns the base DocumentManager, but we'll create our own FinancialDocumentManager
|
|
107
|
-
if HAS_SVC_INFRA_DOCUMENTS and add_base_documents:
|
|
106
|
+
if HAS_SVC_INFRA_DOCUMENTS and add_base_documents is not None:
|
|
108
107
|
add_base_documents(app, storage_backend=storage, prefix=prefix, tags=tags)
|
|
109
108
|
else:
|
|
110
109
|
# Legacy mode: mount basic endpoints inline (for svc-infra < 0.1.668)
|
|
111
110
|
import warnings
|
|
111
|
+
|
|
112
112
|
warnings.warn(
|
|
113
113
|
"svc_infra.documents not found. Using legacy document endpoints. "
|
|
114
114
|
"Please upgrade svc-infra to >=0.1.668 for full functionality.",
|
|
115
115
|
DeprecationWarning,
|
|
116
|
-
stacklevel=2
|
|
116
|
+
stacklevel=2,
|
|
117
117
|
)
|
|
118
118
|
|
|
119
119
|
# Step 2: Create financial document manager with OCR/AI capabilities
|
|
@@ -126,7 +126,7 @@ def add_documents(
|
|
|
126
126
|
@router.post("/{document_id}/ocr", response_model=OCRResult)
|
|
127
127
|
async def extract_text_ocr(
|
|
128
128
|
document_id: str,
|
|
129
|
-
provider:
|
|
129
|
+
provider: str | None = None,
|
|
130
130
|
force_refresh: bool = False,
|
|
131
131
|
) -> OCRResult:
|
|
132
132
|
"""
|
|
@@ -210,9 +210,7 @@ def add_documents(
|
|
|
210
210
|
```
|
|
211
211
|
"""
|
|
212
212
|
try:
|
|
213
|
-
return await manager.analyze(
|
|
214
|
-
document_id=document_id, force_refresh=force_refresh
|
|
215
|
-
)
|
|
213
|
+
return await manager.analyze(document_id=document_id, force_refresh=force_refresh)
|
|
216
214
|
except ValueError as e:
|
|
217
215
|
raise HTTPException(status_code=404, detail=str(e))
|
|
218
216
|
|
fin_infra/documents/analysis.py
CHANGED
|
@@ -24,7 +24,7 @@ from __future__ import annotations
|
|
|
24
24
|
|
|
25
25
|
import re
|
|
26
26
|
from datetime import datetime
|
|
27
|
-
from typing import TYPE_CHECKING
|
|
27
|
+
from typing import TYPE_CHECKING
|
|
28
28
|
|
|
29
29
|
if TYPE_CHECKING:
|
|
30
30
|
from svc_infra.storage.base import StorageBackend
|
|
@@ -32,14 +32,14 @@ if TYPE_CHECKING:
|
|
|
32
32
|
from .models import DocumentAnalysis
|
|
33
33
|
|
|
34
34
|
# In-memory analysis cache (production: use svc-infra cache)
|
|
35
|
-
_analysis_cache:
|
|
35
|
+
_analysis_cache: dict[str, DocumentAnalysis] = {}
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
async def analyze_document(
|
|
39
|
-
storage:
|
|
39
|
+
storage: StorageBackend,
|
|
40
40
|
document_id: str,
|
|
41
41
|
force_refresh: bool = False,
|
|
42
|
-
) ->
|
|
42
|
+
) -> DocumentAnalysis:
|
|
43
43
|
"""
|
|
44
44
|
Analyze a document using AI to extract insights and recommendations.
|
|
45
45
|
|
|
@@ -165,7 +165,7 @@ Important: This analysis is not a substitute for professional financial advice.
|
|
|
165
165
|
return prompt
|
|
166
166
|
|
|
167
167
|
|
|
168
|
-
def _validate_analysis(analysis:
|
|
168
|
+
def _validate_analysis(analysis: DocumentAnalysis) -> bool:
|
|
169
169
|
"""
|
|
170
170
|
Validate LLM analysis output.
|
|
171
171
|
|
|
@@ -201,7 +201,7 @@ def _validate_analysis(analysis: "DocumentAnalysis") -> bool:
|
|
|
201
201
|
return True
|
|
202
202
|
|
|
203
203
|
|
|
204
|
-
def _analyze_tax_document(ocr_text: str, metadata: dict, document_id: str) ->
|
|
204
|
+
def _analyze_tax_document(ocr_text: str, metadata: dict, document_id: str) -> DocumentAnalysis:
|
|
205
205
|
"""
|
|
206
206
|
Specialized analysis for tax documents.
|
|
207
207
|
|
|
@@ -301,7 +301,7 @@ def _analyze_tax_document(ocr_text: str, metadata: dict, document_id: str) -> "D
|
|
|
301
301
|
)
|
|
302
302
|
|
|
303
303
|
|
|
304
|
-
def _analyze_bank_statement(ocr_text: str, metadata: dict, document_id: str) ->
|
|
304
|
+
def _analyze_bank_statement(ocr_text: str, metadata: dict, document_id: str) -> DocumentAnalysis:
|
|
305
305
|
"""
|
|
306
306
|
Specialized analysis for bank statements.
|
|
307
307
|
|
|
@@ -352,7 +352,7 @@ def _analyze_bank_statement(ocr_text: str, metadata: dict, document_id: str) ->
|
|
|
352
352
|
)
|
|
353
353
|
|
|
354
354
|
|
|
355
|
-
def _analyze_receipt(ocr_text: str, metadata: dict, document_id: str) ->
|
|
355
|
+
def _analyze_receipt(ocr_text: str, metadata: dict, document_id: str) -> DocumentAnalysis:
|
|
356
356
|
"""
|
|
357
357
|
Specialized analysis for receipts.
|
|
358
358
|
|
|
@@ -394,7 +394,7 @@ def _analyze_receipt(ocr_text: str, metadata: dict, document_id: str) -> "Docume
|
|
|
394
394
|
|
|
395
395
|
def _analyze_generic_document(
|
|
396
396
|
ocr_text: str, document_type: str, metadata: dict, document_id: str
|
|
397
|
-
) ->
|
|
397
|
+
) -> DocumentAnalysis:
|
|
398
398
|
"""
|
|
399
399
|
Generic analysis for other document types.
|
|
400
400
|
|
fin_infra/documents/ease.py
CHANGED
|
@@ -36,7 +36,7 @@ Quick Start:
|
|
|
36
36
|
|
|
37
37
|
from __future__ import annotations
|
|
38
38
|
|
|
39
|
-
from typing import TYPE_CHECKING
|
|
39
|
+
from typing import TYPE_CHECKING
|
|
40
40
|
|
|
41
41
|
try:
|
|
42
42
|
from svc_infra.documents import DocumentManager as BaseDocumentManager
|
|
@@ -44,11 +44,12 @@ except ImportError:
|
|
|
44
44
|
# Fallback for older svc-infra versions without documents module
|
|
45
45
|
# This provides backward compatibility until svc-infra 0.1.668+ is published
|
|
46
46
|
import warnings
|
|
47
|
+
|
|
47
48
|
warnings.warn(
|
|
48
49
|
"svc_infra.documents not found. Using legacy implementation. "
|
|
49
50
|
"Please upgrade svc-infra to >=0.1.668 for layered architecture support.",
|
|
50
51
|
DeprecationWarning,
|
|
51
|
-
stacklevel=2
|
|
52
|
+
stacklevel=2,
|
|
52
53
|
)
|
|
53
54
|
BaseDocumentManager = object # type: ignore
|
|
54
55
|
|
|
@@ -65,10 +66,10 @@ class FinancialDocumentManager(BaseDocumentManager):
|
|
|
65
66
|
Inherits from svc-infra DocumentManager:
|
|
66
67
|
- upload(), download(), delete(), get(), list() for base document CRUD
|
|
67
68
|
- storage backend integration
|
|
68
|
-
|
|
69
|
+
|
|
69
70
|
Adds financial-specific methods:
|
|
70
71
|
- upload_financial(): Upload with DocumentType, tax_year, form_type
|
|
71
|
-
- extract_text(): OCR for tax forms
|
|
72
|
+
- extract_text(): OCR for tax forms
|
|
72
73
|
- analyze(): AI-powered financial insights
|
|
73
74
|
|
|
74
75
|
Attributes:
|
|
@@ -93,7 +94,7 @@ class FinancialDocumentManager(BaseDocumentManager):
|
|
|
93
94
|
|
|
94
95
|
def __init__(
|
|
95
96
|
self,
|
|
96
|
-
storage:
|
|
97
|
+
storage: StorageBackend,
|
|
97
98
|
default_ocr_provider: str = "tesseract",
|
|
98
99
|
):
|
|
99
100
|
"""
|
|
@@ -110,12 +111,12 @@ class FinancialDocumentManager(BaseDocumentManager):
|
|
|
110
111
|
self,
|
|
111
112
|
user_id: str,
|
|
112
113
|
file: bytes,
|
|
113
|
-
document_type:
|
|
114
|
+
document_type: DocumentType,
|
|
114
115
|
filename: str,
|
|
115
|
-
metadata:
|
|
116
|
-
tax_year:
|
|
117
|
-
form_type:
|
|
118
|
-
) ->
|
|
116
|
+
metadata: dict | None = None,
|
|
117
|
+
tax_year: int | None = None,
|
|
118
|
+
form_type: str | None = None,
|
|
119
|
+
) -> FinancialDocument:
|
|
119
120
|
"""
|
|
120
121
|
Upload a financial document with financial-specific fields.
|
|
121
122
|
|
|
@@ -158,11 +159,11 @@ class FinancialDocumentManager(BaseDocumentManager):
|
|
|
158
159
|
def list_financial(
|
|
159
160
|
self,
|
|
160
161
|
user_id: str,
|
|
161
|
-
document_type:
|
|
162
|
-
tax_year:
|
|
162
|
+
document_type: DocumentType | None = None,
|
|
163
|
+
tax_year: int | None = None,
|
|
163
164
|
limit: int = 100,
|
|
164
165
|
offset: int = 0,
|
|
165
|
-
) -> list[
|
|
166
|
+
) -> list[FinancialDocument]:
|
|
166
167
|
"""
|
|
167
168
|
List user's financial documents with filters.
|
|
168
169
|
|
|
@@ -206,9 +207,9 @@ class FinancialDocumentManager(BaseDocumentManager):
|
|
|
206
207
|
async def extract_text(
|
|
207
208
|
self,
|
|
208
209
|
document_id: str,
|
|
209
|
-
provider:
|
|
210
|
+
provider: str | None = None,
|
|
210
211
|
force_refresh: bool = False,
|
|
211
|
-
) ->
|
|
212
|
+
) -> OCRResult:
|
|
212
213
|
"""
|
|
213
214
|
Extract text from document using OCR (financial extension).
|
|
214
215
|
|
|
@@ -238,7 +239,7 @@ class FinancialDocumentManager(BaseDocumentManager):
|
|
|
238
239
|
self,
|
|
239
240
|
document_id: str,
|
|
240
241
|
force_refresh: bool = False,
|
|
241
|
-
) ->
|
|
242
|
+
) -> DocumentAnalysis:
|
|
242
243
|
"""
|
|
243
244
|
Analyze document using AI (financial extension).
|
|
244
245
|
|
|
@@ -267,7 +268,7 @@ DocumentManager = FinancialDocumentManager
|
|
|
267
268
|
|
|
268
269
|
|
|
269
270
|
def easy_documents(
|
|
270
|
-
storage:
|
|
271
|
+
storage: StorageBackend | None = None,
|
|
271
272
|
default_ocr_provider: str = "tesseract",
|
|
272
273
|
) -> FinancialDocumentManager:
|
|
273
274
|
"""
|
fin_infra/documents/models.py
CHANGED
|
@@ -31,7 +31,7 @@ from __future__ import annotations
|
|
|
31
31
|
|
|
32
32
|
from datetime import datetime
|
|
33
33
|
from enum import Enum
|
|
34
|
-
from typing import
|
|
34
|
+
from typing import Optional
|
|
35
35
|
|
|
36
36
|
from pydantic import BaseModel, ConfigDict, Field
|
|
37
37
|
from svc_infra.documents import Document as BaseDocument
|
|
@@ -52,17 +52,17 @@ class DocumentType(str, Enum):
|
|
|
52
52
|
class FinancialDocument(BaseDocument):
|
|
53
53
|
"""
|
|
54
54
|
Financial document extending base Document with financial-specific fields.
|
|
55
|
-
|
|
55
|
+
|
|
56
56
|
Inherits from svc-infra Document:
|
|
57
57
|
- id, user_id, filename, file_size, upload_date
|
|
58
58
|
- storage_path, content_type, checksum
|
|
59
59
|
- metadata (Dict[str, Any])
|
|
60
|
-
|
|
60
|
+
|
|
61
61
|
Adds financial-specific fields:
|
|
62
62
|
- type: DocumentType enum
|
|
63
63
|
- tax_year: Optional year for tax documents
|
|
64
64
|
- form_type: Optional form identifier (W-2, 1099, etc.)
|
|
65
|
-
|
|
65
|
+
|
|
66
66
|
Examples:
|
|
67
67
|
>>> # Tax document with W-2 form
|
|
68
68
|
>>> doc = FinancialDocument(
|
|
@@ -145,7 +145,7 @@ class OCRResult(BaseModel):
|
|
|
145
145
|
confidence: float = Field(
|
|
146
146
|
..., description="Overall OCR confidence score (0.0-1.0)", ge=0.0, le=1.0
|
|
147
147
|
)
|
|
148
|
-
fields_extracted:
|
|
148
|
+
fields_extracted: dict[str, str] = Field(
|
|
149
149
|
default_factory=dict,
|
|
150
150
|
description="Structured fields extracted from document (names, amounts, dates)",
|
|
151
151
|
)
|
|
@@ -181,10 +181,10 @@ class DocumentAnalysis(BaseModel):
|
|
|
181
181
|
|
|
182
182
|
document_id: str = Field(..., description="Document that was analyzed")
|
|
183
183
|
summary: str = Field(..., description="High-level document summary")
|
|
184
|
-
key_findings:
|
|
184
|
+
key_findings: list[str] = Field(
|
|
185
185
|
default_factory=list, description="Important facts extracted from document"
|
|
186
186
|
)
|
|
187
|
-
recommendations:
|
|
187
|
+
recommendations: list[str] = Field(
|
|
188
188
|
default_factory=list, description="Action items or suggestions based on document content"
|
|
189
189
|
)
|
|
190
190
|
analysis_date: datetime = Field(
|
fin_infra/documents/ocr.py
CHANGED
|
@@ -25,7 +25,7 @@ from __future__ import annotations
|
|
|
25
25
|
|
|
26
26
|
import re
|
|
27
27
|
from datetime import datetime
|
|
28
|
-
from typing import TYPE_CHECKING
|
|
28
|
+
from typing import TYPE_CHECKING
|
|
29
29
|
|
|
30
30
|
if TYPE_CHECKING:
|
|
31
31
|
from svc_infra.storage.base import StorageBackend
|
|
@@ -33,20 +33,20 @@ if TYPE_CHECKING:
|
|
|
33
33
|
from .models import OCRResult
|
|
34
34
|
|
|
35
35
|
# In-memory OCR cache (production: use svc-infra cache)
|
|
36
|
-
_ocr_cache:
|
|
36
|
+
_ocr_cache: dict[str, OCRResult] = {}
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
async def extract_text(
|
|
40
|
-
storage:
|
|
40
|
+
storage: StorageBackend,
|
|
41
41
|
document_id: str,
|
|
42
42
|
provider: str = "tesseract",
|
|
43
43
|
force_refresh: bool = False,
|
|
44
|
-
) ->
|
|
44
|
+
) -> OCRResult:
|
|
45
45
|
"""
|
|
46
46
|
Extract text from a document using OCR (uses svc-infra storage).
|
|
47
47
|
|
|
48
48
|
Args:
|
|
49
|
-
storage: Storage backend instance
|
|
49
|
+
storage: Storage backend instance
|
|
50
50
|
document_id: Document identifier
|
|
51
51
|
provider: OCR provider ("tesseract" or "textract")
|
|
52
52
|
force_refresh: Force re-extraction even if cached result exists
|
|
@@ -106,7 +106,7 @@ async def extract_text(
|
|
|
106
106
|
|
|
107
107
|
def _extract_with_tesseract(
|
|
108
108
|
file_content: bytes, filename: str, metadata: dict, document_id: str
|
|
109
|
-
) ->
|
|
109
|
+
) -> OCRResult:
|
|
110
110
|
"""
|
|
111
111
|
Extract text using Tesseract OCR (simulated).
|
|
112
112
|
|
|
@@ -158,7 +158,7 @@ def _extract_with_tesseract(
|
|
|
158
158
|
|
|
159
159
|
def _extract_with_textract(
|
|
160
160
|
file_content: bytes, filename: str, metadata: dict, document_id: str
|
|
161
|
-
) ->
|
|
161
|
+
) -> OCRResult:
|
|
162
162
|
"""
|
|
163
163
|
Extract text using AWS Textract (simulated).
|
|
164
164
|
|
|
@@ -207,7 +207,7 @@ def _extract_with_textract(
|
|
|
207
207
|
)
|
|
208
208
|
|
|
209
209
|
|
|
210
|
-
def _parse_tax_form(text: str, form_type:
|
|
210
|
+
def _parse_tax_form(text: str, form_type: str | None = None) -> dict[str, str]:
|
|
211
211
|
"""
|
|
212
212
|
Parse tax form text into structured fields.
|
|
213
213
|
|