finforge 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- finforge-1.0.0/LICENSE +21 -0
- finforge-1.0.0/MANIFEST.in +8 -0
- finforge-1.0.0/PKG-INFO +282 -0
- finforge-1.0.0/README.md +244 -0
- finforge-1.0.0/finforge/__init__.py +5 -0
- finforge-1.0.0/finforge/behavior/__init__.py +1 -0
- finforge-1.0.0/finforge/behavior/adaptive_spending.py +129 -0
- finforge-1.0.0/finforge/behavior/balance_awareness.py +94 -0
- finforge-1.0.0/finforge/behavior/budgeting.py +78 -0
- finforge-1.0.0/finforge/behavior/clustering.py +102 -0
- finforge-1.0.0/finforge/behavior/identity.py +168 -0
- finforge-1.0.0/finforge/behavior/lifecycle.py +64 -0
- finforge-1.0.0/finforge/behavior/merchant_affinity.py +126 -0
- finforge-1.0.0/finforge/behavior/overdraft.py +49 -0
- finforge-1.0.0/finforge/behavior/sessions.py +230 -0
- finforge-1.0.0/finforge/behavior/spending_patterns.py +152 -0
- finforge-1.0.0/finforge/behavior/subscriptions.py +70 -0
- finforge-1.0.0/finforge/core/__init__.py +1 -0
- finforge-1.0.0/finforge/core/config.py +31 -0
- finforge-1.0.0/finforge/core/constants.py +13 -0
- finforge-1.0.0/finforge/core/enums.py +17 -0
- finforge-1.0.0/finforge/core/models.py +68 -0
- finforge-1.0.0/finforge/dataset.py +142 -0
- finforge-1.0.0/finforge/exporters/__init__.py +1 -0
- finforge-1.0.0/finforge/exporters/csv_exporter.py +18 -0
- finforge-1.0.0/finforge/generators/__init__.py +1 -0
- finforge-1.0.0/finforge/generators/scheduler.py +34 -0
- finforge-1.0.0/finforge/generators/transaction_generator.py +417 -0
- finforge-1.0.0/finforge/generators/user_generator.py +76 -0
- finforge-1.0.0/finforge/llm/__init__.py +5 -0
- finforge-1.0.0/finforge/llm/interfaces.py +12 -0
- finforge-1.0.0/finforge/merchants/__init__.py +1 -0
- finforge-1.0.0/finforge/merchants/catalog.py +120 -0
- finforge-1.0.0/finforge/personas/__init__.py +1 -0
- finforge-1.0.0/finforge/personas/base.py +61 -0
- finforge-1.0.0/finforge/personas/salaried.py +103 -0
- finforge-1.0.0/finforge/personas/student.py +86 -0
- finforge-1.0.0/finforge/utils/__init__.py +1 -0
- finforge-1.0.0/finforge/utils/balances.py +10 -0
- finforge-1.0.0/finforge/utils/dates.py +30 -0
- finforge-1.0.0/finforge/utils/randomness.py +24 -0
- finforge-1.0.0/finforge.egg-info/PKG-INFO +282 -0
- finforge-1.0.0/finforge.egg-info/SOURCES.txt +54 -0
- finforge-1.0.0/finforge.egg-info/dependency_links.txt +1 -0
- finforge-1.0.0/finforge.egg-info/requires.txt +10 -0
- finforge-1.0.0/finforge.egg-info/top_level.txt +1 -0
- finforge-1.0.0/pyproject.toml +63 -0
- finforge-1.0.0/requirements.txt +6 -0
- finforge-1.0.0/setup.cfg +4 -0
- finforge-1.0.0/setup.py +6 -0
- finforge-1.0.0/tests/test_balances.py +8 -0
- finforge-1.0.0/tests/test_behavior.py +77 -0
- finforge-1.0.0/tests/test_generation.py +72 -0
- finforge-1.0.0/tests/test_identity.py +24 -0
- finforge-1.0.0/tests/test_personas.py +56 -0
- finforge-1.0.0/tests/test_seed_reproducibility.py +7 -0
finforge-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 FinForge contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
finforge-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: finforge
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Synthetic financial transaction data generation with persona-driven behavior simulation.
|
|
5
|
+
Author: FinForge contributors
|
|
6
|
+
Maintainer: FinForge maintainers
|
|
7
|
+
License-Expression: MIT
|
|
8
|
+
Project-URL: Homepage, https://github.com/finforge/finforge
|
|
9
|
+
Project-URL: Repository, https://github.com/finforge/finforge
|
|
10
|
+
Project-URL: Issues, https://github.com/finforge/finforge/issues
|
|
11
|
+
Project-URL: Documentation, https://github.com/finforge/finforge#readme
|
|
12
|
+
Project-URL: Changelog, https://github.com/finforge/finforge/blob/main/CHANGELOG.md
|
|
13
|
+
Keywords: synthetic-data,finance,transactions,simulation,testing
|
|
14
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: Financial and Insurance Industry
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Operating System :: OS Independent
|
|
22
|
+
Classifier: Topic :: Software Development :: Testing
|
|
23
|
+
Classifier: Topic :: Office/Business :: Financial
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
25
|
+
Requires-Python: >=3.11
|
|
26
|
+
Description-Content-Type: text/markdown
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Requires-Dist: pandas>=2.0
|
|
29
|
+
Requires-Dist: numpy>=1.24
|
|
30
|
+
Requires-Dist: faker>=24.0
|
|
31
|
+
Requires-Dist: pydantic>=2.6
|
|
32
|
+
Requires-Dist: python-dateutil>=2.8
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
35
|
+
Requires-Dist: build>=1.2; extra == "dev"
|
|
36
|
+
Requires-Dist: twine>=5.0; extra == "dev"
|
|
37
|
+
Dynamic: license-file
|
|
38
|
+
|
|
39
|
+
# FinForge v1.0
|
|
40
|
+
|
|
41
|
+
FinForge is a synthetic financial transaction data generation framework for developers, QA teams, and analytics engineers who need realistic transaction datasets without using production customer records.
|
|
42
|
+
|
|
43
|
+
Unlike basic fake data libraries, FinForge focuses on behavioral simulation: persona-driven users, persistent financial identities, recurring cash flows, spending memory, merchant loyalty, monthly stress cycles, chronological balance updates, and deterministic reproducibility for testing and benchmarking.
|
|
44
|
+
|
|
45
|
+
## Why FinForge v1.0 Is Different
|
|
46
|
+
|
|
47
|
+
FinForge v1.0 simulates persistent financial lives instead of generating isolated fake rows.
|
|
48
|
+
|
|
49
|
+
- Persistent user identity: each user carries a stable spending style, merchant loyalty profile, night activity score, and savings tendency.
|
|
50
|
+
- Temporal financial rhythm: salaries, transfers, bills, and subscriptions follow a repeatable monthly cadence.
|
|
51
|
+
- Realistic behavioral adaptation: low-balance users pull back discretionary spending, while stronger spenders show more weekend and late-night activity.
|
|
52
|
+
- Reproducible synthetic data: the same seed and config produce the same dataset, which makes FinForge practical for testing and benchmarking.
|
|
53
|
+
|
|
54
|
+
## Problem Statement
|
|
55
|
+
|
|
56
|
+
Financial applications often need transaction histories that are:
|
|
57
|
+
|
|
58
|
+
- realistic enough to exercise business logic
|
|
59
|
+
- reproducible enough for automated testing
|
|
60
|
+
- structured enough for analytics experiments
|
|
61
|
+
- safe enough to share across teams
|
|
62
|
+
|
|
63
|
+
Most generic fake data tools generate isolated rows. Real financial systems need temporally consistent histories where balances evolve over time, transactions follow plausible cadence, and spending patterns reflect customer behavior.
|
|
64
|
+
|
|
65
|
+
FinForge addresses that gap.
|
|
66
|
+
|
|
67
|
+
## Features
|
|
68
|
+
|
|
69
|
+
- Synthetic user generation with configurable personas
|
|
70
|
+
- Persona-driven transaction generation with persistent user habits
|
|
71
|
+
- Persistent user identity traits such as spending style, merchant loyalty, and commute pattern
|
|
72
|
+
- Chronologically ordered event simulation
|
|
73
|
+
- Deterministic seed support for reproducible datasets
|
|
74
|
+
- Realistic recurring events like salary, rent, and subscriptions
|
|
75
|
+
- Merchant/category consistency with merchant affinity reuse
|
|
76
|
+
- Weekend vs weekday spending behavior
|
|
77
|
+
- Balance-aware suppression of discretionary spending
|
|
78
|
+
- Spending memory and overspend suppression
|
|
79
|
+
- Dedicated subscription engine with once-per-month recurrence
|
|
80
|
+
- Explicit overdraft metadata and configurable negative-balance handling
|
|
81
|
+
- Month-end spending compression and salary-cycle effects
|
|
82
|
+
- Clustered daily transaction bursts that feel session-like
|
|
83
|
+
- Spending-style frequency calibration for `minimalist`, `budget_conscious`, `lifestyle_spender`, and `impulsive_student`
|
|
84
|
+
- Running balance tracking
|
|
85
|
+
- Pandas DataFrame output
|
|
86
|
+
- CSV export utilities
|
|
87
|
+
|
|
88
|
+
## Installation
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pip install -e .
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
Or install dependencies manually:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
pip install -r requirements.txt
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Quickstart
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
from finforge import DatasetGenerator
|
|
104
|
+
|
|
105
|
+
dataset = (
|
|
106
|
+
DatasetGenerator(seed=42)
|
|
107
|
+
.with_users(100)
|
|
108
|
+
.with_persona("salaried")
|
|
109
|
+
.for_months(6)
|
|
110
|
+
.generate()
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
print(dataset.head())
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
Export to CSV:
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from finforge import DatasetGenerator
|
|
120
|
+
|
|
121
|
+
generator = (
|
|
122
|
+
DatasetGenerator(seed=42)
|
|
123
|
+
.with_users(25)
|
|
124
|
+
.with_persona("student")
|
|
125
|
+
.for_months(3)
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
dataset = generator.generate()
|
|
129
|
+
generator.export_csv("student_transactions.csv")
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
The public API remains fluent and backward-compatible:
|
|
133
|
+
|
|
134
|
+
```python
|
|
135
|
+
from finforge import DatasetGenerator
|
|
136
|
+
|
|
137
|
+
dataset = (
|
|
138
|
+
DatasetGenerator(seed=101)
|
|
139
|
+
.with_users(3)
|
|
140
|
+
.with_persona("student")
|
|
141
|
+
.for_months(2)
|
|
142
|
+
.generate()
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
dataset.to_csv("transactionsBehaviour.csv", index=False)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
Overdraft controls are configurable without changing the public API shape:
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
dataset = (
|
|
152
|
+
DatasetGenerator(seed=7)
|
|
153
|
+
.with_users(10)
|
|
154
|
+
.with_persona("student")
|
|
155
|
+
.for_months(2)
|
|
156
|
+
.prevent_negative_balance(True)
|
|
157
|
+
.with_overdraft(0.0)
|
|
158
|
+
.generate()
|
|
159
|
+
)
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Architecture Overview
|
|
163
|
+
|
|
164
|
+
FinForge is organized into small, composable modules:
|
|
165
|
+
|
|
166
|
+
- `finforge.core`: shared models, enums, constants, and configuration
|
|
167
|
+
- `finforge.personas`: persona definitions and behavioral profiles
|
|
168
|
+
- `finforge.generators`: user generation, scheduling, and transaction generation
|
|
169
|
+
- `finforge.merchants`: category-safe merchant catalog
|
|
170
|
+
- `finforge.utils`: randomness, dates, and balance helpers
|
|
171
|
+
- `finforge.exporters`: output adapters such as CSV
|
|
172
|
+
- `finforge.dataset`: fluent public API surface
|
|
173
|
+
|
|
174
|
+
The v1.0 architecture keeps future local-model extensions possible while keeping all LLM-related functionality out of the runtime path for now.
|
|
175
|
+
|
|
176
|
+
Behavioral simulation components live under `finforge.behavior`:
|
|
177
|
+
|
|
178
|
+
- `identity.py`: long-lived user behavioral identities
|
|
179
|
+
- `merchant_affinity.py`: persistent merchant preferences and reuse weights
|
|
180
|
+
- `adaptive_spending.py`: liquidity and overspend-aware daily spending controls
|
|
181
|
+
- `subscriptions.py`: dedicated subscription assignment and stable monthly pricing
|
|
182
|
+
- `overdraft.py`: explicit negative-balance policy decisions
|
|
183
|
+
- `budgeting.py`: rolling budget memory and spending pressure
|
|
184
|
+
- `lifecycle.py`: monthly cashflow rhythm and student irregular inflows
|
|
185
|
+
- `sessions.py`: grouped temporal spending sessions
|
|
186
|
+
|
|
187
|
+
## Example Output
|
|
188
|
+
|
|
189
|
+
Example generated schema:
|
|
190
|
+
|
|
191
|
+
| transaction_id | user_id | timestamp | merchant | category | amount | spending_style | is_subscription | recurrence_type | balance_state | session_id |
|
|
192
|
+
| --- | --- | --- | --- | --- | ---: | --- | --- | --- | --- | --- |
|
|
193
|
+
| txn_000001 | user_000001 | 2026-01-01 09:14:00 | Acme Payroll | income | 5840.00 | budget_conscious | False | income | normal | |
|
|
194
|
+
| txn_000002 | user_000001 | 2026-01-03 10:05:00 | Green Residency | housing | -1450.00 | budget_conscious | False | bill | normal | |
|
|
195
|
+
| txn_000003 | user_000001 | 2026-01-05 20:11:00 | Netflix | subscription | -649.00 | budget_conscious | True | subscription | normal | |
|
|
196
|
+
|
|
197
|
+
Typical generated behavior now includes:
|
|
198
|
+
|
|
199
|
+
- recurring salary and bill cadence near the beginning of each month
|
|
200
|
+
- subscriptions generated only by the recurring engine, never by random entertainment spending
|
|
201
|
+
- exactly one subscription row per assigned merchant per simulated month
|
|
202
|
+
- repeated use of a user’s preferred merchants
|
|
203
|
+
- persistent user styles such as `budget_conscious`, `lifestyle_spender`, and `impulsive_student`
|
|
204
|
+
- stronger commute and coffee activity on weekdays
|
|
205
|
+
- more entertainment and food delivery on weekends
|
|
206
|
+
- student late-night activity and irregular top-up inflows
|
|
207
|
+
- smaller discretionary tickets when balances run low
|
|
208
|
+
- behavioral pullback after recent overspending
|
|
209
|
+
- overdrafts either prevented or explicitly marked with `is_overdraft` and `overdraft_amount`
|
|
210
|
+
- clustered bursts such as `Uber -> Coffee -> Lunch`
|
|
211
|
+
|
|
212
|
+
## Metadata Columns
|
|
213
|
+
|
|
214
|
+
Generated transaction rows include behavioral metadata that is useful for testing and downstream modeling:
|
|
215
|
+
|
|
216
|
+
- `persona`
|
|
217
|
+
- `spending_style`
|
|
218
|
+
- `savings_tendency`
|
|
219
|
+
- `merchant_loyalty`
|
|
220
|
+
- `impulse_buying_score`
|
|
221
|
+
- `lifestyle_score`
|
|
222
|
+
- `night_activity_score`
|
|
223
|
+
- `is_recurring`
|
|
224
|
+
- `is_subscription`
|
|
225
|
+
- `is_discretionary`
|
|
226
|
+
- `recurrence_type`
|
|
227
|
+
- `session_id`
|
|
228
|
+
- `day_type`
|
|
229
|
+
- `balance_state`
|
|
230
|
+
- `is_overdraft`
|
|
231
|
+
- `overdraft_amount`
|
|
232
|
+
|
|
233
|
+
## Testing Guarantees
|
|
234
|
+
|
|
235
|
+
The v1.0 test suite verifies:
|
|
236
|
+
|
|
237
|
+
- balance integrity on every row
|
|
238
|
+
- chronological ordering per user
|
|
239
|
+
- seed reproducibility
|
|
240
|
+
- subscription recurrence and amount stability
|
|
241
|
+
- low-balance discretionary suppression
|
|
242
|
+
- reasonable session-linked rates
|
|
243
|
+
- merchant-category consistency
|
|
244
|
+
- required behavioral metadata columns
|
|
245
|
+
- explicit overdraft marking whenever balances go negative
|
|
246
|
+
|
|
247
|
+
## Roadmap
|
|
248
|
+
|
|
249
|
+
- Additional personas for freelancers, retirees, and small business owners
|
|
250
|
+
- More nuanced cash flow events and seasonal behavior
|
|
251
|
+
- Local Ollama-backed narrative and explanation modules
|
|
252
|
+
- Richer export formats and scenario presets
|
|
253
|
+
- Extended validation and benchmarking datasets
|
|
254
|
+
|
|
255
|
+
## Contributing
|
|
256
|
+
|
|
257
|
+
Contributions are welcome. Good first contributions include:
|
|
258
|
+
|
|
259
|
+
- new persona modules
|
|
260
|
+
- expanded merchant catalogs
|
|
261
|
+
- improved temporal rules
|
|
262
|
+
- additional exporters
|
|
263
|
+
- stronger test coverage
|
|
264
|
+
|
|
265
|
+
To contribute:
|
|
266
|
+
|
|
267
|
+
1. Fork the repository
|
|
268
|
+
2. Create a feature branch
|
|
269
|
+
3. Add tests for behavior changes
|
|
270
|
+
4. Run `pytest`
|
|
271
|
+
5. Open a pull request with a clear description of the use case
|
|
272
|
+
|
|
273
|
+
## Development
|
|
274
|
+
|
|
275
|
+
```bash
|
|
276
|
+
pip install -e ".[dev]"
|
|
277
|
+
pytest
|
|
278
|
+
```
|
|
279
|
+
|
|
280
|
+
## License
|
|
281
|
+
|
|
282
|
+
MIT
|
finforge-1.0.0/README.md
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
# FinForge v1.0
|
|
2
|
+
|
|
3
|
+
FinForge is a synthetic financial transaction data generation framework for developers, QA teams, and analytics engineers who need realistic transaction datasets without using production customer records.
|
|
4
|
+
|
|
5
|
+
Unlike basic fake data libraries, FinForge focuses on behavioral simulation: persona-driven users, persistent financial identities, recurring cash flows, spending memory, merchant loyalty, monthly stress cycles, chronological balance updates, and deterministic reproducibility for testing and benchmarking.
|
|
6
|
+
|
|
7
|
+
## Why FinForge v1.0 Is Different
|
|
8
|
+
|
|
9
|
+
FinForge v1.0 simulates persistent financial lives instead of generating isolated fake rows.
|
|
10
|
+
|
|
11
|
+
- Persistent user identity: each user carries a stable spending style, merchant loyalty profile, night activity score, and savings tendency.
|
|
12
|
+
- Temporal financial rhythm: salaries, transfers, bills, and subscriptions follow a repeatable monthly cadence.
|
|
13
|
+
- Realistic behavioral adaptation: low-balance users pull back discretionary spending, while stronger spenders show more weekend and late-night activity.
|
|
14
|
+
- Reproducible synthetic data: the same seed and config produce the same dataset, which makes FinForge practical for testing and benchmarking.
|
|
15
|
+
|
|
16
|
+
## Problem Statement
|
|
17
|
+
|
|
18
|
+
Financial applications often need transaction histories that are:
|
|
19
|
+
|
|
20
|
+
- realistic enough to exercise business logic
|
|
21
|
+
- reproducible enough for automated testing
|
|
22
|
+
- structured enough for analytics experiments
|
|
23
|
+
- safe enough to share across teams
|
|
24
|
+
|
|
25
|
+
Most generic fake data tools generate isolated rows. Real financial systems need temporally consistent histories where balances evolve over time, transactions follow plausible cadence, and spending patterns reflect customer behavior.
|
|
26
|
+
|
|
27
|
+
FinForge addresses that gap.
|
|
28
|
+
|
|
29
|
+
## Features
|
|
30
|
+
|
|
31
|
+
- Synthetic user generation with configurable personas
|
|
32
|
+
- Persona-driven transaction generation with persistent user habits
|
|
33
|
+
- Persistent user identity traits such as spending style, merchant loyalty, and commute pattern
|
|
34
|
+
- Chronologically ordered event simulation
|
|
35
|
+
- Deterministic seed support for reproducible datasets
|
|
36
|
+
- Realistic recurring events like salary, rent, and subscriptions
|
|
37
|
+
- Merchant/category consistency with merchant affinity reuse
|
|
38
|
+
- Weekend vs weekday spending behavior
|
|
39
|
+
- Balance-aware suppression of discretionary spending
|
|
40
|
+
- Spending memory and overspend suppression
|
|
41
|
+
- Dedicated subscription engine with once-per-month recurrence
|
|
42
|
+
- Explicit overdraft metadata and configurable negative-balance handling
|
|
43
|
+
- Month-end spending compression and salary-cycle effects
|
|
44
|
+
- Clustered daily transaction bursts that feel session-like
|
|
45
|
+
- Spending-style frequency calibration for `minimalist`, `budget_conscious`, `lifestyle_spender`, and `impulsive_student`
|
|
46
|
+
- Running balance tracking
|
|
47
|
+
- Pandas DataFrame output
|
|
48
|
+
- CSV export utilities
|
|
49
|
+
|
|
50
|
+
## Installation
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install -e .
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
Or install dependencies manually:
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
pip install -r requirements.txt
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## Quickstart
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
from finforge import DatasetGenerator
|
|
66
|
+
|
|
67
|
+
dataset = (
|
|
68
|
+
DatasetGenerator(seed=42)
|
|
69
|
+
.with_users(100)
|
|
70
|
+
.with_persona("salaried")
|
|
71
|
+
.for_months(6)
|
|
72
|
+
.generate()
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
print(dataset.head())
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Export to CSV:
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
from finforge import DatasetGenerator
|
|
82
|
+
|
|
83
|
+
generator = (
|
|
84
|
+
DatasetGenerator(seed=42)
|
|
85
|
+
.with_users(25)
|
|
86
|
+
.with_persona("student")
|
|
87
|
+
.for_months(3)
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
dataset = generator.generate()
|
|
91
|
+
generator.export_csv("student_transactions.csv")
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
The public API remains fluent and backward-compatible:
|
|
95
|
+
|
|
96
|
+
```python
|
|
97
|
+
from finforge import DatasetGenerator
|
|
98
|
+
|
|
99
|
+
dataset = (
|
|
100
|
+
DatasetGenerator(seed=101)
|
|
101
|
+
.with_users(3)
|
|
102
|
+
.with_persona("student")
|
|
103
|
+
.for_months(2)
|
|
104
|
+
.generate()
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
dataset.to_csv("transactionsBehaviour.csv", index=False)
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Overdraft controls are configurable without changing the public API shape:
|
|
111
|
+
|
|
112
|
+
```python
|
|
113
|
+
dataset = (
|
|
114
|
+
DatasetGenerator(seed=7)
|
|
115
|
+
.with_users(10)
|
|
116
|
+
.with_persona("student")
|
|
117
|
+
.for_months(2)
|
|
118
|
+
.prevent_negative_balance(True)
|
|
119
|
+
.with_overdraft(0.0)
|
|
120
|
+
.generate()
|
|
121
|
+
)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Architecture Overview
|
|
125
|
+
|
|
126
|
+
FinForge is organized into small, composable modules:
|
|
127
|
+
|
|
128
|
+
- `finforge.core`: shared models, enums, constants, and configuration
|
|
129
|
+
- `finforge.personas`: persona definitions and behavioral profiles
|
|
130
|
+
- `finforge.generators`: user generation, scheduling, and transaction generation
|
|
131
|
+
- `finforge.merchants`: category-safe merchant catalog
|
|
132
|
+
- `finforge.utils`: randomness, dates, and balance helpers
|
|
133
|
+
- `finforge.exporters`: output adapters such as CSV
|
|
134
|
+
- `finforge.dataset`: fluent public API surface
|
|
135
|
+
|
|
136
|
+
The v1.0 architecture keeps future local-model extensions possible while keeping all LLM-related functionality out of the runtime path for now.
|
|
137
|
+
|
|
138
|
+
Behavioral simulation components live under `finforge.behavior`:
|
|
139
|
+
|
|
140
|
+
- `identity.py`: long-lived user behavioral identities
|
|
141
|
+
- `merchant_affinity.py`: persistent merchant preferences and reuse weights
|
|
142
|
+
- `adaptive_spending.py`: liquidity and overspend-aware daily spending controls
|
|
143
|
+
- `subscriptions.py`: dedicated subscription assignment and stable monthly pricing
|
|
144
|
+
- `overdraft.py`: explicit negative-balance policy decisions
|
|
145
|
+
- `budgeting.py`: rolling budget memory and spending pressure
|
|
146
|
+
- `lifecycle.py`: monthly cashflow rhythm and student irregular inflows
|
|
147
|
+
- `sessions.py`: grouped temporal spending sessions
|
|
148
|
+
|
|
149
|
+
## Example Output
|
|
150
|
+
|
|
151
|
+
Example generated schema:
|
|
152
|
+
|
|
153
|
+
| transaction_id | user_id | timestamp | merchant | category | amount | spending_style | is_subscription | recurrence_type | balance_state | session_id |
|
|
154
|
+
| --- | --- | --- | --- | --- | ---: | --- | --- | --- | --- | --- |
|
|
155
|
+
| txn_000001 | user_000001 | 2026-01-01 09:14:00 | Acme Payroll | income | 5840.00 | budget_conscious | False | income | normal | |
|
|
156
|
+
| txn_000002 | user_000001 | 2026-01-03 10:05:00 | Green Residency | housing | -1450.00 | budget_conscious | False | bill | normal | |
|
|
157
|
+
| txn_000003 | user_000001 | 2026-01-05 20:11:00 | Netflix | subscription | -649.00 | budget_conscious | True | subscription | normal | |
|
|
158
|
+
|
|
159
|
+
Typical generated behavior now includes:
|
|
160
|
+
|
|
161
|
+
- recurring salary and bill cadence near the beginning of each month
|
|
162
|
+
- subscriptions generated only by the recurring engine, never by random entertainment spending
|
|
163
|
+
- exactly one subscription row per assigned merchant per simulated month
|
|
164
|
+
- repeated use of a user’s preferred merchants
|
|
165
|
+
- persistent user styles such as `budget_conscious`, `lifestyle_spender`, and `impulsive_student`
|
|
166
|
+
- stronger commute and coffee activity on weekdays
|
|
167
|
+
- more entertainment and food delivery on weekends
|
|
168
|
+
- student late-night activity and irregular top-up inflows
|
|
169
|
+
- smaller discretionary tickets when balances run low
|
|
170
|
+
- behavioral pullback after recent overspending
|
|
171
|
+
- overdrafts either prevented or explicitly marked with `is_overdraft` and `overdraft_amount`
|
|
172
|
+
- clustered bursts such as `Uber -> Coffee -> Lunch`
|
|
173
|
+
|
|
174
|
+
## Metadata Columns
|
|
175
|
+
|
|
176
|
+
Generated transaction rows include behavioral metadata that is useful for testing and downstream modeling:
|
|
177
|
+
|
|
178
|
+
- `persona`
|
|
179
|
+
- `spending_style`
|
|
180
|
+
- `savings_tendency`
|
|
181
|
+
- `merchant_loyalty`
|
|
182
|
+
- `impulse_buying_score`
|
|
183
|
+
- `lifestyle_score`
|
|
184
|
+
- `night_activity_score`
|
|
185
|
+
- `is_recurring`
|
|
186
|
+
- `is_subscription`
|
|
187
|
+
- `is_discretionary`
|
|
188
|
+
- `recurrence_type`
|
|
189
|
+
- `session_id`
|
|
190
|
+
- `day_type`
|
|
191
|
+
- `balance_state`
|
|
192
|
+
- `is_overdraft`
|
|
193
|
+
- `overdraft_amount`
|
|
194
|
+
|
|
195
|
+
## Testing Guarantees
|
|
196
|
+
|
|
197
|
+
The v1.0 test suite verifies:
|
|
198
|
+
|
|
199
|
+
- balance integrity on every row
|
|
200
|
+
- chronological ordering per user
|
|
201
|
+
- seed reproducibility
|
|
202
|
+
- subscription recurrence and amount stability
|
|
203
|
+
- low-balance discretionary suppression
|
|
204
|
+
- reasonable session-linked rates
|
|
205
|
+
- merchant-category consistency
|
|
206
|
+
- required behavioral metadata columns
|
|
207
|
+
- explicit overdraft marking whenever balances go negative
|
|
208
|
+
|
|
209
|
+
## Roadmap
|
|
210
|
+
|
|
211
|
+
- Additional personas for freelancers, retirees, and small business owners
|
|
212
|
+
- More nuanced cash flow events and seasonal behavior
|
|
213
|
+
- Local Ollama-backed narrative and explanation modules
|
|
214
|
+
- Richer export formats and scenario presets
|
|
215
|
+
- Extended validation and benchmarking datasets
|
|
216
|
+
|
|
217
|
+
## Contributing
|
|
218
|
+
|
|
219
|
+
Contributions are welcome. Good first contributions include:
|
|
220
|
+
|
|
221
|
+
- new persona modules
|
|
222
|
+
- expanded merchant catalogs
|
|
223
|
+
- improved temporal rules
|
|
224
|
+
- additional exporters
|
|
225
|
+
- stronger test coverage
|
|
226
|
+
|
|
227
|
+
To contribute:
|
|
228
|
+
|
|
229
|
+
1. Fork the repository
|
|
230
|
+
2. Create a feature branch
|
|
231
|
+
3. Add tests for behavior changes
|
|
232
|
+
4. Run `pytest`
|
|
233
|
+
5. Open a pull request with a clear description of the use case
|
|
234
|
+
|
|
235
|
+
## Development
|
|
236
|
+
|
|
237
|
+
```bash
|
|
238
|
+
pip install -e ".[dev]"
|
|
239
|
+
pytest
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## License
|
|
243
|
+
|
|
244
|
+
MIT
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Behavioral simulation components for FinForge."""
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
"""Adaptive spending adjustments based on balance, budget, and lifecycle."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from finforge.behavior.budgeting import BudgetingEngine, UserBudgetState
|
|
8
|
+
from finforge.core.models import User
|
|
9
|
+
from finforge.personas.base import SpendingProfile
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass(frozen=True)
class AdaptiveSpendingSignal:
    """Immutable bundle of spending controls computed for a single day.

    Instances are produced by ``AdaptiveSpendingEngine.assess``. Attribute
    rebinding is blocked by ``frozen=True``; the two dict fields are still
    ordinary mutable dicts.
    """

    # Balance band label; the engine in this module emits "low", "normal" or "high".
    state: str
    # Overall multiplier for how often the user spends (engine rounds to 3 decimals).
    frequency_multiplier: float
    # Overall multiplier for ticket size (engine rounds to 3 decimals).
    amount_multiplier: float
    # Per-category weighting, keyed by category name.
    category_multipliers: dict[str, float]
    # Per-category ticket-size scaling, keyed by category name.
    amount_multipliers: dict[str, float]
    # Overspend pressure as reported by the budgeting engine.
    overspend_pressure: float
    # Month phase label; the engine in this module emits "early", "mid" or "late".
    lifecycle_phase: str
    # The engine sets an int only in the low-balance state and None otherwise;
    # presumably None means "no cap" -- confirm against the consumer.
    max_discretionary_transactions: int | None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AdaptiveSpendingEngine:
    """Derive day-level spending controls from balance, month phase, and budget memory.

    The engine keeps a reference to a budgeting engine and queries it for the
    overspend pressure of a budget state every time a day is assessed.
    """

    def __init__(self, budgeting_engine: BudgetingEngine) -> None:
        """Remember the budgeting engine used to measure overspend pressure."""
        self.budgeting_engine = budgeting_engine

    def assess(
        self,
        user: User,
        state: UserBudgetState,
        day_of_month: int,
        days_in_month: int,
        spending_profile: SpendingProfile,
    ) -> AdaptiveSpendingSignal:
        """Build the adaptive spending signal for one user on one day.

        Per-category weights start from ``user.category_affinities`` and are
        then scaled, in this order, by the month phase, by the balance band
        (low / high / normal, using the persona thresholds), and finally by an
        overspend penalty when the budgeting engine reports pressure above 1.0.
        All numeric outputs are rounded to three decimals.
        """

        def scale(table: dict[str, float], factors: dict[str, float]) -> None:
            # Multiply the listed categories in place; a category that is not
            # in the table yet is treated as starting from 1.0.
            for name, factor in factors.items():
                table[name] = table.get(name, 1.0) * factor

        phase = self._phase(day_of_month, days_in_month)
        overspend = self.budgeting_engine.overspend_pressure(state)
        low_threshold, high_threshold = self._thresholds(user)

        # Frequency weights begin at the user's own affinities; amounts begin neutral.
        category_multipliers = {
            name: 1.0 * affinity for name, affinity in user.category_affinities.items()
        }
        amount_multipliers = dict.fromkeys(user.category_affinities, 1.0)

        # Month phase: impulse/entertainment lift early, profile-driven scaling late.
        if phase == "early":
            scale(
                category_multipliers,
                {
                    "shopping": 1.05 + user.impulse_buying_score * 0.18,
                    "entertainment": 1.03 + user.entertainment_preference * 0.15,
                },
            )
        elif phase == "late":
            scale(category_multipliers, spending_profile.month_end_category_multipliers)

        # Balance band: compare the current balance with the persona thresholds.
        balance = state.balance
        if balance <= low_threshold:
            is_student = user.persona.value == "student"
            scale(
                category_multipliers,
                {
                    "entertainment": 0.10,
                    "shopping": 0.15,
                    "food": 0.40,
                    "coffee": 0.50,
                    "travel": 0.80,
                    "groceries": 1.20,
                },
            )
            scale(
                amount_multipliers,
                {
                    "entertainment": 0.50,
                    "shopping": 0.50,
                    "food": 0.65,
                    "coffee": 0.75,
                    "travel": 0.90,
                    "groceries": 1.00,
                },
            )
            signal_state = "low"
            frequency_multiplier = 0.26 if is_student else 0.34
            amount_multiplier = 0.46
            max_discretionary_transactions = 1 if is_student else 2
        elif balance >= high_threshold:
            scale(category_multipliers, spending_profile.high_balance_category_multipliers)
            signal_state = "high"
            frequency_multiplier = 1.10
            amount_multiplier = 1.08
            max_discretionary_transactions = None
        else:
            signal_state = "normal"
            frequency_multiplier = 1.0
            amount_multiplier = 1.0
            max_discretionary_transactions = None

        # Overspend penalty: the discount grows with pressure and is capped at 0.4.
        if overspend > 1.0:
            pressure_discount = min((overspend - 1.0) * 0.35, 0.4)
            frequency_multiplier *= 1.0 - pressure_discount
            amount_multiplier *= 1.0 - pressure_discount * 0.8
            scale(
                category_multipliers,
                {"shopping": 0.45, "entertainment": 0.55, "food": 0.78},
            )
            scale(
                amount_multipliers,
                {"shopping": 0.7, "entertainment": 0.75, "food": 0.85},
            )

        rounded_categories = {name: round(weight, 3) for name, weight in category_multipliers.items()}
        rounded_amounts = {name: round(weight, 3) for name, weight in amount_multipliers.items()}
        return AdaptiveSpendingSignal(
            state=signal_state,
            frequency_multiplier=round(frequency_multiplier, 3),
            amount_multiplier=round(amount_multiplier, 3),
            category_multipliers=rounded_categories,
            amount_multipliers=rounded_amounts,
            overspend_pressure=round(overspend, 3),
            lifecycle_phase=phase,
            max_discretionary_transactions=max_discretionary_transactions,
        )

    def _phase(self, day_of_month: int, days_in_month: int) -> str:
        """Classify a day of the month as "early", "mid", or "late".

        Days 1-7 are "early". "late" starts five days before the month length,
        but never earlier than day 25.
        """
        if day_of_month <= 7:
            return "early"
        late_start = max(days_in_month - 5, 25)
        return "late" if day_of_month >= late_start else "mid"

    def _thresholds(self, user: User) -> tuple[float, float]:
        """Return the (low, high) balance thresholds for the user's persona."""
        is_student = user.persona.value == "student"
        return (500.0, 5000.0) if is_student else (5000.0, 50000.0)
|