finforge 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. finforge-1.0.0/LICENSE +21 -0
  2. finforge-1.0.0/MANIFEST.in +8 -0
  3. finforge-1.0.0/PKG-INFO +282 -0
  4. finforge-1.0.0/README.md +244 -0
  5. finforge-1.0.0/finforge/__init__.py +5 -0
  6. finforge-1.0.0/finforge/behavior/__init__.py +1 -0
  7. finforge-1.0.0/finforge/behavior/adaptive_spending.py +129 -0
  8. finforge-1.0.0/finforge/behavior/balance_awareness.py +94 -0
  9. finforge-1.0.0/finforge/behavior/budgeting.py +78 -0
  10. finforge-1.0.0/finforge/behavior/clustering.py +102 -0
  11. finforge-1.0.0/finforge/behavior/identity.py +168 -0
  12. finforge-1.0.0/finforge/behavior/lifecycle.py +64 -0
  13. finforge-1.0.0/finforge/behavior/merchant_affinity.py +126 -0
  14. finforge-1.0.0/finforge/behavior/overdraft.py +49 -0
  15. finforge-1.0.0/finforge/behavior/sessions.py +230 -0
  16. finforge-1.0.0/finforge/behavior/spending_patterns.py +152 -0
  17. finforge-1.0.0/finforge/behavior/subscriptions.py +70 -0
  18. finforge-1.0.0/finforge/core/__init__.py +1 -0
  19. finforge-1.0.0/finforge/core/config.py +31 -0
  20. finforge-1.0.0/finforge/core/constants.py +13 -0
  21. finforge-1.0.0/finforge/core/enums.py +17 -0
  22. finforge-1.0.0/finforge/core/models.py +68 -0
  23. finforge-1.0.0/finforge/dataset.py +142 -0
  24. finforge-1.0.0/finforge/exporters/__init__.py +1 -0
  25. finforge-1.0.0/finforge/exporters/csv_exporter.py +18 -0
  26. finforge-1.0.0/finforge/generators/__init__.py +1 -0
  27. finforge-1.0.0/finforge/generators/scheduler.py +34 -0
  28. finforge-1.0.0/finforge/generators/transaction_generator.py +417 -0
  29. finforge-1.0.0/finforge/generators/user_generator.py +76 -0
  30. finforge-1.0.0/finforge/llm/__init__.py +5 -0
  31. finforge-1.0.0/finforge/llm/interfaces.py +12 -0
  32. finforge-1.0.0/finforge/merchants/__init__.py +1 -0
  33. finforge-1.0.0/finforge/merchants/catalog.py +120 -0
  34. finforge-1.0.0/finforge/personas/__init__.py +1 -0
  35. finforge-1.0.0/finforge/personas/base.py +61 -0
  36. finforge-1.0.0/finforge/personas/salaried.py +103 -0
  37. finforge-1.0.0/finforge/personas/student.py +86 -0
  38. finforge-1.0.0/finforge/utils/__init__.py +1 -0
  39. finforge-1.0.0/finforge/utils/balances.py +10 -0
  40. finforge-1.0.0/finforge/utils/dates.py +30 -0
  41. finforge-1.0.0/finforge/utils/randomness.py +24 -0
  42. finforge-1.0.0/finforge.egg-info/PKG-INFO +282 -0
  43. finforge-1.0.0/finforge.egg-info/SOURCES.txt +54 -0
  44. finforge-1.0.0/finforge.egg-info/dependency_links.txt +1 -0
  45. finforge-1.0.0/finforge.egg-info/requires.txt +10 -0
  46. finforge-1.0.0/finforge.egg-info/top_level.txt +1 -0
  47. finforge-1.0.0/pyproject.toml +63 -0
  48. finforge-1.0.0/requirements.txt +6 -0
  49. finforge-1.0.0/setup.cfg +4 -0
  50. finforge-1.0.0/setup.py +6 -0
  51. finforge-1.0.0/tests/test_balances.py +8 -0
  52. finforge-1.0.0/tests/test_behavior.py +77 -0
  53. finforge-1.0.0/tests/test_generation.py +72 -0
  54. finforge-1.0.0/tests/test_identity.py +24 -0
  55. finforge-1.0.0/tests/test_personas.py +56 -0
  56. finforge-1.0.0/tests/test_seed_reproducibility.py +7 -0
finforge-1.0.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 FinForge contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,8 @@
1
+ include README.md
2
+ include LICENSE
3
+ include requirements.txt
4
+ include pyproject.toml
5
+ include setup.py
6
+ recursive-include finforge *.py
7
+ global-exclude __pycache__
8
+ global-exclude *.py[cod]
@@ -0,0 +1,282 @@
1
+ Metadata-Version: 2.4
2
+ Name: finforge
3
+ Version: 1.0.0
4
+ Summary: Synthetic financial transaction data generation with persona-driven behavior simulation.
5
+ Author: FinForge contributors
6
+ Maintainer: FinForge maintainers
7
+ License-Expression: MIT
8
+ Project-URL: Homepage, https://github.com/finforge/finforge
9
+ Project-URL: Repository, https://github.com/finforge/finforge
10
+ Project-URL: Issues, https://github.com/finforge/finforge/issues
11
+ Project-URL: Documentation, https://github.com/finforge/finforge#readme
12
+ Project-URL: Changelog, https://github.com/finforge/finforge/blob/main/CHANGELOG.md
13
+ Keywords: synthetic-data,finance,transactions,simulation,testing
14
+ Classifier: Development Status :: 5 - Production/Stable
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: Financial and Insurance Industry
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Operating System :: OS Independent
22
+ Classifier: Topic :: Software Development :: Testing
23
+ Classifier: Topic :: Office/Business :: Financial
24
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
25
+ Requires-Python: >=3.11
26
+ Description-Content-Type: text/markdown
27
+ License-File: LICENSE
28
+ Requires-Dist: pandas>=2.0
29
+ Requires-Dist: numpy>=1.24
30
+ Requires-Dist: faker>=24.0
31
+ Requires-Dist: pydantic>=2.6
32
+ Requires-Dist: python-dateutil>=2.8
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest>=8.0; extra == "dev"
35
+ Requires-Dist: build>=1.2; extra == "dev"
36
+ Requires-Dist: twine>=5.0; extra == "dev"
37
+ Dynamic: license-file
38
+
39
+ # FinForge v1.0
40
+
41
+ FinForge is a synthetic financial transaction data generation framework for developers, QA teams, and analytics engineers who need realistic transaction datasets without using production customer records.
42
+
43
+ Unlike basic fake data libraries, FinForge focuses on behavioral simulation: persona-driven users, persistent financial identities, recurring cash flows, spending memory, merchant loyalty, monthly stress cycles, chronological balance updates, and deterministic reproducibility for testing and benchmarking.
44
+
45
+ ## Why FinForge v1.0 Is Different
46
+
47
+ FinForge v1.0 simulates persistent financial lives instead of generating isolated fake rows.
48
+
49
+ - Persistent user identity: each user carries a stable spending style, merchant loyalty profile, night activity score, and savings tendency.
50
+ - Temporal financial rhythm: salaries, transfers, bills, and subscriptions follow a repeatable monthly cadence.
51
+ - Realistic behavioral adaptation: low-balance users pull back discretionary spending, while stronger spenders show more weekend and late-night activity.
52
+ - Reproducible synthetic data: the same seed and config produce the same dataset, which makes FinForge practical for testing and benchmarking.
53
+
54
+ ## Problem Statement
55
+
56
+ Financial applications often need transaction histories that are:
57
+
58
+ - realistic enough to exercise business logic
59
+ - reproducible enough for automated testing
60
+ - structured enough for analytics experiments
61
+ - safe enough to share across teams
62
+
63
+ Most generic fake data tools generate isolated rows. Real financial systems need temporally consistent histories where balances evolve over time, transactions follow plausible cadence, and spending patterns reflect customer behavior.
64
+
65
+ FinForge addresses that gap.
66
+
67
+ ## Features
68
+
69
+ - Synthetic user generation with configurable personas
70
+ - Persona-driven transaction generation with persistent user habits
71
+ - Persistent user identity traits such as spending style, merchant loyalty, and commute pattern
72
+ - Chronologically ordered event simulation
73
+ - Deterministic seed support for reproducible datasets
74
+ - Realistic recurring events like salary, rent, and subscriptions
75
+ - Merchant/category consistency with merchant affinity reuse
76
+ - Weekend vs weekday spending behavior
77
+ - Balance-aware suppression of discretionary spending
78
+ - Spending memory and overspend suppression
79
+ - Dedicated subscription engine with once-per-month recurrence
80
+ - Explicit overdraft metadata and configurable negative-balance handling
81
+ - Month-end spending compression and salary-cycle effects
82
+ - Clustered daily transaction bursts that feel session-like
83
+ - Spending-style frequency calibration for `minimalist`, `budget_conscious`, `lifestyle_spender`, and `impulsive_student`
84
+ - Running balance tracking
85
+ - Pandas DataFrame output
86
+ - CSV export utilities
87
+
88
+ ## Installation
89
+
90
+ ```bash
91
+ pip install -e .
92
+ ```
93
+
94
+ Or install dependencies manually:
95
+
96
+ ```bash
97
+ pip install -r requirements.txt
98
+ ```
99
+
100
+ ## Quickstart
101
+
102
+ ```python
103
+ from finforge import DatasetGenerator
104
+
105
+ dataset = (
106
+ DatasetGenerator(seed=42)
107
+ .with_users(100)
108
+ .with_persona("salaried")
109
+ .for_months(6)
110
+ .generate()
111
+ )
112
+
113
+ print(dataset.head())
114
+ ```
115
+
116
+ Export to CSV:
117
+
118
+ ```python
119
+ from finforge import DatasetGenerator
120
+
121
+ generator = (
122
+ DatasetGenerator(seed=42)
123
+ .with_users(25)
124
+ .with_persona("student")
125
+ .for_months(3)
126
+ )
127
+
128
+ dataset = generator.generate()
129
+ generator.export_csv("student_transactions.csv")
130
+ ```
131
+
132
+ The public API remains fluent and backward-compatible:
133
+
134
+ ```python
135
+ from finforge import DatasetGenerator
136
+
137
+ dataset = (
138
+ DatasetGenerator(seed=101)
139
+ .with_users(3)
140
+ .with_persona("student")
141
+ .for_months(2)
142
+ .generate()
143
+ )
144
+
145
+ dataset.to_csv("transactionsBehaviour.csv", index=False)
146
+ ```
147
+
148
+ Overdraft controls are configurable without changing the public API shape:
149
+
150
+ ```python
151
+ dataset = (
152
+ DatasetGenerator(seed=7)
153
+ .with_users(10)
154
+ .with_persona("student")
155
+ .for_months(2)
156
+ .prevent_negative_balance(True)
157
+ .with_overdraft(0.0)
158
+ .generate()
159
+ )
160
+ ```
161
+
162
+ ## Architecture Overview
163
+
164
+ FinForge is organized into small, composable modules:
165
+
166
+ - `finforge.core`: shared models, enums, constants, and configuration
167
+ - `finforge.personas`: persona definitions and behavioral profiles
168
+ - `finforge.generators`: user generation, scheduling, and transaction generation
169
+ - `finforge.merchants`: category-safe merchant catalog
170
+ - `finforge.utils`: randomness, dates, and balance helpers
171
+ - `finforge.exporters`: output adapters such as CSV
172
+ - `finforge.dataset`: fluent public API surface
173
+
174
+ The v1.0 architecture leaves room for future local-model extensions while keeping all LLM-related functionality out of the runtime path for now.
175
+
176
+ Behavioral simulation components live under `finforge.behavior`:
177
+
178
+ - `identity.py`: long-lived user behavioral identities
179
+ - `merchant_affinity.py`: persistent merchant preferences and reuse weights
180
+ - `adaptive_spending.py`: liquidity and overspend-aware daily spending controls
181
+ - `subscriptions.py`: dedicated subscription assignment and stable monthly pricing
182
+ - `overdraft.py`: explicit negative-balance policy decisions
183
+ - `budgeting.py`: rolling budget memory and spending pressure
184
+ - `lifecycle.py`: monthly cashflow rhythm and student irregular inflows
185
+ - `sessions.py`: grouped temporal spending sessions
186
+
187
+ ## Example Output
188
+
189
+ Example generated schema:
190
+
191
+ | transaction_id | user_id | timestamp | merchant | category | amount | spending_style | is_subscription | recurrence_type | balance_state | session_id |
192
+ | --- | --- | --- | --- | --- | ---: | --- | --- | --- | --- | --- |
193
+ | txn_000001 | user_000001 | 2026-01-01 09:14:00 | Acme Payroll | income | 5840.00 | budget_conscious | False | income | normal | |
194
+ | txn_000002 | user_000001 | 2026-01-03 10:05:00 | Green Residency | housing | -1450.00 | budget_conscious | False | bill | normal | |
195
+ | txn_000003 | user_000001 | 2026-01-05 20:11:00 | Netflix | subscription | -649.00 | budget_conscious | True | subscription | normal | |
196
+
197
+ Typical generated behavior now includes:
198
+
199
+ - recurring salary and bill cadence near the beginning of each month
200
+ - subscriptions generated only by the recurring engine, never by random entertainment spending
201
+ - exactly one subscription row per assigned merchant per simulated month
202
+ - repeated use of a user’s preferred merchants
203
+ - persistent user styles such as `budget_conscious`, `lifestyle_spender`, and `impulsive_student`
204
+ - stronger commute and coffee activity on weekdays
205
+ - more entertainment and food delivery on weekends
206
+ - student late-night activity and irregular top-up inflows
207
+ - smaller discretionary tickets when balances run low
208
+ - behavioral pullback after recent overspending
209
+ - overdrafts either prevented or explicitly marked with `is_overdraft` and `overdraft_amount`
210
+ - clustered bursts such as `Uber -> Coffee -> Lunch`
211
+
212
+ ## Metadata Columns
213
+
214
+ Generated transaction rows include behavioral metadata that is useful for testing and downstream modeling:
215
+
216
+ - `persona`
217
+ - `spending_style`
218
+ - `savings_tendency`
219
+ - `merchant_loyalty`
220
+ - `impulse_buying_score`
221
+ - `lifestyle_score`
222
+ - `night_activity_score`
223
+ - `is_recurring`
224
+ - `is_subscription`
225
+ - `is_discretionary`
226
+ - `recurrence_type`
227
+ - `session_id`
228
+ - `day_type`
229
+ - `balance_state`
230
+ - `is_overdraft`
231
+ - `overdraft_amount`
232
+
233
+ ## Testing Guarantees
234
+
235
+ The v1.0 test suite verifies:
236
+
237
+ - balance integrity on every row
238
+ - chronological ordering per user
239
+ - seed reproducibility
240
+ - subscription recurrence and amount stability
241
+ - low-balance discretionary suppression
242
+ - reasonable session-linked rates
243
+ - merchant-category consistency
244
+ - required behavioral metadata columns
245
+ - explicit overdraft marking whenever balances go negative
246
+
247
+ ## Roadmap
248
+
249
+ - Additional personas for freelancers, retirees, and small business owners
250
+ - More nuanced cash flow events and seasonal behavior
251
+ - Local Ollama-backed narrative and explanation modules
252
+ - Richer export formats and scenario presets
253
+ - Extended validation and benchmarking datasets
254
+
255
+ ## Contributing
256
+
257
+ Contributions are welcome. Good first contributions include:
258
+
259
+ - new persona modules
260
+ - expanded merchant catalogs
261
+ - improved temporal rules
262
+ - additional exporters
263
+ - stronger test coverage
264
+
265
+ To contribute:
266
+
267
+ 1. Fork the repository
268
+ 2. Create a feature branch
269
+ 3. Add tests for behavior changes
270
+ 4. Run `pytest`
271
+ 5. Open a pull request with a clear description of the use case
272
+
273
+ ## Development
274
+
275
+ ```bash
276
+ pip install -e .[dev]
277
+ pytest
278
+ ```
279
+
280
+ ## License
281
+
282
+ MIT
@@ -0,0 +1,244 @@
1
+ # FinForge v1.0
2
+
3
+ FinForge is a synthetic financial transaction data generation framework for developers, QA teams, and analytics engineers who need realistic transaction datasets without using production customer records.
4
+
5
+ Unlike basic fake data libraries, FinForge focuses on behavioral simulation: persona-driven users, persistent financial identities, recurring cash flows, spending memory, merchant loyalty, monthly stress cycles, chronological balance updates, and deterministic reproducibility for testing and benchmarking.
6
+
7
+ ## Why FinForge v1.0 Is Different
8
+
9
+ FinForge v1.0 simulates persistent financial lives instead of generating isolated fake rows.
10
+
11
+ - Persistent user identity: each user carries a stable spending style, merchant loyalty profile, night activity score, and savings tendency.
12
+ - Temporal financial rhythm: salaries, transfers, bills, and subscriptions follow a repeatable monthly cadence.
13
+ - Realistic behavioral adaptation: low-balance users pull back discretionary spending, while stronger spenders show more weekend and late-night activity.
14
+ - Reproducible synthetic data: the same seed and config produce the same dataset, which makes FinForge practical for testing and benchmarking.
15
+
16
+ ## Problem Statement
17
+
18
+ Financial applications often need transaction histories that are:
19
+
20
+ - realistic enough to exercise business logic
21
+ - reproducible enough for automated testing
22
+ - structured enough for analytics experiments
23
+ - safe enough to share across teams
24
+
25
+ Most generic fake data tools generate isolated rows. Real financial systems need temporally consistent histories where balances evolve over time, transactions follow plausible cadence, and spending patterns reflect customer behavior.
26
+
27
+ FinForge addresses that gap.
28
+
29
+ ## Features
30
+
31
+ - Synthetic user generation with configurable personas
32
+ - Persona-driven transaction generation with persistent user habits
33
+ - Persistent user identity traits such as spending style, merchant loyalty, and commute pattern
34
+ - Chronologically ordered event simulation
35
+ - Deterministic seed support for reproducible datasets
36
+ - Realistic recurring events like salary, rent, and subscriptions
37
+ - Merchant/category consistency with merchant affinity reuse
38
+ - Weekend vs weekday spending behavior
39
+ - Balance-aware suppression of discretionary spending
40
+ - Spending memory and overspend suppression
41
+ - Dedicated subscription engine with once-per-month recurrence
42
+ - Explicit overdraft metadata and configurable negative-balance handling
43
+ - Month-end spending compression and salary-cycle effects
44
+ - Clustered daily transaction bursts that feel session-like
45
+ - Spending-style frequency calibration for `minimalist`, `budget_conscious`, `lifestyle_spender`, and `impulsive_student`
46
+ - Running balance tracking
47
+ - Pandas DataFrame output
48
+ - CSV export utilities
49
+
50
+ ## Installation
51
+
52
+ ```bash
53
+ pip install -e .
54
+ ```
55
+
56
+ Or install dependencies manually:
57
+
58
+ ```bash
59
+ pip install -r requirements.txt
60
+ ```
61
+
62
+ ## Quickstart
63
+
64
+ ```python
65
+ from finforge import DatasetGenerator
66
+
67
+ dataset = (
68
+ DatasetGenerator(seed=42)
69
+ .with_users(100)
70
+ .with_persona("salaried")
71
+ .for_months(6)
72
+ .generate()
73
+ )
74
+
75
+ print(dataset.head())
76
+ ```
77
+
78
+ Export to CSV:
79
+
80
+ ```python
81
+ from finforge import DatasetGenerator
82
+
83
+ generator = (
84
+ DatasetGenerator(seed=42)
85
+ .with_users(25)
86
+ .with_persona("student")
87
+ .for_months(3)
88
+ )
89
+
90
+ dataset = generator.generate()
91
+ generator.export_csv("student_transactions.csv")
92
+ ```
93
+
94
+ The public API remains fluent and backward-compatible:
95
+
96
+ ```python
97
+ from finforge import DatasetGenerator
98
+
99
+ dataset = (
100
+ DatasetGenerator(seed=101)
101
+ .with_users(3)
102
+ .with_persona("student")
103
+ .for_months(2)
104
+ .generate()
105
+ )
106
+
107
+ dataset.to_csv("transactionsBehaviour.csv", index=False)
108
+ ```
109
+
110
+ Overdraft controls are configurable without changing the public API shape:
111
+
112
+ ```python
113
+ dataset = (
114
+ DatasetGenerator(seed=7)
115
+ .with_users(10)
116
+ .with_persona("student")
117
+ .for_months(2)
118
+ .prevent_negative_balance(True)
119
+ .with_overdraft(0.0)
120
+ .generate()
121
+ )
122
+ ```
123
+
124
+ ## Architecture Overview
125
+
126
+ FinForge is organized into small, composable modules:
127
+
128
+ - `finforge.core`: shared models, enums, constants, and configuration
129
+ - `finforge.personas`: persona definitions and behavioral profiles
130
+ - `finforge.generators`: user generation, scheduling, and transaction generation
131
+ - `finforge.merchants`: category-safe merchant catalog
132
+ - `finforge.utils`: randomness, dates, and balance helpers
133
+ - `finforge.exporters`: output adapters such as CSV
134
+ - `finforge.dataset`: fluent public API surface
135
+
136
+ The v1.0 architecture leaves room for future local-model extensions while keeping all LLM-related functionality out of the runtime path for now.
137
+
138
+ Behavioral simulation components live under `finforge.behavior`:
139
+
140
+ - `identity.py`: long-lived user behavioral identities
141
+ - `merchant_affinity.py`: persistent merchant preferences and reuse weights
142
+ - `adaptive_spending.py`: liquidity and overspend-aware daily spending controls
143
+ - `subscriptions.py`: dedicated subscription assignment and stable monthly pricing
144
+ - `overdraft.py`: explicit negative-balance policy decisions
145
+ - `budgeting.py`: rolling budget memory and spending pressure
146
+ - `lifecycle.py`: monthly cashflow rhythm and student irregular inflows
147
+ - `sessions.py`: grouped temporal spending sessions
148
+
149
+ ## Example Output
150
+
151
+ Example generated schema:
152
+
153
+ | transaction_id | user_id | timestamp | merchant | category | amount | spending_style | is_subscription | recurrence_type | balance_state | session_id |
154
+ | --- | --- | --- | --- | --- | ---: | --- | --- | --- | --- | --- |
155
+ | txn_000001 | user_000001 | 2026-01-01 09:14:00 | Acme Payroll | income | 5840.00 | budget_conscious | False | income | normal | |
156
+ | txn_000002 | user_000001 | 2026-01-03 10:05:00 | Green Residency | housing | -1450.00 | budget_conscious | False | bill | normal | |
157
+ | txn_000003 | user_000001 | 2026-01-05 20:11:00 | Netflix | subscription | -649.00 | budget_conscious | True | subscription | normal | |
158
+
159
+ Typical generated behavior now includes:
160
+
161
+ - recurring salary and bill cadence near the beginning of each month
162
+ - subscriptions generated only by the recurring engine, never by random entertainment spending
163
+ - exactly one subscription row per assigned merchant per simulated month
164
+ - repeated use of a user’s preferred merchants
165
+ - persistent user styles such as `budget_conscious`, `lifestyle_spender`, and `impulsive_student`
166
+ - stronger commute and coffee activity on weekdays
167
+ - more entertainment and food delivery on weekends
168
+ - student late-night activity and irregular top-up inflows
169
+ - smaller discretionary tickets when balances run low
170
+ - behavioral pullback after recent overspending
171
+ - overdrafts either prevented or explicitly marked with `is_overdraft` and `overdraft_amount`
172
+ - clustered bursts such as `Uber -> Coffee -> Lunch`
173
+
174
+ ## Metadata Columns
175
+
176
+ Generated transaction rows include behavioral metadata that is useful for testing and downstream modeling:
177
+
178
+ - `persona`
179
+ - `spending_style`
180
+ - `savings_tendency`
181
+ - `merchant_loyalty`
182
+ - `impulse_buying_score`
183
+ - `lifestyle_score`
184
+ - `night_activity_score`
185
+ - `is_recurring`
186
+ - `is_subscription`
187
+ - `is_discretionary`
188
+ - `recurrence_type`
189
+ - `session_id`
190
+ - `day_type`
191
+ - `balance_state`
192
+ - `is_overdraft`
193
+ - `overdraft_amount`
194
+
195
+ ## Testing Guarantees
196
+
197
+ The v1.0 test suite verifies:
198
+
199
+ - balance integrity on every row
200
+ - chronological ordering per user
201
+ - seed reproducibility
202
+ - subscription recurrence and amount stability
203
+ - low-balance discretionary suppression
204
+ - reasonable session-linked rates
205
+ - merchant-category consistency
206
+ - required behavioral metadata columns
207
+ - explicit overdraft marking whenever balances go negative
208
+
209
+ ## Roadmap
210
+
211
+ - Additional personas for freelancers, retirees, and small business owners
212
+ - More nuanced cash flow events and seasonal behavior
213
+ - Local Ollama-backed narrative and explanation modules
214
+ - Richer export formats and scenario presets
215
+ - Extended validation and benchmarking datasets
216
+
217
+ ## Contributing
218
+
219
+ Contributions are welcome. Good first contributions include:
220
+
221
+ - new persona modules
222
+ - expanded merchant catalogs
223
+ - improved temporal rules
224
+ - additional exporters
225
+ - stronger test coverage
226
+
227
+ To contribute:
228
+
229
+ 1. Fork the repository
230
+ 2. Create a feature branch
231
+ 3. Add tests for behavior changes
232
+ 4. Run `pytest`
233
+ 5. Open a pull request with a clear description of the use case
234
+
235
+ ## Development
236
+
237
+ ```bash
238
+ pip install -e .[dev]
239
+ pytest
240
+ ```
241
+
242
+ ## License
243
+
244
+ MIT
@@ -0,0 +1,5 @@
1
+ """Public package exports for FinForge."""
2
+
3
+ from finforge.dataset import DatasetGenerator
4
+
5
+ __all__ = ["DatasetGenerator"]
@@ -0,0 +1 @@
1
+ """Behavioral simulation components for FinForge."""
@@ -0,0 +1,129 @@
1
+ """Adaptive spending adjustments based on balance, budget, and lifecycle."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+
7
+ from finforge.behavior.budgeting import BudgetingEngine, UserBudgetState
8
+ from finforge.core.models import User
9
+ from finforge.personas.base import SpendingProfile
10
+
11
+
12
@dataclass(frozen=True)
class AdaptiveSpendingSignal:
    """Immutable bundle of the spending controls computed for one day.

    Instances are produced by ``AdaptiveSpendingEngine.assess``; every
    numeric value it stores has been rounded to three decimal places.

    Attributes:
        state: Balance classification, one of ``"low"``, ``"normal"`` or
            ``"high"``.
        frequency_multiplier: Global scaling factor derived from the balance
            state and recent overspend pressure.
        amount_multiplier: Global scaling factor for amounts, derived from
            the same inputs as ``frequency_multiplier``.
        category_multipliers: Per-category weight factors keyed by category
            name (seeded from the user's category affinities).
        amount_multipliers: Per-category amount factors keyed by category
            name.
        overspend_pressure: Pressure value reported by the budgeting engine;
            values above ``1.0`` trigger the overspend pullback.
        lifecycle_phase: Month phase, one of ``"early"``, ``"mid"`` or
            ``"late"``.
        max_discretionary_transactions: Cap set only in the low-balance
            state; ``None`` otherwise.
    """

    # NOTE: field order is part of the positional constructor signature.
    state: str
    frequency_multiplier: float
    amount_multiplier: float
    category_multipliers: dict[str, float]
    amount_multipliers: dict[str, float]
    overspend_pressure: float
    lifecycle_phase: str
    max_discretionary_transactions: int | None
24
+
25
+
26
class AdaptiveSpendingEngine:
    """Turn balance, month phase, and recent spend memory into daily controls.

    The engine is stateless apart from the budgeting engine it wraps; every
    call to :meth:`assess` builds a fresh ``AdaptiveSpendingSignal``.
    """

    # Per-category weight factors applied when the balance is at or below the
    # persona's low threshold.
    _LOW_BALANCE_CATEGORY_FACTORS = {
        "entertainment": 0.10,
        "shopping": 0.15,
        "food": 0.40,
        "coffee": 0.50,
        "travel": 0.80,
        "groceries": 1.20,
    }
    # Per-category amount factors applied in the same low-balance situation.
    _LOW_BALANCE_AMOUNT_FACTORS = {
        "entertainment": 0.50,
        "shopping": 0.50,
        "food": 0.65,
        "coffee": 0.75,
        "travel": 0.90,
        "groceries": 1.00,
    }
    # Pullback factors applied when overspend pressure exceeds 1.0.
    _OVERSPEND_CATEGORY_FACTORS = {
        "shopping": 0.45,
        "entertainment": 0.55,
        "food": 0.78,
    }
    _OVERSPEND_AMOUNT_FACTORS = {
        "shopping": 0.7,
        "entertainment": 0.75,
        "food": 0.85,
    }

    def __init__(self, budgeting_engine: BudgetingEngine) -> None:
        """Store the budgeting engine used to read overspend pressure."""
        self.budgeting_engine = budgeting_engine

    @staticmethod
    def _scale(weights, factors) -> None:
        """Multiply ``weights`` in place by ``factors``, defaulting absent keys to 1.0."""
        for name, factor in factors.items():
            weights[name] = weights.get(name, 1.0) * factor

    def assess(
        self,
        user: User,
        state: UserBudgetState,
        day_of_month: int,
        days_in_month: int,
        spending_profile: SpendingProfile,
    ) -> AdaptiveSpendingSignal:
        """Build the adaptive spending signal for a single day.

        Args:
            user: Supplies category affinities, persona, impulse-buying score
                and entertainment preference.
            state: Budget state; its ``balance`` is compared against the
                persona thresholds and it is passed to the budgeting engine.
            day_of_month: Day number used to pick the lifecycle phase.
            days_in_month: Length of the current month.
            spending_profile: Supplies the month-end and high-balance
                category multipliers.

        Returns:
            An ``AdaptiveSpendingSignal`` whose numeric values are rounded to
            three decimals.
        """
        lifecycle_phase = self._phase(day_of_month, days_in_month)
        pressure = self.budgeting_engine.overspend_pressure(state)
        low_cutoff, high_cutoff = self._thresholds(user)

        # Category weights start at the user's affinities; amount weights at 1.0.
        affinities = user.category_affinities
        category_weights = {name: 1.0 * weight for name, weight in affinities.items()}
        amount_weights = dict.fromkeys(affinities, 1.0)

        # Month-phase adjustments: early-month boost or month-end profile.
        if lifecycle_phase == "early":
            self._scale(
                category_weights,
                {
                    "shopping": 1.05 + user.impulse_buying_score * 0.18,
                    "entertainment": 1.03 + user.entertainment_preference * 0.15,
                },
            )
        elif lifecycle_phase == "late":
            self._scale(category_weights, spending_profile.month_end_category_multipliers)

        # Defaults describe the "normal" balance state; low/high override them.
        balance_state = "normal"
        frequency = 1.0
        amount = 1.0
        discretionary_cap = None

        balance = state.balance
        if balance <= low_cutoff:
            self._scale(category_weights, self._LOW_BALANCE_CATEGORY_FACTORS)
            self._scale(amount_weights, self._LOW_BALANCE_AMOUNT_FACTORS)
            is_student = user.persona.value == "student"
            balance_state = "low"
            frequency = 0.26 if is_student else 0.34
            amount = 0.46
            discretionary_cap = 1 if is_student else 2
        elif balance >= high_cutoff:
            self._scale(category_weights, spending_profile.high_balance_category_multipliers)
            balance_state = "high"
            frequency = 1.10
            amount = 1.08

        # Recent overspending trims everything further; the discount is capped at 0.4.
        if pressure > 1.0:
            discount = min((pressure - 1.0) * 0.35, 0.4)
            frequency *= 1.0 - discount
            amount *= 1.0 - discount * 0.8
            self._scale(category_weights, self._OVERSPEND_CATEGORY_FACTORS)
            self._scale(amount_weights, self._OVERSPEND_AMOUNT_FACTORS)

        rounded_categories = {name: round(weight, 3) for name, weight in category_weights.items()}
        rounded_amounts = {name: round(weight, 3) for name, weight in amount_weights.items()}
        return AdaptiveSpendingSignal(
            state=balance_state,
            frequency_multiplier=round(frequency, 3),
            amount_multiplier=round(amount, 3),
            category_multipliers=rounded_categories,
            amount_multipliers=rounded_amounts,
            overspend_pressure=round(pressure, 3),
            lifecycle_phase=lifecycle_phase,
            max_discretionary_transactions=discretionary_cap,
        )

    def _phase(self, day_of_month: int, days_in_month: int) -> str:
        """Classify a day as ``"early"``, ``"mid"`` or ``"late"`` in the month.

        The first seven days are early. The late phase starts five days
        before month end, but never earlier than day 25.
        """
        if day_of_month <= 7:
            return "early"
        late_start = max(days_in_month - 5, 25)
        return "late" if day_of_month >= late_start else "mid"

    def _thresholds(self, user: User) -> tuple[float, float]:
        """Return the ``(low, high)`` balance thresholds for the user's persona."""
        is_student = user.persona.value == "student"
        return (500.0, 5000.0) if is_student else (5000.0, 50000.0)