opentaxonomy 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- opentaxonomy-0.1.0/.gitignore +34 -0
- opentaxonomy-0.1.0/PKG-INFO +130 -0
- opentaxonomy-0.1.0/README.md +111 -0
- opentaxonomy-0.1.0/files/README.md +66 -0
- opentaxonomy-0.1.0/files/digital_subscriptions.yaml +31 -0
- opentaxonomy-0.1.0/files/dining_out.yaml +28 -0
- opentaxonomy-0.1.0/files/discretionary.yaml +30 -0
- opentaxonomy-0.1.0/files/expenditure.yaml +27 -0
- opentaxonomy-0.1.0/files/groceries.yaml +22 -0
- opentaxonomy-0.1.0/files/placement_map.yaml +307 -0
- opentaxonomy-0.1.0/files/prima-seed.yaml +167 -0
- opentaxonomy-0.1.0/files/seed.yaml +102 -0
- opentaxonomy-0.1.0/pyproject.toml +33 -0
- opentaxonomy-0.1.0/src/opentaxonomy/__init__.py +1 -0
- opentaxonomy-0.1.0/src/opentaxonomy/cli.py +105 -0
- opentaxonomy-0.1.0/src/opentaxonomy/commands/__init__.py +0 -0
- opentaxonomy-0.1.0/src/opentaxonomy/commands/create.py +97 -0
- opentaxonomy-0.1.0/src/opentaxonomy/commands/run.py +82 -0
- opentaxonomy-0.1.0/src/opentaxonomy/io/__init__.py +0 -0
- opentaxonomy-0.1.0/src/opentaxonomy/io/base.py +15 -0
- opentaxonomy-0.1.0/src/opentaxonomy/io/db_sources.py +40 -0
- opentaxonomy-0.1.0/src/opentaxonomy/io/file_sources.py +73 -0
- opentaxonomy-0.1.0/src/opentaxonomy/llm/__init__.py +0 -0
- opentaxonomy-0.1.0/src/opentaxonomy/llm/client.py +62 -0
- opentaxonomy-0.1.0/src/opentaxonomy/llm/create_flow.py +389 -0
- opentaxonomy-0.1.0/src/opentaxonomy/llm/prompts.py +180 -0
- opentaxonomy-0.1.0/src/opentaxonomy/llm/run_flow.py +190 -0
- opentaxonomy-0.1.0/src/opentaxonomy/llm/schemas.py +91 -0
- opentaxonomy-0.1.0/src/opentaxonomy/utils/__init__.py +0 -0
- opentaxonomy-0.1.0/src/opentaxonomy/utils/canonical_id.py +15 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.egg-info/
|
|
5
|
+
*.egg
|
|
6
|
+
dist/
|
|
7
|
+
build/
|
|
8
|
+
.eggs/
|
|
9
|
+
|
|
10
|
+
# Virtual environments
|
|
11
|
+
.venv/
|
|
12
|
+
venv/
|
|
13
|
+
env/
|
|
14
|
+
|
|
15
|
+
# Distribution
|
|
16
|
+
dist/
|
|
17
|
+
|
|
18
|
+
# Environment
|
|
19
|
+
.env
|
|
20
|
+
.env.*
|
|
21
|
+
|
|
22
|
+
# IDE
|
|
23
|
+
.vscode/
|
|
24
|
+
.idea/
|
|
25
|
+
|
|
26
|
+
# Taxonomy output (generated data — not source)
|
|
27
|
+
taxonomy/
|
|
28
|
+
|
|
29
|
+
# Claude Code
|
|
30
|
+
.claude/
|
|
31
|
+
|
|
32
|
+
# OS
|
|
33
|
+
.DS_Store
|
|
34
|
+
Thumbs.db
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: opentaxonomy
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: LLM-powered semantic taxonomy generator for raw categorical data
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Requires-Dist: anthropic>=0.40.0
|
|
8
|
+
Requires-Dist: click>=8.1.0
|
|
9
|
+
Requires-Dist: openpyxl>=3.1.0
|
|
10
|
+
Requires-Dist: pandas>=2.0.0
|
|
11
|
+
Requires-Dist: pyarrow>=14.0.0
|
|
12
|
+
Requires-Dist: pydantic>=2.0.0
|
|
13
|
+
Requires-Dist: pyyaml>=6.0
|
|
14
|
+
Requires-Dist: rich>=13.0.0
|
|
15
|
+
Requires-Dist: sqlalchemy>=2.0.0
|
|
16
|
+
Provides-Extra: dev
|
|
17
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# OpenTaxonomy
|
|
21
|
+
|
|
22
|
+
LLM-powered semantic taxonomy generator for raw categorical data.
|
|
23
|
+
|
|
24
|
+
## What it does
|
|
25
|
+
|
|
26
|
+
OpenTaxonomy takes a column of messy raw values (bank transactions, product names, survey responses — anything) and generates a structured semantic taxonomy from it using Claude as the reasoning engine.
|
|
27
|
+
|
|
28
|
+
**Output for each value:**
|
|
29
|
+
- `{column}_normalized` — cleaned entity name (e.g. `"REWE SAGT DANKE 46654184/..."` → `"REWE"`)
|
|
30
|
+
- `canonical_id` — taxonomy path (e.g. `ft.expenditure.variable.groceries`)
|
|
31
|
+
|
|
32
|
+
The taxonomy itself is a set of YAML files — a `seed.yaml` capturing the domain structure, one YAML file per tree node under `nodes/` (each an ontological contract with inclusion/exclusion criteria and a decision record), and a `placement_map.yaml` that is the source of truth for all mappings.
|
|
33
|
+
|
|
34
|
+
## Architecture
|
|
35
|
+
|
|
36
|
+
The core is the **Prima Seed** — a universal questioning protocol that generates domain-specific taxonomic trees from any categorical data:
|
|
37
|
+
|
|
38
|
+
- **Q0** Identify the Form: what unifies all values?
|
|
39
|
+
- **Q0b** Establish context: what operational realm governs classification?
|
|
40
|
+
- **Q1** Primary differentiation: the most essential splitting criterion
|
|
41
|
+
- **Q2** Recursive differentiation: applied per branch at each level
|
|
42
|
+
- **Q3** Dialectical check: values that resist placement expose flawed criteria
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install opentaxonomy
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Requires Python 3.11+ and an [Anthropic API key](https://console.anthropic.com/).
|
|
51
|
+
|
|
52
|
+
## Usage
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
56
|
+
|
|
57
|
+
# Generate a new taxonomy from raw data
|
|
58
|
+
opentaxonomy create -i transactions.csv -c description -o ./taxonomy
|
|
59
|
+
|
|
60
|
+
# Place new/unseen values into an existing taxonomy
|
|
61
|
+
opentaxonomy run -i new_data.csv -c description -s ./taxonomy
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Supported input formats
|
|
65
|
+
|
|
66
|
+
| Format | Example |
|
|
67
|
+
|--------|---------|
|
|
68
|
+
| CSV / TSV | `data.csv`, `data.tsv` |
|
|
69
|
+
| JSON | `data.json` |
|
|
70
|
+
| Excel | `data.xlsx` |
|
|
71
|
+
| Parquet | `data.parquet` |
|
|
72
|
+
| Database | `postgresql://user:pass@host/db` + `--db-table` |
|
|
73
|
+
|
|
74
|
+
### Options
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
opentaxonomy create
|
|
78
|
+
-i, --input Input file or database connection string [required]
|
|
79
|
+
-c, --column Column containing raw values to classify [required]
|
|
80
|
+
-o, --output-dir Where to write taxonomy files [default: ./taxonomy]
|
|
81
|
+
--domain-hint Optional hint to guide the LLM (e.g. "German grocery products")
|
|
82
|
+
--model Claude model [default: claude-sonnet-4-6]
|
|
83
|
+
--api-key Anthropic API key (or set ANTHROPIC_API_KEY)
|
|
84
|
+
|
|
85
|
+
opentaxonomy run
|
|
86
|
+
-i, --input Input file or database connection string [required]
|
|
87
|
+
-c, --column Column containing raw values to classify [required]
|
|
88
|
+
-s, --seed-dir Directory with seed.yaml and placement_map.yaml [default: ./taxonomy]
|
|
89
|
+
--model Claude model [default: claude-sonnet-4-6]
|
|
90
|
+
--api-key Anthropic API key (or set ANTHROPIC_API_KEY)
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
## Output structure
|
|
94
|
+
|
|
95
|
+
```
|
|
96
|
+
taxonomy/
|
|
97
|
+
├── seed.yaml # Domain seed: context, levels, edge cases
|
|
98
|
+
├── placement_map.yaml # Raw values → canonical IDs (source of truth)
|
|
99
|
+
└── nodes/
|
|
100
|
+
├── root.yaml # Root node
|
|
101
|
+
├── expenditure.yaml # Internal node with decision record
|
|
102
|
+
├── groceries.yaml # Leaf node with inclusion/exclusion criteria
|
|
103
|
+
└── ...
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
Each node file is an ontological contract:
|
|
107
|
+
|
|
108
|
+
```yaml
|
|
109
|
+
node: Groceries
|
|
110
|
+
canonical_id: ft.expenditure.variable.groceries
|
|
111
|
+
question: Is this a purchase of food or household staples from a supermarket or grocery store?
|
|
112
|
+
criteria:
|
|
113
|
+
includes:
|
|
114
|
+
- Supermarket purchases (REWE, LIDL, EDEKA, etc.)
|
|
115
|
+
- Organic/bio market purchases
|
|
116
|
+
excludes:
|
|
117
|
+
- Restaurant meals
|
|
118
|
+
- Drugstore purchases unless food items
|
|
119
|
+
edge_cases:
|
|
120
|
+
- term: REWE TO GO
|
|
121
|
+
resolution: Included — still a grocery/convenience purchase
|
|
122
|
+
decided: true
|
|
123
|
+
parent: Variable Necessities
|
|
124
|
+
children: []
|
|
125
|
+
version: 1.0.0
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## License
|
|
129
|
+
|
|
130
|
+
MIT
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# OpenTaxonomy
|
|
2
|
+
|
|
3
|
+
LLM-powered semantic taxonomy generator for raw categorical data.
|
|
4
|
+
|
|
5
|
+
## What it does
|
|
6
|
+
|
|
7
|
+
OpenTaxonomy takes a column of messy raw values (bank transactions, product names, survey responses — anything) and generates a structured semantic taxonomy from it using Claude as the reasoning engine.
|
|
8
|
+
|
|
9
|
+
**Output for each value:**
|
|
10
|
+
- `{column}_normalized` — cleaned entity name (e.g. `"REWE SAGT DANKE 46654184/..."` → `"REWE"`)
|
|
11
|
+
- `canonical_id` — taxonomy path (e.g. `ft.expenditure.variable.groceries`)
|
|
12
|
+
|
|
13
|
+
The taxonomy itself is a set of YAML files — a `seed.yaml` capturing the domain structure, one YAML file per tree node under `nodes/` (each an ontological contract with inclusion/exclusion criteria and a decision record), and a `placement_map.yaml` that is the source of truth for all mappings.
|
|
14
|
+
|
|
15
|
+
## Architecture
|
|
16
|
+
|
|
17
|
+
The core is the **Prima Seed** — a universal questioning protocol that generates domain-specific taxonomic trees from any categorical data:
|
|
18
|
+
|
|
19
|
+
- **Q0** Identify the Form: what unifies all values?
|
|
20
|
+
- **Q0b** Establish context: what operational realm governs classification?
|
|
21
|
+
- **Q1** Primary differentiation: the most essential splitting criterion
|
|
22
|
+
- **Q2** Recursive differentiation: applied per branch at each level
|
|
23
|
+
- **Q3** Dialectical check: values that resist placement expose flawed criteria
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install opentaxonomy
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
Requires Python 3.11+ and an [Anthropic API key](https://console.anthropic.com/).
|
|
32
|
+
|
|
33
|
+
## Usage
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
export ANTHROPIC_API_KEY=sk-ant-...
|
|
37
|
+
|
|
38
|
+
# Generate a new taxonomy from raw data
|
|
39
|
+
opentaxonomy create -i transactions.csv -c description -o ./taxonomy
|
|
40
|
+
|
|
41
|
+
# Place new/unseen values into an existing taxonomy
|
|
42
|
+
opentaxonomy run -i new_data.csv -c description -s ./taxonomy
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Supported input formats
|
|
46
|
+
|
|
47
|
+
| Format | Example |
|
|
48
|
+
|--------|---------|
|
|
49
|
+
| CSV / TSV | `data.csv`, `data.tsv` |
|
|
50
|
+
| JSON | `data.json` |
|
|
51
|
+
| Excel | `data.xlsx` |
|
|
52
|
+
| Parquet | `data.parquet` |
|
|
53
|
+
| Database | `postgresql://user:pass@host/db` + `--db-table` |
|
|
54
|
+
|
|
55
|
+
### Options
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
opentaxonomy create
|
|
59
|
+
-i, --input Input file or database connection string [required]
|
|
60
|
+
-c, --column Column containing raw values to classify [required]
|
|
61
|
+
-o, --output-dir Where to write taxonomy files [default: ./taxonomy]
|
|
62
|
+
--domain-hint Optional hint to guide the LLM (e.g. "German grocery products")
|
|
63
|
+
--model Claude model [default: claude-sonnet-4-6]
|
|
64
|
+
--api-key Anthropic API key (or set ANTHROPIC_API_KEY)
|
|
65
|
+
|
|
66
|
+
opentaxonomy run
|
|
67
|
+
-i, --input Input file or database connection string [required]
|
|
68
|
+
-c, --column Column containing raw values to classify [required]
|
|
69
|
+
-s, --seed-dir Directory with seed.yaml and placement_map.yaml [default: ./taxonomy]
|
|
70
|
+
--model Claude model [default: claude-sonnet-4-6]
|
|
71
|
+
--api-key Anthropic API key (or set ANTHROPIC_API_KEY)
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Output structure
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
taxonomy/
|
|
78
|
+
├── seed.yaml # Domain seed: context, levels, edge cases
|
|
79
|
+
├── placement_map.yaml # Raw values → canonical IDs (source of truth)
|
|
80
|
+
└── nodes/
|
|
81
|
+
├── root.yaml # Root node
|
|
82
|
+
├── expenditure.yaml # Internal node with decision record
|
|
83
|
+
├── groceries.yaml # Leaf node with inclusion/exclusion criteria
|
|
84
|
+
└── ...
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Each node file is an ontological contract:
|
|
88
|
+
|
|
89
|
+
```yaml
|
|
90
|
+
node: Groceries
|
|
91
|
+
canonical_id: ft.expenditure.variable.groceries
|
|
92
|
+
question: Is this a purchase of food or household staples from a supermarket or grocery store?
|
|
93
|
+
criteria:
|
|
94
|
+
includes:
|
|
95
|
+
- Supermarket purchases (REWE, LIDL, EDEKA, etc.)
|
|
96
|
+
- Organic/bio market purchases
|
|
97
|
+
excludes:
|
|
98
|
+
- Restaurant meals
|
|
99
|
+
- Drugstore purchases unless food items
|
|
100
|
+
edge_cases:
|
|
101
|
+
- term: REWE TO GO
|
|
102
|
+
resolution: Included — still a grocery/convenience purchase
|
|
103
|
+
decided: true
|
|
104
|
+
parent: Variable Necessities
|
|
105
|
+
children: []
|
|
106
|
+
version: 1.0.0
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
## License
|
|
110
|
+
|
|
111
|
+
MIT
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# OpenTaxonomy v0.1.0 — First Output
|
|
2
|
+
|
|
3
|
+
## What is this?
|
|
4
|
+
|
|
5
|
+
This is the first concrete output of the OpenTaxonomy project — a proof-of-concept
|
|
6
|
+
demonstrating the Prima Seed protocol applied to real-world data.
|
|
7
|
+
|
|
8
|
+
## Structure
|
|
9
|
+
|
|
10
|
+
```
|
|
11
|
+
opentaxonomy/
|
|
12
|
+
├── README.md ← You are here
|
|
13
|
+
└── seeds/
|
|
14
|
+
├── prima/
|
|
15
|
+
│ └── prima-seed.yaml ← The universal questioning protocol
|
|
16
|
+
└── personal-finance-transactions/
|
|
17
|
+
├── seed.yaml ← Domain seed (generated by Prima Seed)
|
|
18
|
+
├── placement_map.yaml ← Raw values → canonical IDs
|
|
19
|
+
└── nodes/
|
|
20
|
+
├── root.yaml ← Financial Transaction (root)
|
|
21
|
+
├── income.yaml ← Income
|
|
22
|
+
├── employment.yaml ← Employment Income (leaf)
|
|
23
|
+
├── government_benefits.yaml ← Government Benefits (leaf)
|
|
24
|
+
├── expenditure.yaml ← Expenditure
|
|
25
|
+
├── fixed_obligations.yaml ← Fixed Obligations
|
|
26
|
+
├── variable_necessities.yaml ← Variable Necessities
|
|
27
|
+
├── discretionary.yaml ← Discretionary
|
|
28
|
+
├── groceries.yaml ← Groceries (leaf)
|
|
29
|
+
├── dining_out.yaml ← Dining Out (leaf)
|
|
30
|
+
└── digital_subscriptions.yaml ← Digital Subscriptions (leaf)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Key Concepts
|
|
34
|
+
|
|
35
|
+
- **Prima Seed**: The universal meta-protocol. Domain-agnostic questions that
|
|
36
|
+
generate domain-specific trees from any categorical data.
|
|
37
|
+
|
|
38
|
+
- **Domain Seed**: A specific questioning protocol for a domain (e.g., personal
|
|
39
|
+
finance transactions). Generated by running the Prima Seed against real data.
|
|
40
|
+
Reusable by anyone with similar data.
|
|
41
|
+
|
|
42
|
+
- **Node**: An ontological contract — a YAML file carrying inclusion criteria,
|
|
43
|
+
exclusion criteria, edge cases, and decision records. Each node is one file.
|
|
44
|
+
|
|
45
|
+
- **Canonical ID**: A deterministic identifier derived from the criteria path
|
|
46
|
+
(e.g., `ft.expenditure.variable.groceries`). Enables semantic joins.
|
|
47
|
+
|
|
48
|
+
- **Placement Map**: The mapping of raw data values to canonical IDs. This is
|
|
49
|
+
where meaning is assigned to data.
|
|
50
|
+
|
|
51
|
+
## How to Read This
|
|
52
|
+
|
|
53
|
+
1. Start with `prima/prima-seed.yaml` — understand the questioning protocol
|
|
54
|
+
2. Read `personal-finance-transactions/seed.yaml` — see how a domain seed
|
|
55
|
+
captures the context and level structure
|
|
56
|
+
3. Browse `nodes/` — each file is a self-contained ontological contract
|
|
57
|
+
4. Check `placement_map.yaml` — see how messy real-world bank transactions
|
|
58
|
+
get assigned to canonical IDs with meaning
|
|
59
|
+
|
|
60
|
+
## What's Next
|
|
61
|
+
|
|
62
|
+
- [x] Build a CLI runner that executes the Prima Seed against any data column (available as `opentaxonomy create` and `opentaxonomy run`)
|
|
63
|
+
- [ ] Define canonical ID hashing algorithm
|
|
64
|
+
- [ ] Build the linking ID mechanism (cross-tree semantic comparison)
|
|
65
|
+
- [ ] Create second domain seed (German Grocery Products) for cross-domain testing
|
|
66
|
+
- [ ] Define the seed registry format for OpenTaxonomy platform
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
node: Digital Subscriptions
|
|
2
|
+
canonical_id: "ft.expenditure.fixed.subscriptions"
|
|
3
|
+
question: "Is this a recurring payment for a digital service or platform?"
|
|
4
|
+
criteria:
|
|
5
|
+
includes:
|
|
6
|
+
- "Streaming services (Netflix, Disney+, Spotify, MUBI, Audible)"
|
|
7
|
+
- "Software subscriptions (ChatGPT, Wix, Apple services)"
|
|
8
|
+
- "Gaming services (Nintendo, Google Play)"
|
|
9
|
+
- "Internet service (HerzoMedia/HERZOvision)"
|
|
10
|
+
excludes:
|
|
11
|
+
- "One-time digital purchases"
|
|
12
|
+
- "Physical subscription boxes"
|
|
13
|
+
- "Mobile phone contract (classified under telecommunications)"
|
|
14
|
+
edge_cases:
|
|
15
|
+
- term: "Amazon Prime"
|
|
16
|
+
resolution: "Included — recurring digital service even though it enables physical delivery"
|
|
17
|
+
decided: true
|
|
18
|
+
- term: "HerzoMedia/HERZOvision"
|
|
19
|
+
resolution: "Provisionally included — internet access is billed as a recurring digital subscription; may be reclassified under telecommunications"
|
|
20
|
+
decided: false
|
|
21
|
+
parent: Fixed Obligations
|
|
22
|
+
children: []
|
|
23
|
+
version: 1.0.0
|
|
24
|
+
decision_record:
|
|
25
|
+
criterion_chosen: "Recurring billing for digital access"
|
|
26
|
+
alternatives_considered:
|
|
27
|
+
- "Split by media type (video, audio, software)"
|
|
28
|
+
reason: >
|
|
29
|
+
In budgeting, what matters is that these are fixed monthly
|
|
30
|
+
costs that can be individually cancelled. Media type is
|
|
31
|
+
secondary to the financial commitment structure.
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
node: Dining Out
|
|
2
|
+
canonical_id: "ft.expenditure.discretionary.dining"
|
|
3
|
+
question: "Is this a payment at a restaurant, cafe, bar, or food service establishment?"
|
|
4
|
+
criteria:
|
|
5
|
+
includes:
|
|
6
|
+
- "Full-service restaurants"
|
|
7
|
+
- "Fast food chains (McDonalds, Five Guys, etc.)"
|
|
8
|
+
- "Cafes and coffee shops (Starbucks, etc.)"
|
|
9
|
+
- "Bars and beer gardens"
|
|
10
|
+
- "Takeaway/delivery services (Lieferando, Takeaway.com)"
|
|
11
|
+
- "Corporate canteens (Eurest)"
|
|
12
|
+
- "Bakery cafes when dining in"
|
|
13
|
+
excludes:
|
|
14
|
+
- "Supermarket food purchases"
|
|
15
|
+
- "Grocery delivery services"
|
|
16
|
+
edge_cases:
|
|
17
|
+
- term: "Bakery (Baeckerei und Konditorei)"
|
|
18
|
+
resolution: "Included — the purchase is prepared food for immediate consumption"
|
|
19
|
+
decided: true
|
|
20
|
+
- term: "Food Affairs GmbH"
|
|
21
|
+
resolution: "Included — appears to be a food service/catering entity"
|
|
22
|
+
decided: true
|
|
23
|
+
- term: "Airport food (Relay, Exki, Gru Fridays)"
|
|
24
|
+
resolution: "Included — dining out regardless of location"
|
|
25
|
+
decided: true
|
|
26
|
+
parent: Discretionary
|
|
27
|
+
children: []
|
|
28
|
+
version: 1.0.0
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
node: Discretionary
|
|
2
|
+
canonical_id: "ft.expenditure.discretionary"
|
|
3
|
+
question: "Is this a lifestyle choice — spending that could be reduced or eliminated without impacting basic needs?"
|
|
4
|
+
criteria:
|
|
5
|
+
includes:
|
|
6
|
+
- "Restaurant meals and dining out"
|
|
7
|
+
- "Fashion and retail shopping"
|
|
8
|
+
- "Online shopping (non-essential)"
|
|
9
|
+
- "Entertainment, leisure, cultural activities"
|
|
10
|
+
- "Travel and accommodation"
|
|
11
|
+
excludes:
|
|
12
|
+
- "Contractual obligations"
|
|
13
|
+
- "Essential groceries and household supplies"
|
|
14
|
+
- "Health-related spending"
|
|
15
|
+
- "Commute-related transport"
|
|
16
|
+
edge_cases:
|
|
17
|
+
- term: "Fast food (McDonalds)"
|
|
18
|
+
resolution: "Included — discretionary even if routine; not a nutritional necessity"
|
|
19
|
+
decided: true
|
|
20
|
+
- term: "Eurest corporate canteen"
|
|
21
|
+
resolution: "Included — could bring lunch from home; dining choice"
|
|
22
|
+
decided: true
|
|
23
|
+
parent: Expenditure
|
|
24
|
+
children:
|
|
25
|
+
- dining_out
|
|
26
|
+
- shopping_fashion
|
|
27
|
+
- shopping_online_general
|
|
28
|
+
- entertainment_leisure
|
|
29
|
+
- travel_accommodation
|
|
30
|
+
version: 1.0.0
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
node: Expenditure
|
|
2
|
+
canonical_id: "ft.expenditure"
|
|
3
|
+
question: "Is money flowing out of the account?"
|
|
4
|
+
criteria:
|
|
5
|
+
includes:
|
|
6
|
+
- "Any debit, card payment, direct debit, standing order, or cash withdrawal"
|
|
7
|
+
excludes:
|
|
8
|
+
- "Incoming payments"
|
|
9
|
+
parent: Financial Transaction
|
|
10
|
+
children:
|
|
11
|
+
- fixed_obligations
|
|
12
|
+
- variable_necessities
|
|
13
|
+
- discretionary
|
|
14
|
+
- personal_transfers
|
|
15
|
+
- cash_withdrawals
|
|
16
|
+
version: 1.0.0
|
|
17
|
+
decision_record:
|
|
18
|
+
criterion_chosen: "Degree of obligation — how controllable is this spending?"
|
|
19
|
+
alternatives_considered:
|
|
20
|
+
- "Merchant type (where did I spend?)"
|
|
21
|
+
- "Payment method (card vs. transfer vs. cash)"
|
|
22
|
+
- "Amount range"
|
|
23
|
+
reason: >
|
|
24
|
+
In a personal budgeting context, the most actionable split
|
|
25
|
+
is controllability. Fixed obligations are locked in; variable
|
|
26
|
+
necessities can be optimized; discretionary can be cut. This
|
|
27
|
+
maps directly to how a person can act on their budget.
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
node: Groceries
|
|
2
|
+
canonical_id: "ft.expenditure.variable.groceries"
|
|
3
|
+
question: "Is this a purchase of food or household staples from a supermarket or grocery store?"
|
|
4
|
+
criteria:
|
|
5
|
+
includes:
|
|
6
|
+
- "Supermarket purchases (REWE, LIDL, EDEKA, etc.)"
|
|
7
|
+
- "Organic/bio market purchases"
|
|
8
|
+
- "International grocery equivalents (Carrefour, Pao de Acucar, CONAD, etc.)"
|
|
9
|
+
excludes:
|
|
10
|
+
- "Restaurant meals (even takeaway from non-grocery establishments)"
|
|
11
|
+
- "Specialty food shops that are primarily dining (bakery cafe)"
|
|
12
|
+
- "Drugstore purchases (Rossmann, DM) unless food items"
|
|
13
|
+
edge_cases:
|
|
14
|
+
- term: "REWE TO GO"
|
|
15
|
+
resolution: "Included — still a grocery/convenience purchase, not a restaurant"
|
|
16
|
+
decided: true
|
|
17
|
+
- term: "Biomarkt"
|
|
18
|
+
resolution: "Included — organic grocery store"
|
|
19
|
+
decided: true
|
|
20
|
+
parent: Variable Necessities
|
|
21
|
+
children: []
|
|
22
|
+
version: 1.0.0
|