seedforge 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- seedforge-0.1.0/PKG-INFO +214 -0
- seedforge-0.1.0/README.md +179 -0
- seedforge-0.1.0/pyproject.toml +50 -0
- seedforge-0.1.0/seedforge/__init__.py +3 -0
- seedforge-0.1.0/seedforge/ai.py +150 -0
- seedforge-0.1.0/seedforge/cli.py +311 -0
- seedforge-0.1.0/seedforge/config.py +53 -0
- seedforge-0.1.0/seedforge/generators.py +217 -0
- seedforge-0.1.0/seedforge/graph.py +60 -0
- seedforge-0.1.0/seedforge/heuristics.py +243 -0
- seedforge-0.1.0/seedforge/inserter.py +153 -0
- seedforge-0.1.0/seedforge/introspector.py +319 -0
- seedforge-0.1.0/seedforge.egg-info/PKG-INFO +214 -0
- seedforge-0.1.0/seedforge.egg-info/SOURCES.txt +20 -0
- seedforge-0.1.0/seedforge.egg-info/dependency_links.txt +1 -0
- seedforge-0.1.0/seedforge.egg-info/entry_points.txt +2 -0
- seedforge-0.1.0/seedforge.egg-info/requires.txt +15 -0
- seedforge-0.1.0/seedforge.egg-info/top_level.txt +1 -0
- seedforge-0.1.0/setup.cfg +4 -0
- seedforge-0.1.0/tests/test_generators.py +145 -0
- seedforge-0.1.0/tests/test_graph.py +98 -0
- seedforge-0.1.0/tests/test_heuristics.py +97 -0
seedforge-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: seedforge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI-powered test data generator that reads your database schema and fills it with realistic, FK-valid data in seconds
|
|
5
|
+
Author-email: SilkHorizon <hello@silkhorizon.uz>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/silkhorizonstudios/seedforge
|
|
8
|
+
Project-URL: Documentation, https://seedforge.dev
|
|
9
|
+
Project-URL: Repository, https://github.com/silkhorizonstudios/seedforge
|
|
10
|
+
Keywords: database,testing,faker,seed,synthetic-data,postgresql
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Database
|
|
20
|
+
Classifier: Topic :: Software Development :: Testing
|
|
21
|
+
Requires-Python: >=3.10
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
Requires-Dist: typer>=0.12.0
|
|
24
|
+
Requires-Dist: rich>=13.0.0
|
|
25
|
+
Requires-Dist: psycopg2-binary>=2.9.0
|
|
26
|
+
Requires-Dist: faker>=25.0.0
|
|
27
|
+
Requires-Dist: pyyaml>=6.0
|
|
28
|
+
Provides-Extra: mysql
|
|
29
|
+
Requires-Dist: pymysql>=1.1.0; extra == "mysql"
|
|
30
|
+
Provides-Extra: ai
|
|
31
|
+
Requires-Dist: anthropic>=0.40.0; extra == "ai"
|
|
32
|
+
Provides-Extra: all
|
|
33
|
+
Requires-Dist: pymysql>=1.1.0; extra == "all"
|
|
34
|
+
Requires-Dist: anthropic>=0.40.0; extra == "all"
|
|
35
|
+
|
|
36
|
+
# SeedForge
|
|
37
|
+
|
|
38
|
+
**One command to fill your database with realistic test data.**
|
|
39
|
+
|
|
40
|
+
SeedForge connects to your database, reads the schema (tables, columns, foreign keys, constraints), and generates realistic, FK-valid data — no code, no config, no seed scripts.
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
pip install seedforge
|
|
44
|
+
seedforge connect postgresql://user:pass@localhost/mydb
|
|
45
|
+
seedforge generate --rows 1000
|
|
46
|
+
# Done. 40 tables filled in 3 seconds.
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Features
|
|
50
|
+
|
|
51
|
+
- **Zero-config** — reads your DB schema automatically, no setup needed
|
|
52
|
+
- **FK integrity** — resolves foreign keys via topological sort, inserts in correct order
|
|
53
|
+
- **Smart heuristics** — 80+ column name patterns for realistic data (`email` → real email, `price` → decimal, `role` → admin/user/editor)
|
|
54
|
+
- **Deterministic** — use `--seed` to get the same data every time
|
|
55
|
+
- **AI-powered** — optional Claude AI integration for maximum realism
|
|
56
|
+
- **Export** — SQL or JSON file output
|
|
57
|
+
- **Privacy-first** — runs entirely locally, your data never leaves your machine
|
|
58
|
+
|
|
59
|
+
## Installation
|
|
60
|
+
|
|
61
|
+
```bash
|
|
62
|
+
pip install seedforge
|
|
63
|
+
|
|
64
|
+
# With MySQL support
|
|
65
|
+
pip install seedforge[mysql]
|
|
66
|
+
|
|
67
|
+
# With AI support (Claude API)
|
|
68
|
+
pip install seedforge[ai]
|
|
69
|
+
|
|
70
|
+
# Everything
|
|
71
|
+
pip install seedforge[all]
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## Quick Start
|
|
75
|
+
|
|
76
|
+
### 1. Connect
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
seedforge connect postgresql://user:pass@localhost:5432/mydb
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Saves the connection to `.seedforge.yaml` so you don't have to type it again.
|
|
83
|
+
|
|
84
|
+
### 2. Inspect
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
seedforge inspect
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Shows all tables, columns, types, foreign keys, and insertion order:
|
|
91
|
+
|
|
92
|
+
```
|
|
93
|
+
Found 18 tables (insertion order):
|
|
94
|
+
|
|
95
|
+
1. users
|
|
96
|
+
┏━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━┓
|
|
97
|
+
┃ Column ┃ Type ┃ Nullable ┃ FK → ┃
|
|
98
|
+
┡━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━┩
|
|
99
|
+
│ id │ serial │ NO │ │
|
|
100
|
+
│ email │ varchar │ NO │ │
|
|
101
|
+
│ name │ varchar │ YES │ │
|
|
102
|
+
└────────────┴───────────┴──────────┴───────┘
|
|
103
|
+
|
|
104
|
+
2. orders
|
|
105
|
+
┏━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┓
|
|
106
|
+
┃ Column ┃ Type ┃ Nullable ┃ FK → ┃
|
|
107
|
+
┡━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━┩
|
|
108
|
+
│ id │ serial │ NO │ │
|
|
109
|
+
│ user_id │ integer │ NO │ users.id │
|
|
110
|
+
│ total │ numeric │ NO │ │
|
|
111
|
+
└────────────┴───────────┴──────────┴────────────┘
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### 3. Generate
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# Generate and insert 100 rows per table
|
|
118
|
+
seedforge generate --rows 100
|
|
119
|
+
|
|
120
|
+
# Preview without inserting
|
|
121
|
+
seedforge generate --rows 10 --dry-run
|
|
122
|
+
|
|
123
|
+
# Export to SQL file
|
|
124
|
+
seedforge generate --rows 1000 --export sql
|
|
125
|
+
|
|
126
|
+
# Export to JSON
|
|
127
|
+
seedforge generate --rows 1000 --export json
|
|
128
|
+
|
|
129
|
+
# Deterministic (same data every time)
|
|
130
|
+
seedforge generate --rows 100 --seed 42
|
|
131
|
+
|
|
132
|
+
# Only specific tables (auto-includes FK parents)
|
|
133
|
+
seedforge generate --tables orders,payments --rows 50
|
|
134
|
+
|
|
135
|
+
# Clean tables before generating
|
|
136
|
+
seedforge generate --rows 100 --clean
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### 4. AI Generate (optional)
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
export ANTHROPIC_API_KEY=sk-...
|
|
143
|
+
seedforge ai-generate --rows 20
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Uses Claude AI to generate context-aware data with maximum realism.
|
|
147
|
+
|
|
148
|
+
## How It Works
|
|
149
|
+
|
|
150
|
+
1. **Schema introspection** — connects to your database, reads `information_schema` to get tables, columns, types, FK relationships, constraints, ENUMs
|
|
151
|
+
2. **Dependency graph** — builds a directed graph from FK relationships, runs topological sort to determine insertion order (parents first)
|
|
152
|
+
3. **Smart heuristics** — maps column names to appropriate generators (`email` → realistic email, `phone` → phone number, `created_at` → recent datetime)
|
|
153
|
+
4. **FK resolution** — child rows automatically reference real IDs from already-generated parent rows
|
|
154
|
+
5. **Batch insert** — fast bulk insertion with proper transaction handling
|
|
155
|
+
|
|
156
|
+
## Column Name Heuristics
|
|
157
|
+
|
|
158
|
+
SeedForge automatically detects what kind of data to generate based on column names:
|
|
159
|
+
|
|
160
|
+
| Column name | Generated data |
|
|
161
|
+
|---|---|
|
|
162
|
+
| `email` | `john.smith@example.com` |
|
|
163
|
+
| `phone`, `mobile` | `+1-555-0123` |
|
|
164
|
+
| `first_name` | `John` |
|
|
165
|
+
| `last_name` | `Smith` |
|
|
166
|
+
| `username` | `jsmith42` |
|
|
167
|
+
| `address`, `street` | `123 Main St, Apt 4` |
|
|
168
|
+
| `city` | `San Francisco` |
|
|
169
|
+
| `country` | `United States` |
|
|
170
|
+
| `price`, `amount`, `total` | `49.99` |
|
|
171
|
+
| `url`, `website` | `https://example.com` |
|
|
172
|
+
| `avatar`, `image_url` | `https://picsum.photos/seed/123/400/300` |
|
|
173
|
+
| `role` | `admin`, `user`, `moderator` |
|
|
174
|
+
| `status` | `active`, `pending`, `completed` |
|
|
175
|
+
| `plan` | `free`, `pro`, `enterprise` |
|
|
176
|
+
| `created_at`, `updated_at` | Recent datetime |
|
|
177
|
+
| `is_active`, `verified` | `true` (85% bias) |
|
|
178
|
+
| `is_deleted`, `archived` | `false` (90% bias) |
|
|
179
|
+
| `password` | SHA-256 hash |
|
|
180
|
+
| `token`, `api_key` | Random hex string |
|
|
181
|
+
| `uuid`, `guid` | Valid UUID v4 |
|
|
182
|
+
| ...and 60+ more patterns | |
|
|
183
|
+
|
|
184
|
+
## Configuration
|
|
185
|
+
|
|
186
|
+
`.seedforge.yaml` (auto-created by `seedforge connect`):
|
|
187
|
+
|
|
188
|
+
```yaml
|
|
189
|
+
db_url: postgresql://user:pass@localhost:5432/mydb
|
|
190
|
+
default_rows: 100
|
|
191
|
+
default_schema: public
|
|
192
|
+
seed: 42 # optional, for deterministic generation
|
|
193
|
+
exclude_tables:
|
|
194
|
+
- _prisma_migrations
|
|
195
|
+
- django_migrations
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Supported Databases
|
|
199
|
+
|
|
200
|
+
- [x] PostgreSQL
|
|
201
|
+
- [x] MySQL / MariaDB
|
|
202
|
+
- [ ] SQLite (planned)
|
|
203
|
+
|
|
204
|
+
## Data Privacy
|
|
205
|
+
|
|
206
|
+
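**Your data never leaves your machine.** SeedForge connects directly to your database, generates data in memory, and inserts it — no telemetry, no data collection. The one exception to "runs entirely locally" is the optional AI integration (`seedforge ai-generate`): it sends schema metadata (table and column names, types, constraints) to the Anthropic API, but not the rows stored in your database.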
|
|
207
|
+
|
|
208
|
+
## License
|
|
209
|
+
|
|
210
|
+
MIT
|
|
211
|
+
|
|
212
|
+
## Contributing
|
|
213
|
+
|
|
214
|
+
Issues and PRs welcome at [github.com/silkhorizonstudios/seedforge](https://github.com/silkhorizonstudios/seedforge).
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# SeedForge
|
|
2
|
+
|
|
3
|
+
**One command to fill your database with realistic test data.**
|
|
4
|
+
|
|
5
|
+
SeedForge connects to your database, reads the schema (tables, columns, foreign keys, constraints), and generates realistic, FK-valid data — no code, no config, no seed scripts.
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install seedforge
|
|
9
|
+
seedforge connect postgresql://user:pass@localhost/mydb
|
|
10
|
+
seedforge generate --rows 1000
|
|
11
|
+
# Done. 40 tables filled in 3 seconds.
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Features
|
|
15
|
+
|
|
16
|
+
- **Zero-config** — reads your DB schema automatically, no setup needed
|
|
17
|
+
- **FK integrity** — resolves foreign keys via topological sort, inserts in correct order
|
|
18
|
+
- **Smart heuristics** — 80+ column name patterns for realistic data (`email` → real email, `price` → decimal, `role` → admin/user/editor)
|
|
19
|
+
- **Deterministic** — use `--seed` to get the same data every time
|
|
20
|
+
- **AI-powered** — optional Claude AI integration for maximum realism
|
|
21
|
+
- **Export** — SQL or JSON file output
|
|
22
|
+
- **Privacy-first** — runs entirely locally, your data never leaves your machine
|
|
23
|
+
|
|
24
|
+
## Installation
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install seedforge
|
|
28
|
+
|
|
29
|
+
# With MySQL support
|
|
30
|
+
pip install seedforge[mysql]
|
|
31
|
+
|
|
32
|
+
# With AI support (Claude API)
|
|
33
|
+
pip install seedforge[ai]
|
|
34
|
+
|
|
35
|
+
# Everything
|
|
36
|
+
pip install seedforge[all]
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## Quick Start
|
|
40
|
+
|
|
41
|
+
### 1. Connect
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
seedforge connect postgresql://user:pass@localhost:5432/mydb
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
Saves the connection to `.seedforge.yaml` so you don't have to type it again.
|
|
48
|
+
|
|
49
|
+
### 2. Inspect
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
seedforge inspect
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
Shows all tables, columns, types, foreign keys, and insertion order:
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
Found 18 tables (insertion order):
|
|
59
|
+
|
|
60
|
+
1. users
|
|
61
|
+
┏━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━┓
|
|
62
|
+
┃ Column ┃ Type ┃ Nullable ┃ FK → ┃
|
|
63
|
+
┡━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━┩
|
|
64
|
+
│ id │ serial │ NO │ │
|
|
65
|
+
│ email │ varchar │ NO │ │
|
|
66
|
+
│ name │ varchar │ YES │ │
|
|
67
|
+
└────────────┴───────────┴──────────┴───────┘
|
|
68
|
+
|
|
69
|
+
2. orders
|
|
70
|
+
┏━━━━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━┓
|
|
71
|
+
┃ Column ┃ Type ┃ Nullable ┃ FK → ┃
|
|
72
|
+
┡━━━━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━┩
|
|
73
|
+
│ id │ serial │ NO │ │
|
|
74
|
+
│ user_id │ integer │ NO │ users.id │
|
|
75
|
+
│ total │ numeric │ NO │ │
|
|
76
|
+
└────────────┴───────────┴──────────┴────────────┘
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### 3. Generate
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
# Generate and insert 100 rows per table
|
|
83
|
+
seedforge generate --rows 100
|
|
84
|
+
|
|
85
|
+
# Preview without inserting
|
|
86
|
+
seedforge generate --rows 10 --dry-run
|
|
87
|
+
|
|
88
|
+
# Export to SQL file
|
|
89
|
+
seedforge generate --rows 1000 --export sql
|
|
90
|
+
|
|
91
|
+
# Export to JSON
|
|
92
|
+
seedforge generate --rows 1000 --export json
|
|
93
|
+
|
|
94
|
+
# Deterministic (same data every time)
|
|
95
|
+
seedforge generate --rows 100 --seed 42
|
|
96
|
+
|
|
97
|
+
# Only specific tables (auto-includes FK parents)
|
|
98
|
+
seedforge generate --tables orders,payments --rows 50
|
|
99
|
+
|
|
100
|
+
# Clean tables before generating
|
|
101
|
+
seedforge generate --rows 100 --clean
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### 4. AI Generate (optional)
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
export ANTHROPIC_API_KEY=sk-...
|
|
108
|
+
seedforge ai-generate --rows 20
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
Uses Claude AI to generate context-aware data with maximum realism.
|
|
112
|
+
|
|
113
|
+
## How It Works
|
|
114
|
+
|
|
115
|
+
1. **Schema introspection** — connects to your database, reads `information_schema` to get tables, columns, types, FK relationships, constraints, ENUMs
|
|
116
|
+
2. **Dependency graph** — builds a directed graph from FK relationships, runs topological sort to determine insertion order (parents first)
|
|
117
|
+
3. **Smart heuristics** — maps column names to appropriate generators (`email` → realistic email, `phone` → phone number, `created_at` → recent datetime)
|
|
118
|
+
4. **FK resolution** — child rows automatically reference real IDs from already-generated parent rows
|
|
119
|
+
5. **Batch insert** — fast bulk insertion with proper transaction handling
|
|
120
|
+
|
|
121
|
+
## Column Name Heuristics
|
|
122
|
+
|
|
123
|
+
SeedForge automatically detects what kind of data to generate based on column names:
|
|
124
|
+
|
|
125
|
+
| Column name | Generated data |
|
|
126
|
+
|---|---|
|
|
127
|
+
| `email` | `john.smith@example.com` |
|
|
128
|
+
| `phone`, `mobile` | `+1-555-0123` |
|
|
129
|
+
| `first_name` | `John` |
|
|
130
|
+
| `last_name` | `Smith` |
|
|
131
|
+
| `username` | `jsmith42` |
|
|
132
|
+
| `address`, `street` | `123 Main St, Apt 4` |
|
|
133
|
+
| `city` | `San Francisco` |
|
|
134
|
+
| `country` | `United States` |
|
|
135
|
+
| `price`, `amount`, `total` | `49.99` |
|
|
136
|
+
| `url`, `website` | `https://example.com` |
|
|
137
|
+
| `avatar`, `image_url` | `https://picsum.photos/seed/123/400/300` |
|
|
138
|
+
| `role` | `admin`, `user`, `moderator` |
|
|
139
|
+
| `status` | `active`, `pending`, `completed` |
|
|
140
|
+
| `plan` | `free`, `pro`, `enterprise` |
|
|
141
|
+
| `created_at`, `updated_at` | Recent datetime |
|
|
142
|
+
| `is_active`, `verified` | `true` (85% bias) |
|
|
143
|
+
| `is_deleted`, `archived` | `false` (90% bias) |
|
|
144
|
+
| `password` | SHA-256 hash |
|
|
145
|
+
| `token`, `api_key` | Random hex string |
|
|
146
|
+
| `uuid`, `guid` | Valid UUID v4 |
|
|
147
|
+
| ...and 60+ more patterns | |
|
|
148
|
+
|
|
149
|
+
## Configuration
|
|
150
|
+
|
|
151
|
+
`.seedforge.yaml` (auto-created by `seedforge connect`):
|
|
152
|
+
|
|
153
|
+
```yaml
|
|
154
|
+
db_url: postgresql://user:pass@localhost:5432/mydb
|
|
155
|
+
default_rows: 100
|
|
156
|
+
default_schema: public
|
|
157
|
+
seed: 42 # optional, for deterministic generation
|
|
158
|
+
exclude_tables:
|
|
159
|
+
- _prisma_migrations
|
|
160
|
+
- django_migrations
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Supported Databases
|
|
164
|
+
|
|
165
|
+
- [x] PostgreSQL
|
|
166
|
+
- [x] MySQL / MariaDB
|
|
167
|
+
- [ ] SQLite (planned)
|
|
168
|
+
|
|
169
|
+
## Data Privacy
|
|
170
|
+
|
|
171
|
+
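**Your data never leaves your machine.** SeedForge connects directly to your database, generates data in memory, and inserts it — no telemetry, no data collection. The one exception to "runs entirely locally" is the optional AI integration (`seedforge ai-generate`): it sends schema metadata (table and column names, types, constraints) to the Anthropic API, but not the rows stored in your database.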
|
|
172
|
+
|
|
173
|
+
## License
|
|
174
|
+
|
|
175
|
+
MIT
|
|
176
|
+
|
|
177
|
+
## Contributing
|
|
178
|
+
|
|
179
|
+
Issues and PRs welcome at [github.com/silkhorizonstudios/seedforge](https://github.com/silkhorizonstudios/seedforge).
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68.0", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "seedforge"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "AI-powered test data generator that reads your database schema and fills it with realistic, FK-valid data in seconds"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = {text = "MIT"}
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "SilkHorizon", email = "hello@silkhorizon.uz"},
|
|
14
|
+
]
|
|
15
|
+
keywords = ["database", "testing", "faker", "seed", "synthetic-data", "postgresql"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 3 - Alpha",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"License :: OSI Approved :: MIT License",
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Programming Language :: Python :: 3.13",
|
|
25
|
+
"Topic :: Database",
|
|
26
|
+
"Topic :: Software Development :: Testing",
|
|
27
|
+
]
|
|
28
|
+
dependencies = [
|
|
29
|
+
"typer>=0.12.0",
|
|
30
|
+
"rich>=13.0.0",
|
|
31
|
+
"psycopg2-binary>=2.9.0",
|
|
32
|
+
"faker>=25.0.0",
|
|
33
|
+
"pyyaml>=6.0",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
[project.optional-dependencies]
|
|
37
|
+
mysql = ["pymysql>=1.1.0"]
|
|
38
|
+
ai = ["anthropic>=0.40.0"]
|
|
39
|
+
all = ["pymysql>=1.1.0", "anthropic>=0.40.0"]
|
|
40
|
+
|
|
41
|
+
[project.scripts]
|
|
42
|
+
seedforge = "seedforge.cli:app"
|
|
43
|
+
|
|
44
|
+
[project.urls]
|
|
45
|
+
Homepage = "https://github.com/silkhorizonstudios/seedforge"
|
|
46
|
+
Documentation = "https://seedforge.dev"
|
|
47
|
+
Repository = "https://github.com/silkhorizonstudios/seedforge"
|
|
48
|
+
|
|
49
|
+
[tool.setuptools.packages.find]
|
|
50
|
+
include = ["seedforge*"]
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
"""AI layer: Claude analyzes the schema and generates context-aware data.
|
|
2
|
+
|
|
3
|
+
Used as a premium feature. Sends only schema metadata (table/column names, types, constraints),
|
|
4
|
+
NOT the user's real data.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_ai_generators(tables: dict, api_key: str | None = None) -> dict:
|
|
12
|
+
"""Попросить Claude проанализировать схему и предложить генераторы.
|
|
13
|
+
|
|
14
|
+
Возвращает dict: {"table.column": {"generator": "...", "args": {...}}}
|
|
15
|
+
"""
|
|
16
|
+
api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
|
|
17
|
+
if not api_key:
|
|
18
|
+
return {}
|
|
19
|
+
|
|
20
|
+
# Собираем метаданные схемы (только имена, типы, связи — НЕ данные)
|
|
21
|
+
schema_desc = _build_schema_description(tables)
|
|
22
|
+
|
|
23
|
+
try:
|
|
24
|
+
import anthropic
|
|
25
|
+
client = anthropic.Anthropic(api_key=api_key)
|
|
26
|
+
|
|
27
|
+
response = client.messages.create(
|
|
28
|
+
model="claude-haiku-4-5-20251001",
|
|
29
|
+
max_tokens=4096,
|
|
30
|
+
messages=[{
|
|
31
|
+
"role": "user",
|
|
32
|
+
"content": f"""Analyze this database schema and suggest realistic test data generators for each column.
|
|
33
|
+
|
|
34
|
+
Schema:
|
|
35
|
+
{schema_desc}
|
|
36
|
+
|
|
37
|
+
For each column, suggest what kind of realistic data should be generated.
|
|
38
|
+
Focus on columns where the name alone isn't enough to determine the right generator
|
|
39
|
+
(e.g., "name" in an "organizations" table should be a company name, not a person name).
|
|
40
|
+
|
|
41
|
+
Return a JSON object where keys are "table.column" and values have:
|
|
42
|
+
- "generator": one of the Faker methods (e.g., "company", "name", "sentence") or a custom description
|
|
43
|
+
- "example": an example value
|
|
44
|
+
- "values": (optional) a list of realistic values to choose from (for status/type/role fields)
|
|
45
|
+
|
|
46
|
+
Only include columns where your suggestion differs from the obvious default.
|
|
47
|
+
Return ONLY valid JSON, no markdown, no explanation."""
|
|
48
|
+
}],
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
text = response.content[0].text.strip()
|
|
52
|
+
# Убираем markdown если есть
|
|
53
|
+
if text.startswith("```"):
|
|
54
|
+
text = text.split("\n", 1)[1]
|
|
55
|
+
text = text.rsplit("```", 1)[0]
|
|
56
|
+
|
|
57
|
+
return json.loads(text)
|
|
58
|
+
|
|
59
|
+
except ImportError:
|
|
60
|
+
return {}
|
|
61
|
+
except Exception:
|
|
62
|
+
return {}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def generate_with_ai(
|
|
66
|
+
table_name: str,
|
|
67
|
+
columns: list[dict],
|
|
68
|
+
row_count: int,
|
|
69
|
+
api_key: str | None = None,
|
|
70
|
+
context: str = "",
|
|
71
|
+
) -> list[dict] | None:
|
|
72
|
+
"""Генерировать данные для таблицы через Claude.
|
|
73
|
+
|
|
74
|
+
Используется для небольших таблиц (до 50 строк) где нужна высокая реалистичность.
|
|
75
|
+
Для больших объёмов используем rule-based генерацию.
|
|
76
|
+
"""
|
|
77
|
+
api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
|
|
78
|
+
if not api_key:
|
|
79
|
+
return None
|
|
80
|
+
|
|
81
|
+
# Ограничиваем до 50 строк для AI-генерации (стоимость)
|
|
82
|
+
row_count = min(row_count, 50)
|
|
83
|
+
|
|
84
|
+
col_desc = ", ".join(
|
|
85
|
+
f"{c['name']} ({c['type']}{'?' if c.get('nullable') else ''})"
|
|
86
|
+
for c in columns
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
try:
|
|
90
|
+
import anthropic
|
|
91
|
+
client = anthropic.Anthropic(api_key=api_key)
|
|
92
|
+
|
|
93
|
+
prompt = f"""Generate {row_count} realistic rows for the "{table_name}" table.
|
|
94
|
+
Columns: {col_desc}
|
|
95
|
+
{f"Context: {context}" if context else ""}
|
|
96
|
+
|
|
97
|
+
Requirements:
|
|
98
|
+
- Data must be realistic and internally consistent
|
|
99
|
+
- If column is a name in an organization table, use company names
|
|
100
|
+
- If column is a status, use realistic statuses
|
|
101
|
+
- Dates should be recent (within last 2 years)
|
|
102
|
+
- Respect nullable columns (occasionally set to null)
|
|
103
|
+
|
|
104
|
+
Return a JSON array of objects. Each object has column names as keys.
|
|
105
|
+
Return ONLY valid JSON, no markdown, no explanation."""
|
|
106
|
+
|
|
107
|
+
response = client.messages.create(
|
|
108
|
+
model="claude-haiku-4-5-20251001",
|
|
109
|
+
max_tokens=8192,
|
|
110
|
+
messages=[{"role": "user", "content": prompt}],
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
text = response.content[0].text.strip()
|
|
114
|
+
if text.startswith("```"):
|
|
115
|
+
text = text.split("\n", 1)[1]
|
|
116
|
+
text = text.rsplit("```", 1)[0]
|
|
117
|
+
|
|
118
|
+
return json.loads(text)
|
|
119
|
+
|
|
120
|
+
except ImportError:
|
|
121
|
+
return None
|
|
122
|
+
except Exception:
|
|
123
|
+
return None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def _build_schema_description(tables: dict) -> str:
|
|
127
|
+
"""Построить текстовое описание схемы для AI."""
|
|
128
|
+
lines = []
|
|
129
|
+
for table_name, table in tables.items():
|
|
130
|
+
cols = []
|
|
131
|
+
for col in table.columns:
|
|
132
|
+
parts = [f"{col.name} {col.data_type}"]
|
|
133
|
+
if col.is_primary:
|
|
134
|
+
parts.append("PK")
|
|
135
|
+
if col.fk_table:
|
|
136
|
+
parts.append(f"FK→{col.fk_table}.{col.fk_column}")
|
|
137
|
+
if not col.nullable:
|
|
138
|
+
parts.append("NOT NULL")
|
|
139
|
+
if col.is_unique:
|
|
140
|
+
parts.append("UNIQUE")
|
|
141
|
+
if col.enum_values:
|
|
142
|
+
parts.append(f"ENUM({', '.join(col.enum_values[:5])})")
|
|
143
|
+
cols.append(" ".join(parts))
|
|
144
|
+
|
|
145
|
+
lines.append(f"TABLE {table_name}:")
|
|
146
|
+
for c in cols:
|
|
147
|
+
lines.append(f" - {c}")
|
|
148
|
+
lines.append("")
|
|
149
|
+
|
|
150
|
+
return "\n".join(lines)
|