seedforge 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
seedforge/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """SeedForge - AI-powered test data generator for databases."""
2
+
3
+ __version__ = "0.1.0"
seedforge/ai.py ADDED
@@ -0,0 +1,150 @@
1
+ """AI-слой: Claude анализирует схему и генерирует контекстно-зависимые данные.
2
+
3
+ Используется как premium-фича. Отправляет только метаданные схемы (имена таблиц/колонок),
4
+ НЕ реальные данные пользователя.
5
+ """
6
+
7
+ import json
8
+ import os
9
+
10
+
11
+ def get_ai_generators(tables: dict, api_key: str | None = None) -> dict:
12
+ """Попросить Claude проанализировать схему и предложить генераторы.
13
+
14
+ Возвращает dict: {"table.column": {"generator": "...", "args": {...}}}
15
+ """
16
+ api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
17
+ if not api_key:
18
+ return {}
19
+
20
+ # Собираем метаданные схемы (только имена, типы, связи — НЕ данные)
21
+ schema_desc = _build_schema_description(tables)
22
+
23
+ try:
24
+ import anthropic
25
+ client = anthropic.Anthropic(api_key=api_key)
26
+
27
+ response = client.messages.create(
28
+ model="claude-haiku-4-5-20251001",
29
+ max_tokens=4096,
30
+ messages=[{
31
+ "role": "user",
32
+ "content": f"""Analyze this database schema and suggest realistic test data generators for each column.
33
+
34
+ Schema:
35
+ {schema_desc}
36
+
37
+ For each column, suggest what kind of realistic data should be generated.
38
+ Focus on columns where the name alone isn't enough to determine the right generator
39
+ (e.g., "name" in an "organizations" table should be a company name, not a person name).
40
+
41
+ Return a JSON object where keys are "table.column" and values have:
42
+ - "generator": one of the Faker methods (e.g., "company", "name", "sentence") or a custom description
43
+ - "example": an example value
44
+ - "values": (optional) a list of realistic values to choose from (for status/type/role fields)
45
+
46
+ Only include columns where your suggestion differs from the obvious default.
47
+ Return ONLY valid JSON, no markdown, no explanation."""
48
+ }],
49
+ )
50
+
51
+ text = response.content[0].text.strip()
52
+ # Убираем markdown если есть
53
+ if text.startswith("```"):
54
+ text = text.split("\n", 1)[1]
55
+ text = text.rsplit("```", 1)[0]
56
+
57
+ return json.loads(text)
58
+
59
+ except ImportError:
60
+ return {}
61
+ except Exception:
62
+ return {}
63
+
64
+
65
+ def generate_with_ai(
66
+ table_name: str,
67
+ columns: list[dict],
68
+ row_count: int,
69
+ api_key: str | None = None,
70
+ context: str = "",
71
+ ) -> list[dict] | None:
72
+ """Генерировать данные для таблицы через Claude.
73
+
74
+ Используется для небольших таблиц (до 50 строк) где нужна высокая реалистичность.
75
+ Для больших объёмов используем rule-based генерацию.
76
+ """
77
+ api_key = api_key or os.environ.get("ANTHROPIC_API_KEY")
78
+ if not api_key:
79
+ return None
80
+
81
+ # Ограничиваем до 50 строк для AI-генерации (стоимость)
82
+ row_count = min(row_count, 50)
83
+
84
+ col_desc = ", ".join(
85
+ f"{c['name']} ({c['type']}{'?' if c.get('nullable') else ''})"
86
+ for c in columns
87
+ )
88
+
89
+ try:
90
+ import anthropic
91
+ client = anthropic.Anthropic(api_key=api_key)
92
+
93
+ prompt = f"""Generate {row_count} realistic rows for the "{table_name}" table.
94
+ Columns: {col_desc}
95
+ {f"Context: {context}" if context else ""}
96
+
97
+ Requirements:
98
+ - Data must be realistic and internally consistent
99
+ - If column is a name in an organization table, use company names
100
+ - If column is a status, use realistic statuses
101
+ - Dates should be recent (within last 2 years)
102
+ - Respect nullable columns (occasionally set to null)
103
+
104
+ Return a JSON array of objects. Each object has column names as keys.
105
+ Return ONLY valid JSON, no markdown, no explanation."""
106
+
107
+ response = client.messages.create(
108
+ model="claude-haiku-4-5-20251001",
109
+ max_tokens=8192,
110
+ messages=[{"role": "user", "content": prompt}],
111
+ )
112
+
113
+ text = response.content[0].text.strip()
114
+ if text.startswith("```"):
115
+ text = text.split("\n", 1)[1]
116
+ text = text.rsplit("```", 1)[0]
117
+
118
+ return json.loads(text)
119
+
120
+ except ImportError:
121
+ return None
122
+ except Exception:
123
+ return None
124
+
125
+
126
+ def _build_schema_description(tables: dict) -> str:
127
+ """Построить текстовое описание схемы для AI."""
128
+ lines = []
129
+ for table_name, table in tables.items():
130
+ cols = []
131
+ for col in table.columns:
132
+ parts = [f"{col.name} {col.data_type}"]
133
+ if col.is_primary:
134
+ parts.append("PK")
135
+ if col.fk_table:
136
+ parts.append(f"FK→{col.fk_table}.{col.fk_column}")
137
+ if not col.nullable:
138
+ parts.append("NOT NULL")
139
+ if col.is_unique:
140
+ parts.append("UNIQUE")
141
+ if col.enum_values:
142
+ parts.append(f"ENUM({', '.join(col.enum_values[:5])})")
143
+ cols.append(" ".join(parts))
144
+
145
+ lines.append(f"TABLE {table_name}:")
146
+ for c in cols:
147
+ lines.append(f" - {c}")
148
+ lines.append("")
149
+
150
+ return "\n".join(lines)
seedforge/cli.py ADDED
@@ -0,0 +1,311 @@
1
+ """CLI-интерфейс SeedForge."""
2
+
3
+ import typer
4
+ from rich.console import Console
5
+ from rich.table import Table
6
+ from rich.panel import Panel
7
+ from rich import print as rprint
8
+ from typing import Optional
9
+ from pathlib import Path
10
+
11
+ from seedforge.config import Config, DEFAULT_CONFIG_FILE
12
+ from seedforge.introspector import create_introspector
13
+ from seedforge.graph import DependencyGraph
14
+ from seedforge.generators import DataGenerator
15
+ from seedforge.inserter import BatchInserter
16
+
17
# Root Typer application; every @app.command() below registers a sub-command.
# Invoking the program with no arguments prints the help text.
app = typer.Typer(
    no_args_is_help=True,
    name="seedforge",
    help="AI-powered test data generator. Reads your DB schema, generates realistic FK-valid data.",
)

# Shared Rich console used by all commands and helpers for terminal output.
console = Console()
23
+
24
+
25
@app.command()
def connect(
    db_url: str = typer.Argument(..., help="PostgreSQL connection string (postgresql://user:pass@host:port/dbname)"),
    save: bool = typer.Option(True, help="Save connection to .seedforge.yaml"),
):
    """Connect to a PostgreSQL database and save the connection."""
    # The docstring above is the command's --help text, so it is left as is.
    #
    # Flow: open a connection, print basic server info, then (unless saving
    # is disabled) store the URL in the config file. Exits with status 1 if
    # either the connection or the save fails.
    console.print("\n[bold blue]Connecting to database...[/bold blue]")

    try:
        introspector = create_introspector(db_url)
        try:
            info = introspector.get_db_info()
        finally:
            # Release the connection even when the info query fails.
            introspector.close()

        console.print(f"[bold green]Connected![/bold green] {info['database']} @ {info['host']}")
        console.print(f" Tables: {info['table_count']}")
        console.print(f" PostgreSQL: {info['version']}")
    except Exception as e:
        console.print(f"[bold red]Connection failed:[/bold red] {e}")
        raise typer.Exit(1)

    if save:
        # NOTE(review): the URL is persisted verbatim, including any password
        # embedded in it — consider warning users or restricting file access.
        try:
            config = Config.load()
            config.db_url = db_url
            config.save()
        except Exception as e:
            # Reported separately so a config problem is not mistaken for a
            # database connectivity problem.
            console.print(f"[bold red]Could not save connection:[/bold red] {e}")
            raise typer.Exit(1)
        console.print(f"\n[dim]Saved to {DEFAULT_CONFIG_FILE}[/dim]")
51
+
52
+
53
@app.command()
def inspect(
    db_url: Optional[str] = typer.Argument(None, help="PostgreSQL connection string"),
    schema: str = typer.Option("public", help="Database schema to inspect"),
):
    """Inspect database schema: tables, columns, foreign keys."""
    # The docstring above is the command's --help text, so it is left as is.
    db_url = _resolve_db_url(db_url)

    # Use the same factory as the other commands; this module never imports
    # a name called ``Introspector``.
    introspector = create_introspector(db_url)
    try:
        tables = introspector.get_tables(schema)
    finally:
        # Release the connection even when schema reading fails.
        introspector.close()

    if not tables:
        console.print("[yellow]No tables found.[/yellow]")
        raise typer.Exit(0)

    # Dependency graph gives the insertion order.
    graph = DependencyGraph(tables)
    order = graph.topological_sort()

    console.print(f"\n[bold]Found {len(tables)} tables[/bold] (insertion order):\n")

    for i, table_name in enumerate(order, 1):
        table = tables[table_name]
        grid = Table(title=f"{i}. {table_name}", title_style="bold cyan", show_lines=False)
        grid.add_column("Column", style="white")
        grid.add_column("Type", style="green")
        grid.add_column("Nullable", style="yellow", width=8)
        grid.add_column("FK → ", style="magenta")

        for col in table.columns:
            fk_str = f"{col.fk_table}.{col.fk_column}" if col.fk_table else ""
            nullable = "YES" if col.nullable else "NO"
            grid.add_row(col.name, col.data_type, nullable, fk_str)

        console.print(grid)
        console.print()

    # Summary statistics.
    total_cols = sum(len(tbl.columns) for tbl in tables.values())
    total_fks = sum(1 for tbl in tables.values() for c in tbl.columns if c.fk_table)
    console.print(f"[dim]Total: {len(tables)} tables, {total_cols} columns, {total_fks} foreign keys[/dim]\n")
95
+
96
+
97
@app.command()
def generate(
    db_url: Optional[str] = typer.Argument(None, help="PostgreSQL connection string"),
    rows: int = typer.Option(100, "--rows", "-r", help="Rows per table"),
    schema: str = typer.Option("public", help="Database schema"),
    seed: Optional[int] = typer.Option(None, "--seed", "-s", help="Random seed for deterministic generation"),
    export: Optional[str] = typer.Option(None, "--export", "-e", help="Export to file (sql/json) instead of inserting"),
    tables: Optional[str] = typer.Option(None, "--tables", "-t", help="Comma-separated list of tables to fill"),
    clean: bool = typer.Option(False, "--clean", help="TRUNCATE tables before inserting (CASCADE)"),
    dry_run: bool = typer.Option(False, "--dry-run", help="Show what would be generated without inserting"),
):
    """Generate realistic test data and insert into the database."""
    # The docstring above is the command's --help text, so it is left as is.
    #
    # Flow: read the schema, optionally narrow it to the requested tables
    # plus their FK ancestors, generate rows in dependency order, then
    # export, preview (dry run) or insert. --export takes precedence over
    # --dry-run.
    db_url = _resolve_db_url(db_url)

    introspector = None
    try:
        with console.status("[bold blue]Reading schema...[/bold blue]"):
            introspector = create_introspector(db_url)
            all_tables = introspector.get_tables(schema)

        if not all_tables:
            console.print("[yellow]No tables found.[/yellow]")
            raise typer.Exit(0)

        # Table filter.
        if tables:
            requested = [name.strip() for name in tables.split(",") if name.strip()]
            filtered = {k: v for k, v in all_tables.items() if k in requested}
            if not filtered:
                console.print(f"[red]Tables not found: {tables}[/red]")
                raise typer.Exit(1)

            unknown = [name for name in requested if name not in all_tables]
            if unknown:
                # Surface typos instead of silently dropping them.
                console.print(f"[yellow]Ignoring unknown tables: {', '.join(unknown)}[/yellow]")

            # Pull in parent tables (for FKs) transitively, so a parent's own
            # parents are included too.
            pending = list(filtered.values())
            while pending:
                tbl = pending.pop()
                for col in tbl.columns:
                    parent = col.fk_table
                    if parent and parent in all_tables and parent not in filtered:
                        filtered[parent] = all_tables[parent]
                        pending.append(all_tables[parent])
            all_tables = filtered

        # Insertion order.
        graph = DependencyGraph(all_tables)
        order = graph.topological_sort()

        console.print(f"\n[bold]Generating {rows} rows for {len(order)} tables[/bold]")
        if seed is not None:
            console.print(f"[dim]Seed: {seed}[/dim]")
        console.print()

        # Data generation; tables generated earlier are passed along so
        # later tables can refer to them.
        generator = DataGenerator(seed=seed)
        generated_data = {}

        for table_name in order:
            table = all_tables[table_name]
            with console.status(f"[blue]Generating {table_name}...[/blue]"):
                data = generator.generate_table(table, rows, generated_data)
                generated_data[table_name] = data
            console.print(f" [green]✓[/green] {table_name}: {len(data)} rows")

        # Export, preview or insert.
        if export:
            _export_data(export, generated_data, all_tables, order)
        elif dry_run:
            console.print("\n[yellow]Dry run — no data inserted.[/yellow]")
            _show_preview(generated_data, order)
        else:
            engine = introspector.get_db_info().get("engine", "PostgreSQL")
            inserter = BatchInserter(introspector.connection, engine=engine)
            if clean:
                with console.status("[yellow]Cleaning tables...[/yellow]"):
                    inserter.truncate_tables(order)
                console.print("[yellow]Tables truncated.[/yellow]\n")

            with console.status("[bold blue]Inserting data...[/bold blue]"):
                inserter.insert_all(generated_data, all_tables, order)
            console.print(f"\n[bold green]Done![/bold green] Inserted {sum(len(d) for d in generated_data.values())} rows into {len(order)} tables.\n")
    finally:
        # Single cleanup point: runs on success, typer.Exit and errors alike.
        if introspector is not None:
            introspector.close()
174
+
175
+
176
@app.command()
def ai_generate(
    db_url: Optional[str] = typer.Argument(None, help="Database connection string"),
    rows: int = typer.Option(20, "--rows", "-r", help="Rows per table (max 50 for AI)"),
    schema: str = typer.Option("public", help="Database schema"),
    api_key: Optional[str] = typer.Option(None, "--api-key", envvar="ANTHROPIC_API_KEY", help="Anthropic API key"),
    export: Optional[str] = typer.Option(None, "--export", "-e", help="Export to file (sql/json)"),
):
    """Generate data using Claude AI for maximum realism (requires ANTHROPIC_API_KEY)."""
    # The docstring above is the command's --help text, so it is left as is.
    #
    # Flow: read the schema, ask the AI for rows table by table in
    # dependency order, then export the result or print a preview. This
    # command does not insert anything into the database.
    # Imported lazily so the AI layer is only loaded by this command.
    from seedforge.ai import generate_with_ai

    if not api_key:
        console.print("[red]Set ANTHROPIC_API_KEY or use --api-key[/red]")
        raise typer.Exit(1)

    db_url = _resolve_db_url(db_url)
    rows = min(rows, 50)

    introspector = None
    try:
        with console.status("[bold blue]Reading schema...[/bold blue]"):
            introspector = create_introspector(db_url)
            all_tables = introspector.get_tables(schema)

        if not all_tables:
            console.print("[yellow]No tables found.[/yellow]")
            raise typer.Exit(0)

        graph = DependencyGraph(all_tables)
        order = graph.topological_sort()

        console.print(f"\n[bold]AI generating {rows} rows for {len(order)} tables[/bold]\n")

        generated_data = {}
        for table_name in order:
            table = all_tables[table_name]
            # Serial primary keys are left out of the prompt.
            columns = [
                {"name": c.name, "type": c.data_type, "nullable": c.nullable}
                for c in table.columns
                if not (c.is_primary and c.is_serial)
            ]

            with console.status(f"[blue]AI generating {table_name}...[/blue]"):
                data = generate_with_ai(table_name, columns, rows, api_key=api_key)

            if data:
                generated_data[table_name] = data
                console.print(f" [green]✓[/green] {table_name}: {len(data)} rows (AI)")
            else:
                console.print(f" [yellow]⚠[/yellow] {table_name}: AI failed, skipping")

        # Skipped tables have no entry in generated_data. The export and
        # preview helpers index by table name, so hand them only the tables
        # that actually produced rows.
        generated_order = [name for name in order if name in generated_data]

        if export:
            _export_data(export, generated_data, all_tables, generated_order)
        else:
            console.print(f"\n[bold green]Done![/bold green] Generated {sum(len(d) for d in generated_data.values())} rows.")
            _show_preview(generated_data, generated_order)
    finally:
        # Single cleanup point: runs on success, typer.Exit and errors alike.
        if introspector is not None:
            introspector.close()
234
+
235
+
236
@app.command()
def version():
    """Show SeedForge version."""
    # Resolved at call time, so the version is read only when this command runs.
    from seedforge import __version__ as installed_version

    banner = f"SeedForge v{installed_version}"
    console.print(banner)
241
+
242
+
243
+ def _resolve_db_url(db_url: Optional[str]) -> str:
244
+ """Получить DB URL из аргумента или конфига."""
245
+ if db_url:
246
+ return db_url
247
+ config = Config.load()
248
+ if config.db_url:
249
+ return config.db_url
250
+ console.print("[red]No database URL. Run 'seedforge connect <url>' first or pass it as argument.[/red]")
251
+ raise typer.Exit(1)
252
+
253
+
254
def _export_data(format: str, data: dict, tables: dict, order: list):
    """Write generated rows to a file in the current working directory.

    Args:
        format: ``"sql"`` or ``"json"`` (case-insensitive). Any other value
            prints an error and writes nothing.
        data: Mapping of table name to its list of row dicts.
        tables: Table metadata, passed through to the SQL generator.
        order: Table names in insertion order. Names with no entry in
            ``data`` are skipped.

    Output goes to ``seedforge_export.sql`` or ``seedforge_export.json``,
    always encoded as UTF-8.
    """
    # ``format`` shadows the builtin but is part of the existing signature.
    kind = format.strip().lower()
    # Tables may be missing from ``data`` (e.g. skipped after an AI failure).
    present = [name for name in order if name in data]

    if kind == "sql":
        from seedforge.inserter import BatchInserter

        sql = BatchInserter.generate_sql(data, tables, present)
        output_file = "seedforge_export.sql"
        # Explicit encoding: the platform default may not handle all text.
        Path(output_file).write_text(sql, encoding="utf-8")
        console.print(f"\n[green]Exported to {output_file}[/green]")
    elif kind == "json":
        import json

        output_file = "seedforge_export.json"
        # Convert every value to a JSON-serialisable form.
        serializable = {
            name: [
                {k: _json_safe(v) for k, v in row.items()}
                for row in data[name]
            ]
            for name in present
        }
        # ensure_ascii=False emits raw non-ASCII characters, so the file must
        # be written as UTF-8 regardless of the platform default.
        Path(output_file).write_text(
            json.dumps(serializable, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )
        console.print(f"\n[green]Exported to {output_file}[/green]")
    else:
        console.print(f"[red]Unknown format: {format}. Use 'sql' or 'json'.[/red]")
276
+
277
+
278
+ def _json_safe(value):
279
+ """Конвертировать значение в JSON-безопасный тип."""
280
+ import datetime
281
+ import decimal
282
+ import uuid
283
+ if isinstance(value, (datetime.date, datetime.datetime)):
284
+ return value.isoformat()
285
+ if isinstance(value, decimal.Decimal):
286
+ return float(value)
287
+ if isinstance(value, uuid.UUID):
288
+ return str(value)
289
+ return value
290
+
291
+
292
def _show_preview(data: dict, order: list):
    """Print a short preview table for every generated table.

    Args:
        data: Mapping of table name to its list of row dicts.
        order: Table names in the order they should be shown. Names with no
            rows in ``data`` (missing or empty) are skipped.

    At most five rows are shown per table, with cell text cut to 40
    characters. Column headers come from the keys of the first row.
    """
    for table_name in order:
        # .get(): a table may be absent when its generation was skipped.
        rows = data.get(table_name)
        if not rows:
            continue

        preview = Table(title=table_name, title_style="bold cyan")
        cols = list(rows[0].keys())
        for col in cols:
            preview.add_column(col)
        for row in rows[:5]:
            preview.add_row(*[str(row.get(c, ""))[:40] for c in cols])
        if len(rows) > 5:
            # Trailing marker row: total count in the first cell only.
            preview.add_row(*[f"... ({len(rows)} total)" if i == 0 else "..." for i, _ in enumerate(cols)])
        console.print(preview)
        console.print()
308
+
309
+
310
# Script entry point: run the Typer app when this module is executed directly.
if __name__ == "__main__":
    app()
seedforge/config.py ADDED
@@ -0,0 +1,53 @@
1
+ """Конфигурация SeedForge (.seedforge.yaml)."""
2
+
3
+ import yaml
4
+ from pathlib import Path
5
+ from dataclasses import dataclass, field
6
+
7
+ DEFAULT_CONFIG_FILE = ".seedforge.yaml"
8
+
9
+
10
+ @dataclass
11
+ class Config:
12
+ db_url: str = ""
13
+ default_rows: int = 100
14
+ default_schema: str = "public"
15
+ seed: int | None = None
16
+ exclude_tables: list[str] = field(default_factory=list)
17
+ # Кастомные генераторы для колонок
18
+ # Формат: {"table.column": {"type": "faker_method", "args": {...}}}
19
+ custom_generators: dict = field(default_factory=dict)
20
+
21
+ @classmethod
22
+ def load(cls, path: str = DEFAULT_CONFIG_FILE) -> "Config":
23
+ """Загрузить конфиг из файла."""
24
+ config_path = Path(path)
25
+ if not config_path.exists():
26
+ return cls()
27
+ with open(config_path) as f:
28
+ data = yaml.safe_load(f) or {}
29
+ return cls(
30
+ db_url=data.get("db_url", ""),
31
+ default_rows=data.get("default_rows", 100),
32
+ default_schema=data.get("default_schema", "public"),
33
+ seed=data.get("seed"),
34
+ exclude_tables=data.get("exclude_tables", []),
35
+ custom_generators=data.get("custom_generators", {}),
36
+ )
37
+
38
+ def save(self, path: str = DEFAULT_CONFIG_FILE):
39
+ """Сохранить конфиг в файл."""
40
+ data = {
41
+ "db_url": self.db_url,
42
+ "default_rows": self.default_rows,
43
+ "default_schema": self.default_schema,
44
+ }
45
+ if self.seed is not None:
46
+ data["seed"] = self.seed
47
+ if self.exclude_tables:
48
+ data["exclude_tables"] = self.exclude_tables
49
+ if self.custom_generators:
50
+ data["custom_generators"] = self.custom_generators
51
+
52
+ with open(path, "w") as f:
53
+ yaml.dump(data, f, default_flow_style=False, allow_unicode=True)