fixtureforge 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- fixtureforge-0.1.0/LICENSE +23 -0
- fixtureforge-0.1.0/PKG-INFO +44 -0
- fixtureforge-0.1.0/README.md +16 -0
- fixtureforge-0.1.0/pyproject.toml +37 -0
- fixtureforge-0.1.0/src/fixtureforge/__init__.py +70 -0
- fixtureforge-0.1.0/src/fixtureforge/ai/__init__.py +10 -0
- fixtureforge-0.1.0/src/fixtureforge/ai/cache.py +86 -0
- fixtureforge-0.1.0/src/fixtureforge/ai/engine.py +44 -0
- fixtureforge-0.1.0/src/fixtureforge/ai/prompts.py +31 -0
- fixtureforge-0.1.0/src/fixtureforge/cli/__init__.py +6 -0
- fixtureforge-0.1.0/src/fixtureforge/cli/commands.py +90 -0
- fixtureforge-0.1.0/src/fixtureforge/core/__init__.py +11 -0
- fixtureforge-0.1.0/src/fixtureforge/core/analyzer.py +0 -0
- fixtureforge-0.1.0/src/fixtureforge/core/exporter.py +96 -0
- fixtureforge-0.1.0/src/fixtureforge/core/generator.py +135 -0
- fixtureforge-0.1.0/src/fixtureforge/core/parser.py +108 -0
- fixtureforge-0.1.0/src/fixtureforge/core/recipe.py +76 -0
- fixtureforge-0.1.0/src/fixtureforge/core/router.py +50 -0
- fixtureforge-0.1.0/src/fixtureforge/core/streamer.py +83 -0
- fixtureforge-0.1.0/src/fixtureforge/integrations/__init__.py +0 -0
- fixtureforge-0.1.0/src/fixtureforge/integrations/github.py +0 -0
- fixtureforge-0.1.0/src/fixtureforge/integrations/jira.py +0 -0
- fixtureforge-0.1.0/src/fixtureforge/library/__init__.py +0 -0
- fixtureforge-0.1.0/src/fixtureforge/library/sharing.py +0 -0
- fixtureforge-0.1.0/src/fixtureforge/library/storage.py +0 -0
- fixtureforge-0.1.0/src/fixtureforge/pyproject.toml +15 -0
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
`LICENSE`
|
|
2
|
+
```text
|
|
3
|
+
MIT License
|
|
4
|
+
|
|
5
|
+
Copyright (c) 2026 Yaniv
|
|
6
|
+
|
|
7
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
8
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
9
|
+
in the Software without restriction, including without limitation the rights
|
|
10
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
11
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
12
|
+
furnished to do so, subject to the following conditions:
|
|
13
|
+
|
|
14
|
+
The above copyright notice and this permission notice shall be included in all
|
|
15
|
+
copies or substantial portions of the Software.
|
|
16
|
+
|
|
17
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
18
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
19
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
20
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
21
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
22
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23
|
+
SOFTWARE.
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: fixtureforge
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: AI-powered realistic test data generation
|
|
5
|
+
License: MIT
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Keywords: testing,fixtures,test-data,qa,automation
|
|
8
|
+
Author: Yaniv Metuku
|
|
9
|
+
Requires-Python: >=3.11,<4.0
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Requires-Dist: anthropic (>=0.18.0,<0.19.0)
|
|
17
|
+
Requires-Dist: click (>=8.1.0,<9.0.0)
|
|
18
|
+
Requires-Dist: faker (>=22.0.0,<23.0.0)
|
|
19
|
+
Requires-Dist: google-genai (>=1.62.0,<2.0.0)
|
|
20
|
+
Requires-Dist: httpx (>=0.28.1,<0.29.0)
|
|
21
|
+
Requires-Dist: pydantic (>=2.5.0,<3.0.0)
|
|
22
|
+
Requires-Dist: pyyaml (>=6.0,<7.0)
|
|
23
|
+
Requires-Dist: rich (>=13.7.0,<14.0.0)
|
|
24
|
+
Requires-Dist: sqlalchemy (>=2.0.0,<3.0.0)
|
|
25
|
+
Project-URL: Homepage, https://fixtureforge.dev
|
|
26
|
+
Project-URL: Repository, https://github.com/Yaniv2809/fixtureforge
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
# FixtureForge 🛠️
|
|
30
|
+
|
|
31
|
+
**Generate realistic, AI-powered test data using Google Gemini.**
|
|
32
|
+
Stop using "Lorem Ipsum" or "Test User 1". Generate context-aware data for your QA & Development environments.
|
|
33
|
+
|
|
34
|
+
## 🚀 Features
|
|
35
|
+
|
|
36
|
+
* **Context-Aware:** Generate "Angry Customers" or "High-Value Orders" using AI.
|
|
37
|
+
* **Structured Output:** Exports directly to SQL, JSON, or CSV.
|
|
38
|
+
* **Lazy Streaming:** Can generate massive datasets (1GB+) without crashing memory.
|
|
39
|
+
* **Smart Relationships:** Automatically links Orders to Customers.
|
|
40
|
+
|
|
41
|
+
## 📦 Installation
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install fixtureforge
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# FixtureForge 🛠️
|
|
2
|
+
|
|
3
|
+
**Generate realistic, AI-powered test data using Google Gemini.**
|
|
4
|
+
Stop using "Lorem Ipsum" or "Test User 1". Generate context-aware data for your QA & Development environments.
|
|
5
|
+
|
|
6
|
+
## 🚀 Features
|
|
7
|
+
|
|
8
|
+
* **Context-Aware:** Generate "Angry Customers" or "High-Value Orders" using AI.
|
|
9
|
+
* **Structured Output:** Exports directly to SQL, JSON, or CSV.
|
|
10
|
+
* **Lazy Streaming:** Can generate massive datasets (1GB+) without crashing memory.
|
|
11
|
+
* **Smart Relationships:** Automatically links Orders to Customers.
|
|
12
|
+
|
|
13
|
+
## 📦 Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install fixtureforge
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
[tool.poetry]
|
|
2
|
+
name = "fixtureforge"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "AI-powered realistic test data generation"
|
|
5
|
+
authors = ["Yaniv Metuku"]
|
|
6
|
+
license = "MIT"
|
|
7
|
+
readme = "README.md"
|
|
8
|
+
homepage = "https://fixtureforge.dev"
|
|
9
|
+
repository = "https://github.com/Yaniv2809/fixtureforge"
|
|
10
|
+
keywords = ["testing", "fixtures", "test-data", "qa", "automation"]
|
|
11
|
+
|
|
12
|
+
[tool.poetry.dependencies]
|
|
13
|
+
python = "^3.11"
|
|
14
|
+
pydantic = "^2.5.0"
|
|
15
|
+
sqlalchemy = "^2.0.0"
|
|
16
|
+
anthropic = "^0.18.0"
|
|
17
|
+
faker = "^22.0.0"
|
|
18
|
+
click = "^8.1.0"
|
|
19
|
+
rich = "^13.7.0"
|
|
20
|
+
httpx = "^0.28.1"
|
|
21
|
+
pyyaml = "^6.0"
|
|
22
|
+
google-genai = "^1.62.0"
|
|
23
|
+
|
|
24
|
+
[tool.poetry.group.dev.dependencies]
|
|
25
|
+
pytest = "^7.4.0"
|
|
26
|
+
pytest-asyncio = "^0.23.0"
|
|
27
|
+
pytest-cov = "^4.1.0"
|
|
28
|
+
black = "^23.12.0"
|
|
29
|
+
ruff = "^0.1.9"
|
|
30
|
+
mypy = "^1.8.0"
|
|
31
|
+
|
|
32
|
+
[tool.poetry.scripts]
|
|
33
|
+
forge = "fixtureforge.cli.commands:cli"
|
|
34
|
+
|
|
35
|
+
[build-system]
|
|
36
|
+
requires = ["poetry-core"]
|
|
37
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from typing import Type, Any, List, Optional, Generator
|
|
2
|
+
import os
|
|
3
|
+
from .core.generator import BasicGenerator
|
|
4
|
+
from .core.streamer import DataStreamer
|
|
5
|
+
|
|
6
|
+
__version__ = "0.1.0"
|
|
7
|
+
|
|
8
|
+
class Forge:
    """High-level facade over BasicGenerator.

    Creates single instances, in-memory batches, or disk-streamed datasets,
    and maintains the generator registry used to resolve relationships
    (e.g. linking an Order's customer_id to a previously created Customer).
    """

    def __init__(self, api_key: Optional[str] = None, use_ai: bool = True, locale: str = "en_US"):
        # NOTE(review): use_ai is stored but never consulted in this class;
        # AI usage is effectively governed by whether an API key is available.
        self.use_ai = use_ai
        key = api_key or os.getenv("GOOGLE_API_KEY")
        self.generator = BasicGenerator(locale=locale, api_key=key)

    def create(self, model: Type, count: int = 1, context: Optional[str] = None, **overrides) -> Any:
        """Standard creation (In-Memory). Good for small batches.

        Returns a single instance when count == 1, otherwise a list.
        Explicit keyword overrides take precedence over generated values.
        """
        if count == 1:
            item = self.generator.generate(model, context=context, **overrides)
            self._add_to_registry(model, item)
            return item

        results = []
        for i in range(count):
            print(f" ...generating {i+1}/{count}...")
            item = self.generator.generate(model, context=context, **overrides)
            self._add_to_registry(model, item)
            results.append(item)

        return results

    def create_stream(self, model: Type, count: int, filename: str, context: Optional[str] = None, **overrides) -> Generator[Any, None, None]:
        """
        Lazy Evaluation: Generates data and writes immediately to disk.
        Prevents Memory Explosion for large datasets.

        Yields each item as it is generated so callers can consume in real time.
        """
        streamer = DataStreamer(filename)
        streamer.start()

        # Fixed: interpolate the actual target filename instead of a placeholder.
        print(f"🌊 Starting stream to {filename} ({count} items)...")

        try:
            for _ in range(count):
                # 1. Generate
                item = self.generator.generate(model, context=context, **overrides)

                # 2. Write to Disk (Immediately!)
                streamer.write(item)

                # 3. Register (needed if this model is a parent for others)
                self._add_to_registry(model, item)

                # 4. Yield for real-time consumption (if needed)
                yield item
        finally:
            # Close the output file even if the consumer abandons the generator.
            streamer.close()

        print(f"✅ Stream complete! Data saved to {filename}")

    def _add_to_registry(self, model: Type, item: Any):
        """Helper to register items for relationships (keyed by lowercased class name)."""
        model_name = model.__name__.lower()
        self.generator.registry.setdefault(model_name, []).append(item)

    def create_batch(self, model: Type, count: int, context: Optional[str] = None, **overrides) -> List[Any]:
        """Convenience alias for create() with an explicit count."""
        return self.create(model, count=count, context=context, **overrides)

    def stats(self):
        """Return {model_name: number_of_generated_instances}."""
        return {k: len(v) for k, v in self.generator.registry.items()}

# Global instance (instantiated at import time).
forge = Forge()
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Cache AI responses to save costs and speed
|
|
3
|
+
"""
|
|
4
|
+
import hashlib
|
|
5
|
+
import json
|
|
6
|
+
# import os
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Dict, Optional
|
|
9
|
+
from datetime import datetime, timedelta
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ResponseCache:
    """Filesystem-backed cache for AI responses (saves API cost and latency).

    Entries live as <sha256>.json files under the cache directory and are
    discarded once older than the TTL.
    """

    def __init__(self, cache_dir: Optional[str] = None):
        # Fall back to ~/.fixtureforge/cache when no directory is supplied.
        base = Path(cache_dir) if cache_dir else Path.home() / ".fixtureforge" / "cache"
        self.cache_dir = base
        self.cache_dir.mkdir(parents=True, exist_ok=True)
        self.ttl = timedelta(days=7)  # entries older than this are evicted on read

    def _hash_key(self, model_name: str, context: str, overrides: Dict) -> str:
        """Derive a stable SHA-256 key from the request parameters."""
        payload = json.dumps(
            {"model": model_name, "context": context, "overrides": overrides},
            sort_keys=True,
        )
        return hashlib.sha256(payload.encode()).hexdigest()

    def get(
        self,
        model_name: str,
        context: Optional[str],
        overrides: Dict[str, Any]
    ) -> Optional[Any]:
        """Return the cached payload, or None on a miss or expired entry."""
        key = self._hash_key(model_name, context or "", overrides)
        entry = self.cache_dir / f"{key}.json"

        if not entry.exists():
            return None

        # Evict entries whose mtime is older than the TTL.
        age = datetime.now() - datetime.fromtimestamp(entry.stat().st_mtime)
        if age > self.ttl:
            entry.unlink()
            return None

        return json.loads(entry.read_text())

    def set(
        self,
        model_name: str,
        context: Optional[str],
        overrides: Dict[str, Any],
        data: Any
    ):
        """Persist *data* under the key derived from the request parameters."""
        key = self._hash_key(model_name, context or "", overrides)
        entry = self.cache_dir / f"{key}.json"
        # default=str stringifies dates and other non-JSON values.
        entry.write_text(json.dumps(data, indent=2, default=str))

    def clear(self):
        """Delete every cached entry."""
        for entry in self.cache_dir.glob("*.json"):
            entry.unlink()

    def stats(self) -> Dict[str, Any]:
        """Summarize entry count, total size in MB, and cache location."""
        entries = list(self.cache_dir.glob("*.json"))
        size_bytes = sum(e.stat().st_size for e in entries)
        return {
            "entries": len(entries),
            "size_mb": round(size_bytes / 1024 / 1024, 2),
            "location": str(self.cache_dir),
        }
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import time
|
|
3
|
+
from typing import Optional
|
|
4
|
+
from google import genai
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class AIEngine:
    """Thin wrapper around the Google GenAI client with quota-aware retries."""

    def __init__(self, api_key: Optional[str] = None):
        self.api_key = api_key or os.getenv("GOOGLE_API_KEY")
        self.client = None

        if not self.api_key:
            # No key: generate_text() will return an error marker instead of raising.
            return

        try:
            self.client = genai.Client(api_key=self.api_key)
        except Exception as e:
            print(f"⚠️ AI Config Error: {e}")

    def generate_text(self, prompt: str, retries: int = 5) -> str:
        """Send *prompt* to Gemini and return the text response.

        Transient failures (rate limit / service unavailable) are retried
        with linear backoff. Returns an "[AI Error: ...]" marker string
        instead of raising on failure.
        """
        if not self.api_key or not self.client:
            return "[AI Error: Missing API Key]"

        attempt = 0
        while attempt < retries:
            try:
                # Official google-genai call style (client.models.generate_content).
                response = self.client.models.generate_content(
                    model='gemini-2.0-flash',
                    contents=prompt
                )
                return response.text.strip()

            except Exception as e:
                error_msg = str(e)
                # Retry only transient conditions: rate limiting (429 / quota)
                # and service unavailability (503). Fixed: 400 is a permanent
                # client error — retrying it can never succeed, so it now
                # fails fast like other non-retryable errors.
                if "429" in error_msg or "quota" in error_msg.lower() or "503" in error_msg:
                    wait_time = 10 * (attempt + 1)  # linear backoff: 10s, 20s, ...
                    print(f"⏳ Server busy (Quota). Waiting {wait_time}s... (Attempt {attempt + 1}/{retries})")
                    time.sleep(wait_time)
                    attempt += 1
                else:
                    return f"[AI Error: {error_msg}]"

        return "[AI Error: Timeout - Failed after retries]"
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""
|
|
2
|
+
System Prompts for the AI Engine.
|
|
3
|
+
Defines how the AI should behave and format data.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
SYSTEM_PROMPT = """
You are FixtureForge, an advanced test data generator.
Your goal is to generate realistic, context-aware synthetic data for developers and QA engineers.

RULES:
1. OUTPUT FORMAT: You must output ONLY valid JSON. No Markdown, no explanations, no code blocks (```json).
2. REALISM: Data must look real (names, addresses, emails). Use the requested locale if specified.
3. CONSISTENCY: If generating multiple fields, they must match (e.g., email should match the name).
4. CONTEXT: Pay attention to the user's specific scenario (e.g., "Angry customer", "Medical patient").
5. ARRAY ONLY: The output must always be a JSON array of objects, even if requesting 1 item.
"""

def build_prompt(model_schema: dict, count: int, context: str = "") -> str:
    """Assemble the full generation prompt: system rules, request, and scenario."""
    # An empty/None context falls back to a generic scenario description.
    scenario = context if context else "General realistic data"
    return f"""
{SYSTEM_PROMPT}

REQUEST:
Generate {count} items based on this schema:
{model_schema}

CONTEXT/SCENARIO:
{scenario}

Remember: Output strictly a JSON list of objects.
"""
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""CLI interface for FixtureForge"""
|
|
2
|
+
import click
|
|
3
|
+
import sys
|
|
4
|
+
import importlib.util
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from rich.console import Console
|
|
7
|
+
from rich.table import Table
|
|
8
|
+
from fixtureforge import Forge
|
|
9
|
+
from fixtureforge.core.exporter import DataExporter
|
|
10
|
+
|
|
11
|
+
console = Console()
|
|
12
|
+
|
|
13
|
+
@click.group()
def cli():
    """FixtureForge - AI-powered test data generation"""
    pass

@cli.command()
@click.argument('model_path')
@click.option('--count', '-n', default=1, help='Number of instances')
@click.option('--no-ai', is_flag=True, default=False, help='Disable AI features')
@click.option('--output', '-o', help='Output file (e.g., data.json)')
@click.option('--context', '-c', help='Context for AI generation (Director Mode)')
def generate(model_path, count, no_ai, output, context):
    """
    Generate test data.

    MODEL_PATH uses the form 'filename:ClassName' and is resolved by
    load_model_from_string(). Generated instances are printed as a table
    and, when --output is given, exported by extension via DataExporter.
    Any failure is reported and mapped to a non-zero exit code.
    """
    try:
        console.print(f"[bold blue]🔮 Loading model: {model_path}[/bold blue]")
        model_class = load_model_from_string(model_path)

        # --no-ai flips the Forge flag; AI is on by default.
        forge = Forge(use_ai=not no_ai)

        if context:
            console.print(f"[bold yellow]🎬 Director Mode: {context}[/bold yellow]")

        console.print(f"[bold blue]📦 Generating {count} instances...[/bold blue]")

        instances = forge.create_batch(model_class, count=count, context=context)

        display_table(instances)

        if output:
            console.print(f"[yellow]💾 Saving to {output}...[/yellow]")
            DataExporter.export(instances, output)

    except Exception as e:
        console.print(f"[bold red]❌ Error:[/bold red] {e}")
        sys.exit(1)
|
|
50
|
+
|
|
51
|
+
def load_model_from_string(model_path: str):
    """Resolve a "module:ClassName" path to the class object.

    Tries a normal import first, then falls back to loading "<module>.py"
    from the current working directory. Raises ValueError for a malformed
    path, ImportError when the module cannot be found, and AttributeError
    when the class is missing from the module.
    """
    parts = model_path.split(":")
    if len(parts) != 2:
        raise ValueError("Model path must be in format 'filename:ClassName'")
    module_name, class_name = parts

    # Allow plain module names relative to the user's working directory.
    sys.path.append(str(Path.cwd()))

    try:
        module = importlib.import_module(module_name)
    except ImportError:
        # Fall back to loading "<module_name>.py" directly from disk.
        spec = importlib.util.spec_from_file_location(module_name, f"{module_name}.py")
        if spec is None or spec.loader is None:
            raise ImportError(f"Could not find module '{module_name}'")
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

    if not hasattr(module, class_name):
        raise AttributeError(f"Class '{class_name}' not found in {module_name}")

    return getattr(module, class_name)
|
|
73
|
+
|
|
74
|
+
def display_table(instances):
    """Render generated instances as a rich table, one column per model field.

    Column order follows the first instance's fields. Assumes Pydantic
    models (uses model_dump() for headers) — TODO confirm for non-Pydantic
    inputs. Does nothing for an empty list.
    """
    if not instances:
        return

    first_item = instances[0]
    headers = first_item.model_dump().keys()

    table = Table(show_header=True, header_style="bold magenta")

    for h in headers:
        table.add_column(h)

    for item in instances:
        # str() everything so non-string field values render cleanly.
        row = [str(getattr(item, h)) for h in headers]
        table.add_row(*row)

    console.print(table)
|
|
File without changes
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import csv
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import List, Any
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
from datetime import date, datetime
|
|
7
|
+
|
|
8
|
+
class DataExporter:
    """Handles exporting generated data to files (JSON, CSV, SQL).

    Items may be Pydantic models (serialized via model_dump()) or plain
    objects (serialized via __dict__). The target format is chosen by the
    output file extension.
    """

    @staticmethod
    def export(data: List[Any], filename: str):
        """Export data based on file extension (.json / .csv / .sql).

        Raises ValueError for any other extension.
        """
        path = Path(filename)
        extension = path.suffix.lower()

        if extension == ".json":
            DataExporter._to_json(data, path)
        elif extension == ".csv":
            DataExporter._to_csv(data, path)
        elif extension == ".sql":
            DataExporter._to_sql(data, path)
        else:
            raise ValueError(f"Unsupported format: {extension}")

    @staticmethod
    def _to_json(data: List[Any], path: Path):
        """Save as a JSON array of objects."""
        json_ready = [
            item.model_dump() if isinstance(item, BaseModel) else item.__dict__
            for item in data
        ]

        # Round-trip through json with default=str so dates and other
        # non-serializable values become strings and json.dump cannot fail.
        json_ready = json.loads(json.dumps(json_ready, default=str))

        with open(path, "w", encoding="utf-8") as f:
            json.dump(json_ready, f, indent=2)

        print(f"✅ Successfully exported {len(data)} items to {path}")

    @staticmethod
    def _to_csv(data: List[Any], path: Path):
        """Save as CSV; headers come from the first item's fields."""
        if not data:
            return

        first_item = data[0]
        if isinstance(first_item, BaseModel):
            headers = first_item.model_dump().keys()
            rows = [item.model_dump() for item in data]
        else:
            headers = first_item.__dict__.keys()
            rows = [item.__dict__ for item in data]

        with open(path, "w", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=headers)
            writer.writeheader()
            writer.writerows(rows)

        print(f"✅ Successfully exported {len(data)} items to {path}")

    @staticmethod
    def _to_sql(data: List[Any], path: Path):
        """Save as SQL INSERT statements.

        Table name is the pluralized lowercase class name; strings and dates
        are quoted with single-quote escaping, None maps to NULL.
        """
        if not data:
            return

        table_name = data[0].__class__.__name__.lower() + "s"  # Customer -> customers

        with open(path, "w", encoding="utf-8") as f:
            f.write("-- Auto-generated by FixtureForge\n")
            f.write(f"-- Target Table: {table_name}\n\n")

            for item in data:
                # Convert the item to a plain dict.
                record = item.model_dump() if isinstance(item, BaseModel) else item.__dict__

                columns = ", ".join(record.keys())
                values = []

                for v in record.values():
                    if isinstance(v, (str, date, datetime)):
                        # Escape single quotes for SQL string literals.
                        clean_v = str(v).replace("'", "''")
                        values.append(f"'{clean_v}'")
                    elif v is None:
                        values.append("NULL")
                    else:
                        values.append(str(v))

                vals_str = ", ".join(values)
                sql_stmt = f"INSERT INTO {table_name} ({columns}) VALUES ({vals_str});\n"
                f.write(sql_stmt)

        print(f"✅ Successfully exported {len(data)} SQL statements to {path}")
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Basic data generation using Faker & AI"""
|
|
2
|
+
from typing import Any, Type, Optional, Dict, List
|
|
3
|
+
from faker import Faker
|
|
4
|
+
import random
|
|
5
|
+
import uuid
|
|
6
|
+
from .parser import ModelParser, FieldInfo
|
|
7
|
+
from .router import IntelligentRouter, FieldTier
|
|
8
|
+
from ..ai.engine import AIEngine
|
|
9
|
+
|
|
10
|
+
class BasicGenerator:
    """Generate data using Faker based on Intelligent Routing.

    Each field is classified by IntelligentRouter into STRUCTURAL (IDs /
    foreign keys), SEMANTIC (AI-generated free text), or standard (Faker
    heuristics by field name, then by type).
    """

    def __init__(self, locale: str = "en_US", api_key: Optional[str] = None):
        self.faker = Faker(locale)
        self.router = IntelligentRouter()
        # Per-field auto-increment counters for integer primary keys.
        self._id_counters = {}
        # Lowercased model name -> previously generated instances; used to
        # resolve foreign keys like customer_id to an existing parent record.
        self.registry: Dict[str, List[Any]] = {}

        # AI is only wired up when a key is supplied; semantic fields fall
        # back to a placeholder otherwise.
        if api_key:
            self.ai_engine = AIEngine(api_key)
        else:
            self.ai_engine = None

    def generate(self, model: Type, context: Optional[str] = None, **overrides) -> Any:
        """Generate a model instance with optional context.

        Explicit keyword overrides win; every other field is routed to the
        structural / semantic / standard generator.
        """
        parser = ModelParser()
        fields = parser.parse(model)

        data = {}
        for field in fields:
            if field.name in overrides:
                data[field.name] = overrides[field.name]
            else:
                data[field.name] = self._generate_smart_value(field, context)

        instance = model(**data)
        return instance

    def _generate_smart_value(self, field: FieldInfo, context: Optional[str] = None) -> Any:
        """Dispatch a single field to the generator matching its routing tier."""
        tier = self.router.classify(field)

        if tier == FieldTier.STRUCTURAL:
            return self._generate_structural(field)

        if tier == FieldTier.SEMANTIC:
            return self._generate_semantic_content(field, context)

        return self._generate_standard(field)

    def _generate_structural(self, field: FieldInfo) -> Any:
        """Handle IDs and Foreign Keys."""
        name = field.name.lower()

        # 1. Foreign keys ("customer_id"): reuse the id of a previously
        #    generated parent record from the registry, when one exists.
        if name.endswith("_id") and name != "id":
            target_model = name.replace("_id", "")

            if target_model in self.registry and self.registry[target_model]:
                random_record = random.choice(self.registry[target_model])
                return getattr(random_record, "id")

        # 2. Integer IDs: sequential auto-increment, counted per field name.
        if "int" in field.type_name.lower():
            if field.name not in self._id_counters:
                self._id_counters[field.name] = 1

            val = self._id_counters[field.name]
            self._id_counters[field.name] += 1
            return val

        # Non-integer IDs (or FKs with no registered parent) get a fresh UUID.
        return str(uuid.uuid4())

    def _generate_semantic_content(self, field: FieldInfo, context: Optional[str] = None) -> str:
        """Generate REAL content using AI with Context.

        Falls back to a visible placeholder string when no AI engine / key
        is configured, so generation never hard-fails.
        """
        if not self.ai_engine or not self.ai_engine.api_key:
            return f"[AI Placeholder for {field.name}]"

        prompt = f"Generate a realistic value for a database field named '{field.name}'."

        if context:
            prompt += f" IMPORTANT CONTEXT: {context}."

        prompt += " Output ONLY the value, no quotes."

        return self.ai_engine.generate_text(prompt)

    def _generate_standard(self, field: FieldInfo) -> Any:
        """Standard Faker generation - Strictly Cleaned for Linter.

        NOTE: the name checks are substring matches and order-sensitive
        (e.g. "email" is tested before "name" so "email_name"-style fields
        hit the email branch first); type checks are the fallback.
        """
        name = field.name.lower()
        type_name = field.type_name.lower()

        # Name-based heuristics - Broken down line by line
        if "email" in name:
            return self.faker.email()

        if "name" in name:
            return self.faker.name()

        if "address" in name:
            return self.faker.address()

        if "phone" in name:
            return self.faker.phone_number()

        if "date" in name or "time" in name:
            return self.faker.date_time_this_year()

        if "city" in name:
            return self.faker.city()

        if "country" in name:
            return self.faker.country()

        # Type fallback
        if "int" in type_name:
            return self._generate_int(field)

        if "bool" in type_name:
            return self.faker.boolean()

        if "float" in type_name:
            return random.uniform(0, 1000)

        # Last resort: a random word for anything unrecognized.
        return self.faker.word()

    def _generate_int(self, field: FieldInfo) -> int:
        """Random int honoring ge/gt/le/lt constraints (defaults 0..10000)."""
        min_val, max_val = 0, 10000
        if field.constraints:
            # gt/lt are exclusive bounds, so shift by one to make them inclusive.
            min_val = field.constraints.get("ge", field.constraints.get("gt", -1) + 1)
            max_val = field.constraints.get("le", field.constraints.get("lt", 10001) - 1)

        # Guard against an inverted range (e.g. only "ge" given above 10000).
        if min_val > max_val:
            max_val = min_val + 100

        return random.randint(min_val, max_val)
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Model introspection and field extraction (Pydantic Edition)
|
|
3
|
+
"""
|
|
4
|
+
from typing import Any, Dict, List, Type, get_origin, get_args
|
|
5
|
+
from pydantic import BaseModel
|
|
6
|
+
|
|
7
|
+
# --- SQLAlchemy Support Check ---
|
|
8
|
+
try:
|
|
9
|
+
from sqlalchemy import inspect as sa_inspect
|
|
10
|
+
from sqlalchemy.orm import DeclarativeMeta
|
|
11
|
+
HAS_SQLALCHEMY = True
|
|
12
|
+
except ImportError:
|
|
13
|
+
HAS_SQLALCHEMY = False
|
|
14
|
+
# SQLA
|
|
15
|
+
DeclarativeMeta = type("DeclarativeMeta", (), {})
|
|
16
|
+
sa_inspect = None
|
|
17
|
+
|
|
18
|
+
class FieldInfo:
    """Describes a single model field: type, requiredness, default,
    validation constraints, and free-form metadata."""

    def __init__(
        self,
        name: str,
        field_type: Type,
        required: bool = True,
        default: Any = None,
        constraints: Dict[str, Any] = None,
        metadata: Dict[str, Any] = None
    ):
        self.name = name
        self.field_type = field_type
        self.required = required
        self.default = default
        # Normalize missing/empty collections to fresh dicts.
        self.constraints = constraints or {}
        self.metadata = metadata or {}

    @property
    def type_name(self) -> str:
        """Human-readable type name, e.g. 'int' or 'list[int]'."""
        try:
            origin = get_origin(self.field_type)
            if origin is None:
                # Plain (non-generic) type.
                return self.field_type.__name__

            args = get_args(self.field_type)
            if not args:
                return origin.__name__

            # Render generic parameters, falling back to str() for
            # things without a __name__ (e.g. None, Literal values).
            rendered = ', '.join(
                a.__name__ if hasattr(a, '__name__') else str(a)
                for a in args
            )
            return f"{origin.__name__}[{rendered}]"
        except Exception:
            # Defensive fallback for exotic typing constructs.
            return str(self.field_type)
|
|
55
|
+
|
|
56
|
+
class ModelParser:
    """Parse Pydantic models into a flat list of FieldInfo objects."""

    # Constraint attributes recognized on pydantic-v2 metadata objects
    # (the annotated_types instances attached by Field(ge=..., ...)).
    _CONSTRAINT_ATTRS = ('ge', 'le', 'gt', 'lt', 'min_length', 'max_length', 'pattern')

    @classmethod
    def parse(cls, model: Type[BaseModel]) -> List[FieldInfo]:
        """Parse a Pydantic V2 model via model_fields introspection."""
        parsed = []

        for field_name, field_info in model.model_fields.items():
            annotation = field_info.annotation
            is_required = field_info.is_required()
            # Optional fields keep their declared default; required ones get None.
            default_value = None if is_required else field_info.default

            # Collect numeric / length / pattern constraints from the
            # metadata objects pydantic attaches to the field.
            constraints = {}
            for meta in field_info.metadata or []:
                for attr in cls._CONSTRAINT_ATTRS:
                    value = getattr(meta, attr, None)
                    if value is not None:
                        constraints[attr] = value

            # Carry the description/examples through for AI prompting.
            metadata = {
                "description": field_info.description,
                "examples": getattr(field_info, 'examples', None)
            }

            parsed.append(FieldInfo(
                name=field_name,
                field_type=annotation,
                required=is_required,
                default=default_value,
                constraints=constraints,
                metadata=metadata
            ))

        return parsed
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Recipe Runner: Executes YAML-based data generation scenarios.
|
|
3
|
+
"""
|
|
4
|
+
import yaml
|
|
5
|
+
from typing import Dict, Type
|
|
6
|
+
from pydantic import create_model, Field
|
|
7
|
+
from fixtureforge import forge
|
|
8
|
+
|
|
9
|
+
class RecipeRunner:
    """Parses YAML recipes and executes generation steps"""

    def __init__(self, recipe_path: str):
        self.recipe_path = recipe_path
        # YAML type names -> concrete Python types for dynamic model fields.
        self.type_mapping = {
            "int": int,
            "str": str,
            "float": float,
            "bool": bool,
            "list": list,
            "dict": dict
        }

    def run(self):
        """Execute the recipe"""
        with open(self.recipe_path, 'r', encoding='utf-8') as f:
            recipe = yaml.safe_load(f)

        print(f"📜 Running Recipe: {self.recipe_path}")

        results = {}

        for step in recipe.get("steps", []):
            model_name = step["model"]
            count = step.get("count", 1)
            context = step.get("context", None)

            # 1
            dynamic_model = self._create_dynamic_model(
                model_name, step.get("fields", {})
            )

            print(f"\n🏗️ Step: Creating {count} x {model_name}...")
            if context:
                print(f" 🎬 Context: {context}")

            # 2
            generated_data = forge.create_batch(
                dynamic_model, count=count, context=context
            )

            results[model_name] = generated_data

            # 3
            self._print_sample(generated_data)

        return results

    def _create_dynamic_model(self, name: str, fields_config: Dict[str, str]) -> Type:
        """Dynamically build a Pydantic model from YAML definitions"""
        # Unknown type names deliberately fall back to str.
        pydantic_fields = {
            field_name: (
                self.type_mapping.get(type_name, str),
                Field(description=f"Dynamic field {field_name}"),
            )
            for field_name, type_name in fields_config.items()
        }
        return create_model(name, **pydantic_fields)

    def _print_sample(self, data: list):
        """Print a summary of generated data"""
        # Print only first 3 items for brevity
        for item in data[:3]:
            d = item.model_dump()
            print(f" ✅ {d}")
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
from enum import Enum, auto
|
|
2
|
+
from .parser import FieldInfo
|
|
3
|
+
|
|
4
|
+
class FieldTier(Enum):
    """Generation tier assigned to a model field by the IntelligentRouter.

    Tier decides *which engine* produces the value, ordered roughly by
    cost: deterministic structural values, then Faker-style providers,
    then AI text generation.
    """

    STRUCTURAL = auto() # IDs, PKs
    STANDARD = auto()   # Faker (Name, Email, Dates)
    SEMANTIC = auto()   # AI (Bio, Reviews, Descriptions)
|
|
8
|
+
|
|
9
|
+
class IntelligentRouter:
    """
    The Brain: Classifies fields into generation tiers based on metadata.
    """

    # Name fragments that Faker-style providers handle well.
    _STANDARD_KEYWORDS = (
        "name", "email", "phone", "address", "city",
        "country", "date", "time", "url", "link",
    )

    # Name fragments indicating free text that should go to the AI engine.
    _SEMANTIC_KEYWORDS = (
        "description", "bio", "review", "comment", "reason",
        "message", "content", "summary", "feedback", "about", "story",
    )

    def classify(self, field: FieldInfo) -> FieldTier:
        """Return the generation tier for a single parsed field."""
        name = field.name.lower()
        type_str = field.type_name.lower()
        constraints = field.constraints

        # --- Tier 0: Structural (identifiers, but not uuid/validity flags)
        looks_like_id = name == "id" or "_id" in name
        if looks_like_id and "valid" not in name and "uuid" not in name:
            return FieldTier.STRUCTURAL

        if constraints.get("primary_key"):
            return FieldTier.STRUCTURAL

        if any(fragment in name for fragment in self._STANDARD_KEYWORDS):
            return FieldTier.STANDARD

        # --- Tier 2: Semantic ---
        if "str" in type_str:
            if any(fragment in name for fragment in self._SEMANTIC_KEYWORDS):
                return FieldTier.SEMANTIC

            # Long or unbounded strings (max_length defaults to 1000 when
            # absent) are treated as free text.
            if constraints.get("max_length", 1000) > 200:
                return FieldTier.SEMANTIC

        # --- Tier 1: Standard ---
        return FieldTier.STANDARD
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data Streamer - The heart of Lazy Evaluation.
|
|
3
|
+
Handles writing data incrementally to avoid Memory Explosion.
|
|
4
|
+
"""
|
|
5
|
+
import csv
|
|
6
|
+
import json
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
class DataStreamer:
    """Incrementally writes generated items to a JSON, CSV, or SQL file.

    The output format is selected from the target file's extension.
    Items are written one at a time so large datasets never need to be
    held in memory (lazy evaluation).
    """

    def __init__(self, filename: str):
        self.path = Path(filename)
        self.extension = self.path.suffix.lower()
        self.file_handle = None
        self.writer = None       # lazily-created csv.DictWriter
        self.first_item = True   # drives JSON comma placement / CSV header
        self.is_open = False

    def start(self):
        """Opens the file stream"""
        self.file_handle = open(self.path, "w", encoding="utf-8", newline="")
        self.is_open = True

        if self.extension == ".json":
            self.file_handle.write("[\n")  # Start of JSON array
        elif self.extension == ".sql":
            self.file_handle.write("-- Auto-generated Stream by FixtureForge\n")

    def write(self, item: Any):
        """Writes a single item to the stream.

        Accepts any object exposing ``model_dump()`` (Pydantic models,
        including dynamically created ones); otherwise falls back to the
        object's ``__dict__``.

        Raises:
            RuntimeError: If the stream was not opened with ``start()``.
        """
        if not self.is_open:
            raise RuntimeError("Stream is not open. Call start() first.")

        # Duck-type instead of isinstance(item, BaseModel): behavior is
        # identical for Pydantic models but avoids a hard coupling here.
        data = item.model_dump() if hasattr(item, "model_dump") else item.__dict__

        if self.extension == ".json":
            self._write_json_item(data)
        elif self.extension == ".csv":
            self._write_csv_item(data)
        elif self.extension == ".sql":
            self._write_sql_item(data, item)

    def close(self):
        """Closes the stream properly.

        Safe to call when ``start()`` was never invoked, and idempotent
        on repeated calls.
        """
        if not self.is_open:
            # Bug fix: the JSON terminator used to be written
            # unconditionally, crashing when the stream was never opened
            # and corrupting output on a double close.
            return

        if self.extension == ".json":
            self.file_handle.write("\n]")  # End of JSON array

        self.file_handle.close()
        self.is_open = False
        print(f"✅ Stream closed. Data saved to {self.path}")

    def _write_json_item(self, data: dict):
        # Items are comma-separated; the first item has no predecessor.
        if not self.first_item:
            self.file_handle.write(",\n")
        # default=str makes dates/UUIDs and similar types serializable.
        json_str = json.dumps(data, default=str, indent=2)
        self.file_handle.write(json_str)
        self.first_item = False

    def _write_csv_item(self, data: dict):
        if self.first_item:
            # Header row is derived from the first item's keys.
            self.writer = csv.DictWriter(self.file_handle, fieldnames=data.keys())
            self.writer.writeheader()

        self.writer.writerow(data)
        self.first_item = False

    def _write_sql_item(self, data: dict, original_obj: Any):
        # Naive pluralization: table name is the lowercased class name + "s".
        table_name = original_obj.__class__.__name__.lower() + "s"
        columns = ", ".join(data.keys())
        values = []

        for v in data.values():
            if v is None:
                values.append("NULL")
            else:
                # Escape embedded single quotes per SQL convention.
                clean_v = str(v).replace("'", "''")
                values.append(f"'{clean_v}'")

        vals_str = ", ".join(values)
        self.file_handle.write(f"INSERT INTO {table_name} ({columns}) VALUES ({vals_str});\n")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "fixtureforge"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = ""
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "Your Name",email = "you@example.com"}
|
|
7
|
+
]
|
|
8
|
+
requires-python = ">=3.11"
|
|
9
|
+
dependencies = [
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
[build-system]
|
|
14
|
+
requires = ["poetry-core>=2.0.0,<3.0.0"]
|
|
15
|
+
build-backend = "poetry.core.masonry.api"
|