frootai 3.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
frootai-3.3.0/PKG-INFO ADDED
@@ -0,0 +1,204 @@
1
+ Metadata-Version: 2.4
2
+ Name: frootai
3
+ Version: 3.3.0
4
+ Summary: FrootAI SDK — The open glue for AI architecture. Offline access to 16 knowledge modules, 20 solution plays, cost estimation, evaluation, and A/B testing.
5
+ Author-email: Pavleen Bali <pavleenbali@frootai.dev>
6
+ License: MIT
7
+ Project-URL: Homepage, https://frootai.dev
8
+ Project-URL: Repository, https://github.com/gitpavleenbali/frootai
9
+ Project-URL: Documentation, https://frootai.dev/api-docs
10
+ Keywords: frootai,ai,architecture,azure,mcp,agents,rag,sdk
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Libraries
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+
23
+ # FrootAI — Python SDK
24
+
25
+ > Offline-first access to 16 AI architecture knowledge modules, 20 solution plays, cost estimation, evaluation, and A/B testing. Zero external dependencies.
26
+
27
+ [![PyPI](https://img.shields.io/pypi/v/frootai)](https://pypi.org/project/frootai/)
28
+ [![Python](https://img.shields.io/pypi/pyversions/frootai)](https://pypi.org/project/frootai/)
29
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
30
+
31
+ ## Install
32
+
33
+ ```bash
34
+ pip install frootai
35
+ ```
36
+
37
+ ## Quick Start
38
+
39
+ ```python
40
+ from frootai import FrootAI, SolutionPlay, Evaluator
41
+
42
+ # Search 643KB knowledge base (16 modules across 5 FROOT layers)
43
+ client = FrootAI()
44
+ results = client.search("RAG architecture")
45
+ for r in results:
46
+ print(f"[{r['module_id']}] {r['title']} — {r['relevance']} hits")
47
+
48
+ # Get a specific module
49
+ module = client.get_module("R2") # RAG Architecture & Retrieval
50
+ print(f"{module['title']}: {module['content_length']:,} chars")
51
+
52
+ # List all FROOT layers
53
+ for layer in client.list_layers():
54
+ print(f"{layer['emoji']} {layer['name']} ({len(layer['modules'])} modules)")
55
+
56
+ # Estimate Azure costs
57
+ cost = client.estimate_cost("01-enterprise-rag", scale="prod")
58
+ print(f"${cost['monthly_total']}/mo")
59
+
60
+ # Browse 20 solution plays
61
+ plays = SolutionPlay.all()
62
+ ready = SolutionPlay.ready() # 3 production-ready
63
+ by_layer = SolutionPlay.by_layer("R") # Reasoning layer plays
64
+ ```
65
+
66
+ ## Features
67
+
68
+ ### Knowledge Search (offline, no API calls)
69
+
70
+ ```python
71
+ client = FrootAI()
72
+
73
+ # Full-text search across 16 modules (643KB of real content)
74
+ results = client.search("embeddings", max_results=5)
75
+
76
+ # Get module by ID
77
+ mod = client.get_module("O2") # AI Agents & Microsoft Agent Framework
78
+
79
+ # List all modules
80
+ for m in client.list_modules():
81
+ print(f"{m['emoji']} {m['id']} {m['title']} ({m['content_length'] // 1024}KB)")
82
+
83
+ # Extract a specific section
84
+ section = client.get_module_section("F1", "Table of Contents")
85
+ ```
86
+
87
+ ### Glossary (159+ terms extracted from content)
88
+
89
+ ```python
90
+ # Look up a term
91
+ term = client.lookup_term("temperature")
92
+
93
+ # Search glossary
94
+ terms = client.search_glossary("embedding", max_results=10)
95
+ ```
96
+
97
+ ### Cost Estimation
98
+
99
+ ```python
100
+ # Estimate monthly Azure costs for a solution play
101
+ cost = client.estimate_cost("01-enterprise-rag", scale="dev")
102
+ # {'play': '01-enterprise-rag', 'scale': 'dev', 'monthly_total': 430, 'breakdown': {...}}
103
+
104
+ cost = client.estimate_cost("01-enterprise-rag", scale="prod")
105
+ # {'monthly_total': 3600, 'breakdown': {'openai-gpt4o': 2500, 'ai-search-standard': 750, ...}}
106
+ ```
107
+
108
+ ### Solution Plays (20 pre-tuned architecture blueprints)
109
+
110
+ ```python
111
+ from frootai.plays import SolutionPlay
112
+
113
+ play = SolutionPlay.get("03")
114
+ print(f"{play.name}: {play.description}")
115
+ print(f"Infrastructure: {play.infra}")
116
+ print(f"Tuning params: {play.tuning}")
117
+ print(f"Related modules: {play.modules}")
118
+
119
+ # Filter by FROOT layer
120
+ orchestration_plays = SolutionPlay.by_layer("O_ORCH")
121
+ ```
122
+
123
+ ### Evaluation (quality gates)
124
+
125
+ ```python
126
+ from frootai import Evaluator
127
+
128
+ evaluator = Evaluator()
129
+ scores = {"groundedness": 4.5, "relevance": 3.2, "coherence": 4.1, "fluency": 4.8}
130
+
131
+ results = evaluator.check_thresholds(scores)
132
+ print(evaluator.summary(scores))
133
+ # 3/4 checks passed (relevance 3.2 < threshold 4.0)
134
+ ```
135
+
136
+ ### A/B Testing (prompt experiments)
137
+
138
+ ```python
139
+ from frootai.ab_testing import PromptExperiment, PromptVariant
140
+
141
+ # You provide the model function — no fake scores
142
+ def my_model(system_prompt, query):
143
+ return call_your_llm(system_prompt=system_prompt, user_message=query)
144
+
145
+ def my_scorer(query, response):
146
+ return {"groundedness": 4.5, "relevance": 4.0}
147
+
148
+ experiment = PromptExperiment(
149
+ name="system-prompt-v2",
150
+ variants=[
151
+ PromptVariant("control", "You are a helpful assistant."),
152
+ PromptVariant("expert", "You are an Azure AI expert. Cite sources."),
153
+ ],
154
+ )
155
+
156
+ results = experiment.run(["What is RAG?"], model_fn=my_model, scorer_fn=my_scorer)
157
+ print(f"Winner: {experiment.pick_winner(results)}")
158
+ ```
159
+
160
+ ## CLI
161
+
162
+ ```bash
163
+ frootai plays # List all 20 solution plays
164
+ frootai plays --ready # Show production-ready plays only
165
+ frootai plays --layer R # Filter by FROOT layer
166
+ frootai search "embeddings" # Search knowledge base
167
+ frootai modules # List all 16 modules with sizes
168
+ frootai glossary temperature # Look up a term
169
+ frootai cost 01-enterprise-rag # Estimate Azure costs
170
+ frootai cost 01-enterprise-rag --scale prod
171
+ frootai --version # Show version
172
+ ```
173
+
174
+ ## What's Inside
175
+
176
+ - **16 knowledge modules** (643KB) across 5 FROOT layers: Foundations, Reasoning, Orchestration, Operations, Transformation
177
+ - **20 solution plays** with infrastructure, tuning parameters, and module mapping
178
+ - **159+ glossary terms** extracted from module content
179
+ - **Cost estimation** for 10 plays with dev/prod breakdowns
180
+ - **Evaluation framework** with configurable thresholds
181
+ - **A/B testing framework** with real model callbacks (no fake scores)
182
+ - **Zero external dependencies** — pure Python stdlib
183
+
184
+ ## FROOT Layers
185
+
186
+ | Layer | Emoji | Name | Modules |
187
+ |-------|-------|------|---------|
188
+ | F | 🌱 | Foundations | F1-F4 (GenAI, LLMs, Glossary, Agentic OS) |
189
+ | R | 🪵 | Reasoning | R1-R3 (Prompts, RAG, Deterministic AI) |
190
+ | O_ORCH | 🌿 | Orchestration | O1-O3 (Semantic Kernel, Agents, MCP) |
191
+ | O_OPS | 🏗️ | Operations | O4-O6 (Platform, Infrastructure, Copilot) |
192
+ | T | 🍎 | Transformation | T1-T3 (Fine-Tuning, Responsible AI, Production) |
193
+
194
+ ## Links
195
+
196
+ - **Website:** [frootai.dev](https://frootai.dev)
197
+ - **npm MCP Server:** [frootai-mcp](https://www.npmjs.com/package/frootai-mcp)
198
+ - **VS Code Extension:** [pavleenbali.frootai](https://marketplace.visualstudio.com/items?itemName=pavleenbali.frootai)
199
+ - **GitHub:** [github.com/gitpavleenbali/frootai](https://github.com/gitpavleenbali/frootai)
200
+ - **Python MCP Server:** [frootai-mcp (PyPI)](https://pypi.org/project/frootai-mcp/)
201
+
202
+ ## License
203
+
204
+ MIT — Pavleen Bali
@@ -0,0 +1,182 @@
1
+ # FrootAI — Python SDK
2
+
3
+ > Offline-first access to 16 AI architecture knowledge modules, 20 solution plays, cost estimation, evaluation, and A/B testing. Zero external dependencies.
4
+
5
+ [![PyPI](https://img.shields.io/pypi/v/frootai)](https://pypi.org/project/frootai/)
6
+ [![Python](https://img.shields.io/pypi/pyversions/frootai)](https://pypi.org/project/frootai/)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
8
+
9
+ ## Install
10
+
11
+ ```bash
12
+ pip install frootai
13
+ ```
14
+
15
+ ## Quick Start
16
+
17
+ ```python
18
+ from frootai import FrootAI, SolutionPlay, Evaluator
19
+
20
+ # Search 643KB knowledge base (16 modules across 5 FROOT layers)
21
+ client = FrootAI()
22
+ results = client.search("RAG architecture")
23
+ for r in results:
24
+ print(f"[{r['module_id']}] {r['title']} — {r['relevance']} hits")
25
+
26
+ # Get a specific module
27
+ module = client.get_module("R2") # RAG Architecture & Retrieval
28
+ print(f"{module['title']}: {module['content_length']:,} chars")
29
+
30
+ # List all FROOT layers
31
+ for layer in client.list_layers():
32
+ print(f"{layer['emoji']} {layer['name']} ({len(layer['modules'])} modules)")
33
+
34
+ # Estimate Azure costs
35
+ cost = client.estimate_cost("01-enterprise-rag", scale="prod")
36
+ print(f"${cost['monthly_total']}/mo")
37
+
38
+ # Browse 20 solution plays
39
+ plays = SolutionPlay.all()
40
+ ready = SolutionPlay.ready() # 3 production-ready
41
+ by_layer = SolutionPlay.by_layer("R") # Reasoning layer plays
42
+ ```
43
+
44
+ ## Features
45
+
46
+ ### Knowledge Search (offline, no API calls)
47
+
48
+ ```python
49
+ client = FrootAI()
50
+
51
+ # Full-text search across 16 modules (643KB of real content)
52
+ results = client.search("embeddings", max_results=5)
53
+
54
+ # Get module by ID
55
+ mod = client.get_module("O2") # AI Agents & Microsoft Agent Framework
56
+
57
+ # List all modules
58
+ for m in client.list_modules():
59
+ print(f"{m['emoji']} {m['id']} {m['title']} ({m['content_length'] // 1024}KB)")
60
+
61
+ # Extract a specific section
62
+ section = client.get_module_section("F1", "Table of Contents")
63
+ ```
64
+
65
+ ### Glossary (159+ terms extracted from content)
66
+
67
+ ```python
68
+ # Look up a term
69
+ term = client.lookup_term("temperature")
70
+
71
+ # Search glossary
72
+ terms = client.search_glossary("embedding", max_results=10)
73
+ ```
74
+
75
+ ### Cost Estimation
76
+
77
+ ```python
78
+ # Estimate monthly Azure costs for a solution play
79
+ cost = client.estimate_cost("01-enterprise-rag", scale="dev")
80
+ # {'play': '01-enterprise-rag', 'scale': 'dev', 'monthly_total': 430, 'breakdown': {...}}
81
+
82
+ cost = client.estimate_cost("01-enterprise-rag", scale="prod")
83
+ # {'monthly_total': 3600, 'breakdown': {'openai-gpt4o': 2500, 'ai-search-standard': 750, ...}}
84
+ ```
85
+
86
+ ### Solution Plays (20 pre-tuned architecture blueprints)
87
+
88
+ ```python
89
+ from frootai.plays import SolutionPlay
90
+
91
+ play = SolutionPlay.get("03")
92
+ print(f"{play.name}: {play.description}")
93
+ print(f"Infrastructure: {play.infra}")
94
+ print(f"Tuning params: {play.tuning}")
95
+ print(f"Related modules: {play.modules}")
96
+
97
+ # Filter by FROOT layer
98
+ orchestration_plays = SolutionPlay.by_layer("O_ORCH")
99
+ ```
100
+
101
+ ### Evaluation (quality gates)
102
+
103
+ ```python
104
+ from frootai import Evaluator
105
+
106
+ evaluator = Evaluator()
107
+ scores = {"groundedness": 4.5, "relevance": 3.2, "coherence": 4.1, "fluency": 4.8}
108
+
109
+ results = evaluator.check_thresholds(scores)
110
+ print(evaluator.summary(scores))
111
+ # 3/4 checks passed (relevance 3.2 < threshold 4.0)
112
+ ```
113
+
114
+ ### A/B Testing (prompt experiments)
115
+
116
+ ```python
117
+ from frootai.ab_testing import PromptExperiment, PromptVariant
118
+
119
+ # You provide the model function — no fake scores
120
+ def my_model(system_prompt, query):
121
+ return call_your_llm(system_prompt=system_prompt, user_message=query)
122
+
123
+ def my_scorer(query, response):
124
+ return {"groundedness": 4.5, "relevance": 4.0}
125
+
126
+ experiment = PromptExperiment(
127
+ name="system-prompt-v2",
128
+ variants=[
129
+ PromptVariant("control", "You are a helpful assistant."),
130
+ PromptVariant("expert", "You are an Azure AI expert. Cite sources."),
131
+ ],
132
+ )
133
+
134
+ results = experiment.run(["What is RAG?"], model_fn=my_model, scorer_fn=my_scorer)
135
+ print(f"Winner: {experiment.pick_winner(results)}")
136
+ ```
137
+
138
+ ## CLI
139
+
140
+ ```bash
141
+ frootai plays # List all 20 solution plays
142
+ frootai plays --ready # Show production-ready plays only
143
+ frootai plays --layer R # Filter by FROOT layer
144
+ frootai search "embeddings" # Search knowledge base
145
+ frootai modules # List all 16 modules with sizes
146
+ frootai glossary temperature # Look up a term
147
+ frootai cost 01-enterprise-rag # Estimate Azure costs
148
+ frootai cost 01-enterprise-rag --scale prod
149
+ frootai --version # Show version
150
+ ```
151
+
152
+ ## What's Inside
153
+
154
+ - **16 knowledge modules** (643KB) across 5 FROOT layers: Foundations, Reasoning, Orchestration, Operations, Transformation
155
+ - **20 solution plays** with infrastructure, tuning parameters, and module mapping
156
+ - **159+ glossary terms** extracted from module content
157
+ - **Cost estimation** for 10 plays with dev/prod breakdowns
158
+ - **Evaluation framework** with configurable thresholds
159
+ - **A/B testing framework** with real model callbacks (no fake scores)
160
+ - **Zero external dependencies** — pure Python stdlib
161
+
162
+ ## FROOT Layers
163
+
164
+ | Layer | Emoji | Name | Modules |
165
+ |-------|-------|------|---------|
166
+ | F | 🌱 | Foundations | F1-F4 (GenAI, LLMs, Glossary, Agentic OS) |
167
+ | R | 🪵 | Reasoning | R1-R3 (Prompts, RAG, Deterministic AI) |
168
+ | O_ORCH | 🌿 | Orchestration | O1-O3 (Semantic Kernel, Agents, MCP) |
169
+ | O_OPS | 🏗️ | Operations | O4-O6 (Platform, Infrastructure, Copilot) |
170
+ | T | 🍎 | Transformation | T1-T3 (Fine-Tuning, Responsible AI, Production) |
171
+
172
+ ## Links
173
+
174
+ - **Website:** [frootai.dev](https://frootai.dev)
175
+ - **npm MCP Server:** [frootai-mcp](https://www.npmjs.com/package/frootai-mcp)
176
+ - **VS Code Extension:** [pavleenbali.frootai](https://marketplace.visualstudio.com/items?itemName=pavleenbali.frootai)
177
+ - **GitHub:** [github.com/gitpavleenbali/frootai](https://github.com/gitpavleenbali/frootai)
178
+ - **Python MCP Server:** [frootai-mcp (PyPI)](https://pypi.org/project/frootai-mcp/)
179
+
180
+ ## License
181
+
182
+ MIT — Pavleen Bali
@@ -0,0 +1,27 @@
1
+ """FrootAI SDK — Programmatic access to the FrootAI ecosystem.
2
+
3
+ From the Roots to the Fruits. It's simply Frootful.
4
+
5
+ Usage:
6
+ from frootai import FrootAI, SolutionPlay, Evaluator
7
+
8
+ client = FrootAI()
9
+ results = client.search("RAG architecture")
10
+ module = client.get_module("R2")
11
+ cost = client.estimate_cost("01-enterprise-rag", scale="dev")
12
+
13
+ plays = SolutionPlay.all()
14
+ play = SolutionPlay.get("03")
15
+
16
+ evaluator = Evaluator()
17
+ evaluator.check_thresholds({"groundedness": 4.2, "relevance": 3.8})
18
+ """
19
+
20
+ __version__ = "3.3.0"
21
+ __author__ = "Pavleen Bali"
22
+
23
+ from frootai.client import FrootAI
24
+ from frootai.plays import SolutionPlay
25
+ from frootai.evaluation import Evaluator
26
+
27
+ __all__ = ["FrootAI", "SolutionPlay", "Evaluator", "__version__"]
@@ -0,0 +1,162 @@
1
+ """FrootAI Prompt A/B Testing Framework.
2
+
3
+ Run prompt experiments across variants, measure quality, pick winners.
4
+ Requires a model_fn callback for actual LLM inference.
5
+
6
+ Usage:
7
+ from frootai.ab_testing import PromptExperiment, PromptVariant
8
+
9
+ def my_model(system_prompt: str, query: str) -> str:
10
+ # Call Azure OpenAI, local model, etc.
11
+ return openai_client.chat(system_prompt=system_prompt, query=query)
12
+
13
+ def my_scorer(query: str, response: str) -> dict[str, float]:
14
+ return {"groundedness": 4.5, "relevance": 4.0}
15
+
16
+ experiment = PromptExperiment(
17
+ name="rag-system-prompt-v2",
18
+ variants=[
19
+ PromptVariant("control", "You are a helpful assistant."),
20
+ PromptVariant("concise", "You are a concise assistant. Answer in 2 sentences max."),
21
+ PromptVariant("expert", "You are an Azure AI expert. Cite sources."),
22
+ ],
23
+ metrics=["groundedness", "relevance", "latency"],
24
+ )
25
+
26
+ results = experiment.run(
27
+ test_queries=["What is RAG?", "Explain embeddings"],
28
+ model_fn=my_model,
29
+ scorer_fn=my_scorer,
30
+ )
31
+ winner = experiment.pick_winner(results)
32
+ """
33
+
34
+ from dataclasses import dataclass, field
35
+ from typing import Optional, Callable
36
+ import json
37
+ import time
38
+
39
+
40
+ @dataclass
41
+ class PromptVariant:
42
+ """A single prompt variant in an A/B test."""
43
+ name: str
44
+ system_prompt: str
45
+ weight: float = 1.0
46
+
47
+
48
+ @dataclass
49
+ class ExperimentResult:
50
+ """Result of running one variant against one query."""
51
+ variant: str
52
+ query: str
53
+ response: str
54
+ latency_ms: float
55
+ scores: dict[str, float] = field(default_factory=dict)
56
+
57
+
58
+ @dataclass
59
+ class PromptExperiment:
60
+ """A/B testing experiment for prompt variants.
61
+
62
+ Attributes:
63
+ name: Experiment identifier
64
+ variants: List of prompt variants to test
65
+ metrics: Quality metrics to measure
66
+ """
67
+ name: str
68
+ variants: list[PromptVariant]
69
+ metrics: list[str] = field(default_factory=lambda: ["groundedness", "relevance", "coherence"])
70
+
71
+ def run(
72
+ self,
73
+ test_queries: list[str],
74
+ model_fn: Callable[[str, str], str],
75
+ scorer_fn: Optional[Callable[[str, str], dict[str, float]]] = None,
76
+ rounds: int = 1,
77
+ ) -> list[ExperimentResult]:
78
+ """Run the experiment using provided model and scorer functions.
79
+
80
+ Args:
81
+ test_queries: Questions to test each variant against.
82
+ model_fn: Callable(system_prompt, query) -> response string.
83
+ scorer_fn: Optional Callable(query, response) -> {metric: score}.
84
+ If not provided, only latency is measured.
85
+ rounds: Number of rounds to repeat (for statistical stability).
86
+ """
87
+ results = []
88
+ for _ in range(rounds):
89
+ for query in test_queries:
90
+ for variant in self.variants:
91
+ start = time.perf_counter()
92
+ response = model_fn(variant.system_prompt, query)
93
+ latency = (time.perf_counter() - start) * 1000
94
+
95
+ scores = {}
96
+ if scorer_fn is not None:
97
+ scores = scorer_fn(query, response)
98
+ scores["latency_ms"] = round(latency, 1)
99
+
100
+ result = ExperimentResult(
101
+ variant=variant.name,
102
+ query=query,
103
+ response=response,
104
+ latency_ms=round(latency, 1),
105
+ scores=scores,
106
+ )
107
+ results.append(result)
108
+ return results
109
+
110
+ def pick_winner(self, results: list[ExperimentResult]) -> str:
111
+ """Pick the best variant based on average scores (excluding latency)."""
112
+ variant_scores: dict[str, list[float]] = {}
113
+ for r in results:
114
+ if r.variant not in variant_scores:
115
+ variant_scores[r.variant] = []
116
+ quality_scores = {k: v for k, v in r.scores.items() if k != "latency_ms"}
117
+ if quality_scores:
118
+ avg = sum(quality_scores.values()) / len(quality_scores)
119
+ variant_scores[r.variant].append(avg)
120
+
121
+ if not variant_scores or all(len(v) == 0 for v in variant_scores.values()):
122
+ # Fall back to lowest latency if no quality scores
123
+ latencies: dict[str, list[float]] = {}
124
+ for r in results:
125
+ latencies.setdefault(r.variant, []).append(r.latency_ms)
126
+ return min(latencies, key=lambda v: sum(latencies[v]) / len(latencies[v]))
127
+
128
+ averages = {v: sum(s) / len(s) for v, s in variant_scores.items() if s}
129
+ return max(averages, key=averages.get)
130
+
131
+ def summary(self, results: list[ExperimentResult]) -> str:
132
+ """Generate experiment summary."""
133
+ lines = [f"Experiment: {self.name}", "=" * 50]
134
+ variant_data: dict[str, list] = {}
135
+ for r in results:
136
+ variant_data.setdefault(r.variant, []).append(r)
137
+
138
+ for variant, data in variant_data.items():
139
+ avg_scores: dict[str, float] = {}
140
+ all_metrics = set()
141
+ for r in data:
142
+ all_metrics.update(r.scores.keys())
143
+ for m in sorted(all_metrics):
144
+ vals = [r.scores[m] for r in data if m in r.scores]
145
+ if vals:
146
+ avg_scores[m] = sum(vals) / len(vals)
147
+ lines.append(f"\n Variant: {variant}")
148
+ lines.append(f" Samples: {len(data)}")
149
+ for m, s in avg_scores.items():
150
+ lines.append(f" {m}: {s:.2f}")
151
+
152
+ winner = self.pick_winner(results)
153
+ lines.append(f"\n Winner: {winner}")
154
+ return "\n".join(lines)
155
+
156
+ def to_json(self) -> str:
157
+ """Export experiment config as JSON."""
158
+ return json.dumps({
159
+ "name": self.name,
160
+ "variants": [{"name": v.name, "system_prompt": v.system_prompt, "weight": v.weight} for v in self.variants],
161
+ "metrics": self.metrics,
162
+ }, indent=2)