adaptive-iteration 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- adaptive_iteration/__init__.py +12 -0
- adaptive_iteration/adapters/__init__.py +5 -0
- adaptive_iteration/adapters/base.py +81 -0
- adaptive_iteration/adapters/short_video.py +178 -0
- adaptive_iteration/core/__init__.py +1 -0
- adaptive_iteration/core/analyzer.py +153 -0
- adaptive_iteration/core/config.py +106 -0
- adaptive_iteration/core/experiment.py +125 -0
- adaptive_iteration/core/hypothesis.py +190 -0
- adaptive_iteration/core/ledger.py +119 -0
- adaptive_iteration-0.1.0.dist-info/METADATA +165 -0
- adaptive_iteration-0.1.0.dist-info/RECORD +13 -0
- adaptive_iteration-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""adaptive_iteration — Domain-agnostic adaptive experiment framework.
|
|
2
|
+
|
|
3
|
+
Import paths:
|
|
4
|
+
from adaptive_iteration.core.experiment import Experiment, Variant, ExperimentState
|
|
5
|
+
from adaptive_iteration.core.ledger import Ledger
|
|
6
|
+
from adaptive_iteration.core.analyzer import Analyzer
|
|
7
|
+
from adaptive_iteration.core.hypothesis import HypothesisEngine
|
|
8
|
+
from adaptive_iteration.core.config import AdaptiveConfig
|
|
9
|
+
from adaptive_iteration.adapters.base import DomainAdapter
|
|
10
|
+
from adaptive_iteration.adapters.short_video import ShortVideoAdapter
|
|
11
|
+
"""
|
|
12
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""adapters/base.py — DomainAdapter ABC.
|
|
2
|
+
|
|
3
|
+
Every domain (short video, blog, email, ...) must implement these three methods.
|
|
4
|
+
core/ never imports from here; HypothesisEngine receives the outputs as plain dicts/strings.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DomainAdapter(ABC):
    """Bridge between adaptive_iteration.core and a concrete external system.

    Concrete adapters (short video, blog, email, ...) implement three
    required methods:

    collect_metrics(item_ids)
        Pull raw metric data for the given item IDs. Returns one dict per
        item; each dict must carry the item's ID under some consistent key
        (e.g. "id").

    get_signals(metrics)
        Normalise raw metrics into framework-friendly signals. Returns a
        dict with:
          - "primary_metric": float (the single most-important KPI for ranking)
          - "secondary_metrics": dict (any additional useful metrics)

    format_context(top_items, bottom_items)
        Produce a human-readable text block describing top and bottom
        performers. The string is injected verbatim into the
        HypothesisEngine prompt.

    core/ never imports this module; HypothesisEngine receives the outputs
    as plain dicts/strings.
    """

    # ── Required ───────────────────────────────────────────────────────────────

    @abstractmethod
    def collect_metrics(self, item_ids: list[str]) -> list[dict[str, Any]]:
        """Fetch raw metrics for *item_ids* from the external system.

        Parameters
        ----------
        item_ids : list of platform-specific IDs (video IDs, post IDs, …)

        Returns
        -------
        list[dict]
            One dict per item. The shape is domain-specific but must be
            consistent with what get_signals() expects.
        """
        ...

    @abstractmethod
    def get_signals(self, metrics: list[dict[str, Any]]) -> dict[str, Any]:
        """Normalise raw metrics into a standard signals dict.

        Expected output schema
        ----------------------
        {
            "primary_metric": <float>,  # main ranking KPI
            "secondary_metrics": {<str>: <float>, ...},
        }
        """
        ...

    @abstractmethod
    def format_context(
        self,
        top_items: list[dict[str, Any]],
        bottom_items: list[dict[str, Any]],
    ) -> str:
        """Return a text block describing top vs bottom performers.

        Used verbatim in the HypothesisEngine LLM prompt.
        """
        ...

    # ── Optional convenience ───────────────────────────────────────────────────

    def describe(self) -> str:
        """Short human-readable description of this adapter (for logs/docs)."""
        return type(self).__name__
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""adapters/short_video.py — Example ShortVideoAdapter.
|
|
2
|
+
|
|
3
|
+
This is a reference implementation showing how to build a DomainAdapter
|
|
4
|
+
for short-form video platforms (e.g. YouTube Shorts, Instagram Reels).
|
|
5
|
+
|
|
6
|
+
In a real deployment, replace `collect_metrics` and `get_signals` with
|
|
7
|
+
calls to your actual analytics backend.
|
|
8
|
+
|
|
9
|
+
Usage
|
|
10
|
+
-----
|
|
11
|
+
from adaptive_iteration.adapters.short_video import ShortVideoAdapter
|
|
12
|
+
|
|
13
|
+
adapter = ShortVideoAdapter(platform="youtube")
|
|
14
|
+
metrics = adapter.collect_metrics(["video_001", "video_002"])
|
|
15
|
+
signals = adapter.get_signals(metrics)
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import random
|
|
20
|
+
from typing import Any
|
|
21
|
+
|
|
22
|
+
from .base import DomainAdapter
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class ShortVideoAdapter(DomainAdapter):
    """Example DomainAdapter for short-video platforms.

    Supports "youtube" and "instagram" as platform targets. Metrics are
    simulated — replace with your real analytics calls.

    Parameters
    ----------
    platform : "youtube" or "instagram"
    """

    SUPPORTED_PLATFORMS = ("youtube", "instagram")

    def __init__(self, platform: str) -> None:
        if platform not in self.SUPPORTED_PLATFORMS:
            msg = (
                f"Unsupported platform {platform!r}. "
                f"Choose from {self.SUPPORTED_PLATFORMS}."
            )
            raise ValueError(msg)
        self.platform = platform

    # ── DomainAdapter interface ────────────────────────────────────────────────

    def collect_metrics(self, item_ids: list[str]) -> list[dict[str, Any]]:
        """Fetch metrics for each item ID.

        This implementation returns simulated data for illustration —
        replace with real API calls to your analytics provider.
        """
        if self.platform == "youtube":
            return [self._fake_yt_row(item_id) for item_id in item_ids]
        return [self._fake_ig_row(item_id) for item_id in item_ids]

    def get_signals(self, metrics: list[dict[str, Any]]) -> dict[str, Any]:
        """Normalise raw metrics into framework signals.

        YouTube   → primary_metric = avg_view_pct (0–100)
        Instagram → primary_metric = engagement_score
                    = (saves×3 + shares×2 + comments×2 + likes) / reach × 100
        """
        if not metrics:
            return {"primary_metric": 0.0, "secondary_metrics": {}}
        reducer = self._yt_signals if self.platform == "youtube" else self._ig_signals
        return reducer(metrics)

    def format_context(
        self,
        top_items: list[dict[str, Any]],
        bottom_items: list[dict[str, Any]],
    ) -> str:
        """Build the top-vs-bottom performer text block for the LLM prompt."""
        out = [f"Platform: {self.platform}", "\nTop performers:"]
        out.extend(self._fmt(item) for item in top_items)
        out.append("\nBottom performers:")
        out.extend(self._fmt(item) for item in bottom_items)
        return "\n".join(out)

    def describe(self) -> str:
        """Short human-readable description of this adapter."""
        return f"ShortVideoAdapter(platform={self.platform!r})"

    # ── Internal ───────────────────────────────────────────────────────────────

    @staticmethod
    def _fake_yt_row(item_id: str) -> dict[str, Any]:
        # Simulated YouTube analytics record for one video.
        return {
            "id": item_id,
            "views": random.randint(500, 50_000),
            "avg_view_pct": round(random.uniform(30, 90), 1),
            "avg_view_sec": round(random.uniform(15, 75), 1),
            "like_rate": round(random.uniform(0.5, 8.0), 2),
            "subscribers_gained": random.randint(0, 50),
        }

    @staticmethod
    def _fake_ig_row(item_id: str) -> dict[str, Any]:
        # Simulated Instagram analytics record; interaction counts scale with reach.
        reach = random.randint(200, 20_000)
        likes = random.randint(10, int(reach * 0.15))
        return {
            "id": item_id,
            "reach": reach,
            "likes": likes,
            "saves": random.randint(0, int(reach * 0.05)),
            "shares": random.randint(0, int(reach * 0.03)),
            "comments": random.randint(0, int(reach * 0.02)),
            "avg_watch_time_ms": random.randint(3_000, 30_000),
            "total_interactions": likes + random.randint(5, 100),
        }

    def _yt_signals(self, metrics: list[dict]) -> dict[str, Any]:
        # Records missing avg_view_pct are excluded from the primary KPI only;
        # secondary averages run over ALL records (missing keys count as 0).
        with_pct = [m for m in metrics if m.get("avg_view_pct") is not None]
        if not with_pct:
            return {"primary_metric": 0.0, "secondary_metrics": {}}

        def avg(key: str) -> float:
            return sum(m.get(key, 0) for m in metrics) / len(metrics)

        primary = sum(m["avg_view_pct"] for m in with_pct) / len(with_pct)
        return {
            "primary_metric": round(primary, 2),
            "secondary_metrics": {
                "views": round(avg("views"), 1),
                "like_rate": round(avg("like_rate"), 3),
                "avg_view_sec": round(avg("avg_view_sec"), 1),
                "subscribers_gained": round(avg("subscribers_gained"), 2),
            },
        }

    def _ig_signals(self, metrics: list[dict]) -> dict[str, Any]:
        # Only records with positive reach contribute to the engagement score;
        # secondary averages run over ALL records (missing keys count as 0).
        reachable = [m for m in metrics if m.get("reach", 0) > 0]
        if not reachable:
            return {"primary_metric": 0.0, "secondary_metrics": {}}

        def avg(key: str) -> float:
            return sum(m.get(key, 0) for m in metrics) / len(metrics)

        def engagement(m: dict) -> float:
            weighted = (
                m.get("saves", 0) * 3
                + m.get("shares", 0) * 2
                + m.get("comments", 0) * 2
                + m.get("likes", 0)
            )
            return weighted / m.get("reach", 1) * 100

        like_pct = sum(
            m.get("likes", 0) / max(m.get("reach", 1), 1) * 100 for m in metrics
        ) / len(metrics)
        return {
            "primary_metric": round(sum(engagement(m) for m in reachable) / len(reachable), 4),
            "secondary_metrics": {
                "reach": round(avg("reach"), 1),
                "like_rate": round(like_pct, 3),
                "avg_watch_time_s": round(avg("avg_watch_time_ms") / 1000, 2),
                "shares": round(avg("shares"), 2),
                "saved": round(avg("saves"), 2),
                "total_interactions": round(avg("total_interactions"), 2),
            },
        }

    def _fmt(self, item: dict) -> str:
        """One-line summary of a single item, used by format_context()."""
        item_id = item.get("id", "?")
        if self.platform == "youtube":
            return (
                f"  [{item_id}] "
                f"avg_view_pct={item.get('avg_view_pct')}% "
                f"views={item.get('views')} "
                f"like_rate={item.get('like_rate')}%"
            )
        reach = item.get("reach", 0)
        likes = item.get("likes", 0)
        like_pct = round(likes / reach * 100, 2) if reach > 0 else 0.0
        return (
            f"  [{item_id}] "
            f"like_rate={like_pct}% "
            f"reach={reach} "
            f"avg_watch={item.get('avg_watch_time_ms', 0) // 1000}s"
        )
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# adaptive_iteration.core — pure Python, zero domain dependencies
|
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
"""core/analyzer.py — Pattern detection from ledger records.
|
|
2
|
+
|
|
3
|
+
Finds top / bottom performers and common features, computes per-dimension variance,
|
|
4
|
+
and produces a structured AnalysisResult for use by HypothesisEngine.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import statistics
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import Any, Optional
|
|
11
|
+
|
|
12
|
+
from .ledger import Ledger
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class DimensionStats:
    """Variance and mean for a single metric dimension.

    Produced by Analyzer._dimension_stats(); results are sorted by variance
    (descending) so the most volatile metrics surface first.
    """
    name: str        # metric key, e.g. "avg_view_pct"
    mean: float      # arithmetic mean across records (statistics.mean)
    variance: float  # sample variance (statistics.variance)
    stdev: float     # sample standard deviation (statistics.stdev)
    count: int       # number of records that carried a numeric value for this key
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class AnalysisResult:
    """Output of Analyzer.analyze().

    When the ledger holds no matching records, total_records is 0 and every
    collection field is empty.
    """
    domain: str                              # analysed domain, or "all"
    total_records: int                       # number of ledger records analysed
    top_performers: list[dict[str, Any]]     # records with highest primary_metric
    bottom_performers: list[dict[str, Any]]  # records with lowest primary_metric
    top_features: dict[str, Any]             # common variable→variant among top
    bottom_features: dict[str, Any]          # common variable→variant among bottom
    dimension_stats: list[DimensionStats]    # per-metric variance across all records
    winning_patterns: dict[str, str]         # variable → variant that won most often
|
|
37
|
+
|
|
38
|
+
class Analyzer:
    """Derives insights from a Ledger.

    Parameters
    ----------
    ledger : Ledger instance to analyse
    primary_metric : the metric key to rank performers by (e.g. "avg_view_pct")
    top_n : how many top/bottom performers to surface
    """

    def __init__(
        self,
        ledger: Ledger,
        primary_metric: str = "primary_metric",
        top_n: int = 5,
    ) -> None:
        self.ledger = ledger
        self.primary_metric = primary_metric
        self.top_n = top_n

    # ── Public ─────────────────────────────────────────────────────────────────

    def analyze(self, domain: Optional[str] = None) -> AnalysisResult:
        """Run full analysis for *domain* (or all domains if None)."""
        records = self.ledger.query(domain=domain) if domain else self.ledger.records

        if not records:
            return AnalysisResult(
                domain=domain or "all",
                total_records=0,
                top_performers=[],
                bottom_performers=[],
                top_features={},
                bottom_features={},
                dimension_stats=[],
                winning_patterns={},
            )

        # Sort by primary metric (records without it go to the bottom).
        def _primary(r: dict) -> float:
            return float(r.get("metric_values", {}).get(self.primary_metric) or 0.0)

        ranked = sorted(records, key=_primary, reverse=True)
        top = ranked[: self.top_n]
        # Guard: with top_n <= 0, ranked[-0:] would be ranked[0:] — i.e. the
        # ENTIRE list would be reported as "bottom performers".
        bottom = ranked[-self.top_n:] if self.top_n > 0 else []

        return AnalysisResult(
            domain=domain or "all",
            total_records=len(records),
            top_performers=top,
            bottom_performers=bottom,
            top_features=self._common_features(top),
            bottom_features=self._common_features(bottom),
            dimension_stats=self._dimension_stats(records),
            winning_patterns=self._winning_patterns(records),
        )

    # ── Internal ───────────────────────────────────────────────────────────────

    def _common_features(self, records: list[dict]) -> dict[str, Any]:
        """Return {variable: variant} pairs that appear in at least 50% of records."""
        if not records:
            return {}
        counts: dict[str, dict[str, int]] = {}
        for r in records:
            var = r.get("variable", "")
            val = r.get("variant", "")
            if var and val:
                counts.setdefault(var, {}).setdefault(val, 0)
                counts[var][val] += 1

        threshold = len(records) * 0.5
        result: dict[str, Any] = {}
        for var, val_counts in counts.items():
            best_val, best_count = max(val_counts.items(), key=lambda x: x[1])
            if best_count >= threshold:
                result[var] = best_val
        return result

    def _dimension_stats(self, records: list[dict]) -> list[DimensionStats]:
        """Compute mean / variance for every numeric metric key found in records.

        Keys with fewer than two values are skipped (sample variance is
        undefined for a single observation).
        """
        all_metrics: dict[str, list[float]] = {}
        for r in records:
            for k, v in r.get("metric_values", {}).items():
                # bool is a subclass of int — exclude it so boolean flags
                # stored in metric_values don't skew the numeric stats.
                if isinstance(v, (int, float)) and not isinstance(v, bool):
                    all_metrics.setdefault(k, []).append(float(v))

        stats: list[DimensionStats] = []
        for name, values in all_metrics.items():
            if len(values) < 2:
                continue
            mean = statistics.mean(values)
            var = statistics.variance(values)
            std = statistics.stdev(values)
            stats.append(DimensionStats(
                name=name, mean=round(mean, 4), variance=round(var, 4),
                stdev=round(std, 4), count=len(values),
            ))
        return sorted(stats, key=lambda s: s.variance, reverse=True)

    def _winning_patterns(self, records: list[dict]) -> dict[str, str]:
        """For each variable, find the variant that won most frequently."""
        win_counts: dict[str, dict[str, int]] = {}
        for r in records:
            if not r.get("winner"):
                continue
            var = r.get("variable", "")
            val = r.get("variant", "")
            if var and val:
                win_counts.setdefault(var, {}).setdefault(val, 0)
                win_counts[var][val] += 1

        return {
            var: max(val_counts, key=val_counts.get)
            for var, val_counts in win_counts.items()
        }
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""core/config.py — AdaptiveConfig: read/write JSON config with winner hints support.
|
|
2
|
+
|
|
3
|
+
Config file schema (example):
|
|
4
|
+
{
|
|
5
|
+
"domain": "short_video",
|
|
6
|
+
"primary_metric": "avg_view_pct",
|
|
7
|
+
"min_items_per_variant": 3,
|
|
8
|
+
"winner_hints": {
|
|
9
|
+
"hook_type": "question",
|
|
10
|
+
"cta_style": "soft"
|
|
11
|
+
},
|
|
12
|
+
"extra": {}
|
|
13
|
+
}
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Any, Optional
|
|
20
|
+
|
|
21
|
+
# Baseline configuration values. AdaptiveConfig copies these first, then
# overlays whatever the JSON file on disk contains.
_DEFAULTS: dict[str, Any] = {
    "domain": "default",
    "primary_metric": "primary_metric",
    "min_items_per_variant": 3,
    "winner_hints": {},  # variable → winning variant label
    "extra": {},         # free-form extension bag
}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class AdaptiveConfig:
    """Lightweight JSON config bag with typed accessor helpers.

    Values start from _DEFAULTS and are overlaid with the JSON file at
    *path*, if it exists. Every mutation is flushed straight back to disk.

    Parameters
    ----------
    path : where to load from / save to (created on first save if absent)
    """

    def __init__(self, path: Path) -> None:
        self.path = path
        self._data: dict[str, Any] = dict(_DEFAULTS)
        if path.exists():
            try:
                self._data.update(json.loads(path.read_text(encoding="utf-8")))
            except (json.JSONDecodeError, OSError):
                # Unreadable or corrupt config: silently keep the defaults.
                pass

    # ── Accessors ──────────────────────────────────────────────────────────────

    def get(self, key: str, default: Any = None) -> Any:
        """Raw value for *key*, or *default* when absent."""
        return self._data.get(key, default)

    def set(self, key: str, value: Any) -> None:
        """Assign *key* and persist immediately."""
        self._data[key] = value
        self._flush()

    def update(self, mapping: dict[str, Any]) -> None:
        """Merge *mapping* in and persist immediately."""
        self._data.update(mapping)
        self._flush()

    # ── Winner hints ───────────────────────────────────────────────────────────

    def set_winner_hint(self, variable: str, variant_label: str) -> None:
        """Record which variant won for *variable*."""
        self._data.setdefault("winner_hints", {})[variable] = variant_label
        self._flush()

    def get_winner_hint(self, variable: str) -> Optional[str]:
        """Winning variant label recorded for *variable*, or None."""
        return self._data.get("winner_hints", {}).get(variable)

    def all_winner_hints(self) -> dict[str, str]:
        """Shallow copy of every variable → winner mapping."""
        return dict(self._data.get("winner_hints", {}))

    # ── Convenience ────────────────────────────────────────────────────────────

    @property
    def domain(self) -> str:
        return str(self._data.get("domain", "default"))

    @property
    def primary_metric(self) -> str:
        return str(self._data.get("primary_metric", "primary_metric"))

    @property
    def min_items_per_variant(self) -> int:
        return int(self._data.get("min_items_per_variant", 3))

    def to_dict(self) -> dict[str, Any]:
        """Shallow copy of the full config mapping."""
        return dict(self._data)

    # ── Persistence ────────────────────────────────────────────────────────────

    def _flush(self) -> None:
        # Make sure the parent directory exists before the first write.
        self.path.parent.mkdir(parents=True, exist_ok=True)
        payload = json.dumps(self._data, ensure_ascii=False, indent=2)
        self.path.write_text(payload, encoding="utf-8")

    def save(self) -> None:
        """Explicit save (normally auto-saved on every set/update)."""
        self._flush()

    def __repr__(self) -> str:
        return f"AdaptiveConfig(path={self.path!r}, domain={self.domain!r})"
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""core/experiment.py — Experiment / Variant dataclasses + ExperimentState.
|
|
2
|
+
|
|
3
|
+
All fields are plain Python types so they round-trip cleanly through JSON.
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import uuid
|
|
8
|
+
from dataclasses import dataclass, field
|
|
9
|
+
from typing import Any, Literal, Optional
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dataclass
class Variant:
    """One arm of an experiment."""
    label: str
    params: dict[str, Any] = field(default_factory=dict)
    hint: Optional[str] = None  # short text cue for downstream renderers

    def to_dict(self) -> dict:
        """JSON-ready representation."""
        return {"label": self.label, "params": self.params, "hint": self.hint}

    @classmethod
    def from_dict(cls, d: dict) -> "Variant":
        """Inverse of to_dict(); missing optional keys fall back to defaults."""
        return cls(
            label=d["label"],
            params=d.get("params", {}),
            hint=d.get("hint"),
        )
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class Experiment:
    """A single testable hypothesis with two variants (A/B).

    Fields
    ------
    id : short unique id (8-char hex by default)
    domain : e.g. "short_video", "blog_post", "email_campaign"
    variable : what's being tested, e.g. "hook_type", "cta_style"
    description : human-readable summary of the hypothesis
    variant_a : control arm
    variant_b : challenger arm
    tier : priority tier (1 = run alone, 2 = can run in parallel, 3 = no A/B)
    started : ISO date string when experiment went active, or None
    concluded : ISO date string when experiment was evaluated, or None
    status : "pending" | "active" | "concluded"
    mode : execution mode —
           "interleaved": different topics alternate between variants;
           maximises throughput (Tier 2). One topic → one video → one variant.
           "paired": the SAME topic yields two videos, one per variant, in a
           single run; removes topic-level confounders for cleaner causal
           inference. Preferred for Tier 1 experiments whose variable
           (e.g. subscribe_prompt, cta_style) is independent of the topic.
    items : list of {"item_id": str, "variant": str, ...} — produced units
    """
    id: str = field(default_factory=lambda: uuid.uuid4().hex[:8])
    domain: str = ""
    variable: str = ""
    description: str = ""
    variant_a: Variant = field(default_factory=lambda: Variant(label="control"))
    variant_b: Variant = field(default_factory=lambda: Variant(label="variant"))
    tier: int = 1
    started: Optional[str] = None
    concluded: Optional[str] = None
    status: str = "pending"
    mode: Literal["interleaved", "paired"] = "interleaved"
    items: list[dict[str, Any]] = field(default_factory=list)

    def to_dict(self) -> dict:
        """JSON-ready representation (variants serialised recursively)."""
        return {
            "id": self.id,
            "domain": self.domain,
            "variable": self.variable,
            "description": self.description,
            "variant_a": self.variant_a.to_dict(),
            "variant_b": self.variant_b.to_dict(),
            "tier": self.tier,
            "started": self.started,
            "concluded": self.concluded,
            "status": self.status,
            "mode": self.mode,
            "items": self.items,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "Experiment":
        """Inverse of to_dict(); tolerant of missing keys."""
        return cls(
            id=d.get("id", uuid.uuid4().hex[:8]),
            domain=d.get("domain", ""),
            variable=d.get("variable", ""),
            description=d.get("description", ""),
            variant_a=Variant.from_dict(d.get("variant_a", {"label": "control"})),
            variant_b=Variant.from_dict(d.get("variant_b", {"label": "variant"})),
            tier=d.get("tier", 1),
            started=d.get("started"),
            concluded=d.get("concluded"),
            status=d.get("status", "pending"),
            mode=d.get("mode", "interleaved"),
            items=d.get("items", []),
        )
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
@dataclass
class ExperimentState:
    """Persisted state for one domain's experiment pipeline."""
    current: Optional[Experiment] = None                         # actively-running experiment
    queue: list[Experiment] = field(default_factory=list)        # pending candidates
    history: list[dict[str, Any]] = field(default_factory=list)  # past experiment records

    def to_dict(self) -> dict:
        """JSON-ready representation; an absent current experiment becomes {}."""
        return {
            "current": self.current.to_dict() if self.current else {},
            "queue": [e.to_dict() for e in self.queue],
            "history": self.history,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "ExperimentState":
        """Inverse of to_dict(); {} or a missing "current" maps back to None."""
        raw_current = d.get("current", {})
        return cls(
            current=Experiment.from_dict(raw_current) if raw_current else None,
            queue=[Experiment.from_dict(e) for e in d.get("queue", [])],
            history=d.get("history", []),
        )
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
"""core/hypothesis.py — HypothesisEngine: ledger + analysis → new Experiment candidates.
|
|
2
|
+
|
|
3
|
+
Uses OpenAI Chat API. Prompt structure:
|
|
4
|
+
system : role + output schema
|
|
5
|
+
user : past experiment results + top/bottom performer context
|
|
6
|
+
|
|
7
|
+
Output: list of Experiment dicts (JSON), one per hypothesis candidate.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import re
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any
|
|
15
|
+
|
|
16
|
+
from .analyzer import AnalysisResult
|
|
17
|
+
from .experiment import Experiment, Variant
|
|
18
|
+
from .ledger import Ledger
|
|
19
|
+
|
|
20
|
+
_SYSTEM_PROMPT = """\
|
|
21
|
+
You are an adaptive experimentation strategist. Your job is to analyse past A/B experiment \
|
|
22
|
+
results and suggest the next highest-value hypotheses to test.
|
|
23
|
+
|
|
24
|
+
Rules:
|
|
25
|
+
- Each hypothesis must test exactly ONE variable (to avoid confounding).
|
|
26
|
+
- Build on what worked: if a variable already has a known winner, either skip it or propose \
|
|
27
|
+
a refined follow-up.
|
|
28
|
+
- Prioritise variables with high variance in the metrics — they have the most room for \
|
|
29
|
+
improvement.
|
|
30
|
+
- Avoid re-testing already conclusive experiments unless the context has meaningfully changed.
|
|
31
|
+
- Output ONLY a valid JSON array. Each element must match:
|
|
32
|
+
|
|
33
|
+
{
|
|
34
|
+
"variable": "<what is being tested, snake_case>",
|
|
35
|
+
"description": "<one-sentence hypothesis>",
|
|
36
|
+
"variant_a": {"label": "<control label>", "params": {}, "hint": "<optional>"},
|
|
37
|
+
"variant_b": {"label": "<challenger label>", "params": {}, "hint": "<optional>"},
|
|
38
|
+
"tier": <1 | 2 | 3>,
|
|
39
|
+
"rationale": "<why this is worth testing now>"
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
Tier meanings:
|
|
43
|
+
1 = high-impact, must run alone
|
|
44
|
+
2 = moderate-impact, can run in parallel with other Tier 2
|
|
45
|
+
3 = no A/B needed, recommend directly based on research
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class HypothesisEngine:
    """Generate new Experiment candidates from past ledger data.

    Builds a prompt from ledger history plus analysis output, calls an
    OpenAI chat model, and parses the JSON reply into Experiment objects.

    Parameters
    ----------
    ledger : Ledger instance (source of truth for past results)
    api_key : OpenAI API key (or path to a file containing it)
    model : OpenAI chat model to use
    max_candidates : how many hypotheses to request

    Raises
    ------
    FileNotFoundError
        If *api_key* is an explicit Path that does not exist.
    """

    def __init__(
        self,
        ledger: Ledger,
        api_key: str | Path,
        model: str = "gpt-5.4-mini",
        max_candidates: int = 5,
    ) -> None:
        self.ledger = ledger
        self.model = model
        self.max_candidates = max_candidates

        # *api_key* may be a literal key string or a path to a file holding it.
        if isinstance(api_key, Path):
            # Bug fix: the original silently stored the Path object itself when
            # the file was missing, which would later hand a non-string to the
            # OpenAI client. An explicit Path must point at a real file.
            if not api_key.exists():
                raise FileNotFoundError(f"API key file not found: {api_key}")
            self._api_key = api_key.read_text().strip()
        elif isinstance(api_key, str) and "\n" not in api_key and len(api_key) < 200:
            # A short, single-line string *might* be a file path; check first.
            p = Path(api_key)
            if p.exists():
                self._api_key = p.read_text().strip()
            else:
                self._api_key = api_key  # treat as literal key string
        else:
            self._api_key = api_key

    # ── Public ─────────────────────────────────────────────────────────────────

    def generate(
        self,
        domain: str,
        analysis: AnalysisResult,
        domain_context: str = "",
        extra_instructions: str = "",
    ) -> list[Experiment]:
        """Call LLM and return a list of Experiment candidates.

        Parameters
        ----------
        domain : domain identifier (e.g. "short_video")
        analysis : output of Analyzer.analyze()
        domain_context : adapter-provided text about top/bottom performers
        extra_instructions : any domain-specific guidance to append to the prompt
        """
        # Lazy import keeps `openai` an optional dependency until hypotheses
        # are actually requested.
        from openai import OpenAI

        client = OpenAI(api_key=self._api_key)

        user_message = self._build_user_message(
            domain=domain,
            analysis=analysis,
            domain_context=domain_context,
            extra_instructions=extra_instructions,
        )

        resp = client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {"role": "user", "content": user_message},
            ],
            max_completion_tokens=2000,
        )

        # `content` can be None (e.g. refusals / tool-only replies); normalize
        # to "" so parsing raises a clear JSON error, not AttributeError.
        raw = (resp.choices[0].message.content or "").strip()
        candidates = self._parse_response(raw)
        experiments = []
        for c in candidates:
            exp = Experiment(
                domain=domain,
                variable=c.get("variable", "unknown"),
                description=c.get("description", ""),
                variant_a=Variant.from_dict(c.get("variant_a", {"label": "control"})),
                variant_b=Variant.from_dict(c.get("variant_b", {"label": "variant"})),
                tier=int(c.get("tier", 2)),
            )
            experiments.append(exp)

        return experiments

    # ── Internal ───────────────────────────────────────────────────────────────

    def _build_user_message(
        self,
        domain: str,
        analysis: AnalysisResult,
        domain_context: str,
        extra_instructions: str,
    ) -> str:
        """Assemble the user prompt: history, winners, variance, context, task."""
        parts: list[str] = [f"Domain: {domain}"]

        # Past results summary
        records = self.ledger.query(domain=domain)
        if records:
            concluded = [r for r in records if r.get("winner") is not None]
            parts.append(f"\n## Past experiments ({len(concluded)} concluded, {len(records)} total records)")
            # Group by experiment_id
            by_exp: dict[str, list] = {}
            for r in concluded:
                eid = r.get("experiment_id", "?")
                by_exp.setdefault(eid, []).append(r)
            for eid, exp_records in list(by_exp.items())[-10:]:  # last 10 experiments
                winners = [r for r in exp_records if r.get("winner")]
                w_str = f"winner={winners[0]['variant']} ({winners[0]['metric_values']})" if winners else "inconclusive"
                var = exp_records[0].get("variable", "?")
                parts.append(f" - [{eid}] variable={var} → {w_str}")
        else:
            parts.append("\n## Past experiments: none yet — this is the first cycle.")

        # Winning patterns
        if analysis.winning_patterns:
            parts.append("\n## Known winners by variable\n" +
                         "\n".join(f" {k}: {v}" for k, v in analysis.winning_patterns.items()))

        # High-variance dimensions
        if analysis.dimension_stats:
            parts.append("\n## Metric variance (highest = most opportunity)")
            for ds in analysis.dimension_stats[:5]:
                parts.append(f" {ds.name}: variance={ds.variance:.3f}, mean={ds.mean:.3f} (n={ds.count})")

        # Domain-provided context (top/bottom performers)
        if domain_context:
            parts.append(f"\n## Domain context (top vs bottom performers)\n{domain_context}")

        parts.append(
            f"\n## Task\nSuggest {self.max_candidates} next experiment candidates for domain '{domain}'."
        )
        if extra_instructions:
            parts.append(f"\nAdditional guidance:\n{extra_instructions}")

        return "\n".join(parts)

    @staticmethod
    def _parse_response(raw: str) -> list[dict[str, Any]]:
        """Parse the model reply into a list of candidate dicts.

        Strips a surrounding markdown code fence if present, then validates
        that the payload is a JSON array — anything else would fail later in
        generate() with a confusing AttributeError on `c.get`.
        """
        # Strip markdown code fences if present
        raw = re.sub(r"^```[a-z]*\n?|\n?```$", "", raw.strip())
        data = json.loads(raw)
        if not isinstance(data, list):
            raise ValueError(
                f"Expected a JSON array of candidates, got {type(data).__name__}"
            )
        return data
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""core/ledger.py — Domain-agnostic JSON ledger.
|
|
2
|
+
|
|
3
|
+
Replaces results.tsv with a structured, queryable JSON store.
|
|
4
|
+
|
|
5
|
+
Record schema
|
|
6
|
+
-------------
|
|
7
|
+
{
|
|
8
|
+
"domain": str, # e.g. "short_video"
|
|
9
|
+
"experiment_id": str,
|
|
10
|
+
"variable": str, # what was tested
|
|
11
|
+
"variant": str, # variant label ("control" / "challenger" / ...)
|
|
12
|
+
"metric_values": dict, # raw metric snapshot, e.g. {"avg_view_pct": 45.2}
|
|
13
|
+
"winner": bool | None, # True if this variant won
|
|
14
|
+
"timestamp": str # ISO-8601 datetime
|
|
15
|
+
}
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import json
|
|
20
|
+
from datetime import datetime, timezone
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any, Optional
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class Ledger:
    """Append-only JSON ledger stored at *path*.

    The entire record list is held in memory and rewritten to disk on every
    append (single source of truth; no database required).

    Usage
    -----
        ledger = Ledger(Path("data/ledger.json"))
        ledger.append(
            domain="short_video",
            experiment_id="abc123",
            variable="hook_type",
            variant="question",
            metric_values={"avg_view_pct": 52.1, "like_rate": 3.2},
            winner=True,
        )
        records = ledger.query(domain="short_video")
    """

    def __init__(self, path: Path) -> None:
        self.path = path
        self._records: list[dict[str, Any]] = []
        if path.exists():
            try:
                loaded = json.loads(path.read_text(encoding="utf-8"))
            except (json.JSONDecodeError, OSError):
                # Best-effort: an unreadable/corrupt ledger starts empty
                # rather than crashing the caller.
                loaded = []
            # Bug fix: a parseable-but-wrong file (e.g. a JSON object) used to
            # be stored as-is, breaking append()/query() later. Only accept a
            # list; anything else is treated like corruption.
            if isinstance(loaded, list):
                self._records = loaded

    # ── Write ──────────────────────────────────────────────────────────────────

    def append(
        self,
        domain: str,
        experiment_id: str,
        variable: str,
        variant: str,
        metric_values: dict[str, Any],
        winner: Optional[bool] = None,
        timestamp: Optional[str] = None,
    ) -> dict[str, Any]:
        """Append one record and persist immediately; return the record.

        *timestamp* defaults to the current UTC time in ISO-8601 form.
        """
        record = {
            "domain": domain,
            "experiment_id": experiment_id,
            "variable": variable,
            "variant": variant,
            "metric_values": metric_values,
            "winner": winner,
            "timestamp": timestamp or datetime.now(timezone.utc).isoformat(),
        }
        self._records.append(record)
        self._flush()
        return record

    def _flush(self) -> None:
        """Rewrite the full ledger to disk, creating parent dirs as needed."""
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.path.write_text(
            json.dumps(self._records, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )

    # ── Read ───────────────────────────────────────────────────────────────────

    @property
    def records(self) -> list[dict[str, Any]]:
        # Shallow copy so callers cannot mutate the internal list.
        return list(self._records)

    def query(
        self,
        domain: Optional[str] = None,
        variable: Optional[str] = None,
        experiment_id: Optional[str] = None,
        winners_only: bool = False,
    ) -> list[dict[str, Any]]:
        """Return filtered records. All filters are ANDed together."""
        result = self._records
        if domain is not None:
            result = [r for r in result if r.get("domain") == domain]
        if variable is not None:
            result = [r for r in result if r.get("variable") == variable]
        if experiment_id is not None:
            result = [r for r in result if r.get("experiment_id") == experiment_id]
        if winners_only:
            result = [r for r in result if r.get("winner") is True]
        return list(result)

    def all_domains(self) -> list[str]:
        """Sorted unique domain names seen in the ledger."""
        return sorted({r["domain"] for r in self._records if "domain" in r})

    def all_variables(self, domain: Optional[str] = None) -> list[str]:
        """Sorted unique variable names, optionally restricted to *domain*.

        Uses an ``is None`` check (not truthiness) so an empty-string domain
        still filters — consistent with query()'s semantics.
        """
        records = self._records if domain is None else self.query(domain=domain)
        return sorted({r["variable"] for r in records if "variable" in r})

    def __len__(self) -> int:
        return len(self._records)

    def __repr__(self) -> str:
        return f"Ledger(path={self.path!r}, records={len(self._records)})"
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: adaptive-iteration
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Domain-agnostic adaptive experimentation framework: experiment → measure → learn → challenge.
|
|
5
|
+
Project-URL: Homepage, https://github.com/imaknas/adaptive-iteration
|
|
6
|
+
Project-URL: Repository, https://github.com/imaknas/adaptive-iteration
|
|
7
|
+
License: MIT
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Requires-Dist: openai>=1.0.0
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
|
|
12
|
+
# adaptive_iteration
|
|
13
|
+
|
|
14
|
+
**Domain-agnostic adaptive experimentation framework.**
|
|
15
|
+
|
|
16
|
+
A lightweight Python framework that abstracts the **experiment → measure → learn → challenge**
|
|
17
|
+
cycle into reusable components. Bring your own domain; the framework handles the rest.
|
|
18
|
+
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
## What It Is
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
adaptive_iteration/
|
|
25
|
+
├── core/ # pure Python stdlib, zero domain deps
|
|
26
|
+
│ ├── experiment.py # Experiment / Variant dataclasses + ExperimentState
|
|
27
|
+
│ ├── ledger.py # JSON append-only results ledger
|
|
28
|
+
│ ├── analyzer.py # top/bottom performer detection, variance per dimension
|
|
29
|
+
│ ├── hypothesis.py # HypothesisEngine: ledger + analysis → LLM → Experiment candidates
|
|
30
|
+
│ └── config.py # AdaptiveConfig: JSON config + winner hints
|
|
31
|
+
└── adapters/
|
|
32
|
+
├── base.py # DomainAdapter ABC (3 methods to implement)
|
|
33
|
+
└── short_video.py # Example adapter: YouTube + Instagram (simulated data)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# with uv (recommended)
|
|
42
|
+
uv add adaptive-iteration
|
|
43
|
+
|
|
44
|
+
# with pip
|
|
45
|
+
pip install adaptive-iteration
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Requires Python 3.10+. The only runtime dependency is `openai` (used only in
|
|
49
|
+
`HypothesisEngine`; the rest of `core/` is stdlib-only).
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Quick Start
|
|
54
|
+
|
|
55
|
+
```python
|
|
56
|
+
import os
|
|
57
|
+
from pathlib import Path
|
|
58
|
+
from adaptive_iteration.core.ledger import Ledger
|
|
59
|
+
from adaptive_iteration.core.analyzer import Analyzer
|
|
60
|
+
from adaptive_iteration.core.hypothesis import HypothesisEngine
|
|
61
|
+
from adaptive_iteration.core.config import AdaptiveConfig
|
|
62
|
+
|
|
63
|
+
# 1. Load / create config
|
|
64
|
+
cfg = AdaptiveConfig(Path("data/adaptive_config.json"))
|
|
65
|
+
cfg.update({"domain": "my_domain", "primary_metric": "conversion_rate"})
|
|
66
|
+
|
|
67
|
+
# 2. Open ledger
|
|
68
|
+
ledger = Ledger(Path("data/adaptive_ledger.json"))
|
|
69
|
+
|
|
70
|
+
# 3. Record experiment results
|
|
71
|
+
ledger.append(
|
|
72
|
+
domain="my_domain",
|
|
73
|
+
experiment_id="exp001",
|
|
74
|
+
variable="cta_style",
|
|
75
|
+
variant="soft",
|
|
76
|
+
metric_values={"conversion_rate": 4.2, "bounce_rate": 31.0},
|
|
77
|
+
winner=True,
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
# 4. Analyze
|
|
81
|
+
analyzer = Analyzer(ledger, primary_metric="conversion_rate")
|
|
82
|
+
analysis = analyzer.analyze(domain="my_domain")
|
|
83
|
+
print(f"Top performers: {[p['variant'] for p in analysis.top_performers]}")
|
|
84
|
+
print(f"Winning patterns: {analysis.winning_patterns}")
|
|
85
|
+
|
|
86
|
+
# 5. Generate next hypotheses (requires OPENAI_API_KEY)
|
|
87
|
+
engine = HypothesisEngine(
|
|
88
|
+
ledger=ledger,
|
|
89
|
+
api_key=os.environ["OPENAI_API_KEY"],
|
|
90
|
+
)
|
|
91
|
+
candidates = engine.generate(domain="my_domain", analysis=analysis)
|
|
92
|
+
for exp in candidates:
|
|
93
|
+
print(f" [{exp.tier}] {exp.variable}: {exp.description}")
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Integrating a New Domain
|
|
99
|
+
|
|
100
|
+
Subclass `DomainAdapter` and implement three methods:
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
from adaptive_iteration.adapters.base import DomainAdapter
|
|
104
|
+
|
|
105
|
+
class MyDomainAdapter(DomainAdapter):
|
|
106
|
+
|
|
107
|
+
def collect_metrics(self, item_ids: list[str]) -> list[dict]:
|
|
108
|
+
"""Pull raw metrics from your external system for each item_id."""
|
|
109
|
+
results = []
|
|
110
|
+
for item_id in item_ids:
|
|
111
|
+
raw = my_api.get_metrics(item_id)
|
|
112
|
+
results.append({"id": item_id, **raw})
|
|
113
|
+
return results
|
|
114
|
+
|
|
115
|
+
def get_signals(self, metrics: list[dict]) -> dict:
|
|
116
|
+
"""Normalise to framework signals."""
|
|
117
|
+
primary = sum(m["conversion_rate"] for m in metrics) / len(metrics)
|
|
118
|
+
return {
|
|
119
|
+
"primary_metric": primary,
|
|
120
|
+
"secondary_metrics": {
|
|
121
|
+
"bounce_rate": sum(m["bounce_rate"] for m in metrics) / len(metrics),
|
|
122
|
+
},
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
def format_context(self, top_items, bottom_items) -> str:
|
|
126
|
+
lines = ["Top performers:"]
|
|
127
|
+
for item in top_items:
|
|
128
|
+
lines.append(f" [{item['id']}] conversion_rate={item.get('conversion_rate')}")
|
|
129
|
+
lines.append("Bottom performers:")
|
|
130
|
+
for item in bottom_items:
|
|
131
|
+
lines.append(f" [{item['id']}] conversion_rate={item.get('conversion_rate')}")
|
|
132
|
+
return "\n".join(lines)
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
See `adapters/short_video.py` for a complete reference implementation.
|
|
136
|
+
|
|
137
|
+
---
|
|
138
|
+
|
|
139
|
+
## Experiment Modes
|
|
140
|
+
|
|
141
|
+
| Mode | When to use | Description |
|
|
142
|
+
|------|-------------|-------------|
|
|
143
|
+
| `interleaved` | Tier 2 | Rotate variants across different items; maximises volume |
|
|
144
|
+
| `paired` | Tier 1 | Generate two variants for the same item; cleaner causal inference |
|
|
145
|
+
|
|
146
|
+
Tier 1 experiments (high-impact variables) should use `paired` mode to eliminate
|
|
147
|
+
confounding factors introduced by item-level differences.
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Import Verification
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
python3 -c "from adaptive_iteration.core.experiment import Experiment; print('ok')"
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
---
|
|
158
|
+
|
|
159
|
+
## Design Principles
|
|
160
|
+
|
|
161
|
+
1. `core/` has **zero external deps** — pure Python stdlib only (`openai` in `hypothesis.py`
|
|
162
|
+
is lazy-imported and optional until you call `HypothesisEngine`).
|
|
163
|
+
2. `DomainAdapter` is the **only** layer that touches external systems.
|
|
164
|
+
3. `Ledger` is the **single source of truth** — append-only JSON, no database required.
|
|
165
|
+
4. `HypothesisEngine` uses **structured JSON output** prompting so parsing is deterministic.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
adaptive_iteration/__init__.py,sha256=53mv6ma-Gxhi7wYTK_C9DF6XMe8lZEwqbCTweg_Eve4,581
|
|
2
|
+
adaptive_iteration/adapters/__init__.py,sha256=Pxvt872_ZyCOn5f3IDIuKPyoaPSQ0894gV-W0XY7hGU,209
|
|
3
|
+
adaptive_iteration/adapters/base.py,sha256=E0Q15j6vgGTYxIWwBnyJC4Ia4nLr0rH4qRmaCQjp64Y,2923
|
|
4
|
+
adaptive_iteration/adapters/short_video.py,sha256=Cr6Q3sMjYRKX7UTbY7v6fZ4ekBVfZ18-fRiHngFx8ks,7205
|
|
5
|
+
adaptive_iteration/core/__init__.py,sha256=Hbwg7CiFZee_Xy5AWXsK_HW2xM6E5jd8g43gGA2gOoo,68
|
|
6
|
+
adaptive_iteration/core/analyzer.py,sha256=qoAEuyw3FGYYrICGksR-_7JNDXTOUarJQGj4DzY918c,5870
|
|
7
|
+
adaptive_iteration/core/config.py,sha256=PGJPXo2Arzuv1BDqb8s2T6bTLe5MEBAvko1-QrueTvE,3719
|
|
8
|
+
adaptive_iteration/core/experiment.py,sha256=W7h705JSKSW_2D2UBamO0LUnY39UsMPTsiCY5v6UiTU,4770
|
|
9
|
+
adaptive_iteration/core/hypothesis.py,sha256=WgiYzQln13g4KNjQxBtPOr7ISBjgvJMixIUD6WgPDKA,7357
|
|
10
|
+
adaptive_iteration/core/ledger.py,sha256=T6bFRP5ZewC8ad-Y7Lk2Ag65k0MGoxgT0rAphkP3hCY,4215
|
|
11
|
+
adaptive_iteration-0.1.0.dist-info/METADATA,sha256=fVLoCCJf15_K3UYxcRxx8BshMhZSuRMVQ8Iyz3Ck3MI,5360
|
|
12
|
+
adaptive_iteration-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
13
|
+
adaptive_iteration-0.1.0.dist-info/RECORD,,
|