modelab 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
modelab/__init__.py ADDED
@@ -0,0 +1,126 @@
1
+ """modelab — provider-agnostic A/B testing for LLM systems."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import urllib.request
8
+ from typing import Any, Sequence
9
+
10
+ from modelab._assignment import Assignment
11
+ from modelab._engine import assign_variant
12
+ from modelab._errors import FlagNotFoundError, NotInitializedError
13
+ from modelab._server_storage import ServerStorage
14
+ from modelab._state import _global_state
15
+ from modelab._types import (
16
+ AssignmentRecord,
17
+ EvalContext,
18
+ Flag,
19
+ Variant,
20
+ )
21
+
22
+ __all__ = [
23
+ "init",
24
+ "assign",
25
+ "evaluate",
26
+ "Flag",
27
+ "Variant",
28
+ "EvalContext",
29
+ "Assignment",
30
+ ]
31
+
32
+
33
def init(
    server: str,
    flags: Sequence[Flag] = (),
    api_key: str = "",
) -> None:
    """Configure the process-wide modelab state.

    Creates a ``ServerStorage`` bound to ``server`` and hands it, together
    with a list copy of ``flags``, to the module-level state object.

    Args:
        server: Base URL of the modelab server (e.g. "http://localhost:8100").
        flags: Experiment flags to register.
        api_key: Optional API key passed on to the storage backend.
    """
    backend = ServerStorage(server, api_key=api_key)
    registered = list(flags)
    _global_state.configure(backend, registered, server_url=server)
47
+
48
+
49
def assign(flag_name: str, ctx: EvalContext) -> Assignment | None:
    """Assign a variant for the given flag and context.

    The chosen variant is written to storage as an ``AssignmentRecord``; a
    failure to save is logged on the "modelab" logger and does not prevent
    the assignment from being returned.

    Args:
        flag_name: Name of a flag registered through ``init()``.
        ctx: Evaluation context identifying the user/session.

    Returns:
        An ``Assignment`` for the selected variant, or None if the user is
        outside the rollout percentage.

    Raises:
        NotInitializedError: If init() hasn't been called.
        FlagNotFoundError: If the flag name isn't registered.
    """
    if not _global_state.initialized:
        raise NotInitializedError()

    flag = _global_state.flags.get(flag_name)
    if flag is None:
        raise FlagNotFoundError(flag_name)

    variant = assign_variant(flag, ctx)
    if variant is None:
        return None

    # Explicit check instead of ``assert``: asserts are stripped under -O, and
    # the state could have been reset since the ``initialized`` check above.
    storage = _global_state.storage
    if storage is None:
        raise NotInitializedError()

    record = AssignmentRecord(
        flag_name=flag_name,
        variant_name=variant.name,
        user_id=ctx.user_id,
        session_id=ctx.session_id,
        config_json=dict(variant.config),
    )

    # Best-effort persistence: never let a storage problem break the caller.
    try:
        storage.save_assignment(record)
    except Exception:
        logging.getLogger("modelab").warning(
            "Failed to save assignment for %s", flag_name, exc_info=True
        )

    return Assignment(
        flag_name=flag_name,
        variant_name=variant.name,
        # Fresh copy so caller-side mutation does not touch the Variant.
        config=dict(variant.config),
        context=ctx,
        storage=storage,
        assignment_id=record.assignment_id,
    )
93
+
94
+
95
def evaluate(flag_name: str) -> dict[str, Any]:
    """Fetch per-variant metrics for a flag from the server.

    Flushes any buffered data, then queries the server's
    GET /api/v1/flags/{flag_name} endpoint and returns the decoded JSON body.

    Args:
        flag_name: Name of a flag registered through ``init()``.

    Returns:
        The JSON document returned by the server, decoded with ``json.loads``.

    Raises:
        NotInitializedError: If init() hasn't been called.
        FlagNotFoundError: If the flag name isn't registered.
        urllib.error.URLError: If the HTTP request fails (not caught here).
    """
    # Local import: the file's import block only names urllib.request.
    import urllib.parse

    if not _global_state.initialized:
        raise NotInitializedError()

    if flag_name not in _global_state.flags:
        raise FlagNotFoundError(flag_name)

    # Explicit check instead of ``assert`` (asserts vanish under -O).
    storage = _global_state.storage
    if storage is None:
        raise NotInitializedError()
    storage.flush()

    # Percent-encode the flag name so "/", "?", "#" or spaces in it cannot
    # change the request path; names made of unreserved characters are
    # unchanged.
    base = _global_state.server_url.rstrip("/")
    url = f"{base}/api/v1/flags/{urllib.parse.quote(flag_name, safe='')}"

    # Send the same X-API-Key header the storage backend uses for ingest.
    # NOTE(review): assumes the read endpoint accepts the ingest key — confirm.
    headers: dict[str, str] = {}
    api_key = getattr(storage, "_api_key", "")
    if api_key:
        headers["X-API-Key"] = api_key

    req = urllib.request.Request(url, headers=headers, method="GET")
    with urllib.request.urlopen(req, timeout=10) as resp:
        return json.loads(resp.read())
118
+
119
+
120
def reset() -> None:
    """Return the module-level state to its uninitialized condition.

    Delegates to the state object's ``reset()``, which drops the registered
    flags, the storage backend and the server URL. Mainly useful in tests
    that need a clean slate between runs.
    """
    state = _global_state
    state.reset()
modelab/_assignment.py ADDED
@@ -0,0 +1,163 @@
1
+ """Assignment class — the main object returned by modelab.assign()."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import TYPE_CHECKING, Any
7
+
8
+ from modelab._types import (
9
+ EvalContext,
10
+ EventRecord,
11
+ ExecutionRecord,
12
+ )
13
+
14
+ if TYPE_CHECKING:
15
+ from modelab._server_storage import ServerStorage
16
+
17
+ logger = logging.getLogger("modelab")
18
+
19
+
20
def _extract_usage_from_response(response: Any) -> tuple[int | None, int | None]:
    """Duck-type token usage from a provider response.

    Looks up ``usage`` on the response and then tries, in order, the
    OpenAI-style names (``prompt_tokens`` / ``completion_tokens``) and the
    Anthropic-style names (``input_tokens`` / ``output_tokens``). Both the
    response and its usage may be plain objects (attribute access) or dicts
    (key access), so raw JSON payloads work as well as SDK objects.

    Returns:
        ``(input_tokens, output_tokens)`` from the first naming style that
        has at least one value, or ``(None, None)`` if nothing is found.
    """

    def _lookup(container: Any, key: str) -> Any:
        # Dicts expose their data by key; everything else by attribute.
        if isinstance(container, dict):
            return container.get(key)
        return getattr(container, key, None)

    usage = _lookup(response, "usage")
    if usage is None:
        return None, None

    naming_styles = (
        ("prompt_tokens", "completion_tokens"),  # OpenAI
        ("input_tokens", "output_tokens"),  # Anthropic
    )
    for in_key, out_key in naming_styles:
        tokens_in = _lookup(usage, in_key)
        tokens_out = _lookup(usage, out_key)
        if tokens_in is not None or tokens_out is not None:
            return tokens_in, tokens_out

    return None, None
44
+
45
+
46
class Assignment:
    """Handle for one variant assignment in a given evaluation context.

    Exposes the assigned flag, variant and config as read-only properties and
    forwards execution metrics and outcome events to the storage backend.
    Exceptions raised by the storage calls are logged on the "modelab" logger
    instead of being propagated.
    """

    def __init__(
        self,
        flag_name: str,
        variant_name: str,
        config: dict[str, Any],
        context: EvalContext,
        storage: ServerStorage,
        assignment_id: str,
    ) -> None:
        self._assignment_id = assignment_id
        self._storage = storage
        self._context = context
        self._config = config
        self._variant_name = variant_name
        self._flag_name = flag_name

    # ── Read-only views ────────────────────────────────────────────────

    @property
    def flag_name(self) -> str:
        """Name of the flag this assignment belongs to."""
        return self._flag_name

    @property
    def variant_name(self) -> str:
        """Name of the variant that was selected."""
        return self._variant_name

    @property
    def config(self) -> dict[str, Any]:
        """Config dict passed in at construction (returned as-is, not copied)."""
        return self._config

    @property
    def context(self) -> EvalContext:
        """Evaluation context the assignment was made for."""
        return self._context

    @property
    def assignment_id(self) -> str:
        """Identifier attached to every record saved through this object."""
        return self._assignment_id

    # ── Recording ──────────────────────────────────────────────────────

    def record(
        self,
        response: Any = None,
        *,
        latency_ms: float | None = None,
        input_tokens: int | None = None,
        output_tokens: int | None = None,
        cost: float | None = None,
        error: str | None = None,
        **metadata: Any,
    ) -> None:
        """Save execution metrics for this assignment.

        When ``response`` is given, token counts are read from it via
        ``_extract_usage_from_response``; a token count passed explicitly
        (anything other than None) takes precedence over the extracted one.
        Extra keyword arguments are stored as the record's metadata.
        """
        if response is not None:
            usage_in, usage_out = _extract_usage_from_response(response)
            input_tokens = usage_in if input_tokens is None else input_tokens
            output_tokens = usage_out if output_tokens is None else output_tokens

        execution = ExecutionRecord(
            assignment_id=self._assignment_id,
            latency_ms=latency_ms,
            input_tokens=input_tokens,
            output_tokens=output_tokens,
            cost=cost,
            error=error,
            metadata_json=metadata,
        )
        self._save_execution(execution)

    def mark_success(self, payload: dict[str, Any] | None = None) -> None:
        """Save a "success" event.

        Args:
            payload: Optional extra data for the event; a falsy value is
                stored as an empty dict.
        """
        self._save_event("success", "", payload if payload else {})

    def mark_failure(self, payload: dict[str, Any] | None = None) -> None:
        """Save a "failure" event.

        Args:
            payload: Optional extra data for the event; a falsy value is
                stored as an empty dict.
        """
        self._save_event("failure", "", payload if payload else {})

    def mark_custom_event(self, name: str, payload: dict[str, Any] | None = None) -> None:
        """Save a "custom" event under the given name.

        Args:
            name: Name of the custom event (e.g., "copied", "dismissed").
            payload: Optional extra data for the event; a falsy value is
                stored as an empty dict.
        """
        self._save_event("custom", name, payload if payload else {})

    # ── Storage helpers (log instead of raising) ───────────────────────

    def _save_execution(self, record: ExecutionRecord) -> None:
        """Pass ``record`` to storage, logging any exception."""
        try:
            self._storage.save_execution(record)
        except Exception:
            logger.warning("Failed to save execution for %s", self._assignment_id, exc_info=True)

    def _save_event(self, event_type: str, event_name: str, payload: dict[str, Any]) -> None:
        """Build an ``EventRecord`` and pass it to storage, logging any exception."""
        try:
            event = EventRecord(
                assignment_id=self._assignment_id,
                event_type=event_type,
                event_name=event_name,
                payload_json=payload,
            )
            self._storage.save_event(event)
        except Exception:
            logger.warning("Failed to save event for %s", self._assignment_id, exc_info=True)
modelab/_engine.py ADDED
@@ -0,0 +1,40 @@
1
+ """Deterministic assignment engine: hash → bucket → rollout gate → variant."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+
7
+ from modelab._types import EvalContext, Flag, Variant
8
+
9
BUCKET_COUNT = 10_000


def _bucket(flag_name: str, user_id: str) -> int:
    """Deterministic bucket in [0, BUCKET_COUNT) from flag + user.

    The key ``"{flag_name}:{user_id}"`` is hashed with MD5 and the digest,
    read as a big-endian integer, is reduced modulo ``BUCKET_COUNT``.

    MD5 is used purely to spread keys, not for security, so it is requested
    with ``usedforsecurity=False``; this keeps the call usable on
    FIPS-restricted Python builds. The resulting bucket values are the same
    as for a plain ``hashlib.md5`` call.
    """
    key = f"{flag_name}:{user_id}".encode()
    digest = hashlib.md5(key, usedforsecurity=False).digest()
    return int.from_bytes(digest, "big") % BUCKET_COUNT
17
+
18
+
19
def assign_variant(flag: Flag, ctx: EvalContext) -> Variant | None:
    """Return the assigned Variant, or None if outside rollout.

    Steps:
      1. Hash ``(flag.name, ctx.user_id)`` into a bucket.
      2. Rollout gate: only buckets below ``rollout_pct`` percent of
         ``BUCKET_COUNT`` continue.
      3. Weighted pick: ``bucket % total_weight`` is located in the
         cumulative weight ranges of ``flag.variants``.

    Returns None as well when the flag has no variants. If every weight is
    zero the first variant is returned.

    NOTE(review): step 3 reuses the bucket that already passed the gate. When
    the rollout threshold is smaller than the total weight (e.g. a 0.5%
    rollout with weights 50/50 leaves only buckets 0-49), later variants can
    be under-represented or never chosen. This is left as-is because changing
    the selection hash would remap users who are already assigned.
    """
    bucket = _bucket(flag.name, ctx.user_id)

    # Rollout gate: rollout_pct of 100 means buckets 0–9999 pass.
    # Multiply before dividing and add a tiny epsilon so float rounding
    # cannot leave the product just below the intended integer and have
    # int() drop a whole bucket.
    rollout_threshold = int(flag.rollout_pct * BUCKET_COUNT / 100.0 + 1e-9)
    if bucket >= rollout_threshold:
        return None

    variants = flag.variants
    if not variants:
        return None

    # Weighted variant selection within the rollout population
    total_weight = sum(v.weight for v in variants)
    if total_weight == 0:
        return variants[0]

    point = bucket % total_weight
    cumulative = 0
    for variant in variants:
        cumulative += variant.weight
        if point < cumulative:
            return variant

    # Fallback (shouldn't happen if weights > 0)
    return variants[-1]
modelab/_errors.py ADDED
@@ -0,0 +1,26 @@
1
+ """Exception hierarchy for modelab."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
class ModelabError(Exception):
    """Root of the modelab exception hierarchy.

    The other exception classes in this module derive from it, so catching
    ``ModelabError`` catches all of them.
    """
8
+
9
+
10
class NotInitializedError(ModelabError):
    """Raised when modelab is used before modelab.init() has been called.

    Both ``modelab.assign()`` and ``modelab.evaluate()`` raise it. The
    message keeps the earlier wording as a prefix.
    """

    def __init__(self) -> None:
        super().__init__("modelab.init() must be called before assign() or evaluate()")
15
+
16
+
17
class FlagNotFoundError(ModelabError):
    """Raised when a requested flag name is not registered.

    The offending name is kept on the ``name`` attribute.
    """

    def __init__(self, name: str) -> None:
        self.name = name
        message = f"Flag not found: {name!r}"
        super().__init__(message)
23
+
24
+
25
class InvalidFlagError(ModelabError):
    """Raised when a ``Flag`` definition fails validation."""
@@ -0,0 +1,102 @@
1
+ """ServerStorage — HTTP storage backend that buffers and flushes to modelab-server."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import atexit
6
+ import json
7
+ import logging
8
+ import threading
9
+ import urllib.request
10
+ from dataclasses import asdict
11
+ from datetime import datetime, timezone
12
+ from typing import Any
13
+
14
+ from modelab._types import AssignmentRecord, EventRecord, ExecutionRecord
15
+
16
+ logger = logging.getLogger("modelab")
17
+
18
+ _FLUSH_SIZE = 50
19
+ _FLUSH_INTERVAL = 5.0 # seconds
20
+
21
+
22
def _default_serializer(obj: Any) -> str:
    """``json.dumps`` fallback for values the encoder cannot handle itself.

    Datetimes are rendered with ``isoformat()``; any other type raises
    ``TypeError``.
    """
    if not isinstance(obj, datetime):
        raise TypeError(f"Not serializable: {type(obj)}")
    return obj.isoformat()
26
+
27
+
28
class ServerStorage:
    """HTTP storage that buffers records and flushes to the modelab server.

    Assignments, executions and events are kept in separate in-memory
    buffers. A buffer is POSTed as one JSON batch to
    ``{base_url}/api/v1/ingest/{kind}`` when it reaches ``_FLUSH_SIZE``
    entries, when the background timer fires (every ``_FLUSH_INTERVAL``
    seconds), when ``flush()`` is called, and at interpreter exit.

    Delivery is best-effort: a batch that cannot be serialized or sent is
    logged on the "modelab" logger and dropped.

    NOTE(review): HTTP requests are made while the buffer lock is held, so a
    slow server can delay concurrent ``save_*`` calls for up to the request
    timeout.
    """

    def __init__(self, base_url: str, api_key: str = "") -> None:
        """Set up empty buffers, start the flush timer and register atexit.

        Args:
            base_url: Server base URL; a trailing "/" is removed.
            api_key: If non-empty, sent as the ``X-API-Key`` header.
        """
        self._base_url = base_url.rstrip("/")
        self._api_key = api_key
        self._lock = threading.Lock()
        self._assignments: list[dict[str, Any]] = []
        self._executions: list[dict[str, Any]] = []
        self._events: list[dict[str, Any]] = []

        # Background flush timer
        self._timer: threading.Timer | None = None
        self._start_timer()
        atexit.register(self.flush)

    def _start_timer(self) -> None:
        """Arm a one-shot daemon timer that calls ``_timer_flush``."""
        self._timer = threading.Timer(_FLUSH_INTERVAL, self._timer_flush)
        self._timer.daemon = True
        self._timer.start()

    def _timer_flush(self) -> None:
        """Timer callback: flush, then always re-arm the timer.

        The re-arm lives in ``finally`` so that an unexpected error during
        one flush cannot silently end periodic flushing.
        """
        try:
            self.flush()
        except Exception:
            logger.warning("Periodic flush failed", exc_info=True)
        finally:
            self._start_timer()

    def save_assignment(self, record: AssignmentRecord) -> None:
        """Buffer an assignment; send the batch once the buffer is full."""
        with self._lock:
            self._assignments.append(asdict(record))
            if len(self._assignments) >= _FLUSH_SIZE:
                self._flush_locked("assignments", self._assignments)
                self._assignments = []

    def save_execution(self, record: ExecutionRecord) -> None:
        """Buffer an execution; send the batch once the buffer is full."""
        with self._lock:
            self._executions.append(asdict(record))
            if len(self._executions) >= _FLUSH_SIZE:
                self._flush_locked("executions", self._executions)
                self._executions = []

    def save_event(self, record: EventRecord) -> None:
        """Buffer an event; send the batch once the buffer is full."""
        with self._lock:
            self._events.append(asdict(record))
            if len(self._events) >= _FLUSH_SIZE:
                self._flush_locked("events", self._events)
                self._events = []

    def flush(self) -> None:
        """Send every non-empty buffer (assignments, executions, events)."""
        with self._lock:
            if self._assignments:
                self._flush_locked("assignments", self._assignments)
                self._assignments = []
            if self._executions:
                self._flush_locked("executions", self._executions)
                self._executions = []
            if self._events:
                self._flush_locked("events", self._events)
                self._events = []

    def _flush_locked(self, endpoint: str, records: list[dict[str, Any]]) -> None:
        """POST ``records`` as a JSON array to the ingest ``endpoint``.

        Called with ``self._lock`` held. Never raises: serialization and
        transport errors are both logged. Serialization happens inside the
        ``try`` so that a record ``json.dumps`` cannot encode is dropped with
        its batch instead of raising before the caller clears the buffer
        (which would leave the bad record in place and fail every later
        flush).
        """
        url = f"{self._base_url}/api/v1/ingest/{endpoint}"
        try:
            data = json.dumps(records, default=_default_serializer).encode()
            req = urllib.request.Request(
                url,
                data=data,
                headers={
                    "Content-Type": "application/json",
                    **({"X-API-Key": self._api_key} if self._api_key else {}),
                },
                method="POST",
            )
            with urllib.request.urlopen(req, timeout=10) as resp:
                resp.read()
        except Exception:
            logger.warning("Failed to flush %d %s to %s", len(records), endpoint, url, exc_info=True)
modelab/_state.py ADDED
@@ -0,0 +1,42 @@
1
+ """Module-level singleton state for modelab."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import TYPE_CHECKING
6
+
7
+ from modelab._errors import InvalidFlagError
8
+ from modelab._types import Flag
9
+
10
+ if TYPE_CHECKING:
11
+ from modelab._server_storage import ServerStorage
12
+
13
+
14
class _State:
    """Holder for the storage backend, flag registry and server URL."""

    def __init__(self) -> None:
        self.storage: ServerStorage | None = None
        self.flags: dict[str, Flag] = {}
        self.server_url: str = ""

    @staticmethod
    def _validate(flag: Flag) -> None:
        """Raise InvalidFlagError if ``flag`` has no variants or a bad rollout."""
        if not flag.variants:
            raise InvalidFlagError(f"Flag {flag.name!r} has no variants")
        within_range = 0 <= flag.rollout_pct <= 100
        if not within_range:
            raise InvalidFlagError(
                f"Flag {flag.name!r} rollout_pct must be 0-100, got {flag.rollout_pct}"
            )

    def configure(self, storage: ServerStorage, flags: list[Flag], server_url: str = "") -> None:
        """Validate ``flags`` and install the new configuration.

        All flags are checked before anything is stored, so an invalid flag
        leaves the previous configuration untouched. Flags are indexed by
        name; a later flag with the same name replaces an earlier one.

        Raises:
            InvalidFlagError: If a flag has no variants or its rollout_pct
                is outside 0-100.
        """
        for candidate in flags:
            self._validate(candidate)

        registry: dict[str, Flag] = {}
        for candidate in flags:
            registry[candidate.name] = candidate

        self.storage = storage
        self.flags = registry
        self.server_url = server_url

    def reset(self) -> None:
        """Drop storage, flags and server URL."""
        self.storage, self.flags, self.server_url = None, {}, ""

    @property
    def initialized(self) -> bool:
        """True once a storage backend has been configured."""
        return self.storage is not None


# Single shared instance used by the public module-level API.
_global_state = _State()
modelab/_types.py ADDED
@@ -0,0 +1,70 @@
1
+ """Core types for modelab: Flag, Variant, EvalContext, records, Storage protocol."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from dataclasses import dataclass, field
7
+ from datetime import datetime, timezone
8
+ from typing import Any
9
+
10
+
11
@dataclass(frozen=True)
class Variant:
    """One arm of an experiment flag.

    Fields cannot be reassigned after construction; the ``config`` dict
    itself is an ordinary mutable dict.
    """

    # Variant label.
    name: str
    # Relative traffic weight used when picking among a flag's variants.
    weight: int = 50
    # Free-form settings for this variant; defaults to a new empty dict.
    config: dict[str, Any] = field(default_factory=dict)
18
+
19
+
20
@dataclass(frozen=True)
class Flag:
    """An experiment definition: a name, its variants and a rollout share."""

    # Flag name.
    name: str
    # Variants to choose from; defaults to a new empty list.
    variants: list[Variant] = field(default_factory=list)
    # Percentage of users included in the experiment.
    rollout_pct: float = 100.0
27
+
28
+
29
@dataclass(frozen=True)
class EvalContext:
    """Identifies the subject of an assignment."""

    # Identifier of the user being assigned.
    user_id: str
    # Optional session identifier; empty string when not supplied.
    session_id: str = ""
35
+
36
+
37
+ # ── Records (persisted to storage) ──────────────────────────────────
38
+
39
+
40
@dataclass
class AssignmentRecord:
    """Persisted record of one variant assignment."""

    # Fresh random UUID4 string per record.
    assignment_id: str = field(default_factory=lambda: f"{uuid.uuid4()}")
    flag_name: str = ""
    variant_name: str = ""
    user_id: str = ""
    session_id: str = ""
    # Variant config stored with the assignment.
    config_json: dict[str, Any] = field(default_factory=dict)
    # Creation time, timezone-aware UTC.
    assigned_at: datetime = field(default_factory=lambda: datetime.now(tz=timezone.utc))
49
+
50
+
51
@dataclass
class ExecutionRecord:
    """Persisted metrics for one execution tied to an assignment."""

    # Identifier of the assignment these metrics belong to.
    assignment_id: str = ""
    # Metric fields are None when not reported.
    latency_ms: float | None = None
    input_tokens: int | None = None
    output_tokens: int | None = None
    cost: float | None = None
    error: str | None = None
    # Arbitrary extra key/value data.
    metadata_json: dict[str, Any] = field(default_factory=dict)
    # Creation time, timezone-aware UTC.
    recorded_at: datetime = field(default_factory=lambda: datetime.now(tz=timezone.utc))
61
+
62
+
63
@dataclass
class EventRecord:
    """Persisted outcome event tied to an assignment."""

    # Fresh random UUID4 string per event.
    event_id: str = field(default_factory=lambda: f"{uuid.uuid4()}")
    # Identifier of the assignment the event belongs to.
    assignment_id: str = ""
    event_type: str = ""  # success / failure / custom
    event_name: str = ""  # for custom events
    # Extra data attached to the event.
    payload_json: dict[str, Any] = field(default_factory=dict)
    # Creation time, timezone-aware UTC.
    created_at: datetime = field(default_factory=lambda: datetime.now(tz=timezone.utc))
modelab/py.typed ADDED
File without changes
@@ -0,0 +1,163 @@
1
+ Metadata-Version: 2.4
2
+ Name: modelab
3
+ Version: 0.1.0
4
+ Summary: Provider-agnostic A/B testing for LLM systems
5
+ License-Expression: MIT
6
+ Keywords: ab-testing,experiments,feature-flags,llm
7
+ Requires-Python: >=3.10
8
+ Provides-Extra: dev
9
+ Requires-Dist: httpx>=0.27; extra == 'dev'
10
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
11
+ Requires-Dist: pytest>=8.0; extra == 'dev'
12
+ Provides-Extra: server
13
+ Requires-Dist: fastapi>=0.110; extra == 'server'
14
+ Requires-Dist: psycopg[binary,pool]>=3.1; extra == 'server'
15
+ Requires-Dist: uvicorn[standard]>=0.29; extra == 'server'
16
+ Description-Content-Type: text/markdown
17
+
18
+ # modelab
19
+
20
+ Provider-agnostic A/B testing for LLM systems in production.
21
+
22
+ **Two components:**
23
+ 1. **Python SDK** — zero-dependency library for assignment, tracking, and evaluation
24
+ 2. **Server + Dashboard** — self-hosted FastAPI + React app for visualization (Docker Compose)
25
+
26
+ ## Quick Start
27
+
28
+ ### SDK (local development)
29
+
30
+ ```bash
31
+ pip install modelab
32
+ ```
33
+
34
+ ```python
35
+ import modelab
36
+ from modelab import Flag, Variant, EvalContext
37
+
38
+ # Initialize — point to the modelab server
39
+ modelab.init(
40
+ server="http://localhost:8100",
41
+ flags=[
42
+ Flag(
43
+ name="summarizer_v2",
44
+ variants=[
45
+ Variant("control", weight=50, config={"model": "gpt-3.5-turbo", "prompt": "Summarize: {input}"}),
46
+ Variant("treatment", weight=50, config={"model": "gpt-4", "prompt": "Concisely summarize: {input}"}),
47
+ ],
48
+ rollout_pct=100,
49
+ ),
50
+ ],
51
+ )
52
+
53
+ # Assign a variant
54
+ ctx = EvalContext(user_id="user_123", session_id="abc")
55
+ assignment = modelab.assign("summarizer_v2", ctx)
56
+
57
+ if assignment is None:
58
+ # Outside rollout — use default behavior
59
+ response = call_llm(model="gpt-3.5-turbo", prompt=text)
60
+ else:
61
+ # In experiment — use assigned variant config
62
+ response = call_llm(
63
+ model=assignment.config["model"],
64
+ prompt=assignment.config["prompt"].format(input=text),
65
+ )
66
+ assignment.record(response, cost=0.013)
67
+ assignment.mark_success()
68
+
69
+ # Evaluate results
70
+ results = modelab.evaluate("summarizer_v2")
71
+ print(results)
72
+ ```
73
+
74
+ ### Self-Hosted Server + Dashboard
75
+
76
+ ```bash
77
+ docker compose up
78
+ ```
79
+
80
+ This starts:
81
+ - **PostgreSQL** on port 5432
82
+ - **modelab server + dashboard** on port 8100
83
+
84
+ ## Concepts
85
+
86
+ ### Flags
87
+ An experiment with one or more variants and a rollout percentage (0-100%).
88
+
89
+ ### Variants
90
+ Each variant has a name, weight (for traffic splitting), and a config dict you use to parameterize your LLM calls.
91
+
92
+ ### Assignment
93
+ Deterministic — the same `(flag_name, user_id)` always maps to the same variant. Uses MD5 hashing into 10,000 buckets for 0.01% rollout granularity.
94
+
95
+ ### Recording
96
+
97
+ Use `assignment.record(response)` to capture execution metrics. Token counts are automatically extracted from the response object via duck-typing (supports OpenAI and Anthropic response formats). Cost, latency, error, and arbitrary metadata can be passed as keyword arguments:
98
+
99
+ ```python
100
+ assignment.record(response, cost=0.013, latency_ms=250.0, model="gpt-4o")
101
+ ```
102
+
103
+ You can also record without a response object:
104
+
105
+ ```python
106
+ assignment.record(input_tokens=50, output_tokens=100, cost=0.01)
107
+ ```
108
+
109
+ ### Events
110
+ Mark assignments as success/failure or record custom events (e.g., "copied", "thumbs_up").
111
+
112
+ ### Evaluation
113
+ `modelab.evaluate(flag_name)` returns per-variant metrics: success rate, avg latency, avg cost, token usage, and custom event counts.
114
+
115
+ ## Server API
116
+
117
+ ### Ingestion (from SDK)
118
+ ```
119
+ POST /api/v1/ingest/assignments (batch)
120
+ POST /api/v1/ingest/executions (batch)
121
+ POST /api/v1/ingest/events (batch)
122
+ ```
123
+
124
+ ### Dashboard API
125
+ ```
126
+ GET /api/v1/flags — list flags with summary stats
127
+ GET /api/v1/flags/{name} — detailed per-variant evaluation
128
+ GET /api/v1/flags/{name}/timeline — time-series metrics
129
+ ```
130
+
131
+ ## Development
132
+
133
+ ```bash
134
+ # Install in dev mode
135
+ pip install -e ".[dev]"
136
+
137
+ # Run tests
138
+ pytest
139
+
140
+ # Run dashboard dev server
141
+ cd dashboard && npm install && npm run dev
142
+
143
+ # Run API server (requires Postgres)
144
+ uvicorn server.app:app --reload --port 8100
145
+ ```
146
+
147
+ ## Architecture
148
+
149
+ ```
150
+ Developer's App
151
+
152
+ ├── modelab SDK (pip install modelab)
153
+ │ └── ServerStorage ──HTTP POST──▶ modelab-server
154
+
155
+ modelab-server (docker compose up)
156
+ ├── FastAPI backend
157
+ ├── React dashboard (served as static files)
158
+ └── PostgreSQL
159
+ ```
160
+
161
+ ## License
162
+
163
+ MIT
@@ -0,0 +1,11 @@
1
+ modelab/__init__.py,sha256=ToaPXEjWZYi8poObLXTHXOkTz8DugUHLyKcZsqQ_jLg,3463
2
+ modelab/_assignment.py,sha256=XXH9ZNMftobh6m2rj62T-pqpjwxw8wKG41JLmXz4Mvo,5211
3
+ modelab/_engine.py,sha256=rPhlSXIRQLvhHWxnLs6YayJ1HJX5iKwZbVFrkmIB49A,1333
4
+ modelab/_errors.py,sha256=y6aqP28CihLrlM2MTxm0v3ISdK-qbkW4aOetOZRh_oM,693
5
+ modelab/_server_storage.py,sha256=pmlIRwIY9mmNKEUemVeF2f41YnJ5RYcr_2YZlmCSksA,3537
6
+ modelab/_state.py,sha256=pERDfuCx5Ylh7YzqTdpbgZ90aTHz9bO5HyqCcdbk0kY,1225
7
+ modelab/_types.py,sha256=3zc_be5PTmlANlLrY1ZhYoX9vaTyWO4PdBd5NK6zh_s,2033
8
+ modelab/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
+ modelab-0.1.0.dist-info/METADATA,sha256=lPiLzc6Hs0Ni5Qk4V1TvHxSHgVLloG0bteexe3ea5-g,4386
10
+ modelab-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
11
+ modelab-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any