expt-logger 0.1.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,205 @@
1
+ """
2
+ expt_logger - Simple experiment tracking library.
3
+
4
+ Usage:
5
+ import expt_logger
6
+
7
+ run = expt_logger.init(name="experiment-1")
8
+
9
+ for step in range(100):
10
+ expt_logger.log({"train/loss": 0.5, "train/accuracy": 0.9})
11
+
12
+ expt_logger.end()
13
+
14
+ Or with context manager:
15
+ with expt_logger.init(name="my-experiment") as run:
16
+ expt_logger.log({"loss": 0.5})
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import os
22
+ from typing import Any
23
+
24
+ from .client import APIError, Client
25
+ from .config import DEFAULT_BASE_URL
26
+ from .run import Run
27
+ from .types import Config, Message, Reward, Rollout, Scalar
28
+ from .utils import parse_conversation
29
+
30
+ __version__ = "0.1.0"
31
+ __all__ = [
32
+ # Main API
33
+ "init",
34
+ "log",
35
+ "log_rollout",
36
+ "flush",
37
+ "end",
38
+ # Global state
39
+ "run",
40
+ "config",
41
+ # Classes
42
+ "Run",
43
+ "Config",
44
+ "Client",
45
+ "APIError",
46
+ # Types
47
+ "Scalar",
48
+ "Rollout",
49
+ "Message",
50
+ "Reward",
51
+ # Utils
52
+ "parse_conversation",
53
+ ]
54
+
55
+ # Global run instance
56
+ _current_run: Run | None = None
57
+
58
+
59
def init(
    name: str | None = None,
    config: dict[str, Any] | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
) -> Run:
    """
    Initialize a new experiment run and register it as the current run.

    Any run that is already active is ended first.

    Args:
        name: Experiment name. Passed to ``Run`` unchanged; may be None.
        config: Initial configuration dictionary.
        api_key: API key. Falls back to the EXPT_LOGGER_API_KEY environment variable.
        base_url: API server URL.
            Falls back to the EXPT_LOGGER_BASE_URL env var, then to DEFAULT_BASE_URL.

    Returns:
        The new Run instance (also returned by ``expt_logger.run()``).

    Raises:
        ValueError: If no API key is passed and EXPT_LOGGER_API_KEY is unset or empty.

    Example:
        run = expt_logger.init(
            name="my-experiment",
            config={"lr": 0.001, "batch_size": 32}
        )
    """
    global _current_run

    # Finish any existing run. The global is cleared *before* anything below
    # can fail, so a finished run is never left registered as the current one
    # (log() on a finished run silently drops data).
    if _current_run is not None:
        previous, _current_run = _current_run, None
        previous.end()

    # Resolve API key
    resolved_api_key = api_key or os.environ.get("EXPT_LOGGER_API_KEY")
    if not resolved_api_key:
        raise ValueError("API key required. Pass api_key or set EXPT_LOGGER_API_KEY env variable.")

    # Resolve base URL. An env var that is set but empty is treated as unset;
    # otherwise requests would be built against an empty base URL.
    resolved_base_url = base_url or os.environ.get("EXPT_LOGGER_BASE_URL") or DEFAULT_BASE_URL

    _current_run = Run(
        name=name,
        config=config,
        api_key=resolved_api_key,
        base_url=resolved_base_url,
    )

    return _current_run
106
+
107
+
108
def _get_run() -> Run:
    """Return the active run; raise RuntimeError when none has been initialized."""
    active = _current_run
    if active is not None:
        return active
    raise RuntimeError("No active run. Call expt_logger.init() first.")
113
+
114
+
115
def run() -> Run | None:
    """Return the module-level run set by init(), or None when there is none."""
    active: Run | None = _current_run
    return active
118
+
119
+
120
def config() -> Config:
    """Return the active run's config object (RuntimeError if no run is active)."""
    active_run = _get_run()
    return active_run.config
123
+
124
+
125
def log(
    metrics: dict[str, float],
    step: int | None = None,
    mode: str | None = None,
    commit: bool = True,
) -> None:
    """
    Forward scalar metrics to the current run's ``log`` method.

    Args:
        metrics: Mapping of metric name to value. A slash prefix selects
            the mode, e.g. "train/loss" or "eval/accuracy".
        step: Step number. When omitted the run picks the step itself.
        mode: Mode applied to metric names that carry no slash prefix.
        commit: When False, the metrics are held back until a later call
            made with commit=True.

    Raises:
        RuntimeError: If no run is active.

    Example:
        expt_logger.log({"loss": 0.5, "accuracy": 0.9})
        expt_logger.log({"train/loss": 0.5, "eval/loss": 0.6}, step=10)

        # Multiple metrics at same step
        expt_logger.log({"train/loss": 0.5}, commit=False)
        expt_logger.log({"train/acc": 0.9})  # commits both
    """
    active_run = _get_run()
    active_run.log(metrics, step=step, mode=mode, commit=commit)
150
+
151
+
152
def log_rollout(
    prompt: str,
    messages: list[dict[str, str]] | str,
    rewards: dict[str, float] | list[dict[str, float | str]],
    step: int | None = None,
    mode: str = "train",
) -> None:
    """
    Forward a conversation rollout to the current run's ``log_rollout`` method.

    Args:
        prompt: The prompt text.
        messages: A list of message dicts [{"role": "...", "content": "..."}].
            A raw string is also accepted by the signature, but the run hands
            strings to parse_conversation(), which currently raises
            NotImplementedError.
        rewards: Either a dict {"reward_name": value} or a list
            [{"name": ..., "value": ...}].
        step: Step number. The run uses its current step when omitted.
        mode: "train" or "eval".

    Raises:
        RuntimeError: If no run is active.

    Example:
        expt_logger.log_rollout(
            prompt="What is 2+2?",
            messages=[
                {"role": "assistant", "content": "2+2 equals 4."},
                {"role": "user", "content": "Thanks!"},
            ],
            rewards={"correctness": 1.0, "clarity": 0.9},
        )
    """
    active_run = _get_run()
    active_run.log_rollout(prompt, messages, rewards, step=step, mode=mode)
188
+
189
+
190
def flush() -> None:
    """Ask the current run to send its buffered data to the server now."""
    active_run = _get_run()
    active_run.flush()
193
+
194
+
195
def end() -> None:
    """
    Finish the current run and clear the module-level reference to it.

    Does nothing when no run is active. Runs also register their own cleanup
    for program exit, so calling this is only needed to end a run early.
    """
    global _current_run
    active = _current_run
    if active is None:
        return
    active.end()
    _current_run = None
expt_logger/client.py ADDED
@@ -0,0 +1,207 @@
1
+ """HTTP client for the experiment tracking API."""
2
+
3
+ import logging
4
+ from typing import Any, cast
5
+
6
+ import httpx
7
+
8
+ from .types import Rollout, Scalar
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
+ class APIError(Exception):
14
+ """Raised when an API request fails."""
15
+
16
+ def __init__(self, message: str, status_code: int | None = None):
17
+ super().__init__(message)
18
+ self.status_code = status_code
19
+
20
+
21
class Client:
    """HTTP client for the experiment tracking API.

    Wraps one ``httpx.Client`` that carries the JSON content type and, when
    an API key is given, the ``x-api-key`` header. Call close() when done.
    """

    def __init__(
        self,
        base_url: str,
        api_key: str | None = None,
        timeout: int = 30,
    ):
        """
        Args:
            base_url: Server root URL; a trailing slash is stripped.
            api_key: Sent as the ``x-api-key`` header when truthy.
            timeout: Timeout handed to ``httpx.Client``.
        """
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        self.timeout = timeout

        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["x-api-key"] = api_key

        self._client = httpx.Client(headers=headers, timeout=timeout)

    def create_experiment(
        self, name: str | None = None, config: dict[str, Any] | None = None
    ) -> str:
        """
        Create a new experiment.

        Args:
            name: Experiment name. Omitted from the request when None
                (the server is then expected to generate one).
            config: Initial experiment configuration. Omitted when None.

        Returns:
            The ``experimentId`` field of the server response.

        Raises:
            APIError: If the request fails or returns a non-success status.
        """
        payload: dict[str, Any] = {}
        if name is not None:
            payload["name"] = name
        if config is not None:
            payload["config"] = config

        response = self._request("POST", "/api/experiments", json=payload)
        return cast(str, response["experimentId"])

    def log_scalars(self, experiment_id: str, scalars: list[Scalar]) -> dict[str, Any]:
        """Log scalar metrics for an experiment (best effort).

        Failures are logged as warnings instead of raised; the return value
        is always an empty dict.
        """
        payload = {
            "scalars": [
                {
                    "step": s.step,
                    "mode": s.mode,
                    "type": s.type,
                    "value": s.value,
                }
                for s in scalars
            ]
        }
        return self._request(
            "POST",
            f"/api/experiments/{experiment_id}/scalars",
            json=payload,
            fire_and_forget=True,
        )

    def log_rollouts(self, experiment_id: str, rollouts: list[Rollout]) -> dict[str, Any]:
        """Log rollouts for an experiment (best effort).

        Failures are logged as warnings instead of raised; the return value
        is always an empty dict.
        """
        payload = {
            "rollouts": [
                {
                    "step": r.step,
                    "mode": r.mode,
                    "promptText": r.prompt_text,
                    "messages": [{"role": m.role, "content": m.content} for m in r.messages],
                    "rewards": [{"name": rw.name, "value": rw.value} for rw in r.rewards],
                }
                for r in rollouts
            ]
        }
        return self._request(
            "POST",
            f"/api/experiments/{experiment_id}/rollouts",
            json=payload,
            fire_and_forget=True,
        )

    def update_experiment(
        self,
        experiment_id: str,
        name: str | None = None,
        config: dict[str, Any] | None = None,
        status: str | None = None,
    ) -> dict[str, Any]:
        """
        Update an experiment's name, config, and/or status.

        Args:
            experiment_id: The experiment ID.
            name: New experiment name (optional).
            config: New experiment configuration (optional).
            status: New experiment status (optional).

        Returns:
            The decoded JSON response ({} for an empty body).

        Raises:
            ValueError: If none of name, config, or status is provided.
            APIError: If the request fails or returns a non-success status.
        """
        if name is None and config is None and status is None:
            raise ValueError("At least one of name, config, or status must be provided")

        payload: dict[str, Any] = {"id": experiment_id}
        if name is not None:
            payload["name"] = name
        if config is not None:
            payload["config"] = config
        if status is not None:
            payload["status"] = status

        return self._request("PUT", "/api/experiments", json=payload)

    def log_config(self, experiment_id: str, config: dict[str, Any]) -> dict[str, Any]:
        """Log configuration for an experiment."""
        return self.update_experiment(experiment_id, config=config)

    def end_experiment(self, experiment_id: str) -> dict[str, Any]:
        """Mark an experiment as finished (status "complete")."""
        return self.update_experiment(experiment_id, status="complete")

    @staticmethod
    def _error_message(response: httpx.Response) -> Any:
        """Return the ``error`` field of a JSON error body, else the raw response text."""
        try:
            data = response.json()
        except ValueError:
            return response.text
        # A JSON body that is not an object (e.g. a list or string) has no
        # "error" field to read; fall back to the raw text.
        if isinstance(data, dict):
            return data.get("error", response.text)
        return response.text

    def _request(
        self,
        method: str,
        path: str,
        json: dict[str, Any] | None = None,
        fire_and_forget: bool = False,
    ) -> dict[str, Any]:
        """
        Make an HTTP request to the API.

        Args:
            method: HTTP method (GET, POST, etc.)
            path: API path, appended to the base URL.
            json: JSON payload
            fire_and_forget: If True, failures are logged as warnings and {}
                is returned instead of raising. The request itself is still
                sent synchronously.

        Returns:
            The decoded JSON response, or {} for an empty body or any
            fire-and-forget call.

        Raises:
            APIError: On a transport error or non-success status, unless
                fire_and_forget is set.
        """
        url = f"{self.base_url}{path}"

        try:
            # Both modes honour the requested method.
            response = self._client.request(method=method, url=url, json=json)
        except httpx.RequestError as e:
            if fire_and_forget:
                logger.warning("Fire-and-forget request exception: %s %s - %s", method, path, e)
                return {}
            raise APIError(f"Request failed: {e}") from e

        if fire_and_forget:
            # Log errors but don't raise them
            if not response.is_success:
                logger.warning(
                    "Fire-and-forget request failed: %s %s returned %s: %s",
                    method,
                    path,
                    response.status_code,
                    self._error_message(response),
                )
            return {}

        if not response.is_success:
            raise APIError(self._error_message(response), status_code=response.status_code)

        # Handle empty responses
        if not response.text:
            return {}

        return cast(dict[str, Any], response.json())

    def close(self) -> None:
        """Close the HTTP client."""
        self._client.close()
expt_logger/config.py ADDED
@@ -0,0 +1,5 @@
1
+ """Central configuration for expt_logger."""
2
+
3
+ # Default API base URL
4
+ # Change this value to switch to production endpoint
5
+ DEFAULT_BASE_URL = "https://expt-platform.vercel.app/"
expt_logger/run.py ADDED
@@ -0,0 +1,317 @@
1
+ """Run class for experiment tracking."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import atexit
6
+ import signal
7
+ import sys
8
+ import threading
9
+ from typing import Any
10
+
11
+ from .client import Client
12
+ from .config import DEFAULT_BASE_URL
13
+ from .types import Config, Message, Reward, Rollout, Scalar
14
+ from .utils import parse_conversation, parse_metric_key
15
+
16
+
17
class Run:
    """
    A single experiment run.

    Tracks metrics, rollouts, and configuration for an experiment.
    Creating a Run creates the experiment on the server, then registers an
    atexit hook and (in the main thread) SIGINT/SIGTERM handlers so that
    buffered data is flushed on exit or interrupt.
    """

    def __init__(
        self,
        name: str | None = None,
        config: dict[str, Any] | None = None,
        api_key: str | None = None,
        base_url: str = DEFAULT_BASE_URL,
    ):
        """
        Args:
            name: Experiment name sent to the server; may be None.
            config: Initial configuration values.
            api_key: API key handed to the HTTP client.
            base_url: Base URL of the tracking server.

        Raises:
            APIError: If creating the experiment on the server fails.
        """
        # Stored exactly as given. It stays None when no name is passed:
        # create_experiment() returns only the experiment ID.
        self.name = name
        self.config = Config()

        if config:
            self.config.update(config)

        # Internal state
        self._client = Client(base_url=base_url, api_key=api_key)
        self._base_url = base_url
        self._experiment_id: str | None = None
        self._current_step = 1
        self._scalar_buffer: list[Scalar] = []
        self._rollout_buffer: list[Rollout] = []
        # Re-entrant on purpose: the signal handler runs in the main thread
        # and may call _cleanup() while that thread already holds the lock.
        # A plain Lock would deadlock there.
        self._lock = threading.RLock()
        self._finished = False

        # Track pending metrics for commit=False behavior
        self._pending_metrics: dict[str, float] = {}
        self._pending_step: int | None = None
        self._pending_mode: str | None = None

        # Handlers that were installed before this run replaced them
        self._original_handlers: dict[signal.Signals, Any] = {}

        # Create experiment on server; don't leak the HTTP client on failure
        try:
            self._init_experiment()
        except BaseException:
            self._client.close()
            raise

        # Setup exit handlers
        self._setup_exit_handlers()

    def _init_experiment(self) -> None:
        """Create the experiment on the server and remember its ID."""
        # An empty config is sent as None so the field is omitted entirely.
        config_dict = self.config.to_dict() or None
        self._experiment_id = self._client.create_experiment(self.name, config_dict)

    def _setup_exit_handlers(self) -> None:
        """Register cleanup handlers for graceful shutdown."""
        atexit.register(self._cleanup)

        # signal.signal() raises ValueError outside the main thread, so a Run
        # created in a worker thread relies on the atexit hook alone.
        if threading.current_thread() is not threading.main_thread():
            return

        for sig in (signal.SIGINT, signal.SIGTERM):
            self._original_handlers[sig] = signal.getsignal(sig)
            signal.signal(sig, self._signal_handler)

    def _restore_signal_handlers(self) -> None:
        """Reinstall the pre-run signal handlers where ours is still active."""
        if threading.current_thread() is not threading.main_thread():
            return  # signal.signal() is only allowed in the main thread

        for sig, original in self._original_handlers.items():
            # Leave the signal alone if someone installed a newer handler.
            if signal.getsignal(sig) == self._signal_handler:
                # getsignal() reports None for handlers not set from Python.
                signal.signal(sig, original if original is not None else signal.SIG_DFL)

    def _signal_handler(self, signum: int, frame: Any) -> None:
        """Handle interrupt signals: clean up, restore handlers, then exit."""
        self._cleanup()

        # _cleanup() returns early if the run was already finished, so
        # restore explicitly as well (the call is idempotent).
        self._restore_signal_handlers()

        if signum == signal.SIGINT:
            raise KeyboardInterrupt
        sys.exit(128 + signum)

    def _cleanup(self) -> None:
        """Flush buffers and mark experiment as finished (runs at most once)."""
        with self._lock:
            if self._finished:
                return
            self._finished = True

        # Commit any pending metrics
        self._commit_pending()

        # Flush remaining data
        self.flush()

        # Mark as finished on server; best effort, never fail shutdown
        if self._experiment_id:
            try:
                self._client.end_experiment(self._experiment_id)
            except Exception as e:
                print(f"Warning: Failed to mark experiment as finished: {e}", file=sys.stderr)

        # Cleanup
        self._client.close()
        atexit.unregister(self._cleanup)
        self._restore_signal_handlers()

    def log(
        self,
        metrics: dict[str, float],
        step: int | None = None,
        mode: str | None = None,
        commit: bool = True,
    ) -> None:
        """
        Log scalar metrics. Ignored once the run is finished.

        Args:
            metrics: Dictionary of metric names to values.
                Use slash prefix for mode: "train/loss", "eval/accuracy"
            step: Step number. If omitted, reuses the step of metrics still
                pending from commit=False calls, else the current step.
            mode: Mode for metrics without slash prefix. If omitted, reuses
                the pending mode, else "train".
            commit: If False, buffer metrics until next commit=True call.
                Useful for logging multiple metrics at the same step.

        Raises:
            ValueError: If mode is given and any metric key contains a slash.
        """
        if self._finished:
            return

        # Check for conflicting usage: mode param with slash-prefixed keys
        if mode is not None and any("/" in key for key in metrics):
            raise ValueError(
                "Cannot specify 'mode' parameter when metric keys contain slashes. "
                "Either use slash-prefixed keys like 'train/loss' OR use the mode "
                "parameter, not both."
            )

        with self._lock:
            # Determine step
            if step is None:
                if self._pending_step is not None:
                    step = self._pending_step
                else:
                    step = self._current_step

            default_mode = mode or self._pending_mode or "train"

            # Accumulate metrics under normalized "mode/name" keys
            for key, value in metrics.items():
                parsed_mode, metric_name = parse_metric_key(key)
                # Use parsed mode from key, or fall back to default
                final_mode = parsed_mode if "/" in key else default_mode
                self._pending_metrics[f"{final_mode}/{metric_name}"] = value

            self._pending_step = step
            self._pending_mode = default_mode

            if commit:
                self._commit_pending()

    def _commit_pending(self) -> None:
        """Move all pending metrics into the scalar buffer and advance the step."""
        with self._lock:
            if not self._pending_metrics or self._pending_step is None:
                return

            step = self._pending_step
            for key, value in self._pending_metrics.items():
                mode, metric_name = parse_metric_key(key)
                self._scalar_buffer.append(
                    Scalar(
                        step=step,
                        mode=mode,
                        type=metric_name,
                        value=value,
                    )
                )

            # Auto-increment step for next log call
            self._current_step = step + 1

            # Clear pending state
            self._pending_metrics = {}
            self._pending_step = None
            self._pending_mode = None

    def log_rollout(
        self,
        prompt: str,
        messages: list[dict[str, str]] | str,
        rewards: dict[str, float] | list[dict[str, float | str]],
        step: int | None = None,
        mode: str = "train",
    ) -> None:
        """
        Buffer a conversation rollout. Ignored once the run is finished.

        Args:
            prompt: The prompt text.
            messages: Either a list of message dicts [{"role": "...", "content": "..."}]
                or a string, which is passed to parse_conversation().
            rewards: Either a dict {"reward_name": value} or list [{"name": ..., "value": ...}]
            step: Step number. Uses current step if not provided.
            mode: "train" or "eval".
        """
        if self._finished:
            return

        if step is None:
            step = self._current_step

        # Parse messages if string
        if isinstance(messages, str):
            parsed = parse_conversation(messages)
        else:
            parsed = messages

        # Convert to Message objects
        message_objs = [Message(role=m["role"], content=m["content"]) for m in parsed]

        # Parse rewards
        if isinstance(rewards, dict):
            reward_objs = [Reward(name=k, value=v) for k, v in rewards.items()]
        else:
            reward_objs = [Reward(name=str(r["name"]), value=float(r["value"])) for r in rewards]

        with self._lock:
            self._rollout_buffer.append(
                Rollout(
                    step=step,
                    mode=mode,
                    prompt_text=prompt,
                    messages=message_objs,
                    rewards=reward_objs,
                )
            )

    def flush(self) -> None:
        """Send all buffered data to the server.

        Send failures are reported on stderr and never raised.
        """
        if self._experiment_id is None:
            return

        # Commit any pending metrics first
        self._commit_pending()

        # Swap the buffers out under the lock; send without holding it.
        with self._lock:
            scalars = self._scalar_buffer.copy()
            rollouts = self._rollout_buffer.copy()
            self._scalar_buffer.clear()
            self._rollout_buffer.clear()

        # Send to server
        if scalars:
            try:
                self._client.log_scalars(self._experiment_id, scalars)
            except Exception as e:
                print(f"Warning: Failed to log scalars: {e}", file=sys.stderr)

        if rollouts:
            try:
                self._client.log_rollouts(self._experiment_id, rollouts)
            except Exception as e:
                print(f"Warning: Failed to log rollouts: {e}", file=sys.stderr)

    def end(self) -> None:
        """Explicitly finish the run."""
        self._cleanup()

    @property
    def id(self) -> str | None:
        """Return the experiment ID."""
        return self._experiment_id

    @property
    def step(self) -> int:
        """Return the current step."""
        return self._current_step

    @property
    def base_url(self) -> str:
        """Return the base URL of the experiment tracking server, as passed in."""
        return self._base_url

    @property
    def experiment_url(self) -> str | None:
        """Return the full URL to view this experiment in the web interface."""
        if self._experiment_id is None:
            return None
        # Drop any trailing slash so the joined URL has a single separator
        base = self._base_url.rstrip("/")
        return f"{base}/experiments/{self._experiment_id}"

    def __enter__(self) -> Run:
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        self.end()
        # Don't suppress exceptions

    def __repr__(self) -> str:
        return f"Run(name={self.name!r}, id={self._experiment_id!r}, step={self._current_step})"
expt_logger/types.py ADDED
@@ -0,0 +1,90 @@
1
+ """Type definitions for the expt_logger library."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import Any
5
+
6
+
7
@dataclass
class Message:
    """One turn of a conversation."""

    # Speaker of the turn, e.g. "user", "assistant", "system"
    role: str
    # Text of the turn
    content: str
13
+
14
+
15
@dataclass
class Reward:
    """A reward value identified by name."""

    # Name of the reward
    name: str
    # Reward value
    value: float
21
+
22
+
23
@dataclass
class Scalar:
    """One scalar metric value recorded at a given step."""

    # Step the value belongs to
    step: int
    # Mode, e.g. "train" or "eval"
    mode: str
    # Metric name
    type: str
    # Metric value
    value: float
31
+
32
+
33
@dataclass
class Rollout:
    """A conversation rollout together with its rewards."""

    # Step the rollout belongs to
    step: int
    # Mode, e.g. "train" or "eval"
    mode: str
    # Prompt text of the rollout
    prompt_text: str
    # Conversation turns, in order
    messages: list[Message]
    # Rewards assigned to the rollout
    rewards: list[Reward]
42
+
43
+
44
@dataclass
class Config:
    """
    Experiment configuration that supports both dict-style and attribute-style access.

    All values live in the ``_data`` dict; attribute reads and writes for any
    other name are redirected to it. Names that match a method of this class
    (``update``, ``to_dict``) resolve to the method on attribute reads, so use
    item access for such keys.

    Usage:
        config = Config()
        config.learning_rate = 0.001
        config["batch_size"] = 32
        config.update({"epochs": 10})
    """

    # Backing store for every configuration value
    _data: dict[str, Any] = field(default_factory=dict)

    def __setattr__(self, name: str, value: Any) -> None:
        # Only the backing dict is a real attribute; everything else is data.
        if name == "_data":
            object.__setattr__(self, name, value)
        else:
            self._data[name] = value

    def __getattr__(self, name: str) -> Any:
        # Called only after normal attribute lookup has failed.
        if name == "_data":
            # _data itself is missing; let the default lookup raise
            # AttributeError instead of recursing through self._data.
            return object.__getattribute__(self, name)
        try:
            return self._data[name]
        except KeyError:
            # "from None": the KeyError is an implementation detail and would
            # only add a misleading chained traceback.
            raise AttributeError(f"Config has no attribute '{name}'") from None

    def __setitem__(self, key: str, value: Any) -> None:
        self._data[key] = value

    def __getitem__(self, key: str) -> Any:
        return self._data[key]

    def __contains__(self, key: str) -> bool:
        return key in self._data

    def update(self, data: dict[str, Any]) -> None:
        """Update config with a dictionary of values."""
        self._data.update(data)

    def to_dict(self) -> dict[str, Any]:
        """Return config as a plain dictionary (a shallow copy)."""
        return self._data.copy()

    def __repr__(self) -> str:
        return f"Config({self._data})"
expt_logger/utils.py ADDED
@@ -0,0 +1,49 @@
1
+ """Utility functions for the expt_logger library."""
2
+
3
+ import os
4
+ from typing import TypeVar
5
+
6
+
7
+ def get_env_var(name: str, default: str | None = None) -> str | None:
8
+ """Get environment variable with optional default."""
9
+ return os.environ.get(name, default)
10
+
11
+
12
def parse_metric_key(key: str) -> tuple[str, str]:
    """
    Split a metric key at its first slash into (mode, metric_name).

    A key without any slash is returned with the mode "train".

    Examples:
        "train/loss" -> ("train", "loss")
        "eval/accuracy" -> ("eval", "accuracy")
        "loss" -> ("train", "loss")  # default mode
        "eval/a/b" -> ("eval", "a/b")  # only the first slash splits
    """
    mode, separator, metric_name = key.partition("/")
    if not separator:
        return "train", key
    return mode, metric_name
25
+
26
+
27
def parse_conversation(text: str) -> list[dict[str, str]]:
    r"""
    Placeholder for parsing a conversation string into a list of messages.

    Not implemented yet: every call raises NotImplementedError.

    TODO: Implement parsing logic for different conversation formats:
    - "User: hello\nAssistant: hi there"
    - "Human: hello\nAssistant: hi there"
    - "<user>hello</user><assistant>hi</assistant>"

    Intended return value: list of {"role": "user"|"assistant", "content": "..."}

    Raises:
        NotImplementedError: Always.
    """
    reason = (
        "Conversation parsing from raw text is not yet implemented. "
        "Please pass messages as a list of dicts with 'role' and 'content' keys."
    )
    raise NotImplementedError(reason)
42
+
43
+
44
T = TypeVar("T")


def chunk_list(lst: list[T], chunk_size: int) -> list[list[T]]:
    """
    Split a list into consecutive chunks of at most ``chunk_size`` items.

    Args:
        lst: The list to split. It is not modified.
        chunk_size: Maximum length of each chunk; must be positive.

    Returns:
        The chunks in order; the last one may be shorter. An empty input
        gives an empty list.

    Raises:
        ValueError: If chunk_size is not positive.
    """
    # A negative step would make range() empty and silently drop every item;
    # zero would fail inside range() with an unrelated-looking message.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")
    return [lst[i : i + chunk_size] for i in range(0, len(lst), chunk_size)]
@@ -0,0 +1,252 @@
1
+ Metadata-Version: 2.4
2
+ Name: expt-logger
3
+ Version: 0.1.0.dev0
4
+ Summary: Simple experiment logging library
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: httpx>=0.27.0
7
+ Description-Content-Type: text/markdown
8
+
9
+ # expt_logger
10
+
11
+ Simple experiment tracking for RL training with a W&B-style API.
12
+
13
+ ## Quick Start
14
+
15
+ **Install:**
16
+ ```bash
17
+ uv add expt-logger
18
+ # or
19
+ pip install expt-logger
20
+ ```
21
+
22
+ **Set your API key:**
23
+ ```bash
24
+ export EXPT_LOGGER_API_KEY=your_api_key
25
+ ```
26
+
27
+ **Start logging:**
28
+ ```python
29
+ import expt_logger
30
+
31
+ # Initialize run with config
32
+ run = expt_logger.init(
33
+ name="grpo-math",
34
+ config={"lr": 3e-6, "batch_size": 8}
35
+ )
36
+
37
+ # Get experiment URLs
38
+ print(f"View experiment: {run.experiment_url}")
39
+ print(f"Base URL: {run.base_url}")
40
+
41
+ # Log RL rollouts with rewards
42
+ expt_logger.log_rollout(
43
+ prompt="What is 2+2?",
44
+ messages=[{"role": "assistant", "content": "The answer is 4."}],
45
+ rewards={"correctness": 1.0, "format": 0.9},
46
+ mode="train"
47
+ )
48
+
49
+ # Log scalar metrics
50
+ expt_logger.log({
51
+ "train/loss": 0.45,
52
+ "train/kl": 0.02,
53
+ "train/reward": 0.85
54
+ })
55
+
56
+ expt_logger.end()
57
+ ```
58
+
59
+ ## Core Features
60
+
61
+ ### Scalar Metrics
62
+
63
+ Log training metrics with automatic step tracking:
64
+
65
+ ```python
66
+ # Auto-increment steps (defaults to "train" mode)
67
+ expt_logger.log({"loss": 0.5}) # step 1, train/loss
68
+ expt_logger.log({"loss": 0.4}) # step 2, train/loss
69
+
70
+ # Use slash prefixes for train/eval modes
71
+ expt_logger.log({
72
+ "train/loss": 0.5,
73
+ "eval/loss": 0.6
74
+ }, step=10)
75
+
76
+ # Or set mode explicitly
77
+ expt_logger.log({"loss": 0.5}, mode="eval")
78
+ ```
79
+
80
+ **Note:** Metrics default to `"train"` mode when no mode is specified and keys don't have slash prefixes.
81
+
82
+ **Batching metrics** at the same step:
83
+ ```python
84
+ expt_logger.log({"metric_a": 1.0}, commit=False)
85
+ expt_logger.log({"metric_b": 2.0}, commit=False)
86
+ expt_logger.log({"metric_c": 3.0}) # commits all three at step 1
87
+ ```
88
+
89
+ ### Rollouts (RL-specific)
90
+
91
+ Log conversation rollouts with multiple reward functions:
92
+
93
+ ```python
94
+ expt_logger.log_rollout(
95
+ prompt="Solve: x^2 - 5x + 6 = 0",
96
+ messages=[
97
+ {"role": "assistant", "content": "Let me factor this..."},
98
+ {"role": "user", "content": "Can you verify?"},
99
+ {"role": "assistant", "content": "Sure! (x-2)(x-3) = 0..."}
100
+ ],
101
+ rewards={
102
+ "correctness": 1.0,
103
+ "format": 0.9,
104
+ "helpfulness": 0.85
105
+ },
106
+ step=5,
107
+ mode="train"
108
+ )
109
+ ```
110
+
111
+ - **Messages format:** List of dicts with `"role"` and `"content"` keys
112
+ - **Rewards format:** Dict of reward names to float values
113
+ - **Mode:** `"train"` or `"eval"` (default: `"train"`)
114
+
115
+ ### Configuration
116
+
117
+ Track hyperparameters and update them dynamically:
118
+
119
+ ```python
120
+ run = expt_logger.init(config={"lr": 0.001, "batch_size": 32})
121
+
122
+ # Update config during training
123
+ run.config.lr = 0.0005 # attribute style
124
+ run.config["epochs"] = 100 # dict style
125
+ run.config.update({"model": "gpt2"}) # bulk update
126
+ ```
127
+
128
+ ### API Key & Server Configuration
129
+
130
+ **API Key** (required):
131
+ ```bash
132
+ export EXPT_LOGGER_API_KEY=your_api_key
133
+ ```
134
+ Or pass directly:
135
+ ```python
136
+ expt_logger.init(api_key="your_key")
137
+ ```
138
+
139
+ **Custom server URL** (optional, for self-hosting):
140
+ ```bash
141
+ export EXPT_LOGGER_BASE_URL=https://your-server.com
142
+ ```
143
+ Or:
144
+ ```python
145
+ expt_logger.init(base_url="https://your-server.com")
146
+ ```
147
+
148
+ ### Accessing Experiment URLs
149
+
150
+ Get the experiment URL and base URL from the run object:
151
+
152
+ ```python
153
+ run = expt_logger.init(name="my-experiment")
154
+
155
+ # Get the full experiment URL to view in browser
156
+ print(run.experiment_url)
157
+ # https://expt-platform.vercel.app/experiments/ccf1f879-50a6-492b-9072-fed6effac731
158
+
159
+ # Get the base URL of the tracking server
160
+ print(run.base_url)
161
+ # https://expt-platform.vercel.app
162
+ ```
163
+
164
+ ## API Reference
165
+
166
+ ### `expt_logger.init()`
167
+
168
+ ```python
169
+ init(
170
+ name: str | None = None,
171
+ config: dict[str, Any] | None = None,
172
+ api_key: str | None = None,
173
+ base_url: str | None = None
174
+ ) -> Run
175
+ ```
176
+
177
+ - `name`: Experiment name (auto-generated if not provided)
178
+ - `config`: Initial hyperparameters
179
+ - `api_key`: API key (or set `EXPT_LOGGER_API_KEY`)
180
+ - `base_url`: Custom server URL (or set `EXPT_LOGGER_BASE_URL`)
181
+
182
+ ### `expt_logger.log()`
183
+
184
+ ```python
185
+ log(
186
+ metrics: dict[str, float],
187
+ step: int | None = None,
188
+ mode: str | None = None,
189
+ commit: bool = True
190
+ )
191
+ ```
192
+
193
+ - `metrics`: Dict of metric names to values
194
+ - `step`: Step number (auto-increments if not provided)
195
+ - `mode`: Default mode for keys without slashes (default: `"train"`)
196
+ - `commit`: If `False`, buffer metrics until next `commit=True`
197
+
198
+ ### `expt_logger.log_rollout()`
199
+
200
+ ```python
201
+ log_rollout(
202
+ prompt: str,
203
+ messages: list[dict[str, str]],
204
+ rewards: dict[str, float],
205
+ step: int | None = None,
206
+ mode: str = "train"
207
+ )
208
+ ```
209
+
210
+ - `prompt`: The prompt text
211
+ - `messages`: List of `{"role": ..., "content": ...}` dicts
212
+ - `rewards`: Dict of reward names to values
213
+ - `step`: Step number (uses current step if not provided)
214
+ - `mode`: `"train"` or `"eval"`
215
+
216
+ ### `expt_logger.flush()` / `expt_logger.end()`
217
+
218
+ - `flush()`: Manually send buffered data to server
219
+ - `end()`: Finish the run (called automatically on exit)
220
+
221
+ ## Advanced
222
+
223
+ ### Context Manager
224
+
225
+ Ensures automatic cleanup:
226
+
227
+ ```python
228
+ with expt_logger.init(name="my-run") as run:
229
+ expt_logger.log({"loss": 0.5})
230
+ # end() called automatically
231
+ ```
232
+
233
+ ### Graceful Shutdown
234
+
235
+ The library handles cleanup on:
236
+ - Normal exit (`atexit`)
237
+ - Ctrl+C (`SIGINT`)
238
+ - `SIGTERM`
239
+
240
+ All buffered data is flushed before exit.
241
+
242
+ ## Development
243
+
244
+ For local development, see [DEVELOPMENT.md](DEVELOPMENT.md).
245
+
246
+ Run the demo:
247
+
248
+ ```bash
249
+ python demo.py # GRPO-style training simulation
250
+ python demo.py commit # Batching demo
251
+ python demo.py messages # Structured messages demo
252
+ ```
@@ -0,0 +1,9 @@
1
+ expt_logger/__init__.py,sha256=co2y2fVem9QlwcsE1IlrjbuykW0APpyxPllQpu1rSDk,5360
2
+ expt_logger/client.py,sha256=SQIpVO0GWN3E51w5bHfvT7OhB5Vm1AdPB3sNZrNNt48,6771
3
+ expt_logger/config.py,sha256=mImXM-cdiksBzCSVMortUXChYXn5Zx_kWLaL-CYQJbA,177
4
+ expt_logger/run.py,sha256=OjtibIs9nCI-qtGfc3WYeRLwzf8zogDwNxqJhdGb8ls,10270
5
+ expt_logger/types.py,sha256=RrVi935U7Q5kBBbY1XcrBBT_Kl03gDhbiegSUxU0y7U,2061
6
+ expt_logger/utils.py,sha256=a9IsjzUSClNl2lxCp30xyi5eYC0K8LFcu3ohX7a7qyE,1473
7
+ expt_logger-0.1.0.dev0.dist-info/METADATA,sha256=QojqT89iufcQwAE2DJML388bUiqKYplJC-VY-6-Vshg,5656
8
+ expt_logger-0.1.0.dev0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
9
+ expt_logger-0.1.0.dev0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any