expt-logger 0.1.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- expt_logger/__init__.py +205 -0
- expt_logger/client.py +207 -0
- expt_logger/config.py +5 -0
- expt_logger/run.py +317 -0
- expt_logger/types.py +90 -0
- expt_logger/utils.py +49 -0
- expt_logger-0.1.0.dev0.dist-info/METADATA +252 -0
- expt_logger-0.1.0.dev0.dist-info/RECORD +9 -0
- expt_logger-0.1.0.dev0.dist-info/WHEEL +4 -0
expt_logger/__init__.py
ADDED
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
"""
|
|
2
|
+
expt_logger - Simple experiment tracking library.
|
|
3
|
+
|
|
4
|
+
Usage:
|
|
5
|
+
import expt_logger
|
|
6
|
+
|
|
7
|
+
run = expt_logger.init(name="experiment-1")
|
|
8
|
+
|
|
9
|
+
for step in range(100):
|
|
10
|
+
expt_logger.log({"train/loss": 0.5, "train/accuracy": 0.9})
|
|
11
|
+
|
|
12
|
+
expt_logger.end()
|
|
13
|
+
|
|
14
|
+
Or with context manager:
|
|
15
|
+
with expt_logger.init(name="my-experiment") as run:
|
|
16
|
+
expt_logger.log({"loss": 0.5})
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import os
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
from .client import APIError, Client
|
|
25
|
+
from .config import DEFAULT_BASE_URL
|
|
26
|
+
from .run import Run
|
|
27
|
+
from .types import Config, Message, Reward, Rollout, Scalar
|
|
28
|
+
from .utils import parse_conversation
|
|
29
|
+
|
|
30
|
+
# Version string exposed as ``expt_logger.__version__``.
# NOTE(review): the wheel metadata for this build reports 0.1.0.dev0 — confirm
# which of the two values is intended.
__version__ = "0.1.0"

# Public names exported by ``from expt_logger import *``, grouped by role.
__all__ = [
    # Main API
    "init",
    "log",
    "log_rollout",
    "flush",
    "end",
    # Global state
    "run",
    "config",
    # Classes
    "Run",
    "Config",
    "Client",
    "APIError",
    # Types
    "Scalar",
    "Rollout",
    "Message",
    "Reward",
    # Utils
    "parse_conversation",
]

# Global run instance: assigned by init(), reset to None by end().
_current_run: Run | None = None
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def init(
    name: str | None = None,
    config: dict[str, Any] | None = None,
    api_key: str | None = None,
    base_url: str | None = None,
) -> Run:
    """
    Initialize a new experiment run and install it as the module-wide run.

    A run that is already active is ended before the new one is created.

    Args:
        name: Experiment name. Passed through to Run unchanged (may be None).
        config: Initial configuration dictionary.
        api_key: API key. Falls back to the EXPT_LOGGER_API_KEY environment variable.
        base_url: API server URL. Falls back to the EXPT_LOGGER_BASE_URL
            environment variable, then to DEFAULT_BASE_URL. An empty value at
            any level is treated as "not provided".

    Returns:
        The newly created Run (also returned by expt_logger.run()).

    Raises:
        ValueError: If no API key was passed and EXPT_LOGGER_API_KEY is unset
            or empty.

    Example:
        run = expt_logger.init(
            name="my-experiment",
            config={"lr": 0.001, "batch_size": 32}
        )
    """
    global _current_run

    # Finish any existing run. The global is cleared *before* anything below can
    # fail, so that an error (missing key, Run() raising) never leaves an
    # already-finished run installed: Run.log() returns silently once finished,
    # which would make later log() calls disappear without a trace.
    if _current_run is not None:
        previous, _current_run = _current_run, None
        previous.end()

    # Resolve API key
    resolved_api_key = api_key or os.environ.get("EXPT_LOGGER_API_KEY")
    if not resolved_api_key:
        raise ValueError("API key required. Pass api_key or set EXPT_LOGGER_API_KEY env variable.")

    # Resolve base URL. Chained `or` (instead of a .get() default) so that an
    # EXPT_LOGGER_BASE_URL that is set but empty still falls back to the default.
    resolved_base_url = base_url or os.environ.get("EXPT_LOGGER_BASE_URL") or DEFAULT_BASE_URL

    _current_run = Run(
        name=name,
        config=config,
        api_key=resolved_api_key,
        base_url=resolved_base_url,
    )

    return _current_run
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _get_run() -> Run:
    """Return the module-wide run; raise RuntimeError when none is set."""
    active = _current_run
    if active is not None:
        return active
    raise RuntimeError("No active run. Call expt_logger.init() first.")
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def run() -> Run | None:
    """Return the run installed by init(), or None when no run is set."""
    current = _current_run
    return current
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def config() -> Config:
    """
    Return the ``config`` attribute of the current run.

    Raises:
        RuntimeError: If no run is active (propagated from _get_run()).
    """
    active_run = _get_run()
    return active_run.config
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def log(
    metrics: dict[str, float],
    step: int | None = None,
    mode: str | None = None,
    commit: bool = True,
) -> None:
    """
    Forward scalar metrics to the current run's ``log`` method.

    All arguments are passed through unchanged.

    Args:
        metrics: Mapping of metric name to value. A "mode/name" key such as
            "train/loss" or "eval/accuracy" carries its own mode.
        step: Step number to log at; None lets the run choose the step.
        mode: Mode applied to keys that have no slash prefix.
        commit: When False, the metrics are held back until a later call
            with commit=True.

    Raises:
        RuntimeError: If no run is active (propagated from _get_run()).

    Example:
        expt_logger.log({"loss": 0.5, "accuracy": 0.9})
        expt_logger.log({"train/loss": 0.5, "eval/loss": 0.6}, step=10)

        # Several metrics recorded at the same step
        expt_logger.log({"train/loss": 0.5}, commit=False)
        expt_logger.log({"train/acc": 0.9})  # commits both
    """
    active_run = _get_run()
    active_run.log(metrics, step=step, mode=mode, commit=commit)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def log_rollout(
    prompt: str,
    messages: list[dict[str, str]] | str,
    rewards: dict[str, float] | list[dict[str, float | str]],
    step: int | None = None,
    mode: str = "train",
) -> None:
    """
    Forward a conversation rollout to the current run's ``log_rollout`` method.

    All arguments are passed through unchanged.

    Args:
        prompt: The prompt text.
        messages: A list of message dicts [{"role": "...", "content": "..."}],
            or a raw string. The run hands string input to parse_conversation(),
            which currently raises NotImplementedError, so prefer the list form.
        rewards: Either a dict {"reward_name": value} or a list of
            {"name": ..., "value": ...} dicts.
        step: Step number; None lets the run use its current step.
        mode: "train" or "eval".

    Raises:
        RuntimeError: If no run is active (propagated from _get_run()).

    Example:
        expt_logger.log_rollout(
            prompt="What is 2+2?",
            messages=[
                {"role": "assistant", "content": "2+2 equals 4."},
                {"role": "user", "content": "Thanks!"},
            ],
            rewards={"correctness": 1.0, "clarity": 0.9},
        )
    """
    active_run = _get_run()
    active_run.log_rollout(prompt, messages, rewards, step=step, mode=mode)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def flush() -> None:
    """
    Ask the current run to send its buffered data to the server.

    Raises:
        RuntimeError: If no run is active (propagated from _get_run()).
    """
    active_run = _get_run()
    active_run.flush()
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def end() -> None:
    """
    Finish the current run and clear the module-wide run reference.

    Does nothing when no run is active. Each Run also registers its own
    cleanup with atexit, so calling this is only needed to end a run before
    the program exits.
    """
    global _current_run
    if _current_run is None:
        return

    # Detach the run before ending it: if Run.end() raises, the module must not
    # keep pointing at a run that is already (partly) shut down.
    finishing, _current_run = _current_run, None
    finishing.end()
|
expt_logger/client.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""HTTP client for the experiment tracking API."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any, cast
|
|
5
|
+
|
|
6
|
+
import httpx
|
|
7
|
+
|
|
8
|
+
from .types import Rollout, Scalar
|
|
9
|
+
|
|
10
|
+
logger = logging.getLogger(__name__)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class APIError(Exception):
    """Error raised for a failed API request, optionally carrying the HTTP status."""

    def __init__(self, message: str, status_code: int | None = None):
        # HTTP status code of the failed response; None when none was supplied
        self.status_code = status_code
        super().__init__(message)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class Client:
    """
    Synchronous HTTP client for the experiment tracking API.

    Wraps one ``httpx.Client`` that sends JSON requests below ``base_url`` and,
    when an API key is given, authenticates through the ``x-api-key`` header.
    """

    def __init__(
        self,
        base_url: str,
        api_key: str | None = None,
        timeout: int = 30,
    ):
        """
        Args:
            base_url: Root URL of the API server; trailing slashes are stripped.
            api_key: Value sent in the ``x-api-key`` header; header omitted when falsy.
            timeout: Timeout handed to ``httpx.Client``.
        """
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        self.timeout = timeout

        headers = {"Content-Type": "application/json"}
        if api_key:
            headers["x-api-key"] = api_key

        self._client = httpx.Client(headers=headers, timeout=timeout)

    def create_experiment(
        self, name: str | None = None, config: dict[str, Any] | None = None
    ) -> str:
        """
        Create a new experiment via POST /api/experiments.

        Args:
            name: Experiment name. Left out of the payload when None
                (the server is then expected to generate one).
            config: Initial experiment configuration. Left out when None.

        Returns:
            The ``experimentId`` field of the server response.

        Raises:
            APIError: If the request fails or the server reports an error.
        """
        payload: dict[str, Any] = {}
        if name is not None:
            payload["name"] = name
        if config is not None:
            payload["config"] = config

        response = self._request("POST", "/api/experiments", json=payload)
        return cast(str, response["experimentId"])

    def log_scalars(self, experiment_id: str, scalars: list[Scalar]) -> dict[str, Any]:
        """
        Send scalar metrics for an experiment.

        Best effort: failures are logged as warnings instead of raised, and the
        return value is always an empty dict.
        """
        payload = {
            "scalars": [
                {
                    "step": s.step,
                    "mode": s.mode,
                    "type": s.type,
                    "value": s.value,
                }
                for s in scalars
            ]
        }
        return self._request(
            "POST",
            f"/api/experiments/{experiment_id}/scalars",
            json=payload,
            fire_and_forget=True,
        )

    def log_rollouts(self, experiment_id: str, rollouts: list[Rollout]) -> dict[str, Any]:
        """
        Send rollouts for an experiment.

        Best effort: failures are logged as warnings instead of raised, and the
        return value is always an empty dict.
        """
        payload = {
            "rollouts": [
                {
                    "step": r.step,
                    "mode": r.mode,
                    "promptText": r.prompt_text,
                    "messages": [{"role": m.role, "content": m.content} for m in r.messages],
                    "rewards": [{"name": rw.name, "value": rw.value} for rw in r.rewards],
                }
                for r in rollouts
            ]
        }
        return self._request(
            "POST",
            f"/api/experiments/{experiment_id}/rollouts",
            json=payload,
            fire_and_forget=True,
        )

    def update_experiment(
        self,
        experiment_id: str,
        name: str | None = None,
        config: dict[str, Any] | None = None,
        status: str | None = None,
    ) -> dict[str, Any]:
        """
        Update an experiment's name, config, and/or status via PUT /api/experiments.

        Args:
            experiment_id: The experiment ID (sent as ``id`` in the payload).
            name: New experiment name (optional).
            config: New experiment configuration (optional).
            status: New experiment status (optional).

        Returns:
            The decoded JSON response, or an empty dict for an empty body.

        Raises:
            ValueError: If none of name, config, or status is provided.
            APIError: If the request fails or the server reports an error.
        """
        if name is None and config is None and status is None:
            raise ValueError("At least one of name, config, or status must be provided")

        payload: dict[str, Any] = {"id": experiment_id}
        if name is not None:
            payload["name"] = name
        if config is not None:
            payload["config"] = config
        if status is not None:
            payload["status"] = status

        return self._request("PUT", "/api/experiments", json=payload)

    def log_config(self, experiment_id: str, config: dict[str, Any]) -> dict[str, Any]:
        """Replace the experiment's configuration (delegates to update_experiment)."""
        return self.update_experiment(experiment_id, config=config)

    def end_experiment(self, experiment_id: str) -> dict[str, Any]:
        """Mark an experiment as finished by setting its status to "complete"."""
        return self.update_experiment(experiment_id, status="complete")

    @staticmethod
    def _error_message(response: Any) -> Any:
        """
        Extract an error description from an unsuccessful response.

        Uses the ``error`` field when the body is a JSON object; falls back to
        the raw response text when the body is not JSON or not an object.
        """
        try:
            error_data = response.json()
        except ValueError:
            return response.text
        # A JSON list/string/number has no .get(); don't crash while reporting an error.
        if isinstance(error_data, dict):
            return error_data.get("error", response.text)
        return response.text

    def _request(
        self,
        method: str,
        path: str,
        json: dict[str, Any] | None = None,
        fire_and_forget: bool = False,
    ) -> dict[str, Any]:
        """
        Make an HTTP request to the API.

        Args:
            method: HTTP method (GET, POST, etc.). Honored in both modes.
            path: API path, appended to ``base_url``.
            json: JSON payload.
            fire_and_forget: If True, the request is still sent synchronously,
                but transport errors and non-success responses are only logged
                as warnings and ``{}`` is returned in every case. Used for
                logging operations so that they never raise.

        Returns:
            The decoded JSON body, or ``{}`` for an empty body or any
            fire-and-forget request.

        Raises:
            APIError: For transport errors or non-success responses, unless
                ``fire_and_forget`` is set.
        """
        url = f"{self.base_url}{path}"

        try:
            response = self._client.request(
                method=method,
                url=url,
                json=json,
            )
        except httpx.RequestError as e:
            if fire_and_forget:
                # Deliberately swallowed: logging must not break the caller
                logger.warning("Fire-and-forget request exception: %s %s - %s", method, path, e)
                return {}
            raise APIError(f"Request failed: {e}") from e

        if fire_and_forget:
            # Log errors but don't raise them
            if not response.is_success:
                logger.warning(
                    "Fire-and-forget request failed: %s %s returned %s: %s",
                    method,
                    path,
                    response.status_code,
                    self._error_message(response),
                )
            return {}

        if not response.is_success:
            raise APIError(self._error_message(response), status_code=response.status_code)

        # Handle empty responses
        if not response.text:
            return {}

        return cast(dict[str, Any], response.json())

    def close(self) -> None:
        """Close the underlying HTTP client."""
        self._client.close()
|
expt_logger/config.py
ADDED
expt_logger/run.py
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
"""Run class for experiment tracking."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import atexit
|
|
6
|
+
import signal
|
|
7
|
+
import sys
|
|
8
|
+
import threading
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from .client import Client
|
|
12
|
+
from .config import DEFAULT_BASE_URL
|
|
13
|
+
from .types import Config, Message, Reward, Rollout, Scalar
|
|
14
|
+
from .utils import parse_conversation, parse_metric_key
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class Run:
    """
    A single experiment run.

    Buffers scalar metrics and rollouts, sends them to the server on flush(),
    and finishes itself on exit or interrupt through atexit and signal handlers.
    """

    def __init__(
        self,
        name: str | None = None,
        config: dict[str, Any] | None = None,
        api_key: str | None = None,
        base_url: str = DEFAULT_BASE_URL,
    ):
        """
        Create the experiment on the server and register cleanup handlers.

        Args:
            name: Experiment name; None is forwarded to the server as "no name".
            config: Initial configuration values copied into ``self.config``.
            api_key: API key handed to the HTTP client.
            base_url: Base URL of the experiment tracking server.
        """
        # Stays None when the caller gave no name; nothing here reads a
        # server-generated name back.
        self.name = name
        self.config = Config()

        if config:
            self.config.update(config)

        # Internal state
        self._client = Client(base_url=base_url, api_key=api_key)
        self._base_url = base_url
        self._experiment_id: str | None = None
        self._current_step = 1
        self._scalar_buffer: list[Scalar] = []
        self._rollout_buffer: list[Rollout] = []
        # Re-entrant on purpose: the signal handler runs _cleanup() in the main
        # thread and may interrupt code that already holds this lock; a plain
        # Lock would deadlock there.
        self._lock = threading.RLock()
        self._finished = False

        # Track pending metrics for commit=False behavior
        self._pending_metrics: dict[str, float] = {}
        self._pending_step: int | None = None
        self._pending_mode: str | None = None

        # Create experiment on server
        self._init_experiment()

        # Setup exit handlers
        self._setup_exit_handlers()

    def _init_experiment(self) -> None:
        """Create the experiment on the server and remember its ID."""
        # An empty config is sent as None so the client omits it from the payload.
        config_dict = self.config.to_dict() or None
        self._experiment_id = self._client.create_experiment(self.name, config_dict)

    def _setup_exit_handlers(self) -> None:
        """
        Register cleanup for interpreter exit and for SIGINT/SIGTERM.

        Signal handlers can only be installed from the main thread. When the
        run is created elsewhere, installation is skipped and only the atexit
        hook remains.
        """
        atexit.register(self._cleanup)

        # Handlers that were active before ours, restored by _signal_handler
        self._original_handlers: dict[signal.Signals, Any] = {}

        for sig in (signal.SIGINT, signal.SIGTERM):
            previous = signal.getsignal(sig)
            try:
                signal.signal(sig, self._signal_handler)
            except ValueError:
                # signal.signal() raises ValueError outside the main thread of
                # the main interpreter; the experiment already exists on the
                # server, so carry on without signal-based cleanup.
                break
            self._original_handlers[sig] = previous

    def _signal_handler(self, signum: int, frame: Any) -> None:
        """Finish the run on SIGINT/SIGTERM, then let the interruption proceed."""
        self._cleanup()

        # Restore original handler and re-raise
        sig = signal.Signals(signum)
        original = self._original_handlers.get(sig)
        signal.signal(sig, original or signal.SIG_DFL)

        if signum == signal.SIGINT:
            raise KeyboardInterrupt
        else:
            sys.exit(128 + signum)

    def _cleanup(self) -> None:
        """Flush buffers and mark the experiment as finished (runs at most once)."""
        with self._lock:
            if self._finished:
                return
            self._finished = True

        # Commit any pending metrics
        self._commit_pending()

        # Flush remaining data
        self.flush()

        # Mark as finished on server
        if self._experiment_id:
            try:
                self._client.end_experiment(self._experiment_id)
            except Exception:
                pass  # Best effort: shutdown must not fail because of the server

        # Cleanup
        self._client.close()

        # Unregister atexit
        try:
            atexit.unregister(self._cleanup)
        except Exception:
            pass

    def log(
        self,
        metrics: dict[str, float],
        step: int | None = None,
        mode: str | None = None,
        commit: bool = True,
    ) -> None:
        """
        Log scalar metrics.

        Does nothing once the run is finished.

        Args:
            metrics: Dictionary of metric names to values.
                Use slash prefix for mode: "train/loss", "eval/accuracy"
            step: Step number. When None, the step of metrics still pending
                from a commit=False call is reused, otherwise the run's
                current step.
            mode: Mode for metrics without slash prefix. When None, the mode
                of pending metrics is reused, otherwise "train".
            commit: If False, hold the metrics until the next commit=True call.
                Useful for logging multiple metrics at the same step.

        Raises:
            ValueError: If ``mode`` is given while any metric key contains a
                slash. Use slash-prefixed keys or ``mode``, not both.
        """
        if self._finished:
            return

        # Check for conflicting usage: mode param with slash-prefixed keys
        has_slash_keys = any("/" in key for key in metrics)
        if mode is not None and has_slash_keys:
            raise ValueError(
                "Cannot specify 'mode' parameter when metric keys contain slashes. "
                "Either use slash-prefixed keys like 'train/loss' OR use the mode "
                "parameter, not both."
            )

        # Determine step
        if step is None:
            if self._pending_step is not None:
                step = self._pending_step
            else:
                step = self._current_step

        default_mode = mode or self._pending_mode or "train"

        # Accumulate metrics under a normalized "mode/name" key
        for key, value in metrics.items():
            parsed_mode, metric_name = parse_metric_key(key)
            # Use parsed mode from key, or fall back to default
            final_mode = parsed_mode if "/" in key else default_mode
            self._pending_metrics[f"{final_mode}/{metric_name}"] = value

        self._pending_step = step
        self._pending_mode = default_mode

        if commit:
            self._commit_pending()

    def _commit_pending(self) -> None:
        """Move all pending metrics into the scalar buffer and advance the step."""
        if not self._pending_metrics or self._pending_step is None:
            return

        with self._lock:
            for key, value in self._pending_metrics.items():
                mode, metric_name = parse_metric_key(key)
                self._scalar_buffer.append(
                    Scalar(
                        step=self._pending_step,
                        mode=mode,
                        type=metric_name,
                        value=value,
                    )
                )

            # Auto-increment step for next log call
            self._current_step = self._pending_step + 1

            # Clear pending state
            self._pending_metrics = {}
            self._pending_step = None
            self._pending_mode = None

    def log_rollout(
        self,
        prompt: str,
        messages: list[dict[str, str]] | str,
        rewards: dict[str, float] | list[dict[str, float | str]],
        step: int | None = None,
        mode: str = "train",
    ) -> None:
        """
        Buffer a conversation rollout.

        Does nothing once the run is finished.

        Args:
            prompt: The prompt text.
            messages: Either a list of message dicts [{"role": "...", "content": "..."}]
                or a string, which is handed to parse_conversation().
            rewards: Either a dict {"reward_name": value} or list [{"name": ..., "value": ...}]
            step: Step number. Uses current step if not provided.
            mode: "train" or "eval".
        """
        if self._finished:
            return

        if step is None:
            step = self._current_step

        # Parse messages if string
        if isinstance(messages, str):
            parsed = parse_conversation(messages)
        else:
            parsed = messages

        # Convert to Message objects
        message_objs = [Message(role=m["role"], content=m["content"]) for m in parsed]

        # Parse rewards
        if isinstance(rewards, dict):
            reward_objs = [Reward(name=k, value=v) for k, v in rewards.items()]
        else:
            reward_objs = [Reward(name=str(r["name"]), value=float(r["value"])) for r in rewards]

        with self._lock:
            self._rollout_buffer.append(
                Rollout(
                    step=step,
                    mode=mode,
                    prompt_text=prompt,
                    messages=message_objs,
                    rewards=reward_objs,
                )
            )

    def flush(self) -> None:
        """
        Send all buffered data to the server.

        Buffers are emptied before sending; a failed send prints a warning
        and the data of that batch is not retried.
        """
        if self._experiment_id is None:
            return

        # Commit any pending metrics first
        self._commit_pending()

        # Swap the buffers out under the lock, send outside of it
        with self._lock:
            scalars = self._scalar_buffer.copy()
            rollouts = self._rollout_buffer.copy()
            self._scalar_buffer.clear()
            self._rollout_buffer.clear()

        # Send to server
        if scalars:
            try:
                self._client.log_scalars(self._experiment_id, scalars)
            except Exception as e:
                print(f"Warning: Failed to log scalars: {e}")

        if rollouts:
            try:
                self._client.log_rollouts(self._experiment_id, rollouts)
            except Exception as e:
                print(f"Warning: Failed to log rollouts: {e}")

    def end(self) -> None:
        """Explicitly finish the run."""
        self._cleanup()

    @property
    def id(self) -> str | None:
        """Return the experiment ID."""
        return self._experiment_id

    @property
    def step(self) -> int:
        """Return the current step."""
        return self._current_step

    @property
    def base_url(self) -> str:
        """Return the base URL of the experiment tracking server."""
        return self._base_url

    @property
    def experiment_url(self) -> str | None:
        """Return the full URL to view this experiment in the web interface."""
        if self._experiment_id is None:
            return None
        # Only trailing slashes are trimmed; the base URL is otherwise used as given.
        base = self._base_url.rstrip("/")
        return f"{base}/experiments/{self._experiment_id}"

    def __enter__(self) -> Run:
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        self.end()
        # Don't suppress exceptions

    def __repr__(self) -> str:
        return f"Run(name={self.name!r}, id={self._experiment_id!r}, step={self._current_step})"
|
expt_logger/types.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Type definitions for the expt_logger library."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass, field
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
@dataclass
class Message:
    """One turn of a conversation: who spoke and what was said."""

    # Speaker role, e.g. "user", "assistant" or "system"
    role: str
    # Text of the message
    content: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class Reward:
    """A reward value labelled with its name."""

    # Label of the reward
    name: str
    # Numeric reward
    value: float
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class Scalar:
    """One scalar metric value recorded at a given step."""

    # Step the value belongs to
    step: int
    # Mode the metric was logged under
    mode: str
    # Metric name
    type: str
    # Metric value
    value: float
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class Rollout:
    """A prompt, the conversation it produced, and the rewards assigned to it."""

    # Step the rollout belongs to
    step: int
    # Mode the rollout was logged under
    mode: str
    # Text of the prompt
    prompt_text: str
    # Conversation messages
    messages: list[Message]
    # Rewards attached to the rollout
    rewards: list[Reward]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class Config:
    """
    Experiment configuration that supports both dict-style and attribute-style access.

    Every entry lives in the ``_data`` dictionary; attribute and item access
    are two views onto the same storage.

    Usage:
        config = Config()
        config.learning_rate = 0.001
        config["batch_size"] = 32
        config.update({"epochs": 10})
    """

    # Backing store for all config entries
    _data: dict[str, Any] = field(default_factory=dict)

    def __setattr__(self, name: str, value: Any) -> None:
        # ``_data`` itself is a real attribute; everything else becomes an entry
        if name == "_data":
            object.__setattr__(self, name, value)
        else:
            self._data[name] = value

    def __getattr__(self, name: str) -> Any:
        # Only called when normal attribute lookup fails.
        if name == "_data":
            # ``_data`` missing (e.g. instance not initialised yet): raise the
            # plain AttributeError instead of recursing through self._data.
            return object.__getattribute__(self, name)
        try:
            return self._data[name]
        except KeyError:
            # ``from None``: the internal KeyError is an implementation detail
            # and should not show up as chained context in tracebacks.
            raise AttributeError(f"Config has no attribute '{name}'") from None

    def __setitem__(self, key: str, value: Any) -> None:
        self._data[key] = value

    def __getitem__(self, key: str) -> Any:
        return self._data[key]

    def __contains__(self, key: str) -> bool:
        return key in self._data

    def update(self, data: dict[str, Any]) -> None:
        """Update config with a dictionary of values."""
        self._data.update(data)

    def to_dict(self) -> dict[str, Any]:
        """Return a shallow copy of the config as a plain dictionary."""
        return self._data.copy()

    def __repr__(self) -> str:
        return f"Config({self._data})"
|
expt_logger/utils.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Utility functions for the expt_logger library."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from typing import TypeVar
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def get_env_var(name: str, default: str | None = None) -> str | None:
    """Look up *name* in the process environment; return *default* when it is unset."""
    value = os.environ.get(name, default)
    return value
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def parse_metric_key(key: str) -> tuple[str, str]:
    """
    Split a metric key into ``(mode, metric_name)`` at its first slash.

    A key without any slash gets the mode "train".

    Examples:
        "train/loss" -> ("train", "loss")
        "eval/accuracy" -> ("eval", "accuracy")
        "a/b/c" -> ("a", "b/c")
        "loss" -> ("train", "loss")
    """
    mode, slash, metric_name = key.partition("/")
    if not slash:
        # No separator present: the whole key is the metric name
        return "train", key
    return mode, metric_name
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def parse_conversation(text: str) -> list[dict[str, str]]:
    r"""
    Placeholder for turning a conversation string into a list of messages.

    Not implemented yet: every call raises NotImplementedError.

    TODO: Implement parsing logic for different conversation formats:
    - "User: hello\nAssistant: hi there"
    - "Human: hello\nAssistant: hi there"
    - "<user>hello</user><assistant>hi</assistant>"

    The intended return value is a list of
    {"role": "user"|"assistant", "content": "..."} dicts.

    Raises:
        NotImplementedError: Always.
    """
    reason = (
        "Conversation parsing from raw text is not yet implemented. "
        "Please pass messages as a list of dicts with 'role' and 'content' keys."
    )
    raise NotImplementedError(reason)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
T = TypeVar("T")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def chunk_list(lst: list[T], chunk_size: int) -> list[list[T]]:
    """
    Split a list into consecutive chunks of at most ``chunk_size`` items.

    The last chunk is shorter when the length is not a multiple of
    ``chunk_size``; an empty list yields an empty result.

    Args:
        lst: The list to split.
        chunk_size: Maximum number of items per chunk; must be positive.

    Returns:
        A list of slices of ``lst`` in original order.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    # A negative step would make range() empty and silently drop every item
    # (and zero already failed inside range()), so reject both up front.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")
    return [lst[i : i + chunk_size] for i in range(0, len(lst), chunk_size)]
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: expt-logger
|
|
3
|
+
Version: 0.1.0.dev0
|
|
4
|
+
Summary: Simple experiment logging library
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: httpx>=0.27.0
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
|
|
9
|
+
# expt_logger
|
|
10
|
+
|
|
11
|
+
Simple experiment tracking for RL training with a W&B-style API.
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
**Install:**
|
|
16
|
+
```bash
|
|
17
|
+
uv add expt-logger
|
|
18
|
+
# or
|
|
19
|
+
pip install expt-logger
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**Set your API key:**
|
|
23
|
+
```bash
|
|
24
|
+
export EXPT_LOGGER_API_KEY=your_api_key
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
**Start logging:**
|
|
28
|
+
```python
|
|
29
|
+
import expt_logger
|
|
30
|
+
|
|
31
|
+
# Initialize run with config
|
|
32
|
+
run = expt_logger.init(
|
|
33
|
+
name="grpo-math",
|
|
34
|
+
config={"lr": 3e-6, "batch_size": 8}
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Get experiment URLs
|
|
38
|
+
print(f"View experiment: {run.experiment_url}")
|
|
39
|
+
print(f"Base URL: {run.base_url}")
|
|
40
|
+
|
|
41
|
+
# Log RL rollouts with rewards
|
|
42
|
+
expt_logger.log_rollout(
|
|
43
|
+
prompt="What is 2+2?",
|
|
44
|
+
messages=[{"role": "assistant", "content": "The answer is 4."}],
|
|
45
|
+
rewards={"correctness": 1.0, "format": 0.9},
|
|
46
|
+
mode="train"
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# Log scalar metrics
|
|
50
|
+
expt_logger.log({
|
|
51
|
+
"train/loss": 0.45,
|
|
52
|
+
"train/kl": 0.02,
|
|
53
|
+
"train/reward": 0.85
|
|
54
|
+
})
|
|
55
|
+
|
|
56
|
+
expt_logger.end()
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Core Features
|
|
60
|
+
|
|
61
|
+
### Scalar Metrics
|
|
62
|
+
|
|
63
|
+
Log training metrics with automatic step tracking:
|
|
64
|
+
|
|
65
|
+
```python
|
|
66
|
+
# Auto-increment steps (defaults to "train" mode)
|
|
67
|
+
expt_logger.log({"loss": 0.5}) # step 0, train/loss
|
|
68
|
+
expt_logger.log({"loss": 0.4}) # step 1, train/loss
|
|
69
|
+
|
|
70
|
+
# Use slash prefixes for train/eval modes
|
|
71
|
+
expt_logger.log({
|
|
72
|
+
"train/loss": 0.5,
|
|
73
|
+
"eval/loss": 0.6
|
|
74
|
+
}, step=10)
|
|
75
|
+
|
|
76
|
+
# Or set mode explicitly
|
|
77
|
+
expt_logger.log({"loss": 0.5}, mode="eval")
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
**Note:** Metrics default to `"train"` mode when no mode is specified and keys don't have slash prefixes.
|
|
81
|
+
|
|
82
|
+
**Batching metrics** at the same step:
|
|
83
|
+
```python
|
|
84
|
+
expt_logger.log({"metric_a": 1.0}, commit=False)
|
|
85
|
+
expt_logger.log({"metric_b": 2.0}, commit=False)
|
|
86
|
+
expt_logger.log({"metric_c": 3.0}) # commits all three at step 0
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Rollouts (RL-specific)
|
|
90
|
+
|
|
91
|
+
Log conversation rollouts with multiple reward functions:
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
expt_logger.log_rollout(
|
|
95
|
+
prompt="Solve: x^2 - 5x + 6 = 0",
|
|
96
|
+
messages=[
|
|
97
|
+
{"role": "assistant", "content": "Let me factor this..."},
|
|
98
|
+
{"role": "user", "content": "Can you verify?"},
|
|
99
|
+
{"role": "assistant", "content": "Sure! (x-2)(x-3) = 0..."}
|
|
100
|
+
],
|
|
101
|
+
rewards={
|
|
102
|
+
"correctness": 1.0,
|
|
103
|
+
"format": 0.9,
|
|
104
|
+
"helpfulness": 0.85
|
|
105
|
+
},
|
|
106
|
+
step=5,
|
|
107
|
+
mode="train"
|
|
108
|
+
)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
- **Messages format:** List of dicts with `"role"` and `"content"` keys
|
|
112
|
+
- **Rewards format:** Dict of reward names to float values
|
|
113
|
+
- **Mode:** `"train"` or `"eval"` (default: `"train"`)
|
|
114
|
+
|
|
115
|
+
### Configuration
|
|
116
|
+
|
|
117
|
+
Track hyperparameters and update them dynamically:
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
run = expt_logger.init(config={"lr": 0.001, "batch_size": 32})
|
|
121
|
+
|
|
122
|
+
# Update config during training
|
|
123
|
+
run.config.lr = 0.0005 # attribute style
|
|
124
|
+
run.config["epochs"] = 100 # dict style
|
|
125
|
+
run.config.update({"model": "gpt2"}) # bulk update
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### API Key & Server Configuration
|
|
129
|
+
|
|
130
|
+
**API Key** (required):
|
|
131
|
+
```bash
|
|
132
|
+
export EXPT_LOGGER_API_KEY=your_api_key
|
|
133
|
+
```
|
|
134
|
+
Or pass directly:
|
|
135
|
+
```python
|
|
136
|
+
expt_logger.init(api_key="your_key")
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
**Custom server URL** (optional, for self-hosting):
|
|
140
|
+
```bash
|
|
141
|
+
export EXPT_LOGGER_BASE_URL=https://your-server.com
|
|
142
|
+
```
|
|
143
|
+
Or:
|
|
144
|
+
```python
|
|
145
|
+
expt_logger.init(base_url="https://your-server.com")
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Accessing Experiment URLs
|
|
149
|
+
|
|
150
|
+
Get the experiment URL and base URL from the run object:
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
run = expt_logger.init(name="my-experiment")
|
|
154
|
+
|
|
155
|
+
# Get the full experiment URL to view in browser
|
|
156
|
+
print(run.experiment_url)
|
|
157
|
+
# https://expt-platform.vercel.app/experiments/ccf1f879-50a6-492b-9072-fed6effac731
|
|
158
|
+
|
|
159
|
+
# Get the base URL of the tracking server
|
|
160
|
+
print(run.base_url)
|
|
161
|
+
# https://expt-platform.vercel.app
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## API Reference
|
|
165
|
+
|
|
166
|
+
### `expt_logger.init()`
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
init(
|
|
170
|
+
name: str | None = None,
|
|
171
|
+
config: dict[str, Any] | None = None,
|
|
172
|
+
api_key: str | None = None,
|
|
173
|
+
base_url: str | None = None
|
|
174
|
+
) -> Run
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
- `name`: Experiment name (auto-generated if not provided)
|
|
178
|
+
- `config`: Initial hyperparameters
|
|
179
|
+
- `api_key`: API key (or set `EXPT_LOGGER_API_KEY`)
|
|
180
|
+
- `base_url`: Custom server URL (or set `EXPT_LOGGER_BASE_URL`)
|
|
181
|
+
|
|
182
|
+
### `expt_logger.log()`
|
|
183
|
+
|
|
184
|
+
```python
|
|
185
|
+
log(
|
|
186
|
+
metrics: dict[str, float],
|
|
187
|
+
step: int | None = None,
|
|
188
|
+
mode: str | None = None,
|
|
189
|
+
commit: bool = True
|
|
190
|
+
)
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
- `metrics`: Dict of metric names to values
|
|
194
|
+
- `step`: Step number (auto-increments if not provided)
|
|
195
|
+
- `mode`: Default mode for keys without slashes (default: `"train"`)
|
|
196
|
+
- `commit`: If `False`, buffer metrics until next `commit=True`
|
|
197
|
+
|
|
198
|
+
### `expt_logger.log_rollout()`
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
log_rollout(
|
|
202
|
+
prompt: str,
|
|
203
|
+
messages: list[dict[str, str]],
|
|
204
|
+
rewards: dict[str, float],
|
|
205
|
+
step: int | None = None,
|
|
206
|
+
mode: str = "train"
|
|
207
|
+
)
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
- `prompt`: The prompt text
|
|
211
|
+
- `messages`: List of `{"role": ..., "content": ...}` dicts
|
|
212
|
+
- `rewards`: Dict of reward names to values
|
|
213
|
+
- `step`: Step number (uses current step if not provided)
|
|
214
|
+
- `mode`: `"train"` or `"eval"`
|
|
215
|
+
|
|
216
|
+
### `expt_logger.flush()` / `expt_logger.end()`
|
|
217
|
+
|
|
218
|
+
- `flush()`: Manually send buffered data to server
|
|
219
|
+
- `end()`: Finish the run (called automatically on exit)
|
|
220
|
+
|
|
221
|
+
## Advanced
|
|
222
|
+
|
|
223
|
+
### Context Manager
|
|
224
|
+
|
|
225
|
+
Use the run as a context manager to ensure automatic cleanup:
|
|
226
|
+
|
|
227
|
+
```python
|
|
228
|
+
with expt_logger.init(name="my-run") as run:
|
|
229
|
+
expt_logger.log({"loss": 0.5})
|
|
230
|
+
# end() called automatically
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
### Graceful Shutdown
|
|
234
|
+
|
|
235
|
+
The library handles cleanup on:
|
|
236
|
+
- Normal exit (`atexit`)
|
|
237
|
+
- Ctrl+C (`SIGINT`)
|
|
238
|
+
- `SIGTERM`
|
|
239
|
+
|
|
240
|
+
All buffered data is flushed before exit.
|
|
241
|
+
|
|
242
|
+
## Development
|
|
243
|
+
|
|
244
|
+
For local development, see [DEVELOPMENT.md](DEVELOPMENT.md).
|
|
245
|
+
|
|
246
|
+
Run the demo:
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
python demo.py # GRPO-style training simulation
|
|
250
|
+
python demo.py commit # Batching demo
|
|
251
|
+
python demo.py messages # Structured messages demo
|
|
252
|
+
```
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
expt_logger/__init__.py,sha256=co2y2fVem9QlwcsE1IlrjbuykW0APpyxPllQpu1rSDk,5360
|
|
2
|
+
expt_logger/client.py,sha256=SQIpVO0GWN3E51w5bHfvT7OhB5Vm1AdPB3sNZrNNt48,6771
|
|
3
|
+
expt_logger/config.py,sha256=mImXM-cdiksBzCSVMortUXChYXn5Zx_kWLaL-CYQJbA,177
|
|
4
|
+
expt_logger/run.py,sha256=OjtibIs9nCI-qtGfc3WYeRLwzf8zogDwNxqJhdGb8ls,10270
|
|
5
|
+
expt_logger/types.py,sha256=RrVi935U7Q5kBBbY1XcrBBT_Kl03gDhbiegSUxU0y7U,2061
|
|
6
|
+
expt_logger/utils.py,sha256=a9IsjzUSClNl2lxCp30xyi5eYC0K8LFcu3ohX7a7qyE,1473
|
|
7
|
+
expt_logger-0.1.0.dev0.dist-info/METADATA,sha256=QojqT89iufcQwAE2DJML388bUiqKYplJC-VY-6-Vshg,5656
|
|
8
|
+
expt_logger-0.1.0.dev0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
9
|
+
expt_logger-0.1.0.dev0.dist-info/RECORD,,
|