expt-logger 0.1.0.dev18__tar.gz → 0.1.0.dev20__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- expt_logger-0.1.0.dev18/README.md → expt_logger-0.1.0.dev20/PKG-INFO +40 -5
- expt_logger-0.1.0.dev18/PKG-INFO → expt_logger-0.1.0.dev20/README.md +31 -13
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/pyproject.toml +2 -1
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/__init__.py +21 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/client.py +41 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/run.py +44 -1
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/types.py +12 -1
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_client.py +12 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_global_api.py +23 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_integration_e2e.py +130 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_run.py +85 -0
- expt_logger-0.1.0.dev18/.claude/settings.local.json +0 -29
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/.gitignore +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/.pre-commit-config.yaml +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/DEVELOPMENT.md +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/buffer.py +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/config.py +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/env.py +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/exceptions.py +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/py.typed +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/validation.py +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/conftest.py +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_buffer.py +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_client_integration.py +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_config.py +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_env.py +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_exceptions.py +0 -0
- {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_validation.py +0 -0
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: expt-logger
|
|
3
|
+
Version: 0.1.0.dev20
|
|
4
|
+
Summary: Simple experiment logging library
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: requests>=2.31.0
|
|
7
|
+
Requires-Dist: typing-extensions>=4.1.0
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
|
1
10
|
# expt_logger
|
|
2
11
|
|
|
3
12
|
Simple experiment tracking for RL training with a W&B-style API.
|
|
@@ -244,15 +253,23 @@ init(
|
|
|
244
253
|
config: dict[str, Any] | None = None,
|
|
245
254
|
api_key: str | None = None,
|
|
246
255
|
base_url: str | None = None,
|
|
247
|
-
is_main_process: bool = True
|
|
256
|
+
is_main_process: bool = True,
|
|
257
|
+
experiment_id: str | None = None
|
|
248
258
|
) -> Run
|
|
249
259
|
```
|
|
250
260
|
|
|
251
|
-
- `name`: Experiment name (auto-generated if not provided)
|
|
252
|
-
- `config`: Initial hyperparameters
|
|
261
|
+
- `name`: Experiment name (auto-generated if not provided, used only when creating new experiments)
|
|
262
|
+
- `config`: Initial hyperparameters (synced to server when provided)
|
|
253
263
|
- `api_key`: API key (or set `EXPT_LOGGER_API_KEY`)
|
|
254
264
|
- `base_url`: Custom server URL (or set `EXPT_LOGGER_BASE_URL`)
|
|
255
265
|
- `is_main_process`: If `False`, read experiment ID from temp file instead of creating a new experiment (for multi-process logging)
|
|
266
|
+
- `experiment_id`: Optional experiment ID to attach to an existing experiment (overrides all other resolution methods)
|
|
267
|
+
|
|
268
|
+
**Behavior:**
|
|
269
|
+
- If `experiment_id` is provided: attach to that specific experiment (overrides all)
|
|
270
|
+
- Else if `EXPT_LOGGER_EXPERIMENT_ID` env var exists: attach to that experiment
|
|
271
|
+
- Else if `is_main_process=True`: create a new experiment
|
|
272
|
+
- Else if `is_main_process=False`: read from temp file (multi-process)
|
|
256
273
|
|
|
257
274
|
**Note:** When creating a new experiment (main process), `init()` automatically sets `EXPT_LOGGER_EXPERIMENT_ID` and writes to a temp file so subprocesses can discover it.
|
|
258
275
|
|
|
@@ -280,7 +297,7 @@ log_rollout(
|
|
|
280
297
|
messages: list[dict[str, str]],
|
|
281
298
|
rewards: dict[str, float],
|
|
282
299
|
step: int | None = None,
|
|
283
|
-
mode: str = "train",
|
|
300
|
+
mode: str | None = None,
|
|
284
301
|
commit: bool = True
|
|
285
302
|
)
|
|
286
303
|
```
|
|
@@ -289,7 +306,7 @@ log_rollout(
|
|
|
289
306
|
- `messages`: List of `{"role": ..., "content": ...}` dicts (both must be strings)
|
|
290
307
|
- `rewards`: Dict of reward names to numeric values (must be valid numbers, not NaN/Inf)
|
|
291
308
|
- `step`: Step number (must be non-negative integer if provided)
|
|
292
|
-
- `mode`:
|
|
309
|
+
- `mode`: Optional mode (defaults to `"train"` if not provided)
|
|
293
310
|
- `commit`: If `True` (default), commit immediately and increment step. If `False`, buffer metrics until commit.
|
|
294
311
|
|
|
295
312
|
**Input Validation:**
|
|
@@ -297,6 +314,24 @@ log_rollout(
|
|
|
297
314
|
- Invalid inputs raise `ValidationError` with descriptive error messages
|
|
298
315
|
- Metric and reward values must be numeric (int/float) and cannot be NaN or Infinity
|
|
299
316
|
|
|
317
|
+
### `expt_logger.log_error()`
|
|
318
|
+
|
|
319
|
+
```python
|
|
320
|
+
log_error(
|
|
321
|
+
error: Exception | str,
|
|
322
|
+
step: int | None = None,
|
|
323
|
+
mode: str | None = None,
|
|
324
|
+
include_traceback: bool = True,
|
|
325
|
+
commit: bool = True
|
|
326
|
+
)
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
- `error`: The error (Exception object or string message)
|
|
330
|
+
- `step`: Step number (overrides automatic step counter if provided)
|
|
331
|
+
- `mode`: Optional mode (e.g., "train", "eval")
|
|
332
|
+
- `include_traceback`: Whether to include the traceback (only for Exception objects, default: `True`)
|
|
333
|
+
- `commit`: If `True` (default), commit immediately and increment step. If `False`, buffer until commit.
|
|
334
|
+
|
|
300
335
|
### `expt_logger.commit()`
|
|
301
336
|
|
|
302
337
|
```python
|
|
@@ -1,11 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: expt-logger
|
|
3
|
-
Version: 0.1.0.dev18
|
|
4
|
-
Summary: Simple experiment logging library
|
|
5
|
-
Requires-Python: >=3.10
|
|
6
|
-
Requires-Dist: requests>=2.31.0
|
|
7
|
-
Description-Content-Type: text/markdown
|
|
8
|
-
|
|
9
1
|
# expt_logger
|
|
10
2
|
|
|
11
3
|
Simple experiment tracking for RL training with a W&B-style API.
|
|
@@ -252,15 +244,23 @@ init(
|
|
|
252
244
|
config: dict[str, Any] | None = None,
|
|
253
245
|
api_key: str | None = None,
|
|
254
246
|
base_url: str | None = None,
|
|
255
|
-
is_main_process: bool = True
|
|
247
|
+
is_main_process: bool = True,
|
|
248
|
+
experiment_id: str | None = None
|
|
256
249
|
) -> Run
|
|
257
250
|
```
|
|
258
251
|
|
|
259
|
-
- `name`: Experiment name (auto-generated if not provided)
|
|
260
|
-
- `config`: Initial hyperparameters
|
|
252
|
+
- `name`: Experiment name (auto-generated if not provided, used only when creating new experiments)
|
|
253
|
+
- `config`: Initial hyperparameters (synced to server when provided)
|
|
261
254
|
- `api_key`: API key (or set `EXPT_LOGGER_API_KEY`)
|
|
262
255
|
- `base_url`: Custom server URL (or set `EXPT_LOGGER_BASE_URL`)
|
|
263
256
|
- `is_main_process`: If `False`, read experiment ID from temp file instead of creating a new experiment (for multi-process logging)
|
|
257
|
+
- `experiment_id`: Optional experiment ID to attach to an existing experiment (overrides all other resolution methods)
|
|
258
|
+
|
|
259
|
+
**Behavior:**
|
|
260
|
+
- If `experiment_id` is provided: attach to that specific experiment (overrides all)
|
|
261
|
+
- Else if `EXPT_LOGGER_EXPERIMENT_ID` env var exists: attach to that experiment
|
|
262
|
+
- Else if `is_main_process=True`: create a new experiment
|
|
263
|
+
- Else if `is_main_process=False`: read from temp file (multi-process)
|
|
264
264
|
|
|
265
265
|
**Note:** When creating a new experiment (main process), `init()` automatically sets `EXPT_LOGGER_EXPERIMENT_ID` and writes to a temp file so subprocesses can discover it.
|
|
266
266
|
|
|
@@ -288,7 +288,7 @@ log_rollout(
|
|
|
288
288
|
messages: list[dict[str, str]],
|
|
289
289
|
rewards: dict[str, float],
|
|
290
290
|
step: int | None = None,
|
|
291
|
-
mode: str = "train",
|
|
291
|
+
mode: str | None = None,
|
|
292
292
|
commit: bool = True
|
|
293
293
|
)
|
|
294
294
|
```
|
|
@@ -297,7 +297,7 @@ log_rollout(
|
|
|
297
297
|
- `messages`: List of `{"role": ..., "content": ...}` dicts (both must be strings)
|
|
298
298
|
- `rewards`: Dict of reward names to numeric values (must be valid numbers, not NaN/Inf)
|
|
299
299
|
- `step`: Step number (must be non-negative integer if provided)
|
|
300
|
-
- `mode`:
|
|
300
|
+
- `mode`: Optional mode (defaults to `"train"` if not provided)
|
|
301
301
|
- `commit`: If `True` (default), commit immediately and increment step. If `False`, buffer metrics until commit.
|
|
302
302
|
|
|
303
303
|
**Input Validation:**
|
|
@@ -305,6 +305,24 @@ log_rollout(
|
|
|
305
305
|
- Invalid inputs raise `ValidationError` with descriptive error messages
|
|
306
306
|
- Metric and reward values must be numeric (int/float) and cannot be NaN or Infinity
|
|
307
307
|
|
|
308
|
+
### `expt_logger.log_error()`
|
|
309
|
+
|
|
310
|
+
```python
|
|
311
|
+
log_error(
|
|
312
|
+
error: Exception | str,
|
|
313
|
+
step: int | None = None,
|
|
314
|
+
mode: str | None = None,
|
|
315
|
+
include_traceback: bool = True,
|
|
316
|
+
commit: bool = True
|
|
317
|
+
)
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
- `error`: The error (Exception object or string message)
|
|
321
|
+
- `step`: Step number (overrides automatic step counter if provided)
|
|
322
|
+
- `mode`: Optional mode (e.g., "train", "eval")
|
|
323
|
+
- `include_traceback`: Whether to include the traceback (only for Exception objects, default: `True`)
|
|
324
|
+
- `commit`: If `True` (default), commit immediately and increment step. If `False`, buffer until commit.
|
|
325
|
+
|
|
308
326
|
### `expt_logger.commit()`
|
|
309
327
|
|
|
310
328
|
```python
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "expt-logger"
|
|
3
|
-
version = "0.1.0.dev18"
|
|
3
|
+
version = "0.1.0.dev20"
|
|
4
4
|
description = "Simple experiment logging library"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.10"
|
|
7
7
|
dependencies = [
|
|
8
8
|
"requests>=2.31.0",
|
|
9
|
+
"typing_extensions>=4.1.0",
|
|
9
10
|
]
|
|
10
11
|
|
|
11
12
|
[dependency-groups]
|
|
@@ -112,6 +112,7 @@ def log_rollout(
|
|
|
112
112
|
step: int | None = None,
|
|
113
113
|
mode: str | None = None,
|
|
114
114
|
commit: bool = True,
|
|
115
|
+
id: str | None = None,
|
|
115
116
|
) -> None:
|
|
116
117
|
"""Log a rollout (conversation + rewards).
|
|
117
118
|
|
|
@@ -151,9 +152,28 @@ def log_rollout(
|
|
|
151
152
|
step=step,
|
|
152
153
|
mode=mode,
|
|
153
154
|
commit=commit,
|
|
155
|
+
id=id
|
|
154
156
|
)
|
|
155
157
|
|
|
156
158
|
|
|
159
|
+
def log_environment(
|
|
160
|
+
rollout_id: str,
|
|
161
|
+
content: str,
|
|
162
|
+
) -> None:
|
|
163
|
+
"""Log an environment log entry associated with a rollout.
|
|
164
|
+
|
|
165
|
+
Args:
|
|
166
|
+
rollout_id: ID of the rollout this log entry is associated with
|
|
167
|
+
content: Log content string
|
|
168
|
+
|
|
169
|
+
Raises:
|
|
170
|
+
RuntimeError: If no active run exists
|
|
171
|
+
"""
|
|
172
|
+
if _active_run is None:
|
|
173
|
+
raise RuntimeError("No active run. Call init() first.")
|
|
174
|
+
_active_run.log_environment(rollout_id=rollout_id, content=content)
|
|
175
|
+
|
|
176
|
+
|
|
157
177
|
def log_error(
|
|
158
178
|
error: Exception | str,
|
|
159
179
|
step: int | None = None,
|
|
@@ -297,6 +317,7 @@ __all__ = [
|
|
|
297
317
|
"init",
|
|
298
318
|
"log",
|
|
299
319
|
"log_rollout",
|
|
320
|
+
"log_environment",
|
|
300
321
|
"log_error",
|
|
301
322
|
"commit",
|
|
302
323
|
"end",
|
|
@@ -280,6 +280,47 @@ class APIClient:
|
|
|
280
280
|
payload = {"rollouts": chunk}
|
|
281
281
|
self._request("POST", url, json=payload)
|
|
282
282
|
|
|
283
|
+
def log_env_logs(
|
|
284
|
+
self,
|
|
285
|
+
experiment_id: str,
|
|
286
|
+
rollout_id: str,
|
|
287
|
+
content: str,
|
|
288
|
+
) -> None:
|
|
289
|
+
"""Log an environment log entry for an experiment.
|
|
290
|
+
|
|
291
|
+
Args:
|
|
292
|
+
experiment_id: Experiment ID
|
|
293
|
+
rollout_id: Rollout ID this log entry is associated with
|
|
294
|
+
content: Log content string
|
|
295
|
+
|
|
296
|
+
Raises:
|
|
297
|
+
APIError: If request fails
|
|
298
|
+
"""
|
|
299
|
+
url = f"{self.base_url}/api/experiments/{experiment_id}/env-logs"
|
|
300
|
+
payload = {"rolloutId": rollout_id, "content": content}
|
|
301
|
+
self._request("POST", url, json=payload)
|
|
302
|
+
|
|
303
|
+
def get_env_logs(
|
|
304
|
+
self,
|
|
305
|
+
experiment_id: str,
|
|
306
|
+
rollout_id: str,
|
|
307
|
+
) -> list[dict]:
|
|
308
|
+
"""Fetch environment logs for a specific rollout.
|
|
309
|
+
|
|
310
|
+
Args:
|
|
311
|
+
experiment_id: Experiment ID
|
|
312
|
+
rollout_id: Rollout ID to filter logs by
|
|
313
|
+
|
|
314
|
+
Returns:
|
|
315
|
+
List of env log entries
|
|
316
|
+
|
|
317
|
+
Raises:
|
|
318
|
+
APIError: If request fails
|
|
319
|
+
"""
|
|
320
|
+
url = f"{self.base_url}/api/experiments/{experiment_id}/env-logs"
|
|
321
|
+
response = self._request("GET", url, params={"rolloutId": rollout_id})
|
|
322
|
+
return response.json()["logs"]
|
|
323
|
+
|
|
283
324
|
def log_errors(
|
|
284
325
|
self,
|
|
285
326
|
experiment_id: str,
|
|
@@ -23,6 +23,7 @@ from expt_logger.types import (
|
|
|
23
23
|
ConfigUpdateCommand,
|
|
24
24
|
ErrorItem,
|
|
25
25
|
LogCommand,
|
|
26
|
+
LogEnvironmentCommand,
|
|
26
27
|
LogErrorCommand,
|
|
27
28
|
LogRolloutCommand,
|
|
28
29
|
MessageItem,
|
|
@@ -228,6 +229,7 @@ class Run:
|
|
|
228
229
|
step: int | None = None,
|
|
229
230
|
mode: str | None = None,
|
|
230
231
|
commit: bool = True,
|
|
232
|
+
id: str | None = None,
|
|
231
233
|
) -> None:
|
|
232
234
|
"""Log a rollout (conversation + rewards).
|
|
233
235
|
|
|
@@ -238,6 +240,7 @@ class Run:
|
|
|
238
240
|
step: Optional step number (overrides worker's step counter if provided)
|
|
239
241
|
mode: Optional mode (defaults to "train")
|
|
240
242
|
commit: Whether to flush buffer after logging
|
|
243
|
+
id: Optional identifier for this rollout
|
|
241
244
|
"""
|
|
242
245
|
from expt_logger.validation import (
|
|
243
246
|
validate_messages,
|
|
@@ -273,6 +276,7 @@ class Run:
|
|
|
273
276
|
"rewards": reward_items,
|
|
274
277
|
"mode": mode or "train",
|
|
275
278
|
"step": step,
|
|
279
|
+
"id": id,
|
|
276
280
|
}
|
|
277
281
|
|
|
278
282
|
try:
|
|
@@ -283,6 +287,26 @@ class Run:
|
|
|
283
287
|
if commit:
|
|
284
288
|
self.commit()
|
|
285
289
|
|
|
290
|
+
def log_environment(
|
|
291
|
+
self,
|
|
292
|
+
rollout_id: str,
|
|
293
|
+
content: str,
|
|
294
|
+
) -> None:
|
|
295
|
+
"""Log an environment log entry associated with a rollout.
|
|
296
|
+
|
|
297
|
+
Args:
|
|
298
|
+
rollout_id: ID of the rollout this log entry is associated with
|
|
299
|
+
content: Log content string
|
|
300
|
+
"""
|
|
301
|
+
env_cmd: LogEnvironmentCommand = {
|
|
302
|
+
"rollout_id": rollout_id,
|
|
303
|
+
"content": content,
|
|
304
|
+
}
|
|
305
|
+
try:
|
|
306
|
+
self._queue.put_nowait(("log_environment", env_cmd))
|
|
307
|
+
except Full:
|
|
308
|
+
logger.warning(f"Command queue full, dropping environment log for rollout: {rollout_id}")
|
|
309
|
+
|
|
286
310
|
def log_error(
|
|
287
311
|
self,
|
|
288
312
|
error: Exception | str,
|
|
@@ -312,7 +336,11 @@ class Run:
|
|
|
312
336
|
if isinstance(error, Exception):
|
|
313
337
|
message = str(error)
|
|
314
338
|
error_type = type(error).__name__
|
|
315
|
-
traceback_str =
|
|
339
|
+
traceback_str = (
|
|
340
|
+
"".join(tb.format_exception(type(error), error, error.__traceback__))
|
|
341
|
+
if include_traceback
|
|
342
|
+
else None
|
|
343
|
+
)
|
|
316
344
|
else:
|
|
317
345
|
message = error
|
|
318
346
|
error_type = "Error"
|
|
@@ -418,6 +446,8 @@ class Run:
|
|
|
418
446
|
self._handle_log(cast(LogCommand, data))
|
|
419
447
|
elif command == "log_rollout":
|
|
420
448
|
self._handle_log_rollout(cast(LogRolloutCommand, data))
|
|
449
|
+
elif command == "log_environment":
|
|
450
|
+
self._handle_log_environment(cast(LogEnvironmentCommand, data))
|
|
421
451
|
elif command == "log_error":
|
|
422
452
|
self._handle_log_error(cast(LogErrorCommand, data))
|
|
423
453
|
elif command == "commit":
|
|
@@ -528,9 +558,22 @@ class Run:
|
|
|
528
558
|
"messages": data["messages"],
|
|
529
559
|
"rewards": data["rewards"],
|
|
530
560
|
}
|
|
561
|
+
if data["id"] is not None:
|
|
562
|
+
rollout["id"] = data["id"]
|
|
531
563
|
|
|
532
564
|
self._buffer.add_rollout(rollout)
|
|
533
565
|
|
|
566
|
+
def _handle_log_environment(self, data: LogEnvironmentCommand) -> None:
|
|
567
|
+
"""Handle log_environment command.
|
|
568
|
+
|
|
569
|
+
Args:
|
|
570
|
+
data: Log environment command data with rollout_id and content
|
|
571
|
+
"""
|
|
572
|
+
try:
|
|
573
|
+
self._client.log_env_logs(self._experiment_id, data["rollout_id"], data["content"])
|
|
574
|
+
except Exception as e:
|
|
575
|
+
logger.error(f"Error logging environment log: {e}", exc_info=True)
|
|
576
|
+
|
|
534
577
|
def _handle_log_error(self, data: LogErrorCommand) -> None:
|
|
535
578
|
"""Handle log_error command.
|
|
536
579
|
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
"""Type definitions for expt-logger."""
|
|
2
2
|
|
|
3
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, TypedDict
|
|
4
|
+
|
|
5
|
+
from typing_extensions import NotRequired
|
|
4
6
|
|
|
5
7
|
|
|
6
8
|
class ScalarItem(TypedDict):
|
|
@@ -34,6 +36,7 @@ class RolloutItem(TypedDict):
|
|
|
34
36
|
promptText: str
|
|
35
37
|
messages: list[MessageItem]
|
|
36
38
|
rewards: list[RewardItem]
|
|
39
|
+
id: NotRequired[str]
|
|
37
40
|
|
|
38
41
|
|
|
39
42
|
class ErrorItem(TypedDict):
|
|
@@ -73,6 +76,7 @@ class LogRolloutCommand(TypedDict):
|
|
|
73
76
|
rewards: list[RewardItem]
|
|
74
77
|
mode: str
|
|
75
78
|
step: int | None
|
|
79
|
+
id: str | None
|
|
76
80
|
|
|
77
81
|
|
|
78
82
|
class LogErrorCommand(TypedDict):
|
|
@@ -85,6 +89,13 @@ class LogErrorCommand(TypedDict):
|
|
|
85
89
|
step: int | None
|
|
86
90
|
|
|
87
91
|
|
|
92
|
+
class LogEnvironmentCommand(TypedDict):
|
|
93
|
+
"""Command to log an environment log entry."""
|
|
94
|
+
|
|
95
|
+
rollout_id: str
|
|
96
|
+
content: str
|
|
97
|
+
|
|
98
|
+
|
|
88
99
|
class CommitCommand(TypedDict):
|
|
89
100
|
"""Command to commit (flush) the buffer."""
|
|
90
101
|
|
|
@@ -294,6 +294,18 @@ def test_log_rollouts(client):
|
|
|
294
294
|
assert call_args[1]["json"] == {"rollouts": rollouts}
|
|
295
295
|
|
|
296
296
|
|
|
297
|
+
def test_log_env_logs(client):
|
|
298
|
+
"""Test logging environment logs."""
|
|
299
|
+
with patch.object(client, "_request") as mock_request:
|
|
300
|
+
client.log_env_logs("exp-123", "rollout-456", "step 1 observation")
|
|
301
|
+
|
|
302
|
+
mock_request.assert_called_once()
|
|
303
|
+
call_args = mock_request.call_args
|
|
304
|
+
assert call_args[0][0] == "POST"
|
|
305
|
+
assert call_args[0][1] == "https://test.example.com/api/experiments/exp-123/env-logs"
|
|
306
|
+
assert call_args[1]["json"] == {"rolloutId": "rollout-456", "content": "step 1 observation"}
|
|
307
|
+
|
|
308
|
+
|
|
297
309
|
def test_log_errors(client):
|
|
298
310
|
"""Test logging errors."""
|
|
299
311
|
with patch.object(client, "_request") as mock_request:
|
|
@@ -134,6 +134,28 @@ def test_log_rollout_raises_if_no_active_run(mock_run):
|
|
|
134
134
|
assert "No active run" in str(exc_info.value)
|
|
135
135
|
|
|
136
136
|
|
|
137
|
+
def test_log_environment_delegates_to_run(mock_run):
|
|
138
|
+
"""Test log_environment() delegates to the active run."""
|
|
139
|
+
_, run_instance = mock_run
|
|
140
|
+
|
|
141
|
+
expt_logger.init()
|
|
142
|
+
expt_logger.log_environment(rollout_id="rollout-abc", content="step 1 observation")
|
|
143
|
+
|
|
144
|
+
run_instance.log_environment.assert_called_once_with(
|
|
145
|
+
rollout_id="rollout-abc",
|
|
146
|
+
content="step 1 observation",
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def test_log_environment_raises_if_no_active_run():
|
|
151
|
+
"""Test log_environment() raises RuntimeError if no active run."""
|
|
152
|
+
with pytest.raises(RuntimeError) as exc_info:
|
|
153
|
+
expt_logger.log_environment("rollout-abc", "some content")
|
|
154
|
+
|
|
155
|
+
assert "No active run" in str(exc_info.value)
|
|
156
|
+
assert "init()" in str(exc_info.value)
|
|
157
|
+
|
|
158
|
+
|
|
137
159
|
def test_log_error_delegates_to_run(mock_run):
|
|
138
160
|
"""Test log_error() delegates to the active run."""
|
|
139
161
|
_, run_instance = mock_run
|
|
@@ -393,6 +415,7 @@ def test_all_exports():
|
|
|
393
415
|
"init",
|
|
394
416
|
"log",
|
|
395
417
|
"log_rollout",
|
|
418
|
+
"log_environment",
|
|
396
419
|
"log_error",
|
|
397
420
|
"commit",
|
|
398
421
|
"end",
|
|
@@ -112,6 +112,22 @@ def fetch_rollouts(shared_api_key: str, base_url: str):
|
|
|
112
112
|
return _fetch
|
|
113
113
|
|
|
114
114
|
|
|
115
|
+
@pytest.fixture
|
|
116
|
+
def fetch_env_logs(shared_api_key: str, base_url: str):
|
|
117
|
+
"""Factory fixture for fetching environment log data."""
|
|
118
|
+
|
|
119
|
+
def _fetch(experiment_id: str, rollout_id: str):
|
|
120
|
+
response = requests.get(
|
|
121
|
+
f"{base_url}/api/experiments/{experiment_id}/env-logs",
|
|
122
|
+
params={"rolloutId": rollout_id},
|
|
123
|
+
headers={"Authorization": f"Bearer {shared_api_key}"},
|
|
124
|
+
)
|
|
125
|
+
assert response.status_code == 200
|
|
126
|
+
return response.json()["logs"]
|
|
127
|
+
|
|
128
|
+
return _fetch
|
|
129
|
+
|
|
130
|
+
|
|
115
131
|
# ============================================================================
|
|
116
132
|
# Test Class 1: Basic Workflow Tests
|
|
117
133
|
# ============================================================================
|
|
@@ -1054,3 +1070,117 @@ class TestMultiProcess:
|
|
|
1054
1070
|
# Worker metrics
|
|
1055
1071
|
for i in range(num_workers):
|
|
1056
1072
|
assert f"worker-{i + 1}-metric" in scalars
|
|
1073
|
+
|
|
1074
|
+
|
|
1075
|
+
# ============================================================================
|
|
1076
|
+
# Test Class 12: Environment Log Tests
|
|
1077
|
+
# ============================================================================
|
|
1078
|
+
|
|
1079
|
+
|
|
1080
|
+
@pytest.mark.integration
|
|
1081
|
+
class TestEnvironmentLogs:
|
|
1082
|
+
"""Environment log functionality."""
|
|
1083
|
+
|
|
1084
|
+
def test_log_environment_basic(
|
|
1085
|
+
self,
|
|
1086
|
+
shared_api_key: str,
|
|
1087
|
+
base_url: str,
|
|
1088
|
+
cleanup_experiments: list[str],
|
|
1089
|
+
fetch_env_logs,
|
|
1090
|
+
) -> None:
|
|
1091
|
+
"""Test basic environment log persists to server."""
|
|
1092
|
+
run = expt_logger.init(
|
|
1093
|
+
name="test-env-log-basic",
|
|
1094
|
+
api_key=shared_api_key,
|
|
1095
|
+
base_url=base_url,
|
|
1096
|
+
)
|
|
1097
|
+
cleanup_experiments.append(run._experiment_id)
|
|
1098
|
+
|
|
1099
|
+
expt_logger.log_environment("00000000-0000-0000-0000-000000000001", "observation: step=1, reward=0.9")
|
|
1100
|
+
time.sleep(0.5)
|
|
1101
|
+
|
|
1102
|
+
env_logs = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-000000000001")
|
|
1103
|
+
assert len(env_logs) == 1
|
|
1104
|
+
assert env_logs[0]["content"] == "observation: step=1, reward=0.9"
|
|
1105
|
+
|
|
1106
|
+
expt_logger.end()
|
|
1107
|
+
|
|
1108
|
+
def test_log_environment_multiple_for_same_rollout(
|
|
1109
|
+
self,
|
|
1110
|
+
shared_api_key: str,
|
|
1111
|
+
base_url: str,
|
|
1112
|
+
cleanup_experiments: list[str],
|
|
1113
|
+
fetch_env_logs,
|
|
1114
|
+
) -> None:
|
|
1115
|
+
"""Test multiple environment logs for the same rollout are all stored."""
|
|
1116
|
+
run = expt_logger.init(
|
|
1117
|
+
name="test-env-log-multiple",
|
|
1118
|
+
api_key=shared_api_key,
|
|
1119
|
+
base_url=base_url,
|
|
1120
|
+
)
|
|
1121
|
+
cleanup_experiments.append(run._experiment_id)
|
|
1122
|
+
|
|
1123
|
+
expt_logger.log_environment("00000000-0000-0000-0000-000000000002", "step 1: action=left")
|
|
1124
|
+
expt_logger.log_environment("00000000-0000-0000-0000-000000000002", "step 2: action=right")
|
|
1125
|
+
expt_logger.log_environment("00000000-0000-0000-0000-000000000002", "step 3: action=jump")
|
|
1126
|
+
time.sleep(0.5)
|
|
1127
|
+
|
|
1128
|
+
env_logs = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-000000000002")
|
|
1129
|
+
assert len(env_logs) == 3
|
|
1130
|
+
contents = {log["content"] for log in env_logs}
|
|
1131
|
+
assert contents == {"step 1: action=left", "step 2: action=right", "step 3: action=jump"}
|
|
1132
|
+
|
|
1133
|
+
expt_logger.end()
|
|
1134
|
+
|
|
1135
|
+
def test_log_environment_different_rollouts(
|
|
1136
|
+
self,
|
|
1137
|
+
shared_api_key: str,
|
|
1138
|
+
base_url: str,
|
|
1139
|
+
cleanup_experiments: list[str],
|
|
1140
|
+
fetch_env_logs,
|
|
1141
|
+
) -> None:
|
|
1142
|
+
"""Test environment logs for different rollouts stay separate."""
|
|
1143
|
+
run = expt_logger.init(
|
|
1144
|
+
name="test-env-log-separate",
|
|
1145
|
+
api_key=shared_api_key,
|
|
1146
|
+
base_url=base_url,
|
|
1147
|
+
)
|
|
1148
|
+
cleanup_experiments.append(run._experiment_id)
|
|
1149
|
+
|
|
1150
|
+
expt_logger.log_environment("00000000-0000-0000-0000-00000000003a", "log for rollout A")
|
|
1151
|
+
expt_logger.log_environment("00000000-0000-0000-0000-00000000003b", "log for rollout B")
|
|
1152
|
+
time.sleep(0.5)
|
|
1153
|
+
|
|
1154
|
+
logs_a = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-00000000003a")
|
|
1155
|
+
logs_b = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-00000000003b")
|
|
1156
|
+
assert len(logs_a) == 1
|
|
1157
|
+
assert logs_a[0]["content"] == "log for rollout A"
|
|
1158
|
+
assert len(logs_b) == 1
|
|
1159
|
+
assert logs_b[0]["content"] == "log for rollout B"
|
|
1160
|
+
|
|
1161
|
+
expt_logger.end()
|
|
1162
|
+
|
|
1163
|
+
def test_log_environment_multiline_content(
|
|
1164
|
+
self,
|
|
1165
|
+
shared_api_key: str,
|
|
1166
|
+
base_url: str,
|
|
1167
|
+
cleanup_experiments: list[str],
|
|
1168
|
+
fetch_env_logs,
|
|
1169
|
+
) -> None:
|
|
1170
|
+
"""Test environment log with multiline content."""
|
|
1171
|
+
run = expt_logger.init(
|
|
1172
|
+
name="test-env-log-multiline",
|
|
1173
|
+
api_key=shared_api_key,
|
|
1174
|
+
base_url=base_url,
|
|
1175
|
+
)
|
|
1176
|
+
cleanup_experiments.append(run._experiment_id)
|
|
1177
|
+
|
|
1178
|
+
content = "obs: {x: 1.0, y: 2.0}\nreward: 0.5\ndone: false\ninfo: {step: 10}"
|
|
1179
|
+
expt_logger.log_environment("00000000-0000-0000-0000-000000000004", content)
|
|
1180
|
+
time.sleep(0.5)
|
|
1181
|
+
|
|
1182
|
+
env_logs = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-000000000004")
|
|
1183
|
+
assert len(env_logs) == 1
|
|
1184
|
+
assert env_logs[0]["content"] == content
|
|
1185
|
+
|
|
1186
|
+
expt_logger.end()
|
|
@@ -1480,3 +1480,88 @@ def test_log_error_with_invalid_mode_empty(mock_client):
|
|
|
1480
1480
|
|
|
1481
1481
|
assert "non-empty" in str(exc_info.value)
|
|
1482
1482
|
run.end()
|
|
1483
|
+
|
|
1484
|
+
|
|
1485
|
+
# ========== log_environment Tests ==========
|
|
1486
|
+
|
|
1487
|
+
|
|
1488
|
+
def test_log_environment_calls_client(mock_client):
|
|
1489
|
+
"""Test log_environment() calls client with correct args."""
|
|
1490
|
+
_, client_instance = mock_client
|
|
1491
|
+
|
|
1492
|
+
run = Run(name="test-run", api_key="test-key", base_url="https://test.example.com")
|
|
1493
|
+
|
|
1494
|
+
run.log_environment("rollout-abc", "step 1 observation")
|
|
1495
|
+
|
|
1496
|
+
# Give worker time to process
|
|
1497
|
+
time.sleep(0.1)
|
|
1498
|
+
|
|
1499
|
+
assert client_instance.log_env_logs.called
|
|
1500
|
+
call_args = client_instance.log_env_logs.call_args
|
|
1501
|
+
assert call_args[0][0] == "test-exp-id"
|
|
1502
|
+
assert call_args[0][1] == "rollout-abc"
|
|
1503
|
+
assert call_args[0][2] == "step 1 observation"
|
|
1504
|
+
|
|
1505
|
+
run.end()
|
|
1506
|
+
|
|
1507
|
+
|
|
1508
|
+
def test_log_environment_multiple_calls(mock_client):
|
|
1509
|
+
"""Test log_environment() can be called multiple times for different rollouts."""
|
|
1510
|
+
_, client_instance = mock_client
|
|
1511
|
+
|
|
1512
|
+
run = Run(name="test-run", api_key="test-key", base_url="https://test.example.com")
|
|
1513
|
+
|
|
1514
|
+
run.log_environment("rollout-1", "log content 1")
|
|
1515
|
+
run.log_environment("rollout-2", "log content 2")
|
|
1516
|
+
|
|
1517
|
+
# Give worker time to process
|
|
1518
|
+
time.sleep(0.1)
|
|
1519
|
+
|
|
1520
|
+
assert client_instance.log_env_logs.call_count == 2
|
|
1521
|
+
first_call = client_instance.log_env_logs.call_args_list[0]
|
|
1522
|
+
second_call = client_instance.log_env_logs.call_args_list[1]
|
|
1523
|
+
assert first_call[0][1] == "rollout-1"
|
|
1524
|
+
assert first_call[0][2] == "log content 1"
|
|
1525
|
+
assert second_call[0][1] == "rollout-2"
|
|
1526
|
+
assert second_call[0][2] == "log content 2"
|
|
1527
|
+
|
|
1528
|
+
run.end()
|
|
1529
|
+
|
|
1530
|
+
|
|
1531
|
+
def test_log_environment_queue_full_handling(mock_client):
|
|
1532
|
+
"""Test that queue full is handled gracefully for environment logs."""
|
|
1533
|
+
_, _ = mock_client
|
|
1534
|
+
|
|
1535
|
+
run = Run(name="test-run", api_key="test-key", base_url="https://test.example.com")
|
|
1536
|
+
run._queue = queue.Queue(maxsize=1)
|
|
1537
|
+
|
|
1538
|
+
with patch("expt_logger.run.logger") as mock_logger:
|
|
1539
|
+
run.log_environment("rollout-1", "log 1")
|
|
1540
|
+
run.log_environment("rollout-2", "log 2") # Should trigger queue full
|
|
1541
|
+
|
|
1542
|
+
assert mock_logger.warning.called
|
|
1543
|
+
warning_msg = mock_logger.warning.call_args[0][0]
|
|
1544
|
+
assert "queue full" in warning_msg.lower()
|
|
1545
|
+
|
|
1546
|
+
run.end()
|
|
1547
|
+
|
|
1548
|
+
|
|
1549
|
+
def test_log_environment_api_error_is_logged(mock_client):
|
|
1550
|
+
"""Test that API errors from log_environment are caught and logged, not raised."""
|
|
1551
|
+
_, client_instance = mock_client
|
|
1552
|
+
|
|
1553
|
+
client_instance.log_env_logs.side_effect = APIError("Server error")
|
|
1554
|
+
|
|
1555
|
+
run = Run(name="test-run", api_key="test-key", base_url="https://test.example.com")
|
|
1556
|
+
|
|
1557
|
+
with patch("expt_logger.run.logger") as mock_logger:
|
|
1558
|
+
run.log_environment("rollout-abc", "some content")
|
|
1559
|
+
|
|
1560
|
+
# Give worker time to process and handle error
|
|
1561
|
+
time.sleep(0.2)
|
|
1562
|
+
|
|
1563
|
+
assert run._worker_thread is not None
|
|
1564
|
+
assert run._worker_thread.is_alive()
|
|
1565
|
+
assert mock_logger.error.called
|
|
1566
|
+
|
|
1567
|
+
run.end()
|
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"permissions": {
|
|
3
|
-
"allow": [
|
|
4
|
-
"Bash(uv run mypy:*)",
|
|
5
|
-
"Bash(uv run pytest:*)",
|
|
6
|
-
"Bash(python -m pytest:*)",
|
|
7
|
-
"Bash(python example_config_validation.py:*)",
|
|
8
|
-
"Bash(uv run python:*)",
|
|
9
|
-
"Bash(ls:*)",
|
|
10
|
-
"Bash(curl:*)",
|
|
11
|
-
"Bash(cat:*)",
|
|
12
|
-
"Bash(EXPT_LOGGER_LOG_LEVEL=DEBUG uv run pytest:*)",
|
|
13
|
-
"Bash(EXPT_LOGGER_API_KEY=test EXPT_LOGGER_BASE_URL=http://localhost:3000 uv run python:*)",
|
|
14
|
-
"Bash(EXPT_LOGGER_API_KEY=test uv run python:*)",
|
|
15
|
-
"Bash(uv sync:*)",
|
|
16
|
-
"Bash(python -m json.tool:*)",
|
|
17
|
-
"Bash(uv run ruff:*)",
|
|
18
|
-
"Bash(find:*)",
|
|
19
|
-
"Bash(uv add:*)",
|
|
20
|
-
"Bash(python:*)",
|
|
21
|
-
"Bash(EXPT_LOGGER_LOG_LEVEL=DEBUG uv run python:*)",
|
|
22
|
-
"Bash(uv run:*)",
|
|
23
|
-
"Bash(git restore:*)",
|
|
24
|
-
"Bash(git checkout:*)",
|
|
25
|
-
"Bash(git rebase:*)",
|
|
26
|
-
"Bash(git add:*)"
|
|
27
|
-
]
|
|
28
|
-
}
|
|
29
|
-
}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|