expt-logger 0.1.0.dev18__tar.gz → 0.1.0.dev20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. expt_logger-0.1.0.dev18/README.md → expt_logger-0.1.0.dev20/PKG-INFO +40 -5
  2. expt_logger-0.1.0.dev18/PKG-INFO → expt_logger-0.1.0.dev20/README.md +31 -13
  3. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/pyproject.toml +2 -1
  4. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/__init__.py +21 -0
  5. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/client.py +41 -0
  6. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/run.py +44 -1
  7. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/types.py +12 -1
  8. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_client.py +12 -0
  9. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_global_api.py +23 -0
  10. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_integration_e2e.py +130 -0
  11. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_run.py +85 -0
  12. expt_logger-0.1.0.dev18/.claude/settings.local.json +0 -29
  13. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/.gitignore +0 -0
  14. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/.pre-commit-config.yaml +0 -0
  15. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/DEVELOPMENT.md +0 -0
  16. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/buffer.py +0 -0
  17. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/config.py +0 -0
  18. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/env.py +0 -0
  19. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/exceptions.py +0 -0
  20. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/py.typed +0 -0
  21. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/src/expt_logger/validation.py +0 -0
  22. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/conftest.py +0 -0
  23. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_buffer.py +0 -0
  24. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_client_integration.py +0 -0
  25. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_config.py +0 -0
  26. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_env.py +0 -0
  27. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_exceptions.py +0 -0
  28. {expt_logger-0.1.0.dev18 → expt_logger-0.1.0.dev20}/tests/test_validation.py +0 -0
@@ -1,3 +1,12 @@
1
+ Metadata-Version: 2.4
2
+ Name: expt-logger
3
+ Version: 0.1.0.dev20
4
+ Summary: Simple experiment logging library
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: requests>=2.31.0
7
+ Requires-Dist: typing-extensions>=4.1.0
8
+ Description-Content-Type: text/markdown
9
+
1
10
  # expt_logger
2
11
 
3
12
  Simple experiment tracking for RL training with a W&B-style API.
@@ -244,15 +253,23 @@ init(
244
253
  config: dict[str, Any] | None = None,
245
254
  api_key: str | None = None,
246
255
  base_url: str | None = None,
247
- is_main_process: bool = True
256
+ is_main_process: bool = True,
257
+ experiment_id: str | None = None
248
258
  ) -> Run
249
259
  ```
250
260
 
251
- - `name`: Experiment name (auto-generated if not provided)
252
- - `config`: Initial hyperparameters
261
+ - `name`: Experiment name (auto-generated if not provided, used only when creating new experiments)
262
+ - `config`: Initial hyperparameters (synced to server when provided)
253
263
  - `api_key`: API key (or set `EXPT_LOGGER_API_KEY`)
254
264
  - `base_url`: Custom server URL (or set `EXPT_LOGGER_BASE_URL`)
255
265
  - `is_main_process`: If `False`, read experiment ID from temp file instead of creating a new experiment (for multi-process logging)
266
+ - `experiment_id`: Optional experiment ID to attach to an existing experiment (overrides all other resolution methods)
267
+
268
+ **Behavior:**
269
+ - If `experiment_id` is provided: attach to that specific experiment (overrides all)
270
+ - Else if `EXPT_LOGGER_EXPERIMENT_ID` env var exists: attach to that experiment
271
+ - Else if `is_main_process=True`: create a new experiment
272
+ - Else if `is_main_process=False`: read from temp file (multi-process)
256
273
 
257
274
  **Note:** When creating a new experiment (main process), `init()` automatically sets `EXPT_LOGGER_EXPERIMENT_ID` and writes to a temp file so subprocesses can discover it.
258
275
 
@@ -280,7 +297,7 @@ log_rollout(
280
297
  messages: list[dict[str, str]],
281
298
  rewards: dict[str, float],
282
299
  step: int | None = None,
283
- mode: str = "train",
300
+ mode: str | None = None,
284
301
  commit: bool = True
285
302
  )
286
303
  ```
@@ -289,7 +306,7 @@ log_rollout(
289
306
  - `messages`: List of `{"role": ..., "content": ...}` dicts (both must be strings)
290
307
  - `rewards`: Dict of reward names to numeric values (must be valid numbers, not NaN/Inf)
291
308
  - `step`: Step number (must be non-negative integer if provided)
292
- - `mode`: `"train"` or `"eval"` (must be non-empty string)
309
+ - `mode`: Optional mode (defaults to `"train"` if not provided)
293
310
  - `commit`: If `True` (default), commit immediately and increment step. If `False`, buffer metrics until commit.
294
311
 
295
312
  **Input Validation:**
@@ -297,6 +314,24 @@ log_rollout(
297
314
  - Invalid inputs raise `ValidationError` with descriptive error messages
298
315
  - Metric and reward values must be numeric (int/float) and cannot be NaN or Infinity
299
316
 
317
+ ### `expt_logger.log_error()`
318
+
319
+ ```python
320
+ log_error(
321
+ error: Exception | str,
322
+ step: int | None = None,
323
+ mode: str | None = None,
324
+ include_traceback: bool = True,
325
+ commit: bool = True
326
+ )
327
+ ```
328
+
329
+ - `error`: The error (Exception object or string message)
330
+ - `step`: Step number (overrides automatic step counter if provided)
331
+ - `mode`: Optional mode (e.g., "train", "eval")
332
+ - `include_traceback`: Whether to include the traceback (only for Exception objects, default: `True`)
333
+ - `commit`: If `True` (default), commit immediately and increment step. If `False`, buffer until commit.
334
+
300
335
  ### `expt_logger.commit()`
301
336
 
302
337
  ```python
@@ -1,11 +1,3 @@
1
- Metadata-Version: 2.4
2
- Name: expt-logger
3
- Version: 0.1.0.dev18
4
- Summary: Simple experiment logging library
5
- Requires-Python: >=3.10
6
- Requires-Dist: requests>=2.31.0
7
- Description-Content-Type: text/markdown
8
-
9
1
  # expt_logger
10
2
 
11
3
  Simple experiment tracking for RL training with a W&B-style API.
@@ -252,15 +244,23 @@ init(
252
244
  config: dict[str, Any] | None = None,
253
245
  api_key: str | None = None,
254
246
  base_url: str | None = None,
255
- is_main_process: bool = True
247
+ is_main_process: bool = True,
248
+ experiment_id: str | None = None
256
249
  ) -> Run
257
250
  ```
258
251
 
259
- - `name`: Experiment name (auto-generated if not provided)
260
- - `config`: Initial hyperparameters
252
+ - `name`: Experiment name (auto-generated if not provided, used only when creating new experiments)
253
+ - `config`: Initial hyperparameters (synced to server when provided)
261
254
  - `api_key`: API key (or set `EXPT_LOGGER_API_KEY`)
262
255
  - `base_url`: Custom server URL (or set `EXPT_LOGGER_BASE_URL`)
263
256
  - `is_main_process`: If `False`, read experiment ID from temp file instead of creating a new experiment (for multi-process logging)
257
+ - `experiment_id`: Optional experiment ID to attach to an existing experiment (overrides all other resolution methods)
258
+
259
+ **Behavior:**
260
+ - If `experiment_id` is provided: attach to that specific experiment (overrides all)
261
+ - Else if `EXPT_LOGGER_EXPERIMENT_ID` env var exists: attach to that experiment
262
+ - Else if `is_main_process=True`: create a new experiment
263
+ - Else if `is_main_process=False`: read from temp file (multi-process)
264
264
 
265
265
  **Note:** When creating a new experiment (main process), `init()` automatically sets `EXPT_LOGGER_EXPERIMENT_ID` and writes to a temp file so subprocesses can discover it.
266
266
 
@@ -288,7 +288,7 @@ log_rollout(
288
288
  messages: list[dict[str, str]],
289
289
  rewards: dict[str, float],
290
290
  step: int | None = None,
291
- mode: str = "train",
291
+ mode: str | None = None,
292
292
  commit: bool = True
293
293
  )
294
294
  ```
@@ -297,7 +297,7 @@ log_rollout(
297
297
  - `messages`: List of `{"role": ..., "content": ...}` dicts (both must be strings)
298
298
  - `rewards`: Dict of reward names to numeric values (must be valid numbers, not NaN/Inf)
299
299
  - `step`: Step number (must be non-negative integer if provided)
300
- - `mode`: `"train"` or `"eval"` (must be non-empty string)
300
+ - `mode`: Optional mode (defaults to `"train"` if not provided)
301
301
  - `commit`: If `True` (default), commit immediately and increment step. If `False`, buffer metrics until commit.
302
302
 
303
303
  **Input Validation:**
@@ -305,6 +305,24 @@ log_rollout(
305
305
  - Invalid inputs raise `ValidationError` with descriptive error messages
306
306
  - Metric and reward values must be numeric (int/float) and cannot be NaN or Infinity
307
307
 
308
+ ### `expt_logger.log_error()`
309
+
310
+ ```python
311
+ log_error(
312
+ error: Exception | str,
313
+ step: int | None = None,
314
+ mode: str | None = None,
315
+ include_traceback: bool = True,
316
+ commit: bool = True
317
+ )
318
+ ```
319
+
320
+ - `error`: The error (Exception object or string message)
321
+ - `step`: Step number (overrides automatic step counter if provided)
322
+ - `mode`: Optional mode (e.g., "train", "eval")
323
+ - `include_traceback`: Whether to include the traceback (only for Exception objects, default: `True`)
324
+ - `commit`: If `True` (default), commit immediately and increment step. If `False`, buffer until commit.
325
+
308
326
  ### `expt_logger.commit()`
309
327
 
310
328
  ```python
@@ -1,11 +1,12 @@
1
1
  [project]
2
2
  name = "expt-logger"
3
- version = "0.1.0.dev18"
3
+ version = "0.1.0.dev20"
4
4
  description = "Simple experiment logging library"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
7
7
  dependencies = [
8
8
  "requests>=2.31.0",
9
+ "typing_extensions>=4.1.0",
9
10
  ]
10
11
 
11
12
  [dependency-groups]
@@ -112,6 +112,7 @@ def log_rollout(
112
112
  step: int | None = None,
113
113
  mode: str | None = None,
114
114
  commit: bool = True,
115
+ id: str | None = None,
115
116
  ) -> None:
116
117
  """Log a rollout (conversation + rewards).
117
118
 
@@ -151,9 +152,28 @@ def log_rollout(
151
152
  step=step,
152
153
  mode=mode,
153
154
  commit=commit,
155
+ id=id
154
156
  )
155
157
 
156
158
 
159
+ def log_environment(
160
+ rollout_id: str,
161
+ content: str,
162
+ ) -> None:
163
+ """Log an environment log entry associated with a rollout.
164
+
165
+ Args:
166
+ rollout_id: ID of the rollout this log entry is associated with
167
+ content: Log content string
168
+
169
+ Raises:
170
+ RuntimeError: If no active run exists
171
+ """
172
+ if _active_run is None:
173
+ raise RuntimeError("No active run. Call init() first.")
174
+ _active_run.log_environment(rollout_id=rollout_id, content=content)
175
+
176
+
157
177
  def log_error(
158
178
  error: Exception | str,
159
179
  step: int | None = None,
@@ -297,6 +317,7 @@ __all__ = [
297
317
  "init",
298
318
  "log",
299
319
  "log_rollout",
320
+ "log_environment",
300
321
  "log_error",
301
322
  "commit",
302
323
  "end",
@@ -280,6 +280,47 @@ class APIClient:
280
280
  payload = {"rollouts": chunk}
281
281
  self._request("POST", url, json=payload)
282
282
 
283
+ def log_env_logs(
284
+ self,
285
+ experiment_id: str,
286
+ rollout_id: str,
287
+ content: str,
288
+ ) -> None:
289
+ """Log an environment log entry for an experiment.
290
+
291
+ Args:
292
+ experiment_id: Experiment ID
293
+ rollout_id: Rollout ID this log entry is associated with
294
+ content: Log content string
295
+
296
+ Raises:
297
+ APIError: If request fails
298
+ """
299
+ url = f"{self.base_url}/api/experiments/{experiment_id}/env-logs"
300
+ payload = {"rolloutId": rollout_id, "content": content}
301
+ self._request("POST", url, json=payload)
302
+
303
+ def get_env_logs(
304
+ self,
305
+ experiment_id: str,
306
+ rollout_id: str,
307
+ ) -> list[dict]:
308
+ """Fetch environment logs for a specific rollout.
309
+
310
+ Args:
311
+ experiment_id: Experiment ID
312
+ rollout_id: Rollout ID to filter logs by
313
+
314
+ Returns:
315
+ List of env log entries
316
+
317
+ Raises:
318
+ APIError: If request fails
319
+ """
320
+ url = f"{self.base_url}/api/experiments/{experiment_id}/env-logs"
321
+ response = self._request("GET", url, params={"rolloutId": rollout_id})
322
+ return response.json()["logs"]
323
+
283
324
  def log_errors(
284
325
  self,
285
326
  experiment_id: str,
@@ -23,6 +23,7 @@ from expt_logger.types import (
23
23
  ConfigUpdateCommand,
24
24
  ErrorItem,
25
25
  LogCommand,
26
+ LogEnvironmentCommand,
26
27
  LogErrorCommand,
27
28
  LogRolloutCommand,
28
29
  MessageItem,
@@ -228,6 +229,7 @@ class Run:
228
229
  step: int | None = None,
229
230
  mode: str | None = None,
230
231
  commit: bool = True,
232
+ id: str | None = None,
231
233
  ) -> None:
232
234
  """Log a rollout (conversation + rewards).
233
235
 
@@ -238,6 +240,7 @@ class Run:
238
240
  step: Optional step number (overrides worker's step counter if provided)
239
241
  mode: Optional mode (defaults to "train")
240
242
  commit: Whether to flush buffer after logging
243
+ id: Optional identifier for this rollout
241
244
  """
242
245
  from expt_logger.validation import (
243
246
  validate_messages,
@@ -273,6 +276,7 @@ class Run:
273
276
  "rewards": reward_items,
274
277
  "mode": mode or "train",
275
278
  "step": step,
279
+ "id": id,
276
280
  }
277
281
 
278
282
  try:
@@ -283,6 +287,26 @@ class Run:
283
287
  if commit:
284
288
  self.commit()
285
289
 
290
+ def log_environment(
291
+ self,
292
+ rollout_id: str,
293
+ content: str,
294
+ ) -> None:
295
+ """Log an environment log entry associated with a rollout.
296
+
297
+ Args:
298
+ rollout_id: ID of the rollout this log entry is associated with
299
+ content: Log content string
300
+ """
301
+ env_cmd: LogEnvironmentCommand = {
302
+ "rollout_id": rollout_id,
303
+ "content": content,
304
+ }
305
+ try:
306
+ self._queue.put_nowait(("log_environment", env_cmd))
307
+ except Full:
308
+ logger.warning(f"Command queue full, dropping environment log for rollout: {rollout_id}")
309
+
286
310
  def log_error(
287
311
  self,
288
312
  error: Exception | str,
@@ -312,7 +336,11 @@ class Run:
312
336
  if isinstance(error, Exception):
313
337
  message = str(error)
314
338
  error_type = type(error).__name__
315
- traceback_str = "".join(tb.format_exception(type(error), error, error.__traceback__)) if include_traceback else None
339
+ traceback_str = (
340
+ "".join(tb.format_exception(type(error), error, error.__traceback__))
341
+ if include_traceback
342
+ else None
343
+ )
316
344
  else:
317
345
  message = error
318
346
  error_type = "Error"
@@ -418,6 +446,8 @@ class Run:
418
446
  self._handle_log(cast(LogCommand, data))
419
447
  elif command == "log_rollout":
420
448
  self._handle_log_rollout(cast(LogRolloutCommand, data))
449
+ elif command == "log_environment":
450
+ self._handle_log_environment(cast(LogEnvironmentCommand, data))
421
451
  elif command == "log_error":
422
452
  self._handle_log_error(cast(LogErrorCommand, data))
423
453
  elif command == "commit":
@@ -528,9 +558,22 @@ class Run:
528
558
  "messages": data["messages"],
529
559
  "rewards": data["rewards"],
530
560
  }
561
+ if data["id"] is not None:
562
+ rollout["id"] = data["id"]
531
563
 
532
564
  self._buffer.add_rollout(rollout)
533
565
 
566
+ def _handle_log_environment(self, data: LogEnvironmentCommand) -> None:
567
+ """Handle log_environment command.
568
+
569
+ Args:
570
+ data: Log environment command data with rollout_id and content
571
+ """
572
+ try:
573
+ self._client.log_env_logs(self._experiment_id, data["rollout_id"], data["content"])
574
+ except Exception as e:
575
+ logger.error(f"Error logging environment log: {e}", exc_info=True)
576
+
534
577
  def _handle_log_error(self, data: LogErrorCommand) -> None:
535
578
  """Handle log_error command.
536
579
 
@@ -1,6 +1,8 @@
1
1
  """Type definitions for expt-logger."""
2
2
 
3
- from typing import Any, NotRequired, TypedDict
3
+ from typing import Any, TypedDict
4
+
5
+ from typing_extensions import NotRequired
4
6
 
5
7
 
6
8
  class ScalarItem(TypedDict):
@@ -34,6 +36,7 @@ class RolloutItem(TypedDict):
34
36
  promptText: str
35
37
  messages: list[MessageItem]
36
38
  rewards: list[RewardItem]
39
+ id: NotRequired[str]
37
40
 
38
41
 
39
42
  class ErrorItem(TypedDict):
@@ -73,6 +76,7 @@ class LogRolloutCommand(TypedDict):
73
76
  rewards: list[RewardItem]
74
77
  mode: str
75
78
  step: int | None
79
+ id: str | None
76
80
 
77
81
 
78
82
  class LogErrorCommand(TypedDict):
@@ -85,6 +89,13 @@ class LogErrorCommand(TypedDict):
85
89
  step: int | None
86
90
 
87
91
 
92
+ class LogEnvironmentCommand(TypedDict):
93
+ """Command to log an environment log entry."""
94
+
95
+ rollout_id: str
96
+ content: str
97
+
98
+
88
99
  class CommitCommand(TypedDict):
89
100
  """Command to commit (flush) the buffer."""
90
101
 
@@ -294,6 +294,18 @@ def test_log_rollouts(client):
294
294
  assert call_args[1]["json"] == {"rollouts": rollouts}
295
295
 
296
296
 
297
+ def test_log_env_logs(client):
298
+ """Test logging environment logs."""
299
+ with patch.object(client, "_request") as mock_request:
300
+ client.log_env_logs("exp-123", "rollout-456", "step 1 observation")
301
+
302
+ mock_request.assert_called_once()
303
+ call_args = mock_request.call_args
304
+ assert call_args[0][0] == "POST"
305
+ assert call_args[0][1] == "https://test.example.com/api/experiments/exp-123/env-logs"
306
+ assert call_args[1]["json"] == {"rolloutId": "rollout-456", "content": "step 1 observation"}
307
+
308
+
297
309
  def test_log_errors(client):
298
310
  """Test logging errors."""
299
311
  with patch.object(client, "_request") as mock_request:
@@ -134,6 +134,28 @@ def test_log_rollout_raises_if_no_active_run(mock_run):
134
134
  assert "No active run" in str(exc_info.value)
135
135
 
136
136
 
137
+ def test_log_environment_delegates_to_run(mock_run):
138
+ """Test log_environment() delegates to the active run."""
139
+ _, run_instance = mock_run
140
+
141
+ expt_logger.init()
142
+ expt_logger.log_environment(rollout_id="rollout-abc", content="step 1 observation")
143
+
144
+ run_instance.log_environment.assert_called_once_with(
145
+ rollout_id="rollout-abc",
146
+ content="step 1 observation",
147
+ )
148
+
149
+
150
+ def test_log_environment_raises_if_no_active_run():
151
+ """Test log_environment() raises RuntimeError if no active run."""
152
+ with pytest.raises(RuntimeError) as exc_info:
153
+ expt_logger.log_environment("rollout-abc", "some content")
154
+
155
+ assert "No active run" in str(exc_info.value)
156
+ assert "init()" in str(exc_info.value)
157
+
158
+
137
159
  def test_log_error_delegates_to_run(mock_run):
138
160
  """Test log_error() delegates to the active run."""
139
161
  _, run_instance = mock_run
@@ -393,6 +415,7 @@ def test_all_exports():
393
415
  "init",
394
416
  "log",
395
417
  "log_rollout",
418
+ "log_environment",
396
419
  "log_error",
397
420
  "commit",
398
421
  "end",
@@ -112,6 +112,22 @@ def fetch_rollouts(shared_api_key: str, base_url: str):
112
112
  return _fetch
113
113
 
114
114
 
115
+ @pytest.fixture
116
+ def fetch_env_logs(shared_api_key: str, base_url: str):
117
+ """Factory fixture for fetching environment log data."""
118
+
119
+ def _fetch(experiment_id: str, rollout_id: str):
120
+ response = requests.get(
121
+ f"{base_url}/api/experiments/{experiment_id}/env-logs",
122
+ params={"rolloutId": rollout_id},
123
+ headers={"Authorization": f"Bearer {shared_api_key}"},
124
+ )
125
+ assert response.status_code == 200
126
+ return response.json()["logs"]
127
+
128
+ return _fetch
129
+
130
+
115
131
  # ============================================================================
116
132
  # Test Class 1: Basic Workflow Tests
117
133
  # ============================================================================
@@ -1054,3 +1070,117 @@ class TestMultiProcess:
1054
1070
  # Worker metrics
1055
1071
  for i in range(num_workers):
1056
1072
  assert f"worker-{i + 1}-metric" in scalars
1073
+
1074
+
1075
+ # ============================================================================
1076
+ # Test Class 12: Environment Log Tests
1077
+ # ============================================================================
1078
+
1079
+
1080
+ @pytest.mark.integration
1081
+ class TestEnvironmentLogs:
1082
+ """Environment log functionality."""
1083
+
1084
+ def test_log_environment_basic(
1085
+ self,
1086
+ shared_api_key: str,
1087
+ base_url: str,
1088
+ cleanup_experiments: list[str],
1089
+ fetch_env_logs,
1090
+ ) -> None:
1091
+ """Test basic environment log persists to server."""
1092
+ run = expt_logger.init(
1093
+ name="test-env-log-basic",
1094
+ api_key=shared_api_key,
1095
+ base_url=base_url,
1096
+ )
1097
+ cleanup_experiments.append(run._experiment_id)
1098
+
1099
+ expt_logger.log_environment("00000000-0000-0000-0000-000000000001", "observation: step=1, reward=0.9")
1100
+ time.sleep(0.5)
1101
+
1102
+ env_logs = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-000000000001")
1103
+ assert len(env_logs) == 1
1104
+ assert env_logs[0]["content"] == "observation: step=1, reward=0.9"
1105
+
1106
+ expt_logger.end()
1107
+
1108
+ def test_log_environment_multiple_for_same_rollout(
1109
+ self,
1110
+ shared_api_key: str,
1111
+ base_url: str,
1112
+ cleanup_experiments: list[str],
1113
+ fetch_env_logs,
1114
+ ) -> None:
1115
+ """Test multiple environment logs for the same rollout are all stored."""
1116
+ run = expt_logger.init(
1117
+ name="test-env-log-multiple",
1118
+ api_key=shared_api_key,
1119
+ base_url=base_url,
1120
+ )
1121
+ cleanup_experiments.append(run._experiment_id)
1122
+
1123
+ expt_logger.log_environment("00000000-0000-0000-0000-000000000002", "step 1: action=left")
1124
+ expt_logger.log_environment("00000000-0000-0000-0000-000000000002", "step 2: action=right")
1125
+ expt_logger.log_environment("00000000-0000-0000-0000-000000000002", "step 3: action=jump")
1126
+ time.sleep(0.5)
1127
+
1128
+ env_logs = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-000000000002")
1129
+ assert len(env_logs) == 3
1130
+ contents = {log["content"] for log in env_logs}
1131
+ assert contents == {"step 1: action=left", "step 2: action=right", "step 3: action=jump"}
1132
+
1133
+ expt_logger.end()
1134
+
1135
+ def test_log_environment_different_rollouts(
1136
+ self,
1137
+ shared_api_key: str,
1138
+ base_url: str,
1139
+ cleanup_experiments: list[str],
1140
+ fetch_env_logs,
1141
+ ) -> None:
1142
+ """Test environment logs for different rollouts stay separate."""
1143
+ run = expt_logger.init(
1144
+ name="test-env-log-separate",
1145
+ api_key=shared_api_key,
1146
+ base_url=base_url,
1147
+ )
1148
+ cleanup_experiments.append(run._experiment_id)
1149
+
1150
+ expt_logger.log_environment("00000000-0000-0000-0000-00000000003a", "log for rollout A")
1151
+ expt_logger.log_environment("00000000-0000-0000-0000-00000000003b", "log for rollout B")
1152
+ time.sleep(0.5)
1153
+
1154
+ logs_a = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-00000000003a")
1155
+ logs_b = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-00000000003b")
1156
+ assert len(logs_a) == 1
1157
+ assert logs_a[0]["content"] == "log for rollout A"
1158
+ assert len(logs_b) == 1
1159
+ assert logs_b[0]["content"] == "log for rollout B"
1160
+
1161
+ expt_logger.end()
1162
+
1163
+ def test_log_environment_multiline_content(
1164
+ self,
1165
+ shared_api_key: str,
1166
+ base_url: str,
1167
+ cleanup_experiments: list[str],
1168
+ fetch_env_logs,
1169
+ ) -> None:
1170
+ """Test environment log with multiline content."""
1171
+ run = expt_logger.init(
1172
+ name="test-env-log-multiline",
1173
+ api_key=shared_api_key,
1174
+ base_url=base_url,
1175
+ )
1176
+ cleanup_experiments.append(run._experiment_id)
1177
+
1178
+ content = "obs: {x: 1.0, y: 2.0}\nreward: 0.5\ndone: false\ninfo: {step: 10}"
1179
+ expt_logger.log_environment("00000000-0000-0000-0000-000000000004", content)
1180
+ time.sleep(0.5)
1181
+
1182
+ env_logs = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-000000000004")
1183
+ assert len(env_logs) == 1
1184
+ assert env_logs[0]["content"] == content
1185
+
1186
+ expt_logger.end()
@@ -1480,3 +1480,88 @@ def test_log_error_with_invalid_mode_empty(mock_client):
1480
1480
 
1481
1481
  assert "non-empty" in str(exc_info.value)
1482
1482
  run.end()
1483
+
1484
+
1485
+ # ========== log_environment Tests ==========
1486
+
1487
+
1488
+ def test_log_environment_calls_client(mock_client):
1489
+ """Test log_environment() calls client with correct args."""
1490
+ _, client_instance = mock_client
1491
+
1492
+ run = Run(name="test-run", api_key="test-key", base_url="https://test.example.com")
1493
+
1494
+ run.log_environment("rollout-abc", "step 1 observation")
1495
+
1496
+ # Give worker time to process
1497
+ time.sleep(0.1)
1498
+
1499
+ assert client_instance.log_env_logs.called
1500
+ call_args = client_instance.log_env_logs.call_args
1501
+ assert call_args[0][0] == "test-exp-id"
1502
+ assert call_args[0][1] == "rollout-abc"
1503
+ assert call_args[0][2] == "step 1 observation"
1504
+
1505
+ run.end()
1506
+
1507
+
1508
+ def test_log_environment_multiple_calls(mock_client):
1509
+ """Test log_environment() can be called multiple times for different rollouts."""
1510
+ _, client_instance = mock_client
1511
+
1512
+ run = Run(name="test-run", api_key="test-key", base_url="https://test.example.com")
1513
+
1514
+ run.log_environment("rollout-1", "log content 1")
1515
+ run.log_environment("rollout-2", "log content 2")
1516
+
1517
+ # Give worker time to process
1518
+ time.sleep(0.1)
1519
+
1520
+ assert client_instance.log_env_logs.call_count == 2
1521
+ first_call = client_instance.log_env_logs.call_args_list[0]
1522
+ second_call = client_instance.log_env_logs.call_args_list[1]
1523
+ assert first_call[0][1] == "rollout-1"
1524
+ assert first_call[0][2] == "log content 1"
1525
+ assert second_call[0][1] == "rollout-2"
1526
+ assert second_call[0][2] == "log content 2"
1527
+
1528
+ run.end()
1529
+
1530
+
1531
+ def test_log_environment_queue_full_handling(mock_client):
1532
+ """Test that queue full is handled gracefully for environment logs."""
1533
+ _, _ = mock_client
1534
+
1535
+ run = Run(name="test-run", api_key="test-key", base_url="https://test.example.com")
1536
+ run._queue = queue.Queue(maxsize=1)
1537
+
1538
+ with patch("expt_logger.run.logger") as mock_logger:
1539
+ run.log_environment("rollout-1", "log 1")
1540
+ run.log_environment("rollout-2", "log 2") # Should trigger queue full
1541
+
1542
+ assert mock_logger.warning.called
1543
+ warning_msg = mock_logger.warning.call_args[0][0]
1544
+ assert "queue full" in warning_msg.lower()
1545
+
1546
+ run.end()
1547
+
1548
+
1549
+ def test_log_environment_api_error_is_logged(mock_client):
1550
+ """Test that API errors from log_environment are caught and logged, not raised."""
1551
+ _, client_instance = mock_client
1552
+
1553
+ client_instance.log_env_logs.side_effect = APIError("Server error")
1554
+
1555
+ run = Run(name="test-run", api_key="test-key", base_url="https://test.example.com")
1556
+
1557
+ with patch("expt_logger.run.logger") as mock_logger:
1558
+ run.log_environment("rollout-abc", "some content")
1559
+
1560
+ # Give worker time to process and handle error
1561
+ time.sleep(0.2)
1562
+
1563
+ assert run._worker_thread is not None
1564
+ assert run._worker_thread.is_alive()
1565
+ assert mock_logger.error.called
1566
+
1567
+ run.end()
@@ -1,29 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Bash(uv run mypy:*)",
5
- "Bash(uv run pytest:*)",
6
- "Bash(python -m pytest:*)",
7
- "Bash(python example_config_validation.py:*)",
8
- "Bash(uv run python:*)",
9
- "Bash(ls:*)",
10
- "Bash(curl:*)",
11
- "Bash(cat:*)",
12
- "Bash(EXPT_LOGGER_LOG_LEVEL=DEBUG uv run pytest:*)",
13
- "Bash(EXPT_LOGGER_API_KEY=test EXPT_LOGGER_BASE_URL=http://localhost:3000 uv run python:*)",
14
- "Bash(EXPT_LOGGER_API_KEY=test uv run python:*)",
15
- "Bash(uv sync:*)",
16
- "Bash(python -m json.tool:*)",
17
- "Bash(uv run ruff:*)",
18
- "Bash(find:*)",
19
- "Bash(uv add:*)",
20
- "Bash(python:*)",
21
- "Bash(EXPT_LOGGER_LOG_LEVEL=DEBUG uv run python:*)",
22
- "Bash(uv run:*)",
23
- "Bash(git restore:*)",
24
- "Bash(git checkout:*)",
25
- "Bash(git rebase:*)",
26
- "Bash(git add:*)"
27
- ]
28
- }
29
- }