PyPI - expt-logger - Versions diffs - 0.1.0.dev20__tar.gz → 0.1.0.dev22__tar.gz - Mend

expt-logger 0.1.0.dev20.tar.gz → 0.1.0.dev22.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

{expt_logger-0.1.0.dev20 → expt_logger-0.1.0.dev22}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: expt-logger
-Version: 0.1.0.dev20
+Version: 0.1.0.dev22
 Summary: Simple experiment logging library
 Requires-Python: >=3.10
 Requires-Dist: requests>=2.31.0

{expt_logger-0.1.0.dev20 → expt_logger-0.1.0.dev22}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "expt-logger"
-version = "0.1.0.dev20"
+version = "0.1.0.dev22"
 description = "Simple experiment logging library"
 readme = "README.md"
 requires-python = ">=3.10"

{expt_logger-0.1.0.dev20 → expt_logger-0.1.0.dev22}/src/expt_logger/__init__.py RENAMED Viewed

@@ -159,19 +159,23 @@ def log_rollout(
 def log_environment(
     rollout_id: str,
     content: str,
+    k: int | None = None,
+    commit: bool = True,
 ) -> None:
     """Log an environment log entry associated with a rollout.
     Args:
         rollout_id: ID of the rollout this log entry is associated with
         content: Log content string
+        k: If set, commit only when the buffer has more than k env log entries.
+        commit: Whether to flush buffer after logging
     Raises:
         RuntimeError: If no active run exists
     """
     if _active_run is None:
         raise RuntimeError("No active run. Call init() first.")
-    _active_run.log_environment(rollout_id=rollout_id, content=content)
+    _active_run.log_environment(rollout_id=rollout_id, content=content, k=k, commit=commit)
 def log_error(

{expt_logger-0.1.0.dev20 → expt_logger-0.1.0.dev22}/src/expt_logger/buffer.py RENAMED Viewed

@@ -2,7 +2,7 @@
 import logging
-from expt_logger.types import ErrorItem, RolloutItem, ScalarItem
+from expt_logger.types import EnvLogItem, ErrorItem, RolloutItem, ScalarItem
 logger = logging.getLogger(__name__)
@@ -20,6 +20,7 @@ class Buffer:
         self._scalars: dict[str, float] = {}  # full_key (mode/metric) -> value
         self._rollouts: list[RolloutItem] = []
         self._errors: list[ErrorItem] = []
+        self._env_logs: list[EnvLogItem] = []
     def add_scalar(self, name: str, value: float, mode: str | None = None) -> None:
         """Add a scalar metric to the buffer.
@@ -88,7 +89,16 @@ class Buffer:
         """
         self._errors.append(error)
-    def get_and_clear(self) -> tuple[list[ScalarItem], list[RolloutItem], list[ErrorItem]]:
+    def add_env_log(self, rollout_id: str, content: str) -> None:
+        """Add an environment log entry to the buffer.
+        Args:
+            rollout_id: ID of the rollout this log entry is associated with
+            content: Log content string
+        """
+        self._env_logs.append({"rollout_id": rollout_id, "content": content})
+    def get_and_clear(self) -> tuple[list[ScalarItem], list[RolloutItem], list[ErrorItem], list[EnvLogItem]]:
         """Get all buffered data and clear the buffer.
         Returns:
@@ -110,12 +120,14 @@ class Buffer:
         rollouts = self._rollouts.copy()
         errors = self._errors.copy()
+        env_logs = self._env_logs.copy()
         self._scalars.clear()
         self._rollouts.clear()
         self._errors.clear()
+        self._env_logs.clear()
-        return scalar_items, rollouts, errors
+        return scalar_items, rollouts, errors, env_logs
     def is_empty(self) -> bool:
         """Check if buffer has any data.
@@ -123,4 +135,4 @@ class Buffer:
         Returns:
             True if buffer is empty, False otherwise
         """
-        return not self._scalars and not self._rollouts and not self._errors
+        return not self._scalars and not self._rollouts and not self._errors and not self._env_logs

{expt_logger-0.1.0.dev20 → expt_logger-0.1.0.dev22}/src/expt_logger/client.py RENAMED Viewed

@@ -7,7 +7,7 @@ from typing import Any
 import requests
 from expt_logger.exceptions import APIError, AuthenticationError
-from expt_logger.types import ErrorItem, RolloutItem, ScalarItem, ScalarValue
+from expt_logger.types import EnvLogItem, ErrorItem, RolloutItem, ScalarItem, ScalarValue
 logger = logging.getLogger(__name__)
@@ -283,21 +283,19 @@ class APIClient:
     def log_env_logs(
         self,
         experiment_id: str,
-        rollout_id: str,
-        content: str,
+        logs: list[EnvLogItem],
     ) -> None:
-        """Log an environment log entry for an experiment.
+        """Log environment log entries for an experiment.
         Args:
             experiment_id: Experiment ID
-            rollout_id: Rollout ID this log entry is associated with
-            content: Log content string
+            logs: List of env log entries with rollout_id and content
         Raises:
             APIError: If request fails
         """
         url = f"{self.base_url}/api/experiments/{experiment_id}/env-logs"
-        payload = {"rolloutId": rollout_id, "content": content}
+        payload = {"logs": [{"rolloutId": log["rollout_id"], "content": log["content"]} for log in logs]}
         self._request("POST", url, json=payload)
     def get_env_logs(

{expt_logger-0.1.0.dev20 → expt_logger-0.1.0.dev22}/src/expt_logger/run.py RENAMED Viewed

@@ -82,7 +82,9 @@ class Run:
             resolved_experiment_id = get_experiment_id(experiment_id, is_main_process=False)
             if resolved_experiment_id is not None:
                 self._experiment_id = resolved_experiment_id
-                self._validate_and_attach_experiment(config=config)
+                # Do not set the config if already attached to an experiment to avoid
+                # overwriting existing settings
+                self._validate_and_attach_experiment()
                 logger.info(
                     f"[expt_logger] Attached to experiment ID: {self._experiment_id} (subprocess)"
                 )
@@ -291,12 +293,16 @@ class Run:
         self,
         rollout_id: str,
         content: str,
+        k: int | None = None,
+        commit: bool = True,
     ) -> None:
         """Log an environment log entry associated with a rollout.
         Args:
             rollout_id: ID of the rollout this log entry is associated with
             content: Log content string
+            k: If set, commit when the buffer has more than k elements
+            commit: Whether to flush buffer after logging
         """
         env_cmd: LogEnvironmentCommand = {
             "rollout_id": rollout_id,
@@ -305,7 +311,12 @@ class Run:
         try:
             self._queue.put_nowait(("log_environment", env_cmd))
         except Full:
-            logger.warning(f"Command queue full, dropping environment log for rollout: {rollout_id}")
+            logger.warning(
+                f"Command queue full, dropping environment log for rollout: {rollout_id}"
+            )
+        if commit or (k is not None and len(self._buffer._env_logs) > k):
+            self.commit()
     def log_error(
         self,
@@ -569,10 +580,7 @@ class Run:
         Args:
             data: Log environment command data with rollout_id and content
         """
-        try:
-            self._client.log_env_logs(self._experiment_id, data["rollout_id"], data["content"])
-        except Exception as e:
-            logger.error(f"Error logging environment log: {e}", exc_info=True)
+        self._buffer.add_env_log(data["rollout_id"], data["content"])
     def _handle_log_error(self, data: LogErrorCommand) -> None:
         """Handle log_error command.
@@ -619,7 +627,7 @@ class Run:
         if self._buffer.is_empty():
             return
-        scalars, rollouts, errors = self._buffer.get_and_clear()
+        scalars, rollouts, errors, env_logs = self._buffer.get_and_clear()
         # Send scalars if any
         if scalars:
@@ -644,3 +652,11 @@ class Run:
                 logger.debug(f"Flushed {len(errors)} errors at step {self._step}")
             except Exception as e:
                 logger.error(f"Error logging errors: {e}", exc_info=True)
+        # Send env logs if any
+        if env_logs:
+            try:
+                self._client.log_env_logs(self._experiment_id, env_logs)
+                logger.debug(f"Flushed {len(env_logs)} environment logs")
+            except Exception as e:
+                logger.error(f"Error logging environment logs: {e}", exc_info=True)

{expt_logger-0.1.0.dev20 → expt_logger-0.1.0.dev22}/src/expt_logger/types.py RENAMED Viewed

@@ -49,6 +49,13 @@ class ErrorItem(TypedDict):
     traceback: str | None
+class EnvLogItem(TypedDict):
+    """An environment log item for API submission."""
+    rollout_id: str
+    content: str
 class ScalarValue(TypedDict):
     """A single scalar value at a specific step (used in GET responses)."""

{expt_logger-0.1.0.dev20 → expt_logger-0.1.0.dev22}/tests/test_buffer.py RENAMED Viewed

@@ -16,17 +16,18 @@ def buffer():
 def test_buffer_initialization(buffer):
     """Test buffer starts empty."""
     assert buffer.is_empty()
-    scalars, rollouts, errors = buffer.get_and_clear()
+    scalars, rollouts, errors, env_logs = buffer.get_and_clear()
     assert scalars == []
     assert rollouts == []
     assert errors == []
+    assert env_logs == []
 def test_add_scalar_simple(buffer):
     """Test adding a simple scalar metric."""
     buffer.add_scalar("loss", 0.5)
-    scalars, rollouts, errors = buffer.get_and_clear()
+    scalars, rollouts, errors, _ = buffer.get_and_clear()
     assert len(scalars) == 1
     assert scalars[0] == {"step": 0, "mode": "train", "name": "loss", "value": 0.5}
     assert rollouts == []
@@ -37,7 +38,7 @@ def test_add_scalar_with_mode_in_key(buffer):
     """Test adding scalar with mode already in key."""
     buffer.add_scalar("train/loss", 0.5)
-    scalars, rollouts, errors = buffer.get_and_clear()
+    scalars, rollouts, errors, _ = buffer.get_and_clear()
     assert len(scalars) == 1
     assert scalars[0] == {"step": 0, "mode": "train", "name": "loss", "value": 0.5}
@@ -46,7 +47,7 @@ def test_add_scalar_with_mode_parameter(buffer):
     """Test adding scalar with explicit mode parameter."""
     buffer.add_scalar("loss", 0.5, mode="eval")
-    scalars, rollouts, errors = buffer.get_and_clear()
+    scalars, rollouts, errors, _ = buffer.get_and_clear()
     assert len(scalars) == 1
     assert scalars[0] == {"step": 0, "mode": "eval", "name": "loss", "value": 0.5}
@@ -57,7 +58,7 @@ def test_add_scalar_mode_conflict_key_wins(buffer, caplog):
         buffer.add_scalar("train/loss", 0.5, mode="eval")
     # Should use "eval" mode from parameter, with full "train/loss" as the name
-    scalars, _, _ = buffer.get_and_clear()
+    scalars, _, _, _ = buffer.get_and_clear()
     assert len(scalars) == 1
     assert scalars[0] == {"step": 0, "mode": "eval", "name": "train/loss", "value": 0.5}
@@ -70,7 +71,7 @@ def test_add_scalar_mode_no_conflict(buffer, caplog):
     with caplog.at_level(logging.WARNING):
         buffer.add_scalar("train/loss", 0.5, mode="train")
-    scalars, _, _ = buffer.get_and_clear()
+    scalars, _, _, _ = buffer.get_and_clear()
     assert len(scalars) == 1
     assert scalars[0] == {"step": 0, "mode": "train", "name": "loss", "value": 0.5}
@@ -84,7 +85,7 @@ def test_add_multiple_scalars(buffer):
     buffer.add_scalar("accuracy", 0.9)
     buffer.add_scalar("eval/loss", 0.6)
-    scalars, _, _ = buffer.get_and_clear()
+    scalars, _, _, _ = buffer.get_and_clear()
     assert len(scalars) == 3
     # Convert to dict for easier comparison
     scalars_dict = {f"{s['mode']}/{s['name']}": s["value"] for s in scalars}
@@ -101,7 +102,7 @@ def test_add_scalar_last_write_wins(buffer, caplog):
         buffer.add_scalar("loss", 0.5)
         buffer.add_scalar("loss", 0.3)  # Overwrites
-    scalars, _, _ = buffer.get_and_clear()
+    scalars, _, _, _ = buffer.get_and_clear()
     assert len(scalars) == 1
     assert scalars[0] == {"step": 0, "mode": "train", "name": "loss", "value": 0.3}
@@ -117,7 +118,7 @@ def test_add_scalar_last_write_wins_different_modes(buffer):
     buffer.add_scalar("loss", 0.5, mode="train")
     buffer.add_scalar("loss", 0.6, mode="eval")
-    scalars, _, _ = buffer.get_and_clear()
+    scalars, _, _, _ = buffer.get_and_clear()
     assert len(scalars) == 2
     # Convert to dict for easier comparison
     scalars_dict = {f"{s['mode']}/{s['name']}": s["value"] for s in scalars}
@@ -139,7 +140,7 @@ def test_add_rollout(buffer):
     buffer.add_rollout(rollout)
-    scalars, rollouts, errors = buffer.get_and_clear()
+    scalars, rollouts, errors, _ = buffer.get_and_clear()
     assert scalars == []
     assert len(rollouts) == 1
     assert rollouts[0] == rollout
@@ -165,7 +166,7 @@ def test_add_multiple_rollouts(buffer):
     buffer.add_rollout(rollout1)
     buffer.add_rollout(rollout2)
-    _, rollouts, _ = buffer.get_and_clear()
+    _, rollouts, _, _ = buffer.get_and_clear()
     assert len(rollouts) == 2
     assert rollouts[0] == rollout1
     assert rollouts[1] == rollout2
@@ -185,7 +186,7 @@ def test_mixed_scalars_and_rollouts(buffer):
     )
     buffer.add_scalar("accuracy", 0.9)
-    scalars, rollouts, errors = buffer.get_and_clear()
+    scalars, rollouts, errors, _ = buffer.get_and_clear()
     assert len(scalars) == 2
     scalars_dict = {f"{s['mode']}/{s['name']}": s["value"] for s in scalars}
     assert scalars_dict == {"train/loss": 0.5, "train/accuracy": 0.9}
@@ -208,7 +209,7 @@ def test_get_and_clear_empties_buffer(buffer):
     assert not buffer.is_empty()
     # First call returns data
-    scalars1, rollouts1, errors1 = buffer.get_and_clear()
+    scalars1, rollouts1, errors1, _ = buffer.get_and_clear()
     assert len(scalars1) > 0
     assert len(rollouts1) > 0
@@ -216,7 +217,7 @@ def test_get_and_clear_empties_buffer(buffer):
     assert buffer.is_empty()
     # Second call returns empty
-    scalars2, rollouts2, errors2 = buffer.get_and_clear()
+    scalars2, rollouts2, errors2, _ = buffer.get_and_clear()
     assert scalars2 == []
     assert rollouts2 == []
     assert errors2 == []
@@ -226,13 +227,13 @@ def test_get_and_clear_returns_copy(buffer):
     """Test that get_and_clear returns a copy, not reference."""
     buffer.add_scalar("loss", 0.5)
-    scalars1, _, _ = buffer.get_and_clear()
+    scalars1, _, _, _ = buffer.get_and_clear()
     # Modify returned list
     scalars1.append({"step": 999, "mode": "test", "name": "modified", "value": 999})
     # Add new data
     buffer.add_scalar("accuracy", 0.9)
-    scalars2, _, _ = buffer.get_and_clear()
+    scalars2, _, _, _ = buffer.get_and_clear()
     # Should not contain the modification
     assert len(scalars2) == 1
@@ -274,7 +275,7 @@ def test_metric_key_with_multiple_slashes(buffer):
     # Edge case: what if key has multiple slashes?
     buffer.add_scalar("train/sub/metric", 0.5)
-    scalars, _, _ = buffer.get_and_clear()
+    scalars, _, _, _ = buffer.get_and_clear()
     # Should split on first slash only
     assert len(scalars) == 1
     assert scalars[0] == {"step": 0, "mode": "train", "name": "sub/metric", "value": 0.5}
@@ -285,7 +286,7 @@ def test_default_mode_is_train(buffer):
     buffer.add_scalar("loss", 0.5)
     buffer.add_scalar("accuracy", 0.9)
-    scalars, _, _ = buffer.get_and_clear()
+    scalars, _, _, _ = buffer.get_and_clear()
     assert all(s["mode"] == "train" for s in scalars)
@@ -293,7 +294,7 @@ def test_mode_provided_strips_matching_prefix(buffer):
     """Test that when mode is provided and matches key prefix, the prefix is stripped."""
     buffer.add_scalar("train/loss", 0.5, mode="train")
-    scalars, _, _ = buffer.get_and_clear()
+    scalars, _, _, _ = buffer.get_and_clear()
     # Should be train/loss (prefix stripped)
     assert len(scalars) == 1
     assert scalars[0] == {"step": 0, "mode": "train", "name": "loss", "value": 0.5}
@@ -303,7 +304,7 @@ def test_mode_provided_keeps_mismatched_prefix(buffer):
     """Test that when mode is provided and doesn't match key prefix, full key is kept."""
     buffer.add_scalar("train/loss", 0.5, mode="eval")
-    scalars, _, _ = buffer.get_and_clear()
+    scalars, _, _, _ = buffer.get_and_clear()
     # Should be eval/train/loss (full key kept as name)
     assert len(scalars) == 1
     assert scalars[0] == {"step": 0, "mode": "eval", "name": "train/loss", "value": 0.5}
@@ -313,7 +314,7 @@ def test_mode_not_provided_extracts_from_key(buffer):
     """Test that when mode is not provided, it's extracted from key prefix."""
     buffer.add_scalar("eval/accuracy", 0.95)
-    scalars, _, _ = buffer.get_and_clear()
+    scalars, _, _, _ = buffer.get_and_clear()
     # Should be eval/accuracy (mode extracted, name is accuracy)
     assert len(scalars) == 1
     assert scalars[0] == {"step": 0, "mode": "eval", "name": "accuracy", "value": 0.95}
@@ -323,7 +324,7 @@ def test_mode_not_provided_simple_key_defaults_train(buffer):
     """Test that simple keys without mode default to train mode."""
     buffer.add_scalar("loss", 0.5)
-    scalars, _, _ = buffer.get_and_clear()
+    scalars, _, _, _ = buffer.get_and_clear()
     # Should be train/loss (default mode)
     assert len(scalars) == 1
     assert scalars[0] == {"step": 0, "mode": "train", "name": "loss", "value": 0.5}
@@ -342,7 +343,7 @@ def test_add_error(buffer):
     buffer.add_error(error)
-    scalars, rollouts, errors = buffer.get_and_clear()
+    scalars, rollouts, errors, _ = buffer.get_and_clear()
     assert scalars == []
     assert rollouts == []
     assert len(errors) == 1
@@ -369,7 +370,7 @@ def test_add_multiple_errors(buffer):
     buffer.add_error(error1)
     buffer.add_error(error2)
-    _, _, errors = buffer.get_and_clear()
+    _, _, errors, _ = buffer.get_and_clear()
     assert len(errors) == 2
     assert errors[0] == error1
     assert errors[1] == error2
@@ -398,7 +399,7 @@ def test_mixed_scalars_rollouts_and_errors(buffer):
     )
     buffer.add_scalar("accuracy", 0.9)
-    scalars, rollouts, errors = buffer.get_and_clear()
+    scalars, rollouts, errors, _ = buffer.get_and_clear()
     assert len(scalars) == 2
     assert len(rollouts) == 1
     assert len(errors) == 1
@@ -438,13 +439,58 @@ def test_get_and_clear_empties_errors(buffer):
     assert not buffer.is_empty()
     # First call returns data
-    _, _, errors1 = buffer.get_and_clear()
+    _, _, errors1, _ = buffer.get_and_clear()
     assert len(errors1) > 0
     # Buffer should now be empty
     assert buffer.is_empty()
     # Second call returns empty
-    _, _, errors2 = buffer.get_and_clear()
+    _, _, errors2, _ = buffer.get_and_clear()
     assert errors2 == []
+def test_add_env_log(buffer):
+    """Test adding an environment log entry."""
+    buffer.add_env_log("rollout-1", "step 1 observation")
+    _, _, _, env_logs = buffer.get_and_clear()
+    assert len(env_logs) == 1
+    assert env_logs[0] == {"rollout_id": "rollout-1", "content": "step 1 observation"}
+def test_add_multiple_env_logs(buffer):
+    """Test adding multiple environment log entries."""
+    buffer.add_env_log("rollout-1", "step 1")
+    buffer.add_env_log("rollout-1", "step 2")
+    buffer.add_env_log("rollout-2", "other rollout")
+    _, _, _, env_logs = buffer.get_and_clear()
+    assert len(env_logs) == 3
+    assert env_logs[0] == {"rollout_id": "rollout-1", "content": "step 1"}
+    assert env_logs[1] == {"rollout_id": "rollout-1", "content": "step 2"}
+    assert env_logs[2] == {"rollout_id": "rollout-2", "content": "other rollout"}
+def test_is_empty_with_only_env_logs(buffer):
+    """Test is_empty with only env logs."""
+    assert buffer.is_empty()
+    buffer.add_env_log("rollout-1", "some content")
+    assert not buffer.is_empty()
+    buffer.get_and_clear()
+    assert buffer.is_empty()
+def test_get_and_clear_empties_env_logs(buffer):
+    """Test that get_and_clear empties env logs from buffer."""
+    buffer.add_env_log("rollout-1", "content")
+    _, _, _, env_logs1 = buffer.get_and_clear()
+    assert len(env_logs1) == 1
+    assert buffer.is_empty()
+    _, _, _, env_logs2 = buffer.get_and_clear()
+    assert env_logs2 == []

{expt_logger-0.1.0.dev20 → expt_logger-0.1.0.dev22}/tests/test_client.py RENAMED Viewed

@@ -295,15 +295,24 @@ def test_log_rollouts(client):
 def test_log_env_logs(client):
-    """Test logging environment logs."""
+    """Test logging environment logs as a batch."""
+    logs = [
+        {"rollout_id": "rollout-456", "content": "step 1 observation"},
+        {"rollout_id": "rollout-456", "content": "step 2 observation"},
+    ]
     with patch.object(client, "_request") as mock_request:
-        client.log_env_logs("exp-123", "rollout-456", "step 1 observation")
+        client.log_env_logs("exp-123", logs)
         mock_request.assert_called_once()
         call_args = mock_request.call_args
         assert call_args[0][0] == "POST"
         assert call_args[0][1] == "https://test.example.com/api/experiments/exp-123/env-logs"
-        assert call_args[1]["json"] == {"rolloutId": "rollout-456", "content": "step 1 observation"}
+        assert call_args[1]["json"] == {
+            "logs": [
+                {"rolloutId": "rollout-456", "content": "step 1 observation"},
+                {"rolloutId": "rollout-456", "content": "step 2 observation"},
+            ]
+        }
 def test_log_errors(client):

{expt_logger-0.1.0.dev20 → expt_logger-0.1.0.dev22}/tests/test_client_integration.py RENAMED Viewed

@@ -176,7 +176,7 @@ class TestAPIClientIntegration:
         response = requests.get(
             f"{client.base_url}/api/experiments/{experiment_id}/errors",
             params={"mode": "train"},
-            headers={"x-api-key": client.api_key},
+            headers={"Authorization": f"Bearer {client.api_key}"},
         )
         assert response.status_code == 200
@@ -370,7 +370,7 @@ class TestAPIClientIntegration:
         errors_response = requests.get(
             f"{client.base_url}/api/experiments/{experiment_id}/errors",
             params={"mode": "train"},
-            headers={"x-api-key": client.api_key},
+            headers={"Authorization": f"Bearer {client.api_key}"},
         )
         assert errors_response.status_code == 200

{expt_logger-0.1.0.dev20 → expt_logger-0.1.0.dev22}/tests/test_integration_e2e.py RENAMED Viewed

@@ -1096,7 +1096,7 @@ class TestEnvironmentLogs:
         )
         cleanup_experiments.append(run._experiment_id)
-        expt_logger.log_environment("00000000-0000-0000-0000-000000000001", "observation: step=1, reward=0.9")
+        expt_logger.log_environment("00000000-0000-0000-0000-000000000001", "observation: step=1, reward=0.9", k=-1)
         time.sleep(0.5)
         env_logs = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-000000000001")
@@ -1120,9 +1120,9 @@ class TestEnvironmentLogs:
         )
         cleanup_experiments.append(run._experiment_id)
-        expt_logger.log_environment("00000000-0000-0000-0000-000000000002", "step 1: action=left")
-        expt_logger.log_environment("00000000-0000-0000-0000-000000000002", "step 2: action=right")
-        expt_logger.log_environment("00000000-0000-0000-0000-000000000002", "step 3: action=jump")
+        expt_logger.log_environment("00000000-0000-0000-0000-000000000002", "step 1: action=left", commit=True)
+        expt_logger.log_environment("00000000-0000-0000-0000-000000000002", "step 2: action=right", commit=True)
+        expt_logger.log_environment("00000000-0000-0000-0000-000000000002", "step 3: action=jump", commit=True)
         time.sleep(0.5)
         env_logs = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-000000000002")
@@ -1147,8 +1147,8 @@ class TestEnvironmentLogs:
         )
         cleanup_experiments.append(run._experiment_id)
-        expt_logger.log_environment("00000000-0000-0000-0000-00000000003a", "log for rollout A")
-        expt_logger.log_environment("00000000-0000-0000-0000-00000000003b", "log for rollout B")
+        expt_logger.log_environment("00000000-0000-0000-0000-00000000003a", "log for rollout A", commit=True)
+        expt_logger.log_environment("00000000-0000-0000-0000-00000000003b", "log for rollout B", commit=True)
         time.sleep(0.5)
         logs_a = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-00000000003a")
@@ -1160,6 +1160,50 @@ class TestEnvironmentLogs:
         expt_logger.end()
+    def test_log_environment_k_threshold_batching(
+        self,
+        shared_api_key: str,
+        base_url: str,
+        cleanup_experiments: list[str],
+        fetch_env_logs,
+    ) -> None:
+        """Test that k parameter batches env logs and flushes only when threshold is exceeded.
+        With k=2, the flush triggers when the main thread calls log_environment and
+        finds > 2 entries already in the worker's buffer. So we need k+2 total calls:
+        the first k+1 accumulate in the buffer, and the (k+2)th call detects the overflow
+        and triggers a commit.
+        """
+        k = 2
+        rollout_id = "00000000-0000-0000-0000-00000000005a"
+        run = expt_logger.init(
+            name="test-env-log-k-threshold",
+            api_key=shared_api_key,
+            base_url=base_url,
+        )
+        cleanup_experiments.append(run._experiment_id)
+        # Log k+1 items without committing; sleep to let the worker process them into the buffer
+        for i in range(k + 1):
+            expt_logger.log_environment(rollout_id, f"step {i}", commit=False, k=k)
+        time.sleep(0.3)
+        # Nothing should be on the server yet — threshold not exceeded in the main thread's view
+        env_logs = fetch_env_logs(run._experiment_id, rollout_id)
+        assert len(env_logs) == 0, f"Expected 0 logs before threshold, got {len(env_logs)}"
+        # One more call: the main thread now sees k+1 items in the buffer (> k) and commits
+        expt_logger.log_environment(rollout_id, f"step {k + 1}", commit=False, k=k)
+        time.sleep(0.5)
+        # All k+2 logs should now be on the server
+        env_logs = fetch_env_logs(run._experiment_id, rollout_id)
+        assert len(env_logs) == k + 2
+        contents = {log["content"] for log in env_logs}
+        assert contents == {f"step {i}" for i in range(k + 2)}
+        expt_logger.end()
     def test_log_environment_multiline_content(
         self,
         shared_api_key: str,
@@ -1176,7 +1220,7 @@ class TestEnvironmentLogs:
         cleanup_experiments.append(run._experiment_id)
         content = "obs: {x: 1.0, y: 2.0}\nreward: 0.5\ndone: false\ninfo: {step: 10}"
-        expt_logger.log_environment("00000000-0000-0000-0000-000000000004", content)
+        expt_logger.log_environment("00000000-0000-0000-0000-000000000004", content, commit=True)
         time.sleep(0.5)
         env_logs = fetch_env_logs(run._experiment_id, "00000000-0000-0000-0000-000000000004")

{expt_logger-0.1.0.dev20 → expt_logger-0.1.0.dev22}/tests/test_run.py RENAMED Viewed

@@ -1486,47 +1486,37 @@ def test_log_error_with_invalid_mode_empty(mock_client):
 def test_log_environment_calls_client(mock_client):
-    """Test log_environment() calls client with correct args."""
+    """Test log_environment() flushes to client on commit."""
     _, client_instance = mock_client
     run = Run(name="test-run", api_key="test-key", base_url="https://test.example.com")
     run.log_environment("rollout-abc", "step 1 observation")
-    # Give worker time to process
-    time.sleep(0.1)
+    run.end()
     assert client_instance.log_env_logs.called
     call_args = client_instance.log_env_logs.call_args
     assert call_args[0][0] == "test-exp-id"
-    assert call_args[0][1] == "rollout-abc"
-    assert call_args[0][2] == "step 1 observation"
-    run.end()
+    assert call_args[0][1] == [{"rollout_id": "rollout-abc", "content": "step 1 observation"}]
 def test_log_environment_multiple_calls(mock_client):
-    """Test log_environment() can be called multiple times for different rollouts."""
+    """Test multiple log_environment() calls are batched into a single client call."""
     _, client_instance = mock_client
     run = Run(name="test-run", api_key="test-key", base_url="https://test.example.com")
     run.log_environment("rollout-1", "log content 1")
     run.log_environment("rollout-2", "log content 2")
-    # Give worker time to process
-    time.sleep(0.1)
-    assert client_instance.log_env_logs.call_count == 2
-    first_call = client_instance.log_env_logs.call_args_list[0]
-    second_call = client_instance.log_env_logs.call_args_list[1]
-    assert first_call[0][1] == "rollout-1"
-    assert first_call[0][2] == "log content 1"
-    assert second_call[0][1] == "rollout-2"
-    assert second_call[0][2] == "log content 2"
     run.end()
+    assert client_instance.log_env_logs.call_count == 1
+    call_args = client_instance.log_env_logs.call_args
+    assert call_args[0][1] == [
+        {"rollout_id": "rollout-1", "content": "log content 1"},
+        {"rollout_id": "rollout-2", "content": "log content 2"},
+    ]
 def test_log_environment_queue_full_handling(mock_client):
     """Test that queue full is handled gracefully for environment logs."""
@@ -1556,12 +1546,8 @@ def test_log_environment_api_error_is_logged(mock_client):
     with patch("expt_logger.run.logger") as mock_logger:
         run.log_environment("rollout-abc", "some content")
-        # Give worker time to process and handle error
-        time.sleep(0.2)
+        run.end()
         assert run._worker_thread is not None
-        assert run._worker_thread.is_alive()
+        assert not run._worker_thread.is_alive()
         assert mock_logger.error.called
-    run.end()