plato-sdk-v2 2.3.4__py3-none-any.whl → 2.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
plato/agents/runner.py CHANGED
@@ -10,7 +10,10 @@ import platform
 import tempfile
 from pathlib import Path
 
-from plato.agents.logging import log_event, span, upload_artifacts
+from opentelemetry import trace
+
+from plato.agents.artifacts import upload_artifacts
+from plato.agents.otel import get_tracer
 
 logger = logging.getLogger(__name__)
 
@@ -38,20 +41,32 @@ async def run_agent(
     logs_dir = logs_dir or tempfile.mkdtemp(prefix="agent_logs_")
     agent_name = image.split("/")[-1].split(":")[0]
 
-    async with span(agent_name, span_type="agent", source="agent") as agent_span:
-        agent_span.log(f"Starting agent: {agent_name} ({image})")
+    # Get session info from environment variables
+    session_id = os.environ.get("SESSION_ID")
+    otel_url = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
+    upload_url = os.environ.get("UPLOAD_URL")
+
+    tracer = get_tracer("plato.agent")
+
+    with tracer.start_as_current_span(agent_name) as agent_span:
+        agent_span.set_attribute("span.type", "agent")
+        agent_span.set_attribute("source", "agent")
+        agent_span.set_attribute("image", image)
+        agent_span.set_attribute("content", f"Starting agent: {agent_name}")
 
         # Pull image if requested
         if pull:
-            agent_span.log(f"Pulling image: {image}")
-            pull_proc = await asyncio.create_subprocess_exec(
-                "docker",
-                "pull",
-                image,
-                stdout=asyncio.subprocess.PIPE,
-                stderr=asyncio.subprocess.STDOUT,
-            )
-            await pull_proc.wait()
+            with tracer.start_as_current_span("docker_pull") as pull_span:
+                pull_span.set_attribute("span.type", "docker_pull")
+                pull_span.set_attribute("image", image)
+                pull_proc = await asyncio.create_subprocess_exec(
+                    "docker",
+                    "pull",
+                    image,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.STDOUT,
+                )
+                await pull_proc.wait()
 
         # Setup
         os.makedirs(os.path.join(logs_dir, "agent"), exist_ok=True)
@@ -63,9 +78,7 @@ async def run_agent(
            # Build docker command
            docker_cmd = ["docker", "run", "--rm"]

-            # Determine if we need host networking:
-            # - Required on Linux without iptables for connectivity
-            # - Skip on macOS where --network=host doesn't work properly
+            # Determine if we need host networking
            use_host_network = False
            is_macos = platform.system() == "Darwin"

@@ -103,68 +116,200 @@ async def run_agent(
                ]
            )

+            # Pass session info to agent
+            if otel_url:
+                docker_cmd.extend(["-e", f"OTEL_EXPORTER_OTLP_ENDPOINT={otel_url}"])
+                # Use JSON protocol (not protobuf) for OTLP exports
+                docker_cmd.extend(["-e", "OTEL_EXPORTER_OTLP_PROTOCOL=http/json"])
+            if session_id:
+                docker_cmd.extend(["-e", f"SESSION_ID={session_id}"])
+            if upload_url:
+                docker_cmd.extend(["-e", f"UPLOAD_URL={upload_url}"])
+
+            # Pass trace context to agent for parent linking
+            current_span = trace.get_current_span()
+            span_context = current_span.get_span_context()
+            if span_context.is_valid:
+                trace_id = format(span_context.trace_id, "032x")
+                span_id = format(span_context.span_id, "016x")
+                docker_cmd.extend(
+                    [
+                        "-e",
+                        f"OTEL_TRACE_ID={trace_id}",
+                        "-e",
+                        f"OTEL_PARENT_SPAN_ID={span_id}",
+                    ]
+                )
+
            for key, value in secrets.items():
                docker_cmd.extend(["-e", f"{key.upper()}={value}"])

            docker_cmd.append(image)

-            # Pass instruction via CLI arg (agents expect --instruction flag)
+            # Pass instruction via CLI arg
            docker_cmd.extend(["--instruction", instruction])

            # Run container and stream output
-            process = await asyncio.create_subprocess_exec(
-                *docker_cmd,
-                stdout=asyncio.subprocess.PIPE,
-                stderr=asyncio.subprocess.STDOUT,
-            )
+            with tracer.start_as_current_span("agent_execution") as exec_span:
+                exec_span.set_attribute("span.type", "agent_execution")
+                exec_span.set_attribute("content", f"Running {agent_name}")

-            # Stream output line by line, collecting for error reporting
-            output_lines: list[str] = []
-            assert process.stdout is not None
-            while True:
-                line = await process.stdout.readline()
-                if not line:
-                    break
-                decoded_line = line.decode().rstrip()
-                output_lines.append(decoded_line)
-                logger.info(f"[agent] {decoded_line}")
-
-            await process.wait()
-
-            if process.returncode != 0:
-                # Get last N lines of output for error context
-                error_context = "\n".join(output_lines[-50:]) if output_lines else "No output captured"
-
-                # Log error event with container output
-                await log_event(
-                    span_type="error",
-                    content=f"Agent failed with exit code {process.returncode}",
-                    source="agent",
-                    extra={
-                        "exit_code": process.returncode,
-                        "image": image,
-                        "agent_name": agent_name,
-                        "output": error_context,
-                        "output_line_count": len(output_lines),
-                    },
+                process = await asyncio.create_subprocess_exec(
+                    *docker_cmd,
+                    stdout=asyncio.subprocess.PIPE,
+                    stderr=asyncio.subprocess.STDOUT,
                )

-                agent_span.set_extra(
-                    {
-                        "error": True,
-                        "exit_code": process.returncode,
-                        "output": error_context,
-                    }
-                )
+                # Stream output line by line
+                output_lines: list[str] = []
+                turn_count = 0
+                assert process.stdout is not None
+                while True:
+                    line = await process.stdout.readline()
+                    if not line:
+                        break
+                    decoded_line = line.decode().rstrip()
+                    output_lines.append(decoded_line)
+
+                    # Try to parse JSON output from agent for structured trajectory spans
+                    try:
+                        data = json.loads(decoded_line)
+                        event_type = data.get("type", "")
+
+                        if event_type == "assistant":
+                            # Agent response - create a turn span
+                            turn_count += 1
+                            msg = data.get("message", {})
+                            content_items = msg.get("content", [])
+
+                            # Extract text and tool calls with full details
+                            text_parts = []
+                            tool_calls = []
+                            for item in content_items:
+                                if item.get("type") == "text":
+                                    text_parts.append(item.get("text", "")[:2000])
+                                elif item.get("type") == "tool_use":
+                                    tool_input = item.get("input", {})
+                                    # Truncate large inputs
+                                    input_str = json.dumps(tool_input) if tool_input else ""
+                                    if len(input_str) > 2000:
+                                        input_str = input_str[:2000] + "..."
+                                    tool_calls.append(
+                                        {
+                                            "tool": item.get("name"),
+                                            "id": item.get("id"),
+                                            "input": input_str,
+                                        }
+                                    )
+
+                            with tracer.start_as_current_span(f"turn_{turn_count}") as turn_span:
+                                turn_span.set_attribute("span.type", "agent_turn")
+                                turn_span.set_attribute("source", "agent")
+                                turn_span.set_attribute("turn_number", turn_count)
+                                turn_span.set_attribute("model", msg.get("model", "unknown"))

-                raise RuntimeError(f"Agent failed with exit code {process.returncode}")
+                                if text_parts:
+                                    turn_span.set_attribute("content", "\n".join(text_parts)[:4000])
+                                if tool_calls:
+                                    turn_span.set_attribute("tool_calls", json.dumps(tool_calls))
+                                    # If no text content, show tool calls summary
+                                    if not text_parts:
+                                        turn_span.set_attribute(
+                                            "content", f"Tool calls: {', '.join(t['tool'] for t in tool_calls)}"
+                                        )

-            agent_span.log("Agent completed successfully")
+                                # Usage info
+                                usage = msg.get("usage", {})
+                                if usage:
+                                    turn_span.set_attribute("input_tokens", usage.get("input_tokens", 0))
+                                    turn_span.set_attribute("output_tokens", usage.get("output_tokens", 0))
+
+                        elif event_type == "user":
+                            # Tool result
+                            tool_results = data.get("message", {}).get("content", [])
+                            for result in tool_results:
+                                if result.get("type") == "tool_result":
+                                    tool_id = result.get("tool_use_id", "")
+                                    content = result.get("content", "")
+                                    # Handle content that might be a list of content blocks
+                                    if isinstance(content, list):
+                                        text_parts = []
+                                        for item in content:
+                                            if isinstance(item, dict) and item.get("type") == "text":
+                                                text_parts.append(item.get("text", ""))
+                                            elif isinstance(item, str):
+                                                text_parts.append(item)
+                                        content = "\n".join(text_parts)
+                                    if isinstance(content, str):
+                                        content = content[:2000]  # Truncate large results
+                                    with tracer.start_as_current_span("tool_result") as tr_span:
+                                        tr_span.set_attribute("span.type", "tool_result")
+                                        tr_span.set_attribute("source", "agent")
+                                        tr_span.set_attribute("tool_use_id", tool_id)
+                                        tr_span.set_attribute("content", f"Tool result for {tool_id}")
+                                        tr_span.set_attribute("result", content if content else "")
+
+                        elif event_type == "result":
+                            # Final result
+                            result_text = data.get("result", "")[:1000]
+                            is_error = data.get("is_error", False)
+                            duration_ms = data.get("duration_ms", 0)
+                            total_cost = data.get("total_cost_usd", 0)
+
+                            with tracer.start_as_current_span("agent_result") as res_span:
+                                res_span.set_attribute("span.type", "agent_result")
+                                res_span.set_attribute("source", "agent")
+                                res_span.set_attribute("content", result_text if result_text else "Agent completed")
+                                res_span.set_attribute("is_error", is_error)
+                                res_span.set_attribute("duration_ms", duration_ms)
+                                res_span.set_attribute("total_cost_usd", total_cost)
+                                res_span.set_attribute("num_turns", data.get("num_turns", turn_count))
+
+                        elif event_type == "system" and data.get("subtype") == "init":
+                            # Agent initialization
+                            with tracer.start_as_current_span("agent_init") as init_span:
+                                init_span.set_attribute("span.type", "agent_init")
+                                init_span.set_attribute("source", "agent")
+                                init_span.set_attribute("model", data.get("model", "unknown"))
+                                init_span.set_attribute("tools", json.dumps(data.get("tools", [])))
+                                init_span.set_attribute("content", f"Agent initialized: {data.get('model', 'unknown')}")
+
+                        else:
+                            # Other output - just log it without creating a span
+                            logger.debug(f"[agent] {decoded_line}")
+                            continue
+
+                    except json.JSONDecodeError:
+                        # Not JSON - just log it
+                        logger.info(f"[agent] {decoded_line}")
+
+                await process.wait()
+
+                exit_code = process.returncode or 0
+                if exit_code != 0:
+                    error_context = "\n".join(output_lines[-50:]) if output_lines else "No output captured"
+
+                    exec_span.set_attribute("error", True)
+                    exec_span.set_attribute("exit_code", exit_code)
+                    exec_span.add_event(
+                        "agent_error",
+                        {
+                            "exit_code": exit_code,
+                            "output": error_context[:4000],
+                        },
+                    )
+
+                    agent_span.set_attribute("error", True)
+                    agent_span.set_attribute("exit_code", exit_code)
+
+                    raise RuntimeError(f"Agent failed with exit code {exit_code}")
+
+                exec_span.set_attribute("success", True)
 
        finally:
            os.unlink(config_file.name)

-        # Load trajectory and add to span
+        # Load trajectory and log as event
        trajectory_path = Path(logs_dir) / "agent" / "trajectory.json"
        if trajectory_path.exists():
            try:
@@ -178,14 +323,24 @@ async def run_agent(
                agent_data["extra"] = extra
                trajectory["agent"] = agent_data

-                # Log trajectory as separate event
-                await log_event(
-                    span_type="trajectory",
-                    log_type="atif",
-                    extra=trajectory,
-                    source="agent",
-                )
+                # Log trajectory as span event
+                with tracer.start_as_current_span("trajectory") as traj_span:
+                    traj_span.set_attribute("span.type", "trajectory")
+                    traj_span.set_attribute("log_type", "atif")
+                    traj_span.set_attribute("source", "agent")
+                    # Store trajectory in span (truncated for OTel limits)
+                    traj_json = json.dumps(trajectory)
+                    if len(traj_json) > 10000:
+                        traj_span.set_attribute("trajectory_truncated", True)
+                        traj_span.set_attribute("trajectory_size", len(traj_json))
+                    else:
+                        traj_span.set_attribute("trajectory", traj_json)
            except Exception as e:
                logger.warning(f"Failed to load trajectory: {e}")

-        await upload_artifacts(logs_dir)
+        # Upload artifacts if we have upload URL configured
+        if upload_url:
+            await upload_artifacts(upload_url, logs_dir)
+
+        agent_span.set_attribute("success", True)
+        agent_span.set_attribute("content", f"Agent {agent_name} completed successfully")
plato/worlds/README.md CHANGED
@@ -28,7 +28,8 @@ Create a JSON config file:
   },
   "git_token": "ghp_...",
   "session_id": "local-test-001",
-  "callback_url": ""
+  "otel_url": "",
+  "upload_url": ""
 }
 ```
 
plato/worlds/base.py CHANGED
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging
+import os
 import subprocess
 from abc import ABC, abstractmethod
 from pathlib import Path
@@ -16,12 +17,14 @@ if TYPE_CHECKING:
    from plato.v2.async_.environment import Environment
    from plato.v2.async_.session import Session

-from plato.agents.logging import init_logging as _init_chronos_logging
-from plato.agents.logging import log_event as _log_event
-from plato.agents.logging import reset_logging as _reset_chronos_logging
-from plato.agents.logging import span as _span
-from plato.agents.logging import upload_artifact as _upload_artifact
-from plato.agents.logging import upload_checkpoint as _upload_checkpoint
+from plato.agents.artifacts import (
+    upload_artifact as _upload_artifact_raw,
+)
+from plato.agents.otel import (
+    get_tracer,
+    init_tracing,
+    shutdown_tracing,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -111,6 +114,7 @@ class BaseWorld(ABC, Generic[ConfigT]):
         self._step_count: int = 0
         self.plato_session = None
         self._current_step_id: str | None = None
+        self._session_id: str | None = None
 
     @classmethod
     def get_config_class(cls) -> type[RunConfig]:
@@ -390,17 +394,39 @@ class BaseWorld(ABC, Generic[ConfigT]):
            self.logger.warning(f"Failed to create state bundle: {e.stderr}")
            return None

-    async def _create_and_upload_checkpoint(self) -> dict[str, Any] | None:
+    async def _upload_artifact(
+        self,
+        data: bytes,
+        content_type: str = "application/octet-stream",
+    ) -> bool:
+        """Upload an artifact directly to S3.
+
+        Args:
+            data: Raw bytes of the artifact
+            content_type: MIME type of the content
+
+        Returns:
+            True if successful, False otherwise
+        """
+        if not self.config.upload_url:
+            self.logger.warning("Cannot upload artifact: upload_url not set")
+            return False
+        return await _upload_artifact_raw(
+            upload_url=self.config.upload_url,
+            data=data,
+            content_type=content_type,
+        )
+
+    async def _create_and_upload_checkpoint(self) -> bool:
        """Create a full checkpoint including env snapshots and state bundle.

        This method:
        1. Commits any pending state changes
        2. Creates env snapshots using snapshot_store
-        3. Creates and uploads state bundle as an artifact
-        4. Calls the checkpoint endpoint with all data
+        3. Creates and uploads state bundle to S3

        Returns:
-            Checkpoint result dict if successful, None otherwise.
+            True if successful, False otherwise
        """
        # Commit state changes first
        self._commit_state(f"Checkpoint at step {self._step_count}")
@@ -411,35 +437,21 @@ class BaseWorld(ABC, Generic[ConfigT]):
        env_snapshots = {}

        # Create and upload state bundle
-        state_artifact_id: str | None = None
        if self.config.state.enabled:
            bundle_data = self._create_state_bundle()
            if bundle_data:
-                result = await _upload_artifact(
+                success = await self._upload_artifact(
                    data=bundle_data,
-                    artifact_type="state",
-                    filename=f"state_step_{self._step_count}.bundle",
-                    extra={
-                        "step_number": self._step_count,
-                        "state_path": self.config.state.path,
-                    },
+                    content_type="application/octet-stream",
                )
-                if result:
-                    state_artifact_id = result.get("artifact_id")
-                    self.logger.info(f"Uploaded state artifact: {state_artifact_id}")
-
-        # Upload checkpoint with all data
-        checkpoint_result = await _upload_checkpoint(
-            step_number=self._step_count,
-            env_snapshots=env_snapshots,
-            state_artifact_id=state_artifact_id,
-            extra={
-                "world_name": self.name,
-                "world_version": self.get_version(),
-            },
-        )
+                if success:
+                    self.logger.info(f"Uploaded state bundle at step {self._step_count}")
+                    return True
+                else:
+                    self.logger.warning(f"Failed to upload state bundle at step {self._step_count}")
+                    return False

-        return checkpoint_result
+        return True

    def get_env(self, alias: str) -> Environment | None:
        """Get an environment by alias.
@@ -630,61 +642,82 @@ The following services are available for your use:
        # Initialize state directory (creates git repo if needed)
        self._init_state_directory()

-        # Initialize the logging singleton for agents to use
-        if config.callback_url and config.session_id:
-            _init_chronos_logging(
-                callback_url=config.callback_url,
-                session_id=config.session_id,
-            )
+        # Initialize OTel tracing and session info for artifact uploads
+        if config.session_id:
+            self._session_id = config.session_id
+
+            # Set environment variables for agent runners
+            os.environ["SESSION_ID"] = config.session_id
+            if config.otel_url:
+                os.environ["OTEL_EXPORTER_OTLP_ENDPOINT"] = config.otel_url
+                # Use JSON protocol (not protobuf) for the OTLP exporter
+                os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "http/json"
+            if config.upload_url:
+                os.environ["UPLOAD_URL"] = config.upload_url
+
+        # Initialize OTel tracing if otel_url is provided
+        print(f"[World] OTel URL from config: {config.otel_url!r}")
+        if config.otel_url:
+            init_tracing(
+                service_name=f"world-{self.name}",
+                session_id=config.session_id,
+                otlp_endpoint=config.otel_url,
+            )
+        else:
+            print("[World] No otel_url in config - OTel tracing disabled")

        # Connect to Plato session if configured (for heartbeats)
        await self._connect_plato_session()

+        # Get tracer for spans
+        tracer = get_tracer("plato.world")
+
        # Log session start
-        await _log_event(
-            span_type="session_start",
-            content=f"World '{self.name}' started",
-            source="world",
-            extra={"world_name": self.name, "world_version": self.get_version()},
-        )
+        with tracer.start_as_current_span("session_start") as span:
+            span.set_attribute("span.type", "session_start")
+            span.set_attribute("source", "world")
+            span.set_attribute("world_name", self.name)
+            span.set_attribute("world_version", self.get_version())
+            span.set_attribute("content", f"World '{self.name}' started")

        try:
-            # Execute reset with automatic span tracking
-            async with _span("reset", span_type="reset", source="world") as reset_span:
-                reset_span.log(f"Resetting world '{self.name}'")
+            # Execute reset with OTel span
+            with tracer.start_as_current_span("reset") as reset_span:
+                reset_span.set_attribute("span.type", "reset")
+                reset_span.set_attribute("source", "world")
+                reset_span.set_attribute("content", f"Resetting world '{self.name}'")
                obs = await self.reset()
-                reset_span.set_extra({"observation": obs.model_dump() if hasattr(obs, "model_dump") else str(obs)})
+                obs_data = obs.model_dump() if hasattr(obs, "model_dump") else str(obs)
+                reset_span.set_attribute("observation", str(obs_data)[:1000])  # Truncate for OTel
                self.logger.info(f"World reset complete: {obs}")

            while True:
                self._step_count += 1

-                # Execute step with automatic span tracking
-                # The span automatically sets itself as the current parent,
-                # so agent trajectories will nest under this step
-                async with _span(
-                    f"step_{self._step_count}",
-                    span_type="step",
-                    source="world",
-                ) as step_span:
-                    self._current_step_id = step_span.event_id
-                    step_span.log(f"Step {self._step_count} started")
+                # Execute step with OTel span
+                with tracer.start_as_current_span(f"step_{self._step_count}") as step_span:
+                    step_span.set_attribute("span.type", "step")
+                    step_span.set_attribute("source", "world")
+                    step_span.set_attribute("step_number", self._step_count)
+                    step_span.set_attribute("content", f"Step {self._step_count} started")
+
+                    # Store span context for nested agent spans
+
+                    self._current_step_id = format(step_span.get_span_context().span_id, "016x")
+
                    result = await self.step()
-                    step_span.set_extra(
-                        {
-                            "done": result.done,
-                            "observation": result.observation.model_dump()
-                            if hasattr(result.observation, "model_dump")
-                            else str(result.observation),
-                            "info": result.info,
-                        }
+
+                    step_span.set_attribute("done", result.done)
+                    obs_data = (
+                        result.observation.model_dump()
+                        if hasattr(result.observation, "model_dump")
+                        else str(result.observation)
                    )
+                    step_span.set_attribute("observation", str(obs_data)[:1000])

                    self.logger.info(f"Step {self._step_count}: done={result.done}")

                    # Create checkpoint if enabled and interval matches
-                    # Note: The checkpoint event is created by the callback endpoint,
-                    # so we don't need a span wrapper here (would create duplicates)
                    if self.config.checkpoint.enabled and self._step_count % self.config.checkpoint.interval == 0:
                        self.logger.info(f"Creating checkpoint after step {self._step_count}")
                        await self._create_and_upload_checkpoint()
@@ -697,14 +730,14 @@ The following services are available for your use:
            await self._disconnect_plato_session()

            # Log session end
-            await _log_event(
-                span_type="session_end",
-                content=f"World '{self.name}' completed after {self._step_count} steps",
-                source="world",
-                extra={"total_steps": self._step_count},
-            )
-
-            # Reset the logging singleton
-            _reset_chronos_logging()
+            with tracer.start_as_current_span("session_end") as span:
+                span.set_attribute("span.type", "session_end")
+                span.set_attribute("source", "world")
+                span.set_attribute("total_steps", self._step_count)
+                span.set_attribute("content", f"World '{self.name}' completed after {self._step_count} steps")
+
+            # Shutdown OTel tracing and clear session info
+            shutdown_tracing()
+            self._session_id = None

            self.logger.info(f"World '{self.name}' completed after {self._step_count} steps")
plato/worlds/config.py CHANGED
@@ -126,13 +126,15 @@ class RunConfig(BaseModel):
 
    Attributes:
        session_id: Unique Chronos session identifier
-        callback_url: Callback URL for status updates
+        otel_url: OTel endpoint URL (e.g., https://chronos.plato.so/api/otel)
+        upload_url: Presigned S3 URL for uploading artifacts (provided by Chronos)
        plato_session: Serialized Plato session for connecting to existing VM session
        checkpoint: Configuration for automatic checkpoints after steps
    """

    session_id: str = ""
-    callback_url: str = ""
+    otel_url: str = ""  # OTel endpoint URL
+    upload_url: str = ""  # Presigned S3 URL for uploads
    all_secrets: dict[str, str] = Field(default_factory=dict)  # All secrets (world + agent)

    # Serialized Plato session for connecting to VM and sending heartbeats
@@ -182,7 +184,7 @@ class RunConfig(BaseModel):
        envs = []

        # Skip runtime fields
-        runtime_fields = {"session_id", "callback_url", "all_secrets", "plato_session", "checkpoint", "state"}
+        runtime_fields = {"session_id", "otel_url", "upload_url", "all_secrets", "plato_session", "checkpoint", "state"}

        for field_name, prop_schema in properties.items():
            if field_name in runtime_fields: