plato-sdk-v2 2.3.0__py3-none-any.whl → 2.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
plato/agents/__init__.py CHANGED
@@ -20,8 +20,9 @@ Trajectory (ATIF):
20
20
  - Trajectory: ATIF trajectory model
21
21
  - Step, Agent, ToolCall, etc.: ATIF components
22
22
 
23
- Callback:
24
- - ChronosCallback: Utility for Chronos communication
23
+ OTel Tracing:
24
+ - instrument: Initialize OTel tracing from environment
25
+ - get_tracer: Get a tracer for creating spans
25
26
 
26
27
  Example (direct execution):
27
28
  from plato.agents import BaseAgent, AgentConfig, Secret, register_agent
@@ -80,14 +81,25 @@ __all__ = [
80
81
  "Metrics",
81
82
  "FinalMetrics",
82
83
  "SCHEMA_VERSION",
83
- # Logging
84
- "init_logging",
85
- "span",
86
- "log_event",
84
+ # Artifacts
85
+ "zip_directory",
87
86
  "upload_artifacts",
88
- "reset_logging",
87
+ "upload_artifact",
88
+ "upload_to_s3",
89
+ # OTel tracing
90
+ "init_tracing",
91
+ "instrument",
92
+ "shutdown_tracing",
93
+ "get_tracer",
94
+ "is_initialized",
89
95
  ]
90
96
 
97
+ from plato.agents.artifacts import (
98
+ upload_artifact,
99
+ upload_artifacts,
100
+ upload_to_s3,
101
+ zip_directory,
102
+ )
91
103
  from plato.agents.base import (
92
104
  BaseAgent,
93
105
  ConfigT,
@@ -97,12 +109,12 @@ from plato.agents.base import (
97
109
  )
98
110
  from plato.agents.build import BuildConfig, load_build_config
99
111
  from plato.agents.config import AgentConfig, Secret
100
- from plato.agents.logging import (
101
- init_logging,
102
- log_event,
103
- reset_logging,
104
- span,
105
- upload_artifacts,
112
+ from plato.agents.otel import (
113
+ get_tracer,
114
+ init_tracing,
115
+ instrument,
116
+ is_initialized,
117
+ shutdown_tracing,
106
118
  )
107
119
  from plato.agents.runner import run_agent
108
120
  from plato.agents.trajectory import (
@@ -0,0 +1,108 @@
1
+ """Artifact upload utilities for Plato agents and worlds.
2
+
3
+ These functions upload artifacts directly to S3 using presigned URLs.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import io
9
+ import logging
10
+ import zipfile
11
+ from pathlib import Path
12
+
13
+ import httpx
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
def zip_directory(dir_path: str) -> bytes:
    """Zip every regular file under a directory into an in-memory archive.

    Each file is stored under its path relative to ``dir_path``. Entries are
    added in sorted path order so the entry order does not depend on the order
    in which the filesystem happens to list them. Directories themselves
    (including empty ones) are not recorded as entries.

    Args:
        dir_path: Path to the directory

    Returns:
        Zip file contents as bytes.
    """
    root = Path(dir_path)
    buffer = io.BytesIO()

    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
        # rglob() yields entries in filesystem order; sorting keeps the
        # archive's entry order stable across platforms and runs.
        for file_path in sorted(root.rglob("*")):
            if file_path.is_file():
                zf.write(file_path, file_path.relative_to(root))

    # getvalue() returns the full buffer regardless of the stream position.
    return buffer.getvalue()
38
+
39
+
40
async def upload_to_s3(upload_url: str, data: bytes, content_type: str = "application/octet-stream") -> bool:
    """PUT a byte payload to S3 through a presigned URL.

    Failures are never raised to the caller: a missing URL, a non-success
    status code, or any exception raised while sending the request is logged
    as a warning and reported as ``False``.

    Args:
        upload_url: Presigned S3 PUT URL
        data: Raw bytes to upload
        content_type: MIME type sent as the Content-Type header

    Returns:
        True when the response status is 200, 201 or 204; False otherwise
    """
    if not upload_url:
        logger.warning("No upload URL provided")
        return False

    request_headers = {"Content-Type": content_type}
    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.put(upload_url, content=data, headers=request_headers)
            if response.status_code not in (200, 201, 204):
                logger.warning(f"S3 upload failed: {response.status_code} {response.text}")
                return False
            logger.info(f"Uploaded {len(data)} bytes to S3")
            return True
    except Exception as e:
        # Best-effort upload: report the failure instead of propagating it.
        logger.warning(f"Failed to upload to S3: {e}")
        return False
71
+
72
+
73
async def upload_artifacts(upload_url: str, dir_path: str) -> bool:
    """Zip a directory and upload the archive to S3.

    The directory is zipped in memory with ``zip_directory`` and the result
    is sent with content type ``application/zip``. A failure while zipping is
    logged as a warning and reported as ``False`` without attempting the
    upload.

    Args:
        upload_url: Presigned S3 PUT URL
        dir_path: Path to the directory to upload

    Returns:
        True if successful, False otherwise
    """
    try:
        archive = zip_directory(dir_path)
        logger.info(f"Zipped directory: {len(archive)} bytes")
    except Exception as e:
        logger.warning(f"Failed to zip directory: {e}")
        uploaded = False
    else:
        uploaded = await upload_to_s3(upload_url, archive, "application/zip")
    return uploaded
91
+
92
+
93
async def upload_artifact(
    upload_url: str,
    data: bytes,
    content_type: str = "application/octet-stream",
) -> bool:
    """Upload a single in-memory artifact to S3.

    This forwards its arguments unchanged to ``upload_to_s3``.

    Args:
        upload_url: Presigned S3 PUT URL
        data: Raw bytes of the artifact
        content_type: MIME type of the content

    Returns:
        True if successful, False otherwise
    """
    outcome = await upload_to_s3(upload_url, data, content_type)
    return outcome
plato/agents/config.py CHANGED
@@ -18,10 +18,8 @@ Example:
18
18
  from __future__ import annotations
19
19
 
20
20
  import json
21
- from pathlib import Path
22
21
  from typing import Any
23
22
 
24
- from pydantic import Field
25
23
  from pydantic_settings import BaseSettings, SettingsConfigDict
26
24
 
27
25
 
@@ -57,8 +55,6 @@ class AgentConfig(BaseSettings):
57
55
 
58
56
  Attributes:
59
57
  logs_dir: Directory for agent logs and trajectory output.
60
- checkpoint_paths: Directories to watch for checkpoint triggers (for workspace tracking).
61
- checkpoint_debounce_ms: Debounce interval for checkpoints.
62
58
  """
63
59
 
64
60
  model_config = SettingsConfigDict(
@@ -68,8 +64,6 @@ class AgentConfig(BaseSettings):
68
64
  )
69
65
 
70
66
  logs_dir: str = "/logs"
71
- checkpoint_paths: list[str] = Field(default_factory=list)
72
- checkpoint_debounce_ms: int = 500
73
67
 
74
68
  @classmethod
75
69
  def get_field_secrets(cls) -> dict[str, Secret]:
@@ -97,7 +91,7 @@ class AgentConfig(BaseSettings):
97
91
  secrets = []
98
92
 
99
93
  # Skip internal fields
100
- internal_fields = {"logs_dir", "checkpoint_paths", "checkpoint_debounce_ms"}
94
+ internal_fields = {"logs_dir"}
101
95
 
102
96
  for field_name, prop_schema in properties.items():
103
97
  if field_name in internal_fields:
@@ -140,7 +134,7 @@ class AgentConfig(BaseSettings):
140
134
  def get_config_dict(self) -> dict[str, Any]:
141
135
  """Extract non-secret config values as a dict."""
142
136
  secrets_map = self.get_field_secrets()
143
- internal_fields = {"logs_dir", "checkpoint_paths", "checkpoint_debounce_ms"}
137
+ internal_fields = {"logs_dir"}
144
138
 
145
139
  result: dict[str, Any] = {}
146
140
  for field_name in self.model_fields:
@@ -152,9 +146,18 @@ class AgentConfig(BaseSettings):
152
146
  return result
153
147
 
154
148
  @classmethod
155
- def from_file(cls, path: str | Path) -> AgentConfig:
156
- """Load config from a JSON file."""
157
- path = Path(path)
158
- with open(path) as f:
159
- data = json.load(f)
149
def from_env(cls) -> AgentConfig:
    """Load config from AGENT_CONFIG_B64 environment variable.

    The runner passes config as base64-encoded JSON in the
    AGENT_CONFIG_B64 environment variable. The decoded JSON must be an
    object; its members are passed to the class constructor as keyword
    arguments.

    Returns:
        A new instance of ``cls`` built from the decoded values.

    Raises:
        ValueError: If the variable is unset or empty, or if the decoded
            JSON is not an object. Malformed base64, non-UTF-8 bytes and
            invalid JSON propagate as the decoder's own exception, each of
            which is a ValueError subclass.
    """
    import base64
    import os

    config_b64 = os.environ.get("AGENT_CONFIG_B64")
    if not config_b64:
        raise ValueError("AGENT_CONFIG_B64 environment variable not set")
    config_json = base64.b64decode(config_b64).decode()
    data = json.loads(config_json)
    # Only a JSON object can be expanded into keyword arguments; reject other
    # JSON values explicitly instead of failing inside the ** unpacking.
    if not isinstance(data, dict):
        raise ValueError(
            f"AGENT_CONFIG_B64 must decode to a JSON object, got {type(data).__name__}"
        )
    return cls(**data)
plato/agents/otel.py ADDED
@@ -0,0 +1,261 @@
1
+ """OpenTelemetry integration for Plato agents and worlds.
2
+
3
+ Provides tracing utilities using OpenTelemetry SDK. Traces are sent directly
4
+ to the Chronos OTLP endpoint.
5
+
6
+ Usage:
7
+ from plato.agents.otel import init_tracing, get_tracer, shutdown_tracing
8
+
9
+ # Initialize tracing (sends to Chronos OTLP endpoint)
10
+ init_tracing(
11
+ service_name="my-world",
12
+ session_id="session-123",
13
+ otlp_endpoint="http://chronos/api/otel",
14
+ )
15
+
16
+ # Create spans
17
+ tracer = get_tracer()
18
+ with tracer.start_as_current_span("my-operation") as span:
19
+ span.set_attribute("key", "value")
20
+ # ... do work ...
21
+
22
+ # Cleanup
23
+ shutdown_tracing()
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import logging
29
+
30
+ from opentelemetry import trace
31
+ from opentelemetry.trace import Tracer
32
+
33
+ _module_logger = logging.getLogger(__name__)
34
+
35
+ # Global state
36
+ _tracer_provider = None
37
+ _initialized = False
38
+ _log_handler = None
39
+
40
+
41
class OTelSpanLogHandler(logging.Handler):
    """Logging handler that creates OTel spans for log messages.

    Each handled record becomes one span named ``log.<levelname>`` (lower
    case) whose attributes carry the record's level, message, logger name
    and, when present, function name and line number.
    """

    def __init__(self, tracer: Tracer, level: int = logging.INFO):
        """Store the tracer used to create spans and set the handler level.

        Args:
            tracer: Tracer whose ``start_as_current_span`` creates the spans
            level: Minimum record level handled (default: INFO)
        """
        super().__init__(level)
        self.tracer = tracer

    def emit(self, record: logging.LogRecord) -> None:
        """Emit a log record as an OTel span.

        Failures while building the span never propagate to the code that
        logged; they are passed to ``handleError`` instead.
        """
        try:
            # Render the message once; it is reused for several attributes.
            message = record.getMessage()
            # Debug: print that we're emitting a log span
            print(f"[OTel] Emitting log span: {record.name} - {message[:100]}")
            # Create a span for the log message
            with self.tracer.start_as_current_span(f"log.{record.levelname.lower()}") as span:
                span.set_attribute("log.level", record.levelname)
                span.set_attribute("log.message", message)
                span.set_attribute("log.logger", record.name)
                span.set_attribute("source", "world")
                # "content" carries at most the first 1000 characters.
                span.set_attribute("content", message[:1000])

                if record.funcName:
                    span.set_attribute("log.function", record.funcName)
                if record.lineno:
                    span.set_attribute("log.lineno", record.lineno)

                # Mark errors
                if record.levelno >= logging.ERROR:
                    span.set_attribute("error", True)

        except Exception:
            # Don't let logging errors crash the application; route them
            # through the standard logging error hook rather than dropping
            # them silently.
            self.handleError(record)
76
+
77
+
78
def _attach_parent_context(parent_trace_id: str, parent_span_id: str) -> bool:
    """Attach a remote parent span built from hex IDs; return False if either ID is not valid hex."""
    from opentelemetry import context as context_api
    from opentelemetry.trace import NonRecordingSpan, SpanContext, TraceFlags

    try:
        trace_id = int(parent_trace_id, 16)
        span_id = int(parent_span_id, 16)
    except ValueError:
        return False

    parent_context = SpanContext(
        trace_id=trace_id,
        span_id=span_id,
        is_remote=True,
        trace_flags=TraceFlags(0x01),  # Sampled
    )
    parent_span = NonRecordingSpan(parent_context)
    context_api.attach(trace.set_span_in_context(parent_span))
    return True


def init_tracing(
    service_name: str,
    session_id: str,
    otlp_endpoint: str,
    parent_trace_id: str | None = None,
    parent_span_id: str | None = None,
) -> None:
    """Initialize OpenTelemetry tracing.

    Builds a tracer provider that exports every span to
    ``<otlp_endpoint>/v1/traces``, installs it as the global provider, and
    attaches an ``OTelSpanLogHandler`` to the ``"plato"`` logger (whose level
    is set to INFO). Calling this again while tracing is already initialized
    does nothing. Failures are printed and logged, never raised.

    Args:
        service_name: Name of the service (e.g., world name or agent name)
        session_id: Chronos session ID (added as resource attribute)
        otlp_endpoint: Chronos OTLP endpoint (e.g., http://chronos/api/otel)
        parent_trace_id: Optional parent trace ID for linking (hex string)
        parent_span_id: Optional parent span ID for linking (hex string)
    """
    global _tracer_provider, _initialized, _log_handler

    if _initialized:
        _module_logger.debug("Tracing already initialized")
        return

    try:
        from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
            OTLPSpanExporter,
        )
        from opentelemetry.sdk.resources import Resource
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.trace.export import SimpleSpanProcessor

        # Create resource with session ID
        resource = Resource.create(
            {
                "service.name": service_name,
                "plato.session.id": session_id,
            }
        )

        # Create tracer provider
        _tracer_provider = TracerProvider(resource=resource)

        # Add OTLP exporter pointing to Chronos (use SimpleSpanProcessor for immediate export)
        otlp_exporter = OTLPSpanExporter(endpoint=f"{otlp_endpoint.rstrip('/')}/v1/traces")
        _tracer_provider.add_span_processor(SimpleSpanProcessor(otlp_exporter))

        # Set as global tracer provider
        trace.set_tracer_provider(_tracer_provider)

        # If parent context is provided, set it as the current context
        # This allows new spans to automatically link to the parent
        if parent_trace_id and parent_span_id:
            if _attach_parent_context(parent_trace_id, parent_span_id):
                print(f"[OTel] Using parent context: trace_id={parent_trace_id}, span_id={parent_span_id}")
            else:
                # A malformed ID only costs the parent link; carrying on keeps
                # the provider, log handler and initialized flag consistent.
                print(
                    f"[OTel] Ignoring invalid parent context: trace_id={parent_trace_id}, span_id={parent_span_id}"
                )
                _module_logger.warning(
                    f"Ignoring invalid parent context: trace_id={parent_trace_id}, span_id={parent_span_id}"
                )

        # Add OTel logging handler to capture logs from plato SDK
        tracer = trace.get_tracer(service_name)
        _log_handler = OTelSpanLogHandler(tracer, level=logging.INFO)

        # Add handler to plato loggers (worlds and agents)
        # Set level to INFO to ensure logs propagate from child loggers
        plato_logger = logging.getLogger("plato")
        plato_logger.setLevel(logging.INFO)
        plato_logger.addHandler(_log_handler)
        print(
            f"[OTel] Added log handler to 'plato' logger (level={plato_logger.level}, handlers={len(plato_logger.handlers)})"
        )

        _initialized = True

        print(f"[OTel] Tracing initialized: service={service_name}, session={session_id}, endpoint={otlp_endpoint}")

    except ImportError as e:
        print(f"[OTel] OpenTelemetry SDK not installed: {e}")
        _module_logger.warning(f"OpenTelemetry SDK not installed: {e}")
    except Exception as e:
        print(f"[OTel] Failed to initialize tracing: {e}")
        _module_logger.error(f"Failed to initialize tracing: {e}")
165
+
166
+
167
def shutdown_tracing(timeout_millis: int = 30000) -> None:
    """Flush pending spans, shut the tracer provider down and reset module state.

    The span-producing log handler is removed from the ``"plato"`` logger
    first. Errors raised while flushing or shutting down are printed and
    logged as warnings, not raised.

    Args:
        timeout_millis: Timeout in milliseconds to wait for flush (default 30s)
    """
    global _tracer_provider, _initialized, _log_handler

    # Detach the log handler before the provider goes away.
    if _log_handler:
        try:
            logging.getLogger("plato").removeHandler(_log_handler)
        except Exception:
            pass
        _log_handler = None

    provider = _tracer_provider
    if provider:
        try:
            # Force flush all pending spans before shutdown
            print(f"[OTel] Flushing spans (timeout={timeout_millis}ms)...")
            if provider.force_flush(timeout_millis=timeout_millis):
                print("[OTel] Span flush completed successfully")
            else:
                print("[OTel] Span flush timed out or failed")

            provider.shutdown()
            print("[OTel] Tracing shutdown complete")
        except Exception as e:
            print(f"[OTel] Error shutting down tracer: {e}")
            _module_logger.warning(f"Error shutting down tracer: {e}")

    _tracer_provider = None
    _initialized = False
202
+
203
+
204
def get_tracer(name: str = "plato") -> Tracer:
    """Return the tracer registered under ``name`` by the OpenTelemetry trace API.

    Args:
        name: Tracer name (default: "plato")

    Returns:
        OpenTelemetry Tracer
    """
    tracer = trace.get_tracer(name)
    return tracer
214
+
215
+
216
def is_initialized() -> bool:
    """Report the module-level flag that records whether tracing setup completed."""
    initialized = _initialized
    return initialized
219
+
220
+
221
def instrument(service_name: str = "plato-agent") -> Tracer:
    """Set up OTel tracing from environment variables and return a tracer.

    Environment variables read:
    - OTEL_EXPORTER_OTLP_ENDPOINT: Chronos OTLP endpoint (required for tracing)
    - SESSION_ID: Chronos session ID (default: "local")
    - OTEL_TRACE_ID: Parent trace ID for linking spans (optional)
    - OTEL_PARENT_SPAN_ID: Parent span ID for linking spans (optional)

    When OTEL_EXPORTER_OTLP_ENDPOINT is unset or empty, ``init_tracing`` is
    not called and the tracer comes from whatever provider is currently
    registered (a no-op tracer if none has been configured).

    Args:
        service_name: Name of the service for traces

    Returns:
        OpenTelemetry Tracer
    """
    import os

    env = os.environ
    otel_endpoint = env.get("OTEL_EXPORTER_OTLP_ENDPOINT")
    session_id = env.get("SESSION_ID", "local")

    print(f"[OTel] instrument() called: service={service_name}, endpoint={otel_endpoint}, session={session_id}")

    if otel_endpoint:
        # Initialize tracing, linking to the parent context when one is given.
        init_tracing(
            service_name=service_name,
            session_id=session_id,
            otlp_endpoint=otel_endpoint,
            parent_trace_id=env.get("OTEL_TRACE_ID"),
            parent_span_id=env.get("OTEL_PARENT_SPAN_ID"),
        )
    else:
        print("[OTel] No OTEL_EXPORTER_OTLP_ENDPOINT set, returning no-op tracer")

    return trace.get_tracer(service_name)