plato-sdk-v2 2.3.6__py3-none-any.whl → 2.3.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plato/agents/otel.py +77 -87
- plato/agents/runner.py +125 -288
- plato/v1/cli/sandbox.py +5 -2
- plato/v1/cli/ssh.py +21 -14
- plato/v1/cli/utils.py +32 -12
- plato/worlds/base.py +103 -92
- plato/worlds/runner.py +33 -15
- {plato_sdk_v2-2.3.6.dist-info → plato_sdk_v2-2.3.8.dist-info}/METADATA +1 -2
- {plato_sdk_v2-2.3.6.dist-info → plato_sdk_v2-2.3.8.dist-info}/RECORD +11 -11
- {plato_sdk_v2-2.3.6.dist-info → plato_sdk_v2-2.3.8.dist-info}/WHEEL +0 -0
- {plato_sdk_v2-2.3.6.dist-info → plato_sdk_v2-2.3.8.dist-info}/entry_points.txt +0 -0
plato/agents/otel.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
"""OpenTelemetry integration for Plato agents and worlds.
|
|
2
2
|
|
|
3
|
-
Provides tracing
|
|
4
|
-
|
|
3
|
+
Provides tracing utilities using OpenTelemetry SDK. Traces are sent directly
|
|
4
|
+
to the Chronos OTLP endpoint.
|
|
5
5
|
|
|
6
6
|
Usage:
|
|
7
7
|
from plato.agents.otel import init_tracing, get_tracer, shutdown_tracing
|
|
@@ -19,11 +19,6 @@ Usage:
|
|
|
19
19
|
span.set_attribute("key", "value")
|
|
20
20
|
# ... do work ...
|
|
21
21
|
|
|
22
|
-
# All Python logging is automatically sent to Chronos
|
|
23
|
-
import logging
|
|
24
|
-
logger = logging.getLogger(__name__)
|
|
25
|
-
logger.info("This will appear in the trajectory viewer!")
|
|
26
|
-
|
|
27
22
|
# Cleanup
|
|
28
23
|
shutdown_tracing()
|
|
29
24
|
"""
|
|
@@ -39,74 +34,44 @@ _module_logger = logging.getLogger(__name__)
|
|
|
39
34
|
|
|
40
35
|
# Global state
|
|
41
36
|
_tracer_provider = None
|
|
42
|
-
_logging_handler = None
|
|
43
37
|
_initialized = False
|
|
38
|
+
_log_handler = None
|
|
44
39
|
|
|
45
40
|
|
|
46
|
-
class
|
|
47
|
-
"""Logging handler that
|
|
41
|
+
class OTelSpanLogHandler(logging.Handler):
|
|
42
|
+
"""Logging handler that creates OTel spans for log messages.
|
|
48
43
|
|
|
49
|
-
|
|
50
|
-
- span.type: "log"
|
|
51
|
-
- log.level: DEBUG/INFO/WARNING/ERROR/CRITICAL
|
|
52
|
-
- content: the log message
|
|
53
|
-
- source: the logger name
|
|
44
|
+
Converts Python log records to OTel spans with log attributes.
|
|
54
45
|
"""
|
|
55
46
|
|
|
56
|
-
def __init__(self,
|
|
57
|
-
super().__init__()
|
|
58
|
-
self.
|
|
59
|
-
# Filter out noisy loggers
|
|
60
|
-
self._ignored_loggers = {
|
|
61
|
-
"httpx",
|
|
62
|
-
"httpcore",
|
|
63
|
-
"urllib3",
|
|
64
|
-
"asyncio",
|
|
65
|
-
"opentelemetry",
|
|
66
|
-
"plato.agents.otel", # Avoid recursion
|
|
67
|
-
}
|
|
47
|
+
def __init__(self, tracer: Tracer, level: int = logging.INFO):
|
|
48
|
+
super().__init__(level)
|
|
49
|
+
self.tracer = tracer
|
|
68
50
|
|
|
69
51
|
def emit(self, record: logging.LogRecord) -> None:
|
|
70
52
|
"""Emit a log record as an OTel span."""
|
|
71
|
-
# Skip ignored loggers
|
|
72
|
-
logger_name = record.name
|
|
73
|
-
for ignored in self._ignored_loggers:
|
|
74
|
-
if logger_name.startswith(ignored):
|
|
75
|
-
return
|
|
76
|
-
|
|
77
53
|
try:
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
# Format the message
|
|
81
|
-
try:
|
|
82
|
-
msg = self.format(record)
|
|
83
|
-
except Exception:
|
|
84
|
-
msg = record.getMessage()
|
|
85
|
-
|
|
54
|
+
# Debug: print that we're emitting a log span
|
|
55
|
+
print(f"[OTel] Emitting log span: {record.name} - {record.getMessage()[:100]}")
|
|
86
56
|
# Create a span for the log message
|
|
87
|
-
with tracer.start_as_current_span(
|
|
88
|
-
f"log.{record.levelname.lower()}",
|
|
89
|
-
end_on_exit=True,
|
|
90
|
-
) as span:
|
|
91
|
-
span.set_attribute("span.type", "log")
|
|
57
|
+
with self.tracer.start_as_current_span(f"log.{record.levelname.lower()}") as span:
|
|
92
58
|
span.set_attribute("log.level", record.levelname)
|
|
93
|
-
span.set_attribute("
|
|
94
|
-
span.set_attribute("
|
|
59
|
+
span.set_attribute("log.message", record.getMessage())
|
|
60
|
+
span.set_attribute("log.logger", record.name)
|
|
61
|
+
span.set_attribute("source", "world")
|
|
62
|
+
span.set_attribute("content", record.getMessage()[:1000])
|
|
95
63
|
|
|
96
|
-
# Add extra context if available
|
|
97
64
|
if record.funcName:
|
|
98
65
|
span.set_attribute("log.function", record.funcName)
|
|
99
|
-
if record.pathname:
|
|
100
|
-
span.set_attribute("log.file", record.pathname)
|
|
101
66
|
if record.lineno:
|
|
102
|
-
span.set_attribute("log.
|
|
67
|
+
span.set_attribute("log.lineno", record.lineno)
|
|
103
68
|
|
|
104
|
-
#
|
|
105
|
-
if record.
|
|
106
|
-
span.
|
|
69
|
+
# Mark errors
|
|
70
|
+
if record.levelno >= logging.ERROR:
|
|
71
|
+
span.set_attribute("error", True)
|
|
107
72
|
|
|
108
73
|
except Exception:
|
|
109
|
-
# Don't let logging
|
|
74
|
+
# Don't let logging errors crash the application
|
|
110
75
|
pass
|
|
111
76
|
|
|
112
77
|
|
|
@@ -114,66 +79,82 @@ def init_tracing(
|
|
|
114
79
|
service_name: str,
|
|
115
80
|
session_id: str,
|
|
116
81
|
otlp_endpoint: str,
|
|
117
|
-
|
|
118
|
-
|
|
82
|
+
parent_trace_id: str | None = None,
|
|
83
|
+
parent_span_id: str | None = None,
|
|
119
84
|
) -> None:
|
|
120
|
-
"""Initialize OpenTelemetry tracing
|
|
85
|
+
"""Initialize OpenTelemetry tracing.
|
|
121
86
|
|
|
122
87
|
Args:
|
|
123
88
|
service_name: Name of the service (e.g., world name or agent name)
|
|
124
89
|
session_id: Chronos session ID (added as resource attribute)
|
|
125
90
|
otlp_endpoint: Chronos OTLP endpoint (e.g., http://chronos/api/otel)
|
|
126
|
-
|
|
127
|
-
|
|
91
|
+
parent_trace_id: Optional parent trace ID for linking (hex string)
|
|
92
|
+
parent_span_id: Optional parent span ID for linking (hex string)
|
|
128
93
|
"""
|
|
129
|
-
global _tracer_provider,
|
|
94
|
+
global _tracer_provider, _initialized, _log_handler
|
|
130
95
|
|
|
131
96
|
if _initialized:
|
|
132
97
|
_module_logger.debug("Tracing already initialized")
|
|
133
98
|
return
|
|
134
99
|
|
|
135
100
|
try:
|
|
101
|
+
from opentelemetry import context as context_api
|
|
136
102
|
from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
|
|
137
103
|
OTLPSpanExporter,
|
|
138
104
|
)
|
|
139
105
|
from opentelemetry.sdk.resources import Resource
|
|
140
106
|
from opentelemetry.sdk.trace import TracerProvider
|
|
141
|
-
from opentelemetry.sdk.trace.export import
|
|
107
|
+
from opentelemetry.sdk.trace.export import SimpleSpanProcessor
|
|
108
|
+
from opentelemetry.trace import NonRecordingSpan, SpanContext, TraceFlags
|
|
142
109
|
|
|
143
110
|
# Create resource with session ID
|
|
144
111
|
resource = Resource.create(
|
|
145
112
|
{
|
|
146
113
|
"service.name": service_name,
|
|
147
|
-
"session.id": session_id,
|
|
114
|
+
"plato.session.id": session_id,
|
|
148
115
|
}
|
|
149
116
|
)
|
|
150
117
|
|
|
151
118
|
# Create tracer provider
|
|
152
119
|
_tracer_provider = TracerProvider(resource=resource)
|
|
153
120
|
|
|
154
|
-
# Add OTLP exporter pointing to Chronos
|
|
121
|
+
# Add OTLP exporter pointing to Chronos (use SimpleSpanProcessor for immediate export)
|
|
155
122
|
otlp_exporter = OTLPSpanExporter(endpoint=f"{otlp_endpoint.rstrip('/')}/v1/traces")
|
|
156
|
-
_tracer_provider.add_span_processor(
|
|
123
|
+
_tracer_provider.add_span_processor(SimpleSpanProcessor(otlp_exporter))
|
|
157
124
|
|
|
158
125
|
# Set as global tracer provider
|
|
159
126
|
trace.set_tracer_provider(_tracer_provider)
|
|
160
127
|
|
|
161
|
-
|
|
128
|
+
# If parent context is provided, set it as the current context
|
|
129
|
+
# This allows new spans to automatically link to the parent
|
|
130
|
+
if parent_trace_id and parent_span_id:
|
|
131
|
+
parent_context = SpanContext(
|
|
132
|
+
trace_id=int(parent_trace_id, 16),
|
|
133
|
+
span_id=int(parent_span_id, 16),
|
|
134
|
+
is_remote=True,
|
|
135
|
+
trace_flags=TraceFlags(0x01), # Sampled
|
|
136
|
+
)
|
|
137
|
+
parent_span = NonRecordingSpan(parent_context)
|
|
138
|
+
ctx = trace.set_span_in_context(parent_span)
|
|
139
|
+
context_api.attach(ctx)
|
|
140
|
+
print(f"[OTel] Using parent context: trace_id={parent_trace_id}, span_id={parent_span_id}")
|
|
141
|
+
|
|
142
|
+
# Add OTel logging handler to capture logs from plato SDK
|
|
143
|
+
tracer = trace.get_tracer(service_name)
|
|
144
|
+
_log_handler = OTelSpanLogHandler(tracer, level=logging.INFO)
|
|
145
|
+
|
|
146
|
+
# Add handler to plato loggers (worlds and agents)
|
|
147
|
+
# Set level to INFO to ensure logs propagate from child loggers
|
|
148
|
+
plato_logger = logging.getLogger("plato")
|
|
149
|
+
plato_logger.setLevel(logging.INFO)
|
|
150
|
+
plato_logger.addHandler(_log_handler)
|
|
151
|
+
print(
|
|
152
|
+
f"[OTel] Added log handler to 'plato' logger (level={plato_logger.level}, handlers={len(plato_logger.handlers)})"
|
|
153
|
+
)
|
|
162
154
|
|
|
163
|
-
|
|
164
|
-
if capture_logging:
|
|
165
|
-
_logging_handler = OTelLoggingHandler()
|
|
166
|
-
_logging_handler.setLevel(log_level)
|
|
167
|
-
# Add to root logger to capture all logs
|
|
168
|
-
logging.getLogger().addHandler(_logging_handler)
|
|
155
|
+
_initialized = True
|
|
169
156
|
|
|
170
|
-
# Use print to ensure this shows regardless of logging config
|
|
171
157
|
print(f"[OTel] Tracing initialized: service={service_name}, session={session_id}, endpoint={otlp_endpoint}")
|
|
172
|
-
_module_logger.info(
|
|
173
|
-
f"OTel tracing initialized: service={service_name}, "
|
|
174
|
-
f"session={session_id}, endpoint={otlp_endpoint}, "
|
|
175
|
-
f"capture_logging={capture_logging}"
|
|
176
|
-
)
|
|
177
158
|
|
|
178
159
|
except ImportError as e:
|
|
179
160
|
print(f"[OTel] OpenTelemetry SDK not installed: {e}")
|
|
@@ -184,16 +165,17 @@ def init_tracing(
|
|
|
184
165
|
|
|
185
166
|
|
|
186
167
|
def shutdown_tracing() -> None:
|
|
187
|
-
"""Shutdown the tracer provider
|
|
188
|
-
global _tracer_provider,
|
|
168
|
+
"""Shutdown the tracer provider and flush spans."""
|
|
169
|
+
global _tracer_provider, _initialized, _log_handler
|
|
189
170
|
|
|
190
|
-
# Remove
|
|
191
|
-
if
|
|
171
|
+
# Remove log handler
|
|
172
|
+
if _log_handler:
|
|
192
173
|
try:
|
|
193
|
-
logging.getLogger()
|
|
174
|
+
plato_logger = logging.getLogger("plato")
|
|
175
|
+
plato_logger.removeHandler(_log_handler)
|
|
194
176
|
except Exception:
|
|
195
177
|
pass
|
|
196
|
-
|
|
178
|
+
_log_handler = None
|
|
197
179
|
|
|
198
180
|
if _tracer_provider:
|
|
199
181
|
try:
|
|
@@ -229,6 +211,8 @@ def instrument(service_name: str = "plato-agent") -> Tracer:
|
|
|
229
211
|
Reads the following env vars:
|
|
230
212
|
- OTEL_EXPORTER_OTLP_ENDPOINT: Chronos OTLP endpoint (required for tracing)
|
|
231
213
|
- SESSION_ID: Chronos session ID (default: "local")
|
|
214
|
+
- OTEL_TRACE_ID: Parent trace ID for linking spans (optional)
|
|
215
|
+
- OTEL_PARENT_SPAN_ID: Parent span ID for linking spans (optional)
|
|
232
216
|
|
|
233
217
|
If OTEL_EXPORTER_OTLP_ENDPOINT is not set, returns a no-op tracer.
|
|
234
218
|
|
|
@@ -242,17 +226,23 @@ def instrument(service_name: str = "plato-agent") -> Tracer:
|
|
|
242
226
|
|
|
243
227
|
otel_endpoint = os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT")
|
|
244
228
|
session_id = os.environ.get("SESSION_ID", "local")
|
|
229
|
+
parent_trace_id = os.environ.get("OTEL_TRACE_ID")
|
|
230
|
+
parent_span_id = os.environ.get("OTEL_PARENT_SPAN_ID")
|
|
231
|
+
|
|
232
|
+
print(f"[OTel] instrument() called: service={service_name}, endpoint={otel_endpoint}, session={session_id}")
|
|
245
233
|
|
|
246
234
|
if not otel_endpoint:
|
|
247
235
|
# Return default tracer (no-op if no provider configured)
|
|
236
|
+
print("[OTel] No OTEL_EXPORTER_OTLP_ENDPOINT set, returning no-op tracer")
|
|
248
237
|
return trace.get_tracer(service_name)
|
|
249
238
|
|
|
250
|
-
# Initialize tracing
|
|
239
|
+
# Initialize tracing with parent context if provided
|
|
251
240
|
init_tracing(
|
|
252
241
|
service_name=service_name,
|
|
253
242
|
session_id=session_id,
|
|
254
243
|
otlp_endpoint=otel_endpoint,
|
|
255
|
-
|
|
244
|
+
parent_trace_id=parent_trace_id,
|
|
245
|
+
parent_span_id=parent_span_id,
|
|
256
246
|
)
|
|
257
247
|
|
|
258
248
|
return trace.get_tracer(service_name)
|