plato-sdk-v2 2.3.0__py3-none-any.whl → 2.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- plato/agents/__init__.py +25 -13
- plato/agents/artifacts.py +108 -0
- plato/agents/config.py +16 -13
- plato/agents/otel.py +261 -0
- plato/agents/runner.py +226 -122
- plato/chronos/models/__init__.py +9 -1
- plato/v1/cli/chronos.py +788 -0
- plato/v1/cli/main.py +2 -2
- plato/v1/cli/pm.py +3 -3
- plato/v1/cli/sandbox.py +246 -52
- plato/v1/cli/ssh.py +28 -9
- plato/v1/cli/templates/world-runner.Dockerfile +27 -0
- plato/v1/cli/utils.py +32 -12
- plato/v1/cli/verify.py +243 -827
- plato/worlds/README.md +2 -1
- plato/worlds/__init__.py +3 -1
- plato/worlds/base.py +462 -67
- plato/worlds/config.py +42 -3
- plato/worlds/runner.py +1 -339
- {plato_sdk_v2-2.3.0.dist-info → plato_sdk_v2-2.4.1.dist-info}/METADATA +4 -1
- {plato_sdk_v2-2.3.0.dist-info → plato_sdk_v2-2.4.1.dist-info}/RECORD +23 -27
- plato/agents/logging.py +0 -401
- plato/chronos/api/callback/__init__.py +0 -11
- plato/chronos/api/callback/push_agent_logs.py +0 -61
- plato/chronos/api/callback/update_agent_status.py +0 -57
- plato/chronos/api/callback/upload_artifacts.py +0 -59
- plato/chronos/api/callback/upload_logs_zip.py +0 -57
- plato/chronos/api/callback/upload_trajectory.py +0 -57
- plato/v1/cli/sim.py +0 -11
- {plato_sdk_v2-2.3.0.dist-info → plato_sdk_v2-2.4.1.dist-info}/WHEEL +0 -0
- {plato_sdk_v2-2.3.0.dist-info → plato_sdk_v2-2.4.1.dist-info}/entry_points.txt +0 -0
plato/agents/__init__.py
CHANGED
|
@@ -20,8 +20,9 @@ Trajectory (ATIF):
|
|
|
20
20
|
- Trajectory: ATIF trajectory model
|
|
21
21
|
- Step, Agent, ToolCall, etc.: ATIF components
|
|
22
22
|
|
|
23
|
-
|
|
24
|
-
-
|
|
23
|
+
OTel Tracing:
|
|
24
|
+
- instrument: Initialize OTel tracing from environment
|
|
25
|
+
- get_tracer: Get a tracer for creating spans
|
|
25
26
|
|
|
26
27
|
Example (direct execution):
|
|
27
28
|
from plato.agents import BaseAgent, AgentConfig, Secret, register_agent
|
|
@@ -80,14 +81,25 @@ __all__ = [
|
|
|
80
81
|
"Metrics",
|
|
81
82
|
"FinalMetrics",
|
|
82
83
|
"SCHEMA_VERSION",
|
|
83
|
-
#
|
|
84
|
-
"
|
|
85
|
-
"span",
|
|
86
|
-
"log_event",
|
|
84
|
+
# Artifacts
|
|
85
|
+
"zip_directory",
|
|
87
86
|
"upload_artifacts",
|
|
88
|
-
"
|
|
87
|
+
"upload_artifact",
|
|
88
|
+
"upload_to_s3",
|
|
89
|
+
# OTel tracing
|
|
90
|
+
"init_tracing",
|
|
91
|
+
"instrument",
|
|
92
|
+
"shutdown_tracing",
|
|
93
|
+
"get_tracer",
|
|
94
|
+
"is_initialized",
|
|
89
95
|
]
|
|
90
96
|
|
|
97
|
+
from plato.agents.artifacts import (
|
|
98
|
+
upload_artifact,
|
|
99
|
+
upload_artifacts,
|
|
100
|
+
upload_to_s3,
|
|
101
|
+
zip_directory,
|
|
102
|
+
)
|
|
91
103
|
from plato.agents.base import (
|
|
92
104
|
BaseAgent,
|
|
93
105
|
ConfigT,
|
|
@@ -97,12 +109,12 @@ from plato.agents.base import (
|
|
|
97
109
|
)
|
|
98
110
|
from plato.agents.build import BuildConfig, load_build_config
|
|
99
111
|
from plato.agents.config import AgentConfig, Secret
|
|
100
|
-
from plato.agents.
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
112
|
+
from plato.agents.otel import (
|
|
113
|
+
get_tracer,
|
|
114
|
+
init_tracing,
|
|
115
|
+
instrument,
|
|
116
|
+
is_initialized,
|
|
117
|
+
shutdown_tracing,
|
|
106
118
|
)
|
|
107
119
|
from plato.agents.runner import run_agent
|
|
108
120
|
from plato.agents.trajectory import (
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Artifact upload utilities for Plato agents and worlds.
|
|
2
|
+
|
|
3
|
+
These functions upload artifacts directly to S3 using presigned URLs.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import io
|
|
9
|
+
import logging
|
|
10
|
+
import zipfile
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def zip_directory(dir_path: str) -> bytes:
    """Zip an entire directory into an in-memory archive.

    Every regular file found recursively under ``dir_path`` is stored
    (DEFLATE-compressed) under its path relative to ``dir_path``. Directory
    entries themselves are not written, so empty directories are omitted.
    Files are added in sorted path order so the member order of the archive
    does not depend on filesystem enumeration order.

    Args:
        dir_path: Path to the directory

    Returns:
        Zip file contents as bytes. A directory containing no files yields a
        valid, empty archive.
    """
    root = Path(dir_path)
    buffer = io.BytesIO()

    with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as zf:
        # rglob order is filesystem-dependent; sort for a stable archive layout.
        for file_path in sorted(root.rglob("*")):
            if file_path.is_file():
                zf.write(file_path, file_path.relative_to(root))

    # getvalue() returns the whole buffer regardless of the stream position.
    return buffer.getvalue()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
async def upload_to_s3(upload_url: str, data: bytes, content_type: str = "application/octet-stream") -> bool:
    """PUT a byte payload to a presigned S3 URL.

    Failures are logged as warnings and reported through the return value;
    no exception escapes from the request itself.

    Args:
        upload_url: Presigned S3 PUT URL
        data: Raw bytes to upload
        content_type: MIME type sent as the Content-Type header

    Returns:
        True when the response status is 200, 201 or 204; False when the URL
        is empty, the status is anything else, or the request raised.
    """
    if not upload_url:
        logger.warning("No upload URL provided")
        return False

    request_headers = {"Content-Type": content_type}
    try:
        # A fresh client per call with a 120 second timeout.
        async with httpx.AsyncClient(timeout=120.0) as client:
            response = await client.put(upload_url, content=data, headers=request_headers)
            accepted = response.status_code in (200, 201, 204)
            if not accepted:
                logger.warning(f"S3 upload failed: {response.status_code} {response.text}")
                return False
            logger.info(f"Uploaded {len(data)} bytes to S3")
            return True
    except Exception as e:
        # Best-effort upload: report the failure to the caller as False.
        logger.warning(f"Failed to upload to S3: {e}")
        return False
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
async def upload_artifacts(upload_url: str, dir_path: str) -> bool:
    """Zip a directory and upload the archive directly to S3.

    Args:
        upload_url: Presigned S3 PUT URL
        dir_path: Path to the directory to upload

    Returns:
        False if zipping the directory raised; otherwise the result of the
        upload (True if successful, False otherwise).
    """
    try:
        archive = zip_directory(dir_path)
        logger.info(f"Zipped directory: {len(archive)} bytes")
    except Exception as e:
        logger.warning(f"Failed to zip directory: {e}")
        return False
    else:
        # Only reached when the archive was built without error.
        uploaded = await upload_to_s3(upload_url, archive, "application/zip")
        return uploaded
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
async def upload_artifact(
    upload_url: str,
    data: bytes,
    content_type: str = "application/octet-stream",
) -> bool:
    """Upload a single artifact's bytes directly to S3.

    Thin wrapper that forwards all arguments to ``upload_to_s3``.

    Args:
        upload_url: Presigned S3 PUT URL
        data: Raw bytes of the artifact
        content_type: MIME type of the content

    Returns:
        True if successful, False otherwise
    """
    outcome = await upload_to_s3(upload_url, data, content_type)
    return outcome
|
plato/agents/config.py
CHANGED
|
@@ -18,10 +18,8 @@ Example:
|
|
|
18
18
|
from __future__ import annotations
|
|
19
19
|
|
|
20
20
|
import json
|
|
21
|
-
from pathlib import Path
|
|
22
21
|
from typing import Any
|
|
23
22
|
|
|
24
|
-
from pydantic import Field
|
|
25
23
|
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
26
24
|
|
|
27
25
|
|
|
@@ -57,8 +55,6 @@ class AgentConfig(BaseSettings):
|
|
|
57
55
|
|
|
58
56
|
Attributes:
|
|
59
57
|
logs_dir: Directory for agent logs and trajectory output.
|
|
60
|
-
checkpoint_paths: Directories to watch for checkpoint triggers (for workspace tracking).
|
|
61
|
-
checkpoint_debounce_ms: Debounce interval for checkpoints.
|
|
62
58
|
"""
|
|
63
59
|
|
|
64
60
|
model_config = SettingsConfigDict(
|
|
@@ -68,8 +64,6 @@ class AgentConfig(BaseSettings):
|
|
|
68
64
|
)
|
|
69
65
|
|
|
70
66
|
logs_dir: str = "/logs"
|
|
71
|
-
checkpoint_paths: list[str] = Field(default_factory=list)
|
|
72
|
-
checkpoint_debounce_ms: int = 500
|
|
73
67
|
|
|
74
68
|
@classmethod
|
|
75
69
|
def get_field_secrets(cls) -> dict[str, Secret]:
|
|
@@ -97,7 +91,7 @@ class AgentConfig(BaseSettings):
|
|
|
97
91
|
secrets = []
|
|
98
92
|
|
|
99
93
|
# Skip internal fields
|
|
100
|
-
internal_fields = {"logs_dir"
|
|
94
|
+
internal_fields = {"logs_dir"}
|
|
101
95
|
|
|
102
96
|
for field_name, prop_schema in properties.items():
|
|
103
97
|
if field_name in internal_fields:
|
|
@@ -140,7 +134,7 @@ class AgentConfig(BaseSettings):
|
|
|
140
134
|
def get_config_dict(self) -> dict[str, Any]:
|
|
141
135
|
"""Extract non-secret config values as a dict."""
|
|
142
136
|
secrets_map = self.get_field_secrets()
|
|
143
|
-
internal_fields = {"logs_dir"
|
|
137
|
+
internal_fields = {"logs_dir"}
|
|
144
138
|
|
|
145
139
|
result: dict[str, Any] = {}
|
|
146
140
|
for field_name in self.model_fields:
|
|
@@ -152,9 +146,18 @@ class AgentConfig(BaseSettings):
|
|
|
152
146
|
return result
|
|
153
147
|
|
|
154
148
|
@classmethod
|
|
155
|
-
def
|
|
156
|
-
"""Load config from
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
149
|
+
def from_env(cls) -> AgentConfig:
    """Build a config instance from the AGENT_CONFIG_B64 environment variable.

    The variable is expected to hold base64-encoded JSON; the decoded
    object's keys are passed to the class constructor as keyword arguments.

    Raises:
        ValueError: If AGENT_CONFIG_B64 is unset or empty.
    """
    import base64
    import os

    encoded = os.environ.get("AGENT_CONFIG_B64")
    if not encoded:
        raise ValueError("AGENT_CONFIG_B64 environment variable not set")

    # base64 text -> JSON text -> keyword arguments for the constructor.
    payload = json.loads(base64.b64decode(encoded).decode())
    return cls(**payload)
|
plato/agents/otel.py
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
"""OpenTelemetry integration for Plato agents and worlds.
|
|
2
|
+
|
|
3
|
+
Provides tracing utilities using OpenTelemetry SDK. Traces are sent directly
|
|
4
|
+
to the Chronos OTLP endpoint.
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
from plato.agents.otel import init_tracing, get_tracer, shutdown_tracing
|
|
8
|
+
|
|
9
|
+
# Initialize tracing (sends to Chronos OTLP endpoint)
|
|
10
|
+
init_tracing(
|
|
11
|
+
service_name="my-world",
|
|
12
|
+
session_id="session-123",
|
|
13
|
+
otlp_endpoint="http://chronos/api/otel",
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
# Create spans
|
|
17
|
+
tracer = get_tracer()
|
|
18
|
+
with tracer.start_as_current_span("my-operation") as span:
|
|
19
|
+
span.set_attribute("key", "value")
|
|
20
|
+
# ... do work ...
|
|
21
|
+
|
|
22
|
+
# Cleanup
|
|
23
|
+
shutdown_tracing()
|
|
24
|
+
"""
|
|
25
|
+
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import logging
|
|
29
|
+
|
|
30
|
+
from opentelemetry import trace
|
|
31
|
+
from opentelemetry.trace import Tracer
|
|
32
|
+
|
|
33
|
+
_module_logger = logging.getLogger(__name__)
|
|
34
|
+
|
|
35
|
+
# Global state
|
|
36
|
+
_tracer_provider = None
|
|
37
|
+
_initialized = False
|
|
38
|
+
_log_handler = None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class OTelSpanLogHandler(logging.Handler):
    """Logging handler that creates OTel spans for log messages.

    Each handled record becomes one span named ``log.<levelname>`` carrying
    the record's level, message, logger name and (when present) function name
    and line number as span attributes.
    """

    def __init__(self, tracer: Tracer, level: int = logging.INFO):
        """Store the tracer used to create spans.

        Args:
            tracer: Tracer whose ``start_as_current_span`` creates the spans
            level: Minimum record level this handler accepts
        """
        super().__init__(level)
        self.tracer = tracer

    def emit(self, record: logging.LogRecord) -> None:
        """Emit a log record as an OTel span.

        Never raises: failures are routed to ``logging.Handler.handleError``
        so a tracing problem cannot crash the code that logged.
        """
        try:
            # Format once; the message is used for two attributes.
            message = record.getMessage()

            with self.tracer.start_as_current_span(f"log.{record.levelname.lower()}") as span:
                span.set_attribute("log.level", record.levelname)
                span.set_attribute("log.message", message)
                span.set_attribute("log.logger", record.name)
                span.set_attribute("source", "world")
                # "content" is a truncated copy of the message (first 1000 chars).
                span.set_attribute("content", message[:1000])

                if record.funcName:
                    span.set_attribute("log.function", record.funcName)
                if record.lineno:
                    span.set_attribute("log.lineno", record.lineno)

                # Mark errors
                if record.levelno >= logging.ERROR:
                    span.set_attribute("error", True)

        except Exception:
            # Standard logging convention: report via handleError (honours
            # logging.raiseExceptions) instead of silently discarding.
            self.handleError(record)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def init_tracing(
    service_name: str,
    session_id: str,
    otlp_endpoint: str,
    parent_trace_id: str | None = None,
    parent_span_id: str | None = None,
) -> None:
    """Initialize OpenTelemetry tracing.

    Creates a tracer provider exporting to ``<otlp_endpoint>/v1/traces``,
    installs it as the global provider, optionally attaches a remote parent
    span context, and adds an ``OTelSpanLogHandler`` to the ``plato`` logger.
    Does nothing if tracing is already initialized. Import and setup errors
    are printed and logged rather than raised.

    Args:
        service_name: Name of the service (e.g., world name or agent name)
        session_id: Chronos session ID (added as resource attribute)
        otlp_endpoint: Chronos OTLP endpoint (e.g., http://chronos/api/otel)
        parent_trace_id: Optional parent trace ID for linking (hex string)
        parent_span_id: Optional parent span ID for linking (hex string)
    """
    global _tracer_provider, _initialized, _log_handler

    if _initialized:
        _module_logger.debug("Tracing already initialized")
        return

    # Parse the parent IDs before any global state is touched. A malformed ID
    # must not abort setup halfway (provider installed, log handler missing,
    # _initialized still False); instead tracing starts without a parent link.
    parent_ids: tuple[int, int] | None = None
    if parent_trace_id and parent_span_id:
        try:
            parent_ids = (int(parent_trace_id, 16), int(parent_span_id, 16))
        except ValueError:
            print(
                f"[OTel] Ignoring invalid parent context: trace_id={parent_trace_id}, span_id={parent_span_id}"
            )
            _module_logger.warning(
                f"Ignoring invalid parent context: trace_id={parent_trace_id}, span_id={parent_span_id}"
            )

    try:
        from opentelemetry import context as context_api
        from opentelemetry.exporter.otlp.proto.http.trace_exporter import (
            OTLPSpanExporter,
        )
        from opentelemetry.sdk.resources import Resource
        from opentelemetry.sdk.trace import TracerProvider
        from opentelemetry.sdk.trace.export import SimpleSpanProcessor
        from opentelemetry.trace import NonRecordingSpan, SpanContext, TraceFlags

        # Create resource with session ID
        resource = Resource.create(
            {
                "service.name": service_name,
                "plato.session.id": session_id,
            }
        )

        # Create tracer provider
        _tracer_provider = TracerProvider(resource=resource)

        # Add OTLP exporter pointing to Chronos (use SimpleSpanProcessor for immediate export)
        otlp_exporter = OTLPSpanExporter(endpoint=f"{otlp_endpoint.rstrip('/')}/v1/traces")
        _tracer_provider.add_span_processor(SimpleSpanProcessor(otlp_exporter))

        # Set as global tracer provider
        trace.set_tracer_provider(_tracer_provider)

        # If parent context is provided, set it as the current context
        # This allows new spans to automatically link to the parent
        if parent_ids is not None:
            trace_id_int, span_id_int = parent_ids
            parent_context = SpanContext(
                trace_id=trace_id_int,
                span_id=span_id_int,
                is_remote=True,
                trace_flags=TraceFlags(0x01),  # Sampled
            )
            parent_span = NonRecordingSpan(parent_context)
            ctx = trace.set_span_in_context(parent_span)
            context_api.attach(ctx)
            print(f"[OTel] Using parent context: trace_id={parent_trace_id}, span_id={parent_span_id}")

        # Add OTel logging handler to capture logs from plato SDK
        tracer = trace.get_tracer(service_name)
        _log_handler = OTelSpanLogHandler(tracer, level=logging.INFO)

        # Add handler to plato loggers (worlds and agents)
        # Set level to INFO to ensure logs propagate from child loggers
        plato_logger = logging.getLogger("plato")
        plato_logger.setLevel(logging.INFO)
        plato_logger.addHandler(_log_handler)
        print(
            f"[OTel] Added log handler to 'plato' logger (level={plato_logger.level}, handlers={len(plato_logger.handlers)})"
        )

        _initialized = True

        print(f"[OTel] Tracing initialized: service={service_name}, session={session_id}, endpoint={otlp_endpoint}")

    except ImportError as e:
        print(f"[OTel] OpenTelemetry SDK not installed: {e}")
        _module_logger.warning(f"OpenTelemetry SDK not installed: {e}")
    except Exception as e:
        print(f"[OTel] Failed to initialize tracing: {e}")
        _module_logger.error(f"Failed to initialize tracing: {e}")
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def shutdown_tracing(timeout_millis: int = 30000) -> None:
    """Detach the log handler, flush pending spans and shut the provider down.

    Errors during either step are reported (printed and/or logged) and
    swallowed so that shutdown itself never raises.

    Args:
        timeout_millis: Timeout in milliseconds to wait for flush (default 30s)
    """
    global _tracer_provider, _initialized, _log_handler

    # Step 1: stop converting 'plato' log records into spans.
    handler = _log_handler
    if handler:
        try:
            logging.getLogger("plato").removeHandler(handler)
        except Exception:
            pass
        _log_handler = None

    # Step 2: flush, then shut down, the provider created by init_tracing.
    provider = _tracer_provider
    if provider:
        try:
            print(f"[OTel] Flushing spans (timeout={timeout_millis}ms)...")
            if provider.force_flush(timeout_millis=timeout_millis):
                print("[OTel] Span flush completed successfully")
            else:
                print("[OTel] Span flush timed out or failed")

            provider.shutdown()
            print("[OTel] Tracing shutdown complete")
        except Exception as e:
            print(f"[OTel] Error shutting down tracer: {e}")
            _module_logger.warning(f"Error shutting down tracer: {e}")

        # Reset module state so init_tracing can run again.
        _tracer_provider = None
        _initialized = False
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def get_tracer(name: str = "plato") -> Tracer:
    """Return a tracer obtained from the OpenTelemetry ``trace`` API.

    Args:
        name: Tracer name (default: "plato")

    Returns:
        OpenTelemetry Tracer
    """
    tracer = trace.get_tracer(name)
    return tracer
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def is_initialized() -> bool:
    """Report the module-level flag that records whether tracing is set up."""
    current_state = _initialized
    return current_state
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def instrument(service_name: str = "plato-agent") -> Tracer:
    """Set up OTel tracing from the process environment and return a tracer.

    Environment variables read:
    - OTEL_EXPORTER_OTLP_ENDPOINT: OTLP endpoint; tracing is only initialized
      when this is set
    - SESSION_ID: session ID (default: "local")
    - OTEL_TRACE_ID: parent trace ID for linking spans (optional)
    - OTEL_PARENT_SPAN_ID: parent span ID for linking spans (optional)

    Without an endpoint, ``init_tracing`` is skipped and the tracer comes
    from whatever provider is currently configured.

    Args:
        service_name: Name of the service for traces

    Returns:
        OpenTelemetry Tracer
    """
    import os

    env = os.environ
    otel_endpoint = env.get("OTEL_EXPORTER_OTLP_ENDPOINT")
    session_id = env.get("SESSION_ID", "local")

    print(f"[OTel] instrument() called: service={service_name}, endpoint={otel_endpoint}, session={session_id}")

    if otel_endpoint:
        # Parent IDs are optional; init_tracing only links when both are set.
        init_tracing(
            service_name=service_name,
            session_id=session_id,
            otlp_endpoint=otel_endpoint,
            parent_trace_id=env.get("OTEL_TRACE_ID"),
            parent_span_id=env.get("OTEL_PARENT_SPAN_ID"),
        )
    else:
        print("[OTel] No OTEL_EXPORTER_OTLP_ENDPOINT set, returning no-op tracer")

    return trace.get_tracer(service_name)
|