mantisdk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mantisdk might be problematic. Click here for more details.
- mantisdk/__init__.py +22 -0
- mantisdk/adapter/__init__.py +15 -0
- mantisdk/adapter/base.py +94 -0
- mantisdk/adapter/messages.py +270 -0
- mantisdk/adapter/triplet.py +1028 -0
- mantisdk/algorithm/__init__.py +39 -0
- mantisdk/algorithm/apo/__init__.py +5 -0
- mantisdk/algorithm/apo/apo.py +889 -0
- mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
- mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
- mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
- mantisdk/algorithm/base.py +162 -0
- mantisdk/algorithm/decorator.py +264 -0
- mantisdk/algorithm/fast.py +250 -0
- mantisdk/algorithm/gepa/__init__.py +59 -0
- mantisdk/algorithm/gepa/adapter.py +459 -0
- mantisdk/algorithm/gepa/gepa.py +364 -0
- mantisdk/algorithm/gepa/lib/__init__.py +18 -0
- mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
- mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
- mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
- mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
- mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
- mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
- mantisdk/algorithm/gepa/lib/api.py +375 -0
- mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
- mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
- mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
- mantisdk/algorithm/gepa/lib/core/result.py +233 -0
- mantisdk/algorithm/gepa/lib/core/state.py +636 -0
- mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
- mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
- mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
- mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
- mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
- mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
- mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
- mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
- mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
- mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
- mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
- mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
- mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
- mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
- mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
- mantisdk/algorithm/gepa/lib/py.typed +0 -0
- mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
- mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
- mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
- mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
- mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
- mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
- mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
- mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
- mantisdk/algorithm/gepa/tracing.py +105 -0
- mantisdk/algorithm/utils.py +177 -0
- mantisdk/algorithm/verl/__init__.py +5 -0
- mantisdk/algorithm/verl/interface.py +202 -0
- mantisdk/cli/__init__.py +56 -0
- mantisdk/cli/prometheus.py +115 -0
- mantisdk/cli/store.py +131 -0
- mantisdk/cli/vllm.py +29 -0
- mantisdk/client.py +408 -0
- mantisdk/config.py +348 -0
- mantisdk/emitter/__init__.py +43 -0
- mantisdk/emitter/annotation.py +370 -0
- mantisdk/emitter/exception.py +54 -0
- mantisdk/emitter/message.py +61 -0
- mantisdk/emitter/object.py +117 -0
- mantisdk/emitter/reward.py +320 -0
- mantisdk/env_var.py +156 -0
- mantisdk/execution/__init__.py +15 -0
- mantisdk/execution/base.py +64 -0
- mantisdk/execution/client_server.py +443 -0
- mantisdk/execution/events.py +69 -0
- mantisdk/execution/inter_process.py +16 -0
- mantisdk/execution/shared_memory.py +282 -0
- mantisdk/instrumentation/__init__.py +119 -0
- mantisdk/instrumentation/agentops.py +314 -0
- mantisdk/instrumentation/agentops_langchain.py +45 -0
- mantisdk/instrumentation/litellm.py +83 -0
- mantisdk/instrumentation/vllm.py +81 -0
- mantisdk/instrumentation/weave.py +500 -0
- mantisdk/litagent/__init__.py +11 -0
- mantisdk/litagent/decorator.py +536 -0
- mantisdk/litagent/litagent.py +252 -0
- mantisdk/llm_proxy.py +1890 -0
- mantisdk/logging.py +370 -0
- mantisdk/reward.py +7 -0
- mantisdk/runner/__init__.py +11 -0
- mantisdk/runner/agent.py +845 -0
- mantisdk/runner/base.py +182 -0
- mantisdk/runner/legacy.py +309 -0
- mantisdk/semconv.py +170 -0
- mantisdk/server.py +401 -0
- mantisdk/store/__init__.py +23 -0
- mantisdk/store/base.py +897 -0
- mantisdk/store/client_server.py +2092 -0
- mantisdk/store/collection/__init__.py +30 -0
- mantisdk/store/collection/base.py +587 -0
- mantisdk/store/collection/memory.py +970 -0
- mantisdk/store/collection/mongo.py +1412 -0
- mantisdk/store/collection_based.py +1823 -0
- mantisdk/store/insight.py +648 -0
- mantisdk/store/listener.py +58 -0
- mantisdk/store/memory.py +396 -0
- mantisdk/store/mongo.py +165 -0
- mantisdk/store/sqlite.py +3 -0
- mantisdk/store/threading.py +357 -0
- mantisdk/store/utils.py +142 -0
- mantisdk/tracer/__init__.py +16 -0
- mantisdk/tracer/agentops.py +242 -0
- mantisdk/tracer/base.py +287 -0
- mantisdk/tracer/dummy.py +106 -0
- mantisdk/tracer/otel.py +555 -0
- mantisdk/tracer/weave.py +677 -0
- mantisdk/trainer/__init__.py +6 -0
- mantisdk/trainer/init_utils.py +263 -0
- mantisdk/trainer/legacy.py +367 -0
- mantisdk/trainer/registry.py +12 -0
- mantisdk/trainer/trainer.py +618 -0
- mantisdk/types/__init__.py +6 -0
- mantisdk/types/core.py +553 -0
- mantisdk/types/resources.py +204 -0
- mantisdk/types/tracer.py +515 -0
- mantisdk/types/tracing.py +218 -0
- mantisdk/utils/__init__.py +1 -0
- mantisdk/utils/id.py +18 -0
- mantisdk/utils/metrics.py +1025 -0
- mantisdk/utils/otel.py +578 -0
- mantisdk/utils/otlp.py +536 -0
- mantisdk/utils/server_launcher.py +1045 -0
- mantisdk/utils/system_snapshot.py +81 -0
- mantisdk/verl/__init__.py +8 -0
- mantisdk/verl/__main__.py +6 -0
- mantisdk/verl/async_server.py +46 -0
- mantisdk/verl/config.yaml +27 -0
- mantisdk/verl/daemon.py +1154 -0
- mantisdk/verl/dataset.py +44 -0
- mantisdk/verl/entrypoint.py +248 -0
- mantisdk/verl/trainer.py +549 -0
- mantisdk-0.1.0.dist-info/METADATA +119 -0
- mantisdk-0.1.0.dist-info/RECORD +190 -0
- mantisdk-0.1.0.dist-info/WHEEL +4 -0
- mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
- mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
|
@@ -0,0 +1,648 @@
|
|
|
1
|
+
# Copyright (c) Microsoft. All rights reserved.
|
|
2
|
+
|
|
3
|
+
"""InsightTracker - A StorageListener that streams state to Insight.
|
|
4
|
+
|
|
5
|
+
This module provides two ways to use Insight tracking:
|
|
6
|
+
|
|
7
|
+
1. **InsightTracker** (recommended): A StorageListener that can be attached to any store.
|
|
8
|
+
```python
|
|
9
|
+
from mantisdk.store import InMemoryLightningStore, InsightTracker
|
|
10
|
+
|
|
11
|
+
tracker = InsightTracker(
|
|
12
|
+
api_key="pk-lf-abc123",
|
|
13
|
+
secret_key="sk-lf-xyz789",
|
|
14
|
+
insight_url="https://insight.withmetis.ai",
|
|
15
|
+
project_id="proj-123",
|
|
16
|
+
)
|
|
17
|
+
store = InMemoryLightningStore(listeners=[tracker])
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
2. **InsightLightningStore** (convenience): Pre-configured InMemoryLightningStore with InsightTracker.
|
|
21
|
+
```python
|
|
22
|
+
from mantisdk.store import InsightLightningStore
|
|
23
|
+
|
|
24
|
+
store = InsightLightningStore(
|
|
25
|
+
api_key="pk-lf-abc123",
|
|
26
|
+
secret_key="sk-lf-xyz789",
|
|
27
|
+
insight_url="https://insight.withmetis.ai",
|
|
28
|
+
project_id="proj-123",
|
|
29
|
+
)
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
Both approaches provide:
|
|
33
|
+
- Non-blocking event streaming via background thread
|
|
34
|
+
- Fault-tolerant operation (network issues don't crash the agent)
|
|
35
|
+
- Batched HTTP requests to minimize overhead
|
|
36
|
+
- Full resource content tracking
|
|
37
|
+
- OTLP trace export support
|
|
38
|
+
- Automatic reward-to-score conversion (rewards sent as Insight scores)
|
|
39
|
+
"""
|
|
40
|
+
|
|
41
|
+
from __future__ import annotations
|
|
42
|
+
|
|
43
|
+
import atexit
|
|
44
|
+
import base64
|
|
45
|
+
import logging
|
|
46
|
+
import queue
|
|
47
|
+
import threading
|
|
48
|
+
import time
|
|
49
|
+
import uuid
|
|
50
|
+
from dataclasses import dataclass
|
|
51
|
+
from datetime import datetime, timezone
|
|
52
|
+
from typing import Any, Dict, List, Optional
|
|
53
|
+
|
|
54
|
+
import httpx
|
|
55
|
+
|
|
56
|
+
from mantisdk.emitter.reward import get_rewards_from_span, is_reward_span
|
|
57
|
+
from mantisdk.types import (
|
|
58
|
+
Attempt,
|
|
59
|
+
ResourcesUpdate,
|
|
60
|
+
Rollout,
|
|
61
|
+
Span,
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
from .listener import StorageListener
|
|
65
|
+
|
|
66
|
+
logger = logging.getLogger(__name__)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class InsightEvent:
    """An event to be sent to the Insight API."""

    # Unique event identifier, e.g. "evt-1a2b3c4d" (see InsightTracker._emit).
    id: str
    # Event type name, e.g. "rollout.created", "span.emitted", "job.completed".
    type: str
    timestamp: str  # ISO 8601 datetime string with millisecond precision and a 'Z' (UTC) suffix
    # JSON-serializable payload; shape depends on the event type.
    data: Dict[str, Any]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class InsightTracker:
|
|
80
|
+
"""A StorageListener that streams storage events to Insight.
|
|
81
|
+
|
|
82
|
+
This tracker implements the StorageListener protocol and can be attached
|
|
83
|
+
to any LightningStore to enable Insight tracking.
|
|
84
|
+
|
|
85
|
+
Features:
|
|
86
|
+
- Non-blocking: Never slow down the agent execution
|
|
87
|
+
- Fault-tolerant: Network issues don't crash the agent
|
|
88
|
+
- Batched: Reduce HTTP overhead by buffering events
|
|
89
|
+
- Full content: Sends complete resource content for experiment tracking
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
api_key: Insight public API key for authentication.
|
|
93
|
+
secret_key: Insight secret key for authentication.
|
|
94
|
+
insight_url: Insight server URL (e.g., "http://localhost:3000").
|
|
95
|
+
project_id: Project ID to associate events with.
|
|
96
|
+
flush_interval: Seconds between automatic flushes (default: 1.0).
|
|
97
|
+
max_buffer_size: Maximum events before forcing a flush (default: 1000).
|
|
98
|
+
request_timeout: HTTP request timeout in seconds (default: 10.0).
|
|
99
|
+
max_retries: Maximum retry attempts for failed requests (default: 3).
|
|
100
|
+
"""
|
|
101
|
+
|
|
102
|
+
def __init__(
|
|
103
|
+
self,
|
|
104
|
+
*,
|
|
105
|
+
api_key: str,
|
|
106
|
+
secret_key: str,
|
|
107
|
+
insight_url: str,
|
|
108
|
+
project_id: str,
|
|
109
|
+
flush_interval: float = 1.0,
|
|
110
|
+
max_buffer_size: int = 1000,
|
|
111
|
+
request_timeout: float = 10.0,
|
|
112
|
+
max_retries: int = 3,
|
|
113
|
+
) -> None:
|
|
114
|
+
# Store configuration
|
|
115
|
+
self._api_key = api_key
|
|
116
|
+
self._secret_key = secret_key
|
|
117
|
+
self._insight_url = insight_url.rstrip("/")
|
|
118
|
+
self._project_id = project_id
|
|
119
|
+
self._flush_interval = flush_interval
|
|
120
|
+
self._max_buffer_size = max_buffer_size
|
|
121
|
+
self._request_timeout = request_timeout
|
|
122
|
+
self._max_retries = max_retries
|
|
123
|
+
|
|
124
|
+
# Generate a unique job ID for this tracker instance
|
|
125
|
+
self._job_id = f"job-{uuid.uuid4().hex[:12]}"
|
|
126
|
+
|
|
127
|
+
# Track if job has been completed to prevent double-complete
|
|
128
|
+
self._completed = False
|
|
129
|
+
|
|
130
|
+
# Event buffer (thread-safe queue)
|
|
131
|
+
self._event_buffer: queue.Queue[InsightEvent] = queue.Queue()
|
|
132
|
+
|
|
133
|
+
# Background sender thread control
|
|
134
|
+
self._stop_event = threading.Event()
|
|
135
|
+
self._sender_thread: Optional[threading.Thread] = None
|
|
136
|
+
|
|
137
|
+
# Start the background sender thread
|
|
138
|
+
self._start_sender_thread()
|
|
139
|
+
|
|
140
|
+
# Emit job.created event immediately
|
|
141
|
+
self._emit_job_created()
|
|
142
|
+
|
|
143
|
+
# Register cleanup on exit
|
|
144
|
+
atexit.register(self._cleanup)
|
|
145
|
+
|
|
146
|
+
logger.info(
|
|
147
|
+
f"InsightTracker initialized - streaming to {self._insight_url} "
|
|
148
|
+
f"(project={self._project_id}, job={self._job_id})"
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
# ─────────────────────────────────────────────────────────────
|
|
152
|
+
# StorageListener Protocol Implementation
|
|
153
|
+
# ─────────────────────────────────────────────────────────────
|
|
154
|
+
|
|
155
|
+
@property
|
|
156
|
+
def capabilities(self) -> Dict[str, bool]:
|
|
157
|
+
"""Return the capabilities of the listener."""
|
|
158
|
+
return {
|
|
159
|
+
"otlp_traces": True, # Enable OTLP trace export to Insight
|
|
160
|
+
}
|
|
161
|
+
|
|
162
|
+
@property
|
|
163
|
+
def job_id(self) -> str:
|
|
164
|
+
"""Return the job ID for this tracker instance."""
|
|
165
|
+
return self._job_id
|
|
166
|
+
|
|
167
|
+
def otlp_traces_endpoint(self) -> Optional[str]:
|
|
168
|
+
"""Return the OTLP/HTTP traces endpoint."""
|
|
169
|
+
endpoint = f"{self._insight_url}/api/public/otel/v1/traces"
|
|
170
|
+
logger.debug(f"OTLP traces endpoint: {endpoint}")
|
|
171
|
+
return endpoint
|
|
172
|
+
|
|
173
|
+
def get_otlp_headers(self) -> Dict[str, str]:
|
|
174
|
+
"""Return the authentication headers for OTLP export.
|
|
175
|
+
|
|
176
|
+
Insight's OTLP endpoint uses Basic Auth with format: public_key:secret_key
|
|
177
|
+
"""
|
|
178
|
+
credentials = f"{self._api_key}:{self._secret_key}"
|
|
179
|
+
encoded = base64.b64encode(credentials.encode()).decode()
|
|
180
|
+
return {
|
|
181
|
+
"Authorization": f"Basic {encoded}",
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
async def on_job_created(self, job_id: str, project_id: Optional[str] = None) -> None:
|
|
185
|
+
"""Called when a job is created. (Usually handled internally)"""
|
|
186
|
+
pass # We emit job.created in __init__
|
|
187
|
+
|
|
188
|
+
async def on_rollout_created(self, rollout: Rollout) -> None:
|
|
189
|
+
"""Called when a rollout is created."""
|
|
190
|
+
self._emit(
|
|
191
|
+
"rollout.created",
|
|
192
|
+
{
|
|
193
|
+
"id": rollout.rollout_id,
|
|
194
|
+
"input": rollout.input,
|
|
195
|
+
"status": rollout.status,
|
|
196
|
+
"resource_id": rollout.resources_id,
|
|
197
|
+
"mode": rollout.mode,
|
|
198
|
+
"start_time": rollout.start_time,
|
|
199
|
+
"config": {
|
|
200
|
+
"max_attempts": rollout.config.max_attempts,
|
|
201
|
+
"retry_condition": rollout.config.retry_condition,
|
|
202
|
+
"timeout_seconds": rollout.config.timeout_seconds,
|
|
203
|
+
"unresponsive_seconds": rollout.config.unresponsive_seconds,
|
|
204
|
+
},
|
|
205
|
+
"metadata": rollout.metadata,
|
|
206
|
+
},
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
async def on_rollout_updated(self, rollout: Rollout) -> None:
|
|
210
|
+
"""Called when a rollout is updated."""
|
|
211
|
+
data: Dict[str, Any] = {
|
|
212
|
+
"id": rollout.rollout_id,
|
|
213
|
+
"status": rollout.status,
|
|
214
|
+
}
|
|
215
|
+
if rollout.end_time is not None:
|
|
216
|
+
data["end_time"] = rollout.end_time
|
|
217
|
+
self._emit("rollout.status_changed", data)
|
|
218
|
+
|
|
219
|
+
async def on_attempt_created(self, attempt: Attempt) -> None:
|
|
220
|
+
"""Called when an attempt is created."""
|
|
221
|
+
self._emit(
|
|
222
|
+
"attempt.created",
|
|
223
|
+
{
|
|
224
|
+
"id": attempt.attempt_id,
|
|
225
|
+
"rollout_id": attempt.rollout_id,
|
|
226
|
+
"sequence_id": attempt.sequence_id,
|
|
227
|
+
"status": attempt.status,
|
|
228
|
+
"start_time": attempt.start_time,
|
|
229
|
+
"worker_id": attempt.worker_id,
|
|
230
|
+
},
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
async def on_attempt_updated(self, attempt: Attempt, rollout_id: str) -> None:
|
|
234
|
+
"""Called when an attempt is updated."""
|
|
235
|
+
data: Dict[str, Any] = {
|
|
236
|
+
"id": attempt.attempt_id,
|
|
237
|
+
"rollout_id": rollout_id,
|
|
238
|
+
"status": attempt.status,
|
|
239
|
+
}
|
|
240
|
+
if attempt.end_time is not None:
|
|
241
|
+
data["end_time"] = attempt.end_time
|
|
242
|
+
self._emit("attempt.status_changed", data)
|
|
243
|
+
|
|
244
|
+
async def on_span_created(self, span: Span) -> None:
|
|
245
|
+
"""Called when a span is added.
|
|
246
|
+
|
|
247
|
+
If the span is a reward span, also sends the reward as an Insight score.
|
|
248
|
+
"""
|
|
249
|
+
# Emit span event to the event buffer
|
|
250
|
+
self._emit(
|
|
251
|
+
"span.emitted",
|
|
252
|
+
{
|
|
253
|
+
"trace_id": span.trace_id,
|
|
254
|
+
"span_id": span.span_id,
|
|
255
|
+
"parent_id": span.parent_id,
|
|
256
|
+
"attempt_id": span.attempt_id,
|
|
257
|
+
"rollout_id": span.rollout_id,
|
|
258
|
+
"sequence_id": span.sequence_id,
|
|
259
|
+
"name": span.name,
|
|
260
|
+
"status": {
|
|
261
|
+
"status_code": span.status.status_code,
|
|
262
|
+
"description": span.status.description,
|
|
263
|
+
},
|
|
264
|
+
"attributes": span.attributes,
|
|
265
|
+
"start_time": span.start_time,
|
|
266
|
+
"end_time": span.end_time,
|
|
267
|
+
},
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
# Check if this is a reward span and send as Insight score
|
|
271
|
+
if is_reward_span(span):
|
|
272
|
+
rewards = get_rewards_from_span(span)
|
|
273
|
+
for reward in rewards:
|
|
274
|
+
self._send_score(
|
|
275
|
+
name=reward.name,
|
|
276
|
+
value=reward.value,
|
|
277
|
+
trace_id=span.trace_id,
|
|
278
|
+
observation_id=span.span_id,
|
|
279
|
+
rollout_id=span.rollout_id,
|
|
280
|
+
attempt_id=span.attempt_id,
|
|
281
|
+
)
|
|
282
|
+
|
|
283
|
+
async def on_resource_registered(self, resource: ResourcesUpdate) -> None:
|
|
284
|
+
"""Called when a resource is registered/updated.
|
|
285
|
+
|
|
286
|
+
Sends FULL resource content for complete experiment tracking.
|
|
287
|
+
"""
|
|
288
|
+
# Serialize resources to JSON-compatible dicts
|
|
289
|
+
# Resources are Pydantic models (PromptTemplate, LLM, etc.) which need model_dump()
|
|
290
|
+
serialized_resources: Dict[str, Any] = {}
|
|
291
|
+
for name, res in resource.resources.items():
|
|
292
|
+
if hasattr(res, "model_dump"):
|
|
293
|
+
serialized_resources[name] = res.model_dump()
|
|
294
|
+
elif hasattr(res, "dict"):
|
|
295
|
+
# Fallback for older Pydantic v1 models
|
|
296
|
+
serialized_resources[name] = res.dict()
|
|
297
|
+
else:
|
|
298
|
+
# Already a dict or primitive
|
|
299
|
+
serialized_resources[name] = res
|
|
300
|
+
|
|
301
|
+
self._emit(
|
|
302
|
+
"resource.registered",
|
|
303
|
+
{
|
|
304
|
+
"id": resource.resources_id,
|
|
305
|
+
"version": resource.version,
|
|
306
|
+
"create_time": resource.create_time,
|
|
307
|
+
"update_time": resource.update_time,
|
|
308
|
+
"resources": serialized_resources,
|
|
309
|
+
},
|
|
310
|
+
)
|
|
311
|
+
|
|
312
|
+
# ─────────────────────────────────────────────────────────────
|
|
313
|
+
# Event Emission Helpers
|
|
314
|
+
# ─────────────────────────────────────────────────────────────
|
|
315
|
+
|
|
316
|
+
def _emit(self, event_type: str, data: Dict[str, Any]) -> None:
|
|
317
|
+
"""Add an event to the buffer for later sending."""
|
|
318
|
+
# Format: 2026-01-17T19:51:40.123Z (Zod datetime expects 'Z' suffix for UTC)
|
|
319
|
+
now = datetime.now(timezone.utc)
|
|
320
|
+
timestamp = now.strftime("%Y-%m-%dT%H:%M:%S.") + f"{now.microsecond // 1000:03d}Z"
|
|
321
|
+
event = InsightEvent(
|
|
322
|
+
id=f"evt-{uuid.uuid4().hex[:8]}",
|
|
323
|
+
type=event_type,
|
|
324
|
+
timestamp=timestamp,
|
|
325
|
+
data=data,
|
|
326
|
+
)
|
|
327
|
+
try:
|
|
328
|
+
self._event_buffer.put_nowait(event)
|
|
329
|
+
except queue.Full:
|
|
330
|
+
logger.warning(f"Event buffer full, dropping event: {event_type}")
|
|
331
|
+
return
|
|
332
|
+
|
|
333
|
+
# Force flush if buffer is at capacity
|
|
334
|
+
if self._event_buffer.qsize() >= self._max_buffer_size:
|
|
335
|
+
self._trigger_flush()
|
|
336
|
+
|
|
337
|
+
def _emit_job_created(self) -> None:
|
|
338
|
+
"""Emit the job.created event when the tracker is initialized."""
|
|
339
|
+
self._emit(
|
|
340
|
+
"job.created",
|
|
341
|
+
{
|
|
342
|
+
"project_id": self._project_id,
|
|
343
|
+
"type": "agent",
|
|
344
|
+
},
|
|
345
|
+
)
|
|
346
|
+
# Immediately flush to ensure job is created before other events
|
|
347
|
+
self._flush_events()
|
|
348
|
+
|
|
349
|
+
def _send_score(
|
|
350
|
+
self,
|
|
351
|
+
name: str,
|
|
352
|
+
value: float,
|
|
353
|
+
trace_id: str,
|
|
354
|
+
observation_id: Optional[str] = None,
|
|
355
|
+
rollout_id: Optional[str] = None,
|
|
356
|
+
attempt_id: Optional[str] = None,
|
|
357
|
+
) -> None:
|
|
358
|
+
"""Send a score to Insight's /api/public/scores endpoint.
|
|
359
|
+
|
|
360
|
+
Args:
|
|
361
|
+
name: Score name (e.g., "primary", "task_completion").
|
|
362
|
+
value: Numeric score value.
|
|
363
|
+
trace_id: OTEL trace ID to link the score to.
|
|
364
|
+
observation_id: Optional span ID to link to specific observation.
|
|
365
|
+
rollout_id: Optional rollout ID for metadata.
|
|
366
|
+
attempt_id: Optional attempt ID for metadata.
|
|
367
|
+
"""
|
|
368
|
+
score_payload: Dict[str, Any] = {
|
|
369
|
+
"name": name,
|
|
370
|
+
"value": value,
|
|
371
|
+
"dataType": "NUMERIC",
|
|
372
|
+
"traceId": trace_id,
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
if observation_id:
|
|
376
|
+
score_payload["observationId"] = observation_id
|
|
377
|
+
|
|
378
|
+
# Add rollout/attempt context as metadata
|
|
379
|
+
metadata: Dict[str, Any] = {
|
|
380
|
+
"source": "agent_lightning",
|
|
381
|
+
"job_id": self._job_id,
|
|
382
|
+
}
|
|
383
|
+
if rollout_id:
|
|
384
|
+
metadata["rollout_id"] = rollout_id
|
|
385
|
+
if attempt_id:
|
|
386
|
+
metadata["attempt_id"] = attempt_id
|
|
387
|
+
score_payload["metadata"] = metadata
|
|
388
|
+
|
|
389
|
+
# Create Basic auth header
|
|
390
|
+
auth_string = f"{self._api_key}:{self._secret_key}"
|
|
391
|
+
auth_bytes = base64.b64encode(auth_string.encode("utf-8")).decode("utf-8")
|
|
392
|
+
|
|
393
|
+
# Send score in a non-blocking way (fire and forget with retries)
|
|
394
|
+
def send_score_async() -> None:
|
|
395
|
+
for attempt in range(self._max_retries):
|
|
396
|
+
try:
|
|
397
|
+
with httpx.Client(timeout=self._request_timeout) as client:
|
|
398
|
+
response = client.post(
|
|
399
|
+
f"{self._insight_url}/api/public/scores",
|
|
400
|
+
headers={
|
|
401
|
+
"Authorization": f"Basic {auth_bytes}",
|
|
402
|
+
"Content-Type": "application/json",
|
|
403
|
+
},
|
|
404
|
+
json=score_payload,
|
|
405
|
+
)
|
|
406
|
+
response.raise_for_status()
|
|
407
|
+
logger.debug(f"Successfully sent score '{name}={value}' to Insight")
|
|
408
|
+
return
|
|
409
|
+
except httpx.HTTPStatusError as e:
|
|
410
|
+
if e.response.status_code == 401:
|
|
411
|
+
logger.error("Unauthorized (401) sending score to Insight. Check API credentials.")
|
|
412
|
+
return
|
|
413
|
+
else:
|
|
414
|
+
try:
|
|
415
|
+
error_body = e.response.json()
|
|
416
|
+
except Exception:
|
|
417
|
+
error_body = e.response.text
|
|
418
|
+
logger.warning(
|
|
419
|
+
f"HTTP error sending score (attempt {attempt + 1}/{self._max_retries}): "
|
|
420
|
+
f"{e.response.status_code} - {error_body}"
|
|
421
|
+
)
|
|
422
|
+
except httpx.HTTPError as e:
|
|
423
|
+
logger.warning(f"Failed to send score (attempt {attempt + 1}/{self._max_retries}): {e}")
|
|
424
|
+
|
|
425
|
+
# Exponential backoff
|
|
426
|
+
if attempt < self._max_retries - 1:
|
|
427
|
+
backoff_time = 2**attempt
|
|
428
|
+
time.sleep(backoff_time)
|
|
429
|
+
|
|
430
|
+
logger.error(f"Failed to send score '{name}' after {self._max_retries} retries")
|
|
431
|
+
|
|
432
|
+
# Run in a thread to avoid blocking
|
|
433
|
+
threading.Thread(target=send_score_async, daemon=True, name="insight-score-sender").start()
|
|
434
|
+
|
|
435
|
+
# ─────────────────────────────────────────────────────────────
|
|
436
|
+
# Background Sender Thread
|
|
437
|
+
# ─────────────────────────────────────────────────────────────
|
|
438
|
+
|
|
439
|
+
def _start_sender_thread(self) -> None:
|
|
440
|
+
"""Start the background sender thread."""
|
|
441
|
+
|
|
442
|
+
def sender_loop() -> None:
|
|
443
|
+
while not self._stop_event.is_set():
|
|
444
|
+
# Wait for the flush interval or until stopped
|
|
445
|
+
self._stop_event.wait(timeout=self._flush_interval)
|
|
446
|
+
if not self._stop_event.is_set():
|
|
447
|
+
self._flush_events()
|
|
448
|
+
|
|
449
|
+
self._sender_thread = threading.Thread(
|
|
450
|
+
target=sender_loop,
|
|
451
|
+
daemon=True,
|
|
452
|
+
name="insight-sender",
|
|
453
|
+
)
|
|
454
|
+
self._sender_thread.start()
|
|
455
|
+
|
|
456
|
+
def _trigger_flush(self) -> None:
|
|
457
|
+
"""Trigger an immediate flush by interrupting the wait."""
|
|
458
|
+
# The flush will happen on the next iteration since we're using a timeout
|
|
459
|
+
pass
|
|
460
|
+
|
|
461
|
+
def _flush_events(self) -> None:
|
|
462
|
+
"""Flush all buffered events to the Insight API."""
|
|
463
|
+
events: List[InsightEvent] = []
|
|
464
|
+
|
|
465
|
+
# Drain the queue
|
|
466
|
+
while True:
|
|
467
|
+
try:
|
|
468
|
+
event = self._event_buffer.get_nowait()
|
|
469
|
+
events.append(event)
|
|
470
|
+
except queue.Empty:
|
|
471
|
+
break
|
|
472
|
+
|
|
473
|
+
if not events:
|
|
474
|
+
return
|
|
475
|
+
|
|
476
|
+
self._send_events(events)
|
|
477
|
+
|
|
478
|
+
def _send_events(self, events: List[InsightEvent]) -> None:
|
|
479
|
+
"""Send events to the Insight API with retry logic."""
|
|
480
|
+
if not events:
|
|
481
|
+
return
|
|
482
|
+
|
|
483
|
+
payload = {
|
|
484
|
+
"job_id": self._job_id,
|
|
485
|
+
# Note: project_id is not sent - it's derived from the API key auth
|
|
486
|
+
"events": [
|
|
487
|
+
{
|
|
488
|
+
"id": e.id,
|
|
489
|
+
"type": e.type,
|
|
490
|
+
"timestamp": e.timestamp,
|
|
491
|
+
"data": e.data,
|
|
492
|
+
}
|
|
493
|
+
for e in events
|
|
494
|
+
],
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
# Create Basic auth header from api_key:secret_key
|
|
498
|
+
auth_string = f"{self._api_key}:{self._secret_key}"
|
|
499
|
+
auth_bytes = base64.b64encode(auth_string.encode("utf-8")).decode("utf-8")
|
|
500
|
+
|
|
501
|
+
for attempt in range(self._max_retries):
|
|
502
|
+
try:
|
|
503
|
+
with httpx.Client(timeout=self._request_timeout) as client:
|
|
504
|
+
response = client.post(
|
|
505
|
+
f"{self._insight_url}/api/public/v1/agent/ingest",
|
|
506
|
+
headers={
|
|
507
|
+
"Authorization": f"Basic {auth_bytes}",
|
|
508
|
+
"Content-Type": "application/json",
|
|
509
|
+
},
|
|
510
|
+
json=payload,
|
|
511
|
+
)
|
|
512
|
+
response.raise_for_status()
|
|
513
|
+
logger.debug(f"Successfully sent {len(events)} events to Insight")
|
|
514
|
+
return
|
|
515
|
+
except httpx.HTTPStatusError as e:
|
|
516
|
+
if e.response.status_code == 401:
|
|
517
|
+
# Unauthorized - bad API key, won't fix itself
|
|
518
|
+
logger.error("Unauthorized (401) sending events to Insight. Check API credentials.")
|
|
519
|
+
return
|
|
520
|
+
elif e.response.status_code == 429:
|
|
521
|
+
# Rate limited - back off
|
|
522
|
+
logger.warning("Rate limited (429) sending events, retrying with backoff...")
|
|
523
|
+
else:
|
|
524
|
+
# Log the full response for debugging
|
|
525
|
+
try:
|
|
526
|
+
error_body = e.response.json()
|
|
527
|
+
except Exception:
|
|
528
|
+
error_body = e.response.text
|
|
529
|
+
logger.warning(
|
|
530
|
+
f"HTTP error sending events (attempt {attempt + 1}/{self._max_retries}): "
|
|
531
|
+
f"{e.response.status_code} - {error_body}"
|
|
532
|
+
)
|
|
533
|
+
except httpx.HTTPError as e:
|
|
534
|
+
logger.warning(f"Failed to send events (attempt {attempt + 1}/{self._max_retries}): {e}")
|
|
535
|
+
|
|
536
|
+
# Exponential backoff
|
|
537
|
+
if attempt < self._max_retries - 1:
|
|
538
|
+
backoff_time = 2**attempt
|
|
539
|
+
time.sleep(backoff_time)
|
|
540
|
+
|
|
541
|
+
logger.error(f"Failed to send {len(events)} events after {self._max_retries} retries - events dropped")
|
|
542
|
+
|
|
543
|
+
# ─────────────────────────────────────────────────────────────
|
|
544
|
+
# Lifecycle Methods
|
|
545
|
+
# ─────────────────────────────────────────────────────────────
|
|
546
|
+
|
|
547
|
+
def complete(self, summary: Optional[Dict[str, Any]] = None) -> None:
    """Finalize the job successfully and drain any buffered events.

    Idempotent: only the first call emits the ``job.completed`` event;
    subsequent calls are no-ops.

    Args:
        summary: Optional summary payload attached to the completion event.
    """
    if not self._completed:
        self._completed = True
        self._emit("job.completed", {"summary": summary or {}})
        # Signal the background flusher to stop, then push out what remains.
        self._stop_event.set()
        self._flush_events()
        logger.info(f"InsightTracker job {self._job_id} completed")
|
|
556
|
+
|
|
557
|
+
def fail(self, error: str) -> None:
    """Record a terminal failure for this job and drain the event buffer.

    Args:
        error: Human-readable description of what went wrong.
    """
    payload = {"error": error}
    self._emit("job.failed", payload)
    # Stop the background flusher before forcing a final flush.
    self._stop_event.set()
    self._flush_events()
    logger.error(f"InsightTracker job {self._job_id} failed: {error}")
|
|
563
|
+
|
|
564
|
+
def _cleanup(self) -> None:
    """Best-effort teardown: flush pending events exactly once on exit.

    A no-op when the stop event is already set (e.g. after complete()
    or fail() has run).
    """
    if self._stop_event.is_set():
        return  # shutdown already handled elsewhere
    # Drain the buffer first, then mark the tracker as stopped.
    self._flush_events()
    self._stop_event.set()
|
|
570
|
+
|
|
571
|
+
def __enter__(self) -> "InsightTracker":
    """Enter the context manager; the tracker itself is the managed value."""
    return self
|
|
573
|
+
|
|
574
|
+
def __exit__(
    self,
    exc_type: Optional[type],
    exc_val: Optional[BaseException],
    exc_tb: Optional[Any],
) -> None:
    """Finalize the job on context exit.

    An in-flight exception marks the job failed; a clean exit completes
    the job unless complete() was already called. Returns None, so the
    exception (if any) is never suppressed.
    """
    if not exc_type:
        if not self._completed:
            self.complete()
        return
    message = str(exc_val) if exc_val else "Unknown error"
    self.fail(message)
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
587
|
+
# Convenience factory function for backward compatibility
|
|
588
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def InsightLightningStore(
    *,
    api_key: str,
    secret_key: str,
    insight_url: str,
    project_id: str,
    flush_interval: float = 1.0,
    max_buffer_size: int = 1000,
    request_timeout: float = 10.0,
    max_retries: int = 3,
    thread_safe: bool = False,
    **kwargs: Any,
) -> "InMemoryLightningStore":
    """Build an InMemoryLightningStore wired up with an InsightTracker listener.

    Convenience factory: it constructs the tracker from the given connection
    settings and registers it on a fresh in-memory store. Use InsightTracker
    directly when finer control is needed.

    Args:
        api_key: Insight public API key for authentication.
        secret_key: Insight secret key for authentication.
        insight_url: Insight server URL (e.g., "http://localhost:3000").
        project_id: Project ID to associate events with.
        flush_interval: Seconds between automatic flushes (default: 1.0).
        max_buffer_size: Maximum events before forcing a flush (default: 1000).
        request_timeout: HTTP request timeout in seconds (default: 10.0).
        max_retries: Maximum retry attempts for failed requests (default: 3).
        thread_safe: Whether the underlying store is thread-safe (default: False).
        **kwargs: Forwarded verbatim to the InMemoryLightningStore constructor.

    Returns:
        An InMemoryLightningStore with the InsightTracker attached as a listener.

    Example:
        ```python
        store = InsightLightningStore(
            api_key="pk-lf-abc123",
            secret_key="sk-lf-xyz789",
            insight_url="https://insight.withmetis.ai",
            project_id="proj-123",
        )

        trainer = Trainer(algorithm=GEPA(...), store=store)
        ```
    """
    # Deferred import: resolved only when the factory is actually called.
    from .memory import InMemoryLightningStore as MemStore

    tracker_config = dict(
        api_key=api_key,
        secret_key=secret_key,
        insight_url=insight_url,
        project_id=project_id,
        flush_interval=flush_interval,
        max_buffer_size=max_buffer_size,
        request_timeout=request_timeout,
        max_retries=max_retries,
    )
    tracker = InsightTracker(**tracker_config)

    return MemStore(thread_safe=thread_safe, listeners=[tracker], **kwargs)
|