penguiflow-2.2.5-py3-none-any.whl → penguiflow-2.2.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/planner_enterprise_agent/__init__.py +30 -0
- examples/planner_enterprise_agent/config.py +93 -0
- examples/planner_enterprise_agent/main.py +709 -0
- examples/planner_enterprise_agent/nodes.py +882 -0
- examples/planner_enterprise_agent/telemetry.py +245 -0
- penguiflow/__init__.py +1 -1
- penguiflow/planner/__init__.py +6 -0
- penguiflow/planner/dspy_client.py +327 -0
- penguiflow/planner/react.py +465 -52
- penguiflow/remote.py +2 -2
- penguiflow/state.py +1 -1
- {penguiflow-2.2.5.dist-info → penguiflow-2.2.6.dist-info}/METADATA +2 -1
- {penguiflow-2.2.5.dist-info → penguiflow-2.2.6.dist-info}/RECORD +17 -11
- {penguiflow-2.2.5.dist-info → penguiflow-2.2.6.dist-info}/WHEEL +0 -0
- {penguiflow-2.2.5.dist-info → penguiflow-2.2.6.dist-info}/entry_points.txt +0 -0
- {penguiflow-2.2.5.dist-info → penguiflow-2.2.6.dist-info}/licenses/LICENSE +0 -0
- {penguiflow-2.2.5.dist-info → penguiflow-2.2.6.dist-info}/top_level.txt +0 -0
examples/planner_enterprise_agent/telemetry.py ADDED

@@ -0,0 +1,245 @@
+"""Enterprise telemetry middleware for comprehensive observability.
+
+This module implements the telemetry patterns from the successful PenguiFlow
+implementation case study, providing full visibility into:
+- Planner lifecycle events
+- Node execution with detailed error payloads
+- Flow events with structured logging
+- MLflow/observability backend integration
+"""
+
+from __future__ import annotations
+
+import logging
+from collections.abc import Mapping
+from typing import Any
+
+from examples.planner_enterprise_agent.config import AgentConfig
+from penguiflow.metrics import FlowEvent
+from penguiflow.planner import PlannerEvent
+
+
+class AgentTelemetry:
+    """Comprehensive telemetry for enterprise agent deployments.
+
+    Implements the telemetry middleware pattern that captures:
+    1. Full exception tracebacks from FlowEvents
+    2. Detailed error payloads with context
+    3. LLM call costs and latency
+    4. Planning step metrics
+    5. Structured events for external systems
+
+    Usage:
+        telemetry = AgentTelemetry(config)
+
+        # Add to PenguiFlow
+        flow.add_middleware(log_flow_events(telemetry.logger))
+        flow.add_middleware(telemetry.record_flow_event)
+
+        # Add to ReactPlanner
+        planner = ReactPlanner(..., event_callback=telemetry.record_planner_event)
+    """
+
+    def __init__(self, config: AgentConfig) -> None:
+        self.config = config
+        self.logger = logging.getLogger(f"penguiflow.{config.agent_name}")
+        self.planner_logger = logging.getLogger(
+            f"penguiflow.{config.agent_name}.planner"
+        )
+
+        # Event collection for batch emission
+        self._events: list[dict[str, Any]] = []
+
+        # Metrics tracking
+        self._metrics: dict[str, Any] = {
+            "planner_steps": 0,
+            "planner_llm_calls": 0,
+            "planner_cost_usd": 0.0,
+            "flow_node_errors": 0,
+            "flow_node_successes": 0,
+        }
+
+    async def record_flow_event(self, event: FlowEvent) -> FlowEvent:
+        """Middleware function that intercepts all PenguiFlow events.
+
+        This is the CRITICAL pattern from the case study - it extracts
+        detailed error payloads that would otherwise be trapped in flow state.
+        """
+        event_type = event.event_type
+
+        if event_type == "node_start":
+            self.logger.debug(
+                "node_start",
+                extra={
+                    "node": event.node_name,
+                    "trace_id": event.trace_id,
+                    "node_id": event.node_id,
+                },
+            )
+
+        elif event_type == "node_success":
+            self._metrics["flow_node_successes"] += 1
+            self.logger.info(
+                "node_success",
+                extra={
+                    "node": event.node_name,
+                    "trace_id": event.trace_id,
+                    "latency_ms": event.latency_ms,
+                },
+            )
+
+        elif event_type == "node_error":
+            # THIS IS THE CRITICAL PART - Extract error details!
+            # Without this, you only see "node_error" with no context
+            error_payload = event.error_payload or {}
+
+            self._metrics["flow_node_errors"] += 1
+
+            # Log everything for debugging (the breakthrough from the case study)
+            self.logger.error(
+                "node_error",
+                extra={
+                    "node": event.node_name,
+                    "trace_id": event.trace_id,
+                    "node_id": event.node_id,
+                    "error_class": error_payload.get("error_class"),
+                    "error_message": error_payload.get("error_message"),
+                    "error_traceback": error_payload.get("error_traceback"),
+                    "flow_error_code": error_payload.get("code"),
+                    "flow_error_message": error_payload.get("message"),
+                    # Include full payload for complete visibility
+                    **error_payload,
+                },
+            )
+
+            # Collect for batch emission to observability backend
+            if self.config.enable_telemetry:
+                self._events.append(
+                    {
+                        "event": "flow.node_error",
+                        "payload": {
+                            "node": event.node_name,
+                            "trace_id": event.trace_id,
+                            **error_payload,
+                        },
+                    }
+                )
+
+        # Always return event unmodified - middleware is read-only
+        return event
+
+    def record_planner_event(self, event: PlannerEvent) -> None:
+        """Callback for ReactPlanner events.
+
+        Captures planner-specific telemetry:
+        - Step start/complete with latency
+        - LLM calls with cost tracking
+        - Pause/resume operations
+        - Constraint violations
+        """
+        event_type = event.event_type
+
+        # Extract all event data
+        extra = event.to_payload()
+
+        if event_type == "step_start":
+            self.planner_logger.debug("step_start", extra=extra)
+
+        elif event_type == "step_complete":
+            self._metrics["planner_steps"] += 1
+            self.planner_logger.info("step_complete", extra=extra)
+
+            # Track cost if available
+            cost = extra.get("cost_usd", 0)
+            if cost > 0:
+                self._metrics["planner_cost_usd"] += cost
+
+        elif event_type == "llm_call":
+            self._metrics["planner_llm_calls"] += 1
+            self.planner_logger.debug("llm_call", extra=extra)
+
+        elif event_type == "pause":
+            self.planner_logger.info("pause", extra=extra)
+
+        elif event_type == "resume":
+            self.planner_logger.info("resume", extra=extra)
+
+        elif event_type == "finish":
+            self.planner_logger.info("finish", extra=extra)
+
+        elif event_type.endswith("_error") or "error" in event.extra:
+            self.planner_logger.error(event_type, extra=extra)
+
+        # Collect for observability backend
+        if self.config.enable_telemetry:
+            self._events.append(
+                {
+                    "event": f"planner.{event_type}",
+                    "payload": extra,
+                }
+            )
+
+    def emit_collected_events(self) -> None:
+        """Emit batched events to observability backend.
+
+        Call this after planner execution completes to send all
+        collected telemetry to your monitoring system.
+        """
+        if not self._events:
+            return
+
+        if self.config.telemetry_backend == "mlflow":
+            self._emit_to_mlflow()
+        elif self.config.telemetry_backend == "datadog":
+            self._emit_to_datadog()
+        else:
+            # Default: log as JSON for structured log aggregation
+            self.logger.info(
+                "telemetry_batch",
+                extra={
+                    "events": self._events,
+                    "metrics": self._metrics,
+                },
+            )
+
+        # Clear for next execution
+        self._events.clear()
+
+    def _emit_to_mlflow(self) -> None:
+        """Emit events to MLflow tracking server."""
+        if not self.config.mlflow_tracking_uri:
+            self.logger.warning("mlflow_tracking_uri not configured, skipping emission")
+            return
+
+        # Implementation would use mlflow.log_metrics, mlflow.log_params, etc.
+        # Stub for example purposes
+        self.logger.info(
+            "mlflow_emit",
+            extra={
+                "tracking_uri": self.config.mlflow_tracking_uri,
+                "event_count": len(self._events),
+            },
+        )
+
+    def _emit_to_datadog(self) -> None:
+        """Emit events to DataDog APM."""
+        # Implementation would use datadog client
+        # Stub for example purposes
+        self.logger.info(
+            "datadog_emit",
+            extra={"event_count": len(self._events)},
+        )
+
+    def get_metrics(self) -> Mapping[str, Any]:
+        """Return current metrics snapshot."""
+        return dict(self._metrics)
+
+    def reset_metrics(self) -> None:
+        """Reset metrics counters (useful for testing)."""
+        self._metrics = {
+            "planner_steps": 0,
+            "planner_llm_calls": 0,
+            "planner_cost_usd": 0.0,
+            "flow_node_errors": 0,
+            "flow_node_successes": 0,
+        }
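The Usage block in the class docstring compresses the wiring; the sketch below spells out the intended lifecycle. It is illustrative, not part of the diff: AgentConfig construction is elided (config.py is not shown here), and the flow object is assumed to be built as in the example's main.py.

# Illustrative sketch only - not part of the package contents.
import logging

from examples.planner_enterprise_agent.config import AgentConfig
from examples.planner_enterprise_agent.telemetry import AgentTelemetry
from penguiflow.planner import ReactPlanner

logging.basicConfig(level=logging.INFO)

config = AgentConfig(...)  # elided; see config.py (+93 lines in this release)
telemetry = AgentTelemetry(config)

# Flow side: register the read-only middleware (returns the event unchanged)
flow.add_middleware(telemetry.record_flow_event)  # flow assumed from main.py

# Planner side: register the event callback
planner = ReactPlanner(..., event_callback=telemetry.record_planner_event)

# After a run completes: flush batched events, then inspect the counters
telemetry.emit_collected_events()
print(telemetry.get_metrics())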
penguiflow/__init__.py CHANGED
penguiflow/planner/__init__.py CHANGED

@@ -2,10 +2,13 @@
 
 from __future__ import annotations
 
+from .dspy_client import DSPyLLMClient
 from .react import (
     ParallelCall,
     ParallelJoin,
     PlannerAction,
+    PlannerEvent,
+    PlannerEventCallback,
     PlannerFinish,
     PlannerPause,
     ReactPlanner,
@@ -15,9 +18,12 @@ from .react import (
 )
 
 __all__ = [
+    "DSPyLLMClient",
     "ParallelCall",
     "ParallelJoin",
     "PlannerAction",
+    "PlannerEvent",
+    "PlannerEventCallback",
     "PlannerFinish",
     "PlannerPause",
     "ReactPlanner",
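The net effect of this change is three new public names on penguiflow.planner, mirroring the __all__ additions above. As consumed downstream:

from penguiflow.planner import (
    DSPyLLMClient,         # DSPy-backed structured-output client (new file below)
    PlannerEvent,          # event object passed to planner event callbacks
    PlannerEventCallback,  # type of the event_callback accepted by ReactPlanner
)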
penguiflow/planner/dspy_client.py ADDED

@@ -0,0 +1,327 @@
+"""DSPy-based LLM client for ReactPlanner with robust structured outputs.
+
+This module provides a DSPy-powered alternative to direct LiteLLM calls,
+offering better structured output handling across different LLM providers.
+DSPy's signature system with Pydantic models works reliably even with
+providers that don't support native JSON schema mode (like Databricks).
+"""
+
+from __future__ import annotations
+
+import ast
+import asyncio
+import json
+import logging
+from collections.abc import Mapping, Sequence
+from typing import TYPE_CHECKING, Any
+
+from pydantic import BaseModel
+
+if TYPE_CHECKING:
+    # PlannerAction imported at runtime in _create_signature to avoid circular import
+    from penguiflow.planner.react import PlannerAction  # noqa: F401
+
+logger = logging.getLogger(__name__)
+
+
+class DSPyLLMClient:
+    """LLM client using DSPy for structured outputs.
+
+    This client implements the JSONLLMClient protocol and uses DSPy's
+    signature system to generate structured outputs. DSPy handles the
+    prompt engineering and parsing internally, providing more reliable
+    structured outputs across different LLM providers.
+
+    Benefits over direct LiteLLM:
+    - Better structured output reliability across providers
+    - Automatic prompt optimization for structure extraction
+    - Works with models that don't support native JSON schema mode
+    - Graceful degradation with retry logic
+
+    Args:
+        llm: Model identifier (e.g., "gpt-4o-mini",
+            "databricks/databricks-gpt-oss-120b")
+        temperature: Sampling temperature (0.0 = deterministic)
+        max_retries: Number of retry attempts for transient failures
+        timeout_s: Timeout per LLM call in seconds
+
+    Example:
+        >>> client = DSPyLLMClient(
+        ...     llm="databricks/databricks-gpt-oss-120b",
+        ...     temperature=0.0,
+        ... )
+        >>> response = await client.complete(
+        ...     messages=[{"role": "user", "content": "..."}],
+        ...     response_format={"type": "json_schema", "json_schema": {...}},
+        ... )
+    """
+
+    expects_json_schema = True
+
+    def __init__(
+        self,
+        llm: str | dict[str, Any],
+        *,
+        temperature: float = 0.0,
+        max_retries: int = 3,
+        timeout_s: float = 60.0,
+    ) -> None:
+        self._llm = llm
+        self._temperature = temperature
+        self._max_retries = max_retries
+        self._timeout_s = timeout_s
+        self._dspy_module: Any = None
+        self._lm: Any = None
+
+    def _ensure_dspy_initialized(self) -> None:
+        """Lazy-initialize DSPy to avoid import overhead."""
+        if self._lm is not None:
+            return
+
+        try:
+            import dspy
+        except ModuleNotFoundError as exc:  # pragma: no cover
+            raise RuntimeError(
+                "DSPy is not installed. Install penguiflow[planner] or provide "
+                "a custom llm_client."
+            ) from exc
+
+        # Configure DSPy LM
+        model_name = self._llm if isinstance(self._llm, str) else self._llm["model"]
+
+        # DSPy uses LiteLLM under the hood, so all LiteLLM model names work
+        self._lm = dspy.LM(
+            model=model_name,
+            temperature=self._temperature,
+            max_tokens=4096,
+        )
+
+        logger.info(
+            "dspy_lm_initialized",
+            extra={"model": model_name, "temperature": self._temperature},
+        )
+
+    def _create_signature(self, response_format: Mapping[str, Any] | None) -> type[Any]:
+        """Create a DSPy signature for PlannerAction output.
+
+        Args:
+            response_format: OpenAI-style response_format (used to detect schema mode)
+
+        Returns:
+            DSPy Signature class with PlannerAction as output type
+        """
+        import dspy
+
+        # Import at runtime to avoid circular dependency
+        from penguiflow.planner.react import PlannerAction
+
+        if not response_format or "json_schema" not in response_format:
+            # Fallback: simple string output for non-schema requests
+            attrs = {
+                "__doc__": "Generate a response.",
+                "__annotations__": {"messages": str, "response": str},
+                "messages": dspy.InputField(),
+                "response": dspy.OutputField(),
+            }
+            return type("TextOutputSignature", (dspy.Signature,), attrs)
+
+        # Use PlannerAction directly - no schema conversion needed!
+        # DSPy will handle the Pydantic model → JSON → Pydantic validation
+        attrs = {
+            "__doc__": "Generate a structured planner action with proper type safety.",
+            "__annotations__": {"messages": str, "response": PlannerAction},
+            "messages": dspy.InputField(
+                desc="Conversation history and user query requiring a planner action"
+            ),
+            "response": dspy.OutputField(
+                desc=(
+                    "Structured planner action with thought, next_node, "
+                    "args, plan, or join"
+                )
+            ),
+        }
+        return type("PlannerActionSignature", (dspy.Signature,), attrs)
+
+    def _messages_to_text(self, messages: Sequence[Mapping[str, str]]) -> str:
+        """Convert OpenAI-style messages to a single text prompt.
+
+        Args:
+            messages: List of message dicts with role and content
+
+        Returns:
+            Concatenated text suitable for DSPy input
+        """
+        parts = []
+        for msg in messages:
+            role = msg.get("role", "user")
+            content = msg.get("content", "")
+            if role == "system":
+                parts.append(f"System: {content}")
+            elif role == "user":
+                parts.append(f"User: {content}")
+            elif role == "assistant":
+                parts.append(f"Assistant: {content}")
+            else:
+                parts.append(content)
+        return "\n\n".join(parts)
+
+    async def complete(
+        self,
+        *,
+        messages: Sequence[Mapping[str, str]],
+        response_format: Mapping[str, Any] | None = None,
+    ) -> str:
+        """Generate completion with structured output via DSPy.
+
+        Args:
+            messages: OpenAI-style message list
+            response_format: Optional JSON schema for structured output
+
+        Returns:
+            JSON string containing the structured response
+
+        Raises:
+            RuntimeError: If all retry attempts fail
+            TimeoutError: If the call exceeds timeout_s
+        """
+        import dspy
+
+        self._ensure_dspy_initialized()
+
+        # Create signature based on response format
+        signature_class = self._create_signature(response_format)
+
+        # Create DSPy predictor
+        predictor = dspy.Predict(signature_class)
+
+        # Convert messages to text
+        input_text = self._messages_to_text(messages)
+
+        last_error: Exception | None = None
+        for attempt in range(self._max_retries):
+            try:
+                async with asyncio.timeout(self._timeout_s):
+                    # DSPy doesn't have native async support yet, so we run in executor
+                    loop = asyncio.get_running_loop()
+
+                    def _run_dspy() -> Any:
+                        with dspy.context(lm=self._lm):
+                            return predictor(messages=input_text)
+
+                    result = await loop.run_in_executor(None, _run_dspy)
+
+                    # Extract response
+                    if hasattr(result, "response"):
+                        response_obj = result.response
+                        logger.debug(
+                            "dspy_response_extracted",
+                            extra={
+                                "response_type": type(response_obj).__name__,
+                                "response_preview": str(response_obj)[:200],
+                            },
+                        )
+                        if isinstance(response_obj, BaseModel):
+                            # PlannerAction or other Pydantic model - already validated!
+                            json_output = response_obj.model_dump_json()
+                            logger.debug(
+                                "dspy_pydantic_success",
+                                extra={
+                                    "model": type(response_obj).__name__,
+                                    "json_length": len(json_output),
+                                },
+                            )
+                            return json_output
+                        elif isinstance(response_obj, dict):
+                            return json.dumps(response_obj)
+                        else:
+                            # DSPy sometimes returns string - normalise to JSON
+                            response_str = str(response_obj)
+                            logger.debug(
+                                "dspy_string_response",
+                                extra={"response_preview": response_str[:500]},
+                            )
+                            normalised = self._normalise_json(response_str)
+                            if normalised is not None:
+                                logger.debug("dspy_json_normalised_success")
+                                return normalised
+                            logger.warning(
+                                "dspy_invalid_json",
+                                extra={"response": response_str[:500]},
+                            )
+                            raise RuntimeError(
+                                "DSPy returned output that could not be coerced to JSON"
+                            )
+                    else:
+                        raise RuntimeError("DSPy returned no response field")
+
+            except TimeoutError as exc:
+                last_error = exc
+                logger.warning(
+                    "dspy_timeout",
+                    extra={"attempt": attempt + 1, "timeout_s": self._timeout_s},
+                )
+            except Exception as exc:
+                last_error = exc
+                logger.warning(
+                    "dspy_error",
+                    extra={
+                        "attempt": attempt + 1,
+                        "error": str(exc),
+                        "error_type": type(exc).__name__,
+                    },
+                )
+
+            # Exponential backoff
+            if attempt < self._max_retries - 1:
+                await asyncio.sleep(2**attempt)
+
+        # All retries exhausted
+        error_msg = f"DSPy LLM call failed after {self._max_retries} attempts"
+        if last_error:
+            error_msg += f": {last_error}"
+        raise RuntimeError(error_msg)
+
+    def _normalise_json(self, text: str) -> str | None:
+        """Attempt to coerce arbitrary text into canonical JSON string."""
+        candidate = text.strip()
+        if not candidate:
+            return None
+
+        # Remove code fences if present
+        if candidate.startswith("```"):
+            parts = candidate.split("```")
+            candidate = ""
+            for part in parts:
+                stripped = part.strip()
+                if stripped.lower().startswith("json"):
+                    stripped = stripped[4:].strip()
+                if stripped:
+                    candidate = stripped
+                    break
+            if not candidate:
+                candidate = text.strip("` \n")
+
+        # Extract substring bounded by braces if extra commentary exists
+        if candidate.count("{") >= 1 and candidate.count("}") >= 1:
+            start = candidate.find("{")
+            end = candidate.rfind("}")
+            if start != -1 and end != -1 and end > start:
+                candidate = candidate[start : end + 1]
+
+        # First try strict JSON
+        try:
+            payload = json.loads(candidate)
+        except json.JSONDecodeError:
+            # Try python literal eval fallback (handles single quotes, trailing commas)
+            try:
+                payload = ast.literal_eval(candidate)
+            except Exception:
+                return None
+
+        # Ensure payload is JSON-serialisable dict
+        if isinstance(payload, (str, int, float, bool)) or payload is None:
+            return json.dumps(payload)
+        try:
+            return json.dumps(payload)
+        except TypeError:
+            return None
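Putting the pieces together: per its docstring, the client implements the JSONLLMClient protocol and is meant to slot into ReactPlanner as a custom LLM client. A hedged sketch follows; the llm_client keyword is an assumption inferred from the "provide a custom llm_client" error message above, and the planner's other constructor arguments are elided.

import asyncio

from penguiflow.planner import DSPyLLMClient, ReactPlanner

client = DSPyLLMClient(
    llm="databricks/databricks-gpt-oss-120b",  # any LiteLLM model id should work
    temperature=0.0,
    max_retries=3,
    timeout_s=60.0,
)

# Assumed keyword, per the RuntimeError text above ("provide a custom llm_client")
planner = ReactPlanner(..., llm_client=client)

# The client can also be exercised directly: complete() returns a JSON string,
# coercing fenced or single-quoted model output via _normalise_json.
async def demo() -> None:
    raw = await client.complete(
        messages=[{"role": "user", "content": "Summarise the order backlog"}],
        response_format=None,  # text mode; schema mode validates PlannerAction
    )
    print(raw)

asyncio.run(demo())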