flock-core 0.5.0b16__py3-none-any.whl → 0.5.0b18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flock-core might be problematic. Click here for more details.
- flock/components/evaluation/declarative_evaluation_component.py +99 -40
- flock/components/utility/memory_utility_component.py +5 -3
- flock/core/__init__.py +5 -0
- flock/core/agent/default_agent.py +170 -0
- flock/core/agent/flock_agent_integration.py +54 -0
- flock/core/agent/flock_agent_lifecycle.py +9 -0
- flock/core/context/context.py +2 -3
- flock/core/execution/local_executor.py +1 -1
- flock/core/execution/temporal_executor.py +4 -6
- flock/core/flock_agent.py +17 -2
- flock/core/flock_factory.py +92 -80
- flock/core/logging/telemetry.py +7 -2
- flock/core/mcp/flock_mcp_server.py +19 -0
- flock/core/mcp/flock_mcp_tool.py +9 -53
- flock/core/mcp/mcp_config.py +22 -4
- flock/core/mixin/dspy_integration.py +107 -149
- flock/core/orchestration/flock_execution.py +7 -0
- flock/core/orchestration/flock_initialization.py +24 -0
- flock/core/serialization/serialization_utils.py +20 -20
- flock/workflow/agent_execution_activity.py +57 -83
- flock/workflow/flock_workflow.py +39 -50
- flock/workflow/temporal_setup.py +11 -3
- {flock_core-0.5.0b16.dist-info → flock_core-0.5.0b18.dist-info}/METADATA +7 -7
- {flock_core-0.5.0b16.dist-info → flock_core-0.5.0b18.dist-info}/RECORD +27 -26
- {flock_core-0.5.0b16.dist-info → flock_core-0.5.0b18.dist-info}/WHEEL +0 -0
- {flock_core-0.5.0b16.dist-info → flock_core-0.5.0b18.dist-info}/entry_points.txt +0 -0
- {flock_core-0.5.0b16.dist-info → flock_core-0.5.0b18.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"""DeclarativeEvaluationComponent - DSPy-based evaluation using the unified component system."""
|
|
3
3
|
|
|
4
4
|
from collections.abc import Generator
|
|
5
|
-
from typing import Any, override
|
|
5
|
+
from typing import Any, Literal, override
|
|
6
6
|
|
|
7
7
|
from temporalio import workflow
|
|
8
8
|
|
|
@@ -29,7 +29,7 @@ class DeclarativeEvaluationConfig(AgentComponentConfig):
|
|
|
29
29
|
model: str | None = "openai/gpt-4o"
|
|
30
30
|
use_cache: bool = True
|
|
31
31
|
temperature: float = 1.0
|
|
32
|
-
max_tokens: int =
|
|
32
|
+
max_tokens: int = 32000
|
|
33
33
|
max_retries: int = 3
|
|
34
34
|
max_tool_calls: int = 10
|
|
35
35
|
stream: bool = Field(
|
|
@@ -44,6 +44,14 @@ class DeclarativeEvaluationConfig(AgentComponentConfig):
|
|
|
44
44
|
default=False,
|
|
45
45
|
description="Include the reasoning in the output.",
|
|
46
46
|
)
|
|
47
|
+
adapter: Literal["chat", "json", "xml", "two_step"] | None = Field(
|
|
48
|
+
default=None,
|
|
49
|
+
description="Optional DSPy adapter to use for formatting/parsing.",
|
|
50
|
+
)
|
|
51
|
+
extraction_model: str | None = Field(
|
|
52
|
+
default=None,
|
|
53
|
+
description="Extraction LM for TwoStepAdapter when adapter='two_step'",
|
|
54
|
+
)
|
|
47
55
|
kwargs: dict[str, Any] = Field(default_factory=dict)
|
|
48
56
|
|
|
49
57
|
|
|
@@ -70,7 +78,7 @@ class DeclarativeEvaluationComponent(
|
|
|
70
78
|
super().__init__(**data)
|
|
71
79
|
|
|
72
80
|
@override
|
|
73
|
-
def set_model(self, model: str, temperature: float = 1.0, max_tokens: int =
|
|
81
|
+
def set_model(self, model: str, temperature: float = 1.0, max_tokens: int = 32000) -> None:
|
|
74
82
|
"""Set the model for the evaluation component."""
|
|
75
83
|
self.config.model = model
|
|
76
84
|
self.config.temperature = temperature
|
|
@@ -87,16 +95,32 @@ class DeclarativeEvaluationComponent(
|
|
|
87
95
|
"""Core evaluation logic using DSPy - migrated from DeclarativeEvaluator."""
|
|
88
96
|
logger.debug(f"Starting declarative evaluation for component '{self.name}'")
|
|
89
97
|
|
|
90
|
-
#
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
98
|
+
# Prepare LM and optional adapter; keep settings changes scoped with dspy.context
|
|
99
|
+
lm = dspy.LM(
|
|
100
|
+
model=self.config.model or agent.model,
|
|
101
|
+
cache=self.config.use_cache,
|
|
102
|
+
temperature=self.config.temperature,
|
|
103
|
+
max_tokens=self.config.max_tokens,
|
|
104
|
+
num_retries=self.config.max_retries,
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
adapter = None
|
|
108
|
+
if self.config.adapter:
|
|
109
|
+
try:
|
|
110
|
+
if self.config.adapter == "json":
|
|
111
|
+
adapter = dspy.JSONAdapter()
|
|
112
|
+
elif self.config.adapter == "xml":
|
|
113
|
+
adapter = dspy.XMLAdapter()
|
|
114
|
+
elif self.config.adapter == "two_step":
|
|
115
|
+
extractor = dspy.LM(self.config.extraction_model or "openai/gpt-4o-mini")
|
|
116
|
+
adapter = dspy.TwoStepAdapter(extraction_model=extractor)
|
|
117
|
+
else:
|
|
118
|
+
# chat is default; leave adapter=None
|
|
119
|
+
adapter = None
|
|
120
|
+
except Exception as e:
|
|
121
|
+
logger.warning(f"Failed to construct adapter '{self.config.adapter}': {e}. Proceeding without.")
|
|
122
|
+
|
|
123
|
+
with dspy.context(lm=lm, adapter=adapter):
|
|
100
124
|
try:
|
|
101
125
|
from rich.console import Console
|
|
102
126
|
console = Console()
|
|
@@ -136,11 +160,11 @@ class DeclarativeEvaluationComponent(
|
|
|
136
160
|
|
|
137
161
|
# Execute with streaming or non-streaming
|
|
138
162
|
if self.config.stream:
|
|
139
|
-
return await self._execute_streaming(agent_task, inputs, agent, console)
|
|
163
|
+
return await self._execute_streaming(_dspy_signature, agent_task, inputs, agent, console)
|
|
140
164
|
else:
|
|
141
165
|
return await self._execute_standard(agent_task, inputs, agent)
|
|
142
166
|
|
|
143
|
-
async def _execute_streaming(self, agent_task, inputs: dict[str, Any], agent: Any, console) -> dict[str, Any]:
|
|
167
|
+
async def _execute_streaming(self, signature, agent_task, inputs: dict[str, Any], agent: Any, console) -> dict[str, Any]:
|
|
144
168
|
"""Execute DSPy program in streaming mode (from original implementation)."""
|
|
145
169
|
logger.info(f"Evaluating agent '{agent.name}' with async streaming.")
|
|
146
170
|
|
|
@@ -148,34 +172,70 @@ class DeclarativeEvaluationComponent(
|
|
|
148
172
|
logger.error("agent_task is not callable, cannot stream.")
|
|
149
173
|
raise TypeError("DSPy task could not be created or is not callable.")
|
|
150
174
|
|
|
151
|
-
|
|
175
|
+
# Prepare stream listeners for any string output fields
|
|
176
|
+
listeners = []
|
|
177
|
+
try:
|
|
178
|
+
for name, field in signature.output_fields.items():
|
|
179
|
+
if field.annotation is str:
|
|
180
|
+
listeners.append(dspy.streaming.StreamListener(signature_field_name=name))
|
|
181
|
+
except Exception:
|
|
182
|
+
listeners = []
|
|
183
|
+
|
|
184
|
+
streaming_task = dspy.streamify(
|
|
185
|
+
agent_task,
|
|
186
|
+
is_async_program=True,
|
|
187
|
+
stream_listeners=listeners if listeners else None,
|
|
188
|
+
)
|
|
152
189
|
stream_generator: Generator = streaming_task(**inputs)
|
|
153
|
-
delta_content = ""
|
|
154
190
|
|
|
155
191
|
console.print("\n")
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
192
|
+
final_result: dict[str, Any] | None = None
|
|
193
|
+
async for value in stream_generator:
|
|
194
|
+
# Handle DSPy streaming artifacts
|
|
195
|
+
try:
|
|
196
|
+
from dspy.streaming import StatusMessage, StreamResponse
|
|
197
|
+
from litellm import ModelResponseStream
|
|
198
|
+
import dspy as _d
|
|
199
|
+
except Exception:
|
|
200
|
+
StatusMessage = object # type: ignore
|
|
201
|
+
StreamResponse = object # type: ignore
|
|
202
|
+
ModelResponseStream = object # type: ignore
|
|
203
|
+
_d = None
|
|
204
|
+
|
|
205
|
+
if isinstance(value, StatusMessage):
|
|
206
|
+
# Optionally surface status to console
|
|
207
|
+
console.print(f"[status] {getattr(value, 'message', '')}")
|
|
208
|
+
continue
|
|
209
|
+
if isinstance(value, StreamResponse):
|
|
210
|
+
token = getattr(value, "token", None)
|
|
211
|
+
if token:
|
|
212
|
+
console.print(token, end="")
|
|
213
|
+
continue
|
|
214
|
+
if isinstance(value, ModelResponseStream):
|
|
215
|
+
# Raw model chunk; print minimal content if available for debug
|
|
216
|
+
try:
|
|
217
|
+
chunk = value
|
|
218
|
+
text = chunk.choices[0].delta.content or ""
|
|
219
|
+
if text:
|
|
220
|
+
console.print(text, end="")
|
|
221
|
+
except Exception:
|
|
222
|
+
pass
|
|
223
|
+
continue
|
|
224
|
+
if _d and isinstance(value, _d.Prediction):
|
|
225
|
+
# Final prediction
|
|
226
|
+
result_dict, cost, lm_history = self._process_result(value, inputs)
|
|
227
|
+
self._cost = cost
|
|
228
|
+
self._lm_history = lm_history
|
|
229
|
+
final_result = result_dict
|
|
172
230
|
|
|
173
231
|
console.print("\n")
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
232
|
+
if final_result is None:
|
|
233
|
+
raise RuntimeError("Streaming did not yield a final prediction.")
|
|
234
|
+
final_result = self.filter_reasoning(
|
|
235
|
+
final_result, self.config.include_reasoning
|
|
236
|
+
)
|
|
177
237
|
return self.filter_thought_process(
|
|
178
|
-
|
|
238
|
+
final_result, self.config.include_thought_process
|
|
179
239
|
)
|
|
180
240
|
|
|
181
241
|
async def _execute_standard(self, agent_task, inputs: dict[str, Any], agent: Any) -> dict[str, Any]:
|
|
@@ -189,8 +249,8 @@ class DeclarativeEvaluationComponent(
|
|
|
189
249
|
self._cost = cost
|
|
190
250
|
self._lm_history = lm_history
|
|
191
251
|
result_dict = self.filter_reasoning(
|
|
192
|
-
|
|
193
|
-
|
|
252
|
+
result_dict, self.config.include_reasoning
|
|
253
|
+
)
|
|
194
254
|
return self.filter_thought_process(
|
|
195
255
|
result_dict, self.config.include_thought_process
|
|
196
256
|
)
|
|
@@ -226,4 +286,3 @@ class DeclarativeEvaluationComponent(
|
|
|
226
286
|
for k, v in result_dict.items()
|
|
227
287
|
if not (k.startswith("reasoning"))
|
|
228
288
|
}
|
|
229
|
-
|
|
@@ -542,7 +542,9 @@ class MemoryUtilityComponent(UtilityComponent):
|
|
|
542
542
|
"Extract key concepts from text",
|
|
543
543
|
"text: str | Input text -> concepts: list[str] | key concepts lower case",
|
|
544
544
|
)
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
545
|
+
import dspy
|
|
546
|
+
lm = dspy.LM(model=agent.model, cache=True, temperature=0.0, max_tokens=8192)
|
|
547
|
+
predictor = agent._select_task(concept_signature, "Predict")
|
|
548
|
+
with dspy.settings.context(lm=lm):
|
|
549
|
+
res = predictor(text=text)
|
|
548
550
|
return set(getattr(res, "concepts", []))
|
flock/core/__init__.py
CHANGED
|
@@ -11,6 +11,7 @@ from typing import Any
|
|
|
11
11
|
__all__ = [
|
|
12
12
|
"Flock",
|
|
13
13
|
"FlockAgent",
|
|
14
|
+
"DefaultAgent",
|
|
14
15
|
"FlockContext",
|
|
15
16
|
"FlockFactory",
|
|
16
17
|
# Components
|
|
@@ -43,6 +44,10 @@ def __getattr__(name: str) -> Any: # pragma: no cover - thin loader
|
|
|
43
44
|
from .flock_agent import FlockAgent
|
|
44
45
|
|
|
45
46
|
return FlockAgent
|
|
47
|
+
if name == "DefaultAgent":
|
|
48
|
+
from .agent.default_agent import DefaultAgent
|
|
49
|
+
|
|
50
|
+
return DefaultAgent
|
|
46
51
|
if name == "FlockContext":
|
|
47
52
|
from .context.context import FlockContext
|
|
48
53
|
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""DefaultAgent: explicit preset agent wiring standard components.
|
|
2
|
+
|
|
3
|
+
This class replaces the need for using FlockFactory for common setups by
|
|
4
|
+
providing a clear, explicit Agent class that mirrors the factory's kwargs
|
|
5
|
+
and composes the standard components under the hood.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from collections.abc import Callable
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
from flock.components.utility.metrics_utility_component import (
|
|
14
|
+
MetricsUtilityComponent,
|
|
15
|
+
MetricsUtilityConfig,
|
|
16
|
+
)
|
|
17
|
+
from flock.core.config.flock_agent_config import FlockAgentConfig
|
|
18
|
+
from flock.core.flock_agent import DynamicStr, FlockAgent
|
|
19
|
+
from flock.core.logging.formatters.themes import OutputTheme
|
|
20
|
+
from flock.core.mcp.flock_mcp_server import FlockMCPServer
|
|
21
|
+
from flock.workflow.temporal_config import TemporalActivityConfig
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DefaultAgent(FlockAgent):
    """Preset FlockAgent that assembles its own standard component stack.

    The constructor builds three components and hands them to FlockAgent,
    in this order:
    - DeclarativeEvaluationComponent, named "default_evaluator"
    - OutputUtilityComponent, named "output_formatter"
    - MetricsUtilityComponent, named "metrics_tracker"
    """

    def __init__(
        self,
        name: str,
        description: DynamicStr | None = None,
        model: str | None = None,
        input: DynamicStr | None = None,
        output: DynamicStr | None = None,
        tools: list[Callable[..., Any] | Any] | None = None,
        servers: list[str | FlockMCPServer] | None = None,
        tool_whitelist: list[str] | None = None,
        # Evaluation parameters
        use_cache: bool = False,
        temperature: float = 0.7,
        max_tokens: int | None = None,
        max_tool_calls: int = 0,
        max_retries: int = 2,
        stream: bool = False,
        include_thought_process: bool = False,
        include_reasoning: bool = False,
        # Output utility parameters
        enable_rich_tables: bool = True,
        output_theme: OutputTheme | None = None,
        no_output: bool = False,
        print_context: bool = False,
        # Agent config
        write_to_file: bool = False,
        wait_for_input: bool = False,
        # Metrics utility
        alert_latency_threshold_ms: int = 30_000,
        # Workflow
        next_agent: DynamicStr | None = None,
        temporal_activity_config: TemporalActivityConfig | None = None,
    ):
        """Build the standard components and initialize the underlying FlockAgent.

        Args:
            name: Agent name, passed to FlockAgent.
            description: Agent description, passed to FlockAgent.
            model: Model identifier (e.g. 'openai/gpt-4o'); given to both the
                evaluation config and FlockAgent. May be None.
            input: Input spec, passed to FlockAgent.
            output: Output spec, passed to FlockAgent.
            tools: Tools, passed to FlockAgent.
            servers: MCP servers (names or FlockMCPServer objects), passed to
                FlockAgent.
            tool_whitelist: Tool names the agent is allowed to use, passed to
                FlockAgent.
            use_cache: Evaluation config `use_cache`.
            temperature: Evaluation config `temperature`. Replaced by 1.0 when
                `model` contains "gpt-oss".
            max_tokens: Evaluation config `max_tokens`; only forwarded when not
                None, so the config default applies otherwise. Replaced by
                32768 when `model` contains "gpt-oss".
            max_tool_calls: Evaluation config `max_tool_calls`.
            max_retries: Evaluation config `max_retries`.
            stream: Evaluation config `stream`.
            include_thought_process: Evaluation config `include_thought_process`.
            include_reasoning: Evaluation config `include_reasoning`.
            enable_rich_tables: Output config `render_table`.
            output_theme: Output config `theme`; only forwarded when not None.
            no_output: Output config `no_output`.
            print_context: Output config `print_context`.
            write_to_file: FlockAgentConfig `write_to_file`.
            wait_for_input: FlockAgentConfig `wait_for_input`.
            alert_latency_threshold_ms: Metrics config `latency_threshold_ms`.
            next_agent: Next agent in the workflow, passed to FlockAgent.
            temporal_activity_config: Temporal activity settings, passed to
                FlockAgent.
        """
        # Deferred to call time so importing this module stays lightweight
        from flock.components.evaluation.declarative_evaluation_component import (
            DeclarativeEvaluationComponent,
            DeclarativeEvaluationConfig,
        )
        from flock.components.utility.output_utility_component import (
            OutputUtilityComponent,
            OutputUtilityConfig,
        )

        # "gpt-oss" models always run with these settings; any caller-supplied
        # temperature / max_tokens is overwritten here.
        if model and "gpt-oss" in model:
            temperature = 1.0
            max_tokens = 32_768

        # --- evaluation component ---
        evaluation_settings = {
            "model": model,
            "use_cache": use_cache,
            "temperature": temperature,
            "max_tool_calls": max_tool_calls,
            "max_retries": max_retries,
            "stream": stream,
            "include_thought_process": include_thought_process,
            "include_reasoning": include_reasoning,
        }
        if max_tokens is not None:
            evaluation_settings["max_tokens"] = max_tokens
        evaluator = DeclarativeEvaluationComponent(
            name="default_evaluator",
            config=DeclarativeEvaluationConfig(**evaluation_settings),
        )

        # --- output utility component ---
        output_settings = {
            "render_table": enable_rich_tables,
            "no_output": no_output,
            "print_context": print_context,
        }
        if output_theme is not None:
            output_settings["theme"] = output_theme
        output_component = OutputUtilityComponent(
            name="output_formatter",
            config=OutputUtilityConfig(**output_settings),
        )

        # --- metrics utility component ---
        metrics_component = MetricsUtilityComponent(
            name="metrics_tracker",
            config=MetricsUtilityConfig(
                latency_threshold_ms=alert_latency_threshold_ms
            ),
        )

        agent_config = FlockAgentConfig(
            write_to_file=write_to_file,
            wait_for_input=wait_for_input,
        )

        super().__init__(
            name=name,
            model=model,
            description=description,
            input=input,
            output=output,
            tools=tools,
            servers=servers,
            tool_whitelist=tool_whitelist,
            components=[evaluator, output_component, metrics_component],
            config=agent_config,
            next_agent=next_agent,
            temporal_activity_config=temporal_activity_config,
        )
|
|
@@ -9,6 +9,12 @@ from typing import TYPE_CHECKING, Any, TypeVar, cast
|
|
|
9
9
|
from flock.core.context.context import FlockContext
|
|
10
10
|
from flock.core.logging.logging import get_logger
|
|
11
11
|
from flock.core.mcp.flock_mcp_server import FlockMCPServer
|
|
12
|
+
from pydantic import BaseModel
|
|
13
|
+
from flock.core.registry import get_registry
|
|
14
|
+
from flock.core.serialization.serialization_utils import (
|
|
15
|
+
_format_type_to_string,
|
|
16
|
+
collect_pydantic_models,
|
|
17
|
+
)
|
|
12
18
|
|
|
13
19
|
if TYPE_CHECKING:
|
|
14
20
|
from flock.core.flock_agent import FlockAgent
|
|
@@ -83,10 +89,58 @@ class FlockAgentIntegration:
|
|
|
83
89
|
self.agent = agent
|
|
84
90
|
|
|
85
91
|
def _resolve(self, raw: str | Callable[..., str], name: str, ctx: FlockContext | None) -> str | None:
    """Turn a raw spec value into its resolved form.

    A Pydantic BaseModel subclass, or an instance of one, is converted to a
    spec string via `_build_spec_from_pydantic`. Any other callable is wrapped
    with `adapt(name, raw)` and invoked with `ctx` (or a fresh FlockContext
    when `ctx` is falsy). Everything else is returned unchanged.
    """
    try:
        model_cls = None
        if isinstance(raw, type) and issubclass(raw, BaseModel):
            model_cls = raw
        elif isinstance(raw, BaseModel):
            # Instances are described by their class
            model_cls = type(raw)
        if model_cls is not None:
            return self._build_spec_from_pydantic(model_cls)
    except Exception:
        # Introspection failure is not fatal; use the regular handling below
        pass

    if not callable(raw):
        return raw
    return adapt(name, raw)(ctx or FlockContext())
|
|
89
106
|
|
|
107
|
+
def _build_spec_from_pydantic(self, model_cls: type[BaseModel]) -> str:
    """Render a Pydantic model class as a flock I/O spec string.

    Each field becomes "name: type | description", or just "name: type" when
    the field has no description; entries are joined with ", ". A field
    without an annotation is rendered with type "str". Returns "" when the
    class has no usable `model_fields`.

    Before rendering, the model and every Pydantic model found in its field
    annotations are registered with the type registry under their class
    names. Registration is best-effort: any exception raised there is ignored.
    """
    try:
        type_registry = get_registry()
        type_registry.register_type(model_cls, name=model_cls.__name__)
        if hasattr(model_cls, "model_fields"):
            for field_info in model_cls.model_fields.values():
                annotation = getattr(field_info, "annotation", None)
                for nested_model in collect_pydantic_models(annotation):
                    type_registry.register_type(nested_model, name=nested_model.__name__)
    except Exception:
        # A registration problem must not prevent the spec from being built
        pass

    # Pydantic v2 exposes field metadata on the class as model_fields
    model_fields = getattr(model_cls, "model_fields", None)  # type: ignore[attr-defined]
    if model_fields is None:
        return ""

    entries = []
    for field_name, field_info in model_fields.items():
        annotation = getattr(field_info, "annotation", None)
        if annotation is None:
            rendered_type = "str"
        else:
            rendered_type = _format_type_to_string(annotation)

        entry = f"{field_name}: {rendered_type}"
        description = getattr(field_info, "description", None) or ""
        if description:
            entry = f"{entry} | {description}"
        entries.append(entry)

    return ", ".join(entries)
|
|
143
|
+
|
|
90
144
|
def resolve_description(self, context: FlockContext | None = None) -> str | None:
|
|
91
145
|
"""Resolve the agent's description, handling callable descriptions."""
|
|
92
146
|
return self._resolve(self.agent.description_spec, "description", context)
|
|
@@ -131,6 +131,15 @@ class FlockAgentLifecycle:
|
|
|
131
131
|
if self.agent.servers:
|
|
132
132
|
mcp_tools = await self.agent._integration.get_mcp_tools()
|
|
133
133
|
|
|
134
|
+
# Filter tools based on the agent's whitelist (if it has been provided).
#
# Each list is rebuilt instead of calling .remove() while looping over it:
# removing an element during iteration shifts the rest left, so the iterator
# skips the element right after every removal and two adjacent
# non-whitelisted tools would leave the second one available to the agent.
whitelist = self.agent.tool_whitelist
if whitelist:
    # MCP tools are matched by their `name` attribute; objects without one
    # are kept, as before. Slice assignment updates the existing list object,
    # which is what the previous in-place .remove() calls did.
    mcp_tools[:] = [
        tool
        for tool in mcp_tools
        if not hasattr(tool, "name") or tool.name in whitelist
    ]
    # Native Python tools are matched by `__name__`; objects without one
    # are kept, as before.
    registered_tools[:] = [
        tool
        for tool in registered_tools
        if not hasattr(tool, "__name__") or tool.__name__ in whitelist
    ]
|
|
142
|
+
|
|
134
143
|
# --------------------------------------------------
|
|
135
144
|
# Use evaluator component's evaluate_core method
|
|
136
145
|
# --------------------------------------------------
|
flock/core/context/context.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# src/
|
|
1
|
+
# src/flock/core/execution/temporal_executor.py
|
|
2
2
|
|
|
3
3
|
import asyncio # Import asyncio
|
|
4
4
|
from typing import TYPE_CHECKING, Any
|
|
@@ -20,6 +20,7 @@ from flock.workflow.temporal_config import (
|
|
|
20
20
|
TemporalWorkflowConfig,
|
|
21
21
|
)
|
|
22
22
|
from flock.workflow.temporal_setup import create_temporal_client, setup_worker
|
|
23
|
+
from flock.config import TEMPORAL_SERVER_URL
|
|
23
24
|
|
|
24
25
|
logger = get_logger("flock")
|
|
25
26
|
|
|
@@ -50,7 +51,7 @@ async def run_temporal_workflow(
|
|
|
50
51
|
wf_config = flock_instance.temporal_config or TemporalWorkflowConfig()
|
|
51
52
|
|
|
52
53
|
logger.debug("Creating Temporal client")
|
|
53
|
-
flock_client = await create_temporal_client()
|
|
54
|
+
flock_client = await create_temporal_client(server_address=TEMPORAL_SERVER_URL)
|
|
54
55
|
|
|
55
56
|
# Determine if we need to manage an in-process worker
|
|
56
57
|
start_worker_locally = flock_instance.temporal_start_in_process_worker
|
|
@@ -70,12 +71,9 @@ async def run_temporal_workflow(
|
|
|
70
71
|
[execute_single_agent, determine_next_agent],
|
|
71
72
|
)
|
|
72
73
|
|
|
73
|
-
# Run the worker in the background
|
|
74
|
+
# Run the worker in the background; result awaiting will block until a worker picks up tasks
|
|
74
75
|
worker_task = asyncio.create_task(worker.run())
|
|
75
76
|
logger.info("Temporal worker started in background.")
|
|
76
|
-
|
|
77
|
-
# Allow worker time to start polling (heuristic for local testing)
|
|
78
|
-
await asyncio.sleep(2)
|
|
79
77
|
else:
|
|
80
78
|
logger.info(
|
|
81
79
|
"Skipping in-process worker startup. Assuming dedicated workers are running."
|
flock/core/flock_agent.py
CHANGED
|
@@ -35,11 +35,11 @@ DynamicStr = str | Callable[[FlockContext], str]
|
|
|
35
35
|
|
|
36
36
|
class FlockAgent(BaseModel, Serializable, DSPyIntegrationMixin, ABC):
|
|
37
37
|
"""Unified FlockAgent using the new component architecture.
|
|
38
|
-
|
|
38
|
+
|
|
39
39
|
This is the next-generation FlockAgent that uses a single components list
|
|
40
40
|
instead of separate evaluator, router, and modules. All agent functionality
|
|
41
41
|
is now provided through AgentComponent instances.
|
|
42
|
-
|
|
42
|
+
|
|
43
43
|
Key changes:
|
|
44
44
|
- components: list[AgentComponent] - unified component list
|
|
45
45
|
- next_agent: str | None - explicit workflow state
|
|
@@ -79,6 +79,19 @@ class FlockAgent(BaseModel, Serializable, DSPyIntegrationMixin, ABC):
|
|
|
79
79
|
default=None,
|
|
80
80
|
description="List of callable tools the agent can use. These must be registered.",
|
|
81
81
|
)
|
|
82
|
+
|
|
83
|
+
tool_whitelist: list[str] | None = Field(
|
|
84
|
+
default=None,
|
|
85
|
+
description="Whitelist of tool names that this agent can use during execution. "
|
|
86
|
+
"If provided, the agent will only have access to tools whose names "
|
|
87
|
+
"are in this list. This applies to both native Python tools (identified "
|
|
88
|
+
"by __name__) and MCP tools (identified by name attribute). "
|
|
89
|
+
"When combined with server-level tool filtering, the agent gets access "
|
|
90
|
+
"to the intersection of both whitelists. If None, all available tools "
|
|
91
|
+
"from servers and native tools are accessible. "
|
|
92
|
+
"Recommended over server-level filtering for granular control."
|
|
93
|
+
)
|
|
94
|
+
|
|
82
95
|
servers: list[str | FlockMCPServer] | None = Field(
|
|
83
96
|
default=None,
|
|
84
97
|
description="List of MCP Servers the agent can use to enhance its capabilities.",
|
|
@@ -124,6 +137,7 @@ class FlockAgent(BaseModel, Serializable, DSPyIntegrationMixin, ABC):
|
|
|
124
137
|
output: DynamicStr | None = None,
|
|
125
138
|
tools: list[Callable[..., Any]] | None = None,
|
|
126
139
|
servers: list[str | FlockMCPServer] | None = None,
|
|
140
|
+
tool_whitelist: list[str] | None = None,
|
|
127
141
|
components: list[AgentComponent] | None = None,
|
|
128
142
|
config: FlockAgentConfig | None = None,
|
|
129
143
|
next_agent: DynamicStr | None = None,
|
|
@@ -140,6 +154,7 @@ class FlockAgent(BaseModel, Serializable, DSPyIntegrationMixin, ABC):
|
|
|
140
154
|
output=output,
|
|
141
155
|
tools=tools,
|
|
142
156
|
servers=servers,
|
|
157
|
+
tool_whitelist=tool_whitelist,
|
|
143
158
|
components=components if components is not None else [],
|
|
144
159
|
config=config,
|
|
145
160
|
temporal_activity_config=temporal_activity_config,
|