flock-core 0.4.506__py3-none-any.whl → 0.4.509__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of flock-core might be problematic. Click here for more details.

@@ -7,8 +7,15 @@ from typing import Any, Union
7
7
 
8
8
  import pandas as pd
9
9
  from box import Box
10
- from datasets import get_dataset_config_names, load_dataset
11
-
10
+ from datasets import (
11
+ Dataset as HFDataset,
12
+ get_dataset_config_names,
13
+ load_dataset,
14
+ )
15
+ from opik import Opik
16
+ from opik.evaluation import evaluate
17
+
18
+ from flock.core.flock import Flock
12
19
  from flock.core.flock_agent import FlockAgent
13
20
  from flock.core.flock_evaluator import FlockEvaluator
14
21
  from flock.core.logging.logging import get_logger
@@ -18,6 +25,64 @@ from flock.core.logging.logging import get_logger
18
25
  logger_helpers = get_logger("util.evaluation")
19
26
 
20
27
 
28
def evaluate_with_opik(
    dataset: str | Path | list[dict[str, Any]] | pd.DataFrame | HFDataset,
    dataset_name: str,
    experiment_name: str,
    start_agent: FlockAgent | str,
    input_mapping: dict[str, str],
    answer_mapping: dict[str, str],
    metrics: list[
        str
        | Callable[[Any, Any], bool | float | dict[str, Any]]
        | FlockAgent
        | FlockEvaluator
    ],
    model: str = "azure/gpt-4.1",
) -> Any:
    """Run an Opik evaluation experiment against a Flock agent.

    The dataset is normalized to a pandas DataFrame, uploaded to the Opik
    dataset called ``dataset_name`` (created if missing), and every item is
    then fed through ``start_agent`` via a single shared ``Flock`` instance.

    Args:
        dataset: Anything accepted by ``normalize_dataset``.
        dataset_name: Name of the Opik dataset to fetch or create.
        experiment_name: Name passed to ``opik.evaluation.evaluate``.
        start_agent: Agent (or agent name) that processes each item.
        input_mapping: Maps dataset column -> agent input field.
        answer_mapping: Maps dataset column holding the reference answer ->
            agent output field holding the produced answer. Only the FIRST
            entry is used; any additional entries are ignored.
        metrics: Forwarded unchanged as ``scoring_metrics``.
            NOTE(review): Opik presumably expects its own metric objects
            here rather than the str/callable/agent union inherited from
            ``Flock.evaluate`` — confirm against the Opik documentation.
        model: Model identifier handed to the shared ``Flock`` instance.

    Returns:
        Whatever ``opik.evaluation.evaluate`` returns for the experiment.

    Raises:
        ValueError: If ``answer_mapping`` is empty.
    """
    if not answer_mapping:
        # Fail before any upload instead of raising StopIteration inside
        # every evaluation task.
        raise ValueError(
            "answer_mapping must contain at least one "
            "'dataset column -> agent output field' entry."
        )

    # Resolve the (single) reference/answer pair once, not once per item.
    reference_column = next(iter(answer_mapping))
    answer_field = answer_mapping[reference_column]

    df = normalize_dataset(dataset)
    client = Opik()
    # Separate name so the caller-supplied `dataset` argument is not shadowed.
    opik_dataset = client.get_or_create_dataset(name=dataset_name)
    opik_dataset.insert_from_pandas(dataframe=df, ignore_keys=["source"])

    # One Flock instance shared by all task invocations.
    shared_flock = Flock(name="opik_eval", model=model, show_flock_banner=False)
    # NOTE(review): `start_agent` may be a plain name (str) per the type
    # hint — confirm that `Flock.add_agent` accepts that.
    shared_flock.add_agent(start_agent)

    def evaluation_task(dataset_item):
        """Run the agent on one dataset item and shape the result for Opik."""
        agent_input = {
            agent_field: dataset_item[column]
            for column, agent_field in input_mapping.items()
        }

        run_output = shared_flock.run(
            start_agent=start_agent, input=agent_input, box_result=False
        )

        reference = dataset_item[reference_column]
        answer = run_output.get(answer_field, "No answer found")

        return {
            "input": agent_input,
            "output": answer,
            "reference": reference,
        }

    # Previously the evaluation result was computed and silently discarded.
    return evaluate(
        experiment_name=experiment_name,
        dataset=opik_dataset,
        task=evaluation_task,
        scoring_metrics=metrics,
    )
84
+
85
+
21
86
  def load_and_merge_all_configs(dataset_name: str) -> pd.DataFrame:
22
87
  all_configs = get_dataset_config_names(dataset_name)
23
88
  all_dfs = []
@@ -31,9 +96,27 @@ def load_and_merge_all_configs(dataset_name: str) -> pd.DataFrame:
31
96
  all_dfs.append(df)
32
97
 
33
98
  merged_df = pd.concat(all_dfs, ignore_index=True)
99
+ logger_helpers.info(f"merged_df.head(): {merged_df.head()}")
34
100
  return merged_df
35
101
 
36
102
 
103
def import_hf_dataset_to_opik(dataset_name: str) -> pd.DataFrame:
    """Load a Hugging Face dataset and push it into an Opik dataset.

    All configs of ``dataset_name`` are loaded and merged through
    ``load_and_merge_all_configs``. A few diagnostics about the merged
    frame are logged, and the frame is inserted into the Opik dataset of
    the same name (fetched or created), ignoring the ``"source"`` key.

    The merged frame must expose ``split`` and ``config`` columns, since
    both are indexed for the diagnostic log lines.

    Returns:
        The merged DataFrame that was uploaded.
    """
    merged = load_and_merge_all_configs(dataset_name)

    # Diagnostics are emitted one by one, in the same order as before.
    log_info = logger_helpers.info
    log_info(f"type(df): {type(merged)}")
    log_info(f"df.shape: {merged.shape}")
    log_info(f"df['split'].value_counts(): {merged['split'].value_counts()}")
    log_info(f"df['config'].unique(): {merged['config'].unique()}")

    opik_client = Opik()
    target = opik_client.get_or_create_dataset(name=dataset_name)
    target.insert_from_pandas(dataframe=merged, ignore_keys=["source"])

    return merged
118
+
119
+
37
120
  def normalize_dataset(dataset: Any) -> pd.DataFrame:
38
121
  """Converts various dataset formats into a pandas DataFrame."""
39
122
  if isinstance(dataset, pd.DataFrame):
flock/core/flock.py CHANGED
@@ -17,7 +17,6 @@ from typing import (
17
17
  TypeVar,
18
18
  )
19
19
 
20
- _R = TypeVar("_R")
21
20
  # Third-party imports
22
21
  from box import Box
23
22
  from temporalio import workflow
@@ -32,8 +31,11 @@ with workflow.unsafe.imports_passed_through():
32
31
  from flock.core.execution.local_executor import (
33
32
  run_local_workflow,
34
33
  )
34
+
35
+ import opik
35
36
  from opentelemetry import trace
36
37
  from opentelemetry.baggage import get_baggage, set_baggage
38
+ from opik.integrations.dspy.callback import OpikCallback
37
39
  from pandas import DataFrame # type: ignore
38
40
  from pydantic import BaseModel, Field
39
41
 
@@ -67,7 +69,7 @@ try:
67
69
 
68
70
  PANDAS_AVAILABLE = True
69
71
  except ImportError:
70
- pd = None # type: ignore
72
+ pd = None # type: ignore
71
73
  PANDAS_AVAILABLE = False
72
74
 
73
75
  logger = get_logger("flock.api")
@@ -77,6 +79,7 @@ FlockRegistry = get_registry() # Get the registry instance
77
79
 
78
80
  # Define TypeVar for generic class methods like from_dict
79
81
  T = TypeVar("T", bound="Flock")
82
+ _R = TypeVar("_R")
80
83
 
81
84
 
82
85
  class Flock(BaseModel, Serializable):
@@ -103,6 +106,10 @@ class Flock(BaseModel, Serializable):
103
106
  default=False,
104
107
  description="If True, execute workflows via Temporal; otherwise, run locally.",
105
108
  )
109
+ enable_opik: bool = Field(
110
+ default=False,
111
+ description="If True, enable Opik for cost tracking and model management.",
112
+ )
106
113
  show_flock_banner: bool = Field(
107
114
  default=True,
108
115
  description="If True, show the Flock banner on console interactions.",
@@ -159,11 +166,11 @@ class Flock(BaseModel, Serializable):
159
166
  """
160
167
  try:
161
168
  asyncio.get_running_loop()
162
- except RuntimeError: # no loop → simple
169
+ except RuntimeError: # no loop → simple
163
170
  return asyncio.run(coro)
164
171
 
165
172
  # A loop is already running – Jupyter / ASGI / etc.
166
- ctx = contextvars.copy_context() # propagate baggage
173
+ ctx = contextvars.copy_context() # propagate baggage
167
174
  with ThreadPoolExecutor(max_workers=1) as pool:
168
175
  future = pool.submit(ctx.run, asyncio.run, coro)
169
176
  try:
@@ -179,6 +186,7 @@ class Flock(BaseModel, Serializable):
179
186
  description: str | None = None,
180
187
  show_flock_banner: bool = True,
181
188
  enable_temporal: bool = False,
189
+ enable_opik: bool = False,
182
190
  agents: list[FlockAgent] | None = None,
183
191
  servers: list[FlockMCPServerBase] | None = None,
184
192
  temporal_config: TemporalWorkflowConfig | None = None,
@@ -195,6 +203,7 @@ class Flock(BaseModel, Serializable):
195
203
  model=model,
196
204
  description=description,
197
205
  enable_temporal=enable_temporal,
206
+ enable_opik=enable_opik,
198
207
  show_flock_banner=show_flock_banner,
199
208
  temporal_config=temporal_config,
200
209
  temporal_start_in_process_worker=temporal_start_in_process_worker,
@@ -208,7 +217,6 @@ class Flock(BaseModel, Serializable):
208
217
  self._start_input = {}
209
218
  self._mgr = FlockServerManager()
210
219
 
211
-
212
220
  # Register passed servers
213
221
  # (need to be registered first so that agents can retrieve them from the registry)
214
222
  # This will also add them to the managed list of self._mgr
@@ -225,7 +233,6 @@ class Flock(BaseModel, Serializable):
225
233
  f"Item provided in 'servers' list is not a FlockMCPServer: {type(server)}"
226
234
  )
227
235
 
228
-
229
236
  # Register passed agents
230
237
  if agents:
231
238
  from flock.core.flock_agent import (
@@ -241,7 +248,7 @@ class Flock(BaseModel, Serializable):
241
248
  )
242
249
 
243
250
  # Initialize console if needed for banner
244
- if self.show_flock_banner: # Check instance attribute
251
+ if self.show_flock_banner: # Check instance attribute
245
252
  init_console(clear_screen=True, show_banner=self.show_flock_banner)
246
253
 
247
254
  # Set Temporal debug environment variable
@@ -252,6 +259,15 @@ class Flock(BaseModel, Serializable):
252
259
 
253
260
  FlockRegistry.discover_and_register_components()
254
261
 
262
+ if self.enable_opik:
263
+ import dspy
264
+
265
+ opik.configure(use_local=True, automatic_approvals=True)
266
+ opik_callback = OpikCallback(project_name=self.name, log_graph=True)
267
+ dspy.settings.configure(
268
+ callbacks=[opik_callback],
269
+ )
270
+
255
271
  logger.info(
256
272
  "Flock instance initialized",
257
273
  name=self.name,
@@ -259,39 +275,54 @@ class Flock(BaseModel, Serializable):
259
275
  enable_temporal=self.enable_temporal,
260
276
  )
261
277
 
262
- def prepare_benchmark(self, agent: FlockAgent | str | None = None, input_field: str | None = None, eval_field: str | None = None):
278
+ def prepare_benchmark(
279
+ self,
280
+ agent: FlockAgent | str | None = None,
281
+ input_field: str | None = None,
282
+ eval_field: str | None = None,
283
+ ):
263
284
  """Prepare a benchmark for the Flock instance."""
264
285
  from flock.core.flock_agent import FlockAgent as ConcreteFlockAgent
265
286
 
266
- logger.info(f"Preparing benchmark for Flock instance '{self.name}' with agent '{agent}'.")
287
+ logger.info(
288
+ f"Preparing benchmark for Flock instance '{self.name}' with agent '{agent}'."
289
+ )
267
290
 
268
291
  name = agent.name if isinstance(agent, ConcreteFlockAgent) else agent
269
292
 
270
293
  if self._agents.get(name) is None:
271
- raise ValueError(f"Agent '{name}' not found in Flock instance '{self.name}'.")
294
+ raise ValueError(
295
+ f"Agent '{name}' not found in Flock instance '{self.name}'."
296
+ )
272
297
 
273
298
  self.benchmark_agent_name = name
274
299
  self.benchmark_eval_field = eval_field
275
300
  self.benchmark_input_field = input_field
276
301
 
277
-
278
-
279
302
  def inspect(self):
280
303
  """Inspect the Flock instance."""
281
- logger.info(f"Inspecting Flock instance '{self.name}' with start agent '{self.benchmark_agent_name}' and input '{input}'.")
304
+ logger.info(
305
+ f"Inspecting Flock instance '{self.name}' with start agent '{self.benchmark_agent_name}' and input '{input}'."
306
+ )
282
307
 
283
- async def run(input: dict[str, Any])-> dict[str, Any]:
308
+ async def run(input: dict[str, Any]) -> dict[str, Any]:
284
309
  """Inspect the Flock instance."""
285
- logger.info(f"Inspecting Flock instance '{self.name}' with start agent '{self.benchmark_agent_name}' and input '{input}'.")
310
+ logger.info(
311
+ f"Inspecting Flock instance '{self.name}' with start agent '{self.benchmark_agent_name}' and input '{input}'."
312
+ )
286
313
  msg_content = input.get("messages")[0].get("content")
287
314
 
288
- agent_input = {
289
- self.benchmark_input_field: msg_content
290
- }
315
+ agent_input = {self.benchmark_input_field: msg_content}
291
316
 
292
- result = await self.run_async(start_agent=self.benchmark_agent_name, input=agent_input, box_result=False)
317
+ result = await self.run_async(
318
+ start_agent=self.benchmark_agent_name,
319
+ input=agent_input,
320
+ box_result=False,
321
+ )
293
322
 
294
- agent_output = result.get(self.benchmark_eval_field, "No answer found")
323
+ agent_output = result.get(
324
+ self.benchmark_eval_field, "No answer found"
325
+ )
295
326
 
296
327
  return {
297
328
  "output": agent_output,
@@ -299,8 +330,6 @@ class Flock(BaseModel, Serializable):
299
330
 
300
331
  return run
301
332
 
302
-
303
-
304
333
  def _set_temporal_debug_flag(self):
305
334
  """Set or remove LOCAL_DEBUG env var based on enable_temporal."""
306
335
  if not self.enable_temporal:
@@ -373,10 +402,14 @@ class Flock(BaseModel, Serializable):
373
402
  if agent.name in self._agents:
374
403
  # Allow re-adding the same instance, but raise error for different instance with same name
375
404
  if self._agents[agent.name] is not agent:
376
- raise ValueError(f"Agent with name '{agent.name}' already exists with a different instance.")
405
+ raise ValueError(
406
+ f"Agent with name '{agent.name}' already exists with a different instance."
407
+ )
377
408
  else:
378
- logger.debug(f"Agent '{agent.name}' is already added. Skipping.")
379
- return agent # Return existing agent
409
+ logger.debug(
410
+ f"Agent '{agent.name}' is already added. Skipping."
411
+ )
412
+ return agent # Return existing agent
380
413
 
381
414
  self._agents[agent.name] = agent
382
415
  FlockRegistry.register_agent(agent) # Register globally
@@ -415,7 +448,7 @@ class Flock(BaseModel, Serializable):
415
448
  box_result: bool = True,
416
449
  agents: list[FlockAgent] | None = None,
417
450
  servers: list[FlockMCPServerBase] | None = None,
418
- memo: dict[str, Any] | None = None
451
+ memo: dict[str, Any] | None = None,
419
452
  ) -> Box | dict:
420
453
  return self._run_sync(
421
454
  self.run_async(
@@ -430,7 +463,6 @@ class Flock(BaseModel, Serializable):
430
463
  )
431
464
  )
432
465
 
433
-
434
466
  async def run_async(
435
467
  self,
436
468
  start_agent: FlockAgent | str | None = None,
@@ -474,11 +506,13 @@ class Flock(BaseModel, Serializable):
474
506
  start_agent_name: str | None = None
475
507
  if isinstance(start_agent, ConcreteFlockAgent):
476
508
  start_agent_name = start_agent.name
477
- if start_agent_name not in self._agents: # Add if not already present
509
+ if (
510
+ start_agent_name not in self._agents
511
+ ): # Add if not already present
478
512
  self.add_agent(start_agent)
479
513
  elif isinstance(start_agent, str):
480
514
  start_agent_name = start_agent
481
- else: # start_agent is None
515
+ else: # start_agent is None
482
516
  start_agent_name = self._start_agent_name
483
517
 
484
518
  # Default to first agent if only one exists and none specified
@@ -516,23 +550,27 @@ class Flock(BaseModel, Serializable):
516
550
 
517
551
  try:
518
552
  resolved_start_agent = self._agents.get(start_agent_name)
519
- if not resolved_start_agent: # Should have been handled by now
520
- raise ValueError(f"Start agent '{start_agent_name}' not found after checks.")
553
+ if not resolved_start_agent: # Should have been handled by now
554
+ raise ValueError(
555
+ f"Start agent '{start_agent_name}' not found after checks."
556
+ )
521
557
 
522
558
  run_context = context if context else FlockContext()
523
- set_baggage("run_id", effective_run_id) # Set for OpenTelemetry
559
+ set_baggage("run_id", effective_run_id) # Set for OpenTelemetry
524
560
 
525
561
  initialize_context(
526
562
  run_context,
527
563
  start_agent_name,
528
564
  run_input,
529
565
  effective_run_id,
530
- not self.enable_temporal, # local_debug is inverse of enable_temporal
566
+ not self.enable_temporal, # local_debug is inverse of enable_temporal
531
567
  self.model or resolved_start_agent.model or DEFAULT_MODEL,
532
568
  )
533
569
  # Add agent definitions to context for routing/serialization within workflow
534
570
  for agent_name_iter, agent_instance_iter in self.agents.items():
535
- agent_dict_repr = agent_instance_iter.to_dict() # Agents handle their own serialization
571
+ agent_dict_repr = (
572
+ agent_instance_iter.to_dict()
573
+ ) # Agents handle their own serialization
536
574
  run_context.add_agent_definition(
537
575
  agent_type=type(agent_instance_iter),
538
576
  agent_name=agent_name_iter,
@@ -568,13 +606,14 @@ class Flock(BaseModel, Serializable):
568
606
  # Execute workflow
569
607
  if not self.enable_temporal:
570
608
  result = await run_local_workflow(
571
- run_context, box_result=False # Boxing handled below
609
+ run_context,
610
+ box_result=False, # Boxing handled below
572
611
  )
573
612
  else:
574
613
  result = await run_temporal_workflow(
575
- self, # Pass the Flock instance
614
+ self, # Pass the Flock instance
576
615
  run_context,
577
- box_result=False, # Boxing handled below
616
+ box_result=False, # Boxing handled below
578
617
  memo=memo,
579
618
  )
580
619
 
@@ -616,7 +655,6 @@ class Flock(BaseModel, Serializable):
616
655
  }
617
656
  return Box(error_output) if box_result else error_output
618
657
 
619
-
620
658
  # --- Batch Processing (Delegation) ---
621
659
  async def run_batch_async(
622
660
  self,
@@ -689,19 +727,18 @@ class Flock(BaseModel, Serializable):
689
727
  )
690
728
  )
691
729
 
692
-
693
730
  # --- Evaluation (Delegation) ---
694
731
  async def evaluate_async(
695
732
  self,
696
- dataset: str | Path | list[dict[str, Any]] | DataFrame | Dataset, # type: ignore
733
+ dataset: str | Path | list[dict[str, Any]] | DataFrame | Dataset, # type: ignore
697
734
  start_agent: FlockAgent | str,
698
735
  input_mapping: dict[str, str],
699
736
  answer_mapping: dict[str, str],
700
737
  metrics: list[
701
738
  str
702
739
  | Callable[[Any, Any], bool | float | dict[str, Any]]
703
- | FlockAgent # Type hint only
704
- | FlockEvaluator # Type hint only
740
+ | FlockAgent # Type hint only
741
+ | FlockEvaluator # Type hint only
705
742
  ],
706
743
  metric_configs: dict[str, dict[str, Any]] | None = None,
707
744
  static_inputs: dict[str, Any] | None = None,
@@ -713,7 +750,7 @@ class Flock(BaseModel, Serializable):
713
750
  return_dataframe: bool = True,
714
751
  silent_mode: bool = False,
715
752
  metadata_columns: list[str] | None = None,
716
- ) -> DataFrame | list[dict[str, Any]]: # type: ignore
753
+ ) -> DataFrame | list[dict[str, Any]]: # type: ignore
717
754
  """Evaluates the Flock's performance against a dataset (delegated)."""
718
755
  # Import processor locally
719
756
  from flock.core.execution.evaluation_executor import (
@@ -741,15 +778,15 @@ class Flock(BaseModel, Serializable):
741
778
 
742
779
  def evaluate(
743
780
  self,
744
- dataset: str | Path | list[dict[str, Any]] | DataFrame | Dataset, # type: ignore
781
+ dataset: str | Path | list[dict[str, Any]] | DataFrame | Dataset, # type: ignore
745
782
  start_agent: FlockAgent | str,
746
783
  input_mapping: dict[str, str],
747
784
  answer_mapping: dict[str, str],
748
785
  metrics: list[
749
786
  str
750
787
  | Callable[[Any, Any], bool | float | dict[str, Any]]
751
- | FlockAgent # Type hint only
752
- | FlockEvaluator # Type hint only
788
+ | FlockAgent # Type hint only
789
+ | FlockEvaluator # Type hint only
753
790
  ],
754
791
  metric_configs: dict[str, dict[str, Any]] | None = None,
755
792
  static_inputs: dict[str, Any] | None = None,
@@ -761,7 +798,7 @@ class Flock(BaseModel, Serializable):
761
798
  return_dataframe: bool = True,
762
799
  silent_mode: bool = False,
763
800
  metadata_columns: list[str] | None = None,
764
- ) -> DataFrame | list[dict[str, Any]]: # type: ignore
801
+ ) -> DataFrame | list[dict[str, Any]]: # type: ignore
765
802
  return self._run_sync(
766
803
  self.evaluate_async(
767
804
  dataset=dataset,
@@ -781,18 +818,22 @@ class Flock(BaseModel, Serializable):
781
818
  metadata_columns=metadata_columns,
782
819
  )
783
820
  )
821
+
784
822
  # --- Server & CLI Starters (Delegation) ---
785
823
  def start_api(
786
824
  self,
787
825
  host: str = "127.0.0.1",
788
826
  port: int = 8344,
789
827
  server_name: str = "Flock Server",
790
- create_ui: bool = True, # Default to True for the integrated experience
828
+ create_ui: bool = True, # Default to True for the integrated experience
791
829
  ui_theme: str | None = None,
792
- custom_endpoints: Sequence[FlockEndpoint] | dict[tuple[str, list[str] | None], Callable[..., Any]] | None = None,
830
+ custom_endpoints: Sequence[FlockEndpoint]
831
+ | dict[tuple[str, list[str] | None], Callable[..., Any]]
832
+ | None = None,
793
833
  ) -> None:
794
834
  """Starts a unified REST API server and/or Web UI for this Flock instance."""
795
835
  import warnings
836
+
796
837
  warnings.warn(
797
838
  "start_api() is deprecated and will be removed in a future release. "
798
839
  "Use serve() instead.",
@@ -825,7 +866,9 @@ class Flock(BaseModel, Serializable):
825
866
  chat_history_key: str = "history",
826
867
  chat_response_key: str = "response",
827
868
  ui_theme: str | None = None,
828
- custom_endpoints: Sequence[FlockEndpoint] | dict[tuple[str, list[str] | None], Callable[..., Any]] | None = None,
869
+ custom_endpoints: Sequence[FlockEndpoint]
870
+ | dict[tuple[str, list[str] | None], Callable[..., Any]]
871
+ | None = None,
829
872
  ) -> None:
830
873
  """Launch an HTTP server that exposes the core REST API and, optionally, the
831
874
  browser-based UI.
@@ -871,7 +914,9 @@ class Flock(BaseModel, Serializable):
871
914
 
872
915
  def start_cli(
873
916
  self,
874
- start_agent: FlockAgent | str | None = None, # Added start_agent to match method signature in file_26
917
+ start_agent: FlockAgent
918
+ | str
919
+ | None = None, # Added start_agent to match method signature in file_26
875
920
  server_name: str = "Flock CLI",
876
921
  show_results: bool = False,
877
922
  edit_mode: bool = False,
@@ -893,14 +938,13 @@ class Flock(BaseModel, Serializable):
893
938
  # If start_agent is crucial here, start_flock_cli needs to handle it.
894
939
  logger.info(f"Starting CLI for Flock '{self.name}'...")
895
940
  start_flock_cli(
896
- flock=self, # Pass the Flock instance
941
+ flock=self, # Pass the Flock instance
897
942
  # start_agent=start_agent, # This argument is not in the definition of start_flock_cli in file_50
898
943
  server_name=server_name,
899
944
  show_results=show_results,
900
- edit_mode=edit_mode
945
+ edit_mode=edit_mode,
901
946
  )
902
947
 
903
-
904
948
  # --- Serialization Delegation Methods ---
905
949
  def to_dict(self, path_type: str = "relative") -> dict[str, Any]:
906
950
  """Serialize Flock instance to dictionary using FlockSerializer."""
@@ -917,12 +961,14 @@ class Flock(BaseModel, Serializable):
917
961
 
918
962
  # --- Static Method Loader (Delegates to loader module) ---
919
963
  @staticmethod
920
- def load_from_file(file_path: str) -> Flock: # Ensure return type is Flock
964
+ def load_from_file(file_path: str) -> Flock: # Ensure return type is Flock
921
965
  """Load a Flock instance from various file formats (delegates to loader)."""
922
966
  from flock.core.util.loader import load_flock_from_file
923
967
 
924
968
  loaded_flock = load_flock_from_file(file_path)
925
969
  # Ensure the loaded object is indeed a Flock instance
926
970
  if not isinstance(loaded_flock, Flock):
927
- raise TypeError(f"Loaded object from {file_path} is not a Flock instance, but {type(loaded_flock)}")
971
+ raise TypeError(
972
+ f"Loaded object from {file_path} is not a Flock instance, but {type(loaded_flock)}"
973
+ )
928
974
  return loaded_flock
flock/core/flock_agent.py CHANGED
@@ -691,8 +691,8 @@ class FlockAgent(BaseModel, Serializable, DSPyIntegrationMixin, ABC):
691
691
 
692
692
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
693
693
  filename = f"{agent_name}_output_{timestamp}.json"
694
- filepath = os.path.join("output/", filename)
695
- os.makedirs("output/", exist_ok=True)
694
+ filepath = os.path.join(".flock/output/", filename)
695
+ os.makedirs(".flock/output/", exist_ok=True)
696
696
 
697
697
  output_data = {
698
698
  "agent": agent_name,
@@ -45,9 +45,16 @@ class FlockEvaluator(ABC, BaseModel):
45
45
  description="Evaluator configuration",
46
46
  )
47
47
 
48
+ def __init__(self, **data):
49
+ super().__init__(**data)
50
+
48
51
  @abstractmethod
49
52
  async def evaluate(
50
- self, agent: Any, inputs: dict[str, Any], tools: list[Any], mcp_tools: list[Any] | None = None
53
+ self,
54
+ agent: Any,
55
+ inputs: dict[str, Any],
56
+ tools: list[Any],
57
+ mcp_tools: list[Any] | None = None,
51
58
  ) -> dict[str, Any]:
52
59
  """Evaluate inputs to produce outputs."""
53
60
  pass
@@ -321,6 +321,8 @@ class FlockFactory:
321
321
  wait_for_input: bool = False,
322
322
  temperature: float = 0.0,
323
323
  max_tokens: int = 8192,
324
+ max_tool_calls: int = 10,
325
+ max_retries: int = 3,
324
326
  alert_latency_threshold_ms: int = 30000,
325
327
  no_output: bool = False,
326
328
  print_context: bool = False,
@@ -343,6 +345,8 @@ class FlockFactory:
343
345
  use_cache=use_cache,
344
346
  max_tokens=max_tokens,
345
347
  temperature=temperature,
348
+ max_tool_calls=max_tool_calls,
349
+ max_retries=max_retries,
346
350
  stream=stream,
347
351
  include_thought_process=include_thought_process,
348
352
  )
@@ -75,7 +75,9 @@ def _resolve_type_string(type_str: str) -> type:
75
75
  try:
76
76
  return tuple(ast.literal_eval(f"[{args_str}]"))
77
77
  except (SyntaxError, ValueError) as exc:
78
- raise ValueError(f"Cannot parse {args_str!r} as literals") from exc
78
+ raise ValueError(
79
+ f"Cannot parse {args_str!r} as literals"
80
+ ) from exc
79
81
 
80
82
  literal_args = parse_literal_args(args_str)
81
83
  logger.debug(
@@ -250,8 +252,7 @@ class DSPyIntegrationMixin:
250
252
  f"Failed to create dynamic type 'dspy_{agent_name}': {e}",
251
253
  exc_info=True,
252
254
  )
253
- raise TypeError(
254
- f"Could not create DSPy signature type: {e}") from e
255
+ raise TypeError(f"Could not create DSPy signature type: {e}") from e
255
256
 
256
257
  def _configure_language_model(
257
258
  self,
@@ -308,6 +309,7 @@ class DSPyIntegrationMixin:
308
309
  self,
309
310
  signature: Any,
310
311
  override_evaluator_type: AgentType,
312
+ max_tool_calls: int = 10,
311
313
  tools: list[Any] | None = None,
312
314
  mcp_tools: list[Any] | None = None,
313
315
  kwargs: dict[str, Any] = {},
@@ -370,7 +372,7 @@ class DSPyIntegrationMixin:
370
372
  dspy_program = dspy.ChainOfThought(signature, **kwargs)
371
373
  elif selected_type == "ReAct":
372
374
  if not kwargs:
373
- kwargs = {"max_iters": 10}
375
+ kwargs = {"max_iters": max_tool_calls}
374
376
  dspy_program = dspy.ReAct(
375
377
  signature, tools=merged_tools or [], **kwargs
376
378
  )
@@ -427,8 +429,7 @@ class DSPyIntegrationMixin:
427
429
  final_result = {**inputs, **output_dict}
428
430
 
429
431
  lm = dspy.settings.get("lm")
430
- cost = sum([x["cost"]
431
- for x in lm.history if x["cost"] is not None])
432
+ cost = sum([x["cost"] for x in lm.history if x["cost"] is not None])
432
433
  lm_history = lm.history
433
434
 
434
435
  return final_result, cost, lm_history