DeepFabric 4.8.3__py3-none-any.whl → 4.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepfabric/builders.py CHANGED
@@ -83,12 +83,14 @@ class BuilderType(BaseModel):
83
83
  # Builder type constants
84
84
  SINGLE_SHOT_BUILDER = BuilderType(name="single_shot", requires_tools=False)
85
85
  SINGLE_TURN_AGENT_BUILDER = BuilderType(name="single_turn_agent", requires_tools=True)
86
- MULTI_TURN_AGENT_BUILDER = BuilderType(name="multi_turn_agent", requires_tools=True)
87
86
 
88
87
 
89
88
  def determine_builder_type(config: "DataSetGeneratorConfig") -> BuilderType:
90
89
  """Determine the appropriate builder type from configuration.
91
90
 
91
+ Agent mode is implicit when tools are configured (tool_components or custom_tools).
92
+ Single-turn agent mode is used for tool-calling conversations.
93
+
92
94
  Args:
93
95
  config: Generator configuration (Pydantic model)
94
96
 
@@ -98,20 +100,10 @@ def determine_builder_type(config: "DataSetGeneratorConfig") -> BuilderType:
98
100
  Raises:
99
101
  ValueError: If configuration is invalid or unsupported
100
102
  """
101
- # Agent mode with tools requires specialized builder
102
- if config.agent_mode:
103
- # Check that tools are configured via tool_components or custom_tools
104
- has_tools = config.tool_components or config.custom_tools
105
- if not has_tools:
106
- msg = "agent_mode requires tools to be configured via tool_components or custom_tools"
107
- raise ValueError(msg)
108
-
109
- if config.agent_mode == "multi_turn":
110
- return MULTI_TURN_AGENT_BUILDER
111
- if config.agent_mode == "single_turn":
112
- return SINGLE_TURN_AGENT_BUILDER
113
- msg = f"Unknown agent_mode: {config.agent_mode}"
114
- raise ValueError(msg)
103
+ # Agent mode is implicit when tools are configured
104
+ has_tools = config.tool_components or config.custom_tools
105
+ if has_tools:
106
+ return SINGLE_TURN_AGENT_BUILDER
115
107
 
116
108
  # Non-agent conversations use single-shot generation
117
109
  if config.conversation_type in ("basic", "cot"):
@@ -293,11 +285,5 @@ class ConversationBuilderFactory:
293
285
  return SingleTurnAgentBuilder(
294
286
  llm, config, cast("ToolRegistry", tool_registry), progress_reporter
295
287
  )
296
- if builder_type == MULTI_TURN_AGENT_BUILDER:
297
- from .builders_agent import MultiTurnAgentBuilder # noqa: PLC0415
298
-
299
- return MultiTurnAgentBuilder(
300
- llm, config, cast("ToolRegistry", tool_registry), progress_reporter
301
- )
302
288
  msg = f"Unknown builder type: {builder_type.name}"
303
289
  raise ValueError(msg)
deepfabric/builders_agent.py CHANGED
@@ -1,6 +1,5 @@
1
1
  import json
2
2
  import logging
3
- import random
4
3
  import uuid
5
4
 
6
5
  from typing import TYPE_CHECKING, Any
@@ -77,15 +76,6 @@ class UserQuestion(BaseModel):
77
76
  )
78
77
 
79
78
 
80
- class Scenario(BaseModel):
81
- """Multi-turn scenario description."""
82
-
83
- description: str = Field(
84
- description="Brief scenario description requiring multiple turns",
85
- min_length=20,
86
- )
87
-
88
-
89
79
  class AgentResponse(BaseModel):
90
80
  """Agent's response to user."""
91
81
 
@@ -101,54 +91,6 @@ class ToolOutput(BaseModel):
101
91
  result: str = Field(description="The tool's output/result", min_length=1)
102
92
 
103
93
 
104
- class ConclusionDecision(BaseModel):
105
- """Decision on whether to conclude conversation."""
106
-
107
- should_conclude: bool = Field(
108
- description="True if conversation task is complete, False if more turns needed"
109
- )
110
-
111
-
112
- class StepWithResults(BaseModel):
113
- """A ReAct step paired with its execution results.
114
-
115
- Preserves the step-by-step structure for proper conversation formatting.
116
- """
117
-
118
- step: AgentStep = Field(description="The original step with thought and pending tool calls")
119
- results: list[ToolExecution] = Field(
120
- default_factory=list, description="Tool execution results for this step"
121
- )
122
-
123
-
124
- class AgentTurnData(BaseModel):
125
- """Typed data for a single turn in an agent conversation.
126
-
127
- This model ensures type safety when building multi-turn conversations.
128
- Stores steps with their results to preserve ReAct structure.
129
- """
130
-
131
- user_message: ChatMessage = Field(description="User's message for this turn")
132
- steps_with_results: list[StepWithResults] = Field(
133
- description="ReAct steps with their execution results, preserving step-by-step order"
134
- )
135
- agent_response: ChatMessage = Field(description="Agent's final response for this turn")
136
-
137
- @property
138
- def reasoning_steps(self) -> list[ReasoningStep]:
139
- """Convert steps to ReasoningSteps for backward compatibility."""
140
- steps = [swr.step for swr in self.steps_with_results]
141
- return _convert_steps_to_reasoning(steps)
142
-
143
- @property
144
- def tool_calls(self) -> list[ToolExecution]:
145
- """Get all tool executions for backward compatibility."""
146
- result = []
147
- for swr in self.steps_with_results:
148
- result.extend(swr.results)
149
- return result
150
-
151
-
152
94
  class SingleTurnAgentBuilder(ConversationBuilder):
153
95
  """Builder for single-turn agent conversations with tool calling.
154
96
 
@@ -830,487 +772,3 @@ Remember: You have access to the tools listed above and have used them in this c
830
772
  0,
831
773
  ChatMessage(role="system", content=self.config.dataset_system_prompt or ""),
832
774
  )
833
-
834
-
835
- class MultiTurnAgentBuilder(SingleTurnAgentBuilder):
836
- """Builder for multi-turn agent conversations.
837
-
838
- Extends SingleTurnAgentBuilder to generate conversations with multiple
839
- user-agent interaction turns. Each turn can involve different tools
840
- and builds on previous context.
841
- """
842
-
843
- async def generate(
844
- self,
845
- topic_prompt: str,
846
- error_feedback: str | None = None, # noqa: ARG002
847
- ) -> Conversation:
848
- """Generate multi-turn agent conversation using ReAct loop.
849
-
850
- Args:
851
- topic_prompt: Topic or scenario to generate conversation about
852
- error_feedback: Unused, kept for interface consistency with ConversationBuilder
853
-
854
- Returns:
855
- Complete multi-turn Conversation
856
-
857
- Raises:
858
- ValueError: If generation fails or config is invalid
859
- """
860
- try:
861
- # Initialize Spin session if configured
862
- await self._ensure_spin_session()
863
-
864
- # Determine number of turns (from config range)
865
- num_turns = random.randint(self.config.min_turns, self.config.max_turns) # noqa: S311 # nosec
866
-
867
- # Track conversation context
868
- turns: list[AgentTurnData] = []
869
- all_messages: list[ChatMessage] = []
870
-
871
- # Reset duplicate tracking for this conversation
872
- self._seen_tool_signatures.clear()
873
-
874
- # Generate scenario overview
875
- scenario = await self._generate_scenario(topic_prompt, num_turns)
876
-
877
- for turn_idx in range(num_turns):
878
- # Generate this turn using ReAct loop
879
- turn_data = await self._generate_turn(turn_idx, scenario, all_messages)
880
- turns.append(turn_data)
881
-
882
- # Accumulate messages for context
883
- all_messages.extend(
884
- [
885
- turn_data.user_message,
886
- turn_data.agent_response,
887
- ]
888
- )
889
-
890
- # Count total tool calls so far
891
- total_tool_calls = sum(len(t.tool_calls) for t in turns)
892
-
893
- # Check if we should conclude early
894
- if turn_idx >= self.config.min_turns - 1 and await self._should_conclude_early(
895
- all_messages, scenario, turn_idx + 1, total_tool_calls
896
- ):
897
- break
898
-
899
- # Assemble into complete conversation
900
- return self._build_multi_turn_conversation(turns, scenario, topic_prompt)
901
- finally:
902
- # Always cleanup Spin session
903
- await self._cleanup_spin_session()
904
-
905
- async def _generate_scenario(self, topic_prompt: str, num_turns: int) -> str:
906
- """Generate a multi-turn scenario description.
907
-
908
- Args:
909
- topic_prompt: Original topic
910
- num_turns: Number of turns to plan for
911
-
912
- Returns:
913
- Scenario description that requires multiple interactions
914
- """
915
- tools_info = self._format_tools_for_prompt()
916
-
917
- prompt = (
918
- f"Generate a realistic scenario for this topic that requires {num_turns} user-agent interaction turns:\n"
919
- f"{topic_prompt}\n\n"
920
- f"Available tools:\n"
921
- f"{tools_info}\n\n"
922
- f"The scenario MUST:\n"
923
- f"- Require at least {num_turns} distinct tool calls across different turns\n"
924
- f"- Have tool dependencies (e.g., read before modify, search before create, fetch before analyze)\n"
925
- f"- Build progressively - each turn depends on results from previous turns\n"
926
- f"- NOT be completable in a single turn\n\n"
927
- f"Example structure for a {num_turns}-turn scenario:\n"
928
- f"- Turn 1: User asks to find/read/search something\n"
929
- f"- Turn 2: User asks to modify/create based on what was found\n"
930
- f"- Turn 3+: User asks to verify, take action, or build further on previous results\n\n"
931
- f"Keep it brief (2-3 sentences) but ensure multi-step complexity with clear tool dependencies."
932
- )
933
-
934
- # Always use non-streaming for reliable structured output
935
- response = await self.llm.generate_async(
936
- prompt=prompt,
937
- schema=Scenario,
938
- max_tokens=self.config.max_tokens,
939
- temperature=self.config.temperature,
940
- )
941
-
942
- # Fire-and-forget: simulate streaming for TUI preview (non-blocking)
943
- simulate_stream(
944
- self.progress_reporter,
945
- response.model_dump_json(),
946
- source="scenario_gen",
947
- )
948
-
949
- return response.description
950
-
951
- async def _generate_turn(
952
- self,
953
- turn_idx: int,
954
- scenario: str,
955
- previous_messages: list[ChatMessage],
956
- ) -> AgentTurnData:
957
- """Generate a single turn of the conversation using ReAct loop.
958
-
959
- Args:
960
- turn_idx: Index of this turn (0-based)
961
- scenario: Overall scenario description
962
- previous_messages: Messages from previous turns
963
-
964
- Returns:
965
- Complete turn data with step-by-step structure preserved
966
- """
967
- # Build context from previous messages
968
- context_text = self._format_message_context(previous_messages)
969
-
970
- # Generate user message for this turn
971
- user_message = await self._generate_turn_user_message(turn_idx, scenario, context_text)
972
-
973
- # ReAct loop for this turn - preserve step structure
974
- steps_with_results: list[StepWithResults] = []
975
- all_steps: list[AgentStep] = [] # For passing to next step generation
976
- all_tool_results: list[ToolExecution] = [] # For passing to next step generation
977
- max_steps = getattr(self.config, "max_agent_steps", 5)
978
-
979
- for step_num in range(max_steps):
980
- if self.progress_reporter:
981
- self.progress_reporter.emit_step_start(
982
- f"Turn {turn_idx + 1}, ReAct step {step_num + 1}/{max_steps}"
983
- )
984
-
985
- # Generate next step based on observations so far
986
- step = await self._generate_next_step_with_context(
987
- user_message,
988
- all_steps,
989
- all_tool_results,
990
- context_text,
991
- )
992
- all_steps.append(step)
993
-
994
- # Check if agent is done
995
- if step.is_final or not step.tool_calls:
996
- # Add final step with no results
997
- steps_with_results.append(StepWithResults(step=step, results=[]))
998
- if self.progress_reporter:
999
- self.progress_reporter.emit_step_complete(
1000
- f"Turn {turn_idx + 1}: Agent concluded after {step_num + 1} steps"
1001
- )
1002
- break
1003
-
1004
- # Execute THIS step's tools via Spin
1005
- step_results = await self._execute_step_tools(step.tool_calls)
1006
-
1007
- # Store step with its results
1008
- steps_with_results.append(StepWithResults(step=step, results=step_results))
1009
- all_tool_results.extend(step_results)
1010
-
1011
- if self.progress_reporter:
1012
- self.progress_reporter.emit_step_complete(
1013
- f"Turn {turn_idx + 1}: Executed {len(step.tool_calls)} tool(s)"
1014
- )
1015
-
1016
- # Generate agent response based on all observations
1017
- agent_response = await self._generate_agent_conclusion(
1018
- user_message, all_steps, all_tool_results, context=context_text
1019
- )
1020
-
1021
- return AgentTurnData(
1022
- user_message=user_message,
1023
- steps_with_results=steps_with_results,
1024
- agent_response=agent_response,
1025
- )
1026
-
1027
- async def _generate_next_step_with_context(
1028
- self,
1029
- user_message: ChatMessage,
1030
- previous_steps: list[AgentStep],
1031
- previous_results: list[ToolExecution],
1032
- conversation_context: str,
1033
- ) -> AgentStep:
1034
- """Generate the next ReAct step with conversation context.
1035
-
1036
- Similar to _generate_next_step but includes multi-turn conversation context.
1037
- """
1038
- tools_info = self._format_tools_for_prompt()
1039
- history = self._format_step_history(previous_steps, previous_results)
1040
-
1041
- prompt_parts = [
1042
- "## Conversation Context",
1043
- conversation_context if conversation_context else "(No previous conversation)",
1044
- "",
1045
- "## Current User Request",
1046
- user_message.content or "",
1047
- "",
1048
- "## Available Tools",
1049
- tools_info,
1050
- "",
1051
- "## Previous Actions & Results (this turn)",
1052
- history if history else "None yet - this is your first action for this turn.",
1053
- "",
1054
- "## Instructions",
1055
- "Based on the conversation context and what you've observed so far, decide your next action:",
1056
- "- If you need more information, specify tool_calls for THIS step only",
1057
- "- If you have enough information to answer, set is_final=true and leave tool_calls empty",
1058
- "- IMPORTANT: Do NOT call write/modify operations until you've confirmed current state via read operations",
1059
- "- Tool arguments must use concrete values (no placeholders like '<user_input>' or null)",
1060
- "",
1061
- "What is your next step?",
1062
- ]
1063
-
1064
- prompt = "\n".join(prompt_parts)
1065
-
1066
- # Always use non-streaming for reliable structured output
1067
- response = await self.llm.generate_async(
1068
- prompt=prompt,
1069
- schema=AgentStep,
1070
- max_tokens=self.config.max_tokens,
1071
- temperature=self.config.temperature,
1072
- )
1073
-
1074
- # Fire-and-forget: simulate streaming for TUI preview (non-blocking)
1075
- simulate_stream(
1076
- self.progress_reporter,
1077
- response.model_dump_json(),
1078
- source="agent_step_mt",
1079
- )
1080
-
1081
- return response
1082
-
1083
- async def _generate_turn_user_message(
1084
- self,
1085
- turn_idx: int,
1086
- scenario: str,
1087
- context: str,
1088
- ) -> ChatMessage:
1089
- """Generate user message for a specific turn.
1090
-
1091
- Args:
1092
- turn_idx: Turn index
1093
- scenario: Overall scenario
1094
- context: Previous conversation context
1095
-
1096
- Returns:
1097
- User message for this turn
1098
- """
1099
- turn_guidance = {
1100
- 0: "Start with the initial request or question",
1101
- 1: "Request a follow-up action or ask for more information",
1102
- 2: "Request another related action or verify results",
1103
- 3: "Final request or verification",
1104
- }
1105
-
1106
- guidance = turn_guidance.get(turn_idx, "Continue the conversation naturally")
1107
-
1108
- prompt = f"""Scenario: {scenario}
1109
-
1110
- Previous conversation:
1111
- {context if context else "(No previous conversation)"}
1112
-
1113
- Generate the user's message for turn {turn_idx + 1}.
1114
- Guidance: {guidance}
1115
-
1116
- The message should reference or build upon previous conversation if applicable.
1117
- Keep it concise and natural."""
1118
-
1119
- # Always use non-streaming for reliable structured output
1120
- response = await self.llm.generate_async(
1121
- prompt=prompt,
1122
- schema=UserQuestion,
1123
- max_tokens=self.config.max_tokens,
1124
- temperature=self.config.temperature,
1125
- )
1126
-
1127
- # Fire-and-forget: simulate streaming for TUI preview (non-blocking)
1128
- simulate_stream(
1129
- self.progress_reporter,
1130
- response.model_dump_json(),
1131
- source=f"turn_{turn_idx}_user",
1132
- turn=turn_idx + 1,
1133
- )
1134
-
1135
- return ChatMessage(role="user", content=response.content)
1136
-
1137
- async def _should_conclude_early(
1138
- self, messages: list[ChatMessage], scenario: str, current_turn: int, total_tool_calls: int
1139
- ) -> bool:
1140
- """Determine if conversation should conclude before max_turns.
1141
-
1142
- Args:
1143
- messages: All messages so far
1144
- scenario: Original scenario
1145
- current_turn: Current turn number
1146
- total_tool_calls: Total number of tool calls made so far
1147
-
1148
- Returns:
1149
- True if conversation should end
1150
- """
1151
- # Don't conclude early if we haven't met the minimum tool calls requirement
1152
- if total_tool_calls < self.config.min_tool_calls:
1153
- return False
1154
-
1155
- # Format conversation so far
1156
- conversation_text = self._format_message_context(messages)
1157
-
1158
- prompt = f"""Scenario: {scenario}
1159
-
1160
- Conversation so far (after {current_turn} turns):
1161
- {conversation_text}
1162
-
1163
- Is the user's original task/goal from the scenario fully completed?
1164
- - True: Task is complete, conversation can end naturally
1165
- - False: Task incomplete, more turns needed"""
1166
-
1167
- response = await self.llm.generate_async(
1168
- prompt=prompt,
1169
- schema=ConclusionDecision,
1170
- max_tokens=100,
1171
- temperature=0.3,
1172
- )
1173
-
1174
- return response.should_conclude
1175
-
1176
- def _format_message_context(self, messages: list[ChatMessage]) -> str:
1177
- """Format messages as readable context.
1178
-
1179
- Args:
1180
- messages: List of chat messages
1181
-
1182
- Returns:
1183
- Formatted string of messages
1184
- """
1185
- if not messages:
1186
- return ""
1187
-
1188
- lines = []
1189
- for msg in messages:
1190
- lines.append(f"{msg.role}: {msg.content}")
1191
-
1192
- return "\n".join(lines)
1193
-
1194
- def _build_multi_turn_conversation(
1195
- self, turns: list[AgentTurnData], scenario: str, topic_prompt: str = ""
1196
- ) -> Conversation:
1197
- """Assemble multi-turn conversation from turn data.
1198
-
1199
- Preserves ReAct step-by-step structure: each step's tool calls become
1200
- a separate assistant message followed by tool responses. This ensures
1201
- training data shows the agent making decisions AFTER observing results.
1202
-
1203
- Args:
1204
- turns: List of turn data
1205
- scenario: Scenario description
1206
- topic_prompt: Topic used to generate this conversation (for metadata)
1207
-
1208
- Returns:
1209
- Complete Conversation object
1210
- """
1211
- messages = []
1212
-
1213
- # Don't add system message for agent mode - it interferes with tool calling
1214
- # The system prompt teaches models to explain tool usage instead of executing tools
1215
- # For tool calling, the tool definitions themselves serve as instructions
1216
-
1217
- # Collect all reasoning steps and tool executions
1218
- all_reasoning: list[ReasoningStep] = []
1219
- all_executions: list[ToolExecution] = []
1220
-
1221
- # Add messages from each turn in correct order:
1222
- # For each turn: user -> [step: assistant(tool_calls) -> tool(responses)]* -> assistant(final)
1223
- for turn in turns:
1224
- # User message
1225
- messages.append(turn.user_message)
1226
-
1227
- # Process each ReAct step separately to preserve step-by-step structure
1228
- # This is critical: agent should see results from step N before deciding step N+1
1229
- for step_with_results in turn.steps_with_results:
1230
- step = step_with_results.step
1231
- step_results = step_with_results.results
1232
-
1233
- # Skip steps with no tool calls (e.g., final "is_final=true" step)
1234
- if not step.tool_calls:
1235
- continue
1236
-
1237
- # Build tool_calls for THIS step only
1238
- step_tool_calls: list[ToolCall] = []
1239
- step_tool_call_ids: list[str] = []
1240
- for result in step_results:
1241
- tool_call_id = generate_tool_call_id()
1242
- step_tool_call_ids.append(tool_call_id)
1243
- step_tool_calls.append(result.to_tool_call(tool_call_id))
1244
-
1245
- # Assistant message with tool_calls for this step
1246
- messages.append(
1247
- ChatMessage(
1248
- role="assistant",
1249
- content="",
1250
- tool_calls=step_tool_calls,
1251
- )
1252
- )
1253
-
1254
- # Tool response messages for this step
1255
- for idx, result in enumerate(step_results):
1256
- messages.append(
1257
- ChatMessage(
1258
- role="tool",
1259
- content=result.result,
1260
- tool_call_id=step_tool_call_ids[idx],
1261
- )
1262
- )
1263
-
1264
- # Accumulate executions for this step
1265
- all_executions.extend(step_results)
1266
-
1267
- # Final assistant response with the answer for this turn
1268
- messages.append(turn.agent_response)
1269
-
1270
- # Accumulate reasoning across all turns
1271
- all_reasoning.extend(turn.reasoning_steps)
1272
-
1273
- # Build tool context (executions only - tools are in top-level 'tools' field)
1274
- tool_context = ToolContext(
1275
- executions=all_executions,
1276
- )
1277
-
1278
- # Build reasoning trace
1279
- reasoning_trace = ReasoningTrace(
1280
- style=self.config.reasoning_style or "agent", # type: ignore
1281
- content=all_reasoning,
1282
- )
1283
-
1284
- # Build agent context
1285
- agent_context = AgentContext(
1286
- mode="multi_turn",
1287
- planning_trace=scenario,
1288
- execution_summary=f"Completed {len(turns)}-turn conversation",
1289
- )
1290
-
1291
- # Build metadata
1292
- metadata = {
1293
- "conversation_type": "cot" if reasoning_trace else "basic",
1294
- "topic": topic_prompt if topic_prompt else "general",
1295
- }
1296
-
1297
- # Insert system message if configured
1298
- self._insert_system_message_if_configured(messages)
1299
-
1300
- # Convert tools to OpenAI format, filtering based on inclusion strategy
1301
- if self.config.tool_inclusion_strategy == "used_only" and all_executions:
1302
- used_names = {te.function_name for te in all_executions}
1303
- tools_openai = [
1304
- tool.to_openai() for tool in self.tool_registry.tools if tool.name in used_names
1305
- ]
1306
- else:
1307
- tools_openai = [tool.to_openai() for tool in self.tool_registry.tools]
1308
-
1309
- return Conversation(
1310
- messages=messages,
1311
- reasoning=reasoning_trace,
1312
- tool_context=tool_context,
1313
- tools=tools_openai,
1314
- agent_context=agent_context,
1315
- metadata=metadata,
1316
- )