deepfabric-4.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. deepfabric/__init__.py +70 -0
  2. deepfabric/__main__.py +6 -0
  3. deepfabric/auth.py +382 -0
  4. deepfabric/builders.py +303 -0
  5. deepfabric/builders_agent.py +1304 -0
  6. deepfabric/cli.py +1288 -0
  7. deepfabric/config.py +899 -0
  8. deepfabric/config_manager.py +251 -0
  9. deepfabric/constants.py +94 -0
  10. deepfabric/dataset_manager.py +534 -0
  11. deepfabric/error_codes.py +581 -0
  12. deepfabric/evaluation/__init__.py +47 -0
  13. deepfabric/evaluation/backends/__init__.py +32 -0
  14. deepfabric/evaluation/backends/ollama_backend.py +137 -0
  15. deepfabric/evaluation/backends/tool_call_parsers.py +409 -0
  16. deepfabric/evaluation/backends/transformers_backend.py +326 -0
  17. deepfabric/evaluation/evaluator.py +845 -0
  18. deepfabric/evaluation/evaluators/__init__.py +13 -0
  19. deepfabric/evaluation/evaluators/base.py +104 -0
  20. deepfabric/evaluation/evaluators/builtin/__init__.py +5 -0
  21. deepfabric/evaluation/evaluators/builtin/tool_calling.py +93 -0
  22. deepfabric/evaluation/evaluators/registry.py +66 -0
  23. deepfabric/evaluation/inference.py +155 -0
  24. deepfabric/evaluation/metrics.py +397 -0
  25. deepfabric/evaluation/parser.py +304 -0
  26. deepfabric/evaluation/reporters/__init__.py +13 -0
  27. deepfabric/evaluation/reporters/base.py +56 -0
  28. deepfabric/evaluation/reporters/cloud_reporter.py +195 -0
  29. deepfabric/evaluation/reporters/file_reporter.py +61 -0
  30. deepfabric/evaluation/reporters/multi_reporter.py +56 -0
  31. deepfabric/exceptions.py +67 -0
  32. deepfabric/factory.py +26 -0
  33. deepfabric/generator.py +1084 -0
  34. deepfabric/graph.py +545 -0
  35. deepfabric/hf_hub.py +214 -0
  36. deepfabric/kaggle_hub.py +219 -0
  37. deepfabric/llm/__init__.py +41 -0
  38. deepfabric/llm/api_key_verifier.py +534 -0
  39. deepfabric/llm/client.py +1206 -0
  40. deepfabric/llm/errors.py +105 -0
  41. deepfabric/llm/rate_limit_config.py +262 -0
  42. deepfabric/llm/rate_limit_detector.py +278 -0
  43. deepfabric/llm/retry_handler.py +270 -0
  44. deepfabric/metrics.py +212 -0
  45. deepfabric/progress.py +262 -0
  46. deepfabric/prompts.py +290 -0
  47. deepfabric/schemas.py +1000 -0
  48. deepfabric/spin/__init__.py +6 -0
  49. deepfabric/spin/client.py +263 -0
  50. deepfabric/spin/models.py +26 -0
  51. deepfabric/stream_simulator.py +90 -0
  52. deepfabric/tools/__init__.py +5 -0
  53. deepfabric/tools/defaults.py +85 -0
  54. deepfabric/tools/loader.py +87 -0
  55. deepfabric/tools/mcp_client.py +677 -0
  56. deepfabric/topic_manager.py +303 -0
  57. deepfabric/topic_model.py +20 -0
  58. deepfabric/training/__init__.py +35 -0
  59. deepfabric/training/api_key_prompt.py +302 -0
  60. deepfabric/training/callback.py +363 -0
  61. deepfabric/training/metrics_sender.py +301 -0
  62. deepfabric/tree.py +438 -0
  63. deepfabric/tui.py +1267 -0
  64. deepfabric/update_checker.py +166 -0
  65. deepfabric/utils.py +150 -0
  66. deepfabric/validation.py +143 -0
  67. deepfabric-4.4.0.dist-info/METADATA +702 -0
  68. deepfabric-4.4.0.dist-info/RECORD +71 -0
  69. deepfabric-4.4.0.dist-info/WHEEL +4 -0
  70. deepfabric-4.4.0.dist-info/entry_points.txt +2 -0
  71. deepfabric-4.4.0.dist-info/licenses/LICENSE +201 -0
deepfabric/builders.py ADDED
@@ -0,0 +1,303 @@
+ import logging
+
+ from abc import ABC, abstractmethod
+ from typing import TYPE_CHECKING, cast
+
+ from pydantic import BaseModel, Field
+
+ from .progress import ProgressReporter
+ from .schemas import ChatMessage, Conversation
+ from .stream_simulator import simulate_stream
+
+ if TYPE_CHECKING:
+     from .generator import DataSetGeneratorConfig
+     from .llm import LLMClient
+     from .schemas import ToolRegistry
+
+ logger = logging.getLogger(__name__)
+
+
+ class ConversationBuilder(ABC):
+     """Abstract base class for conversation builders.
+
+     Each builder implements a specific strategy for generating conversations.
+     Builders receive typed configuration and dependencies via constructor.
+
+     Attributes:
+         llm: LLM client for generation
+         config: Typed configuration for the generator
+         tool_registry: Optional tool registry for tool-calling conversations
+     """
+
+     def __init__(
+         self,
+         llm: "LLMClient",
+         config: "DataSetGeneratorConfig",
+         tool_registry: "ToolRegistry | None" = None,
+         progress_reporter: ProgressReporter | None = None,
+     ):
+         """Initialize the conversation builder.
+
+         Args:
+             llm: LLM client for making generation requests
+             config: Generator configuration (must be Pydantic model)
+             tool_registry: Optional tool registry for tool-calling
+             progress_reporter: Optional progress reporter for streaming feedback
+         """
+         self.llm = llm
+         self.config = config
+         self.tool_registry = tool_registry
+         self.progress_reporter = progress_reporter
+
+     @abstractmethod
+     async def generate(self, topic_prompt: str, error_feedback: str | None = None) -> Conversation:
+         """Generate a complete conversation.
+
+         Args:
+             topic_prompt: The topic/scenario prompt to generate conversation about
+             error_feedback: Optional error message from a previous failed attempt,
+                 used to help the model correct its output on retry
+
+         Returns:
+             Complete Conversation object (Pydantic model)
+
+         Raises:
+             ValueError: If generation fails validation
+         """
+         pass
+
+
+ class BuilderType(BaseModel):
+     """Type discriminator for builder selection.
+
+     This model ensures type-safe builder selection based on configuration.
+     """
+
+     name: str = Field(description="Builder type name")
+     requires_tools: bool = Field(default=False, description="Whether this builder requires tools")
+
+     class Config:
+         frozen = True
+
+
+ # Builder type constants
+ SINGLE_SHOT_BUILDER = BuilderType(name="single_shot", requires_tools=False)
+ SINGLE_TURN_AGENT_BUILDER = BuilderType(name="single_turn_agent", requires_tools=True)
+ MULTI_TURN_AGENT_BUILDER = BuilderType(name="multi_turn_agent", requires_tools=True)
+
+
+ def determine_builder_type(config: "DataSetGeneratorConfig") -> BuilderType:
+     """Determine the appropriate builder type from configuration.
+
+     Args:
+         config: Generator configuration (Pydantic model)
+
+     Returns:
+         BuilderType indicating which builder to use
+
+     Raises:
+         ValueError: If configuration is invalid or unsupported
+     """
+     # Agent mode with tools requires specialized builder
+     if config.agent_mode:
+         # Check that tools are configured via tool_components or custom_tools
+         has_tools = config.tool_components or config.custom_tools
+         if not has_tools:
+             msg = "agent_mode requires tools to be configured via tool_components or custom_tools"
+             raise ValueError(msg)
+
+         if config.agent_mode == "multi_turn":
+             return MULTI_TURN_AGENT_BUILDER
+         if config.agent_mode == "single_turn":
+             return SINGLE_TURN_AGENT_BUILDER
+         msg = f"Unknown agent_mode: {config.agent_mode}"
+         raise ValueError(msg)
+
+     # Non-agent conversations use single-shot generation
+     if config.conversation_type in ("basic", "chain_of_thought"):
+         return SINGLE_SHOT_BUILDER
+
+     msg = f"Cannot determine builder type for conversation_type={config.conversation_type}"
+     raise ValueError(msg)
+
+
+ class SingleShotBuilder(ConversationBuilder):
+     """Builder for simple conversations using single-shot JSON generation.
+
+     This builder generates the entire conversation in one LLM call using
+     structured output with JSON schema validation. Suitable for:
+     - Basic Q&A conversations
+     - Chain-of-thought reasoning without tools
+     - Any conversation that can be generated in one pass
+     """
+
+     async def generate(self, topic_prompt: str, error_feedback: str | None = None) -> Conversation:
+         """Generate conversation using single LLM call with JSON schema.
+
+         Args:
+             topic_prompt: Topic or scenario to generate conversation about
+             error_feedback: Optional error message from a previous failed attempt
+
+         Returns:
+             Complete Conversation object
+
+         Raises:
+             ValueError: If LLM fails to generate valid conversation
+         """
+         # Build the generation prompt
+         generation_prompt = self._build_prompt(topic_prompt, error_feedback)
+
+         # Always use non-streaming for reliable structured output
+         conversation = await self._generate_non_streaming(generation_prompt)
+
+         # Fire-and-forget: simulate streaming for TUI preview (non-blocking)
+         simulate_stream(
+             self.progress_reporter,
+             conversation.model_dump_json(indent=2),
+             source="conversation_gen",
+         )
+
+         # Ensure type checker knows this is a Conversation
+         conversation = cast(Conversation, conversation)
+
+         # Validate that generated conversation starts with user message
+         # (system messages are added by builder, not generated by LLM)
+         if conversation.messages and conversation.messages[0].role != "user":
+             msg = (
+                 f"Generated conversation must start with 'user' message, got '{conversation.messages[0].role}'. "
+                 "System messages are added automatically by the builder."
+             )
+             raise ValueError(msg)
+
+         # Insert system message if configured
+         if self.config.sys_msg:
+             conversation.messages.insert(
+                 0,
+                 ChatMessage(role="system", content=self.config.dataset_system_prompt or ""),
+             )
+
+         return conversation
+
+     async def _generate_non_streaming(self, prompt: str) -> Conversation:
+         """Generate conversation using non-streaming LLM call.
+
+         Args:
+             prompt: The complete generation prompt
+
+         Returns:
+             Generated Conversation object
+         """
+         return await self.llm.generate_async(
+             prompt=prompt,
+             schema=Conversation,
+             max_retries=self.config.max_retries,
+             max_tokens=self.config.max_tokens,
+             temperature=self.config.temperature,
+         )
+
+     def _build_prompt(self, topic_prompt: str, error_feedback: str | None = None) -> str:
+         """Build the generation prompt for single-shot generation.
+
+         Args:
+             topic_prompt: The topic to generate about
+             error_feedback: Optional error message from a previous failed attempt
+
+         Returns:
+             Complete prompt string for the LLM
+         """
+         # Use the generation system prompt as the base
+         prompt_parts = [self.config.generation_system_prompt]
+
+         # Add topic/scenario
+         prompt_parts.append(f"\nTopic/Scenario: {topic_prompt}")
+
+         # Add error feedback if this is a retry
+         if error_feedback:
+             prompt_parts.append(
+                 f"\n\nRETRY: {error_feedback}. Use real values, not null/empty/placeholders."
+             )
+
+         # Add any additional instructions
+         if self.config.instructions:
+             prompt_parts.append(f"\nAdditional Instructions: {self.config.instructions}")
+
+         # Add reasoning-specific guidance based on style
+         if self.config.conversation_type == "chain_of_thought":
+             if self.config.reasoning_style == "freetext":
+                 prompt_parts.append(
+                     "\nREASONING FORMAT: Generate natural, conversational reasoning content (string format). "
+                     "Show your actual thinking process - explore ideas, consider alternatives, work through the problem. "
+                     "Think like a human would: 'Hmm, let me think about this...', 'Wait, that doesn't work...', "
+                     "'Actually, if I approach it this way...'. "
+                     "DO NOT use numbered steps or structured outlines. "
+                     "Use the 'content' field in reasoning as a plain string (not a list)."
+                 )
+             elif self.config.reasoning_style == "agent":
+                 prompt_parts.append(
+                     "\nREASONING FORMAT: Generate structured reasoning steps as a list of ReasoningStep objects. "
+                     "Each step should have clear thought and action fields."
+                 )
+
+         # Add explicit structure requirement
+         prompt_parts.append(
+             "\nIMPORTANT: Generate the conversation messages array starting with a 'user' message "
+             "(the user's question or request), followed by an 'assistant' message (the response). "
+             "Do NOT include any 'system' role messages - those are added separately."
+         )
+
+         return "\n".join(prompt_parts)
+
+
+ class ConversationBuilderFactory:
+     """Factory for creating conversation builders.
+
+     Provides type-safe builder instantiation based on configuration.
+     """
+
+     @staticmethod
+     def create(
+         config: "DataSetGeneratorConfig",
+         llm: "LLMClient",
+         tool_registry: "ToolRegistry | None" = None,
+         progress_reporter: ProgressReporter | None = None,
+     ) -> ConversationBuilder:
+         """Create the appropriate conversation builder.
+
+         Args:
+             config: Generator configuration (Pydantic model)
+             llm: LLM client for generation
+             tool_registry: Optional tool registry (required for agent builders)
+             progress_reporter: Optional progress reporter for streaming feedback
+
+         Returns:
+             Appropriate ConversationBuilder instance
+
+         Raises:
+             ValueError: If configuration is invalid or builder requirements not met
+         """
+         builder_type = determine_builder_type(config)
+
+         # Validate tool registry requirement
+         if builder_type.requires_tools and tool_registry is None:
+             msg = (
+                 f"Builder type '{builder_type.name}' requires tool_registry but it was not provided"
+             )
+             raise ValueError(msg)
+
+         # Instantiate appropriate builder
+         if builder_type == SINGLE_SHOT_BUILDER:
+             return SingleShotBuilder(llm, config, progress_reporter=progress_reporter)
+         if builder_type == SINGLE_TURN_AGENT_BUILDER:
+             from .builders_agent import SingleTurnAgentBuilder  # noqa: PLC0415
+
+             return SingleTurnAgentBuilder(
+                 llm, config, cast("ToolRegistry", tool_registry), progress_reporter
+             )
+         if builder_type == MULTI_TURN_AGENT_BUILDER:
+             from .builders_agent import MultiTurnAgentBuilder  # noqa: PLC0415
+
+             return MultiTurnAgentBuilder(
+                 llm, config, cast("ToolRegistry", tool_registry), progress_reporter
+             )
+         msg = f"Unknown builder type: {builder_type.name}"
+         raise ValueError(msg)
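
For orientation, below is a minimal usage sketch of the builder flow shown in builders.py above. It is not part of the wheel: the constructor arguments for DataSetGeneratorConfig and LLMClient are not visible in this diff, so the `...` placeholders and the example topic prompt are assumptions; only determine_builder_type(), ConversationBuilderFactory.create(), and ConversationBuilder.generate() come from the file above.

# Hypothetical usage sketch (not part of the deepfabric package source).
import asyncio

from deepfabric.builders import ConversationBuilderFactory, determine_builder_type
from deepfabric.generator import DataSetGeneratorConfig
from deepfabric.llm import LLMClient


async def main() -> None:
    # Field values are illustrative assumptions, not documented defaults.
    config = DataSetGeneratorConfig(...)  # e.g. conversation_type="basic", sys_msg=True
    llm = LLMClient(...)  # e.g. provider/model selection

    # Inspect which builder the config maps to: "single_shot" for basic or
    # chain_of_thought conversations, "single_turn_agent"/"multi_turn_agent"
    # when agent_mode is set and tools are configured.
    print(determine_builder_type(config).name)

    # Create the builder and generate one conversation for a topic prompt.
    builder = ConversationBuilderFactory.create(config, llm)
    conversation = await builder.generate("Explain how DNS resolution works")
    print(conversation.model_dump_json(indent=2))


asyncio.run(main())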