DeepFabric 4.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepfabric/__init__.py +70 -0
- deepfabric/__main__.py +6 -0
- deepfabric/auth.py +382 -0
- deepfabric/builders.py +303 -0
- deepfabric/builders_agent.py +1304 -0
- deepfabric/cli.py +1288 -0
- deepfabric/config.py +899 -0
- deepfabric/config_manager.py +251 -0
- deepfabric/constants.py +94 -0
- deepfabric/dataset_manager.py +534 -0
- deepfabric/error_codes.py +581 -0
- deepfabric/evaluation/__init__.py +47 -0
- deepfabric/evaluation/backends/__init__.py +32 -0
- deepfabric/evaluation/backends/ollama_backend.py +137 -0
- deepfabric/evaluation/backends/tool_call_parsers.py +409 -0
- deepfabric/evaluation/backends/transformers_backend.py +326 -0
- deepfabric/evaluation/evaluator.py +845 -0
- deepfabric/evaluation/evaluators/__init__.py +13 -0
- deepfabric/evaluation/evaluators/base.py +104 -0
- deepfabric/evaluation/evaluators/builtin/__init__.py +5 -0
- deepfabric/evaluation/evaluators/builtin/tool_calling.py +93 -0
- deepfabric/evaluation/evaluators/registry.py +66 -0
- deepfabric/evaluation/inference.py +155 -0
- deepfabric/evaluation/metrics.py +397 -0
- deepfabric/evaluation/parser.py +304 -0
- deepfabric/evaluation/reporters/__init__.py +13 -0
- deepfabric/evaluation/reporters/base.py +56 -0
- deepfabric/evaluation/reporters/cloud_reporter.py +195 -0
- deepfabric/evaluation/reporters/file_reporter.py +61 -0
- deepfabric/evaluation/reporters/multi_reporter.py +56 -0
- deepfabric/exceptions.py +67 -0
- deepfabric/factory.py +26 -0
- deepfabric/generator.py +1084 -0
- deepfabric/graph.py +545 -0
- deepfabric/hf_hub.py +214 -0
- deepfabric/kaggle_hub.py +219 -0
- deepfabric/llm/__init__.py +41 -0
- deepfabric/llm/api_key_verifier.py +534 -0
- deepfabric/llm/client.py +1206 -0
- deepfabric/llm/errors.py +105 -0
- deepfabric/llm/rate_limit_config.py +262 -0
- deepfabric/llm/rate_limit_detector.py +278 -0
- deepfabric/llm/retry_handler.py +270 -0
- deepfabric/metrics.py +212 -0
- deepfabric/progress.py +262 -0
- deepfabric/prompts.py +290 -0
- deepfabric/schemas.py +1000 -0
- deepfabric/spin/__init__.py +6 -0
- deepfabric/spin/client.py +263 -0
- deepfabric/spin/models.py +26 -0
- deepfabric/stream_simulator.py +90 -0
- deepfabric/tools/__init__.py +5 -0
- deepfabric/tools/defaults.py +85 -0
- deepfabric/tools/loader.py +87 -0
- deepfabric/tools/mcp_client.py +677 -0
- deepfabric/topic_manager.py +303 -0
- deepfabric/topic_model.py +20 -0
- deepfabric/training/__init__.py +35 -0
- deepfabric/training/api_key_prompt.py +302 -0
- deepfabric/training/callback.py +363 -0
- deepfabric/training/metrics_sender.py +301 -0
- deepfabric/tree.py +438 -0
- deepfabric/tui.py +1267 -0
- deepfabric/update_checker.py +166 -0
- deepfabric/utils.py +150 -0
- deepfabric/validation.py +143 -0
- deepfabric-4.4.0.dist-info/METADATA +702 -0
- deepfabric-4.4.0.dist-info/RECORD +71 -0
- deepfabric-4.4.0.dist-info/WHEEL +4 -0
- deepfabric-4.4.0.dist-info/entry_points.txt +2 -0
- deepfabric-4.4.0.dist-info/licenses/LICENSE +201 -0
deepfabric/builders.py
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from typing import TYPE_CHECKING, cast
|
|
5
|
+
|
|
6
|
+
from pydantic import BaseModel, Field
|
|
7
|
+
|
|
8
|
+
from .progress import ProgressReporter
|
|
9
|
+
from .schemas import ChatMessage, Conversation
|
|
10
|
+
from .stream_simulator import simulate_stream
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from .generator import DataSetGeneratorConfig
|
|
14
|
+
from .llm import LLMClient
|
|
15
|
+
from .schemas import ToolRegistry
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ConversationBuilder(ABC):
    """Base interface for conversation-generation strategies.

    Each concrete subclass implements one way of producing a complete
    Conversation (e.g. single-shot structured generation, agent
    tool-calling flows). All dependencies are injected through the
    constructor rather than discovered at call time.

    Attributes:
        llm: LLM client used to issue generation requests
        config: Typed generator configuration
        tool_registry: Optional registry of tools for tool-calling builders
        progress_reporter: Optional reporter for streaming progress feedback
    """

    def __init__(
        self,
        llm: "LLMClient",
        config: "DataSetGeneratorConfig",
        tool_registry: "ToolRegistry | None" = None,
        progress_reporter: ProgressReporter | None = None,
    ):
        """Store the injected dependencies on the builder instance.

        Args:
            llm: LLM client for making generation requests
            config: Generator configuration (must be Pydantic model)
            tool_registry: Optional tool registry for tool-calling
            progress_reporter: Optional progress reporter for streaming feedback
        """
        self.llm = llm
        self.config = config
        self.tool_registry = tool_registry
        self.progress_reporter = progress_reporter

    @abstractmethod
    async def generate(self, topic_prompt: str, error_feedback: str | None = None) -> Conversation:
        """Produce a complete conversation for the given topic.

        Args:
            topic_prompt: The topic/scenario prompt to generate conversation about
            error_feedback: Error message from a previous failed attempt,
                fed back so the model can correct its output on retry

        Returns:
            A fully-formed Conversation (Pydantic model)

        Raises:
            ValueError: If generation fails validation
        """
        ...
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class BuilderType(BaseModel):
    """Immutable discriminator describing which builder to instantiate.

    Instances are compared field-wise, which makes builder selection
    from configuration type-safe.
    """

    # Identifies the builder strategy (e.g. "single_shot").
    name: str = Field(description="Builder type name")
    # Builders flagged here cannot be created without a tool registry.
    requires_tools: bool = Field(False, description="Whether this builder requires tools")

    class Config:
        # Instances double as shared module-level constants, so they
        # must never be mutated after construction.
        frozen = True
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# Builder type constants
# Frozen BuilderType instances shared as module-level sentinels:
# determine_builder_type() returns one of these, and the factory
# selects a builder class by comparing against them with ==.
SINGLE_SHOT_BUILDER = BuilderType(name="single_shot", requires_tools=False)
SINGLE_TURN_AGENT_BUILDER = BuilderType(name="single_turn_agent", requires_tools=True)
MULTI_TURN_AGENT_BUILDER = BuilderType(name="multi_turn_agent", requires_tools=True)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def determine_builder_type(config: "DataSetGeneratorConfig") -> BuilderType:
    """Select the builder type that matches the generator configuration.

    Args:
        config: Generator configuration (Pydantic model)

    Returns:
        The BuilderType constant describing which builder to use

    Raises:
        ValueError: If configuration is invalid or unsupported
    """
    if config.agent_mode:
        # Agent builders are unusable without at least one tool source.
        if not (config.tool_components or config.custom_tools):
            msg = "agent_mode requires tools to be configured via tool_components or custom_tools"
            raise ValueError(msg)

        if config.agent_mode == "single_turn":
            return SINGLE_TURN_AGENT_BUILDER
        if config.agent_mode == "multi_turn":
            return MULTI_TURN_AGENT_BUILDER
        raise ValueError(f"Unknown agent_mode: {config.agent_mode}")

    # Everything without tools is generated in a single structured pass.
    if config.conversation_type in {"basic", "chain_of_thought"}:
        return SINGLE_SHOT_BUILDER

    raise ValueError(
        f"Cannot determine builder type for conversation_type={config.conversation_type}"
    )
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class SingleShotBuilder(ConversationBuilder):
    """Builder that emits a whole conversation from one structured LLM call.

    The entire conversation is produced in a single pass using
    JSON-schema constrained output. Appropriate for:
    - Basic Q&A conversations
    - Chain-of-thought reasoning without tools
    - Any conversation that fits in one generation pass
    """

    async def generate(self, topic_prompt: str, error_feedback: str | None = None) -> Conversation:
        """Generate a conversation with a single schema-validated LLM call.

        Args:
            topic_prompt: Topic or scenario to generate conversation about
            error_feedback: Optional error message from a previous failed attempt

        Returns:
            Complete Conversation object

        Raises:
            ValueError: If LLM fails to generate valid conversation
        """
        prompt = self._build_prompt(topic_prompt, error_feedback)

        # Structured output is only reliable without streaming.
        result = await self._generate_non_streaming(prompt)

        # Narrow the type for the checker before using the result.
        conversation = cast(Conversation, result)

        # Fire-and-forget streaming simulation so the TUI can show a
        # live preview without blocking this coroutine.
        simulate_stream(
            self.progress_reporter,
            conversation.model_dump_json(indent=2),
            source="conversation_gen",
        )

        # The LLM must not emit system messages; the first generated
        # message is expected to be the user's turn.
        if conversation.messages and conversation.messages[0].role != "user":
            raise ValueError(
                f"Generated conversation must start with 'user' message, got '{conversation.messages[0].role}'. "
                "System messages are added automatically by the builder."
            )

        # Prepend the configured system message, if requested.
        if self.config.sys_msg:
            system_message = ChatMessage(
                role="system", content=self.config.dataset_system_prompt or ""
            )
            conversation.messages.insert(0, system_message)

        return conversation

    async def _generate_non_streaming(self, prompt: str) -> Conversation:
        """Run one non-streaming, schema-constrained LLM request.

        Args:
            prompt: The complete generation prompt

        Returns:
            Generated Conversation object
        """
        cfg = self.config
        return await self.llm.generate_async(
            prompt=prompt,
            schema=Conversation,
            max_retries=cfg.max_retries,
            max_tokens=cfg.max_tokens,
            temperature=cfg.temperature,
        )

    def _build_prompt(self, topic_prompt: str, error_feedback: str | None = None) -> str:
        """Assemble the full prompt for single-shot generation.

        Args:
            topic_prompt: The topic to generate about
            error_feedback: Optional error message from a previous failed attempt

        Returns:
            Complete prompt string for the LLM
        """
        cfg = self.config
        # The generation system prompt forms the base, followed by the topic.
        parts = [cfg.generation_system_prompt, f"\nTopic/Scenario: {topic_prompt}"]

        # On retry, feed the previous failure back to the model.
        if error_feedback:
            parts.append(
                f"\n\nRETRY: {error_feedback}. Use real values, not null/empty/placeholders."
            )

        if cfg.instructions:
            parts.append(f"\nAdditional Instructions: {cfg.instructions}")

        # Style-specific reasoning guidance for chain-of-thought datasets.
        if cfg.conversation_type == "chain_of_thought":
            if cfg.reasoning_style == "freetext":
                parts.append(
                    "\nREASONING FORMAT: Generate natural, conversational reasoning content (string format). "
                    "Show your actual thinking process - explore ideas, consider alternatives, work through the problem. "
                    "Think like a human would: 'Hmm, let me think about this...', 'Wait, that doesn't work...', "
                    "'Actually, if I approach it this way...'. "
                    "DO NOT use numbered steps or structured outlines. "
                    "Use the 'content' field in reasoning as a plain string (not a list)."
                )
            elif cfg.reasoning_style == "agent":
                parts.append(
                    "\nREASONING FORMAT: Generate structured reasoning steps as a list of ReasoningStep objects. "
                    "Each step should have clear thought and action fields."
                )

        # Make the expected message structure explicit for the model.
        parts.append(
            "\nIMPORTANT: Generate the conversation messages array starting with a 'user' message "
            "(the user's question or request), followed by an 'assistant' message (the response). "
            "Do NOT include any 'system' role messages - those are added separately."
        )

        return "\n".join(parts)
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
class ConversationBuilderFactory:
    """Factory providing type-safe instantiation of conversation builders."""

    @staticmethod
    def create(
        config: "DataSetGeneratorConfig",
        llm: "LLMClient",
        tool_registry: "ToolRegistry | None" = None,
        progress_reporter: ProgressReporter | None = None,
    ) -> ConversationBuilder:
        """Instantiate the builder that matches the configuration.

        Args:
            config: Generator configuration (Pydantic model)
            llm: LLM client for generation
            tool_registry: Optional tool registry (required for agent builders)
            progress_reporter: Optional progress reporter for streaming feedback

        Returns:
            Appropriate ConversationBuilder instance

        Raises:
            ValueError: If configuration is invalid or builder requirements not met
        """
        builder_type = determine_builder_type(config)

        # Agent builders cannot function without a tool registry.
        if builder_type.requires_tools and tool_registry is None:
            raise ValueError(
                f"Builder type '{builder_type.name}' requires tool_registry but it was not provided"
            )

        if builder_type == SINGLE_SHOT_BUILDER:
            return SingleShotBuilder(llm, config, progress_reporter=progress_reporter)

        # NOTE(review): agent builders are imported lazily here rather than
        # at module top level — presumably to avoid an import cycle with
        # builders_agent; confirm before changing.
        if builder_type == SINGLE_TURN_AGENT_BUILDER:
            from .builders_agent import SingleTurnAgentBuilder  # noqa: PLC0415

            return SingleTurnAgentBuilder(
                llm, config, cast("ToolRegistry", tool_registry), progress_reporter
            )

        if builder_type == MULTI_TURN_AGENT_BUILDER:
            from .builders_agent import MultiTurnAgentBuilder  # noqa: PLC0415

            return MultiTurnAgentBuilder(
                llm, config, cast("ToolRegistry", tool_registry), progress_reporter
            )

        raise ValueError(f"Unknown builder type: {builder_type.name}")
|