DeepFabric 4.5.1__py3-none-any.whl → 4.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepfabric/__init__.py +8 -0
- deepfabric/auth.py +16 -6
- deepfabric/builders.py +2 -2
- deepfabric/builders_agent.py +2 -2
- deepfabric/cli.py +289 -10
- deepfabric/cloud_upload.py +884 -0
- deepfabric/config.py +47 -20
- deepfabric/config_manager.py +2 -2
- deepfabric/dataset.py +302 -0
- deepfabric/evaluation/parser.py +8 -8
- deepfabric/evaluation/reporters/cloud_reporter.py +19 -6
- deepfabric/exceptions.py +14 -0
- deepfabric/generator.py +4 -4
- deepfabric/graph.py +38 -0
- deepfabric/loader.py +554 -0
- deepfabric/schemas.py +5 -5
- deepfabric/topic_manager.py +4 -0
- deepfabric/training/callback.py +43 -1
- deepfabric/training/metrics_sender.py +50 -16
- deepfabric/tui.py +9 -1
- deepfabric/utils.py +14 -0
- deepfabric/validation.py +1 -1
- {deepfabric-4.5.1.dist-info → deepfabric-4.7.0.dist-info}/METADATA +79 -175
- {deepfabric-4.5.1.dist-info → deepfabric-4.7.0.dist-info}/RECORD +27 -24
- {deepfabric-4.5.1.dist-info → deepfabric-4.7.0.dist-info}/WHEEL +0 -0
- {deepfabric-4.5.1.dist-info → deepfabric-4.7.0.dist-info}/entry_points.txt +0 -0
- {deepfabric-4.5.1.dist-info → deepfabric-4.7.0.dist-info}/licenses/LICENSE +0 -0
deepfabric/config.py
CHANGED
|
@@ -119,13 +119,13 @@ class TopicsConfig(BaseModel):
|
|
|
119
119
|
class ConversationConfig(BaseModel):
|
|
120
120
|
"""Configuration for conversation structure in generation."""
|
|
121
121
|
|
|
122
|
-
type: Literal["basic", "
|
|
122
|
+
type: Literal["basic", "cot"] = Field(
|
|
123
123
|
default="basic",
|
|
124
|
-
description="Base conversation type: basic (simple chat),
|
|
124
|
+
description="Base conversation type: basic (simple chat), cot (with reasoning)",
|
|
125
125
|
)
|
|
126
126
|
reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = Field(
|
|
127
127
|
default=None,
|
|
128
|
-
description="Reasoning style for
|
|
128
|
+
description="Reasoning style for cot: freetext or agent. Note: 'structured' and 'hybrid' are deprecated.",
|
|
129
129
|
)
|
|
130
130
|
agent_mode: Literal["single_turn", "multi_turn"] | None = Field(
|
|
131
131
|
default=None,
|
|
@@ -159,15 +159,14 @@ class ConversationConfig(BaseModel):
|
|
|
159
159
|
@model_validator(mode="after")
|
|
160
160
|
def validate_configuration(self):
|
|
161
161
|
"""Validate that configuration combinations are consistent."""
|
|
162
|
-
if self.reasoning_style is not None and self.type != "
|
|
162
|
+
if self.reasoning_style is not None and self.type != "cot":
|
|
163
163
|
raise ValueError(
|
|
164
|
-
f"reasoning_style can only be set when type='
|
|
165
|
-
f"got type='{self.type}'"
|
|
164
|
+
f"reasoning_style can only be set when type='cot', got type='{self.type}'"
|
|
166
165
|
)
|
|
167
166
|
|
|
168
|
-
if self.type == "
|
|
167
|
+
if self.type == "cot" and self.reasoning_style is None:
|
|
169
168
|
raise ValueError(
|
|
170
|
-
"reasoning_style must be specified when type='
|
|
169
|
+
"reasoning_style must be specified when type='cot'. "
|
|
171
170
|
"Choose from: 'freetext' or 'agent'"
|
|
172
171
|
)
|
|
173
172
|
|
|
@@ -346,16 +345,37 @@ class KaggleConfig(BaseModel):
|
|
|
346
345
|
version_notes: str | None = Field(None, description="Version notes for dataset update")
|
|
347
346
|
|
|
348
347
|
|
|
348
|
+
class DeepFabricCloudConfig(BaseModel):
|
|
349
|
+
"""Configuration for DeepFabric Cloud integration."""
|
|
350
|
+
|
|
351
|
+
graph: str | None = Field(
|
|
352
|
+
default=None,
|
|
353
|
+
description="DeepFabric Cloud graph handle (e.g., username/graph-name)",
|
|
354
|
+
)
|
|
355
|
+
dataset: str | None = Field(
|
|
356
|
+
default=None,
|
|
357
|
+
description="DeepFabric Cloud dataset handle (e.g., username/dataset-name)",
|
|
358
|
+
)
|
|
359
|
+
description: str | None = Field(
|
|
360
|
+
default=None,
|
|
361
|
+
description="Description for uploaded resources",
|
|
362
|
+
)
|
|
363
|
+
tags: list[str] = Field(
|
|
364
|
+
default_factory=list,
|
|
365
|
+
description="Tags for uploaded resources",
|
|
366
|
+
)
|
|
367
|
+
|
|
368
|
+
|
|
349
369
|
class EvaluationConfig(BaseModel):
|
|
350
370
|
"""Configuration for model evaluation."""
|
|
351
371
|
|
|
352
|
-
conversation_type: Literal["basic", "
|
|
372
|
+
conversation_type: Literal["basic", "cot"] = Field(
|
|
353
373
|
...,
|
|
354
374
|
description="Conversation type (must match dataset generation)",
|
|
355
375
|
)
|
|
356
376
|
reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = Field(
|
|
357
377
|
default=None,
|
|
358
|
-
description="Reasoning style for
|
|
378
|
+
description="Reasoning style for cot type",
|
|
359
379
|
)
|
|
360
380
|
|
|
361
381
|
@field_validator("reasoning_style", mode="before")
|
|
@@ -419,15 +439,15 @@ class EvaluationConfig(BaseModel):
|
|
|
419
439
|
@model_validator(mode="after")
|
|
420
440
|
def validate_evaluation_config(self) -> "EvaluationConfig":
|
|
421
441
|
"""Validate evaluation configuration consistency."""
|
|
422
|
-
if self.reasoning_style is not None and self.conversation_type != "
|
|
442
|
+
if self.reasoning_style is not None and self.conversation_type != "cot":
|
|
423
443
|
raise ValueError(
|
|
424
|
-
f"reasoning_style can only be set when conversation_type='
|
|
444
|
+
f"reasoning_style can only be set when conversation_type='cot', "
|
|
425
445
|
f"got conversation_type='{self.conversation_type}'"
|
|
426
446
|
)
|
|
427
447
|
|
|
428
|
-
if self.conversation_type == "
|
|
448
|
+
if self.conversation_type == "cot" and self.reasoning_style is None:
|
|
429
449
|
raise ValueError(
|
|
430
|
-
"reasoning_style must be specified when conversation_type='
|
|
450
|
+
"reasoning_style must be specified when conversation_type='cot'. "
|
|
431
451
|
"Choose from: 'freetext' or 'agent'"
|
|
432
452
|
)
|
|
433
453
|
|
|
@@ -457,6 +477,9 @@ class DeepFabricConfig(BaseModel):
|
|
|
457
477
|
evaluation: EvaluationConfig | None = Field(None, description="Evaluation configuration")
|
|
458
478
|
huggingface: HuggingFaceConfig | None = Field(None, description="Hugging Face configuration")
|
|
459
479
|
kaggle: KaggleConfig | None = Field(None, description="Kaggle configuration")
|
|
480
|
+
deepfabric_cloud: DeepFabricCloudConfig | None = Field(
|
|
481
|
+
None, description="DeepFabric Cloud configuration"
|
|
482
|
+
)
|
|
460
483
|
|
|
461
484
|
@classmethod
|
|
462
485
|
def _detect_old_format(cls, config_dict: dict) -> bool:
|
|
@@ -663,6 +686,10 @@ See documentation for full examples.
|
|
|
663
686
|
"""Get Kaggle configuration."""
|
|
664
687
|
return self.kaggle.model_dump() if self.kaggle else {}
|
|
665
688
|
|
|
689
|
+
def get_deepfabric_cloud_config(self) -> dict:
|
|
690
|
+
"""Get DeepFabric Cloud configuration."""
|
|
691
|
+
return self.deepfabric_cloud.model_dump() if self.deepfabric_cloud else {}
|
|
692
|
+
|
|
666
693
|
def get_configured_providers(self) -> set[str]:
|
|
667
694
|
"""Get the set of LLM providers configured in this config."""
|
|
668
695
|
providers = set()
|
|
@@ -808,13 +835,13 @@ class DataEngineConfig(BaseModel):
|
|
|
808
835
|
default=None,
|
|
809
836
|
description="Rate limiting and retry configuration",
|
|
810
837
|
)
|
|
811
|
-
conversation_type: Literal["basic", "
|
|
838
|
+
conversation_type: Literal["basic", "cot"] = Field(
|
|
812
839
|
default="basic",
|
|
813
840
|
description="Base conversation type",
|
|
814
841
|
)
|
|
815
842
|
reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = Field(
|
|
816
843
|
default=None,
|
|
817
|
-
description="Reasoning style for
|
|
844
|
+
description="Reasoning style for cot type",
|
|
818
845
|
)
|
|
819
846
|
|
|
820
847
|
@field_validator("reasoning_style", mode="before")
|
|
@@ -839,15 +866,15 @@ class DataEngineConfig(BaseModel):
|
|
|
839
866
|
|
|
840
867
|
@model_validator(mode="after")
|
|
841
868
|
def validate_configuration(self):
|
|
842
|
-
if self.reasoning_style is not None and self.conversation_type != "
|
|
869
|
+
if self.reasoning_style is not None and self.conversation_type != "cot":
|
|
843
870
|
raise ValueError(
|
|
844
|
-
f"reasoning_style can only be set when conversation_type='
|
|
871
|
+
f"reasoning_style can only be set when conversation_type='cot', "
|
|
845
872
|
f"got conversation_type='{self.conversation_type}'"
|
|
846
873
|
)
|
|
847
874
|
|
|
848
|
-
if self.conversation_type == "
|
|
875
|
+
if self.conversation_type == "cot" and self.reasoning_style is None:
|
|
849
876
|
raise ValueError(
|
|
850
|
-
"reasoning_style must be specified when conversation_type='
|
|
877
|
+
"reasoning_style must be specified when conversation_type='cot'. "
|
|
851
878
|
"Choose from: 'freetext' or 'agent'"
|
|
852
879
|
)
|
|
853
880
|
|
deepfabric/config_manager.py
CHANGED
|
@@ -63,8 +63,8 @@ def load_config( # noqa: PLR0913
|
|
|
63
63
|
output_save_as: Path to save dataset
|
|
64
64
|
include_system_message: Include system message in dataset
|
|
65
65
|
mode: Topic generation mode (tree or graph)
|
|
66
|
-
conversation_type: Base conversation type (basic,
|
|
67
|
-
reasoning_style: Reasoning style for
|
|
66
|
+
conversation_type: Base conversation type (basic, cot)
|
|
67
|
+
reasoning_style: Reasoning style for cot (freetext, agent)
|
|
68
68
|
agent_mode: Agent mode (single_turn, multi_turn)
|
|
69
69
|
|
|
70
70
|
Returns:
|
deepfabric/dataset.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
1
|
+
"""Native DeepFabric Dataset implementation.
|
|
2
|
+
|
|
3
|
+
This module provides a simple, maintainable Dataset class with no external
|
|
4
|
+
dependencies (beyond stdlib). It supports column-oriented access patterns
|
|
5
|
+
similar to HuggingFace datasets.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import random
|
|
10
|
+
|
|
11
|
+
from collections.abc import Callable, Iterator
|
|
12
|
+
from typing import Any, overload
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Dataset:
|
|
16
|
+
"""A simple, native dataset class that stores data as a list of dicts
|
|
17
|
+
with column-oriented access patterns.
|
|
18
|
+
|
|
19
|
+
Examples:
|
|
20
|
+
>>> ds = Dataset([{"text": "hello"}, {"text": "world"}])
|
|
21
|
+
>>> len(ds)
|
|
22
|
+
2
|
|
23
|
+
>>> ds["text"]
|
|
24
|
+
['hello', 'world']
|
|
25
|
+
>>> ds[0]
|
|
26
|
+
{'text': 'hello'}
|
|
27
|
+
>>> ds[0:1]
|
|
28
|
+
Dataset(num_rows=1, columns=[text])
|
|
29
|
+
"""
|
|
30
|
+
|
|
31
|
+
def __init__(self, data: list[dict[str, Any]], metadata: dict | None = None):
|
|
32
|
+
"""Initialize dataset from list of sample dicts.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
data: List of sample dictionaries
|
|
36
|
+
metadata: Optional metadata (source, path, etc.)
|
|
37
|
+
"""
|
|
38
|
+
self._data = data
|
|
39
|
+
self._metadata = metadata or {}
|
|
40
|
+
self._columns: list[str] | None = None
|
|
41
|
+
|
|
42
|
+
@property
|
|
43
|
+
def column_names(self) -> list[str]:
|
|
44
|
+
"""Return list of column names."""
|
|
45
|
+
if self._columns is None:
|
|
46
|
+
if self._data:
|
|
47
|
+
# Collect all unique keys across samples
|
|
48
|
+
all_keys: set[str] = set()
|
|
49
|
+
for sample in self._data:
|
|
50
|
+
all_keys.update(sample.keys())
|
|
51
|
+
self._columns = sorted(all_keys)
|
|
52
|
+
else:
|
|
53
|
+
self._columns = []
|
|
54
|
+
return self._columns
|
|
55
|
+
|
|
56
|
+
@property
|
|
57
|
+
def num_rows(self) -> int:
|
|
58
|
+
"""Return number of samples (alias for len)."""
|
|
59
|
+
return len(self._data)
|
|
60
|
+
|
|
61
|
+
def __len__(self) -> int:
|
|
62
|
+
"""Return number of samples."""
|
|
63
|
+
return len(self._data)
|
|
64
|
+
|
|
65
|
+
@overload
|
|
66
|
+
def __getitem__(self, key: str) -> list[Any]: ...
|
|
67
|
+
|
|
68
|
+
@overload
|
|
69
|
+
def __getitem__(self, key: int) -> dict[str, Any]: ...
|
|
70
|
+
|
|
71
|
+
@overload
|
|
72
|
+
def __getitem__(self, key: slice) -> "Dataset": ...
|
|
73
|
+
|
|
74
|
+
def __getitem__(self, key: str | int | slice) -> Any:
|
|
75
|
+
"""Access by column name, index, or slice.
|
|
76
|
+
|
|
77
|
+
Args:
|
|
78
|
+
key: Column name (str), row index (int), or slice
|
|
79
|
+
|
|
80
|
+
Returns:
|
|
81
|
+
- For str: list of values for that column
|
|
82
|
+
- For int: dict for that sample
|
|
83
|
+
- For slice: new Dataset with selected samples
|
|
84
|
+
|
|
85
|
+
Examples:
|
|
86
|
+
>>> ds["messages"] # Get column as list
|
|
87
|
+
>>> ds[0] # Get first sample as dict
|
|
88
|
+
>>> ds[0:10] # Get first 10 samples as new Dataset
|
|
89
|
+
"""
|
|
90
|
+
if isinstance(key, str):
|
|
91
|
+
# Column access - return list of values
|
|
92
|
+
return [sample.get(key) for sample in self._data]
|
|
93
|
+
if isinstance(key, int):
|
|
94
|
+
# Single sample access
|
|
95
|
+
if key < 0:
|
|
96
|
+
key = len(self._data) + key
|
|
97
|
+
if key < 0 or key >= len(self._data):
|
|
98
|
+
raise IndexError(
|
|
99
|
+
f"Index {key} out of range for dataset with {len(self._data)} samples"
|
|
100
|
+
)
|
|
101
|
+
return self._data[key]
|
|
102
|
+
if isinstance(key, slice):
|
|
103
|
+
# Slice access - return new Dataset
|
|
104
|
+
return Dataset(self._data[key], self._metadata.copy())
|
|
105
|
+
raise TypeError(f"Invalid key type: {type(key)}. Expected str, int, or slice.")
|
|
106
|
+
|
|
107
|
+
def __iter__(self) -> Iterator[dict[str, Any]]:
|
|
108
|
+
"""Iterate over samples."""
|
|
109
|
+
return iter(self._data)
|
|
110
|
+
|
|
111
|
+
def __repr__(self) -> str:
|
|
112
|
+
"""Return string representation."""
|
|
113
|
+
cols = ", ".join(self.column_names[:5])
|
|
114
|
+
if len(self.column_names) > 5: # noqa: PLR2004
|
|
115
|
+
cols += ", ..."
|
|
116
|
+
return f"Dataset(num_rows={len(self)}, columns=[{cols}])"
|
|
117
|
+
|
|
118
|
+
def split(
|
|
119
|
+
self,
|
|
120
|
+
test_size: float = 0.1,
|
|
121
|
+
seed: int | None = None,
|
|
122
|
+
) -> dict[str, "Dataset"]:
|
|
123
|
+
"""Split dataset into train and test sets.
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
test_size: Fraction of data for test set (0.0 to 1.0)
|
|
127
|
+
seed: Random seed for reproducibility
|
|
128
|
+
|
|
129
|
+
Returns:
|
|
130
|
+
Dict with "train" and "test" Dataset instances
|
|
131
|
+
|
|
132
|
+
Examples:
|
|
133
|
+
>>> splits = ds.split(test_size=0.1, seed=42)
|
|
134
|
+
>>> train_ds = splits["train"]
|
|
135
|
+
>>> test_ds = splits["test"]
|
|
136
|
+
"""
|
|
137
|
+
if not 0.0 < test_size < 1.0:
|
|
138
|
+
raise ValueError("test_size must be between 0.0 and 1.0 (exclusive)")
|
|
139
|
+
|
|
140
|
+
# Use a local Random instance to avoid affecting global state
|
|
141
|
+
rng = random.Random(seed) # noqa: S311 # nosec
|
|
142
|
+
|
|
143
|
+
# Create shuffled indices
|
|
144
|
+
indices = list(range(len(self._data)))
|
|
145
|
+
rng.shuffle(indices)
|
|
146
|
+
|
|
147
|
+
# Calculate split point
|
|
148
|
+
split_idx = int(len(indices) * (1 - test_size))
|
|
149
|
+
|
|
150
|
+
train_indices = indices[:split_idx]
|
|
151
|
+
test_indices = indices[split_idx:]
|
|
152
|
+
|
|
153
|
+
return {
|
|
154
|
+
"train": self.select(train_indices),
|
|
155
|
+
"test": self.select(test_indices),
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
def select(self, indices: list[int]) -> "Dataset":
|
|
159
|
+
"""Select samples by indices.
|
|
160
|
+
|
|
161
|
+
Args:
|
|
162
|
+
indices: List of integer indices to select
|
|
163
|
+
|
|
164
|
+
Returns:
|
|
165
|
+
New Dataset with selected samples
|
|
166
|
+
"""
|
|
167
|
+
return Dataset([self._data[i] for i in indices], self._metadata.copy())
|
|
168
|
+
|
|
169
|
+
def shuffle(self, seed: int | None = None) -> "Dataset":
|
|
170
|
+
"""Return a shuffled copy of the dataset.
|
|
171
|
+
|
|
172
|
+
Args:
|
|
173
|
+
seed: Random seed for reproducibility
|
|
174
|
+
|
|
175
|
+
Returns:
|
|
176
|
+
New Dataset with shuffled samples
|
|
177
|
+
"""
|
|
178
|
+
rng = random.Random(seed) # nosec # noqa: S311
|
|
179
|
+
indices = list(range(len(self._data)))
|
|
180
|
+
rng.shuffle(indices)
|
|
181
|
+
return self.select(indices)
|
|
182
|
+
|
|
183
|
+
def map(self, fn: Callable[[dict[str, Any]], dict[str, Any]]) -> "Dataset":
|
|
184
|
+
"""Apply function to each sample.
|
|
185
|
+
|
|
186
|
+
Args:
|
|
187
|
+
fn: Function that takes a sample dict and returns a new sample dict
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
New Dataset with transformed samples
|
|
191
|
+
|
|
192
|
+
Examples:
|
|
193
|
+
>>> ds.map(lambda x: {"text": x["text"].upper()})
|
|
194
|
+
"""
|
|
195
|
+
return Dataset([fn(sample) for sample in self._data], self._metadata.copy())
|
|
196
|
+
|
|
197
|
+
def filter(self, fn: Callable[[dict[str, Any]], bool]) -> "Dataset":
|
|
198
|
+
"""Filter samples by predicate function.
|
|
199
|
+
|
|
200
|
+
Args:
|
|
201
|
+
fn: Function that takes a sample dict and returns True to keep
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
New Dataset with filtered samples
|
|
205
|
+
|
|
206
|
+
Examples:
|
|
207
|
+
>>> ds.filter(lambda x: len(x["text"]) > 10)
|
|
208
|
+
"""
|
|
209
|
+
return Dataset([s for s in self._data if fn(s)], self._metadata.copy())
|
|
210
|
+
|
|
211
|
+
def to_list(self) -> list[dict[str, Any]]:
|
|
212
|
+
"""Return data as list of dicts.
|
|
213
|
+
|
|
214
|
+
Returns:
|
|
215
|
+
Copy of internal data as list of dictionaries
|
|
216
|
+
"""
|
|
217
|
+
return self._data.copy()
|
|
218
|
+
|
|
219
|
+
def to_hf(self) -> Any:
|
|
220
|
+
"""Convert to HuggingFace Dataset for use with TRL/transformers.
|
|
221
|
+
|
|
222
|
+
Returns:
|
|
223
|
+
A HuggingFace datasets.Dataset instance
|
|
224
|
+
|
|
225
|
+
Raises:
|
|
226
|
+
ImportError: If the 'datasets' package is not installed
|
|
227
|
+
|
|
228
|
+
Examples:
|
|
229
|
+
>>> from deepfabric import load_dataset
|
|
230
|
+
>>> ds = load_dataset("data.jsonl")
|
|
231
|
+
>>> hf_ds = ds.to_hf()
|
|
232
|
+
>>> trainer = SFTTrainer(train_dataset=hf_ds, ...)
|
|
233
|
+
"""
|
|
234
|
+
try:
|
|
235
|
+
from datasets import Dataset as HFDataset # noqa: PLC0415
|
|
236
|
+
except ImportError:
|
|
237
|
+
raise ImportError(
|
|
238
|
+
"The 'datasets' package is required for to_hf(). "
|
|
239
|
+
"Install it with: pip install datasets"
|
|
240
|
+
) from None
|
|
241
|
+
|
|
242
|
+
return HFDataset.from_list(self._data)
|
|
243
|
+
|
|
244
|
+
def to_jsonl(self, path: str) -> None:
|
|
245
|
+
"""Save dataset to JSONL file.
|
|
246
|
+
|
|
247
|
+
Args:
|
|
248
|
+
path: File path to save to
|
|
249
|
+
"""
|
|
250
|
+
with open(path, "w", encoding="utf-8") as f:
|
|
251
|
+
for sample in self._data:
|
|
252
|
+
f.write(json.dumps(sample, ensure_ascii=False) + "\n")
|
|
253
|
+
|
|
254
|
+
@classmethod
|
|
255
|
+
def from_jsonl(cls, path: str) -> "Dataset":
|
|
256
|
+
"""Load dataset from JSONL file.
|
|
257
|
+
|
|
258
|
+
Args:
|
|
259
|
+
path: File path to load from
|
|
260
|
+
|
|
261
|
+
Returns:
|
|
262
|
+
New Dataset loaded from file
|
|
263
|
+
"""
|
|
264
|
+
data = []
|
|
265
|
+
with open(path, encoding="utf-8") as f:
|
|
266
|
+
for line in f:
|
|
267
|
+
if line.strip():
|
|
268
|
+
data.append(json.loads(line))
|
|
269
|
+
return cls(data, metadata={"source": "jsonl", "path": path})
|
|
270
|
+
|
|
271
|
+
@classmethod
|
|
272
|
+
def from_list(cls, data: list[dict[str, Any]]) -> "Dataset":
|
|
273
|
+
"""Create dataset from list of dicts.
|
|
274
|
+
|
|
275
|
+
Args:
|
|
276
|
+
data: List of sample dictionaries
|
|
277
|
+
|
|
278
|
+
Returns:
|
|
279
|
+
New Dataset from the provided data
|
|
280
|
+
"""
|
|
281
|
+
return cls(data)
|
|
282
|
+
|
|
283
|
+
|
|
284
|
+
class DatasetDict(dict):
|
|
285
|
+
"""Dictionary of Dataset objects for train/test/validation splits.
|
|
286
|
+
|
|
287
|
+
A simple dict subclass that provides typed access to Dataset values.
|
|
288
|
+
|
|
289
|
+
Examples:
|
|
290
|
+
>>> dd = DatasetDict({"train": train_ds, "test": test_ds})
|
|
291
|
+
>>> dd["train"]
|
|
292
|
+
Dataset(num_rows=100, columns=[...])
|
|
293
|
+
"""
|
|
294
|
+
|
|
295
|
+
def __getitem__(self, key: str) -> Dataset:
|
|
296
|
+
"""Get Dataset by split name."""
|
|
297
|
+
return super().__getitem__(key)
|
|
298
|
+
|
|
299
|
+
def __repr__(self) -> str:
|
|
300
|
+
"""Return string representation."""
|
|
301
|
+
splits = ", ".join(f"{k}: {len(v)} rows" for k, v in self.items())
|
|
302
|
+
return f"DatasetDict({{{splits}}})"
|
deepfabric/evaluation/parser.py
CHANGED
|
@@ -49,12 +49,12 @@ class GroundTruth(BaseModel):
|
|
|
49
49
|
default=None,
|
|
50
50
|
description="Expected final answer if available",
|
|
51
51
|
)
|
|
52
|
-
conversation_type: Literal["basic", "
|
|
52
|
+
conversation_type: Literal["basic", "cot"] = Field(
|
|
53
53
|
description="Type of conversation",
|
|
54
54
|
)
|
|
55
55
|
reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = Field(
|
|
56
56
|
default=None,
|
|
57
|
-
description="Reasoning style if
|
|
57
|
+
description="Reasoning style if cot",
|
|
58
58
|
)
|
|
59
59
|
agent_mode: Literal["single_turn", "multi_turn"] | None = Field(
|
|
60
60
|
default=None,
|
|
@@ -75,18 +75,18 @@ class GroundTruthParser:
|
|
|
75
75
|
|
|
76
76
|
def __init__(
|
|
77
77
|
self,
|
|
78
|
-
conversation_type: Literal["basic", "
|
|
78
|
+
conversation_type: Literal["basic", "cot"],
|
|
79
79
|
reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = None,
|
|
80
80
|
agent_mode: Literal["single_turn", "multi_turn"] | None = None,
|
|
81
81
|
):
|
|
82
82
|
"""Initialize parser with conversation configuration.
|
|
83
83
|
|
|
84
84
|
Args:
|
|
85
|
-
conversation_type: Type of conversation (basic,
|
|
86
|
-
reasoning_style: Reasoning style for
|
|
85
|
+
conversation_type: Type of conversation (basic, cot)
|
|
86
|
+
reasoning_style: Reasoning style for cot
|
|
87
87
|
agent_mode: Agent mode if tools are used
|
|
88
88
|
"""
|
|
89
|
-
self.conversation_type: Literal["basic", "
|
|
89
|
+
self.conversation_type: Literal["basic", "cot"] = conversation_type
|
|
90
90
|
self.reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = (
|
|
91
91
|
reasoning_style
|
|
92
92
|
)
|
|
@@ -270,7 +270,7 @@ class GroundTruthParser:
|
|
|
270
270
|
|
|
271
271
|
def parse_batch(
|
|
272
272
|
conversations: list[Conversation],
|
|
273
|
-
conversation_type: Literal["basic", "
|
|
273
|
+
conversation_type: Literal["basic", "cot"],
|
|
274
274
|
reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = None,
|
|
275
275
|
agent_mode: Literal["single_turn", "multi_turn"] | None = None,
|
|
276
276
|
) -> list[GroundTruth]:
|
|
@@ -279,7 +279,7 @@ def parse_batch(
|
|
|
279
279
|
Args:
|
|
280
280
|
conversations: List of Conversation objects
|
|
281
281
|
conversation_type: Type of conversation
|
|
282
|
-
reasoning_style: Reasoning style if
|
|
282
|
+
reasoning_style: Reasoning style if cot
|
|
283
283
|
agent_mode: Agent mode if tools are used
|
|
284
284
|
|
|
285
285
|
Returns:
|
deepfabric/evaluation/reporters/cloud_reporter.py
CHANGED
|
@@ -13,6 +13,7 @@ import httpx
|
|
|
13
13
|
|
|
14
14
|
from rich.console import Console
|
|
15
15
|
|
|
16
|
+
from ...utils import get_bool_env
|
|
16
17
|
from .base import BaseReporter
|
|
17
18
|
|
|
18
19
|
if TYPE_CHECKING:
|
|
@@ -45,7 +46,7 @@ class CloudReporter(BaseReporter):
|
|
|
45
46
|
|
|
46
47
|
Args:
|
|
47
48
|
config: Optional configuration with:
|
|
48
|
-
- api_url: DeepFabric API URL (default: https://api.deepfabric.
|
|
49
|
+
- api_url: DeepFabric API URL (default: https://api.deepfabric.cloud)
|
|
49
50
|
- project_id: Project ID to associate results with
|
|
50
51
|
- auth_token: Authentication token (if not provided, will read from config file)
|
|
51
52
|
- enabled: Whether to enable cloud reporting (default: True if authenticated)
|
|
@@ -53,7 +54,7 @@ class CloudReporter(BaseReporter):
|
|
|
53
54
|
super().__init__(config)
|
|
54
55
|
|
|
55
56
|
# Get API URL from config or environment
|
|
56
|
-
self.api_url = os.getenv("DEEPFABRIC_API_URL", "https://api.deepfabric.
|
|
57
|
+
self.api_url = os.getenv("DEEPFABRIC_API_URL", "https://api.deepfabric.cloud")
|
|
57
58
|
if config and "api_url" in config:
|
|
58
59
|
self.api_url = config["api_url"]
|
|
59
60
|
|
|
@@ -67,8 +68,9 @@ class CloudReporter(BaseReporter):
|
|
|
67
68
|
# Get project ID from config
|
|
68
69
|
self.project_id = config.get("project_id") if config else None
|
|
69
70
|
|
|
70
|
-
# Enable cloud reporting if authenticated
|
|
71
|
-
|
|
71
|
+
# Enable cloud reporting if authenticated AND experimental flag is set
|
|
72
|
+
is_experimental = get_bool_env("EXPERIMENTAL_DF")
|
|
73
|
+
self.enabled = is_experimental and (
|
|
72
74
|
config.get("enabled", bool(self.auth_token)) if config else bool(self.auth_token)
|
|
73
75
|
)
|
|
74
76
|
|
|
@@ -99,11 +101,22 @@ class CloudReporter(BaseReporter):
|
|
|
99
101
|
try:
|
|
100
102
|
console.print("[cyan]Uploading evaluation results to cloud...[/cyan]")
|
|
101
103
|
|
|
104
|
+
# Get model name as string (handle in-memory model objects)
|
|
105
|
+
model_value = result.config.inference_config.model
|
|
106
|
+
if isinstance(model_value, str):
|
|
107
|
+
model_name = model_value
|
|
108
|
+
else:
|
|
109
|
+
# For in-memory model objects, extract name from config
|
|
110
|
+
model_config = getattr(model_value, "config", None)
|
|
111
|
+
model_name = (
|
|
112
|
+
getattr(model_config, "name_or_path", None) or type(model_value).__name__
|
|
113
|
+
)
|
|
114
|
+
|
|
102
115
|
# Create evaluation run
|
|
103
116
|
run_data = {
|
|
104
|
-
"
|
|
117
|
+
"pipeline_id": self.project_id,
|
|
105
118
|
"name": f"Evaluation - {datetime.now(UTC).strftime('%Y-%m-%d %H:%M')}",
|
|
106
|
-
"model_name":
|
|
119
|
+
"model_name": model_name,
|
|
107
120
|
"model_provider": result.config.inference_config.backend,
|
|
108
121
|
"config": {
|
|
109
122
|
"evaluators": getattr(result.config, "evaluators", ["tool_calling"]),
|
deepfabric/exceptions.py
CHANGED
|
@@ -65,3 +65,17 @@ class RetryExhaustedError(ModelError):
|
|
|
65
65
|
"""Raised when maximum retries are exceeded."""
|
|
66
66
|
|
|
67
67
|
pass
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class LoaderError(DeepFabricError):
|
|
71
|
+
"""Raised when dataset loading fails.
|
|
72
|
+
|
|
73
|
+
Common causes:
|
|
74
|
+
- File not found
|
|
75
|
+
- Invalid file format (malformed JSON/JSONL)
|
|
76
|
+
- Cloud authentication failure
|
|
77
|
+
- Network errors
|
|
78
|
+
- Empty dataset
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
pass
|
deepfabric/generator.py
CHANGED
|
@@ -127,14 +127,14 @@ class DataSetGeneratorConfig(BaseModel):
|
|
|
127
127
|
)
|
|
128
128
|
|
|
129
129
|
# Modular conversation configuration
|
|
130
|
-
conversation_type: Literal["basic", "
|
|
130
|
+
conversation_type: Literal["basic", "cot"] = Field(
|
|
131
131
|
default="basic",
|
|
132
|
-
description="Base conversation type: basic (simple chat),
|
|
132
|
+
description="Base conversation type: basic (simple chat), cot (with reasoning traces)",
|
|
133
133
|
)
|
|
134
134
|
|
|
135
135
|
reasoning_style: Literal["freetext", "agent", "structured", "hybrid"] | None = Field(
|
|
136
136
|
default=None,
|
|
137
|
-
description="Reasoning style for
|
|
137
|
+
description="Reasoning style for cot type: freetext (natural language) or agent (structured step-by-step for tool-calling). Note: 'structured' and 'hybrid' are deprecated.",
|
|
138
138
|
)
|
|
139
139
|
|
|
140
140
|
@field_validator("reasoning_style", mode="before")
|
|
@@ -1045,7 +1045,7 @@ class DataSetGenerator:
|
|
|
1045
1045
|
return CONVERSATION_GENERATION_PROMPT
|
|
1046
1046
|
|
|
1047
1047
|
# Handle chain of thought conversations
|
|
1048
|
-
if self.config.conversation_type == "
|
|
1048
|
+
if self.config.conversation_type == "cot":
|
|
1049
1049
|
# Agent mode with tools - use agent prompts
|
|
1050
1050
|
if self.config.agent_mode == "single_turn" and self.tool_registry:
|
|
1051
1051
|
# Use agent prompt for single-turn tool calling
|