DeepFabric 4.8.3__py3-none-any.whl → 4.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepfabric/builders.py +7 -21
- deepfabric/builders_agent.py +0 -542
- deepfabric/cli.py +505 -74
- deepfabric/config.py +57 -73
- deepfabric/config_manager.py +8 -6
- deepfabric/constants.py +6 -0
- deepfabric/dataset_manager.py +107 -11
- deepfabric/evaluation/parser.py +7 -7
- deepfabric/generator.py +656 -103
- deepfabric/graph.py +46 -1
- deepfabric/prompts.py +12 -49
- deepfabric/schemas.py +4 -3
- deepfabric/topic_model.py +32 -0
- deepfabric/tree.py +23 -1
- deepfabric/tui.py +66 -21
- deepfabric/utils.py +184 -0
- deepfabric/validation.py +47 -77
- {deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/METADATA +6 -7
- {deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/RECORD +22 -22
- {deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/WHEEL +0 -0
- {deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/entry_points.txt +0 -0
- {deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/licenses/LICENSE +0 -0
deepfabric/validation.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import time
|
|
2
2
|
|
|
3
|
-
from .exceptions import ConfigurationError
|
|
4
3
|
from .tui import get_tui
|
|
5
4
|
|
|
6
5
|
|
|
@@ -27,91 +26,39 @@ def calculate_expected_paths(mode: str, depth: int, degree: int) -> int:
|
|
|
27
26
|
|
|
28
27
|
|
|
29
28
|
def validate_path_requirements(
|
|
30
|
-
mode: str,
|
|
31
|
-
depth: int,
|
|
32
|
-
degree: int,
|
|
33
|
-
|
|
34
|
-
batch_size: int,
|
|
35
|
-
loading_existing: bool = False,
|
|
29
|
+
mode: str, # noqa: ARG001 - kept for API compatibility
|
|
30
|
+
depth: int, # noqa: ARG001 - kept for API compatibility
|
|
31
|
+
degree: int, # noqa: ARG001 - kept for API compatibility
|
|
32
|
+
num_samples: int | str, # noqa: ARG001 - kept for API compatibility
|
|
33
|
+
batch_size: int, # noqa: ARG001 - kept for API compatibility
|
|
34
|
+
loading_existing: bool = False, # noqa: ARG001 - kept for API compatibility
|
|
36
35
|
) -> None:
|
|
37
36
|
"""
|
|
38
|
-
Validate
|
|
37
|
+
Validate topic generation parameters (informational only, no longer errors).
|
|
38
|
+
|
|
39
|
+
When num_samples exceeds available paths, topics will cycle for even coverage.
|
|
39
40
|
|
|
40
41
|
Args:
|
|
41
42
|
mode: Generation mode ('tree' or 'graph')
|
|
42
43
|
depth: Depth of the tree/graph
|
|
43
44
|
degree: Branching factor
|
|
44
|
-
|
|
45
|
-
batch_size: Batch size for generation
|
|
45
|
+
num_samples: Total samples to generate, or "auto"/percentage string
|
|
46
|
+
batch_size: Batch size for generation (not used in validation, kept for API compat)
|
|
46
47
|
loading_existing: Whether loading existing topic model from file
|
|
47
|
-
|
|
48
|
-
Raises:
|
|
49
|
-
ConfigurationError: If validation fails
|
|
50
48
|
"""
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
required_samples = num_steps * batch_size
|
|
57
|
-
|
|
58
|
-
if required_samples > expected_paths:
|
|
59
|
-
# Alternative: provide exact combinations that use all paths
|
|
60
|
-
optimal_combinations = []
|
|
61
|
-
for test_steps in range(1, expected_paths + 1):
|
|
62
|
-
test_batch = expected_paths // test_steps
|
|
63
|
-
if test_steps * test_batch <= expected_paths and test_batch > 0:
|
|
64
|
-
optimal_combinations.append((test_steps, test_batch))
|
|
65
|
-
|
|
66
|
-
# Sort by preference (fewer steps first, then larger batches)
|
|
67
|
-
optimal_combinations.sort(key=lambda x: (x[0], -x[1]))
|
|
68
|
-
|
|
69
|
-
tui = get_tui()
|
|
70
|
-
tui.error(" Path validation failed - stopping before topic generation")
|
|
71
|
-
|
|
72
|
-
# Build recommendations - focus on optimal combinations rather than misleading individual params
|
|
73
|
-
recommendations = []
|
|
74
|
-
|
|
75
|
-
if optimal_combinations:
|
|
76
|
-
recommendations.append(
|
|
77
|
-
f" • Use one of these combinations to utilize the {expected_paths} paths:"
|
|
78
|
-
)
|
|
79
|
-
for steps, batch in optimal_combinations[:3]: # Show top 3
|
|
80
|
-
total_samples = steps * batch
|
|
81
|
-
recommendations.append(
|
|
82
|
-
f" --num-samples {steps} --batch-size {batch} (generates {total_samples} samples)"
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
recommendations.extend(
|
|
86
|
-
[
|
|
87
|
-
f" • Or increase --depth (currently {depth}) or --degree (currently {degree})",
|
|
88
|
-
]
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
estimation_note = ""
|
|
92
|
-
if mode == "graph":
|
|
93
|
-
estimation_note = " (estimated - graphs vary due to cross-connections)"
|
|
94
|
-
|
|
95
|
-
error_msg = (
|
|
96
|
-
f"Insufficient expected paths for dataset generation:\n"
|
|
97
|
-
f" • Expected {mode} paths: ~{expected_paths}{estimation_note} (depth={depth}, degree={degree})\n"
|
|
98
|
-
f" • Requested samples: {required_samples} ({num_steps} steps × {batch_size} batch size)\n"
|
|
99
|
-
f" • Shortfall: ~{required_samples - expected_paths} samples\n\n"
|
|
100
|
-
f"Recommendations:\n" + "\n".join(recommendations)
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
if mode == "graph":
|
|
104
|
-
error_msg += f"\n\nNote: Graph path counts are estimates. The actual graph may produce {expected_paths // 2}-{expected_paths * 2} paths due to cross-connections."
|
|
105
|
-
|
|
106
|
-
raise ConfigurationError(error_msg)
|
|
49
|
+
# No validation needed - generator handles all cases:
|
|
50
|
+
# - num_samples < paths: random subset
|
|
51
|
+
# - num_samples == paths: all paths used once
|
|
52
|
+
# - num_samples > paths: topics cycle for even coverage
|
|
53
|
+
pass
|
|
107
54
|
|
|
108
55
|
|
|
109
56
|
def show_validation_success(
|
|
110
57
|
mode: str,
|
|
111
58
|
depth: int,
|
|
112
59
|
degree: int,
|
|
113
|
-
|
|
114
|
-
batch_size: int,
|
|
60
|
+
num_samples: int | str,
|
|
61
|
+
batch_size: int, # noqa: ARG001 - kept for API compatibility
|
|
115
62
|
loading_existing: bool = False,
|
|
116
63
|
) -> None:
|
|
117
64
|
"""
|
|
@@ -121,21 +68,44 @@ def show_validation_success(
|
|
|
121
68
|
mode: Generation mode ('tree' or 'graph')
|
|
122
69
|
depth: Depth of the tree/graph
|
|
123
70
|
degree: Branching factor
|
|
124
|
-
|
|
125
|
-
batch_size: Batch size for generation
|
|
71
|
+
num_samples: Total samples to generate, or "auto"/percentage string
|
|
72
|
+
batch_size: Batch size for generation (not used in display, kept for API compat)
|
|
126
73
|
loading_existing: Whether loading existing topic model from file
|
|
127
74
|
"""
|
|
128
75
|
if loading_existing:
|
|
129
76
|
return
|
|
130
77
|
|
|
131
78
|
expected_paths = calculate_expected_paths(mode, depth, degree)
|
|
132
|
-
total_samples = num_steps * batch_size
|
|
133
|
-
|
|
134
79
|
tui = get_tui()
|
|
80
|
+
|
|
81
|
+
# Handle dynamic num_samples (auto or percentage)
|
|
82
|
+
if isinstance(num_samples, str):
|
|
83
|
+
tui.success("Path Validation Passed")
|
|
84
|
+
tui.info(f" Expected {mode} paths: ~{expected_paths} (depth={depth}, degree={degree})")
|
|
85
|
+
if num_samples == "auto":
|
|
86
|
+
tui.info(f" Requested samples: auto (will use all ~{expected_paths} paths)")
|
|
87
|
+
else:
|
|
88
|
+
# Percentage string like "50%"
|
|
89
|
+
pct = float(num_samples[:-1])
|
|
90
|
+
estimated_samples = max(1, int(expected_paths * pct / 100))
|
|
91
|
+
tui.info(
|
|
92
|
+
f" Requested samples: {num_samples} (~{estimated_samples} of {expected_paths} paths)"
|
|
93
|
+
)
|
|
94
|
+
if mode == "graph":
|
|
95
|
+
tui.info(" Note: Graph paths may vary due to cross-connections")
|
|
96
|
+
print() # Extra space before topic generation
|
|
97
|
+
time.sleep(0.5) # Brief pause to allow user to see the information
|
|
98
|
+
return
|
|
99
|
+
|
|
135
100
|
tui.success("Path Validation Passed")
|
|
136
101
|
tui.info(f" Expected {mode} paths: ~{expected_paths} (depth={depth}, degree={degree})")
|
|
137
|
-
tui.info(f" Requested samples: {
|
|
138
|
-
|
|
102
|
+
tui.info(f" Requested samples: {num_samples}")
|
|
103
|
+
|
|
104
|
+
if num_samples > expected_paths:
|
|
105
|
+
cycles = (num_samples + expected_paths - 1) // expected_paths # ceil division
|
|
106
|
+
tui.info(f" Topic cycling: ~{cycles}x passes through topics")
|
|
107
|
+
else:
|
|
108
|
+
tui.info(f" Path utilization: ~{(num_samples / expected_paths) * 100:.1f}%")
|
|
139
109
|
|
|
140
110
|
if mode == "graph":
|
|
141
111
|
tui.info(" Note: Graph paths may vary due to cross-connections")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: DeepFabric
|
|
3
|
-
Version: 4.8.3
|
|
3
|
+
Version: 4.10.0
|
|
4
4
|
Summary: Curate High Quality Datasets, Train, Evaluate and Ship
|
|
5
5
|
Author-email: DeepFabric Team <oss@alwaysfurther.ai>
|
|
6
6
|
License-File: LICENSE
|
|
@@ -11,7 +11,7 @@ Requires-Dist: componentize-py>=0.19.3
|
|
|
11
11
|
Requires-Dist: datasets<5.0,>=3.0
|
|
12
12
|
Requires-Dist: google-api-core>=2.0.0
|
|
13
13
|
Requires-Dist: google-genai>=1.56.0
|
|
14
|
-
Requires-Dist: huggingface-hub
|
|
14
|
+
Requires-Dist: huggingface-hub>=1.3.1
|
|
15
15
|
Requires-Dist: kagglehub>=0.3.0
|
|
16
16
|
Requires-Dist: mermaid-py>=0.8.0
|
|
17
17
|
Requires-Dist: ollama>=0.6.1
|
|
@@ -25,7 +25,7 @@ Requires-Dist: pyyaml>=6.0.1
|
|
|
25
25
|
Requires-Dist: rich>=13.0.0
|
|
26
26
|
Requires-Dist: sentencepiece>=0.1.99
|
|
27
27
|
Requires-Dist: spin-sdk>=3.4.1
|
|
28
|
-
Requires-Dist: transformers
|
|
28
|
+
Requires-Dist: transformers==5.0.0rc3
|
|
29
29
|
Provides-Extra: dev
|
|
30
30
|
Requires-Dist: bandit>=1.7.10; extra == 'dev'
|
|
31
31
|
Requires-Dist: mermaid-py>=0.2.0; extra == 'dev'
|
|
@@ -172,12 +172,12 @@ generation:
|
|
|
172
172
|
Provide context on when and why to use specific patterns or libraries.
|
|
173
173
|
Ensure code is modular, testable, and maintainable.
|
|
174
174
|
|
|
175
|
+
# Agent mode is implicit when tools are configured
|
|
175
176
|
conversation:
|
|
176
177
|
type: cot # basic | cot
|
|
177
178
|
reasoning_style: agent # freetext | agent (for cot)
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
# Tool configuration (required for agent modes)
|
|
179
|
+
|
|
180
|
+
# Tool configuration (enables agent mode automatically)
|
|
181
181
|
tools:
|
|
182
182
|
spin_endpoint: "http://localhost:3000" # Spin service for tool execution
|
|
183
183
|
components: # Map component name to tool names
|
|
@@ -535,7 +535,6 @@ generation:
|
|
|
535
535
|
conversation:
|
|
536
536
|
type: cot
|
|
537
537
|
reasoning_style: agent
|
|
538
|
-
agent_mode: single_turn
|
|
539
538
|
|
|
540
539
|
tools:
|
|
541
540
|
spin_endpoint: "http://localhost:3000" # Spin service URL
|
|
@@ -1,40 +1,40 @@
|
|
|
1
1
|
deepfabric/__init__.py,sha256=DA4DT1nmF7VjQ00IA_8J_kSlqGYrK2EvP5H4XPzuKoA,1690
|
|
2
2
|
deepfabric/__main__.py,sha256=Ojx6VFnAWx4wY52VehsWhW85IaEmPb8FP_sGvOk628g,113
|
|
3
3
|
deepfabric/auth.py,sha256=7UEvk84XPx7wiXj0QfhIH_MJ5BlI7wQHodwgd4LPO80,12977
|
|
4
|
-
deepfabric/builders.py,sha256=
|
|
5
|
-
deepfabric/builders_agent.py,sha256=
|
|
6
|
-
deepfabric/cli.py,sha256=
|
|
4
|
+
deepfabric/builders.py,sha256=2-tJh0_yzGqcBn8qRy3ocd5qIhBciwLsGPiqP1PK75w,10821
|
|
5
|
+
deepfabric/builders_agent.py,sha256=youDxc8tWJF0DbA3Al-6zcswx68gcI7z9Gz7hPsDTJo,28326
|
|
6
|
+
deepfabric/cli.py,sha256=KGCR7AGEAbshfiFNu-MnNGdgqIVBF-vI2piHQzkRrv0,68750
|
|
7
7
|
deepfabric/cloud_upload.py,sha256=WYaISQY1XxorNdL7_F_FYwQUPHGJr2Bb_bohAa5xpbY,27801
|
|
8
|
-
deepfabric/config.py,sha256=
|
|
9
|
-
deepfabric/config_manager.py,sha256=
|
|
10
|
-
deepfabric/constants.py,sha256=
|
|
8
|
+
deepfabric/config.py,sha256=LJptRUykRGreirIwyUxF8rcverf1BD9hOPiiYNxkoEI,33990
|
|
9
|
+
deepfabric/config_manager.py,sha256=c0N4Rb0P970khoiKgpN1ULDYV0bZEYIbec45cSfg92c,9334
|
|
10
|
+
deepfabric/constants.py,sha256=nUc2Bf6Fu_c5biUy1nE5jCP9t6z08_ihWzarRTE1nwg,2827
|
|
11
11
|
deepfabric/dataset.py,sha256=bZfx35A-dt0kMflgskU9Ge-NLVesq8xNKHsrxTnNn6Q,9740
|
|
12
|
-
deepfabric/dataset_manager.py,sha256=
|
|
12
|
+
deepfabric/dataset_manager.py,sha256=hf0KQtYpLi4-P-a1czKNhqgrKsqFG8zJjfubbSBOuAo,24652
|
|
13
13
|
deepfabric/error_codes.py,sha256=HGGWsahUTI8UG996C74X-XgNuaPX8RHo4gOidlaJql4,17630
|
|
14
14
|
deepfabric/exceptions.py,sha256=pEg4YFQaDEWtBoJaSkxsJJoBBp2-6EE3M7m5H7R6i_8,1586
|
|
15
15
|
deepfabric/factory.py,sha256=OCqo3w-eiYNWvK_I_egDZuWj192kf18yD3SPj8rrPxU,753
|
|
16
|
-
deepfabric/generator.py,sha256=
|
|
17
|
-
deepfabric/graph.py,sha256=
|
|
16
|
+
deepfabric/generator.py,sha256=sirx7qukTl_SM9RGYT7xPDP65L8BlNzZb7ykbvO4Sg8,68994
|
|
17
|
+
deepfabric/graph.py,sha256=UehVpNnLKThNceteKUVfY401V160q7LFYm8cyTdE2bw,25787
|
|
18
18
|
deepfabric/hf_hub.py,sha256=hw2CWqZ3CzyAzMo552VPZKVWtuv-j0TQ2_gV5K0AUto,7670
|
|
19
19
|
deepfabric/kaggle_hub.py,sha256=CXVO1Lv3IRhdO0bp9_IQr6nUs-v5jOWi5k4EwPkbJmw,7927
|
|
20
20
|
deepfabric/loader.py,sha256=YNTGZZE-POjR0BIlx6WCT4bIzf0T4lW_fQl7ev9UFqE,18584
|
|
21
21
|
deepfabric/metrics.py,sha256=txqmXDM_r6cWPjdnnEjoA5xJkCHxFrjKWTpihE_jimA,6129
|
|
22
22
|
deepfabric/progress.py,sha256=3XQQrf2pUZlyd-8eRcNATH1v0Oi8JMedVHGbhPcca-8,9354
|
|
23
|
-
deepfabric/prompts.py,sha256=
|
|
24
|
-
deepfabric/schemas.py,sha256=
|
|
23
|
+
deepfabric/prompts.py,sha256=pAFkw_0WAcYdzjqkHgywyLiSEdDlRdZwmyykUC307Gk,13872
|
|
24
|
+
deepfabric/schemas.py,sha256=ebSwaXCu3lwshFGEhOhuAAUQA3deaYWXUcoRaeslqLc,37880
|
|
25
25
|
deepfabric/stream_simulator.py,sha256=GzvAxWxHVsuTwgXlqwXNfrTUDn6sND2kJOoQuYg88FA,3028
|
|
26
26
|
deepfabric/topic_manager.py,sha256=6YxMO6dQHaGyxghsI8iNJGP1miaekBe5Mh1WdYeLqdI,11164
|
|
27
|
-
deepfabric/topic_model.py,sha256=
|
|
28
|
-
deepfabric/tree.py,sha256=
|
|
29
|
-
deepfabric/tui.py,sha256=
|
|
27
|
+
deepfabric/topic_model.py,sha256=3RupTQnEYfKSLASV79tfEhBXBrm5VT3nUcpl2EyK7Hk,1654
|
|
28
|
+
deepfabric/tree.py,sha256=0J5KuHJrVEfF4F6zM9o2-ppAEzolay_9fDloZCHDN6c,15791
|
|
29
|
+
deepfabric/tui.py,sha256=hUYK00czktdPT5W3GJQ5svObbDYMAAnou3u-YDOEC-o,52608
|
|
30
30
|
deepfabric/update_checker.py,sha256=AUa9iUdkGNzu7tWkQRxIlF19YRmKLetwxu-Ys2ONS8Y,5145
|
|
31
|
-
deepfabric/utils.py,sha256=
|
|
32
|
-
deepfabric/validation.py,sha256=
|
|
31
|
+
deepfabric/utils.py,sha256=nVDNQnNQlfua8mMpKC7gk9EB3bYXbNUk6Sq39k1mN7s,12507
|
|
32
|
+
deepfabric/validation.py,sha256=D6H_L_0mDtiI0s6DLXxkR1BickOw7HGM2al8u2xhKZk,4355
|
|
33
33
|
deepfabric/evaluation/__init__.py,sha256=7xMLmYXaNC1U7qf88S9fMxWTABoDRiOcimSYfCt_PSo,1224
|
|
34
34
|
deepfabric/evaluation/evaluator.py,sha256=qNowle5v2ukDJ11igNOCParlBfXT8QUeOvXx6sSJ_Ug,34480
|
|
35
35
|
deepfabric/evaluation/inference.py,sha256=y7JA0IsBDwe0sJzVQeItYHAV5wUJn6Bjp1Wsp3r7qYQ,7644
|
|
36
36
|
deepfabric/evaluation/metrics.py,sha256=ITNevYj7CBXzYs-rYhsihO6-rE9n30CYRaVUfdTbcFQ,12026
|
|
37
|
-
deepfabric/evaluation/parser.py,sha256=
|
|
37
|
+
deepfabric/evaluation/parser.py,sha256=KLRP5cI6y2wqtbX2kPnyEJiYHqMk5qDa1toqJrKetb4,10604
|
|
38
38
|
deepfabric/evaluation/backends/__init__.py,sha256=GqC0FfpWmtgJmjHd0kVKNg7g-NjhRoh5h2MtAoOhUOY,827
|
|
39
39
|
deepfabric/evaluation/backends/llm_eval_backend.py,sha256=4jp5tnTp7v_0pHCGhcPbI55ig79-eVxdzooesi2PymA,18827
|
|
40
40
|
deepfabric/evaluation/backends/ollama_backend.py,sha256=mtPp1JtIDRjb76X_rTa1jS1ETzMjte8t3WJjuYV1oDQ,4372
|
|
@@ -69,8 +69,8 @@ deepfabric/training/api_key_prompt.py,sha256=pSIMX3eDGyV9x_r7MHE4TyIsIB2SqYb8gKC
|
|
|
69
69
|
deepfabric/training/callback.py,sha256=5zdifbHA2PWILHl2cVFyO65aW7cGAQhcvDqm3s8_I0Q,13221
|
|
70
70
|
deepfabric/training/dataset_utils.py,sha256=klx8DoawEwuMigBDP-RpMAfe7FvYxRbhj599MErxBr4,7313
|
|
71
71
|
deepfabric/training/metrics_sender.py,sha256=ZCyvMv5hRu8XJnQYVGXJ9wh7HEMJ0l3Ktyi8_etOpZs,10833
|
|
72
|
-
deepfabric-4.
|
|
73
|
-
deepfabric-4.
|
|
74
|
-
deepfabric-4.
|
|
75
|
-
deepfabric-4.
|
|
76
|
-
deepfabric-4.
|
|
72
|
+
deepfabric-4.10.0.dist-info/METADATA,sha256=bybLquu722z-9xb40wCLHe6QIliwAYSW7oHONs_D02Y,20498
|
|
73
|
+
deepfabric-4.10.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
74
|
+
deepfabric-4.10.0.dist-info/entry_points.txt,sha256=zatevils13hfs8x29_vmUyivQ6rTtq7hE2RBusZw1Fo,50
|
|
75
|
+
deepfabric-4.10.0.dist-info/licenses/LICENSE,sha256=-qRt8wmrhQ9aMf7KhmZXc2vrTETYZF-6_T1KCeUhvHY,11340
|
|
76
|
+
deepfabric-4.10.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|