DeepFabric 4.9.0__py3-none-any.whl → 4.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deepfabric/validation.py CHANGED
@@ -1,6 +1,5 @@
1
1
  import time
2
2
 
3
- from .exceptions import ConfigurationError
4
3
  from .tui import get_tui
5
4
 
6
5
 
@@ -27,91 +26,39 @@ def calculate_expected_paths(mode: str, depth: int, degree: int) -> int:
27
26
 
28
27
 
29
28
  def validate_path_requirements(
30
- mode: str,
31
- depth: int,
32
- degree: int,
33
- num_steps: int,
34
- batch_size: int,
35
- loading_existing: bool = False,
29
+ mode: str, # noqa: ARG001 - kept for API compatibility
30
+ depth: int, # noqa: ARG001 - kept for API compatibility
31
+ degree: int, # noqa: ARG001 - kept for API compatibility
32
+ num_samples: int | str, # noqa: ARG001 - kept for API compatibility
33
+ batch_size: int, # noqa: ARG001 - kept for API compatibility
34
+ loading_existing: bool = False, # noqa: ARG001 - kept for API compatibility
36
35
  ) -> None:
37
36
  """
38
- Validate that the topic generation parameters will produce enough paths.
37
+ Validate topic generation parameters (informational only, no longer errors).
38
+
39
+ When num_samples exceeds available paths, topics will cycle for even coverage.
39
40
 
40
41
  Args:
41
42
  mode: Generation mode ('tree' or 'graph')
42
43
  depth: Depth of the tree/graph
43
44
  degree: Branching factor
44
- num_steps: Number of generation steps
45
- batch_size: Batch size for generation
45
+ num_samples: Total samples to generate, or "auto"/percentage string
46
+ batch_size: Batch size for generation (not used in validation, kept for API compat)
46
47
  loading_existing: Whether loading existing topic model from file
47
-
48
- Raises:
49
- ConfigurationError: If validation fails
50
48
  """
51
- if loading_existing:
52
- # Can't validate existing files without loading them
53
- return
54
-
55
- expected_paths = calculate_expected_paths(mode, depth, degree)
56
- required_samples = num_steps * batch_size
57
-
58
- if required_samples > expected_paths:
59
- # Alternative: provide exact combinations that use all paths
60
- optimal_combinations = []
61
- for test_steps in range(1, expected_paths + 1):
62
- test_batch = expected_paths // test_steps
63
- if test_steps * test_batch <= expected_paths and test_batch > 0:
64
- optimal_combinations.append((test_steps, test_batch))
65
-
66
- # Sort by preference (fewer steps first, then larger batches)
67
- optimal_combinations.sort(key=lambda x: (x[0], -x[1]))
68
-
69
- tui = get_tui()
70
- tui.error(" Path validation failed - stopping before topic generation")
71
-
72
- # Build recommendations - focus on optimal combinations rather than misleading individual params
73
- recommendations = []
74
-
75
- if optimal_combinations:
76
- recommendations.append(
77
- f" • Use one of these combinations to utilize the {expected_paths} paths:"
78
- )
79
- for steps, batch in optimal_combinations[:3]: # Show top 3
80
- total_samples = steps * batch
81
- recommendations.append(
82
- f" --num-samples {steps} --batch-size {batch} (generates {total_samples} samples)"
83
- )
84
-
85
- recommendations.extend(
86
- [
87
- f" • Or increase --depth (currently {depth}) or --degree (currently {degree})",
88
- ]
89
- )
90
-
91
- estimation_note = ""
92
- if mode == "graph":
93
- estimation_note = " (estimated - graphs vary due to cross-connections)"
94
-
95
- error_msg = (
96
- f"Insufficient expected paths for dataset generation:\n"
97
- f" • Expected {mode} paths: ~{expected_paths}{estimation_note} (depth={depth}, degree={degree})\n"
98
- f" • Requested samples: {required_samples} ({num_steps} steps × {batch_size} batch size)\n"
99
- f" • Shortfall: ~{required_samples - expected_paths} samples\n\n"
100
- f"Recommendations:\n" + "\n".join(recommendations)
101
- )
102
-
103
- if mode == "graph":
104
- error_msg += f"\n\nNote: Graph path counts are estimates. The actual graph may produce {expected_paths // 2}-{expected_paths * 2} paths due to cross-connections."
105
-
106
- raise ConfigurationError(error_msg)
49
+ # No validation needed - generator handles all cases:
50
+ # - num_samples < paths: random subset
51
+ # - num_samples == paths: all paths used once
52
+ # - num_samples > paths: topics cycle for even coverage
53
+ pass
107
54
 
108
55
 
109
56
  def show_validation_success(
110
57
  mode: str,
111
58
  depth: int,
112
59
  degree: int,
113
- num_steps: int,
114
- batch_size: int,
60
+ num_samples: int | str,
61
+ batch_size: int, # noqa: ARG001 - kept for API compatibility
115
62
  loading_existing: bool = False,
116
63
  ) -> None:
117
64
  """
@@ -121,21 +68,44 @@ def show_validation_success(
121
68
  mode: Generation mode ('tree' or 'graph')
122
69
  depth: Depth of the tree/graph
123
70
  degree: Branching factor
124
- num_steps: Number of generation steps
125
- batch_size: Batch size for generation
71
+ num_samples: Total samples to generate, or "auto"/percentage string
72
+ batch_size: Batch size for generation (not used in display, kept for API compat)
126
73
  loading_existing: Whether loading existing topic model from file
127
74
  """
128
75
  if loading_existing:
129
76
  return
130
77
 
131
78
  expected_paths = calculate_expected_paths(mode, depth, degree)
132
- total_samples = num_steps * batch_size
133
-
134
79
  tui = get_tui()
80
+
81
+ # Handle dynamic num_samples (auto or percentage)
82
+ if isinstance(num_samples, str):
83
+ tui.success("Path Validation Passed")
84
+ tui.info(f" Expected {mode} paths: ~{expected_paths} (depth={depth}, degree={degree})")
85
+ if num_samples == "auto":
86
+ tui.info(f" Requested samples: auto (will use all ~{expected_paths} paths)")
87
+ else:
88
+ # Percentage string like "50%"
89
+ pct = float(num_samples[:-1])
90
+ estimated_samples = max(1, int(expected_paths * pct / 100))
91
+ tui.info(
92
+ f" Requested samples: {num_samples} (~{estimated_samples} of {expected_paths} paths)"
93
+ )
94
+ if mode == "graph":
95
+ tui.info(" Note: Graph paths may vary due to cross-connections")
96
+ print() # Extra space before topic generation
97
+ time.sleep(0.5) # Brief pause to allow user to see the information
98
+ return
99
+
135
100
  tui.success("Path Validation Passed")
136
101
  tui.info(f" Expected {mode} paths: ~{expected_paths} (depth={depth}, degree={degree})")
137
- tui.info(f" Requested samples: {total_samples} ({num_steps} steps x {batch_size} batch size)")
138
- tui.info(f" Path utilization: ~{min(100, (total_samples / expected_paths) * 100):.1f}%")
102
+ tui.info(f" Requested samples: {num_samples}")
103
+
104
+ if num_samples > expected_paths:
105
+ cycles = (num_samples + expected_paths - 1) // expected_paths # ceil division
106
+ tui.info(f" Topic cycling: ~{cycles}x passes through topics")
107
+ else:
108
+ tui.info(f" Path utilization: ~{(num_samples / expected_paths) * 100:.1f}%")
139
109
 
140
110
  if mode == "graph":
141
111
  tui.info(" Note: Graph paths may vary due to cross-connections")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: DeepFabric
3
- Version: 4.9.0
3
+ Version: 4.10.0
4
4
  Summary: Curate High Quality Datasets, Train, Evaluate and Ship
5
5
  Author-email: DeepFabric Team <oss@alwaysfurther.ai>
6
6
  License-File: LICENSE
@@ -25,7 +25,7 @@ Requires-Dist: pyyaml>=6.0.1
25
25
  Requires-Dist: rich>=13.0.0
26
26
  Requires-Dist: sentencepiece>=0.1.99
27
27
  Requires-Dist: spin-sdk>=3.4.1
28
- Requires-Dist: transformers==5.0.0rc2
28
+ Requires-Dist: transformers==5.0.0rc3
29
29
  Provides-Extra: dev
30
30
  Requires-Dist: bandit>=1.7.10; extra == 'dev'
31
31
  Requires-Dist: mermaid-py>=0.2.0; extra == 'dev'
@@ -172,12 +172,12 @@ generation:
172
172
  Provide context on when and why to use specific patterns or libraries.
173
173
  Ensure code is modular, testable, and maintainable.
174
174
 
175
+ # Agent mode is implicit when tools are configured
175
176
  conversation:
176
177
  type: cot # basic | cot
177
178
  reasoning_style: agent # freetext | agent (for cot)
178
- agent_mode: single_turn # single_turn | multi_turn (for agent)
179
-
180
- # Tool configuration (required for agent modes)
179
+
180
+ # Tool configuration (enables agent mode automatically)
181
181
  tools:
182
182
  spin_endpoint: "http://localhost:3000" # Spin service for tool execution
183
183
  components: # Map component name to tool names
@@ -535,7 +535,6 @@ generation:
535
535
  conversation:
536
536
  type: cot
537
537
  reasoning_style: agent
538
- agent_mode: single_turn
539
538
 
540
539
  tools:
541
540
  spin_endpoint: "http://localhost:3000" # Spin service URL
@@ -1,40 +1,40 @@
1
1
  deepfabric/__init__.py,sha256=DA4DT1nmF7VjQ00IA_8J_kSlqGYrK2EvP5H4XPzuKoA,1690
2
2
  deepfabric/__main__.py,sha256=Ojx6VFnAWx4wY52VehsWhW85IaEmPb8FP_sGvOk628g,113
3
3
  deepfabric/auth.py,sha256=7UEvk84XPx7wiXj0QfhIH_MJ5BlI7wQHodwgd4LPO80,12977
4
- deepfabric/builders.py,sha256=XKlKsAhsed2_M_uHft-VB-n8T1ZhAbIo_7XXc2mE3Ug,11503
5
- deepfabric/builders_agent.py,sha256=7xaXVNzmW_vBDkxJNKR_9HveljrdljwDAFx3iu-L_-M,48468
6
- deepfabric/cli.py,sha256=gf7HoXlLn-8SZHHmpNWI7dwXvv_VCzu7wCQraOJkKsc,51236
4
+ deepfabric/builders.py,sha256=2-tJh0_yzGqcBn8qRy3ocd5qIhBciwLsGPiqP1PK75w,10821
5
+ deepfabric/builders_agent.py,sha256=youDxc8tWJF0DbA3Al-6zcswx68gcI7z9Gz7hPsDTJo,28326
6
+ deepfabric/cli.py,sha256=KGCR7AGEAbshfiFNu-MnNGdgqIVBF-vI2piHQzkRrv0,68750
7
7
  deepfabric/cloud_upload.py,sha256=WYaISQY1XxorNdL7_F_FYwQUPHGJr2Bb_bohAa5xpbY,27801
8
- deepfabric/config.py,sha256=5M8BhvWXGmC0rcvSlY3TKwGI8TIrYHL6xjQ95ITGs8o,34780
9
- deepfabric/config_manager.py,sha256=CIOJV121tBpH_V_ljwTenvyFO31yoohPSjW0yrHCD-w,9041
10
- deepfabric/constants.py,sha256=MwADziDmnt0zi9t9gG65EM7AJvIQP0FSsXgGj7Yqxm8,2578
8
+ deepfabric/config.py,sha256=LJptRUykRGreirIwyUxF8rcverf1BD9hOPiiYNxkoEI,33990
9
+ deepfabric/config_manager.py,sha256=c0N4Rb0P970khoiKgpN1ULDYV0bZEYIbec45cSfg92c,9334
10
+ deepfabric/constants.py,sha256=nUc2Bf6Fu_c5biUy1nE5jCP9t6z08_ihWzarRTE1nwg,2827
11
11
  deepfabric/dataset.py,sha256=bZfx35A-dt0kMflgskU9Ge-NLVesq8xNKHsrxTnNn6Q,9740
12
- deepfabric/dataset_manager.py,sha256=fJ6VFG05FLTpmbkLKlnVTTi7aim8q7eWI1cgOmKaP5s,20461
12
+ deepfabric/dataset_manager.py,sha256=hf0KQtYpLi4-P-a1czKNhqgrKsqFG8zJjfubbSBOuAo,24652
13
13
  deepfabric/error_codes.py,sha256=HGGWsahUTI8UG996C74X-XgNuaPX8RHo4gOidlaJql4,17630
14
14
  deepfabric/exceptions.py,sha256=pEg4YFQaDEWtBoJaSkxsJJoBBp2-6EE3M7m5H7R6i_8,1586
15
15
  deepfabric/factory.py,sha256=OCqo3w-eiYNWvK_I_egDZuWj192kf18yD3SPj8rrPxU,753
16
- deepfabric/generator.py,sha256=wdGxuKQOMGY8oEpa-YvXX2ceCnzRApDgzweLHgwtjlw,44226
17
- deepfabric/graph.py,sha256=JQ68GXnLymtR7ESfeZgdMh3YrReSPX5wEWEqXlkIR4Q,24175
16
+ deepfabric/generator.py,sha256=sirx7qukTl_SM9RGYT7xPDP65L8BlNzZb7ykbvO4Sg8,68994
17
+ deepfabric/graph.py,sha256=UehVpNnLKThNceteKUVfY401V160q7LFYm8cyTdE2bw,25787
18
18
  deepfabric/hf_hub.py,sha256=hw2CWqZ3CzyAzMo552VPZKVWtuv-j0TQ2_gV5K0AUto,7670
19
19
  deepfabric/kaggle_hub.py,sha256=CXVO1Lv3IRhdO0bp9_IQr6nUs-v5jOWi5k4EwPkbJmw,7927
20
20
  deepfabric/loader.py,sha256=YNTGZZE-POjR0BIlx6WCT4bIzf0T4lW_fQl7ev9UFqE,18584
21
21
  deepfabric/metrics.py,sha256=txqmXDM_r6cWPjdnnEjoA5xJkCHxFrjKWTpihE_jimA,6129
22
22
  deepfabric/progress.py,sha256=3XQQrf2pUZlyd-8eRcNATH1v0Oi8JMedVHGbhPcca-8,9354
23
- deepfabric/prompts.py,sha256=C-zonGrzyE752oWY2zOr4ufmvVaxsxFx4E2wLuSNluI,15447
24
- deepfabric/schemas.py,sha256=N1cTvXuAyV8r8YS5DSAcFgpfxF0AqVGJbbOpeT5H72g,37881
23
+ deepfabric/prompts.py,sha256=pAFkw_0WAcYdzjqkHgywyLiSEdDlRdZwmyykUC307Gk,13872
24
+ deepfabric/schemas.py,sha256=ebSwaXCu3lwshFGEhOhuAAUQA3deaYWXUcoRaeslqLc,37880
25
25
  deepfabric/stream_simulator.py,sha256=GzvAxWxHVsuTwgXlqwXNfrTUDn6sND2kJOoQuYg88FA,3028
26
26
  deepfabric/topic_manager.py,sha256=6YxMO6dQHaGyxghsI8iNJGP1miaekBe5Mh1WdYeLqdI,11164
27
- deepfabric/topic_model.py,sha256=i_wYpw2kUl8NLodOSaqNu-C4_d6caYT1kPe_vkKjoyw,707
28
- deepfabric/tree.py,sha256=Kxl2iLHU55xPq2MwdoLM0-M2nZRx51bRj9FM36jqs-M,14933
29
- deepfabric/tui.py,sha256=9ETtGFQk26U9PQ2b5foplVYDKxaFGd-8UqK7uSKyHwE,50480
27
+ deepfabric/topic_model.py,sha256=3RupTQnEYfKSLASV79tfEhBXBrm5VT3nUcpl2EyK7Hk,1654
28
+ deepfabric/tree.py,sha256=0J5KuHJrVEfF4F6zM9o2-ppAEzolay_9fDloZCHDN6c,15791
29
+ deepfabric/tui.py,sha256=hUYK00czktdPT5W3GJQ5svObbDYMAAnou3u-YDOEC-o,52608
30
30
  deepfabric/update_checker.py,sha256=AUa9iUdkGNzu7tWkQRxIlF19YRmKLetwxu-Ys2ONS8Y,5145
31
- deepfabric/utils.py,sha256=a9G6VTw52UdddTFoMw-JjunjawtPN54N275-XGPL2cQ,5822
32
- deepfabric/validation.py,sha256=1x1X_45kyI0w_FCdUiNdvy4LQu3B0KVR-fyvLkrKEGw,5125
31
+ deepfabric/utils.py,sha256=nVDNQnNQlfua8mMpKC7gk9EB3bYXbNUk6Sq39k1mN7s,12507
32
+ deepfabric/validation.py,sha256=D6H_L_0mDtiI0s6DLXxkR1BickOw7HGM2al8u2xhKZk,4355
33
33
  deepfabric/evaluation/__init__.py,sha256=7xMLmYXaNC1U7qf88S9fMxWTABoDRiOcimSYfCt_PSo,1224
34
34
  deepfabric/evaluation/evaluator.py,sha256=qNowle5v2ukDJ11igNOCParlBfXT8QUeOvXx6sSJ_Ug,34480
35
35
  deepfabric/evaluation/inference.py,sha256=y7JA0IsBDwe0sJzVQeItYHAV5wUJn6Bjp1Wsp3r7qYQ,7644
36
36
  deepfabric/evaluation/metrics.py,sha256=ITNevYj7CBXzYs-rYhsihO6-rE9n30CYRaVUfdTbcFQ,12026
37
- deepfabric/evaluation/parser.py,sha256=AXyiCtNV4rueZQxLE_GqqkFNeDAewGoC--0vXHW-jW8,10603
37
+ deepfabric/evaluation/parser.py,sha256=KLRP5cI6y2wqtbX2kPnyEJiYHqMk5qDa1toqJrKetb4,10604
38
38
  deepfabric/evaluation/backends/__init__.py,sha256=GqC0FfpWmtgJmjHd0kVKNg7g-NjhRoh5h2MtAoOhUOY,827
39
39
  deepfabric/evaluation/backends/llm_eval_backend.py,sha256=4jp5tnTp7v_0pHCGhcPbI55ig79-eVxdzooesi2PymA,18827
40
40
  deepfabric/evaluation/backends/ollama_backend.py,sha256=mtPp1JtIDRjb76X_rTa1jS1ETzMjte8t3WJjuYV1oDQ,4372
@@ -69,8 +69,8 @@ deepfabric/training/api_key_prompt.py,sha256=pSIMX3eDGyV9x_r7MHE4TyIsIB2SqYb8gKC
69
69
  deepfabric/training/callback.py,sha256=5zdifbHA2PWILHl2cVFyO65aW7cGAQhcvDqm3s8_I0Q,13221
70
70
  deepfabric/training/dataset_utils.py,sha256=klx8DoawEwuMigBDP-RpMAfe7FvYxRbhj599MErxBr4,7313
71
71
  deepfabric/training/metrics_sender.py,sha256=ZCyvMv5hRu8XJnQYVGXJ9wh7HEMJ0l3Ktyi8_etOpZs,10833
72
- deepfabric-4.9.0.dist-info/METADATA,sha256=exOMACgFoZPhQ0xpu-x5iyo4J5-1EBiEHtu3aQ7pozI,20537
73
- deepfabric-4.9.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
74
- deepfabric-4.9.0.dist-info/entry_points.txt,sha256=zatevils13hfs8x29_vmUyivQ6rTtq7hE2RBusZw1Fo,50
75
- deepfabric-4.9.0.dist-info/licenses/LICENSE,sha256=-qRt8wmrhQ9aMf7KhmZXc2vrTETYZF-6_T1KCeUhvHY,11340
76
- deepfabric-4.9.0.dist-info/RECORD,,
72
+ deepfabric-4.10.0.dist-info/METADATA,sha256=bybLquu722z-9xb40wCLHe6QIliwAYSW7oHONs_D02Y,20498
73
+ deepfabric-4.10.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
74
+ deepfabric-4.10.0.dist-info/entry_points.txt,sha256=zatevils13hfs8x29_vmUyivQ6rTtq7hE2RBusZw1Fo,50
75
+ deepfabric-4.10.0.dist-info/licenses/LICENSE,sha256=-qRt8wmrhQ9aMf7KhmZXc2vrTETYZF-6_T1KCeUhvHY,11340
76
+ deepfabric-4.10.0.dist-info/RECORD,,