PyPI - DeepFabric - Versions diffs - 4.8.3__py3-none-any.whl → 4.10.0__py3-none-any.whl - Mend

DeepFabric 4.8.3__py3-none-any.whl → 4.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

deepfabric/builders.py +7 -21
deepfabric/builders_agent.py +0 -542
deepfabric/cli.py +505 -74
deepfabric/config.py +57 -73
deepfabric/config_manager.py +8 -6
deepfabric/constants.py +6 -0
deepfabric/dataset_manager.py +107 -11
deepfabric/evaluation/parser.py +7 -7
deepfabric/generator.py +656 -103
deepfabric/graph.py +46 -1
deepfabric/prompts.py +12 -49
deepfabric/schemas.py +4 -3
deepfabric/topic_model.py +32 -0
deepfabric/tree.py +23 -1
deepfabric/tui.py +66 -21
deepfabric/utils.py +184 -0
deepfabric/validation.py +47 -77
{deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/METADATA +6 -7
{deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/RECORD +22 -22
{deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/WHEEL +0 -0
{deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/entry_points.txt +0 -0
{deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/licenses/LICENSE +0 -0

deepfabric/validation.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import time
-from .exceptions import ConfigurationError
 from .tui import get_tui
@@ -27,91 +26,39 @@ def calculate_expected_paths(mode: str, depth: int, degree: int) -> int:
 def validate_path_requirements(
-    mode: str,
-    depth: int,
-    degree: int,
-    num_steps: int,
-    batch_size: int,
-    loading_existing: bool = False,
+    mode: str,  # noqa: ARG001 - kept for API compatibility
+    depth: int,  # noqa: ARG001 - kept for API compatibility
+    degree: int,  # noqa: ARG001 - kept for API compatibility
+    num_samples: int | str,  # noqa: ARG001 - kept for API compatibility
+    batch_size: int,  # noqa: ARG001 - kept for API compatibility
+    loading_existing: bool = False,  # noqa: ARG001 - kept for API compatibility
 ) -> None:
     """
-    Validate that the topic generation parameters will produce enough paths.
+    Validate topic generation parameters (informational only, no longer errors).
+    When num_samples exceeds available paths, topics will cycle for even coverage.
     Args:
         mode: Generation mode ('tree' or 'graph')
         depth: Depth of the tree/graph
         degree: Branching factor
-        num_steps: Number of generation steps
-        batch_size: Batch size for generation
+        num_samples: Total samples to generate, or "auto"/percentage string
+        batch_size: Batch size for generation (not used in validation, kept for API compat)
         loading_existing: Whether loading existing topic model from file
-    Raises:
-        ConfigurationError: If validation fails
     """
-    if loading_existing:
-        # Can't validate existing files without loading them
-        return
-    expected_paths = calculate_expected_paths(mode, depth, degree)
-    required_samples = num_steps * batch_size
-    if required_samples > expected_paths:
-        # Alternative: provide exact combinations that use all paths
-        optimal_combinations = []
-        for test_steps in range(1, expected_paths + 1):
-            test_batch = expected_paths // test_steps
-            if test_steps * test_batch <= expected_paths and test_batch > 0:
-                optimal_combinations.append((test_steps, test_batch))
-        # Sort by preference (fewer steps first, then larger batches)
-        optimal_combinations.sort(key=lambda x: (x[0], -x[1]))
-        tui = get_tui()
-        tui.error(" Path validation failed - stopping before topic generation")
-        # Build recommendations - focus on optimal combinations rather than misleading individual params
-        recommendations = []
-        if optimal_combinations:
-            recommendations.append(
-                f"  • Use one of these combinations to utilize the {expected_paths} paths:"
-            )
-            for steps, batch in optimal_combinations[:3]:  # Show top 3
-                total_samples = steps * batch
-                recommendations.append(
-                    f"    --num-samples {steps} --batch-size {batch}  (generates {total_samples} samples)"
-                )
-        recommendations.extend(
-            [
-                f"  • Or increase --depth (currently {depth}) or --degree (currently {degree})",
-            ]
-        )
-        estimation_note = ""
-        if mode == "graph":
-            estimation_note = " (estimated - graphs vary due to cross-connections)"
-        error_msg = (
-            f"Insufficient expected paths for dataset generation:\n"
-            f"  • Expected {mode} paths: ~{expected_paths}{estimation_note} (depth={depth}, degree={degree})\n"
-            f"  • Requested samples: {required_samples} ({num_steps} steps × {batch_size} batch size)\n"
-            f"  • Shortfall: ~{required_samples - expected_paths} samples\n\n"
-            f"Recommendations:\n" + "\n".join(recommendations)
-        )
-        if mode == "graph":
-            error_msg += f"\n\nNote: Graph path counts are estimates. The actual graph may produce {expected_paths // 2}-{expected_paths * 2} paths due to cross-connections."
-        raise ConfigurationError(error_msg)
+    # No validation needed - generator handles all cases:
+    # - num_samples < paths: random subset
+    # - num_samples == paths: all paths used once
+    # - num_samples > paths: topics cycle for even coverage
+    pass
 def show_validation_success(
     mode: str,
     depth: int,
     degree: int,
-    num_steps: int,
-    batch_size: int,
+    num_samples: int | str,
+    batch_size: int,  # noqa: ARG001 - kept for API compatibility
     loading_existing: bool = False,
 ) -> None:
     """
@@ -121,21 +68,44 @@ def show_validation_success(
         mode: Generation mode ('tree' or 'graph')
         depth: Depth of the tree/graph
         degree: Branching factor
-        num_steps: Number of generation steps
-        batch_size: Batch size for generation
+        num_samples: Total samples to generate, or "auto"/percentage string
+        batch_size: Batch size for generation (not used in display, kept for API compat)
         loading_existing: Whether loading existing topic model from file
     """
     if loading_existing:
         return
     expected_paths = calculate_expected_paths(mode, depth, degree)
-    total_samples = num_steps * batch_size
     tui = get_tui()
+    # Handle dynamic num_samples (auto or percentage)
+    if isinstance(num_samples, str):
+        tui.success("Path Validation Passed")
+        tui.info(f"  Expected {mode} paths: ~{expected_paths} (depth={depth}, degree={degree})")
+        if num_samples == "auto":
+            tui.info(f"  Requested samples: auto (will use all ~{expected_paths} paths)")
+        else:
+            # Percentage string like "50%"
+            pct = float(num_samples[:-1])
+            estimated_samples = max(1, int(expected_paths * pct / 100))
+            tui.info(
+                f"  Requested samples: {num_samples} (~{estimated_samples} of {expected_paths} paths)"
+            )
+        if mode == "graph":
+            tui.info("  Note: Graph paths may vary due to cross-connections")
+        print()  # Extra space before topic generation
+        time.sleep(0.5)  # Brief pause to allow user to see the information
+        return
     tui.success("Path Validation Passed")
     tui.info(f"  Expected {mode} paths: ~{expected_paths} (depth={depth}, degree={degree})")
-    tui.info(f"  Requested samples: {total_samples} ({num_steps} steps x {batch_size} batch size)")
-    tui.info(f"  Path utilization: ~{min(100, (total_samples / expected_paths) * 100):.1f}%")
+    tui.info(f"  Requested samples: {num_samples}")
+    if num_samples > expected_paths:
+        cycles = (num_samples + expected_paths - 1) // expected_paths  # ceil division
+        tui.info(f"  Topic cycling: ~{cycles}x passes through topics")
+    else:
+        tui.info(f"  Path utilization: ~{(num_samples / expected_paths) * 100:.1f}%")
     if mode == "graph":
         tui.info("  Note: Graph paths may vary due to cross-connections")

{deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: DeepFabric
-Version: 4.8.3
+Version: 4.10.0
 Summary: Curate High Quality Datasets, Train, Evaluate and Ship
 Author-email: DeepFabric Team <oss@alwaysfurther.ai>
 License-File: LICENSE
@@ -11,7 +11,7 @@ Requires-Dist: componentize-py>=0.19.3
 Requires-Dist: datasets<5.0,>=3.0
 Requires-Dist: google-api-core>=2.0.0
 Requires-Dist: google-genai>=1.56.0
-Requires-Dist: huggingface-hub==0.36.0
+Requires-Dist: huggingface-hub>=1.3.1
 Requires-Dist: kagglehub>=0.3.0
 Requires-Dist: mermaid-py>=0.8.0
 Requires-Dist: ollama>=0.6.1
@@ -25,7 +25,7 @@ Requires-Dist: pyyaml>=6.0.1
 Requires-Dist: rich>=13.0.0
 Requires-Dist: sentencepiece>=0.1.99
 Requires-Dist: spin-sdk>=3.4.1
-Requires-Dist: transformers>=4.57.1
+Requires-Dist: transformers==5.0.0rc3
 Provides-Extra: dev
 Requires-Dist: bandit>=1.7.10; extra == 'dev'
 Requires-Dist: mermaid-py>=0.2.0; extra == 'dev'
@@ -172,12 +172,12 @@ generation:
     Provide context on when and why to use specific patterns or libraries.
     Ensure code is modular, testable, and maintainable.
+  # Agent mode is implicit when tools are configured
   conversation:
     type: cot      # basic | cot
     reasoning_style: agent      # freetext | agent (for cot)
-    agent_mode: single_turn     # single_turn | multi_turn (for agent)
-  # Tool configuration (required for agent modes)
+  # Tool configuration (enables agent mode automatically)
   tools:
     spin_endpoint: "http://localhost:3000"  # Spin service for tool execution
     components:                 # Map component name to tool names
@@ -535,7 +535,6 @@ generation:
   conversation:
     type: cot
     reasoning_style: agent
-    agent_mode: single_turn
   tools:
     spin_endpoint: "http://localhost:3000"  # Spin service URL

{deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/RECORD RENAMED Viewed

@@ -1,40 +1,40 @@
 deepfabric/__init__.py,sha256=DA4DT1nmF7VjQ00IA_8J_kSlqGYrK2EvP5H4XPzuKoA,1690
 deepfabric/__main__.py,sha256=Ojx6VFnAWx4wY52VehsWhW85IaEmPb8FP_sGvOk628g,113
 deepfabric/auth.py,sha256=7UEvk84XPx7wiXj0QfhIH_MJ5BlI7wQHodwgd4LPO80,12977
-deepfabric/builders.py,sha256=XKlKsAhsed2_M_uHft-VB-n8T1ZhAbIo_7XXc2mE3Ug,11503
-deepfabric/builders_agent.py,sha256=7xaXVNzmW_vBDkxJNKR_9HveljrdljwDAFx3iu-L_-M,48468
-deepfabric/cli.py,sha256=gf7HoXlLn-8SZHHmpNWI7dwXvv_VCzu7wCQraOJkKsc,51236
+deepfabric/builders.py,sha256=2-tJh0_yzGqcBn8qRy3ocd5qIhBciwLsGPiqP1PK75w,10821
+deepfabric/builders_agent.py,sha256=youDxc8tWJF0DbA3Al-6zcswx68gcI7z9Gz7hPsDTJo,28326
+deepfabric/cli.py,sha256=KGCR7AGEAbshfiFNu-MnNGdgqIVBF-vI2piHQzkRrv0,68750
 deepfabric/cloud_upload.py,sha256=WYaISQY1XxorNdL7_F_FYwQUPHGJr2Bb_bohAa5xpbY,27801
-deepfabric/config.py,sha256=5M8BhvWXGmC0rcvSlY3TKwGI8TIrYHL6xjQ95ITGs8o,34780
-deepfabric/config_manager.py,sha256=CIOJV121tBpH_V_ljwTenvyFO31yoohPSjW0yrHCD-w,9041
-deepfabric/constants.py,sha256=MwADziDmnt0zi9t9gG65EM7AJvIQP0FSsXgGj7Yqxm8,2578
+deepfabric/config.py,sha256=LJptRUykRGreirIwyUxF8rcverf1BD9hOPiiYNxkoEI,33990
+deepfabric/config_manager.py,sha256=c0N4Rb0P970khoiKgpN1ULDYV0bZEYIbec45cSfg92c,9334
+deepfabric/constants.py,sha256=nUc2Bf6Fu_c5biUy1nE5jCP9t6z08_ihWzarRTE1nwg,2827
 deepfabric/dataset.py,sha256=bZfx35A-dt0kMflgskU9Ge-NLVesq8xNKHsrxTnNn6Q,9740
-deepfabric/dataset_manager.py,sha256=fJ6VFG05FLTpmbkLKlnVTTi7aim8q7eWI1cgOmKaP5s,20461
+deepfabric/dataset_manager.py,sha256=hf0KQtYpLi4-P-a1czKNhqgrKsqFG8zJjfubbSBOuAo,24652
 deepfabric/error_codes.py,sha256=HGGWsahUTI8UG996C74X-XgNuaPX8RHo4gOidlaJql4,17630
 deepfabric/exceptions.py,sha256=pEg4YFQaDEWtBoJaSkxsJJoBBp2-6EE3M7m5H7R6i_8,1586
 deepfabric/factory.py,sha256=OCqo3w-eiYNWvK_I_egDZuWj192kf18yD3SPj8rrPxU,753
-deepfabric/generator.py,sha256=wdGxuKQOMGY8oEpa-YvXX2ceCnzRApDgzweLHgwtjlw,44226
-deepfabric/graph.py,sha256=JQ68GXnLymtR7ESfeZgdMh3YrReSPX5wEWEqXlkIR4Q,24175
+deepfabric/generator.py,sha256=sirx7qukTl_SM9RGYT7xPDP65L8BlNzZb7ykbvO4Sg8,68994
+deepfabric/graph.py,sha256=UehVpNnLKThNceteKUVfY401V160q7LFYm8cyTdE2bw,25787
 deepfabric/hf_hub.py,sha256=hw2CWqZ3CzyAzMo552VPZKVWtuv-j0TQ2_gV5K0AUto,7670
 deepfabric/kaggle_hub.py,sha256=CXVO1Lv3IRhdO0bp9_IQr6nUs-v5jOWi5k4EwPkbJmw,7927
 deepfabric/loader.py,sha256=YNTGZZE-POjR0BIlx6WCT4bIzf0T4lW_fQl7ev9UFqE,18584
 deepfabric/metrics.py,sha256=txqmXDM_r6cWPjdnnEjoA5xJkCHxFrjKWTpihE_jimA,6129
 deepfabric/progress.py,sha256=3XQQrf2pUZlyd-8eRcNATH1v0Oi8JMedVHGbhPcca-8,9354
-deepfabric/prompts.py,sha256=XKFaoiT9G_t7z3VhWNr1xsWx78I-2kzq6wErc6DW1eI,15397
-deepfabric/schemas.py,sha256=N1cTvXuAyV8r8YS5DSAcFgpfxF0AqVGJbbOpeT5H72g,37881
+deepfabric/prompts.py,sha256=pAFkw_0WAcYdzjqkHgywyLiSEdDlRdZwmyykUC307Gk,13872
+deepfabric/schemas.py,sha256=ebSwaXCu3lwshFGEhOhuAAUQA3deaYWXUcoRaeslqLc,37880
 deepfabric/stream_simulator.py,sha256=GzvAxWxHVsuTwgXlqwXNfrTUDn6sND2kJOoQuYg88FA,3028
 deepfabric/topic_manager.py,sha256=6YxMO6dQHaGyxghsI8iNJGP1miaekBe5Mh1WdYeLqdI,11164
-deepfabric/topic_model.py,sha256=i_wYpw2kUl8NLodOSaqNu-C4_d6caYT1kPe_vkKjoyw,707
-deepfabric/tree.py,sha256=Kxl2iLHU55xPq2MwdoLM0-M2nZRx51bRj9FM36jqs-M,14933
-deepfabric/tui.py,sha256=9ETtGFQk26U9PQ2b5foplVYDKxaFGd-8UqK7uSKyHwE,50480
+deepfabric/topic_model.py,sha256=3RupTQnEYfKSLASV79tfEhBXBrm5VT3nUcpl2EyK7Hk,1654
+deepfabric/tree.py,sha256=0J5KuHJrVEfF4F6zM9o2-ppAEzolay_9fDloZCHDN6c,15791
+deepfabric/tui.py,sha256=hUYK00czktdPT5W3GJQ5svObbDYMAAnou3u-YDOEC-o,52608
 deepfabric/update_checker.py,sha256=AUa9iUdkGNzu7tWkQRxIlF19YRmKLetwxu-Ys2ONS8Y,5145
-deepfabric/utils.py,sha256=a9G6VTw52UdddTFoMw-JjunjawtPN54N275-XGPL2cQ,5822
-deepfabric/validation.py,sha256=1x1X_45kyI0w_FCdUiNdvy4LQu3B0KVR-fyvLkrKEGw,5125
+deepfabric/utils.py,sha256=nVDNQnNQlfua8mMpKC7gk9EB3bYXbNUk6Sq39k1mN7s,12507
+deepfabric/validation.py,sha256=D6H_L_0mDtiI0s6DLXxkR1BickOw7HGM2al8u2xhKZk,4355
 deepfabric/evaluation/__init__.py,sha256=7xMLmYXaNC1U7qf88S9fMxWTABoDRiOcimSYfCt_PSo,1224
 deepfabric/evaluation/evaluator.py,sha256=qNowle5v2ukDJ11igNOCParlBfXT8QUeOvXx6sSJ_Ug,34480
 deepfabric/evaluation/inference.py,sha256=y7JA0IsBDwe0sJzVQeItYHAV5wUJn6Bjp1Wsp3r7qYQ,7644
 deepfabric/evaluation/metrics.py,sha256=ITNevYj7CBXzYs-rYhsihO6-rE9n30CYRaVUfdTbcFQ,12026
-deepfabric/evaluation/parser.py,sha256=AXyiCtNV4rueZQxLE_GqqkFNeDAewGoC--0vXHW-jW8,10603
+deepfabric/evaluation/parser.py,sha256=KLRP5cI6y2wqtbX2kPnyEJiYHqMk5qDa1toqJrKetb4,10604
 deepfabric/evaluation/backends/__init__.py,sha256=GqC0FfpWmtgJmjHd0kVKNg7g-NjhRoh5h2MtAoOhUOY,827
 deepfabric/evaluation/backends/llm_eval_backend.py,sha256=4jp5tnTp7v_0pHCGhcPbI55ig79-eVxdzooesi2PymA,18827
 deepfabric/evaluation/backends/ollama_backend.py,sha256=mtPp1JtIDRjb76X_rTa1jS1ETzMjte8t3WJjuYV1oDQ,4372
@@ -69,8 +69,8 @@ deepfabric/training/api_key_prompt.py,sha256=pSIMX3eDGyV9x_r7MHE4TyIsIB2SqYb8gKC
 deepfabric/training/callback.py,sha256=5zdifbHA2PWILHl2cVFyO65aW7cGAQhcvDqm3s8_I0Q,13221
 deepfabric/training/dataset_utils.py,sha256=klx8DoawEwuMigBDP-RpMAfe7FvYxRbhj599MErxBr4,7313
 deepfabric/training/metrics_sender.py,sha256=ZCyvMv5hRu8XJnQYVGXJ9wh7HEMJ0l3Ktyi8_etOpZs,10833
-deepfabric-4.8.3.dist-info/METADATA,sha256=ppBY0UdQd2bybvZF0HcXivMwFnHGEn_Nk6kCsDFBR6c,20536
-deepfabric-4.8.3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-deepfabric-4.8.3.dist-info/entry_points.txt,sha256=zatevils13hfs8x29_vmUyivQ6rTtq7hE2RBusZw1Fo,50
-deepfabric-4.8.3.dist-info/licenses/LICENSE,sha256=-qRt8wmrhQ9aMf7KhmZXc2vrTETYZF-6_T1KCeUhvHY,11340
-deepfabric-4.8.3.dist-info/RECORD,,
+deepfabric-4.10.0.dist-info/METADATA,sha256=bybLquu722z-9xb40wCLHe6QIliwAYSW7oHONs_D02Y,20498
+deepfabric-4.10.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+deepfabric-4.10.0.dist-info/entry_points.txt,sha256=zatevils13hfs8x29_vmUyivQ6rTtq7hE2RBusZw1Fo,50
+deepfabric-4.10.0.dist-info/licenses/LICENSE,sha256=-qRt8wmrhQ9aMf7KhmZXc2vrTETYZF-6_T1KCeUhvHY,11340
+deepfabric-4.10.0.dist-info/RECORD,,

{deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{deepfabric-4.8.3.dist-info → deepfabric-4.10.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

DeepFabric 4.8.3__py3-none-any.whl → 4.10.0__py3-none-any.whl

DeepFabric 4.8.3__py3-none-any.whl → 4.10.0__py3-none-any.whl