DeepFabric 4.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. deepfabric/__init__.py +70 -0
  2. deepfabric/__main__.py +6 -0
  3. deepfabric/auth.py +382 -0
  4. deepfabric/builders.py +303 -0
  5. deepfabric/builders_agent.py +1304 -0
  6. deepfabric/cli.py +1288 -0
  7. deepfabric/config.py +899 -0
  8. deepfabric/config_manager.py +251 -0
  9. deepfabric/constants.py +94 -0
  10. deepfabric/dataset_manager.py +534 -0
  11. deepfabric/error_codes.py +581 -0
  12. deepfabric/evaluation/__init__.py +47 -0
  13. deepfabric/evaluation/backends/__init__.py +32 -0
  14. deepfabric/evaluation/backends/ollama_backend.py +137 -0
  15. deepfabric/evaluation/backends/tool_call_parsers.py +409 -0
  16. deepfabric/evaluation/backends/transformers_backend.py +326 -0
  17. deepfabric/evaluation/evaluator.py +845 -0
  18. deepfabric/evaluation/evaluators/__init__.py +13 -0
  19. deepfabric/evaluation/evaluators/base.py +104 -0
  20. deepfabric/evaluation/evaluators/builtin/__init__.py +5 -0
  21. deepfabric/evaluation/evaluators/builtin/tool_calling.py +93 -0
  22. deepfabric/evaluation/evaluators/registry.py +66 -0
  23. deepfabric/evaluation/inference.py +155 -0
  24. deepfabric/evaluation/metrics.py +397 -0
  25. deepfabric/evaluation/parser.py +304 -0
  26. deepfabric/evaluation/reporters/__init__.py +13 -0
  27. deepfabric/evaluation/reporters/base.py +56 -0
  28. deepfabric/evaluation/reporters/cloud_reporter.py +195 -0
  29. deepfabric/evaluation/reporters/file_reporter.py +61 -0
  30. deepfabric/evaluation/reporters/multi_reporter.py +56 -0
  31. deepfabric/exceptions.py +67 -0
  32. deepfabric/factory.py +26 -0
  33. deepfabric/generator.py +1084 -0
  34. deepfabric/graph.py +545 -0
  35. deepfabric/hf_hub.py +214 -0
  36. deepfabric/kaggle_hub.py +219 -0
  37. deepfabric/llm/__init__.py +41 -0
  38. deepfabric/llm/api_key_verifier.py +534 -0
  39. deepfabric/llm/client.py +1206 -0
  40. deepfabric/llm/errors.py +105 -0
  41. deepfabric/llm/rate_limit_config.py +262 -0
  42. deepfabric/llm/rate_limit_detector.py +278 -0
  43. deepfabric/llm/retry_handler.py +270 -0
  44. deepfabric/metrics.py +212 -0
  45. deepfabric/progress.py +262 -0
  46. deepfabric/prompts.py +290 -0
  47. deepfabric/schemas.py +1000 -0
  48. deepfabric/spin/__init__.py +6 -0
  49. deepfabric/spin/client.py +263 -0
  50. deepfabric/spin/models.py +26 -0
  51. deepfabric/stream_simulator.py +90 -0
  52. deepfabric/tools/__init__.py +5 -0
  53. deepfabric/tools/defaults.py +85 -0
  54. deepfabric/tools/loader.py +87 -0
  55. deepfabric/tools/mcp_client.py +677 -0
  56. deepfabric/topic_manager.py +303 -0
  57. deepfabric/topic_model.py +20 -0
  58. deepfabric/training/__init__.py +35 -0
  59. deepfabric/training/api_key_prompt.py +302 -0
  60. deepfabric/training/callback.py +363 -0
  61. deepfabric/training/metrics_sender.py +301 -0
  62. deepfabric/tree.py +438 -0
  63. deepfabric/tui.py +1267 -0
  64. deepfabric/update_checker.py +166 -0
  65. deepfabric/utils.py +150 -0
  66. deepfabric/validation.py +143 -0
  67. deepfabric-4.4.0.dist-info/METADATA +702 -0
  68. deepfabric-4.4.0.dist-info/RECORD +71 -0
  69. deepfabric-4.4.0.dist-info/WHEEL +4 -0
  70. deepfabric-4.4.0.dist-info/entry_points.txt +2 -0
  71. deepfabric-4.4.0.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,166 @@
1
+ import importlib.metadata
2
+ import json
3
+ import logging
4
+ import os
5
+ import urllib.error
6
+ import urllib.request
7
+
8
+ from typing import TypedDict
9
+
10
+ from packaging.version import Version, parse
11
+
12
+ from .metrics import trace
13
+ from .tui import get_tui
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class PyPIPackageInfo(TypedDict, total=False):
    """Typed view of the ``info`` object inside a PyPI JSON API response.

    Declared with ``total=False``, so every key is optional.
    """

    # Version string reported for the package.
    version: str
22
+
23
+
24
class PyPIResponse(TypedDict, total=False):
    """Typed view of the top-level PyPI JSON API response.

    Declared with ``total=False``, so every key is optional.
    """

    # The package-level metadata section of the response.
    info: PyPIPackageInfo
28
+
29
+
30
# JSON metadata endpoint on PyPI for the deepfabric package.
PYPI_API_URL = "https://pypi.org/pypi/deepfabric/json"

# Upper bound, in seconds, passed as the timeout of the PyPI request.
REQUEST_TIMEOUT = 2.0
35
+
36
+
37
def _get_current_version() -> str | None:
    """
    Look up the installed deepfabric version from package metadata.

    Returns:
        str | None: The version reported by ``importlib.metadata``, or None
            when the distribution metadata cannot be found.
    """
    try:
        installed = importlib.metadata.version("deepfabric")
    except (ImportError, importlib.metadata.PackageNotFoundError):
        # No distribution metadata available for "deepfabric".
        logger.debug("Unable to determine current version")
        installed = None
    return installed
49
+
50
+
51
def _is_update_check_disabled() -> bool:
    """
    Report whether the user opted out of update checks.

    The DEEPFABRIC_NO_UPDATE_CHECK environment variable is compared
    case-insensitively against the accepted opt-out spellings.

    Returns:
        bool: True when the variable is one of "1", "true", "yes" or "on"
    """
    raw_setting = os.environ.get("DEEPFABRIC_NO_UPDATE_CHECK", "")
    normalized = raw_setting.lower()
    return normalized in {"1", "true", "yes", "on"}
60
+
61
+
62
def _fetch_latest_version_from_pypi() -> str | None:
    """
    Fetch the latest version from PyPI API.

    Network failures, timeouts, HTTP protocol errors, undecodable or
    malformed JSON, and payloads with an unexpected shape are all logged at
    DEBUG level and reported as None instead of being raised.

    Returns:
        str | None: Latest version string or None if fetch fails
    """
    # Function-scope import keeps this block self-contained; urllib.request
    # itself is built on http.client.
    import http.client  # noqa: PLC0415

    try:
        with urllib.request.urlopen(  # noqa: S310 # nosec
            PYPI_API_URL, timeout=REQUEST_TIMEOUT
        ) as response:
            data: PyPIResponse = json.loads(response.read().decode("utf-8"))
    except TimeoutError:
        logger.debug("PyPI request timed out after %s seconds", REQUEST_TIMEOUT)
        return None
    except urllib.error.URLError as e:
        logger.debug("Failed to fetch from PyPI: %s", e)
        return None
    except (OSError, http.client.HTTPException) as e:
        # Errors raised while reading the body (connection reset, truncated
        # response, ...) are not wrapped in URLError by urllib.
        logger.debug("Failed to fetch from PyPI: %s", e)
        return None
    except ValueError as e:
        # Covers json.JSONDecodeError and UnicodeDecodeError.
        logger.debug("Failed to parse PyPI response: %s", e)
        return None

    # Valid JSON is not necessarily an object; guard each level before .get().
    info = data.get("info") if isinstance(data, dict) else None
    latest_version = info.get("version") if isinstance(info, dict) else None
    if isinstance(latest_version, str) and latest_version:
        logger.debug("Fetched latest version from PyPI: %s", latest_version)
        return latest_version

    logger.debug("No version found in PyPI response")
    return None
89
+
90
+
91
def _compare_versions(current: str, latest: str) -> bool:
    """
    Decide whether ``latest`` is a newer release than ``current``.

    Args:
        current: Version string of the installed package
        latest: Version string to compare against

    Returns:
        bool: True only when both strings parse and latest > current;
            False when parsing fails or latest is not newer
    """
    try:
        installed: Version = parse(current)
        published: Version = parse(latest)
    except Exception as e:
        # An unparseable version is treated as "no update available".
        logger.debug("Failed to compare versions: %s", e)
        return False

    return published > installed
110
+
111
+
112
def check_for_updates() -> None:
    """
    Check for available updates and notify user if a newer version exists.

    This function:
    1. Checks if update checking is disabled via environment variable
    2. Gets the current installed version
    3. Fetches the latest version from PyPI
    4. Compares versions (once) and displays a warning if update is available
    5. Tracks metrics about the update check

    The function is designed to fail silently and never block CLI execution.
    All errors are logged at DEBUG level and do not interrupt the user.
    """
    # Check if update checking is disabled
    if _is_update_check_disabled():
        logger.debug("Update check disabled via DEEPFABRIC_NO_UPDATE_CHECK")
        return

    # Get current version
    current_version = _get_current_version()
    if not current_version or current_version == "development":
        logger.debug("Skipping update check for development version")
        return

    # Fetch latest version from PyPI. Guard the call as well: any exception
    # escaping it would otherwise propagate to the caller, contradicting the
    # fail-silently contract documented above.
    try:
        latest_version = _fetch_latest_version_from_pypi()
    except Exception as e:
        logger.debug("Failed to fetch latest version from PyPI: %s", e)
        return
    if not latest_version:
        logger.debug("Could not fetch latest version from PyPI")
        return

    # Compare once and reuse the result for both metrics and notification.
    update_available = _compare_versions(current_version, latest_version)

    # Track metrics about the check
    try:
        trace(
            "update_check_performed",
            {
                "current_version": current_version,
                "latest_version": latest_version,
                "update_available": update_available,
            },
        )
    except Exception as e:
        logger.debug("Failed to track update check metrics: %s", e)

    if not update_available:
        return

    # Notify user that an update is available
    try:
        tui = get_tui()
        tui.warning(
            f"Update available: deepfabric {latest_version} "
            f"(you have {current_version})\n"
            f" Run: pip install --upgrade deepfabric"
        )
    except Exception as e:
        logger.debug("Failed to display update notification: %s", e)
deepfabric/utils.py ADDED
@@ -0,0 +1,150 @@
1
+ import ast
2
+ import asyncio
3
+ import json
4
+ import re
5
+
6
# Lowercase substrings searched for in an error's message to classify it
# as a validation/schema error.
VALIDATION_ERROR_INDICATORS = [
    "validation error",
    "value error",
    "is null",
    "is empty string",
    "must provide actual value",
    "invalid schema",
    "pydantic",
    "string should have at least",
    "field required",
]
17
+
18
+
19
def is_validation_error(error: Exception) -> bool:
    """Return True when the error's message marks it as a retryable validation/schema error.

    The lowercased ``str(error)`` is searched for each entry of
    ``VALIDATION_ERROR_INDICATORS``.
    """
    message = str(error).lower()
    for indicator in VALIDATION_ERROR_INDICATORS:
        if indicator in message:
            return True
    return False
23
+
24
+
25
def ensure_not_running_loop(method_name: str) -> None:
    """Guard a synchronous entry point against use inside an event loop.

    Args:
        method_name: Name of the calling method, used in the error message.

    Raises:
        RuntimeError: If an asyncio event loop is running in this thread.
    """
    try:
        active_loop = asyncio.get_running_loop()
    except RuntimeError:
        # get_running_loop() raises when there is no running loop: safe.
        return

    if not active_loop.is_running():
        return

    raise RuntimeError(
        f"{method_name} cannot be called while an event loop is running. "
        "Use the async variant instead."
    )
39
+
40
+
41
def extract_list(input_string: str):
    """
    Extracts a Python list from a given input string.

    The whole string is first parsed as JSON and returned if the result is a
    list. If the string is not valid JSON, or is valid JSON of another type
    (object, number, string, ...), the first bracket-balanced ``[...]``
    span is located and evaluated with ``safe_literal_eval``.

    Note: bracket matching counts every "[" and "]" character, including
    those inside quoted strings.

    Args:
        input_string (str): The input string potentially containing a Python list.

    Returns:
        list: The extracted Python list if found and valid, otherwise an empty list.
        Parsing failures are reported via ``print``.
    """
    try:
        parsed = json.loads(input_string)
    except json.JSONDecodeError:
        print("Failed to parse the input string as JSON.")
    else:
        if isinstance(parsed, list):
            return parsed
        # Valid JSON but not a list: fall through and look for an embedded
        # list rather than handing a non-list value back to the caller.

    start = input_string.find("[")
    if start == -1:
        print("No Python list found in the input string.")
        return []

    # Walk forward from the first "[" until its matching "]" is found.
    depth = 0
    for index in range(start, len(input_string)):
        char = input_string[index]
        if char == "[":
            depth += 1
        elif char == "]":
            depth -= 1
            if depth == 0:
                end = index + 1
                break
    else:
        print("No matching closing bracket found.")
        return []

    found_list = safe_literal_eval(input_string[start:end])
    if found_list is None:
        print("Failed to parse the list due to syntax issues.")
        return []

    return found_list
89
+
90
+
91
def remove_linebreaks_and_spaces(input_string):
    """
    Collapse all whitespace in a string down to single spaces.

    Every run of whitespace (line breaks included) becomes one space, and the
    result is then split and re-joined, which also drops any leading or
    trailing whitespace.

    Args:
        input_string (str): The text to normalize.

    Returns:
        str: The text with words separated by exactly one space.
    """
    collapsed = re.sub(r"\s+", " ", input_string)
    words = collapsed.split()
    return " ".join(words)
107
+
108
+
109
def safe_literal_eval(list_string: str):
    """
    Safely evaluate a string containing a Python literal expression.

    The string is evaluated with `ast.literal_eval`. If that fails, apostrophes
    sitting between two word characters (as in "it's") are replaced with the
    right single quote character and the evaluation is attempted once more.

    Besides `SyntaxError` and `ValueError`, `ast.literal_eval` can raise
    `TypeError` (e.g. an unhashable dict key) and `RecursionError` on
    malformed input; these are handled the same way.

    Args:
        list_string (str): The string to be evaluated.

    Returns:
        The result of the evaluated string if successful, otherwise `None`.
    """
    eval_errors = (SyntaxError, ValueError, TypeError, RecursionError)

    try:
        return ast.literal_eval(list_string)
    except eval_errors:
        # Replace problematic apostrophes with the actual right single quote character
        sanitized_string = re.sub(r"(\w)'(\w)", r"\1’\2", list_string)

    try:
        return ast.literal_eval(sanitized_string)
    except eval_errors:
        print("Failed to parse the list due to syntax issues.")
        return None
134
+
135
+
136
def read_topic_tree_from_jsonl(file_path: str) -> list[dict]:
    """
    Read the topic tree from a JSONL file.

    The file is decoded as UTF-8 regardless of the platform locale, and
    blank lines (including a trailing empty line) are skipped instead of
    being passed to the JSON parser.

    Args:
        file_path (str): The path to the JSONL file.

    Returns:
        list[dict]: The topic tree, one entry per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, encoding="utf-8") as file:
        stripped_lines = (line.strip() for line in file)
        return [json.loads(line) for line in stripped_lines if line]
@@ -0,0 +1,143 @@
1
+ import time
2
+
3
+ from .exceptions import ConfigurationError
4
+ from .tui import get_tui
5
+
6
+
7
def calculate_expected_paths(mode: str, depth: int, degree: int) -> int:
    """
    Estimate how many paths a topic tree or graph will contain.

    Args:
        mode: Generation mode ('tree' or 'graph')
        depth: Depth of the tree/graph
        degree: Branching factor

    Returns:
        degree raised to the power of depth, for either mode
    """
    # Tree: degree^depth is exact, since each leaf is a unique path.
    # Graph: cross-connections make the real count vary widely (roughly
    # 0.5x to 2x+ of degree^depth), so the same figure serves only as an
    # approximate middle-ground estimate.
    base_estimate = degree**depth
    return base_estimate
27
+
28
+
29
def validate_path_requirements(
    mode: str,
    depth: int,
    degree: int,
    num_steps: int,
    batch_size: int,
    loading_existing: bool = False,
) -> None:
    """
    Validate that the topic generation parameters will produce enough paths.

    When ``num_steps * batch_size`` exceeds the expected path count, an error
    is shown through the TUI and a ConfigurationError is raised whose message
    lists up to three (num_steps, batch_size) combinations that fit.

    Args:
        mode: Generation mode ('tree' or 'graph')
        depth: Depth of the tree/graph
        degree: Branching factor
        num_steps: Number of generation steps
        batch_size: Batch size for generation
        loading_existing: Whether loading existing topic model from file

    Raises:
        ConfigurationError: If validation fails
    """
    if loading_existing:
        # Can't validate existing files without loading them
        return

    expected_paths = calculate_expected_paths(mode, depth, degree)
    required_samples = num_steps * batch_size

    if required_samples <= expected_paths:
        return

    # Only the three combinations with the fewest steps are ever shown, and
    # those are always steps 1, 2 and 3 (each with the largest batch that
    # fits). Compute them directly instead of enumerating and sorting every
    # step count up to expected_paths, which can be degree**depth entries.
    optimal_combinations = [
        (steps, expected_paths // steps)
        for steps in range(1, min(3, expected_paths) + 1)
    ]

    tui = get_tui()
    tui.error(" Path validation failed - stopping before topic generation")

    # Build recommendations - focus on optimal combinations rather than misleading individual params
    recommendations = []

    if optimal_combinations:
        recommendations.append(
            f" • Use one of these combinations to utilize the {expected_paths} paths:"
        )
        for steps, batch in optimal_combinations:
            total_samples = steps * batch
            recommendations.append(
                f" --num-steps {steps} --batch-size {batch} (generates {total_samples} samples)"
            )

    recommendations.append(
        f" • Or increase --depth (currently {depth}) or --degree (currently {degree})"
    )

    estimation_note = ""
    if mode == "graph":
        estimation_note = " (estimated - graphs vary due to cross-connections)"

    error_msg = (
        f"Insufficient expected paths for dataset generation:\n"
        f" • Expected {mode} paths: ~{expected_paths}{estimation_note} (depth={depth}, degree={degree})\n"
        f" • Requested samples: {required_samples} ({num_steps} steps × {batch_size} batch size)\n"
        f" • Shortfall: ~{required_samples - expected_paths} samples\n\n"
        f"Recommendations:\n" + "\n".join(recommendations)
    )

    if mode == "graph":
        error_msg += f"\n\nNote: Graph path counts are estimates. The actual graph may produce {expected_paths // 2}-{expected_paths * 2} paths due to cross-connections."

    raise ConfigurationError(error_msg)
107
+
108
+
109
def show_validation_success(
    mode: str,
    depth: int,
    degree: int,
    num_steps: int,
    batch_size: int,
    loading_existing: bool = False,
) -> None:
    """
    Print a summary confirming that path validation passed.

    Nothing is shown when an existing topic model is being loaded. Otherwise
    the expected path count, requested sample count and path utilization are
    reported through the TUI, followed by a blank line and a short pause.

    Args:
        mode: Generation mode ('tree' or 'graph')
        depth: Depth of the tree/graph
        degree: Branching factor
        num_steps: Number of generation steps
        batch_size: Batch size for generation
        loading_existing: Whether loading existing topic model from file
    """
    if loading_existing:
        return

    expected_paths = calculate_expected_paths(mode, depth, degree)
    total_samples = num_steps * batch_size
    is_graph = mode == "graph"

    tui = get_tui()
    tui.success("Path Validation Passed")
    tui.info(f" Expected {mode} paths: ~{expected_paths} (depth={depth}, degree={degree})")
    tui.info(f" Requested samples: {total_samples} ({num_steps} steps x {batch_size} batch size)")

    # Utilization is capped at 100%.
    utilization = min(100, (total_samples / expected_paths) * 100)
    tui.info(f" Path utilization: ~{utilization:.1f}%")

    if is_graph:
        tui.info(" Note: Graph paths may vary due to cross-connections")

    print()  # Extra space before topic generation
    time.sleep(0.5)  # Brief pause to allow user to see the information