diffai-python 0.3.4__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
diffai/__init__.py ADDED
@@ -0,0 +1,303 @@
1
+ """
2
+ diffai - AI/ML specialized diff tool for deep tensor comparison and analysis
3
+
4
+ This package provides a Python wrapper around the diffai Rust binary,
5
+ following the same pattern as ruff for optimal performance and reliability.
6
+ """
7
+
8
+ # No backward compatibility imports
9
+
10
+ import json
11
+ import subprocess
12
+ import sys
13
+ import shutil
14
+ from pathlib import Path
15
+ from typing import Any, Dict, List, Optional, Union
16
+ from dataclasses import dataclass
17
+ from enum import Enum
18
+
19
+ # Version is now managed dynamically from pyproject.toml
20
+ # This prevents hardcoded version mismatches during releases
21
# Resolve the version from the installed distribution metadata.
try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:
    # Python < 3.8 has no importlib.metadata; fall back to setuptools.
    try:
        import pkg_resources
        __version__ = pkg_resources.get_distribution("diffai-python").version
    except Exception:
        __version__ = "unknown"
else:
    try:
        __version__ = version("diffai-python")
    except PackageNotFoundError:
        # No distribution metadata (e.g. running from a source checkout or a
        # vendored copy). This is not an ImportError, so it must be handled
        # separately or importing the package would fail.
        __version__ = "unknown"
31
+
32
class OutputFormat(Enum):
    """Output renderings that can be requested from diffai.

    Each member's value is the literal string handed to the binary's
    ``--output`` option.
    """

    CLI = "cli"    # plain terminal output
    JSON = "json"  # machine-readable
    YAML = "yaml"  # structured text
37
+
38
@dataclass
class DiffOptions:
    """Configuration options for a diffai run.

    Every field maps onto one command line option of the diffai binary;
    ``to_args`` performs the translation. The boolean "ML analysis" fields
    map to a flag spelled like the field name with underscores replaced by
    hyphens (``show_layer_impact`` -> ``--show-layer-impact``).
    """

    # Basic options
    input_format: Optional[str] = None
    output_format: Optional[OutputFormat] = None
    recursive: bool = False
    verbose: bool = False
    path: Optional[str] = None
    ignore_keys_regex: Optional[str] = None
    epsilon: Optional[float] = None
    array_id_key: Optional[str] = None

    # ML analysis options
    show_layer_impact: bool = False
    quantization_analysis: bool = False
    sort_by_change_magnitude: bool = False
    stats: bool = False
    learning_progress: bool = False
    convergence_analysis: bool = False
    anomaly_detection: bool = False
    gradient_analysis: bool = False
    memory_analysis: bool = False
    inference_speed_estimate: bool = False
    regression_test: bool = False
    alert_on_degradation: bool = False
    review_friendly: bool = False
    change_summary: bool = False
    deployment_readiness: bool = False
    architecture_comparison: bool = False
    param_efficiency_analysis: bool = False
    hyperparameter_impact: bool = False
    learning_rate_analysis: bool = False
    performance_impact_estimate: bool = False
    generate_report: bool = False
    markdown_output: bool = False
    include_charts: bool = False
    embedding_analysis: bool = False
    similarity_matrix: bool = False
    clustering_change: bool = False
    attention_analysis: bool = False
    head_importance: bool = False
    attention_pattern_diff: bool = False
    hyperparameter_comparison: bool = False
    learning_curve_analysis: bool = False
    statistical_significance: bool = False

    def __post_init__(self) -> None:
        """Accept ``output_format`` given as a plain string such as ``"json"``.

        Options frequently arrive as dicts or keyword arguments, where a
        string is the natural spelling. Without this coercion the later
        ``.value`` access fails with an AttributeError.

        Raises:
            ValueError: If the string does not name a known output format.
        """
        if isinstance(self.output_format, str):
            self.output_format = OutputFormat(self.output_format.lower())

    def to_args(self) -> List[str]:
        """Convert the options into diffai command line arguments.

        Returns:
            The argument list (without the binary name or input paths).
            Options that take a value come first, followed by the boolean
            analysis flags in field declaration order.
        """
        args: List[str] = []

        # Basic options: the flag name differs from the field name for some
        # of these, so they are spelled out explicitly.
        if self.input_format:
            args.extend(["--format", self.input_format])
        if self.output_format:
            args.extend(["--output", self.output_format.value])
        if self.recursive:
            args.append("--recursive")
        if self.verbose:
            args.append("--verbose")
        if self.path:
            args.extend(["--path", self.path])
        if self.ignore_keys_regex:
            args.extend(["--ignore-keys-regex", self.ignore_keys_regex])
        # epsilon is compared against None so that an explicit 0.0 is kept.
        if self.epsilon is not None:
            args.extend(["--epsilon", str(self.epsilon)])
        if self.array_id_key:
            args.extend(["--array-id-key", self.array_id_key])

        # ML analysis options: every remaining field is a boolean switch
        # whose flag is derived from the field name.
        basic_fields = {
            "input_format",
            "output_format",
            "recursive",
            "verbose",
            "path",
            "ignore_keys_regex",
            "epsilon",
            "array_id_key",
        }
        for name in self.__dataclass_fields__:
            if name not in basic_fields and getattr(self, name):
                args.append("--" + name.replace("_", "-"))

        return args
175
+
176
class DiffaiError(Exception):
    """Base class for errors raised while locating or running diffai."""
179
+
180
class DiffResult:
    """Result of a diffai run.

    Attributes are set directly from the constructor arguments:
    ``raw_output`` (captured text), ``format_type`` (name of the requested
    output format) and ``return_code`` (process exit status).
    """

    # Marker for "not parsed yet". A dedicated object is used instead of
    # None because None is itself a valid parse result (JSON ``null``).
    _UNPARSED = object()

    def __init__(self, raw_output: str, format_type: str = "cli", return_code: int = 0):
        self.raw_output = raw_output
        self.format_type = format_type
        self.return_code = return_code
        self._parsed_data = self._UNPARSED

    @property
    def data(self) -> Any:
        """Return the parsed output, parsing lazily on first access.

        For ``format_type == "json"`` with non-blank output this is the
        decoded JSON value; if decoding fails, or for any other format, it
        is the raw output string.
        """
        if self._parsed_data is self._UNPARSED:
            parsed: Any = self.raw_output
            if self.format_type == "json" and self.raw_output.strip():
                try:
                    parsed = json.loads(self.raw_output)
                except json.JSONDecodeError:
                    # Keep the raw text rather than failing the caller.
                    parsed = self.raw_output
            self._parsed_data = parsed
        return self._parsed_data

    @property
    def is_json(self) -> bool:
        """True if JSON was requested and the output decoded to a dict or list."""
        return self.format_type == "json" and isinstance(self.data, (dict, list))

    def __str__(self) -> str:
        return self.raw_output

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}(format_type={self.format_type!r}, "
            f"return_code={self.return_code!r}, "
            f"raw_output=<{len(self.raw_output)} chars>)"
        )
209
+
210
def _find_diffai_binary() -> str:
    """Locate the diffai executable, preferring a bundled copy over PATH.

    Checked in order:
      1. ``<root>/diffai`` (the historical bundled location),
      2. ``<root>/diffai.exe`` on Windows,
      3. ``<root>/bin/<binary>``, where ``installer.install_binary`` writes,
      4. ``diffai`` on the system PATH.

    ``<root>`` is three directory levels above this file.

    Returns:
        Path of the executable as a string.

    Raises:
        DiffaiError: If no executable is found.
    """
    # The Windows build is named diffai.exe (see installer.get_platform_info).
    binary_name = "diffai.exe" if sys.platform == "win32" else "diffai"
    package_dir = Path(__file__).parent.parent.parent

    candidates = [package_dir / "diffai"]
    if binary_name != "diffai":
        candidates.append(package_dir / binary_name)
    candidates.append(package_dir / "bin" / binary_name)

    for candidate in candidates:
        # is_file() is False for missing paths, so no separate exists() check.
        if candidate.is_file():
            return str(candidate)

    # Fallback to system PATH
    system_binary = shutil.which("diffai")
    if system_binary:
        return system_binary

    raise DiffaiError(
        "diffai binary not found. Please ensure diffai is installed or available in PATH."
    )
227
+
228
def diff(
    input1: str,
    input2: str,
    options: Optional[Union[DiffOptions, Dict[str, Any]]] = None,
    **kwargs
) -> DiffResult:
    """
    Compare two files using diffai.

    Args:
        input1: Path to first input file
        input2: Path to second input file
        options: DiffOptions object or dict of options
        **kwargs: Additional options as keyword arguments; they override
            same-named entries in ``options``

    Returns:
        DiffResult object containing comparison results. A non-zero exit
        status with empty stderr is not treated as an error; it is reported
        through ``DiffResult.return_code``.

    Raises:
        DiffaiError: If the binary cannot be found or started, or if it exits
            non-zero and writes to stderr.
        TypeError: If an option name is not a DiffOptions field.
    """
    # Normalize the three accepted option spellings into one DiffOptions.
    if options is None:
        options = DiffOptions(**kwargs)
    elif isinstance(options, dict):
        options = DiffOptions(**{**options, **kwargs})
    elif kwargs:
        # Build a new object so the caller's DiffOptions is not mutated.
        current = {
            name: getattr(options, name)
            for name in options.__dataclass_fields__
        }
        options = DiffOptions(**{**current, **kwargs})

    try:
        binary_path = _find_diffai_binary()
        cmd = [binary_path] + options.to_args() + [input1, input2]

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=False
        )
    except DiffaiError:
        # Already a meaningful package error (e.g. binary not found);
        # do not wrap it a second time in "Diff failed: ...".
        raise
    except FileNotFoundError as e:
        raise DiffaiError("diffai binary not found") from e
    except Exception as e:
        raise DiffaiError(f"Diff failed: {e}") from e

    if result.returncode != 0 and result.stderr:
        raise DiffaiError(f"diffai failed: {result.stderr}")

    fmt = options.output_format
    # getattr keeps this tolerant of a format supplied as a plain string.
    format_type = getattr(fmt, "value", fmt) if fmt else "cli"
    return DiffResult(result.stdout, format_type, result.returncode)
282
+
283
def main():
    """Run the diffai binary with this process's command line arguments.

    Exits with the binary's return code, or with 1 (after printing the
    error to stderr) when the binary cannot be located.
    """
    try:
        binary_path = _find_diffai_binary()
        # Pass everything after the program name through untouched.
        completed = subprocess.run([binary_path, *sys.argv[1:]])
    except DiffaiError as err:
        print(f"Error: {err}", file=sys.stderr)
        sys.exit(1)
    sys.exit(completed.returncode)
293
+
294
+ # Export main API
295
+ __all__ = [
296
+ "diff",
297
+ "DiffOptions",
298
+ "DiffResult",
299
+ "OutputFormat",
300
+ "DiffaiError",
301
+ "__version__",
302
+ "main",
303
+ ]
diffai/__main__.py ADDED
@@ -0,0 +1,8 @@
1
+ """
2
+ Entry point for python -m diffai
3
+ """
4
+
5
+ from . import main
6
+
7
+ if __name__ == "__main__":
8
+ main()
diffai/installer.py ADDED
@@ -0,0 +1,206 @@
1
+ """
2
+ Binary installer for diffai Python package.
3
+
4
+ This module handles downloading and installing the diffai binary
5
+ when the package is installed via pip.
6
+
7
+ Supports platforms: Windows, Linux, Darwin (macOS) with x86_64 and aarch64 architectures.
8
+ """
9
+
10
+ import os
11
+ import platform
12
+ import shutil
13
+ import subprocess
14
+ import sys
15
+ import tarfile
16
+ import tempfile
17
+ import urllib.request
18
+ import zipfile
19
+ from pathlib import Path
20
+
21
+ # Package version constant for GitHub releases
22
+ PACKAGE_VERSION = "0.3.4"
23
+
24
+
25
def get_platform_info():
    """Map the running platform to a release platform name and binary name.

    Recognised combinations:
      - Linux   x86_64/amd64 -> ("linux-x86_64", "diffai")
      - Darwin  arm64        -> ("macos-aarch64", "diffai")
      - Darwin  x86_64/amd64 -> ("macos-x86_64", "diffai")
      - Windows x86_64/amd64 -> ("windows-x86_64", "diffai.exe")

    Returns:
        Tuple ``(platform_name, binary_name)``.

    Raises:
        RuntimeError: For any other operating system or architecture.
    """
    system = platform.system().lower()
    machine = platform.machine().lower()

    x86_64_names = ("x86_64", "amd64")
    # system -> (label used in error messages, binary name,
    #            {platform name: machine names that select it})
    supported = {
        "linux": ("Linux", "diffai", {"linux-x86_64": x86_64_names}),
        "darwin": (
            "Darwin",
            "diffai",
            {"macos-aarch64": ("arm64",), "macos-x86_64": x86_64_names},
        ),
        "windows": ("Windows", "diffai.exe", {"windows-x86_64": x86_64_names}),
    }

    if system not in supported:
        raise RuntimeError(f"Unsupported operating system: {system}")

    label, binary_name, architectures = supported[system]
    for platform_name, machine_names in architectures.items():
        if machine in machine_names:
            return platform_name, binary_name

    raise RuntimeError(f"Unsupported {label} architecture: {machine}")
53
+
54
+
55
def download_file(url, dest_path, timeout=60):
    """Download ``url`` and write it to ``dest_path``.

    The response is copied to disk in chunks rather than being read into
    memory in one piece.

    Args:
        url: URL to fetch.
        dest_path: Destination file path; overwritten if it exists.
        timeout: Timeout in seconds passed to ``urlopen`` so a stalled
            connection cannot block forever.

    Raises:
        RuntimeError: If the request or the write fails. The original
            exception is attached as ``__cause__``.
    """
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response, \
                open(dest_path, "wb") as f:
            shutil.copyfileobj(response, f)
    except Exception as e:
        raise RuntimeError(f"Failed to download file: {e}") from e
63
+
64
+
65
def _reject_unsafe_tar_members(tar_ref, extract_to):
    """Raise RuntimeError if a tar member would be written outside extract_to."""
    root = os.path.realpath(os.fspath(extract_to))
    for member in tar_ref.getmembers():
        target = os.path.realpath(os.path.join(root, member.name))
        if os.path.commonpath([root, target]) != root:
            raise RuntimeError(f"Unsafe path in archive: {member.name}")


def extract_archive(archive_path, extract_to):
    """Extract a ``.zip``, ``.tar.gz`` or ``.tgz`` archive into a directory.

    Args:
        archive_path: Path of the archive (``str`` or ``os.PathLike``). The
            format is chosen from the file name suffix.
        extract_to: Destination directory.

    Raises:
        RuntimeError: If the suffix is not supported or extraction fails.
            The original exception is attached as ``__cause__``.
    """
    # Accept pathlib.Path as well as str; the suffix checks need a string.
    archive_name = os.fspath(archive_path)
    try:
        if archive_name.endswith('.zip'):
            with zipfile.ZipFile(archive_name, 'r') as zip_ref:
                zip_ref.extractall(extract_to)
        elif archive_name.endswith(('.tar.gz', '.tgz')):
            with tarfile.open(archive_name, 'r:gz') as tar_ref:
                # Archives come from the network, so refuse members that
                # would escape the destination directory.
                if hasattr(tarfile, "data_filter"):
                    tar_ref.extractall(extract_to, filter="data")
                else:
                    # Interpreter without extraction filters: check member
                    # paths manually (link targets are not inspected here).
                    _reject_unsafe_tar_members(tar_ref, extract_to)
                    tar_ref.extractall(extract_to)
        else:
            raise RuntimeError(f"Unsupported archive format: {archive_path}")
    except Exception as e:
        raise RuntimeError(f"Failed to extract archive: {e}") from e
78
+
79
+
80
def verify_binary(binary_path):
    """Check that a binary runs by invoking it with ``--version``.

    Args:
        binary_path: Path of the executable to test.

    Returns:
        True if the process exits with status 0.

    Raises:
        RuntimeError: If the process cannot be started, does not finish
            within 10 seconds, or exits with a non-zero status.
    """
    try:
        result = subprocess.run(
            [binary_path, "--version"],
            capture_output=True,
            text=True,
            timeout=10
        )
    except subprocess.TimeoutExpired as e:
        raise RuntimeError("Binary verification timed out") from e
    except Exception as e:
        raise RuntimeError(f"Failed to verify binary: {e}") from e

    # Checked outside the try block so this error is not re-wrapped by the
    # catch-all handler above.
    if result.returncode != 0:
        raise RuntimeError(f"Binary verification failed: {result.stderr}")
    return True
100
+
101
+
102
def get_latest_release_info():
    """Fetch the latest diffai release from the GitHub API.

    Returns:
        Tuple ``(tag_name, assets)`` taken from the API's JSON response.

    Raises:
        RuntimeError: If the request fails or the response cannot be
            decoded or lacks the expected keys.
    """
    try:
        import json

        # GitHub API endpoint describing the most recent release
        url = "https://api.github.com/repos/kako-jun/diffai/releases/latest"
        with urllib.request.urlopen(url) as response:
            body = response.read().decode()
            release = json.loads(body)

        tag_name = release["tag_name"]
        assets = release["assets"]
        return tag_name, assets
    except Exception as e:
        raise RuntimeError(f"Failed to get release information from github.com: {e}")
115
+
116
+
117
def download_binary(version=None):
    """Download and unpack the diffai binary for the current platform.

    Args:
        version: Release tag to download. When None, the latest release is
            looked up through the GitHub API.

    Returns:
        ``pathlib.Path`` of the extracted binary. The file lives in a
        temporary directory that is removed when the interpreter exits, so
        callers should copy it to its final location.

    Raises:
        RuntimeError: If the platform is unsupported, the release has no
            asset for it, or download/extraction fails.
    """
    import atexit  # local import: only needed for temp-dir cleanup

    platform_name, binary_name = get_platform_info()

    # Windows releases are zip archives; all others are gzipped tarballs.
    extension = ".zip" if platform_name.startswith("windows") else ".tar.gz"
    asset_name = f"diffai-{platform_name}{extension}"

    if version is None:
        version, assets = get_latest_release_info()
    else:
        # For a specific version, construct the asset URL manually
        assets = None

    if assets:
        asset_url = next(
            (
                asset["browser_download_url"]
                for asset in assets
                if asset["name"] == asset_name
            ),
            None,
        )
        if not asset_url:
            raise RuntimeError(f"Binary not found for platform: {platform_name}")
    else:
        # Fallback URL construction
        base_url = "https://github.com/kako-jun/diffai/releases/download"
        asset_url = f"{base_url}/{version}/{asset_name}"

    # A `with TemporaryDirectory()` block would delete the directory as soon
    # as this function returns, leaving the caller with a dangling path.
    # Create a directory that outlives the call and remove it at exit.
    temp_path = Path(tempfile.mkdtemp(prefix="diffai-"))
    atexit.register(shutil.rmtree, str(temp_path), ignore_errors=True)

    archive_path = temp_path / asset_name

    print(f"Downloading {asset_url}...")
    download_file(asset_url, str(archive_path))

    # Use the module's own helper instead of shelling out to `tar`.
    extract_archive(str(archive_path), str(temp_path))

    # Find the binary
    binary_path = temp_path / binary_name
    if not binary_path.exists():
        raise RuntimeError(f"Binary not found in archive: {binary_name}")

    return binary_path
173
+
174
+
175
def install_binary():
    """Download the diffai binary and copy it into ``<root>/bin``.

    ``<root>`` is three directory levels above this file. Installation is
    best-effort: any failure is reported on stdout and turned into a
    ``None`` return instead of an exception.

    Returns:
        Path of the installed binary, or None if installation failed.
    """
    try:
        # Destination directory for the binary
        bin_dir = Path(__file__).parent.parent.parent / "bin"
        bin_dir.mkdir(exist_ok=True)

        downloaded = download_binary()

        platform_name, binary_name = get_platform_info()
        destination = bin_dir / binary_name
        shutil.copy2(downloaded, destination)

        # Windows needs no executable bit; every other platform does.
        is_windows = platform_name.startswith("windows")
        if not is_windows:
            os.chmod(destination, 0o755)

        print(f"Successfully installed diffai binary to {destination}")
        return destination

    except Exception as e:
        print(f"Warning: Failed to install diffai binary: {e}")
        print("The Python API will still work if diffai is available in PATH")
        return None
203
+
204
+
205
+ if __name__ == "__main__":
206
+ install_binary()
@@ -0,0 +1,385 @@
1
+ Metadata-Version: 2.4
2
+ Name: diffai-python
3
+ Version: 0.3.4
4
+ Classifier: Development Status :: 4 - Beta
5
+ Classifier: Environment :: Console
6
+ Classifier: Intended Audience :: Developers
7
+ Classifier: Intended Audience :: Science/Research
8
+ Classifier: License :: OSI Approved :: MIT License
9
+ Classifier: Operating System :: OS Independent
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.8
12
+ Classifier: Programming Language :: Python :: 3.9
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
17
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
18
+ Classifier: Topic :: Text Processing :: General
19
+ Classifier: Topic :: Utilities
20
+ Requires-Dist: pytest>=6.0 ; extra == 'dev'
21
+ Requires-Dist: pytest-cov ; extra == 'dev'
22
+ Requires-Dist: black ; extra == 'dev'
23
+ Requires-Dist: isort ; extra == 'dev'
24
+ Requires-Dist: mypy ; extra == 'dev'
25
+ Requires-Dist: ruff ; extra == 'dev'
26
+ Provides-Extra: dev
27
+ Summary: AI/ML specialized diff tool for deep tensor comparison and analysis
28
+ Keywords: ai,ml,machine-learning,diff,tensor,pytorch,safetensors,numpy,matlab,model-comparison,diffai,artificial-intelligence
29
+ Author: kako-jun
30
+ License: MIT
31
+ Requires-Python: >=3.8
32
+ Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
33
+ Project-URL: Homepage, https://github.com/kako-jun/diffai
34
+ Project-URL: Documentation, https://github.com/kako-jun/diffai/blob/main/docs/
35
+ Project-URL: Repository, https://github.com/kako-jun/diffai.git
36
+ Project-URL: Issues, https://github.com/kako-jun/diffai/issues
37
+ Project-URL: Changelog, https://github.com/kako-jun/diffai/blob/main/CHANGELOG.md
38
+
39
+ # diffai-python
40
+
41
+ AI/ML specialized diff tool for deep tensor comparison and analysis - Python Package
42
+
43
+ [![PyPI version](https://badge.fury.io/py/diffai-python.svg)](https://badge.fury.io/py/diffai-python)
44
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
45
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
46
+
47
+ ## Overview
48
+
49
+ **diffai-python** provides Python bindings for [diffai](https://github.com/kako-jun/diffai), an AI/ML specialized diff tool. This package bundles the high-performance Rust binary and provides a clean Python API for integration into ML workflows, notebooks, and automation scripts.
50
+
51
+ Following the same distribution pattern as [ruff](https://github.com/astral-sh/ruff), this package distributes a pre-compiled binary for maximum performance while providing a convenient Python interface.
52
+
53
+ ## Features
54
+
55
+ - **High Performance**: Uses the native diffai Rust binary for maximum speed
56
+ - **Zero Dependencies**: Self-contained package with bundled binary
57
+ - **ML-Focused**: Specialized analysis for PyTorch, Safetensors, NumPy, and MATLAB files
58
+ - **Scientific Computing**: Full support for NumPy arrays and MATLAB .mat files
59
+ - **Multiple Output Formats**: CLI, JSON, and YAML outputs for different use cases
60
+ - **Python Integration**: Clean API for programmatic use in ML pipelines
61
+
62
+ ## Installation
63
+
64
+ ```bash
65
+ pip install diffai-python
66
+ ```
67
+
68
+ ## Quick Start
69
+
70
+ ### Command Line Usage
71
+
72
+ After installation, the `diffai` command is available:
73
+
74
+ ```bash
75
+ # Compare ML models
76
+ diffai model_v1.safetensors model_v2.safetensors --stats
77
+
78
+ # Compare NumPy arrays
79
+ diffai data_v1.npy data_v2.npy --stats
80
+
81
+ # JSON output for automation
82
+ diffai model_v1.pt model_v2.pt --output json
83
+ ```
84
+
85
+ ### Python API Usage
86
+
87
+ ```python
88
+ import diffai
89
+
90
+ # Basic comparison
91
+ result = diffai.diff("model_v1.safetensors", "model_v2.safetensors")
92
+ print(result.raw_output)
93
+
94
+ # With options
95
+ options = diffai.DiffOptions(
96
+ stats=True,
97
+ architecture_comparison=True,
98
+ output_format=diffai.OutputFormat.JSON
99
+ )
100
+ result = diffai.diff("model_v1.pt", "model_v2.pt", options)
101
+
102
+ # Access structured data
103
+ if result.is_json:
104
+ data = result.data
105
+ print(f"Found {len(data)} differences")
106
+ ```
107
+
108
+ ### Advanced ML Analysis
109
+
110
+ ```python
111
+ # Comprehensive ML model analysis
112
+ result = diffai.diff(
113
+ "baseline.safetensors",
114
+ "improved.safetensors",
115
+ stats=True,
116
+ architecture_comparison=True,
117
+ memory_analysis=True,
118
+ anomaly_detection=True,
119
+ convergence_analysis=True
120
+ )
121
+
122
+ print(result.raw_output)
123
+ ```
124
+
125
+ ## Supported Formats
126
+
127
+ ### Input Formats
128
+ - **ML Models**: `.safetensors`, `.pt`, `.pth`, `.bin` (PyTorch)
129
+ - **Scientific Data**: `.npy`, `.npz` (NumPy), `.mat` (MATLAB)
130
+ - **Structured Data**: `.json`, `.yaml`, `.toml`, `.xml`, `.ini`, `.csv`
131
+
132
+ ### Output Formats
133
+ - **CLI**: Colored terminal output (default)
134
+ - **JSON**: Machine-readable format for automation
135
+ - **YAML**: Human-readable structured format
136
+
137
+ ## ML Analysis Features
138
+
139
+ The package provides 11 specialized ML analysis features:
140
+
141
+ - `--stats`: Detailed tensor statistics
142
+ - `--architecture-comparison`: Model structure comparison
143
+ - `--memory-analysis`: Memory usage analysis
144
+ - `--anomaly-detection`: Numerical anomaly detection
145
+ - `--convergence-analysis`: Training convergence analysis
146
+ - `--gradient-analysis`: Gradient information analysis
147
+ - `--similarity-matrix`: Layer similarity comparison
148
+ - `--change-summary`: Detailed change summary
149
+ - `--quantization-analysis`: Quantization impact analysis
150
+ - `--sort-by-change-magnitude`: Sort by change magnitude
151
+ - `--show-layer-impact`: Layer-specific impact analysis
152
+
153
+ ## API Reference
154
+
155
+ ### Main Functions
156
+
157
+ ```python
158
+ # Compare two files
159
+ def diff(input1: str, input2: str, options: Optional[Union[DiffOptions, Dict[str, Any]]] = None, **kwargs) -> DiffResult
160
+
161
+ # Main CLI entry point
162
+ def main() -> None
163
+ ```
164
+
165
+ ### Configuration
166
+
167
+ ```python
168
+ @dataclass
169
+ class DiffOptions:
170
+ # Basic options
171
+ input_format: Optional[str] = None
172
+ output_format: Optional[OutputFormat] = None
173
+ recursive: bool = False
174
+ verbose: bool = False
175
+
176
+ # ML analysis options
177
+ stats: bool = False
178
+ architecture_comparison: bool = False
179
+ memory_analysis: bool = False
180
+ anomaly_detection: bool = False
181
+ # ... and more
182
+ ```
183
+
184
+ ### Results
185
+
186
+ ```python
187
+ class DiffResult:
188
+ raw_output: str # Raw output from diffai
189
+ format_type: str # Output format used
190
+ return_code: int # Process return code
191
+
192
+ @property
193
+ def data(self) -> Any # Parsed data (JSON when applicable)
194
+
195
+ @property
196
+ def is_json(self) -> bool # True if JSON format
197
+ ```
198
+
199
+ ## Use Cases
200
+
201
+ ### Research & Development
202
+ ```python
203
+ # Compare fine-tuning results
204
+ before = "model_baseline.safetensors"
205
+ after = "model_finetuned.safetensors"
206
+
207
+ result = diffai.diff(before, after,
208
+ stats=True,
209
+ convergence_analysis=True)
210
+ ```
211
+
212
+ ### MLOps Integration
213
+ ```python
214
+ # Automated model validation in CI/CD
215
+ def validate_model_changes(old_model, new_model):
216
+ result = diffai.diff(old_model, new_model,
217
+ output_format=diffai.OutputFormat.JSON,
218
+ anomaly_detection=True,
219
+ memory_analysis=True)
220
+
221
+ if result.is_json:
222
+ # Check for critical issues
223
+ for item in result.data:
224
+ if 'AnomalyDetection' in item and 'critical' in str(item):
225
+ raise ValueError("Critical model anomaly detected")
226
+
227
+ return result
228
+ ```
229
+ ### MLflow Integration
230
+ ```python
231
+ import mlflow
232
+ import diffai
233
+
234
+ def log_model_comparison(run_id1, run_id2):
235
+ """Compare models between MLflow runs"""
236
+
237
+ # Download models from MLflow
238
+ model1_path = mlflow.artifacts.download_artifacts(
239
+ run_id=run_id1, artifact_path="model/model.pt"
240
+ )
241
+ model2_path = mlflow.artifacts.download_artifacts(
242
+ run_id=run_id2, artifact_path="model/model.pt"
243
+ )
244
+
245
+ # Compare with diffai
246
+ result = diffai.diff(model1_path, model2_path,
247
+ output_format=diffai.OutputFormat.JSON,
248
+ stats=True,
249
+ architecture_comparison=True)
250
+
251
+ # Log results to MLflow
252
+ with mlflow.start_run():
253
+ mlflow.log_dict(result.data, "model_comparison.json")
254
+ if result.is_json:
255
+ # Extract metrics for logging
256
+ for item in result.data:
257
+ if 'TensorStatsChanged' in item:
258
+ mlflow.log_metric("tensor_changes", len(result.data))
259
+ break
260
+
261
+ return result
262
+ ```
263
+ ### Weights & Biases Integration
264
+ ```python
265
+ import wandb
266
+ import diffai
267
+
268
+ def log_model_comparison_wandb(model1_path, model2_path):
269
+ """Log model comparison to Weights & Biases"""
270
+
271
+ result = diffai.diff(model1_path, model2_path,
272
+ output_format=diffai.OutputFormat.JSON,
273
+ stats=True,
274
+ memory_analysis=True,
275
+ convergence_analysis=True)
276
+
277
+ # Log to wandb
278
+ wandb.log({"model_comparison": result.data})
279
+
280
+ if result.is_json:
281
+ # Log specific metrics
282
+ memory_changes = [item for item in result.data if 'MemoryAnalysis' in item]
283
+ if memory_changes:
284
+ wandb.log({"memory_impact_detected": len(memory_changes)})
285
+
286
+ return result
287
+ ```
288
+
289
+ ### Jupyter Notebooks
290
+ ```python
291
+ # Interactive analysis in notebooks
292
+ result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt",
293
+ stats=True, memory_analysis=True)
294
+
295
+ # Display results
296
+ if result.is_json:
297
+ from IPython.display import display, JSON
298
+ display(JSON(result.data))
299
+ else:
300
+ print(result.raw_output)
301
+ ```
302
+
303
+ ## Binary Distribution
304
+
305
+ This package follows the same pattern as [ruff](https://github.com/astral-sh/ruff):
306
+
307
+ - Pre-compiled `diffai` binary is bundled with the Python package
308
+ - No external dependencies or system requirements
309
+ - Cross-platform compatibility (Windows, macOS, Linux)
310
+ - Maximum performance through native Rust implementation
311
+
312
+ ## Testing
313
+
314
+ Run the integration tests:
315
+
316
+ ```bash
317
+ cd diffai-python
318
+ python test_integration.py
319
+ ```
320
+
321
+ The test suite includes:
322
+ - Binary availability verification
323
+ - Basic diff functionality
324
+ - JSON output parsing
325
+ - ML analysis options
326
+ - Error handling
327
+
328
+ ## Contributing
329
+
330
+ This package is part of the [diffai](https://github.com/kako-jun/diffai) project. Please see the main repository for contribution guidelines.
331
+
332
+ ## License
333
+
334
+ MIT License - see [LICENSE](../LICENSE) for details.
335
+
336
+ ## Related Projects
337
+
338
+ - **[diffai](https://github.com/kako-jun/diffai)**: Main Rust CLI tool
339
+ - **[diffx](https://github.com/kako-jun/diffx)**: Generic structured data diff tool
340
+ - **[ruff](https://github.com/astral-sh/ruff)**: Inspiration for Python packaging approach
341
+
342
+ ## Error Handling
343
+
344
+ The diffai-python package provides comprehensive error handling for various failure scenarios:
345
+
346
+ ### DiffaiError
347
+ The base exception class for all diffai-related errors:
348
+
349
+ ```python
350
+ import diffai
351
+
352
+ try:
353
+ result = diffai.diff("model1.pt", "model2.pt")
354
+ except diffai.DiffaiError as e:
355
+ print(f"Diffai error: {e}")
356
+ ```
357
+
358
+ ### Binary Not Found
359
+ A `DiffaiError` is raised when the diffai binary cannot be found (there is no separate exception class for this case; inspect the message):
360
+
361
+ ```python
362
+ import diffai
363
+
364
+ try:
365
+ result = diffai.diff("model1.pt", "model2.pt")
366
+ except diffai.DiffaiError as e:
367
+ if "binary not found" in str(e):
368
+ print("Please install diffai binary or ensure it's in PATH")
369
+ # Fallback or installation logic here
370
+ ```
371
+
372
+ ### Binary Installation
373
+ If the binary is not found, you can install it manually:
374
+
375
+ ```bash
376
+ # Install via pip (includes binary)
377
+ pip install diffai-python
378
+
379
+ # Or install Rust version globally
380
+ cargo install diffai-cli
381
+ ```
382
+
383
+ ## License
384
+
385
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
@@ -0,0 +1,7 @@
1
+ diffai/__init__.py,sha256=eL_ZqXdoct718oT9v7smtFVbvRSrV1WjP0w4oRNMnjs,10653
2
+ diffai/__main__.py,sha256=0ABdXiMt3e1FQ3Bept_jgjEoU6unvf-pW9iG50YuS70,106
3
+ diffai/installer.py,sha256=8aLhL6sZ8uDgCoZQM6Ea_dR2jiF4k7mN81I7TghO12U,7132
4
+ diffai_python-0.3.4.data/scripts/diffai.exe,sha256=QbYwNMaJYi0GrN6cnoYGsL20votEsgXxbZR-hyRR1Ps,243200
5
+ diffai_python-0.3.4.dist-info/METADATA,sha256=rBxjiqkzA7U2IDQA63iAf7LJmbAhxucJEmZhXTfexJA,11912
6
+ diffai_python-0.3.4.dist-info/WHEEL,sha256=T1-x9ZAB-aE3ewIGbYuockW5ywV7fI-Nla9FsiR1vW4,93
7
+ diffai_python-0.3.4.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.9.1)
3
+ Root-Is-Purelib: false
4
+ Tag: py3-none-win_amd64