diffai-python 0.3.4__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffai/__init__.py +303 -0
- diffai/__main__.py +8 -0
- diffai/installer.py +206 -0
- diffai_python-0.3.4.data/scripts/diffai.exe +0 -0
- diffai_python-0.3.4.dist-info/METADATA +385 -0
- diffai_python-0.3.4.dist-info/RECORD +7 -0
- diffai_python-0.3.4.dist-info/WHEEL +4 -0
diffai/__init__.py
ADDED
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""
|
|
2
|
+
diffai - AI/ML specialized diff tool for deep tensor comparison and analysis
|
|
3
|
+
|
|
4
|
+
This package provides a Python wrapper around the diffai Rust binary,
|
|
5
|
+
following the same pattern as ruff for optimal performance and reliability.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
# No backward compatibility imports
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import subprocess
|
|
12
|
+
import sys
|
|
13
|
+
import shutil
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any, Dict, List, Optional, Union
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from enum import Enum
|
|
18
|
+
|
|
19
|
+
# Version is now managed dynamically from pyproject.toml
|
|
20
|
+
# This prevents hardcoded version mismatches during releases
|
|
21
|
+
# Resolve the version from the installed distribution metadata so it can never
# drift from pyproject.toml.
try:
    from importlib.metadata import PackageNotFoundError, version
except ImportError:
    # Python < 3.8 has no importlib.metadata; fall back to setuptools' API.
    try:
        import pkg_resources

        __version__ = pkg_resources.get_distribution("diffai-python").version
    except Exception:
        __version__ = "unknown"
else:
    try:
        __version__ = version("diffai-python")
    except PackageNotFoundError:
        # No installed distribution (e.g. running from a source checkout).
        # PackageNotFoundError subclasses ImportError, so it is handled here
        # explicitly instead of falling into the legacy pkg_resources branch.
        __version__ = "unknown"
|
|
31
|
+
|
|
32
|
+
class OutputFormat(Enum):
    """Output formats that can be passed to diffai's ``--output`` option."""

    CLI = "cli"
    JSON = "json"
    YAML = "yaml"


@dataclass
class DiffOptions:
    """Configuration options for a diffai run.

    Each field maps to one diffai command-line option. ``to_args`` emits the
    options in field-declaration order, so the order of the fields below is
    also the order of the generated arguments.
    """

    # Basic options
    input_format: Optional[str] = None
    output_format: Optional[OutputFormat] = None
    recursive: bool = False
    verbose: bool = False
    path: Optional[str] = None
    ignore_keys_regex: Optional[str] = None
    epsilon: Optional[float] = None
    array_id_key: Optional[str] = None

    # ML analysis options
    show_layer_impact: bool = False
    quantization_analysis: bool = False
    sort_by_change_magnitude: bool = False
    stats: bool = False
    learning_progress: bool = False
    convergence_analysis: bool = False
    anomaly_detection: bool = False
    gradient_analysis: bool = False
    memory_analysis: bool = False
    inference_speed_estimate: bool = False
    regression_test: bool = False
    alert_on_degradation: bool = False
    review_friendly: bool = False
    change_summary: bool = False
    deployment_readiness: bool = False
    architecture_comparison: bool = False
    param_efficiency_analysis: bool = False
    hyperparameter_impact: bool = False
    learning_rate_analysis: bool = False
    performance_impact_estimate: bool = False
    generate_report: bool = False
    markdown_output: bool = False
    include_charts: bool = False
    embedding_analysis: bool = False
    similarity_matrix: bool = False
    clustering_change: bool = False
    attention_analysis: bool = False
    head_importance: bool = False
    attention_pattern_diff: bool = False
    hyperparameter_comparison: bool = False
    learning_curve_analysis: bool = False
    statistical_significance: bool = False

    # Fields whose CLI flag is not simply "--" + the kebab-cased field name.
    # Deliberately un-annotated so that @dataclass does not treat it as a field.
    _FLAG_OVERRIDES = {"input_format": "--format", "output_format": "--output"}

    def __post_init__(self) -> None:
        """Accept ``output_format`` given as a string such as ``"json"``.

        Known names (case-insensitive) are converted to ``OutputFormat``.
        Unknown strings are left untouched and forwarded to the binary as-is.
        """
        if isinstance(self.output_format, str):
            try:
                self.output_format = OutputFormat(self.output_format.lower())
            except ValueError:
                pass

    def to_args(self) -> List[str]:
        """Convert the options to diffai command-line arguments.

        Returns:
            The argument list, in field-declaration order. Boolean options
            contribute a bare flag when truthy. ``epsilon`` is emitted whenever
            it is not ``None`` (so ``0.0`` is kept). The remaining value
            options are emitted as ``[flag, value]`` when truthy.
        """
        args: List[str] = []
        for name, spec in self.__dataclass_fields__.items():
            value = getattr(self, name)
            flag = self._FLAG_OVERRIDES.get(name, "--" + name.replace("_", "-"))
            if spec.default is False:
                # Boolean switch: every such field defaults to False.
                if value:
                    args.append(flag)
            elif name == "epsilon":
                # 0.0 is a meaningful tolerance, so test against None only.
                if value is not None:
                    args.extend([flag, str(value)])
            elif value:
                if name == "output_format":
                    # Enum member -> its string value; raw strings pass through.
                    value = getattr(value, "value", value)
                args.extend([flag, value])
        return args
|
|
175
|
+
|
|
176
|
+
class DiffaiError(Exception):
    """Root of the exception hierarchy raised by the diffai Python wrapper."""
|
|
179
|
+
|
|
180
|
+
class DiffResult:
    """Outcome of a single diffai invocation.

    Attributes:
        raw_output: Text the binary wrote to stdout.
        format_type: Name of the output format that was requested.
        return_code: Exit status of the diffai process.
    """

    def __init__(self, raw_output: str, format_type: str = "cli", return_code: int = 0):
        self.raw_output = raw_output
        self.format_type = format_type
        self.return_code = return_code
        # Filled in lazily by the ``data`` property.
        self._parsed_data = None

    @property
    def data(self) -> Any:
        """Return the decoded JSON for JSON results, otherwise the raw text.

        Output that claims to be JSON but is blank or fails to decode is
        returned unchanged as a string.
        """
        if self._parsed_data is not None:
            return self._parsed_data

        parsed: Any = self.raw_output
        wants_json = self.format_type == "json"
        if wants_json and self.raw_output.strip():
            try:
                parsed = json.loads(self.raw_output)
            except json.JSONDecodeError:
                parsed = self.raw_output

        self._parsed_data = parsed
        return self._parsed_data

    @property
    def is_json(self) -> bool:
        """Whether JSON was requested and decoded to a dict or list."""
        if self.format_type != "json":
            return False
        return isinstance(self.data, (dict, list))

    def __str__(self) -> str:
        return self.raw_output
|
|
209
|
+
|
|
210
|
+
def _find_diffai_binary() -> str:
    """Locate the diffai executable, preferring copies shipped with the package.

    Search order:
      1. ``diffai`` (and ``diffai.exe`` on Windows) three directory levels
         above this module -- the location this function has always checked.
      2. ``bin/<binary>`` under that same directory, which is where
         ``installer.install_binary()`` copies the downloaded binary.
      3. The environment's scripts directory, where the wheel installs the
         executable it ships under ``.data/scripts``.
      4. The system ``PATH``.

    Returns:
        Path to the executable as a string.

    Raises:
        DiffaiError: If no executable is found in any location.
    """
    import sysconfig

    binary_name = "diffai.exe" if sys.platform == "win32" else "diffai"
    package_dir = Path(__file__).parent.parent.parent

    # Keep the historical extension-less name first so existing layouts that
    # already resolve keep resolving to the same file.
    names = ["diffai"] if binary_name == "diffai" else ["diffai", binary_name]
    candidates = [package_dir / name for name in names]
    candidates.append(package_dir / "bin" / binary_name)

    # NOTE(review): assumes whatever is named diffai in the scripts directory
    # is the native binary, not a Python launcher for main() -- the same
    # assumption the PATH lookup below already makes.
    scripts_dir = sysconfig.get_path("scripts")
    if scripts_dir:
        candidates.append(Path(scripts_dir) / binary_name)

    for candidate in candidates:
        if candidate.is_file():
            return str(candidate)

    # Fallback to system PATH
    system_binary = shutil.which("diffai")
    if system_binary:
        return system_binary

    raise DiffaiError(
        "diffai binary not found. Please ensure diffai is installed or available in PATH."
    )
|
|
227
|
+
|
|
228
|
+
def diff(
    input1: str,
    input2: str,
    options: Optional[Union[DiffOptions, Dict[str, Any]]] = None,
    **kwargs
) -> DiffResult:
    """
    Compare two files using diffai.

    Args:
        input1: Path to first input file
        input2: Path to second input file
        options: DiffOptions object or dict of options
        **kwargs: Additional options as keyword arguments; they override
            values of the same name given in ``options``

    Returns:
        DiffResult object containing comparison results. A non-zero exit
        status with empty stderr is returned as a normal result, with the
        status available as ``return_code``.

    Raises:
        DiffaiError: If the binary cannot be found or started, or if it exits
            with a non-zero status and wrote to stderr.
        TypeError: If an option name is not a DiffOptions field.
    """
    # Normalise the three accepted option styles into one DiffOptions.
    if options is None:
        options = DiffOptions(**kwargs)
    elif isinstance(options, dict):
        options = DiffOptions(**{**options, **kwargs})
    elif kwargs:
        # Build a new object rather than mutating the caller's DiffOptions.
        current = {
            name: getattr(options, name)
            for name in options.__dataclass_fields__
        }
        options = DiffOptions(**{**current, **kwargs})

    try:
        binary_path = _find_diffai_binary()
        cmd = [binary_path] + options.to_args() + [input1, input2]
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            check=False
        )
    except DiffaiError:
        # Already in the package's own error type; do not wrap it again.
        raise
    except FileNotFoundError as e:
        raise DiffaiError("diffai binary not found") from e
    except Exception as e:
        raise DiffaiError(f"Diff failed: {e}") from e

    # Raised outside the try block so the message is not re-wrapped as
    # "Diff failed: diffai failed: ...".
    if result.returncode != 0 and result.stderr:
        raise DiffaiError(f"diffai failed: {result.stderr}")

    requested = options.output_format
    # Enum members expose .value; a plain string is used as given.
    format_type = getattr(requested, "value", requested) if requested else "cli"
    return DiffResult(result.stdout, format_type, result.returncode)
|
|
282
|
+
|
|
283
|
+
def main():
    """Run the diffai binary with this process's arguments and mirror its exit status.

    Exits with status 1, after printing the reason to stderr, when the binary
    cannot be located.
    """
    try:
        executable = _find_diffai_binary()
    except DiffaiError as e:
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)

    # Output is not captured, so the child writes straight to the terminal.
    completed = subprocess.run([executable, *sys.argv[1:]])
    sys.exit(completed.returncode)
|
|
293
|
+
|
|
294
|
+
# Export main API
|
|
295
|
+
__all__ = [
|
|
296
|
+
"diff",
|
|
297
|
+
"DiffOptions",
|
|
298
|
+
"DiffResult",
|
|
299
|
+
"OutputFormat",
|
|
300
|
+
"DiffaiError",
|
|
301
|
+
"__version__",
|
|
302
|
+
"main",
|
|
303
|
+
]
|
diffai/__main__.py
ADDED
diffai/installer.py
ADDED
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Binary installer for diffai Python package.
|
|
3
|
+
|
|
4
|
+
This module handles downloading and installing the diffai binary
|
|
5
|
+
when the package is installed via pip.
|
|
6
|
+
|
|
7
|
+
Supports platforms: Windows, Linux, Darwin (macOS) with x86_64 and aarch64 architectures.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import platform
|
|
12
|
+
import shutil
|
|
13
|
+
import subprocess
|
|
14
|
+
import sys
|
|
15
|
+
import tarfile
|
|
16
|
+
import tempfile
|
|
17
|
+
import urllib.request
|
|
18
|
+
import zipfile
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
# Package version constant for GitHub releases
|
|
22
|
+
PACKAGE_VERSION = "0.3.4"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_platform_info():
    """Map the running OS and CPU to a release platform tag and binary file name.

    Accepted combinations:
      * Linux: x86_64 / amd64
      * Darwin (macOS): arm64, x86_64 / amd64
      * Windows: x86_64 / amd64

    Returns:
        A ``(platform_name, binary_name)`` tuple, e.g.
        ``("linux-x86_64", "diffai")``.

    Raises:
        RuntimeError: For any other operating system or architecture.
    """
    os_name = platform.system().lower()
    arch = platform.machine().lower()
    intel_64 = ("x86_64", "amd64")

    # os name -> (label used in error messages, [(machine names, result)])
    supported = {
        "linux": ("Linux", [(intel_64, ("linux-x86_64", "diffai"))]),
        "darwin": (
            "Darwin",
            [
                (("arm64",), ("macos-aarch64", "diffai")),
                (intel_64, ("macos-x86_64", "diffai")),
            ],
        ),
        "windows": ("Windows", [(intel_64, ("windows-x86_64", "diffai.exe"))]),
    }

    if os_name not in supported:
        raise RuntimeError(f"Unsupported operating system: {os_name}")

    label, targets = supported[os_name]
    for machines, result in targets:
        if arch in machines:
            return result
    raise RuntimeError(f"Unsupported {label} architecture: {arch}")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def download_file(url, dest_path):
    """Download ``url`` and write the response body to ``dest_path``.

    The body is streamed to disk in chunks rather than read into memory in
    one piece, so large release archives do not inflate memory use.

    Args:
        url: Address to fetch; anything ``urllib.request.urlopen`` accepts.
        dest_path: File to create or overwrite with the downloaded bytes.

    Raises:
        RuntimeError: If the request or the write fails. The original
            exception is attached as ``__cause__``.
    """
    try:
        with urllib.request.urlopen(url) as response, open(dest_path, 'wb') as f:
            shutil.copyfileobj(response, f)
    except Exception as e:
        raise RuntimeError(f"Failed to download file: {e}") from e
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def extract_archive(archive_path, extract_to):
    """Extract a ``.zip``, ``.tar.gz`` or ``.tgz`` archive into ``extract_to``.

    Args:
        archive_path: Archive location, as a string or path-like object.
        extract_to: Directory that receives the archive members.

    Raises:
        RuntimeError: If the file extension is not a supported archive type,
            or if extraction fails (the original exception is attached as
            ``__cause__``).
    """
    archive_name = os.fsdecode(archive_path)

    if archive_name.endswith('.zip'):
        is_zip = True
    elif archive_name.endswith(('.tar.gz', '.tgz')):
        is_zip = False
    else:
        # Raised outside the try block so it is not re-wrapped as an
        # extraction failure.
        raise RuntimeError(f"Unsupported archive format: {archive_path}")

    try:
        if is_zip:
            with zipfile.ZipFile(archive_name, 'r') as zip_ref:
                zip_ref.extractall(extract_to)
        else:
            with tarfile.open(archive_name, 'r:gz') as tar_ref:
                if hasattr(tarfile, "data_filter"):
                    # Archives come from the network: reject members that
                    # would escape extract_to where the runtime supports it.
                    tar_ref.extractall(extract_to, filter="data")
                else:
                    tar_ref.extractall(extract_to)
    except Exception as e:
        raise RuntimeError(f"Failed to extract archive: {e}") from e
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def verify_binary(binary_path):
    """Check that ``binary_path`` runs by invoking it with ``--version``.

    Args:
        binary_path: Executable to test.

    Returns:
        True when the process exits with status 0.

    Raises:
        RuntimeError: If the process cannot be started, does not finish
            within 10 seconds, or exits with a non-zero status.
    """
    try:
        result = subprocess.run(
            [binary_path, "--version"],
            capture_output=True,
            text=True,
            timeout=10
        )
    except subprocess.TimeoutExpired as e:
        raise RuntimeError("Binary verification timed out") from e
    except Exception as e:
        raise RuntimeError(f"Failed to verify binary: {e}") from e

    # Checked outside the try block so this error is not caught and
    # re-wrapped as "Failed to verify binary: Binary verification failed".
    if result.returncode != 0:
        raise RuntimeError(f"Binary verification failed: {result.stderr}")
    return True
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def get_latest_release_info():
    """Query the GitHub API for the latest diffai release.

    Returns:
        A ``(tag_name, assets)`` tuple taken from the JSON response.

    Raises:
        RuntimeError: If the request fails or the response cannot be decoded
            or lacks the expected keys.
    """
    try:
        import json

        # GitHub API URL for latest release
        endpoint = "https://api.github.com/repos/kako-jun/diffai/releases/latest"
        with urllib.request.urlopen(endpoint) as response:
            body = response.read().decode()
            release = json.loads(body)

        tag, assets = release["tag_name"], release["assets"]
        return tag, assets
    except Exception as e:
        raise RuntimeError(f"Failed to get release information from github.com: {e}")
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def download_binary(version=None):
    """Download the diffai binary for the current platform.

    Args:
        version: Release tag to download. When None, the latest release is
            looked up through the GitHub API and its asset list is used.

    Returns:
        Path to the extracted binary. The file lives in a temporary directory
        that is removed when the interpreter exits, so copy it if it must
        outlive the current process.

    Raises:
        RuntimeError: If the platform is unsupported, the release has no
            asset for this platform, or the archive does not contain the
            binary.
    """
    import atexit

    platform_name, binary_name = get_platform_info()

    # Windows releases ship as .zip, everything else as .tar.gz.
    extension = "zip" if platform_name.startswith("windows") else "tar.gz"
    asset_name = f"diffai-{platform_name}.{extension}"

    if version is None:
        version, assets = get_latest_release_info()
    else:
        # For specific version, construct asset URL manually
        assets = None

    if assets:
        # Find the matching asset in the release's asset list.
        asset_url = next(
            (
                asset["browser_download_url"]
                for asset in assets
                if asset["name"] == asset_name
            ),
            None,
        )
        if not asset_url:
            raise RuntimeError(f"Binary not found for platform: {platform_name}")
    else:
        # Fallback URL construction
        base_url = "https://github.com/kako-jun/diffai/releases/download"
        asset_url = f"{base_url}/{version}/{asset_name}"

    # Download and unpack in a scratch directory that is discarded on exit
    # from the with-block.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = Path(temp_dir)
        archive_path = temp_path / asset_name

        print(f"Downloading {asset_url}...")
        urllib.request.urlretrieve(asset_url, archive_path)

        # Extract archive
        if asset_name.endswith(".tar.gz"):
            subprocess.run(["tar", "-xzf", archive_path, "-C", temp_path], check=True)
        elif asset_name.endswith(".zip"):
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(temp_path)

        # Find the binary
        extracted = temp_path / binary_name
        if not extracted.exists():
            raise RuntimeError(f"Binary not found in archive: {binary_name}")

        # The scratch directory disappears when this block ends, so returning
        # a path inside it would hand the caller a file that no longer
        # exists. Move the binary somewhere that survives the return and
        # schedule that location for removal at interpreter exit.
        keep_dir = Path(tempfile.mkdtemp(prefix="diffai-"))
        atexit.register(shutil.rmtree, keep_dir, ignore_errors=True)
        binary_path = keep_dir / binary_name
        shutil.copy2(extracted, binary_path)

    return binary_path
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def install_binary():
    """Download the diffai binary and copy it into a ``bin`` directory.

    The ``bin`` directory is created three levels above this module. This is
    best-effort: any failure is reported with a printed warning instead of
    being raised.

    Returns:
        Path of the installed binary, or None if installation failed.
    """
    try:
        install_root = Path(__file__).parent.parent.parent
        bin_dir = install_root / "bin"
        bin_dir.mkdir(exist_ok=True)

        downloaded = download_binary()

        platform_name, binary_name = get_platform_info()
        destination = bin_dir / binary_name
        shutil.copy2(downloaded, destination)

        # Windows has no executable bit; everywhere else set rwxr-xr-x.
        on_windows = platform_name.startswith("windows")
        if not on_windows:
            os.chmod(destination, 0o755)

        print(f"Successfully installed diffai binary to {destination}")
        return destination

    except Exception as e:
        print(f"Warning: Failed to install diffai binary: {e}")
        print("The Python API will still work if diffai is available in PATH")
        return None
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
if __name__ == "__main__":
|
|
206
|
+
install_binary()
|
|
Binary file
|
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: diffai-python
|
|
3
|
+
Version: 0.3.4
|
|
4
|
+
Classifier: Development Status :: 4 - Beta
|
|
5
|
+
Classifier: Environment :: Console
|
|
6
|
+
Classifier: Intended Audience :: Developers
|
|
7
|
+
Classifier: Intended Audience :: Science/Research
|
|
8
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
9
|
+
Classifier: Operating System :: OS Independent
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
17
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
18
|
+
Classifier: Topic :: Text Processing :: General
|
|
19
|
+
Classifier: Topic :: Utilities
|
|
20
|
+
Requires-Dist: pytest>=6.0 ; extra == 'dev'
|
|
21
|
+
Requires-Dist: pytest-cov ; extra == 'dev'
|
|
22
|
+
Requires-Dist: black ; extra == 'dev'
|
|
23
|
+
Requires-Dist: isort ; extra == 'dev'
|
|
24
|
+
Requires-Dist: mypy ; extra == 'dev'
|
|
25
|
+
Requires-Dist: ruff ; extra == 'dev'
|
|
26
|
+
Provides-Extra: dev
|
|
27
|
+
Summary: AI/ML specialized diff tool for deep tensor comparison and analysis
|
|
28
|
+
Keywords: ai,ml,machine-learning,diff,tensor,pytorch,safetensors,numpy,matlab,model-comparison,diffai,artificial-intelligence
|
|
29
|
+
Author: kako-jun
|
|
30
|
+
License: MIT
|
|
31
|
+
Requires-Python: >=3.8
|
|
32
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
33
|
+
Project-URL: Homepage, https://github.com/kako-jun/diffai
|
|
34
|
+
Project-URL: Documentation, https://github.com/kako-jun/diffai/blob/main/docs/
|
|
35
|
+
Project-URL: Repository, https://github.com/kako-jun/diffai.git
|
|
36
|
+
Project-URL: Issues, https://github.com/kako-jun/diffai/issues
|
|
37
|
+
Project-URL: Changelog, https://github.com/kako-jun/diffai/blob/main/CHANGELOG.md
|
|
38
|
+
|
|
39
|
+
# diffai-python
|
|
40
|
+
|
|
41
|
+
AI/ML specialized diff tool for deep tensor comparison and analysis - Python Package
|
|
42
|
+
|
|
43
|
+
[PyPI version](https://badge.fury.io/py/diffai-python)
|
|
44
|
+
[Python 3.8+](https://www.python.org/downloads/)
|
|
45
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
46
|
+
|
|
47
|
+
## Overview
|
|
48
|
+
|
|
49
|
+
**diffai-python** provides Python bindings for [diffai](https://github.com/kako-jun/diffai), an AI/ML specialized diff tool. This package bundles the high-performance Rust binary and provides a clean Python API for integration into ML workflows, notebooks, and automation scripts.
|
|
50
|
+
|
|
51
|
+
Following the same distribution pattern as [ruff](https://github.com/astral-sh/ruff), this package distributes a pre-compiled binary for maximum performance while providing a convenient Python interface.
|
|
52
|
+
|
|
53
|
+
## Features
|
|
54
|
+
|
|
55
|
+
- **High Performance**: Uses the native diffai Rust binary for maximum speed
|
|
56
|
+
- **Zero Dependencies**: Self-contained package with bundled binary
|
|
57
|
+
- **ML-Focused**: Specialized analysis for PyTorch, Safetensors, NumPy, and MATLAB files
|
|
58
|
+
- **Scientific Computing**: Full support for NumPy arrays and MATLAB .mat files
|
|
59
|
+
- **Multiple Output Formats**: CLI, JSON, and YAML outputs for different use cases
|
|
60
|
+
- **Python Integration**: Clean API for programmatic use in ML pipelines
|
|
61
|
+
|
|
62
|
+
## Installation
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install diffai-python
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Quick Start
|
|
69
|
+
|
|
70
|
+
### Command Line Usage
|
|
71
|
+
|
|
72
|
+
After installation, the `diffai` command is available:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
# Compare ML models
|
|
76
|
+
diffai model_v1.safetensors model_v2.safetensors --stats
|
|
77
|
+
|
|
78
|
+
# Compare NumPy arrays
|
|
79
|
+
diffai data_v1.npy data_v2.npy --stats
|
|
80
|
+
|
|
81
|
+
# JSON output for automation
|
|
82
|
+
diffai model_v1.pt model_v2.pt --output json
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Python API Usage
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
import diffai
|
|
89
|
+
|
|
90
|
+
# Basic comparison
|
|
91
|
+
result = diffai.diff("model_v1.safetensors", "model_v2.safetensors")
|
|
92
|
+
print(result.raw_output)
|
|
93
|
+
|
|
94
|
+
# With options
|
|
95
|
+
options = diffai.DiffOptions(
|
|
96
|
+
stats=True,
|
|
97
|
+
architecture_comparison=True,
|
|
98
|
+
output_format=diffai.OutputFormat.JSON
|
|
99
|
+
)
|
|
100
|
+
result = diffai.diff("model_v1.pt", "model_v2.pt", options)
|
|
101
|
+
|
|
102
|
+
# Access structured data
|
|
103
|
+
if result.is_json:
|
|
104
|
+
data = result.data
|
|
105
|
+
print(f"Found {len(data)} differences")
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
### Advanced ML Analysis
|
|
109
|
+
|
|
110
|
+
```python
|
|
111
|
+
# Comprehensive ML model analysis
|
|
112
|
+
result = diffai.diff(
|
|
113
|
+
"baseline.safetensors",
|
|
114
|
+
"improved.safetensors",
|
|
115
|
+
stats=True,
|
|
116
|
+
architecture_comparison=True,
|
|
117
|
+
memory_analysis=True,
|
|
118
|
+
anomaly_detection=True,
|
|
119
|
+
convergence_analysis=True
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
print(result.raw_output)
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Supported Formats
|
|
126
|
+
|
|
127
|
+
### Input Formats
|
|
128
|
+
- **ML Models**: `.safetensors`, `.pt`, `.pth`, `.bin` (PyTorch)
|
|
129
|
+
- **Scientific Data**: `.npy`, `.npz` (NumPy), `.mat` (MATLAB)
|
|
130
|
+
- **Structured Data**: `.json`, `.yaml`, `.toml`, `.xml`, `.ini`, `.csv`
|
|
131
|
+
|
|
132
|
+
### Output Formats
|
|
133
|
+
- **CLI**: Colored terminal output (default)
|
|
134
|
+
- **JSON**: Machine-readable format for automation
|
|
135
|
+
- **YAML**: Human-readable structured format
|
|
136
|
+
|
|
137
|
+
## ML Analysis Features
|
|
138
|
+
|
|
139
|
+
The package provides 11 specialized ML analysis features:
|
|
140
|
+
|
|
141
|
+
- `--stats`: Detailed tensor statistics
|
|
142
|
+
- `--architecture-comparison`: Model structure comparison
|
|
143
|
+
- `--memory-analysis`: Memory usage analysis
|
|
144
|
+
- `--anomaly-detection`: Numerical anomaly detection
|
|
145
|
+
- `--convergence-analysis`: Training convergence analysis
|
|
146
|
+
- `--gradient-analysis`: Gradient information analysis
|
|
147
|
+
- `--similarity-matrix`: Layer similarity comparison
|
|
148
|
+
- `--change-summary`: Detailed change summary
|
|
149
|
+
- `--quantization-analysis`: Quantization impact analysis
|
|
150
|
+
- `--sort-by-change-magnitude`: Sort by change magnitude
|
|
151
|
+
- `--show-layer-impact`: Layer-specific impact analysis
|
|
152
|
+
|
|
153
|
+
## API Reference
|
|
154
|
+
|
|
155
|
+
### Main Functions
|
|
156
|
+
|
|
157
|
+
```python
|
|
158
|
+
# Compare two files
|
|
159
|
+
def diff(input1: str, input2: str, options: Optional[DiffOptions] = None, **kwargs) -> DiffResult
|
|
160
|
+
|
|
161
|
+
# Main CLI entry point
|
|
162
|
+
def main() -> None
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Configuration
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
@dataclass
|
|
169
|
+
class DiffOptions:
|
|
170
|
+
# Basic options
|
|
171
|
+
input_format: Optional[str] = None
|
|
172
|
+
output_format: Optional[OutputFormat] = None
|
|
173
|
+
recursive: bool = False
|
|
174
|
+
verbose: bool = False
|
|
175
|
+
|
|
176
|
+
# ML analysis options
|
|
177
|
+
stats: bool = False
|
|
178
|
+
architecture_comparison: bool = False
|
|
179
|
+
memory_analysis: bool = False
|
|
180
|
+
anomaly_detection: bool = False
|
|
181
|
+
# ... and more
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Results
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
class DiffResult:
|
|
188
|
+
raw_output: str # Raw output from diffai
|
|
189
|
+
format_type: str # Output format used
|
|
190
|
+
return_code: int # Process return code
|
|
191
|
+
|
|
192
|
+
@property
|
|
193
|
+
def data(self) -> Any # Parsed data (JSON when applicable)
|
|
194
|
+
|
|
195
|
+
@property
|
|
196
|
+
def is_json(self) -> bool # True if JSON format
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## Use Cases
|
|
200
|
+
|
|
201
|
+
### Research & Development
|
|
202
|
+
```python
|
|
203
|
+
# Compare fine-tuning results
|
|
204
|
+
before = "model_baseline.safetensors"
|
|
205
|
+
after = "model_finetuned.safetensors"
|
|
206
|
+
|
|
207
|
+
result = diffai.diff(before, after,
|
|
208
|
+
stats=True,
|
|
209
|
+
convergence_analysis=True)
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
### MLOps Integration
|
|
213
|
+
```python
|
|
214
|
+
# Automated model validation in CI/CD
|
|
215
|
+
def validate_model_changes(old_model, new_model):
|
|
216
|
+
result = diffai.diff(old_model, new_model,
|
|
217
|
+
output_format=diffai.OutputFormat.JSON,
|
|
218
|
+
anomaly_detection=True,
|
|
219
|
+
memory_analysis=True)
|
|
220
|
+
|
|
221
|
+
if result.is_json:
|
|
222
|
+
# Check for critical issues
|
|
223
|
+
for item in result.data:
|
|
224
|
+
if 'AnomalyDetection' in item and 'critical' in str(item):
|
|
225
|
+
raise ValueError("Critical model anomaly detected")
|
|
226
|
+
|
|
227
|
+
return result
```
|
|
228
|
+
|
|
229
|
+
### MLflow Integration
|
|
230
|
+
```python
|
|
231
|
+
import mlflow
|
|
232
|
+
import diffai
|
|
233
|
+
|
|
234
|
+
def log_model_comparison(run_id1, run_id2):
|
|
235
|
+
"""Compare models between MLflow runs"""
|
|
236
|
+
|
|
237
|
+
# Download models from MLflow
|
|
238
|
+
model1_path = mlflow.artifacts.download_artifacts(
|
|
239
|
+
run_id=run_id1, artifact_path="model/model.pt"
|
|
240
|
+
)
|
|
241
|
+
model2_path = mlflow.artifacts.download_artifacts(
|
|
242
|
+
run_id=run_id2, artifact_path="model/model.pt"
|
|
243
|
+
)
|
|
244
|
+
|
|
245
|
+
# Compare with diffai
|
|
246
|
+
result = diffai.diff(model1_path, model2_path,
|
|
247
|
+
output_format=diffai.OutputFormat.JSON,
|
|
248
|
+
stats=True,
|
|
249
|
+
architecture_comparison=True)
|
|
250
|
+
|
|
251
|
+
# Log results to MLflow
|
|
252
|
+
with mlflow.start_run():
|
|
253
|
+
mlflow.log_dict(result.data, "model_comparison.json")
|
|
254
|
+
if result.is_json:
|
|
255
|
+
# Extract metrics for logging
|
|
256
|
+
for item in result.data:
|
|
257
|
+
if 'TensorStatsChanged' in item:
|
|
258
|
+
mlflow.log_metric("tensor_changes", len(result.data))
|
|
259
|
+
break
|
|
260
|
+
|
|
261
|
+
return result
```
|
|
262
|
+
|
|
263
|
+
### Weights & Biases Integration
|
|
264
|
+
```python
|
|
265
|
+
import wandb
|
|
266
|
+
import diffai
|
|
267
|
+
|
|
268
|
+
def log_model_comparison_wandb(model1_path, model2_path):
|
|
269
|
+
"""Log model comparison to Weights & Biases"""
|
|
270
|
+
|
|
271
|
+
result = diffai.diff(model1_path, model2_path,
|
|
272
|
+
output_format=diffai.OutputFormat.JSON,
|
|
273
|
+
stats=True,
|
|
274
|
+
memory_analysis=True,
|
|
275
|
+
convergence_analysis=True)
|
|
276
|
+
|
|
277
|
+
# Log to wandb
|
|
278
|
+
wandb.log({"model_comparison": result.data})
|
|
279
|
+
|
|
280
|
+
if result.is_json:
|
|
281
|
+
# Log specific metrics
|
|
282
|
+
memory_changes = [item for item in result.data if 'MemoryAnalysis' in item]
|
|
283
|
+
if memory_changes:
|
|
284
|
+
wandb.log({"memory_impact_detected": len(memory_changes)})
|
|
285
|
+
|
|
286
|
+
return result
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
### Jupyter Notebooks
|
|
290
|
+
```python
|
|
291
|
+
# Interactive analysis in notebooks
|
|
292
|
+
result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt",
|
|
293
|
+
stats=True, memory_analysis=True)
|
|
294
|
+
|
|
295
|
+
# Display results
|
|
296
|
+
if result.is_json:
|
|
297
|
+
from IPython.display import display, JSON
|
|
298
|
+
display(JSON(result.data))
|
|
299
|
+
else:
|
|
300
|
+
print(result.raw_output)
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
## Binary Distribution
|
|
304
|
+
|
|
305
|
+
This package follows the same pattern as [ruff](https://github.com/astral-sh/ruff):
|
|
306
|
+
|
|
307
|
+
- Pre-compiled `diffai` binary is bundled with the Python package
|
|
308
|
+
- No external dependencies or system requirements
|
|
309
|
+
- Cross-platform compatibility (Windows, macOS, Linux)
|
|
310
|
+
- Maximum performance through native Rust implementation
|
|
311
|
+
|
|
312
|
+
## Testing
|
|
313
|
+
|
|
314
|
+
Run the integration tests:
|
|
315
|
+
|
|
316
|
+
```bash
|
|
317
|
+
cd diffai-python
|
|
318
|
+
python test_integration.py
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
The test suite includes:
|
|
322
|
+
- Binary availability verification
|
|
323
|
+
- Basic diff functionality
|
|
324
|
+
- JSON output parsing
|
|
325
|
+
- ML analysis options
|
|
326
|
+
- Error handling
|
|
327
|
+
|
|
328
|
+
## Contributing
|
|
329
|
+
|
|
330
|
+
This package is part of the [diffai](https://github.com/kako-jun/diffai) project. Please see the main repository for contribution guidelines.
|
|
331
|
+
|
|
332
|
+
## License
|
|
333
|
+
|
|
334
|
+
MIT License - see [LICENSE](../LICENSE) for details.
|
|
335
|
+
|
|
336
|
+
## Related Projects
|
|
337
|
+
|
|
338
|
+
- **[diffai](https://github.com/kako-jun/diffai)**: Main Rust CLI tool
|
|
339
|
+
- **[diffx](https://github.com/kako-jun/diffx)**: Generic structured data diff tool
|
|
340
|
+
- **[ruff](https://github.com/astral-sh/ruff)**: Inspiration for Python packaging approach
|
|
341
|
+
|
|
342
|
+
## Error Handling
|
|
343
|
+
|
|
344
|
+
The diffai-python package provides comprehensive error handling for various failure scenarios:
|
|
345
|
+
|
|
346
|
+
### DiffaiError
|
|
347
|
+
The base exception class for all diffai-related errors:
|
|
348
|
+
|
|
349
|
+
```python
|
|
350
|
+
import diffai
|
|
351
|
+
|
|
352
|
+
try:
|
|
353
|
+
result = diffai.diff("model1.pt", "model2.pt")
|
|
354
|
+
except diffai.DiffaiError as e:
|
|
355
|
+
print(f"Diffai error: {e}")
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
### BinaryNotFoundError
|
|
359
|
+
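Raised when the diffai binary cannot be found. The example below detects this case by catching `diffai.DiffaiError` and checking the error message: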
|
|
360
|
+
|
|
361
|
+
```python
|
|
362
|
+
import diffai
|
|
363
|
+
|
|
364
|
+
try:
|
|
365
|
+
result = diffai.diff("model1.pt", "model2.pt")
|
|
366
|
+
except diffai.DiffaiError as e:
|
|
367
|
+
if "binary not found" in str(e):
|
|
368
|
+
print("Please install diffai binary or ensure it's in PATH")
|
|
369
|
+
# Fallback or installation logic here
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
### Binary Installation
|
|
373
|
+
If the binary is not found, reinstall the Python package (which bundles the binary) or install the Rust CLI globally:
|
|
374
|
+
|
|
375
|
+
```bash
|
|
376
|
+
# Install via pip (includes binary)
|
|
377
|
+
pip install diffai-python
|
|
378
|
+
|
|
379
|
+
# Or install Rust version globally
|
|
380
|
+
cargo install diffai-cli
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
## License
|
|
384
|
+
|
|
385
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
diffai/__init__.py,sha256=eL_ZqXdoct718oT9v7smtFVbvRSrV1WjP0w4oRNMnjs,10653
|
|
2
|
+
diffai/__main__.py,sha256=0ABdXiMt3e1FQ3Bept_jgjEoU6unvf-pW9iG50YuS70,106
|
|
3
|
+
diffai/installer.py,sha256=8aLhL6sZ8uDgCoZQM6Ea_dR2jiF4k7mN81I7TghO12U,7132
|
|
4
|
+
diffai_python-0.3.4.data/scripts/diffai.exe,sha256=QbYwNMaJYi0GrN6cnoYGsL20votEsgXxbZR-hyRR1Ps,243200
|
|
5
|
+
diffai_python-0.3.4.dist-info/METADATA,sha256=rBxjiqkzA7U2IDQA63iAf7LJmbAhxucJEmZhXTfexJA,11912
|
|
6
|
+
diffai_python-0.3.4.dist-info/WHEEL,sha256=T1-x9ZAB-aE3ewIGbYuockW5ywV7fI-Nla9FsiR1vW4,93
|
|
7
|
+
diffai_python-0.3.4.dist-info/RECORD,,
|