diffai-python 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffai/__init__.py +63 -0
- diffai/diffai.py +452 -0
- diffai/installer.py +335 -0
- diffai_python-0.2.9.dist-info/METADATA +391 -0
- diffai_python-0.2.9.dist-info/RECORD +7 -0
- diffai_python-0.2.9.dist-info/WHEEL +4 -0
- diffai_python-0.2.9.dist-info/entry_points.txt +2 -0
diffai/__init__.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""
|
|
2
|
+
diffai - AI/ML specialized diff tool for deep tensor comparison and analysis
|
|
3
|
+
|
|
4
|
+
This package provides Python bindings for diffai, a powerful command-line tool
|
|
5
|
+
specialized for comparing AI/ML model files, scientific data, and structured data.
|
|
6
|
+
|
|
7
|
+
Quick start:
|
|
8
|
+
>>> import diffai
|
|
9
|
+
>>> result = diffai.diff("model1.safetensors", "model2.safetensors", stats=True)
|
|
10
|
+
>>> print(result)
|
|
11
|
+
|
|
12
|
+
Advanced usage:
|
|
13
|
+
>>> options = diffai.DiffOptions(
|
|
14
|
+
... stats=True,
|
|
15
|
+
... architecture_comparison=True,
|
|
16
|
+
... output_format="json"
|
|
17
|
+
... )
|
|
18
|
+
>>> result = diffai.diff("model1.safetensors", "model2.safetensors", options)
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from .diffai import (
|
|
22
|
+
# Main API functions
|
|
23
|
+
diff,
|
|
24
|
+
diff_string,
|
|
25
|
+
run_diffai,
|
|
26
|
+
verify_installation,
|
|
27
|
+
|
|
28
|
+
# Configuration and result classes
|
|
29
|
+
DiffOptions,
|
|
30
|
+
DiffResult,
|
|
31
|
+
OutputFormat,
|
|
32
|
+
|
|
33
|
+
# Exceptions
|
|
34
|
+
DiffaiError,
|
|
35
|
+
BinaryNotFoundError,
|
|
36
|
+
InvalidInputError,
|
|
37
|
+
|
|
38
|
+
# Version info
|
|
39
|
+
__version__,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
# No backward compatibility imports - clean modern API only
|
|
43
|
+
|
|
44
|
+
__all__ = [
|
|
45
|
+
# Main API
|
|
46
|
+
"diff",
|
|
47
|
+
"diff_string",
|
|
48
|
+
"run_diffai",
|
|
49
|
+
"verify_installation",
|
|
50
|
+
|
|
51
|
+
# Configuration
|
|
52
|
+
"DiffOptions",
|
|
53
|
+
"DiffResult",
|
|
54
|
+
"OutputFormat",
|
|
55
|
+
|
|
56
|
+
# Exceptions
|
|
57
|
+
"DiffaiError",
|
|
58
|
+
"BinaryNotFoundError",
|
|
59
|
+
"InvalidInputError",
|
|
60
|
+
|
|
61
|
+
# Metadata
|
|
62
|
+
"__version__",
|
|
63
|
+
]
|
diffai/diffai.py
ADDED
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Main diffai module providing Python interface to the diffai CLI tool.
|
|
3
|
+
|
|
4
|
+
This module provides a high-level Python API for the diffai command-line tool,
|
|
5
|
+
with support for type-safe configuration, structured results, and comprehensive
|
|
6
|
+
error handling.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import platform
|
|
11
|
+
import subprocess
|
|
12
|
+
import sys
|
|
13
|
+
from dataclasses import dataclass, field
|
|
14
|
+
from enum import Enum
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any, Dict, List, Optional, Union
|
|
17
|
+
|
|
18
|
+
import importlib.metadata
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
__version__ = importlib.metadata.version("diffai-python")
|
|
22
|
+
except importlib.metadata.PackageNotFoundError:
|
|
23
|
+
# Fallback for development
|
|
24
|
+
__version__ = "0.2.9"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class OutputFormat(Enum):
    """Output formats that can be requested through the CLI's ``--output`` option.

    Each member's value is the literal string passed on the command line.
    """

    # Default colored CLI output
    CLI = "cli"
    # Machine-readable JSON
    JSON = "json"
    # Human-readable YAML
    YAML = "yaml"
    # Unified diff format
    UNIFIED = "unified"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class DiffOptions:
    """
    Configuration options for diffai analysis.

    Every field corresponds to one diffai command-line option;
    `to_cli_args` renders the populated fields as an argument list.

    Based on actual CLI options from diffai-cli/src/main.rs
    """

    # Output and basic configuration
    # output_format accepts an OutputFormat member or its plain string value
    # (e.g. "json"), matching the usage shown in the package docstring.
    output_format: Optional[Union["OutputFormat", str]] = None
    input_format: Optional[str] = None  # json, yaml, toml, ini, xml, csv, safetensors, pytorch, numpy, npz, matlab
    recursive: bool = False
    path: Optional[str] = None  # Filter by specific path
    ignore_keys_regex: Optional[str] = None  # Ignore keys matching regex
    epsilon: Optional[float] = None  # Tolerance for float comparisons
    array_id_key: Optional[str] = None  # Key for identifying array elements

    # Core analysis options
    stats: bool = False
    verbose: bool = False
    show_layer_impact: bool = False
    quantization_analysis: bool = False
    sort_by_change_magnitude: bool = False

    # Enhanced ML analysis (Phase 3 implemented)
    learning_progress: bool = False
    convergence_analysis: bool = False
    anomaly_detection: bool = False
    gradient_analysis: bool = False
    memory_analysis: bool = False
    inference_speed_estimate: bool = False
    regression_test: bool = False
    alert_on_degradation: bool = False
    review_friendly: bool = False
    change_summary: bool = False
    risk_assessment: bool = False
    architecture_comparison: bool = False
    param_efficiency_analysis: bool = False
    hyperparameter_impact: bool = False
    learning_rate_analysis: bool = False
    deployment_readiness: bool = False
    performance_impact_estimate: bool = False
    generate_report: bool = False
    markdown_output: bool = False
    include_charts: bool = False
    embedding_analysis: bool = False
    similarity_matrix: bool = False
    clustering_change: bool = False
    attention_analysis: bool = False
    head_importance: bool = False
    attention_pattern_diff: bool = False

    # Phase 2 options
    hyperparameter_comparison: bool = False
    learning_curve_analysis: bool = False
    statistical_significance: bool = False

    # Additional options, appended verbatim after everything else
    extra_args: List[str] = field(default_factory=list)

    def to_cli_args(self) -> List[str]:
        """Convert options to CLI arguments.

        Value options come first (``--format``, ``--output``, ``--path``,
        ``--ignore-keys-regex``, ``--epsilon``, ``--array-id-key``), followed
        by one ``--kebab-case`` switch per enabled boolean field in
        declaration order, followed by ``extra_args``.

        Returns:
            The argument list, without the binary name or the input paths.
        """
        args: List[str] = []

        if self.input_format:
            args.extend(["--format", self.input_format])

        if self.output_format:
            # Accept both the enum member and its raw string value; calling
            # ``.value`` unconditionally crashed on a plain string.
            fmt = self.output_format
            args.extend(["--output", fmt.value if isinstance(fmt, Enum) else str(fmt)])

        if self.path:
            args.extend(["--path", self.path])
        if self.ignore_keys_regex:
            args.extend(["--ignore-keys-regex", self.ignore_keys_regex])
        # 0.0 is a meaningful tolerance, hence the explicit None check.
        if self.epsilon is not None:
            args.extend(["--epsilon", str(self.epsilon)])
        if self.array_id_key:
            args.extend(["--array-id-key", self.array_id_key])

        # Every boolean field maps to "--" plus its kebab-cased name, so the
        # switches are derived from the field list instead of a parallel table.
        for name, spec in self.__dataclass_fields__.items():
            if spec.type in (bool, "bool") and getattr(self, name):
                args.append("--" + name.replace("_", "-"))

        args.extend(self.extra_args)
        return args
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
class DiffaiError(Exception):
    """Root of the diffai exception hierarchy.

    Besides the message, an instance carries the exit code and the captured
    stderr of the failed invocation when the raiser supplies them.
    """

    def __init__(self, message: str, exit_code: Optional[int] = None, stderr: Optional[str] = None):
        # Attach the process details first, then let Exception store the message.
        self.exit_code = exit_code
        self.stderr = stderr
        super().__init__(message)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
class BinaryNotFoundError(DiffaiError):
    """Signals that the diffai executable could not be located or run."""
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
class InvalidInputError(DiffaiError):
    """Signals that the input files or arguments given to diffai are invalid."""
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class DiffResult:
    """
    Outcome of one diffai invocation.

    Keeps the raw text the CLI printed and, when the result is labelled as
    JSON, exposes the decoded document through `data`, `changes` and
    `summary`.
    """

    def __init__(self, raw_output: str, exit_code: int = 0, format_type: str = "diffai"):
        self.raw_output = raw_output
        self.exit_code = exit_code
        self.format_type = format_type
        # Lazily filled by the ``data`` property.
        self._parsed_data = None

    @property
    def data(self) -> Any:
        """Decoded JSON for JSON results; the raw output string otherwise.

        Output that is labelled JSON but is empty or fails to decode falls
        back to the raw string.
        """
        if self._parsed_data is not None:
            return self._parsed_data

        parsed = self.raw_output
        if self.format_type == "json" and self.raw_output.strip():
            try:
                parsed = json.loads(self.raw_output)
            except json.JSONDecodeError:
                parsed = self.raw_output

        self._parsed_data = parsed
        return parsed

    @property
    def is_json(self) -> bool:
        """Whether the result is labelled JSON and decoded to a dict or list."""
        if self.format_type != "json":
            return False
        return isinstance(self.data, (dict, list))

    @property
    def changes(self) -> List[Dict[str, Any]]:
        """The decoded JSON when it is a list; an empty list otherwise."""
        payload = self.data if self.is_json else None
        return payload if isinstance(payload, list) else []

    @property
    def summary(self) -> Dict[str, Any]:
        """The decoded JSON when it is an object; an empty dict otherwise."""
        payload = self.data if self.is_json else None
        return payload if isinstance(payload, dict) else {}

    def __str__(self) -> str:
        """Return the raw CLI output unchanged."""
        return self.raw_output

    def __repr__(self) -> str:
        """Return a compact description: format, exit code and output length."""
        return f"DiffResult(format={self.format_type}, exit_code={self.exit_code}, length={len(self.raw_output)})"
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _get_diffai_binary_path() -> str:
    """
    Locate the diffai executable to run.

    Looks for ``bin/diffai`` (``bin/diffai.exe`` on Windows) three directory
    levels above this file, then two levels above it, and returns the first
    one that exists. When neither exists, the bare command name is returned
    so the operating system resolves it through PATH.
    """
    module_file = Path(__file__)
    executable = "diffai.exe" if platform.system() == "Windows" else "diffai"

    # Candidate roots in priority order: grandparent of the package
    # directory first, then its parent.
    for base_dir in (module_file.parent.parent.parent, module_file.parent.parent):
        candidate = base_dir / "bin" / executable
        if candidate.exists():
            return str(candidate)

    # Nothing bundled: fall back to system PATH.
    return "diffai"
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def verify_installation() -> Dict[str, Any]:
    """
    Verify that diffai is properly installed and accessible.

    Runs ``<binary> --version`` with a 10 second timeout.

    Returns:
        Dict with ``binary_path``, ``version`` (the stripped stdout of the
        version command) and ``status`` (always ``"ok"``).

    Raises:
        BinaryNotFoundError: If the binary is missing, times out, cannot be
            started, or exits with a non-zero status. For a non-zero exit the
            error carries the exit code and stderr.
    """
    try:
        binary_path = _get_diffai_binary_path()
        result = subprocess.run(
            [binary_path, "--version"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except subprocess.TimeoutExpired as exc:
        raise BinaryNotFoundError("diffai binary execution timed out") from exc
    except FileNotFoundError as exc:
        raise BinaryNotFoundError(
            "diffai binary not found. Please install diffai or ensure it's in your PATH. "
            "See: https://github.com/kako-jun/diffai/releases"
        ) from exc
    except Exception as exc:
        raise BinaryNotFoundError(f"Failed to verify diffai installation: {exc}") from exc

    # Checked outside the try block on purpose: raising here used to be
    # swallowed by the generic handler above and re-wrapped into a second,
    # misleading "Failed to verify" message.
    if result.returncode != 0:
        raise BinaryNotFoundError(
            f"diffai binary found at {binary_path} but failed to execute: {result.stderr}",
            result.returncode,
            result.stderr,
        )

    info: Dict[str, str] = {
        "binary_path": binary_path,
        "version": result.stdout.strip(),
        "status": "ok",
    }
    return info
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def run_diffai(args: List[str], timeout: Optional[int] = None) -> DiffResult:
|
|
310
|
+
"""
|
|
311
|
+
Execute diffai with specified arguments.
|
|
312
|
+
|
|
313
|
+
Args:
|
|
314
|
+
args: Command-line arguments to pass to diffai
|
|
315
|
+
timeout: Maximum execution time in seconds
|
|
316
|
+
|
|
317
|
+
Returns:
|
|
318
|
+
DiffResult object containing execution results
|
|
319
|
+
|
|
320
|
+
Raises:
|
|
321
|
+
DiffaiError: If execution fails
|
|
322
|
+
BinaryNotFoundError: If diffai binary cannot be found
|
|
323
|
+
"""
|
|
324
|
+
try:
|
|
325
|
+
binary_path = _get_diffai_binary_path()
|
|
326
|
+
cmd = [binary_path] + args
|
|
327
|
+
|
|
328
|
+
result = subprocess.run(
|
|
329
|
+
cmd,
|
|
330
|
+
capture_output=True,
|
|
331
|
+
text=True,
|
|
332
|
+
timeout=timeout
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
# Determine output format
|
|
336
|
+
format_type = "json" if "--output" in args and "json" in args else "diffai"
|
|
337
|
+
|
|
338
|
+
if result.returncode == 0:
|
|
339
|
+
return DiffResult(result.stdout, result.returncode, format_type)
|
|
340
|
+
else:
|
|
341
|
+
# Handle common error cases
|
|
342
|
+
if "No such file" in result.stderr or "not found" in result.stderr:
|
|
343
|
+
raise InvalidInputError(
|
|
344
|
+
f"Input file not found: {result.stderr}",
|
|
345
|
+
result.returncode,
|
|
346
|
+
result.stderr
|
|
347
|
+
)
|
|
348
|
+
else:
|
|
349
|
+
raise DiffaiError(
|
|
350
|
+
f"diffai execution failed: {result.stderr}",
|
|
351
|
+
result.returncode,
|
|
352
|
+
result.stderr
|
|
353
|
+
)
|
|
354
|
+
|
|
355
|
+
except subprocess.TimeoutExpired:
|
|
356
|
+
raise DiffaiError(f"diffai execution timed out after {timeout} seconds")
|
|
357
|
+
except FileNotFoundError:
|
|
358
|
+
raise BinaryNotFoundError(
|
|
359
|
+
"diffai binary not found. Please install diffai or ensure it's in your PATH."
|
|
360
|
+
)
|
|
361
|
+
|
|
362
|
+
|
|
363
|
+
def diff(
    input1: str,
    input2: str,
    options: Optional[Union[DiffOptions, Dict[str, Any]]] = None,
    **kwargs
) -> DiffResult:
    """
    Run diffai on two input files and return its result.

    Args:
        input1: Path to first input file
        input2: Path to second input file
        options: A DiffOptions instance, a dict of DiffOptions field values,
            or None
        **kwargs: DiffOptions field values; they take precedence over the
            same fields supplied through ``options``

    Returns:
        DiffResult object containing comparison results

    Example:
        >>> result = diff("model1.safetensors", "model2.safetensors", stats=True)
        >>> print(result)

        >>> options = DiffOptions(stats=True, architecture_comparison=True)
        >>> result = diff("model1.safetensors", "model2.safetensors", options)
    """
    # Normalise every accepted form of ``options`` into one DiffOptions.
    if isinstance(options, dict):
        options = DiffOptions(**{**options, **kwargs})
    elif options is None:
        options = DiffOptions(**kwargs)
    elif kwargs:
        # Copy the existing field values, then let the keyword overrides win.
        field_values: Dict[str, Any] = {
            name: getattr(options, name) for name in options.__dataclass_fields__
        }
        field_values.update(kwargs)
        options = DiffOptions(**field_values)

    # The two inputs come first, followed by the rendered options.
    return run_diffai([input1, input2, *options.to_cli_args()])
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def diff_string(
    content1: str,
    content2: str,
    format_type: Union[str, None] = None,
    **kwargs
) -> DiffResult:
    """
    Compare two strings using diffai (creates temporary files).

    Args:
        content1: First string content
        content2: Second string content
        format_type: Format of the contents (e.g. ``"yaml"``). It becomes the
            temporary files' extension and, unless ``input_format`` is
            already given via ``kwargs`` or ``options``, the ``input_format``
            option. Defaults to JSON files with no explicit input format.
        **kwargs: Options passed to diff()

    Returns:
        DiffResult object containing comparison results

    Note:
        This function creates temporary files for string comparison.
        Use diff() directly for file-based comparisons.
    """
    import contextlib
    import os
    import tempfile

    suffix = "." + format_type.lstrip(".") if format_type else ".json"

    if format_type and "input_format" not in kwargs:
        # Respect an input format already chosen through ``options``.
        supplied = kwargs.get("options")
        if isinstance(supplied, dict):
            declared = supplied.get("input_format")
        else:
            declared = getattr(supplied, "input_format", None)
        if not declared:
            kwargs["input_format"] = format_type.lstrip(".")

    temp_paths = []
    try:
        for content in (content1, content2):
            # delete=False so the file outlives the handle and diffai can open
            # it; the path is recorded before writing so a failed write still
            # gets cleaned up. UTF-8 avoids locale-dependent encodings.
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=suffix, delete=False, encoding="utf-8"
            ) as handle:
                temp_paths.append(handle.name)
                handle.write(content)

        return diff(temp_paths[0], temp_paths[1], **kwargs)
    finally:
        # Best-effort cleanup: each file is removed independently so one
        # failure cannot leave the other behind.
        for temp_path in temp_paths:
            with contextlib.suppress(OSError):
                os.unlink(temp_path)
|
diffai/installer.py
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Binary installer for diffai Python package.
|
|
4
|
+
|
|
5
|
+
This module handles downloading and installing platform-specific diffai binaries
|
|
6
|
+
from GitHub releases. It supports automatic platform detection and graceful
|
|
7
|
+
error handling for environments where binary installation fails.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import argparse
|
|
11
|
+
import hashlib
|
|
12
|
+
import os
|
|
13
|
+
import platform
|
|
14
|
+
import shutil
|
|
15
|
+
import subprocess
|
|
16
|
+
import sys
|
|
17
|
+
import tarfile
|
|
18
|
+
import tempfile
|
|
19
|
+
import urllib.request
|
|
20
|
+
import zipfile
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Dict, Optional, Tuple
|
|
23
|
+
|
|
24
|
+
import importlib.metadata
|
|
25
|
+
|
|
26
|
+
try:
    PACKAGE_VERSION = importlib.metadata.version("diffai-python")
except importlib.metadata.PackageNotFoundError:
    # Fallback for development (package metadata not installed). Must match
    # the version this package is released as, because it selects the GitHub
    # release tag the binary is downloaded from.
    PACKAGE_VERSION = "0.2.9"

GITHUB_REPO = "kako-jun/diffai"
# Base URL of the release assets for the tag matching PACKAGE_VERSION.
RELEASES_URL = f"https://github.com/{GITHUB_REPO}/releases/download/v{PACKAGE_VERSION}"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _normalize_arch(machine: str) -> str:
    """Map a ``platform.machine()`` value to the arch name used in archive names.

    Matching is case-insensitive so that spellings such as ``ARM64`` or
    ``amd64`` are recognised as well.

    Raises:
        ValueError: If the architecture is not x86_64 or aarch64.
    """
    key = machine.strip().lower()
    if key in ("x86_64", "amd64"):
        return "x86_64"
    if key in ("arm64", "aarch64"):
        return "aarch64"
    raise ValueError(f"Unsupported architecture: {machine}")


def get_platform_info() -> Dict[str, str]:
    """
    Detect current platform and return appropriate binary information.

    Returns:
        Dictionary with the keys ``platform`` (``windows``/``macos``/``linux``),
        ``arch`` (``x86_64``/``aarch64``), ``extension`` (``zip`` or
        ``tar.gz``), ``binary_name`` and ``archive_name``
        (``diffai-<platform>-<arch>.<extension>``).

    Raises:
        ValueError: If the architecture or the operating system is not supported
    """
    system = platform.system()
    # Architecture is validated first, as before, so its error takes precedence.
    arch = _normalize_arch(platform.machine())

    # platform.system() value -> (platform label, archive extension, binary name)
    supported = {
        "Windows": ("windows", "zip", "diffai.exe"),
        "Darwin": ("macos", "tar.gz", "diffai"),
        "Linux": ("linux", "tar.gz", "diffai"),
    }
    if system not in supported:
        raise ValueError(f"Unsupported platform: {system}")

    label, extension, binary_name = supported[system]
    return {
        "platform": label,
        "arch": arch,
        "extension": extension,
        "binary_name": binary_name,
        "archive_name": f"diffai-{label}-{arch}.{extension}",
    }
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def download_file(
    url: str,
    destination: Path,
    chunk_size: int = 8192,
    timeout: Optional[float] = 60.0,
) -> None:
    """
    Download a file from URL to destination path.

    Progress is printed to stdout. On any failure a partially written
    destination file is removed before the error is raised.

    Args:
        url: URL to download from
        destination: Local path to save file
        chunk_size: Size of chunks to download at a time
        timeout: Timeout in seconds passed to ``urlopen`` so a stalled
            connection cannot hang forever; ``None`` disables it

    Raises:
        urllib.error.URLError: If download fails (the original exception is
            attached as ``__cause__``)
    """
    import urllib.error

    print(f"Downloading from: {url}")

    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            # Non-HTTP responses (e.g. file://) carry no status code; only an
            # actual HTTP status other than 200 is an error.
            status = getattr(response, "status", None)
            if status is not None and status != 200:
                raise urllib.error.URLError(f"HTTP {status}: {response.reason}")

            total_size = response.headers.get('content-length')
            if total_size:
                total_size = int(total_size)
                print(f"File size: {total_size:,} bytes")

            downloaded = 0
            with open(destination, 'wb') as f:
                while True:
                    chunk = response.read(chunk_size)
                    if not chunk:
                        break
                    f.write(chunk)
                    downloaded += len(chunk)

                    if total_size:
                        percent = (downloaded / total_size) * 100
                        print(f"\rProgress: {percent:.1f}% ({downloaded:,}/{total_size:,} bytes)", end='')

            if total_size:
                print()  # New line after progress

    except Exception as e:
        # Never leave a truncated archive behind; a cleanup failure must not
        # mask the download error itself.
        try:
            if destination.exists():
                destination.unlink()
        except OSError:
            pass
        raise urllib.error.URLError(f"Download failed: {e}") from e
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _ensure_within(root: Path, member_name: str) -> None:
    """Raise ValueError if *member_name* would resolve outside *root*."""
    target = (root / member_name).resolve()
    if target != root and root not in target.parents:
        raise ValueError(f"Archive member escapes extraction directory: {member_name}")


def extract_archive(archive_path: Path, extract_to: Path, platform_info: Dict[str, str]) -> Path:
    """
    Extract downloaded archive and return path to binary.

    Tar members are validated before extraction so that a crafted archive
    cannot write outside ``extract_to``; where the running Python provides
    tarfile extraction filters, the ``data`` filter is applied as well.

    Args:
        archive_path: Path to downloaded archive
        extract_to: Directory to extract to
        platform_info: Platform information dictionary; the keys
            ``extension``, ``binary_name``, ``platform`` and ``arch`` are read

    Returns:
        Path to extracted binary

    Raises:
        Exception: If extraction fails, a member is unsafe, or the binary is
            not present in the archive (original error attached as
            ``__cause__``)
    """
    print(f"Extracting {archive_path} to {extract_to}")

    try:
        if platform_info["extension"] == "zip":
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(extract_to)
        else:  # tar.gz
            root = extract_to.resolve()
            with tarfile.open(archive_path, 'r:gz') as tar_ref:
                for member in tar_ref.getmembers():
                    _ensure_within(root, member.name)
                    # Links are checked too: a link pointing outside the
                    # directory could redirect a later member's write.
                    if member.issym():
                        _ensure_within(root, str(Path(member.name).parent / member.linkname))
                    elif member.islnk():
                        _ensure_within(root, member.linkname)

                if hasattr(tarfile, "data_filter"):
                    tar_ref.extractall(extract_to, filter="data")
                else:
                    tar_ref.extractall(extract_to)

        # Find the binary in extracted files
        binary_name = platform_info["binary_name"]

        # Check common locations
        possible_paths = [
            extract_to / binary_name,
            extract_to / "diffai" / binary_name,
            extract_to / f"diffai-{platform_info['platform']}-{platform_info['arch']}" / binary_name,
        ]

        for path in possible_paths:
            if path.exists():
                return path

        # If not found in expected locations, search recursively
        for current_dir, _subdirs, files in os.walk(extract_to):
            if binary_name in files:
                return Path(current_dir) / binary_name

        raise FileNotFoundError(f"Binary '{binary_name}' not found in extracted archive")

    except Exception as e:
        raise Exception(f"Failed to extract archive: {e}") from e
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def verify_binary(binary_path: Path) -> bool:
    """
    Check whether the binary at ``binary_path`` runs and identifies as diffai.

    On non-Windows systems the file mode is set to 0o755 first. The binary is
    then run with ``--version`` under a 10 second timeout.

    Args:
        binary_path: Path to binary to verify

    Returns:
        True when the command exits with status 0 and its stdout contains
        "diffai" (case-insensitive); False otherwise, including when any
        exception occurs along the way.
    """
    try:
        on_windows = platform.system() == "Windows"
        if not on_windows:
            # Make executable on Unix systems
            os.chmod(binary_path, 0o755)

        probe = subprocess.run(
            [str(binary_path), "--version"],
            capture_output=True,
            text=True,
            timeout=10,
        )

        if probe.returncode != 0:
            return False
        return "diffai" in probe.stdout.lower()
    except Exception:
        # Any failure (missing file, permissions, timeout, ...) means "not usable".
        return False
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def install_binary(target_dir: Optional[Path] = None, force: bool = False) -> bool:
    """
    Fetch the release archive for this platform and install its diffai binary.

    An existing binary that passes verification is kept unless ``force`` is
    set; otherwise the archive is downloaded into a temporary directory,
    extracted, verified and moved into place. Failures are reported on stderr
    rather than raised.

    Args:
        target_dir: Directory to install binary (default: a ``bin`` directory
            three levels above this file)
        force: Whether to overwrite existing binary

    Returns:
        True if installation succeeded, False otherwise
    """
    try:
        info = get_platform_info()
        print(f"Detected platform: {info['platform']} {info['arch']}")

        # Resolve and create the installation directory.
        if target_dir is None:
            target_dir = Path(__file__).parent.parent.parent / "bin"
        target_dir.mkdir(parents=True, exist_ok=True)
        binary_path = target_dir / info["binary_name"]

        # Reuse a working binary unless a reinstall was requested.
        if binary_path.exists() and not force:
            if verify_binary(binary_path):
                print(f"diffai binary already exists and is functional: {binary_path}")
                return True
            print(f"Existing binary appears corrupted, replacing: {binary_path}")

        archive_name = info["archive_name"]
        download_url = f"{RELEASES_URL}/{archive_name}"

        with tempfile.TemporaryDirectory() as scratch:
            workdir = Path(scratch)
            archive_file = workdir / archive_name

            download_file(download_url, archive_file)
            candidate = extract_archive(archive_file, workdir, info)

            # Only a binary that actually runs is moved into place.
            if not verify_binary(candidate):
                raise Exception("Downloaded binary failed verification")

            shutil.move(str(candidate), str(binary_path))

        print(f"Successfully installed diffai binary to: {binary_path}")
        return True

    except Exception as e:
        print(f"Failed to install diffai binary: {e}", file=sys.stderr)
        print(f"Manual installation available at: {RELEASES_URL}", file=sys.stderr)
        return False
|
|
269
|
+
|
|
270
|
+
|
|
271
|
+
def main(argv: Optional[list] = None) -> int:
    """
    Main entry point for diffai-download-binary command.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``; ``None``
            (the default) keeps the command-line behaviour.

    Returns:
        Exit code (0 for success, 1 for failure)
    """
    parser = argparse.ArgumentParser(
        description="Download and install diffai binary",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  diffai-download-binary                    # Install to default location
  diffai-download-binary --force            # Force reinstall
  diffai-download-binary --target ./bin     # Install to specific directory
  diffai-download-binary --verify           # Verify existing installation
        """
    )

    parser.add_argument(
        "--target",
        type=Path,
        help="Target directory for installation (default: package bin directory)"
    )

    parser.add_argument(
        "--force",
        action="store_true",
        help="Force reinstallation even if binary exists"
    )

    parser.add_argument(
        "--verify",
        action="store_true",
        help="Only verify existing installation"
    )

    parser.add_argument(
        "--version",
        action="version",
        version=f"diffai-installer {PACKAGE_VERSION}"
    )

    args = parser.parse_args(argv)

    if not args.verify:
        # Install binary
        success = install_binary(args.target, args.force)
        return 0 if success else 1

    # Verify existing installation. An unsupported platform is reported as a
    # normal failure (exit code 1) instead of escaping as a traceback.
    try:
        platform_info = get_platform_info()
    except ValueError as exc:
        print(f"❌ Cannot verify diffai binary: {exc}", file=sys.stderr)
        return 1

    target_dir = args.target or (Path(__file__).parent.parent.parent / "bin")
    binary_path = target_dir / platform_info["binary_name"]

    if binary_path.exists() and verify_binary(binary_path):
        print(f"✅ diffai binary is properly installed: {binary_path}")
        return 0

    print(f"❌ diffai binary not found or not functional: {binary_path}")
    return 1
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
if __name__ == "__main__":
|
|
335
|
+
sys.exit(main())
|
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: diffai-python
|
|
3
|
+
Version: 0.2.9
|
|
4
|
+
Summary: AI/ML specialized diff tool for deep tensor comparison and analysis
|
|
5
|
+
Project-URL: Homepage, https://github.com/kako-jun/diffai
|
|
6
|
+
Project-URL: Documentation, https://github.com/kako-jun/diffai/blob/main/docs/
|
|
7
|
+
Project-URL: Repository, https://github.com/kako-jun/diffai.git
|
|
8
|
+
Project-URL: Issues, https://github.com/kako-jun/diffai/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/kako-jun/diffai/blob/main/CHANGELOG.md
|
|
10
|
+
Author: kako-jun
|
|
11
|
+
License-Expression: MIT
|
|
12
|
+
Keywords: ai,artificial-intelligence,diff,diffai,machine-learning,matlab,ml,model-comparison,numpy,pytorch,safetensors,tensor
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Environment :: Console
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
25
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
26
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
|
+
Classifier: Topic :: Text Processing :: General
|
|
28
|
+
Classifier: Topic :: Utilities
|
|
29
|
+
Requires-Python: >=3.8
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: black>=22.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: flake8>=5.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: mypy>=0.991; extra == 'dev'
|
|
34
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: types-requests>=2.28; extra == 'dev'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# diffai - AI/ML Specialized Diff Tool (Python Package)
|
|
40
|
+
|
|
41
|
+
[PyPI version](https://badge.fury.io/py/diffai-python)
|
|
42
|
+
[diffai-python on PyPI](https://pypi.org/project/diffai-python/)
|
|
43
|
+
[Supported Python versions](https://pypi.org/project/diffai-python/)
|
|
44
|
+
|
|
45
|
+
diffai is a diff tool specialized for AI/ML data, built for deep tensor comparison and analysis. This package provides a convenient, type-safe Python interface to the diffai command-line tool.
|
|
46
|
+
|
|
47
|
+
## 🚀 Quick Start
|
|
48
|
+
|
|
49
|
+
### Installation
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# Install via pip
|
|
53
|
+
pip install diffai-python
|
|
54
|
+
|
|
55
|
+
# Development installation
|
|
56
|
+
pip install diffai-python[dev]
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Basic Usage
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
import diffai
|
|
63
|
+
|
|
64
|
+
# Simple model comparison
|
|
65
|
+
result = diffai.diff("model_v1.safetensors", "model_v2.safetensors", stats=True)
|
|
66
|
+
print(result)
|
|
67
|
+
|
|
68
|
+
# Advanced ML analysis with type-safe configuration
|
|
69
|
+
options = diffai.DiffOptions(
|
|
70
|
+
stats=True,
|
|
71
|
+
architecture_comparison=True,
|
|
72
|
+
memory_analysis=True,
|
|
73
|
+
output_format=diffai.OutputFormat.JSON
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
result = diffai.diff("baseline.safetensors", "improved.safetensors", options)
|
|
77
|
+
if result.is_json:
|
|
78
|
+
for change in result.changes:
|
|
79
|
+
print(f"Changed: {change}")
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Command Line Usage
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# The package also installs the diffai binary
|
|
86
|
+
diffai model1.safetensors model2.safetensors --stats
|
|
87
|
+
|
|
88
|
+
# Download binary manually if needed
|
|
89
|
+
diffai-download-binary
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## 📦 Supported File Formats
|
|
93
|
+
|
|
94
|
+
### AI/ML Formats (Specialized Analysis)
|
|
95
|
+
- **Safetensors** (.safetensors) - PyTorch model format with ML analysis
|
|
96
|
+
- **PyTorch** (.pt, .pth) - Native PyTorch models with tensor statistics
|
|
97
|
+
- **NumPy** (.npy, .npz) - Scientific computing arrays with statistical analysis
|
|
98
|
+
- **MATLAB** (.mat) - Engineering/scientific data with numerical analysis
|
|
99
|
+
|
|
100
|
+
### Structured Data Formats (Universal)
|
|
101
|
+
- **JSON** (.json) - API configurations, model metadata
|
|
102
|
+
- **YAML** (.yaml, .yml) - Configuration files, CI/CD pipelines
|
|
103
|
+
- **TOML** (.toml) - Rust configs, Python pyproject.toml
|
|
104
|
+
- **XML** (.xml) - Legacy configurations, model definitions
|
|
105
|
+
- **CSV** (.csv) - Datasets, experiment results
|
|
106
|
+
- **INI** (.ini) - Legacy configuration files
|
|
107
|
+
|
|
108
|
+
## 🔬 35 ML Analysis Functions
|
|
109
|
+
|
|
110
|
+
### Core Analysis Functions
|
|
111
|
+
```python
|
|
112
|
+
# Statistical analysis
|
|
113
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors", stats=True)
|
|
114
|
+
|
|
115
|
+
# Quantization analysis
|
|
116
|
+
result = diffai.diff("fp32.safetensors", "quantized.safetensors",
|
|
117
|
+
quantization_analysis=True)
|
|
118
|
+
|
|
119
|
+
# Change magnitude sorting
|
|
120
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors",
|
|
121
|
+
sort_by_change_magnitude=True, stats=True)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Phase 3 Advanced Analysis (v0.2.7+)
|
|
125
|
+
```python
|
|
126
|
+
# Architecture comparison
|
|
127
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors",
|
|
128
|
+
architecture_comparison=True)
|
|
129
|
+
|
|
130
|
+
# Memory analysis for deployment
|
|
131
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors",
|
|
132
|
+
memory_analysis=True)
|
|
133
|
+
|
|
134
|
+
# Anomaly detection for debugging
|
|
135
|
+
result = diffai.diff("stable.safetensors", "problematic.safetensors",
|
|
136
|
+
anomaly_detection=True)
|
|
137
|
+
|
|
138
|
+
# Comprehensive analysis
|
|
139
|
+
options = diffai.DiffOptions(
|
|
140
|
+
stats=True,
|
|
141
|
+
architecture_comparison=True,
|
|
142
|
+
memory_analysis=True,
|
|
143
|
+
anomaly_detection=True,
|
|
144
|
+
convergence_analysis=True,
|
|
145
|
+
gradient_analysis=True,
|
|
146
|
+
similarity_matrix=True,
|
|
147
|
+
change_summary=True
|
|
148
|
+
)
|
|
149
|
+
result = diffai.diff("baseline.safetensors", "improved.safetensors", options)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## 💡 Python API Examples
|
|
153
|
+
|
|
154
|
+
### Type-Safe Configuration
|
|
155
|
+
```python
|
|
156
|
+
from diffai import DiffOptions, OutputFormat
|
|
157
|
+
|
|
158
|
+
# Create type-safe configuration
|
|
159
|
+
options = DiffOptions(
|
|
160
|
+
stats=True,
|
|
161
|
+
architecture_comparison=True,
|
|
162
|
+
memory_analysis=True,
|
|
163
|
+
output_format=OutputFormat.JSON
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
# Compare models
|
|
167
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors", options)
|
|
168
|
+
|
|
169
|
+
# Access structured results
|
|
170
|
+
if result.is_json:
|
|
171
|
+
print(f"Found {len(result.changes)} changes")
|
|
172
|
+
for change in result.changes:
|
|
173
|
+
print(f" {change.get('path')}: {change.get('type')}")
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Scientific Data Analysis
|
|
177
|
+
```python
|
|
178
|
+
# NumPy array comparison
|
|
179
|
+
result = diffai.diff("experiment_v1.npy", "experiment_v2.npy", stats=True)
|
|
180
|
+
print(f"Statistical changes: {result}")
|
|
181
|
+
|
|
182
|
+
# MATLAB data comparison
|
|
183
|
+
result = diffai.diff("simulation_v1.mat", "simulation_v2.mat",
|
|
184
|
+
stats=True, sort_by_change_magnitude=True)
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### JSON Output for Automation
|
|
188
|
+
```python
|
|
189
|
+
# Get JSON results for MLOps integration
|
|
190
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors",
|
|
191
|
+
stats=True, output_format=diffai.OutputFormat.JSON)
|
|
192
|
+
|
|
193
|
+
if result.is_json:
|
|
194
|
+
# Process structured data
|
|
195
|
+
changes = result.changes
|
|
196
|
+
summary = result.summary
|
|
197
|
+
|
|
198
|
+
# Integration with MLflow, Weights & Biases, etc.
|
|
199
|
+
log_model_comparison(changes, summary)
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### Error Handling
|
|
203
|
+
```python
|
|
204
|
+
try:
|
|
205
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors", stats=True)
|
|
206
|
+
print(result)
|
|
207
|
+
except diffai.BinaryNotFoundError:
|
|
208
|
+
print("diffai binary not found. Download it with: diffai-download-binary")
|
|
209
|
+
except diffai.InvalidInputError as e:
|
|
210
|
+
print(f"Invalid input: {e}")
|
|
211
|
+
except diffai.DiffaiError as e:
|
|
212
|
+
print(f"diffai error: {e}")
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### String Comparison (Temporary Files)
|
|
216
|
+
```python
|
|
217
|
+
# Compare JSON strings directly
|
|
218
|
+
json1 = '{"model": "gpt-2", "layers": 12}'
|
|
219
|
+
json2 = '{"model": "gpt-2", "layers": 24}'
|
|
220
|
+
|
|
221
|
+
result = diffai.diff_string(json1, json2, output_format=diffai.OutputFormat.JSON)
|
|
222
|
+
print(result)
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
## 🔧 Advanced Usage
|
|
226
|
+
|
|
227
|
+
### Installation Verification
|
|
228
|
+
```python
|
|
229
|
+
# Check if diffai is properly installed
|
|
230
|
+
try:
|
|
231
|
+
info = diffai.verify_installation()
|
|
232
|
+
print(f"diffai version: {info['version']}")
|
|
233
|
+
print(f"Binary path: {info['binary_path']}")
|
|
234
|
+
except diffai.BinaryNotFoundError as e:
|
|
235
|
+
print(f"Installation issue: {e}")
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### Manual Binary Management
|
|
239
|
+
```python
|
|
240
|
+
# Download binary programmatically
|
|
241
|
+
from diffai.installer import install_binary
|
|
242
|
+
|
|
243
|
+
success = install_binary(force=True) # Force reinstall
|
|
244
|
+
if success:
|
|
245
|
+
print("Binary installed successfully")
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### Low-Level API Access
|
|
249
|
+
```python
|
|
250
|
+
# Direct command execution
|
|
251
|
+
result = diffai.run_diffai([
|
|
252
|
+
"model1.safetensors",
|
|
253
|
+
"model2.safetensors",
|
|
254
|
+
"--stats",
|
|
255
|
+
"--architecture-comparison",
|
|
256
|
+
"--output", "json"
|
|
257
|
+
])
|
|
258
|
+
|
|
259
|
+
print(f"Exit code: {result.exit_code}")
|
|
260
|
+
print(f"Output: {result.raw_output}")
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## 🔗 Integration Examples
|
|
264
|
+
|
|
265
|
+
### MLflow Integration
|
|
266
|
+
```python
|
|
267
|
+
import mlflow
|
|
268
|
+
import diffai
|
|
269
|
+
|
|
270
|
+
def log_model_comparison(model1_path, model2_path, run_id=None):
|
|
271
|
+
with mlflow.start_run(run_id=run_id):
|
|
272
|
+
# Compare models with comprehensive analysis
|
|
273
|
+
result = diffai.diff(
|
|
274
|
+
model1_path, model2_path,
|
|
275
|
+
stats=True,
|
|
276
|
+
architecture_comparison=True,
|
|
277
|
+
memory_analysis=True,
|
|
278
|
+
output_format=diffai.OutputFormat.JSON
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
if result.is_json:
|
|
282
|
+
# Log structured comparison data
|
|
283
|
+
mlflow.log_dict(result.data, "model_comparison.json")
|
|
284
|
+
|
|
285
|
+
# Log metrics
|
|
286
|
+
if result.changes:
|
|
287
|
+
mlflow.log_metric("total_changes", len(result.changes))
|
|
288
|
+
mlflow.log_metric("significant_changes",
|
|
289
|
+
sum(1 for c in result.changes
|
|
290
|
+
if c.get('magnitude', 0) > 0.1))
|
|
291
|
+
|
|
292
|
+
# Usage
|
|
293
|
+
log_model_comparison("baseline.safetensors", "candidate.safetensors")
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### Weights & Biases Integration
|
|
297
|
+
```python
|
|
298
|
+
import wandb
|
|
299
|
+
import diffai
|
|
300
|
+
|
|
301
|
+
def wandb_log_model_diff(model1, model2, **kwargs):
|
|
302
|
+
result = diffai.diff(model1, model2,
|
|
303
|
+
stats=True,
|
|
304
|
+
output_format=diffai.OutputFormat.JSON,
|
|
305
|
+
**kwargs)
|
|
306
|
+
|
|
307
|
+
if result.is_json and result.changes:
|
|
308
|
+
# Log to wandb
|
|
309
|
+
wandb.log({
|
|
310
|
+
"model_comparison": wandb.Table(
|
|
311
|
+
columns=["parameter", "change_type", "magnitude"],
|
|
312
|
+
data=[[c.get("path"), c.get("type"), c.get("magnitude")]
|
|
313
|
+
for c in result.changes[:100]] # Limit rows
|
|
314
|
+
)
|
|
315
|
+
})
|
|
316
|
+
|
|
317
|
+
# Initialize wandb run
|
|
318
|
+
wandb.init(project="model-comparison")
|
|
319
|
+
wandb_log_model_diff("model_v1.safetensors", "model_v2.safetensors")
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
### Flask API Endpoint
|
|
323
|
+
```python
|
|
324
|
+
from flask import Flask, request, jsonify
|
|
325
|
+
import diffai
|
|
326
|
+
|
|
327
|
+
app = Flask(__name__)
|
|
328
|
+
|
|
329
|
+
@app.route('/compare', methods=['POST'])
|
|
330
|
+
def compare_models():
|
|
331
|
+
try:
|
|
332
|
+
files = request.files
|
|
333
|
+
model1 = files['model1']
|
|
334
|
+
model2 = files['model2']
|
|
335
|
+
|
|
336
|
+
# Save temporary files
|
|
337
|
+
model1.save('/tmp/model1.safetensors')
|
|
338
|
+
model2.save('/tmp/model2.safetensors')
|
|
339
|
+
|
|
340
|
+
# Compare models
|
|
341
|
+
result = diffai.diff('/tmp/model1.safetensors', '/tmp/model2.safetensors',
|
|
342
|
+
stats=True,
|
|
343
|
+
architecture_comparison=True,
|
|
344
|
+
output_format=diffai.OutputFormat.JSON)
|
|
345
|
+
|
|
346
|
+
return jsonify({
|
|
347
|
+
"status": "success",
|
|
348
|
+
"comparison": result.data if result.is_json else result.raw_output
|
|
349
|
+
})
|
|
350
|
+
|
|
351
|
+
except diffai.DiffaiError as e:
|
|
352
|
+
return jsonify({"status": "error", "message": str(e)}), 400
|
|
353
|
+
|
|
354
|
+
if __name__ == '__main__':
|
|
355
|
+
app.run(debug=True)
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
## 🏗️ Platform Support
|
|
359
|
+
|
|
360
|
+
This package automatically downloads platform-specific binaries:
|
|
361
|
+
|
|
362
|
+
- **Linux** (x86_64, ARM64)
|
|
363
|
+
- **macOS** (Intel x86_64, Apple Silicon ARM64)
|
|
364
|
+
- **Windows** (x86_64)
|
|
365
|
+
|
|
366
|
+
The binary is downloaded during installation and cached. If the download fails, the package falls back to a `diffai` executable found on the system PATH.
|
|
367
|
+
|
|
368
|
+
## 🔗 Related Projects
|
|
369
|
+
|
|
370
|
+
- **[diffx-python](https://pypi.org/project/diffx-python/)** - General-purpose structured data diff tool
|
|
371
|
+
- **[diffai (npm)](https://www.npmjs.com/package/diffai)** - Node.js package for diffai
|
|
372
|
+
- **[diffai (GitHub)](https://github.com/kako-jun/diffai)** - Main repository
|
|
373
|
+
|
|
374
|
+
## 📚 Documentation
|
|
375
|
+
|
|
376
|
+
- [CLI Reference](https://github.com/kako-jun/diffai/blob/main/docs/reference/cli-reference.md)
|
|
377
|
+
- [ML Analysis Guide](https://github.com/kako-jun/diffai/blob/main/docs/reference/ml-analysis.md)
|
|
378
|
+
- [User Guide](https://github.com/kako-jun/diffai/blob/main/docs/user-guide/)
|
|
379
|
+
- [API Documentation](https://github.com/kako-jun/diffai/blob/main/docs/reference/api-reference.md)
|
|
380
|
+
|
|
381
|
+
## 📄 License
|
|
382
|
+
|
|
383
|
+
MIT License - see the [LICENSE](https://github.com/kako-jun/diffai/blob/main/LICENSE) file for details.
|
|
384
|
+
|
|
385
|
+
## 🤝 Contributing
|
|
386
|
+
|
|
387
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](https://github.com/kako-jun/diffai/blob/main/CONTRIBUTING.md) for guidelines.
|
|
388
|
+
|
|
389
|
+
---
|
|
390
|
+
|
|
391
|
+
**diffai** - Making AI/ML data differences visible, measurable, and actionable through Python. 🐍🚀
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
diffai/__init__.py,sha256=iKtiy7DNC0yV3LNp3iImdGQeEtxIF-81Rh58naWj1iA,1360
|
|
2
|
+
diffai/diffai.py,sha256=C1y4Jly9L2FwqpWVVII4eoTIIy7Dwn9juf0bhI-k3x0,15586
|
|
3
|
+
diffai/installer.py,sha256=l3cXX516rvx-m9jbk1THZL5SauYMlPsziD9_-rm3Juo,10662
|
|
4
|
+
diffai_python-0.2.9.dist-info/METADATA,sha256=oI2ielcDpD67qnzSjz1EqvWN3qNjyEqSEnY7rzf70Ng,12452
|
|
5
|
+
diffai_python-0.2.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
6
|
+
diffai_python-0.2.9.dist-info/entry_points.txt,sha256=iL4_2-iwaeQSnqkD5jh6NqsPXPiyU7R4zqg23-J1Xbk,65
|
|
7
|
+
diffai_python-0.2.9.dist-info/RECORD,,
|