diffai-python 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
diffai/__init__.py ADDED
@@ -0,0 +1,63 @@
1
+ """
2
+ diffai - AI/ML specialized diff tool for deep tensor comparison and analysis
3
+
4
+ This package provides Python bindings for diffai, a powerful command-line tool
5
+ specialized for comparing AI/ML model files, scientific data, and structured data.
6
+
7
+ Quick start:
8
+ >>> import diffai
9
+ >>> result = diffai.diff("model1.safetensors", "model2.safetensors", stats=True)
10
+ >>> print(result)
11
+
12
+ Advanced usage:
13
+ >>> options = diffai.DiffOptions(
14
+ ... stats=True,
15
+ ... architecture_comparison=True,
16
+ ... output_format=diffai.OutputFormat.JSON
17
+ ... )
18
+ >>> result = diffai.diff("model1.safetensors", "model2.safetensors", options)
19
+ """
20
+
21
+ from .diffai import (
22
+ # Main API functions
23
+ diff,
24
+ diff_string,
25
+ run_diffai,
26
+ verify_installation,
27
+
28
+ # Configuration and result classes
29
+ DiffOptions,
30
+ DiffResult,
31
+ OutputFormat,
32
+
33
+ # Exceptions
34
+ DiffaiError,
35
+ BinaryNotFoundError,
36
+ InvalidInputError,
37
+
38
+ # Version info
39
+ __version__,
40
+ )
41
+
42
+ # No backward compatibility imports - clean modern API only
43
+
44
+ __all__ = [
45
+ # Main API
46
+ "diff",
47
+ "diff_string",
48
+ "run_diffai",
49
+ "verify_installation",
50
+
51
+ # Configuration
52
+ "DiffOptions",
53
+ "DiffResult",
54
+ "OutputFormat",
55
+
56
+ # Exceptions
57
+ "DiffaiError",
58
+ "BinaryNotFoundError",
59
+ "InvalidInputError",
60
+
61
+ # Metadata
62
+ "__version__",
63
+ ]
diffai/diffai.py ADDED
@@ -0,0 +1,452 @@
1
+ """
2
+ Main diffai module providing Python interface to the diffai CLI tool.
3
+
4
+ This module provides a high-level Python API for the diffai command-line tool,
5
+ with support for type-safe configuration, structured results, and comprehensive
6
+ error handling.
7
+ """
8
+
9
+ import json
10
+ import platform
11
+ import subprocess
12
+ import sys
13
+ from dataclasses import dataclass, field
14
+ from enum import Enum
15
+ from pathlib import Path
16
+ from typing import Any, Dict, List, Optional, Union
17
+
18
+ import importlib.metadata
19
+
20
+ try:
21
+ __version__ = importlib.metadata.version("diffai-python")
22
+ except importlib.metadata.PackageNotFoundError:
23
+ # Fallback for development
24
+ __version__ = "0.2.9"
25
+
26
+
27
class OutputFormat(Enum):
    """Output formats that can be passed to the diffai CLI via ``--output``."""

    CLI = "cli"  # Default colored CLI output
    JSON = "json"  # Machine-readable JSON
    YAML = "yaml"  # Human-readable YAML
    UNIFIED = "unified"  # Unified diff format


@dataclass
class DiffOptions:
    """
    Configuration options for a diffai run.

    Each field corresponds to one CLI option and ``to_cli_args`` performs the
    translation: value options become ``--name value`` pairs, and every
    boolean field set to True becomes a ``--kebab-case`` flag derived from
    the field name.

    Based on actual CLI options from diffai-cli/src/main.rs
    """

    # Output and basic configuration
    # ``output_format`` accepts an OutputFormat member or its string value.
    output_format: Optional[Union[OutputFormat, str]] = None
    input_format: Optional[str] = None  # json, yaml, toml, ini, xml, csv, safetensors, pytorch, numpy, npz, matlab
    recursive: bool = False
    path: Optional[str] = None  # Filter by specific path
    ignore_keys_regex: Optional[str] = None  # Ignore keys matching regex
    epsilon: Optional[float] = None  # Tolerance for float comparisons
    array_id_key: Optional[str] = None  # Key for identifying array elements

    # Core analysis options
    stats: bool = False
    verbose: bool = False
    show_layer_impact: bool = False
    quantization_analysis: bool = False
    sort_by_change_magnitude: bool = False

    # Enhanced ML analysis (Phase 3 implemented)
    learning_progress: bool = False
    convergence_analysis: bool = False
    anomaly_detection: bool = False
    gradient_analysis: bool = False
    memory_analysis: bool = False
    inference_speed_estimate: bool = False
    regression_test: bool = False
    alert_on_degradation: bool = False
    review_friendly: bool = False
    change_summary: bool = False
    risk_assessment: bool = False
    architecture_comparison: bool = False
    param_efficiency_analysis: bool = False
    hyperparameter_impact: bool = False
    learning_rate_analysis: bool = False
    deployment_readiness: bool = False
    performance_impact_estimate: bool = False
    generate_report: bool = False
    markdown_output: bool = False
    include_charts: bool = False
    embedding_analysis: bool = False
    similarity_matrix: bool = False
    clustering_change: bool = False
    attention_analysis: bool = False
    head_importance: bool = False
    attention_pattern_diff: bool = False

    # Phase 2 options
    hyperparameter_comparison: bool = False
    learning_curve_analysis: bool = False
    statistical_significance: bool = False

    # Additional options, appended verbatim after everything else
    extra_args: List[str] = field(default_factory=list)

    def to_cli_args(self) -> List[str]:
        """
        Convert the options into a list of CLI arguments.

        The order is: ``--format``, ``--output``, the remaining value
        options, boolean flags in field-declaration order, then
        ``extra_args``.

        Returns:
            List of argument strings (empty when nothing is set).

        Raises:
            ValueError: If ``output_format`` is a string that is not one of
                the OutputFormat values.
        """
        args: List[str] = []

        # Input format
        if self.input_format:
            args.extend(["--format", self.input_format])

        # Output format: OutputFormat(...) returns a member unchanged and
        # converts a plain string such as "json" to the matching member.
        if self.output_format:
            args.extend(["--output", OutputFormat(self.output_format).value])

        # Value options
        if self.path:
            args.extend(["--path", self.path])
        if self.ignore_keys_regex:
            args.extend(["--ignore-keys-regex", self.ignore_keys_regex])
        if self.epsilon is not None:  # 0.0 is a legitimate tolerance
            args.extend(["--epsilon", str(self.epsilon)])
        if self.array_id_key:
            args.extend(["--array-id-key", self.array_id_key])

        # Boolean flags: every bool field maps to "--" + its name with
        # underscores turned into hyphens. The string form "bool" is also
        # accepted so the lookup keeps working under postponed annotations.
        for name, spec in self.__dataclass_fields__.items():
            if spec.type in (bool, "bool") and getattr(self, name):
                args.append("--" + name.replace("_", "-"))

        # Extra arguments
        args.extend(self.extra_args)

        return args
167
+
168
+
169
class DiffaiError(Exception):
    """Root of the diffai exception hierarchy.

    Carries the optional process exit code and captured stderr text next to
    the human-readable message.
    """

    def __init__(
        self,
        message: str,
        exit_code: Optional[int] = None,
        stderr: Optional[str] = None,
    ):
        super().__init__(message)
        self.exit_code, self.stderr = exit_code, stderr


class BinaryNotFoundError(DiffaiError):
    """Signals that the diffai binary could not be located."""


class InvalidInputError(DiffaiError):
    """Signals that input files or arguments were rejected as invalid."""
186
+
187
+
188
class DiffResult:
    """
    Structured result from diffai analysis.

    Keeps the raw text output, the exit code and the format label. When the
    format label is "json" the output is parsed lazily on first access to
    ``data``; for any other label ``data`` is the raw text.
    """

    # Marks "not parsed yet". A dedicated object is used because None is a
    # valid parse result (the JSON document ``null``).
    _UNPARSED = object()

    def __init__(self, raw_output: str, exit_code: int = 0, format_type: str = "diffai"):
        self.raw_output = raw_output
        self.exit_code = exit_code
        self.format_type = format_type
        self._parsed_data: Any = self._UNPARSED

    def _parse(self) -> Any:
        """Parse ``raw_output`` as JSON when applicable, else return it as is."""
        if self.format_type != "json" or not self.raw_output.strip():
            return self.raw_output
        try:
            return json.loads(self.raw_output)
        except json.JSONDecodeError:
            # Output was labelled JSON but is not parseable; expose the text.
            return self.raw_output

    @property
    def data(self) -> Any:
        """Get parsed data (JSON objects for JSON output, raw string otherwise)."""
        if self._parsed_data is self._UNPARSED:
            self._parsed_data = self._parse()
        return self._parsed_data

    @property
    def is_json(self) -> bool:
        """True if the format is "json" and the output parsed to a dict or list."""
        return self.format_type == "json" and isinstance(self.data, (dict, list))

    @property
    def changes(self) -> List[Dict[str, Any]]:
        """Get the parsed data when it is a JSON list, otherwise an empty list."""
        parsed = self.data
        if self.is_json and isinstance(parsed, list):
            return parsed
        return []

    @property
    def summary(self) -> Dict[str, Any]:
        """Get the parsed data when it is a JSON object, otherwise an empty dict."""
        parsed = self.data
        if self.is_json and isinstance(parsed, dict):
            return parsed
        return {}

    def __str__(self) -> str:
        """Return the raw output text."""
        return self.raw_output

    def __repr__(self) -> str:
        """Return a short description with format, exit code and output length."""
        return f"DiffResult(format={self.format_type}, exit_code={self.exit_code}, length={len(self.raw_output)})"
241
+
242
+
243
+ def _get_diffai_binary_path() -> str:
244
+ """
245
+ Get the path to the diffai binary.
246
+
247
+ Checks for local installation first, then falls back to system PATH.
248
+ """
249
+ # Check for local installation (installed via pip)
250
+ package_dir = Path(__file__).parent.parent.parent
251
+ binary_name = "diffai.exe" if platform.system() == "Windows" else "diffai"
252
+ local_binary_path = package_dir / "bin" / binary_name
253
+
254
+ if local_binary_path.exists():
255
+ return str(local_binary_path)
256
+
257
+ # Check package-local bin directory
258
+ package_bin = Path(__file__).parent.parent / "bin" / binary_name
259
+ if package_bin.exists():
260
+ return str(package_bin)
261
+
262
+ # Fall back to system PATH
263
+ return "diffai"
264
+
265
+
266
def verify_installation() -> Dict[str, Any]:
    """
    Verify that diffai is properly installed and accessible.

    Runs ``<binary> --version`` with a 10 second timeout.

    Returns:
        Dict with the keys "binary_path", "version" (stripped stdout of the
        version command) and "status" ("ok").

    Raises:
        BinaryNotFoundError: If the binary is missing, times out, cannot be
            launched, or exits with a non-zero code. The original exception,
            when there is one, is attached as ``__cause__``.
    """
    try:
        binary_path = _get_diffai_binary_path()
        result = subprocess.run(
            [binary_path, "--version"],
            capture_output=True,
            text=True,
            timeout=10,
        )
    except subprocess.TimeoutExpired as exc:
        raise BinaryNotFoundError("diffai binary execution timed out") from exc
    except FileNotFoundError as exc:
        raise BinaryNotFoundError(
            "diffai binary not found. Please install diffai or ensure it's in your PATH. "
            "See: https://github.com/kako-jun/diffai/releases"
        ) from exc
    except Exception as exc:
        raise BinaryNotFoundError(f"Failed to verify diffai installation: {exc}") from exc

    # Checked outside the try block so that this error is not caught by the
    # generic handler above and wrapped a second time.
    if result.returncode != 0:
        raise BinaryNotFoundError(
            f"diffai binary found at {binary_path} but failed to execute: {result.stderr}",
            result.returncode,
            result.stderr,
        )

    info: Dict[str, str] = {
        "binary_path": binary_path,
        "version": result.stdout.strip(),
        "status": "ok",
    }
    return info
307
+
308
+
309
def run_diffai(args: List[str], timeout: Optional[int] = None) -> DiffResult:
    """
    Execute diffai with specified arguments.

    Args:
        args: Command-line arguments to pass to diffai
        timeout: Maximum execution time in seconds (None waits indefinitely)

    Returns:
        DiffResult holding stdout, the exit code and the format label. The
        label is "json" only when the value given to ``--output`` is "json";
        otherwise it is "diffai".

    Raises:
        InvalidInputError: If diffai exits non-zero and stderr mentions a
            missing file
        DiffaiError: If diffai exits non-zero for another reason or times out
        BinaryNotFoundError: If diffai binary cannot be found
    """
    arg_list = [str(arg) for arg in args]

    # Determine output format from the value that follows --output. Checking
    # merely for the presence of "json" anywhere would also match e.g.
    # "--format json" combined with a non-JSON output format.
    # NOTE(review): the "--output=value" spelling is assumed to be accepted
    # by the CLI parser - confirm.
    format_type = "diffai"
    for position, arg in enumerate(arg_list):
        if arg == "--output" and position + 1 < len(arg_list):
            requested = arg_list[position + 1]
        elif arg.startswith("--output="):
            requested = arg.split("=", 1)[1]
        else:
            continue
        format_type = "json" if requested == "json" else "diffai"

    try:
        binary_path = _get_diffai_binary_path()
        result = subprocess.run(
            [binary_path] + arg_list,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired as exc:
        raise DiffaiError(f"diffai execution timed out after {timeout} seconds") from exc
    except FileNotFoundError as exc:
        raise BinaryNotFoundError(
            "diffai binary not found. Please install diffai or ensure it's in your PATH."
        ) from exc

    if result.returncode == 0:
        return DiffResult(result.stdout, result.returncode, format_type)

    # Handle common error cases by inspecting the stderr text
    if "No such file" in result.stderr or "not found" in result.stderr:
        raise InvalidInputError(
            f"Input file not found: {result.stderr}",
            result.returncode,
            result.stderr,
        )
    raise DiffaiError(
        f"diffai execution failed: {result.stderr}",
        result.returncode,
        result.stderr,
    )
361
+
362
+
363
def diff(
    input1: str,
    input2: str,
    options: Optional[Union[DiffOptions, Dict[str, Any]]] = None,
    **kwargs
) -> DiffResult:
    """
    Compare two files using diffai.

    Args:
        input1: Path to first input file
        input2: Path to second input file
        options: DiffOptions object or dict of options
        **kwargs: Additional options as keyword arguments; they take
            precedence over the same keys in ``options``

    Returns:
        DiffResult object containing comparison results

    Raises:
        TypeError: If an option name is not a DiffOptions field

    Example:
        >>> result = diff("model1.safetensors", "model2.safetensors", stats=True)
        >>> print(result)

        >>> options = DiffOptions(stats=True, architecture_comparison=True)
        >>> result = diff("model1.safetensors", "model2.safetensors", options)
    """
    # Imported locally because the module only imports `dataclass` and `field`.
    from dataclasses import replace

    # Normalize every accepted option form to a DiffOptions instance
    if options is None:
        options = DiffOptions(**kwargs)
    elif isinstance(options, dict):
        options = DiffOptions(**{**options, **kwargs})
    elif kwargs:
        # Copy the given options with the keyword overrides applied; the
        # caller's object is left untouched.
        options = replace(options, **kwargs)

    # Build command arguments: the two inputs first, then the options
    args = [input1, input2]
    args.extend(options.to_cli_args())

    return run_diffai(args)
409
+
410
+
411
def diff_string(
    content1: str,
    content2: str,
    format_type: Union[str, None] = None,
    **kwargs
) -> DiffResult:
    """
    Compare two strings using diffai (creates temporary files).

    Args:
        content1: First string content
        content2: Second string content
        format_type: Format of the content (e.g. "json", "yaml"). When given
            it is used as the temporary file suffix and forwarded to diff()
            as ``input_format`` unless the caller already passed one. When
            omitted the files get a ".json" suffix and no input format is
            forced.
        **kwargs: Options passed to diff()

    Returns:
        DiffResult object containing comparison results

    Note:
        This function creates temporary files for string comparison.
        Use diff() directly for file-based comparisons.
    """
    import os
    import tempfile

    suffix = f".{format_type}" if format_type else ".json"
    if format_type:
        kwargs.setdefault("input_format", format_type)

    temp_paths = []
    try:
        for content in (content1, content2):
            # UTF-8 is written explicitly so the bytes on disk do not depend
            # on the platform's locale encoding.
            with tempfile.NamedTemporaryFile(
                mode="w", suffix=suffix, delete=False, encoding="utf-8"
            ) as handle:
                # Record the path before writing so that a failed write still
                # gets cleaned up below.
                temp_paths.append(handle.name)
                handle.write(content)

        return diff(temp_paths[0], temp_paths[1], **kwargs)
    finally:
        # Clean up temporary files; each removal is attempted independently
        for temp_path in temp_paths:
            try:
                os.unlink(temp_path)
            except OSError:
                pass  # Ignore cleanup errors
diffai/installer.py ADDED
@@ -0,0 +1,335 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Binary installer for diffai Python package.
4
+
5
+ This module handles downloading and installing platform-specific diffai binaries
6
+ from GitHub releases. It supports automatic platform detection and graceful
7
+ error handling for environments where binary installation fails.
8
+ """
9
+
10
+ import argparse
11
+ import hashlib
12
+ import os
13
+ import platform
14
+ import shutil
15
+ import subprocess
16
+ import sys
17
+ import tarfile
18
+ import tempfile
19
+ import urllib.request
20
+ import zipfile
21
+ from pathlib import Path
22
+ from typing import Dict, Optional, Tuple
23
+
24
+ import importlib.metadata
25
+
26
try:
    PACKAGE_VERSION = importlib.metadata.version("diffai-python")
except importlib.metadata.PackageNotFoundError:
    # Fallback for development, used when the distribution metadata is not
    # installed. This value selects the GitHub release tag that binaries are
    # downloaded from, so it must match the package version.
    PACKAGE_VERSION = "0.2.9"
31
+
32
+ GITHUB_REPO = "kako-jun/diffai"
33
+ RELEASES_URL = f"https://github.com/{GITHUB_REPO}/releases/download/v{PACKAGE_VERSION}"
34
+
35
+
36
def get_platform_info() -> Dict[str, str]:
    """
    Detect current platform and return appropriate binary information.

    Returns:
        Dictionary with the keys "platform" (windows/macos/linux), "arch"
        (x86_64/aarch64), "extension" (zip or tar.gz), "binary_name" and
        "archive_name" (``diffai-<platform>-<arch>.<extension>``)

    Raises:
        ValueError: If the architecture or the operating system is not
            supported. The architecture is checked first.
    """
    system = platform.system()
    machine = platform.machine()

    # Normalize architecture names. The comparison is case-insensitive
    # because the reported spelling varies by OS (e.g. "AMD64" and "ARM64"
    # on Windows versus "x86_64" and "arm64"/"aarch64" elsewhere).
    normalized = machine.lower()
    if normalized in ("x86_64", "amd64"):
        arch = "x86_64"
    elif normalized in ("arm64", "aarch64"):
        arch = "aarch64"
    else:
        raise ValueError(f"Unsupported architecture: {machine}")

    # platform.system() value -> (release platform name, archive extension, binary name)
    targets = {
        "Windows": ("windows", "zip", "diffai.exe"),
        "Darwin": ("macos", "tar.gz", "diffai"),
        "Linux": ("linux", "tar.gz", "diffai"),
    }
    if system not in targets:
        raise ValueError(f"Unsupported platform: {system}")

    platform_name, extension, binary_name = targets[system]
    return {
        "platform": platform_name,
        "arch": arch,
        "extension": extension,
        "binary_name": binary_name,
        "archive_name": f"diffai-{platform_name}-{arch}.{extension}",
    }
83
+
84
+
85
def download_file(
    url: str,
    destination: Path,
    chunk_size: int = 8192,
    timeout: float = 60.0,
) -> None:
    """
    Download a file from URL to destination path.

    Progress is printed to stdout. A partially written destination file is
    removed if anything fails.

    Args:
        url: URL to download from
        destination: Local path to save file
        chunk_size: Size of chunks to download at a time
        timeout: Seconds to wait on a blocking network operation before
            giving up, so a stalled connection cannot hang forever

    Raises:
        urllib.error.URLError: If download fails for any reason; the
            underlying exception is attached as ``__cause__``
    """
    # Imported explicitly; the module itself only imports urllib.request.
    from urllib.error import URLError

    print(f"Downloading from: {url}")

    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            if response.status != 200:
                raise URLError(f"HTTP {response.status}: {response.reason}")

            length_header = response.headers.get('content-length')
            total_size = int(length_header) if length_header else None
            if total_size is not None:
                print(f"File size: {total_size:,} bytes")

            downloaded = 0
            with open(destination, 'wb') as f:
                while True:
                    chunk = response.read(chunk_size)
                    if not chunk:
                        break
                    f.write(chunk)
                    downloaded += len(chunk)

                    # Skipped for unknown or zero length (avoids dividing by zero)
                    if total_size:
                        percent = (downloaded / total_size) * 100
                        print(f"\rProgress: {percent:.1f}% ({downloaded:,}/{total_size:,} bytes)", end='')

            if total_size:
                print()  # New line after progress

    except Exception as e:
        # Do not leave a truncated file behind
        if destination.exists():
            destination.unlink()
        raise URLError(f"Download failed: {e}") from e
129
+
130
+
131
def extract_archive(archive_path: Path, extract_to: Path, platform_info: Dict[str, str]) -> Path:
    """
    Extract downloaded archive and return path to binary.

    Args:
        archive_path: Path to downloaded archive
        extract_to: Directory to extract to
        platform_info: Platform information dictionary; the keys
            "extension", "binary_name", "platform" and "arch" are read

    Returns:
        Path to extracted binary. Three conventional locations are tried
        first, then the extracted tree is searched recursively.

    Raises:
        Exception: If extraction fails, a tar member would land outside
            ``extract_to``, or the binary is not in the archive. The
            underlying exception is attached as ``__cause__``.
    """
    print(f"Extracting {archive_path} to {extract_to}")

    try:
        if platform_info["extension"] == "zip":
            with zipfile.ZipFile(archive_path, 'r') as zip_ref:
                zip_ref.extractall(extract_to)
        else:  # tar.gz
            with tarfile.open(archive_path, 'r:gz') as tar_ref:
                if hasattr(tarfile, "data_filter"):
                    # Extraction filters are available: refuse members that
                    # escape the destination or are special files.
                    tar_ref.extractall(extract_to, filter="data")
                else:
                    # Older Python without extraction filters: reject member
                    # names that resolve outside the destination directory.
                    destination_root = Path(extract_to).resolve()
                    for member in tar_ref.getmembers():
                        resolved = (destination_root / member.name).resolve()
                        if resolved != destination_root and destination_root not in resolved.parents:
                            raise ValueError(f"Unsafe path in archive: {member.name}")
                    tar_ref.extractall(extract_to)

        # Find the binary in extracted files
        binary_name = platform_info["binary_name"]

        # Check common locations
        possible_paths = [
            extract_to / binary_name,
            extract_to / "diffai" / binary_name,
            extract_to / f"diffai-{platform_info['platform']}-{platform_info['arch']}" / binary_name,
        ]

        for path in possible_paths:
            if path.exists():
                return path

        # If not found in expected locations, search recursively
        for root, _dirs, files in os.walk(extract_to):
            if binary_name in files:
                return Path(root) / binary_name

        raise FileNotFoundError(f"Binary '{binary_name}' not found in extracted archive")

    except Exception as e:
        raise Exception(f"Failed to extract archive: {e}") from e
179
+
180
+
181
def verify_binary(binary_path: Path) -> bool:
    """
    Check whether a diffai binary can actually be run.

    On non-Windows systems the file mode is set to 0o755 first. The binary
    is then invoked with ``--version`` (10 second timeout) and counts as
    functional when it exits with code 0 and its stdout contains "diffai"
    (case-insensitive).

    Args:
        binary_path: Path to binary to verify

    Returns:
        True if binary is functional, False otherwise (including when any
        exception occurs along the way)
    """
    try:
        needs_exec_bit = platform.system() != "Windows"
        if needs_exec_bit:
            os.chmod(binary_path, 0o755)

        probe = subprocess.run(
            [str(binary_path), "--version"],
            capture_output=True,
            text=True,
            timeout=10,
        )

        if probe.returncode != 0:
            return False
        return "diffai" in probe.stdout.lower()
    except Exception:
        return False
208
+
209
+
210
def install_binary(target_dir: Optional[Path] = None, force: bool = False) -> bool:
    """
    Fetch the platform's diffai release archive and install its binary.

    An existing binary that passes verification is kept unless ``force`` is
    set. Any failure is reported on stderr together with the release URL.

    Args:
        target_dir: Directory to install binary (default: the ``bin``
            directory three levels above this file)
        force: Whether to overwrite existing binary

    Returns:
        True if installation succeeded, False otherwise
    """
    try:
        info = get_platform_info()
        print(f"Detected platform: {info['platform']} {info['arch']}")

        # Resolve and create the installation directory
        if target_dir is None:
            target_dir = Path(__file__).parent.parent.parent / "bin"
        target_dir.mkdir(parents=True, exist_ok=True)
        binary_path = target_dir / info["binary_name"]

        # Reuse a working binary unless a reinstall was requested
        if not force and binary_path.exists():
            if verify_binary(binary_path):
                print(f"diffai binary already exists and is functional: {binary_path}")
                return True
            print(f"Existing binary appears corrupted, replacing: {binary_path}")

        archive_name = info["archive_name"]
        download_url = f"{RELEASES_URL}/{archive_name}"

        # Download, unpack and check inside a scratch directory, and only
        # then move the binary into place
        with tempfile.TemporaryDirectory() as scratch:
            scratch_dir = Path(scratch)
            archive_path = scratch_dir / archive_name

            download_file(download_url, archive_path)
            extracted_binary = extract_archive(archive_path, scratch_dir, info)

            if not verify_binary(extracted_binary):
                raise Exception("Downloaded binary failed verification")

            shutil.move(str(extracted_binary), str(binary_path))

        print(f"Successfully installed diffai binary to: {binary_path}")
        return True

    except Exception as e:
        print(f"Failed to install diffai binary: {e}", file=sys.stderr)
        print(f"Manual installation available at: {RELEASES_URL}", file=sys.stderr)
        return False
269
+
270
+
271
def main() -> int:
    """
    Command-line entry point for diffai-download-binary.

    With ``--verify`` the existing binary is only checked; otherwise the
    binary is downloaded and installed.

    Returns:
        Exit code (0 for success, 1 for failure)
    """
    parser = argparse.ArgumentParser(
        description="Download and install diffai binary",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
diffai-download-binary # Install to default location
diffai-download-binary --force # Force reinstall
diffai-download-binary --target ./bin # Install to specific directory
diffai-download-binary --verify # Verify existing installation
"""
    )
    parser.add_argument(
        "--target",
        type=Path,
        help="Target directory for installation (default: package bin directory)"
    )
    parser.add_argument(
        "--force",
        action="store_true",
        help="Force reinstallation even if binary exists"
    )
    parser.add_argument(
        "--verify",
        action="store_true",
        help="Only verify existing installation"
    )
    parser.add_argument(
        "--version",
        action="version",
        version=f"diffai-installer {PACKAGE_VERSION}"
    )

    cli = parser.parse_args()

    if not cli.verify:
        # Install binary
        return 0 if install_binary(cli.target, cli.force) else 1

    # Verify existing installation
    install_dir = cli.target or (Path(__file__).parent.parent.parent / "bin")
    binary_path = install_dir / get_platform_info()["binary_name"]

    if binary_path.exists() and verify_binary(binary_path):
        print(f"✅ diffai binary is properly installed: {binary_path}")
        return 0

    print(f"❌ diffai binary not found or not functional: {binary_path}")
    return 1
333
+
334
+ if __name__ == "__main__":
335
+ sys.exit(main())
@@ -0,0 +1,391 @@
1
+ Metadata-Version: 2.4
2
+ Name: diffai-python
3
+ Version: 0.2.9
4
+ Summary: AI/ML specialized diff tool for deep tensor comparison and analysis
5
+ Project-URL: Homepage, https://github.com/kako-jun/diffai
6
+ Project-URL: Documentation, https://github.com/kako-jun/diffai/blob/main/docs/
7
+ Project-URL: Repository, https://github.com/kako-jun/diffai.git
8
+ Project-URL: Issues, https://github.com/kako-jun/diffai/issues
9
+ Project-URL: Changelog, https://github.com/kako-jun/diffai/blob/main/CHANGELOG.md
10
+ Author: kako-jun
11
+ License-Expression: MIT
12
+ Keywords: ai,artificial-intelligence,diff,diffai,machine-learning,matlab,ml,model-comparison,numpy,pytorch,safetensors,tensor
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Environment :: Console
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.8
21
+ Classifier: Programming Language :: Python :: 3.9
22
+ Classifier: Programming Language :: Python :: 3.10
23
+ Classifier: Programming Language :: Python :: 3.11
24
+ Classifier: Programming Language :: Python :: 3.12
25
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
26
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
27
+ Classifier: Topic :: Text Processing :: General
28
+ Classifier: Topic :: Utilities
29
+ Requires-Python: >=3.8
30
+ Provides-Extra: dev
31
+ Requires-Dist: black>=22.0; extra == 'dev'
32
+ Requires-Dist: flake8>=5.0; extra == 'dev'
33
+ Requires-Dist: mypy>=0.991; extra == 'dev'
34
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
35
+ Requires-Dist: pytest>=7.0; extra == 'dev'
36
+ Requires-Dist: types-requests>=2.28; extra == 'dev'
37
+ Description-Content-Type: text/markdown
38
+
39
+ # diffai - AI/ML Specialized Diff Tool (Python Package)
40
+
41
+ [![PyPI version](https://badge.fury.io/py/diffai-python.svg)](https://badge.fury.io/py/diffai-python)
42
+ [![Downloads](https://img.shields.io/pypi/dm/diffai-python.svg)](https://pypi.org/project/diffai-python/)
43
+ [![Python Versions](https://img.shields.io/pypi/pyversions/diffai-python.svg)](https://pypi.org/project/diffai-python/)
44
+
45
+ AI/ML specialized data diff tool for deep tensor comparison and analysis. This package provides a convenient, type-safe Python interface to the diffai command-line tool.
46
+
47
+ ## 🚀 Quick Start
48
+
49
+ ### Installation
50
+
51
+ ```bash
52
+ # Install via pip
53
+ pip install diffai-python
54
+
55
+ # Development installation
56
+ pip install diffai-python[dev]
57
+ ```
58
+
59
+ ### Basic Usage
60
+
61
+ ```python
62
+ import diffai
63
+
64
+ # Simple model comparison
65
+ result = diffai.diff("model_v1.safetensors", "model_v2.safetensors", stats=True)
66
+ print(result)
67
+
68
+ # Advanced ML analysis with type-safe configuration
69
+ options = diffai.DiffOptions(
70
+ stats=True,
71
+ architecture_comparison=True,
72
+ memory_analysis=True,
73
+ output_format=diffai.OutputFormat.JSON
74
+ )
75
+
76
+ result = diffai.diff("baseline.safetensors", "improved.safetensors", options)
77
+ if result.is_json:
78
+ for change in result.changes:
79
+ print(f"Changed: {change}")
80
+ ```
81
+
82
+ ### Command Line Usage
83
+
84
+ ```bash
85
+ # Run the diffai CLI directly (requires the diffai binary on your PATH)
86
+ diffai model1.safetensors model2.safetensors --stats
87
+
88
+ # Download binary manually if needed
89
+ diffai-download-binary
90
+ ```
91
+
92
+ ## 📦 Supported File Formats
93
+
94
+ ### AI/ML Formats (Specialized Analysis)
95
+ - **Safetensors** (.safetensors) - PyTorch model format with ML analysis
96
+ - **PyTorch** (.pt, .pth) - Native PyTorch models with tensor statistics
97
+ - **NumPy** (.npy, .npz) - Scientific computing arrays with statistical analysis
98
+ - **MATLAB** (.mat) - Engineering/scientific data with numerical analysis
99
+
100
+ ### Structured Data Formats (Universal)
101
+ - **JSON** (.json) - API configurations, model metadata
102
+ - **YAML** (.yaml, .yml) - Configuration files, CI/CD pipelines
103
+ - **TOML** (.toml) - Rust configs, Python pyproject.toml
104
+ - **XML** (.xml) - Legacy configurations, model definitions
105
+ - **CSV** (.csv) - Datasets, experiment results
106
+ - **INI** (.ini) - Legacy configuration files
107
+
108
+ ## 🔬 35 ML Analysis Functions
109
+
110
+ ### Core Analysis Functions
111
+ ```python
112
+ # Statistical analysis
113
+ result = diffai.diff("model1.safetensors", "model2.safetensors", stats=True)
114
+
115
+ # Quantization analysis
116
+ result = diffai.diff("fp32.safetensors", "quantized.safetensors",
117
+ quantization_analysis=True)
118
+
119
+ # Change magnitude sorting
120
+ result = diffai.diff("model1.safetensors", "model2.safetensors",
121
+ sort_by_change_magnitude=True, stats=True)
122
+ ```
123
+
124
+ ### Phase 3 Advanced Analysis (v0.2.7+)
125
+ ```python
126
+ # Architecture comparison
127
+ result = diffai.diff("model1.safetensors", "model2.safetensors",
128
+ architecture_comparison=True)
129
+
130
+ # Memory analysis for deployment
131
+ result = diffai.diff("model1.safetensors", "model2.safetensors",
132
+ memory_analysis=True)
133
+
134
+ # Anomaly detection for debugging
135
+ result = diffai.diff("stable.safetensors", "problematic.safetensors",
136
+ anomaly_detection=True)
137
+
138
+ # Comprehensive analysis
139
+ options = diffai.DiffOptions(
140
+ stats=True,
141
+ architecture_comparison=True,
142
+ memory_analysis=True,
143
+ anomaly_detection=True,
144
+ convergence_analysis=True,
145
+ gradient_analysis=True,
146
+ similarity_matrix=True,
147
+ change_summary=True
148
+ )
149
+ result = diffai.diff("baseline.safetensors", "improved.safetensors", options)
150
+ ```
151
+
152
+ ## 💡 Python API Examples
153
+
154
+ ### Type-Safe Configuration
155
+ ```python
156
+ import diffai
+ from diffai import DiffOptions, OutputFormat
157
+
158
+ # Create type-safe configuration
159
+ options = DiffOptions(
160
+ stats=True,
161
+ architecture_comparison=True,
162
+ memory_analysis=True,
163
+ output_format=OutputFormat.JSON
164
+ )
165
+
166
+ # Compare models
167
+ result = diffai.diff("model1.safetensors", "model2.safetensors", options)
168
+
169
+ # Access structured results
170
+ if result.is_json:
171
+ print(f"Found {len(result.changes)} changes")
172
+ for change in result.changes:
173
+ print(f" {change.get('path')}: {change.get('type')}")
174
+ ```
175
+
176
+ ### Scientific Data Analysis
177
+ ```python
178
+ # NumPy array comparison
179
+ result = diffai.diff("experiment_v1.npy", "experiment_v2.npy", stats=True)
180
+ print(f"Statistical changes: {result}")
181
+
182
+ # MATLAB data comparison
183
+ result = diffai.diff("simulation_v1.mat", "simulation_v2.mat",
184
+ stats=True, sort_by_change_magnitude=True)
185
+ ```
186
+
187
+ ### JSON Output for Automation
188
+ ```python
189
+ # Get JSON results for MLOps integration
190
+ result = diffai.diff("model1.safetensors", "model2.safetensors",
191
+ stats=True, output_format=diffai.OutputFormat.JSON)
192
+
193
+ if result.is_json:
194
+ # Process structured data
195
+ changes = result.changes
196
+ summary = result.summary
197
+
198
+ # Integration with MLflow, Weights & Biases, etc.
199
+ log_model_comparison(changes, summary)
200
+ ```
201
+
202
+ ### Error Handling
203
+ ```python
204
+ try:
205
+ result = diffai.diff("model1.safetensors", "model2.safetensors", stats=True)
206
+ print(result)
207
+ except diffai.BinaryNotFoundError:
208
+ print("diffai binary not found. Download it with: diffai-download-binary")
209
+ except diffai.InvalidInputError as e:
210
+ print(f"Invalid input: {e}")
211
+ except diffai.DiffaiError as e:
212
+ print(f"diffai error: {e}")
213
+ ```
214
+
215
+ ### String Comparison (Temporary Files)
216
+ ```python
217
+ # Compare JSON strings directly
218
+ json1 = '{"model": "gpt-2", "layers": 12}'
219
+ json2 = '{"model": "gpt-2", "layers": 24}'
220
+
221
+ result = diffai.diff_string(json1, json2, output_format=diffai.OutputFormat.JSON)
222
+ print(result)
223
+ ```
224
+
225
+ ## 🔧 Advanced Usage
226
+
227
+ ### Installation Verification
228
+ ```python
229
+ # Check if diffai is properly installed
230
+ try:
231
+ info = diffai.verify_installation()
232
+ print(f"diffai version: {info['version']}")
233
+ print(f"Binary path: {info['binary_path']}")
234
+ except diffai.BinaryNotFoundError as e:
235
+ print(f"Installation issue: {e}")
236
+ ```
237
+
238
+ ### Manual Binary Management
239
+ ```python
240
+ # Download binary programmatically
241
+ from diffai.installer import install_binary
242
+
243
+ success = install_binary(force=True) # Force reinstall
244
+ if success:
245
+ print("Binary installed successfully")
246
+ ```
247
+
248
+ ### Low-Level API Access
249
+ ```python
250
+ # Direct command execution
251
+ result = diffai.run_diffai([
252
+ "model1.safetensors",
253
+ "model2.safetensors",
254
+ "--stats",
255
+ "--architecture-comparison",
256
+ "--output", "json"
257
+ ])
258
+
259
+ print(f"Exit code: {result.exit_code}")
260
+ print(f"Output: {result.raw_output}")
261
+ ```
262
+
263
+ ## 🔗 Integration Examples
264
+
265
+ ### MLflow Integration
266
+ ```python
267
+ import mlflow
268
+ import diffai
269
+
270
+ def log_model_comparison(model1_path, model2_path, run_id=None):
271
+ with mlflow.start_run(run_id=run_id):
272
+ # Compare models with comprehensive analysis
273
+ result = diffai.diff(
274
+ model1_path, model2_path,
275
+ stats=True,
276
+ architecture_comparison=True,
277
+ memory_analysis=True,
278
+ output_format=diffai.OutputFormat.JSON
279
+ )
280
+
281
+ if result.is_json:
282
+ # Log structured comparison data
283
+ mlflow.log_dict(result.data, "model_comparison.json")
284
+
285
+ # Log metrics
286
+ if result.changes:
287
+ mlflow.log_metric("total_changes", len(result.changes))
288
+ mlflow.log_metric("significant_changes",
289
+ sum(1 for c in result.changes
290
+ if c.get('magnitude', 0) > 0.1))
291
+
292
+ # Usage
293
+ log_model_comparison("baseline.safetensors", "candidate.safetensors")
294
+ ```
295
+
296
+ ### Weights & Biases Integration
297
+ ```python
298
+ import wandb
299
+ import diffai
300
+
301
+ def wandb_log_model_diff(model1, model2, **kwargs):
302
+ result = diffai.diff(model1, model2,
303
+ stats=True,
304
+ output_format=diffai.OutputFormat.JSON,
305
+ **kwargs)
306
+
307
+ if result.is_json and result.changes:
308
+ # Log to wandb
309
+ wandb.log({
310
+ "model_comparison": wandb.Table(
311
+ columns=["parameter", "change_type", "magnitude"],
312
+ data=[[c.get("path"), c.get("type"), c.get("magnitude")]
313
+ for c in result.changes[:100]] # Limit rows
314
+ )
315
+ })
316
+
317
+ # Initialize wandb run
318
+ wandb.init(project="model-comparison")
319
+ wandb_log_model_diff("model_v1.safetensors", "model_v2.safetensors")
320
+ ```
321
+
322
+ ### Flask API Endpoint
323
+ ```python
324
+ from flask import Flask, request, jsonify
325
+ import diffai
326
+
327
+ app = Flask(__name__)
328
+
329
+ @app.route('/compare', methods=['POST'])
330
+ def compare_models():
331
+ try:
332
+ files = request.files
333
+ model1 = files['model1']
334
+ model2 = files['model2']
335
+
336
+ # Save temporary files
337
+ model1.save('/tmp/model1.safetensors')
338
+ model2.save('/tmp/model2.safetensors')
339
+
340
+ # Compare models
341
+ result = diffai.diff('/tmp/model1.safetensors', '/tmp/model2.safetensors',
342
+ stats=True,
343
+ architecture_comparison=True,
344
+ output_format=diffai.OutputFormat.JSON)
345
+
346
+ return jsonify({
347
+ "status": "success",
348
+ "comparison": result.data if result.is_json else result.raw_output
349
+ })
350
+
351
+ except diffai.DiffaiError as e:
352
+ return jsonify({"status": "error", "message": str(e)}), 400
353
+
354
+ if __name__ == '__main__':
355
+ app.run(debug=True)
356
+ ```
357
+
358
+ ## 🏗️ Platform Support
359
+
360
+ This package automatically downloads platform-specific binaries:
361
+
362
+ - **Linux** (x86_64, ARM64)
363
+ - **macOS** (Intel x86_64, Apple Silicon ARM64)
364
+ - **Windows** (x86_64)
365
+
366
+ The binary is downloaded during installation and cached. If download fails, the package falls back to system PATH.
367
+
368
+ ## 🔗 Related Projects
369
+
370
+ - **[diffx-python](https://pypi.org/project/diffx-python/)** - General-purpose structured data diff tool
371
+ - **[diffai (npm)](https://www.npmjs.com/package/diffai)** - Node.js package for diffai
372
+ - **[diffai (GitHub)](https://github.com/kako-jun/diffai)** - Main repository
373
+
374
+ ## 📚 Documentation
375
+
376
+ - [CLI Reference](https://github.com/kako-jun/diffai/blob/main/docs/reference/cli-reference.md)
377
+ - [ML Analysis Guide](https://github.com/kako-jun/diffai/blob/main/docs/reference/ml-analysis.md)
378
+ - [User Guide](https://github.com/kako-jun/diffai/blob/main/docs/user-guide/)
379
+ - [API Documentation](https://github.com/kako-jun/diffai/blob/main/docs/reference/api-reference.md)
380
+
381
+ ## 📄 License
382
+
383
+ MIT License - see the [LICENSE](https://github.com/kako-jun/diffai/blob/main/LICENSE) file for details.
384
+
385
+ ## 🤝 Contributing
386
+
387
+ Contributions are welcome! Please see [CONTRIBUTING.md](https://github.com/kako-jun/diffai/blob/main/CONTRIBUTING.md) for guidelines.
388
+
389
+ ---
390
+
391
+ **diffai** - Making AI/ML data differences visible, measurable, and actionable through Python. 🐍🚀
@@ -0,0 +1,7 @@
1
+ diffai/__init__.py,sha256=iKtiy7DNC0yV3LNp3iImdGQeEtxIF-81Rh58naWj1iA,1360
2
+ diffai/diffai.py,sha256=C1y4Jly9L2FwqpWVVII4eoTIIy7Dwn9juf0bhI-k3x0,15586
3
+ diffai/installer.py,sha256=l3cXX516rvx-m9jbk1THZL5SauYMlPsziD9_-rm3Juo,10662
4
+ diffai_python-0.2.9.dist-info/METADATA,sha256=oI2ielcDpD67qnzSjz1EqvWN3qNjyEqSEnY7rzf70Ng,12452
5
+ diffai_python-0.2.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
6
+ diffai_python-0.2.9.dist-info/entry_points.txt,sha256=iL4_2-iwaeQSnqkD5jh6NqsPXPiyU7R4zqg23-J1Xbk,65
7
+ diffai_python-0.2.9.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ diffai-download-binary = diffai.installer:main