diffai-python 0.3.4__tar.gz → 0.3.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. {diffai_python-0.3.4 → diffai_python-0.3.5}/Cargo.toml +1 -1
  2. {diffai_python-0.3.4 → diffai_python-0.3.5}/PKG-INFO +36 -47
  3. {diffai_python-0.3.4 → diffai_python-0.3.5}/README.md +35 -46
  4. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-core/README.md +89 -64
  5. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/Cargo.lock +2 -2
  6. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/Cargo.toml +2 -2
  7. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/README.md +35 -46
  8. {diffai_python-0.3.4 → diffai_python-0.3.5}/pyproject.toml +1 -1
  9. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-core/Cargo.toml +0 -0
  10. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-core/benches/diff_benchmark.rs +0 -0
  11. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-core/benches/ml_performance.rs +0 -0
  12. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-core/src/analysis_results_diff.rs +0 -0
  13. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-core/src/lib.rs +0 -0
  14. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/.gitignore +0 -0
  15. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/diffai +0 -0
  16. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/src/diffai/__init__.py +0 -0
  17. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/src/diffai/__main__.py +0 -0
  18. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/src/diffai/installer.py +0 -0
  19. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/src/main.rs +0 -0
  20. {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/test_integration.py +0 -0
  21. {diffai_python-0.3.4 → diffai_python-0.3.5}/src/diffai/__init__.py +0 -0
  22. {diffai_python-0.3.4 → diffai_python-0.3.5}/src/diffai/__main__.py +0 -0
  23. {diffai_python-0.3.4 → diffai_python-0.3.5}/src/diffai/installer.py +0 -0
@@ -6,7 +6,7 @@ members = [
6
6
  ]
7
7
 
8
8
  [workspace.package]
9
- version = "0.3.4"
9
+ version = "0.3.5"
10
10
  edition = "2021"
11
11
  authors = ["kako-jun"]
12
12
  license = "MIT"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: diffai-python
3
- Version: 0.3.4
3
+ Version: 0.3.5
4
4
  Classifier: Development Status :: 4 - Beta
5
5
  Classifier: Environment :: Console
6
6
  Classifier: Intended Audience :: Developers
@@ -72,13 +72,13 @@ pip install diffai-python
72
72
  After installation, the `diffai` command is available:
73
73
 
74
74
  ```bash
75
- # Compare ML models
76
- diffai model_v1.safetensors model_v2.safetensors --stats
75
+ # Compare ML models (30+ analysis features automatic)
76
+ diffai model_v1.safetensors model_v2.safetensors
77
77
 
78
78
  # Compare NumPy arrays
79
- diffai data_v1.npy data_v2.npy --stats
79
+ diffai data_v1.npy data_v2.npy
80
80
 
81
- # JSON output for automation
81
+ # JSON output for automation (all ML features included)
82
82
  diffai model_v1.pt model_v2.pt --output json
83
83
  ```
84
84
 
@@ -93,8 +93,6 @@ print(result.raw_output)
93
93
 
94
94
  # With options
95
95
  options = diffai.DiffOptions(
96
- stats=True,
97
- architecture_comparison=True,
98
96
  output_format=diffai.OutputFormat.JSON
99
97
  )
100
98
  result = diffai.diff("model_v1.pt", "model_v2.pt", options)
@@ -108,18 +106,22 @@ if result.is_json:
108
106
  ### Advanced ML Analysis
109
107
 
110
108
  ```python
111
- # Comprehensive ML model analysis
109
+ # Comprehensive ML model analysis (automatic for ML models)
112
110
  result = diffai.diff(
113
111
  "baseline.safetensors",
114
112
  "improved.safetensors",
115
- stats=True,
116
- architecture_comparison=True,
117
- memory_analysis=True,
118
- anomaly_detection=True,
119
- convergence_analysis=True
113
+ stats=True # Enable statistical analysis
120
114
  )
121
115
 
122
116
  print(result.raw_output)
117
+
118
+ # ML-specific analysis features (automatic for ML models)
119
+ # - architecture_comparison: Model architecture and structural changes
120
+ # - memory_analysis: Memory usage and optimization opportunities
121
+ # - anomaly_detection: Numerical anomalies and training issues
122
+ # - convergence_analysis: Training convergence patterns
123
+ # - gradient_analysis: Gradient flow health assessment
124
+ # - quantization_analysis: Quantization effect analysis
123
125
  ```
124
126
 
125
127
  ## Supported Formats
@@ -134,21 +136,21 @@ print(result.raw_output)
134
136
  - **JSON**: Machine-readable format for automation
135
137
  - **YAML**: Human-readable structured format
136
138
 
137
- ## ML Analysis Features
139
+ ## ML Analysis Features (Automatic)
138
140
 
139
- The package provides 11 specialized ML analysis features:
141
+ The package provides 30+ specialized ML analysis features that run automatically for PyTorch and Safetensors files:
140
142
 
141
- - `--stats`: Detailed tensor statistics
142
- - `--architecture-comparison`: Model structure comparison
143
- - `--memory-analysis`: Memory usage analysis
144
- - `--anomaly-detection`: Numerical anomaly detection
145
- - `--convergence-analysis`: Training convergence analysis
146
- - `--gradient-analysis`: Gradient information analysis
147
- - `--similarity-matrix`: Layer similarity comparison
148
- - `--change-summary`: Detailed change summary
149
- - `--quantization-analysis`: Quantization impact analysis
150
- - `--sort-by-change-magnitude`: Sort by change magnitude
151
- - `--show-layer-impact`: Layer-specific impact analysis
143
+ - **Detailed tensor statistics**: Mean, std, min, max, shape, dtype
144
+ - **Model structure comparison**: Architecture and structural changes
145
+ - **Memory usage analysis**: Memory optimization opportunities
146
+ - **Numerical anomaly detection**: Training issues and anomalies
147
+ - **Training convergence analysis**: Convergence patterns
148
+ - **Gradient information analysis**: Gradient flow health
149
+ - **Layer similarity comparison**: Inter-layer analysis
150
+ - **Detailed change summary**: Comprehensive change patterns
151
+ - **Quantization impact analysis**: Quantization effects
152
+ - **Change magnitude sorting**: Priority-sorted differences
153
+ - **Plus 20+ additional specialized features**
152
154
 
153
155
  ## API Reference
154
156
 
@@ -173,12 +175,9 @@ class DiffOptions:
173
175
  recursive: bool = False
174
176
  verbose: bool = False
175
177
 
176
- # ML analysis options
177
- stats: bool = False
178
- architecture_comparison: bool = False
179
- memory_analysis: bool = False
180
- anomaly_detection: bool = False
181
- # ... and more
178
+ # For scientific data (NumPy/MATLAB)
179
+ stats: bool = False # Only used for NumPy/MATLAB files
180
+ # Note: ML analysis runs automatically for PyTorch/Safetensors
182
181
  ```
183
182
 
184
183
  ### Results
@@ -204,9 +203,7 @@ class DiffResult:
204
203
  before = "model_baseline.safetensors"
205
204
  after = "model_finetuned.safetensors"
206
205
 
207
- result = diffai.diff(before, after,
208
- stats=True,
209
- convergence_analysis=True)
206
+ result = diffai.diff(before, after)
210
207
  ```
211
208
 
212
209
  ### MLOps Integration
@@ -214,9 +211,7 @@ result = diffai.diff(before, after,
214
211
  # Automated model validation in CI/CD
215
212
  def validate_model_changes(old_model, new_model):
216
213
  result = diffai.diff(old_model, new_model,
217
- output_format=diffai.OutputFormat.JSON,
218
- anomaly_detection=True,
219
- memory_analysis=True)
214
+ output_format=diffai.OutputFormat.JSON)
220
215
 
221
216
  if result.is_json:
222
217
  # Check for critical issues
@@ -244,9 +239,7 @@ def log_model_comparison(run_id1, run_id2):
244
239
 
245
240
  # Compare with diffai
246
241
  result = diffai.diff(model1_path, model2_path,
247
- output_format=diffai.OutputFormat.JSON,
248
- stats=True,
249
- architecture_comparison=True)
242
+ output_format=diffai.OutputFormat.JSON)
250
243
 
251
244
  # Log results to MLflow
252
245
  with mlflow.start_run():
@@ -269,10 +262,7 @@ def log_model_comparison_wandb(model1_path, model2_path):
269
262
  """Log model comparison to Weights & Biases"""
270
263
 
271
264
  result = diffai.diff(model1_path, model2_path,
272
- output_format=diffai.OutputFormat.JSON,
273
- stats=True,
274
- memory_analysis=True,
275
- convergence_analysis=True)
265
+ output_format=diffai.OutputFormat.JSON)
276
266
 
277
267
  # Log to wandb
278
268
  wandb.log({"model_comparison": result.data})
@@ -289,8 +279,7 @@ def log_model_comparison_wandb(model1_path, model2_path):
289
279
  ### Jupyter Notebooks
290
280
  ```python
291
281
  # Interactive analysis in notebooks
292
- result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt",
293
- stats=True, memory_analysis=True)
282
+ result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt")
294
283
 
295
284
  # Display results
296
285
  if result.is_json:
@@ -34,13 +34,13 @@ pip install diffai-python
34
34
  After installation, the `diffai` command is available:
35
35
 
36
36
  ```bash
37
- # Compare ML models
38
- diffai model_v1.safetensors model_v2.safetensors --stats
37
+ # Compare ML models (30+ analysis features automatic)
38
+ diffai model_v1.safetensors model_v2.safetensors
39
39
 
40
40
  # Compare NumPy arrays
41
- diffai data_v1.npy data_v2.npy --stats
41
+ diffai data_v1.npy data_v2.npy
42
42
 
43
- # JSON output for automation
43
+ # JSON output for automation (all ML features included)
44
44
  diffai model_v1.pt model_v2.pt --output json
45
45
  ```
46
46
 
@@ -55,8 +55,6 @@ print(result.raw_output)
55
55
 
56
56
  # With options
57
57
  options = diffai.DiffOptions(
58
- stats=True,
59
- architecture_comparison=True,
60
58
  output_format=diffai.OutputFormat.JSON
61
59
  )
62
60
  result = diffai.diff("model_v1.pt", "model_v2.pt", options)
@@ -70,18 +68,22 @@ if result.is_json:
70
68
  ### Advanced ML Analysis
71
69
 
72
70
  ```python
73
- # Comprehensive ML model analysis
71
+ # Comprehensive ML model analysis (automatic for ML models)
74
72
  result = diffai.diff(
75
73
  "baseline.safetensors",
76
74
  "improved.safetensors",
77
- stats=True,
78
- architecture_comparison=True,
79
- memory_analysis=True,
80
- anomaly_detection=True,
81
- convergence_analysis=True
75
+ stats=True # Enable statistical analysis
82
76
  )
83
77
 
84
78
  print(result.raw_output)
79
+
80
+ # ML-specific analysis features (automatic for ML models)
81
+ # - architecture_comparison: Model architecture and structural changes
82
+ # - memory_analysis: Memory usage and optimization opportunities
83
+ # - anomaly_detection: Numerical anomalies and training issues
84
+ # - convergence_analysis: Training convergence patterns
85
+ # - gradient_analysis: Gradient flow health assessment
86
+ # - quantization_analysis: Quantization effect analysis
85
87
  ```
86
88
 
87
89
  ## Supported Formats
@@ -96,21 +98,21 @@ print(result.raw_output)
96
98
  - **JSON**: Machine-readable format for automation
97
99
  - **YAML**: Human-readable structured format
98
100
 
99
- ## ML Analysis Features
101
+ ## ML Analysis Features (Automatic)
100
102
 
101
- The package provides 11 specialized ML analysis features:
103
+ The package provides 30+ specialized ML analysis features that run automatically for PyTorch and Safetensors files:
102
104
 
103
- - `--stats`: Detailed tensor statistics
104
- - `--architecture-comparison`: Model structure comparison
105
- - `--memory-analysis`: Memory usage analysis
106
- - `--anomaly-detection`: Numerical anomaly detection
107
- - `--convergence-analysis`: Training convergence analysis
108
- - `--gradient-analysis`: Gradient information analysis
109
- - `--similarity-matrix`: Layer similarity comparison
110
- - `--change-summary`: Detailed change summary
111
- - `--quantization-analysis`: Quantization impact analysis
112
- - `--sort-by-change-magnitude`: Sort by change magnitude
113
- - `--show-layer-impact`: Layer-specific impact analysis
105
+ - **Detailed tensor statistics**: Mean, std, min, max, shape, dtype
106
+ - **Model structure comparison**: Architecture and structural changes
107
+ - **Memory usage analysis**: Memory optimization opportunities
108
+ - **Numerical anomaly detection**: Training issues and anomalies
109
+ - **Training convergence analysis**: Convergence patterns
110
+ - **Gradient information analysis**: Gradient flow health
111
+ - **Layer similarity comparison**: Inter-layer analysis
112
+ - **Detailed change summary**: Comprehensive change patterns
113
+ - **Quantization impact analysis**: Quantization effects
114
+ - **Change magnitude sorting**: Priority-sorted differences
115
+ - **Plus 20+ additional specialized features**
114
116
 
115
117
  ## API Reference
116
118
 
@@ -135,12 +137,9 @@ class DiffOptions:
135
137
  recursive: bool = False
136
138
  verbose: bool = False
137
139
 
138
- # ML analysis options
139
- stats: bool = False
140
- architecture_comparison: bool = False
141
- memory_analysis: bool = False
142
- anomaly_detection: bool = False
143
- # ... and more
140
+ # For scientific data (NumPy/MATLAB)
141
+ stats: bool = False # Only used for NumPy/MATLAB files
142
+ # Note: ML analysis runs automatically for PyTorch/Safetensors
144
143
  ```
145
144
 
146
145
  ### Results
@@ -166,9 +165,7 @@ class DiffResult:
166
165
  before = "model_baseline.safetensors"
167
166
  after = "model_finetuned.safetensors"
168
167
 
169
- result = diffai.diff(before, after,
170
- stats=True,
171
- convergence_analysis=True)
168
+ result = diffai.diff(before, after)
172
169
  ```
173
170
 
174
171
  ### MLOps Integration
@@ -176,9 +173,7 @@ result = diffai.diff(before, after,
176
173
  # Automated model validation in CI/CD
177
174
  def validate_model_changes(old_model, new_model):
178
175
  result = diffai.diff(old_model, new_model,
179
- output_format=diffai.OutputFormat.JSON,
180
- anomaly_detection=True,
181
- memory_analysis=True)
176
+ output_format=diffai.OutputFormat.JSON)
182
177
 
183
178
  if result.is_json:
184
179
  # Check for critical issues
@@ -206,9 +201,7 @@ def log_model_comparison(run_id1, run_id2):
206
201
 
207
202
  # Compare with diffai
208
203
  result = diffai.diff(model1_path, model2_path,
209
- output_format=diffai.OutputFormat.JSON,
210
- stats=True,
211
- architecture_comparison=True)
204
+ output_format=diffai.OutputFormat.JSON)
212
205
 
213
206
  # Log results to MLflow
214
207
  with mlflow.start_run():
@@ -231,10 +224,7 @@ def log_model_comparison_wandb(model1_path, model2_path):
231
224
  """Log model comparison to Weights & Biases"""
232
225
 
233
226
  result = diffai.diff(model1_path, model2_path,
234
- output_format=diffai.OutputFormat.JSON,
235
- stats=True,
236
- memory_analysis=True,
237
- convergence_analysis=True)
227
+ output_format=diffai.OutputFormat.JSON)
238
228
 
239
229
  # Log to wandb
240
230
  wandb.log({"model_comparison": result.data})
@@ -251,8 +241,7 @@ def log_model_comparison_wandb(model1_path, model2_path):
251
241
  ### Jupyter Notebooks
252
242
  ```python
253
243
  # Interactive analysis in notebooks
254
- result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt",
255
- stats=True, memory_analysis=True)
244
+ result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt")
256
245
 
257
246
  # Display results
258
247
  if result.is_json:
@@ -14,19 +14,25 @@ A next-generation diff tool specialized for **AI/ML and scientific computing wor
14
14
  $ diff model_v1.safetensors model_v2.safetensors
15
15
  Binary files model_v1.safetensors and model_v2.safetensors differ
16
16
 
17
- # diffai shows meaningful model changes
18
- $ diffai model_v1.safetensors model_v2.safetensors --stats
17
+ # diffai shows meaningful model changes with full analysis
18
+ $ diffai model_v1.safetensors model_v2.safetensors
19
19
  ~ fc1.bias: mean=0.0018->0.0017, std=0.0518->0.0647
20
20
  ~ fc1.weight: mean=-0.0002->-0.0001, std=0.0514->0.0716
21
21
  ~ fc2.weight: mean=-0.0008->-0.0018, std=0.0719->0.0883
22
22
  gradient_analysis: flow_health=healthy, norm=0.015000, ratio=1.0500
23
+ deployment_readiness: readiness=0.92, strategy=blue_green, risk=low
24
+ quantization_analysis: compression=0.0%, speedup=1.8x, precision_loss=1.5%
25
+
26
+ [WARNING]
27
+ • Memory usage increased moderately (+250MB). Monitor resource consumption.
28
+ • Inference speed moderately affected (1.3x slower). Consider optimization opportunities.
23
29
  ```
24
30
 
25
31
  ## Key Features
26
32
 
27
33
  - **AI/ML Native**: Direct support for PyTorch (.pt/.pth), Safetensors (.safetensors), NumPy (.npy/.npz), and MATLAB (.mat) files
28
34
  - **Tensor Analysis**: Automatic calculation of tensor statistics (mean, std, min, max, shape, memory usage)
29
- - **ML Analysis Functions**: Statistical analysis, quantization analysis, architecture comparison, and more
35
+ - **Comprehensive ML Analysis**: 30+ analysis functions including quantization, architecture, memory, convergence, anomaly detection, and deployment readiness - all enabled by default
30
36
  - **Scientific Data Support**: NumPy arrays and MATLAB matrices with complex number support
31
37
  - **Pure Rust Implementation**: No system dependencies, works on Windows/Linux/macOS without additional installations
32
38
  - **Multiple Output Formats**: Colored CLI, JSON for MLOps integration, YAML for human-readable reports
@@ -76,38 +82,43 @@ cargo build --release
76
82
  ### Basic Model Comparison
77
83
 
78
84
  ```bash
79
- # Compare PyTorch models
80
- diffai model_old.pt model_new.pt --stats
85
+ # Compare PyTorch models with full analysis (default)
86
+ diffai model_old.pt model_new.pt
81
87
 
82
- # Compare Safetensors with statistical analysis
83
- diffai checkpoint_v1.safetensors checkpoint_v2.safetensors --stats
88
+ # Compare Safetensors with complete ML analysis
89
+ diffai checkpoint_v1.safetensors checkpoint_v2.safetensors
84
90
 
85
91
  # Compare NumPy arrays
86
- diffai data_v1.npy data_v2.npy --stats
92
+ diffai data_v1.npy data_v2.npy
87
93
 
88
94
  # Compare MATLAB files
89
- diffai experiment_v1.mat experiment_v2.mat --stats
95
+ diffai experiment_v1.mat experiment_v2.mat
90
96
  ```
91
97
 
92
- ### Advanced ML Analysis
98
+ ### ML Analysis Features
93
99
 
94
100
  ```bash
95
- # Current available analysis
96
- diffai baseline.safetensors finetuned.safetensors --stats --quantization-analysis
97
-
98
- # Combined analysis with sorting
99
- diffai original.pt optimized.pt --stats --quantization-analysis --sort-by-change-magnitude
101
+ # Full ML analysis runs automatically for PyTorch/Safetensors
102
+ diffai baseline.safetensors finetuned.safetensors
103
+ # Outputs: 30+ analysis types including quantization, architecture, memory, etc.
100
104
 
101
105
  # JSON output for automation
102
- diffai model_v1.safetensors model_v2.safetensors --stats --output json
106
+ diffai model_v1.safetensors model_v2.safetensors --output json
103
107
 
104
108
  # Detailed diagnostic information with verbose mode
105
- diffai model_v1.safetensors model_v2.safetensors --verbose --stats --architecture-comparison
109
+ diffai model_v1.safetensors model_v2.safetensors --verbose
106
110
 
107
- # Future Phase 3 features (coming soon)
108
- diffai model_v1.safetensors model_v2.safetensors --architecture-comparison --memory-analysis
111
+ # YAML output for human-readable reports
112
+ diffai model_v1.safetensors model_v2.safetensors --output yaml
109
113
  ```
110
114
 
115
+ ## 📚 Documentation
116
+
117
+ - **[Working Examples & Demonstrations](docs/examples/)** - See diffai in action with real outputs
118
+ - **[API Documentation](https://docs.rs/diffai-core)** - Rust library documentation
119
+ - **[User Guide](docs/user-guide.md)** - Comprehensive usage guide
120
+ - **[ML Analysis Guide](docs/ml-analysis-guide.md)** - Deep dive into ML-specific features
121
+
111
122
  ## Supported File Formats
112
123
 
113
124
  ### ML Model Formats
@@ -128,26 +139,28 @@ diffai model_v1.safetensors model_v2.safetensors --architecture-comparison --mem
128
139
 
129
140
  ## ML Analysis Functions
130
141
 
131
- ### Currently Available (v0.2.7)
132
- - `--stats` - Detailed tensor statistics (mean, std, min, max, shape, memory)
133
- - `--quantization-analysis` - Analyze quantization effects and efficiency
134
- - `--sort-by-change-magnitude` - Sort differences by magnitude for prioritization
135
- - `--show-layer-impact` - Layer-by-layer impact analysis
136
- - `--architecture-comparison` - Compare model architectures and structural changes
137
- - `--memory-analysis` - Analyze memory usage and optimization opportunities
138
- - `--anomaly-detection` - Detect numerical anomalies in model parameters
139
- - `--change-summary` - Generate detailed change summaries
140
- - `--convergence-analysis` - Analyze convergence patterns in model parameters
141
- - `--gradient-analysis` - Analyze gradient information when available
142
- - `--similarity-matrix` - Generate similarity matrix for model comparison
143
-
144
- ### Coming in Phase 4 (ML Framework Expansion)
142
+ ### Automatic Comprehensive Analysis (v0.3.4)
143
+ When comparing PyTorch or Safetensors files, diffai automatically runs 30+ ML analysis features:
144
+
145
+ **Automatic Features Include:**
146
+ - **Statistical Analysis**: Detailed tensor statistics (mean, std, min, max, shape, memory)
147
+ - **Quantization Analysis**: Analyze quantization effects and efficiency
148
+ - **Architecture Comparison**: Compare model architectures and structural changes
149
+ - **Memory Analysis**: Analyze memory usage and optimization opportunities
150
+ - **Anomaly Detection**: Detect numerical anomalies in model parameters
151
+ - **Convergence Analysis**: Analyze convergence patterns in model parameters
152
+ - **Gradient Analysis**: Analyze gradient information when available
153
+ - **Deployment Readiness**: Assess production deployment readiness
154
+ - **Regression Testing**: Automatic performance degradation detection
155
+ - **Plus 20+ additional specialized features**
156
+
157
+ ### Future Enhancements
145
158
  - TensorFlow format support (.pb, .h5, SavedModel)
146
159
  - ONNX format support
147
160
  - Advanced visualization and charting features
148
161
 
149
162
  ### Design Philosophy
150
- diffai follows UNIX philosophy: simple, composable tools that do one thing well. Features are orthogonal and can be combined for powerful analysis workflows.
163
+ diffai provides comprehensive analysis by default for ML models, eliminating choice paralysis. Users get all relevant insights without needing to remember or specify dozens of analysis flags.
151
164
 
152
165
  ## Debugging and Diagnostics
153
166
 
@@ -158,12 +171,12 @@ Get comprehensive diagnostic information for debugging and performance analysis:
158
171
  # Basic verbose output
159
172
  diffai model1.safetensors model2.safetensors --verbose
160
173
 
161
- # Verbose with ML analysis features
162
- diffai data1.json data2.json --verbose --stats --epsilon 0.001 --ignore-keys-regex "^id$"
174
+ # Verbose with structured data filtering
175
+ diffai data1.json data2.json --verbose --epsilon 0.001 --ignore-keys-regex "^id$"
163
176
  ```
164
177
 
165
178
  **Verbose output includes:**
166
- - **Configuration diagnostics**: Active ML features, format settings, filters
179
+ - **Configuration diagnostics**: Format settings, filters, analysis modes
167
180
  - **File analysis**: Paths, sizes, detected formats, processing context
168
181
  - **Performance metrics**: Processing time, difference counts, optimization status
169
182
  - **Directory statistics**: File counts, comparison summaries (with `--recursive`)
@@ -172,9 +185,9 @@ diffai data1.json data2.json --verbose --stats --epsilon 0.001 --ignore-keys-reg
172
185
  ```
173
186
  === diffai verbose mode enabled ===
174
187
  Configuration:
175
- Input format: None
188
+ Input format: Safetensors
176
189
  Output format: Cli
177
- ML analysis features: statistics, architecture_comparison
190
+ ML analysis: Full analysis enabled (all 30 features)
178
191
  Epsilon tolerance: 0.001
179
192
 
180
193
  File analysis:
@@ -216,36 +229,36 @@ diffai model1.safetensors model2.safetensors --output yaml
216
229
 
217
230
  ### Research & Development
218
231
  ```bash
219
- # Compare model before and after fine-tuning
220
- diffai pretrained_model.safetensors finetuned_model.safetensors \
221
- --learning-progress --convergence-analysis --stats
232
+ # Compare model before and after fine-tuning (full analysis automatic)
233
+ diffai pretrained_model.safetensors finetuned_model.safetensors
234
+ # Outputs: learning_progress, convergence_analysis, parameter stats, and 27 more analyses
222
235
 
223
236
  # Analyze architectural changes during development
224
- diffai baseline_architecture.pt improved_architecture.pt \
225
- --architecture-comparison --param-efficiency-analysis
237
+ diffai baseline_architecture.pt improved_architecture.pt
238
+ # Outputs: architecture_comparison, param_efficiency_analysis, and full ML analysis
226
239
  ```
227
240
 
228
241
  ### MLOps & CI/CD
229
242
  ```bash
230
- # Automated model validation in CI/CD
231
- diffai production_model.safetensors candidate_model.safetensors \
232
- --deployment-readiness --regression-test --risk-assessment
243
+ # Automated model validation in CI/CD (comprehensive analysis)
244
+ diffai production_model.safetensors candidate_model.safetensors
245
+ # Outputs: deployment_readiness, regression_test, risk_assessment, and 27 more analyses
233
246
 
234
- # Performance impact assessment
235
- diffai original_model.pt optimized_model.pt \
236
- --quantization-analysis --memory-analysis --performance-impact-estimate
247
+ # Performance impact assessment with JSON output for automation
248
+ diffai original_model.pt optimized_model.pt --output json
249
+ # Outputs: quantization_analysis, memory_analysis, performance_impact_estimate, etc.
237
250
  ```
238
251
 
239
252
  ### Scientific Computing
240
253
  ```bash
241
254
  # Compare NumPy experiment results
242
- diffai baseline_results.npy new_results.npy --stats
255
+ diffai baseline_results.npy new_results.npy
243
256
 
244
257
  # Analyze MATLAB simulation data
245
- diffai simulation_v1.mat simulation_v2.mat --stats
258
+ diffai simulation_v1.mat simulation_v2.mat
246
259
 
247
260
  # Compare compressed NumPy archives
248
- diffai dataset_v1.npz dataset_v2.npz --stats
261
+ diffai dataset_v1.npz dataset_v2.npz
249
262
  ```
250
263
 
251
264
  ### Experiment Tracking
@@ -265,7 +278,8 @@ diffai model_a.safetensors model_b.safetensors \
265
278
  - `-f, --format <FORMAT>` - Specify input file format
266
279
  - `-o, --output <OUTPUT>` - Choose output format (cli, json, yaml)
267
280
  - `-r, --recursive` - Compare directories recursively
268
- - `--stats` - Show detailed statistics for ML models
281
+
282
+ **Note:** For ML models (PyTorch/Safetensors), comprehensive analysis including statistics runs automatically
269
283
 
270
284
  ### Advanced Options
271
285
  - `--path <PATH>` - Filter differences by specific path
@@ -276,9 +290,16 @@ diffai model_a.safetensors model_b.safetensors \
276
290
 
277
291
  ## Examples
278
292
 
279
- ### Basic Tensor Comparison
293
+ ### Basic Tensor Comparison (Automatic)
280
294
  ```bash
281
- $ diffai simple_model_v1.safetensors simple_model_v2.safetensors --stats
295
+ $ diffai simple_model_v1.safetensors simple_model_v2.safetensors
296
+ anomaly_detection: type=none, severity=none, action="continue_training"
297
+ architecture_comparison: type1=feedforward, type2=feedforward, deployment_readiness=ready
298
+ convergence_analysis: status=converging, stability=0.92
299
+ gradient_analysis: flow_health=healthy, norm=0.021069
300
+ memory_analysis: delta=+0.0MB, efficiency=1.000000
301
+ quantization_analysis: compression=0.0%, speedup=1.8x, precision_loss=1.5%
302
+ regression_test: passed=true, degradation=-2.5%, severity=low
282
303
  ~ fc1.bias: mean=0.0018->0.0017, std=0.0518->0.0647
283
304
  ~ fc1.weight: mean=-0.0002->-0.0001, std=0.0514->0.0716
284
305
  ~ fc2.bias: mean=-0.0076->-0.0257, std=0.0661->0.0973
@@ -287,24 +308,28 @@ $ diffai simple_model_v1.safetensors simple_model_v2.safetensors --stats
287
308
  ~ fc3.weight: mean=-0.0035->-0.0010, std=0.0990->0.1113
288
309
  ```
289
310
 
290
- ### Advanced Analysis
311
+ ### JSON Output for Automation
291
312
  ```bash
292
- $ diffai baseline.safetensors improved.safetensors --deployment-readiness --architecture-comparison
293
- deployment_readiness: readiness=0.92, strategy=blue_green, risk=low, timeline=ready_for_immediate_deployment
294
- architecture_comparison: type1=feedforward, type2=feedforward, depth=3->3, differences=0
295
- ~ fc1.bias: mean=0.0018->0.0017, std=0.0518->0.0647
296
- ~ fc1.weight: mean=-0.0002->-0.0001, std=0.0514->0.0716
313
+ $ diffai baseline.safetensors improved.safetensors --output json
314
+ {
315
+ "anomaly_detection": {"type": "none", "severity": "none"},
316
+ "architecture_comparison": {"type1": "feedforward", "type2": "feedforward"},
317
+ "deployment_readiness": {"readiness": 0.92, "strategy": "blue_green"},
318
+ "quantization_analysis": {"compression": "0.0%", "speedup": "1.8x"},
319
+ "regression_test": {"passed": true, "degradation": "-2.5%"}
320
+ // ... plus 25+ additional analysis features
321
+ }
297
322
  ```
298
323
 
299
324
  ### Scientific Data Analysis
300
325
  ```bash
301
- $ diffai experiment_data_v1.npy experiment_data_v2.npy --stats
326
+ $ diffai experiment_data_v1.npy experiment_data_v2.npy
302
327
  ~ data: shape=[1000, 256], mean=0.1234->0.1456, std=0.9876->0.9654, dtype=float64
303
328
  ```
304
329
 
305
330
  ### MATLAB File Comparison
306
331
  ```bash
307
- $ diffai simulation_v1.mat simulation_v2.mat --stats
332
+ $ diffai simulation_v1.mat simulation_v2.mat
308
333
  ~ results: var=results, shape=[500, 100], mean=2.3456->2.4567, std=1.2345->1.3456, dtype=double
309
334
  + new_variable: var=new_variable, shape=[100], dtype=single, elements=100, size=0.39KB
310
335
  ```
@@ -432,7 +432,7 @@ dependencies = [
432
432
 
433
433
  [[package]]
434
434
  name = "diffai-core"
435
- version = "0.3.4"
435
+ version = "0.3.5"
436
436
  dependencies = [
437
437
  "anyhow",
438
438
  "bytemuck",
@@ -455,7 +455,7 @@ dependencies = [
455
455
 
456
456
  [[package]]
457
457
  name = "diffai-python"
458
- version = "0.3.4"
458
+ version = "0.3.5"
459
459
  dependencies = [
460
460
  "anyhow",
461
461
  "clap",
@@ -3,7 +3,7 @@
3
3
 
4
4
  [package]
5
5
  name = "diffai-python"
6
- version = "0.3.4"
6
+ version = "0.3.5"
7
7
  edition = "2021"
8
8
  authors = ["kako-jun"]
9
9
  license = "MIT"
@@ -17,7 +17,7 @@ path = "src/main.rs"
17
17
 
18
18
  [dependencies]
19
19
  # Reference to the actual diffai dependencies
20
- diffai-core = { version = "0.3.4", path = "../diffai-core" }
20
+ diffai-core = { version = "0.3.5", path = "../diffai-core" }
21
21
  clap = { version = "4.0", features = ["derive"] }
22
22
  colored = "2.0"
23
23
  serde = { version = "1.0", features = ["derive"] }
@@ -34,13 +34,13 @@ pip install diffai-python
34
34
  After installation, the `diffai` command is available:
35
35
 
36
36
  ```bash
37
- # Compare ML models
38
- diffai model_v1.safetensors model_v2.safetensors --stats
37
+ # Compare ML models (30+ analysis features automatic)
38
+ diffai model_v1.safetensors model_v2.safetensors
39
39
 
40
40
  # Compare NumPy arrays
41
- diffai data_v1.npy data_v2.npy --stats
41
+ diffai data_v1.npy data_v2.npy
42
42
 
43
- # JSON output for automation
43
+ # JSON output for automation (all ML features included)
44
44
  diffai model_v1.pt model_v2.pt --output json
45
45
  ```
46
46
 
@@ -55,8 +55,6 @@ print(result.raw_output)
55
55
 
56
56
  # With options
57
57
  options = diffai.DiffOptions(
58
- stats=True,
59
- architecture_comparison=True,
60
58
  output_format=diffai.OutputFormat.JSON
61
59
  )
62
60
  result = diffai.diff("model_v1.pt", "model_v2.pt", options)
@@ -70,18 +68,22 @@ if result.is_json:
70
68
  ### Advanced ML Analysis
71
69
 
72
70
  ```python
73
- # Comprehensive ML model analysis
71
+ # Comprehensive ML model analysis (automatic for ML models)
74
72
  result = diffai.diff(
75
73
  "baseline.safetensors",
76
74
  "improved.safetensors",
77
- stats=True,
78
- architecture_comparison=True,
79
- memory_analysis=True,
80
- anomaly_detection=True,
81
- convergence_analysis=True
75
+ stats=True # Optional: documented as only used for NumPy/MATLAB files; ML analysis is automatic
82
76
  )
83
77
 
84
78
  print(result.raw_output)
79
+
80
+ # ML-specific analysis features (automatic for ML models)
81
+ # - architecture_comparison: Model architecture and structural changes
82
+ # - memory_analysis: Memory usage and optimization opportunities
83
+ # - anomaly_detection: Numerical anomalies and training issues
84
+ # - convergence_analysis: Training convergence patterns
85
+ # - gradient_analysis: Gradient flow health assessment
86
+ # - quantization_analysis: Quantization effect analysis
85
87
  ```
86
88
 
87
89
  ## Supported Formats
@@ -96,21 +98,21 @@ print(result.raw_output)
96
98
  - **JSON**: Machine-readable format for automation
97
99
  - **YAML**: Human-readable structured format
98
100
 
99
- ## ML Analysis Features
101
+ ## ML Analysis Features (Automatic)
100
102
 
101
- The package provides 11 specialized ML analysis features:
103
+ The package provides 30+ specialized ML analysis features that run automatically for PyTorch and Safetensors files:
102
104
 
103
- - `--stats`: Detailed tensor statistics
104
- - `--architecture-comparison`: Model structure comparison
105
- - `--memory-analysis`: Memory usage analysis
106
- - `--anomaly-detection`: Numerical anomaly detection
107
- - `--convergence-analysis`: Training convergence analysis
108
- - `--gradient-analysis`: Gradient information analysis
109
- - `--similarity-matrix`: Layer similarity comparison
110
- - `--change-summary`: Detailed change summary
111
- - `--quantization-analysis`: Quantization impact analysis
112
- - `--sort-by-change-magnitude`: Sort by change magnitude
113
- - `--show-layer-impact`: Layer-specific impact analysis
105
+ - **Detailed tensor statistics**: Mean, std, min, max, shape, dtype
106
+ - **Model structure comparison**: Architecture and structural changes
107
+ - **Memory usage analysis**: Memory optimization opportunities
108
+ - **Numerical anomaly detection**: Training issues and anomalies
109
+ - **Training convergence analysis**: Convergence patterns
110
+ - **Gradient information analysis**: Gradient flow health
111
+ - **Layer similarity comparison**: Inter-layer analysis
112
+ - **Detailed change summary**: Comprehensive change patterns
113
+ - **Quantization impact analysis**: Quantization effects
114
+ - **Change magnitude sorting**: Priority-sorted differences
115
+ - **Plus 20+ additional specialized features**
114
116
 
115
117
  ## API Reference
116
118
 
@@ -135,12 +137,9 @@ class DiffOptions:
135
137
  recursive: bool = False
136
138
  verbose: bool = False
137
139
 
138
- # ML analysis options
139
- stats: bool = False
140
- architecture_comparison: bool = False
141
- memory_analysis: bool = False
142
- anomaly_detection: bool = False
143
- # ... and more
140
+ # For scientific data (NumPy/MATLAB)
141
+ stats: bool = False # Only used for NumPy/MATLAB files
142
+ # Note: ML analysis runs automatically for PyTorch/Safetensors
144
143
  ```
145
144
 
146
145
  ### Results
@@ -166,9 +165,7 @@ class DiffResult:
166
165
  before = "model_baseline.safetensors"
167
166
  after = "model_finetuned.safetensors"
168
167
 
169
- result = diffai.diff(before, after,
170
- stats=True,
171
- convergence_analysis=True)
168
+ result = diffai.diff(before, after)
172
169
  ```
173
170
 
174
171
  ### MLOps Integration
@@ -176,9 +173,7 @@ result = diffai.diff(before, after,
176
173
  # Automated model validation in CI/CD
177
174
  def validate_model_changes(old_model, new_model):
178
175
  result = diffai.diff(old_model, new_model,
179
- output_format=diffai.OutputFormat.JSON,
180
- anomaly_detection=True,
181
- memory_analysis=True)
176
+ output_format=diffai.OutputFormat.JSON)
182
177
 
183
178
  if result.is_json:
184
179
  # Check for critical issues
@@ -206,9 +201,7 @@ def log_model_comparison(run_id1, run_id2):
206
201
 
207
202
  # Compare with diffai
208
203
  result = diffai.diff(model1_path, model2_path,
209
- output_format=diffai.OutputFormat.JSON,
210
- stats=True,
211
- architecture_comparison=True)
204
+ output_format=diffai.OutputFormat.JSON)
212
205
 
213
206
  # Log results to MLflow
214
207
  with mlflow.start_run():
@@ -231,10 +224,7 @@ def log_model_comparison_wandb(model1_path, model2_path):
231
224
  """Log model comparison to Weights & Biases"""
232
225
 
233
226
  result = diffai.diff(model1_path, model2_path,
234
- output_format=diffai.OutputFormat.JSON,
235
- stats=True,
236
- memory_analysis=True,
237
- convergence_analysis=True)
227
+ output_format=diffai.OutputFormat.JSON)
238
228
 
239
229
  # Log to wandb
240
230
  wandb.log({"model_comparison": result.data})
@@ -251,8 +241,7 @@ def log_model_comparison_wandb(model1_path, model2_path):
251
241
  ### Jupyter Notebooks
252
242
  ```python
253
243
  # Interactive analysis in notebooks
254
- result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt",
255
- stats=True, memory_analysis=True)
244
+ result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt")
256
245
 
257
246
  # Display results
258
247
  if result.is_json:
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "diffai-python"
7
- version = "0.3.4"
7
+ version = "0.3.5"
8
8
  description = "AI/ML specialized diff tool for deep tensor comparison and analysis"
9
9
  readme = "README.md"
10
10
  license = "MIT"