diffai-python 0.3.4__tar.gz → 0.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {diffai_python-0.3.4 → diffai_python-0.3.5}/Cargo.toml +1 -1
- {diffai_python-0.3.4 → diffai_python-0.3.5}/PKG-INFO +36 -47
- {diffai_python-0.3.4 → diffai_python-0.3.5}/README.md +35 -46
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-core/README.md +89 -64
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/Cargo.lock +2 -2
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/Cargo.toml +2 -2
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/README.md +35 -46
- {diffai_python-0.3.4 → diffai_python-0.3.5}/pyproject.toml +1 -1
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-core/Cargo.toml +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-core/benches/diff_benchmark.rs +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-core/benches/ml_performance.rs +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-core/src/analysis_results_diff.rs +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-core/src/lib.rs +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/.gitignore +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/diffai +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/src/diffai/__init__.py +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/src/diffai/__main__.py +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/src/diffai/installer.py +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/src/main.rs +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/diffai-python/test_integration.py +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/src/diffai/__init__.py +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/src/diffai/__main__.py +0 -0
- {diffai_python-0.3.4 → diffai_python-0.3.5}/src/diffai/installer.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: diffai-python
|
|
3
|
-
Version: 0.3.4
|
|
3
|
+
Version: 0.3.5
|
|
4
4
|
Classifier: Development Status :: 4 - Beta
|
|
5
5
|
Classifier: Environment :: Console
|
|
6
6
|
Classifier: Intended Audience :: Developers
|
|
@@ -72,13 +72,13 @@ pip install diffai-python
|
|
|
72
72
|
After installation, the `diffai` command is available:
|
|
73
73
|
|
|
74
74
|
```bash
|
|
75
|
-
# Compare ML models
|
|
76
|
-
diffai model_v1.safetensors model_v2.safetensors
|
|
75
|
+
# Compare ML models (30+ analysis features automatic)
|
|
76
|
+
diffai model_v1.safetensors model_v2.safetensors
|
|
77
77
|
|
|
78
78
|
# Compare NumPy arrays
|
|
79
|
-
diffai data_v1.npy data_v2.npy
|
|
79
|
+
diffai data_v1.npy data_v2.npy
|
|
80
80
|
|
|
81
|
-
# JSON output for automation
|
|
81
|
+
# JSON output for automation (all ML features included)
|
|
82
82
|
diffai model_v1.pt model_v2.pt --output json
|
|
83
83
|
```
|
|
84
84
|
|
|
@@ -93,8 +93,6 @@ print(result.raw_output)
|
|
|
93
93
|
|
|
94
94
|
# With options
|
|
95
95
|
options = diffai.DiffOptions(
|
|
96
|
-
stats=True,
|
|
97
|
-
architecture_comparison=True,
|
|
98
96
|
output_format=diffai.OutputFormat.JSON
|
|
99
97
|
)
|
|
100
98
|
result = diffai.diff("model_v1.pt", "model_v2.pt", options)
|
|
@@ -108,18 +106,22 @@ if result.is_json:
|
|
|
108
106
|
### Advanced ML Analysis
|
|
109
107
|
|
|
110
108
|
```python
|
|
111
|
-
# Comprehensive ML model analysis
|
|
109
|
+
# Comprehensive ML model analysis (automatic for ML models)
|
|
112
110
|
result = diffai.diff(
|
|
113
111
|
"baseline.safetensors",
|
|
114
112
|
"improved.safetensors",
|
|
115
|
-
stats=True,
|
|
116
|
-
architecture_comparison=True,
|
|
117
|
-
memory_analysis=True,
|
|
118
|
-
anomaly_detection=True,
|
|
119
|
-
convergence_analysis=True
|
|
113
|
+
stats=True # Enable statistical analysis
|
|
120
114
|
)
|
|
121
115
|
|
|
122
116
|
print(result.raw_output)
|
|
117
|
+
|
|
118
|
+
# ML-specific analysis features (automatic for ML models)
|
|
119
|
+
# - architecture_comparison: Model architecture and structural changes
|
|
120
|
+
# - memory_analysis: Memory usage and optimization opportunities
|
|
121
|
+
# - anomaly_detection: Numerical anomalies and training issues
|
|
122
|
+
# - convergence_analysis: Training convergence patterns
|
|
123
|
+
# - gradient_analysis: Gradient flow health assessment
|
|
124
|
+
# - quantization_analysis: Quantization effect analysis
|
|
123
125
|
```
|
|
124
126
|
|
|
125
127
|
## Supported Formats
|
|
@@ -134,21 +136,21 @@ print(result.raw_output)
|
|
|
134
136
|
- **JSON**: Machine-readable format for automation
|
|
135
137
|
- **YAML**: Human-readable structured format
|
|
136
138
|
|
|
137
|
-
## ML Analysis Features
|
|
139
|
+
## ML Analysis Features (Automatic)
|
|
138
140
|
|
|
139
|
-
The package provides
|
|
141
|
+
The package provides 30+ specialized ML analysis features that run automatically for PyTorch and Safetensors files:
|
|
140
142
|
|
|
141
|
-
-
|
|
142
|
-
-
|
|
143
|
-
-
|
|
144
|
-
-
|
|
145
|
-
-
|
|
146
|
-
-
|
|
147
|
-
-
|
|
148
|
-
-
|
|
149
|
-
-
|
|
150
|
-
-
|
|
151
|
-
-
|
|
143
|
+
- **Detailed tensor statistics**: Mean, std, min, max, shape, dtype
|
|
144
|
+
- **Model structure comparison**: Architecture and structural changes
|
|
145
|
+
- **Memory usage analysis**: Memory optimization opportunities
|
|
146
|
+
- **Numerical anomaly detection**: Training issues and anomalies
|
|
147
|
+
- **Training convergence analysis**: Convergence patterns
|
|
148
|
+
- **Gradient information analysis**: Gradient flow health
|
|
149
|
+
- **Layer similarity comparison**: Inter-layer analysis
|
|
150
|
+
- **Detailed change summary**: Comprehensive change patterns
|
|
151
|
+
- **Quantization impact analysis**: Quantization effects
|
|
152
|
+
- **Change magnitude sorting**: Priority-sorted differences
|
|
153
|
+
- **Plus 20+ additional specialized features**
|
|
152
154
|
|
|
153
155
|
## API Reference
|
|
154
156
|
|
|
@@ -173,12 +175,9 @@ class DiffOptions:
|
|
|
173
175
|
recursive: bool = False
|
|
174
176
|
verbose: bool = False
|
|
175
177
|
|
|
176
|
-
#
|
|
177
|
-
stats: bool = False
|
|
178
|
-
|
|
179
|
-
memory_analysis: bool = False
|
|
180
|
-
anomaly_detection: bool = False
|
|
181
|
-
# ... and more
|
|
178
|
+
# For scientific data (NumPy/MATLAB)
|
|
179
|
+
stats: bool = False # Only used for NumPy/MATLAB files
|
|
180
|
+
# Note: ML analysis runs automatically for PyTorch/Safetensors
|
|
182
181
|
```
|
|
183
182
|
|
|
184
183
|
### Results
|
|
@@ -204,9 +203,7 @@ class DiffResult:
|
|
|
204
203
|
before = "model_baseline.safetensors"
|
|
205
204
|
after = "model_finetuned.safetensors"
|
|
206
205
|
|
|
207
|
-
result = diffai.diff(before, after,
|
|
208
|
-
stats=True,
|
|
209
|
-
convergence_analysis=True)
|
|
206
|
+
result = diffai.diff(before, after)
|
|
210
207
|
```
|
|
211
208
|
|
|
212
209
|
### MLOps Integration
|
|
@@ -214,9 +211,7 @@ result = diffai.diff(before, after,
|
|
|
214
211
|
# Automated model validation in CI/CD
|
|
215
212
|
def validate_model_changes(old_model, new_model):
|
|
216
213
|
result = diffai.diff(old_model, new_model,
|
|
217
|
-
output_format=diffai.OutputFormat.JSON,
|
|
218
|
-
anomaly_detection=True,
|
|
219
|
-
memory_analysis=True)
|
|
214
|
+
output_format=diffai.OutputFormat.JSON)
|
|
220
215
|
|
|
221
216
|
if result.is_json:
|
|
222
217
|
# Check for critical issues
|
|
@@ -244,9 +239,7 @@ def log_model_comparison(run_id1, run_id2):
|
|
|
244
239
|
|
|
245
240
|
# Compare with diffai
|
|
246
241
|
result = diffai.diff(model1_path, model2_path,
|
|
247
|
-
output_format=diffai.OutputFormat.JSON
|
|
248
|
-
stats=True,
|
|
249
|
-
architecture_comparison=True)
|
|
242
|
+
output_format=diffai.OutputFormat.JSON)
|
|
250
243
|
|
|
251
244
|
# Log results to MLflow
|
|
252
245
|
with mlflow.start_run():
|
|
@@ -269,10 +262,7 @@ def log_model_comparison_wandb(model1_path, model2_path):
|
|
|
269
262
|
"""Log model comparison to Weights & Biases"""
|
|
270
263
|
|
|
271
264
|
result = diffai.diff(model1_path, model2_path,
|
|
272
|
-
output_format=diffai.OutputFormat.JSON
|
|
273
|
-
stats=True,
|
|
274
|
-
memory_analysis=True,
|
|
275
|
-
convergence_analysis=True)
|
|
265
|
+
output_format=diffai.OutputFormat.JSON)
|
|
276
266
|
|
|
277
267
|
# Log to wandb
|
|
278
268
|
wandb.log({"model_comparison": result.data})
|
|
@@ -289,8 +279,7 @@ def log_model_comparison_wandb(model1_path, model2_path):
|
|
|
289
279
|
### Jupyter Notebooks
|
|
290
280
|
```python
|
|
291
281
|
# Interactive analysis in notebooks
|
|
292
|
-
result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt",
|
|
293
|
-
stats=True, memory_analysis=True)
|
|
282
|
+
result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt")
|
|
294
283
|
|
|
295
284
|
# Display results
|
|
296
285
|
if result.is_json:
|
|
@@ -34,13 +34,13 @@ pip install diffai-python
|
|
|
34
34
|
After installation, the `diffai` command is available:
|
|
35
35
|
|
|
36
36
|
```bash
|
|
37
|
-
# Compare ML models
|
|
38
|
-
diffai model_v1.safetensors model_v2.safetensors
|
|
37
|
+
# Compare ML models (30+ analysis features automatic)
|
|
38
|
+
diffai model_v1.safetensors model_v2.safetensors
|
|
39
39
|
|
|
40
40
|
# Compare NumPy arrays
|
|
41
|
-
diffai data_v1.npy data_v2.npy
|
|
41
|
+
diffai data_v1.npy data_v2.npy
|
|
42
42
|
|
|
43
|
-
# JSON output for automation
|
|
43
|
+
# JSON output for automation (all ML features included)
|
|
44
44
|
diffai model_v1.pt model_v2.pt --output json
|
|
45
45
|
```
|
|
46
46
|
|
|
@@ -55,8 +55,6 @@ print(result.raw_output)
|
|
|
55
55
|
|
|
56
56
|
# With options
|
|
57
57
|
options = diffai.DiffOptions(
|
|
58
|
-
stats=True,
|
|
59
|
-
architecture_comparison=True,
|
|
60
58
|
output_format=diffai.OutputFormat.JSON
|
|
61
59
|
)
|
|
62
60
|
result = diffai.diff("model_v1.pt", "model_v2.pt", options)
|
|
@@ -70,18 +68,22 @@ if result.is_json:
|
|
|
70
68
|
### Advanced ML Analysis
|
|
71
69
|
|
|
72
70
|
```python
|
|
73
|
-
# Comprehensive ML model analysis
|
|
71
|
+
# Comprehensive ML model analysis (automatic for ML models)
|
|
74
72
|
result = diffai.diff(
|
|
75
73
|
"baseline.safetensors",
|
|
76
74
|
"improved.safetensors",
|
|
77
|
-
stats=True
|
|
78
|
-
architecture_comparison=True,
|
|
79
|
-
memory_analysis=True,
|
|
80
|
-
anomaly_detection=True,
|
|
81
|
-
convergence_analysis=True
|
|
75
|
+
stats=True # Enable statistical analysis
|
|
82
76
|
)
|
|
83
77
|
|
|
84
78
|
print(result.raw_output)
|
|
79
|
+
|
|
80
|
+
# ML-specific analysis features (automatic for ML models)
|
|
81
|
+
# - architecture_comparison: Model architecture and structural changes
|
|
82
|
+
# - memory_analysis: Memory usage and optimization opportunities
|
|
83
|
+
# - anomaly_detection: Numerical anomalies and training issues
|
|
84
|
+
# - convergence_analysis: Training convergence patterns
|
|
85
|
+
# - gradient_analysis: Gradient flow health assessment
|
|
86
|
+
# - quantization_analysis: Quantization effect analysis
|
|
85
87
|
```
|
|
86
88
|
|
|
87
89
|
## Supported Formats
|
|
@@ -96,21 +98,21 @@ print(result.raw_output)
|
|
|
96
98
|
- **JSON**: Machine-readable format for automation
|
|
97
99
|
- **YAML**: Human-readable structured format
|
|
98
100
|
|
|
99
|
-
## ML Analysis Features
|
|
101
|
+
## ML Analysis Features (Automatic)
|
|
100
102
|
|
|
101
|
-
The package provides
|
|
103
|
+
The package provides 30+ specialized ML analysis features that run automatically for PyTorch and Safetensors files:
|
|
102
104
|
|
|
103
|
-
-
|
|
104
|
-
-
|
|
105
|
-
-
|
|
106
|
-
-
|
|
107
|
-
-
|
|
108
|
-
-
|
|
109
|
-
-
|
|
110
|
-
-
|
|
111
|
-
-
|
|
112
|
-
-
|
|
113
|
-
-
|
|
105
|
+
- **Detailed tensor statistics**: Mean, std, min, max, shape, dtype
|
|
106
|
+
- **Model structure comparison**: Architecture and structural changes
|
|
107
|
+
- **Memory usage analysis**: Memory optimization opportunities
|
|
108
|
+
- **Numerical anomaly detection**: Training issues and anomalies
|
|
109
|
+
- **Training convergence analysis**: Convergence patterns
|
|
110
|
+
- **Gradient information analysis**: Gradient flow health
|
|
111
|
+
- **Layer similarity comparison**: Inter-layer analysis
|
|
112
|
+
- **Detailed change summary**: Comprehensive change patterns
|
|
113
|
+
- **Quantization impact analysis**: Quantization effects
|
|
114
|
+
- **Change magnitude sorting**: Priority-sorted differences
|
|
115
|
+
- **Plus 20+ additional specialized features**
|
|
114
116
|
|
|
115
117
|
## API Reference
|
|
116
118
|
|
|
@@ -135,12 +137,9 @@ class DiffOptions:
|
|
|
135
137
|
recursive: bool = False
|
|
136
138
|
verbose: bool = False
|
|
137
139
|
|
|
138
|
-
#
|
|
139
|
-
stats: bool = False
|
|
140
|
-
|
|
141
|
-
memory_analysis: bool = False
|
|
142
|
-
anomaly_detection: bool = False
|
|
143
|
-
# ... and more
|
|
140
|
+
# For scientific data (NumPy/MATLAB)
|
|
141
|
+
stats: bool = False # Only used for NumPy/MATLAB files
|
|
142
|
+
# Note: ML analysis runs automatically for PyTorch/Safetensors
|
|
144
143
|
```
|
|
145
144
|
|
|
146
145
|
### Results
|
|
@@ -166,9 +165,7 @@ class DiffResult:
|
|
|
166
165
|
before = "model_baseline.safetensors"
|
|
167
166
|
after = "model_finetuned.safetensors"
|
|
168
167
|
|
|
169
|
-
result = diffai.diff(before, after,
|
|
170
|
-
stats=True,
|
|
171
|
-
convergence_analysis=True)
|
|
168
|
+
result = diffai.diff(before, after)
|
|
172
169
|
```
|
|
173
170
|
|
|
174
171
|
### MLOps Integration
|
|
@@ -176,9 +173,7 @@ result = diffai.diff(before, after,
|
|
|
176
173
|
# Automated model validation in CI/CD
|
|
177
174
|
def validate_model_changes(old_model, new_model):
|
|
178
175
|
result = diffai.diff(old_model, new_model,
|
|
179
|
-
output_format=diffai.OutputFormat.JSON
|
|
180
|
-
anomaly_detection=True,
|
|
181
|
-
memory_analysis=True)
|
|
176
|
+
output_format=diffai.OutputFormat.JSON)
|
|
182
177
|
|
|
183
178
|
if result.is_json:
|
|
184
179
|
# Check for critical issues
|
|
@@ -206,9 +201,7 @@ def log_model_comparison(run_id1, run_id2):
|
|
|
206
201
|
|
|
207
202
|
# Compare with diffai
|
|
208
203
|
result = diffai.diff(model1_path, model2_path,
|
|
209
|
-
output_format=diffai.OutputFormat.JSON
|
|
210
|
-
stats=True,
|
|
211
|
-
architecture_comparison=True)
|
|
204
|
+
output_format=diffai.OutputFormat.JSON)
|
|
212
205
|
|
|
213
206
|
# Log results to MLflow
|
|
214
207
|
with mlflow.start_run():
|
|
@@ -231,10 +224,7 @@ def log_model_comparison_wandb(model1_path, model2_path):
|
|
|
231
224
|
"""Log model comparison to Weights & Biases"""
|
|
232
225
|
|
|
233
226
|
result = diffai.diff(model1_path, model2_path,
|
|
234
|
-
output_format=diffai.OutputFormat.JSON
|
|
235
|
-
stats=True,
|
|
236
|
-
memory_analysis=True,
|
|
237
|
-
convergence_analysis=True)
|
|
227
|
+
output_format=diffai.OutputFormat.JSON)
|
|
238
228
|
|
|
239
229
|
# Log to wandb
|
|
240
230
|
wandb.log({"model_comparison": result.data})
|
|
@@ -251,8 +241,7 @@ def log_model_comparison_wandb(model1_path, model2_path):
|
|
|
251
241
|
### Jupyter Notebooks
|
|
252
242
|
```python
|
|
253
243
|
# Interactive analysis in notebooks
|
|
254
|
-
result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt"
|
|
255
|
-
stats=True, memory_analysis=True)
|
|
244
|
+
result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt")
|
|
256
245
|
|
|
257
246
|
# Display results
|
|
258
247
|
if result.is_json:
|
|
@@ -14,19 +14,25 @@ A next-generation diff tool specialized for **AI/ML and scientific computing wor
|
|
|
14
14
|
$ diff model_v1.safetensors model_v2.safetensors
|
|
15
15
|
Binary files model_v1.safetensors and model_v2.safetensors differ
|
|
16
16
|
|
|
17
|
-
# diffai shows meaningful model changes
|
|
18
|
-
$ diffai model_v1.safetensors model_v2.safetensors
|
|
17
|
+
# diffai shows meaningful model changes with full analysis
|
|
18
|
+
$ diffai model_v1.safetensors model_v2.safetensors
|
|
19
19
|
~ fc1.bias: mean=0.0018->0.0017, std=0.0518->0.0647
|
|
20
20
|
~ fc1.weight: mean=-0.0002->-0.0001, std=0.0514->0.0716
|
|
21
21
|
~ fc2.weight: mean=-0.0008->-0.0018, std=0.0719->0.0883
|
|
22
22
|
gradient_analysis: flow_health=healthy, norm=0.015000, ratio=1.0500
|
|
23
|
+
deployment_readiness: readiness=0.92, strategy=blue_green, risk=low
|
|
24
|
+
quantization_analysis: compression=0.0%, speedup=1.8x, precision_loss=1.5%
|
|
25
|
+
|
|
26
|
+
[WARNING]
|
|
27
|
+
• Memory usage increased moderately (+250MB). Monitor resource consumption.
|
|
28
|
+
• Inference speed moderately affected (1.3x slower). Consider optimization opportunities.
|
|
23
29
|
```
|
|
24
30
|
|
|
25
31
|
## Key Features
|
|
26
32
|
|
|
27
33
|
- **AI/ML Native**: Direct support for PyTorch (.pt/.pth), Safetensors (.safetensors), NumPy (.npy/.npz), and MATLAB (.mat) files
|
|
28
34
|
- **Tensor Analysis**: Automatic calculation of tensor statistics (mean, std, min, max, shape, memory usage)
|
|
29
|
-
- **ML Analysis
|
|
35
|
+
- **Comprehensive ML Analysis**: 30+ analysis functions including quantization, architecture, memory, convergence, anomaly detection, and deployment readiness - all enabled by default
|
|
30
36
|
- **Scientific Data Support**: NumPy arrays and MATLAB matrices with complex number support
|
|
31
37
|
- **Pure Rust Implementation**: No system dependencies, works on Windows/Linux/macOS without additional installations
|
|
32
38
|
- **Multiple Output Formats**: Colored CLI, JSON for MLOps integration, YAML for human-readable reports
|
|
@@ -76,38 +82,43 @@ cargo build --release
|
|
|
76
82
|
### Basic Model Comparison
|
|
77
83
|
|
|
78
84
|
```bash
|
|
79
|
-
# Compare PyTorch models
|
|
80
|
-
diffai model_old.pt model_new.pt
|
|
85
|
+
# Compare PyTorch models with full analysis (default)
|
|
86
|
+
diffai model_old.pt model_new.pt
|
|
81
87
|
|
|
82
|
-
# Compare Safetensors with
|
|
83
|
-
diffai checkpoint_v1.safetensors checkpoint_v2.safetensors
|
|
88
|
+
# Compare Safetensors with complete ML analysis
|
|
89
|
+
diffai checkpoint_v1.safetensors checkpoint_v2.safetensors
|
|
84
90
|
|
|
85
91
|
# Compare NumPy arrays
|
|
86
|
-
diffai data_v1.npy data_v2.npy
|
|
92
|
+
diffai data_v1.npy data_v2.npy
|
|
87
93
|
|
|
88
94
|
# Compare MATLAB files
|
|
89
|
-
diffai experiment_v1.mat experiment_v2.mat
|
|
95
|
+
diffai experiment_v1.mat experiment_v2.mat
|
|
90
96
|
```
|
|
91
97
|
|
|
92
|
-
###
|
|
98
|
+
### ML Analysis Features
|
|
93
99
|
|
|
94
100
|
```bash
|
|
95
|
-
#
|
|
96
|
-
diffai baseline.safetensors finetuned.safetensors
|
|
97
|
-
|
|
98
|
-
# Combined analysis with sorting
|
|
99
|
-
diffai original.pt optimized.pt --stats --quantization-analysis --sort-by-change-magnitude
|
|
101
|
+
# Full ML analysis runs automatically for PyTorch/Safetensors
|
|
102
|
+
diffai baseline.safetensors finetuned.safetensors
|
|
103
|
+
# Outputs: 30+ analysis types including quantization, architecture, memory, etc.
|
|
100
104
|
|
|
101
105
|
# JSON output for automation
|
|
102
|
-
diffai model_v1.safetensors model_v2.safetensors --
|
|
106
|
+
diffai model_v1.safetensors model_v2.safetensors --output json
|
|
103
107
|
|
|
104
108
|
# Detailed diagnostic information with verbose mode
|
|
105
|
-
diffai model_v1.safetensors model_v2.safetensors --verbose
|
|
109
|
+
diffai model_v1.safetensors model_v2.safetensors --verbose
|
|
106
110
|
|
|
107
|
-
#
|
|
108
|
-
diffai model_v1.safetensors model_v2.safetensors --
|
|
111
|
+
# YAML output for human-readable reports
|
|
112
|
+
diffai model_v1.safetensors model_v2.safetensors --output yaml
|
|
109
113
|
```
|
|
110
114
|
|
|
115
|
+
## 📚 Documentation
|
|
116
|
+
|
|
117
|
+
- **[Working Examples & Demonstrations](docs/examples/)** - See diffai in action with real outputs
|
|
118
|
+
- **[API Documentation](https://docs.rs/diffai-core)** - Rust library documentation
|
|
119
|
+
- **[User Guide](docs/user-guide.md)** - Comprehensive usage guide
|
|
120
|
+
- **[ML Analysis Guide](docs/ml-analysis-guide.md)** - Deep dive into ML-specific features
|
|
121
|
+
|
|
111
122
|
## Supported File Formats
|
|
112
123
|
|
|
113
124
|
### ML Model Formats
|
|
@@ -128,26 +139,28 @@ diffai model_v1.safetensors model_v2.safetensors --architecture-comparison --mem
|
|
|
128
139
|
|
|
129
140
|
## ML Analysis Functions
|
|
130
141
|
|
|
131
|
-
###
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
-
|
|
136
|
-
-
|
|
137
|
-
-
|
|
138
|
-
-
|
|
139
|
-
-
|
|
140
|
-
-
|
|
141
|
-
-
|
|
142
|
-
-
|
|
143
|
-
|
|
144
|
-
|
|
142
|
+
### Automatic Comprehensive Analysis (v0.3.4)
|
|
143
|
+
When comparing PyTorch or Safetensors files, diffai automatically runs 30+ ML analysis features:
|
|
144
|
+
|
|
145
|
+
**Automatic Features Include:**
|
|
146
|
+
- **Statistical Analysis**: Detailed tensor statistics (mean, std, min, max, shape, memory)
|
|
147
|
+
- **Quantization Analysis**: Analyze quantization effects and efficiency
|
|
148
|
+
- **Architecture Comparison**: Compare model architectures and structural changes
|
|
149
|
+
- **Memory Analysis**: Analyze memory usage and optimization opportunities
|
|
150
|
+
- **Anomaly Detection**: Detect numerical anomalies in model parameters
|
|
151
|
+
- **Convergence Analysis**: Analyze convergence patterns in model parameters
|
|
152
|
+
- **Gradient Analysis**: Analyze gradient information when available
|
|
153
|
+
- **Deployment Readiness**: Assess production deployment readiness
|
|
154
|
+
- **Regression Testing**: Automatic performance degradation detection
|
|
155
|
+
- **Plus 20+ additional specialized features**
|
|
156
|
+
|
|
157
|
+
### Future Enhancements
|
|
145
158
|
- TensorFlow format support (.pb, .h5, SavedModel)
|
|
146
159
|
- ONNX format support
|
|
147
160
|
- Advanced visualization and charting features
|
|
148
161
|
|
|
149
162
|
### Design Philosophy
|
|
150
|
-
diffai
|
|
163
|
+
diffai provides comprehensive analysis by default for ML models, eliminating choice paralysis. Users get all relevant insights without needing to remember or specify dozens of analysis flags.
|
|
151
164
|
|
|
152
165
|
## Debugging and Diagnostics
|
|
153
166
|
|
|
@@ -158,12 +171,12 @@ Get comprehensive diagnostic information for debugging and performance analysis:
|
|
|
158
171
|
# Basic verbose output
|
|
159
172
|
diffai model1.safetensors model2.safetensors --verbose
|
|
160
173
|
|
|
161
|
-
# Verbose with
|
|
162
|
-
diffai data1.json data2.json --verbose --stats --epsilon 0.001 --ignore-keys-regex "^id$"
|
|
174
|
+
# Verbose with structured data filtering
|
|
175
|
+
diffai data1.json data2.json --verbose --epsilon 0.001 --ignore-keys-regex "^id$"
|
|
163
176
|
```
|
|
164
177
|
|
|
165
178
|
**Verbose output includes:**
|
|
166
|
-
- **Configuration diagnostics**:
|
|
179
|
+
- **Configuration diagnostics**: Format settings, filters, analysis modes
|
|
167
180
|
- **File analysis**: Paths, sizes, detected formats, processing context
|
|
168
181
|
- **Performance metrics**: Processing time, difference counts, optimization status
|
|
169
182
|
- **Directory statistics**: File counts, comparison summaries (with `--recursive`)
|
|
@@ -172,9 +185,9 @@ diffai data1.json data2.json --verbose --stats --epsilon 0.001 --ignore-keys-reg
|
|
|
172
185
|
```
|
|
173
186
|
=== diffai verbose mode enabled ===
|
|
174
187
|
Configuration:
|
|
175
|
-
Input format:
|
|
188
|
+
Input format: Safetensors
|
|
176
189
|
Output format: Cli
|
|
177
|
-
ML analysis
|
|
190
|
+
ML analysis: Full analysis enabled (all 30 features)
|
|
178
191
|
Epsilon tolerance: 0.001
|
|
179
192
|
|
|
180
193
|
File analysis:
|
|
@@ -216,36 +229,36 @@ diffai model1.safetensors model2.safetensors --output yaml
|
|
|
216
229
|
|
|
217
230
|
### Research & Development
|
|
218
231
|
```bash
|
|
219
|
-
# Compare model before and after fine-tuning
|
|
220
|
-
diffai pretrained_model.safetensors finetuned_model.safetensors
|
|
221
|
-
|
|
232
|
+
# Compare model before and after fine-tuning (full analysis automatic)
|
|
233
|
+
diffai pretrained_model.safetensors finetuned_model.safetensors
|
|
234
|
+
# Outputs: learning_progress, convergence_analysis, parameter stats, and 27 more analyses
|
|
222
235
|
|
|
223
236
|
# Analyze architectural changes during development
|
|
224
|
-
diffai baseline_architecture.pt improved_architecture.pt
|
|
225
|
-
|
|
237
|
+
diffai baseline_architecture.pt improved_architecture.pt
|
|
238
|
+
# Outputs: architecture_comparison, param_efficiency_analysis, and full ML analysis
|
|
226
239
|
```
|
|
227
240
|
|
|
228
241
|
### MLOps & CI/CD
|
|
229
242
|
```bash
|
|
230
|
-
# Automated model validation in CI/CD
|
|
231
|
-
diffai production_model.safetensors candidate_model.safetensors
|
|
232
|
-
|
|
243
|
+
# Automated model validation in CI/CD (comprehensive analysis)
|
|
244
|
+
diffai production_model.safetensors candidate_model.safetensors
|
|
245
|
+
# Outputs: deployment_readiness, regression_test, risk_assessment, and 27 more analyses
|
|
233
246
|
|
|
234
|
-
# Performance impact assessment
|
|
235
|
-
diffai original_model.pt optimized_model.pt
|
|
236
|
-
|
|
247
|
+
# Performance impact assessment with JSON output for automation
|
|
248
|
+
diffai original_model.pt optimized_model.pt --output json
|
|
249
|
+
# Outputs: quantization_analysis, memory_analysis, performance_impact_estimate, etc.
|
|
237
250
|
```
|
|
238
251
|
|
|
239
252
|
### Scientific Computing
|
|
240
253
|
```bash
|
|
241
254
|
# Compare NumPy experiment results
|
|
242
|
-
diffai baseline_results.npy new_results.npy
|
|
255
|
+
diffai baseline_results.npy new_results.npy
|
|
243
256
|
|
|
244
257
|
# Analyze MATLAB simulation data
|
|
245
|
-
diffai simulation_v1.mat simulation_v2.mat
|
|
258
|
+
diffai simulation_v1.mat simulation_v2.mat
|
|
246
259
|
|
|
247
260
|
# Compare compressed NumPy archives
|
|
248
|
-
diffai dataset_v1.npz dataset_v2.npz
|
|
261
|
+
diffai dataset_v1.npz dataset_v2.npz
|
|
249
262
|
```
|
|
250
263
|
|
|
251
264
|
### Experiment Tracking
|
|
@@ -265,7 +278,8 @@ diffai model_a.safetensors model_b.safetensors \
|
|
|
265
278
|
- `-f, --format <FORMAT>` - Specify input file format
|
|
266
279
|
- `-o, --output <OUTPUT>` - Choose output format (cli, json, yaml)
|
|
267
280
|
- `-r, --recursive` - Compare directories recursively
|
|
268
|
-
|
|
281
|
+
|
|
282
|
+
**Note:** For ML models (PyTorch/Safetensors), comprehensive analysis including statistics runs automatically
|
|
269
283
|
|
|
270
284
|
### Advanced Options
|
|
271
285
|
- `--path <PATH>` - Filter differences by specific path
|
|
@@ -276,9 +290,16 @@ diffai model_a.safetensors model_b.safetensors \
|
|
|
276
290
|
|
|
277
291
|
## Examples
|
|
278
292
|
|
|
279
|
-
### Basic Tensor Comparison
|
|
293
|
+
### Basic Tensor Comparison (Automatic)
|
|
280
294
|
```bash
|
|
281
|
-
$ diffai simple_model_v1.safetensors simple_model_v2.safetensors --stats
|
|
295
|
+
$ diffai simple_model_v1.safetensors simple_model_v2.safetensors
|
|
296
|
+
anomaly_detection: type=none, severity=none, action="continue_training"
|
|
297
|
+
architecture_comparison: type1=feedforward, type2=feedforward, deployment_readiness=ready
|
|
298
|
+
convergence_analysis: status=converging, stability=0.92
|
|
299
|
+
gradient_analysis: flow_health=healthy, norm=0.021069
|
|
300
|
+
memory_analysis: delta=+0.0MB, efficiency=1.000000
|
|
301
|
+
quantization_analysis: compression=0.0%, speedup=1.8x, precision_loss=1.5%
|
|
302
|
+
regression_test: passed=true, degradation=-2.5%, severity=low
|
|
282
303
|
~ fc1.bias: mean=0.0018->0.0017, std=0.0518->0.0647
|
|
283
304
|
~ fc1.weight: mean=-0.0002->-0.0001, std=0.0514->0.0716
|
|
284
305
|
~ fc2.bias: mean=-0.0076->-0.0257, std=0.0661->0.0973
|
|
@@ -287,24 +308,28 @@ $ diffai simple_model_v1.safetensors simple_model_v2.safetensors --stats
|
|
|
287
308
|
~ fc3.weight: mean=-0.0035->-0.0010, std=0.0990->0.1113
|
|
288
309
|
```
|
|
289
310
|
|
|
290
|
-
###
|
|
311
|
+
### JSON Output for Automation
|
|
291
312
|
```bash
|
|
292
|
-
$ diffai baseline.safetensors improved.safetensors --
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
313
|
+
$ diffai baseline.safetensors improved.safetensors --output json
|
|
314
|
+
{
|
|
315
|
+
"anomaly_detection": {"type": "none", "severity": "none"},
|
|
316
|
+
"architecture_comparison": {"type1": "feedforward", "type2": "feedforward"},
|
|
317
|
+
"deployment_readiness": {"readiness": 0.92, "strategy": "blue_green"},
|
|
318
|
+
"quantization_analysis": {"compression": "0.0%", "speedup": "1.8x"},
|
|
319
|
+
"regression_test": {"passed": true, "degradation": "-2.5%"}
|
|
320
|
+
// ... plus 25+ additional analysis features
|
|
321
|
+
}
|
|
297
322
|
```
|
|
298
323
|
|
|
299
324
|
### Scientific Data Analysis
|
|
300
325
|
```bash
|
|
301
|
-
$ diffai experiment_data_v1.npy experiment_data_v2.npy
|
|
326
|
+
$ diffai experiment_data_v1.npy experiment_data_v2.npy
|
|
302
327
|
~ data: shape=[1000, 256], mean=0.1234->0.1456, std=0.9876->0.9654, dtype=float64
|
|
303
328
|
```
|
|
304
329
|
|
|
305
330
|
### MATLAB File Comparison
|
|
306
331
|
```bash
|
|
307
|
-
$ diffai simulation_v1.mat simulation_v2.mat
|
|
332
|
+
$ diffai simulation_v1.mat simulation_v2.mat
|
|
308
333
|
~ results: var=results, shape=[500, 100], mean=2.3456->2.4567, std=1.2345->1.3456, dtype=double
|
|
309
334
|
+ new_variable: var=new_variable, shape=[100], dtype=single, elements=100, size=0.39KB
|
|
310
335
|
```
|
|
@@ -432,7 +432,7 @@ dependencies = [
|
|
|
432
432
|
|
|
433
433
|
[[package]]
|
|
434
434
|
name = "diffai-core"
|
|
435
|
-
version = "0.3.4"
|
|
435
|
+
version = "0.3.5"
|
|
436
436
|
dependencies = [
|
|
437
437
|
"anyhow",
|
|
438
438
|
"bytemuck",
|
|
@@ -455,7 +455,7 @@ dependencies = [
|
|
|
455
455
|
|
|
456
456
|
[[package]]
|
|
457
457
|
name = "diffai-python"
|
|
458
|
-
version = "0.3.4"
|
|
458
|
+
version = "0.3.5"
|
|
459
459
|
dependencies = [
|
|
460
460
|
"anyhow",
|
|
461
461
|
"clap",
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
[package]
|
|
5
5
|
name = "diffai-python"
|
|
6
|
-
version = "0.3.4"
|
|
6
|
+
version = "0.3.5"
|
|
7
7
|
edition = "2021"
|
|
8
8
|
authors = ["kako-jun"]
|
|
9
9
|
license = "MIT"
|
|
@@ -17,7 +17,7 @@ path = "src/main.rs"
|
|
|
17
17
|
|
|
18
18
|
[dependencies]
|
|
19
19
|
# Reference to the actual diffai dependencies
|
|
20
|
-
diffai-core = { version = "0.3.4", path = "../diffai-core" }
|
|
20
|
+
diffai-core = { version = "0.3.5", path = "../diffai-core" }
|
|
21
21
|
clap = { version = "4.0", features = ["derive"] }
|
|
22
22
|
colored = "2.0"
|
|
23
23
|
serde = { version = "1.0", features = ["derive"] }
|
|
@@ -34,13 +34,13 @@ pip install diffai-python
|
|
|
34
34
|
After installation, the `diffai` command is available:
|
|
35
35
|
|
|
36
36
|
```bash
|
|
37
|
-
# Compare ML models
|
|
38
|
-
diffai model_v1.safetensors model_v2.safetensors
|
|
37
|
+
# Compare ML models (30+ analysis features automatic)
|
|
38
|
+
diffai model_v1.safetensors model_v2.safetensors
|
|
39
39
|
|
|
40
40
|
# Compare NumPy arrays
|
|
41
|
-
diffai data_v1.npy data_v2.npy
|
|
41
|
+
diffai data_v1.npy data_v2.npy
|
|
42
42
|
|
|
43
|
-
# JSON output for automation
|
|
43
|
+
# JSON output for automation (all ML features included)
|
|
44
44
|
diffai model_v1.pt model_v2.pt --output json
|
|
45
45
|
```
|
|
46
46
|
|
|
@@ -55,8 +55,6 @@ print(result.raw_output)
|
|
|
55
55
|
|
|
56
56
|
# With options
|
|
57
57
|
options = diffai.DiffOptions(
|
|
58
|
-
stats=True,
|
|
59
|
-
architecture_comparison=True,
|
|
60
58
|
output_format=diffai.OutputFormat.JSON
|
|
61
59
|
)
|
|
62
60
|
result = diffai.diff("model_v1.pt", "model_v2.pt", options)
|
|
@@ -70,18 +68,22 @@ if result.is_json:
|
|
|
70
68
|
### Advanced ML Analysis
|
|
71
69
|
|
|
72
70
|
```python
|
|
73
|
-
# Comprehensive ML model analysis
|
|
71
|
+
# Comprehensive ML model analysis (automatic for ML models)
|
|
74
72
|
result = diffai.diff(
|
|
75
73
|
"baseline.safetensors",
|
|
76
74
|
"improved.safetensors",
|
|
77
|
-
stats=True,
|
|
78
|
-
architecture_comparison=True,
|
|
79
|
-
memory_analysis=True,
|
|
80
|
-
anomaly_detection=True,
|
|
81
|
-
convergence_analysis=True
|
|
75
|
+
stats=True # Enable statistical analysis
|
|
82
76
|
)
|
|
83
77
|
|
|
84
78
|
print(result.raw_output)
|
|
79
|
+
|
|
80
|
+
# ML-specific analysis features (automatic for ML models)
|
|
81
|
+
# - architecture_comparison: Model architecture and structural changes
|
|
82
|
+
# - memory_analysis: Memory usage and optimization opportunities
|
|
83
|
+
# - anomaly_detection: Numerical anomalies and training issues
|
|
84
|
+
# - convergence_analysis: Training convergence patterns
|
|
85
|
+
# - gradient_analysis: Gradient flow health assessment
|
|
86
|
+
# - quantization_analysis: Quantization effect analysis
|
|
85
87
|
```
|
|
86
88
|
|
|
87
89
|
## Supported Formats
|
|
@@ -96,21 +98,21 @@ print(result.raw_output)
|
|
|
96
98
|
- **JSON**: Machine-readable format for automation
|
|
97
99
|
- **YAML**: Human-readable structured format
|
|
98
100
|
|
|
99
|
-
## ML Analysis Features
|
|
101
|
+
## ML Analysis Features (Automatic)
|
|
100
102
|
|
|
101
|
-
The package provides
|
|
103
|
+
The package provides 30+ specialized ML analysis features that run automatically for PyTorch and Safetensors files:
|
|
102
104
|
|
|
103
|
-
-
|
|
104
|
-
-
|
|
105
|
-
-
|
|
106
|
-
-
|
|
107
|
-
-
|
|
108
|
-
-
|
|
109
|
-
-
|
|
110
|
-
-
|
|
111
|
-
-
|
|
112
|
-
-
|
|
113
|
-
-
|
|
105
|
+
- **Detailed tensor statistics**: Mean, std, min, max, shape, dtype
|
|
106
|
+
- **Model structure comparison**: Architecture and structural changes
|
|
107
|
+
- **Memory usage analysis**: Memory optimization opportunities
|
|
108
|
+
- **Numerical anomaly detection**: Training issues and anomalies
|
|
109
|
+
- **Training convergence analysis**: Convergence patterns
|
|
110
|
+
- **Gradient information analysis**: Gradient flow health
|
|
111
|
+
- **Layer similarity comparison**: Inter-layer analysis
|
|
112
|
+
- **Detailed change summary**: Comprehensive change patterns
|
|
113
|
+
- **Quantization impact analysis**: Quantization effects
|
|
114
|
+
- **Change magnitude sorting**: Priority-sorted differences
|
|
115
|
+
- **Plus 20+ additional specialized features**
|
|
114
116
|
|
|
115
117
|
## API Reference
|
|
116
118
|
|
|
@@ -135,12 +137,9 @@ class DiffOptions:
|
|
|
135
137
|
recursive: bool = False
|
|
136
138
|
verbose: bool = False
|
|
137
139
|
|
|
138
|
-
#
|
|
139
|
-
stats: bool = False
|
|
140
|
-
|
|
141
|
-
memory_analysis: bool = False
|
|
142
|
-
anomaly_detection: bool = False
|
|
143
|
-
# ... and more
|
|
140
|
+
# For scientific data (NumPy/MATLAB)
|
|
141
|
+
stats: bool = False # Only used for NumPy/MATLAB files
|
|
142
|
+
# Note: ML analysis runs automatically for PyTorch/Safetensors
|
|
144
143
|
```
|
|
145
144
|
|
|
146
145
|
### Results
|
|
@@ -166,9 +165,7 @@ class DiffResult:
|
|
|
166
165
|
before = "model_baseline.safetensors"
|
|
167
166
|
after = "model_finetuned.safetensors"
|
|
168
167
|
|
|
169
|
-
result = diffai.diff(before, after,
|
|
170
|
-
stats=True,
|
|
171
|
-
convergence_analysis=True)
|
|
168
|
+
result = diffai.diff(before, after)
|
|
172
169
|
```
|
|
173
170
|
|
|
174
171
|
### MLOps Integration
|
|
@@ -176,9 +173,7 @@ result = diffai.diff(before, after,
|
|
|
176
173
|
# Automated model validation in CI/CD
|
|
177
174
|
def validate_model_changes(old_model, new_model):
|
|
178
175
|
result = diffai.diff(old_model, new_model,
|
|
179
|
-
output_format=diffai.OutputFormat.JSON,
|
|
180
|
-
anomaly_detection=True,
|
|
181
|
-
memory_analysis=True)
|
|
176
|
+
output_format=diffai.OutputFormat.JSON)
|
|
182
177
|
|
|
183
178
|
if result.is_json:
|
|
184
179
|
# Check for critical issues
|
|
@@ -206,9 +201,7 @@ def log_model_comparison(run_id1, run_id2):
|
|
|
206
201
|
|
|
207
202
|
# Compare with diffai
|
|
208
203
|
result = diffai.diff(model1_path, model2_path,
|
|
209
|
-
output_format=diffai.OutputFormat.JSON,
|
|
210
|
-
stats=True,
|
|
211
|
-
architecture_comparison=True)
|
|
204
|
+
output_format=diffai.OutputFormat.JSON)
|
|
212
205
|
|
|
213
206
|
# Log results to MLflow
|
|
214
207
|
with mlflow.start_run():
|
|
@@ -231,10 +224,7 @@ def log_model_comparison_wandb(model1_path, model2_path):
|
|
|
231
224
|
"""Log model comparison to Weights & Biases"""
|
|
232
225
|
|
|
233
226
|
result = diffai.diff(model1_path, model2_path,
|
|
234
|
-
output_format=diffai.OutputFormat.JSON,
|
|
235
|
-
stats=True,
|
|
236
|
-
memory_analysis=True,
|
|
237
|
-
convergence_analysis=True)
|
|
227
|
+
output_format=diffai.OutputFormat.JSON)
|
|
238
228
|
|
|
239
229
|
# Log to wandb
|
|
240
230
|
wandb.log({"model_comparison": result.data})
|
|
@@ -251,8 +241,7 @@ def log_model_comparison_wandb(model1_path, model2_path):
|
|
|
251
241
|
### Jupyter Notebooks
|
|
252
242
|
```python
|
|
253
243
|
# Interactive analysis in notebooks
|
|
254
|
-
result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt",
|
|
255
|
-
stats=True, memory_analysis=True)
|
|
244
|
+
result = diffai.diff("checkpoint_100.pt", "checkpoint_200.pt")
|
|
256
245
|
|
|
257
246
|
# Display results
|
|
258
247
|
if result.is_json:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|