diffai-python 0.2.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- diffai_python-0.2.9/.gitignore +26 -0
- diffai_python-0.2.9/PKG-INFO +391 -0
- diffai_python-0.2.9/README.md +353 -0
- diffai_python-0.2.9/pyproject.toml +119 -0
- diffai_python-0.2.9/src/diffai/__init__.py +63 -0
- diffai_python-0.2.9/src/diffai/diffai.py +452 -0
- diffai_python-0.2.9/src/diffai/installer.py +335 -0
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Rust
|
|
2
|
+
/target/
|
|
3
|
+
**/*.rs.bk
|
|
4
|
+
|
|
5
|
+
# IDEs
|
|
6
|
+
.idea/
|
|
7
|
+
.vscode/
|
|
8
|
+
|
|
9
|
+
# OS generated files
|
|
10
|
+
.DS_Store
|
|
11
|
+
thumbs.db
|
|
12
|
+
|
|
13
|
+
# diffx symbolic link
|
|
14
|
+
diffx/
|
|
15
|
+
|
|
16
|
+
# Internal development documentation
|
|
17
|
+
CLAUDE.md
|
|
18
|
+
|
|
19
|
+
# Downloaded ML models (large files)
|
|
20
|
+
test-models/distilbert_base/
|
|
21
|
+
test-models/dialogpt_small/
|
|
22
|
+
test-models/gpt2_small/
|
|
23
|
+
test-models/distilgpt2/
|
|
24
|
+
test-models/tiny_gpt2/
|
|
25
|
+
test-models/.venv/
|
|
26
|
+
test-models/uv.lock
|
|
@@ -0,0 +1,391 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: diffai-python
|
|
3
|
+
Version: 0.2.9
|
|
4
|
+
Summary: AI/ML specialized diff tool for deep tensor comparison and analysis
|
|
5
|
+
Project-URL: Homepage, https://github.com/kako-jun/diffai
|
|
6
|
+
Project-URL: Documentation, https://github.com/kako-jun/diffai/blob/main/docs/
|
|
7
|
+
Project-URL: Repository, https://github.com/kako-jun/diffai.git
|
|
8
|
+
Project-URL: Issues, https://github.com/kako-jun/diffai/issues
|
|
9
|
+
Project-URL: Changelog, https://github.com/kako-jun/diffai/blob/main/CHANGELOG.md
|
|
10
|
+
Author: kako-jun
|
|
11
|
+
License-Expression: MIT
|
|
12
|
+
Keywords: ai,artificial-intelligence,diff,diffai,machine-learning,matlab,ml,model-comparison,numpy,pytorch,safetensors,tensor
|
|
13
|
+
Classifier: Development Status :: 4 - Beta
|
|
14
|
+
Classifier: Environment :: Console
|
|
15
|
+
Classifier: Intended Audience :: Developers
|
|
16
|
+
Classifier: Intended Audience :: Science/Research
|
|
17
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
18
|
+
Classifier: Operating System :: OS Independent
|
|
19
|
+
Classifier: Programming Language :: Python :: 3
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
24
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
25
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
26
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
27
|
+
Classifier: Topic :: Text Processing :: General
|
|
28
|
+
Classifier: Topic :: Utilities
|
|
29
|
+
Requires-Python: >=3.8
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: black>=22.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: flake8>=5.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: mypy>=0.991; extra == 'dev'
|
|
34
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
35
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
36
|
+
Requires-Dist: types-requests>=2.28; extra == 'dev'
|
|
37
|
+
Description-Content-Type: text/markdown
|
|
38
|
+
|
|
39
|
+
# diffai - AI/ML Specialized Diff Tool (Python Package)
|
|
40
|
+
|
|
41
|
+
[![PyPI version](https://badge.fury.io/py/diffai-python.svg)](https://badge.fury.io/py/diffai-python)
|
|
42
|
+
[![Downloads](https://img.shields.io/pypi/dm/diffai-python.svg)](https://pypi.org/project/diffai-python/)
|
|
43
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/diffai-python.svg)](https://pypi.org/project/diffai-python/)
|
|
44
|
+
|
|
45
|
+
An AI/ML-specialized diff tool for deep tensor comparison and analysis. This package provides a convenient, type-safe Python interface to diffai.
|
|
46
|
+
|
|
47
|
+
## 🚀 Quick Start
|
|
48
|
+
|
|
49
|
+
### Installation
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# Install via pip
|
|
53
|
+
pip install diffai-python
|
|
54
|
+
|
|
55
|
+
# Development installation
|
|
56
|
+
pip install diffai-python[dev]
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Basic Usage
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
import diffai
|
|
63
|
+
|
|
64
|
+
# Simple model comparison
|
|
65
|
+
result = diffai.diff("model_v1.safetensors", "model_v2.safetensors", stats=True)
|
|
66
|
+
print(result)
|
|
67
|
+
|
|
68
|
+
# Advanced ML analysis with type-safe configuration
|
|
69
|
+
options = diffai.DiffOptions(
|
|
70
|
+
stats=True,
|
|
71
|
+
architecture_comparison=True,
|
|
72
|
+
memory_analysis=True,
|
|
73
|
+
output_format=diffai.OutputFormat.JSON
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
result = diffai.diff("baseline.safetensors", "improved.safetensors", options)
|
|
77
|
+
if result.is_json:
|
|
78
|
+
for change in result.changes:
|
|
79
|
+
print(f"Changed: {change}")
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### Command Line Usage
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# The package also installs the diffai binary
|
|
86
|
+
diffai model1.safetensors model2.safetensors --stats
|
|
87
|
+
|
|
88
|
+
# Download binary manually if needed
|
|
89
|
+
diffai-download-binary
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## 📦 Supported File Formats
|
|
93
|
+
|
|
94
|
+
### AI/ML Formats (Specialized Analysis)
|
|
95
|
+
- **Safetensors** (.safetensors) - PyTorch model format with ML analysis
|
|
96
|
+
- **PyTorch** (.pt, .pth) - Native PyTorch models with tensor statistics
|
|
97
|
+
- **NumPy** (.npy, .npz) - Scientific computing arrays with statistical analysis
|
|
98
|
+
- **MATLAB** (.mat) - Engineering/scientific data with numerical analysis
|
|
99
|
+
|
|
100
|
+
### Structured Data Formats (Universal)
|
|
101
|
+
- **JSON** (.json) - API configurations, model metadata
|
|
102
|
+
- **YAML** (.yaml, .yml) - Configuration files, CI/CD pipelines
|
|
103
|
+
- **TOML** (.toml) - Rust configs, Python pyproject.toml
|
|
104
|
+
- **XML** (.xml) - Legacy configurations, model definitions
|
|
105
|
+
- **CSV** (.csv) - Datasets, experiment results
|
|
106
|
+
- **INI** (.ini) - Legacy configuration files
|
|
107
|
+
|
|
108
|
+
## 🔬 35 ML Analysis Functions
|
|
109
|
+
|
|
110
|
+
### Core Analysis Functions
|
|
111
|
+
```python
|
|
112
|
+
# Statistical analysis
|
|
113
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors", stats=True)
|
|
114
|
+
|
|
115
|
+
# Quantization analysis
|
|
116
|
+
result = diffai.diff("fp32.safetensors", "quantized.safetensors",
|
|
117
|
+
quantization_analysis=True)
|
|
118
|
+
|
|
119
|
+
# Change magnitude sorting
|
|
120
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors",
|
|
121
|
+
sort_by_change_magnitude=True, stats=True)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
### Phase 3 Advanced Analysis (v0.2.7+)
|
|
125
|
+
```python
|
|
126
|
+
# Architecture comparison
|
|
127
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors",
|
|
128
|
+
architecture_comparison=True)
|
|
129
|
+
|
|
130
|
+
# Memory analysis for deployment
|
|
131
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors",
|
|
132
|
+
memory_analysis=True)
|
|
133
|
+
|
|
134
|
+
# Anomaly detection for debugging
|
|
135
|
+
result = diffai.diff("stable.safetensors", "problematic.safetensors",
|
|
136
|
+
anomaly_detection=True)
|
|
137
|
+
|
|
138
|
+
# Comprehensive analysis
|
|
139
|
+
options = diffai.DiffOptions(
|
|
140
|
+
stats=True,
|
|
141
|
+
architecture_comparison=True,
|
|
142
|
+
memory_analysis=True,
|
|
143
|
+
anomaly_detection=True,
|
|
144
|
+
convergence_analysis=True,
|
|
145
|
+
gradient_analysis=True,
|
|
146
|
+
similarity_matrix=True,
|
|
147
|
+
change_summary=True
|
|
148
|
+
)
|
|
149
|
+
result = diffai.diff("baseline.safetensors", "improved.safetensors", options)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## 💡 Python API Examples
|
|
153
|
+
|
|
154
|
+
### Type-Safe Configuration
|
|
155
|
+
```python
|
|
156
|
+
from diffai import DiffOptions, OutputFormat
|
|
157
|
+
|
|
158
|
+
# Create type-safe configuration
|
|
159
|
+
options = DiffOptions(
|
|
160
|
+
stats=True,
|
|
161
|
+
architecture_comparison=True,
|
|
162
|
+
memory_analysis=True,
|
|
163
|
+
output_format=OutputFormat.JSON
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
# Compare models
|
|
167
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors", options)
|
|
168
|
+
|
|
169
|
+
# Access structured results
|
|
170
|
+
if result.is_json:
|
|
171
|
+
print(f"Found {len(result.changes)} changes")
|
|
172
|
+
for change in result.changes:
|
|
173
|
+
print(f" {change.get('path')}: {change.get('type')}")
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Scientific Data Analysis
|
|
177
|
+
```python
|
|
178
|
+
# NumPy array comparison
|
|
179
|
+
result = diffai.diff("experiment_v1.npy", "experiment_v2.npy", stats=True)
|
|
180
|
+
print(f"Statistical changes: {result}")
|
|
181
|
+
|
|
182
|
+
# MATLAB data comparison
|
|
183
|
+
result = diffai.diff("simulation_v1.mat", "simulation_v2.mat",
|
|
184
|
+
stats=True, sort_by_change_magnitude=True)
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
### JSON Output for Automation
|
|
188
|
+
```python
|
|
189
|
+
# Get JSON results for MLOps integration
|
|
190
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors",
|
|
191
|
+
stats=True, output_format=diffai.OutputFormat.JSON)
|
|
192
|
+
|
|
193
|
+
if result.is_json:
|
|
194
|
+
# Process structured data
|
|
195
|
+
changes = result.changes
|
|
196
|
+
summary = result.summary
|
|
197
|
+
|
|
198
|
+
# Integration with MLflow, Weights & Biases, etc.
|
|
199
|
+
log_model_comparison(changes, summary)
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### Error Handling
|
|
203
|
+
```python
|
|
204
|
+
try:
|
|
205
|
+
result = diffai.diff("model1.safetensors", "model2.safetensors", stats=True)
|
|
206
|
+
print(result)
|
|
207
|
+
except diffai.BinaryNotFoundError:
|
|
208
|
+
print("diffai binary not found. Please install: pip install diffai-python")
|
|
209
|
+
except diffai.InvalidInputError as e:
|
|
210
|
+
print(f"Invalid input: {e}")
|
|
211
|
+
except diffai.DiffaiError as e:
|
|
212
|
+
print(f"diffai error: {e}")
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### String Comparison (Temporary Files)
|
|
216
|
+
```python
|
|
217
|
+
# Compare JSON strings directly
|
|
218
|
+
json1 = '{"model": "gpt-2", "layers": 12}'
|
|
219
|
+
json2 = '{"model": "gpt-2", "layers": 24}'
|
|
220
|
+
|
|
221
|
+
result = diffai.diff_string(json1, json2, output_format=diffai.OutputFormat.JSON)
|
|
222
|
+
print(result)
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
## 🔧 Advanced Usage
|
|
226
|
+
|
|
227
|
+
### Installation Verification
|
|
228
|
+
```python
|
|
229
|
+
# Check if diffai is properly installed
|
|
230
|
+
try:
|
|
231
|
+
info = diffai.verify_installation()
|
|
232
|
+
print(f"diffai version: {info['version']}")
|
|
233
|
+
print(f"Binary path: {info['binary_path']}")
|
|
234
|
+
except diffai.BinaryNotFoundError as e:
|
|
235
|
+
print(f"Installation issue: {e}")
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### Manual Binary Management
|
|
239
|
+
```python
|
|
240
|
+
# Download binary programmatically
|
|
241
|
+
from diffai.installer import install_binary
|
|
242
|
+
|
|
243
|
+
success = install_binary(force=True) # Force reinstall
|
|
244
|
+
if success:
|
|
245
|
+
print("Binary installed successfully")
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
### Low-Level API Access
|
|
249
|
+
```python
|
|
250
|
+
# Direct command execution
|
|
251
|
+
result = diffai.run_diffai([
|
|
252
|
+
"model1.safetensors",
|
|
253
|
+
"model2.safetensors",
|
|
254
|
+
"--stats",
|
|
255
|
+
"--architecture-comparison",
|
|
256
|
+
"--output", "json"
|
|
257
|
+
])
|
|
258
|
+
|
|
259
|
+
print(f"Exit code: {result.exit_code}")
|
|
260
|
+
print(f"Output: {result.raw_output}")
|
|
261
|
+
```
|
|
262
|
+
|
|
263
|
+
## 🔗 Integration Examples
|
|
264
|
+
|
|
265
|
+
### MLflow Integration
|
|
266
|
+
```python
|
|
267
|
+
import mlflow
|
|
268
|
+
import diffai
|
|
269
|
+
|
|
270
|
+
def log_model_comparison(model1_path, model2_path, run_id=None):
|
|
271
|
+
with mlflow.start_run(run_id=run_id):
|
|
272
|
+
# Compare models with comprehensive analysis
|
|
273
|
+
result = diffai.diff(
|
|
274
|
+
model1_path, model2_path,
|
|
275
|
+
stats=True,
|
|
276
|
+
architecture_comparison=True,
|
|
277
|
+
memory_analysis=True,
|
|
278
|
+
output_format=diffai.OutputFormat.JSON
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
if result.is_json:
|
|
282
|
+
# Log structured comparison data
|
|
283
|
+
mlflow.log_dict(result.data, "model_comparison.json")
|
|
284
|
+
|
|
285
|
+
# Log metrics
|
|
286
|
+
if result.changes:
|
|
287
|
+
mlflow.log_metric("total_changes", len(result.changes))
|
|
288
|
+
mlflow.log_metric("significant_changes",
|
|
289
|
+
sum(1 for c in result.changes
|
|
290
|
+
if c.get('magnitude', 0) > 0.1))
|
|
291
|
+
|
|
292
|
+
# Usage
|
|
293
|
+
log_model_comparison("baseline.safetensors", "candidate.safetensors")
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### Weights & Biases Integration
|
|
297
|
+
```python
|
|
298
|
+
import wandb
|
|
299
|
+
import diffai
|
|
300
|
+
|
|
301
|
+
def wandb_log_model_diff(model1, model2, **kwargs):
|
|
302
|
+
result = diffai.diff(model1, model2,
|
|
303
|
+
stats=True,
|
|
304
|
+
output_format=diffai.OutputFormat.JSON,
|
|
305
|
+
**kwargs)
|
|
306
|
+
|
|
307
|
+
if result.is_json and result.changes:
|
|
308
|
+
# Log to wandb
|
|
309
|
+
wandb.log({
|
|
310
|
+
"model_comparison": wandb.Table(
|
|
311
|
+
columns=["parameter", "change_type", "magnitude"],
|
|
312
|
+
data=[[c.get("path"), c.get("type"), c.get("magnitude")]
|
|
313
|
+
for c in result.changes[:100]] # Limit rows
|
|
314
|
+
)
|
|
315
|
+
})
|
|
316
|
+
|
|
317
|
+
# Initialize wandb run
|
|
318
|
+
wandb.init(project="model-comparison")
|
|
319
|
+
wandb_log_model_diff("model_v1.safetensors", "model_v2.safetensors")
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
### Flask API Endpoint
|
|
323
|
+
```python
|
|
324
|
+
from flask import Flask, request, jsonify
|
|
325
|
+
import diffai
|
|
326
|
+
|
|
327
|
+
app = Flask(__name__)
|
|
328
|
+
|
|
329
|
+
@app.route('/compare', methods=['POST'])
|
|
330
|
+
def compare_models():
|
|
331
|
+
try:
|
|
332
|
+
files = request.files
|
|
333
|
+
model1 = files['model1']
|
|
334
|
+
model2 = files['model2']
|
|
335
|
+
|
|
336
|
+
# Save temporary files
|
|
337
|
+
model1.save('/tmp/model1.safetensors')
|
|
338
|
+
model2.save('/tmp/model2.safetensors')
|
|
339
|
+
|
|
340
|
+
# Compare models
|
|
341
|
+
result = diffai.diff('/tmp/model1.safetensors', '/tmp/model2.safetensors',
|
|
342
|
+
stats=True,
|
|
343
|
+
architecture_comparison=True,
|
|
344
|
+
output_format=diffai.OutputFormat.JSON)
|
|
345
|
+
|
|
346
|
+
return jsonify({
|
|
347
|
+
"status": "success",
|
|
348
|
+
"comparison": result.data if result.is_json else result.raw_output
|
|
349
|
+
})
|
|
350
|
+
|
|
351
|
+
except diffai.DiffaiError as e:
|
|
352
|
+
return jsonify({"status": "error", "message": str(e)}), 400
|
|
353
|
+
|
|
354
|
+
if __name__ == '__main__':
|
|
355
|
+
app.run(debug=True)
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
## 🏗️ Platform Support
|
|
359
|
+
|
|
360
|
+
This package automatically downloads platform-specific binaries:
|
|
361
|
+
|
|
362
|
+
- **Linux** (x86_64, ARM64)
|
|
363
|
+
- **macOS** (Intel x86_64, Apple Silicon ARM64)
|
|
364
|
+
- **Windows** (x86_64)
|
|
365
|
+
|
|
366
|
+
The binary is downloaded during installation and cached. If the download fails, the package falls back to a `diffai` binary found on the system `PATH`.
|
|
367
|
+
|
|
368
|
+
## 🔗 Related Projects
|
|
369
|
+
|
|
370
|
+
- **[diffx-python](https://pypi.org/project/diffx-python/)** - General-purpose structured data diff tool
|
|
371
|
+
- **[diffai (npm)](https://www.npmjs.com/package/diffai)** - Node.js package for diffai
|
|
372
|
+
- **[diffai (GitHub)](https://github.com/kako-jun/diffai)** - Main repository
|
|
373
|
+
|
|
374
|
+
## 📚 Documentation
|
|
375
|
+
|
|
376
|
+
- [CLI Reference](https://github.com/kako-jun/diffai/blob/main/docs/reference/cli-reference.md)
|
|
377
|
+
- [ML Analysis Guide](https://github.com/kako-jun/diffai/blob/main/docs/reference/ml-analysis.md)
|
|
378
|
+
- [User Guide](https://github.com/kako-jun/diffai/blob/main/docs/user-guide/)
|
|
379
|
+
- [API Documentation](https://github.com/kako-jun/diffai/blob/main/docs/reference/api-reference.md)
|
|
380
|
+
|
|
381
|
+
## 📄 License
|
|
382
|
+
|
|
383
|
+
MIT License - see the [LICENSE](https://github.com/kako-jun/diffai/blob/main/LICENSE) file for details.
|
|
384
|
+
|
|
385
|
+
## 🤝 Contributing
|
|
386
|
+
|
|
387
|
+
Contributions are welcome! Please see [CONTRIBUTING.md](https://github.com/kako-jun/diffai/blob/main/CONTRIBUTING.md) for guidelines.
|
|
388
|
+
|
|
389
|
+
---
|
|
390
|
+
|
|
391
|
+
**diffai** - Making AI/ML data differences visible, measurable, and actionable through Python. 🐍🚀
|