mod-trace 0.4.0__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {mod_trace-0.4.0 → mod_trace-0.4.2}/Cargo.lock +1 -1
- {mod_trace-0.4.0 → mod_trace-0.4.2}/Cargo.toml +1 -1
- {mod_trace-0.4.0 → mod_trace-0.4.2}/PKG-INFO +20 -2
- {mod_trace-0.4.0 → mod_trace-0.4.2}/README.md +19 -1
- {mod_trace-0.4.0 → mod_trace-0.4.2}/pyproject.toml +1 -1
- {mod_trace-0.4.0 → mod_trace-0.4.2}/src/main.rs +27 -10
- {mod_trace-0.4.0 → mod_trace-0.4.2}/src/pt.rs +3 -5
- {mod_trace-0.4.0 → mod_trace-0.4.2}/.github/workflows/release.yml +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/.gitignore +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/LICENSE +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/benchmarks/tiny_pytorch.py +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/docs/ARCHITECTURE.md +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/docs/REAL_MODELS.md +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/docs/tensor-lab.md +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/broken_shape.json +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/lightgbm/README.md +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/lightgbm/clf_v1.txt +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/lightgbm/clf_v2.txt +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/lightgbm/generate_demo_models.py +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/make_sample_catboost.py +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/mlp.json +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/onnx/README.md +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/onnx/generate_demo_models.py +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/onnx/mlp_retrain_a.onnx +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/onnx/mlp_retrain_b.onnx +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/onnx/mlp_v1.onnx +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/onnx/mlp_v2.onnx +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/pytorch/README.md +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/pytorch/generate_demo_models.py +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/pytorch/mlp_v1.pt +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/pytorch/mlp_v2.pt +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/tiny_attention.json +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/tiny_attention_plan.json +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/src/catboost_deep_diff.py +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/src/catboost_explain.py +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/src/cbm.rs +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/src/demo.rs +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/src/explain.rs +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/src/lgbm.rs +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/src/model.rs +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/src/onnx.rs +0 -0
- {mod_trace-0.4.0 → mod_trace-0.4.2}/src/tensor.rs +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mod-trace
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Classifier: Programming Language :: Rust
|
|
5
5
|
Classifier: Programming Language :: Python :: 3
|
|
6
6
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
@@ -500,9 +500,27 @@ cargo run -- inspect models/tiny-distilbert-base-cased/model_fixed.onnx
|
|
|
500
500
|
|
|
501
501
|
Fixed shapes such as `[1, 8]` produce better numeric estimates than symbolic shapes such as `[batch, sequence]`.
|
|
502
502
|
|
|
503
|
+
## PyTorch
|
|
504
|
+
|
|
505
|
+
mod-trace reads PyTorch `torch.save` files (`.pt`, `.pth`, `.bin`, `.ckpt`) **natively — no torch, no Python**:
|
|
506
|
+
|
|
507
|
+
```sh
|
|
508
|
+
mod-trace inspect model.pt
|
|
509
|
+
mod-trace diff old.pt new.pt
|
|
510
|
+
mod-trace explain-diff old.pt new.pt
|
|
511
|
+
mod-trace check --max-parameter-growth 30% old.pt new.pt
|
|
512
|
+
```
|
|
513
|
+
|
|
514
|
+
It parses the `torch.save` zip (pickled structure + raw tensor storages) and reports file size, tensor count, **estimated parameter count** (storage bytes ÷ dtype), **dominant dtype**, **recovered parameter/layer names**, and a sampled weight fingerprint that changes on a retrain/finetune. ZIP64 is handled for models over 4 GB.
|
|
515
|
+
|
|
516
|
+
Limits, by design:
|
|
517
|
+
- **Tensor shapes are not decoded** (that needs a full pickle interpreter) — you get counts, names, and dtype, not per-tensor shapes.
|
|
518
|
+
- **Legacy (pre-1.6) pickle `.pt`** and older Hugging Face `pytorch_model.bin` recover names + a fingerprint but not parameter sizes (tensors aren't stored as zip entries there). Modern zip-format saves get the full report.
|
|
519
|
+
- **`.safetensors` is a different format** and is not read by mod-trace.
|
|
520
|
+
|
|
503
521
|
### Exporting any PyTorch model to ONNX
|
|
504
522
|
|
|
505
|
-
|
|
523
|
+
For richer graph-level detail (operators, attention layers), or if you only have a `.safetensors`/legacy file, export to ONNX — the usual serving format — which mod-trace reads fully. For a plain `nn.Module` the export is a single call:
|
|
506
524
|
|
|
507
525
|
```python
|
|
508
526
|
import torch
|
|
@@ -483,9 +483,27 @@ cargo run -- inspect models/tiny-distilbert-base-cased/model_fixed.onnx
|
|
|
483
483
|
|
|
484
484
|
Fixed shapes such as `[1, 8]` produce better numeric estimates than symbolic shapes such as `[batch, sequence]`.
|
|
485
485
|
|
|
486
|
+
## PyTorch
|
|
487
|
+
|
|
488
|
+
mod-trace reads PyTorch `torch.save` files (`.pt`, `.pth`, `.bin`, `.ckpt`) **natively — no torch, no Python**:
|
|
489
|
+
|
|
490
|
+
```sh
|
|
491
|
+
mod-trace inspect model.pt
|
|
492
|
+
mod-trace diff old.pt new.pt
|
|
493
|
+
mod-trace explain-diff old.pt new.pt
|
|
494
|
+
mod-trace check --max-parameter-growth 30% old.pt new.pt
|
|
495
|
+
```
|
|
496
|
+
|
|
497
|
+
It parses the `torch.save` zip (pickled structure + raw tensor storages) and reports file size, tensor count, **estimated parameter count** (storage bytes ÷ dtype), **dominant dtype**, **recovered parameter/layer names**, and a sampled weight fingerprint that changes on a retrain/finetune. ZIP64 is handled for models over 4 GB.
|
|
498
|
+
|
|
499
|
+
Limits, by design:
|
|
500
|
+
- **Tensor shapes are not decoded** (that needs a full pickle interpreter) — you get counts, names, and dtype, not per-tensor shapes.
|
|
501
|
+
- **Legacy (pre-1.6) pickle `.pt`** and older Hugging Face `pytorch_model.bin` recover names + a fingerprint but not parameter sizes (tensors aren't stored as zip entries there). Modern zip-format saves get the full report.
|
|
502
|
+
- **`.safetensors` is a different format** and is not read by mod-trace.
|
|
503
|
+
|
|
486
504
|
### Exporting any PyTorch model to ONNX
|
|
487
505
|
|
|
488
|
-
|
|
506
|
+
For richer graph-level detail (operators, attention layers), or if you only have a `.safetensors`/legacy file, export to ONNX — the usual serving format — which mod-trace reads fully. For a plain `nn.Module` the export is a single call:
|
|
489
507
|
|
|
490
508
|
```python
|
|
491
509
|
import torch
|
|
@@ -1815,7 +1815,7 @@ fn artifact_kind(path: &str) -> ArtifactKind {
|
|
|
1815
1815
|
Some("cbm") => return ArtifactKind::CatBoost,
|
|
1816
1816
|
Some("lgb") => return ArtifactKind::LightGbm,
|
|
1817
1817
|
Some("onnx") => return ArtifactKind::Onnx,
|
|
1818
|
-
Some("pt") | Some("pth") => return ArtifactKind::PyTorch,
|
|
1818
|
+
Some("pt") | Some("pth") | Some("bin") | Some("ckpt") => return ArtifactKind::PyTorch,
|
|
1819
1819
|
Some("json") => return ArtifactKind::Json,
|
|
1820
1820
|
_ => {}
|
|
1821
1821
|
}
|
|
@@ -1824,11 +1824,22 @@ fn artifact_kind(path: &str) -> ArtifactKind {
|
|
|
1824
1824
|
ArtifactKind::CatBoost
|
|
1825
1825
|
} else if has_lgbm_signature(path) {
|
|
1826
1826
|
ArtifactKind::LightGbm
|
|
1827
|
+
} else if has_pickle_signature(path) {
|
|
1828
|
+
// torch.save legacy pickle (0x80 proto marker) with an unknown extension
|
|
1829
|
+
ArtifactKind::PyTorch
|
|
1827
1830
|
} else {
|
|
1828
1831
|
ArtifactKind::Unknown
|
|
1829
1832
|
}
|
|
1830
1833
|
}
|
|
1831
1834
|
|
|
1835
|
+
fn has_pickle_signature(path: &str) -> bool {
|
|
1836
|
+
let mut header = [0u8; 1];
|
|
1837
|
+
fs::File::open(path)
|
|
1838
|
+
.and_then(|mut file| file.read_exact(&mut header))
|
|
1839
|
+
.map(|_| header[0] == 0x80)
|
|
1840
|
+
.unwrap_or(false)
|
|
1841
|
+
}
|
|
1842
|
+
|
|
1832
1843
|
fn has_cbm_signature(path: &str) -> bool {
|
|
1833
1844
|
let mut header = [0u8; 4];
|
|
1834
1845
|
fs::File::open(path)
|
|
@@ -3169,15 +3180,21 @@ fn print_pt_report(report: &pt::PtReport, limit: usize) {
|
|
|
3169
3180
|
}
|
|
3170
3181
|
println!();
|
|
3171
3182
|
println!("Structure:");
|
|
3172
|
-
|
|
3173
|
-
|
|
3174
|
-
|
|
3175
|
-
|
|
3176
|
-
|
|
3177
|
-
|
|
3178
|
-
|
|
3179
|
-
|
|
3180
|
-
|
|
3183
|
+
if report.is_zip {
|
|
3184
|
+
println!(" Tensors (storages): {}", report.tensor_count);
|
|
3185
|
+
println!(
|
|
3186
|
+
" Parameters (est): {}",
|
|
3187
|
+
format_count_human(report.estimated_parameter_count as usize)
|
|
3188
|
+
);
|
|
3189
|
+
println!(
|
|
3190
|
+
" Parameter bytes: {}",
|
|
3191
|
+
format_bytes(report.total_parameter_bytes as usize)
|
|
3192
|
+
);
|
|
3193
|
+
} else {
|
|
3194
|
+
println!(" Tensors (storages): unknown (legacy pickle)");
|
|
3195
|
+
println!(" Parameters (est): unknown (legacy pickle does not store sizes as entries)");
|
|
3196
|
+
println!(" Parameter bytes: unknown (legacy pickle)");
|
|
3197
|
+
}
|
|
3181
3198
|
print_optional("Dominant dtype", report.dominant_dtype.as_deref());
|
|
3182
3199
|
println!();
|
|
3183
3200
|
println!("Parameter-like Internals:");
|
|
@@ -35,10 +35,6 @@ impl PtReport {
|
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
-
pub fn looks_like_pt(head: &[u8]) -> bool {
|
|
39
|
-
head.starts_with(b"PK\x03\x04") || head.first() == Some(&0x80)
|
|
40
|
-
}
|
|
41
|
-
|
|
42
38
|
const SAMPLE_PER_STORAGE: u64 = 1 << 20; // 1 MiB sampled per tensor for the weight fingerprint
|
|
43
39
|
|
|
44
40
|
pub fn inspect(path: &str) -> Result<PtReport, String> {
|
|
@@ -231,7 +227,8 @@ fn inspect_zip(file: &mut File, path: &str, total: u64) -> Result<PtReport, Stri
|
|
|
231
227
|
}
|
|
232
228
|
|
|
233
229
|
fn inspect_legacy(file: &mut File, path: &str, total: u64) -> Result<PtReport, String> {
|
|
234
|
-
// Legacy (non-zip) pickle:
|
|
230
|
+
// Legacy (non-zip) pickle: tensors aren't stored as separate entries, so we
|
|
231
|
+
// recover names + dtype + a count of storage references, but not sizes.
|
|
235
232
|
let bytes = read_at(file, 0, total as usize)?;
|
|
236
233
|
let param_names = recover_param_names(&bytes);
|
|
237
234
|
let (dtype, _) = dominant_dtype(&bytes);
|
|
@@ -250,6 +247,7 @@ fn inspect_legacy(file: &mut File, path: &str, total: u64) -> Result<PtReport, S
|
|
|
250
247
|
})
|
|
251
248
|
}
|
|
252
249
|
|
|
250
|
+
|
|
253
251
|
fn file_name(path: &str) -> String {
|
|
254
252
|
Path::new(path)
|
|
255
253
|
.file_name()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|