mod-trace 0.4.0__tar.gz → 0.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. {mod_trace-0.4.0 → mod_trace-0.4.2}/Cargo.lock +1 -1
  2. {mod_trace-0.4.0 → mod_trace-0.4.2}/Cargo.toml +1 -1
  3. {mod_trace-0.4.0 → mod_trace-0.4.2}/PKG-INFO +20 -2
  4. {mod_trace-0.4.0 → mod_trace-0.4.2}/README.md +19 -1
  5. {mod_trace-0.4.0 → mod_trace-0.4.2}/pyproject.toml +1 -1
  6. {mod_trace-0.4.0 → mod_trace-0.4.2}/src/main.rs +27 -10
  7. {mod_trace-0.4.0 → mod_trace-0.4.2}/src/pt.rs +3 -5
  8. {mod_trace-0.4.0 → mod_trace-0.4.2}/.github/workflows/release.yml +0 -0
  9. {mod_trace-0.4.0 → mod_trace-0.4.2}/.gitignore +0 -0
  10. {mod_trace-0.4.0 → mod_trace-0.4.2}/LICENSE +0 -0
  11. {mod_trace-0.4.0 → mod_trace-0.4.2}/benchmarks/tiny_pytorch.py +0 -0
  12. {mod_trace-0.4.0 → mod_trace-0.4.2}/docs/ARCHITECTURE.md +0 -0
  13. {mod_trace-0.4.0 → mod_trace-0.4.2}/docs/REAL_MODELS.md +0 -0
  14. {mod_trace-0.4.0 → mod_trace-0.4.2}/docs/tensor-lab.md +0 -0
  15. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/broken_shape.json +0 -0
  16. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/lightgbm/README.md +0 -0
  17. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/lightgbm/clf_v1.txt +0 -0
  18. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/lightgbm/clf_v2.txt +0 -0
  19. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/lightgbm/generate_demo_models.py +0 -0
  20. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/make_sample_catboost.py +0 -0
  21. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/mlp.json +0 -0
  22. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/onnx/README.md +0 -0
  23. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/onnx/generate_demo_models.py +0 -0
  24. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/onnx/mlp_retrain_a.onnx +0 -0
  25. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/onnx/mlp_retrain_b.onnx +0 -0
  26. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/onnx/mlp_v1.onnx +0 -0
  27. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/onnx/mlp_v2.onnx +0 -0
  28. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/pytorch/README.md +0 -0
  29. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/pytorch/generate_demo_models.py +0 -0
  30. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/pytorch/mlp_v1.pt +0 -0
  31. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/pytorch/mlp_v2.pt +0 -0
  32. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/tiny_attention.json +0 -0
  33. {mod_trace-0.4.0 → mod_trace-0.4.2}/examples/tiny_attention_plan.json +0 -0
  34. {mod_trace-0.4.0 → mod_trace-0.4.2}/src/catboost_deep_diff.py +0 -0
  35. {mod_trace-0.4.0 → mod_trace-0.4.2}/src/catboost_explain.py +0 -0
  36. {mod_trace-0.4.0 → mod_trace-0.4.2}/src/cbm.rs +0 -0
  37. {mod_trace-0.4.0 → mod_trace-0.4.2}/src/demo.rs +0 -0
  38. {mod_trace-0.4.0 → mod_trace-0.4.2}/src/explain.rs +0 -0
  39. {mod_trace-0.4.0 → mod_trace-0.4.2}/src/lgbm.rs +0 -0
  40. {mod_trace-0.4.0 → mod_trace-0.4.2}/src/model.rs +0 -0
  41. {mod_trace-0.4.0 → mod_trace-0.4.2}/src/onnx.rs +0 -0
  42. {mod_trace-0.4.0 → mod_trace-0.4.2}/src/tensor.rs +0 -0
@@ -16,7 +16,7 @@ checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8"
16
16
 
17
17
  [[package]]
18
18
  name = "mod-trace"
19
- version = "0.4.0"
19
+ version = "0.4.2"
20
20
  dependencies = [
21
21
  "serde",
22
22
  "serde_json",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "mod-trace"
3
- version = "0.4.0"
3
+ version = "0.4.2"
4
4
  edition = "2024"
5
5
  description = "Rust CLI for inspecting ML model artifacts without loading the framework"
6
6
  license = "MIT"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mod-trace
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3 :: Only
@@ -500,9 +500,27 @@ cargo run -- inspect models/tiny-distilbert-base-cased/model_fixed.onnx
500
500
 
501
501
  Fixed shapes such as `[1, 8]` produce better numeric estimates than symbolic shapes such as `[batch, sequence]`.
502
502
 
503
+ ## PyTorch
504
+
505
+ mod-trace reads PyTorch `torch.save` files (`.pt`, `.pth`, `.bin`, `.ckpt`) **natively — no torch, no Python**:
506
+
507
+ ```sh
508
+ mod-trace inspect model.pt
509
+ mod-trace diff old.pt new.pt
510
+ mod-trace explain-diff old.pt new.pt
511
+ mod-trace check --max-parameter-growth 30% old.pt new.pt
512
+ ```
513
+
514
+ It parses the `torch.save` zip (pickled structure + raw tensor storages) and reports file size, tensor count, **estimated parameter count** (storage bytes ÷ dtype), **dominant dtype**, **recovered parameter/layer names**, and a sampled weight fingerprint that changes on a retrain/finetune. ZIP64 is handled for models over 4 GB.
515
+
516
+ Limits, by design:
517
+ - **Tensor shapes are not decoded** (that needs a full pickle interpreter) — you get counts, names, and dtype, not per-tensor shapes.
518
+ - **Legacy (pre-1.6) pickle `.pt`** and older Hugging Face `pytorch_model.bin` recover names + a fingerprint but not parameter sizes (tensors aren't stored as zip entries there). Modern zip-format saves get the full report.
519
+ - **`.safetensors` is a different format** and is not read by mod-trace.
520
+
503
521
  ### Exporting any PyTorch model to ONNX
504
522
 
505
- mod-trace does not read native PyTorch `.pt`/`.pth` files (those are Python pickles / TorchScript archives). The supported path is to export to ONNX, which is the usual serving format anyway. For a plain `nn.Module` the export is a single call:
523
+ For richer graph-level detail (operators, attention layers), or if you only have a `.safetensors`/legacy file, export to ONNX — the usual serving format, which mod-trace reads fully. For a plain `nn.Module` the export is a single call:
506
524
 
507
525
  ```python
508
526
  import torch
@@ -483,9 +483,27 @@ cargo run -- inspect models/tiny-distilbert-base-cased/model_fixed.onnx
483
483
 
484
484
  Fixed shapes such as `[1, 8]` produce better numeric estimates than symbolic shapes such as `[batch, sequence]`.
485
485
 
486
+ ## PyTorch
487
+
488
+ mod-trace reads PyTorch `torch.save` files (`.pt`, `.pth`, `.bin`, `.ckpt`) **natively — no torch, no Python**:
489
+
490
+ ```sh
491
+ mod-trace inspect model.pt
492
+ mod-trace diff old.pt new.pt
493
+ mod-trace explain-diff old.pt new.pt
494
+ mod-trace check --max-parameter-growth 30% old.pt new.pt
495
+ ```
496
+
497
+ It parses the `torch.save` zip (pickled structure + raw tensor storages) and reports file size, tensor count, **estimated parameter count** (storage bytes ÷ dtype), **dominant dtype**, **recovered parameter/layer names**, and a sampled weight fingerprint that changes on a retrain/finetune. ZIP64 is handled for models over 4 GB.
498
+
499
+ Limits, by design:
500
+ - **Tensor shapes are not decoded** (that needs a full pickle interpreter) — you get counts, names, and dtype, not per-tensor shapes.
501
+ - **Legacy (pre-1.6) pickle `.pt`** and older Hugging Face `pytorch_model.bin` recover names + a fingerprint but not parameter sizes (tensors aren't stored as zip entries there). Modern zip-format saves get the full report.
502
+ - **`.safetensors` is a different format** and is not read by mod-trace.
503
+
486
504
  ### Exporting any PyTorch model to ONNX
487
505
 
488
- mod-trace does not read native PyTorch `.pt`/`.pth` files (those are Python pickles / TorchScript archives). The supported path is to export to ONNX, which is the usual serving format anyway. For a plain `nn.Module` the export is a single call:
506
+ For richer graph-level detail (operators, attention layers), or if you only have a `.safetensors`/legacy file, export to ONNX — the usual serving format, which mod-trace reads fully. For a plain `nn.Module` the export is a single call:
489
507
 
490
508
  ```python
491
509
  import torch
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "mod-trace"
7
- version = "0.4.0"
7
+ version = "0.4.2"
8
8
  description = "Rust CLI for inspecting ML model artifacts without loading the framework"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"
@@ -1815,7 +1815,7 @@ fn artifact_kind(path: &str) -> ArtifactKind {
1815
1815
  Some("cbm") => return ArtifactKind::CatBoost,
1816
1816
  Some("lgb") => return ArtifactKind::LightGbm,
1817
1817
  Some("onnx") => return ArtifactKind::Onnx,
1818
- Some("pt") | Some("pth") => return ArtifactKind::PyTorch,
1818
+ Some("pt") | Some("pth") | Some("bin") | Some("ckpt") => return ArtifactKind::PyTorch,
1819
1819
  Some("json") => return ArtifactKind::Json,
1820
1820
  _ => {}
1821
1821
  }
@@ -1824,11 +1824,22 @@ fn artifact_kind(path: &str) -> ArtifactKind {
1824
1824
  ArtifactKind::CatBoost
1825
1825
  } else if has_lgbm_signature(path) {
1826
1826
  ArtifactKind::LightGbm
1827
+ } else if has_pickle_signature(path) {
1828
+ // torch.save legacy pickle (0x80 proto marker) with an unknown extension
1829
+ ArtifactKind::PyTorch
1827
1830
  } else {
1828
1831
  ArtifactKind::Unknown
1829
1832
  }
1830
1833
  }
1831
1834
 
1835
+ fn has_pickle_signature(path: &str) -> bool {
1836
+ let mut header = [0u8; 1];
1837
+ fs::File::open(path)
1838
+ .and_then(|mut file| file.read_exact(&mut header))
1839
+ .map(|_| header[0] == 0x80)
1840
+ .unwrap_or(false)
1841
+ }
1842
+
1832
1843
  fn has_cbm_signature(path: &str) -> bool {
1833
1844
  let mut header = [0u8; 4];
1834
1845
  fs::File::open(path)
@@ -3169,15 +3180,21 @@ fn print_pt_report(report: &pt::PtReport, limit: usize) {
3169
3180
  }
3170
3181
  println!();
3171
3182
  println!("Structure:");
3172
- println!(" Tensors (storages): {}", report.tensor_count);
3173
- println!(
3174
- " Parameters (est): {}",
3175
- format_count_human(report.estimated_parameter_count as usize)
3176
- );
3177
- println!(
3178
- " Parameter bytes: {}",
3179
- format_bytes(report.total_parameter_bytes as usize)
3180
- );
3183
+ if report.is_zip {
3184
+ println!(" Tensors (storages): {}", report.tensor_count);
3185
+ println!(
3186
+ " Parameters (est): {}",
3187
+ format_count_human(report.estimated_parameter_count as usize)
3188
+ );
3189
+ println!(
3190
+ " Parameter bytes: {}",
3191
+ format_bytes(report.total_parameter_bytes as usize)
3192
+ );
3193
+ } else {
3194
+ println!(" Tensors (storages): unknown (legacy pickle)");
3195
+ println!(" Parameters (est): unknown (legacy pickle does not store sizes as entries)");
3196
+ println!(" Parameter bytes: unknown (legacy pickle)");
3197
+ }
3181
3198
  print_optional("Dominant dtype", report.dominant_dtype.as_deref());
3182
3199
  println!();
3183
3200
  println!("Parameter-like Internals:");
@@ -35,10 +35,6 @@ impl PtReport {
35
35
  }
36
36
  }
37
37
 
38
- pub fn looks_like_pt(head: &[u8]) -> bool {
39
- head.starts_with(b"PK\x03\x04") || head.first() == Some(&0x80)
40
- }
41
-
42
38
  const SAMPLE_PER_STORAGE: u64 = 1 << 20; // 1 MiB sampled per tensor for the weight fingerprint
43
39
 
44
40
  pub fn inspect(path: &str) -> Result<PtReport, String> {
@@ -231,7 +227,8 @@ fn inspect_zip(file: &mut File, path: &str, total: u64) -> Result<PtReport, Stri
231
227
  }
232
228
 
233
229
  fn inspect_legacy(file: &mut File, path: &str, total: u64) -> Result<PtReport, String> {
234
- // Legacy (non-zip) pickle: we can only scan strings + fingerprint the bytes.
230
+ // Legacy (non-zip) pickle: tensors aren't stored as separate entries, so we
231
+ // recover names + dtype + a count of storage references, but not sizes.
235
232
  let bytes = read_at(file, 0, total as usize)?;
236
233
  let param_names = recover_param_names(&bytes);
237
234
  let (dtype, _) = dominant_dtype(&bytes);
@@ -250,6 +247,7 @@ fn inspect_legacy(file: &mut File, path: &str, total: u64) -> Result<PtReport, S
250
247
  })
251
248
  }
252
249
 
250
+
253
251
  fn file_name(path: &str) -> String {
254
252
  Path::new(path)
255
253
  .file_name()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes