mod-trace 0.3.2__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. {mod_trace-0.3.2 → mod_trace-0.4.1}/Cargo.lock +1 -1
  2. {mod_trace-0.3.2 → mod_trace-0.4.1}/Cargo.toml +1 -1
  3. {mod_trace-0.3.2 → mod_trace-0.4.1}/PKG-INFO +21 -3
  4. {mod_trace-0.3.2 → mod_trace-0.4.1}/README.md +20 -2
  5. mod_trace-0.4.1/examples/pytorch/README.md +35 -0
  6. mod_trace-0.4.1/examples/pytorch/generate_demo_models.py +35 -0
  7. mod_trace-0.4.1/examples/pytorch/mlp_v1.pt +0 -0
  8. mod_trace-0.4.1/examples/pytorch/mlp_v2.pt +0 -0
  9. {mod_trace-0.3.2 → mod_trace-0.4.1}/pyproject.toml +1 -1
  10. {mod_trace-0.3.2 → mod_trace-0.4.1}/src/main.rs +452 -16
  11. mod_trace-0.4.1/src/pt.rs +398 -0
  12. {mod_trace-0.3.2 → mod_trace-0.4.1}/.github/workflows/release.yml +0 -0
  13. {mod_trace-0.3.2 → mod_trace-0.4.1}/.gitignore +0 -0
  14. {mod_trace-0.3.2 → mod_trace-0.4.1}/LICENSE +0 -0
  15. {mod_trace-0.3.2 → mod_trace-0.4.1}/benchmarks/tiny_pytorch.py +0 -0
  16. {mod_trace-0.3.2 → mod_trace-0.4.1}/docs/ARCHITECTURE.md +0 -0
  17. {mod_trace-0.3.2 → mod_trace-0.4.1}/docs/REAL_MODELS.md +0 -0
  18. {mod_trace-0.3.2 → mod_trace-0.4.1}/docs/tensor-lab.md +0 -0
  19. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/broken_shape.json +0 -0
  20. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/lightgbm/README.md +0 -0
  21. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/lightgbm/clf_v1.txt +0 -0
  22. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/lightgbm/clf_v2.txt +0 -0
  23. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/lightgbm/generate_demo_models.py +0 -0
  24. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/make_sample_catboost.py +0 -0
  25. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/mlp.json +0 -0
  26. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/onnx/README.md +0 -0
  27. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/onnx/generate_demo_models.py +0 -0
  28. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/onnx/mlp_retrain_a.onnx +0 -0
  29. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/onnx/mlp_retrain_b.onnx +0 -0
  30. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/onnx/mlp_v1.onnx +0 -0
  31. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/onnx/mlp_v2.onnx +0 -0
  32. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/tiny_attention.json +0 -0
  33. {mod_trace-0.3.2 → mod_trace-0.4.1}/examples/tiny_attention_plan.json +0 -0
  34. {mod_trace-0.3.2 → mod_trace-0.4.1}/src/catboost_deep_diff.py +0 -0
  35. {mod_trace-0.3.2 → mod_trace-0.4.1}/src/catboost_explain.py +0 -0
  36. {mod_trace-0.3.2 → mod_trace-0.4.1}/src/cbm.rs +0 -0
  37. {mod_trace-0.3.2 → mod_trace-0.4.1}/src/demo.rs +0 -0
  38. {mod_trace-0.3.2 → mod_trace-0.4.1}/src/explain.rs +0 -0
  39. {mod_trace-0.3.2 → mod_trace-0.4.1}/src/lgbm.rs +0 -0
  40. {mod_trace-0.3.2 → mod_trace-0.4.1}/src/model.rs +0 -0
  41. {mod_trace-0.3.2 → mod_trace-0.4.1}/src/onnx.rs +0 -0
  42. {mod_trace-0.3.2 → mod_trace-0.4.1}/src/tensor.rs +0 -0
@@ -16,7 +16,7 @@ checksum = "6b947ae49db0d222b1dbc6b113ce7248a3fc3a6ca21b696717bfc000ba4484d8"
16
16
 
17
17
  [[package]]
18
18
  name = "mod-trace"
19
- version = "0.3.2"
19
+ version = "0.4.1"
20
20
  dependencies = [
21
21
  "serde",
22
22
  "serde_json",
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "mod-trace"
3
- version = "0.3.2"
3
+ version = "0.4.1"
4
4
  edition = "2024"
5
5
  description = "Rust CLI for inspecting ML model artifacts without loading the framework"
6
6
  license = "MIT"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mod-trace
3
- Version: 0.3.2
3
+ Version: 0.4.1
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: 3
6
6
  Classifier: Programming Language :: Python :: 3 :: Only
@@ -25,7 +25,7 @@ mod-trace is a small Rust CLI for answering a practical question:
25
25
  What is inside this model file?
26
26
  ```
27
27
 
28
- It can inspect real artifacts such as CatBoost `.cbm` files, LightGBM `.txt`/`.lgb` text models, and ONNX `.onnx` graphs, then report structure, size, parameters, operator mix, rough inference cost, and changes between versions. CatBoost, LightGBM, and ONNX are all read natively — no Python, framework, or runtime needed (CatBoost `--deep` is the one optional exception).
28
+ It can inspect real artifacts such as CatBoost `.cbm` files, LightGBM `.txt`/`.lgb` text models, ONNX `.onnx` graphs, and PyTorch `.pt`/`.pth` checkpoints, then report structure, size, parameters, operator mix, rough inference cost, and changes between versions. All formats are read natively — no Python, framework, or runtime needed (CatBoost `--deep` is the one optional exception). The PyTorch reader is static: it sizes/names tensors and fingerprints weights without decoding exact shapes.
29
29
 
30
30
  The most useful command is `explain-diff`, which says in plain English what changed between two model versions:
31
31
 
@@ -500,9 +500,27 @@ cargo run -- inspect models/tiny-distilbert-base-cased/model_fixed.onnx
500
500
 
501
501
  Fixed shapes such as `[1, 8]` produce better numeric estimates than symbolic shapes such as `[batch, sequence]`.
502
502
 
503
+ ## PyTorch
504
+
505
+ mod-trace reads PyTorch `torch.save` files (`.pt`, `.pth`, `.bin`, `.ckpt`) **natively — no torch, no Python**:
506
+
507
+ ```sh
508
+ mod-trace inspect model.pt
509
+ mod-trace diff old.pt new.pt
510
+ mod-trace explain-diff old.pt new.pt
511
+ mod-trace check --max-parameter-growth 30% old.pt new.pt
512
+ ```
513
+
514
+ It parses the `torch.save` zip (pickled structure + raw tensor storages) and reports file size, tensor count, **estimated parameter count** (storage bytes ÷ dtype), **dominant dtype**, **recovered parameter/layer names**, and a sampled weight fingerprint that changes on a retrain/finetune. ZIP64 is handled for models over 4 GB.
515
+
516
+ Limits, by design:
517
+ - **Tensor shapes are not decoded** (that needs a full pickle interpreter) — you get counts, names, and dtype, not per-tensor shapes.
518
+ - **Legacy (pre-1.6) pickle `.pt`** and older Hugging Face `pytorch_model.bin` files yield names and a fingerprint but not parameter sizes (tensors aren't stored as zip entries there). Modern zip-format saves get the full report.
519
+ - **`.safetensors` is a different format** and is not read by mod-trace.
520
+
503
521
  ### Exporting any PyTorch model to ONNX
504
522
 
505
- mod-trace does not read native PyTorch `.pt`/`.pth` files (those are Python pickles / TorchScript archives). The supported path is to export to ONNX, which is the usual serving format anyway. For a plain `nn.Module` the export is a single call:
523
+ For richer graph-level detail (operators, attention layers), or if you only have a `.safetensors`/legacy file, export to ONNX — the usual serving format, which mod-trace reads fully. For a plain `nn.Module` the export is a single call:
506
524
 
507
525
  ```python
508
526
  import torch
@@ -8,7 +8,7 @@ mod-trace is a small Rust CLI for answering a practical question:
8
8
  What is inside this model file?
9
9
  ```
10
10
 
11
- It can inspect real artifacts such as CatBoost `.cbm` files, LightGBM `.txt`/`.lgb` text models, and ONNX `.onnx` graphs, then report structure, size, parameters, operator mix, rough inference cost, and changes between versions. CatBoost, LightGBM, and ONNX are all read natively — no Python, framework, or runtime needed (CatBoost `--deep` is the one optional exception).
11
+ It can inspect real artifacts such as CatBoost `.cbm` files, LightGBM `.txt`/`.lgb` text models, ONNX `.onnx` graphs, and PyTorch `.pt`/`.pth` checkpoints, then report structure, size, parameters, operator mix, rough inference cost, and changes between versions. All formats are read natively — no Python, framework, or runtime needed (CatBoost `--deep` is the one optional exception). The PyTorch reader is static: it sizes/names tensors and fingerprints weights without decoding exact shapes.
12
12
 
13
13
  The most useful command is `explain-diff`, which says in plain English what changed between two model versions:
14
14
 
@@ -483,9 +483,27 @@ cargo run -- inspect models/tiny-distilbert-base-cased/model_fixed.onnx
483
483
 
484
484
  Fixed shapes such as `[1, 8]` produce better numeric estimates than symbolic shapes such as `[batch, sequence]`.
485
485
 
486
+ ## PyTorch
487
+
488
+ mod-trace reads PyTorch `torch.save` files (`.pt`, `.pth`, `.bin`, `.ckpt`) **natively — no torch, no Python**:
489
+
490
+ ```sh
491
+ mod-trace inspect model.pt
492
+ mod-trace diff old.pt new.pt
493
+ mod-trace explain-diff old.pt new.pt
494
+ mod-trace check --max-parameter-growth 30% old.pt new.pt
495
+ ```
496
+
497
+ It parses the `torch.save` zip (pickled structure + raw tensor storages) and reports file size, tensor count, **estimated parameter count** (storage bytes ÷ dtype), **dominant dtype**, **recovered parameter/layer names**, and a sampled weight fingerprint that changes on a retrain/finetune. ZIP64 is handled for models over 4 GB.
498
+
499
+ Limits, by design:
500
+ - **Tensor shapes are not decoded** (that needs a full pickle interpreter) — you get counts, names, and dtype, not per-tensor shapes.
501
+ - **Legacy (pre-1.6) pickle `.pt`** and older Hugging Face `pytorch_model.bin` files yield names and a fingerprint but not parameter sizes (tensors aren't stored as zip entries there). Modern zip-format saves get the full report.
502
+ - **`.safetensors` is a different format** and is not read by mod-trace.
503
+
486
504
  ### Exporting any PyTorch model to ONNX
487
505
 
488
- mod-trace does not read native PyTorch `.pt`/`.pth` files (those are Python pickles / TorchScript archives). The supported path is to export to ONNX, which is the usual serving format anyway. For a plain `nn.Module` the export is a single call:
506
+ For richer graph-level detail (operators, attention layers), or if you only have a `.safetensors`/legacy file, export to ONNX — the usual serving format, which mod-trace reads fully. For a plain `nn.Module` the export is a single call:
489
507
 
490
508
  ```python
491
509
  import torch
@@ -0,0 +1,35 @@
1
+ # PyTorch example models
2
+
3
+ Synthetic `torch.save` artifacts for trying `mod-trace` on PyTorch with **no
4
+ torch and no Python** — mod-trace reads the `.pt` zip (pickled structure + raw
5
+ tensor storages) statically.
6
+
7
+ | Files | What they show |
8
+ |-------|----------------|
9
+ | `mlp_v1.pt` vs `mlp_v2.pt` | Same 2-layer MLP, hidden size 32 → 64 (parameter count ~doubles, same layer names). |
10
+
11
+ ## Try it
12
+
13
+ ```bash
14
+ mod-trace inspect examples/pytorch/mlp_v1.pt
15
+ mod-trace explain-diff examples/pytorch/mlp_v1.pt examples/pytorch/mlp_v2.pt
16
+ mod-trace check --max-parameter-growth 30% examples/pytorch/mlp_v1.pt examples/pytorch/mlp_v2.pt
17
+ mod-trace inspect --json examples/pytorch/mlp_v1.pt
18
+ ```
19
+
20
+ ## What it reads (and what it doesn't)
21
+
22
+ Reads, statically: file size, tensor/storage count, **estimated parameter count**
23
+ (from storage bytes ÷ dtype), **dominant dtype**, **recovered parameter/layer
24
+ names** (`fc1.weight`, …), and fingerprints (a sampled weight fingerprint that
25
+ changes on a retrain/finetune).
26
+
27
+ Does **not** decode exact per-tensor shapes — that would need a full pickle
28
+ interpreter. Same static/heuristic philosophy as the CatBoost and ONNX readers.
29
+
30
+ ## Regenerate
31
+
32
+ ```bash
33
+ python -m pip install torch
34
+ python examples/pytorch/generate_demo_models.py
35
+ ```
@@ -0,0 +1,35 @@
1
+ """Generate the synthetic PyTorch demo models used by the README examples.
2
+
3
+ Fully synthetic (no real data). Run:
4
+
5
+ python -m pip install torch
6
+ python examples/pytorch/generate_demo_models.py
7
+
8
+ Produces, in this directory:
9
+ mlp_v1.pt / mlp_v2.pt -> same 2-layer MLP, different hidden size (32 vs 64)
10
+ """
11
+
12
+ import os
13
+
14
+ import torch
15
+ import torch.nn as nn
16
+
17
+ HERE = os.path.dirname(os.path.abspath(__file__))
18
+
19
+
20
+ class Net(nn.Module):
21
+ def __init__(self, hidden):
22
+ super().__init__()
23
+ self.fc1 = nn.Linear(16, hidden)
24
+ self.fc2 = nn.Linear(hidden, 4)
25
+
26
+ def forward(self, x):
27
+ return self.fc2(torch.relu(self.fc1(x)))
28
+
29
+
30
+ if __name__ == "__main__":
31
+ torch.manual_seed(0)
32
+ torch.save(Net(32).state_dict(), os.path.join(HERE, "mlp_v1.pt"))
33
+ torch.manual_seed(1)
34
+ torch.save(Net(64).state_dict(), os.path.join(HERE, "mlp_v2.pt"))
35
+ print("wrote mlp_v1.pt and mlp_v2.pt")
@@ -4,7 +4,7 @@ build-backend = "maturin"
4
4
 
5
5
  [project]
6
6
  name = "mod-trace"
7
- version = "0.3.2"
7
+ version = "0.4.1"
8
8
  description = "Rust CLI for inspecting ML model artifacts without loading the framework"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.9"