mrm-trace 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mrm_trace-0.1.0/PKG-INFO +234 -0
- mrm_trace-0.1.0/README.md +207 -0
- mrm_trace-0.1.0/mrm_trace/__init__.py +7 -0
- mrm_trace-0.1.0/mrm_trace/analyser/__init__.py +53 -0
- mrm_trace-0.1.0/mrm_trace/analyser/iai.py +67 -0
- mrm_trace-0.1.0/mrm_trace/analyser/locality.py +83 -0
- mrm_trace-0.1.0/mrm_trace/analyser/read_freq.py +40 -0
- mrm_trace-0.1.0/mrm_trace/analyser/retention.py +83 -0
- mrm_trace-0.1.0/mrm_trace/analyser/suitability.py +63 -0
- mrm_trace-0.1.0/mrm_trace/analyser/working_set.py +49 -0
- mrm_trace-0.1.0/mrm_trace/analyser/write_once.py +50 -0
- mrm_trace-0.1.0/mrm_trace/api.py +52 -0
- mrm_trace-0.1.0/mrm_trace/cli.py +140 -0
- mrm_trace-0.1.0/mrm_trace/collector/__init__.py +42 -0
- mrm_trace-0.1.0/mrm_trace/collector/artifact_manager.py +67 -0
- mrm_trace-0.1.0/mrm_trace/collector/base.py +46 -0
- mrm_trace-0.1.0/mrm_trace/collector/memray_runner.py +83 -0
- mrm_trace-0.1.0/mrm_trace/collector/perf_runner.py +164 -0
- mrm_trace-0.1.0/mrm_trace/collector/process_monitor.py +115 -0
- mrm_trace-0.1.0/mrm_trace/config/__init__.py +4 -0
- mrm_trace-0.1.0/mrm_trace/config/loader.py +28 -0
- mrm_trace-0.1.0/mrm_trace/config/schema.py +220 -0
- mrm_trace-0.1.0/mrm_trace/config/validators.py +143 -0
- mrm_trace-0.1.0/mrm_trace/engines/__init__.py +29 -0
- mrm_trace-0.1.0/mrm_trace/engines/base.py +140 -0
- mrm_trace-0.1.0/mrm_trace/engines/llamacpp.py +200 -0
- mrm_trace-0.1.0/mrm_trace/engines/vllm.py +176 -0
- mrm_trace-0.1.0/mrm_trace/labeller/__init__.py +15 -0
- mrm_trace-0.1.0/mrm_trace/labeller/address_tracker.py +111 -0
- mrm_trace-0.1.0/mrm_trace/labeller/kv_lifecycle.py +80 -0
- mrm_trace-0.1.0/mrm_trace/labeller/labeller.py +228 -0
- mrm_trace-0.1.0/mrm_trace/labeller/symbol_rules.py +106 -0
- mrm_trace-0.1.0/mrm_trace/orchestration/__init__.py +0 -0
- mrm_trace-0.1.0/mrm_trace/parser/__init__.py +13 -0
- mrm_trace-0.1.0/mrm_trace/parser/memray_parser.py +93 -0
- mrm_trace-0.1.0/mrm_trace/parser/normalizer.py +61 -0
- mrm_trace-0.1.0/mrm_trace/parser/perf_script_parser.py +102 -0
- mrm_trace-0.1.0/mrm_trace/parser/schema.py +33 -0
- mrm_trace-0.1.0/mrm_trace/parser/writer.py +54 -0
- mrm_trace-0.1.0/mrm_trace/reporter/__init__.py +24 -0
- mrm_trace-0.1.0/mrm_trace/reporter/figures.py +138 -0
- mrm_trace-0.1.0/mrm_trace/reporter/manifest.py +59 -0
- mrm_trace-0.1.0/mrm_trace/reporter/metrics_csv.py +87 -0
- mrm_trace-0.1.0/mrm_trace/reporter/parquet_export.py +52 -0
- mrm_trace-0.1.0/mrm_trace/reporter/run_exporter.py +123 -0
- mrm_trace-0.1.0/mrm_trace/schema_version.py +127 -0
- mrm_trace-0.1.0/mrm_trace/telemetry/__init__.py +21 -0
- mrm_trace-0.1.0/mrm_trace/telemetry/baseline.py +68 -0
- mrm_trace-0.1.0/mrm_trace/telemetry/observer_effect.py +66 -0
- mrm_trace-0.1.0/mrm_trace/telemetry/thermal.py +77 -0
- mrm_trace-0.1.0/mrm_trace/telemetry/validity.py +128 -0
- mrm_trace-0.1.0/mrm_trace/utils/__init__.py +0 -0
- mrm_trace-0.1.0/mrm_trace/utils/files.py +18 -0
- mrm_trace-0.1.0/mrm_trace/utils/ids.py +17 -0
- mrm_trace-0.1.0/mrm_trace/utils/logging.py +48 -0
- mrm_trace-0.1.0/mrm_trace.egg-info/PKG-INFO +234 -0
- mrm_trace-0.1.0/mrm_trace.egg-info/SOURCES.txt +61 -0
- mrm_trace-0.1.0/mrm_trace.egg-info/dependency_links.txt +1 -0
- mrm_trace-0.1.0/mrm_trace.egg-info/entry_points.txt +2 -0
- mrm_trace-0.1.0/mrm_trace.egg-info/requires.txt +21 -0
- mrm_trace-0.1.0/mrm_trace.egg-info/top_level.txt +1 -0
- mrm_trace-0.1.0/pyproject.toml +68 -0
- mrm_trace-0.1.0/setup.cfg +4 -0
mrm_trace-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: mrm-trace
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: LLM inference memory trace platform for MRM research
|
|
5
|
+
License: MIT
|
|
6
|
+
Requires-Python: >=3.11
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
Requires-Dist: pydantic>=2.0
|
|
9
|
+
Requires-Dist: pyyaml>=6.0
|
|
10
|
+
Requires-Dist: typer>=0.12
|
|
11
|
+
Requires-Dist: rich>=13.0
|
|
12
|
+
Requires-Dist: pandas>=2.0
|
|
13
|
+
Requires-Dist: pyarrow>=15.0
|
|
14
|
+
Requires-Dist: psutil>=5.9
|
|
15
|
+
Requires-Dist: numpy>=1.26
|
|
16
|
+
Provides-Extra: test
|
|
17
|
+
Requires-Dist: pytest>=8.0; extra == "test"
|
|
18
|
+
Requires-Dist: pytest-cov; extra == "test"
|
|
19
|
+
Requires-Dist: pytest-xdist; extra == "test"
|
|
20
|
+
Requires-Dist: pytest-benchmark; extra == "test"
|
|
21
|
+
Requires-Dist: pytest-mock; extra == "test"
|
|
22
|
+
Requires-Dist: hypothesis>=6.0; extra == "test"
|
|
23
|
+
Requires-Dist: freezegun; extra == "test"
|
|
24
|
+
Provides-Extra: plots
|
|
25
|
+
Requires-Dist: matplotlib>=3.8; extra == "plots"
|
|
26
|
+
Requires-Dist: seaborn>=0.13; extra == "plots"
|
|
27
|
+
|
|
28
|
+
# mrm-trace
|
|
29
|
+
|
|
30
|
+
A Python research package for collecting, parsing, labelling, and analysing LLM inference
|
|
31
|
+
memory access traces. Designed as scientific instrumentation for
|
|
32
|
+
**Managed-Retention Memory (MRM)** research - it characterises how model weights, KV cache,
|
|
33
|
+
activations, and runtime allocations are actually accessed during inference.
|
|
34
|
+
|
|
35
|
+
**Primary metrics:** retention duration · write-once ratio · read frequency · working set size
|
|
36
|
+
|
|
37
|
+
---
|
|
38
|
+
|
|
39
|
+
## Requirements
|
|
40
|
+
|
|
41
|
+
| Requirement | Notes |
|
|
42
|
+
|---|---|
|
|
43
|
+
| Linux (WSL2 supported) | `perf mem` requires Linux PMU; WSL2 works |
|
|
44
|
+
| Python ≥ 3.11 | Tested on 3.11 and 3.12 |
|
|
45
|
+
| sudo / CAP_PERFMON | Required for `perf mem` collection |
|
|
46
|
+
|
|
47
|
+
---
|
|
48
|
+
|
|
49
|
+
## Install
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# Clone and set up a virtual environment
|
|
53
|
+
git clone <repo-url>
|
|
54
|
+
cd mrm-trace
|
|
55
|
+
python -m venv venv
|
|
56
|
+
source venv/bin/activate # Windows WSL: same command
|
|
57
|
+
|
|
58
|
+
# Install package + test dependencies
|
|
59
|
+
pip install -e ".[test]"
|
|
60
|
+
|
|
61
|
+
# Optional: install matplotlib/seaborn for figures
|
|
62
|
+
pip install -e ".[test,plots]"
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Quick start
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
# Validate a config file
|
|
71
|
+
mrm-trace validate --config config/default_experiment.yaml
|
|
72
|
+
|
|
73
|
+
# Preview what a run would do (dry run)
|
|
74
|
+
mrm-trace plan --config config/default_experiment.yaml
|
|
75
|
+
|
|
76
|
+
# Run a full experiment (requires model files + sudo for perf)
|
|
77
|
+
mrm-trace run --config config/default_experiment.yaml
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
---
|
|
81
|
+
|
|
82
|
+
## Running tests
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
# Every commit - fast, no I/O
|
|
86
|
+
pytest -m unit
|
|
87
|
+
|
|
88
|
+
# Pre-merge - includes integration tests
|
|
89
|
+
pytest -m "unit or integration"
|
|
90
|
+
|
|
91
|
+
# Before dataset release - scientific correctness checks
|
|
92
|
+
pytest -m validity
|
|
93
|
+
|
|
94
|
+
# Property-based invariant tests (Hypothesis)
|
|
95
|
+
pytest tests/property/
|
|
96
|
+
|
|
97
|
+
# Performance benchmarks (excluded from default run)
|
|
98
|
+
pytest -m performance --benchmark-only
|
|
99
|
+
|
|
100
|
+
# Full suite (excludes slow + performance)
|
|
101
|
+
pytest
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
The test suite has three tiers:
|
|
105
|
+
|
|
106
|
+
| Tier | Marker | Purpose |
|
|
107
|
+
|---|---|---|
|
|
108
|
+
| 1 | `unit` | Individual functions behave correctly |
|
|
109
|
+
| 2 | `integration` | Components work together |
|
|
110
|
+
| 3 | `validity` | Measurements are scientifically sound |
|
|
111
|
+
|
|
112
|
+
Tier-3 validity tests are the most important: they verify that known synthetic inputs produce
|
|
113
|
+
known metric outputs (e.g. a 30s weight retention window must yield `retention_p99_s ≈ 30.0`).
|
|
114
|
+
|
|
115
|
+
---
|
|
116
|
+
|
|
117
|
+
## Output layout
|
|
118
|
+
|
|
119
|
+
Each run writes to `results/<model_id>/<run_id>/`:
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
results/llama-7b/run_20240101_120000/
|
|
123
|
+
├── trace.parquet ← labelled memory access trace
|
|
124
|
+
├── region_map.parquet ← one row per region (weight, kv_cache, …)
|
|
125
|
+
├── kv_block_lifecycle.parquet ← per-block write / read / eviction timestamps
|
|
126
|
+
├── metrics.csv ← per-region-type summary (human-readable)
|
|
127
|
+
├── metadata.json ← hardware, software, observer effect, run validity
|
|
128
|
+
├── manifest.json ← SHA-256 checksums for all files
|
|
129
|
+
└── raw/
|
|
130
|
+
├── perf.data
|
|
131
|
+
├── perf_script.txt
|
|
132
|
+
└── memray.bin
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Run validity classification
|
|
138
|
+
|
|
139
|
+
Every run is automatically classified based on observer overhead:
|
|
140
|
+
|
|
141
|
+
| Class | Criteria |
|
|
142
|
+
|---|---|
|
|
143
|
+
| `clean` | observer CPU < 10 %, observer mem < 5 % of target RSS, no throttle, baseline CPU < 15 % |
|
|
144
|
+
| `marginal` | observer CPU < 20 %, observer mem < 15 % of target RSS, ≤ 2 throttle events |
|
|
145
|
+
| `contaminated` | anything worse than marginal |
|
|
146
|
+
|
|
147
|
+
Contaminated runs are archived but excluded from aggregated metrics and paper figures.
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Architecture
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
mrm_trace/
|
|
155
|
+
├── cli.py CLI (typer)
|
|
156
|
+
├── api.py Python API (Experiment class)
|
|
157
|
+
├── schema_version.py Schema version registry and compatibility checking
|
|
158
|
+
├── engines/ llama.cpp / vLLM wrappers
|
|
159
|
+
├── collector/ perf mem / memray / process_monitor
|
|
160
|
+
├── parser/ perf script + memray parsers → trace.parquet
|
|
161
|
+
├── labeller/ symbol + address-range region classification
|
|
162
|
+
├── analyser/ retention / write-once / read-freq / working-set / IAI / suitability
|
|
163
|
+
├── telemetry/ baseline capture / thermal / observer effect / validity classifier
|
|
164
|
+
├── reporter/ CSV + Parquet export / figures / manifest / RunExporter
|
|
165
|
+
└── utils/ logging / IDs / file helpers
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Key design decisions:
|
|
169
|
+
- **Streaming parser** - generators throughout; never loads full trace into RAM (ADR-2)
|
|
170
|
+
- **Phase-aware tracing** - `weight_load` / `generation` / `teardown` phases distinguish weight from KV (ADR-3)
|
|
171
|
+
- **Observer effect as mandatory output** - every run records overhead and validity class (ADR-4)
|
|
172
|
+
- **Parquet + zstd** - column-oriented, ~3× better compression than gzip (ADR-8)
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## MRM suitability labels
|
|
177
|
+
|
|
178
|
+
| Label | Criteria |
|
|
179
|
+
|---|---|
|
|
180
|
+
| `high_mrm` | write-once ratio ≥ 0.8 **and** retention p99 ≥ 10 s |
|
|
181
|
+
| `medium_mrm` | write-once ratio ≥ 0.5 **and** retention p50 ≥ 1 s |
|
|
182
|
+
| `low_mrm` | everything else |
|
|
183
|
+
|
|
184
|
+
In practice: model weights → `high_mrm`, short-lived KV blocks → `low_mrm`.
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Schema versioning
|
|
189
|
+
|
|
190
|
+
All Parquet output files carry a `mrm_trace_schema_version` in their file metadata.
|
|
191
|
+
The version registry is in `mrm_trace/schema_version.py`. Readers validate
|
|
192
|
+
major-version compatibility on load; a major bump is a breaking change.
|
|
193
|
+
|
|
194
|
+
```python
|
|
195
|
+
from mrm_trace.schema_version import check_parquet_schema
|
|
196
|
+
check_parquet_schema("results/.../trace.parquet", "trace") # raises on incompatibility
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
---
|
|
200
|
+
|
|
201
|
+
## Python API
|
|
202
|
+
|
|
203
|
+
```python
|
|
204
|
+
from mrm_trace.labeller import TraceLabeller
|
|
205
|
+
from mrm_trace.analyser import compute_all
|
|
206
|
+
from mrm_trace.reporter import RunExporter
|
|
207
|
+
|
|
208
|
+
# Label a stream of raw trace rows
|
|
209
|
+
labeller = TraceLabeller()
|
|
210
|
+
labelled = list(labeller.label(raw_rows))
|
|
211
|
+
region_map = labeller.region_map() # call after consuming label()
|
|
212
|
+
kv_lifecycle = labeller.kv_lifecycle()
|
|
213
|
+
|
|
214
|
+
# Analyse
|
|
215
|
+
import pandas as pd
|
|
216
|
+
trace = pd.DataFrame(labelled)
|
|
217
|
+
results = compute_all(trace)
|
|
218
|
+
# results keys: retention_per_region, retention_summary, write_once,
|
|
219
|
+
# read_freq, working_set_per_region, working_set_summary,
|
|
220
|
+
# locality_per_region, locality_summary, iai, suitability
|
|
221
|
+
|
|
222
|
+
# Export a publication-ready run directory
|
|
223
|
+
exporter = RunExporter("results/llama-7b/run_001")
|
|
224
|
+
exporter.export(trace, region_map, kv_lifecycle, results,
|
|
225
|
+
metadata={"run_id": "run_001"}, run_id="run_001")
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
---
|
|
229
|
+
|
|
230
|
+
## Collector hierarchy
|
|
231
|
+
|
|
232
|
+
1. `perf mem` - primary; requires Linux PMU + root/sudo; WSL2 supported
|
|
233
|
+
2. `memray` - fallback; Python-level allocations; no root needed
|
|
234
|
+
3. `process_monitor` - always runs in parallel as coarse baseline (psutil)
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
# mrm-trace
|
|
2
|
+
|
|
3
|
+
A Python research package for collecting, parsing, labelling, and analysing LLM inference
|
|
4
|
+
memory access traces. Designed as scientific instrumentation for
|
|
5
|
+
**Managed-Retention Memory (MRM)** research - it characterises how model weights, KV cache,
|
|
6
|
+
activations, and runtime allocations are actually accessed during inference.
|
|
7
|
+
|
|
8
|
+
**Primary metrics:** retention duration · write-once ratio · read frequency · working set size
|
|
9
|
+
|
|
10
|
+
---
|
|
11
|
+
|
|
12
|
+
## Requirements
|
|
13
|
+
|
|
14
|
+
| Requirement | Notes |
|
|
15
|
+
|---|---|
|
|
16
|
+
| Linux (WSL2 supported) | `perf mem` requires Linux PMU; WSL2 works |
|
|
17
|
+
| Python ≥ 3.11 | Tested on 3.11 and 3.12 |
|
|
18
|
+
| sudo / CAP_PERFMON | Required for `perf mem` collection |
|
|
19
|
+
|
|
20
|
+
---
|
|
21
|
+
|
|
22
|
+
## Install
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
# Clone and set up a virtual environment
|
|
26
|
+
git clone <repo-url>
|
|
27
|
+
cd mrm-trace
|
|
28
|
+
python -m venv venv
|
|
29
|
+
source venv/bin/activate # Windows WSL: same command
|
|
30
|
+
|
|
31
|
+
# Install package + test dependencies
|
|
32
|
+
pip install -e ".[test]"
|
|
33
|
+
|
|
34
|
+
# Optional: install matplotlib/seaborn for figures
|
|
35
|
+
pip install -e ".[test,plots]"
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Quick start
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# Validate a config file
|
|
44
|
+
mrm-trace validate --config config/default_experiment.yaml
|
|
45
|
+
|
|
46
|
+
# Preview what a run would do (dry run)
|
|
47
|
+
mrm-trace plan --config config/default_experiment.yaml
|
|
48
|
+
|
|
49
|
+
# Run a full experiment (requires model files + sudo for perf)
|
|
50
|
+
mrm-trace run --config config/default_experiment.yaml
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
## Running tests
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
# Every commit - fast, no I/O
|
|
59
|
+
pytest -m unit
|
|
60
|
+
|
|
61
|
+
# Pre-merge - includes integration tests
|
|
62
|
+
pytest -m "unit or integration"
|
|
63
|
+
|
|
64
|
+
# Before dataset release - scientific correctness checks
|
|
65
|
+
pytest -m validity
|
|
66
|
+
|
|
67
|
+
# Property-based invariant tests (Hypothesis)
|
|
68
|
+
pytest tests/property/
|
|
69
|
+
|
|
70
|
+
# Performance benchmarks (excluded from default run)
|
|
71
|
+
pytest -m performance --benchmark-only
|
|
72
|
+
|
|
73
|
+
# Full suite (excludes slow + performance)
|
|
74
|
+
pytest
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
The test suite has three tiers:
|
|
78
|
+
|
|
79
|
+
| Tier | Marker | Purpose |
|
|
80
|
+
|---|---|---|
|
|
81
|
+
| 1 | `unit` | Individual functions behave correctly |
|
|
82
|
+
| 2 | `integration` | Components work together |
|
|
83
|
+
| 3 | `validity` | Measurements are scientifically sound |
|
|
84
|
+
|
|
85
|
+
Tier-3 validity tests are the most important: they verify that known synthetic inputs produce
|
|
86
|
+
known metric outputs (e.g. a 30s weight retention window must yield `retention_p99_s ≈ 30.0`).
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Output layout
|
|
91
|
+
|
|
92
|
+
Each run writes to `results/<model_id>/<run_id>/`:
|
|
93
|
+
|
|
94
|
+
```
|
|
95
|
+
results/llama-7b/run_20240101_120000/
|
|
96
|
+
├── trace.parquet ← labelled memory access trace
|
|
97
|
+
├── region_map.parquet ← one row per region (weight, kv_cache, …)
|
|
98
|
+
├── kv_block_lifecycle.parquet ← per-block write / read / eviction timestamps
|
|
99
|
+
├── metrics.csv ← per-region-type summary (human-readable)
|
|
100
|
+
├── metadata.json ← hardware, software, observer effect, run validity
|
|
101
|
+
├── manifest.json ← SHA-256 checksums for all files
|
|
102
|
+
└── raw/
|
|
103
|
+
├── perf.data
|
|
104
|
+
├── perf_script.txt
|
|
105
|
+
└── memray.bin
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
## Run validity classification
|
|
111
|
+
|
|
112
|
+
Every run is automatically classified based on observer overhead:
|
|
113
|
+
|
|
114
|
+
| Class | Criteria |
|
|
115
|
+
|---|---|
|
|
116
|
+
| `clean` | observer CPU < 10 %, observer mem < 5 % of target RSS, no throttle, baseline CPU < 15 % |
|
|
117
|
+
| `marginal` | observer CPU < 20 %, observer mem < 15 % of target RSS, ≤ 2 throttle events |
|
|
118
|
+
| `contaminated` | anything worse than marginal |
|
|
119
|
+
|
|
120
|
+
Contaminated runs are archived but excluded from aggregated metrics and paper figures.
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Architecture
|
|
125
|
+
|
|
126
|
+
```
|
|
127
|
+
mrm_trace/
|
|
128
|
+
├── cli.py CLI (typer)
|
|
129
|
+
├── api.py Python API (Experiment class)
|
|
130
|
+
├── schema_version.py Schema version registry and compatibility checking
|
|
131
|
+
├── engines/ llama.cpp / vLLM wrappers
|
|
132
|
+
├── collector/ perf mem / memray / process_monitor
|
|
133
|
+
├── parser/ perf script + memray parsers → trace.parquet
|
|
134
|
+
├── labeller/ symbol + address-range region classification
|
|
135
|
+
├── analyser/ retention / write-once / read-freq / working-set / IAI / suitability
|
|
136
|
+
├── telemetry/ baseline capture / thermal / observer effect / validity classifier
|
|
137
|
+
├── reporter/ CSV + Parquet export / figures / manifest / RunExporter
|
|
138
|
+
└── utils/ logging / IDs / file helpers
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
Key design decisions:
|
|
142
|
+
- **Streaming parser** - generators throughout; never loads full trace into RAM (ADR-2)
|
|
143
|
+
- **Phase-aware tracing** - `weight_load` / `generation` / `teardown` phases distinguish weight from KV (ADR-3)
|
|
144
|
+
- **Observer effect as mandatory output** - every run records overhead and validity class (ADR-4)
|
|
145
|
+
- **Parquet + zstd** - column-oriented, ~3× better compression than gzip (ADR-8)
|
|
146
|
+
|
|
147
|
+
---
|
|
148
|
+
|
|
149
|
+
## MRM suitability labels
|
|
150
|
+
|
|
151
|
+
| Label | Criteria |
|
|
152
|
+
|---|---|
|
|
153
|
+
| `high_mrm` | write-once ratio ≥ 0.8 **and** retention p99 ≥ 10 s |
|
|
154
|
+
| `medium_mrm` | write-once ratio ≥ 0.5 **and** retention p50 ≥ 1 s |
|
|
155
|
+
| `low_mrm` | everything else |
|
|
156
|
+
|
|
157
|
+
In practice: model weights → `high_mrm`, short-lived KV blocks → `low_mrm`.
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
## Schema versioning
|
|
162
|
+
|
|
163
|
+
All Parquet output files carry a `mrm_trace_schema_version` in their file metadata.
|
|
164
|
+
The version registry is in `mrm_trace/schema_version.py`. Readers validate
|
|
165
|
+
major-version compatibility on load; a major bump is a breaking change.
|
|
166
|
+
|
|
167
|
+
```python
|
|
168
|
+
from mrm_trace.schema_version import check_parquet_schema
|
|
169
|
+
check_parquet_schema("results/.../trace.parquet", "trace") # raises on incompatibility
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Python API
|
|
175
|
+
|
|
176
|
+
```python
|
|
177
|
+
from mrm_trace.labeller import TraceLabeller
|
|
178
|
+
from mrm_trace.analyser import compute_all
|
|
179
|
+
from mrm_trace.reporter import RunExporter
|
|
180
|
+
|
|
181
|
+
# Label a stream of raw trace rows
|
|
182
|
+
labeller = TraceLabeller()
|
|
183
|
+
labelled = list(labeller.label(raw_rows))
|
|
184
|
+
region_map = labeller.region_map() # call after consuming label()
|
|
185
|
+
kv_lifecycle = labeller.kv_lifecycle()
|
|
186
|
+
|
|
187
|
+
# Analyse
|
|
188
|
+
import pandas as pd
|
|
189
|
+
trace = pd.DataFrame(labelled)
|
|
190
|
+
results = compute_all(trace)
|
|
191
|
+
# results keys: retention_per_region, retention_summary, write_once,
|
|
192
|
+
# read_freq, working_set_per_region, working_set_summary,
|
|
193
|
+
# locality_per_region, locality_summary, iai, suitability
|
|
194
|
+
|
|
195
|
+
# Export a publication-ready run directory
|
|
196
|
+
exporter = RunExporter("results/llama-7b/run_001")
|
|
197
|
+
exporter.export(trace, region_map, kv_lifecycle, results,
|
|
198
|
+
metadata={"run_id": "run_001"}, run_id="run_001")
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
---
|
|
202
|
+
|
|
203
|
+
## Collector hierarchy
|
|
204
|
+
|
|
205
|
+
1. `perf mem` - primary; requires Linux PMU + root/sudo; WSL2 supported
|
|
206
|
+
2. `memray` - fallback; Python-level allocations; no root needed
|
|
207
|
+
3. `process_monitor` - always runs in parallel as coarse baseline (psutil)
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Analysis subsystem for mrm-trace — Phase 6."""
|
|
2
|
+
|
|
3
|
+
from mrm_trace.analyser.iai import compute_iai
|
|
4
|
+
from mrm_trace.analyser.locality import compute_locality
|
|
5
|
+
from mrm_trace.analyser.read_freq import compute_read_freq
|
|
6
|
+
from mrm_trace.analyser.retention import compute_retention
|
|
7
|
+
from mrm_trace.analyser.suitability import classify_suitability
|
|
8
|
+
from mrm_trace.analyser.working_set import compute_working_set
|
|
9
|
+
from mrm_trace.analyser.write_once import compute_write_once
|
|
10
|
+
|
|
11
|
+
__all__ = [
|
|
12
|
+
"compute_retention",
|
|
13
|
+
"compute_write_once",
|
|
14
|
+
"compute_read_freq",
|
|
15
|
+
"compute_working_set",
|
|
16
|
+
"compute_locality",
|
|
17
|
+
"compute_iai",
|
|
18
|
+
"classify_suitability",
|
|
19
|
+
"compute_all",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def compute_all(trace) -> dict:
    """
    Run every analyser on a trace and bundle the results into one dict.

    The analysers are invoked in a fixed order (retention, write-once,
    read frequency, working set, locality, IAI) and the suitability
    classification is derived last from the retention summary, write-once
    and read-frequency results.

    Returns a dict with keys:
        retention_per_region, retention_summary,
        write_once, read_freq,
        working_set_per_region, working_set_summary,
        locality_per_region, locality_summary,
        iai, suitability
    """
    retention_per_region, retention_summary = compute_retention(trace)
    write_once = compute_write_once(trace)
    read_freq = compute_read_freq(trace)
    working_set_per_region, working_set_summary = compute_working_set(trace)
    locality_per_region, locality_summary = compute_locality(trace)

    # Keyword order below fixes both the evaluation order of the last two
    # analysers (IAI, then suitability) and the key order of the result.
    return dict(
        retention_per_region=retention_per_region,
        retention_summary=retention_summary,
        write_once=write_once,
        read_freq=read_freq,
        working_set_per_region=working_set_per_region,
        working_set_summary=working_set_summary,
        locality_per_region=locality_per_region,
        locality_summary=locality_summary,
        iai=compute_iai(trace),
        suitability=classify_suitability(
            retention_summary, write_once, read_freq
        ),
    )
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Inter-access interval (IAI) analysis — time between consecutive accesses.
|
|
3
|
+
|
|
4
|
+
Groups by exact address (not address_page) to correctly handle sub-page KV
|
|
5
|
+
blocks at 256-byte stride where multiple blocks share a 4K page.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import numpy as np
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def compute_iai(trace: pd.DataFrame) -> pd.DataFrame:
    """
    Summarise inter-access intervals (IAI) for each region_type.

    An interval is the positive difference between two consecutive
    timestamp_ns values recorded for the same (address, region_type) pair,
    with accesses ordered by timestamp_ns. Zero-length gaps (repeated
    timestamps) are discarded.

    Returns
    -------
    DataFrame
        Columns: region_type, n_intervals, iai_p50_ns, iai_p90_ns,
                 iai_p99_ns, iai_mean_ns
        One row per region_type that produced at least one interval.
        Percentiles are truncated to int; the mean is a float. When no
        interval exists at all, an empty frame with these columns is
        returned.
    """
    summary_columns = [
        "region_type", "n_intervals", "iai_p50_ns",
        "iai_p90_ns", "iai_p99_ns", "iai_mean_ns",
    ]

    ordered = trace.sort_values(["address", "timestamp_ns"])
    by_address = ordered.groupby(["address", "region_type"], sort=False)

    pieces = []
    for (_, kind), accesses in by_address:
        stamps = accesses["timestamp_ns"].to_numpy(dtype=np.int64)
        # np.diff of a single timestamp is already empty, so addresses that
        # were touched only once fall out through the size check below.
        gaps = np.diff(stamps)
        gaps = gaps[gaps > 0]  # ignore same-timestamp duplicates
        if gaps.size:
            pieces.append(pd.DataFrame({"region_type": kind, "iai_ns": gaps}))

    if not pieces:
        return pd.DataFrame(columns=summary_columns)

    pooled = pd.concat(pieces, ignore_index=True)

    def _summarise(kind, values):
        # One output row: interval count, truncated percentiles, float mean.
        return {
            "region_type": kind,
            "n_intervals": len(values),
            "iai_p50_ns": int(values.quantile(0.50)),
            "iai_p90_ns": int(values.quantile(0.90)),
            "iai_p99_ns": int(values.quantile(0.99)),
            "iai_mean_ns": float(values.mean()),
        }

    return pd.DataFrame(
        [
            _summarise(kind, block["iai_ns"])
            for kind, block in pooled.groupby("region_type")
        ]
    )
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Access locality analysis — stride distribution and same-page access fraction.
|
|
3
|
+
|
|
4
|
+
Locality measures spatial regularity: small strides and high same-page fraction
|
|
5
|
+
indicate cache-friendly, predictable access patterns.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Tuple
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def compute_locality(trace: pd.DataFrame) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Compute locality statistics per region.

    Stride = |address[i+1] - address[i]| between consecutive accesses to the
    same region_id, with accesses ordered by timestamp_ns.
    Same-page fraction = fraction of consecutive access pairs whose
    address_page values are equal.

    A region with fewer than two accesses has no consecutive pair; it is
    reported with all three statistics set to 0.0 and still takes part in
    the per-type averages.

    Returns
    -------
    per_region : DataFrame
        Columns: region_id, region_type, n_accesses, mean_stride_bytes,
                 median_stride_bytes, same_page_fraction
    type_summary : DataFrame
        Columns: region_type, mean_stride_bytes, median_stride_bytes,
                 same_page_fraction
        Each value is the unweighted mean of the per-region values of that
        type (so median_stride_bytes is a mean of per-region medians).

    Both frames carry the columns listed above even when the trace has no
    rows.
    """
    per_region_columns = [
        "region_id", "region_type", "n_accesses",
        "mean_stride_bytes", "median_stride_bytes", "same_page_fraction",
    ]
    summary_columns = [
        "region_type", "mean_stride_bytes", "median_stride_bytes",
        "same_page_fraction",
    ]

    rows = []
    trace_sorted = trace.sort_values(["region_id", "timestamp_ns"])

    for region_id, grp in trace_sorted.groupby("region_id"):
        n = len(grp)
        mean_stride = median_stride = same_page_fraction = 0.0

        if n >= 2:
            addrs = grp["address"].to_numpy()
            pages = grp["address_page"].to_numpy()
            # Signed 64-bit arithmetic so a descending address pair yields a
            # negative difference (then abs) rather than unsigned wrap-around.
            strides = np.abs(np.diff(addrs.astype(np.int64)))
            same_page = np.sum(pages[1:] == pages[:-1])
            mean_stride = float(strides.mean())
            median_stride = float(np.median(strides))
            same_page_fraction = float(same_page / len(strides))

        rows.append(
            {
                "region_id": region_id,
                "region_type": grp["region_type"].iloc[0],
                "n_accesses": n,
                "mean_stride_bytes": mean_stride,
                "median_stride_bytes": median_stride,
                "same_page_fraction": same_page_fraction,
            }
        )

    # Passing columns= keeps the documented schema when there are no rows;
    # a bare pd.DataFrame([]) would come back with no columns at all.
    per_region = pd.DataFrame(rows, columns=per_region_columns)
    if per_region.empty:
        return per_region, pd.DataFrame(columns=summary_columns)

    type_summary = (
        per_region.groupby("region_type")
        .agg(
            mean_stride_bytes=("mean_stride_bytes", "mean"),
            median_stride_bytes=("median_stride_bytes", "mean"),
            same_page_fraction=("same_page_fraction", "mean"),
        )
        .reset_index()
    )

    return per_region, type_summary
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Read frequency analysis — total accesses, read fraction, reads-per-write.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def compute_read_freq(trace: pd.DataFrame) -> pd.DataFrame:
    """
    Count reads and writes per region and derive read ratios.

    Rows whose op_type is "load" count as reads and "store" as writes;
    total_accesses is the sum of those two counts only.

    Returns
    -------
    DataFrame
        Columns: region_id, region_type, total_reads, total_writes,
                 total_accesses, read_fraction, reads_per_write
        read_fraction is 0.0 when a region has no loads or stores, and
        reads_per_write is 0.0 when a region has no stores.
    """
    tally = trace.groupby(["region_id", "op_type"]).size().unstack(fill_value=0)

    # A trace made up of a single op type yields only one column; add the
    # missing counterpart so the arithmetic below always has both.
    for op in ("load", "store"):
        if op not in tally.columns:
            tally[op] = 0

    tally = tally.rename(columns={"load": "total_reads", "store": "total_writes"})

    reads = tally["total_reads"]
    writes = tally["total_writes"]
    accesses = reads + writes

    tally["total_accesses"] = accesses
    tally["read_fraction"] = reads.div(accesses).fillna(0.0)
    # Swap zero write counts for NaN before dividing so the undefined ratio
    # ends up as 0.0 after fillna rather than as infinity.
    tally["reads_per_write"] = reads.div(
        writes.replace(0, float("nan"))
    ).fillna(0.0)

    type_by_region = trace.groupby("region_id")["region_type"].first()

    result = tally.reset_index()
    result["region_type"] = result["region_id"].map(type_by_region)

    ordered_columns = [
        "region_id", "region_type", "total_reads", "total_writes",
        "total_accesses", "read_fraction", "reads_per_write",
    ]
    return result[ordered_columns]
|