midden 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- midden-0.1.0/PKG-INFO +72 -0
- midden-0.1.0/README.md +55 -0
- midden-0.1.0/pyproject.toml +41 -0
- midden-0.1.0/src/midden/__init__.py +1 -0
- midden-0.1.0/src/midden/dump/__init__.py +1 -0
- midden-0.1.0/src/midden/dump/dump_heap.py +210 -0
- midden-0.1.0/src/midden/dump/inject.py +411 -0
- midden-0.1.0/src/midden/static/style.css +14 -0
- midden-0.1.0/src/midden/templates/components.html +35 -0
- midden-0.1.0/src/midden/templates/explore.html +25 -0
- midden-0.1.0/src/midden/templates/index.html +42 -0
- midden-0.1.0/src/midden/templates/object.html +52 -0
- midden-0.1.0/src/midden/templates/path.html +46 -0
- midden-0.1.0/src/midden/templates/type.html +27 -0
- midden-0.1.0/src/midden/wsgi.py +216 -0
midden-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: midden
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A simple heap analysis and visualization tool.
|
|
5
|
+
License-Expression: MIT
|
|
6
|
+
Requires-Dist: flask>=3.1.3 ; extra == 'ui'
|
|
7
|
+
Requires-Dist: psutil>=7.2.2 ; extra == 'ui'
|
|
8
|
+
Requires-Dist: midden-analysis==0.1.0 ; extra == 'ui'
|
|
9
|
+
Requires-Dist: cheroot>=11.1.2 ; extra == 'ui'
|
|
10
|
+
Maintainer: James Pickering
|
|
11
|
+
Maintainer-email: James Pickering <james_pic@hotmail.com>
|
|
12
|
+
Requires-Python: >=3.10
|
|
13
|
+
Project-URL: Repository, https://github.com/jamespic/midden
|
|
14
|
+
Project-URL: Issues, https://github.com/jamespic/midden/issues
|
|
15
|
+
Provides-Extra: ui
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
|
|
18
|
+
Midden
|
|
19
|
+
======
|
|
20
|
+
|
|
21
|
+
Midden is a tool for dumping and analysing heaps from Python programs.
|
|
22
|
+
|
|
23
|
+
It takes a "dump first, ask questions later" approach, making it easy to grab a heap dump from a running
|
|
24
|
+
application, then analyse it offline later in its UI, potentially on a different machine entirely.
|
|
25
|
+
|
|
26
|
+
Installing
|
|
27
|
+
----------
|
|
28
|
+
|
|
29
|
+
If you just want to grab a heap dump, you can install everything you need with:
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
pip install midden
|
|
33
|
+
# Or if you're using uv
|
|
34
|
+
uv add midden
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
If you want to analyse the data, you'll need the extra UI dependencies, which you can install with:
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
pip install midden[ui]
|
|
41
|
+
# Or if you're using uv
|
|
42
|
+
uv add midden[ui]
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
Grabbing a Heap Dump
|
|
46
|
+
--------------------
|
|
47
|
+
|
|
48
|
+
Grabbing a heap dump can be as simple as:
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
# Assuming pid we want to grab heap from is pid 12345
|
|
52
|
+
midden-inject 12345 --output-file /tmp/dump.jsonl
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
On Python 3.14 and newer, this will use `sys.remote_exec`, which means no extra dependencies.
|
|
56
|
+
You will need appropriate permissions [as documented here](https://docs.python.org/3/howto/remote_debugging.html#permission-requirements).
|
|
57
|
+
Roughly speaking, you either need to be root/administrator, or be running on Linux with ptrace protection disabled.
|
|
58
|
+
|
|
59
|
+
On Python 3.10 to 3.13, injection is done with gdb, which means gdb needs to be installed.
|
|
60
|
+
Gdb-based injection is only tested on Linux.
|
|
61
|
+
|
|
62
|
+
Analysing a Heap Dump
|
|
63
|
+
---------------------
|
|
64
|
+
|
|
65
|
+
You can run the analysis UI with:
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
midden-ui
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
This will start a web application, and pop up a web browser pointing at the analysis application. Upload a heap dump
|
|
72
|
+
generated with `midden-inject` to get started.
|
midden-0.1.0/README.md
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
Midden
|
|
2
|
+
======
|
|
3
|
+
|
|
4
|
+
Midden is a tool for dumping and analysing heaps from Python programs.
|
|
5
|
+
|
|
6
|
+
It takes a "dump first, ask questions later" approach, making it easy to grab a heap dump from a running
|
|
7
|
+
application, then analyse it offline later in its UI, potentially on a different machine entirely.
|
|
8
|
+
|
|
9
|
+
Installing
|
|
10
|
+
----------
|
|
11
|
+
|
|
12
|
+
If you just want to grab a heap dump, you can install everything you need with:
|
|
13
|
+
|
|
14
|
+
```
|
|
15
|
+
pip install midden
|
|
16
|
+
# Or if you're using uv
|
|
17
|
+
uv add midden
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
If you want to analyse the data, you'll need the extra UI dependencies, which you can install with:
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
pip install midden[ui]
|
|
24
|
+
# Or if you're using uv
|
|
25
|
+
uv add midden[ui]
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
Grabbing a Heap Dump
|
|
29
|
+
--------------------
|
|
30
|
+
|
|
31
|
+
Grabbing a heap dump can be as simple as:
|
|
32
|
+
|
|
33
|
+
```
|
|
34
|
+
# Assuming pid we want to grab heap from is pid 12345
|
|
35
|
+
midden-inject 12345 --output-file /tmp/dump.jsonl
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
On Python 3.14 and newer, this will use `sys.remote_exec`, which means no extra dependencies.
|
|
39
|
+
You will need appropriate permissions [as documented here](https://docs.python.org/3/howto/remote_debugging.html#permission-requirements).
|
|
40
|
+
Roughly speaking, you either need to be root/administrator, or be running on Linux with ptrace protection disabled.
|
|
41
|
+
|
|
42
|
+
On Python 3.10 to 3.13, injection is done with gdb, which means gdb needs to be installed.
|
|
43
|
+
Gdb-based injection is only tested on Linux.
|
|
44
|
+
|
|
45
|
+
Analysing a Heap Dump
|
|
46
|
+
---------------------
|
|
47
|
+
|
|
48
|
+
You can run the analysis UI with:
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
midden-ui
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
This will start a web application, and pop up a web browser pointing at the analysis application. Upload a heap dump
|
|
55
|
+
generated with `midden-inject` to get started.
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "midden"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "A simple heap analysis and visualization tool."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
dependencies = []
|
|
8
|
+
license = "MIT"
|
|
9
|
+
maintainers = [
|
|
10
|
+
{ name = "James Pickering", email = "james_pic@hotmail.com" }
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
[project.urls]
|
|
15
|
+
Repository = "https://github.com/jamespic/midden"
|
|
16
|
+
Issues = "https://github.com/jamespic/midden/issues"
|
|
17
|
+
|
|
18
|
+
[project.optional-dependencies]
|
|
19
|
+
ui = [
|
|
20
|
+
"flask>=3.1.3",
|
|
21
|
+
"psutil>=7.2.2",
|
|
22
|
+
"midden-analysis==0.1.0",
|
|
23
|
+
"cheroot>=11.1.2",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.scripts]
|
|
27
|
+
midden-inject = "midden.dump.inject:main"
|
|
28
|
+
midden-ui = "midden.wsgi:main"
|
|
29
|
+
|
|
30
|
+
[build-system]
|
|
31
|
+
requires = ["uv_build>=0.9.22,<0.10.0"]
|
|
32
|
+
build-backend = "uv_build"
|
|
33
|
+
|
|
34
|
+
[dependency-groups]
|
|
35
|
+
dev = ["docker>=7.1.0", "pytest>=9.0.3", "ruff>=0.15.7", "ty>=0.0.24"]
|
|
36
|
+
|
|
37
|
+
[tool.pytest.ini_options]
|
|
38
|
+
markers = [
|
|
39
|
+
"linux: tests that require Linux",
|
|
40
|
+
"min_python(python_ver): tests that require a minimum Python version",
|
|
41
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Web UI and heap-dump tooling for exploring Python process memory."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Helpers for capturing heap dumps from running Python processes."""
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
"""Script to be injected with either Pyrasite or sys.remote_exec to dump the heap of a running Python process to a file.
|
|
2
|
+
|
|
3
|
+
The output is a JSONL file where each line is a JSON object representing an object in the heap,
|
|
4
|
+
with its id, type, size, references, and optionally its value (for simple types)."""
|
|
5
|
+
|
|
6
|
+
from types import (
|
|
7
|
+
ModuleType,
|
|
8
|
+
FunctionType,
|
|
9
|
+
BuiltinFunctionType,
|
|
10
|
+
MethodType,
|
|
11
|
+
WrapperDescriptorType,
|
|
12
|
+
MethodWrapperType,
|
|
13
|
+
MethodDescriptorType,
|
|
14
|
+
ClassMethodDescriptorType,
|
|
15
|
+
GetSetDescriptorType,
|
|
16
|
+
MemberDescriptorType,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
import gc
|
|
20
|
+
import json
|
|
21
|
+
import os
|
|
22
|
+
import sys
|
|
23
|
+
|
|
24
|
+
# Max length for string representations of values
|
|
25
|
+
_max_value_len = 1000
|
|
26
|
+
|
|
27
|
+
DEFAULT_DUMP_FILE_NAME = "/tmp/dump.jsonl"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _dump_heap():
    """Write a JSONL heap snapshot for the current process to the fixed dump path.

    Every object returned by ``gc.get_objects()`` is written as one JSON line
    holding its ``id``, ``type``, ``references`` (ids of the objects it refers
    to) and shallow ``size``, plus a ``value`` for the types listed in
    ``_value_extractors``. Objects that are only discovered as referents are
    queued in ``extra_objects`` and dumped after the gc-tracked ones.

    The snapshot is written to ``<dump path>.partial`` and renamed into place
    once complete. Errors are reported on stderr instead of being raised, and
    the partial file is removed so that a failed run does not leave a stale
    file behind.
    """
    partial_path = f"{DEFAULT_DUMP_FILE_NAME}.partial"

    # Get all objects tracked by gc
    all_objects = gc.get_objects()
    print(f"Found {len(all_objects)} objects from gc.get_objects()", file=sys.stderr)
    extra_objects = []
    object_ids_tracked = {id(obj) for obj in all_objects}

    # Collect ids of our own data structures so we can exclude them
    exclude_ids = {
        id(all_objects),
        id(extra_objects),
        id(object_ids_tracked),
        id(_dump_heap),
    }
    # Don't forget to exclude the set of excluded ids itself!
    exclude_ids.add(id(exclude_ids))

    def _maybe_add_ref(ref_obj, refs):
        """Record a reference and queue the referent if it has not been seen yet."""
        ref_id = id(ref_obj)
        if ref_id not in exclude_ids:
            refs.append(ref_id)
            if ref_id not in object_ids_tracked:
                extra_objects.append(ref_obj)
                object_ids_tracked.add(ref_id)

    exclude_ids.add(id(_maybe_add_ref))

    def _get_all_references(obj):
        """Get references from an object, including non-gc-tracked immutables."""
        refs = []
        exclude_ids.add(id(refs))

        obj_type = type(obj)
        if obj_type is dict:
            # Iterate over a snapshot: the host program's other threads may
            # still be running, and a dict that changes size while we iterate
            # it raises RuntimeError, which would abort the whole dump.
            items = tuple(obj.items())
            exclude_ids.add(id(items))
            for k, v in items:
                _maybe_add_ref(k, refs)
                _maybe_add_ref(v, refs)
        elif obj_type in (set, frozenset):
            # Same reasoning as for dicts: sets also refuse concurrent resizes.
            members = tuple(obj)
            exclude_ids.add(id(members))
            for item in members:
                _maybe_add_ref(item, refs)
        elif obj_type in (list, tuple):
            for item in obj:
                _maybe_add_ref(item, refs)
        else:
            # Fall back to gc.get_referents for other types,
            # which catches __dict__, slots, etc.
            gc_refs = gc.get_referents(obj)
            exclude_ids.add(id(gc_refs))
            for r in gc_refs:
                _maybe_add_ref(r, refs)

        return refs

    exclude_ids.add(id(_get_all_references))

    try:
        with open(partial_path, "w") as f:
            exclude_ids.add(id(f))

            def dump_object(obj):
                """Serialize one object unless it belongs to the dumper's bookkeeping."""
                obj_id = id(obj)

                # Skip our own bookkeeping objects
                if obj_id in exclude_ids:
                    return

                # Get references (including non-gc-tracked children)
                references = _get_all_references(obj)

                record = {
                    "id": obj_id,
                    "type": _get_type_name(obj),
                    "references": references,
                    # Shallow size only; the 0 is the fallback for objects
                    # that do not report a size.
                    "size": sys.getsizeof(obj, 0),
                    # Don't get referrers - it's too slow. We'll index it offline later.
                }

                # Only include value for whitelisted types
                if extractor := _value_extractors.get(type(obj)):
                    try:
                        record["value"] = extractor(obj)
                    except Exception as e:
                        record["value"] = f"<error extracting value: {e}>"

                exclude_ids.add(id(record))

                line = json.dumps(record, default=str)
                exclude_ids.add(id(line))
                f.write(line)
                f.write("\n")

            exclude_ids.add(id(dump_object))

            for obj in all_objects:
                dump_object(obj)
            # extra_objects keeps growing while we walk it; list iteration
            # picks up the items appended along the way.
            for obj in extra_objects:
                dump_object(obj)

        # Publish the finished dump only after the file has been closed.
        os.rename(partial_path, DEFAULT_DUMP_FILE_NAME)

    except Exception as e:
        sys.stderr.write(f"dump_heap error: {e}\n")
        # Do not leave a half-written file around for the next run to trip over.
        try:
            os.remove(partial_path)
        except OSError:
            pass
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _get_qualname(obj):
    """Return the best available name for ``obj``.

    Prefers a truthy ``__qualname__``, then a truthy ``__name__``, and falls
    back to ``repr(obj)`` when neither is available.
    """
    for attribute in ("__qualname__", "__name__"):
        found = getattr(obj, attribute, None)
        if found:
            return found
    return repr(obj)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _get_prefix(obj):
    """Return a dotted prefix for ``obj``'s name, or ``""`` if none applies.

    The prefix is ``__module__`` when that attribute is truthy; otherwise the
    qualified name of ``__objclass__`` when that is truthy. Both forms end
    with a trailing dot.
    """
    module_name = getattr(obj, "__module__", None)
    if module_name:
        return f"{module_name}."

    owner = getattr(obj, "__objclass__", None)
    if owner:
        return f"{_get_qualname(owner)}."

    return ""
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def _name_extractor(obj):
    """Build a display name from ``_get_prefix`` followed by ``_get_qualname``."""
    prefix = _get_prefix(obj)
    return prefix + _get_qualname(obj)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _get_type_name(obj):
    """Return the display name of ``obj``'s type, as produced by ``_name_extractor``."""
    return _name_extractor(type(obj))
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _string_extractor(obj):
    """Return ``obj`` unchanged, or its first ``_max_value_len`` characters
    followed by a truncation marker when it is longer than that."""
    if len(obj) <= _max_value_len:
        return obj
    head = obj[:_max_value_len]
    return head + "...<truncated>"
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def _bytes_extractor(obj):
    """Return the ``repr`` of ``obj``; values longer than ``_max_value_len``
    are cut to that length first and tagged with a truncation marker."""
    if len(obj) <= _max_value_len:
        return repr(obj)
    head = obj[:_max_value_len]
    return repr(head) + "...<truncated>"
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _module_extractor(obj):
    """Describe a module as ``module <name>`` using its ``__name__``."""
    module_name = obj.__name__
    return f"module {module_name}"
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
# Maps an exact type to the function that renders a "value" for the dump.
# Lookups use type(obj), so subclasses of these types get no value.
_value_extractors = {
    # Plain data: rendered from the object's own contents.
    str: _string_extractor,
    bytes: _bytes_extractor,
    int: str,
    float: str,
    complex: str,
    bool: str,
    type(None): lambda x: "None",
    ModuleType: _module_extractor,
    # Callables, descriptors and classes: rendered by name.
    **dict.fromkeys(
        (
            FunctionType,
            BuiltinFunctionType,
            MethodType,
            staticmethod,
            classmethod,
            WrapperDescriptorType,
            MethodWrapperType,
            MethodDescriptorType,
            ClassMethodDescriptorType,
            GetSetDescriptorType,
            MemberDescriptorType,
            type,
        ),
        _name_extractor,
    ),
}
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
# _dump_heap() # Replaced by the injector so the dump only runs inside the target process.
|
|
@@ -0,0 +1,411 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from contextlib import contextmanager
|
|
3
|
+
import io
|
|
4
|
+
import shutil
|
|
5
|
+
import re
|
|
6
|
+
import tarfile
|
|
7
|
+
import time
|
|
8
|
+
import tempfile
|
|
9
|
+
from subprocess import Popen, PIPE
|
|
10
|
+
import os
|
|
11
|
+
import pathlib
|
|
12
|
+
import sys
|
|
13
|
+
import subprocess
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
from sys import remote_exec # ty: ignore[unresolved-import]
|
|
17
|
+
except ImportError:
|
|
18
|
+
remote_exec = None
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
from os import setns # ty: ignore[unresolved-import]
|
|
22
|
+
except ImportError:
|
|
23
|
+
setns = None
|
|
24
|
+
|
|
25
|
+
try:
|
|
26
|
+
import psutil
|
|
27
|
+
|
|
28
|
+
_psutil_available = True
|
|
29
|
+
except ImportError:
|
|
30
|
+
_psutil_available = False
|
|
31
|
+
|
|
32
|
+
GIL_ENABLED = True
|
|
33
|
+
try:
|
|
34
|
+
from sys import _is_gil_enabled # ty: ignore[unresolved-import]
|
|
35
|
+
|
|
36
|
+
GIL_ENABLED = _is_gil_enabled()
|
|
37
|
+
except ImportError:
|
|
38
|
+
pass
|
|
39
|
+
|
|
40
|
+
DEFAULT_DUMP_FILE = "/tmp/dump.jsonl"
|
|
41
|
+
|
|
42
|
+
_FILES_NEEDED_FOR_INJECTION = ["dump_heap.py", "inject.py"]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _build_tarball_of_dumping_code():
    """Return a gzip-compressed tar archive, as bytes, of the files named in
    ``_FILES_NEEDED_FOR_INJECTION``.

    The files are read from the directory containing this module and stored
    in the archive under their bare file names.
    """
    source_dir = pathlib.Path(__file__).parent
    buffer = io.BytesIO()
    with tarfile.open(fileobj=buffer, mode="w:gz") as archive:
        for name in _FILES_NEEDED_FOR_INJECTION:
            archive.add(source_dir / name, arcname=name)
    return buffer.getvalue()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# We do this at import time so that if we fork into another namespace, it's still available
|
|
56
|
+
_TARBALL = _build_tarball_of_dumping_code()
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def dump_heap_from_pid(
    pid,
    output_file=DEFAULT_DUMP_FILE,
    can_use_namespace_injection=True,
    can_use_alternate_python_interpreter=True,
):
    """Dump the heap of the running Python process ``pid`` to ``output_file``.

    Public entry point. The two flags are passed straight through to
    ``_dump_heap_from_pid_possibly_in_namespace``, which decides whether to
    use namespace injection and/or an alternate interpreter.
    """
    _dump_heap_from_pid_possibly_in_namespace(
        pid,
        output_file=output_file,
        can_use_namespace_injection=can_use_namespace_injection,
        can_use_alternate_python_interpreter=can_use_alternate_python_interpreter,
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
_DUMP_SCRIPT = (pathlib.Path(__file__).parent / "dump_heap.py").read_text()
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _build_dump_heap_code(output_file):
    """Return the dump script source, rewritten to write to ``output_file``.

    Two textual substitutions are applied to ``_DUMP_SCRIPT``: the quoted
    default dump path is replaced by a string literal for ``output_file``,
    and the commented-out call at the end of the script is uncommented so the
    dump runs when the script is executed.

    ``output_file`` may be a ``str`` or any ``os.PathLike``. It is converted
    with ``os.fspath`` first, because the ``repr`` of a ``pathlib.Path`` is
    not an expression the injected script could evaluate.
    """
    path_literal = repr(os.fspath(output_file))
    script = _DUMP_SCRIPT.replace(f'"{DEFAULT_DUMP_FILE}"', path_literal)
    return script.replace("# _dump_heap()", "_dump_heap()")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _dump_heap_from_pid_possibly_using_an_alternate_python_interpreter(
    pid,
    output_file=DEFAULT_DUMP_FILE,
    can_use_alternate_python_interpreter=True,
):
    """Inject from this interpreter, or hand over to a better-matched one.

    When alternate interpreters are allowed and
    ``_should_use_alternate_python_interpreter`` names one, the injection is
    delegated to that executable. Otherwise the current interpreter injects
    directly.
    """
    alternate_python = None
    if can_use_alternate_python_interpreter:
        alternate_python = _should_use_alternate_python_interpreter(pid)

    if not alternate_python:
        _dump_heap_from_pid(pid, output_file)
        return

    print(
        f"Using alternate Python interpreter {alternate_python} for injection",
        file=sys.stderr,
    )
    _dump_heap_from_pid_using_alternate_python_interpreter(
        pid, alternate_python, output_file
    )
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _dump_heap_from_pid_using_alternate_python_interpreter(
    pid, alternate_python, output_file=DEFAULT_DUMP_FILE
):
    """Re-run the injector under ``alternate_python`` from a temporary copy of the code.

    The pre-built ``_TARBALL`` is unpacked into a temporary directory and
    ``inject.py`` from that copy is run as a subprocess. A non-zero exit
    status raises ``subprocess.CalledProcessError``.
    """
    # The target mount namespace may not be able to see our original source tree.
    with tempfile.TemporaryDirectory() as tmpdir:
        with tarfile.open(fileobj=io.BytesIO(_TARBALL), mode="r:gz") as archive:
            archive.extractall(path=tmpdir)

        cmd = [alternate_python, f"{tmpdir}/inject.py", str(pid)]
        cmd += ["--output-file", output_file]
        # Don't recurse into trying to find another alternate Python interpreter
        cmd.append("--no-alternate-python-interpreter")
        # Don't use namespace injection - this function should only be called
        # after we've sorted out namespaces
        cmd.append("--no-namespace-injection")

        command_line = " ".join(cmd)
        print(f"Running injection command: {command_line}", file=sys.stderr)
        subprocess.run(cmd, check=True)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def _should_use_alternate_python_interpreter(pid) -> str | None:
    """Return the path of a better-matched Python executable, or ``None``.

    ``None`` means "inject with the current interpreter". That is the result
    when the target cannot be inspected, when this interpreter already
    matches the target, and when no matching interpreter can be found.
    """
    try:
        exe, maps = _get_exe_and_maps(pid)
    except Exception as e:
        print(
            f"Could not determine target process executable and maps, so proceeding with this interpreter: {e!r}",
            file=sys.stderr,
        )
        return None

    if _can_this_python_inject(exe):
        return None

    if os.path.basename(exe).startswith("python"):
        print(
            f"Using {exe} as alternate Python interpreter based on process exe",
            file=sys.stderr,
        )
        return exe

    # The executable is not named like Python (an embedding application,
    # perhaps), so look for a mapped libpython to identify the version.
    own_name = _this_effective_executable_name()
    for mapped_path in maps:
        found = re.match(r".*lib(python\d\.\d.*)\.so", mapped_path)
        if found is None:
            continue
        python_exe_wanted = found.group(1)
        if python_exe_wanted == own_name:
            print(
                f"Mapped library {mapped_path} suggests target process is running same Python version, so no alternate interpreter needed",
                file=sys.stderr,
            )
            return None
        # Check if there's an executable with the same name on the path
        python_exe = shutil.which(python_exe_wanted)
        if python_exe:
            print(
                f"Using {python_exe} as alternate Python interpreter based on mapped library {mapped_path}",
                file=sys.stderr,
            )
            return python_exe

    print(
        "Couldn't find an alternate Python interpreter to use for injection,"
        " but target process is running a different Python version, so injection may fail",
        file=sys.stderr,
    )
    return None
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _get_exe_and_maps(pid):
    """Return ``(exe, maps)`` for process ``pid``.

    ``exe`` is the path of the process executable and ``maps`` is a list of
    the path names of its memory mappings. psutil is used when it is
    installed, with ``/proc`` as the Linux-only fallback.

    Failing to read the executable raises. Failing to read the mappings is
    tolerated and yields an empty (or partial) ``maps`` list, because the
    mappings are only a secondary hint for the caller.

    Raises:
        Exception: on non-Linux platforms when psutil is not installed.
    """
    if _psutil_available:
        process = psutil.Process(pid)
        exe = process.exe()
        try:
            maps = [region.path for region in process.memory_maps(grouped=False)]
        except Exception:
            # Best effort: memory_maps can be unavailable or denied.
            maps = []
        return exe, maps

    if sys.platform == "linux":
        # Linux-only procfs fallback
        exe = os.readlink(f"/proc/{pid}/exe")
        maps = []
        try:
            with open(f"/proc/{pid}/maps") as f:
                for line in f:
                    # Fields: address perms offset dev inode pathname.
                    # Split at most 5 times so a pathname containing spaces
                    # stays in one piece.
                    fields = line.split(None, 5)
                    if len(fields) == 6:
                        maps.append(fields[5].rstrip("\n"))
        except Exception:
            # Best effort, as above: keep whatever was read so far.
            pass
        return exe, maps

    raise Exception(
        "Can't determine process executable on this platform without psutil"
    )
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def _can_this_python_inject(exe):
    """Return ``True`` when ``exe`` is this interpreter's own executable, or
    has the same versioned Python name as the running interpreter."""
    return (
        exe == sys.executable
        or _get_effective_executable_name(exe) == _this_effective_executable_name()
    )
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _get_effective_executable_name(exe):
    """Return the versioned Python executable name for ``exe``, or ``None``.

    The base name is matched against ``pythonX.Y`` with an optional ``t``
    suffix, and only that part is returned (``python3.12``, ``python3.13t``).
    When the base name does not look like that and ``exe`` is a symlink, the
    link is followed and the check repeated, so ``python -> python3 ->
    python3.12`` resolves to ``python3.12``.

    Returns ``None`` when no versioned name is found.
    """
    # Bounded so that a symlink loop cannot spin forever.
    for _ in range(40):
        found = re.match(r"python\d+\.\d+t?", os.path.basename(exe))
        if found:
            return found.group(0)
        if not os.path.islink(exe):
            return None
        # readlink() may return a path relative to the directory that holds
        # the link, not to our working directory. join() keeps absolute
        # targets as they are.
        exe = os.path.join(os.path.dirname(exe), os.readlink(exe))
    return None
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def _this_effective_executable_name():
    """Return the versioned executable name for the running interpreter.

    The result is ``python<major>.<minor>``, with a ``t`` appended when
    ``GIL_ENABLED`` is false (e.g. ``python3.13t``).
    """
    version = sys.version_info
    # The GIL-less case is marked with a "t" suffix.
    suffix = "" if GIL_ENABLED else "t"
    return f"python{version.major}.{version.minor}{suffix}"
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _dump_heap_from_pid_possibly_in_namespace(
    pid,
    output_file=DEFAULT_DUMP_FILE,
    can_use_namespace_injection=True,
    can_use_alternate_python_interpreter=True,
):
    """Route the dump through namespace injection when that is allowed and needed.

    Namespace injection is used only when the caller permits it and
    ``_should_use_namespace`` reports that the target lives in a different
    mount namespace. Otherwise the dump proceeds in the current namespace.
    """
    if not (can_use_namespace_injection and _should_use_namespace(pid)):
        _dump_heap_from_pid_possibly_using_an_alternate_python_interpreter(
            pid, output_file, can_use_alternate_python_interpreter
        )
        return

    print(
        "Target process is in a different mount namespace, using namespace injection method",
        file=sys.stderr,
    )
    _dump_heap_from_pid_in_namespace(
        pid, output_file, can_use_alternate_python_interpreter
    )
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def _dump_heap_from_pid_in_namespace(
    pid, output_file=DEFAULT_DUMP_FILE, can_use_alternate_python_interpreter=True
):
    """Dump from inside the target's namespaces and copy the result back out.

    ``output_file`` is opened before entering the namespace, so the file
    descriptor keeps pointing at a file in our own mount namespace. Inside
    the namespace the dump is written to a temporary path under ``/tmp``,
    copied through that descriptor and then removed.

    Raises:
        TimeoutError: in the forked child, if the dump file does not appear
            after injection has returned.
    """
    dump_loc_in_namespace = f"/tmp/dump_{pid}.jsonl"
    # Pass the mode explicitly: os.open() defaults to 0o777, which would
    # create the dump as an executable file. 0o666 (minus umask) is what the
    # built-in open() uses.
    out = os.open(output_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o666)

    try:
        with _in_namespace(pid) as inner_pid:
            # Only the process that actually sits inside the target
            # namespaces gets a pid; the others just wait for it.
            if inner_pid is not None:
                _dump_heap_from_pid_possibly_using_an_alternate_python_interpreter(
                    inner_pid, dump_loc_in_namespace, can_use_alternate_python_interpreter
                )
                # Allow a grace period, but do not wait forever for a dump
                # that a failed injection will never produce.
                deadline = time.monotonic() + 30
                while not os.path.exists(dump_loc_in_namespace):
                    if time.monotonic() > deadline:
                        raise TimeoutError(
                            f"Dump file {dump_loc_in_namespace} was not created, injection may have failed"
                        )
                    time.sleep(0.1)
                with open(dump_loc_in_namespace) as f, os.fdopen(out, "w") as out_f:
                    out_f.writelines(f)
                os.remove(dump_loc_in_namespace)
    finally:
        # Release our copy of the descriptor. Where os.fdopen() has already
        # closed it, this fails with EBADF, which is harmless.
        try:
            os.close(out)
        except OSError:
            pass
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
@contextmanager
def _in_namespace(pid):
    """Run the ``with`` body in a child process inside ``pid``'s namespaces.

    The process forks twice. The first child joins the target's PID and mount
    namespaces; since a PID namespace only takes effect for children created
    afterwards, it forks again. The ``with`` body therefore runs in three
    processes:

    * the grandchild, inside the namespaces, which receives the target's pid
      as seen from inside its namespace;
    * the intermediate child and the original process, which both receive
      ``None`` and then wait for their child.

    Both forked processes terminate with ``os._exit`` when the block ends, so
    only the original process continues past the ``with`` statement. A
    failure in either child is printed there and reported to its parent
    through a non-zero exit code.

    Raises:
        NotImplementedError: if ``os.setns`` is not available.
    """
    import traceback

    if setns is None:
        raise NotImplementedError(
            "Namespace injection method is not available on this platform"
        )
    inner_pid = _identify_pid_within_namespace(pid)

    forked_pid = os.fork()
    if forked_pid != 0:
        # Original process: nothing to do but wait for the first child.
        yield None
        _, status = os.waitpid(forked_pid, 0)
        if status != 0:
            print(f"Child process failed with status {status}", file=sys.stderr)
        return

    # From here on we are in a forked child and must never return into the
    # caller's stack: always leave through os._exit().
    exit_code = 1
    try:
        for namespace in ("pid", "mnt"):
            ns_fd = os.open(f"/proc/{pid}/ns/{namespace}", os.O_RDONLY)
            try:
                setns(ns_fd, 0)
            finally:
                os.close(ns_fd)

        # Must fork again after setns to actually be in the new PID namespace
        inner_forked_pid = os.fork()
        if inner_forked_pid == 0:
            yield inner_pid
            exit_code = 0
        else:
            yield None
            _, status = os.waitpid(inner_forked_pid, 0)
            # A raw wait status is not an exit code: exit code 1 is status
            # 256, which os._exit() would truncate to 0.
            code = os.waitstatus_to_exitcode(status)
            # Negative means "killed by signal"; use the shell's 128 + signal.
            exit_code = code if code >= 0 else 128 - code
    except BaseException:
        traceback.print_exc()
    finally:
        # os._exit() skips the normal interpreter shutdown, so flush by hand.
        try:
            sys.stdout.flush()
            sys.stderr.flush()
        finally:
            os._exit(exit_code)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _identify_pid_within_namespace(pid):
    """Return the pid that process ``pid`` has inside its own PID namespace.

    Reads the ``NSpid:`` line of ``/proc/<pid>/status`` and returns its last
    entry.

    Raises:
        OSError: if the status file cannot be read.
        RuntimeError: if the status file has no ``NSpid:`` line. Returning
            ``None`` here would make the caller silently skip the dump.
    """
    with open(f"/proc/{pid}/status") as f:
        for line in f:
            if line.startswith("NSpid:"):
                return int(line.split()[-1])
    raise RuntimeError(
        f"No NSpid entry in /proc/{pid}/status, can't translate pid into target namespace"
    )
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def _dump_heap_from_pid(
    pid, output_file=DEFAULT_DUMP_FILE, inactivity_timeout=datetime.timedelta(seconds=5)
):
    """Build the payload script and inject it into the target process.

    Use remote_exec when available, else fall back to gdb.

    Args:
        pid: pid of the target process.
        output_file: where the target should write the dump. Made absolute
            first, because the payload runs with the target's working
            directory, not ours. An existing file at this path is removed
            before injection.
        inactivity_timeout: with remote_exec, how long the ``.partial`` file
            may go without being modified before giving up.

    Raises:
        PermissionError: with remote_exec, if no dump appears and the partial
            file makes no progress within ``inactivity_timeout``.
    """
    output_file = os.path.abspath(output_file)
    partial_file = output_file + ".partial"

    # Leftovers from an earlier run would confuse the wait loop below: an old
    # dump makes it finish at once, an old partial file makes it time out at
    # once. The target replaces both anyway.
    for leftover in (output_file, partial_file):
        try:
            os.remove(leftover)
        except FileNotFoundError:
            pass

    code = _build_dump_heap_code(output_file)

    # delete=False: the file must outlive this handle so the target can read it.
    with tempfile.NamedTemporaryFile(
        suffix=".py", mode="w", delete=False
    ) as script_file:
        script_file.write(code)

    try:
        if remote_exec is not None:
            print("Using remote_exec to inject code", file=sys.stderr)
            remote_exec(pid, script_file.name)
            last_progress = time.time()
            while not os.path.exists(output_file):
                try:
                    last_progress = os.stat(partial_file).st_mtime
                except FileNotFoundError:
                    pass
                if time.time() - last_progress > inactivity_timeout.total_seconds():
                    raise PermissionError(
                        "Timed out waiting for dump file to be created, injection may have failed"
                    )
                # Poll without pinning a CPU core.
                time.sleep(0.05)

        else:
            print("remote_exec not available, falling back to gdb method", file=sys.stderr)
            gdb_cmds = [
                "(char *) PyGILState_Ensure()",
                '(void) PyRun_SimpleString("'
                rf'exec(open(\"{script_file.name}\").read())")',
                "(void) PyGILState_Release($1)",
            ]
            p = Popen(
                [
                    "gdb",
                    "-p",
                    str(pid),
                    "--batch",
                    *(f"--eval-command=call {cmd}" for cmd in gdb_cmds),
                ],
                stdout=PIPE,
                stderr=PIPE,
                text=True,
            )
            out, err = p.communicate()
            print(out, file=sys.stderr)
            print(err, file=sys.stderr)
            if not os.path.exists(output_file):
                print(
                    "gdb finished but no dump file was produced, injection may have failed",
                    file=sys.stderr,
                )
    finally:
        # The payload has run (or we gave up on it); don't litter the temp dir.
        try:
            os.unlink(script_file.name)
        except OSError:
            pass
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _should_use_namespace(pid):
    """Return ``True`` when ``pid``'s mount namespace differs from our own.

    The comparison is between the link targets of ``/proc/self/ns/mnt`` and
    ``/proc/<pid>/ns/mnt``. Any failure to read them yields ``False``.
    """
    namespace_links = ("/proc/self/ns/mnt", f"/proc/{pid}/ns/mnt")
    try:
        own_ns, target_ns = (os.readlink(link) for link in namespace_links)
    except Exception:
        # This can fail for a number of reasons, but they're all cases where
        # we can't join the process's namespace
        return False
    return own_ns != target_ns
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
def main():
    """Parse the command line and dump the heap of the requested process.

    Takes a positional ``pid``, an ``--output-file``/``-o`` path, and two
    opt-out switches whose destinations default to ``True``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Dump the heap of a running Python process."
    )
    parser.add_argument("pid", type=int, help="PID of the target Python process")
    parser.add_argument(
        "--output-file",
        "-o",
        default=DEFAULT_DUMP_FILE,
        help=f"Path to output file (default: {DEFAULT_DUMP_FILE})",
    )

    # Opt-out switches: (flag, destination, help text)
    opt_outs = (
        (
            "--no-namespace-injection",
            "can_use_namespace_injection",
            "Don't attempt to use namespace injection method.",
        ),
        (
            "--no-alternate-python-interpreter",
            "can_use_alternate_python_interpreter",
            "Don't attempt to use an alternate Python interpreter for injection, even if the target process is running a different Python version.",
        ),
    )
    for flag, destination, help_text in opt_outs:
        parser.add_argument(
            flag, action="store_false", dest=destination, help=help_text
        )

    options = parser.parse_args()
    dump_heap_from_pid(
        options.pid,
        options.output_file,
        options.can_use_namespace_injection,
        options.can_use_alternate_python_interpreter,
    )
|
|
408
|
+
|
|
409
|
+
|
|
410
|
+
if __name__ == "__main__":
|
|
411
|
+
main()
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
body { font-family: Arial, sans-serif; margin: 2em; }
|
|
2
|
+
table { border-collapse: collapse; width: 50%; }
|
|
3
|
+
th, td { border: 1px solid #ccc; padding: 8px 12px; text-align: left; }
|
|
4
|
+
th { background: #f4f4f4; }
|
|
5
|
+
h1 { margin-bottom: 1em; }
|
|
6
|
+
ul { margin-bottom: 2em; }
|
|
7
|
+
li { margin: 0.5em 0; }
|
|
8
|
+
form.upload-form { border: 1px solid #ccc; padding: 1em; width: 350px; background: #f9f9f9; }
|
|
9
|
+
label { display: block; margin-bottom: 0.5em; }
|
|
10
|
+
input[type="text"], input[type="file"], select { width: 100%; margin-bottom: 1em; }
|
|
11
|
+
input[type="submit"] { padding: 0.5em 1em; }
|
|
12
|
+
dl { margin-bottom: 2em; }
|
|
13
|
+
dt { font-weight: bold; margin-top: 1em; }
|
|
14
|
+
dd { margin-left: 1.5em; margin-bottom: 0.5em; }
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{% macro object_list(objects, dump_name, sortable=False) %}
{#
  Render `objects` as a table with ID, Type, Size, Subtree Size and Value columns.
  Each ID links to the `explore_object` page of that object within `dump_name`.
  When `sortable` is true, the ID, Size and Subtree Size headers become links that
  set the `sort` query parameter on the current page (`id`, `size`, `subtree_size`).
  A missing subtree size is shown as "Unknown"; a missing value leaves the cell empty.
#}
<table>
    <tr>
        <th>{% if sortable %}
            <a href="?sort=id">ID</a>
            {% else %}
            ID
            {% endif %}
        </th>
        <th>Type</th>
        <th>
            {% if sortable %}
            <a href="?sort=size">Size</a>
            {% else %}
            Size
            {% endif %}
        </th>
        <th>
            {% if sortable %}
            <a href="?sort=subtree_size">Subtree Size</a>
            {% else %}
            Subtree Size
            {% endif %}
        </th>
        <th>Value</th></tr>
    {% for obj in objects %}
    <tr>
        <td><a href="{{ url_for('explore_object', dump_name=dump_name, obj_id=obj.id) }}">{{ obj.id }}</a></td>
        <td>{{ obj.type }}</td>
        <td>{{ obj.size }} bytes</td>
        <td>{% if obj.subtree_size is not none %}{{ obj.subtree_size }} bytes{% else %}Unknown{% endif %}</td>
        <td>{% if obj.value is not none %}{{ obj.value }}{% endif %}</td></tr>
    {% endfor %}
</table>
{% endmacro %}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html>
|
|
3
|
+
<head>
|
|
4
|
+
<title>Heap Dump Overview - {{ dump_name }}</title>
|
|
5
|
+
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
|
|
6
|
+
</head>
|
|
7
|
+
<body>
|
|
8
|
+
<h1>Heap Dump Overview: {{ dump_name }}</h1>
|
|
9
|
+
<p><a href="{{ url_for('index') }}">Back to heap dump list</a></p>
|
|
10
|
+
<table>
|
|
11
|
+
<tr>
|
|
12
|
+
<th><a href="{{ url_for('explore_dump', dump_name=dump_name, sort_by='type') }}">Type</a></th>
|
|
13
|
+
<th><a href="{{ url_for('explore_dump', dump_name=dump_name, sort_by='count') }}">Instance Count</a></th>
|
|
14
|
+
<th><a href="{{ url_for('explore_dump', dump_name=dump_name, sort_by='size') }}">Total Size</a></th>
|
|
15
|
+
</tr>
|
|
16
|
+
{% for type_name, summary in type_summaries %}
|
|
17
|
+
<tr>
|
|
18
|
+
<td><a href="{{ url_for('explore_type', dump_name=dump_name, type_name=type_name) }}">{{ type_name }}</a></td>
|
|
19
|
+
<td>{{ summary.count }}</td>
|
|
20
|
+
<td>{{ summary.total_size }}</td>
|
|
21
|
+
</tr>
|
|
22
|
+
{% endfor %}
|
|
23
|
+
</table>
|
|
24
|
+
</body>
|
|
25
|
+
</html>
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html>
|
|
3
|
+
<head>
|
|
4
|
+
<title>Heap Dump Explorer</title>
|
|
5
|
+
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
|
|
6
|
+
</head>
|
|
7
|
+
<body>
|
|
8
|
+
<h1>Heap Dump Explorer</h1>
|
|
9
|
+
|
|
10
|
+
<h2>Existing Heap Dumps</h2>
|
|
11
|
+
{% if dump_names %}
|
|
12
|
+
<ul>
|
|
13
|
+
{% for name in dump_names %}
|
|
14
|
+
<li>
|
|
15
|
+
<a href="{{ url_for('explore_dump', dump_name=name) }}">{{ name }}</a>
|
|
16
|
+
</li>
|
|
17
|
+
{% endfor %}
|
|
18
|
+
</ul>
|
|
19
|
+
{% else %}
|
|
20
|
+
<p>No heap dumps loaded yet.</p>
|
|
21
|
+
{% endif %}
|
|
22
|
+
|
|
23
|
+
<h2>Upload New Heap Dump</h2>
|
|
24
|
+
<form action="{{ url_for('upload_dump') }}" method="post" enctype="multipart/form-data" class="upload-form">
|
|
25
|
+
<label for="dump_name">Heap Dump Name:</label>
|
|
26
|
+
<input type="text" id="dump_name" name="dump_name">
|
|
27
|
+
<label for="estimator_precision">Subtree Size Estimate Precision</label>
|
|
28
|
+
<select id="estimator_precision" name="estimator_precision" required>
|
|
29
|
+
<option value="no_estimates">No Estimates (fastest, lowest memory)</option>
|
|
30
|
+
<option value="low">Low Precision</option>
|
|
31
|
+
<option value="medium" selected>Medium Precision</option>
|
|
32
|
+
<option value="high">High Precision</option>
|
|
33
|
+
<option value="exact">Exact (slowest, highest memory)</option>
|
|
34
|
+
</select>
|
|
35
|
+
|
|
36
|
+
<label for="dump_file">Heap Dump File (.jsonl):</label>
|
|
37
|
+
<input type="file" id="dump_file" name="dump_file" accept=".jsonl" required>
|
|
38
|
+
|
|
39
|
+
<input type="submit" value="Upload">
|
|
40
|
+
</form>
|
|
41
|
+
</body>
|
|
42
|
+
</html>
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
{% import 'components.html' as components %}
|
|
2
|
+
<!DOCTYPE html>
|
|
3
|
+
<html>
|
|
4
|
+
<head>
|
|
5
|
+
<title>{{ obj.type }} instance {{ obj.id }} - {{ dump_name }}</title>
|
|
6
|
+
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
|
|
7
|
+
</head>
|
|
8
|
+
<body>
|
|
9
|
+
<h1>{{ obj.type }} instance {{ obj.id }} in Heap Dump: {{ dump_name }}</h1>
|
|
10
|
+
<p><a href="{{ url_for('explore_dump', dump_name=dump_name) }}">Back to overview</a></p>
|
|
11
|
+
{% if current_from_id == obj.id %}
|
|
12
|
+
<p><strong>This object will be used as the starting point if you start a path search.</strong></p>
|
|
13
|
+
{% else %}
|
|
14
|
+
<form action="{{ url_for('set_path_finding_endpoint', dump_name=dump_name) }}" method="post">
|
|
15
|
+
<input type="hidden" name="from_id" value="{{ obj.id }}">
|
|
16
|
+
<button type="submit">Can you find a path from here to {% if current_to_id %}object {{ current_to_id }}{% else %}...{% endif %}?</button>
|
|
17
|
+
</form>
|
|
18
|
+
{% endif %}
|
|
19
|
+
{% if current_to_id == obj.id %}
|
|
20
|
+
<p><strong>This object will be used as the ending point if you start a path search.</strong></p>
|
|
21
|
+
{% else %}
|
|
22
|
+
<form action="{{ url_for('set_path_finding_endpoint', dump_name=dump_name) }}" method="post">
|
|
23
|
+
<input type="hidden" name="to_id" value="{{ obj.id }}">
|
|
24
|
+
<button type="submit">Can you find a path from {% if current_from_id %}object {{ current_from_id }}{% else %}...{% endif %} to here?</button>
|
|
25
|
+
</form>
|
|
26
|
+
{% endif %}
|
|
27
|
+
<dl>
|
|
28
|
+
<dt>Type:</dt>
|
|
29
|
+
<dd><a href="{{ url_for('explore_type', type_name=obj.type, dump_name=dump_name) }}">{{ obj.type }}</a></dd>
|
|
30
|
+
<dt>ID:</dt>
|
|
31
|
+
<dd>{{ obj.id }}</dd>
|
|
32
|
+
<dt>Size:</dt>
|
|
33
|
+
<dd>{{ obj.size }} bytes</dd>
|
|
34
|
+
{% if obj.subtree_size is not none %}
|
|
35
|
+
<dt>Subtree Size:</dt>
|
|
36
|
+
<dd>{{ obj.subtree_size }} bytes</dd>
|
|
37
|
+
{% endif %}
|
|
38
|
+
{% if obj.value is not none %}
|
|
39
|
+
<dt>Value:</dt>
|
|
40
|
+
<dd>{{ obj.value }}</dd>
|
|
41
|
+
{% endif %}
|
|
42
|
+
</dl>
|
|
43
|
+
{% if obj.referrers %}
|
|
44
|
+
<h2>Referrers</h2>
|
|
45
|
+
{{ components.object_list(obj.referrers, dump_name) }}
|
|
46
|
+
{% endif %}
|
|
47
|
+
{% if obj.references %}
|
|
48
|
+
<h2>References</h2>
|
|
49
|
+
{{ components.object_list(obj.references, dump_name) }}
|
|
50
|
+
{% endif %}
|
|
51
|
+
</body>
|
|
52
|
+
</html>
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{% import 'components.html' as components %}
|
|
2
|
+
<!DOCTYPE html>
|
|
3
|
+
<html>
|
|
4
|
+
<head>
|
|
5
|
+
<title>Path from {{ from_id }} to {{ to_id }} - {{ dump_name }}</title>
|
|
6
|
+
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
|
|
7
|
+
</head>
|
|
8
|
+
<body>
|
|
9
|
+
<h1>Path from {{ from_id }} to {{ to_id }} in Heap Dump: {{ dump_name }}</h1>
|
|
10
|
+
<p><a href="{{ url_for('explore_dump', dump_name=dump_name) }}">Back to overview</a></p>
|
|
11
|
+
{% if path is none %}
|
|
12
|
+
<p><strong>No path found.</strong></p>
|
|
13
|
+
{% else %}
|
|
14
|
+
<table>
|
|
15
|
+
<tr>
|
|
16
|
+
<th>ID</th>
|
|
17
|
+
<th>Type</th>
|
|
18
|
+
<th>Size</th>
|
|
19
|
+
<th>Subtree Size</th>
|
|
20
|
+
<th>Value</th>
|
|
21
|
+
<th>Avoid</th>
|
|
22
|
+
</tr>
|
|
23
|
+
|
|
24
|
+
{% for obj in path %}
|
|
25
|
+
<tr>
|
|
26
|
+
<td><a href="{{ url_for('explore_object', dump_name=dump_name, obj_id=obj.id) }}">{{ obj.id }}</a></td>
|
|
27
|
+
<td>{{ obj.type }}</td>
|
|
28
|
+
<td>{{ obj.size }} bytes</td>
|
|
29
|
+
<td>{% if obj.subtree_size is not none %}{{ obj.subtree_size }} bytes{% else %}Unknown{% endif %}</td>
|
|
30
|
+
<td>{% if obj.value is not none %}{{ obj.value }}{% endif %}</td>
|
|
31
|
+
<td>
|
|
32
|
+
{% if obj.id not in avoid_ids %}
|
|
33
|
+
<a
|
|
34
|
+
href="{{ url_for('find_path', dump_name=dump_name, from_id=from_id, to_id=to_id, avoid_id=avoid_ids + [obj.id]) }}"
|
|
35
|
+
title="Find a new path that avoids this object">
|
|
36
|
+
Avoid
|
|
37
|
+
</a>
|
|
38
|
+
{% endif %}
|
|
39
|
+
</td>
|
|
40
|
+
</tr>
|
|
41
|
+
{% endfor %}
|
|
42
|
+
</table>
|
|
43
|
+
{% endif %}
|
|
44
|
+
|
|
45
|
+
</body>
|
|
46
|
+
</html>
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{% import 'components.html' as components %}
|
|
2
|
+
<!DOCTYPE html>
|
|
3
|
+
<html>
|
|
4
|
+
<head>
|
|
5
|
+
<title>Instances of type {{ type_name }} - {{ dump_name }}</title>
|
|
6
|
+
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
|
|
7
|
+
</head>
|
|
8
|
+
<body>
|
|
9
|
+
<h1>Instances of type {{ type_name }} in Heap Dump: {{ dump_name }}</h1>
|
|
10
|
+
<p><a href="{{ url_for('explore_dump', dump_name=dump_name) }}">Back to overview</a></p>
|
|
11
|
+
{#
  Pagination controls for the type listing. The paragraph lives inside the form:
  a <form> is not permitted inside a <p>, so the previous <p><form>...</form></p>
  nesting made the HTML parser close the paragraph early and emit empty <p> elements.
  Submitting the form (e.g. pressing Enter in the page box) reloads this page with
  the chosen page number and the current sort order.
#}
<form action="{{ url_for('explore_type', dump_name=dump_name, type_name=type_name) }}" method="get">
    <p>
        <a title="first page" href="{{ url_for('explore_type', dump_name=dump_name, type_name=type_name, page=1, sort=sort) }}">⏮️</a>
        {% if page > 1 %}
        <a title="previous page" href="{{ url_for('explore_type', dump_name=dump_name, type_name=type_name, page=page-1, sort=sort) }}">◀️</a>
        {% endif %}
        Page <input type="number" name="page" value="{{ page }}" min="1" max="{{ total_pages }}">
        <input type="hidden" name="sort" value="{{ sort }}"> of {{ total_pages }}
        {% if page < total_pages %}
        <a title="next page" href="{{ url_for('explore_type', dump_name=dump_name, type_name=type_name, page=page+1, sort=sort) }}">▶️</a>
        {% endif %}
        <a title="last page" href="{{ url_for('explore_type', dump_name=dump_name, type_name=type_name, page=total_pages, sort=sort) }}">⏭️</a>
    </p>
</form>
|
|
25
|
+
{{ components.object_list(objects, dump_name, sortable=True) }}
|
|
26
|
+
</body>
|
|
27
|
+
</html>
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""A simple Flask GUI, with templated HTML with very basic styling, that lets the user explore a heap dump created by dump_heap.py."""
|
|
2
|
+
import webbrowser
|
|
3
|
+
|
|
4
|
+
import shutil
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
|
|
8
|
+
from werkzeug.exceptions import NotFound
|
|
9
|
+
from ntpath import basename
|
|
10
|
+
import os
|
|
11
|
+
import pathlib
|
|
12
|
+
|
|
13
|
+
from cheroot.wsgi import Server as WSGIServer
|
|
14
|
+
from flask import Flask, request, redirect, url_for, render_template, session
|
|
15
|
+
from midden_analysis import HeapDumpExplorer, TypeSummary, EstimatorPrecision
|
|
16
|
+
|
|
17
|
+
DUMPS_DIR = os.getenv("DUMPS_DIR", "/tmp/dumps")
|
|
18
|
+
|
|
19
|
+
PRECISION_MAP = {
|
|
20
|
+
"no_estimates": EstimatorPrecision.NoEstimates,
|
|
21
|
+
"low": EstimatorPrecision.Low,
|
|
22
|
+
"medium": EstimatorPrecision.Medium,
|
|
23
|
+
"high": EstimatorPrecision.High,
|
|
24
|
+
"exact": EstimatorPrecision.Exact,
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def create_app():
|
|
29
|
+
"""Create the Flask app and preload any dumps already stored on disk."""
|
|
30
|
+
os.makedirs(DUMPS_DIR, exist_ok=True)
|
|
31
|
+
loaded_dumps: dict[str, HeapDumpExplorer] = {
|
|
32
|
+
basename(path)[:-5]: HeapDumpExplorer(f"{DUMPS_DIR}/{path}")
|
|
33
|
+
for path in os.listdir(DUMPS_DIR)
|
|
34
|
+
if path.endswith(".lmdb")
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
def get_dump(dump_name) -> HeapDumpExplorer:
|
|
38
|
+
"""Return a loaded dump or raise a 404 for unknown names."""
|
|
39
|
+
explorer = loaded_dumps.get(dump_name)
|
|
40
|
+
if not explorer:
|
|
41
|
+
raise NotFound(f"Dump '{dump_name}' not found")
|
|
42
|
+
return explorer
|
|
43
|
+
|
|
44
|
+
app = Flask(__name__)
|
|
45
|
+
|
|
46
|
+
@app.route("/")
|
|
47
|
+
def index():
|
|
48
|
+
dump_names = list(loaded_dumps.keys())
|
|
49
|
+
return render_template("index.html", dump_names=dump_names)
|
|
50
|
+
|
|
51
|
+
@app.route("/upload_dump", methods=["POST"])
|
|
52
|
+
def upload_dump():
|
|
53
|
+
"""Import an uploaded JSONL heap dump into a new LMDB-backed explorer."""
|
|
54
|
+
dump_name = request.form.get("dump_name")
|
|
55
|
+
dump_file = request.files["dump_file"]
|
|
56
|
+
if not dump_name:
|
|
57
|
+
if upload_filename := dump_file.filename:
|
|
58
|
+
dump_name = pathlib.Path(upload_filename).stem
|
|
59
|
+
else:
|
|
60
|
+
dump_name = f"heap_dump_{len(loaded_dumps) + 1}"
|
|
61
|
+
if dump_name in loaded_dumps:
|
|
62
|
+
return f"Heap dump with name '{dump_name}' already exists", 409
|
|
63
|
+
if "/" in dump_name or "\\" in dump_name or dump_name.startswith("."):
|
|
64
|
+
return "Invalid heap dump name", 400
|
|
65
|
+
dump_dir = f"{DUMPS_DIR}/{dump_name}.lmdb"
|
|
66
|
+
precision = request.form.get("estimator_precision", "medium")
|
|
67
|
+
estimator_precision = PRECISION_MAP.get(precision, EstimatorPrecision.Medium)
|
|
68
|
+
os.mkdir(dump_dir)
|
|
69
|
+
try:
|
|
70
|
+
explorer = HeapDumpExplorer(f"{DUMPS_DIR}/{dump_name}.lmdb")
|
|
71
|
+
explorer.import_lines(dump_file, estimator_precision)
|
|
72
|
+
except Exception:
|
|
73
|
+
shutil.rmtree(dump_dir)
|
|
74
|
+
raise
|
|
75
|
+
loaded_dumps[dump_name] = explorer
|
|
76
|
+
return redirect(url_for("explore_dump", dump_name=dump_name))
|
|
77
|
+
|
|
78
|
+
@app.route("/explore/<dump_name>")
|
|
79
|
+
def explore_dump(dump_name):
|
|
80
|
+
explorer = get_dump(dump_name)
|
|
81
|
+
# The landing page for a dump is a type summary table.
|
|
82
|
+
type_summaries: list[tuple[str, TypeSummary]] = explorer.get_type_summaries()
|
|
83
|
+
sort_by = request.args.get("sort_by", "count")
|
|
84
|
+
match sort_by:
|
|
85
|
+
case "size":
|
|
86
|
+
type_summaries.sort(key=lambda x: x[1].total_size, reverse=True)
|
|
87
|
+
case "type":
|
|
88
|
+
type_summaries.sort(key=lambda x: x[0])
|
|
89
|
+
case "count" | _:
|
|
90
|
+
type_summaries.sort(key=lambda x: x[1].count, reverse=True)
|
|
91
|
+
return render_template(
|
|
92
|
+
"explore.html", dump_name=dump_name, type_summaries=type_summaries
|
|
93
|
+
)
|
|
94
|
+
|
|
95
|
+
@app.route("/explore/<dump_name>/type/<type_name>")
|
|
96
|
+
def explore_type(dump_name, type_name):
|
|
97
|
+
"""Show one page of objects for a type, with optional size-based sorting."""
|
|
98
|
+
page = request.args.get("page", 1, type=int)
|
|
99
|
+
page_zero_indexed = max(page - 1, 0)
|
|
100
|
+
explorer = get_dump(dump_name)
|
|
101
|
+
sort = request.args.get("sort", "id")
|
|
102
|
+
match sort:
|
|
103
|
+
case "size":
|
|
104
|
+
objects = explorer.get_objects_by_type_ordered_by_size(
|
|
105
|
+
type_name, page=page_zero_indexed, subtree_size=False
|
|
106
|
+
)
|
|
107
|
+
case "subtree_size":
|
|
108
|
+
objects = explorer.get_objects_by_type_ordered_by_size(
|
|
109
|
+
type_name, page=page_zero_indexed, subtree_size=True
|
|
110
|
+
)
|
|
111
|
+
case "id" | _:
|
|
112
|
+
objects = explorer.get_objects_by_type(
|
|
113
|
+
type_name, page=page_zero_indexed
|
|
114
|
+
)
|
|
115
|
+
total_pages = explorer.get_page_count_for_type(type_name)
|
|
116
|
+
return render_template(
|
|
117
|
+
"type.html",
|
|
118
|
+
dump_name=dump_name,
|
|
119
|
+
type_name=type_name,
|
|
120
|
+
objects=objects,
|
|
121
|
+
page=page,
|
|
122
|
+
total_pages=total_pages,
|
|
123
|
+
sort=sort,
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
@app.route("/explore/<dump_name>/object/<int:obj_id>")
|
|
127
|
+
def explore_object(dump_name, obj_id):
|
|
128
|
+
"""Show one object together with its references and referrers."""
|
|
129
|
+
explorer = get_dump(dump_name)
|
|
130
|
+
obj = explorer.get_object(obj_id)
|
|
131
|
+
current_from_id = session.get(f"path_finding_from_id:{dump_name}")
|
|
132
|
+
current_to_id = session.get(f"path_finding_to_id:{dump_name}")
|
|
133
|
+
if not obj:
|
|
134
|
+
raise NotFound(f"Object with ID {obj_id} not found in dump '{dump_name}'")
|
|
135
|
+
return render_template(
|
|
136
|
+
"object.html",
|
|
137
|
+
dump_name=dump_name,
|
|
138
|
+
obj=obj,
|
|
139
|
+
current_from_id=current_from_id,
|
|
140
|
+
current_to_id=current_to_id,
|
|
141
|
+
)
|
|
142
|
+
|
|
143
|
+
@app.route("/explore/<dump_name>/set_path_finding_endpoint", methods=["POST"])
|
|
144
|
+
def set_path_finding_endpoint(dump_name):
|
|
145
|
+
"""Store or complete the pair of object IDs used for path finding."""
|
|
146
|
+
from_id = request.form.get(
|
|
147
|
+
"from_id", session.get(f"path_finding_from_id:{dump_name}"), type=int
|
|
148
|
+
)
|
|
149
|
+
to_id = request.form.get(
|
|
150
|
+
"to_id", session.get(f"path_finding_to_id:{dump_name}"), type=int
|
|
151
|
+
)
|
|
152
|
+
if from_id is not None and to_id is not None:
|
|
153
|
+
del session[f"path_finding_from_id:{dump_name}"]
|
|
154
|
+
del session[f"path_finding_to_id:{dump_name}"]
|
|
155
|
+
return redirect(
|
|
156
|
+
url_for("find_path", dump_name=dump_name, from_id=from_id, to_id=to_id)
|
|
157
|
+
)
|
|
158
|
+
else:
|
|
159
|
+
session[f"path_finding_from_id:{dump_name}"] = from_id
|
|
160
|
+
session[f"path_finding_to_id:{dump_name}"] = to_id
|
|
161
|
+
|
|
162
|
+
return redirect(
|
|
163
|
+
url_for("explore_object", dump_name=dump_name, obj_id=from_id or to_id)
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
@app.route("/explore/<dump_name>/find_path")
|
|
167
|
+
def find_path(dump_name):
|
|
168
|
+
"""Find a reference path between two objects in the selected dump."""
|
|
169
|
+
explorer = get_dump(dump_name)
|
|
170
|
+
from_id = request.args.get("from_id", type=int)
|
|
171
|
+
to_id = request.args.get("to_id", type=int)
|
|
172
|
+
avoid_ids = set(request.args.getlist("avoid_id", type=int))
|
|
173
|
+
if from_id is None or to_id is None:
|
|
174
|
+
return "Missing from_id or to_id query parameters", 400
|
|
175
|
+
path = explorer.find_path_between_objects(
|
|
176
|
+
from_id, to_id, avoiding_ids=avoid_ids
|
|
177
|
+
)
|
|
178
|
+
return render_template(
|
|
179
|
+
"path.html",
|
|
180
|
+
dump_name=dump_name,
|
|
181
|
+
from_id=from_id,
|
|
182
|
+
to_id=to_id,
|
|
183
|
+
path=path,
|
|
184
|
+
avoid_ids=list(avoid_ids),
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
app.secret_key = os.urandom(
|
|
188
|
+
16
|
|
189
|
+
) # This app keeps state in-process, so a per-process key is fine.
|
|
190
|
+
return app
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def main():
    """Run the local Midden web server.

    Command-line options: ``--host`` (default ``127.0.0.1``), ``--port``
    (default ``5000``) and ``--no-start-web-browser`` to suppress opening the
    UI in a browser. Blocks until the server is interrupted.
    """
    arg_parser = argparse.ArgumentParser(description="Run the Midden web server")
    arg_parser.add_argument("--host", default="127.0.0.1")
    arg_parser.add_argument("--port", default=5000, type=int)
    arg_parser.add_argument(
        "--no-start-web-browser",
        action="store_false",
        dest="start_web_browser",
        help="Don't automatically open the web browser",
    )
    args = arg_parser.parse_args()
    app = create_app()
    url = f"http://{args.host}:{args.port}"
    print(f"Starting Midden web server on {url}")
    server = WSGIServer((args.host, args.port), app)
    # start() is prepare() followed by serve(). Splitting the two lets the
    # socket be bound and listening before the browser is pointed at it, so
    # the first request is queued instead of being refused.
    server.prepare()
    try:
        if args.start_web_browser:
            webbrowser.open(url)
        server.serve()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the server; exit quietly.
        pass
    finally:
        # Close the listening socket and shut the worker threads down.
        server.stop()
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
if __name__ == "__main__":
|
|
216
|
+
main()
|