midden 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
midden-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,72 @@
1
+ Metadata-Version: 2.4
2
+ Name: midden
3
+ Version: 0.1.0
4
+ Summary: A simple heap analysis and visualization tool.
5
+ License-Expression: MIT
6
+ Requires-Dist: flask>=3.1.3 ; extra == 'ui'
7
+ Requires-Dist: psutil>=7.2.2 ; extra == 'ui'
8
+ Requires-Dist: midden-analysis==0.1.0 ; extra == 'ui'
9
+ Requires-Dist: cheroot>=11.1.2 ; extra == 'ui'
10
+ Maintainer: James Pickering
11
+ Maintainer-email: James Pickering <james_pic@hotmail.com>
12
+ Requires-Python: >=3.10
13
+ Project-URL: Repository, https://github.com/jamespic/midden
14
+ Project-URL: Issues, https://github.com/jamespic/midden/issues
15
+ Provides-Extra: ui
16
+ Description-Content-Type: text/markdown
17
+
18
+ Midden
19
+ ======
20
+
21
+ Midden is a tool for dumping and analysing heaps from Python programs.
22
+
23
+ It takes a "dump first, ask questions later" approach, making it easy to grab a heap dump from a running
24
+ application, then analyse it offline later in its UI, potentially on a different machine entirely.
25
+
26
+ Installing
27
+ ----------
28
+
29
+ If you just want to grab a heap dump, you can install everything you need with:
30
+
31
+ ```
32
+ pip install midden
33
+ # Or if you're using uv
34
+ uv add midden
35
+ ```
36
+
37
+ If you want to analyse the data, you'll need the extra UI dependencies, which you can install with
38
+
39
+ ```
40
+ pip install midden[ui]
41
+ # Or if you're using uv
42
+ uv add midden[ui]
43
+ ```
44
+
45
+ Grabbing a Heap Dump
46
+ --------------------
47
+
48
+ Grabbing a heap dump can be as simple as:
49
+
50
+ ```
51
+ # Assuming the process we want to grab a heap dump from has pid 12345
52
+ midden-inject 12345 --output-file /tmp/dump.jsonl
53
+ ```
54
+
55
+ On Python 3.14 and newer, this will use `sys.remote_exec`, which means no extra dependencies.
56
+ You will need appropriate permissions [as documented here](https://docs.python.org/3/howto/remote_debugging.html#permission-requirements).
57
+ Roughly speaking, you either need to be root/administrator, or be running on Linux with ptrace protection disabled.
58
+
59
+ On Python 3.10 to 3.13, injection is done with gdb, which means gdb needs to be installed.
60
+ Gdb-based injection is only tested on Linux.
61
+
62
+ Analysing a Heap Dump
63
+ ---------------------
64
+
65
+ You can run the analysis UI with
66
+
67
+ ```
68
+ midden-ui
69
+ ```
70
+
71
+ This will start a web application, and pop up a web browser pointing at the analysis application. Upload a heap dump
72
+ generated with `midden-inject` to get started.
midden-0.1.0/README.md ADDED
@@ -0,0 +1,55 @@
1
+ Midden
2
+ ======
3
+
4
+ Midden is a tool for dumping and analysing heaps from Python programs.
5
+
6
+ It takes a "dump first, ask questions later" approach, making it easy to grab a heap dump from a running
7
+ application, then analyse it offline later in its UI, potentially on a different machine entirely.
8
+
9
+ Installing
10
+ ----------
11
+
12
+ If you just want to grab a heap dump, you can install everything you need with:
13
+
14
+ ```
15
+ pip install midden
16
+ # Or if you're using uv
17
+ uv add midden
18
+ ```
19
+
20
+ If you want to analyse the data, you'll need the extra UI dependencies, which you can install with
21
+
22
+ ```
23
+ pip install midden[ui]
24
+ # Or if you're using uv
25
+ uv add midden[ui]
26
+ ```
27
+
28
+ Grabbing a Heap Dump
29
+ --------------------
30
+
31
+ Grabbing a heap dump can be as simple as:
32
+
33
+ ```
34
+ # Assuming the process we want to grab a heap dump from has pid 12345
35
+ midden-inject 12345 --output-file /tmp/dump.jsonl
36
+ ```
37
+
38
+ On Python 3.14 and newer, this will use `sys.remote_exec`, which means no extra dependencies.
39
+ You will need appropriate permissions [as documented here](https://docs.python.org/3/howto/remote_debugging.html#permission-requirements).
40
+ Roughly speaking, you either need to be root/administrator, or be running on Linux with ptrace protection disabled.
41
+
42
+ On Python 3.10 to 3.13, injection is done with gdb, which means gdb needs to be installed.
43
+ Gdb-based injection is only tested on Linux.
44
+
45
+ Analysing a Heap Dump
46
+ ---------------------
47
+
48
+ You can run the analysis UI with
49
+
50
+ ```
51
+ midden-ui
52
+ ```
53
+
54
+ This will start a web application, and pop up a web browser pointing at the analysis application. Upload a heap dump
55
+ generated with `midden-inject` to get started.
@@ -0,0 +1,41 @@
1
+ [project]
2
+ name = "midden"
3
+ version = "0.1.0"
4
+ description = "A simple heap analysis and visualization tool."
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ dependencies = []
8
+ license = "MIT"
9
+ maintainers = [
10
+ { name = "James Pickering", email = "james_pic@hotmail.com" }
11
+ ]
12
+
13
+
14
+ [project.urls]
15
+ Repository = "https://github.com/jamespic/midden"
16
+ Issues = "https://github.com/jamespic/midden/issues"
17
+
18
+ [project.optional-dependencies]
19
+ ui = [
20
+ "flask>=3.1.3",
21
+ "psutil>=7.2.2",
22
+ "midden-analysis==0.1.0",
23
+ "cheroot>=11.1.2",
24
+ ]
25
+
26
+ [project.scripts]
27
+ midden-inject = "midden.dump.inject:main"
28
+ midden-ui = "midden.wsgi:main"
29
+
30
+ [build-system]
31
+ requires = ["uv_build>=0.9.22,<0.10.0"]
32
+ build-backend = "uv_build"
33
+
34
+ [dependency-groups]
35
+ dev = ["docker>=7.1.0", "pytest>=9.0.3", "ruff>=0.15.7", "ty>=0.0.24"]
36
+
37
+ [tool.pytest.ini_options]
38
+ markers = [
39
+ "linux: tests that require Linux",
40
+ "min_python(python_ver): tests that require a minimum Python version",
41
+ ]
@@ -0,0 +1 @@
1
+ """Web UI and heap-dump tooling for exploring Python process memory."""
@@ -0,0 +1 @@
1
+ """Helpers for capturing heap dumps from running Python processes."""
@@ -0,0 +1,210 @@
1
+ """Script to be injected with either gdb or sys.remote_exec to dump the heap of a running Python process to a file.
2
+
3
+ The output is a JSONL file where each line is a JSON object representing an object in the heap,
4
+ with its id, type, size, references, and optionally its value (for simple types)."""
5
+
6
+ from types import (
7
+ ModuleType,
8
+ FunctionType,
9
+ BuiltinFunctionType,
10
+ MethodType,
11
+ WrapperDescriptorType,
12
+ MethodWrapperType,
13
+ MethodDescriptorType,
14
+ ClassMethodDescriptorType,
15
+ GetSetDescriptorType,
16
+ MemberDescriptorType,
17
+ )
18
+
19
+ import gc
20
+ import json
21
+ import os
22
+ import sys
23
+
24
+ # Max length for string representations of values
25
+ _max_value_len = 1000
26
+
27
+ DEFAULT_DUMP_FILE_NAME = "/tmp/dump.jsonl"
28
+
29
+
30
def _dump_heap():
    """Snapshot the objects of the current process into a JSONL file.

    Objects come from ``gc.get_objects()`` plus any objects that are only
    discovered while walking references. One JSON record per object is written
    to ``DEFAULT_DUMP_FILE_NAME`` with a ``.partial`` suffix; the file is
    renamed to ``DEFAULT_DUMP_FILE_NAME`` once every record has been written.
    Any exception is reported on stderr instead of being raised.
    """
    # Everything the garbage collector currently tracks
    tracked = gc.get_objects()
    print(f"Found {len(tracked)} objects from gc.get_objects()", file=sys.stderr)
    discovered = []
    known_ids = {id(item) for item in tracked}

    # Ids of the dumper's own data structures, which must stay out of the dump
    skip_ids = {id(tracked), id(discovered), id(known_ids), id(_dump_heap)}
    skip_ids.add(id(skip_ids))  # the skip set is bookkeeping too

    def _maybe_add_ref(ref_obj, refs):
        ref_id = id(ref_obj)
        if ref_id in skip_ids:
            return
        refs.append(ref_id)
        if ref_id not in known_ids:
            # First sighting of an object gc did not report: queue it for dumping
            discovered.append(ref_obj)
            known_ids.add(ref_id)

    skip_ids.add(id(_maybe_add_ref))

    def _get_all_references(obj):
        """Return the ids obj refers to, including non-gc-tracked immutables."""
        refs = []
        skip_ids.add(id(refs))

        kind = type(obj)
        if kind is dict:
            for key, val in obj.items():
                _maybe_add_ref(key, refs)
                _maybe_add_ref(val, refs)
            return refs
        if kind in (list, tuple, frozenset, set):
            for member in obj:
                _maybe_add_ref(member, refs)
            return refs

        # Every other type: ask gc for the referents (covers __dict__, slots, etc.)
        referents = gc.get_referents(obj)
        skip_ids.add(id(referents))
        for referent in referents:
            _maybe_add_ref(referent, refs)
        return refs

    skip_ids.add(id(_get_all_references))

    try:
        with open(f"{DEFAULT_DUMP_FILE_NAME}.partial", "w") as f:
            skip_ids.add(id(f))

            def dump_object(obj):
                """Write one JSON line for obj unless it is dumper bookkeeping."""
                obj_id = id(obj)
                if obj_id in skip_ids:
                    return

                # Includes children that gc does not track
                references = _get_all_references(obj)

                # Referrers are deliberately not collected here (too slow);
                # they are indexed offline later.
                record = {
                    "id": obj_id,
                    "type": _get_type_name(obj),
                    "references": references,
                    # Shallow size only: referents are not included
                    "size": sys.getsizeof(obj, 0),
                }

                # A value is only recorded for whitelisted types
                extractor = _value_extractors.get(type(obj))
                if extractor:
                    try:
                        record["value"] = extractor(obj)
                    except Exception as e:
                        record["value"] = f"<error extracting value: {e}>"

                skip_ids.add(id(record))
                skip_ids.add(id(record.get("references")))

                line = json.dumps(record, default=str)
                skip_ids.add(id(line))
                f.write(line)
                f.write("\n")

            skip_ids.add(id(dump_object))

            for obj in tracked:
                dump_object(obj)
            # `discovered` keeps growing while it is iterated: dumping an
            # object can queue further, previously unseen objects.
            for obj in discovered:
                dump_object(obj)

        os.rename(f"{DEFAULT_DUMP_FILE_NAME}.partial", DEFAULT_DUMP_FILE_NAME)

    except Exception as e:
        sys.stderr.write(f"dump_heap error: {e}\n")
134
+
135
+
136
+ def _get_qualname(obj):
137
+ """Get a qualified name for an object, if possible."""
138
+ if qualname := getattr(obj, "__qualname__", None):
139
+ return qualname
140
+ elif name := getattr(obj, "__name__", None):
141
+ return name
142
+ else:
143
+ return repr(obj)
144
+
145
+
146
+ def _get_prefix(obj):
147
+ """Get the module name or class name for an object, if possible."""
148
+ if module := getattr(obj, "__module__", None):
149
+ return f"{module}."
150
+ elif obj_class := getattr(obj, "__objclass__", None):
151
+ return f"{_get_qualname(obj_class)}."
152
+ else:
153
+ return ""
154
+
155
+
156
def _name_extractor(obj):
    """Build a readable dotted name for obj from its prefix and qualified name."""
    prefix = _get_prefix(obj)
    qualname = _get_qualname(obj)
    return prefix + qualname
159
+
160
+
161
def _get_type_name(obj):
    """Return the readable dotted name of obj's type."""
    return _name_extractor(type(obj))
165
+
166
+
167
def _string_extractor(obj):
    """Return the string as-is, or its first ``_max_value_len`` characters
    followed by a truncation marker when it is longer than that."""
    if len(obj) <= _max_value_len:
        return obj
    return obj[:_max_value_len] + "...<truncated>"
172
+
173
+
174
def _bytes_extractor(obj):
    """Return ``repr`` of the bytes value; values longer than
    ``_max_value_len`` are cut to that length and get a truncation marker."""
    if len(obj) <= _max_value_len:
        return repr(obj)
    head = obj[:_max_value_len]
    return repr(head) + "...<truncated>"
179
+
180
+
181
+ def _module_extractor(obj):
182
+ """Render modules by name rather than by their default repr."""
183
+ return f"module {obj.__name__}"
184
+
185
+
186
+ _value_extractors = {
187
+ str: _string_extractor,
188
+ bytes: _bytes_extractor,
189
+ int: str,
190
+ float: str,
191
+ complex: str,
192
+ bool: str,
193
+ type(None): lambda x: "None",
194
+ ModuleType: _module_extractor,
195
+ FunctionType: _name_extractor,
196
+ BuiltinFunctionType: _name_extractor,
197
+ MethodType: _name_extractor,
198
+ staticmethod: _name_extractor,
199
+ classmethod: _name_extractor,
200
+ WrapperDescriptorType: _name_extractor,
201
+ MethodWrapperType: _name_extractor,
202
+ MethodDescriptorType: _name_extractor,
203
+ ClassMethodDescriptorType: _name_extractor,
204
+ GetSetDescriptorType: _name_extractor,
205
+ MemberDescriptorType: _name_extractor,
206
+ type: _name_extractor,
207
+ }
208
+
209
+
210
+ # _dump_heap() # Replaced by the injector so the dump only runs inside the target process.
@@ -0,0 +1,411 @@
1
+ import datetime
2
+ from contextlib import contextmanager
3
+ import io
4
+ import shutil
5
+ import re
6
+ import tarfile
7
+ import time
8
+ import tempfile
9
+ from subprocess import Popen, PIPE
10
+ import os
11
+ import pathlib
12
+ import sys
13
+ import subprocess
14
+
15
+ try:
16
+ from sys import remote_exec # ty: ignore[unresolved-import]
17
+ except ImportError:
18
+ remote_exec = None
19
+
20
+ try:
21
+ from os import setns # ty: ignore[unresolved-import]
22
+ except ImportError:
23
+ setns = None
24
+
25
+ try:
26
+ import psutil
27
+
28
+ _psutil_available = True
29
+ except ImportError:
30
+ _psutil_available = False
31
+
32
+ GIL_ENABLED = True
33
+ try:
34
+ from sys import _is_gil_enabled # ty: ignore[unresolved-import]
35
+
36
+ GIL_ENABLED = _is_gil_enabled()
37
+ except ImportError:
38
+ pass
39
+
40
+ DEFAULT_DUMP_FILE = "/tmp/dump.jsonl"
41
+
42
+ _FILES_NEEDED_FOR_INJECTION = ["dump_heap.py", "inject.py"]
43
+
44
+
45
def _build_tarball_of_dumping_code():
    """Return a gzipped tar archive, as bytes, containing the dump scripts.

    Every name in ``_FILES_NEEDED_FOR_INJECTION`` is taken from the directory
    of this module and stored in the archive under its bare file name.
    """
    source_dir = pathlib.Path(__file__).parent
    buffer = io.BytesIO()
    with tarfile.open(fileobj=buffer, mode="w:gz") as archive:
        for name in _FILES_NEEDED_FOR_INJECTION:
            archive.add(source_dir / name, arcname=name)
    # Read the buffer only after the archive is closed, so the gzip stream is complete
    return buffer.getvalue()
53
+
54
+
55
+ # We do this at import time so that if we fork into another namespace, it's still available
56
+ _TARBALL = _build_tarball_of_dumping_code()
57
+
58
+
59
def dump_heap_from_pid(
    pid,
    output_file=DEFAULT_DUMP_FILE,
    can_use_namespace_injection=True,
    can_use_alternate_python_interpreter=True,
):
    """Dump the heap of the running Python process ``pid`` to ``output_file``.

    The two flags allow (True) or forbid (False) switching into the target's
    namespaces and re-running the injection under a different Python
    interpreter, respectively.
    """
    _dump_heap_from_pid_possibly_in_namespace(
        pid,
        output_file=output_file,
        can_use_namespace_injection=can_use_namespace_injection,
        can_use_alternate_python_interpreter=can_use_alternate_python_interpreter,
    )
72
+
73
+
74
+ _DUMP_SCRIPT = (pathlib.Path(__file__).parent / "dump_heap.py").read_text()
75
+
76
+
77
def _build_dump_heap_code(output_file):
    """Return the dump script text, retargeted at ``output_file`` and armed.

    Two textual substitutions are applied to ``_DUMP_SCRIPT``: the quoted
    default dump path becomes ``repr(output_file)``, and the commented-out
    call to the dump function is uncommented so the script runs the dump.
    """
    retargeted = _DUMP_SCRIPT.replace(f'"{DEFAULT_DUMP_FILE}"', repr(output_file))
    return retargeted.replace("# _dump_heap()", "_dump_heap()")
82
+
83
+
84
def _dump_heap_from_pid_possibly_using_an_alternate_python_interpreter(
    pid,
    output_file=DEFAULT_DUMP_FILE,
    can_use_alternate_python_interpreter=True,
):
    """Inject from this interpreter, or delegate to a better-matched one.

    An alternate interpreter is only looked up when
    ``can_use_alternate_python_interpreter`` is true; if none is suggested the
    injection is done directly from the current interpreter.
    """
    alternate_python = None
    if can_use_alternate_python_interpreter:
        alternate_python = _should_use_alternate_python_interpreter(pid)

    if not alternate_python:
        _dump_heap_from_pid(pid, output_file)
        return

    print(
        f"Using alternate Python interpreter {alternate_python} for injection",
        file=sys.stderr,
    )
    _dump_heap_from_pid_using_alternate_python_interpreter(
        pid, alternate_python, output_file
    )
102
+
103
+
104
def _dump_heap_from_pid_using_alternate_python_interpreter(
    pid, alternate_python, output_file=DEFAULT_DUMP_FILE
):
    """Re-run the injector as a subprocess under ``alternate_python``.

    The bundled scripts in ``_TARBALL`` are unpacked into a temporary
    directory and ``inject.py`` is run from there, because the original source
    tree may not be visible from the target mount namespace. Raises
    ``subprocess.CalledProcessError`` if the subprocess exits non-zero.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        with tarfile.open(fileobj=io.BytesIO(_TARBALL), mode="r:gz") as bundle:
            bundle.extractall(path=tmpdir)

        cmd = [alternate_python, tmpdir + "/inject.py", str(pid)]
        cmd += ["--output-file", output_file]
        # The child must not go looking for yet another interpreter ...
        cmd.append("--no-alternate-python-interpreter")
        # ... nor switch namespaces: that has been sorted out before this call.
        cmd.append("--no-namespace-injection")

        print(f"Running injection command: {' '.join(cmd)}", file=sys.stderr)
        subprocess.run(cmd, check=True)
125
+
126
+
127
def _should_use_alternate_python_interpreter(pid) -> str | None:
    """Return the path of a Python executable better suited to inject into ``pid``.

    Returns None when the current interpreter should be used: either it
    already matches the target, the target could not be inspected, or no
    matching interpreter could be found (a warning is printed in that case).
    """
    try:
        exe, maps = _get_exe_and_maps(pid)
    except Exception as e:
        print(
            f"Could not determine target process executable and maps, so proceeding with this interpreter: {e!r}",
            file=sys.stderr,
        )
        return None

    if _can_this_python_inject(exe):
        return None

    if os.path.basename(exe).startswith("python"):
        print(
            f"Using {exe} as alternate Python interpreter based on process exe",
            file=sys.stderr,
        )
        return exe

    # The executable is not named like Python (e.g. an embedding application):
    # use a mapped libpython to work out which Python version it runs.
    own_name = _this_effective_executable_name()
    for mapped_path in maps:
        found = re.match(r".*lib(python\d\.\d.*)\.so", mapped_path)
        if not found:
            continue
        python_exe_wanted = found.group(1)
        if python_exe_wanted == own_name:
            print(
                f"Mapped library {mapped_path} suggests target process is running same Python version, so no alternate interpreter needed",
                file=sys.stderr,
            )
            return None
        # Is an executable of that name available on the PATH?
        python_exe = shutil.which(python_exe_wanted)
        if python_exe:
            print(
                f"Using {python_exe} as alternate Python interpreter based on mapped library {mapped_path}",
                file=sys.stderr,
            )
            return python_exe

    print(
        "Couldn't find an alternate Python interpreter to use for injection,"
        " but target process is running a different Python version, so injection may fail",
        file=sys.stderr,
    )
    return None
169
+
170
+
171
def _get_exe_and_maps(pid):
    """Return ``(exe, maps)`` for the process ``pid``.

    ``exe`` is the path of the process executable and ``maps`` is a list of
    the paths of its memory mappings. psutil is used when it was importable;
    otherwise Linux procfs is read directly. Collecting the maps is
    best-effort: if it fails, whatever was gathered so far (possibly nothing)
    is returned.

    Raises:
        Exception: on a non-Linux platform when psutil is not available.
        Whatever psutil or ``os.readlink`` raise when the executable itself
        cannot be determined.
    """
    if _psutil_available:
        process = psutil.Process(pid)
        exe = process.exe()
        try:
            maps = [region.path for region in process.memory_maps(grouped=False)]
        except Exception:
            # Best-effort: the maps are only a hint for version detection
            maps = []
        return exe, maps

    if sys.platform == "linux":
        # Linux-only procfs fallback
        exe = os.readlink(f"/proc/{pid}/exe")
        maps = []
        try:
            with open(f"/proc/{pid}/maps") as f:
                for line in f:
                    # address perms offset dev inode pathname -- the pathname is
                    # optional and may itself contain spaces, so split off only
                    # the five leading columns.
                    fields = line.split(None, 5)
                    if len(fields) == 6:
                        maps.append(fields[5].rstrip("\n"))
        except Exception:
            # Best-effort, as above
            pass
        return exe, maps

    raise Exception(
        "Can't determine process executable on this platform without psutil"
    )
199
+
200
+
201
+ def _can_this_python_inject(exe):
202
+ """Return whether the current interpreter likely matches the target runtime."""
203
+ if exe == sys.executable:
204
+ return True
205
+ if _get_effective_executable_name(exe) == _this_effective_executable_name():
206
+ return True
207
+
208
+ return False
209
+
210
+
211
+ def _get_effective_executable_name(exe):
212
+ """Return the versioned Python executable name for a given path, if it looks like Python."""
213
+ basename = os.path.basename(exe)
214
+ match = re.match(r"python\d\.\d.+t?", basename)
215
+ if match:
216
+ return match.group(0)
217
+ if os.path.islink(exe):
218
+ # If it's a symlink, check if the target looks like Python
219
+ target = os.readlink(exe)
220
+ return _get_effective_executable_name(target)
221
+ return None
222
+
223
+
224
def _this_effective_executable_name():
    """Return the versioned executable name of the running interpreter.

    The result is ``python<major>.<minor>``, with a trailing ``t`` appended
    when ``GIL_ENABLED`` is false.
    """
    name = f"python{sys.version_info.major}.{sys.version_info.minor}"
    if GIL_ENABLED:
        return name
    # Free-threaded (GIL-less) builds name their executable with a "t" suffix,
    # e.g. python3.13t
    return name + "t"
233
+
234
+
235
def _dump_heap_from_pid_possibly_in_namespace(
    pid,
    output_file=DEFAULT_DUMP_FILE,
    can_use_namespace_injection=True,
    can_use_alternate_python_interpreter=True,
):
    """Dump ``pid``, first entering its namespaces when that is needed and allowed.

    Namespace injection is used only if ``can_use_namespace_injection`` is
    true and the target lives in a different mount namespace.
    """
    needs_namespace = can_use_namespace_injection and _should_use_namespace(pid)
    if not needs_namespace:
        _dump_heap_from_pid_possibly_using_an_alternate_python_interpreter(
            pid, output_file, can_use_alternate_python_interpreter
        )
        return

    print(
        "Target process is in a different mount namespace, using namespace injection method",
        file=sys.stderr,
    )
    _dump_heap_from_pid_in_namespace(
        pid, output_file, can_use_alternate_python_interpreter
    )
254
+
255
+
256
def _dump_heap_from_pid_in_namespace(
    pid, output_file=DEFAULT_DUMP_FILE, can_use_alternate_python_interpreter=True
):
    """Dump ``pid`` from inside its namespaces and copy the result out.

    ``output_file`` is opened before the namespace switch, so the descriptor
    still refers to the caller's filesystem. Inside the namespace the dump is
    written to ``/tmp/dump_<pid>.jsonl``, copied line by line to that
    descriptor, and then removed.

    The body of the ``with`` block runs in every process ``_in_namespace``
    creates; only the one that receives a non-None ``inner_pid`` does the work.
    """
    dump_loc_in_namespace = f"/tmp/dump_{pid}.jsonl"
    # Explicit 0o666 (before umask) matches the builtin open(); os.open()
    # defaults to 0o777, which would make the dump executable.
    out = os.open(output_file, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o666)

    try:
        with _in_namespace(pid) as inner_pid:
            if inner_pid is not None:
                _dump_heap_from_pid_possibly_using_an_alternate_python_interpreter(
                    inner_pid, dump_loc_in_namespace, can_use_alternate_python_interpreter
                )
                # NOTE(review): waits indefinitely if the dump never appears
                while not os.path.exists(dump_loc_in_namespace):
                    time.sleep(0.1)
                with open(dump_loc_in_namespace) as f, os.fdopen(out, "w") as out_f:
                    out = None  # the descriptor is owned (and closed) by out_f now
                    for line in f:
                        out_f.write(line)
                os.remove(dump_loc_in_namespace)
    finally:
        # Processes that did not copy, and any failure path, still hold the raw
        # descriptor: close it rather than leak it.
        if out is not None:
            os.close(out)
274
+
275
+
276
@contextmanager
def _in_namespace(pid):
    """Run the ``with`` body in a child that has joined ``pid``'s PID and mount namespaces.

    The process forks twice, because joining a PID namespace only takes effect
    for children created afterwards. The body therefore runs in three
    processes: the innermost child receives the target's PID as seen inside
    its namespace, while the original process and the intermediate child
    receive None. Both children terminate with ``os._exit`` once the body has
    finished; the original process waits for them and reports a failure on
    stderr.

    Raises:
        NotImplementedError: if ``os.setns`` is not available.
    """
    if setns is None:
        raise NotImplementedError(
            "Namespace injection method is not available on this platform"
        )
    inner_pid = _identify_pid_within_namespace(pid)
    forked_pid = os.fork()
    if forked_pid == 0:
        for ns_name in ("pid", "mnt"):
            ns_fd = os.open(f"/proc/{pid}/ns/{ns_name}", os.O_RDONLY)
            try:
                setns(ns_fd, 0)
            finally:
                os.close(ns_fd)
        # Must fork again after setns to actually be in the new PID namespace
        inner_forked_pid = os.fork()
        if inner_forked_pid == 0:
            try:
                yield inner_pid
            except BaseException:
                # Never let a forked child escape the with-block and carry on
                # running the caller's code: report the error and exit non-zero.
                import traceback

                traceback.print_exc()
                os._exit(1)
            os._exit(0)
        else:
            yield None
            _, status = os.waitpid(inner_forked_pid, 0)
            # waitpid() returns an encoded wait status, not an exit code;
            # passing it to _exit() unchanged would turn exit code 1
            # (status 256) into 0.
            exit_code = os.waitstatus_to_exitcode(status)
            if exit_code < 0:
                # Killed by signal -exit_code: use the shell's 128+N convention
                exit_code = 128 - exit_code
            os._exit(exit_code)
    else:
        yield None
        _, status = os.waitpid(forked_pid, 0)
        if status != 0:
            print(f"Child process failed with status {status}", file=sys.stderr)
301
+
302
+
303
def _identify_pid_within_namespace(pid):
    """Return the PID that ``pid`` has inside its own PID namespace.

    This is the last value on the ``NSpid:`` line of ``/proc/<pid>/status``;
    None is returned when the file has no such line.
    """
    with open(f"/proc/{pid}/status") as status_file:
        nspid_lines = (row for row in status_file if row.startswith("NSpid:"))
        first = next(nspid_lines, None)
        if first is None:
            return None
        return int(first.split()[-1])
309
+
310
+
311
def _dump_heap_from_pid(
    pid, output_file=DEFAULT_DUMP_FILE, inactivity_timeout=datetime.timedelta(seconds=5)
):
    """Build the payload script and inject it into the target process.

    ``remote_exec`` is used when available; otherwise gdb is driven to run the
    script inside the target. With ``remote_exec`` the call returns once
    ``output_file`` has been (re)created by the target.

    Args:
        pid: PID of the target process, as visible from this process.
        output_file: Path the target process writes the dump to.
        inactivity_timeout: With ``remote_exec``, how long the ``.partial``
            file may go without being modified (or without appearing at all)
            before giving up.

    Raises:
        PermissionError: if the dump made no progress for ``inactivity_timeout``.
    """
    code = _build_dump_heap_code(output_file)

    def _file_signature(path):
        """Identify the file currently at path, or None if there is none."""
        try:
            info = os.stat(path)
        except FileNotFoundError:
            return None
        return (info.st_ino, info.st_mtime_ns)

    # delete=False: the target process opens the script by name later on
    with tempfile.NamedTemporaryFile(
        suffix=".py", mode="w", delete=False
    ) as script_file:
        script_file.write(code)

    if remote_exec is not None:
        print("Using remote_exec to inject code", file=sys.stderr)
        # A dump left over from an earlier run must not be mistaken for the new
        # one, so wait for the file at output_file to change, not just to exist.
        stale_signature = _file_signature(output_file)
        remote_exec(pid, script_file.name)
        partial_file = output_file + ".partial"
        timeout_seconds = inactivity_timeout.total_seconds()
        last_progress = time.time()
        while _file_signature(output_file) == stale_signature:
            try:
                last_progress = os.stat(partial_file).st_mtime
            except FileNotFoundError:
                pass
            if time.time() - last_progress > timeout_seconds:
                # The script is left in place here: the target may still pick
                # it up after we have stopped waiting.
                raise PermissionError(
                    "Timed out waiting for dump file to be created, injection may have failed"
                )
            time.sleep(0.05)  # poll instead of spinning on a CPU core

    else:
        print("remote_exec not available, falling back to gdb method", file=sys.stderr)
        gdb_cmds = [
            "(char *) PyGILState_Ensure()",
            '(void) PyRun_SimpleString("'
            rf'exec(open(\"{script_file.name}\").read())")',
            "(void) PyGILState_Release($1)",
        ]
        p = Popen(
            [
                "gdb",
                "-p",
                str(pid),
                "--batch",
                *(f"--eval-command=call {cmd}" for cmd in gdb_cmds),
            ],
            stdout=PIPE,
            stderr=PIPE,
            text=True,
        )
        out, err = p.communicate()
        print(out, file=sys.stderr)
        print(err, file=sys.stderr)

    # The target has finished with the script (the dump exists, or gdb has
    # returned), so the temporary file can go. Cleanup is best-effort.
    try:
        os.unlink(script_file.name)
    except OSError:
        pass
360
+
361
+
362
+ def _should_use_namespace(pid):
363
+ """Return whether the target process lives in a different mount namespace."""
364
+ try:
365
+ import os
366
+
367
+ own_ns = os.readlink("/proc/self/ns/mnt")
368
+ target_ns = os.readlink(f"/proc/{pid}/ns/mnt")
369
+ return own_ns != target_ns
370
+ except Exception:
371
+ # This can fail for a number of reasons, but they're all cases where we can't join the processes namespace
372
+ return False
373
+
374
+
375
def main():
    """Parse the command line and dump the heap of the requested process."""
    import argparse

    parser = argparse.ArgumentParser(
        description="Dump the heap of a running Python process."
    )
    parser.add_argument("pid", type=int, help="PID of the target Python process")
    parser.add_argument(
        "--output-file",
        "-o",
        default=DEFAULT_DUMP_FILE,
        help=f"Path to output file (default: {DEFAULT_DUMP_FILE})",
    )

    # Opt-out switches: each one stores False into a flag that defaults to True
    opt_outs = (
        (
            "--no-namespace-injection",
            "can_use_namespace_injection",
            "Don't attempt to use namespace injection method.",
        ),
        (
            "--no-alternate-python-interpreter",
            "can_use_alternate_python_interpreter",
            "Don't attempt to use an alternate Python interpreter for injection, even if the target process is running a different Python version.",
        ),
    )
    for flag, dest, help_text in opt_outs:
        parser.add_argument(flag, action="store_false", dest=dest, help=help_text)

    options = parser.parse_args()
    dump_heap_from_pid(
        options.pid,
        options.output_file,
        options.can_use_namespace_injection,
        options.can_use_alternate_python_interpreter,
    )
408
+
409
+
410
+ if __name__ == "__main__":
411
+ main()
@@ -0,0 +1,14 @@
1
+ body { font-family: Arial, sans-serif; margin: 2em; }
2
+ table { border-collapse: collapse; width: 50%; }
3
+ th, td { border: 1px solid #ccc; padding: 8px 12px; text-align: left; }
4
+ th { background: #f4f4f4; }
5
+ h1 { margin-bottom: 1em; }
6
+ ul { margin-bottom: 2em; }
7
+ li { margin: 0.5em 0; }
8
+ form.upload-form { border: 1px solid #ccc; padding: 1em; width: 350px; background: #f9f9f9; }
9
+ label { display: block; margin-bottom: 0.5em; }
10
+ input[type="text"], input[type="file"], select { width: 100%; margin-bottom: 1em; }
11
+ input[type="submit"] { padding: 0.5em 1em; }
12
+ dl { margin-bottom: 2em; }
13
+ dt { font-weight: bold; margin-top: 1em; }
14
+ dd { margin-left: 1.5em; margin-bottom: 0.5em; }
@@ -0,0 +1,35 @@
1
+ {% macro object_list(objects, dump_name, sortable=False) %}
2
+ <table>
3
+ <tr>
4
+ <th>{% if sortable %}
5
+ <a href="?sort=id">ID</a>
6
+ {% else %}
7
+ ID
8
+ {% endif %}
9
+ </th>
10
+ <th>Type</th>
11
+ <th>
12
+ {% if sortable %}
13
+ <a href="?sort=size">Size</a>
14
+ {% else %}
15
+ Size
16
+ {% endif %}
17
+ </th>
18
+ <th>
19
+ {% if sortable %}
20
+ <a href="?sort=subtree_size">Subtree Size</a>
21
+ {% else %}
22
+ Subtree Size
23
+ {% endif %}
24
+ </th>
25
+ <th>Value</th></tr>
26
+ {% for obj in objects %}
27
+ <tr>
28
+ <td><a href="{{ url_for('explore_object', dump_name=dump_name, obj_id=obj.id) }}">{{ obj.id }}</a></td>
29
+ <td>{{ obj.type }}</td>
30
+ <td>{{ obj.size }} bytes</td>
31
+ <td>{% if obj.subtree_size is not none %}{{ obj.subtree_size }} bytes{% else %}Unknown{% endif %}</td>
32
+ <td>{% if obj.value is not none %}{{ obj.value }}{% endif %}</td></tr>
33
+ {% endfor %}
34
+ </table>
35
+ {% endmacro %}
@@ -0,0 +1,25 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Heap Dump Overview - {{ dump_name }}</title>
5
+ <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
6
+ </head>
7
+ <body>
8
+ <h1>Heap Dump Overview: {{ dump_name }}</h1>
9
+ <p><a href="{{ url_for('index') }}">Back to heap dump list</a></p>
10
+ <table>
11
+ <tr>
12
+ <th><a href="{{ url_for('explore_dump', dump_name=dump_name, sort_by='type') }}">Type</a></th>
13
+ <th><a href="{{ url_for('explore_dump', dump_name=dump_name, sort_by='count') }}">Instance Count</a></th>
14
+ <th><a href="{{ url_for('explore_dump', dump_name=dump_name, sort_by='size') }}">Total Size</a></th>
15
+ </tr>
16
+ {% for type_name, summary in type_summaries %}
17
+ <tr>
18
+ <td><a href="{{ url_for('explore_type', dump_name=dump_name, type_name=type_name) }}">{{ type_name }}</a></td>
19
+ <td>{{ summary.count }}</td>
20
+ <td>{{ summary.total_size }}</td>
21
+ </tr>
22
+ {% endfor %}
23
+ </table>
24
+ </body>
25
+ </html>
@@ -0,0 +1,42 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <title>Heap Dump Explorer</title>
5
+ <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
6
+ </head>
7
+ <body>
8
+ <h1>Heap Dump Explorer</h1>
9
+
10
+ <h2>Existing Heap Dumps</h2>
11
+ {% if dump_names %}
12
+ <ul>
13
+ {% for name in dump_names %}
14
+ <li>
15
+ <a href="{{ url_for('explore_dump', dump_name=name) }}">{{ name }}</a>
16
+ </li>
17
+ {% endfor %}
18
+ </ul>
19
+ {% else %}
20
+ <p>No heap dumps loaded yet.</p>
21
+ {% endif %}
22
+
23
+ <h2>Upload New Heap Dump</h2>
24
+ <form action="{{ url_for('upload_dump') }}" method="post" enctype="multipart/form-data" class="upload-form">
25
+ <label for="dump_name">Heap Dump Name:</label>
26
+ <input type="text" id="dump_name" name="dump_name">
27
+ <label for="estimator_precision">Subtree Size Estimate Precision</label>
28
+ <select id="estimator_precision" name="estimator_precision" required>
29
+ <option value="no_estimates">No Estimates (fastest, lowest memory)</option>
30
+ <option value="low">Low Precision</option>
31
+ <option value="medium" selected>Medium Precision</option>
32
+ <option value="high">High Precision</option>
33
+ <option value="exact">Exact (slowest, highest memory)</option>
34
+ </select>
35
+
36
+ <label for="dump_file">Heap Dump File (.jsonl):</label>
37
+ <input type="file" id="dump_file" name="dump_file" accept=".jsonl" required>
38
+
39
+ <input type="submit" value="Upload">
40
+ </form>
41
+ </body>
42
+ </html>
@@ -0,0 +1,52 @@
1
{% import 'components.html' as components %}
<!DOCTYPE html>
{# Detail page for one object (`obj`) of the dump `dump_name`.
   `current_from_id` / `current_to_id` are the path-search endpoints already
   chosen for this dump, or none when not chosen yet. #}
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>{{ obj.type }} instance {{ obj.id }} - {{ dump_name }}</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <h1>{{ obj.type }} instance {{ obj.id }} in Heap Dump: {{ dump_name }}</h1>
    <p><a href="{{ url_for('explore_dump', dump_name=dump_name) }}">Back to overview</a></p>
    {% if current_from_id == obj.id %}
    <p><strong>This object will be used as the starting point if you start a path search.</strong></p>
    {% else %}
    <form action="{{ url_for('set_path_finding_endpoint', dump_name=dump_name) }}" method="post">
        <input type="hidden" name="from_id" value="{{ obj.id }}">
        {# Compare against none rather than relying on truthiness so that an
           ID of 0 still counts as a chosen endpoint. #}
        <button type="submit">Can you find a path from here to {% if current_to_id is not none %}object {{ current_to_id }}{% else %}...{% endif %}?</button>
    </form>
    {% endif %}
    {% if current_to_id == obj.id %}
    <p><strong>This object will be used as the ending point if you start a path search.</strong></p>
    {% else %}
    <form action="{{ url_for('set_path_finding_endpoint', dump_name=dump_name) }}" method="post">
        <input type="hidden" name="to_id" value="{{ obj.id }}">
        <button type="submit">Can you find a path from {% if current_from_id is not none %}object {{ current_from_id }}{% else %}...{% endif %} to here?</button>
    </form>
    {% endif %}
    <dl>
        <dt>Type:</dt>
        <dd><a href="{{ url_for('explore_type', type_name=obj.type, dump_name=dump_name) }}">{{ obj.type }}</a></dd>
        <dt>ID:</dt>
        <dd>{{ obj.id }}</dd>
        <dt>Size:</dt>
        <dd>{{ obj.size }} bytes</dd>
        {# Subtree size and value are optional and only shown when present. #}
        {% if obj.subtree_size is not none %}
        <dt>Subtree Size:</dt>
        <dd>{{ obj.subtree_size }} bytes</dd>
        {% endif %}
        {% if obj.value is not none %}
        <dt>Value:</dt>
        <dd>{{ obj.value }}</dd>
        {% endif %}
    </dl>
    {% if obj.referrers %}
    <h2>Referrers</h2>
    {{ components.object_list(obj.referrers, dump_name) }}
    {% endif %}
    {% if obj.references %}
    <h2>References</h2>
    {{ components.object_list(obj.references, dump_name) }}
    {% endif %}
</body>
</html>
@@ -0,0 +1,46 @@
1
{% import 'components.html' as components %}
<!DOCTYPE html>
{# Result page of a path search from `from_id` to `to_id` in `dump_name`.
   `path` is the sequence of objects found (or none); `avoid_ids` is the list
   of object IDs the search was told to skip. #}
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Path from {{ from_id }} to {{ to_id }} - {{ dump_name }}</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <h1>Path from {{ from_id }} to {{ to_id }} in Heap Dump: {{ dump_name }}</h1>
    <p><a href="{{ url_for('explore_dump', dump_name=dump_name) }}">Back to overview</a></p>
    {# `not path` covers both none and an empty sequence, so an empty result
       shows the message instead of a table with only a header row. #}
    {% if not path %}
    <p><strong>No path found.</strong></p>
    {% else %}
    <table>
        <tr>
            <th>ID</th>
            <th>Type</th>
            <th>Size</th>
            <th>Subtree Size</th>
            <th>Value</th>
            <th>Avoid</th>
        </tr>

        {% for obj in path %}
        <tr>
            <td><a href="{{ url_for('explore_object', dump_name=dump_name, obj_id=obj.id) }}">{{ obj.id }}</a></td>
            <td>{{ obj.type }}</td>
            <td>{{ obj.size }} bytes</td>
            <td>{% if obj.subtree_size is not none %}{{ obj.subtree_size }} bytes{% else %}Unknown{% endif %}</td>
            <td>{% if obj.value is not none %}{{ obj.value }}{% endif %}</td>
            <td>
                {# Re-run the same search with this object added to the
                   avoid list; url_for emits one avoid_id parameter per ID. #}
                {% if obj.id not in avoid_ids %}
                <a
                    href="{{ url_for('find_path', dump_name=dump_name, from_id=from_id, to_id=to_id, avoid_id=avoid_ids + [obj.id]) }}"
                    title="Find a new path that avoids this object">
                    Avoid
                </a>
                {% endif %}
            </td>
        </tr>
        {% endfor %}
    </table>
    {% endif %}

</body>
</html>
@@ -0,0 +1,27 @@
1
{% import 'components.html' as components %}
<!DOCTYPE html>
{# Paginated list of the objects of type `type_name` in `dump_name`.
   `page` is the 1-based page being shown, `total_pages` the page count and
   `sort` the active sort key, which every pager link carries along. #}
<html lang="en">
<head>
    <meta charset="utf-8">
    <title>Instances of type {{ type_name }} - {{ dump_name }}</title>
    <link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <h1>Instances of type {{ type_name }} in Heap Dump: {{ dump_name }}</h1>
    <p><a href="{{ url_for('explore_dump', dump_name=dump_name) }}">Back to overview</a></p>
    {# A <form> may not be nested inside a <p>: the parser closes the
       paragraph as soon as it meets the form, so a <div> wraps the pager. #}
    <div>
        <form action="{{ url_for('explore_type', dump_name=dump_name, type_name=type_name) }}" method="get">
            <a title="first page" href="{{ url_for('explore_type', dump_name=dump_name, type_name=type_name, page=1, sort=sort) }}">⏮️</a>
            {% if page > 1 %}
            <a title="previous page" href="{{ url_for('explore_type', dump_name=dump_name, type_name=type_name, page=page-1, sort=sort) }}">◀️</a>
            {% endif %}
            Page <input type="number" name="page" value="{{ page }}" min="1" max="{{ total_pages }}">
            <input type="hidden" name="sort" value="{{ sort }}"> of {{ total_pages }}
            {% if page < total_pages %}
            <a title="next page" href="{{ url_for('explore_type', dump_name=dump_name, type_name=type_name, page=page+1, sort=sort) }}">▶️</a>
            {% endif %}
            <a title="last page" href="{{ url_for('explore_type', dump_name=dump_name, type_name=type_name, page=total_pages, sort=sort) }}">⏭️</a>
        </form>
    </div>
    {{ components.object_list(objects, dump_name, sortable=True) }}
</body>
</html>
@@ -0,0 +1,216 @@
1
+ """A simple Flask GUI, with templated HTML with very basic styling, that lets the user explore a heap dump created by dump_heap.py."""
2
+ import webbrowser
3
+
4
+ import shutil
5
+
6
+ import argparse
7
+
8
+ from werkzeug.exceptions import NotFound
9
+ from ntpath import basename
10
+ import os
11
+ import pathlib
12
+
13
+ from cheroot.wsgi import Server as WSGIServer
14
+ from flask import Flask, request, redirect, url_for, render_template, session
15
+ from midden_analysis import HeapDumpExplorer, TypeSummary, EstimatorPrecision
16
+
17
# Directory holding one "<name>.lmdb" entry per imported heap dump. It can be
# overridden through the DUMPS_DIR environment variable.
DUMPS_DIR = os.environ.get("DUMPS_DIR", "/tmp/dumps")

# Translates the values of the upload form's "estimator_precision" field into
# the EstimatorPrecision members handed to the importer.
PRECISION_MAP = dict(
    no_estimates=EstimatorPrecision.NoEstimates,
    low=EstimatorPrecision.Low,
    medium=EstimatorPrecision.Medium,
    high=EstimatorPrecision.High,
    exact=EstimatorPrecision.Exact,
)
26
+
27
+
28
+ def create_app():
29
+ """Create the Flask app and preload any dumps already stored on disk."""
30
+ os.makedirs(DUMPS_DIR, exist_ok=True)
31
+ loaded_dumps: dict[str, HeapDumpExplorer] = {
32
+ basename(path)[:-5]: HeapDumpExplorer(f"{DUMPS_DIR}/{path}")
33
+ for path in os.listdir(DUMPS_DIR)
34
+ if path.endswith(".lmdb")
35
+ }
36
+
37
+ def get_dump(dump_name) -> HeapDumpExplorer:
38
+ """Return a loaded dump or raise a 404 for unknown names."""
39
+ explorer = loaded_dumps.get(dump_name)
40
+ if not explorer:
41
+ raise NotFound(f"Dump '{dump_name}' not found")
42
+ return explorer
43
+
44
+ app = Flask(__name__)
45
+
46
+ @app.route("/")
47
+ def index():
48
+ dump_names = list(loaded_dumps.keys())
49
+ return render_template("index.html", dump_names=dump_names)
50
+
51
+ @app.route("/upload_dump", methods=["POST"])
52
+ def upload_dump():
53
+ """Import an uploaded JSONL heap dump into a new LMDB-backed explorer."""
54
+ dump_name = request.form.get("dump_name")
55
+ dump_file = request.files["dump_file"]
56
+ if not dump_name:
57
+ if upload_filename := dump_file.filename:
58
+ dump_name = pathlib.Path(upload_filename).stem
59
+ else:
60
+ dump_name = f"heap_dump_{len(loaded_dumps) + 1}"
61
+ if dump_name in loaded_dumps:
62
+ return f"Heap dump with name '{dump_name}' already exists", 409
63
+ if "/" in dump_name or "\\" in dump_name or dump_name.startswith("."):
64
+ return "Invalid heap dump name", 400
65
+ dump_dir = f"{DUMPS_DIR}/{dump_name}.lmdb"
66
+ precision = request.form.get("estimator_precision", "medium")
67
+ estimator_precision = PRECISION_MAP.get(precision, EstimatorPrecision.Medium)
68
+ os.mkdir(dump_dir)
69
+ try:
70
+ explorer = HeapDumpExplorer(f"{DUMPS_DIR}/{dump_name}.lmdb")
71
+ explorer.import_lines(dump_file, estimator_precision)
72
+ except Exception:
73
+ shutil.rmtree(dump_dir)
74
+ raise
75
+ loaded_dumps[dump_name] = explorer
76
+ return redirect(url_for("explore_dump", dump_name=dump_name))
77
+
78
+ @app.route("/explore/<dump_name>")
79
+ def explore_dump(dump_name):
80
+ explorer = get_dump(dump_name)
81
+ # The landing page for a dump is a type summary table.
82
+ type_summaries: list[tuple[str, TypeSummary]] = explorer.get_type_summaries()
83
+ sort_by = request.args.get("sort_by", "count")
84
+ match sort_by:
85
+ case "size":
86
+ type_summaries.sort(key=lambda x: x[1].total_size, reverse=True)
87
+ case "type":
88
+ type_summaries.sort(key=lambda x: x[0])
89
+ case "count" | _:
90
+ type_summaries.sort(key=lambda x: x[1].count, reverse=True)
91
+ return render_template(
92
+ "explore.html", dump_name=dump_name, type_summaries=type_summaries
93
+ )
94
+
95
+ @app.route("/explore/<dump_name>/type/<type_name>")
96
+ def explore_type(dump_name, type_name):
97
+ """Show one page of objects for a type, with optional size-based sorting."""
98
+ page = request.args.get("page", 1, type=int)
99
+ page_zero_indexed = max(page - 1, 0)
100
+ explorer = get_dump(dump_name)
101
+ sort = request.args.get("sort", "id")
102
+ match sort:
103
+ case "size":
104
+ objects = explorer.get_objects_by_type_ordered_by_size(
105
+ type_name, page=page_zero_indexed, subtree_size=False
106
+ )
107
+ case "subtree_size":
108
+ objects = explorer.get_objects_by_type_ordered_by_size(
109
+ type_name, page=page_zero_indexed, subtree_size=True
110
+ )
111
+ case "id" | _:
112
+ objects = explorer.get_objects_by_type(
113
+ type_name, page=page_zero_indexed
114
+ )
115
+ total_pages = explorer.get_page_count_for_type(type_name)
116
+ return render_template(
117
+ "type.html",
118
+ dump_name=dump_name,
119
+ type_name=type_name,
120
+ objects=objects,
121
+ page=page,
122
+ total_pages=total_pages,
123
+ sort=sort,
124
+ )
125
+
126
+ @app.route("/explore/<dump_name>/object/<int:obj_id>")
127
+ def explore_object(dump_name, obj_id):
128
+ """Show one object together with its references and referrers."""
129
+ explorer = get_dump(dump_name)
130
+ obj = explorer.get_object(obj_id)
131
+ current_from_id = session.get(f"path_finding_from_id:{dump_name}")
132
+ current_to_id = session.get(f"path_finding_to_id:{dump_name}")
133
+ if not obj:
134
+ raise NotFound(f"Object with ID {obj_id} not found in dump '{dump_name}'")
135
+ return render_template(
136
+ "object.html",
137
+ dump_name=dump_name,
138
+ obj=obj,
139
+ current_from_id=current_from_id,
140
+ current_to_id=current_to_id,
141
+ )
142
+
143
+ @app.route("/explore/<dump_name>/set_path_finding_endpoint", methods=["POST"])
144
+ def set_path_finding_endpoint(dump_name):
145
+ """Store or complete the pair of object IDs used for path finding."""
146
+ from_id = request.form.get(
147
+ "from_id", session.get(f"path_finding_from_id:{dump_name}"), type=int
148
+ )
149
+ to_id = request.form.get(
150
+ "to_id", session.get(f"path_finding_to_id:{dump_name}"), type=int
151
+ )
152
+ if from_id is not None and to_id is not None:
153
+ del session[f"path_finding_from_id:{dump_name}"]
154
+ del session[f"path_finding_to_id:{dump_name}"]
155
+ return redirect(
156
+ url_for("find_path", dump_name=dump_name, from_id=from_id, to_id=to_id)
157
+ )
158
+ else:
159
+ session[f"path_finding_from_id:{dump_name}"] = from_id
160
+ session[f"path_finding_to_id:{dump_name}"] = to_id
161
+
162
+ return redirect(
163
+ url_for("explore_object", dump_name=dump_name, obj_id=from_id or to_id)
164
+ )
165
+
166
+ @app.route("/explore/<dump_name>/find_path")
167
+ def find_path(dump_name):
168
+ """Find a reference path between two objects in the selected dump."""
169
+ explorer = get_dump(dump_name)
170
+ from_id = request.args.get("from_id", type=int)
171
+ to_id = request.args.get("to_id", type=int)
172
+ avoid_ids = set(request.args.getlist("avoid_id", type=int))
173
+ if from_id is None or to_id is None:
174
+ return "Missing from_id or to_id query parameters", 400
175
+ path = explorer.find_path_between_objects(
176
+ from_id, to_id, avoiding_ids=avoid_ids
177
+ )
178
+ return render_template(
179
+ "path.html",
180
+ dump_name=dump_name,
181
+ from_id=from_id,
182
+ to_id=to_id,
183
+ path=path,
184
+ avoid_ids=list(avoid_ids),
185
+ )
186
+
187
+ app.secret_key = os.urandom(
188
+ 16
189
+ ) # This app keeps state in-process, so a per-process key is fine.
190
+ return app
191
+
192
+
193
def main():
    """Run the local Midden web server.

    Parses ``--host``, ``--port`` and ``--no-start-web-browser`` from the
    command line, serves the app built by ``create_app()`` with cheroot and,
    unless disabled, opens the UI in the default web browser. Blocks until
    the server is interrupted, then shuts it down.
    """
    arg_parser = argparse.ArgumentParser(description="Run the Midden web server")
    arg_parser.add_argument(
        "--host",
        default="127.0.0.1",
        help="Address to listen on (default: %(default)s)",
    )
    arg_parser.add_argument(
        "--port",
        default=5000,
        type=int,
        help="TCP port to listen on (default: %(default)s)",
    )
    arg_parser.add_argument(
        "--no-start-web-browser",
        action="store_false",
        dest="start_web_browser",
        help="Don't automatically open the web browser",
    )
    args = arg_parser.parse_args()
    app = create_app()
    url = f"http://{args.host}:{args.port}"
    print(f"Starting Midden web server on {url}")
    server = WSGIServer((args.host, args.port), app)
    # start() is prepare() followed by serve(). Splitting the two lets the
    # socket be bound and listening before the browser is pointed at it, so
    # the first request cannot race the bind.
    server.prepare()
    try:
        if args.start_web_browser:
            webbrowser.open(url)
        server.serve()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to end a local session; exit quietly.
        pass
    finally:
        server.stop()


if __name__ == "__main__":
    main()