mod-wsgi-telemetry 1.0.0.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mod_wsgi/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ import pkgutil
2
+ __path__ = pkgutil.extend_path(__path__, __name__)
@@ -0,0 +1 @@
1
+ __version__ = "1.0.0dev2"
@@ -0,0 +1,55 @@
1
+ """Top-level dispatcher for mod_wsgi-telemetry subcommands.
2
+
3
+ Recognises ``serve``, ``top``, ``dump``, ``simulate`` and forwards the
4
+ remaining argv to the subcommand's own ``main(argv)``. Bare invocation
5
+ (no arguments) runs ``serve``.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import sys
11
+ from importlib import import_module
12
+
13
+
14
+ _SUBCOMMANDS = {
15
+ "serve": ("mod_wsgi.telemetry.server", "Run the ingestor and web UI (default)."),
16
+ "top": ("mod_wsgi.telemetry.tui", "Curses terminal monitor."),
17
+ "dump": ("mod_wsgi.telemetry.dump", "Bind the listen socket and print decoded samples."),
18
+ "simulate": ("mod_wsgi.telemetry.simulate", "Emit synthetic samples for UI development."),
19
+ }
20
+
21
+
22
def _print_usage(stream) -> None:
    """Write the top-level usage summary to ``stream``.

    The summary is the usage line, one line per entry of ``_SUBCOMMANDS``
    (name padded to nine columns, followed by its description), and a
    pointer to the per-command ``--help``.
    """

    def emit(text: str = "") -> None:
        # Every line goes through print() so the stream sees one write per line.
        print(text, file=stream)

    emit("usage: mod_wsgi-telemetry <command> [options]")
    emit()
    emit("commands:")
    for command, (_module, summary) in _SUBCOMMANDS.items():
        emit(f" {command:9s} {summary}")
    emit()
    emit("Run 'mod_wsgi-telemetry <command> --help' for command-specific options.")
31
+
32
+
33
def main(argv: list[str] | None = None) -> int:
    """Dispatch to the subcommand named by the first argument.

    ``argv`` defaults to ``sys.argv[1:]``. With no arguments the ``serve``
    subcommand runs with an empty argument list. ``-h`` / ``--help`` as the
    first argument prints the usage text to stdout and returns 0. Any other
    first argument that is not a key of ``_SUBCOMMANDS`` prints an error and
    the usage text to stderr and returns 2.

    Otherwise the subcommand's module is imported and the value returned by
    its ``main(rest)`` is passed back, where ``rest`` is everything after
    the subcommand name.
    """
    args = list(sys.argv[1:]) if argv is None else list(argv)

    if args and args[0] in ("-h", "--help"):
        _print_usage(sys.stdout)
        return 0

    if args and args[0] not in _SUBCOMMANDS:
        print(f"mod_wsgi-telemetry: unknown subcommand or option {args[0]!r}",
              file=sys.stderr)
        _print_usage(sys.stderr)
        return 2

    # Bare invocation falls back to "serve"; slicing an empty list yields [].
    command = args[0] if args else "serve"
    forwarded = args[1:]

    target_module, _ = _SUBCOMMANDS[command]
    # Rewrite the program name so the subcommand's own usage/help output
    # shows "mod_wsgi-telemetry <command>".
    sys.argv[0] = f"mod_wsgi-telemetry {command}"
    return import_module(target_module).main(forwarded)
52
+
53
+
54
+ if __name__ == "__main__":
55
+ raise SystemExit(main())
@@ -0,0 +1,229 @@
1
+ """GIL contention coefficient computation.
2
+
3
+ When a Python thread blocks waiting for the GIL, its wait time clusters
4
+ at multiples of ``sys.setswitchinterval`` (default 5 ms): a head bucket
5
+ below 1 ms for immediate handoffs, then bumps near s, 2*s, 3*s, ... — one
6
+ extra switch-interval cycle per missed handoff. Under fair contention
7
+ the per-cycle handoff success probability `q` is roughly constant and
8
+ the bump heights follow a geometric distribution::
9
+
10
+ P(k cycles) = (1 - q) ** k * q
11
+
12
+ Fitting `q` from aggregated HDR-bucket counts yields a single contention
13
+ coefficient that is more interpretable than p95 / p99 of GIL-wait time:
14
+ high `q` means waits typically resolve in one handoff, low `q` means
15
+ threads consistently lose multiple cycles (convoy).
16
+
17
+ The wire format reports the active switch-interval value as field 13;
18
+ the consumer bands the aggregated GIL-wait HDR buckets at multiples of
19
+ that interval and fits the geometric on `c1..c3`. The k=0 (immediate)
20
+ band is intentionally excluded from the fit because it is contaminated
21
+ by voluntary GIL releases (I/O drops where the holder happens to release
22
+ before its check fires); these have nothing to do with contention but
23
+ inflate the head bucket.
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import math
29
+ from typing import Iterable
30
+
31
+
32
def cycle_band_counts(
    buckets: list[int],
    bucket_bounds: list[tuple[float, float]],
    switch_interval_s: float,
) -> list[int]:
    """Fold HDR bucket counts into five switch-interval cycle bands.

    With ``s = switch_interval_s`` the bands, indexed by cycles waited, are::

        k=0      [0,     0.8*s)   immediate / voluntary release
        k=1      [0.8*s, 1.8*s)   one missed handoff
        k=2      [1.8*s, 2.8*s)   two missed handoffs
        k=3      [2.8*s, 3.8*s)   three missed handoffs
        k=4plus  [3.8*s, inf)     tail (likely OS stalls, not race losses)

    Every bucket with a positive count is credited to the band that holds
    its midpoint. A bucket whose upper bound is infinite uses its lower
    bound in place of a midpoint. The first bucket is always treated as
    starting at zero, whatever lower edge is passed in: the HDR helper used
    for percentile interpolation reports the bottom of the first octave
    (1 ms) there, but for banding the head bucket logically starts at 0.

    ``buckets`` and ``bucket_bounds`` are walked in lockstep; surplus
    entries in the longer one are ignored.

    Returns ``[c0, c1, c2, c3, c4plus]``.
    """
    # Shared interior edges: the upper edge of band k is the lower edge of k+1.
    cuts = [
        0.8 * switch_interval_s,
        1.8 * switch_interval_s,
        2.8 * switch_interval_s,
        3.8 * switch_interval_s,
    ]
    band_lows = [0.0, *cuts]
    band_highs = [*cuts, math.inf]

    totals = [0] * 5
    for position, (count, (lower, upper)) in enumerate(
        zip(buckets, bucket_bounds)
    ):
        if count <= 0:
            continue
        if position == 0:
            lower = 0.0
        centre = lower if math.isinf(upper) else 0.5 * (lower + upper)
        # First band whose half-open range contains the centre; a centre
        # that matches no band (e.g. negative) is left uncounted.
        band = next(
            (k for k in range(5) if band_lows[k] <= centre < band_highs[k]),
            None,
        )
        if band is not None:
            totals[band] += count
    return totals
78
+
79
+
80
+ def _fit_geometric_decay(
81
+ points: list[tuple[float, float, float]],
82
+ ) -> dict | None:
83
+ """Weighted log-linear fit of log(c[k]) vs k.
84
+
85
+ ``points`` is a list of ``(x, log(count), weight)`` triples. Returns
86
+ ``{"q": float, "r2": float}`` when the fit meets the geometric-decay
87
+ assumption (negative slope, valid q in (0, 1), R² ≥ 0.5); returns
88
+ ``None`` otherwise.
89
+ """
90
+ if len(points) < 2:
91
+ return None
92
+
93
+ sum_w = sum(p[2] for p in points)
94
+ mean_x = sum(w * x for x, _, w in points) / sum_w
95
+ mean_y = sum(w * y for _, y, w in points) / sum_w
96
+ var_x = sum(w * (x - mean_x) ** 2 for x, _, w in points) / sum_w
97
+ cov_xy = sum(
98
+ w * (x - mean_x) * (y - mean_y) for x, y, w in points
99
+ ) / sum_w
100
+
101
+ if var_x <= 0.0:
102
+ return None
103
+
104
+ slope = cov_xy / var_x
105
+ if slope >= 0.0:
106
+ return None
107
+
108
+ q = 1.0 - math.exp(slope)
109
+ if not (0.0 < q < 1.0):
110
+ return None
111
+
112
+ intercept = mean_y - slope * mean_x
113
+ ss_res = sum(
114
+ w * (y - (intercept + slope * x)) ** 2 for x, y, w in points
115
+ )
116
+ var_y = sum(w * (y - mean_y) ** 2 for _, y, w in points)
117
+ if var_y <= 0.0:
118
+ return None
119
+ r2 = 1.0 - ss_res / var_y
120
+
121
+ if r2 < 0.5:
122
+ return None
123
+
124
+ return {"q": q, "r2": r2}
125
+
126
+
127
def contention_coefficient(
    buckets: list[int],
    bucket_bounds: list[tuple[float, float]],
    switch_interval_s: float,
) -> dict | None:
    """Compute the GIL contention coefficient from aggregated HDR buckets.

    The buckets are first summed into cycle bands ``[c0, c1, c2, c3,
    c4plus]``. Up to two geometric-decay fits are then attempted, in order:

    1. **primary** (bands ``c1..c3``): tried only when ``c1 >= c2``.
       ``c1 < c2`` is taken as the signature of ``c1`` being contaminated
       by the HDR head bucket: at switch intervals of about 1 ms or less
       the head bucket (1.25 ms wide on the default HDR config) absorbs
       both ``k=0`` and ``k=1``, leaving ``c1`` with only the spillover.
    2. **fallback** (bands ``c2..c4plus``): tried when the primary fit was
       skipped or did not yield a result. ``c4plus`` mixes the true ``k=4``
       band with the OS-stall tail, so this fit is noisier and biased high
       in ``q``.

    An attempt is abandoned when its bands hold fewer than 100 events in
    total or when the fit is rejected (see ``_fit_geometric_decay``). Bands
    with a zero count are left out of the fitted points.

    Returns ``None`` when ``switch_interval_s`` is not positive or no
    attempt succeeds. Otherwise returns a dict::

        {
            "q": float,            # per-cycle handoff success probability
            "r": float,            # 1 - q, convoy persistence
            "band_counts": [c0, c1, c2, c3, c4plus],
            "fit_r2": float,       # R^2 of the log-linear fit
            "n_total": int,        # total events across all bands
            "n_fit": int,          # events in the bands used for the fit
            "fit_kind": str,       # "primary" or "fallback_c2_c4plus"
        }
    """
    if switch_interval_s <= 0.0:
        return None

    bands = cycle_band_counts(buckets, bucket_bounds, switch_interval_s)
    n_total = sum(bands)

    # Ordered list of (fit_kind, band indices) to try.
    attempts: list[tuple[str, tuple[int, int, int]]] = []
    if bands[1] >= bands[2]:
        attempts.append(("primary", (1, 2, 3)))
    # NOTE(review): the fallback also runs when c1 is NOT contaminated but
    # the primary attempt produced no result — confirm this is intended.
    attempts.append(("fallback_c2_c4plus", (2, 3, 4)))

    for fit_kind, band_ids in attempts:
        n_fit = sum(bands[k] for k in band_ids)
        if n_fit < 100:
            continue

        # x is the cycle offset from the first band of the attempt; each
        # point is weighted by its own event count.
        first = band_ids[0]
        points = [
            (float(k - first), math.log(bands[k]), float(bands[k]))
            for k in band_ids
            if bands[k] > 0
        ]
        fit = _fit_geometric_decay(points)
        if fit is None:
            continue

        return {
            "q": fit["q"],
            "r": 1.0 - fit["q"],
            "band_counts": bands,
            "fit_r2": fit["r2"],
            "n_total": n_total,
            "n_fit": n_fit,
            "fit_kind": fit_kind,
        }

    return None
212
+
213
+
214
def decay_label(q: float) -> str:
    """Name the convoy-decay shape implied by ``q`` (shape, NOT severity).

    ``q`` is the per-cycle handoff success probability fitted from the
    geometric decay across cycle bands. A high ``q`` means convoys clear
    quickly (a missed cycle rarely chains into more); a low ``q`` means
    they persist (the Beazley convoy signature). Whether a given ``q`` is
    actually "bad" depends on the wall-clock cost per cycle (the switch
    interval) and on the ``gil_wait_time`` mean, so combine it with those
    for a severity read.

    Returns ``"transient"`` for ``q >= 0.7``, ``"compounding"`` for
    ``q >= 0.4``, and ``"convoy"`` otherwise.
    """
    # Tiers in descending order of their inclusive lower bound.
    tiers = (
        (0.7, "transient"),
        (0.4, "compounding"),
    )
    for floor, label in tiers:
        if q >= floor:
            return label
    return "convoy"
@@ -0,0 +1,102 @@
1
+ """CLI pretty-printer for the telemetry wire format.
2
+
3
+ Binds the listening socket itself (so don't run this at the same time as
4
+ the ingester), receives datagrams, decodes, prints.
5
+
6
+ Usage:
7
+ mod_wsgi-telemetry dump --listen unix:/tmp/mod_wsgi-telemetry.sock
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import json
14
+ import os
15
+ import socket
16
+ import sys
17
+ from datetime import datetime, timezone
18
+
19
+ from .ingest import open_socket
20
+ from .wire import decode
21
+
22
+
23
+ def _fmt_value(v):
24
+ if isinstance(v, bytes):
25
+ try:
26
+ return v.decode("utf-8")
27
+ except UnicodeDecodeError:
28
+ return v.hex()
29
+ return v
30
+
31
+
32
+ def _parse_octal_mode(s: str) -> int:
33
+ try:
34
+ return int(s, 8)
35
+ except ValueError:
36
+ raise argparse.ArgumentTypeError(
37
+ f"socket-mode must be octal (e.g. 0660 or 660), got {s!r}")
38
+
39
+
40
def main(argv: list[str] | None = None) -> int:
    """Receive telemetry datagrams on the listen socket and print them.

    Parses ``argv`` (argparse falls back to ``sys.argv[1:]`` when it is
    ``None``), opens the socket through ``open_socket`` with the requested
    mode and group, then loops: receive a datagram, decode it, print it as
    text or as one JSON object per line. Datagrams that fail to decode are
    reported on stderr and skipped without counting towards ``--count``.

    The loop ends after ``--count`` samples when that is non-zero, or on
    Ctrl-C. The socket is closed on the way out. Always returns 0.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--listen", default="unix:/tmp/mod_wsgi-telemetry.sock")
    parser.add_argument("--socket-mode", type=_parse_octal_mode, default=0o660,
                        metavar="MODE",
                        help="Octal permission mode for the UNIX socket "
                             "(default: 0660).")
    parser.add_argument("--socket-group", default=None, metavar="GROUP",
                        help="Group name or numeric GID to chown the UNIX "
                             "socket to.")
    parser.add_argument("--format", choices=["text", "json"], default="text")
    parser.add_argument("--count", type=int, default=0,
                        help="stop after N samples (0 = forever)")
    opts = parser.parse_args(argv)

    # An all-digit group is passed on as an int, anything else as given.
    group: str | int | None = opts.socket_group
    if isinstance(group, str) and group.isdigit():
        group = int(group)

    sock = open_socket(opts.listen, mode=opts.socket_mode, group=group)
    sock.setblocking(True)

    as_json = opts.format == "json"
    limit = opts.count
    printed = 0
    try:
        while True:
            datagram, _ = sock.recvfrom(65536)
            try:
                sample = decode(datagram)
            except Exception as exc:
                # A bad datagram must not stop the dump; report and move on.
                print(f"decode error: {exc} (len={len(datagram)})",
                      file=sys.stderr)
                continue

            if as_json:
                record = {
                    "kind": sample.kind_name,
                    "pid": sample.pid,
                    "seq": sample.seq,
                    "stamp": sample.stamp,
                    "fields": {
                        key: _fmt_value(value)
                        for key, value in sample.fields.items()
                    },
                }
                print(json.dumps(record))
            else:
                when = datetime.fromtimestamp(sample.stamp, tz=timezone.utc)
                ts = when.isoformat(timespec="milliseconds")
                print(
                    f"\n[{ts}] pid={sample.pid} seq={sample.seq} "
                    f"kind={sample.kind_name} v{sample.version}"
                )
                for key, value in sample.fields.items():
                    print(f" {key:30s} = {_fmt_value(value)}")

            printed += 1
            if limit and printed >= limit:
                break
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop an unbounded dump.
        pass
    finally:
        sock.close()
    return 0
99
+
100
+
101
+ if __name__ == "__main__":
102
+ sys.exit(main())