@zeyue0329/xiaoma-cli 1.17.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,305 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ trace_referrers.py — 反向引用追溯(自实现的 path-to-GC-root)。
4
+
5
+ 从某个"嫌疑类"的所有实例出发,逐跳向上寻找 referrer(谁引用了它们):
6
+ 实例字段引用 / 数组元素 / **静态字段**。一直追到 GC root 或聚集容器,
7
+ 从而在字段级别定位"哪个集合/字段持有了泄漏对象、且没有释放"。
8
+
9
+ 这是直方图回答不了的关键问题:直方图告诉你"什么对象多",
10
+ 本脚本告诉你"谁持有它们、通过哪个字段"。等价于 Eclipse MAT 的
11
+ "Path to GC Roots / merge shortest paths",但纯标准库、不依赖 MAT。
12
+
13
+ 用法:
14
+ python3 trace_referrers.py <dump.hprof> <class-name> [--hops N]
15
+
16
+ 参数:
17
+ class-name 嫌疑类全名,点或斜杠分隔均可
18
+ 例: com.corundumstudio.socketio.handler.ClientHead
19
+ --hops N 向上追溯的最大跳数(默认 6)
20
+
21
+ 每一跳输出:
22
+ - [referrer 类 -> referrer 对象数] 谁引用了当前层对象
23
+ - [referrer 字段 -> 指向目标的边数] 通过哪个字段名/数组下标
24
+ - [★静态字段持有(GC-root 级)] 被某类的 static 字段直接持有(强信号)
25
+ - [★referrer 自身就是 GC root] referrer 本身是 GC root(线程/JNI/sticky class 等)
26
+
27
+ 阅读技巧:
28
+ - 注意对象图常有环(如 ClientHead.clientsBox -> ClientsBox -> map -> ClientHead),
29
+ 反向 BFS 的 next 集合到后期会膨胀,这是正常现象。
30
+ - 关注每一跳"收敛"出的单一容器(如某个 ConcurrentHashMap 的 table 数组、
31
+ 某个单例 Holder 对象),以及 ★ 标记的静态字段/GC root 锚点 —— 那就是泄漏的持有链。
32
+ """
33
+ import struct
34
+ import argparse
35
+ from collections import defaultdict
36
+
37
+
38
def main():
    """Command-line entry point: trace reverse references through an HPROF dump.

    Reads ``hprof``, ``class_name`` and ``--hops`` from the command line,
    collects the object ids of every instance of the named class, and then
    rescans the dump once per hop to find what references the current object
    set: instance fields, object-array elements and static fields.  After each
    hop the referrers become the object set for the next hop.  The loop ends
    when ``--hops`` is exhausted or a hop finds no referrers.

    Raises:
        SystemExit: if the file lacks the ``JAVA PROFILE`` magic, is too short
            to contain the identifier size, declares an identifier size other
            than 4 or 8, contains an unknown heap sub-record tag, or does not
            contain the requested class.
    """
    ap = argparse.ArgumentParser(description="HPROF 反向引用追溯")
    ap.add_argument("hprof")
    ap.add_argument("class_name", help="嫌疑类全名(点或斜杠分隔)")
    ap.add_argument("--hops", type=int, default=6, help="最大追溯跳数")
    args = ap.parse_args()

    path = args.hprof
    # Class names are compared in slash-separated form against the dump's
    # string table.
    target_name = args.class_name.replace(".", "/")
    MAX_HOPS = args.hops

    def read_header(f):
        """Consume the file header from *f* and return the identifier size."""
        buf = bytearray()
        # The version string is NUL-terminated; also stop at end of file.
        while True:
            c = f.read(1)
            if c in (b"\x00", b""):
                break
            buf += c
        if not bytes(buf).startswith(b"JAVA PROFILE"):
            raise SystemExit(f"[err] {path} 不是 HPROF 堆转储(缺少 'JAVA PROFILE' 魔数)")
        head = f.read(4)
        if len(head) < 4:
            raise SystemExit(f"[err] {path} 文件过短或损坏,无法读取 id_size")
        ids = struct.unpack(">I", head)[0]
        # Only 4- and 8-byte identifiers have a struct format below; any other
        # value would silently be decoded as 4 bytes and mis-parse every record.
        if ids not in (4, 8):
            raise SystemExit(f"[err] {path} 的 id_size={ids} 不受支持(仅支持 4 或 8)")
        f.read(8)  # skip the 8 bytes that follow the identifier size
        return ids

    with open(path, "rb") as f0:
        id_size = read_header(f0)
    ID = ">Q" if id_size == 8 else ">I"
    # Byte width of a value for each field/element type code; code 2 is an
    # object reference and therefore identifier-sized.
    TS = {2: id_size, 4: 1, 5: 2, 6: 4, 7: 8, 8: 1, 9: 2, 10: 4, 11: 8}

    def rid(mv, off):
        """Read one identifier at *off*; return ``(value, offset_after)``."""
        return struct.unpack_from(ID, mv, off)[0], off + id_size

    strings = {}        # string id -> decoded text
    loadclass = {}      # class object id -> name string id
    class_super = {}    # class object id -> superclass object id
    class_ifields = {}  # class object id -> [(type code, name string id)]
    roots = {}          # object id -> root sub-record tag

    def parse_class_dump(mv, off, collect):
        """Parse one class-dump sub-record starting just after its tag byte.

        Returns ``(offset_after, class_id, static_refs)`` where *static_refs*
        lists ``(name string id, referenced object id)`` for every static
        field of reference type.  When *collect* is true the superclass and
        instance-field layout are recorded in the shared tables.
        """
        cls_id, off = rid(mv, off)
        off += 4
        super_id, off = rid(mv, off)
        off += id_size * 5  # five identifier-sized slots that are not used
        off += 4
        cp = struct.unpack_from(">H", mv, off)[0]
        off += 2
        for _ in range(cp):
            off += 2
            t = mv[off]
            off += 1
            off += TS[t]
        sf = struct.unpack_from(">H", mv, off)[0]
        off += 2
        srefs = []
        for _ in range(sf):
            nid, off = rid(mv, off)
            t = mv[off]
            off += 1
            if t == 2:
                val, off = rid(mv, off)
                srefs.append((nid, val))
            else:
                off += TS[t]
        iff = struct.unpack_from(">H", mv, off)[0]
        off += 2
        ifs = []
        for _ in range(iff):
            nid, off = rid(mv, off)
            t = mv[off]
            off += 1
            ifs.append((t, nid))
        if collect:
            class_super[cls_id] = super_id
            class_ifields[cls_id] = ifs
        return off, cls_id, srefs

    # Extra bytes that follow the object id for each root sub-record tag.
    ROOT_FIX = {0xFF: 0, 0x05: 0, 0x07: 0, 0x02: 8, 0x03: 8, 0x08: 8, 0x04: 4, 0x06: 4}

    def skip_root(mv, off, sub):
        """Return the offset just past a root sub-record whose tag is *sub*."""
        _, off = rid(mv, off)
        if sub == 0x01:
            # Tag 0x01 carries a second identifier instead of fixed-size data.
            _, off = rid(mv, off)
            return off
        extra = ROOT_FIX.get(sub)
        if extra is None:
            # Without a known size the parser cannot resynchronise, so stop
            # with a readable message rather than a bare KeyError.
            raise SystemExit(
                f"[err] {path} 含有未知的堆转储子记录类型 0x{sub:02X}(文件损坏或格式不受支持)"
            )
        return off + extra

    # ---------- Pass 0: class metadata + target instance ids + GC roots ----------
    def build():
        """Scan the dump once, filling the shared tables.

        Returns ``(target_cls_id, instance_ids)``; *target_cls_id* is ``None``
        when no loaded class has the requested name.
        """
        target_cls_id = None
        S0 = set()
        # ``with`` closes the dump even when a malformed record raises.
        with open(path, "rb") as f:
            read_header(f)
            read = f.read
            while True:
                hdr = read(9)
                if len(hdr) < 9:
                    break
                tag = hdr[0]
                length = struct.unpack_from(">I", hdr, 5)[0]
                if tag == 0x01:
                    body = read(length)
                    sid = struct.unpack_from(ID, body, 0)[0]
                    strings[sid] = bytes(body[id_size:]).decode("utf-8", "replace")
                elif tag == 0x02:
                    body = read(length)
                    off = 4
                    cls_id, off = rid(body, off)
                    off += 4
                    nid, off = rid(body, off)
                    loadclass[cls_id] = nid
                elif tag in (0x0C, 0x1C):
                    body = read(length)
                    mv = memoryview(body)
                    n = len(mv)
                    off = 0
                    if target_cls_id is None:
                        # NOTE(review): only the first class id with a matching
                        # name is kept; if the same name is loaded more than
                        # once, instances of the other ids are not collected.
                        for cid, nid in loadclass.items():
                            if strings.get(nid) == target_name:
                                target_cls_id = cid
                                break
                    while off < n:
                        sub = mv[off]
                        off += 1
                        if sub == 0x21:
                            oid, off = rid(mv, off)
                            off += 4
                            cls_id, off = rid(mv, off)
                            nb = struct.unpack_from(">I", mv, off)[0]
                            off += 4
                            off += nb
                            if cls_id == target_cls_id:
                                S0.add(oid)
                        elif sub == 0x20:
                            off, _, _ = parse_class_dump(mv, off, True)
                        elif sub == 0x22:
                            _, off = rid(mv, off)
                            off += 4
                            num = struct.unpack_from(">I", mv, off)[0]
                            off += 4
                            _, off = rid(mv, off)
                            off += num * id_size
                        elif sub == 0x23:
                            _, off = rid(mv, off)
                            off += 4
                            num = struct.unpack_from(">I", mv, off)[0]
                            off += 4
                            et = mv[off]
                            off += 1
                            off += num * TS[et]
                        else:
                            # Every remaining tag is treated as a root record.
                            oid = struct.unpack_from(ID, mv, off)[0]
                            roots[oid] = sub
                            off = skip_root(mv, off, sub)
                else:
                    f.seek(length, 1)
        return target_cls_id, S0

    refoffs_cache = {}

    def get_refoffs(cls_id):
        """Return ``[(byte offset, name string id)]`` of reference fields.

        Offsets are relative to the start of an instance's field data and are
        accumulated from *cls_id* upward through its recorded superclasses.
        Results are memoised per class id.
        """
        r = refoffs_cache.get(cls_id)
        if r is not None:
            return r
        res = []
        off = 0
        cid = cls_id
        while cid in class_ifields:
            for (t, nid) in class_ifields[cid]:
                if t == 2:
                    res.append((off, nid))
                    off += id_size
                else:
                    off += TS[t]
            cid = class_super.get(cid, 0)
        refoffs_cache[cls_id] = res
        return res

    def cname(cid):
        """Return the dotted class name for *cid*, or a placeholder."""
        nid = loadclass.get(cid)
        return strings.get(nid, f"<cls@{cid}>").replace("/", ".") if nid else f"<cls@{cid}>"

    def fname(nid):
        """Return the field name for string id *nid*, or a placeholder."""
        return strings.get(nid, f"<f@{nid}>")

    def scan_referrers(S):
        """Rescan the dump and report everything that references a member of *S*.

        Returns ``(next_set, field_hits, cls_hits, static_hits, root_ref)``:
        the ids of referring instances and arrays, edge counts per
        (class, field), referrer counts per class, edge counts per
        (class, static field), and counts of referrers that are themselves
        recorded roots.
        """
        next_set = set()
        field_hits = defaultdict(int)
        cls_hits = defaultdict(int)
        static_hits = defaultdict(int)
        root_ref = defaultdict(int)
        Sdisj = S.isdisjoint
        # ``with`` closes the dump even when a malformed record raises.
        with open(path, "rb") as f:
            read_header(f)
            read = f.read
            while True:
                hdr = read(9)
                if len(hdr) < 9:
                    break
                tag = hdr[0]
                length = struct.unpack_from(">I", hdr, 5)[0]
                if tag in (0x0C, 0x1C):
                    body = read(length)
                    mv = memoryview(body)
                    n = len(mv)
                    off = 0
                    while off < n:
                        sub = mv[off]
                        off += 1
                        if sub == 0x21:
                            oid, off = rid(mv, off)
                            off += 4
                            cls_id, off = rid(mv, off)
                            nb = struct.unpack_from(">I", mv, off)[0]
                            off += 4
                            d = off
                            off += nb
                            hit = False
                            for (fo, nid) in get_refoffs(cls_id):
                                rv = struct.unpack_from(ID, mv, d + fo)[0]
                                if rv in S:
                                    hit = True
                                    field_hits[(cname(cls_id), fname(nid))] += 1
                            if hit:
                                next_set.add(oid)
                                cls_hits[cname(cls_id)] += 1
                                if oid in roots:
                                    root_ref[cname(cls_id)] += 1
                        elif sub == 0x20:
                            off, cls_id, srefs = parse_class_dump(mv, off, False)
                            for (nid, val) in srefs:
                                if val in S:
                                    static_hits[(cname(cls_id), fname(nid))] += 1
                        elif sub == 0x22:
                            oid, off = rid(mv, off)
                            off += 4
                            num = struct.unpack_from(">I", mv, off)[0]
                            off += 4
                            arr_cls, off = rid(mv, off)
                            if num:
                                # Decode all elements in one call, then test
                                # for any overlap before counting each edge.
                                elems = struct.unpack_from(">%d%s" % (num, ID[1]), mv, off)
                                off += num * id_size
                                if not Sdisj(elems):
                                    next_set.add(oid)
                                    cls_hits[cname(arr_cls)] += 1
                                    for e in elems:
                                        if e in S:
                                            field_hits[(cname(arr_cls), "[]")] += 1
                                    if oid in roots:
                                        root_ref[cname(arr_cls)] += 1
                        elif sub == 0x23:
                            _, off = rid(mv, off)
                            off += 4
                            num = struct.unpack_from(">I", mv, off)[0]
                            off += 4
                            et = mv[off]
                            off += 1
                            off += num * TS[et]
                        else:
                            off = skip_root(mv, off, sub)
                else:
                    f.seek(length, 1)
        return next_set, field_hits, cls_hits, static_hits, root_ref

    def show(title, d, topn=15):
        """Print *title* followed by the *topn* largest entries of *d*."""
        print(title)
        for k, v in sorted(d.items(), key=lambda x: x[1], reverse=True)[:topn]:
            print(f" {v:>12,} {k}")

    print(f"[build] id_size={id_size} target={target_name}", flush=True)
    target_cls_id, S = build()
    if target_cls_id is None:
        raise SystemExit(f"[err] 未在 dump 中找到类 {target_name}(检查类名拼写/包路径)")
    print(f"[build] target_cls_id={target_cls_id} instances={len(S):,} "
          f"roots={len(roots):,} classes={len(class_ifields):,}", flush=True)

    for hop in range(1, MAX_HOPS + 1):
        print("\n" + "#" * 90, flush=True)
        print(f"# HOP {hop}: 寻找引用 {len(S):,} 个对象的 referrer", flush=True)
        print("#" * 90, flush=True)
        nxt, field_hits, cls_hits, static_hits, root_ref = scan_referrers(S)
        show("[referrer 类 -> referrer 对象数]", cls_hits)
        show("[referrer 字段 -> 指向目标的边数]", field_hits)
        if static_hits:
            show("[★静态字段持有(GC-root 级) -> 边数]", static_hits)
        if root_ref:
            show("[★referrer 自身就是 GC root -> 个数]", root_ref)
        print(f"[next] referrer 对象总数 = {len(nxt):,}", flush=True)
        if not nxt:
            print("[stop] 无更多 referrer(已达根)", flush=True)
            break
        # The referrers found in this hop are the targets of the next one.
        S = nxt
    print("\n[done]", flush=True)
302
+
303
+
304
+ if __name__ == "__main__":
305
+ main()
@@ -0,0 +1,15 @@
1
+ type: skill
2
+ module: core
3
+ capabilities:
4
+ - name: xiaoma-heap-dump-analysis
5
+ menu-code: HDA
6
+ description: "Find the true root cause of a JVM memory leak from a .hprof heap dump — class histogram, reverse-reference GC-root tracing, optional Eclipse MAT cross-validation, and precise collection/field measurement. Use when you have an OOM / heap dump and need to know which collection retains the leaked objects and why."
7
+ supports-headless: true
8
+ input: a JVM heap dump (.hprof)
9
+ args: optional suspected class name
10
+ output: root-cause memory-leak report located next to the dump
11
+ config-vars-used: null
12
+ phase: anytime
13
+ before: []
14
+ after: []
15
+ is-required: false
@@ -449,8 +449,8 @@ class ConfigDrivenIdeSetup {
449
449
  this.skillWriteTracker?.add(canonicalId);
450
450
 
451
451
  // Copy all skill files, filtering OS/editor artifacts recursively
452
- const skipPatterns = new Set(['.DS_Store', 'Thumbs.db', 'desktop.ini']);
453
- const skipSuffixes = ['~', '.swp', '.swo', '.bak'];
452
+ const skipPatterns = new Set(['.DS_Store', 'Thumbs.db', 'desktop.ini', '__pycache__']);
453
+ const skipSuffixes = ['~', '.swp', '.swo', '.bak', '.pyc', '.pyo'];
454
454
  const filter = (src) => {
455
455
  const name = path.basename(src);
456
456
  if (src === sourceDir) return true;
@@ -763,6 +763,10 @@ class OfficialModules {
763
763
  const entries = await fs.readdir(dir, { withFileTypes: true });
764
764
 
765
765
  for (const entry of entries) {
766
+ // Skip Python bytecode caches — regenerated locally, never meant to ship
767
+ if (entry.name === '__pycache__') continue;
768
+ if (entry.name.endsWith('.pyc') || entry.name.endsWith('.pyo')) continue;
769
+
766
770
  const fullPath = path.join(dir, entry.name);
767
771
 
768
772
  if (entry.isDirectory()) {