npm - @zeyue0329/xiaoma-cli - Versions diffs - 1.17.0 → 1.18.0 - Mend

@zeyue0329/xiaoma-cli 1.17.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

package/src/core-skills/xiaoma-heap-dump-analysis/scripts/trace_referrers.py ADDED Viewed

@@ -0,0 +1,305 @@
+#!/usr/bin/env python3
+"""
+trace_referrers.py — 反向引用追溯（自实现的 path-to-GC-root）。
+从某个"嫌疑类"的所有实例出发，逐跳向上寻找 referrer（谁引用了它们）：
+实例字段引用 / 数组元素 / **静态字段**。一直追到 GC root 或聚集容器，
+从而在字段级别定位"哪个集合/字段持有了泄漏对象、且没有释放"。
+这是直方图回答不了的关键问题：直方图告诉你"什么对象多"，
+本脚本告诉你"谁持有它们、通过哪个字段"。等价于 Eclipse MAT 的
+"Path to GC Roots / merge shortest paths"，但纯标准库、不依赖 MAT。
+用法:
+    python3 trace_referrers.py <dump.hprof> <class-name> [--hops N]
+参数:
+    class-name   嫌疑类全名，点或斜杠分隔均可
+                 例: com.corundumstudio.socketio.handler.ClientHead
+    --hops N     向上追溯的最大跳数（默认 6）
+每一跳输出:
+    - [referrer 类 -> referrer 对象数]      谁引用了当前层对象
+    - [referrer 字段 -> 指向目标的边数]      通过哪个字段名/数组下标
+    - [★静态字段持有(GC-root 级)]           被某类的 static 字段直接持有（强信号）
+    - [★referrer 自身就是 GC root]          referrer 本身是 GC root（线程/JNI/sticky class 等）
+阅读技巧:
+    - 注意对象图常有环（如 ClientHead.clientsBox -> ClientsBox -> map -> ClientHead），
+      反向 BFS 的 next 集合到后期会膨胀，这是正常现象。
+    - 关注每一跳"收敛"出的单一容器（如某个 ConcurrentHashMap 的 table 数组、
+      某个单例 Holder 对象），以及 ★ 标记的静态字段/GC root 锚点 —— 那就是泄漏的持有链。
+"""
+import struct
+import argparse
+from collections import defaultdict
+def main():
+    """Parse the command line, index the dump once, then trace referrers hop by hop."""
+    ap = argparse.ArgumentParser(description="HPROF 反向引用追溯")
+    ap.add_argument("hprof")
+    ap.add_argument("class_name", help="嫌疑类全名（点或斜杠分隔）")
+    ap.add_argument("--hops", type=int, default=6, help="最大追溯跳数")
+    args = ap.parse_args()
+    path = args.hprof
+    # Class names are matched in slash-separated form, so dots are normalised here.
+    target_name = args.class_name.replace(".", "/")
+    MAX_HOPS = args.hops
+    def read_header(f):
+        buf = bytearray()
+        while True:
+            c = f.read(1)
+            if c in (b"\x00", b""):
+                break
+            buf += c
+        if not bytes(buf).startswith(b"JAVA PROFILE"):
+            raise SystemExit(f"[err] {path} 不是 HPROF 堆转储（缺少 'JAVA PROFILE' 魔数）")
+        head = f.read(4)
+        if len(head) < 4:
+            raise SystemExit(f"[err] {path} 文件过短或损坏，无法读取 id_size")
+        ids = struct.unpack(">I", head)[0]
+        f.read(8)
+        return ids
+    # Read the identifier size once up front; the handle is closed on leaving the block.
+    with open(path, "rb") as f0:
+        id_size = read_header(f0)
+    # struct format for one identifier: 8-byte unsigned when id_size is 8, else 4-byte unsigned.
+    ID = ">Q" if id_size == 8 else ">I"
+    # Byte width of a value for each field/element type code; used to advance offsets.
+    # Code 2 is the one treated as an object reference elsewhere in this script.
+    # NOTE(review): the remaining codes presumably follow the HPROF basic-type numbering — confirm.
+    TS = {2: id_size, 4: 1, 5: 2, 6: 4, 7: 8, 8: 1, 9: 2, 10: 4, 11: 8}
+    def rid(mv, off):
+        return struct.unpack_from(ID, mv, off)[0], off + id_size
+    # Indexes filled while parsing the dump and shared by the helpers below.
+    strings = {}        # string id -> decoded text (from tag 0x01 records)
+    loadclass = {}      # class id -> string id of the class name (from tag 0x02 records)
+    class_super = {}    # class id -> superclass id
+    class_ifields = {}  # class id -> [(type code, field-name string id), ...] in declared order
+    roots = {}          # object id -> sub-record tag of the root record that named it
+    def parse_class_dump(mv, off, collect):
+        """Walk one class-dump sub-record (sub-tag 0x20) starting at ``off``.
+
+        Args:
+            mv: buffer holding the heap-dump record body.
+            off: offset of the first byte after the sub-tag.
+            collect: when true, store the superclass id and instance-field list
+                in ``class_super`` / ``class_ifields``.
+
+        Returns:
+            ``(off, cls_id, srefs)``: the offset just past the sub-record, the
+            class id, and a list of ``(field-name string id, referenced id)``
+            for every static field whose type code is 2.
+        """
+        cls_id, off = rid(mv, off)
+        off += 4  # NOTE(review): presumably the stack-trace serial number — skipped
+        super_id, off = rid(mv, off)
+        # Five identifier-sized header fields that this script does not need.
+        off += id_size * 5
+        off += 4  # NOTE(review): presumably the instance size — skipped
+        # Counted table skipped entry by entry: 2 bytes, a type code, then a value of that type.
+        # NOTE(review): "cp" presumably stands for the constant pool.
+        cp = struct.unpack_from(">H", mv, off)[0]; off += 2
+        for _ in range(cp):
+            off += 2
+            t = mv[off]; off += 1
+            off += TS[t]
+        # Static fields: name id, type code, value. Only type-2 values are kept.
+        sf = struct.unpack_from(">H", mv, off)[0]; off += 2
+        srefs = []
+        for _ in range(sf):
+            nid, off = rid(mv, off)
+            t = mv[off]; off += 1
+            if t == 2:
+                val, off = rid(mv, off)
+                srefs.append((nid, val))
+            else:
+                off += TS[t]
+        # Instance-field descriptors: name id and type code only (no values here).
+        iff = struct.unpack_from(">H", mv, off)[0]; off += 2
+        ifs = []
+        for _ in range(iff):
+            nid, off = rid(mv, off)
+            t = mv[off]; off += 1
+            ifs.append((t, nid))
+        if collect:
+            class_super[cls_id] = super_id
+            class_ifields[cls_id] = ifs
+        return off, cls_id, srefs
+    ROOT_FIX = {0xFF: 0, 0x05: 0, 0x07: 0, 0x02: 8, 0x03: 8, 0x08: 8, 0x04: 4, 0x06: 4}
+    def skip_root(mv, off, sub):
+        _, off = rid(mv, off)
+        if sub == 0x01:
+            _, off = rid(mv, off)
+        else:
+            off += ROOT_FIX[sub]
+        return off
+    # ---------- Pass 0: 类元数据 + 目标实例 ids + GC roots ----------
+    def build():
+        target_cls_id = None
+        S0 = set()
+        f = open(path, "rb"); read_header(f); read = f.read
+        while True:
+            hdr = read(9)
+            if len(hdr) < 9:
+                break
+            tag = hdr[0]
+            length = struct.unpack_from(">I", hdr, 5)[0]
+            if tag == 0x01:
+                body = read(length)
+                sid = struct.unpack_from(ID, body, 0)[0]
+                strings[sid] = bytes(body[id_size:]).decode("utf-8", "replace")
+            elif tag == 0x02:
+                body = read(length)
+                off = 4
+                cls_id, off = rid(body, off)
+                off += 4
+                nid, off = rid(body, off)
+                loadclass[cls_id] = nid
+            elif tag in (0x0C, 0x1C):
+                body = read(length); mv = memoryview(body); n = len(mv); off = 0
+                if target_cls_id is None:
+                    for cid, nid in loadclass.items():
+                        if strings.get(nid) == target_name:
+                            target_cls_id = cid
+                            break
+                while off < n:
+                    sub = mv[off]; off += 1
+                    if sub == 0x21:
+                        oid, off = rid(mv, off); off += 4
+                        cls_id, off = rid(mv, off)
+                        nb = struct.unpack_from(">I", mv, off)[0]; off += 4
+                        off += nb
+                        if cls_id == target_cls_id:
+                            S0.add(oid)
+                    elif sub == 0x20:
+                        off, _, _ = parse_class_dump(mv, off, True)
+                    elif sub == 0x22:
+                        _, off = rid(mv, off); off += 4
+                        num = struct.unpack_from(">I", mv, off)[0]; off += 4
+                        _, off = rid(mv, off)
+                        off += num * id_size
+                    elif sub == 0x23:
+                        _, off = rid(mv, off); off += 4
+                        num = struct.unpack_from(">I", mv, off)[0]; off += 4
+                        et = mv[off]; off += 1
+                        off += num * TS[et]
+                    else:
+                        oid = struct.unpack_from(ID, mv, off)[0]
+                        roots[oid] = sub
+                        off = skip_root(mv, off, sub)
+            else:
+                f.seek(length, 1)
+        f.close()
+        return target_cls_id, S0
+    refoffs_cache = {}
+    def get_refoffs(cls_id):
+        r = refoffs_cache.get(cls_id)
+        if r is not None:
+            return r
+        res = []
+        off = 0
+        cid = cls_id
+        while cid in class_ifields:
+            for (t, nid) in class_ifields[cid]:
+                if t == 2:
+                    res.append((off, nid))
+                    off += id_size
+                else:
+                    off += TS[t]
+            cid = class_super.get(cid, 0)
+        refoffs_cache[cls_id] = res
+        return res
+    def cname(cid):
+        nid = loadclass.get(cid)
+        return strings.get(nid, f"<cls@{cid}>").replace("/", ".") if nid else f"<cls@{cid}>"
+    def fname(nid):
+        return strings.get(nid, f"<f@{nid}>")
+    def scan_referrers(S):
+        next_set = set()
+        field_hits = defaultdict(int)
+        cls_hits = defaultdict(int)
+        static_hits = defaultdict(int)
+        root_ref = defaultdict(int)
+        Sdisj = S.isdisjoint
+        f = open(path, "rb"); read_header(f); read = f.read
+        while True:
+            hdr = read(9)
+            if len(hdr) < 9:
+                break
+            tag = hdr[0]
+            length = struct.unpack_from(">I", hdr, 5)[0]
+            if tag in (0x0C, 0x1C):
+                body = read(length); mv = memoryview(body); n = len(mv); off = 0
+                while off < n:
+                    sub = mv[off]; off += 1
+                    if sub == 0x21:
+                        oid, off = rid(mv, off); off += 4
+                        cls_id, off = rid(mv, off)
+                        nb = struct.unpack_from(">I", mv, off)[0]; off += 4
+                        d = off; off += nb
+                        hit = False
+                        for (fo, nid) in get_refoffs(cls_id):
+                            rv = struct.unpack_from(ID, mv, d + fo)[0]
+                            if rv in S:
+                                hit = True
+                                field_hits[(cname(cls_id), fname(nid))] += 1
+                        if hit:
+                            next_set.add(oid)
+                            cls_hits[cname(cls_id)] += 1
+                            if oid in roots:
+                                root_ref[cname(cls_id)] += 1
+                    elif sub == 0x20:
+                        off, cls_id, srefs = parse_class_dump(mv, off, False)
+                        for (nid, val) in srefs:
+                            if val in S:
+                                static_hits[(cname(cls_id), fname(nid))] += 1
+                    elif sub == 0x22:
+                        oid, off = rid(mv, off); off += 4
+                        num = struct.unpack_from(">I", mv, off)[0]; off += 4
+                        arr_cls, off = rid(mv, off)
+                        if num:
+                            elems = struct.unpack_from(">%d%s" % (num, ID[1]), mv, off)
+                            off += num * id_size
+                            if not Sdisj(elems):
+                                next_set.add(oid)
+                                cls_hits[cname(arr_cls)] += 1
+                                for e in elems:
+                                    if e in S:
+                                        field_hits[(cname(arr_cls), "[]")] += 1
+                                if oid in roots:
+                                    root_ref[cname(arr_cls)] += 1
+                    elif sub == 0x23:
+                        _, off = rid(mv, off); off += 4
+                        num = struct.unpack_from(">I", mv, off)[0]; off += 4
+                        et = mv[off]; off += 1
+                        off += num * TS[et]
+                    else:
+                        off = skip_root(mv, off, sub)
+            else:
+                f.seek(length, 1)
+        f.close()
+        return next_set, field_hits, cls_hits, static_hits, root_ref
+    def show(title, d, topn=15):
+        print(title)
+        for k, v in sorted(d.items(), key=lambda x: x[1], reverse=True)[:topn]:
+            print(f"    {v:>12,}   {k}")
+    # Pass 0: index the dump and collect the instances of the target class.
+    print(f"[build] id_size={id_size} target={target_name}", flush=True)
+    target_cls_id, S = build()
+    if target_cls_id is None:
+        raise SystemExit(f"[err] 未在 dump 中找到类 {target_name}（检查类名拼写/包路径）")
+    print(f"[build] target_cls_id={target_cls_id}  instances={len(S):,}  "
+          f"roots={len(roots):,}  classes={len(class_ifields):,}", flush=True)
+    # One full scan of the dump per hop; each hop's referrers become the next hop's targets.
+    for hop in range(1, MAX_HOPS + 1):
+        print("\n" + "#" * 90, flush=True)
+        print(f"# HOP {hop}: 寻找引用 {len(S):,} 个对象的 referrer", flush=True)
+        print("#" * 90, flush=True)
+        nxt, field_hits, cls_hits, static_hits, root_ref = scan_referrers(S)
+        show("[referrer 类 -> referrer 对象数]", cls_hits)
+        show("[referrer 字段 -> 指向目标的边数]", field_hits)
+        # The two starred sections are printed only when they have entries.
+        if static_hits:
+            show("[★静态字段持有(GC-root 级) -> 边数]", static_hits)
+        if root_ref:
+            show("[★referrer 自身就是 GC root -> 个数]", root_ref)
+        print(f"[next] referrer 对象总数 = {len(nxt):,}", flush=True)
+        if not nxt:
+            print("[stop] 无更多 referrer（已达根）", flush=True)
+            break
+        # No visited set is kept, so objects on a reference cycle can reappear in later hops.
+        S = nxt
+    print("\n[done]", flush=True)
+if __name__ == "__main__":
+    main()

package/src/core-skills/xiaoma-heap-dump-analysis/xiaoma-skill-manifest.yaml ADDED Viewed

@@ -0,0 +1,15 @@
+type: skill
+module: core
+capabilities:
+  - name: xiaoma-heap-dump-analysis
+    menu-code: HDA
+    description: "Find the true root cause of a JVM memory leak from a .hprof heap dump — class histogram, reverse-reference GC-root tracing, optional Eclipse MAT cross-validation, and precise collection/field measurement. Use when you have an OOM / heap dump and need to know which collection retains the leaked objects and why."
+    supports-headless: true
+    input: a JVM heap dump (.hprof)
+    args: optional suspected class name
+    output: root-cause memory-leak report located next to the dump
+    config-vars-used: null
+    phase: anytime
+    before: []
+    after: []
+    is-required: false

package/tools/installer/ide/_config-driven.js CHANGED Viewed

@@ -449,8 +449,8 @@ class ConfigDrivenIdeSetup {
       this.skillWriteTracker?.add(canonicalId);
       // Copy all skill files, filtering OS/editor artifacts recursively
-      const skipPatterns = new Set(['.DS_Store', 'Thumbs.db', 'desktop.ini']);
-      const skipSuffixes = ['~', '.swp', '.swo', '.bak'];
+      const skipPatterns = new Set(['.DS_Store', 'Thumbs.db', 'desktop.ini', '__pycache__']);
+      const skipSuffixes = ['~', '.swp', '.swo', '.bak', '.pyc', '.pyo'];
       const filter = (src) => {
         const name = path.basename(src);
         if (src === sourceDir) return true;

package/tools/installer/modules/official-modules.js CHANGED Viewed

@@ -763,6 +763,10 @@ class OfficialModules {
     const entries = await fs.readdir(dir, { withFileTypes: true });
     for (const entry of entries) {
+      // Skip Python bytecode caches — regenerated locally, never meant to ship
+      if (entry.name === '__pycache__') continue;
+      if (entry.name.endsWith('.pyc') || entry.name.endsWith('.pyo')) continue;
       const fullPath = path.join(dir, entry.name);
       if (entry.isDirectory()) {