@zeyue0329/xiaoma-cli 1.17.0 → 1.17.1-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/core-skills/module-help.csv +1 -0
- package/src/core-skills/xiaoma-heap-dump-analysis/SKILL.md +137 -0
- package/src/core-skills/xiaoma-heap-dump-analysis/resources/hprof-internals.md +157 -0
- package/src/core-skills/xiaoma-heap-dump-analysis/resources/mat-headless-runbook.md +108 -0
- package/src/core-skills/xiaoma-heap-dump-analysis/resources/methodology.md +148 -0
- package/src/core-skills/xiaoma-heap-dump-analysis/scripts/hprof_histogram.py +215 -0
- package/src/core-skills/xiaoma-heap-dump-analysis/scripts/inspect_objects.py +335 -0
- package/src/core-skills/xiaoma-heap-dump-analysis/scripts/trace_referrers.py +305 -0
- package/src/core-skills/xiaoma-heap-dump-analysis/xiaoma-skill-manifest.yaml +15 -0
- package/tools/installer/ide/_config-driven.js +2 -2
- package/tools/installer/modules/official-modules.js +4 -0
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
hprof_histogram.py — 流式 HPROF 类直方图(实例数 / 浅大小)。
|
|
4
|
+
|
|
5
|
+
纯标准库、单遍流式解析,内存占用与对象总数无关(只缓存类元数据),
|
|
6
|
+
可处理数 GB、数千万对象的 dump。用于内存泄漏分析第一步:
|
|
7
|
+
快速看出"哪些类实例最多 / 占内存最多",尤其是异常增长的业务/第三方类。
|
|
8
|
+
|
|
9
|
+
用法:
|
|
10
|
+
python3 hprof_histogram.py <dump.hprof> [--top N] [--biz-only]
|
|
11
|
+
|
|
12
|
+
参数:
|
|
13
|
+
--top N 每个榜单显示前 N 行(默认 50)
|
|
14
|
+
--biz-only 只显示业务/第三方类(排除 java./javax./jdk./sun./com.sun./jakarta.)
|
|
15
|
+
|
|
16
|
+
输出: 四个榜单 —— 全部按浅大小 / 全部按实例数 / 业务类按浅大小 / 业务类按实例数。
|
|
17
|
+
|
|
18
|
+
注意:
|
|
19
|
+
- 浅大小是估算(对象头按 16 字节近似),用于排名而非精确字节会计。
|
|
20
|
+
- 真正定位"谁持有这些对象"需要配合 trace_referrers.py(反向引用追溯)。
|
|
21
|
+
"""
|
|
22
|
+
import sys
|
|
23
|
+
import struct
|
|
24
|
+
import argparse
|
|
25
|
+
from collections import defaultdict
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def main():
    """Scan an HPROF heap dump once and print per-class histograms.

    Command line: ``<dump.hprof> [--top N] [--biz-only]``.

    Only STRING / LOAD CLASS metadata and per-class counters are retained.
    Each heap segment is read into memory as a whole and its object payloads
    are skipped, never stored.  Shallow sizes are estimates (a flat 16-byte
    header is assumed per object) and are meant for ranking only.

    Output (stdout): up to four rankings -- all classes by shallow size and by
    instance count, then the same two restricted to non-JDK classes.  With
    ``--biz-only`` the first two rankings are omitted.  Progress and warnings
    go to stderr.

    Raises:
        SystemExit: if the file is not an HPROF dump, the header is too short,
            the identifier size is unsupported, a record is malformed, or an
            unknown heap sub-record tag is encountered.
    """
    ap = argparse.ArgumentParser(description="HPROF 类直方图")
    ap.add_argument("hprof", help="heap dump (.hprof) 路径")
    ap.add_argument("--top", type=int, default=50, help="每个榜单显示前 N 行")
    ap.add_argument("--biz-only", action="store_true", help="只显示业务/第三方类")
    args = ap.parse_args()

    strings = {}                      # string id -> decoded text
    loadclass = {}                    # class object id -> name string id
    inst_count = defaultdict(int)     # class id -> number of instances
    inst_bytes = defaultdict(int)     # class id -> estimated shallow bytes
    objarr_count = defaultdict(int)   # array class id -> number of arrays
    objarr_bytes = defaultdict(int)   # array class id -> estimated bytes
    primarr_count = defaultdict(int)  # element type code -> number of arrays
    primarr_bytes = defaultdict(int)  # element type code -> estimated bytes
    total_objs = 0

    # The context manager guarantees the dump is closed on every exit path,
    # including the SystemExit raised for bad input.
    with open(args.hprof, "rb") as f:
        # ---- header: NUL-terminated format string + id_size(u4) + timestamp(u8)
        buf = bytearray()
        # The magic is under 20 bytes; the bound keeps a large non-HPROF file
        # from being read byte by byte in search of a NUL.
        while len(buf) < 64:
            c = f.read(1)
            if c in (b"\x00", b""):
                break
            buf += c
        fmt = bytes(buf).decode("ascii", "replace")
        if not fmt.startswith("JAVA PROFILE"):
            raise SystemExit(f"[err] {args.hprof} 不是 HPROF 堆转储(缺少 'JAVA PROFILE' 魔数)")
        head = f.read(4)
        if len(head) < 4:
            raise SystemExit(f"[err] {args.hprof} 文件过短或损坏,无法读取 id_size")
        id_size = struct.unpack(">I", head)[0]
        if id_size not in (4, 8):
            # Every offset below depends on this; guessing would yield garbage.
            raise SystemExit(f"[err] {args.hprof} unsupported id_size={id_size} (expected 4 or 8)")
        f.read(8)  # timestamp, unused
        sys.stderr.write(f"[info] format={fmt!r} id_size={id_size}\n")
        sys.stderr.flush()

        ID = ">Q" if id_size == 8 else ">I"
        # Byte width of each HPROF basic type code (2 = object reference).
        TS = {2: id_size, 4: 1, 5: 2, 6: 4, 7: 8, 8: 1, 9: 2, 10: 4, 11: 8}
        PRIM = {4: "boolean", 5: "char", 6: "float", 7: "double",
                8: "byte", 9: "short", 10: "int", 11: "long"}
        # GC-root sub-records: bytes that follow the leading object id.
        ROOT_TAIL = {0xFF: 0, 0x01: id_size, 0x02: 8, 0x03: 8, 0x04: 4,
                     0x05: 0, 0x06: 4, 0x07: 0, 0x08: 8}

        def rid(mv, off):
            """Read one identifier at ``off``; return (value, next offset)."""
            return struct.unpack_from(ID, mv, off)[0], off + id_size

        def parse_class_dump(mv, off):
            """Skip a CLASS DUMP sub-record and return the offset after it."""
            _, off = rid(mv, off)      # class object id
            off += 4                   # stack trace serial
            off += id_size * 6         # super, loader, signers, protdomain, res1, res2
            off += 4                   # instance size
            cp = struct.unpack_from(">H", mv, off)[0]
            off += 2
            for _ in range(cp):        # constant pool: index(u2) + type(u1) + value
                off += 2
                t = mv[off]
                off += 1 + TS[t]
            sf = struct.unpack_from(">H", mv, off)[0]
            off += 2
            for _ in range(sf):        # statics: name id + type(u1) + value
                off += id_size
                t = mv[off]
                off += 1 + TS[t]
            iff = struct.unpack_from(">H", mv, off)[0]
            off += 2
            off += iff * (id_size + 1)  # instance fields: name id + type(u1)
            return off

        def parse_heap(body):
            """Walk one heap segment and update the per-class counters."""
            nonlocal total_objs
            mv = memoryview(body)
            n = len(mv)
            off = 0
            su = struct.unpack_from
            while off < n:
                sub = mv[off]
                off += 1
                if sub == 0x21:        # INSTANCE DUMP
                    _, off = rid(mv, off)
                    off += 4
                    cls_id, off = rid(mv, off)
                    nb = su(">I", mv, off)[0]
                    off += 4 + nb
                    inst_count[cls_id] += 1
                    inst_bytes[cls_id] += 16 + nb
                    total_objs += 1
                elif sub == 0x20:      # CLASS DUMP
                    off = parse_class_dump(mv, off)
                elif sub == 0x22:      # OBJECT ARRAY DUMP
                    _, off = rid(mv, off)
                    off += 4
                    num = su(">I", mv, off)[0]
                    off += 4
                    arr_cls, off = rid(mv, off)
                    off += num * id_size
                    objarr_count[arr_cls] += 1
                    objarr_bytes[arr_cls] += 16 + num * id_size
                    total_objs += 1
                elif sub == 0x23:      # PRIMITIVE ARRAY DUMP
                    _, off = rid(mv, off)
                    off += 4
                    num = su(">I", mv, off)[0]
                    off += 4
                    et = mv[off]
                    off += 1
                    payload = num * TS[et]
                    off += payload
                    primarr_count[et] += 1
                    primarr_bytes[et] += 16 + payload
                    total_objs += 1
                else:                  # GC roots: object id + fixed-size tail
                    tail = ROOT_TAIL.get(sub)
                    if tail is None:
                        raise SystemExit(f"[err] unknown heap subrecord 0x{sub:02x} at off {off-1}")
                    off += id_size + tail

        read = f.read
        seg = 0
        while True:
            hdr = read(9)              # tag(u1) + time(u4) + length(u4)
            if len(hdr) < 9:
                break
            tag = hdr[0]
            length = struct.unpack_from(">I", hdr, 5)[0]
            if tag not in (0x01, 0x02, 0x0C, 0x1C):
                f.seek(length, 1)      # record type not needed for the histogram
                continue
            body = read(length)
            if len(body) < length:
                # Dumps cut short (e.g. the JVM was killed while writing) end
                # with a partial record; keep what was counted so far.
                sys.stderr.write(
                    f"[warn] record 0x{tag:02x} truncated "
                    f"({len(body):,} of {length:,} bytes); reporting data read so far\n")
                sys.stderr.flush()
                break
            try:
                if tag == 0x01:        # STRING
                    sid = struct.unpack_from(ID, body, 0)[0]
                    strings[sid] = bytes(body[id_size:]).decode("utf-8", "replace")
                elif tag == 0x02:      # LOAD CLASS
                    off = 4            # skip class serial number
                    cls_id, off = rid(body, off)
                    off += 4           # skip stack trace serial
                    name_id, off = rid(body, off)
                    loadclass[cls_id] = name_id
                else:                  # HEAP DUMP / HEAP DUMP SEGMENT
                    parse_heap(body)
                    seg += 1
                    sys.stderr.write(f"[info] heap segment #{seg} parsed, total_objs={total_objs:,}\n")
                    sys.stderr.flush()
            except (struct.error, IndexError, KeyError) as exc:
                # Report a clean error instead of a raw parsing traceback.
                raise SystemExit(
                    f"[err] malformed record 0x{tag:02x} ({length:,} bytes): {exc!r}") from exc

    def cname(cls_id):
        """Resolve a class id to a dotted name, or a placeholder if unknown."""
        nid = loadclass.get(cls_id)
        if nid is None:
            return f"<class@{cls_id}>"
        return strings.get(nid, f"<str@{nid}>").replace("/", ".")

    rows = [(cname(cid), c, inst_bytes[cid]) for cid, c in inst_count.items()]
    rows.extend((cname(cid), c, objarr_bytes[cid]) for cid, c in objarr_count.items())
    rows.extend((f"{PRIM[et]}[]", c, primarr_bytes[et]) for et, c in primarr_count.items())

    total_bytes = sum(r[2] for r in rows)
    out = []
    out.append(f"format={fmt!r} id_size={id_size}")
    out.append(f"total_objects={total_objs:,} total_shallow_bytes={total_bytes:,} "
               f"({total_bytes/1024/1024/1024:.2f} GiB)")

    prim_names = frozenset(PRIM.values())
    prim_codes = frozenset("ZBCSIJFD")   # JVM descriptor codes for primitives
    jdk_prefixes = ("java.", "javax.", "jdk.", "sun.", "com.sun.", "jakarta.", "<")

    def is_biz(name):
        """Return True when ``name`` is a business / third-party class.

        Array names are reduced to their element type first.  Both the
        ``int[]`` form and JVM descriptors (``[Lpkg.Cls;``, ``[[I``) are
        handled; arrays of primitives at any depth count as JDK.
        """
        base = name
        while base.endswith("[]"):
            base = base[:-2]
        dims = len(base) - len(base.lstrip("["))
        if dims:
            base = base[dims:]
            if base in prim_codes:
                return False
            # Strip the descriptor wrapper only on array names, so ordinary
            # class names that start with "L" are left intact.
            if base.startswith("L") and base.endswith(";"):
                base = base[1:-1]
        if base in prim_names:
            return False
        return not base.startswith(jdk_prefixes)

    def section(title, key, biz):
        """Append one ranking of the top ``args.top`` rows to ``out``."""
        out.append("\n" + "=" * 100)
        out.append(title)
        out.append("=" * 100)
        out.append(f"{'shallow_MiB':>12} {'count':>14} class")
        sel = [r for r in rows if (not biz or is_biz(r[0]))]
        for name, c, b in sorted(sel, key=key, reverse=True)[:args.top]:
            out.append(f"{b/1024/1024:12.1f} {c:14,} {name}")

    if not args.biz_only:
        section("TOP by SHALLOW SIZE (全部)", lambda r: r[2], False)
        section("TOP by INSTANCE COUNT (全部)", lambda r: r[1], False)
    section("TOP by SHALLOW SIZE (业务/第三方类,排除 JDK)", lambda r: r[2], True)
    section("TOP by INSTANCE COUNT (业务/第三方类,排除 JDK)", lambda r: r[1], True)

    print("\n".join(out))
    print("\n[done]", flush=True)


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
inspect_objects.py — 精确实测某个类的实例字段 / 静态字段值,以及它持有的 Map 的真实条目数。
|
|
4
|
+
|
|
5
|
+
用于内存泄漏分析的"钉死根因"阶段:当反向追溯/MAT 指出某个 holder 的某个集合字段
|
|
6
|
+
是泄漏聚集点时,用本脚本读出确凿数字 —— 例如:
|
|
7
|
+
- 某 ConcurrentHashMap 字段实际装了多少条目(容量 / 非空桶)
|
|
8
|
+
- 某配置对象的关键字段实际值(如心跳/超时配置是否被改坏)
|
|
9
|
+
- 某工具类的 *静态* 缓存 Map 实际有多大(静态字段是 GC-root 级持有,最常见的泄漏点之一)
|
|
10
|
+
|
|
11
|
+
四种模式:
|
|
12
|
+
1) 列字段(默认): 只给 --class,打印该类的全部实例字段与静态字段(名/类型),
|
|
13
|
+
帮助你决定接下来读哪个字段。
|
|
14
|
+
2) 读实例字段 --fields a,b,c:
|
|
15
|
+
打印该类每个实例上这些字段的实际值(int/long/bool/ref…)。
|
|
16
|
+
3) 读静态字段 --static-fields a,b,c:
|
|
17
|
+
打印该类这些 *静态* 字段的值;若静态字段指向 HashMap/ConcurrentHashMap,
|
|
18
|
+
一并实测其条目数。适合排查静态缓存/注册表泄漏。
|
|
19
|
+
4) 测实例 Map --map-fields x,y:
|
|
20
|
+
把这些实例字段当 java.util.HashMap / ConcurrentHashMap,
|
|
21
|
+
输出 size 字段值、table 容量、非空桶数(条目数可靠下界)。
|
|
22
|
+
|
|
23
|
+
用法:
|
|
24
|
+
python3 inspect_objects.py <dump.hprof> --class <holder-class>
|
|
25
|
+
python3 inspect_objects.py <dump.hprof> --class <holder-class> --fields f1,f2
|
|
26
|
+
python3 inspect_objects.py <dump.hprof> --class <holder-class> --static-fields s1,s2
|
|
27
|
+
python3 inspect_objects.py <dump.hprof> --class <holder-class> --map-fields m1,m2
|
|
28
|
+
[--limit N] 最多打印前 N 个实例(默认 20)
|
|
29
|
+
|
|
30
|
+
注意:
|
|
31
|
+
ConcurrentHashMap 的 baseCount/size 在高并发下可能偏小(计数分散到 counterCells),
|
|
32
|
+
本脚本同时给出"非空桶数",它是条目数更可靠的下界;当 size 字段与非空桶量级差 >10×
|
|
33
|
+
时会主动追加 ⚠️ 提示。结合 table 容量(2 的幂)即可判断真实规模量级。
|
|
34
|
+
"""
|
|
35
|
+
import sys
|
|
36
|
+
import struct
|
|
37
|
+
import argparse
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def main():
    """Read field values, static fields and Map sizes of one class from an HPROF dump.

    Command line: ``<dump.hprof> --class <holder> [--fields a,b]
    [--static-fields s1,s2] [--map-fields m1,m2] [--limit N]``.

    Modes:
      * no field option: list the holder's instance fields (with offsets) and
        static fields (with values), then return;
      * ``--fields``: print those instance fields for each holder instance;
      * ``--static-fields``: print those static fields; a reference value is
        reported as a Map when it points at one;
      * ``--map-fields``: treat those instance fields as Map references and
        print the size field, table capacity and number of non-empty buckets.

    A Map is java.util.HashMap, java.util.concurrent.ConcurrentHashMap, or any
    class whose superclass chain reaches one of them (e.g. LinkedHashMap).

    The dump is scanned in up to two passes: pass A collects class layouts and
    static values; pass B (only when needed) collects holder instances, Map
    instances and node arrays.

    Raises:
        SystemExit: if the file is not an HPROF dump, the header is too short,
            the identifier size is unsupported, a record is malformed, an
            unknown heap sub-record is met, or the holder class is not found.
    """
    ap = argparse.ArgumentParser(description="HPROF 对象字段 / 静态字段 / Map 条目实测")
    ap.add_argument("hprof")
    ap.add_argument("--class", dest="cls", required=True, help="holder 类全名(点或斜杠)")
    ap.add_argument("--fields", default="", help="逗号分隔的实例字段名,读其实际值")
    ap.add_argument("--static-fields", dest="static_fields", default="",
                    help="逗号分隔的静态字段名,读其值(指向 Map 则一并测条目数)")
    ap.add_argument("--map-fields", dest="map_fields", default="",
                    help="逗号分隔的实例字段名,按 Map 实测条目数")
    ap.add_argument("--limit", type=int, default=20, help="最多打印实例数")
    args = ap.parse_args()

    path = args.hprof
    holder_name = args.cls.replace(".", "/")
    want_fields = [s.strip() for s in args.fields.split(",") if s.strip()]
    want_static = [s.strip() for s in args.static_fields.split(",") if s.strip()]
    want_maps = [s.strip() for s in args.map_fields.split(",") if s.strip()]

    def read_header(f):
        """Validate the HPROF magic and return id_size; ``f`` ends at the first record."""
        b = bytearray()
        # The magic is under 20 bytes; the bound keeps a large non-HPROF file
        # from being read byte by byte in search of a NUL.
        while len(b) < 64:
            c = f.read(1)
            if c in (b"\x00", b""):
                break
            b += c
        if not bytes(b).startswith(b"JAVA PROFILE"):
            raise SystemExit(f"[err] {path} 不是 HPROF 堆转储(缺少 'JAVA PROFILE' 魔数)")
        head = f.read(4)
        if len(head) < 4:
            raise SystemExit(f"[err] {path} 文件过短或损坏,无法读取 id_size")
        ids = struct.unpack(">I", head)[0]
        if ids not in (4, 8):
            # Every offset below depends on this; guessing would yield garbage.
            raise SystemExit(f"[err] {path} unsupported id_size={ids} (expected 4 or 8)")
        f.read(8)  # timestamp, unused
        return ids

    with open(path, "rb") as f0:
        id_size = read_header(f0)
    ID = ">Q" if id_size == 8 else ">I"
    # Byte width and display name of each HPROF basic type code (2 = reference).
    TS = {2: id_size, 4: 1, 5: 2, 6: 4, 7: 8, 8: 1, 9: 2, 10: 4, 11: 8}
    TNAME = {2: "ref", 4: "boolean", 5: "char", 6: "float", 7: "double",
             8: "byte", 9: "short", 10: "int", 11: "long"}
    # struct format per type code; boolean (4) is handled separately.
    VAL_FMT = {2: ID, 5: ">H", 6: ">f", 7: ">d", 8: ">b", 9: ">h", 10: ">i", 11: ">q"}

    def rid(mv, off):
        """Read one identifier at ``off``; return (value, next offset)."""
        return struct.unpack_from(ID, mv, off)[0], off + id_size

    def read_val(mv, base, off, t):
        """Decode the value of type code ``t`` at ``base + off``; "?" if unknown."""
        p = base + off
        if t == 4:
            return bool(mv[p])
        fmt = VAL_FMT.get(t)
        if fmt is None:
            return "?"
        return struct.unpack_from(fmt, mv, p)[0]

    strings = {}        # string id -> decoded text
    loadclass = {}      # class id -> name string id
    class_super = {}    # class id -> superclass id
    class_ifields = {}  # class id -> [(type, name_id), ...] declared by that class
    class_statics = {}  # class id -> [(name_id, type, value), ...]

    def parse_cd(mv, off, collect):
        """Parse a CLASS DUMP sub-record; return (next offset, class id).

        When ``collect`` is true the superclass, declared instance fields and
        static values are stored in the class tables.
        """
        cid, off = rid(mv, off)
        off += 4                      # stack trace serial
        sup, off = rid(mv, off)
        off += id_size * 5 + 4        # loader, signers, protdomain, res1, res2, instance size
        cp = struct.unpack_from(">H", mv, off)[0]
        off += 2
        for _ in range(cp):           # constant pool: index(u2) + type(u1) + value
            off += 2
            t = mv[off]
            off += 1 + TS[t]
        sf = struct.unpack_from(">H", mv, off)[0]
        off += 2
        srefs = []
        for _ in range(sf):           # statics: name id + type(u1) + value
            nid, off = rid(mv, off)
            t = mv[off]
            off += 1
            srefs.append((nid, t, read_val(mv, 0, off, t)))
            off += TS[t]
        iff = struct.unpack_from(">H", mv, off)[0]
        off += 2
        ifs = []
        for _ in range(iff):          # instance fields: name id + type(u1)
            nid, off = rid(mv, off)
            t = mv[off]
            off += 1
            ifs.append((t, nid))
        if collect:
            class_super[cid] = sup
            class_ifields[cid] = ifs
            if srefs:
                class_statics[cid] = srefs
        return off, cid

    # GC-root sub-records: bytes that follow the leading object id.
    ROOT_FIX = {0xFF: 0, 0x05: 0, 0x07: 0, 0x02: 8, 0x03: 8, 0x08: 8, 0x04: 4, 0x06: 4}

    def skip_root(mv, off, sub):
        """Return the offset just past GC-root sub-record ``sub`` starting at ``off``."""
        if sub == 0x01:               # JNI global: object id + ref id
            return off + 2 * id_size
        tail = ROOT_FIX.get(sub)
        if tail is None:
            raise SystemExit(f"[err] unknown heap subrecord 0x{sub:02x} at off {off-1}")
        return off + id_size + tail

    def scan_segment(mv, collect_classes, instance_cb, array_cb):
        """Walk one heap segment, invoking the callbacks for instances / object arrays."""
        n = len(mv)
        off = 0
        unpack = struct.unpack_from
        while off < n:
            s = mv[off]
            off += 1
            if s == 0x21:             # INSTANCE DUMP
                oid, off = rid(mv, off)
                off += 4
                ccid, off = rid(mv, off)
                nb = unpack(">I", mv, off)[0]
                off += 4
                d = off               # start of the instance field data
                off += nb
                if instance_cb:
                    instance_cb(oid, ccid, mv, d)
            elif s == 0x20:           # CLASS DUMP
                off, _ = parse_cd(mv, off, collect_classes)
            elif s == 0x22:           # OBJECT ARRAY DUMP
                oid, off = rid(mv, off)
                off += 4
                num = unpack(">I", mv, off)[0]
                off += 4
                acid, off = rid(mv, off)
                if array_cb:
                    array_cb(oid, acid, num, mv, off)
                off += num * id_size
            elif s == 0x23:           # PRIMITIVE ARRAY DUMP
                _, off = rid(mv, off)
                off += 4
                num = unpack(">I", mv, off)[0]
                off += 4
                et = mv[off]
                off += 1 + num * TS[et]
            else:
                off = skip_root(mv, off, s)

    def walk(collect_classes, instance_cb, array_cb, label=""):
        """Make one full pass over the dump, filling the string / class tables.

        ``instance_cb(oid, class_id, mv, data_off)`` and
        ``array_cb(oid, array_class_id, count, mv, elems_off)`` are optional.
        A non-empty ``label`` enables per-segment progress lines on stderr.
        """
        # The context manager closes the dump on error paths as well.
        with open(path, "rb") as f:
            read_header(f)
            read = f.read
            seg = 0
            while True:
                h = read(9)           # tag(u1) + time(u4) + length(u4)
                if len(h) < 9:
                    break
                tag = h[0]
                length = struct.unpack_from(">I", h, 5)[0]
                if tag not in (0x01, 0x02, 0x0C, 0x1C):
                    f.seek(length, 1)
                    continue
                b = read(length)
                if len(b) < length:
                    # A dump cut short ends with a partial record; keep what
                    # was collected so far.
                    sys.stderr.write(
                        f"[warn] record 0x{tag:02x} truncated "
                        f"({len(b):,} of {length:,} bytes); using data read so far\n")
                    sys.stderr.flush()
                    break
                try:
                    if tag == 0x01:       # STRING
                        sid = struct.unpack_from(ID, b, 0)[0]
                        strings[sid] = bytes(b[id_size:]).decode("utf-8", "replace")
                    elif tag == 0x02:     # LOAD CLASS
                        off = 4           # skip class serial number
                        cid, off = rid(b, off)
                        off += 4          # skip stack trace serial
                        nid, off = rid(b, off)
                        loadclass[cid] = nid
                    else:                 # HEAP DUMP / HEAP DUMP SEGMENT
                        scan_segment(memoryview(b), collect_classes, instance_cb, array_cb)
                        seg += 1
                        if label:
                            sys.stderr.write(f"[{label}] heap segment #{seg} scanned\n")
                            sys.stderr.flush()
                except (struct.error, IndexError, KeyError) as exc:
                    # Report a clean error instead of a raw parsing traceback.
                    raise SystemExit(
                        f"[err] malformed record 0x{tag:02x} ({length:,} bytes): {exc!r}") from exc

    # ---------- Pass A: class layouts + static fields ----------
    sys.stderr.write("[passA] 扫描类元数据...\n")
    sys.stderr.flush()
    walk(True, None, None)

    def cid_by_name(name):
        """Return the id of the first loaded class named ``name``, or None."""
        name = name.replace(".", "/")
        for cid, nid in loadclass.items():
            if strings.get(nid) == name:
                return cid
        return None

    def fname(nid):
        """Resolve a field-name string id, with a placeholder when unknown."""
        return strings.get(nid, f"<f@{nid}>")

    def layout(cls_id):
        """Map field name -> (byte offset in instance data, type code).

        Instance data holds the class's own fields first, then each
        superclass's in turn.  On a name clash the most-derived field wins.
        """
        res = {}
        off = 0
        cid = cls_id
        while cid in class_ifields:
            for (t, nid) in class_ifields[cid]:
                nm = strings.get(nid, "?")
                if nm not in res:
                    res[nm] = (off, t)
                off += TS[t]
            cid = class_super.get(cid, 0)
        return res

    HOLDER = cid_by_name(holder_name)
    if HOLDER is None:
        raise SystemExit(f"[err] 未找到类 {holder_name}")
    hl = layout(HOLDER)
    statics = {fname(nid): (t, v) for (nid, t, v) in class_statics.get(HOLDER, [])}

    # Mode 1: list fields only (instance + static)
    if not want_fields and not want_static and not want_maps:
        print(f"[class] {holder_name}")
        print(" 实例字段 (name : type @offset):")
        for nm, (o, t) in sorted(hl.items(), key=lambda x: x[1][0]):
            # str() keeps an unknown numeric type code printable with ":8s".
            print(f" {nm:30s} : {str(TNAME.get(t, t)):8s} @{o}")
        if statics:
            print(" 静态字段 (name : type):")
            for nm, (t, v) in statics.items():
                vs = hex(v) if t == 2 and isinstance(v, int) else v
                print(f" {nm:30s} : {str(TNAME.get(t, t)):8s} = {vs}")
        print("\n用 --fields 读实例字段,--static-fields 读静态字段,--map-fields 测实例 Map。")
        return

    # Map class layouts
    CHM = cid_by_name("java/util/concurrent/ConcurrentHashMap")
    HM = cid_by_name("java/util/HashMap")
    chm_l = layout(CHM) if CHM else {}
    hm_l = layout(HM) if HM else {}
    NODE_CHM = cid_by_name("[Ljava/util/concurrent/ConcurrentHashMap$Node;")
    NODE_HM = cid_by_name("[Ljava/util/HashMap$Node;")

    # root map class id -> (layout, name of the count field, struct format)
    map_roots = {}
    if CHM:
        map_roots[CHM] = (chm_l, "baseCount", ">q")
    if HM:
        map_roots[HM] = (hm_l, "size", ">i")

    # concrete class id -> (layout, count field, format, byte prefix).
    # A subclass (e.g. LinkedHashMap) stores its own fields before the
    # inherited ones, so the root's fields start `prefix` bytes into the data.
    map_classes = {}
    if map_roots:
        for cid in class_ifields:
            skipped = 0
            cur = cid
            while cur in class_ifields:
                root = map_roots.get(cur)
                if root is not None:
                    map_classes[cid] = root + (skipped,)
                    break
                skipped += sum(TS[t] for t, _ in class_ifields[cur])
                cur = class_super.get(cur, 0)

    holder_rows = []  # (oid, {field: (value, tname)}, {mapfield: map_id})
    map_info = {}     # map_id -> (size_or_baseCount, table_id)
    arr_info = {}     # nodearr_id -> (capacity, nonnull)
    need_passB = bool(want_fields or want_maps) or any(
        statics.get(s, (None, None))[0] == 2 for s in want_static)

    def inst_cb(oid, ccid, mv, d):
        """Record requested holder fields, or size/table of any Map instance."""
        if ccid == HOLDER and (want_fields or want_maps):
            fv = {}
            for fn in want_fields:
                if fn in hl:
                    o, t = hl[fn]
                    fv[fn] = (read_val(mv, d, o, t), TNAME.get(t, t))
                else:
                    fv[fn] = ("<无此字段>", "")
            mids = {}
            for fn in want_maps:
                mids[fn] = struct.unpack_from(ID, mv, d + hl[fn][0])[0] if fn in hl else 0
            holder_rows.append((oid, fv, mids))
            return
        spec = map_classes.get(ccid)
        if spec is None:
            return
        fields, count_name, count_fmt, prefix = spec
        start = d + prefix
        if count_name in fields:
            count = struct.unpack_from(count_fmt, mv, start + fields[count_name][0])[0]
        else:
            count = -1
        if "table" in fields:
            tid = struct.unpack_from(ID, mv, start + fields["table"][0])[0]
        else:
            tid = 0
        map_info[oid] = (count, tid)

    def arr_cb(oid, acid, num, mv, off):
        """Record capacity and non-null slot count of HashMap/CHM node arrays."""
        if acid in (NODE_CHM, NODE_HM):
            if num:
                elems = struct.unpack_from(">%d%s" % (num, ID[1]), mv, off)
                nn = sum(1 for e in elems if e != 0)
            else:
                nn = 0
            arr_info[oid] = (num, nn)

    if need_passB:
        sys.stderr.write("[passB] 扫描实例 / Map / 数组...\n")
        sys.stderr.flush()
        walk(False, inst_cb, arr_cb)

    def report_map(label, mid):
        """Print size field, table capacity and non-empty buckets for map ``mid``."""
        if not mid:
            print(f" {label} = <null>")
            return
        info = map_info.get(mid)
        if not info:
            print(f" {label} = {hex(mid)} (非 HashMap/ConcurrentHashMap 或未解析)")
            return
        szc, tid = info
        cap, nn = arr_info.get(tid, ("?", "?"))
        warn = ""
        # Far more occupied buckets than the size field claims means the
        # counter is not trustworthy.
        if isinstance(nn, int) and szc >= 0 and nn > szc * 10:
            warn = " ⚠️ size 字段疑似失真(并发计数分散到 counterCells),以非空桶/容量为准"
        print(f" {label}: size字段={szc:,} table容量={cap} 非空桶={nn}{warn}")

    # Mode 3: static fields
    if want_static:
        print(f"[class] {holder_name} 静态字段:")
        for sn in want_static:
            if sn not in statics:
                print(f" {sn} = <无此静态字段>")
                continue
            t, v = statics[sn]
            if t == 2:
                report_map(sn, v)
            else:
                print(f" {sn} = {v} ({TNAME.get(t, t)})")
        print()

    # Modes 2/4: instance fields / instance Maps
    if want_fields or want_maps:
        print(f"[class] {holder_name} 实例数 = {len(holder_rows)}\n")
        for i, (oid, fv, mids) in enumerate(holder_rows[:args.limit]):
            print(f"实例#{i} @ {hex(oid)}")
            for fn, (v, t) in fv.items():
                vs = hex(v) if t == "ref" and isinstance(v, int) else v
                print(f" [field] {fn} = {vs} ({t})")
            for fn, mid in mids.items():
                report_map(f"[map] {fn}", mid)
            print()
        if len(holder_rows) > args.limit:
            print(f"... 其余 {len(holder_rows) - args.limit} 个实例已省略(--limit 调整)")
    print("[done]")


if __name__ == "__main__":
    main()
|