cppgolf 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {cppgolf-0.1.0 → cppgolf-0.1.2}/PKG-INFO +2 -3
- {cppgolf-0.1.0 → cppgolf-0.1.2}/cppgolf/__main__.py +3 -3
- cppgolf-0.1.2/cppgolf/golf_rename.py +410 -0
- {cppgolf-0.1.0 → cppgolf-0.1.2}/cppgolf/merge.py +25 -6
- {cppgolf-0.1.0 → cppgolf-0.1.2}/cppgolf/transforms.py +27 -16
- {cppgolf-0.1.0 → cppgolf-0.1.2}/cppgolf.egg-info/PKG-INFO +2 -3
- cppgolf-0.1.2/cppgolf.egg-info/requires.txt +6 -0
- {cppgolf-0.1.0 → cppgolf-0.1.2}/pyproject.toml +2 -3
- cppgolf-0.1.0/cppgolf/golf_rename.py +0 -452
- cppgolf-0.1.0/cppgolf.egg-info/requires.txt +0 -7
- {cppgolf-0.1.0 → cppgolf-0.1.2}/README.md +0 -0
- {cppgolf-0.1.0 → cppgolf-0.1.2}/cppgolf/__init__.py +0 -0
- {cppgolf-0.1.0 → cppgolf-0.1.2}/cppgolf/strip_comments.py +0 -0
- {cppgolf-0.1.0 → cppgolf-0.1.2}/cppgolf/whitespace.py +0 -0
- {cppgolf-0.1.0 → cppgolf-0.1.2}/cppgolf.egg-info/SOURCES.txt +0 -0
- {cppgolf-0.1.0 → cppgolf-0.1.2}/cppgolf.egg-info/dependency_links.txt +0 -0
- {cppgolf-0.1.0 → cppgolf-0.1.2}/cppgolf.egg-info/entry_points.txt +0 -0
- {cppgolf-0.1.0 → cppgolf-0.1.2}/cppgolf.egg-info/top_level.txt +0 -0
- {cppgolf-0.1.0 → cppgolf-0.1.2}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cppgolf
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: C++ multi-file merge & code golf / minifier tool
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/yourname/cppgolf
|
|
@@ -18,8 +18,7 @@ Classifier: Topic :: Software Development :: Code Generators
|
|
|
18
18
|
Classifier: Topic :: Text Processing :: Filters
|
|
19
19
|
Requires-Python: >=3.10
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
21
|
-
Requires-Dist:
|
|
22
|
-
Requires-Dist: tree-sitter-cpp>=0.23
|
|
21
|
+
Requires-Dist: libclang
|
|
23
22
|
Provides-Extra: dev
|
|
24
23
|
Requires-Dist: build; extra == "dev"
|
|
25
24
|
Requires-Dist: twine; extra == "dev"
|
|
@@ -96,8 +96,8 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
96
96
|
help='单语句 if/for/while 去花括号')
|
|
97
97
|
g2.add_argument('--shortcuts', dest='define_shortcuts', action='store_true',
|
|
98
98
|
help='高频 cout/cin 用 #define 缩写')
|
|
99
|
-
g2.add_argument('
|
|
100
|
-
help='
|
|
99
|
+
g2.add_argument('-no-rename', dest='no_rename_symbols', action='store_true',
|
|
100
|
+
help='不将用户变量/成员名压缩为短名(需要 tree-sitter-cpp)')
|
|
101
101
|
|
|
102
102
|
p.add_argument('--stats', action='store_true', help='显示压缩率统计')
|
|
103
103
|
return p
|
|
@@ -124,7 +124,7 @@ def main():
|
|
|
124
124
|
keep_inline=args.keep_inline,
|
|
125
125
|
aggressive=args.aggressive,
|
|
126
126
|
define_shortcuts=args.define_shortcuts,
|
|
127
|
-
rename_symbols=args.
|
|
127
|
+
rename_symbols=not(args.no_rename_symbols),
|
|
128
128
|
)
|
|
129
129
|
|
|
130
130
|
def print_stats(final_size: int):
|
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
"""
|
|
2
|
+
golf_rename.py — Pass 5: 符号名压缩(libclang AST 驱动)
|
|
3
|
+
|
|
4
|
+
依赖: pip install libclang
|
|
5
|
+
"""
|
|
6
|
+
import re
|
|
7
|
+
import itertools
|
|
8
|
+
import tempfile
|
|
9
|
+
import os
|
|
10
|
+
import sys as _sys
|
|
11
|
+
import struct as _struct
|
|
12
|
+
|
|
13
|
+
_MIN_RENAME_LEN = 2
|
|
14
|
+
|
|
15
|
+
# C/C++ 保留关键字,生成短名时不得使用
|
|
16
|
+
_CXX_KEYWORDS = frozenset({
|
|
17
|
+
# C keywords
|
|
18
|
+
'auto', 'break', 'case', 'char', 'const', 'continue', 'default',
|
|
19
|
+
'do', 'double', 'else', 'enum', 'extern', 'float', 'for', 'goto',
|
|
20
|
+
'if', 'inline', 'int', 'long', 'register', 'restrict', 'return',
|
|
21
|
+
'short', 'signed', 'sizeof', 'static', 'struct', 'switch', 'typedef',
|
|
22
|
+
'union', 'unsigned', 'void', 'volatile', 'while',
|
|
23
|
+
# C++ keywords
|
|
24
|
+
'alignas', 'alignof', 'and', 'and_eq', 'asm', 'bitand', 'bitor',
|
|
25
|
+
'bool', 'catch', 'class', 'compl', 'concept', 'consteval', 'constexpr',
|
|
26
|
+
'constinit', 'co_await', 'co_return', 'co_yield', 'decltype', 'delete',
|
|
27
|
+
'explicit', 'export', 'false', 'friend', 'mutable', 'namespace',
|
|
28
|
+
'new', 'noexcept', 'not', 'not_eq', 'nullptr', 'operator', 'or',
|
|
29
|
+
'or_eq', 'private', 'protected', 'public', 'requires', 'static_assert',
|
|
30
|
+
'static_cast', 'dynamic_cast', 'reinterpret_cast', 'const_cast',
|
|
31
|
+
'template', 'this', 'thread_local', 'throw', 'true', 'try', 'typeid',
|
|
32
|
+
'typename', 'using', 'virtual', 'wchar_t', 'xor', 'xor_eq',
|
|
33
|
+
# 常用宏 / 内置名
|
|
34
|
+
'NULL', 'TRUE', 'FALSE', 'EOF', 'stdin', 'stdout', 'stderr',
|
|
35
|
+
})
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _gen_short_names():
|
|
39
|
+
for length in itertools.count(1):
|
|
40
|
+
for combo in itertools.product('abcdefghijklmnopqrstuvwxyz', repeat=length):
|
|
41
|
+
yield ''.join(combo)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _make_platform_args() -> list:
|
|
45
|
+
"""返回当前平台所需的 libclang 预处理宏参数列表。
|
|
46
|
+
|
|
47
|
+
- Windows: 注入 _WIN32/WIN32/_WIN64/WIN64,以及 _HAS_STD_BYTE=0(避免
|
|
48
|
+
MSVC STL std::byte 与 Windows 头文件中全局 byte typedef 冲突)和
|
|
49
|
+
WIN32_LEAN_AND_MEAN(减少头文件噪音)。
|
|
50
|
+
- Linux / macOS: 注入对应平台宏。
|
|
51
|
+
"""
|
|
52
|
+
args: list = []
|
|
53
|
+
if _sys.platform == 'win32' or os.name == 'nt':
|
|
54
|
+
args += ['-D_WIN32', '-DWIN32']
|
|
55
|
+
if _struct.calcsize('P') == 8:
|
|
56
|
+
args += ['-D_WIN64', '-DWIN64']
|
|
57
|
+
args += ['-D_HAS_STD_BYTE=0', '-DWIN32_LEAN_AND_MEAN']
|
|
58
|
+
elif _sys.platform.startswith('linux'):
|
|
59
|
+
args += ['-D__linux__', '-D__unix__', '-DLINUX']
|
|
60
|
+
elif _sys.platform == 'darwin':
|
|
61
|
+
args += ['-D__APPLE__', '-D__unix__', '-D__MACH__']
|
|
62
|
+
return args
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _is_user_file(cursor, tmppath: str) -> bool:
|
|
66
|
+
"""判断 cursor 的定义位置是否属于用户临时文件(即待处理的源码),
|
|
67
|
+
而非系统/第三方头文件。用于过滤系统结构体字段等不应被重命名的符号。"""
|
|
68
|
+
loc = cursor.location
|
|
69
|
+
if not loc.file:
|
|
70
|
+
return False
|
|
71
|
+
try:
|
|
72
|
+
return os.path.samefile(loc.file.name, tmppath)
|
|
73
|
+
except OSError:
|
|
74
|
+
return False
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _walk_ast(
|
|
78
|
+
cursor,
|
|
79
|
+
tmppath: str,
|
|
80
|
+
src_bytes: bytes,
|
|
81
|
+
decl_map: dict,
|
|
82
|
+
replacements: list,
|
|
83
|
+
decl_kinds: frozenset,
|
|
84
|
+
ref_kinds: frozenset,
|
|
85
|
+
) -> None:
|
|
86
|
+
"""递归遍历 AST,收集需要重命名的符号声明位(decl_map)和所有引用位(replacements)。
|
|
87
|
+
|
|
88
|
+
参数:
|
|
89
|
+
decl_kinds — VAR_DECL / FIELD_DECL / PARM_DECL 等声明节点类型集合
|
|
90
|
+
ref_kinds — MEMBER_REF_EXPR / DECL_REF_EXPR / MEMBER_REF 等引用节点类型集合
|
|
91
|
+
decl_map — USR → (orig_name, first_decl_offset, name_byte_len),原地追加
|
|
92
|
+
replacements — (offset, byte_len, usr) 三元组列表,原地追加
|
|
93
|
+
"""
|
|
94
|
+
if cursor.kind.is_invalid():
|
|
95
|
+
return
|
|
96
|
+
if _is_user_file(cursor, tmppath):
|
|
97
|
+
kind = cursor.kind
|
|
98
|
+
if kind in decl_kinds:
|
|
99
|
+
name = cursor.spelling
|
|
100
|
+
if len(name) >= _MIN_RENAME_LEN:
|
|
101
|
+
usr = cursor.get_usr()
|
|
102
|
+
if usr:
|
|
103
|
+
off = cursor.location.offset
|
|
104
|
+
blen = len(name.encode('utf-8'))
|
|
105
|
+
# 跳过 offset 与源码不匹配的(宏展开内参数等)
|
|
106
|
+
if src_bytes[off:off + blen] == name.encode('utf-8'):
|
|
107
|
+
if usr not in decl_map:
|
|
108
|
+
decl_map[usr] = (name, off, blen)
|
|
109
|
+
replacements.append((off, blen, usr))
|
|
110
|
+
elif kind in ref_kinds:
|
|
111
|
+
ref = cursor.referenced
|
|
112
|
+
if ref and ref.kind in decl_kinds:
|
|
113
|
+
usr = ref.get_usr()
|
|
114
|
+
name = cursor.spelling
|
|
115
|
+
if usr and len(name) >= _MIN_RENAME_LEN:
|
|
116
|
+
off = cursor.location.offset
|
|
117
|
+
blen = len(name.encode('utf-8'))
|
|
118
|
+
# 跳过 offset 与源码不匹配的(宏展开内引用等)
|
|
119
|
+
if src_bytes[off:off + blen] == name.encode('utf-8'):
|
|
120
|
+
replacements.append((off, blen, usr))
|
|
121
|
+
for child in cursor.get_children():
|
|
122
|
+
_walk_ast(child, tmppath, src_bytes, decl_map, replacements, decl_kinds, ref_kinds)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def _scan_tokens(
|
|
126
|
+
tu,
|
|
127
|
+
tmppath: str,
|
|
128
|
+
src_bytes: bytes,
|
|
129
|
+
decl_map: dict,
|
|
130
|
+
decl_kinds: frozenset,
|
|
131
|
+
ref_kinds: frozenset,
|
|
132
|
+
ci,
|
|
133
|
+
) -> list:
|
|
134
|
+
"""Token 扫描补全 pass:修正 AST walk 因宏展开 offset 错位而漏掉的符号位置。
|
|
135
|
+
|
|
136
|
+
libclang 的 AST cursor 对宏参数的 location.offset 指向宏调用起始而非参数本身,
|
|
137
|
+
导致 offset 校验失败、符号未进入 decl_map / replacements。
|
|
138
|
+
token.location 是真实文本位置,此 pass 在 rename_map 建好前先收集候选。
|
|
139
|
+
|
|
140
|
+
参数:
|
|
141
|
+
decl_map — 可能被原地追加(补入宏内漏掉的 DECL)
|
|
142
|
+
ci — clang.cindex 模块(用于访问 CursorKind.DECL_STMT 等)
|
|
143
|
+
|
|
144
|
+
返回:
|
|
145
|
+
token_candidates 列表,元素为
|
|
146
|
+
(offset: int, byte_len: int, tok_name: str, usr: str|None, is_member_access: bool)
|
|
147
|
+
- usr=None 表示本 token 暂未匹配到已知 USR,留给后续策略2/3处理
|
|
148
|
+
- is_member_access=True 表示该 token 前紧跟 . 或 ->,策略2/3 须跳过
|
|
149
|
+
"""
|
|
150
|
+
token_candidates: list = []
|
|
151
|
+
prev_tok_spelling = ''
|
|
152
|
+
for token in tu.get_tokens(extent=tu.cursor.extent):
|
|
153
|
+
if token.kind.name != 'IDENTIFIER':
|
|
154
|
+
prev_tok_spelling = token.spelling # 跟踪 . 和 -> 等标点符号
|
|
155
|
+
continue
|
|
156
|
+
loc = token.location
|
|
157
|
+
if not loc.file:
|
|
158
|
+
prev_tok_spelling = token.spelling
|
|
159
|
+
continue
|
|
160
|
+
try:
|
|
161
|
+
if not os.path.samefile(loc.file.name, tmppath):
|
|
162
|
+
prev_tok_spelling = token.spelling
|
|
163
|
+
continue
|
|
164
|
+
except OSError:
|
|
165
|
+
prev_tok_spelling = token.spelling
|
|
166
|
+
continue
|
|
167
|
+
off = loc.offset
|
|
168
|
+
tok_name = token.spelling
|
|
169
|
+
# 判断是否是成员访问(前面紧跟 . 或 ->),用于限制名字回退策略的误用范围
|
|
170
|
+
is_member_access = prev_tok_spelling in ('.', '->')
|
|
171
|
+
prev_tok_spelling = tok_name
|
|
172
|
+
if len(tok_name) < _MIN_RENAME_LEN:
|
|
173
|
+
continue
|
|
174
|
+
blen = len(tok_name.encode('utf-8'))
|
|
175
|
+
if src_bytes[off:off + blen] != tok_name.encode('utf-8'):
|
|
176
|
+
continue
|
|
177
|
+
cur = token.cursor
|
|
178
|
+
usr = None
|
|
179
|
+
# 策略1:cursor 精确匹配(无宏展开偏移问题时走这里)
|
|
180
|
+
if cur.kind in decl_kinds and cur.spelling == tok_name:
|
|
181
|
+
usr = cur.get_usr()
|
|
182
|
+
# AST walk 因 offset 校验失败而漏掉的 DECL,在此补入 decl_map
|
|
183
|
+
if usr and usr not in decl_map:
|
|
184
|
+
decl_map[usr] = (tok_name, off, blen)
|
|
185
|
+
# 策略1.5:cursor 为 DECL_STMT(宏内变量声明常见),向下找 VAR_DECL 子节点
|
|
186
|
+
elif cur.kind == ci.CursorKind.DECL_STMT:
|
|
187
|
+
for child in cur.get_children():
|
|
188
|
+
if child.kind in decl_kinds and child.spelling == tok_name:
|
|
189
|
+
usr = child.get_usr()
|
|
190
|
+
if usr and usr not in decl_map:
|
|
191
|
+
decl_map[usr] = (tok_name, off, blen)
|
|
192
|
+
break
|
|
193
|
+
elif cur.kind in ref_kinds:
|
|
194
|
+
ref = cur.referenced
|
|
195
|
+
if ref and ref.kind in decl_kinds and cur.spelling == tok_name:
|
|
196
|
+
ref_usr = ref.get_usr()
|
|
197
|
+
if ref_usr:
|
|
198
|
+
# 仅当被引用 DECL 位于用户文件时才补入 decl_map,
|
|
199
|
+
# 避免把系统结构体字段(如 sockaddr_in6::sin6_family)纳入重命名
|
|
200
|
+
if ref_usr not in decl_map and _is_user_file(ref, tmppath):
|
|
201
|
+
decl_name = ref.spelling or tok_name
|
|
202
|
+
if len(decl_name) >= _MIN_RENAME_LEN:
|
|
203
|
+
decl_map[ref_usr] = (decl_name, off, blen)
|
|
204
|
+
# usr 只在 decl_map 中存在时才设置,
|
|
205
|
+
# 防止系统字段 USR 进入 token_candidates 后被错误匹配
|
|
206
|
+
if ref_usr in decl_map:
|
|
207
|
+
usr = ref_usr
|
|
208
|
+
token_candidates.append((off, blen, tok_name, usr, is_member_access))
|
|
209
|
+
return token_candidates
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _build_rename_map(
|
|
213
|
+
decl_map: dict,
|
|
214
|
+
replacements: list,
|
|
215
|
+
code: str,
|
|
216
|
+
) -> tuple:
|
|
217
|
+
"""根据声明表和引用频次生成 USR→短名 映射,并构建名字单义查找表。
|
|
218
|
+
|
|
219
|
+
参数:
|
|
220
|
+
decl_map — USR → (orig_name, first_decl_offset, name_byte_len)
|
|
221
|
+
replacements — (offset, byte_len, usr) 列表,用于统计出现频次
|
|
222
|
+
code — 原始源码字符串,用于提取已有标识符(避免短名冲突)
|
|
223
|
+
|
|
224
|
+
返回:
|
|
225
|
+
(rename_map, name_to_usr)
|
|
226
|
+
rename_map — USR → short_name(高频 USR 优先分配最短名)
|
|
227
|
+
name_to_usr — orig_name → USR,仅包含该名字在 decl_map 中唯一对应一个
|
|
228
|
+
USR 的情况(供 token 扫描策略2的名字单义回退使用)
|
|
229
|
+
"""
|
|
230
|
+
# 统计每个 USR 的引用频次,高频符号优先分配最短名
|
|
231
|
+
freq: dict = {}
|
|
232
|
+
for _, _, usr in replacements:
|
|
233
|
+
freq[usr] = freq.get(usr, 0) + 1
|
|
234
|
+
|
|
235
|
+
sorted_usrs = sorted(decl_map.keys(), key=lambda u: -freq.get(u, 0))
|
|
236
|
+
|
|
237
|
+
# 生成短名,跳过已存在的标识符和 C++ 关键字
|
|
238
|
+
all_existing = set(re.findall(r'\b[A-Za-z_]\w*\b', code))
|
|
239
|
+
occupied = all_existing | _CXX_KEYWORDS
|
|
240
|
+
rename_map: dict = {}
|
|
241
|
+
gen = _gen_short_names()
|
|
242
|
+
for usr in sorted_usrs:
|
|
243
|
+
orig = decl_map[usr][0]
|
|
244
|
+
short = next(gen)
|
|
245
|
+
while short in occupied or short == orig:
|
|
246
|
+
short = next(gen)
|
|
247
|
+
rename_map[usr] = short
|
|
248
|
+
occupied.add(short)
|
|
249
|
+
|
|
250
|
+
# 构建旧名 → USR 的单义查找表(仅唯一映射才加入,防止多义时误匹配)
|
|
251
|
+
name_counts: dict = {}
|
|
252
|
+
for u, (oname, _, _) in decl_map.items():
|
|
253
|
+
if u in rename_map:
|
|
254
|
+
name_counts[oname] = name_counts.get(oname, 0) + 1
|
|
255
|
+
name_to_usr: dict = {}
|
|
256
|
+
for u, (oname, _, _) in decl_map.items():
|
|
257
|
+
if u in rename_map and name_counts.get(oname, 0) == 1:
|
|
258
|
+
name_to_usr[oname] = u
|
|
259
|
+
|
|
260
|
+
return rename_map, name_to_usr
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _merge_token_candidates(
|
|
264
|
+
token_candidates: list,
|
|
265
|
+
replacements: list,
|
|
266
|
+
rename_map: dict,
|
|
267
|
+
name_to_usr: dict,
|
|
268
|
+
) -> None:
|
|
269
|
+
"""将 token 候选列表合并进 replacements,应用策略2/3补全 AST walk 漏掉的位置。
|
|
270
|
+
|
|
271
|
+
策略说明(is_member_access=True 时策略2/3均跳过):
|
|
272
|
+
策略1/1.5 cursor 精确匹配,token_candidates 中 usr 已设置,直接使用。
|
|
273
|
+
策略2 名字单义回退:tok_name 在 name_to_usr 中唯一对应一个 USR。
|
|
274
|
+
用于 AST cursor 指向错误(宏参数常见)但名字无歧义的情况。
|
|
275
|
+
策略3 最近 DECL_REF 推断:找同名且已知 USR 的 token 中距离最近的。
|
|
276
|
+
用于策略1/2均失败、但代码局部性强的情况。
|
|
277
|
+
|
|
278
|
+
参数:
|
|
279
|
+
token_candidates — _scan_tokens 返回的候选列表(只读)
|
|
280
|
+
replacements — 原地追加新的 (offset, byte_len, usr) 三元组
|
|
281
|
+
rename_map — USR → short_name,用于过滤无效 USR
|
|
282
|
+
name_to_usr — orig_name → USR(仅单义映射),供策略2使用
|
|
283
|
+
"""
|
|
284
|
+
# 建立名字 → [(offset, usr)] 的索引,供策略3(最近 DECL_REF 推断)使用
|
|
285
|
+
ref_by_name: dict = {}
|
|
286
|
+
for off, _blen, tok_name, usr, _ma in token_candidates:
|
|
287
|
+
if usr is not None:
|
|
288
|
+
ref_by_name.setdefault(tok_name, []).append((off, usr))
|
|
289
|
+
|
|
290
|
+
ast_seen = {off for off, _, _ in replacements}
|
|
291
|
+
for off, blen, tok_name, usr, is_member_access in token_candidates:
|
|
292
|
+
if off in ast_seen:
|
|
293
|
+
continue # AST walk 已覆盖,跳过
|
|
294
|
+
if usr is None and not is_member_access:
|
|
295
|
+
# 策略2:名字单义回退(非成员访问)
|
|
296
|
+
usr = name_to_usr.get(tok_name)
|
|
297
|
+
if usr is None and not is_member_access:
|
|
298
|
+
# 策略3:最近 DECL_REF 推断(非成员访问)
|
|
299
|
+
candidates_for_name = ref_by_name.get(tok_name, [])
|
|
300
|
+
if candidates_for_name:
|
|
301
|
+
nearest_usr = min(candidates_for_name, key=lambda x: abs(x[0] - off))[1]
|
|
302
|
+
usr = nearest_usr
|
|
303
|
+
if usr and usr in rename_map:
|
|
304
|
+
replacements.append((off, blen, usr))
|
|
305
|
+
ast_seen.add(off)
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _apply_replacements(
|
|
309
|
+
src_bytes: bytes,
|
|
310
|
+
replacements: list,
|
|
311
|
+
rename_map: dict,
|
|
312
|
+
) -> str:
|
|
313
|
+
"""将所有重命名替换应用到源码字节串,返回替换后的字符串。
|
|
314
|
+
|
|
315
|
+
处理步骤:
|
|
316
|
+
1. 过滤掉 USR 不在 rename_map 中的记录(系统符号等)。
|
|
317
|
+
2. 按 offset 降序排列并去重,确保从后向前替换不影响前面的 offset。
|
|
318
|
+
3. 逐条把旧名字节替换为新短名字节。
|
|
319
|
+
"""
|
|
320
|
+
valid = [
|
|
321
|
+
(off, blen, usr)
|
|
322
|
+
for off, blen, usr in replacements
|
|
323
|
+
if usr in rename_map
|
|
324
|
+
]
|
|
325
|
+
seen: set = set()
|
|
326
|
+
deduped: list = []
|
|
327
|
+
for off, blen, usr in sorted(valid, key=lambda x: -x[0]):
|
|
328
|
+
if off not in seen:
|
|
329
|
+
seen.add(off)
|
|
330
|
+
deduped.append((off, blen, usr))
|
|
331
|
+
|
|
332
|
+
result = bytearray(src_bytes)
|
|
333
|
+
for off, blen, usr in deduped:
|
|
334
|
+
result[off:off + blen] = rename_map[usr].encode('utf-8')
|
|
335
|
+
return result.decode('utf-8')
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def golf_rename_symbols(code: str) -> str:
|
|
339
|
+
"""使用 libclang 对 C++ 代码做符号名压缩。
|
|
340
|
+
|
|
341
|
+
重命名范围:局部变量、函数参数、结构体/类字段(仅用户代码中定义的)。
|
|
342
|
+
不重命名:函数名、类型名、宏名、标准库 / 系统头文件中的符号。
|
|
343
|
+
"""
|
|
344
|
+
try:
|
|
345
|
+
import clang.cindex as ci
|
|
346
|
+
except ImportError:
|
|
347
|
+
raise RuntimeError("需要 libclang: pip install libclang")
|
|
348
|
+
|
|
349
|
+
src_bytes = code.encode('utf-8')
|
|
350
|
+
|
|
351
|
+
# 必须用二进制写,避免 Windows 上 \n→\r\n 导致 offset 错位
|
|
352
|
+
with tempfile.NamedTemporaryFile(suffix='.cpp', mode='wb', delete=False) as f:
|
|
353
|
+
f.write(src_bytes)
|
|
354
|
+
tmppath = f.name
|
|
355
|
+
|
|
356
|
+
try:
|
|
357
|
+
index = ci.Index.create()
|
|
358
|
+
|
|
359
|
+
tu = index.parse(
|
|
360
|
+
tmppath,
|
|
361
|
+
args=['-std=c++23', '-w', '-fno-spell-checking'] + _make_platform_args(),
|
|
362
|
+
options=(
|
|
363
|
+
ci.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD |
|
|
364
|
+
ci.TranslationUnit.PARSE_INCOMPLETE
|
|
365
|
+
),
|
|
366
|
+
)
|
|
367
|
+
|
|
368
|
+
# 声明节点类型:这些节点是需要被重命名的符号定义位
|
|
369
|
+
_DECL_KINDS = frozenset({
|
|
370
|
+
ci.CursorKind.VAR_DECL,
|
|
371
|
+
ci.CursorKind.FIELD_DECL,
|
|
372
|
+
ci.CursorKind.PARM_DECL,
|
|
373
|
+
})
|
|
374
|
+
# 引用节点类型:这些节点是已声明符号的使用位
|
|
375
|
+
_REF_KINDS = frozenset({
|
|
376
|
+
ci.CursorKind.MEMBER_REF_EXPR, # obj.field / obj->field
|
|
377
|
+
ci.CursorKind.MEMBER_REF, # 构造函数初始化列表 : field(...)
|
|
378
|
+
ci.CursorKind.DECL_REF_EXPR, # 局部变量、参数引用
|
|
379
|
+
})
|
|
380
|
+
|
|
381
|
+
# USR → (orig_name, first_decl_offset, name_byte_len)
|
|
382
|
+
decl_map: dict = {}
|
|
383
|
+
# (offset, byte_len, usr) 三元组,记录所有声明位和引用位
|
|
384
|
+
replacements: list = []
|
|
385
|
+
|
|
386
|
+
# AST 遍历:收集所有用户文件中的声明和引用
|
|
387
|
+
_walk_ast(tu.cursor, tmppath, src_bytes, decl_map, replacements, _DECL_KINDS, _REF_KINDS)
|
|
388
|
+
|
|
389
|
+
# Token 扫描:补全 AST walk 因宏展开 offset 错位而漏掉的符号位置
|
|
390
|
+
token_candidates = _scan_tokens(
|
|
391
|
+
tu, tmppath, src_bytes, decl_map, _DECL_KINDS, _REF_KINDS, ci
|
|
392
|
+
)
|
|
393
|
+
|
|
394
|
+
if not decl_map:
|
|
395
|
+
return code
|
|
396
|
+
|
|
397
|
+
# 生成 USR→短名 映射,以及名字单义查找表(供策略2使用)
|
|
398
|
+
rename_map, name_to_usr = _build_rename_map(decl_map, replacements, code)
|
|
399
|
+
|
|
400
|
+
# 合并 token 候选:策略1已设 usr,策略2/3补全宏参数等漏掉的位置
|
|
401
|
+
_merge_token_candidates(token_candidates, replacements, rename_map, name_to_usr)
|
|
402
|
+
|
|
403
|
+
# 应用所有重命名替换(从后向前,保持 offset 正确)
|
|
404
|
+
return _apply_replacements(src_bytes, replacements, rename_map)
|
|
405
|
+
|
|
406
|
+
finally:
|
|
407
|
+
try:
|
|
408
|
+
os.unlink(tmppath)
|
|
409
|
+
except OSError:
|
|
410
|
+
pass
|
|
@@ -34,15 +34,30 @@ def merge_files(filepath: Path, include_dirs: list,
|
|
|
34
34
|
code = strip_include_guard(code)
|
|
35
35
|
parts = []
|
|
36
36
|
|
|
37
|
+
# 跟踪预处理条件块嵌套深度:depth > 0 表示当前在 #if/#ifdef/#ifndef 内部
|
|
38
|
+
# 处于条件块内的 #include <...> 必须保留在原位,不能提升到文件顶部
|
|
39
|
+
cond_depth = 0
|
|
40
|
+
|
|
37
41
|
for line in code.splitlines(keepends=True):
|
|
38
42
|
s = line.strip()
|
|
39
43
|
|
|
44
|
+
# 更新条件块深度
|
|
45
|
+
if re.match(r'#\s*if(?:def|ndef)?\b', s):
|
|
46
|
+
cond_depth += 1
|
|
47
|
+
elif re.match(r'#\s*endif\b', s):
|
|
48
|
+
cond_depth = max(0, cond_depth - 1)
|
|
49
|
+
|
|
40
50
|
# 系统头文件 #include <...>
|
|
41
51
|
m_sys = re.match(r'#\s*include\s*<([^>]+)>', s)
|
|
42
52
|
if m_sys:
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
53
|
+
if cond_depth > 0:
|
|
54
|
+
# 在条件块内:保留在原位,维持条件上下文
|
|
55
|
+
parts.append(line)
|
|
56
|
+
else:
|
|
57
|
+
# 无条件引用:提升到文件顶部统一去重管理
|
|
58
|
+
entry = f'#include <{m_sys.group(1)}>\n'
|
|
59
|
+
if entry not in sys_includes:
|
|
60
|
+
sys_includes.append(entry)
|
|
46
61
|
continue
|
|
47
62
|
|
|
48
63
|
# 本地头文件 #include "..."
|
|
@@ -55,9 +70,13 @@ def merge_files(filepath: Path, include_dirs: list,
|
|
|
55
70
|
if c.exists():
|
|
56
71
|
found = c; break
|
|
57
72
|
if found:
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
73
|
+
if cond_depth > 0:
|
|
74
|
+
# 在条件块内:不内联,保留原始 include 行
|
|
75
|
+
parts.append(line)
|
|
76
|
+
else:
|
|
77
|
+
parts.append(f'\n// ── inlined: {inc} ──\n')
|
|
78
|
+
parts.append(merge_files(found, include_dirs, visited, sys_includes))
|
|
79
|
+
parts.append(f'\n// ── end: {inc} ──\n')
|
|
61
80
|
else:
|
|
62
81
|
print(f'[警告] 找不到本地头文件:{inc}', file=sys.stderr)
|
|
63
82
|
parts.append(line)
|
|
@@ -20,29 +20,40 @@ def golf_std_namespace(code: str) -> str:
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
def golf_typedefs(code: str) -> str:
|
|
23
|
-
"""对高频长类型名添加
|
|
23
|
+
"""对高频长类型名添加 typedef 缩写(出现 ≥2 次时触发)。"""
|
|
24
24
|
replacements = [
|
|
25
|
-
(r'\blong long\b', 'll', '
|
|
26
|
-
(r'\bunsigned long long\b', 'ull', '
|
|
27
|
-
(r'\blong double\b', 'ld', '
|
|
28
|
-
(r'\bvector<int>\b', 'vi', '
|
|
29
|
-
(r'\bvector<ll>\b', 'vll', '
|
|
30
|
-
(r'\bpair<int,int>\b', 'pii', '
|
|
31
|
-
(r'\bpair<ll,ll>\b', 'pll', '
|
|
25
|
+
(r'\blong long\b', 'll', 'typedef long long ll;'),
|
|
26
|
+
(r'\bunsigned long long\b', 'ull', 'typedef unsigned long long ull;'),
|
|
27
|
+
(r'\blong double\b', 'ld', 'typedef long double ld;'),
|
|
28
|
+
(r'\bvector<int>\b', 'vi', 'typedef vector<int> vi;'),
|
|
29
|
+
(r'\bvector<ll>\b', 'vll', 'typedef vector<ll> vll;'),
|
|
30
|
+
(r'\bpair<int,int>\b', 'pii', 'typedef pair<int,int> pii;'),
|
|
31
|
+
(r'\bpair<ll,ll>\b', 'pll', 'typedef pair<ll,ll> pll;'),
|
|
32
32
|
]
|
|
33
33
|
defines_to_add = []
|
|
34
34
|
for pattern, short, defline in replacements:
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
35
|
+
# 提取缩写名(typedef ... short;)
|
|
36
|
+
macro = defline.rstrip(';').split()[-1]
|
|
37
|
+
# 匹配已有的 typedef 或 #define 形式
|
|
38
|
+
existing_re = re.compile(
|
|
39
|
+
r'^[ \t]*(?:'
|
|
40
|
+
r'typedef\b[^\n]+\b' + re.escape(macro) + r'\s*;'
|
|
41
|
+
r'|#[ \t]*define[ \t]+' + re.escape(macro) + r'\b[^\n]*'
|
|
42
|
+
r')[ \t]*\n?',
|
|
43
|
+
re.MULTILINE,
|
|
44
|
+
)
|
|
45
|
+
existing = existing_re.search(code)
|
|
46
|
+
if existing:
|
|
47
|
+
# 已有定义:从原位删掉,稍后统一插到顶部
|
|
48
|
+
code = code[:existing.start()] + code[existing.end():]
|
|
49
|
+
defines_to_add.append(defline)
|
|
50
|
+
elif len(re.findall(pattern, code)) >= 2:
|
|
39
51
|
defines_to_add.append(defline)
|
|
40
52
|
code = re.sub(pattern, short, code)
|
|
41
53
|
if defines_to_add:
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
)
|
|
54
|
+
# 插入点:文件顶部 include 块末尾
|
|
55
|
+
include_ends = [m.end() for m in re.finditer(r'^[ \t]*#[ \t]*include\b.*$', code, re.MULTILINE)]
|
|
56
|
+
last = include_ends[-1] if include_ends else 0
|
|
46
57
|
code = code[:last] + '\n' + '\n'.join(defines_to_add) + '\n' + code[last:]
|
|
47
58
|
return code
|
|
48
59
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cppgolf
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: C++ multi-file merge & code golf / minifier tool
|
|
5
5
|
License: MIT
|
|
6
6
|
Project-URL: Homepage, https://github.com/yourname/cppgolf
|
|
@@ -18,8 +18,7 @@ Classifier: Topic :: Software Development :: Code Generators
|
|
|
18
18
|
Classifier: Topic :: Text Processing :: Filters
|
|
19
19
|
Requires-Python: >=3.10
|
|
20
20
|
Description-Content-Type: text/markdown
|
|
21
|
-
Requires-Dist:
|
|
22
|
-
Requires-Dist: tree-sitter-cpp>=0.23
|
|
21
|
+
Requires-Dist: libclang
|
|
23
22
|
Provides-Extra: dev
|
|
24
23
|
Requires-Dist: build; extra == "dev"
|
|
25
24
|
Requires-Dist: twine; extra == "dev"
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "cppgolf"
|
|
7
|
-
version = "0.1.0"
|
|
7
|
+
version = "0.1.2"
|
|
8
8
|
description = "C++ multi-file merge & code golf / minifier tool"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -24,8 +24,7 @@ classifiers = [
|
|
|
24
24
|
]
|
|
25
25
|
|
|
26
26
|
dependencies = [
|
|
27
|
-
"
|
|
28
|
-
"tree-sitter-cpp>=0.23",
|
|
27
|
+
"libclang",
|
|
29
28
|
]
|
|
30
29
|
|
|
31
30
|
[project.optional-dependencies]
|
|
@@ -1,452 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
golf_rename.py — Pass 5: 符号名压缩(tree-sitter AST 驱动)
|
|
3
|
-
"""
|
|
4
|
-
import re
|
|
5
|
-
import sys
|
|
6
|
-
import itertools
|
|
7
|
-
|
|
8
|
-
_DECLARATOR_CONTAINERS = frozenset({
|
|
9
|
-
'init_declarator', 'pointer_declarator', 'reference_declarator',
|
|
10
|
-
'array_declarator', 'abstract_pointer_declarator',
|
|
11
|
-
'abstract_reference_declarator', 'abstract_array_declarator',
|
|
12
|
-
})
|
|
13
|
-
_MIN_RENAME_LEN = 2
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def _gen_short_names():
|
|
17
|
-
for length in itertools.count(1):
|
|
18
|
-
for combo in itertools.product('abcdefghijklmnopqrstuvwxyz', repeat=length):
|
|
19
|
-
yield ''.join(combo)
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
def _extract_declarator_id(node, want_field: bool):
|
|
23
|
-
target_type = 'field_identifier' if want_field else 'identifier'
|
|
24
|
-
if node.type == target_type:
|
|
25
|
-
return node
|
|
26
|
-
if node.type in _DECLARATOR_CONTAINERS:
|
|
27
|
-
for ch in node.children:
|
|
28
|
-
if ch.type in ('*', '**', '&', '&&', '=', '[', ']',
|
|
29
|
-
'const', 'volatile', 'restrict',
|
|
30
|
-
'__cdecl', '__stdcall', '__fastcall', '__thiscall',
|
|
31
|
-
'abstract_pointer_declarator',
|
|
32
|
-
'abstract_reference_declarator'):
|
|
33
|
-
continue
|
|
34
|
-
result = _extract_declarator_id(ch, want_field)
|
|
35
|
-
if result:
|
|
36
|
-
return result
|
|
37
|
-
return None
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
class _RenameCtx:
|
|
41
|
-
"""封装一次重命名所需的全部状态与子方法。"""
|
|
42
|
-
|
|
43
|
-
def __init__(self, src_bytes, tree):
|
|
44
|
-
self.src = src_bytes
|
|
45
|
-
self.tree = tree
|
|
46
|
-
# 类型上下文
|
|
47
|
-
self.user_struct_names: set = set()
|
|
48
|
-
self.struct_field_types: dict = {}
|
|
49
|
-
self.var_type_map: dict = {}
|
|
50
|
-
self.typedef_map: dict = {}
|
|
51
|
-
|
|
52
|
-
# ── 工具 ────────────────────────────────────────────────────────────
|
|
53
|
-
def name_of(self, node) -> str:
|
|
54
|
-
return self.src[node.start_byte:node.end_byte].decode('utf-8')
|
|
55
|
-
|
|
56
|
-
def _get_primary_type_name(self, node) -> str | None:
|
|
57
|
-
for ch in node.children:
|
|
58
|
-
if ch.type in ('type_identifier', 'primitive_type'):
|
|
59
|
-
return self.name_of(ch)
|
|
60
|
-
if ch.type == 'qualified_identifier':
|
|
61
|
-
for sub in reversed(ch.children):
|
|
62
|
-
if sub.type in ('identifier', 'type_identifier'):
|
|
63
|
-
return self.name_of(sub)
|
|
64
|
-
return None
|
|
65
|
-
|
|
66
|
-
def _is_qid_name(self, node) -> bool:
|
|
67
|
-
par = node.parent
|
|
68
|
-
if not par or par.type != 'qualified_identifier':
|
|
69
|
-
return False
|
|
70
|
-
for ch in reversed(par.children):
|
|
71
|
-
if ch.type != '::':
|
|
72
|
-
return ch == node
|
|
73
|
-
return False
|
|
74
|
-
|
|
75
|
-
def _get_qid_scope_class(self, qid_node) -> str | None:
|
|
76
|
-
for ch in qid_node.children:
|
|
77
|
-
if ch.type == '::':
|
|
78
|
-
break
|
|
79
|
-
if ch.type in ('identifier', 'type_identifier', 'namespace_identifier'):
|
|
80
|
-
return self.name_of(ch)
|
|
81
|
-
elif ch.type == 'qualified_identifier':
|
|
82
|
-
for sub in reversed(ch.children):
|
|
83
|
-
if sub.type in ('identifier', 'type_identifier', 'namespace_identifier'):
|
|
84
|
-
return self.name_of(sub)
|
|
85
|
-
break
|
|
86
|
-
return None
|
|
87
|
-
|
|
88
|
-
# ── 步骤 0:构建类型上下文 ───────────────────────────────────────────
|
|
89
|
-
def build_type_context(self):
|
|
90
|
-
self._walk_types(self.tree.root_node)
|
|
91
|
-
for alias, real in self.typedef_map.items():
|
|
92
|
-
if real in self.user_struct_names:
|
|
93
|
-
self.user_struct_names.add(alias)
|
|
94
|
-
if real in self.struct_field_types and alias not in self.struct_field_types:
|
|
95
|
-
self.struct_field_types[alias] = self.struct_field_types[real]
|
|
96
|
-
|
|
97
|
-
def _walk_types(self, node):
|
|
98
|
-
nt = node.type
|
|
99
|
-
if nt == 'type_definition':
|
|
100
|
-
inner = None
|
|
101
|
-
for ch in node.children:
|
|
102
|
-
if ch.type in ('struct_specifier', 'class_specifier', 'union_specifier'):
|
|
103
|
-
for sub in ch.children:
|
|
104
|
-
if sub.type == 'type_identifier':
|
|
105
|
-
inner = self.name_of(sub); break
|
|
106
|
-
break
|
|
107
|
-
if inner:
|
|
108
|
-
for ch in node.children:
|
|
109
|
-
if ch.type == 'type_identifier' and self.name_of(ch) != inner:
|
|
110
|
-
self.typedef_map[self.name_of(ch)] = inner
|
|
111
|
-
elif ch.type in _DECLARATOR_CONTAINERS:
|
|
112
|
-
id_node = _extract_declarator_id(ch, False)
|
|
113
|
-
if id_node:
|
|
114
|
-
self.typedef_map[self.name_of(id_node)] = inner
|
|
115
|
-
if nt in ('struct_specifier', 'class_specifier', 'union_specifier'):
|
|
116
|
-
struct_name = None
|
|
117
|
-
for ch in node.children:
|
|
118
|
-
if ch.type == 'type_identifier':
|
|
119
|
-
struct_name = self.name_of(ch); break
|
|
120
|
-
if struct_name and any(c.type == 'field_declaration_list' for c in node.children):
|
|
121
|
-
self.user_struct_names.add(struct_name)
|
|
122
|
-
fmap = self.struct_field_types.setdefault(struct_name, {})
|
|
123
|
-
for ch in node.children:
|
|
124
|
-
if ch.type == 'field_declaration_list':
|
|
125
|
-
for fd in ch.children:
|
|
126
|
-
if fd.type != 'field_declaration':
|
|
127
|
-
continue
|
|
128
|
-
ftype = self._get_primary_type_name(fd)
|
|
129
|
-
for fc in fd.children:
|
|
130
|
-
if fc.type == 'field_identifier':
|
|
131
|
-
fmap[self.name_of(fc)] = ftype
|
|
132
|
-
elif fc.type in _DECLARATOR_CONTAINERS or fc.type == 'init_declarator':
|
|
133
|
-
id_node = _extract_declarator_id(fc, True)
|
|
134
|
-
if id_node:
|
|
135
|
-
fmap[self.name_of(id_node)] = ftype
|
|
136
|
-
break
|
|
137
|
-
if nt in ('declaration', 'parameter_declaration'):
|
|
138
|
-
vtype = self._get_primary_type_name(node)
|
|
139
|
-
if vtype:
|
|
140
|
-
for ch in node.children:
|
|
141
|
-
if ch.type == 'identifier':
|
|
142
|
-
self.var_type_map.setdefault(self.name_of(ch), vtype)
|
|
143
|
-
elif ch.type in _DECLARATOR_CONTAINERS or ch.type == 'init_declarator':
|
|
144
|
-
id_node = _extract_declarator_id(ch, False)
|
|
145
|
-
if id_node:
|
|
146
|
-
self.var_type_map.setdefault(self.name_of(id_node), vtype)
|
|
147
|
-
for ch in node.children:
|
|
148
|
-
self._walk_types(ch)
|
|
149
|
-
|
|
150
|
-
# ── cast 类型提取 ────────────────────────────────────────────────────
|
|
151
|
-
def _extract_cast_target_type(self, node) -> str | None:
|
|
152
|
-
if node.type == 'call_expression':
|
|
153
|
-
fn = node.children[0] if node.children else None
|
|
154
|
-
if fn and fn.type == 'template_function':
|
|
155
|
-
fn_name = None
|
|
156
|
-
for ch in fn.children:
|
|
157
|
-
if ch.type == 'identifier':
|
|
158
|
-
fn_name = self.name_of(ch); break
|
|
159
|
-
if fn_name in ('reinterpret_cast', 'static_cast', 'dynamic_cast', 'const_cast'):
|
|
160
|
-
for ch in fn.children:
|
|
161
|
-
if ch.type == 'template_argument_list':
|
|
162
|
-
for sub in ch.children:
|
|
163
|
-
if sub.type == 'type_descriptor':
|
|
164
|
-
return self._get_primary_type_name(sub)
|
|
165
|
-
if node.type == 'cast_expression':
|
|
166
|
-
for ch in node.children:
|
|
167
|
-
if ch.type == 'type_descriptor':
|
|
168
|
-
return self._get_primary_type_name(ch)
|
|
169
|
-
if node.type in ('reinterpret_cast_expression', 'static_cast_expression',
|
|
170
|
-
'dynamic_cast_expression', 'const_cast_expression'):
|
|
171
|
-
for ch in node.children:
|
|
172
|
-
if ch.type == 'type_descriptor':
|
|
173
|
-
return self._get_primary_type_name(ch)
|
|
174
|
-
return None
|
|
175
|
-
|
|
176
|
-
def _extract_init_cast_type(self, decl_node, var_name) -> str | None:
|
|
177
|
-
for ch in decl_node.children:
|
|
178
|
-
if ch.type == 'init_declarator':
|
|
179
|
-
id_nd = _extract_declarator_id(ch, False)
|
|
180
|
-
if not id_nd or self.name_of(id_nd) != var_name:
|
|
181
|
-
continue
|
|
182
|
-
for sub in ch.children:
|
|
183
|
-
t = self._extract_cast_target_type(sub)
|
|
184
|
-
if t:
|
|
185
|
-
return t
|
|
186
|
-
return None
|
|
187
|
-
|
|
188
|
-
# ── Scope-aware variable type lookup ─────────────────────────────────
|
|
189
|
-
def _lookup_var_type_in_scope(self, identifier_node) -> str | None:
|
|
190
|
-
var_name = self.name_of(identifier_node)
|
|
191
|
-
node = identifier_node.parent
|
|
192
|
-
while node is not None:
|
|
193
|
-
if node.type == 'parameter_list':
|
|
194
|
-
for param in node.children:
|
|
195
|
-
if param.type == 'parameter_declaration':
|
|
196
|
-
vtype = self._get_primary_type_name(param)
|
|
197
|
-
if vtype:
|
|
198
|
-
for ch in param.children:
|
|
199
|
-
if ch.type == 'identifier' and self.name_of(ch) == var_name:
|
|
200
|
-
return vtype
|
|
201
|
-
elif ch.type in _DECLARATOR_CONTAINERS:
|
|
202
|
-
id_nd = _extract_declarator_id(ch, False)
|
|
203
|
-
if id_nd and self.name_of(id_nd) == var_name:
|
|
204
|
-
return vtype
|
|
205
|
-
if node.type in ('compound_statement', 'translation_unit',
|
|
206
|
-
'namespace_definition', 'function_definition'):
|
|
207
|
-
for child in node.children:
|
|
208
|
-
if child.type == 'declaration':
|
|
209
|
-
vtype = self._get_primary_type_name(child)
|
|
210
|
-
matched = False
|
|
211
|
-
for ch in child.children:
|
|
212
|
-
if ch.type == 'identifier' and self.name_of(ch) == var_name:
|
|
213
|
-
matched = True; break
|
|
214
|
-
elif ch.type in _DECLARATOR_CONTAINERS or ch.type == 'init_declarator':
|
|
215
|
-
id_nd = _extract_declarator_id(ch, False)
|
|
216
|
-
if id_nd and self.name_of(id_nd) == var_name:
|
|
217
|
-
matched = True; break
|
|
218
|
-
if matched:
|
|
219
|
-
if vtype:
|
|
220
|
-
return vtype
|
|
221
|
-
return self._extract_init_cast_type(child, var_name)
|
|
222
|
-
node = node.parent
|
|
223
|
-
return self.var_type_map.get(var_name)
|
|
224
|
-
|
|
225
|
-
# ── Field-access object type inference ───────────────────────────────
|
|
226
|
-
def _resolve_field_object_type(self, field_expr_node) -> str | None:
|
|
227
|
-
if not field_expr_node.children:
|
|
228
|
-
return None
|
|
229
|
-
value_node = field_expr_node.children[0]
|
|
230
|
-
vt = value_node.type
|
|
231
|
-
td = self.typedef_map
|
|
232
|
-
if vt == 'identifier':
|
|
233
|
-
t = self._lookup_var_type_in_scope(value_node)
|
|
234
|
-
return td.get(t, t)
|
|
235
|
-
elif vt == 'field_expression':
|
|
236
|
-
parent_type = self._resolve_field_object_type(value_node)
|
|
237
|
-
if parent_type and parent_type in self.struct_field_types:
|
|
238
|
-
for ch in value_node.children:
|
|
239
|
-
if ch.type == 'field_identifier':
|
|
240
|
-
ft = self.struct_field_types[parent_type].get(self.name_of(ch))
|
|
241
|
-
return td.get(ft, ft) if ft else None
|
|
242
|
-
return None
|
|
243
|
-
elif vt == 'pointer_expression':
|
|
244
|
-
for ch in value_node.children:
|
|
245
|
-
if ch.type == 'identifier':
|
|
246
|
-
t = self._lookup_var_type_in_scope(ch)
|
|
247
|
-
return td.get(t, t)
|
|
248
|
-
elif vt == 'subscript_expression':
|
|
249
|
-
arr = value_node.children[0] if value_node.children else None
|
|
250
|
-
if arr is None:
|
|
251
|
-
return None
|
|
252
|
-
if arr.type == 'identifier':
|
|
253
|
-
t = self._lookup_var_type_in_scope(arr)
|
|
254
|
-
return td.get(t, t) if t else None
|
|
255
|
-
elif arr.type == 'field_expression':
|
|
256
|
-
return self._resolve_field_object_type(arr)
|
|
257
|
-
return None
|
|
258
|
-
|
|
259
|
-
# ── Step 1: collect declaration-site nodes ───────────────────────────
|
|
260
|
-
def collect_decl_nodes(self):
    """Collect the identifier nodes at which names are declared.

    Traverses the tree from ``self.tree.root_node`` in pre-order and returns
    ``(local_decl, member_decl)``:

    * ``local_decl`` -- identifiers declared by ``declaration``,
      ``parameter_declaration`` and ``for_range_loop`` nodes. A function
      declarator inside a ``declaration`` contributes its identifier only
      when the declaration's primary type is in ``self.user_struct_names``.
    * ``member_decl`` -- field identifiers declared by ``field_declaration``
      nodes.

    Identifier children of a ``function_declarator`` are not descended into.
    """
    locals_found: list = []
    members_found: list = []

    def add_declared(container, into, in_field):
        # Resolve a declarator wrapper down to the identifier it declares.
        ident = _extract_declarator_id(container, in_field)
        if ident:
            into.append(ident)

    def from_declaration(decl):
        for part in decl.children:
            kind = part.type
            if kind == 'identifier':
                locals_found.append(part)
            elif kind in _DECLARATOR_CONTAINERS or kind == 'init_declarator':
                add_declared(part, locals_found, False)
            elif kind == 'function_declarator':
                decl_type = self._get_primary_type_name(decl)
                if decl_type and decl_type in self.user_struct_names:
                    for piece in part.children:
                        if piece.type == 'identifier':
                            locals_found.append(piece)
                            break

    def from_parameter(param):
        for part in param.children:
            kind = part.type
            if kind == 'identifier':
                locals_found.append(part)
            elif kind in _DECLARATOR_CONTAINERS:
                add_declared(part, locals_found, False)

    def from_range_for(loop):
        type_seen = False
        for part in loop.children:
            # Everything after ':' (or the body) is not part of the declarator.
            if part.type in (':', 'compound_statement'):
                return
            # The first named child is treated as the type and skipped.
            if part.is_named and not type_seen:
                type_seen = True
                continue
            if part.type == 'identifier':
                locals_found.append(part)
                return
            if part.type in _DECLARATOR_CONTAINERS:
                add_declared(part, locals_found, False)
                return

    def from_field(field):
        for part in field.children:
            kind = part.type
            if kind == 'field_identifier':
                members_found.append(part)
            elif kind in _DECLARATOR_CONTAINERS or kind == 'init_declarator':
                add_declared(part, members_found, True)

    handlers = {
        'declaration': from_declaration,
        'parameter_declaration': from_parameter,
        'for_range_loop': from_range_for,
        'field_declaration': from_field,
    }

    def visit(node):
        kind = node.type
        handler = handlers.get(kind)
        if handler is not None:
            handler(node)
        skip_identifiers = kind == 'function_declarator'
        for child in node.children:
            if skip_identifiers and child.type == 'identifier':
                continue
            visit(child)

    visit(self.tree.root_node)
    return locals_found, members_found
|
|
313
|
-
|
|
314
|
-
# ── Step 3: count frequencies ────────────────────────────────────────
|
|
315
|
-
def count_freq(self, local_names, member_names) -> dict:
    """Count how often each rename candidate occurs in the tree.

    An occurrence is counted for:

    * an ``identifier`` whose name is in *local_names*;
    * an ``identifier`` whose name is in *member_names*, when
      ``self._is_qid_name`` accepts it and its scope class (after
      ``self.typedef_map`` resolution) is in ``self.user_struct_names``;
    * a ``field_identifier`` whose name is in *member_names*;
    * a ``type_identifier`` whose name is in *local_names* and whose ancestors
      are, in order, ``parameter_declaration``, ``parameter_list``,
      ``function_declarator`` and ``declaration``.

    Returns a dict mapping name to occurrence count; names never seen are
    absent from it.
    """
    counts: dict = {}
    ancestor_chain = ('parameter_declaration', 'parameter_list',
                      'function_declarator', 'declaration')

    def bump(name):
        counts[name] = counts.get(name, 0) + 1

    def under_prototype_param(type_node):
        # True when the node's ancestors match ancestor_chain, innermost first.
        cur = type_node.parent
        for depth, expected in enumerate(ancestor_chain):
            if depth:
                cur = cur.parent
            if not cur or cur.type != expected:
                return False
        return True

    def visit(node):
        kind = node.type
        if kind == 'identifier':
            name = self.name_of(node)
            if name in local_names:
                bump(name)
            elif name in member_names and self._is_qid_name(node):
                scope_cls = self._get_qid_scope_class(node.parent)
                real_cls = None
                if scope_cls:
                    real_cls = self.typedef_map.get(scope_cls, scope_cls)
                if real_cls and real_cls in self.user_struct_names:
                    bump(name)
        elif kind == 'field_identifier':
            name = self.name_of(node)
            if name in member_names:
                bump(name)
        elif kind == 'type_identifier':
            name = self.name_of(node)
            if name in local_names and under_prototype_param(node):
                bump(name)
        for child in node.children:
            visit(child)

    visit(self.tree.root_node)
    return counts
|
|
343
|
-
|
|
344
|
-
# ── Step 5: collect replacement positions ────────────────────────────
|
|
345
|
-
def build_replacements(self, rename_map, local_names, member_names):
    """Build the list of byte-range edits that apply *rename_map*.

    Traverses the tree from ``self.tree.root_node`` in pre-order and returns a
    list of ``(start_byte, end_byte, new_name_bytes)`` tuples. A node is
    rewritten when its name is a key of *rename_map* and:

    * it is an ``identifier`` naming a local; or naming a member while inside
      a named struct/class/union body; or naming a member in a qualified name
      whose scope class resolves to a user struct;
    * it is a ``type_identifier`` naming a local, sitting in a parameter of a
      function declarator inside a ``declaration`` whose primary type is a
      user struct;
    * it is a ``field_identifier`` naming a member, either outside any
      ``field_expression`` or in one whose object type resolves to a user
      struct.
    """
    edits: list = []
    open_classes: list = []
    ancestor_chain = ('parameter_declaration', 'parameter_list',
                      'function_declarator', 'declaration')

    def emit(node, name):
        edits.append((node.start_byte, node.end_byte, rename_map[name].encode()))

    def enclosing_prototype(type_node):
        # Return the 'declaration' ancestor when the chain matches, else None.
        cur = type_node.parent
        for depth, expected in enumerate(ancestor_chain):
            if depth:
                cur = cur.parent
            if not cur or cur.type != expected:
                return None
        return cur

    def is_user_struct(type_name):
        return bool(type_name) and type_name in self.user_struct_names

    def visit(node):
        kind = node.type

        # Track named struct/class/union bodies so members used inside them
        # can be renamed without qualification.
        pushed = False
        if kind in ('struct_specifier', 'class_specifier', 'union_specifier'):
            for part in node.children:
                if part.type == 'type_identifier':
                    open_classes.append(self.name_of(part))
                    pushed = True
                    break

        if kind in ('identifier', 'type_identifier', 'field_identifier'):
            name = self.name_of(node)
            if name in rename_map:
                if kind == 'identifier':
                    if name in local_names:
                        emit(node, name)
                    elif name in member_names:
                        if open_classes:
                            emit(node, name)
                        elif self._is_qid_name(node):
                            scope_cls = self._get_qid_scope_class(node.parent)
                            real_cls = None
                            if scope_cls:
                                real_cls = self.typedef_map.get(scope_cls, scope_cls)
                            if is_user_struct(real_cls):
                                emit(node, name)
                elif kind == 'type_identifier':
                    if name in local_names:
                        declaration = enclosing_prototype(node)
                        if declaration is not None and is_user_struct(
                                self._get_primary_type_name(declaration)):
                            emit(node, name)
                elif name in member_names:  # field_identifier
                    holder = node.parent
                    if holder and holder.type == 'field_expression':
                        if is_user_struct(self._resolve_field_object_type(holder)):
                            emit(node, name)
                    else:
                        emit(node, name)

        for child in node.children:
            visit(child)
        if pushed:
            open_classes.pop()

    visit(self.tree.root_node)
    return edits
|
|
396
|
-
|
|
397
|
-
# ── Step 6: apply replacements ───────────────────────────────────────
|
|
398
|
-
def apply(self, replacements) -> str:
    """Apply byte-range replacements to ``self.src`` and return the new text.

    Args:
        replacements: iterable of ``(start_byte, end_byte, new_bytes)``
            tuples indexing into ``self.src``. The iterable is not modified.

    Returns:
        The edited source decoded as UTF-8.

    Edits are applied from the highest start offset to the lowest so that
    changing the length of one span never shifts the offsets of the spans
    still to be processed.
    """
    # sorted() rather than list.sort(): the caller's list must not be
    # reordered as a side effect, and any iterable is accepted.
    ordered = sorted(replacements, key=lambda edit: edit[0], reverse=True)
    buf = bytearray(self.src)
    for start, end, new in ordered:
        buf[start:end] = new
    return buf.decode('utf-8')
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
# ─────────────────────────────────────────────────────────────────────────────
|
|
407
|
-
# Public entry point
|
|
408
|
-
# ─────────────────────────────────────────────────────────────────────────────
|
|
409
|
-
def golf_rename_symbols(code: str) -> str:
    """Rename local and member symbols in C++ *code* to short names.

    The source is parsed with tree-sitter's C++ grammar. Declared local and
    member names of at least ``_MIN_RENAME_LEN`` characters are collected,
    ranked by how often they occur, and mapped to names drawn from
    ``_gen_short_names()``, skipping any identifier that already appears in
    the source.

    Args:
        code: C++ source text.

    Returns:
        The rewritten source. *code* is returned unchanged when tree-sitter
        cannot be imported (a warning is printed to stderr), when there is
        nothing to rename, or when no replacement position is found.
    """
    try:
        from tree_sitter import Language, Parser
        import tree_sitter_cpp as tscpp
        _lang = Language(tscpp.language())
    except ImportError:
        print('[警告] 未找到 tree-sitter,跳过符号重命名。'
              ' 运行: pip install tree-sitter tree-sitter-cpp', file=sys.stderr)
        return code

    src_bytes = code.encode('utf-8')
    parser = Parser(_lang)
    tree = parser.parse(src_bytes)

    ctx = _RenameCtx(src_bytes, tree)
    ctx.build_type_context()

    local_decl, member_decl = ctx.collect_decl_nodes()
    name_of = ctx.name_of

    local_names = {name for name in map(name_of, local_decl)
                   if len(name) >= _MIN_RENAME_LEN}
    member_names = {name for name in map(name_of, member_decl)
                    if len(name) >= _MIN_RENAME_LEN}
    if not local_names and not member_names:
        return code

    freq = ctx.count_freq(local_names, member_names)

    # Step 4: build the rename map.
    # Every identifier-like token already in the source is off limits.
    occupied = set(re.findall(r'\b[A-Za-z_]\w*\b', code))
    rename_map: dict = {}
    gen = _gen_short_names()
    # Most frequent names get the earliest generated names. The name itself
    # is the tie-breaker: iterating a set of strings is not stable across
    # interpreter runs, so ranking by frequency alone made the output
    # differ from run to run for identical input.
    ranked = sorted(local_names | member_names,
                    key=lambda name: (-freq.get(name, 0), name))
    for original in ranked:
        short = next(gen)
        while short in occupied or short == original:
            short = next(gen)
        rename_map[original] = short
        occupied.add(short)

    replacements = ctx.build_replacements(rename_map, local_names, member_names)
    if not replacements:
        return code
    return ctx.apply(replacements)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|