cppgolf 0.1.0__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cppgolf
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: C++ multi-file merge & code golf / minifier tool
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/yourname/cppgolf
@@ -18,8 +18,7 @@ Classifier: Topic :: Software Development :: Code Generators
18
18
  Classifier: Topic :: Text Processing :: Filters
19
19
  Requires-Python: >=3.10
20
20
  Description-Content-Type: text/markdown
21
- Requires-Dist: tree-sitter>=0.25
22
- Requires-Dist: tree-sitter-cpp>=0.23
21
+ Requires-Dist: libclang
23
22
  Provides-Extra: dev
24
23
  Requires-Dist: build; extra == "dev"
25
24
  Requires-Dist: twine; extra == "dev"
@@ -96,8 +96,8 @@ def build_parser() -> argparse.ArgumentParser:
96
96
  help='单语句 if/for/while 去花括号')
97
97
  g2.add_argument('--shortcuts', dest='define_shortcuts', action='store_true',
98
98
  help='高频 cout/cin 用 #define 缩写')
99
- g2.add_argument('--rename', dest='rename_symbols', action='store_true',
100
- help='将用户变量/成员名压缩为短名(需要 tree-sitter-cpp)')
99
+ g2.add_argument('-no-rename', dest='no_rename_symbols', action='store_true',
100
+ help='不将用户变量/成员名压缩为短名(需要 tree-sitter-cpp)')
101
101
 
102
102
  p.add_argument('--stats', action='store_true', help='显示压缩率统计')
103
103
  return p
@@ -124,7 +124,7 @@ def main():
124
124
  keep_inline=args.keep_inline,
125
125
  aggressive=args.aggressive,
126
126
  define_shortcuts=args.define_shortcuts,
127
- rename_symbols=args.rename_symbols,
127
+ rename_symbols=not(args.no_rename_symbols),
128
128
  )
129
129
 
130
130
  def print_stats(final_size: int):
@@ -0,0 +1,410 @@
1
+ """
2
+ golf_rename.py — Pass 5: 符号名压缩(libclang AST 驱动)
3
+
4
+ 依赖: pip install libclang
5
+ """
6
+ import re
7
+ import itertools
8
+ import tempfile
9
+ import os
10
+ import sys as _sys
11
+ import struct as _struct
12
+
13
# Identifiers shorter than this many characters are never renamed.
_MIN_RENAME_LEN = 2

# Names the short-name generator must never hand out: reserved words of C and
# C++ plus a handful of ubiquitous macros / built-in names.
_CXX_KEYWORDS = frozenset(' '.join((
    # C keywords
    'auto break case char const continue default',
    'do double else enum extern float for goto',
    'if inline int long register restrict return',
    'short signed sizeof static struct switch typedef',
    'union unsigned void volatile while',
    # C++ keywords
    'alignas alignof and and_eq asm bitand bitor',
    'bool catch class compl concept consteval constexpr',
    'constinit co_await co_return co_yield decltype delete',
    'explicit export false friend mutable namespace',
    'new noexcept not not_eq nullptr operator or',
    'or_eq private protected public requires static_assert',
    'static_cast dynamic_cast reinterpret_cast const_cast',
    'template this thread_local throw true try typeid',
    'typename using virtual wchar_t xor xor_eq',
    # Common macros / built-in names
    'NULL TRUE FALSE EOF stdin stdout stderr',
)).split())
36
+
37
+
38
def _gen_short_names():
    """Yield lowercase identifiers endlessly: a..z, then aa..zz, aaa.. and so on.

    Names of one length are produced in lexicographic order before any name
    of the next length.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    width = 1
    while True:
        yield from map(''.join, itertools.product(alphabet, repeat=width))
        width += 1
42
+
43
+
44
def _make_platform_args() -> list:
    """Return the preprocessor ``-D`` flags to pass to libclang on this host.

    - Windows: ``_WIN32``/``WIN32`` (plus ``_WIN64``/``WIN64`` when pointers
      are 8 bytes wide), ``_HAS_STD_BYTE=0`` and ``WIN32_LEAN_AND_MEAN``.
      Per the original author's note, ``_HAS_STD_BYTE=0`` avoids a clash
      between the MSVC STL ``std::byte`` and the global ``byte`` typedef in
      the Windows headers, and ``WIN32_LEAN_AND_MEAN`` cuts header noise.
    - Linux: ``__linux__``, ``__unix__``, ``LINUX``.
    - macOS: ``__APPLE__``, ``__unix__``, ``__MACH__``.
    - Anything else: an empty list.

    A new list is returned on every call.
    """
    if _sys.platform == 'win32' or os.name == 'nt':
        flags = ['-D_WIN32', '-DWIN32']
        if _struct.calcsize('P') == 8:  # 64-bit interpreter
            flags.extend(['-D_WIN64', '-DWIN64'])
        flags.extend(['-D_HAS_STD_BYTE=0', '-DWIN32_LEAN_AND_MEAN'])
        return flags
    if _sys.platform.startswith('linux'):
        return ['-D__linux__', '-D__unix__', '-DLINUX']
    if _sys.platform == 'darwin':
        return ['-D__APPLE__', '-D__unix__', '-D__MACH__']
    return []
63
+
64
+
65
def _is_user_file(cursor, tmppath: str) -> bool:
    """Tell whether ``cursor`` is located in the file at ``tmppath``.

    Used to separate symbols written in the source being processed from
    those that come from system or third-party headers, which must not be
    renamed. Returns False when the location has no file, or when the two
    paths cannot be compared (``os.path.samefile`` raised ``OSError``).
    """
    source_file = cursor.location.file
    if source_file:
        try:
            return os.path.samefile(source_file.name, tmppath)
        except OSError:
            pass
    return False
75
+
76
+
77
def _walk_ast(
    cursor,
    tmppath: str,
    src_bytes: bytes,
    decl_map: dict,
    replacements: list,
    decl_kinds: frozenset,
    ref_kinds: frozenset,
) -> None:
    """Collect renameable declarations and their references from an AST.

    Nodes are visited in pre-order (a node before its children, children in
    the order ``get_children()`` yields them). The traversal uses an explicit
    stack instead of recursion, so a very deep AST (long operator chains,
    long ``else if`` ladders) cannot hit Python's recursion limit.

    Args:
        cursor: Root of the subtree to scan. A cursor whose kind is invalid
            is skipped together with everything below it.
        tmppath: Path of the file being processed; only cursors for which
            ``_is_user_file`` returns True are recorded (their children are
            still visited either way).
        src_bytes: UTF-8 bytes of that file, used to validate offsets.
        decl_map: USR -> (name, offset, name_byte_len) for the first accepted
            declaration of each symbol. Extended in place.
        replacements: (offset, byte_len, usr) triples for every accepted
            declaration and reference. Extended in place.
        decl_kinds: Cursor kinds treated as declarations to rename
            (VAR_DECL / FIELD_DECL / PARM_DECL at the visible call site).
        ref_kinds: Cursor kinds treated as uses of such declarations.
    """
    pending = [cursor]
    while pending:
        node = pending.pop()
        if node.kind.is_invalid():
            continue

        if _is_user_file(node, tmppath):
            kind = node.kind
            is_decl = kind in decl_kinds
            # The cursor whose USR identifies the symbol: the node itself for
            # a declaration, the referenced declaration for a use.
            target = None
            if is_decl:
                target = node
            elif kind in ref_kinds:
                referenced = node.referenced
                if referenced and referenced.kind in decl_kinds:
                    target = referenced

            if target is not None:
                name = node.spelling
                if len(name) >= _MIN_RENAME_LEN:
                    usr = target.get_usr()
                    if usr:
                        raw = name.encode('utf-8')
                        off = node.location.offset
                        # Skip positions whose bytes do not spell the name
                        # (e.g. cursors produced inside macro expansions).
                        if src_bytes[off:off + len(raw)] == raw:
                            if is_decl and usr not in decl_map:
                                decl_map[usr] = (name, off, len(raw))
                            replacements.append((off, len(raw), usr))

        # Push children reversed so they pop in their original order.
        pending.extend(reversed(list(node.get_children())))
123
+
124
+
125
def _scan_tokens(
    tu,
    tmppath: str,
    src_bytes: bytes,
    decl_map: dict,
    decl_kinds: frozenset,
    ref_kinds: frozenset,
    ci,
) -> list:
    """Scan every token of the translation unit for identifier positions.

    This pass complements the AST walk. According to the original author's
    notes, AST cursors for macro arguments report the offset of the macro
    invocation rather than of the argument, so the AST walk's offset check
    rejects them; token locations are the real text positions.

    Args:
        tu: Parsed translation unit.
        tmppath: Path of the file being processed; tokens located elsewhere
            are ignored.
        src_bytes: UTF-8 bytes of that file, used to validate offsets.
        decl_map: USR -> (name, offset, name_byte_len). May be extended in
            place with declarations discovered here.
        decl_kinds: Cursor kinds treated as renameable declarations.
        ref_kinds: Cursor kinds treated as references.
        ci: The ``clang.cindex`` module (for ``CursorKind.DECL_STMT``).

    Returns:
        A list of ``(offset, byte_len, tok_name, usr, is_member_access)``
        tuples. ``usr`` is None when no USR could be attached here (left to
        the later fallback strategies); ``is_member_access`` is True when the
        token directly before the identifier was ``.`` or ``->``.
    """
    found: list = []
    previous = ''
    for token in tu.get_tokens(extent=tu.cursor.extent):
        text = token.spelling
        # Decide this before `previous` is overwritten with the current token.
        after_accessor = previous in ('.', '->')
        previous = text

        if token.kind.name != 'IDENTIFIER':
            continue
        if not _is_user_file(token, tmppath):
            continue
        if len(text) < _MIN_RENAME_LEN:
            continue

        raw = text.encode('utf-8')
        size = len(raw)
        start = token.location.offset
        if src_bytes[start:start + size] != raw:
            continue

        owner = token.cursor
        usr = None
        if owner.kind in decl_kinds and owner.spelling == text:
            # Strategy 1: the token's cursor is the declaration itself.
            usr = owner.get_usr()
            # Register declarations the AST walk rejected on its offset check.
            if usr and usr not in decl_map:
                decl_map[usr] = (text, start, size)
        elif owner.kind == ci.CursorKind.DECL_STMT:
            # Strategy 1.5: the cursor is a DECL_STMT (noted as common for
            # variables declared inside macros); look for a matching child.
            for child in owner.get_children():
                if child.kind in decl_kinds and child.spelling == text:
                    usr = child.get_usr()
                    if usr and usr not in decl_map:
                        decl_map[usr] = (text, start, size)
                    break
        elif owner.kind in ref_kinds:
            target = owner.referenced
            if target and target.kind in decl_kinds and owner.spelling == text:
                target_usr = target.get_usr()
                if target_usr:
                    # Only declarations located in the user file may enter
                    # decl_map, so fields of system structs are not renamed.
                    if target_usr not in decl_map and _is_user_file(target, tmppath):
                        decl_name = target.spelling or text
                        if len(decl_name) >= _MIN_RENAME_LEN:
                            decl_map[target_usr] = (decl_name, start, size)
                    # Attach the USR only when it is a known rename target,
                    # keeping system-symbol USRs out of the candidates.
                    if target_usr in decl_map:
                        usr = target_usr

        found.append((start, size, text, usr, after_accessor))
    return found
210
+
211
+
212
def _build_rename_map(
    decl_map: dict,
    replacements: list,
    code: str,
) -> tuple:
    """Assign a short name to every declared symbol.

    Args:
        decl_map: USR -> (orig_name, first_decl_offset, name_byte_len).
        replacements: (offset, byte_len, usr) triples; only used to count how
            often each USR occurs.
        code: Original source text; every identifier found in it is treated
            as taken so that a generated name cannot collide with it.

    Returns:
        ``(rename_map, name_to_usr)`` where ``rename_map`` maps USR to its
        short name (symbols with more occurrences are served first and thus
        get the shortest names), and ``name_to_usr`` maps an original name to
        its USR only when exactly one renamed USR carries that name.
    """
    # Occurrence count per USR.
    usage: dict = {}
    for entry in replacements:
        key = entry[2]
        usage[key] = usage.get(key, 0) + 1

    # Most used first; the sort is stable, so ties keep decl_map order.
    ranked = sorted(decl_map, key=lambda u: usage.get(u, 0), reverse=True)

    # Names that may not be handed out: keywords plus identifiers in the code.
    taken = set(_CXX_KEYWORDS)
    taken.update(re.findall(r'\b[A-Za-z_]\w*\b', code))

    rename_map: dict = {}
    names = _gen_short_names()
    for usr in ranked:
        original = decl_map[usr][0]
        # The generator is endless, so this loop always ends via `break`.
        for candidate in names:
            if candidate not in taken and candidate != original:
                break
        rename_map[usr] = candidate
        taken.add(candidate)

    # Group renamed USRs by original name; keep only unambiguous names.
    owners: dict = {}
    for usr, record in decl_map.items():
        if usr in rename_map:
            owners.setdefault(record[0], []).append(usr)
    name_to_usr: dict = {
        name: usrs[0] for name, usrs in owners.items() if len(usrs) == 1
    }

    return rename_map, name_to_usr
261
+
262
+
263
def _merge_token_candidates(
    token_candidates: list,
    replacements: list,
    rename_map: dict,
    name_to_usr: dict,
) -> None:
    """Add token-scan positions that the AST walk did not record.

    For a candidate whose offset is not already in ``replacements``:

    * If the scan attached a USR, that USR is used as is.
    * Otherwise, unless the token follows ``.`` or ``->``:
      - Strategy 2: use the USR that ``name_to_usr`` gives for the name.
      - Strategy 3: failing that, use the USR of the same-named candidate
        with a known USR whose offset is closest.

    The position is appended only when the resulting USR is in ``rename_map``.

    Args:
        token_candidates: Tuples ``(offset, byte_len, tok_name, usr,
            is_member_access)``; not modified.
        replacements: (offset, byte_len, usr) triples; extended in place.
        rename_map: USR -> short name; filters out USRs that are not renamed.
        name_to_usr: Original name -> USR for unambiguous names.
    """
    # name -> [(offset, usr)] for every candidate that already has a USR.
    known_by_name: dict = {}
    for cand in token_candidates:
        if cand[3] is not None:
            known_by_name.setdefault(cand[2], []).append((cand[0], cand[3]))

    covered = {entry[0] for entry in replacements}
    for off, blen, tok_name, usr, is_member_access in token_candidates:
        if off in covered:
            continue  # already handled
        if usr is None and not is_member_access:
            usr = name_to_usr.get(tok_name)
            if usr is None:
                nearby = known_by_name.get(tok_name)
                if nearby:
                    usr = min(nearby, key=lambda pair: abs(pair[0] - off))[1]
        if usr and usr in rename_map:
            replacements.append((off, blen, usr))
            covered.add(off)
306
+
307
+
308
def _apply_replacements(
    src_bytes: bytes,
    replacements: list,
    rename_map: dict,
) -> str:
    """Rewrite the source bytes with the short names and return the text.

    Entries whose USR is not in ``rename_map`` are ignored. When several
    remaining entries share an offset, the one listed first wins. Edits are
    applied from the highest offset down so that earlier offsets stay valid
    while the buffer changes length.

    Args:
        src_bytes: UTF-8 encoded source.
        replacements: (offset, byte_len, usr) triples.
        rename_map: USR -> short name.

    Returns:
        The rewritten source decoded as UTF-8.
    """
    # offset -> (byte_len, usr) of the first usable entry at that offset.
    chosen: dict = {}
    for off, blen, usr in replacements:
        if usr in rename_map and off not in chosen:
            chosen[off] = (blen, usr)

    buf = bytearray(src_bytes)
    for off in sorted(chosen, reverse=True):
        blen, usr = chosen[off]
        buf[off:off + blen] = rename_map[usr].encode('utf-8')
    return buf.decode('utf-8')
336
+
337
+
338
def golf_rename_symbols(code: str) -> str:
    """Shorten user-defined symbol names in C++ source using libclang.

    Renamed: variables, function parameters and struct/class fields
    (VAR_DECL / PARM_DECL / FIELD_DECL cursors) declared in the given code.
    Not renamed: functions, types, macros, and anything declared in other
    files such as standard-library or system headers.

    Args:
        code: C++ source text.

    Returns:
        The source with symbols renamed, or ``code`` unchanged when no
        renameable declaration was found.

    Raises:
        RuntimeError: ``clang.cindex`` cannot be imported. The original
            ``ImportError`` is attached as ``__cause__``.
    """
    try:
        import clang.cindex as ci
    except ImportError as exc:
        # Chain the cause so a broken install is told apart from a missing one.
        raise RuntimeError("需要 libclang: pip install libclang") from exc

    src_bytes = code.encode('utf-8')

    # Write in binary mode so newlines are not translated (\n -> \r\n on
    # Windows), which would shift every byte offset.
    with tempfile.NamedTemporaryFile(suffix='.cpp', mode='wb', delete=False) as f:
        f.write(src_bytes)
        tmppath = f.name

    try:
        index = ci.Index.create()

        tu = index.parse(
            tmppath,
            args=['-std=c++23', '-w', '-fno-spell-checking'] + _make_platform_args(),
            options=(
                ci.TranslationUnit.PARSE_DETAILED_PROCESSING_RECORD |
                ci.TranslationUnit.PARSE_INCOMPLETE
            ),
        )

        # Cursor kinds that define a symbol to be renamed.
        _DECL_KINDS = frozenset({
            ci.CursorKind.VAR_DECL,
            ci.CursorKind.FIELD_DECL,
            ci.CursorKind.PARM_DECL,
        })
        # Cursor kinds that use an already declared symbol.
        _REF_KINDS = frozenset({
            ci.CursorKind.MEMBER_REF_EXPR,  # obj.field / obj->field
            ci.CursorKind.MEMBER_REF,       # constructor initializer list: field(...)
            ci.CursorKind.DECL_REF_EXPR,    # variable / parameter use
        })

        # USR -> (orig_name, first_decl_offset, name_byte_len)
        decl_map: dict = {}
        # (offset, byte_len, usr) for every declaration and reference site
        replacements: list = []

        # AST pass: declarations and references located in the user file.
        _walk_ast(tu.cursor, tmppath, src_bytes, decl_map, replacements, _DECL_KINDS, _REF_KINDS)

        # Token pass: positions the AST pass rejected on its offset check.
        token_candidates = _scan_tokens(
            tu, tmppath, src_bytes, decl_map, _DECL_KINDS, _REF_KINDS, ci
        )

        if not decl_map:
            return code

        # USR -> short name, plus the unambiguous name -> USR lookup table.
        rename_map, name_to_usr = _build_rename_map(decl_map, replacements, code)

        # Fold the token candidates into the replacement list.
        _merge_token_candidates(token_candidates, replacements, rename_map, name_to_usr)

        # Apply the edits back to front so offsets stay valid.
        return _apply_replacements(src_bytes, replacements, rename_map)

    finally:
        # Best-effort cleanup of the temporary file.
        try:
            os.unlink(tmppath)
        except OSError:
            pass
@@ -34,15 +34,30 @@ def merge_files(filepath: Path, include_dirs: list,
34
34
  code = strip_include_guard(code)
35
35
  parts = []
36
36
 
37
+ # 跟踪预处理条件块嵌套深度:depth > 0 表示当前在 #if/#ifdef/#ifndef 内部
38
+ # 处于条件块内的 #include <...> 必须保留在原位,不能提升到文件顶部
39
+ cond_depth = 0
40
+
37
41
  for line in code.splitlines(keepends=True):
38
42
  s = line.strip()
39
43
 
44
+ # 更新条件块深度
45
+ if re.match(r'#\s*if(?:def|ndef)?\b', s):
46
+ cond_depth += 1
47
+ elif re.match(r'#\s*endif\b', s):
48
+ cond_depth = max(0, cond_depth - 1)
49
+
40
50
  # 系统头文件 #include <...>
41
51
  m_sys = re.match(r'#\s*include\s*<([^>]+)>', s)
42
52
  if m_sys:
43
- entry = f'#include <{m_sys.group(1)}>\n'
44
- if entry not in sys_includes:
45
- sys_includes.append(entry)
53
+ if cond_depth > 0:
54
+ # 在条件块内:保留在原位,维持条件上下文
55
+ parts.append(line)
56
+ else:
57
+ # 无条件引用:提升到文件顶部统一去重管理
58
+ entry = f'#include <{m_sys.group(1)}>\n'
59
+ if entry not in sys_includes:
60
+ sys_includes.append(entry)
46
61
  continue
47
62
 
48
63
  # 本地头文件 #include "..."
@@ -55,9 +70,13 @@ def merge_files(filepath: Path, include_dirs: list,
55
70
  if c.exists():
56
71
  found = c; break
57
72
  if found:
58
- parts.append(f'\n// ── inlined: {inc} ──\n')
59
- parts.append(merge_files(found, include_dirs, visited, sys_includes))
60
- parts.append(f'\n// ── end: {inc} ──\n')
73
+ if cond_depth > 0:
74
+ # 在条件块内:不内联,保留原始 include
75
+ parts.append(line)
76
+ else:
77
+ parts.append(f'\n// ── inlined: {inc} ──\n')
78
+ parts.append(merge_files(found, include_dirs, visited, sys_includes))
79
+ parts.append(f'\n// ── end: {inc} ──\n')
61
80
  else:
62
81
  print(f'[警告] 找不到本地头文件:{inc}', file=sys.stderr)
63
82
  parts.append(line)
@@ -20,29 +20,40 @@ def golf_std_namespace(code: str) -> str:
20
20
 
21
21
 
22
22
  def golf_typedefs(code: str) -> str:
23
- """对高频长类型名添加 #define 缩写(出现 ≥2 次时触发)。"""
23
+ """对高频长类型名添加 typedef 缩写(出现 ≥2 次时触发)。"""
24
24
  replacements = [
25
- (r'\blong long\b', 'll', '#define ll long long'),
26
- (r'\bunsigned long long\b', 'ull', '#define ull unsigned long long'),
27
- (r'\blong double\b', 'ld', '#define ld long double'),
28
- (r'\bvector<int>\b', 'vi', '#define vi vector<int>'),
29
- (r'\bvector<ll>\b', 'vll', '#define vll vector<ll>'),
30
- (r'\bpair<int,int>\b', 'pii', '#define pii pair<int,int>'),
31
- (r'\bpair<ll,ll>\b', 'pll', '#define pll pair<ll,ll>'),
25
+ (r'\blong long\b', 'll', 'typedef long long ll;'),
26
+ (r'\bunsigned long long\b', 'ull', 'typedef unsigned long long ull;'),
27
+ (r'\blong double\b', 'ld', 'typedef long double ld;'),
28
+ (r'\bvector<int>\b', 'vi', 'typedef vector<int> vi;'),
29
+ (r'\bvector<ll>\b', 'vll', 'typedef vector<ll> vll;'),
30
+ (r'\bpair<int,int>\b', 'pii', 'typedef pair<int,int> pii;'),
31
+ (r'\bpair<ll,ll>\b', 'pll', 'typedef pair<ll,ll> pll;'),
32
32
  ]
33
33
  defines_to_add = []
34
34
  for pattern, short, defline in replacements:
35
- macro = defline.split()[1]
36
- if re.search(r'\b' + re.escape(macro) + r'\b', code):
37
- continue
38
- if len(re.findall(pattern, code)) >= 2:
35
+ # 提取缩写名(typedef ... short;)
36
+ macro = defline.rstrip(';').split()[-1]
37
+ # 匹配已有的 typedef 或 #define 形式
38
+ existing_re = re.compile(
39
+ r'^[ \t]*(?:'
40
+ r'typedef\b[^\n]+\b' + re.escape(macro) + r'\s*;'
41
+ r'|#[ \t]*define[ \t]+' + re.escape(macro) + r'\b[^\n]*'
42
+ r')[ \t]*\n?',
43
+ re.MULTILINE,
44
+ )
45
+ existing = existing_re.search(code)
46
+ if existing:
47
+ # 已有定义:从原位删掉,稍后统一插到顶部
48
+ code = code[:existing.start()] + code[existing.end():]
49
+ defines_to_add.append(defline)
50
+ elif len(re.findall(pattern, code)) >= 2:
39
51
  defines_to_add.append(defline)
40
52
  code = re.sub(pattern, short, code)
41
53
  if defines_to_add:
42
- last = max(
43
- (m.end() for m in re.finditer(r'^#(?:include|define)\b.*$', code, re.MULTILINE)),
44
- default=0,
45
- )
54
+ # 插入点:文件顶部 include 块末尾
55
+ include_ends = [m.end() for m in re.finditer(r'^[ \t]*#[ \t]*include\b.*$', code, re.MULTILINE)]
56
+ last = include_ends[-1] if include_ends else 0
46
57
  code = code[:last] + '\n' + '\n'.join(defines_to_add) + '\n' + code[last:]
47
58
  return code
48
59
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cppgolf
3
- Version: 0.1.0
3
+ Version: 0.1.2
4
4
  Summary: C++ multi-file merge & code golf / minifier tool
5
5
  License: MIT
6
6
  Project-URL: Homepage, https://github.com/yourname/cppgolf
@@ -18,8 +18,7 @@ Classifier: Topic :: Software Development :: Code Generators
18
18
  Classifier: Topic :: Text Processing :: Filters
19
19
  Requires-Python: >=3.10
20
20
  Description-Content-Type: text/markdown
21
- Requires-Dist: tree-sitter>=0.25
22
- Requires-Dist: tree-sitter-cpp>=0.23
21
+ Requires-Dist: libclang
23
22
  Provides-Extra: dev
24
23
  Requires-Dist: build; extra == "dev"
25
24
  Requires-Dist: twine; extra == "dev"
@@ -0,0 +1,6 @@
1
+ libclang
2
+
3
+ [dev]
4
+ build
5
+ twine
6
+ pytest
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "cppgolf"
7
- version = "0.1.0"
7
+ version = "0.1.2"
8
8
  description = "C++ multi-file merge & code golf / minifier tool"
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -24,8 +24,7 @@ classifiers = [
24
24
  ]
25
25
 
26
26
  dependencies = [
27
- "tree-sitter>=0.25",
28
- "tree-sitter-cpp>=0.23",
27
+ "libclang",
29
28
  ]
30
29
 
31
30
  [project.optional-dependencies]
@@ -1,452 +0,0 @@
1
- """
2
- golf_rename.py — Pass 5: 符号名压缩(tree-sitter AST 驱动)
3
- """
4
- import re
5
- import sys
6
- import itertools
7
-
8
- _DECLARATOR_CONTAINERS = frozenset({
9
- 'init_declarator', 'pointer_declarator', 'reference_declarator',
10
- 'array_declarator', 'abstract_pointer_declarator',
11
- 'abstract_reference_declarator', 'abstract_array_declarator',
12
- })
13
- _MIN_RENAME_LEN = 2
14
-
15
-
16
- def _gen_short_names():
17
- for length in itertools.count(1):
18
- for combo in itertools.product('abcdefghijklmnopqrstuvwxyz', repeat=length):
19
- yield ''.join(combo)
20
-
21
-
22
- def _extract_declarator_id(node, want_field: bool):
23
- target_type = 'field_identifier' if want_field else 'identifier'
24
- if node.type == target_type:
25
- return node
26
- if node.type in _DECLARATOR_CONTAINERS:
27
- for ch in node.children:
28
- if ch.type in ('*', '**', '&', '&&', '=', '[', ']',
29
- 'const', 'volatile', 'restrict',
30
- '__cdecl', '__stdcall', '__fastcall', '__thiscall',
31
- 'abstract_pointer_declarator',
32
- 'abstract_reference_declarator'):
33
- continue
34
- result = _extract_declarator_id(ch, want_field)
35
- if result:
36
- return result
37
- return None
38
-
39
-
40
- class _RenameCtx:
41
- """封装一次重命名所需的全部状态与子方法。"""
42
-
43
- def __init__(self, src_bytes, tree):
44
- self.src = src_bytes
45
- self.tree = tree
46
- # 类型上下文
47
- self.user_struct_names: set = set()
48
- self.struct_field_types: dict = {}
49
- self.var_type_map: dict = {}
50
- self.typedef_map: dict = {}
51
-
52
- # ── 工具 ────────────────────────────────────────────────────────────
53
- def name_of(self, node) -> str:
54
- return self.src[node.start_byte:node.end_byte].decode('utf-8')
55
-
56
- def _get_primary_type_name(self, node) -> str | None:
57
- for ch in node.children:
58
- if ch.type in ('type_identifier', 'primitive_type'):
59
- return self.name_of(ch)
60
- if ch.type == 'qualified_identifier':
61
- for sub in reversed(ch.children):
62
- if sub.type in ('identifier', 'type_identifier'):
63
- return self.name_of(sub)
64
- return None
65
-
66
- def _is_qid_name(self, node) -> bool:
67
- par = node.parent
68
- if not par or par.type != 'qualified_identifier':
69
- return False
70
- for ch in reversed(par.children):
71
- if ch.type != '::':
72
- return ch == node
73
- return False
74
-
75
- def _get_qid_scope_class(self, qid_node) -> str | None:
76
- for ch in qid_node.children:
77
- if ch.type == '::':
78
- break
79
- if ch.type in ('identifier', 'type_identifier', 'namespace_identifier'):
80
- return self.name_of(ch)
81
- elif ch.type == 'qualified_identifier':
82
- for sub in reversed(ch.children):
83
- if sub.type in ('identifier', 'type_identifier', 'namespace_identifier'):
84
- return self.name_of(sub)
85
- break
86
- return None
87
-
88
- # ── 步骤 0:构建类型上下文 ───────────────────────────────────────────
89
- def build_type_context(self):
90
- self._walk_types(self.tree.root_node)
91
- for alias, real in self.typedef_map.items():
92
- if real in self.user_struct_names:
93
- self.user_struct_names.add(alias)
94
- if real in self.struct_field_types and alias not in self.struct_field_types:
95
- self.struct_field_types[alias] = self.struct_field_types[real]
96
-
97
- def _walk_types(self, node):
98
- nt = node.type
99
- if nt == 'type_definition':
100
- inner = None
101
- for ch in node.children:
102
- if ch.type in ('struct_specifier', 'class_specifier', 'union_specifier'):
103
- for sub in ch.children:
104
- if sub.type == 'type_identifier':
105
- inner = self.name_of(sub); break
106
- break
107
- if inner:
108
- for ch in node.children:
109
- if ch.type == 'type_identifier' and self.name_of(ch) != inner:
110
- self.typedef_map[self.name_of(ch)] = inner
111
- elif ch.type in _DECLARATOR_CONTAINERS:
112
- id_node = _extract_declarator_id(ch, False)
113
- if id_node:
114
- self.typedef_map[self.name_of(id_node)] = inner
115
- if nt in ('struct_specifier', 'class_specifier', 'union_specifier'):
116
- struct_name = None
117
- for ch in node.children:
118
- if ch.type == 'type_identifier':
119
- struct_name = self.name_of(ch); break
120
- if struct_name and any(c.type == 'field_declaration_list' for c in node.children):
121
- self.user_struct_names.add(struct_name)
122
- fmap = self.struct_field_types.setdefault(struct_name, {})
123
- for ch in node.children:
124
- if ch.type == 'field_declaration_list':
125
- for fd in ch.children:
126
- if fd.type != 'field_declaration':
127
- continue
128
- ftype = self._get_primary_type_name(fd)
129
- for fc in fd.children:
130
- if fc.type == 'field_identifier':
131
- fmap[self.name_of(fc)] = ftype
132
- elif fc.type in _DECLARATOR_CONTAINERS or fc.type == 'init_declarator':
133
- id_node = _extract_declarator_id(fc, True)
134
- if id_node:
135
- fmap[self.name_of(id_node)] = ftype
136
- break
137
- if nt in ('declaration', 'parameter_declaration'):
138
- vtype = self._get_primary_type_name(node)
139
- if vtype:
140
- for ch in node.children:
141
- if ch.type == 'identifier':
142
- self.var_type_map.setdefault(self.name_of(ch), vtype)
143
- elif ch.type in _DECLARATOR_CONTAINERS or ch.type == 'init_declarator':
144
- id_node = _extract_declarator_id(ch, False)
145
- if id_node:
146
- self.var_type_map.setdefault(self.name_of(id_node), vtype)
147
- for ch in node.children:
148
- self._walk_types(ch)
149
-
150
- # ── cast 类型提取 ────────────────────────────────────────────────────
151
- def _extract_cast_target_type(self, node) -> str | None:
152
- if node.type == 'call_expression':
153
- fn = node.children[0] if node.children else None
154
- if fn and fn.type == 'template_function':
155
- fn_name = None
156
- for ch in fn.children:
157
- if ch.type == 'identifier':
158
- fn_name = self.name_of(ch); break
159
- if fn_name in ('reinterpret_cast', 'static_cast', 'dynamic_cast', 'const_cast'):
160
- for ch in fn.children:
161
- if ch.type == 'template_argument_list':
162
- for sub in ch.children:
163
- if sub.type == 'type_descriptor':
164
- return self._get_primary_type_name(sub)
165
- if node.type == 'cast_expression':
166
- for ch in node.children:
167
- if ch.type == 'type_descriptor':
168
- return self._get_primary_type_name(ch)
169
- if node.type in ('reinterpret_cast_expression', 'static_cast_expression',
170
- 'dynamic_cast_expression', 'const_cast_expression'):
171
- for ch in node.children:
172
- if ch.type == 'type_descriptor':
173
- return self._get_primary_type_name(ch)
174
- return None
175
-
176
- def _extract_init_cast_type(self, decl_node, var_name) -> str | None:
177
- for ch in decl_node.children:
178
- if ch.type == 'init_declarator':
179
- id_nd = _extract_declarator_id(ch, False)
180
- if not id_nd or self.name_of(id_nd) != var_name:
181
- continue
182
- for sub in ch.children:
183
- t = self._extract_cast_target_type(sub)
184
- if t:
185
- return t
186
- return None
187
-
188
- # ── 作用域感知的变量类型查找 ─────────────────────────────────────────
189
- def _lookup_var_type_in_scope(self, identifier_node) -> str | None:
190
- var_name = self.name_of(identifier_node)
191
- node = identifier_node.parent
192
- while node is not None:
193
- if node.type == 'parameter_list':
194
- for param in node.children:
195
- if param.type == 'parameter_declaration':
196
- vtype = self._get_primary_type_name(param)
197
- if vtype:
198
- for ch in param.children:
199
- if ch.type == 'identifier' and self.name_of(ch) == var_name:
200
- return vtype
201
- elif ch.type in _DECLARATOR_CONTAINERS:
202
- id_nd = _extract_declarator_id(ch, False)
203
- if id_nd and self.name_of(id_nd) == var_name:
204
- return vtype
205
- if node.type in ('compound_statement', 'translation_unit',
206
- 'namespace_definition', 'function_definition'):
207
- for child in node.children:
208
- if child.type == 'declaration':
209
- vtype = self._get_primary_type_name(child)
210
- matched = False
211
- for ch in child.children:
212
- if ch.type == 'identifier' and self.name_of(ch) == var_name:
213
- matched = True; break
214
- elif ch.type in _DECLARATOR_CONTAINERS or ch.type == 'init_declarator':
215
- id_nd = _extract_declarator_id(ch, False)
216
- if id_nd and self.name_of(id_nd) == var_name:
217
- matched = True; break
218
- if matched:
219
- if vtype:
220
- return vtype
221
- return self._extract_init_cast_type(child, var_name)
222
- node = node.parent
223
- return self.var_type_map.get(var_name)
224
-
225
- # ── 字段访问对象类型推断 ─────────────────────────────────────────────
226
- def _resolve_field_object_type(self, field_expr_node) -> str | None:
227
- if not field_expr_node.children:
228
- return None
229
- value_node = field_expr_node.children[0]
230
- vt = value_node.type
231
- td = self.typedef_map
232
- if vt == 'identifier':
233
- t = self._lookup_var_type_in_scope(value_node)
234
- return td.get(t, t)
235
- elif vt == 'field_expression':
236
- parent_type = self._resolve_field_object_type(value_node)
237
- if parent_type and parent_type in self.struct_field_types:
238
- for ch in value_node.children:
239
- if ch.type == 'field_identifier':
240
- ft = self.struct_field_types[parent_type].get(self.name_of(ch))
241
- return td.get(ft, ft) if ft else None
242
- return None
243
- elif vt == 'pointer_expression':
244
- for ch in value_node.children:
245
- if ch.type == 'identifier':
246
- t = self._lookup_var_type_in_scope(ch)
247
- return td.get(t, t)
248
- elif vt == 'subscript_expression':
249
- arr = value_node.children[0] if value_node.children else None
250
- if arr is None:
251
- return None
252
- if arr.type == 'identifier':
253
- t = self._lookup_var_type_in_scope(arr)
254
- return td.get(t, t) if t else None
255
- elif arr.type == 'field_expression':
256
- return self._resolve_field_object_type(arr)
257
- return None
258
-
259
- # ── 步骤 1:收集声明位节点 ────────────────────────────────────────────
260
def collect_decl_nodes(self):
    """Collect the identifier nodes that sit in declaration position.

    Walks the whole syntax tree starting at ``self.tree.root_node`` and
    gathers two lists of nodes:

    * local declarations -- names introduced by ``declaration``,
      ``parameter_declaration`` and ``for_range_loop`` nodes;
    * member declarations -- names introduced by ``field_declaration`` nodes.

    Returns:
        A ``(local_decl, member_decl)`` tuple of node lists.
    """
    locals_found: list = []
    members_found: list = []

    def add_declarator_id(container, is_member, sink):
        # Let the module-level helper dig the identifier out of a
        # declarator wrapper; it may return a falsy value when none exists.
        ident = _extract_declarator_id(container, is_member)
        if ident:
            sink.append(ident)

    def on_declaration(decl):
        for child in decl.children:
            kind = child.type
            if kind == 'identifier':
                locals_found.append(child)
            elif kind in _DECLARATOR_CONTAINERS or kind == 'init_declarator':
                add_declarator_id(child, False, locals_found)
            elif kind == 'function_declarator':
                # Only treat the declarator name as a variable when the
                # declared type is one of the user's own structs
                # (presumably `Foo x(a, b);` parsed as a function
                # declaration -- confirm against the grammar).
                primary = self._get_primary_type_name(decl)
                if not primary or primary not in self.user_struct_names:
                    continue
                first_id = next(
                    (sub for sub in child.children if sub.type == 'identifier'),
                    None,
                )
                if first_id is not None:
                    locals_found.append(first_id)

    def on_parameter(param):
        for child in param.children:
            if child.type == 'identifier':
                locals_found.append(child)
            elif child.type in _DECLARATOR_CONTAINERS:
                add_declarator_id(child, False, locals_found)

    def on_range_for(loop):
        type_seen = False
        for child in loop.children:
            kind = child.type
            if kind in (':', 'compound_statement'):
                return
            if child.is_named and not type_seen:
                # The first named child is skipped as the type part.
                type_seen = True
                continue
            if kind == 'identifier':
                locals_found.append(child)
                return
            if kind in _DECLARATOR_CONTAINERS:
                add_declarator_id(child, False, locals_found)
                return

    def on_field(field):
        for child in field.children:
            kind = child.type
            if kind == 'field_identifier':
                members_found.append(child)
            elif kind in _DECLARATOR_CONTAINERS or kind == 'init_declarator':
                add_declarator_id(child, True, members_found)

    handlers = {
        'declaration': on_declaration,
        'parameter_declaration': on_parameter,
        'for_range_loop': on_range_for,
        'field_declaration': on_field,
    }

    def visit(node):
        kind = node.type
        handler = handlers.get(kind)
        if handler is not None:
            handler(node)
        # Direct identifier children of a function declarator are not
        # descended into; every other child is visited.
        skip_identifiers = kind == 'function_declarator'
        for child in node.children:
            if skip_identifiers and child.type == 'identifier':
                continue
            visit(child)

    visit(self.tree.root_node)
    return locals_found, members_found
313
-
314
- # ── 步骤 3:统计频率 ──────────────────────────────────────────────────
315
def count_freq(self, local_names, member_names) -> dict:
    """Count how often each rename candidate occurs in the tree.

    The counting rules mirror the conditions ``build_replacements`` uses to
    decide whether an occurrence is rewritten, so that the resulting
    frequencies reflect the occurrences that will actually be shortened:

    * ``identifier`` -- counted when it is a local name, when it is a member
      name inside a named struct/class/union body, or when it is a member
      name qualified by a user-defined struct;
    * ``type_identifier`` -- counted when it is a local name sitting in a
      ``parameter_declaration -> parameter_list -> function_declarator ->
      declaration`` chain whose declared type is a user-defined struct;
    * ``field_identifier`` -- counted when it is a member name and either is
      not part of a ``field_expression`` or the expression's object type
      resolves to a user-defined struct.

    Args:
        local_names: collection of local/parameter names eligible for renaming.
        member_names: collection of member names eligible for renaming.

    Returns:
        Mapping from name to occurrence count; names that never occur are
        absent from the mapping.
    """
    freq: dict = {}
    class_depth = 0  # number of enclosing named struct/class/union bodies

    def bump(name):
        freq[name] = freq.get(name, 0) + 1

    def qualified_by_user_struct(node):
        scope_cls = self._get_qid_scope_class(node.parent)
        real_cls = self.typedef_map.get(scope_cls, scope_cls) if scope_cls else None
        return bool(real_cls) and real_cls in self.user_struct_names

    def enclosing_declaration(type_node):
        # Climb parameter_declaration -> parameter_list ->
        # function_declarator -> declaration; None if the chain breaks.
        cur = type_node
        for expected in ('parameter_declaration', 'parameter_list',
                         'function_declarator', 'declaration'):
            cur = cur.parent
            if not cur or cur.type != expected:
                return None
        return cur

    def walk(node):
        nonlocal class_depth
        nt = node.type
        entered = (
            nt in ('struct_specifier', 'class_specifier', 'union_specifier')
            and any(ch.type == 'type_identifier' for ch in node.children)
        )
        if entered:
            class_depth += 1

        if nt == 'identifier':
            n = self.name_of(node)
            if n in local_names:
                bump(n)
            elif n in member_names:
                # Bare member references inside a class body are rewritten
                # too, so they must contribute to the frequency.
                if class_depth:
                    bump(n)
                elif self._is_qid_name(node) and qualified_by_user_struct(node):
                    bump(n)
        elif nt == 'field_identifier':
            n = self.name_of(node)
            if n in member_names:
                parent = node.parent
                if parent and parent.type == 'field_expression':
                    obj_type = self._resolve_field_object_type(parent)
                    if obj_type and obj_type in self.user_struct_names:
                        bump(n)
                else:
                    bump(n)
        elif nt == 'type_identifier':
            n = self.name_of(node)
            if n in local_names:
                decl = enclosing_declaration(node)
                if decl is not None:
                    decl_type = self._get_primary_type_name(decl)
                    if decl_type and decl_type in self.user_struct_names:
                        bump(n)

        for ch in node.children:
            walk(ch)
        if entered:
            class_depth -= 1

    walk(self.tree.root_node)
    return freq
343
-
344
- # ── 步骤 5:收集替换位置 ──────────────────────────────────────────────
345
def build_replacements(self, rename_map, local_names, member_names):
    """Collect the byte ranges that have to be rewritten.

    Walks the tree from ``self.tree.root_node`` and, for every identifier-like
    node whose text is a key of ``rename_map``, decides whether that
    occurrence refers to a renamed local or member.

    Args:
        rename_map: mapping from original name to its short replacement.
        local_names: collection of renamed local/parameter names.
        member_names: collection of renamed member names.

    Returns:
        A list of ``(start_byte, end_byte, replacement_bytes)`` tuples in
        traversal order.
    """
    edits: list = []
    open_classes: list = []  # names of the struct/class/union bodies we are inside

    def record(node, name):
        edits.append((node.start_byte, node.end_byte, rename_map[name].encode()))

    def qualified_by_user_struct(node):
        scope_cls = self._get_qid_scope_class(node.parent)
        if not scope_cls:
            return False
        real_cls = self.typedef_map.get(scope_cls, scope_cls)
        return bool(real_cls) and real_cls in self.user_struct_names

    def enclosing_declaration(type_node):
        # Climb parameter_declaration -> parameter_list ->
        # function_declarator -> declaration; None if the chain breaks.
        cur = type_node
        for expected in ('parameter_declaration', 'parameter_list',
                         'function_declarator', 'declaration'):
            cur = cur.parent
            if not cur or cur.type != expected:
                return None
        return cur

    def handle_identifier(node, name):
        if name in local_names:
            record(node, name)
        elif name in member_names and (
            open_classes
            or (self._is_qid_name(node) and qualified_by_user_struct(node))
        ):
            # Either a bare reference inside a class body, or a reference
            # qualified by a user-defined struct.
            record(node, name)

    def handle_type_identifier(node, name):
        if name not in local_names:
            return
        decl = enclosing_declaration(node)
        if decl is None:
            return
        decl_type = self._get_primary_type_name(decl)
        if decl_type and decl_type in self.user_struct_names:
            record(node, name)

    def handle_field_identifier(node, name):
        if name not in member_names:
            return
        owner = node.parent
        if owner and owner.type == 'field_expression':
            obj_type = self._resolve_field_object_type(owner)
            if obj_type and obj_type in self.user_struct_names:
                record(node, name)
        else:
            record(node, name)

    handlers = {
        'identifier': handle_identifier,
        'type_identifier': handle_type_identifier,
        'field_identifier': handle_field_identifier,
    }

    def visit(node):
        kind = node.type
        pushed = False
        if kind in ('struct_specifier', 'class_specifier', 'union_specifier'):
            tag = next(
                (ch for ch in node.children if ch.type == 'type_identifier'),
                None,
            )
            if tag is not None:
                open_classes.append(self.name_of(tag))
                pushed = True

        handler = handlers.get(kind)
        if handler is not None:
            name = self.name_of(node)
            if name in rename_map:
                handler(node, name)

        for child in node.children:
            visit(child)
        if pushed:
            open_classes.pop()

    visit(self.tree.root_node)
    return edits
396
-
397
- # ── 步骤 6:应用替换 ──────────────────────────────────────────────────
398
def apply(self, replacements) -> str:
    """Apply byte-range replacements to ``self.src`` and return the new text.

    Args:
        replacements: iterable of ``(start_byte, end_byte, new_bytes)``
            tuples addressing ``self.src``. The caller's container is not
            modified.

    Returns:
        The rewritten source decoded as UTF-8.
    """
    # Work from the end of the buffer towards the start so that earlier
    # offsets stay valid while later ranges change length. ``sorted`` makes
    # a copy, leaving the caller's list in its original order.
    ordered = sorted(replacements, key=lambda item: item[0], reverse=True)
    buf = bytearray(self.src)
    for start, end, new in ordered:
        buf[start:end] = new
    return buf.decode('utf-8')
404
-
405
-
406
- # ─────────────────────────────────────────────────────────────────────────────
407
- # 公开入口
408
- # ─────────────────────────────────────────────────────────────────────────────
409
def golf_rename_symbols(code: str) -> str:
    """Shorten user-declared variable and member names in C++ source.

    Parses ``code`` with tree-sitter, collects declared local and member
    names that are at least ``_MIN_RENAME_LEN`` characters long, assigns
    each a short name not already used as an identifier in ``code``, and
    rewrites the occurrences.

    Args:
        code: C++ source text.

    Returns:
        The rewritten source, or ``code`` unchanged when tree-sitter cannot
        be imported, when there is nothing to rename, or when no
        replacement positions are found.
    """
    try:
        from tree_sitter import Language, Parser
        import tree_sitter_cpp as tscpp
        _lang = Language(tscpp.language())
    except ImportError:
        print('[警告] 未找到 tree-sitter,跳过符号重命名。'
              ' 运行: pip install tree-sitter tree-sitter-cpp', file=sys.stderr)
        return code

    src_bytes = code.encode('utf-8')
    parser = Parser(_lang)
    tree = parser.parse(src_bytes)

    ctx = _RenameCtx(src_bytes, tree)
    ctx.build_type_context()

    local_decl, member_decl = ctx.collect_decl_nodes()
    name_of = ctx.name_of

    local_names = {name_of(n) for n in local_decl if len(name_of(n)) >= _MIN_RENAME_LEN}
    member_names = {name_of(n) for n in member_decl if len(name_of(n)) >= _MIN_RENAME_LEN}
    if not local_names and not member_names:
        return code

    all_targets = local_names | member_names
    freq = ctx.count_freq(local_names, member_names)

    # Step 4: build the rename map. Every identifier-looking token already
    # present in the source is off limits for new short names.
    occupied = set(re.findall(r'\b[A-Za-z_]\w*\b', code))
    rename_map: dict = {}
    gen = _gen_short_names()
    # Most frequent names get the earliest generated names. The name itself is
    # the tie-breaker: ``all_targets`` is a set, whose iteration order
    # for strings varies with hash randomisation, so sorting by frequency
    # alone would make the output differ from run to run.
    ranked = sorted(all_targets, key=lambda name: (-freq.get(name, 0), name))
    for original in ranked:
        short = next(gen)
        while short in occupied or short == original:
            short = next(gen)
        rename_map[original] = short
        occupied.add(short)

    replacements = ctx.build_replacements(rename_map, local_names, member_names)
    if not replacements:
        return code
    return ctx.apply(replacements)
@@ -1,7 +0,0 @@
1
- tree-sitter>=0.25
2
- tree-sitter-cpp>=0.23
3
-
4
- [dev]
5
- build
6
- twine
7
- pytest
File without changes
File without changes
File without changes
File without changes