cppgolf 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cppgolf/__init__.py +46 -0
- cppgolf/__main__.py +148 -0
- cppgolf/golf_rename.py +452 -0
- cppgolf/merge.py +68 -0
- cppgolf/strip_comments.py +62 -0
- cppgolf/transforms.py +99 -0
- cppgolf/whitespace.py +127 -0
- cppgolf-0.1.0.dist-info/METADATA +97 -0
- cppgolf-0.1.0.dist-info/RECORD +12 -0
- cppgolf-0.1.0.dist-info/WHEEL +5 -0
- cppgolf-0.1.0.dist-info/entry_points.txt +2 -0
- cppgolf-0.1.0.dist-info/top_level.txt +1 -0
cppgolf/__init__.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""
|
|
2
|
+
cppgolf — C++ multi-file merge & code golf tool
|
|
3
|
+
|
|
4
|
+
Public API:
|
|
5
|
+
process(input_file, include_dirs, **options) -> str
|
|
6
|
+
golf_rename_symbols(code) -> str
|
|
7
|
+
strip_comments(code) -> str
|
|
8
|
+
merge_files(filepath, include_dirs, visited, sys_includes) -> str
|
|
9
|
+
compress_whitespace(code) -> str
|
|
10
|
+
golf_std_namespace / golf_typedefs / golf_endl_to_newline /
|
|
11
|
+
golf_remove_main_return / golf_remove_inline /
|
|
12
|
+
golf_braces_single_stmt / golf_define_shortcuts
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from .strip_comments import strip_comments
|
|
16
|
+
from .merge import merge_files, strip_include_guard
|
|
17
|
+
from .whitespace import compress_whitespace
|
|
18
|
+
from .transforms import (
|
|
19
|
+
golf_std_namespace,
|
|
20
|
+
golf_typedefs,
|
|
21
|
+
golf_remove_main_return,
|
|
22
|
+
golf_endl_to_newline,
|
|
23
|
+
golf_remove_inline,
|
|
24
|
+
golf_braces_single_stmt,
|
|
25
|
+
golf_define_shortcuts,
|
|
26
|
+
)
|
|
27
|
+
from .golf_rename import golf_rename_symbols
|
|
28
|
+
from .__main__ import process
|
|
29
|
+
|
|
30
|
+
__all__ = [
|
|
31
|
+
"process",
|
|
32
|
+
"strip_comments",
|
|
33
|
+
"merge_files",
|
|
34
|
+
"strip_include_guard",
|
|
35
|
+
"compress_whitespace",
|
|
36
|
+
"golf_std_namespace",
|
|
37
|
+
"golf_typedefs",
|
|
38
|
+
"golf_remove_main_return",
|
|
39
|
+
"golf_endl_to_newline",
|
|
40
|
+
"golf_remove_inline",
|
|
41
|
+
"golf_braces_single_stmt",
|
|
42
|
+
"golf_define_shortcuts",
|
|
43
|
+
"golf_rename_symbols",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
__version__ = "0.1.0"
|
cppgolf/__main__.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""cppgolf.__main__ — CLI 入口,支持 python -m cppgolf 和 cppgolf 命令"""
|
|
2
|
+
import sys
|
|
3
|
+
import argparse
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from .strip_comments import strip_comments
|
|
7
|
+
from .merge import merge_files
|
|
8
|
+
from .whitespace import compress_whitespace
|
|
9
|
+
from .transforms import (
|
|
10
|
+
golf_std_namespace, golf_typedefs, golf_remove_main_return,
|
|
11
|
+
golf_endl_to_newline, golf_remove_inline,
|
|
12
|
+
golf_braces_single_stmt, golf_define_shortcuts,
|
|
13
|
+
)
|
|
14
|
+
from .golf_rename import golf_rename_symbols
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def process(
    input_file: Path,
    include_dirs: list,
    *,
    no_merge: bool = False,
    no_strip_comments: bool = False,
    no_compress_ws: bool = False,
    no_std_ns: bool = False,
    no_typedefs: bool = False,
    keep_main_return: bool = False,
    keep_endl: bool = False,
    keep_inline: bool = False,
    aggressive: bool = False,
    define_shortcuts: bool = False,
    rename_symbols: bool = False,
) -> str:
    """Run the golfing pipeline on *input_file* and return the resulting source.

    Args:
        input_file: entry C++ file.
        include_dirs: extra directories searched for local ``#include "..."`` files.
        no_merge: read *input_file* alone instead of inlining local includes.
        no_strip_comments, no_compress_ws, no_std_ns, no_typedefs,
        keep_main_return, keep_endl, keep_inline: disable the matching
            default-on pass.
        aggressive, define_shortcuts, rename_symbols: enable the matching
            default-off pass.

    Returns:
        The transformed code, stripped of surrounding whitespace and terminated
        by exactly one newline.
    """
    if no_merge:
        code = input_file.read_text(encoding='utf-8-sig', errors='replace')
    else:
        hoisted_headers: list = []
        seen_paths: set = set()
        body = merge_files(input_file, list(include_dirs), seen_paths, hoisted_headers)
        # System includes collected during the merge are emitted first.
        code = ''.join(hoisted_headers) + body

    # (enabled?, pass) pairs; the order is significant and mirrors the CLI docs.
    pipeline = (
        (not no_strip_comments, strip_comments),
        (not keep_endl, golf_endl_to_newline),
        (not no_std_ns, golf_std_namespace),
        (not no_typedefs, golf_typedefs),
        (not keep_main_return, golf_remove_main_return),
        (not keep_inline, golf_remove_inline),
        (aggressive, golf_braces_single_stmt),
        (define_shortcuts, golf_define_shortcuts),
        (rename_symbols, golf_rename_symbols),
        (not no_compress_ws, compress_whitespace),
    )
    for enabled, transform in pipeline:
        if enabled:
            code = transform(code)

    return code.strip() + '\n'
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the ``cppgolf`` program.

    Positional ``input`` plus ``-o/--output`` and repeatable ``-I/--include``
    are registered first, then the default-on feature switches, then the
    opt-in aggressive passes, and finally ``--stats``.
    """
    parser = argparse.ArgumentParser(
        prog='cppgolf',
        description='C++ 多文件合并 + 代码高尔夫工具',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""示例:
cppgolf solution.cpp
cppgolf solution.cpp -o golf.cpp
cppgolf solution.cpp -I include/ --rename --stats
""",
    )
    parser.add_argument('input', type=Path, help='入口 C++ 文件')
    parser.add_argument('-o', '--output', type=Path, default=None, help='输出文件(默认 stdout)')
    parser.add_argument('-I', '--include', dest='include_dirs', action='append',
                        type=Path, default=[], metavar='DIR', help='追加 include 目录(可多次)')

    # Switches that turn OFF a pass which runs by default.
    default_on = parser.add_argument_group('功能开关(默认全部开启)')
    for flag, text in (
        ('--no-merge', '跳过多文件合并'),
        ('--no-strip-comments', '保留注释'),
        ('--no-compress-ws', '保留空白格式'),
        ('--no-std-ns', '不添加 using namespace std'),
        ('--no-typedefs', '不添加 ll/ld 等类型宏'),
        ('--keep-main-return', '保留 main 末尾 return 0'),
        ('--keep-endl', '保留 endl'),
        ('--keep-inline', '保留 inline 关键字'),
    ):
        default_on.add_argument(flag, action='store_true', help=text)

    # Switches that turn ON a pass which is off by default.
    opt_in = parser.add_argument_group('激进优化(有风险,默认关闭)')
    for flag, dest, text in (
        ('--aggressive', 'aggressive', '单语句 if/for/while 去花括号'),
        ('--shortcuts', 'define_shortcuts', '高频 cout/cin 用 #define 缩写'),
        ('--rename', 'rename_symbols', '将用户变量/成员名压缩为短名(需要 tree-sitter-cpp)'),
    ):
        opt_in.add_argument(flag, dest=dest, action='store_true', help=text)

    parser.add_argument('--stats', action='store_true', help='显示压缩率统计')
    return parser
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def main():
    """CLI entry point: parse arguments, run ``process`` and emit the result.

    Exits with status 1 when the input path does not exist. The result goes to
    ``--output`` when given, otherwise to stdout; statistics and status
    messages go to stderr.
    """
    args = build_parser().parse_args()

    if not args.input.exists():
        print(f'错误:文件不存在 —— {args.input}', file=sys.stderr)
        sys.exit(1)

    # Size of the entry file only; used as the baseline for --stats.
    original_size = args.input.stat().st_size

    option_names = (
        'no_merge', 'no_strip_comments', 'no_compress_ws', 'no_std_ns',
        'no_typedefs', 'keep_main_return', 'keep_endl', 'keep_inline',
        'aggressive', 'define_shortcuts', 'rename_symbols',
    )
    options = {name: getattr(args, name) for name in option_names}
    result = process(args.input, args.include_dirs, **options)

    def print_stats(final_size: int):
        # Guard against division by zero for an empty input file.
        if original_size:
            ratio = (1 - final_size / original_size) * 100
        else:
            ratio = 0
        print(f'[统计] 原始:{original_size} B → 高尔夫后:{final_size} B (压缩 {ratio:.1f}%)',
              file=sys.stderr)

    if not args.output:
        if args.stats:
            print_stats(len(result.encode('utf-8')))
        sys.stdout.write(result)
        return

    args.output.write_text(result, encoding='utf-8')
    if args.stats:
        print_stats(args.output.stat().st_size)
    else:
        print(f'已写入:{args.output}', file=sys.stderr)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
if __name__ == '__main__':
|
|
148
|
+
main()
|
cppgolf/golf_rename.py
ADDED
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
"""
|
|
2
|
+
golf_rename.py — Pass 5: symbol-name shortening (driven by the tree-sitter AST)
|
|
3
|
+
"""
|
|
4
|
+
import re
|
|
5
|
+
import sys
|
|
6
|
+
import itertools
|
|
7
|
+
|
|
8
|
+
_DECLARATOR_CONTAINERS = frozenset({
|
|
9
|
+
'init_declarator', 'pointer_declarator', 'reference_declarator',
|
|
10
|
+
'array_declarator', 'abstract_pointer_declarator',
|
|
11
|
+
'abstract_reference_declarator', 'abstract_array_declarator',
|
|
12
|
+
})
|
|
13
|
+
_MIN_RENAME_LEN = 2
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _gen_short_names():
|
|
17
|
+
for length in itertools.count(1):
|
|
18
|
+
for combo in itertools.product('abcdefghijklmnopqrstuvwxyz', repeat=length):
|
|
19
|
+
yield ''.join(combo)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _extract_declarator_id(node, want_field: bool):
    """Return the identifier node declared by *node*, or ``None``.

    Descends depth-first through declarator wrapper nodes (those whose type is
    in ``_DECLARATOR_CONTAINERS``) and returns the first ``field_identifier``
    (when *want_field* is true) or ``identifier`` (otherwise) it meets.
    Punctuation, qualifiers, calling-convention tokens and abstract
    declarators are never descended into.
    """
    wanted = 'field_identifier' if want_field else 'identifier'
    if node.type == wanted:
        return node
    if node.type not in _DECLARATOR_CONTAINERS:
        return None

    ignored = (
        '*', '**', '&', '&&', '=', '[', ']',
        'const', 'volatile', 'restrict',
        '__cdecl', '__stdcall', '__fastcall', '__thiscall',
        'abstract_pointer_declarator',
        'abstract_reference_declarator',
    )
    for child in node.children:
        if child.type not in ignored:
            found = _extract_declarator_id(child, want_field)
            if found:
                return found
    return None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class _RenameCtx:
|
|
41
|
+
"""封装一次重命名所需的全部状态与子方法。"""
|
|
42
|
+
|
|
43
|
+
def __init__(self, src_bytes, tree):
|
|
44
|
+
self.src = src_bytes
|
|
45
|
+
self.tree = tree
|
|
46
|
+
# 类型上下文
|
|
47
|
+
self.user_struct_names: set = set()
|
|
48
|
+
self.struct_field_types: dict = {}
|
|
49
|
+
self.var_type_map: dict = {}
|
|
50
|
+
self.typedef_map: dict = {}
|
|
51
|
+
|
|
52
|
+
# ── 工具 ────────────────────────────────────────────────────────────
|
|
53
|
+
def name_of(self, node) -> str:
|
|
54
|
+
return self.src[node.start_byte:node.end_byte].decode('utf-8')
|
|
55
|
+
|
|
56
|
+
def _get_primary_type_name(self, node) -> str | None:
|
|
57
|
+
for ch in node.children:
|
|
58
|
+
if ch.type in ('type_identifier', 'primitive_type'):
|
|
59
|
+
return self.name_of(ch)
|
|
60
|
+
if ch.type == 'qualified_identifier':
|
|
61
|
+
for sub in reversed(ch.children):
|
|
62
|
+
if sub.type in ('identifier', 'type_identifier'):
|
|
63
|
+
return self.name_of(sub)
|
|
64
|
+
return None
|
|
65
|
+
|
|
66
|
+
def _is_qid_name(self, node) -> bool:
|
|
67
|
+
par = node.parent
|
|
68
|
+
if not par or par.type != 'qualified_identifier':
|
|
69
|
+
return False
|
|
70
|
+
for ch in reversed(par.children):
|
|
71
|
+
if ch.type != '::':
|
|
72
|
+
return ch == node
|
|
73
|
+
return False
|
|
74
|
+
|
|
75
|
+
def _get_qid_scope_class(self, qid_node) -> str | None:
|
|
76
|
+
for ch in qid_node.children:
|
|
77
|
+
if ch.type == '::':
|
|
78
|
+
break
|
|
79
|
+
if ch.type in ('identifier', 'type_identifier', 'namespace_identifier'):
|
|
80
|
+
return self.name_of(ch)
|
|
81
|
+
elif ch.type == 'qualified_identifier':
|
|
82
|
+
for sub in reversed(ch.children):
|
|
83
|
+
if sub.type in ('identifier', 'type_identifier', 'namespace_identifier'):
|
|
84
|
+
return self.name_of(sub)
|
|
85
|
+
break
|
|
86
|
+
return None
|
|
87
|
+
|
|
88
|
+
# ── 步骤 0:构建类型上下文 ───────────────────────────────────────────
|
|
89
|
+
def build_type_context(self):
|
|
90
|
+
self._walk_types(self.tree.root_node)
|
|
91
|
+
for alias, real in self.typedef_map.items():
|
|
92
|
+
if real in self.user_struct_names:
|
|
93
|
+
self.user_struct_names.add(alias)
|
|
94
|
+
if real in self.struct_field_types and alias not in self.struct_field_types:
|
|
95
|
+
self.struct_field_types[alias] = self.struct_field_types[real]
|
|
96
|
+
|
|
97
|
+
def _walk_types(self, node):
|
|
98
|
+
nt = node.type
|
|
99
|
+
if nt == 'type_definition':
|
|
100
|
+
inner = None
|
|
101
|
+
for ch in node.children:
|
|
102
|
+
if ch.type in ('struct_specifier', 'class_specifier', 'union_specifier'):
|
|
103
|
+
for sub in ch.children:
|
|
104
|
+
if sub.type == 'type_identifier':
|
|
105
|
+
inner = self.name_of(sub); break
|
|
106
|
+
break
|
|
107
|
+
if inner:
|
|
108
|
+
for ch in node.children:
|
|
109
|
+
if ch.type == 'type_identifier' and self.name_of(ch) != inner:
|
|
110
|
+
self.typedef_map[self.name_of(ch)] = inner
|
|
111
|
+
elif ch.type in _DECLARATOR_CONTAINERS:
|
|
112
|
+
id_node = _extract_declarator_id(ch, False)
|
|
113
|
+
if id_node:
|
|
114
|
+
self.typedef_map[self.name_of(id_node)] = inner
|
|
115
|
+
if nt in ('struct_specifier', 'class_specifier', 'union_specifier'):
|
|
116
|
+
struct_name = None
|
|
117
|
+
for ch in node.children:
|
|
118
|
+
if ch.type == 'type_identifier':
|
|
119
|
+
struct_name = self.name_of(ch); break
|
|
120
|
+
if struct_name and any(c.type == 'field_declaration_list' for c in node.children):
|
|
121
|
+
self.user_struct_names.add(struct_name)
|
|
122
|
+
fmap = self.struct_field_types.setdefault(struct_name, {})
|
|
123
|
+
for ch in node.children:
|
|
124
|
+
if ch.type == 'field_declaration_list':
|
|
125
|
+
for fd in ch.children:
|
|
126
|
+
if fd.type != 'field_declaration':
|
|
127
|
+
continue
|
|
128
|
+
ftype = self._get_primary_type_name(fd)
|
|
129
|
+
for fc in fd.children:
|
|
130
|
+
if fc.type == 'field_identifier':
|
|
131
|
+
fmap[self.name_of(fc)] = ftype
|
|
132
|
+
elif fc.type in _DECLARATOR_CONTAINERS or fc.type == 'init_declarator':
|
|
133
|
+
id_node = _extract_declarator_id(fc, True)
|
|
134
|
+
if id_node:
|
|
135
|
+
fmap[self.name_of(id_node)] = ftype
|
|
136
|
+
break
|
|
137
|
+
if nt in ('declaration', 'parameter_declaration'):
|
|
138
|
+
vtype = self._get_primary_type_name(node)
|
|
139
|
+
if vtype:
|
|
140
|
+
for ch in node.children:
|
|
141
|
+
if ch.type == 'identifier':
|
|
142
|
+
self.var_type_map.setdefault(self.name_of(ch), vtype)
|
|
143
|
+
elif ch.type in _DECLARATOR_CONTAINERS or ch.type == 'init_declarator':
|
|
144
|
+
id_node = _extract_declarator_id(ch, False)
|
|
145
|
+
if id_node:
|
|
146
|
+
self.var_type_map.setdefault(self.name_of(id_node), vtype)
|
|
147
|
+
for ch in node.children:
|
|
148
|
+
self._walk_types(ch)
|
|
149
|
+
|
|
150
|
+
# ── cast 类型提取 ────────────────────────────────────────────────────
|
|
151
|
+
def _extract_cast_target_type(self, node) -> str | None:
|
|
152
|
+
if node.type == 'call_expression':
|
|
153
|
+
fn = node.children[0] if node.children else None
|
|
154
|
+
if fn and fn.type == 'template_function':
|
|
155
|
+
fn_name = None
|
|
156
|
+
for ch in fn.children:
|
|
157
|
+
if ch.type == 'identifier':
|
|
158
|
+
fn_name = self.name_of(ch); break
|
|
159
|
+
if fn_name in ('reinterpret_cast', 'static_cast', 'dynamic_cast', 'const_cast'):
|
|
160
|
+
for ch in fn.children:
|
|
161
|
+
if ch.type == 'template_argument_list':
|
|
162
|
+
for sub in ch.children:
|
|
163
|
+
if sub.type == 'type_descriptor':
|
|
164
|
+
return self._get_primary_type_name(sub)
|
|
165
|
+
if node.type == 'cast_expression':
|
|
166
|
+
for ch in node.children:
|
|
167
|
+
if ch.type == 'type_descriptor':
|
|
168
|
+
return self._get_primary_type_name(ch)
|
|
169
|
+
if node.type in ('reinterpret_cast_expression', 'static_cast_expression',
|
|
170
|
+
'dynamic_cast_expression', 'const_cast_expression'):
|
|
171
|
+
for ch in node.children:
|
|
172
|
+
if ch.type == 'type_descriptor':
|
|
173
|
+
return self._get_primary_type_name(ch)
|
|
174
|
+
return None
|
|
175
|
+
|
|
176
|
+
def _extract_init_cast_type(self, decl_node, var_name) -> str | None:
|
|
177
|
+
for ch in decl_node.children:
|
|
178
|
+
if ch.type == 'init_declarator':
|
|
179
|
+
id_nd = _extract_declarator_id(ch, False)
|
|
180
|
+
if not id_nd or self.name_of(id_nd) != var_name:
|
|
181
|
+
continue
|
|
182
|
+
for sub in ch.children:
|
|
183
|
+
t = self._extract_cast_target_type(sub)
|
|
184
|
+
if t:
|
|
185
|
+
return t
|
|
186
|
+
return None
|
|
187
|
+
|
|
188
|
+
# ── 作用域感知的变量类型查找 ─────────────────────────────────────────
|
|
189
|
+
def _lookup_var_type_in_scope(self, identifier_node) -> str | None:
|
|
190
|
+
var_name = self.name_of(identifier_node)
|
|
191
|
+
node = identifier_node.parent
|
|
192
|
+
while node is not None:
|
|
193
|
+
if node.type == 'parameter_list':
|
|
194
|
+
for param in node.children:
|
|
195
|
+
if param.type == 'parameter_declaration':
|
|
196
|
+
vtype = self._get_primary_type_name(param)
|
|
197
|
+
if vtype:
|
|
198
|
+
for ch in param.children:
|
|
199
|
+
if ch.type == 'identifier' and self.name_of(ch) == var_name:
|
|
200
|
+
return vtype
|
|
201
|
+
elif ch.type in _DECLARATOR_CONTAINERS:
|
|
202
|
+
id_nd = _extract_declarator_id(ch, False)
|
|
203
|
+
if id_nd and self.name_of(id_nd) == var_name:
|
|
204
|
+
return vtype
|
|
205
|
+
if node.type in ('compound_statement', 'translation_unit',
|
|
206
|
+
'namespace_definition', 'function_definition'):
|
|
207
|
+
for child in node.children:
|
|
208
|
+
if child.type == 'declaration':
|
|
209
|
+
vtype = self._get_primary_type_name(child)
|
|
210
|
+
matched = False
|
|
211
|
+
for ch in child.children:
|
|
212
|
+
if ch.type == 'identifier' and self.name_of(ch) == var_name:
|
|
213
|
+
matched = True; break
|
|
214
|
+
elif ch.type in _DECLARATOR_CONTAINERS or ch.type == 'init_declarator':
|
|
215
|
+
id_nd = _extract_declarator_id(ch, False)
|
|
216
|
+
if id_nd and self.name_of(id_nd) == var_name:
|
|
217
|
+
matched = True; break
|
|
218
|
+
if matched:
|
|
219
|
+
if vtype:
|
|
220
|
+
return vtype
|
|
221
|
+
return self._extract_init_cast_type(child, var_name)
|
|
222
|
+
node = node.parent
|
|
223
|
+
return self.var_type_map.get(var_name)
|
|
224
|
+
|
|
225
|
+
# ── 字段访问对象类型推断 ─────────────────────────────────────────────
|
|
226
|
+
def _resolve_field_object_type(self, field_expr_node) -> str | None:
|
|
227
|
+
if not field_expr_node.children:
|
|
228
|
+
return None
|
|
229
|
+
value_node = field_expr_node.children[0]
|
|
230
|
+
vt = value_node.type
|
|
231
|
+
td = self.typedef_map
|
|
232
|
+
if vt == 'identifier':
|
|
233
|
+
t = self._lookup_var_type_in_scope(value_node)
|
|
234
|
+
return td.get(t, t)
|
|
235
|
+
elif vt == 'field_expression':
|
|
236
|
+
parent_type = self._resolve_field_object_type(value_node)
|
|
237
|
+
if parent_type and parent_type in self.struct_field_types:
|
|
238
|
+
for ch in value_node.children:
|
|
239
|
+
if ch.type == 'field_identifier':
|
|
240
|
+
ft = self.struct_field_types[parent_type].get(self.name_of(ch))
|
|
241
|
+
return td.get(ft, ft) if ft else None
|
|
242
|
+
return None
|
|
243
|
+
elif vt == 'pointer_expression':
|
|
244
|
+
for ch in value_node.children:
|
|
245
|
+
if ch.type == 'identifier':
|
|
246
|
+
t = self._lookup_var_type_in_scope(ch)
|
|
247
|
+
return td.get(t, t)
|
|
248
|
+
elif vt == 'subscript_expression':
|
|
249
|
+
arr = value_node.children[0] if value_node.children else None
|
|
250
|
+
if arr is None:
|
|
251
|
+
return None
|
|
252
|
+
if arr.type == 'identifier':
|
|
253
|
+
t = self._lookup_var_type_in_scope(arr)
|
|
254
|
+
return td.get(t, t) if t else None
|
|
255
|
+
elif arr.type == 'field_expression':
|
|
256
|
+
return self._resolve_field_object_type(arr)
|
|
257
|
+
return None
|
|
258
|
+
|
|
259
|
+
# ── 步骤 1:收集声明位节点 ────────────────────────────────────────────
|
|
260
|
+
def collect_decl_nodes(self):
|
|
261
|
+
local_decl: list = []
|
|
262
|
+
member_decl: list = []
|
|
263
|
+
|
|
264
|
+
def walk(node):
|
|
265
|
+
nt = node.type
|
|
266
|
+
if nt == 'declaration':
|
|
267
|
+
for ch in node.children:
|
|
268
|
+
if ch.type == 'identifier':
|
|
269
|
+
local_decl.append(ch)
|
|
270
|
+
elif ch.type in _DECLARATOR_CONTAINERS or ch.type == 'init_declarator':
|
|
271
|
+
id_node = _extract_declarator_id(ch, False)
|
|
272
|
+
if id_node: local_decl.append(id_node)
|
|
273
|
+
elif ch.type == 'function_declarator':
|
|
274
|
+
decl_type = self._get_primary_type_name(node)
|
|
275
|
+
if decl_type and decl_type in self.user_struct_names:
|
|
276
|
+
for sub in ch.children:
|
|
277
|
+
if sub.type == 'identifier':
|
|
278
|
+
local_decl.append(sub); break
|
|
279
|
+
elif nt == 'parameter_declaration':
|
|
280
|
+
for ch in node.children:
|
|
281
|
+
if ch.type == 'identifier':
|
|
282
|
+
local_decl.append(ch)
|
|
283
|
+
elif ch.type in _DECLARATOR_CONTAINERS:
|
|
284
|
+
id_node = _extract_declarator_id(ch, False)
|
|
285
|
+
if id_node: local_decl.append(id_node)
|
|
286
|
+
elif nt == 'for_range_loop':
|
|
287
|
+
found_type = False
|
|
288
|
+
for ch in node.children:
|
|
289
|
+
if ch.type in (':', 'compound_statement'): break
|
|
290
|
+
if ch.is_named and not found_type:
|
|
291
|
+
found_type = True; continue
|
|
292
|
+
if ch.type == 'identifier':
|
|
293
|
+
local_decl.append(ch); break
|
|
294
|
+
elif ch.type in _DECLARATOR_CONTAINERS:
|
|
295
|
+
id_node = _extract_declarator_id(ch, False)
|
|
296
|
+
if id_node: local_decl.append(id_node)
|
|
297
|
+
break
|
|
298
|
+
elif nt == 'field_declaration':
|
|
299
|
+
for ch in node.children:
|
|
300
|
+
if ch.type == 'field_identifier':
|
|
301
|
+
member_decl.append(ch)
|
|
302
|
+
elif ch.type in _DECLARATOR_CONTAINERS or ch.type == 'init_declarator':
|
|
303
|
+
id_node = _extract_declarator_id(ch, True)
|
|
304
|
+
if id_node: member_decl.append(id_node)
|
|
305
|
+
if nt == 'function_declarator':
|
|
306
|
+
for ch in node.children:
|
|
307
|
+
if ch.type != 'identifier': walk(ch)
|
|
308
|
+
else:
|
|
309
|
+
for ch in node.children: walk(ch)
|
|
310
|
+
|
|
311
|
+
walk(self.tree.root_node)
|
|
312
|
+
return local_decl, member_decl
|
|
313
|
+
|
|
314
|
+
# ── 步骤 3:统计频率 ──────────────────────────────────────────────────
|
|
315
|
+
def count_freq(self, local_names, member_names) -> dict:
|
|
316
|
+
freq: dict = {}
|
|
317
|
+
def walk(node):
|
|
318
|
+
if node.type == 'identifier':
|
|
319
|
+
n = self.name_of(node)
|
|
320
|
+
if n in local_names:
|
|
321
|
+
freq[n] = freq.get(n, 0) + 1
|
|
322
|
+
elif n in member_names and self._is_qid_name(node):
|
|
323
|
+
scope_cls = self._get_qid_scope_class(node.parent)
|
|
324
|
+
real_cls = self.typedef_map.get(scope_cls, scope_cls) if scope_cls else None
|
|
325
|
+
if real_cls and real_cls in self.user_struct_names:
|
|
326
|
+
freq[n] = freq.get(n, 0) + 1
|
|
327
|
+
elif node.type == 'field_identifier':
|
|
328
|
+
n = self.name_of(node)
|
|
329
|
+
if n in member_names: freq[n] = freq.get(n, 0) + 1
|
|
330
|
+
elif node.type == 'type_identifier':
|
|
331
|
+
n = self.name_of(node)
|
|
332
|
+
if n in local_names:
|
|
333
|
+
par = node.parent
|
|
334
|
+
if (par and par.type == 'parameter_declaration'
|
|
335
|
+
and par.parent and par.parent.type == 'parameter_list'
|
|
336
|
+
and par.parent.parent and par.parent.parent.type == 'function_declarator'
|
|
337
|
+
and par.parent.parent.parent
|
|
338
|
+
and par.parent.parent.parent.type == 'declaration'):
|
|
339
|
+
freq[n] = freq.get(n, 0) + 1
|
|
340
|
+
for ch in node.children: walk(ch)
|
|
341
|
+
walk(self.tree.root_node)
|
|
342
|
+
return freq
|
|
343
|
+
|
|
344
|
+
# ── 步骤 5:收集替换位置 ──────────────────────────────────────────────
|
|
345
|
+
def build_replacements(self, rename_map, local_names, member_names):
|
|
346
|
+
replacements: list = []
|
|
347
|
+
class_stack: list = []
|
|
348
|
+
|
|
349
|
+
def walk(node):
|
|
350
|
+
entered = False
|
|
351
|
+
nt = node.type
|
|
352
|
+
if nt in ('struct_specifier', 'class_specifier', 'union_specifier'):
|
|
353
|
+
for ch in node.children:
|
|
354
|
+
if ch.type == 'type_identifier':
|
|
355
|
+
class_stack.append(self.name_of(ch)); entered = True; break
|
|
356
|
+
|
|
357
|
+
if nt == 'identifier':
|
|
358
|
+
n = self.name_of(node)
|
|
359
|
+
if n in rename_map and n in local_names:
|
|
360
|
+
replacements.append((node.start_byte, node.end_byte, rename_map[n].encode()))
|
|
361
|
+
elif n in rename_map and n in member_names and class_stack:
|
|
362
|
+
replacements.append((node.start_byte, node.end_byte, rename_map[n].encode()))
|
|
363
|
+
elif n in rename_map and n in member_names and self._is_qid_name(node):
|
|
364
|
+
scope_cls = self._get_qid_scope_class(node.parent)
|
|
365
|
+
real_cls = self.typedef_map.get(scope_cls, scope_cls) if scope_cls else None
|
|
366
|
+
if real_cls and real_cls in self.user_struct_names:
|
|
367
|
+
replacements.append((node.start_byte, node.end_byte, rename_map[n].encode()))
|
|
368
|
+
elif nt == 'type_identifier':
|
|
369
|
+
n = self.name_of(node)
|
|
370
|
+
if n in rename_map and n in local_names:
|
|
371
|
+
par = node.parent
|
|
372
|
+
if (par and par.type == 'parameter_declaration'
|
|
373
|
+
and par.parent and par.parent.type == 'parameter_list'
|
|
374
|
+
and par.parent.parent and par.parent.parent.type == 'function_declarator'
|
|
375
|
+
and par.parent.parent.parent
|
|
376
|
+
and par.parent.parent.parent.type == 'declaration'):
|
|
377
|
+
decl_type = self._get_primary_type_name(par.parent.parent.parent)
|
|
378
|
+
if decl_type and decl_type in self.user_struct_names:
|
|
379
|
+
replacements.append((node.start_byte, node.end_byte, rename_map[n].encode()))
|
|
380
|
+
elif nt == 'field_identifier':
|
|
381
|
+
n = self.name_of(node)
|
|
382
|
+
if n in rename_map and n in member_names:
|
|
383
|
+
parent = node.parent
|
|
384
|
+
if parent and parent.type == 'field_expression':
|
|
385
|
+
obj_type = self._resolve_field_object_type(parent)
|
|
386
|
+
if obj_type and obj_type in self.user_struct_names:
|
|
387
|
+
replacements.append((node.start_byte, node.end_byte, rename_map[n].encode()))
|
|
388
|
+
else:
|
|
389
|
+
replacements.append((node.start_byte, node.end_byte, rename_map[n].encode()))
|
|
390
|
+
|
|
391
|
+
for ch in node.children: walk(ch)
|
|
392
|
+
if entered: class_stack.pop()
|
|
393
|
+
|
|
394
|
+
walk(self.tree.root_node)
|
|
395
|
+
return replacements
|
|
396
|
+
|
|
397
|
+
# ── 步骤 6:应用替换 ──────────────────────────────────────────────────
|
|
398
|
+
def apply(self, replacements) -> str:
|
|
399
|
+
replacements.sort(key=lambda x: x[0], reverse=True)
|
|
400
|
+
buf = bytearray(self.src)
|
|
401
|
+
for start, end, new in replacements:
|
|
402
|
+
buf[start:end] = new
|
|
403
|
+
return buf.decode('utf-8')
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
407
|
+
# Public entry point
|
|
408
|
+
# ─────────────────────────────────────────────────────────────────────────────
|
|
409
|
+
def golf_rename_symbols(code: str) -> str:
    """Rename user-declared variables and struct members in *code* to short names.

    The most frequently used symbols get the shortest names. Candidates with
    the same frequency are ordered by name so the output does not depend on
    set iteration order, and a symbol is left alone when the next free short
    name would not be shorter than it.

    Returns *code* unchanged (after printing a warning to stderr) when
    tree-sitter / tree-sitter-cpp cannot be imported, and also when there is
    nothing to rename.
    """
    try:
        from tree_sitter import Language, Parser
        import tree_sitter_cpp as tscpp
        _lang = Language(tscpp.language())
    except ImportError:
        print('[警告] 未找到 tree-sitter,跳过符号重命名。'
              ' 运行: pip install tree-sitter tree-sitter-cpp', file=sys.stderr)
        return code

    src_bytes = code.encode('utf-8')
    parser = Parser(_lang)
    tree = parser.parse(src_bytes)

    ctx = _RenameCtx(src_bytes, tree)
    ctx.build_type_context()

    local_decl, member_decl = ctx.collect_decl_nodes()
    name_of = ctx.name_of

    local_names = {name_of(n) for n in local_decl if len(name_of(n)) >= _MIN_RENAME_LEN}
    member_names = {name_of(n) for n in member_decl if len(name_of(n)) >= _MIN_RENAME_LEN}
    if not local_names and not member_names:
        return code

    all_targets = local_names | member_names
    freq = ctx.count_freq(local_names, member_names)

    # Step 4: build the rename map.
    # Every identifier-like word already in the text is off limits as a new name.
    occupied = set(re.findall(r'\b[A-Za-z_]\w*\b', code))
    rename_map: dict = {}
    gen = _gen_short_names()
    # Descending frequency, ties broken by name: string hashing is randomized
    # per process, so relying on set order would vary the result between runs.
    for original in sorted(all_targets, key=lambda name: (-freq.get(name, 0), name)):
        short = next(gen)
        while short in occupied or short == original:
            short = next(gen)
        if len(short) >= len(original):
            # Generated names never get shorter, so this rename cannot save
            # bytes; keep the original (it is already in ``occupied``).
            continue
        rename_map[original] = short
        occupied.add(short)

    replacements = ctx.build_replacements(rename_map, local_names, member_names)
    if not replacements:
        return code
    return ctx.apply(replacements)
|
cppgolf/merge.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""merge.py — 递归内联本地 #include,去除 include guard / pragma once"""
|
|
2
|
+
import re
|
|
3
|
+
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
# Opening of a classic include guard: ``#ifndef X`` immediately followed by ``#define X``.
_GUARD_TOP = re.compile(
    r'^\s*#\s*ifndef\s+(\w+)\s*\n\s*#\s*define\s+\1\b[^\n]*\n', re.MULTILINE)
# A final ``#endif`` (optionally with a ``//`` comment) at the very end of the text.
_GUARD_BOTTOM = re.compile(r'\n\s*#\s*endif\s*(?://[^\n]*)?\s*$')
_PRAGMA_ONCE = re.compile(r'^\s*#\s*pragma\s+once\s*$', re.MULTILINE)


def strip_include_guard(code: str) -> str:
    """Remove ``#pragma once`` and a surrounding ``#ifndef/#define ... #endif`` guard.

    The first ``#pragma once`` line is blanked. The guard is removed only when
    both its opening pair and a closing ``#endif`` at the end of the text are
    found, so a conditional is never left half-removed. Any text before the
    opening pair (for example a licence comment) is kept.

    Args:
        code: contents of one source/header file.

    Returns:
        The text with the guard directives removed, or with only the pragma
        removed when no complete guard is recognised.
    """
    code = _PRAGMA_ONCE.sub('', code, count=1)
    top = _GUARD_TOP.search(code)
    if top is None:
        return code
    body, closed = _GUARD_BOTTOM.subn('', code[top.end():], count=1)
    if not closed:
        # No matching trailing #endif: dropping only the top would leave an
        # unbalanced conditional, so leave the text as it is.
        return code
    return code[:top.start()] + body
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def merge_files(filepath: Path, include_dirs: list,
                visited: set, sys_includes: list) -> str:
    """Return the text of *filepath* with local ``#include "..."`` files inlined.

    Args:
        filepath: file to read; it is resolved before use.
        include_dirs: extra directories searched after the file's own directory.
        visited: resolved paths already merged; updated in place so each file
            is inlined at most once.
        sys_includes: ``#include <...>`` lines seen so far; new ones are
            appended in place and removed from the returned text.

    Returns:
        The merged text, or ``''`` when the file was already visited or does
        not exist (a warning is printed to stderr in the latter case).
    """
    real_path = filepath.resolve()
    if real_path in visited:
        return ''
    visited.add(real_path)

    try:
        code = real_path.read_text(encoding='utf-8-sig', errors='replace')
    except FileNotFoundError:
        print(f'[警告] 找不到文件:{real_path}', file=sys.stderr)
        return ''

    code = strip_include_guard(code)
    # The including file's own directory takes precedence over -I directories.
    search_roots = [real_path.parent] + list(include_dirs)
    pieces = []

    for line in code.splitlines(keepends=True):
        directive = line.strip()

        # System header: #include <...>  — hoisted, de-duplicated, dropped here.
        sys_match = re.match(r'#\s*include\s*<([^>]+)>', directive)
        if sys_match is not None:
            header_line = f'#include <{sys_match.group(1)}>\n'
            if header_line not in sys_includes:
                sys_includes.append(header_line)
            continue

        # Anything that is not a local header include is copied through.
        local_match = re.match(r'#\s*include\s*"([^"]+)"', directive)
        if local_match is None:
            pieces.append(line)
            continue

        # Local header: #include "..."
        inc = local_match.group(1)
        candidates = ((root / inc).resolve() for root in search_roots)
        found = next((path for path in candidates if path.exists()), None)
        if found is None:
            print(f'[警告] 找不到本地头文件:{inc}', file=sys.stderr)
            pieces.append(line)
        else:
            pieces.append(f'\n// ── inlined: {inc} ──\n')
            pieces.append(merge_files(found, include_dirs, visited, sys_includes))
            pieces.append(f'\n// ── end: {inc} ──\n')

    return ''.join(pieces)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""strip_comments.py — 移除 C/C++ 注释(状态机,感知字符串/字符字面量)"""
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
_RAW_STRING_OPEN = re.compile(r'R"([^()\\ \t\n]*)\(')


def strip_comments(code: str) -> str:
    """Remove all C/C++ comments while leaving literals untouched.

    String literals, character literals and raw string literals
    (``R"delim(...)delim"``) are copied verbatim, so comment markers inside
    them are not interpreted.

    A ``//`` comment is replaced by a single space (its terminating newline is
    kept).  A ``/* */`` comment is replaced by as many newlines as it
    contained, or by a single space when it contained none.  An unterminated
    ``/*`` comment extends to the end of the input.

    Args:
        code: C/C++ source text.

    Returns:
        The source text without comments.
    """
    result = []
    i = 0
    n = len(code)

    while i < n:
        ch = code[i]

        # Raw string literal.  Matching at an offset avoids re-slicing the
        # remaining text at every character (which was quadratic).
        if ch == 'R':
            raw_m = _RAW_STRING_OPEN.match(code, i)
            if raw_m:
                end_marker = ')' + raw_m.group(1) + '"'
                end_idx = code.find(end_marker, raw_m.end())
                if end_idx == -1:
                    # Unterminated raw string: keep the rest as-is.
                    result.append(code[i:])
                    break
                end_idx += len(end_marker)
                result.append(code[i:end_idx])
                i = end_idx
                continue

        # String or character literal: skip to the matching closing quote,
        # stepping over backslash escapes.
        if ch == '"' or ch == "'":
            j = i + 1
            while j < n:
                if code[j] == '\\':
                    j += 2
                elif code[j] == ch:
                    j += 1
                    break
                else:
                    j += 1
            result.append(code[i:j])
            i = j
            continue

        # Line comment; a backslash-newline continues it onto the next line.
        if code.startswith('//', i):
            j = i + 2
            while j < n:
                if code[j] == '\\' and j + 1 < n and code[j + 1] == '\n':
                    j += 2
                elif code[j] == '\n':
                    break
                else:
                    j += 1
            result.append(' ')
            i = j
            continue

        # Block comment.
        if code.startswith('/*', i):
            close = code.find('*/', i + 2)
            # No terminator: the comment swallows everything up to the end
            # instead of leaking its final character into the output.
            j = n if close == -1 else close + 2
            nl = code.count('\n', i, j)
            result.append('\n' * nl if nl else ' ')
            i = j
            continue

        result.append(ch)
        i += 1

    return ''.join(result)
|
cppgolf/transforms.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""transforms.py — 语义级高尔夫变换(std::、typedef、endl、inline、braces、shortcuts)"""
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def golf_std_namespace(code: str) -> str:
    """Drop every ``std::`` prefix, adding ``using namespace std;`` if needed.

    When the text contains no ``std::`` it is returned unchanged.  Otherwise,
    unless a ``using namespace std;`` is already present, one is inserted
    before the first non-blank line that is not part of a preprocessor
    directive (or at the end when there is no such line).  Continuation lines
    of a multi-line directive count as part of the directive, so the statement
    is never inserted into the middle of a macro definition.

    Note: the replacement is purely textual, so ``std::`` inside string
    literals is removed as well.

    Args:
        code: C++ source text.

    Returns:
        The transformed source text.
    """
    if not re.search(r'\bstd::', code):
        return code

    if not re.search(r'\busing\s+namespace\s+std\s*;', code):
        lines = code.split('\n')
        insert_at = len(lines)
        in_continuation = False
        for idx, line in enumerate(lines):
            s = line.strip()
            is_directive = in_continuation or s.startswith('#')
            # A directive line ending in a backslash continues on the next line.
            in_continuation = is_directive and s.endswith('\\')
            if s and not is_directive:
                insert_at = idx
                break
        lines.insert(insert_at, 'using namespace std;')
        code = '\n'.join(lines)

    return re.sub(r'\bstd::', '', code)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def golf_typedefs(code: str) -> str:
    """Abbreviate frequently used long type names with ``#define`` macros.

    For each known type spelling that occurs at least twice, every occurrence
    is replaced by a short macro name and the matching ``#define`` is added
    after the last ``#include``/``#define`` directive of the text (or at the
    very top when there is none).  A type is skipped when its macro name
    already appears as an identifier in the text.

    ``unsigned long long`` is handled before ``long long`` so that the ``ull``
    macro can actually be produced.  The template spellings (``vector<int>``
    and friends) are matched regardless of what follows the closing ``>``; a
    space is inserted after the macro name when an identifier character
    follows directly, so ``vector<int>v`` becomes ``vi v`` and not ``viv``.

    Note: the replacement is purely textual and also affects string literals.

    Args:
        code: C++ source text.

    Returns:
        The transformed source text.
    """
    replacements = [
        (r'\bunsigned long long\b', 'ull', '#define ull unsigned long long'),
        (r'\blong long\b', 'll', '#define ll long long'),
        (r'\blong double\b', 'ld', '#define ld long double'),
        # No trailing \b here: '>' is not a word character, so a trailing \b
        # would only match when an identifier is glued to the '>'.
        (r'\bvector<int>', 'vi', '#define vi vector<int>'),
        (r'\bvector<ll>', 'vll', '#define vll vector<ll>'),
        (r'\bpair<int,int>', 'pii', '#define pii pair<int,int>'),
        (r'\bpair<ll,ll>', 'pll', '#define pll pair<ll,ll>'),
    ]

    def substitute(short):
        def repl(m):
            nxt = m.string[m.end():m.end() + 1]
            glued = nxt == '_' or (nxt.isascii() and nxt.isalnum())
            return short + ' ' if glued else short
        return repl

    defines_to_add = []
    for pattern, short, defline in replacements:
        if re.search(r'\b' + re.escape(short) + r'\b', code):
            continue
        if len(re.findall(pattern, code)) >= 2:
            defines_to_add.append(defline)
            code = re.sub(pattern, substitute(short), code)

    if defines_to_add:
        # A directive includes its backslash-continued lines, so the new
        # defines are never inserted into the middle of a multi-line macro.
        last = max(
            (m.end() for m in re.finditer(
                r'^#(?:include|define)\b(?:.*\\\n)*.*$', code, re.MULTILINE)),
            default=0,
        )
        code = code[:last] + '\n' + '\n'.join(defines_to_add) + '\n' + code[last:]
    return code
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
_MAIN_OPEN = re.compile(r'\bint\s+main\s*\([^)]*\)\s*\{')
_TRAILING_RETURN_0 = re.compile(r'\s*(?<!\w)return\s+0\s*;\s*\Z')


def _find_closing_brace(code: str, open_idx: int) -> int:
    """Return the index of the ``}`` matching the ``{`` at *open_idx*, or -1.

    Braces inside string/character literals and comments are ignored.  Raw
    string literals are not treated specially.
    """
    depth = 0
    i = open_idx
    n = len(code)
    while i < n:
        ch = code[i]
        if ch == '"' or ch == "'":
            # Heuristic for a digit separator such as 1'000: an apostrophe
            # between alphanumerics that does not enclose a single character.
            if (ch == "'" and i > 0 and code[i - 1].isalnum()
                    and code[i + 1:i + 2].isalnum()
                    and code[i + 2:i + 3] != "'"):
                i += 1
                continue
            i += 1
            while i < n and code[i] != ch:
                i += 2 if code[i] == '\\' else 1
            i += 1
            continue
        if code.startswith('//', i):
            nl = code.find('\n', i)
            if nl == -1:
                return -1
            i = nl + 1
            continue
        if code.startswith('/*', i):
            end = code.find('*/', i + 2)
            if end == -1:
                return -1
            i = end + 2
            continue
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return i
        i += 1
    return -1


def golf_remove_main_return(code: str) -> str:
    """Remove a trailing ``return 0;`` from ``main`` (the standard allows it).

    Only the statement that directly precedes the closing brace of ``main``
    is removed, and only when it is a statement of its own (preceded by
    ``;``, ``{`` or ``}``).  A ``return 0;`` inside a nested block, after an
    unbraced ``if``/``else``/loop header, or in another function is left
    alone.  The removed statement and its surrounding whitespace are replaced
    by a single newline.

    Args:
        code: C++ source text.

    Returns:
        The transformed source text, or *code* unchanged when nothing applies.
    """
    head = _MAIN_OPEN.search(code)
    if head is None:
        return code

    body_start = head.end()
    close = _find_closing_brace(code, body_start - 1)
    if close == -1:
        return code

    body = code[body_start:close]
    tail = _TRAILING_RETURN_0.search(body)
    if tail is None:
        return code

    kept = body[:tail.start()]
    # "else return 0;" / "if (x) return 0;" must keep their statement.
    if kept and kept[-1] not in ';{}':
        return code

    return code[:body_start] + kept + '\n' + code[close:]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def golf_endl_to_newline(code: str) -> str:
    r"""Replace ``endl`` with the string literal ``"\n"``.

    Two forms are rewritten: ``<< endl`` (becomes ``<< "\n"``) and a bare
    ``endl`` that is followed by ``;``, ``,`` or ``)``.  An optional ``std::``
    qualifier is consumed together with ``endl`` so that ``std::endl`` does
    not turn into the invalid ``std::"\n"``.  An ``endl`` qualified by any
    other scope (``foo::endl``) is left alone.

    Args:
        code: C++ source text.

    Returns:
        The transformed source text.
    """
    nl_str = r'"\n"'
    # Callables are used as replacement so the backslash is inserted literally.
    code = re.sub(r'<<\s*(?:std\s*::\s*)?endl\b',
                  lambda _: '<< ' + nl_str, code)
    code = re.sub(r'(?:\bstd\s*::\s*|(?<!::))\bendl\b(?=\s*[;,)])',
                  lambda _: nl_str, code)
    return code
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def golf_remove_inline(code: str) -> str:
    """Remove the ``inline`` keyword together with the whitespace after it.

    ``inline static`` (C++17 inline static data members) and
    ``inline namespace`` are kept, however much whitespace separates the two
    words.  The lookahead sits before the whitespace match so that regex
    backtracking cannot step around it.

    Args:
        code: C++ source text.

    Returns:
        The transformed source text.
    """
    return re.sub(r'\binline\b(?!\s+(?:static|namespace)\b)\s+', '', code)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
_SINGLE_STMT_BLOCK = re.compile(
    r'\b(if|for|while)\s*(\([^)]*\))\s*\{\s*([^{};]*;)\s*\}',
    re.DOTALL,
)
_ELSE_AHEAD = re.compile(r'\s*else\b')
_NESTED_IF = re.compile(r'\bif\b')


def golf_braces_single_stmt(code: str) -> str:
    """(Aggressive) drop the braces around single-statement if/for/while bodies.

    A block is unwrapped when its header contains no nested parentheses and
    its body is exactly one statement without braces.  The braces are kept
    when the body contains an ``if`` and the block is followed by ``else``:
    without them the ``else`` would bind to the inner ``if`` and change the
    meaning of the program.

    Args:
        code: C++ source text.

    Returns:
        The transformed source text.
    """
    def unwrap(m):
        if _NESTED_IF.search(m.group(3)) and _ELSE_AHEAD.match(code, m.end()):
            return m.group(0)
        return m.group(1) + m.group(2) + m.group(3)

    return _SINGLE_STMT_BLOCK.sub(unwrap, code)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def golf_define_shortcuts(code: str) -> str:
    """Abbreviate ``cout``/``cin`` with ``#define`` macros when used >= 5 times.

    Every occurrence of the stream name is replaced by the short macro name
    and the matching ``#define`` is added after the last
    ``#include``/``#define`` directive of the text (or at the very top when
    there is none).  A shortcut is skipped when its macro name already occurs
    as an identifier in the text, which also covers the case that the define
    is already present; otherwise the macro would silently rewrite an
    unrelated variable.

    Note: the replacement is purely textual and also affects string literals.

    Args:
        code: C++ source text.

    Returns:
        The transformed source text.
    """
    shortcuts = [
        (r'\bcout\b', 'co', '#define co cout'),
        (r'\bcin\b', 'ci', '#define ci cin'),
    ]
    defines_to_add = []
    for pattern, short, defline in shortcuts:
        if re.search(r'\b' + re.escape(short) + r'\b', code):
            continue
        if len(re.findall(pattern, code)) >= 5:
            defines_to_add.append(defline)
            code = re.sub(pattern, short, code)

    if defines_to_add:
        # A directive includes its backslash-continued lines, so the new
        # defines are never inserted into the middle of a multi-line macro.
        last = max(
            (m.end() for m in re.finditer(
                r'^#(?:include|define)\b(?:.*\\\n)*.*$', code, re.MULTILINE)),
            default=0,
        )
        code = code[:last] + '\n' + '\n'.join(defines_to_add) + '\n' + code[last:]
    return code
|
cppgolf/whitespace.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
"""whitespace.py — token 级空白压缩(字符串/预处理行感知)"""
|
|
2
|
+
import re
|
|
3
|
+
|
|
4
|
+
# Text ends with / starts with an ASCII identifier character.
_IDENT_END = re.compile(r'[A-Za-z0-9_]$')
_IDENT_START = re.compile(r'^[A-Za-z0-9_]')

# A complete preprocessor line, including backslash-continued lines.
# Not anchored to the start of a line: any '#' (with the blanks before it)
# starts a match that runs to the end of the logical line.
_PP_LINE_RE = re.compile(r'[ \t]*#[^\n]*(?:\\\n[^\n]*)*')

# Token pattern.  Exactly one of the seven capture groups is non-empty per
# match; text matching none of the alternatives is not matched at all.
_TOKENIZE_RE = re.compile(
    r'(\x01[^\x01]+\x01)'                         # group 1: \x01PP...\x01 placeholder
    r'|(\x02[^\x02]+\x02)'                        # group 2: \x02S...\x02 placeholder
    r'|(0[xX][0-9A-Fa-f]+[uUlL]*'                 # group 3: hexadecimal literal
    r'|0[bB][01]+[uUlL]*'                         #          binary literal
    r'|\d[\d.]*(?:[eE][+-]?\d+)?[uUlLfF]*'        #          integer / floating literal
    r'|\.[\d]+(?:[eE][+-]?\d+)?[fF]?)'            #          floating literal starting with '.'
    r'|([A-Za-z_]\w*)'                            # group 4: identifier
    r'|(>>=|<<=|->|\.\.\.|::'                     # group 5: multi-character operators
    r'|[+\-*/%&|^]=|==|!=|<=|>=|<<|>>|\+\+|\-\-|&&|\|\|'
    r'|[~!%^&*()\-+=\[\]{}|;:,.<>?/])'            #          single-character operators
    r'|(\n[ \t]*)'                                # group 6: newline plus following indent
    r'|([ \t]+)',                                 # group 7: horizontal whitespace
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _needs_space(a: str, b: str) -> bool:
|
|
28
|
+
if not a or not b:
|
|
29
|
+
return False
|
|
30
|
+
if _IDENT_END.search(a) and _IDENT_START.match(b):
|
|
31
|
+
return True
|
|
32
|
+
if a[-1] in '+-' and b[0] == a[-1]:
|
|
33
|
+
return True
|
|
34
|
+
return False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _tokenize(code: str) -> list:
    """Split *code* into ``(kind, text)`` pairs using ``_TOKENIZE_RE``.

    Kinds are ``'lit'`` (a PP-line or string placeholder), ``'num'``,
    ``'id'``, ``'op'``, ``'nl'`` and ``'sp'``.  Newline tokens are normalised
    to ``'\\n'`` and whitespace tokens to a single space.  Text that the
    pattern does not match produces no token.
    """
    # Kind for each capture group of _TOKENIZE_RE, in group order.
    kinds = ('lit', 'lit', 'num', 'id', 'op', 'nl', 'sp')
    normalised = {'nl': '\n', 'sp': ' '}

    tokens = []
    for match in _TOKENIZE_RE.finditer(code):
        # Exactly one top-level group takes part in every match.
        kind = kinds[match.lastindex - 1]
        tokens.append((kind, normalised.get(kind, match.group(0))))
    return tokens
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def _extract_strings(src: str) -> tuple[str, list]:
|
|
51
|
+
"""提取字符串/字符字面量,替换为 \\x02S{n}\\x02 占位符。"""
|
|
52
|
+
str_lits: list = []
|
|
53
|
+
result = []
|
|
54
|
+
i = 0
|
|
55
|
+
n = len(src)
|
|
56
|
+
while i < n:
|
|
57
|
+
raw_m = re.match(r'R"([^()\\ \t\n]*)\(', src[i:])
|
|
58
|
+
if raw_m:
|
|
59
|
+
delim = raw_m.group(1)
|
|
60
|
+
end_marker = ')' + delim + '"'
|
|
61
|
+
end_idx = src.find(end_marker, i + raw_m.end())
|
|
62
|
+
if end_idx == -1:
|
|
63
|
+
result.append(src[i:]); break
|
|
64
|
+
end_idx += len(end_marker)
|
|
65
|
+
idx = len(str_lits); str_lits.append(src[i:end_idx])
|
|
66
|
+
result.append(f'\x02S{idx}\x02'); i = end_idx; continue
|
|
67
|
+
if src[i] == '"':
|
|
68
|
+
j = i + 1
|
|
69
|
+
while j < n:
|
|
70
|
+
if src[j] == '\\': j += 2
|
|
71
|
+
elif src[j] == '"': j += 1; break
|
|
72
|
+
else: j += 1
|
|
73
|
+
idx = len(str_lits); str_lits.append(src[i:j])
|
|
74
|
+
result.append(f'\x02S{idx}\x02'); i = j; continue
|
|
75
|
+
if src[i] == "'":
|
|
76
|
+
j = i + 1
|
|
77
|
+
while j < n:
|
|
78
|
+
if src[j] == '\\': j += 2
|
|
79
|
+
elif src[j] == "'": j += 1; break
|
|
80
|
+
else: j += 1
|
|
81
|
+
idx = len(str_lits); str_lits.append(src[i:j])
|
|
82
|
+
result.append(f'\x02S{idx}\x02'); i = j; continue
|
|
83
|
+
result.append(src[i]); i += 1
|
|
84
|
+
return ''.join(result), str_lits
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def compress_whitespace(code: str) -> str:
    """Minimise whitespace at token level.

    Steps:
        1. Replace string/character literals by ``\\x02S{n}\\x02`` placeholders.
        2. Replace preprocessor lines by ``\\x01PP{n}\\x01`` placeholders.
        3. Tokenise and emit the tokens, keeping a single space only where
           ``_needs_space`` requires one.
        4. Restore the preprocessor lines, each on a line of its own, and
           collapse runs of newlines.
        5. Restore the literals.

    Newline runs are collapsed *before* the literals are restored so that
    blank lines inside a raw string literal are preserved.

    Args:
        code: C++ source text.

    Returns:
        The compressed source text, stripped of leading/trailing whitespace.
    """
    code_no_str, str_lits = _extract_strings(code)

    pp_lines: list = []

    def replace_pp(m):
        pp_lines.append(m.group(0).strip())
        return f'\x01PP{len(pp_lines) - 1}\x01'

    code_no_pp = _PP_LINE_RE.sub(replace_pp, code_no_str)

    out: list = []
    prev_val = ''
    pending_space = False
    for kind, val in _tokenize(code_no_pp):
        if kind in ('nl', 'sp'):
            # Whitespace is only remembered; whether it survives depends on
            # the tokens on either side.
            pending_space = True
            continue
        if pending_space and _needs_space(prev_val, val):
            out.append(' ')
        pending_space = False
        out.append(val)
        prev_val = val

    code_min = ''.join(out)
    code_min = re.sub(r'\x01PP(\d+)\x01',
                      lambda m: '\n' + pp_lines[int(m.group(1))] + '\n',
                      code_min)
    code_min = re.sub(r'\n{2,}', '\n', code_min)
    code_min = re.sub(r'\x02S(\d+)\x02',
                      lambda m: str_lits[int(m.group(1))],
                      code_min)
    return code_min.strip()
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cppgolf
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: C++ multi-file merge & code golf / minifier tool
|
|
5
|
+
License: MIT
|
|
6
|
+
Project-URL: Homepage, https://github.com/yourname/cppgolf
|
|
7
|
+
Project-URL: Issues, https://github.com/yourname/cppgolf/issues
|
|
8
|
+
Keywords: cpp,c++,golf,minify,code-golf,competitive-programming
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
18
|
+
Classifier: Topic :: Text Processing :: Filters
|
|
19
|
+
Requires-Python: >=3.10
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Requires-Dist: tree-sitter>=0.25
|
|
22
|
+
Requires-Dist: tree-sitter-cpp>=0.23
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: build; extra == "dev"
|
|
25
|
+
Requires-Dist: twine; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest; extra == "dev"
|
|
27
|
+
|
|
28
|
+
# CPPGolf
|
|
29
|
+
|
|
30
|
+
C++ 多文件合并 + 代码高尔夫(压缩)工具,专为竞技编程场景设计。
|
|
31
|
+
|
|
32
|
+
## 安装
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install cppgolf
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## CLI 用法
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
cppgolf solution.cpp # 输出到 stdout
|
|
42
|
+
cppgolf solution.cpp -o golf.cpp # 输出到文件
|
|
43
|
+
cppgolf solution.cpp -I include/ -o out.cpp
|
|
44
|
+
cppgolf solution.cpp --rename # 符号压缩
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### 选项
|
|
48
|
+
|
|
49
|
+
| 选项 | 说明 |
|
|
50
|
+
|------|------|
|
|
51
|
+
| `-o FILE` | 输出文件(默认 stdout) |
|
|
52
|
+
| `-I DIR` | 追加 include 搜索目录(可多次) |
|
|
53
|
+
| `--no-merge` | 跳过 `#include "..."` 内联 |
|
|
54
|
+
| `--no-strip-comments` | 保留注释 |
|
|
55
|
+
| `--no-compress-ws` | 保留空白格式 |
|
|
56
|
+
| `--no-std-ns` | 不添加 `using namespace std` |
|
|
57
|
+
| `--no-typedefs` | 不添加 `ll`/`ld` 等类型宏 |
|
|
58
|
+
| `--keep-main-return` | 保留 `return 0;` |
|
|
59
|
+
| `--keep-endl` | 保留 `endl` |
|
|
60
|
+
| `--keep-inline` | 保留 `inline` 关键字 |
|
|
61
|
+
| `--aggressive` | 去除单语句 if/for/while 花括号 |
|
|
62
|
+
| `--shortcuts` | 高频 cout/cin → `#define` 缩写 |
|
|
63
|
+
| `--rename` | 变量/成员名压缩为 a/b/aa/… |
|
|
64
|
+
| `--stats` | 显示压缩率统计 |
|
|
65
|
+
|
|
66
|
+
## Python API
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from cppgolf import process
|
|
70
|
+
from pathlib import Path
|
|
71
|
+
|
|
72
|
+
result = process(
|
|
73
|
+
Path("solution.cpp"),
|
|
74
|
+
include_dirs=[], # 额外的 #include 搜索目录
|
|
75
|
+
rename_symbols=True,
|
|
76
|
+
)
|
|
77
|
+
print(result)
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
也可单独使用各 pass:
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
from cppgolf import strip_comments, compress_whitespace, golf_rename_symbols
|
|
84
|
+
|
|
85
|
+
code = open("a.cpp").read()
|
|
86
|
+
code = strip_comments(code)
|
|
87
|
+
code = golf_rename_symbols(code)
|
|
88
|
+
code = compress_whitespace(code)
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## 功能说明
|
|
92
|
+
|
|
93
|
+
- **合并**:递归内联 `#include "..."` 本地头文件,去除 include guard / `#pragma once`,系统头去重
|
|
94
|
+
- **去注释**:状态机感知字符串,支持 `//`、`/* */`、原始字符串 `R"(...)"`
|
|
95
|
+
- **语义压缩**:`std::` 消除、`long long→ll` 宏、`endl→"\n"`、去 `return 0;`、去 `inline`
|
|
96
|
+
- **空白压缩**:token 级最小化,代码压为单行,预处理行保留换行
|
|
97
|
+
- **符号重命名**:tree-sitter AST 驱动,仅重命名用户自定义变量/参数/成员名,不碰函数名/类型名/命名空间
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
cppgolf/__init__.py,sha256=AvGeURSWj92-Q1rpU0zMP-Hhwy-ZxQLLY8LZ9UxUNAc,1306
|
|
2
|
+
cppgolf/__main__.py,sha256=193sWXGl7F46qmoZf0dNXh634tTf-FIoEq040_1HSj4,5627
|
|
3
|
+
cppgolf/golf_rename.py,sha256=5vYDadhqmTkJdXd9tCZD4krurKkDOCNMDGaiTa450DQ,22938
|
|
4
|
+
cppgolf/merge.py,sha256=0f5NkDpJAzNrk4gOgByCxfZGLEWkPa4n2zIrbeyIYGE,2304
|
|
5
|
+
cppgolf/strip_comments.py,sha256=u5t3FzGQ00IU8z-qzvyHtz7ujmT2wILIX8Zwkt0msik,2051
|
|
6
|
+
cppgolf/transforms.py,sha256=YMxpY41iPgD8iqlsYiTx_t_hQTlTU3PFM4tUby9p1OI,3910
|
|
7
|
+
cppgolf/whitespace.py,sha256=x13TZ9oqpgH566QwHuan7ArIL2-2l7usJJmpZJn99Eg,4567
|
|
8
|
+
cppgolf-0.1.0.dist-info/METADATA,sha256=qxy7DvOOjvI-IXPjBn_cG4vEwD9pmhBHESfwIJaqcfk,3341
|
|
9
|
+
cppgolf-0.1.0.dist-info/WHEEL,sha256=YCfwYGOYMi5Jhw2fU4yNgwErybb2IX5PEwBKV4ZbdBo,91
|
|
10
|
+
cppgolf-0.1.0.dist-info/entry_points.txt,sha256=LLvAb8oAIYhpuSI96XviIXHzH3_FVpv4_ptaMFtsnhE,50
|
|
11
|
+
cppgolf-0.1.0.dist-info/top_level.txt,sha256=nE1Myu2JceWMeVSMq3WSiGzhTeHlKPHfwyiDcmd9gGk,8
|
|
12
|
+
cppgolf-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cppgolf
|