code-explore-by-sql 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_explore_by_sql-0.1.0.dist-info/METADATA +205 -0
- code_explore_by_sql-0.1.0.dist-info/RECORD +29 -0
- code_explore_by_sql-0.1.0.dist-info/WHEEL +4 -0
- code_explore_by_sql-0.1.0.dist-info/entry_points.txt +3 -0
- code_explore_by_sql-0.1.0.dist-info/licenses/LICENSE +21 -0
- code_source_sql/__init__.py +9 -0
- code_source_sql/__main__.py +5 -0
- code_source_sql/bracket_scanner.py +385 -0
- code_source_sql/build_db.py +284 -0
- code_source_sql/code_block_summary.py +522 -0
- code_source_sql/configs.py +402 -0
- code_source_sql/db.py +625 -0
- code_source_sql/edge_extractor.py +183 -0
- code_source_sql/languages/__init__.py +31 -0
- code_source_sql/languages/c.py +118 -0
- code_source_sql/languages/cpp.py +106 -0
- code_source_sql/languages/csharp.py +103 -0
- code_source_sql/languages/glsl.py +162 -0
- code_source_sql/languages/go.py +91 -0
- code_source_sql/languages/hlsl.py +155 -0
- code_source_sql/languages/java.py +98 -0
- code_source_sql/languages/javascript.py +215 -0
- code_source_sql/languages/kotlin.py +108 -0
- code_source_sql/languages/python.py +105 -0
- code_source_sql/languages/rust.py +91 -0
- code_source_sql/languages/swift.py +116 -0
- code_source_sql/server.py +264 -0
- code_source_sql/symbol_analyzer.py +487 -0
- code_source_sql/unreal_rules.py +163 -0
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
"""Symbol analyzer — block classification with QN normalization and framework decoration awareness.
|
|
2
|
+
|
|
3
|
+
Implements plan.md rules:
|
|
4
|
+
- Black list: skip control flow, framework noise macros, basic types
|
|
5
|
+
- QN normalization: always ClassName::MethodName
|
|
6
|
+
- Framework decoration macro sniffing: look upward 1-3 lines for decoration macros
|
|
7
|
+
- Decoration metadata extraction for framework-specific edges
|
|
8
|
+
- Support for delegate_def, macro_def block types
|
|
9
|
+
|
|
10
|
+
Refactored to accept LanguageConfig + FrameworkConfig instead of hardcoded constants.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
|
|
18
|
+
from .bracket_scanner import BracketBlock
|
|
19
|
+
from .configs import FrameworkConfig, LanguageConfig
|
|
20
|
+
|
|
21
|
+
# ── Data types ───────────────────────────────────────────────────────────
|
|
22
|
+
|
|
23
|
+
@dataclass
class SymbolDef:
    """A classified symbol produced from one scanned block.

    Fields (line numbers are 1-based):
        qualified_name: normalized name, e.g. ``ClassName::MethodName``.
        block_type: kind label such as class, method, enum, delegate_def,
            macro_def or function.
        file_id: identifier of the file the symbol was found in.
        start_line: first line of the symbol; includes a decoration macro
            line above the block when one was found.
        end_line: last line of the symbol.
        decoration_meta: decoration macro name mapped to its parameter list,
            e.g. ``{"UFUNCTION": ["Server", "Reliable"]}``.
        parent_class: for methods, the containing class name.
        signature: declaration text recorded for the symbol.
        inheritance_base: for class/struct, the base class name.
        language: name of the language configuration used.
    """

    qualified_name: str
    block_type: str
    file_id: int
    start_line: int
    end_line: int
    decoration_meta: dict | None = None
    parent_class: str | None = None
    signature: str | None = None
    inheritance_base: str | None = None
    language: str = ""
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ExtraSymbol:
    """A symbol defined outside braces (framework delegates, #define macros).

    Fields:
        qualified_name: name of the delegate or macro.
        block_type: ``delegate_def`` or ``macro_def``.
        file_id: identifier of the file the symbol was found in.
        start_line: first line of the symbol.
        end_line: last line of the symbol.
        signature: source text recorded for the symbol.
        language: name of the language configuration used.
    """

    qualified_name: str
    block_type: str
    file_id: int
    start_line: int
    end_line: int
    signature: str = ""
    language: str = ""
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ── Helpers ──────────────────────────────────────────────────────────────
|
|
51
|
+
|
|
52
|
+
def _strip_comments(text: str, lang: LanguageConfig) -> str:
    """Replace every comment in *text* with a single space.

    Block comments (``lang.block_comment_pair``) are removed first and may
    span lines; line comments (``lang.line_comment``) are then removed up to,
    but not including, the end of their line. Either kind is skipped when the
    language does not define it. Comment markers are matched textually, so a
    marker inside a string literal is treated as a comment too.
    """
    pair = lang.block_comment_pair
    if pair:
        opener, closer = pair
        block_pattern = re.escape(opener) + ".*?" + re.escape(closer)
        text = re.sub(block_pattern, " ", text, flags=re.DOTALL)

    marker = lang.line_comment
    if marker:
        text = re.sub(re.escape(marker) + "[^\n]*", " ", text)
    return text
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _strip_template(text: str, template_re: re.Pattern) -> str:
    """Apply *template_re* repeatedly until the text stops changing.

    Each pass replaces every match with one space, so nested constructs that
    only become matchable after an inner one is removed are stripped as well.
    """
    previous = None
    while previous != text:
        previous = text
        text = template_re.sub(" ", text)
    return text
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _normalize_decl(text: str, lang: LanguageConfig) -> str:
    """Reduce declaration text to a single-spaced, noise-free form.

    Comments are stripped first; then attributes, calling conventions and
    export macros are blanked out, each only when the language defines the
    corresponding pattern. Finally runs of whitespace collapse to one space
    and the ends are trimmed.
    """
    text = _strip_comments(text, lang)
    for pattern in (lang.attribute_re, lang.calling_conv_re, lang.export_macro_re):
        if pattern:
            text = pattern.sub(" ", text)
    return re.sub(r"\s+", " ", text).strip()
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _extract_func_name(
    sig: str, func_name_re: re.Pattern, export_macro_re: re.Pattern | None,
    scope_operator: str = "::",
) -> str | None:
    """Extract the (possibly scoped) function name from a signature.

    Args:
        sig: normalized signature text.
        func_name_re: pattern whose group 1 captures the name portion.
        export_macro_re: pattern for export-macro tokens to drop from the
            captured name; ``None`` disables the filtering.
        scope_operator: the language's scope separator as it appears in *sig*.

    Returns:
        The name with scope segments joined by ``::`` (regardless of
        *scope_operator*), or ``None`` when no name is found or nothing is
        left after filtering.
    """
    # Collapse whitespace around the scope operator so "Foo :: Bar" is one token.
    sig = re.sub(rf"\s*{re.escape(scope_operator)}\s*", scope_operator, sig)
    m = func_name_re.search(sig)
    if not m:
        return None

    tokens = m.group(1).split()
    # The pattern is optional in the language config; without it keep all tokens.
    if export_macro_re is not None:
        tokens = [tok for tok in tokens if not export_macro_re.match(tok)]

    segments = " ".join(tokens).split(scope_operator)
    return "::".join(seg.strip() for seg in segments) or None
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _line_before_brace(line: str) -> str:
    """Return the part of *line* before its first ``{``, right-trimmed.

    The whole line (right-trimmed) is returned when it contains no brace.
    """
    return line.split("{", 1)[0].rstrip()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _declaration_boundary(clean: str, lang: LanguageConfig) -> bool:
    """Tell whether a comment-free line ends the previous declaration.

    A line is a boundary when it is a lone ``{``, ends with ``;``, ``}`` or
    ``):``, or matches the language's access-specifier pattern. An empty
    line is never a boundary.
    """
    if not clean:
        return False
    if clean == "{" or clean.endswith((";", "}", "):")):
        return True
    return bool(lang.access_spec_re.match(clean))
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _gather_declaration(
    lines: list[str],
    open_line_0: int,
    lang: LanguageConfig,
    fw: FrameworkConfig,
    max_lookback: int = 24,
) -> list[str]:
    """Gather the declaration text preceding a brace at open_line_0 (0-based).

    For indent-based languages (Python), the open_line already contains the
    declaration (e.g. 'class Foo:' or 'def bar():'). Only look back to gather
    multi-line signatures when parentheses are unbalanced.

    Args:
        lines: the file's lines.
        open_line_0: 0-based index of the line that opens the block.
        lang: language configuration (patterns and block style).
        fw: framework configuration (noise macros to skip).
        max_lookback: bound on how far above the opening line to scan.

    Returns:
        Stripped declaration lines in source order, ending with the text of
        the opening line that precedes its first ``{`` (when non-empty).
        Empty when open_line_0 is out of range.
    """
    if open_line_0 < 0 or open_line_0 >= len(lines):
        return []

    context: list[str] = []
    open_text = _line_before_brace(lines[open_line_0]).strip()
    if open_text:
        context.append(open_text)

    paren_balance = open_text.count("(") - open_text.count(")")
    angle_balance = open_text.count("<") - open_text.count(">")

    if lang.uses_indent_blocks:
        # Only parentheses decide completeness here. Counting "<"/">" would
        # treat a return arrow ("->") or a comparison as an unmatched angle
        # bracket and drag the unrelated previous line into the declaration.
        if paren_balance == 0:
            return context
        # Unbalanced — multi-line signature, look back to gather it
        for j in range(open_line_0 - 1, max(open_line_0 - max_lookback, -1), -1):
            stripped = lines[j].strip()
            if not stripped:
                break
            context.insert(0, stripped)
            paren_balance += stripped.count("(") - stripped.count(")")
            if paren_balance == 0:
                break
        return context

    # Brace languages: the opening line alone is enough when it is balanced,
    # is not a constructor-initializer continuation, and looks like a
    # declaration (block keyword, call-like "(", or macro-like text).
    if (
        open_text
        and paren_balance <= 0
        and angle_balance <= 0
        and not open_text.startswith(":")
        and not open_text.startswith(",")
        and (
            (lang.block_keyword_re and lang.block_keyword_re.search(open_text))
            or "(" in open_text
            or lang.macro_like_re.match(open_text)
        )
    ):
        return context

    for j in range(open_line_0 - 1, max(open_line_0 - max_lookback, -1), -1):
        stripped = lines[j].strip()
        if not stripped:
            # A blank line ends the declaration once something balanced
            # has been collected; otherwise keep scanning upward.
            if context and paren_balance <= 0 and angle_balance <= 0:
                break
            continue
        if fw.noise_macro_re and fw.noise_macro_re.match(stripped):
            continue

        # Skip preprocessor directives — they can appear inside declarations
        if lang.preprocessor_prefix and stripped.startswith(lang.preprocessor_prefix):
            continue

        clean = _strip_comments(stripped, lang).strip()
        if not clean:
            continue
        if _declaration_boundary(clean, lang) and paren_balance <= 0 and angle_balance <= 0:
            break

        context.insert(0, stripped)
        paren_balance += clean.count("(") - clean.count(")")
        angle_balance += clean.count("<") - clean.count(">")

        # Don't break early on constructor initializer entries (: or , lines)
        is_member_init = clean.startswith(":") or clean.startswith(",")
        if (
            not is_member_init
            and paren_balance <= 0
            and angle_balance <= 0
            and (
                (lang.block_keyword_re and lang.block_keyword_re.search(clean))
                or re.search(r"\w\s*\([^;{}]*$", clean)
                or (lang.has_preprocessor_macros and clean.startswith("template"))
            )
        ):
            break

    return context
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _sniff_decoration_above(
    lines: list[str],
    block_start_0: int,
    fw: FrameworkConfig,
) -> tuple[int, dict | None]:
    """Look 1-3 lines above block_start for framework decoration macros.

    The scan stops at the first line above the block that ends with ``;``,
    ``{`` or ``}``: such a line closes or opens a different construct, so a
    decoration found beyond it does not belong to this block.

    Args:
        lines: the file's lines.
        block_start_0: 0-based index of the block's opening line.
        fw: framework configuration supplying the decoration pattern, whose
            group 1 is the macro name and group 2 the raw parameter text.

    Returns (adjusted_start_line_1based, meta_dict_or_None).
    If found, adjusts start_line upward to include the decoration macro;
    otherwise returns ``(0, None)``.
    """
    if not fw.decoration_macro_re or not fw.sniff_decoration_above:
        return 0, None

    for offset in range(1, 4):
        idx = block_start_0 - offset
        if idx < 0:
            break
        stripped = lines[idx].strip()
        # Crossing a statement or block boundary would attribute another
        # symbol's decoration (e.g. a UPROPERTY on the previous member).
        if stripped.endswith((";", "{", "}")):
            break
        m = fw.decoration_macro_re.match(stripped)
        if m:
            params = [p.strip() for p in m.group(2).split(",") if p.strip()]
            return idx + 1, {m.group(1): params}

    return 0, None
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def _classify_block(
    lines: list[str],
    block: BracketBlock,
    lang: LanguageConfig,
    fw: FrameworkConfig,
    parent_class: str | None = None,
    parent_namespace: str | None = None,
) -> tuple[str, str | None, str | None, str | None, str | None]:
    """Classify a bracket block.

    The declaration text above the block is gathered and normalized, then
    tested in a fixed order: #define, macro-like, extern "C", namespace,
    enum, class/struct, lambda, operator, and finally function/method
    (destructors first). The first rule that matches decides the result.

    Args:
        lines: the file's lines.
        block: the scanned block; ``open_line`` is 1-based.
        lang: language configuration supplying the classification patterns.
        fw: framework configuration, forwarded to declaration gathering.
        parent_class: name of the enclosing class, if any.
        parent_namespace: qualified name of the enclosing namespace, if any.

    Returns (block_type, block_name, qualified_name, signature, inheritance_base).
    ``block_type`` is ``""`` (with every other item ``None``) for blocks
    that should be ignored.
    """
    skip = ("", None, None, None, None)

    context = _gather_declaration(lines, block.open_line - 1, lang, fw)
    if not context:
        return skip

    joined_clean = _normalize_decl(" ".join(context), lang)
    if not joined_clean:
        return skip

    classifier_sig = joined_clean
    function_sig = _strip_template(joined_clean, lang.template_re)

    # #define — the pattern is optional, so only test when it is configured
    if lang.define_re and any(lang.define_re.match(line) for line in context):
        return ("macro_def", None, None, joined_clean, None)

    # Unknown macro-like block
    if lang.macro_like_re.match(joined_clean):
        return skip

    # extern "C"
    if lang.extern_c_re.search(joined_clean):
        return ("namespace", None, None, joined_clean, None)

    # Namespace
    ns_match = lang.namespace_sig_re.match(classifier_sig) if lang.namespace_sig_re else None
    if ns_match:
        ns_name = ns_match.group(1)
        return ("namespace", ns_name, ns_name, joined_clean, None)

    # Enum
    m = lang.enum_re.search(classifier_sig)
    if m:
        name = m.group(1)
        qn = f"{parent_namespace}::{name}" if parent_namespace else name
        return ("enum", name, qn, joined_clean, None)

    # Class / struct
    m = lang.class_re.search(classifier_sig)
    if m:
        name = m.group(2)
        # lastindex is None when no group took part in the match.
        base = m.group(3) if (m.lastindex or 0) >= 3 else None
        return ("class", name, name, joined_clean, base)

    # Lambda — skip (check before function detection)
    if lang.lambda_re and lang.lambda_re.search(joined_clean):
        return skip

    # operator new/delete/etc. — skip
    if lang.operator_re and lang.operator_re.search(classifier_sig):
        return skip

    # Strip constructor initializer list: Foo(params) : member(val), ... -> Foo(params)
    if lang.init_list_re:
        init_match = lang.init_list_re.search(function_sig)
        if init_match:
            function_sig = function_sig[:init_match.start() + 1]

    # Function / method detection
    test_sig = lang.trailing_mods_re.sub("", function_sig).rstrip()
    test_sig = re.sub(r"\s+", " ", test_sig).strip()
    if test_sig.endswith(")") or lang.func_sig_end_re.search(test_sig):
        # Destructor
        dtor = lang.dtor_re.search(test_sig)
        if dtor:
            raw_name = dtor.group(1)
            qn = f"{parent_class}::{raw_name}" if parent_class else raw_name
            block_type = "method" if parent_class else "function"
            return (block_type, raw_name, qn, joined_clean, None)

        # Control flow — skip
        if lang.control_flow_re.match(test_sig):
            return skip
        if lang.macro_like_re.match(test_sig):
            return skip

        raw_name = _extract_func_name(
            test_sig, lang.func_name_re, lang.export_macro_re,
            scope_operator=lang.scope_operator,
        )
        if raw_name:
            func_part = raw_name.split("::")[-1]
            if func_part in lang.control_flow_names:
                return skip
            # An already-scoped name is kept as is; otherwise prefix the
            # enclosing class, if there is one.
            if "::" in raw_name:
                qn = raw_name
            else:
                qn = f"{parent_class}::{raw_name}" if parent_class else raw_name
            block_type = "method" if parent_class else "function"
            return (block_type, raw_name, qn, joined_clean, None)

    return skip
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
# ── Main analysis ────────────────────────────────────────────────────────
|
|
349
|
+
|
|
350
|
+
def analyze_file(
    lines: list[str],
    file_id: int,
    lang: LanguageConfig,
    fw: FrameworkConfig,
) -> tuple[list[SymbolDef], list[ExtraSymbol]]:
    """Analyze a file and extract all symbols with QN normalization.

    Blocks are scanned from the joined text, classified in order of their
    opening line, and turned into ``SymbolDef`` records; blocks without a
    type or a name are dropped. Symbols defined outside blocks are collected
    separately.

    Returns (bracket_symbols, extra_symbols).
    """
    content = "\n".join(lines)
    from .bracket_scanner import compute_parent_map, scan_brackets

    blocks = scan_brackets(
        content,
        verbatim_string_prefix=lang.verbatim_string_prefix,
        raw_string_char=lang.raw_string_char,
        lang=lang,
    )
    if not blocks:
        return [], extract_extra_symbols(lines, file_id, lang, fw)

    parent_map = compute_parent_map(blocks)

    # (open_line, depth) -> index into ``blocks``, used to resolve parents.
    index_of = {(b.open_line, b.depth): pos for pos, b in enumerate(blocks)}

    # Names recorded per block index, looked up by children for QN assembly.
    class_of_block: dict[int, str] = {}
    namespace_of_block: dict[int, str] = {}

    symbols: list[SymbolDef] = []

    # Ascending open_line, so a parent is classified before its children.
    ordered = sorted(enumerate(blocks), key=lambda pair: pair[1].open_line)

    for pos, block in ordered:
        parent_key = parent_map.get((block.open_line, block.depth))

        parent_class: str | None = None
        parent_namespace: str | None = None
        if parent_key is not None:
            parent_pos = index_of.get(parent_key)
            if parent_pos is not None:
                parent_class = class_of_block.get(parent_pos)
                parent_namespace = namespace_of_block.get(parent_pos)

        btype, bname, qn, sig, base = _classify_block(
            lines, block, lang, fw, parent_class, parent_namespace
        )
        if not (btype and bname):
            continue

        # Framework decoration sniffing — look above the block
        deco_line, deco_meta = _sniff_decoration_above(lines, block.open_line - 1, fw)
        first_line = deco_line if deco_line > 0 else block.open_line

        if btype == "class":
            class_of_block[pos] = bname
        if btype == "namespace" and bname:
            namespace_of_block[pos] = (
                f"{parent_namespace}::{bname}" if parent_namespace else bname
            )

        if btype in ("function",) and parent_class:
            btype = "method"

        symbols.append(SymbolDef(
            qualified_name=qn or bname,
            block_type=btype,
            file_id=file_id,
            start_line=first_line,
            end_line=block.close_line,
            decoration_meta=deco_meta,
            parent_class=parent_class,
            signature=sig,
            inheritance_base=base,
            language=lang.name,
        ))

    return symbols, extract_extra_symbols(lines, file_id, lang, fw)
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def extract_extra_symbols(
    lines: list[str],
    file_id: int,
    lang: LanguageConfig,
    fw: FrameworkConfig,
) -> list[ExtraSymbol]:
    """Extract non-brace symbols: framework-specific delegates and #define macros.

    Each line is tested first against the framework's declaration-macro
    pattern (when the framework provides one, a name parser, and enables
    ``delegate_def``) and then against the language's #define pattern (when
    ``macro_def`` is enabled and the patterns exist). A #define continued
    with trailing backslashes is recorded with an ``end_line`` covering all
    of its continuation lines; ``signature`` stays the first line.

    Returns:
        The symbols found, in line order, with 1-based line numbers.
    """
    results: list[ExtraSymbol] = []

    for i, line in enumerate(lines, start=1):
        stripped = line.strip()

        # Framework declaration macros (e.g., DECLARE_DELEGATE)
        if fw.declare_macro_re and fw.parse_delegate_name:
            m = fw.declare_macro_re.match(stripped)
            if m and "delegate_def" in fw.extra_symbol_types:
                delegate_name = fw.parse_delegate_name(stripped)
                if delegate_name:
                    results.append(ExtraSymbol(
                        qualified_name=delegate_name,
                        block_type="delegate_def",
                        file_id=file_id,
                        start_line=i,
                        end_line=i,
                        signature=stripped,
                        language=lang.name,
                    ))
                # A declaration-macro line is never also a #define.
                continue

        # #define macros (C/C++ only)
        if "macro_def" in fw.extra_symbol_types and lang.define_re:
            if lang.define_line_re:
                dm = lang.define_line_re.match(stripped)
                if dm:
                    name = dm.group(1)
                    if fw.macro_name_filter and fw.macro_name_filter(name):
                        continue
                    # Follow backslash continuations so the recorded range
                    # covers the whole macro body, not just its first line.
                    end_line = i
                    while end_line < len(lines) and lines[end_line - 1].rstrip().endswith("\\"):
                        end_line += 1
                    results.append(ExtraSymbol(
                        qualified_name=name,
                        block_type="macro_def",
                        file_id=file_id,
                        start_line=i,
                        end_line=end_line,
                        signature=stripped,
                        language=lang.name,
                    ))

    return results
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""Unreal Engine framework rules — UE-specific configuration for symbol analysis.
|
|
2
|
+
|
|
3
|
+
Extracts all hardcoded UE constants from symbol_analyzer and edge_extractor
|
|
4
|
+
into a single FrameworkConfig instance.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
|
|
11
|
+
from .configs import FrameworkConfig
|
|
12
|
+
|
|
13
|
+
# ── UE decoration macros ──────────────────────────────────────────────────
|
|
14
|
+
|
|
15
|
+
# Matches a decoration macro at the start of a line. Group 1 is the macro
# name; group 2 is the raw text between the parentheses, up to the first ")".
_UE_DECORATION_RE = re.compile(
    r"^\s*(UCLASS|USTRUCT|UENUM|UFUNCTION|UPROPERTY|UINTERFACE)"
    r"\s*\(([^)]*)\)"
)

# The same macro names as plain strings, for membership tests on metadata keys.
_UE_DECORATION_NAMES = frozenset({
    "UCLASS", "USTRUCT", "UENUM", "UFUNCTION", "UPROPERTY", "UINTERFACE",
})

# Matches a line that starts with a DECLARE_-prefixed macro followed by "(";
# group 1 is the macro name.
_UE_DECLARE_RE = re.compile(r"^\s*(DECLARE_\w+(?:_\w+)*)\s*\(")

# Matches lines that start with one of the listed boilerplate macros; the
# opening parenthesis is optional.
_UE_NOISE_RE = re.compile(
    r"^\s*(?:GENERATED_BODY|GENERATED_UCLASS_BODY|GENERATED_USTRUCT_BODY|UMETA|UPARAM)\s*\(?"
)

# ── UE type skip lists ────────────────────────────────────────────────────

# Passed to FrameworkConfig as ``skip_types``.
_UE_SKIP_TYPES = frozenset({
    # Basic UE container / value types
    "TArray", "TMap", "TSet", "TSharedPtr", "TSharedRef", "TWeakPtr",
    "TUniquePtr", "TFunction", "TTuple", "TPair", "TOptional",
    "FString", "FName", "FText",
    # Basic math types
    "FLinearColor", "FVector", "FVector2D", "FVector4",
    "FRotator", "FQuat", "FTransform", "FMatrix",
    "FGuid", "FDateTime", "FTimespan",
    # Core UE base classes — too common to be useful as edges
    "AActor", "UObject", "ACharacter", "APawn", "AController",
    "UActorComponent", "USceneComponent", "UPrimitiveComponent",
    "UGameInstance", "UGameModeBase", "UWorld", "APlayerState",
    "UGameStateBase", "APlayerController",
})

# Intentionally empty: the braces hold only comments, so this is an empty set.
_BASIC_SKIP_TYPES = frozenset({
    # C/C++ primitive / standard types — NOTE: moved to LanguageConfig.basic_skip_types
    # for make_cpp_language(). Kept here only for reference / backward compat.
})

# Passed to FrameworkConfig as ``noise_type_names``.
# NOTE(review): presumably generic words that should not be treated as type
# references — confirm against the consumer of ``noise_type_names``.
_NOISE_TYPE_NAMES = frozenset({
    "FORCEINLINE", "FORCENOINLINE", "INLINE", "PRAGMA", "Deprecated",
    "The", "Type", "Tag", "Name", "Value", "Key", "Data", "Result",
    "Index", "Count", "Size", "Offset", "Flags", "Mode", "State",
    "Id", "ID", "Handle", "Ptr", "Ref", "Desc", "Info", "Error",
    "CbField", "Design", "ObjectData", "VOIP", "Begin", "End",
    "Max", "Min", "Default", "None", "Null", "True", "False",
    "Out", "In", "Src", "Dst", "Len", "Buf", "Res",
    "Header", "Footer", "Body", "Title", "Label",
    "Module", "Package", "Plugin", "Project",
    "Source", "Target", "Input", "Output",
    "Self", "Super", "This",
})

# Decoration parameters that produce an "_Implementation" target.
_RPC_SPECIFIERS = frozenset({"Server", "Client", "NetMulticast"})
_BLUEPRINT_NATIVE_EVENT = "BlueprintNativeEvent"
# Decoration parameter that produces a "_Validate" target.
_RPC_VALIDATION_PARAM = "WithValidation"

# Single-letter prefixes tried when generating candidate type names.
_UE_PREFIXES = ("A", "U", "F", "E", "I", "T")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# ── Callback implementations ───────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def extract_ue_rpc_edges(qn: str, decoration_meta: dict) -> list[tuple[str, str]]:
    """Extract UE RPC routing edges from decoration metadata.

    Names that already end in ``_Implementation`` or ``_Validate`` produce
    no edges. For every known decoration macro whose parameters include an
    RPC specifier or ``BlueprintNativeEvent``, an edge to
    ``<qn>_Implementation`` is emitted, followed by one to ``<qn>_Validate``
    when ``WithValidation`` is also present.

    Returns [(target_qn, edge_type), ...] pairs.
    e.g. ("AFoo::Bar", {"UFUNCTION": ["Server", "Reliable"]})
    -> [("AFoo::Bar_Implementation", "rpc_routing")]
    """
    if qn.endswith(("_Implementation", "_Validate")):
        return []

    edges: list[tuple[str, str]] = []
    for macro_name, params in decoration_meta.items():
        if macro_name not in _UE_DECORATION_NAMES:
            continue
        routed = any(
            param in _RPC_SPECIFIERS or param == _BLUEPRINT_NATIVE_EVENT
            for param in params
        )
        if not routed:
            continue
        edges.append((f"{qn}_Implementation", "rpc_routing"))
        if _RPC_VALIDATION_PARAM in params:
            edges.append((f"{qn}_Validate", "rpc_routing"))
    return edges
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def parse_ue_delegate_name(stripped_line: str) -> str | None:
    """Parse delegate name from a UE DECLARE_* macro line.

    The name is normally the first macro argument. For ``*_RetVal*``
    delegate macros the first argument is the return type, and for
    ``DECLARE_EVENT*`` macros it is the owning type, so the name is taken
    from the second argument in those cases.

    Args:
        stripped_line: the macro invocation line with surrounding whitespace
            removed; only text up to the first ``)`` is examined.

    Returns:
        The declared name, or ``None`` when the line has no ``(``, the
        expected argument is missing, or the argument is not an identifier
        starting with an uppercase letter.
    """
    macro, paren, rest = stripped_line.partition("(")
    if not paren:
        return None

    # Arguments end at the first ")"; a missing ")" means the rest of the line.
    args = [arg.strip() for arg in rest.partition(")")[0].split(",")]

    macro = macro.strip()
    name_follows_first_arg = (
        "_RetVal" in macro
        or macro == "DECLARE_EVENT"
        or macro.startswith("DECLARE_EVENT_")
    )
    position = 1 if name_follows_first_arg else 0
    if position >= len(args):
        return None

    candidate = args[position]
    # Reject fragments such as 'TEXT("Foo"' that are not plain identifiers.
    if candidate and candidate[0].isupper() and candidate.isidentifier():
        return candidate
    return None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def should_skip_ue_macro(name: str) -> bool:
    """Return True for UE macro names that should be excluded from extraction.

    Excluded names are those beginning with an underscore or with
    ``GENERATED_``.
    """
    excluded_prefixes = ("_", "GENERATED_")
    return name.startswith(excluded_prefixes)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def format_ue_meta_display(meta: dict) -> list[str]:
    """Format UE decoration metadata into display parts for [Meta] line.

    Each macro contributes ``NAME(p1,p2,...)``, followed by
    ``->_Implementation`` when any parameter is an RPC specifier or
    ``BlueprintNativeEvent``, and by ``->_Validate`` when ``WithValidation``
    is among the parameters.
    """
    # Same set for every macro, so build it once.
    rpc_triggers = _RPC_SPECIFIERS | ({_BLUEPRINT_NATIVE_EVENT} - {""})

    parts: list[str] = []
    for macro, params in meta.items():
        parts.append(f"{macro}({','.join(params)})")
        if not rpc_triggers.isdisjoint(params):
            parts.append("->_Implementation")
        if _RPC_VALIDATION_PARAM in params:
            parts.append("->_Validate")
    return parts
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def resolve_ue_type_prefixes(qualified_name: str) -> list[str]:
    """Generate candidate QNs by prepending UE type prefixes (A, U, F, E, I, T).

    One candidate is produced per prefix, in the order the prefixes are
    declared; the input name itself is not included.
    """
    candidates = [ue_prefix + qualified_name for ue_prefix in _UE_PREFIXES]
    return candidates
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def make_unreal_framework() -> FrameworkConfig:
    """Create the Unreal Engine framework configuration.

    Bundles this module's patterns, name sets and callbacks into a new
    ``FrameworkConfig`` named ``"unreal"``.
    """
    # Static data: patterns, name sets and feature switches.
    settings = {
        "name": "unreal",
        "skip_types": _UE_SKIP_TYPES,
        "noise_type_names": _NOISE_TYPE_NAMES,
        "decoration_macro_re": _UE_DECORATION_RE,
        "decoration_macro_names": _UE_DECORATION_NAMES,
        "noise_macro_re": _UE_NOISE_RE,
        "declare_macro_re": _UE_DECLARE_RE,
        "extract_decoration_meta": True,
        "sniff_decoration_above": True,
        "extra_symbol_types": frozenset({"delegate_def", "macro_def"}),
    }
    # Framework behavior callbacks
    callbacks = {
        "extract_framework_edges": extract_ue_rpc_edges,
        "parse_delegate_name": parse_ue_delegate_name,
        "macro_name_filter": should_skip_ue_macro,
        "format_meta_display": format_ue_meta_display,
        "resolve_type_prefixes": resolve_ue_type_prefixes,
    }
    return FrameworkConfig(**settings, **callbacks)
|