code-explore-by-sql 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,402 @@
1
+ """Layered configuration for code source analysis.
2
+
3
+ Three layers:
4
+ Language — syntax rules selected by file extension (C/C++ vs C#)
5
+ Framework — application framework rules (Unreal, Unity, etc.)
6
+ Project — user-configurable settings (extensions, excludes, module inference)
7
+
8
+ Any language can combine with any framework (e.g., Unreal uses C# for build system code).
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import re
14
+ from collections.abc import Callable
15
+ from dataclasses import dataclass, field
16
+
17
+ # ── Language Layer ────────────────────────────────────────────────────────
18
+
19
@dataclass(frozen=True)
class LanguageConfig:
    """Immutable description of one language's syntax, chosen by file extension.

    The first eighteen fields (``name`` through ``operator_re``) carry no
    default and must be supplied for every language.  All later fields are
    optional; their defaults lean toward C/C++ syntax (``//`` comments, ``;``
    terminator, ``::`` scope operator).
    """

    # Short language identifier such as "cpp" or "csharp".
    name: str

    # ---- Regexes used to classify a block ---------------------------------
    class_re: re.Pattern
    enum_re: re.Pattern
    namespace_re: re.Pattern
    func_name_re: re.Pattern
    export_macro_re: re.Pattern
    calling_conv_re: re.Pattern
    attribute_re: re.Pattern
    template_re: re.Pattern
    dtor_re: re.Pattern

    # ---- Regexes / names used when stripping and normalising --------------
    control_flow_re: re.Pattern
    control_flow_names: frozenset[str]
    trailing_mods_re: re.Pattern
    access_spec_re: re.Pattern

    # ---- Comment / noise detection ----------------------------------------
    macro_like_re: re.Pattern
    define_re: re.Pattern
    extern_c_re: re.Pattern
    # Required like the fields above, but may be passed as None.
    operator_re: re.Pattern | None

    # True when blocks are delimited by braces.
    uses_braces: bool = True

    # True when namespaces contribute to qualified names.
    uses_namespaces: bool = True

    # True when inheritance is written with a colon (class Foo : public Bar).
    uses_colon_inheritance: bool = True

    # ---- Edge extraction (read by edge_extractor.py) ----------------------

    # Scope operator text: "::" in C++, "." in C#.
    scope_operator: str = "::"

    # Keyword naming the parent class: "Super" in UE C++, "base" in C#.
    base_keyword: str = "Super"

    # Matches a static call; capture groups are (ClassName, MethodName).
    static_call_re: re.Pattern | None = None

    # Matches a super/base call; capture group is (MethodName).
    super_call_re: re.Pattern | None = None

    # Matches type names when extracting dependencies.
    type_re: re.Pattern | None = None

    # Pulls parameter types out of a function signature.
    param_type_re: re.Pattern | None = None

    # Primitive / standard types of the language that must never yield edges.
    basic_skip_types: frozenset[str] = field(default_factory=frozenset)

    # ---- Block classification helpers (read by symbol_analyzer.py) --------

    # Single regex covering namespace|class|struct|enum keywords.
    block_keyword_re: re.Pattern | None = None

    # Detects lambda expressions; None for languages without lambdas.
    lambda_re: re.Pattern | None = None

    # Matches the tail of a joined declaration that looks like a finished
    # function signature.  C/C++: ")", ") const".  Python: "):".
    func_sig_end_re: re.Pattern = re.compile(r"\)\s*(?:const\s*)?$")

    # Namespace signature regex for _classify_block; captures the name.
    namespace_sig_re: re.Pattern | None = None

    # Constructor initializer list marker: "):" or similar.
    init_list_re: re.Pattern | None = None

    # ---- View / summary helpers (read by code_block_summary.py) -----------

    # Access specifier labels used for view filtering,
    # e.g. {"public:", "private:", "protected:"}.
    access_spec_names: frozenset[str] = field(default_factory=frozenset)

    # Structural keywords recognised in the signature view,
    # e.g. {"class ", "struct ", "enum ", "namespace "}.
    view_structural_kws: tuple[str, ...] = ()

    # Modifier keywords recognised in the signature view,
    # e.g. {"virtual ", "static ", "override", "FORCEINLINE"}.
    view_modifier_kws: tuple[str, ...] = ()

    # Alternation of local-variable type modifiers,
    # e.g. "const|static|mutable|constexpr|volatile".
    local_var_modifiers: str = ""

    # ---- Bracket scanner hints --------------------------------------------

    # Prefix character of verbatim strings ('@' in C#); None if unsupported.
    verbatim_string_prefix: str | None = None

    # Character opening a raw string literal; None if unsupported.
    # C++: "R" -> R"delim(...)delim", C#: None
    raw_string_char: str | None = None

    # ---- Function body summary hints --------------------------------------

    # Range-based loop pattern for body summaries; None if unsupported.
    # C++: for (Type name : container), C#: foreach (Type name in collection)
    range_for_re: re.Pattern | None = None

    # ---- Comment syntax (read by bracket_scanner, symbol_analyzer,
    #      edge_extractor, code_block_summary) ------------------------------

    # Prefix of a line comment ("//" for C-family, "#" for Python).
    line_comment: str = "//"

    # (open, close) markers of a block comment; None if unsupported.
    block_comment_pair: tuple[str, str] | None = ("/*", "*/")

    # ---- String syntax (read by bracket_scanner) --------------------------

    # Characters that open a string literal, e.g. {'"', "'"} for Python or
    # {'"'} for C++ where ' introduces a char.
    string_delimiters: frozenset[str] = frozenset({'"', "'"})

    # Escape character inside strings; None when there is no escaping.
    string_escape_char: str | None = "\\"

    # Triple-quote openers for indent-based languages,
    # e.g. ('"""', "'''") for Python.
    triple_quote_strings: tuple[str, ...] = ()

    # ---- Block style (read by bracket_scanner) ----------------------------

    # True when blocks are marked by indentation rather than braces.
    uses_indent_blocks: bool = False

    # ---- Preprocessor (read by symbol_analyzer, edge_extractor,
    #      code_block_summary) ----------------------------------------------

    # Prefix of a preprocessor directive ("#" for C/C++, "" if none).
    preprocessor_prefix: str = "#"

    # True when #define-style macros exist; controls macro_def extraction.
    has_preprocessor_macros: bool = True

    # ---- Statement / block close (read by code_block_summary) -------------

    # Statement terminator (";" for C-family, "" for Python).
    statement_terminator: str = ";"

    # Closing-brace text in summary output ("};" for C++, "}" elsewhere).
    block_close_suffix: str = "}"

    # Comment prefix for summary lines ("//" for C-family, "#" for Python).
    summary_comment_prefix: str = "//"

    # ---- Config-driven control flow (read by code_block_summary) ----------

    # (label, compiled_regex) pairs used when summarising a function body.
    control_flow_patterns: tuple[tuple[str, re.Pattern], ...] = ()

    # Matches the return keyword when summarising a function body.
    return_re: re.Pattern | None = None

    # ---- Extra syntax hints -----------------------------------------------

    # Pointer/reference indicator characters ("*&" for C/C++, "" for
    # Python/Java).
    type_indicator_chars: str = "*&"

    # Whole-line #define regex for ExtraSymbol extraction; None when the
    # language has no macros.
    define_line_re: re.Pattern | None = None

    # True when backtick template literals exist (JavaScript/TypeScript).
    has_template_strings: bool = False

    # Raw string flavour: "cpp" for R"delim(...)delim", "rust" for r#"..."#.
    raw_string_style: str = "cpp"
191
+
192
+
193
+ # ── Framework Layer ───────────────────────────────────────────────────────
194
+
195
@dataclass(frozen=True)
class FrameworkConfig:
    """Immutable set of application-framework rules layered over any language.

    Only ``name`` is required.  Every other field defaults to an empty or
    disabled value, so ``FrameworkConfig(name="generic")`` describes a
    framework with no special rules.
    """

    # Framework identifier: "unreal", "unity", "generic", etc.
    name: str

    # Type names that must never produce reference edges.
    skip_types: frozenset[str] = field(default_factory=frozenset)

    # Names that get matched as types but are never real types.
    noise_type_names: frozenset[str] = field(default_factory=frozenset)

    # UE-style decoration macros (UCLASS, UFUNCTION, etc.): the regex that
    # finds them and the set of their names.
    decoration_macro_re: re.Pattern | None = None
    decoration_macro_names: frozenset[str] = field(default_factory=frozenset)

    # Noise macros ignored while gathering a declaration.
    noise_macro_re: re.Pattern | None = None

    # Declaration macros that define extra symbols (DECLARE_DELEGATE, etc.).
    declare_macro_re: re.Pattern | None = None

    # Framework-generated body macros that should be skipped.
    generated_body_re: re.Pattern | None = None

    # Type prefix letters for fuzzy resolution (e.g. UE's A/U/F/E/I).
    # Obsolete: use the resolve_type_prefixes callback instead.
    type_prefixes: tuple[str, ...] = ()

    # RPC specifiers for framework-specific edges.
    # Obsolete: use extract_framework_edges instead.
    rpc_specifiers: frozenset[str] = field(default_factory=frozenset)
    blueprint_native_event: str = ""

    # NOTE: static-call noise targets used to be configured here; that
    # setting is obsolete and is no longer read from the framework layer.

    # Parameter name that triggers the _Validate suffix in RPC routing
    # formatting.  Obsolete: use the extract_framework_edges callback instead.
    rpc_validation_param: str = ""

    # Enable extraction of metadata from decoration macros.
    extract_decoration_meta: bool = False

    # Enable sniffing for decoration macros located above a block.
    sniff_decoration_above: bool = False

    # Kinds of extra symbols to extract (delegate_def, macro_def, etc.).
    extra_symbol_types: frozenset[str] = field(default_factory=frozenset)

    # ---- Framework behavior callbacks -------------------------------------
    # Hooks that keep framework-specific algorithm logic in this layer rather
    # than inline in the processing modules.  The processing modules call
    # them; a generic framework leaves each one as None.

    # Derive framework-specific edges from decoration metadata.
    # (qualified_name: str, decoration_meta: dict) -> [(target_qn, edge_type)]
    extract_framework_edges: Callable[[str, dict], list[tuple[str, str]]] | None = None

    # Read the delegate/type name out of a declaration macro line.
    # (stripped_line: str) -> name or None
    parse_delegate_name: Callable[[str], str | None] | None = None

    # Decide whether a macro name is excluded from symbol extraction.
    # (macro_name: str) -> True means skip this macro
    macro_name_filter: Callable[[str], bool] | None = None

    # Turn decoration metadata into display parts for the [Meta] header.
    # (meta: dict) -> [display_string, ...]
    format_meta_display: Callable[[dict], list[str]] | None = None

    # Produce candidate qualified names by prepending type prefixes.
    # (qualified_name: str) -> [candidate_qn, ...]
    resolve_type_prefixes: Callable[[str], list[str]] | None = None
268
+
269
+
270
+ # ── Project Layer ─────────────────────────────────────────────────────────
271
+
272
@dataclass(frozen=True)
class ProjectConfig:
    """User-configurable project settings.

    Instances are frozen (attributes cannot be reassigned), compare by value
    across all fields, and are hashable.  Note that the mapping held in
    ``extension_to_language`` is an ordinary ``dict`` and can still be mutated
    in place; it is therefore left out of the hash.
    """

    # File extensions to index, mapped to a language name.
    # hash=False: a dict is unhashable, so including this field in the
    # generated __hash__ would make hash(ProjectConfig(...)) raise TypeError.
    # The field still takes part in == comparisons.
    extension_to_language: dict[str, str] = field(default_factory=dict, hash=False)

    # Directory names to exclude from indexing.
    exclude_parts: frozenset[str] = field(default_factory=frozenset)

    # Path component that marks the source root.
    source_marker: str = "Source"

    # Category directories to skip after the source marker.
    categories: frozenset[str] = field(default_factory=frozenset)

    # Path components that are not valid module names.
    invalid_module_names: frozenset[str] = field(default_factory=frozenset)

    # Name of the framework to apply.
    framework_name: str = "generic"
293
+
294
+
295
+ # ── Framework / Project Defaults ──────────────────────────────────────────
296
+
297
def make_generic_framework() -> FrameworkConfig:
    """Build the generic framework: only the name is set, no special rules."""
    generic = FrameworkConfig(name="generic")
    return generic
300
+
301
+
302
def make_unreal_project(
    framework: FrameworkConfig | None = None,
    extra_extensions: dict[str, str] | None = None,
) -> ProjectConfig:
    """Build the default project configuration for an Unreal Engine tree.

    Args:
        framework: Framework whose ``name`` is recorded on the project.  When
            omitted (or falsy) the project is tagged ``"generic"``.
        extra_extensions: Additional ``extension -> language`` entries.  They
            are merged last, so they override the built-in mapping.

    Returns:
        A ``ProjectConfig`` with ``"Source"`` as the source marker, the
        built-in exclude list, and the built-in invalid module names.
    """
    cpp_suffixes = (".h", ".hpp", ".hh", ".inl", ".cpp", ".cc", ".cxx")
    hlsl_suffixes = (".usf", ".ush", ".hlsl")

    # Insertion order: C/C++ first, then C#, then the shader extensions.
    ext_map: dict[str, str] = dict.fromkeys(cpp_suffixes, "cpp")
    ext_map[".cs"] = "csharp"
    for suffix in hlsl_suffixes:
        ext_map[suffix] = "hlsl"

    if extra_extensions:
        ext_map.update(extra_extensions)

    fw_name = "generic"
    if framework:
        fw_name = framework.name

    excluded_dirs = frozenset({
        ".git", ".vs", "Binaries", "Build", "DerivedDataCache",
        "Intermediate", "Saved", "ThirdParty",
    })
    non_module_names = frozenset({
        "Private", "Public", "Classes", "Inc", "Src", "Source",
        "Include", "Internal", "Tests", "Test",
    })

    return ProjectConfig(
        extension_to_language=ext_map,
        exclude_parts=excluded_dirs,
        source_marker="Source",
        categories=frozenset(),
        invalid_module_names=non_module_names,
        framework_name=fw_name,
    )
330
+
331
+
332
def make_generic_project(
    extra_extensions: dict[str, str] | None = None,
) -> ProjectConfig:
    """Build a framework-agnostic project configuration.

    Args:
        extra_extensions: Additional ``extension -> language`` entries.  They
            are merged last, so they override the built-in mapping.

    Returns:
        A ``ProjectConfig`` tagged ``"generic"`` with an empty source marker,
        no categories, and no invalid module names.
    """
    # (language, extensions) in the order the entries are inserted.
    language_suffixes: tuple[tuple[str, tuple[str, ...]], ...] = (
        # C/C++
        ("cpp", (".h", ".hpp", ".hh", ".inl")),
        ("c", (".c",)),
        ("cpp", (".cpp", ".cc", ".cxx")),
        # C#
        ("csharp", (".cs",)),
        # Java
        ("java", (".java",)),
        # Go
        ("go", (".go",)),
        # Rust
        ("rust", (".rs",)),
        # JavaScript/TypeScript
        ("javascript", (".js", ".jsx", ".mjs")),
        ("typescript", (".ts", ".tsx")),
        # Kotlin
        ("kotlin", (".kt", ".kts")),
        # Swift
        ("swift", (".swift",)),
        # Python
        ("python", (".py", ".pyi")),
        # HLSL
        ("hlsl", (".hlsl", ".fx", ".fxh")),
        # GLSL
        ("glsl", (".glsl", ".vert", ".frag", ".comp", ".geom", ".tesc", ".tese")),
    )

    ext_map: dict[str, str] = {}
    for language, suffixes in language_suffixes:
        for suffix in suffixes:
            ext_map[suffix] = language

    if extra_extensions:
        ext_map.update(extra_extensions)

    excluded_dirs = frozenset({".git", ".vs", "node_modules", "__pycache__"})
    return ProjectConfig(
        extension_to_language=ext_map,
        exclude_parts=excluded_dirs,
        source_marker="",
        categories=frozenset(),
        invalid_module_names=frozenset(),
        framework_name="generic",
    )
374
+
375
+
376
+ # ── Language Registry ──────────────────────────────────────────────────────
377
+
378
# Module-level registry: language name -> zero-argument factory.
_LANGUAGE_FACTORIES: dict[str, Callable[[], LanguageConfig]] = {}


def register_language(name: str, factory: Callable[[], LanguageConfig]) -> None:
    """Store *factory* in the registry under *name*.

    Registering a name that is already present replaces the earlier factory.
    """
    _LANGUAGE_FACTORIES.update({name: factory})
384
+
385
+
386
def get_language(name: str) -> LanguageConfig:
    """Return the config produced by the factory registered under *name*.

    The factory is invoked on every call.

    Raises:
        ValueError: If no factory is registered under *name*; the message
            lists the registered names in sorted order.
    """
    if name in _LANGUAGE_FACTORIES:
        return _LANGUAGE_FACTORIES[name]()
    raise ValueError(
        f"Unknown language: {name!r}. Registered: {sorted(_LANGUAGE_FACTORIES.keys())}"
    )
393
+
394
+
395
def registered_languages() -> list[str]:
    """Return the names of all registered languages as a new sorted list."""
    names = list(_LANGUAGE_FACTORIES)
    names.sort()
    return names
398
+
399
+
400
+ # Re-export language factory functions from the languages/ subpackage.
401
+ # This import triggers registration of all language factories.
402
+ from . import languages # noqa: E402, F401