apisec-code-bolt 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apisec_code_bolt/__init__.py +42 -0
- apisec_code_bolt/__main__.py +11 -0
- apisec_code_bolt/analysis/__init__.py +96 -0
- apisec_code_bolt/analysis/analyzer.py +2309 -0
- apisec_code_bolt/analysis/binding_tracker.py +341 -0
- apisec_code_bolt/analysis/call_graph.py +1197 -0
- apisec_code_bolt/analysis/call_graph_types.py +332 -0
- apisec_code_bolt/analysis/call_resolver.py +988 -0
- apisec_code_bolt/analysis/capability_tagger.py +322 -0
- apisec_code_bolt/analysis/config_scanner.py +197 -0
- apisec_code_bolt/analysis/data_flow.py +1883 -0
- apisec_code_bolt/analysis/dependency_extractor.py +959 -0
- apisec_code_bolt/analysis/flow_analysis.py +1406 -0
- apisec_code_bolt/analysis/hof_catalog.py +61 -0
- apisec_code_bolt/analysis/integration_detector.py +1399 -0
- apisec_code_bolt/analysis/literal_scanner.py +300 -0
- apisec_code_bolt/analysis/path_normalizer.py +55 -0
- apisec_code_bolt/analysis/read_site_detector.py +310 -0
- apisec_code_bolt/analysis/request_patterns.py +162 -0
- apisec_code_bolt/analysis/sensitivity_classifier.py +224 -0
- apisec_code_bolt/analysis/sink_evidence.py +333 -0
- apisec_code_bolt/analysis/url_prefix_resolver.py +338 -0
- apisec_code_bolt/cli/__init__.py +5 -0
- apisec_code_bolt/cli/exit_codes.py +17 -0
- apisec_code_bolt/cli/main.py +1069 -0
- apisec_code_bolt/cloud/__init__.py +1 -0
- apisec_code_bolt/cloud/apisec_client.py +118 -0
- apisec_code_bolt/cloud/client.py +255 -0
- apisec_code_bolt/core/__init__.py +75 -0
- apisec_code_bolt/core/config.py +528 -0
- apisec_code_bolt/core/credentials.py +65 -0
- apisec_code_bolt/core/discovery.py +433 -0
- apisec_code_bolt/core/log_format.py +115 -0
- apisec_code_bolt/core/manifest.py +1009 -0
- apisec_code_bolt/core/repo.py +280 -0
- apisec_code_bolt/core/state.py +59 -0
- apisec_code_bolt/core/telemetry.py +451 -0
- apisec_code_bolt/core/types.py +587 -0
- apisec_code_bolt/fingerprinting/__init__.py +1 -0
- apisec_code_bolt/frameworks/__init__.py +29 -0
- apisec_code_bolt/frameworks/_jwt_common.py +50 -0
- apisec_code_bolt/frameworks/auth_helpers.py +437 -0
- apisec_code_bolt/frameworks/base.py +608 -0
- apisec_code_bolt/frameworks/dotnet/__init__.py +17 -0
- apisec_code_bolt/frameworks/dotnet/_path_helpers.py +43 -0
- apisec_code_bolt/frameworks/dotnet/aspnet_plugin.py +2546 -0
- apisec_code_bolt/frameworks/dotnet/grpc_plugin.py +559 -0
- apisec_code_bolt/frameworks/dotnet/jwt_config_extractor.py +545 -0
- apisec_code_bolt/frameworks/dotnet/legacy_aspnet_plugin.py +732 -0
- apisec_code_bolt/frameworks/dotnet/refit_plugin.py +374 -0
- apisec_code_bolt/frameworks/dotnet/wcf_plugin.py +1239 -0
- apisec_code_bolt/frameworks/java/__init__.py +6 -0
- apisec_code_bolt/frameworks/java/_annotations.py +167 -0
- apisec_code_bolt/frameworks/java/_constraints.py +128 -0
- apisec_code_bolt/frameworks/java/graphql_plugin.py +287 -0
- apisec_code_bolt/frameworks/java/jaxrs_plugin.py +748 -0
- apisec_code_bolt/frameworks/java/jwt_config_extractor.py +361 -0
- apisec_code_bolt/frameworks/java/micronaut_plugin.py +1059 -0
- apisec_code_bolt/frameworks/java/spring_plugin.py +1293 -0
- apisec_code_bolt/frameworks/js/__init__.py +8 -0
- apisec_code_bolt/frameworks/js/express_plugin.py +391 -0
- apisec_code_bolt/frameworks/js/fastify_plugin.py +381 -0
- apisec_code_bolt/frameworks/js/graphql_plugin.py +198 -0
- apisec_code_bolt/frameworks/js/nestjs_plugin.py +423 -0
- apisec_code_bolt/frameworks/python/__init__.py +19 -0
- apisec_code_bolt/frameworks/python/celery_plugin.py +393 -0
- apisec_code_bolt/frameworks/python/click_plugin.py +427 -0
- apisec_code_bolt/frameworks/python/django_plugin.py +867 -0
- apisec_code_bolt/frameworks/python/fastapi/__init__.py +28 -0
- apisec_code_bolt/frameworks/python/fastapi/plugin.py +1390 -0
- apisec_code_bolt/frameworks/python/flask_plugin.py +205 -0
- apisec_code_bolt/frameworks/python/graphql_plugin.py +274 -0
- apisec_code_bolt/frameworks/python/prefect_plugin.py +251 -0
- apisec_code_bolt/frameworks/python/webhook_plugin.py +255 -0
- apisec_code_bolt/parsing/__init__.py +62 -0
- apisec_code_bolt/parsing/base.py +554 -0
- apisec_code_bolt/parsing/csharp/__init__.py +5 -0
- apisec_code_bolt/parsing/csharp/language_services.py +203 -0
- apisec_code_bolt/parsing/csharp/literals.py +72 -0
- apisec_code_bolt/parsing/csharp/parser.py +1158 -0
- apisec_code_bolt/parsing/csharp/type_resolver.py +568 -0
- apisec_code_bolt/parsing/js/__init__.py +5 -0
- apisec_code_bolt/parsing/js/language_services.py +118 -0
- apisec_code_bolt/parsing/js/parser.py +622 -0
- apisec_code_bolt/parsing/jvm/__init__.py +7 -0
- apisec_code_bolt/parsing/jvm/language_services.py +270 -0
- apisec_code_bolt/parsing/jvm/parser.py +774 -0
- apisec_code_bolt/parsing/jvm/type_resolver.py +422 -0
- apisec_code_bolt/parsing/python/__init__.py +150 -0
- apisec_code_bolt/parsing/python/cbv_extractor.py +606 -0
- apisec_code_bolt/parsing/python/constant_resolver.py +500 -0
- apisec_code_bolt/parsing/python/cross_file_resolver.py +1054 -0
- apisec_code_bolt/parsing/python/dynamic_route_detector.py +532 -0
- apisec_code_bolt/parsing/python/expression_utils.py +221 -0
- apisec_code_bolt/parsing/python/extraction_types.py +271 -0
- apisec_code_bolt/parsing/python/language_services.py +487 -0
- apisec_code_bolt/parsing/python/parameter_analyzer.py +789 -0
- apisec_code_bolt/parsing/python/parser.py +719 -0
- apisec_code_bolt/parsing/python/path_resolver.py +576 -0
- apisec_code_bolt/parsing/python/router_registry.py +806 -0
- apisec_code_bolt/parsing/python/type_resolver.py +730 -0
- apisec_code_bolt/parsing/python/visitors.py +1544 -0
- apisec_code_bolt/parsing/services.py +544 -0
- apisec_code_bolt/query/__init__.py +1 -0
- apisec_code_bolt/query/ast_cache.py +182 -0
- apisec_code_bolt/query/executor.py +283 -0
- apisec_code_bolt/query/handlers.py +832 -0
- apisec_code_bolt-0.1.0.dist-info/METADATA +230 -0
- apisec_code_bolt-0.1.0.dist-info/RECORD +111 -0
- apisec_code_bolt-0.1.0.dist-info/WHEEL +4 -0
- apisec_code_bolt-0.1.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,1158 @@
|
|
|
1
|
+
"""
|
|
2
|
+
C# source-code parser built on tree-sitter.
|
|
3
|
+
|
|
4
|
+
Converts a .cs file into the language-agnostic ``ParsedFile`` dataclass
|
|
5
|
+
consumed by framework plugins. The goal is completeness for the surface
|
|
6
|
+
we care about (ASP.NET Core controllers, middleware, configuration) rather
|
|
7
|
+
than full-fidelity C# semantics.
|
|
8
|
+
|
|
9
|
+
AST terminology (tree-sitter-c-sharp node types used here)
|
|
10
|
+
----------------------------------------------------------
|
|
11
|
+
compilation_unit → top-level container
|
|
12
|
+
using_directive → using System; / using X = Y;
|
|
13
|
+
namespace_declaration → namespace Foo { … }
|
|
14
|
+
file_scoped_namespace_declaration → namespace Foo;
|
|
15
|
+
class_declaration → class / record / struct
|
|
16
|
+
interface_declaration → interface
|
|
17
|
+
method_declaration → instance method
|
|
18
|
+
constructor_declaration → constructor (skipped for route analysis)
|
|
19
|
+
attribute_list → [HttpGet("/")] — zero or more before a member
|
|
20
|
+
parameter → method parameter with optional [From…] attribute
|
|
21
|
+
field_declaration → class-level field
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import logging
|
|
27
|
+
from pathlib import Path
|
|
28
|
+
from typing import ClassVar
|
|
29
|
+
|
|
30
|
+
import tree_sitter_c_sharp as tscsharp
|
|
31
|
+
from tree_sitter import Language, Node, Parser
|
|
32
|
+
|
|
33
|
+
from ...core.types import CodeLocation, QualifiedName
|
|
34
|
+
from ...core.types import Language as Lang
|
|
35
|
+
from ..base import (
|
|
36
|
+
BaseParser,
|
|
37
|
+
ParsedArgument,
|
|
38
|
+
ParsedCallSite,
|
|
39
|
+
ParsedClass,
|
|
40
|
+
ParsedDecorator,
|
|
41
|
+
ParsedField,
|
|
42
|
+
ParsedFile,
|
|
43
|
+
ParsedFunction,
|
|
44
|
+
ParsedImport,
|
|
45
|
+
ParsedParameter,
|
|
46
|
+
)
|
|
47
|
+
|
|
48
|
+
logger = logging.getLogger(__name__)
|
|
49
|
+
|
|
50
|
+
# ---------------------------------------------------------------------------
|
|
51
|
+
# Module-level tree-sitter setup (shared across all parser instances)
|
|
52
|
+
# ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
# C# grammar handle, built once at import time from the tree-sitter-c-sharp binding.
_CS_LANGUAGE: Language = Language(tscsharp.language())
# Parser bound to the C# grammar; kept at module level so it is shared rather
# than rebuilt per parser instance.
_PARSER: Parser = Parser(_CS_LANGUAGE)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# ---------------------------------------------------------------------------
|
|
59
|
+
# Helpers
|
|
60
|
+
# ---------------------------------------------------------------------------
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _text(node: Node) -> str:
    """Decode a tree-sitter node's source bytes as UTF-8.

    Undecodable bytes are substituted with the Unicode replacement character.
    A node whose ``text`` is empty or ``None`` yields ``""``.
    """
    raw = node.text
    if not raw:
        return ""
    return raw.decode("utf-8", errors="replace")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _loc(node: Node, file_path: Path | None) -> CodeLocation:
    """Build a ``CodeLocation`` describing where *node* sits in its file.

    Start and end rows from tree-sitter are shifted by one to give 1-based
    line numbers; the start column is passed through unchanged. When no file
    path is supplied the file is reported as ``"unknown"``.
    """
    start_row, start_col = node.start_point[0], node.start_point[1]
    end_row = node.end_point[0]
    source = str(file_path) if file_path else "unknown"
    return CodeLocation(
        file=source,
        line=start_row + 1,
        column=start_col,
        end_line=end_row + 1,
    )
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _child_by_type(node: Node, *types: str) -> Node | None:
    """Find the first direct child of *node* whose type is one of *types*.

    Returns ``None`` when no direct child matches.
    """
    return next((kid for kid in node.children if kid.type in types), None)
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _children_by_type(node: Node, *types: str) -> list[Node]:
    """Return every direct child of *node* whose type is one of *types*, in order."""
    matches: list[Node] = []
    for kid in node.children:
        if kid.type in types:
            matches.append(kid)
    return matches
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _named_children(node: Node) -> list[Node]:
    """Return the direct children of *node* whose ``is_named`` flag is set, in order."""
    named: list[Node] = []
    for kid in node.children:
        if kid.is_named:
            named.append(kid)
    return named
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _identifier_text(node: Node) -> str:
    """Return the node's text with any generic or array suffix removed.

    Everything from the first ``<`` onward is dropped, then everything from
    the first ``[`` onward, and the remainder is stripped of surrounding
    whitespace: ``"Task<ActionResult<T>>"`` becomes ``"Task"``.
    """
    head, _, _ = _text(node).partition("<")
    head, _, _ = head.partition("[")
    return head.strip()
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _extract_base_type_name(node: Node) -> str | None:
    """Resolve the primary base type name for one entry of a base_list.

    Supported node types:

    - ``identifier``: the text is returned unchanged.
    - ``generic_name``: type arguments are dropped
      (``IEndpoint<T1,T2>`` gives ``IEndpoint``).
    - ``qualified_name`` without a direct ``generic_name`` child: the full
      dotted path with newlines and spaces removed
      (``Microsoft.AspNetCore.Mvc.Controller``).
    - ``qualified_name`` with a direct ``generic_name`` child (fluent builder
      chains such as ``EndpointBaseAsync.WithRequest<T>.WithActionResult<T>``):
      the first direct ``identifier`` child, or the result for the first
      nested ``qualified_name`` that resolves.

    Any other node type, or a generic chain with nothing resolvable, yields
    ``None``.
    """
    kind = node.type

    if kind == "identifier":
        return _text(node)

    if kind == "generic_name":
        ident = _child_by_type(node, "identifier")
        if ident:
            return _text(ident)
        return _identifier_text(node)

    if kind != "qualified_name":
        return None

    is_generic_chain = any(part.type == "generic_name" for part in node.children)
    if not is_generic_chain:
        # Plain dotted name: keep the whole path, minus layout whitespace.
        return _text(node).replace("\n", "").replace(" ", "")

    # Generic chain: walk left to right and return the first simple root found.
    for part in node.children:
        if part.type == "identifier":
            return _text(part)
        if part.type == "qualified_name":
            resolved = _extract_base_type_name(part)
            if resolved:
                return resolved

    return None
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _extract_lambda_params(lambda_node: Node) -> list[tuple[str, str]]:
    """Extract typed parameter pairs from a lambda_expression node.

    Returns a list of ``(type_annotation, parameter_name)`` tuples. Untyped
    parameters (``x => x`` or ``(a, b) => ...``) are ignored, and an empty
    list is returned when the lambda has no ``parameter_list`` child.

    C# tree-sitter structure::

        lambda_expression
          attribute_list?          ← [Authorize(...)] etc.
          modifier?                ← async
          parameter_list           ← (TypeA a, TypeB b)
            parameter              ← TypeA a
              <type_node>          ← identifier / generic_name / qualified_name
              identifier           ← a
          =>
          block | expression

    Within each ``parameter`` the following children are not part of the
    type/name pair and are skipped:

    - ``attribute_list`` (``[FromBody]`` etc.), which precedes the type;
    - ``modifier`` / ``this`` (``ref``, ``out``, ...), matching what
      ``_extract_parameter`` skips — otherwise the modifier would be reported
      as the type;
    - everything from ``=`` onward (a default value), which would otherwise
      be reported as the parameter name.
    """
    params: list[tuple[str, str]] = []
    param_list = _child_by_type(lambda_node, "parameter_list")
    if param_list is None:
        return params

    skipped = (",", "(", ")", "attribute_list", "modifier", "this")

    for param in param_list.children:
        if param.type != "parameter":
            continue

        parts: list[Node] = []
        for child in param.children:
            if child.type == "=":
                # Default value follows; it is neither the type nor the name.
                break
            if child.type not in skipped:
                parts.append(child)

        if len(parts) < 2:
            # A lone identifier is an untyped parameter.
            continue

        # The type may be identifier, generic_name, qualified_name, array_type, etc.
        type_text = _text(parts[0]).replace("\n", " ").strip()
        name_text = _text(parts[-1]).strip()
        if type_text and name_text and type_text != name_text:
            params.append((type_text, name_text))

    return params
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def _extract_lambda_attributes(lambda_node: Node) -> list[str]:
    """Collect the attribute names attached directly to a lambda_expression.

    For each ``attribute`` inside each direct ``attribute_list`` child, the
    name is taken from its first ``identifier`` / ``qualified_name`` child and
    reduced to the segment after the last dot (e.g. ``'Authorize'``,
    ``'AllowAnonymous'``).
    """
    names: list[str] = []
    attr_lists = (kid for kid in lambda_node.children if kid.type == "attribute_list")
    for attr_list in attr_lists:
        for attr in attr_list.children:
            if attr.type != "attribute":
                continue
            name_node = _child_by_type(attr, "identifier", "qualified_name")
            if name_node:
                names.append(_text(name_node).rsplit(".", 1)[-1])
    return names
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def _string_literal_value(node: Node) -> str | None:
    """Extract the source text between the quotes of a string literal node.

    tree-sitter represents ``"hello"`` as::

        string_literal
          "                        (quote)
          string_literal_content   → 'hello'
          "                        (quote)

    A literal containing escapes is split into several children
    (``string_literal_content`` and ``escape_sequence`` interleaved), so all
    of them are concatenated; returning only the first content child would
    truncate the value at the first escape. Escape sequences are kept as
    written in the source (they are not decoded).

    Verbatim strings (``@"..."``) may expose
    ``verbatim_string_literal_content``; when no content children are present
    the surrounding quotes are stripped from the raw text instead.

    Returns ``None`` when *node* does not look like a quoted string.
    """
    if node.type in ("string_literal", "verbatim_string_literal"):
        fragments = [
            _text(child)
            for child in node.children
            if child.type
            in (
                "string_literal_content",
                "verbatim_string_literal_content",
                "escape_sequence",
            )
        ]
        if fragments:
            return "".join(fragments)

    # Fallback: strip surrounding quotes from the raw text.
    raw = _text(node)
    if (raw.startswith('"') and raw.endswith('"')) or (raw.startswith("'") and raw.endswith("'")):
        return raw[1:-1]
    if raw.startswith('@"') and raw.endswith('"'):
        return raw[2:-1]
    return None
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
# ---------------------------------------------------------------------------
|
|
223
|
+
# Attribute (C# annotation) extraction
|
|
224
|
+
# ---------------------------------------------------------------------------
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def _extract_attribute(attr_node: Node) -> ParsedDecorator:
    """Parse a single ``[Name(arg1, Key = val2)]`` attribute node into a ParsedDecorator.

    tree-sitter node shape::

        attribute
          identifier | qualified_name       → attribute name
          attribute_argument_list?
            attribute_argument*             → positional or named

    The name comes from the first ``identifier`` / ``qualified_name`` child
    (generic/array suffix stripped); if there is none, the attribute text up
    to the first ``(`` is used.

    Argument values that are string literals are stored without their quotes;
    any other value is stored as its raw source text. ``raw_arguments`` holds
    the full text of the argument list, or ``None`` when there is none.
    """
    name_node = _child_by_type(attr_node, "identifier", "qualified_name")
    name = _identifier_text(name_node) if name_node else _text(attr_node).split("(")[0].strip()

    positional: list[str] = []
    named: dict[str, str] = {}

    arg_list = _child_by_type(attr_node, "attribute_argument_list")
    if arg_list:
        for arg in _children_by_type(arg_list, "attribute_argument"):
            # tree-sitter C# flattens named arguments as:
            #   attribute_argument
            #     identifier   (key)
            #     = or :
            #     expression   (value)
            # There is NO intermediate name_equals wrapper — detect by structure.
            # Both separators are accepted: `Key = value` (property) and
            # `key: value` (named constructor parameter). Without the colon
            # case the key identifier itself would be recorded as a
            # positional value.
            arg_children = [c for c in arg.children if c.type not in (",",)]
            named_detected = (
                len(arg_children) >= 3
                and arg_children[0].type == "identifier"
                and _text(arg_children[1]) in ("=", ":")
            )
            if named_detected:
                key = _text(arg_children[0])
                val_node = arg_children[2]
                sval = _string_literal_value(val_node)
                named[key] = sval if sval is not None else _text(val_node)
            else:
                # Positional — take the first named value node.
                val_nodes = [c for c in arg.children if c.is_named]
                if val_nodes:
                    sval = _string_literal_value(val_nodes[0])
                    positional.append(sval if sval is not None else _text(val_nodes[0]))

    return ParsedDecorator(
        name=name,
        positional_args=positional,
        arguments=named,
        raw_arguments=_text(arg_list) if arg_list else None,
    )
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _extract_attributes_before(node: Node) -> list[ParsedDecorator]:
    """Gather attributes from ``attribute_list`` siblings that precede *node*.

    The parent's children are scanned in order up to (not including) *node*,
    identified by node id. Every ``attribute`` inside every ``attribute_list``
    seen on the way is parsed. A node without a parent yields an empty list.

    NOTE(review): attribute lists are collected from *all* earlier siblings,
    not only the ones directly adjacent to *node* — confirm callers expect that.
    """
    parent = node.parent
    if parent is None:
        return []

    target_id = node.id
    collected: list[ParsedDecorator] = []
    for sibling in parent.children:
        if sibling.id == target_id:
            break
        if sibling.type != "attribute_list":
            continue
        collected.extend(
            _extract_attribute(attr) for attr in _children_by_type(sibling, "attribute")
        )
    return collected
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _attributes_on_node(node: Node) -> list[ParsedDecorator]:
    """Gather attributes from ``attribute_list`` children at the head of a declaration.

    Children are scanned in order. Each ``attribute_list`` contributes its
    parsed ``attribute`` entries; scanning ends at the first ``modifier``,
    ``predefined_type``, ``identifier``, ``block`` or ``declaration_list``
    child. Children of any other type are passed over.
    """
    stop_types = ("modifier", "predefined_type", "identifier", "block", "declaration_list")
    found: list[ParsedDecorator] = []
    for child in node.children:
        kind = child.type
        if kind == "attribute_list":
            found.extend(
                _extract_attribute(attr) for attr in _children_by_type(child, "attribute")
            )
            continue
        if kind in stop_types:
            # Past the attribute section: a modifier, type, name or body was reached.
            break
    return found
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
# ---------------------------------------------------------------------------
|
|
325
|
+
# Qualified-name builder
|
|
326
|
+
# ---------------------------------------------------------------------------
|
|
327
|
+
|
|
328
|
+
|
|
329
|
+
def _qualified_name(node: Node) -> str:
    """Flatten a qualified_name / member_access_expression into a dotted string.

    tree-sitter C# nests dotted names, e.g. ``Microsoft.AspNetCore.Mvc``::

        qualified_name
          qualified_name           ← "Microsoft.AspNetCore"
            identifier "Microsoft"
            .
            identifier "AspNetCore"
          .
          identifier "Mvc"

    For those two node types the direct children are visited in order:
    identifiers contribute their text, nested dotted nodes are flattened
    recursively, and ``generic_name`` children contribute their base name
    without type arguments. Every other child (including the ``.`` tokens)
    is ignored, and the segments are joined with ``"."``.

    Any other node type is returned as its raw text.
    """
    dotted_types = ("qualified_name", "member_access_expression")
    if node.type not in dotted_types:
        # Covers identifier / predefined_type as well as any unrecognised node.
        return _text(node)

    segments: list[str] = []
    for part in node.children:
        part_kind = part.type
        if part_kind == "identifier":
            segments.append(_text(part))
        elif part_kind in dotted_types:
            segments.append(_qualified_name(part))
        elif part_kind == "generic_name":
            # e.g. UseMiddleware<AuthMiddleware> → "UseMiddleware"
            segments.append(_identifier_text(part))
    return ".".join(segments)
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
# ---------------------------------------------------------------------------
|
|
362
|
+
# Import extraction
|
|
363
|
+
# ---------------------------------------------------------------------------
|
|
364
|
+
|
|
365
|
+
|
|
366
|
+
def _extract_using(node: Node, file_path: Path | None) -> ParsedImport | None:
    """Convert a using_directive node into a ParsedImport.

    Examples::

        using Microsoft.AspNetCore.Mvc;   → module="Microsoft.AspNetCore.Mvc"
        using static System.Math;         → module="System.Math"
        using Alias = Some.Long.Type;     → module="Some.Long.Type", alias="Alias"

    Two tree shapes are accepted for the alias form:

    - a ``name_equals`` wrapper child holding the alias identifier, followed
      by the target name;
    - the flattened ``identifier``, ``=``, ``<type>`` sequence (the same
      flattening this module handles for attribute arguments). Without this
      case the alias identifier would be reported as the module.

    Returns ``None`` when no target name can be found. ``names`` is always
    empty.
    """
    children = node.children
    name_types = ("qualified_name", "identifier", "alias_qualified_name")
    alias: str | None = None
    target: Node | None = None

    wrapper = _child_by_type(node, "name_equals")
    eq_idx = next((i for i, c in enumerate(children) if c.type == "="), None)

    if wrapper:
        id_node = _child_by_type(wrapper, "identifier")
        alias = _text(id_node) if id_node else None
    elif eq_idx is not None:
        # Flattened alias: the identifier just before "=" is the alias and the
        # first named node after it is the aliased type.
        if eq_idx > 0 and children[eq_idx - 1].type == "identifier":
            alias = _text(children[eq_idx - 1])
        target = next(
            (c for c in children[eq_idx + 1 :] if c.is_named and c.type != "comment"),
            None,
        )
        if target is None:
            return None

    if target is None:
        # Skip the 'using' keyword, 'static' and ';' — take the first name-like child.
        target = next((c for c in children if c.type in name_types), None)
    if target is None:
        return None

    return ParsedImport(
        module=_qualified_name(target),
        names=[],
        alias=alias,
        location=_loc(node, file_path),
    )
|
|
390
|
+
|
|
391
|
+
|
|
392
|
+
# ---------------------------------------------------------------------------
|
|
393
|
+
# Parameter extraction
|
|
394
|
+
# ---------------------------------------------------------------------------
|
|
395
|
+
|
|
396
|
+
# Attribute names that state where a parameter's value is bound from.
_PARAM_ATTRIBUTES: frozenset[str] = frozenset(
    (
        "FromRoute",
        "FromQuery",
        "FromBody",
        "FromHeader",
        "FromForm",
        "FromServices",
        "FromKeyedServices",
    )
)

# Attribute names that express validation constraints.
_CONSTRAINT_ATTRIBUTES: frozenset[str] = frozenset(
    (
        "Required",
        "Range",
        "MinLength",
        "MaxLength",
        "StringLength",
        "RegularExpression",
        "EmailAddress",
        "Url",
        "Phone",
        "Compare",
        "CreditCard",
    )
)

# Attribute names related to authentication / authorization.
_SECURITY_ATTRIBUTES: frozenset[str] = frozenset(
    (
        "Authorize",
        "AllowAnonymous",
        "RequireScope",
        "Roles",
    )
)
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
def _extract_parameter(param_node: Node, file_path: Path | None) -> ParsedParameter:
    """Extract a ParsedParameter from a tree-sitter parameter node.

    Expected node shape::

        parameter
          attribute_list*     → [FromRoute], [Required], etc.
          type                (predefined_type, identifier, nullable_type, generic_name …)
          identifier          → parameter name
          = default_value?

    Metadata: for each attribute, ``metadata[attribute_name]`` is its first
    positional argument, else its ``Name`` / ``name`` named argument, else
    ``True``. All named arguments of the attribute are then merged into the
    metadata as well (e.g. ErrorMessage, Min, Max).

    Type and name are read only from the children *before* ``=``. Scanning
    past it would let the default expression be mistaken for the type
    (``Foo x = null``) or for the name (``Foo x = bar``) whenever the real
    type is a plain identifier.
    """
    metadata: dict[str, object] = {}

    # Collect parameter-level attributes → metadata keys.
    for child in param_node.children:
        if child.type == "attribute_list":
            for attr in _children_by_type(child, "attribute"):
                dec = _extract_attribute(attr)
                val: object = (
                    dec.positional_args[0]
                    if dec.positional_args
                    else (dec.arguments.get("Name", dec.arguments.get("name", True)))
                )
                metadata[dec.name] = val
                # Capture named attribute keys too (e.g. ErrorMessage, Min, Max).
                metadata.update(dec.arguments)

    type_ann: str | None = None
    param_name = ""
    default_val: str | None = None

    identifiers: list[Node] = []
    type_node: Node | None = None
    all_children = param_node.children
    for idx, child in enumerate(all_children):
        kind = child.type
        if kind == "=":
            # Whatever follows is the default value, not part of the declaration.
            if idx + 1 < len(all_children):
                default_val = _text(all_children[idx + 1])
            break
        if kind in ("attribute_list", "modifier"):
            continue
        if kind == "identifier":
            identifiers.append(child)
        elif kind not in (",", "(", ")", "this") and type_node is None:
            type_node = child  # first non-identifier type node

    if type_node:
        type_ann = _text(type_node).rstrip("?")  # strip nullable marker
    if identifiers:
        param_name = _text(identifiers[-1])
        # No explicit type node (e.g. "CreateUserRequest body" — both are
        # identifiers): the first identifier is the type, the last is the name.
        if type_ann is None and len(identifiers) >= 2:
            type_ann = _text(identifiers[0])
            param_name = _text(identifiers[-1])

    return ParsedParameter(
        name=param_name,
        type_annotation=type_ann,
        default_value=default_val,
        metadata=metadata,
        location=_loc(param_node, file_path),
    )
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
# ---------------------------------------------------------------------------
|
|
506
|
+
# Argument extraction helpers
|
|
507
|
+
# ---------------------------------------------------------------------------
|
|
508
|
+
|
|
509
|
+
|
|
510
|
+
def _arg_from_node(node: Node, position: int, name: str | None) -> ParsedArgument:
    """Classify an expression node and wrap it as a ParsedArgument.

    Classification by node type:

    - string / verbatim string literal → literal of type ``"str"``
    - integer literal (``l``/``L``/``u``/``U`` suffix dropped) → ``"int"``
    - real literal (``f``/``F``/``d``/``D``/``m``/``M`` suffix dropped) → ``"float"``
    - boolean literal → ``"bool"``; null literal → ``"None"``
    - identifier, member access or qualified name → variable reference
    - lambda / anonymous method → expression ``"<lambda>"`` carrying the
      lambda's typed parameters and attribute names
    - interpolated string → string interpolation with the raw text
    - anything else, including numeric literals Python cannot parse →
      generic expression with the raw text
    """
    kind = node.type

    def _literal(value: object, type_name: str) -> ParsedArgument:
        # Shared constructor call for all literal kinds.
        return ParsedArgument(
            position=position,
            name=name,
            is_literal=True,
            literal_value=value,
            literal_type=type_name,
        )

    def _variable(var_name: str) -> ParsedArgument:
        return ParsedArgument(
            position=position, name=name, is_variable=True, variable_name=var_name
        )

    if kind in ("string_literal", "verbatim_string_literal"):
        return _literal(_string_literal_value(node), "str")

    if kind == "integer_literal":
        digits = _text(node).rstrip("lLuU")
        try:
            return _literal(int(digits), "int")
        except ValueError:
            # Not parseable by int(): fall through to the generic expression case.
            pass

    if kind == "real_literal":
        digits = _text(node).rstrip("fFdDmM")
        try:
            return _literal(float(digits), "float")
        except ValueError:
            # Not parseable by float(): fall through to the generic expression case.
            pass

    if kind in ("boolean_literal", "true", "false"):
        return _literal(_text(node).lower() == "true", "bool")

    if kind == "null_literal":
        return _literal(None, "None")

    if kind == "identifier":
        return _variable(_text(node))

    if kind in ("member_access_expression", "qualified_name"):
        return _variable(_qualified_name(node))

    if kind in ("lambda_expression", "anonymous_method_expression", "parenthesized_lambda_expression"):
        typed_params = _extract_lambda_params(node)
        attr_names = _extract_lambda_attributes(node)
        return ParsedArgument(
            position=position,
            name=name,
            is_expression=True,
            expression_text="<lambda>",
            lambda_parameter_types=typed_params,
            lambda_attribute_names=attr_names,
        )

    if kind == "interpolated_string_expression":
        return ParsedArgument(
            position=position, name=name, is_string_interpolation=True, expression_text=_text(node)
        )

    return ParsedArgument(
        position=position, name=name, is_expression=True, expression_text=_text(node)
    )
|
|
582
|
+
|
|
583
|
+
|
|
584
|
+
def _extract_invocation_args(invocation_node: Node) -> list[ParsedArgument]:
    """Extract arguments from an invocation_expression node.

    C# argument_list shape::

        argument_list
          (              punctuation
          argument*
            name_colon?   → named arg key
            expression    → value
          ,              punctuation (between args)
          )              punctuation

    Named arguments are recognised in two shapes: wrapped in a ``name_colon``
    child, or flattened as ``identifier``, ``:``, ``expression`` directly
    under ``argument`` (the same flattening this module handles for attribute
    arguments). Without the flattened case the name identifier would be taken
    as the value and the keyword name would be lost.

    Each argument's position is its index among the ``argument`` children.
    An empty list is returned when there is no ``argument_list`` child.
    """
    arg_list = _child_by_type(invocation_node, "argument_list")
    if not arg_list:
        return []

    args: list[ParsedArgument] = []
    for pos, arg in enumerate(_children_by_type(arg_list, "argument")):
        children = arg.children
        kwarg_name: str | None = None
        value_start = 0

        name_colon = _child_by_type(arg, "name_colon")
        if name_colon:
            id_node = _child_by_type(name_colon, "identifier")
            kwarg_name = _text(id_node) if id_node else None
        elif (
            len(children) >= 3
            and children[0].type == "identifier"
            and children[1].type == ":"
        ):
            # Flattened named argument: skip the "name :" prefix.
            kwarg_name = _text(children[0])
            value_start = 2

        # Value: first named child that isn't the name wrapper. Anonymous
        # tokens such as ref/out keywords are skipped by the is_named test.
        val_node = next(
            (c for c in children[value_start:] if c.is_named and c.type != "name_colon"),
            None,
        )
        if val_node is not None:
            args.append(_arg_from_node(val_node, pos, kwarg_name))

    return args
|
|
621
|
+
|
|
622
|
+
|
|
623
|
+
# ---------------------------------------------------------------------------
|
|
624
|
+
# Call site extraction
|
|
625
|
+
# ---------------------------------------------------------------------------
|
|
626
|
+
|
|
627
|
+
|
|
628
|
+
def _extract_call_sites(body_node: Node, file_path: Path | None) -> list[ParsedCallSite]:
    """Return the call sites that ``_walk_calls`` collects beneath *body_node*."""
    collected: list[ParsedCallSite] = []
    _walk_calls(body_node, collected, file_path)
    return collected
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def _walk_calls(
    node: Node,
    results: list[ParsedCallSite],
    file_path: Path | None,
) -> None:
    """
    Append a ParsedCallSite to *results* for every invocation_expression
    in the subtree rooted at *node* (the node itself included).

    Nodes are visited parent-first, children left to right. The callee name
    comes from the invocation's first child; argument lists are left empty.
    """
    # Explicit stack instead of recursion; children are pushed in reverse so
    # that they are popped in their original left-to-right order.
    pending = [node]
    while pending:
        current = pending.pop()
        kids = current.children
        if current.type == "invocation_expression" and kids:
            callee_node = kids[0]
            if callee_node:
                callee = _qualified_name(callee_node)
                results.append(
                    ParsedCallSite(
                        callee_name=callee,
                        location=_loc(current, file_path),
                        arguments=[],
                    )
                )
        pending.extend(reversed(kids))
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
# ---------------------------------------------------------------------------
|
|
656
|
+
# Method extraction
|
|
657
|
+
# ---------------------------------------------------------------------------
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
def _extract_method(
    method_node: Node,
    owner_class: str,
    namespace: str,
    file_path: Path | None,
) -> ParsedFunction:
    """Convert a method_declaration node into ParsedFunction.

    Args:
        method_node: The ``method_declaration`` node.
        owner_class: Simple name of the declaring type; stored as
            ``owner_type`` and used to build the qualified name.
        namespace: Enclosing namespace ("" when there is none).
        file_path: Source path forwarded to the location helper.

    Returns:
        A ParsedFunction. The name falls back to ``"unknown"`` when no
        identifier precedes the parameter list. ``body_line_count`` is the
        number of newline characters in the body text.
    """
    children = method_node.children
    decorators = _attributes_on_node(method_node)

    # Name: the identifier immediately preceding the parameter_list.
    # We cannot simply take the first identifier because for simple return
    # types like "IActionResult" the return type IS an identifier and comes
    # before the method name.
    param_list_idx = next(
        (i for i, child in enumerate(children) if child.type == "parameter_list"),
        None,
    )
    name = "unknown"
    name_idx: int | None = None
    if param_list_idx is not None:
        for i in range(param_list_idx - 1, -1, -1):
            if children[i].type == "identifier":
                name_idx = i
                name = _text(children[i])
                break

    # Return type: first non-attribute, non-modifier child that comes before
    # the name. The name is recognised by its position, not by its text, so a
    # method whose return type has the same simple name as the method itself
    # (e.g. ``Status Status()``) still gets its return type recorded.
    return_type: str | None = None
    for i, child in enumerate(children):
        if child.type in ("attribute_list", "modifier"):
            continue
        if i == name_idx or child.type == "parameter_list":
            break
        # Drop trailing "?" so nullable annotations compare equal to the base type.
        return_type = _text(child).rstrip("?")
        break

    # Parameters
    params: list[ParsedParameter] = []
    param_list = _child_by_type(method_node, "parameter_list")
    if param_list:
        params = [
            _extract_parameter(p, file_path)
            for p in _children_by_type(param_list, "parameter")
        ]

    # Modifier keywords are collected once and queried for async / static.
    modifiers = {_text(c) for c in _children_by_type(method_node, "modifier")}
    is_async = "async" in modifiers
    is_static = "static" in modifiers

    cls_module = f"{namespace}.{owner_class}" if namespace else owner_class
    qualified = QualifiedName(module=cls_module, name=name)

    # Body source: either a block body or an expression body (=> ...).
    body = _child_by_type(method_node, "block", "arrow_expression_clause")
    body_text = ""
    body_lines = 0
    if body:
        body_text = _text(body)
        body_lines = body_text.count("\n")

    return ParsedFunction(
        name=name,
        qualified_name=qualified,
        location=_loc(method_node, file_path),
        parameters=params,
        return_type=return_type,
        decorators=decorators,
        is_async=is_async,
        binding="static" if is_static else "instance",
        owner_type=owner_class,
        body_line_count=body_lines,
        body_source=body_text or None,
    )
|
|
734
|
+
|
|
735
|
+
|
|
736
|
+
# ---------------------------------------------------------------------------
|
|
737
|
+
# Field extraction
|
|
738
|
+
# ---------------------------------------------------------------------------
|
|
739
|
+
|
|
740
|
+
|
|
741
|
+
def _field_access_modifier(node: Node) -> str:
    """Derive the effective access modifier from a field/property declaration node.

    Explicit keywords are resolved in this order: ``private`` wins (so
    ``private protected`` is reported as ``"private"``), then
    ``protected internal``, ``protected``, ``internal`` and ``public``.

    When no accessibility keyword is present the C# default applies:
    members of a class, struct or record are private. In any other context
    (for example an interface, or when the enclosing declaration cannot be
    determined) ``"public"`` is returned.
    """
    mods = {_text(c) for c in _children_by_type(node, "modifier")}
    if "private" in mods:
        return "private"
    if "protected" in mods:
        return "protected internal" if "internal" in mods else "protected"
    if "internal" in mods:
        return "internal"
    if "public" in mods:
        return "public"

    # No explicit accessibility: look through the declaration_list wrapper to
    # find the type declaration that owns this member.
    owner = node.parent
    while owner is not None and owner.type == "declaration_list":
        owner = owner.parent
    if owner is not None and owner.type in (
        "class_declaration",
        "struct_declaration",
        "record_declaration",
        "record_struct_declaration",
    ):
        return "private"
    return "public"
|
|
753
|
+
|
|
754
|
+
|
|
755
|
+
def _extract_field(field_node: Node, file_path: Path | None) -> list[ParsedField]:
    """
    Turn one field_declaration into a ParsedField per declared variable.

    Expected shape:
        field_declaration
            attribute_list*
            modifier*
            variable_declaration
                type
                variable_declarator+
                    identifier
                    = initializer?

    All fields produced from one declaration share its type, access
    modifier, attributes and location. Returns an empty list when the node
    has no ``variable_declaration`` child.
    """

    def _initializer(declarator: Node) -> str | None:
        # First named child after the "=" token; a string literal yields its
        # value, anything else its source text.
        after_equals = False
        for part in declarator.children:
            if _text(part) == "=":
                after_equals = True
            elif after_equals and part.is_named:
                literal = _string_literal_value(part)
                return literal if literal is not None else _text(part)
        return None

    attrs = _attributes_on_node(field_node)
    visibility = _field_access_modifier(field_node)
    declaration = _child_by_type(field_node, "variable_declaration")
    if not declaration:
        return []

    # The declared type is the first child; a trailing "?" is stripped.
    decl_children = declaration.children
    first = decl_children[0] if decl_children else None
    declared_type = _text(first).rstrip("?") if first else None

    collected: list[ParsedField] = []
    for declarator in _children_by_type(declaration, "variable_declarator"):
        ident = _child_by_type(declarator, "identifier")
        field_name = _text(ident) if ident else "unknown"
        initial = _initializer(declarator)
        collected.append(
            ParsedField(
                name=field_name,
                type_annotation=declared_type,
                default_value=initial,
                access_modifier=visibility,
                decorators=attrs,
                location=_loc(field_node, file_path),
            )
        )
    return collected
|
|
803
|
+
|
|
804
|
+
|
|
805
|
+
# ---------------------------------------------------------------------------
|
|
806
|
+
# Class extraction
|
|
807
|
+
# ---------------------------------------------------------------------------
|
|
808
|
+
|
|
809
|
+
|
|
810
|
+
def _extract_class(
    cls_node: Node,
    namespace: str,
    file_path: Path | None,
) -> ParsedClass:
    """Build a ParsedClass from a class_declaration (or record/interface) node.

    The name is the node's first ``identifier`` child (``"Unknown"`` if
    absent). From the ``declaration_list`` body, method declarations become
    methods while field and property declarations both become fields; other
    member kinds are ignored.
    """
    attrs = _attributes_on_node(cls_node)

    ident = _child_by_type(cls_node, "identifier")
    class_name = "Unknown" if not ident else _text(ident)
    qualified = QualifiedName(module=namespace, name=class_name)

    # Base types: keep every base_list child for which a name can be extracted.
    bases: list[str] = []
    base_list = _child_by_type(cls_node, "base_list")
    if base_list:
        bases = [
            base
            for entry in base_list.children
            if (base := _extract_base_type_name(entry))
        ]

    abstract = "abstract" in [_text(m) for m in _children_by_type(cls_node, "modifier")]

    member_methods: list[ParsedFunction] = []
    member_fields: list[ParsedField] = []

    body = _child_by_type(cls_node, "declaration_list")
    for member in body.children if body else ():
        kind = member.type
        if kind == "method_declaration":
            member_methods.append(_extract_method(member, class_name, namespace, file_path))
        elif kind == "field_declaration":
            member_fields += _extract_field(member, file_path)
        elif kind == "property_declaration":
            # Properties are reported as fields for surface analysis.
            member_fields += _property_as_field(member, file_path)

    return ParsedClass(
        name=class_name,
        qualified_name=qualified,
        location=_loc(cls_node, file_path),
        base_classes=bases,
        decorators=attrs,
        methods=member_methods,
        fields=member_fields,
        is_abstract=abstract,
    )
|
|
857
|
+
|
|
858
|
+
|
|
859
|
+
def _property_as_field(prop_node: Node, file_path: Path | None) -> list[ParsedField]:
    """
    Treat a property_declaration as a field for surface analysis.

        public string JwtSecret { get; set; } = "...";
        [FromQuery] public int Page { get; set; }

    The type is the first child that is not an attribute list, modifier or
    accessor list; the name is the last identifier that follows the type
    and precedes any ``=`` initializer. The default value is the first
    named child after ``=``: the string value for string literals,
    otherwise the source text.

    Returns a single-element list, or an empty list when no property name
    could be determined.
    """
    decorators = _attributes_on_node(prop_node)
    access = _field_access_modifier(prop_node)

    children = prop_node.children
    # Position of the initializer's "=" token, if the property has one.
    eq_idx = next((i for i, c in enumerate(children) if _text(c) == "="), None)

    # Only the part before "=" can contain the type and the name. Scanning
    # past it would mistake a bare-identifier initializer
    # (``{ get; set; } = DefaultName;``) for the property name.
    header = children if eq_idx is None else children[:eq_idx]

    type_ann: str | None = None
    name = ""
    for child in header:
        if child.type in ("attribute_list", "modifier", "accessor_list"):
            continue
        if child.type == "identifier":
            if type_ann is None:
                # First identifier is the type for user-defined types (e.g. PriorityLevel)
                type_ann = _text(child)
            else:
                # Subsequent identifiers are the property name; the last one wins.
                name = _text(child)
        elif type_ann is None and child.type != ";":
            # predefined_type, nullable_type, generic_name, qualified_name, etc.
            type_ann = _text(child).rstrip("?")

    # Initializer after accessor_list
    default_val: str | None = None
    if eq_idx is not None:
        init_node = next(
            (c for c in children[eq_idx + 1 :] if c.is_named and _text(c) != "="),
            None,
        )
        if init_node is not None:
            sval = _string_literal_value(init_node)
            default_val = sval if sval is not None else _text(init_node)

    if not name:
        return []
    return [
        ParsedField(
            name=name,
            type_annotation=type_ann,
            default_value=default_val,
            access_modifier=access,
            decorators=decorators,
            location=_loc(prop_node, file_path),
        )
    ]
|
|
915
|
+
|
|
916
|
+
|
|
917
|
+
# ---------------------------------------------------------------------------
|
|
918
|
+
# Namespace resolver
|
|
919
|
+
# ---------------------------------------------------------------------------
|
|
920
|
+
|
|
921
|
+
|
|
922
|
+
def _get_namespace(node: Node) -> str:
    """Walk up the tree to find the enclosing namespace name.

    Handles both block-scoped (``namespace Foo { }``) and file-scoped
    (``namespace Foo;``) namespace declarations. For file-scoped namespaces
    the class may be a direct sibling of the namespace node under
    ``compilation_unit``, so the root's children are scanned as well.

    Nested block namespaces are joined outermost-first, so a type inside
    ``namespace A { namespace B { ... } }`` yields ``"A.B"``.

    Returns:
        The dotted namespace name, or ``""`` when the node is not inside
        any namespace.
    """
    namespace_types = ("namespace_declaration", "file_scoped_namespace_declaration")
    parts: list[str] = []  # collected innermost-first

    def _name_of(ns_node: Node) -> str:
        # Dotted name of one namespace declaration node ("" if it has none).
        name_node = _child_by_type(ns_node, "qualified_name", "identifier")
        return _qualified_name(name_node) if name_node else ""

    current = node.parent
    while current:
        if current.type in namespace_types:
            ns_name = _name_of(current)
            if ns_name:
                parts.append(ns_name)
                if current.type == "file_scoped_namespace_declaration":
                    # A file-scoped namespace is always the outermost one.
                    break
        elif current.type == "compilation_unit":
            # File-scoped namespace: class is a sibling under compilation_unit,
            # not a child of the namespace node. Scan the root's children.
            for sibling in current.children:
                if sibling.type == "file_scoped_namespace_declaration":
                    ns_name = _name_of(sibling)
                    if ns_name:
                        parts.append(ns_name)
                        break
            break  # compilation_unit is the root — stop here

        current = current.parent
    return ".".join(reversed(parts))
|
|
952
|
+
|
|
953
|
+
|
|
954
|
+
# ---------------------------------------------------------------------------
|
|
955
|
+
# Top-level file walker
|
|
956
|
+
# ---------------------------------------------------------------------------
|
|
957
|
+
|
|
958
|
+
|
|
959
|
+
def _walk_compilation_unit(
    root: Node,
    file_path: Path | None,
) -> tuple[list[ParsedImport], list[ParsedClass], list[ParsedCallSite]]:
    """Collect imports, type declarations and call sites from a parsed tree.

    Two passes are made over the tree rooted at *root*:

    1. Declarations: ``using_directive`` nodes become imports, and
       class / interface / record / struct declarations become
       ``ParsedClass`` entries.
    2. Call sites: every ``invocation_expression``, every
       ``object_creation_expression`` and every ``member_access_expression``
       that is not itself the callee of an invocation becomes a
       ``ParsedCallSite``.

    Afterwards each call site whose ``caller_function`` is still ``None`` is
    attributed to the collected method whose line range contains it.

    Returns:
        ``(imports, classes, call_sites)``.
    """
    imports: list[ParsedImport] = []
    classes: list[ParsedClass] = []
    call_sites: list[ParsedCallSite] = []

    # Pass 1 — declarations (imports + classes)
    def _walk_declarations(node: Node) -> None:
        if node.type == "using_directive":
            imp = _extract_using(node, file_path)
            if imp:
                imports.append(imp)
        elif node.type in (
            "class_declaration",
            "interface_declaration",
            "record_declaration",
            "struct_declaration",
        ):
            ns = _get_namespace(node)
            classes.append(_extract_class(node, ns, file_path))
            # _extract_class handles the type's members; don't double-extract.
            # NOTE(review): because the walk stops here, type declarations
            # nested inside this type are not visited by this pass.
            return
        for child in node.children:
            _walk_declarations(child)

    # Pass 2 — call sites (invocations + object creations across the whole tree,
    # including inside class/method bodies)
    def _walk_calls_all(node: Node) -> None:
        if node.type == "invocation_expression":
            func_node = node.children[0] if node.children else None
            if func_node:
                callee = _qualified_name(func_node)
                args = _extract_invocation_args(node)

                # Detect chaining: if the callee is a member_access_expression
                # whose receiver is itself an invocation, record that receiver's
                # start line so the plugin can correlate auth chainers (e.g.
                # .RequireAuthorization()) with the Map* call they modify.
                receiver_expr: str | None = None
                if func_node.type == "member_access_expression" and func_node.children:
                    recv_node = func_node.children[0]
                    if recv_node.type == "invocation_expression":
                        # Chained on another call — record the inner call's line
                        receiver_expr = f"line:{recv_node.start_point[0] + 1}"
                    elif recv_node.type in (
                        "identifier",
                        "qualified_name",
                        "member_access_expression",
                    ):
                        receiver_expr = _qualified_name(recv_node) or None

                # is_method_call: true when the receiver, ignoring leading
                # underscores, starts with a lowercase letter (instance
                # variable / parameter, "this" / "base"). Static class
                # references (uppercase) stay False so they go through the
                # qualified-call resolver instead. Chained receivers carry
                # the synthetic "line:N" marker and are flagged as well.
                # No receiver name is special-cased: a variable that happens
                # to be called `line` is an ordinary instance receiver.
                is_method = False
                if receiver_expr:
                    first_char = receiver_expr.lstrip("_")[0:1]
                    is_method = bool(
                        first_char and (first_char.islower() or receiver_expr in ("this", "base"))
                    )

                call_sites.append(
                    ParsedCallSite(
                        callee_name=callee,
                        location=_loc(node, file_path),
                        arguments=args,
                        receiver_expression=receiver_expr,
                        is_method_call=is_method,
                    )
                )

        elif node.type == "object_creation_expression":
            # new ClassName(…) — children: [new, type_node, argument_list]
            # Skip the "new" keyword to find the type identifier.
            for child in node.children:
                if child.type in ("identifier", "qualified_name", "generic_name"):
                    call_sites.append(
                        ParsedCallSite(
                            callee_name=_identifier_text(child),
                            location=_loc(node, file_path),
                            arguments=[],
                        )
                    )
                    break

        elif node.type == "member_access_expression":
            # Capture static-member references (e.g. SecurityAlgorithms.HmacSha256)
            # as pseudo call-sites for algorithm detection — but ONLY when this
            # member_access is NOT the callee of an invocation_expression (which
            # would already have been captured above, causing a duplicate).
            parent = node.parent
            is_callee = (
                parent is not None
                and parent.type == "invocation_expression"
                and parent.children
                and parent.children[0].id == node.id
            )
            if not is_callee:
                full = _qualified_name(node)
                if full:
                    call_sites.append(
                        ParsedCallSite(
                            callee_name=full,
                            location=_loc(node, file_path),
                            arguments=[],
                        )
                    )

        for child in node.children:
            _walk_calls_all(child)

    _walk_declarations(root)

    # Build method-range index from the parsed classes so call sites can be
    # attributed to their enclosing method (caller_function).
    # Entry: (start_line, end_line, QualifiedName)
    _method_ranges: list[tuple[int, int, QualifiedName]] = []
    for _cls in classes:
        for _m in _cls.methods:
            _start = _m.location.line
            # A missing end line is replaced by a generous upper bound.
            _end = _m.location.end_line or _start + 10000
            _method_ranges.append((_start, _end, _m.qualified_name))
    # Sort by start line for deterministic lookup
    _method_ranges.sort(key=lambda t: t[0])

    def _find_caller(line: int) -> QualifiedName | None:
        """Return the innermost method that contains *line*."""
        best: tuple[int, int, QualifiedName] | None = None
        for start, end, qname in _method_ranges:
            if start <= line <= end:
                # Prefer the method with the latest start (innermost scope)
                if best is None or start > best[0]:
                    best = (start, end, qname)
        return best[2] if best else None

    _walk_calls_all(root)

    # Annotate call sites with their enclosing method
    for _cs in call_sites:
        if _cs.caller_function is None:
            _cs.caller_function = _find_caller(_cs.location.line)

    return imports, classes, call_sites
|
|
1104
|
+
|
|
1105
|
+
|
|
1106
|
+
# ---------------------------------------------------------------------------
|
|
1107
|
+
# CSharpParser — public interface
|
|
1108
|
+
# ---------------------------------------------------------------------------
|
|
1109
|
+
|
|
1110
|
+
|
|
1111
|
+
class CSharpParser(BaseParser):
    """
    C# parser built on tree-sitter.

    Turns C# source into ``ParsedFile`` instances consumed by framework
    plugins. Registered automatically with ``ParserRegistry`` at import time.
    """

    LANGUAGE: ClassVar[Lang] = Lang.CSHARP
    SUPPORTED_EXTENSIONS: ClassVar[frozenset[str]] = frozenset({".cs"})

    def parse_file(self, file_path: Path) -> ParsedFile:
        """Read *file_path* as bytes and parse it.

        Raises:
            ValueError: if the file cannot be read (wraps the ``OSError``).
        """
        try:
            raw = file_path.read_bytes()
        except OSError as exc:
            raise ValueError(f"Cannot read {file_path}: {exc}") from exc
        else:
            return self._parse(raw, file_path)

    def parse_source(self, source: str, file_path: Path | None = None) -> ParsedFile:
        """Parse in-memory source text, encoded as UTF-8 with replacement on errors."""
        encoded = source.encode("utf-8", errors="replace")
        return self._parse(encoded, file_path)

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    @staticmethod
    def _parse(source_bytes: bytes, file_path: Path | None) -> ParsedFile:
        """Run the tree-sitter parser and assemble the ParsedFile."""
        tree = _PARSER.parse(source_bytes)
        found_imports, found_classes, found_calls = _walk_compilation_unit(
            tree.root_node, file_path
        )
        # A placeholder path is used when the caller did not supply one.
        effective_path = file_path if file_path else Path("unknown.cs")
        return ParsedFile(
            path=effective_path,
            language=Lang.CSHARP,
            imports=found_imports,
            classes=found_classes,
            functions=[],  # top-level functions are rare in C#; methods live in classes
            call_sites=found_calls,
            assignments=[],
        )
|
|
1149
|
+
|
|
1150
|
+
|
|
1151
|
+
# ---------------------------------------------------------------------------
|
|
1152
|
+
# Auto-registration
|
|
1153
|
+
# ---------------------------------------------------------------------------
|
|
1154
|
+
|
|
1155
|
+
from ..base import ParserRegistry  # noqa: E402

# One module-level parser instance is created and handed to the registry as a
# side effect of importing this module.
_parser_instance = CSharpParser()
ParserRegistry.register(_parser_instance)
|