ctxgraph-code 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctxgraph_code/__init__.py +1 -0
- ctxgraph_code/analyzers/treesitter/__init__.py +4 -0
- ctxgraph_code/analyzers/treesitter/analyzer.py +249 -0
- ctxgraph_code/analyzers/treesitter/languages.py +229 -0
- ctxgraph_code/cli.py +1064 -873
- ctxgraph_code/config/hooks.py +165 -0
- ctxgraph_code/config/settings.py +2 -2
- ctxgraph_code/graph/builder.py +59 -11
- {ctxgraph_code-0.3.0.dist-info → ctxgraph_code-0.4.0.dist-info}/METADATA +58 -7
- {ctxgraph_code-0.3.0.dist-info → ctxgraph_code-0.4.0.dist-info}/RECORD +13 -9
- {ctxgraph_code-0.3.0.dist-info → ctxgraph_code-0.4.0.dist-info}/WHEEL +0 -0
- {ctxgraph_code-0.3.0.dist-info → ctxgraph_code-0.4.0.dist-info}/entry_points.txt +0 -0
- {ctxgraph_code-0.3.0.dist-info → ctxgraph_code-0.4.0.dist-info}/top_level.txt +0 -0
ctxgraph_code/__init__.py
CHANGED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Optional
|
|
4
|
+
|
|
5
|
+
from ctxgraph_code.analyzers.treesitter.languages import (
|
|
6
|
+
EXTENSION_LANG,
|
|
7
|
+
LANG_QUERIES,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TSAnalyzerResult:
    """Container for the graph nodes and edges extracted from one file.

    Attributes:
        nodes: Node records, each a plain dict.
        edges: Edge records, each a plain dict.
    """

    nodes: list[dict]
    edges: list[dict]

    def __init__(self, nodes: Optional[list[dict]] = None, edges: Optional[list[dict]] = None):
        """Create a result, starting from the given lists when provided.

        Args:
            nodes: Initial node records; a new empty list is used when omitted.
            edges: Initial edge records; a new empty list is used when omitted.
        """
        # Compare against None rather than truthiness: an empty list supplied
        # by the caller must be kept (not swapped for a fresh one) so that
        # records appended later remain visible through the caller's reference.
        self.nodes = [] if nodes is None else nodes
        self.edges = [] if edges is None else edges

    def to_dict(self) -> dict:
        """Return the result as ``{"nodes": [...], "edges": [...]}``.

        The returned dict references the same list objects held by this
        instance; no copy is made.
        """
        return {"nodes": self.nodes, "edges": self.edges}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class TSAnalyzer:
    """Extract graph nodes and edges from a single source file via tree-sitter.

    The language is chosen from the file extension through ``EXTENSION_LANG``
    and the tree-sitter query strings come from ``LANG_QUERIES``. All
    tree-sitter imports are performed lazily inside the helper methods, and
    failures there are turned into "no result" rather than exceptions.
    """

    # Caches keyed by language name. They are class attributes, so they are
    # shared by every instance. A failed lookup is stored as None, which means
    # it is not retried on later calls.
    _parser_cache: dict[str, object] = {}
    _lang_cache: dict[str, object] = {}

    # (key in the per-language query table, node "type" emitted for matches).
    # The order matters: a name already emitted by an earlier kind is skipped
    # by later kinds.
    _DEF_KINDS = (
        ("functions", "function"),
        ("methods", "method"),
        ("classes", "class"),
        ("structs", "struct"),
        ("interfaces", "interface"),
        ("traits", "trait"),
        ("types", "type"),
        ("modules", "module"),
    )

    def __init__(self, file_path, root_path):
        """Record the file location and resolve its language name.

        Args:
            file_path: Path object of the file to analyze; must support
                ``relative_to``, ``suffix`` and ``name``.
            root_path: Path the relative file path is computed against.
        """
        self.file_path = file_path
        self.root_path = root_path
        # Relative path with forward slashes regardless of platform.
        self.rel = str(file_path.relative_to(root_path)).replace("\\", "/")
        self.ext = file_path.suffix.lower()
        # Empty string when the extension is unknown.
        self.lang_name = EXTENSION_LANG.get(self.ext, "")

    def can_handle(self) -> bool:
        """Return True when queries are defined for this file's language."""
        return self.lang_name in LANG_QUERIES

    def analyze(self, source: str) -> TSAnalyzerResult:
        """Parse ``source`` and collect file, definition, import and call nodes.

        Args:
            source: Full text of the file.

        Returns:
            A result whose first node is the file itself, followed by whatever
            the definition, import and call queries matched. An empty result
            is returned when the language has no queries, when the language
            or parser cannot be loaded, or when parsing yields no tree.
        """
        if not self.can_handle():
            return TSAnalyzerResult()

        # No direct tree-sitter import here: the helpers import lazily and
        # return None on failure, so a missing optional dependency produces
        # an empty result instead of an ImportError.
        lang = self._get_lang(self.lang_name)
        if not lang:
            return TSAnalyzerResult()

        parser = self._get_parser(self.lang_name)
        if not parser:
            return TSAnalyzerResult()

        # Encode once; the same bytes feed the parser and the size field.
        encoded = source.encode("utf-8")
        tree = parser.parse(encoded)
        if not tree or not tree.root_node:
            return TSAnalyzerResult()

        queries = LANG_QUERIES[self.lang_name]
        result = TSAnalyzerResult()

        # File node
        file_id = f"{self.root_path}:{self.rel}"
        result.nodes.append({
            "id": file_id,
            "type": "file",
            "name": self.file_path.name,
            "path": self.rel,
            "parent_id": None,
            "summary": None,
            "importance": 0.5,
            # UTF-8 byte length; len(source) would count characters instead.
            "size_bytes": len(encoded),
            "lineno": 0,
        })

        # Extract definitions (functions, classes, structs, etc.)
        self._extract_defs(lang, parser, tree, queries, file_id, result)

        # Extract imports
        self._extract_imports(lang, tree, queries, file_id, result)

        # Extract calls
        self._extract_calls(lang, tree, queries, file_id, result)

        return result

    def _get_lang(self, name: str):
        """Return the cached language object for ``name``, or None on failure."""
        if name not in self._lang_cache:
            try:
                from tree_sitter_language_pack import get_language
                self._lang_cache[name] = get_language(name)
            except Exception:
                # Best effort: remember the failure so it is not retried.
                self._lang_cache[name] = None
        return self._lang_cache[name]

    def _get_parser(self, name: str):
        """Return the cached parser for ``name``, or None on failure."""
        if name not in self._parser_cache:
            try:
                from tree_sitter_language_pack import get_parser
                self._parser_cache[name] = get_parser(name)
            except Exception:
                # Best effort: remember the failure so it is not retried.
                self._parser_cache[name] = None
        return self._parser_cache[name]

    def _run_query(self, lang, query_str, node):
        """Run ``query_str`` against ``node`` and return the captures.

        Returns:
            Whatever ``QueryCursor.captures`` returns (callers here use it as
            a mapping of capture name to a list of nodes), or an empty dict
            when tree-sitter cannot be imported, the query does not compile,
            or execution fails.
        """
        try:
            # Imported inside the try so a missing dependency also degrades
            # to "no captures" instead of raising.
            import tree_sitter as ts
            query = ts.Query(lang, query_str)
            cursor = ts.QueryCursor(query)
            return cursor.captures(node)
        except Exception:
            return {}

    def _extract_defs(self, lang, parser, tree, queries, file_id, result):
        """Append one node and one ``defines`` edge per distinct definition name.

        Args:
            lang: Language object handed to ``_run_query``.
            parser: Unused; kept so the call signature stays unchanged.
            tree: Parsed tree; queries run against ``tree.root_node``.
            queries: Query table for the language, keyed by definition kind.
            file_id: Id of the file node; parent of every emitted node.
            result: Object whose ``nodes`` and ``edges`` lists are appended to.
        """
        # Names are de-duplicated across all kinds because the node id is
        # built from the file id and the bare name only.
        seen_names: set[str] = set()

        for tag_name, sym_type in self._DEF_KINDS:
            query_str = queries.get(tag_name)
            if not query_str:
                continue

            caps = self._run_query(lang, query_str, tree.root_node)
            if not caps:
                continue

            for name_node in caps.get("name", []):
                name = name_node.text.decode("utf-8", errors="replace")
                if name in seen_names:
                    continue
                seen_names.add(name)

                # start_point is (row, column) with a 0-based row.
                lineno = name_node.start_point[0] + 1 if name_node.start_point else 0
                node_id = f"{file_id}::{name}"

                result.nodes.append({
                    "id": node_id,
                    "type": sym_type,
                    "name": name,
                    "path": self.rel,
                    "parent_id": file_id,
                    "summary": None,
                    "importance": 0.6 if sym_type in ("class", "struct", "interface") else 0.5,
                    "size_bytes": 0,
                    "lineno": lineno,
                })

                result.edges.append({
                    "source_id": file_id,
                    "target_id": node_id,
                    "relation": "defines",
                    "weight": 1.0,
                })

    def _extract_imports(self, lang, tree, queries, file_id, result):
        """Append one node and one ``imports`` edge per distinct imported path.

        Every capture of the language's ``imports`` query is treated as an
        import path. Surrounding quote and angle-bracket characters are
        stripped from the captured text.
        """
        query_str = queries.get("imports")
        if not query_str:
            return

        caps = self._run_query(lang, query_str, tree.root_node)
        if not caps:
            return

        seen: set[str] = set()
        for captured in caps.values():
            for cap_node in captured:
                path = cap_node.text.decode("utf-8", errors="replace").strip("\"'<>")
                if path in seen:
                    continue
                seen.add(path)

                import_id = f"{file_id}:import:{path}"
                result.nodes.append({
                    "id": import_id,
                    "type": "import",
                    "name": path,
                    "path": self.rel,
                    "parent_id": file_id,
                    "summary": None,
                    "importance": 0.3,
                    "size_bytes": 0,
                    # Line of the first occurrence, matching how definition
                    # and call nodes report their position.
                    "lineno": cap_node.start_point[0] + 1 if cap_node.start_point else 0,
                })

                result.edges.append({
                    "source_id": file_id,
                    "target_id": import_id,
                    "relation": "imports",
                    "weight": 1.0,
                })

    def _extract_calls(self, lang, tree, queries, file_id, result):
        """Append one node and one ``calls`` edge per distinct called name.

        Only the ``call_name`` capture of the language's ``calls`` query is
        used; the line recorded is that of the first occurrence.
        """
        query_str = queries.get("calls")
        if not query_str:
            return

        caps = self._run_query(lang, query_str, tree.root_node)
        if not caps:
            return

        seen: set[str] = set()
        for call_node in caps.get("call_name", []):
            name = call_node.text.decode("utf-8", errors="replace")
            if name in seen:
                continue
            seen.add(name)

            call_id = f"{file_id}:call:{name}"
            result.nodes.append({
                "id": call_id,
                "type": "call",
                "name": name,
                "path": self.rel,
                "parent_id": file_id,
                "summary": None,
                "importance": 0.3,
                "size_bytes": 0,
                "lineno": call_node.start_point[0] + 1 if call_node.start_point else 0,
            })

            result.edges.append({
                "source_id": file_id,
                "target_id": call_id,
                "relation": "calls",
                "weight": 0.7,
            })

    def _line_offsets(self, root_node):
        """Map start row to start column for every multi-row node under ``root_node``.

        The tree is walked iteratively with an explicit stack. When several
        multi-row nodes start on the same row, the one visited last wins.
        Nothing else in this class calls this helper.
        """
        offsets = {}
        stack = [root_node]
        while stack:
            node = stack.pop()
            if node.start_point and node.start_point[0] != node.end_point[0]:
                offsets[node.start_point[0]] = node.start_point[1]
            stack.extend(node.children)
        return offsets
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
# Language name paired with the file extensions (lower-case, with the leading
# dot) that select it. Kept as an ordered table so related extensions sit
# together; it is flattened into EXTENSION_LANG below.
_EXTENSIONS_BY_LANG = (
    ("c", (".c", ".h")),
    ("cpp", (".cpp", ".hpp", ".cc", ".cxx", ".hh", ".hxx")),
    ("javascript", (".js", ".jsx")),
    ("typescript", (".ts", ".tsx")),
    ("go", (".go",)),
    ("rust", (".rs",)),
    ("java", (".java",)),
    ("ruby", (".rb",)),
    ("kotlin", (".kt", ".kts")),
    ("swift", (".swift",)),
    ("scala", (".scala",)),
    ("perl", (".pl", ".pm")),
    ("lua", (".lua",)),
    ("groovy", (".groovy", ".gradle")),
    ("elixir", (".ex", ".exs")),
    ("c_sharp", (".cs",)),
    ("zig", (".zig",)),
    ("julia", (".jl",)),
    ("php", (".php", ".phtml")),
    ("bash", (".bash", ".sh", ".zsh")),
    ("powershell", (".ps1", ".psm1")),
    ("json", (".json",)),
    ("yaml", (".yaml", ".yml")),
    ("embedded_template", (".vue", ".erb", ".ejs")),
    ("fortran", (".f", ".f90", ".f95")),
    ("objc", (".m", ".mm")),
    ("verilog", (".v", ".vh", ".sv")),
)

# File extension -> language name.
EXTENSION_LANG: dict[str, str] = {
    extension: language
    for language, extensions in _EXTENSIONS_BY_LANG
    for extension in extensions
}
|
|
56
|
+
|
|
57
|
+
# Tree-sitter query strings per language.
#
# Outer key: language name. Inner key: what the query extracts ("imports",
# "calls", or a definition kind such as "functions" / "classes" / "structs").
# Capture conventions used by the queries below:
#   * definition queries capture the identifier as @name,
#   * call queries capture the callee identifier as @call_name,
#   * import queries capture the imported path under any capture name.
LANG_QUERIES: dict[str, dict[str, str]] = {
    # ── C ──────────────────────────────────────────────────────────
    "c": {
        "imports": r"""
            (preproc_include path: (string_literal) @path)
            (preproc_include path: (system_lib_string) @lib)
        """,
        "functions": r"""
            (function_definition
                declarator: (function_declarator declarator: (identifier) @name)
            ) @func
        """,
        "structs": r"""
            (struct_specifier name: (type_identifier) @name) @struct
        """,
        "calls": r"""
            (call_expression function: (identifier) @call_name)
        """,
    },
    # ── C++ ────────────────────────────────────────────────────────
    "cpp": {
        "imports": r"""
            (preproc_include path: (string_literal) @path)
            (preproc_include path: (system_lib_string) @lib)
        """,
        "functions": r"""
            (function_definition
                declarator: (function_declarator declarator: (identifier) @name)
            ) @func
        """,
        "structs": r"""
            (struct_specifier name: (type_identifier) @name) @struct
        """,
        "classes": r"""
            (class_specifier name: (type_identifier) @name) @class
        """,
        "calls": r"""
            (call_expression function: (identifier) @call_name)
        """,
    },
    # ── JavaScript ─────────────────────────────────────────────────
    "javascript": {
        "imports": r"""
            (import_statement source: (string) @source)
        """,
        "functions": r"""
            (function_declaration name: (identifier) @name) @func
        """,
        "classes": r"""
            (class_declaration name: (identifier) @name) @class
        """,
        "calls": r"""
            (call_expression function: (identifier) @call_name)
        """,
    },
    # ── TypeScript ─────────────────────────────────────────────────
    "typescript": {
        "imports": r"""
            (import_statement source: (string) @source)
        """,
        "functions": r"""
            (function_declaration name: (identifier) @name) @func
        """,
        "classes": r"""
            (class_declaration name: (type_identifier) @name) @class
        """,
        "interfaces": r"""
            (interface_declaration name: (type_identifier) @name) @interface
        """,
        "calls": r"""
            (call_expression function: (identifier) @call_name)
        """,
    },
    # ── Go ─────────────────────────────────────────────────────────
    "go": {
        # Match import_spec wherever it appears so that grouped
        # `import ( ... )` blocks are covered too, and capture only the path
        # literal so an alias in front of it is not part of the captured text.
        "imports": r"""
            (import_spec path: (_) @import_path)
        """,
        "functions": r"""
            (function_declaration name: (identifier) @name) @func
        """,
        "types": r"""
            (type_declaration (type_spec name: (type_identifier) @name)) @type
        """,
        "calls": r"""
            (call_expression function: (identifier) @call_name)
        """,
    },
    # ── Rust ───────────────────────────────────────────────────────
    "rust": {
        "imports": r"""
            (use_declaration (scoped_identifier) @path)
        """,
        "functions": r"""
            (function_item name: (identifier) @name) @func
        """,
        "structs": r"""
            (struct_item name: (type_identifier) @name) @struct
        """,
        "traits": r"""
            (trait_item name: (type_identifier) @name) @trait
        """,
        "calls": r"""
            (call_expression function: (identifier) @call_name)
        """,
    },
    # ── Java ───────────────────────────────────────────────────────
    "java": {
        # import_declaration exposes no named fields in the Java grammar, so
        # the scoped identifier is matched as a plain child.
        "imports": r"""
            (import_declaration (scoped_identifier) @path)
        """,
        "classes": r"""
            (class_declaration name: (identifier) @name) @class
        """,
        "interfaces": r"""
            (interface_declaration name: (identifier) @name) @interface
        """,
        "methods": r"""
            (method_declaration name: (identifier) @name) @method
        """,
        "calls": r"""
            (method_invocation name: (identifier) @call_name)
        """,
    },
    # ── Ruby ───────────────────────────────────────────────────────
    "ruby": {
        # NOTE(review): `(identifier "require")` asks for an anonymous
        # "require" child of the identifier node, which presumably does not
        # exist in the Ruby grammar - verify that this query compiles. Matching
        # the method name normally needs a helper capture plus `#eq?`, but the
        # import extractor treats every capture as an import path.
        "imports": r"""
            (call method: (identifier "require") arguments: (argument_list (string) @path))
        """,
        "functions": r"""
            (method name: (identifier) @name) @method
        """,
        "classes": r"""
            (class name: (constant) @name) @class
        """,
        "modules": r"""
            (module name: (constant) @name) @module
        """,
        "calls": r"""
            (call method: (identifier) @call_name)
        """,
    },
    # ── Python (via tree-sitter, for non-ast fallback) ─────────────
    "python": {
        "functions": r"""
            (function_definition name: (identifier) @name) @func
        """,
        "classes": r"""
            (class_definition name: (identifier) @name) @class
        """,
        # The Python grammar names its call node `call`, not `call_expression`.
        "calls": r"""
            (call function: (identifier) @call_name)
        """,
    },
}
|
|
212
|
+
|
|
213
|
+
# Languages for which import extraction is supported.
IMPORT_SUPPORT: set[str] = {
    "c",
    "cpp",
    "javascript",
    "typescript",
    "go",
    "rust",
    "java",
    "ruby",
}

# Languages for which function-call extraction is supported: everything with
# import support, plus Python.
CALL_SUPPORT: set[str] = IMPORT_SUPPORT | {"python"}

# Languages for which class/type extraction is supported. Same members as
# CALL_SUPPORT, held in an independent set object.
TYPE_SUPPORT: set[str] = set(CALL_SUPPORT)
|