ctxgraph-code 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ctxgraph_code/__init__.py CHANGED
@@ -0,0 +1 @@
1
+ 
@@ -0,0 +1,4 @@
1
+ from __future__ import annotations
2
+
3
+ from .languages import EXTENSION_LANG, LANG_QUERIES # noqa: F401
4
+ from .analyzer import TSAnalyzer, TSAnalyzerResult # noqa: F401
@@ -0,0 +1,249 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional
4
+
5
+ from ctxgraph_code.analyzers.treesitter.languages import (
6
+ EXTENSION_LANG,
7
+ LANG_QUERIES,
8
+ )
9
+
10
+
11
class TSAnalyzerResult:
    """Container for the graph fragments produced by one analysis run.

    Attributes:
        nodes: Node dictionaries collected so far.
        edges: Edge dictionaries collected so far.
    """

    nodes: list[dict]
    edges: list[dict]

    def __init__(
        self,
        nodes: Optional[list[dict]] = None,
        edges: Optional[list[dict]] = None,
    ):
        """Store the given lists, substituting a fresh list for any falsy argument."""
        # ``or`` (not an ``is None`` test) is deliberate: an empty list passed
        # in is replaced by a new one, exactly like ``None``.
        self.nodes, self.edges = nodes or [], edges or []

    def to_dict(self) -> dict:
        """Return the result as ``{"nodes": [...], "edges": [...]}``.

        The returned dict references the instance's own lists (no copy).
        """
        return dict(nodes=self.nodes, edges=self.edges)
21
+
22
+
23
class TSAnalyzer:
    """Build graph nodes and edges for one source file using tree-sitter.

    The language is chosen from the file extension via ``EXTENSION_LANG`` and
    the tree-sitter queries for it come from ``LANG_QUERIES``. Analysis is
    best-effort: an unsupported language, a grammar/parser that cannot be
    loaded, or a query that fails all yield "nothing found" instead of an
    exception.
    """

    # Class-level caches shared by every instance, so each grammar/parser is
    # loaded at most once per process. A cached ``None`` records a failed load
    # and prevents retrying it.
    _parser_cache: dict[str, object] = {}
    _lang_cache: dict[str, object] = {}

    # (key in the per-language query dict, ``type`` of the emitted node),
    # in the order definitions are extracted.
    _DEF_KINDS = (
        ("functions", "function"),
        ("methods", "method"),
        ("classes", "class"),
        ("structs", "struct"),
        ("interfaces", "interface"),
        ("traits", "trait"),
        ("types", "type"),
        ("modules", "module"),
    )

    def __init__(self, file_path, root_path):
        """Remember the file and resolve its language.

        Args:
            file_path: Path object of the file to analyze; must be located
                under ``root_path`` (``relative_to`` is used).
            root_path: Path object of the project root.
        """
        self.file_path = file_path
        self.root_path = root_path
        # Always use forward slashes so ids are identical across platforms.
        self.rel = str(file_path.relative_to(root_path)).replace("\\", "/")
        self.ext = file_path.suffix.lower()
        self.lang_name = EXTENSION_LANG.get(self.ext, "")

    @staticmethod
    def _debug(message, *args):
        """Log a swallowed failure (with traceback) at DEBUG level."""
        import logging

        logging.getLogger(__name__).debug(message, *args, exc_info=True)

    def can_handle(self) -> bool:
        """Return True when queries exist for this file's language."""
        return self.lang_name in LANG_QUERIES

    def analyze(self, source: str) -> TSAnalyzerResult:
        """Parse ``source`` and return the extracted nodes and edges.

        Args:
            source: Full text of the file.

        Returns:
            A result holding one ``file`` node followed by definition, import
            and call nodes with their edges; an empty result when the language
            is unsupported or the grammar/parser/tree is unavailable.
        """
        if not self.can_handle():
            return TSAnalyzerResult()

        lang = self._get_lang(self.lang_name)
        if not lang:
            return TSAnalyzerResult()

        parser = self._get_parser(self.lang_name)
        if not parser:
            return TSAnalyzerResult()

        # Encode once: the parser needs bytes and ``size_bytes`` must count
        # bytes, not characters (they differ for non-ASCII text).
        encoded = source.encode("utf-8")
        tree = parser.parse(encoded)
        if not tree or not tree.root_node:
            return TSAnalyzerResult()

        queries = LANG_QUERIES[self.lang_name]
        result = TSAnalyzerResult()

        # File node: the parent of everything else extracted below.
        file_id = f"{self.root_path}:{self.rel}"
        result.nodes.append({
            "id": file_id,
            "type": "file",
            "name": self.file_path.name,
            "path": self.rel,
            "parent_id": None,
            "summary": None,
            "importance": 0.5,
            "size_bytes": len(encoded),
            "lineno": 0,
        })

        self._extract_defs(lang, parser, tree, queries, file_id, result)
        self._extract_imports(lang, tree, queries, file_id, result)
        self._extract_calls(lang, tree, queries, file_id, result)

        return result

    def _get_lang(self, name: str):
        """Return the cached grammar for ``name``, or None if it cannot be loaded."""
        if name not in self._lang_cache:
            try:
                from tree_sitter_language_pack import get_language

                self._lang_cache[name] = get_language(name)
            except Exception:
                self._debug("could not load tree-sitter grammar %r", name)
                self._lang_cache[name] = None
        return self._lang_cache[name]

    def _get_parser(self, name: str):
        """Return the cached parser for ``name``, or None if it cannot be created."""
        if name not in self._parser_cache:
            try:
                from tree_sitter_language_pack import get_parser

                self._parser_cache[name] = get_parser(name)
            except Exception:
                self._debug("could not create tree-sitter parser %r", name)
                self._parser_cache[name] = None
        return self._parser_cache[name]

    def _run_query(self, lang, query_str, node):
        """Run ``query_str`` against ``node`` and return its captures.

        Returns:
            Whatever ``QueryCursor.captures`` returns (callers use it as a
            mapping of capture name to nodes), or ``{}`` when compiling or
            running the query raises.
        """
        import tree_sitter as ts

        try:
            query = ts.Query(lang, query_str)
            return ts.QueryCursor(query).captures(node)
        except Exception:
            # Best-effort by design, but leave a trace: an invalid query would
            # otherwise be indistinguishable from "no matches".
            self._debug("tree-sitter query failed for %s", self.rel)
            return {}

    def _extract_defs(self, lang, parser, tree, queries, file_id, result):
        """Append one node and one ``defines`` edge per distinct definition name.

        ``parser`` is unused; it is kept so the signature stays unchanged.
        Names are de-duplicated across all definition kinds because the node id
        is derived from the name alone (``<file_id>::<name>``); the first kind
        in ``_DEF_KINDS`` that yields a name wins.
        """
        defs_seen: set[str] = set()

        for tag_name, sym_type in self._DEF_KINDS:
            qs = queries.get(tag_name)
            if not qs:
                continue

            caps = self._run_query(lang, qs, tree.root_node)
            if not caps:
                continue

            importance = 0.6 if sym_type in ("class", "struct", "interface") else 0.5

            for name_node in caps.get("name", []):
                name = name_node.text.decode("utf-8", errors="replace")
                if name in defs_seen:
                    continue
                defs_seen.add(name)

                # tree-sitter rows are 0-based; stored line numbers are 1-based.
                lineno = name_node.start_point[0] + 1 if name_node.start_point else 0
                node_id = f"{file_id}::{name}"

                result.nodes.append({
                    "id": node_id,
                    "type": sym_type,
                    "name": name,
                    "path": self.rel,
                    "parent_id": file_id,
                    "summary": None,
                    "importance": importance,
                    "size_bytes": 0,
                    "lineno": lineno,
                })
                result.edges.append({
                    "source_id": file_id,
                    "target_id": node_id,
                    "relation": "defines",
                    "weight": 1.0,
                })

    def _extract_imports(self, lang, tree, queries, file_id, result):
        """Append one ``import`` node and ``imports`` edge per distinct import path.

        Every capture of the ``imports`` query is treated as a path, whatever
        its capture name. Surrounding quote and angle-bracket characters are
        stripped from the captured text.
        """
        qs = queries.get("imports")
        if not qs:
            return

        caps = self._run_query(lang, qs, tree.root_node)
        if not caps:
            return

        seen: set[str] = set()
        for captured in caps.values():
            for path_node in captured:
                path = path_node.text.decode("utf-8", errors="replace").strip("\"'<>")
                if path in seen:
                    continue
                seen.add(path)

                import_id = f"{file_id}:import:{path}"
                result.nodes.append({
                    "id": import_id,
                    "type": "import",
                    "name": path,
                    "path": self.rel,
                    "parent_id": file_id,
                    "summary": None,
                    "importance": 0.3,
                    "size_bytes": 0,
                    "lineno": 0,
                })
                result.edges.append({
                    "source_id": file_id,
                    "target_id": import_id,
                    "relation": "imports",
                    "weight": 1.0,
                })

    def _extract_calls(self, lang, tree, queries, file_id, result):
        """Append one ``call`` node and ``calls`` edge per distinct callee name.

        Only the ``call_name`` capture is read; the recorded line is that of
        the first occurrence of each name.
        """
        qs = queries.get("calls")
        if not qs:
            return

        caps = self._run_query(lang, qs, tree.root_node)
        if not caps:
            return

        seen: set[str] = set()
        for call_node in caps.get("call_name", []):
            name = call_node.text.decode("utf-8", errors="replace")
            if name in seen:
                continue
            seen.add(name)

            call_id = f"{file_id}:call:{name}"
            result.nodes.append({
                "id": call_id,
                "type": "call",
                "name": name,
                "path": self.rel,
                "parent_id": file_id,
                "summary": None,
                "importance": 0.3,
                "size_bytes": 0,
                "lineno": call_node.start_point[0] + 1 if call_node.start_point else 0,
            })
            result.edges.append({
                "source_id": file_id,
                "target_id": call_id,
                "relation": "calls",
                "weight": 0.7,
            })

    def _line_offsets(self, root_node):
        """Map start row to start column for every node spanning several rows.

        The tree is walked iteratively with an explicit stack. When several
        multi-row nodes start on the same row, the one visited last wins.
        Not used by the extraction methods in this class; kept for callers
        that may rely on it.
        """
        offsets = {}
        stack = [root_node]
        while stack:
            node = stack.pop()
            if node.start_point and node.start_point[0] != node.end_point[0]:
                offsets[node.start_point[0]] = node.start_point[1]
            stack.extend(node.children)
        return offsets
@@ -0,0 +1,229 @@
1
+ from __future__ import annotations
2
+
3
# Lower-cased file extension (with leading dot) -> tree-sitter language name.
# Written grouped by language; the comprehension flattens the groups into a
# flat extension -> language mapping.
EXTENSION_LANG: dict[str, str] = {
    extension: language
    for language, extensions in (
        ("c", (".c", ".h")),
        ("cpp", (".cpp", ".hpp", ".cc", ".cxx", ".hh", ".hxx")),
        ("javascript", (".js", ".jsx")),
        ("typescript", (".ts", ".tsx")),
        ("go", (".go",)),
        ("rust", (".rs",)),
        ("java", (".java",)),
        ("ruby", (".rb",)),
        ("kotlin", (".kt", ".kts")),
        ("swift", (".swift",)),
        ("scala", (".scala",)),
        ("perl", (".pl", ".pm")),
        ("lua", (".lua",)),
        ("groovy", (".groovy", ".gradle")),
        ("elixir", (".ex", ".exs")),
        ("c_sharp", (".cs",)),
        ("zig", (".zig",)),
        ("julia", (".jl",)),
        ("php", (".php", ".phtml")),
        ("bash", (".bash", ".sh", ".zsh")),
        ("powershell", (".ps1", ".psm1")),
        ("json", (".json",)),
        ("yaml", (".yaml", ".yml")),
        ("embedded_template", (".vue", ".erb", ".ejs")),
        ("fortran", (".f", ".f90", ".f95")),
        ("objc", (".m", ".mm")),
        ("verilog", (".v", ".vh", ".sv")),
    )
    for extension in extensions
}
56
+
57
# Tree-sitter queries per language.
#
# Outer key: tree-sitter language name (the values of EXTENSION_LANG).
# Inner key: what the query extracts. "imports" and "calls" plus the
# definition kinds "functions", "methods", "classes", "structs",
# "interfaces", "traits", "types" and "modules" are the keys the analyzer
# looks up.
#
# Capture conventions the analyzer relies on:
#   * definition queries capture the symbol's identifier as ``@name``;
#   * call queries capture the callee identifier as ``@call_name``;
#   * import queries: EVERY capture is treated as an import path, so they
#     must not capture anything else.
LANG_QUERIES: dict[str, dict[str, str]] = {
    # ── C ──────────────────────────────────────────────────────────
    "c": {
        "imports": r"""
            (preproc_include path: (string_literal) @path)
            (preproc_include path: (system_lib_string) @lib)
        """,
        "functions": r"""
            (function_definition
              declarator: (function_declarator declarator: (identifier) @name)
            ) @func
        """,
        "structs": r"""
            (struct_specifier name: (type_identifier) @name) @struct
        """,
        "calls": r"""
            (call_expression function: (identifier) @call_name)
        """,
    },
    # ── C++ ────────────────────────────────────────────────────────
    "cpp": {
        "imports": r"""
            (preproc_include path: (string_literal) @path)
            (preproc_include path: (system_lib_string) @lib)
        """,
        "functions": r"""
            (function_definition
              declarator: (function_declarator declarator: (identifier) @name)
            ) @func
        """,
        "structs": r"""
            (struct_specifier name: (type_identifier) @name) @struct
        """,
        "classes": r"""
            (class_specifier name: (type_identifier) @name) @class
        """,
        "calls": r"""
            (call_expression function: (identifier) @call_name)
        """,
    },
    # ── JavaScript ─────────────────────────────────────────────────
    "javascript": {
        "imports": r"""
            (import_statement source: (string) @source)
        """,
        "functions": r"""
            (function_declaration name: (identifier) @name) @func
        """,
        "classes": r"""
            (class_declaration name: (identifier) @name) @class
        """,
        "calls": r"""
            (call_expression function: (identifier) @call_name)
        """,
    },
    # ── TypeScript ─────────────────────────────────────────────────
    "typescript": {
        "imports": r"""
            (import_statement source: (string) @source)
        """,
        "functions": r"""
            (function_declaration name: (identifier) @name) @func
        """,
        "classes": r"""
            (class_declaration name: (type_identifier) @name) @class
        """,
        "interfaces": r"""
            (interface_declaration name: (type_identifier) @name) @interface
        """,
        "calls": r"""
            (call_expression function: (identifier) @call_name)
        """,
    },
    # ── Go ─────────────────────────────────────────────────────────
    "go": {
        # Match import_spec anywhere so grouped ``import ( ... )`` blocks are
        # covered too, and capture only the path literal so an alias
        # (``f "fmt"``) does not end up in the import name.
        "imports": r"""
            (import_spec path: (_) @import_path)
        """,
        "functions": r"""
            (function_declaration name: (identifier) @name) @func
        """,
        "types": r"""
            (type_declaration (type_spec name: (type_identifier) @name)) @type
        """,
        "calls": r"""
            (call_expression function: (identifier) @call_name)
        """,
    },
    # ── Rust ───────────────────────────────────────────────────────
    "rust": {
        "imports": r"""
            (use_declaration (scoped_identifier) @path)
        """,
        "functions": r"""
            (function_item name: (identifier) @name) @func
        """,
        "structs": r"""
            (struct_item name: (type_identifier) @name) @struct
        """,
        "traits": r"""
            (trait_item name: (type_identifier) @name) @trait
        """,
        "calls": r"""
            (call_expression function: (identifier) @call_name)
        """,
    },
    # ── Java ───────────────────────────────────────────────────────
    "java": {
        # The imported name is a plain (unnamed-field) child of
        # import_declaration; ``scoped_identifier:`` is not a field of that
        # node, and an unknown field name makes the whole query invalid.
        "imports": r"""
            (import_declaration (scoped_identifier) @path)
        """,
        "classes": r"""
            (class_declaration name: (identifier) @name) @class
        """,
        "interfaces": r"""
            (interface_declaration name: (identifier) @name) @interface
        """,
        "methods": r"""
            (method_declaration name: (identifier) @name) @method
        """,
        "calls": r"""
            (method_invocation name: (identifier) @call_name)
        """,
    },
    # ── Ruby ───────────────────────────────────────────────────────
    "ruby": {
        # NOTE(review): ``(identifier "require")`` asks for an anonymous
        # "require" child inside an identifier, which presumably never
        # matches (or fails to compile) -- confirm against the grammar.
        # Matching on the method name needs an ``#eq?`` predicate on a
        # capture, and the import extraction currently treats every capture
        # as a path, so both must be changed together.
        "imports": r"""
            (call method: (identifier "require") arguments: (argument_list (string) @path))
        """,
        "functions": r"""
            (method name: (identifier) @name) @method
        """,
        "classes": r"""
            (class name: (constant) @name) @class
        """,
        "modules": r"""
            (module name: (constant) @name) @module
        """,
        "calls": r"""
            (call method: (identifier) @call_name)
        """,
    },
    # ── Python (via tree-sitter, for non-ast fallback) ─────────────
    "python": {
        "functions": r"""
            (function_definition name: (identifier) @name) @func
        """,
        "classes": r"""
            (class_definition name: (identifier) @name) @class
        """,
        # The Python grammar names the node ``call``; ``call_expression``
        # does not exist there and invalidates the query.
        "calls": r"""
            (call function: (identifier) @call_name)
        """,
    },
}
212
+
213
# Languages for which call extraction is available.
CALL_SUPPORT: set[str] = {
    "c",
    "cpp",
    "javascript",
    "typescript",
    "go",
    "rust",
    "java",
    "ruby",
    "python",
}

# Languages for which class/type extraction is available.
TYPE_SUPPORT: set[str] = {
    "c",
    "cpp",
    "javascript",
    "typescript",
    "go",
    "rust",
    "java",
    "ruby",
    "python",
}

# Languages for which import extraction is available ("python" is absent).
IMPORT_SUPPORT: set[str] = {
    "c",
    "cpp",
    "javascript",
    "typescript",
    "go",
    "rust",
    "java",
    "ruby",
}