codegraph-ai 0.2.0__tar.gz → 0.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/PKG-INFO +1 -1
  2. codegraph_ai-0.2.2/codegraph/adapters/java_adapter.py +555 -0
  3. codegraph_ai-0.2.2/codegraph/adapters/python_adapter.py +692 -0
  4. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/cli.py +2 -1
  5. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/core.py +107 -1
  6. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/models.py +17 -0
  7. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph_ai.egg-info/PKG-INFO +1 -1
  8. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph_ai.egg-info/SOURCES.txt +2 -0
  9. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/pyproject.toml +1 -1
  10. codegraph_ai-0.2.2/tests/test_java_adapter.py +341 -0
  11. codegraph_ai-0.2.0/codegraph/adapters/python_adapter.py +0 -337
  12. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/README.md +0 -0
  13. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/__init__.py +0 -0
  14. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/__main__.py +0 -0
  15. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/adapters/__init__.py +0 -0
  16. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/adapters/base.py +0 -0
  17. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/adapters/c_adapter.py +0 -0
  18. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/adapters/js_adapter.py +0 -0
  19. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/analyzer.py +0 -0
  20. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/bug_locator.py +0 -0
  21. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/bug_parser.py +0 -0
  22. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/github_client.py +0 -0
  23. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/issue_cache.py +0 -0
  24. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/issue_fetcher.py +0 -0
  25. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/mcp_server.py +0 -0
  26. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph/qa.py +0 -0
  27. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph_ai.egg-info/dependency_links.txt +0 -0
  28. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph_ai.egg-info/entry_points.txt +0 -0
  29. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph_ai.egg-info/requires.txt +0 -0
  30. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/codegraph_ai.egg-info/top_level.txt +0 -0
  31. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/setup.cfg +0 -0
  32. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_adapters.py +0 -0
  33. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_advanced.py +0 -0
  34. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_bug_locator.py +0 -0
  35. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_bug_parser.py +0 -0
  36. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_core_schema.py +0 -0
  37. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_cross_locate.py +0 -0
  38. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_impact.py +0 -0
  39. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_incremental.py +0 -0
  40. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_indexing.py +0 -0
  41. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_integration.py +0 -0
  42. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_issue_cache.py +0 -0
  43. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_js_adapter.py +0 -0
  44. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_models.py +0 -0
  45. {codegraph_ai-0.2.0 → codegraph_ai-0.2.2}/tests/test_similar.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codegraph-ai
3
- Version: 0.2.0
3
+ Version: 0.2.2
4
4
  Summary: Hybrid graph + vector code intelligence powered by NeuG and zvec
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: neug
@@ -0,0 +1,555 @@
1
+ """Java source code adapter using tree-sitter.
2
+
3
+ Handles ``.java`` files.
4
+ Extracts:
5
+ - Class, interface, and enum definitions with inheritance
6
+ - Method and constructor definitions with full generic signatures
7
+ - Method invocations with receiver context
8
+ - Import statements (single, wildcard, static)
9
+ - Annotations on classes and methods
10
+ - JavaDoc comments
11
+ - Inner classes (prefixed with outer class name)
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from tree_sitter_language_pack import get_parser
17
+
18
+ from codegraph.adapters.base import BaseAdapter
19
+ from codegraph.models import (
20
+ CallInfo,
21
+ ParsedClass,
22
+ ParsedFunction,
23
+ ParsedImport,
24
+ ParseResult,
25
+ )
26
+
27
+
28
+ def _node_text(node) -> str:
29
+ """Return the UTF-8 text of a tree-sitter node."""
30
+ return node.text.decode("utf-8") if node and node.text else ""
31
+
32
+
33
def _extract_javadoc(node) -> str:
    """Return the JavaDoc summary immediately preceding *node*.

    Looks only at the node's direct previous sibling: if it is a block
    comment opening with ``/**``, the delimiters and leading ``*``s are
    stripped and the description lines (everything before the first
    ``@tag``) are joined into a single string.  Anything else yields an
    empty string.
    """
    comment = node.prev_sibling
    if comment is None or comment.type != "block_comment":
        return ""
    raw = _node_text(comment)
    if not raw.startswith("/**"):
        return ""
    body = raw[3:]
    if body.endswith("*/"):
        body = body[:-2]
    summary: list[str] = []
    for raw_line in body.splitlines():
        stripped = raw_line.strip()
        if stripped.startswith("*"):
            stripped = stripped[1:].strip()
        if stripped.startswith("@"):
            # Stop at the first JavaDoc tag (@param, @return, ...).
            break
        if stripped:
            summary.append(stripped)
    return " ".join(summary)
59
+
60
+
61
def _extract_annotations(node) -> list[str]:
    """Collect annotation names declared in a node's ``modifiers`` child."""
    modifiers = next(
        (child for child in node.children if child.type == "modifiers"),
        None,
    )
    if modifiers is None:
        return []
    names: list[str] = []
    for entry in modifiers.children:
        if entry.type not in ("marker_annotation", "annotation"):
            continue
        name_node = entry.child_by_field_name("name")
        if name_node:
            names.append(_node_text(name_node))
    return names
77
+
78
+
79
def _extract_modifiers(node) -> list[str]:
    """Return modifier keywords (``public``, ``static``, ...) from a node.

    Scans the node's ``modifiers`` child and keeps plain modifier
    keywords; annotations, which also appear under ``modifiers``, are
    handled separately by ``_extract_annotations``.

    Fix: the keyword list now includes the Java 17 ``sealed`` and
    ``non-sealed`` class modifiers, which were previously dropped from
    reconstructed signatures.
    """
    mods: list[str] = []
    for child in node.children:
        if child.type != "modifiers":
            continue
        for mc in child.children:
            if mc.type in (
                "public", "private", "protected", "static",
                "final", "abstract", "synchronized", "native",
                "default", "transient", "volatile", "strictfp",
                "sealed", "non-sealed",
            ):
                mods.append(_node_text(mc))
    return mods
92
+
93
+
94
def _build_method_signature(method_node) -> str:
    """Render a human-readable signature for a ``method_declaration`` node.

    Example output: ``public static <T> List<T> sort(List<T> items)``
    """
    pieces: list[str] = []

    modifiers = _extract_modifiers(method_node)
    if modifiers:
        pieces.append(" ".join(modifiers))

    # Optional generic type parameters, then the return type.
    for field in ("type_parameters", "type"):
        part = method_node.child_by_field_name(field)
        if part:
            pieces.append(_node_text(part))

    name_node = method_node.child_by_field_name("name")
    params_node = method_node.child_by_field_name("parameters")
    name = _node_text(name_node) if name_node else "?"
    params = _node_text(params_node) if params_node else "()"
    pieces.append(name + params)

    return " ".join(pieces)
120
+
121
+
122
def _build_constructor_signature(ctor_node) -> str:
    """Render a human-readable signature for a ``constructor_declaration``."""
    modifiers = _extract_modifiers(ctor_node)
    name_node = ctor_node.child_by_field_name("name")
    params_node = ctor_node.child_by_field_name("parameters")
    head = _node_text(name_node) if name_node else "?"
    tail = _node_text(params_node) if params_node else "()"
    signature = f"{head}{tail}"
    if modifiers:
        return f'{" ".join(modifiers)} {signature}'
    return signature
134
+
135
+
136
def _collect_calls(node, calls: list[CallInfo]) -> None:
    """Collect method invocations and constructor calls from a Java subtree.

    Appends a :class:`CallInfo` for every ``method_invocation`` (with the
    receiver reduced to its last ``.``-separated segment) and for every
    ``object_creation_expression`` (recorded as ``Type.<init>`` with any
    generic arguments stripped from the type name).

    Fix: traversal now uses an explicit stack instead of recursion, so
    deeply nested expressions or blocks cannot overflow Python's
    recursion limit.  Nodes are visited in the same pre-order as the
    previous recursive implementation, preserving call order.
    """
    stack = [node]
    while stack:
        current = stack.pop()
        if current.type == "method_invocation":
            name_node = current.child_by_field_name("name")
            obj_node = current.child_by_field_name("object")
            callee = _node_text(name_node) if name_node else ""
            receiver = None
            if obj_node:
                receiver = _node_text(obj_node)
                if "." in receiver:
                    # Keep only the innermost receiver: a.b.c() -> c.
                    receiver = receiver.rsplit(".", 1)[-1]
            if callee:
                calls.append(CallInfo(
                    callee_name=callee,
                    receiver=receiver,
                    raw_expression=_node_text(current).split("(")[0],
                ))
        elif current.type == "object_creation_expression":
            type_node = current.child_by_field_name("type")
            if type_node:
                type_text = _node_text(type_node)
                if "<" in type_text:
                    # Drop generic arguments: ArrayList<String> -> ArrayList.
                    type_text = type_text[:type_text.index("<")]
                calls.append(CallInfo(
                    callee_name=f"{type_text}.<init>",
                    receiver=None,
                    raw_expression=_node_text(current).split("(")[0],
                ))
        # Push children reversed so the left-most child is processed first,
        # matching recursive pre-order traversal.
        stack.extend(reversed(current.children))
167
+
168
+
169
class JavaAdapter(BaseAdapter):
    """Extract classes, methods, calls and imports from Java files.

    Parses sources with the tree-sitter Java grammar and flattens the
    syntax tree into the language-neutral ``Parsed*`` models.  Nested
    (inner) types are emitted with dotted display names such as
    ``Outer.Inner``.
    """

    def __init__(self) -> None:
        # The parser instance is reused across parse_file() calls.
        self._parser = get_parser("java")

    def language_name(self) -> str:
        """Return this adapter's language identifier."""
        return "java"

    def supported_extensions(self) -> list[str]:
        """Return the file extensions this adapter handles."""
        return [".java"]

    def parse_file(self, source: bytes, file_path: str) -> ParseResult:
        """Parse *source* and return extracted functions, classes, imports.

        The package declaration (if present) is read first so that the
        extraction helpers receive it, then the top-level declarations
        are walked.
        """
        tree = self._parser.parse(source)
        root = tree.root_node

        functions: list[ParsedFunction] = []
        classes: list[ParsedClass] = []
        imports: list[ParsedImport] = []
        package = ""

        for child in root.children:
            if child.type == "package_declaration":
                package = self._extract_package(child)

        self._walk(root, file_path, package, functions, classes, imports,
                   outer_class=None)
        return ParseResult(functions=functions, classes=classes, imports=imports)

    # -- top-level walk -------------------------------------------------------

    def _walk(
        self,
        node,
        file_path: str,
        package: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
        imports: list[ParsedImport],
        outer_class: str | None,
    ) -> None:
        """Dispatch each top-level declaration to the matching extractor."""
        for child in node.children:
            if child.type == "class_declaration":
                self._extract_class(
                    child, file_path, package, functions, classes, imports,
                    outer_class,
                )
            elif child.type == "interface_declaration":
                self._extract_interface(
                    child, file_path, package, functions, classes, imports,
                    outer_class,
                )
            elif child.type == "enum_declaration":
                self._extract_enum(
                    child, file_path, package, functions, classes, imports,
                    outer_class,
                )
            elif child.type == "import_declaration":
                self._extract_import(child, file_path, imports)

    # -- package --------------------------------------------------------------

    @staticmethod
    def _extract_package(node) -> str:
        """Return the dotted package name from a ``package_declaration``."""
        for child in node.children:
            if child.type == "scoped_identifier" or child.type == "identifier":
                return _node_text(child)
        return ""

    # -- classes / interfaces / enums -----------------------------------------

    def _extract_class(
        self,
        node,
        file_path: str,
        package: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
        imports: list[ParsedImport],
        outer_class: str | None,
    ) -> None:
        """Record a class declaration (with bases) and recurse into its body."""
        name_node = node.child_by_field_name("name")
        cls_name = _node_text(name_node) if name_node else "Unknown"
        # Inner classes are prefixed with the outer class name.
        display_name = f"{outer_class}.{cls_name}" if outer_class else cls_name

        # tree-sitter rows are 0-based; the models use 1-based lines.
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1
        qualified = f"{file_path}:{display_name}"

        base_classes: list[str] = []
        superclass_node = node.child_by_field_name("superclass")
        if superclass_node:
            for child in superclass_node.children:
                if child.is_named:
                    text = _node_text(child)
                    if "<" in text:
                        # Strip generic arguments: Base<T> -> Base.
                        text = text[:text.index("<")]
                    base_classes.append(text)

        # Implemented interfaces are also recorded as base classes.
        interfaces_node = node.child_by_field_name("interfaces")
        if interfaces_node:
            for child in interfaces_node.children:
                if child.type == "type_list":
                    for tc in child.children:
                        if tc.is_named:
                            text = _node_text(tc)
                            if "<" in text:
                                text = text[:text.index("<")]
                            base_classes.append(text)

        method_names: list[str] = []
        body = node.child_by_field_name("body")
        if body:
            self._process_class_body(
                body, file_path, package, display_name, functions, classes,
                imports, method_names,
            )

        classes.append(
            ParsedClass(
                name=display_name,
                qualified_name=qualified,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                method_names=method_names,
                base_classes=base_classes,
            )
        )

    def _extract_interface(
        self,
        node,
        file_path: str,
        package: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
        imports: list[ParsedImport],
        outer_class: str | None,
    ) -> None:
        """Record an interface declaration and recurse into its body."""
        name_node = node.child_by_field_name("name")
        iface_name = _node_text(name_node) if name_node else "Unknown"
        display_name = f"{outer_class}.{iface_name}" if outer_class else iface_name

        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1
        qualified = f"{file_path}:{display_name}"

        # Extended super-interfaces are recorded as base classes.
        base_classes: list[str] = []
        for child in node.children:
            if child.type == "extends_interfaces":
                for tc in child.children:
                    if tc.type == "type_list":
                        for item in tc.children:
                            if item.is_named:
                                text = _node_text(item)
                                if "<" in text:
                                    # Strip generic arguments: Comparable<T> -> Comparable.
                                    text = text[:text.index("<")]
                                base_classes.append(text)

        method_names: list[str] = []
        body = node.child_by_field_name("body")
        if body:
            self._process_class_body(
                body, file_path, package, display_name, functions, classes,
                imports, method_names,
            )

        classes.append(
            ParsedClass(
                name=display_name,
                qualified_name=qualified,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                method_names=method_names,
                base_classes=base_classes,
            )
        )

    def _extract_enum(
        self,
        node,
        file_path: str,
        package: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
        imports: list[ParsedImport],
        outer_class: str | None,
    ) -> None:
        """Record an enum declaration and recurse into its member declarations."""
        name_node = node.child_by_field_name("name")
        enum_name = _node_text(name_node) if name_node else "Unknown"
        display_name = f"{outer_class}.{enum_name}" if outer_class else enum_name

        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1
        qualified = f"{file_path}:{display_name}"

        # Enums cannot extend classes; only implemented interfaces apply.
        base_classes: list[str] = []
        interfaces_node = node.child_by_field_name("interfaces")
        if interfaces_node:
            for child in interfaces_node.children:
                if child.type == "type_list":
                    for tc in child.children:
                        if tc.is_named:
                            text = _node_text(tc)
                            if "<" in text:
                                text = text[:text.index("<")]
                            base_classes.append(text)

        method_names: list[str] = []
        body = node.child_by_field_name("body")
        if body:
            # Methods/constructors of an enum live under the
            # enum_body_declarations node, after the constant list.
            for child in body.children:
                if child.type == "enum_body_declarations":
                    self._process_class_body(
                        child, file_path, package, display_name, functions,
                        classes, imports, method_names,
                    )

        classes.append(
            ParsedClass(
                name=display_name,
                qualified_name=qualified,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                method_names=method_names,
                base_classes=base_classes,
            )
        )

    def _process_class_body(
        self,
        body_node,
        file_path: str,
        package: str,
        class_name: str,
        functions: list[ParsedFunction],
        classes: list[ParsedClass],
        imports: list[ParsedImport],
        method_names: list[str],
    ) -> None:
        """Process children of a class/interface/enum body.

        Collects methods and constructors into *functions* (appending
        their names to *method_names*) and recurses into nested type
        declarations with this type as the new ``outer_class``.
        """
        for child in body_node.children:
            if child.type == "method_declaration":
                name = self._extract_method(
                    child, file_path, functions, class_name,
                )
                if name:
                    method_names.append(name)
            elif child.type == "constructor_declaration":
                name = self._extract_constructor(
                    child, file_path, functions, class_name,
                )
                if name:
                    method_names.append(name)
            elif child.type == "class_declaration":
                self._extract_class(
                    child, file_path, package, functions, classes, imports,
                    outer_class=class_name,
                )
            elif child.type == "interface_declaration":
                self._extract_interface(
                    child, file_path, package, functions, classes, imports,
                    outer_class=class_name,
                )
            elif child.type == "enum_declaration":
                self._extract_enum(
                    child, file_path, package, functions, classes, imports,
                    outer_class=class_name,
                )

    # -- methods / constructors -----------------------------------------------

    def _extract_method(
        self,
        node,
        file_path: str,
        functions: list[ParsedFunction],
        class_name: str,
    ) -> str | None:
        """Record a method declaration and return its simple name."""
        name_node = node.child_by_field_name("name")
        name = _node_text(name_node) if name_node else "unknown"
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        qualified = f"{file_path}:{class_name}.{name}"
        sig = _build_method_signature(node)
        doc = _extract_javadoc(node)

        # Abstract/interface methods have no body and therefore no calls.
        body = node.child_by_field_name("body")
        calls: list[CallInfo] = []
        if body:
            _collect_calls(body, calls)

        functions.append(
            ParsedFunction(
                name=name,
                qualified_name=qualified,
                signature=sig,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                doc_comment=doc,
                call_names=[c.callee_name for c in calls],
                calls=calls,
                class_name=class_name,
            )
        )
        return name

    def _extract_constructor(
        self,
        node,
        file_path: str,
        functions: list[ParsedFunction],
        class_name: str,
    ) -> str | None:
        """Record a constructor under the JVM-style name ``<init>``."""
        name = "<init>"
        start_line = node.start_point[0] + 1
        end_line = node.end_point[0] + 1

        qualified = f"{file_path}:{class_name}.<init>"
        sig = _build_constructor_signature(node)
        doc = _extract_javadoc(node)

        body = node.child_by_field_name("body")
        calls: list[CallInfo] = []
        if body:
            _collect_calls(body, calls)

        functions.append(
            ParsedFunction(
                name=name,
                qualified_name=qualified,
                signature=sig,
                file_path=file_path,
                start_line=start_line,
                end_line=end_line,
                doc_comment=doc,
                call_names=[c.callee_name for c in calls],
                calls=calls,
                class_name=class_name,
            )
        )
        return name

    # -- imports --------------------------------------------------------------

    def _extract_import(
        self,
        node,
        file_path: str,
        imports: list[ParsedImport],
    ) -> None:
        """Record an import declaration (single, wildcard, or static)."""
        is_static = False
        is_wildcard = False
        fqn_parts: list[str] = []

        for child in node.children:
            if child.type == "static":
                is_static = True
            elif child.type == "asterisk":
                is_wildcard = True
            elif child.type == "scoped_identifier" or child.type == "identifier":
                fqn_parts.append(_node_text(child))

        # No identifier found: malformed import, nothing to record.
        if not fqn_parts:
            return

        target_module = fqn_parts[0]

        if is_wildcard:
            imported_names = ["*"]
        else:
            # Last dotted segment is the imported simple name.
            simple_name = target_module.rsplit(".", 1)[-1] if "." in target_module else target_module
            imported_names = [simple_name]

        # NOTE(review): is_static is parsed but not stored on ParsedImport —
        # confirm whether the model should carry a static-import flag.
        imports.append(
            ParsedImport(
                source_path=file_path,
                target_module=target_module,
                imported_names=imported_names,
                is_relative=False,
            )
        )