codespine 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.1.2 → codespine-0.1.3}/PKG-INFO +1 -1
- {codespine-0.1.2 → codespine-0.1.3}/codespine/db/store.py +8 -1
- {codespine-0.1.2 → codespine-0.1.3}/codespine/diff/branch_diff.py +56 -2
- {codespine-0.1.2 → codespine-0.1.3}/codespine/indexer/java_parser.py +80 -20
- {codespine-0.1.2 → codespine-0.1.3}/codespine/search/vector.py +10 -1
- {codespine-0.1.2 → codespine-0.1.3}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.1.2 → codespine-0.1.3}/pyproject.toml +1 -1
- {codespine-0.1.2 → codespine-0.1.3}/tests/test_java_parser.py +1 -1
- {codespine-0.1.2 → codespine-0.1.3}/LICENSE +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/README.md +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/__init__.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/analysis/__init__.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/analysis/community.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/analysis/context.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/analysis/coupling.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/analysis/flow.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/analysis/impact.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/cli.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/config.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/db/__init__.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/db/schema.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/diff/__init__.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/indexer/__init__.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/indexer/engine.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/mcp/__init__.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/mcp/server.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/noise/__init__.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/noise/blocklist.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/search/__init__.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/search/bm25.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/search/fuzzy.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/search/hybrid.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/search/rrf.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/watch/__init__.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine/watch/watcher.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/gindex.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/setup.cfg +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/tests/test_call_resolver.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.1.2 → codespine-0.1.3}/tests/test_search_ranking.py +0 -0
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import json
|
|
5
5
|
import logging
|
|
6
|
+
import os
|
|
6
7
|
from contextlib import contextmanager
|
|
7
8
|
from dataclasses import dataclass
|
|
8
9
|
from typing import Any
|
|
@@ -20,7 +21,13 @@ class GraphStore:
|
|
|
20
21
|
read_only: bool = False
|
|
21
22
|
|
|
22
23
|
def __post_init__(self) -> None:
|
|
23
|
-
|
|
24
|
+
db_path = SETTINGS.db_path
|
|
25
|
+
try:
|
|
26
|
+
self.db = kuzu.Database(db_path, buffer_pool_size=1024**3)
|
|
27
|
+
except Exception as exc:
|
|
28
|
+
fallback = os.path.join("/tmp", ".codespine_db")
|
|
29
|
+
LOGGER.warning("Primary DB path failed (%s). Falling back to %s", exc, fallback)
|
|
30
|
+
self.db = kuzu.Database(fallback, buffer_pool_size=1024**3)
|
|
24
31
|
self.conn = kuzu.Connection(self.db)
|
|
25
32
|
if not self.read_only:
|
|
26
33
|
ensure_schema(self.conn)
|
|
@@ -21,6 +21,59 @@ def _text(node) -> str:
|
|
|
21
21
|
return node.text.decode("utf-8")
|
|
22
22
|
|
|
23
23
|
|
|
24
|
+
def _captures(query: Query, node) -> list[tuple]:
|
|
25
|
+
if hasattr(query, "captures"):
|
|
26
|
+
return query.captures(node)
|
|
27
|
+
|
|
28
|
+
from tree_sitter import QueryCursor
|
|
29
|
+
|
|
30
|
+
raw = None
|
|
31
|
+
try:
|
|
32
|
+
cursor = QueryCursor(query)
|
|
33
|
+
if hasattr(cursor, "captures"):
|
|
34
|
+
raw = cursor.captures(node)
|
|
35
|
+
except TypeError:
|
|
36
|
+
raw = None
|
|
37
|
+
|
|
38
|
+
if raw is None:
|
|
39
|
+
cursor = QueryCursor()
|
|
40
|
+
for call in (
|
|
41
|
+
lambda: cursor.captures(query, node),
|
|
42
|
+
lambda: cursor.captures(node, query),
|
|
43
|
+
):
|
|
44
|
+
try:
|
|
45
|
+
raw = call()
|
|
46
|
+
break
|
|
47
|
+
except TypeError:
|
|
48
|
+
continue
|
|
49
|
+
if raw is None:
|
|
50
|
+
return []
|
|
51
|
+
if isinstance(raw, dict):
|
|
52
|
+
out: list[tuple] = []
|
|
53
|
+
for tag, nodes in raw.items():
|
|
54
|
+
for n in nodes:
|
|
55
|
+
out.append((n, tag))
|
|
56
|
+
return out
|
|
57
|
+
out: list[tuple] = []
|
|
58
|
+
for item in raw:
|
|
59
|
+
if not isinstance(item, (tuple, list)) or len(item) < 2:
|
|
60
|
+
continue
|
|
61
|
+
n, t = item[0], item[1]
|
|
62
|
+
if isinstance(t, int):
|
|
63
|
+
tag = None
|
|
64
|
+
for attr in ("capture_name_for_id", "capture_name"):
|
|
65
|
+
if hasattr(query, attr):
|
|
66
|
+
try:
|
|
67
|
+
tag = getattr(query, attr)(t)
|
|
68
|
+
break
|
|
69
|
+
except Exception:
|
|
70
|
+
pass
|
|
71
|
+
out.append((n, tag if tag else str(t)))
|
|
72
|
+
else:
|
|
73
|
+
out.append((n, t))
|
|
74
|
+
return out
|
|
75
|
+
|
|
76
|
+
|
|
24
77
|
def _hash_text(text: str) -> str:
|
|
25
78
|
return hashlib.sha1(_normalize_java_snippet(text).encode("utf-8")).hexdigest()
|
|
26
79
|
|
|
@@ -30,6 +83,7 @@ def _normalize_java_snippet(text: str) -> str:
|
|
|
30
83
|
text = re.sub(r"/\*.*?\*/", "", text, flags=re.DOTALL)
|
|
31
84
|
text = re.sub(r"//.*?$", "", text, flags=re.MULTILINE)
|
|
32
85
|
text = re.sub(r"\s+", " ", text).strip()
|
|
86
|
+
text = re.sub(r"\s*([{}();,])\s*", r"\1", text)
|
|
33
87
|
return text
|
|
34
88
|
|
|
35
89
|
|
|
@@ -51,7 +105,7 @@ def _method_hashes(source: bytes) -> dict[str, dict]:
|
|
|
51
105
|
)
|
|
52
106
|
methods: dict[str, dict] = {}
|
|
53
107
|
grouped: dict[object, dict[str, str]] = {}
|
|
54
|
-
for node, tag in method_query
|
|
108
|
+
for node, tag in _captures(method_query, root):
|
|
55
109
|
key_node = node if tag == "decl" else node.parent
|
|
56
110
|
grouped.setdefault(key_node, {})[tag] = _text(node)
|
|
57
111
|
|
|
@@ -80,7 +134,7 @@ def _class_hashes(source: bytes) -> dict[str, str]:
|
|
|
80
134
|
""",
|
|
81
135
|
)
|
|
82
136
|
grouped: dict[object, dict[str, str]] = {}
|
|
83
|
-
for node, tag in class_query
|
|
137
|
+
for node, tag in _captures(class_query, root):
|
|
84
138
|
key_node = node if tag == "decl" else node.parent
|
|
85
139
|
grouped.setdefault(key_node, {})[tag] = _text(node)
|
|
86
140
|
out: dict[str, str] = {}
|
|
@@ -62,6 +62,66 @@ def _text(node) -> str:
|
|
|
62
62
|
return node.text.decode("utf-8")
|
|
63
63
|
|
|
64
64
|
|
|
65
|
+
def _captures(query: Query, node) -> list[tuple]:
|
|
66
|
+
"""Compatibility wrapper for tree-sitter Python bindings."""
|
|
67
|
+
if hasattr(query, "captures"):
|
|
68
|
+
return query.captures(node)
|
|
69
|
+
|
|
70
|
+
from tree_sitter import QueryCursor
|
|
71
|
+
|
|
72
|
+
raw = None
|
|
73
|
+
# API shape A: QueryCursor(query).captures(node)
|
|
74
|
+
try:
|
|
75
|
+
cursor = QueryCursor(query)
|
|
76
|
+
if hasattr(cursor, "captures"):
|
|
77
|
+
raw = cursor.captures(node)
|
|
78
|
+
except TypeError:
|
|
79
|
+
raw = None
|
|
80
|
+
|
|
81
|
+
# API shape B/C: QueryCursor().captures(...)
|
|
82
|
+
if raw is None:
|
|
83
|
+
cursor = QueryCursor()
|
|
84
|
+
for call in (
|
|
85
|
+
lambda: cursor.captures(query, node),
|
|
86
|
+
lambda: cursor.captures(node, query),
|
|
87
|
+
):
|
|
88
|
+
try:
|
|
89
|
+
raw = call()
|
|
90
|
+
break
|
|
91
|
+
except TypeError:
|
|
92
|
+
continue
|
|
93
|
+
|
|
94
|
+
if raw is None:
|
|
95
|
+
return []
|
|
96
|
+
|
|
97
|
+
# Newer bindings may return {capture_name: [nodes...]}
|
|
98
|
+
if isinstance(raw, dict):
|
|
99
|
+
out: list[tuple] = []
|
|
100
|
+
for tag, nodes in raw.items():
|
|
101
|
+
for n in nodes:
|
|
102
|
+
out.append((n, tag))
|
|
103
|
+
return out
|
|
104
|
+
|
|
105
|
+
out: list[tuple] = []
|
|
106
|
+
for item in raw:
|
|
107
|
+
if not isinstance(item, (tuple, list)) or len(item) < 2:
|
|
108
|
+
continue
|
|
109
|
+
n, t = item[0], item[1]
|
|
110
|
+
if isinstance(t, int):
|
|
111
|
+
tag = None
|
|
112
|
+
for attr in ("capture_name_for_id", "capture_name"):
|
|
113
|
+
if hasattr(query, attr):
|
|
114
|
+
try:
|
|
115
|
+
tag = getattr(query, attr)(t)
|
|
116
|
+
break
|
|
117
|
+
except Exception:
|
|
118
|
+
pass
|
|
119
|
+
out.append((n, tag if tag else str(t)))
|
|
120
|
+
else:
|
|
121
|
+
out.append((n, t))
|
|
122
|
+
return out
|
|
123
|
+
|
|
124
|
+
|
|
65
125
|
def _hash_node(node) -> str:
|
|
66
126
|
return hashlib.sha1(_normalize_java_bytes(node.text).encode("utf-8")).hexdigest()
|
|
67
127
|
|
|
@@ -122,7 +182,7 @@ def _extract_local_types(method_node) -> dict[str, str]:
|
|
|
122
182
|
declarator: (variable_declarator name: (identifier) @name))
|
|
123
183
|
""",
|
|
124
184
|
)
|
|
125
|
-
captures = q
|
|
185
|
+
captures = _captures(q, method_node)
|
|
126
186
|
locals_map: dict[str, str] = {}
|
|
127
187
|
current_type = None
|
|
128
188
|
for node, tag in captures:
|
|
@@ -142,7 +202,7 @@ def _extract_field_types(class_node) -> dict[str, str]:
|
|
|
142
202
|
declarator: (variable_declarator name: (identifier) @name))
|
|
143
203
|
""",
|
|
144
204
|
)
|
|
145
|
-
captures = q
|
|
205
|
+
captures = _captures(q, class_node)
|
|
146
206
|
field_map: dict[str, str] = {}
|
|
147
207
|
current_type = None
|
|
148
208
|
for node, tag in captures:
|
|
@@ -156,16 +216,16 @@ def _extract_field_types(class_node) -> dict[str, str]:
|
|
|
156
216
|
def _extract_parameter_types(params_node) -> list[str]:
|
|
157
217
|
if params_node is None:
|
|
158
218
|
return []
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
"""
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
return [
|
|
219
|
+
types: list[str] = []
|
|
220
|
+
for child in params_node.named_children:
|
|
221
|
+
if child.type in {"formal_parameter", "spread_parameter"}:
|
|
222
|
+
tnode = child.child_by_field_name("type")
|
|
223
|
+
types.append(_node_type_name(tnode))
|
|
224
|
+
elif child.type == "receiver_parameter":
|
|
225
|
+
# Keep receiver as pseudo-type to stabilize signature arity
|
|
226
|
+
tnode = child.child_by_field_name("type")
|
|
227
|
+
types.append(_node_type_name(tnode))
|
|
228
|
+
return [t for t in types if t]
|
|
169
229
|
|
|
170
230
|
|
|
171
231
|
def _extract_inheritance(class_node) -> tuple[str | None, list[str]]:
|
|
@@ -189,7 +249,7 @@ def _extract_inheritance(class_node) -> tuple[str | None, list[str]]:
|
|
|
189
249
|
]
|
|
190
250
|
""",
|
|
191
251
|
)
|
|
192
|
-
interfaces = [_node_type_name(n) for n, tag in type_query
|
|
252
|
+
interfaces = [_node_type_name(n) for n, tag in _captures(type_query, iface_node) if tag == "t"]
|
|
193
253
|
|
|
194
254
|
# Fallback for grammar variants where interfaces are not exposed as a field.
|
|
195
255
|
if not interfaces:
|
|
@@ -206,7 +266,7 @@ def _extract_inheritance(class_node) -> tuple[str | None, list[str]]:
|
|
|
206
266
|
]
|
|
207
267
|
""",
|
|
208
268
|
)
|
|
209
|
-
interfaces.extend([_node_type_name(n) for n, tag in type_query
|
|
269
|
+
interfaces.extend([_node_type_name(n) for n, tag in _captures(type_query, child) if tag == "t"])
|
|
210
270
|
|
|
211
271
|
return extends_name, interfaces
|
|
212
272
|
|
|
@@ -229,12 +289,12 @@ def parse_java_source(source: bytes) -> ParsedFile:
|
|
|
229
289
|
package_name = ""
|
|
230
290
|
imports: list[str] = []
|
|
231
291
|
|
|
232
|
-
for node, tag in pkg_query
|
|
292
|
+
for node, tag in _captures(pkg_query, root):
|
|
233
293
|
if tag == "pkg":
|
|
234
294
|
package_name = _text(node)
|
|
235
295
|
break
|
|
236
296
|
|
|
237
|
-
for node, tag in import_query
|
|
297
|
+
for node, tag in _captures(import_query, root):
|
|
238
298
|
if tag == "imp":
|
|
239
299
|
imports.append(_text(node))
|
|
240
300
|
|
|
@@ -267,7 +327,7 @@ def parse_java_source(source: bytes) -> ParsedFile:
|
|
|
267
327
|
""",
|
|
268
328
|
)
|
|
269
329
|
|
|
270
|
-
for node, tag in cls_query
|
|
330
|
+
for node, tag in _captures(cls_query, root):
|
|
271
331
|
if tag != "class_decl":
|
|
272
332
|
continue
|
|
273
333
|
|
|
@@ -292,8 +352,8 @@ def parse_java_source(source: bytes) -> ParsedFile:
|
|
|
292
352
|
body_hash=_hash_node(node),
|
|
293
353
|
)
|
|
294
354
|
|
|
295
|
-
method_nodes = [n for n, t in method_query
|
|
296
|
-
method_nodes.extend([n for n, t in ctor_query
|
|
355
|
+
method_nodes = [n for n, t in _captures(method_query, node) if t == "method_decl"]
|
|
356
|
+
method_nodes.extend([n for n, t in _captures(ctor_query, node) if t == "method_decl"])
|
|
297
357
|
|
|
298
358
|
for m_node in method_nodes:
|
|
299
359
|
m_name_node = m_node.child_by_field_name("name")
|
|
@@ -323,7 +383,7 @@ def parse_java_source(source: bytes) -> ParsedFile:
|
|
|
323
383
|
body_node = m_node.child_by_field_name("body")
|
|
324
384
|
if body_node is not None:
|
|
325
385
|
grouped: dict[object, dict[str, str]] = {}
|
|
326
|
-
for c_node, c_tag in call_query
|
|
386
|
+
for c_node, c_tag in _captures(call_query, body_node):
|
|
327
387
|
inv_node = c_node if c_tag == "call_inv" else c_node.parent
|
|
328
388
|
grouped.setdefault(inv_node, {})[c_tag] = _text(c_node)
|
|
329
389
|
for inv_node, capture_map in grouped.items():
|
|
@@ -35,7 +35,16 @@ def _load_model():
|
|
|
35
35
|
|
|
36
36
|
@lru_cache(maxsize=1)
|
|
37
37
|
def _embedding_cache_conn():
|
|
38
|
-
|
|
38
|
+
path = SETTINGS.embedding_cache_db
|
|
39
|
+
try:
|
|
40
|
+
os_dir = path.rsplit("/", 1)[0] if "/" in path else ""
|
|
41
|
+
if os_dir:
|
|
42
|
+
import os
|
|
43
|
+
|
|
44
|
+
os.makedirs(os_dir, exist_ok=True)
|
|
45
|
+
conn = sqlite3.connect(path)
|
|
46
|
+
except Exception:
|
|
47
|
+
conn = sqlite3.connect("/tmp/.codespine_embedding_cache.sqlite3")
|
|
39
48
|
conn.execute(
|
|
40
49
|
"""
|
|
41
50
|
CREATE TABLE IF NOT EXISTS embedding_cache (
|
|
@@ -25,4 +25,4 @@ def test_parse_java_methods_and_calls():
|
|
|
25
25
|
methods = {m.name: m for m in parsed.classes[0].methods}
|
|
26
26
|
assert "processPayment" in methods
|
|
27
27
|
assert "helper" in methods
|
|
28
|
-
assert "helper" in methods["processPayment"].calls
|
|
28
|
+
assert any(call.name == "helper" for call in methods["processPayment"].calls)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|