codespine 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codespine/cli.py ADDED
@@ -0,0 +1,424 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import signal
7
+ import subprocess
8
+ import sys
9
+ import time
10
+
11
+ import click
12
+ import psutil
13
+
14
+ from codespine.analysis.community import detect_communities, symbol_community
15
+ from codespine.analysis.context import build_symbol_context
16
+ from codespine.analysis.coupling import compute_coupling, get_coupling
17
+ from codespine.analysis.deadcode import detect_dead_code
18
+ from codespine.analysis.flow import trace_execution_flows
19
+ from codespine.analysis.impact import analyze_impact
20
+ from codespine.config import SETTINGS
21
+ from codespine.db.store import GraphStore
22
+ from codespine.diff.branch_diff import compare_branches
23
+ from codespine.indexer.engine import JavaIndexer
24
+ from codespine.mcp.server import build_mcp_server
25
+ from codespine.search.hybrid import hybrid_search
26
+ from codespine.watch.watcher import run_watch_mode
27
+
28
# Route all CLI logging to the shared log file so stdout stays clean for
# command output (several subcommands emit JSON that callers may parse).
logging.basicConfig(filename=SETTINGS.log_file, level=logging.INFO)
LOGGER = logging.getLogger(__name__)
30
+
31
+
32
def _echo_json(data, as_json: bool) -> None:
    """Echo *data* either as pretty-printed JSON or as plain text."""
    rendered = json.dumps(data, indent=2) if as_json else data
    click.echo(rendered)
37
+
38
+
39
def _is_running() -> bool:
    """Return True when the PID recorded in the PID file maps to a live process.

    A missing, unreadable, or malformed PID file is treated as "not running"
    so the CLI degrades gracefully after a crash that left stale state.
    """
    if not os.path.exists(SETTINGS.pid_file):
        return False
    try:
        with open(SETTINGS.pid_file, "r", encoding="utf-8") as f:
            pid = int(f.read().strip())
    except (OSError, ValueError):
        # Narrowed from a blanket `except Exception`: only I/O failures and
        # a non-integer PID are expected here; anything else should surface.
        return False
    return psutil.pid_exists(pid)
48
+
49
+
50
+ def _current_repo_path() -> str:
51
+ return os.getcwd()
52
+
53
+
54
+ def _db_size_bytes(path: str) -> int:
55
+ if os.path.isfile(path):
56
+ return os.path.getsize(path)
57
+ if not os.path.isdir(path):
58
+ return 0
59
+ total = 0
60
+ for root, _, files in os.walk(path):
61
+ for name in files:
62
+ try:
63
+ total += os.path.getsize(os.path.join(root, name))
64
+ except OSError:
65
+ pass
66
+ return total
67
+
68
+
69
def _phase(label: str, value: str) -> None:
    """Print one progress row: a 30-char left-aligned label, then its value."""
    click.echo("{:<30} {}".format(label, value))
71
+
72
+
73
# Root command group; every subcommand below attaches via @main.command().
@click.group()
def main() -> None:
    """CodeSpine CLI."""
76
+
77
+
78
@main.command()
@click.argument("path", type=click.Path(exists=True))
@click.option("--full/--incremental", default=True, show_default=True)
def analyse(path: str, full: bool) -> None:
    """Index a local Java project.

    Runs the whole pipeline in order — walk, parse, call/type resolution,
    community detection, flow tracing, dead-code detection, git-coupling
    analysis, embedding count — printing one progress row per phase, then a
    summary line with totals and elapsed time.
    """
    # Indexing opens the graph store read-write; refuse while the MCP
    # background server may be holding the database.
    if _is_running():
        click.secho("Stop MCP first ('codespine stop') to index.", fg="yellow")
        return

    started = time.perf_counter()
    abs_path = os.path.abspath(path)
    store = GraphStore(read_only=False)
    indexer = JavaIndexer(store)

    result = indexer.index_project(abs_path, full=full)
    _phase("Walking files...", f"{result.files_found} files found")
    _phase("Parsing code...", f"{result.files_indexed}/{result.files_found}")
    _phase("Tracing calls...", f"{result.calls_resolved} calls resolved")
    _phase("Analyzing types...", f"{result.type_relationships} type relationships")

    communities = detect_communities(store)
    _phase("Detecting communities...", f"{len(communities)} clusters found")

    flows = trace_execution_flows(store)
    _phase("Detecting execution flows...", f"{len(flows)} processes found")

    dead = detect_dead_code(store, limit=500)
    _phase("Finding dead code...", f"{len(dead)} unreachable symbols")

    coupling_pairs = compute_coupling(
        store,
        abs_path,
        result.project_id,
        months=SETTINGS.default_coupling_months,
        min_strength=SETTINGS.default_min_coupling_strength,
        min_cochanges=SETTINGS.default_min_cochanges,
    )
    _phase("Analyzing git history...", f"{len(coupling_pairs)} coupled file pairs")

    # Prefer the actual count of stored embeddings from the graph; fall back
    # to the indexer's own counter if the query returns nothing.
    vector_count = store.query_records(
        """
        MATCH (s:Symbol)
        WHERE s.embedding IS NOT NULL
        RETURN count(s) as count
        """
    )
    vectors_stored = int(vector_count[0]["count"]) if vector_count else result.embeddings_generated
    _phase("Generating embeddings...", f"{vectors_stored} vectors stored")

    symbol_count = store.query_records("MATCH (s:Symbol) RETURN count(s) as count")
    edge_count = store.query_records("MATCH ()-[r]->() RETURN count(r) as count")
    symbols = int(symbol_count[0]["count"]) if symbol_count else 0
    edges = int(edge_count[0]["count"]) if edge_count else 0
    elapsed = time.perf_counter() - started

    click.echo()
    click.secho(
        f"Done in {elapsed:.1f}s - {symbols} symbols, {edges} edges, {len(communities)} clusters, {len(flows)} flows",
        fg="green",
    )
138
+
139
+
140
@main.command()
@click.argument("query")
@click.option("--k", default=20, show_default=True, type=int)
@click.option("--json", "as_json", is_flag=True)
def search(query: str, k: int, as_json: bool) -> None:
    """Hybrid search (BM25 + vector + fuzzy + RRF)."""
    # Read-only: searching never mutates the graph.
    graph = GraphStore(read_only=True)
    matches = hybrid_search(graph, query, k=k)
    _echo_json(matches, as_json)
149
+
150
+
151
@main.command()
@click.argument("query")
@click.option("--max-depth", default=3, show_default=True, type=int)
@click.option("--json", "as_json", is_flag=True)
def context(query: str, max_depth: int, as_json: bool) -> None:
    """Get one-shot symbol context: search + impact + community + flows."""
    graph = GraphStore(read_only=True)
    payload = build_symbol_context(graph, query, max_depth=max_depth)
    _echo_json(payload, as_json)
160
+
161
+
162
@main.command()
@click.argument("symbol")
@click.option("--max-depth", default=4, show_default=True, type=int)
@click.option("--json", "as_json", is_flag=True)
def impact(symbol: str, max_depth: int, as_json: bool) -> None:
    """Impact analysis grouped by depth with confidence scores."""
    graph = GraphStore(read_only=True)
    report = analyze_impact(graph, symbol, max_depth=max_depth)
    _echo_json(report, as_json)
171
+
172
+
173
@main.command()
@click.option("--limit", default=200, show_default=True, type=int)
@click.option("--json", "as_json", is_flag=True)
def deadcode(limit: int, as_json: bool) -> None:
    """Detect dead code candidates with Java-aware exemptions."""
    graph = GraphStore(read_only=True)
    candidates = detect_dead_code(graph, limit=limit)
    _echo_json(candidates, as_json)
181
+
182
+
183
@main.command()
@click.option("--entry", "entry_symbol", default=None)
@click.option("--max-depth", default=6, show_default=True, type=int)
@click.option("--json", "as_json", is_flag=True)
def flow(entry_symbol: str | None, max_depth: int, as_json: bool) -> None:
    """Trace execution flows from detected entry points."""
    graph = GraphStore(read_only=True)
    flows = trace_execution_flows(graph, entry_symbol=entry_symbol, max_depth=max_depth)
    _echo_json(flows, as_json)
192
+
193
+
194
@main.command()
@click.option("--symbol", default=None)
@click.option("--json", "as_json", is_flag=True)
def community(symbol: str | None, as_json: bool) -> None:
    """Detect communities or lookup community for a symbol."""
    # Needs write access: detection persists community assignments.
    graph = GraphStore(read_only=False)
    detect_communities(graph)
    if symbol:
        _echo_json(symbol_community(graph, symbol), as_json)
    else:
        rows = graph.query_records(
            "MATCH (c:Community) RETURN c.id as id, c.label as label, c.cohesion as cohesion ORDER BY c.cohesion DESC LIMIT 200"
        )
        _echo_json(rows, as_json)
208
+
209
+
210
@main.command()
@click.option("--months", default=6, show_default=True, type=int)
@click.option("--min-strength", default=0.3, show_default=True, type=float)
@click.option("--min-cochanges", default=3, show_default=True, type=int)
@click.option("--json", "as_json", is_flag=True)
def coupling(months: int, min_strength: float, min_cochanges: int, as_json: bool) -> None:
    """Compute and query git change coupling."""
    graph = GraphStore(read_only=False)
    # Resolve the project id from the graph; fall back to the cwd name when
    # nothing has been indexed yet.
    rows = graph.query_records("MATCH (p:Project) RETURN p.id as id LIMIT 1")
    project_id = rows[0]["id"] if rows else os.path.basename(os.getcwd())
    compute_coupling(graph, os.getcwd(), project_id, months=months, min_strength=min_strength, min_cochanges=min_cochanges)
    pairs = get_coupling(
        graph,
        symbol=None,
        months=months,
        min_strength=min_strength,
        min_cochanges=min_cochanges,
    )
    _echo_json(pairs, as_json)
229
+
230
+
231
@main.command()
@click.option("--path", default=".", show_default=True, type=click.Path(exists=True))
@click.option("--global-interval", default=30, show_default=True, type=int)
def watch(path: str, global_interval: int) -> None:
    """Live re-indexing and periodic global analysis refresh."""
    graph = GraphStore(read_only=False)
    target = os.path.abspath(path)
    run_watch_mode(graph, target, global_interval=global_interval)
238
+
239
+
240
@main.command()
@click.argument("range_spec")
@click.option("--json", "as_json", is_flag=True)
def diff(range_spec: str, as_json: bool) -> None:
    """Compare branches at symbol level: <base>..<head>."""
    if ".." not in range_spec:
        raise click.ClickException("Range must be in format <base>..<head>")
    base_ref, head_ref = range_spec.split("..", 1)
    # Previously "..head" or "base.." passed an empty ref through to git;
    # fail fast with a clear message instead.
    if not base_ref or not head_ref:
        raise click.ClickException("Range must be in format <base>..<head>")
    result = compare_branches(os.getcwd(), base_ref, head_ref)
    _echo_json(result, as_json)
250
+
251
+
252
@main.command()
def stats() -> None:
    """Show project and graph statistics."""
    graph = GraphStore(read_only=True)

    def first_count(rows) -> int:
        # Count queries return a single row; an empty result means zero.
        return rows[0]["count"] if rows else 0

    projects = graph.query_records("MATCH (p:Project) RETURN p.id as project, p.path as path")
    counts = {
        "classes": first_count(graph.query_records("MATCH (c:Class) RETURN count(c) as count")),
        "methods": first_count(graph.query_records("MATCH (m:Method) RETURN count(m) as count")),
        "calls": first_count(graph.query_records("MATCH (:Method)-[r:CALLS]->(:Method) RETURN count(r) as count")),
    }

    click.echo("--- Projects ---")
    click.echo(projects)
    click.echo("--- Counts ---")
    click.echo(counts)
271
+
272
+
273
@main.command("list")
@click.option("--json", "as_json", is_flag=True)
def list_projects(as_json: bool) -> None:
    """List indexed projects."""
    graph = GraphStore(read_only=True)
    rows = graph.query_records("MATCH (p:Project) RETURN p.id as id, p.path as path, p.language as language ORDER BY p.id")
    _echo_json(rows, as_json)
280
+
281
+
282
@main.command()
@click.option("--json", "as_json", is_flag=True)
def status(as_json: bool) -> None:
    """Show service and database status."""
    running = _is_running()
    pid: int | None = None
    if os.path.exists(SETTINGS.pid_file):
        try:
            with open(SETTINGS.pid_file, "r", encoding="utf-8") as f:
                pid = int(f.read().strip())
        except (OSError, ValueError):
            # Narrowed from a blanket `except Exception`: an unreadable or
            # corrupt PID file simply means no PID to report.
            pid = None
    payload = {
        "running": running,
        "pid": pid,
        "pid_file": SETTINGS.pid_file,
        "db_path": SETTINGS.db_path,
        "db_size_bytes": _db_size_bytes(SETTINGS.db_path),
        "log_file": SETTINGS.log_file,
    }
    _echo_json(payload, as_json)
303
+
304
+
305
@main.command()
@click.argument("query")
@click.option("--json", "as_json", is_flag=True)
def cypher(query: str, as_json: bool) -> None:
    """Run a raw Cypher query against the graph DB."""
    graph = GraphStore(read_only=True)
    try:
        rows = graph.query_records(query)
    except Exception as exc:
        # Surface any driver error as a clean CLI failure.
        raise click.ClickException(f"Cypher query failed: {exc}") from exc
    _echo_json(rows, as_json)
316
+
317
+
318
@main.command()
@click.option("--force", is_flag=True, help="Skip confirmation prompt.")
def clean(force: bool) -> None:
    """Remove CodeSpine local state (DB/PID/log)."""
    # Hoisted out of the loop body: re-importing on every directory hit was
    # harmless but unidiomatic.
    import shutil

    if not force and not click.confirm("Remove local CodeSpine DB, PID, and logs?"):
        click.echo("Aborted.")
        return
    for path in (SETTINGS.pid_file, SETTINGS.log_file, SETTINGS.db_path):
        if not os.path.exists(path):
            continue
        if os.path.isdir(path):
            # The database path may be a directory; remove it recursively.
            shutil.rmtree(path, ignore_errors=True)
        else:
            try:
                os.remove(path)
            except OSError:
                # Best effort: a locked or vanished file should not abort cleanup.
                pass
    click.echo("Cleaned CodeSpine local state.")
338
+
339
+
340
@main.command()
def setup() -> None:
    """Print local setup checks and next steps."""
    checks = {
        "click": False,
        "kuzu": False,
        "tree_sitter_java": False,
        "fastmcp": False,
        "watchfiles": False,
    }
    # Only values are mutated, so iterating the dict directly is safe;
    # the defensive list() copy was unnecessary.
    for mod in checks:
        try:
            __import__(mod)
        except Exception:
            # Any import-time failure (missing package or broken install)
            # counts as "missing" for this best-effort check.
            checks[mod] = False
        else:
            checks[mod] = True
    click.echo("Dependency check:")
    for mod, ok in checks.items():
        click.echo(f" - {mod}: {'OK' if ok else 'MISSING'}")
    # Fixed: the original wrote "\\nRecommended:", printing a literal
    # backslash-n instead of a blank line.
    click.echo("\nRecommended:")
    click.echo(" pip install -e .")
    click.echo(" codespine analyse /path/to/java-project --full")
    click.echo(" codespine search payment --json")
363
+
364
+
365
@main.command()
def start() -> None:
    """Launch MCP background server."""
    if _is_running():
        click.secho("CodeSpine already active.", fg="yellow")
        return

    if os.path.exists(SETTINGS.pid_file):
        # Stale PID file from a previous crash; clear it before relaunch.
        os.remove(SETTINGS.pid_file)

    # The child inherits the log-file descriptor during Popen; closing our
    # copy afterwards is safe. The original passed a bare open() result and
    # leaked the parent's handle.
    with open(SETTINGS.log_file, "a", encoding="utf-8") as log:
        proc = subprocess.Popen(
            [sys.executable, "-m", "codespine.cli", "run-mcp"],
            stdout=log,
            stderr=subprocess.STDOUT,
        )
    with open(SETTINGS.pid_file, "w", encoding="utf-8") as f:
        f.write(str(proc.pid))
    click.secho("CodeSpine MCP active", fg="cyan")
383
+
384
+
385
@main.command()
@click.pass_context
def serve(ctx: click.Context) -> None:
    """Alias for start."""
    # Calling the `start` Command object directly would re-enter Click's
    # main() and re-parse sys.argv; ctx.invoke runs its callback correctly.
    ctx.invoke(start)
389
+
390
+
391
@main.command()
@click.pass_context
def mcp(ctx: click.Context) -> None:
    """Run MCP server in foreground (stdio)."""
    # `run_mcp` is a Click Command object; calling it directly would
    # re-parse sys.argv via Command.main(). Invoke its callback instead.
    ctx.invoke(run_mcp)
395
+
396
+
397
@main.command()
def stop() -> None:
    """Stop MCP background server."""
    if not os.path.exists(SETTINGS.pid_file):
        click.echo("Nothing to stop.")
        return
    try:
        with open(SETTINGS.pid_file, "r", encoding="utf-8") as f:
            pid = int(f.read().strip())
        os.kill(pid, signal.SIGTERM)
        click.echo(f"Stopped {pid}")
    except (OSError, ValueError):
        # Narrowed from a blanket `except Exception`: covers a corrupt PID
        # file (ValueError) and an already-dead process or permission issue
        # (OSError, incl. ProcessLookupError/PermissionError).
        click.echo("Stale PID removed")
    finally:
        if os.path.exists(SETTINGS.pid_file):
            os.remove(SETTINGS.pid_file)
413
+
414
+
415
@main.command("run-mcp", hidden=True)
def run_mcp() -> None:
    """Run MCP server in stdio mode."""
    # Read-only store: the MCP server only answers queries.
    graph = GraphStore(read_only=True)
    # Renamed local to avoid shadowing the module-level `mcp` command.
    server = build_mcp_server(graph, repo_path_provider=_current_repo_path)
    server.run()
421
+
422
+
423
# Allow direct execution via `python -m codespine.cli`.
if __name__ == "__main__":
    main()
codespine/config.py ADDED
@@ -0,0 +1,22 @@
1
+ import os
2
+ from dataclasses import dataclass
3
+
4
+
5
@dataclass(frozen=True)
class Settings:
    """Immutable global configuration for CodeSpine.

    All state paths live under the user's home directory, so no per-project
    configuration files are needed.
    """

    # Graph database location.
    db_path: str = os.path.expanduser("~/.codespine_db")
    # PID file for the background MCP server.
    pid_file: str = os.path.expanduser("~/.codespine.pid")
    # Shared log file for the CLI and the background server.
    log_file: str = os.path.expanduser("~/.codespine.log")
    # SQLite cache for previously computed embeddings.
    embedding_cache_db: str = os.path.expanduser("~/.codespine_embedding_cache.sqlite3")
    # Embedding model identifier; its output dimension must match vector_dim.
    embedding_model: str = "BAAI/bge-small-en-v1.5"
    vector_dim: int = 384
    # Reciprocal-rank-fusion constant used by hybrid search.
    rrf_k: int = 60
    # Candidate pool size for semantic retrieval before fusion.
    semantic_candidate_pool: int = 2000
    # Batch size for graph write operations.
    write_batch_size: int = 500
    # Defaults for git change-coupling analysis.
    default_coupling_months: int = 6
    default_min_coupling_strength: float = 0.3
    default_min_cochanges: int = 3
    # Seconds between global analysis refreshes in watch mode.
    default_global_interval_s: int = 30


# Module-level singleton read throughout the codebase.
SETTINGS = Settings()
@@ -0,0 +1 @@
1
+ """Database layer."""
codespine/db/schema.py ADDED
@@ -0,0 +1,82 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Iterable
5
+
6
+ LOGGER = logging.getLogger(__name__)
7
+
8
+
9
# Node-table DDL, applied in order by ensure_schema(). Each entry is
# (table_name, CREATE statement); failures are swallowed by _safe_execute so
# re-running against an existing database is safe.
NODE_TABLES: list[tuple[str, str]] = [
    ("SchemaMeta", "CREATE NODE TABLE SchemaMeta(key STRING, value STRING, PRIMARY KEY (key))"),
    (
        "Project",
        "CREATE NODE TABLE Project(id STRING, path STRING, language STRING, PRIMARY KEY (id))",
    ),
    (
        "File",
        "CREATE NODE TABLE File(id STRING, path STRING, project_id STRING, is_test BOOL, hash STRING, PRIMARY KEY (id))",
    ),
    (
        "Class",
        "CREATE NODE TABLE Class(id STRING, fqcn STRING, name STRING, package STRING, file_id STRING, PRIMARY KEY (id))",
    ),
    (
        "Method",
        "CREATE NODE TABLE Method(id STRING, class_id STRING, name STRING, signature STRING, return_type STRING, modifiers STRING[], is_constructor BOOL, is_test BOOL, PRIMARY KEY (id))",
    ),
    (
        "Symbol",
        # embedding FLOAT[384] must match Settings.vector_dim.
        "CREATE NODE TABLE Symbol(id STRING, kind STRING, name STRING, fqname STRING, file_id STRING, line INT64, col INT64, embedding FLOAT[384], PRIMARY KEY (id))",
    ),
    (
        "Community",
        "CREATE NODE TABLE Community(id STRING, label STRING, cohesion DOUBLE, PRIMARY KEY (id))",
    ),
    (
        "Flow",
        "CREATE NODE TABLE Flow(id STRING, entry_symbol_id STRING, kind STRING, PRIMARY KEY (id))",
    ),
]
40
+
41
# Relationship-table DDL, applied after NODE_TABLES. Annotation tightened
# from Iterable to list to match NODE_TABLES above (it is a concrete list).
REL_TABLES: list[tuple[str, str]] = [
    ("DECLARES", "CREATE REL TABLE DECLARES(FROM File TO Symbol)"),
    ("HAS_METHOD", "CREATE REL TABLE HAS_METHOD(FROM Class TO Method)"),
    ("CALLS", "CREATE REL TABLE CALLS(FROM Method TO Method, confidence DOUBLE, reason STRING)"),
    ("REFERENCES_TYPE", "CREATE REL TABLE REFERENCES_TYPE(FROM Symbol TO Symbol, confidence DOUBLE)"),
    ("IMPLEMENTS", "CREATE REL TABLE IMPLEMENTS(FROM Class TO Class, confidence DOUBLE)"),
    ("OVERRIDES", "CREATE REL TABLE OVERRIDES(FROM Method TO Method, confidence DOUBLE)"),
    ("IN_COMMUNITY", "CREATE REL TABLE IN_COMMUNITY(FROM Symbol TO Community)"),
    ("IN_FLOW", "CREATE REL TABLE IN_FLOW(FROM Symbol TO Flow, depth INT64)"),
    (
        "CO_CHANGED_WITH",
        "CREATE REL TABLE CO_CHANGED_WITH(FROM File TO File, strength DOUBLE, cochanges INT64, months INT64)",
    ),
]
55
+
56
+
57
+ def _safe_execute(conn, query: str, params: dict | None = None) -> None:
58
+ try:
59
+ conn.execute(query, params or {})
60
+ except Exception as exc: # pragma: no cover - kuzu error surface varies by version
61
+ LOGGER.debug("Ignoring schema query failure: %s (%s)", query, exc)
62
+
63
+
64
def ensure_schema(conn) -> None:
    """Idempotently create all node/rel tables and best-effort extras."""
    # Table creation: every statement goes through _safe_execute, so running
    # against an already-initialized database is a no-op.
    for _name, ddl in list(NODE_TABLES) + list(REL_TABLES):
        _safe_execute(conn, ddl)

    # Best-effort FTS/index hints. Kuzu versions differ, so keep optional.
    fts_statements = (
        "CALL CREATE_FTS_INDEX('symbol_fts', 'Symbol', ['name', 'fqname'])",
        "CALL CREATE_FTS_INDEX('method_fts', 'Method', ['name', 'signature'])",
        "CALL CREATE_FTS_INDEX('class_fts', 'Class', ['name', 'fqcn'])",
    )
    for statement in fts_statements:
        _safe_execute(conn, statement)

    # Record the schema version for future migrations.
    _safe_execute(conn, "MERGE (s:SchemaMeta {key: 'schema_version'}) SET s.value = '2'")