sampler-cli 0.4.1__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/PKG-INFO +10 -3
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/README.md +9 -2
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/pyproject.toml +1 -1
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/__init__.py +1 -1
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/cli/main.py +69 -16
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/db.py +51 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/indexer/builder.py +30 -1
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/indexer/embedder.py +13 -1
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/indexer/store.py +25 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/query/engine.py +28 -1
- sampler_cli-0.4.2/src/sampler/viz/__init__.py +6 -0
- sampler_cli-0.4.2/src/sampler/viz/bus.py +29 -0
- sampler_cli-0.4.2/src/sampler/viz/canvas.py +372 -0
- sampler_cli-0.4.2/src/sampler/viz/discover_emit.py +30 -0
- sampler_cli-0.4.2/src/sampler/viz/engine.py +284 -0
- sampler_cli-0.4.2/src/sampler/viz/events.py +132 -0
- sampler_cli-0.4.2/src/sampler/viz/headline.py +14 -0
- sampler_cli-0.4.2/src/sampler/viz/layout_algo.py +142 -0
- sampler_cli-0.4.2/src/sampler/viz/live.py +60 -0
- sampler_cli-0.4.2/src/sampler/viz/pipeline.py +162 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler_cli.egg-info/PKG-INFO +10 -3
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler_cli.egg-info/SOURCES.txt +17 -1
- sampler_cli-0.4.2/tests/test_canvas_graph.py +74 -0
- sampler_cli-0.4.2/tests/test_events.py +18 -0
- sampler_cli-0.4.2/tests/test_headline.py +10 -0
- sampler_cli-0.4.2/tests/test_stale_code.py +180 -0
- sampler_cli-0.4.2/tests/test_viz_engine.py +74 -0
- sampler_cli-0.4.2/tests/test_viz_layout.py +41 -0
- sampler_cli-0.4.2/tests/test_viz_pipeline.py +37 -0
- sampler_cli-0.4.1/tests/test_stale_code.py +0 -75
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/LICENSE +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/setup.cfg +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/__main__.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/cli/__init__.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/cli/render.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/config.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/embeddings.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/indexer/__init__.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/indexer/discover.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/indexer/imports.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/indexer/parsers/__init__.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/indexer/parsers/base.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/indexer/parsers/go.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/indexer/parsers/python.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/indexer/parsers/typescript.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/mcp/__init__.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/mcp/server.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/models.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/query/__init__.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler/query/semantic.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler_cli.egg-info/dependency_links.txt +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler_cli.egg-info/entry_points.txt +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler_cli.egg-info/requires.txt +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/src/sampler_cli.egg-info/top_level.txt +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_cli.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_config.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_db.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_discover.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_embeddings.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_go_parser.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_imports.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_index_query.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_python_parser.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_relationships.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_render_bars.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_semantic.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_smoke.py +0 -0
- {sampler_cli-0.4.1 → sampler_cli-0.4.2}/tests/test_typescript_parser.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sampler-cli
|
|
3
|
-
Version: 0.4.1
|
|
3
|
+
Version: 0.4.2
|
|
4
4
|
Summary: Token-efficient CLI for indexing and searching code symbols (Python-first, designed for minimal LLM/agent context size)
|
|
5
5
|
Author: Samuel Ignacio Carmona Rodriguez
|
|
6
6
|
License: MIT
|
|
@@ -52,7 +52,7 @@ Dynamic: license-file
|
|
|
52
52
|
|
|
53
53
|
Token-efficient CLI for indexing and searching code symbols across multiple projects.
|
|
54
54
|
|
|
55
|
-
Current version: 0.4.1
|
|
55
|
+
Current version: 0.4.2
|
|
56
56
|
|
|
57
57
|
Designed for humans and agents: compact default output, short paths, and low-noise symbol views.
|
|
58
58
|
|
|
@@ -92,7 +92,7 @@ sampler overview src/main.py
|
|
|
92
92
|
## Command Overview
|
|
93
93
|
|
|
94
94
|
Core:
|
|
95
|
-
- `sampler version`
|
|
95
|
+
- `sampler version [--plain]`
|
|
96
96
|
- `sampler init`
|
|
97
97
|
- `sampler index <project>`
|
|
98
98
|
- `sampler search <query> [--project <name>] [--type <t>] [--limit <n>] [--semantic] [--style plain|bars]`
|
|
@@ -168,6 +168,13 @@ Offline / air-gapped: `provider: hash` (or just don't install the embeddings ext
|
|
|
168
168
|
|
|
169
169
|
- function is called from test files
|
|
170
170
|
- function has zero non-test callers in project call graph
|
|
171
|
+
- symbol is defined in production code (symbols defined in test files are excluded)
|
|
172
|
+
|
|
173
|
+
Test file detection supports common multi-language patterns:
|
|
174
|
+
|
|
175
|
+
- Python: `tests/`, `test_*.py`, `*_test.py`
|
|
176
|
+
- Go: `*_test.go`
|
|
177
|
+
- TypeScript/JavaScript: `__tests__/`, `test/`, `spec/`, `*.test.*`, `*.spec.*`
|
|
171
178
|
|
|
172
179
|
This is heuristic signal, not guaranteed dead-code proof.
|
|
173
180
|
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Token-efficient CLI for indexing and searching code symbols across multiple projects.
|
|
4
4
|
|
|
5
|
-
Current version: 0.4.1
|
|
5
|
+
Current version: 0.4.2
|
|
6
6
|
|
|
7
7
|
Designed for humans and agents: compact default output, short paths, and low-noise symbol views.
|
|
8
8
|
|
|
@@ -42,7 +42,7 @@ sampler overview src/main.py
|
|
|
42
42
|
## Command Overview
|
|
43
43
|
|
|
44
44
|
Core:
|
|
45
|
-
- `sampler version`
|
|
45
|
+
- `sampler version [--plain]`
|
|
46
46
|
- `sampler init`
|
|
47
47
|
- `sampler index <project>`
|
|
48
48
|
- `sampler search <query> [--project <name>] [--type <t>] [--limit <n>] [--semantic] [--style plain|bars]`
|
|
@@ -118,6 +118,13 @@ Offline / air-gapped: `provider: hash` (or just don't install the embeddings ext
|
|
|
118
118
|
|
|
119
119
|
- function is called from test files
|
|
120
120
|
- function has zero non-test callers in project call graph
|
|
121
|
+
- symbol is defined in production code (symbols defined in test files are excluded)
|
|
122
|
+
|
|
123
|
+
Test file detection supports common multi-language patterns:
|
|
124
|
+
|
|
125
|
+
- Python: `tests/`, `test_*.py`, `*_test.py`
|
|
126
|
+
- Go: `*_test.go`
|
|
127
|
+
- TypeScript/JavaScript: `__tests__/`, `test/`, `spec/`, `*.test.*`, `*.spec.*`
|
|
121
128
|
|
|
122
129
|
This is heuristic signal, not guaranteed dead-code proof.
|
|
123
130
|
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import sys
|
|
1
2
|
from pathlib import Path
|
|
2
3
|
|
|
3
4
|
import typer
|
|
@@ -6,7 +7,6 @@ from rich.console import Console
|
|
|
6
7
|
from sampler import __version__
|
|
7
8
|
from sampler.config import ConfigManager
|
|
8
9
|
from sampler.db import Database
|
|
9
|
-
from sampler.indexer.builder import IndexBuilder
|
|
10
10
|
from sampler.query.engine import QueryEngine
|
|
11
11
|
|
|
12
12
|
# Embeddings provider support (lazy, optional)
|
|
@@ -89,9 +89,17 @@ def _format_line_range(start_line: int | None, end_line: int | None) -> str:
|
|
|
89
89
|
|
|
90
90
|
|
|
91
91
|
@app.command("version")
|
|
92
|
-
def version() -> None:
|
|
92
|
+
def version(
|
|
93
|
+
plain: bool = typer.Option(False, "--plain", help="Plain text output (version number only)"),
|
|
94
|
+
) -> None:
|
|
93
95
|
"""Show installed sampler version."""
|
|
94
|
-
|
|
96
|
+
if plain or not sys.stdout.isatty():
|
|
97
|
+
console.print(f"sampler {__version__}")
|
|
98
|
+
return
|
|
99
|
+
|
|
100
|
+
from sampler.viz.headline import print_version_card
|
|
101
|
+
|
|
102
|
+
print_version_card(console, __version__)
|
|
95
103
|
|
|
96
104
|
|
|
97
105
|
@app.command("init")
|
|
@@ -381,9 +389,25 @@ def symbols(
|
|
|
381
389
|
console.print(line)
|
|
382
390
|
|
|
383
391
|
|
|
392
|
+
def _build_embedder():
|
|
393
|
+
from sampler.indexer.embedder import Embedder
|
|
394
|
+
|
|
395
|
+
if get_embedding_provider is not None:
|
|
396
|
+
try:
|
|
397
|
+
return Embedder(provider=get_embedding_provider())
|
|
398
|
+
except Exception:
|
|
399
|
+
return Embedder()
|
|
400
|
+
return Embedder()
|
|
401
|
+
|
|
402
|
+
|
|
384
403
|
@app.command("index")
|
|
385
|
-
def index(project: str) -> None:
|
|
386
|
-
|
|
404
|
+
def index(
|
|
405
|
+
project: str,
|
|
406
|
+
plain: bool = typer.Option(False, "--plain", help="Compact output without Live visualization (for CI/scripts)"),
|
|
407
|
+
batch_size: int = typer.Option(32, "--batch-size", help="Batch size for embedding generation"),
|
|
408
|
+
force: bool = typer.Option(False, "--force", help="Re-index all files regardless of hash"),
|
|
409
|
+
) -> None:
|
|
410
|
+
"""Index project and generate embeddings (Live visualization when attached to a TTY)."""
|
|
387
411
|
config = ConfigManager()
|
|
388
412
|
project_cfg = config.get_project(project)
|
|
389
413
|
if project_cfg is None:
|
|
@@ -394,17 +418,46 @@ def index(project: str) -> None:
|
|
|
394
418
|
"Use 'sampler project list' to see registered projects."
|
|
395
419
|
)
|
|
396
420
|
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
421
|
+
embedder = _build_embedder()
|
|
422
|
+
use_plain = plain or not sys.stdout.isatty()
|
|
423
|
+
|
|
424
|
+
try:
|
|
425
|
+
embedder.provider.embed("sampler health probe", for_query=True)
|
|
426
|
+
except RuntimeError as exc:
|
|
427
|
+
from rich.panel import Panel
|
|
428
|
+
|
|
429
|
+
console.print(Panel.fit(str(exc), title="Error", border_style="red"))
|
|
430
|
+
raise typer.Exit(code=1)
|
|
431
|
+
|
|
432
|
+
from sampler.viz.pipeline import run_index_pipeline
|
|
433
|
+
|
|
434
|
+
try:
|
|
435
|
+
stats = run_index_pipeline(
|
|
436
|
+
db=_database(),
|
|
437
|
+
project_cfg=project_cfg,
|
|
438
|
+
embedder=embedder,
|
|
439
|
+
force=force,
|
|
440
|
+
batch_size=batch_size,
|
|
441
|
+
plain=use_plain,
|
|
442
|
+
console=console,
|
|
443
|
+
)
|
|
444
|
+
except RuntimeError as exc:
|
|
445
|
+
from rich.panel import Panel
|
|
446
|
+
|
|
447
|
+
console.print(Panel.fit(str(exc), title="Error", border_style="red"))
|
|
448
|
+
raise typer.Exit(code=1)
|
|
449
|
+
|
|
450
|
+
if use_plain:
|
|
451
|
+
console.print(
|
|
452
|
+
f"[green]✓[/green] Indexed [bold]{stats['project']}[/bold]: "
|
|
453
|
+
f"discovered={stats['discovered']} indexed={stats['indexed']} "
|
|
454
|
+
f"skipped={stats['skipped']} failed={stats['failed']}"
|
|
455
|
+
)
|
|
456
|
+
prov_name = getattr(embedder.provider, "name", "hash")
|
|
457
|
+
console.print(
|
|
458
|
+
f"[green]✓[/green] Embedded [bold]{stats['embed_count']}[/bold] symbols "
|
|
459
|
+
f"using [bold]{prov_name}[/bold] ({stats['model']}) in [bold]{stats['elapsed']:.1f}s[/bold]"
|
|
460
|
+
)
|
|
408
461
|
|
|
409
462
|
|
|
410
463
|
@app.command("embed")
|
|
@@ -611,6 +611,57 @@ class Database:
|
|
|
611
611
|
with self.connect() as conn:
|
|
612
612
|
return conn.execute(sql, (project_name,)).fetchall()
|
|
613
613
|
|
|
614
|
+
def get_project_index_stats(self, project_name: str) -> dict[str, int]:
|
|
615
|
+
"""Aggregate counts for index pipeline / status displays."""
|
|
616
|
+
sql = """
|
|
617
|
+
SELECT
|
|
618
|
+
(SELECT COUNT(*) FROM files f
|
|
619
|
+
JOIN projects p ON f.project_id = p.id WHERE p.name = ?) AS files,
|
|
620
|
+
(SELECT COUNT(*) FROM symbols s
|
|
621
|
+
JOIN files f ON s.file_id = f.id
|
|
622
|
+
JOIN projects p ON f.project_id = p.id WHERE p.name = ?) AS symbols,
|
|
623
|
+
(SELECT COUNT(*) FROM relationships r
|
|
624
|
+
JOIN symbols src ON r.source_id = src.id
|
|
625
|
+
JOIN files f ON src.file_id = f.id
|
|
626
|
+
JOIN projects p ON f.project_id = p.id WHERE p.name = ?) AS relationships,
|
|
627
|
+
(SELECT COUNT(*) FROM embeddings e
|
|
628
|
+
JOIN symbols s ON e.symbol_id = s.id
|
|
629
|
+
JOIN files f ON s.file_id = f.id
|
|
630
|
+
JOIN projects p ON f.project_id = p.id WHERE p.name = ?) AS embeddings
|
|
631
|
+
"""
|
|
632
|
+
with self.connect() as conn:
|
|
633
|
+
row = conn.execute(sql, (project_name, project_name, project_name, project_name)).fetchone()
|
|
634
|
+
if row is None:
|
|
635
|
+
return {"files": 0, "symbols": 0, "relationships": 0, "embeddings": 0}
|
|
636
|
+
return {
|
|
637
|
+
"files": int(row["files"]),
|
|
638
|
+
"symbols": int(row["symbols"]),
|
|
639
|
+
"relationships": int(row["relationships"]),
|
|
640
|
+
"embeddings": int(row["embeddings"]),
|
|
641
|
+
}
|
|
642
|
+
|
|
643
|
+
def get_top_symbols_by_degree(self, project_name: str, limit: int = 80) -> list[sqlite3.Row]:
|
|
644
|
+
"""Top symbols by in+out relationship degree for graph preview."""
|
|
645
|
+
sql = """
|
|
646
|
+
SELECT
|
|
647
|
+
s.id,
|
|
648
|
+
s.name,
|
|
649
|
+
s.qualified_name,
|
|
650
|
+
s.type,
|
|
651
|
+
(
|
|
652
|
+
SELECT COUNT(*) FROM relationships r
|
|
653
|
+
WHERE r.source_id = s.id OR r.target_id = s.id
|
|
654
|
+
) AS degree
|
|
655
|
+
FROM symbols s
|
|
656
|
+
JOIN files f ON s.file_id = f.id
|
|
657
|
+
JOIN projects p ON f.project_id = p.id
|
|
658
|
+
WHERE p.name = ?
|
|
659
|
+
ORDER BY degree DESC, s.qualified_name, s.name
|
|
660
|
+
LIMIT ?
|
|
661
|
+
"""
|
|
662
|
+
with self.connect() as conn:
|
|
663
|
+
return conn.execute(sql, (project_name, limit)).fetchall()
|
|
664
|
+
|
|
614
665
|
def get_project_call_edges(self, project_name: str) -> list[sqlite3.Row]:
|
|
615
666
|
"""Return CALLS edges within project, with caller/target symbol + file context.
|
|
616
667
|
|
|
@@ -4,6 +4,8 @@ import hashlib
|
|
|
4
4
|
import re
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
7
9
|
from sampler.db import Database
|
|
8
10
|
from sampler.indexer.discover import discover_files, discover_files_multi
|
|
9
11
|
from sampler.indexer.imports import extract_imports
|
|
@@ -11,6 +13,11 @@ from sampler.indexer.parsers.go import GoParser
|
|
|
11
13
|
from sampler.indexer.parsers.python import PythonParser
|
|
12
14
|
from sampler.indexer.parsers.typescript import TypeScriptParser
|
|
13
15
|
from sampler.indexer.store import SymbolStore
|
|
16
|
+
from sampler.viz.discover_emit import emit_discover
|
|
17
|
+
from sampler.viz.events import FileParsing, LogLine, Stage, StageChanged
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from sampler.viz.bus import EventBus, NullEventBus
|
|
14
21
|
|
|
15
22
|
|
|
16
23
|
class IndexBuilder:
|
|
@@ -24,7 +31,14 @@ class IndexBuilder:
|
|
|
24
31
|
"javascript": TypeScriptParser(),
|
|
25
32
|
}
|
|
26
33
|
|
|
27
|
-
def index_project(
|
|
34
|
+
def index_project(
|
|
35
|
+
self,
|
|
36
|
+
project_name: str,
|
|
37
|
+
project_path: str,
|
|
38
|
+
language: str,
|
|
39
|
+
force: bool = False,
|
|
40
|
+
event_bus: EventBus | NullEventBus | None = None,
|
|
41
|
+
) -> dict:
|
|
28
42
|
is_auto = language.lower() == "auto"
|
|
29
43
|
if not is_auto and language not in self.parsers:
|
|
30
44
|
raise ValueError(f"Unsupported language: {language}")
|
|
@@ -38,10 +52,17 @@ class IndexBuilder:
|
|
|
38
52
|
else:
|
|
39
53
|
file_entries = [(f, language) for f in discover_files(project_path=project_abs_path, language=language)]
|
|
40
54
|
|
|
55
|
+
bus = event_bus
|
|
56
|
+
if bus is not None:
|
|
57
|
+
emit_discover(bus, project_abs_path, file_entries)
|
|
58
|
+
bus.emit(StageChanged(Stage.PARSING))
|
|
59
|
+
|
|
41
60
|
indexed = 0
|
|
42
61
|
skipped = 0
|
|
43
62
|
failed = 0
|
|
44
63
|
all_imports: set[str] = set()
|
|
64
|
+
total = len(file_entries)
|
|
65
|
+
parse_idx = 0
|
|
45
66
|
|
|
46
67
|
for filepath, file_language in file_entries:
|
|
47
68
|
parser = self.parsers.get(file_language)
|
|
@@ -63,7 +84,13 @@ class IndexBuilder:
|
|
|
63
84
|
skipped += 1
|
|
64
85
|
continue
|
|
65
86
|
|
|
87
|
+
if bus is not None:
|
|
88
|
+
bus.emit(FileParsing(path=filepath, index=parse_idx, total=total))
|
|
89
|
+
bus.emit(LogLine(message=f"+ parser {Path(filepath).name}"))
|
|
90
|
+
|
|
66
91
|
symbols, relationships = parser.parse(content=content, filepath=filepath)
|
|
92
|
+
if bus is not None:
|
|
93
|
+
bus.emit(StageChanged(Stage.RELATIONSHIPS))
|
|
67
94
|
self.store.save_symbols(
|
|
68
95
|
project_id=project_id,
|
|
69
96
|
filepath=filepath,
|
|
@@ -71,8 +98,10 @@ class IndexBuilder:
|
|
|
71
98
|
file_hash=file_hash,
|
|
72
99
|
symbols=symbols,
|
|
73
100
|
relationships=relationships,
|
|
101
|
+
event_bus=bus,
|
|
74
102
|
)
|
|
75
103
|
indexed += 1
|
|
104
|
+
parse_idx += 1
|
|
76
105
|
|
|
77
106
|
self.db.update_project_file_count(project_id)
|
|
78
107
|
self._resolve_project_dependencies(project_id, project_name, all_imports)
|
|
@@ -2,9 +2,13 @@ from __future__ import annotations
|
|
|
2
2
|
|
|
3
3
|
import hashlib
|
|
4
4
|
import re
|
|
5
|
-
from typing import Callable
|
|
5
|
+
from typing import TYPE_CHECKING, Callable
|
|
6
6
|
|
|
7
7
|
from sampler.db import Database
|
|
8
|
+
from sampler.viz.events import EmbeddingGenerated
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from sampler.viz.bus import EventBus, NullEventBus
|
|
8
12
|
|
|
9
13
|
# --- Public constants kept for backward compat ---
|
|
10
14
|
DEFAULT_BATCH_SIZE = 32
|
|
@@ -200,6 +204,7 @@ class Embedder:
|
|
|
200
204
|
project_name: str,
|
|
201
205
|
batch_size: int = DEFAULT_BATCH_SIZE,
|
|
202
206
|
on_progress: Callable[[int, int], None] | None = None,
|
|
207
|
+
event_bus: EventBus | NullEventBus | None = None,
|
|
203
208
|
) -> int:
|
|
204
209
|
"""Generate and store embeddings for every symbol in a project using the active provider.
|
|
205
210
|
|
|
@@ -220,6 +225,7 @@ class Embedder:
|
|
|
220
225
|
# Use provider batch (supports for_query=False for document storage)
|
|
221
226
|
vectors = provider.embed_batch(texts, for_query=False)
|
|
222
227
|
|
|
228
|
+
done_before = start
|
|
223
229
|
for row, vec in zip(batch, vectors):
|
|
224
230
|
try:
|
|
225
231
|
import numpy as np
|
|
@@ -234,6 +240,12 @@ class Embedder:
|
|
|
234
240
|
dim=dim,
|
|
235
241
|
vector=vec_bytes,
|
|
236
242
|
)
|
|
243
|
+
done_before += 1
|
|
244
|
+
if event_bus is not None:
|
|
245
|
+
name = row["qualified_name"] or row["name"]
|
|
246
|
+
event_bus.emit(
|
|
247
|
+
EmbeddingGenerated(name=name, index=done_before, total=total)
|
|
248
|
+
)
|
|
237
249
|
|
|
238
250
|
if on_progress is not None:
|
|
239
251
|
on_progress(min(start + batch_size, total), total)
|
|
@@ -1,4 +1,12 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
1
5
|
from sampler.db import Database
|
|
6
|
+
from sampler.viz.events import RelationshipCreated, SymbolExtracted
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from sampler.viz.bus import EventBus, NullEventBus
|
|
2
10
|
|
|
3
11
|
|
|
4
12
|
class SymbolStore:
|
|
@@ -87,6 +95,7 @@ class SymbolStore:
|
|
|
87
95
|
file_hash: str,
|
|
88
96
|
symbols: list[dict],
|
|
89
97
|
relationships: list[dict],
|
|
98
|
+
event_bus: EventBus | NullEventBus | None = None,
|
|
90
99
|
) -> None:
|
|
91
100
|
file_id = self.db.upsert_file(project_id=project_id, path=filepath, language=language, file_hash=file_hash)
|
|
92
101
|
self.db.clear_file_data(file_id)
|
|
@@ -103,6 +112,14 @@ class SymbolStore:
|
|
|
103
112
|
local_by_name.setdefault(name, []).append(inserted_id)
|
|
104
113
|
if name not in symbol_id_map:
|
|
105
114
|
symbol_id_map[name] = inserted_id
|
|
115
|
+
if event_bus is not None:
|
|
116
|
+
event_bus.emit(
|
|
117
|
+
SymbolExtracted(
|
|
118
|
+
name=qualified or name or "symbol",
|
|
119
|
+
symbol_type=symbol.get("type") or "symbol",
|
|
120
|
+
file_path=filepath,
|
|
121
|
+
)
|
|
122
|
+
)
|
|
106
123
|
|
|
107
124
|
for relation in relationships:
|
|
108
125
|
source_key = relation.get("source")
|
|
@@ -131,3 +148,11 @@ class SymbolStore:
|
|
|
131
148
|
continue
|
|
132
149
|
|
|
133
150
|
self.db.insert_relationship(source_id=source_id, target_id=target_id, relation=relation)
|
|
151
|
+
if event_bus is not None:
|
|
152
|
+
event_bus.emit(
|
|
153
|
+
RelationshipCreated(
|
|
154
|
+
source=source_key,
|
|
155
|
+
target=target_key,
|
|
156
|
+
relation_type=relation.get("type") or "CALLS",
|
|
157
|
+
)
|
|
158
|
+
)
|
|
@@ -119,7 +119,31 @@ class QueryEngine:
|
|
|
119
119
|
def _is_test_path(path: str) -> bool:
|
|
120
120
|
p = path.replace("\\", "/").lower()
|
|
121
121
|
name = Path(path).name.lower()
|
|
122
|
-
|
|
122
|
+
if any(seg in p for seg in ("/tests/", "/test/", "/__tests__/", "/spec/")):
|
|
123
|
+
return True
|
|
124
|
+
|
|
125
|
+
if name.startswith("test_"):
|
|
126
|
+
return True
|
|
127
|
+
|
|
128
|
+
if name.endswith(("_test.py", "_test.go")):
|
|
129
|
+
return True
|
|
130
|
+
|
|
131
|
+
return name.endswith(
|
|
132
|
+
(
|
|
133
|
+
".test.ts",
|
|
134
|
+
".test.tsx",
|
|
135
|
+
".test.js",
|
|
136
|
+
".test.jsx",
|
|
137
|
+
".test.mjs",
|
|
138
|
+
".test.cjs",
|
|
139
|
+
".spec.ts",
|
|
140
|
+
".spec.tsx",
|
|
141
|
+
".spec.js",
|
|
142
|
+
".spec.jsx",
|
|
143
|
+
".spec.mjs",
|
|
144
|
+
".spec.cjs",
|
|
145
|
+
)
|
|
146
|
+
)
|
|
123
147
|
|
|
124
148
|
def stale_code_candidates(self, project_name: str) -> list[dict]:
|
|
125
149
|
"""Detect code likely stale: function/method called by tests but not by non-test code."""
|
|
@@ -156,6 +180,9 @@ class QueryEngine:
|
|
|
156
180
|
|
|
157
181
|
stale: list[dict] = []
|
|
158
182
|
for entry in by_target.values():
|
|
183
|
+
# Test-file symbols (helpers, fixtures) are expected to be test-only callers.
|
|
184
|
+
if self._is_test_path(entry["file_path"]):
|
|
185
|
+
continue
|
|
159
186
|
if entry["test_callers"] and not entry["non_test_callers"]:
|
|
160
187
|
stale.append(
|
|
161
188
|
{
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable
|
|
4
|
+
|
|
5
|
+
from sampler.viz.events import IndexEvent
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class EventBus:
|
|
9
|
+
"""Synchronous pub/sub for index pipeline events."""
|
|
10
|
+
|
|
11
|
+
def __init__(self) -> None:
|
|
12
|
+
self._handlers: list[Callable[[IndexEvent], None]] = []
|
|
13
|
+
|
|
14
|
+
def subscribe(self, handler: Callable[[IndexEvent], None]) -> None:
|
|
15
|
+
self._handlers.append(handler)
|
|
16
|
+
|
|
17
|
+
def emit(self, event: IndexEvent) -> None:
|
|
18
|
+
for handler in self._handlers:
|
|
19
|
+
handler(event)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class NullEventBus:
|
|
23
|
+
"""No-op bus for plain/headless indexing."""
|
|
24
|
+
|
|
25
|
+
def subscribe(self, handler: Callable[[IndexEvent], None]) -> None:
|
|
26
|
+
return None
|
|
27
|
+
|
|
28
|
+
def emit(self, event: IndexEvent) -> None:
|
|
29
|
+
return None
|