sampler-cli 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sampler/__init__.py +3 -0
- sampler/__main__.py +5 -0
- sampler/cli/__init__.py +0 -0
- sampler/cli/main.py +187 -0
- sampler/config.py +77 -0
- sampler/db.py +316 -0
- sampler/indexer/__init__.py +0 -0
- sampler/indexer/builder.py +70 -0
- sampler/indexer/discover.py +53 -0
- sampler/indexer/parsers/__init__.py +0 -0
- sampler/indexer/parsers/base.py +9 -0
- sampler/indexer/parsers/go.py +9 -0
- sampler/indexer/parsers/python.py +139 -0
- sampler/indexer/parsers/typescript.py +9 -0
- sampler/indexer/store.py +47 -0
- sampler/mcp/__init__.py +0 -0
- sampler/mcp/server.py +2 -0
- sampler/models.py +35 -0
- sampler/query/__init__.py +0 -0
- sampler/query/engine.py +16 -0
- sampler/query/semantic.py +4 -0
- sampler_cli-0.2.0.dist-info/METADATA +130 -0
- sampler_cli-0.2.0.dist-info/RECORD +27 -0
- sampler_cli-0.2.0.dist-info/WHEEL +5 -0
- sampler_cli-0.2.0.dist-info/entry_points.txt +2 -0
- sampler_cli-0.2.0.dist-info/licenses/LICENSE +21 -0
- sampler_cli-0.2.0.dist-info/top_level.txt +1 -0
sampler/__init__.py
ADDED
sampler/__main__.py
ADDED
sampler/cli/__init__.py
ADDED
|
File without changes
|
sampler/cli/main.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
|
|
6
|
+
from sampler import __version__
|
|
7
|
+
from sampler.config import ConfigManager
|
|
8
|
+
from sampler.db import Database
|
|
9
|
+
from sampler.indexer.builder import IndexBuilder
|
|
10
|
+
from sampler.query.engine import QueryEngine
|
|
11
|
+
|
|
12
|
+
# Shared Rich console used by every command for terminal output.
console = Console()

# Root CLI application and the nested "project" sub-command group.
project_app = typer.Typer(help="Project management commands")
app = typer.Typer(help="Sampler CLI")
app.add_typer(project_app, name="project")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _database() -> Database:
    """Open the symbol database described by the global configuration.

    The database file is ``graph.db`` inside the configured ``cache_dir``
    (with ``~`` expanded). The schema is created if missing before the
    handle is returned.
    """
    settings = ConfigManager().load()
    cache_root = Path(settings.cache_dir).expanduser()
    database = Database(db_path=cache_root / "graph.db")
    database.init_schema()
    return database
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _get_project_roots() -> dict[str, Path]:
    """Map project name -> absolute root path for relative path computation.

    Projects whose configured path cannot be expanded or resolved are left
    out of the mapping instead of aborting the calling command.
    """
    roots: dict[str, Path] = {}
    for project in ConfigManager().list_projects():
        try:
            roots[project.name] = Path(project.path).expanduser().resolve()
        except (OSError, RuntimeError, ValueError):
            # Best effort: only the failures that path expansion/resolution
            # can raise are skipped; anything else is a real bug and surfaces.
            continue
    return roots
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _short_path(project_name: str, full_path: str, roots: dict[str, Path]) -> str:
|
|
39
|
+
"""Return shortest useful path for display: relative to project root if possible, else tail or name."""
|
|
40
|
+
root = roots.get(project_name)
|
|
41
|
+
if root:
|
|
42
|
+
try:
|
|
43
|
+
return str(Path(full_path).resolve().relative_to(root))
|
|
44
|
+
except Exception:
|
|
45
|
+
pass
|
|
46
|
+
# Fallback: last 1-2 path segments to keep output short (token friendly)
|
|
47
|
+
p = Path(full_path)
|
|
48
|
+
if len(p.parts) >= 3:
|
|
49
|
+
return "/".join(p.parts[-2:])
|
|
50
|
+
return p.name
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@app.command("version")
def version() -> None:
    """Show installed sampler version."""
    # The version string comes from the package's own __version__ attribute.
    banner = f"sampler {__version__}"
    console.print(banner)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@app.command("init")
def init() -> None:
    """Initialize sampler local data directory."""
    config = ConfigManager()
    # load() writes a default config file (creating its directory) when none
    # exists yet, which is what performs the initialization.
    config.load()
    # Report the directory the config file actually lives in instead of
    # repeating a hard-coded location that could drift from ConfigManager.
    data_dir = config.config_path.parent
    console.print(f"Initialized [bold]{data_dir}[/bold]")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _display_path(raw_path: str, home: Path) -> str:
    """Shorten a project path for listing.

    Paths at or below *home* are shown relative to ``~``. Other paths are
    reduced to their last two components when they have more than two parts.
    If the path cannot be resolved it is returned unchanged.
    """
    try:
        resolved = Path(raw_path).resolve()
    except (OSError, RuntimeError, ValueError):
        return raw_path
    try:
        # Component-wise containment check: a plain string prefix test would
        # treat "/home/bobby" as being inside "/home/bob".
        below_home = resolved.relative_to(home)
    except ValueError:
        parts = resolved.parts
        return "/".join(parts[-2:]) if len(parts) > 2 else str(resolved)
    return str(Path("~") / below_home)


@project_app.command("list")
def project_list() -> None:
    """List registered projects."""
    home = Path.home().resolve()
    for project in ConfigManager().list_projects():
        disp = _display_path(project.path, home)
        # markup=False: names and paths are arbitrary text and must not be
        # interpreted as Rich style tags.
        console.print(f"{project.name} {disp}", markup=False)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@project_app.command("add")
def project_add(name: str, path: str, language: str = "python") -> None:
    """Register project in global config."""
    manager = ConfigManager()
    try:
        registered = manager.add_project(name=name, path=path, language=language)
    except ValueError as exc:
        # Surface validation problems as a CLI usage error, not a traceback.
        raise typer.BadParameter(str(exc)) from exc
    else:
        console.print(f"Added project [bold]{registered.name}[/bold]")
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
@project_app.command("remove")
def project_remove(name: str) -> None:
    """Remove project from global config."""
    manager = ConfigManager()
    try:
        manager.remove_project(name)
    except ValueError as exc:
        # An unknown project name is reported as a CLI usage error.
        raise typer.BadParameter(str(exc)) from exc
    else:
        console.print(f"Removed project [bold]{name}[/bold]")
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@app.command("search")
def search(
    query: str,
    project: str | None = typer.Option(None, "--project", "-p"),
    type: str | None = typer.Option(None, "--type", "-t", help="filter e.g. function,class"),
    limit: int = typer.Option(100, "--limit", "-l"),
) -> None:
    """Search symbols by name."""
    engine = QueryEngine(db=_database())

    # Turn the comma-separated --type value into a list of symbol types,
    # ignoring empty tokens such as the one produced by a trailing comma.
    types: list[str] | None = None
    if type:
        wanted = {part.strip() for part in type.split(",") if part.strip()}
        # Async definitions are stored under their own type names, so asking
        # for "function"/"method" also selects the async variants.
        for base in ("function", "method"):
            if base in wanted:
                wanted.add(f"async {base}")
        types = sorted(wanted) or None

    rows = engine.search(query=query, project_name=project, types=types, limit=limit)
    roots = _get_project_roots()

    for r in rows:
        shortf = _short_path(r["project_name"], r["file_path"], roots)
        name = r["qualified_name"] or r["name"]
        sig = r.get("signature") or ""
        line = f"{r['project_name']}:{shortf}:{r['start_line'] or '-'} {r['type']} {name}"
        if sig:
            line += f" {sig}"
        # markup=False: signatures contain square brackets (e.g. list[int])
        # that Rich would otherwise consume as style tags.
        console.print(line, markup=False)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
@app.command("search-all")
def search_all(
    query: str,
    type: str | None = typer.Option(None, "--type", "-t", help="filter e.g. function,class"),
    limit: int = typer.Option(100, "--limit", "-l"),
) -> None:
    """Search symbols across ALL projects."""
    # Same as `search`, with the project filter explicitly disabled.
    search(
        query=query,
        type=type,
        limit=limit,
        project=None,
    )
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@app.command("index")
def index(project: str) -> None:
    """Index selected project."""
    config = ConfigManager()
    project_cfg = config.get_project(project)
    if project_cfg is None:
        raise typer.BadParameter(f"Project '{project}' not found. Use 'sampler project list'.")

    builder = IndexBuilder(db=_database())
    try:
        stats = builder.index_project(
            project_name=project_cfg.name,
            project_path=project_cfg.path,
            language=project_cfg.language,
        )
    except ValueError as exc:
        # index_project raises ValueError for an unsupported language; report
        # it as a usage error like the other project commands do.
        raise typer.BadParameter(str(exc)) from exc

    console.print(
        "Indexed project "
        f"[bold]{stats['project']}[/bold]: discovered={stats['discovered']} indexed={stats['indexed']} "
        f"skipped={stats['skipped']} failed={stats['failed']}"
    )
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
@app.command("overview")
def overview(filepath: str) -> None:
    """Show symbols for file."""
    engine = QueryEngine(db=_database())
    rows = engine.overview(filepath=filepath)

    for r in rows:
        name = r["qualified_name"] or r["name"]
        sig = r.get("signature") or ""
        line = f"{r['start_line'] or '-'}: {r['type']} {name}"
        if sig:
            line += f" {sig}"
        # markup=False: signatures contain square brackets (e.g. dict[str, int])
        # that Rich would otherwise consume as style tags.
        console.print(line, markup=False)
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
# Run the CLI only when this module is executed as a script.
if "__main__" == __name__:
    app()
|
sampler/config.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import yaml
|
|
4
|
+
from pydantic import BaseModel, Field
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def default_data_dir() -> Path:
    """Return the per-user sampler data directory, ``.sampler`` under the home directory."""
    home = Path.home()
    return home.joinpath(".sampler")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ProjectConfig(BaseModel):
    """Configuration entry describing one registered project."""

    # Project name.
    name: str
    # Location of the project's sources.
    path: str
    # Language the project is indexed as.
    language: str
    # Enabled flag; defaults to True.
    enabled: bool = Field(default=True)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class GlobalConfig(BaseModel):
    """Top-level configuration document stored on disk."""

    # Config format version.
    version: int = Field(default=1)
    # Directory for cached data; the default is computed once, when the class is defined.
    cache_dir: str = Field(default=str(default_data_dir()))
    # Registered projects keyed by name.
    projects: dict[str, ProjectConfig] = Field(default_factory=dict)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class ConfigManager:
    """Load, persist and edit the global YAML configuration file."""

    def __init__(self, config_path: Path | None = None) -> None:
        """Use *config_path*, or ``config.yaml`` in the default data directory."""
        self.config_path = config_path or (default_data_dir() / "config.yaml")

    def load(self) -> GlobalConfig:
        """Read the configuration, creating a default file when none exists.

        A file whose ``version`` is not 1 is rewritten with ``version`` set
        to 1.
        """
        if not self.config_path.exists():
            config = GlobalConfig()
            self.save(config)
            return config

        with self.config_path.open("r", encoding="utf-8") as f:
            # An empty file parses to None; treat it as an empty mapping.
            raw = yaml.safe_load(f) or {}
        config = GlobalConfig.model_validate(raw)

        if config.version != 1:
            config.version = 1
            self.save(config)

        return config

    def save(self, config: GlobalConfig) -> None:
        """Write *config* to ``config_path``, creating parent directories."""
        self.config_path.parent.mkdir(parents=True, exist_ok=True)
        payload = config.model_dump(mode="python")
        with self.config_path.open("w", encoding="utf-8") as f:
            yaml.safe_dump(payload, f, sort_keys=False)

    def add_project(self, name: str, path: str, language: str, enabled: bool = True) -> ProjectConfig:
        """Register a project and persist the configuration.

        The path is expanded and resolved to an absolute path before it is
        stored.

        Raises:
            ValueError: If *name* is already registered, or the path does not
                exist or is not a directory.
        """
        config = self.load()
        if name in config.projects:
            raise ValueError(f"Project '{name}' already exists")

        resolved = Path(path).expanduser().resolve()
        project_path = str(resolved)
        if not resolved.exists():
            raise ValueError(f"Project path does not exist: {project_path}")
        if not resolved.is_dir():
            # File discovery only walks directories, so a regular file would
            # register successfully and then index nothing.
            raise ValueError(f"Project path is not a directory: {project_path}")

        project = ProjectConfig(name=name, path=project_path, language=language, enabled=enabled)
        config.projects[name] = project
        self.save(config)
        return project

    def remove_project(self, name: str) -> None:
        """Delete a registered project and persist the configuration.

        Raises:
            ValueError: If *name* is not registered.
        """
        config = self.load()
        if name not in config.projects:
            raise ValueError(f"Project '{name}' does not exist")
        del config.projects[name]
        self.save(config)

    def get_project(self, name: str) -> ProjectConfig | None:
        """Return the project registered as *name*, or ``None``."""
        return self.load().projects.get(name)

    def list_projects(self) -> list[ProjectConfig]:
        """Return all registered projects in configuration order."""
        return list(self.load().projects.values())
|
sampler/db.py
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
import sqlite3
|
|
2
|
+
import json
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Database:
|
|
7
|
+
def __init__(self, db_path: Path) -> None:
|
|
8
|
+
self.db_path = db_path
|
|
9
|
+
|
|
10
|
+
def connect(self) -> sqlite3.Connection:
|
|
11
|
+
self.db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
12
|
+
conn = sqlite3.connect(self.db_path)
|
|
13
|
+
conn.row_factory = sqlite3.Row
|
|
14
|
+
return conn
|
|
15
|
+
|
|
16
|
+
def init_schema(self) -> None:
|
|
17
|
+
with self.connect() as conn:
|
|
18
|
+
conn.executescript(
|
|
19
|
+
"""
|
|
20
|
+
CREATE TABLE IF NOT EXISTS projects (
|
|
21
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
22
|
+
name TEXT UNIQUE NOT NULL,
|
|
23
|
+
path TEXT NOT NULL,
|
|
24
|
+
language TEXT,
|
|
25
|
+
indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
26
|
+
file_count INTEGER DEFAULT 0
|
|
27
|
+
);
|
|
28
|
+
|
|
29
|
+
CREATE TABLE IF NOT EXISTS files (
|
|
30
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
31
|
+
project_id INTEGER REFERENCES projects(id),
|
|
32
|
+
path TEXT NOT NULL,
|
|
33
|
+
language TEXT,
|
|
34
|
+
hash TEXT,
|
|
35
|
+
last_indexed TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
|
36
|
+
UNIQUE(project_id, path)
|
|
37
|
+
);
|
|
38
|
+
|
|
39
|
+
CREATE TABLE IF NOT EXISTS symbols (
|
|
40
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
41
|
+
file_id INTEGER REFERENCES files(id),
|
|
42
|
+
type TEXT NOT NULL,
|
|
43
|
+
name TEXT NOT NULL,
|
|
44
|
+
qualified_name TEXT,
|
|
45
|
+
signature TEXT,
|
|
46
|
+
docstring TEXT,
|
|
47
|
+
start_line INTEGER,
|
|
48
|
+
end_line INTEGER,
|
|
49
|
+
metadata JSON
|
|
50
|
+
);
|
|
51
|
+
|
|
52
|
+
CREATE TABLE IF NOT EXISTS relationships (
|
|
53
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
54
|
+
source_id INTEGER REFERENCES symbols(id),
|
|
55
|
+
target_id INTEGER REFERENCES symbols(id),
|
|
56
|
+
type TEXT NOT NULL,
|
|
57
|
+
line INTEGER,
|
|
58
|
+
metadata JSON
|
|
59
|
+
);
|
|
60
|
+
|
|
61
|
+
CREATE TABLE IF NOT EXISTS project_dependencies (
|
|
62
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
63
|
+
source_project_id INTEGER REFERENCES projects(id),
|
|
64
|
+
target_project_id INTEGER REFERENCES projects(id),
|
|
65
|
+
type TEXT NOT NULL,
|
|
66
|
+
metadata JSON,
|
|
67
|
+
UNIQUE(source_project_id, target_project_id, type)
|
|
68
|
+
);
|
|
69
|
+
|
|
70
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_name ON symbols(name);
|
|
71
|
+
CREATE INDEX IF NOT EXISTS idx_symbols_qualified ON symbols(qualified_name);
|
|
72
|
+
CREATE INDEX IF NOT EXISTS idx_relations_source ON relationships(source_id);
|
|
73
|
+
CREATE INDEX IF NOT EXISTS idx_relations_target ON relationships(target_id);
|
|
74
|
+
CREATE INDEX IF NOT EXISTS idx_files_project ON files(project_id);
|
|
75
|
+
"""
|
|
76
|
+
)
|
|
77
|
+
conn.commit()
|
|
78
|
+
|
|
79
|
+
def add_project(self, name: str, path: str, language: str) -> int:
|
|
80
|
+
with self.connect() as conn:
|
|
81
|
+
cur = conn.execute(
|
|
82
|
+
"""
|
|
83
|
+
INSERT INTO projects(name, path, language)
|
|
84
|
+
VALUES (?, ?, ?)
|
|
85
|
+
ON CONFLICT(name) DO UPDATE SET
|
|
86
|
+
path=excluded.path,
|
|
87
|
+
language=excluded.language,
|
|
88
|
+
indexed_at=CURRENT_TIMESTAMP
|
|
89
|
+
""",
|
|
90
|
+
(name, path, language),
|
|
91
|
+
)
|
|
92
|
+
conn.commit()
|
|
93
|
+
|
|
94
|
+
if cur.lastrowid:
|
|
95
|
+
return int(cur.lastrowid)
|
|
96
|
+
|
|
97
|
+
row = conn.execute("SELECT id FROM projects WHERE name = ?", (name,)).fetchone()
|
|
98
|
+
if row is None:
|
|
99
|
+
raise RuntimeError("Failed to upsert project")
|
|
100
|
+
return int(row["id"])
|
|
101
|
+
|
|
102
|
+
def list_projects(self) -> list[sqlite3.Row]:
|
|
103
|
+
with self.connect() as conn:
|
|
104
|
+
rows = conn.execute(
|
|
105
|
+
"""
|
|
106
|
+
SELECT id, name, path, language, indexed_at, file_count
|
|
107
|
+
FROM projects
|
|
108
|
+
ORDER BY name
|
|
109
|
+
"""
|
|
110
|
+
).fetchall()
|
|
111
|
+
return rows
|
|
112
|
+
|
|
113
|
+
def get_project(self, name: str) -> sqlite3.Row | None:
|
|
114
|
+
with self.connect() as conn:
|
|
115
|
+
return conn.execute(
|
|
116
|
+
"SELECT id, name, path, language, indexed_at, file_count FROM projects WHERE name = ?",
|
|
117
|
+
(name,),
|
|
118
|
+
).fetchone()
|
|
119
|
+
|
|
120
|
+
def remove_project(self, name: str) -> None:
|
|
121
|
+
with self.connect() as conn:
|
|
122
|
+
row = conn.execute("SELECT id FROM projects WHERE name = ?", (name,)).fetchone()
|
|
123
|
+
if row is None:
|
|
124
|
+
return
|
|
125
|
+
project_id = int(row["id"])
|
|
126
|
+
conn.execute(
|
|
127
|
+
"DELETE FROM relationships WHERE source_id IN (SELECT id FROM symbols WHERE file_id IN (SELECT id FROM files WHERE project_id = ?))",
|
|
128
|
+
(project_id,),
|
|
129
|
+
)
|
|
130
|
+
conn.execute(
|
|
131
|
+
"DELETE FROM relationships WHERE target_id IN (SELECT id FROM symbols WHERE file_id IN (SELECT id FROM files WHERE project_id = ?))",
|
|
132
|
+
(project_id,),
|
|
133
|
+
)
|
|
134
|
+
conn.execute(
|
|
135
|
+
"DELETE FROM symbols WHERE file_id IN (SELECT id FROM files WHERE project_id = ?)",
|
|
136
|
+
(project_id,),
|
|
137
|
+
)
|
|
138
|
+
conn.execute("DELETE FROM files WHERE project_id = ?", (project_id,))
|
|
139
|
+
conn.execute("DELETE FROM project_dependencies WHERE source_project_id = ? OR target_project_id = ?", (project_id, project_id))
|
|
140
|
+
conn.execute("DELETE FROM projects WHERE id = ?", (project_id,))
|
|
141
|
+
conn.commit()
|
|
142
|
+
|
|
143
|
+
def get_file(self, project_id: int, path: str) -> sqlite3.Row | None:
|
|
144
|
+
with self.connect() as conn:
|
|
145
|
+
return conn.execute(
|
|
146
|
+
"SELECT id, project_id, path, language, hash, last_indexed FROM files WHERE project_id = ? AND path = ?",
|
|
147
|
+
(project_id, path),
|
|
148
|
+
).fetchone()
|
|
149
|
+
|
|
150
|
+
def upsert_file(self, project_id: int, path: str, language: str, file_hash: str) -> int:
|
|
151
|
+
with self.connect() as conn:
|
|
152
|
+
conn.execute(
|
|
153
|
+
"""
|
|
154
|
+
INSERT INTO files(project_id, path, language, hash)
|
|
155
|
+
VALUES (?, ?, ?, ?)
|
|
156
|
+
ON CONFLICT(project_id, path) DO UPDATE SET
|
|
157
|
+
language=excluded.language,
|
|
158
|
+
hash=excluded.hash,
|
|
159
|
+
last_indexed=CURRENT_TIMESTAMP
|
|
160
|
+
""",
|
|
161
|
+
(project_id, path, language, file_hash),
|
|
162
|
+
)
|
|
163
|
+
row = conn.execute(
|
|
164
|
+
"SELECT id FROM files WHERE project_id = ? AND path = ?",
|
|
165
|
+
(project_id, path),
|
|
166
|
+
).fetchone()
|
|
167
|
+
conn.commit()
|
|
168
|
+
if row is None:
|
|
169
|
+
raise RuntimeError("Failed to upsert file")
|
|
170
|
+
return int(row["id"])
|
|
171
|
+
|
|
172
|
+
def clear_file_data(self, file_id: int) -> None:
|
|
173
|
+
with self.connect() as conn:
|
|
174
|
+
conn.execute(
|
|
175
|
+
"DELETE FROM relationships WHERE source_id IN (SELECT id FROM symbols WHERE file_id = ?)",
|
|
176
|
+
(file_id,),
|
|
177
|
+
)
|
|
178
|
+
conn.execute(
|
|
179
|
+
"DELETE FROM relationships WHERE target_id IN (SELECT id FROM symbols WHERE file_id = ?)",
|
|
180
|
+
(file_id,),
|
|
181
|
+
)
|
|
182
|
+
conn.execute("DELETE FROM symbols WHERE file_id = ?", (file_id,))
|
|
183
|
+
conn.commit()
|
|
184
|
+
|
|
185
|
+
def insert_symbol(self, file_id: int, symbol: dict) -> int:
|
|
186
|
+
with self.connect() as conn:
|
|
187
|
+
metadata = symbol.get("metadata")
|
|
188
|
+
row = conn.execute(
|
|
189
|
+
"""
|
|
190
|
+
INSERT INTO symbols(
|
|
191
|
+
file_id, type, name, qualified_name, signature,
|
|
192
|
+
docstring, start_line, end_line, metadata
|
|
193
|
+
)
|
|
194
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
195
|
+
""",
|
|
196
|
+
(
|
|
197
|
+
file_id,
|
|
198
|
+
symbol.get("type"),
|
|
199
|
+
symbol.get("name"),
|
|
200
|
+
symbol.get("qualified_name"),
|
|
201
|
+
symbol.get("signature"),
|
|
202
|
+
symbol.get("docstring"),
|
|
203
|
+
symbol.get("start_line"),
|
|
204
|
+
symbol.get("end_line"),
|
|
205
|
+
json.dumps(metadata) if metadata is not None else None,
|
|
206
|
+
),
|
|
207
|
+
)
|
|
208
|
+
conn.commit()
|
|
209
|
+
return int(row.lastrowid)
|
|
210
|
+
|
|
211
|
+
def find_symbol_id_in_project(self, project_id: int, symbol_name: str) -> int | None:
|
|
212
|
+
with self.connect() as conn:
|
|
213
|
+
row = conn.execute(
|
|
214
|
+
"""
|
|
215
|
+
SELECT s.id
|
|
216
|
+
FROM symbols s
|
|
217
|
+
JOIN files f ON s.file_id = f.id
|
|
218
|
+
WHERE f.project_id = ?
|
|
219
|
+
AND (s.qualified_name = ? OR s.name = ?)
|
|
220
|
+
ORDER BY s.id ASC
|
|
221
|
+
LIMIT 1
|
|
222
|
+
""",
|
|
223
|
+
(project_id, symbol_name, symbol_name),
|
|
224
|
+
).fetchone()
|
|
225
|
+
return None if row is None else int(row["id"])
|
|
226
|
+
|
|
227
|
+
def insert_relationship(self, source_id: int, target_id: int, relation: dict) -> None:
|
|
228
|
+
with self.connect() as conn:
|
|
229
|
+
metadata = relation.get("metadata")
|
|
230
|
+
conn.execute(
|
|
231
|
+
"""
|
|
232
|
+
INSERT INTO relationships(source_id, target_id, type, line, metadata)
|
|
233
|
+
VALUES (?, ?, ?, ?, ?)
|
|
234
|
+
""",
|
|
235
|
+
(
|
|
236
|
+
source_id,
|
|
237
|
+
target_id,
|
|
238
|
+
relation.get("type"),
|
|
239
|
+
relation.get("line"),
|
|
240
|
+
json.dumps(metadata) if metadata is not None else None,
|
|
241
|
+
),
|
|
242
|
+
)
|
|
243
|
+
conn.commit()
|
|
244
|
+
|
|
245
|
+
def update_project_file_count(self, project_id: int) -> None:
|
|
246
|
+
with self.connect() as conn:
|
|
247
|
+
conn.execute(
|
|
248
|
+
"""
|
|
249
|
+
UPDATE projects
|
|
250
|
+
SET file_count = (SELECT COUNT(*) FROM files WHERE project_id = ?),
|
|
251
|
+
indexed_at = CURRENT_TIMESTAMP
|
|
252
|
+
WHERE id = ?
|
|
253
|
+
""",
|
|
254
|
+
(project_id, project_id),
|
|
255
|
+
)
|
|
256
|
+
conn.commit()
|
|
257
|
+
|
|
258
|
+
def search_symbols(self, query: str, project_name: str | None = None, types: list[str] | None = None, limit: int | None = None, offset: int = 0) -> list[sqlite3.Row]:
|
|
259
|
+
where = "WHERE (lower(s.name) LIKE lower(?) OR lower(COALESCE(s.qualified_name, '')) LIKE lower(?))"
|
|
260
|
+
params: list = [f"%{query}%", f"%{query}%"]
|
|
261
|
+
if project_name:
|
|
262
|
+
where += " AND p.name = ?"
|
|
263
|
+
params.append(project_name)
|
|
264
|
+
if types:
|
|
265
|
+
ph = ",".join("?" * len(types))
|
|
266
|
+
where += f" AND s.type IN ({ph})"
|
|
267
|
+
params.extend(types)
|
|
268
|
+
|
|
269
|
+
sql = f"""
|
|
270
|
+
SELECT
|
|
271
|
+
s.type,
|
|
272
|
+
s.name,
|
|
273
|
+
s.qualified_name,
|
|
274
|
+
s.signature,
|
|
275
|
+
s.start_line,
|
|
276
|
+
f.path AS file_path,
|
|
277
|
+
p.name AS project_name
|
|
278
|
+
FROM symbols s
|
|
279
|
+
JOIN files f ON s.file_id = f.id
|
|
280
|
+
JOIN projects p ON f.project_id = p.id
|
|
281
|
+
{where}
|
|
282
|
+
ORDER BY p.name, f.path, s.start_line
|
|
283
|
+
"""
|
|
284
|
+
if limit is not None:
|
|
285
|
+
sql += " LIMIT ? OFFSET ?"
|
|
286
|
+
params.extend([limit, offset])
|
|
287
|
+
|
|
288
|
+
with self.connect() as conn:
|
|
289
|
+
return conn.execute(sql, params).fetchall()
|
|
290
|
+
|
|
291
|
+
def get_symbols_by_filepath(self, filepath: str, project_name: str | None = None) -> list[sqlite3.Row]:
|
|
292
|
+
where = "WHERE f.path = ?"
|
|
293
|
+
params: list[str] = [filepath]
|
|
294
|
+
if project_name:
|
|
295
|
+
where += " AND p.name = ?"
|
|
296
|
+
params.append(project_name)
|
|
297
|
+
|
|
298
|
+
with self.connect() as conn:
|
|
299
|
+
return conn.execute(
|
|
300
|
+
f"""
|
|
301
|
+
SELECT
|
|
302
|
+
s.type,
|
|
303
|
+
s.name,
|
|
304
|
+
s.qualified_name,
|
|
305
|
+
s.signature,
|
|
306
|
+
s.start_line,
|
|
307
|
+
f.path AS file_path,
|
|
308
|
+
p.name AS project_name
|
|
309
|
+
FROM symbols s
|
|
310
|
+
JOIN files f ON s.file_id = f.id
|
|
311
|
+
JOIN projects p ON f.project_id = p.id
|
|
312
|
+
{where}
|
|
313
|
+
ORDER BY s.start_line
|
|
314
|
+
""",
|
|
315
|
+
params,
|
|
316
|
+
).fetchall()
|
|
File without changes
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from sampler.db import Database
|
|
7
|
+
from sampler.indexer.discover import discover_files
|
|
8
|
+
from sampler.indexer.parsers.go import GoParser
|
|
9
|
+
from sampler.indexer.parsers.python import PythonParser
|
|
10
|
+
from sampler.indexer.parsers.typescript import TypeScriptParser
|
|
11
|
+
from sampler.indexer.store import SymbolStore
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class IndexBuilder:
    """Index the source files of a project into the symbol database."""

    def __init__(self, db: Database) -> None:
        self.db = db
        self.store = SymbolStore(db)
        # One parser per supported language; "javascript" uses the TypeScript parser.
        self.parsers = {
            "python": PythonParser(),
            "go": GoParser(),
            "typescript": TypeScriptParser(),
            "javascript": TypeScriptParser(),
        }

    def index_project(self, project_name: str, project_path: str, language: str, force: bool = False) -> dict:
        """Parse every discovered file of a project and store its symbols.

        Files whose content hash matches the stored hash are skipped unless
        *force* is true. Files that cannot be read or decoded as UTF-8 are
        counted as failed and do not stop the run.

        Args:
            project_name: Name under which the project is stored.
            project_path: Project root; expanded and resolved before use.
            language: Key selecting the parser and the file extensions.
            force: Re-index files even when their hash is unchanged.

        Returns:
            A dict with the keys ``project``, ``language``, ``discovered``,
            ``indexed``, ``skipped`` and ``failed``.

        Raises:
            ValueError: If no parser is registered for *language*.
        """
        parser = self.parsers.get(language)
        if parser is None:
            raise ValueError(f"Unsupported language: {language}")

        project_abs_path = str(Path(project_path).expanduser().resolve())
        project_id = self.db.add_project(name=project_name, path=project_abs_path, language=language)

        files = discover_files(project_path=project_abs_path, language=language)
        indexed = 0
        skipped = 0
        failed = 0

        for filepath in files:
            try:
                content = Path(filepath).read_text(encoding="utf-8")
            except (UnicodeDecodeError, OSError):
                # Undecodable content, missing permissions, or a file removed
                # since discovery: count it and keep indexing the rest.
                failed += 1
                continue

            file_hash = hashlib.sha256(content.encode("utf-8")).hexdigest()
            previous = self.db.get_file(project_id=project_id, path=filepath)
            if not force and previous is not None and previous["hash"] == file_hash:
                skipped += 1
                continue

            symbols, relationships = parser.parse(content=content, filepath=filepath)
            self.store.save_symbols(
                project_id=project_id,
                filepath=filepath,
                language=language,
                file_hash=file_hash,
                symbols=symbols,
                relationships=relationships,
            )
            indexed += 1

        self.db.update_project_file_count(project_id)
        return {
            "project": project_name,
            "language": language,
            "discovered": len(files),
            "indexed": indexed,
            "skipped": skipped,
            "failed": failed,
        }
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from gitignore_parser import parse_gitignore
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
LANGUAGE_EXTENSIONS: dict[str, set[str]] = {
|
|
7
|
+
"python": {".py"},
|
|
8
|
+
"go": {".go"},
|
|
9
|
+
"typescript": {".ts", ".tsx", ".js", ".jsx"},
|
|
10
|
+
"javascript": {".js", ".jsx", ".mjs", ".cjs"},
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
DEFAULT_IGNORE_PARTS = {
|
|
14
|
+
".git",
|
|
15
|
+
"node_modules",
|
|
16
|
+
"venv",
|
|
17
|
+
".venv",
|
|
18
|
+
"__pycache__",
|
|
19
|
+
".pytest_cache",
|
|
20
|
+
".mypy_cache",
|
|
21
|
+
".ruff_cache",
|
|
22
|
+
"dist",
|
|
23
|
+
"build",
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def discover_files(project_path: str, language: str, ignore_patterns: list[str] | None = None) -> list[str]:
|
|
28
|
+
root = Path(project_path)
|
|
29
|
+
if not root.exists() or not root.is_dir():
|
|
30
|
+
return []
|
|
31
|
+
|
|
32
|
+
exts = LANGUAGE_EXTENSIONS.get(language.lower())
|
|
33
|
+
if exts is None:
|
|
34
|
+
return []
|
|
35
|
+
|
|
36
|
+
gitignore = root / ".gitignore"
|
|
37
|
+
gitignore_matcher = parse_gitignore(gitignore) if gitignore.exists() else None
|
|
38
|
+
|
|
39
|
+
discovered: list[str] = []
|
|
40
|
+
for file_path in root.rglob("*"):
|
|
41
|
+
if not file_path.is_file():
|
|
42
|
+
continue
|
|
43
|
+
if file_path.suffix.lower() not in exts:
|
|
44
|
+
continue
|
|
45
|
+
if any(part in DEFAULT_IGNORE_PARTS for part in file_path.parts):
|
|
46
|
+
continue
|
|
47
|
+
if ignore_patterns and any(pattern in str(file_path) for pattern in ignore_patterns):
|
|
48
|
+
continue
|
|
49
|
+
if gitignore_matcher and gitignore_matcher(str(file_path)):
|
|
50
|
+
continue
|
|
51
|
+
discovered.append(str(file_path.resolve()))
|
|
52
|
+
|
|
53
|
+
return sorted(discovered)
|
|
File without changes
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import ast
|
|
4
|
+
|
|
5
|
+
from sampler.indexer.parsers.base import BaseParser
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class PythonParser(BaseParser):
|
|
9
|
+
language = "python"
|
|
10
|
+
|
|
11
|
+
def parse(self, content: str, filepath: str) -> tuple[list[dict], list[dict]]:
    """Extract top-level symbols and relationships from Python source.

    Indexed symbols: module-level variables (plain assignments and annotated
    assignments that have a value, when the target is a simple name),
    functions, classes, and the methods defined directly in a class body.
    Each method also yields a ``CONTAINS`` relationship from its class.

    Args:
        content: Source text of the file.
        filepath: Path of the file; not used by the parsing itself.

    Returns:
        ``(symbols, relationships)``. Both lists are empty when the source
        cannot be parsed.
    """
    symbols: list[dict] = []
    relationships: list[dict] = []

    try:
        module = ast.parse(content)
    except (SyntaxError, ValueError):
        # Unparseable source (ValueError covers e.g. embedded null bytes):
        # no symbols are extracted for this file and indexing continues.
        return symbols, relationships

    for stmt in module.body:
        if isinstance(stmt, (ast.Assign, ast.AnnAssign)):
            if isinstance(stmt, ast.Assign):
                targets = stmt.targets
            else:
                # A bare annotation ("x: int") declares no value; skip it.
                targets = [stmt.target] if stmt.value is not None else []
            for target in targets:
                if isinstance(target, ast.Name):
                    symbols.append(
                        {
                            "type": "variable",
                            "name": target.id,
                            "qualified_name": target.id,
                            "signature": None,
                            "docstring": None,
                            "start_line": getattr(stmt, "lineno", 1),
                            "end_line": getattr(stmt, "end_lineno", getattr(stmt, "lineno", 1)),
                            "metadata": {"scope": "module"},
                        }
                    )

        elif isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
            self._append_function(stmt, symbols, relationships, class_name=None)

        elif isinstance(stmt, ast.ClassDef):
            decos = [ast.unparse(d) if not isinstance(d, ast.Name) else d.id for d in stmt.decorator_list] or None
            symbols.append(
                {
                    "type": "class",
                    "name": stmt.name,
                    "qualified_name": stmt.name,
                    "signature": f"class {stmt.name}",
                    "docstring": ast.get_docstring(stmt),
                    "start_line": getattr(stmt, "lineno", 1),
                    "end_line": getattr(stmt, "end_lineno", getattr(stmt, "lineno", 1)),
                    "metadata": {"decorators": decos} if decos else None,
                }
            )
            for class_stmt in stmt.body:
                if isinstance(class_stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    self._append_function(class_stmt, symbols, relationships, class_name=stmt.name)
                    relationships.append(
                        {
                            "source": stmt.name,
                            "target": f"{stmt.name}.{class_stmt.name}",
                            "type": "CONTAINS",
                            "line": getattr(class_stmt, "lineno", 1),
                            "metadata": None,
                        }
                    )

    return symbols, relationships
|
|
69
|
+
|
|
70
|
+
def _append_function(
    self,
    func: ast.FunctionDef | ast.AsyncFunctionDef,
    symbols: list[dict],
    relationships: list[dict],
    class_name: str | None,
) -> None:
    """Record one function or method and the calls found inside it.

    Appends a symbol dict for *func* to *symbols*, then one ``CALLS``
    relationship to *relationships* for every call node under *func* whose
    callee name can be determined.
    """
    # Symbol type: "function"/"method", prefixed with "async " for async defs.
    kind = "method" if class_name else "function"
    if isinstance(func, ast.AsyncFunctionDef):
        kind = f"async {kind}"

    full_name = f"{class_name}.{func.name}" if class_name else func.name
    first_line = getattr(func, "lineno", 1)

    # Metadata holds the owning class and decorators, each only when present.
    details: dict = {}
    if class_name:
        details["class"] = class_name
    decorator_names = [ast.unparse(deco) for deco in func.decorator_list]
    if decorator_names:
        details["decorators"] = decorator_names

    symbols.append(
        {
            "type": kind,
            "name": func.name,
            "qualified_name": full_name,
            "signature": self._build_signature(func),
            "docstring": ast.get_docstring(func),
            "start_line": first_line,
            "end_line": getattr(func, "end_lineno", first_line),
            "metadata": details or None,
        }
    )

    for node in ast.walk(func):
        if isinstance(node, ast.Call) and (callee := self._call_name(node.func)):
            relationships.append(
                {
                    "source": full_name,
                    "target": callee,
                    "type": "CALLS",
                    "line": getattr(node, "lineno", first_line),
                    "metadata": None,
                }
            )
|
|
113
|
+
|
|
114
|
+
def _build_signature(self, func: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
|
|
115
|
+
prefix = "async def" if isinstance(func, ast.AsyncFunctionDef) else "def"
|
|
116
|
+
args = []
|
|
117
|
+
for arg in func.args.args:
|
|
118
|
+
a = arg.arg
|
|
119
|
+
if arg.annotation:
|
|
120
|
+
a += f": {ast.unparse(arg.annotation)}"
|
|
121
|
+
args.append(a)
|
|
122
|
+
sig = f"{prefix} {func.name}({', '.join(args)})"
|
|
123
|
+
if func.returns:
|
|
124
|
+
sig += f" -> {ast.unparse(func.returns)}"
|
|
125
|
+
return sig
|
|
126
|
+
|
|
127
|
+
def _call_name(self, func_expr: ast.expr) -> str | None:
|
|
128
|
+
if isinstance(func_expr, ast.Name):
|
|
129
|
+
return func_expr.id
|
|
130
|
+
if isinstance(func_expr, ast.Attribute):
|
|
131
|
+
chain: list[str] = []
|
|
132
|
+
current: ast.expr | None = func_expr
|
|
133
|
+
while isinstance(current, ast.Attribute):
|
|
134
|
+
chain.append(current.attr)
|
|
135
|
+
current = current.value
|
|
136
|
+
if isinstance(current, ast.Name):
|
|
137
|
+
chain.append(current.id)
|
|
138
|
+
return ".".join(reversed(chain)) if chain else None
|
|
139
|
+
return None
|
sampler/indexer/store.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
from sampler.db import Database
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class SymbolStore:
    """Writes one file's parsed symbols and relationships through a Database."""

    def __init__(self, db: Database) -> None:
        self.db = db

    def save_symbols(
        self,
        project_id: int,
        filepath: str,
        language: str,
        file_hash: str,
        symbols: list[dict],
        relationships: list[dict],
    ) -> None:
        """Replace the stored data of ``filepath`` with the given parse result.

        The file row is upserted and its previous data cleared, then every
        symbol is inserted, then every relationship whose two endpoints can
        be resolved to symbol ids.

        Args:
            project_id: Project the file belongs to.
            filepath: Path passed to the database as the file's ``path``.
            language: Language label stored with the file.
            file_hash: Content hash stored with the file.
            symbols: Symbol dicts; ``qualified_name`` and ``name`` are read here.
            relationships: Relationship dicts; ``source`` and ``target`` name
                the endpoints.
        """
        file_id = self.db.upsert_file(project_id=project_id, path=filepath, language=language, file_hash=file_hash)
        self.db.clear_file_data(file_id)

        # Name -> id index for the symbols inserted from this file.
        local_ids: dict[str, int] = {}
        for entry in symbols:
            row_id = self.db.insert_symbol(file_id=file_id, symbol=entry)
            short_name = entry.get("name")
            full_name = entry.get("qualified_name") or short_name
            if full_name:
                local_ids[full_name] = row_id
            # A short name keeps pointing at the first symbol that claimed it.
            if short_name and short_name not in local_ids:
                local_ids[short_name] = row_id

        def resolve(key: str) -> int | None:
            # Symbols from this file win; otherwise ask the project-wide lookup.
            found = local_ids.get(key)
            if found is None:
                found = self.db.find_symbol_id_in_project(project_id=project_id, symbol_name=key)
            return found

        for edge in relationships:
            source_key = edge.get("source")
            target_key = edge.get("target")
            if not (source_key and target_key):
                continue

            source_id = resolve(source_key)
            target_id = resolve(target_key)
            # Edges with an unresolved endpoint are dropped.
            if source_id is not None and target_id is not None:
                self.db.insert_relationship(source_id=source_id, target_id=target_id, relation=edge)
|
sampler/mcp/__init__.py
ADDED
|
File without changes
|
sampler/mcp/server.py
ADDED
sampler/models.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@dataclass
class Project:
    """Plain data record for a project entry."""

    # Identifier; None is allowed (presumably until the row is persisted; TODO confirm).
    id: int | None
    name: str
    path: str
    language: str
    # Optional timestamp; defaults to None.
    indexed_at: datetime | None = None
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class Symbol:
    """Plain data record for a code symbol belonging to a file."""

    # Identifier; None is allowed (presumably until the row is persisted; TODO confirm).
    id: int | None
    # Identifier of the owning file record.
    file_id: int
    type: str
    name: str
    # The remaining fields are optional and default to None / 0.
    qualified_name: str | None = None
    signature: str | None = None
    docstring: str | None = None
    start_line: int = 0
    end_line: int = 0
    metadata: dict | None = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass
class Relationship:
    """Plain data record for a typed link between two symbols."""

    # Identifier; None is allowed (presumably until the row is persisted; TODO confirm).
    id: int | None
    # Identifiers of the two linked symbols.
    source_id: int
    target_id: int
    # Relationship label (presumably values such as "CALLS" or "CONTAINS"; verify against the indexer).
    type: str
    # Optional line number and free-form metadata; both default to None.
    line: int | None = None
    metadata: dict | None = None
|
|
File without changes
|
sampler/query/engine.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from sampler.db import Database
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class QueryEngine:
    """Read-side facade that returns database rows as plain dictionaries."""

    def __init__(self, db: Database) -> None:
        self.db = db

    def search(
        self,
        query: str,
        project_name: str | None = None,
        types: list[str] | None = None,
        limit: int | None = None,
        offset: int = 0,
    ) -> list[dict]:
        """Run a symbol search and return each matching row as a ``dict``.

        All arguments are forwarded unchanged to ``Database.search_symbols``.
        """
        matches = self.db.search_symbols(
            query=query,
            project_name=project_name,
            types=types,
            limit=limit,
            offset=offset,
        )
        return list(map(dict, matches))

    def overview(self, filepath: str, project_name: str | None = None) -> list[dict]:
        """Return the rows ``Database.get_symbols_by_filepath`` yields, as dicts."""
        found = self.db.get_symbols_by_filepath(
            filepath=filepath,
            project_name=project_name,
        )
        return list(map(dict, found))
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sampler-cli
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Token-efficient CLI for indexing and searching code symbols (Python-first, designed for minimal LLM/agent context size)
|
|
5
|
+
Author: Samuel Ignacio Carmona Rodriguez
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/sicr0/sampler-cli
|
|
8
|
+
Project-URL: Repository, https://github.com/sicr0/sampler-cli
|
|
9
|
+
Project-URL: Issues, https://github.com/sicr0/sampler-cli/issues
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
16
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: typer>=0.12.0
|
|
21
|
+
Requires-Dist: rich>=13.7.0
|
|
22
|
+
Requires-Dist: tree-sitter>=0.21.0
|
|
23
|
+
Requires-Dist: tree-sitter-python>=0.23.0
|
|
24
|
+
Requires-Dist: gitignore-parser>=0.1.11
|
|
25
|
+
Requires-Dist: pydantic>=2.6.0
|
|
26
|
+
Requires-Dist: pyyaml>=6.0.0
|
|
27
|
+
Provides-Extra: dev
|
|
28
|
+
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
29
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
30
|
+
Requires-Dist: ruff>=0.5.0; extra == "dev"
|
|
31
|
+
Requires-Dist: mypy>=1.7.0; extra == "dev"
|
|
32
|
+
Provides-Extra: mcp
|
|
33
|
+
Requires-Dist: fastmcp>=0.1.0; extra == "mcp"
|
|
34
|
+
Provides-Extra: semantic
|
|
35
|
+
Requires-Dist: sentence-transformers>=2.2.0; extra == "semantic"
|
|
36
|
+
Dynamic: license-file
|
|
37
|
+
|
|
38
|
+
# Sampler
|
|
39
|
+
|
|
40
|
+
CLI indexer para navegar símbolos y relaciones en codebases multiproyecto.
|
|
41
|
+
|
|
42
|
+
Versión actual: 0.2.0
|
|
43
|
+
|
|
44
|
+
## Requisitos
|
|
45
|
+
|
|
46
|
+
- Python 3.11+
|
|
47
|
+
- `uv` (recomendado)
|
|
48
|
+
- Go (instalado para soporte parser Fase 1)
|
|
49
|
+
|
|
50
|
+
## Instalación de Go (macOS)
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
brew install go
|
|
54
|
+
go version
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Instalación
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
pip install sampler-cli
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
Para desarrollo (incluye tests, linters):
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install -e '.[dev]'
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Uso rápido
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install sampler-cli
|
|
73
|
+
sampler init
|
|
74
|
+
sampler project add myproj /absolute/path --language python
|
|
75
|
+
sampler project list
|
|
76
|
+
sampler index myproj
|
|
77
|
+
sampler search add --project myproj
|
|
78
|
+
sampler overview /absolute/path/file.py
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
**Demo / LLM use (token-efficient by design):**
|
|
82
|
+
- Default outputs are compact single-line (no tables, short paths, no noise).
|
|
83
|
+
- Ideal for pasting into agents/LLMs with minimal context size.
|
|
84
|
+
- Example: `sampler search worker --project myproj` → `myproj:src/tasks.py:42 function process def process()`
|
|
85
|
+
|
|
86
|
+
## Estado actual
|
|
87
|
+
|
|
88
|
+
Implementado:
|
|
89
|
+
|
|
90
|
+
- Bootstrap inicial de Fase 0
|
|
91
|
+
- Configuración global con archivo `~/.sampler/config.yaml`
|
|
92
|
+
- CRUD de proyectos en config (`add`, `list`, `remove`)
|
|
93
|
+
- Esquema SQLite core + queries de index/search en `src/sampler/db.py`
|
|
94
|
+
- Discovery de archivos por lenguaje con soporte `.gitignore`
|
|
95
|
+
- Parser Python estable basado en AST
|
|
96
|
+
- Indexer real (hash incremental + persistencia)
|
|
97
|
+
- Query engine real (`search`, `overview`)
|
|
98
|
+
- CI básico con GitHub Actions (`pytest -q`)
|
|
99
|
+
- Tests: smoke, config, db, cli, discovery, python_parser, index_query
|
|
100
|
+
|
|
101
|
+
Nota de estabilidad:
|
|
102
|
+
|
|
103
|
+
- Se desactivó uso runtime de tree-sitter en parser Python por crash nativo (`BUS/SEGV`) en indexación real.
|
|
104
|
+
- Se mantiene estrategia AST para estabilidad en producción local.
|
|
105
|
+
|
|
106
|
+
Pendiente inmediato:
|
|
107
|
+
|
|
108
|
+
- Filtros y paginación en búsqueda
|
|
109
|
+
- Comandos `callers`, `usages`, `related`
|
|
110
|
+
- Parsers Go y TypeScript/JavaScript
|
|
111
|
+
|
|
112
|
+
## Estructura clave
|
|
113
|
+
|
|
114
|
+
```text
|
|
115
|
+
src/sampler/cli/main.py # comandos CLI
|
|
116
|
+
src/sampler/config.py # config global YAML
|
|
117
|
+
src/sampler/db.py # capa SQLite
|
|
118
|
+
src/sampler/indexer/builder.py # indexación de proyectos
|
|
119
|
+
src/sampler/indexer/store.py # persistencia de símbolos/relaciones
|
|
120
|
+
src/sampler/indexer/parsers/python.py # parser python estable
|
|
121
|
+
src/sampler/query/engine.py # search/overview
|
|
122
|
+
src/sampler/indexer/discover.py # discovery y filtros
|
|
123
|
+
tests/ # pruebas base
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## Ejecutar pruebas
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
pytest -q
|
|
130
|
+
```
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
sampler/__init__.py,sha256=QwLGZDZhVE7dBKbOwCLRt3tRMXTtg8fYYHHDXi3sZ1c,49
|
|
2
|
+
sampler/__main__.py,sha256=9_JuHaFFFklO51USpVH94P871geP25UsshaZE2fMjP8,72
|
|
3
|
+
sampler/config.py,sha256=BcSQtNSP1fVVDLObbrgdq9GCk8GfhDfVk4kvf1vnUfg,2431
|
|
4
|
+
sampler/db.py,sha256=iuNX5fgUkIB56ZDQWSxfkHpB0zP4e8woWc709HC7WRU,12421
|
|
5
|
+
sampler/models.py,sha256=aem6T8cyTjK0mUeXfjlSYi52sFKq6YzBtFQdRYJIri0,635
|
|
6
|
+
sampler/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
7
|
+
sampler/cli/main.py,sha256=8-zLT-xObraEYCDiQXIY_QDZZKzvW9f0Ooj6AE2n6SI,5873
|
|
8
|
+
sampler/indexer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
|
+
sampler/indexer/builder.py,sha256=fyaMT7iVnuIPm08r_EYclXeR7eIc4nz00phMA8Ru2n0,2439
|
|
10
|
+
sampler/indexer/discover.py,sha256=_Gwi4BprW6jvnf0ChqDM_9_uioMdkWtp-aLNGpHLhXI,1449
|
|
11
|
+
sampler/indexer/store.py,sha256=LnAALWacbvKV2S80Xbvu84rG1GAKACnVhZtdOJdyCbw,1727
|
|
12
|
+
sampler/indexer/parsers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
sampler/indexer/parsers/base.py,sha256=gdnUahZj_oSnn2QxhSVn_05UjQ7CNCtwi0Azjyn4NiM,217
|
|
14
|
+
sampler/indexer/parsers/go.py,sha256=lIa0SJ9L4pk5Ho8WE6MNHfWuo2JkORXPGsOM3ohCjc0,238
|
|
15
|
+
sampler/indexer/parsers/python.py,sha256=6pIK-WndxQcgdlb6pUf5mYPArLPi-LgXOfk-R4DnEoo,5775
|
|
16
|
+
sampler/indexer/parsers/typescript.py,sha256=QAQADo5UIHJ-VqKRMcAuUrxAIwCJay8T7bz6maOg5MA,254
|
|
17
|
+
sampler/mcp/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
18
|
+
sampler/mcp/server.py,sha256=uGSobXj8wQ0zNeWDCFHTgAgLC-J_xPMtMWf--ZrugpQ,88
|
|
19
|
+
sampler/query/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
+
sampler/query/engine.py,sha256=t5_5lOsxnjvPCk4AqOuM67RBkpvm-EmGU44bpK3ayIE,698
|
|
21
|
+
sampler/query/semantic.py,sha256=iOIumz6horMJMWIi45gEQMdD35EEXdj4vKjP0Mg1fTw,156
|
|
22
|
+
sampler_cli-0.2.0.dist-info/licenses/LICENSE,sha256=0HU6UzeTcc_a14ihCQwRzQXvlOtUZLWHyk4ZxSWiLFY,1089
|
|
23
|
+
sampler_cli-0.2.0.dist-info/METADATA,sha256=UUC5F6gSO8_Hyw6P7U8vnj_gKj4EggYfC8eSZrJupmw,3932
|
|
24
|
+
sampler_cli-0.2.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
25
|
+
sampler_cli-0.2.0.dist-info/entry_points.txt,sha256=HYeHAaX1KKmdPjpLFjULGqWpBnUCOVDrEM8EJbNe-Oc,49
|
|
26
|
+
sampler_cli-0.2.0.dist-info/top_level.txt,sha256=s2IbPtyHmWaMta20F-P53OMgvEDarZO8uSzVN3uchgc,8
|
|
27
|
+
sampler_cli-0.2.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Samuel Ignacio Carmona Rodriguez
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
sampler
|