sampler-cli 0.4.3__tar.gz → 0.4.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sampler_cli-0.4.5/PKG-INFO +234 -0
- sampler_cli-0.4.5/README.md +184 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/pyproject.toml +4 -4
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/__init__.py +1 -1
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/cli/main.py +19 -3
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/db.py +18 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/indexer/builder.py +2 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/indexer/discover.py +5 -2
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/indexer/imports.py +1 -1
- sampler_cli-0.4.5/src/sampler/indexer/parsers/vue.py +73 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/query/engine.py +3 -0
- sampler_cli-0.4.5/src/sampler_cli.egg-info/PKG-INFO +234 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler_cli.egg-info/SOURCES.txt +3 -1
- sampler_cli-0.4.5/tests/test_db.py +46 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_discover.py +4 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_stale_code.py +2 -0
- sampler_cli-0.4.5/tests/test_vue_parser.py +91 -0
- sampler_cli-0.4.3/PKG-INFO +0 -209
- sampler_cli-0.4.3/README.md +0 -159
- sampler_cli-0.4.3/src/sampler_cli.egg-info/PKG-INFO +0 -209
- sampler_cli-0.4.3/tests/test_db.py +0 -22
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/LICENSE +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/setup.cfg +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/__main__.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/cli/__init__.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/cli/render.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/config.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/embeddings.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/indexer/__init__.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/indexer/embedder.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/indexer/parsers/__init__.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/indexer/parsers/base.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/indexer/parsers/go.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/indexer/parsers/python.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/indexer/parsers/typescript.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/indexer/store.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/mcp/__init__.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/mcp/server.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/models.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/query/__init__.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/query/semantic.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/viz/__init__.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/viz/bus.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/viz/canvas.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/viz/discover_emit.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/viz/engine.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/viz/events.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/viz/headline.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/viz/layout_algo.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/viz/live.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler/viz/pipeline.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler_cli.egg-info/dependency_links.txt +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler_cli.egg-info/entry_points.txt +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler_cli.egg-info/requires.txt +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/src/sampler_cli.egg-info/top_level.txt +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_canvas_graph.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_cli.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_config.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_embeddings.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_events.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_go_parser.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_headline.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_imports.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_index_query.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_python_parser.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_relationships.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_render_bars.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_semantic.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_smoke.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_typescript_parser.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_viz_engine.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_viz_layout.py +0 -0
- {sampler_cli-0.4.3 → sampler_cli-0.4.5}/tests/test_viz_pipeline.py +0 -0
|
@@ -0,0 +1,234 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sampler-cli
|
|
3
|
+
Version: 0.4.5
|
|
4
|
+
Summary: Token-efficient CLI for indexing and searching code symbols (Python-first, designed for minimal LLM/agent context size)
|
|
5
|
+
Author: Samuel Ignacio Carmona Rodriguez
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/SamuelCarmona83/sampler-cli
|
|
8
|
+
Project-URL: Repository, https://github.com/SamuelCarmona83/sampler-cli
|
|
9
|
+
Project-URL: Issues, https://github.com/SamuelCarmona83/sampler-cli/issues
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
16
|
+
Classifier: Topic :: Software Development :: Code Generators
|
|
17
|
+
Requires-Python: >=3.11
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: typer>=0.12.0
|
|
21
|
+
Requires-Dist: rich>=13.7.0
|
|
22
|
+
Requires-Dist: tree-sitter>=0.21.0
|
|
23
|
+
Requires-Dist: tree-sitter-python>=0.23.0
|
|
24
|
+
Requires-Dist: tree-sitter-go>=0.23.0
|
|
25
|
+
Requires-Dist: tree-sitter-typescript>=0.23.0
|
|
26
|
+
Requires-Dist: gitignore-parser>=0.1.11
|
|
27
|
+
Requires-Dist: pydantic>=2.6.0
|
|
28
|
+
Requires-Dist: pyyaml>=6.0.0
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest>=7.4.0; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
32
|
+
Requires-Dist: ruff>=0.5.0; extra == "dev"
|
|
33
|
+
Requires-Dist: mypy>=1.7.0; extra == "dev"
|
|
34
|
+
Requires-Dist: numpy>=1.26.0; extra == "dev"
|
|
35
|
+
Requires-Dist: scikit-learn>=1.4.0; extra == "dev"
|
|
36
|
+
Requires-Dist: fastembed>=0.2.0; extra == "dev"
|
|
37
|
+
Provides-Extra: mcp
|
|
38
|
+
Requires-Dist: fastmcp>=0.1.0; extra == "mcp"
|
|
39
|
+
Provides-Extra: semantic
|
|
40
|
+
Requires-Dist: numpy>=1.26.0; extra == "semantic"
|
|
41
|
+
Requires-Dist: scikit-learn>=1.4.0; extra == "semantic"
|
|
42
|
+
Provides-Extra: embeddings
|
|
43
|
+
Requires-Dist: fastembed>=0.2.0; extra == "embeddings"
|
|
44
|
+
Requires-Dist: numpy>=1.26.0; extra == "embeddings"
|
|
45
|
+
Provides-Extra: ollama-embeddings
|
|
46
|
+
Requires-Dist: ollama>=0.3.0; extra == "ollama-embeddings"
|
|
47
|
+
Provides-Extra: openai-embeddings
|
|
48
|
+
Requires-Dist: openai>=1.0.0; extra == "openai-embeddings"
|
|
49
|
+
Dynamic: license-file
|
|
50
|
+
|
|
51
|
+
<p align="center">
|
|
52
|
+
<img src="./assets/sampler.png" alt="Sampler logo" width="220">
|
|
53
|
+
</p>
|
|
54
|
+
|
|
55
|
+
<h1 align="center">Sampler</h1>
|
|
56
|
+
|
|
57
|
+
<p align="center">
|
|
58
|
+
<strong>Token-efficient CLI for indexing and searching code symbols across projects.</strong><br>
|
|
59
|
+
Compact output. Short paths. Low-noise symbol views.
|
|
60
|
+
</p>
|
|
61
|
+
|
|
62
|
+
<p align="center">
|
|
63
|
+
<a href="https://pypi.org/project/sampler-cli/"><img src="https://img.shields.io/pypi/v/sampler-cli" alt="PyPI version"></a>
|
|
64
|
+
<img src="https://img.shields.io/badge/python-3.11%2B-blue" alt="Python 3.11+">
|
|
65
|
+
</p>
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
> *The code isn't the problem. The problem is the distance between you and the code.*
|
|
70
|
+
>
|
|
71
|
+
> Sampler closes that distance. One index. One query. The right symbol, the right relationship, the right context — delivered without the noise. Because in a world drowning in repositories, the person who finds what matters first is the person who moves the work forward.
|
|
72
|
+
|
|
73
|
+
## Installation
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
pip install sampler-cli
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Development:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install -e '.[dev]'
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Semantic stack (TF-IDF + local hash fallback):
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
pip install -e '.[semantic]'
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
## Quick Start
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
sampler init
|
|
95
|
+
sampler project add myproj /absolute/path/to/project --language auto
|
|
96
|
+
sampler index myproj
|
|
97
|
+
sampler search retry --project myproj
|
|
98
|
+
sampler symbols myproj
|
|
99
|
+
sampler overview src/main.py
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Commands
|
|
103
|
+
|
|
104
|
+
### Core
|
|
105
|
+
|
|
106
|
+
| Command | Description |
|
|
107
|
+
| --- | --- |
|
|
108
|
+
| `sampler version [--plain]` | Show version |
|
|
109
|
+
| `sampler init` | Initialize Sampler config and data directory |
|
|
110
|
+
| `sampler index <project>` | Index a project's symbols and relationships |
|
|
111
|
+
| `sampler search <query>` | Search symbols across a project |
|
|
112
|
+
| `sampler search-all <query>` | Search across all indexed projects |
|
|
113
|
+
| `sampler symbols <project>` | List symbols in a project |
|
|
114
|
+
| `sampler overview <filepath>` | Show symbol overview for a file |
|
|
115
|
+
|
|
116
|
+
**Search options:** `--project`, `--type`, `--limit`, `--semantic`, `--style plain|bars`
|
|
117
|
+
|
|
118
|
+
### Relationships
|
|
119
|
+
|
|
120
|
+
| Command | Description |
|
|
121
|
+
| --- | --- |
|
|
122
|
+
| `sampler callers <symbol>` | Find callers of a symbol |
|
|
123
|
+
| `sampler usages <symbol>` | Find usages of a symbol |
|
|
124
|
+
| `sampler related <symbol>` | Find related symbols |
|
|
125
|
+
|
|
126
|
+
Symbols can also be selected as `<path>:<symbol>`, e.g. `app/utils/helpers.py:format_kda`.
|
|
127
|
+
|
|
128
|
+
### Project Management
|
|
129
|
+
|
|
130
|
+
| Command | Description |
|
|
131
|
+
| --- | --- |
|
|
132
|
+
| `sampler project add <name> <path> --language <lang>` | Add a project |
|
|
133
|
+
| `sampler project update <name>` | Update project path or language |
|
|
134
|
+
| `sampler project list` | List projects |
|
|
135
|
+
| `sampler project deps <name>` | Show project dependencies |
|
|
136
|
+
| `sampler project remove <name>` | Remove a project |
|
|
137
|
+
|
|
138
|
+
Languages: `python`, `go`, `typescript`, `javascript`, `vue`, `auto`.
|
|
139
|
+
|
|
140
|
+
### Config & Analysis
|
|
141
|
+
|
|
142
|
+
| Command | Description |
|
|
143
|
+
| --- | --- |
|
|
144
|
+
| `sampler config show` | Show current config |
|
|
145
|
+
| `sampler config embeddings` | Configure embedding provider |
|
|
146
|
+
| `sampler embed <project>` | Precompute embeddings |
|
|
147
|
+
| `sampler stale-code <project>` | Find candidate stale code |
|
|
148
|
+
|
|
149
|
+
## Embeddings & Semantic Search
|
|
150
|
+
|
|
151
|
+
`sampler search --semantic` uses a pluggable adapter pattern:
|
|
152
|
+
|
|
153
|
+
- **Default:** `bge-small` (BAAI/bge-small-en-v1.5 via fastembed — local ONNX, ~384 dim)
|
|
154
|
+
- **Built-ins:** `hash` (deterministic fallback), `ollama`, `nomic`, `openai`, `fastembed`
|
|
155
|
+
- **Lexical primary:** TF-IDF (sklearn, on-the-fly, no pre-embedding required)
|
|
156
|
+
- **Final fallback:** hash fingerprint (always available)
|
|
157
|
+
|
|
158
|
+
Configure in `~/.sampler/config.yaml` or via `sampler config embeddings`:
|
|
159
|
+
|
|
160
|
+
```yaml
|
|
161
|
+
embeddings:
|
|
162
|
+
provider: "bge-small"
|
|
163
|
+
# provider: "ollama"
|
|
164
|
+
# model: "nomic-embed-text"
|
|
165
|
+
# base_url: "http://localhost:11434"
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
Install extras:
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
pip install 'sampler-cli[embeddings]' # BGE (recommended)
|
|
172
|
+
pip install 'sampler-cli[ollama-embeddings]'
|
|
173
|
+
pip install 'sampler-cli[openai-embeddings]'
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
Run `sampler embed <project>` to precompute vectors for the active provider. Change providers? Re-run `embed` after updating config.
|
|
177
|
+
|
|
178
|
+
Offline or air-gapped: set `provider: hash`, or rely on TF-IDF + hash with the `[semantic]` extra.
|
|
179
|
+
|
|
180
|
+
## Language Support
|
|
181
|
+
|
|
182
|
+
| Language | Parser |
|
|
183
|
+
| --- | --- |
|
|
184
|
+
| Python | stdlib AST |
|
|
185
|
+
| Go | tree-sitter-go |
|
|
186
|
+
| TypeScript / JavaScript | tree-sitter-typescript |
|
|
187
|
+
| Vue | Extracts `<script>` / `<script setup>`, delegates to TS/JS parser |
|
|
188
|
+
| Auto | Per-file detection for monorepos and multi-language projects |
|
|
189
|
+
|
|
190
|
+
## Stale Code Detection
|
|
191
|
+
|
|
192
|
+
`sampler stale-code <project>` finds functions that may no longer be needed:
|
|
193
|
+
|
|
194
|
+
- Called only from test files
|
|
195
|
+
- Zero non-test callers in the project call graph
|
|
196
|
+
- Defined in production code
|
|
197
|
+
|
|
198
|
+
Supported test patterns:
|
|
199
|
+
|
|
200
|
+
- Python: `tests/`, `test_*.py`, `*_test.py`
|
|
201
|
+
- Go: `*_test.go`
|
|
202
|
+
- TypeScript / JavaScript / Vue: `__tests__/`, `test/`, `spec/`, `*.test.*`, `*.spec.*`
|
|
203
|
+
|
|
204
|
+
This is a heuristic signal, not a guarantee of dead code.
|
|
205
|
+
|
|
206
|
+
## Examples
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
$ sampler search worker --project myproj
|
|
210
|
+
myproj:src/tasks/celery_app.py:70 function on_worker_ready def on_worker_ready(sender)
|
|
211
|
+
|
|
212
|
+
$ sampler related ConfigManager --project myproj --style bars
|
|
213
|
+
myproj:src/config.py:24-105 class ConfigManager [parent]
|
|
214
|
+
...
|
|
215
|
+
|
|
216
|
+
$ sampler stale-code myproj
|
|
217
|
+
myproj:src/utils/retry.py:12-28 function retry_request test_callers=2 non_test_callers=0 [tests.test_retry.test_retry_request]
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Data Location
|
|
221
|
+
|
|
222
|
+
- Config: `~/.sampler/config.yaml`
|
|
223
|
+
- Database: `~/.sampler/graph.db`
|
|
224
|
+
|
|
225
|
+
## Running Tests
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
pytest -q
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
## Notes
|
|
232
|
+
|
|
233
|
+
- Compact output is the default by design — built for agent workflows and fast human scanning.
|
|
234
|
+
- For roadmap details, see [TODO.md](TODO.md) and [PLAN.md](PLAN.md).
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="./assets/sampler.png" alt="Sampler logo" width="220">
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<h1 align="center">Sampler</h1>
|
|
6
|
+
|
|
7
|
+
<p align="center">
|
|
8
|
+
<strong>Token-efficient CLI for indexing and searching code symbols across projects.</strong><br>
|
|
9
|
+
Compact output. Short paths. Low-noise symbol views.
|
|
10
|
+
</p>
|
|
11
|
+
|
|
12
|
+
<p align="center">
|
|
13
|
+
<a href="https://pypi.org/project/sampler-cli/"><img src="https://img.shields.io/pypi/v/sampler-cli" alt="PyPI version"></a>
|
|
14
|
+
<img src="https://img.shields.io/badge/python-3.11%2B-blue" alt="Python 3.11+">
|
|
15
|
+
</p>
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
> *The code isn't the problem. The problem is the distance between you and the code.*
|
|
20
|
+
>
|
|
21
|
+
> Sampler closes that distance. One index. One query. The right symbol, the right relationship, the right context — delivered without the noise. Because in a world drowning in repositories, the person who finds what matters first is the person who moves the work forward.
|
|
22
|
+
|
|
23
|
+
## Installation
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install sampler-cli
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Development:
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
pip install -e '.[dev]'
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
Semantic stack (TF-IDF + local hash fallback):
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install -e '.[semantic]'
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Quick Start
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
sampler init
|
|
45
|
+
sampler project add myproj /absolute/path/to/project --language auto
|
|
46
|
+
sampler index myproj
|
|
47
|
+
sampler search retry --project myproj
|
|
48
|
+
sampler symbols myproj
|
|
49
|
+
sampler overview src/main.py
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
## Commands
|
|
53
|
+
|
|
54
|
+
### Core
|
|
55
|
+
|
|
56
|
+
| Command | Description |
|
|
57
|
+
| --- | --- |
|
|
58
|
+
| `sampler version [--plain]` | Show version |
|
|
59
|
+
| `sampler init` | Initialize Sampler config and data directory |
|
|
60
|
+
| `sampler index <project>` | Index a project's symbols and relationships |
|
|
61
|
+
| `sampler search <query>` | Search symbols across a project |
|
|
62
|
+
| `sampler search-all <query>` | Search across all indexed projects |
|
|
63
|
+
| `sampler symbols <project>` | List symbols in a project |
|
|
64
|
+
| `sampler overview <filepath>` | Show symbol overview for a file |
|
|
65
|
+
|
|
66
|
+
**Search options:** `--project`, `--type`, `--limit`, `--semantic`, `--style plain|bars`
|
|
67
|
+
|
|
68
|
+
### Relationships
|
|
69
|
+
|
|
70
|
+
| Command | Description |
|
|
71
|
+
| --- | --- |
|
|
72
|
+
| `sampler callers <symbol>` | Find callers of a symbol |
|
|
73
|
+
| `sampler usages <symbol>` | Find usages of a symbol |
|
|
74
|
+
| `sampler related <symbol>` | Find related symbols |
|
|
75
|
+
|
|
76
|
+
Symbols can also be selected as `<path>:<symbol>`, e.g. `app/utils/helpers.py:format_kda`.
|
|
77
|
+
|
|
78
|
+
### Project Management
|
|
79
|
+
|
|
80
|
+
| Command | Description |
|
|
81
|
+
| --- | --- |
|
|
82
|
+
| `sampler project add <name> <path> --language <lang>` | Add a project |
|
|
83
|
+
| `sampler project update <name>` | Update project path or language |
|
|
84
|
+
| `sampler project list` | List projects |
|
|
85
|
+
| `sampler project deps <name>` | Show project dependencies |
|
|
86
|
+
| `sampler project remove <name>` | Remove a project |
|
|
87
|
+
|
|
88
|
+
Languages: `python`, `go`, `typescript`, `javascript`, `vue`, `auto`.
|
|
89
|
+
|
|
90
|
+
### Config & Analysis
|
|
91
|
+
|
|
92
|
+
| Command | Description |
|
|
93
|
+
| --- | --- |
|
|
94
|
+
| `sampler config show` | Show current config |
|
|
95
|
+
| `sampler config embeddings` | Configure embedding provider |
|
|
96
|
+
| `sampler embed <project>` | Precompute embeddings |
|
|
97
|
+
| `sampler stale-code <project>` | Find candidate stale code |
|
|
98
|
+
|
|
99
|
+
## Embeddings & Semantic Search
|
|
100
|
+
|
|
101
|
+
`sampler search --semantic` uses a pluggable adapter pattern:
|
|
102
|
+
|
|
103
|
+
- **Default:** `bge-small` (BAAI/bge-small-en-v1.5 via fastembed — local ONNX, ~384 dim)
|
|
104
|
+
- **Built-ins:** `hash` (deterministic fallback), `ollama`, `nomic`, `openai`, `fastembed`
|
|
105
|
+
- **Lexical primary:** TF-IDF (sklearn, on-the-fly, no pre-embedding required)
|
|
106
|
+
- **Final fallback:** hash fingerprint (always available)
|
|
107
|
+
|
|
108
|
+
Configure in `~/.sampler/config.yaml` or via `sampler config embeddings`:
|
|
109
|
+
|
|
110
|
+
```yaml
|
|
111
|
+
embeddings:
|
|
112
|
+
provider: "bge-small"
|
|
113
|
+
# provider: "ollama"
|
|
114
|
+
# model: "nomic-embed-text"
|
|
115
|
+
# base_url: "http://localhost:11434"
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
Install extras:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
pip install 'sampler-cli[embeddings]' # BGE (recommended)
|
|
122
|
+
pip install 'sampler-cli[ollama-embeddings]'
|
|
123
|
+
pip install 'sampler-cli[openai-embeddings]'
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
Run `sampler embed <project>` to precompute vectors for the active provider. Change providers? Re-run `embed` after updating config.
|
|
127
|
+
|
|
128
|
+
Offline or air-gapped: set `provider: hash`, or rely on TF-IDF + hash with the `[semantic]` extra.
|
|
129
|
+
|
|
130
|
+
## Language Support
|
|
131
|
+
|
|
132
|
+
| Language | Parser |
|
|
133
|
+
| --- | --- |
|
|
134
|
+
| Python | stdlib AST |
|
|
135
|
+
| Go | tree-sitter-go |
|
|
136
|
+
| TypeScript / JavaScript | tree-sitter-typescript |
|
|
137
|
+
| Vue | Extracts `<script>` / `<script setup>`, delegates to TS/JS parser |
|
|
138
|
+
| Auto | Per-file detection for monorepos and multi-language projects |
|
|
139
|
+
|
|
140
|
+
## Stale Code Detection
|
|
141
|
+
|
|
142
|
+
`sampler stale-code <project>` finds functions that may no longer be needed:
|
|
143
|
+
|
|
144
|
+
- Called only from test files
|
|
145
|
+
- Zero non-test callers in the project call graph
|
|
146
|
+
- Defined in production code
|
|
147
|
+
|
|
148
|
+
Supported test patterns:
|
|
149
|
+
|
|
150
|
+
- Python: `tests/`, `test_*.py`, `*_test.py`
|
|
151
|
+
- Go: `*_test.go`
|
|
152
|
+
- TypeScript / JavaScript / Vue: `__tests__/`, `test/`, `spec/`, `*.test.*`, `*.spec.*`
|
|
153
|
+
|
|
154
|
+
This is a heuristic signal, not a guarantee of dead code.
|
|
155
|
+
|
|
156
|
+
## Examples
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
$ sampler search worker --project myproj
|
|
160
|
+
myproj:src/tasks/celery_app.py:70 function on_worker_ready def on_worker_ready(sender)
|
|
161
|
+
|
|
162
|
+
$ sampler related ConfigManager --project myproj --style bars
|
|
163
|
+
myproj:src/config.py:24-105 class ConfigManager [parent]
|
|
164
|
+
...
|
|
165
|
+
|
|
166
|
+
$ sampler stale-code myproj
|
|
167
|
+
myproj:src/utils/retry.py:12-28 function retry_request test_callers=2 non_test_callers=0 [tests.test_retry.test_retry_request]
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Data Location
|
|
171
|
+
|
|
172
|
+
- Config: `~/.sampler/config.yaml`
|
|
173
|
+
- Database: `~/.sampler/graph.db`
|
|
174
|
+
|
|
175
|
+
## Running Tests
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
pytest -q
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
## Notes
|
|
182
|
+
|
|
183
|
+
- Compact output is the default by design — built for agent workflows and fast human scanning.
|
|
184
|
+
- For roadmap details, see [TODO.md](TODO.md) and [PLAN.md](PLAN.md).
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "sampler-cli"
|
|
3
|
-
version = "0.4.3"
|
|
3
|
+
version = "0.4.5"
|
|
4
4
|
description = "Token-efficient CLI for indexing and searching code symbols (Python-first, designed for minimal LLM/agent context size)"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
license = { text = "MIT" }
|
|
@@ -69,6 +69,6 @@ pythonpath = ["src"]
|
|
|
69
69
|
testpaths = ["tests"]
|
|
70
70
|
|
|
71
71
|
[project.urls]
|
|
72
|
-
Homepage = "https://github.com/
|
|
73
|
-
Repository = "https://github.com/
|
|
74
|
-
Issues = "https://github.com/
|
|
72
|
+
Homepage = "https://github.com/SamuelCarmona83/sampler-cli"
|
|
73
|
+
Repository = "https://github.com/SamuelCarmona83/sampler-cli"
|
|
74
|
+
Issues = "https://github.com/SamuelCarmona83/sampler-cli/issues"
|
|
@@ -183,6 +183,8 @@ def project_list() -> None:
|
|
|
183
183
|
table.add_column("Language", style="green")
|
|
184
184
|
table.add_column("Enabled", justify="center")
|
|
185
185
|
|
|
186
|
+
db = _database()
|
|
187
|
+
|
|
186
188
|
for p in projects:
|
|
187
189
|
try:
|
|
188
190
|
pp = Path(p.path).resolve()
|
|
@@ -195,7 +197,21 @@ def project_list() -> None:
|
|
|
195
197
|
except Exception:
|
|
196
198
|
disp = p.path
|
|
197
199
|
enabled = "[green]yes[/green]" if p.enabled else "[dim]no[/dim]"
|
|
198
|
-
|
|
200
|
+
|
|
201
|
+
lang_display = p.language
|
|
202
|
+
if (p.language or "").lower() == "auto":
|
|
203
|
+
breakdown = db.get_project_language_breakdown(p.name)
|
|
204
|
+
total = sum(breakdown.values()) or 1
|
|
205
|
+
parts = []
|
|
206
|
+
for lang, cnt in sorted(breakdown.items(), key=lambda kv: -kv[1])[:4]: # top 4 for brevity
|
|
207
|
+
pct = int(round(cnt * 100 / total))
|
|
208
|
+
parts.append(f"{lang} {pct}%")
|
|
209
|
+
if parts:
|
|
210
|
+
lang_display = f"auto ({', '.join(parts)})"
|
|
211
|
+
else:
|
|
212
|
+
lang_display = "auto (no files yet)"
|
|
213
|
+
|
|
214
|
+
table.add_row(p.name, disp, lang_display, enabled)
|
|
199
215
|
|
|
200
216
|
console.print(table)
|
|
201
217
|
|
|
@@ -205,7 +221,7 @@ def project_add(
|
|
|
205
221
|
name: str,
|
|
206
222
|
path: str,
|
|
207
223
|
language: str = typer.Option(
|
|
208
|
-
"python", "--language", help="python, go, typescript, javascript, or 'auto' for monorepos"
|
|
224
|
+
"python", "--language", help="python, go, typescript, javascript, vue, or 'auto' for monorepos"
|
|
209
225
|
),
|
|
210
226
|
) -> None:
|
|
211
227
|
"""Register project in global config."""
|
|
@@ -232,7 +248,7 @@ def project_remove(name: str) -> None:
|
|
|
232
248
|
def project_update(
|
|
233
249
|
name: str,
|
|
234
250
|
path: str | None = typer.Option(None, "--path", help="New absolute path for the project"),
|
|
235
|
-
language: str | None = typer.Option(None, "--language", help="New language (
|
|
251
|
+
language: str | None = typer.Option(None, "--language", help="New language (python|go|typescript|javascript|vue|auto)"),
|
|
236
252
|
) -> None:
|
|
237
253
|
"""Update a registered project's path/language in place (no remove/add needed)."""
|
|
238
254
|
if path is None and language is None:
|
|
@@ -640,6 +640,24 @@ class Database:
|
|
|
640
640
|
"embeddings": int(row["embeddings"]),
|
|
641
641
|
}
|
|
642
642
|
|
|
643
|
+
def get_project_language_breakdown(self, project_name: str) -> dict[str, int]:
    """Count a project's indexed files per language.

    Runs one grouped query over the ``files`` table joined to ``projects``
    and returns ``{language: file_count}`` for the project called
    *project_name*. Files whose language is NULL are reported under the key
    ``"unknown"``. Entries are inserted in descending file-count order, and
    the mapping is empty when no file rows match the project name.

    `project list` uses this to show per-language percentages for
    ``language=auto`` projects.
    """
    query = """
        SELECT COALESCE(f.language, 'unknown') AS lang, COUNT(*) AS cnt
        FROM files f
        JOIN projects p ON f.project_id = p.id
        WHERE p.name = ?
        GROUP BY lang
        ORDER BY cnt DESC
    """
    counts: dict[str, int] = {}
    with self.connect() as conn:
        for record in conn.execute(query, (project_name,)).fetchall():
            counts[record["lang"]] = int(record["cnt"])
    return counts
|
|
660
|
+
|
|
643
661
|
def get_top_symbols_by_degree(self, project_name: str, limit: int = 80) -> list[sqlite3.Row]:
|
|
644
662
|
"""Top symbols by in+out relationship degree for graph preview."""
|
|
645
663
|
sql = """
|
|
@@ -12,6 +12,7 @@ from sampler.indexer.imports import extract_imports
|
|
|
12
12
|
from sampler.indexer.parsers.go import GoParser
|
|
13
13
|
from sampler.indexer.parsers.python import PythonParser
|
|
14
14
|
from sampler.indexer.parsers.typescript import TypeScriptParser
|
|
15
|
+
from sampler.indexer.parsers.vue import VueParser
|
|
15
16
|
from sampler.indexer.store import SymbolStore
|
|
16
17
|
from sampler.viz.discover_emit import emit_discover
|
|
17
18
|
from sampler.viz.events import FileParsing, LogLine, Stage, StageChanged
|
|
@@ -29,6 +30,7 @@ class IndexBuilder:
|
|
|
29
30
|
"go": GoParser(),
|
|
30
31
|
"typescript": TypeScriptParser(),
|
|
31
32
|
"javascript": TypeScriptParser(),
|
|
33
|
+
"vue": VueParser(),
|
|
32
34
|
}
|
|
33
35
|
|
|
34
36
|
def index_project(
|
|
@@ -8,6 +8,7 @@ LANGUAGE_EXTENSIONS: dict[str, set[str]] = {
|
|
|
8
8
|
"go": {".go"},
|
|
9
9
|
"typescript": {".ts", ".tsx", ".js", ".jsx"},
|
|
10
10
|
"javascript": {".js", ".jsx", ".mjs", ".cjs"},
|
|
11
|
+
"vue": {".vue"},
|
|
11
12
|
}
|
|
12
13
|
|
|
13
14
|
DEFAULT_IGNORE_PARTS = {
|
|
@@ -59,10 +60,11 @@ def _build_extension_language_map() -> dict[str, str]:
|
|
|
59
60
|
Iteration order determines the winner for extensions shared by multiple
|
|
60
61
|
languages (.js/.jsx are listed under both "typescript" and "javascript";
|
|
61
62
|
both route to the same real parser implementation, so the choice is
|
|
62
|
-
cosmetic but kept stable).
|
|
63
|
+
cosmetic but kept stable). .vue maps to dedicated "vue" (which delegates
|
|
64
|
+
to the TS/JS parser after <script> extraction).
|
|
63
65
|
"""
|
|
64
66
|
ext_to_lang: dict[str, str] = {}
|
|
65
|
-
for lang in ("python", "go", "typescript", "javascript"):
|
|
67
|
+
for lang in ("python", "go", "vue", "typescript", "javascript"):
|
|
66
68
|
for ext in LANGUAGE_EXTENSIONS[lang]:
|
|
67
69
|
ext_to_lang.setdefault(ext, lang)
|
|
68
70
|
return ext_to_lang
|
|
@@ -74,6 +76,7 @@ def discover_files_multi(
|
|
|
74
76
|
"""Discover files across ALL supported languages, returning (path, detected_language) pairs.
|
|
75
77
|
|
|
76
78
|
Used for monorepo/multi-language projects indexed with language="auto".
|
|
79
|
+
Vue SFCs are detected as language "vue".
|
|
77
80
|
"""
|
|
78
81
|
root = Path(project_path)
|
|
79
82
|
if not root.exists() or not root.is_dir():
|
|
@@ -27,7 +27,7 @@ def extract_imports(content: str, language: str) -> list[str]:
|
|
|
27
27
|
return _extract_python_imports(content)
|
|
28
28
|
if language == "go":
|
|
29
29
|
return _extract_go_imports(content)
|
|
30
|
-
if language in ("typescript", "javascript"):
|
|
30
|
+
if language in ("typescript", "javascript", "vue"):
|
|
31
31
|
return _extract_ts_imports(content)
|
|
32
32
|
return []
|
|
33
33
|
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
|
|
5
|
+
from sampler.indexer.parsers.base import BaseParser
|
|
6
|
+
from sampler.indexer.parsers.typescript import TypeScriptParser
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class VueParser(BaseParser):
    """Parser for Vue single-file components (.vue).

    Locates every ``<script>`` / ``<script setup>`` block with a stdlib regex
    and hands each block's body to ``TypeScriptParser`` for symbol and
    relationship extraction. A dummy filepath (``.ts`` or ``.tsx``) is passed
    to the delegate so that it selects the grammar matching the block's
    ``lang`` attribute.

    A component may declare both a normal ``<script>`` and a
    ``<script setup>`` block; results from all blocks are concatenated in
    document order. Line numbers are shifted so they refer to the original
    .vue file rather than to the extracted script body.

    Returns empty lists when the file has no ``<script>`` block.
    """

    language = "vue"

    # The tag name must be exactly "script" (not "<scripts>" or
    # "<script-foo>"). Quoted attribute values are consumed as a unit so that a
    # ">" inside one, e.g. generic="T extends Foo<Bar>", does not terminate the
    # opening tag early and leak attribute text into the script body.
    _SCRIPT_RE = re.compile(
        r"""(?is)<script(?![\w-])((?:"[^"]*"|'[^']*'|[^>"'])*)>(.*?)</script\s*>"""
    )

    # The lookbehind keeps attributes such as data-lang="..." from being
    # mistaken for the lang attribute.
    _LANG_RE = re.compile(r"""(?i)(?<![\w-])lang\s*=\s*["']?([^"'\s>]+)""")

    def parse(self, content: str, filepath: str) -> tuple[list[dict], list[dict]]:
        """Extract symbols and relationships from a Vue SFC.

        Args:
            content: Full text of the file.
            filepath: Path of the file; anything not ending in ``.vue`` is
                passed straight to ``TypeScriptParser``.

        Returns:
            ``(symbols, relationships)`` as produced by the delegate parser,
            with ``start_line`` / ``end_line`` / ``line`` shifted to positions
            in the .vue file.
        """
        if not filepath.lower().endswith(".vue"):
            # Shouldn't normally happen, but delegate to TS parser for safety
            return TypeScriptParser().parse(content, filepath)

        symbols: list[dict] = []
        relationships: list[dict] = []
        delegate = TypeScriptParser()

        for script_text, line_offset, is_tsx in self._iter_vue_scripts(content):
            if not script_text.strip():
                # e.g. <script src="./impl.ts"></script>: nothing to parse here
                continue

            # Dummy filepath controls grammar selection inside the (unchanged) TS parser
            dummy = "Comp.script.tsx" if is_tsx else "Comp.script.ts"
            inner_symbols, inner_relationships = delegate.parse(script_text, dummy)

            # The delegate numbers lines relative to the script body. Adding the
            # number of newlines that precede the body converts them to lines of
            # the .vue file while keeping the delegate's own line-number base.
            for s in inner_symbols:
                s["start_line"] += line_offset
                s["end_line"] += line_offset
            for r in inner_relationships:
                if r.get("line") is not None:
                    r["line"] += line_offset

            symbols.extend(inner_symbols)
            relationships.extend(inner_relationships)

        return symbols, relationships

    def _iter_vue_scripts(self, content: str):
        """Yield ``(script_text, line_offset, use_tsx)`` for each script block, in document order."""
        for m in self._SCRIPT_RE.finditer(content):
            attrs = m.group(1) or ""

            # Offset: number of newlines before the start of the script body
            line_offset = content.count("\n", 0, m.start(2))

            # Force the TSX grammar in the delegate only for lang="tsx" / "jsx"
            lang_match = self._LANG_RE.search(attrs)
            lang_val = (lang_match.group(1) if lang_match else "").lower()

            yield m.group(2), line_offset, lang_val in ("tsx", "jsx")

    def _extract_vue_script(self, content: str) -> tuple[str, int, bool] | None:
        """Return (script_text, line_offset_of_script_body, use_tsx) for the first script block, or None."""
        return next(self._iter_vue_scripts(content), None)
|
|
@@ -128,6 +128,7 @@ class QueryEngine:
|
|
|
128
128
|
if name.endswith(("_test.py", "_test.go")):
|
|
129
129
|
return True
|
|
130
130
|
|
|
131
|
+
# Note: .vue test files (e.g. Foo.test.vue) are also supported for Vue projects
|
|
131
132
|
return name.endswith(
|
|
132
133
|
(
|
|
133
134
|
".test.ts",
|
|
@@ -136,12 +137,14 @@ class QueryEngine:
|
|
|
136
137
|
".test.jsx",
|
|
137
138
|
".test.mjs",
|
|
138
139
|
".test.cjs",
|
|
140
|
+
".test.vue",
|
|
139
141
|
".spec.ts",
|
|
140
142
|
".spec.tsx",
|
|
141
143
|
".spec.js",
|
|
142
144
|
".spec.jsx",
|
|
143
145
|
".spec.mjs",
|
|
144
146
|
".spec.cjs",
|
|
147
|
+
".spec.vue",
|
|
145
148
|
)
|
|
146
149
|
)
|
|
147
150
|
|