sampler-cli 0.2.0__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. sampler_cli-0.4.1/PKG-INFO +202 -0
  2. sampler_cli-0.4.1/README.md +152 -0
  3. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/pyproject.toml +12 -2
  4. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler/__init__.py +1 -1
  5. sampler_cli-0.4.1/src/sampler/cli/main.py +707 -0
  6. sampler_cli-0.4.1/src/sampler/cli/render.py +170 -0
  7. sampler_cli-0.4.1/src/sampler/config.py +144 -0
  8. sampler_cli-0.4.1/src/sampler/db.py +646 -0
  9. sampler_cli-0.4.1/src/sampler/embeddings.py +133 -0
  10. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler/indexer/builder.py +41 -7
  11. sampler_cli-0.4.1/src/sampler/indexer/discover.py +102 -0
  12. sampler_cli-0.4.1/src/sampler/indexer/embedder.py +241 -0
  13. sampler_cli-0.4.1/src/sampler/indexer/imports.py +59 -0
  14. sampler_cli-0.4.1/src/sampler/indexer/parsers/go.py +196 -0
  15. sampler_cli-0.4.1/src/sampler/indexer/parsers/typescript.py +257 -0
  16. sampler_cli-0.4.1/src/sampler/indexer/store.py +133 -0
  17. sampler_cli-0.4.1/src/sampler/query/engine.py +171 -0
  18. sampler_cli-0.4.1/src/sampler/query/semantic.py +213 -0
  19. sampler_cli-0.4.1/src/sampler_cli.egg-info/PKG-INFO +202 -0
  20. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler_cli.egg-info/SOURCES.txt +13 -1
  21. sampler_cli-0.4.1/src/sampler_cli.egg-info/requires.txt +35 -0
  22. sampler_cli-0.4.1/tests/test_cli.py +198 -0
  23. sampler_cli-0.4.1/tests/test_config.py +50 -0
  24. sampler_cli-0.4.1/tests/test_discover.py +38 -0
  25. sampler_cli-0.4.1/tests/test_embeddings.py +50 -0
  26. sampler_cli-0.4.1/tests/test_go_parser.py +44 -0
  27. sampler_cli-0.4.1/tests/test_imports.py +49 -0
  28. sampler_cli-0.4.1/tests/test_index_query.py +127 -0
  29. sampler_cli-0.4.1/tests/test_relationships.py +91 -0
  30. sampler_cli-0.4.1/tests/test_render_bars.py +31 -0
  31. sampler_cli-0.4.1/tests/test_semantic.py +151 -0
  32. sampler_cli-0.4.1/tests/test_stale_code.py +75 -0
  33. sampler_cli-0.4.1/tests/test_typescript_parser.py +55 -0
  34. sampler_cli-0.2.0/PKG-INFO +0 -130
  35. sampler_cli-0.2.0/README.md +0 -93
  36. sampler_cli-0.2.0/src/sampler/cli/main.py +0 -187
  37. sampler_cli-0.2.0/src/sampler/config.py +0 -77
  38. sampler_cli-0.2.0/src/sampler/db.py +0 -316
  39. sampler_cli-0.2.0/src/sampler/indexer/discover.py +0 -53
  40. sampler_cli-0.2.0/src/sampler/indexer/parsers/go.py +0 -9
  41. sampler_cli-0.2.0/src/sampler/indexer/parsers/typescript.py +0 -9
  42. sampler_cli-0.2.0/src/sampler/indexer/store.py +0 -47
  43. sampler_cli-0.2.0/src/sampler/query/engine.py +0 -16
  44. sampler_cli-0.2.0/src/sampler/query/semantic.py +0 -4
  45. sampler_cli-0.2.0/src/sampler_cli.egg-info/PKG-INFO +0 -130
  46. sampler_cli-0.2.0/src/sampler_cli.egg-info/requires.txt +0 -19
  47. sampler_cli-0.2.0/tests/test_cli.py +0 -23
  48. sampler_cli-0.2.0/tests/test_config.py +0 -20
  49. sampler_cli-0.2.0/tests/test_discover.py +0 -18
  50. sampler_cli-0.2.0/tests/test_index_query.py +0 -42
  51. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/LICENSE +0 -0
  52. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/setup.cfg +0 -0
  53. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler/__main__.py +0 -0
  54. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler/cli/__init__.py +0 -0
  55. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler/indexer/__init__.py +0 -0
  56. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler/indexer/parsers/__init__.py +0 -0
  57. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler/indexer/parsers/base.py +0 -0
  58. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler/indexer/parsers/python.py +0 -0
  59. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler/mcp/__init__.py +0 -0
  60. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler/mcp/server.py +0 -0
  61. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler/models.py +0 -0
  62. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler/query/__init__.py +0 -0
  63. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler_cli.egg-info/dependency_links.txt +0 -0
  64. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler_cli.egg-info/entry_points.txt +0 -0
  65. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/src/sampler_cli.egg-info/top_level.txt +0 -0
  66. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/tests/test_db.py +0 -0
  67. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/tests/test_python_parser.py +0 -0
  68. {sampler_cli-0.2.0 → sampler_cli-0.4.1}/tests/test_smoke.py +0 -0
@@ -0,0 +1,202 @@
1
+ Metadata-Version: 2.4
2
+ Name: sampler-cli
3
+ Version: 0.4.1
4
+ Summary: Token-efficient CLI for indexing and searching code symbols (Python-first, designed for minimal LLM/agent context size)
5
+ Author: Samuel Ignacio Carmona Rodriguez
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/sicr0/sampler-cli
8
+ Project-URL: Repository, https://github.com/sicr0/sampler-cli
9
+ Project-URL: Issues, https://github.com/sicr0/sampler-cli/issues
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: License :: OSI Approved :: MIT License
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
16
+ Classifier: Topic :: Software Development :: Code Generators
17
+ Requires-Python: >=3.11
18
+ Description-Content-Type: text/markdown
19
+ License-File: LICENSE
20
+ Requires-Dist: typer>=0.12.0
21
+ Requires-Dist: rich>=13.7.0
22
+ Requires-Dist: tree-sitter>=0.21.0
23
+ Requires-Dist: tree-sitter-python>=0.23.0
24
+ Requires-Dist: tree-sitter-go>=0.23.0
25
+ Requires-Dist: tree-sitter-typescript>=0.23.0
26
+ Requires-Dist: gitignore-parser>=0.1.11
27
+ Requires-Dist: pydantic>=2.6.0
28
+ Requires-Dist: pyyaml>=6.0.0
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
31
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
32
+ Requires-Dist: ruff>=0.5.0; extra == "dev"
33
+ Requires-Dist: mypy>=1.7.0; extra == "dev"
34
+ Requires-Dist: numpy>=1.26.0; extra == "dev"
35
+ Requires-Dist: scikit-learn>=1.4.0; extra == "dev"
36
+ Requires-Dist: fastembed>=0.2.0; extra == "dev"
37
+ Provides-Extra: mcp
38
+ Requires-Dist: fastmcp>=0.1.0; extra == "mcp"
39
+ Provides-Extra: semantic
40
+ Requires-Dist: numpy>=1.26.0; extra == "semantic"
41
+ Requires-Dist: scikit-learn>=1.4.0; extra == "semantic"
42
+ Provides-Extra: embeddings
43
+ Requires-Dist: fastembed>=0.2.0; extra == "embeddings"
44
+ Requires-Dist: numpy>=1.26.0; extra == "embeddings"
45
+ Provides-Extra: ollama-embeddings
46
+ Requires-Dist: ollama>=0.3.0; extra == "ollama-embeddings"
47
+ Provides-Extra: openai-embeddings
48
+ Requires-Dist: openai>=1.0.0; extra == "openai-embeddings"
49
+ Dynamic: license-file
50
+
51
+ # Sampler
52
+
53
+ Token-efficient CLI for indexing and searching code symbols across multiple projects.
54
+
55
+ Current version: 0.4.1
56
+
57
+ Designed for humans and agents: compact default output, short paths, and low-noise symbol views.
58
+
59
+ ## Requirements
60
+
61
+ - Python 3.11+
62
+
63
+ ## Installation
64
+
65
+ ```bash
66
+ pip install sampler-cli
67
+ ```
68
+
69
+ Development setup:
70
+
71
+ ```bash
72
+ pip install -e '.[dev]'
73
+ ```
74
+
75
+ Semantic stack (TF-IDF + local hash fallback):
76
+
77
+ ```bash
78
+ pip install -e '.[semantic]'
79
+ ```
80
+
81
+ ## Quick Start
82
+
83
+ ```bash
84
+ sampler init
85
+ sampler project add myproj /absolute/path/to/project --language auto
86
+ sampler index myproj
87
+ sampler search retry --project myproj
88
+ sampler symbols myproj
89
+ sampler overview src/main.py
90
+ ```
91
+
92
+ ## Command Overview
93
+
94
+ Core:
95
+ - `sampler version`
96
+ - `sampler init`
97
+ - `sampler index <project>`
98
+ - `sampler search <query> [--project <name>] [--type <t>] [--limit <n>] [--semantic] [--style plain|bars]`
99
+ - `sampler search-all <query> [--type <t>] [--limit <n>]`
100
+ - `sampler symbols <project> [--type <t>] [--limit <n>]`
101
+ - `sampler overview <filepath> [--style plain|bars]`
102
+
103
+ Relationships:
104
+ - `sampler callers <symbol> [--project <name>] [--file <path-or-suffix>]`
105
+ - `sampler usages <symbol> [--project <name>] [--file <path-or-suffix>]`
106
+ - `sampler related <symbol> [--project <name>] [--file <path-or-suffix>] [--style plain|bars]`
107
+ - Alternative selector: `<path>:<symbol>` (e.g. `app/utils/helpers.py:format_kda`)
108
+
109
+ Project management:
110
+ - `sampler project add <name> <path> --language <python|go|typescript|javascript|auto>`
111
+ - `sampler project update <name> [--path <abs-path>] [--language <lang>]`
112
+ - `sampler project list`
113
+ - `sampler project deps <name>`
114
+ - `sampler project remove <name>`
115
+
116
+ Config:
117
+ - `sampler config show`
118
+ - `sampler config embeddings [--provider P] [--model M]`
119
+
120
+ Semantic and analysis:
121
+ - `sampler embed <project> [--batch-size <n>]`
122
+ - `sampler stale-code <project> [--limit <n>]`
123
+
124
+ ## Embeddings & Semantic Search
125
+
126
+ `sampler search --semantic` (and hybrid ranking) supports pluggable providers via the adapter pattern:
127
+
128
+ - **Default**: `bge-small` (BAAI/bge-small-en-v1.5 via fastembed — lightweight ONNX, 384-dim, local).
129
+ - Other built-ins: `hash` (always-on deterministic fallback), `ollama` (e.g. nomic-embed-text), `nomic`, `openai`, `fastembed`.
130
+ - TF-IDF (sklearn, on-the-fly, no pre-embed) remains the fast lexical primary when no provider embeddings are precomputed for the active model.
131
+ - Hash fingerprint is the final always-available fallback.
132
+
133
+ Configuration (in `~/.sampler/config.yaml` or via `sampler config embeddings ...`):
134
+
135
+ ```yaml
136
+ embeddings:
137
+ provider: "bge-small"
138
+ # provider: "ollama"
139
+ # model: "nomic-embed-text"
140
+ # base_url: "http://localhost:11434"
141
+ ```
142
+
143
+ Install:
144
+
145
+ ```bash
146
+ # For default BGE (recommended for most users)
147
+ pip install 'sampler-cli[embeddings]'
148
+
149
+ # Or for Ollama / OpenAI only
150
+ pip install 'sampler-cli[ollama-embeddings]'
151
+ pip install 'sampler-cli[openai-embeddings]'
152
+ ```
153
+
154
+ `sampler embed <project>` precomputes vectors using the **currently configured provider** (with a progress bar). If you change the provider, re-run `embed` after updating the config (old vectors are ignored until re-embedded).
155
+
156
+ Offline / air-gapped: `provider: hash` (or just don't install the embeddings extra — TF-IDF + hash still work if you have `[semantic]`).
157
+
158
+ ## Language Support
159
+
160
+ - Python parser: stdlib AST (stable)
161
+ - Go parser: tree-sitter-go (real extraction)
162
+ - TypeScript/JavaScript parser: tree-sitter-typescript (real extraction)
163
+ - `--language auto`: per-file language detection for monorepos/multi-language projects
164
+
165
+ ## Stale Code Detection
166
+
167
+ `sampler stale-code <project>` reports candidate stale functions/methods where:
168
+
169
+ - function is called from test files
170
+ - function has zero non-test callers in project call graph
171
+
172
+ This is a heuristic signal, not a guaranteed proof of dead code.
173
+
174
+ ## Examples
175
+
176
+ ```bash
177
+ $ sampler search worker --project myproj
178
+ myproj:src/tasks/celery_app.py:70 function on_worker_ready def on_worker_ready(sender)
179
+
180
+ $ sampler related ConfigManager --project myproj --style bars
181
+ myproj:src/config.py:24-105 class ConfigManager [parent]
182
+ ...
183
+
184
+ $ sampler stale-code myproj
185
+ myproj:src/utils/retry.py:12-28 function retry_request test_callers=2 non_test_callers=0 [tests.test_retry.test_retry_request]
186
+ ```
187
+
188
+ ## Data Location
189
+
190
+ - Config: `~/.sampler/config.yaml`
191
+ - DB: `~/.sampler/graph.db`
192
+
193
+ ## Running Tests
194
+
195
+ ```bash
196
+ pytest -q
197
+ ```
198
+
199
+ ## Notes
200
+
201
+ - Compact output is default by design (token-efficient for agent workflows).
202
+ - For broader roadmap details, see `TODO.md` and `PLAN.md`.
@@ -0,0 +1,152 @@
1
+ # Sampler
2
+
3
+ Token-efficient CLI for indexing and searching code symbols across multiple projects.
4
+
5
+ Current version: 0.4.1
6
+
7
+ Designed for humans and agents: compact default output, short paths, and low-noise symbol views.
8
+
9
+ ## Requirements
10
+
11
+ - Python 3.11+
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ pip install sampler-cli
17
+ ```
18
+
19
+ Development setup:
20
+
21
+ ```bash
22
+ pip install -e '.[dev]'
23
+ ```
24
+
25
+ Semantic stack (TF-IDF + local hash fallback):
26
+
27
+ ```bash
28
+ pip install -e '.[semantic]'
29
+ ```
30
+
31
+ ## Quick Start
32
+
33
+ ```bash
34
+ sampler init
35
+ sampler project add myproj /absolute/path/to/project --language auto
36
+ sampler index myproj
37
+ sampler search retry --project myproj
38
+ sampler symbols myproj
39
+ sampler overview src/main.py
40
+ ```
41
+
42
+ ## Command Overview
43
+
44
+ Core:
45
+ - `sampler version`
46
+ - `sampler init`
47
+ - `sampler index <project>`
48
+ - `sampler search <query> [--project <name>] [--type <t>] [--limit <n>] [--semantic] [--style plain|bars]`
49
+ - `sampler search-all <query> [--type <t>] [--limit <n>]`
50
+ - `sampler symbols <project> [--type <t>] [--limit <n>]`
51
+ - `sampler overview <filepath> [--style plain|bars]`
52
+
53
+ Relationships:
54
+ - `sampler callers <symbol> [--project <name>] [--file <path-or-suffix>]`
55
+ - `sampler usages <symbol> [--project <name>] [--file <path-or-suffix>]`
56
+ - `sampler related <symbol> [--project <name>] [--file <path-or-suffix>] [--style plain|bars]`
57
+ - Alternative selector: `<path>:<symbol>` (e.g. `app/utils/helpers.py:format_kda`)
58
+
59
+ Project management:
60
+ - `sampler project add <name> <path> --language <python|go|typescript|javascript|auto>`
61
+ - `sampler project update <name> [--path <abs-path>] [--language <lang>]`
62
+ - `sampler project list`
63
+ - `sampler project deps <name>`
64
+ - `sampler project remove <name>`
65
+
66
+ Config:
67
+ - `sampler config show`
68
+ - `sampler config embeddings [--provider P] [--model M]`
69
+
70
+ Semantic and analysis:
71
+ - `sampler embed <project> [--batch-size <n>]`
72
+ - `sampler stale-code <project> [--limit <n>]`
73
+
74
+ ## Embeddings & Semantic Search
75
+
76
+ `sampler search --semantic` (and hybrid ranking) supports pluggable providers via the adapter pattern:
77
+
78
+ - **Default**: `bge-small` (BAAI/bge-small-en-v1.5 via fastembed — lightweight ONNX, 384-dim, local).
79
+ - Other built-ins: `hash` (always-on deterministic fallback), `ollama` (e.g. nomic-embed-text), `nomic`, `openai`, `fastembed`.
80
+ - TF-IDF (sklearn, on-the-fly, no pre-embed) remains the fast lexical primary when no provider embeddings are precomputed for the active model.
81
+ - Hash fingerprint is the final always-available fallback.
82
+
83
+ Configuration (in `~/.sampler/config.yaml` or via `sampler config embeddings ...`):
84
+
85
+ ```yaml
86
+ embeddings:
87
+ provider: "bge-small"
88
+ # provider: "ollama"
89
+ # model: "nomic-embed-text"
90
+ # base_url: "http://localhost:11434"
91
+ ```
92
+
93
+ Install:
94
+
95
+ ```bash
96
+ # For default BGE (recommended for most users)
97
+ pip install 'sampler-cli[embeddings]'
98
+
99
+ # Or for Ollama / OpenAI only
100
+ pip install 'sampler-cli[ollama-embeddings]'
101
+ pip install 'sampler-cli[openai-embeddings]'
102
+ ```
103
+
104
+ `sampler embed <project>` precomputes vectors using the **currently configured provider** (with a progress bar). If you change the provider, re-run `embed` after updating the config (old vectors are ignored until re-embedded).
105
+
106
+ Offline / air-gapped: `provider: hash` (or just don't install the embeddings extra — TF-IDF + hash still work if you have `[semantic]`).
107
+
108
+ ## Language Support
109
+
110
+ - Python parser: stdlib AST (stable)
111
+ - Go parser: tree-sitter-go (real extraction)
112
+ - TypeScript/JavaScript parser: tree-sitter-typescript (real extraction)
113
+ - `--language auto`: per-file language detection for monorepos/multi-language projects
114
+
115
+ ## Stale Code Detection
116
+
117
+ `sampler stale-code <project>` reports candidate stale functions/methods where:
118
+
119
+ - function is called from test files
120
+ - function has zero non-test callers in project call graph
121
+
122
+ This is a heuristic signal, not a guaranteed proof of dead code.
123
+
124
+ ## Examples
125
+
126
+ ```bash
127
+ $ sampler search worker --project myproj
128
+ myproj:src/tasks/celery_app.py:70 function on_worker_ready def on_worker_ready(sender)
129
+
130
+ $ sampler related ConfigManager --project myproj --style bars
131
+ myproj:src/config.py:24-105 class ConfigManager [parent]
132
+ ...
133
+
134
+ $ sampler stale-code myproj
135
+ myproj:src/utils/retry.py:12-28 function retry_request test_callers=2 non_test_callers=0 [tests.test_retry.test_retry_request]
136
+ ```
137
+
138
+ ## Data Location
139
+
140
+ - Config: `~/.sampler/config.yaml`
141
+ - DB: `~/.sampler/graph.db`
142
+
143
+ ## Running Tests
144
+
145
+ ```bash
146
+ pytest -q
147
+ ```
148
+
149
+ ## Notes
150
+
151
+ - Compact output is default by design (token-efficient for agent workflows).
152
+ - For broader roadmap details, see `TODO.md` and `PLAN.md`.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sampler-cli"
3
- version = "0.2.0"
3
+ version = "0.4.1"
4
4
  description = "Token-efficient CLI for indexing and searching code symbols (Python-first, designed for minimal LLM/agent context size)"
5
5
  readme = "README.md"
6
6
  license = { text = "MIT" }
@@ -22,6 +22,8 @@ dependencies = [
22
22
  "rich>=13.7.0",
23
23
  "tree-sitter>=0.21.0",
24
24
  "tree-sitter-python>=0.23.0",
25
+ "tree-sitter-go>=0.23.0",
26
+ "tree-sitter-typescript>=0.23.0",
25
27
  "gitignore-parser>=0.1.11",
26
28
  "pydantic>=2.6.0",
27
29
  "pyyaml>=6.0.0",
@@ -33,9 +35,17 @@ dev = [
33
35
  "pytest-cov>=4.1.0",
34
36
  "ruff>=0.5.0",
35
37
  "mypy>=1.7.0",
38
+ "numpy>=1.26.0",
39
+ "scikit-learn>=1.4.0",
40
+ # dev pulls the common embeddings stack too
41
+ "fastembed>=0.2.0",
36
42
  ]
37
43
  mcp = ["fastmcp>=0.1.0"]
38
- semantic = ["sentence-transformers>=2.2.0"]
44
+ semantic = ["numpy>=1.26.0", "scikit-learn>=1.4.0"]
45
+ # Embeddings providers (pluggable, lazy). bge-small (default) is lightweight via fastembed (ONNX).
46
+ embeddings = ["fastembed>=0.2.0", "numpy>=1.26.0"]
47
+ ollama-embeddings = ["ollama>=0.3.0"]
48
+ openai-embeddings = ["openai>=1.0.0"]
39
49
 
40
50
  [project.scripts]
41
51
  sampler = "sampler.cli.main:app"
@@ -1,3 +1,3 @@
1
1
  __all__ = ["__version__"]
2
2
 
3
- __version__ = "0.2.0"
3
+ __version__ = "0.4.1"