repointel 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- repointel-0.1.0/LICENSE +21 -0
- repointel-0.1.0/PKG-INFO +316 -0
- repointel-0.1.0/README.md +272 -0
- repointel-0.1.0/pyproject.toml +41 -0
- repointel-0.1.0/repointel.egg-info/PKG-INFO +316 -0
- repointel-0.1.0/repointel.egg-info/SOURCES.txt +24 -0
- repointel-0.1.0/repointel.egg-info/dependency_links.txt +1 -0
- repointel-0.1.0/repointel.egg-info/entry_points.txt +2 -0
- repointel-0.1.0/repointel.egg-info/requires.txt +29 -0
- repointel-0.1.0/repointel.egg-info/top_level.txt +1 -0
- repointel-0.1.0/reposcope/__init__.py +4 -0
- repointel-0.1.0/reposcope/__main__.py +6 -0
- repointel-0.1.0/reposcope/answer.py +222 -0
- repointel-0.1.0/reposcope/api.py +123 -0
- repointel-0.1.0/reposcope/chunker.py +176 -0
- repointel-0.1.0/reposcope/cli.py +195 -0
- repointel-0.1.0/reposcope/embedder.py +55 -0
- repointel-0.1.0/reposcope/indexer.py +141 -0
- repointel-0.1.0/reposcope/models.py +31 -0
- repointel-0.1.0/reposcope/retrieval.py +158 -0
- repointel-0.1.0/reposcope/scanner.py +92 -0
- repointel-0.1.0/reposcope/vector_store.py +35 -0
- repointel-0.1.0/setup.cfg +4 -0
- repointel-0.1.0/tests/test_chunker.py +87 -0
- repointel-0.1.0/tests/test_cli.py +77 -0
- repointel-0.1.0/tests/test_retrieval.py +97 -0
repointel-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Nirakar Jena
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
repointel-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: repointel
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Local-first codebase-aware AI: index any repo and ask questions about it.
|
|
5
|
+
Author-email: Nirakar Jena <jenashubham60@gmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/nirakar24/reposcope
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/nirakar24/reposcope/issues
|
|
9
|
+
Keywords: codebase,rag,ai,search,developer-tools
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Environment :: Console
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
17
|
+
Classifier: Topic :: Utilities
|
|
18
|
+
Requires-Python: >=3.11
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE
|
|
21
|
+
Provides-Extra: api
|
|
22
|
+
Requires-Dist: fastapi>=0.115; extra == "api"
|
|
23
|
+
Requires-Dist: uvicorn>=0.30; extra == "api"
|
|
24
|
+
Provides-Extra: openai
|
|
25
|
+
Requires-Dist: openai>=1.0; extra == "openai"
|
|
26
|
+
Provides-Extra: gemini
|
|
27
|
+
Requires-Dist: google-genai>=0.6; extra == "gemini"
|
|
28
|
+
Provides-Extra: claude
|
|
29
|
+
Requires-Dist: anthropic>=0.40; extra == "claude"
|
|
30
|
+
Provides-Extra: embed
|
|
31
|
+
Requires-Dist: sentence-transformers>=3.0; extra == "embed"
|
|
32
|
+
Requires-Dist: numpy>=1.26; extra == "embed"
|
|
33
|
+
Provides-Extra: all
|
|
34
|
+
Requires-Dist: fastapi>=0.115; extra == "all"
|
|
35
|
+
Requires-Dist: uvicorn>=0.30; extra == "all"
|
|
36
|
+
Requires-Dist: openai>=1.0; extra == "all"
|
|
37
|
+
Requires-Dist: google-genai>=0.6; extra == "all"
|
|
38
|
+
Requires-Dist: anthropic>=0.40; extra == "all"
|
|
39
|
+
Requires-Dist: sentence-transformers>=3.0; extra == "all"
|
|
40
|
+
Requires-Dist: numpy>=1.26; extra == "all"
|
|
41
|
+
Provides-Extra: dev
|
|
42
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
43
|
+
Dynamic: license-file
|
|
44
|
+
|
|
45
|
+
# RepoScope
|
|
46
|
+
|
|
47
|
+
A local-first tool for querying codebases in plain English. Point it at any repository, build an index, and ask questions — no cloud sync, no background services, no mandatory API keys.
|
|
48
|
+
|
|
49
|
+
---
|
|
50
|
+
|
|
51
|
+
## How it works
|
|
52
|
+
|
|
53
|
+
**Without `--embed` (default):**
|
|
54
|
+
```
|
|
55
|
+
repo → file discovery → chunking → JSON index → BM25 retrieval → LLM answer
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
**With `--embed` (semantic search):**
|
|
59
|
+
```
|
|
60
|
+
repo → file discovery → chunking → JSON index + .npy embeddings → BM25 + vector → RRF merge → LLM answer
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
RepoScope walks your project and breaks files into chunks — by function, class, or method boundary for Python, JS/TS, and C# (using regex-based detection), and by fixed line windows for everything else. Chunks are stored in a local JSON index and ranked with BM25 scoring at query time. If you run `index --embed`, it also generates sentence embeddings and merges the two results with Reciprocal Rank Fusion for better semantic matches.
|
|
64
|
+
|
|
65
|
+
If an LLM key is configured, `ask` feeds the top-ranked chunks to the model and returns a cited answer. If not, it falls back to showing the top matches with previews.
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Installation
|
|
70
|
+
|
|
71
|
+
RepoScope is distributed under the package name `repointel` (the repository and import package are named `reposcope`). To install from source:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
git clone https://github.com/nirakar24/RepoScope.git
|
|
75
|
+
cd RepoScope
|
|
76
|
+
pip install -e .
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
To add optional features:
|
|
80
|
+
|
|
81
|
+
```bash
|
|
82
|
+
pip install -e ".[embed]" # semantic search (downloads ~80 MB model on first use)
|
|
83
|
+
pip install -e ".[claude]" # Anthropic Claude for LLM answers
|
|
84
|
+
pip install -e ".[gemini]" # Google Gemini for LLM answers
|
|
85
|
+
pip install -e ".[openai]" # OpenAI for LLM answers and embeddings
|
|
86
|
+
pip install -e ".[api]" # FastAPI server
|
|
87
|
+
pip install -e ".[all]" # everything
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Setting up an LLM API key
|
|
93
|
+
|
|
94
|
+
`index` and `search` work with no API key. `ask` needs one to generate answers.
|
|
95
|
+
|
|
96
|
+
### Interactive setup (recommended)
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
repointel configure
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
Prompts for your provider and key (input is hidden), then saves to `~/.config/repointel/.env`. Picked up automatically in every session and directory from then on.
|
|
103
|
+
|
|
104
|
+
```
|
|
105
|
+
Which provider do you want to use for 'ask' answers?
|
|
106
|
+
1) Anthropic Claude
|
|
107
|
+
2) Google Gemini
|
|
108
|
+
3) OpenAI
|
|
109
|
+
|
|
110
|
+
Enter 1, 2, or 3: 2
|
|
111
|
+
|
|
112
|
+
Google Gemini API key (input hidden):
|
|
113
|
+
Saved GEMINI_API_KEY to ~/.config/repointel/.env
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Environment variable
|
|
117
|
+
|
|
118
|
+
Add to your shell profile (`~/.bashrc`, `~/.zshrc`, etc.):
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
122
|
+
export GEMINI_API_KEY="AIza..."
|
|
123
|
+
export OPENAI_API_KEY="sk-..."
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
### `.env` file
|
|
127
|
+
|
|
128
|
+
Create a `.env` in your project directory or any parent. RepoScope walks up from the current directory automatically:
|
|
129
|
+
|
|
130
|
+
```
|
|
131
|
+
GEMINI_API_KEY=AIza...
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
### Provider priority and model overrides
|
|
135
|
+
|
|
136
|
+
If multiple keys are present, the order is: **Claude → Gemini → OpenAI**.
|
|
137
|
+
|
|
138
|
+
Override the default model with environment variables:
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
REPOSCOPE_CLAUDE_MODEL=claude-sonnet-4-6
|
|
142
|
+
REPOSCOPE_GEMINI_MODEL=gemini-2.5-flash
|
|
143
|
+
REPOSCOPE_OPENAI_MODEL=gpt-4.1-mini
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
### Where to get keys
|
|
147
|
+
|
|
148
|
+
| Provider | Free tier | Key page |
|
|
149
|
+
|---|---|---|
|
|
150
|
+
| Google Gemini | Yes | https://aistudio.google.com/apikey |
|
|
151
|
+
| Anthropic Claude | No | https://console.anthropic.com |
|
|
152
|
+
| OpenAI | No | https://platform.openai.com/api-keys |
|
|
153
|
+
|
|
154
|
+
---
|
|
155
|
+
|
|
156
|
+
## Quick start
|
|
157
|
+
|
|
158
|
+
```bash
|
|
159
|
+
# Index your project
|
|
160
|
+
repointel index /path/to/project
|
|
161
|
+
|
|
162
|
+
# Search (instant, no LLM)
|
|
163
|
+
repointel search "where is authentication handled"
|
|
164
|
+
|
|
165
|
+
# Ask a question (requires an LLM key)
|
|
166
|
+
repointel ask "how does the database schema relate to the API routes?"
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## Commands
|
|
172
|
+
|
|
173
|
+
### `configure`
|
|
174
|
+
Interactive first-time setup. Saves your LLM API key to `~/.config/repointel/.env`.
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
repointel configure
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
---
|
|
181
|
+
|
|
182
|
+
### `index`
|
|
183
|
+
Walks a directory, chunks its files, and writes a JSON index.
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
repointel index /path/to/project
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
Add `--embed` to generate sentence embeddings alongside the index. Once present, `search` and `ask` automatically switch to hybrid retrieval — no extra flag needed at query time.
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
repointel index /path/to/project --embed
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
Use `--index-file` (before the subcommand) to control where the index is written. Useful for keeping separate indexes per project:
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
repointel --index-file .reposcope/backend.json index ./backend --embed
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
The default path is `.reposcope/index.json` in the current directory.
|
|
202
|
+
|
|
203
|
+
---
|
|
204
|
+
|
|
205
|
+
### `search`
|
|
206
|
+
Retrieves the most relevant chunks for a query. Instant — no network call.
|
|
207
|
+
|
|
208
|
+
Uses BM25 by default. Automatically switches to hybrid BM25 + vector search if embeddings exist for the current index.
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
repointel search "JWT token validation"
|
|
212
|
+
repointel search "database migration" --top-k 5
|
|
213
|
+
repointel search "controller routes" --json
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
| Flag | Default | Description |
|
|
217
|
+
|---|---|---|
|
|
218
|
+
| `--top-k` | `8` | Number of results to return |
|
|
219
|
+
| `--json` | off | Emit results as a JSON array |
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
### `ask`
|
|
224
|
+
Retrieves top chunks and sends them to an LLM for a cited answer.
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
repointel ask "how does authentication work?"
|
|
228
|
+
repointel ask "what entities exist in the database?" --top-k 12
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
Falls back to listing top matches with text previews if no LLM key is set.
|
|
232
|
+
|
|
233
|
+
---
|
|
234
|
+
|
|
235
|
+
### `stats`
|
|
236
|
+
Prints a breakdown of the current index.
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
repointel stats
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
```json
|
|
243
|
+
{
|
|
244
|
+
"files_indexed": 72,
|
|
245
|
+
"chunks_indexed": 428,
|
|
246
|
+
"languages": { "csharp": 160, "javascript": 25, "json": 221 },
|
|
247
|
+
"kinds": { "method": 117, "block": 247, "class": 25 },
|
|
248
|
+
"embeddings": ".reposcope/index.npy"
|
|
249
|
+
}
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
---
|
|
253
|
+
|
|
254
|
+
## Multiple projects
|
|
255
|
+
|
|
256
|
+
Use `--index-file` to maintain separate indexes. The flag goes before the subcommand.
|
|
257
|
+
|
|
258
|
+
```bash
|
|
259
|
+
repointel --index-file .reposcope/frontend.json index ./frontend
|
|
260
|
+
repointel --index-file .reposcope/backend.json index ./backend
|
|
261
|
+
|
|
262
|
+
repointel --index-file .reposcope/frontend.json ask "how is routing configured?"
|
|
263
|
+
repointel --index-file .reposcope/backend.json ask "what database tables exist?"
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
---
|
|
267
|
+
|
|
268
|
+
## Optional REST API
|
|
269
|
+
|
|
270
|
+
```bash
|
|
271
|
+
pip install -e ".[api]"
|
|
272
|
+
uvicorn reposcope.api:app --reload
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
| Method | Endpoint | Body |
|
|
276
|
+
|---|---|---|
|
|
277
|
+
| `POST` | `/index` | `{ "path": "/abs/path/to/repo", "embed": false }` |
|
|
278
|
+
| `POST` | `/search` | `{ "query": "...", "top_k": 8 }` |
|
|
279
|
+
| `POST` | `/ask` | `{ "query": "...", "top_k": 8 }` |
|
|
280
|
+
| `GET` | `/stats` | — |
|
|
281
|
+
|
|
282
|
+
Docs at `http://localhost:8000/docs`.
|
|
283
|
+
|
|
284
|
+
---
|
|
285
|
+
|
|
286
|
+
## Supported languages
|
|
287
|
+
|
|
288
|
+
| Language | Chunking |
|
|
289
|
+
|---|---|
|
|
290
|
+
| Python | Regex-based: splits at `def` / `async def` / `class` boundaries |
|
|
291
|
+
| JavaScript / TypeScript / JSX / TSX | Regex-based: splits at `function`, `class`, and arrow function boundaries |
|
|
292
|
+
| C# | Regex-based: splits at class, interface, record, struct, and method boundaries |
|
|
293
|
+
| SQL, Markdown, JSON, YAML, TOML, CSS, SCSS, HTML, Dockerfile, Makefile | Fixed 80-line windows with 15-line overlap |
|
|
294
|
+
|
|
295
|
+
Files over 750 KB and generated lock files (`package-lock.json`, `yarn.lock`, `pnpm-lock.yaml`) are skipped.
|
|
296
|
+
|
|
297
|
+
---
|
|
298
|
+
|
|
299
|
+
## Ignored directories
|
|
300
|
+
|
|
301
|
+
`node_modules`, `.git`, `dist`, `build`, `bin`, `obj`, `.venv`, `__pycache__`, `.next`, `.nuxt`, `coverage`, `target`, `temp`, `vendor`, and other standard build/cache directories are excluded automatically.
|
|
302
|
+
|
|
303
|
+
---
|
|
304
|
+
|
|
305
|
+
## Roadmap
|
|
306
|
+
|
|
307
|
+
- [ ] Tree-sitter chunking for true AST-level boundaries (replacing the regex approach)
|
|
308
|
+
- [ ] Incremental re-indexing on file change
|
|
309
|
+
- [ ] Cross-repo index merging for monorepos
|
|
310
|
+
- [ ] Qdrant / Chroma backend for repositories with >50k chunks
|
|
311
|
+
|
|
312
|
+
---
|
|
313
|
+
|
|
314
|
+
## License
|
|
315
|
+
|
|
316
|
+
MIT
|
repointel-0.1.0/README.md
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
# RepoScope
|
|
2
|
+
|
|
3
|
+
A local-first tool for querying codebases in plain English. Point it at any repository, build an index, and ask questions — no cloud sync, no background services, no mandatory API keys.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## How it works
|
|
8
|
+
|
|
9
|
+
**Without `--embed` (default):**
|
|
10
|
+
```
|
|
11
|
+
repo → file discovery → chunking → JSON index → BM25 retrieval → LLM answer
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
**With `--embed` (semantic search):**
|
|
15
|
+
```
|
|
16
|
+
repo → file discovery → chunking → JSON index + .npy embeddings → BM25 + vector → RRF merge → LLM answer
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
RepoScope walks your project and breaks files into chunks — by function, class, or method boundary for Python, JS/TS, and C# (using regex-based detection), and by fixed line windows for everything else. Chunks are stored in a local JSON index and ranked with BM25 scoring at query time. If you run `index --embed`, it also generates sentence embeddings and merges the two results with Reciprocal Rank Fusion for better semantic matches.
|
|
20
|
+
|
|
21
|
+
If an LLM key is configured, `ask` feeds the top-ranked chunks to the model and returns a cited answer. If not, it falls back to showing the top matches with previews.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Installation
|
|
26
|
+
|
|
27
|
+
RepoScope is distributed under the package name `repointel` (the repository and import package are named `reposcope`). To install from source:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
git clone https://github.com/nirakar24/RepoScope.git
|
|
31
|
+
cd RepoScope
|
|
32
|
+
pip install -e .
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
To add optional features:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install -e ".[embed]" # semantic search (downloads ~80 MB model on first use)
|
|
39
|
+
pip install -e ".[claude]" # Anthropic Claude for LLM answers
|
|
40
|
+
pip install -e ".[gemini]" # Google Gemini for LLM answers
|
|
41
|
+
pip install -e ".[openai]" # OpenAI for LLM answers and embeddings
|
|
42
|
+
pip install -e ".[api]" # FastAPI server
|
|
43
|
+
pip install -e ".[all]" # everything
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Setting up an LLM API key
|
|
49
|
+
|
|
50
|
+
`index` and `search` work with no API key. `ask` needs one to generate answers.
|
|
51
|
+
|
|
52
|
+
### Interactive setup (recommended)
|
|
53
|
+
|
|
54
|
+
```bash
|
|
55
|
+
repointel configure
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Prompts for your provider and key (input is hidden), then saves to `~/.config/repointel/.env`. Picked up automatically in every session and directory from then on.
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
Which provider do you want to use for 'ask' answers?
|
|
62
|
+
1) Anthropic Claude
|
|
63
|
+
2) Google Gemini
|
|
64
|
+
3) OpenAI
|
|
65
|
+
|
|
66
|
+
Enter 1, 2, or 3: 2
|
|
67
|
+
|
|
68
|
+
Google Gemini API key (input hidden):
|
|
69
|
+
Saved GEMINI_API_KEY to ~/.config/repointel/.env
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Environment variable
|
|
73
|
+
|
|
74
|
+
Add to your shell profile (`~/.bashrc`, `~/.zshrc`, etc.):
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
78
|
+
export GEMINI_API_KEY="AIza..."
|
|
79
|
+
export OPENAI_API_KEY="sk-..."
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### `.env` file
|
|
83
|
+
|
|
84
|
+
Create a `.env` in your project directory or any parent. RepoScope walks up from the current directory automatically:
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
GEMINI_API_KEY=AIza...
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Provider priority and model overrides
|
|
91
|
+
|
|
92
|
+
If multiple keys are present, the order is: **Claude → Gemini → OpenAI**.
|
|
93
|
+
|
|
94
|
+
Override the default model with environment variables:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
REPOSCOPE_CLAUDE_MODEL=claude-sonnet-4-6
|
|
98
|
+
REPOSCOPE_GEMINI_MODEL=gemini-2.5-flash
|
|
99
|
+
REPOSCOPE_OPENAI_MODEL=gpt-4.1-mini
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
### Where to get keys
|
|
103
|
+
|
|
104
|
+
| Provider | Free tier | Key page |
|
|
105
|
+
|---|---|---|
|
|
106
|
+
| Google Gemini | Yes | https://aistudio.google.com/apikey |
|
|
107
|
+
| Anthropic Claude | No | https://console.anthropic.com |
|
|
108
|
+
| OpenAI | No | https://platform.openai.com/api-keys |
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Quick start
|
|
113
|
+
|
|
114
|
+
```bash
|
|
115
|
+
# Index your project
|
|
116
|
+
repointel index /path/to/project
|
|
117
|
+
|
|
118
|
+
# Search (instant, no LLM)
|
|
119
|
+
repointel search "where is authentication handled"
|
|
120
|
+
|
|
121
|
+
# Ask a question (requires an LLM key)
|
|
122
|
+
repointel ask "how does the database schema relate to the API routes?"
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## Commands
|
|
128
|
+
|
|
129
|
+
### `configure`
|
|
130
|
+
Interactive first-time setup. Saves your LLM API key to `~/.config/repointel/.env`.
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
repointel configure
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
---
|
|
137
|
+
|
|
138
|
+
### `index`
|
|
139
|
+
Walks a directory, chunks its files, and writes a JSON index.
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
repointel index /path/to/project
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
Add `--embed` to generate sentence embeddings alongside the index. Once present, `search` and `ask` automatically switch to hybrid retrieval — no extra flag needed at query time.
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
repointel index /path/to/project --embed
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Use `--index-file` (before the subcommand) to control where the index is written. Useful for keeping separate indexes per project:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
repointel --index-file .reposcope/backend.json index ./backend --embed
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
The default path is `.reposcope/index.json` in the current directory.
|
|
158
|
+
|
|
159
|
+
---
|
|
160
|
+
|
|
161
|
+
### `search`
|
|
162
|
+
Retrieves the most relevant chunks for a query. Instant — no network call.
|
|
163
|
+
|
|
164
|
+
Uses BM25 by default. Automatically switches to hybrid BM25 + vector search if embeddings exist for the current index.
|
|
165
|
+
|
|
166
|
+
```bash
|
|
167
|
+
repointel search "JWT token validation"
|
|
168
|
+
repointel search "database migration" --top-k 5
|
|
169
|
+
repointel search "controller routes" --json
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
| Flag | Default | Description |
|
|
173
|
+
|---|---|---|
|
|
174
|
+
| `--top-k` | `8` | Number of results to return |
|
|
175
|
+
| `--json` | off | Emit results as a JSON array |
|
|
176
|
+
|
|
177
|
+
---
|
|
178
|
+
|
|
179
|
+
### `ask`
|
|
180
|
+
Retrieves top chunks and sends them to an LLM for a cited answer.
|
|
181
|
+
|
|
182
|
+
```bash
|
|
183
|
+
repointel ask "how does authentication work?"
|
|
184
|
+
repointel ask "what entities exist in the database?" --top-k 12
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
Falls back to listing top matches with text previews if no LLM key is set.
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
### `stats`
|
|
192
|
+
Prints a breakdown of the current index.
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
repointel stats
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
```json
|
|
199
|
+
{
|
|
200
|
+
"files_indexed": 72,
|
|
201
|
+
"chunks_indexed": 428,
|
|
202
|
+
"languages": { "csharp": 160, "javascript": 25, "json": 221 },
|
|
203
|
+
"kinds": { "method": 117, "block": 247, "class": 25 },
|
|
204
|
+
"embeddings": ".reposcope/index.npy"
|
|
205
|
+
}
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
---
|
|
209
|
+
|
|
210
|
+
## Multiple projects
|
|
211
|
+
|
|
212
|
+
Use `--index-file` to maintain separate indexes. The flag goes before the subcommand.
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
repointel --index-file .reposcope/frontend.json index ./frontend
|
|
216
|
+
repointel --index-file .reposcope/backend.json index ./backend
|
|
217
|
+
|
|
218
|
+
repointel --index-file .reposcope/frontend.json ask "how is routing configured?"
|
|
219
|
+
repointel --index-file .reposcope/backend.json ask "what database tables exist?"
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Optional REST API
|
|
225
|
+
|
|
226
|
+
```bash
|
|
227
|
+
pip install -e ".[api]"
|
|
228
|
+
uvicorn reposcope.api:app --reload
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
| Method | Endpoint | Body |
|
|
232
|
+
|---|---|---|
|
|
233
|
+
| `POST` | `/index` | `{ "path": "/abs/path/to/repo", "embed": false }` |
|
|
234
|
+
| `POST` | `/search` | `{ "query": "...", "top_k": 8 }` |
|
|
235
|
+
| `POST` | `/ask` | `{ "query": "...", "top_k": 8 }` |
|
|
236
|
+
| `GET` | `/stats` | — |
|
|
237
|
+
|
|
238
|
+
Docs at `http://localhost:8000/docs`.
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## Supported languages
|
|
243
|
+
|
|
244
|
+
| Language | Chunking |
|
|
245
|
+
|---|---|
|
|
246
|
+
| Python | Regex-based: splits at `def` / `async def` / `class` boundaries |
|
|
247
|
+
| JavaScript / TypeScript / JSX / TSX | Regex-based: splits at `function`, `class`, and arrow function boundaries |
|
|
248
|
+
| C# | Regex-based: splits at class, interface, record, struct, and method boundaries |
|
|
249
|
+
| SQL, Markdown, JSON, YAML, TOML, CSS, SCSS, HTML, Dockerfile, Makefile | Fixed 80-line windows with 15-line overlap |
|
|
250
|
+
|
|
251
|
+
Files over 750 KB and generated lock files (`package-lock.json`, `yarn.lock`, `pnpm-lock.yaml`) are skipped.
|
|
252
|
+
|
|
253
|
+
---
|
|
254
|
+
|
|
255
|
+
## Ignored directories
|
|
256
|
+
|
|
257
|
+
`node_modules`, `.git`, `dist`, `build`, `bin`, `obj`, `.venv`, `__pycache__`, `.next`, `.nuxt`, `coverage`, `target`, `temp`, `vendor`, and other standard build/cache directories are excluded automatically.
|
|
258
|
+
|
|
259
|
+
---
|
|
260
|
+
|
|
261
|
+
## Roadmap
|
|
262
|
+
|
|
263
|
+
- [ ] Tree-sitter chunking for true AST-level boundaries (replacing the regex approach)
|
|
264
|
+
- [ ] Incremental re-indexing on file change
|
|
265
|
+
- [ ] Cross-repo index merging for monorepos
|
|
266
|
+
- [ ] Qdrant / Chroma backend for repositories with >50k chunks
|
|
267
|
+
|
|
268
|
+
---
|
|
269
|
+
|
|
270
|
+
## License
|
|
271
|
+
|
|
272
|
+
MIT
|
repointel-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "repointel"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Local-first codebase-aware AI: index any repo and ask questions about it."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.11"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
license-files = ["LICENSE"]
|
|
9
|
+
authors = [{name = "Nirakar Jena", email = "jenashubham60@gmail.com"}]
|
|
10
|
+
keywords = ["codebase", "rag", "ai", "search", "developer-tools"]
|
|
11
|
+
classifiers = [
|
|
12
|
+
"Development Status :: 3 - Alpha",
|
|
13
|
+
"Environment :: Console",
|
|
14
|
+
"Intended Audience :: Developers",
|
|
15
|
+
"Programming Language :: Python :: 3",
|
|
16
|
+
"Programming Language :: Python :: 3.11",
|
|
17
|
+
"Programming Language :: Python :: 3.12",
|
|
18
|
+
"Topic :: Software Development :: Libraries",
|
|
19
|
+
"Topic :: Utilities",
|
|
20
|
+
]
|
|
21
|
+
dependencies = []
|
|
22
|
+
|
|
23
|
+
[project.optional-dependencies]
|
|
24
|
+
api = ["fastapi>=0.115", "uvicorn>=0.30"]
|
|
25
|
+
openai = ["openai>=1.0"]
|
|
26
|
+
gemini = ["google-genai>=0.6"]
|
|
27
|
+
claude = ["anthropic>=0.40"]
|
|
28
|
+
embed = ["sentence-transformers>=3.0", "numpy>=1.26"]
|
|
29
|
+
all = ["fastapi>=0.115", "uvicorn>=0.30", "openai>=1.0", "google-genai>=0.6", "anthropic>=0.40", "sentence-transformers>=3.0", "numpy>=1.26"]
|
|
30
|
+
dev = ["pytest>=8.0"]
|
|
31
|
+
|
|
32
|
+
[project.scripts]
|
|
33
|
+
repointel = "reposcope.cli:main"
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Repository = "https://github.com/nirakar24/reposcope"
|
|
37
|
+
"Bug Tracker" = "https://github.com/nirakar24/reposcope/issues"
|
|
38
|
+
|
|
39
|
+
[build-system]
|
|
40
|
+
requires = ["setuptools>=68"]
|
|
41
|
+
build-backend = "setuptools.build_meta"
|