scai 0.1.163 → 0.1.165
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +203 -133
- package/dist/db/fileIndex.js +128 -126
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,98 +1,196 @@
|
|
|
1
1
|
# ⚙️ SCAI — Source Code AI 🌿
|
|
2
2
|
|
|
3
|
-
> **
|
|
4
|
-
> **100% local • No token
|
|
3
|
+
> **A local-first AI CLI for understanding, querying, and iterating on large codebases.**
|
|
4
|
+
> **100% local • No token costs • No cloud • No prompt injection • Private by design**
|
|
5
5
|
|
|
6
6
|
🔗 **Website:** [https://scai.dk](https://scai.dk)
|
|
7
|
+
🇪🇺 Built in Denmark / EU
|
|
7
8
|
|
|
8
|
-
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## What is SCAI?
|
|
12
|
+
|
|
13
|
+
**SCAI** is an AI-powered command-line tool that helps developers explore and reason about source code using **local large language models only**.
|
|
14
|
+
|
|
15
|
+
Inspired by tools such as *Claude Code* and *Gemini CLI*, SCAI is designed to feel like a natural extension of the terminal. It enables natural-language interaction with your codebase while deliberately avoiding cloud dependencies and network-connected agents.
|
|
16
|
+
|
|
17
|
+
SCAI runs entirely on your local system:
|
|
18
|
+
|
|
19
|
+
* **No token costs** — no usage-based pricing
|
|
20
|
+
* **No internet access for agents**
|
|
21
|
+
* **No prompt injection from web content**
|
|
22
|
+
* No external AI APIs
|
|
23
|
+
* No telemetry or tracking
|
|
24
|
+
* No API keys
|
|
25
|
+
|
|
26
|
+
Your code never leaves your machine. All analysis and reasoning happens locally.
|
|
27
|
+
|
|
28
|
+
> **Local model tradeoff**
|
|
29
|
+
> SCAI uses local LLMs. Output quality depends on your hardware and selected model. Cloud-hosted systems may perform better on general reasoning tasks, but SCAI prioritizes privacy, predictability, and control.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## ⚠️ Alpha Status
|
|
34
|
+
|
|
35
|
+
SCAI is currently in **alpha**.
|
|
36
|
+
|
|
37
|
+
If you have previously installed SCAI, reset the local database before upgrading:
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
scai db reset
|
|
41
|
+
scai index start
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
Breaking changes and evolving behavior should be expected.
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Why SCAI?
|
|
49
|
+
|
|
50
|
+
### 🔐 Local-Only by Design
|
|
51
|
+
|
|
52
|
+
SCAI agents operate **entirely offline**.
|
|
53
|
+
|
|
54
|
+
They do not:
|
|
55
|
+
|
|
56
|
+
* Browse the web
|
|
57
|
+
* Fetch URLs
|
|
58
|
+
* Ingest external documents
|
|
59
|
+
* Execute remote prompts
|
|
9
60
|
|
|
10
|
-
**
|
|
61
|
+
**Security implications:**
|
|
11
62
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
>
|
|
19
|
-
> before using this version.
|
|
63
|
+
* No prompt injection via web content
|
|
64
|
+
* No data exfiltration
|
|
65
|
+
* No hidden network calls
|
|
66
|
+
* Fully auditable execution
|
|
67
|
+
|
|
68
|
+
This makes SCAI suitable for **private repositories, regulated environments, and GDPR-compliant workflows**.
|
|
20
69
|
|
|
21
70
|
---
|
|
22
71
|
|
|
23
|
-
|
|
72
|
+
### 🧠 Codebase-Aware Analysis
|
|
73
|
+
|
|
74
|
+
SCAI builds and maintains a structured internal representation of your repository using:
|
|
75
|
+
|
|
76
|
+
* Language-aware parsing
|
|
77
|
+
* Symbol and dependency indexing
|
|
78
|
+
* Static and heuristic analysis
|
|
79
|
+
* Cross-file context tracking
|
|
80
|
+
|
|
81
|
+
This enables repository-level questions that go beyond single-file inspection.
|
|
24
82
|
|
|
25
|
-
|
|
83
|
+
---
|
|
26
84
|
|
|
27
|
-
###
|
|
85
|
+
### ✂️ Assisted Code Iteration (Early)
|
|
28
86
|
|
|
29
|
-
|
|
87
|
+
SCAI can assist with **lightweight, example-driven code iteration**, primarily focused on understanding and improving existing code rather than large-scale automated refactoring.
|
|
30
88
|
|
|
31
|
-
|
|
89
|
+
Current strengths include:
|
|
32
90
|
|
|
33
|
-
|
|
91
|
+
* Explaining what functions, files, or modules do
|
|
92
|
+
* Identifying patterns and responsibilities across files
|
|
93
|
+
* Generating or improving comments and documentation
|
|
94
|
+
* Highlighting structural or readability issues
|
|
95
|
+
* Suggesting small, localized improvements
|
|
34
96
|
|
|
35
|
-
|
|
97
|
+
Changes are **guided by indexed context and user prompts**, and are intended to support human review and decision-making.
|
|
36
98
|
|
|
37
|
-
|
|
99
|
+
Large-scale or fully automated repository-wide refactoring should currently be considered **experimental**.
|
|
38
100
|
|
|
39
|
-
|
|
101
|
+
---
|
|
40
102
|
|
|
41
|
-
|
|
103
|
+
### 🛠 Built for Developer Workflows
|
|
42
104
|
|
|
43
|
-
|
|
105
|
+
SCAI is a **terminal-native tool** designed to integrate cleanly into daily development:
|
|
44
106
|
|
|
45
|
-
|
|
107
|
+
* Natural-language queries over your codebase
|
|
108
|
+
* Code understanding and exploration
|
|
109
|
+
* Assisted iteration and suggestions
|
|
110
|
+
* Commit message generation
|
|
111
|
+
* Background indexing and analysis
|
|
112
|
+
* Interactive REPL
|
|
46
113
|
|
|
47
|
-
|
|
114
|
+
No browser UI. No cloud login. No vendor lock-in.
|
|
48
115
|
|
|
49
116
|
---
|
|
50
117
|
|
|
51
|
-
|
|
118
|
+
### 🇪🇺 Privacy & Compliance First
|
|
52
119
|
|
|
53
|
-
|
|
120
|
+
* Fully local execution
|
|
121
|
+
* No telemetry
|
|
122
|
+
* No cloud services
|
|
123
|
+
* Developed in Denmark / EU
|
|
124
|
+
* GDPR-friendly by default
|
|
125
|
+
|
|
126
|
+
---
|
|
54
127
|
|
|
55
|
-
|
|
56
|
-
|
|
128
|
+
## Language Support
|
|
129
|
+
|
|
130
|
+
SCAI is currently **tested and supported** for:
|
|
131
|
+
|
|
132
|
+
* **JavaScript**
|
|
133
|
+
* **TypeScript**
|
|
57
134
|
* **Java**
|
|
58
135
|
|
|
59
|
-
Other languages may work partially, but
|
|
136
|
+
Other languages may work partially, but indexing quality, analysis accuracy, and agent behavior are **not guaranteed**.
|
|
137
|
+
|
|
138
|
+
SCAI should currently be considered **JS / TS / Java-first**.
|
|
60
139
|
|
|
61
140
|
---
|
|
62
141
|
|
|
63
|
-
##
|
|
142
|
+
## Getting Started
|
|
64
143
|
|
|
65
|
-
###
|
|
144
|
+
### Install & Initialize
|
|
66
145
|
|
|
67
|
-
bash
|
|
146
|
+
```bash
|
|
68
147
|
npm install -g scai
|
|
69
148
|
scai init
|
|
70
149
|
scai index start
|
|
150
|
+
```
|
|
71
151
|
|
|
152
|
+
This:
|
|
72
153
|
|
|
73
|
-
|
|
154
|
+
* Initializes local configuration
|
|
155
|
+
* Starts the background daemon
|
|
156
|
+
* Begins indexing the current repository
|
|
74
157
|
|
|
75
|
-
>
|
|
158
|
+
> **Note**
|
|
159
|
+
> Initial indexing can take **minutes to hours**, depending on repository size and enabled analysis.
|
|
76
160
|
|
|
77
|
-
|
|
161
|
+
---
|
|
162
|
+
|
|
163
|
+
### Starting SCAI
|
|
164
|
+
|
|
165
|
+
Running the `scai` command with no arguments starts the interactive shell:
|
|
78
166
|
|
|
79
167
|
```bash
|
|
80
|
-
scai
|
|
168
|
+
scai
|
|
81
169
|
```
|
|
82
170
|
|
|
83
|
-
|
|
171
|
+
You can also start it explicitly:
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
scai shell
|
|
175
|
+
```
|
|
84
176
|
|
|
85
|
-
|
|
177
|
+
---
|
|
86
178
|
|
|
87
|
-
|
|
179
|
+
### View Available Commands
|
|
88
180
|
|
|
89
181
|
```bash
|
|
90
|
-
scai
|
|
182
|
+
scai --help
|
|
91
183
|
```
|
|
92
184
|
|
|
93
|
-
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
## Interactive REPL
|
|
188
|
+
|
|
189
|
+
The REPL is the primary interface for working with SCAI.
|
|
190
|
+
|
|
191
|
+
### Ask questions about your codebase
|
|
94
192
|
|
|
95
|
-
|
|
193
|
+
Be specific for better results.
|
|
96
194
|
|
|
97
195
|
```text
|
|
98
196
|
scai> What does the withContext function do in the index.ts file?
|
|
@@ -103,32 +201,28 @@ scai> Where are all the database queries defined?
|
|
|
103
201
|
scai> List files involved in authentication
|
|
104
202
|
```
|
|
105
203
|
|
|
106
|
-
### Run CLI commands
|
|
204
|
+
### Run CLI commands inside the REPL
|
|
107
205
|
|
|
108
206
|
```text
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
scai> /index switch
|
|
113
|
-
scai> /index delete
|
|
207
|
+
/index list
|
|
208
|
+
/index switch
|
|
209
|
+
/git commit
|
|
114
210
|
```
|
|
115
211
|
|
|
116
212
|
### Execute shell commands
|
|
117
213
|
|
|
118
214
|
```text
|
|
119
|
-
|
|
120
|
-
|
|
215
|
+
!git status
|
|
216
|
+
!ls -la
|
|
121
217
|
```
|
|
122
218
|
|
|
123
|
-
|
|
219
|
+
All interactions remain **offline and free**, with **no token usage**.
|
|
124
220
|
|
|
125
221
|
---
|
|
126
222
|
|
|
127
|
-
##
|
|
223
|
+
## Repository Indexing
|
|
128
224
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
### Common Index Commands
|
|
225
|
+
Repositories must be indexed before querying:
|
|
132
226
|
|
|
133
227
|
```bash
|
|
134
228
|
scai index set /path/to/repo
|
|
@@ -138,47 +232,43 @@ scai index switch
|
|
|
138
232
|
scai index delete
|
|
139
233
|
```
|
|
140
234
|
|
|
141
|
-
Only indexed repositories
|
|
235
|
+
Only indexed repositories are accessible to agents.
|
|
142
236
|
|
|
143
237
|
---
|
|
144
238
|
|
|
145
|
-
##
|
|
146
|
-
|
|
147
|
-
SCAI performs **deep repository indexing and static analysis** using background workers. This includes:
|
|
239
|
+
## Background Analysis (Daemon)
|
|
148
240
|
|
|
149
|
-
|
|
150
|
-
* Language-aware parsing (JS / TS / Java)
|
|
151
|
-
* Symbol and dependency mapping
|
|
152
|
-
* Heuristic analysis for tests, architecture, and patterns
|
|
241
|
+
SCAI performs deep analysis in the background, including:
|
|
153
242
|
|
|
154
|
-
|
|
243
|
+
* File discovery
|
|
244
|
+
* AST parsing
|
|
245
|
+
* Dependency graph construction
|
|
246
|
+
* Symbol resolution
|
|
247
|
+
* Heuristic structure analysis
|
|
155
248
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
### Daemon Commands
|
|
249
|
+
Daemon control:
|
|
159
250
|
|
|
160
251
|
```bash
|
|
161
252
|
scai daemon start
|
|
162
253
|
scai daemon stop
|
|
163
254
|
scai daemon restart
|
|
164
255
|
scai daemon status
|
|
165
|
-
scai daemon unlock
|
|
166
256
|
scai daemon logs
|
|
167
257
|
```
|
|
168
258
|
|
|
169
|
-
|
|
259
|
+
Indexing progress resumes automatically after restart.
|
|
170
260
|
|
|
171
261
|
---
|
|
172
262
|
|
|
173
|
-
##
|
|
263
|
+
## Configuration
|
|
174
264
|
|
|
175
|
-
Set
|
|
265
|
+
Set a local model (recommended):
|
|
176
266
|
|
|
177
267
|
```bash
|
|
178
268
|
scai config set-model qwen3-coder:30b
|
|
179
269
|
```
|
|
180
270
|
|
|
181
|
-
View
|
|
271
|
+
View configuration:
|
|
182
272
|
|
|
183
273
|
```bash
|
|
184
274
|
scai config show --raw
|
|
@@ -186,22 +276,22 @@ scai config show --raw
|
|
|
186
276
|
|
|
187
277
|
---
|
|
188
278
|
|
|
189
|
-
##
|
|
279
|
+
## Git Commit Assistant
|
|
190
280
|
|
|
191
|
-
Generate
|
|
281
|
+
Generate commit messages from staged changes:
|
|
192
282
|
|
|
193
283
|
```bash
|
|
194
284
|
git add .
|
|
195
285
|
scai git commit
|
|
196
286
|
```
|
|
197
287
|
|
|
198
|
-
All
|
|
288
|
+
All diff inspection and reasoning are performed locally.
|
|
199
289
|
|
|
200
290
|
---
|
|
201
291
|
|
|
202
|
-
##
|
|
292
|
+
## GitHub Authentication
|
|
203
293
|
|
|
204
|
-
|
|
294
|
+
Required only for GitHub-related features:
|
|
205
295
|
|
|
206
296
|
```bash
|
|
207
297
|
scai auth set
|
|
@@ -211,98 +301,78 @@ scai auth reset
|
|
|
211
301
|
|
|
212
302
|
---
|
|
213
303
|
|
|
214
|
-
##
|
|
215
|
-
|
|
216
|
-
* `Summarize codeTransform.js`
|
|
217
|
-
* `Explain utils/helpers.ts architecture`
|
|
218
|
-
* `List all functions without tests in services/`
|
|
219
|
-
* `Show where database queries are defined`
|
|
220
|
-
* `Highlight potential memory leaks`
|
|
221
|
-
* `Describe how authentication works`
|
|
222
|
-
* `Summarize repo architecture`
|
|
223
|
-
|
|
224
|
-
---
|
|
225
|
-
|
|
226
|
-
## 🔐 Privacy & GDPR
|
|
304
|
+
## Privacy & Security Summary
|
|
227
305
|
|
|
228
|
-
*
|
|
306
|
+
* 100% local execution
|
|
307
|
+
* No internet access for agents
|
|
308
|
+
* No prompt injection from web content
|
|
229
309
|
* No API keys
|
|
230
|
-
*
|
|
231
|
-
* GDPR-friendly
|
|
310
|
+
* No token costs
|
|
311
|
+
* GDPR-friendly by default
|
|
232
312
|
|
|
233
313
|
---
|
|
234
314
|
|
|
235
|
-
##
|
|
236
|
-
|
|
237
|
-
Feedback, bugs, and ideas are very welcome:
|
|
315
|
+
## Feedback & Community
|
|
238
316
|
|
|
239
|
-
* 🌍
|
|
240
|
-
* 🧵
|
|
241
|
-
|
|
242
|
-
<br>
|
|
317
|
+
* 🌍 [https://scai.dk](https://scai.dk)
|
|
318
|
+
* 🧵 [https://threads.net/@scai.dk](https://threads.net/@scai.dk)
|
|
243
319
|
|
|
244
320
|
---
|
|
245
321
|
|
|
246
|
-
|
|
247
|
-
<br>
|
|
248
|
-
|
|
249
|
-
## 🔐 License & Usage Terms
|
|
322
|
+
# License & Usage Terms
|
|
250
323
|
|
|
251
|
-
|
|
252
|
-
All rights reserved.
|
|
324
|
+
© SCAI — All rights reserved.
|
|
253
325
|
|
|
254
|
-
SCAI is **free
|
|
326
|
+
SCAI is **free for non-commercial use only**.
|
|
255
327
|
|
|
256
328
|
---
|
|
257
329
|
|
|
258
|
-
##
|
|
330
|
+
## Permitted Use
|
|
259
331
|
|
|
260
|
-
You may use SCAI
|
|
332
|
+
You may use SCAI free of charge for:
|
|
261
333
|
|
|
262
334
|
* Personal projects
|
|
263
335
|
* Educational use
|
|
264
336
|
* Research and experimentation
|
|
265
|
-
* Non-commercial open-source
|
|
266
|
-
* Internal evaluation or proof-of-
|
|
337
|
+
* Non-commercial open-source work
|
|
338
|
+
* Internal evaluation or proofs of concept
|
|
267
339
|
|
|
268
|
-
You may
|
|
340
|
+
You may fork and modify the source code **for non-commercial purposes only**.
|
|
269
341
|
|
|
270
342
|
---
|
|
271
343
|
|
|
272
|
-
##
|
|
344
|
+
## Restricted Use
|
|
273
345
|
|
|
274
|
-
The following
|
|
346
|
+
The following require a **commercial license**:
|
|
275
347
|
|
|
276
|
-
*
|
|
277
|
-
*
|
|
278
|
-
*
|
|
279
|
-
*
|
|
280
|
-
*
|
|
281
|
-
* Resale, sublicensing, or redistribution for commercial purposes
|
|
348
|
+
* Any commercial or enterprise use
|
|
349
|
+
* Consultancy or client work
|
|
350
|
+
* Paid products or services
|
|
351
|
+
* Internal enterprise tooling
|
|
352
|
+
* Commercial redistribution or resale
|
|
282
353
|
|
|
283
354
|
---
|
|
284
355
|
|
|
285
|
-
##
|
|
356
|
+
## Commercial Licensing
|
|
286
357
|
|
|
287
|
-
|
|
358
|
+
Commercial and enterprise use requires a **paid license** and explicit permission from the author.
|
|
288
359
|
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
Please contact the author to discuss commercial licensing terms.
|
|
360
|
+
Please contact the author to discuss licensing terms.
|
|
292
361
|
|
|
293
362
|
---
|
|
294
363
|
|
|
295
|
-
##
|
|
364
|
+
## Disclaimer
|
|
296
365
|
|
|
297
|
-
This software is provided
|
|
366
|
+
This software is provided **“as is”**, without warranty of any kind.
|
|
298
367
|
|
|
299
|
-
|
|
368
|
+
The author is not liable for any damages arising from its use.
|
|
300
369
|
|
|
301
370
|
---
|
|
302
371
|
|
|
303
|
-
###
|
|
372
|
+
### Non-Binding Summary
|
|
304
373
|
|
|
305
374
|
* Free for personal and non-commercial use
|
|
306
|
-
*
|
|
307
|
-
*
|
|
308
|
-
|
|
375
|
+
* Fully local, offline AI
|
|
376
|
+
* No token costs
|
|
377
|
+
* No prompt injection surface
|
|
378
|
+
* Commercial use requires a license
|
package/dist/db/fileIndex.js
CHANGED
|
@@ -100,54 +100,143 @@ export function queryFiles(safeQuery, limit = 10) {
|
|
|
100
100
|
// - Uses LLM aggressively
|
|
101
101
|
// - Optimizes for precision
|
|
102
102
|
// --------------------------------------------------
|
|
103
|
-
export async function semanticSearchFiles(originalQuery,
|
|
103
|
+
/**
 * Precision-oriented file search driven by LLM-written FTS queries.
 *
 * Flow:
 *   1. Ask the model for one primary FTS query and run it once.
 *   2. Only when that yields zero rows, ask the model for fallback queries
 *      and run them in order, returning on the first one that has hits.
 *   3. If nothing matches, return an empty array (no further expansion).
 *
 * @param {string} originalQuery - The user's natural-language question.
 * @param {string} _query - Ignored; kept so existing call sites keep working.
 * @param {number} [topK=5] - Forwarded to rankAndMap.
 * @returns {Promise<Array>} rankAndMap output for the first query with hits, else [].
 */
export async function semanticSearchFiles(originalQuery, _query, // ignored now – LLM owns query construction
topK = 5) {
    const db = getDbForRepo();
    // --------------------------------------------------
    // 1. LLM → primary FTS query (always)
    // --------------------------------------------------
    const primaryFtsQuery = await generatePrimaryFtsQuery(originalQuery);
    logInputOutput("semanticSearchFiles LLM primary query", "output", {
        originalQuery,
        ftsQuery: primaryFtsQuery,
    });
    // Prepared once: the same statement serves the primary and every fallback query.
    const searchStatement = db.prepare(sqlTemplates.searchFilesTemplate);
    // Query text comes from a model, so it may be blank or not valid FTS syntax.
    // A blank query is skipped, and if the statement throws while executing
    // (e.g. the FTS engine rejects the syntax) the failure is logged and treated
    // as "no hits" so the search moves on to the next step instead of aborting.
    const runFtsQuery = (ftsQuery) => {
        if (typeof ftsQuery !== "string" || ftsQuery.trim() === "") {
            return [];
        }
        try {
            return searchStatement.all(ftsQuery, RELATED_FILES_LIMIT);
        }
        catch (err) {
            log(`⚠️ [semanticSearchFiles] FTS query failed (${ftsQuery}): ${String(err)}`);
            return [];
        }
    };
    // --------------------------------------------------
    // 2. Run primary FTS once
    // --------------------------------------------------
    const primaryResults = runFtsQuery(primaryFtsQuery);
    if (primaryResults.length > 0) {
        return rankAndMap(new Map(primaryResults.map(r => [r.id, r])), topK);
    }
    // --------------------------------------------------
    // 3. Fallback: LLM → 2–3 subqueries (ONLY if zero results)
    // --------------------------------------------------
    const subQueries = await generateFallbackFtsQueries(originalQuery, primaryFtsQuery);
    logInputOutput("semanticSearchFiles LLM fallback queries", "output", {
        originalQuery,
        primaryFtsQuery,
        subQueries,
    });
    // --------------------------------------------------
    // 4. Execute fallback queries sequentially (first hit wins)
    // --------------------------------------------------
    for (const subQuery of subQueries) {
        const rows = runFtsQuery(subQuery);
        if (rows.length > 0) {
            return rankAndMap(new Map(rows.map(r => [r.id, r])), topK);
        }
    }
    // --------------------------------------------------
    // 5. Hard stop
    // --------------------------------------------------
    return [];
}
|
|
148
|
+
/**
 * Ask the model to turn a natural-language question into one FTS query.
 *
 * The model's answer is normalised before it is returned: it is split on the
 * OR operator, each term goes through sanitizeQueryForFts, blank terms are
 * dropped, and at most 10 terms are kept (the limit stated in the prompt).
 * This mirrors how the fallback query generator in this file treats model output.
 *
 * @param {string} userQuery - The user's natural-language question.
 * @returns {Promise<string>} The normalised model query, or
 *   sanitizeQueryForFts(userQuery) when the model call fails or yields
 *   nothing usable.
 */
async function generatePrimaryFtsQuery(userQuery) {
    const prompt = `
You are generating a SQLite FTS query for searching a source code repository.

Input (natural language):
"${userQuery}"

Task:
- Produce ONE concise FTS query
- Focus on filenames, symbols, module names, domain nouns
- Prefer literal identifiers likely to exist in code
- NO sentences
- NO stopwords
- NO explanations
- NO wildcards unless absolutely necessary
- Use OR between terms
- **MAX 10 terms only** — be selective and concise

Output JSON ONLY:
{
"ftsQuery": "term1 OR term2 OR term3"
}
`.trim();
    try {
        const response = await generate({ content: prompt, query: "" });
        const cleaned = await cleanupModule.run({
            query: userQuery,
            content: response.data,
        });
        const data = cleaned.data;
        if (data &&
            typeof data === "object" &&
            typeof data.ftsQuery === "string") {
            const normalized = data.ftsQuery
                .split(/\s+OR\s+/)
                .map((term) => sanitizeQueryForFts(term)) // sanitize each term individually
                .filter((term) => typeof term === "string" && term.trim() !== "")
                .slice(0, 10) // enforce the prompt's term limit
                .join(" OR ");
            // A blank result is useless as a search; fall through to the safety fallback.
            if (normalized !== "") {
                return normalized;
            }
        }
    }
    catch (err) {
        log(`⚠️ [semanticSearchFiles] Failed to generate primary FTS query: ${String(err)}`);
    }
    // Absolute safety fallback — never explode
    return sanitizeQueryForFts(userQuery);
}
|
|
190
|
+
/**
 * Ask the model for alternative FTS queries after the primary query found nothing.
 *
 * Each returned query is rebuilt from its OR-separated terms: every term goes
 * through sanitizeQueryForFts, blank terms are dropped, and at most 10 terms
 * are kept. Queries that end up empty, or that merely repeat the query that
 * already failed, are discarded before the list is capped at 3.
 *
 * @param {string} userQuery - The user's natural-language question.
 * @param {string} failedQuery - The primary FTS query that returned zero rows.
 * @returns {Promise<string[]>} Up to 3 usable FTS queries; [] when the model
 *   call fails or produces nothing usable.
 */
async function generateFallbackFtsQueries(userQuery, failedQuery) {
    const prompt = `
You are generating fallback SQLite FTS queries for a source code search.

Original user query:
"${userQuery}"

Primary FTS query returned ZERO results:
"${failedQuery}"

Task:
- Generate 2–3 independent FTS queries (MAX 3)
- Each query should be concise: no more than 10 OR-joined search terms
- Focus on filenames, symbols, module names
- Avoid natural-language sentences
- Avoid recursion or refinement loops
- Use OR between terms

Output JSON ONLY:
{
"subQueries": [
"query1",
"query2",
"query3"
]
}
`.trim();
    try {
        const response = await generate({ content: prompt, query: "" });
        const cleaned = await cleanupModule.run({
            query: userQuery,
            content: response.data,
        });
        const data = cleaned.data;
        if (data &&
            typeof data === "object" &&
            Array.isArray(data.subQueries)) {
            return data.subQueries
                .filter((q) => typeof q === "string")
                .map((q) => q
                .split(/\s+OR\s+/)
                .map((term) => sanitizeQueryForFts(term)) // sanitize each term individually
                // A term that sanitizes to nothing would leave a dangling OR.
                .filter((term) => typeof term === "string" && term.trim() !== "")
                .slice(0, 10) // cap terms per query
                .join(" OR "))
                // Drop queries with no terms left and repeats of the query that already failed.
                .filter((q) => q !== "" && q !== failedQuery)
                .slice(0, 3); // cap to 3 usable queries
        }
    }
    catch (err) {
        log(`⚠️ [semanticSearchFiles] Failed to generate fallback queries: ${String(err)}`);
    }
    return [];
}
|
|
152
241
|
// --------------------------------------------------
|
|
153
242
|
// PLANNER SEARCH (fileSearchModule, discovery)
|
|
@@ -209,71 +298,6 @@ function rankAndMap(seen, topK) {
|
|
|
209
298
|
bm25Score: r.bm25Score,
|
|
210
299
|
}));
|
|
211
300
|
}
|
|
212
|
-
/**
|
|
213
|
-
* Ask the model whether the initial FTS hits are relevant to the query.
|
|
214
|
-
* If not, suggest new search terms for another FTS pass.
|
|
215
|
-
*/
|
|
216
|
-
async function checkFtsRelevanceWithModel(query, ftsResults) {
|
|
217
|
-
if (ftsResults.length === 0) {
|
|
218
|
-
return { relevant: false, suggestedTerms: [] };
|
|
219
|
-
}
|
|
220
|
-
const prompt = `
|
|
221
|
-
You are assisting a code search system that uses full-text search (FTS)
|
|
222
|
-
over source code.
|
|
223
|
-
|
|
224
|
-
Query (natural language):
|
|
225
|
-
"${query}"
|
|
226
|
-
|
|
227
|
-
Initial FTS results (filenames and summaries):
|
|
228
|
-
${JSON.stringify(ftsResults)}
|
|
229
|
-
|
|
230
|
-
Task:
|
|
231
|
-
1. Decide whether these results are relevant to the query.
|
|
232
|
-
2. If they are NOT relevant, suggest alternative search terms.
|
|
233
|
-
|
|
234
|
-
IMPORTANT RULES FOR SUGGESTED TERMS:
|
|
235
|
-
- Terms MUST be likely to appear literally in source code.
|
|
236
|
-
- Prefer: filenames, module names, function names, variables, symbols, config keys.
|
|
237
|
-
- Use short identifiers (1–3 words max).
|
|
238
|
-
- Avoid natural-language phrases or explanations.
|
|
239
|
-
- Avoid conceptual or architectural descriptions.
|
|
240
|
-
- Examples of GOOD terms: "api", "router", "frontend", "backend", "client", "server", "routes", "config.ts"
|
|
241
|
-
- Examples of BAD terms: "frontend backend separation", "code architecture", "business logic"
|
|
242
|
-
|
|
243
|
-
Output format:
|
|
244
|
-
- If relevant:
|
|
245
|
-
{ "relevant": true, "suggestedTerms": [] }
|
|
246
|
-
- If not relevant:
|
|
247
|
-
{ "relevant": false, "suggestedTerms": ["term1", "term2", "term3"] }
|
|
248
|
-
|
|
249
|
-
Return ONLY valid JSON.
|
|
250
|
-
`.trim();
|
|
251
|
-
try {
|
|
252
|
-
const response = await generate({ content: prompt, query: "" });
|
|
253
|
-
const cleaned = await cleanupModule.run({
|
|
254
|
-
query,
|
|
255
|
-
content: response.data,
|
|
256
|
-
});
|
|
257
|
-
const data = cleaned.data;
|
|
258
|
-
// Type guard: ensure it's an object with correct properties
|
|
259
|
-
if (data &&
|
|
260
|
-
typeof data === "object" &&
|
|
261
|
-
"relevant" in data &&
|
|
262
|
-
"suggestedTerms" in data &&
|
|
263
|
-
typeof data.relevant === "boolean" &&
|
|
264
|
-
Array.isArray(data.suggestedTerms)) {
|
|
265
|
-
const relevant = data.relevant;
|
|
266
|
-
const suggestedTerms = data.suggestedTerms.filter((t) => typeof t === "string");
|
|
267
|
-
return { relevant, suggestedTerms };
|
|
268
|
-
}
|
|
269
|
-
return { relevant: false, suggestedTerms: [] };
|
|
270
|
-
}
|
|
271
|
-
catch (err) {
|
|
272
|
-
log(`⚠️ [searchFiles] Failed to check FTS relevance: ${String(err)}`);
|
|
273
|
-
return { relevant: false, suggestedTerms: [] };
|
|
274
|
-
}
|
|
275
|
-
}
|
|
276
|
-
;
|
|
277
301
|
async function expandQueryWithModel(query) {
|
|
278
302
|
const prompt = `
|
|
279
303
|
You are assisting a code search system.
|
|
@@ -305,25 +329,3 @@ Question:
|
|
|
305
329
|
return [];
|
|
306
330
|
}
|
|
307
331
|
}
|
|
308
|
-
async function semanticFallbackSearch(query, topK) {
|
|
309
|
-
const db = getDbForRepo();
|
|
310
|
-
const seen = new Map();
|
|
311
|
-
const llmTerms = await expandQueryWithModel(query);
|
|
312
|
-
logInputOutput("semanticSearchFiles blind expansion", "output", {
|
|
313
|
-
query,
|
|
314
|
-
suggestedTerms: llmTerms,
|
|
315
|
-
});
|
|
316
|
-
for (const term of llmTerms) {
|
|
317
|
-
const safeTerm = sanitizeQueryForFts(term);
|
|
318
|
-
const rows = db
|
|
319
|
-
.prepare(sqlTemplates.searchFilesTemplate)
|
|
320
|
-
.all(safeTerm, RELATED_FILES_LIMIT);
|
|
321
|
-
for (const row of rows) {
|
|
322
|
-
if (!seen.has(row.id))
|
|
323
|
-
seen.set(row.id, row);
|
|
324
|
-
}
|
|
325
|
-
}
|
|
326
|
-
if (seen.size === 0)
|
|
327
|
-
return [];
|
|
328
|
-
return rankAndMap(seen, topK);
|
|
329
|
-
}
|