scai 0.1.165 → 0.1.166

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,3 +1,6 @@
1
+ // File: src/agents/reasonNextTaskStep.ts
2
+ import { generate } from "../lib/generate.js";
3
+ import { cleanupModule } from "../pipeline/modules/cleanupModule.js";
1
4
  import { logInputOutput } from "../utils/promptLogHelper.js";
2
5
  /**
3
6
  * REASON NEXT TASK STEP
@@ -95,6 +98,48 @@ export const reasonNextTaskStep = {
95
98
  confidence = 0.98;
96
99
  }
97
100
  // ---------------------------
101
+ // 6.5️⃣ Optional: Reason over known risks
102
+ // ---------------------------
103
+ const knownRisks = context.analysis.understanding?.risks ?? [];
104
+ if (knownRisks.length > 0) {
105
+ // Optionally call the LLM with constrained instructions
106
+ const riskPrompt = `
107
+ You are given the following KNOWN RISKS (authoritative, do not invent new ones):
108
+ ${knownRisks.map(r => "- " + r).join("\n")}
109
+
110
+ Task:
111
+ - Decide whether it is reasonable to ask the user for clarification before proceeding.
112
+ - Return STRICT JSON: { askUser: true|false, rationale: string }
113
+ `;
114
+ try {
115
+ const aiResponse = await generate({
116
+ query: context.initContext?.userQuery ?? "",
117
+ content: riskPrompt
118
+ });
119
+ const cleaned = await cleanupModule.run({
120
+ query: context.initContext?.userQuery ?? "",
121
+ content: aiResponse.data ?? ""
122
+ });
123
+ const parsed = cleaned.data;
124
+ // type guard
125
+ if (parsed &&
126
+ typeof parsed === "object" &&
127
+ "askUser" in parsed &&
128
+ "rationale" in parsed &&
129
+ typeof parsed.rationale === "string") {
130
+ if (parsed.askUser) {
131
+ nextAction = "request-feedback";
132
+ rationale += `\nUser clarification recommended due to known risks: ${parsed.rationale}`;
133
+ confidence = Math.min(confidence, 0.8); // slightly lower because human needed
134
+ }
135
+ }
136
+ }
137
+ catch (err) {
138
+ console.warn("[reasonNextTaskStep] Risk reasoning failed", err);
139
+ // fallback: ignore, keep deterministic nextAction
140
+ }
141
+ }
142
+ // ---------------------------
98
143
  // 7️⃣ Ensure a TaskStep exists for nextFile
99
144
  // ---------------------------
100
145
  if (nextFile) {
@@ -11,12 +11,15 @@ import { IGNORED_FOLDER_GLOBS } from '../fileRules/ignoredPaths.js';
11
11
  import { Config } from '../config.js';
12
12
  import { log } from '../utils/log.js';
13
13
  import { startDaemon } from '../commands/DaemonCmd.js';
14
- import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
15
14
  import * as sqlTemplates from '../db/sqlTemplates.js';
16
15
  import { RELATED_FILES_LIMIT } from '../constants.js';
17
16
  import { generate } from '../lib/generate.js';
18
- import { cleanupModule } from '../pipeline/modules/cleanupModule.js';
19
17
  import { logInputOutput } from '../utils/promptLogHelper.js';
18
+ import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
19
+ import { extractTaggedContent } from '../utils/parseTaggedContent.js';
20
+ /* -------------------------------------------------- */
21
+ /* DB LOCK */
22
+ /* -------------------------------------------------- */
20
23
  async function lockDb() {
21
24
  try {
22
25
  return await lockfile.lock(getDbPathForRepo());
@@ -26,6 +29,9 @@ async function lockDb() {
26
29
  throw err;
27
30
  }
28
31
  }
32
+ /* -------------------------------------------------- */
33
+ /* INDEX COMMAND */
34
+ /* -------------------------------------------------- */
29
35
  export async function runIndexCommand() {
30
36
  try {
31
37
  initSchema();
@@ -57,9 +63,6 @@ export async function runIndexCommand() {
57
63
  const type = detectFileType(file);
58
64
  const normalizedPath = path.normalize(file).replace(/\\/g, '/');
59
65
  const filename = path.basename(normalizedPath);
60
- // --------------------------------------------------
61
- // Enqueue file for daemon processing
62
- // --------------------------------------------------
63
66
  db.prepare(upsertFileTemplate).run({
64
67
  path: normalizedPath,
65
68
  filename,
@@ -73,7 +76,7 @@ export async function runIndexCommand() {
73
76
  count++;
74
77
  }
75
78
  catch (err) {
76
- log(`⚠️ Skipped in indexCmd ${file}: ${err instanceof Error ? err.message : err}`);
79
+ log(`⚠️ Skipped in indexCmd ${file}: ${String(err)}`);
77
80
  }
78
81
  }
79
82
  }
@@ -82,110 +85,82 @@ export async function runIndexCommand() {
82
85
  }
83
86
  log('📊 Discovered files by extension:', JSON.stringify(countByExt, null, 2));
84
87
  log(`✅ Done. Enqueued ${count} files for indexing.`);
85
- // Kick the daemon — it now owns all processing
86
88
  startDaemon();
87
89
  }
88
- // --------------------------------------------------
89
- // QUERY API (read-only, used by CLI / raw search)
90
- // --------------------------------------------------
90
+ /* -------------------------------------------------- */
91
+ /* QUERY API */
92
+ /* -------------------------------------------------- */
91
93
  export function queryFiles(safeQuery, limit = 10) {
92
94
  const db = getDbForRepo();
93
95
  return db
94
96
  .prepare(sqlTemplates.queryFilesTemplate)
95
97
  .all(safeQuery, limit);
96
98
  }
97
- // --------------------------------------------------
98
- // SEMANTIC SEARCH (AskCmd, answering user directly)
99
- // - Discards noisy FTS
100
- // - Uses LLM aggressively
101
- // - Optimizes for precision
102
- // --------------------------------------------------
103
- export async function semanticSearchFiles(originalQuery, _query, // ignored now – LLM owns query construction
104
- topK = 5) {
99
+ /* -------------------------------------------------- */
100
+ /* SEMANTIC SEARCH */
101
+ /* -------------------------------------------------- */
102
+ export async function semanticSearchFiles(originalQuery, _query, topK = 5) {
105
103
  const db = getDbForRepo();
106
- // --------------------------------------------------
107
- // 1. LLM → primary FTS query (always)
108
- // --------------------------------------------------
109
104
  const primaryFtsQuery = await generatePrimaryFtsQuery(originalQuery);
110
105
  logInputOutput("semanticSearchFiles LLM primary query", "output", {
111
106
  originalQuery,
112
107
  ftsQuery: primaryFtsQuery,
113
108
  });
114
- // --------------------------------------------------
115
- // 2. Run primary FTS once
116
- // --------------------------------------------------
117
109
  const primaryResults = db
118
110
  .prepare(sqlTemplates.searchFilesTemplate)
119
111
  .all(primaryFtsQuery, RELATED_FILES_LIMIT);
120
112
  if (primaryResults.length > 0) {
121
113
  return rankAndMap(new Map(primaryResults.map(r => [r.id, r])), topK);
122
114
  }
123
- // --------------------------------------------------
124
- // 3. Fallback: LLM 2–3 subqueries (ONLY if zero results)
125
- // --------------------------------------------------
126
- const subQueries = await generateFallbackFtsQueries(originalQuery, primaryFtsQuery);
127
- logInputOutput("semanticSearchFiles LLM fallback queries", "output", {
115
+ const fallbackQuery = await generateFallbackFtsQueries(originalQuery, primaryFtsQuery);
116
+ logInputOutput("semanticSearchFiles LLM fallback query", "output", {
128
117
  originalQuery,
129
118
  primaryFtsQuery,
130
- subQueries,
119
+ fallbackQuery,
131
120
  });
132
- // --------------------------------------------------
133
- // 4. Execute fallback queries sequentially
134
- // --------------------------------------------------
135
- for (const subQuery of subQueries) {
136
- const rows = db
137
- .prepare(sqlTemplates.searchFilesTemplate)
138
- .all(subQuery, RELATED_FILES_LIMIT);
139
- if (rows.length > 0) {
140
- return rankAndMap(new Map(rows.map(r => [r.id, r])), topK);
121
+ if (fallbackQuery && fallbackQuery.length > 0) {
122
+ const stmt = db.prepare(sqlTemplates.searchFilesTemplate);
123
+ for (const query of fallbackQuery) {
124
+ const rows = stmt.all(query, RELATED_FILES_LIMIT);
125
+ if (rows.length > 0) {
126
+ return rankAndMap(new Map(rows.map(r => [r.id, r])), topK);
127
+ }
141
128
  }
142
129
  }
143
- // --------------------------------------------------
144
- // 5. Hard stop
145
- // --------------------------------------------------
146
130
  return [];
147
131
  }
132
+ /* -------------------------------------------------- */
133
+ /* LLM → FTS QUERY GENERATION (TAG-BASED) */
134
+ /* -------------------------------------------------- */
148
135
  async function generatePrimaryFtsQuery(userQuery) {
149
136
  const prompt = `
150
- You are generating a SQLite FTS query for searching a source code repository.
137
+ Generate a SQLite FTS query for searching a source code repository.
151
138
 
152
- Input (natural language):
139
+ Input:
153
140
  "${userQuery}"
154
141
 
155
- Task:
156
- - Produce ONE concise FTS query
157
- - Focus on filenames, symbols, module names, domain nouns
158
- - Prefer literal identifiers likely to exist in code
159
- - NO sentences
160
- - NO stopwords
161
- - NO explanations
162
- - NO wildcards unless absolutely necessary
142
+ Rules:
143
+ - Output ONLY the query terms
163
144
  - Use OR between terms
164
- - **MAX 10 terms only** — be selective and concise
145
+ - Max 10 terms
146
+ - No explanations
147
+ - No sentences
165
148
 
166
- Output JSON ONLY:
167
- {
168
- "ftsQuery": "term1 OR term2 OR term3"
169
- }
149
+ Wrap the result in <FILE_CONTENT> tags.
150
+
151
+ <FILE_CONTENT>
152
+ term1 OR term2 OR term3
153
+ </FILE_CONTENT>
170
154
  `.trim();
171
155
  try {
172
156
  const response = await generate({ content: prompt, query: "" });
173
- const cleaned = await cleanupModule.run({
174
- query: userQuery,
175
- content: response.data,
176
- });
177
- if (cleaned.data &&
178
- typeof cleaned.data === "object" &&
179
- "ftsQuery" in cleaned.data &&
180
- typeof cleaned.data.ftsQuery === "string") {
181
- return cleaned.data.ftsQuery;
182
- }
157
+ const rawText = String(response.data ?? "");
158
+ const { content } = extractTaggedContent(rawText, "FILE_CONTENT");
159
+ return sanitizeQueryForFts(content);
183
160
  }
184
161
  catch (err) {
185
- log(`⚠️ [semanticSearchFiles] Failed to generate primary FTS query: ${String(err)}`);
162
+ return sanitizeQueryForFts(userQuery);
186
163
  }
187
- // Absolute safety fallback — never explode
188
- return sanitizeQueryForFts(userQuery);
189
164
  }
190
165
  async function generateFallbackFtsQueries(userQuery, failedQuery) {
191
166
  const prompt = `
@@ -199,57 +174,44 @@ Primary FTS query returned ZERO results:
199
174
 
200
175
  Task:
201
176
  - Generate 2–3 independent FTS queries (MAX 3)
202
- - Each query should be concise: no more than 10 OR-joined search terms
177
+ - Each query must be a single OR-joined expression
178
+ - Max 10 terms per query
203
179
  - Focus on filenames, symbols, module names
204
- - Avoid natural-language sentences
205
- - Avoid recursion or refinement loops
206
- - Use OR between terms
180
+ - Avoid natural language sentences
181
+ - Avoid explanations or commentary
207
182
 
208
- Output JSON ONLY:
209
- {
210
- "subQueries": [
211
- "query1",
212
- "query2",
213
- "query3"
214
- ]
215
- }
183
+ Output format (STRICT):
184
+ <FILE_CONTENT>
185
+ query1
186
+ query2
187
+ query3
188
+ </FILE_CONTENT>
216
189
  `.trim();
217
190
  try {
218
191
  const response = await generate({ content: prompt, query: "" });
219
- const cleaned = await cleanupModule.run({
220
- query: userQuery,
221
- content: response.data,
222
- });
223
- if (cleaned.data &&
224
- typeof cleaned.data === "object" &&
225
- Array.isArray(cleaned.data.subQueries)) {
226
- return cleaned.data.subQueries
227
- .filter((q) => typeof q === "string")
228
- .slice(0, 3) // cap to 3 queries
229
- .map((q) => q
230
- .split(' OR ')
231
- .map(term => sanitizeQueryForFts(term)) // sanitize each term individually
232
- .slice(0, 10) // cap terms per query
233
- .join(' OR '));
192
+ const rawText = String(response.data ?? "");
193
+ const { content } = extractTaggedContent(rawText, "FILE_CONTENT");
194
+ const subQueries = content
195
+ .split(/\r?\n/)
196
+ .map(q => sanitizeQueryForFts(q.trim()))
197
+ .filter(Boolean)
198
+ .slice(0, 3);
199
+ if (!subQueries.length) {
200
+ throw new Error("No fallback subqueries generated");
234
201
  }
202
+ return subQueries;
235
203
  }
236
204
  catch (err) {
237
- log(`⚠️ [semanticSearchFiles] Failed to generate fallback queries: ${String(err)}`);
205
+ log(`⚠️ [semanticSearchFiles] Fallback FTS generation failed: ${String(err)}`);
206
+ return null;
238
207
  }
239
- return [];
240
208
  }
241
- // --------------------------------------------------
242
- // PLANNER SEARCH (fileSearchModule, discovery)
243
- // - Never discards FTS
244
- // - LLM ONLY if FTS is empty
245
- // - Optimizes for recall
246
- // --------------------------------------------------
209
+ /* -------------------------------------------------- */
210
+ /* PLANNER SEARCH */
211
+ /* -------------------------------------------------- */
247
212
  export async function plannerSearchFiles(originalQuery, query, topK = 5) {
248
213
  const db = getDbForRepo();
249
214
  const seen = new Map();
250
- // -----------------------------
251
- // Primary FTS (always trusted)
252
- // -----------------------------
253
215
  const safeQuery = sanitizeQueryForFts(query);
254
216
  const primaryResults = db
255
217
  .prepare(sqlTemplates.searchFilesTemplate)
@@ -259,36 +221,31 @@ export async function plannerSearchFiles(originalQuery, query, topK = 5) {
259
221
  safeQuery,
260
222
  count: primaryResults.length,
261
223
  });
262
- // -----------------------------
263
- // Only call LLM if FTS is empty
264
- // -----------------------------
265
224
  if (primaryResults.length === 0) {
266
- const llmTerms = await expandQueryWithModel(originalQuery);
267
- logInputOutput("plannerSearchFiles LLM terms (FTS empty)", "output", {
268
- originalQuery,
269
- suggestedTerms: llmTerms,
270
- });
271
- for (const term of llmTerms) {
272
- const safeTerm = sanitizeQueryForFts(term);
225
+ const expanded = await expandQueryWithModel(originalQuery);
226
+ if (expanded) {
227
+ const safeTerm = sanitizeQueryForFts(expanded);
273
228
  const rows = db
274
229
  .prepare(sqlTemplates.searchFilesTemplate)
275
230
  .all(safeTerm, RELATED_FILES_LIMIT);
276
- for (const row of rows) {
277
- if (!seen.has(row.id))
278
- seen.set(row.id, row);
279
- }
231
+ rows.forEach(r => {
232
+ if (!seen.has(r.id))
233
+ seen.set(r.id, r);
234
+ });
280
235
  }
281
236
  }
282
237
  if (seen.size === 0)
283
238
  return [];
284
239
  return rankAndMap(seen, topK);
285
240
  }
286
- // --------------------------------------------------
287
- // Helpers
288
- // --------------------------------------------------
241
+ /* -------------------------------------------------- */
242
+ /* HELPERS */
243
+ /* -------------------------------------------------- */
289
244
  function rankAndMap(seen, topK) {
290
- const merged = Array.from(seen.values()).sort((a, b) => (a.bm25Score ?? 0) - (b.bm25Score ?? 0));
291
- return merged.slice(0, topK).map(r => ({
245
+ return Array.from(seen.values())
246
+ .sort((a, b) => (a.bm25Score ?? 0) - (b.bm25Score ?? 0))
247
+ .slice(0, topK)
248
+ .map(r => ({
292
249
  id: r.id,
293
250
  path: r.path,
294
251
  filename: r.filename,
@@ -300,32 +257,20 @@ function rankAndMap(seen, topK) {
300
257
  }
301
258
  async function expandQueryWithModel(query) {
302
259
  const prompt = `
303
- You are assisting a code search system.
304
-
305
- Given a natural-language question about a codebase, return a JSON array
306
- of 3–8 concrete search terms that are likely to appear literally in source code.
260
+ Return concrete search terms likely to appear in source code.
307
261
 
308
- Rules:
309
- - Return ONLY a JSON array of strings
310
- - No explanations
311
- - Prefer filenames, function names, symbols, library names
262
+ Wrap the result in <FILE_CONTENT> tags.
312
263
 
313
264
  Question:
314
265
  "${query}"
315
266
  `.trim();
316
267
  try {
317
268
  const response = await generate({ content: prompt, query: "" });
318
- const cleaned = await cleanupModule.run({
319
- query,
320
- content: response.data,
321
- });
322
- const terms = Array.isArray(cleaned.data)
323
- ? cleaned.data.filter((t) => typeof t === "string")
324
- : [];
325
- return terms;
269
+ const rawText = String(response.data ?? "");
270
+ const { content } = extractTaggedContent(rawText, "FILE_CONTENT");
271
+ return sanitizeQueryForFts(content);
326
272
  }
327
- catch (err) {
328
- log(`⚠️ [searchFiles] Failed to expand query: ${String(err)}`);
329
- return [];
273
+ catch {
274
+ return null;
330
275
  }
331
276
  }
@@ -28,6 +28,8 @@ export const finalAnswerModule = {
28
28
  (!focus?.relevantFiles || focus.relevantFiles.includes(path)))
29
29
  .map(([path, fa]) => ({ path, analysis: fa }))
30
30
  .slice(0, MAX_FILES);
31
+ // Collect analyzed files for output
32
+ const analyzedFiles = meaningfulFiles.map(f => f.path);
31
33
  // --------------------------------------------------
32
34
  // 2️⃣ Collect supporting code snippets from working files
33
35
  // --------------------------------------------------
@@ -104,6 +106,9 @@ ${query}
104
106
  Rationale for focus:
105
107
  ${rationale}
106
108
 
109
+ Analyzed files:
110
+ ${analyzedFiles.join("\n")}
111
+
107
112
  ==================== PROPOSED CHANGES ====================
108
113
 
109
114
  ${semanticSection}
@@ -130,17 +135,24 @@ ${codeSection}
130
135
  // 5️⃣ Generate final answer
131
136
  // --------------------------------------------------
132
137
  const aiResponse = await generate({ query, content: prompt });
138
+ // ✅ Prepend analyzed files to finalText so user sees them
133
139
  const finalText = typeof aiResponse.data === "string"
134
- ? aiResponse.data
135
- : JSON.stringify(aiResponse.data, null, 2);
140
+ ? `Analyzed files:\n${analyzedFiles.join("\n")}\n\n${aiResponse.data}`
141
+ : `Analyzed files:\n${analyzedFiles.join("\n")}\n\n${JSON.stringify(aiResponse.data, null, 2)}`;
136
142
  context.analysis || (context.analysis = {});
137
143
  context.analysis.finalAnswer = finalText;
138
- logInputOutput("finalAnswerModule", "output", aiResponse.data);
144
+ logInputOutput("finalAnswerModule", "output", {
145
+ data: aiResponse.data,
146
+ analyzedFiles,
147
+ });
139
148
  console.log(chalk.yellow(`\n\n[FINAL ANSWER]\n${finalText}\n`));
140
149
  return {
141
150
  query,
142
151
  content: finalText,
143
- data: aiResponse.data,
152
+ data: {
153
+ response: aiResponse.data,
154
+ analyzedFiles,
155
+ },
144
156
  context,
145
157
  };
146
158
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scai",
3
- "version": "0.1.165",
3
+ "version": "0.1.166",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "scai": "./dist/index.js"