scai 0.1.176 → 0.1.177

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  import { queryFiles } from '../db/fileIndex.js';
2
- import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
2
+ import { compileSearchQuery } from '../utils/compileSearchQuery.js';
3
3
  import path from 'path';
4
4
  import os from 'os';
5
5
  export async function runFindCommand(query) {
@@ -8,8 +8,8 @@ export async function runFindCommand(query) {
8
8
  return;
9
9
  }
10
10
  console.log(`\n🔍 Searching for: "${query}"\n`);
11
- const sanitizedQuery = sanitizeQueryForFts(query);
12
- const results = queryFiles(sanitizedQuery);
11
+ const compiled = compileSearchQuery({ query, mode: "fts" });
12
+ const results = compiled.fts.expression ? queryFiles(compiled.fts.expression) : [];
13
13
  if (results.length === 0) {
14
14
  console.log('⚠️ No matching files found.');
15
15
  return;
@@ -4,7 +4,7 @@ import readline from 'readline';
4
4
  import { queryFiles } from '../db/fileIndex.js';
5
5
  import { summaryModule } from '../pipeline/modules/summaryModule.js';
6
6
  import { detectFileType } from '../fileRules/detectFileType.js';
7
- import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
7
+ import { compileSearchQuery } from '../utils/compileSearchQuery.js';
8
8
  import { styleText } from '../utils/outputFormatter.js';
9
9
  import { indexFile } from '../daemon/runIndexingBatch.js';
10
10
  export async function summarizeFile(filepath) {
@@ -12,8 +12,8 @@ export async function summarizeFile(filepath) {
12
12
  let filePathResolved;
13
13
  // 📁 Resolve file path from index or local disk
14
14
  if (filepath) {
15
- const sanitizedQuery = sanitizeQueryForFts(filepath);
16
- const matches = queryFiles(sanitizedQuery);
15
+ const compiled = compileSearchQuery({ query: filepath, mode: "fts" });
16
+ const matches = compiled.fts.expression ? queryFiles(compiled.fts.expression) : [];
17
17
  if (matches.length > 0) {
18
18
  const topMatch = matches[0];
19
19
  filePathResolved = path.resolve(process.cwd(), topMatch.path);
@@ -34,7 +34,8 @@ export async function summarizeFile(filepath) {
34
34
  }
35
35
  // 📄 Load file content (from path or stdin)
36
36
  if (filePathResolved) {
37
- const matches = queryFiles(`"${filePathResolved}"`);
37
+ const compiled = compileSearchQuery({ query: filePathResolved, mode: "fts" });
38
+ const matches = compiled.fts.expression ? queryFiles(compiled.fts.expression) : [];
38
39
  const match = matches.find(row => path.resolve(row.path) === filePathResolved);
39
40
  if (match?.summary) {
40
41
  console.log(`🧠 Cached summary for ${filepath}:\n`);
@@ -15,9 +15,8 @@ import * as sqlTemplates from '../db/sqlTemplates.js';
15
15
  import { RELATED_FILES_LIMIT } from '../constants.js';
16
16
  import { generate } from '../lib/generate.js';
17
17
  import { logInputOutput } from '../utils/promptLogHelper.js';
18
- import { sanitizeQueryForFts } from '../utils/sanitizeQuery.js';
18
+ import { compileSearchQuery } from '../utils/compileSearchQuery.js';
19
19
  import { extractTaggedContent } from '../utils/parseTaggedContent.js';
20
- import { extractFileReferences } from '../utils/extractFileReferences.js';
21
20
  const QUERY_OPERATOR_TOKENS = new Set(["or", "and", "not", "near"]);
22
21
  const GENERIC_FTS_TERMS = new Set([
23
22
  "file",
@@ -659,12 +658,14 @@ export async function plannerSearchFiles(originalQuery, query, topK = 5) {
659
658
  const db = getDbForRepo();
660
659
  const seen = new Map();
661
660
  const usedQueries = [];
662
- const safeQuery = sanitizeQueryForFts(query);
661
+ const compiledPrimary = compileSearchQuery({ query, mode: "fts" });
662
+ const safeQuery = compiledPrimary.fts.expression;
663
663
  if (safeQuery)
664
664
  usedQueries.push(safeQuery);
665
- const primaryResults = db
666
- .prepare(sqlTemplates.searchFilesTemplate)
667
- .all(safeQuery, RELATED_FILES_LIMIT);
665
+ const primaryResults = safeQuery
666
+ ? db.prepare(sqlTemplates.searchFilesTemplate)
667
+ .all(safeQuery, RELATED_FILES_LIMIT)
668
+ : [];
668
669
  primaryResults.forEach(r => seen.set(r.id, r));
669
670
  logInputOutput("plannerSearchFiles primary FTS", "input", {
670
671
  safeQuery,
@@ -673,12 +674,12 @@ export async function plannerSearchFiles(originalQuery, query, topK = 5) {
673
674
  if (primaryResults.length === 0) {
674
675
  const stmt = db.prepare(sqlTemplates.searchFilesTemplate);
675
676
  const llmPrimaryQuery = await generatePrimaryFtsQuery(originalQuery);
676
- const llmFallbackQueries = await generateFallbackFtsQueries(originalQuery, llmPrimaryQuery || safeQuery);
677
+ const llmFallbackQueries = await generateFallbackFtsQueries(originalQuery, llmPrimaryQuery || safeQuery || originalQuery);
677
678
  const candidateQueries = [];
678
679
  const pushCandidate = (q) => {
679
680
  if (!q)
680
681
  return;
681
- const sanitized = sanitizeQueryForFts(q);
682
+ const sanitized = compileSearchQuery({ query: q, mode: "fts" }).fts.expression;
682
683
  if (!sanitized)
683
684
  return;
684
685
  if (sanitized === safeQuery)
@@ -764,8 +765,12 @@ function buildQueryExpansionTerms(originalQuery, queries) {
764
765
  * - output: "\"fileIndex.ts\" OR semanticsearchfiles* OR output*"
765
766
  */
766
767
  function enforceFtsQueryPolicy(userQuery, candidateQuery, maxTerms, intent = {}) {
767
- const safe = sanitizeQueryForFts(candidateQuery);
768
- const candidateTerms = splitOrTerms(safe);
768
+ const candidateTerms = compileSearchQuery({
769
+ query: candidateQuery,
770
+ intent,
771
+ mode: "fts",
772
+ maxTerms,
773
+ }).fts.terms;
769
774
  const prioritizedAnchors = buildAnchorTerms(userQuery, intent);
770
775
  const hasAnchors = prioritizedAnchors.length > 0;
771
776
  const filtered = candidateTerms.filter(term => {
@@ -777,25 +782,17 @@ function enforceFtsQueryPolicy(userQuery, candidateQuery, maxTerms, intent = {})
777
782
  return true;
778
783
  });
779
784
  const merged = dedupeTerms([...prioritizedAnchors, ...filtered]);
780
- return merged.slice(0, maxTerms).join(" OR ");
781
- }
782
- /**
783
- * Splits an OR query into raw terms.
784
- * Example: "\"fileIndex.ts\" OR semanticsearchfiles* OR output*" ->
785
- * ["\"fileIndex.ts\"", "semanticsearchfiles*", "output*"]
786
- */
787
- function splitOrTerms(query) {
788
- return query
789
- .split(/\s+OR\s+/i)
790
- .map(part => part.trim())
791
- .filter(Boolean);
785
+ return merged
786
+ .slice(0, maxTerms)
787
+ .map(term => `${normalizeFtsTerm(term)}*`)
788
+ .join(" OR ");
792
789
  }
793
790
  /**
794
791
  * Normalizes an FTS term for stable comparisons by removing quotes/wildcards.
795
792
  * Example: "\"fileIndex.ts\"" -> "fileindex.ts", "Module*" -> "module"
796
793
  */
797
794
  function normalizeFtsTerm(term) {
798
- return term.replace(/["*]/g, "").toLowerCase().trim();
795
+ return term.toLowerCase().replace(/[^a-z0-9_]/g, "").trim();
799
796
  }
800
797
  /**
801
798
  * Dedupes terms using normalized keys while preserving first-seen order.
@@ -820,54 +817,10 @@ function dedupeTerms(terms) {
820
817
  * - output includes: ["\"fileIndex.ts\"", "semanticsearchfiles*"]
821
818
  */
822
819
  function buildAnchorTerms(userQuery, intent = {}) {
823
- const anchorTerms = [];
824
- const explicitFiles = extractFileReferences(userQuery);
825
- const targetFiles = Array.isArray(intent.targetFiles)
826
- ? dedupeNormalizedStrings(intent.targetFiles)
827
- : [];
828
- const targetSymbols = Array.isArray(intent.targetSymbols)
829
- ? dedupeNormalizedStrings(intent.targetSymbols)
830
- : [];
831
- for (const fileRef of targetFiles) {
832
- for (const matchedFile of extractFileReferences(fileRef)) {
833
- const filename = path.basename(matchedFile);
834
- if (filename)
835
- anchorTerms.push(`"${filename}"`);
836
- }
837
- }
838
- for (const fileRef of explicitFiles) {
839
- const filename = path.basename(fileRef);
840
- if (filename)
841
- anchorTerms.push(`"${filename}"`);
842
- }
843
- for (const symbol of targetSymbols) {
844
- for (const form of buildSymbolSearchForms(symbol)) {
845
- const normalized = normalizeFtsTerm(form);
846
- if (!normalized)
847
- continue;
848
- if (QUERY_OPERATOR_TOKENS.has(normalized))
849
- continue;
850
- if (GENERIC_FTS_TERMS.has(normalized))
851
- continue;
852
- anchorTerms.push(`${normalized}*`);
853
- }
854
- }
855
- const symbolMatches = userQuery.match(/[A-Za-z_][A-Za-z0-9_]*/g) ?? [];
856
- for (const token of symbolMatches) {
857
- const isLikelySymbol = /[A-Z]/.test(token) ||
858
- token.includes("_") ||
859
- token.endsWith("Step") ||
860
- token.endsWith("Module");
861
- if (!isLikelySymbol)
862
- continue;
863
- const normalized = token.toLowerCase();
864
- if (QUERY_OPERATOR_TOKENS.has(normalized))
865
- continue;
866
- if (GENERIC_FTS_TERMS.has(normalized))
867
- continue;
868
- anchorTerms.push(`${normalized}*`);
869
- }
870
- return dedupeTerms(anchorTerms);
820
+ const compiled = compileSearchQuery({ query: userQuery, intent, mode: "fts" });
821
+ const fileTerms = compiled.anchors.files.flatMap(fileRef => compileSearchQuery({ query: path.basename(fileRef), mode: "fts", maxTerms: 8 }).fts.terms);
822
+ const symbolTerms = compiled.anchors.symbols.flatMap(symbol => compileSearchQuery({ query: symbol, mode: "fts", maxTerms: 8 }).fts.terms);
823
+ return dedupeTerms([...fileTerms, ...symbolTerms]);
871
824
  }
872
825
  function dedupeNormalizedStrings(tokens) {
873
826
  // Lightweight normalization only (trim + slash normalization + case-insensitive dedupe).
@@ -909,7 +862,7 @@ Question:
909
862
  const response = await generate({ content: prompt, query: "" });
910
863
  const rawText = String(response.data ?? "");
911
864
  const { content } = extractTaggedContent(rawText, "FILE_CONTENT");
912
- return sanitizeQueryForFts(content);
865
+ return compileSearchQuery({ query: content, mode: "fts" }).fts.expression;
913
866
  }
914
867
  catch {
915
868
  return null;
@@ -8,6 +8,7 @@ import { getDbForRepo } from "../../db/client.js";
8
8
  import { IGNORED_EXTENSIONS } from "../../fileRules/ignoredExtensions.js";
9
9
  import { IGNORED_DIR_NAMES, isPathIgnoredByFolderGlobs } from "../../fileRules/ignoredPaths.js";
10
10
  import { logInputOutput } from "../../utils/promptLogHelper.js";
11
+ import { compileSearchQuery } from "../../utils/compileSearchQuery.js";
11
12
  async function fetchSummariesForPaths(paths) {
12
13
  if (paths.length === 0)
13
14
  return {};
@@ -66,8 +67,25 @@ export const fileSearchModule = {
66
67
  try {
67
68
  const excludeExtArgs = IGNORED_EXTENSIONS.map(ext => `--exclude=*${ext}`);
68
69
  const excludeDirArgs = IGNORED_DIR_NAMES.map(dir => `--exclude-dir=${dir}`);
69
- const grepArgs = ["-ril", ...excludeDirArgs, ...excludeExtArgs, subQuery, repoRoot];
70
- const stdout = execFileSync("grep", grepArgs, { encoding: "utf8" });
70
+ const compiledFallback = compileSearchQuery({ query: subQuery, mode: "auto" });
71
+ const rgExcludeExtArgs = excludeExtArgs.map(arg => {
72
+ const value = arg.replace("--exclude=", "");
73
+ return `!${value}`;
74
+ });
75
+ const rgExcludeDirArgs = excludeDirArgs.map(arg => {
76
+ const value = arg.replace("--exclude-dir=", "");
77
+ return `!${value}/**`;
78
+ });
79
+ const rgArgs = [
80
+ "--files-with-matches",
81
+ "--no-messages",
82
+ ...rgExcludeDirArgs.flatMap(pattern => ["-g", pattern]),
83
+ ...rgExcludeExtArgs.flatMap(pattern => ["-g", pattern]),
84
+ ...(compiledFallback.regex.enabled ? [] : ["-F"]),
85
+ subQuery,
86
+ repoRoot,
87
+ ];
88
+ const stdout = execFileSync("rg", rgArgs, { encoding: "utf8" });
71
89
  results = stdout
72
90
  .split("\n")
73
91
  .filter(Boolean)
@@ -1,7 +1,7 @@
1
1
  // src/pipeline/modules/gatherInfoModule.ts
2
2
  import { getDbForRepo } from "../../db/client.js";
3
3
  import chalk from "chalk";
4
- import { sanitizeQueryForFts } from "../../utils/sanitizeQuery.js";
4
+ import { compileSearchQuery } from "../../utils/compileSearchQuery.js";
5
5
  import { logInputOutput } from "../../utils/promptLogHelper.js"; // ✅ import logger
6
6
  /** Escape % and _ for LIKE queries */
7
7
  function sanitizeForLike(input) {
@@ -32,13 +32,15 @@ export const gatherInfoModule = {
32
32
  logInputOutput("gatherInfo", "output", stripEmbeddings(emptyOutput)); // ✅
33
33
  return emptyOutput;
34
34
  }
35
- const sanitizedFts = sanitizeQueryForFts(query);
35
+ const compiled = compileSearchQuery({ query, mode: "fts" });
36
+ const sanitizedFts = compiled.fts.expression;
36
37
  const likeQuery = `%${sanitizeForLike(query)}%`;
37
38
  // 🩹 Handle legacy DBs that might not have `functions_extracted`
38
39
  let files = [];
39
40
  try {
40
- files = db
41
- .prepare(`
41
+ if (sanitizedFts) {
42
+ files = db
43
+ .prepare(`
42
44
  SELECT f.id, f.path, f.type, f.summary, f.embedding,
43
45
  f.last_modified, f.indexed_at,
44
46
  COALESCE(f.functions_extracted, 0) AS functions_extracted,
@@ -49,12 +51,14 @@ export const gatherInfoModule = {
49
51
  ORDER BY f.path ASC
50
52
  LIMIT ?;
51
53
  `)
52
- .all(maxFiles);
54
+ .all(maxFiles);
55
+ }
53
56
  }
54
57
  catch (err) {
55
58
  console.warn(chalk.yellow("⚠️ 'functions_extracted' column missing in files table, running fallback query..."));
56
- files = db
57
- .prepare(`
59
+ if (sanitizedFts) {
60
+ files = db
61
+ .prepare(`
58
62
  SELECT f.id, f.path, f.type, f.summary, f.embedding,
59
63
  f.last_modified, f.indexed_at, f.processing_status
60
64
  FROM files f
@@ -63,7 +67,8 @@ export const gatherInfoModule = {
63
67
  ORDER BY f.path ASC
64
68
  LIMIT ?;
65
69
  `)
66
- .all(maxFiles);
70
+ .all(maxFiles);
71
+ }
67
72
  }
68
73
  const functions = db
69
74
  .prepare(`
@@ -0,0 +1,138 @@
1
+ import path from "path";
2
+ import { STOP_WORDS } from "../fileRules/stopWords.js";
3
+ import { extractFileReferences } from "./extractFileReferences.js";
4
+ const QUERY_OPERATOR_TOKENS = new Set(["or", "and", "not", "near"]);
5
+ const GENERIC_FTS_TERMS = new Set([
6
+ "file",
7
+ "files",
8
+ "code",
9
+ "source",
10
+ "repository",
11
+ "result",
12
+ "results",
13
+ "output",
14
+ "entry",
15
+ "database",
16
+ "configuration",
17
+ ]);
18
+ const SYMBOL_HINT_SUFFIXES = ["step", "module", "service", "controller", "handler", "manager"];
19
+ export function compileSearchQuery(args) {
20
+ const rawQuery = String(args.query ?? "").trim();
21
+ const intent = args.intent ?? {};
22
+ const mode = args.mode ?? "auto";
23
+ const maxTerms = Math.max(1, args.maxTerms ?? 12);
24
+ const fileRefs = dedupeNormalizedStrings([
25
+ ...extractFileReferences(rawQuery),
26
+ ...collectIntentFileRefs(intent),
27
+ ]);
28
+ const symbolRefs = dedupeNormalizedStrings([
29
+ ...collectIntentSymbols(intent),
30
+ ...extractLikelySymbols(rawQuery),
31
+ ]);
32
+ const anchorFileTerms = fileRefs.flatMap(ref => toSearchTerms(path.basename(ref)));
33
+ const anchorSymbolTerms = symbolRefs.flatMap(ref => toSearchTerms(ref));
34
+ const baseTerms = dedupeNormalizedStrings([
35
+ ...toSearchTerms(rawQuery),
36
+ ...anchorFileTerms,
37
+ ...anchorSymbolTerms,
38
+ ]);
39
+ const droppedTerms = [];
40
+ const filteredTerms = baseTerms.filter(term => {
41
+ if (term.length < 2) {
42
+ droppedTerms.push(term);
43
+ return false;
44
+ }
45
+ if (STOP_WORDS.has(term) || QUERY_OPERATOR_TOKENS.has(term)) {
46
+ droppedTerms.push(term);
47
+ return false;
48
+ }
49
+ return true;
50
+ });
51
+ const hasAnchors = anchorFileTerms.length > 0 || anchorSymbolTerms.length > 0;
52
+ const ftsTerms = dedupeNormalizedStrings(filteredTerms.filter(term => !(hasAnchors && GENERIC_FTS_TERMS.has(term)))).slice(0, maxTerms);
53
+ const literalTerms = dedupeNormalizedStrings([
54
+ rawQuery,
55
+ ...fileRefs,
56
+ ...symbolRefs,
57
+ ...filteredTerms,
58
+ ]).filter(Boolean);
59
+ const regexEnabled = mode === "regex" || (mode === "auto" && looksLikeRegex(rawQuery));
60
+ return {
61
+ rawQuery,
62
+ mode,
63
+ anchors: {
64
+ files: fileRefs,
65
+ symbols: symbolRefs,
66
+ },
67
+ fts: {
68
+ terms: ftsTerms,
69
+ expression: ftsTerms.length > 0 ? ftsTerms.map(term => `${term}*`).join(" OR ") : null,
70
+ },
71
+ literal: {
72
+ terms: literalTerms,
73
+ },
74
+ regex: {
75
+ terms: regexEnabled ? [rawQuery].filter(Boolean) : [],
76
+ enabled: regexEnabled,
77
+ },
78
+ diagnostics: {
79
+ droppedTerms,
80
+ },
81
+ };
82
+ }
83
+ function collectIntentFileRefs(intent) {
84
+ if (!Array.isArray(intent.targetFiles))
85
+ return [];
86
+ return intent.targetFiles
87
+ .map(entry => String(entry ?? "").trim())
88
+ .filter(Boolean);
89
+ }
90
+ function collectIntentSymbols(intent) {
91
+ if (!Array.isArray(intent.targetSymbols))
92
+ return [];
93
+ return intent.targetSymbols
94
+ .map(entry => String(entry ?? "").trim())
95
+ .filter(Boolean);
96
+ }
97
+ function dedupeNormalizedStrings(tokens) {
98
+ const out = [];
99
+ const seen = new Set();
100
+ for (const token of tokens) {
101
+ if (typeof token !== "string")
102
+ continue;
103
+ const normalized = token.trim().replace(/\\/g, "/");
104
+ if (!normalized)
105
+ continue;
106
+ const key = normalized.toLowerCase();
107
+ if (seen.has(key))
108
+ continue;
109
+ seen.add(key);
110
+ out.push(normalized);
111
+ }
112
+ return out;
113
+ }
114
+ function extractLikelySymbols(input) {
115
+ const matches = String(input ?? "").match(/[A-Za-z_][A-Za-z0-9_]{2,}/g) ?? [];
116
+ const out = new Set();
117
+ for (const token of matches) {
118
+ const lowered = token.toLowerCase();
119
+ const looksLikeSymbol = /[A-Z]/.test(token) ||
120
+ token.includes("_") ||
121
+ SYMBOL_HINT_SUFFIXES.some(suffix => lowered.endsWith(suffix));
122
+ if (!looksLikeSymbol)
123
+ continue;
124
+ out.add(token);
125
+ }
126
+ return Array.from(out);
127
+ }
128
+ function toSearchTerms(input) {
129
+ const matches = String(input ?? "")
130
+ .toLowerCase()
131
+ .match(/[a-z0-9_]{2,}/g) ?? [];
132
+ return Array.from(new Set(matches));
133
+ }
134
+ function looksLikeRegex(input) {
135
+ if (!input)
136
+ return false;
137
+ return /[\[\]{}()+?|\\.^$]/.test(input);
138
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "scai",
3
- "version": "0.1.176",
3
+ "version": "0.1.177",
4
4
  "type": "module",
5
5
  "bin": {
6
6
  "scai": "./dist/index.js"