supipowers 2.0.2 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76):
  1. package/README.md +5 -6
  2. package/package.json +4 -2
  3. package/skills/harness/SKILL.md +1 -0
  4. package/src/bootstrap.ts +5 -133
  5. package/src/config/defaults.ts +5 -5
  6. package/src/config/loader.ts +1 -0
  7. package/src/config/schema.ts +2 -6
  8. package/src/context-mode/knowledge/store.ts +381 -43
  9. package/src/context-mode/tools.ts +41 -3
  10. package/src/deps/registry.ts +1 -12
  11. package/src/fix-pr/assessment.ts +1 -0
  12. package/src/fix-pr/prompt-builder.ts +1 -0
  13. package/src/git/commit.ts +76 -18
  14. package/src/harness/command.ts +103 -6
  15. package/src/harness/default-agents/docs.md +39 -0
  16. package/src/harness/docs/config.ts +29 -0
  17. package/src/harness/docs/glob-match.ts +27 -0
  18. package/src/harness/docs/index-renderer.ts +82 -0
  19. package/src/harness/docs/provenance.ts +125 -0
  20. package/src/harness/docs/regen-decision.ts +167 -0
  21. package/src/harness/docs/representative-files.ts +175 -0
  22. package/src/harness/docs/source-hash.ts +106 -0
  23. package/src/harness/docs/validator.ts +233 -0
  24. package/src/harness/hooks/layer-context-inject.ts +35 -1
  25. package/src/harness/hooks/register.ts +24 -3
  26. package/src/harness/pipeline.ts +20 -5
  27. package/src/harness/pr-comment/baseline.ts +105 -0
  28. package/src/harness/pr-comment/ci-env.ts +120 -0
  29. package/src/harness/pr-comment/gh-poster.ts +227 -0
  30. package/src/harness/pr-comment/handler.ts +198 -0
  31. package/src/harness/pr-comment/render.ts +297 -0
  32. package/src/harness/pr-comment/status.ts +95 -0
  33. package/src/harness/pr-comment/types.ts +73 -0
  34. package/src/harness/pr-comment/workflow-summary.ts +47 -0
  35. package/src/harness/project-paths.ts +95 -0
  36. package/src/harness/stages/design.ts +1 -0
  37. package/src/harness/stages/discover.ts +1 -13
  38. package/src/harness/stages/docs.ts +708 -0
  39. package/src/harness/stages/implement-apply.ts +877 -0
  40. package/src/harness/stages/implement.ts +64 -51
  41. package/src/harness/stages/plan.ts +25 -16
  42. package/src/harness/stages/validate.ts +370 -0
  43. package/src/harness/storage.ts +142 -0
  44. package/src/harness/tools.ts +130 -0
  45. package/src/mempalace/bridge.ts +207 -41
  46. package/src/mempalace/config.ts +10 -4
  47. package/src/mempalace/format.ts +122 -6
  48. package/src/mempalace/hooks.ts +204 -56
  49. package/src/mempalace/installer-helper.ts +18 -4
  50. package/src/mempalace/python/mempalace_bridge.py +128 -3
  51. package/src/mempalace/runtime.ts +53 -16
  52. package/src/mempalace/schema.ts +151 -30
  53. package/src/mempalace/session-summary.ts +5 -0
  54. package/src/mempalace/tool.ts +17 -4
  55. package/src/mempalace/upstream-limits.ts +69 -0
  56. package/src/planning/approval-flow.ts +25 -2
  57. package/src/planning/planning-ask-tool.ts +34 -4
  58. package/src/planning/system-prompt.ts +1 -1
  59. package/src/tool-catalog/active-tool-controller.ts +0 -22
  60. package/src/tool-catalog/active-tool-planner.ts +0 -26
  61. package/src/tool-catalog/tool-groups.ts +1 -9
  62. package/src/types.ts +87 -8
  63. package/src/ui-design/session.ts +114 -8
  64. package/src/utils/executable.ts +10 -1
  65. package/src/workspace/state-paths.ts +1 -1
  66. package/src/commands/mcp.ts +0 -814
  67. package/src/mcp/activation.ts +0 -77
  68. package/src/mcp/config.ts +0 -223
  69. package/src/mcp/docs.ts +0 -154
  70. package/src/mcp/gateway.ts +0 -103
  71. package/src/mcp/lifecycle.ts +0 -79
  72. package/src/mcp/manager-tool.ts +0 -104
  73. package/src/mcp/mcpc.ts +0 -113
  74. package/src/mcp/registry.ts +0 -98
  75. package/src/mcp/triggers.ts +0 -62
  76. package/src/mcp/types.ts +0 -95
@@ -11,6 +11,8 @@ export interface SearchOptions {
11
11
  includeAllSessions?: boolean;
12
12
  }
13
13
 
14
+ export type SearchMatchLayer = "porter" | "trigram" | "rrf" | "rrf-fuzzy";
15
+
14
16
  export interface SearchResult {
15
17
  title: string;
16
18
  body: string;
@@ -19,6 +21,12 @@ export interface SearchResult {
19
21
  score: number;
20
22
  ownerScope: KnowledgeOwnerScope;
21
23
  ownerId: string;
24
+ /** Which layer of the fallback chain surfaced this row. Optional for backward compat. */
25
+ matchLayer?: SearchMatchLayer;
26
+ }
27
+
28
+ interface RankedSearchResult extends SearchResult {
29
+ chunkId: number;
22
30
  }
23
31
 
24
32
  export interface QueryGroupedResults {
@@ -37,7 +45,7 @@ export interface KnowledgeClearResult {
37
45
  urlCacheDeleted: number;
38
46
  }
39
47
 
40
- const SCHEMA_VERSION = 2;
48
+ const SCHEMA_VERSION = 3;
41
49
 
42
50
  const SCHEMA = `
43
51
  CREATE TABLE IF NOT EXISTS content_chunks (
@@ -58,12 +66,26 @@ CREATE VIRTUAL TABLE IF NOT EXISTS content_chunks_fts USING fts5(
58
66
  tokenize='porter'
59
67
  );
60
68
 
69
+ CREATE VIRTUAL TABLE IF NOT EXISTS content_chunks_trigram USING fts5(
70
+ title,
71
+ body,
72
+ content='content_chunks',
73
+ content_rowid='id',
74
+ tokenize='trigram'
75
+ );
76
+
77
+ CREATE TABLE IF NOT EXISTS vocabulary (
78
+ word TEXT PRIMARY KEY
79
+ );
80
+
61
81
  CREATE TRIGGER IF NOT EXISTS content_chunks_ai AFTER INSERT ON content_chunks BEGIN
62
82
  INSERT INTO content_chunks_fts(rowid, title, body) VALUES (new.id, new.title, new.body);
83
+ INSERT INTO content_chunks_trigram(rowid, title, body) VALUES (new.id, new.title, new.body);
63
84
  END;
64
85
 
65
86
  CREATE TRIGGER IF NOT EXISTS content_chunks_ad AFTER DELETE ON content_chunks BEGIN
66
87
  INSERT INTO content_chunks_fts(content_chunks_fts, rowid, title, body) VALUES ('delete', old.id, old.title, old.body);
88
+ INSERT INTO content_chunks_trigram(content_chunks_trigram, rowid, title, body) VALUES ('delete', old.id, old.title, old.body);
67
89
  END;
68
90
 
69
91
  CREATE INDEX IF NOT EXISTS idx_content_chunks_owner ON content_chunks(owner_scope, owner_id);
@@ -106,7 +128,50 @@ export class KnowledgeStore {
106
128
  this.#ensureDeleteJournalMode();
107
129
  this.#migrate();
108
130
  this._db.exec(SCHEMA);
131
+ this.#rebuildFtsIfNeeded();
132
+ this.#backfillVocabularyIfNeeded();
133
+ }
134
+
135
+ #rebuildFtsIfNeeded(): void {
136
+ const chunkCount = this._db.prepare("SELECT COUNT(*) AS cnt FROM content_chunks").get() as { cnt: number };
137
+ if (chunkCount.cnt === 0) return;
138
+
139
+ const ftsCount = this.#countRows("content_chunks_fts");
140
+ const trigramCount = this.#countRows("content_chunks_trigram");
141
+ if (ftsCount === chunkCount.cnt && trigramCount === chunkCount.cnt) return;
142
+
109
143
  this._db.exec("INSERT INTO content_chunks_fts(content_chunks_fts) VALUES('rebuild')");
144
+ this._db.exec("INSERT INTO content_chunks_trigram(content_chunks_trigram) VALUES('rebuild')");
145
+ }
146
+
147
+ #countRows(table: "content_chunks_fts" | "content_chunks_trigram"): number {
148
+ const row = this._db.prepare(`SELECT COUNT(*) AS cnt FROM ${table}`).get() as { cnt: number };
149
+ return row.cnt;
150
+ }
151
+
152
+ /**
153
+ * Populate `vocabulary` from existing chunks when it is empty but the store
154
+ * is not. Runs once on the first init() after a v2 → v3 migration; a no-op
155
+ * for fresh stores (no chunks) and for already-populated stores.
156
+ */
157
+ #backfillVocabularyIfNeeded(): void {
158
+ const vocabCount = this._db.prepare("SELECT COUNT(*) AS cnt FROM vocabulary").get() as { cnt: number };
159
+ if (vocabCount.cnt > 0) return;
160
+
161
+ const chunkCount = this._db.prepare("SELECT COUNT(*) AS cnt FROM content_chunks").get() as { cnt: number };
162
+ if (chunkCount.cnt === 0) return;
163
+
164
+ const ins = this._db.prepare("INSERT OR IGNORE INTO vocabulary (word) VALUES (?)");
165
+ const rows = this._db
166
+ .prepare("SELECT title, body FROM content_chunks")
167
+ .iterate() as IterableIterator<{ title: string; body: string }>;
168
+ this._db.transaction(() => {
169
+ for (const row of rows) {
170
+ for (const word of extractVocabWords(`${row.title}\n${row.body}`)) {
171
+ ins.run(word);
172
+ }
173
+ }
174
+ })();
110
175
  }
111
176
 
112
177
  #ensureDeleteJournalMode(): void {
@@ -176,6 +241,15 @@ export class KnowledgeStore {
176
241
  `);
177
242
  }
178
243
 
244
+ if (user_version < 3) {
245
+ // v2 → v3: the INSERT/DELETE triggers now also fan out to
246
+ // `content_chunks_trigram`. Drop the legacy single-table triggers so the
247
+ // idempotent CREATE TRIGGER IF NOT EXISTS in `SCHEMA` reinstalls the
248
+ // multi-table versions. Trigram + vocab backfill happen in init().
249
+ this._db.exec(`DROP TRIGGER IF EXISTS content_chunks_ai;`);
250
+ this._db.exec(`DROP TRIGGER IF EXISTS content_chunks_ad;`);
251
+ }
252
+
179
253
  this._db.exec(`PRAGMA user_version = ${SCHEMA_VERSION}`);
180
254
  }
181
255
 
@@ -193,6 +267,7 @@ export class KnowledgeStore {
193
267
  `INSERT INTO content_chunks (source, title, body, content_type, owner_scope, owner_id)
194
268
  VALUES (?, ?, ?, ?, ?, ?)`,
195
269
  );
270
+ const vocabIns = this._db.prepare("INSERT OR IGNORE INTO vocabulary (word) VALUES (?)");
196
271
 
197
272
  this._db.transaction(() => {
198
273
  del.run(source, resolvedOwner.ownerScope, resolvedOwner.ownerId, resolvedOwner.ownerScope);
@@ -205,6 +280,9 @@ export class KnowledgeStore {
205
280
  resolvedOwner.ownerScope,
206
281
  resolvedOwner.ownerId,
207
282
  );
283
+ for (const word of extractVocabWords(`${chunk.title}\n${chunk.body}`)) {
284
+ vocabIns.run(word);
285
+ }
208
286
  }
209
287
  })();
210
288
  }
@@ -216,53 +294,126 @@ export class KnowledgeStore {
216
294
  const results: QueryGroupedResults[] = [];
217
295
 
218
296
  for (const query of queries) {
219
- const sanitized = sanitizeFtsQuery(query);
220
- if (!sanitized) {
297
+ const tokens = tokenizeQuery(query);
298
+ if (tokens.length === 0) {
221
299
  results.push({ query, results: [] });
222
300
  continue;
223
301
  }
224
302
 
225
- let sql = `
226
- SELECT c.title, c.body, c.source, c.content_type AS contentType,
227
- c.owner_scope AS ownerScope,
228
- c.owner_id AS ownerId,
229
- bm25(content_chunks_fts, 5.0, 1.0) AS score
230
- FROM content_chunks_fts f
231
- JOIN content_chunks c ON c.id = f.rowid
232
- WHERE content_chunks_fts MATCH ?
233
- `;
234
- const params: (string | number)[] = [sanitized];
235
-
236
- if (options?.source) {
237
- sql += " AND c.source LIKE '%' || ? || '%'";
238
- params.push(options.source);
239
- }
240
- if (options?.contentType) {
241
- sql += " AND c.content_type = ?";
242
- params.push(options.contentType);
243
- }
303
+ const fetchLimit = Math.max(limit * 2, 10);
304
+ const porterRows = this.#runFts("content_chunks_fts", buildOrQuery(tokens), fetchLimit, options);
305
+ const trigramRows = this.#runFts("content_chunks_trigram", buildOrQuery(tokens.filter(t => t.length >= 3)), fetchLimit, options);
244
306
 
245
- const visibility = buildVisibilityClause(options);
246
- if (visibility) {
247
- sql += ` AND ${visibility.sql}`;
248
- params.push(...visibility.params);
249
- }
250
-
251
- sql += " ORDER BY score LIMIT ?";
252
- params.push(limit);
307
+ let fused = rrfFuse(porterRows, trigramRows, limit, "rrf");
253
308
 
254
- try {
255
- const rows = this._db.prepare(sql).all(...params) as SearchResult[];
256
- results.push({ query, results: rows });
257
- } catch {
258
- // FTS5 query syntax error return empty for this query
259
- results.push({ query, results: [] });
309
+ if (fused.length === 0) {
310
+ const corrected = this.#fuzzyCorrectTokens(tokens);
311
+ if (corrected && corrected.join(" ") !== tokens.join(" ")) {
312
+ const porter2 = this.#runFts("content_chunks_fts", buildOrQuery(corrected), fetchLimit, options);
313
+ const trigram2 = this.#runFts("content_chunks_trigram", buildOrQuery(corrected.filter(t => t.length >= 3)), fetchLimit, options);
314
+ fused = rrfFuse(porter2, trigram2, limit, "rrf-fuzzy");
315
+ }
260
316
  }
317
+
318
+ const reranked = applyProximityReranking(fused, tokens);
319
+ results.push({ query, results: reranked });
261
320
  }
262
321
 
263
322
  return results;
264
323
  }
265
324
 
325
+ /**
326
+ * Run one FTS5 MATCH query against `table` with the standard source /
327
+ * contentType / visibility filters. Returns empty on FTS5 syntax errors so
328
+ * a single bad token in a multi-query call cannot break sibling queries.
329
+ */
330
+ #runFts(
331
+ table: "content_chunks_fts" | "content_chunks_trigram",
332
+ matchExpr: string,
333
+ limit: number,
334
+ options: SearchOptions | undefined,
335
+ ): RankedSearchResult[] {
336
+ if (!matchExpr) return [];
337
+
338
+ const sql: string[] = [
339
+ `SELECT c.id AS chunkId, c.title, c.body, c.source, c.content_type AS contentType,`,
340
+ ` c.owner_scope AS ownerScope,`,
341
+ ` c.owner_id AS ownerId,`,
342
+ ` bm25(${table}, 5.0, 1.0) AS score`,
343
+ `FROM ${table} f`,
344
+ `JOIN content_chunks c ON c.id = f.rowid`,
345
+ `WHERE ${table} MATCH ?`,
346
+ ];
347
+ const params: (string | number)[] = [matchExpr];
348
+
349
+ if (options?.source) {
350
+ sql.push("AND c.source LIKE '%' || ? || '%'");
351
+ params.push(options.source);
352
+ }
353
+ if (options?.contentType) {
354
+ sql.push("AND c.content_type = ?");
355
+ params.push(options.contentType);
356
+ }
357
+
358
+ const visibility = buildVisibilityClause(options);
359
+ if (visibility) {
360
+ sql.push(`AND ${visibility.sql}`);
361
+ params.push(...visibility.params);
362
+ }
363
+
364
+ sql.push("ORDER BY score LIMIT ?");
365
+ params.push(limit);
366
+
367
+ try {
368
+ return this._db.prepare(sql.join("\n")).all(...params) as RankedSearchResult[];
369
+ } catch {
370
+ return [];
371
+ }
372
+ }
373
+
374
+ /**
375
+ * Try to repair each token via Levenshtein lookup against `vocabulary`.
376
+ * Returns null when nothing was corrected (caller skips fuzzy retry).
377
+ */
378
+ #fuzzyCorrectTokens(tokens: string[]): string[] | null {
379
+ const candidatesByLen = this._db.prepare(
380
+ "SELECT word FROM vocabulary WHERE length(word) BETWEEN ? AND ?",
381
+ );
382
+ const corrected: string[] = [];
383
+ let changed = false;
384
+ for (const token of tokens) {
385
+ const lower = token.toLowerCase();
386
+ if (lower.length < 3) {
387
+ corrected.push(token);
388
+ continue;
389
+ }
390
+ const maxDist = maxEditDistance(lower.length);
391
+ const candidates = candidatesByLen.all(lower.length - maxDist, lower.length + maxDist) as Array<{ word: string }>;
392
+ let best: string | null = null;
393
+ let bestDist = maxDist + 1;
394
+ let exact = false;
395
+ for (const { word } of candidates) {
396
+ if (word === lower) { exact = true; break; }
397
+ const dist = levenshtein(lower, word);
398
+ if (dist < bestDist) {
399
+ bestDist = dist;
400
+ best = word;
401
+ }
402
+ }
403
+ if (exact) {
404
+ corrected.push(token);
405
+ continue;
406
+ }
407
+ if (best && bestDist <= maxDist) {
408
+ corrected.push(best);
409
+ changed = true;
410
+ } else {
411
+ corrected.push(token);
412
+ }
413
+ }
414
+ return changed ? corrected : null;
415
+ }
416
+
266
417
  purge(): number {
267
418
  const row = this._db.prepare("SELECT COUNT(*) AS cnt FROM content_chunks").get() as {
268
419
  cnt: number;
@@ -270,6 +421,8 @@ export class KnowledgeStore {
270
421
  const count = row.cnt;
271
422
  this._db.exec("DELETE FROM content_chunks");
272
423
  this._db.exec("INSERT INTO content_chunks_fts(content_chunks_fts) VALUES('rebuild')");
424
+ this._db.exec("INSERT INTO content_chunks_trigram(content_chunks_trigram) VALUES('rebuild')");
425
+ this._db.exec("DELETE FROM vocabulary");
273
426
  this._db.exec("DELETE FROM url_cache");
274
427
  return count;
275
428
  }
@@ -325,6 +478,7 @@ export class KnowledgeStore {
325
478
  "DELETE FROM url_cache WHERE owner_scope = 'session' AND owner_id = ?",
326
479
  ).run(ownerId);
327
480
  this._db.exec("INSERT INTO content_chunks_fts(content_chunks_fts) VALUES('rebuild')");
481
+ this._db.exec("INSERT INTO content_chunks_trigram(content_chunks_trigram) VALUES('rebuild')");
328
482
  return { chunksDeleted: chunks.cnt, urlCacheDeleted: urls.cnt };
329
483
  }
330
484
 
@@ -338,6 +492,8 @@ export class KnowledgeStore {
338
492
  this._db.exec("DELETE FROM content_chunks");
339
493
  this._db.exec("DELETE FROM url_cache");
340
494
  this._db.exec("INSERT INTO content_chunks_fts(content_chunks_fts) VALUES('rebuild')");
495
+ this._db.exec("INSERT INTO content_chunks_trigram(content_chunks_trigram) VALUES('rebuild')");
496
+ this._db.exec("DELETE FROM vocabulary");
341
497
  return { chunksDeleted: chunks.cnt, urlCacheDeleted: urls.cnt };
342
498
  }
343
499
 
@@ -420,12 +576,194 @@ function addColumnIfMissing(db: Database, table: string, column: string, definit
420
576
  db.exec(`ALTER TABLE ${table} ADD COLUMN ${column} ${definition}`);
421
577
  }
422
578
 
423
- /** Strip FTS5 special operators to prevent syntax errors. Keep alphanumeric + spaces. */
424
- function sanitizeFtsQuery(query: string): string {
425
- // Remove characters that have special meaning in FTS5: ^, *, ", (, ), {, }, +, -
426
- // Keep words separated by spaces for implicit AND matching
579
+ // ── Tokenization ─────────────────────────────────────────────
580
+
581
+ /** Common English stopwords and noise terms kept out of the vocabulary
582
+ * table so fuzzy correction does not snap rare typos to "the" or "fix". */
583
+ const STOPWORDS = new Set<string>([
584
+ "the", "and", "for", "are", "but", "not", "you", "all", "can", "had",
585
+ "her", "was", "one", "our", "out", "has", "his", "how", "its", "may",
586
+ "new", "now", "old", "see", "way", "who", "did", "get", "got", "let",
587
+ "say", "she", "too", "use", "will", "with", "this", "that", "from",
588
+ "they", "been", "have", "many", "some", "them", "than", "each", "make",
589
+ "like", "just", "over", "such", "take", "into", "year", "your", "good",
590
+ "could", "would", "about", "which", "their", "there", "other", "after",
591
+ "should", "through", "also", "more", "most", "only", "very", "when",
592
+ "what", "then", "these", "those", "being", "does", "done", "both",
593
+ "same", "still", "while", "where", "here", "were", "much",
594
+ "update", "updates", "updated", "deps", "dev", "tests", "test",
595
+ "add", "added", "fix", "fixed", "run", "running", "using",
596
+ ]);
597
+
598
+ /** FTS5 operators we strip from queries to avoid syntax errors. */
599
+ const FTS5_OPERATORS = new Set(["AND", "OR", "NOT", "NEAR"]);
600
+
601
+ /**
602
+ * Split a user query into FTS5-safe tokens.
603
+ *
604
+ * - Unicode-letters, digits, and underscore are token chars (so snake_case
605
+ * stays joined for trigram matching — the porter tokenizer will resplit
606
+ * on underscore at index-time, which is what we want).
607
+ * - Bare FTS5 operators (`AND`, `OR`, `NOT`, `NEAR`) are dropped.
608
+ * - Returns lowercase tokens with no quoting; caller picks AND/OR shape.
609
+ */
610
+ function tokenizeQuery(query: string): string[] {
427
611
  return query
428
- .replace(/[^\p{L}\p{N}\s]/gu, " ")
429
- .replace(/\s+/g, " ")
430
- .trim();
612
+ .replace(/[^\p{L}\p{N}_\s]/gu, " ")
613
+ .split(/\s+/)
614
+ .filter((w) => w.length > 0 && !FTS5_OPERATORS.has(w.toUpperCase()))
615
+ .map((w) => w.toLowerCase());
616
+ }
617
+
618
+ /** Build an FTS5 OR query: each token quoted and joined by " OR ". */
619
+ function buildOrQuery(tokens: string[]): string {
620
+ if (tokens.length === 0) return "";
621
+ return tokens.map((t) => `"${t.replace(/"/g, "")}"`).filter((t) => t !== `""`).join(" OR ");
622
+ }
623
+
624
+ /** Words ≥3 chars, stopword-filtered, lowercased — used for the vocab table. */
625
+ function extractVocabWords(text: string): Set<string> {
626
+ const seen = new Set<string>();
627
+ for (const raw of text.toLowerCase().split(/[^\p{L}\p{N}_-]+/u)) {
628
+ if (raw.length < 3) continue;
629
+ if (STOPWORDS.has(raw)) continue;
630
+ seen.add(raw);
631
+ }
632
+ return seen;
633
+ }
634
+
635
+ // ── Fuzzy correction ─────────────────────────────────────────
636
+
637
+ /** Edit-distance budget by word length — short words tolerate fewer typos. */
638
+ function maxEditDistance(wordLength: number): number {
639
+ if (wordLength <= 4) return 1;
640
+ if (wordLength <= 12) return 2;
641
+ return 3;
642
+ }
643
+
644
+ function levenshtein(a: string, b: string): number {
645
+ if (a.length === 0) return b.length;
646
+ if (b.length === 0) return a.length;
647
+ let prev = Array.from({ length: b.length + 1 }, (_, i) => i);
648
+ for (let i = 1; i <= a.length; i++) {
649
+ const curr = [i];
650
+ for (let j = 1; j <= b.length; j++) {
651
+ curr[j] = a[i - 1] === b[j - 1]
652
+ ? prev[j - 1]
653
+ : 1 + Math.min(prev[j], curr[j - 1], prev[j - 1]);
654
+ }
655
+ prev = curr;
656
+ }
657
+ return prev[b.length];
658
+ }
659
+
660
+ // ── Reciprocal Rank Fusion (Cormack et al. 2009) ────────────
661
+
662
+ /**
663
+ * Fuse two BM25-ranked result lists into one ranking. Standard RRF with
664
+ * K = 60: each result contributes 1/(K + rank) to its chunk-id key,
665
+ * top-`limit` survives, lower RRF score becomes a more negative `score`
666
+ * for downstream ORDER-BY-`score` ascending callers (e.g. tests).
667
+ */
668
+ function rrfFuse(
669
+ porter: RankedSearchResult[],
670
+ trigram: RankedSearchResult[],
671
+ limit: number,
672
+ layer: SearchMatchLayer,
673
+ ): SearchResult[] {
674
+ const K = 60;
675
+ const scoreMap = new Map<number, { result: RankedSearchResult; score: number }>();
676
+ const key = (r: RankedSearchResult) => r.chunkId;
677
+
678
+ for (let i = 0; i < porter.length; i++) {
679
+ const r = porter[i];
680
+ const k = key(r);
681
+ const existing = scoreMap.get(k);
682
+ const contribution = 1 / (K + i + 1);
683
+ if (existing) existing.score += contribution;
684
+ else scoreMap.set(k, { result: r, score: contribution });
685
+ }
686
+ for (let i = 0; i < trigram.length; i++) {
687
+ const r = trigram[i];
688
+ const k = key(r);
689
+ const existing = scoreMap.get(k);
690
+ const contribution = 1 / (K + i + 1);
691
+ if (existing) existing.score += contribution;
692
+ else scoreMap.set(k, { result: r, score: contribution });
693
+ }
694
+
695
+ return Array.from(scoreMap.values())
696
+ .sort((a, b) => b.score - a.score)
697
+ .slice(0, limit)
698
+ .map(({ result, score }) => {
699
+ const { chunkId: _chunkId, ...publicResult } = result;
700
+ return { ...publicResult, score: -score, matchLayer: layer };
701
+ });
702
+ }
703
+
704
+ // ── Proximity reranking ─────────────────────────────────────
705
+
706
+ function findAllPositions(text: string, term: string): number[] {
707
+ if (!term) return [];
708
+ const positions: number[] = [];
709
+ let idx = text.indexOf(term);
710
+ while (idx !== -1) {
711
+ positions.push(idx);
712
+ idx = text.indexOf(term, idx + 1);
713
+ }
714
+ return positions;
715
+ }
716
+
717
+ /**
718
+ * Find the minimum span (window size in chars) covering one position from
719
+ * each list. Sweep-line: advance the pointer at the current minimum.
720
+ */
721
+ function findMinSpan(positionLists: number[][]): number {
722
+ if (positionLists.length === 0) return Infinity;
723
+ if (positionLists.length === 1) return 0;
724
+
725
+ const sorted = positionLists.map((p) => [...p].sort((a, b) => a - b));
726
+ const ptrs = new Array(sorted.length).fill(0);
727
+ let minSpan = Infinity;
728
+
729
+ while (true) {
730
+ let curMin = Infinity;
731
+ let curMax = -Infinity;
732
+ let minIdx = 0;
733
+ for (let i = 0; i < sorted.length; i++) {
734
+ const val = sorted[i][ptrs[i]];
735
+ if (val < curMin) { curMin = val; minIdx = i; }
736
+ if (val > curMax) curMax = val;
737
+ }
738
+ const span = curMax - curMin;
739
+ if (span < minSpan) minSpan = span;
740
+ ptrs[minIdx]++;
741
+ if (ptrs[minIdx] >= sorted[minIdx].length) break;
742
+ }
743
+
744
+ return minSpan;
745
+ }
746
+
747
+ /**
748
+ * For multi-term queries, rerank fused results so that rows where the terms
749
+ * appear close together (small min-span) float to the top. Single-term
750
+ * queries are returned untouched.
751
+ */
752
+ function applyProximityReranking(results: SearchResult[], tokens: string[]): SearchResult[] {
753
+ const terms = tokens.filter((t) => t.length >= 2);
754
+ if (terms.length < 2) return results;
755
+
756
+ return results
757
+ .map((r) => {
758
+ const haystack = r.body.toLowerCase();
759
+ const positions = terms.map((t) => findAllPositions(haystack, t));
760
+ if (positions.some((p) => p.length === 0)) {
761
+ return { result: r, boost: 0 };
762
+ }
763
+ const minSpan = findMinSpan(positions);
764
+ const boost = 1 / (1 + minSpan / Math.max(haystack.length, 1));
765
+ return { result: r, boost };
766
+ })
767
+ .sort((a, b) => b.boost - a.boost || a.result.score - b.result.score)
768
+ .map(({ result }) => result);
431
769
  }
@@ -303,12 +303,30 @@ const AUTO_INDEX_MAX_FILE_BYTES = 256 * 1024;
303
303
  const AUTO_INDEX_MAX_DEPTH = 8;
304
304
  const AUTO_INDEX_MIN_TERM_LEN = 3;
305
305
 
306
- /** Sessions for which we have already attempted bootstrap. Prevents repeat scans. */
306
+ /**
307
+ * Sessions for which bootstrap should NOT run again. A `sessionKey` lands
308
+ * here when bootstrap was either successful (chunks indexed) or definitively
309
+ * barren (no scannable files in the cwd at all) — neither warrants a retry.
310
+ */
307
311
  const _autoIndexAttempted = new Set<string>();
308
312
 
313
+ /**
314
+ * Per-session memoization of failed bootstrap scans, keyed by the query-term
315
+ * fingerprint. A scan that walked files but produced zero indexed chunks is
316
+ * not retried for the same fingerprint, but a different fingerprint (i.e. a
317
+ * reformulated query) is allowed through.
318
+ */
319
+ const _autoIndexNoMatchByQuery = new Map<string, Set<string>>();
320
+
309
321
  /** Reset auto-index attempt tracking. Test-only. */
310
322
  export function _resetAutoIndexAttempts(): void {
311
323
  _autoIndexAttempted.clear();
324
+ _autoIndexNoMatchByQuery.clear();
325
+ }
326
+
327
+ /** Compute the per-query fingerprint used to gate retries on no-match scans. */
328
+ function autoIndexQueryFingerprint(queries: string[]): string {
329
+ return extractIndexTerms(queries).slice().sort().join("|");
312
330
  }
313
331
 
314
332
  /** Drop bootstrap-attempted entries that belong to a closed session. */
@@ -318,6 +336,9 @@ export function _forgetAutoIndexSession(ownerId: string): void {
318
336
  for (const key of _autoIndexAttempted) {
319
337
  if (key.startsWith(prefix)) _autoIndexAttempted.delete(key);
320
338
  }
339
+ for (const key of [..._autoIndexNoMatchByQuery.keys()]) {
340
+ if (key.startsWith(prefix)) _autoIndexNoMatchByQuery.delete(key);
341
+ }
321
342
  }
322
343
 
323
344
  function extractIndexTerms(queries: string[]): string[] {
@@ -736,15 +757,17 @@ export function registerContextModeTools(
736
757
 
737
758
  const allEmpty = results.every((g) => g.results.length === 0);
738
759
  const sessionKey = `${owner.ownerId}|${source ?? ""}`;
760
+ const queryFingerprint = autoIndexQueryFingerprint(queries);
761
+ const failedFingerprints = _autoIndexNoMatchByQuery.get(sessionKey);
739
762
  const canBootstrap =
740
763
  allEmpty &&
741
764
  Array.isArray(queries) &&
742
765
  queries.length > 0 &&
743
766
  !source &&
744
- !_autoIndexAttempted.has(sessionKey);
767
+ !_autoIndexAttempted.has(sessionKey) &&
768
+ !(failedFingerprints?.has(queryFingerprint) ?? false);
745
769
 
746
770
  if (canBootstrap) {
747
- _autoIndexAttempted.add(sessionKey);
748
771
  const stats = store.getStats();
749
772
  if (stats.totalChunks === 0) {
750
773
  const cwd = typeof ctx?.cwd === "string" && ctx.cwd.length > 0 ? ctx.cwd : process.cwd();
@@ -755,15 +778,30 @@ export function registerContextModeTools(
755
778
  bootstrap = { chunksIndexed: 0, filesIndexed: 0, filesScanned: 0 };
756
779
  }
757
780
  if (bootstrap.chunksIndexed > 0) {
781
+ _autoIndexAttempted.add(sessionKey);
758
782
  results = store.search(queries, { source, contentType, limit, owner });
759
783
  bootstrapNote =
760
784
  `[auto-indexed ${bootstrap.filesIndexed} files (${bootstrap.chunksIndexed} chunks) ` +
761
785
  `from ${bootstrap.filesScanned} scanned to bootstrap the empty knowledge store]\n\n`;
762
786
  } else if (bootstrap.filesScanned > 0) {
787
+ // Scan ran but nothing matched this query fingerprint. Memoize by
788
+ // fingerprint so a reformulated query still gets a fresh scan.
789
+ let set = _autoIndexNoMatchByQuery.get(sessionKey);
790
+ if (!set) {
791
+ set = new Set<string>();
792
+ _autoIndexNoMatchByQuery.set(sessionKey, set);
793
+ }
794
+ set.add(queryFingerprint);
763
795
  bootstrapNote =
764
796
  `[scanned ${bootstrap.filesScanned} files but none matched the query terms; ` +
765
797
  `use ctx_batch_execute or ctx_index to index relevant content explicitly]\n\n`;
798
+ } else {
799
+ // No scannable files at all — cwd is barren. Don't retry, period.
800
+ _autoIndexAttempted.add(sessionKey);
766
801
  }
802
+ } else {
803
+ // Store had chunks but the query still missed; record nothing here —
804
+ // the search-side fallback already exhausted its options.
767
805
  }
768
806
  }
769
807
 
@@ -10,7 +10,7 @@ export interface Dependency {
10
10
  name: string;
11
11
  binary: string;
12
12
  required: boolean;
13
- category: "core" | "mcp" | "lsp" | "testing";
13
+ category: "core" | "lsp" | "testing";
14
14
  description: string;
15
15
  checkFn: (exec: ExecFn) => Promise<{ installed: boolean; version?: string }>;
16
16
  installCmd: string | null;
@@ -83,16 +83,6 @@ export const DEPENDENCIES: Dependency[] = [
83
83
  installCmd: null,
84
84
  url: "https://bun.sh",
85
85
  },
86
- {
87
- name: "mcpc",
88
- binary: "mcpc",
89
- required: false,
90
- category: "mcp",
91
- description: "MCP client CLI for server management",
92
- checkFn: (exec) => checkBinary(exec, "mcpc"),
93
- installCmd: "npm install -g @apify/mcpc",
94
- url: "https://github.com/apify/mcpc",
95
- },
96
86
  {
97
87
  name: "TypeScript LSP",
98
88
  binary: "typescript-language-server",
@@ -244,7 +234,6 @@ export function formatReport(
244
234
  // Object.keys preserves insertion order for string keys in all major engines (V8/JSC/SM).
245
235
  const categoryLabels: Record<Dependency["category"], string> = {
246
236
  core: "Core",
247
- mcp: "MCP",
248
237
  lsp: "Language Servers",
249
238
  testing: "Testing",
250
239
  };
@@ -55,6 +55,7 @@ function buildAssessmentPrompt(args: BuildAssessmentPromptArgs): string {
55
55
  "",
56
56
  "Rules:",
57
57
  "- Read the referenced code before assigning a verdict.",
58
+ `- If a comment's diffHunk lacks enough context, read \`pr://${args.repo}/${args.prNumber}/diff/all\`; the changed-file list is \`pr://${args.repo}/${args.prNumber}/diff\`. Use these only for verdict assessment and do not edit during assessment.`,
58
59
  "- Do not perform any code edits. This is a pure assessment pass.",
59
60
  "- One assessment per comment. `commentId` ties back to the PR comment id.",
60
61
  "",
@@ -84,6 +84,7 @@ export function buildFixPrOrchestratorPrompt(options: FixPrPromptOptions): strin
84
84
  `- Session dir: \`${sessionDir}\``,
85
85
  `- Iteration: ${iteration} of ${maxIter}`,
86
86
  `- Selected target: ${selectedTargetLabel}`,
87
+ `- Full PR diff: \`pr://${repo}/${prNumber}/diff/all\`; changed-file list: \`pr://${repo}/${prNumber}/diff\``,
87
88
  `- Comment reply policy: ${config.commentPolicy}`,
88
89
  `- Reviewer: ${reviewer.type}${reviewer.triggerMethod ? ` (trigger: ${reviewer.triggerMethod})` : ""}`,
89
90
  deferredCommentsSummary