npm - @pi-unipi/compactor - Versions diffs - 0.1.7 → 0.2.2 - Mend

@pi-unipi/compactor 0.1.7 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37)

package/README.md +50 -24
package/index.ts +7 -0
package/package.json +4 -2
package/skills/compactor/SKILL.md +21 -65
package/skills/compactor-detail/SKILL.md +133 -0
package/src/commands/index.ts +186 -109
package/src/compaction/filter-noise.ts +4 -3
package/src/compaction/hooks.ts +25 -6
package/src/compaction/search-entries.ts +51 -4
package/src/config/manager.ts +55 -6
package/src/config/presets.ts +69 -5
package/src/config/schema.ts +10 -1
package/src/display/diff-presentation.ts +6 -1
package/src/display/diff-renderer.ts +34 -8
package/src/display/diff-width-safety.ts +83 -0
package/src/display/line-width-safety.ts +14 -2
package/src/index.ts +297 -16
package/src/info-screen.ts +137 -46
package/src/security/policy.ts +23 -0
package/src/session/analytics.ts +198 -0
package/src/session/auto-inject.ts +60 -0
package/src/session/db.ts +68 -8
package/src/session/resume-inject.ts +13 -1
package/src/store/db-base.ts +11 -0
package/src/store/index.ts +150 -4
package/src/store/unified.ts +109 -0
package/src/tools/context-budget.ts +50 -0
package/src/tools/ctx-batch-execute.ts +2 -5
package/src/tools/ctx-fetch-and-index.ts +3 -8
package/src/tools/ctx-index.ts +3 -9
package/src/tools/ctx-search.ts +3 -7
package/src/tools/ctx-stats.ts +6 -4
package/src/tools/register.ts +251 -216
package/src/tui/settings-overlay.ts +359 -149
package/src/types.ts +30 -7
package/skills/compactor-ops/SKILL.md +0 -65
package/skills/compactor-tools/SKILL.md +0 -120

package/src/store/index.ts CHANGED Viewed

@@ -8,6 +8,7 @@ import { loadSQLite, applyWALPragmas, withRetry, isSQLiteCorruptionError, defaul
 import type { PreparedStatement } from "./db-base.js";
 import { autoChunk } from "./chunking.js";
 import type { IndexResult, SearchResult, StoreStats } from "../types.js";
+import { loadConfig } from "../config/manager.js";
 // --- Fuzzy correction ---
@@ -131,6 +132,118 @@ function rrfMerge(
     .map((s) => ({ ...s.result, rank: s.score }));
 }
+// ── Proximity Reranking (from context-mode) ──────────────────
+/**
+ * Find every character offset at which `term` occurs in `text`.
+ *
+ * Matching is case-sensitive and overlapping occurrences are all reported
+ * (the scan resumes one character after each hit). Offsets are returned in
+ * ascending order.
+ *
+ * @param text - Haystack to scan.
+ * @param term - Substring to look for. An empty term yields no positions.
+ * @returns Ascending start offsets of each occurrence; empty when none.
+ */
+function findAllPositions(text: string, term: string): number[] {
+  const positions: number[] = [];
+  // `indexOf("")` matches at every offset and its start index is clamped to
+  // `text.length`, so an empty term never yields -1 and the loop below would
+  // push the same offset forever.
+  if (term.length === 0) return positions;
+  let idx = text.indexOf(term);
+  while (idx !== -1) {
+    positions.push(idx);
+    idx = text.indexOf(term, idx + 1);
+  }
+  return positions;
+}
+/**
+ * Find the smallest span (largest offset minus smallest offset) of a
+ * selection that takes one position from every list.
+ *
+ * Keeps one pointer per list, measures the spread of the currently selected
+ * positions, then advances the pointer that sits on the smallest position.
+ * The scan stops as soon as that list is exhausted.
+ *
+ * @param positionLists - One list of character offsets per term.
+ * @returns The minimum span; `0` for a single list; `Infinity` when there are
+ *   no lists or when, among several lists, any list is empty.
+ */
+function findMinSpan(positionLists: number[][]): number {
+  if (positionLists.length === 0) return Infinity;
+  if (positionLists.length === 1) return 0;
+  // An empty list means that term never occurs, so nothing can cover all
+  // terms. Without this guard the `undefined` read below is skipped by both
+  // comparisons and a bogus span (0, or -Infinity when all are empty) results.
+  if (positionLists.some((p) => p.length === 0)) return Infinity;
+  // Sort copies so the caller's arrays are left untouched.
+  const sorted = positionLists.map((p) => [...p].sort((a, b) => a - b));
+  const ptrs = new Array<number>(sorted.length).fill(0);
+  let minSpan = Infinity;
+  while (true) {
+    let curMin = Infinity;
+    let curMax = -Infinity;
+    let minIdx = 0;
+    for (let i = 0; i < sorted.length; i++) {
+      const val = sorted[i][ptrs[i]];
+      if (val < curMin) { curMin = val; minIdx = i; }
+      if (val > curMax) { curMax = val; }
+    }
+    const span = curMax - curMin;
+    if (span < minSpan) minSpan = span;
+    // Only moving the smallest position forward can shrink the span.
+    ptrs[minIdx]++;
+    if (ptrs[minIdx] >= sorted[minIdx].length) break;
+  }
+  return minSpan;
+}
+/**
+ * Count adjacent term pairs within a character gap.
+ *
+ * For each consecutive pair of terms (i, i+1), counts the occurrences of the
+ * left term that are followed by an occurrence of the right term starting at
+ * or after the left term's end and no more than `gap` characters beyond it.
+ * Each right-hand position is matched to at most one left-hand position.
+ *
+ * @param positionLists - Start offsets per term, index-aligned with `terms`.
+ *   NOTE(review): the forward-only scan of the right-hand list assumes each
+ *   list is in ascending order — confirm callers guarantee this.
+ * @param terms - The terms themselves; only their lengths are read.
+ * @param gap - Maximum number of characters allowed between the end of the
+ *   left term and the start of the right term (default 30).
+ * @returns Total matches summed over all consecutive term pairs; 0 when fewer
+ *   than two lists or two terms are supplied.
+ */
+function countAdjacentPairs(
+  positionLists: number[][],
+  terms: string[],
+  gap: number = 30,
+): number {
+  if (positionLists.length < 2 || terms.length < 2) return 0;
+  let total = 0;
+  const pairs = Math.min(positionLists.length, terms.length) - 1; // tolerate arrays of different lengths
+  for (let i = 0; i < pairs; i++) {
+    const left = positionLists[i];
+    const right = positionLists[i + 1];
+    const leftLen = terms[i].length;
+    let j = 0; // cursor into `right`; reset per pair, only ever moves forward
+    for (const p of left) {
+      const minStart = p + leftLen; // the right term may not start inside the left term
+      const maxStart = minStart + gap;
+      while (j < right.length && right[j] < minStart) j++; // skip right positions that start too early
+      if (j < right.length && right[j] <= maxStart) {
+        total++;
+        j++; // consume this right position so it is not counted twice
+      }
+    }
+  }
+  return total;
+}
+/**
+ * Apply proximity reranking to RRF results.
+ *
+ * The query is lowercased and split on whitespace; terms shorter than two
+ * characters are dropped, then stopwords are dropped unless that would remove
+ * every term. With fewer than two terms left, `results` is returned as is.
+ * Otherwise each result receives a boost that is the sum of:
+ *  - title boost: the fraction of terms contained in the lowercased title,
+ *    weighted 0.6 when `contentType === "code"` and 0.3 otherwise;
+ *  - proximity boost: `1 / (1 + minSpan / contentLength)`, only when every
+ *    term occurs somewhere in the lowercased content;
+ *  - phrase boost: `0.5 * min(1, adjacentPairs / 4)`, under the same condition.
+ * All term matching is plain substring matching.
+ *
+ * Results are ordered by boost, highest first; equal boosts are ordered by
+ * ascending `rank`.
+ * NOTE(review): the incoming order only matters through that `rank`
+ * tie-break — confirm ascending `rank` is the intended direction for the
+ * scores produced by `rrfMerge`.
+ *
+ * @param results - Results to reorder; this array is not mutated.
+ * @param query - Raw query string.
+ * @returns A new array in boosted order, or `results` itself when the query
+ *   has fewer than two usable terms.
+ */
+function applyProximityReranking(
+  results: SearchResult[],
+  query: string,
+): SearchResult[] {
+  const allTerms = query
+    .toLowerCase()
+    .split(/\s+/)
+    .filter((w) => w.length >= 2);
+  const filtered = allTerms.filter((w) => !STOPWORDS.has(w));
+  const terms = filtered.length > 0 ? filtered : allTerms; // fall back to the unfiltered terms if everything was a stopword
+  if (terms.length < 2) return results; // Single-term queries skip proximity
+  const scored = results.map((r) => {
+    const titleLower = r.title.toLowerCase();
+    const titleHits = terms.filter((t) => titleLower.includes(t)).length;
+    const titleWeight = r.contentType === "code" ? 0.6 : 0.3;
+    const titleBoost = titleHits > 0 ? titleWeight * (titleHits / terms.length) : 0;
+    let proximityBoost = 0;
+    let phraseBoost = 0;
+    const content = r.content.toLowerCase();
+    const positions = terms.map((t) => findAllPositions(content, t));
+    if (!positions.some((p) => p.length === 0)) { // only when every term occurs at least once
+      const minSpan = findMinSpan(positions);
+      proximityBoost = 1 / (1 + minSpan / Math.max(content.length, 1)); // Math.max avoids dividing by zero
+      const adjacentPairs = countAdjacentPairs(positions, terms);
+      phraseBoost = 0.5 * Math.min(1, adjacentPairs / 4); // reaches its 0.5 maximum at four pairs
+    }
+    return { result: r, boost: titleBoost + proximityBoost + phraseBoost };
+  });
+  return scored
+    .sort((a, b) => b.boost - a.boost || a.result.rank - b.result.rank)
+    .map((s) => s.result);
+}
 const STOPWORDS = new Set([
   "the", "and", "for", "are", "but", "not", "you", "all", "can", "had",
   "her", "was", "one", "our", "out", "has", "his", "how", "its", "may",
@@ -176,6 +289,7 @@ export class ContentStore {
   private stmts: Map<string, PreparedStatement> = new Map();
   private dbPath: string;
   private ready = false;
+  private writeCount = 0;
   constructor(opts?: { dbPath?: string }) {
     this.dbPath = opts?.dbPath ?? defaultDBPath("content");
@@ -227,6 +341,7 @@ export class ContentStore {
     p("deleteByLabel", `DELETE FROM content_fts WHERE label = ?`);
     p("insertSource", `INSERT INTO content_sources (label, source, content_type, mtime, sha256, chunk_count) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT(label) DO UPDATE SET source=excluded.source, content_type=excluded.content_type, mtime=excluded.mtime, sha256=excluded.sha256, chunk_count=excluded.chunk_count, indexed_at=datetime('now')`);
     p("getSource", `SELECT label, source, content_type, mtime, sha256, chunk_count, indexed_at FROM content_sources WHERE label = ?`);
+    p("getSourceMeta", `SELECT label, chunk_count, indexed_at FROM content_sources WHERE label = ?`);
     p("deleteSource", `DELETE FROM content_sources WHERE label = ?`);
     p("countSources", `SELECT COUNT(*) AS cnt FROM content_sources`);
     p("countFTS", `SELECT COUNT(*) AS cnt FROM content_fts`);
@@ -271,6 +386,7 @@ export class ContentStore {
     });
     withRetry(() => transaction());
+    this.afterWrite();
     return { sourceId: 1, label, totalChunks: chunks.length, codeChunks };
   }
@@ -312,7 +428,13 @@ export class ContentStore {
     // RRF fusion
     const rrfResults = rrfMerge(porterResults, trigramResults);
-    if (mode === "rrf") return rrfResults.slice(0, limit);
+    // Apply proximity reranking to all RRF results (if enabled)
+    const config = loadConfig();
+    const rerankedResults = config.pipeline.proximityReranking
+      ? applyProximityReranking(rrfResults, query)
+      : rrfResults;
+    if (mode === "rrf") return rerankedResults.slice(0, limit);
     // Fuzzy mode: apply fuzzy correction to query terms
     const vocab = buildVocabulary(allRows);
@@ -336,11 +458,11 @@ export class ContentStore {
         matchLayer: "fuzzy" as const,
         rank: r.rank * 0.9, // slightly lower confidence
       }));
-      const merged = rrfMerge(rrfResults, correctedResults);
-      return merged.slice(0, limit);
+      const merged = rrfMerge(rerankedResults, correctedResults);
+      return applyProximityReranking(merged, query).slice(0, limit);
     }
-    return rrfResults.slice(0, limit);
+    return rerankedResults.slice(0, limit);
   }
   async getStats(): Promise<StoreStats> {
@@ -354,13 +476,37 @@ export class ContentStore {
     };
   }
+  /**
+   * Get source metadata for TTL cache check.
+   *
+   * Runs the prepared `getSourceMeta` statement for `label` and maps the
+   * snake_case row columns to camelCase property names.
+   * NOTE(review): unlike `purge`, this does not check `this.ready` first —
+   * presumably it must only be called after `init()`; confirm.
+   *
+   * @param label - Source label to look up.
+   * @returns The label, chunk count and indexed-at value of the matching row,
+   *   or `null` when no row matches.
+   */
+  getSourceMeta(label: string): { label: string; chunkCount: number; indexedAt: string } | null {
+    const row = this.stmt("getSourceMeta").get(label) as { label: string; chunk_count: number; indexed_at: string } | undefined;
+    if (!row) return null;
+    return { label: row.label, chunkCount: row.chunk_count, indexedAt: row.indexed_at };
+  }
   async purge(): Promise<number> { // deletes every row from content_fts and content_sources
     if (!this.ready) await this.init(); // initialise lazily on first use
     this.db.exec(`DELETE FROM content_fts; DELETE FROM content_sources;`);
+    this.afterWrite(); // counts as one write towards the periodic WAL checkpoint
     const row = this.stmt("countSources").get() as { cnt: number }; // NOTE(review): read after the DELETE, so this is the number of source rows remaining, not the number removed — confirm intent
     return row.cnt;
   }
+  /**
+   * Run WAL checkpoint to prevent unbounded WAL file growth.
+   *
+   * Does nothing when no database handle is set. Any error raised while
+   * executing the PRAGMA is swallowed, so callers get no signal about whether
+   * the checkpoint actually ran.
+   *
+   * @param mode - Checkpoint mode placed into `PRAGMA wal_checkpoint(...)`;
+   *   defaults to `"PASSIVE"`.
+   */
+  checkpointWAL(mode: "PASSIVE" | "TRUNCATE" = "PASSIVE"): void {
+    if (!this.db) return;
+    try {
+      this.db.exec(`PRAGMA wal_checkpoint(${mode});`); // mode is restricted to two literals by its type, so interpolation is safe for typed callers
+    } catch { /* ignore */ }
+  }
+  /**
+   * Increment write counter and trigger PASSIVE checkpoint every 10th write.
+   *
+   * The counter lives on the instance, so the cadence is per `ContentStore`
+   * object rather than per database file.
+   */
+  private afterWrite(): void {
+    this.writeCount++;
+    if (this.writeCount % 10 === 0) { // true on the 10th, 20th, 30th… call
+      this.checkpointWAL("PASSIVE");
+    }
+  }
   close(): void { // best-effort close: any error thrown by the handle is ignored
     try { this.db.close(); } catch { /* ignore */ }
   }

package/src/store/unified.ts ADDED Viewed

@@ -0,0 +1,109 @@
+/**
+ * Unified search across ContentStore + SessionDB events
+ * Supports timeline (chronological) and relevance sorting
+ * (from context-mode src/search/unified.ts)
+ */
+import type { ContentStore } from "./index.js";
+import type { SessionDB } from "../session/db.js";
+import type { SearchResult } from "../types.js";
+export interface UnifiedSearchResult { // one hit returned by searchAllSources
+  title: string; // store result title, or "[category] type" for a session event
+  content: string; // store result content, or event data cut to 500 characters
+  source: string; // store result source, or the literal "prior-session" for events
+  origin: "current-session" | "prior-session"; // ContentStore hits vs. SessionDB events
+  timestamp: string; // ISO 8601 after normalisation; store hits carry the time of the search call
+  rank: number; // store result rank; always 0 for session events
+  matchLayer: string; // store match layer ("porter" when absent) or "event"
+  contentType: "prose" | "code"; // session events are always "prose"
+}
+export interface UnifiedSearchOptions { // options accepted by searchAllSources
+  query: string; // passed to the store search and substring-matched against events
+  limit?: number; // maximum results returned; defaults to 10
+  sort?: "relevance" | "timeline"; // defaults to "relevance"
+  source?: string; // NOTE(review): not read by searchAllSources in this file
+  contentType?: string; // NOTE(review): not read by searchAllSources in this file
+  projectDir?: string; // handed to sessionDB.getEvents as the session id ("" when absent)
+}
+/**
+ * Search across multiple sources and optionally sort chronologically.
+ * - relevance: ContentStore only, ranked by RRF
+ * - timeline: ContentStore + SessionDB events, sorted by timestamp
+ *
+ * In relevance mode no sort is applied here: hits keep the order in which
+ * `store.search` returned them. In timeline mode hits are sorted by ascending
+ * timestamp before truncation, so the oldest `limit` entries are kept.
+ * ContentStore hits are stamped with the time of this call.
+ * NOTE(review): because of that stamp, store hits sort after any event with an
+ * earlier `created_at` and are the first to be cut by the final slice —
+ * confirm that is intended.
+ *
+ * A failure in either source is swallowed so the other can still contribute.
+ *
+ * @param store - Content store queried in both modes.
+ * @param sessionDB - Session database; skipped when `null` or when the sort
+ *   mode is not "timeline".
+ * @param opts - Query, limit (default 10), sort mode (default "relevance")
+ *   and the value used as session id (`projectDir`).
+ * @returns At most `limit` results.
+ */
+export async function searchAllSources(
+  store: ContentStore,
+  sessionDB: SessionDB | null,
+  opts: UnifiedSearchOptions,
+): Promise<UnifiedSearchResult[]> {
+  const limit = opts.limit ?? 10;
+  const sort = opts.sort ?? "relevance";
+  const sessionStartTime = new Date().toISOString(); // captured at call time; used as the fallback timestamp
+  const results: UnifiedSearchResult[] = [];
+  // Source 1: ContentStore (always, both modes)
+  try {
+    const storeResults = await store.search(opts.query, { limit });
+    results.push(
+      ...storeResults.map((r) => ({
+        title: r.title,
+        content: r.content,
+        source: r.source,
+        origin: "current-session" as const,
+        timestamp: sessionStartTime, // ContentStore doesn't track per-result timestamps yet
+        rank: r.rank,
+        matchLayer: r.matchLayer ?? "porter",
+        contentType: r.contentType,
+      })),
+    );
+  } catch {
+    // ContentStore search failed — continue with other sources
+  }
+  // Source 2: SessionDB events (timeline mode only)
+  if (sort === "timeline" && sessionDB) {
+    try {
+      const sessionId = opts.projectDir ?? ""; // NOTE(review): projectDir doubles as the session id — confirm this matches how SessionDB keys its events
+      const events = sessionDB.getEvents(sessionId, { limit: 100 }); // only the events returned under this cap are scanned
+      const queryLower = opts.query.toLowerCase();
+      const matchingEvents = events.filter((e) => { // case-insensitive substring match on data, type and category
+        const data = String(e.data ?? "").toLowerCase();
+        const type = String(e.type ?? "").toLowerCase();
+        const category = String(e.category ?? "").toLowerCase();
+        return data.includes(queryLower) || type.includes(queryLower) || category.includes(queryLower);
+      });
+      results.push(
+        ...matchingEvents.slice(0, limit).map((e) => ({
+          title: `[${e.category}] ${e.type}`,
+          content: String(e.data ?? "").slice(0, 500),
+          source: "prior-session",
+          origin: "prior-session" as const,
+          timestamp: e.created_at ?? sessionStartTime,
+          rank: 0,
+          matchLayer: "event",
+          contentType: "prose" as const,
+        })),
+      );
+    } catch {
+      // SessionDB search failed — continue
+    }
+  }
+  // Normalize SQLite datetime format to ISO 8601
+  for (const r of results) {
+    if (r.timestamp && !r.timestamp.includes("T")) {
+      r.timestamp = r.timestamp.replace(" ", "T") + "Z"; // first space becomes "T"; the appended "Z" labels the value as UTC
+    }
+  }
+  // Sort: timeline = chronological, relevance = by rank
+  if (sort === "timeline") {
+    results.sort((a, b) => (a.timestamp || "").localeCompare(b.timestamp || "")); // ascending: oldest first
+  }
+  return results.slice(0, limit);
+}

package/src/tools/context-budget.ts ADDED Viewed

@@ -0,0 +1,50 @@
+/**
+ * context_budget tool — estimate remaining context window
+ */
+export interface ContextBudgetResult { // value returned by estimateContextBudget
+  percentFull: number; // used / total, rounded to a whole percent
+  remainingTokens: number; // total minus used, never below 0
+  totalTokens: number; // the window size the estimate was computed against
+  message: string; // one-line human-readable summary
+  advice: string; // compaction guidance chosen from percentFull
+}
+/**
+ * Estimate how full the context window is and suggest when to compact.
+ *
+ * Advice thresholds are 90%, 75% and 50% full. `percentFull` may exceed 100
+ * when more tokens are in use than the window holds; `remainingTokens` is
+ * then 0.
+ *
+ * @param tokensBefore - Tokens currently in use. `undefined` or a non-finite
+ *   value means "unknown"; negative values are treated as 0.
+ * @param contextWindowSize - Total window size in tokens (default 200,000).
+ * @returns The budget summary, or `null` when no usable token count is given.
+ */
+export function estimateContextBudget(
+  tokensBefore?: number,
+  contextWindowSize?: number,
+): ContextBudgetResult | null {
+  // NaN/±Infinity would otherwise flow into the message as "~NaN% full".
+  if (tokensBefore === undefined || !Number.isFinite(tokensBefore)) return null;
+  const windowSize = contextWindowSize ?? 200000; // Default 200K context
+  // A negative count is meaningless; clamp it so percentFull never drops
+  // below 0 and remainingTokens never exceeds the window.
+  const used = Math.max(0, tokensBefore);
+  const remaining = Math.max(0, windowSize - used);
+  const percentFull = windowSize > 0 ? Math.round((used / windowSize) * 100) : 0;
+  let advice: string;
+  if (percentFull >= 90) {
+    advice = "CRITICAL: Compact immediately. Very little room for complex tasks.";
+  } else if (percentFull >= 75) {
+    advice = "Context is filling up. Compact before starting complex work.";
+  } else if (percentFull >= 50) {
+    advice = "Moderate context usage. Compact before large multi-step tasks.";
+  } else {
+    advice = "Context has plenty of room. No compaction needed yet.";
+  }
+  const message = `Context: ~${percentFull}% full (estimated ${remaining.toLocaleString()} tokens remaining)`;
+  return { percentFull, remainingTokens: remaining, totalTokens: windowSize, message, advice };
+}
+/**
+ * The context_budget tool handler.
+ * Called from the tool registration — receives tokensBefore from Pi context.
+ *
+ * No window size is forwarded, so `estimateContextBudget` always applies its
+ * own default window here.
+ *
+ * @param tokensBefore - Tokens currently in use, if known.
+ * @returns The budget message and advice on two lines, or a fixed "Unknown"
+ *   line when `estimateContextBudget` returns `null`.
+ */
+export function contextBudgetTool(tokensBefore?: number): string {
+  const budget = estimateContextBudget(tokensBefore);
+  if (!budget) return "Context budget: Unknown (no token data available from session).";
+  return `${budget.message}\nAdvice: ${budget.advice}`;
+}

package/src/tools/ctx-batch-execute.ts CHANGED Viewed

@@ -3,7 +3,7 @@
  */
 import { PolyglotExecutor } from "../executor/executor.js";
-import { ContentStore } from "../store/index.js";
+import type { ContentStore } from "../store/index.js";
 import type { Language, ExecResult, SearchResult } from "../types.js";
 export interface BatchCommand {
@@ -28,11 +28,9 @@ export interface BatchResult {
   >;
 }
-export async function ctxBatchExecute(items: BatchItem[]): Promise<BatchResult> {
+export async function ctxBatchExecute(store: ContentStore, items: BatchItem[]): Promise<BatchResult> {
   const results: BatchResult["results"] = [];
   const executor = new PolyglotExecutor();
-  const store = new ContentStore();
-  await store.init();
   for (const item of items) {
     if (item.type === "execute") {
@@ -48,6 +46,5 @@ export async function ctxBatchExecute(items: BatchItem[]): Promise<BatchResult>
     }
   }
-  store.close();
   return { results };
 }

package/src/tools/ctx-fetch-and-index.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * ctx_fetch_and_index tool — fetch URL → markdown → index
  */
-import { ContentStore } from "../store/index.js";
+import type { ContentStore } from "../store/index.js";
 import type { IndexResult } from "../types.js";
 export interface CtxFetchAndIndexInput {
@@ -11,7 +11,7 @@ export interface CtxFetchAndIndexInput {
   chunkSize?: number;
 }
-export async function ctxFetchAndIndex(input: CtxFetchAndIndexInput): Promise<IndexResult> {
+export async function ctxFetchAndIndex(store: ContentStore, input: CtxFetchAndIndexInput): Promise<IndexResult> {
   const label = input.label ?? input.url;
   const response = await fetch(input.url, {
@@ -23,15 +23,10 @@ export async function ctxFetchAndIndex(input: CtxFetchAndIndexInput): Promise<In
   }
   const text = await response.text();
-  const store = new ContentStore();
-  await store.init();
-  const result = await store.index(label, text, {
+  return store.index(label, text, {
     contentType: "plain",
     source: input.url,
     chunkSize: input.chunkSize,
   });
-  store.close();
-  return result;
 }

package/src/tools/ctx-index.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * ctx_index tool — chunk content → index into FTS5
  */
-import { ContentStore } from "../store/index.js";
+import type { ContentStore } from "../store/index.js";
 import type { IndexResult } from "../types.js";
 import { readFileSync } from "node:fs";
@@ -14,10 +14,7 @@ export interface CtxIndexInput {
   chunkSize?: number;
 }
-export async function ctxIndex(input: CtxIndexInput): Promise<IndexResult> {
-  const store = new ContentStore();
-  await store.init();
+export async function ctxIndex(store: ContentStore, input: CtxIndexInput): Promise<IndexResult> {
   let text: string;
   let source: string;
@@ -31,12 +28,9 @@ export async function ctxIndex(input: CtxIndexInput): Promise<IndexResult> {
     throw new Error("Either content or filePath must be provided");
   }
-  const result = await store.index(input.label, text, {
+  return store.index(input.label, text, {
     contentType: input.contentType ?? "plain",
     source,
     chunkSize: input.chunkSize,
   });
-  store.close();
-  return result;
 }

package/src/tools/ctx-search.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * ctx_search tool — query indexed content
  */
-import { ContentStore } from "../store/index.js";
+import type { ContentStore } from "../store/index.js";
 import type { SearchResult } from "../types.js";
 export interface CtxSearchInput {
@@ -11,13 +11,9 @@ export interface CtxSearchInput {
   offset?: number;
 }
-export async function ctxSearch(input: CtxSearchInput): Promise<SearchResult[]> {
-  const store = new ContentStore();
-  await store.init();
-  const results = await store.search(input.query, {
+export async function ctxSearch(store: ContentStore, input: CtxSearchInput): Promise<SearchResult[]> {
+  return store.search(input.query, {
     limit: input.limit ?? 10,
     offset: input.offset ?? 0,
   });
-  store.close();
-  return results;
 }

package/src/tools/ctx-stats.ts CHANGED Viewed

@@ -4,6 +4,7 @@
 import type { SessionDB } from "../session/db.js";
 import type { ContentStore } from "../store/index.js";
+import type { RuntimeCounters } from "../types.js";
 export interface CtxStatsResult {
   sessionEvents: number;
@@ -20,18 +21,19 @@ export async function ctxStats(
   sessionDB: SessionDB,
   contentStore: ContentStore,
   sessionId: string,
+  counters?: RuntimeCounters,
 ): Promise<CtxStatsResult> {
   const sessionStats = sessionDB.getSessionStats(sessionId);
   const storeStats = await contentStore.getStats();
   return {
     sessionEvents: sessionStats?.event_count ?? 0,
-    compactions: sessionStats?.compact_count ?? 0,
-    tokensSaved: 0, // populated by caller from compaction stats
+    compactions: counters?.compactions ?? sessionStats?.compact_count ?? 0,
+    tokensSaved: counters?.totalTokensCompacted ?? 0,
     compressionRatio: "N/A",
     indexedDocs: storeStats.sources,
     indexedChunks: storeStats.chunks,
-    sandboxRuns: 0,
-    searchQueries: 0,
+    sandboxRuns: counters?.sandboxRuns ?? 0,
+    searchQueries: counters?.searchQueries ?? 0,
   };
 }