@winci/local-rag 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +24 -0
- package/.mcp.json +11 -0
- package/LICENSE +21 -0
- package/README.md +567 -0
- package/hooks/hooks.json +25 -0
- package/hooks/scripts/reindex-file.sh +19 -0
- package/hooks/scripts/session-start.sh +11 -0
- package/package.json +52 -0
- package/skills/local-rag/SKILL.md +42 -0
- package/src/cli/commands/analytics.ts +58 -0
- package/src/cli/commands/benchmark.ts +30 -0
- package/src/cli/commands/checkpoint.ts +85 -0
- package/src/cli/commands/conversation.ts +102 -0
- package/src/cli/commands/demo.ts +119 -0
- package/src/cli/commands/eval.ts +31 -0
- package/src/cli/commands/index-cmd.ts +26 -0
- package/src/cli/commands/init.ts +35 -0
- package/src/cli/commands/map.ts +21 -0
- package/src/cli/commands/remove.ts +15 -0
- package/src/cli/commands/search-cmd.ts +59 -0
- package/src/cli/commands/serve.ts +5 -0
- package/src/cli/commands/status.ts +13 -0
- package/src/cli/index.ts +117 -0
- package/src/cli/progress.ts +21 -0
- package/src/cli/setup.ts +192 -0
- package/src/config/index.ts +101 -0
- package/src/conversation/indexer.ts +147 -0
- package/src/conversation/parser.ts +323 -0
- package/src/db/analytics.ts +116 -0
- package/src/db/annotations.ts +161 -0
- package/src/db/checkpoints.ts +166 -0
- package/src/db/conversation.ts +241 -0
- package/src/db/files.ts +146 -0
- package/src/db/graph.ts +250 -0
- package/src/db/index.ts +468 -0
- package/src/db/search.ts +244 -0
- package/src/db/types.ts +85 -0
- package/src/embeddings/embed.ts +73 -0
- package/src/graph/resolver.ts +305 -0
- package/src/indexing/chunker.ts +523 -0
- package/src/indexing/indexer.ts +263 -0
- package/src/indexing/parse.ts +99 -0
- package/src/indexing/watcher.ts +84 -0
- package/src/main.ts +8 -0
- package/src/search/benchmark.ts +139 -0
- package/src/search/eval.ts +171 -0
- package/src/search/hybrid.ts +194 -0
- package/src/search/reranker.ts +99 -0
- package/src/search/usages.ts +27 -0
- package/src/server/index.ts +126 -0
- package/src/tools/analytics-tools.ts +58 -0
- package/src/tools/annotation-tools.ts +89 -0
- package/src/tools/checkpoint-tools.ts +147 -0
- package/src/tools/conversation-tools.ts +86 -0
- package/src/tools/git-tools.ts +103 -0
- package/src/tools/graph-tools.ts +163 -0
- package/src/tools/index-tools.ts +91 -0
- package/src/tools/index.ts +33 -0
- package/src/tools/search.ts +238 -0
- package/src/types.ts +9 -0
- package/src/utils/log.ts +39 -0
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
import { readFileSync, statSync, openSync, readSync, closeSync } from "fs";
|
|
2
|
+
import { Glob } from "bun";
|
|
3
|
+
|
|
4
|
+
// ── JSONL entry types ──────────────────────────────────────────────

/**
 * One parsed line of a Claude Code transcript JSONL file.
 * Only `type` is guaranteed; every other field is optional because
 * queue-operation / file-history-snapshot lines carry different shapes.
 */
export interface JournalEntry {
  type: "user" | "assistant" | "queue-operation" | "file-history-snapshot";
  uuid?: string;
  parentUuid?: string | null;
  timestamp?: string;
  sessionId?: string;
  isSidechain?: boolean;
  requestId?: string;
  // Present on user/assistant entries; parseTurns filters on this.
  message?: {
    role: string;
    content: ContentBlock[];
    // Token accounting reported by the API for this message.
    usage?: {
      input_tokens?: number;
      output_tokens?: number;
    };
  };
  // Side-channel metadata attached to tool-result entries.
  toolUseResult?: {
    type?: string;
    filenames?: string[];
    durationMs?: number;
    numFiles?: number;
    truncated?: boolean;
  };
}

/**
 * Discriminated union of message content blocks, tagged by `type`.
 * tool_use/tool_result pairs are correlated via id / tool_use_id.
 */
export type ContentBlock =
  | { type: "text"; text: string }
  | { type: "thinking"; thinking: string }
  | { type: "tool_use"; id: string; name: string; input: Record<string, unknown> }
  | { type: "tool_result"; tool_use_id: string; content: string | ContentBlock[] };

// ── Parsed turn ────────────────────────────────────────────────────

/**
 * One aggregated conversation turn: a user prompt plus everything the
 * assistant produced (text, tool calls, retained tool output) until
 * the next user prompt.
 */
export interface ParsedTurn {
  turnIndex: number;
  timestamp: string;
  sessionId: string;
  userText: string;
  assistantText: string;
  toolResults: ToolResultInfo[];
  toolsUsed: string[];
  filesReferenced: string[];
  tokenCost: number;
  summary: string; // first 200 chars of assistant text
}

/**
 * A tool result kept for indexing, labeled with the tool that
 * produced it (resolved from the matching tool_use block).
 */
export interface ToolResultInfo {
  toolName: string;
  content: string;
  durationMs?: number;
  filenames: string[];
}

// Tools whose results are redundant with the code index — skip their content
const SKIP_CONTENT_TOOLS = new Set(["Read", "Glob", "Write", "Edit", "NotebookEdit"]);

// Maximum size for "short" tool results that are always indexed
const SHORT_RESULT_THRESHOLD = 500;
|
64
|
+
|
|
65
|
+
// ── JSONL parsing ──────────────────────────────────────────────────
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Read a JSONL file from a byte offset. Returns parsed entries and
|
|
69
|
+
* the new byte offset (for incremental reads).
|
|
70
|
+
*/
|
|
71
|
+
export function readJSONL(
|
|
72
|
+
filePath: string,
|
|
73
|
+
fromOffset = 0
|
|
74
|
+
): { entries: JournalEntry[]; newOffset: number } {
|
|
75
|
+
const stat = statSync(filePath);
|
|
76
|
+
if (fromOffset >= stat.size) {
|
|
77
|
+
return { entries: [], newOffset: fromOffset };
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const bytesToRead = stat.size - fromOffset;
|
|
81
|
+
const buf = Buffer.alloc(bytesToRead);
|
|
82
|
+
const fd = openSync(filePath, "r");
|
|
83
|
+
try {
|
|
84
|
+
readSync(fd, buf, 0, bytesToRead, fromOffset);
|
|
85
|
+
} finally {
|
|
86
|
+
closeSync(fd);
|
|
87
|
+
}
|
|
88
|
+
const text = buf.toString("utf-8");
|
|
89
|
+
const entries: JournalEntry[] = [];
|
|
90
|
+
|
|
91
|
+
for (const line of text.split("\n")) {
|
|
92
|
+
const trimmed = line.trim();
|
|
93
|
+
if (!trimmed) continue;
|
|
94
|
+
try {
|
|
95
|
+
entries.push(JSON.parse(trimmed));
|
|
96
|
+
} catch {
|
|
97
|
+
// Skip malformed lines
|
|
98
|
+
}
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
return { entries, newOffset: stat.size };
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
/**
 * Parse JSONL entries into conversation turns.
 *
 * A "turn" starts with a user text message and includes everything
 * until the next user text message. Tool use/result exchanges within
 * a turn are aggregated.
 *
 * @param entries JSONL entries, in file order (order matters: results
 *   are matched to tool_use blocks seen earlier in the stream).
 * @param sessionId Overrides the per-entry sessionId when provided.
 * @param startTurnIndex Base index for turnIndex numbering, so
 *   incremental parses can continue an existing sequence.
 * @returns Completed turns; entries before the first user text
 *   message, and turns with no user or assistant text, are dropped.
 */
export function parseTurns(
  entries: JournalEntry[],
  sessionId?: string,
  startTurnIndex = 0
): ParsedTurn[] {
  const turns: ParsedTurn[] = [];

  // Collect only user/assistant messages (skip queue-operation, file-history-snapshot)
  const messages = entries.filter(
    (e) => (e.type === "user" || e.type === "assistant") && e.message
  );

  // Track the current tool_use name by tool_use_id so we can label results
  const toolUseNames = new Map<string, string>();

  // Accumulator for the turn currently being built; null until the
  // first real user text message is seen.
  let current: {
    userText: string;
    assistantText: string;
    toolResults: ToolResultInfo[];
    toolsUsed: string[];
    filesReferenced: string[];
    tokenCost: number;
    timestamp: string;
    sessionId: string;
  } | null = null;

  // Finalize the accumulator into a ParsedTurn (deduping tools/files).
  function flushTurn() {
    if (!current) return;
    // Only create a turn if there's meaningful content
    if (!current.userText && !current.assistantText) return;

    const summary = current.assistantText.slice(0, 200);
    turns.push({
      turnIndex: startTurnIndex + turns.length,
      timestamp: current.timestamp,
      sessionId: current.sessionId,
      userText: current.userText,
      assistantText: current.assistantText,
      toolResults: current.toolResults,
      toolsUsed: [...new Set(current.toolsUsed)],
      filesReferenced: [...new Set(current.filesReferenced)],
      tokenCost: current.tokenCost,
      summary,
    });
  }

  for (const msg of messages) {
    const content = msg.message!.content;
    if (!Array.isArray(content)) continue;

    if (msg.type === "user") {
      // Check if this is a real user message (has text) or a tool_result
      const hasText = content.some(
        (b) => b.type === "text" && typeof (b as { text: string }).text === "string"
      );
      const hasToolResult = content.some((b) => b.type === "tool_result");

      if (hasText && !hasToolResult) {
        // New turn boundary
        flushTurn();
        const textParts = content
          .filter((b): b is { type: "text"; text: string } => b.type === "text")
          .map((b) => b.text);

        current = {
          userText: textParts.join("\n"),
          assistantText: "",
          toolResults: [],
          toolsUsed: [],
          filesReferenced: [],
          tokenCost: 0,
          timestamp: msg.timestamp || "",
          sessionId: sessionId || msg.sessionId || "",
        };
      } else if (hasToolResult && current) {
        // Tool result — extract content selectively
        for (const block of content) {
          if (block.type !== "tool_result") continue;

          const toolResult = block as {
            type: "tool_result";
            tool_use_id: string;
            content: string | ContentBlock[];
          };
          // Label via the tool_use block recorded earlier in the stream.
          const toolName = toolUseNames.get(toolResult.tool_use_id) || "unknown";

          // Extract text from tool result
          let resultText = "";
          if (typeof toolResult.content === "string") {
            resultText = toolResult.content;
          } else if (Array.isArray(toolResult.content)) {
            resultText = toolResult.content
              .filter((c): c is { type: "text"; text: string } => c.type === "text")
              .map((c) => c.text)
              .join("\n");
          }

          // Collect file references from toolUseResult metadata
          const filenames = msg.toolUseResult?.filenames || [];
          if (filenames.length > 0) {
            current.filesReferenced.push(...filenames);
          }

          // Selective indexing: skip content for Read/Glob/Write/Edit,
          // keep Bash/Grep output and short results
          const shouldIndex =
            !SKIP_CONTENT_TOOLS.has(toolName) ||
            resultText.length <= SHORT_RESULT_THRESHOLD;

          if (shouldIndex && resultText) {
            current.toolResults.push({
              toolName,
              content: resultText,
              durationMs: msg.toolUseResult?.durationMs,
              filenames,
            });
          }
        }
      }
    } else if (msg.type === "assistant" && current) {
      for (const block of content) {
        if (block.type === "text") {
          const textBlock = block as { type: "text"; text: string };
          // Join multiple assistant text blocks with newlines.
          if (current.assistantText) current.assistantText += "\n";
          current.assistantText += textBlock.text;
        } else if (block.type === "tool_use") {
          const toolBlock = block as { type: "tool_use"; id: string; name: string };
          current.toolsUsed.push(toolBlock.name);
          // Remember the id→name mapping for the upcoming tool_result.
          toolUseNames.set(toolBlock.id, toolBlock.name);
        }
      }

      // Accumulate token cost
      const usage = msg.message!.usage;
      if (usage) {
        current.tokenCost += (usage.input_tokens || 0) + (usage.output_tokens || 0);
      }
    }
  }

  // Flush last turn
  flushTurn();

  return turns;
}
|
|
256
|
+
|
|
257
|
+
/**
|
|
258
|
+
* Build the indexable text for a turn. Combines user text, assistant text,
|
|
259
|
+
* and selected tool result content.
|
|
260
|
+
*/
|
|
261
|
+
export function buildTurnText(turn: ParsedTurn): string {
|
|
262
|
+
const parts: string[] = [];
|
|
263
|
+
|
|
264
|
+
if (turn.userText) {
|
|
265
|
+
parts.push(`User: ${turn.userText}`);
|
|
266
|
+
}
|
|
267
|
+
|
|
268
|
+
if (turn.assistantText) {
|
|
269
|
+
parts.push(`Assistant: ${turn.assistantText}`);
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
for (const result of turn.toolResults) {
|
|
273
|
+
parts.push(`[${result.toolName}]: ${result.content}`);
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
return parts.join("\n\n");
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
// ── Session discovery ──────────────────────────────────────────────
|
|
280
|
+
|
|
281
|
+
export interface SessionInfo {
|
|
282
|
+
sessionId: string;
|
|
283
|
+
jsonlPath: string;
|
|
284
|
+
mtime: number;
|
|
285
|
+
size: number;
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/**
|
|
289
|
+
* Find all conversation JSONL files for a given project directory.
|
|
290
|
+
* Claude Code stores transcripts in ~/.claude/projects/<encoded-path>/.
|
|
291
|
+
*/
|
|
292
|
+
export function discoverSessions(projectDir: string): SessionInfo[] {
|
|
293
|
+
const encoded = projectDir.replace(/\//g, "-");
|
|
294
|
+
const claudeProjectDir = `${process.env.HOME}/.claude/projects/${encoded}`;
|
|
295
|
+
|
|
296
|
+
const sessions: SessionInfo[] = [];
|
|
297
|
+
const glob = new Glob("*.jsonl");
|
|
298
|
+
|
|
299
|
+
try {
|
|
300
|
+
for (const file of glob.scanSync(claudeProjectDir)) {
|
|
301
|
+
const fullPath = `${claudeProjectDir}/${file}`;
|
|
302
|
+
const sessionId = file.replace(".jsonl", "");
|
|
303
|
+
|
|
304
|
+
try {
|
|
305
|
+
const stat = statSync(fullPath);
|
|
306
|
+
sessions.push({
|
|
307
|
+
sessionId,
|
|
308
|
+
jsonlPath: fullPath,
|
|
309
|
+
mtime: stat.mtimeMs,
|
|
310
|
+
size: stat.size,
|
|
311
|
+
});
|
|
312
|
+
} catch {
|
|
313
|
+
// Skip files we can't stat
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
} catch {
|
|
317
|
+
// Claude project dir doesn't exist yet
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
// Sort by mtime descending (most recent first)
|
|
321
|
+
sessions.sort((a, b) => b.mtime - a.mtime);
|
|
322
|
+
return sessions;
|
|
323
|
+
}
|
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { Database } from "bun:sqlite";
|
|
2
|
+
|
|
3
|
+
export function logQuery(db: Database, query: string, resultCount: number, topScore: number | null, topPath: string | null, durationMs: number) {
|
|
4
|
+
db.run(
|
|
5
|
+
"INSERT INTO query_log (query, result_count, top_score, top_path, duration_ms, created_at) VALUES (?, ?, ?, ?, ?, ?)",
|
|
6
|
+
[query, resultCount, topScore, topPath, durationMs, new Date().toISOString()]
|
|
7
|
+
);
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
/**
 * Summarize query_log activity over the trailing `days` window:
 * volume, average result quality, problem queries (zero results or
 * top score < 0.3), most-repeated queries, and a per-day histogram.
 * List-shaped results are capped at 10 rows each.
 */
export function getAnalytics(db: Database, days: number = 30): {
  totalQueries: number;
  avgResultCount: number;
  avgTopScore: number | null;
  zeroResultQueries: { query: string; count: number }[];
  lowScoreQueries: { query: string; topScore: number; timestamp: string }[];
  topSearchedTerms: { query: string; count: number }[];
  queriesPerDay: { date: string; count: number }[];
} {
  // ISO-8601 cutoff; created_at is stored as ISO text, so string
  // comparison doubles as chronological comparison.
  const since = new Date(Date.now() - days * 86400000).toISOString();

  const total = db
    .query<{ count: number }, [string]>("SELECT COUNT(*) as count FROM query_log WHERE created_at >= ?")
    .get(since)!;

  const avgResult = db
    .query<{ avg: number | null }, [string]>("SELECT AVG(result_count) as avg FROM query_log WHERE created_at >= ?")
    .get(since)!;

  const avgScore = db
    .query<{ avg: number | null }, [string]>("SELECT AVG(top_score) as avg FROM query_log WHERE top_score IS NOT NULL AND created_at >= ?")
    .get(since)!;

  // Queries that returned nothing — candidates for index gaps.
  const zeroResult = db
    .query<{ query: string; count: number }, [string]>(
      "SELECT query, COUNT(*) as count FROM query_log WHERE result_count = 0 AND created_at >= ? GROUP BY query ORDER BY count DESC LIMIT 10"
    )
    .all(since);

  // Queries whose best hit scored below 0.3 — weak matches.
  const lowScore = db
    .query<{ query: string; top_score: number; created_at: string }, [string]>(
      "SELECT query, top_score, created_at FROM query_log WHERE top_score IS NOT NULL AND top_score < 0.3 AND created_at >= ? ORDER BY top_score ASC LIMIT 10"
    )
    .all(since)
    .map((r) => ({ query: r.query, topScore: r.top_score, timestamp: r.created_at }));

  const topTerms = db
    .query<{ query: string; count: number }, [string]>(
      "SELECT query, COUNT(*) as count FROM query_log WHERE created_at >= ? GROUP BY query ORDER BY count DESC LIMIT 10"
    )
    .all(since);

  // Daily counts keyed by the YYYY-MM-DD prefix of created_at.
  const perDay = db
    .query<{ date: string; count: number }, [string]>(
      "SELECT substr(created_at, 1, 10) as date, COUNT(*) as count FROM query_log WHERE created_at >= ? GROUP BY date ORDER BY date"
    )
    .all(since);

  return {
    totalQueries: total.count,
    // AVG over zero rows is NULL — report 0 in that case.
    avgResultCount: avgResult.avg ?? 0,
    avgTopScore: avgScore.avg,
    zeroResultQueries: zeroResult,
    lowScoreQueries: lowScore,
    topSearchedTerms: topTerms,
    queriesPerDay: perDay,
  };
}
|
|
68
|
+
|
|
69
|
+
export function getAnalyticsTrend(db: Database, days: number = 7): {
|
|
70
|
+
current: { totalQueries: number; avgTopScore: number | null; zeroResultRate: number };
|
|
71
|
+
previous: { totalQueries: number; avgTopScore: number | null; zeroResultRate: number };
|
|
72
|
+
delta: { queries: number; avgTopScore: number | null; zeroResultRate: number };
|
|
73
|
+
} {
|
|
74
|
+
const now = Date.now();
|
|
75
|
+
const currentStart = new Date(now - days * 86400000).toISOString();
|
|
76
|
+
const previousStart = new Date(now - days * 2 * 86400000).toISOString();
|
|
77
|
+
|
|
78
|
+
const getCounts = (since: string, until: string) => {
|
|
79
|
+
const total = db
|
|
80
|
+
.query<{ count: number }, [string, string]>(
|
|
81
|
+
"SELECT COUNT(*) as count FROM query_log WHERE created_at >= ? AND created_at < ?"
|
|
82
|
+
)
|
|
83
|
+
.get(since, until)!;
|
|
84
|
+
|
|
85
|
+
const avgScore = db
|
|
86
|
+
.query<{ avg: number | null }, [string, string]>(
|
|
87
|
+
"SELECT AVG(top_score) as avg FROM query_log WHERE top_score IS NOT NULL AND created_at >= ? AND created_at < ?"
|
|
88
|
+
)
|
|
89
|
+
.get(since, until)!;
|
|
90
|
+
|
|
91
|
+
const zeroCount = db
|
|
92
|
+
.query<{ count: number }, [string, string]>(
|
|
93
|
+
"SELECT COUNT(*) as count FROM query_log WHERE result_count = 0 AND created_at >= ? AND created_at < ?"
|
|
94
|
+
)
|
|
95
|
+
.get(since, until)!;
|
|
96
|
+
|
|
97
|
+
const zeroResultRate = total.count > 0 ? zeroCount.count / total.count : 0;
|
|
98
|
+
|
|
99
|
+
return { totalQueries: total.count, avgTopScore: avgScore.avg, zeroResultRate };
|
|
100
|
+
};
|
|
101
|
+
|
|
102
|
+
const farFuture = "9999-12-31T23:59:59.999Z";
|
|
103
|
+
const current = getCounts(currentStart, farFuture);
|
|
104
|
+
const previous = getCounts(previousStart, currentStart);
|
|
105
|
+
|
|
106
|
+
const delta = {
|
|
107
|
+
queries: current.totalQueries - previous.totalQueries,
|
|
108
|
+
avgTopScore:
|
|
109
|
+
current.avgTopScore !== null && previous.avgTopScore !== null
|
|
110
|
+
? current.avgTopScore - previous.avgTopScore
|
|
111
|
+
: null,
|
|
112
|
+
zeroResultRate: current.zeroResultRate - previous.zeroResultRate,
|
|
113
|
+
};
|
|
114
|
+
|
|
115
|
+
return { current, previous, delta };
|
|
116
|
+
}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import { Database } from "bun:sqlite";
|
|
2
|
+
import { type AnnotationRow } from "./types";
|
|
3
|
+
|
|
4
|
+
export function upsertAnnotation(
|
|
5
|
+
db: Database,
|
|
6
|
+
path: string,
|
|
7
|
+
note: string,
|
|
8
|
+
embedding: Float32Array,
|
|
9
|
+
symbolName?: string | null,
|
|
10
|
+
author?: string | null
|
|
11
|
+
): number {
|
|
12
|
+
let annotationId = 0;
|
|
13
|
+
|
|
14
|
+
const tx = db.transaction(() => {
|
|
15
|
+
let existing: { id: number; note: string } | null = null;
|
|
16
|
+
if (symbolName) {
|
|
17
|
+
existing = db
|
|
18
|
+
.query<{ id: number; note: string }, [string, string]>(
|
|
19
|
+
"SELECT id, note FROM annotations WHERE path = ? AND symbol_name = ?"
|
|
20
|
+
)
|
|
21
|
+
.get(path, symbolName);
|
|
22
|
+
} else {
|
|
23
|
+
existing = db
|
|
24
|
+
.query<{ id: number; note: string }, [string]>(
|
|
25
|
+
"SELECT id, note FROM annotations WHERE path = ? AND symbol_name IS NULL"
|
|
26
|
+
)
|
|
27
|
+
.get(path);
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
const now = new Date().toISOString();
|
|
31
|
+
|
|
32
|
+
if (existing) {
|
|
33
|
+
db.run(
|
|
34
|
+
"INSERT INTO fts_annotations(fts_annotations, rowid, note) VALUES ('delete', ?, ?)",
|
|
35
|
+
[existing.id, existing.note]
|
|
36
|
+
);
|
|
37
|
+
db.run(
|
|
38
|
+
"UPDATE annotations SET note = ?, author = ?, updated_at = ? WHERE id = ?",
|
|
39
|
+
[note, author ?? null, now, existing.id]
|
|
40
|
+
);
|
|
41
|
+
db.run("INSERT INTO fts_annotations(rowid, note) VALUES (?, ?)", [existing.id, note]);
|
|
42
|
+
db.run("DELETE FROM vec_annotations WHERE annotation_id = ?", [existing.id]);
|
|
43
|
+
db.run(
|
|
44
|
+
"INSERT INTO vec_annotations (annotation_id, embedding) VALUES (?, ?)",
|
|
45
|
+
[existing.id, new Uint8Array(embedding.buffer)]
|
|
46
|
+
);
|
|
47
|
+
annotationId = existing.id;
|
|
48
|
+
} else {
|
|
49
|
+
db.run(
|
|
50
|
+
"INSERT INTO annotations (path, symbol_name, note, author, created_at, updated_at) VALUES (?, ?, ?, ?, ?, ?)",
|
|
51
|
+
[path, symbolName ?? null, note, author ?? null, now, now]
|
|
52
|
+
);
|
|
53
|
+
annotationId = Number(
|
|
54
|
+
db.query<{ id: number }, []>("SELECT last_insert_rowid() as id").get()!.id
|
|
55
|
+
);
|
|
56
|
+
db.run("INSERT INTO fts_annotations(rowid, note) VALUES (?, ?)", [annotationId, note]);
|
|
57
|
+
db.run(
|
|
58
|
+
"INSERT INTO vec_annotations (annotation_id, embedding) VALUES (?, ?)",
|
|
59
|
+
[annotationId, new Uint8Array(embedding.buffer)]
|
|
60
|
+
);
|
|
61
|
+
}
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
tx();
|
|
65
|
+
return annotationId;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
export function getAnnotations(db: Database, path?: string, symbolName?: string | null): AnnotationRow[] {
|
|
69
|
+
let sql = "SELECT * FROM annotations WHERE 1=1";
|
|
70
|
+
const params: (string | null)[] = [];
|
|
71
|
+
|
|
72
|
+
if (path !== undefined) {
|
|
73
|
+
sql += " AND path = ?";
|
|
74
|
+
params.push(path);
|
|
75
|
+
}
|
|
76
|
+
if (symbolName !== undefined) {
|
|
77
|
+
if (symbolName === null) {
|
|
78
|
+
sql += " AND symbol_name IS NULL";
|
|
79
|
+
} else {
|
|
80
|
+
sql += " AND symbol_name = ?";
|
|
81
|
+
params.push(symbolName);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
sql += " ORDER BY updated_at DESC";
|
|
86
|
+
|
|
87
|
+
return db
|
|
88
|
+
.query<
|
|
89
|
+
{ id: number; path: string; symbol_name: string | null; note: string; author: string | null; created_at: string; updated_at: string },
|
|
90
|
+
(string | null)[]
|
|
91
|
+
>(sql)
|
|
92
|
+
.all(...params)
|
|
93
|
+
.map((r) => ({
|
|
94
|
+
id: r.id,
|
|
95
|
+
path: r.path,
|
|
96
|
+
symbolName: r.symbol_name,
|
|
97
|
+
note: r.note,
|
|
98
|
+
author: r.author,
|
|
99
|
+
createdAt: r.created_at,
|
|
100
|
+
updatedAt: r.updated_at,
|
|
101
|
+
}));
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
export function searchAnnotations(
|
|
105
|
+
db: Database,
|
|
106
|
+
queryEmbedding: Float32Array,
|
|
107
|
+
topK: number = 10
|
|
108
|
+
): (AnnotationRow & { score: number })[] {
|
|
109
|
+
return db
|
|
110
|
+
.query<
|
|
111
|
+
{
|
|
112
|
+
annotation_id: number;
|
|
113
|
+
distance: number;
|
|
114
|
+
id: number;
|
|
115
|
+
path: string;
|
|
116
|
+
symbol_name: string | null;
|
|
117
|
+
note: string;
|
|
118
|
+
author: string | null;
|
|
119
|
+
created_at: string;
|
|
120
|
+
updated_at: string;
|
|
121
|
+
},
|
|
122
|
+
[Uint8Array, number]
|
|
123
|
+
>(
|
|
124
|
+
`SELECT v.annotation_id, v.distance,
|
|
125
|
+
a.id, a.path, a.symbol_name, a.note, a.author, a.created_at, a.updated_at
|
|
126
|
+
FROM (SELECT annotation_id, distance FROM vec_annotations WHERE embedding MATCH ? ORDER BY distance LIMIT ?) v
|
|
127
|
+
JOIN annotations a ON a.id = v.annotation_id`
|
|
128
|
+
)
|
|
129
|
+
.all(new Uint8Array(queryEmbedding.buffer), topK)
|
|
130
|
+
.map((row) => ({
|
|
131
|
+
id: row.id,
|
|
132
|
+
path: row.path,
|
|
133
|
+
symbolName: row.symbol_name,
|
|
134
|
+
note: row.note,
|
|
135
|
+
author: row.author,
|
|
136
|
+
createdAt: row.created_at,
|
|
137
|
+
updatedAt: row.updated_at,
|
|
138
|
+
score: 1 / (1 + row.distance),
|
|
139
|
+
}));
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
export function deleteAnnotation(db: Database, id: number): boolean {
|
|
143
|
+
const existing = db
|
|
144
|
+
.query<{ id: number; note: string }, [number]>(
|
|
145
|
+
"SELECT id, note FROM annotations WHERE id = ?"
|
|
146
|
+
)
|
|
147
|
+
.get(id);
|
|
148
|
+
if (!existing) return false;
|
|
149
|
+
|
|
150
|
+
const tx = db.transaction(() => {
|
|
151
|
+
db.run(
|
|
152
|
+
"INSERT INTO fts_annotations(fts_annotations, rowid, note) VALUES ('delete', ?, ?)",
|
|
153
|
+
[id, existing.note]
|
|
154
|
+
);
|
|
155
|
+
db.run("DELETE FROM vec_annotations WHERE annotation_id = ?", [id]);
|
|
156
|
+
db.run("DELETE FROM annotations WHERE id = ?", [id]);
|
|
157
|
+
});
|
|
158
|
+
|
|
159
|
+
tx();
|
|
160
|
+
return true;
|
|
161
|
+
}
|