npm - wicked-brain - Versions diffs - 0.8.1 → 0.9.0 - Mend

wicked-brain 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/package.json +1 -1
package/server/bin/wicked-brain-server.mjs +1 -0
package/server/lib/sqlite-search.mjs +158 -11
package/server/package.json +1 -1
package/skills/wicked-brain-ingest/SKILL.md +7 -5

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "wicked-brain",
-  "version": "0.8.1",
+  "version": "0.9.0",
   "type": "module",
   "description": "Digital brain as skills for AI coding CLIs — no vector DB, no embeddings, no infrastructure",
   "keywords": [

package/server/bin/wicked-brain-server.mjs CHANGED Viewed

@@ -194,6 +194,7 @@ const actions = {
   link_health: () => db.linkHealth(),
   tag_frequency: () => ({ tags: db.tagFrequency() }),
   search_misses: (p) => ({ misses: db.searchMisses(p) }),
+  wiki_list: (p) => db.wikiList(p),
   // LSP actions
   "lsp-health": () => lsp.health(),
   "lsp-symbols": (p) => lsp.symbols(p),

package/server/lib/sqlite-search.mjs CHANGED Viewed

@@ -12,6 +12,19 @@ function extractBodyExcerpt(content, maxLen = 300) {
   return body.trim().slice(0, maxLen);
 }
+/**
+ * Derives the source type from a document path.
+ * - Paths starting with "wiki/" → "wiki"
+ * - Paths starting with "memory/" or "memories/" → "memory"
+ * - Everything else → "chunk"
+ */
+export function deriveSourceType(path) {
+  const normalized = (path ?? "").replace(/\\/g, "/");
+  if (normalized.startsWith("wiki/")) return "wiki";
+  if (normalized.startsWith("memory/") || normalized.startsWith("memories/")) return "memory";
+  return "chunk";
+}
 function escapeFtsQuery(query) {
   return query
     .trim()
@@ -24,6 +37,44 @@ function escapeFtsQuery(query) {
 /** Weight factor for backlink count in search ranking (PageRank-lite). */
 const BACKLINK_WEIGHT = 0.5;
+/**
+ * Additive boost applied to FTS5 BM25 score when a query term appears as a
+ * substring of the document's path. BM25 scores in SQLite FTS5 are negative
+ * (more negative = more relevant), so we SUBTRACT this value to push path
+ * matches ahead. Addresses the case where a query term matches a module/file
+ * name but the chunk body has only sparse mentions: a dense body chunk in an
+ * unrelated file can have a very negative BM25, so a multiplicative boost on
+ * the sparse-but-path-matching chunk's weaker score is insufficient. A flat
+ * additive bonus larger than the typical BM25 magnitude reliably promotes it.
+ */
+const PATH_MATCH_BOOST = 20;
+/**
+ * Overfetch multiplier for path-name boost re-ranking. We pull this many times
+ * the requested limit from FTS so that boosted rows below the BM25 cutoff can
+ * still be promoted into the top N.
+ */
+const PATH_BOOST_OVERFETCH = 5;
+/** Tokenize a free-text query the same way we want to match against paths:
+ *  lowercase, split on non-word (underscores preserved). */
+function tokenizeQueryForPath(query) {
+  return query
+    .toLowerCase()
+    .split(/[^\w]+/)
+    .filter(Boolean);
+}
+/** Returns true if any query term appears as a substring of the lowercased path. */
+function pathMatchesQuery(path, terms) {
+  if (!path || terms.length === 0) return false;
+  const lowered = path.toLowerCase();
+  for (const term of terms) {
+    if (lowered.includes(term)) return true;
+  }
+  return false;
+}
 /** Weight factor for average backlink confidence in search ranking. */
 const CONFIDENCE_WEIGHT = 0.3;
@@ -267,7 +318,11 @@ export class SqliteSearch {
     const sinceClause = since ? `AND d.indexed_at >= ?` : "";
     const sinceParams = since ? [new Date(since).getTime()] : [];
-    const rows = this.#db
+    // Overfetch so the path-name boost can promote rows that sit below the
+    // raw BM25 cutoff. We re-rank in JS, then slice to the requested limit.
+    const fetchLimit = (limit + offset) * PATH_BOOST_OVERFETCH;
+    const rawRows = this.#db
       .prepare(`
         SELECT
           d.id,
@@ -277,7 +332,8 @@ export class SqliteSearch {
           SUBSTR(d.content, 1, 1000) AS raw_content,
           COALESCE(link_count.cnt, 0) AS backlink_count,
           COALESCE(ac.cnt, 0) AS access_count,
-          COALESCE(link_conf.avg_conf, 0.5) AS avg_backlink_confidence
+          COALESCE(link_conf.avg_conf, 0.5) AS avg_backlink_confidence,
+          (f.rank - (COALESCE(link_count.cnt, 0) * ${BACKLINK_WEIGHT}) - (COALESCE(ac.cnt, 0) * ${SEARCH_ACCESS_WEIGHT}) - (COALESCE(link_conf.avg_conf, 0.5) * ${CONFIDENCE_WEIGHT})) AS composite_score
         FROM documents_fts f
         JOIN documents d ON d.id = f.id
         LEFT JOIN (
@@ -297,15 +353,29 @@ export class SqliteSearch {
         ) ac ON d.id = ac.doc_id
         WHERE documents_fts MATCH ?
         ${sinceClause}
-        ORDER BY (f.rank - (COALESCE(link_count.cnt, 0) * ${BACKLINK_WEIGHT}) - (COALESCE(ac.cnt, 0) * ${SEARCH_ACCESS_WEIGHT}) - (COALESCE(link_conf.avg_conf, 0.5) * ${CONFIDENCE_WEIGHT}))
-        LIMIT ? OFFSET ?
+        ORDER BY composite_score
+        LIMIT ?
       `)
-      .all(escaped, ...sinceParams, limit, offset)
-      .map((row) => {
-        const body_excerpt = extractBodyExcerpt(row.raw_content ?? "");
-        delete row.raw_content;
-        return { ...row, body_excerpt };
-      });
+      .all(escaped, ...sinceParams, fetchLimit);
+    // Path-name boost: if any query term appears in the path, subtract
+    // PATH_MATCH_BOOST from the (negative) composite score so it sorts higher.
+    const queryTerms = tokenizeQueryForPath(query);
+    for (const row of rawRows) {
+      row.boosted_score = pathMatchesQuery(row.path, queryTerms)
+        ? row.composite_score - PATH_MATCH_BOOST
+        : row.composite_score;
+    }
+    rawRows.sort((a, b) => a.boosted_score - b.boosted_score);
+    const rows = rawRows.slice(offset, offset + limit).map((row) => {
+      const body_excerpt = extractBodyExcerpt(row.raw_content ?? "");
+      const source_type = deriveSourceType(row.path);
+      delete row.raw_content;
+      delete row.composite_score;
+      delete row.boosted_score;
+      return { ...row, source_type, body_excerpt };
+    });
     const countRow = this.#db
       .prepare(
@@ -371,7 +441,7 @@ export class SqliteSearch {
               LIMIT ?
             `)
             .all(escaped, limit);
-          allResults.push(...rows);
+          allResults.push(...rows.map((r) => ({ ...r, source_type: deriveSourceType(r.path) })));
         } finally {
           this.#db.prepare(`DETACH DATABASE ${attached}`).run();
         }
@@ -816,6 +886,83 @@ export class SqliteSearch {
     return row || null;
   }
+  /**
+   * List wiki articles with metadata (no full content).
+   * Optional FTS5 keyword filter.
+   * @param {object} opts
+   * @param {string|null} [opts.query] - Optional FTS5 query to filter articles
+   * @param {number} [opts.limit=50]
+   * @returns {{ articles: Array<{ path: string, title: string|null, description: string|null, tags: string[], word_count: number }> }}
+   */
+  wikiList({ query = null, limit = 50 } = {}) {
+    let rows;
+    if (query) {
+      const escaped = escapeFtsQuery(query);
+      if (!escaped) return { articles: [] };
+      rows = this.#db.prepare(`
+        SELECT d.path, d.frontmatter, d.content
+        FROM documents_fts f
+        JOIN documents d ON d.id = f.id
+        WHERE documents_fts MATCH ?
+          AND d.path LIKE 'wiki/%'
+        ORDER BY rank
+        LIMIT ?
+      `).all(escaped, limit);
+    } else {
+      rows = this.#db.prepare(`
+        SELECT path, frontmatter, content
+        FROM documents
+        WHERE path LIKE 'wiki/%'
+        ORDER BY path
+        LIMIT ?
+      `).all(limit);
+    }
+    const articles = rows.map((row) => {
+      const fm = row.frontmatter || SqliteSearch.#extractFrontmatter(row.content) || "";
+      const title = this.#extractFrontmatterField(fm, "title") || null;
+      const description = this.#extractFrontmatterField(fm, "description") || null;
+      const tags = this.#parseTags(fm);
+      const word_count = (row.content || "").split(/\s+/).filter(Boolean).length;
+      return { path: row.path, title, description, tags, word_count };
+    });
+    return { articles };
+  }
+  /**
+   * Parse tags from frontmatter string.
+   * Supports space-separated inline, JSON array, and YAML block list formats.
+   */
+  #parseTags(fm) {
+    if (!fm) return [];
+    // Inline: tags: tag1 tag2 tag3  or  tags: ["tag1","tag2"]
+    const inlineMatch = fm.match(/^tags:[ \t]+(\S.*)$/m);
+    if (inlineMatch) {
+      const raw = inlineMatch[1].trim();
+      if (raw.startsWith("[")) {
+        try {
+          return JSON.parse(raw).map(String);
+        } catch {
+          return raw.replace(/[\[\]"]/g, "").split(/[\s,]+/).filter(Boolean);
+        }
+      }
+      return raw.split(/\s+/).filter(Boolean);
+    }
+    // YAML block list
+    const blockMatch = fm.match(/^tags:\s*\n((?:\s+-\s+.+\n?)+)/m);
+    if (blockMatch) {
+      const listLines = blockMatch[1].match(/^\s+-\s+(.+)$/gm) || [];
+      return listLines.map((line) => line.replace(/^\s+-\s+/, "").trim()).filter(Boolean);
+    }
+    return [];
+  }
   close() {
     this.#db.close();
   }

package/server/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "wicked-brain-server",
-  "version": "0.8.1",
+  "version": "0.9.0",
   "type": "module",
   "description": "SQLite FTS5 search server for wicked-brain digital knowledge bases",
   "keywords": [

package/skills/wicked-brain-ingest/SKILL.md CHANGED Viewed

@@ -323,12 +323,14 @@ async function ingestFile(filePath) {
     // Note: These keywords are for FTS indexing. The LLM-based ingest
     // generates richer synonym-expanded tags in the contains: field.
     // This batch script extracts basic keywords only.
+    // Replace non-word chars with space (not empty) so adjacent tokens don't glue.
+    // Preserve underscores so snake_case identifiers survive. Floor at 4 chars so
+    // short domain terms like 'task', 'hook', 'crew' aren't dropped.
+    const cleaned = chunks[i].toLowerCase().replace(/[^a-z0-9_\s-]/g, " ");
+    const tokens = cleaned.split(/\s+/).filter(Boolean);
     const keywords = [...new Set(
-      chunks[i].toLowerCase()
-        .replace(/[^a-z0-9\s-]/g, "")
-        .split(/\s+/)
-        .filter(w => w.length > 5 && !STOP.has(w))
-    )].slice(0, 10);
+      tokens.filter(w => w.length >= 4 && !STOP.has(w))
+    )].slice(0, 12);
     const frontmatter = [
       "---",