wicked-brain 0.8.1 → 0.9.0

package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "wicked-brain",
-  "version": "0.8.1",
+  "version": "0.9.0",
   "type": "module",
   "description": "Digital brain as skills for AI coding CLIs — no vector DB, no embeddings, no infrastructure",
   "keywords": [
@@ -194,6 +194,7 @@ const actions = {
   link_health: () => db.linkHealth(),
   tag_frequency: () => ({ tags: db.tagFrequency() }),
   search_misses: (p) => ({ misses: db.searchMisses(p) }),
+  wiki_list: (p) => db.wikiList(p),
   // LSP actions
   "lsp-health": () => lsp.health(),
   "lsp-symbols": (p) => lsp.symbols(p),
@@ -12,6 +12,19 @@ function extractBodyExcerpt(content, maxLen = 300) {
   return body.trim().slice(0, maxLen);
 }
 
+/**
+ * Derives the source type from a document path.
+ * - Paths starting with "wiki/" → "wiki"
+ * - Paths starting with "memory/" or "memories/" → "memory"
+ * - Everything else → "chunk"
+ */
+export function deriveSourceType(path) {
+  const normalized = (path ?? "").replace(/\\/g, "/");
+  if (normalized.startsWith("wiki/")) return "wiki";
+  if (normalized.startsWith("memory/") || normalized.startsWith("memories/")) return "memory";
+  return "chunk";
+}
+
 function escapeFtsQuery(query) {
   return query
     .trim()
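A few illustrative calls (inputs invented, not from the package) show the mapping, including the backslash normalization:

// Hypothetical inputs, for illustration only.
deriveSourceType("wiki/sqlite-fts5.md");      // → "wiki"
deriveSourceType("memories\\2024\\note.md");  // → "memory" (backslashes normalized to "/")
deriveSourceType("src/search/index.js");      // → "chunk"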
@@ -24,6 +37,44 @@ function escapeFtsQuery(query) {
 /** Weight factor for backlink count in search ranking (PageRank-lite). */
 const BACKLINK_WEIGHT = 0.5;
 
+/**
+ * Additive boost applied to FTS5 BM25 score when a query term appears as a
+ * substring of the document's path. BM25 scores in SQLite FTS5 are negative
+ * (more negative = more relevant), so we SUBTRACT this value to push path
+ * matches ahead. Addresses the case where a query term matches a module/file
+ * name but the chunk body has only sparse mentions: a dense body chunk in an
+ * unrelated file can have a very negative BM25, so a multiplicative boost on
+ * the sparse-but-path-matching chunk's weaker score is insufficient. A flat
+ * additive bonus larger than the typical BM25 magnitude reliably promotes it.
+ */
+const PATH_MATCH_BOOST = 20;
+
+/**
+ * Overfetch multiplier for path-name boost re-ranking. We pull this many times
+ * the requested limit from FTS so that boosted rows below the BM25 cutoff can
+ * still be promoted into the top N.
+ */
+const PATH_BOOST_OVERFETCH = 5;
+
+/** Tokenize a free-text query the same way we want to match against paths:
+ * lowercase, split on non-word (underscores preserved). */
+function tokenizeQueryForPath(query) {
+  return query
+    .toLowerCase()
+    .split(/[^\w]+/)
+    .filter(Boolean);
+}
+
+/** Returns true if any query term appears as a substring of the lowercased path. */
+function pathMatchesQuery(path, terms) {
+  if (!path || terms.length === 0) return false;
+  const lowered = path.toLowerCase();
+  for (const term of terms) {
+    if (lowered.includes(term)) return true;
+  }
+  return false;
+}
+
 /** Weight factor for average backlink confidence in search ranking. */
 const CONFIDENCE_WEIGHT = 0.3;
 
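The two helpers behave as in this small sketch (inputs are invented, not from the package):

// Illustrative calls only.
tokenizeQueryForPath("Task_queue retry-policy");
// → ["task_queue", "retry", "policy"]   (underscore kept, "-" and spaces split)
pathMatchesQuery("src/task_queue.js", ["task_queue", "retry", "policy"]); // → true
pathMatchesQuery("src/scheduler.js",  ["task_queue", "retry", "policy"]); // → false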
@@ -267,7 +318,11 @@ export class SqliteSearch {
     const sinceClause = since ? `AND d.indexed_at >= ?` : "";
     const sinceParams = since ? [new Date(since).getTime()] : [];
 
-    const rows = this.#db
+    // Overfetch so the path-name boost can promote rows that sit below the
+    // raw BM25 cutoff. We re-rank in JS, then slice to the requested limit.
+    const fetchLimit = (limit + offset) * PATH_BOOST_OVERFETCH;
+
+    const rawRows = this.#db
       .prepare(`
         SELECT
           d.id,
@@ -277,7 +332,8 @@ export class SqliteSearch {
           SUBSTR(d.content, 1, 1000) AS raw_content,
           COALESCE(link_count.cnt, 0) AS backlink_count,
           COALESCE(ac.cnt, 0) AS access_count,
-          COALESCE(link_conf.avg_conf, 0.5) AS avg_backlink_confidence
+          COALESCE(link_conf.avg_conf, 0.5) AS avg_backlink_confidence,
+          (f.rank - (COALESCE(link_count.cnt, 0) * ${BACKLINK_WEIGHT}) - (COALESCE(ac.cnt, 0) * ${SEARCH_ACCESS_WEIGHT}) - (COALESCE(link_conf.avg_conf, 0.5) * ${CONFIDENCE_WEIGHT})) AS composite_score
         FROM documents_fts f
         JOIN documents d ON d.id = f.id
         LEFT JOIN (
@@ -297,15 +353,29 @@ export class SqliteSearch {
         ) ac ON d.id = ac.doc_id
         WHERE documents_fts MATCH ?
         ${sinceClause}
-        ORDER BY (f.rank - (COALESCE(link_count.cnt, 0) * ${BACKLINK_WEIGHT}) - (COALESCE(ac.cnt, 0) * ${SEARCH_ACCESS_WEIGHT}) - (COALESCE(link_conf.avg_conf, 0.5) * ${CONFIDENCE_WEIGHT}))
-        LIMIT ? OFFSET ?
+        ORDER BY composite_score
+        LIMIT ?
       `)
-      .all(escaped, ...sinceParams, limit, offset)
-      .map((row) => {
-        const body_excerpt = extractBodyExcerpt(row.raw_content ?? "");
-        delete row.raw_content;
-        return { ...row, body_excerpt };
-      });
+      .all(escaped, ...sinceParams, fetchLimit);
+
+    // Path-name boost: if any query term appears in the path, subtract
+    // PATH_MATCH_BOOST from the (negative) composite score so it sorts higher.
+    const queryTerms = tokenizeQueryForPath(query);
+    for (const row of rawRows) {
+      row.boosted_score = pathMatchesQuery(row.path, queryTerms)
+        ? row.composite_score - PATH_MATCH_BOOST
+        : row.composite_score;
+    }
+    rawRows.sort((a, b) => a.boosted_score - b.boosted_score);
+
+    const rows = rawRows.slice(offset, offset + limit).map((row) => {
+      const body_excerpt = extractBodyExcerpt(row.raw_content ?? "");
+      const source_type = deriveSourceType(row.path);
+      delete row.raw_content;
+      delete row.composite_score;
+      delete row.boosted_score;
+      return { ...row, source_type, body_excerpt };
+    });
 
     const countRow = this.#db
       .prepare(
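A standalone sketch of the new re-rank step, using the helpers above on invented rows and scores (not data from the package), plus the overfetch arithmetic for a hypothetical page:

// Hypothetical page request: limit 10, offset 20 → fetchLimit = (10 + 20) * 5 = 150 rows.
// Re-rank two invented rows; more negative sorts first.
const sample = [
  { path: "src/scheduler.js",  composite_score: -8.4 },  // dense body match, unrelated file
  { path: "src/task_queue.js", composite_score: -1.2 },  // sparse body match, path hits the query
];
const terms = tokenizeQueryForPath("task queue");
for (const row of sample) {
  row.boosted_score = pathMatchesQuery(row.path, terms)
    ? row.composite_score - PATH_MATCH_BOOST   // -1.2 - 20 = -21.2
    : row.composite_score;
}
sample.sort((a, b) => a.boosted_score - b.boosted_score);
// A multiplicative boost (-1.2 * 2 = -2.4) would still lose to -8.4;
// the flat subtraction puts task_queue.js first (-21.2 < -8.4).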
@@ -371,7 +441,7 @@ export class SqliteSearch {
         LIMIT ?
       `)
       .all(escaped, limit);
-      allResults.push(...rows);
+      allResults.push(...rows.map((r) => ({ ...r, source_type: deriveSourceType(r.path) })));
     } finally {
       this.#db.prepare(`DETACH DATABASE ${attached}`).run();
     }
@@ -816,6 +886,83 @@ export class SqliteSearch {
     return row || null;
   }
 
+  /**
+   * List wiki articles with metadata (no full content).
+   * Optional FTS5 keyword filter.
+   * @param {object} opts
+   * @param {string|null} [opts.query] - Optional FTS5 query to filter articles
+   * @param {number} [opts.limit=50]
+   * @returns {{ articles: Array<{ path: string, title: string|null, description: string|null, tags: string[], word_count: number }> }}
+   */
+  wikiList({ query = null, limit = 50 } = {}) {
+    let rows;
+
+    if (query) {
+      const escaped = escapeFtsQuery(query);
+      if (!escaped) return { articles: [] };
+
+      rows = this.#db.prepare(`
+        SELECT d.path, d.frontmatter, d.content
+        FROM documents_fts f
+        JOIN documents d ON d.id = f.id
+        WHERE documents_fts MATCH ?
+          AND d.path LIKE 'wiki/%'
+        ORDER BY rank
+        LIMIT ?
+      `).all(escaped, limit);
+    } else {
+      rows = this.#db.prepare(`
+        SELECT path, frontmatter, content
+        FROM documents
+        WHERE path LIKE 'wiki/%'
+        ORDER BY path
+        LIMIT ?
+      `).all(limit);
+    }
+
+    const articles = rows.map((row) => {
+      const fm = row.frontmatter || SqliteSearch.#extractFrontmatter(row.content) || "";
+      const title = this.#extractFrontmatterField(fm, "title") || null;
+      const description = this.#extractFrontmatterField(fm, "description") || null;
+      const tags = this.#parseTags(fm);
+      const word_count = (row.content || "").split(/\s+/).filter(Boolean).length;
+      return { path: row.path, title, description, tags, word_count };
+    });
+
+    return { articles };
+  }
+
+  /**
+   * Parse tags from frontmatter string.
+   * Supports space-separated inline, JSON array, and YAML block list formats.
+   */
+  #parseTags(fm) {
+    if (!fm) return [];
+
+    // Inline: tags: tag1 tag2 tag3 or tags: ["tag1","tag2"]
+    const inlineMatch = fm.match(/^tags:[ \t]+(\S.*)$/m);
+    if (inlineMatch) {
+      const raw = inlineMatch[1].trim();
+      if (raw.startsWith("[")) {
+        try {
+          return JSON.parse(raw).map(String);
+        } catch {
+          return raw.replace(/[\[\]"]/g, "").split(/[\s,]+/).filter(Boolean);
+        }
+      }
+      return raw.split(/\s+/).filter(Boolean);
+    }
+
+    // YAML block list
+    const blockMatch = fm.match(/^tags:\s*\n((?:\s+-\s+.+\n?)+)/m);
+    if (blockMatch) {
+      const listLines = blockMatch[1].match(/^\s+-\s+(.+)$/gm) || [];
+      return listLines.map((line) => line.replace(/^\s+-\s+/, "").trim()).filter(Boolean);
+    }
+
+    return [];
+  }
+
   close() {
     this.#db.close();
   }
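A minimal usage sketch of the new method behind the wiki_list action; the article values are invented, only the call and return shapes come from the code above, and the frontmatter samples show the three tag formats #parseTags accepts:

// Hypothetical usage; field values are invented.
const { articles } = db.wikiList({ query: "sqlite fts5", limit: 10 });
// articles[0] might look like:
// { path: "wiki/search-ranking.md", title: "Search ranking", description: null,
//   tags: ["search", "fts5"], word_count: 412 }

// #parseTags handles any of these frontmatter shapes:
//   tags: search fts5            → ["search", "fts5"]
//   tags: ["search", "fts5"]     → ["search", "fts5"]
//   tags:
//     - search
//     - fts5                     → ["search", "fts5"]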
@@ -1,6 +1,6 @@
 {
   "name": "wicked-brain-server",
-  "version": "0.8.1",
+  "version": "0.9.0",
   "type": "module",
   "description": "SQLite FTS5 search server for wicked-brain digital knowledge bases",
   "keywords": [
@@ -323,12 +323,14 @@ async function ingestFile(filePath) {
     // Note: These keywords are for FTS indexing. The LLM-based ingest
     // generates richer synonym-expanded tags in the contains: field.
     // This batch script extracts basic keywords only.
+    // Replace non-word chars with space (not empty) so adjacent tokens don't glue.
+    // Preserve underscores so snake_case identifiers survive. Floor at 4 chars so
+    // short domain terms like 'task', 'hook', 'crew' aren't dropped.
+    const cleaned = chunks[i].toLowerCase().replace(/[^a-z0-9_\s-]/g, " ");
+    const tokens = cleaned.split(/\s+/).filter(Boolean);
     const keywords = [...new Set(
-      chunks[i].toLowerCase()
-        .replace(/[^a-z0-9\s-]/g, "")
-        .split(/\s+/)
-        .filter(w => w.length > 5 && !STOP.has(w))
-    )].slice(0, 10);
+      tokens.filter(w => w.length >= 4 && !STOP.has(w))
+    )].slice(0, 12);
 
     const frontmatter = [
       "---",