claude-mem-lite 2.0.13 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  // Runs at Stop hook to track adoption and outcomes of recommendations
3
3
 
4
4
  import { getSessionInvocations, updateInvocation, updateResourceStats } from './registry.mjs';
5
- import { debugCatch } from './utils.mjs';
5
+ import { debugCatch, EDIT_TOOLS } from './utils.mjs';
6
6
 
7
7
  // ─── Adoption Detection ──────────────────────────────────────────────────────
8
8
 
@@ -100,6 +100,30 @@ function detectAdoption(invocation, sessionEvents) {
100
100
  }
101
101
  }
102
102
 
103
+ // Behavioral adoption: detect usage patterns matching the recommended resource
104
+ const resourceLower = resource_name.toLowerCase();
105
+
106
+ // Debugging pattern: a Read (anywhere in the session), a failing Bash call, then an edit after that failure
107
+ if (resourceLower.includes('debug') || resourceLower.includes('troubleshoot')) {
108
+ let hasRead = false, hasBashError = false, hasEditAfterError = false;
109
+ for (const e of sessionEvents) {
110
+ if (e.tool_name === 'Read') hasRead = true;
111
+ if (e.tool_name === 'Bash' && /error|fail|exception/i.test(e.tool_response || '')) hasBashError = true;
112
+ if (hasBashError && EDIT_TOOLS.has(e.tool_name)) hasEditAfterError = true;
113
+ }
114
+ if (hasRead && hasBashError && hasEditAfterError) return true;
115
+ }
116
+
117
+ // Code review pattern: Agent with 'review' in prompt/description
118
+ if (resourceLower.includes('review')) {
119
+ for (const e of sessionEvents) {
120
+ if (e.tool_name === 'Agent') {
121
+ const text = ((e.tool_input?.prompt || '') + (e.tool_input?.description || '')).toLowerCase();
122
+ if (text.includes('review')) return true;
123
+ }
124
+ }
125
+ }
126
+
103
127
  return false;
104
128
  }
105
129
 
@@ -127,7 +151,7 @@ function detectOutcome(sessionEvents) {
127
151
  lastErrorIndex = i;
128
152
  }
129
153
 
130
- if (['Edit', 'Write', 'NotebookEdit'].includes(e.tool_name)) {
154
+ if (EDIT_TOOLS.has(e.tool_name)) {
131
155
  hasEdit = true;
132
156
  if (lastErrorIndex >= 0 && i > lastErrorIndex) {
133
157
  errorThenFix = true;
@@ -187,3 +211,6 @@ export async function collectFeedback(db, sessionId, sessionEvents = []) {
187
211
  debugCatch(e, 'collectFeedback');
188
212
  }
189
213
  }
214
+
215
+ // Test exports
216
+ export { detectAdoption as _detectAdoption };
@@ -9,6 +9,13 @@ import { DB_DIR } from './schema.mjs';
9
9
 
10
10
  const MAX_INJECTION_CHARS = 3000;
11
11
 
12
/**
 * Truncate multi-line content while preserving newlines (unlike utils.truncate,
 * which flattens them).
 *
 * @param {string} str Text to shorten; falsy input yields ''.
 * @param {number} max Maximum length of the result in UTF-16 code units,
 *   including the trailing ellipsis.
 * @returns {string} The trimmed text, shortened to at most `max` units and
 *   ending in '…' when something was cut off.
 */
function truncateContent(str, max) {
  if (!str) return '';
  const trimmed = str.trim();
  if (!(trimmed.length > max)) return trimmed;

  // A budget below 1 leaves no room even for the ellipsis. Without this guard
  // slice(0, max - 1) receives a negative end index, which counts from the END
  // of the string and keeps almost all of it.
  if (max < 1) return '';

  let cut = max - 1;
  // Do not end on a lone high surrogate: that would emit half of an astral
  // character (e.g. an emoji) right before the ellipsis.
  const lastUnit = trimmed.charCodeAt(cut - 1);
  if (cut > 0 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;

  return trimmed.slice(0, cut) + '…';
}
18
+
12
19
  // Allowed base directories for resource file reads (defense-in-depth)
13
20
  const ALLOWED_BASES = [
14
21
  join(homedir(), '.claude'),
@@ -79,7 +86,7 @@ function injectSkillManaged(resource) {
79
86
  } catch {}
80
87
  }
81
88
 
82
- const truncatedContent = truncate(content, MAX_INJECTION_CHARS - 300);
89
+ const truncatedContent = truncateContent(content, MAX_INJECTION_CHARS - 300);
83
90
 
84
91
  return `[Auto-suggestion] Recommended skill for this task: "${resource.name}"
85
92
  Capability: ${truncate(resource.capability_summary, 100)}
@@ -113,7 +120,7 @@ function injectAgent(resource) {
113
120
  }
114
121
 
115
122
  if (agentDef) {
116
- const truncatedDef = truncate(agentDef, MAX_INJECTION_CHARS - 300);
123
+ const truncatedDef = truncateContent(agentDef, MAX_INJECTION_CHARS - 300);
117
124
  return `[Auto-suggestion] A specialized agent "${resource.name}" is recommended for this task.
118
125
  Capability: ${truncate(resource.capability_summary, 100)}
119
126
  Use the Agent tool with this agent definition:
package/dispatch.mjs CHANGED
@@ -6,7 +6,7 @@
6
6
 
7
7
  import { basename, join } from 'path';
8
8
  import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
9
- import { retrieveResources, buildEnhancedQuery, buildQueryFromText } from './registry-retriever.mjs';
9
+ import { retrieveResources, buildEnhancedQuery, buildQueryFromText, DISPATCH_SYNONYMS } from './registry-retriever.mjs';
10
10
  import { renderInjection } from './dispatch-inject.mjs';
11
11
  import { updateResourceStats, recordInvocation } from './registry.mjs';
12
12
  import { callHaikuJSON } from './haiku-client.mjs';
@@ -199,6 +199,7 @@ export function extractContextSignals(event, sessionCtx = {}) {
199
199
  intent: '', // comma-separated intent tags, primary first
200
200
  primaryIntent: '', // first/strongest intent (for column-targeted queries)
201
201
  suppressedIntents: [], // intents detected but actively suppressed (e.g. test-run)
202
+ rawKeywords: [], // domain-specific keywords not captured by intent patterns (e.g. "seo")
202
203
  techStack: '',
203
204
  action: '',
204
205
  errorDomain: '',
@@ -210,6 +211,11 @@ export function extractContextSignals(event, sessionCtx = {}) {
210
211
  signals.intent = intent;
211
212
  signals.suppressedIntents = suppressed;
212
213
  signals.primaryIntent = signals.intent.split(',')[0] || '';
214
+ // Extract raw domain keywords not captured by intent patterns.
215
+ // Intent patterns cover generic actions (test, fix, review) but miss domain
216
+ // topics (seo, kubernetes, oauth). These raw keywords supplement the enhanced
217
+ // query to ensure domain-specific resources are found.
218
+ signals.rawKeywords = extractRawKeywords(sessionCtx.userPrompt, signals.intent);
213
219
  }
214
220
 
215
221
  // Infer tech stack from recent files, current tool_input, or prompt text
@@ -263,7 +269,7 @@ const _WRITE_TEST_CJK = /(?:写测试|加测试|补测试|补单测|缺测试|
263
269
  * @returns {string} Comma-separated intent tags, primary intent listed first (e.g. "test,fix")
264
270
  */
265
271
  function extractIntent(prompt) {
266
- if (!prompt) return '';
272
+ if (!prompt) return { intent: '', suppressed: [] };
267
273
  // English patterns — use trailing-optional boundaries for verb conjugations:
268
274
  // \b prefix ensures word start, but many suffixed forms (debugging, refactoring, deployed)
269
275
  // fail with trailing \b. Use \b...\w* for words that commonly have suffixes.
@@ -366,7 +372,53 @@ function extractIntent(prompt) {
366
372
  }
367
373
 
368
374
  /** Exported for testing. */
369
- export { NEGATION_EN as _NEGATION_EN, NEGATION_CJK as _NEGATION_CJK };
375
+ export { NEGATION_EN as _NEGATION_EN, NEGATION_CJK as _NEGATION_CJK, reRankByKeywords as _reRankByKeywords, applyAdoptionDecay as _applyAdoptionDecay, passesConfidenceGate as _passesConfidenceGate };
376
+
377
// Stop words for raw keyword extraction.
// Includes common English stop words + action verbs already covered by intent patterns.
// Domain-specific technical terms (seo, kubernetes, react, etc.) pass through.
const RAW_KW_STOP = new Set([
  // Standard English stop words
  'the', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had',
  'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might',
  'can', 'shall', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by',
  'from', 'as', 'into', 'about', 'and', 'or', 'but', 'not', 'no', 'this',
  'that', 'it', 'its', 'my', 'your', 'me', 'us', 'you', 'he', 'she', 'we', 'they',
  'if', 'so', 'just', 'also', 'then', 'how', 'what', 'when', 'where', 'who',
  'use', 'using', 'need', 'want', 'check', 'look', 'help', 'please', 'let',
  'some', 'all', 'any', 'each', 'every', 'new', 'like', 'before', 'after',
  // Action verbs — captured by intent patterns, not domain keywords
  'design', 'build', 'create', 'make', 'add', 'remove', 'delete', 'update',
  'write', 'read', 'run', 'test', 'tests', 'testing', 'fix', 'debug',
  'review', 'deploy', 'commit', 'push', 'plan', 'clean', 'refactor',
  'find', 'get', 'set', 'show', 'list', 'change', 'move', 'copy', 'send',
  'start', 'stop', 'open', 'close', 'save', 'load', 'install', 'setup',
  'implement', 'configure', 'code', 'file', 'function', 'module', 'app',
]);

/**
 * Extract raw domain keywords from prompt text that aren't captured by intent patterns.
 * Only runs of ASCII letters (2+ chars) are considered, so English words embedded
 * in CJK text are picked up (e.g. "seo" from "用seo技能检查下").
 * Stop words, words equal to an already-extracted intent tag, and duplicates are dropped.
 *
 * @param {string} prompt User prompt text; any non-string or empty value yields [].
 * @param {string} intentStr Comma-separated intents already extracted
 * @param {number} [maxKeywords=5] Upper bound on the number of keywords returned
 * @returns {string[]} Lowercased keywords in order of first appearance (at most `maxKeywords`)
 */
function extractRawKeywords(prompt, intentStr, maxKeywords = 5) {
  if (typeof prompt !== 'string' || !prompt || !(maxKeywords > 0)) return [];

  // Words are compared lowercased, so normalize the intent tags the same way.
  const intentSet = new Set(
    String(intentStr || '')
      .split(',')
      .map((tag) => tag.trim().toLowerCase())
      .filter(Boolean),
  );

  const seen = new Set();
  const keywords = [];
  for (const [word] of prompt.matchAll(/[a-z]{2,}/gi)) {
    const lower = word.toLowerCase();
    if (RAW_KW_STOP.has(lower) || intentSet.has(lower) || seen.has(lower)) continue;
    seen.add(lower);
    keywords.push(lower);
    // Stop scanning once the cap is reached; later words could never be returned.
    if (keywords.length >= maxKeywords) break;
  }
  return keywords;
}
370
422
 
371
423
  /**
372
424
  * Infer tech stack from file extensions.
@@ -602,6 +654,91 @@ export function isRecentlyRecommended(db, resourceId, sessionId) {
602
654
  return !!cooldownHit;
603
655
  }
604
656
 
657
+ // ─── Keyword Re-ranking ──────────────────────────────────────────────────────
658
+
659
/**
 * Re-rank results to prefer resources matching rawKeywords in their intent_tags.
 * When a user mentions domain-specific terms (e.g. "seo"), resources in that domain
 * should rank above generic resources that only match the action intent (e.g. "review").
 * Within each group (matching vs non-matching), the incoming order is preserved.
 * Matching is a case-insensitive substring test against the intent_tags string.
 * No-op (same array returned) when there are no usable keywords or fewer than two results.
 *
 * @param {object[]} results FTS5 results
 * @param {string[]} rawKeywords Domain keywords from prompt
 * @returns {object[]} Re-ranked results
 */
function reRankByKeywords(results, rawKeywords) {
  if (!Array.isArray(results) || results.length <= 1 || !rawKeywords?.length) return results;

  // Tags are lowercased below, so keywords must be too — otherwise a keyword such
  // as "SEO" could never match. Blank keywords carry no signal and are ignored.
  const keywords = rawKeywords
    .map((kw) => String(kw).trim().toLowerCase())
    .filter(Boolean);
  if (keywords.length === 0) return results;

  const matching = [];
  const rest = [];
  for (const r of results) {
    const tags = String(r.intent_tags || '').toLowerCase();
    const bucket = keywords.some((kw) => tags.includes(kw)) ? matching : rest;
    bucket.push(r);
  }
  return [...matching, ...rest];
}
683
+
684
/**
 * Penalize "zombie" resources: ones that keep being recommended but are
 * almost never adopted.
 *
 * The adoption rate is Laplace-smoothed as (adopt_count + 1) / (recommend_count + 2).
 * Resources recommended fewer than 10 times are passed through untouched
 * (cold start protection). Otherwise the first matching tier applies:
 *   - more than 100 recommendations and rate < 0.01 → dropped from the list
 *   - more than 50 recommendations and rate < 0.02  → relevance × 0.1
 *   - more than 20 recommendations and rate < 0.05  → relevance × 0.3
 * Penalized entries are shallow copies flagged with `_decayed: true`; all other
 * entries are returned as the original objects. Order is not changed.
 *
 * @param {object[]} results FTS5 results with recommend_count/adopt_count
 * @returns {object[]} Surviving results, some with decayed relevance
 */
function applyAdoptionDecay(results) {
  // Most severe tier first; the first tier whose two bounds both hold wins.
  const tiers = [
    { moreThan: 100, rateBelow: 0.01, factor: 0 },
    { moreThan: 50, rateBelow: 0.02, factor: 0.1 },
    { moreThan: 20, rateBelow: 0.05, factor: 0.3 },
  ];

  const kept = [];
  for (const resource of results) {
    const recommended = resource.recommend_count || 0;
    const adopted = resource.adopt_count || 0;

    // Cold start protection: too little data to judge.
    if (recommended < 10) {
      kept.push(resource);
      continue;
    }

    const smoothedRate = (adopted + 1) / (recommended + 2);
    const tier = tiers.find((t) => recommended > t.moreThan && smoothedRate < t.rateBelow);
    const factor = tier ? tier.factor : 1.0;

    if (factor === 0) continue; // blocked entirely
    if (factor < 1) {
      kept.push({ ...resource, relevance: resource.relevance * factor, _decayed: true });
    } else {
      kept.push(resource);
    }
  }
  return kept;
}
710
+
711
/**
 * Gate results by confidence: keep only resources whose intent_tags contain at
 * least one accepted token. Accepted tokens are the detected intents, their
 * DISPATCH_SYNONYMS expansions, and the raw domain keywords.
 * Prevents recommendations based solely on incidental text overlap.
 * When no structured intent was detected the gate is skipped and `results`
 * is returned unchanged.
 *
 * @param {object[]} results FTS5 results
 * @param {object} signals Context signals; `intent` is a comma-separated string
 *   (e.g. "test,fix") or an array of tags, `rawKeywords` is an optional string array
 * @returns {object[]} Filtered results that pass the gate
 */
function passesConfidenceGate(results, signals) {
  const rawIntent = signals?.intent;
  let intentList = [];
  if (typeof rawIntent === 'string') intentList = rawIntent.split(',');
  else if (Array.isArray(rawIntent)) intentList = rawIntent;
  const intentTokens = intentList
    .filter(Boolean)
    .map((token) => String(token).trim())
    .filter(Boolean);

  // No structured intent → skip gate (rawKeywords match FTS5 text columns, not intent_tags)
  if (intentTokens.length === 0) return results;

  // Tags are lowercased before comparison, so every accepted token is too.
  const accepted = new Set();
  const accept = (value) => {
    const normalized = String(value).trim().toLowerCase();
    if (normalized) accepted.add(normalized);
  };

  for (const token of intentTokens) {
    accept(token);
    // Expand through DISPATCH_SYNONYMS so "fast" also matches "performance", etc.
    // Own-property check: a token such as "constructor" must not resolve to an
    // inherited member, which is not iterable and would throw.
    if (Object.prototype.hasOwnProperty.call(DISPATCH_SYNONYMS, token)) {
      for (const synonym of DISPATCH_SYNONYMS[token] || []) accept(synonym);
    }
  }
  for (const keyword of signals?.rawKeywords || []) accept(keyword);

  return results.filter((r) => {
    const tags = (r.intent_tags || '').toLowerCase().split(/[\s,]+/).filter(Boolean);
    return tags.some((tag) => accepted.has(tag));
  });
}
741
+
605
742
  // ─── Main Dispatch Functions ─────────────────────────────────────────────────
606
743
 
607
744
  /**
@@ -621,7 +758,10 @@ export async function dispatchOnSessionStart(db, userPrompt, sessionId) {
621
758
  const signals = extractContextSignals({ tool_name: '_session_start' }, { userPrompt });
622
759
  const enhancedQuery = buildEnhancedQuery(signals);
623
760
 
624
- let results = enhancedQuery ? retrieveResources(db, enhancedQuery, { limit: 3, projectDomains }) : [];
761
+ // Fetch extra results when rawKeywords are present: BM25 may rank intent-matching
762
+ // resources above domain-specific ones; extra headroom lets reRankByKeywords promote them.
763
+ const fetchLimit = signals.rawKeywords.length > 0 ? 8 : 3;
764
+ let results = enhancedQuery ? retrieveResources(db, enhancedQuery, { limit: fetchLimit, projectDomains }) : [];
625
765
 
626
766
  // Fallback: broad text query (catches prompts without clear intent patterns)
627
767
  if (results.length === 0) {
@@ -637,6 +777,11 @@ export async function dispatchOnSessionStart(db, userPrompt, sessionId) {
637
777
  }
638
778
  }
639
779
 
780
+ results = reRankByKeywords(results, signals.rawKeywords);
781
+ results = applyAdoptionDecay(results);
782
+ results = passesConfidenceGate(results, signals);
783
+ results = results.slice(0, 3);
784
+
640
785
  let tier = 2;
641
786
 
642
787
  // Tier 3: Haiku semantic fallback (SessionStart has 10s budget)
@@ -702,7 +847,12 @@ export async function dispatchOnUserPrompt(db, userPrompt, sessionId) {
702
847
  const signals = extractContextSignals({ tool_name: '_user_prompt' }, { userPrompt });
703
848
  const enhancedQuery = buildEnhancedQuery(signals);
704
849
 
705
- let results = enhancedQuery ? retrieveResources(db, enhancedQuery, { limit: 3, projectDomains }) : [];
850
+ // Fetch extra results when rawKeywords are present: the top-3 by BM25 may be
851
+ // dominated by intent synonyms (e.g. "review" expands to many code-review terms),
852
+ // pushing domain-specific resources (e.g. SEO) below the limit. Extra headroom
853
+ // lets reRankByKeywords() promote domain-matched resources to the top.
854
+ const fetchLimit = signals.rawKeywords.length > 0 ? 8 : 3;
855
+ let results = enhancedQuery ? retrieveResources(db, enhancedQuery, { limit: fetchLimit, projectDomains }) : [];
706
856
 
707
857
  // Fallback: broad text query
708
858
  if (results.length === 0) {
@@ -717,10 +867,25 @@ export async function dispatchOnUserPrompt(db, userPrompt, sessionId) {
717
867
  }
718
868
  }
719
869
 
870
+ // Re-rank: when rawKeywords are present, prefer resources whose intent_tags
871
+ // match those keywords. "帮我做一下SEO审查" → rawKeywords=["seo"] → SEO audit
872
+ // resources should rank above generic code-review resources.
873
+ results = reRankByKeywords(results, signals.rawKeywords);
874
+ results = applyAdoptionDecay(results);
875
+ results = passesConfidenceGate(results, signals);
876
+ results = results.slice(0, 3);
877
+
720
878
  if (results.length === 0) return null;
721
879
 
722
- // Skip if low confidence (no Haiku fallback — stay fast)
723
- if (needsHaikuDispatch(results)) return null;
880
+ // Skip if low confidence (no Haiku fallback — stay fast).
881
+ // Exception: when results match the user's raw domain keywords (e.g. "seo"),
882
+ // close BM25 scores indicate "multiple equally good options in the right domain"
883
+ // rather than "ambiguous/wrong match". Trust the domain match.
884
+ if (needsHaikuDispatch(results)) {
885
+ const hasKeywordMatch = signals.rawKeywords?.length > 0 && results.some(r =>
886
+ signals.rawKeywords.some(kw => (r.intent_tags || '').toLowerCase().includes(kw)));
887
+ if (!hasKeywordMatch) return null;
888
+ }
724
889
 
725
890
  // Filter by cooldown + session dedup (prevents double-recommend with SessionStart)
726
891
  const viable = sessionId
@@ -769,7 +934,9 @@ export async function dispatchOnPreToolUse(db, event, sessionCtx = {}) {
769
934
  const projectDomains = detectProjectDomains();
770
935
 
771
936
  // Tier 2: FTS5 retrieval
772
- const results = retrieveResources(db, query, { limit: 3, projectDomains });
937
+ let results = retrieveResources(db, query, { limit: 3, projectDomains });
938
+ results = applyAdoptionDecay(results);
939
+ results = passesConfidenceGate(results, signals);
773
940
  if (results.length === 0) return null;
774
941
 
775
942
  const tier = 2; // Tier 3 disabled for PreToolUse — 2s hook timeout insufficient
package/hook-episode.mjs CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  import { join } from 'path';
5
5
  import { readFileSync, writeFileSync, unlinkSync, readdirSync, openSync, closeSync, writeSync, renameSync, statSync, constants as fsConstants } from 'fs';
6
- import { inferProject } from './utils.mjs';
6
+ import { inferProject, EDIT_TOOLS } from './utils.mjs';
7
7
  import { RUNTIME_DIR } from './hook-shared.mjs';
8
8
 
9
9
  /**
@@ -216,7 +216,7 @@ export function mergePendingEntries(episode) {
216
216
  */
217
217
  export function episodeHasSignificantContent(episode) {
218
218
  return episode.entries.some(e =>
219
- ['Edit', 'Write', 'NotebookEdit'].includes(e.tool) ||
219
+ EDIT_TOOLS.has(e.tool) ||
220
220
  (e.tool === 'Bash' && e.isError)
221
221
  );
222
222
  }
package/hook-llm.mjs CHANGED
@@ -6,7 +6,7 @@ import { existsSync, readFileSync, unlinkSync, readdirSync } from 'fs';
6
6
  import {
7
7
  jaccardSimilarity, truncate, clampImportance, computeRuleImportance,
8
8
  inferProject, parseJsonFromLLM,
9
- computeMinHash, estimateJaccardFromMinHash, debugCatch, debugLog,
9
+ computeMinHash, estimateJaccardFromMinHash, cjkBigrams, EDIT_TOOLS, debugCatch, debugLog,
10
10
  } from './utils.mjs';
11
11
  import { acquireLLMSlot, releaseLLMSlot } from './hook-semaphore.mjs';
12
12
  import {
@@ -16,6 +16,14 @@ import {
16
16
 
17
17
  // ─── Save Observation to DB ─────────────────────────────────────────────────
18
18
 
19
/**
 * Assemble the FTS5 `text` column for an observation.
 *
 * @param {object} obs Observation; `concepts` and `facts` are used only when they
 *   are arrays, `title` and `narrative` feed the CJK bigram helper.
 * @returns {{conceptsText: string, factsText: string, textField: string}}
 *   The space-joined concepts, the space-joined facts, and the combined field
 *   (concepts + facts + CJK bigrams, empty parts omitted).
 */
function buildFtsTextField(obs) {
  const joinIfArray = (list) => (Array.isArray(list) ? list.join(' ') : '');

  const conceptsText = joinIfArray(obs.concepts);
  const factsText = joinIfArray(obs.facts);
  const bigramText = cjkBigrams((obs.title || '') + ' ' + (obs.narrative || ''));

  // Skip empty parts so the joined field never carries stray separators.
  const parts = [];
  for (const part of [conceptsText, factsText, bigramText]) {
    if (part) parts.push(part);
  }

  return { conceptsText, factsText, textField: parts.join(' ') };
}
26
+
19
27
  export function saveObservation(obs, projectOverride, sessionIdOverride, externalDb) {
20
28
  const db = externalDb || openDb();
21
29
  if (!db) return null;
@@ -58,9 +66,7 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
58
66
  }
59
67
  }
60
68
 
61
- const conceptsText = Array.isArray(obs.concepts) ? obs.concepts.join(' ') : '';
62
- const factsText = Array.isArray(obs.facts) ? obs.facts.join(' ') : '';
63
- const textField = [conceptsText, factsText].filter(Boolean).join(' ');
69
+ const { conceptsText, factsText, textField } = buildFtsTextField(obs);
64
70
 
65
71
  const result = db.prepare(`
66
72
  INSERT INTO observations (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts, files_read, files_modified, importance, minhash_sig, created_at, created_at_epoch)
@@ -160,10 +166,10 @@ function linkRelatedObservations(db, savedId, obs, episode) {
160
166
  // When LLM is unavailable, build a readable title from episode metadata
161
167
  // instead of using raw makeEntryDesc output (which contains JSON stdout).
162
168
 
163
- function buildDegradedTitle(episode) {
169
+ export function buildDegradedTitle(episode) {
164
170
  const files = (episode.files || []).filter(Boolean);
165
171
  const hasError = episode.entries.some(e => e.isError);
166
- const hasEdit = episode.entries.some(e => ['Edit', 'Write', 'NotebookEdit'].includes(e.tool));
172
+ const hasEdit = episode.entries.some(e => EDIT_TOOLS.has(e.tool));
167
173
 
168
174
  if (files.length > 0) {
169
175
  const names = files.map(f => basename(f)).slice(0, 3).join(', ');
@@ -270,8 +276,14 @@ importance: 1=routine, 2=notable (error fix, arch decision, config change), 3=cr
270
276
 
271
277
  if (!obs) {
272
278
  if (!gotSlot) debugLog('WARN', 'llm-episode', 'semaphore timeout, using degraded storage');
279
+ // If pre-saved observation exists, LLM degraded mode doesn't need to overwrite — keep pre-saved data
280
+ if (episode.savedId) {
281
+ debugLog('DEBUG', 'llm-episode', `LLM failed but pre-saved obs #${episode.savedId} exists, keeping`);
282
+ try { unlinkSync(tmpFile); } catch {}
283
+ return;
284
+ }
273
285
  const hasError = episode.entries.some(e => e.isError);
274
- const hasEdit = episode.entries.some(e => ['Edit', 'Write', 'NotebookEdit'].includes(e.tool));
286
+ const hasEdit = episode.entries.some(e => EDIT_TOOLS.has(e.tool));
275
287
  const inferredType = hasError ? 'bugfix' : hasEdit ? 'change' : 'discovery';
276
288
  obs = {
277
289
  type: inferredType,
@@ -290,7 +302,30 @@ importance: 1=routine, 2=notable (error fix, arch decision, config change), 3=cr
290
302
  if (!db) { try { unlinkSync(tmpFile); } catch {} return; }
291
303
 
292
304
  try {
293
- const savedId = saveObservation(obs, episode.project, episode.sessionId, db);
305
+ let savedId;
306
+
307
+ if (episode.savedId && obs) {
308
+ // Upgrade pre-saved observation with LLM-enriched data
309
+ const { conceptsText, factsText, textField } = buildFtsTextField(obs);
310
+ const minhashSig = computeMinHash((obs.title || '') + ' ' + (obs.narrative || ''));
311
+ db.prepare(`
312
+ UPDATE observations SET type=?, title=?, subtitle=?, narrative=?, concepts=?, facts=?,
313
+ text=?, importance=?, files_read=?, minhash_sig=?
314
+ WHERE id = ?
315
+ `).run(
316
+ obs.type, truncate(obs.title, 120), obs.subtitle || '',
317
+ truncate(obs.narrative || '', 500),
318
+ conceptsText, factsText, textField,
319
+ obs.importance,
320
+ JSON.stringify(obs.filesRead || []),
321
+ minhashSig,
322
+ episode.savedId
323
+ );
324
+ savedId = episode.savedId;
325
+ debugLog('DEBUG', 'llm-episode', `upgraded pre-saved obs #${savedId}`);
326
+ } else {
327
+ savedId = saveObservation(obs, episode.project, episode.sessionId, db);
328
+ }
294
329
 
295
330
  if (savedId) {
296
331
  try {
@@ -0,0 +1,68 @@
1
+ // claude-mem-lite — Semantic Memory Injection
2
+ // Search past observations for relevant memories to inject as context at user-prompt time.
3
+
4
+ import { sanitizeFtsQuery, debugCatch } from './utils.mjs';
5
+
6
const MAX_MEMORY_INJECTIONS = 2;
const MEMORY_LOOKBACK_MS = 14 * 86400000; // 14 days
// Multiplier applied to the absolute BM25 value per observation type; unknown types use 1.0.
const MEMORY_TYPE_BOOST = { bugfix: 1.5, decision: 1.3, discovery: 1.0, feature: 0.8, change: 0.5, refactor: 0.5 };

/**
 * Search for relevant past observations to inject as memory context.
 *
 * Quality gates: same project, importance >= 2, created within the lookback
 * window, not compressed into another observation. The 10 best BM25 rows are
 * fetched, the excluded IDs are dropped, and the rest are ranked by
 * |bm25| × type boost. Nothing is returned unless the best score reaches 1.0.
 *
 * Side effect: `access_count` is incremented for every returned observation.
 * That bookkeeping is best-effort — if it fails, the memories are still returned.
 *
 * @param {import('better-sqlite3').Database} db Memory database
 * @param {string} userPrompt User's prompt text (prompts shorter than 5 chars are ignored)
 * @param {string} project Current project
 * @param {number[]} excludeIds Observation IDs already in Key Context
 * @returns {object[]} Top memories (max 2), each with
 *   {id, type, title, importance, relevance, score}; [] on any search error
 */
export function searchRelevantMemories(db, userPrompt, project, excludeIds = []) {
  if (!db || !userPrompt || userPrompt.length < 5) return [];

  let result;
  try {
    const ftsQuery = sanitizeFtsQuery(userPrompt);
    if (!ftsQuery) return [];

    const cutoff = Date.now() - MEMORY_LOOKBACK_MS;
    const excludeSet = new Set(excludeIds);

    const selectStmt = db.prepare(`
      SELECT o.id, o.type, o.title, o.importance,
        bm25(observations_fts) as relevance
      FROM observations_fts
      JOIN observations o ON o.id = observations_fts.rowid
      WHERE observations_fts MATCH ?
        AND o.project = ?
        AND o.importance >= 2
        AND o.created_at_epoch > ?
        AND COALESCE(o.compressed_into, 0) = 0
      ORDER BY bm25(observations_fts)
      LIMIT 10
    `);
    const rows = selectStmt.all(ftsQuery, project, cutoff);

    // Score: |BM25| × type boost, best first, Key Context IDs excluded
    const scored = rows
      .filter((r) => !excludeSet.has(r.id))
      .map((r) => ({
        ...r,
        score: Math.abs(r.relevance) * (MEMORY_TYPE_BOOST[r.type] || 1.0),
      }))
      .sort((a, b) => b.score - a.score);

    // Strict threshold: only inject if best match has meaningful score
    if (scored.length === 0 || scored[0].score < 1.0) return [];

    result = scored.slice(0, MAX_MEMORY_INJECTIONS);
  } catch (e) {
    debugCatch(e, 'searchRelevantMemories');
    return [];
  }

  // Update access_count for injected memories. Kept outside the search try-block
  // so that a failed write cannot throw away results that were already found.
  try {
    const updateStmt = db.prepare('UPDATE observations SET access_count = COALESCE(access_count, 0) + 1 WHERE id = ?');
    for (const r of result) {
      updateStmt.run(r.id);
    }
  } catch (e) {
    debugCatch(e, 'searchRelevantMemories-accessCount');
  }

  return result;
}
package/hook-shared.mjs CHANGED
@@ -121,6 +121,18 @@ export function spawnBackground(bgEvent, ...extraArgs) {
121
121
 
122
122
  export function sleep(ms) { return new Promise(r => setTimeout(r, ms)); }
123
123
 
124
// ─── Injection Budget (per-session, in-memory) ──────────────────────────────
// Caps how many context injections the hooks may emit so the context does not bloat.
// Callers reset the counter at session start and consult hasInjectionBudget()
// before each injection.
// NOTE(review): the counter is plain module state, so it only spans hook calls
// that run inside the same process — confirm that matches how hooks are invoked.

/** Maximum number of injections allowed before the budget is exhausted. */
export const MAX_INJECTIONS_PER_SESSION = 3;

let injectionsUsed = 0;

/** @returns {number} Injections recorded since the last reset. */
export function getInjectionCount() {
  return injectionsUsed;
}

/** Record that one injection was emitted. */
export function incrementInjection() {
  injectionsUsed += 1;
}

/** Start a fresh budget by clearing the counter. */
export function resetInjectionBudget() {
  injectionsUsed = 0;
}

/** @returns {boolean} True while fewer than MAX_INJECTIONS_PER_SESSION injections were recorded. */
export function hasInjectionBudget() {
  return MAX_INJECTIONS_PER_SESSION > injectionsUsed;
}
135
+
124
136
  // ─── Tool Event Tracking (for dispatch feedback) ────────────────────────────
125
137
  // PostToolUse appends feedback-relevant tool events (Skill, Task, Edit, Write, Bash errors).
126
138
  // Stop handler reads them and passes to collectFeedback for adoption/outcome detection.
package/hook.mjs CHANGED
@@ -10,7 +10,7 @@ import { readFileSync, writeFileSync, unlinkSync, readdirSync, renameSync, statS
10
10
  import {
11
11
  truncate, typeIcon, inferProject, detectBashSignificance,
12
12
  extractErrorKeywords, extractFilePaths, isRelatedToEpisode,
13
- makeEntryDesc, scrubSecrets, debugCatch, debugLog, fmtTime,
13
+ makeEntryDesc, scrubSecrets, computeRuleImportance, EDIT_TOOLS, debugCatch, debugLog, fmtTime,
14
14
  } from './utils.mjs';
15
15
  import {
16
16
  readEpisodeRaw, episodeFile,
@@ -27,8 +27,10 @@ import {
27
27
  RESOURCE_RESCAN_INTERVAL_MS,
28
28
  sessionFile, getSessionId, createSessionId, openDb, getRegistryDb,
29
29
  closeRegistryDb, spawnBackground, appendToolEvent, readAndClearToolEvents,
30
+ resetInjectionBudget, hasInjectionBudget, incrementInjection,
30
31
  } from './hook-shared.mjs';
31
- import { handleLLMEpisode, handleLLMSummary } from './hook-llm.mjs';
32
+ import { handleLLMEpisode, handleLLMSummary, saveObservation, buildDegradedTitle } from './hook-llm.mjs';
33
+ import { searchRelevantMemories } from './hook-memory.mjs';
32
34
 
33
35
  // Prevent recursive hooks from background claude -p calls
34
36
  // Background workers (llm-episode, llm-summary, resource-scan) are exempt — they're ours
@@ -80,6 +82,32 @@ function flushEpisode(episode) {
80
82
  episode.filesRead = episode.filesRead || [];
81
83
  }
82
84
 
85
+ const isSignificant = episodeHasSignificantContent(episode);
86
+
87
+ // Immediate save: create rule-based observation for instant visibility.
88
+ // LLM background worker will upgrade title/narrative/importance later.
89
+ if (isSignificant) {
90
+ try {
91
+ const hasError = episode.entries.some(e => e.isError);
92
+ const hasEdit = episode.entries.some(e => EDIT_TOOLS.has(e.tool));
93
+ const inferredType = hasError ? 'bugfix' : hasEdit ? 'change' : 'discovery';
94
+ const fileList = (episode.files || []).map(f => basename(f)).join(', ') || '(multiple)';
95
+ const obs = {
96
+ type: inferredType,
97
+ title: truncate(buildDegradedTitle(episode), 120),
98
+ subtitle: fileList,
99
+ narrative: episode.entries.map(e => e.desc).join('; '),
100
+ concepts: [],
101
+ facts: [],
102
+ files: episode.files,
103
+ filesRead: episode.filesRead || [],
104
+ importance: computeRuleImportance(episode),
105
+ };
106
+ const id = saveObservation(obs, episode.project, episode.sessionId);
107
+ if (id) episode.savedId = id;
108
+ } catch (e) { debugCatch(e, 'flushEpisode-immediateSave'); }
109
+ }
110
+
83
111
  // Write episode to flush file, then remove buffer AFTER spawn to prevent race
84
112
  const flushFile = join(RUNTIME_DIR, `ep-flush-${Date.now()}-${randomUUID().slice(0, 8)}.json`);
85
113
  try {
@@ -88,7 +116,7 @@ function flushEpisode(episode) {
88
116
  return;
89
117
  }
90
118
 
91
- if (episodeHasSignificantContent(episode)) {
119
+ if (isSignificant) {
92
120
  spawnBackground('llm-episode', flushFile);
93
121
  } else {
94
122
  try { unlinkSync(flushFile); } catch {}
@@ -150,7 +178,7 @@ async function handlePostToolUse() {
150
178
  files,
151
179
  ts: Date.now(),
152
180
  isError: bashSig?.isError || false,
153
- isSignificant: ['Edit', 'Write', 'NotebookEdit'].includes(tool_name) ||
181
+ isSignificant: EDIT_TOOLS.has(tool_name) ||
154
182
  bashSig?.isSignificant || false,
155
183
  bashSig: bashSig || null,
156
184
  };
@@ -203,7 +231,7 @@ async function handlePostToolUse() {
203
231
  addFileToEpisode(episode, files);
204
232
 
205
233
  // Proactive file history: show past observations for files being edited
206
- if (['Edit', 'Write', 'NotebookEdit'].includes(tool_name) && files.length > 0) {
234
+ if (EDIT_TOOLS.has(tool_name) && files.length > 0) {
207
235
  const d = getDb();
208
236
  if (d) {
209
237
  for (const f of files) {
@@ -356,6 +384,8 @@ async function handleStop() {
356
384
  // ─── SessionStart Handler + CLAUDE.md Persistence (Tier 1 A, E) ─────────────
357
385
 
358
386
  async function handleSessionStart() {
387
+ resetInjectionBudget();
388
+
359
389
  // Flush any leftover episode buffer from previous session (e.g. after /clear)
360
390
  if (acquireLock()) {
361
391
  try {
@@ -636,11 +666,12 @@ async function handleSessionStart() {
636
666
  // Dispatch: recommend skill/agent based on session context
637
667
  try {
638
668
  const rdb = getRegistryDb();
639
- if (rdb) {
669
+ if (rdb && hasInjectionBudget()) {
640
670
  const promptCtx = latestSummary?.next_steps || '';
641
671
  const dispatchResult = await dispatchOnSessionStart(rdb, promptCtx, sessionId);
642
672
  if (dispatchResult) {
643
673
  process.stdout.write(dispatchResult + '\n');
674
+ incrementInjection();
644
675
  }
645
676
  }
646
677
  } catch (e) { debugCatch(e, 'handleSessionStart-dispatch'); }
@@ -693,9 +724,12 @@ async function handlePreToolUse() {
693
724
  }
694
725
  } catch {}
695
726
 
696
- const injection = await dispatchOnPreToolUse(rdb, hookData, sessionCtx);
697
- if (injection) {
698
- process.stdout.write(injection + '\n');
727
+ if (hasInjectionBudget()) {
728
+ const injection = await dispatchOnPreToolUse(rdb, hookData, sessionCtx);
729
+ if (injection) {
730
+ process.stdout.write(injection + '\n');
731
+ incrementInjection();
732
+ }
699
733
  }
700
734
  }
701
735
 
@@ -715,11 +749,12 @@ async function handleUserPrompt() {
715
749
  const db = openDb();
716
750
  if (!db) return;
717
751
 
752
+ const project = inferProject();
753
+
718
754
  try {
719
755
  const now = new Date();
720
756
 
721
757
  // Ensure session exists (INSERT OR IGNORE avoids race condition)
722
- const project = inferProject();
723
758
  db.prepare(`
724
759
  INSERT OR IGNORE INTO sdk_sessions (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
725
760
  VALUES (?, ?, ?, ?, ?, 'active')
@@ -738,6 +773,30 @@ async function handleUserPrompt() {
738
773
  counter?.prompt_counter || 1,
739
774
  now.toISOString(), now.getTime()
740
775
  );
776
+
777
+ // Semantic memory injection: search past observations for the user's prompt
778
+ if (hasInjectionBudget()) {
779
+ try {
780
+ const keyObs = db.prepare(`
781
+ SELECT id FROM observations
782
+ WHERE project = ? AND COALESCE(compressed_into, 0) = 0
783
+ AND COALESCE(importance, 1) >= 2
784
+ ORDER BY created_at_epoch DESC LIMIT 5
785
+ `).all(project);
786
+ const keyContextIds = keyObs.map(o => o.id);
787
+
788
+ const memories = searchRelevantMemories(db, promptText, project, keyContextIds);
789
+ if (memories.length > 0) {
790
+ const lines = ['<memory-context relevance="high">'];
791
+ for (const m of memories) {
792
+ lines.push(`- [${m.type}] ${truncate(m.title, 80)} (#${m.id})`);
793
+ }
794
+ lines.push('</memory-context>');
795
+ process.stdout.write(lines.join('\n') + '\n');
796
+ incrementInjection();
797
+ }
798
+ } catch (e) { debugCatch(e, 'handleUserPrompt-memory'); }
799
+ }
741
800
  } finally {
742
801
  db.close();
743
802
  }
@@ -749,10 +808,11 @@ async function handleUserPrompt() {
749
808
  // Cooldown + session dedup (invocations table) prevents double-recommending with SessionStart.
750
809
  try {
751
810
  const rdb = getRegistryDb();
752
- if (rdb) {
811
+ if (rdb && hasInjectionBudget()) {
753
812
  const result = await dispatchOnUserPrompt(rdb, promptText, sessionId);
754
813
  if (result) {
755
814
  process.stdout.write(result + '\n');
815
+ incrementInjection();
756
816
  }
757
817
  }
758
818
  } catch (e) { debugCatch(e, 'handleUserPrompt-dispatch'); }
package/install.mjs CHANGED
@@ -1212,7 +1212,7 @@ async function install() {
1212
1212
 
1213
1213
  const SOURCE_FILES = [
1214
1214
  'server.mjs', 'server-internals.mjs', 'tool-schemas.mjs',
1215
- 'hook.mjs', 'hook-shared.mjs', 'hook-llm.mjs',
1215
+ 'hook.mjs', 'hook-shared.mjs', 'hook-llm.mjs', 'hook-memory.mjs',
1216
1216
  'hook-semaphore.mjs', 'hook-episode.mjs', 'hook-context.mjs',
1217
1217
  'haiku-client.mjs', 'utils.mjs', 'schema.mjs', 'package.json', 'skill.md',
1218
1218
  'registry.mjs', 'registry-scanner.mjs', 'registry-indexer.mjs',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.0.13",
3
+ "version": "2.1.1",
4
4
  "description": "Lightweight persistent memory system for Claude Code",
5
5
  "type": "module",
6
6
  "engines": {
@@ -28,6 +28,7 @@
28
28
  "hook.mjs",
29
29
  "hook-shared.mjs",
30
30
  "hook-llm.mjs",
31
+ "hook-memory.mjs",
31
32
  "hook-semaphore.mjs",
32
33
  "hook-episode.mjs",
33
34
  "hook-context.mjs",
@@ -5,7 +5,7 @@ import { debugCatch } from './utils.mjs';
5
5
 
6
6
  // ─── Domain Synonyms ─────────────────────────────────────────────────────────
7
7
 
8
- const DISPATCH_SYNONYMS = {
8
+ export const DISPATCH_SYNONYMS = {
9
9
  // English intent synonyms
10
10
  'clean': ['refactor', 'lint', 'format', 'organize', 'tidy', 'simplify', 'restructure', 'rewrite', 'smell', 'debt'],
11
11
  'test': ['testing', 'unittest', 'e2e', 'coverage', 'tdd', 'qa', 'spec', 'jest', 'vitest', 'pytest', 'mocha', 'cypress', 'playwright'],
@@ -190,6 +190,18 @@ export function buildEnhancedQuery(signals) {
190
190
  }
191
191
  }
192
192
 
193
+ // Raw keywords from prompt: domain-specific terms not captured by intent patterns.
194
+ // Added as column-targeted intent_tags + literal general match (no synonym expansion).
195
+ // Synonym expansion is harmful for rawKeywords: "database" expanding to ORM/SQL terms
196
+ // would dilute BM25 precision. Literal matching is sufficient — "seo" matches "seo"
197
+ // directly across name, intent_tags, capability_summary, trigger_patterns.
198
+ if (signals.rawKeywords?.length > 0) {
199
+ for (const kw of signals.rawKeywords) {
200
+ parts.push(`intent_tags:${kw}`);
201
+ parts.push(kw); // literal, no synonym expansion
202
+ }
203
+ }
204
+
193
205
  // Add general tokens (expanded with synonyms)
194
206
  for (const t of generalTokens) {
195
207
  parts.push(expandToken(t));
@@ -205,39 +217,46 @@ export function buildEnhancedQuery(signals) {
205
217
  * @param {string} text Raw text input
206
218
  * @returns {string|null} FTS5 query string or null
207
219
  */
220
+ const TEXT_QUERY_STOP_WORDS = new Set([
221
+ 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
222
+ 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
223
+ 'should', 'may', 'might', 'can', 'shall', 'to', 'of', 'in', 'for',
224
+ 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'about', 'between',
225
+ 'after', 'before', 'above', 'below', 'and', 'or', 'but', 'not', 'no',
226
+ 'this', 'that', 'these', 'those', 'it', 'its', 'my', 'your', 'his',
227
+ 'her', 'our', 'their', 'me', 'him', 'us', 'them', 'i', 'you', 'he',
228
+ 'she', 'we', 'they', 'what', 'which', 'who', 'when', 'where', 'how',
229
+ 'all', 'each', 'every', 'both', 'few', 'more', 'most', 'other', 'some',
230
+ 'such', 'than', 'too', 'very', 'just', 'also', 'then', 'so', 'if',
231
+ '的', '了', '是', '在', '我', '有', '和', '就', '不', '人', '都',
232
+ '一', '一个', '上', '也', '这', '那', '你', '他', '她', '它', '们',
233
+ '把', '让', '给', '用', '来', '去', '做', '说', '要', '会', '能',
234
+ '帮', '帮我', '请', '下', '吧',
235
+ ]);
236
+
208
237
  export function buildQueryFromText(text) {
209
238
  if (!text || typeof text !== 'string') return null;
210
239
 
211
- const STOP_WORDS = new Set([
212
- 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
213
- 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
214
- 'should', 'may', 'might', 'can', 'shall', 'to', 'of', 'in', 'for',
215
- 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'about', 'between',
216
- 'after', 'before', 'above', 'below', 'and', 'or', 'but', 'not', 'no',
217
- 'this', 'that', 'these', 'those', 'it', 'its', 'my', 'your', 'his',
218
- 'her', 'our', 'their', 'me', 'him', 'us', 'them', 'i', 'you', 'he',
219
- 'she', 'we', 'they', 'what', 'which', 'who', 'when', 'where', 'how',
220
- 'all', 'each', 'every', 'both', 'few', 'more', 'most', 'other', 'some',
221
- 'such', 'than', 'too', 'very', 'just', 'also', 'then', 'so', 'if',
222
- '的', '了', '是', '在', '我', '有', '和', '就', '不', '人', '都',
223
- '一', '一个', '上', '也', '这', '那', '你', '他', '她', '它', '们',
224
- '把', '让', '给', '用', '来', '去', '做', '说', '要', '会', '能',
225
- '帮', '帮我', '请', '下', '吧',
226
- ]);
227
-
228
240
  const cleaned = text.replace(/[{}()[\]^~*:@#$%&]/g, ' ').trim();
229
241
 
230
242
  // Extract CJK compound words before whitespace split (Chinese has no spaces)
231
243
  const cjkTokens = extractCJKTokens(cleaned);
232
244
 
245
+ // Extract embedded English words from mixed CJK/Latin text.
246
+ // Handles "用seo技能检查下网站的seo优化问题" → extracts "seo".
247
+ // Whitespace split fails here because CJK text has no spaces.
248
+ const embeddedEnTokens = (cleaned.match(/[a-zA-Z]{2,}/g) || [])
249
+ .map(w => w.toLowerCase());
250
+
233
251
  const wsTokens = cleaned.split(/\s+/)
234
- .filter(t => t.length > 1 && !STOP_WORDS.has(t.toLowerCase()) && !/^\d+$/.test(t));
252
+ .filter(t => t.length > 1 && !TEXT_QUERY_STOP_WORDS.has(t.toLowerCase()) && !/^\d+$/.test(t));
235
253
 
236
- // Merge: CJK tokens first (high signal), then whitespace tokens, deduplicated
254
+ // Merge: CJK tokens first (high signal), then embedded English, then whitespace tokens, deduplicated
237
255
  const seen = new Set();
238
256
  const tokens = [];
239
- for (const t of [...cjkTokens, ...wsTokens]) {
240
- if (!seen.has(t)) { seen.add(t); tokens.push(t); }
257
+ for (const t of [...cjkTokens, ...embeddedEnTokens, ...wsTokens]) {
258
+ const key = t.toLowerCase();
259
+ if (!seen.has(key) && !TEXT_QUERY_STOP_WORDS.has(key)) { seen.add(key); tokens.push(t); }
241
260
  }
242
261
  tokens.splice(8); // Limit to 8 most relevant tokens
243
262
 
package/server.mjs CHANGED
@@ -686,7 +686,10 @@ server.registerTool(
686
686
  const deletedIds = new Set(args.ids);
687
687
  const deleteTx = db.transaction(() => {
688
688
  // Clean up stale references in other observations' related_ids
689
- // Use LIKE filter to avoid O(N) full-table scan — only fetch rows that may reference deleted IDs
689
+ // Use LIKE filter to avoid O(N) full-table scan — only fetch rows that may reference deleted IDs.
690
+ // NOTE: LIKE %id% has false positives (e.g. %1% matches [10], [21]). This is intentional —
691
+ // the LIKE is a coarse pre-filter; the JSON parse + Set.has below is the precise filter.
692
+ // Acceptable because observation count per user is typically <10K.
690
693
  const likeConditions = args.ids.map(() => `related_ids LIKE ?`).join(' OR ');
691
694
  const likeParams = args.ids.map(id => `%${id}%`);
692
695
  const referencing = db.prepare(`
package/utils.mjs CHANGED
@@ -261,8 +261,8 @@ for (const [abbr, full] of SYNONYM_PAIRS) {
261
261
 
262
262
  // Format a term for FTS5: quote if it contains spaces, hyphens, or special chars
263
263
  function ftsToken(term) {
264
- // Bare tokens are safe only if purely alphanumeric
265
- if (/^[a-zA-Z0-9]+$/.test(term)) return term;
264
+ // Bare tokens are safe if purely alphanumeric or CJK characters
265
+ if (/^[a-zA-Z0-9\u4e00-\u9fff\u3400-\u4dbf]+$/.test(term)) return term;
266
266
  return `"${term.replace(/"/g, '""')}"`;
267
267
  }
268
268
 
@@ -292,10 +292,26 @@ export function sanitizeFtsQuery(query) {
292
292
  if (!cleaned) return null;
293
293
  const tokens = cleaned.split(/\s+/).filter(t => t && !/^-+$/.test(t) && !FTS5_KEYWORDS.has(t.toUpperCase()));
294
294
  if (tokens.length === 0) return null;
295
- const expanded = tokens.map(t => expandToken(t));
295
+ // Replace single CJK character tokens with bigrams for better phrase matching.
296
+ // Individual CJK chars ("系","统") are too noisy; bigrams ("系统") capture compound words.
297
+ const bigrams = cjkBigrams(cleaned);
298
+ const bigramSet = new Set(bigrams ? bigrams.split(' ').filter(Boolean) : []);
299
+ const hasBigrams = bigramSet.size > 0;
300
+ const finalTokens = [];
301
+ const seen = new Set();
302
+ for (const t of tokens) {
303
+ // Skip single CJK characters when we have bigrams — they're subsumed by bigram tokens
304
+ if (hasBigrams && /^[\u4e00-\u9fff\u3400-\u4dbf]$/.test(t)) continue;
305
+ const expanded = expandToken(t);
306
+ if (!seen.has(expanded)) { seen.add(expanded); finalTokens.push(expanded); }
307
+ }
308
+ for (const bg of bigramSet) {
309
+ if (!seen.has(bg)) { seen.add(bg); finalTokens.push(bg); }
310
+ }
311
+ if (finalTokens.length === 0) return null;
296
312
  // FTS5 requires explicit AND after parenthesized OR groups
297
- const hasGroup = expanded.some(e => e.startsWith('('));
298
- return expanded.join(hasGroup ? ' AND ' : ' ');
313
+ const hasGroup = finalTokens.some(e => e.startsWith('('));
314
+ return finalTokens.join(hasGroup ? ' AND ' : ' ');
299
315
  }
300
316
 
301
317
  /**
@@ -336,11 +352,24 @@ export function clampImportance(val) {
336
352
  * @param {object} episode Episode with entries array
337
353
  * @returns {number} Rule-based importance (1, 2, or 3)
338
354
  */
355
+ // Tools that produce file edits (used for significance detection, feedback, importance)
356
+ export const EDIT_TOOLS = new Set(['Edit', 'Write', 'NotebookEdit']);
357
+
339
358
  export function computeRuleImportance(episode) {
340
359
  let importance = 1;
360
+ const toolTypes = new Set();
361
+ let hasErrorThenEdit = false;
362
+ let lastWasError = false;
363
+
341
364
  for (const entry of episode.entries) {
342
365
  const sig = entry.bashSig;
343
366
  const files = entry.files || [];
367
+ toolTypes.add(entry.tool);
368
+
369
+ // Track error→edit debug cycle pattern
370
+ if (lastWasError && EDIT_TOOLS.has(entry.tool)) hasErrorThenEdit = true;
371
+ lastWasError = entry.isError || sig?.isError;
372
+
344
373
  if (sig?.isError && (sig?.isTest || sig?.isBuild)) { importance = 3; break; }
345
374
  if (files.some(f => /\.(env|pem|key)$|\/auth\.|\/credential|\/password/i.test(f))) { importance = 3; break; }
346
375
  if (files.some(f => /migration|schema\.|prisma|alembic/i.test(f))) { importance = 3; break; }
@@ -349,9 +378,35 @@ export function computeRuleImportance(episode) {
349
378
  if (sig?.isDeploy && importance < 2) importance = 2;
350
379
  if (files.some(f => /\.config\.|tsconfig|Dockerfile|docker-compose|package\.json|\.yml$|\.yaml$/i.test(basename(f))) && importance < 2) importance = 2;
351
380
  }
381
+
382
+ // Tool diversity: Edit + Bash + another tool = complete dev cycle
383
+ if (toolTypes.size >= 3 && toolTypes.has('Edit') && importance < 2) importance = 2;
384
+ // Debug cycle: error followed by edit = active debugging
385
+ if (hasErrorThenEdit && importance < 2) importance = 2;
386
+ // Broad change: many files touched
387
+ if ((episode.files || []).length >= 5 && importance < 2) importance = 2;
388
+
352
389
  return importance;
353
390
  }
354
391
 
392
+ /**
393
+ * Generate CJK bigrams from text for improved Chinese phrase matching in FTS5.
394
+ * "修复了系统崩溃" → "修复 系统 统崩 崩溃"
395
+ * @param {string} text Input text containing CJK characters
396
+ * @returns {string} Space-separated bigrams
397
+ */
398
+ export function cjkBigrams(text) {
399
+ if (!text) return '';
400
+ const runs = text.match(/[\u4e00-\u9fff\u3400-\u4dbf]{2,}/g) || [];
401
+ const bigrams = [];
402
+ for (const run of runs) {
403
+ for (let i = 0; i < run.length - 1; i++) {
404
+ bigrams.push(run[i] + run[i + 1]);
405
+ }
406
+ }
407
+ return bigrams.join(' ');
408
+ }
409
+
355
410
  // ─── Project Inference ───────────────────────────────────────────────────────
356
411
 
357
412
  /**