claude-mem-lite 2.0.12 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,7 +2,7 @@
2
2
  // Runs at Stop hook to track adoption and outcomes of recommendations
3
3
 
4
4
  import { getSessionInvocations, updateInvocation, updateResourceStats } from './registry.mjs';
5
- import { debugCatch } from './utils.mjs';
5
+ import { debugCatch, EDIT_TOOLS } from './utils.mjs';
6
6
 
7
7
  // ─── Adoption Detection ──────────────────────────────────────────────────────
8
8
 
@@ -100,6 +100,30 @@ function detectAdoption(invocation, sessionEvents) {
100
100
  }
101
101
  }
102
102
 
103
+ // Behavioral adoption: detect usage patterns matching the recommended resource
104
+ const resourceLower = resource_name.toLowerCase();
105
+
106
+ // Debugging pattern: Read→Bash(error)→Read→Edit cycle
107
+ if (resourceLower.includes('debug') || resourceLower.includes('troubleshoot')) {
108
+ let hasRead = false, hasBashError = false, hasEditAfterError = false;
109
+ for (const e of sessionEvents) {
110
+ if (e.tool_name === 'Read') hasRead = true;
111
+ if (e.tool_name === 'Bash' && /error|fail|exception/i.test(e.tool_response || '')) hasBashError = true;
112
+ if (hasBashError && EDIT_TOOLS.has(e.tool_name)) hasEditAfterError = true;
113
+ }
114
+ if (hasRead && hasBashError && hasEditAfterError) return true;
115
+ }
116
+
117
+ // Code review pattern: Agent with 'review' in prompt/description
118
+ if (resourceLower.includes('review')) {
119
+ for (const e of sessionEvents) {
120
+ if (e.tool_name === 'Agent') {
121
+ const text = ((e.tool_input?.prompt || '') + (e.tool_input?.description || '')).toLowerCase();
122
+ if (text.includes('review')) return true;
123
+ }
124
+ }
125
+ }
126
+
103
127
  return false;
104
128
  }
105
129
 
@@ -127,7 +151,7 @@ function detectOutcome(sessionEvents) {
127
151
  lastErrorIndex = i;
128
152
  }
129
153
 
130
- if (['Edit', 'Write', 'NotebookEdit'].includes(e.tool_name)) {
154
+ if (EDIT_TOOLS.has(e.tool_name)) {
131
155
  hasEdit = true;
132
156
  if (lastErrorIndex >= 0 && i > lastErrorIndex) {
133
157
  errorThenFix = true;
@@ -187,3 +211,6 @@ export async function collectFeedback(db, sessionId, sessionEvents = []) {
187
211
  debugCatch(e, 'collectFeedback');
188
212
  }
189
213
  }
214
+
215
+ // Test exports
216
+ export { detectAdoption as _detectAdoption };
@@ -9,6 +9,13 @@ import { DB_DIR } from './schema.mjs';
9
9
 
10
10
  const MAX_INJECTION_CHARS = 3000;
11
11
 
12
/**
 * Truncate multi-line content while preserving newlines (unlike utils.truncate, which flattens).
 *
 * The input is trimmed first. When the trimmed text is longer than `max`, it is cut so that
 * the result, including the trailing ellipsis character, is at most `max` UTF-16 code units.
 *
 * @param {string} str Content to truncate; falsy values yield ''.
 * @param {number} max Maximum length of the returned string, in UTF-16 code units.
 * @returns {string} The trimmed (and possibly shortened) content.
 */
function truncateContent(str, max) {
  if (!str) return '';
  const trimmed = String(str).trim();
  // Written as a negated ">" so a missing/NaN max keeps meaning "no limit".
  if (!(trimmed.length > max)) return trimmed;
  // No room even for the ellipsis; a negative slice index would otherwise return
  // almost the whole string.
  if (max < 1) return '';

  let end = Math.floor(max) - 1;
  if (end > 0) {
    // Never cut between the two halves of a surrogate pair: a dangling high
    // surrogate renders as U+FFFD and is invalid when encoded as UTF-8.
    const lastUnit = trimmed.charCodeAt(end - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
  }
  return `${trimmed.slice(0, end)}…`;
}
18
+
12
19
  // Allowed base directories for resource file reads (defense-in-depth)
13
20
  const ALLOWED_BASES = [
14
21
  join(homedir(), '.claude'),
@@ -79,7 +86,7 @@ function injectSkillManaged(resource) {
79
86
  } catch {}
80
87
  }
81
88
 
82
- const truncatedContent = truncate(content, MAX_INJECTION_CHARS - 300);
89
+ const truncatedContent = truncateContent(content, MAX_INJECTION_CHARS - 300);
83
90
 
84
91
  return `[Auto-suggestion] Recommended skill for this task: "${resource.name}"
85
92
  Capability: ${truncate(resource.capability_summary, 100)}
@@ -113,7 +120,7 @@ function injectAgent(resource) {
113
120
  }
114
121
 
115
122
  if (agentDef) {
116
- const truncatedDef = truncate(agentDef, MAX_INJECTION_CHARS - 300);
123
+ const truncatedDef = truncateContent(agentDef, MAX_INJECTION_CHARS - 300);
117
124
  return `[Auto-suggestion] A specialized agent "${resource.name}" is recommended for this task.
118
125
  Capability: ${truncate(resource.capability_summary, 100)}
119
126
  Use the Agent tool with this agent definition:
package/dispatch.mjs CHANGED
@@ -6,7 +6,7 @@
6
6
 
7
7
  import { basename, join } from 'path';
8
8
  import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
9
- import { retrieveResources, buildEnhancedQuery, buildQueryFromText } from './registry-retriever.mjs';
9
+ import { retrieveResources, buildEnhancedQuery, buildQueryFromText, DISPATCH_SYNONYMS } from './registry-retriever.mjs';
10
10
  import { renderInjection } from './dispatch-inject.mjs';
11
11
  import { updateResourceStats, recordInvocation } from './registry.mjs';
12
12
  import { callHaikuJSON } from './haiku-client.mjs';
@@ -199,6 +199,7 @@ export function extractContextSignals(event, sessionCtx = {}) {
199
199
  intent: '', // comma-separated intent tags, primary first
200
200
  primaryIntent: '', // first/strongest intent (for column-targeted queries)
201
201
  suppressedIntents: [], // intents detected but actively suppressed (e.g. test-run)
202
+ rawKeywords: [], // domain-specific keywords not captured by intent patterns (e.g. "seo")
202
203
  techStack: '',
203
204
  action: '',
204
205
  errorDomain: '',
@@ -210,6 +211,11 @@ export function extractContextSignals(event, sessionCtx = {}) {
210
211
  signals.intent = intent;
211
212
  signals.suppressedIntents = suppressed;
212
213
  signals.primaryIntent = signals.intent.split(',')[0] || '';
214
+ // Extract raw domain keywords not captured by intent patterns.
215
+ // Intent patterns cover generic actions (test, fix, review) but miss domain
216
+ // topics (seo, kubernetes, oauth). These raw keywords supplement the enhanced
217
+ // query to ensure domain-specific resources are found.
218
+ signals.rawKeywords = extractRawKeywords(sessionCtx.userPrompt, signals.intent);
213
219
  }
214
220
 
215
221
  // Infer tech stack from recent files, current tool_input, or prompt text
@@ -263,7 +269,7 @@ const _WRITE_TEST_CJK = /(?:写测试|加测试|补测试|补单测|缺测试|
263
269
  * @returns {string} Comma-separated intent tags, primary intent listed first (e.g. "test,fix")
264
270
  */
265
271
  function extractIntent(prompt) {
266
- if (!prompt) return '';
272
+ if (!prompt) return { intent: '', suppressed: [] };
267
273
  // English patterns — use trailing-optional boundaries for verb conjugations:
268
274
  // \b prefix ensures word start, but many suffixed forms (debugging, refactoring, deployed)
269
275
  // fail with trailing \b. Use \b...\w* for words that commonly have suffixes.
@@ -291,6 +297,9 @@ function extractIntent(prompt) {
291
297
  [/\b(perf|performance|optimiz\w*|fast\w*|slow\w*|speed\w*|latency|bottleneck|laggy)\b/i, 'fast'],
292
298
  [/\b(lint\w*|format\w*|style|prettier|eslint|biome|stylelint)\b/i, 'lint'],
293
299
  // ── Generic / overloaded (easily confused with domain terms) ──
300
+ // Note: bare "design" intentionally excluded — too ambiguous ("design database" vs "design UI").
301
+ // Only UI-specific keywords trigger design intent. Prompts like "design the homepage" without
302
+ // UI terms will rely on text-based FTS5 fallback rather than intent matching.
294
303
  [/\b(ui|ux|frontend|css|tailwind|responsive|layout|theme|component)\b/i, 'design'],
295
304
  // ── Chinese patterns ──
296
305
  [/(测试|写测试|单测|单元测试|用例|覆盖率)/, 'test'],
@@ -363,7 +372,53 @@ function extractIntent(prompt) {
363
372
  }
364
373
 
365
374
  /** Exported for testing. */
366
- export { NEGATION_EN as _NEGATION_EN, NEGATION_CJK as _NEGATION_CJK };
375
+ export { NEGATION_EN as _NEGATION_EN, NEGATION_CJK as _NEGATION_CJK, reRankByKeywords as _reRankByKeywords, applyAdoptionDecay as _applyAdoptionDecay, passesConfidenceGate as _passesConfidenceGate };
376
+
377
// Stop words for raw keyword extraction.
// Includes common English stop words + action verbs already covered by intent patterns.
// Domain-specific technical terms (seo, kubernetes, react, etc.) pass through.
const RAW_KW_STOP = new Set([
  // Standard English stop words
  'the', 'is', 'are', 'was', 'were', 'be', 'been', 'have', 'has', 'had',
  'do', 'does', 'did', 'will', 'would', 'could', 'should', 'may', 'might',
  'can', 'shall', 'to', 'of', 'in', 'for', 'on', 'with', 'at', 'by',
  'from', 'as', 'into', 'about', 'and', 'or', 'but', 'not', 'no', 'this',
  'that', 'it', 'its', 'my', 'your', 'me', 'us', 'you', 'he', 'she', 'we', 'they',
  'if', 'so', 'just', 'also', 'then', 'how', 'what', 'when', 'where', 'who',
  'use', 'using', 'need', 'want', 'check', 'look', 'help', 'please', 'let',
  'some', 'all', 'any', 'each', 'every', 'new', 'like', 'before', 'after',
  // Action verbs — captured by intent patterns, not domain keywords
  'design', 'build', 'create', 'make', 'add', 'remove', 'delete', 'update',
  'write', 'read', 'run', 'test', 'tests', 'testing', 'fix', 'debug',
  'review', 'deploy', 'commit', 'push', 'plan', 'clean', 'refactor',
  'find', 'get', 'set', 'show', 'list', 'change', 'move', 'copy', 'send',
  'start', 'stop', 'open', 'close', 'save', 'load', 'install', 'setup',
  'implement', 'configure', 'code', 'file', 'function', 'module', 'app',
]);

/**
 * Extract raw domain keywords from prompt text that aren't captured by intent patterns.
 * Handles embedded English words in CJK text (e.g. "seo" from "用seo技能检查下").
 * Filters out stop words, words equal to an already-extracted intent tag, and duplicates.
 * Keywords are lower-cased and returned in order of first appearance.
 *
 * @param {string} prompt User prompt text; non-string or empty input yields [].
 * @param {string} intentStr Comma-separated intents already extracted
 * @param {number} [maxKeywords=5] Upper bound on the number of keywords returned
 * @returns {string[]} Array of raw keywords (at most `maxKeywords`)
 */
function extractRawKeywords(prompt, intentStr, maxKeywords = 5) {
  if (!prompt || typeof prompt !== 'string') return [];

  // Runs of 2+ ASCII letters; the character class already covers both cases.
  const words = prompt.match(/[a-zA-Z]{2,}/g) || [];
  const intentSet = new Set((intentStr || '').split(',').filter(Boolean));
  const seen = new Set();
  const result = [];

  for (const word of words) {
    // Stop scanning once the cap is reached instead of collecting everything and slicing.
    if (result.length >= maxKeywords) break;
    const lower = word.toLowerCase();
    if (RAW_KW_STOP.has(lower) || intentSet.has(lower) || seen.has(lower)) continue;
    seen.add(lower);
    result.push(lower);
  }
  return result;
}
367
422
 
368
423
  /**
369
424
  * Infer tech stack from file extensions.
@@ -599,6 +654,91 @@ export function isRecentlyRecommended(db, resourceId, sessionId) {
599
654
  return !!cooldownHit;
600
655
  }
601
656
 
657
+ // ─── Keyword Re-ranking ──────────────────────────────────────────────────────
658
+
659
/**
 * Re-rank results to prefer resources matching rawKeywords in their intent_tags.
 * When a user mentions domain-specific terms (e.g. "seo"), resources in that domain
 * should rank above generic resources that only match the action intent (e.g. "review").
 * Within each group (matching vs non-matching), the incoming order is preserved.
 *
 * Matching is a case-insensitive substring test of each keyword against the
 * resource's `intent_tags` string. The input array is returned untouched (same
 * reference) when there is nothing to do: no usable keywords, fewer than two
 * results, or `results` is not an array.
 *
 * @param {object[]} results FTS5 results
 * @param {string[]} rawKeywords Domain keywords from prompt
 * @returns {object[]} Re-ranked results
 */
function reRankByKeywords(results, rawKeywords) {
  if (!Array.isArray(results) || results.length <= 1) return results;
  if (!rawKeywords?.length) return results;

  // Tags are lower-cased below, so keywords must be too; otherwise "SEO" never matches "seo".
  const keywords = [];
  for (const kw of rawKeywords) {
    if (typeof kw === 'string' && kw !== '') keywords.push(kw.toLowerCase());
  }
  if (keywords.length === 0) return results;

  const matching = [];
  const rest = [];
  for (const r of results) {
    const tags = (r.intent_tags || '').toLowerCase();
    const bucket = keywords.some((kw) => tags.includes(kw)) ? matching : rest;
    bucket.push(r);
  }
  return [...matching, ...rest];
}
683
+
684
/**
 * Apply adoption-rate-based score decay to penalize zombie resources
 * (recommended often, adopted almost never).
 *
 * The adoption rate is Laplace-smoothed: (adopt_count + 1) / (recommend_count + 2).
 * Tiers, checked from harshest to mildest:
 *   - recommend_count > 100 and rate < 0.01 → result is dropped
 *   - recommend_count > 50  and rate < 0.02 → relevance × 0.1
 *   - recommend_count > 20  and rate < 0.05 → relevance × 0.3
 * Cold start protection: rows with recommend_count < 10 are returned untouched.
 *
 * Penalised rows are returned as shallow copies flagged with `_decayed: true`;
 * unpenalised rows are returned by reference. The order of the survivors is not changed.
 *
 * @param {object[]} results FTS5 results with recommend_count/adopt_count
 * @returns {object[]} Filtered results with decayed scores
 */
function applyAdoptionDecay(results) {
  const survivors = [];
  for (const r of results) {
    const recs = r.recommend_count || 0;
    const adopts = r.adopt_count || 0;
    if (recs < 10) {
      survivors.push(r); // Cold start protection
      continue;
    }

    const rate = (adopts + 1) / (recs + 2); // Laplace smoothing
    let multiplier = 1;
    if (recs > 100 && rate < 0.01) multiplier = 0; // Block entirely
    else if (recs > 50 && rate < 0.02) multiplier = 0.1; // Heavy penalty
    else if (recs > 20 && rate < 0.05) multiplier = 0.3; // Light penalty

    if (multiplier === 0) continue;
    if (multiplier === 1) {
      survivors.push(r);
      continue;
    }

    const decayed = { ...r, _decayed: true };
    // Only scale a real score: `undefined * multiplier` would store NaN as the relevance.
    if (typeof r.relevance === 'number') decayed.relevance = r.relevance * multiplier;
    survivors.push(decayed);
  }
  return survivors;
}
710
+
711
/**
 * Confidence gate: keep only results whose intent_tags contain at least one
 * accepted token, so that a recommendation is never based solely on incidental
 * text overlap.
 *
 * Accepted tokens are the intent tokens, the raw keywords, and every synonym
 * that DISPATCH_SYNONYMS lists for an intent token. The gate is skipped (input
 * returned as-is) when no intent tokens are present.
 *
 * @param {object[]} results FTS5 results
 * @param {object} signals Context signals; `intent` is a comma-separated string
 *   (e.g. "test,fix") or an array of tags, `rawKeywords` is an optional array
 * @returns {object[]} Filtered results that pass the gate
 */
function passesConfidenceGate(results, signals) {
  const rawIntent = signals?.intent;
  let intentTokens = [];
  if (typeof rawIntent === 'string') {
    intentTokens = rawIntent.split(',').filter(Boolean);
  } else if (Array.isArray(rawIntent)) {
    intentTokens = rawIntent;
  }

  // No structured intent → skip gate (rawKeywords match FTS5 text columns, not intent_tags)
  if (intentTokens.length === 0) return results;

  const accepted = new Set(intentTokens);
  for (const kw of signals?.rawKeywords || []) accepted.add(kw);

  // Expand intent tokens through DISPATCH_SYNONYMS so "fast" also matches "performance", etc.
  for (const token of intentTokens) {
    const synonyms = DISPATCH_SYNONYMS[token];
    if (!synonyms) continue;
    for (const synonym of synonyms) accepted.add(synonym);
  }

  return results.filter((r) =>
    (r.intent_tags || '')
      .toLowerCase()
      .split(/[\s,]+/)
      .some((tag) => tag !== '' && accepted.has(tag)));
}
741
+
602
742
  // ─── Main Dispatch Functions ─────────────────────────────────────────────────
603
743
 
604
744
  /**
@@ -618,7 +758,10 @@ export async function dispatchOnSessionStart(db, userPrompt, sessionId) {
618
758
  const signals = extractContextSignals({ tool_name: '_session_start' }, { userPrompt });
619
759
  const enhancedQuery = buildEnhancedQuery(signals);
620
760
 
621
- let results = enhancedQuery ? retrieveResources(db, enhancedQuery, { limit: 3, projectDomains }) : [];
761
+ // Fetch extra results when rawKeywords are present — BM25 may rank intent-matching
762
+ // resources above domain-specific ones; extra headroom lets reRankByKeywords promote them.
763
+ const fetchLimit = signals.rawKeywords.length > 0 ? 8 : 3;
764
+ let results = enhancedQuery ? retrieveResources(db, enhancedQuery, { limit: fetchLimit, projectDomains }) : [];
622
765
 
623
766
  // Fallback: broad text query (catches prompts without clear intent patterns)
624
767
  if (results.length === 0) {
@@ -634,6 +777,11 @@ export async function dispatchOnSessionStart(db, userPrompt, sessionId) {
634
777
  }
635
778
  }
636
779
 
780
+ results = reRankByKeywords(results, signals.rawKeywords);
781
+ results = applyAdoptionDecay(results);
782
+ results = passesConfidenceGate(results, signals);
783
+ results = results.slice(0, 3);
784
+
637
785
  let tier = 2;
638
786
 
639
787
  // Tier 3: Haiku semantic fallback (SessionStart has 10s budget)
@@ -699,7 +847,12 @@ export async function dispatchOnUserPrompt(db, userPrompt, sessionId) {
699
847
  const signals = extractContextSignals({ tool_name: '_user_prompt' }, { userPrompt });
700
848
  const enhancedQuery = buildEnhancedQuery(signals);
701
849
 
702
- let results = enhancedQuery ? retrieveResources(db, enhancedQuery, { limit: 3, projectDomains }) : [];
850
+ // Fetch extra results when rawKeywords are present — the top-3 by BM25 may be
851
+ // dominated by intent synonyms (e.g. "review" expands to many code-review terms),
852
+ // pushing domain-specific resources (e.g. SEO) below the limit. Extra headroom
853
+ // lets reRankByKeywords() promote domain-matched resources to the top.
854
+ const fetchLimit = signals.rawKeywords.length > 0 ? 8 : 3;
855
+ let results = enhancedQuery ? retrieveResources(db, enhancedQuery, { limit: fetchLimit, projectDomains }) : [];
703
856
 
704
857
  // Fallback: broad text query
705
858
  if (results.length === 0) {
@@ -714,10 +867,25 @@ export async function dispatchOnUserPrompt(db, userPrompt, sessionId) {
714
867
  }
715
868
  }
716
869
 
870
+ // Re-rank: when rawKeywords are present, prefer resources whose intent_tags
871
+ // match those keywords. "帮我做一下SEO审查" → rawKeywords=["seo"] → SEO audit
872
+ // resources should rank above generic code-review resources.
873
+ results = reRankByKeywords(results, signals.rawKeywords);
874
+ results = applyAdoptionDecay(results);
875
+ results = passesConfidenceGate(results, signals);
876
+ results = results.slice(0, 3);
877
+
717
878
  if (results.length === 0) return null;
718
879
 
719
- // Skip if low confidence (no Haiku fallback — stay fast)
720
- if (needsHaikuDispatch(results)) return null;
880
+ // Skip if low confidence (no Haiku fallback — stay fast).
881
+ // Exception: when results match the user's raw domain keywords (e.g. "seo"),
882
+ // close BM25 scores indicate "multiple equally good options in the right domain"
883
+ // rather than "ambiguous/wrong match". Trust the domain match.
884
+ if (needsHaikuDispatch(results)) {
885
+ const hasKeywordMatch = signals.rawKeywords?.length > 0 && results.some(r =>
886
+ signals.rawKeywords.some(kw => (r.intent_tags || '').toLowerCase().includes(kw)));
887
+ if (!hasKeywordMatch) return null;
888
+ }
721
889
 
722
890
  // Filter by cooldown + session dedup (prevents double-recommend with SessionStart)
723
891
  const viable = sessionId
@@ -766,7 +934,9 @@ export async function dispatchOnPreToolUse(db, event, sessionCtx = {}) {
766
934
  const projectDomains = detectProjectDomains();
767
935
 
768
936
  // Tier 2: FTS5 retrieval
769
- const results = retrieveResources(db, query, { limit: 3, projectDomains });
937
+ let results = retrieveResources(db, query, { limit: 3, projectDomains });
938
+ results = applyAdoptionDecay(results);
939
+ results = passesConfidenceGate(results, signals);
770
940
  if (results.length === 0) return null;
771
941
 
772
942
  const tier = 2; // Tier 3 disabled for PreToolUse — 2s hook timeout insufficient
package/hook-episode.mjs CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  import { join } from 'path';
5
5
  import { readFileSync, writeFileSync, unlinkSync, readdirSync, openSync, closeSync, writeSync, renameSync, statSync, constants as fsConstants } from 'fs';
6
- import { inferProject } from './utils.mjs';
6
+ import { inferProject, EDIT_TOOLS } from './utils.mjs';
7
7
  import { RUNTIME_DIR } from './hook-shared.mjs';
8
8
 
9
9
  /**
@@ -216,7 +216,7 @@ export function mergePendingEntries(episode) {
216
216
  */
217
217
  export function episodeHasSignificantContent(episode) {
218
218
  return episode.entries.some(e =>
219
- ['Edit', 'Write', 'NotebookEdit'].includes(e.tool) ||
219
+ EDIT_TOOLS.has(e.tool) ||
220
220
  (e.tool === 'Bash' && e.isError)
221
221
  );
222
222
  }
package/hook-llm.mjs CHANGED
@@ -6,7 +6,7 @@ import { existsSync, readFileSync, unlinkSync, readdirSync } from 'fs';
6
6
  import {
7
7
  jaccardSimilarity, truncate, clampImportance, computeRuleImportance,
8
8
  inferProject, parseJsonFromLLM,
9
- computeMinHash, estimateJaccardFromMinHash, debugCatch, debugLog,
9
+ computeMinHash, estimateJaccardFromMinHash, cjkBigrams, EDIT_TOOLS, debugCatch, debugLog,
10
10
  } from './utils.mjs';
11
11
  import { acquireLLMSlot, releaseLLMSlot } from './hook-semaphore.mjs';
12
12
  import {
@@ -16,6 +16,14 @@ import {
16
16
 
17
17
  // ─── Save Observation to DB ─────────────────────────────────────────────────
18
18
 
19
/**
 * Build the FTS5 text field from observation data.
 *
 * `concepts` and `facts` are each space-joined when they are arrays (otherwise
 * treated as empty). The combined `textField` is those two strings plus the
 * output of cjkBigrams() over "title narrative", with empty parts omitted.
 *
 * @param {object} obs Observation with optional concepts, facts, title, narrative
 * @returns {{conceptsText: string, factsText: string, textField: string}}
 */
function buildFtsTextField(obs) {
  const joinList = (list) => (Array.isArray(list) ? list.join(' ') : '');

  const conceptsText = joinList(obs.concepts);
  const factsText = joinList(obs.facts);
  const bigramText = cjkBigrams((obs.title || '') + ' ' + (obs.narrative || ''));

  const parts = [conceptsText, factsText, bigramText].filter(Boolean);
  return { conceptsText, factsText, textField: parts.join(' ') };
}
26
+
19
27
  export function saveObservation(obs, projectOverride, sessionIdOverride, externalDb) {
20
28
  const db = externalDb || openDb();
21
29
  if (!db) return null;
@@ -58,9 +66,7 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
58
66
  }
59
67
  }
60
68
 
61
- const conceptsText = Array.isArray(obs.concepts) ? obs.concepts.join(' ') : '';
62
- const factsText = Array.isArray(obs.facts) ? obs.facts.join(' ') : '';
63
- const textField = [conceptsText, factsText].filter(Boolean).join(' ');
69
+ const { conceptsText, factsText, textField } = buildFtsTextField(obs);
64
70
 
65
71
  const result = db.prepare(`
66
72
  INSERT INTO observations (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts, files_read, files_modified, importance, minhash_sig, created_at, created_at_epoch)
@@ -160,10 +166,10 @@ function linkRelatedObservations(db, savedId, obs, episode) {
160
166
  // When LLM is unavailable, build a readable title from episode metadata
161
167
  // instead of using raw makeEntryDesc output (which contains JSON stdout).
162
168
 
163
- function buildDegradedTitle(episode) {
169
+ export function buildDegradedTitle(episode) {
164
170
  const files = (episode.files || []).filter(Boolean);
165
171
  const hasError = episode.entries.some(e => e.isError);
166
- const hasEdit = episode.entries.some(e => ['Edit', 'Write', 'NotebookEdit'].includes(e.tool));
172
+ const hasEdit = episode.entries.some(e => EDIT_TOOLS.has(e.tool));
167
173
 
168
174
  if (files.length > 0) {
169
175
  const names = files.map(f => basename(f)).slice(0, 3).join(', ');
@@ -270,8 +276,14 @@ importance: 1=routine, 2=notable (error fix, arch decision, config change), 3=cr
270
276
 
271
277
  if (!obs) {
272
278
  if (!gotSlot) debugLog('WARN', 'llm-episode', 'semaphore timeout, using degraded storage');
279
+ // If pre-saved observation exists, LLM degraded mode doesn't need to overwrite — keep pre-saved data
280
+ if (episode.savedId) {
281
+ debugLog('DEBUG', 'llm-episode', `LLM failed but pre-saved obs #${episode.savedId} exists, keeping`);
282
+ try { unlinkSync(tmpFile); } catch {}
283
+ return;
284
+ }
273
285
  const hasError = episode.entries.some(e => e.isError);
274
- const hasEdit = episode.entries.some(e => ['Edit', 'Write', 'NotebookEdit'].includes(e.tool));
286
+ const hasEdit = episode.entries.some(e => EDIT_TOOLS.has(e.tool));
275
287
  const inferredType = hasError ? 'bugfix' : hasEdit ? 'change' : 'discovery';
276
288
  obs = {
277
289
  type: inferredType,
@@ -290,7 +302,30 @@ importance: 1=routine, 2=notable (error fix, arch decision, config change), 3=cr
290
302
  if (!db) { try { unlinkSync(tmpFile); } catch {} return; }
291
303
 
292
304
  try {
293
- const savedId = saveObservation(obs, episode.project, episode.sessionId, db);
305
+ let savedId;
306
+
307
+ if (episode.savedId && obs) {
308
+ // Upgrade pre-saved observation with LLM-enriched data
309
+ const { conceptsText, factsText, textField } = buildFtsTextField(obs);
310
+ const minhashSig = computeMinHash((obs.title || '') + ' ' + (obs.narrative || ''));
311
+ db.prepare(`
312
+ UPDATE observations SET type=?, title=?, subtitle=?, narrative=?, concepts=?, facts=?,
313
+ text=?, importance=?, files_read=?, minhash_sig=?
314
+ WHERE id = ?
315
+ `).run(
316
+ obs.type, truncate(obs.title, 120), obs.subtitle || '',
317
+ truncate(obs.narrative || '', 500),
318
+ conceptsText, factsText, textField,
319
+ obs.importance,
320
+ JSON.stringify(obs.filesRead || []),
321
+ minhashSig,
322
+ episode.savedId
323
+ );
324
+ savedId = episode.savedId;
325
+ debugLog('DEBUG', 'llm-episode', `upgraded pre-saved obs #${savedId}`);
326
+ } else {
327
+ savedId = saveObservation(obs, episode.project, episode.sessionId, db);
328
+ }
294
329
 
295
330
  if (savedId) {
296
331
  try {
package/hook-shared.mjs CHANGED
@@ -121,6 +121,18 @@ export function spawnBackground(bgEvent, ...extraArgs) {
121
121
 
122
122
  export function sleep(ms) { return new Promise(r => setTimeout(r, ms)); }
123
123
 
124
// ─── Injection Budget (per-session, in-memory) ──────────────────────────────
// Caps the total number of context injections across all hooks to prevent
// context bloat. The counter is reset at session-start, and each hook checks
// the budget before injecting.
// NOTE(review): the counter lives in module memory, so it is only shared by
// hooks that run in the same process — confirm against how hooks are launched.

/** Maximum number of injections allowed before the budget is exhausted. */
export const MAX_INJECTIONS_PER_SESSION = 3;

let _injectionCount = 0;

/** @returns {number} Injections recorded since the last reset. */
export function getInjectionCount() {
  return _injectionCount;
}

/** Record that one injection was emitted. */
export function incrementInjection() {
  _injectionCount += 1;
}

/** Start a fresh budget (counter back to zero). */
export function resetInjectionBudget() {
  _injectionCount = 0;
}

/** @returns {boolean} True while fewer than MAX_INJECTIONS_PER_SESSION injections were recorded. */
export function hasInjectionBudget() {
  return MAX_INJECTIONS_PER_SESSION > _injectionCount;
}
135
+
124
136
  // ─── Tool Event Tracking (for dispatch feedback) ────────────────────────────
125
137
  // PostToolUse appends feedback-relevant tool events (Skill, Task, Edit, Write, Bash errors).
126
138
  // Stop handler reads them and passes to collectFeedback for adoption/outcome detection.
package/hook.mjs CHANGED
@@ -10,7 +10,7 @@ import { readFileSync, writeFileSync, unlinkSync, readdirSync, renameSync, statS
10
10
  import {
11
11
  truncate, typeIcon, inferProject, detectBashSignificance,
12
12
  extractErrorKeywords, extractFilePaths, isRelatedToEpisode,
13
- makeEntryDesc, scrubSecrets, debugCatch, debugLog, fmtTime,
13
+ makeEntryDesc, scrubSecrets, computeRuleImportance, EDIT_TOOLS, debugCatch, debugLog, fmtTime,
14
14
  } from './utils.mjs';
15
15
  import {
16
16
  readEpisodeRaw, episodeFile,
@@ -27,8 +27,10 @@ import {
27
27
  RESOURCE_RESCAN_INTERVAL_MS,
28
28
  sessionFile, getSessionId, createSessionId, openDb, getRegistryDb,
29
29
  closeRegistryDb, spawnBackground, appendToolEvent, readAndClearToolEvents,
30
+ resetInjectionBudget, hasInjectionBudget, incrementInjection,
30
31
  } from './hook-shared.mjs';
31
- import { handleLLMEpisode, handleLLMSummary } from './hook-llm.mjs';
32
+ import { handleLLMEpisode, handleLLMSummary, saveObservation, buildDegradedTitle } from './hook-llm.mjs';
33
+ import { searchRelevantMemories } from './hook-memory.mjs';
32
34
 
33
35
  // Prevent recursive hooks from background claude -p calls
34
36
  // Background workers (llm-episode, llm-summary, resource-scan) are exempt — they're ours
@@ -80,6 +82,32 @@ function flushEpisode(episode) {
80
82
  episode.filesRead = episode.filesRead || [];
81
83
  }
82
84
 
85
+ const isSignificant = episodeHasSignificantContent(episode);
86
+
87
+ // Immediate save: create rule-based observation for instant visibility.
88
+ // LLM background worker will upgrade title/narrative/importance later.
89
+ if (isSignificant) {
90
+ try {
91
+ const hasError = episode.entries.some(e => e.isError);
92
+ const hasEdit = episode.entries.some(e => EDIT_TOOLS.has(e.tool));
93
+ const inferredType = hasError ? 'bugfix' : hasEdit ? 'change' : 'discovery';
94
+ const fileList = (episode.files || []).map(f => basename(f)).join(', ') || '(multiple)';
95
+ const obs = {
96
+ type: inferredType,
97
+ title: truncate(buildDegradedTitle(episode), 120),
98
+ subtitle: fileList,
99
+ narrative: episode.entries.map(e => e.desc).join('; '),
100
+ concepts: [],
101
+ facts: [],
102
+ files: episode.files,
103
+ filesRead: episode.filesRead || [],
104
+ importance: computeRuleImportance(episode),
105
+ };
106
+ const id = saveObservation(obs, episode.project, episode.sessionId);
107
+ if (id) episode.savedId = id;
108
+ } catch (e) { debugCatch(e, 'flushEpisode-immediateSave'); }
109
+ }
110
+
83
111
  // Write episode to flush file, then remove buffer AFTER spawn to prevent race
84
112
  const flushFile = join(RUNTIME_DIR, `ep-flush-${Date.now()}-${randomUUID().slice(0, 8)}.json`);
85
113
  try {
@@ -88,7 +116,7 @@ function flushEpisode(episode) {
88
116
  return;
89
117
  }
90
118
 
91
- if (episodeHasSignificantContent(episode)) {
119
+ if (isSignificant) {
92
120
  spawnBackground('llm-episode', flushFile);
93
121
  } else {
94
122
  try { unlinkSync(flushFile); } catch {}
@@ -150,7 +178,7 @@ async function handlePostToolUse() {
150
178
  files,
151
179
  ts: Date.now(),
152
180
  isError: bashSig?.isError || false,
153
- isSignificant: ['Edit', 'Write', 'NotebookEdit'].includes(tool_name) ||
181
+ isSignificant: EDIT_TOOLS.has(tool_name) ||
154
182
  bashSig?.isSignificant || false,
155
183
  bashSig: bashSig || null,
156
184
  };
@@ -203,7 +231,7 @@ async function handlePostToolUse() {
203
231
  addFileToEpisode(episode, files);
204
232
 
205
233
  // Proactive file history: show past observations for files being edited
206
- if (['Edit', 'Write', 'NotebookEdit'].includes(tool_name) && files.length > 0) {
234
+ if (EDIT_TOOLS.has(tool_name) && files.length > 0) {
207
235
  const d = getDb();
208
236
  if (d) {
209
237
  for (const f of files) {
@@ -356,6 +384,8 @@ async function handleStop() {
356
384
  // ─── SessionStart Handler + CLAUDE.md Persistence (Tier 1 A, E) ─────────────
357
385
 
358
386
  async function handleSessionStart() {
387
+ resetInjectionBudget();
388
+
359
389
  // Flush any leftover episode buffer from previous session (e.g. after /clear)
360
390
  if (acquireLock()) {
361
391
  try {
@@ -636,11 +666,12 @@ async function handleSessionStart() {
636
666
  // Dispatch: recommend skill/agent based on session context
637
667
  try {
638
668
  const rdb = getRegistryDb();
639
- if (rdb) {
669
+ if (rdb && hasInjectionBudget()) {
640
670
  const promptCtx = latestSummary?.next_steps || '';
641
671
  const dispatchResult = await dispatchOnSessionStart(rdb, promptCtx, sessionId);
642
672
  if (dispatchResult) {
643
673
  process.stdout.write(dispatchResult + '\n');
674
+ incrementInjection();
644
675
  }
645
676
  }
646
677
  } catch (e) { debugCatch(e, 'handleSessionStart-dispatch'); }
@@ -693,9 +724,12 @@ async function handlePreToolUse() {
693
724
  }
694
725
  } catch {}
695
726
 
696
- const injection = await dispatchOnPreToolUse(rdb, hookData, sessionCtx);
697
- if (injection) {
698
- process.stdout.write(injection + '\n');
727
+ if (hasInjectionBudget()) {
728
+ const injection = await dispatchOnPreToolUse(rdb, hookData, sessionCtx);
729
+ if (injection) {
730
+ process.stdout.write(injection + '\n');
731
+ incrementInjection();
732
+ }
699
733
  }
700
734
  }
701
735
 
@@ -715,11 +749,12 @@ async function handleUserPrompt() {
715
749
  const db = openDb();
716
750
  if (!db) return;
717
751
 
752
+ const project = inferProject();
753
+
718
754
  try {
719
755
  const now = new Date();
720
756
 
721
757
  // Ensure session exists (INSERT OR IGNORE avoids race condition)
722
- const project = inferProject();
723
758
  db.prepare(`
724
759
  INSERT OR IGNORE INTO sdk_sessions (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
725
760
  VALUES (?, ?, ?, ?, ?, 'active')
@@ -738,6 +773,30 @@ async function handleUserPrompt() {
738
773
  counter?.prompt_counter || 1,
739
774
  now.toISOString(), now.getTime()
740
775
  );
776
+
777
+ // Semantic memory injection: search past observations for the user's prompt
778
+ if (hasInjectionBudget()) {
779
+ try {
780
+ const keyObs = db.prepare(`
781
+ SELECT id FROM observations
782
+ WHERE project = ? AND COALESCE(compressed_into, 0) = 0
783
+ AND COALESCE(importance, 1) >= 2
784
+ ORDER BY created_at_epoch DESC LIMIT 5
785
+ `).all(project);
786
+ const keyContextIds = keyObs.map(o => o.id);
787
+
788
+ const memories = searchRelevantMemories(db, promptText, project, keyContextIds);
789
+ if (memories.length > 0) {
790
+ const lines = ['<memory-context relevance="high">'];
791
+ for (const m of memories) {
792
+ lines.push(`- [${m.type}] ${truncate(m.title, 80)} (#${m.id})`);
793
+ }
794
+ lines.push('</memory-context>');
795
+ process.stdout.write(lines.join('\n') + '\n');
796
+ incrementInjection();
797
+ }
798
+ } catch (e) { debugCatch(e, 'handleUserPrompt-memory'); }
799
+ }
741
800
  } finally {
742
801
  db.close();
743
802
  }
@@ -749,10 +808,11 @@ async function handleUserPrompt() {
749
808
  // Cooldown + session dedup (invocations table) prevents double-recommending with SessionStart.
750
809
  try {
751
810
  const rdb = getRegistryDb();
752
- if (rdb) {
811
+ if (rdb && hasInjectionBudget()) {
753
812
  const result = await dispatchOnUserPrompt(rdb, promptText, sessionId);
754
813
  if (result) {
755
814
  process.stdout.write(result + '\n');
815
+ incrementInjection();
756
816
  }
757
817
  }
758
818
  } catch (e) { debugCatch(e, 'handleUserPrompt-dispatch'); }
package/install.mjs CHANGED
@@ -445,7 +445,7 @@ const RESOURCE_METADATA = {
445
445
  trigger_patterns: 'when user needs social media content posts or engagement strategies',
446
446
  },
447
447
  'skill:seo-audit': {
448
- intent_tags: 'seo,audit,technical,analysis,crawl,performance,indexing',
448
+ intent_tags: 'seo,audit,technical,analysis,crawl,indexing',
449
449
  domain_tags: 'seo,audit,web',
450
450
  capability_summary: 'Comprehensive SEO audit with technical analysis crawl errors and performance checks',
451
451
  trigger_patterns: 'when user needs a comprehensive SEO audit or technical site analysis',
@@ -523,8 +523,8 @@ const RESOURCE_METADATA = {
523
523
  trigger_patterns: 'when user wants automated SEO content writing or optimization assistance',
524
524
  },
525
525
  'agent:seo-performance-agent': {
526
- intent_tags: 'seo,performance,agent,speed,core-web-vitals,pagespeed',
527
- domain_tags: 'seo,performance,agent',
526
+ intent_tags: 'seo,agent,core-web-vitals,pagespeed',
527
+ domain_tags: 'seo,agent',
528
528
  capability_summary: 'SEO performance monitoring agent for core web vitals and page speed analysis',
529
529
  trigger_patterns: 'when user needs automated SEO performance monitoring or speed optimization',
530
530
  },
@@ -1212,7 +1212,7 @@ async function install() {
1212
1212
 
1213
1213
  const SOURCE_FILES = [
1214
1214
  'server.mjs', 'server-internals.mjs', 'tool-schemas.mjs',
1215
- 'hook.mjs', 'hook-shared.mjs', 'hook-llm.mjs',
1215
+ 'hook.mjs', 'hook-shared.mjs', 'hook-llm.mjs', 'hook-memory.mjs',
1216
1216
  'hook-semaphore.mjs', 'hook-episode.mjs', 'hook-context.mjs',
1217
1217
  'haiku-client.mjs', 'utils.mjs', 'schema.mjs', 'package.json', 'skill.md',
1218
1218
  'registry.mjs', 'registry-scanner.mjs', 'registry-indexer.mjs',
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.0.12",
3
+ "version": "2.1.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code",
5
5
  "type": "module",
6
6
  "engines": {
@@ -5,7 +5,7 @@ import { debugCatch } from './utils.mjs';
5
5
 
6
6
  // ─── Domain Synonyms ─────────────────────────────────────────────────────────
7
7
 
8
- const DISPATCH_SYNONYMS = {
8
+ export const DISPATCH_SYNONYMS = {
9
9
  // English intent synonyms
10
10
  'clean': ['refactor', 'lint', 'format', 'organize', 'tidy', 'simplify', 'restructure', 'rewrite', 'smell', 'debt'],
11
11
  'test': ['testing', 'unittest', 'e2e', 'coverage', 'tdd', 'qa', 'spec', 'jest', 'vitest', 'pytest', 'mocha', 'cypress', 'playwright'],
@@ -190,6 +190,18 @@ export function buildEnhancedQuery(signals) {
190
190
  }
191
191
  }
192
192
 
193
+ // Raw keywords from prompt: domain-specific terms not captured by intent patterns.
194
+ // Added as column-targeted intent_tags + literal general match (no synonym expansion).
195
+ // Synonym expansion is harmful for rawKeywords: "database" expanding to ORM/SQL terms
196
+ // would dilute BM25 precision. Literal matching is sufficient — "seo" matches "seo"
197
+ // directly across name, intent_tags, capability_summary, trigger_patterns.
198
+ if (signals.rawKeywords?.length > 0) {
199
+ for (const kw of signals.rawKeywords) {
200
+ parts.push(`intent_tags:${kw}`);
201
+ parts.push(kw); // literal, no synonym expansion
202
+ }
203
+ }
204
+
193
205
  // Add general tokens (expanded with synonyms)
194
206
  for (const t of generalTokens) {
195
207
  parts.push(expandToken(t));
@@ -205,39 +217,46 @@ export function buildEnhancedQuery(signals) {
205
217
  * @param {string} text Raw text input
206
218
  * @returns {string|null} FTS5 query string or null
207
219
  */
220
+ const TEXT_QUERY_STOP_WORDS = new Set([
221
+ 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
222
+ 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
223
+ 'should', 'may', 'might', 'can', 'shall', 'to', 'of', 'in', 'for',
224
+ 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'about', 'between',
225
+ 'after', 'before', 'above', 'below', 'and', 'or', 'but', 'not', 'no',
226
+ 'this', 'that', 'these', 'those', 'it', 'its', 'my', 'your', 'his',
227
+ 'her', 'our', 'their', 'me', 'him', 'us', 'them', 'i', 'you', 'he',
228
+ 'she', 'we', 'they', 'what', 'which', 'who', 'when', 'where', 'how',
229
+ 'all', 'each', 'every', 'both', 'few', 'more', 'most', 'other', 'some',
230
+ 'such', 'than', 'too', 'very', 'just', 'also', 'then', 'so', 'if',
231
+ '的', '了', '是', '在', '我', '有', '和', '就', '不', '人', '都',
232
+ '一', '一个', '上', '也', '这', '那', '你', '他', '她', '它', '们',
233
+ '把', '让', '给', '用', '来', '去', '做', '说', '要', '会', '能',
234
+ '帮', '帮我', '请', '下', '吧',
235
+ ]);
236
+
208
237
  export function buildQueryFromText(text) {
209
238
  if (!text || typeof text !== 'string') return null;
210
239
 
211
- const STOP_WORDS = new Set([
212
- 'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'been', 'being',
213
- 'have', 'has', 'had', 'do', 'does', 'did', 'will', 'would', 'could',
214
- 'should', 'may', 'might', 'can', 'shall', 'to', 'of', 'in', 'for',
215
- 'on', 'with', 'at', 'by', 'from', 'as', 'into', 'about', 'between',
216
- 'after', 'before', 'above', 'below', 'and', 'or', 'but', 'not', 'no',
217
- 'this', 'that', 'these', 'those', 'it', 'its', 'my', 'your', 'his',
218
- 'her', 'our', 'their', 'me', 'him', 'us', 'them', 'i', 'you', 'he',
219
- 'she', 'we', 'they', 'what', 'which', 'who', 'when', 'where', 'how',
220
- 'all', 'each', 'every', 'both', 'few', 'more', 'most', 'other', 'some',
221
- 'such', 'than', 'too', 'very', 'just', 'also', 'then', 'so', 'if',
222
- '的', '了', '是', '在', '我', '有', '和', '就', '不', '人', '都',
223
- '一', '一个', '上', '也', '这', '那', '你', '他', '她', '它', '们',
224
- '把', '让', '给', '用', '来', '去', '做', '说', '要', '会', '能',
225
- '帮', '帮我', '请', '下', '吧',
226
- ]);
227
-
228
240
  const cleaned = text.replace(/[{}()[\]^~*:@#$%&]/g, ' ').trim();
229
241
 
230
242
  // Extract CJK compound words before whitespace split (Chinese has no spaces)
231
243
  const cjkTokens = extractCJKTokens(cleaned);
232
244
 
245
+ // Extract embedded English words from mixed CJK/Latin text.
246
+ // Handles "用seo技能检查下网站的seo优化问题" → extracts "seo".
247
+ // Whitespace split fails here because CJK text has no spaces.
248
+ const embeddedEnTokens = (cleaned.match(/[a-zA-Z]{2,}/g) || [])
249
+ .map(w => w.toLowerCase());
250
+
233
251
  const wsTokens = cleaned.split(/\s+/)
234
- .filter(t => t.length > 1 && !STOP_WORDS.has(t.toLowerCase()) && !/^\d+$/.test(t));
252
+ .filter(t => t.length > 1 && !TEXT_QUERY_STOP_WORDS.has(t.toLowerCase()) && !/^\d+$/.test(t));
235
253
 
236
- // Merge: CJK tokens first (high signal), then whitespace tokens, deduplicated
254
+ // Merge: CJK tokens first (high signal), then embedded English, then whitespace tokens, deduplicated
237
255
  const seen = new Set();
238
256
  const tokens = [];
239
- for (const t of [...cjkTokens, ...wsTokens]) {
240
- if (!seen.has(t)) { seen.add(t); tokens.push(t); }
257
+ for (const t of [...cjkTokens, ...embeddedEnTokens, ...wsTokens]) {
258
+ const key = t.toLowerCase();
259
+ if (!seen.has(key) && !TEXT_QUERY_STOP_WORDS.has(key)) { seen.add(key); tokens.push(t); }
241
260
  }
242
261
  tokens.splice(8); // Limit to 8 most relevant tokens
243
262
 
package/server.mjs CHANGED
@@ -686,7 +686,10 @@ server.registerTool(
686
686
  const deletedIds = new Set(args.ids);
687
687
  const deleteTx = db.transaction(() => {
688
688
  // Clean up stale references in other observations' related_ids
689
- // Use LIKE filter to avoid O(N) full-table scan — only fetch rows that may reference deleted IDs
689
+ // Use LIKE filter to avoid O(N) full-table scan — only fetch rows that may reference deleted IDs.
690
+ // NOTE: LIKE %id% has false positives (e.g. %1% matches [10], [21]). This is intentional —
691
+ // the LIKE is a coarse pre-filter; the JSON parse + Set.has below is the precise filter.
692
+ // Acceptable because observation count per user is typically <10K.
690
693
  const likeConditions = args.ids.map(() => `related_ids LIKE ?`).join(' OR ');
691
694
  const likeParams = args.ids.map(id => `%${id}%`);
692
695
  const referencing = db.prepare(`
package/utils.mjs CHANGED
@@ -261,8 +261,8 @@ for (const [abbr, full] of SYNONYM_PAIRS) {
261
261
 
262
262
  // Format a term for FTS5: quote if it contains spaces, hyphens, or special chars
263
263
  function ftsToken(term) {
264
- // Bare tokens are safe only if purely alphanumeric
265
- if (/^[a-zA-Z0-9]+$/.test(term)) return term;
264
+ // Bare tokens are safe if purely alphanumeric or CJK characters
265
+ if (/^[a-zA-Z0-9\u4e00-\u9fff\u3400-\u4dbf]+$/.test(term)) return term;
266
266
  return `"${term.replace(/"/g, '""')}"`;
267
267
  }
268
268
 
@@ -292,10 +292,26 @@ export function sanitizeFtsQuery(query) {
292
292
  if (!cleaned) return null;
293
293
  const tokens = cleaned.split(/\s+/).filter(t => t && !/^-+$/.test(t) && !FTS5_KEYWORDS.has(t.toUpperCase()));
294
294
  if (tokens.length === 0) return null;
295
- const expanded = tokens.map(t => expandToken(t));
295
+ // Replace single CJK character tokens with bigrams for better phrase matching.
296
+ // Individual CJK chars ("系","统") are too noisy; bigrams ("系统") capture compound words.
297
+ const bigrams = cjkBigrams(cleaned);
298
+ const bigramSet = new Set(bigrams ? bigrams.split(' ').filter(Boolean) : []);
299
+ const hasBigrams = bigramSet.size > 0;
300
+ const finalTokens = [];
301
+ const seen = new Set();
302
+ for (const t of tokens) {
303
+ // Skip single CJK characters when we have bigrams — they're subsumed by bigram tokens
304
+ if (hasBigrams && /^[\u4e00-\u9fff\u3400-\u4dbf]$/.test(t)) continue;
305
+ const expanded = expandToken(t);
306
+ if (!seen.has(expanded)) { seen.add(expanded); finalTokens.push(expanded); }
307
+ }
308
+ for (const bg of bigramSet) {
309
+ if (!seen.has(bg)) { seen.add(bg); finalTokens.push(bg); }
310
+ }
311
+ if (finalTokens.length === 0) return null;
296
312
  // FTS5 requires explicit AND after parenthesized OR groups
297
- const hasGroup = expanded.some(e => e.startsWith('('));
298
- return expanded.join(hasGroup ? ' AND ' : ' ');
313
+ const hasGroup = finalTokens.some(e => e.startsWith('('));
314
+ return finalTokens.join(hasGroup ? ' AND ' : ' ');
299
315
  }
300
316
 
301
317
  /**
@@ -336,11 +352,24 @@ export function clampImportance(val) {
336
352
  * @param {object} episode Episode with entries array
337
353
  * @returns {number} Rule-based importance (1, 2, or 3)
338
354
  */
355
+ // Tools that produce file edits (used for significance detection, feedback, importance)
356
+ export const EDIT_TOOLS = new Set(['Edit', 'Write', 'NotebookEdit']);
357
+
339
358
  export function computeRuleImportance(episode) {
340
359
  let importance = 1;
360
+ const toolTypes = new Set();
361
+ let hasErrorThenEdit = false;
362
+ let lastWasError = false;
363
+
341
364
  for (const entry of episode.entries) {
342
365
  const sig = entry.bashSig;
343
366
  const files = entry.files || [];
367
+ toolTypes.add(entry.tool);
368
+
369
+ // Track error→edit debug cycle pattern
370
+ if (lastWasError && EDIT_TOOLS.has(entry.tool)) hasErrorThenEdit = true;
371
+ lastWasError = entry.isError || sig?.isError;
372
+
344
373
  if (sig?.isError && (sig?.isTest || sig?.isBuild)) { importance = 3; break; }
345
374
  if (files.some(f => /\.(env|pem|key)$|\/auth\.|\/credential|\/password/i.test(f))) { importance = 3; break; }
346
375
  if (files.some(f => /migration|schema\.|prisma|alembic/i.test(f))) { importance = 3; break; }
@@ -349,9 +378,35 @@ export function computeRuleImportance(episode) {
349
378
  if (sig?.isDeploy && importance < 2) importance = 2;
350
379
  if (files.some(f => /\.config\.|tsconfig|Dockerfile|docker-compose|package\.json|\.yml$|\.yaml$/i.test(basename(f))) && importance < 2) importance = 2;
351
380
  }
381
+
382
+ // Tool diversity: Edit + Bash + another tool = complete dev cycle
383
+ if (toolTypes.size >= 3 && toolTypes.has('Edit') && importance < 2) importance = 2;
384
+ // Debug cycle: error followed by edit = active debugging
385
+ if (hasErrorThenEdit && importance < 2) importance = 2;
386
+ // Broad change: many files touched
387
+ if ((episode.files || []).length >= 5 && importance < 2) importance = 2;
388
+
352
389
  return importance;
353
390
  }
354
391
 
392
+ /**
393
+ * Generate CJK bigrams from text for improved Chinese phrase matching in FTS5.
394
+ * "修复了系统崩溃" → "修复 系统 统崩 崩溃"
395
+ * @param {string} text Input text containing CJK characters
396
+ * @returns {string} Space-separated bigrams
397
+ */
398
+ export function cjkBigrams(text) {
399
+ if (!text) return '';
400
+ const runs = text.match(/[\u4e00-\u9fff\u3400-\u4dbf]{2,}/g) || [];
401
+ const bigrams = [];
402
+ for (const run of runs) {
403
+ for (let i = 0; i < run.length - 1; i++) {
404
+ bigrams.push(run[i] + run[i + 1]);
405
+ }
406
+ }
407
+ return bigrams.join(' ');
408
+ }
409
+
355
410
  // ─── Project Inference ───────────────────────────────────────────────────────
356
411
 
357
412
  /**