@hawon/nexus 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +60 -38
  2. package/dist/cli/index.js +76 -145
  3. package/dist/index.js +15 -26
  4. package/dist/mcp/server.js +61 -32
  5. package/package.json +2 -1
  6. package/scripts/auto-skill.sh +54 -0
  7. package/scripts/auto-sync.sh +11 -0
  8. package/scripts/benchmark.ts +444 -0
  9. package/scripts/scan-tool-result.sh +46 -0
  10. package/src/cli/index.ts +79 -172
  11. package/src/index.ts +17 -29
  12. package/src/mcp/server.ts +67 -41
  13. package/src/memory-engine/index.ts +4 -6
  14. package/src/memory-engine/nexus-memory.test.ts +437 -0
  15. package/src/memory-engine/nexus-memory.ts +631 -0
  16. package/src/memory-engine/semantic.ts +380 -0
  17. package/src/parser/parse.ts +1 -21
  18. package/src/promptguard/advanced-rules.ts +129 -12
  19. package/src/promptguard/entropy.ts +21 -2
  20. package/src/promptguard/evolution/auto-update.ts +16 -6
  21. package/src/promptguard/multilingual-rules.ts +68 -0
  22. package/src/promptguard/rules.ts +87 -2
  23. package/src/promptguard/scanner.test.ts +262 -0
  24. package/src/promptguard/scanner.ts +1 -1
  25. package/src/promptguard/semantic.ts +19 -4
  26. package/src/promptguard/token-analysis.ts +17 -5
  27. package/src/review/analyzer.test.ts +279 -0
  28. package/src/review/analyzer.ts +112 -28
  29. package/src/shared/stop-words.ts +21 -0
  30. package/src/skills/index.ts +11 -27
  31. package/src/skills/memory-skill-engine.ts +1044 -0
  32. package/src/testing/health-check.ts +19 -2
  33. package/src/cost/index.ts +0 -3
  34. package/src/cost/tracker.ts +0 -290
  35. package/src/cost/types.ts +0 -34
  36. package/src/memory-engine/compressor.ts +0 -97
  37. package/src/memory-engine/context-window.ts +0 -113
  38. package/src/memory-engine/store.ts +0 -371
  39. package/src/memory-engine/types.ts +0 -32
  40. package/src/skills/context-engine.ts +0 -863
  41. package/src/skills/extractor.ts +0 -224
  42. package/src/skills/global-context.ts +0 -726
  43. package/src/skills/library.ts +0 -189
  44. package/src/skills/pattern-engine.ts +0 -712
  45. package/src/skills/render-evolved.ts +0 -160
  46. package/src/skills/skill-reconciler.ts +0 -703
  47. package/src/skills/smart-extractor.ts +0 -843
  48. package/src/skills/types.ts +0 -18
  49. package/src/skills/wisdom-extractor.ts +0 -737
  50. package/src/superdev-evolution/index.ts +0 -3
  51. package/src/superdev-evolution/skill-manager.ts +0 -266
  52. package/src/superdev-evolution/types.ts +0 -20
@@ -0,0 +1,1044 @@
1
+ /**
2
+ * Memory-Based Skill Engine
3
+ *
4
+ * Instead of extracting skills from individual sessions (weak),
5
+ * this engine discovers skills from ACCUMULATED MEMORY across all sessions.
6
+ *
7
+ * Key insight: A skill isn't what happened once — it's what happened
8
+ * REPEATEDLY across different contexts. If the same approach appears
9
+ * 3+ times in different sessions, it's a genuine reusable pattern.
10
+ *
11
+ * Process:
12
+ * 1. INGEST — All sessions → atomic observations in nexus memory
13
+ * 2. CLUSTER — Group similar observations using semantic similarity
14
+ * 3. FREQUENCY — Patterns that appear 3+ times = skill candidates
15
+ * 4. ABSTRACT — Extract the common principle from the cluster
16
+ * 5. BRANCH — If same topic has different approaches, find the condition
17
+ * 6. VALIDATE — Cross-check: does this skill hold across contexts?
18
+ *
19
+ * This leverages our BM25 + semantic + knowledge graph memory engine
20
+ * to do what raw regex/keyword extraction can't.
21
+ */
22
+
23
+ import type { ParsedSession, ParsedMessage } from "../parser/types.js";
24
+ import type { Observation, NexusMemory, KnowledgeNode } from "../memory-engine/nexus-memory.js";
25
+ import { createNexusMemory } from "../memory-engine/nexus-memory.js";
26
+ import { semanticSimilarity, getSynonyms } from "../memory-engine/semantic.js";
27
+ import { createHash } from "node:crypto";
28
+
29
+ // ═══════════════════════════════════════════════════════════════════
30
+ // TYPES
31
+ // ═══════════════════════════════════════════════════════════════════
32
+
33
/** Tier of a learned item: "skill" (complex), "tip" (quick) or "fact" (reference). */
export type KnowledgeTier = "skill" | "tip" | "fact";

/** Fields shared by the tiered knowledge records (`Tip` and `Fact` extend this). */
export type LearnedKnowledge = {
  id: string;
  /** Which tier this record belongs to. */
  tier: KnowledgeTier;
  name: string;
  content: string;
  domains: string[];
  tags: string[];
  evidenceCount: number;
  confidence: number;
  firstSeen: string;
  lastSeen: string;
};

/** A short piece of advice; always carries `tier: "tip"`. */
export type Tip = LearnedKnowledge & {
  tier: "tip";
  /** The one-line advice text. */
  advice: string;
  /** Label describing when the advice applies. */
  trigger: string;
};

/** A reference statement; always carries `tier: "fact"`. */
export type Fact = LearnedKnowledge & {
  tier: "fact";
  /** The statement being recorded. */
  statement: string;
  /** Number of times the fact has been referenced. */
  referenceCount: number;
};
64
+
65
/** A reusable skill distilled from a cluster of similar observations. */
export type MemorySkill = {
  id: string;
  /** Short, actionable title. */
  name: string;
  /** The situation in which the skill applies. */
  situation: string;
  /** The approach or principle to follow. */
  principle: string;
  /** Why the approach works, derived from the supporting evidence. */
  reasoning: string;
  /** Branches: conditions under which a different approach is used. */
  conditions: SkillCondition[];
  /** Things to avoid, taken from contradicting observations. */
  antiPatterns: string[];
  /** Number of observations backing the skill. */
  evidenceCount: number;
  /** Projects/contexts the evidence came from. */
  domains: string[];
  /** Tools that are typically involved. */
  tools: string[];
  /** Confidence in the range 0-1. */
  confidence: number;
  /** Timestamp of the earliest supporting observation. */
  firstSeen: string;
  /** Timestamp of the latest supporting observation. */
  lastSeen: string;
};

/** One conditional branch of a skill. */
export type SkillCondition = {
  /** The condition that triggers this branch. */
  when: string;
  /** The approach to use when the condition holds. */
  approach: string;
  /** Number of observations supporting this branch. */
  evidence: number;
};
100
+
101
/** A group of similar observations gathered around one representative. */
export type ObservationCluster = {
  /** The representative observation the cluster was built around. */
  centroid: Observation;
  /** Every observation in the cluster. */
  members: Observation[];
  /** Keywords shared across the members. */
  commonKeywords: string[];
  /** Tools shared across the members. */
  commonTools: string[];
  /** Distinct domains represented by the members. */
  domains: Set<string>;
  /** Mean confidence of the members. */
  avgConfidence: number;
};

/** Output of the extraction pipeline. */
export type SkillExtractionResult = {
  /** Complex, cross-session, multi-step skills. */
  skills: MemorySkill[];
  /** Short, actionable tips. */
  tips: Tip[];
  /** Frequently recalled reference facts. */
  facts: Fact[];
  /** How many observations were ingested. */
  observationsIngested: number;
  /** How many clusters were formed. */
  clustersFormed: number;
  /** Wall-clock duration of the run in milliseconds. */
  durationMs: number;
};
130
+
131
+ // ═══════════════════════════════════════════════════════════════════
132
+ // STEP 1: INGEST — Sessions → Observations
133
+ // ═══════════════════════════════════════════════════════════════════
134
+
135
/**
 * Extract meaningful observations from a session.
 * Focuses on ACTIONS taken and OUTCOMES observed, not raw chat.
 *
 * Every non-noise message is checked against six independent patterns:
 *  1. a user request answered with 2+ tool calls,
 *  2. a user correction between two assistant messages,
 *  3. a failed tool call followed by a clean retry within the next 3 messages,
 *  4. positive user feedback directly after a 2+ tool-call answer,
 *  5. an explanatory sentence in a tool-free assistant message,
 *  6. an `Edit` tool call on a file with a known language extension.
 *
 * A pattern that does not apply never prevents the later patterns from
 * being evaluated for the same message.
 *
 * @param session - Parsed session whose messages are scanned in order.
 * @returns One entry per detected observation: its text, the session's
 *          domain (last path segment of `cwd`, else of `projectPath`,
 *          else "unknown") and its tags.
 */
function extractActionObservations(session: ParsedSession): {
  text: string;
  domain: string;
  tags: string[];
}[] {
  const observations: { text: string; domain: string; tags: string[] }[] = [];

  // Use the last NON-EMPTY segment so a trailing "/" does not produce "".
  const lastSegment = (path: string | null | undefined): string | undefined =>
    path?.split("/").filter(Boolean).pop();
  const domain = lastSegment(session.cwd) ?? lastSegment(session.projectPath) ?? "unknown";
  const messages = session.messages;

  // Hoisted: these do not depend on the message being inspected.
  const toolFailure = /error|fail|denied|not found/i;
  const recoveryFailure = /error|fail/i;
  const languageByExtension: Record<string, string> = {
    ts: "TypeScript", js: "JavaScript", py: "Python",
    rs: "Rust", go: "Go", java: "Java", kt: "Kotlin",
    swift: "Swift", rb: "Ruby", php: "PHP",
  };

  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];

    // Skip noise: markup/JSON payloads and very short messages.
    if (msg.content.startsWith("<") || msg.content.startsWith("{")) continue;
    if (msg.content.length < 20) continue;

    // Pattern 1: User asked + Claude used tools → the approach is the observation
    if (msg.role === "assistant" && msg.toolCalls && msg.toolCalls.length >= 2) {
      const userRequest = findPreviousUserMessage(messages, i);
      // Guarded with `if` rather than `continue` so that a missing or short
      // request only disables this pattern, not patterns 3 and 6 below.
      if (userRequest && userRequest.length >= 10) {
        const uniqueTools = [...new Set(msg.toolCalls.map((tc) => tc.name))];
        const approach = describeApproach(msg.toolCalls);
        const intent = classifyUserIntent(userRequest);

        if (approach.length > 20 && intent) {
          observations.push({
            text: `[${intent}] ${approach}`,
            domain,
            tags: [...uniqueTools.map((t) => t.toLowerCase()), ...extractTags(userRequest)],
          });
        }
      }
    }

    // Pattern 2: User correction → the corrected approach is valuable
    if (msg.role === "user" && isCorrection(msg.content)) {
      const prevAssistant = findPreviousAssistantMessage(messages, i);
      const nextAssistant = findNextAssistantMessage(messages, i);

      if (prevAssistant && nextAssistant) {
        const wrongApproach = describeMessage(prevAssistant);
        const rightApproach = describeMessage(nextAssistant);

        if (wrongApproach.length > 10 && rightApproach.length > 10) {
          observations.push({
            text: `[수정] ${wrongApproach.slice(0, 40)} 대신 ${rightApproach.slice(0, 40)}`,
            domain,
            tags: ["correction", ...extractTags(msg.content)],
          });
        }
      }
    }

    // Pattern 3: Error → Recovery → the recovery method is the observation
    if (msg.role === "assistant" && msg.toolCalls) {
      // The same regex detects the failure AND picks the failing tool, so the
      // tool name is reported for "fail"/"denied"/"not found" results as well.
      const failedCall = msg.toolCalls.find((tc) => tc.result && toolFailure.test(tc.result));

      if (failedCall) {
        // Look for a recovery in the next three messages.
        for (let j = i + 1; j < Math.min(i + 4, messages.length); j++) {
          const candidate = messages[j];
          if (candidate.role !== "assistant" || !candidate.toolCalls?.length) continue;

          const success = candidate.toolCalls.every(
            (tc) => !tc.result || !recoveryFailure.test(tc.result),
          );
          if (!success) continue;

          const recoveryApproach = describeApproach(candidate.toolCalls);
          observations.push({
            text: `에러 복구: ${failedCall.name ?? "도구"} 실패 후 ${recoveryApproach}로 해결`,
            domain,
            tags: ["error-recovery", ...extractTags(recoveryApproach)],
          });
          break;
        }
      }
    }

    // Pattern 4: Successful multi-step task (positive feedback follows tools)
    if (msg.role === "user" && isPositiveFeedback(msg.content) && i > 0) {
      const prevAssistant = messages[i - 1];
      if (prevAssistant?.role === "assistant" && prevAssistant.toolCalls && prevAssistant.toolCalls.length >= 2) {
        const userRequest = findPreviousUserMessage(messages, i - 1);
        const intent = userRequest ? classifyUserIntent(userRequest) : null;
        const approach = describeApproach(prevAssistant.toolCalls);

        if (intent && approach.length > 20) {
          observations.push({
            text: `[${intent}:성공] ${approach}`,
            domain,
            tags: ["validated", ...extractTags(approach)],
          });
        }
      }
    }

    // Pattern 5: Claude explains a concept (learning moments)
    if (msg.role === "assistant" && msg.content.length > 100 && !msg.toolCalls?.length) {
      // Capture the 20-120 characters that follow an explanatory marker.
      const explanations = msg.content.match(
        /(?:이유는?|because|때문에|핵심은?|중요한\s*(?:것|점)은?|결론은?|요약하면)\s*[::]?\s*(.{20,120})/i,
      );
      if (explanations) {
        const insight = explanations[1].trim()
          .replace(/\*\*/g, "")
          .replace(/`/g, "");

        if (insight.length > 20 && insight.length < 120) {
          observations.push({
            text: `[인사이트] ${insight}`,
            domain,
            tags: ["insight", ...extractTags(insight)],
          });
        }
      }
    }

    // Pattern 6: Tool with specific file type (language-specific tips)
    if (msg.role === "assistant" && msg.toolCalls) {
      for (const tc of msg.toolCalls) {
        // Tool input is untyped data; only accept a real, non-empty string path.
        const rawPath = tc.input?.["file_path"] ?? tc.input?.["path"];
        if (typeof rawPath !== "string" || rawPath.length === 0) continue;

        const ext = rawPath.split(".").pop()?.toLowerCase();
        if (!ext) continue;

        const lang = languageByExtension[ext];
        if (!lang || tc.name !== "Edit") continue;

        const request = findPreviousUserMessage(messages, i);
        const classified = request ? classifyUserIntent(request) : null;
        if (classified) {
          observations.push({
            text: `[${classified}:${lang}] 코드 수정으로 해결`,
            domain,
            tags: [ext, lang.toLowerCase(), "edit"],
          });
        }
      }
    }
  }

  return observations;
}
294
+
295
+ // ═══════════════════════════════════════════════════════════════════
296
+ // STEP 2: CLUSTER — Group similar observations
297
+ // ═══════════════════════════════════════════════════════════════════
298
+
299
/**
 * Group observations into clusters of semantically similar items.
 *
 * Each not-yet-assigned observation is used as a search query against the
 * memory; hits scoring above 0.3 that are still unassigned become its
 * neighbors. A cluster is formed only when the seed plus its neighbors
 * reach `minClusterSize`, and every member is then marked as assigned so it
 * cannot join another cluster.
 *
 * @param memory - Memory engine providing `getStats`, `scanIndex` and `search`.
 * @param minClusterSize - Minimum number of distinct members per cluster.
 * @returns Clusters ordered from largest to smallest.
 */
function clusterObservations(
  memory: NexusMemory,
  minClusterSize: number,
): ObservationCluster[] {
  const stats = memory.getStats();
  if (stats.validObservations < minClusterSize) return [];

  const toolTags = new Set(["bash", "edit", "read", "write", "grep", "agent", "websearch"]);

  // Get all valid observations via L1 scan
  const allObs = memory.scanIndex();
  const clusters: ObservationCluster[] = [];
  const assigned = new Set<string>();

  for (const obs of allObs) {
    if (assigned.has(obs.id)) continue;

    // Search for similar observations using the seed's own text.
    const results = memory.search(obs.content, 20);

    // The seed can be returned by its own query, and a hit may repeat; keep
    // each observation at most once and never re-add the seed. Otherwise the
    // seed would be counted twice in size, keywords and average confidence.
    const taken = new Set<string>([obs.id]);
    const neighbors: Observation[] = [];
    for (const hit of results) {
      const candidateId = hit.observation.id;
      if (hit.score <= 0.3 || assigned.has(candidateId) || taken.has(candidateId)) continue;
      taken.add(candidateId);
      neighbors.push(hit.observation);
    }

    if (neighbors.length < minClusterSize - 1) continue; // Not enough similar observations

    // Form cluster
    const members = [obs, ...neighbors];
    for (const m of members) assigned.add(m.id);

    // Count, per keyword, how many members mention it at least once.
    const keywordCounts = new Map<string, number>();
    for (const m of members) {
      for (const w of new Set(tokenize(m.content))) {
        keywordCounts.set(w, (keywordCounts.get(w) ?? 0) + 1);
      }
    }
    // Keep keywords present in at least half of the members, most common first.
    const commonKeywords = [...keywordCounts.entries()]
      .filter(([, count]) => count >= Math.ceil(members.length * 0.5))
      .sort(([, a], [, b]) => b - a)
      .map(([word]) => word)
      .slice(0, 10);

    // Collect tools (recognised tool tags) and domains
    const commonTools = [...new Set(members.flatMap((m) => m.tags.filter((t) => toolTags.has(t))))];
    const domains = new Set(members.map((m) => m.domain));

    clusters.push({
      centroid: obs,
      members,
      commonKeywords,
      commonTools,
      domains,
      avgConfidence: members.reduce((s, m) => s + m.confidence, 0) / members.length,
    });
  }

  return clusters.sort((a, b) => b.members.length - a.members.length);
}
361
+
362
+ // ═══════════════════════════════════════════════════════════════════
363
+ // STEP 3-6: ABSTRACT, BRANCH, VALIDATE → Skill
364
+ // ═══════════════════════════════════════════════════════════════════
365
+
366
/**
 * Turn a cluster into a skill, or return null when it fails a quality gate.
 *
 * Gates, in order: at least 2 members and 2 common keywords; at least one
 * keyword longer than 3 characters that is not in SKILL_NAME_NOISE; evidence
 * from 2+ sessions (or 4+ members from a single one); a non-empty name and a
 * principle of 20+ characters; the principle must not look like UI chatter,
 * a table, a URL or a code fence; and the principle or name must contain an
 * actionable word.
 *
 * @param cluster - Cluster produced by the clustering step.
 * @returns The derived skill, or null if the cluster is rejected.
 */
function clusterToSkill(cluster: ObservationCluster): MemorySkill | null {
  const { members, commonKeywords, commonTools, domains } = cluster;

  if (members.length < 2 || commonKeywords.length < 2) return null;

  // QUALITY GATE: the cluster needs at least one non-noise keyword.
  const cleanKeywords = commonKeywords.filter((kw) => kw.length > 3 && !SKILL_NAME_NOISE.has(kw));
  if (cleanKeywords.length === 0) return null;

  // Single-session clusters are accepted only when they have 4+ members.
  const sessionIds = new Set(members.map((member) => member.sourceSessionId).filter(Boolean));
  const spansSessions = sessionIds.size >= 2;
  if (!spansSessions && members.length < 4) return null;

  // ABSTRACT: derive the textual parts of the skill from the members.
  const name = buildSkillName(cleanKeywords, commonTools, members);
  const situation = buildSituation(members);
  const principle = buildPrinciple(members);
  const reasoning = buildReasoning(members);

  if (!name || !principle || principle.length < 20) return null;

  // Principles matching any of these are UI messages, not technical insights.
  const uiNoise: RegExp[] = [
    /^이\s*URL을/i, /^흠,?\s*몇/i, /^잠깐/i,
    /^`\S+`\s*이름이/i, // "promptguard 이름이 이미 있는"
    /^\|.*\|.*\|/, // Markdown tables
    /^http/i, // Raw URLs
    /^```/, // Code fences
  ];
  for (const pattern of uiNoise) {
    if (pattern.test(principle)) return null;
  }

  // Either the principle or the name must contain an actionable word.
  const actionable = /해야|하면|사용|필요|방법|대신|instead|should|use|need|avoid|better|always|never|경우|때는|위해/i;
  if (!(actionable.test(principle) || actionable.test(name))) return null;

  // BRANCH: conditions under which the approach differs.
  const conditions = findConditions(members);

  // VALIDATE: up to three anti-patterns from members mentioning a caution or
  // an alternative; prefer the quoted caution text when one is present.
  const antiPatterns: string[] = [];
  for (const member of members) {
    if (antiPatterns.length === 3) break;
    const text = member.content;
    if (!text.includes("주의:") && !text.includes("대신")) continue;
    const quoted = /주의:\s*"([^"]+)"/.exec(text);
    antiPatterns.push(quoted ? quoted[1] : text.slice(0, 60));
  }

  const timestamps = members.map((member) => member.createdAt).sort();

  // Confidence grows with members, domains, validated members and the
  // presence of anti-patterns, capped at 0.95.
  let validatedCount = 0;
  for (const member of members) {
    if (member.tags.includes("validated")) validatedCount += 1;
  }
  const rawConfidence =
    0.2 +
    members.length * 0.05 +
    domains.size * 0.1 +
    validatedCount * 0.15 +
    (antiPatterns.length > 0 ? 0.1 : 0);
  const confidence = Math.min(0.95, rawConfidence);

  const id = createHash("sha256").update(name + situation).digest("hex").slice(0, 12);

  return {
    id,
    name,
    situation,
    principle,
    reasoning,
    conditions,
    antiPatterns,
    evidenceCount: members.length,
    domains: [...domains],
    tools: commonTools,
    confidence,
    firstSeen: timestamps[0] ?? "",
    lastSeen: timestamps[timestamps.length - 1] ?? "",
  };
}
443
+
444
/**
 * Compose a skill name from keywords, tools and the members' intent tags.
 *
 * @param keywords - Candidate keywords; those of 3 characters or fewer and
 *                   those in SKILL_NAME_NOISE are ignored.
 * @param tools - Tool names; the first two are appended in parentheses.
 * @param members - Observations whose leading `[intent]` tag is tallied.
 * @returns `"<intent>: kw1, kw2 (tools)"` when an intent is found, otherwise
 *          up to three keywords joined by ", " plus the tool suffix, or ""
 *          when no keyword survives filtering.
 */
function buildSkillName(keywords: string[], tools: string[], members: Observation[]): string {
  // Keep at most the first three meaningful keywords.
  const meaningful: string[] = [];
  for (const keyword of keywords) {
    if (keyword.length <= 3 || SKILL_NAME_NOISE.has(keyword)) continue;
    meaningful.push(keyword);
    if (meaningful.length === 3) break;
  }
  if (meaningful.length === 0) return "";

  // Collect the leading "[...]" tag of every member that has one.
  const intents: string[] = [];
  for (const member of members) {
    const tag = /^\[([^\]]+)\]/.exec(member.content)?.[1];
    if (tag) intents.push(tag);
  }

  const toolSuffix = tools.length > 0 ? ` (${tools.slice(0, 2).join(", ")})` : "";
  const topIntent = mostFrequent(intents);

  return topIntent
    ? `${topIntent}: ${meaningful.slice(0, 2).join(", ")}${toolSuffix}`
    : `${meaningful.join(", ")}${toolSuffix}`;
}
466
+
467
/**
 * Return the most frequent string in `arr`.
 *
 * @param arr - Strings to tally.
 * @returns The value with the highest count; on a tie, the one that first
 *          appeared earliest. Returns null for an empty array.
 */
function mostFrequent(arr: string[]): string | null {
  const tally = new Map<string, number>();
  for (const value of arr) {
    tally.set(value, (tally.get(value) ?? 0) + 1);
  }

  // Map iteration follows first-insertion order, and the strict ">" keeps the
  // earliest value when counts are equal.
  let winner: string | null = null;
  let winnerCount = 0;
  for (const [value, count] of tally) {
    if (count > winnerCount) {
      winner = value;
      winnerCount = count;
    }
  }
  return winner;
}
473
+
474
/**
 * Describe the situation a cluster applies to.
 *
 * @param members - Cluster members; must contain at least one element.
 * @returns The text following "상황:" in the first member that has such a
 *          marker (trimmed, at most 80 characters); otherwise the first
 *          member's `topic`, or a generic fallback when that is absent.
 */
function buildSituation(members: Observation[]): string {
  // 10-80 characters after "상황:", stopping before an arrow or at the end.
  const situationMarker = /상황:\s*(.{10,80}?)(?:\s*→|$)/;

  for (const member of members) {
    const found = situationMarker.exec(member.content);
    if (found) return found[1].trim().slice(0, 80);
  }

  // Fallback: use common topic
  return members[0].topic ?? "일반적인 개발 상황";
}
490
+
491
/**
 * Pick the principle text for a cluster.
 *
 * Priority: (1) the most frequent text following a leading `[tag]` among
 * the members (earliest wins a tie); (2) the first member tagged
 * "validated", with its leading tag removed; (3) the first member tagged
 * "error-recovery", as is; (4) the first member with its leading tag
 * removed. Results from (2)-(4) are limited to 150 characters.
 *
 * @param members - Cluster members; must contain at least one element.
 * @returns The chosen principle text.
 */
function buildPrinciple(members: Observation[]): string {
  const withoutTag = (text: string): string => text.replace(/^\[[^\]]+\]\s*/, "");

  // Priority 1: tally the approach text of every intent-tagged member.
  const tally = new Map<string, number>();
  for (const member of members) {
    if (!member.content.startsWith("[")) continue;
    const approach = /^\[[^\]]+\]\s*(.+)/.exec(member.content)?.[1]?.trim();
    if (!approach) continue;
    tally.set(approach, (tally.get(approach) ?? 0) + 1);
  }
  let topApproach: string | null = null;
  let topCount = 0;
  for (const [approach, count] of tally) {
    // Strict ">" keeps the earliest approach on equal counts.
    if (count > topCount) {
      topApproach = approach;
      topCount = count;
    }
  }
  if (topApproach !== null) return topApproach;

  // Priority 2: Validated observations
  const validated = members.find((member) => member.tags.includes("validated"));
  if (validated) return withoutTag(validated.content).slice(0, 150);

  // Priority 3: Error recovery observations
  const recovery = members.find((member) => member.tags.includes("error-recovery"));
  if (recovery) return recovery.content.slice(0, 150);

  // Fallback: first member, cleaned
  return withoutTag(members[0].content).slice(0, 150);
}
526
+
527
/**
 * Summarise why a cluster's approach is trusted.
 *
 * One sentence is added for each kind of evidence present: members tagged
 * "error-recovery", "validated" or "correction", and membership spanning
 * more than one domain.
 *
 * @param members - Cluster members.
 * @returns The sentences joined by ". " with a trailing ".", or a generic
 *          sentence stating the member count when none apply.
 */
function buildReasoning(members: Observation[]): string {
  const countTagged = (tag: string): number =>
    members.reduce((total, member) => (member.tags.includes(tag) ? total + 1 : total), 0);

  const reasons: string[] = [];

  const recoveryCount = countTagged("error-recovery");
  if (recoveryCount > 0) reasons.push(`${recoveryCount}회 에러 복구 경험에서 학습`);

  const validationCount = countTagged("validated");
  if (validationCount > 0) reasons.push(`${validationCount}회 성공 확인됨`);

  const correctionCount = countTagged("correction");
  if (correctionCount > 0) reasons.push(`${correctionCount}회 유저 수정 피드백 반영`);

  const domainCount = new Set(members.map((member) => member.domain)).size;
  if (domainCount > 1) reasons.push(`${domainCount}개 프로젝트에서 반복 확인`);

  if (reasons.length === 0) {
    return `${members.length}개 관찰에서 공통 패턴 발견`;
  }
  return reasons.join(". ") + ".";
}
558
+
559
/**
 * Find domains whose approach diverges from that of the first domain.
 *
 * Members are grouped by domain. Each domain is represented by its first
 * member: the text after "접근:" when present, otherwise the first 60
 * characters of the content. A domain yields a condition when the semantic
 * similarity between its approach and the first domain's is below 0.3.
 *
 * @param members - Cluster members.
 * @returns One condition per divergent domain; empty when fewer than two
 *          domains are present.
 */
function findConditions(members: Observation[]): SkillCondition[] {
  // Group by domain, preserving first-seen order of domains and members.
  const byDomain = new Map<string, Observation[]>();
  for (const member of members) {
    const bucket = byDomain.get(member.domain);
    if (bucket) {
      bucket.push(member);
    } else {
      byDomain.set(member.domain, [member]);
    }
  }

  const conditions: SkillCondition[] = [];
  if (byDomain.size < 2) return conditions;

  // The first domain's approach is the baseline every domain is compared to
  // (including itself).
  let baseline: string | undefined;
  for (const [domain, group] of byDomain) {
    const lead = group[0].content;
    const approach = lead.match(/접근:\s*(.{10,80})/)?.[1] ?? lead.slice(0, 60);
    if (baseline === undefined) baseline = approach;

    if (semanticSimilarity(baseline, approach) < 0.3) {
      conditions.push({
        when: `${domain} 프로젝트 컨텍스트`,
        approach: approach.slice(0, 100),
        evidence: group.length,
      });
    }
  }

  return conditions;
}
593
+
594
+ // ═══════════════════════════════════════════════════════════════════
595
+ // MAIN PIPELINE
596
+ // ═══════════════════════════════════════════════════════════════════
597
+
598
/**
 * Run the full memory-based extraction pipeline.
 *
 * 1. Ingest the observations of every session into a nexus memory created
 *    for `dataDir`, then save it. Only each observation's text and domain
 *    (plus the session id) are handed to `ingest`.
 * 2. Cluster similar observations.
 * 3. Convert every cluster that passes the quality gates into a skill.
 * 4. Extract tips and facts from the same memory.
 *
 * @param sessions - Parsed sessions to learn from.
 * @param dataDir - Directory passed to `createNexusMemory`.
 * @param minClusterSize - Minimum cluster size (default 3).
 * @returns Skills and tips sorted by confidence, facts sorted by reference
 *          count (all descending), together with ingestion/cluster counts
 *          and the elapsed time in milliseconds.
 */
export function extractMemorySkills(
  sessions: ParsedSession[],
  dataDir: string,
  minClusterSize = 3,
): SkillExtractionResult {
  const startedAt = performance.now();
  const memory = createNexusMemory(dataDir);

  // Step 1: ingest every observation of every session.
  let observationsIngested = 0;
  for (const session of sessions) {
    for (const { text, domain } of extractActionObservations(session)) {
      observationsIngested += memory.ingest(text, domain, session.sessionId);
    }
  }
  memory.save();

  // Step 2: cluster for skills.
  const clusters = clusterObservations(memory, minClusterSize);

  // Step 3: keep only the clusters that survive the strict gate.
  const skills = clusters
    .map((cluster) => clusterToSkill(cluster))
    .filter((skill): skill is MemorySkill => Boolean(skill));

  // Step 4: tips from actionable observations.
  const tips = extractTips(memory);

  // Step 5: facts from reference-like observations.
  const facts = extractFacts(memory);

  const durationMs = Math.round(performance.now() - startedAt);

  skills.sort((a, b) => b.confidence - a.confidence);
  tips.sort((a, b) => b.confidence - a.confidence);
  facts.sort((a, b) => b.referenceCount - a.referenceCount);

  return {
    skills,
    tips,
    facts,
    observationsIngested,
    clustersFormed: clusters.length,
    durationMs,
  };
}
651
+
652
+ // ═══════════════════════════════════════════════════════════════════
653
+ // TIPS — Short, actionable advice from single observations
654
+ // ═══════════════════════════════════════════════════════════════════
655
+
656
/**
 * Ordered `[trigger label, pattern]` pairs used to recognise tip-worthy text.
 * The label is what gets reported as the tip's trigger.
 */
const TIP_PATTERNS: [string, RegExp][] = [
  // Korean: "in X you have to / need to ..."
  ["명령어", /에서는?\s+.{5,40}(?:해야|필요|써야|붙여야|사용해야)/i],
  // English imperative keywords followed by some text
  ["command", /(?:use|need|must|should|always|never)\s+.{5,50}/i],
  // Korean: "the path is ..."
  ["경로", /경로는?\s+.{5,40}/i],
  // "solved/fixed ... by/with/via"
  ["해결", /(?:해결|fix|solved|resolved).{5,40}(?:으로|by|with|via)/i],
  // Configuration statements
  ["설정", /(?:설정|config|set).{5,30}(?:해야|to|=)/i],
  // Warnings
  ["주의", /(?:주의|caution|warning|avoid|don't).{5,40}/i],
  // Korean "X instead of Y"
  ["대신", /.{5,30}대신\s+.{5,30}/i],
  // English "X instead of Y"
  ["instead", /.{5,30}instead of\s+.{5,30}/i],
];
667
+
668
/**
 * Extract short, actionable tips from individual observations.
 *
 * An observation becomes a tip when it is valid, 15-200 characters long, is
 * not a correction or tool-approach record, matches one of TIP_PATTERNS
 * (the first matching entry supplies the trigger label), does not start
 * with a noise word and does not look like a table, quote or truncated
 * "instead" fragment.
 *
 * Near-duplicate tips are removed, then the survivors are ranked by
 * confidence before the cap is applied, so the cap drops the least
 * confident tips rather than whichever happened to be scanned last.
 *
 * @param memory - Memory engine providing `scanIndex`.
 * @returns At most 50 tips, highest confidence first.
 */
function extractTips(memory: NexusMemory): Tip[] {
  const maxTips = 50;
  const tips: Tip[] = [];

  for (const obs of memory.scanIndex()) {
    if (!obs.valid) continue;
    const content = obs.content;

    // Must be short-ish (tip, not essay)
    if (content.length < 15 || content.length > 200) continue;

    // Skip noise
    if (content.startsWith("[수정]")) continue; // Corrections go to skills
    if (/^\[.*\]\s*(명령어|파일|패턴|코드|에이전트)/.test(content)) continue; // Tool approaches go to skills

    // The first matching pattern decides the trigger label.
    const hit = TIP_PATTERNS.find(([, pattern]) => pattern.test(content));
    if (!hit) continue;
    const tipTrigger = hit[0];

    // Must not be pure noise
    if (SKILL_NAME_NOISE.has(content.split(/\s/)[0].toLowerCase())) continue;

    // Must not contain markdown tables, long dashes, or quote fragments
    if (/^\||\|---|대신\s+.{0,5}$|^\*\*|^>/.test(content)) continue;

    // Extract the advice: drop the leading intent tag and collapse whitespace.
    const advice = content
      .replace(/^\[[^\]]+\]\s*/, "")
      .replace(/\s+/g, " ")
      .trim();

    if (advice.length < 15) continue;

    tips.push({
      id: obs.id,
      tier: "tip",
      name: `${tipTrigger}: ${advice.slice(0, 40)}`,
      content: advice,
      advice,
      trigger: tipTrigger,
      domains: [obs.domain],
      tags: obs.tags,
      evidenceCount: 1,
      confidence: Math.min(0.8, obs.confidence + 0.1),
      firstSeen: obs.createdAt,
      lastSeen: obs.accessedAt,
    });
  }

  // Deduplicate first (earliest of each duplicate group wins), then rank so
  // the cap keeps the most confident tips. The sort is stable, so equal
  // confidences keep scan order.
  const unique = deduplicateTips(tips);
  unique.sort((a, b) => b.confidence - a.confidence);
  return unique.slice(0, maxTips);
}
729
+
730
/**
 * Drop tips whose advice is semantically close to an earlier tip.
 *
 * @param tips - Candidate tips in priority order.
 * @returns The tips that have a similarity of at most 0.4 to every tip kept
 *          before them; the first of each near-duplicate group survives.
 */
function deduplicateTips(tips: Tip[]): Tip[] {
  const kept: Tip[] = [];

  for (const candidate of tips) {
    let duplicate = false;
    for (const existing of kept) {
      if (semanticSimilarity(existing.advice, candidate.advice) > 0.4) {
        duplicate = true;
        break;
      }
    }
    if (!duplicate) kept.push(candidate);
  }

  return kept;
}
743
+
744
+ // ═══════════════════════════════════════════════════════════════════
745
+ // FACTS — Frequently referenced knowledge
746
+ // ═══════════════════════════════════════════════════════════════════
747
+
748
/**
 * Extract short, declarative reference facts from individual observations.
 *
 * An observation becomes a fact when it is valid, its raw content is 10-150
 * characters long, and its content (leading `[tag]` removed, trimmed)
 * matches a fact pattern, is not noise, is at least 15 characters long and
 * does not end in a dangling ":", "-" or "—". Observations that were never
 * accessed are allowed; `referenceCount` is `accessCount + 1`.
 *
 * Facts are ranked by reference count before the cap is applied, so the cap
 * drops the least referenced facts rather than the ones scanned last.
 *
 * @param memory - Memory engine providing `scanIndex`.
 * @returns At most 30 facts, most referenced first.
 */
function extractFacts(memory: NexusMemory): Fact[] {
  const maxFacts = 30;

  // Fact patterns: "X는 Y이다", "X uses Y", paths, versions, configs
  const factPatterns = [
    /^.{3,20}(?:는|은|이)\s+.{3,40}(?:이다|입니다|다|임)/i, // Korean declarative
    /경로|path|url|port|version|버전/i, // Reference info
    /기본값|default|기본\s*설정/i, // Defaults
    /호환|compatible|support|지원/i, // Compatibility
  ];
  // Tables, code fences, URLs, headings, bold/quote starts and hedging words.
  const noise = /^\||^```|^http|^#|^\*\*|^>|대신|하지만|근본적/;
  // A trailing ":", "-" or "—" marks a sentence fragment.
  const danglingEnd = /[:\-—]$/;

  const facts: Fact[] = [];

  for (const obs of memory.scanIndex()) {
    if (!obs.valid) continue;

    // Facts are short, declarative statements
    if (obs.content.length < 10 || obs.content.length > 150) continue;

    const content = obs.content.replace(/^\[[^\]]+\]\s*/, "").trim();

    // Must look like a fact (declarative, not procedural)
    if (!factPatterns.some((pattern) => pattern.test(content))) continue;

    // Must not be noise, too short after cleaning, or a fragment
    if (noise.test(content)) continue;
    if (content.length < 15) continue;
    if (danglingEnd.test(content)) continue;

    facts.push({
      id: obs.id,
      tier: "fact",
      name: content.slice(0, 40),
      content,
      statement: content,
      referenceCount: obs.accessCount + 1,
      domains: [obs.domain],
      tags: obs.tags,
      evidenceCount: 1,
      confidence: obs.confidence,
      firstSeen: obs.createdAt,
      lastSeen: obs.accessedAt,
    });
  }

  // Rank before truncating; the sort is stable, so ties keep scan order.
  facts.sort((a, b) => b.referenceCount - a.referenceCount);
  return facts.slice(0, maxFacts);
}
799
+
800
+ // ═══════════════════════════════════════════════════════════════════
801
+ // OBSIDIAN RENDERER
802
+ // ═══════════════════════════════════════════════════════════════════
803
+
804
/**
 * Render a skill as a Markdown note with YAML front matter.
 *
 * String values in the front matter (name, domains, tools) are emitted as
 * JSON string literals, which are valid YAML double-quoted scalars; quotes,
 * backslashes and control characters inside them are therefore escaped
 * instead of breaking the front matter. Values without such characters are
 * rendered exactly as plain `"value"`.
 *
 * @param skill - The skill to render.
 * @returns The note text. The "조건별 분기" and "하지 말 것" sections are
 *          included only when the skill has conditions / anti-patterns.
 */
export function renderMemorySkillMarkdown(skill: MemorySkill): string {
  const yamlString = (value: string): string => JSON.stringify(value);
  const yamlList = (values: string[]): string => `[${values.map(yamlString).join(", ")}]`;

  const lines: string[] = [];

  // Front matter
  lines.push("---");
  lines.push(`type: memory-skill`);
  lines.push(`name: ${yamlString(skill.name.slice(0, 60))}`);
  lines.push(`confidence: ${skill.confidence.toFixed(2)}`);
  lines.push(`evidence: ${skill.evidenceCount}`);
  lines.push(`domains: ${yamlList(skill.domains)}`);
  lines.push(`tools: ${yamlList(skill.tools)}`);
  lines.push(`tags: [nexus/skill]`);
  lines.push("---");
  lines.push("");

  // Title and summary line
  lines.push(`# ${skill.name}`);
  lines.push("");
  lines.push(`> 확신도: ${(skill.confidence * 100).toFixed(0)}% | 증거: ${skill.evidenceCount}개 | 도메인: ${skill.domains.join(", ")}`);
  lines.push("");

  lines.push("## 상황");
  lines.push("");
  lines.push(skill.situation);
  lines.push("");

  lines.push("## 원칙");
  lines.push("");
  lines.push(skill.principle);
  lines.push("");

  lines.push("## 이유");
  lines.push("");
  lines.push(skill.reasoning);
  lines.push("");

  if (skill.conditions.length > 0) {
    lines.push("## 조건별 분기");
    lines.push("");
    for (const cond of skill.conditions) {
      lines.push(`- **${cond.when}**: ${cond.approach} (증거 ${cond.evidence}개)`);
    }
    lines.push("");
  }

  if (skill.antiPatterns.length > 0) {
    lines.push("## 하지 말 것");
    lines.push("");
    for (const ap of skill.antiPatterns) {
      lines.push(`- ~~${ap}~~`);
    }
    lines.push("");
  }

  return lines.join("\n");
}
857
+
858
/**
 * Render every extracted skill, tip and fact into one Obsidian page.
 *
 * The page starts with YAML front matter (including a generation timestamp and
 * per-kind counts) and a summary line. A "Skills", "Tips" or "Facts" section is
 * emitted only when the corresponding list is non-empty.
 *
 * @param result - Extraction result to render.
 * @returns The page text, with lines joined by "\n".
 */
export function renderKnowledgeBase(result: SkillExtractionResult): string {
  const { skills, tips, facts } = result;

  const header: string[] = [
    "---",
    "type: knowledge-base",
    `generated: "${new Date().toISOString()}"`,
    `skills: ${skills.length}`,
    `tips: ${tips.length}`,
    `facts: ${facts.length}`,
    "tags: [nexus/knowledge]",
    "---",
    "",
    "# Nexus Knowledge Base",
    "",
    `> ${skills.length} skills | ${tips.length} tips | ${facts.length} facts | ${result.observationsIngested} observations`,
    "",
  ];

  // Skills: one sub-heading per skill, anti-patterns only when present.
  const skillSection: string[] = [];
  if (skills.length > 0) {
    skillSection.push("## Skills", "");
    skills.forEach((skill) => {
      const percent = (skill.confidence * 100).toFixed(0);
      skillSection.push(
        `### ${skill.name}`,
        `> ${percent}% confidence | ${skill.evidenceCount} evidence | ${skill.domains.join(", ")}`,
        "",
        `**상황**: ${skill.situation}`,
        "",
        `**원칙**: ${skill.principle}`,
        "",
        `**이유**: ${skill.reasoning}`,
      );
      if (skill.antiPatterns.length > 0) {
        skillSection.push("", "**하지 말 것:**");
        skill.antiPatterns.forEach((antiPattern) => {
          skillSection.push(`- ~~${antiPattern}~~`);
        });
      }
      skillSection.push("");
    });
  }

  // Tips: a flat bullet list.
  const tipSection: string[] = [];
  if (tips.length > 0) {
    tipSection.push("## Tips", "");
    tips.forEach((tip) => {
      tipSection.push(`- 💡 **${tip.trigger}**: ${tip.advice}`);
    });
    tipSection.push("");
  }

  // Facts: a flat bullet list.
  const factSection: string[] = [];
  if (facts.length > 0) {
    factSection.push("## Facts", "");
    facts.forEach((fact) => {
      factSection.push(`- 📌 ${fact.statement}`);
    });
    factSection.push("");
  }

  return header.concat(skillSection, tipSection, factSection).join("\n");
}
920
+
921
+ // ═══════════════════════════════════════════════════════════════════
922
+ // HELPERS
923
+ // ═══════════════════════════════════════════════════════════════════
924
+
925
+ import { STOP_WORDS } from "../shared/stop-words.js";
926
+
927
/**
 * Map a user message onto one abstract intent category.
 *
 * Rules are tried in declaration order and the first matching rule wins, so a
 * keyword listed under two categories always resolves to the earlier one.
 *
 * @param text - Raw user message.
 * @returns The category label, or `null` when no rule matches (the caller can
 *   then skip the message instead of recording noise).
 */
function classifyUserIntent(text: string): string | null {
  const intentRules: [string, RegExp][] = [
    ["코드 리뷰", /리뷰|review|검토|봐봐|봐줘|체크/i],
    ["버그 수정", /fix|고치|수정|bug|에러|error|안돼|doesn.t work/i],
    ["기능 구현", /만들|create|build|add|추가|implement|구현/i],
    ["리팩토링", /refactor|리팩토|정리|clean|개선/i],
    ["보안 분석", /보안|security|취약|vulnerab|audit|감사|scan/i],
    ["배포", /deploy|배포|publish|push|release|npm/i],
    ["테스트", /test|테스트|검증|verify/i],
    ["디버깅", /debug|디버그|trace|로그|log/i],
    ["설정", /설정|setup|install|config|설치/i],
    ["분석", /분석|analyze|조사|찾아|search|scan/i],
    ["문서화", /문서|doc|readme|설명/i],
    ["최적화", /최적화|optimize|performance|성능|빠르/i],
    ["아키텍처 매핑", /map|매핑|구조|architecture|onboard/i],
  ];

  const normalized = text.toLowerCase();
  const firstHit = intentRules.find(([, matcher]) => matcher.test(normalized));
  return firstHit ? firstHit[0] : null;
}
953
+
954
/** Tokens that must never be used as part of a generated skill name. */
const SKILL_NAME_NOISE = new Set<string>([
  // Path segments, user names and build directories
  "users", "hawon", "home", "mnt", "tmp", "claude", "dist", "src",
  // Tooling, protocols and domain/extension fragments
  "node", "npm", "git", "http", "https", "com", "org", "json",
  // Generic programming words and literals
  "file", "path", "data", "test", "true", "false", "null",
  // Korean sentence endings
  "있습니다", "합니다", "입니다", "습니다", "했습니다",
  // Korean filler and connective words
  "대신", "주의", "에러", "하지", "그리고",
]);
962
+
963
/**
 * Split text into lower-cased keyword tokens.
 *
 * Every character other than a-z, precomposed Hangul syllables, digits and
 * whitespace is treated as a separator. Tokens of two characters or fewer and
 * tokens present in STOP_WORDS are dropped.
 *
 * @param text - Text to tokenize.
 * @returns The surviving tokens in their original order.
 */
function tokenize(text: string): string[] {
  const normalized = text.toLowerCase().replace(/[^a-z가-힣0-9\s]/g, " ");
  const tokens: string[] = [];
  for (const word of normalized.split(/\s+/)) {
    if (word.length <= 2) continue;
    if (STOP_WORDS.has(word)) continue;
    tokens.push(word);
  }
  return tokens;
}
967
+
968
/**
 * Find the closest substantive user message before `index`.
 *
 * At most the four preceding messages are inspected, nearest first. A user
 * message is skipped when its content is ten characters or shorter, or when it
 * starts with "<".
 *
 * @param messages - Conversation messages in order.
 * @param index - Position to search backwards from (exclusive).
 * @returns The content of the matching user message, or `null` if none qualifies.
 */
function findPreviousUserMessage(messages: ParsedMessage[], index: number): string | null {
  const oldest = Math.max(0, index - 4);
  let cursor = index - 1;
  while (cursor >= oldest) {
    const { role, content } = messages[cursor];
    cursor -= 1;
    if (role !== "user") continue;
    if (content.length <= 10 || content.startsWith("<")) continue;
    return content;
  }
  return null;
}
977
+
978
/**
 * Find the closest substantive assistant message before `index`.
 *
 * At most the three preceding messages are inspected, nearest first; assistant
 * messages whose content is ten characters or shorter are ignored.
 *
 * @param messages - Conversation messages in order.
 * @param index - Position to search backwards from (exclusive).
 * @returns The matching message object, or `null` if none qualifies.
 */
function findPreviousAssistantMessage(messages: ParsedMessage[], index: number): ParsedMessage | null {
  const oldest = Math.max(0, index - 3);
  for (let pos = index - 1; pos >= oldest; pos -= 1) {
    const candidate = messages[pos];
    const isAssistant = candidate.role === "assistant";
    if (isAssistant && candidate.content.length > 10) {
      return candidate;
    }
  }
  return null;
}
984
+
985
/**
 * Find the closest substantive assistant message after `index`.
 *
 * At most the three following messages are inspected, nearest first; assistant
 * messages whose content is ten characters or shorter are ignored.
 *
 * @param messages - Conversation messages in order.
 * @param index - Position to search forwards from (exclusive).
 * @returns The matching message object, or `null` if none qualifies.
 */
function findNextAssistantMessage(messages: ParsedMessage[], index: number): ParsedMessage | null {
  const limit = Math.min(index + 4, messages.length);
  let pos = index + 1;
  while (pos < limit) {
    const candidate = messages[pos];
    if (candidate.role === "assistant" && candidate.content.length > 10) {
      return candidate;
    }
    pos += 1;
  }
  return null;
}
991
+
992
/**
 * Summarize a sequence of tool calls as a short arrow-separated description.
 *
 * Each distinct tool name contributes one step, in order of first appearance;
 * repeated calls to the same tool are ignored. Tools without a dedicated
 * phrase fall back to "<name> 사용".
 *
 * @param toolCalls - Tool calls made by the assistant; only `name` is read.
 * @returns The steps joined with " → ", or an empty string for no calls.
 */
function describeApproach(toolCalls: { name: string; input: Record<string, unknown> }[]): string {
  // A Map (not a plain object) so names like "constructor" cannot hit inherited keys.
  const stepByTool = new Map<string, string>([
    ["Bash", "명령어로 상태 확인"],
    ["Read", "파일 구조 파악"],
    ["Grep", "패턴 검색"],
    ["Edit", "코드 수정"],
    ["Write", "새 파일 생성"],
    ["Agent", "에이전트 위임"],
    ["WebSearch", "웹 검색"],
  ]);

  // A Set keeps insertion order, which yields the first-appearance ordering.
  const distinctNames = Array.from(new Set(toolCalls.map((call) => call.name)));

  return distinctNames
    .map((toolName) => stepByTool.get(toolName) ?? `${toolName} 사용`)
    .join(" → ");
}
1014
+
1015
/**
 * Produce a short description of a message.
 *
 * A message that carries at least one tool call is described through
 * `describeApproach`; otherwise the first 80 characters of its content are used.
 *
 * @param msg - Message to describe.
 * @returns The tool-call summary or the truncated content.
 */
function describeMessage(msg: ParsedMessage): string {
  const calls = msg.toolCalls;
  return calls && calls.length > 0 ? describeApproach(calls) : msg.content.slice(0, 80);
}
1021
+
1022
/**
 * Heuristically decide whether a user message corrects the assistant.
 *
 * Korean cues are matched as substrings. English cues are matched as whole
 * words so that identifiers and unrelated words containing a cue (for example
 * "await", which contains "wait") are not mistaken for a correction.
 *
 * @param text - User message to inspect.
 * @returns `true` when a correction cue is present.
 */
function isCorrection(text: string): boolean {
  // \b is defined on ASCII word characters, so it is applied to English cues only.
  const koreanCue = /아니|말고|대신|다시|바꿔|아닌데|그거\s*말고/;
  const englishCue = /\bno[,.]?\s*(?:not|don't|instead)\b|\b(?:wait|actually|wrong|instead)\b/i;
  return koreanCue.test(text) || englishCue.test(text);
}
1025
+
1026
/**
 * Heuristically decide whether a user message signals approval or a move on
 * to the next task.
 *
 * Korean cues are matched as substrings. English cues must start at a word
 * boundary, and "works" / "next" must be whole words, so that words such as
 * "frameworks", "workspace" or "nextjs" do not count as approval.
 *
 * @param text - User message to inspect.
 * @returns `true` when an approval cue is present.
 */
function isPositiveFeedback(text: string): boolean {
  // \b is defined on ASCII word characters, so it is applied to English cues only.
  const koreanCue = /좋아|완벽|됐어|됐다|고마워|감사|ㅇㅇ|ㄱㄱ|ㅇㅋ|다음|이제/;
  // Prefix-only boundary keeps derived forms such as "perfectly" or "nicely".
  const englishCue = /\b(?:good|perfect|great|thanks|nice)|\bworks\b|\bnext\b/i;
  return koreanCue.test(text) || englishCue.test(text);
}
1029
+
1030
/**
 * Derive topic tags from free text.
 *
 * Every rule is tested independently, so a text can receive several tags; the
 * result follows rule declaration order. In the "git" rule the short keywords
 * "git" and "pr" are anchored to word boundaries, because as bare substrings
 * they match ordinary words such as "digit", "improve" or "problem".
 *
 * @param text - Text to inspect.
 * @returns The matching tags, possibly empty.
 */
function extractTags(text: string): string[] {
  const tagRules: [string, RegExp][] = [
    ["security", /보안|security|취약|exploit|injection/i],
    ["testing", /테스트|test|coverage/i],
    ["devops", /deploy|배포|docker|npm/i],
    ["debug", /debug|에러|error|fix/i],
    ["refactor", /refactor|리팩토|정리|clean/i],
    // "\bgit" still covers github / gitignore; "\bprs?\b" covers PR and PRs only.
    ["git", /\bgit|commit|push|\bprs?\b/i],
  ];

  return tagRules
    .filter(([, matcher]) => matcher.test(text))
    .map(([tag]) => tag);
}