@chigichan24/crune 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,10 +10,10 @@
10
10
  import * as fs from "node:fs";
11
11
  import * as path from "node:path";
12
12
  import * as os from "node:os";
13
- import { buildSemanticKnowledgeGraph, } from "./knowledge-graph-builder.js";
14
- import { buildSynthesisPrompt, synthesizeWithClaude } from "./skill-synthesizer.js";
13
+ import { buildSemanticKnowledgeGraph, readFacetsDir, aggregateFacetsForTopic, } from "./knowledge-graph-builder.js";
14
+ import { buildSynthesisPrompt, synthesizeWithClaude, stripSynthesisPreamble } from "./skill-synthesizer.js";
15
15
  import { generateSessionSummary } from "./session-summarizer.js";
16
- import { discoverSessions, parseJsonlFile, buildTurns, extractMetadata, parseSubagents, loadLinkedPlan, } from "./session-parser.js";
16
+ import { discoverSessions, parseJsonlFile, buildTurns, extractMetadata, parseSubagents, loadLinkedPlan, isNonInteractiveSession, } from "./session-parser.js";
17
17
  function parseArgs() {
18
18
  const args = process.argv.slice(2);
19
19
  let sessionsDir = path.join(os.homedir(), ".claude", "projects");
@@ -21,6 +21,8 @@ function parseArgs() {
21
21
  let skipSynthesis = false;
22
22
  let synthesisModel;
23
23
  let synthesisCount = 5;
24
+ let facetsDir = path.join(os.homedir(), ".claude", "usage-data", "facets");
25
+ let skipFacets = false;
24
26
  for (let i = 0; i < args.length; i++) {
25
27
  if (args[i] === "--sessions-dir" && args[i + 1]) {
26
28
  sessionsDir = path.resolve(args[++i]);
@@ -31,17 +33,32 @@ function parseArgs() {
31
33
  else if (args[i] === "--skip-synthesis") {
32
34
  skipSynthesis = true;
33
35
  }
36
+ else if (args[i] === "--skip-synthesize") {
37
+ skipSynthesis = true;
38
+ }
34
39
  else if (args[i] === "--synthesis-model" && args[i + 1]) {
35
40
  synthesisModel = args[++i];
36
41
  }
42
+ else if (args[i] === "--synthesize-model" && args[i + 1]) {
43
+ synthesisModel = args[++i];
44
+ }
37
45
  else if (args[i] === "--synthesis-count" && args[i + 1]) {
38
46
  synthesisCount = Math.max(1, parseInt(args[++i], 10) || 5);
39
47
  }
48
+ else if (args[i] === "--synthesize-count" && args[i + 1]) {
49
+ synthesisCount = Math.max(1, parseInt(args[++i], 10) || 5);
50
+ }
51
+ else if (args[i] === "--facets-dir" && args[i + 1]) {
52
+ facetsDir = path.resolve(args[++i]);
53
+ }
54
+ else if (args[i] === "--skip-facets") {
55
+ skipFacets = true;
56
+ }
40
57
  }
41
- return { sessionsDir, outputDir, skipSynthesis, synthesisModel, synthesisCount };
58
+ return { sessionsDir, outputDir, skipSynthesis, synthesisModel, synthesisCount, facetsDir, skipFacets };
42
59
  }
43
60
  // ─── Task 1.5: index.json Generation ────────────────────────────────────────
44
- function generateIndex(sessions) {
61
+ function generateIndex(sessions, facetsMap) {
45
62
  const projectMap = new Map();
46
63
  const sessionSummaries = sessions.map((s) => {
47
64
  const existing = projectMap.get(s.projectDisplayName) || {
@@ -69,7 +86,7 @@ function generateIndex(sessions) {
69
86
  durationMinutes: s.meta.durationMinutes,
70
87
  turnCount: s.meta.turnCount,
71
88
  toolBreakdown: s.meta.toolBreakdown,
72
- firstUserPrompt: s.meta.firstUserPrompt,
89
+ firstUserPrompt: facetsMap?.get(s.meta.sessionId)?.briefSummary || s.meta.firstUserPrompt,
73
90
  summaryText: summaryInfo.summary,
74
91
  keywords: summaryInfo.keywords,
75
92
  scope: summaryInfo.scope,
@@ -250,7 +267,9 @@ async function generateOverview(sessions, synthesisConfig = { skip: false, count
250
267
  subagentCount: s.meta.subagentCount,
251
268
  },
252
269
  }));
253
- const knowledgeGraph = buildSemanticKnowledgeGraph(sessionInputs);
270
+ const knowledgeGraph = buildSemanticKnowledgeGraph(sessionInputs, {
271
+ facetsDir: synthesisConfig.facetsDir,
272
+ });
254
273
  // Top files
255
274
  const topFiles = [...fileEditCounts.entries()]
256
275
  .sort((a, b) => b[1] - a[1])
@@ -337,16 +356,21 @@ async function generateOverview(sessions, synthesisConfig = { skip: false, count
337
356
  const topicSessionSet = new Set(topic.sessionIds);
338
357
  const relatedSequences = knowledgeGraph.enrichedToolSequences.filter((seq) => seq.sessionIds.some((sid) => topicSessionSet.has(sid)));
339
358
  console.error(`[crune] [${i + 1}/${total}] ${topic.label}...`);
359
+ // Build facets insights for this topic if facets data is available
360
+ const facetsInsights = synthesisConfig.facetsDir
361
+ ? aggregateFacetsForTopic(topic.sessionIds, readFacetsDir(synthesisConfig.facetsDir))
362
+ : undefined;
340
363
  const prompt = buildSynthesisPrompt({
341
364
  skillCandidate: candidate,
342
365
  topicNode: topic,
343
366
  enrichedSequences: relatedSequences,
367
+ facetsInsights: facetsInsights,
344
368
  });
345
369
  const result = await synthesizeWithClaude(prompt, synthOpts);
346
370
  if (result.success) {
347
371
  const original = knowledgeGraph.skillCandidates.find((sc) => sc.topicId === candidate.topicId);
348
372
  if (original) {
349
- original.synthesizedMarkdown = result.stdout;
373
+ original.synthesizedMarkdown = stripSynthesisPreamble(result.stdout);
350
374
  }
351
375
  console.error(`[crune] [${i + 1}/${total}] Done.`);
352
376
  }
@@ -386,9 +410,21 @@ function getWeekLabel(date) {
386
410
  // buildKnowledgeGraphEdges removed — replaced by buildSemanticKnowledgeGraph
387
411
  // ─── Main Pipeline ──────────────────────────────────────────────────────────
388
412
  async function main() {
389
- const { sessionsDir, outputDir, skipSynthesis, synthesisModel, synthesisCount } = parseArgs();
413
+ const { sessionsDir, outputDir, skipSynthesis, synthesisModel, synthesisCount, facetsDir, skipFacets } = parseArgs();
390
414
  console.error(`[crune] Sessions dir: ${sessionsDir}`);
391
415
  console.error(`[crune] Output dir: ${outputDir}`);
416
+ console.error(`[crune] Facets dir: ${skipFacets ? "(skipped)" : facetsDir}`);
417
+ // Step 0: Refresh /insights data if facets are enabled
418
+ if (!skipFacets) {
419
+ console.error(`\n[crune] Refreshing /insights data...`);
420
+ const refreshResult = await synthesizeWithClaude("/insights", { timeoutMs: 300_000 });
421
+ if (refreshResult.success) {
422
+ console.error(`[crune] /insights data refreshed.`);
423
+ }
424
+ else {
425
+ console.error(`[crune] /insights refresh failed (continuing without): ${refreshResult.error ?? "unknown"}`);
426
+ }
427
+ }
392
428
  // Step 1: Discover sessions
393
429
  console.error(`\n[crune] Discovering sessions...`);
394
430
  const sessionFiles = discoverSessions(sessionsDir);
@@ -397,11 +433,17 @@ async function main() {
397
433
  console.error("[crune] No sessions found. Exiting.");
398
434
  process.exit(1);
399
435
  }
436
+ // Step 1.5: Filter out non-interactive sessions (claude -p synthesis, /insights)
437
+ const interactiveSessions = sessionFiles.filter((sf) => !isNonInteractiveSession(sf.filePath));
438
+ const skippedCount = sessionFiles.length - interactiveSessions.length;
439
+ if (skippedCount > 0) {
440
+ console.error(`[crune] Skipped ${skippedCount} non-interactive sessions (claude -p)`);
441
+ }
400
442
  // Step 2: Parse each session with metadata and subagents
401
443
  const parsedSessions = [];
402
- for (let i = 0; i < sessionFiles.length; i++) {
403
- const sf = sessionFiles[i];
404
- console.error(`[crune] Processing session ${i + 1}/${sessionFiles.length}: ${sf.sessionId}`);
444
+ for (let i = 0; i < interactiveSessions.length; i++) {
445
+ const sf = interactiveSessions[i];
446
+ console.error(`[crune] Processing session ${i + 1}/${interactiveSessions.length}: ${sf.sessionId}`);
405
447
  try {
406
448
  // Parse main JSONL
407
449
  const lines = await parseJsonlFile(sf.filePath);
@@ -439,7 +481,8 @@ async function main() {
439
481
  fs.mkdirSync(outputDir, { recursive: true });
440
482
  fs.mkdirSync(path.join(outputDir, "detail"), { recursive: true });
441
483
  // index.json
442
- const indexData = generateIndex(parsedSessions);
484
+ const indexFacetsMap = skipFacets ? undefined : readFacetsDir(facetsDir);
485
+ const indexData = generateIndex(parsedSessions, indexFacetsMap);
443
486
  const indexPath = path.join(outputDir, "index.json");
444
487
  fs.writeFileSync(indexPath, JSON.stringify(indexData, null, 2));
445
488
  const indexSize = fs.statSync(indexPath).size;
@@ -458,6 +501,7 @@ async function main() {
458
501
  skip: skipSynthesis,
459
502
  model: synthesisModel,
460
503
  count: synthesisCount,
504
+ facetsDir: skipFacets ? undefined : facetsDir,
461
505
  });
462
506
  const overviewPath = path.join(outputDir, "overview.json");
463
507
  fs.writeFileSync(overviewPath, JSON.stringify(overviewData, null, 2));
package/dist-cli/cli.js CHANGED
@@ -7,7 +7,7 @@ import * as path from "node:path";
7
7
  import * as os from "node:os";
8
8
  import { discoverSessions, parseJsonlFile, buildTurns, extractMetadata, parseSubagents, } from "./session-parser.js";
9
9
  import { buildSemanticKnowledgeGraph, } from "./knowledge-graph-builder.js";
10
- import { buildSynthesisPrompt, synthesizeWithClaude, } from "./skill-synthesizer.js";
10
+ import { buildSynthesisPrompt, synthesizeWithClaude, stripSynthesisPreamble, } from "./skill-synthesizer.js";
11
11
  export function parseCliArgs(argv) {
12
12
  const args = argv.slice(2);
13
13
  let sessionsDir = path.join(os.homedir(), ".claude", "projects");
@@ -168,7 +168,7 @@ async function main() {
168
168
  model: config.model,
169
169
  });
170
170
  if (result.success) {
171
- markdown = result.stdout;
171
+ markdown = stripSynthesisPreamble(result.stdout);
172
172
  console.error(` Synthesized`);
173
173
  }
174
174
  else {
@@ -1,6 +1,7 @@
1
1
  /**
2
2
  * Agglomerative clustering with average linkage and automatic elbow detection.
3
3
  */
4
+ import { normalizeGoalCategory } from "./facets-reader.js";
4
5
  export function agglomerativeClusteringFromDistMatrix(sessionIds, precomputedDist) {
5
6
  const n = sessionIds.length;
6
7
  if (n === 0)
@@ -172,3 +173,110 @@ export function splitOversizedClusters(clusters, totalSessions, precomputedDist,
172
173
  }
173
174
  return result;
174
175
  }
176
/**
 * Fold small ("narrow") clusters together when facets data says they pursue
 * the same kind of goal and they are close in the precomputed distance space.
 *
 * How it works:
 *  - Clusters with more than `maxNarrowSize` members are passed through as-is.
 *  - Every remaining cluster is copied and tagged with the normalized goal
 *    categories of its sessions (looked up through `facetsMap`).
 *  - A single greedy pass walks the narrow clusters in order; cluster `i`
 *    absorbs a later cluster `j` when both have categories, they share at
 *    least one, the combined size does not exceed `maxMergedSize`, and the
 *    average pairwise distance is below `distanceThreshold`. A candidate
 *    that fails any test is skipped; scanning continues with the next one.
 *
 * @param {number[][]} clusters - Clusters of indices into `sessionIds`.
 * @param {string[]} sessionIds - Session id for each index.
 * @param {Map<string, object>} facetsMap - Facets keyed by session id; only
 *   the `goalCategories` object of each entry is read.
 * @param {Map<string, number>} precomputedDist - Pairwise distances keyed by
 *   `"lo:hi"` index pairs; a missing pair counts as distance 1.0.
 * @param {number} [maxNarrowSize=2] - Largest size still treated as narrow.
 * @param {number} [distanceThreshold=0.7] - Exclusive upper bound on the
 *   average inter-cluster distance for a merge.
 * @param {number} [maxMergedSize=8] - Largest allowed size after a merge.
 * @returns {number[][]} The input array itself when there are no narrow
 *   clusters; otherwise a new array with the large clusters first, followed
 *   by the surviving (possibly merged) narrow clusters.
 */
export function mergeNarrowClusters(clusters, sessionIds, facetsMap, precomputedDist, maxNarrowSize = 2, distanceThreshold = 0.7, maxMergedSize = 8) {
    const isLarge = (members) => members.length > maxNarrowSize;
    const big = clusters.filter((members) => isLarge(members));
    // Narrow clusters are copied because merging appends to them in place.
    const small = clusters
        .filter((members) => !isLarge(members))
        .map((members) => [...members]);
    if (small.length === 0) {
        return clusters;
    }
    // Normalized goal categories found among the sessions of one cluster.
    const categoriesOf = (members) => {
        const labels = new Set();
        for (const memberIdx of members) {
            const rawCategories = facetsMap.get(sessionIds[memberIdx])?.goalCategories;
            if (!rawCategories) {
                continue;
            }
            Object.keys(rawCategories).forEach((rawLabel) => {
                labels.add(normalizeGoalCategory(rawLabel));
            });
        }
        return labels;
    };
    const labelSets = small.map((members) => categoriesOf(members));
    // Mean distance over every cross pair; an empty side yields 1.0.
    const meanDistance = (left, right) => {
        const pairCount = left.length * right.length;
        if (pairCount === 0) {
            return 1.0;
        }
        let total = 0;
        for (const a of left) {
            for (const b of right) {
                total += precomputedDist.get(`${Math.min(a, b)}:${Math.max(a, b)}`) ?? 1.0;
            }
        }
        return total / pairCount;
    };
    const overlaps = (first, second) => [...first].some((label) => second.has(label));
    // absorbed[k] is true once cluster k has been folded into an earlier one.
    const absorbed = new Array(small.length).fill(false);
    small.forEach((target, i) => {
        if (absorbed[i] || labelSets[i].size === 0) {
            return;
        }
        for (let j = i + 1; j < small.length; j++) {
            const candidate = small[j];
            const mergeable = !absorbed[j] &&
                labelSets[j].size !== 0 &&
                overlaps(labelSets[i], labelSets[j]) &&
                !(target.length + candidate.length > maxMergedSize) &&
                !(meanDistance(target, candidate) >= distanceThreshold);
            if (!mergeable) {
                continue;
            }
            target.push(...candidate);
            // The grown cluster also inherits the absorbed cluster's categories.
            labelSets[j].forEach((label) => labelSets[i].add(label));
            absorbed[j] = true;
        }
    });
    const survivors = small.filter((_, k) => !absorbed[k]);
    return [...big, ...survivors];
}
@@ -0,0 +1,188 @@
1
+ /**
2
+ * Reads facets JSON files from /insights directory and provides
3
+ * utilities for normalization and aggregation.
4
+ */
5
+ import { readFileSync, readdirSync, existsSync } from "node:fs";
6
+ import { join } from "node:path";
7
+ // ─── Goal category normalization ────────────────────────────────────────────
8
/**
 * Collapse a raw goal-category label into one of a small set of buckets:
 * "feature", "bugfix", "refactoring", "documentation", "review", "testing",
 * "ci", "git_ops", "setup", or "other" when nothing matches.
 *
 * Matching is done on the trimmed, lower-cased label so that labels which
 * differ only by case or surrounding whitespace land in the same bucket.
 * Rules are evaluated in order and the first match wins.
 *
 * @param {string} raw - Raw category label (e.g. a key of `goal_categories`).
 * @returns {string} The normalized bucket; "other" for unmatched labels and
 *   for any non-string input (instead of throwing).
 */
export function normalizeGoalCategory(raw) {
    if (typeof raw !== "string") {
        return "other";
    }
    const label = raw.trim().toLowerCase();
    // Ordered rule table: a label matches a rule when it equals one of the
    // `exact` entries or starts with one of the `prefixes`.
    const rules = [
        { bucket: "feature", prefixes: ["feature", "ui_", "css_"], exact: [] },
        { bucket: "bugfix", prefixes: ["fix_", "bug_", "debug"], exact: [] },
        { bucket: "refactoring", prefixes: ["refactoring", "cleanup_"], exact: ["code_cleanup"] },
        { bucket: "documentation", prefixes: ["documentation", "readme_"], exact: [] },
        { bucket: "review", prefixes: ["code_review"], exact: [] },
        { bucket: "testing", prefixes: ["test_"], exact: [] },
        { bucket: "ci", prefixes: ["ci_"], exact: [] },
        { bucket: "git_ops", prefixes: ["git_", "pr_", "resolve_merge"], exact: ["create_pr"] },
        { bucket: "setup", prefixes: ["setup_", "configuration_", "dependency_", "npm_"], exact: [] },
    ];
    for (const { bucket, prefixes, exact } of rules) {
        if (exact.includes(label) || prefixes.some((prefix) => label.startsWith(prefix))) {
            return bucket;
        }
    }
    return "other";
}
50
+ // ─── Read facets directory ──────────────────────────────────────────────────
51
/**
 * Load every `*.json` file in a facets directory into a Map keyed by
 * session id.
 *
 * Each file is parsed as JSON and its snake_case fields are copied onto a
 * camelCase facets object. Files that cannot be read or parsed are reported
 * with `console.warn` and skipped; entries without a truthy `session_id`
 * are dropped silently. A missing or unreadable directory produces a
 * warning and an empty Map.
 *
 * @param {string} facetsDir - Directory containing the facets JSON files.
 * @returns {Map<string, object>} Facets objects keyed by their session id.
 */
export function readFacetsDir(facetsDir) {
    // [camelCase property on the result, snake_case field in the file]
    const fieldPairs = [
        ["sessionId", "session_id"],
        ["underlyingGoal", "underlying_goal"],
        ["goalCategories", "goal_categories"],
        ["outcome", "outcome"],
        ["claudeHelpfulness", "claude_helpfulness"],
        ["sessionType", "session_type"],
        ["frictionCounts", "friction_counts"],
        ["frictionDetail", "friction_detail"],
        ["primarySuccess", "primary_success"],
        ["briefSummary", "brief_summary"],
    ];
    const facetsBySession = new Map();
    if (!existsSync(facetsDir)) {
        console.warn(`[facets-reader] Directory not found: ${facetsDir}`);
        return facetsBySession;
    }
    let jsonNames;
    try {
        jsonNames = readdirSync(facetsDir).filter((name) => name.endsWith(".json"));
    }
    catch (err) {
        console.warn(`[facets-reader] Failed to read directory: ${facetsDir}`, err);
        return facetsBySession;
    }
    jsonNames.forEach((file) => {
        try {
            const parsed = JSON.parse(readFileSync(join(facetsDir, file), "utf-8"));
            const entry = Object.fromEntries(fieldPairs.map(([camel, snake]) => [camel, parsed[snake]]));
            if (entry.sessionId) {
                facetsBySession.set(entry.sessionId, entry);
            }
        }
        catch (err) {
            // One bad file must not abort the whole scan.
            console.warn(`[facets-reader] Failed to parse ${file}`, err);
        }
    });
    return facetsBySession;
}
91
+ // ─── Helpfulness mapping ────────────────────────────────────────────────────
92
/**
 * Translate a helpfulness label into a numeric score between 0 and 1.
 *
 * @param {string} helpfulness - One of "essential", "very_helpful",
 *   "moderately_helpful", "slightly_helpful" or "unhelpful".
 * @returns {number} The score for the label; 0.5 for any other value.
 */
export function helpfulnessToScore(helpfulness) {
    const scoreByLabel = new Map([
        ["essential", 1.0],
        ["very_helpful", 0.8],
        ["moderately_helpful", 0.5],
        ["slightly_helpful", 0.25],
        ["unhelpful", 0.0],
    ]);
    // Unknown or missing labels get the neutral midpoint.
    return scoreByLabel.has(helpfulness) ? scoreByLabel.get(helpfulness) : 0.5;
}
108
+ // ─── Aggregate facets for a topic ───────────────────────────────────────────
109
/**
 * Summarize the facets of all sessions belonging to one topic.
 *
 * Sessions are visited once, in the order given. Sessions that have no
 * facets entry contribute a neutral 0.5 to both `successRate` and
 * `helpfulnessScore` and nothing to the other fields.
 *
 * @param {Iterable<string>} sessionIds - Session ids of the topic.
 * @param {Map<string, object>} facetsMap - Facets keyed by session id.
 * @returns {{
 *   aggregatedGoals: string[],
 *   normalizedCategories: string[],
 *   successRate: number,
 *   helpfulnessScore: number,
 *   commonFrictions: string[],
 *   frictionDetails: string[]
 * } | undefined} The aggregate, or `undefined` when none of the sessions has
 *   facets data.
 *   - `aggregatedGoals`: up to 3 distinct `underlyingGoal` values.
 *   - `normalizedCategories`: distinct normalized goal categories.
 *   - `successRate`: share of sessions whose outcome is "fully_achieved" or
 *     "mostly_achieved".
 *   - `helpfulnessScore`: mean of `helpfulnessToScore(claudeHelpfulness)`.
 *   - `commonFrictions`: up to 5 friction keys, highest total count first.
 *   - `frictionDetails`: up to 3 non-empty `frictionDetail` values.
 */
export function aggregateFacetsForTopic(sessionIds, facetsMap) {
    const goals = new Set();
    const categories = new Set();
    const frictionTotals = new Map();
    const frictionDetails = [];
    let totalSessions = 0;
    let matchedSessions = 0;
    let successSum = 0;
    let helpSum = 0;
    for (const sid of sessionIds) {
        totalSessions++;
        const facets = facetsMap.get(sid);
        if (!facets) {
            // No /insights data for this session: count it as neutral.
            successSum += 0.5;
            helpSum += 0.5;
            continue;
        }
        matchedSessions++;
        const achieved = facets.outcome === "fully_achieved" || facets.outcome === "mostly_achieved";
        successSum += achieved ? 1 : 0;
        helpSum += helpfulnessToScore(facets.claudeHelpfulness);
        if (facets.underlyingGoal) {
            goals.add(facets.underlyingGoal);
        }
        if (facets.goalCategories && typeof facets.goalCategories === "object") {
            for (const rawCategory of Object.keys(facets.goalCategories)) {
                categories.add(normalizeGoalCategory(rawCategory));
            }
        }
        if (facets.frictionCounts && typeof facets.frictionCounts === "object") {
            for (const [key, rawCount] of Object.entries(facets.frictionCounts)) {
                // Counts come from external JSON: coerce to a number and drop
                // anything non-numeric so totals are never string-concatenated
                // or turned into NaN (which would break the sort below).
                const count = Number(rawCount);
                if (!Number.isFinite(count)) {
                    continue;
                }
                frictionTotals.set(key, (frictionTotals.get(key) ?? 0) + count);
            }
        }
        const detail = facets.frictionDetail;
        if (frictionDetails.length < 3 && detail && detail.length > 0) {
            frictionDetails.push(detail);
        }
    }
    if (matchedSessions === 0) {
        return undefined;
    }
    const commonFrictions = [...frictionTotals.entries()]
        .sort((a, b) => b[1] - a[1])
        .slice(0, 5)
        .map(([key]) => key);
    return {
        aggregatedGoals: [...goals].slice(0, 3),
        normalizedCategories: [...categories],
        // totalSessions is at least 1 here because a session matched.
        successRate: successSum / totalSessions,
        helpfulnessScore: helpSum / totalSessions,
        commonFrictions,
        frictionDetails,
    };
}
@@ -8,7 +8,8 @@ import { buildTfidf } from "./tfidf.js";
8
8
  import { buildToolIdf, buildStructuralVectors } from "./feature-extraction.js";
9
9
  import { buildCombinedMatrix, truncatedSvd, interpretLatentDimensions } from "./svd.js";
10
10
  import { cosineDistance } from "./similarity.js";
11
- import { agglomerativeClusteringFromDistMatrix, splitOversizedClusters, } from "./clustering.js";
11
+ import { agglomerativeClusteringFromDistMatrix, splitOversizedClusters, mergeNarrowClusters, } from "./clustering.js";
12
+ import { readFacetsDir } from "./facets-reader.js";
12
13
  import { buildTopicNodes } from "./topic-nodes.js";
13
14
  import { buildTopicEdges } from "./edges.js";
14
15
  import { louvainDetection, brandesBetweenness } from "./community.js";
@@ -20,17 +21,23 @@ export { buildTfidf } from "./tfidf.js";
20
21
  export { buildToolIdf, buildStructuralVectors } from "./feature-extraction.js";
21
22
  export { buildCombinedMatrix, truncatedSvd, interpretLatentDimensions } from "./svd.js";
22
23
  export { cosineSimilarity, cosineDistance } from "./similarity.js";
23
- export { agglomerativeClusteringFromDistMatrix, findElbowThreshold, clusterWithThresholdFromDistMatrix, splitOversizedClusters, } from "./clustering.js";
24
+ export { agglomerativeClusteringFromDistMatrix, findElbowThreshold, clusterWithThresholdFromDistMatrix, splitOversizedClusters, mergeNarrowClusters, } from "./clustering.js";
24
25
  export { extractDominantAction, selectRepresentativePrompts, generateSuggestedPrompt, computeToolSignature, classifyDominantRole, buildTopicNodes, } from "./topic-nodes.js";
25
26
  export { buildTopicEdges, classifyEdge, findSharedKeywords, findCommonPathPrefix, } from "./edges.js";
26
27
  export { louvainDetection, brandesBetweenness } from "./community.js";
27
28
  export { computeReusabilityScores } from "./reusability.js";
28
29
  export { abstractToolCall, extractEnrichedSequences } from "./tool-pattern.js";
29
30
  export { generateSkillMarkdown, generateHookJson, generateSkillCandidates } from "./skill-generator.js";
31
+ export { readFacetsDir, normalizeGoalCategory, helpfulnessToScore, aggregateFacetsForTopic } from "./facets-reader.js";
30
32
  // ─── Main Entry Point ───────────────────────────────────────────────────────
31
33
  export function buildSemanticKnowledgeGraph(sessions, options = {}) {
32
- const { enableLouvain = true, enableBrandes = true } = options;
34
+ const { enableLouvain = true, enableBrandes = true, facetsDir } = options;
33
35
  console.log(` [Knowledge Graph] Processing ${sessions.length} sessions...`);
36
+ // Read facets data if directory is provided
37
+ const facetsMap = facetsDir ? readFacetsDir(facetsDir) : new Map();
38
+ if (facetsDir) {
39
+ console.log(` [Knowledge Graph] Facets: ${facetsMap.size} sessions with /insights data`);
40
+ }
34
41
  // Edge case: too few sessions
35
42
  if (sessions.length === 0) {
36
43
  return {
@@ -89,8 +96,8 @@ export function buildSemanticKnowledgeGraph(sessions, options = {}) {
89
96
  if (activeSessions.length < 2) {
90
97
  // Single session: create one topic
91
98
  const emptyTfidf = { vocabulary: [], vocabIndex: new Map(), vectors: new Map() };
92
- const singleTopic = buildTopicNodes([sessionIds.map((_, i) => i)], activeSessions, emptyTfidf, toolIdf);
93
- computeReusabilityScores(singleTopic);
99
+ const singleTopic = buildTopicNodes([sessionIds.map((_, i) => i)], activeSessions, emptyTfidf, toolIdf, facetsMap);
100
+ computeReusabilityScores(singleTopic, new Date(), facetsMap);
94
101
  const skillCandidates = generateSkillCandidates(singleTopic, enrichedSequences);
95
102
  return {
96
103
  nodes: singleTopic,
@@ -154,12 +161,20 @@ export function buildSemanticKnowledgeGraph(sessions, options = {}) {
154
161
  clusterMembers = agglomerativeClusteringFromDistMatrix(sessionIds, svdDist);
155
162
  // Split oversized clusters
156
163
  clusterMembers = splitOversizedClusters(clusterMembers, activeSessions.length, svdDist);
164
+ // Merge narrow clusters using facets goal categories
165
+ if (facetsMap.size > 0) {
166
+ const beforeCount = clusterMembers.length;
167
+ clusterMembers = mergeNarrowClusters(clusterMembers, sessionIds, facetsMap, svdDist);
168
+ if (clusterMembers.length < beforeCount) {
169
+ console.log(` [Knowledge Graph] Merged narrow clusters: ${beforeCount} → ${clusterMembers.length} topics`);
170
+ }
171
+ }
157
172
  }
158
173
  console.log(` [Knowledge Graph] Clustering: ${clusterMembers.length} topics from ${activeSessions.length} sessions`);
159
174
  // Step 5: Build topic nodes
160
- const topics = buildTopicNodes(clusterMembers, activeSessions, tfidf, toolIdf);
175
+ const topics = buildTopicNodes(clusterMembers, activeSessions, tfidf, toolIdf, facetsMap);
161
176
  // Step 5b: Compute reusability scores
162
- computeReusabilityScores(topics);
177
+ computeReusabilityScores(topics, new Date(), facetsMap);
163
178
  console.log(` [Knowledge Graph] Reusability scores computed for ${topics.length} topics`);
164
179
  // Step 6: Build topic edges (using SVD vectors for semantic similarity)
165
180
  const edges = buildTopicEdges(topics, activeSessions, tfidf, svd);
@@ -2,7 +2,8 @@
2
2
  * Reusability score computation for topic nodes.
3
3
  * Quantifies how valuable a topic pattern is for automation as skill/hook.
4
4
  */
5
- export function computeReusabilityScores(topics, now = new Date()) {
5
+ import { helpfulnessToScore } from "./facets-reader.js";
6
+ export function computeReusabilityScores(topics, now = new Date(), facetsMap) {
6
7
  if (topics.length === 0)
7
8
  return;
8
9
  const maxSessionCount = Math.max(...topics.map((t) => t.sessionCount));
@@ -17,6 +18,7 @@ export function computeReusabilityScores(topics, now = new Date()) {
17
18
  return Math.max(0, diff / (1000 * 60 * 60 * 24));
18
19
  });
19
20
  const maxDays = Math.max(...daysSinceLastSeen.filter((d) => isFinite(d)), 1);
21
+ const useFacets = facetsMap != null && facetsMap.size > 0;
20
22
  for (let i = 0; i < topics.length; i++) {
21
23
  const topic = topics[i];
22
24
  const frequency = maxSessionCount > 0
@@ -35,17 +37,51 @@ export function computeReusabilityScores(topics, now = new Date()) {
35
37
  const recency = isFinite(days) && maxDays > 0
36
38
  ? 1 - days / maxDays
37
39
  : 0;
38
- const overall = 0.35 * frequency +
39
- 0.25 * timeCost +
40
- 0.25 * crossProjectScore +
41
- 0.15 * recency;
40
+ let overall;
42
41
  const score = {
43
- overall: Math.round(overall * 1000) / 1000,
42
+ overall: 0,
44
43
  frequency: Math.round(frequency * 1000) / 1000,
45
44
  timeCost: Math.round(timeCost * 1000) / 1000,
46
45
  crossProjectScore: Math.round(crossProjectScore * 1000) / 1000,
47
46
  recency: Math.round(recency * 1000) / 1000,
48
47
  };
48
+ if (useFacets) {
49
+ // Compute successRate and helpfulness from facets
50
+ let successSum = 0;
51
+ let helpfulnessSum = 0;
52
+ for (const sessionId of topic.sessionIds) {
53
+ const facets = facetsMap.get(sessionId);
54
+ if (facets) {
55
+ const outcome = facets.outcome;
56
+ successSum += (outcome === "fully_achieved" || outcome === "mostly_achieved") ? 1.0 : 0.0;
57
+ helpfulnessSum += helpfulnessToScore(facets.claudeHelpfulness);
58
+ }
59
+ else {
60
+ successSum += 0.5;
61
+ helpfulnessSum += 0.5;
62
+ }
63
+ }
64
+ const sessionCount = topic.sessionIds.length || 1;
65
+ const successRate = successSum / sessionCount;
66
+ const helpfulness = helpfulnessSum / sessionCount;
67
+ overall =
68
+ 0.30 * frequency +
69
+ 0.20 * timeCost +
70
+ 0.20 * crossProjectScore +
71
+ 0.10 * recency +
72
+ 0.10 * successRate +
73
+ 0.10 * helpfulness;
74
+ score.successRate = Math.round(successRate * 1000) / 1000;
75
+ score.helpfulness = Math.round(helpfulness * 1000) / 1000;
76
+ }
77
+ else {
78
+ overall =
79
+ 0.35 * frequency +
80
+ 0.25 * timeCost +
81
+ 0.25 * crossProjectScore +
82
+ 0.15 * recency;
83
+ }
84
+ score.overall = Math.round(overall * 1000) / 1000;
49
85
  topic.reusabilityScore = score;
50
86
  }
51
87
  }