@chigichan24/crune 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +56 -8
- package/dist-cli/__tests__/facets-reader.test.js +180 -0
- package/dist-cli/__tests__/merge-narrow-clusters.test.js +171 -0
- package/dist-cli/__tests__/reusability-scoring.test.js +175 -0
- package/dist-cli/analyze-sessions.js +57 -13
- package/dist-cli/cli.js +8 -6
- package/dist-cli/knowledge-graph/clustering.js +108 -0
- package/dist-cli/knowledge-graph/facets-reader.js +188 -0
- package/dist-cli/knowledge-graph/index.js +22 -7
- package/dist-cli/knowledge-graph/reusability.js +42 -6
- package/dist-cli/knowledge-graph/topic-nodes.js +27 -4
- package/dist-cli/knowledge-graph-builder.js +4 -2
- package/dist-cli/session-parser.js +19 -0
- package/dist-cli/skill-server.js +2 -2
- package/dist-cli/skill-synthesizer.js +39 -4
- package/package.json +4 -1
|
@@ -10,10 +10,10 @@
|
|
|
10
10
|
import * as fs from "node:fs";
|
|
11
11
|
import * as path from "node:path";
|
|
12
12
|
import * as os from "node:os";
|
|
13
|
-
import { buildSemanticKnowledgeGraph, } from "./knowledge-graph-builder.js";
|
|
14
|
-
import { buildSynthesisPrompt, synthesizeWithClaude } from "./skill-synthesizer.js";
|
|
13
|
+
import { buildSemanticKnowledgeGraph, readFacetsDir, aggregateFacetsForTopic, } from "./knowledge-graph-builder.js";
|
|
14
|
+
import { buildSynthesisPrompt, synthesizeWithClaude, stripSynthesisPreamble } from "./skill-synthesizer.js";
|
|
15
15
|
import { generateSessionSummary } from "./session-summarizer.js";
|
|
16
|
-
import { discoverSessions, parseJsonlFile, buildTurns, extractMetadata, parseSubagents, loadLinkedPlan, } from "./session-parser.js";
|
|
16
|
+
import { discoverSessions, parseJsonlFile, buildTurns, extractMetadata, parseSubagents, loadLinkedPlan, isNonInteractiveSession, } from "./session-parser.js";
|
|
17
17
|
function parseArgs() {
|
|
18
18
|
const args = process.argv.slice(2);
|
|
19
19
|
let sessionsDir = path.join(os.homedir(), ".claude", "projects");
|
|
@@ -21,6 +21,8 @@ function parseArgs() {
|
|
|
21
21
|
let skipSynthesis = false;
|
|
22
22
|
let synthesisModel;
|
|
23
23
|
let synthesisCount = 5;
|
|
24
|
+
let facetsDir = path.join(os.homedir(), ".claude", "usage-data", "facets");
|
|
25
|
+
let skipFacets = false;
|
|
24
26
|
for (let i = 0; i < args.length; i++) {
|
|
25
27
|
if (args[i] === "--sessions-dir" && args[i + 1]) {
|
|
26
28
|
sessionsDir = path.resolve(args[++i]);
|
|
@@ -31,17 +33,32 @@ function parseArgs() {
|
|
|
31
33
|
else if (args[i] === "--skip-synthesis") {
|
|
32
34
|
skipSynthesis = true;
|
|
33
35
|
}
|
|
36
|
+
else if (args[i] === "--skip-synthesize") {
|
|
37
|
+
skipSynthesis = true;
|
|
38
|
+
}
|
|
34
39
|
else if (args[i] === "--synthesis-model" && args[i + 1]) {
|
|
35
40
|
synthesisModel = args[++i];
|
|
36
41
|
}
|
|
42
|
+
else if (args[i] === "--synthesize-model" && args[i + 1]) {
|
|
43
|
+
synthesisModel = args[++i];
|
|
44
|
+
}
|
|
37
45
|
else if (args[i] === "--synthesis-count" && args[i + 1]) {
|
|
38
46
|
synthesisCount = Math.max(1, parseInt(args[++i], 10) || 5);
|
|
39
47
|
}
|
|
48
|
+
else if (args[i] === "--synthesize-count" && args[i + 1]) {
|
|
49
|
+
synthesisCount = Math.max(1, parseInt(args[++i], 10) || 5);
|
|
50
|
+
}
|
|
51
|
+
else if (args[i] === "--facets-dir" && args[i + 1]) {
|
|
52
|
+
facetsDir = path.resolve(args[++i]);
|
|
53
|
+
}
|
|
54
|
+
else if (args[i] === "--skip-facets") {
|
|
55
|
+
skipFacets = true;
|
|
56
|
+
}
|
|
40
57
|
}
|
|
41
|
-
return { sessionsDir, outputDir, skipSynthesis, synthesisModel, synthesisCount };
|
|
58
|
+
return { sessionsDir, outputDir, skipSynthesis, synthesisModel, synthesisCount, facetsDir, skipFacets };
|
|
42
59
|
}
|
|
43
60
|
// ─── Task 1.5: index.json Generation ────────────────────────────────────────
|
|
44
|
-
function generateIndex(sessions) {
|
|
61
|
+
function generateIndex(sessions, facetsMap) {
|
|
45
62
|
const projectMap = new Map();
|
|
46
63
|
const sessionSummaries = sessions.map((s) => {
|
|
47
64
|
const existing = projectMap.get(s.projectDisplayName) || {
|
|
@@ -69,7 +86,7 @@ function generateIndex(sessions) {
|
|
|
69
86
|
durationMinutes: s.meta.durationMinutes,
|
|
70
87
|
turnCount: s.meta.turnCount,
|
|
71
88
|
toolBreakdown: s.meta.toolBreakdown,
|
|
72
|
-
firstUserPrompt: s.meta.firstUserPrompt,
|
|
89
|
+
firstUserPrompt: facetsMap?.get(s.meta.sessionId)?.briefSummary || s.meta.firstUserPrompt,
|
|
73
90
|
summaryText: summaryInfo.summary,
|
|
74
91
|
keywords: summaryInfo.keywords,
|
|
75
92
|
scope: summaryInfo.scope,
|
|
@@ -250,7 +267,9 @@ async function generateOverview(sessions, synthesisConfig = { skip: false, count
|
|
|
250
267
|
subagentCount: s.meta.subagentCount,
|
|
251
268
|
},
|
|
252
269
|
}));
|
|
253
|
-
const knowledgeGraph = buildSemanticKnowledgeGraph(sessionInputs
|
|
270
|
+
const knowledgeGraph = buildSemanticKnowledgeGraph(sessionInputs, {
|
|
271
|
+
facetsDir: synthesisConfig.facetsDir,
|
|
272
|
+
});
|
|
254
273
|
// Top files
|
|
255
274
|
const topFiles = [...fileEditCounts.entries()]
|
|
256
275
|
.sort((a, b) => b[1] - a[1])
|
|
@@ -337,16 +356,21 @@ async function generateOverview(sessions, synthesisConfig = { skip: false, count
|
|
|
337
356
|
const topicSessionSet = new Set(topic.sessionIds);
|
|
338
357
|
const relatedSequences = knowledgeGraph.enrichedToolSequences.filter((seq) => seq.sessionIds.some((sid) => topicSessionSet.has(sid)));
|
|
339
358
|
console.error(`[crune] [${i + 1}/${total}] ${topic.label}...`);
|
|
359
|
+
// Build facets insights for this topic if facets data is available
|
|
360
|
+
const facetsInsights = synthesisConfig.facetsDir
|
|
361
|
+
? aggregateFacetsForTopic(topic.sessionIds, readFacetsDir(synthesisConfig.facetsDir))
|
|
362
|
+
: undefined;
|
|
340
363
|
const prompt = buildSynthesisPrompt({
|
|
341
364
|
skillCandidate: candidate,
|
|
342
365
|
topicNode: topic,
|
|
343
366
|
enrichedSequences: relatedSequences,
|
|
367
|
+
facetsInsights: facetsInsights,
|
|
344
368
|
});
|
|
345
369
|
const result = await synthesizeWithClaude(prompt, synthOpts);
|
|
346
370
|
if (result.success) {
|
|
347
371
|
const original = knowledgeGraph.skillCandidates.find((sc) => sc.topicId === candidate.topicId);
|
|
348
372
|
if (original) {
|
|
349
|
-
original.synthesizedMarkdown = result.stdout;
|
|
373
|
+
original.synthesizedMarkdown = stripSynthesisPreamble(result.stdout);
|
|
350
374
|
}
|
|
351
375
|
console.error(`[crune] [${i + 1}/${total}] Done.`);
|
|
352
376
|
}
|
|
@@ -386,9 +410,21 @@ function getWeekLabel(date) {
|
|
|
386
410
|
// buildKnowledgeGraphEdges removed — replaced by buildSemanticKnowledgeGraph
|
|
387
411
|
// ─── Main Pipeline ──────────────────────────────────────────────────────────
|
|
388
412
|
async function main() {
|
|
389
|
-
const { sessionsDir, outputDir, skipSynthesis, synthesisModel, synthesisCount } = parseArgs();
|
|
413
|
+
const { sessionsDir, outputDir, skipSynthesis, synthesisModel, synthesisCount, facetsDir, skipFacets } = parseArgs();
|
|
390
414
|
console.error(`[crune] Sessions dir: ${sessionsDir}`);
|
|
391
415
|
console.error(`[crune] Output dir: ${outputDir}`);
|
|
416
|
+
console.error(`[crune] Facets dir: ${skipFacets ? "(skipped)" : facetsDir}`);
|
|
417
|
+
// Step 0: Refresh /insights data if facets are enabled
|
|
418
|
+
if (!skipFacets) {
|
|
419
|
+
console.error(`\n[crune] Refreshing /insights data...`);
|
|
420
|
+
const refreshResult = await synthesizeWithClaude("/insights", { timeoutMs: 300_000 });
|
|
421
|
+
if (refreshResult.success) {
|
|
422
|
+
console.error(`[crune] /insights data refreshed.`);
|
|
423
|
+
}
|
|
424
|
+
else {
|
|
425
|
+
console.error(`[crune] /insights refresh failed (continuing without): ${refreshResult.error ?? "unknown"}`);
|
|
426
|
+
}
|
|
427
|
+
}
|
|
392
428
|
// Step 1: Discover sessions
|
|
393
429
|
console.error(`\n[crune] Discovering sessions...`);
|
|
394
430
|
const sessionFiles = discoverSessions(sessionsDir);
|
|
@@ -397,11 +433,17 @@ async function main() {
|
|
|
397
433
|
console.error("[crune] No sessions found. Exiting.");
|
|
398
434
|
process.exit(1);
|
|
399
435
|
}
|
|
436
|
+
// Step 1.5: Filter out non-interactive sessions (claude -p synthesis, /insights)
|
|
437
|
+
const interactiveSessions = sessionFiles.filter((sf) => !isNonInteractiveSession(sf.filePath));
|
|
438
|
+
const skippedCount = sessionFiles.length - interactiveSessions.length;
|
|
439
|
+
if (skippedCount > 0) {
|
|
440
|
+
console.error(`[crune] Skipped ${skippedCount} non-interactive sessions (claude -p)`);
|
|
441
|
+
}
|
|
400
442
|
// Step 2: Parse each session with metadata and subagents
|
|
401
443
|
const parsedSessions = [];
|
|
402
|
-
for (let i = 0; i < sessionFiles.length; i++) {
|
|
403
|
-
const sf = sessionFiles[i];
|
|
404
|
-
console.error(`[crune] Processing session ${i + 1}/${sessionFiles.length}: ${sf.sessionId}`);
|
|
444
|
+
for (let i = 0; i < interactiveSessions.length; i++) {
|
|
445
|
+
const sf = interactiveSessions[i];
|
|
446
|
+
console.error(`[crune] Processing session ${i + 1}/${interactiveSessions.length}: ${sf.sessionId}`);
|
|
405
447
|
try {
|
|
406
448
|
// Parse main JSONL
|
|
407
449
|
const lines = await parseJsonlFile(sf.filePath);
|
|
@@ -439,7 +481,8 @@ async function main() {
|
|
|
439
481
|
fs.mkdirSync(outputDir, { recursive: true });
|
|
440
482
|
fs.mkdirSync(path.join(outputDir, "detail"), { recursive: true });
|
|
441
483
|
// index.json
|
|
442
|
-
const indexData = generateIndex(parsedSessions);
|
|
484
|
+
const indexFacetsMap = skipFacets ? undefined : readFacetsDir(facetsDir);
|
|
485
|
+
const indexData = generateIndex(parsedSessions, indexFacetsMap);
|
|
443
486
|
const indexPath = path.join(outputDir, "index.json");
|
|
444
487
|
fs.writeFileSync(indexPath, JSON.stringify(indexData, null, 2));
|
|
445
488
|
const indexSize = fs.statSync(indexPath).size;
|
|
@@ -458,6 +501,7 @@ async function main() {
|
|
|
458
501
|
skip: skipSynthesis,
|
|
459
502
|
model: synthesisModel,
|
|
460
503
|
count: synthesisCount,
|
|
504
|
+
facetsDir: skipFacets ? undefined : facetsDir,
|
|
461
505
|
});
|
|
462
506
|
const overviewPath = path.join(outputDir, "overview.json");
|
|
463
507
|
fs.writeFileSync(overviewPath, JSON.stringify(overviewData, null, 2));
|
package/dist-cli/cli.js
CHANGED
|
@@ -7,7 +7,7 @@ import * as path from "node:path";
|
|
|
7
7
|
import * as os from "node:os";
|
|
8
8
|
import { discoverSessions, parseJsonlFile, buildTurns, extractMetadata, parseSubagents, } from "./session-parser.js";
|
|
9
9
|
import { buildSemanticKnowledgeGraph, } from "./knowledge-graph-builder.js";
|
|
10
|
-
import { buildSynthesisPrompt, synthesizeWithClaude, } from "./skill-synthesizer.js";
|
|
10
|
+
import { buildSynthesisPrompt, synthesizeWithClaude, stripSynthesisPreamble, } from "./skill-synthesizer.js";
|
|
11
11
|
export function parseCliArgs(argv) {
|
|
12
12
|
const args = argv.slice(2);
|
|
13
13
|
let sessionsDir = path.join(os.homedir(), ".claude", "projects");
|
|
@@ -168,7 +168,7 @@ async function main() {
|
|
|
168
168
|
model: config.model,
|
|
169
169
|
});
|
|
170
170
|
if (result.success) {
|
|
171
|
-
markdown = result.stdout;
|
|
171
|
+
markdown = stripSynthesisPreamble(result.stdout);
|
|
172
172
|
console.error(` Synthesized`);
|
|
173
173
|
}
|
|
174
174
|
else {
|
|
@@ -203,7 +203,9 @@ function extractSkillName(markdown, fallbackLabel) {
|
|
|
203
203
|
.slice(0, 40);
|
|
204
204
|
}
|
|
205
205
|
// ─── Entry point ───────────────────────────────────────────────────
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
206
|
+
if (!process.env.VITEST) {
|
|
207
|
+
main().catch((err) => {
|
|
208
|
+
console.error("Fatal error:", err);
|
|
209
|
+
process.exit(1);
|
|
210
|
+
});
|
|
211
|
+
}
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Agglomerative clustering with average linkage and automatic elbow detection.
|
|
3
3
|
*/
|
|
4
|
+
import { normalizeGoalCategory } from "./facets-reader.js";
|
|
4
5
|
export function agglomerativeClusteringFromDistMatrix(sessionIds, precomputedDist) {
|
|
5
6
|
const n = sessionIds.length;
|
|
6
7
|
if (n === 0)
|
|
@@ -172,3 +173,110 @@ export function splitOversizedClusters(clusters, totalSessions, precomputedDist,
|
|
|
172
173
|
}
|
|
173
174
|
return result;
|
|
174
175
|
}
|
|
176
|
+
/**
 * Merge narrow clusters (≤ maxNarrowSize sessions) that share a normalized goal
 * category from facets data. This addresses the "too narrow scope" issue where
 * the clustering algorithm creates single-session topics.
 *
 * Algorithm:
 * 1. Split the input into large clusters (> maxNarrowSize members, passed
 *    through untouched) and narrow clusters (copied, so the caller's arrays are
 *    never mutated).
 * 2. Collect the normalized goal categories of every narrow cluster, leaving out
 *    the categories listed in `ignoredCategories`.
 * 3. Greedily merge a later narrow cluster into an earlier one when they share
 *    ≥1 category, the merged size stays ≤ maxMergedSize, and the average
 *    inter-cluster distance is < distanceThreshold.
 *
 * @param {number[][]} clusters - Clusters expressed as arrays of indices into `sessionIds`.
 * @param {string[]} sessionIds - Session id for each index used in `clusters`.
 * @param {Map<string, object>} facetsMap - Facets keyed by session id; only `goalCategories` is read.
 * @param {Map<string, number>} precomputedDist - Pairwise distances keyed as `"lo:hi"` (smaller index first);
 *   a missing pair counts as distance 1.0.
 * @param {number} [maxNarrowSize=2] - Largest cluster size still considered narrow.
 * @param {number} [distanceThreshold=0.7] - Merges require an average distance strictly below this value.
 * @param {number} [maxMergedSize=8] - Upper bound on the size of a merged cluster.
 * @param {Iterable<string>} [ignoredCategories=["other"]] - Normalized categories that must not count as a
 *   shared goal. "other" is the catch-all for every unrecognized raw category, so two clusters matching
 *   only on it have nothing demonstrably in common. Pass `[]` to treat every category as mergeable.
 * @returns {number[][]} Large clusters first, followed by the surviving (possibly merged) narrow clusters.
 *   When there are no narrow clusters the input array itself is returned.
 */
export function mergeNarrowClusters(clusters, sessionIds, facetsMap, precomputedDist, maxNarrowSize = 2, distanceThreshold = 0.7, maxMergedSize = 8, ignoredCategories = ["other"]) {
    // Separate clusters into narrow and large
    const largeClusters = [];
    const narrowClusters = [];
    for (const cluster of clusters) {
        if (cluster.length > maxNarrowSize) {
            largeClusters.push(cluster);
        }
        else {
            narrowClusters.push([...cluster]);
        }
    }
    // If no narrow clusters, return as-is
    if (narrowClusters.length === 0) {
        return clusters;
    }
    const ignored = new Set(ignoredCategories);
    // For each narrow cluster, collect the normalized goal categories that are
    // allowed to justify a merge
    const clusterCategories = narrowClusters.map((cluster) => {
        const cats = new Set();
        for (const idx of cluster) {
            const facets = facetsMap.get(sessionIds[idx]);
            if (!facets || !facets.goalCategories) {
                continue;
            }
            for (const rawCat of Object.keys(facets.goalCategories)) {
                const cat = normalizeGoalCategory(rawCat);
                if (!ignored.has(cat)) {
                    cats.add(cat);
                }
            }
        }
        return cats;
    });
    // Helper: compute average inter-cluster distance
    const avgInterClusterDist = (clusterA, clusterB) => {
        let sum = 0;
        let count = 0;
        for (const i of clusterA) {
            for (const j of clusterB) {
                const lo = Math.min(i, j);
                const hi = Math.max(i, j);
                sum += precomputedDist.get(`${lo}:${hi}`) ?? 1.0;
                count++;
            }
        }
        return count > 0 ? sum / count : 1.0;
    };
    // Helper: check if two category sets share at least one category
    const sharesCategory = (a, b) => {
        for (const cat of a) {
            if (b.has(cat)) {
                return true;
            }
        }
        return false;
    };
    // Track which narrow clusters have been merged into an earlier one
    const consumed = new Set();
    // Greedily merge narrow clusters
    for (let i = 0; i < narrowClusters.length; i++) {
        // Clusters without usable categories can neither absorb nor be absorbed
        if (consumed.has(i) || clusterCategories[i].size === 0) {
            continue;
        }
        for (let j = i + 1; j < narrowClusters.length; j++) {
            if (consumed.has(j) || clusterCategories[j].size === 0) {
                continue;
            }
            if (!sharesCategory(clusterCategories[i], clusterCategories[j])) {
                continue;
            }
            // Skip this candidate (but keep scanning) when the result would be too big
            if (narrowClusters[i].length + narrowClusters[j].length > maxMergedSize) {
                continue;
            }
            if (avgInterClusterDist(narrowClusters[i], narrowClusters[j]) >= distanceThreshold) {
                continue;
            }
            // Merge j into i, including its categories so later candidates can match them
            narrowClusters[i].push(...narrowClusters[j]);
            for (const cat of clusterCategories[j]) {
                clusterCategories[i].add(cat);
            }
            consumed.add(j);
        }
    }
    // Collect surviving narrow clusters
    const mergedNarrow = narrowClusters.filter((_, i) => !consumed.has(i));
    return [...largeClusters, ...mergedNarrow];
}
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Reads facets JSON files from /insights directory and provides
|
|
3
|
+
* utilities for normalization and aggregation.
|
|
4
|
+
*/
|
|
5
|
+
import { readFileSync, readdirSync, existsSync } from "node:fs";
|
|
6
|
+
import { join } from "node:path";
|
|
7
|
+
// ─── Goal category normalization ────────────────────────────────────────────
|
|
8
|
+
/**
 * Ordered normalization rules. A raw category maps to `label` when it equals
 * the label itself, equals one of `exact`, or starts with one of `prefixes`.
 * The first matching rule wins.
 */
const GOAL_CATEGORY_RULES = [
    { label: "feature", prefixes: ["feature", "ui_", "css_"], exact: [] },
    { label: "bugfix", prefixes: ["fix_", "bug_", "debug"], exact: [] },
    { label: "refactoring", prefixes: ["refactoring", "cleanup_"], exact: ["code_cleanup"] },
    { label: "documentation", prefixes: ["documentation", "readme_"], exact: [] },
    { label: "review", prefixes: ["code_review"], exact: [] },
    { label: "testing", prefixes: ["test_"], exact: [] },
    { label: "ci", prefixes: ["ci_"], exact: [] },
    { label: "git_ops", prefixes: ["git_", "pr_", "resolve_merge"], exact: ["create_pr"] },
    { label: "setup", prefixes: ["setup_", "configuration_", "dependency_", "npm_"], exact: [] },
];
/**
 * Collapse a raw facets goal-category key into one of a small set of labels:
 * feature, bugfix, refactoring, documentation, review, testing, ci, git_ops,
 * setup, or "other" when nothing matches.
 *
 * The function is idempotent: feeding one of its own output labels back in
 * returns that label unchanged (previously "bugfix", "testing", "review", "ci"
 * and "setup" degraded to "other" on a second pass).
 *
 * @param {string} raw - Raw category key.
 * @returns {string} Normalized label; "other" for unrecognized or non-string input.
 */
export function normalizeGoalCategory(raw) {
    // Non-string input used to throw on `.startsWith`; treat it as unrecognized.
    if (typeof raw !== "string") {
        return "other";
    }
    for (const { label, prefixes, exact } of GOAL_CATEGORY_RULES) {
        if (raw === label ||
            exact.includes(raw) ||
            prefixes.some((prefix) => raw.startsWith(prefix))) {
            return label;
        }
    }
    return "other";
}
|
|
50
|
+
// ─── Read facets directory ──────────────────────────────────────────────────
|
|
51
|
+
/**
 * Load every `*.json` facets file in `facetsDir` into a map keyed by session id.
 *
 * The function is best-effort: a missing directory, an unreadable directory or
 * an unparsable file produces a warning and is skipped, never an exception.
 * Parsed documents are validated before being stored:
 * - the JSON root must be a plain object,
 * - `session_id` must be a non-empty string (the map is looked up with string ids),
 * - `goal_categories` and `friction_counts` are kept only when they are plain
 *   objects, because consumers enumerate their keys; any other shape is
 *   dropped to `undefined`.
 *
 * @param {string} facetsDir - Directory containing facets JSON files.
 * @returns {Map<string, object>} Facets (camelCase fields) keyed by session id; empty on failure.
 */
export function readFacetsDir(facetsDir) {
    const result = new Map();
    if (!existsSync(facetsDir)) {
        console.warn(`[facets-reader] Directory not found: ${facetsDir}`);
        return result;
    }
    let files;
    try {
        files = readdirSync(facetsDir).filter((f) => f.endsWith(".json"));
    }
    catch (err) {
        console.warn(`[facets-reader] Failed to read directory: ${facetsDir}`, err);
        return result;
    }
    const isPlainObject = (value) => typeof value === "object" && value !== null && !Array.isArray(value);
    for (const file of files) {
        try {
            const json = JSON.parse(readFileSync(join(facetsDir, file), "utf-8"));
            if (!isPlainObject(json)) {
                // e.g. `null` or an array: valid JSON, but not a facets document
                console.warn(`[facets-reader] Skipping ${file}: expected a JSON object`);
                continue;
            }
            if (typeof json.session_id !== "string" || json.session_id.length === 0) {
                continue;
            }
            result.set(json.session_id, {
                sessionId: json.session_id,
                underlyingGoal: json.underlying_goal,
                goalCategories: isPlainObject(json.goal_categories) ? json.goal_categories : undefined,
                outcome: json.outcome,
                claudeHelpfulness: json.claude_helpfulness,
                sessionType: json.session_type,
                frictionCounts: isPlainObject(json.friction_counts) ? json.friction_counts : undefined,
                frictionDetail: json.friction_detail,
                primarySuccess: json.primary_success,
                briefSummary: json.brief_summary,
            });
        }
        catch (err) {
            console.warn(`[facets-reader] Failed to parse ${file}`, err);
        }
    }
    return result;
}
|
|
91
|
+
// ─── Helpfulness mapping ────────────────────────────────────────────────────
|
|
92
|
+
/** Numeric score for each recognized helpfulness label. */
const HELPFULNESS_SCORES = new Map([
    ["essential", 1.0],
    ["very_helpful", 0.8],
    ["moderately_helpful", 0.5],
    ["slightly_helpful", 0.25],
    ["unhelpful", 0.0],
]);
/**
 * Translate a facets helpfulness label into a score between 0 and 1.
 *
 * @param {string} helpfulness - Helpfulness label.
 * @returns {number} The mapped score, or the neutral 0.5 for any unrecognized value.
 */
export function helpfulnessToScore(helpfulness) {
    // `??` rather than `||` so the legitimate score 0 for "unhelpful" survives.
    return HELPFULNESS_SCORES.get(helpfulness) ?? 0.5;
}
|
|
108
|
+
// ─── Aggregate facets for a topic ───────────────────────────────────────────
|
|
109
|
+
/**
 * Summarize the facets of all sessions belonging to one topic.
 *
 * @param {string[]} sessionIds - Session ids that make up the topic.
 * @param {Map<string, object>} facetsMap - Facets keyed by session id.
 * @returns {undefined | {
 *   aggregatedGoals: string[],
 *   normalizedCategories: string[],
 *   successRate: number,
 *   helpfulnessScore: number,
 *   commonFrictions: string[],
 *   frictionDetails: string[],
 * }} `undefined` when none of the sessions has facets; otherwise:
 *   - aggregatedGoals: up to 3 distinct underlying goals, in first-seen order
 *   - normalizedCategories: distinct normalized goal categories
 *   - successRate: mean over all sessions of 1 (fully/mostly achieved), 0 (other outcome),
 *     or the neutral 0.5 for sessions without facets
 *   - helpfulnessScore: mean helpfulness score, again 0.5 for sessions without facets
 *   - commonFrictions: up to 5 friction keys, highest summed count first
 *   - frictionDetails: up to 3 non-empty friction detail strings
 */
export function aggregateFacetsForTopic(sessionIds, facetsMap) {
    const matchedFacets = [];
    let successSum = 0;
    let helpSum = 0;
    // Single pass: collect matched facets and accumulate both averages.
    for (const sid of sessionIds) {
        const f = facetsMap.get(sid);
        if (!f) {
            // No facets for this session: contribute a neutral value to both means.
            successSum += 0.5;
            helpSum += 0.5;
            continue;
        }
        matchedFacets.push(f);
        if (f.outcome === "fully_achieved" || f.outcome === "mostly_achieved") {
            successSum += 1;
        }
        helpSum += helpfulnessToScore(f.claudeHelpfulness);
    }
    if (matchedFacets.length === 0) {
        return undefined;
    }
    // At least one session matched, so sessionIds is non-empty and the division is safe.
    const successRate = successSum / sessionIds.length;
    const helpfulnessScore = helpSum / sessionIds.length;
    // Friction counts come from external JSON; accept numbers and numeric strings only.
    // Adding a raw string would concatenate ("0" + "2" -> "02") and corrupt the ranking.
    const toCount = (value) => {
        if (typeof value === "number") {
            return value;
        }
        if (typeof value === "string" && value.trim() !== "") {
            return Number(value);
        }
        return NaN;
    };
    const goalsSet = new Set();
    const categorySet = new Set();
    const frictionMerged = new Map();
    for (const f of matchedFacets) {
        if (f.underlyingGoal) {
            goalsSet.add(f.underlyingGoal);
        }
        if (f.goalCategories) {
            for (const key of Object.keys(f.goalCategories)) {
                categorySet.add(normalizeGoalCategory(key));
            }
        }
        if (f.frictionCounts) {
            for (const [key, rawCount] of Object.entries(f.frictionCounts)) {
                const count = toCount(rawCount);
                if (Number.isFinite(count)) {
                    frictionMerged.set(key, (frictionMerged.get(key) ?? 0) + count);
                }
            }
        }
    }
    const commonFrictions = [...frictionMerged.entries()]
        .sort((a, b) => b[1] - a[1])
        .slice(0, 5)
        .map(([key]) => key);
    const frictionDetails = matchedFacets
        .map((f) => f.frictionDetail)
        .filter((d) => typeof d === "string" && d.length > 0)
        .slice(0, 3);
    return {
        aggregatedGoals: [...goalsSet].slice(0, 3),
        normalizedCategories: [...categorySet],
        successRate,
        helpfulnessScore,
        commonFrictions,
        frictionDetails,
    };
}
|
|
@@ -8,7 +8,8 @@ import { buildTfidf } from "./tfidf.js";
|
|
|
8
8
|
import { buildToolIdf, buildStructuralVectors } from "./feature-extraction.js";
|
|
9
9
|
import { buildCombinedMatrix, truncatedSvd, interpretLatentDimensions } from "./svd.js";
|
|
10
10
|
import { cosineDistance } from "./similarity.js";
|
|
11
|
-
import { agglomerativeClusteringFromDistMatrix, splitOversizedClusters, } from "./clustering.js";
|
|
11
|
+
import { agglomerativeClusteringFromDistMatrix, splitOversizedClusters, mergeNarrowClusters, } from "./clustering.js";
|
|
12
|
+
import { readFacetsDir } from "./facets-reader.js";
|
|
12
13
|
import { buildTopicNodes } from "./topic-nodes.js";
|
|
13
14
|
import { buildTopicEdges } from "./edges.js";
|
|
14
15
|
import { louvainDetection, brandesBetweenness } from "./community.js";
|
|
@@ -20,17 +21,23 @@ export { buildTfidf } from "./tfidf.js";
|
|
|
20
21
|
export { buildToolIdf, buildStructuralVectors } from "./feature-extraction.js";
|
|
21
22
|
export { buildCombinedMatrix, truncatedSvd, interpretLatentDimensions } from "./svd.js";
|
|
22
23
|
export { cosineSimilarity, cosineDistance } from "./similarity.js";
|
|
23
|
-
export { agglomerativeClusteringFromDistMatrix, findElbowThreshold, clusterWithThresholdFromDistMatrix, splitOversizedClusters, } from "./clustering.js";
|
|
24
|
+
export { agglomerativeClusteringFromDistMatrix, findElbowThreshold, clusterWithThresholdFromDistMatrix, splitOversizedClusters, mergeNarrowClusters, } from "./clustering.js";
|
|
24
25
|
export { extractDominantAction, selectRepresentativePrompts, generateSuggestedPrompt, computeToolSignature, classifyDominantRole, buildTopicNodes, } from "./topic-nodes.js";
|
|
25
26
|
export { buildTopicEdges, classifyEdge, findSharedKeywords, findCommonPathPrefix, } from "./edges.js";
|
|
26
27
|
export { louvainDetection, brandesBetweenness } from "./community.js";
|
|
27
28
|
export { computeReusabilityScores } from "./reusability.js";
|
|
28
29
|
export { abstractToolCall, extractEnrichedSequences } from "./tool-pattern.js";
|
|
29
30
|
export { generateSkillMarkdown, generateHookJson, generateSkillCandidates } from "./skill-generator.js";
|
|
31
|
+
export { readFacetsDir, normalizeGoalCategory, helpfulnessToScore, aggregateFacetsForTopic } from "./facets-reader.js";
|
|
30
32
|
// ─── Main Entry Point ───────────────────────────────────────────────────────
|
|
31
33
|
export function buildSemanticKnowledgeGraph(sessions, options = {}) {
|
|
32
|
-
const { enableLouvain = true, enableBrandes = true } = options;
|
|
34
|
+
const { enableLouvain = true, enableBrandes = true, facetsDir } = options;
|
|
33
35
|
console.log(` [Knowledge Graph] Processing ${sessions.length} sessions...`);
|
|
36
|
+
// Read facets data if directory is provided
|
|
37
|
+
const facetsMap = facetsDir ? readFacetsDir(facetsDir) : new Map();
|
|
38
|
+
if (facetsDir) {
|
|
39
|
+
console.log(` [Knowledge Graph] Facets: ${facetsMap.size} sessions with /insights data`);
|
|
40
|
+
}
|
|
34
41
|
// Edge case: too few sessions
|
|
35
42
|
if (sessions.length === 0) {
|
|
36
43
|
return {
|
|
@@ -89,8 +96,8 @@ export function buildSemanticKnowledgeGraph(sessions, options = {}) {
|
|
|
89
96
|
if (activeSessions.length < 2) {
|
|
90
97
|
// Single session: create one topic
|
|
91
98
|
const emptyTfidf = { vocabulary: [], vocabIndex: new Map(), vectors: new Map() };
|
|
92
|
-
const singleTopic = buildTopicNodes([sessionIds.map((_, i) => i)], activeSessions, emptyTfidf, toolIdf);
|
|
93
|
-
computeReusabilityScores(singleTopic);
|
|
99
|
+
const singleTopic = buildTopicNodes([sessionIds.map((_, i) => i)], activeSessions, emptyTfidf, toolIdf, facetsMap);
|
|
100
|
+
computeReusabilityScores(singleTopic, new Date(), facetsMap);
|
|
94
101
|
const skillCandidates = generateSkillCandidates(singleTopic, enrichedSequences);
|
|
95
102
|
return {
|
|
96
103
|
nodes: singleTopic,
|
|
@@ -154,12 +161,20 @@ export function buildSemanticKnowledgeGraph(sessions, options = {}) {
|
|
|
154
161
|
clusterMembers = agglomerativeClusteringFromDistMatrix(sessionIds, svdDist);
|
|
155
162
|
// Split oversized clusters
|
|
156
163
|
clusterMembers = splitOversizedClusters(clusterMembers, activeSessions.length, svdDist);
|
|
164
|
+
// Merge narrow clusters using facets goal categories
|
|
165
|
+
if (facetsMap.size > 0) {
|
|
166
|
+
const beforeCount = clusterMembers.length;
|
|
167
|
+
clusterMembers = mergeNarrowClusters(clusterMembers, sessionIds, facetsMap, svdDist);
|
|
168
|
+
if (clusterMembers.length < beforeCount) {
|
|
169
|
+
console.log(` [Knowledge Graph] Merged narrow clusters: ${beforeCount} → ${clusterMembers.length} topics`);
|
|
170
|
+
}
|
|
171
|
+
}
|
|
157
172
|
}
|
|
158
173
|
console.log(` [Knowledge Graph] Clustering: ${clusterMembers.length} topics from ${activeSessions.length} sessions`);
|
|
159
174
|
// Step 5: Build topic nodes
|
|
160
|
-
const topics = buildTopicNodes(clusterMembers, activeSessions, tfidf, toolIdf);
|
|
175
|
+
const topics = buildTopicNodes(clusterMembers, activeSessions, tfidf, toolIdf, facetsMap);
|
|
161
176
|
// Step 5b: Compute reusability scores
|
|
162
|
-
computeReusabilityScores(topics);
|
|
177
|
+
computeReusabilityScores(topics, new Date(), facetsMap);
|
|
163
178
|
console.log(` [Knowledge Graph] Reusability scores computed for ${topics.length} topics`);
|
|
164
179
|
// Step 6: Build topic edges (using SVD vectors for semantic similarity)
|
|
165
180
|
const edges = buildTopicEdges(topics, activeSessions, tfidf, svd);
|
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
* Reusability score computation for topic nodes.
|
|
3
3
|
* Quantifies how valuable a topic pattern is for automation as skill/hook.
|
|
4
4
|
*/
|
|
5
|
-
|
|
5
|
+
import { helpfulnessToScore } from "./facets-reader.js";
|
|
6
|
+
export function computeReusabilityScores(topics, now = new Date(), facetsMap) {
|
|
6
7
|
if (topics.length === 0)
|
|
7
8
|
return;
|
|
8
9
|
const maxSessionCount = Math.max(...topics.map((t) => t.sessionCount));
|
|
@@ -17,6 +18,7 @@ export function computeReusabilityScores(topics, now = new Date()) {
|
|
|
17
18
|
return Math.max(0, diff / (1000 * 60 * 60 * 24));
|
|
18
19
|
});
|
|
19
20
|
const maxDays = Math.max(...daysSinceLastSeen.filter((d) => isFinite(d)), 1);
|
|
21
|
+
const useFacets = facetsMap != null && facetsMap.size > 0;
|
|
20
22
|
for (let i = 0; i < topics.length; i++) {
|
|
21
23
|
const topic = topics[i];
|
|
22
24
|
const frequency = maxSessionCount > 0
|
|
@@ -35,17 +37,51 @@ export function computeReusabilityScores(topics, now = new Date()) {
|
|
|
35
37
|
const recency = isFinite(days) && maxDays > 0
|
|
36
38
|
? 1 - days / maxDays
|
|
37
39
|
: 0;
|
|
38
|
-
|
|
39
|
-
0.25 * timeCost +
|
|
40
|
-
0.25 * crossProjectScore +
|
|
41
|
-
0.15 * recency;
|
|
40
|
+
let overall;
|
|
42
41
|
const score = {
|
|
43
|
-
overall: Math.round(overall * 1000) / 1000,
|
|
42
|
+
overall: 0,
|
|
44
43
|
frequency: Math.round(frequency * 1000) / 1000,
|
|
45
44
|
timeCost: Math.round(timeCost * 1000) / 1000,
|
|
46
45
|
crossProjectScore: Math.round(crossProjectScore * 1000) / 1000,
|
|
47
46
|
recency: Math.round(recency * 1000) / 1000,
|
|
48
47
|
};
|
|
48
|
+
if (useFacets) {
|
|
49
|
+
// Compute successRate and helpfulness from facets
|
|
50
|
+
let successSum = 0;
|
|
51
|
+
let helpfulnessSum = 0;
|
|
52
|
+
for (const sessionId of topic.sessionIds) {
|
|
53
|
+
const facets = facetsMap.get(sessionId);
|
|
54
|
+
if (facets) {
|
|
55
|
+
const outcome = facets.outcome;
|
|
56
|
+
successSum += (outcome === "fully_achieved" || outcome === "mostly_achieved") ? 1.0 : 0.0;
|
|
57
|
+
helpfulnessSum += helpfulnessToScore(facets.claudeHelpfulness);
|
|
58
|
+
}
|
|
59
|
+
else {
|
|
60
|
+
successSum += 0.5;
|
|
61
|
+
helpfulnessSum += 0.5;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
const sessionCount = topic.sessionIds.length || 1;
|
|
65
|
+
const successRate = successSum / sessionCount;
|
|
66
|
+
const helpfulness = helpfulnessSum / sessionCount;
|
|
67
|
+
overall =
|
|
68
|
+
0.30 * frequency +
|
|
69
|
+
0.20 * timeCost +
|
|
70
|
+
0.20 * crossProjectScore +
|
|
71
|
+
0.10 * recency +
|
|
72
|
+
0.10 * successRate +
|
|
73
|
+
0.10 * helpfulness;
|
|
74
|
+
score.successRate = Math.round(successRate * 1000) / 1000;
|
|
75
|
+
score.helpfulness = Math.round(helpfulness * 1000) / 1000;
|
|
76
|
+
}
|
|
77
|
+
else {
|
|
78
|
+
overall =
|
|
79
|
+
0.35 * frequency +
|
|
80
|
+
0.25 * timeCost +
|
|
81
|
+
0.25 * crossProjectScore +
|
|
82
|
+
0.15 * recency;
|
|
83
|
+
}
|
|
84
|
+
score.overall = Math.round(overall * 1000) / 1000;
|
|
49
85
|
topic.reusabilityScore = score;
|
|
50
86
|
}
|
|
51
87
|
}
|