@chigichan24/crune 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +155 -0
  3. package/bin/crune.js +2 -0
  4. package/dist-cli/__tests__/cli.test.js +63 -0
  5. package/dist-cli/__tests__/clustering.test.js +200 -0
  6. package/dist-cli/__tests__/community.test.js +115 -0
  7. package/dist-cli/__tests__/edges.test.js +130 -0
  8. package/dist-cli/__tests__/feature-extraction.test.js +66 -0
  9. package/dist-cli/__tests__/fixtures.js +192 -0
  10. package/dist-cli/__tests__/orchestrator.test.js +253 -0
  11. package/dist-cli/__tests__/session-parser.test.js +335 -0
  12. package/dist-cli/__tests__/session-summarizer.test.js +117 -0
  13. package/dist-cli/__tests__/skill-server.test.js +191 -0
  14. package/dist-cli/__tests__/svd.test.js +112 -0
  15. package/dist-cli/__tests__/tfidf.test.js +88 -0
  16. package/dist-cli/__tests__/tokenizer.test.js +125 -0
  17. package/dist-cli/__tests__/topic-nodes.test.js +184 -0
  18. package/dist-cli/analyze-sessions.js +476 -0
  19. package/dist-cli/cli.js +215 -0
  20. package/dist-cli/knowledge-graph/clustering.js +174 -0
  21. package/dist-cli/knowledge-graph/community.js +220 -0
  22. package/dist-cli/knowledge-graph/constants.js +58 -0
  23. package/dist-cli/knowledge-graph/edges.js +193 -0
  24. package/dist-cli/knowledge-graph/feature-extraction.js +124 -0
  25. package/dist-cli/knowledge-graph/index.js +235 -0
  26. package/dist-cli/knowledge-graph/reusability.js +51 -0
  27. package/dist-cli/knowledge-graph/similarity.js +13 -0
  28. package/dist-cli/knowledge-graph/skill-generator.js +203 -0
  29. package/dist-cli/knowledge-graph/svd.js +195 -0
  30. package/dist-cli/knowledge-graph/tfidf.js +54 -0
  31. package/dist-cli/knowledge-graph/tokenizer.js +66 -0
  32. package/dist-cli/knowledge-graph/tool-pattern.js +173 -0
  33. package/dist-cli/knowledge-graph/topic-nodes.js +199 -0
  34. package/dist-cli/knowledge-graph/types.js +4 -0
  35. package/dist-cli/knowledge-graph-builder.js +27 -0
  36. package/dist-cli/session-parser.js +360 -0
  37. package/dist-cli/session-summarizer.js +133 -0
  38. package/dist-cli/skill-server.js +62 -0
  39. package/dist-cli/skill-synthesizer.js +189 -0
  40. package/package.json +47 -0
@@ -0,0 +1,215 @@
1
+ /**
2
+ * CLI entry point for `npx @chigichan24/crune`
3
+ * Generates skill definitions from Claude Code session logs.
4
+ */
5
+ import * as fs from "node:fs";
6
+ import * as path from "node:path";
7
+ import * as os from "node:os";
8
+ import { discoverSessions, parseJsonlFile, buildTurns, extractMetadata, parseSubagents, } from "./session-parser.js";
9
+ import { buildSemanticKnowledgeGraph, } from "./knowledge-graph-builder.js";
10
+ import { buildSynthesisPrompt, synthesizeWithClaude, } from "./skill-synthesizer.js";
11
/**
 * Parse CLI arguments (node, script, ...flags) into a config object.
 *
 * Flags that take a value (`--sessions-dir`, `--output-dir`, `--count`,
 * `--model`) are silently ignored when the value is missing. `--count`
 * falls back to 5 on a non-numeric value and is clamped to at least 1.
 * `--help` / `-h` prints usage and exits the process with code 0.
 *
 * @param {string[]} argv - process.argv-shaped array.
 * @returns {{sessionsDir: string, outputDir: string, count: number,
 *            model: (string|undefined), skipSynthesis: boolean,
 *            dryRun: boolean}}
 */
export function parseCliArgs(argv) {
    const flags = argv.slice(2);
    const config = {
        sessionsDir: path.join(os.homedir(), ".claude", "projects"),
        outputDir: path.resolve("skills"),
        count: 5,
        model: undefined,
        skipSynthesis: false,
        dryRun: false,
    };
    let pos = 0;
    while (pos < flags.length) {
        const flag = flags[pos];
        const value = flags[pos + 1];
        switch (flag) {
            case "--sessions-dir":
                if (value) {
                    config.sessionsDir = path.resolve(value);
                    pos++;
                }
                break;
            case "--output-dir":
                if (value) {
                    config.outputDir = path.resolve(value);
                    pos++;
                }
                break;
            case "--count":
                if (value) {
                    const parsed = parseInt(value, 10);
                    // Non-numeric input falls back to the default of 5;
                    // anything below 1 is clamped up to 1.
                    config.count = Math.max(1, Number.isNaN(parsed) ? 5 : parsed);
                    pos++;
                }
                break;
            case "--model":
                if (value) {
                    config.model = value;
                    pos++;
                }
                break;
            case "--skip-synthesis":
                config.skipSynthesis = true;
                break;
            case "--dry-run":
                config.dryRun = true;
                break;
            case "--help":
            case "-h":
                printUsage();
                process.exit(0);
        }
        pos++;
    }
    return config;
}
46
/**
 * Print CLI usage/help text.
 * Written to stderr so stdout stays clean for any piped output.
 */
function printUsage() {
    console.error(`Usage: crune [options]

Generate reusable skill definitions from Claude Code session logs.

Options:
--sessions-dir <path> Session logs directory (default: ~/.claude/projects)
--output-dir <path> Output directory for skill files (default: ./skills)
--count <n> Number of skills to generate (default: 5)
--model <model> Claude model for synthesis (e.g., haiku, sonnet)
--skip-synthesis Skip LLM synthesis, output heuristic skills only
--dry-run Show candidates without writing files
-h, --help Show this help message`);
}
60
// ─── Main pipeline ─────────────────────────────────────────────────
/**
 * End-to-end CLI pipeline:
 *   1. discover and parse session JSONL logs (plus subagent logs),
 *   2. build the semantic knowledge graph from the parsed sessions,
 *   3. rank skill candidates by reusabilityScore and take the top N,
 *   4. for each candidate, either synthesize markdown via Claude or fall
 *      back to the heuristic candidate markdown, then write it to
 *      <output-dir>/<skill-name>/SKILL.md.
 *
 * All progress output goes to stderr. Exits with code 1 when no sessions
 * are found, and 0 for the no-candidates and --dry-run early exits.
 */
async function main() {
    const config = parseCliArgs(process.argv);
    console.error("Discovering sessions...");
    const sessionFiles = discoverSessions(config.sessionsDir);
    if (sessionFiles.length === 0) {
        console.error(`No sessions found in ${config.sessionsDir}`);
        process.exit(1);
    }
    console.error(` Found ${sessionFiles.length} sessions`);
    // Parse all sessions (sequentially, one file at a time)
    console.error("Parsing sessions...");
    const parsedSessions = [];
    for (const sf of sessionFiles) {
        const lines = await parseJsonlFile(sf.filePath);
        if (lines.length === 0)
            continue; // skip empty or unparseable session files
        const turns = buildTurns(lines);
        const meta = extractMetadata(sf, lines, turns);
        const subagents = await parseSubagents(sf.subagentFiles);
        parsedSessions.push({
            meta,
            turns,
            subagents,
            linkedPlan: null,
            projectDir: sf.projectDir,
            projectDisplayName: sf.projectDisplayName,
        });
    }
    console.error(` Parsed ${parsedSessions.length} sessions`);
    // Build knowledge graph — reuse same conversion as analyze-sessions.ts
    // (project the parsed sessions down to the plain-object shape the
    // graph builder expects: turns, tool calls, subagents, and metadata).
    console.error("Building knowledge graph...");
    const sessionInputs = parsedSessions.map((s) => ({
        sessionId: s.meta.sessionId,
        projectDisplayName: s.projectDisplayName,
        turns: s.turns.map((t) => ({
            userPrompt: t.userPrompt,
            assistantTexts: t.assistantTexts,
            toolCalls: t.toolCalls.map((tc) => ({
                toolName: tc.toolName,
                input: tc.input,
            })),
        })),
        subagents: Object.fromEntries(Object.entries(s.subagents).map(([id, sub]) => [
            id,
            {
                agentId: sub.agentId,
                agentType: sub.agentType,
                turns: sub.turns.map((t) => ({
                    userPrompt: t.userPrompt,
                    assistantTexts: t.assistantTexts,
                    toolCalls: t.toolCalls.map((tc) => ({
                        toolName: tc.toolName,
                        input: tc.input,
                    })),
                })),
            },
        ])),
        meta: {
            sessionId: s.meta.sessionId,
            createdAt: s.meta.createdAt,
            lastActiveAt: s.meta.lastActiveAt,
            durationMinutes: s.meta.durationMinutes,
            filesEdited: s.meta.filesEdited,
            gitBranch: s.meta.gitBranch,
            toolBreakdown: s.meta.toolBreakdown,
            subagentCount: s.meta.subagentCount,
        },
    }));
    const knowledgeGraph = buildSemanticKnowledgeGraph(sessionInputs);
    console.error(` ${knowledgeGraph.nodes.length} topics, ${knowledgeGraph.skillCandidates.length} skill candidates`);
    // Select top candidates (copy before sort — sort mutates in place)
    const topCandidates = [...knowledgeGraph.skillCandidates]
        .sort((a, b) => b.reusabilityScore - a.reusabilityScore)
        .slice(0, config.count);
    if (topCandidates.length === 0) {
        console.error("No skill candidates found.");
        process.exit(0);
    }
    // Dry run — just list candidates
    if (config.dryRun) {
        console.error("\nSkill candidates (dry run):\n");
        for (const c of topCandidates) {
            const topic = knowledgeGraph.nodes.find((n) => n.id === c.topicId);
            console.error(` [${c.reusabilityScore.toFixed(2)}] ${topic?.label ?? c.topicId}`);
            console.error(` Keywords: ${topic?.keywords.join(", ") ?? "—"}`);
            console.error(` Sessions: ${topic?.sessionCount ?? "?"}`);
            console.error("");
        }
        process.exit(0);
    }
    // Synthesize skills (sequentially; one Claude call per candidate)
    console.error(`\nGenerating ${topCandidates.length} skills...`);
    for (const candidate of topCandidates) {
        const topic = knowledgeGraph.nodes.find((n) => n.id === candidate.topicId);
        const label = topic?.label ?? candidate.topicId;
        console.error(` -> ${label}`);
        // Start from the heuristic markdown; LLM synthesis replaces it on success.
        let markdown = candidate.skillMarkdown;
        if (!config.skipSynthesis && topic) {
            // Find enriched sequences related to this topic's sessions
            const topicSessionSet = new Set(topic.sessionIds);
            const relatedSequences = knowledgeGraph.enrichedToolSequences.filter((seq) => seq.sessionIds.some((sid) => topicSessionSet.has(sid)));
            const prompt = buildSynthesisPrompt({
                skillCandidate: candidate,
                topicNode: topic,
                enrichedSequences: relatedSequences,
            });
            const result = await synthesizeWithClaude(prompt, {
                model: config.model,
            });
            if (result.success) {
                markdown = result.stdout;
                console.error(` Synthesized`);
            }
            else {
                // Synthesis failure is non-fatal: keep heuristic markdown.
                console.error(` Synthesis failed: ${result.error ?? "unknown error"}, using heuristic`);
            }
        }
        else if (config.skipSynthesis) {
            console.error(` Heuristic only`);
        }
        // Write skill file as <output-dir>/<skill-name>/SKILL.md
        const skillName = extractSkillName(markdown, label);
        const skillDir = path.join(config.outputDir, skillName);
        const outputPath = path.join(skillDir, "SKILL.md");
        fs.mkdirSync(skillDir, { recursive: true });
        fs.writeFileSync(outputPath, markdown, "utf-8");
        console.error(` ${outputPath}`);
    }
    console.error(`\nDone! ${topCandidates.length} skills written to ${config.outputDir}`);
}
191
/**
 * Derive a filesystem-safe skill directory name from generated markdown.
 *
 * Prefers a `name:` field inside the YAML frontmatter; otherwise
 * kebab-cases the fallback label (truncated to 40 chars). Always returns
 * a non-empty string.
 *
 * @param {string} markdown - Generated skill markdown (may start with
 *   `---` YAML frontmatter).
 * @param {string} fallbackLabel - Topic label used when no name is found.
 * @returns {string} Sanitized, lowercase skill name.
 */
function extractSkillName(markdown, fallbackLabel) {
    // Isolate the frontmatter block first, then look for `name:` anchored
    // at the start of a line. The previous single-regex approach matched
    // `name:` mid-line, so e.g. `filename: notes.md` produced "notes-md".
    const frontmatter = markdown.match(/^---\s*\n([\s\S]*?)\n---/);
    if (frontmatter) {
        const nameLine = frontmatter[1].match(/^name:\s*(.+?)\s*$/m);
        if (nameLine?.[1]) {
            const slug = nameLine[1].replace(/[^a-zA-Z0-9-_]/g, "-").toLowerCase();
            if (slug)
                return slug;
        }
    }
    // Fallback: kebab-case from label
    const fromLabel = fallbackLabel
        .replace(/[^a-zA-Z0-9\s-]/g, "")
        .trim()
        .replace(/\s+/g, "-")
        .toLowerCase()
        .slice(0, 40);
    // Guard against an empty slug (all-symbol labels): an empty name would
    // make the caller write SKILL.md directly into the output root.
    return fromLabel || "skill";
}
205
// ─── Entry point ───────────────────────────────────────────────────
// Run main() only when this module is the script Node was invoked with
// (direct CLI use), not when it is imported (e.g. by tests). Matches the
// known entry filenames: the TS source, the compiled JS, and the bin shim.
const DIRECT_RUN_SUFFIXES = ["/cli.ts", "/cli.js", "/bin/crune.js"];
const invokedScript = process.argv[1] ?? "";
if (DIRECT_RUN_SUFFIXES.some((suffix) => invokedScript.endsWith(suffix))) {
    main().catch((err) => {
        console.error("Fatal error:", err);
        process.exit(1);
    });
}
@@ -0,0 +1,174 @@
1
/**
 * Agglomerative clustering with average linkage and automatic elbow detection.
 */
/**
 * Cluster sessions bottom-up from a precomputed pairwise distance matrix.
 *
 * Two passes: (1) merge everything down to one cluster while recording
 * each merge's distance, (2) derive an elbow threshold from that merge
 * history and re-run threshold-stopped clustering on the ORIGINAL
 * distances via clusterWithThresholdFromDistMatrix.
 *
 * @param {string[]} sessionIds - Only its length is used here; clusters
 *   are returned as arrays of indices into this array.
 * @param {Map<string, number>} precomputedDist - Pairwise distances keyed
 *   "i:j" with i < j. Pairs missing from the map are treated as Infinity
 *   when searching for the closest pair (so they never trigger a merge),
 *   but as 1.0 when folded into an average-linkage update.
 * @returns {number[][]} Clusters of session indices.
 */
export function agglomerativeClusteringFromDistMatrix(sessionIds, precomputedDist) {
    const n = sessionIds.length;
    if (n === 0)
        return [];
    if (n === 1)
        return [[0]];
    // Initialize: each session is its own cluster
    const clusterMembers = [];
    for (let i = 0; i < n; i++)
        clusterMembers.push([i]);
    // Track active clusters
    const active = new Set();
    for (let i = 0; i < n; i++)
        active.add(i);
    // Copy precomputed distances (will be updated during merges)
    const distMatrix = new Map(precomputedDist);
    const distKey = (i, j) => i < j ? `${i}:${j}` : `${j}:${i}`;
    // Merge history for elbow detection
    const mergeDistances = [];
    // Iteratively merge closest pair
    while (active.size > 1) {
        let minDist = Infinity;
        let mergeI = -1;
        let mergeJ = -1;
        // O(k^2) scan over active clusters for the closest pair
        for (const i of active) {
            for (const j of active) {
                if (i >= j)
                    continue;
                const d = distMatrix.get(distKey(i, j)) ?? Infinity;
                if (d < minDist) {
                    minDist = d;
                    mergeI = i;
                    mergeJ = j;
                }
            }
        }
        // No finite pair left (sparse/disconnected distance matrix)
        if (mergeI === -1)
            break;
        mergeDistances.push(minDist);
        // Merge j into i (average linkage: weighted average of distances)
        const sizeI = clusterMembers[mergeI].length;
        const sizeJ = clusterMembers[mergeJ].length;
        const newSize = sizeI + sizeJ;
        clusterMembers[mergeI].push(...clusterMembers[mergeJ]);
        active.delete(mergeJ);
        // Update distances using average linkage formula
        for (const k of active) {
            if (k === mergeI)
                continue;
            const distIK = distMatrix.get(distKey(mergeI, k)) ?? 1.0;
            const distJK = distMatrix.get(distKey(mergeJ, k)) ?? 1.0;
            const newDist = (distIK * sizeI + distJK * sizeJ) / newSize;
            distMatrix.set(distKey(mergeI, k), newDist);
        }
    }
    // Find elbow: cut point where merging starts getting expensive
    const threshold = findElbowThreshold(mergeDistances);
    // Re-run clustering with threshold using precomputed distances
    return clusterWithThresholdFromDistMatrix(n, precomputedDist, threshold);
}
64
/**
 * Pick a merge-distance threshold at the "elbow" of a merge history.
 *
 * The elbow is the index with the largest positive discrete second
 * derivative (where merge cost starts accelerating). With fewer than 3
 * merges there is no curvature to measure, so a fixed 0.7 is returned.
 * If no positive curvature exists, the midpoint distance is used.
 * The result is always clamped into [0.3, 0.9].
 *
 * @param {number[]} distances - Merge distances in merge order.
 * @returns {number} Clamped threshold.
 */
export function findElbowThreshold(distances) {
    if (distances.length < 3) {
        return 0.7; // fallback: too few merges to detect a bend
    }
    let bestAccel = 0;
    let bestIdx = Math.floor(distances.length * 0.5); // default: midpoint
    for (let idx = 1; idx + 1 < distances.length; idx++) {
        // Discrete second derivative at idx
        const accel = distances[idx + 1] - 2 * distances[idx] + distances[idx - 1];
        if (accel > bestAccel) {
            bestAccel = accel;
            bestIdx = idx;
        }
    }
    // Clamp to reasonable range
    return Math.min(0.9, Math.max(0.3, distances[bestIdx]));
}
81
/**
 * Threshold-stopped agglomerative clustering with average linkage.
 *
 * Repeatedly merges the closest pair of active clusters until the
 * smallest remaining pairwise distance exceeds `threshold` (or no pair
 * is left). Distances come from `precomputedDist`, keyed "i:j" with
 * i < j; pairs absent from the map count as Infinity when searching for
 * the closest pair, and as 1.0 inside the average-linkage update.
 *
 * @param {number} n - Number of items (indexed 0..n-1).
 * @param {Map<string, number>} precomputedDist - Pairwise distances.
 * @param {number} threshold - Stop merging above this distance.
 * @returns {number[][]} Clusters of item indices.
 */
export function clusterWithThresholdFromDistMatrix(n, precomputedDist, threshold) {
    if (n === 0)
        return [];
    if (n === 1)
        return [[0]];
    const key = (a, b) => (a < b ? `${a}:${b}` : `${b}:${a}`);
    const dist = new Map(precomputedDist); // local copy; mutated by merges
    const members = Array.from({ length: n }, (_, i) => [i]);
    const active = new Set(members.keys());
    for (;;) {
        // Locate the closest pair among active clusters.
        let best = Infinity;
        let a = -1;
        let b = -1;
        for (const i of active) {
            for (const j of active) {
                if (i >= j)
                    continue;
                const d = dist.get(key(i, j)) ?? Infinity;
                if (d < best) {
                    best = d;
                    a = i;
                    b = j;
                }
            }
        }
        // Stop when nothing is mergeable or the best merge is too costly.
        if (a === -1 || best > threshold)
            break;
        // Average-linkage merge of b into a.
        const sizeA = members[a].length;
        const sizeB = members[b].length;
        members[a].push(...members[b]);
        active.delete(b);
        for (const other of active) {
            if (other === a)
                continue;
            const dA = dist.get(key(a, other)) ?? 1.0;
            const dB = dist.get(key(b, other)) ?? 1.0;
            dist.set(key(a, other), (dA * sizeA + dB * sizeB) / (sizeA + sizeB));
        }
    }
    return [...active].map((i) => members[i]);
}
129
/**
 * Split oversized clusters by re-clustering their members with a stricter
 * threshold. This prevents a single catch-all cluster from dominating the
 * graph when the global elbow threshold is too loose.
 *
 * A cluster is oversized when it has more than
 * max(10, floor(totalSessions * maxClusterRatio)) members. Its members
 * are re-indexed 0..k-1, their pairwise distances extracted into a local
 * matrix (missing pairs default to 1.0), and re-clustered with a
 * sub-threshold of max(0.15, 0.8 * median internal distance).
 *
 * @param {number[][]} clusters - Clusters of original session indices.
 * @param {number} totalSessions - Total session count for the ratio cap.
 * @param {Map<string, number>} precomputedDist - Global distance matrix.
 * @param {number} [maxClusterRatio=0.25] - Oversize cutoff ratio.
 * @returns {number[][]} Clusters, with oversized ones re-split.
 */
export function splitOversizedClusters(clusters, totalSessions, precomputedDist, maxClusterRatio = 0.25) {
    const sizeLimit = Math.max(10, Math.floor(totalSessions * maxClusterRatio));
    const key = (a, b) => (a < b ? `${a}:${b}` : `${b}:${a}`);
    const output = [];
    for (const members of clusters) {
        if (members.length <= sizeLimit) {
            output.push(members);
            continue;
        }
        // Build the sub-matrix over this cluster's members, re-indexed 0..k-1.
        const count = members.length;
        const localDist = new Map();
        for (let i = 0; i < count; i++) {
            for (let j = i + 1; j < count; j++) {
                const original = precomputedDist.get(key(members[i], members[j])) ?? 1.0;
                localDist.set(key(i, j), original);
            }
        }
        // Stricter threshold: 80% of the median internal distance, floor 0.15.
        const internal = [...localDist.values()].sort((x, y) => x - y);
        const median = internal[Math.floor(internal.length / 2)] ?? 0.5;
        const subThreshold = Math.max(0.15, median * 0.8);
        const subClusters = clusterWithThresholdFromDistMatrix(count, localDist, subThreshold);
        // Map local indices back to the original session indices.
        for (const sub of subClusters) {
            output.push(sub.map((idx) => members[idx]));
        }
    }
    return output;
}
@@ -0,0 +1,220 @@
1
/**
 * Louvain community detection and Brandes betweenness centrality.
 */
/**
 * Single-level Louvain community detection over the weighted topic graph.
 *
 * Only the local-move phase of Louvain is implemented (nodes are moved
 * between communities until no move improves modularity); there is no
 * coarsening/aggregation pass. Side effect: assigns `communityId` onto
 * each topic in the main path.
 *
 * NOTE(review): the zero-total-weight branch returns singleton communities
 * WITHOUT assigning `topics[i].communityId`, unlike the main path — confirm
 * whether callers rely on that field being set.
 *
 * @param {Array} topics - Topic nodes ({id, keywords, label, project, ...}).
 * @param {Array} edges - {source, target, strength} with topic ids.
 * @returns {{communities: Array, modularity: number}}
 */
export function louvainDetection(topics, edges) {
    const n = topics.length;
    if (n === 0)
        return { communities: [], modularity: 0 };
    const nodeIndex = new Map();
    topics.forEach((t, i) => nodeIndex.set(t.id, i));
    // Build adjacency with weights (dense n x n; edges with unknown
    // endpoints are skipped)
    const adjWeights = Array.from({ length: n }, () => new Array(n).fill(0));
    let totalWeight = 0;
    for (const e of edges) {
        const i = nodeIndex.get(e.source);
        const j = nodeIndex.get(e.target);
        if (i === undefined || j === undefined)
            continue;
        adjWeights[i][j] = e.strength;
        adjWeights[j][i] = e.strength;
        totalWeight += e.strength;
    }
    if (totalWeight === 0) {
        // No edges: each node is its own community
        const communities = topics.map((t, i) => ({
            id: i,
            topicIds: [t.id],
            label: t.keywords[0] || t.label,
            dominantProject: t.project,
        }));
        return { communities, modularity: 0 };
    }
    const m2 = totalWeight; // sum of all edge weights (each edge counted once)
    // Node strengths (weighted degree)
    const k = new Float64Array(n);
    for (let i = 0; i < n; i++) {
        for (let j = 0; j < n; j++) {
            k[i] += adjWeights[i][j];
        }
    }
    // Community assignments: start with every node in its own community
    const community = new Int32Array(n);
    for (let i = 0; i < n; i++)
        community[i] = i;
    // Phase 1: Local modularity optimization — repeat sweeps over all
    // nodes until a full sweep makes no move (or maxIter is hit)
    let improved = true;
    const maxIter = 100;
    let iter = 0;
    while (improved && iter < maxIter) {
        improved = false;
        iter++;
        for (let i = 0; i < n; i++) {
            const currentComm = community[i];
            // Compute sum of weights to each neighboring community
            const commWeights = new Map();
            for (let j = 0; j < n; j++) {
                if (adjWeights[i][j] > 0 && i !== j) {
                    const c = community[j];
                    commWeights.set(c, (commWeights.get(c) || 0) + adjWeights[i][j]);
                }
            }
            // Sum of weights in current community
            const ki = k[i];
            // Try moving to each neighboring community
            let bestComm = currentComm;
            let bestDeltaQ = 0;
            // Remove node i from its current community and compute cost
            let sumCurrentComm = 0;
            let kCurrentComm = 0;
            for (let j = 0; j < n; j++) {
                if (j !== i && community[j] === currentComm) {
                    sumCurrentComm += adjWeights[i][j];
                    kCurrentComm += k[j];
                }
            }
            for (const [targetComm, wToComm] of commWeights) {
                if (targetComm === currentComm)
                    continue;
                // Sum of weights of nodes in target community
                let kTargetComm = 0;
                for (let j = 0; j < n; j++) {
                    if (community[j] === targetComm) {
                        kTargetComm += k[j];
                    }
                }
                // deltaQ = [w_to_target / m - ki * k_target / (2m²)] - [w_to_current / m - ki * k_current / (2m²)]
                const deltaQ = (wToComm - sumCurrentComm) / m2 -
                    (ki * (kTargetComm - kCurrentComm)) / (2 * m2 * m2);
                if (deltaQ > bestDeltaQ) {
                    bestDeltaQ = deltaQ;
                    bestComm = targetComm;
                }
            }
            // Greedy move: only strictly positive deltaQ triggers a change
            if (bestComm !== currentComm) {
                community[i] = bestComm;
                improved = true;
            }
        }
    }
    // Compute modularity Q over the final assignment
    let modularity = 0;
    for (let i = 0; i < n; i++) {
        for (let j = 0; j < n; j++) {
            if (community[i] === community[j]) {
                modularity += adjWeights[i][j] - (k[i] * k[j]) / (2 * m2);
            }
        }
    }
    modularity /= 2 * m2;
    // Build community objects: group node indices by community label
    const commGroups = new Map();
    for (let i = 0; i < n; i++) {
        const c = community[i];
        const group = commGroups.get(c) || [];
        group.push(i);
        commGroups.set(c, group);
    }
    // Renumber communities densely from 0
    let commIdx = 0;
    const communities = [];
    for (const [, members] of commGroups) {
        const topicIds = members.map((i) => topics[i].id);
        // Label from most frequent keywords (top 3 by count)
        const kwCount = new Map();
        for (const i of members) {
            for (const kw of topics[i].keywords) {
                kwCount.set(kw, (kwCount.get(kw) || 0) + 1);
            }
        }
        const topKw = [...kwCount.entries()]
            .sort((a, b) => b[1] - a[1])
            .slice(0, 3)
            .map(([kw]) => kw);
        // Dominant project (most common project among member topics)
        const projCount = new Map();
        for (const i of members) {
            projCount.set(topics[i].project, (projCount.get(topics[i].project) || 0) + 1);
        }
        const dominantProject = [...projCount.entries()].sort((a, b) => b[1] - a[1])[0]?.[0] ?? "";
        // Assign community ID to topics (mutates the input nodes)
        for (const i of members) {
            topics[i].communityId = commIdx;
        }
        communities.push({
            id: commIdx,
            topicIds,
            label: topKw.join(", ") || `Community ${commIdx}`,
            dominantProject,
        });
        commIdx++;
    }
    return { communities, modularity };
}
153
/**
 * Brandes' betweenness centrality over the topic graph, treated as an
 * unweighted undirected graph (edge `strength` is ignored; shortest paths
 * are by hop count via BFS).
 *
 * Side effects: writes `betweennessCentrality` (normalized for undirected
 * graphs, rounded to 4 decimals) and `degreeCentrality` onto every topic.
 * Returns nothing. No-op when there are 2 or fewer topics.
 *
 * @param {Array} topics - Topic nodes with string `id`s (mutated).
 * @param {Array} edges - {source, target} pairs of topic ids.
 */
export function brandesBetweenness(topics, edges) {
    const n = topics.length;
    if (n <= 2)
        return;
    const indexOf = new Map(topics.map((t, i) => [t.id, i]));
    // Adjacency list; edges with unknown endpoints are skipped.
    const neighbors = Array.from({ length: n }, () => []);
    for (const edge of edges) {
        const a = indexOf.get(edge.source);
        const b = indexOf.get(edge.target);
        if (a === undefined || b === undefined)
            continue;
        neighbors[a].push(b);
        neighbors[b].push(a);
    }
    const betweenness = new Float64Array(n);
    for (let source = 0; source < n; source++) {
        // Forward BFS from `source`: shortest-path counts (sigma),
        // predecessor lists, and visitation order.
        const order = [];
        const parents = Array.from({ length: n }, () => []);
        const sigma = new Float64Array(n);
        const depth = new Int32Array(n).fill(-1);
        sigma[source] = 1;
        depth[source] = 0;
        const frontier = [source];
        let head = 0; // head pointer avoids O(n) Array#shift
        while (head < frontier.length) {
            const v = frontier[head++];
            order.push(v);
            for (const w of neighbors[v]) {
                if (depth[w] < 0) {
                    depth[w] = depth[v] + 1;
                    frontier.push(w);
                }
                if (depth[w] === depth[v] + 1) {
                    sigma[w] += sigma[v];
                    parents[w].push(v);
                }
            }
        }
        // Reverse pass: accumulate pair dependencies (delta) from the
        // farthest nodes back toward the source.
        const delta = new Float64Array(n);
        for (let idx = order.length - 1; idx >= 0; idx--) {
            const w = order[idx];
            for (const v of parents[w]) {
                delta[v] += (sigma[v] / sigma[w]) * (1 + delta[w]);
            }
            if (w !== source) {
                betweenness[w] += delta[w];
            }
        }
    }
    // Undirected normalization: each pair is counted from both endpoints,
    // so halve, then scale by 2 / ((n-1)(n-2)).
    const normFactor = n > 2 ? 2 / ((n - 1) * (n - 2)) : 1;
    for (let i = 0; i < n; i++) {
        const normalized = (betweenness[i] / 2) * normFactor;
        topics[i].betweennessCentrality = Math.round(normalized * 10000) / 10000;
    }
    // Degree centrality: neighbor count over (n - 1) possible neighbors.
    for (let i = 0; i < n; i++) {
        const degree = neighbors[i].length;
        topics[i].degreeCentrality =
            n > 1 ? Math.round((degree / (n - 1)) * 10000) / 10000 : 0;
    }
}
@@ -0,0 +1,58 @@
1
/**
 * Constants for knowledge graph construction.
 */
// ─── Stop words ─────────────────────────────────────────────────────────────
// Tokens excluded during tokenization/keyword extraction. Mixed English and
// Japanese. (Duplicate literals such as "no" are harmless — Set deduplicates.)
export const STOP_WORDS = new Set([
    // English
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "shall", "can", "need", "must", "ought",
    "i", "you", "he", "she", "it", "we", "they", "me", "him", "her",
    "us", "them", "my", "your", "his", "its", "our", "their", "mine",
    "yours", "hers", "ours", "theirs", "this", "that", "these", "those",
    "what", "which", "who", "whom", "whose", "when", "where", "why", "how",
    "all", "each", "every", "both", "few", "more", "most", "other", "some",
    "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too",
    "very", "just", "because", "as", "until", "while", "of", "at", "by",
    "for", "with", "about", "against", "between", "through", "during",
    "before", "after", "above", "below", "to", "from", "up", "down", "in",
    "out", "on", "off", "over", "under", "again", "further", "then", "once",
    "here", "there", "and", "but", "or", "if", "else", "also", "like",
    "please", "thanks", "thank", "yes", "no", "ok", "okay", "sure", "let",
    "make", "use", "using", "used", "want", "see", "look", "try", "get",
    "got", "think", "know", "now", "new", "way", "well", "back", "still",
    "file", "code", "change", "changes", "add", "update", "fix", "set",
    // Japanese particles and common words
    "の", "に", "は", "を", "が", "で", "と", "も", "か", "な", "だ",
    "です", "ます", "する", "した", "して", "ない", "ある", "いる",
    "これ", "それ", "あれ", "この", "その", "あの", "ここ", "そこ",
    "こと", "もの", "ため", "よう", "から", "まで", "より", "ほど",
    "など", "ので", "けど", "でも", "しかし", "また", "そして",
    "って", "という", "ください", "お願い", "確認",
]);
// ─── Noise token patterns ───────────────────────────────────────────────────
// Full-token matches for identifiers that carry no topical meaning.
export const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
// 6+ hex chars (e.g. git SHAs, color codes) — note this also matches
// ordinary hex-only English words of length >= 6 (e.g. "decade" is not hex,
// but "facade"-like all-hex-letter tokens would be).
export const HEX_PATTERN = /^[0-9a-f]{6,}$/i;
export const NUM_PATTERN = /^\d+$/;
// ─── Structural features ────────────────────────────────────────────────────
// Number of structural feature dimensions appended to each session vector.
export const STRUCTURAL_DIM = 7;
// ─── Feature weights ────────────────────────────────────────────────────────
// Relative weights of the three feature groups; they sum to 1.0.
export const WEIGHT_TEXT = 0.50;
export const WEIGHT_TOOL = 0.25;
export const WEIGHT_STRUCT = 0.25;
// ─── Action verbs ───────────────────────────────────────────────────────────
// Canonical English action verbs recognized in prompts.
export const ACTION_VERBS_EN = new Set([
    "fix", "add", "implement", "create", "update", "refactor", "remove",
    "delete", "move", "rename", "test", "debug", "optimize", "migrate",
    "deploy", "configure", "setup", "integrate", "build", "review",
    "investigate", "analyze", "check", "resolve", "extract", "convert",
]);
// Japanese action phrases mapped to their canonical English verb:
// each entry is [pattern, canonicalVerb].
export const ACTION_VERBS_JA = [
    [/修正/, "fix"], [/追加/, "add"], [/実装/, "implement"],
    [/作成|作って/, "create"], [/更新/, "update"], [/リファクタ/, "refactor"],
    [/削除/, "remove"], [/テスト/, "test"], [/デバッグ/, "debug"],
    [/最適化/, "optimize"], [/移行|マイグレ/, "migrate"],
    [/設定|セットアップ/, "configure"], [/統合/, "integrate"],
    [/ビルド/, "build"], [/レビュー/, "review"], [/調査/, "investigate"],
    [/確認|チェック/, "check"], [/解決/, "resolve"],
];