@chigichan24/crune 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +155 -0
- package/bin/crune.js +2 -0
- package/dist-cli/__tests__/cli.test.js +63 -0
- package/dist-cli/__tests__/clustering.test.js +200 -0
- package/dist-cli/__tests__/community.test.js +115 -0
- package/dist-cli/__tests__/edges.test.js +130 -0
- package/dist-cli/__tests__/feature-extraction.test.js +66 -0
- package/dist-cli/__tests__/fixtures.js +192 -0
- package/dist-cli/__tests__/orchestrator.test.js +253 -0
- package/dist-cli/__tests__/session-parser.test.js +335 -0
- package/dist-cli/__tests__/session-summarizer.test.js +117 -0
- package/dist-cli/__tests__/skill-server.test.js +191 -0
- package/dist-cli/__tests__/svd.test.js +112 -0
- package/dist-cli/__tests__/tfidf.test.js +88 -0
- package/dist-cli/__tests__/tokenizer.test.js +125 -0
- package/dist-cli/__tests__/topic-nodes.test.js +184 -0
- package/dist-cli/analyze-sessions.js +476 -0
- package/dist-cli/cli.js +215 -0
- package/dist-cli/knowledge-graph/clustering.js +174 -0
- package/dist-cli/knowledge-graph/community.js +220 -0
- package/dist-cli/knowledge-graph/constants.js +58 -0
- package/dist-cli/knowledge-graph/edges.js +193 -0
- package/dist-cli/knowledge-graph/feature-extraction.js +124 -0
- package/dist-cli/knowledge-graph/index.js +235 -0
- package/dist-cli/knowledge-graph/reusability.js +51 -0
- package/dist-cli/knowledge-graph/similarity.js +13 -0
- package/dist-cli/knowledge-graph/skill-generator.js +203 -0
- package/dist-cli/knowledge-graph/svd.js +195 -0
- package/dist-cli/knowledge-graph/tfidf.js +54 -0
- package/dist-cli/knowledge-graph/tokenizer.js +66 -0
- package/dist-cli/knowledge-graph/tool-pattern.js +173 -0
- package/dist-cli/knowledge-graph/topic-nodes.js +199 -0
- package/dist-cli/knowledge-graph/types.js +4 -0
- package/dist-cli/knowledge-graph-builder.js +27 -0
- package/dist-cli/session-parser.js +360 -0
- package/dist-cli/session-summarizer.js +133 -0
- package/dist-cli/skill-server.js +62 -0
- package/dist-cli/skill-synthesizer.js +189 -0
- package/package.json +47 -0
package/dist-cli/cli.js
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI entry point for `npx @chigichan24/crune`
|
|
3
|
+
* Generates skill definitions from Claude Code session logs.
|
|
4
|
+
*/
|
|
5
|
+
import * as fs from "node:fs";
|
|
6
|
+
import * as path from "node:path";
|
|
7
|
+
import * as os from "node:os";
|
|
8
|
+
import { discoverSessions, parseJsonlFile, buildTurns, extractMetadata, parseSubagents, } from "./session-parser.js";
|
|
9
|
+
import { buildSemanticKnowledgeGraph, } from "./knowledge-graph-builder.js";
|
|
10
|
+
import { buildSynthesisPrompt, synthesizeWithClaude, } from "./skill-synthesizer.js";
|
|
11
|
+
/**
 * Parse process argv into the CLI configuration.
 *
 * Flags that take a value (--sessions-dir, --output-dir, --count, --model)
 * are silently ignored when the value is missing. --count is clamped to >= 1
 * and falls back to 5 on non-numeric input. --help / -h prints usage to
 * stderr and exits with status 0.
 *
 * @param {string[]} argv - Raw process.argv (node binary and script path are skipped).
 * @returns {{sessionsDir: string, outputDir: string, count: number,
 *            model: (string|undefined), skipSynthesis: boolean, dryRun: boolean}}
 */
export function parseCliArgs(argv) {
    // Defaults mirror Claude Code's standard on-disk layout.
    const config = {
        sessionsDir: path.join(os.homedir(), ".claude", "projects"),
        outputDir: path.resolve("skills"),
        count: 5,
        model: undefined,
        skipSynthesis: false,
        dryRun: false,
    };
    const tokens = argv.slice(2);
    let pos = 0;
    while (pos < tokens.length) {
        const flag = tokens[pos];
        const value = tokens[pos + 1];
        switch (flag) {
            case "--sessions-dir":
                if (value) {
                    config.sessionsDir = path.resolve(value);
                    pos++; // consume the value token
                }
                break;
            case "--output-dir":
                if (value) {
                    config.outputDir = path.resolve(value);
                    pos++;
                }
                break;
            case "--count": {
                if (value) {
                    const parsed = Number.parseInt(value, 10);
                    // Non-numeric input falls back to the default of 5;
                    // anything below 1 is clamped up to 1.
                    config.count = Math.max(1, Number.isNaN(parsed) ? 5 : parsed);
                    pos++;
                }
                break;
            }
            case "--model":
                if (value) {
                    config.model = value;
                    pos++;
                }
                break;
            case "--skip-synthesis":
                config.skipSynthesis = true;
                break;
            case "--dry-run":
                config.dryRun = true;
                break;
            case "--help":
            case "-h":
                printUsage();
                process.exit(0);
        }
        pos++;
    }
    return config;
}
|
|
46
|
+
/**
 * Print CLI usage help to stderr.
 * Written to stderr (not stdout) so that piped stdout stays clean,
 * consistent with the rest of the pipeline's progress output.
 */
function printUsage() {
    console.error(`Usage: crune [options]

Generate reusable skill definitions from Claude Code session logs.

Options:
  --sessions-dir <path>   Session logs directory (default: ~/.claude/projects)
  --output-dir <path>     Output directory for skill files (default: ./skills)
  --count <n>             Number of skills to generate (default: 5)
  --model <model>         Claude model for synthesis (e.g., haiku, sonnet)
  --skip-synthesis        Skip LLM synthesis, output heuristic skills only
  --dry-run               Show candidates without writing files
  -h, --help              Show this help message`);
}
|
|
60
|
+
// ─── Main pipeline ─────────────────────────────────────────────────
/**
 * Run the full pipeline: discover session logs, parse them, build the
 * semantic knowledge graph, pick the top-scoring skill candidates, then
 * synthesize (or fall back to heuristic) markdown and write one
 * <output-dir>/<skill-name>/SKILL.md per candidate.
 *
 * All progress output goes to stderr. Exits the process directly:
 * status 1 when no sessions are found; status 0 after --dry-run or when
 * there are no skill candidates.
 */
async function main() {
    const config = parseCliArgs(process.argv);
    console.error("Discovering sessions...");
    const sessionFiles = discoverSessions(config.sessionsDir);
    if (sessionFiles.length === 0) {
        console.error(`No sessions found in ${config.sessionsDir}`);
        process.exit(1);
    }
    console.error(` Found ${sessionFiles.length} sessions`);
    // Parse all sessions
    console.error("Parsing sessions...");
    const parsedSessions = [];
    for (const sf of sessionFiles) {
        const lines = await parseJsonlFile(sf.filePath);
        // Skip empty or unreadable session files entirely.
        if (lines.length === 0)
            continue;
        const turns = buildTurns(lines);
        const meta = extractMetadata(sf, lines, turns);
        const subagents = await parseSubagents(sf.subagentFiles);
        parsedSessions.push({
            meta,
            turns,
            subagents,
            linkedPlan: null,
            projectDir: sf.projectDir,
            projectDisplayName: sf.projectDisplayName,
        });
    }
    console.error(` Parsed ${parsedSessions.length} sessions`);
    // Build knowledge graph — reuse same conversion as analyze-sessions.ts
    console.error("Building knowledge graph...");
    // Project each parsed session down to the plain-data shape the graph
    // builder consumes (only the fields it reads, no parser internals).
    const sessionInputs = parsedSessions.map((s) => ({
        sessionId: s.meta.sessionId,
        projectDisplayName: s.projectDisplayName,
        turns: s.turns.map((t) => ({
            userPrompt: t.userPrompt,
            assistantTexts: t.assistantTexts,
            toolCalls: t.toolCalls.map((tc) => ({
                toolName: tc.toolName,
                input: tc.input,
            })),
        })),
        // Subagent transcripts keep the same turn shape, keyed by agent id.
        subagents: Object.fromEntries(Object.entries(s.subagents).map(([id, sub]) => [
            id,
            {
                agentId: sub.agentId,
                agentType: sub.agentType,
                turns: sub.turns.map((t) => ({
                    userPrompt: t.userPrompt,
                    assistantTexts: t.assistantTexts,
                    toolCalls: t.toolCalls.map((tc) => ({
                        toolName: tc.toolName,
                        input: tc.input,
                    })),
                })),
            },
        ])),
        meta: {
            sessionId: s.meta.sessionId,
            createdAt: s.meta.createdAt,
            lastActiveAt: s.meta.lastActiveAt,
            durationMinutes: s.meta.durationMinutes,
            filesEdited: s.meta.filesEdited,
            gitBranch: s.meta.gitBranch,
            toolBreakdown: s.meta.toolBreakdown,
            subagentCount: s.meta.subagentCount,
        },
    }));
    const knowledgeGraph = buildSemanticKnowledgeGraph(sessionInputs);
    console.error(` ${knowledgeGraph.nodes.length} topics, ${knowledgeGraph.skillCandidates.length} skill candidates`);
    // Select top candidates (copy before sorting — sort mutates in place).
    const topCandidates = [...knowledgeGraph.skillCandidates]
        .sort((a, b) => b.reusabilityScore - a.reusabilityScore)
        .slice(0, config.count);
    if (topCandidates.length === 0) {
        console.error("No skill candidates found.");
        process.exit(0);
    }
    // Dry run — just list candidates
    if (config.dryRun) {
        console.error("\nSkill candidates (dry run):\n");
        for (const c of topCandidates) {
            const topic = knowledgeGraph.nodes.find((n) => n.id === c.topicId);
            console.error(` [${c.reusabilityScore.toFixed(2)}] ${topic?.label ?? c.topicId}`);
            console.error(` Keywords: ${topic?.keywords.join(", ") ?? "—"}`);
            console.error(` Sessions: ${topic?.sessionCount ?? "?"}`);
            console.error("");
        }
        process.exit(0);
    }
    // Synthesize skills
    console.error(`\nGenerating ${topCandidates.length} skills...`);
    for (const candidate of topCandidates) {
        const topic = knowledgeGraph.nodes.find((n) => n.id === candidate.topicId);
        const label = topic?.label ?? candidate.topicId;
        console.error(` -> ${label}`);
        // Heuristic markdown is the fallback when synthesis is skipped or fails.
        let markdown = candidate.skillMarkdown;
        if (!config.skipSynthesis && topic) {
            // Find enriched sequences related to this topic's sessions
            const topicSessionSet = new Set(topic.sessionIds);
            const relatedSequences = knowledgeGraph.enrichedToolSequences.filter((seq) => seq.sessionIds.some((sid) => topicSessionSet.has(sid)));
            const prompt = buildSynthesisPrompt({
                skillCandidate: candidate,
                topicNode: topic,
                enrichedSequences: relatedSequences,
            });
            const result = await synthesizeWithClaude(prompt, {
                model: config.model,
            });
            if (result.success) {
                markdown = result.stdout;
                console.error(` Synthesized`);
            }
            else {
                // Synthesis failure is non-fatal: the heuristic markdown is
                // still written below.
                console.error(` Synthesis failed: ${result.error ?? "unknown error"}, using heuristic`);
            }
        }
        else if (config.skipSynthesis) {
            console.error(` Heuristic only`);
        }
        // Write skill file as <output-dir>/<skill-name>/SKILL.md
        const skillName = extractSkillName(markdown, label);
        const skillDir = path.join(config.outputDir, skillName);
        const outputPath = path.join(skillDir, "SKILL.md");
        fs.mkdirSync(skillDir, { recursive: true });
        fs.writeFileSync(outputPath, markdown, "utf-8");
        console.error(` ${outputPath}`);
    }
    console.error(`\nDone! ${topCandidates.length} skills written to ${config.outputDir}`);
}
|
|
191
|
+
/**
 * Derive a filesystem-safe skill directory name from synthesized markdown.
 *
 * Prefers the `name:` field of a YAML frontmatter block; any character
 * outside [a-zA-Z0-9-_] is replaced with "-". When no frontmatter name is
 * found, the fallback label is kebab-cased and truncated to 40 characters.
 *
 * @param {string} markdown - Skill markdown, possibly with YAML frontmatter.
 * @param {string} fallbackLabel - Human-readable label used when no name is found.
 * @returns {string} Lower-case directory-safe skill name.
 */
function extractSkillName(markdown, fallbackLabel) {
    // Try to extract name from YAML frontmatter
    const frontmatterName = markdown.match(/^---\s*\n[\s\S]*?name:\s*(.+?)\s*\n[\s\S]*?---/)?.[1];
    if (frontmatterName) {
        return frontmatterName.replace(/[^a-zA-Z0-9-_]/g, "-").toLowerCase();
    }
    // Fallback: kebab-case from label
    const kebab = fallbackLabel
        .replace(/[^a-zA-Z0-9\s-]/g, "")
        .trim()
        .replace(/\s+/g, "-")
        .toLowerCase();
    return kebab.slice(0, 40);
}
|
|
205
|
+
// ─── Entry point ───────────────────────────────────────────────────
// Run the pipeline only when this file (or the bin shim) is executed
// directly, not when it is imported as a module.
const entryScript = process.argv[1] ?? "";
const directRunSuffixes = ["/cli.ts", "/cli.js", "/bin/crune.js"];
const isDirectRun = directRunSuffixes.some((suffix) => entryScript.endsWith(suffix));
if (isDirectRun) {
    main().catch((err) => {
        console.error("Fatal error:", err);
        process.exit(1);
    });
}
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agglomerative clustering with average linkage and automatic elbow detection.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Agglomerative clustering over a precomputed distance matrix using average
 * linkage. Runs a full merge pass to record the sequence of merge distances,
 * detects an elbow in that sequence to pick a cut threshold, then re-clusters
 * from scratch with that threshold.
 *
 * @param {string[]} sessionIds - Session ids; only the count is used here.
 * @param {Map<string, number>} precomputedDist - Pairwise distances keyed
 *   "i:j" with i < j (indices into sessionIds). A missing pair is treated as
 *   Infinity when searching for the closest pair, and as 1.0 when averaging
 *   during a merge.
 * @returns {number[][]} Clusters as arrays of indices into sessionIds.
 */
export function agglomerativeClusteringFromDistMatrix(sessionIds, precomputedDist) {
    const n = sessionIds.length;
    if (n === 0)
        return [];
    if (n === 1)
        return [[0]];
    // Initialize: each session is its own cluster
    const clusterMembers = [];
    for (let i = 0; i < n; i++)
        clusterMembers.push([i]);
    // Track active clusters
    const active = new Set();
    for (let i = 0; i < n; i++)
        active.add(i);
    // Copy precomputed distances (will be updated during merges)
    const distMatrix = new Map(precomputedDist);
    // Canonical key: smaller index first, so (i,j) and (j,i) share one entry.
    const distKey = (i, j) => i < j ? `${i}:${j}` : `${j}:${i}`;
    // Merge history for elbow detection
    const mergeDistances = [];
    // Iteratively merge closest pair
    while (active.size > 1) {
        // O(k^2) scan over the active clusters for the closest pair.
        let minDist = Infinity;
        let mergeI = -1;
        let mergeJ = -1;
        for (const i of active) {
            for (const j of active) {
                if (i >= j)
                    continue;
                const d = distMatrix.get(distKey(i, j)) ?? Infinity;
                if (d < minDist) {
                    minDist = d;
                    mergeI = i;
                    mergeJ = j;
                }
            }
        }
        // No finite distance left: remaining clusters are disconnected.
        if (mergeI === -1)
            break;
        mergeDistances.push(minDist);
        // Merge j into i (average linkage: weighted average of distances)
        const sizeI = clusterMembers[mergeI].length;
        const sizeJ = clusterMembers[mergeJ].length;
        const newSize = sizeI + sizeJ;
        clusterMembers[mergeI].push(...clusterMembers[mergeJ]);
        active.delete(mergeJ);
        // Update distances using average linkage formula
        for (const k of active) {
            if (k === mergeI)
                continue;
            const distIK = distMatrix.get(distKey(mergeI, k)) ?? 1.0;
            const distJK = distMatrix.get(distKey(mergeJ, k)) ?? 1.0;
            const newDist = (distIK * sizeI + distJK * sizeJ) / newSize;
            distMatrix.set(distKey(mergeI, k), newDist);
        }
    }
    // Find elbow: cut point where merging starts getting expensive
    const threshold = findElbowThreshold(mergeDistances);
    // Re-run clustering with threshold using precomputed distances
    // (the first pass only existed to collect mergeDistances).
    return clusterWithThresholdFromDistMatrix(n, precomputedDist, threshold);
}
|
|
64
|
+
/**
 * Locate the "elbow" in a sequence of merge distances: the point where
 * consecutive merges start becoming much more expensive, found via the
 * largest discrete second derivative. The chosen distance is clamped to
 * the range [0.3, 0.9].
 *
 * @param {number[]} distances - Merge distances in the order they occurred.
 * @returns {number} Distance threshold at which to cut the dendrogram
 *   (0.7 fallback when fewer than 3 merges are available).
 */
export function findElbowThreshold(distances) {
    // Too few merges to measure curvature — use a fixed fallback.
    if (distances.length < 3) {
        return 0.7;
    }
    let bestAcceleration = 0;
    // Default cut: halfway through the merge history, used when no
    // positive acceleration is found.
    let cutIndex = Math.floor(distances.length * 0.5);
    for (let idx = 1; idx < distances.length - 1; idx++) {
        // Discrete second derivative at idx.
        const acceleration = distances[idx + 1] - 2 * distances[idx] + distances[idx - 1];
        if (acceleration > bestAcceleration) {
            bestAcceleration = acceleration;
            cutIndex = idx;
        }
    }
    // Clamp to reasonable range
    return Math.max(0.3, Math.min(0.9, distances[cutIndex]));
}
|
|
81
|
+
/**
 * Agglomerative clustering (average linkage) that keeps merging the closest
 * pair of clusters until the closest remaining pair is farther apart than
 * `threshold`.
 *
 * @param {number} n - Number of items (indices 0..n-1).
 * @param {Map<string, number>} precomputedDist - Pairwise distances keyed
 *   "i:j" with i < j. Missing pairs count as Infinity when searching and
 *   1.0 when averaging.
 * @param {number} threshold - Stop merging once the closest pair exceeds this.
 * @returns {number[][]} Clusters as arrays of original item indices.
 */
export function clusterWithThresholdFromDistMatrix(n, precomputedDist, threshold) {
    if (n === 0)
        return [];
    if (n === 1)
        return [[0]];
    // Canonical key: smaller index first.
    const key = (a, b) => (a < b ? `${a}:${b}` : `${b}:${a}`);
    // Working copy — merge updates must not mutate the caller's matrix.
    const dist = new Map(precomputedDist);
    const members = Array.from({ length: n }, (_, i) => [i]);
    const live = new Set(members.keys());
    while (live.size > 1) {
        // Find the closest pair among live clusters.
        let best = Infinity;
        let a = -1;
        let b = -1;
        for (const i of live) {
            for (const j of live) {
                if (i >= j)
                    continue;
                const d = dist.get(key(i, j)) ?? Infinity;
                if (d < best) {
                    best = d;
                    a = i;
                    b = j;
                }
            }
        }
        // Stop when nothing is mergeable or the closest pair is too far apart.
        if (a === -1 || best > threshold)
            break;
        // Average-linkage merge: fold cluster b into cluster a.
        const sizeA = members[a].length;
        const sizeB = members[b].length;
        members[a] = members[a].concat(members[b]);
        live.delete(b);
        for (const other of live) {
            if (other === a)
                continue;
            const dA = dist.get(key(a, other)) ?? 1.0;
            const dB = dist.get(key(b, other)) ?? 1.0;
            dist.set(key(a, other), (dA * sizeA + dB * sizeB) / (sizeA + sizeB));
        }
    }
    return [...live].map((i) => members[i]);
}
|
|
129
|
+
/**
 * Split oversized clusters by re-clustering their members with a stricter
 * threshold. This prevents a single catch-all cluster from dominating the
 * graph when the global elbow threshold is too loose.
 *
 * A cluster larger than max(10, floor(totalSessions * maxClusterRatio))
 * is re-split using a threshold derived from the median of its internal
 * pairwise distances (80% of the median, floored at 0.15).
 *
 * @param {number[][]} clusters - Clusters as arrays of session indices.
 * @param {number} totalSessions - Total number of sessions in the graph.
 * @param {Map<string, number>} precomputedDist - Pairwise distances keyed
 *   "i:j" with i < j (global session indices).
 * @param {number} [maxClusterRatio=0.25] - Fraction of all sessions above
 *   which a cluster is re-split. Default 0.25 = 25% of all sessions.
 * @returns {number[][]} Clusters, with oversized ones replaced by sub-clusters.
 */
export function splitOversizedClusters(clusters, totalSessions, precomputedDist, maxClusterRatio = 0.25) {
    const sizeLimit = Math.max(10, Math.floor(totalSessions * maxClusterRatio));
    // Canonical key: smaller index first (used for both the global matrix
    // and the re-indexed sub-matrix).
    const key = (a, b) => (a < b ? `${a}:${b}` : `${b}:${a}`);
    const output = [];
    for (const cluster of clusters) {
        if (cluster.length <= sizeLimit) {
            output.push(cluster);
            continue;
        }
        // Build the sub-distance-matrix restricted to this cluster's members,
        // re-indexed to 0..size-1. Missing global pairs default to 1.0.
        const size = cluster.length;
        const localDist = new Map();
        for (let i = 0; i < size; i++) {
            for (let j = i + 1; j < size; j++) {
                const d = precomputedDist.get(key(cluster[i], cluster[j])) ?? 1.0;
                localDist.set(key(i, j), d);
            }
        }
        // Stricter sub-threshold: 80% of the median internal distance,
        // floored at 0.15.
        const sortedDists = [...localDist.values()].sort((a, b) => a - b);
        const medianDist = sortedDists[Math.floor(sortedDists.length / 2)] ?? 0.5;
        const subThreshold = Math.max(0.15, medianDist * 0.8);
        const subClusters = clusterWithThresholdFromDistMatrix(size, localDist, subThreshold);
        // Map sub-cluster indices back to the original session indices.
        for (const subMembers of subClusters) {
            output.push(subMembers.map((i) => cluster[i]));
        }
    }
    return output;
}
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Louvain community detection and Brandes betweenness centrality.
|
|
3
|
+
*/
|
|
4
|
+
/**
 * Single-level Louvain community detection over the weighted topic graph.
 *
 * Greedily moves nodes between communities while modularity improves (up to
 * 100 sweeps), then groups nodes by final community, labels each group by
 * its most frequent keywords, and writes `communityId` back onto each topic
 * node. No multi-level coarsening is performed — this is only the local
 * optimization phase of Louvain.
 *
 * @param {Array<{id: string, keywords: string[], label: string, project: string}>} topics
 *   Topic nodes; mutated in place (`communityId` is assigned — but NOT in the
 *   edge-free early return, see NOTE below).
 * @param {Array<{source: string, target: string, strength: number}>} edges
 *   Undirected weighted edges by topic id; unknown endpoints are skipped.
 * @returns {{communities: Array<{id: number, topicIds: string[], label: string,
 *   dominantProject: string}>, modularity: number}}
 */
export function louvainDetection(topics, edges) {
    const n = topics.length;
    if (n === 0)
        return { communities: [], modularity: 0 };
    const nodeIndex = new Map();
    topics.forEach((t, i) => nodeIndex.set(t.id, i));
    // Build adjacency with weights
    const adjWeights = Array.from({ length: n }, () => new Array(n).fill(0));
    let totalWeight = 0;
    for (const e of edges) {
        const i = nodeIndex.get(e.source);
        const j = nodeIndex.get(e.target);
        if (i === undefined || j === undefined)
            continue;
        adjWeights[i][j] = e.strength;
        adjWeights[j][i] = e.strength;
        totalWeight += e.strength;
    }
    if (totalWeight === 0) {
        // No edges: each node is its own community
        // NOTE(review): this branch does not assign topic.communityId,
        // unlike the main path below — confirm callers tolerate that.
        const communities = topics.map((t, i) => ({
            id: i,
            topicIds: [t.id],
            label: t.keywords[0] || t.label,
            dominantProject: t.project,
        }));
        return { communities, modularity: 0 };
    }
    const m2 = totalWeight; // sum of all edge weights (each edge counted once)
    // Node strengths (weighted degree)
    const k = new Float64Array(n);
    for (let i = 0; i < n; i++) {
        for (let j = 0; j < n; j++) {
            k[i] += adjWeights[i][j];
        }
    }
    // Community assignments: start with every node in its own community.
    const community = new Int32Array(n);
    for (let i = 0; i < n; i++)
        community[i] = i;
    // Phase 1: Local modularity optimization
    let improved = true;
    const maxIter = 100; // safety cap on full sweeps
    let iter = 0;
    while (improved && iter < maxIter) {
        improved = false;
        iter++;
        for (let i = 0; i < n; i++) {
            const currentComm = community[i];
            // Compute sum of weights to each neighboring community
            const commWeights = new Map();
            for (let j = 0; j < n; j++) {
                if (adjWeights[i][j] > 0 && i !== j) {
                    const c = community[j];
                    commWeights.set(c, (commWeights.get(c) || 0) + adjWeights[i][j]);
                }
            }
            // Sum of weights in current community
            const ki = k[i];
            // Try moving to each neighboring community
            let bestComm = currentComm;
            let bestDeltaQ = 0;
            // Remove node i from its current community and compute cost
            let sumCurrentComm = 0;
            let kCurrentComm = 0;
            for (let j = 0; j < n; j++) {
                if (j !== i && community[j] === currentComm) {
                    sumCurrentComm += adjWeights[i][j];
                    kCurrentComm += k[j];
                }
            }
            for (const [targetComm, wToComm] of commWeights) {
                if (targetComm === currentComm)
                    continue;
                // Sum of weights of nodes in target community
                let kTargetComm = 0;
                for (let j = 0; j < n; j++) {
                    if (community[j] === targetComm) {
                        kTargetComm += k[j];
                    }
                }
                // deltaQ = [w_to_target / m - ki * k_target / (2m²)] - [w_to_current / m - ki * k_current / (2m²)]
                // NOTE(review): this is a simplified modularity gain, not the
                // textbook Louvain ΔQ — verify against a reference
                // implementation if exact modularity optimization matters.
                const deltaQ = (wToComm - sumCurrentComm) / m2 -
                    (ki * (kTargetComm - kCurrentComm)) / (2 * m2 * m2);
                if (deltaQ > bestDeltaQ) {
                    bestDeltaQ = deltaQ;
                    bestComm = targetComm;
                }
            }
            if (bestComm !== currentComm) {
                community[i] = bestComm;
                improved = true;
            }
        }
    }
    // Compute modularity Q over the final assignment:
    // Q = (1/2m) * sum_{ij in same community} (A_ij - k_i k_j / 2m)
    let modularity = 0;
    for (let i = 0; i < n; i++) {
        for (let j = 0; j < n; j++) {
            if (community[i] === community[j]) {
                modularity += adjWeights[i][j] - (k[i] * k[j]) / (2 * m2);
            }
        }
    }
    modularity /= 2 * m2;
    // Build community objects: group node indices by final community id.
    const commGroups = new Map();
    for (let i = 0; i < n; i++) {
        const c = community[i];
        const group = commGroups.get(c) || [];
        group.push(i);
        commGroups.set(c, group);
    }
    // Renumber communities densely (0, 1, 2, ...) in group-insertion order.
    let commIdx = 0;
    const communities = [];
    for (const [, members] of commGroups) {
        const topicIds = members.map((i) => topics[i].id);
        // Label from most frequent keywords
        const kwCount = new Map();
        for (const i of members) {
            for (const kw of topics[i].keywords) {
                kwCount.set(kw, (kwCount.get(kw) || 0) + 1);
            }
        }
        const topKw = [...kwCount.entries()]
            .sort((a, b) => b[1] - a[1])
            .slice(0, 3)
            .map(([kw]) => kw);
        // Dominant project: the project contributing the most member topics.
        const projCount = new Map();
        for (const i of members) {
            projCount.set(topics[i].project, (projCount.get(topics[i].project) || 0) + 1);
        }
        const dominantProject = [...projCount.entries()].sort((a, b) => b[1] - a[1])[0]?.[0] ?? "";
        // Assign community ID to topics
        for (const i of members) {
            topics[i].communityId = commIdx;
        }
        communities.push({
            id: commIdx,
            topicIds,
            label: topKw.join(", ") || `Community ${commIdx}`,
            dominantProject,
        });
        commIdx++;
    }
    return { communities, modularity };
}
|
|
153
|
+
/**
 * Brandes' algorithm for betweenness centrality on the unweighted topic
 * graph, plus degree centrality. Results are written in place onto each
 * topic node (`betweennessCentrality`, `degreeCentrality`), rounded to 4
 * decimal places. Graphs with 2 or fewer nodes are skipped entirely.
 *
 * @param {Array<{id: string}>} topics - Topic nodes; mutated in place.
 * @param {Array<{source: string, target: string}>} edges - Undirected edges
 *   by topic id; edges with unknown endpoints are ignored.
 */
export function brandesBetweenness(topics, edges) {
    const n = topics.length;
    if (n <= 2)
        return;
    const indexOfId = new Map();
    topics.forEach((t, i) => indexOfId.set(t.id, i));
    // Undirected adjacency list.
    const neighbors = Array.from({ length: n }, () => []);
    for (const edge of edges) {
        const u = indexOfId.get(edge.source);
        const v = indexOfId.get(edge.target);
        if (u === undefined || v === undefined)
            continue;
        neighbors[u].push(v);
        neighbors[v].push(u);
    }
    const betweenness = new Float64Array(n);
    for (let source = 0; source < n; source++) {
        // Forward pass: BFS from `source`, recording shortest-path counts
        // and predecessor lists, and the order in which nodes are settled.
        const visitOrder = [];
        const predecessors = Array.from({ length: n }, () => []);
        const pathCount = new Float64Array(n);
        pathCount[source] = 1;
        const depth = new Int32Array(n).fill(-1);
        depth[source] = 0;
        const queue = [source];
        let head = 0;
        while (head < queue.length) {
            const v = queue[head++];
            visitOrder.push(v);
            for (const w of neighbors[v]) {
                if (depth[w] < 0) {
                    depth[w] = depth[v] + 1;
                    queue.push(w);
                }
                if (depth[w] === depth[v] + 1) {
                    pathCount[w] += pathCount[v];
                    predecessors[w].push(v);
                }
            }
        }
        // Backward pass: accumulate pair dependencies in reverse BFS order.
        const dependency = new Float64Array(n);
        for (let idx = visitOrder.length - 1; idx >= 0; idx--) {
            const w = visitOrder[idx];
            for (const v of predecessors[w]) {
                dependency[v] += (pathCount[v] / pathCount[w]) * (1 + dependency[w]);
            }
            if (w !== source) {
                betweenness[w] += dependency[w];
            }
        }
    }
    // Undirected graph: every pair is counted from both endpoints, so halve
    // the raw score, then normalize by the (n-1)(n-2)/2 possible pairs.
    // (n > 2 is guaranteed by the guard above.)
    const normFactor = 2 / ((n - 1) * (n - 2));
    for (let i = 0; i < n; i++) {
        topics[i].betweennessCentrality =
            Math.round((betweenness[i] / 2) * normFactor * 10000) / 10000;
    }
    // Degree centrality: fraction of other nodes each node touches.
    for (let i = 0; i < n; i++) {
        const degree = neighbors[i].length;
        topics[i].degreeCentrality =
            n > 1 ? Math.round((degree / (n - 1)) * 10000) / 10000 : 0;
    }
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Constants for knowledge graph construction.
|
|
3
|
+
*/
|
|
4
|
+
// ─── Stop words ─────────────────────────────────────────────────────────────
// Tokens excluded from keyword/topic extraction: English function words plus
// a handful of coding-chat commonplaces ("file", "fix", ...), and Japanese
// particles/filler words. Duplicate entries (e.g. "no") are harmless — the
// Set deduplicates them.
export const STOP_WORDS = new Set([
    // English
    "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did", "will", "would", "could",
    "should", "may", "might", "shall", "can", "need", "must", "ought",
    "i", "you", "he", "she", "it", "we", "they", "me", "him", "her",
    "us", "them", "my", "your", "his", "its", "our", "their", "mine",
    "yours", "hers", "ours", "theirs", "this", "that", "these", "those",
    "what", "which", "who", "whom", "whose", "when", "where", "why", "how",
    "all", "each", "every", "both", "few", "more", "most", "other", "some",
    "such", "no", "nor", "not", "only", "own", "same", "so", "than", "too",
    "very", "just", "because", "as", "until", "while", "of", "at", "by",
    "for", "with", "about", "against", "between", "through", "during",
    "before", "after", "above", "below", "to", "from", "up", "down", "in",
    "out", "on", "off", "over", "under", "again", "further", "then", "once",
    "here", "there", "and", "but", "or", "if", "else", "also", "like",
    "please", "thanks", "thank", "yes", "no", "ok", "okay", "sure", "let",
    "make", "use", "using", "used", "want", "see", "look", "try", "get",
    "got", "think", "know", "now", "new", "way", "well", "back", "still",
    "file", "code", "change", "changes", "add", "update", "fix", "set",
    // Japanese particles and common words
    "の", "に", "は", "を", "が", "で", "と", "も", "か", "な", "だ",
    "です", "ます", "する", "した", "して", "ない", "ある", "いる",
    "これ", "それ", "あれ", "この", "その", "あの", "ここ", "そこ",
    "こと", "もの", "ため", "よう", "から", "まで", "より", "ほど",
    "など", "ので", "けど", "でも", "しかし", "また", "そして",
    "って", "という", "ください", "お願い", "確認",
]);
// ─── Noise token patterns ───────────────────────────────────────────────────
// Tokens matching any of these are discarded as non-semantic noise.
// Full 8-4-4-4-12 UUID, case-insensitive.
export const UUID_PATTERN = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
// Six or more hex digits (commit hashes, object ids, hex colors).
export const HEX_PATTERN = /^[0-9a-f]{6,}$/i;
// Pure integers.
export const NUM_PATTERN = /^\d+$/;
// ─── Structural features ────────────────────────────────────────────────────
// Dimensionality of the structural (non-text) feature vector.
// NOTE(review): presumably matches the vector built in feature-extraction.js
// — confirm the two stay in sync.
export const STRUCTURAL_DIM = 7;
// ─── Feature weights ────────────────────────────────────────────────────────
// Relative weights of the text / tool / structural feature groups; the
// three sum to 1.0.
export const WEIGHT_TEXT = 0.50;
export const WEIGHT_TOOL = 0.25;
export const WEIGHT_STRUCT = 0.25;
// ─── Action verbs ───────────────────────────────────────────────────────────
// English verbs recognized as task actions in prompts.
export const ACTION_VERBS_EN = new Set([
    "fix", "add", "implement", "create", "update", "refactor", "remove",
    "delete", "move", "rename", "test", "debug", "optimize", "migrate",
    "deploy", "configure", "setup", "integrate", "build", "review",
    "investigate", "analyze", "check", "resolve", "extract", "convert",
]);
// [pattern, canonicalVerb] pairs mapping Japanese task verbs onto the same
// English action vocabulary as ACTION_VERBS_EN.
export const ACTION_VERBS_JA = [
    [/修正/, "fix"], [/追加/, "add"], [/実装/, "implement"],
    [/作成|作って/, "create"], [/更新/, "update"], [/リファクタ/, "refactor"],
    [/削除/, "remove"], [/テスト/, "test"], [/デバッグ/, "debug"],
    [/最適化/, "optimize"], [/移行|マイグレ/, "migrate"],
    [/設定|セットアップ/, "configure"], [/統合/, "integrate"],
    [/ビルド/, "build"], [/レビュー/, "review"], [/調査/, "investigate"],
    [/確認|チェック/, "check"], [/解決/, "resolve"],
];
|