@codragraph/cli 2.0.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -22
- package/dist/_shared/cgdb/schema-constants.d.ts +16 -0
- package/dist/_shared/cgdb/schema-constants.d.ts.map +1 -0
- package/dist/_shared/cgdb/schema-constants.js +70 -0
- package/dist/_shared/cgdb/schema-constants.js.map +1 -0
- package/dist/_shared/feature-clusters.d.ts +99 -0
- package/dist/_shared/feature-clusters.d.ts.map +1 -0
- package/dist/_shared/feature-clusters.js +2 -0
- package/dist/_shared/feature-clusters.js.map +1 -0
- package/dist/_shared/graph/types.d.ts +16 -2
- package/dist/_shared/graph/types.d.ts.map +1 -1
- package/dist/_shared/index.d.ts +3 -2
- package/dist/_shared/index.d.ts.map +1 -1
- package/dist/_shared/index.js +1 -1
- package/dist/_shared/index.js.map +1 -1
- package/dist/_shared/pipeline.d.ts +1 -1
- package/dist/_shared/pipeline.d.ts.map +1 -1
- package/dist/cli/ai-context.js +4 -0
- package/dist/cli/analyze.js +30 -27
- package/dist/cli/graphstore.js +21 -21
- package/dist/cli/index-repo.js +3 -3
- package/dist/cli/index.js +37 -0
- package/dist/cli/setup.js +9 -5
- package/dist/cli/tool.d.ts +25 -0
- package/dist/cli/tool.js +74 -0
- package/dist/cli/wiki.js +3 -3
- package/dist/config/supported-languages.d.ts +3 -3
- package/dist/config/supported-languages.js +3 -3
- package/dist/core/augmentation/engine.js +7 -7
- package/dist/core/cgdb/cgdb-adapter.d.ts +176 -0
- package/dist/core/cgdb/cgdb-adapter.js +1336 -0
- package/dist/core/cgdb/content-read.d.ts +46 -0
- package/dist/core/cgdb/content-read.js +64 -0
- package/dist/core/cgdb/csv-generator.d.ts +29 -0
- package/dist/core/cgdb/csv-generator.js +523 -0
- package/dist/core/cgdb/pool-adapter.d.ts +93 -0
- package/dist/core/cgdb/pool-adapter.js +550 -0
- package/dist/core/cgdb/schema.d.ts +63 -0
- package/dist/core/cgdb/schema.js +557 -0
- package/dist/core/embeddings/embedder.js +4 -2
- package/dist/core/embeddings/embedding-pipeline.js +4 -4
- package/dist/core/graphstore/cgdb-row-source.d.ts +19 -0
- package/dist/core/graphstore/cgdb-row-source.js +141 -0
- package/dist/core/graphstore/index.d.ts +2 -2
- package/dist/core/graphstore/index.js +4 -4
- package/dist/core/group/bridge-db.d.ts +2 -2
- package/dist/core/group/bridge-db.js +18 -18
- package/dist/core/group/bridge-schema.d.ts +4 -4
- package/dist/core/group/bridge-schema.js +4 -4
- package/dist/core/group/cross-impact.js +3 -3
- package/dist/core/group/service.d.ts +16 -0
- package/dist/core/group/service.js +360 -0
- package/dist/core/group/sync.js +4 -4
- package/dist/core/ingestion/emit-references.d.ts +1 -1
- package/dist/core/ingestion/emit-references.js +1 -1
- package/dist/core/ingestion/feature-cluster-processor.d.ts +62 -0
- package/dist/core/ingestion/feature-cluster-processor.js +626 -0
- package/dist/core/ingestion/finalize-orchestrator.js +1 -1
- package/dist/core/ingestion/model/registration-table.js +1 -0
- package/dist/core/ingestion/model/resolve.d.ts +2 -2
- package/dist/core/ingestion/model/resolve.js +3 -3
- package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
- package/dist/core/ingestion/model/semantic-model.js +1 -1
- package/dist/core/ingestion/model/symbol-table.d.ts +1 -1
- package/dist/core/ingestion/model/symbol-table.js +1 -1
- package/dist/core/ingestion/pipeline-phases/feature-clusters.d.ts +17 -0
- package/dist/core/ingestion/pipeline-phases/feature-clusters.js +88 -0
- package/dist/core/ingestion/pipeline-phases/index.d.ts +1 -0
- package/dist/core/ingestion/pipeline-phases/index.js +1 -0
- package/dist/core/ingestion/pipeline.d.ts +4 -0
- package/dist/core/ingestion/pipeline.js +9 -5
- package/dist/core/run-analyze.d.ts +1 -0
- package/dist/core/run-analyze.js +36 -30
- package/dist/core/search/bm25-index.d.ts +3 -3
- package/dist/core/search/bm25-index.js +9 -9
- package/dist/core/search/hybrid-search.js +2 -2
- package/dist/core/wiki/generator.d.ts +2 -2
- package/dist/core/wiki/generator.js +4 -4
- package/dist/core/wiki/graph-queries.d.ts +2 -2
- package/dist/core/wiki/graph-queries.js +5 -5
- package/dist/mcp/core/cgdb-adapter.d.ts +5 -0
- package/dist/mcp/core/cgdb-adapter.js +5 -0
- package/dist/mcp/core/embedder.js +6 -3
- package/dist/mcp/local/local-backend.d.ts +14 -2
- package/dist/mcp/local/local-backend.js +396 -18
- package/dist/mcp/resources.js +139 -0
- package/dist/mcp/server.js +3 -3
- package/dist/mcp/tools.js +175 -3
- package/dist/server/analyze-worker.js +2 -2
- package/dist/server/api.js +147 -31
- package/dist/storage/repo-manager.d.ts +10 -5
- package/dist/storage/repo-manager.js +10 -6
- package/dist/types/pipeline.d.ts +2 -0
- package/hooks/claude/codragraph-hook.cjs +4 -4
- package/package.json +15 -6
- package/scripts/build.js +21 -21
- package/skills/codragraph-cli.md +17 -1
- package/skills/codragraph-guide.md +6 -2
- package/skills/codragraph-onboarding.md +2 -2
- package/vendor/tree-sitter-proto/bindings/node/index.js +3 -3
- package/vendor/tree-sitter-proto/src/node-types.json +1 -1
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase: featureClusters
|
|
3
|
+
*
|
|
4
|
+
* Creates human-facing FeatureCluster nodes above algorithmic Community nodes.
|
|
5
|
+
* This is the layer agents query for product/domain areas such as Settings,
|
|
6
|
+
* AI, Auth, MCP, or Ingestion before drilling into exact symbols.
|
|
7
|
+
*
|
|
8
|
+
* @deps processes, structure
|
|
9
|
+
* @reads graph (all nodes and relationships)
|
|
10
|
+
* @writes graph (FeatureCluster nodes, FEATURE_MEMBER_OF, FEATURE_DEPENDS_ON)
|
|
11
|
+
*/
|
|
12
|
+
import type { PipelinePhase } from './types.js';
|
|
13
|
+
import { type FeatureClusterDetectionResult } from '../feature-cluster-processor.js';
|
|
14
|
+
export interface FeatureClustersOutput {
|
|
15
|
+
featureClusterResult: FeatureClusterDetectionResult;
|
|
16
|
+
}
|
|
17
|
+
export declare const featureClustersPhase: PipelinePhase<FeatureClustersOutput>;
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase: featureClusters
|
|
3
|
+
*
|
|
4
|
+
* Creates human-facing FeatureCluster nodes above algorithmic Community nodes.
|
|
5
|
+
* This is the layer agents query for product/domain areas such as Settings,
|
|
6
|
+
* AI, Auth, MCP, or Ingestion before drilling into exact symbols.
|
|
7
|
+
*
|
|
8
|
+
* @deps processes, structure
|
|
9
|
+
* @reads graph (all nodes and relationships)
|
|
10
|
+
* @writes graph (FeatureCluster nodes, FEATURE_MEMBER_OF, FEATURE_DEPENDS_ON)
|
|
11
|
+
*/
|
|
12
|
+
import { getPhaseOutput } from './types.js';
|
|
13
|
+
import { processFeatureClusters, } from '../feature-cluster-processor.js';
|
|
14
|
+
import { generateId } from '../../../lib/utils.js';
|
|
15
|
+
import { isDev } from '../utils/env.js';
|
|
16
|
+
export const featureClustersPhase = {
|
|
17
|
+
name: 'featureClusters',
|
|
18
|
+
deps: ['processes', 'structure'],
|
|
19
|
+
async execute(ctx, deps) {
|
|
20
|
+
const { totalFiles } = getPhaseOutput(deps, 'structure');
|
|
21
|
+
ctx.onProgress({
|
|
22
|
+
phase: 'feature_clusters',
|
|
23
|
+
percent: 99,
|
|
24
|
+
message: 'Building feature clusters...',
|
|
25
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: ctx.graph.nodeCount },
|
|
26
|
+
});
|
|
27
|
+
const featureClusterResult = await processFeatureClusters(ctx.graph, (message, progress) => {
|
|
28
|
+
ctx.onProgress({
|
|
29
|
+
phase: 'feature_clusters',
|
|
30
|
+
percent: Math.round(99 + progress * 0.009),
|
|
31
|
+
message,
|
|
32
|
+
stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: ctx.graph.nodeCount },
|
|
33
|
+
});
|
|
34
|
+
}, {
|
|
35
|
+
repo: ctx.options?.featureClusterRepo,
|
|
36
|
+
lastIndexedCommit: ctx.options?.lastIndexedCommit,
|
|
37
|
+
});
|
|
38
|
+
if (isDev) {
|
|
39
|
+
console.log(`Feature clustering: ${featureClusterResult.stats.totalClusters} clusters, ${featureClusterResult.stats.totalMemberships} memberships`);
|
|
40
|
+
}
|
|
41
|
+
featureClusterResult.clusters.forEach((cluster) => {
|
|
42
|
+
ctx.graph.addNode({
|
|
43
|
+
id: cluster.id,
|
|
44
|
+
label: 'FeatureCluster',
|
|
45
|
+
properties: {
|
|
46
|
+
name: cluster.name,
|
|
47
|
+
filePath: '',
|
|
48
|
+
slug: cluster.slug,
|
|
49
|
+
featureKind: cluster.featureKind,
|
|
50
|
+
summary: cluster.summary,
|
|
51
|
+
description: cluster.description,
|
|
52
|
+
repo: cluster.repo,
|
|
53
|
+
service: cluster.service,
|
|
54
|
+
signals: cluster.signals,
|
|
55
|
+
memberCount: cluster.memberCount,
|
|
56
|
+
entryPointIds: cluster.entryPointIds,
|
|
57
|
+
routes: cluster.routes,
|
|
58
|
+
tools: cluster.tools,
|
|
59
|
+
testCoverageHints: cluster.testCoverageHints,
|
|
60
|
+
lastIndexedCommit: cluster.lastIndexedCommit,
|
|
61
|
+
confidence: cluster.confidence,
|
|
62
|
+
source: 'heuristic',
|
|
63
|
+
},
|
|
64
|
+
});
|
|
65
|
+
});
|
|
66
|
+
featureClusterResult.memberships.forEach((membership) => {
|
|
67
|
+
ctx.graph.addRelationship({
|
|
68
|
+
id: generateId('FEATURE_MEMBER_OF', `${membership.nodeId}->${membership.clusterId}`),
|
|
69
|
+
sourceId: membership.nodeId,
|
|
70
|
+
targetId: membership.clusterId,
|
|
71
|
+
type: 'FEATURE_MEMBER_OF',
|
|
72
|
+
confidence: membership.confidence,
|
|
73
|
+
reason: membership.signals.join('|'),
|
|
74
|
+
});
|
|
75
|
+
});
|
|
76
|
+
featureClusterResult.dependencies.forEach((dependency) => {
|
|
77
|
+
ctx.graph.addRelationship({
|
|
78
|
+
id: generateId('FEATURE_DEPENDS_ON', `${dependency.sourceClusterId}->${dependency.targetClusterId}`),
|
|
79
|
+
sourceId: dependency.sourceClusterId,
|
|
80
|
+
targetId: dependency.targetClusterId,
|
|
81
|
+
type: 'FEATURE_DEPENDS_ON',
|
|
82
|
+
confidence: dependency.confidence,
|
|
83
|
+
reason: `member-dependency|edges:${dependency.edgeCount}|types:${dependency.relationshipTypes.join(',')}`,
|
|
84
|
+
});
|
|
85
|
+
});
|
|
86
|
+
return { featureClusterResult };
|
|
87
|
+
},
|
|
88
|
+
};
|
|
@@ -17,6 +17,7 @@ export { scopeResolutionPhase, type ScopeResolutionOutput, } from '../scope-reso
|
|
|
17
17
|
export { mroPhase, type MROOutput } from './mro.js';
|
|
18
18
|
export { communitiesPhase, type CommunitiesOutput } from './communities.js';
|
|
19
19
|
export { processesPhase, type ProcessesOutput } from './processes.js';
|
|
20
|
+
export { featureClustersPhase, type FeatureClustersOutput } from './feature-clusters.js';
|
|
20
21
|
export { runPipeline } from './runner.js';
|
|
21
22
|
export type { PipelinePhase, PipelineContext, PhaseResult } from './types.js';
|
|
22
23
|
export { getPhaseOutput } from './types.js';
|
|
@@ -18,6 +18,7 @@ export { scopeResolutionPhase, } from '../scope-resolution/pipeline/phase.js';
|
|
|
18
18
|
export { mroPhase } from './mro.js';
|
|
19
19
|
export { communitiesPhase } from './communities.js';
|
|
20
20
|
export { processesPhase } from './processes.js';
|
|
21
|
+
export { featureClustersPhase } from './feature-clusters.js';
|
|
21
22
|
// ── Infrastructure ─────────────────────────────────────────────────────────
|
|
22
23
|
export { runPipeline } from './runner.js';
|
|
23
24
|
export { getPhaseOutput } from './types.js';
|
|
@@ -21,6 +21,10 @@ export interface PipelineOptions {
|
|
|
21
21
|
skipGraphPhases?: boolean;
|
|
22
22
|
/** Force sequential parsing (no worker pool). Useful for testing the sequential path. */
|
|
23
23
|
skipWorkers?: boolean;
|
|
24
|
+
/** Repo label written onto FeatureCluster metadata. */
|
|
25
|
+
featureClusterRepo?: string;
|
|
26
|
+
/** Indexed source commit written onto FeatureCluster metadata. */
|
|
27
|
+
lastIndexedCommit?: string;
|
|
24
28
|
/**
|
|
25
29
|
* @internal Test-only override for worker-pool gating thresholds.
|
|
26
30
|
* When unset, production defaults apply (15 files OR 512 KB total bytes).
|
|
@@ -15,15 +15,16 @@
|
|
|
15
15
|
* See ARCHITECTURE.md for the full phase dependency diagram.
|
|
16
16
|
*/
|
|
17
17
|
import { createKnowledgeGraph } from '../graph/graph.js';
|
|
18
|
-
import { runPipeline, getPhaseOutput, scanPhase, structurePhase, markdownPhase, cobolPhase, parsePhase, routesPhase, toolsPhase, ormPhase, crossFilePhase, scopeResolutionPhase, mroPhase, communitiesPhase, processesPhase, } from './pipeline-phases/index.js';
|
|
18
|
+
import { runPipeline, getPhaseOutput, scanPhase, structurePhase, markdownPhase, cobolPhase, parsePhase, routesPhase, toolsPhase, ormPhase, crossFilePhase, scopeResolutionPhase, mroPhase, communitiesPhase, processesPhase, featureClustersPhase, } from './pipeline-phases/index.js';
|
|
19
19
|
// ── Phase registry ─────────────────────────────────────────────────────────
|
|
20
20
|
/**
|
|
21
21
|
* All pipeline phases with their dependency relationships.
|
|
22
22
|
*
|
|
23
23
|
* Phase dependency graph:
|
|
24
24
|
*
|
|
25
|
-
* scan
|
|
26
|
-
*
|
|
25
|
+
* scan -> structure -> [markdown, cobol] -> parse -> [routes, tools, orm]
|
|
26
|
+
* -> crossFile -> scopeResolution -> mro -> communities -> processes
|
|
27
|
+
* -> featureClusters
|
|
27
28
|
*
|
|
28
29
|
* To add a new phase: create a file in pipeline-phases/, export the phase
|
|
29
30
|
* object, and add it to the appropriate position in this array.
|
|
@@ -42,7 +43,7 @@ function buildPhaseList(options) {
|
|
|
42
43
|
scopeResolutionPhase,
|
|
43
44
|
];
|
|
44
45
|
if (!options?.skipGraphPhases) {
|
|
45
|
-
phases.push(mroPhase, communitiesPhase, processesPhase);
|
|
46
|
+
phases.push(mroPhase, communitiesPhase, processesPhase, featureClustersPhase);
|
|
46
47
|
}
|
|
47
48
|
return phases;
|
|
48
49
|
}
|
|
@@ -62,15 +63,17 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
|
|
|
62
63
|
const { totalFiles, usedWorkerPool } = getPhaseOutput(results, 'parse');
|
|
63
64
|
let communityResult;
|
|
64
65
|
let processResult;
|
|
66
|
+
let featureClusterResult;
|
|
65
67
|
if (!options?.skipGraphPhases) {
|
|
66
68
|
communityResult = getPhaseOutput(results, 'communities').communityResult;
|
|
67
69
|
processResult = getPhaseOutput(results, 'processes').processResult;
|
|
70
|
+
featureClusterResult = getPhaseOutput(results, 'featureClusters').featureClusterResult;
|
|
68
71
|
}
|
|
69
72
|
onProgress({
|
|
70
73
|
phase: 'complete',
|
|
71
74
|
percent: 100,
|
|
72
75
|
message: communityResult && processResult
|
|
73
|
-
? `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`
|
|
76
|
+
? `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes, ${featureClusterResult?.stats.totalClusters ?? 0} feature clusters detected.`
|
|
74
77
|
: 'Graph complete! (graph phases skipped)',
|
|
75
78
|
stats: {
|
|
76
79
|
filesProcessed: totalFiles,
|
|
@@ -84,6 +87,7 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
|
|
|
84
87
|
totalFileCount: totalFiles,
|
|
85
88
|
communityResult,
|
|
86
89
|
processResult,
|
|
90
|
+
featureClusterResult,
|
|
87
91
|
usedWorkerPool,
|
|
88
92
|
};
|
|
89
93
|
};
|
package/dist/core/run-analyze.js
CHANGED
|
@@ -13,13 +13,13 @@ import fs from 'fs/promises';
|
|
|
13
13
|
import * as fsSync from 'node:fs';
|
|
14
14
|
import * as v8 from 'node:v8';
|
|
15
15
|
import { runPipelineFromRepo } from './ingestion/pipeline.js';
|
|
16
|
-
import {
|
|
16
|
+
import { initCgdb, loadGraphToCgdb, getCgdbStats, executeQuery, executeWithReusedStatement, closeCgdb, loadCachedEmbeddings, } from './cgdb/cgdb-adapter.js';
|
|
17
17
|
import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, INDEX_SCHEMA_VERSION, } from '../storage/repo-manager.js';
|
|
18
18
|
import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
|
|
19
19
|
import { recordAnalysisSnapshot } from './graphstore/index.js';
|
|
20
20
|
import { generateAIContextFiles } from '../cli/ai-context.js';
|
|
21
|
-
import { EMBEDDING_TABLE_NAME } from './
|
|
22
|
-
import { STALE_HASH_SENTINEL } from './
|
|
21
|
+
import { EMBEDDING_TABLE_NAME } from './cgdb/schema.js';
|
|
22
|
+
import { STALE_HASH_SENTINEL } from './cgdb/schema.js';
|
|
23
23
|
/** Threshold: auto-skip embeddings for repos with more nodes than this */
|
|
24
24
|
const EMBEDDING_NODE_LIMIT = 50_000;
|
|
25
25
|
export const PHASE_LABELS = {
|
|
@@ -31,8 +31,9 @@ export const PHASE_LABELS = {
|
|
|
31
31
|
heritage: 'Extracting inheritance',
|
|
32
32
|
communities: 'Detecting communities',
|
|
33
33
|
processes: 'Detecting processes',
|
|
34
|
+
feature_clusters: 'Building feature clusters',
|
|
34
35
|
complete: 'Pipeline complete',
|
|
35
|
-
|
|
36
|
+
cgdb: 'Loading into LadybugDB',
|
|
36
37
|
fts: 'Creating search indexes',
|
|
37
38
|
embeddings: 'Generating embeddings',
|
|
38
39
|
done: 'Done',
|
|
@@ -81,7 +82,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
81
82
|
const progress = (phase, percent, message) => {
|
|
82
83
|
callbacks.onProgress(phase, percent, message);
|
|
83
84
|
// Only snapshot on phase transitions, not every tick. Phase strings come
|
|
84
|
-
// from runPipelineFromRepo /
|
|
85
|
+
// from runPipelineFromRepo / loadGraphToCgdb and are stable.
|
|
85
86
|
if (heapProfileEnabled && phase && phase !== lastProfilePhase) {
|
|
86
87
|
lastProfilePhase = phase;
|
|
87
88
|
const ts = Date.now();
|
|
@@ -117,7 +118,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
117
118
|
}
|
|
118
119
|
}
|
|
119
120
|
};
|
|
120
|
-
const { storagePath,
|
|
121
|
+
const { storagePath, cgdbPath } = getStoragePaths(repoPath);
|
|
121
122
|
// Clean up stale KuzuDB files from before the LadybugDB migration.
|
|
122
123
|
const kuzuResult = await cleanupOldKuzuFiles(storagePath);
|
|
123
124
|
if (kuzuResult.found && kuzuResult.needsReindex) {
|
|
@@ -129,10 +130,10 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
129
130
|
// ── Early-return: already up to date ──────────────────────────────
|
|
130
131
|
// Schema-version mismatch forces a full re-analyze regardless of commit
|
|
131
132
|
// equality: existing 1.7.x indexes have no `schemaVersion` field at all,
|
|
132
|
-
// and
|
|
133
|
-
//
|
|
134
|
-
//
|
|
135
|
-
//
|
|
133
|
+
// and current readers expect contentEncoding plus rich FeatureCluster
|
|
134
|
+
// context-pack columns. LadybugDB ALTER on existing tables is not validated
|
|
135
|
+
// end-to-end yet, so the supported migration path is re-analyze via a fresh
|
|
136
|
+
// CREATE NODE TABLE.
|
|
136
137
|
const schemaUpToDate = !!existingMeta && (existingMeta.schemaVersion ?? 0) >= INDEX_SCHEMA_VERSION;
|
|
137
138
|
if (existingMeta &&
|
|
138
139
|
schemaUpToDate &&
|
|
@@ -150,7 +151,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
150
151
|
}
|
|
151
152
|
if (existingMeta && !schemaUpToDate) {
|
|
152
153
|
log(`Index schema version ${existingMeta.schemaVersion ?? '<missing>'} is older than ` +
|
|
153
|
-
`${INDEX_SCHEMA_VERSION} (
|
|
154
|
+
`${INDEX_SCHEMA_VERSION} (FeatureCluster context-pack schema). ` +
|
|
154
155
|
`Re-analyzing.`);
|
|
155
156
|
}
|
|
156
157
|
// ── Cache embeddings from existing index before rebuild ────────────
|
|
@@ -159,15 +160,15 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
159
160
|
if (options.embeddings && existingMeta && !options.force) {
|
|
160
161
|
try {
|
|
161
162
|
progress('embeddings', 0, 'Caching embeddings...');
|
|
162
|
-
await
|
|
163
|
+
await initCgdb(cgdbPath);
|
|
163
164
|
const cached = await loadCachedEmbeddings();
|
|
164
165
|
cachedEmbeddingNodeIds = cached.embeddingNodeIds;
|
|
165
166
|
cachedEmbeddings = cached.embeddings;
|
|
166
|
-
await
|
|
167
|
+
await closeCgdb();
|
|
167
168
|
}
|
|
168
169
|
catch {
|
|
169
170
|
try {
|
|
170
|
-
await
|
|
171
|
+
await closeCgdb();
|
|
171
172
|
}
|
|
172
173
|
catch {
|
|
173
174
|
/* swallow */
|
|
@@ -175,16 +176,20 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
175
176
|
}
|
|
176
177
|
}
|
|
177
178
|
// ── Phase 1: Full Pipeline (0–60%) ────────────────────────────────
|
|
179
|
+
const repoNameForFeatureClusters = options.registryName ?? getInferredRepoName(repoPath) ?? path.basename(repoPath);
|
|
178
180
|
const pipelineResult = await runPipelineFromRepo(repoPath, (p) => {
|
|
179
181
|
const phaseLabel = PHASE_LABELS[p.phase] || p.phase;
|
|
180
182
|
const scaled = Math.round(p.percent * 0.6);
|
|
181
183
|
progress(p.phase, scaled, phaseLabel);
|
|
184
|
+
}, {
|
|
185
|
+
featureClusterRepo: repoNameForFeatureClusters,
|
|
186
|
+
lastIndexedCommit: currentCommit || undefined,
|
|
182
187
|
});
|
|
183
188
|
// ── Phase 2: LadybugDB (60–85%) ──────────────────────────────────
|
|
184
|
-
progress('
|
|
185
|
-
await
|
|
186
|
-
const
|
|
187
|
-
for (const f of
|
|
189
|
+
progress('cgdb', 60, 'Loading into LadybugDB...');
|
|
190
|
+
await closeCgdb();
|
|
191
|
+
const cgdbFiles = [cgdbPath, `${cgdbPath}.wal`, `${cgdbPath}.lock`];
|
|
192
|
+
for (const f of cgdbFiles) {
|
|
188
193
|
try {
|
|
189
194
|
await fs.rm(f, { recursive: true, force: true });
|
|
190
195
|
}
|
|
@@ -192,16 +197,16 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
192
197
|
/* swallow */
|
|
193
198
|
}
|
|
194
199
|
}
|
|
195
|
-
await
|
|
200
|
+
await initCgdb(cgdbPath);
|
|
196
201
|
try {
|
|
197
|
-
// All work after
|
|
202
|
+
// All work after initCgdb is wrapped in try/finally to ensure closeCgdb()
|
|
198
203
|
// is called even if an error occurs — the module-level singleton DB handle
|
|
199
204
|
// must be released to avoid blocking subsequent invocations.
|
|
200
|
-
let
|
|
201
|
-
await
|
|
202
|
-
|
|
203
|
-
const pct = Math.min(84, 60 + Math.round((
|
|
204
|
-
progress('
|
|
205
|
+
let cgdbMsgCount = 0;
|
|
206
|
+
await loadGraphToCgdb(pipelineResult.graph, pipelineResult.repoPath, storagePath, (msg) => {
|
|
207
|
+
cgdbMsgCount++;
|
|
208
|
+
const pct = Math.min(84, 60 + Math.round((cgdbMsgCount / (cgdbMsgCount + 10)) * 24));
|
|
209
|
+
progress('cgdb', pct, msg);
|
|
205
210
|
},
|
|
206
211
|
// RFC 0001 Phase 2: when --compress is set, every content row goes
|
|
207
212
|
// through encodeContent before hitting the CSV. Default 'none' is
|
|
@@ -244,7 +249,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
244
249
|
// ── Phase 3.5: Re-insert cached embeddings ────────────────────────
|
|
245
250
|
if (cachedEmbeddings.length > 0) {
|
|
246
251
|
const cachedDims = cachedEmbeddings[0].embedding.length;
|
|
247
|
-
const { EMBEDDING_DIMS } = await import('./
|
|
252
|
+
const { EMBEDDING_DIMS } = await import('./cgdb/schema.js');
|
|
248
253
|
if (cachedDims !== EMBEDDING_DIMS) {
|
|
249
254
|
// Dimensions changed (e.g. switched embedding model) — discard cache and re-embed all
|
|
250
255
|
log(`Embedding dimensions changed (${cachedDims}d -> ${EMBEDDING_DIMS}d), discarding cache`);
|
|
@@ -267,7 +272,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
267
272
|
}
|
|
268
273
|
}
|
|
269
274
|
// ── Phase 4: Embeddings (90–98%) ──────────────────────────────────
|
|
270
|
-
const stats = await
|
|
275
|
+
const stats = await getCgdbStats();
|
|
271
276
|
let embeddingSkipped = true;
|
|
272
277
|
if (options.embeddings) {
|
|
273
278
|
if (stats.nodes <= EMBEDDING_NODE_LIMIT) {
|
|
@@ -334,6 +339,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
334
339
|
nodes: stats.nodes,
|
|
335
340
|
edges: stats.edges,
|
|
336
341
|
communities: pipelineResult.communityResult?.stats.totalCommunities,
|
|
342
|
+
featureClusters: pipelineResult.featureClusterResult?.stats.totalClusters,
|
|
337
343
|
processes: pipelineResult.processResult?.stats.totalProcesses,
|
|
338
344
|
embeddings: embeddingCount,
|
|
339
345
|
},
|
|
@@ -372,7 +378,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
372
378
|
nodes: stats.nodes,
|
|
373
379
|
edges: stats.edges,
|
|
374
380
|
communities: pipelineResult.communityResult?.stats.totalCommunities,
|
|
375
|
-
clusters: aggregatedClusterCount,
|
|
381
|
+
clusters: pipelineResult.featureClusterResult?.stats.totalClusters ?? aggregatedClusterCount,
|
|
376
382
|
processes: pipelineResult.processResult?.stats.totalProcesses,
|
|
377
383
|
}, undefined, { skipAgentsMd: options.skipAgentsMd, noStats: options.noStats });
|
|
378
384
|
}
|
|
@@ -380,7 +386,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
380
386
|
// Best-effort — don't fail the entire analysis for context file issues
|
|
381
387
|
}
|
|
382
388
|
// ── Close LadybugDB ──────────────────────────────────────────────
|
|
383
|
-
await
|
|
389
|
+
await closeCgdb();
|
|
384
390
|
progress('done', 100, 'Done');
|
|
385
391
|
return {
|
|
386
392
|
repoName: projectName,
|
|
@@ -392,7 +398,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
392
398
|
catch (err) {
|
|
393
399
|
// Ensure LadybugDB is closed even on error
|
|
394
400
|
try {
|
|
395
|
-
await
|
|
401
|
+
await closeCgdb();
|
|
396
402
|
}
|
|
397
403
|
catch {
|
|
398
404
|
/* swallow */
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
* Always reads from the database (no cached state to drift).
|
|
6
6
|
*
|
|
7
7
|
* FTS indexes are created lazily on first query (via `ensureFTSIndex`) — see
|
|
8
|
-
* `
|
|
8
|
+
* `cgdb-adapter.ts` for the rationale. This keeps `analyze` fast (the
|
|
9
9
|
* ~440 ms × 5 LadybugDB CREATE_FTS_INDEX cost dominates pipeline time on
|
|
10
10
|
* small repos / CI runners) at the cost of paying that overhead on the
|
|
11
11
|
* first `query`/`context` call in a session.
|
|
@@ -20,7 +20,7 @@ export interface BM25SearchResult {
|
|
|
20
20
|
* Drop all ensured-FTS cache entries for a given repoId.
|
|
21
21
|
*
|
|
22
22
|
* Called from the pool-close listener so that a pool teardown / recreation
|
|
23
|
-
* forces the next `
|
|
23
|
+
* forces the next `searchFTSFromCgdb` call to re-issue `CREATE_FTS_INDEX`
|
|
24
24
|
* against the fresh connection rather than trust stale ensure-state from a
|
|
25
25
|
* previous pool lifetime.
|
|
26
26
|
*
|
|
@@ -38,4 +38,4 @@ export declare function invalidateEnsuredFTSForRepo(repoId: string): void;
|
|
|
38
38
|
* @param repoId - If provided, queries will be routed via the MCP connection pool
|
|
39
39
|
* @returns Ranked search results from FTS indexes
|
|
40
40
|
*/
|
|
41
|
-
export declare const
|
|
41
|
+
export declare const searchFTSFromCgdb: (query: string, limit?: number, repoId?: string) => Promise<BM25SearchResult[]>;
|
|
@@ -5,14 +5,14 @@
|
|
|
5
5
|
* Always reads from the database (no cached state to drift).
|
|
6
6
|
*
|
|
7
7
|
* FTS indexes are created lazily on first query (via `ensureFTSIndex`) — see
|
|
8
|
-
* `
|
|
8
|
+
* `cgdb-adapter.ts` for the rationale. This keeps `analyze` fast (the
|
|
9
9
|
* ~440 ms × 5 LadybugDB CREATE_FTS_INDEX cost dominates pipeline time on
|
|
10
10
|
* small repos / CI runners) at the cost of paying that overhead on the
|
|
11
11
|
* first `query`/`context` call in a session.
|
|
12
12
|
*/
|
|
13
|
-
import { queryFTS, ensureFTSIndex, executeQuery as executeCoreQuery, } from '../
|
|
13
|
+
import { queryFTS, ensureFTSIndex, executeQuery as executeCoreQuery, } from '../cgdb/cgdb-adapter.js';
|
|
14
14
|
/**
|
|
15
|
-
* FTS table set served by `
|
|
15
|
+
* FTS table set served by `searchFTSFromCgdb`. Centralised so that both
|
|
16
16
|
* the CLI/pipeline path and the MCP pool path stay in lockstep.
|
|
17
17
|
*
|
|
18
18
|
* The properties list is computed at FTS-create time via `ftsPropertiesFor`
|
|
@@ -72,7 +72,7 @@ const FALLBACK_FIELD_WEIGHTS = {
|
|
|
72
72
|
/**
|
|
73
73
|
* Per-process cache for the MCP pool path: tracks which `(repoId, table)`
|
|
74
74
|
* pairs have been ensured. The CLI/pipeline path gets its own cache inside
|
|
75
|
-
* `
|
|
75
|
+
* `cgdb-adapter.ts` keyed by table/index, scoped to the singleton connection.
|
|
76
76
|
*
|
|
77
77
|
* IMPORTANT: an entry is added ONLY when the index was confirmed to exist
|
|
78
78
|
* (CREATE_FTS_INDEX succeeded, or failed with `'already exists'`). Other
|
|
@@ -80,14 +80,14 @@ const FALLBACK_FIELD_WEIGHTS = {
|
|
|
80
80
|
* unset so the next query retries instead of silently caching the failure.
|
|
81
81
|
*
|
|
82
82
|
* Entries for a given repoId are invalidated when its pool is closed —
|
|
83
|
-
* see the `addPoolCloseListener` registration in `
|
|
83
|
+
* see the `addPoolCloseListener` registration in `searchFTSFromCgdb`.
|
|
84
84
|
*/
|
|
85
85
|
const ensuredPoolFTS = new Set();
|
|
86
86
|
/**
|
|
87
87
|
* Drop all ensured-FTS cache entries for a given repoId.
|
|
88
88
|
*
|
|
89
89
|
* Called from the pool-close listener so that a pool teardown / recreation
|
|
90
|
-
* forces the next `
|
|
90
|
+
* forces the next `searchFTSFromCgdb` call to re-issue `CREATE_FTS_INDEX`
|
|
91
91
|
* against the fresh connection rather than trust stale ensure-state from a
|
|
92
92
|
* previous pool lifetime.
|
|
93
93
|
*
|
|
@@ -245,7 +245,7 @@ properties = ['name', 'content']) {
|
|
|
245
245
|
* @param repoId - If provided, queries will be routed via the MCP connection pool
|
|
246
246
|
* @returns Ranked search results from FTS indexes
|
|
247
247
|
*/
|
|
248
|
-
export const
|
|
248
|
+
export const searchFTSFromCgdb = async (query, limit = 20, repoId) => {
|
|
249
249
|
if (!query.trim() || limit <= 0)
|
|
250
250
|
return [];
|
|
251
251
|
let fileResults, functionResults, classResults, methodResults, interfaceResults;
|
|
@@ -253,7 +253,7 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
|
253
253
|
// Use MCP connection pool via dynamic import
|
|
254
254
|
// IMPORTANT: FTS queries run sequentially to avoid connection contention.
|
|
255
255
|
// The MCP pool supports multiple connections, but FTS is best run serially.
|
|
256
|
-
const poolMod = await import('../
|
|
256
|
+
const poolMod = await import('../cgdb/pool-adapter.js');
|
|
257
257
|
const { executeQuery, addPoolCloseListener } = poolMod;
|
|
258
258
|
// Register the pool-close listener lazily on first use so a teardown of
|
|
259
259
|
// the pool entry (LRU eviction, idle timeout, explicit close) drops the
|
|
@@ -287,7 +287,7 @@ export const searchFTSFromLbug = async (query, limit = 20, repoId) => {
|
|
|
287
287
|
}
|
|
288
288
|
}
|
|
289
289
|
else {
|
|
290
|
-
// Use core
|
|
290
|
+
// Use core cgdb adapter (CLI / pipeline context) — also sequential for safety.
|
|
291
291
|
// Lazy-create FTS indexes on first query (analyze no longer does it).
|
|
292
292
|
// RFC 0001 Phase 2.5 — same `compress`-aware property selection as the MCP
|
|
293
293
|
// path; the CLI walks up from cwd to find the repo's meta.json.
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
* This is the same approach used by Elasticsearch, Pinecone, and other
|
|
8
8
|
* production search systems.
|
|
9
9
|
*/
|
|
10
|
-
import {
|
|
10
|
+
import { searchFTSFromCgdb } from './bm25-index.js';
|
|
11
11
|
/**
|
|
12
12
|
* RRF constant - standard value used in the literature
|
|
13
13
|
* Higher values give more weight to lower-ranked results
|
|
@@ -112,7 +112,7 @@ export const formatHybridResults = (results) => {
|
|
|
112
112
|
*/
|
|
113
113
|
export const hybridSearch = async (query, limit, executeQuery, semanticSearch) => {
|
|
114
114
|
// Use LadybugDB FTS for always-fresh BM25 results
|
|
115
|
-
const bm25Results = await
|
|
115
|
+
const bm25Results = await searchFTSFromCgdb(query, limit);
|
|
116
116
|
const semanticResults = await semanticSearch(executeQuery, query, limit);
|
|
117
117
|
return mergeWithRRF(bm25Results, semanticResults, limit);
|
|
118
118
|
};
|
|
@@ -41,14 +41,14 @@ export declare class WikiGenerator {
|
|
|
41
41
|
private repoPath;
|
|
42
42
|
private storagePath;
|
|
43
43
|
private wikiDir;
|
|
44
|
-
private
|
|
44
|
+
private cgdbPath;
|
|
45
45
|
private llmConfig;
|
|
46
46
|
private maxTokensPerModule;
|
|
47
47
|
private concurrency;
|
|
48
48
|
private options;
|
|
49
49
|
private onProgress;
|
|
50
50
|
private failedModules;
|
|
51
|
-
constructor(repoPath: string, storagePath: string,
|
|
51
|
+
constructor(repoPath: string, storagePath: string, cgdbPath: string, llmConfig: LLMConfig, options?: WikiOptions, onProgress?: ProgressCallback);
|
|
52
52
|
private lastPercent;
|
|
53
53
|
/**
|
|
54
54
|
* Create streaming options that report LLM progress to the progress bar.
|
|
@@ -26,18 +26,18 @@ export class WikiGenerator {
|
|
|
26
26
|
repoPath;
|
|
27
27
|
storagePath;
|
|
28
28
|
wikiDir;
|
|
29
|
-
|
|
29
|
+
cgdbPath;
|
|
30
30
|
llmConfig;
|
|
31
31
|
maxTokensPerModule;
|
|
32
32
|
concurrency;
|
|
33
33
|
options;
|
|
34
34
|
onProgress;
|
|
35
35
|
failedModules = [];
|
|
36
|
-
constructor(repoPath, storagePath,
|
|
36
|
+
constructor(repoPath, storagePath, cgdbPath, llmConfig, options = {}, onProgress) {
|
|
37
37
|
this.repoPath = repoPath;
|
|
38
38
|
this.storagePath = storagePath;
|
|
39
39
|
this.wikiDir = path.join(storagePath, WIKI_DIR);
|
|
40
|
-
this.
|
|
40
|
+
this.cgdbPath = cgdbPath;
|
|
41
41
|
this.options = options;
|
|
42
42
|
this.llmConfig = llmConfig;
|
|
43
43
|
this.maxTokensPerModule = options.maxTokensPerModule ?? DEFAULT_MAX_TOKENS_PER_MODULE;
|
|
@@ -134,7 +134,7 @@ export class WikiGenerator {
|
|
|
134
134
|
}
|
|
135
135
|
// Init graph
|
|
136
136
|
this.onProgress('init', 2, 'Connecting to knowledge graph...');
|
|
137
|
-
await initWikiDb(this.
|
|
137
|
+
await initWikiDb(this.cgdbPath);
|
|
138
138
|
let result;
|
|
139
139
|
try {
|
|
140
140
|
if (!forceMode && existingMeta && existingMeta.fromCommit) {
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* Graph Queries for Wiki Generation
|
|
3
3
|
*
|
|
4
4
|
* Encapsulated Cypher queries against the CodraGraph knowledge graph.
|
|
5
|
-
* Uses the MCP-style pooled
|
|
5
|
+
* Uses the MCP-style pooled cgdb-adapter for connection management.
|
|
6
6
|
*/
|
|
7
7
|
/**
|
|
8
8
|
* Touch the wiki DB connection to prevent idle timeout during long LLM calls.
|
|
@@ -36,7 +36,7 @@ export interface ProcessInfo {
|
|
|
36
36
|
/**
|
|
37
37
|
* Initialize the LadybugDB connection for wiki generation.
|
|
38
38
|
*/
|
|
39
|
-
export declare function initWikiDb(
|
|
39
|
+
export declare function initWikiDb(cgdbPath: string): Promise<void>;
|
|
40
40
|
/**
|
|
41
41
|
* Close the LadybugDB connection.
|
|
42
42
|
*/
|
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
* Graph Queries for Wiki Generation
|
|
3
3
|
*
|
|
4
4
|
* Encapsulated Cypher queries against the CodraGraph knowledge graph.
|
|
5
|
-
* Uses the MCP-style pooled
|
|
5
|
+
* Uses the MCP-style pooled cgdb-adapter for connection management.
|
|
6
6
|
*/
|
|
7
|
-
import {
|
|
7
|
+
import { initCgdb, executeQuery, closeCgdb, touchRepo } from '../cgdb/pool-adapter.js';
|
|
8
8
|
const REPO_ID = '__wiki__';
|
|
9
9
|
/**
|
|
10
10
|
* Touch the wiki DB connection to prevent idle timeout during long LLM calls.
|
|
@@ -15,14 +15,14 @@ export function touchWikiDb() {
|
|
|
15
15
|
/**
|
|
16
16
|
* Initialize the LadybugDB connection for wiki generation.
|
|
17
17
|
*/
|
|
18
|
-
export async function initWikiDb(
|
|
19
|
-
await
|
|
18
|
+
export async function initWikiDb(cgdbPath) {
|
|
19
|
+
await initCgdb(REPO_ID, cgdbPath);
|
|
20
20
|
}
|
|
21
21
|
/**
|
|
22
22
|
* Close the LadybugDB connection.
|
|
23
23
|
*/
|
|
24
24
|
export async function closeWikiDb() {
|
|
25
|
-
await
|
|
25
|
+
await closeCgdb(REPO_ID);
|
|
26
26
|
}
|
|
27
27
|
/**
|
|
28
28
|
* Get all source files with their exported symbol names and types.
|