@codragraph/cli 2.1.0 → 2.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/README.md +62 -21
  2. package/dist/_shared/cgdb/schema-constants.d.ts +2 -2
  3. package/dist/_shared/cgdb/schema-constants.d.ts.map +1 -1
  4. package/dist/_shared/cgdb/schema-constants.js +3 -0
  5. package/dist/_shared/cgdb/schema-constants.js.map +1 -1
  6. package/dist/_shared/feature-clusters.d.ts +99 -0
  7. package/dist/_shared/feature-clusters.d.ts.map +1 -0
  8. package/dist/_shared/feature-clusters.js +2 -0
  9. package/dist/_shared/feature-clusters.js.map +1 -0
  10. package/dist/_shared/graph/types.d.ts +16 -2
  11. package/dist/_shared/graph/types.d.ts.map +1 -1
  12. package/dist/_shared/index.d.ts +1 -0
  13. package/dist/_shared/index.d.ts.map +1 -1
  14. package/dist/_shared/index.js.map +1 -1
  15. package/dist/_shared/pipeline.d.ts +1 -1
  16. package/dist/_shared/pipeline.d.ts.map +1 -1
  17. package/dist/cli/ai-context.js +4 -0
  18. package/dist/cli/analyze.js +46 -26
  19. package/dist/cli/index.js +39 -1
  20. package/dist/cli/serve.d.ts +1 -0
  21. package/dist/cli/serve.js +3 -1
  22. package/dist/cli/setup.js +42 -21
  23. package/dist/cli/status.d.ts +13 -0
  24. package/dist/cli/status.js +99 -0
  25. package/dist/cli/tool.d.ts +25 -0
  26. package/dist/cli/tool.js +74 -0
  27. package/dist/config/ignore-service.js +2 -0
  28. package/dist/config/supported-languages.d.ts +3 -3
  29. package/dist/config/supported-languages.js +3 -3
  30. package/dist/core/cgdb/cgdb-adapter.js +19 -3
  31. package/dist/core/cgdb/csv-generator.js +33 -2
  32. package/dist/core/cgdb/schema.d.ts +2 -1
  33. package/dist/core/cgdb/schema.js +55 -0
  34. package/dist/core/embeddings/embedder.js +4 -2
  35. package/dist/core/graphstore/cgdb-row-source.js +3 -2
  36. package/dist/core/graphstore/index.d.ts +1 -1
  37. package/dist/core/graphstore/index.js +1 -1
  38. package/dist/core/group/bridge-db.js +42 -10
  39. package/dist/core/group/service.d.ts +16 -0
  40. package/dist/core/group/service.js +360 -0
  41. package/dist/core/ingestion/emit-references.d.ts +1 -1
  42. package/dist/core/ingestion/emit-references.js +1 -1
  43. package/dist/core/ingestion/feature-cluster-processor.d.ts +62 -0
  44. package/dist/core/ingestion/feature-cluster-processor.js +626 -0
  45. package/dist/core/ingestion/finalize-orchestrator.js +1 -1
  46. package/dist/core/ingestion/model/registration-table.js +1 -0
  47. package/dist/core/ingestion/model/resolve.d.ts +2 -2
  48. package/dist/core/ingestion/model/resolve.js +3 -3
  49. package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
  50. package/dist/core/ingestion/model/semantic-model.js +1 -1
  51. package/dist/core/ingestion/model/symbol-table.d.ts +1 -1
  52. package/dist/core/ingestion/model/symbol-table.js +1 -1
  53. package/dist/core/ingestion/pipeline-phases/feature-clusters.d.ts +17 -0
  54. package/dist/core/ingestion/pipeline-phases/feature-clusters.js +88 -0
  55. package/dist/core/ingestion/pipeline-phases/index.d.ts +1 -0
  56. package/dist/core/ingestion/pipeline-phases/index.js +1 -0
  57. package/dist/core/ingestion/pipeline.d.ts +4 -0
  58. package/dist/core/ingestion/pipeline.js +9 -5
  59. package/dist/core/run-analyze.d.ts +21 -0
  60. package/dist/core/run-analyze.js +213 -6
  61. package/dist/core/search/hybrid-search.js +11 -3
  62. package/dist/mcp/core/embedder.js +5 -2
  63. package/dist/mcp/local/local-backend.d.ts +12 -0
  64. package/dist/mcp/local/local-backend.js +381 -3
  65. package/dist/mcp/resources.js +139 -0
  66. package/dist/mcp/tools.js +174 -2
  67. package/dist/server/api.d.ts +14 -2
  68. package/dist/server/api.js +206 -7
  69. package/dist/server/mcp-http.d.ts +22 -0
  70. package/dist/server/mcp-http.js +21 -2
  71. package/dist/server/web-dashboard.d.ts +28 -0
  72. package/dist/server/web-dashboard.js +61 -0
  73. package/dist/storage/repo-manager.d.ts +6 -1
  74. package/dist/storage/repo-manager.js +5 -1
  75. package/dist/types/pipeline.d.ts +2 -0
  76. package/dist/web/assets/agent-D5lb0zXz.js +1089 -0
  77. package/dist/web/assets/architectureDiagram-EMZXCZ2Q-CZtc99v_.js +36 -0
  78. package/dist/web/assets/blockDiagram-IGV67L2C-BtoUp-6Y.js +132 -0
  79. package/dist/web/assets/c4Diagram-DFAF54RM-C4Hl3J2U.js +10 -0
  80. package/dist/web/assets/chunk-3GS5O3IE-DkUjU0WD.js +231 -0
  81. package/dist/web/assets/chunk-3YCYZ6SJ-CQkVgT_z.js +1 -0
  82. package/dist/web/assets/chunk-7RZVMHOQ-BitYcNVR.js +338 -0
  83. package/dist/web/assets/chunk-AEOMTBSW-BgTIXPsY.js +1 -0
  84. package/dist/web/assets/chunk-H3VCZNTA-Cx5XV_aC.js +13 -0
  85. package/dist/web/assets/chunk-HN6EAY2L-BBnyTNdB.js +1 -0
  86. package/dist/web/assets/chunk-KSICW3F5-BYzvDLNI.js +15 -0
  87. package/dist/web/assets/chunk-O5ABG6QK-dHwHzA6n.js +1 -0
  88. package/dist/web/assets/chunk-PK6DOVAG-CvsEnugt.js +206 -0
  89. package/dist/web/assets/chunk-RWUO3TPN-BgRTY0_k.js +1 -0
  90. package/dist/web/assets/chunk-TBF5ZNIQ-DL5stGM1.js +1 -0
  91. package/dist/web/assets/chunk-TU3PZOEN-RLyvLcv-.js +1 -0
  92. package/dist/web/assets/classDiagram-PPOCWD7C-DTr8QIOf.js +1 -0
  93. package/dist/web/assets/classDiagram-v2-23LJLIIU-DTr8QIOf.js +1 -0
  94. package/dist/web/assets/context-builder-22jU3V56.js +16 -0
  95. package/dist/web/assets/cose-bilkent-PNC4W37J-DVhePRYg.js +1 -0
  96. package/dist/web/assets/dagre-E77IOHMT-Dzx0A6ZU.js +4 -0
  97. package/dist/web/assets/diagram-H7BISOXX-CC9pRew1.js +43 -0
  98. package/dist/web/assets/diagram-JC5VWROH-Bau_i9tf.js +24 -0
  99. package/dist/web/assets/diagram-LXUTUG65-D9_FM2Gt.js +10 -0
  100. package/dist/web/assets/diagram-WEHSV5V5-BMlayouL.js +24 -0
  101. package/dist/web/assets/erDiagram-GCSMX5X6-C3dhDFA8.js +85 -0
  102. package/dist/web/assets/flowDiagram-OTCZ4VVT-CWSFWmhr.js +162 -0
  103. package/dist/web/assets/ganttDiagram-MUNLMDZQ-D3a67Yol.js +292 -0
  104. package/dist/web/assets/gitGraphDiagram-3HKGZ4G3-7jmry-vM.js +106 -0
  105. package/dist/web/assets/index-BgeqpYgd.js +1415 -0
  106. package/dist/web/assets/index-CT0GtFLZ.css +1 -0
  107. package/dist/web/assets/infoDiagram-MN7RKWGX-G7lhP0Ib.js +2 -0
  108. package/dist/web/assets/ishikawaDiagram-YMYX4NHK-DUoJvNP2.js +70 -0
  109. package/dist/web/assets/journeyDiagram-SO5T7YLQ-RMFPNNqz.js +139 -0
  110. package/dist/web/assets/kanban-definition-LJHFXRCJ-BzpDs1K9.js +89 -0
  111. package/dist/web/assets/katex-GD7MH7QM-DBQvrix-.js +261 -0
  112. package/dist/web/assets/mindmap-definition-2EUWGEK5-Bk0O4roa.js +96 -0
  113. package/dist/web/assets/pieDiagram-3IATQBI2-DKU7kpgS.js +30 -0
  114. package/dist/web/assets/quadrantDiagram-E256RVCF-BY0TGWCS.js +7 -0
  115. package/dist/web/assets/requirementDiagram-M5DCFWZL-DLHOVTSv.js +84 -0
  116. package/dist/web/assets/sankeyDiagram-L3NBLAOT-DVMj5rX2.js +10 -0
  117. package/dist/web/assets/sequenceDiagram-ZOUHS735-CJC73bV-.js +157 -0
  118. package/dist/web/assets/stateDiagram-MLPALWAM-BCFyESls.js +1 -0
  119. package/dist/web/assets/stateDiagram-v2-B5LQ5ZB2-DahzzIca.js +1 -0
  120. package/dist/web/assets/timeline-definition-5SPVSISX-TRSDRgPw.js +120 -0
  121. package/dist/web/assets/vennDiagram-IE5QUKF5-DNy7HRBM.js +34 -0
  122. package/dist/web/assets/wardley-RL74JXVD-BCRCBASE-B-eZEzf9.js +161 -0
  123. package/dist/web/assets/wardleyDiagram-XU3VSMPF-BP-r1xzR.js +20 -0
  124. package/dist/web/assets/xychartDiagram-ZHJ5623Y-Dr9r7a35.js +7 -0
  125. package/dist/web/codragraph-logo-512.png +0 -0
  126. package/dist/web/codragraph-logo.png +0 -0
  127. package/dist/web/favicon.png +0 -0
  128. package/dist/web/index.html +36 -0
  129. package/hooks/claude/codragraph-hook.cjs +24 -9
  130. package/hooks/claude/pre-tool-use.sh +6 -1
  131. package/package.json +15 -4
  132. package/scripts/build.js +75 -16
  133. package/scripts/patch-tree-sitter-swift.cjs +0 -1
  134. package/skills/codragraph-cli.md +17 -1
  135. package/skills/codragraph-guide.md +6 -2
  136. package/skills/codragraph-onboarding.md +2 -2
  137. package/vendor/leiden/index.cjs +272 -285
  138. package/vendor/leiden/utils.cjs +264 -274
  139. package/dist/_shared/lbug/schema-constants.d.ts +0 -16
  140. package/dist/_shared/lbug/schema-constants.d.ts.map +0 -1
  141. package/dist/_shared/lbug/schema-constants.js +0 -67
  142. package/dist/_shared/lbug/schema-constants.js.map +0 -1
  143. package/dist/core/graphstore/lbug-row-source.d.ts +0 -19
  144. package/dist/core/graphstore/lbug-row-source.js +0 -141
  145. package/dist/core/lbug/content-read.d.ts +0 -46
  146. package/dist/core/lbug/content-read.js +0 -64
  147. package/dist/core/lbug/csv-generator.d.ts +0 -29
  148. package/dist/core/lbug/csv-generator.js +0 -492
  149. package/dist/core/lbug/lbug-adapter.d.ts +0 -176
  150. package/dist/core/lbug/lbug-adapter.js +0 -1320
  151. package/dist/core/lbug/pool-adapter.d.ts +0 -93
  152. package/dist/core/lbug/pool-adapter.js +0 -550
  153. package/dist/core/lbug/schema.d.ts +0 -62
  154. package/dist/core/lbug/schema.js +0 -502
  155. package/dist/mcp/core/lbug-adapter.d.ts +0 -5
  156. package/dist/mcp/core/lbug-adapter.js +0 -5
@@ -9,7 +9,7 @@
9
9
  *
10
10
  * ## Dependency direction
11
11
  *
12
- * codragraph-shared (NodeLabel) — leaf
12
+ * @codragraph/shared (NodeLabel) — leaf
13
13
  * ↑
14
14
  * symbol-table.ts — pure file/callable index
15
15
  * ↑
@@ -17,7 +17,7 @@
17
17
  *
18
18
  * Dependency direction (strictly enforced):
19
19
  *
20
- * codragraph-shared (NodeLabel) — leaf type
20
+ * @codragraph/shared (NodeLabel) — leaf type
21
21
  * ↑
22
22
  * symbol-table.ts — THIS FILE (pure storage)
23
23
  * ↑
@@ -17,7 +17,7 @@
17
17
  *
18
18
  * Dependency direction (strictly enforced):
19
19
  *
20
- * codragraph-shared (NodeLabel) — leaf type
20
+ * @codragraph/shared (NodeLabel) — leaf type
21
21
  * ↑
22
22
  * symbol-table.ts — THIS FILE (pure storage)
23
23
  * ↑
@@ -0,0 +1,17 @@
1
+ /**
2
+ * Phase: featureClusters
3
+ *
4
+ * Creates human-facing FeatureCluster nodes above algorithmic Community nodes.
5
+ * This is the layer agents query for product/domain areas such as Settings,
6
+ * AI, Auth, MCP, or Ingestion before drilling into exact symbols.
7
+ *
8
+ * @deps processes, structure
9
+ * @reads graph (all nodes and relationships)
10
+ * @writes graph (FeatureCluster nodes, FEATURE_MEMBER_OF, FEATURE_DEPENDS_ON)
11
+ */
12
+ import type { PipelinePhase } from './types.js';
13
+ import { type FeatureClusterDetectionResult } from '../feature-cluster-processor.js';
14
+ export interface FeatureClustersOutput {
15
+ featureClusterResult: FeatureClusterDetectionResult;
16
+ }
17
+ export declare const featureClustersPhase: PipelinePhase<FeatureClustersOutput>;
@@ -0,0 +1,88 @@
1
+ /**
2
+ * Phase: featureClusters
3
+ *
4
+ * Creates human-facing FeatureCluster nodes above algorithmic Community nodes.
5
+ * This is the layer agents query for product/domain areas such as Settings,
6
+ * AI, Auth, MCP, or Ingestion before drilling into exact symbols.
7
+ *
8
+ * @deps processes, structure
9
+ * @reads graph (all nodes and relationships)
10
+ * @writes graph (FeatureCluster nodes, FEATURE_MEMBER_OF, FEATURE_DEPENDS_ON)
11
+ */
12
+ import { getPhaseOutput } from './types.js';
13
+ import { processFeatureClusters, } from '../feature-cluster-processor.js';
14
+ import { generateId } from '../../../lib/utils.js';
15
+ import { isDev } from '../utils/env.js';
16
+ export const featureClustersPhase = {
17
+ name: 'featureClusters',
18
+ deps: ['processes', 'structure'],
19
+ async execute(ctx, deps) {
20
+ const { totalFiles } = getPhaseOutput(deps, 'structure');
21
+ ctx.onProgress({
22
+ phase: 'feature_clusters',
23
+ percent: 99,
24
+ message: 'Building feature clusters...',
25
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: ctx.graph.nodeCount },
26
+ });
27
+ const featureClusterResult = await processFeatureClusters(ctx.graph, (message, progress) => {
28
+ ctx.onProgress({
29
+ phase: 'feature_clusters',
30
+ percent: Math.round(99 + progress * 0.009),
31
+ message,
32
+ stats: { filesProcessed: totalFiles, totalFiles, nodesCreated: ctx.graph.nodeCount },
33
+ });
34
+ }, {
35
+ repo: ctx.options?.featureClusterRepo,
36
+ lastIndexedCommit: ctx.options?.lastIndexedCommit,
37
+ });
38
+ if (isDev) {
39
+ console.log(`Feature clustering: ${featureClusterResult.stats.totalClusters} clusters, ${featureClusterResult.stats.totalMemberships} memberships`);
40
+ }
41
+ featureClusterResult.clusters.forEach((cluster) => {
42
+ ctx.graph.addNode({
43
+ id: cluster.id,
44
+ label: 'FeatureCluster',
45
+ properties: {
46
+ name: cluster.name,
47
+ filePath: '',
48
+ slug: cluster.slug,
49
+ featureKind: cluster.featureKind,
50
+ summary: cluster.summary,
51
+ description: cluster.description,
52
+ repo: cluster.repo,
53
+ service: cluster.service,
54
+ signals: cluster.signals,
55
+ memberCount: cluster.memberCount,
56
+ entryPointIds: cluster.entryPointIds,
57
+ routes: cluster.routes,
58
+ tools: cluster.tools,
59
+ testCoverageHints: cluster.testCoverageHints,
60
+ lastIndexedCommit: cluster.lastIndexedCommit,
61
+ confidence: cluster.confidence,
62
+ source: 'heuristic',
63
+ },
64
+ });
65
+ });
66
+ featureClusterResult.memberships.forEach((membership) => {
67
+ ctx.graph.addRelationship({
68
+ id: generateId('FEATURE_MEMBER_OF', `${membership.nodeId}->${membership.clusterId}`),
69
+ sourceId: membership.nodeId,
70
+ targetId: membership.clusterId,
71
+ type: 'FEATURE_MEMBER_OF',
72
+ confidence: membership.confidence,
73
+ reason: membership.signals.join('|'),
74
+ });
75
+ });
76
+ featureClusterResult.dependencies.forEach((dependency) => {
77
+ ctx.graph.addRelationship({
78
+ id: generateId('FEATURE_DEPENDS_ON', `${dependency.sourceClusterId}->${dependency.targetClusterId}`),
79
+ sourceId: dependency.sourceClusterId,
80
+ targetId: dependency.targetClusterId,
81
+ type: 'FEATURE_DEPENDS_ON',
82
+ confidence: dependency.confidence,
83
+ reason: `member-dependency|edges:${dependency.edgeCount}|types:${dependency.relationshipTypes.join(',')}`,
84
+ });
85
+ });
86
+ return { featureClusterResult };
87
+ },
88
+ };
@@ -17,6 +17,7 @@ export { scopeResolutionPhase, type ScopeResolutionOutput, } from '../scope-reso
17
17
  export { mroPhase, type MROOutput } from './mro.js';
18
18
  export { communitiesPhase, type CommunitiesOutput } from './communities.js';
19
19
  export { processesPhase, type ProcessesOutput } from './processes.js';
20
+ export { featureClustersPhase, type FeatureClustersOutput } from './feature-clusters.js';
20
21
  export { runPipeline } from './runner.js';
21
22
  export type { PipelinePhase, PipelineContext, PhaseResult } from './types.js';
22
23
  export { getPhaseOutput } from './types.js';
@@ -18,6 +18,7 @@ export { scopeResolutionPhase, } from '../scope-resolution/pipeline/phase.js';
18
18
  export { mroPhase } from './mro.js';
19
19
  export { communitiesPhase } from './communities.js';
20
20
  export { processesPhase } from './processes.js';
21
+ export { featureClustersPhase } from './feature-clusters.js';
21
22
  // ── Infrastructure ─────────────────────────────────────────────────────────
22
23
  export { runPipeline } from './runner.js';
23
24
  export { getPhaseOutput } from './types.js';
@@ -21,6 +21,10 @@ export interface PipelineOptions {
21
21
  skipGraphPhases?: boolean;
22
22
  /** Force sequential parsing (no worker pool). Useful for testing the sequential path. */
23
23
  skipWorkers?: boolean;
24
+ /** Repo label written onto FeatureCluster metadata. */
25
+ featureClusterRepo?: string;
26
+ /** Indexed source commit written onto FeatureCluster metadata. */
27
+ lastIndexedCommit?: string;
24
28
  /**
25
29
  * @internal Test-only override for worker-pool gating thresholds.
26
30
  * When unset, production defaults apply (15 files OR 512 KB total bytes).
@@ -15,15 +15,16 @@
15
15
  * See ARCHITECTURE.md for the full phase dependency diagram.
16
16
  */
17
17
  import { createKnowledgeGraph } from '../graph/graph.js';
18
- import { runPipeline, getPhaseOutput, scanPhase, structurePhase, markdownPhase, cobolPhase, parsePhase, routesPhase, toolsPhase, ormPhase, crossFilePhase, scopeResolutionPhase, mroPhase, communitiesPhase, processesPhase, } from './pipeline-phases/index.js';
18
+ import { runPipeline, getPhaseOutput, scanPhase, structurePhase, markdownPhase, cobolPhase, parsePhase, routesPhase, toolsPhase, ormPhase, crossFilePhase, scopeResolutionPhase, mroPhase, communitiesPhase, processesPhase, featureClustersPhase, } from './pipeline-phases/index.js';
19
19
  // ── Phase registry ─────────────────────────────────────────────────────────
20
20
  /**
21
21
  * All pipeline phases with their dependency relationships.
22
22
  *
23
23
  * Phase dependency graph:
24
24
  *
25
- * scan structure [markdown, cobol] parse [routes, tools, orm]
26
- * crossFile mro communities processes
25
+ * scan -> structure -> [markdown, cobol] -> parse -> [routes, tools, orm]
26
+ * -> crossFile -> scopeResolution -> mro -> communities -> processes
27
+ * -> featureClusters
27
28
  *
28
29
  * To add a new phase: create a file in pipeline-phases/, export the phase
29
30
  * object, and add it to the appropriate position in this array.
@@ -42,7 +43,7 @@ function buildPhaseList(options) {
42
43
  scopeResolutionPhase,
43
44
  ];
44
45
  if (!options?.skipGraphPhases) {
45
- phases.push(mroPhase, communitiesPhase, processesPhase);
46
+ phases.push(mroPhase, communitiesPhase, processesPhase, featureClustersPhase);
46
47
  }
47
48
  return phases;
48
49
  }
@@ -62,15 +63,17 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
62
63
  const { totalFiles, usedWorkerPool } = getPhaseOutput(results, 'parse');
63
64
  let communityResult;
64
65
  let processResult;
66
+ let featureClusterResult;
65
67
  if (!options?.skipGraphPhases) {
66
68
  communityResult = getPhaseOutput(results, 'communities').communityResult;
67
69
  processResult = getPhaseOutput(results, 'processes').processResult;
70
+ featureClusterResult = getPhaseOutput(results, 'featureClusters').featureClusterResult;
68
71
  }
69
72
  onProgress({
70
73
  phase: 'complete',
71
74
  percent: 100,
72
75
  message: communityResult && processResult
73
- ? `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes detected.`
76
+ ? `Graph complete! ${communityResult.stats.totalCommunities} communities, ${processResult.stats.totalProcesses} processes, ${featureClusterResult?.stats.totalClusters ?? 0} feature clusters detected.`
74
77
  : 'Graph complete! (graph phases skipped)',
75
78
  stats: {
76
79
  filesProcessed: totalFiles,
@@ -84,6 +87,7 @@ export const runPipelineFromRepo = async (repoPath, onProgress, options) => {
84
87
  totalFileCount: totalFiles,
85
88
  communityResult,
86
89
  processResult,
90
+ featureClusterResult,
87
91
  usedWorkerPool,
88
92
  };
89
93
  };
@@ -8,6 +8,7 @@
8
8
  * IMPORTANT: This module must NEVER call process.exit(). The caller (CLI
9
9
  * wrapper or server worker) is responsible for process lifecycle.
10
10
  */
11
+ import { type RepoMeta } from '../storage/repo-manager.js';
11
12
  import type { ContentEncoding } from '@codragraph/graphstore';
12
13
  export interface AnalyzeCallbacks {
13
14
  onProgress: (phase: string, percent: number, message: string) => void;
@@ -63,14 +64,34 @@ export interface AnalyzeResult {
63
64
  nodes?: number;
64
65
  edges?: number;
65
66
  communities?: number;
67
+ featureClusters?: number;
66
68
  processes?: number;
67
69
  embeddings?: number;
68
70
  };
69
71
  alreadyUpToDate?: boolean;
72
+ /** User-facing explanation for a reused index fast path. */
73
+ reuseReason?: string;
74
+ /** True when the git commit advanced but indexed inputs did not. */
75
+ reusedExistingIndex?: boolean;
70
76
  /** The raw pipeline result — only populated when needed by callers (e.g. skill generation). */
71
77
  pipelineResult?: any;
72
78
  }
79
+ export interface AnalyzeChangedPath {
80
+ /** Git name-status token, e.g. M, A, D, R100. */
81
+ status: string;
82
+ /** Current path for additions/modifications, or deleted path for deletions. */
83
+ path: string;
84
+ /** Previous path for renames/copies. */
85
+ previousPath?: string;
86
+ }
73
87
  export declare const PHASE_LABELS: Record<string, string>;
88
+ export declare const parseGitNameStatus: (raw: string) => AnalyzeChangedPath[];
89
+ export declare const listChangedPathsBetweenCommits: (repoPath: string, fromRef: string, toRef: string) => AnalyzeChangedPath[] | null;
90
+ export declare const isGeneratedAgentContextPath: (filePath: string) => boolean;
91
+ export declare const isGraphContentPath: (filePath: string) => boolean;
92
+ export declare const changedPathAffectsGraph: (change: AnalyzeChangedPath) => boolean;
93
+ export declare const getGraphRelevantChangedPaths: (changes: readonly AnalyzeChangedPath[]) => AnalyzeChangedPath[];
94
+ export declare const getAnalyzeConfigRebuildReason: (existingMeta: Pick<RepoMeta, "compress" | "stats">, options: Pick<AnalyzeOptions, "compress" | "embeddings">) => string | null;
74
95
  /**
75
96
  * Run the full CodraGraph analysis pipeline.
76
97
  *
@@ -10,18 +10,52 @@
10
10
  */
11
11
  import path from 'path';
12
12
  import fs from 'fs/promises';
13
+ import { execFileSync } from 'node:child_process';
13
14
  import * as fsSync from 'node:fs';
14
15
  import * as v8 from 'node:v8';
16
+ import { getLanguageFromFilename } from '../_shared/index.js';
15
17
  import { runPipelineFromRepo } from './ingestion/pipeline.js';
16
18
  import { initCgdb, loadGraphToCgdb, getCgdbStats, executeQuery, executeWithReusedStatement, closeCgdb, loadCachedEmbeddings, } from './cgdb/cgdb-adapter.js';
17
19
  import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, INDEX_SCHEMA_VERSION, } from '../storage/repo-manager.js';
18
20
  import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
21
+ import { shouldIgnorePath } from '../config/ignore-service.js';
19
22
  import { recordAnalysisSnapshot } from './graphstore/index.js';
20
23
  import { generateAIContextFiles } from '../cli/ai-context.js';
21
24
  import { EMBEDDING_TABLE_NAME } from './cgdb/schema.js';
22
25
  import { STALE_HASH_SENTINEL } from './cgdb/schema.js';
23
26
  /** Threshold: auto-skip embeddings for repos with more nodes than this */
24
27
  const EMBEDDING_NODE_LIMIT = 50_000;
28
+ const GENERATED_AGENT_CONTEXT_PATHS = new Set(['agents.md', 'claude.md']);
29
+ const GENERATED_AGENT_CONTEXT_PREFIXES = [
30
+ '.claude/skills/generated/',
31
+ '.cursor/rules/codragraph-generated/',
32
+ ];
33
+ const IGNORE_CONTROL_FILES = new Set(['.gitignore', '.codragraphignore']);
34
+ const GRAPH_CONFIG_BASENAMES = new Set([
35
+ 'package.json',
36
+ 'tsconfig.json',
37
+ 'jsconfig.json',
38
+ 'go.mod',
39
+ 'cargo.toml',
40
+ 'pyproject.toml',
41
+ 'requirements.txt',
42
+ 'composer.json',
43
+ 'gemfile',
44
+ 'pom.xml',
45
+ 'build.gradle',
46
+ 'build.gradle.kts',
47
+ 'settings.gradle',
48
+ 'settings.gradle.kts',
49
+ 'pubspec.yaml',
50
+ 'pubspec.yml',
51
+ 'mix.exs',
52
+ 'rebar.config',
53
+ 'cmakelists.txt',
54
+ 'makefile',
55
+ 'dockerfile',
56
+ ]);
57
+ const GRAPH_CONFIG_PATTERNS = [/^tsconfig\..+\.json$/i, /^jsconfig\..+\.json$/i];
58
+ const MARKDOWN_EXTENSIONS = new Set(['.md', '.mdx']);
25
59
  export const PHASE_LABELS = {
26
60
  extracting: 'Scanning files',
27
61
  structure: 'Building structure',
@@ -31,12 +65,126 @@ export const PHASE_LABELS = {
31
65
  heritage: 'Extracting inheritance',
32
66
  communities: 'Detecting communities',
33
67
  processes: 'Detecting processes',
68
+ feature_clusters: 'Building feature clusters',
34
69
  complete: 'Pipeline complete',
35
70
  cgdb: 'Loading into LadybugDB',
36
71
  fts: 'Creating search indexes',
37
72
  embeddings: 'Generating embeddings',
38
73
  done: 'Done',
39
74
  };
75
+ const normalizeGitPath = (filePath) => filePath.replace(/\\/g, '/');
76
+ export const parseGitNameStatus = (raw) => {
77
+ const tokens = raw.split('\0').filter(Boolean);
78
+ const changes = [];
79
+ for (let i = 0; i < tokens.length;) {
80
+ const status = tokens[i++] ?? '';
81
+ const code = status[0]?.toUpperCase();
82
+ if (code === 'R' || code === 'C') {
83
+ const previousPath = tokens[i++];
84
+ const nextPath = tokens[i++];
85
+ if (previousPath && nextPath) {
86
+ changes.push({
87
+ status,
88
+ path: normalizeGitPath(nextPath),
89
+ previousPath: normalizeGitPath(previousPath),
90
+ });
91
+ }
92
+ continue;
93
+ }
94
+ const changedPath = tokens[i++];
95
+ if (status && changedPath) {
96
+ changes.push({ status, path: normalizeGitPath(changedPath) });
97
+ }
98
+ }
99
+ return changes;
100
+ };
101
+ export const listChangedPathsBetweenCommits = (repoPath, fromRef, toRef) => {
102
+ if (!fromRef || !toRef || fromRef === toRef)
103
+ return [];
104
+ try {
105
+ const stdout = execFileSync('git', ['diff', '--name-status', '-z', `${fromRef}..${toRef}`], {
106
+ cwd: repoPath,
107
+ encoding: 'utf8',
108
+ maxBuffer: 20 * 1024 * 1024,
109
+ stdio: ['ignore', 'pipe', 'pipe'],
110
+ });
111
+ return parseGitNameStatus(stdout);
112
+ }
113
+ catch {
114
+ return null;
115
+ }
116
+ };
117
+ export const isGeneratedAgentContextPath = (filePath) => {
118
+ const normalized = normalizeGitPath(filePath).toLowerCase();
119
+ const basename = path.posix.basename(normalized);
120
+ return (GENERATED_AGENT_CONTEXT_PATHS.has(basename) ||
121
+ GENERATED_AGENT_CONTEXT_PREFIXES.some((prefix) => normalized.startsWith(prefix)));
122
+ };
123
+ export const isGraphContentPath = (filePath) => {
124
+ const normalized = normalizeGitPath(filePath);
125
+ const basename = path.posix.basename(normalized);
126
+ const lowerBasename = basename.toLowerCase();
127
+ if (isGeneratedAgentContextPath(normalized))
128
+ return false;
129
+ if (IGNORE_CONTROL_FILES.has(lowerBasename))
130
+ return true;
131
+ if (shouldIgnorePath(normalized))
132
+ return false;
133
+ if (getLanguageFromFilename(normalized) !== null)
134
+ return true;
135
+ const ext = path.posix.extname(lowerBasename);
136
+ if (MARKDOWN_EXTENSIONS.has(ext))
137
+ return true;
138
+ if (GRAPH_CONFIG_BASENAMES.has(lowerBasename))
139
+ return true;
140
+ return GRAPH_CONFIG_PATTERNS.some((pattern) => pattern.test(basename));
141
+ };
142
+ export const changedPathAffectsGraph = (change) => {
143
+ const statusCode = change.status[0]?.toUpperCase();
144
+ const paths = [change.path, change.previousPath].filter((p) => Boolean(p));
145
+ if (paths.some(isGraphContentPath))
146
+ return true;
147
+ // Add/delete/rename/copy can change File/Folder structure even when content
148
+ // is not parsed. Ignored or generated-agent paths are outside the index.
149
+ if (statusCode === 'A' || statusCode === 'D' || statusCode === 'R' || statusCode === 'C') {
150
+ return paths.some((p) => !isGeneratedAgentContextPath(p) && !shouldIgnorePath(p));
151
+ }
152
+ // Modified non-code/non-doc files keep the same path and are not read by the
153
+ // graph pipeline, so the existing graph can be reused.
154
+ if (statusCode === 'M' || statusCode === 'T')
155
+ return false;
156
+ // Unknown git status: rebuild rather than risk stale graph state.
157
+ return true;
158
+ };
159
+ export const getGraphRelevantChangedPaths = (changes) => changes.filter(changedPathAffectsGraph);
160
+ export const getAnalyzeConfigRebuildReason = (existingMeta, options) => {
161
+ const existingCompress = existingMeta.compress ?? 'none';
162
+ if (options.compress && options.compress !== existingCompress) {
163
+ return `requested compression changed from ${existingCompress} to ${options.compress}`;
164
+ }
165
+ if (options.embeddings && (existingMeta.stats?.embeddings ?? 0) === 0) {
166
+ return 'embeddings were requested but the existing index has no vectors';
167
+ }
168
+ return null;
169
+ };
170
+ const formatChangeForLog = (change) => change.previousPath ? `${change.previousPath} -> ${change.path}` : change.path;
171
+ const buildReusedMeta = (existingMeta, repoPath, currentCommit) => ({
172
+ ...existingMeta,
173
+ repoPath,
174
+ lastCommit: currentCommit,
175
+ indexedAt: new Date().toISOString(),
176
+ schemaVersion: INDEX_SCHEMA_VERSION,
177
+ remoteUrl: hasGitDir(repoPath) ? getRemoteUrl(repoPath) : existingMeta.remoteUrl,
178
+ });
179
+ const pathExists = async (targetPath) => {
180
+ try {
181
+ await fs.stat(targetPath);
182
+ return true;
183
+ }
184
+ catch {
185
+ return false;
186
+ }
187
+ };
40
188
  // ---------------------------------------------------------------------------
41
189
  // Main orchestrator
42
190
  // ---------------------------------------------------------------------------
@@ -129,14 +277,23 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
129
277
  // ── Early-return: already up to date ──────────────────────────────
130
278
  // Schema-version mismatch forces a full re-analyze regardless of commit
131
279
  // equality: existing 1.7.x indexes have no `schemaVersion` field at all,
132
- // and 1.8+ readers expect every node table to carry a `contentEncoding`
133
- // column (RFC 0001 Phase 2). LadybugDB ALTER on existing tables is not
134
- // validated end-to-end yet, so the supported migration path is
135
- // re-analyze → fresh CREATE NODE TABLE.
280
+ // and current readers expect contentEncoding plus rich FeatureCluster
281
+ // context-pack columns. LadybugDB ALTER on existing tables is not validated
282
+ // end-to-end yet, so the supported migration path is re-analyze via a fresh
283
+ // CREATE NODE TABLE.
136
284
  const schemaUpToDate = !!existingMeta && (existingMeta.schemaVersion ?? 0) >= INDEX_SCHEMA_VERSION;
285
+ const existingCgdbPresent = existingMeta ? await pathExists(cgdbPath) : false;
286
+ const storageRebuildReason = existingMeta && schemaUpToDate && !existingCgdbPresent
287
+ ? 'graph database files are missing'
288
+ : null;
289
+ const configRebuildReason = storageRebuildReason ??
290
+ (existingMeta && schemaUpToDate && !options.force
291
+ ? getAnalyzeConfigRebuildReason(existingMeta, options)
292
+ : null);
137
293
  if (existingMeta &&
138
294
  schemaUpToDate &&
139
295
  !options.force &&
296
+ !configRebuildReason &&
140
297
  existingMeta.lastCommit === currentCommit) {
141
298
  // Non-git folders have currentCommit = '' — always rebuild since we can't detect changes
142
299
  if (currentCommit !== '') {
@@ -148,9 +305,54 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
148
305
  };
149
306
  }
150
307
  }
308
+ if (existingMeta && schemaUpToDate && !options.force && configRebuildReason) {
309
+ log(`Re-analyzing: ${configRebuildReason}.`);
310
+ }
311
+ if (existingMeta &&
312
+ schemaUpToDate &&
313
+ !options.force &&
314
+ !configRebuildReason &&
315
+ currentCommit !== '' &&
316
+ existingMeta.lastCommit !== currentCommit) {
317
+ const changedPaths = listChangedPathsBetweenCommits(repoPath, existingMeta.lastCommit, currentCommit);
318
+ if (changedPaths) {
319
+ const graphRelevantChanges = getGraphRelevantChangedPaths(changedPaths);
320
+ if (graphRelevantChanges.length === 0) {
321
+ const reusedMeta = buildReusedMeta(existingMeta, repoPath, currentCommit);
322
+ await saveMeta(storagePath, reusedMeta);
323
+ const projectName = await registerRepo(repoPath, reusedMeta, {
324
+ name: options.registryName,
325
+ allowDuplicateName: options.allowDuplicateName,
326
+ });
327
+ if (hasGitDir(repoPath)) {
328
+ await addToGitignore(repoPath);
329
+ }
330
+ const reuseReason = `Smart analyze reused the existing graph; ${changedPaths.length} changed ` +
331
+ `file(s) did not affect indexed code, docs, config, or file structure.`;
332
+ log(reuseReason);
333
+ progress('done', 100, 'Existing graph reused');
334
+ return {
335
+ repoName: projectName,
336
+ repoPath,
337
+ stats: reusedMeta.stats ?? {},
338
+ alreadyUpToDate: true,
339
+ reusedExistingIndex: true,
340
+ reuseReason,
341
+ };
342
+ }
343
+ const preview = graphRelevantChanges.slice(0, 5).map(formatChangeForLog).join(', ');
344
+ const suffix = graphRelevantChanges.length > 5 ? ', ...' : '';
345
+ log(`Smart analyze: ${graphRelevantChanges.length} indexed change(s) require rebuild` +
346
+ (preview ? ` (${preview}${suffix})` : '') +
347
+ '.');
348
+ }
349
+ else {
350
+ log('Smart analyze: could not inspect git diff; rebuilding.');
351
+ }
352
+ }
151
353
  if (existingMeta && !schemaUpToDate) {
152
354
  log(`Index schema version ${existingMeta.schemaVersion ?? '<missing>'} is older than ` +
153
- `${INDEX_SCHEMA_VERSION} (RFC 0001 Phase 2 — adds contentEncoding column). ` +
355
+ `${INDEX_SCHEMA_VERSION} (FeatureCluster context-pack schema). ` +
154
356
  `Re-analyzing.`);
155
357
  }
156
358
  // ── Cache embeddings from existing index before rebuild ────────────
@@ -175,10 +377,14 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
175
377
  }
176
378
  }
177
379
  // ── Phase 1: Full Pipeline (0–60%) ────────────────────────────────
380
+ const repoNameForFeatureClusters = options.registryName ?? getInferredRepoName(repoPath) ?? path.basename(repoPath);
178
381
  const pipelineResult = await runPipelineFromRepo(repoPath, (p) => {
179
382
  const phaseLabel = PHASE_LABELS[p.phase] || p.phase;
180
383
  const scaled = Math.round(p.percent * 0.6);
181
384
  progress(p.phase, scaled, phaseLabel);
385
+ }, {
386
+ featureClusterRepo: repoNameForFeatureClusters,
387
+ lastIndexedCommit: currentCommit || undefined,
182
388
  });
183
389
  // ── Phase 2: LadybugDB (60–85%) ──────────────────────────────────
184
390
  progress('cgdb', 60, 'Loading into LadybugDB...');
@@ -334,6 +540,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
334
540
  nodes: stats.nodes,
335
541
  edges: stats.edges,
336
542
  communities: pipelineResult.communityResult?.stats.totalCommunities,
543
+ featureClusters: pipelineResult.featureClusterResult?.stats.totalClusters,
337
544
  processes: pipelineResult.processResult?.stats.totalProcesses,
338
545
  embeddings: embeddingCount,
339
546
  },
@@ -372,7 +579,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
372
579
  nodes: stats.nodes,
373
580
  edges: stats.edges,
374
581
  communities: pipelineResult.communityResult?.stats.totalCommunities,
375
- clusters: aggregatedClusterCount,
582
+ clusters: pipelineResult.featureClusterResult?.stats.totalClusters ?? aggregatedClusterCount,
376
583
  processes: pipelineResult.processResult?.stats.totalProcesses,
377
584
  }, undefined, { skipAgentsMd: options.skipAgentsMd, noStats: options.noStats });
378
585
  }
@@ -111,8 +111,16 @@ export const formatHybridResults = (results) => {
111
111
  * The semanticSearch function is injected to keep this module environment-agnostic.
112
112
  */
113
113
  export const hybridSearch = async (query, limit, executeQuery, semanticSearch) => {
114
- // Use LadybugDB FTS for always-fresh BM25 results
115
- const bm25Results = await searchFTSFromCgdb(query, limit);
116
- const semanticResults = await semanticSearch(executeQuery, query, limit);
114
+ const bm25Promise = searchFTSFromCgdb(query, limit);
115
+ // Start semantic work immediately, but gate its DB calls behind BM25.
116
+ // semanticSearch performs embedding before it calls executeQuery, so this
117
+ // overlaps CPU/model work with BM25 while avoiding concurrent queries on the
118
+ // singleton LadybugDB connection used by CLI/HTTP paths.
119
+ const executeAfterBm25 = async (cypher) => {
120
+ await bm25Promise;
121
+ return executeQuery(cypher);
122
+ };
123
+ const semanticPromise = semanticSearch(executeAfterBm25, query, limit);
124
+ const [bm25Results, semanticResults] = await Promise.all([bm25Promise, semanticPromise]);
117
125
  return mergeWithRRF(bm25Results, semanticResults, limit);
118
126
  };
@@ -6,6 +6,8 @@
6
6
  */
7
7
  import { pipeline, env } from '@huggingface/transformers';
8
8
  import { isHttpMode, getHttpDimensions, httpEmbedQuery, } from '../../core/embeddings/http-client.js';
9
+ import { homedir } from 'os';
10
+ import { join } from 'path';
9
11
  import { silenceStdout, restoreStdout, realStderrWrite } from '../../core/cgdb/pool-adapter.js';
10
12
  // Model config
11
13
  const MODEL_ID = 'Snowflake/snowflake-arctic-embed-xs';
@@ -33,8 +35,9 @@ export const initEmbedder = async () => {
33
35
  // Default cache to user-writable location. transformers.js defaults to
34
36
  // ./node_modules/.cache inside its own install dir, which is unwritable
35
37
  // when codragraph is installed globally (e.g. /usr/lib/node_modules/).
36
- // Respect HF_HOME if set, otherwise fall back to ~/.cache/huggingface.
37
- env.cacheDir = process.env.HF_HOME ?? `${process.env.HOME}/.cache/huggingface`;
38
+ // Respect HF_HOME if set, otherwise fall back to a user-writable cache
39
+ // path using Node's OS-aware home directory resolution.
40
+ env.cacheDir = process.env.HF_HOME ?? join(homedir(), '.cache', 'huggingface');
38
41
  console.error('CodraGraph: Loading embedding model (first search may take a moment)...');
39
42
  // Try GPU first (DirectML on Windows, CUDA on Linux), fall back to CPU
40
43
  const isWindows = process.platform === 'win32';
@@ -323,6 +323,18 @@ export declare class LocalBackend {
323
323
  * Query clusters (communities) directly from graph.
324
324
  * Used by getClustersResource — avoids legacy overview() dispatch.
325
325
  */
326
+ /**
327
+ * Query feature clusters directly from graph.
328
+ * FeatureCluster is the human-facing project area layer above Communities.
329
+ */
330
+ queryFeatureClusters(repoName?: string, limit?: number, query?: string): Promise<{
331
+ clusters: any[];
332
+ }>;
333
+ /**
334
+ * Query one feature cluster with members, dependencies, and process links.
335
+ */
336
+ queryFeatureContext(name: string, repoName?: string, limit?: number): Promise<any>;
337
+ queryFeatureImpact(name: string, repoName?: string, direction?: 'upstream' | 'downstream' | 'both', limit?: number): Promise<any>;
326
338
  queryClusters(repoName?: string, limit?: number): Promise<{
327
339
  clusters: any[];
328
340
  }>;