gitnexus 1.2.7 → 1.2.8

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -8,7 +8,7 @@ import cliProgress from 'cli-progress';
8
8
  import { runPipelineFromRepo } from '../core/ingestion/pipeline.js';
9
9
  import { initKuzu, loadGraphToKuzu, getKuzuStats, executeQuery, executeWithReusedStatement, closeKuzu, createFTSIndex, loadCachedEmbeddings } from '../core/kuzu/kuzu-adapter.js';
10
10
  import { runEmbeddingPipeline } from '../core/embeddings/embedding-pipeline.js';
11
- import { disposeEmbedder } from '../core/embeddings/embedder.js';
11
+ // disposeEmbedder intentionally not called — ONNX Runtime segfaults on cleanup (see #38)
12
12
  import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, getGlobalRegistryPath } from '../storage/repo-manager.js';
13
13
  import { getCurrentCommit, isGitRepo, getGitRoot } from '../storage/git.js';
14
14
  import { generateAIContextFiles } from './ai-context.js';
@@ -70,11 +70,29 @@ export const analyzeCommand = async (inputPath, options) => {
70
70
  stopOnComplete: false,
71
71
  }, cliProgress.Presets.shades_grey);
72
72
  bar.start(100, 0, { phase: 'Initializing...' });
73
+ // Route all console output through bar.log() so the bar doesn't stamp itself
74
+ // multiple times when other code writes to stdout/stderr mid-render.
75
+ const origLog = console.log.bind(console);
76
+ const origWarn = console.warn.bind(console);
77
+ const origError = console.error.bind(console);
78
+ const barLog = (...args) => bar.log(args.map(a => (typeof a === 'string' ? a : String(a))).join(' '));
79
+ console.log = barLog;
80
+ console.warn = barLog;
81
+ console.error = barLog;
82
+ // Show elapsed seconds for phases that run longer than 3s
83
+ let lastPhaseLabel = 'Initializing...';
84
+ let phaseStart = Date.now();
85
+ const elapsedTimer = setInterval(() => {
86
+ const elapsed = Math.round((Date.now() - phaseStart) / 1000);
87
+ if (elapsed >= 3) {
88
+ bar.update({ phase: `${lastPhaseLabel} (${elapsed}s)` });
89
+ }
90
+ }, 1000);
73
91
  const t0Global = Date.now();
74
92
  // ── Cache embeddings from existing index before rebuild ────────────
75
93
  let cachedEmbeddingNodeIds = new Set();
76
94
  let cachedEmbeddings = [];
77
- if (existingMeta && !options?.force) {
95
+ if (options?.embeddings && existingMeta && !options?.force) {
78
96
  try {
79
97
  bar.update(0, { phase: 'Caching embeddings...' });
80
98
  await initKuzu(kuzuPath);
@@ -94,10 +112,16 @@ export const analyzeCommand = async (inputPath, options) => {
94
112
  const pipelineResult = await runPipelineFromRepo(repoPath, (progress) => {
95
113
  const phaseLabel = PHASE_LABELS[progress.phase] || progress.phase;
96
114
  const scaled = Math.round(progress.percent * 0.6);
115
+ if (phaseLabel !== lastPhaseLabel) {
116
+ lastPhaseLabel = phaseLabel;
117
+ phaseStart = Date.now();
118
+ }
97
119
  bar.update(scaled, { phase: phaseLabel });
98
120
  });
99
121
  // ── Phase 2: KuzuDB (60–85%) ──────────────────────────────────────
100
- bar.update(60, { phase: 'Loading into KuzuDB...' });
122
+ lastPhaseLabel = 'Loading into KuzuDB...';
123
+ phaseStart = Date.now();
124
+ bar.update(60, { phase: lastPhaseLabel });
101
125
  await closeKuzu();
102
126
  const kuzuFiles = [kuzuPath, `${kuzuPath}.wal`, `${kuzuPath}.lock`];
103
127
  for (const f of kuzuFiles) {
@@ -117,7 +141,9 @@ export const analyzeCommand = async (inputPath, options) => {
117
141
  const kuzuTime = ((Date.now() - t0Kuzu) / 1000).toFixed(1);
118
142
  const kuzuWarnings = kuzuResult.warnings;
119
143
  // ── Phase 3: FTS (85–90%) ─────────────────────────────────────────
120
- bar.update(85, { phase: 'Creating search indexes...' });
144
+ lastPhaseLabel = 'Creating search indexes...';
145
+ phaseStart = Date.now();
146
+ bar.update(85, { phase: lastPhaseLabel });
121
147
  const t0Fts = Date.now();
122
148
  try {
123
149
  await createFTSIndex('File', 'file_fts', ['name', 'content']);
@@ -146,22 +172,28 @@ export const analyzeCommand = async (inputPath, options) => {
146
172
  // ── Phase 4: Embeddings (90–98%) ──────────────────────────────────
147
173
  const stats = await getKuzuStats();
148
174
  let embeddingTime = '0.0';
149
- let embeddingSkipped = false;
150
- let embeddingSkipReason = '';
151
- if (options?.skipEmbeddings) {
152
- embeddingSkipped = true;
153
- embeddingSkipReason = 'skipped (--skip-embeddings)';
154
- }
155
- else if (stats.nodes > EMBEDDING_NODE_LIMIT) {
156
- embeddingSkipped = true;
157
- embeddingSkipReason = `skipped (${stats.nodes.toLocaleString()} nodes > ${EMBEDDING_NODE_LIMIT.toLocaleString()} limit)`;
175
+ let embeddingSkipped = true;
176
+ let embeddingSkipReason = 'off (use --embeddings to enable)';
177
+ if (options?.embeddings) {
178
+ if (stats.nodes > EMBEDDING_NODE_LIMIT) {
179
+ embeddingSkipReason = `skipped (${stats.nodes.toLocaleString()} nodes > ${EMBEDDING_NODE_LIMIT.toLocaleString()} limit)`;
180
+ }
181
+ else {
182
+ embeddingSkipped = false;
183
+ }
158
184
  }
159
185
  if (!embeddingSkipped) {
160
- bar.update(90, { phase: 'Loading embedding model...' });
186
+ lastPhaseLabel = 'Loading embedding model...';
187
+ phaseStart = Date.now();
188
+ bar.update(90, { phase: lastPhaseLabel });
161
189
  const t0Emb = Date.now();
162
190
  await runEmbeddingPipeline(executeQuery, executeWithReusedStatement, (progress) => {
163
191
  const scaled = 90 + Math.round((progress.percent / 100) * 8);
164
192
  const label = progress.phase === 'loading-model' ? 'Loading embedding model...' : `Embedding ${progress.nodesProcessed || 0}/${progress.totalNodes || '?'}`;
193
+ if (label !== lastPhaseLabel) {
194
+ lastPhaseLabel = label;
195
+ phaseStart = Date.now();
196
+ }
165
197
  bar.update(scaled, { phase: label });
166
198
  }, {}, cachedEmbeddingNodeIds.size > 0 ? cachedEmbeddingNodeIds : undefined);
167
199
  embeddingTime = ((Date.now() - t0Emb) / 1000).toFixed(1);
@@ -203,8 +235,14 @@ export const analyzeCommand = async (inputPath, options) => {
203
235
  processes: pipelineResult.processResult?.stats.totalProcesses,
204
236
  });
205
237
  await closeKuzu();
206
- await disposeEmbedder();
238
+ // Note: we intentionally do NOT call disposeEmbedder() here.
239
+ // ONNX Runtime's native cleanup segfaults on macOS and some Linux configs.
240
+ // Since the process exits immediately after, Node.js reclaims everything.
207
241
  const totalTime = ((Date.now() - t0Global) / 1000).toFixed(1);
242
+ clearInterval(elapsedTimer);
243
+ console.log = origLog;
244
+ console.warn = origWarn;
245
+ console.error = origError;
208
246
  bar.update(100, { phase: 'Done' });
209
247
  bar.stop();
210
248
  // ── Summary ───────────────────────────────────────────────────────
@@ -233,4 +271,10 @@ export const analyzeCommand = async (inputPath, options) => {
233
271
  console.log('\n Tip: Run `gitnexus setup` to configure MCP for your editor.');
234
272
  }
235
273
  console.log('');
274
+ // ONNX Runtime registers native atexit hooks that segfault during process
275
+ // shutdown on macOS (#38) and some Linux configs (#40). Force-exit to
276
+ // bypass them when embeddings were loaded.
277
+ if (!embeddingSkipped) {
278
+ process.exit(0);
279
+ }
236
280
  };
package/dist/cli/index.js CHANGED
@@ -24,7 +24,7 @@ program
24
24
  .command('analyze [path]')
25
25
  .description('Index a repository (full analysis)')
26
26
  .option('-f, --force', 'Force full re-index even if up to date')
27
- .option('--skip-embeddings', 'Skip embedding generation (faster)')
27
+ .option('--embeddings', 'Enable embedding generation for semantic search (off by default)')
28
28
  .action(analyzeCommand);
29
29
  program
30
30
  .command('serve')
@@ -98,11 +98,11 @@ export async function augment(pattern, cwd) {
98
98
  for (const result of bm25Results.slice(0, 5)) {
99
99
  const escaped = result.filePath.replace(/'/g, "''");
100
100
  try {
101
- const symbols = await executeQuery(repoId, `
102
- MATCH (n) WHERE n.filePath = '${escaped}'
103
- AND n.name CONTAINS '${pattern.replace(/'/g, "''").split(/\s+/)[0]}'
104
- RETURN n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath
105
- LIMIT 3
101
+ const symbols = await executeQuery(repoId, `
102
+ MATCH (n) WHERE n.filePath = '${escaped}'
103
+ AND n.name CONTAINS '${pattern.replace(/'/g, "''").split(/\s+/)[0]}'
104
+ RETURN n.id AS id, n.name AS name, labels(n)[0] AS type, n.filePath AS filePath
105
+ LIMIT 3
106
106
  `);
107
107
  for (const sym of symbols) {
108
108
  symbolMatches.push({
@@ -130,10 +130,10 @@ export async function augment(pattern, cwd) {
130
130
  // Callers
131
131
  let callers = [];
132
132
  try {
133
- const rows = await executeQuery(repoId, `
134
- MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(n {id: '${escaped}'})
135
- RETURN caller.name AS name
136
- LIMIT 3
133
+ const rows = await executeQuery(repoId, `
134
+ MATCH (caller)-[:CodeRelation {type: 'CALLS'}]->(n {id: '${escaped}'})
135
+ RETURN caller.name AS name
136
+ LIMIT 3
137
137
  `);
138
138
  callers = rows.map((r) => r.name || r[0]).filter(Boolean);
139
139
  }
@@ -141,10 +141,10 @@ export async function augment(pattern, cwd) {
141
141
  // Callees
142
142
  let callees = [];
143
143
  try {
144
- const rows = await executeQuery(repoId, `
145
- MATCH (n {id: '${escaped}'})-[:CodeRelation {type: 'CALLS'}]->(callee)
146
- RETURN callee.name AS name
147
- LIMIT 3
144
+ const rows = await executeQuery(repoId, `
145
+ MATCH (n {id: '${escaped}'})-[:CodeRelation {type: 'CALLS'}]->(callee)
146
+ RETURN callee.name AS name
147
+ LIMIT 3
148
148
  `);
149
149
  callees = rows.map((r) => r.name || r[0]).filter(Boolean);
150
150
  }
@@ -152,9 +152,9 @@ export async function augment(pattern, cwd) {
152
152
  // Processes
153
153
  let processes = [];
154
154
  try {
155
- const rows = await executeQuery(repoId, `
156
- MATCH (n {id: '${escaped}'})-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
157
- RETURN p.heuristicLabel AS label, r.step AS step, p.stepCount AS stepCount
155
+ const rows = await executeQuery(repoId, `
156
+ MATCH (n {id: '${escaped}'})-[r:CodeRelation {type: 'STEP_IN_PROCESS'}]->(p:Process)
157
+ RETURN p.heuristicLabel AS label, r.step AS step, p.stepCount AS stepCount
158
158
  `);
159
159
  processes = rows.map((r) => {
160
160
  const label = r.label || r[0];
@@ -167,10 +167,10 @@ export async function augment(pattern, cwd) {
167
167
  // Cluster cohesion (internal ranking signal)
168
168
  let cohesion = 0;
169
169
  try {
170
- const rows = await executeQuery(repoId, `
171
- MATCH (n {id: '${escaped}'})-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
172
- RETURN c.cohesion AS cohesion
173
- LIMIT 1
170
+ const rows = await executeQuery(repoId, `
171
+ MATCH (n {id: '${escaped}'})-[:CodeRelation {type: 'MEMBER_OF'}]->(c:Community)
172
+ RETURN c.cohesion AS cohesion
173
+ LIMIT 1
174
174
  `);
175
175
  if (rows.length > 0) {
176
176
  cohesion = (rows[0].cohesion ?? rows[0][0]) || 0;
@@ -89,6 +89,7 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
89
89
  device: device,
90
90
  dtype: 'fp32',
91
91
  progress_callback: progressCallback,
92
+ session_options: { logSeverityLevel: 3 },
92
93
  });
93
94
  currentDevice = device;
94
95
  if (isDev) {
@@ -24,19 +24,19 @@ const queryEmbeddableNodes = async (executeQuery) => {
24
24
  let query;
25
25
  if (label === 'File') {
26
26
  // File nodes don't have startLine/endLine
27
- query = `
28
- MATCH (n:File)
29
- RETURN n.id AS id, n.name AS name, 'File' AS label,
30
- n.filePath AS filePath, n.content AS content
27
+ query = `
28
+ MATCH (n:File)
29
+ RETURN n.id AS id, n.name AS name, 'File' AS label,
30
+ n.filePath AS filePath, n.content AS content
31
31
  `;
32
32
  }
33
33
  else {
34
34
  // Code elements have startLine/endLine
35
- query = `
36
- MATCH (n:${label})
37
- RETURN n.id AS id, n.name AS name, '${label}' AS label,
38
- n.filePath AS filePath, n.content AS content,
39
- n.startLine AS startLine, n.endLine AS endLine
35
+ query = `
36
+ MATCH (n:${label})
37
+ RETURN n.id AS id, n.name AS name, '${label}' AS label,
38
+ n.filePath AS filePath, n.content AS content,
39
+ n.startLine AS startLine, n.endLine AS endLine
40
40
  `;
41
41
  }
42
42
  const rows = await executeQuery(query);
@@ -77,8 +77,8 @@ const batchInsertEmbeddings = async (executeWithReusedStatement, updates) => {
77
77
  * Now indexes the separate CodeEmbedding table
78
78
  */
79
79
  const createVectorIndex = async (executeQuery) => {
80
- const cypher = `
81
- CALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine')
80
+ const cypher = `
81
+ CALL CREATE_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx', 'embedding', metric := 'cosine')
82
82
  `;
83
83
  try {
84
84
  await executeQuery(cypher);
@@ -240,14 +240,14 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
240
240
  const queryVec = embeddingToArray(queryEmbedding);
241
241
  const queryVecStr = `[${queryVec.join(',')}]`;
242
242
  // Query the vector index on CodeEmbedding to get nodeIds and distances
243
- const vectorQuery = `
244
- CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx',
245
- CAST(${queryVecStr} AS FLOAT[384]), ${k})
246
- YIELD node AS emb, distance
247
- WITH emb, distance
248
- WHERE distance < ${maxDistance}
249
- RETURN emb.nodeId AS nodeId, distance
250
- ORDER BY distance
243
+ const vectorQuery = `
244
+ CALL QUERY_VECTOR_INDEX('CodeEmbedding', 'code_embedding_idx',
245
+ CAST(${queryVecStr} AS FLOAT[384]), ${k})
246
+ YIELD node AS emb, distance
247
+ WITH emb, distance
248
+ WHERE distance < ${maxDistance}
249
+ RETURN emb.nodeId AS nodeId, distance
250
+ ORDER BY distance
251
251
  `;
252
252
  const embResults = await executeQuery(vectorQuery);
253
253
  if (embResults.length === 0) {
@@ -266,16 +266,16 @@ export const semanticSearch = async (executeQuery, query, k = 10, maxDistance =
266
266
  try {
267
267
  let nodeQuery;
268
268
  if (label === 'File') {
269
- nodeQuery = `
270
- MATCH (n:File {id: '${nodeId.replace(/'/g, "''")}'})
271
- RETURN n.name AS name, n.filePath AS filePath
269
+ nodeQuery = `
270
+ MATCH (n:File {id: '${nodeId.replace(/'/g, "''")}'})
271
+ RETURN n.name AS name, n.filePath AS filePath
272
272
  `;
273
273
  }
274
274
  else {
275
- nodeQuery = `
276
- MATCH (n:${label} {id: '${nodeId.replace(/'/g, "''")}'})
277
- RETURN n.name AS name, n.filePath AS filePath,
278
- n.startLine AS startLine, n.endLine AS endLine
275
+ nodeQuery = `
276
+ MATCH (n:${label} {id: '${nodeId.replace(/'/g, "''")}'})
277
+ RETURN n.name AS name, n.filePath AS filePath,
278
+ n.startLine AS startLine, n.endLine AS endLine
279
279
  `;
280
280
  }
281
281
  const nodeRows = await executeQuery(nodeQuery);
@@ -13,12 +13,12 @@ const buildEnrichmentPrompt = (members, heuristicLabel) => {
13
13
  const memberList = limitedMembers
14
14
  .map(m => `${m.name} (${m.type})`)
15
15
  .join(', ');
16
- return `Analyze this code cluster and provide a semantic name and short description.
17
-
18
- Heuristic: "${heuristicLabel}"
19
- Members: ${memberList}${members.length > 20 ? ` (+${members.length - 20} more)` : ''}
20
-
21
- Reply with JSON only:
16
+ return `Analyze this code cluster and provide a semantic name and short description.
17
+
18
+ Heuristic: "${heuristicLabel}"
19
+ Members: ${memberList}${members.length > 20 ? ` (+${members.length - 20} more)` : ''}
20
+
21
+ Reply with JSON only:
22
22
  {"name": "2-4 word semantic name", "description": "One sentence describing purpose"}`;
23
23
  };
24
24
  // ============================================================================
@@ -115,18 +115,18 @@ export const enrichClustersBatch = async (communities, memberMap, llmClient, bat
115
115
  const memberList = limitedMembers
116
116
  .map(m => `${m.name} (${m.type})`)
117
117
  .join(', ');
118
- return `Cluster ${idx + 1} (id: ${community.id}):
119
- Heuristic: "${community.heuristicLabel}"
118
+ return `Cluster ${idx + 1} (id: ${community.id}):
119
+ Heuristic: "${community.heuristicLabel}"
120
120
  Members: ${memberList}`;
121
121
  }).join('\n\n');
122
- const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions.
123
-
124
- ${batchPrompt}
125
-
126
- Output JSON array:
127
- [
128
- {"id": "comm_X", "name": "...", "keywords": [...], "description": "..."},
129
- ...
122
+ const prompt = `Analyze these code clusters and generate semantic names, keywords, and descriptions.
123
+
124
+ ${batchPrompt}
125
+
126
+ Output JSON array:
127
+ [
128
+ {"id": "comm_X", "name": "...", "keywords": [...], "description": "..."},
129
+ ...
130
130
  ]`;
131
131
  try {
132
132
  const response = await llmClient.generate(prompt);