gitnexus 1.6.4-rc.74 → 1.6.4-rc.75

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,14 @@
9
9
  */
10
10
  export interface AnalyzeOptions {
11
11
  force?: boolean;
12
- embeddings?: boolean;
12
+ /**
13
+ * Embedding generation toggle. Commander parses `--embeddings [limit]` as:
14
+ * - `undefined` when the flag is omitted
15
+ * - `true` when passed without an argument (use default 50K node cap)
16
+ * - a string when passed with an argument (`--embeddings 0` disables the
17
+ * cap, `--embeddings <n>` uses `<n>` as the cap)
18
+ */
19
+ embeddings?: boolean | string;
13
20
  /**
14
21
  * Explicitly drop existing embeddings on rebuild instead of preserving
15
22
  * them. Without this flag, a routine `analyze` keeps any embeddings
@@ -104,6 +104,22 @@ export const analyzeCommand = async (inputPath, options) => {
104
104
  }
105
105
  process.env.GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS = String(Math.round(workerTimeoutSeconds * 1000));
106
106
  }
107
+ // Parse `--embeddings [limit]`: `true` → default cap, string → numeric cap
108
+ // (0 disables the cap entirely). Validated up here so failures match the
109
+ // sibling-validation pattern (exit before bar.start() — otherwise
110
+ // process.exit() leaves the progress bar's hidden cursor uncleared).
111
+ let embeddingsNodeLimit;
112
+ if (typeof options?.embeddings === 'string') {
113
+ const parsed = Number(options.embeddings);
114
+ if (!Number.isInteger(parsed) || parsed < 0) {
115
+ console.error(` --embeddings expects a non-negative integer (got "${options.embeddings}"). ` +
116
+ `Pass 0 to disable the safety cap, or omit the value to keep the default.\n`);
117
+ process.exitCode = 1;
118
+ return;
119
+ }
120
+ embeddingsNodeLimit = parsed;
121
+ }
122
+ const embeddingsEnabled = !!options?.embeddings;
107
123
  const setPositiveEnv = (optionName, envName, value) => {
108
124
  if (value === undefined)
109
125
  return true;
@@ -231,7 +247,8 @@ export const analyzeCommand = async (inputPath, options) => {
231
247
  // needs a fresh pipelineResult. Has no bearing on the registry
232
248
  // collision guard (see allowDuplicateName below).
233
249
  force: options?.force || options?.skills,
234
- embeddings: options?.embeddings,
250
+ embeddings: embeddingsEnabled,
251
+ embeddingsNodeLimit,
235
252
  dropEmbeddings: options?.dropEmbeddings,
236
253
  skipGit: options?.skipGit,
237
254
  skipAgentsMd: options?.skipAgentsMd,
package/dist/cli/index.js CHANGED
@@ -17,7 +17,8 @@ program
17
17
  .command('analyze [path]')
18
18
  .description('Index a repository (full analysis)')
19
19
  .option('-f, --force', 'Force full re-index even if up to date')
20
- .option('--embeddings', 'Enable embedding generation for semantic search (off by default)')
20
+ .option('--embeddings [limit]', 'Enable embedding generation for semantic search (off by default). ' +
21
+ 'Optional [limit] overrides the 50,000-node safety cap; pass 0 to disable the cap entirely.')
21
22
  .option('--drop-embeddings', 'Drop existing embeddings on rebuild. By default, an `analyze` without `--embeddings` ' +
22
23
  'preserves any embeddings already present in the index.')
23
24
  .option('--skills', 'Generate repo-specific skill files from detected communities')
@@ -27,4 +27,25 @@ export interface EmbeddingMode {
27
27
  /** True when we need to load cached embeddings from the existing DB before the rebuild. */
28
28
  shouldLoadCache: boolean;
29
29
  }
30
+ /** Default safety cap on graph node count for embedding generation. */
31
+ export declare const DEFAULT_EMBEDDING_NODE_LIMIT = 50000;
32
+ export interface EmbeddingCapDecision {
33
+ /** True when the node-count cap blocks generation for this graph. */
34
+ skipForCap: boolean;
35
+ /** True when the user explicitly disabled the cap (`--embeddings 0`). */
36
+ capDisabled: boolean;
37
+ /** Effective node limit applied (`0` means disabled). */
38
+ nodeLimit: number;
39
+ }
40
+ /**
41
+ * Decide whether the node-count safety cap blocks embedding generation.
42
+ *
43
+ * - `embeddingsNodeLimit === undefined` → use {@link DEFAULT_EMBEDDING_NODE_LIMIT}
44
+ * - `embeddingsNodeLimit === 0` → cap disabled, generation always proceeds
45
+ * - any positive integer → custom cap (skip if `nodeCount > limit`)
46
+ *
47
+ * Lives in `embedding-mode.ts` (not `run-analyze.ts`) so the branching
48
+ * contract is unit-testable without spinning up LadybugDB or the pipeline.
49
+ */
50
+ export declare function deriveEmbeddingCap(nodeCount: number, embeddingsNodeLimit: number | undefined): EmbeddingCapDecision;
30
51
  export declare function deriveEmbeddingMode(options: EmbeddingModeInput, existingEmbeddingCount: number): EmbeddingMode;
@@ -12,6 +12,24 @@
12
12
  * (default) + existing>0 -> preserve only (load + restore, no generation)
13
13
  * any path with existing=0 -> no cache work, no preservation
14
14
  */
15
/** Default safety cap on graph node count for embedding generation. */
export const DEFAULT_EMBEDDING_NODE_LIMIT = 50_000;
/**
 * Decide whether the node-count safety cap blocks embedding generation.
 *
 * - `embeddingsNodeLimit === undefined` → use {@link DEFAULT_EMBEDDING_NODE_LIMIT}
 * - `embeddingsNodeLimit === 0` → cap disabled, generation always proceeds
 * - any positive integer → custom cap (skip if `nodeCount > limit`)
 * - `NaN` or a negative number → not a meaningful limit; treated like
 *   `undefined` and replaced by {@link DEFAULT_EMBEDDING_NODE_LIMIT}
 *
 * Lives in `embedding-mode.ts` (not `run-analyze.ts`) so the branching
 * contract is unit-testable without spinning up LadybugDB or the pipeline.
 *
 * @param nodeCount Number of nodes in the graph being analyzed.
 * @param embeddingsNodeLimit Caller-requested cap, or `undefined` for the default.
 * @returns `skipForCap` (cap blocks generation), `capDisabled` (effective
 *   limit is `0`) and `nodeLimit` (the effective limit that was applied).
 */
export function deriveEmbeddingCap(nodeCount, embeddingsNodeLimit) {
    const requestedLimit = embeddingsNodeLimit ?? DEFAULT_EMBEDDING_NODE_LIMIT;
    // `nodeCount > NaN` is always false, so a NaN limit would silently bypass
    // the safety cap without setting `capDisabled`; a negative limit would
    // block every graph. Neither is a usable cap, so fail safe to the default.
    const isUnusable = Number.isNaN(requestedLimit) || requestedLimit < 0;
    const nodeLimit = isUnusable ? DEFAULT_EMBEDDING_NODE_LIMIT : requestedLimit;
    const capDisabled = nodeLimit === 0;
    const skipForCap = !capDisabled && nodeCount > nodeLimit;
    return { skipForCap, capDisabled, nodeLimit };
}
15
33
  export function deriveEmbeddingMode(options, existingEmbeddingCount) {
16
34
  const hasExisting = existingEmbeddingCount > 0;
17
35
  const drop = !!options.dropEmbeddings;
@@ -21,6 +21,13 @@ export interface AnalyzeOptions {
21
21
  */
22
22
  force?: boolean;
23
23
  embeddings?: boolean;
24
+ /**
25
+ * Override the auto-skip node-count cap for embedding generation.
26
+ * `undefined` (default) keeps the built-in 50,000-node safety limit;
27
+ * `0` disables the cap entirely; any positive integer sets a custom cap.
28
+ * Mapped from the CLI's `--embeddings [limit]` argument.
29
+ */
30
+ embeddingsNodeLimit?: number;
24
31
  /**
25
32
  * Explicitly drop any embeddings present in the existing index instead of
26
33
  * preserving them. Only meaningful when `embeddings` is false/undefined:
@@ -66,7 +73,7 @@ export interface AnalyzeResult {
66
73
  /** The raw pipeline result — only populated when needed by callers (e.g. skill generation). */
67
74
  pipelineResult?: any;
68
75
  }
69
- export { deriveEmbeddingMode } from './embedding-mode.js';
76
+ export { deriveEmbeddingMode, DEFAULT_EMBEDDING_NODE_LIMIT } from './embedding-mode.js';
70
77
  export type { EmbeddingMode } from './embedding-mode.js';
71
78
  export declare const PHASE_LABELS: Record<string, string>;
72
79
  /**
@@ -18,12 +18,10 @@ import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName, resolve
18
18
  import { generateAIContextFiles } from '../cli/ai-context.js';
19
19
  import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
20
20
  import { STALE_HASH_SENTINEL } from './lbug/schema.js';
21
- /** Threshold: auto-skip embeddings for repos with more nodes than this */
22
- const EMBEDDING_NODE_LIMIT = 50_000;
23
21
  // Re-export the pure flag-derivation helper so external callers (and tests)
24
22
  // keep importing from this module's stable surface.
25
- export { deriveEmbeddingMode } from './embedding-mode.js';
26
- import { deriveEmbeddingMode as _deriveEmbeddingMode } from './embedding-mode.js';
23
+ export { deriveEmbeddingMode, DEFAULT_EMBEDDING_NODE_LIMIT } from './embedding-mode.js';
24
+ import { deriveEmbeddingMode as _deriveEmbeddingMode, deriveEmbeddingCap, DEFAULT_EMBEDDING_NODE_LIMIT, } from './embedding-mode.js';
27
25
  export const PHASE_LABELS = {
28
26
  extracting: 'Scanning files',
29
27
  structure: 'Building structure',
@@ -206,8 +204,21 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
206
204
  let embeddingSkipped = true;
207
205
  let semanticMode;
208
206
  if (shouldGenerateEmbeddings) {
209
- if (stats.nodes <= EMBEDDING_NODE_LIMIT) {
207
+ const { skipForCap, capDisabled, nodeLimit } = deriveEmbeddingCap(stats.nodes, options.embeddingsNodeLimit);
208
+ if (!skipForCap) {
210
209
  embeddingSkipped = false;
210
+ if (capDisabled && stats.nodes > DEFAULT_EMBEDDING_NODE_LIMIT) {
211
+ log(`Embedding node-count cap disabled — generating embeddings for ` +
212
+ `${stats.nodes.toLocaleString()} nodes. Ensure sufficient memory; ` +
213
+ `the default ${DEFAULT_EMBEDDING_NODE_LIMIT.toLocaleString()}-node ` +
214
+ `cap exists to prevent OOM.`);
215
+ }
216
+ }
217
+ else {
218
+ log(`Embeddings skipped: ${stats.nodes.toLocaleString()} nodes exceeds ` +
219
+ `the ${nodeLimit.toLocaleString()}-node safety cap. ` +
220
+ `Override with \`--embeddings 0\` to disable the cap, or ` +
221
+ `\`--embeddings <n>\` to set a custom cap.`);
211
222
  }
212
223
  }
213
224
  if (!embeddingSkipped) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.4-rc.74",
3
+ "version": "1.6.4-rc.75",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",