gitnexus 1.6.4-rc.74 → 1.6.4-rc.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/analyze.d.ts +8 -1
- package/dist/cli/analyze.js +18 -1
- package/dist/cli/index.js +2 -1
- package/dist/core/embedding-mode.d.ts +21 -0
- package/dist/core/embedding-mode.js +18 -0
- package/dist/core/ingestion/languages/go/range-binding.js +2 -2
- package/dist/core/ingestion/languages/go/type-binding.js +3 -3
- package/dist/core/run-analyze.d.ts +8 -1
- package/dist/core/run-analyze.js +16 -5
- package/package.json +1 -1
package/dist/cli/analyze.d.ts
CHANGED
|
@@ -9,7 +9,14 @@
|
|
|
9
9
|
*/
|
|
10
10
|
export interface AnalyzeOptions {
|
|
11
11
|
force?: boolean;
|
|
12
|
-
|
|
12
|
+
/**
|
|
13
|
+
* Embedding generation toggle. Commander parses `--embeddings [limit]` as:
|
|
14
|
+
* - `undefined` when the flag is omitted
|
|
15
|
+
* - `true` when passed without an argument (use default 50K node cap)
|
|
16
|
+
* - a string when passed with an argument (`--embeddings 0` disables the
|
|
17
|
+
* cap, `--embeddings <n>` uses `<n>` as the cap)
|
|
18
|
+
*/
|
|
19
|
+
embeddings?: boolean | string;
|
|
13
20
|
/**
|
|
14
21
|
* Explicitly drop existing embeddings on rebuild instead of preserving
|
|
15
22
|
* them. Without this flag, a routine `analyze` keeps any embeddings
|
package/dist/cli/analyze.js
CHANGED
|
@@ -104,6 +104,22 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
104
104
|
}
|
|
105
105
|
process.env.GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS = String(Math.round(workerTimeoutSeconds * 1000));
|
|
106
106
|
}
|
|
107
|
+
// Parse `--embeddings [limit]`: `true` → default cap, string → numeric cap
|
|
108
|
+
// (0 disables the cap entirely). Validated up here so failures match the
|
|
109
|
+
// sibling-validation pattern (exit before bar.start() — otherwise
|
|
110
|
+
// process.exit() leaves the progress bar's hidden cursor uncleared).
|
|
111
|
+
let embeddingsNodeLimit;
|
|
112
|
+
if (typeof options?.embeddings === 'string') {
|
|
113
|
+
const parsed = Number(options.embeddings);
|
|
114
|
+
if (!Number.isInteger(parsed) || parsed < 0) {
|
|
115
|
+
console.error(` --embeddings expects a non-negative integer (got "${options.embeddings}"). ` +
|
|
116
|
+
`Pass 0 to disable the safety cap, or omit the value to keep the default.\n`);
|
|
117
|
+
process.exitCode = 1;
|
|
118
|
+
return;
|
|
119
|
+
}
|
|
120
|
+
embeddingsNodeLimit = parsed;
|
|
121
|
+
}
|
|
122
|
+
const embeddingsEnabled = !!options?.embeddings;
|
|
107
123
|
const setPositiveEnv = (optionName, envName, value) => {
|
|
108
124
|
if (value === undefined)
|
|
109
125
|
return true;
|
|
@@ -231,7 +247,8 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
231
247
|
// needs a fresh pipelineResult. Has no bearing on the registry
|
|
232
248
|
// collision guard (see allowDuplicateName below).
|
|
233
249
|
force: options?.force || options?.skills,
|
|
234
|
-
embeddings:
|
|
250
|
+
embeddings: embeddingsEnabled,
|
|
251
|
+
embeddingsNodeLimit,
|
|
235
252
|
dropEmbeddings: options?.dropEmbeddings,
|
|
236
253
|
skipGit: options?.skipGit,
|
|
237
254
|
skipAgentsMd: options?.skipAgentsMd,
|
package/dist/cli/index.js
CHANGED
|
@@ -17,7 +17,8 @@ program
|
|
|
17
17
|
.command('analyze [path]')
|
|
18
18
|
.description('Index a repository (full analysis)')
|
|
19
19
|
.option('-f, --force', 'Force full re-index even if up to date')
|
|
20
|
-
.option('--embeddings', 'Enable embedding generation for semantic search (off by default)')
|
|
20
|
+
.option('--embeddings [limit]', 'Enable embedding generation for semantic search (off by default). ' +
|
|
21
|
+
'Optional [limit] overrides the 50,000-node safety cap; pass 0 to disable the cap entirely.')
|
|
21
22
|
.option('--drop-embeddings', 'Drop existing embeddings on rebuild. By default, an `analyze` without `--embeddings` ' +
|
|
22
23
|
'preserves any embeddings already present in the index.')
|
|
23
24
|
.option('--skills', 'Generate repo-specific skill files from detected communities')
|
package/dist/core/embedding-mode.d.ts
CHANGED
|
@@ -27,4 +27,25 @@ export interface EmbeddingMode {
|
|
|
27
27
|
/** True when we need to load cached embeddings from the existing DB before the rebuild. */
|
|
28
28
|
shouldLoadCache: boolean;
|
|
29
29
|
}
|
|
30
|
+
/** Default safety cap on graph node count for embedding generation. */
|
|
31
|
+
export declare const DEFAULT_EMBEDDING_NODE_LIMIT = 50000;
|
|
32
|
+
export interface EmbeddingCapDecision {
|
|
33
|
+
/** True when the node-count cap blocks generation for this graph. */
|
|
34
|
+
skipForCap: boolean;
|
|
35
|
+
/** True when the user explicitly disabled the cap (`--embeddings 0`). */
|
|
36
|
+
capDisabled: boolean;
|
|
37
|
+
/** Effective node limit applied (`0` means disabled). */
|
|
38
|
+
nodeLimit: number;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Decide whether the node-count safety cap blocks embedding generation.
|
|
42
|
+
*
|
|
43
|
+
* - `embeddingsNodeLimit === undefined` → use {@link DEFAULT_EMBEDDING_NODE_LIMIT}
|
|
44
|
+
* - `embeddingsNodeLimit === 0` → cap disabled, generation always proceeds
|
|
45
|
+
* - any positive integer → custom cap (skip if `nodeCount > limit`)
|
|
46
|
+
*
|
|
47
|
+
* Lives in `embedding-mode.ts` (not `run-analyze.ts`) so the branching
|
|
48
|
+
* contract is unit-testable without spinning up LadybugDB or the pipeline.
|
|
49
|
+
*/
|
|
50
|
+
export declare function deriveEmbeddingCap(nodeCount: number, embeddingsNodeLimit: number | undefined): EmbeddingCapDecision;
|
|
30
51
|
export declare function deriveEmbeddingMode(options: EmbeddingModeInput, existingEmbeddingCount: number): EmbeddingMode;
|
package/dist/core/embedding-mode.js
CHANGED
|
@@ -12,6 +12,24 @@
|
|
|
12
12
|
* (default) + existing>0 -> preserve only (load + restore, no generation)
|
|
13
13
|
* any path with existing=0 -> no cache work, no preservation
|
|
14
14
|
*/
|
|
15
|
+
/** Default safety cap on graph node count for embedding generation. */
|
|
16
|
+
export const DEFAULT_EMBEDDING_NODE_LIMIT = 50_000;
|
|
17
|
+
/**
|
|
18
|
+
* Decide whether the node-count safety cap blocks embedding generation.
|
|
19
|
+
*
|
|
20
|
+
* - `embeddingsNodeLimit === undefined` → use {@link DEFAULT_EMBEDDING_NODE_LIMIT}
|
|
21
|
+
* - `embeddingsNodeLimit === 0` → cap disabled, generation always proceeds
|
|
22
|
+
* - any positive integer → custom cap (skip if `nodeCount > limit`)
|
|
23
|
+
*
|
|
24
|
+
* Lives in `embedding-mode.ts` (not `run-analyze.ts`) so the branching
|
|
25
|
+
* contract is unit-testable without spinning up LadybugDB or the pipeline.
|
|
26
|
+
*/
|
|
27
|
+
export function deriveEmbeddingCap(nodeCount, embeddingsNodeLimit) {
|
|
28
|
+
const nodeLimit = embeddingsNodeLimit ?? DEFAULT_EMBEDDING_NODE_LIMIT;
|
|
29
|
+
const capDisabled = nodeLimit === 0;
|
|
30
|
+
const skipForCap = !capDisabled && nodeCount > nodeLimit;
|
|
31
|
+
return { skipForCap, capDisabled, nodeLimit };
|
|
32
|
+
}
|
|
15
33
|
export function deriveEmbeddingMode(options, existingEmbeddingCount) {
|
|
16
34
|
const hasExisting = existingEmbeddingCount > 0;
|
|
17
35
|
const drop = !!options.dropEmbeddings;
|
package/dist/core/ingestion/languages/go/range-binding.js
CHANGED
|
@@ -17,10 +17,10 @@ export function populateGoRangeBindings(parsedFiles, _indexes, ctx) {
|
|
|
17
17
|
const scopeMap = new Map(parsed.scopes.map((s) => [s.id, s]));
|
|
18
18
|
for (const rangeNode of tree.rootNode.descendantsOfType('for_statement')) {
|
|
19
19
|
const rangeClause = rangeNode.namedChildren.find((c) => c.type === 'range_clause');
|
|
20
|
-
if (rangeClause ===
|
|
20
|
+
if (rangeClause === undefined)
|
|
21
21
|
continue;
|
|
22
22
|
const left = rangeClause.namedChildren.find((c) => c.type === 'expression_list');
|
|
23
|
-
if (left ===
|
|
23
|
+
if (left === undefined)
|
|
24
24
|
continue;
|
|
25
25
|
const rangeExpr = rangeClause.namedChildren.find((c, idx) => c.type !== 'expression_list' && idx > rangeClause.namedChildren.indexOf(left));
|
|
26
26
|
if (rangeExpr === undefined)
|
package/dist/core/ingestion/languages/go/type-binding.js
CHANGED
|
@@ -38,7 +38,7 @@ export function synthesizeGoTypeBindings(rootNode) {
|
|
|
38
38
|
const args = expr.childForFieldName('arguments');
|
|
39
39
|
if (fn?.type === 'identifier' && fn.text === 'new' && args !== null) {
|
|
40
40
|
const typeArg = args.namedChildren.find((c) => ['type_identifier', 'qualified_type'].includes(c.type));
|
|
41
|
-
if (typeArg !==
|
|
41
|
+
if (typeArg !== undefined) {
|
|
42
42
|
const typeName = extractSimpleTypeNameText(typeArg);
|
|
43
43
|
const nameNodes = lhs.namedChildren.filter((c) => c.type === 'identifier');
|
|
44
44
|
if (nameNodes.length > 0) {
|
|
@@ -54,11 +54,11 @@ export function synthesizeGoTypeBindings(rootNode) {
|
|
|
54
54
|
const sliceOrMap = args.namedChildren.find((c) =>
|
|
55
55
|
// V1: channel_type not handled — make(chan T) produces no typeBinding.
|
|
56
56
|
['slice_type', 'map_type'].includes(c.type));
|
|
57
|
-
if (sliceOrMap !==
|
|
57
|
+
if (sliceOrMap !== undefined) {
|
|
58
58
|
let typeName = '';
|
|
59
59
|
if (sliceOrMap.type === 'slice_type') {
|
|
60
60
|
const elem = sliceOrMap.namedChildren.find((c) => ['type_identifier', 'qualified_type'].includes(c.type));
|
|
61
|
-
if (elem !==
|
|
61
|
+
if (elem !== undefined)
|
|
62
62
|
typeName = extractSimpleTypeNameText(elem);
|
|
63
63
|
}
|
|
64
64
|
else if (sliceOrMap.type === 'map_type') {
|
package/dist/core/run-analyze.d.ts
CHANGED
|
@@ -21,6 +21,13 @@ export interface AnalyzeOptions {
|
|
|
21
21
|
*/
|
|
22
22
|
force?: boolean;
|
|
23
23
|
embeddings?: boolean;
|
|
24
|
+
/**
|
|
25
|
+
* Override the auto-skip node-count cap for embedding generation.
|
|
26
|
+
* `undefined` (default) keeps the built-in 50,000-node safety limit;
|
|
27
|
+
* `0` disables the cap entirely; any positive integer sets a custom cap.
|
|
28
|
+
* Mapped from the CLI's `--embeddings [limit]` argument.
|
|
29
|
+
*/
|
|
30
|
+
embeddingsNodeLimit?: number;
|
|
24
31
|
/**
|
|
25
32
|
* Explicitly drop any embeddings present in the existing index instead of
|
|
26
33
|
* preserving them. Only meaningful when `embeddings` is false/undefined:
|
|
@@ -66,7 +73,7 @@ export interface AnalyzeResult {
|
|
|
66
73
|
/** The raw pipeline result — only populated when needed by callers (e.g. skill generation). */
|
|
67
74
|
pipelineResult?: any;
|
|
68
75
|
}
|
|
69
|
-
export { deriveEmbeddingMode } from './embedding-mode.js';
|
|
76
|
+
export { deriveEmbeddingMode, DEFAULT_EMBEDDING_NODE_LIMIT } from './embedding-mode.js';
|
|
70
77
|
export type { EmbeddingMode } from './embedding-mode.js';
|
|
71
78
|
export declare const PHASE_LABELS: Record<string, string>;
|
|
72
79
|
/**
|
package/dist/core/run-analyze.js
CHANGED
|
@@ -18,12 +18,10 @@ import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName, resolve
|
|
|
18
18
|
import { generateAIContextFiles } from '../cli/ai-context.js';
|
|
19
19
|
import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
|
|
20
20
|
import { STALE_HASH_SENTINEL } from './lbug/schema.js';
|
|
21
|
-
/** Threshold: auto-skip embeddings for repos with more nodes than this */
|
|
22
|
-
const EMBEDDING_NODE_LIMIT = 50_000;
|
|
23
21
|
// Re-export the pure flag-derivation helper so external callers (and tests)
|
|
24
22
|
// keep importing from this module's stable surface.
|
|
25
|
-
export { deriveEmbeddingMode } from './embedding-mode.js';
|
|
26
|
-
import { deriveEmbeddingMode as _deriveEmbeddingMode } from './embedding-mode.js';
|
|
23
|
+
export { deriveEmbeddingMode, DEFAULT_EMBEDDING_NODE_LIMIT } from './embedding-mode.js';
|
|
24
|
+
import { deriveEmbeddingMode as _deriveEmbeddingMode, deriveEmbeddingCap, DEFAULT_EMBEDDING_NODE_LIMIT, } from './embedding-mode.js';
|
|
27
25
|
export const PHASE_LABELS = {
|
|
28
26
|
extracting: 'Scanning files',
|
|
29
27
|
structure: 'Building structure',
|
|
@@ -206,8 +204,21 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
206
204
|
let embeddingSkipped = true;
|
|
207
205
|
let semanticMode;
|
|
208
206
|
if (shouldGenerateEmbeddings) {
|
|
209
|
-
|
|
207
|
+
const { skipForCap, capDisabled, nodeLimit } = deriveEmbeddingCap(stats.nodes, options.embeddingsNodeLimit);
|
|
208
|
+
if (!skipForCap) {
|
|
210
209
|
embeddingSkipped = false;
|
|
210
|
+
if (capDisabled && stats.nodes > DEFAULT_EMBEDDING_NODE_LIMIT) {
|
|
211
|
+
log(`Embedding node-count cap disabled — generating embeddings for ` +
|
|
212
|
+
`${stats.nodes.toLocaleString()} nodes. Ensure sufficient memory; ` +
|
|
213
|
+
`the default ${DEFAULT_EMBEDDING_NODE_LIMIT.toLocaleString()}-node ` +
|
|
214
|
+
`cap exists to prevent OOM.`);
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
else {
|
|
218
|
+
log(`Embeddings skipped: ${stats.nodes.toLocaleString()} nodes exceeds ` +
|
|
219
|
+
`the ${nodeLimit.toLocaleString()}-node safety cap. ` +
|
|
220
|
+
`Override with \`--embeddings 0\` to disable the cap, or ` +
|
|
221
|
+
`\`--embeddings <n>\` to set a custom cap.`);
|
|
211
222
|
}
|
|
212
223
|
}
|
|
213
224
|
if (!embeddingSkipped) {
|
package/package.json
CHANGED