gitnexus 1.6.4-rc.73 → 1.6.4-rc.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/analyze.d.ts +8 -1
- package/dist/cli/analyze.js +18 -1
- package/dist/cli/index.js +2 -1
- package/dist/core/embedding-mode.d.ts +21 -0
- package/dist/core/embedding-mode.js +18 -0
- package/dist/core/run-analyze.d.ts +8 -1
- package/dist/core/run-analyze.js +16 -5
- package/dist/server/validation.d.ts +3 -0
- package/dist/server/validation.js +8 -2
- package/package.json +1 -1
package/dist/cli/analyze.d.ts
CHANGED
|
@@ -9,7 +9,14 @@
|
|
|
9
9
|
*/
|
|
10
10
|
export interface AnalyzeOptions {
|
|
11
11
|
force?: boolean;
|
|
12
|
-
|
|
12
|
+
/**
|
|
13
|
+
* Embedding generation toggle. Commander parses `--embeddings [limit]` as:
|
|
14
|
+
* - `undefined` when the flag is omitted
|
|
15
|
+
* - `true` when passed without an argument (use default 50K node cap)
|
|
16
|
+
* - a string when passed with an argument (`--embeddings 0` disables the
|
|
17
|
+
* cap, `--embeddings <n>` uses `<n>` as the cap)
|
|
18
|
+
*/
|
|
19
|
+
embeddings?: boolean | string;
|
|
13
20
|
/**
|
|
14
21
|
* Explicitly drop existing embeddings on rebuild instead of preserving
|
|
15
22
|
* them. Without this flag, a routine `analyze` keeps any embeddings
|
package/dist/cli/analyze.js
CHANGED
|
@@ -104,6 +104,22 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
104
104
|
}
|
|
105
105
|
process.env.GITNEXUS_WORKER_SUB_BATCH_TIMEOUT_MS = String(Math.round(workerTimeoutSeconds * 1000));
|
|
106
106
|
}
|
|
107
|
+
// Parse `--embeddings [limit]`: `true` → default cap, string → numeric cap
|
|
108
|
+
// (0 disables the cap entirely). Validated up here so failures match the
|
|
109
|
+
// sibling-validation pattern (exit before bar.start() — otherwise
|
|
110
|
+
// process.exit() leaves the progress bar's hidden cursor uncleared).
|
|
111
|
+
let embeddingsNodeLimit;
|
|
112
|
+
if (typeof options?.embeddings === 'string') {
|
|
113
|
+
const parsed = Number(options.embeddings);
|
|
114
|
+
if (!Number.isInteger(parsed) || parsed < 0) {
|
|
115
|
+
console.error(` --embeddings expects a non-negative integer (got "${options.embeddings}"). ` +
|
|
116
|
+
`Pass 0 to disable the safety cap, or omit the value to keep the default.\n`);
|
|
117
|
+
process.exitCode = 1;
|
|
118
|
+
return;
|
|
119
|
+
}
|
|
120
|
+
embeddingsNodeLimit = parsed;
|
|
121
|
+
}
|
|
122
|
+
const embeddingsEnabled = !!options?.embeddings;
|
|
107
123
|
const setPositiveEnv = (optionName, envName, value) => {
|
|
108
124
|
if (value === undefined)
|
|
109
125
|
return true;
|
|
@@ -231,7 +247,8 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
231
247
|
// needs a fresh pipelineResult. Has no bearing on the registry
|
|
232
248
|
// collision guard (see allowDuplicateName below).
|
|
233
249
|
force: options?.force || options?.skills,
|
|
234
|
-
embeddings: options?.embeddings,
|
|
250
|
+
embeddings: embeddingsEnabled,
|
|
251
|
+
embeddingsNodeLimit,
|
|
235
252
|
dropEmbeddings: options?.dropEmbeddings,
|
|
236
253
|
skipGit: options?.skipGit,
|
|
237
254
|
skipAgentsMd: options?.skipAgentsMd,
|
package/dist/cli/index.js
CHANGED
|
@@ -17,7 +17,8 @@ program
|
|
|
17
17
|
.command('analyze [path]')
|
|
18
18
|
.description('Index a repository (full analysis)')
|
|
19
19
|
.option('-f, --force', 'Force full re-index even if up to date')
|
|
20
|
-
.option('--embeddings', 'Enable embedding generation for semantic search (off by default)')
|
|
20
|
+
.option('--embeddings [limit]', 'Enable embedding generation for semantic search (off by default). ' +
|
|
21
|
+
'Optional [limit] overrides the 50,000-node safety cap; pass 0 to disable the cap entirely.')
|
|
21
22
|
.option('--drop-embeddings', 'Drop existing embeddings on rebuild. By default, an `analyze` without `--embeddings` ' +
|
|
22
23
|
'preserves any embeddings already present in the index.')
|
|
23
24
|
.option('--skills', 'Generate repo-specific skill files from detected communities')
|
|
package/dist/core/embedding-mode.d.ts
CHANGED
|
@@ -27,4 +27,25 @@ export interface EmbeddingMode {
|
|
|
27
27
|
/** True when we need to load cached embeddings from the existing DB before the rebuild. */
|
|
28
28
|
shouldLoadCache: boolean;
|
|
29
29
|
}
|
|
30
|
+
/** Default safety cap on graph node count for embedding generation. */
|
|
31
|
+
export declare const DEFAULT_EMBEDDING_NODE_LIMIT = 50000;
|
|
32
|
+
export interface EmbeddingCapDecision {
|
|
33
|
+
/** True when the node-count cap blocks generation for this graph. */
|
|
34
|
+
skipForCap: boolean;
|
|
35
|
+
/** True when the user explicitly disabled the cap (`--embeddings 0`). */
|
|
36
|
+
capDisabled: boolean;
|
|
37
|
+
/** Effective node limit applied (`0` means disabled). */
|
|
38
|
+
nodeLimit: number;
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Decide whether the node-count safety cap blocks embedding generation.
|
|
42
|
+
*
|
|
43
|
+
* - `embeddingsNodeLimit === undefined` → use {@link DEFAULT_EMBEDDING_NODE_LIMIT}
|
|
44
|
+
* - `embeddingsNodeLimit === 0` → cap disabled, generation always proceeds
|
|
45
|
+
* - any positive integer → custom cap (skip if `nodeCount > limit`)
|
|
46
|
+
*
|
|
47
|
+
* Lives in `embedding-mode.ts` (not `run-analyze.ts`) so the branching
|
|
48
|
+
* contract is unit-testable without spinning up LadybugDB or the pipeline.
|
|
49
|
+
*/
|
|
50
|
+
export declare function deriveEmbeddingCap(nodeCount: number, embeddingsNodeLimit: number | undefined): EmbeddingCapDecision;
|
|
30
51
|
export declare function deriveEmbeddingMode(options: EmbeddingModeInput, existingEmbeddingCount: number): EmbeddingMode;
|
|
package/dist/core/embedding-mode.js
CHANGED
|
@@ -12,6 +12,24 @@
|
|
|
12
12
|
* (default) + existing>0 -> preserve only (load + restore, no generation)
|
|
13
13
|
* any path with existing=0 -> no cache work, no preservation
|
|
14
14
|
*/
|
|
15
|
+
/** Default safety cap on graph node count for embedding generation. */
|
|
16
|
+
export const DEFAULT_EMBEDDING_NODE_LIMIT = 50_000;
|
|
17
|
+
/**
|
|
18
|
+
* Decide whether the node-count safety cap blocks embedding generation.
|
|
19
|
+
*
|
|
20
|
+
* - `embeddingsNodeLimit === undefined` → use {@link DEFAULT_EMBEDDING_NODE_LIMIT}
|
|
21
|
+
* - `embeddingsNodeLimit === 0` → cap disabled, generation always proceeds
|
|
22
|
+
* - any positive integer → custom cap (skip if `nodeCount > limit`)
|
|
23
|
+
*
|
|
24
|
+
* Lives in `embedding-mode.ts` (not `run-analyze.ts`) so the branching
|
|
25
|
+
* contract is unit-testable without spinning up LadybugDB or the pipeline.
|
|
26
|
+
*/
|
|
27
|
+
export function deriveEmbeddingCap(nodeCount, embeddingsNodeLimit) {
|
|
28
|
+
const nodeLimit = embeddingsNodeLimit ?? DEFAULT_EMBEDDING_NODE_LIMIT;
|
|
29
|
+
const capDisabled = nodeLimit === 0;
|
|
30
|
+
const skipForCap = !capDisabled && nodeCount > nodeLimit;
|
|
31
|
+
return { skipForCap, capDisabled, nodeLimit };
|
|
32
|
+
}
|
|
15
33
|
export function deriveEmbeddingMode(options, existingEmbeddingCount) {
|
|
16
34
|
const hasExisting = existingEmbeddingCount > 0;
|
|
17
35
|
const drop = !!options.dropEmbeddings;
|
|
package/dist/core/run-analyze.d.ts
CHANGED
|
@@ -21,6 +21,13 @@ export interface AnalyzeOptions {
|
|
|
21
21
|
*/
|
|
22
22
|
force?: boolean;
|
|
23
23
|
embeddings?: boolean;
|
|
24
|
+
/**
|
|
25
|
+
* Override the auto-skip node-count cap for embedding generation.
|
|
26
|
+
* `undefined` (default) keeps the built-in 50,000-node safety limit;
|
|
27
|
+
* `0` disables the cap entirely; any positive integer sets a custom cap.
|
|
28
|
+
* Mapped from the CLI's `--embeddings [limit]` argument.
|
|
29
|
+
*/
|
|
30
|
+
embeddingsNodeLimit?: number;
|
|
24
31
|
/**
|
|
25
32
|
* Explicitly drop any embeddings present in the existing index instead of
|
|
26
33
|
* preserving them. Only meaningful when `embeddings` is false/undefined:
|
|
@@ -66,7 +73,7 @@ export interface AnalyzeResult {
|
|
|
66
73
|
/** The raw pipeline result — only populated when needed by callers (e.g. skill generation). */
|
|
67
74
|
pipelineResult?: any;
|
|
68
75
|
}
|
|
69
|
-
export { deriveEmbeddingMode } from './embedding-mode.js';
|
|
76
|
+
export { deriveEmbeddingMode, DEFAULT_EMBEDDING_NODE_LIMIT } from './embedding-mode.js';
|
|
70
77
|
export type { EmbeddingMode } from './embedding-mode.js';
|
|
71
78
|
export declare const PHASE_LABELS: Record<string, string>;
|
|
72
79
|
/**
|
package/dist/core/run-analyze.js
CHANGED
|
@@ -18,12 +18,10 @@ import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName, resolve
|
|
|
18
18
|
import { generateAIContextFiles } from '../cli/ai-context.js';
|
|
19
19
|
import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
|
|
20
20
|
import { STALE_HASH_SENTINEL } from './lbug/schema.js';
|
|
21
|
-
/** Threshold: auto-skip embeddings for repos with more nodes than this */
|
|
22
|
-
const EMBEDDING_NODE_LIMIT = 50_000;
|
|
23
21
|
// Re-export the pure flag-derivation helper so external callers (and tests)
|
|
24
22
|
// keep importing from this module's stable surface.
|
|
25
|
-
export { deriveEmbeddingMode } from './embedding-mode.js';
|
|
26
|
-
import { deriveEmbeddingMode as _deriveEmbeddingMode } from './embedding-mode.js';
|
|
23
|
+
export { deriveEmbeddingMode, DEFAULT_EMBEDDING_NODE_LIMIT } from './embedding-mode.js';
|
|
24
|
+
import { deriveEmbeddingMode as _deriveEmbeddingMode, deriveEmbeddingCap, DEFAULT_EMBEDDING_NODE_LIMIT, } from './embedding-mode.js';
|
|
27
25
|
export const PHASE_LABELS = {
|
|
28
26
|
extracting: 'Scanning files',
|
|
29
27
|
structure: 'Building structure',
|
|
@@ -206,8 +204,21 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
206
204
|
let embeddingSkipped = true;
|
|
207
205
|
let semanticMode;
|
|
208
206
|
if (shouldGenerateEmbeddings) {
|
|
209
|
-
if (stats.nodes <= EMBEDDING_NODE_LIMIT) {
|
207
|
+
const { skipForCap, capDisabled, nodeLimit } = deriveEmbeddingCap(stats.nodes, options.embeddingsNodeLimit);
|
|
208
|
+
if (!skipForCap) {
|
|
210
209
|
embeddingSkipped = false;
|
|
210
|
+
if (capDisabled && stats.nodes > DEFAULT_EMBEDDING_NODE_LIMIT) {
|
|
211
|
+
log(`Embedding node-count cap disabled — generating embeddings for ` +
|
|
212
|
+
`${stats.nodes.toLocaleString()} nodes. Ensure sufficient memory; ` +
|
|
213
|
+
`the default ${DEFAULT_EMBEDDING_NODE_LIMIT.toLocaleString()}-node ` +
|
|
214
|
+
`cap exists to prevent OOM.`);
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
else {
|
|
218
|
+
log(`Embeddings skipped: ${stats.nodes.toLocaleString()} nodes exceeds ` +
|
|
219
|
+
`the ${nodeLimit.toLocaleString()}-node safety cap. ` +
|
|
220
|
+
`Override with \`--embeddings 0\` to disable the cap, or ` +
|
|
221
|
+
`\`--embeddings <n>\` to set a custom cap.`);
|
|
211
222
|
}
|
|
212
223
|
}
|
|
213
224
|
if (!embeddingSkipped) {
|
|
package/dist/server/validation.d.ts
CHANGED
|
@@ -86,6 +86,9 @@ export interface RouteLimiterOverrides {
|
|
|
86
86
|
* - keyGenerator: req.ip with a socket.remoteAddress fallback so abruptly
|
|
87
87
|
* closed connections do not trigger ERR_ERL_UNDEFINED_IP_ADDRESS
|
|
88
88
|
* (which would 500 the request via Express's default error handler).
|
|
89
|
+
* The IP is passed through `ipKeyGenerator` so IPv6 addresses are
|
|
90
|
+
* normalised to their /56 subnet — without this, each IPv6 address
|
|
91
|
+
* gets its own counter and the limit is trivially bypassed (#1360).
|
|
89
92
|
* Caller must wire `app.set('trust proxy', ...)` correctly — see
|
|
90
93
|
* createServer in api.ts.
|
|
91
94
|
*
|
|
package/dist/server/validation.js
CHANGED
|
@@ -18,7 +18,7 @@
|
|
|
18
18
|
* in this module too but are introduced with the dependency they require.
|
|
19
19
|
*/
|
|
20
20
|
import path from 'node:path';
|
|
21
|
-
import rateLimit from 'express-rate-limit';
|
|
21
|
+
import rateLimit, { ipKeyGenerator } from 'express-rate-limit';
|
|
22
22
|
/**
|
|
23
23
|
* Thrown by validation helpers when user input is rejected.
|
|
24
24
|
* Routes catch via existing try/catch and convert with err.status / err.message.
|
|
@@ -116,6 +116,9 @@ const DEFAULT_RATE_LIMIT_RPM = 60;
|
|
|
116
116
|
* - keyGenerator: req.ip with a socket.remoteAddress fallback so abruptly
|
|
117
117
|
* closed connections do not trigger ERR_ERL_UNDEFINED_IP_ADDRESS
|
|
118
118
|
* (which would 500 the request via Express's default error handler).
|
|
119
|
+
* The IP is passed through `ipKeyGenerator` so IPv6 addresses are
|
|
120
|
+
* normalised to their /56 subnet — without this, each IPv6 address
|
|
121
|
+
* gets its own counter and the limit is trivially bypassed (#1360).
|
|
119
122
|
* Caller must wire `app.set('trust proxy', ...)` correctly — see
|
|
120
123
|
* createServer in api.ts.
|
|
121
124
|
*
|
|
@@ -129,7 +132,10 @@ export function createRouteLimiter(opts) {
|
|
|
129
132
|
standardHeaders: 'draft-7',
|
|
130
133
|
legacyHeaders: false,
|
|
131
134
|
passOnStoreError: true,
|
|
132
|
-
keyGenerator: (req) => req.ip ?? req.socket?.remoteAddress ?? 'unknown',
|
|
135
|
+
keyGenerator: (req) => {
|
|
136
|
+
const ip = req.ip ?? req.socket?.remoteAddress;
|
|
137
|
+
return ip ? ipKeyGenerator(ip) : 'unknown';
|
|
138
|
+
},
|
|
133
139
|
message: { error: 'Too many requests, please try again later.' },
|
|
134
140
|
...opts,
|
|
135
141
|
});
|
package/package.json
CHANGED