gitnexus 1.6.3-rc.47 → 1.6.3-rc.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/analyze.d.ts +6 -0
- package/dist/cli/analyze.js +1 -0
- package/dist/cli/index.js +2 -0
- package/dist/core/embedding-mode.d.ts +30 -0
- package/dist/core/embedding-mode.js +30 -0
- package/dist/core/run-analyze.d.ts +11 -0
- package/dist/core/run-analyze.js +45 -3
- package/dist/server/api.js +6 -2
- package/package.json +1 -1
- package/skills/gitnexus-cli.md +1 -0
package/dist/cli/analyze.d.ts
CHANGED
|
@@ -10,6 +10,12 @@
|
|
|
10
10
|
export interface AnalyzeOptions {
|
|
11
11
|
force?: boolean;
|
|
12
12
|
embeddings?: boolean;
|
|
13
|
+
/**
|
|
14
|
+
* Explicitly drop existing embeddings on rebuild instead of preserving
|
|
15
|
+
* them. Without this flag, a routine `analyze` keeps any embeddings
|
|
16
|
+
* already present in the index even when `--embeddings` is omitted.
|
|
17
|
+
*/
|
|
18
|
+
dropEmbeddings?: boolean;
|
|
13
19
|
skills?: boolean;
|
|
14
20
|
verbose?: boolean;
|
|
15
21
|
/** Skip AGENTS.md and CLAUDE.md gitnexus block updates. */
|
package/dist/cli/analyze.js
CHANGED
|
@@ -159,6 +159,7 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
159
159
|
// collision guard (see allowDuplicateName below).
|
|
160
160
|
force: options?.force || options?.skills,
|
|
161
161
|
embeddings: options?.embeddings,
|
|
162
|
+
dropEmbeddings: options?.dropEmbeddings,
|
|
162
163
|
skipGit: options?.skipGit,
|
|
163
164
|
skipAgentsMd: options?.skipAgentsMd,
|
|
164
165
|
noStats: options?.noStats,
|
package/dist/cli/index.js
CHANGED
|
@@ -18,6 +18,8 @@ program
|
|
|
18
18
|
.description('Index a repository (full analysis)')
|
|
19
19
|
.option('-f, --force', 'Force full re-index even if up to date')
|
|
20
20
|
.option('--embeddings', 'Enable embedding generation for semantic search (off by default)')
|
|
21
|
+
.option('--drop-embeddings', 'Drop existing embeddings on rebuild. By default, an `analyze` without `--embeddings` ' +
|
|
22
|
+
'preserves any embeddings already present in the index.')
|
|
21
23
|
.option('--skills', 'Generate repo-specific skill files from detected communities')
|
|
22
24
|
.option('--skip-agents-md', 'Skip updating the gitnexus section in AGENTS.md and CLAUDE.md')
|
|
23
25
|
.option('--no-stats', 'Omit volatile file/symbol counts from AGENTS.md and CLAUDE.md')
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure derivation of the embedding-mode flags for `runFullAnalysis`.
|
|
3
|
+
*
|
|
4
|
+
* Lives in its own module (no native imports) so the branching contract can
|
|
5
|
+
* be unit-tested without spinning up LadybugDB, tree-sitter, or any of the
|
|
6
|
+
* other side-effecting dependencies pulled in by `run-analyze.ts`.
|
|
7
|
+
*
|
|
8
|
+
* Semantics:
|
|
9
|
+
* --drop-embeddings -> wipe (skip cache load entirely)
|
|
10
|
+
* --embeddings -> load cache, restore, then generate
|
|
11
|
+
* --force + existing>0 -> load cache, restore, then generate (regenerate top-up)
|
|
12
|
+
* (default) + existing>0 -> preserve only (load + restore, no generation)
|
|
13
|
+
* existing=0, no --embeddings -> no cache work, no preservation (--embeddings alone still requests the cache load)
|
|
14
|
+
*/
|
|
15
|
+
export interface EmbeddingModeInput {
|
|
16
|
+
force?: boolean;
|
|
17
|
+
embeddings?: boolean;
|
|
18
|
+
dropEmbeddings?: boolean;
|
|
19
|
+
}
|
|
20
|
+
export interface EmbeddingMode {
|
|
21
|
+
/** True when phase 4 should run the embedding generation pipeline. */
|
|
22
|
+
shouldGenerateEmbeddings: boolean;
|
|
23
|
+
/** True when we should load the cache to re-insert vectors after rebuild without generating new ones. */
|
|
24
|
+
preserveExistingEmbeddings: boolean;
|
|
25
|
+
/** True when `--force` upgraded a default analyze into a regeneration because the repo was already embedded. */
|
|
26
|
+
forceRegenerateEmbeddings: boolean;
|
|
27
|
+
/** True when we need to load cached embeddings from the existing DB before the rebuild. */
|
|
28
|
+
shouldLoadCache: boolean;
|
|
29
|
+
}
|
|
30
|
+
export declare function deriveEmbeddingMode(options: EmbeddingModeInput, existingEmbeddingCount: number): EmbeddingMode;
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Pure derivation of the embedding-mode flags for `runFullAnalysis`.
|
|
3
|
+
*
|
|
4
|
+
* Lives in its own module (no native imports) so the branching contract can
|
|
5
|
+
* be unit-tested without spinning up LadybugDB, tree-sitter, or any of the
|
|
6
|
+
* other side-effecting dependencies pulled in by `run-analyze.ts`.
|
|
7
|
+
*
|
|
8
|
+
* Semantics:
|
|
9
|
+
* --drop-embeddings -> wipe (skip cache load entirely)
|
|
10
|
+
* --embeddings -> load cache, restore, then generate
|
|
11
|
+
* --force + existing>0 -> load cache, restore, then generate (regenerate top-up)
|
|
12
|
+
* (default) + existing>0 -> preserve only (load + restore, no generation)
|
|
13
|
+
* existing=0, no --embeddings -> no cache work, no preservation (--embeddings alone still requests the cache load)
|
|
14
|
+
*/
|
|
15
|
+
export function deriveEmbeddingMode(options, existingEmbeddingCount) {
|
|
16
|
+
const hasExisting = existingEmbeddingCount > 0;
|
|
17
|
+
const drop = !!options.dropEmbeddings;
|
|
18
|
+
const explicit = !!options.embeddings;
|
|
19
|
+
const force = !!options.force;
|
|
20
|
+
const forceRegenerateEmbeddings = force && !explicit && !drop && hasExisting;
|
|
21
|
+
const preserveExistingEmbeddings = !explicit && !drop && !forceRegenerateEmbeddings && hasExisting;
|
|
22
|
+
const shouldGenerateEmbeddings = explicit || forceRegenerateEmbeddings;
|
|
23
|
+
const shouldLoadCache = !drop && (shouldGenerateEmbeddings || preserveExistingEmbeddings);
|
|
24
|
+
return {
|
|
25
|
+
shouldGenerateEmbeddings,
|
|
26
|
+
preserveExistingEmbeddings,
|
|
27
|
+
forceRegenerateEmbeddings,
|
|
28
|
+
shouldLoadCache,
|
|
29
|
+
};
|
|
30
|
+
}
|
|
@@ -21,6 +21,15 @@ export interface AnalyzeOptions {
|
|
|
21
21
|
*/
|
|
22
22
|
force?: boolean;
|
|
23
23
|
embeddings?: boolean;
|
|
24
|
+
/**
|
|
25
|
+
* Explicitly drop any embeddings present in the existing index instead of
|
|
26
|
+
* preserving them. Most relevant when `embeddings` is false/undefined:
|
|
27
|
+
* the default behavior in that case is to load the previously generated
|
|
28
|
+
* embeddings and re-insert them after the rebuild so a routine
|
|
29
|
+
* re-analyze does not silently wipe a long embedding pass (#issue: analyze
|
|
30
|
+
* silently wipes existing embeddings when run without --embeddings).
|
|
31
|
+
*/
|
|
32
|
+
dropEmbeddings?: boolean;
|
|
24
33
|
skipGit?: boolean;
|
|
25
34
|
/** Skip AGENTS.md and CLAUDE.md gitnexus block updates. */
|
|
26
35
|
skipAgentsMd?: boolean;
|
|
@@ -57,6 +66,8 @@ export interface AnalyzeResult {
|
|
|
57
66
|
/** The raw pipeline result — only populated when needed by callers (e.g. skill generation). */
|
|
58
67
|
pipelineResult?: any;
|
|
59
68
|
}
|
|
69
|
+
export { deriveEmbeddingMode } from './embedding-mode.js';
|
|
70
|
+
export type { EmbeddingMode } from './embedding-mode.js';
|
|
60
71
|
export declare const PHASE_LABELS: Record<string, string>;
|
|
61
72
|
/**
|
|
62
73
|
* Run the full GitNexus analysis pipeline.
|
package/dist/core/run-analyze.js
CHANGED
|
@@ -19,6 +19,10 @@ import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
|
|
|
19
19
|
import { STALE_HASH_SENTINEL } from './lbug/schema.js';
|
|
20
20
|
/** Threshold: auto-skip embeddings for repos with more nodes than this */
|
|
21
21
|
const EMBEDDING_NODE_LIMIT = 50_000;
|
|
22
|
+
// Re-export the pure flag-derivation helper so external callers (and tests)
|
|
23
|
+
// keep importing from this module's stable surface.
|
|
24
|
+
export { deriveEmbeddingMode } from './embedding-mode.js';
|
|
25
|
+
import { deriveEmbeddingMode as _deriveEmbeddingMode } from './embedding-mode.js';
|
|
22
26
|
export const PHASE_LABELS = {
|
|
23
27
|
extracting: 'Scanning files',
|
|
24
28
|
structure: 'Building structure',
|
|
@@ -73,9 +77,39 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
73
77
|
}
|
|
74
78
|
}
|
|
75
79
|
// ── Cache embeddings from existing index before rebuild ────────────
|
|
80
|
+
// Four modes:
|
|
81
|
+
// --embeddings -> load cache, restore, then generate any new ones
|
|
82
|
+
// --force (with existing
|
|
83
|
+
// embeddings) -> auto-imply --embeddings: load cache, restore,
|
|
84
|
+
// regenerate embeddings for new/changed nodes
|
|
85
|
+
// (a forced re-index of an embedded repo
|
|
86
|
+
// shouldn't quietly downgrade to "preserve only")
|
|
87
|
+
// (default) -> if existing index has embeddings, preserve them
|
|
88
|
+
// (load + restore, but do not generate); otherwise no-op
|
|
89
|
+
// --drop-embeddings -> skip cache load entirely; rebuild wipes embeddings
|
|
90
|
+
//
|
|
91
|
+
// The default-preserve branch is what makes a routine `analyze` (e.g. a
|
|
92
|
+
// post-commit hook) safe: a multi-minute embedding pass is no longer
|
|
93
|
+
// silently dropped just because the caller omitted `--embeddings`.
|
|
76
94
|
let cachedEmbeddingNodeIds = new Set();
|
|
77
95
|
let cachedEmbeddings = [];
|
|
78
|
-
|
|
96
|
+
const existingEmbeddingCount = existingMeta?.stats?.embeddings ?? 0;
|
|
97
|
+
const { forceRegenerateEmbeddings, preserveExistingEmbeddings, shouldGenerateEmbeddings, shouldLoadCache, } = _deriveEmbeddingMode(options, existingEmbeddingCount);
|
|
98
|
+
if (options.dropEmbeddings && existingEmbeddingCount > 0) {
|
|
99
|
+
log(`Dropping ${existingEmbeddingCount} existing embeddings (--drop-embeddings). ` +
|
|
100
|
+
`Re-run with --embeddings to regenerate.`);
|
|
101
|
+
}
|
|
102
|
+
else if (forceRegenerateEmbeddings) {
|
|
103
|
+
log(`--force on a repo with ${existingEmbeddingCount} existing embeddings: ` +
|
|
104
|
+
`regenerating embeddings for new/changed nodes. ` +
|
|
105
|
+
`Pass --drop-embeddings to wipe them instead.`);
|
|
106
|
+
}
|
|
107
|
+
else if (preserveExistingEmbeddings) {
|
|
108
|
+
log(`Preserving ${existingEmbeddingCount} existing embeddings. ` +
|
|
109
|
+
`Pass --embeddings to also generate embeddings for new/changed nodes, ` +
|
|
110
|
+
`or --drop-embeddings to wipe them.`);
|
|
111
|
+
}
|
|
112
|
+
if (shouldLoadCache && existingMeta) {
|
|
79
113
|
try {
|
|
80
114
|
progress('embeddings', 0, 'Caching embeddings...');
|
|
81
115
|
await initLbug(lbugPath);
|
|
@@ -84,7 +118,15 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
84
118
|
cachedEmbeddings = cached.embeddings;
|
|
85
119
|
await closeLbug();
|
|
86
120
|
}
|
|
87
|
-
catch {
|
|
121
|
+
catch (err) {
|
|
122
|
+
// Surface cache-load failures explicitly: silently swallowing here would
|
|
123
|
+
// re-introduce the original silent-data-loss symptom (embeddings end up
|
|
124
|
+
// at 0 in meta.json with no diagnostic) through a different door.
|
|
125
|
+
log(`Warning: could not load cached embeddings ` +
|
|
126
|
+
`(${err?.message ?? String(err)}). ` +
|
|
127
|
+
`Embeddings will not be preserved on this run.`);
|
|
128
|
+
cachedEmbeddingNodeIds = new Set();
|
|
129
|
+
cachedEmbeddings = [];
|
|
88
130
|
try {
|
|
89
131
|
await closeLbug();
|
|
90
132
|
}
|
|
@@ -157,7 +199,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
|
|
|
157
199
|
// ── Phase 4: Embeddings (90–98%) ──────────────────────────────────
|
|
158
200
|
const stats = await getLbugStats();
|
|
159
201
|
let embeddingSkipped = true;
|
|
160
|
-
if (
|
|
202
|
+
if (shouldGenerateEmbeddings) {
|
|
161
203
|
if (stats.nodes <= EMBEDDING_NODE_LIMIT) {
|
|
162
204
|
embeddingSkipped = false;
|
|
163
205
|
}
|
package/dist/server/api.js
CHANGED
|
@@ -1002,7 +1002,7 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
1002
1002
|
// POST /api/analyze — start a new analysis job
|
|
1003
1003
|
app.post('/api/analyze', async (req, res) => {
|
|
1004
1004
|
try {
|
|
1005
|
-
const { url: repoUrl, path: repoLocalPath, force, embeddings } = req.body;
|
|
1005
|
+
const { url: repoUrl, path: repoLocalPath, force, embeddings, dropEmbeddings } = req.body;
|
|
1006
1006
|
// Input type validation
|
|
1007
1007
|
if (repoUrl !== undefined && typeof repoUrl !== 'string') {
|
|
1008
1008
|
res.status(400).json({ error: '"url" must be a string' });
|
|
@@ -1176,7 +1176,11 @@ export const createServer = async (port, host = '127.0.0.1') => {
|
|
|
1176
1176
|
child.send({
|
|
1177
1177
|
type: 'start',
|
|
1178
1178
|
repoPath: targetPath,
|
|
1179
|
-
options: {
|
|
1179
|
+
options: {
|
|
1180
|
+
force: !!force,
|
|
1181
|
+
embeddings: !!embeddings,
|
|
1182
|
+
dropEmbeddings: !!dropEmbeddings,
|
|
1183
|
+
},
|
|
1180
1184
|
});
|
|
1181
1185
|
};
|
|
1182
1186
|
forkWorker();
|
package/package.json
CHANGED
package/skills/gitnexus-cli.md
CHANGED
|
@@ -21,6 +21,7 @@ Run from the project root. This parses all source files, builds the knowledge gr
|
|
|
21
21
|
| -------------- | ---------------------------------------------------------------- |
|
|
22
22
|
| `--force` | Force full re-index even if up to date |
|
|
23
23
|
| `--embeddings` | Enable embedding generation for semantic search (off by default) |
|
|
24
|
+
| `--drop-embeddings` | Drop existing embeddings on rebuild. By default, an `analyze` without `--embeddings` preserves them. |
|
|
24
25
|
|
|
25
26
|
**When to run:** First time in a project, after major code changes, or when `gitnexus://repo/{name}/context` reports the index is stale. In Claude Code, a PostToolUse hook runs `analyze` automatically after `git commit` and `git merge`, preserving embeddings if previously generated.
|
|
26
27
|
|