gitnexus 1.6.3-rc.46 → 1.6.3-rc.48

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,6 +10,12 @@
10
10
  export interface AnalyzeOptions {
11
11
  force?: boolean;
12
12
  embeddings?: boolean;
13
+ /**
14
+ * Explicitly drop existing embeddings on rebuild instead of preserving
15
+ * them. Without this flag, a routine `analyze` keeps any embeddings
16
+ * already present in the index even when `--embeddings` is omitted.
17
+ */
18
+ dropEmbeddings?: boolean;
13
19
  skills?: boolean;
14
20
  verbose?: boolean;
15
21
  /** Skip AGENTS.md and CLAUDE.md gitnexus block updates. */
@@ -159,6 +159,7 @@ export const analyzeCommand = async (inputPath, options) => {
159
159
  // collision guard (see allowDuplicateName below).
160
160
  force: options?.force || options?.skills,
161
161
  embeddings: options?.embeddings,
162
+ dropEmbeddings: options?.dropEmbeddings,
162
163
  skipGit: options?.skipGit,
163
164
  skipAgentsMd: options?.skipAgentsMd,
164
165
  noStats: options?.noStats,
package/dist/cli/index.js CHANGED
@@ -18,6 +18,8 @@ program
18
18
  .description('Index a repository (full analysis)')
19
19
  .option('-f, --force', 'Force full re-index even if up to date')
20
20
  .option('--embeddings', 'Enable embedding generation for semantic search (off by default)')
21
+ .option('--drop-embeddings', 'Drop existing embeddings on rebuild. By default, an `analyze` without `--embeddings` ' +
22
+ 'preserves any embeddings already present in the index.')
21
23
  .option('--skills', 'Generate repo-specific skill files from detected communities')
22
24
  .option('--skip-agents-md', 'Skip updating the gitnexus section in AGENTS.md and CLAUDE.md')
23
25
  .option('--no-stats', 'Omit volatile file/symbol counts from AGENTS.md and CLAUDE.md')
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Pure derivation of the embedding-mode flags for `runFullAnalysis`.
3
+ *
4
+ * Lives in its own module (no native imports) so the branching contract can
5
+ * be unit-tested without spinning up LadybugDB, tree-sitter, or any of the
6
+ * other side-effecting dependencies pulled in by `run-analyze.ts`.
7
+ *
8
+ * Semantics:
9
+ * --drop-embeddings -> wipe (skip cache load entirely); generation still runs if --embeddings is also set
10
+ * --embeddings -> load cache, restore, then generate
11
+ * --force + existing>0 -> load cache, restore, then generate (regenerate top-up)
12
+ * (default) + existing>0 -> preserve only (load + restore, no generation)
13
+ * existing=0 -> no preservation and no force upgrade; the cache is still loaded when --embeddings is set
14
+ */
15
+ export interface EmbeddingModeInput {
16
+ force?: boolean; // `--force`: on its own, upgrades to regeneration only when embeddings already exist
17
+ embeddings?: boolean; // `--embeddings`: explicit request to run embedding generation
18
+ dropEmbeddings?: boolean; // `--drop-embeddings`: never load the cache, so nothing is restored after the rebuild
19
+ }
20
+ export interface EmbeddingMode { // Result of deriveEmbeddingMode: four booleans derived from the input flags and the existing embedding count.
21
+ /** True when phase 4 should run the embedding generation pipeline (`--embeddings`, or a `--force` upgrade). */
22
+ shouldGenerateEmbeddings: boolean;
23
+ /** True when we should load the cache to re-insert vectors after rebuild without generating new ones. Never true together with `shouldGenerateEmbeddings`. */
24
+ preserveExistingEmbeddings: boolean;
25
+ /** True when `--force` upgraded a default analyze into a regeneration because the repo was already embedded. Requires that neither `--embeddings` nor `--drop-embeddings` was passed. */
26
+ forceRegenerateEmbeddings: boolean;
27
+ /** True when we need to load cached embeddings from the existing DB before the rebuild. Always false when `--drop-embeddings` is set. */
28
+ shouldLoadCache: boolean;
29
+ }
30
+ export declare function deriveEmbeddingMode(options: EmbeddingModeInput, existingEmbeddingCount: number): EmbeddingMode; // Pure: maps the flags plus the count of embeddings in the existing index (values <= 0 mean "none") to the mode booleans.
@@ -0,0 +1,30 @@
1
+ /**
2
+ * Pure derivation of the embedding-mode flags for `runFullAnalysis`.
3
+ *
4
+ * Lives in its own module (no native imports) so the branching contract can
5
+ * be unit-tested without spinning up LadybugDB, tree-sitter, or any of the
6
+ * other side-effecting dependencies pulled in by `run-analyze.ts`.
7
+ *
8
+ * Semantics:
9
+ * --drop-embeddings -> wipe (skip cache load entirely); generation still runs if --embeddings is also set
10
+ * --embeddings -> load cache, restore, then generate
11
+ * --force + existing>0 -> load cache, restore, then generate (regenerate top-up)
12
+ * (default) + existing>0 -> preserve only (load + restore, no generation)
13
+ * existing=0 -> no preservation and no force upgrade; the cache is still loaded when --embeddings is set
14
+ */
15
+ export function deriveEmbeddingMode(options, existingEmbeddingCount) {
16
+ const hasExisting = existingEmbeddingCount > 0; // zero, negative and NaN counts all read as "no existing embeddings"
17
+ const drop = !!options.dropEmbeddings; // coerce the optional flags to strict booleans
18
+ const explicit = !!options.embeddings;
19
+ const force = !!options.force;
20
+ const forceRegenerateEmbeddings = force && !explicit && !drop && hasExisting; // --force alone on an already-embedded repo is upgraded to regeneration
21
+ const preserveExistingEmbeddings = !explicit && !drop && !forceRegenerateEmbeddings && hasExisting; // neither generating nor dropping: keep what is already there
22
+ const shouldGenerateEmbeddings = explicit || forceRegenerateEmbeddings; // --embeddings requests generation even when combined with --drop-embeddings
23
+ const shouldLoadCache = !drop && (shouldGenerateEmbeddings || preserveExistingEmbeddings); // dropping never reads the old vectors; not gated on hasExisting when --embeddings is set
24
+ return {
25
+ shouldGenerateEmbeddings,
26
+ preserveExistingEmbeddings,
27
+ forceRegenerateEmbeddings,
28
+ shouldLoadCache,
29
+ };
30
+ }
@@ -21,6 +21,15 @@ export interface AnalyzeOptions {
21
21
  */
22
22
  force?: boolean;
23
23
  embeddings?: boolean;
24
+ /**
25
+ * Explicitly drop any embeddings present in the existing index instead of
26
+ * preserving them. Mainly relevant when `embeddings` is false/undefined:
27
+ * the default behavior in that case is to load the previously generated
28
+ * embeddings and re-insert them after the rebuild so a routine
29
+ * re-analyze does not silently wipe a long embedding pass (#issue: analyze
30
+ * silently wipes existing embeddings when run without --embeddings).
31
+ */
32
+ dropEmbeddings?: boolean;
24
33
  skipGit?: boolean;
25
34
  /** Skip AGENTS.md and CLAUDE.md gitnexus block updates. */
26
35
  skipAgentsMd?: boolean;
@@ -57,6 +66,8 @@ export interface AnalyzeResult {
57
66
  /** The raw pipeline result — only populated when needed by callers (e.g. skill generation). */
58
67
  pipelineResult?: any;
59
68
  }
69
+ export { deriveEmbeddingMode } from './embedding-mode.js';
70
+ export type { EmbeddingMode } from './embedding-mode.js';
60
71
  export declare const PHASE_LABELS: Record<string, string>;
61
72
  /**
62
73
  * Run the full GitNexus analysis pipeline.
@@ -19,6 +19,10 @@ import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
19
19
  import { STALE_HASH_SENTINEL } from './lbug/schema.js';
20
20
  /** Threshold: auto-skip embeddings for repos with more nodes than this */
21
21
  const EMBEDDING_NODE_LIMIT = 50_000;
22
+ // Re-export the pure flag-derivation helper so external callers (and tests)
23
+ // keep importing from this module's stable surface.
24
+ export { deriveEmbeddingMode } from './embedding-mode.js';
25
+ import { deriveEmbeddingMode as _deriveEmbeddingMode } from './embedding-mode.js';
22
26
  export const PHASE_LABELS = {
23
27
  extracting: 'Scanning files',
24
28
  structure: 'Building structure',
@@ -73,9 +77,39 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
73
77
  }
74
78
  }
75
79
  // ── Cache embeddings from existing index before rebuild ────────────
80
+ // Four modes:
81
+ // --embeddings -> load cache, restore, then generate any new ones
82
+ // --force (with existing
83
+ // embeddings) -> auto-imply --embeddings: load cache, restore,
84
+ // regenerate embeddings for new/changed nodes
85
+ // (a forced re-index of an embedded repo
86
+ // shouldn't quietly downgrade to "preserve only")
87
+ // (default) -> if existing index has embeddings, preserve them
88
+ // (load + restore, but do not generate); otherwise no-op
89
+ // --drop-embeddings -> skip cache load entirely; rebuild wipes embeddings
90
+ //
91
+ // The default-preserve branch is what makes a routine `analyze` (e.g. a
92
+ // post-commit hook) safe: a multi-minute embedding pass is no longer
93
+ // silently dropped just because the caller omitted `--embeddings`.
76
94
  let cachedEmbeddingNodeIds = new Set();
77
95
  let cachedEmbeddings = [];
78
- if (options.embeddings && existingMeta && !options.force) {
96
+ const existingEmbeddingCount = existingMeta?.stats?.embeddings ?? 0;
97
+ const { forceRegenerateEmbeddings, preserveExistingEmbeddings, shouldGenerateEmbeddings, shouldLoadCache, } = _deriveEmbeddingMode(options, existingEmbeddingCount);
98
+ if (options.dropEmbeddings && existingEmbeddingCount > 0) {
99
+ log(`Dropping ${existingEmbeddingCount} existing embeddings (--drop-embeddings). ` +
100
+ `Re-run with --embeddings to regenerate.`);
101
+ }
102
+ else if (forceRegenerateEmbeddings) {
103
+ log(`--force on a repo with ${existingEmbeddingCount} existing embeddings: ` +
104
+ `regenerating embeddings for new/changed nodes. ` +
105
+ `Pass --drop-embeddings to wipe them instead.`);
106
+ }
107
+ else if (preserveExistingEmbeddings) {
108
+ log(`Preserving ${existingEmbeddingCount} existing embeddings. ` +
109
+ `Pass --embeddings to also generate embeddings for new/changed nodes, ` +
110
+ `or --drop-embeddings to wipe them.`);
111
+ }
112
+ if (shouldLoadCache && existingMeta) {
79
113
  try {
80
114
  progress('embeddings', 0, 'Caching embeddings...');
81
115
  await initLbug(lbugPath);
@@ -84,7 +118,15 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
84
118
  cachedEmbeddings = cached.embeddings;
85
119
  await closeLbug();
86
120
  }
87
- catch {
121
+ catch (err) {
122
+ // Surface cache-load failures explicitly: silently swallowing here would
123
+ // re-introduce the original silent-data-loss symptom (embeddings end up
124
+ // at 0 in meta.json with no diagnostic) through a different door.
125
+ log(`Warning: could not load cached embeddings ` +
126
+ `(${err?.message ?? String(err)}). ` +
127
+ `Embeddings will not be preserved on this run.`);
128
+ cachedEmbeddingNodeIds = new Set();
129
+ cachedEmbeddings = [];
88
130
  try {
89
131
  await closeLbug();
90
132
  }
@@ -157,7 +199,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
157
199
  // ── Phase 4: Embeddings (90–98%) ──────────────────────────────────
158
200
  const stats = await getLbugStats();
159
201
  let embeddingSkipped = true;
160
- if (options.embeddings) {
202
+ if (shouldGenerateEmbeddings) {
161
203
  if (stats.nodes <= EMBEDDING_NODE_LIMIT) {
162
204
  embeddingSkipped = false;
163
205
  }
@@ -1002,7 +1002,7 @@ export const createServer = async (port, host = '127.0.0.1') => {
1002
1002
  // POST /api/analyze — start a new analysis job
1003
1003
  app.post('/api/analyze', async (req, res) => {
1004
1004
  try {
1005
- const { url: repoUrl, path: repoLocalPath, force, embeddings } = req.body;
1005
+ const { url: repoUrl, path: repoLocalPath, force, embeddings, dropEmbeddings } = req.body;
1006
1006
  // Input type validation
1007
1007
  if (repoUrl !== undefined && typeof repoUrl !== 'string') {
1008
1008
  res.status(400).json({ error: '"url" must be a string' });
@@ -1176,7 +1176,11 @@ export const createServer = async (port, host = '127.0.0.1') => {
1176
1176
  child.send({
1177
1177
  type: 'start',
1178
1178
  repoPath: targetPath,
1179
- options: { force: !!force, embeddings: !!embeddings },
1179
+ options: {
1180
+ force: !!force,
1181
+ embeddings: !!embeddings,
1182
+ dropEmbeddings: !!dropEmbeddings,
1183
+ },
1180
1184
  });
1181
1185
  };
1182
1186
  forkWorker();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.3-rc.46",
3
+ "version": "1.6.3-rc.48",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",
@@ -21,6 +21,7 @@ Run from the project root. This parses all source files, builds the knowledge gr
21
21
  | -------------- | ---------------------------------------------------------------- |
22
22
  | `--force` | Force full re-index even if up to date |
23
23
  | `--embeddings` | Enable embedding generation for semantic search (off by default) |
24
+ | `--drop-embeddings` | Drop existing embeddings on rebuild. By default, an `analyze` without `--embeddings` preserves them. |
24
25
 
25
26
  **When to run:** First time in a project, after major code changes, or when `gitnexus://repo/{name}/context` reports the index is stale. In Claude Code, a PostToolUse hook runs `analyze` automatically after `git commit` and `git merge`, preserving embeddings if previously generated.
26
27