gitnexus 1.6.4-rc.37 → 1.6.4-rc.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
  */
11
11
  import path from 'path';
12
12
  import fs from 'fs/promises';
13
- import { getStoragePaths, loadMeta, addToGitignore, registerRepo, } from '../storage/repo-manager.js';
13
+ import { getStoragePaths, loadMeta, ensureGitNexusIgnored, registerRepo, } from '../storage/repo-manager.js';
14
14
  import { getGitRoot, getRemoteUrl, isGitRepo } from '../storage/git.js';
15
15
  export const indexCommand = async (inputPathParts, options) => {
16
16
  console.log('\n GitNexus Index\n');
@@ -96,7 +96,7 @@ export const indexCommand = async (inputPathParts, options) => {
96
96
  meta.remoteUrl = getRemoteUrl(repoPath);
97
97
  }
98
98
  await registerRepo(repoPath, meta);
99
- await addToGitignore(repoPath);
99
+ await ensureGitNexusIgnored(repoPath);
100
100
  const projectName = path.basename(repoPath);
101
101
  const { stats } = meta;
102
102
  console.log(` Repository registered: ${projectName}`);
@@ -1,6 +1,7 @@
1
1
  import * as path from 'node:path';
2
2
  import { glob } from 'glob';
3
3
  import Parser from 'tree-sitter';
4
+ import { createIgnoreFilter } from '../../../config/ignore-service.js';
4
5
  import { readSafe } from './fs-utils.js';
5
6
  import { GRPC_SCAN_GLOB, getPluginForFile, hasProtoPlugin, } from './grpc-patterns/index.js';
6
7
  /**
@@ -185,11 +186,16 @@ function longestSharedSegmentRun(aPath, bPath) {
185
186
  }
186
187
  async function buildProtoContext(repoPath) {
187
188
  const servicesByName = new Map();
189
+ // `.gitnexusignore` / `.gitignore` honoured via the shared IgnoreService —
190
+ // see `filesystem-walker.ts` for the canonical pattern. Replaces a
191
+ // hardcoded `[node_modules, .git, vendor]` array; those names plus the
192
+ // rest of `DEFAULT_IGNORE_LIST` are still excluded by default (#1185).
193
+ const protoIgnoreFilter = await createIgnoreFilter(repoPath);
188
194
  const protoFiles = await glob('**/*.proto', {
189
195
  cwd: repoPath,
190
196
  absolute: false,
191
197
  nodir: true,
192
- ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**'],
198
+ ignore: protoIgnoreFilter,
193
199
  });
194
200
  const contents = new Map();
195
201
  for (const rel of protoFiles) {
@@ -324,9 +330,14 @@ export class GrpcExtractor {
324
330
  }
325
331
  }
326
332
  // ─── Source files (+ .proto when plugin available) ────────────
333
+ // Honour `.gitnexusignore` / `.gitignore` via the shared IgnoreService —
334
+ // mirrors `filesystem-walker.ts`. Replaces a hardcoded
335
+ // `[node_modules, .git, vendor, dist, build]` array; those names are all
336
+ // in `DEFAULT_IGNORE_LIST`, so default behaviour is preserved (#1185).
337
+ const sourceIgnoreFilter = await createIgnoreFilter(repoPath);
327
338
  const sourceFiles = await glob(GRPC_SCAN_GLOB, {
328
339
  cwd: repoPath,
329
- ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**', '**/dist/**', '**/build/**'],
340
+ ignore: sourceIgnoreFilter,
330
341
  nodir: true,
331
342
  });
332
343
  const parser = new Parser();
@@ -1,6 +1,7 @@
1
1
  import * as path from 'node:path';
2
2
  import { glob } from 'glob';
3
3
  import Parser from 'tree-sitter';
4
+ import { createIgnoreFilter } from '../../../config/ignore-service.js';
4
5
  import { readSafe } from './fs-utils.js';
5
6
  import { getPluginForFile, HTTP_SCAN_GLOB } from './http-patterns/index.js';
6
7
  /**
@@ -183,9 +184,16 @@ export class HttpRouteExtractor {
183
184
  return [...providers, ...consumers];
184
185
  }
185
186
  async scanFiles(repoPath) {
187
+ // Honour `.gitnexusignore` and `.gitignore` via the shared IgnoreService
188
+ // so contract extraction respects the same exclusion rules as the rest of
189
+ // the ingestion pipeline. Mirrors `filesystem-walker.ts` which uses the
190
+ // same shape. Replaces a hardcoded `[node_modules, .git, dist, build,
191
+ // vendor]` array — those names are still in `DEFAULT_IGNORE_LIST`, so
192
+ // default behaviour is preserved (#1185).
193
+ const ignoreFilter = await createIgnoreFilter(repoPath);
186
194
  return glob(HTTP_SCAN_GLOB, {
187
195
  cwd: repoPath,
188
- ignore: ['**/node_modules/**', '**/.git/**', '**/dist/**', '**/build/**', '**/vendor/**'],
196
+ ignore: ignoreFilter,
189
197
  nodir: true,
190
198
  });
191
199
  }
@@ -1,5 +1,6 @@
1
1
  import { glob } from 'glob';
2
2
  import Parser from 'tree-sitter';
3
+ import { createIgnoreFilter } from '../../../config/ignore-service.js';
3
4
  import { readSafe } from './fs-utils.js';
4
5
  import { scanFile, unquoteLiteral } from './tree-sitter-scanner.js';
5
6
  import { TOPIC_SCAN_GLOB, getProviderForFile, } from './topic-patterns/index.js';
@@ -40,22 +41,21 @@ export class TopicExtractor {
40
41
  return true;
41
42
  }
42
43
  async extract(_dbExecutor, repoPath, _repo) {
44
+ // Honour `.gitnexusignore` / `.gitignore` via the shared IgnoreService —
45
+ // mirrors `filesystem-walker.ts`. The 5-name hardcoded list
46
+ // (`node_modules, .git, vendor, dist, build`) is preserved because every
47
+ // entry is in `DEFAULT_IGNORE_LIST`, so default behaviour is unchanged
48
+ // (#1185). The Go-specific `**/*_test.go` filter is layered on top via a
49
+ // small wrapper so glob-level pruning is preserved (we never read those
50
+ // files); the wrapper short-circuits before calling the base filter.
51
+ const baseFilter = await createIgnoreFilter(repoPath);
52
+ const ignoreFilter = {
53
+ ignored: (p) => p.relative().endsWith('_test.go') || baseFilter.ignored(p),
54
+ childrenIgnored: (p) => baseFilter.childrenIgnored(p),
55
+ };
43
56
  const files = await glob(TOPIC_SCAN_GLOB, {
44
57
  cwd: repoPath,
45
- ignore: [
46
- '**/node_modules/**',
47
- '**/.git/**',
48
- '**/vendor/**',
49
- '**/dist/**',
50
- '**/build/**',
51
- // Language-level test file conventions. Go test files
52
- // `*_test.go` live next to source; other languages either use
53
- // separate test directories (Python's `tests/`, Java's
54
- // `src/test/`) or are already covered by the dist/build ignores.
55
- // Pushed to the glob level so the orchestrator stays
56
- // language-agnostic.
57
- '**/*_test.go',
58
- ],
58
+ ignore: ignoreFilter,
59
59
  nodir: true,
60
60
  });
61
61
  // One parser reused across files; the scanner calls `setLanguage` per
@@ -41,35 +41,76 @@ export function resolvePythonImportTarget(parsedImport, workspaceIndex) {
41
41
  const pathLike = parsedImport.targetRaw.replace(/\./g, '/');
42
42
  if (pathLike.includes('/')) {
43
43
  const [leadingSegment] = pathLike.split('/').filter(Boolean);
44
- if (!leadingSegment || !hasRepoCandidate(leadingSegment, ctx.allFilePaths)) {
44
+ if (!leadingSegment || !hasRepoCandidate(leadingSegment, ctx.allFilePaths, ctx.fromFile)) {
45
45
  return null;
46
46
  }
47
47
  }
48
- // Multi-segment absolute resolve: try exact paths first, then suffix
49
- // match in nested repos. Using direct `Set.has` + `endsWith` instead of
50
- // `suffixResolve`'s shared helper because that helper requires a
51
- // pre-built `SuffixIndex` to disambiguate ties without one it falls
52
- // back to an O(files) scan that silently picks the wrong file when
53
- // the last segment collides across directories (e.g. `accounts.models`
54
- // matching `billing/models.py` when both files exist).
55
- return resolveAbsoluteFromFiles(pathLike, ctx.allFilePaths);
48
+ // Multi-segment absolute resolve: try exact paths first, then ancestor
49
+ // walk (mirrors the single-segment ancestor walk in
50
+ // `resolvePythonImportInternal`), then a suffix match in nested repos.
51
+ // Using direct `Set.has` + `endsWith` instead of `suffixResolve`'s shared
52
+ // helper because that helper requires a pre-built `SuffixIndex` to
53
+ // disambiguate ties; without one it falls back to an O(files) scan that
54
+ // silently picks the wrong file when the last segment collides across
55
+ // directories (e.g. `accounts.models` matching `billing/models.py` when
56
+ // both files exist).
57
+ return resolveAbsoluteFromFiles(pathLike, ctx.allFilePaths, ctx.fromFile);
56
58
  }
57
59
  /**
58
60
  * Resolve `package/sub/module` style paths (already dot-flattened) to a
59
- * concrete file in `allFilePaths`. Tries the exact path first, then the
60
- * `__init__.py` variant, then a suffix match for nested layouts.
61
+ * concrete file in `allFilePaths`. Tries the exact path first, then walks
62
+ * ancestors of `fromFile` looking for `<ancestor>/<pathLike>.py` (or
63
+ * `__init__.py`), then falls back to a suffix match for nested layouts.
61
64
  * Returns the original (un-normalized) path from the set.
65
+ *
66
+ * Precedence order:
67
+ * 1. Workspace-root direct hit (`<pathLike>.py`, `<pathLike>/__init__.py`).
68
+ * 2. Closest-ancestor match walking up from the importer's directory.
69
+ * 3. Suffix fallback (first match).
70
+ *
71
+ * Root wins over ancestor by construction — if both `services/sync.py` and
72
+ * `backend/services/sync.py` exist, `backend/routers/cron.py`'s
73
+ * `from services.sync import X` resolves to the root file. This mirrors
74
+ * Python's `sys.path` semantics where the project root is searched first.
75
+ *
76
+ * The ancestor walk mirrors the single-segment behavior in
77
+ * `resolvePythonImportInternal`. For `from services.sync import X` in
78
+ * `backend/routers/cron.py`, walk up: `backend/routers/services/sync.py` →
79
+ * `backend/services/sync.py` ✓.
62
80
  */
63
- function resolveAbsoluteFromFiles(pathLike, allFilePaths) {
81
+ function resolveAbsoluteFromFiles(pathLike, allFilePaths, fromFile) {
64
82
  const directFile = `${pathLike}.py`;
65
83
  const directPkg = `${pathLike}/__init__.py`;
84
+ // Direct hit at workspace root.
85
+ if (allFilePaths.has(directFile))
86
+ return directFile;
87
+ if (allFilePaths.has(directPkg))
88
+ return directPkg;
89
+ // Ancestor walk — match the single-segment resolver's behavior at
90
+ // multi-segment granularity. Closest match wins. Loop only while `i > 0` because
91
+ // `i === 0` would re-check the workspace-root candidates already covered
92
+ // by the direct check above.
93
+ const importerDir = fromFile.replace(/\\/g, '/').split('/').slice(0, -1).join('/');
94
+ if (importerDir) {
95
+ const dirParts = importerDir.split('/').filter(Boolean);
96
+ for (let i = dirParts.length; i > 0; i--) {
97
+ const ancestor = dirParts.slice(0, i).join('/');
98
+ const prefix = `${ancestor}/`;
99
+ const candidateFile = `${prefix}${directFile}`;
100
+ const candidatePkg = `${prefix}${directPkg}`;
101
+ if (allFilePaths.has(candidateFile))
102
+ return candidateFile;
103
+ if (allFilePaths.has(candidatePkg))
104
+ return candidatePkg;
105
+ }
106
+ }
107
+ // Existing suffix-match fallback (preserved for monorepo/nested-repo
108
+ // layouts that don't share a directory ancestor with the importer).
66
109
  const suffixFile = `/${directFile}`;
67
110
  const suffixPkg = `/${directPkg}`;
68
111
  let suffixMatch = null;
69
112
  for (const raw of allFilePaths) {
70
113
  const f = raw.replace(/\\/g, '/');
71
- if (f === directFile || f === directPkg)
72
- return raw;
73
114
  if (suffixMatch === null && (f.endsWith(suffixFile) || f.endsWith(suffixPkg))) {
74
115
  suffixMatch = raw;
75
116
  }
@@ -77,23 +118,51 @@ function resolveAbsoluteFromFiles(pathLike, allFilePaths) {
77
118
  return suffixMatch;
78
119
  }
79
120
  /**
80
- * Does the repo contain a module/package named `leadingSegment` at the top
81
- * level? Used to guard against false-positive suffix matches on external
82
- * dotted imports (e.g. `django.apps` matching a local `accounts/apps.py`).
121
+ * Does the repo contain a module/package named `leadingSegment` somewhere
122
+ * the importer can plausibly reach?
83
123
  *
84
- * Checks, in order: `<segment>.py` root file, `<segment>/__init__.py`
85
- * regular package, or any `<segment>/**.py` file (namespace package).
124
+ * Used to guard against false-positive suffix matches on external dotted
125
+ * imports (e.g. `django.apps` matching a local `accounts/apps.py`).
126
+ *
127
+ * Checks, in order:
128
+ * 1. `SEGMENT.py` root file or `SEGMENT/__init__.py` regular package.
129
+ * 2. Any `SEGMENT/...py` file at the workspace root (namespace package).
130
+ * 3. Any `<importer-ancestor>/SEGMENT/**.py` file (nested namespace
131
+ * package the importer could reach via an ancestor walk, e.g.
132
+ * `backend/services/sync.py` from `backend/routers/cron.py`).
133
+ *
134
+ * The nested case is bounded to the importer's own ancestors so a
135
+ * vendored copy of an external package (e.g. `vendor/django/urls.py`)
136
+ * does not gate-pass external imports like `from django.urls import path`
137
+ * issued from `app/main.py`. Files inside the vendored tree itself
138
+ * (importer under `vendor/django/...`) still resolve correctly because
139
+ * the ancestor walk includes their own parents.
86
140
  */
87
- function hasRepoCandidate(leadingSegment, allFilePaths) {
141
+ function hasRepoCandidate(leadingSegment, allFilePaths, fromFile) {
88
142
  const prefix = `${leadingSegment}/`;
89
143
  const rootFile = `${leadingSegment}.py`;
90
144
  const initFile = `${leadingSegment}/__init__.py`;
145
+ // Build importer-ancestor prefixes: for `backend/routers/cron.py`,
146
+ // produces `["backend/routers/services/", "backend/services/"]` for
147
+ // segment `services` (closest first, root excluded — covered above).
148
+ const importerDir = fromFile.replace(/\\/g, '/').split('/').slice(0, -1).join('/');
149
+ const dirParts = importerDir ? importerDir.split('/').filter(Boolean) : [];
150
+ const ancestorPrefixes = [];
151
+ for (let i = dirParts.length; i > 0; i--) {
152
+ ancestorPrefixes.push(`${dirParts.slice(0, i).join('/')}/${leadingSegment}/`);
153
+ }
91
154
  for (const raw of allFilePaths) {
92
155
  const f = raw.replace(/\\/g, '/');
93
156
  if (f === rootFile || f === initFile)
94
157
  return true;
95
158
  if (f.startsWith(prefix) && f.endsWith('.py'))
96
159
  return true;
160
+ if (f.endsWith('.py')) {
161
+ for (const ap of ancestorPrefixes) {
162
+ if (f.startsWith(ap))
163
+ return true;
164
+ }
165
+ }
97
166
  }
98
167
  return false;
99
168
  }
@@ -13,7 +13,7 @@ import fs from 'fs/promises';
13
13
  import { runPipelineFromRepo } from './ingestion/pipeline.js';
14
14
  import { initLbug, loadGraphToLbug, getLbugStats, executeQuery, executeWithReusedStatement, closeLbug, loadCachedEmbeddings, } from './lbug/lbug-adapter.js';
15
15
  import { createSearchFTSIndexes } from './search/fts-indexes.js';
16
- import { getStoragePaths, saveMeta, loadMeta, addToGitignore, registerRepo, cleanupOldKuzuFiles, } from '../storage/repo-manager.js';
16
+ import { getStoragePaths, saveMeta, loadMeta, ensureGitNexusIgnored, registerRepo, cleanupOldKuzuFiles, } from '../storage/repo-manager.js';
17
17
  import { getCurrentCommit, getRemoteUrl, hasGitDir, getInferredRepoName } from '../storage/git.js';
18
18
  import { generateAIContextFiles } from '../cli/ai-context.js';
19
19
  import { EMBEDDING_TABLE_NAME } from './lbug/schema.js';
@@ -69,6 +69,7 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
69
69
  if (existingMeta && !options.force && existingMeta.lastCommit === currentCommit) {
70
70
  // Non-git folders have currentCommit = '' — always rebuild since we can't detect changes
71
71
  if (currentCommit !== '') {
72
+ await ensureGitNexusIgnored(repoPath);
72
73
  return {
73
74
  repoName: options.registryName ?? getInferredRepoName(repoPath) ?? path.basename(repoPath),
74
75
  repoPath,
@@ -302,10 +303,8 @@ export async function runFullAnalysis(repoPath, options, callbacks) {
302
303
  name: options.registryName,
303
304
  allowDuplicateName: options.allowDuplicateName,
304
305
  });
305
- // Only attempt to update .gitignore when a .git directory is present.
306
- if (hasGitDir(repoPath)) {
307
- await addToGitignore(repoPath);
308
- }
306
+ // Keep generated .gitnexus contents ignored without editing the user's root .gitignore.
307
+ await ensureGitNexusIgnored(repoPath);
309
308
  // ── Generate AI context files (best-effort) ───────────────────────
310
309
  let aggregatedClusterCount = 0;
311
310
  if (pipelineResult.communityResult?.communities) {
@@ -128,9 +128,9 @@ export declare const loadRepo: (repoPath: string) => Promise<IndexedRepo | null>
128
128
  */
129
129
  export declare const findRepo: (startPath: string) => Promise<IndexedRepo | null>;
130
130
  /**
131
- * Add .gitnexus to .gitignore if not already present
131
+ * Keep generated index files ignored without modifying the user's root .gitignore.
132
132
  */
133
- export declare const addToGitignore: (repoPath: string) => Promise<void>;
133
+ export declare const ensureGitNexusIgnored: (repoPath: string) => Promise<void>;
134
134
  /**
135
135
  * Get the path to the global GitNexus directory
136
136
  */
@@ -50,6 +50,7 @@ export const canonicalizePath = (p) => {
50
50
  }
51
51
  };
52
52
  const GITNEXUS_DIR = '.gitnexus';
53
+ const GITNEXUS_EXCLUDE_ENTRY = `${GITNEXUS_DIR}/`;
53
54
  // ─── Local Storage Helpers ─────────────────────────────────────────────
54
55
  /**
55
56
  * Get the .gitnexus storage path for a repository
@@ -185,23 +186,42 @@ export const findRepo = async (startPath) => {
185
186
  return null;
186
187
  };
187
188
  /**
188
- * Add .gitnexus to .gitignore if not already present
189
+ * Keep generated index files ignored without modifying the user's root .gitignore.
189
190
  */
190
- export const addToGitignore = async (repoPath) => {
191
- const gitignorePath = path.join(repoPath, '.gitignore');
191
+ export const ensureGitNexusIgnored = async (repoPath) => {
192
+ const gitignorePath = path.join(getStoragePath(repoPath), '.gitignore');
193
+ await fs.mkdir(path.dirname(gitignorePath), { recursive: true });
194
+ await fs.writeFile(gitignorePath, '*\n', 'utf-8');
195
+ await ensureGitInfoExclude(repoPath);
196
+ };
197
+ const ensureGitInfoExclude = async (repoPath) => {
198
+ const gitDirPath = path.join(path.resolve(repoPath), '.git');
199
+ const excludePath = path.join(gitDirPath, 'info', 'exclude');
192
200
  try {
193
- const content = await fs.readFile(gitignorePath, 'utf-8');
194
- if (content.includes(GITNEXUS_DIR))
201
+ const gitDir = await fs.stat(gitDirPath);
202
+ if (!gitDir.isDirectory())
195
203
  return;
196
- const newContent = content.endsWith('\n')
197
- ? `${content}${GITNEXUS_DIR}\n`
198
- : `${content}\n${GITNEXUS_DIR}\n`;
199
- await fs.writeFile(gitignorePath, newContent, 'utf-8');
200
204
  }
201
205
  catch {
202
- // .gitignore doesn't exist, create it
203
- await fs.writeFile(gitignorePath, `${GITNEXUS_DIR}\n`, 'utf-8');
206
+ return;
204
207
  }
208
+ await fs.mkdir(path.dirname(excludePath), { recursive: true });
209
+ let content = '';
210
+ try {
211
+ content = await fs.readFile(excludePath, 'utf-8');
212
+ }
213
+ catch (err) {
214
+ if (err?.code !== 'ENOENT')
215
+ throw err;
216
+ }
217
+ const excludes = content
218
+ .split(/\r?\n/)
219
+ .map((line) => line.trim())
220
+ .filter((line) => line && !line.startsWith('#'));
221
+ if (excludes.includes(GITNEXUS_DIR) || excludes.includes(GITNEXUS_EXCLUDE_ENTRY))
222
+ return;
223
+ const separator = content.length === 0 || content.endsWith('\n') ? '' : '\n';
224
+ await fs.writeFile(excludePath, `${content}${separator}${GITNEXUS_EXCLUDE_ENTRY}\n`, 'utf-8');
205
225
  };
206
226
  // ─── Global Registry (~/.gitnexus/registry.json) ───────────────────────
207
227
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.4-rc.37",
3
+ "version": "1.6.4-rc.39",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",