gitnexus 1.6.3-rc.39 → 1.6.3-rc.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,5 +33,11 @@ export interface AnalyzeOptions {
33
33
  * `allowDuplicateName` option end-to-end.
34
34
  */
35
35
  allowDuplicateName?: boolean;
36
+ /**
37
+ * Override the walker's large-file skip threshold (#991). Value in KB;
38
+ * clamped downstream to the tree-sitter 32 MB ceiling. Sets
39
+ * `GITNEXUS_MAX_FILE_SIZE` for the rest of the pipeline.
40
+ */
41
+ maxFileSize?: string;
36
42
  }
37
43
  export declare const analyzeCommand: (inputPath?: string, options?: AnalyzeOptions) => Promise<void>;
@@ -15,6 +15,7 @@ import { closeLbug } from '../core/lbug/lbug-adapter.js';
15
15
  import { getStoragePaths, getGlobalRegistryPath, RegistryNameCollisionError, } from '../storage/repo-manager.js';
16
16
  import { getGitRoot, hasGitDir } from '../storage/git.js';
17
17
  import { runFullAnalysis } from '../core/run-analyze.js';
18
+ import { getMaxFileSizeBannerMessage } from '../core/ingestion/utils/max-file-size.js';
18
19
  import fs from 'fs/promises';
19
20
  const HEAP_MB = 8192;
20
21
  const HEAP_FLAG = `--max-old-space-size=${HEAP_MB}`;
@@ -51,6 +52,9 @@ export const analyzeCommand = async (inputPath, options) => {
51
52
  if (options?.verbose) {
52
53
  process.env.GITNEXUS_VERBOSE = '1';
53
54
  }
55
+ if (options?.maxFileSize) {
56
+ process.env.GITNEXUS_MAX_FILE_SIZE = options.maxFileSize;
57
+ }
54
58
  console.log('\n GitNexus Analyzer\n');
55
59
  let repoPath;
56
60
  if (inputPath) {
@@ -85,6 +89,10 @@ export const analyzeCommand = async (inputPath, options) => {
85
89
  if (process.env.GITNEXUS_NO_GITIGNORE) {
86
90
  console.log(' GITNEXUS_NO_GITIGNORE is set — skipping .gitignore (still reading .gitnexusignore)\n');
87
91
  }
92
+ const maxFileSizeBanner = getMaxFileSizeBannerMessage();
93
+ if (maxFileSizeBanner) {
94
+ console.log(`${maxFileSizeBanner}\n`);
95
+ }
88
96
  // ── CLI progress bar setup ─────────────────────────────────────────
89
97
  const bar = new cliProgress.SingleBar({
90
98
  format: ' {bar} {percentage}% | {phase}',
package/dist/cli/index.js CHANGED
@@ -27,7 +27,10 @@ program
27
27
  .option('--allow-duplicate-name', 'Register this repo even if another path already uses the same --name alias. ' +
28
28
  'Leaves `-r <name>` ambiguous for the two paths; use -r <path> to disambiguate.')
29
29
  .option('-v, --verbose', 'Enable verbose ingestion warnings (default: false)')
30
- .addHelpText('after', '\nEnvironment variables:\n GITNEXUS_NO_GITIGNORE=1 Skip .gitignore parsing (still reads .gitnexusignore)')
30
+ .option('--max-file-size <kb>', 'Skip files larger than this (KB). Default: 512. Hard cap: 32768 (tree-sitter limit).')
31
+ .addHelpText('after', '\nEnvironment variables:\n' +
32
+ ' GITNEXUS_NO_GITIGNORE=1 Skip .gitignore parsing (still reads .gitnexusignore)\n' +
33
+ ' GITNEXUS_MAX_FILE_SIZE=N Override large-file skip threshold (KB). Default 512, max 32768.')
31
34
  .action(createLazyAction(() => import('./analyze.js'), 'analyzeCommand'));
32
35
  program
33
36
  .command('index [path...]')
@@ -1,17 +1,17 @@
1
1
  import { isVerboseIngestionEnabled } from './utils/verbose.js';
2
+ import { DEFAULT_MAX_FILE_SIZE_BYTES, getMaxFileSizeBytes } from './utils/max-file-size.js';
2
3
  import fs from 'fs/promises';
3
4
  import path from 'path';
4
5
  import { glob } from 'glob';
5
6
  import { createIgnoreFilter } from '../../config/ignore-service.js';
6
7
  const READ_CONCURRENCY = 32;
7
- /** Skip files larger than 512KB — they're usually generated/vendored and crash tree-sitter */
8
- const MAX_FILE_SIZE = 512 * 1024;
9
8
  /**
10
9
  * Phase 1: Scan repository — stat files to get paths + sizes, no content loaded.
11
10
  * Memory: ~10MB for 100K files vs ~1GB+ with content.
12
11
  */
13
12
  export const walkRepositoryPaths = async (repoPath, onProgress) => {
14
13
  const ignoreFilter = await createIgnoreFilter(repoPath);
14
+ const maxFileSizeBytes = getMaxFileSizeBytes();
15
15
  const filtered = await glob('**/*', {
16
16
  cwd: repoPath,
17
17
  nodir: true,
@@ -27,7 +27,7 @@ export const walkRepositoryPaths = async (repoPath, onProgress) => {
27
27
  const results = await Promise.allSettled(batch.map(async (relativePath) => {
28
28
  const fullPath = path.join(repoPath, relativePath);
29
29
  const stat = await fs.stat(fullPath);
30
- if (stat.size > MAX_FILE_SIZE) {
30
+ if (stat.size > maxFileSizeBytes) {
31
31
  skippedLarge++;
32
32
  skippedLargePaths.push(relativePath.replace(/\\/g, '/'));
33
33
  return null;
@@ -46,7 +46,9 @@ export const walkRepositoryPaths = async (repoPath, onProgress) => {
46
46
  }
47
47
  }
48
48
  if (skippedLarge > 0) {
49
- console.warn(` Skipped ${skippedLarge} large files (>${MAX_FILE_SIZE / 1024}KB, likely generated/vendored)`);
49
+ const isDefault = maxFileSizeBytes === DEFAULT_MAX_FILE_SIZE_BYTES;
50
+ const suffix = isDefault ? ', likely generated/vendored' : '';
51
+ console.warn(` Skipped ${skippedLarge} large files (>${maxFileSizeBytes / 1024}KB${suffix})`);
50
52
  if (isVerboseIngestionEnabled()) {
51
53
  for (const p of skippedLargePaths) {
52
54
  console.warn(` - ${p}`);
@@ -0,0 +1,20 @@
1
+ /** Default threshold (512 KB, expressed in bytes). Files larger than this are skipped by the walker. */
2
+ export declare const DEFAULT_MAX_FILE_SIZE_BYTES: number;
3
+ /** Hard upper bound (bytes) — tree-sitter refuses buffers above this regardless; larger overrides are clamped to it. */
4
+ export declare const MAX_FILE_SIZE_UPPER_BOUND_BYTES: number;
5
+ /**
6
+ * Resolve the effective file-size skip threshold (bytes) for the walker.
7
+ * Reads `GITNEXUS_MAX_FILE_SIZE` (KB). An unset or empty value returns the default silently;
8
+ * non-finite, non-positive or fractional values return the default and warn once per distinct value. Values above the tree-sitter ceiling are clamped (also with a warn-once message).
9
+ */
10
+ export declare const getMaxFileSizeBytes: () => number;
11
+ /**
12
+ * Build the CLI banner message announcing an active file-size override.
13
+ * Returns `null` when the effective threshold equals the default (including an invalid override that fell back to it) — the caller
14
+ * should print nothing in that case. The returned message reflects the
15
+ * *effective* post-clamp threshold, not the raw env value, so operators reading
16
+ * startup output see the actual configuration the walker will use.
17
+ */
18
+ export declare const getMaxFileSizeBannerMessage: () => string | null;
19
+ /** Test-only: clear the warn-once cache so warnings that were already emitted can be observed again. */
20
+ export declare const _resetMaxFileSizeWarnings: () => void;
@@ -0,0 +1,52 @@
1
+ import { TREE_SITTER_MAX_BUFFER } from '../constants.js';
2
+ /** Default threshold (512 KB, expressed in bytes). Files larger than this are skipped by the walker. */
3
+ export const DEFAULT_MAX_FILE_SIZE_BYTES = 512 * 1024;
4
+ /** Hard upper bound (bytes) — tree-sitter refuses buffers above this regardless; larger overrides are clamped to it. */
5
+ export const MAX_FILE_SIZE_UPPER_BOUND_BYTES = TREE_SITTER_MAX_BUFFER; // Alias of the constant imported from ../constants.js.
6
+ const warned = new Set(); // Keys of warnings already printed; emptied only by _resetMaxFileSizeWarnings().
7
+ const warnOnce = (key, message) => { // Print `message` via console.warn the first time `key` is seen; later calls with the same key do nothing.
8
+ if (warned.has(key))
9
+ return;
10
+ warned.add(key); // Remember the key so the same warning is not repeated.
11
+ console.warn(message);
12
+ };
13
+ /**
14
+ * Resolve the effective file-size skip threshold (bytes) for the walker.
15
+ * Reads `GITNEXUS_MAX_FILE_SIZE` (KB). An unset or empty value returns the default silently;
16
+ * non-finite, non-positive or fractional values return the default and warn once per distinct raw value. Values above `MAX_FILE_SIZE_UPPER_BOUND_BYTES` are clamped to it (also with a warn-once message).
17
+ */
18
+ export const getMaxFileSizeBytes = () => {
19
+ const raw = process.env.GITNEXUS_MAX_FILE_SIZE;
20
+ if (!raw)
21
+ return DEFAULT_MAX_FILE_SIZE_BYTES;
22
+ const parsed = Number(raw); // NOTE(review): Number() also accepts forms such as "1e3" or "0x200" — confirm that is intended.
23
+ if (!Number.isFinite(parsed) || parsed <= 0 || !Number.isInteger(parsed)) {
24
+ warnOnce(`invalid:${raw}`, ` GITNEXUS_MAX_FILE_SIZE must be a positive integer (KB), got "${raw}" — using default ${DEFAULT_MAX_FILE_SIZE_BYTES / 1024}KB`);
25
+ return DEFAULT_MAX_FILE_SIZE_BYTES;
26
+ }
27
+ const bytes = parsed * 1024; // The env value is in KB; the walker compares against bytes.
28
+ if (bytes > MAX_FILE_SIZE_UPPER_BOUND_BYTES) {
29
+ warnOnce(`clamp:${raw}`, ` GITNEXUS_MAX_FILE_SIZE=${parsed}KB exceeds tree-sitter ceiling (${MAX_FILE_SIZE_UPPER_BOUND_BYTES / 1024}KB) — clamping`);
30
+ return MAX_FILE_SIZE_UPPER_BOUND_BYTES;
31
+ }
32
+ return bytes;
33
+ };
34
+ /**
35
+ * Build the CLI banner message announcing an active file-size override.
36
+ * Returns `null` when the effective threshold equals the default (including an invalid override that fell back to it) — the caller
37
+ * should print nothing in that case. The returned message reflects the
38
+ * *effective* post-clamp threshold, not the raw env value, so operators reading
39
+ * startup output see the actual configuration the walker will use.
40
+ */
41
+ export const getMaxFileSizeBannerMessage = () => {
42
+ const effectiveBytes = getMaxFileSizeBytes(); // May emit the resolver's warn-once messages for invalid or clamped values.
43
+ if (effectiveBytes === DEFAULT_MAX_FILE_SIZE_BYTES)
44
+ return null;
45
+ const effectiveKb = effectiveBytes / 1024; // Report in KB, the same unit the env var is given in.
46
+ const defaultKb = DEFAULT_MAX_FILE_SIZE_BYTES / 1024;
47
+ return ` GITNEXUS_MAX_FILE_SIZE: effective threshold ${effectiveKb}KB (default ${defaultKb}KB)`;
48
+ };
49
+ /** Test-only: reset the warn-once cache so repeated test runs can re-observe warnings. */
50
+ export const _resetMaxFileSizeWarnings = () => {
51
+ warned.clear();
52
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.3-rc.39",
3
+ "version": "1.6.3-rc.40",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",