gitnexus 1.6.6-rc.52 → 1.6.6-rc.53

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -94,6 +94,7 @@ export function finalizeScopeModel(parsedFiles, options = {}) {
94
94
  // AFTER `finalizeScopeModel` returns, before `resolveReferenceSites`
95
95
  // consumes the bundle. Most languages leave it empty.
96
96
  bindingAugmentations: new Map(),
97
+ workspaceFqnBindings: new Map(),
97
98
  referenceSites: Object.freeze([...allReferenceSites]),
98
99
  sccs: finalizeOut.sccs,
99
100
  stats: finalizeOut.stats,
@@ -26,6 +26,30 @@
26
26
  */
27
27
  import type { ParsedFile } from '../../../../_shared/index.js';
28
28
  import type { ScopeResolutionIndexes } from '../../model/scope-resolution-indexes.js';
29
+ interface PhpFileStructure {
30
+ /** The declared namespace (backslash-separated), or '' for global namespace. */
31
+ readonly namespace: string;
32
+ }
33
+ /**
34
+ * Extract a PHP namespace declaration from raw source without tree-sitter.
35
+ *
36
+ * Single-pass line scanner that skips heredoc/nowdoc bodies, block
37
+ * comments, and single-line comments before matching. This avoids the
38
+ * false positives that a multiline regex produces when `namespace` appears
39
+ * inside a heredoc, nowdoc, string, or comment.
40
+ */
41
+ export declare function extractNamespaceViaScanner(content: string): string;
42
+ /**
43
+ * Extract the declared namespace from a PHP file's source.
44
+ * Uses the cached AST tree when available to avoid re-parsing.
45
+ *
46
+ * When no cached tree is available (worker-parsed files can't transfer
47
+ * native Tree objects across MessageChannels), uses a line scanner
48
+ * instead of re-parsing every file with tree-sitter. For 16K+ PHP files
49
+ * this eliminates ~16K tree-sitter re-parses during the namespace-siblings
50
+ * pass. See: https://github.com/abhigyanpatwari/GitNexus/issues/1741
51
+ */
52
+ export declare function extractPhpFileStructure(content: string, cachedTree: unknown): PhpFileStructure;
29
53
  export interface PhpSiblingInputs {
30
54
  readonly fileContents: ReadonlyMap<string, string>;
31
55
  readonly treeCache?: {
@@ -49,3 +73,4 @@ export declare function getPhpNamespaceForFile(filePath: string): string;
49
73
  * explicit `use` imports (`origin: 'import'`) and local declarations.
50
74
  */
51
75
  export declare function populatePhpNamespaceSiblings(parsedFiles: readonly ParsedFile[], indexes: ScopeResolutionIndexes, inputs: PhpSiblingInputs): void;
76
+ export {};
@@ -24,22 +24,85 @@
24
24
  * to extract namespace declarations — same AST that `extractParsedFile`
25
25
  * already parsed, reused via `treeCache` to avoid double-parsing.
26
26
  */
27
- import { getPhpParser } from './query.js';
28
- import { getTreeSitterBufferSize } from '../../constants.js';
29
- import { parseSourceSafe } from '../../../tree-sitter/safe-parse.js';
27
+ const NAMESPACE_RE = /^\s*namespace\s+([\w\\]+)\s*[;{]/i;
28
+ const HEREDOC_START_RE = /<<<\s*['"]?(\w+)['"]?\s*$/;
29
+ /**
30
+ * Extract a PHP namespace declaration from raw source without tree-sitter.
31
+ *
32
+ * Single-pass line scanner that skips heredoc/nowdoc bodies, block
33
+ * comments, and single-line comments before matching. This avoids the
34
+ * false positives that a multiline regex produces when `namespace` appears
35
+ * inside a heredoc, nowdoc, string, or comment.
36
+ */
37
+ export function extractNamespaceViaScanner(content) {
38
+ const lines = content.split('\n');
39
+ let inBlockComment = false;
40
+ let heredocDelimiter = null;
41
+ for (const raw of lines) {
42
+ if (heredocDelimiter !== null) {
43
+ const trimmed = raw.trim();
44
+ if (trimmed === heredocDelimiter + ';' || trimmed === heredocDelimiter) {
45
+ heredocDelimiter = null;
46
+ }
47
+ continue;
48
+ }
49
+ if (inBlockComment) {
50
+ if (raw.includes('*/')) {
51
+ inBlockComment = false;
52
+ }
53
+ continue;
54
+ }
55
+ let line = raw;
56
+ const blockStart = line.indexOf('/*');
57
+ if (blockStart >= 0) {
58
+ const blockEnd = line.indexOf('*/', blockStart + 2);
59
+ if (blockEnd >= 0) {
60
+ line = line.slice(0, blockStart) + line.slice(blockEnd + 2);
61
+ }
62
+ else {
63
+ line = line.slice(0, blockStart);
64
+ inBlockComment = true;
65
+ }
66
+ }
67
+ const slashIdx = line.indexOf('//');
68
+ const hashIdx = line.indexOf('#');
69
+ if (slashIdx >= 0 && (hashIdx < 0 || slashIdx < hashIdx)) {
70
+ line = line.slice(0, slashIdx);
71
+ }
72
+ else if (hashIdx >= 0) {
73
+ line = line.slice(0, hashIdx);
74
+ }
75
+ const heredocMatch = raw.match(HEREDOC_START_RE);
76
+ if (heredocMatch) {
77
+ heredocDelimiter = heredocMatch[1];
78
+ continue;
79
+ }
80
+ const stripped = line.replace(/<\?php/gi, '').replace(/declare\s*\([^)]*\)\s*;?/gi, '');
81
+ const nsMatch = stripped.match(NAMESPACE_RE);
82
+ if (nsMatch) {
83
+ return nsMatch[1];
84
+ }
85
+ }
86
+ return '';
87
+ }
30
88
  /**
31
89
  * Extract the declared namespace from a PHP file's source.
32
90
  * Uses the cached AST tree when available to avoid re-parsing.
91
+ *
92
+ * When no cached tree is available (worker-parsed files can't transfer
93
+ * native Tree objects across MessageChannels), uses a line scanner
94
+ * instead of re-parsing every file with tree-sitter. For 16K+ PHP files
95
+ * this eliminates ~16K tree-sitter re-parses during the namespace-siblings
96
+ * pass. See: https://github.com/abhigyanpatwari/GitNexus/issues/1741
33
97
  */
34
- function extractPhpFileStructure(content, cachedTree) {
35
- const tree = cachedTree ??
36
- parseSourceSafe(getPhpParser(), content, undefined, {
37
- bufferSize: getTreeSitterBufferSize(content),
38
- });
98
+ export function extractPhpFileStructure(content, cachedTree) {
99
+ if (!cachedTree) {
100
+ return { namespace: extractNamespaceViaScanner(content) };
101
+ }
39
102
  // Walk top-level nodes looking for namespace_definition.
40
103
  // PHP files have at most one namespace declaration (PSR-4 convention).
41
104
  // `namespace_definition` has a `name:` field of type `namespace_name`.
42
- const root = tree.rootNode;
105
+ const root = cachedTree.rootNode;
43
106
  for (let i = 0; i < root.namedChildCount; i++) {
44
107
  const child = root.namedChild(i);
45
108
  if (child === null)
@@ -194,38 +257,31 @@ export function populatePhpNamespaceSiblings(parsedFiles, indexes, inputs) {
194
257
  }
195
258
  }
196
259
  }
197
- // Step 3b: Inject fully-qualified-name bindings into every PHP file's
198
- // Module scope. PHP `\App\Models\User` (leading-backslash FQN) and
199
- // `App\Models\User` (already-qualified relative) on a parameter or
200
- // typed receiver must resolve to the exact namespace-qualified class
201
- // regardless of which simple-name `User` the caller's `use` imports
202
- // shadowed. The shared `findClassBindingInScope` scope-chain walk
203
- // consumes these augmentations via `lookupBindingsAt`, so adding the
204
- // qualified key on every file's module scope routes FQN-receivers to
205
- // the right def. Codex PR #1497 review, finding 1.
260
+ // Step 3b: Register FQN bindings in a workspace-level map instead of
261
+ // per-scope augmentations. PHP `\App\Models\User` and `App\Models\User`
262
+ // must resolve regardless of which file the lookup originates from.
263
+ // `lookupBindingsAt` consults `workspaceFqnBindings` as a third source.
206
264
  //
207
- // Cost: O(PHP files × class-like defs in the workspace) augmentation
208
- // entries. Bounded and acceptable in practice — typical PHP projects
209
- // have hundreds of files and classes, not tens of thousands.
210
- for (const parsed of parsedFiles) {
211
- const moduleScope = parsed.scopes.find((s) => s.kind === 'Module');
212
- if (moduleScope === undefined)
265
+ // Cost: O(class-like defs) entries — NOT O(files × classDefs). For 16K
266
+ // PHP files with 5K classes, this is 5K entries instead of 80M.
267
+ const fqnMap = indexes.workspaceFqnBindings;
268
+ for (const [ns, bucket] of buckets) {
269
+ if (ns === '')
213
270
  continue;
214
- const moduleScopeId = moduleScope.id;
215
- for (const [ns, bucket] of buckets) {
216
- if (ns === '')
217
- continue; // global-namespace classes have no qualified form to register
218
- for (const def of bucket.classDefs) {
219
- const q = def.qualifiedName ?? '';
220
- const simpleName = q.includes('\\') ? q.slice(q.lastIndexOf('\\') + 1) : q;
221
- if (simpleName === '')
222
- continue;
223
- const fqn = `${ns}\\${simpleName}`;
224
- const arr = getAugmentationBucket(augmentations, moduleScopeId, fqn);
225
- if (arr.some((b) => b.def.nodeId === def.nodeId))
226
- continue;
227
- arr.push({ def, origin: 'namespace' });
271
+ for (const def of bucket.classDefs) {
272
+ const q = def.qualifiedName ?? '';
273
+ const simpleName = q.includes('\\') ? q.slice(q.lastIndexOf('\\') + 1) : q;
274
+ if (simpleName === '')
275
+ continue;
276
+ const fqn = `${ns}\\${simpleName}`;
277
+ let arr = fqnMap.get(fqn);
278
+ if (arr === undefined) {
279
+ arr = [];
280
+ fqnMap.set(fqn, arr);
228
281
  }
282
+ if (arr.some((b) => b.def.nodeId === def.nodeId))
283
+ continue;
284
+ arr.push({ def, origin: 'namespace' });
229
285
  }
230
286
  }
231
287
  // Step 4: Mirror return-type bindings from same-namespace sibling files.
@@ -63,6 +63,12 @@ export interface ScopeResolutionIndexes {
63
63
  * are returned first and win duplicate `def.nodeId` metadata, with
64
64
  * unique augmentations appended after. See I8. */
65
65
  readonly bindingAugmentations: ReadonlyMap<ScopeId, ReadonlyMap<string, readonly BindingRef[]>>;
66
+ /** Workspace-level FQN binding lookup. Populated by PHP namespace-
67
+ * siblings Step 3b as a shared map instead of per-scope duplication.
68
+ * Consulted by `lookupBindingsAt` as a third source after finalized
69
+ * and per-scope augmented bindings. Keys are backslash-separated FQNs
70
+ * (e.g. `App\Models\User`). */
71
+ readonly workspaceFqnBindings: ReadonlyMap<string, readonly BindingRef[]>;
66
72
  /** Pre-resolution usage facts; consumed by the resolution phase. */
67
73
  readonly referenceSites: readonly ReferenceSite[];
68
74
  /** SCC condensation of the file-level import graph — callers that want
@@ -124,6 +124,15 @@ export const scopeResolutionPhase = {
124
124
  }
125
125
  },
126
126
  }, provider);
127
+ // Release file contents and pre-extracted entries after each language
128
+ // to reduce memory pressure. For large codebases (16K+ PHP files),
129
+ // holding all source code simultaneously with scope trees causes OOM.
130
+ // See: https://github.com/abhigyanpatwari/GitNexus/issues/1741
131
+ files.length = 0;
132
+ contents.clear();
133
+ for (const fp of filePaths) {
134
+ preExtractedByPath.delete(fp);
135
+ }
127
136
  anyRan = true;
128
137
  totalFiles += stats.filesProcessed;
129
138
  totalImports += stats.importsEmitted;
@@ -44,24 +44,40 @@ const EMPTY_BINDINGS = Object.freeze([]);
44
44
  export function lookupBindingsAt(scopeId, name, scopes) {
45
45
  const finalized = scopes.bindings.get(scopeId)?.get(name);
46
46
  const augmented = scopes.bindingAugmentations.get(scopeId)?.get(name);
47
+ const workspace = scopes.workspaceFqnBindings?.get(name);
47
48
  const fLen = finalized?.length ?? 0;
48
49
  const aLen = augmented?.length ?? 0;
49
- if (fLen === 0 && aLen === 0)
50
+ const wLen = workspace?.length ?? 0;
51
+ if (fLen === 0 && aLen === 0 && wLen === 0)
50
52
  return EMPTY_BINDINGS;
51
- if (aLen === 0)
53
+ if (aLen === 0 && wLen === 0)
52
54
  return finalized;
53
- if (fLen === 0)
55
+ if (fLen === 0 && wLen === 0)
54
56
  return augmented;
57
+ if (fLen === 0 && aLen === 0)
58
+ return workspace;
55
59
  const seen = new Set();
56
60
  const out = [];
57
- for (const r of finalized) {
58
- seen.add(r.def.nodeId);
59
- out.push(r);
61
+ if (fLen > 0) {
62
+ for (const r of finalized) {
63
+ seen.add(r.def.nodeId);
64
+ out.push(r);
65
+ }
60
66
  }
61
- for (const r of augmented) {
62
- if (seen.has(r.def.nodeId))
63
- continue;
64
- out.push(r);
67
+ if (aLen > 0) {
68
+ for (const r of augmented) {
69
+ if (seen.has(r.def.nodeId))
70
+ continue;
71
+ seen.add(r.def.nodeId);
72
+ out.push(r);
73
+ }
74
+ }
75
+ if (wLen > 0) {
76
+ for (const r of workspace) {
77
+ if (seen.has(r.def.nodeId))
78
+ continue;
79
+ out.push(r);
80
+ }
65
81
  }
66
82
  return out;
67
83
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.6-rc.52",
3
+ "version": "1.6.6-rc.53",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",