gitnexus 1.6.6-rc.86 → 1.6.6-rc.88

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,16 @@ import type { ContractExtractor, CypherExecutor } from '../contract-extractor.js
2
2
  import type { ExtractedContract, RepoHandle } from '../types.js';
3
3
  export interface ProtoServiceInfo {
4
4
  package: string;
5
+ /**
6
+ * Always present. Value of `option java_package = "..."` declared in the
7
+ * same `.proto` file, when present and different from `package`.
8
+ * Empty string when the option is absent or equals `package`. Used by
9
+ * `detectionToContract()` to translate a Java import path back to the
10
+ * proto package whenever the proto explicitly publishes its generated
11
+ * Java code under a different namespace (a common pattern in
12
+ * Google-style protobuf projects).
13
+ */
14
+ javaPackage: string;
5
15
  serviceName: string;
6
16
  methods: string[];
7
17
  protoPath: string;
@@ -19,6 +29,57 @@ export declare class GrpcExtractor implements ContractExtractor {
19
29
  * either a service-level (`grpc::pkg.Svc/*`) or method-level
20
30
  * (`grpc::pkg.Svc/Method`) contract id, and selecting confidence
21
31
  * based on whether the proto map had an entry.
32
+ *
33
+ * Resolution order for the package prefix:
34
+ *
35
+ * 1. **Java-package translation** (when detection
36
+ * supplied a `protoPackage` from a Java import).
37
+ * A `.proto` in the SAME repo may set `option
38
+ * java_package = "..."` to publish its generated
39
+ * Java classes under a namespace different from
40
+ * the proto `package`. Real-world projects (e.g.
41
+ * Google Cloud Java APIs) routinely do this.
42
+ * When the import-derived package matches that
43
+ * `java_package` value, translate back to the
44
+ * proto `package` so the resulting contract id
45
+ * is wire-correct rather than Java-namespace.
46
+ *
47
+ * 2. **Per-repo proto map check** (when the same
48
+ * service name has `.proto` candidates in this
49
+ * repo). The proto file is the authoritative
50
+ * source. If the proto's `package` agrees with
51
+ * the import's `protoPackage`, both paths produce
52
+ * the same FQN — emit it. If they DISAGREE (e.g.
53
+ * a typo'd Java import, or a mismatched
54
+ * java_package the reverse index didn't catch),
55
+ * trust the proto map and warn — the import
56
+ * MUST NOT silently overwrite an authoritative
57
+ * proto package.
58
+ *
59
+ * 3. **Import-derived FQN fallback** (when neither
60
+ * a `java_package` translation nor a proto map
61
+ * candidate exists in this repo). Typical for the
62
+ * "client-jar" pattern, where a consumer repo
63
+ * depends on a published stub jar and never
64
+ * carries the originating `.proto`. Use the
65
+ * import path verbatim as the proto package. Note
66
+ * the known limitation: when the published proto
67
+ * sets `option java_package` differing from
68
+ * `package`, the resulting FQN reflects the Java
69
+ * namespace rather than the proto namespace and
70
+ * will not match a provider repo's contract id —
71
+ * we cannot translate without sight of the proto.
72
+ *
73
+ * 4. **Per-repo proto map (no import)** — the legacy
74
+ * path. Used when the plugin didn't supply
75
+ * `protoPackage` (no import statement, wildcard
76
+ * import only, or non-Java languages that haven't
77
+ * been retrofitted yet).
78
+ *
79
+ * 5. **Short-name fallback** — when none of the
80
+ * above resolves a package, emit a service-only
81
+ * short-name contract id (`grpc::Svc/*`),
82
+ * preserving the pre-fix behaviour.
22
83
  */
23
84
  private detectionToContract;
24
85
  private dedupe;
@@ -170,6 +170,18 @@ function extractProtoImports(content) {
170
170
  }
171
171
  return imports;
172
172
  }
173
+ /**
174
+ * Extract the first double-quoted `option java_package = "..."` value from a `.proto` file ('' if none).
175
+ * The Java code generator places generated `XxxGrpc.java` classes under
176
+ * this package (instead of the proto `package` declaration) when the
177
+ * option is set. Real-world projects (Google Cloud Java APIs, internal
178
+ * shaded SDKs) routinely use this to publish their Java artifacts under
179
+ * a corporate namespace different from the wire-protocol package.
180
+ */
181
+ function extractJavaPackageOption(content) {
182
+ const m = content.match(/^\s*option\s+java_package\s*=\s*"([\w.]+)"\s*;/m);
183
+ return m?.[1] ?? '';
184
+ }
173
185
  function longestSharedSegmentRun(aPath, bPath) {
174
186
  const a = aPath.split('/').filter(Boolean);
175
187
  const b = bPath.split('/').filter(Boolean);
@@ -188,6 +200,7 @@ function longestSharedSegmentRun(aPath, bPath) {
188
200
  }
189
201
  async function buildProtoContext(repoPath) {
190
202
  const servicesByName = new Map();
203
+ const servicesByJavaPackage = new Map();
191
204
  // `.gitnexusignore` / `.gitignore` honoured via the shared IgnoreService —
192
205
  // see `filesystem-walker.ts` for the canonical pattern. Replaces a
193
206
  // hardcoded `[node_modules, .git, vendor]` array; those names plus the
@@ -246,6 +259,13 @@ async function buildProtoContext(repoPath) {
246
259
  if (!content)
247
260
  continue;
248
261
  const pkg = resolvePackage(normalizedRel);
262
+ const javaPkgOption = extractJavaPackageOption(content);
263
+ // Only retain `javaPackage` when it actively diverges from `pkg`.
264
+ // When equal (or absent), the import-derived path produces the
265
+ // same FQN as the proto-derived path, so no translation is needed
266
+ // and we keep the field empty to avoid populating the reverse
267
+ // index with redundant entries.
268
+ const javaPackage = javaPkgOption && javaPkgOption !== pkg ? javaPkgOption : '';
249
269
  const serviceBlocks = extractServiceBlocks(content);
250
270
  for (const block of serviceBlocks) {
251
271
  const rpcRe = /rpc\s+(\w+)\s*\(/g;
@@ -256,6 +276,7 @@ async function buildProtoContext(repoPath) {
256
276
  }
257
277
  const info = {
258
278
  package: pkg,
279
+ javaPackage,
259
280
  serviceName: block.name,
260
281
  methods,
261
282
  protoPath: normalizedRel,
@@ -263,9 +284,14 @@ async function buildProtoContext(repoPath) {
263
284
  const existing = servicesByName.get(block.name) ?? [];
264
285
  existing.push(info);
265
286
  servicesByName.set(block.name, existing);
287
+ if (javaPackage) {
288
+ const byJava = servicesByJavaPackage.get(javaPackage) ?? [];
289
+ byJava.push(info);
290
+ servicesByJavaPackage.set(javaPackage, byJava);
291
+ }
266
292
  }
267
293
  }
268
- return { packagesByProto, servicesByName };
294
+ return { packagesByProto, servicesByName, servicesByJavaPackage };
269
295
  }
270
296
  export async function buildProtoMap(repoPath) {
271
297
  const { servicesByName } = await buildProtoContext(repoPath);
@@ -311,6 +337,7 @@ export class GrpcExtractor {
311
337
  const out = [];
312
338
  const protoContext = await buildProtoContext(repoPath);
313
339
  const protoMap = protoContext.servicesByName;
340
+ const javaPackageMap = protoContext.servicesByJavaPackage;
314
341
  // ─── Proto files — definitive provider source ─────────────────
315
342
  // When tree-sitter-proto is available, .proto files are handled by
316
343
  // the plugin loop below (they're in GRPC_SCAN_GLOB). Otherwise
@@ -360,7 +387,7 @@ export class GrpcExtractor {
360
387
  continue;
361
388
  }
362
389
  for (const d of detections) {
363
- const contract = this.detectionToContract(d, rel, protoMap);
390
+ const contract = this.detectionToContract(d, rel, protoMap, javaPackageMap);
364
391
  if (contract)
365
392
  out.push(contract);
366
393
  }
@@ -373,8 +400,157 @@ export class GrpcExtractor {
373
400
  * either a service-level (`grpc::pkg.Svc/*`) or method-level
374
401
  * (`grpc::pkg.Svc/Method`) contract id, and selecting confidence
375
402
  * based on whether the proto map had an entry.
403
+ *
404
+ * Resolution order for the package prefix:
405
+ *
406
+ * 1. **Java-package translation** (when detection
407
+ * supplied a `protoPackage` from a Java import).
408
+ * A `.proto` in the SAME repo may set `option
409
+ * java_package = "..."` to publish its generated
410
+ * Java classes under a namespace different from
411
+ * the proto `package`. Real-world projects (e.g.
412
+ * Google Cloud Java APIs) routinely do this.
413
+ * When the import-derived package matches that
414
+ * `java_package` value, translate back to the
415
+ * proto `package` so the resulting contract id
416
+ * is wire-correct rather than Java-namespace.
417
+ *
418
+ * 2. **Per-repo proto map check** (when the same
419
+ * service name has `.proto` candidates in this
420
+ * repo). The proto file is the authoritative
421
+ * source. If the proto's `package` agrees with
422
+ * the import's `protoPackage`, both paths produce
423
+ * the same FQN — emit it. If they DISAGREE (e.g.
424
+ * a typo'd Java import, or a mismatched
425
+ * java_package the reverse index didn't catch),
426
+ * trust the proto map and warn — the import
427
+ * MUST NOT silently overwrite an authoritative
428
+ * proto package.
429
+ *
430
+ * 3. **Import-derived FQN fallback** (when neither
431
+ * a `java_package` translation nor a proto map
432
+ * candidate exists in this repo). Typical for the
433
+ * "client-jar" pattern, where a consumer repo
434
+ * depends on a published stub jar and never
435
+ * carries the originating `.proto`. Use the
436
+ * import path verbatim as the proto package. Note
437
+ * the known limitation: when the published proto
438
+ * sets `option java_package` differing from
439
+ * `package`, the resulting FQN reflects the Java
440
+ * namespace rather than the proto namespace and
441
+ * will not match a provider repo's contract id —
442
+ * we cannot translate without sight of the proto.
443
+ *
444
+ * 4. **Per-repo proto map (no import)** — the legacy
445
+ * path. Used when the plugin didn't supply
446
+ * `protoPackage` (no import statement, wildcard
447
+ * import only, or non-Java languages that haven't
448
+ * been retrofitted yet).
449
+ *
450
+ * 5. **Short-name fallback** — when none of the
451
+ * above resolves a package, emit a service-only
452
+ * short-name contract id (`grpc::Svc/*`),
453
+ * preserving the pre-fix behaviour.
376
454
  */
377
- detectionToContract(d, filePath, protoMap) {
455
+ detectionToContract(d, filePath, protoMap, javaPackageMap) {
456
+ if (d.protoPackage) {
457
+ // Step 1: java_package translation. The import-derived package
458
+ // may be the `option java_package` value of a `.proto` in the
459
+ // SAME repo. Look it up and, if found for the same service name,
460
+ // use the underlying proto `package` to build a wire-correct
461
+ // contract id.
462
+ const javaCandidates = javaPackageMap.get(d.protoPackage) ?? [];
463
+ const javaTranslated = javaCandidates.find((p) => p.serviceName === d.serviceName);
464
+ if (javaTranslated) {
465
+ const cid = d.methodName
466
+ ? contractId(javaTranslated.package, d.serviceName, d.methodName)
467
+ : serviceContractId(javaTranslated.package, d.serviceName);
468
+ const meta = {
469
+ service: d.serviceName,
470
+ source: d.source,
471
+ package: javaTranslated.package,
472
+ protoPackageSource: 'import-translated',
473
+ };
474
+ if (d.methodName)
475
+ meta.method = d.methodName;
476
+ return makeContract(cid, d.role, filePath, d.symbolName, d.confidenceWithProto, meta);
477
+ }
478
+ // Step 2: proto map cross-check. When this repo also carries a
479
+ // `.proto` defining the same short service name, the proto is
480
+ // authoritative and decides the package. The import is only
481
+ // compared against the resolved proto package; it does NOT break
482
+ // ties — an ambiguous resolution drops the detection (returns null).
483
+ const candidates = protoMap.get(d.serviceName) ?? [];
484
+ if (candidates.length > 0) {
485
+ const proto = resolveProtoConflict(d.serviceName, filePath, candidates);
486
+ if (proto === null) {
487
+ // Ambiguous proto resolution; resolveProtoConflict already warned.
488
+ return null;
489
+ }
490
+ const protoPkg = proto.package;
491
+ if (protoPkg === d.protoPackage) {
492
+ // Both paths agree.
493
+ const cid = d.methodName
494
+ ? contractId(protoPkg, d.serviceName, d.methodName)
495
+ : serviceContractId(protoPkg, d.serviceName);
496
+ const meta = {
497
+ service: d.serviceName,
498
+ source: d.source,
499
+ package: protoPkg,
500
+ protoPackageSource: 'import',
501
+ };
502
+ if (d.methodName)
503
+ meta.method = d.methodName;
504
+ return makeContract(cid, d.role, filePath, d.symbolName, d.confidenceWithProto, meta);
505
+ }
506
+ // Disagreement. Trust the proto file and emit a warning so
507
+ // operators can investigate the import. This protects against
508
+ // the symmetric failure mode: a stale or typo'd Java import
509
+ // silently corrupting the contract id of a service whose
510
+ // `.proto` lives in the same repo.
511
+ logger.warn(`[grpc-extractor] Java import package "${d.protoPackage}" for service ` +
512
+ `"${d.serviceName}" disagrees with local proto package "${protoPkg}" at ` +
513
+ `${filePath}; using proto package as authoritative source`);
514
+ const cid = d.methodName
515
+ ? contractId(protoPkg, d.serviceName, d.methodName)
516
+ : serviceContractId(protoPkg, d.serviceName);
517
+ const meta = {
518
+ service: d.serviceName,
519
+ source: d.source,
520
+ package: protoPkg,
521
+ protoPackageSource: 'proto-override',
522
+ importPackage: d.protoPackage,
523
+ };
524
+ if (d.methodName)
525
+ meta.method = d.methodName;
526
+ return makeContract(cid, d.role, filePath, d.symbolName, d.confidenceWithProto, meta);
527
+ }
528
+ // Step 3: import-derived fallback. No `.proto` in this repo
529
+ // names the service, and no `java_package` reverse-lookup
530
+ // matched. Emit the FQN with the import-derived package. This
531
+ // is the typical client-jar consumer path; note it still scores with `confidenceWithProto`.
532
+ //
533
+ // Known limitation: when the published proto sets
534
+ // `option java_package` to a value that differs from
535
+ // `package`, this path produces a contract id that reflects
536
+ // the Java namespace, not the proto namespace, and will not
537
+ // match a provider repo. Resolving that case requires
538
+ // group-level proto knowledge, which is intentionally out of
539
+ // scope for this fix.
540
+ const cid = d.methodName
541
+ ? contractId(d.protoPackage, d.serviceName, d.methodName)
542
+ : serviceContractId(d.protoPackage, d.serviceName);
543
+ const meta = {
544
+ service: d.serviceName,
545
+ source: d.source,
546
+ package: d.protoPackage,
547
+ protoPackageSource: 'import',
548
+ };
549
+ if (d.methodName)
550
+ meta.method = d.methodName;
551
+ return makeContract(cid, d.role, filePath, d.symbolName, d.confidenceWithProto, meta);
552
+ }
553
+ // Steps 4 + 5: legacy per-repo proto map resolution (no import).
378
554
  const candidates = protoMap.get(d.serviceName) ?? [];
379
555
  const proto = resolveProtoConflict(d.serviceName, filePath, candidates);
380
556
  // If there were proto candidates but resolution was ambiguous, skip
@@ -66,6 +66,32 @@ const STUB_PATTERNS = compilePatterns({
66
66
  },
67
67
  ],
68
68
  });
69
+ // `import <pkg>.<XxxGrpc>;` — captures the Java package (assumed to be the proto package) of the
70
+ // imported gRPC class (e.g. `cn.unipus.ucf.admin.proto.client.service`
71
+ // for `import cn.unipus.ucf.admin.proto.client.service.ContentRpcServiceGrpc`).
72
+ // Used by `scan` to build a per-file `XxxGrpc → fullPackage` map so
73
+ // consumer-side detections can carry a fully-qualified contract id
74
+ // even when the consumer repo does not contain any `.proto` files.
75
+ //
76
+ // `import static …` is excluded by tree-sitter shape: the `name:`
77
+ // field is only present on the non-static form. `import w.x.*;` is
78
+ // also excluded for the same reason — wildcard imports have an
79
+ // `asterisk` child instead of a named identifier.
80
+ const GRPC_CLASS_IMPORT_PATTERNS = compilePatterns({
81
+ name: 'java-grpc-class-import',
82
+ language: Java,
83
+ patterns: [
84
+ {
85
+ meta: {},
86
+ query: `
87
+ (import_declaration
88
+ (scoped_identifier
89
+ scope: (_) @import_pkg
90
+ name: (identifier) @import_name (#match? @import_name "Grpc$")))
91
+ `,
92
+ },
93
+ ],
94
+ });
69
95
  /**
70
96
  * Check whether a `class_declaration` node has a `@GrpcService`
71
97
  * annotation in its modifiers list. In tree-sitter-java, class-level
@@ -108,6 +134,37 @@ export const JAVA_GRPC_PLUGIN = {
108
134
  scan(tree) {
109
135
  const out = [];
110
136
  const emittedClassIds = new Set();
137
+ // ─── Build per-file gRPC class import map ───────────────────────
138
+ // Maps `XxxGrpc` (short class name) → fully-qualified proto package
139
+ // (e.g. `cn.unipus.ucf.admin.proto.client.service`). Used below to
140
+ // tag both provider and consumer detections with a `protoPackage`
141
+ // so the orchestrator can build a fully-qualified contract id
142
+ // without depending on the current repo carrying any `.proto`
143
+ // files. This is the key fix for client-jar consumer repos.
144
+ //
145
+ // Same-short-name disambiguation: when two distinct `import` lines
146
+ // bring same-named `XxxGrpc` classes from different packages into
147
+ // the same file (rare for grpc — the second import would be a
148
+ // compile error in Java), the last one wins. Java's compiler
149
+ // forbids that case so we don't bother modelling it.
150
+ const grpcClassImports = new Map();
151
+ for (const match of runCompiledPatterns(GRPC_CLASS_IMPORT_PATTERNS, tree)) {
152
+ const pkgNode = match.captures.import_pkg;
153
+ const nameNode = match.captures.import_name;
154
+ if (!pkgNode || !nameNode)
155
+ continue;
156
+ grpcClassImports.set(nameNode.text, pkgNode.text);
157
+ }
158
+ /**
159
+ * Resolve the fully-qualified proto package for a short service
160
+ * name in this file. Looks up `<serviceName>Grpc` in the import
161
+ * map; returns `undefined` when the class is referenced via a
162
+ * fully-qualified name on every call site (no import line) or
163
+ * when only a wildcard import is present. The orchestrator falls
164
+ * back to the per-repo proto map in that case, preserving the
165
+ * pre-fix behaviour.
166
+ */
167
+ const protoPackageFor = (serviceName) => grpcClassImports.get(`${serviceName}Grpc`);
111
168
  // ─── Providers: scoped form (`...Grpc.XxxImplBase`) ─────────────
112
169
  for (const match of runCompiledPatterns(SCOPED_IMPL_BASE_PATTERNS, tree)) {
113
170
  const classNode = match.captures.class;
@@ -119,6 +176,7 @@ export const JAVA_GRPC_PLUGIN = {
119
176
  continue;
120
177
  emittedClassIds.add(classNode.id);
121
178
  const annotated = hasGrpcServiceAnnotation(classNode);
179
+ const protoPackage = protoPackageFor(serviceName);
122
180
  out.push({
123
181
  role: 'provider',
124
182
  serviceName,
@@ -126,6 +184,7 @@ export const JAVA_GRPC_PLUGIN = {
126
184
  source: annotated ? 'java_grpc_service' : 'java_impl_base',
127
185
  confidenceWithProto: 0.8,
128
186
  confidenceWithoutProto: 0.65,
187
+ ...(protoPackage ? { protoPackage } : {}),
129
188
  });
130
189
  }
131
190
  // ─── Providers: plain form (`XxxImplBase`) ──────────────────────
@@ -141,6 +200,7 @@ export const JAVA_GRPC_PLUGIN = {
141
200
  continue;
142
201
  emittedClassIds.add(classNode.id);
143
202
  const annotated = hasGrpcServiceAnnotation(classNode);
203
+ const protoPackage = protoPackageFor(serviceName);
144
204
  out.push({
145
205
  role: 'provider',
146
206
  serviceName,
@@ -148,6 +208,7 @@ export const JAVA_GRPC_PLUGIN = {
148
208
  source: annotated ? 'java_grpc_service' : 'java_impl_base',
149
209
  confidenceWithProto: 0.8,
150
210
  confidenceWithoutProto: 0.65,
211
+ ...(protoPackage ? { protoPackage } : {}),
151
212
  });
152
213
  }
153
214
  // ─── Consumers: `XxxGrpc.newBlockingStub(...)` / `newStub(...)` ─
@@ -159,6 +220,7 @@ export const JAVA_GRPC_PLUGIN = {
159
220
  if (!grpcMatch)
160
221
  continue;
161
222
  const serviceName = grpcMatch[1];
223
+ const protoPackage = protoPackageFor(serviceName);
162
224
  out.push({
163
225
  role: 'consumer',
164
226
  serviceName,
@@ -166,6 +228,7 @@ export const JAVA_GRPC_PLUGIN = {
166
228
  source: 'java_stub',
167
229
  confidenceWithProto: 0.75,
168
230
  confidenceWithoutProto: 0.55,
231
+ ...(protoPackage ? { protoPackage } : {}),
169
232
  });
170
233
  }
171
234
  return out;
@@ -33,6 +33,18 @@ export interface GrpcDetection {
33
33
  confidenceWithProto: number;
34
34
  /** Confidence when the proto map has no entry. */
35
35
  confidenceWithoutProto: number;
36
+ /**
37
+ * Optional. Fully-qualified proto package the detection's service
38
+ * belongs to (e.g. `cn.unipus.ucf.admin.proto.client.service`),
39
+ * derived directly from the source file's import statements when
40
+ * available. When set, the orchestrator first checks same-repo protos
41
+ * (java_package translation, then the per-repo proto map, which wins
42
+ * on disagreement) and otherwise uses this package verbatim — letting
43
+ * consumer repos that don't carry `.proto` files (the client-jar
44
+ * architecture used by most Java gRPC microservices) still emit a
45
+ * fully-qualified contract id (Java-namespace if java_package differs).
46
+ */
47
+ protoPackage?: string;
36
48
  }
37
49
  /**
38
50
  * One language-scoped gRPC plugin. Plugins own the tree-sitter grammar
@@ -101,9 +101,26 @@ export const processCobol = (graph, files, allPathSet) => {
101
101
  const entry = copybookMap.get(name.toUpperCase());
102
102
  return entry ? entry.path : null;
103
103
  };
104
+ // Memoize preprocessed copybook content for the duration of this
105
+ // processCobol call. A single copybook is COPYed by many programs (and at
106
+ // many COPY sites within a program); without this cache
107
+ // preprocessCobolSource would re-run once per COPY site —
108
+ // O(programs × copybooks) preprocessing passes over the same content.
109
+ // Keyed by the resolved copybook path. REPLACING is applied later by the
110
+ // expander on the returned (pre-REPLACING) content (see
111
+ // cobol-copy-expander.ts readFile→applyReplacing), so caching the
112
+ // pre-REPLACING preprocessed text here is safe and per-call-scoped.
113
+ const preprocessedCopyCache = new Map();
104
114
  const readCopy = (copyPath) => {
115
+ const cached = preprocessedCopyCache.get(copyPath);
116
+ if (cached !== undefined)
117
+ return cached;
105
118
  const content = copybookByPath.get(copyPath);
106
- return content ? preprocessCobolSource(content) : null;
119
+ if (!content)
120
+ return null; // preserves original falsy→null (missing/empty)
121
+ const preprocessed = preprocessCobolSource(content);
122
+ preprocessedCopyCache.set(copyPath, preprocessed);
123
+ return preprocessed;
107
124
  };
108
125
  // Track module names for cross-program CALL resolution
109
126
  const moduleNodeIds = new Map(); // uppercase program name -> node id
@@ -59,7 +59,7 @@ export function emitCobolScopeCaptures(sourceText, _filePath, _cachedTree) {
59
59
  ? rangeOf(progIdLine, 7, progIdLine, lines[progIdLine - 1]?.length ?? endCol)
60
60
  : rangeOf(startLine, startCol, endLine, endCol);
61
61
  const grouped = {
62
- '@scope.module': capture('@scope.module', nameRange, name),
62
+ '@scope.module': capture('@scope.module', rangeOf(startLine, startCol, endLine, endCol), name),
63
63
  '@declaration.program': capture('@declaration.program', rangeOf(startLine, startCol, endLine, endCol), name),
64
64
  '@declaration.name': capture('@declaration.name', nameRange, name),
65
65
  };
@@ -83,7 +83,7 @@ export function emitCobolScopeCaptures(sourceText, _filePath, _cachedTree) {
83
83
  ? rangeOf(progIdLine, 7, progIdLine, lines[progIdLine - 1]?.length ?? endCol)
84
84
  : rangeOf(startLine, startCol, endLine, endCol);
85
85
  const grouped = {
86
- '@scope.module': capture('@scope.module', nameRange, prog.name),
86
+ '@scope.module': capture('@scope.module', rangeOf(startLine, startCol, endLine, endCol), prog.name),
87
87
  '@declaration.program': capture('@declaration.program', rangeOf(startLine, startCol, endLine, endCol), prog.name),
88
88
  '@declaration.name': capture('@declaration.name', nameRange, prog.name),
89
89
  };
@@ -79,6 +79,7 @@ export const MIGRATED_LANGUAGES = new Set([
79
79
  SupportedLanguages.Java,
80
80
  SupportedLanguages.Rust,
81
81
  SupportedLanguages.Ruby,
82
+ SupportedLanguages.Cobol,
82
83
  ]);
83
84
  /**
84
85
  * Return the env-var name that controls a given language's registry-
@@ -81,6 +81,20 @@ export const scopeResolutionPhase = {
81
81
  for (const pf of workerParsedFiles) {
82
82
  preExtractedByPath.set(pf.filePath, pf);
83
83
  }
84
+ // Drop pre-extracted entries for standalone providers — these
85
+ // languages are skipped by the canonical standalone-parseStrategy
86
+ // guard in the SCOPE_RESOLVERS loop below and never consume
87
+ // preExtractedByPath, so keeping their entries would leak memory:
88
+ // the later cleanup loop also never runs for skipped providers.
89
+ for (const [path] of preExtractedByPath) {
90
+ const lang = getLanguageFromFilename(path);
91
+ if (lang === null)
92
+ continue;
93
+ const provider = SCOPE_RESOLVERS.get(lang);
94
+ if (provider?.languageProvider.parseStrategy === 'standalone') {
95
+ preExtractedByPath.delete(path);
96
+ }
97
+ }
84
98
  let totalFiles = 0;
85
99
  let totalImports = 0;
86
100
  let totalRefs = 0;
@@ -114,6 +128,14 @@ export const scopeResolutionPhase = {
114
128
  for (const [lang, provider] of SCOPE_RESOLVERS) {
115
129
  if (!isRegistryPrimary(lang))
116
130
  continue;
131
+ // Standalone providers (COBOL, JCL) don't emit graph edges yet
132
+ // through the scope-resolution path. This is the canonical guard:
133
+ // runScopeResolution is never called for standalone providers, which
134
+ // keeps cobolPhase as the sole IMPORTS edge producer. Keep this guard
135
+ // in sync with any additional standalone providers added to
136
+ // SCOPE_RESOLVERS.
137
+ if (provider.languageProvider.parseStrategy === 'standalone')
138
+ continue;
117
139
  const langFiles = scannedFiles.filter((f) => getLanguageFromFilename(f.path) === lang);
118
140
  if (langFiles.length === 0)
119
141
  continue;
@@ -473,9 +473,29 @@ const processBatch = (files, onProgress) => {
473
473
  for (const [language, langFiles] of byLanguage) {
474
474
  const provider = getProvider(language);
475
475
  const queryString = provider.treeSitterQueries;
476
- if (!queryString)
476
+ if (!queryString) {
477
+ // Standalone providers (regex-based, no tree-sitter) that implement
478
+ // emitScopeCaptures feed into the scope-resolution pipeline via
479
+ // extractParsedFile directly — no tree-sitter involved.
480
+ if (provider.emitScopeCaptures) {
481
+ for (const file of langFiles) {
482
+ const parsedFile = extractParsedFile(provider, file.content, file.path, (message) => {
483
+ if (parentPort) {
484
+ parentPort.postMessage({ type: 'warning', message });
485
+ }
486
+ else {
487
+ logger.warn(message);
488
+ }
489
+ }, undefined);
490
+ if (parsedFile !== undefined) {
491
+ result.parsedFiles.push(parsedFile);
492
+ result.fileCount++;
493
+ onFileProcessed?.();
494
+ }
495
+ }
496
+ }
477
497
  continue;
478
- // Track if we need to handle tsx separately
498
+ }
479
499
  const tsxFiles = [];
480
500
  const regularFiles = [];
481
501
  if (language === SupportedLanguages.TypeScript) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.6-rc.86",
3
+ "version": "1.6.6-rc.88",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",