gitnexus 1.6.6-rc.85 → 1.6.6-rc.87

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,18 +9,22 @@ import { compilePatterns, runCompiledPatterns, unquoteLiteral, } from '../tree-s
9
9
  * named annotation arguments (`@GetMapping(value = "/x")` and
10
10
  * `@GetMapping(path = "/x")`) are supported.
11
11
  *
12
- * **Consumers** (this PR) three call-site patterns common in Kotlin
12
+ * **Consumers** — four call-site patterns common in Kotlin
13
13
  * Spring projects:
14
14
  *
15
- * 1. `restTemplate.getForObject("/x", ...)` and friends
16
- * 2. `webClient.get().uri("/x")` (short form, 1 verb hop + 1 uri hop)
17
- * 3. `Request.Builder().url("/x")` (OkHttp)
15
+ * 1. `restTemplate.getForObject("/x", ...)` and friends (#1855)
16
+ * 2. `webClient.get().uri("/x")` short form (#1855)
17
+ * 3. `Request.Builder().url("/x")` — OkHttp (#1855)
18
+ * 4. `webClient.method(HttpMethod.X).uri("/y")` — long form (this PR)
18
19
  *
19
- * The long-form `webClient.method(HttpMethod.X).uri("/y")` chain is
20
- * intentionally deferred to a follow-up: it requires walk-up logic
21
- * to recover the verb from a sibling `call_expression`, and we can
22
- * land 80% of real-world Kotlin Spring consumer coverage with the
23
- * three simpler patterns above.
20
+ * The long form puts the verb on a sibling `call_expression` two hops
21
+ * away from the path. Rather than introducing imperative walk-up logic,
22
+ * we use a single deeper tree-sitter query that matches the full chain
23
+ * structurally — see `WEB_CLIENT_LONG_PATTERNS` below. The verb is
24
+ * captured directly as the `simple_identifier` of `HttpMethod.X`, so
25
+ * variable-bound verbs (`val verb = HttpMethod.PATCH; webClient.method(verb)...`)
26
+ * are intentionally NOT picked up — those need a graph-aware resolver
27
+ * and are out of scope for source-scan.
24
28
  *
25
29
  * tree-sitter-kotlin (fwcd) AST shapes used here:
26
30
  * class_declaration
@@ -96,6 +100,15 @@ const WEB_CLIENT_SHORT_TO_HTTP = {
96
100
  delete: 'DELETE',
97
101
  patch: 'PATCH',
98
102
  };
103
+ /**
104
+ * Allowed HTTP verbs for the WebClient long-form path
105
+ * `webClient.method(HttpMethod.X).uri("/y")`. Compiled once at module
106
+ * load (instead of inside the scan loop) per maintainer feedback on
107
+ * PR #1884. Mirrors the values of `WEB_CLIENT_SHORT_TO_HTTP` above —
108
+ * keeping HEAD/OPTIONS/TRACE intentionally excluded for symmetry
109
+ * with the short form and the Java plugin.
110
+ */
111
+ const WEB_CLIENT_LONG_VERB_RE = /^(GET|POST|PUT|DELETE|PATCH)$/;
99
112
  /**
100
113
  * Build the plugin only if the Kotlin grammar is available. Compiling
101
114
  * the queries against a null grammar would throw at module load time
@@ -249,8 +262,9 @@ function buildKotlinPlugin(language) {
249
262
  // - outer call's first value_argument is a string literal
250
263
  //
251
264
  // The long-form `webClient.method(HttpMethod.GET).uri("/x")` chain
252
- // uses an extra navigation hop and an enum field access — it's
253
- // intentionally out of scope here (see file header).
265
+ // uses an extra navigation hop and an enum field access — handled
266
+ // by `WEB_CLIENT_LONG_PATTERNS` below, separately so each query is
267
+ // straightforward to reason about.
254
268
  const WEB_CLIENT_SHORT_PATTERNS = compilePatterns({
255
269
  name: 'kotlin-web-client-short',
256
270
  language,
@@ -273,6 +287,58 @@ function buildKotlinPlugin(language) {
273
287
  },
274
288
  ],
275
289
  });
290
+ // ─── Consumer: Spring WebClient (long form) ───────────────────────────
291
+ // The fluent long form passes the verb as a `HttpMethod.X` enum field
292
+ // access through `.method(...)`, then carries the path on a separate
293
+ // `.uri(...)` hop further down the chain:
294
+ //
295
+ // webClient.method(HttpMethod.GET).uri("/x").retrieve().awaitBody<T>()
296
+ //
297
+ // Compared to the short form there are two extra structural hops:
298
+ // - the inner `.method(...)` `call_expression` has a `value_argument`
299
+ // whose payload is itself a `navigation_expression` (HttpMethod → .GET)
300
+ // - the outer `.uri(...)` is reached via one more
301
+ // `navigation_expression` wrapping that inner call
302
+ //
303
+ // We capture the verb at the `simple_identifier` under `HttpMethod`'s
304
+ // `navigation_suffix`. That `simple_identifier` is the literal field
305
+ // name (`GET`, `POST`, ...) used in source — Kotlin enum fields by
306
+ // convention are upper-case, matching `HttpMethod` from
307
+ // `org.springframework.http`. We forward the captured text as-is.
308
+ //
309
+ // Variable-bound verbs (`val verb = HttpMethod.PATCH; webClient.method(verb)...`)
310
+ // do NOT match — they fail the `(navigation_expression ...)` shape
311
+ // because the value_argument carries a bare `simple_identifier` instead
312
+ // of a `HttpMethod.X` field access. This is intentional: source-scan
313
+ // can't follow the binding without graph context. Pinned by an
314
+ // anti-overreach test in the consumer suite.
315
+ const WEB_CLIENT_LONG_PATTERNS = compilePatterns({
316
+ name: 'kotlin-web-client-long',
317
+ language,
318
+ patterns: [
319
+ {
320
+ meta: {},
321
+ query: `
322
+ (call_expression
323
+ (navigation_expression
324
+ (call_expression
325
+ (navigation_expression
326
+ (simple_identifier) @obj (#eq? @obj "webClient")
327
+ (navigation_suffix
328
+ (simple_identifier) @method_call (#eq? @method_call "method")))
329
+ (call_suffix
330
+ (value_arguments
331
+ . (value_argument
332
+ (navigation_expression
333
+ (simple_identifier) @httpMethodCls (#eq? @httpMethodCls "HttpMethod")
334
+ (navigation_suffix (simple_identifier) @verb))))))
335
+ (navigation_suffix (simple_identifier) @uri (#eq? @uri "uri")))
336
+ (call_suffix
337
+ (value_arguments . (value_argument . (string_literal) @path))))
338
+ `,
339
+ },
340
+ ],
341
+ });
276
342
  // ─── Consumer: OkHttp Request.Builder().url("/x") ─────────────────────
277
343
  // Kotlin parses `Request.Builder()` as a `call_expression` whose
278
344
  // callee is a `navigation_expression` (Request → .Builder), NOT as
@@ -425,6 +491,35 @@ function buildKotlinPlugin(language) {
425
491
  confidence: 0.7,
426
492
  });
427
493
  }
494
+ // ─── Consumers: WebClient long form (.method(HttpMethod.X) → .uri) ─
495
+ for (const match of runCompiledPatterns(WEB_CLIENT_LONG_PATTERNS, tree)) {
496
+ const verbNode = match.captures.verb;
497
+ const pathNode = match.captures.path;
498
+ if (!verbNode || !pathNode)
499
+ continue;
500
+ // The captured text is the literal `HttpMethod.X` field name.
501
+ // Spring's `org.springframework.http.HttpMethod` defines GET,
502
+ // POST, PUT, DELETE, PATCH, HEAD, OPTIONS, TRACE — we only
503
+ // emit for the five verbs we already handle elsewhere, so
504
+ // exotic ones are silently skipped (consistent with the
505
+ // short form's WEB_CLIENT_SHORT_TO_HTTP guard). The accepted
506
+ // verb regex is hoisted to module scope (see
507
+ // `WEB_CLIENT_LONG_VERB_RE` near the top of this file).
508
+ const verbText = verbNode.text;
509
+ if (!WEB_CLIENT_LONG_VERB_RE.test(verbText))
510
+ continue;
511
+ const path = unquoteLiteral(pathNode.text);
512
+ if (path === null)
513
+ continue;
514
+ out.push({
515
+ role: 'consumer',
516
+ framework: 'spring-web-client',
517
+ method: verbText,
518
+ path,
519
+ name: null,
520
+ confidence: 0.7,
521
+ });
522
+ }
428
523
  // ─── Consumers: OkHttp Request.Builder().url("path") ────────────
429
524
  for (const match of runCompiledPatterns(OK_HTTP_PATTERNS, tree)) {
430
525
  const pathNode = match.captures.path;
@@ -101,9 +101,26 @@ export const processCobol = (graph, files, allPathSet) => {
101
101
  const entry = copybookMap.get(name.toUpperCase());
102
102
  return entry ? entry.path : null;
103
103
  };
104
+ // Memoize preprocessed copybook content for the duration of this
105
+ // processCobol call. A single copybook is COPYed by many programs (and at
106
+ // many COPY sites within a program); without this cache
107
+ // preprocessCobolSource would re-run once per COPY site —
108
+ // O(programs × copybooks) preprocessing passes over the same content.
109
+ // Keyed by the resolved copybook path. REPLACING is applied later by the
110
+ // expander on the returned (pre-REPLACING) content (see
111
+ // cobol-copy-expander.ts readFile→applyReplacing), so caching the
112
+ // pre-REPLACING preprocessed text here is safe and per-call-scoped.
113
+ const preprocessedCopyCache = new Map();
104
114
  const readCopy = (copyPath) => {
115
+ const cached = preprocessedCopyCache.get(copyPath);
116
+ if (cached !== undefined)
117
+ return cached;
105
118
  const content = copybookByPath.get(copyPath);
106
- return content ? preprocessCobolSource(content) : null;
119
+ if (!content)
120
+ return null; // preserves original falsy→null (missing/empty)
121
+ const preprocessed = preprocessCobolSource(content);
122
+ preprocessedCopyCache.set(copyPath, preprocessed);
123
+ return preprocessed;
107
124
  };
108
125
  // Track module names for cross-program CALL resolution
109
126
  const moduleNodeIds = new Map(); // uppercase program name -> node id
@@ -59,7 +59,7 @@ export function emitCobolScopeCaptures(sourceText, _filePath, _cachedTree) {
59
59
  ? rangeOf(progIdLine, 7, progIdLine, lines[progIdLine - 1]?.length ?? endCol)
60
60
  : rangeOf(startLine, startCol, endLine, endCol);
61
61
  const grouped = {
62
- '@scope.module': capture('@scope.module', nameRange, name),
62
+ '@scope.module': capture('@scope.module', rangeOf(startLine, startCol, endLine, endCol), name),
63
63
  '@declaration.program': capture('@declaration.program', rangeOf(startLine, startCol, endLine, endCol), name),
64
64
  '@declaration.name': capture('@declaration.name', nameRange, name),
65
65
  };
@@ -83,7 +83,7 @@ export function emitCobolScopeCaptures(sourceText, _filePath, _cachedTree) {
83
83
  ? rangeOf(progIdLine, 7, progIdLine, lines[progIdLine - 1]?.length ?? endCol)
84
84
  : rangeOf(startLine, startCol, endLine, endCol);
85
85
  const grouped = {
86
- '@scope.module': capture('@scope.module', nameRange, prog.name),
86
+ '@scope.module': capture('@scope.module', rangeOf(startLine, startCol, endLine, endCol), prog.name),
87
87
  '@declaration.program': capture('@declaration.program', rangeOf(startLine, startCol, endLine, endCol), prog.name),
88
88
  '@declaration.name': capture('@declaration.name', nameRange, prog.name),
89
89
  };
@@ -79,6 +79,7 @@ export const MIGRATED_LANGUAGES = new Set([
79
79
  SupportedLanguages.Java,
80
80
  SupportedLanguages.Rust,
81
81
  SupportedLanguages.Ruby,
82
+ SupportedLanguages.Cobol,
82
83
  ]);
83
84
  /**
84
85
  * Return the env-var name that controls a given language's registry-
@@ -81,6 +81,20 @@ export const scopeResolutionPhase = {
81
81
  for (const pf of workerParsedFiles) {
82
82
  preExtractedByPath.set(pf.filePath, pf);
83
83
  }
84
+ // Drop pre-extracted entries for standalone providers — these
85
+ // languages are skipped by the canonical `parseStrategy === 'standalone'` guard below
86
+ // and never consume preExtractedByPath, so holding onto their
87
+ // entries would leak memory: the later cleanup loop (262-264)
88
+ // also never runs for skipped providers.
89
+ for (const [path] of preExtractedByPath) {
90
+ const lang = getLanguageFromFilename(path);
91
+ if (lang === null)
92
+ continue;
93
+ const provider = SCOPE_RESOLVERS.get(lang);
94
+ if (provider?.languageProvider.parseStrategy === 'standalone') {
95
+ preExtractedByPath.delete(path);
96
+ }
97
+ }
84
98
  let totalFiles = 0;
85
99
  let totalImports = 0;
86
100
  let totalRefs = 0;
@@ -114,6 +128,14 @@ export const scopeResolutionPhase = {
114
128
  for (const [lang, provider] of SCOPE_RESOLVERS) {
115
129
  if (!isRegistryPrimary(lang))
116
130
  continue;
131
+ // Standalone providers (COBOL, JCL) don't emit graph edges yet
132
+ // through the scope-resolution path. This is the canonical guard:
133
+ // runScopeResolution is never called for standalone providers, which
134
+ // keeps cobolPhase as the sole IMPORTS edge producer. Keep this guard
135
+ // in sync with any additional standalone providers added to
136
+ // SCOPE_RESOLVERS.
137
+ if (provider.languageProvider.parseStrategy === 'standalone')
138
+ continue;
117
139
  const langFiles = scannedFiles.filter((f) => getLanguageFromFilename(f.path) === lang);
118
140
  if (langFiles.length === 0)
119
141
  continue;
@@ -473,9 +473,29 @@ const processBatch = (files, onProgress) => {
473
473
  for (const [language, langFiles] of byLanguage) {
474
474
  const provider = getProvider(language);
475
475
  const queryString = provider.treeSitterQueries;
476
- if (!queryString)
476
+ if (!queryString) {
477
+ // Standalone providers (regex-based, no tree-sitter) that implement
478
+ // emitScopeCaptures feed into the scope-resolution pipeline via
479
+ // extractParsedFile directly — no tree-sitter involved.
480
+ if (provider.emitScopeCaptures) {
481
+ for (const file of langFiles) {
482
+ const parsedFile = extractParsedFile(provider, file.content, file.path, (message) => {
483
+ if (parentPort) {
484
+ parentPort.postMessage({ type: 'warning', message });
485
+ }
486
+ else {
487
+ logger.warn(message);
488
+ }
489
+ }, undefined);
490
+ if (parsedFile !== undefined) {
491
+ result.parsedFiles.push(parsedFile);
492
+ result.fileCount++;
493
+ onFileProcessed?.();
494
+ }
495
+ }
496
+ }
477
497
  continue;
478
- // Track if we need to handle tsx separately
498
+ }
479
499
  const tsxFiles = [];
480
500
  const regularFiles = [];
481
501
  if (language === SupportedLanguages.TypeScript) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.6-rc.85",
3
+ "version": "1.6.6-rc.87",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",