gitnexus 1.6.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. package/dist/cli/analyze.js +28 -3
  2. package/dist/core/group/extractors/fs-utils.d.ts +10 -0
  3. package/dist/core/group/extractors/fs-utils.js +24 -0
  4. package/dist/core/group/extractors/grpc-extractor.d.ts +17 -8
  5. package/dist/core/group/extractors/grpc-extractor.js +313 -191
  6. package/dist/core/group/extractors/grpc-patterns/go.d.ts +2 -0
  7. package/dist/core/group/extractors/grpc-patterns/go.js +97 -0
  8. package/dist/core/group/extractors/grpc-patterns/index.d.ts +19 -0
  9. package/dist/core/group/extractors/grpc-patterns/index.js +46 -0
  10. package/dist/core/group/extractors/grpc-patterns/java.d.ts +2 -0
  11. package/dist/core/group/extractors/grpc-patterns/java.js +173 -0
  12. package/dist/core/group/extractors/grpc-patterns/node.d.ts +4 -0
  13. package/dist/core/group/extractors/grpc-patterns/node.js +290 -0
  14. package/dist/core/group/extractors/grpc-patterns/proto.d.ts +9 -0
  15. package/dist/core/group/extractors/grpc-patterns/proto.js +134 -0
  16. package/dist/core/group/extractors/grpc-patterns/python.d.ts +2 -0
  17. package/dist/core/group/extractors/grpc-patterns/python.js +67 -0
  18. package/dist/core/group/extractors/grpc-patterns/types.d.ts +50 -0
  19. package/dist/core/group/extractors/grpc-patterns/types.js +1 -0
  20. package/dist/core/group/extractors/http-patterns/go.d.ts +2 -0
  21. package/dist/core/group/extractors/http-patterns/go.js +215 -0
  22. package/dist/core/group/extractors/http-patterns/index.d.ts +17 -0
  23. package/dist/core/group/extractors/http-patterns/index.js +44 -0
  24. package/dist/core/group/extractors/http-patterns/java.d.ts +2 -0
  25. package/dist/core/group/extractors/http-patterns/java.js +253 -0
  26. package/dist/core/group/extractors/http-patterns/node.d.ts +4 -0
  27. package/dist/core/group/extractors/http-patterns/node.js +354 -0
  28. package/dist/core/group/extractors/http-patterns/php.d.ts +2 -0
  29. package/dist/core/group/extractors/http-patterns/php.js +70 -0
  30. package/dist/core/group/extractors/http-patterns/python.d.ts +2 -0
  31. package/dist/core/group/extractors/http-patterns/python.js +133 -0
  32. package/dist/core/group/extractors/http-patterns/types.d.ts +61 -0
  33. package/dist/core/group/extractors/http-patterns/types.js +1 -0
  34. package/dist/core/group/extractors/http-route-extractor.d.ts +10 -13
  35. package/dist/core/group/extractors/http-route-extractor.js +201 -238
  36. package/dist/core/group/extractors/manifest-extractor.d.ts +54 -0
  37. package/dist/core/group/extractors/manifest-extractor.js +235 -0
  38. package/dist/core/group/extractors/topic-extractor.d.ts +0 -1
  39. package/dist/core/group/extractors/topic-extractor.js +55 -192
  40. package/dist/core/group/extractors/topic-patterns/go.d.ts +2 -0
  41. package/dist/core/group/extractors/topic-patterns/go.js +120 -0
  42. package/dist/core/group/extractors/topic-patterns/index.d.ts +14 -0
  43. package/dist/core/group/extractors/topic-patterns/index.js +38 -0
  44. package/dist/core/group/extractors/topic-patterns/java.d.ts +2 -0
  45. package/dist/core/group/extractors/topic-patterns/java.js +80 -0
  46. package/dist/core/group/extractors/topic-patterns/node.d.ts +4 -0
  47. package/dist/core/group/extractors/topic-patterns/node.js +155 -0
  48. package/dist/core/group/extractors/topic-patterns/python.d.ts +2 -0
  49. package/dist/core/group/extractors/topic-patterns/python.js +116 -0
  50. package/dist/core/group/extractors/topic-patterns/types.d.ts +25 -0
  51. package/dist/core/group/extractors/topic-patterns/types.js +10 -0
  52. package/dist/core/group/extractors/tree-sitter-scanner.d.ts +113 -0
  53. package/dist/core/group/extractors/tree-sitter-scanner.js +94 -0
  54. package/dist/core/ingestion/binding-accumulator.d.ts +22 -17
  55. package/dist/core/ingestion/binding-accumulator.js +29 -25
  56. package/dist/core/ingestion/cobol-processor.d.ts +1 -1
  57. package/dist/core/ingestion/import-processor.js +1 -1
  58. package/dist/core/ingestion/language-config.js +1 -1
  59. package/dist/core/ingestion/language-provider.d.ts +8 -0
  60. package/dist/core/ingestion/languages/ruby.js +15 -0
  61. package/dist/core/ingestion/markdown-processor.d.ts +1 -1
  62. package/dist/core/ingestion/method-extractors/configs/jvm.js +1 -0
  63. package/dist/core/ingestion/method-extractors/configs/ruby.js +1 -0
  64. package/dist/core/ingestion/method-extractors/generic.d.ts +6 -0
  65. package/dist/core/ingestion/method-extractors/generic.js +48 -4
  66. package/dist/core/ingestion/method-types.d.ts +4 -0
  67. package/dist/core/ingestion/model/resolve.js +103 -48
  68. package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
  69. package/dist/core/ingestion/model/semantic-model.js +1 -1
  70. package/dist/core/ingestion/model/symbol-table.d.ts +7 -7
  71. package/dist/core/ingestion/model/symbol-table.js +7 -7
  72. package/dist/core/ingestion/mro-processor.d.ts +1 -1
  73. package/dist/core/ingestion/mro-processor.js +1 -1
  74. package/dist/core/ingestion/parsing-processor.js +54 -42
  75. package/dist/core/ingestion/pipeline-phases/cobol.d.ts +16 -0
  76. package/dist/core/ingestion/pipeline-phases/cobol.js +45 -0
  77. package/dist/core/ingestion/pipeline-phases/communities.d.ts +16 -0
  78. package/dist/core/ingestion/pipeline-phases/communities.js +62 -0
  79. package/dist/core/ingestion/pipeline-phases/cross-file-impl.d.ts +17 -0
  80. package/dist/core/ingestion/pipeline-phases/cross-file-impl.js +156 -0
  81. package/dist/core/ingestion/pipeline-phases/cross-file.d.ts +37 -0
  82. package/dist/core/ingestion/pipeline-phases/cross-file.js +63 -0
  83. package/dist/core/ingestion/pipeline-phases/index.d.ts +21 -0
  84. package/dist/core/ingestion/pipeline-phases/index.js +22 -0
  85. package/dist/core/ingestion/pipeline-phases/markdown.d.ts +17 -0
  86. package/dist/core/ingestion/pipeline-phases/markdown.js +33 -0
  87. package/dist/core/ingestion/pipeline-phases/mro.d.ts +18 -0
  88. package/dist/core/ingestion/pipeline-phases/mro.js +36 -0
  89. package/dist/core/ingestion/pipeline-phases/orm-extraction.d.ts +22 -0
  90. package/dist/core/ingestion/pipeline-phases/orm-extraction.js +92 -0
  91. package/dist/core/ingestion/pipeline-phases/orm.d.ts +15 -0
  92. package/dist/core/ingestion/pipeline-phases/orm.js +74 -0
  93. package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +47 -0
  94. package/dist/core/ingestion/pipeline-phases/parse-impl.js +437 -0
  95. package/dist/core/ingestion/pipeline-phases/parse.d.ts +49 -0
  96. package/dist/core/ingestion/pipeline-phases/parse.js +33 -0
  97. package/dist/core/ingestion/pipeline-phases/processes.d.ts +16 -0
  98. package/dist/core/ingestion/pipeline-phases/processes.js +143 -0
  99. package/dist/core/ingestion/pipeline-phases/routes.d.ts +21 -0
  100. package/dist/core/ingestion/pipeline-phases/routes.js +243 -0
  101. package/dist/core/ingestion/pipeline-phases/runner.d.ts +22 -0
  102. package/dist/core/ingestion/pipeline-phases/runner.js +203 -0
  103. package/dist/core/ingestion/pipeline-phases/scan.d.ts +21 -0
  104. package/dist/core/ingestion/pipeline-phases/scan.js +46 -0
  105. package/dist/core/ingestion/pipeline-phases/structure.d.ts +27 -0
  106. package/dist/core/ingestion/pipeline-phases/structure.js +35 -0
  107. package/dist/core/ingestion/pipeline-phases/tools.d.ts +20 -0
  108. package/dist/core/ingestion/pipeline-phases/tools.js +79 -0
  109. package/dist/core/ingestion/pipeline-phases/types.d.ts +79 -0
  110. package/dist/core/ingestion/pipeline-phases/types.js +37 -0
  111. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +35 -0
  112. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +174 -0
  113. package/dist/core/ingestion/pipeline.d.ts +16 -10
  114. package/dist/core/ingestion/pipeline.js +66 -1534
  115. package/dist/core/ingestion/process-processor.js +1 -1
  116. package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
  117. package/dist/core/ingestion/tree-sitter-queries.js +69 -0
  118. package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -3
  119. package/dist/core/ingestion/utils/ast-helpers.js +48 -21
  120. package/dist/core/ingestion/utils/env.d.ts +10 -0
  121. package/dist/core/ingestion/utils/env.js +10 -0
  122. package/dist/core/ingestion/utils/graph-sort.d.ts +58 -0
  123. package/dist/core/ingestion/utils/graph-sort.js +100 -0
  124. package/dist/core/ingestion/workers/parse-worker.js +12 -8
  125. package/dist/core/lbug/lbug-adapter.js +66 -24
  126. package/package.json +3 -3
  127. package/vendor/tree-sitter-proto/binding.gyp +30 -0
  128. package/vendor/tree-sitter-proto/bindings/node/binding.cc +20 -0
  129. package/vendor/tree-sitter-proto/bindings/node/index.d.ts +28 -0
  130. package/vendor/tree-sitter-proto/bindings/node/index.js +7 -0
  131. package/vendor/tree-sitter-proto/package.json +18 -0
  132. package/vendor/tree-sitter-proto/src/node-types.json +1145 -0
  133. package/vendor/tree-sitter-proto/src/parser.c +10149 -0
  134. package/vendor/tree-sitter-proto/src/tree_sitter/alloc.h +54 -0
  135. package/vendor/tree-sitter-proto/src/tree_sitter/array.h +291 -0
  136. package/vendor/tree-sitter-proto/src/tree_sitter/parser.h +266 -0
@@ -1,19 +1,29 @@
1
- import * as fs from 'node:fs';
2
1
  import * as path from 'node:path';
3
2
  import { glob } from 'glob';
4
- function readSafe(repoPath, rel) {
5
- const abs = path.resolve(repoPath, rel);
6
- const base = path.resolve(repoPath);
7
- const relToBase = path.relative(base, abs);
8
- if (relToBase.startsWith('..') || path.isAbsolute(relToBase))
9
- return null;
10
- try {
11
- return fs.readFileSync(abs, 'utf-8');
12
- }
13
- catch {
14
- return null;
15
- }
16
- }
3
+ import Parser from 'tree-sitter';
4
+ import { readSafe } from './fs-utils.js';
5
+ import { GRPC_SCAN_GLOB, getPluginForFile, hasProtoPlugin, } from './grpc-patterns/index.js';
6
+ /**
7
+ * Language-agnostic orchestrator for gRPC (provider + consumer) contract
8
+ * extraction.
9
+ *
10
+ * Two parts:
11
+ *
12
+ * 1. **`.proto` parsing** — tree-sitter when `tree-sitter-proto` is
13
+ * installed (optionalDependency vendored in `vendor/tree-sitter-proto/`),
14
+ * via the `.proto` entry in `grpc-patterns/` and `hasProtoPlugin`.
15
+ * When the grammar isn't available (platform incompatibility, native
16
+ * build failure) the orchestrator falls back to the in-process
17
+ * string-sanitizing parser defined below (`stripProtoCommentsAndStrings`
18
+ * + `extractServiceBlocks`). The fallback preserves offsets so any
19
+ * downstream regex scans run against a sanitized copy without
20
+ * affecting line numbers of the original.
21
+ *
22
+ * 2. **Source-scan providers / consumers** — delegated to per-language
23
+ * plugins in `./grpc-patterns/`. The orchestrator imports NO
24
+ * tree-sitter grammars or query strings — each plugin owns its own.
25
+ */
26
+ // ─── .proto fallback parser (used only when tree-sitter-proto is absent) ───
17
27
  function contractId(pkg, service, method) {
18
28
  const prefix = pkg ? `${pkg}.${service}` : service;
19
29
  return `grpc::${prefix}/${method}`;
@@ -21,18 +31,103 @@ function contractId(pkg, service, method) {
21
31
  function serviceOnlyContractId(serviceName) {
22
32
  return `grpc::${serviceName}/*`;
23
33
  }
34
/**
 * Blank out every comment and string literal in a .proto source, returning a
 * copy that has exactly the same length as the input. Every character that
 * is not part of a comment or string stays at its original offset, and every
 * CR / LF stays where it was, so offsets and line numbers computed on the
 * sanitized copy are valid for the original text.
 *
 * Recognized forms:
 *   - line comments (slash-slash up to, but not including, the line feed)
 *   - block comments (slash-star ... star-slash); an unterminated block runs
 *     to end of input
 *   - double-quoted and single-quoted strings with backslash escapes; an
 *     unterminated string runs to end of input
 *
 * @param content Raw .proto text.
 * @returns The sanitized text, same length as `content`.
 */
function stripProtoCommentsAndStrings(content) {
    const out = new Array(content.length);
    // Replace one position with a space, except line terminators, which are
    // copied through so line numbering is never disturbed.
    const blank = (idx) => {
        const c = content[idx];
        out[idx] = c === '\n' || c === '\r' ? c : ' ';
    };
    let i = 0;
    while (i < content.length) {
        const ch = content[i];
        const next = content[i + 1];
        // Line comment: blank everything up to the terminating line feed,
        // which is left for the main loop to copy verbatim.
        if (ch === '/' && next === '/') {
            while (i < content.length && content[i] !== '\n') {
                blank(i);
                i++;
            }
            continue;
        }
        // Block comment: blank through the closing delimiter (or EOF).
        if (ch === '/' && next === '*') {
            blank(i);
            blank(i + 1);
            i += 2;
            while (i < content.length) {
                const closes = content[i] === '*' && content[i + 1] === '/';
                blank(i);
                i++;
                if (closes) {
                    blank(i);
                    i++;
                    break;
                }
            }
            continue;
        }
        // String literal: blank through the matching closing quote (or EOF).
        if (ch === '"' || ch === "'") {
            blank(i); // opening quote
            i++;
            while (i < content.length) {
                const c = content[i];
                if (c === '\\' && i + 1 < content.length) {
                    // Escaped pair: the escaped character can never close the
                    // string. It is still blanked through `blank`, so a
                    // backslash followed by a line break keeps the break
                    // (previously it was turned into a space, shifting line
                    // numbers for everything after it).
                    blank(i);
                    blank(i + 1);
                    i += 2;
                    continue;
                }
                blank(i);
                i++;
                if (c === ch)
                    break;
            }
            continue;
        }
        out[i] = ch;
        i++;
    }
    return out.join('');
}
24
114
  function extractServiceBlocks(content) {
25
115
  const results = [];
26
- // v1: brace-depth only — braces inside comments or string literals are not filtered (see spec Fix 2)
116
+ // Sanitize comments and string literals so braces inside them don't
117
+ // throw off the depth counter. The sanitized copy has the same length
118
+ // and offsets as the original, so we use it ONLY to scan for service
119
+ // headers and braces; the service body we return is sliced from the
120
+ // ORIGINAL content to preserve exact source text for downstream use.
121
+ const sanitized = stripProtoCommentsAndStrings(content);
27
122
  const headerRe = /service\s+(\w+)\s*\{/g;
28
123
  let headerMatch;
29
- while ((headerMatch = headerRe.exec(content)) !== null) {
124
+ while ((headerMatch = headerRe.exec(sanitized)) !== null) {
30
125
  const serviceName = headerMatch[1];
31
126
  const bodyStart = headerMatch.index + headerMatch[0].length;
32
127
  let depth = 1;
33
128
  let pos = bodyStart;
34
- while (pos < content.length && depth > 0) {
35
- const ch = content[pos];
129
+ while (pos < sanitized.length && depth > 0) {
130
+ const ch = sanitized[pos];
36
131
  if (ch === '{')
37
132
  depth++;
38
133
  else if (ch === '}')
@@ -60,6 +155,137 @@ function makeContract(cid, role, filePath, symbolName, confidence, meta) {
60
155
  meta: { ...meta, extractionStrategy: 'source_scan' },
61
156
  };
62
157
  }
158
/**
 * Convert every backslash in a relative path to a forward slash so paths can
 * be compared and used as map keys regardless of the separator they came in
 * with.
 */
function normalizeProtoPath(rel) {
    const segments = rel.split('\\');
    return segments.join('/');
}
161
/**
 * Collect the path of every `import` statement in a .proto source, in the
 * order the statements appear.
 *
 * Handles the optional `public` / `weak` modifier and both double- and
 * single-quoted path literals. The scan is line-anchored, so an import that
 * follows other tokens on the same line is not matched.
 *
 * @param content Raw .proto text (string literals must be intact).
 * @returns The imported paths exactly as written in the source.
 */
function extractProtoImports(content) {
    const importRe = /^\s*import\s+(?:(?:public|weak)\s+)?(?:"([^"]+)"|'([^']+)')\s*;/gm;
    // Exactly one of the two capture groups is set per match, depending on
    // which quote style the statement used.
    return Array.from(content.matchAll(importRe), (m) => m[1] ?? m[2]);
}
170
/**
 * Length of the longest run of consecutive, identical path segments that
 * appears in both paths. Paths are split on '/', and empty segments (from
 * leading, trailing or doubled slashes) are ignored.
 */
function longestSharedSegmentRun(aPath, bPath) {
    const left = aPath.split('/').filter((segment) => segment.length > 0);
    const right = bPath.split('/').filter((segment) => segment.length > 0);
    // Number of equal segments when walking both lists in lockstep from the
    // given starting positions.
    const runFrom = (li, ri) => {
        let length = 0;
        while (li + length < left.length &&
            ri + length < right.length &&
            left[li + length] === right[ri + length]) {
            length++;
        }
        return length;
    };
    let longest = 0;
    left.forEach((_l, li) => {
        right.forEach((_r, ri) => {
            longest = Math.max(longest, runFrom(li, ri));
        });
    });
    return longest;
}
186
/**
 * Scan every `.proto` file under `repoPath` (skipping node_modules, .git and
 * vendor) and build two lookup tables:
 *
 *   - `packagesByProto`: normalized proto path -> package name ('' if none
 *     could be determined)
 *   - `servicesByName`: short service name -> list of
 *     `{ package, serviceName, methods, protoPath }`, one entry per
 *     declaring file
 *
 * A file without its own `package` statement takes the package of the first
 * import (tried relative to the importing file, then as written) that
 * resolves to a scanned file with a non-empty package.
 *
 * `package` and `rpc` statements are matched against a copy of the text with
 * comments and string literals blanked out, so commented-out declarations
 * are not reported as real ones.
 *
 * @param repoPath Repository root used as the glob cwd and read base.
 * @returns `{ packagesByProto, servicesByName }`.
 */
async function buildProtoContext(repoPath) {
    const servicesByName = new Map();
    const protoFiles = await glob('**/*.proto', {
        cwd: repoPath,
        absolute: false,
        nodir: true,
        ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**'],
    });
    // Normalized path -> raw file text. Unreadable or empty files are skipped.
    const contents = new Map();
    for (const rel of protoFiles) {
        const content = readSafe(repoPath, rel);
        if (!content)
            continue;
        contents.set(normalizeProtoPath(rel), content);
    }
    const packagesByProto = new Map();
    // `seen` guards against import cycles within a single resolution chain.
    const resolvePackage = (protoPath, seen = new Set()) => {
        if (packagesByProto.has(protoPath))
            return packagesByProto.get(protoPath) ?? '';
        if (seen.has(protoPath))
            return '';
        const content = contents.get(protoPath);
        if (!content)
            return '';
        seen.add(protoPath);
        // Match on the sanitized copy so a `package x;` line sitting inside a
        // block comment is ignored.
        const pkgMatch = stripProtoCommentsAndStrings(content).match(/^\s*package\s+([\w.]+)\s*;/m);
        if (pkgMatch?.[1]) {
            packagesByProto.set(protoPath, pkgMatch[1]);
            return pkgMatch[1];
        }
        // Imports need the raw text: the path lives inside a string literal.
        for (const importPath of extractProtoImports(content)) {
            const normalizedImport = normalizeProtoPath(importPath);
            const candidates = [
                normalizeProtoPath(path.posix.normalize(path.posix.join(path.posix.dirname(protoPath), normalizedImport))),
                normalizedImport,
            ];
            for (const candidate of candidates) {
                if (!contents.has(candidate))
                    continue;
                const inheritedPackage = resolvePackage(candidate, seen);
                if (inheritedPackage) {
                    packagesByProto.set(protoPath, inheritedPackage);
                    return inheritedPackage;
                }
            }
        }
        packagesByProto.set(protoPath, '');
        return '';
    };
    for (const rel of protoFiles) {
        const normalizedRel = normalizeProtoPath(rel);
        const content = contents.get(normalizedRel);
        if (!content)
            continue;
        const pkg = resolvePackage(normalizedRel);
        const serviceBlocks = extractServiceBlocks(content);
        for (const block of serviceBlocks) {
            // The body is original source text, so blank its comments and
            // strings before looking for rpc declarations; otherwise a
            // commented-out `rpc Foo(...)` would be listed as a live method.
            const sanitizedBody = stripProtoCommentsAndStrings(block.body);
            const rpcRe = /rpc\s+(\w+)\s*\(/g;
            const methods = [];
            let m;
            while ((m = rpcRe.exec(sanitizedBody)) !== null) {
                methods.push(m[1]);
            }
            const info = {
                package: pkg,
                serviceName: block.name,
                methods,
                protoPath: normalizedRel,
            };
            const existing = servicesByName.get(block.name) ?? [];
            existing.push(info);
            servicesByName.set(block.name, existing);
        }
    }
    return { packagesByProto, servicesByName };
}
262
/**
 * Convenience wrapper around `buildProtoContext` that resolves to only the
 * service-name -> service-info-list map.
 */
export async function buildProtoMap(repoPath) {
    const context = await buildProtoContext(repoPath);
    return context.servicesByName;
}
266
/**
 * Choose which proto definition a source file most likely refers to when
 * several .proto files declare a service with the same short name.
 *
 * Returns `null` for no candidates and the sole candidate when there is only
 * one. Otherwise the candidate whose proto directory shares the longest run
 * of consecutive path segments with the source file's directory wins; on a
 * tie the earliest candidate is kept.
 */
export function resolveProtoConflict(_serviceName, sourceFilePath, candidates) {
    switch (candidates.length) {
        case 0:
            return null;
        case 1:
            return candidates[0];
    }
    const sourceDir = normalizeProtoPath(path.dirname(sourceFilePath));
    const scoreOf = (candidate) => longestSharedSegmentRun(sourceDir, normalizeProtoPath(path.dirname(candidate.protoPath)));
    let winner = candidates[0];
    let topScore = scoreOf(winner);
    for (let idx = 1; idx < candidates.length; idx++) {
        const score = scoreOf(candidates[idx]);
        // Strictly greater: earlier candidates win ties.
        if (score > topScore) {
            topScore = score;
            winner = candidates[idx];
        }
    }
    return winner;
}
284
/**
 * Build the service-level (wildcard) contract id. With a package the id is
 * `grpc::<pkg>.<serviceName>/*`; with an empty package it is
 * `grpc::<serviceName>/*`.
 */
export function serviceContractId(pkg, serviceName) {
    let qualifiedName = serviceName;
    if (pkg) {
        qualifiedName = pkg + '.' + serviceName;
    }
    return 'grpc::' + qualifiedName + '/*';
}
288
+ // ─── Orchestrator ────────────────────────────────────────────────────
63
289
  export class GrpcExtractor {
64
290
  type = 'grpc';
65
291
  async canExtract(_repo) {
@@ -67,198 +293,94 @@ export class GrpcExtractor {
67
293
  }
68
294
  async extract(_dbExecutor, repoPath, _repo) {
69
295
  const out = [];
70
- // Proto files definitive provider source
71
- const protoFiles = await glob('**/*.proto', {
72
- cwd: repoPath,
73
- ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**'],
74
- nodir: true,
75
- });
76
- for (const rel of protoFiles) {
77
- const content = readSafe(repoPath, rel);
78
- if (content)
79
- out.push(...this.parseProtoFile(content, rel));
296
+ const protoContext = await buildProtoContext(repoPath);
297
+ const protoMap = protoContext.servicesByName;
298
+ // ─── Proto files — definitive provider source ─────────────────
299
+ // When tree-sitter-proto is available, .proto files are handled by
300
+ // the plugin loop below (they're in GRPC_SCAN_GLOB). Otherwise
301
+ // emit provider contracts directly from the proto map that
302
+ // `buildProtoContext` already built no second glob / parse pass.
303
+ if (!hasProtoPlugin) {
304
+ for (const infos of protoMap.values()) {
305
+ for (const info of infos) {
306
+ for (const methodName of info.methods) {
307
+ const cid = contractId(info.package, info.serviceName, methodName);
308
+ out.push(makeContract(cid, 'provider', info.protoPath, `${info.serviceName}.${methodName}`, 0.85, {
309
+ package: info.package,
310
+ service: info.serviceName,
311
+ method: methodName,
312
+ source: 'proto',
313
+ }));
314
+ }
315
+ }
316
+ }
80
317
  }
81
- // Source files server/client detection
82
- const sourceFiles = await glob('**/*.{go,java,py,ts,tsx,js,jsx}', {
318
+ // ─── Source files (+ .proto when plugin available) ────────────
319
+ const sourceFiles = await glob(GRPC_SCAN_GLOB, {
83
320
  cwd: repoPath,
84
321
  ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**', '**/dist/**', '**/build/**'],
85
322
  nodir: true,
86
323
  });
324
+ const parser = new Parser();
87
325
  for (const rel of sourceFiles) {
326
+ const plugin = getPluginForFile(rel);
327
+ if (!plugin)
328
+ continue;
88
329
  const content = readSafe(repoPath, rel);
89
330
  if (!content)
90
331
  continue;
91
- const ext = path.extname(rel).toLowerCase();
92
- if (ext === '.go') {
93
- out.push(...this.scanGoProviders(content, rel));
94
- out.push(...this.scanGoConsumers(content, rel));
95
- }
96
- else if (ext === '.java') {
97
- out.push(...this.scanJavaProviders(content, rel));
98
- out.push(...this.scanJavaConsumers(content, rel));
332
+ let detections = [];
333
+ try {
334
+ parser.setLanguage(plugin.language);
335
+ const tree = parser.parse(content);
336
+ detections = plugin.scan(tree);
99
337
  }
100
- else if (ext === '.py') {
101
- out.push(...this.scanPythonProviders(content, rel));
102
- out.push(...this.scanPythonConsumers(content, rel));
338
+ catch {
339
+ continue;
103
340
  }
104
- else if (['.ts', '.tsx', '.js', '.jsx'].includes(ext)) {
105
- out.push(...this.scanTsProviders(content, rel));
341
+ for (const d of detections) {
342
+ out.push(this.detectionToContract(d, rel, protoMap));
106
343
  }
107
344
  }
108
345
  return this.dedupe(out);
109
346
  }
110
- parseProtoFile(content, filePath) {
111
- const out = [];
112
- const pkgMatch = content.match(/^package\s+([\w.]+)\s*;/m);
113
- const pkg = pkgMatch ? pkgMatch[1] : '';
114
- for (const { name: serviceName, body } of extractServiceBlocks(content)) {
115
- const rpcRe = /rpc\s+(\w+)\s*\(/g;
116
- let rpcMatch;
117
- while ((rpcMatch = rpcRe.exec(body)) !== null) {
118
- const methodName = rpcMatch[1];
119
- const cid = contractId(pkg, serviceName, methodName);
120
- out.push(makeContract(cid, 'provider', filePath, `${serviceName}.${methodName}`, 0.85, {
121
- package: pkg,
122
- service: serviceName,
123
- method: methodName,
124
- source: 'proto',
125
- }));
126
- }
127
- }
128
- return out;
129
- }
130
- scanGoProviders(content, filePath) {
131
- const out = [];
132
- // pb.RegisterXxxServer(
133
- const registerRe = /\w+\.Register(\w+)Server\s*\(/g;
134
- let m;
135
- while ((m = registerRe.exec(content)) !== null) {
136
- const serviceName = m[1];
137
- out.push(makeContract(serviceOnlyContractId(serviceName), 'provider', filePath, `Register${serviceName}Server`, 0.8, { service: serviceName, source: 'go_register' }));
138
- }
139
- // pb.UnimplementedXxxServer
140
- const unimplRe = /\w+\.Unimplemented(\w+)Server\b/g;
141
- while ((m = unimplRe.exec(content)) !== null) {
142
- const serviceName = m[1];
143
- out.push(makeContract(serviceOnlyContractId(serviceName), 'provider', filePath, `Unimplemented${serviceName}Server`, 0.8, { service: serviceName, source: 'go_unimplemented' }));
144
- }
145
- return out;
146
- }
147
- scanGoConsumers(content, filePath) {
148
- const out = [];
149
- const re = /\w+\.New(\w+)Client\s*\(/g;
150
- let m;
151
- while ((m = re.exec(content)) !== null) {
152
- const serviceName = m[1];
153
- out.push(makeContract(serviceOnlyContractId(serviceName), 'consumer', filePath, `New${serviceName}Client`, 0.7, { service: serviceName, source: 'go_client' }));
154
- }
155
- return out;
156
- }
157
- scanJavaProviders(content, filePath) {
158
- const out = [];
159
- // @GrpcService
160
- if (content.includes('@GrpcService')) {
161
- const implBaseRe = /extends\s+(\w+)Grpc\.(\w+)ImplBase/;
162
- const m = content.match(implBaseRe);
163
- if (m) {
164
- out.push(makeContract(serviceOnlyContractId(m[1]), 'provider', filePath, m[2], 0.8, {
165
- service: m[1],
166
- source: 'java_grpc_service',
167
- }));
168
- }
169
- else {
170
- // Try extracting service name from class name
171
- const classRe = /class\s+(\w*?)(?:Grpc)?(?:Service)?\s+extends\s+(\w+)(?:Grpc\.(\w+))?ImplBase/;
172
- const cm = content.match(classRe);
173
- if (cm) {
174
- const svcName = cm[2].replace(/Grpc$/, '');
175
- out.push(makeContract(serviceOnlyContractId(svcName), 'provider', filePath, cm[1], 0.8, {
176
- service: svcName,
177
- source: 'java_grpc_service',
178
- }));
179
- }
180
- }
181
- }
182
- // extends XxxImplBase (without @GrpcService)
183
- if (!content.includes('@GrpcService')) {
184
- const implRe = /extends\s+(\w+?)(?:Grpc\.(\w+))?ImplBase/;
185
- const m = content.match(implRe);
186
- if (m) {
187
- const svcName = m[2] || m[1].replace(/Grpc$/, '');
188
- out.push(makeContract(serviceOnlyContractId(svcName), 'provider', filePath, svcName, 0.8, {
189
- service: svcName,
190
- source: 'java_impl_base',
191
- }));
192
- }
193
- }
194
- return out;
195
- }
196
- scanJavaConsumers(content, filePath) {
197
- const out = [];
198
- // XxxGrpc.newBlockingStub( or XxxGrpc.newStub(
199
- const re = /(\w+)Grpc\.new(?:Blocking)?Stub\s*\(/g;
200
- let m;
201
- while ((m = re.exec(content)) !== null) {
202
- const serviceName = m[1];
203
- out.push(makeContract(serviceOnlyContractId(serviceName), 'consumer', filePath, `${serviceName}Stub`, 0.7, { service: serviceName, source: 'java_stub' }));
204
- }
205
- return out;
206
- }
207
- scanPythonProviders(content, filePath) {
208
- const out = [];
209
- // add_XxxServicer_to_server(
210
- const re = /add_(\w+?)Servicer_to_server\s*\(/g;
211
- let m;
212
- while ((m = re.exec(content)) !== null) {
213
- const serviceName = m[1];
214
- out.push(makeContract(serviceOnlyContractId(serviceName), 'provider', filePath, `add_${serviceName}Servicer_to_server`, 0.8, { service: serviceName, source: 'python_servicer' }));
215
- }
216
- return out;
217
- }
218
- scanPythonConsumers(content, filePath) {
219
- const out = [];
220
- // XxxStub(
221
- const re = /(\w+)Stub\s*\(/g;
222
- let m;
223
- while ((m = re.exec(content)) !== null) {
224
- const name = m[1];
225
- // Filter out common false positives
226
- if (['Mock', 'Test', 'Fake', 'Stub'].includes(name))
227
- continue;
228
- out.push(makeContract(serviceOnlyContractId(name), 'consumer', filePath, `${name}Stub`, 0.7, {
229
- service: name,
230
- source: 'python_stub',
231
- }));
232
- }
233
- return out;
234
- }
235
- scanTsProviders(content, filePath) {
236
- const out = [];
237
- // @GrpcMethod('ServiceName', 'MethodName')
238
- const re = /@GrpcMethod\s*\(\s*['"](\w+)['"]\s*,\s*['"](\w+)['"]\s*\)/g;
239
- let m;
240
- while ((m = re.exec(content)) !== null) {
241
- const serviceName = m[1];
242
- const methodName = m[2];
243
- const cid = contractId('', serviceName, methodName);
244
- out.push(makeContract(cid, 'provider', filePath, `${serviceName}.${methodName}`, 0.8, {
245
- service: serviceName,
246
- method: methodName,
247
- source: 'ts_grpc_method',
248
- }));
249
- }
250
- return out;
347
+ /**
348
+ * Convert a plugin `GrpcDetection` into a concrete `ExtractedContract`
349
+ * by resolving the short service name against the proto map, building
350
+ * either a service-level (`grpc::pkg.Svc/*`) or method-level
351
+ * (`grpc::pkg.Svc/Method`) contract id, and selecting confidence
352
+ * based on whether the proto map had an entry.
353
+ */
354
+ detectionToContract(d, filePath, protoMap) {
355
+ const candidates = protoMap.get(d.serviceName);
356
+ const proto = resolveProtoConflict(d.serviceName, filePath, candidates ?? []);
357
+ const pkg = proto?.package ?? '';
358
+ const cid = d.methodName
359
+ ? contractId(pkg, d.serviceName, d.methodName)
360
+ : proto
361
+ ? serviceContractId(pkg, d.serviceName)
362
+ : serviceOnlyContractId(d.serviceName);
363
+ const confidence = proto ? d.confidenceWithProto : d.confidenceWithoutProto;
364
+ const meta = {
365
+ service: d.serviceName,
366
+ source: d.source,
367
+ };
368
+ if (d.methodName)
369
+ meta.method = d.methodName;
370
+ return makeContract(cid, d.role, filePath, d.symbolName, confidence, meta);
251
371
  }
252
372
  dedupe(items) {
253
- const seen = new Set();
254
- const out = [];
373
+ const byKey = new Map();
255
374
  for (const c of items) {
256
375
  const k = `${c.contractId}|${c.role}|${c.symbolRef.filePath}`;
257
- if (seen.has(k))
258
- continue;
259
- seen.add(k);
260
- out.push(c);
376
+ const existing = byKey.get(k);
377
+ if (!existing ||
378
+ c.confidence > existing.confidence ||
379
+ (c.confidence === existing.confidence &&
380
+ String(c.meta.source) < String(existing.meta.source))) {
381
+ byKey.set(k, c);
382
+ }
261
383
  }
262
- return out;
384
+ return Array.from(byKey.values());
263
385
  }
264
386
  }
@@ -0,0 +1,2 @@
1
+ import type { GrpcLanguagePlugin } from './types.js';
2
+ export declare const GO_GRPC_PLUGIN: GrpcLanguagePlugin;
@@ -0,0 +1,97 @@
1
+ import Go from 'tree-sitter-go';
2
+ import { compilePatterns, runCompiledPatterns, } from '../tree-sitter-scanner.js';
3
/**
 * Go gRPC plugin. Detects:
 * - Provider: `pb.RegisterXxxServer(...)` calls
 * - Provider: `pb.UnimplementedXxxServer` embedded in a struct
 * - Consumer: `pb.NewXxxClient(conn)` calls
 */
// Identifier shapes; capture group 1 is the service name.
const REGISTER_RE = /^Register(\w+)Server$/;
const UNIMPLEMENTED_RE = /^Unimplemented(\w+)Server$/;
const NEW_CLIENT_RE = /^New(\w+)Client$/;
// Compile a pattern set that holds a single Go query with empty metadata.
const compileSingleGoQuery = (name, query) => compilePatterns({
    name,
    language: Go,
    patterns: [{ meta: {}, query }],
});
// Any `xxx.<fn>(...)` call — the plugin filters on the field identifier text.
const SELECTOR_CALL_PATTERNS = compileSingleGoQuery('go-grpc-selector-call', `
        (call_expression
          function: (selector_expression
            field: (field_identifier) @fn))
      `);
// Any `qualified_type` used as a struct field — for `pb.UnimplementedXxxServer`.
const STRUCT_EMBEDDING_PATTERNS = compileSingleGoQuery('go-grpc-struct-embedding', `
        (struct_type
          (field_declaration_list
            (field_declaration
              type: (qualified_type
                name: (type_identifier) @field_type))))
      `);
44
/**
 * gRPC detection plugin for Go sources.
 *
 * `scan` first walks every selector call and reports `Register<Svc>Server`
 * as a provider and `New<Svc>Client` as a consumer, then walks struct fields
 * of qualified type and reports `Unimplemented<Svc>Server` as a provider.
 * Call detections therefore always precede struct detections in the result.
 */
export const GO_GRPC_PLUGIN = {
    name: 'go-grpc',
    language: Go,
    scan(tree) {
        const detections = [];
        // Providers share the same confidence pair regardless of how they
        // were detected.
        const provider = (serviceName, symbolName, source) => ({
            role: 'provider',
            serviceName,
            symbolName,
            source,
            confidenceWithProto: 0.8,
            confidenceWithoutProto: 0.65,
        });
        for (const { captures } of runCompiledPatterns(SELECTOR_CALL_PATTERNS, tree)) {
            const calleeText = captures.fn?.text;
            if (captures.fn == null)
                continue;
            const asRegister = calleeText.match(REGISTER_RE);
            if (asRegister) {
                detections.push(provider(asRegister[1], calleeText, 'go_register'));
                continue;
            }
            const asNewClient = calleeText.match(NEW_CLIENT_RE);
            if (asNewClient) {
                detections.push({
                    role: 'consumer',
                    serviceName: asNewClient[1],
                    symbolName: calleeText,
                    source: 'go_client',
                    confidenceWithProto: 0.75,
                    confidenceWithoutProto: 0.55,
                });
            }
        }
        for (const { captures } of runCompiledPatterns(STRUCT_EMBEDDING_PATTERNS, tree)) {
            const typeNode = captures.field_type;
            if (!typeNode)
                continue;
            const asUnimplemented = UNIMPLEMENTED_RE.exec(typeNode.text);
            if (asUnimplemented) {
                detections.push(provider(asUnimplemented[1], typeNode.text, 'go_unimplemented'));
            }
        }
        return detections;
    },
};
@@ -0,0 +1,19 @@
1
+ import type { GrpcLanguagePlugin } from './types.js';
2
+ export type { GrpcDetection, GrpcLanguagePlugin, GrpcRole } from './types.js';
3
+ export { PROTO_GRPC_PLUGIN, extractPackageFromTree } from './proto.js';
4
+ /**
5
+ * Glob for source files worth scanning for gRPC server/client patterns.
6
+ * Includes `.proto` when the grammar is available.
7
+ */
8
+ export declare const GRPC_SCAN_GLOB: string;
9
+ /**
10
+ * Whether the tree-sitter proto plugin is available. The orchestrator
11
+ * uses this to decide between the tree-sitter path and the fallback
12
+ * manual parser for `.proto` files.
13
+ */
14
+ export declare const hasProtoPlugin: boolean;
15
+ /**
16
+ * Return the gRPC plugin registered for the given file's extension,
17
+ * or `undefined` if the extension is not registered.
18
+ */
19
+ export declare function getPluginForFile(rel: string): GrpcLanguagePlugin | undefined;