gitnexus 1.6.0 → 1.6.2-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145)
  1. package/README.md +73 -0
  2. package/dist/cli/analyze.js +50 -3
  3. package/dist/core/group/extractors/fs-utils.d.ts +10 -0
  4. package/dist/core/group/extractors/fs-utils.js +24 -0
  5. package/dist/core/group/extractors/grpc-extractor.d.ts +17 -8
  6. package/dist/core/group/extractors/grpc-extractor.js +328 -191
  7. package/dist/core/group/extractors/grpc-patterns/go.d.ts +2 -0
  8. package/dist/core/group/extractors/grpc-patterns/go.js +97 -0
  9. package/dist/core/group/extractors/grpc-patterns/index.d.ts +19 -0
  10. package/dist/core/group/extractors/grpc-patterns/index.js +46 -0
  11. package/dist/core/group/extractors/grpc-patterns/java.d.ts +2 -0
  12. package/dist/core/group/extractors/grpc-patterns/java.js +173 -0
  13. package/dist/core/group/extractors/grpc-patterns/node.d.ts +4 -0
  14. package/dist/core/group/extractors/grpc-patterns/node.js +290 -0
  15. package/dist/core/group/extractors/grpc-patterns/proto.d.ts +9 -0
  16. package/dist/core/group/extractors/grpc-patterns/proto.js +134 -0
  17. package/dist/core/group/extractors/grpc-patterns/python.d.ts +2 -0
  18. package/dist/core/group/extractors/grpc-patterns/python.js +67 -0
  19. package/dist/core/group/extractors/grpc-patterns/types.d.ts +50 -0
  20. package/dist/core/group/extractors/grpc-patterns/types.js +1 -0
  21. package/dist/core/group/extractors/http-patterns/go.d.ts +2 -0
  22. package/dist/core/group/extractors/http-patterns/go.js +215 -0
  23. package/dist/core/group/extractors/http-patterns/index.d.ts +17 -0
  24. package/dist/core/group/extractors/http-patterns/index.js +44 -0
  25. package/dist/core/group/extractors/http-patterns/java.d.ts +2 -0
  26. package/dist/core/group/extractors/http-patterns/java.js +253 -0
  27. package/dist/core/group/extractors/http-patterns/node.d.ts +4 -0
  28. package/dist/core/group/extractors/http-patterns/node.js +354 -0
  29. package/dist/core/group/extractors/http-patterns/php.d.ts +2 -0
  30. package/dist/core/group/extractors/http-patterns/php.js +70 -0
  31. package/dist/core/group/extractors/http-patterns/python.d.ts +2 -0
  32. package/dist/core/group/extractors/http-patterns/python.js +133 -0
  33. package/dist/core/group/extractors/http-patterns/types.d.ts +61 -0
  34. package/dist/core/group/extractors/http-patterns/types.js +1 -0
  35. package/dist/core/group/extractors/http-route-extractor.d.ts +10 -13
  36. package/dist/core/group/extractors/http-route-extractor.js +231 -238
  37. package/dist/core/group/extractors/manifest-extractor.d.ts +54 -0
  38. package/dist/core/group/extractors/manifest-extractor.js +277 -0
  39. package/dist/core/group/extractors/topic-extractor.d.ts +0 -1
  40. package/dist/core/group/extractors/topic-extractor.js +55 -192
  41. package/dist/core/group/extractors/topic-patterns/go.d.ts +2 -0
  42. package/dist/core/group/extractors/topic-patterns/go.js +120 -0
  43. package/dist/core/group/extractors/topic-patterns/index.d.ts +14 -0
  44. package/dist/core/group/extractors/topic-patterns/index.js +38 -0
  45. package/dist/core/group/extractors/topic-patterns/java.d.ts +2 -0
  46. package/dist/core/group/extractors/topic-patterns/java.js +80 -0
  47. package/dist/core/group/extractors/topic-patterns/node.d.ts +4 -0
  48. package/dist/core/group/extractors/topic-patterns/node.js +155 -0
  49. package/dist/core/group/extractors/topic-patterns/python.d.ts +2 -0
  50. package/dist/core/group/extractors/topic-patterns/python.js +116 -0
  51. package/dist/core/group/extractors/topic-patterns/types.d.ts +25 -0
  52. package/dist/core/group/extractors/topic-patterns/types.js +10 -0
  53. package/dist/core/group/extractors/tree-sitter-scanner.d.ts +113 -0
  54. package/dist/core/group/extractors/tree-sitter-scanner.js +94 -0
  55. package/dist/core/ingestion/binding-accumulator.d.ts +22 -17
  56. package/dist/core/ingestion/binding-accumulator.js +29 -25
  57. package/dist/core/ingestion/cobol-processor.d.ts +1 -1
  58. package/dist/core/ingestion/import-processor.js +1 -1
  59. package/dist/core/ingestion/language-config.js +1 -1
  60. package/dist/core/ingestion/language-provider.d.ts +32 -5
  61. package/dist/core/ingestion/languages/c-cpp.js +2 -2
  62. package/dist/core/ingestion/languages/dart.d.ts +1 -1
  63. package/dist/core/ingestion/languages/dart.js +2 -2
  64. package/dist/core/ingestion/languages/go.d.ts +1 -1
  65. package/dist/core/ingestion/languages/go.js +2 -2
  66. package/dist/core/ingestion/languages/ruby.js +16 -1
  67. package/dist/core/ingestion/languages/swift.d.ts +1 -1
  68. package/dist/core/ingestion/languages/swift.js +2 -2
  69. package/dist/core/ingestion/markdown-processor.d.ts +1 -1
  70. package/dist/core/ingestion/method-extractors/configs/jvm.js +1 -0
  71. package/dist/core/ingestion/method-extractors/configs/ruby.js +1 -0
  72. package/dist/core/ingestion/method-extractors/generic.d.ts +6 -0
  73. package/dist/core/ingestion/method-extractors/generic.js +48 -4
  74. package/dist/core/ingestion/method-types.d.ts +4 -0
  75. package/dist/core/ingestion/model/resolve.js +103 -48
  76. package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
  77. package/dist/core/ingestion/model/semantic-model.js +1 -1
  78. package/dist/core/ingestion/model/symbol-table.d.ts +7 -7
  79. package/dist/core/ingestion/model/symbol-table.js +7 -7
  80. package/dist/core/ingestion/mro-processor.d.ts +1 -1
  81. package/dist/core/ingestion/mro-processor.js +1 -1
  82. package/dist/core/ingestion/parsing-processor.js +54 -42
  83. package/dist/core/ingestion/pipeline-phases/cobol.d.ts +16 -0
  84. package/dist/core/ingestion/pipeline-phases/cobol.js +45 -0
  85. package/dist/core/ingestion/pipeline-phases/communities.d.ts +16 -0
  86. package/dist/core/ingestion/pipeline-phases/communities.js +62 -0
  87. package/dist/core/ingestion/pipeline-phases/cross-file-impl.d.ts +17 -0
  88. package/dist/core/ingestion/pipeline-phases/cross-file-impl.js +156 -0
  89. package/dist/core/ingestion/pipeline-phases/cross-file.d.ts +37 -0
  90. package/dist/core/ingestion/pipeline-phases/cross-file.js +63 -0
  91. package/dist/core/ingestion/pipeline-phases/index.d.ts +21 -0
  92. package/dist/core/ingestion/pipeline-phases/index.js +22 -0
  93. package/dist/core/ingestion/pipeline-phases/markdown.d.ts +17 -0
  94. package/dist/core/ingestion/pipeline-phases/markdown.js +33 -0
  95. package/dist/core/ingestion/pipeline-phases/mro.d.ts +18 -0
  96. package/dist/core/ingestion/pipeline-phases/mro.js +36 -0
  97. package/dist/core/ingestion/pipeline-phases/orm-extraction.d.ts +22 -0
  98. package/dist/core/ingestion/pipeline-phases/orm-extraction.js +92 -0
  99. package/dist/core/ingestion/pipeline-phases/orm.d.ts +15 -0
  100. package/dist/core/ingestion/pipeline-phases/orm.js +74 -0
  101. package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +47 -0
  102. package/dist/core/ingestion/pipeline-phases/parse-impl.js +437 -0
  103. package/dist/core/ingestion/pipeline-phases/parse.d.ts +49 -0
  104. package/dist/core/ingestion/pipeline-phases/parse.js +33 -0
  105. package/dist/core/ingestion/pipeline-phases/processes.d.ts +16 -0
  106. package/dist/core/ingestion/pipeline-phases/processes.js +143 -0
  107. package/dist/core/ingestion/pipeline-phases/routes.d.ts +21 -0
  108. package/dist/core/ingestion/pipeline-phases/routes.js +243 -0
  109. package/dist/core/ingestion/pipeline-phases/runner.d.ts +22 -0
  110. package/dist/core/ingestion/pipeline-phases/runner.js +203 -0
  111. package/dist/core/ingestion/pipeline-phases/scan.d.ts +21 -0
  112. package/dist/core/ingestion/pipeline-phases/scan.js +46 -0
  113. package/dist/core/ingestion/pipeline-phases/structure.d.ts +27 -0
  114. package/dist/core/ingestion/pipeline-phases/structure.js +35 -0
  115. package/dist/core/ingestion/pipeline-phases/tools.d.ts +20 -0
  116. package/dist/core/ingestion/pipeline-phases/tools.js +79 -0
  117. package/dist/core/ingestion/pipeline-phases/types.d.ts +79 -0
  118. package/dist/core/ingestion/pipeline-phases/types.js +37 -0
  119. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +70 -0
  120. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +312 -0
  121. package/dist/core/ingestion/pipeline.d.ts +16 -10
  122. package/dist/core/ingestion/pipeline.js +66 -1534
  123. package/dist/core/ingestion/process-processor.js +1 -1
  124. package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
  125. package/dist/core/ingestion/tree-sitter-queries.js +69 -0
  126. package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -3
  127. package/dist/core/ingestion/utils/ast-helpers.js +48 -21
  128. package/dist/core/ingestion/utils/env.d.ts +10 -0
  129. package/dist/core/ingestion/utils/env.js +10 -0
  130. package/dist/core/ingestion/utils/graph-sort.d.ts +58 -0
  131. package/dist/core/ingestion/utils/graph-sort.js +100 -0
  132. package/dist/core/ingestion/workers/parse-worker.js +12 -8
  133. package/dist/core/lbug/lbug-adapter.d.ts +28 -0
  134. package/dist/core/lbug/lbug-adapter.js +162 -57
  135. package/package.json +3 -3
  136. package/vendor/tree-sitter-proto/binding.gyp +30 -0
  137. package/vendor/tree-sitter-proto/bindings/node/binding.cc +20 -0
  138. package/vendor/tree-sitter-proto/bindings/node/index.d.ts +28 -0
  139. package/vendor/tree-sitter-proto/bindings/node/index.js +7 -0
  140. package/vendor/tree-sitter-proto/package.json +18 -0
  141. package/vendor/tree-sitter-proto/src/node-types.json +1145 -0
  142. package/vendor/tree-sitter-proto/src/parser.c +10149 -0
  143. package/vendor/tree-sitter-proto/src/tree_sitter/alloc.h +54 -0
  144. package/vendor/tree-sitter-proto/src/tree_sitter/array.h +291 -0
  145. package/vendor/tree-sitter-proto/src/tree_sitter/parser.h +266 -0
@@ -1,19 +1,29 @@
1
- import * as fs from 'node:fs';
2
1
  import * as path from 'node:path';
3
2
  import { glob } from 'glob';
4
- function readSafe(repoPath, rel) {
5
- const abs = path.resolve(repoPath, rel);
6
- const base = path.resolve(repoPath);
7
- const relToBase = path.relative(base, abs);
8
- if (relToBase.startsWith('..') || path.isAbsolute(relToBase))
9
- return null;
10
- try {
11
- return fs.readFileSync(abs, 'utf-8');
12
- }
13
- catch {
14
- return null;
15
- }
16
- }
3
+ import Parser from 'tree-sitter';
4
+ import { readSafe } from './fs-utils.js';
5
+ import { GRPC_SCAN_GLOB, getPluginForFile, hasProtoPlugin, } from './grpc-patterns/index.js';
6
+ /**
7
+ * Language-agnostic orchestrator for gRPC (provider + consumer) contract
8
+ * extraction.
9
+ *
10
+ * Two parts:
11
+ *
12
+ * 1. **`.proto` parsing** — tree-sitter when `tree-sitter-proto` is
13
+ * installed (optionalDependency vendored in `vendor/tree-sitter-proto/`),
14
+ * via the `.proto` entry in `grpc-patterns/` and `hasProtoPlugin`.
15
+ * When the grammar isn't available (platform incompatibility, native
16
+ * build failure) the orchestrator falls back to the in-process
17
+ * string-sanitizing parser defined below (`stripProtoCommentsAndStrings`
18
+ * + `extractServiceBlocks`). The fallback preserves offsets so any
19
+ * downstream regex scans run against a sanitized copy without
20
+ * affecting line numbers of the original.
21
+ *
22
+ * 2. **Source-scan providers / consumers** — delegated to per-language
23
+ * plugins in `./grpc-patterns/`. The orchestrator imports NO
24
+ * tree-sitter grammars or query strings — each plugin owns its own.
25
+ */
26
+ // ─── .proto fallback parser (used only when tree-sitter-proto is absent) ───
17
27
  function contractId(pkg, service, method) {
18
28
  const prefix = pkg ? `${pkg}.${service}` : service;
19
29
  return `grpc::${prefix}/${method}`;
@@ -21,18 +31,103 @@ function contractId(pkg, service, method) {
21
31
/**
 * Build the wildcard contract id for a service when no proto package is
 * known: `grpc::<serviceName>/*`.
 *
 * @param {string} serviceName - Short (unqualified) service name.
 * @returns {string} The service-level wildcard contract id.
 */
function serviceOnlyContractId(serviceName) {
    const scheme = 'grpc::';
    const anyMethod = '/*';
    return scheme.concat(serviceName, anyMethod);
}
34
/**
 * Blank out every comment and string literal of a .proto source while keeping
 * the text the same length, so every character offset (and every line number)
 * of the result lines up with the input.
 *
 * Downstream regex / brace-depth scans can then run on the sanitized copy
 * without understanding proto syntax, and any index they find is directly
 * usable against the original `content`.
 *
 * Handled constructs:
 *   - `// line comments` (blanked up to, but not including, the `\n`)
 *   - C-style block comments (blanked through the closing delimiter)
 *   - double-quoted and single-quoted strings, with `\` escapes
 *
 * Line-break characters (`\n`, `\r`) are always copied through unchanged,
 * including when they directly follow a backslash inside a string, so the
 * line count never changes. An unterminated block comment or string is
 * blanked through the end of the input.
 *
 * @param {string} content - Raw .proto source text.
 * @returns {string} Sanitized text with `content.length` characters.
 */
function stripProtoCommentsAndStrings(content) {
    const length = content.length;
    const out = new Array(length);
    // Keep line breaks, blank everything else.
    const blank = (c) => (c === '\n' || c === '\r' ? c : ' ');
    let i = 0;
    while (i < length) {
        const ch = content[i];
        const next = content[i + 1];
        // Line comment: // ... \n  (the terminating \n is left for the main loop)
        if (ch === '/' && next === '/') {
            out[i] = ' ';
            out[i + 1] = ' ';
            i += 2;
            while (i < length && content[i] !== '\n') {
                out[i] = blank(content[i]);
                i++;
            }
            continue;
        }
        // Block comment: runs to the closing delimiter, or to EOF when unterminated.
        if (ch === '/' && next === '*') {
            out[i] = ' ';
            out[i + 1] = ' ';
            i += 2;
            while (i < length) {
                if (content[i] === '*' && content[i + 1] === '/') {
                    out[i] = ' ';
                    out[i + 1] = ' ';
                    i += 2;
                    break;
                }
                out[i] = blank(content[i]);
                i++;
            }
            continue;
        }
        // String literal: "..." or '...'
        if (ch === '"' || ch === "'") {
            const quote = ch;
            out[i] = ' '; // opening quote
            i++;
            while (i < length) {
                const c = content[i];
                if (c === '\\' && i + 1 < length) {
                    // Escaped pair (e.g. \" \n \\). The escaped character can be a
                    // real line break (line continuation); keep it so later lines
                    // do not shift.
                    out[i] = ' ';
                    out[i + 1] = blank(content[i + 1]);
                    i += 2;
                    continue;
                }
                if (c === quote) {
                    out[i] = ' '; // closing quote
                    i++;
                    break;
                }
                // proto disallows raw newlines inside strings, but tolerate them.
                out[i] = blank(c);
                i++;
            }
            continue;
        }
        out[i] = ch;
        i++;
    }
    return out.join('');
}
24
114
  function extractServiceBlocks(content) {
25
115
  const results = [];
26
- // v1: brace-depth only — braces inside comments or string literals are not filtered (see spec Fix 2)
116
+ // Sanitize comments and string literals so braces inside them don't
117
+ // throw off the depth counter. The sanitized copy has the same length
118
+ // and offsets as the original, so we use it ONLY to scan for service
119
+ // headers and braces; the service body we return is sliced from the
120
+ // ORIGINAL content to preserve exact source text for downstream use.
121
+ const sanitized = stripProtoCommentsAndStrings(content);
27
122
  const headerRe = /service\s+(\w+)\s*\{/g;
28
123
  let headerMatch;
29
- while ((headerMatch = headerRe.exec(content)) !== null) {
124
+ while ((headerMatch = headerRe.exec(sanitized)) !== null) {
30
125
  const serviceName = headerMatch[1];
31
126
  const bodyStart = headerMatch.index + headerMatch[0].length;
32
127
  let depth = 1;
33
128
  let pos = bodyStart;
34
- while (pos < content.length && depth > 0) {
35
- const ch = content[pos];
129
+ while (pos < sanitized.length && depth > 0) {
130
+ const ch = sanitized[pos];
36
131
  if (ch === '{')
37
132
  depth++;
38
133
  else if (ch === '}')
@@ -60,6 +155,145 @@ function makeContract(cid, role, filePath, symbolName, confidence, meta) {
60
155
  meta: { ...meta, extractionStrategy: 'source_scan' },
61
156
  };
62
157
  }
158
/**
 * Convert a relative path to forward-slash form by turning every backslash
 * into `/`.
 *
 * @param {string} rel - Path that may contain backslash separators.
 * @returns {string} The same path using `/` only.
 */
function normalizeProtoPath(rel) {
    const pieces = rel.split('\\');
    return pieces.join('/');
}
161
/**
 * Collect the paths named by the `import` statements of a .proto source.
 *
 * Recognizes the full import grammar: an optional `public` or `weak`
 * modifier, and a path written as either a double-quoted or a single-quoted
 * string literal. Only statements that start a line (after optional
 * whitespace) are considered.
 *
 * @param {string} content - Raw .proto source text.
 * @returns {string[]} Import paths in source order (empty when none).
 */
function extractProtoImports(content) {
    const importRe = /^\s*import\s+(?:(?:public|weak)\s*)?(?:"([^"]+)"|'([^']+)')\s*;/gm;
    // Group 1 holds a double-quoted path, group 2 a single-quoted one.
    return Array.from(content.matchAll(importRe), (m) => m[1] ?? m[2]);
}
170
/**
 * Length of the longest run of consecutive path segments that appears in both
 * paths, at any position in either one.
 *
 * Paths are split on `/`; empty segments (leading, trailing or doubled
 * slashes) are ignored.
 *
 * @param {string} aPath - First forward-slash path.
 * @param {string} bPath - Second forward-slash path.
 * @returns {number} Number of segments in the longest shared run (0 if none).
 */
function longestSharedSegmentRun(aPath, bPath) {
    const toSegments = (p) => p.split('/').filter((segment) => segment.length > 0);
    const left = toSegments(aPath);
    const right = toSegments(bPath);
    let longest = 0;
    left.forEach((_leftSeg, leftStart) => {
        right.forEach((_rightSeg, rightStart) => {
            // Walk both paths in lockstep from this pair of start positions.
            let matched = 0;
            for (let x = leftStart, y = rightStart; x < left.length && y < right.length && left[x] === right[y]; x++, y++) {
                matched += 1;
            }
            longest = Math.max(longest, matched);
        });
    });
    return longest;
}
186
/**
 * Discover every `.proto` file under `repoPath` and index what it declares.
 *
 * For each readable file this records:
 *   - its package (`packagesByProto`, keyed by forward-slash relative path).
 *     A file without its own `package` statement takes the first non-empty
 *     package found through its imports, tried relative to the importing
 *     file's directory and then as written; '' when none is found.
 *   - each `service` block and its `rpc` method names (`servicesByName`,
 *     keyed by short service name; one entry per declaring file).
 *
 * The `package` and `rpc` scans run on a copy of the text with comments and
 * string literals blanked, so commented-out declarations are not reported.
 * Files for which `readSafe` yields no content (including empty files) are
 * skipped.
 *
 * @param {string} repoPath - Repository root; used as glob `cwd` and read base.
 * @returns {Promise<{packagesByProto: Map<string, string>, servicesByName: Map<string, Array<{package: string, serviceName: string, methods: string[], protoPath: string}>>}>}
 */
async function buildProtoContext(repoPath) {
    const servicesByName = new Map();
    const protoFiles = await glob('**/*.proto', {
        cwd: repoPath,
        absolute: false,
        nodir: true,
        ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**'],
    });
    // Keyed by normalized path so lookups line up with proto import paths.
    const contents = new Map();
    for (const rel of protoFiles) {
        const content = readSafe(repoPath, rel);
        if (!content)
            continue;
        contents.set(normalizeProtoPath(rel), content);
    }
    const packagesByProto = new Map();
    const resolvePackage = (protoPath, seen = new Set()) => {
        if (packagesByProto.has(protoPath))
            return packagesByProto.get(protoPath) ?? '';
        // `seen` guards against import cycles within one resolution walk.
        if (seen.has(protoPath))
            return '';
        const content = contents.get(protoPath);
        if (!content)
            return '';
        seen.add(protoPath);
        // Match on the sanitized copy so a `package x;` inside a comment is ignored.
        const pkgMatch = stripProtoCommentsAndStrings(content).match(/^\s*package\s+([\w.]+)\s*;/m);
        if (pkgMatch?.[1]) {
            packagesByProto.set(protoPath, pkgMatch[1]);
            return pkgMatch[1];
        }
        // Import paths are string literals, so they are read from the original text.
        for (const importPath of extractProtoImports(content)) {
            const normalizedImport = normalizeProtoPath(importPath);
            const candidates = [
                normalizeProtoPath(path.posix.normalize(path.posix.join(path.posix.dirname(protoPath), normalizedImport))),
                normalizedImport,
            ];
            for (const candidate of candidates) {
                if (!contents.has(candidate))
                    continue;
                const inheritedPackage = resolvePackage(candidate, seen);
                if (inheritedPackage) {
                    packagesByProto.set(protoPath, inheritedPackage);
                    return inheritedPackage;
                }
            }
        }
        packagesByProto.set(protoPath, '');
        return '';
    };
    const rpcRe = /\brpc\s+(\w+)\s*\(/g;
    for (const [protoPath, content] of contents) {
        const pkg = resolvePackage(protoPath);
        for (const block of extractServiceBlocks(content)) {
            // Blank comments/strings in the body so `// rpc Old(...)` is not
            // reported as a live method.
            const scannableBody = stripProtoCommentsAndStrings(block.body);
            const methods = Array.from(scannableBody.matchAll(rpcRe), (m) => m[1]);
            const info = {
                package: pkg,
                serviceName: block.name,
                methods,
                protoPath,
            };
            const existing = servicesByName.get(block.name) ?? [];
            existing.push(info);
            servicesByName.set(block.name, existing);
        }
    }
    return { packagesByProto, servicesByName };
}
262
/**
 * Build the service index for a repository: a map from short service name to
 * the list of proto declarations found for that name.
 *
 * @param {string} repoPath - Repository root to scan for `.proto` files.
 * @returns {Promise<Map<string, Array<object>>>} The `servicesByName` part of the proto context.
 */
export async function buildProtoMap(repoPath) {
    const context = await buildProtoContext(repoPath);
    return context.servicesByName;
}
266
/**
 * Pick the proto declaration that most plausibly belongs to a source file when
 * several protos declare a service with the same short name.
 *
 * Candidates are scored by the longest run of directory segments their proto
 * path shares with the source file's directory. A unique top score wins; any
 * tie (including all-zero) is treated as ambiguous: a warning is logged and
 * `null` is returned rather than guessing.
 *
 * @param {string} serviceName - Short service name (used in the warning only).
 * @param {string} sourceFilePath - Path of the file that referenced the service.
 * @param {Array<{protoPath: string}>} candidates - Proto declarations sharing that name.
 * @returns {object|null} The chosen candidate, or `null` when there are none
 *   or the choice is ambiguous.
 */
export function resolveProtoConflict(serviceName, sourceFilePath, candidates) {
    switch (candidates.length) {
        case 0:
            return null;
        case 1:
            return candidates[0];
        default:
            break;
    }
    const sourceDir = normalizeProtoPath(path.dirname(sourceFilePath));
    const ranked = candidates.map((candidate) => ({
        candidate,
        score: longestSharedSegmentRun(sourceDir, normalizeProtoPath(path.dirname(candidate.protoPath))),
    }));
    const topScore = ranked.reduce((best, entry) => (entry.score > best ? entry.score : best), -1);
    const leaders = ranked.filter((entry) => entry.score === topScore);
    if (leaders.length === 1) {
        return leaders[0].candidate;
    }
    // The path heuristic could not single out a winner. Emitting any of the
    // tied candidates would merge unrelated services under a made-up
    // package-qualified contract id, so report and bail out instead.
    const paths = candidates.map((c) => c.protoPath).join(', ');
    console.warn(`[grpc-extractor] Ambiguous proto resolution for service "${serviceName}" from ${sourceFilePath}: ${leaders.length} candidates tied at score ${topScore} among [${paths}] — skipping canonical contract`);
    return null;
}
292
/**
 * Build the service-level wildcard contract id, qualified with the proto
 * package when one is known: `grpc::<pkg>.<serviceName>/*`, or
 * `grpc::<serviceName>/*` when `pkg` is empty.
 *
 * @param {string} pkg - Proto package name; falsy means "no package".
 * @param {string} serviceName - Short service name.
 * @returns {string} The wildcard contract id.
 */
export function serviceContractId(pkg, serviceName) {
    let qualifiedName = serviceName;
    if (pkg) {
        qualifiedName = `${pkg}.${serviceName}`;
    }
    return `grpc::${qualifiedName}/*`;
}
296
+ // ─── Orchestrator ────────────────────────────────────────────────────
63
297
  export class GrpcExtractor {
64
298
  type = 'grpc';
65
299
  async canExtract(_repo) {
@@ -67,198 +301,101 @@ export class GrpcExtractor {
67
301
  }
68
302
  async extract(_dbExecutor, repoPath, _repo) {
69
303
  const out = [];
70
- // Proto files definitive provider source
71
- const protoFiles = await glob('**/*.proto', {
72
- cwd: repoPath,
73
- ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**'],
74
- nodir: true,
75
- });
76
- for (const rel of protoFiles) {
77
- const content = readSafe(repoPath, rel);
78
- if (content)
79
- out.push(...this.parseProtoFile(content, rel));
304
+ const protoContext = await buildProtoContext(repoPath);
305
+ const protoMap = protoContext.servicesByName;
306
+ // ─── Proto files — definitive provider source ─────────────────
307
+ // When tree-sitter-proto is available, .proto files are handled by
308
+ // the plugin loop below (they're in GRPC_SCAN_GLOB). Otherwise
309
+ // emit provider contracts directly from the proto map that
310
+ // `buildProtoContext` already built — no second glob / parse pass.
311
+ if (!hasProtoPlugin) {
312
+ for (const infos of protoMap.values()) {
313
+ for (const info of infos) {
314
+ for (const methodName of info.methods) {
315
+ const cid = contractId(info.package, info.serviceName, methodName);
316
+ out.push(makeContract(cid, 'provider', info.protoPath, `${info.serviceName}.${methodName}`, 0.85, {
317
+ package: info.package,
318
+ service: info.serviceName,
319
+ method: methodName,
320
+ source: 'proto',
321
+ }));
322
+ }
323
+ }
324
+ }
80
325
  }
81
- // Source files server/client detection
82
- const sourceFiles = await glob('**/*.{go,java,py,ts,tsx,js,jsx}', {
326
+ // ─── Source files (+ .proto when plugin available) ────────────
327
+ const sourceFiles = await glob(GRPC_SCAN_GLOB, {
83
328
  cwd: repoPath,
84
329
  ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**', '**/dist/**', '**/build/**'],
85
330
  nodir: true,
86
331
  });
332
+ const parser = new Parser();
87
333
  for (const rel of sourceFiles) {
334
+ const plugin = getPluginForFile(rel);
335
+ if (!plugin)
336
+ continue;
88
337
  const content = readSafe(repoPath, rel);
89
338
  if (!content)
90
339
  continue;
91
- const ext = path.extname(rel).toLowerCase();
92
- if (ext === '.go') {
93
- out.push(...this.scanGoProviders(content, rel));
94
- out.push(...this.scanGoConsumers(content, rel));
95
- }
96
- else if (ext === '.java') {
97
- out.push(...this.scanJavaProviders(content, rel));
98
- out.push(...this.scanJavaConsumers(content, rel));
340
+ let detections = [];
341
+ try {
342
+ parser.setLanguage(plugin.language);
343
+ const tree = parser.parse(content);
344
+ detections = plugin.scan(tree);
99
345
  }
100
- else if (ext === '.py') {
101
- out.push(...this.scanPythonProviders(content, rel));
102
- out.push(...this.scanPythonConsumers(content, rel));
346
+ catch {
347
+ continue;
103
348
  }
104
- else if (['.ts', '.tsx', '.js', '.jsx'].includes(ext)) {
105
- out.push(...this.scanTsProviders(content, rel));
349
+ for (const d of detections) {
350
+ const contract = this.detectionToContract(d, rel, protoMap);
351
+ if (contract)
352
+ out.push(contract);
106
353
  }
107
354
  }
108
355
  return this.dedupe(out);
109
356
  }
110
- parseProtoFile(content, filePath) {
111
- const out = [];
112
- const pkgMatch = content.match(/^package\s+([\w.]+)\s*;/m);
113
- const pkg = pkgMatch ? pkgMatch[1] : '';
114
- for (const { name: serviceName, body } of extractServiceBlocks(content)) {
115
- const rpcRe = /rpc\s+(\w+)\s*\(/g;
116
- let rpcMatch;
117
- while ((rpcMatch = rpcRe.exec(body)) !== null) {
118
- const methodName = rpcMatch[1];
119
- const cid = contractId(pkg, serviceName, methodName);
120
- out.push(makeContract(cid, 'provider', filePath, `${serviceName}.${methodName}`, 0.85, {
121
- package: pkg,
122
- service: serviceName,
123
- method: methodName,
124
- source: 'proto',
125
- }));
126
- }
127
- }
128
- return out;
129
- }
130
- scanGoProviders(content, filePath) {
131
- const out = [];
132
- // pb.RegisterXxxServer(
133
- const registerRe = /\w+\.Register(\w+)Server\s*\(/g;
134
- let m;
135
- while ((m = registerRe.exec(content)) !== null) {
136
- const serviceName = m[1];
137
- out.push(makeContract(serviceOnlyContractId(serviceName), 'provider', filePath, `Register${serviceName}Server`, 0.8, { service: serviceName, source: 'go_register' }));
138
- }
139
- // pb.UnimplementedXxxServer
140
- const unimplRe = /\w+\.Unimplemented(\w+)Server\b/g;
141
- while ((m = unimplRe.exec(content)) !== null) {
142
- const serviceName = m[1];
143
- out.push(makeContract(serviceOnlyContractId(serviceName), 'provider', filePath, `Unimplemented${serviceName}Server`, 0.8, { service: serviceName, source: 'go_unimplemented' }));
144
- }
145
- return out;
146
- }
147
- scanGoConsumers(content, filePath) {
148
- const out = [];
149
- const re = /\w+\.New(\w+)Client\s*\(/g;
150
- let m;
151
- while ((m = re.exec(content)) !== null) {
152
- const serviceName = m[1];
153
- out.push(makeContract(serviceOnlyContractId(serviceName), 'consumer', filePath, `New${serviceName}Client`, 0.7, { service: serviceName, source: 'go_client' }));
154
- }
155
- return out;
156
- }
157
- scanJavaProviders(content, filePath) {
158
- const out = [];
159
- // @GrpcService
160
- if (content.includes('@GrpcService')) {
161
- const implBaseRe = /extends\s+(\w+)Grpc\.(\w+)ImplBase/;
162
- const m = content.match(implBaseRe);
163
- if (m) {
164
- out.push(makeContract(serviceOnlyContractId(m[1]), 'provider', filePath, m[2], 0.8, {
165
- service: m[1],
166
- source: 'java_grpc_service',
167
- }));
168
- }
169
- else {
170
- // Try extracting service name from class name
171
- const classRe = /class\s+(\w*?)(?:Grpc)?(?:Service)?\s+extends\s+(\w+)(?:Grpc\.(\w+))?ImplBase/;
172
- const cm = content.match(classRe);
173
- if (cm) {
174
- const svcName = cm[2].replace(/Grpc$/, '');
175
- out.push(makeContract(serviceOnlyContractId(svcName), 'provider', filePath, cm[1], 0.8, {
176
- service: svcName,
177
- source: 'java_grpc_service',
178
- }));
179
- }
180
- }
181
- }
182
- // extends XxxImplBase (without @GrpcService)
183
- if (!content.includes('@GrpcService')) {
184
- const implRe = /extends\s+(\w+?)(?:Grpc\.(\w+))?ImplBase/;
185
- const m = content.match(implRe);
186
- if (m) {
187
- const svcName = m[2] || m[1].replace(/Grpc$/, '');
188
- out.push(makeContract(serviceOnlyContractId(svcName), 'provider', filePath, svcName, 0.8, {
189
- service: svcName,
190
- source: 'java_impl_base',
191
- }));
192
- }
193
- }
194
- return out;
195
- }
196
- scanJavaConsumers(content, filePath) {
197
- const out = [];
198
- // XxxGrpc.newBlockingStub( or XxxGrpc.newStub(
199
- const re = /(\w+)Grpc\.new(?:Blocking)?Stub\s*\(/g;
200
- let m;
201
- while ((m = re.exec(content)) !== null) {
202
- const serviceName = m[1];
203
- out.push(makeContract(serviceOnlyContractId(serviceName), 'consumer', filePath, `${serviceName}Stub`, 0.7, { service: serviceName, source: 'java_stub' }));
204
- }
205
- return out;
206
- }
207
- scanPythonProviders(content, filePath) {
208
- const out = [];
209
- // add_XxxServicer_to_server(
210
- const re = /add_(\w+?)Servicer_to_server\s*\(/g;
211
- let m;
212
- while ((m = re.exec(content)) !== null) {
213
- const serviceName = m[1];
214
- out.push(makeContract(serviceOnlyContractId(serviceName), 'provider', filePath, `add_${serviceName}Servicer_to_server`, 0.8, { service: serviceName, source: 'python_servicer' }));
215
- }
216
- return out;
217
- }
218
- scanPythonConsumers(content, filePath) {
219
- const out = [];
220
- // XxxStub(
221
- const re = /(\w+)Stub\s*\(/g;
222
- let m;
223
- while ((m = re.exec(content)) !== null) {
224
- const name = m[1];
225
- // Filter out common false positives
226
- if (['Mock', 'Test', 'Fake', 'Stub'].includes(name))
227
- continue;
228
- out.push(makeContract(serviceOnlyContractId(name), 'consumer', filePath, `${name}Stub`, 0.7, {
229
- service: name,
230
- source: 'python_stub',
231
- }));
232
- }
233
- return out;
234
- }
235
- scanTsProviders(content, filePath) {
236
- const out = [];
237
- // @GrpcMethod('ServiceName', 'MethodName')
238
- const re = /@GrpcMethod\s*\(\s*['"](\w+)['"]\s*,\s*['"](\w+)['"]\s*\)/g;
239
- let m;
240
- while ((m = re.exec(content)) !== null) {
241
- const serviceName = m[1];
242
- const methodName = m[2];
243
- const cid = contractId('', serviceName, methodName);
244
- out.push(makeContract(cid, 'provider', filePath, `${serviceName}.${methodName}`, 0.8, {
245
- service: serviceName,
246
- method: methodName,
247
- source: 'ts_grpc_method',
248
- }));
249
- }
250
- return out;
357
+ /**
358
+ * Convert a plugin `GrpcDetection` into a concrete `ExtractedContract`
359
+ * by resolving the short service name against the proto map, building
360
+ * either a service-level (`grpc::pkg.Svc/*`) or method-level
361
+ * (`grpc::pkg.Svc/Method`) contract id, and selecting confidence
362
+ * based on whether the proto map had an entry.
363
+ */
364
+ detectionToContract(d, filePath, protoMap) {
365
+ const candidates = protoMap.get(d.serviceName) ?? [];
366
+ const proto = resolveProtoConflict(d.serviceName, filePath, candidates);
367
+ // If there were proto candidates but resolution was ambiguous, skip
368
+ // contract emission rather than fabricating a package-qualified id from
369
+ // an arbitrary candidate. resolveProtoConflict already warned.
370
+ if (candidates.length > 0 && proto === null)
371
+ return null;
372
+ const pkg = proto?.package ?? '';
373
+ const cid = d.methodName
374
+ ? contractId(pkg, d.serviceName, d.methodName)
375
+ : proto
376
+ ? serviceContractId(pkg, d.serviceName)
377
+ : serviceOnlyContractId(d.serviceName);
378
+ const confidence = proto ? d.confidenceWithProto : d.confidenceWithoutProto;
379
+ const meta = {
380
+ service: d.serviceName,
381
+ source: d.source,
382
+ };
383
+ if (d.methodName)
384
+ meta.method = d.methodName;
385
+ return makeContract(cid, d.role, filePath, d.symbolName, confidence, meta);
251
386
  }
252
387
  dedupe(items) {
253
- const seen = new Set();
254
- const out = [];
388
+ const byKey = new Map();
255
389
  for (const c of items) {
256
390
  const k = `${c.contractId}|${c.role}|${c.symbolRef.filePath}`;
257
- if (seen.has(k))
258
- continue;
259
- seen.add(k);
260
- out.push(c);
391
+ const existing = byKey.get(k);
392
+ if (!existing ||
393
+ c.confidence > existing.confidence ||
394
+ (c.confidence === existing.confidence &&
395
+ String(c.meta.source) < String(existing.meta.source))) {
396
+ byKey.set(k, c);
397
+ }
261
398
  }
262
- return out;
399
+ return Array.from(byKey.values());
263
400
  }
264
401
  }
@@ -0,0 +1,2 @@
1
+ import type { GrpcLanguagePlugin } from './types.js';
2
+ export declare const GO_GRPC_PLUGIN: GrpcLanguagePlugin;
@@ -0,0 +1,97 @@
1
+ import Go from 'tree-sitter-go';
2
+ import { compilePatterns, runCompiledPatterns, } from '../tree-sitter-scanner.js';
3
+ /**
4
+ * Go gRPC plugin. Detects:
5
+ * - Provider: `pb.RegisterXxxServer(...)` calls
6
+ * - Provider: `pb.UnimplementedXxxServer` embedded in a struct
7
+ * - Consumer: `pb.NewXxxClient(conn)` calls
8
+ */
9
+ const REGISTER_RE = /^Register(\w+)Server$/; // capture 1: service name in `Register<Service>Server`
10
+ const UNIMPLEMENTED_RE = /^Unimplemented(\w+)Server$/; // capture 1: service name in `Unimplemented<Service>Server`
11
+ const NEW_CLIENT_RE = /^New(\w+)Client$/; // capture 1: service name in `New<Service>Client`
12
+ // Any `xxx.<fn>(...)` call, captured as `@fn` — the plugin's scan() filters the field identifier text with REGISTER_RE / NEW_CLIENT_RE.
13
+ const SELECTOR_CALL_PATTERNS = compilePatterns({
14
+ name: 'go-grpc-selector-call',
15
+ language: Go,
16
+ patterns: [
17
+ {
18
+ meta: {},
19
+ query: `
20
+ (call_expression
21
+ function: (selector_expression
22
+ field: (field_identifier) @fn))
23
+ `,
24
+ },
25
+ ],
26
+ });
27
+ // Any `qualified_type` used as a struct field type, captured as `@field_type` — scan() keeps only names matching UNIMPLEMENTED_RE (e.g. `pb.UnimplementedXxxServer`).
28
+ const STRUCT_EMBEDDING_PATTERNS = compilePatterns({
29
+ name: 'go-grpc-struct-embedding',
30
+ language: Go,
31
+ patterns: [
32
+ {
33
+ meta: {},
34
+ query: `
35
+ (struct_type
36
+ (field_declaration_list
37
+ (field_declaration
38
+ type: (qualified_type
39
+ name: (type_identifier) @field_type))))
40
+ `,
41
+ },
42
+ ],
43
+ });
44
/**
 * Go gRPC language plugin.
 *
 * `scan(tree)` returns one detection per match, selector calls first and
 * struct fields second:
 *   - `Register<Service>Server(...)` call      → provider, source `go_register`
 *   - `New<Service>Client(...)` call           → consumer, source `go_client`
 *   - `Unimplemented<Service>Server` field type → provider, source `go_unimplemented`
 */
export const GO_GRPC_PLUGIN = {
    name: 'go-grpc',
    language: Go,
    scan(tree) {
        const detections = [];
        // Single place that fixes the shape (and key order) of a detection.
        const record = (role, serviceName, symbolName, source, confidenceWithProto, confidenceWithoutProto) => {
            detections.push({
                role,
                serviceName,
                symbolName,
                source,
                confidenceWithProto,
                confidenceWithoutProto,
            });
        };
        for (const { captures } of runCompiledPatterns(SELECTOR_CALL_PATTERNS, tree)) {
            const callee = captures.fn;
            if (!callee) {
                continue;
            }
            const calleeName = callee.text;
            const asRegister = REGISTER_RE.exec(calleeName);
            if (asRegister) {
                record('provider', asRegister[1], calleeName, 'go_register', 0.8, 0.65);
                continue;
            }
            const asClient = NEW_CLIENT_RE.exec(calleeName);
            if (asClient) {
                record('consumer', asClient[1], calleeName, 'go_client', 0.75, 0.55);
            }
        }
        for (const { captures } of runCompiledPatterns(STRUCT_EMBEDDING_PATTERNS, tree)) {
            const fieldType = captures.field_type;
            if (!fieldType) {
                continue;
            }
            const asUnimplemented = UNIMPLEMENTED_RE.exec(fieldType.text);
            if (asUnimplemented) {
                record('provider', asUnimplemented[1], fieldType.text, 'go_unimplemented', 0.8, 0.65);
            }
        }
        return detections;
    },
};