gitnexus 1.6.0 → 1.6.2-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -0
- package/dist/cli/analyze.js +50 -3
- package/dist/core/group/extractors/fs-utils.d.ts +10 -0
- package/dist/core/group/extractors/fs-utils.js +24 -0
- package/dist/core/group/extractors/grpc-extractor.d.ts +17 -8
- package/dist/core/group/extractors/grpc-extractor.js +328 -191
- package/dist/core/group/extractors/grpc-patterns/go.d.ts +2 -0
- package/dist/core/group/extractors/grpc-patterns/go.js +97 -0
- package/dist/core/group/extractors/grpc-patterns/index.d.ts +19 -0
- package/dist/core/group/extractors/grpc-patterns/index.js +46 -0
- package/dist/core/group/extractors/grpc-patterns/java.d.ts +2 -0
- package/dist/core/group/extractors/grpc-patterns/java.js +173 -0
- package/dist/core/group/extractors/grpc-patterns/node.d.ts +4 -0
- package/dist/core/group/extractors/grpc-patterns/node.js +290 -0
- package/dist/core/group/extractors/grpc-patterns/proto.d.ts +9 -0
- package/dist/core/group/extractors/grpc-patterns/proto.js +134 -0
- package/dist/core/group/extractors/grpc-patterns/python.d.ts +2 -0
- package/dist/core/group/extractors/grpc-patterns/python.js +67 -0
- package/dist/core/group/extractors/grpc-patterns/types.d.ts +50 -0
- package/dist/core/group/extractors/grpc-patterns/types.js +1 -0
- package/dist/core/group/extractors/http-patterns/go.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/go.js +215 -0
- package/dist/core/group/extractors/http-patterns/index.d.ts +17 -0
- package/dist/core/group/extractors/http-patterns/index.js +44 -0
- package/dist/core/group/extractors/http-patterns/java.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/java.js +253 -0
- package/dist/core/group/extractors/http-patterns/node.d.ts +4 -0
- package/dist/core/group/extractors/http-patterns/node.js +354 -0
- package/dist/core/group/extractors/http-patterns/php.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/php.js +70 -0
- package/dist/core/group/extractors/http-patterns/python.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/python.js +133 -0
- package/dist/core/group/extractors/http-patterns/types.d.ts +61 -0
- package/dist/core/group/extractors/http-patterns/types.js +1 -0
- package/dist/core/group/extractors/http-route-extractor.d.ts +10 -13
- package/dist/core/group/extractors/http-route-extractor.js +231 -238
- package/dist/core/group/extractors/manifest-extractor.d.ts +54 -0
- package/dist/core/group/extractors/manifest-extractor.js +277 -0
- package/dist/core/group/extractors/topic-extractor.d.ts +0 -1
- package/dist/core/group/extractors/topic-extractor.js +55 -192
- package/dist/core/group/extractors/topic-patterns/go.d.ts +2 -0
- package/dist/core/group/extractors/topic-patterns/go.js +120 -0
- package/dist/core/group/extractors/topic-patterns/index.d.ts +14 -0
- package/dist/core/group/extractors/topic-patterns/index.js +38 -0
- package/dist/core/group/extractors/topic-patterns/java.d.ts +2 -0
- package/dist/core/group/extractors/topic-patterns/java.js +80 -0
- package/dist/core/group/extractors/topic-patterns/node.d.ts +4 -0
- package/dist/core/group/extractors/topic-patterns/node.js +155 -0
- package/dist/core/group/extractors/topic-patterns/python.d.ts +2 -0
- package/dist/core/group/extractors/topic-patterns/python.js +116 -0
- package/dist/core/group/extractors/topic-patterns/types.d.ts +25 -0
- package/dist/core/group/extractors/topic-patterns/types.js +10 -0
- package/dist/core/group/extractors/tree-sitter-scanner.d.ts +113 -0
- package/dist/core/group/extractors/tree-sitter-scanner.js +94 -0
- package/dist/core/ingestion/binding-accumulator.d.ts +22 -17
- package/dist/core/ingestion/binding-accumulator.js +29 -25
- package/dist/core/ingestion/cobol-processor.d.ts +1 -1
- package/dist/core/ingestion/import-processor.js +1 -1
- package/dist/core/ingestion/language-config.js +1 -1
- package/dist/core/ingestion/language-provider.d.ts +32 -5
- package/dist/core/ingestion/languages/c-cpp.js +2 -2
- package/dist/core/ingestion/languages/dart.d.ts +1 -1
- package/dist/core/ingestion/languages/dart.js +2 -2
- package/dist/core/ingestion/languages/go.d.ts +1 -1
- package/dist/core/ingestion/languages/go.js +2 -2
- package/dist/core/ingestion/languages/ruby.js +16 -1
- package/dist/core/ingestion/languages/swift.d.ts +1 -1
- package/dist/core/ingestion/languages/swift.js +2 -2
- package/dist/core/ingestion/markdown-processor.d.ts +1 -1
- package/dist/core/ingestion/method-extractors/configs/jvm.js +1 -0
- package/dist/core/ingestion/method-extractors/configs/ruby.js +1 -0
- package/dist/core/ingestion/method-extractors/generic.d.ts +6 -0
- package/dist/core/ingestion/method-extractors/generic.js +48 -4
- package/dist/core/ingestion/method-types.d.ts +4 -0
- package/dist/core/ingestion/model/resolve.js +103 -48
- package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
- package/dist/core/ingestion/model/semantic-model.js +1 -1
- package/dist/core/ingestion/model/symbol-table.d.ts +7 -7
- package/dist/core/ingestion/model/symbol-table.js +7 -7
- package/dist/core/ingestion/mro-processor.d.ts +1 -1
- package/dist/core/ingestion/mro-processor.js +1 -1
- package/dist/core/ingestion/parsing-processor.js +54 -42
- package/dist/core/ingestion/pipeline-phases/cobol.d.ts +16 -0
- package/dist/core/ingestion/pipeline-phases/cobol.js +45 -0
- package/dist/core/ingestion/pipeline-phases/communities.d.ts +16 -0
- package/dist/core/ingestion/pipeline-phases/communities.js +62 -0
- package/dist/core/ingestion/pipeline-phases/cross-file-impl.d.ts +17 -0
- package/dist/core/ingestion/pipeline-phases/cross-file-impl.js +156 -0
- package/dist/core/ingestion/pipeline-phases/cross-file.d.ts +37 -0
- package/dist/core/ingestion/pipeline-phases/cross-file.js +63 -0
- package/dist/core/ingestion/pipeline-phases/index.d.ts +21 -0
- package/dist/core/ingestion/pipeline-phases/index.js +22 -0
- package/dist/core/ingestion/pipeline-phases/markdown.d.ts +17 -0
- package/dist/core/ingestion/pipeline-phases/markdown.js +33 -0
- package/dist/core/ingestion/pipeline-phases/mro.d.ts +18 -0
- package/dist/core/ingestion/pipeline-phases/mro.js +36 -0
- package/dist/core/ingestion/pipeline-phases/orm-extraction.d.ts +22 -0
- package/dist/core/ingestion/pipeline-phases/orm-extraction.js +92 -0
- package/dist/core/ingestion/pipeline-phases/orm.d.ts +15 -0
- package/dist/core/ingestion/pipeline-phases/orm.js +74 -0
- package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +47 -0
- package/dist/core/ingestion/pipeline-phases/parse-impl.js +437 -0
- package/dist/core/ingestion/pipeline-phases/parse.d.ts +49 -0
- package/dist/core/ingestion/pipeline-phases/parse.js +33 -0
- package/dist/core/ingestion/pipeline-phases/processes.d.ts +16 -0
- package/dist/core/ingestion/pipeline-phases/processes.js +143 -0
- package/dist/core/ingestion/pipeline-phases/routes.d.ts +21 -0
- package/dist/core/ingestion/pipeline-phases/routes.js +243 -0
- package/dist/core/ingestion/pipeline-phases/runner.d.ts +22 -0
- package/dist/core/ingestion/pipeline-phases/runner.js +203 -0
- package/dist/core/ingestion/pipeline-phases/scan.d.ts +21 -0
- package/dist/core/ingestion/pipeline-phases/scan.js +46 -0
- package/dist/core/ingestion/pipeline-phases/structure.d.ts +27 -0
- package/dist/core/ingestion/pipeline-phases/structure.js +35 -0
- package/dist/core/ingestion/pipeline-phases/tools.d.ts +20 -0
- package/dist/core/ingestion/pipeline-phases/tools.js +79 -0
- package/dist/core/ingestion/pipeline-phases/types.d.ts +79 -0
- package/dist/core/ingestion/pipeline-phases/types.js +37 -0
- package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +70 -0
- package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +312 -0
- package/dist/core/ingestion/pipeline.d.ts +16 -10
- package/dist/core/ingestion/pipeline.js +66 -1534
- package/dist/core/ingestion/process-processor.js +1 -1
- package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
- package/dist/core/ingestion/tree-sitter-queries.js +69 -0
- package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -3
- package/dist/core/ingestion/utils/ast-helpers.js +48 -21
- package/dist/core/ingestion/utils/env.d.ts +10 -0
- package/dist/core/ingestion/utils/env.js +10 -0
- package/dist/core/ingestion/utils/graph-sort.d.ts +58 -0
- package/dist/core/ingestion/utils/graph-sort.js +100 -0
- package/dist/core/ingestion/workers/parse-worker.js +12 -8
- package/dist/core/lbug/lbug-adapter.d.ts +28 -0
- package/dist/core/lbug/lbug-adapter.js +162 -57
- package/package.json +3 -3
- package/vendor/tree-sitter-proto/binding.gyp +30 -0
- package/vendor/tree-sitter-proto/bindings/node/binding.cc +20 -0
- package/vendor/tree-sitter-proto/bindings/node/index.d.ts +28 -0
- package/vendor/tree-sitter-proto/bindings/node/index.js +7 -0
- package/vendor/tree-sitter-proto/package.json +18 -0
- package/vendor/tree-sitter-proto/src/node-types.json +1145 -0
- package/vendor/tree-sitter-proto/src/parser.c +10149 -0
- package/vendor/tree-sitter-proto/src/tree_sitter/alloc.h +54 -0
- package/vendor/tree-sitter-proto/src/tree_sitter/array.h +291 -0
- package/vendor/tree-sitter-proto/src/tree_sitter/parser.h +266 -0
|
@@ -1,19 +1,29 @@
|
|
|
1
|
-
import * as fs from 'node:fs';
|
|
2
1
|
import * as path from 'node:path';
|
|
3
2
|
import { glob } from 'glob';
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
3
|
+
import Parser from 'tree-sitter';
|
|
4
|
+
import { readSafe } from './fs-utils.js';
|
|
5
|
+
import { GRPC_SCAN_GLOB, getPluginForFile, hasProtoPlugin, } from './grpc-patterns/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* Language-agnostic orchestrator for gRPC (provider + consumer) contract
|
|
8
|
+
* extraction.
|
|
9
|
+
*
|
|
10
|
+
* Two parts:
|
|
11
|
+
*
|
|
12
|
+
* 1. **`.proto` parsing** — tree-sitter when `tree-sitter-proto` is
|
|
13
|
+
* installed (optionalDependency vendored in `vendor/tree-sitter-proto/`),
|
|
14
|
+
* via the `.proto` entry in `grpc-patterns/` and `hasProtoPlugin`.
|
|
15
|
+
* When the grammar isn't available (platform incompatibility, native
|
|
16
|
+
* build failure) the orchestrator falls back to the in-process
|
|
17
|
+
* string-sanitizing parser defined below (`stripProtoCommentsAndStrings`
|
|
18
|
+
* + `extractServiceBlocks`). The fallback preserves offsets so any
|
|
19
|
+
* downstream regex scans run against a sanitized copy without
|
|
20
|
+
* affecting line numbers of the original.
|
|
21
|
+
*
|
|
22
|
+
* 2. **Source-scan providers / consumers** — delegated to per-language
|
|
23
|
+
* plugins in `./grpc-patterns/`. The orchestrator imports NO
|
|
24
|
+
* tree-sitter grammars or query strings — each plugin owns its own.
|
|
25
|
+
*/
|
|
26
|
+
// ─── .proto fallback parser (used only when tree-sitter-proto is absent) ───
|
|
17
27
|
function contractId(pkg, service, method) {
|
|
18
28
|
const prefix = pkg ? `${pkg}.${service}` : service;
|
|
19
29
|
return `grpc::${prefix}/${method}`;
|
|
@@ -21,18 +31,103 @@ function contractId(pkg, service, method) {
|
|
|
21
31
|
/**
 * Build the service-level wildcard contract id used when only the short
 * service name is known (no proto package available).
 *
 * @param {string} serviceName - Short gRPC service name.
 * @returns {string} Id of the form `grpc::<serviceName>/*`.
 */
function serviceOnlyContractId(serviceName) {
    const wildcard = '/*';
    return `grpc::${serviceName}${wildcard}`;
}
|
|
34
|
+
/**
 * Blank out every comment and string literal in `.proto` source text.
 *
 * The result has exactly the same length as the input and every surviving
 * character sits at its original offset, so regex / brace-depth scans can run
 * on the sanitized copy while indices remain valid against the original
 * string. Line terminators (`\n`, `\r`) are always kept — including ones that
 * appear inside comments, inside strings, or directly after a backslash in a
 * string — so line numbers computed from the sanitized copy match the source.
 *
 * Recognized forms:
 *   - line comments introduced by `//` (up to, not including, the newline)
 *   - block comments delimited by slash-star and star-slash
 *   - double-quoted and single-quoted strings with backslash escapes
 *
 * Unterminated block comments and strings run to end of input.
 *
 * @param {string} content - Raw `.proto` file text.
 * @returns {string} Sanitized text of identical length.
 */
function stripProtoCommentsAndStrings(content) {
    const out = new Array(content.length);
    // Keep line terminators verbatim; everything else becomes a space.
    const blank = (c) => (c === '\n' || c === '\r' ? c : ' ');
    let i = 0;
    while (i < content.length) {
        const ch = content[i];
        const next = content[i + 1];
        // Line comment: blank up to (not including) the terminating newline.
        if (ch === '/' && next === '/') {
            out[i] = ' ';
            out[i + 1] = ' ';
            i += 2;
            while (i < content.length && content[i] !== '\n') {
                out[i] = blank(content[i]);
                i++;
            }
            continue;
        }
        // Block comment: blank through the closing delimiter (or EOF).
        if (ch === '/' && next === '*') {
            out[i] = ' ';
            out[i + 1] = ' ';
            i += 2;
            while (i < content.length) {
                if (content[i] === '*' && content[i + 1] === '/') {
                    out[i] = ' ';
                    out[i + 1] = ' ';
                    i += 2;
                    break;
                }
                out[i] = blank(content[i]);
                i++;
            }
            continue;
        }
        // String literal: blank through the matching closing quote (or EOF).
        if (ch === '"' || ch === "'") {
            const quote = ch;
            out[i] = ' ';
            i++;
            while (i < content.length) {
                const c = content[i];
                if (c === '\\' && i + 1 < content.length) {
                    // Escaped pair (e.g. \" or \\): never treat the second
                    // character as a closing quote. A line terminator after
                    // the backslash is still preserved so line counts of the
                    // sanitized copy stay in sync with the original.
                    out[i] = ' ';
                    out[i + 1] = blank(content[i + 1]);
                    i += 2;
                    continue;
                }
                if (c === quote) {
                    out[i] = ' ';
                    i++;
                    break;
                }
                // Proto disallows raw newlines in strings, but tolerate them.
                out[i] = blank(c);
                i++;
            }
            continue;
        }
        out[i] = ch;
        i++;
    }
    return out.join('');
}
|
|
24
114
|
function extractServiceBlocks(content) {
|
|
25
115
|
const results = [];
|
|
26
|
-
//
|
|
116
|
+
// Sanitize comments and string literals so braces inside them don't
|
|
117
|
+
// throw off the depth counter. The sanitized copy has the same length
|
|
118
|
+
// and offsets as the original, so we use it ONLY to scan for service
|
|
119
|
+
// headers and braces; the service body we return is sliced from the
|
|
120
|
+
// ORIGINAL content to preserve exact source text for downstream use.
|
|
121
|
+
const sanitized = stripProtoCommentsAndStrings(content);
|
|
27
122
|
const headerRe = /service\s+(\w+)\s*\{/g;
|
|
28
123
|
let headerMatch;
|
|
29
|
-
while ((headerMatch = headerRe.exec(
|
|
124
|
+
while ((headerMatch = headerRe.exec(sanitized)) !== null) {
|
|
30
125
|
const serviceName = headerMatch[1];
|
|
31
126
|
const bodyStart = headerMatch.index + headerMatch[0].length;
|
|
32
127
|
let depth = 1;
|
|
33
128
|
let pos = bodyStart;
|
|
34
|
-
while (pos <
|
|
35
|
-
const ch =
|
|
129
|
+
while (pos < sanitized.length && depth > 0) {
|
|
130
|
+
const ch = sanitized[pos];
|
|
36
131
|
if (ch === '{')
|
|
37
132
|
depth++;
|
|
38
133
|
else if (ch === '}')
|
|
@@ -60,6 +155,145 @@ function makeContract(cid, role, filePath, symbolName, confidence, meta) {
|
|
|
60
155
|
meta: { ...meta, extractionStrategy: 'source_scan' },
|
|
61
156
|
};
|
|
62
157
|
}
|
|
158
|
+
/**
 * Convert a path to forward-slash form by replacing every backslash with `/`.
 *
 * @param {string} rel - Path that may contain Windows-style separators.
 * @returns {string} The same path using `/` as the only separator.
 */
function normalizeProtoPath(rel) {
    const segments = rel.split('\\');
    return segments.join('/');
}
|
|
161
|
+
/**
 * Collect the paths named by `import` statements in `.proto` source.
 *
 * Accepts the optional `public` / `weak` modifier and both double- and
 * single-quoted path literals. Only statements that begin a line (after
 * optional whitespace) are matched.
 *
 * @param {string} content - Raw `.proto` file text.
 * @returns {string[]} Imported paths in source order (empty when none).
 */
function extractProtoImports(content) {
    // Group 1 captures a double-quoted path, group 2 a single-quoted one.
    const importRe = /^\s*import\s+(?:(?:public|weak)\s*)?(?:"([^"]+)"|'([^']+)')\s*;/gm;
    const imports = [];
    for (const match of content.matchAll(importRe)) {
        imports.push(match[1] ?? match[2]);
    }
    return imports;
}
|
|
170
|
+
/**
 * Length of the longest run of consecutive, identical path segments that
 * appears in both paths (at any position in either path).
 *
 * Paths are split on `/`; empty segments are discarded before comparison.
 *
 * @param {string} aPath - First forward-slash path.
 * @param {string} bPath - Second forward-slash path.
 * @returns {number} Number of segments in the longest shared run (0 if none).
 */
function longestSharedSegmentRun(aPath, bPath) {
    const toSegments = (p) => p.split('/').filter((segment) => segment.length > 0);
    const left = toSegments(aPath);
    const right = toSegments(bPath);
    let longest = 0;
    for (let li = 0; li < left.length; li++) {
        for (let ri = 0; ri < right.length; ri++) {
            let len = 0;
            while (li + len < left.length &&
                ri + len < right.length &&
                left[li + len] === right[ri + len]) {
                len++;
            }
            longest = Math.max(longest, len);
        }
    }
    return longest;
}
|
|
186
|
+
/**
 * Scan a repository for `.proto` files and index what they declare.
 *
 * Steps:
 *   1. Glob `**\/*.proto` under `repoPath` (ignoring node_modules, .git and
 *      vendor) and read each file; unreadable or empty files are skipped.
 *   2. Resolve a package name per proto file: the file's own `package`
 *      statement if present, otherwise the first non-empty package found by
 *      following its imports (relative to the file's directory first, then
 *      relative to the repo root). Results are memoized in `packagesByProto`.
 *   3. For each service block, collect its `rpc` method names and register a
 *      service info record under the short service name.
 *
 * @param {string} repoPath - Repository root used as the glob/read base.
 * @returns {Promise<{packagesByProto: Map<string, string>, servicesByName: Map<string, Array<{package: string, serviceName: string, methods: string[], protoPath: string}>>}>}
 */
async function buildProtoContext(repoPath) {
    const servicesByName = new Map();
    const protoFiles = await glob('**/*.proto', {
        cwd: repoPath,
        absolute: false,
        nodir: true,
        ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**'],
    });
    // Normalized (forward-slash) relative path -> file text.
    const contents = new Map();
    for (const rel of protoFiles) {
        const content = readSafe(repoPath, rel);
        if (!content)
            continue;
        contents.set(normalizeProtoPath(rel), content);
    }
    const packagesByProto = new Map();
    // `seen` guards against import cycles within a single resolution walk.
    const resolvePackage = (protoPath, seen = new Set()) => {
        if (packagesByProto.has(protoPath))
            return packagesByProto.get(protoPath) ?? '';
        if (seen.has(protoPath))
            return '';
        const content = contents.get(protoPath);
        if (!content)
            return '';
        seen.add(protoPath);
        const pkgMatch = content.match(/^\s*package\s+([\w.]+)\s*;/m);
        if (pkgMatch?.[1]) {
            packagesByProto.set(protoPath, pkgMatch[1]);
            return pkgMatch[1];
        }
        // No own package statement: inherit from the first import that has one.
        for (const importPath of extractProtoImports(content)) {
            const normalizedImport = normalizeProtoPath(importPath);
            const candidates = [
                normalizeProtoPath(path.posix.normalize(path.posix.join(path.posix.dirname(protoPath), normalizedImport))),
                normalizedImport,
            ];
            for (const candidate of candidates) {
                if (!contents.has(candidate))
                    continue;
                const inheritedPackage = resolvePackage(candidate, seen);
                if (inheritedPackage) {
                    packagesByProto.set(protoPath, inheritedPackage);
                    return inheritedPackage;
                }
            }
        }
        packagesByProto.set(protoPath, '');
        return '';
    };
    for (const rel of protoFiles) {
        const normalizedRel = normalizeProtoPath(rel);
        const content = contents.get(normalizedRel);
        if (!content)
            continue;
        const pkg = resolvePackage(normalizedRel);
        for (const block of extractServiceBlocks(content)) {
            // Scan a sanitized copy of the body so commented-out declarations
            // (e.g. `// rpc Old(...)`) and string contents are not reported as
            // methods. `\b` keeps identifiers that merely end in "rpc" from
            // matching.
            const scanBody = stripProtoCommentsAndStrings(block.body);
            const methods = Array.from(scanBody.matchAll(/\brpc\s+(\w+)\s*\(/g), (m) => m[1]);
            const info = {
                package: pkg,
                serviceName: block.name,
                methods,
                protoPath: normalizedRel,
            };
            const existing = servicesByName.get(block.name) ?? [];
            existing.push(info);
            servicesByName.set(block.name, existing);
        }
    }
    return { packagesByProto, servicesByName };
}
|
|
262
|
+
/**
 * Build the proto index for a repository and return only the
 * service-name -> service-info-list map.
 *
 * @param {string} repoPath - Repository root to scan.
 * @returns {Promise<Map<string, Array<object>>>} The `servicesByName` map
 *   produced by `buildProtoContext`.
 */
export async function buildProtoMap(repoPath) {
    const context = await buildProtoContext(repoPath);
    return context.servicesByName;
}
|
|
266
|
+
/**
 * Choose which proto definition a source file most likely refers to when
 * several proto files declare a service with the same short name.
 *
 * Each candidate is scored by the longest run of directory segments its proto
 * path shares with the source file's directory. A candidate wins only if it
 * has the strictly highest score; on a tie a warning is logged and `null` is
 * returned instead of guessing.
 *
 * @param {string} serviceName - Short service name (used in the warning only).
 * @param {string} sourceFilePath - Path of the file that referenced the service.
 * @param {Array<{protoPath: string}>} candidates - Proto definitions to choose from.
 * @returns {object|null} The sole/winning candidate, or `null` when there are
 *   no candidates or the choice is ambiguous.
 */
export function resolveProtoConflict(serviceName, sourceFilePath, candidates) {
    switch (candidates.length) {
        case 0:
            return null;
        case 1:
            return candidates[0];
        default:
            break;
    }
    const sourceDir = normalizeProtoPath(path.dirname(sourceFilePath));
    const ranked = candidates.map((candidate) => ({
        candidate,
        score: longestSharedSegmentRun(sourceDir, normalizeProtoPath(path.dirname(candidate.protoPath))),
    }));
    const maxScore = ranked.reduce((highest, entry) => (entry.score > highest ? entry.score : highest), -1);
    const winners = ranked.filter((entry) => entry.score === maxScore);
    if (winners.length === 1) {
        return winners[0].candidate;
    }
    // A tie (including an all-zero tie) means the path heuristic cannot pick a
    // definition; bailing out avoids attaching the wrong package to the id.
    const paths = candidates.map((c) => c.protoPath).join(', ');
    console.warn(`[grpc-extractor] Ambiguous proto resolution for service "${serviceName}" from ${sourceFilePath}: ${winners.length} candidates tied at score ${maxScore} among [${paths}] — skipping canonical contract`);
    return null;
}
|
|
292
|
+
/**
 * Build a service-level wildcard contract id, qualified by the proto package
 * when one is known.
 *
 * @param {string} pkg - Proto package name; falsy means "no package".
 * @param {string} serviceName - Short service name.
 * @returns {string} `grpc::<pkg>.<serviceName>/*`, or `grpc::<serviceName>/*`
 *   when `pkg` is falsy.
 */
export function serviceContractId(pkg, serviceName) {
    if (!pkg) {
        return `grpc::${serviceName}/*`;
    }
    return `grpc::${pkg}.${serviceName}/*`;
}
|
|
296
|
+
// ─── Orchestrator ────────────────────────────────────────────────────
|
|
63
297
|
export class GrpcExtractor {
|
|
64
298
|
type = 'grpc';
|
|
65
299
|
async canExtract(_repo) {
|
|
@@ -67,198 +301,101 @@ export class GrpcExtractor {
|
|
|
67
301
|
}
|
|
68
302
|
async extract(_dbExecutor, repoPath, _repo) {
|
|
69
303
|
const out = [];
|
|
70
|
-
|
|
71
|
-
const
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
304
|
+
const protoContext = await buildProtoContext(repoPath);
|
|
305
|
+
const protoMap = protoContext.servicesByName;
|
|
306
|
+
// ─── Proto files — definitive provider source ─────────────────
|
|
307
|
+
// When tree-sitter-proto is available, .proto files are handled by
|
|
308
|
+
// the plugin loop below (they're in GRPC_SCAN_GLOB). Otherwise
|
|
309
|
+
// emit provider contracts directly from the proto map that
|
|
310
|
+
// `buildProtoContext` already built — no second glob / parse pass.
|
|
311
|
+
if (!hasProtoPlugin) {
|
|
312
|
+
for (const infos of protoMap.values()) {
|
|
313
|
+
for (const info of infos) {
|
|
314
|
+
for (const methodName of info.methods) {
|
|
315
|
+
const cid = contractId(info.package, info.serviceName, methodName);
|
|
316
|
+
out.push(makeContract(cid, 'provider', info.protoPath, `${info.serviceName}.${methodName}`, 0.85, {
|
|
317
|
+
package: info.package,
|
|
318
|
+
service: info.serviceName,
|
|
319
|
+
method: methodName,
|
|
320
|
+
source: 'proto',
|
|
321
|
+
}));
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
}
|
|
80
325
|
}
|
|
81
|
-
// Source files
|
|
82
|
-
const sourceFiles = await glob(
|
|
326
|
+
// ─── Source files (+ .proto when plugin available) ────────────
|
|
327
|
+
const sourceFiles = await glob(GRPC_SCAN_GLOB, {
|
|
83
328
|
cwd: repoPath,
|
|
84
329
|
ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**', '**/dist/**', '**/build/**'],
|
|
85
330
|
nodir: true,
|
|
86
331
|
});
|
|
332
|
+
const parser = new Parser();
|
|
87
333
|
for (const rel of sourceFiles) {
|
|
334
|
+
const plugin = getPluginForFile(rel);
|
|
335
|
+
if (!plugin)
|
|
336
|
+
continue;
|
|
88
337
|
const content = readSafe(repoPath, rel);
|
|
89
338
|
if (!content)
|
|
90
339
|
continue;
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
else if (ext === '.java') {
|
|
97
|
-
out.push(...this.scanJavaProviders(content, rel));
|
|
98
|
-
out.push(...this.scanJavaConsumers(content, rel));
|
|
340
|
+
let detections = [];
|
|
341
|
+
try {
|
|
342
|
+
parser.setLanguage(plugin.language);
|
|
343
|
+
const tree = parser.parse(content);
|
|
344
|
+
detections = plugin.scan(tree);
|
|
99
345
|
}
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
out.push(...this.scanPythonConsumers(content, rel));
|
|
346
|
+
catch {
|
|
347
|
+
continue;
|
|
103
348
|
}
|
|
104
|
-
|
|
105
|
-
|
|
349
|
+
for (const d of detections) {
|
|
350
|
+
const contract = this.detectionToContract(d, rel, protoMap);
|
|
351
|
+
if (contract)
|
|
352
|
+
out.push(contract);
|
|
106
353
|
}
|
|
107
354
|
}
|
|
108
355
|
return this.dedupe(out);
|
|
109
356
|
}
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
const
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
// pb.UnimplementedXxxServer
|
|
140
|
-
const unimplRe = /\w+\.Unimplemented(\w+)Server\b/g;
|
|
141
|
-
while ((m = unimplRe.exec(content)) !== null) {
|
|
142
|
-
const serviceName = m[1];
|
|
143
|
-
out.push(makeContract(serviceOnlyContractId(serviceName), 'provider', filePath, `Unimplemented${serviceName}Server`, 0.8, { service: serviceName, source: 'go_unimplemented' }));
|
|
144
|
-
}
|
|
145
|
-
return out;
|
|
146
|
-
}
|
|
147
|
-
scanGoConsumers(content, filePath) {
|
|
148
|
-
const out = [];
|
|
149
|
-
const re = /\w+\.New(\w+)Client\s*\(/g;
|
|
150
|
-
let m;
|
|
151
|
-
while ((m = re.exec(content)) !== null) {
|
|
152
|
-
const serviceName = m[1];
|
|
153
|
-
out.push(makeContract(serviceOnlyContractId(serviceName), 'consumer', filePath, `New${serviceName}Client`, 0.7, { service: serviceName, source: 'go_client' }));
|
|
154
|
-
}
|
|
155
|
-
return out;
|
|
156
|
-
}
|
|
157
|
-
scanJavaProviders(content, filePath) {
|
|
158
|
-
const out = [];
|
|
159
|
-
// @GrpcService
|
|
160
|
-
if (content.includes('@GrpcService')) {
|
|
161
|
-
const implBaseRe = /extends\s+(\w+)Grpc\.(\w+)ImplBase/;
|
|
162
|
-
const m = content.match(implBaseRe);
|
|
163
|
-
if (m) {
|
|
164
|
-
out.push(makeContract(serviceOnlyContractId(m[1]), 'provider', filePath, m[2], 0.8, {
|
|
165
|
-
service: m[1],
|
|
166
|
-
source: 'java_grpc_service',
|
|
167
|
-
}));
|
|
168
|
-
}
|
|
169
|
-
else {
|
|
170
|
-
// Try extracting service name from class name
|
|
171
|
-
const classRe = /class\s+(\w*?)(?:Grpc)?(?:Service)?\s+extends\s+(\w+)(?:Grpc\.(\w+))?ImplBase/;
|
|
172
|
-
const cm = content.match(classRe);
|
|
173
|
-
if (cm) {
|
|
174
|
-
const svcName = cm[2].replace(/Grpc$/, '');
|
|
175
|
-
out.push(makeContract(serviceOnlyContractId(svcName), 'provider', filePath, cm[1], 0.8, {
|
|
176
|
-
service: svcName,
|
|
177
|
-
source: 'java_grpc_service',
|
|
178
|
-
}));
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
// extends XxxImplBase (without @GrpcService)
|
|
183
|
-
if (!content.includes('@GrpcService')) {
|
|
184
|
-
const implRe = /extends\s+(\w+?)(?:Grpc\.(\w+))?ImplBase/;
|
|
185
|
-
const m = content.match(implRe);
|
|
186
|
-
if (m) {
|
|
187
|
-
const svcName = m[2] || m[1].replace(/Grpc$/, '');
|
|
188
|
-
out.push(makeContract(serviceOnlyContractId(svcName), 'provider', filePath, svcName, 0.8, {
|
|
189
|
-
service: svcName,
|
|
190
|
-
source: 'java_impl_base',
|
|
191
|
-
}));
|
|
192
|
-
}
|
|
193
|
-
}
|
|
194
|
-
return out;
|
|
195
|
-
}
|
|
196
|
-
scanJavaConsumers(content, filePath) {
|
|
197
|
-
const out = [];
|
|
198
|
-
// XxxGrpc.newBlockingStub( or XxxGrpc.newStub(
|
|
199
|
-
const re = /(\w+)Grpc\.new(?:Blocking)?Stub\s*\(/g;
|
|
200
|
-
let m;
|
|
201
|
-
while ((m = re.exec(content)) !== null) {
|
|
202
|
-
const serviceName = m[1];
|
|
203
|
-
out.push(makeContract(serviceOnlyContractId(serviceName), 'consumer', filePath, `${serviceName}Stub`, 0.7, { service: serviceName, source: 'java_stub' }));
|
|
204
|
-
}
|
|
205
|
-
return out;
|
|
206
|
-
}
|
|
207
|
-
scanPythonProviders(content, filePath) {
|
|
208
|
-
const out = [];
|
|
209
|
-
// add_XxxServicer_to_server(
|
|
210
|
-
const re = /add_(\w+?)Servicer_to_server\s*\(/g;
|
|
211
|
-
let m;
|
|
212
|
-
while ((m = re.exec(content)) !== null) {
|
|
213
|
-
const serviceName = m[1];
|
|
214
|
-
out.push(makeContract(serviceOnlyContractId(serviceName), 'provider', filePath, `add_${serviceName}Servicer_to_server`, 0.8, { service: serviceName, source: 'python_servicer' }));
|
|
215
|
-
}
|
|
216
|
-
return out;
|
|
217
|
-
}
|
|
218
|
-
scanPythonConsumers(content, filePath) {
|
|
219
|
-
const out = [];
|
|
220
|
-
// XxxStub(
|
|
221
|
-
const re = /(\w+)Stub\s*\(/g;
|
|
222
|
-
let m;
|
|
223
|
-
while ((m = re.exec(content)) !== null) {
|
|
224
|
-
const name = m[1];
|
|
225
|
-
// Filter out common false positives
|
|
226
|
-
if (['Mock', 'Test', 'Fake', 'Stub'].includes(name))
|
|
227
|
-
continue;
|
|
228
|
-
out.push(makeContract(serviceOnlyContractId(name), 'consumer', filePath, `${name}Stub`, 0.7, {
|
|
229
|
-
service: name,
|
|
230
|
-
source: 'python_stub',
|
|
231
|
-
}));
|
|
232
|
-
}
|
|
233
|
-
return out;
|
|
234
|
-
}
|
|
235
|
-
scanTsProviders(content, filePath) {
|
|
236
|
-
const out = [];
|
|
237
|
-
// @GrpcMethod('ServiceName', 'MethodName')
|
|
238
|
-
const re = /@GrpcMethod\s*\(\s*['"](\w+)['"]\s*,\s*['"](\w+)['"]\s*\)/g;
|
|
239
|
-
let m;
|
|
240
|
-
while ((m = re.exec(content)) !== null) {
|
|
241
|
-
const serviceName = m[1];
|
|
242
|
-
const methodName = m[2];
|
|
243
|
-
const cid = contractId('', serviceName, methodName);
|
|
244
|
-
out.push(makeContract(cid, 'provider', filePath, `${serviceName}.${methodName}`, 0.8, {
|
|
245
|
-
service: serviceName,
|
|
246
|
-
method: methodName,
|
|
247
|
-
source: 'ts_grpc_method',
|
|
248
|
-
}));
|
|
249
|
-
}
|
|
250
|
-
return out;
|
|
357
|
+
/**
|
|
358
|
+
* Convert a plugin `GrpcDetection` into a concrete `ExtractedContract`
|
|
359
|
+
* by resolving the short service name against the proto map, building
|
|
360
|
+
* either a service-level (`grpc::pkg.Svc/*`) or method-level
|
|
361
|
+
* (`grpc::pkg.Svc/Method`) contract id, and selecting confidence
|
|
362
|
+
* based on whether the proto map had an entry.
|
|
363
|
+
*/
|
|
364
|
+
detectionToContract(d, filePath, protoMap) {
|
|
365
|
+
const candidates = protoMap.get(d.serviceName) ?? [];
|
|
366
|
+
const proto = resolveProtoConflict(d.serviceName, filePath, candidates);
|
|
367
|
+
// If there were proto candidates but resolution was ambiguous, skip
|
|
368
|
+
// contract emission rather than fabricating a package-qualified id from
|
|
369
|
+
// an arbitrary candidate. resolveProtoConflict already warned.
|
|
370
|
+
if (candidates.length > 0 && proto === null)
|
|
371
|
+
return null;
|
|
372
|
+
const pkg = proto?.package ?? '';
|
|
373
|
+
const cid = d.methodName
|
|
374
|
+
? contractId(pkg, d.serviceName, d.methodName)
|
|
375
|
+
: proto
|
|
376
|
+
? serviceContractId(pkg, d.serviceName)
|
|
377
|
+
: serviceOnlyContractId(d.serviceName);
|
|
378
|
+
const confidence = proto ? d.confidenceWithProto : d.confidenceWithoutProto;
|
|
379
|
+
const meta = {
|
|
380
|
+
service: d.serviceName,
|
|
381
|
+
source: d.source,
|
|
382
|
+
};
|
|
383
|
+
if (d.methodName)
|
|
384
|
+
meta.method = d.methodName;
|
|
385
|
+
return makeContract(cid, d.role, filePath, d.symbolName, confidence, meta);
|
|
251
386
|
}
|
|
252
387
|
dedupe(items) {
|
|
253
|
-
const
|
|
254
|
-
const out = [];
|
|
388
|
+
const byKey = new Map();
|
|
255
389
|
for (const c of items) {
|
|
256
390
|
const k = `${c.contractId}|${c.role}|${c.symbolRef.filePath}`;
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
391
|
+
const existing = byKey.get(k);
|
|
392
|
+
if (!existing ||
|
|
393
|
+
c.confidence > existing.confidence ||
|
|
394
|
+
(c.confidence === existing.confidence &&
|
|
395
|
+
String(c.meta.source) < String(existing.meta.source))) {
|
|
396
|
+
byKey.set(k, c);
|
|
397
|
+
}
|
|
261
398
|
}
|
|
262
|
-
return
|
|
399
|
+
return Array.from(byKey.values());
|
|
263
400
|
}
|
|
264
401
|
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import Go from 'tree-sitter-go';
|
|
2
|
+
import { compilePatterns, runCompiledPatterns, } from '../tree-sitter-scanner.js';
|
|
3
|
+
/**
|
|
4
|
+
* Go gRPC plugin. Detects:
|
|
5
|
+
* - Provider: `pb.RegisterXxxServer(...)` calls
|
|
6
|
+
* - Provider: `pb.UnimplementedXxxServer` embedded in a struct
|
|
7
|
+
* - Consumer: `pb.NewXxxClient(conn)` calls
|
|
8
|
+
*/
|
|
9
|
+
// Identifier shapes matched against the whole identifier text; capture
// group 1 is the short service name in each case.
// `Register<Service>Server` — server registration call (provider).
const REGISTER_RE = /^Register(\w+)Server$/;
// `Unimplemented<Service>Server` — type used as a struct field (provider).
const UNIMPLEMENTED_RE = /^Unimplemented(\w+)Server$/;
// `New<Service>Client` — client constructor call (consumer).
const NEW_CLIENT_RE = /^New(\w+)Client$/;
|
|
12
|
+
// Compiled query capturing, as `@fn`, the field identifier of every call whose
// callee is a selector expression (`pkg.Fn(...)`). The query is deliberately
// broad; the plugin narrows matches by testing the captured identifier text.
const SELECTOR_CALL_PATTERNS = compilePatterns({
    name: 'go-grpc-selector-call',
    language: Go,
    patterns: [
        {
            meta: {},
            query: `
(call_expression
function: (selector_expression
field: (field_identifier) @fn))
`,
        },
    ],
});
|
|
27
|
+
// Compiled query capturing, as `@field_type`, the type identifier of every
// struct field whose type is package-qualified (`pkg.Type`). Used to spot
// `pb.UnimplementedXxxServer` inside struct definitions.
const STRUCT_EMBEDDING_PATTERNS = compilePatterns({
    name: 'go-grpc-struct-embedding',
    language: Go,
    patterns: [
        {
            meta: {},
            query: `
(struct_type
(field_declaration_list
(field_declaration
type: (qualified_type
name: (type_identifier) @field_type))))
`,
        },
    ],
});
|
|
44
|
+
/**
 * Go gRPC plugin object: `name`, the tree-sitter `language`, and a `scan`
 * function that turns a parsed Go tree into gRPC detections.
 */
export const GO_GRPC_PLUGIN = {
    name: 'go-grpc',
    language: Go,
    /**
     * Scan a parsed Go file for gRPC providers and consumers.
     *
     * Selector calls are checked first (register calls, then client
     * constructors), followed by package-qualified struct field types.
     *
     * @param {object} tree - Parsed tree-sitter tree for a Go source file.
     * @returns {Array<object>} Detections in match order.
     */
    scan(tree) {
        // Rules for selector-call identifiers, tried in order; first hit wins.
        const callRules = [
            { re: REGISTER_RE, role: 'provider', source: 'go_register', withProto: 0.8, withoutProto: 0.65 },
            { re: NEW_CLIENT_RE, role: 'consumer', source: 'go_client', withProto: 0.75, withoutProto: 0.55 },
        ];
        const detections = [];
        for (const { captures } of runCompiledPatterns(SELECTOR_CALL_PATTERNS, tree)) {
            const fnNode = captures.fn;
            if (!fnNode) {
                continue;
            }
            const fnText = fnNode.text;
            for (const rule of callRules) {
                const hit = rule.re.exec(fnText);
                if (!hit) {
                    continue;
                }
                detections.push({
                    role: rule.role,
                    serviceName: hit[1],
                    symbolName: fnText,
                    source: rule.source,
                    confidenceWithProto: rule.withProto,
                    confidenceWithoutProto: rule.withoutProto,
                });
                break;
            }
        }
        for (const { captures } of runCompiledPatterns(STRUCT_EMBEDDING_PATTERNS, tree)) {
            const fieldNode = captures.field_type;
            const unimpl = fieldNode ? UNIMPLEMENTED_RE.exec(fieldNode.text) : null;
            if (unimpl) {
                detections.push({
                    role: 'provider',
                    serviceName: unimpl[1],
                    symbolName: fieldNode.text,
                    source: 'go_unimplemented',
                    confidenceWithProto: 0.8,
                    confidenceWithoutProto: 0.65,
                });
            }
        }
        return detections;
    },
};
|