gitnexus 1.6.0 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/analyze.js +28 -3
- package/dist/core/group/extractors/fs-utils.d.ts +10 -0
- package/dist/core/group/extractors/fs-utils.js +24 -0
- package/dist/core/group/extractors/grpc-extractor.d.ts +17 -8
- package/dist/core/group/extractors/grpc-extractor.js +313 -191
- package/dist/core/group/extractors/grpc-patterns/go.d.ts +2 -0
- package/dist/core/group/extractors/grpc-patterns/go.js +97 -0
- package/dist/core/group/extractors/grpc-patterns/index.d.ts +19 -0
- package/dist/core/group/extractors/grpc-patterns/index.js +46 -0
- package/dist/core/group/extractors/grpc-patterns/java.d.ts +2 -0
- package/dist/core/group/extractors/grpc-patterns/java.js +173 -0
- package/dist/core/group/extractors/grpc-patterns/node.d.ts +4 -0
- package/dist/core/group/extractors/grpc-patterns/node.js +290 -0
- package/dist/core/group/extractors/grpc-patterns/proto.d.ts +9 -0
- package/dist/core/group/extractors/grpc-patterns/proto.js +134 -0
- package/dist/core/group/extractors/grpc-patterns/python.d.ts +2 -0
- package/dist/core/group/extractors/grpc-patterns/python.js +67 -0
- package/dist/core/group/extractors/grpc-patterns/types.d.ts +50 -0
- package/dist/core/group/extractors/grpc-patterns/types.js +1 -0
- package/dist/core/group/extractors/http-patterns/go.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/go.js +215 -0
- package/dist/core/group/extractors/http-patterns/index.d.ts +17 -0
- package/dist/core/group/extractors/http-patterns/index.js +44 -0
- package/dist/core/group/extractors/http-patterns/java.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/java.js +253 -0
- package/dist/core/group/extractors/http-patterns/node.d.ts +4 -0
- package/dist/core/group/extractors/http-patterns/node.js +354 -0
- package/dist/core/group/extractors/http-patterns/php.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/php.js +70 -0
- package/dist/core/group/extractors/http-patterns/python.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/python.js +133 -0
- package/dist/core/group/extractors/http-patterns/types.d.ts +61 -0
- package/dist/core/group/extractors/http-patterns/types.js +1 -0
- package/dist/core/group/extractors/http-route-extractor.d.ts +10 -13
- package/dist/core/group/extractors/http-route-extractor.js +201 -238
- package/dist/core/group/extractors/manifest-extractor.d.ts +54 -0
- package/dist/core/group/extractors/manifest-extractor.js +235 -0
- package/dist/core/group/extractors/topic-extractor.d.ts +0 -1
- package/dist/core/group/extractors/topic-extractor.js +55 -192
- package/dist/core/group/extractors/topic-patterns/go.d.ts +2 -0
- package/dist/core/group/extractors/topic-patterns/go.js +120 -0
- package/dist/core/group/extractors/topic-patterns/index.d.ts +14 -0
- package/dist/core/group/extractors/topic-patterns/index.js +38 -0
- package/dist/core/group/extractors/topic-patterns/java.d.ts +2 -0
- package/dist/core/group/extractors/topic-patterns/java.js +80 -0
- package/dist/core/group/extractors/topic-patterns/node.d.ts +4 -0
- package/dist/core/group/extractors/topic-patterns/node.js +155 -0
- package/dist/core/group/extractors/topic-patterns/python.d.ts +2 -0
- package/dist/core/group/extractors/topic-patterns/python.js +116 -0
- package/dist/core/group/extractors/topic-patterns/types.d.ts +25 -0
- package/dist/core/group/extractors/topic-patterns/types.js +10 -0
- package/dist/core/group/extractors/tree-sitter-scanner.d.ts +113 -0
- package/dist/core/group/extractors/tree-sitter-scanner.js +94 -0
- package/dist/core/ingestion/binding-accumulator.d.ts +22 -17
- package/dist/core/ingestion/binding-accumulator.js +29 -25
- package/dist/core/ingestion/cobol-processor.d.ts +1 -1
- package/dist/core/ingestion/import-processor.js +1 -1
- package/dist/core/ingestion/language-config.js +1 -1
- package/dist/core/ingestion/language-provider.d.ts +8 -0
- package/dist/core/ingestion/languages/ruby.js +15 -0
- package/dist/core/ingestion/markdown-processor.d.ts +1 -1
- package/dist/core/ingestion/method-extractors/configs/jvm.js +1 -0
- package/dist/core/ingestion/method-extractors/configs/ruby.js +1 -0
- package/dist/core/ingestion/method-extractors/generic.d.ts +6 -0
- package/dist/core/ingestion/method-extractors/generic.js +48 -4
- package/dist/core/ingestion/method-types.d.ts +4 -0
- package/dist/core/ingestion/model/resolve.js +103 -48
- package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
- package/dist/core/ingestion/model/semantic-model.js +1 -1
- package/dist/core/ingestion/model/symbol-table.d.ts +7 -7
- package/dist/core/ingestion/model/symbol-table.js +7 -7
- package/dist/core/ingestion/mro-processor.d.ts +1 -1
- package/dist/core/ingestion/mro-processor.js +1 -1
- package/dist/core/ingestion/parsing-processor.js +54 -42
- package/dist/core/ingestion/pipeline-phases/cobol.d.ts +16 -0
- package/dist/core/ingestion/pipeline-phases/cobol.js +45 -0
- package/dist/core/ingestion/pipeline-phases/communities.d.ts +16 -0
- package/dist/core/ingestion/pipeline-phases/communities.js +62 -0
- package/dist/core/ingestion/pipeline-phases/cross-file-impl.d.ts +17 -0
- package/dist/core/ingestion/pipeline-phases/cross-file-impl.js +156 -0
- package/dist/core/ingestion/pipeline-phases/cross-file.d.ts +37 -0
- package/dist/core/ingestion/pipeline-phases/cross-file.js +63 -0
- package/dist/core/ingestion/pipeline-phases/index.d.ts +21 -0
- package/dist/core/ingestion/pipeline-phases/index.js +22 -0
- package/dist/core/ingestion/pipeline-phases/markdown.d.ts +17 -0
- package/dist/core/ingestion/pipeline-phases/markdown.js +33 -0
- package/dist/core/ingestion/pipeline-phases/mro.d.ts +18 -0
- package/dist/core/ingestion/pipeline-phases/mro.js +36 -0
- package/dist/core/ingestion/pipeline-phases/orm-extraction.d.ts +22 -0
- package/dist/core/ingestion/pipeline-phases/orm-extraction.js +92 -0
- package/dist/core/ingestion/pipeline-phases/orm.d.ts +15 -0
- package/dist/core/ingestion/pipeline-phases/orm.js +74 -0
- package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +47 -0
- package/dist/core/ingestion/pipeline-phases/parse-impl.js +437 -0
- package/dist/core/ingestion/pipeline-phases/parse.d.ts +49 -0
- package/dist/core/ingestion/pipeline-phases/parse.js +33 -0
- package/dist/core/ingestion/pipeline-phases/processes.d.ts +16 -0
- package/dist/core/ingestion/pipeline-phases/processes.js +143 -0
- package/dist/core/ingestion/pipeline-phases/routes.d.ts +21 -0
- package/dist/core/ingestion/pipeline-phases/routes.js +243 -0
- package/dist/core/ingestion/pipeline-phases/runner.d.ts +22 -0
- package/dist/core/ingestion/pipeline-phases/runner.js +203 -0
- package/dist/core/ingestion/pipeline-phases/scan.d.ts +21 -0
- package/dist/core/ingestion/pipeline-phases/scan.js +46 -0
- package/dist/core/ingestion/pipeline-phases/structure.d.ts +27 -0
- package/dist/core/ingestion/pipeline-phases/structure.js +35 -0
- package/dist/core/ingestion/pipeline-phases/tools.d.ts +20 -0
- package/dist/core/ingestion/pipeline-phases/tools.js +79 -0
- package/dist/core/ingestion/pipeline-phases/types.d.ts +79 -0
- package/dist/core/ingestion/pipeline-phases/types.js +37 -0
- package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +35 -0
- package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +174 -0
- package/dist/core/ingestion/pipeline.d.ts +16 -10
- package/dist/core/ingestion/pipeline.js +66 -1534
- package/dist/core/ingestion/process-processor.js +1 -1
- package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
- package/dist/core/ingestion/tree-sitter-queries.js +69 -0
- package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -3
- package/dist/core/ingestion/utils/ast-helpers.js +48 -21
- package/dist/core/ingestion/utils/env.d.ts +10 -0
- package/dist/core/ingestion/utils/env.js +10 -0
- package/dist/core/ingestion/utils/graph-sort.d.ts +58 -0
- package/dist/core/ingestion/utils/graph-sort.js +100 -0
- package/dist/core/ingestion/workers/parse-worker.js +12 -8
- package/dist/core/lbug/lbug-adapter.js +66 -24
- package/package.json +3 -3
- package/vendor/tree-sitter-proto/binding.gyp +30 -0
- package/vendor/tree-sitter-proto/bindings/node/binding.cc +20 -0
- package/vendor/tree-sitter-proto/bindings/node/index.d.ts +28 -0
- package/vendor/tree-sitter-proto/bindings/node/index.js +7 -0
- package/vendor/tree-sitter-proto/package.json +18 -0
- package/vendor/tree-sitter-proto/src/node-types.json +1145 -0
- package/vendor/tree-sitter-proto/src/parser.c +10149 -0
- package/vendor/tree-sitter-proto/src/tree_sitter/alloc.h +54 -0
- package/vendor/tree-sitter-proto/src/tree_sitter/array.h +291 -0
- package/vendor/tree-sitter-proto/src/tree_sitter/parser.h +266 -0
|
@@ -1,19 +1,29 @@
|
|
|
1
|
-
import * as fs from 'node:fs';
|
|
2
1
|
import * as path from 'node:path';
|
|
3
2
|
import { glob } from 'glob';
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
3
|
+
import Parser from 'tree-sitter';
|
|
4
|
+
import { readSafe } from './fs-utils.js';
|
|
5
|
+
import { GRPC_SCAN_GLOB, getPluginForFile, hasProtoPlugin, } from './grpc-patterns/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* Language-agnostic orchestrator for gRPC (provider + consumer) contract
|
|
8
|
+
* extraction.
|
|
9
|
+
*
|
|
10
|
+
* Two parts:
|
|
11
|
+
*
|
|
12
|
+
* 1. **`.proto` parsing** — tree-sitter when `tree-sitter-proto` is
|
|
13
|
+
* installed (optionalDependency vendored in `vendor/tree-sitter-proto/`),
|
|
14
|
+
* via the `.proto` entry in `grpc-patterns/` and `hasProtoPlugin`.
|
|
15
|
+
* When the grammar isn't available (platform incompatibility, native
|
|
16
|
+
* build failure) the orchestrator falls back to the in-process
|
|
17
|
+
* string-sanitizing parser defined below (`stripProtoCommentsAndStrings`
|
|
18
|
+
* + `extractServiceBlocks`). The fallback preserves offsets so any
|
|
19
|
+
* downstream regex scans run against a sanitized copy without
|
|
20
|
+
* affecting line numbers of the original.
|
|
21
|
+
*
|
|
22
|
+
* 2. **Source-scan providers / consumers** — delegated to per-language
|
|
23
|
+
* plugins in `./grpc-patterns/`. The orchestrator imports NO
|
|
24
|
+
* tree-sitter grammars or query strings — each plugin owns its own.
|
|
25
|
+
*/
|
|
26
|
+
// ─── .proto fallback parser (used only when tree-sitter-proto is absent) ───
|
|
17
27
|
function contractId(pkg, service, method) {
|
|
18
28
|
const prefix = pkg ? `${pkg}.${service}` : service;
|
|
19
29
|
return `grpc::${prefix}/${method}`;
|
|
@@ -21,18 +31,103 @@ function contractId(pkg, service, method) {
|
|
|
21
31
|
// Wildcard contract id for a service whose proto package is not known.
// The id is the service name wrapped as "grpc::<serviceName>/*".
function serviceOnlyContractId(serviceName) {
    return 'grpc::'.concat(serviceName, '/*');
}
|
|
34
|
+
/**
 * Return a copy of `content` in which every .proto comment and string
 * literal has been overwritten with spaces.
 *
 * The result always has the same length as the input and every line
 * terminator (`\n`, `\r`) stays at its original offset, so match indices
 * and line numbers computed on the sanitized copy are valid for the
 * original text as well.
 *
 * Handled forms:
 *   - line comments (`// ...` up to, not including, the newline)
 *   - block comments (slash-star ... star-slash); unterminated runs to EOF
 *   - double- and single-quoted strings with backslash escapes;
 *     unterminated strings run to EOF
 *
 * @param content Raw .proto source text.
 * @returns The sanitized text, same length as `content`.
 */
function stripProtoCommentsAndStrings(content) {
    const out = new Array(content.length);
    // Blank a character unless it is a line terminator, which must survive
    // so that line structure is identical between input and output.
    const blank = (c) => (c === '\n' || c === '\r' ? c : ' ');
    let i = 0;
    while (i < content.length) {
        const ch = content[i];
        const next = content[i + 1];
        // Line comment: // ... \n  (the newline itself is left for the
        // default branch below, which copies it verbatim).
        if (ch === '/' && next === '/') {
            out[i] = ' ';
            out[i + 1] = ' ';
            i += 2;
            while (i < content.length && content[i] !== '\n') {
                out[i] = blank(content[i]);
                i++;
            }
            continue;
        }
        // Block comment: runs until the closing star-slash or EOF.
        if (ch === '/' && next === '*') {
            out[i] = ' ';
            out[i + 1] = ' ';
            i += 2;
            while (i < content.length) {
                if (content[i] === '*' && content[i + 1] === '/') {
                    out[i] = ' ';
                    out[i + 1] = ' ';
                    i += 2;
                    break;
                }
                out[i] = blank(content[i]);
                i++;
            }
            continue;
        }
        // String literal: "..." or '...'
        if (ch === '"' || ch === "'") {
            const quote = ch;
            out[i] = ' '; // opening quote
            i++;
            while (i < content.length) {
                const c = content[i];
                if (c === '\\' && i + 1 < content.length) {
                    // Escaped pair (\" \\ \n ...). The escaped character is
                    // consumed so an escaped quote cannot end the string, but
                    // if it is a raw line break it is still preserved —
                    // otherwise a backslash-newline would shift line numbers
                    // in the sanitized copy.
                    out[i] = ' ';
                    out[i + 1] = blank(content[i + 1]);
                    i += 2;
                    continue;
                }
                if (c === quote) {
                    out[i] = ' '; // closing quote
                    i++;
                    break;
                }
                // Proto disallows raw newlines inside strings, but keep them
                // if present so offsets and line counts stay aligned.
                out[i] = blank(c);
                i++;
            }
            continue;
        }
        out[i] = ch;
        i++;
    }
    return out.join('');
}
|
|
24
114
|
function extractServiceBlocks(content) {
|
|
25
115
|
const results = [];
|
|
26
|
-
//
|
|
116
|
+
// Sanitize comments and string literals so braces inside them don't
|
|
117
|
+
// throw off the depth counter. The sanitized copy has the same length
|
|
118
|
+
// and offsets as the original, so we use it ONLY to scan for service
|
|
119
|
+
// headers and braces; the service body we return is sliced from the
|
|
120
|
+
// ORIGINAL content to preserve exact source text for downstream use.
|
|
121
|
+
const sanitized = stripProtoCommentsAndStrings(content);
|
|
27
122
|
const headerRe = /service\s+(\w+)\s*\{/g;
|
|
28
123
|
let headerMatch;
|
|
29
|
-
while ((headerMatch = headerRe.exec(
|
|
124
|
+
while ((headerMatch = headerRe.exec(sanitized)) !== null) {
|
|
30
125
|
const serviceName = headerMatch[1];
|
|
31
126
|
const bodyStart = headerMatch.index + headerMatch[0].length;
|
|
32
127
|
let depth = 1;
|
|
33
128
|
let pos = bodyStart;
|
|
34
|
-
while (pos <
|
|
35
|
-
const ch =
|
|
129
|
+
while (pos < sanitized.length && depth > 0) {
|
|
130
|
+
const ch = sanitized[pos];
|
|
36
131
|
if (ch === '{')
|
|
37
132
|
depth++;
|
|
38
133
|
else if (ch === '}')
|
|
@@ -60,6 +155,137 @@ function makeContract(cid, role, filePath, symbolName, confidence, meta) {
|
|
|
60
155
|
meta: { ...meta, extractionStrategy: 'source_scan' },
|
|
61
156
|
};
|
|
62
157
|
}
|
|
158
|
+
/**
 * Convert a relative path to forward-slash form by turning every
 * backslash into `/`, so paths compare equal regardless of separator.
 */
function normalizeProtoPath(rel) {
    return rel.split('\\').join('/');
}
|
|
161
|
+
/**
 * Collect the paths named by `import` statements in a .proto file.
 *
 * Recognizes every form the proto grammar allows at the start of a line:
 * plain `import "x.proto";`, the `public` / `weak` modifiers, and both
 * double- and single-quoted path literals. Lines that start with `//`
 * do not match because the statement must begin the line.
 *
 * @param content Raw .proto source text.
 * @returns Imported paths in source order (empty array when none).
 */
function extractProtoImports(content) {
    // Group 1 = double-quoted path, group 2 = single-quoted path.
    const importRe = /^\s*import\s+(?:(?:public|weak)\s+)?(?:"([^"]+)"|'([^']+)')\s*;/gm;
    return Array.from(content.matchAll(importRe), (m) => m[1] ?? m[2]);
}
|
|
170
|
+
/**
 * Length of the longest run of consecutive, identical path segments that
 * appears in both `/`-separated paths. Empty segments (leading, trailing
 * or doubled slashes) are ignored. Returns 0 when nothing is shared.
 */
function longestSharedSegmentRun(aPath, bPath) {
    const left = aPath.split('/').filter((segment) => segment.length > 0);
    const right = bPath.split('/').filter((segment) => segment.length > 0);
    let longest = 0;
    left.forEach((_unusedLeft, leftStart) => {
        right.forEach((_unusedRight, rightStart) => {
            // Extend the match from this pair of start positions as far as
            // both paths keep agreeing.
            let length = 0;
            while (leftStart + length < left.length &&
                rightStart + length < right.length &&
                left[leftStart + length] === right[rightStart + length]) {
                length += 1;
            }
            longest = Math.max(longest, length);
        });
    });
    return longest;
}
|
|
186
|
+
/**
 * Scan every `.proto` file under `repoPath` (skipping node_modules, .git
 * and vendor) and build two lookup tables:
 *
 *   - `packagesByProto`: normalized proto path -> package name ('' if none)
 *   - `servicesByName`:  short service name -> list of
 *     `{ package, serviceName, methods, protoPath }`, one entry per proto
 *     file that declares a service with that name
 *
 * A file without its own `package` statement takes the package of the
 * first import (resolved relative to the file, then relative to the repo
 * root) that yields a non-empty package.
 *
 * `package` and `rpc` statements are matched on a comment/string-sanitized
 * copy of the text, so commented-out declarations are not reported.
 *
 * @param repoPath Repository root used as the glob cwd and read base.
 * @returns `{ packagesByProto, servicesByName }`.
 */
async function buildProtoContext(repoPath) {
    const servicesByName = new Map();
    const protoFiles = await glob('**/*.proto', {
        cwd: repoPath,
        absolute: false,
        nodir: true,
        ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**'],
    });
    // Normalized relative path -> raw file text (unreadable/empty files skipped).
    const contents = new Map();
    for (const rel of protoFiles) {
        const content = readSafe(repoPath, rel);
        if (!content)
            continue;
        contents.set(normalizeProtoPath(rel), content);
    }
    const packagesByProto = new Map();
    // Memoized, cycle-safe package lookup. `seen` is shared down one
    // resolution chain so mutually importing files terminate with ''.
    const resolvePackage = (protoPath, seen = new Set()) => {
        if (packagesByProto.has(protoPath))
            return packagesByProto.get(protoPath) ?? '';
        if (seen.has(protoPath))
            return '';
        const content = contents.get(protoPath);
        if (!content)
            return '';
        seen.add(protoPath);
        // Match on the sanitized copy so a `package x;` line that sits inside
        // a block comment is not mistaken for the real declaration.
        const pkgMatch = stripProtoCommentsAndStrings(content).match(/^\s*package\s+([\w.]+)\s*;/m);
        if (pkgMatch?.[1]) {
            packagesByProto.set(protoPath, pkgMatch[1]);
            return pkgMatch[1];
        }
        // Import paths are string literals, so they must be read from the
        // original (unsanitized) text.
        for (const importPath of extractProtoImports(content)) {
            const normalizedImport = normalizeProtoPath(importPath);
            const candidates = [
                normalizeProtoPath(path.posix.normalize(path.posix.join(path.posix.dirname(protoPath), normalizedImport))),
                normalizedImport,
            ];
            for (const candidate of candidates) {
                if (!contents.has(candidate))
                    continue;
                const inheritedPackage = resolvePackage(candidate, seen);
                if (inheritedPackage) {
                    packagesByProto.set(protoPath, inheritedPackage);
                    return inheritedPackage;
                }
            }
        }
        packagesByProto.set(protoPath, '');
        return '';
    };
    for (const rel of protoFiles) {
        const normalizedRel = normalizeProtoPath(rel);
        const content = contents.get(normalizedRel);
        if (!content)
            continue;
        const pkg = resolvePackage(normalizedRel);
        for (const block of extractServiceBlocks(content)) {
            // The service body is sliced from the original text, so it still
            // contains comments. Sanitize before scanning, otherwise a
            // commented-out `rpc Foo(...)` would be reported as a live method.
            const sanitizedBody = stripProtoCommentsAndStrings(block.body);
            const methods = Array.from(sanitizedBody.matchAll(/rpc\s+(\w+)\s*\(/g), (m) => m[1]);
            const info = {
                package: pkg,
                serviceName: block.name,
                methods,
                protoPath: normalizedRel,
            };
            const existing = servicesByName.get(block.name) ?? [];
            existing.push(info);
            servicesByName.set(block.name, existing);
        }
    }
    return { packagesByProto, servicesByName };
}
|
|
262
|
+
/**
 * Resolve to the service-name -> proto-info map for the repository at
 * `repoPath` (the `servicesByName` part of the proto context).
 */
export async function buildProtoMap(repoPath) {
    const context = await buildProtoContext(repoPath);
    return context.servicesByName;
}
|
|
266
|
+
/**
 * Pick the proto definition that best matches a source file when several
 * proto files declare a service with the same short name.
 *
 * The winner is the candidate whose directory shares the longest run of
 * consecutive path segments with the source file's directory; on a tie
 * the earliest candidate wins.
 *
 * @param _serviceName Unused; kept for the call signature.
 * @param sourceFilePath Path of the file that referenced the service.
 * @param candidates Proto infos (each with a `protoPath`) to choose from.
 * @returns The chosen candidate, or `null` when `candidates` is empty.
 */
export function resolveProtoConflict(_serviceName, sourceFilePath, candidates) {
    switch (candidates.length) {
        case 0:
            return null;
        case 1:
            return candidates[0];
        default:
            break;
    }
    const originDir = normalizeProtoPath(path.dirname(sourceFilePath));
    // Seed with the first candidate at score -1 so any real score (>= 0)
    // replaces it, while later equal scores never displace the leader.
    const leader = candidates.reduce((lead, candidate) => {
        const candidateDir = normalizeProtoPath(path.dirname(candidate.protoPath));
        const score = longestSharedSegmentRun(originDir, candidateDir);
        return score > lead.score ? { score, candidate } : lead;
    }, { score: -1, candidate: candidates[0] });
    return leader.candidate;
}
|
|
284
|
+
// Service-level (wildcard) contract id. With a package the id is
// "grpc::<pkg>.<serviceName>/*"; with an empty package the package and dot
// are omitted.
export function serviceContractId(pkg, serviceName) {
    if (pkg) {
        return 'grpc::'.concat(pkg, '.', serviceName, '/*');
    }
    return 'grpc::'.concat(serviceName, '/*');
}
|
|
288
|
+
// ─── Orchestrator ────────────────────────────────────────────────────
|
|
63
289
|
export class GrpcExtractor {
|
|
64
290
|
type = 'grpc';
|
|
65
291
|
async canExtract(_repo) {
|
|
@@ -67,198 +293,94 @@ export class GrpcExtractor {
|
|
|
67
293
|
}
|
|
68
294
|
async extract(_dbExecutor, repoPath, _repo) {
|
|
69
295
|
const out = [];
|
|
70
|
-
|
|
71
|
-
const
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
296
|
+
const protoContext = await buildProtoContext(repoPath);
|
|
297
|
+
const protoMap = protoContext.servicesByName;
|
|
298
|
+
// ─── Proto files — definitive provider source ─────────────────
|
|
299
|
+
// When tree-sitter-proto is available, .proto files are handled by
|
|
300
|
+
// the plugin loop below (they're in GRPC_SCAN_GLOB). Otherwise
|
|
301
|
+
// emit provider contracts directly from the proto map that
|
|
302
|
+
// `buildProtoContext` already built — no second glob / parse pass.
|
|
303
|
+
if (!hasProtoPlugin) {
|
|
304
|
+
for (const infos of protoMap.values()) {
|
|
305
|
+
for (const info of infos) {
|
|
306
|
+
for (const methodName of info.methods) {
|
|
307
|
+
const cid = contractId(info.package, info.serviceName, methodName);
|
|
308
|
+
out.push(makeContract(cid, 'provider', info.protoPath, `${info.serviceName}.${methodName}`, 0.85, {
|
|
309
|
+
package: info.package,
|
|
310
|
+
service: info.serviceName,
|
|
311
|
+
method: methodName,
|
|
312
|
+
source: 'proto',
|
|
313
|
+
}));
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
}
|
|
80
317
|
}
|
|
81
|
-
// Source files
|
|
82
|
-
const sourceFiles = await glob(
|
|
318
|
+
// ─── Source files (+ .proto when plugin available) ────────────
|
|
319
|
+
const sourceFiles = await glob(GRPC_SCAN_GLOB, {
|
|
83
320
|
cwd: repoPath,
|
|
84
321
|
ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**', '**/dist/**', '**/build/**'],
|
|
85
322
|
nodir: true,
|
|
86
323
|
});
|
|
324
|
+
const parser = new Parser();
|
|
87
325
|
for (const rel of sourceFiles) {
|
|
326
|
+
const plugin = getPluginForFile(rel);
|
|
327
|
+
if (!plugin)
|
|
328
|
+
continue;
|
|
88
329
|
const content = readSafe(repoPath, rel);
|
|
89
330
|
if (!content)
|
|
90
331
|
continue;
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
else if (ext === '.java') {
|
|
97
|
-
out.push(...this.scanJavaProviders(content, rel));
|
|
98
|
-
out.push(...this.scanJavaConsumers(content, rel));
|
|
332
|
+
let detections = [];
|
|
333
|
+
try {
|
|
334
|
+
parser.setLanguage(plugin.language);
|
|
335
|
+
const tree = parser.parse(content);
|
|
336
|
+
detections = plugin.scan(tree);
|
|
99
337
|
}
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
out.push(...this.scanPythonConsumers(content, rel));
|
|
338
|
+
catch {
|
|
339
|
+
continue;
|
|
103
340
|
}
|
|
104
|
-
|
|
105
|
-
out.push(
|
|
341
|
+
for (const d of detections) {
|
|
342
|
+
out.push(this.detectionToContract(d, rel, protoMap));
|
|
106
343
|
}
|
|
107
344
|
}
|
|
108
345
|
return this.dedupe(out);
|
|
109
346
|
}
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
let m;
|
|
135
|
-
while ((m = registerRe.exec(content)) !== null) {
|
|
136
|
-
const serviceName = m[1];
|
|
137
|
-
out.push(makeContract(serviceOnlyContractId(serviceName), 'provider', filePath, `Register${serviceName}Server`, 0.8, { service: serviceName, source: 'go_register' }));
|
|
138
|
-
}
|
|
139
|
-
// pb.UnimplementedXxxServer
|
|
140
|
-
const unimplRe = /\w+\.Unimplemented(\w+)Server\b/g;
|
|
141
|
-
while ((m = unimplRe.exec(content)) !== null) {
|
|
142
|
-
const serviceName = m[1];
|
|
143
|
-
out.push(makeContract(serviceOnlyContractId(serviceName), 'provider', filePath, `Unimplemented${serviceName}Server`, 0.8, { service: serviceName, source: 'go_unimplemented' }));
|
|
144
|
-
}
|
|
145
|
-
return out;
|
|
146
|
-
}
|
|
147
|
-
scanGoConsumers(content, filePath) {
|
|
148
|
-
const out = [];
|
|
149
|
-
const re = /\w+\.New(\w+)Client\s*\(/g;
|
|
150
|
-
let m;
|
|
151
|
-
while ((m = re.exec(content)) !== null) {
|
|
152
|
-
const serviceName = m[1];
|
|
153
|
-
out.push(makeContract(serviceOnlyContractId(serviceName), 'consumer', filePath, `New${serviceName}Client`, 0.7, { service: serviceName, source: 'go_client' }));
|
|
154
|
-
}
|
|
155
|
-
return out;
|
|
156
|
-
}
|
|
157
|
-
scanJavaProviders(content, filePath) {
|
|
158
|
-
const out = [];
|
|
159
|
-
// @GrpcService
|
|
160
|
-
if (content.includes('@GrpcService')) {
|
|
161
|
-
const implBaseRe = /extends\s+(\w+)Grpc\.(\w+)ImplBase/;
|
|
162
|
-
const m = content.match(implBaseRe);
|
|
163
|
-
if (m) {
|
|
164
|
-
out.push(makeContract(serviceOnlyContractId(m[1]), 'provider', filePath, m[2], 0.8, {
|
|
165
|
-
service: m[1],
|
|
166
|
-
source: 'java_grpc_service',
|
|
167
|
-
}));
|
|
168
|
-
}
|
|
169
|
-
else {
|
|
170
|
-
// Try extracting service name from class name
|
|
171
|
-
const classRe = /class\s+(\w*?)(?:Grpc)?(?:Service)?\s+extends\s+(\w+)(?:Grpc\.(\w+))?ImplBase/;
|
|
172
|
-
const cm = content.match(classRe);
|
|
173
|
-
if (cm) {
|
|
174
|
-
const svcName = cm[2].replace(/Grpc$/, '');
|
|
175
|
-
out.push(makeContract(serviceOnlyContractId(svcName), 'provider', filePath, cm[1], 0.8, {
|
|
176
|
-
service: svcName,
|
|
177
|
-
source: 'java_grpc_service',
|
|
178
|
-
}));
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
// extends XxxImplBase (without @GrpcService)
|
|
183
|
-
if (!content.includes('@GrpcService')) {
|
|
184
|
-
const implRe = /extends\s+(\w+?)(?:Grpc\.(\w+))?ImplBase/;
|
|
185
|
-
const m = content.match(implRe);
|
|
186
|
-
if (m) {
|
|
187
|
-
const svcName = m[2] || m[1].replace(/Grpc$/, '');
|
|
188
|
-
out.push(makeContract(serviceOnlyContractId(svcName), 'provider', filePath, svcName, 0.8, {
|
|
189
|
-
service: svcName,
|
|
190
|
-
source: 'java_impl_base',
|
|
191
|
-
}));
|
|
192
|
-
}
|
|
193
|
-
}
|
|
194
|
-
return out;
|
|
195
|
-
}
|
|
196
|
-
scanJavaConsumers(content, filePath) {
|
|
197
|
-
const out = [];
|
|
198
|
-
// XxxGrpc.newBlockingStub( or XxxGrpc.newStub(
|
|
199
|
-
const re = /(\w+)Grpc\.new(?:Blocking)?Stub\s*\(/g;
|
|
200
|
-
let m;
|
|
201
|
-
while ((m = re.exec(content)) !== null) {
|
|
202
|
-
const serviceName = m[1];
|
|
203
|
-
out.push(makeContract(serviceOnlyContractId(serviceName), 'consumer', filePath, `${serviceName}Stub`, 0.7, { service: serviceName, source: 'java_stub' }));
|
|
204
|
-
}
|
|
205
|
-
return out;
|
|
206
|
-
}
|
|
207
|
-
scanPythonProviders(content, filePath) {
|
|
208
|
-
const out = [];
|
|
209
|
-
// add_XxxServicer_to_server(
|
|
210
|
-
const re = /add_(\w+?)Servicer_to_server\s*\(/g;
|
|
211
|
-
let m;
|
|
212
|
-
while ((m = re.exec(content)) !== null) {
|
|
213
|
-
const serviceName = m[1];
|
|
214
|
-
out.push(makeContract(serviceOnlyContractId(serviceName), 'provider', filePath, `add_${serviceName}Servicer_to_server`, 0.8, { service: serviceName, source: 'python_servicer' }));
|
|
215
|
-
}
|
|
216
|
-
return out;
|
|
217
|
-
}
|
|
218
|
-
scanPythonConsumers(content, filePath) {
|
|
219
|
-
const out = [];
|
|
220
|
-
// XxxStub(
|
|
221
|
-
const re = /(\w+)Stub\s*\(/g;
|
|
222
|
-
let m;
|
|
223
|
-
while ((m = re.exec(content)) !== null) {
|
|
224
|
-
const name = m[1];
|
|
225
|
-
// Filter out common false positives
|
|
226
|
-
if (['Mock', 'Test', 'Fake', 'Stub'].includes(name))
|
|
227
|
-
continue;
|
|
228
|
-
out.push(makeContract(serviceOnlyContractId(name), 'consumer', filePath, `${name}Stub`, 0.7, {
|
|
229
|
-
service: name,
|
|
230
|
-
source: 'python_stub',
|
|
231
|
-
}));
|
|
232
|
-
}
|
|
233
|
-
return out;
|
|
234
|
-
}
|
|
235
|
-
scanTsProviders(content, filePath) {
|
|
236
|
-
const out = [];
|
|
237
|
-
// @GrpcMethod('ServiceName', 'MethodName')
|
|
238
|
-
const re = /@GrpcMethod\s*\(\s*['"](\w+)['"]\s*,\s*['"](\w+)['"]\s*\)/g;
|
|
239
|
-
let m;
|
|
240
|
-
while ((m = re.exec(content)) !== null) {
|
|
241
|
-
const serviceName = m[1];
|
|
242
|
-
const methodName = m[2];
|
|
243
|
-
const cid = contractId('', serviceName, methodName);
|
|
244
|
-
out.push(makeContract(cid, 'provider', filePath, `${serviceName}.${methodName}`, 0.8, {
|
|
245
|
-
service: serviceName,
|
|
246
|
-
method: methodName,
|
|
247
|
-
source: 'ts_grpc_method',
|
|
248
|
-
}));
|
|
249
|
-
}
|
|
250
|
-
return out;
|
|
347
|
+
/**
|
|
348
|
+
* Convert a plugin `GrpcDetection` into a concrete `ExtractedContract`
|
|
349
|
+
* by resolving the short service name against the proto map, building
|
|
350
|
+
* either a service-level (`grpc::pkg.Svc/*`) or method-level
|
|
351
|
+
* (`grpc::pkg.Svc/Method`) contract id, and selecting confidence
|
|
352
|
+
* based on whether the proto map had an entry.
|
|
353
|
+
*/
|
|
354
|
+
detectionToContract(d, filePath, protoMap) {
|
|
355
|
+
const candidates = protoMap.get(d.serviceName);
|
|
356
|
+
const proto = resolveProtoConflict(d.serviceName, filePath, candidates ?? []);
|
|
357
|
+
const pkg = proto?.package ?? '';
|
|
358
|
+
const cid = d.methodName
|
|
359
|
+
? contractId(pkg, d.serviceName, d.methodName)
|
|
360
|
+
: proto
|
|
361
|
+
? serviceContractId(pkg, d.serviceName)
|
|
362
|
+
: serviceOnlyContractId(d.serviceName);
|
|
363
|
+
const confidence = proto ? d.confidenceWithProto : d.confidenceWithoutProto;
|
|
364
|
+
const meta = {
|
|
365
|
+
service: d.serviceName,
|
|
366
|
+
source: d.source,
|
|
367
|
+
};
|
|
368
|
+
if (d.methodName)
|
|
369
|
+
meta.method = d.methodName;
|
|
370
|
+
return makeContract(cid, d.role, filePath, d.symbolName, confidence, meta);
|
|
251
371
|
}
|
|
252
372
|
dedupe(items) {
|
|
253
|
-
const
|
|
254
|
-
const out = [];
|
|
373
|
+
const byKey = new Map();
|
|
255
374
|
for (const c of items) {
|
|
256
375
|
const k = `${c.contractId}|${c.role}|${c.symbolRef.filePath}`;
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
376
|
+
const existing = byKey.get(k);
|
|
377
|
+
if (!existing ||
|
|
378
|
+
c.confidence > existing.confidence ||
|
|
379
|
+
(c.confidence === existing.confidence &&
|
|
380
|
+
String(c.meta.source) < String(existing.meta.source))) {
|
|
381
|
+
byKey.set(k, c);
|
|
382
|
+
}
|
|
261
383
|
}
|
|
262
|
-
return
|
|
384
|
+
return Array.from(byKey.values());
|
|
263
385
|
}
|
|
264
386
|
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
import Go from 'tree-sitter-go';
|
|
2
|
+
import { compilePatterns, runCompiledPatterns, } from '../tree-sitter-scanner.js';
|
|
3
|
+
/**
|
|
4
|
+
* Go gRPC plugin. Detects:
|
|
5
|
+
* - Provider: `pb.RegisterXxxServer(...)` calls
|
|
6
|
+
* - Provider: `pb.UnimplementedXxxServer` embedded in a struct
|
|
7
|
+
* - Consumer: `pb.NewXxxClient(conn)` calls
|
|
8
|
+
*/
|
|
9
|
+
// Name filters applied to captured identifier text. Each is anchored to the
// whole identifier; capture group 1 is the service name.
const REGISTER_RE = /^Register(\w+)Server$/;
const UNIMPLEMENTED_RE = /^Unimplemented(\w+)Server$/;
const NEW_CLIENT_RE = /^New(\w+)Client$/;
// Any `xxx.<fn>(...)` call — plugin filters the field identifier text.
// Captures the called field identifier as `fn`.
const SELECTOR_CALL_PATTERNS = compilePatterns({
    name: 'go-grpc-selector-call',
    language: Go,
    patterns: [
        {
            meta: {},
            query: `
(call_expression
function: (selector_expression
field: (field_identifier) @fn))
`,
        },
    ],
});
// Any `qualified_type` used as a struct field — for `pb.UnimplementedXxxServer`.
// Captures the type identifier (the part after the package qualifier) as
// `field_type`.
const STRUCT_EMBEDDING_PATTERNS = compilePatterns({
    name: 'go-grpc-struct-embedding',
    language: Go,
    patterns: [
        {
            meta: {},
            query: `
(struct_type
(field_declaration_list
(field_declaration
type: (qualified_type
name: (type_identifier) @field_type))))
`,
        },
    ],
});
|
|
44
|
+
/**
 * gRPC detection plugin for Go sources.
 *
 * `scan(tree)` returns detections in this order: first every selector call
 * whose function name matches `Register<Svc>Server` (provider) or
 * `New<Svc>Client` (consumer), then every qualified struct field type
 * matching `Unimplemented<Svc>Server` (provider).
 */
export const GO_GRPC_PLUGIN = {
    name: 'go-grpc',
    language: Go,
    scan(tree) {
        const detections = [];
        for (const { captures } of runCompiledPatterns(SELECTOR_CALL_PATTERNS, tree)) {
            const callee = captures.fn;
            if (!callee) {
                continue;
            }
            const calleeName = callee.text;
            // Registration takes precedence; the client check only runs when
            // the name is not a Register...Server call.
            const asRegister = REGISTER_RE.exec(calleeName);
            if (asRegister) {
                detections.push({
                    role: 'provider',
                    serviceName: asRegister[1],
                    symbolName: calleeName,
                    source: 'go_register',
                    confidenceWithProto: 0.8,
                    confidenceWithoutProto: 0.65,
                });
            }
            else {
                const asClient = NEW_CLIENT_RE.exec(calleeName);
                if (asClient) {
                    detections.push({
                        role: 'consumer',
                        serviceName: asClient[1],
                        symbolName: calleeName,
                        source: 'go_client',
                        confidenceWithProto: 0.75,
                        confidenceWithoutProto: 0.55,
                    });
                }
            }
        }
        for (const { captures } of runCompiledPatterns(STRUCT_EMBEDDING_PATTERNS, tree)) {
            const fieldType = captures.field_type;
            if (!fieldType) {
                continue;
            }
            const typeName = fieldType.text;
            const asUnimplemented = UNIMPLEMENTED_RE.exec(typeName);
            if (asUnimplemented) {
                detections.push({
                    role: 'provider',
                    serviceName: asUnimplemented[1],
                    symbolName: typeName,
                    source: 'go_unimplemented',
                    confidenceWithProto: 0.8,
                    confidenceWithoutProto: 0.65,
                });
            }
        }
        return detections;
    },
};
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { GrpcLanguagePlugin } from './types.js';
|
|
2
|
+
export type { GrpcDetection, GrpcLanguagePlugin, GrpcRole } from './types.js';
|
|
3
|
+
export { PROTO_GRPC_PLUGIN, extractPackageFromTree } from './proto.js';
|
|
4
|
+
/**
|
|
5
|
+
* Glob for source files worth scanning for gRPC server/client patterns.
|
|
6
|
+
* Includes `.proto` when the grammar is available.
|
|
7
|
+
*/
|
|
8
|
+
export declare const GRPC_SCAN_GLOB: string;
|
|
9
|
+
/**
|
|
10
|
+
* Whether the tree-sitter proto plugin is available. The orchestrator
|
|
11
|
+
* uses this to decide between the tree-sitter path and the fallback
|
|
12
|
+
* manual parser for `.proto` files.
|
|
13
|
+
*/
|
|
14
|
+
export declare const hasProtoPlugin: boolean;
|
|
15
|
+
/**
|
|
16
|
+
* Return the gRPC plugin registered for the given file's extension,
|
|
17
|
+
* or `undefined` if the extension is not registered.
|
|
18
|
+
*/
|
|
19
|
+
export declare function getPluginForFile(rel: string): GrpcLanguagePlugin | undefined;
|