gitnexus 1.6.4-rc.71 → 1.6.4-rc.73

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,12 @@
1
1
  import { createRequire } from 'node:module';
2
2
  const _require = createRequire(import.meta.url);
3
3
  const yaml = _require('js-yaml');
4
- const VALID_CONTRACT_TYPES = ['http', 'grpc', 'topic', 'lib', 'custom'];
4
+ const VALID_CONTRACT_TYPES = ['http', 'grpc', 'thrift', 'topic', 'lib', 'custom'];
5
5
  const VALID_ROLES = ['provider', 'consumer'];
6
6
  const DEFAULT_DETECT = {
7
7
  http: true,
8
8
  grpc: true,
9
+ thrift: true,
9
10
  topics: true,
10
11
  shared_libs: true,
11
12
  embedding_fallback: true,
@@ -136,7 +136,7 @@ export class ManifestExtractor {
136
136
  return null;
137
137
  // NOTE: All lookups use EXACT equality on the relevant name field and
138
138
  // deterministic ORDER BY before LIMIT 1. Previous versions used CONTAINS
139
- // for fuzzy matching (plus an unconditional ".proto" fallback for gRPC)
139
+ // for fuzzy matching (plus an unconditional IDL file fallback for gRPC)
140
140
  // which produced silent false positives: e.g. manifest "/orders" would
141
141
  // match "/suborders", and a gRPC manifest entry in a repo with any
142
142
  // .proto file would attach to a random proto symbol.
@@ -180,16 +180,20 @@ export class ManifestExtractor {
180
180
  ORDER BY n.filePath ASC
181
181
  LIMIT 1`, { contract: link.contract });
182
182
  }
183
- else if (link.type === 'grpc') {
183
+ else if (link.type === 'grpc' || link.type === 'thrift') {
184
184
  // Contract is "Service/Method" or just "Service" (or package.Service
185
185
  // variants). Prefer matching by method name when present, otherwise
186
- // by service name. NO .proto path fallback — that's guaranteed to
187
- // return a wrong symbol in any repo with more than one proto file.
186
+ // by service name. Thrift generated Java classes often use
187
+ // package.Service in manifests while graph Class/Interface names are
188
+ // stored as bare Service, so strip the package prefix for thrift
189
+ // service-name lookups. NO IDL path fallback — that's guaranteed to
190
+ // return a wrong symbol in any repo with more than one IDL file.
188
191
  // Label filters scope lookups: methods → Function|Method, services
189
192
  // → Class|Interface (no label match = no silent wrong hits on
190
193
  // File/Variable nodes that happen to share the name).
191
194
  const parts = link.contract.split('/');
192
- const serviceName = parts[0]?.trim() ?? '';
195
+ const rawServiceName = parts[0]?.trim() ?? '';
196
+ const serviceName = link.type === 'thrift' ? (rawServiceName.split('.').pop() ?? '') : rawServiceName;
193
197
  const methodName = parts[1]?.trim() ?? '';
194
198
  if (methodName) {
195
199
  rows = await executor(`MATCH (n:Function|Method) WHERE n.name = $methodName
@@ -290,6 +294,8 @@ export class ManifestExtractor {
290
294
  }
291
295
  case 'grpc':
292
296
  return `grpc::${contract}`;
297
+ case 'thrift':
298
+ return `thrift::${contract}`;
293
299
  case 'topic':
294
300
  return `topic::${contract}`;
295
301
  case 'lib':
@@ -0,0 +1,22 @@
1
+ import type { ContractExtractor, CypherExecutor } from '../contract-extractor.js';
2
+ import type { ExtractedContract, RepoHandle } from '../types.js';
3
+ export interface ThriftServiceInfo {
4
+ namespace: string;
5
+ serviceName: string;
6
+ methods: string[];
7
+ thriftPath: string;
8
+ }
9
+ export interface ThriftContext {
10
+ namespacesByThrift: Map<string, string>;
11
+ servicesByName: Map<string, ThriftServiceInfo[]>;
12
+ }
13
+ export declare function thriftMethodContractId(namespace: string, serviceName: string, methodName: string): string;
14
+ export declare function thriftServiceContractId(namespace: string, serviceName: string): string;
15
+ export declare function buildThriftContext(repoPath: string): Promise<ThriftContext>;
16
+ export declare class ThriftExtractor implements ContractExtractor {
17
+ type: "thrift";
18
+ canExtract(_repo: RepoHandle): Promise<boolean>;
19
+ extract(_dbExecutor: CypherExecutor | null, repoPath: string, _repo: RepoHandle): Promise<ExtractedContract[]>;
20
+ private detectionToContract;
21
+ private dedupe;
22
+ }
@@ -0,0 +1,274 @@
1
+ import { glob } from 'glob';
2
+ import Parser from 'tree-sitter';
3
+ import { readSafe } from './fs-utils.js';
4
+ import { getPluginForFile, THRIFT_SCAN_GLOB, } from './thrift-patterns/index.js';
5
+ function normalizeThriftPath(rel) {
6
+ return rel.replace(/\\/g, '/');
7
+ }
8
+ export function thriftMethodContractId(namespace, serviceName, methodName) {
9
+ const prefix = namespace ? `${namespace}.${serviceName}` : serviceName;
10
+ return `thrift::${prefix}/${methodName}`;
11
+ }
12
+ export function thriftServiceContractId(namespace, serviceName) {
13
+ const prefix = namespace ? `${namespace}.${serviceName}` : serviceName;
14
+ return `thrift::${prefix}/*`;
15
+ }
16
+ /**
17
+ * Replace Thrift comments and string literals with spaces while preserving
18
+ * newlines and character offsets. Service block scanning can then count braces
19
+ * without being confused by examples or comments inside the IDL.
20
+ */
21
+ function stripThriftCommentsAndStrings(content) {
22
+ const out = new Array(content.length);
23
+ let i = 0;
24
+ while (i < content.length) {
25
+ const ch = content[i];
26
+ const next = content[i + 1];
27
+ if (ch === '/' && next === '/') {
28
+ out[i] = ' ';
29
+ out[i + 1] = ' ';
30
+ i += 2;
31
+ while (i < content.length && content[i] !== '\n') {
32
+ out[i] = content[i] === '\r' ? '\r' : ' ';
33
+ i++;
34
+ }
35
+ continue;
36
+ }
37
+ if (ch === '#') {
38
+ out[i] = ' ';
39
+ i++;
40
+ while (i < content.length && content[i] !== '\n') {
41
+ out[i] = content[i] === '\r' ? '\r' : ' ';
42
+ i++;
43
+ }
44
+ continue;
45
+ }
46
+ if (ch === '/' && next === '*') {
47
+ out[i] = ' ';
48
+ out[i + 1] = ' ';
49
+ i += 2;
50
+ while (i < content.length) {
51
+ if (content[i] === '*' && content[i + 1] === '/') {
52
+ out[i] = ' ';
53
+ out[i + 1] = ' ';
54
+ i += 2;
55
+ break;
56
+ }
57
+ out[i] = content[i] === '\n' || content[i] === '\r' ? content[i] : ' ';
58
+ i++;
59
+ }
60
+ continue;
61
+ }
62
+ if (ch === '"' || ch === "'") {
63
+ const quote = ch;
64
+ out[i] = ' ';
65
+ i++;
66
+ while (i < content.length) {
67
+ const c = content[i];
68
+ if (c === '\\' && i + 1 < content.length) {
69
+ out[i] = ' ';
70
+ out[i + 1] = ' ';
71
+ i += 2;
72
+ continue;
73
+ }
74
+ if (c === quote) {
75
+ out[i] = ' ';
76
+ i++;
77
+ break;
78
+ }
79
+ out[i] = c === '\n' || c === '\r' ? c : ' ';
80
+ i++;
81
+ }
82
+ continue;
83
+ }
84
+ out[i] = ch;
85
+ i++;
86
+ }
87
+ return out.join('');
88
+ }
89
+ function extractNamespace(sanitizedContent) {
90
+ const namespaces = [];
91
+ const namespaceRe = /^\s*namespace\s+([A-Za-z_*][\w.*-]*)\s+([A-Za-z_][\w.]*)\s*$/gm;
92
+ let match;
93
+ while ((match = namespaceRe.exec(sanitizedContent)) !== null) {
94
+ namespaces.push({ language: match[1], namespace: match[2] });
95
+ }
96
+ return (namespaces.find((entry) => entry.language === 'java')?.namespace ??
97
+ namespaces[0]?.namespace ??
98
+ '');
99
+ }
100
+ function extractServiceBlocks(sanitizedContent) {
101
+ const results = [];
102
+ const headerRe = /service\s+([A-Za-z_]\w*)\s*(?:extends\s+[A-Za-z_][\w.]*)?\s*\{/g;
103
+ let headerMatch;
104
+ while ((headerMatch = headerRe.exec(sanitizedContent)) !== null) {
105
+ const serviceName = headerMatch[1];
106
+ const bodyStart = headerMatch.index + headerMatch[0].length;
107
+ let depth = 1;
108
+ let pos = bodyStart;
109
+ while (pos < sanitizedContent.length && depth > 0) {
110
+ const ch = sanitizedContent[pos];
111
+ if (ch === '{')
112
+ depth++;
113
+ else if (ch === '}')
114
+ depth--;
115
+ pos++;
116
+ }
117
+ if (depth !== 0)
118
+ continue;
119
+ results.push({
120
+ name: serviceName,
121
+ body: sanitizedContent.slice(bodyStart, pos - 1),
122
+ });
123
+ }
124
+ return results;
125
+ }
126
+ function extractMethods(sanitizedServiceBody) {
127
+ const methods = [];
128
+ const methodRe = /(?:^|[;,\n\r])\s*(?:oneway\s+)?[A-Za-z_][\w.]*(?:\s*<[^(){};]*>)?\s+([A-Za-z_]\w*)\s*\(/g;
129
+ let match;
130
+ while ((match = methodRe.exec(sanitizedServiceBody)) !== null) {
131
+ methods.push(match[1]);
132
+ }
133
+ return methods;
134
+ }
135
+ function thriftSourceScanSymbolUid(contractId, role, filePath, symbolName) {
136
+ const contractKey = contractId.startsWith('thrift::')
137
+ ? contractId.slice('thrift::'.length)
138
+ : contractId;
139
+ return ['source-scan::thrift', role, contractKey, normalizeThriftPath(filePath), symbolName].join('::');
140
+ }
141
+ function makeContract(cid, role, filePath, symbolName, confidence, meta) {
142
+ return {
143
+ contractId: cid,
144
+ type: 'thrift',
145
+ role,
146
+ symbolUid: thriftSourceScanSymbolUid(cid, role, filePath, symbolName),
147
+ symbolRef: { filePath: normalizeThriftPath(filePath), name: symbolName },
148
+ symbolName,
149
+ confidence,
150
+ meta: { ...meta, extractionStrategy: 'source_scan' },
151
+ };
152
+ }
153
+ export async function buildThriftContext(repoPath) {
154
+ const thriftFiles = await glob('**/*.thrift', {
155
+ cwd: repoPath,
156
+ absolute: false,
157
+ nodir: true,
158
+ ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**', '**/dist/**', '**/build/**'],
159
+ });
160
+ const namespacesByThrift = new Map();
161
+ const servicesByName = new Map();
162
+ for (const rel of thriftFiles) {
163
+ const thriftPath = normalizeThriftPath(rel);
164
+ const content = readSafe(repoPath, rel);
165
+ if (!content)
166
+ continue;
167
+ const sanitized = stripThriftCommentsAndStrings(content);
168
+ const namespace = extractNamespace(sanitized);
169
+ namespacesByThrift.set(thriftPath, namespace);
170
+ for (const block of extractServiceBlocks(sanitized)) {
171
+ const methods = extractMethods(block.body);
172
+ const info = {
173
+ namespace,
174
+ serviceName: block.name,
175
+ methods,
176
+ thriftPath,
177
+ };
178
+ const existing = servicesByName.get(block.name) ?? [];
179
+ existing.push(info);
180
+ servicesByName.set(block.name, existing);
181
+ }
182
+ }
183
+ return { namespacesByThrift, servicesByName };
184
+ }
185
+ export class ThriftExtractor {
186
+ type = 'thrift';
187
+ async canExtract(_repo) {
188
+ return true;
189
+ }
190
+ async extract(_dbExecutor, repoPath, _repo) {
191
+ const out = [];
192
+ const context = await buildThriftContext(repoPath);
193
+ for (const infos of context.servicesByName.values()) {
194
+ for (const info of infos) {
195
+ for (const methodName of info.methods) {
196
+ const symbolName = `${info.serviceName}.${methodName}`;
197
+ out.push(makeContract(thriftMethodContractId(info.namespace, info.serviceName, methodName), 'provider', info.thriftPath, symbolName, 0.85, {
198
+ namespace: info.namespace,
199
+ service: info.serviceName,
200
+ method: methodName,
201
+ source: 'thrift_idl',
202
+ }));
203
+ }
204
+ }
205
+ }
206
+ const sourceFiles = await glob(THRIFT_SCAN_GLOB, {
207
+ cwd: repoPath,
208
+ absolute: false,
209
+ nodir: true,
210
+ ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**', '**/dist/**', '**/build/**'],
211
+ });
212
+ const parser = new Parser();
213
+ for (const rel of sourceFiles) {
214
+ const plugin = getPluginForFile(rel);
215
+ if (!plugin)
216
+ continue;
217
+ const content = readSafe(repoPath, rel);
218
+ if (!content)
219
+ continue;
220
+ let detections = [];
221
+ try {
222
+ parser.setLanguage(plugin.language);
223
+ const tree = parser.parse(content);
224
+ detections = plugin.scan(tree);
225
+ }
226
+ catch {
227
+ continue;
228
+ }
229
+ for (const detection of detections) {
230
+ const contract = this.detectionToContract(detection, rel, context);
231
+ if (contract)
232
+ out.push(contract);
233
+ }
234
+ }
235
+ return this.dedupe(out);
236
+ }
237
+ detectionToContract(detection, filePath, context) {
238
+ const candidates = context.servicesByName.get(detection.serviceName) ?? [];
239
+ if (candidates.length > 1)
240
+ return null;
241
+ const info = candidates[0];
242
+ if (info) {
243
+ if (!info.methods.includes(detection.methodName))
244
+ return null;
245
+ return makeContract(thriftMethodContractId(info.namespace, info.serviceName, detection.methodName), detection.role, filePath, detection.symbolName, detection.confidenceWithIdl, {
246
+ namespace: info.namespace,
247
+ service: info.serviceName,
248
+ method: detection.methodName,
249
+ source: detection.source,
250
+ });
251
+ }
252
+ if (detection.role !== 'consumer' ||
253
+ !detection.methodName ||
254
+ !detection.usesGeneratedServiceMember) {
255
+ return null;
256
+ }
257
+ return makeContract(thriftMethodContractId('', detection.serviceName, detection.methodName), detection.role, filePath, detection.symbolName, detection.confidenceWithoutIdl, {
258
+ service: detection.serviceName,
259
+ method: detection.methodName,
260
+ source: 'java_thrift_consumer_weak',
261
+ });
262
+ }
263
+ dedupe(items) {
264
+ const byKey = new Map();
265
+ for (const c of items) {
266
+ const key = `${c.contractId}|${c.role}|${c.symbolRef.filePath}|${c.symbolName}`;
267
+ const existing = byKey.get(key);
268
+ if (!existing || c.confidence > existing.confidence) {
269
+ byKey.set(key, c);
270
+ }
271
+ }
272
+ return Array.from(byKey.values());
273
+ }
274
+ }
@@ -0,0 +1,4 @@
1
+ import type { ThriftLanguagePlugin } from './types.js';
2
+ export type { ThriftDetection, ThriftLanguagePlugin, ThriftRole } from './types.js';
3
+ export declare const THRIFT_SCAN_GLOB = "**/*.java";
4
+ export declare function getPluginForFile(rel: string): ThriftLanguagePlugin | undefined;
@@ -0,0 +1,10 @@
1
+ import * as path from 'node:path';
2
+ import { JAVA_THRIFT_PLUGIN } from './java.js';
3
+ const REGISTRY = {
4
+ '.java': JAVA_THRIFT_PLUGIN,
5
+ };
6
+ export const THRIFT_SCAN_GLOB = '**/*.java';
7
+ export function getPluginForFile(rel) {
8
+ const ext = path.extname(rel).toLowerCase();
9
+ return REGISTRY[ext];
10
+ }
@@ -0,0 +1,2 @@
1
+ import type { ThriftLanguagePlugin } from './types.js';
2
+ export declare const JAVA_THRIFT_PLUGIN: ThriftLanguagePlugin;
@@ -0,0 +1,220 @@
1
+ import Java from 'tree-sitter-java';
2
+ import { compilePatterns, runCompiledPatterns, } from '../tree-sitter-scanner.js';
3
+ const GENERATED_MEMBER_TYPES = new Set(['Iface', 'Client']);
4
+ const SERVICE_TYPE_RE = /^[A-Z][A-Za-z0-9]*(?:Service|Management)$/;
5
+ const VARIABLE_PATTERNS = compilePatterns({
6
+ name: 'java-thrift-variables',
7
+ language: Java,
8
+ patterns: [
9
+ {
10
+ meta: {},
11
+ query: `
12
+ (field_declaration
13
+ type: (_) @type
14
+ declarator: (variable_declarator
15
+ name: (identifier) @var))
16
+ `,
17
+ },
18
+ {
19
+ meta: {},
20
+ query: `
21
+ (local_variable_declaration
22
+ type: (_) @type
23
+ declarator: (variable_declarator
24
+ name: (identifier) @var))
25
+ `,
26
+ },
27
+ {
28
+ meta: {},
29
+ query: `
30
+ (formal_parameter
31
+ type: (_) @type
32
+ name: (identifier) @var)
33
+ `,
34
+ },
35
+ ],
36
+ });
37
+ const CALL_PATTERNS = compilePatterns({
38
+ name: 'java-thrift-method-calls',
39
+ language: Java,
40
+ patterns: [
41
+ {
42
+ meta: {},
43
+ query: `
44
+ (method_invocation
45
+ object: (identifier) @receiver
46
+ name: (identifier) @method)
47
+ `,
48
+ },
49
+ {
50
+ meta: {},
51
+ query: `
52
+ (method_invocation
53
+ object: (field_access
54
+ object: (this)
55
+ field: (identifier) @receiver)
56
+ name: (identifier) @method)
57
+ `,
58
+ },
59
+ ],
60
+ });
61
+ const PROVIDER_PATTERNS = compilePatterns({
62
+ name: 'java-thrift-providers',
63
+ language: Java,
64
+ patterns: [
65
+ {
66
+ meta: {},
67
+ query: `
68
+ (class_declaration
69
+ name: (identifier) @class_name
70
+ (super_interfaces
71
+ (type_list
72
+ (_) @type))
73
+ body: (class_body) @body) @class
74
+ `,
75
+ },
76
+ ],
77
+ });
78
+ function serviceFromType(typeText) {
79
+ const segments = typeText.split('.').filter((segment) => segment.length > 0);
80
+ const last = segments.at(-1);
81
+ const service = segments.at(-2);
82
+ if (last && service && GENERATED_MEMBER_TYPES.has(last)) {
83
+ return { serviceName: service, usesGeneratedServiceMember: true };
84
+ }
85
+ return last && SERVICE_TYPE_RE.test(last)
86
+ ? { serviceName: last, usesGeneratedServiceMember: false }
87
+ : null;
88
+ }
89
+ function methodNamesInClassBody(body) {
90
+ const names = [];
91
+ for (let i = 0; i < body.namedChildCount; i++) {
92
+ const child = body.namedChild(i);
93
+ if (!child || child.type !== 'method_declaration')
94
+ continue;
95
+ const name = child.childForFieldName('name');
96
+ if (name?.text)
97
+ names.push(name.text);
98
+ }
99
+ return names;
100
+ }
101
+ function nearestAncestor(node, types) {
102
+ let current = node;
103
+ while (current) {
104
+ if (types.has(current.type))
105
+ return current;
106
+ current = current.parent;
107
+ }
108
+ return null;
109
+ }
110
+ function bindingScope(varNode) {
111
+ const declaration = nearestAncestor(varNode, new Set(['field_declaration', 'local_variable_declaration', 'formal_parameter']));
112
+ if (!declaration)
113
+ return null;
114
+ if (declaration.type === 'field_declaration') {
115
+ const classBody = nearestAncestor(declaration, new Set(['class_body']));
116
+ if (!classBody)
117
+ return null;
118
+ return { scope: classBody, declarationEnd: 0 };
119
+ }
120
+ if (declaration.type === 'formal_parameter') {
121
+ const callable = nearestAncestor(declaration, new Set(['method_declaration', 'constructor_declaration']));
122
+ if (!callable)
123
+ return null;
124
+ return { scope: callable, declarationEnd: 0 };
125
+ }
126
+ const block = nearestAncestor(declaration, new Set(['block']));
127
+ if (!block)
128
+ return null;
129
+ return { scope: block, declarationEnd: declaration.endIndex };
130
+ }
131
+ function resolveServiceForReceiver(bindings, receiver, callNode) {
132
+ const callStart = callNode.startIndex;
133
+ const candidates = bindings.filter((binding) => binding.name === receiver &&
134
+ binding.scopeStart <= callStart &&
135
+ callStart <= binding.scopeEnd &&
136
+ binding.declarationEnd <= callStart);
137
+ candidates.sort((a, b) => {
138
+ if (a.scopeSize !== b.scopeSize)
139
+ return a.scopeSize - b.scopeSize;
140
+ return b.declarationEnd - a.declarationEnd;
141
+ });
142
+ return candidates[0] ?? null;
143
+ }
144
+ export const JAVA_THRIFT_PLUGIN = {
145
+ name: 'java-thrift',
146
+ language: Java,
147
+ scan(tree) {
148
+ const out = [];
149
+ const bindings = [];
150
+ for (const match of runCompiledPatterns(VARIABLE_PATTERNS, tree)) {
151
+ const typeNode = match.captures.type;
152
+ const varNode = match.captures.var;
153
+ if (!typeNode || !varNode)
154
+ continue;
155
+ const service = serviceFromType(typeNode.text);
156
+ if (!service)
157
+ continue;
158
+ const scope = bindingScope(varNode);
159
+ if (!scope)
160
+ continue;
161
+ bindings.push({
162
+ name: varNode.text,
163
+ serviceName: service.serviceName,
164
+ usesGeneratedServiceMember: service.usesGeneratedServiceMember,
165
+ scopeStart: scope.scope.startIndex,
166
+ scopeEnd: scope.scope.endIndex,
167
+ declarationEnd: scope.declarationEnd,
168
+ scopeSize: scope.scope.endIndex - scope.scope.startIndex,
169
+ });
170
+ }
171
+ for (const match of runCompiledPatterns(CALL_PATTERNS, tree)) {
172
+ const receiver = match.captures.receiver?.text;
173
+ const methodName = match.captures.method?.text;
174
+ const callNode = match.captures.receiver?.parent;
175
+ if (!receiver || !methodName)
176
+ continue;
177
+ if (!callNode)
178
+ continue;
179
+ const binding = resolveServiceForReceiver(bindings, receiver, callNode);
180
+ if (!binding)
181
+ continue;
182
+ out.push({
183
+ role: 'consumer',
184
+ serviceName: binding.serviceName,
185
+ methodName,
186
+ symbolName: `${receiver}.${methodName}`,
187
+ source: 'java_thrift_consumer',
188
+ confidenceWithIdl: 0.75,
189
+ confidenceWithoutIdl: 0.45,
190
+ usesGeneratedServiceMember: binding.usesGeneratedServiceMember,
191
+ });
192
+ }
193
+ const emittedProviders = new Set();
194
+ for (const match of runCompiledPatterns(PROVIDER_PATTERNS, tree)) {
195
+ const typeNode = match.captures.type;
196
+ const bodyNode = match.captures.body;
197
+ if (!typeNode || !bodyNode)
198
+ continue;
199
+ const service = serviceFromType(typeNode.text);
200
+ if (!service)
201
+ continue;
202
+ for (const methodName of methodNamesInClassBody(bodyNode)) {
203
+ const key = `${service.serviceName}.${methodName}`;
204
+ if (emittedProviders.has(key))
205
+ continue;
206
+ emittedProviders.add(key);
207
+ out.push({
208
+ role: 'provider',
209
+ serviceName: service.serviceName,
210
+ methodName,
211
+ symbolName: `${service.serviceName}.${methodName}`,
212
+ source: 'java_thrift_provider',
213
+ confidenceWithIdl: 0.8,
214
+ confidenceWithoutIdl: 0,
215
+ });
216
+ }
217
+ }
218
+ return out;
219
+ },
220
+ };
@@ -0,0 +1,17 @@
1
+ import type Parser from 'tree-sitter';
2
+ export type ThriftRole = 'provider' | 'consumer';
3
+ export interface ThriftDetection {
4
+ role: ThriftRole;
5
+ serviceName: string;
6
+ methodName: string;
7
+ symbolName: string;
8
+ source: string;
9
+ confidenceWithIdl: number;
10
+ confidenceWithoutIdl: number;
11
+ usesGeneratedServiceMember?: boolean;
12
+ }
13
+ export interface ThriftLanguagePlugin {
14
+ name: string;
15
+ language: unknown;
16
+ scan(tree: Parser.Tree): ThriftDetection[];
17
+ }
@@ -1,5 +1,5 @@
1
- function isGrpcWildcard(cid) {
2
- return cid.startsWith('grpc::') && cid.endsWith('/*');
1
+ function isServiceWildcard(cid) {
2
+ return (cid.startsWith('grpc::') || cid.startsWith('thrift::')) && cid.endsWith('/*');
3
3
  }
4
4
  /**
5
5
  * Detect HTTP contracts that are too generic or infrastructure-level to
@@ -55,8 +55,9 @@ export function normalizeContractId(id) {
55
55
  }
56
56
  return id;
57
57
  }
58
- case 'grpc': {
59
- // Canonical form: `grpc::<lowercased-package-or-service>[/<method>]`.
58
+ case 'grpc':
59
+ case 'thrift': {
60
+ // Canonical form: `<type>::<lowercased-package-or-service>[/<method>]`.
60
61
  //
61
62
  // The package/service segment is lowercased because gRPC package
62
63
  // names are effectively case-insensitive across language bindings
@@ -70,22 +71,23 @@ export function normalizeContractId(id) {
70
71
  // as DISTINCT canonical forms: `grpc::userservice` does not match
71
72
  // `grpc::userservice/Login`. That's by design — callers that want
72
73
  // service-level manifest matching against method-level providers
73
- // should use the gRPC wildcard form `grpc::UserService/*` which is
74
+ // should use the service wildcard form `grpc::UserService/*` or
75
+ // `thrift::UserService/*` which is
74
76
  // handled by runWildcardMatch below.
75
77
  const slashIdx = rest.indexOf('/');
76
78
  if (slashIdx > 0) {
77
79
  const pkg = rest.substring(0, slashIdx).toLowerCase();
78
80
  const method = rest.substring(slashIdx);
79
- return `grpc::${pkg}${method}`;
81
+ return `${type}::${pkg}${method}`;
80
82
  }
81
83
  if (slashIdx === 0) {
82
84
  // Malformed "/method" with leading slash — keep as-is so two
83
85
  // equally malformed ids can still match each other.
84
- return `grpc::${rest}`;
86
+ return `${type}::${rest}`;
85
87
  }
86
88
  // No slash: package/service only. Lowercase to match the package
87
89
  // segment produced by the pkg/method branch above.
88
- return `grpc::${rest.toLowerCase()}`;
90
+ return `${type}::${rest.toLowerCase()}`;
89
91
  }
90
92
  case 'topic':
91
93
  return `topic::${rest.trim().toLowerCase()}`;
@@ -109,6 +111,34 @@ function findMatchingKeys(contractId, index) {
109
111
  }
110
112
  return matches;
111
113
  }
114
+ if (normalized.startsWith('thrift::')) {
115
+ const rest = normalized.substring('thrift::'.length);
116
+ const slashIdx = rest.indexOf('/');
117
+ if (slashIdx > 0) {
118
+ const service = rest.substring(0, slashIdx);
119
+ const method = rest.substring(slashIdx + 1);
120
+ if (!service.includes('.') && method && method !== '*') {
121
+ const matches = [];
122
+ for (const key of index.keys()) {
123
+ if (!key.startsWith('thrift::') || key.endsWith('/*'))
124
+ continue;
125
+ const providerRest = key.substring('thrift::'.length);
126
+ const providerSlashIdx = providerRest.indexOf('/');
127
+ if (providerSlashIdx < 0)
128
+ continue;
129
+ const providerService = providerRest.substring(0, providerSlashIdx);
130
+ const providerMethod = providerRest.substring(providerSlashIdx + 1);
131
+ if (providerMethod !== method)
132
+ continue;
133
+ if (providerService === service || providerService.endsWith('.' + service)) {
134
+ matches.push(key);
135
+ }
136
+ }
137
+ matches.sort();
138
+ return matches.length === 1 ? matches : [];
139
+ }
140
+ }
141
+ }
112
142
  return [];
113
143
  }
114
144
  export function buildProviderIndex(contracts, matchingConfig) {
@@ -126,7 +156,8 @@ export function buildProviderIndex(contracts, matchingConfig) {
126
156
  export function runExactMatch(contracts, providerIndex, matchingConfig) {
127
157
  const isNoisy = buildNoisyContractFilter(matchingConfig);
128
158
  const index = providerIndex ?? buildProviderIndex(contracts, matchingConfig);
129
- const consumers = contracts.filter((c) => c.role === 'consumer' && !isGrpcWildcard(c.contractId) && !isNoisy(c.contractId));
159
+ // Skip service wildcard consumers — they go to the wildcard pass only
160
+ const consumers = contracts.filter((c) => c.role === 'consumer' && !isServiceWildcard(c.contractId) && !isNoisy(c.contractId));
130
161
  const matched = [];
131
162
  const matchedConsumerIds = new Set();
132
163
  const matchedProviderIds = new Set();
@@ -165,32 +196,40 @@ export function runExactMatch(contracts, providerIndex, matchingConfig) {
165
196
  }
166
197
  // normalUnmatched: contracts that weren't matched in exact pass
167
198
  const normalUnmatched = contracts.filter((c) => {
168
- if (isGrpcWildcard(c.contractId))
199
+ if (isServiceWildcard(c.contractId))
169
200
  return false; // excluded from exact, handled separately
170
201
  if (isNoisy(c.contractId))
171
202
  return false; // excluded from matching — don't surface as unmatched
172
203
  const id = `${c.repo}::${c.contractId}`;
173
204
  return c.role === 'provider' ? !matchedProviderIds.has(id) : !matchedConsumerIds.has(id);
174
205
  });
175
- // Re-add gRPC wildcard contracts — they were never in exact matching
176
- const grpcWildcards = contracts.filter((c) => isGrpcWildcard(c.contractId));
177
- const unmatched = [...normalUnmatched, ...grpcWildcards];
206
+ // Re-add service wildcard contracts — they were never in exact matching
207
+ const serviceWildcards = contracts.filter((c) => isServiceWildcard(c.contractId));
208
+ const unmatched = [...normalUnmatched, ...serviceWildcards];
178
209
  return { matched, unmatched };
179
210
  }
180
211
  export function runWildcardMatch(unmatched, providerIndex) {
181
- const wildcardConsumers = unmatched.filter((c) => c.role === 'consumer' && isGrpcWildcard(c.contractId));
212
+ const wildcardConsumers = unmatched.filter((c) => c.role === 'consumer' && isServiceWildcard(c.contractId));
182
213
  const matched = [];
183
214
  const matchedConsumerIds = new Set();
184
215
  for (const consumer of wildcardConsumers) {
185
216
  const normalized = normalizeContractId(consumer.contractId);
217
+ const typeEnd = normalized.indexOf('::');
218
+ const consumerType = normalized.slice(0, typeEnd);
186
219
  // "grpc::com.example.userservice/*" → "com.example.userservice"
187
- // "grpc::userservice/*" → "userservice"
188
- const fqService = normalized.slice(normalized.indexOf('::') + 2, -2); // strip "grpc::" and "/*"
220
+ // "thrift::userservice/*" → "userservice"
221
+ const fqService = normalized.slice(typeEnd + 2, -2); // strip "<type>::" and "/*"
222
+ const candidateProviders = [];
223
+ const matchedProviderServices = new Set();
189
224
  for (const [key, providers] of providerIndex) {
190
- // Only match against non-wildcard gRPC providers (method-level IDs)
191
- if (!key.startsWith('grpc::') || key.endsWith('/*'))
225
+ // Only match against non-wildcard same-type providers (method-level IDs).
226
+ const keyTypeEnd = key.indexOf('::');
227
+ if (keyTypeEnd < 0 || key.endsWith('/*'))
228
+ continue;
229
+ const providerType = key.slice(0, keyTypeEnd);
230
+ if (providerType !== consumerType)
192
231
  continue;
193
- const afterPrefix = key.slice(6); // strip "grpc::"
232
+ const afterPrefix = key.slice(keyTypeEnd + 2); // strip "<type>::"
194
233
  const slashIdx = afterPrefix.indexOf('/');
195
234
  if (slashIdx < 0)
196
235
  continue;
@@ -200,37 +239,42 @@ export function runWildcardMatch(unmatched, providerIndex) {
200
239
  (!fqService.includes('.') && providerFqService.endsWith('.' + fqService));
201
240
  if (!isMatch)
202
241
  continue;
203
- for (const provider of providers) {
204
- // Skip same-repo same-service (same logic as runExactMatch)
205
- if (provider.repo === consumer.repo) {
206
- if (!provider.service || !consumer.service || provider.service === consumer.service) {
207
- continue;
208
- }
242
+ matchedProviderServices.add(providerFqService);
243
+ candidateProviders.push(...providers);
244
+ }
245
+ if (consumerType === 'thrift' && !fqService.includes('.') && matchedProviderServices.size > 1) {
246
+ continue;
247
+ }
248
+ for (const provider of candidateProviders) {
249
+ // Skip same-repo same-service (same logic as runExactMatch)
250
+ if (provider.repo === consumer.repo) {
251
+ if (!provider.service || !consumer.service || provider.service === consumer.service) {
252
+ continue;
209
253
  }
210
- matched.push({
211
- from: {
212
- repo: consumer.repo,
213
- service: consumer.service,
214
- symbolUid: consumer.symbolUid,
215
- symbolRef: consumer.symbolRef,
216
- },
217
- to: {
218
- repo: provider.repo,
219
- service: provider.service,
220
- symbolUid: provider.symbolUid,
221
- symbolRef: provider.symbolRef,
222
- },
223
- type: consumer.type,
224
- contractId: consumer.contractId, // consumer's wildcard ID
225
- matchType: 'wildcard',
226
- confidence: Math.min(provider.confidence, consumer.confidence),
227
- });
228
- matchedConsumerIds.add(`${consumer.repo}::${consumer.contractId}`);
229
254
  }
255
+ matched.push({
256
+ from: {
257
+ repo: consumer.repo,
258
+ service: consumer.service,
259
+ symbolUid: consumer.symbolUid,
260
+ symbolRef: consumer.symbolRef,
261
+ },
262
+ to: {
263
+ repo: provider.repo,
264
+ service: provider.service,
265
+ symbolUid: provider.symbolUid,
266
+ symbolRef: provider.symbolRef,
267
+ },
268
+ type: consumer.type,
269
+ contractId: consumer.contractId, // consumer's wildcard ID
270
+ matchType: 'wildcard',
271
+ confidence: Math.min(provider.confidence, consumer.confidence),
272
+ });
273
+ matchedConsumerIds.add(`${consumer.repo}::${consumer.contractId}`);
230
274
  }
231
275
  }
232
276
  const remaining = unmatched.filter((c) => {
233
- if (c.role !== 'consumer' || !isGrpcWildcard(c.contractId))
277
+ if (c.role !== 'consumer' || !isServiceWildcard(c.contractId))
234
278
  return true;
235
279
  return !matchedConsumerIds.has(`${c.repo}::${c.contractId}`);
236
280
  });
@@ -5,10 +5,11 @@ import { initLbug, closeLbug, executeParameterized } from '../lbug/pool-adapter.
5
5
  import { readRegistry } from '../../storage/repo-manager.js';
6
6
  import { HttpRouteExtractor } from './extractors/http-route-extractor.js';
7
7
  import { GrpcExtractor } from './extractors/grpc-extractor.js';
8
+ import { ThriftExtractor } from './extractors/thrift-extractor.js';
8
9
  import { TopicExtractor } from './extractors/topic-extractor.js';
9
10
  import { ManifestExtractor } from './extractors/manifest-extractor.js';
10
11
  import { discoverWorkspaceLinks } from './extractors/workspace-extractor.js';
11
- import { runExactMatch } from './matching.js';
12
+ import { buildProviderIndex, runExactMatch, runWildcardMatch } from './matching.js';
12
13
  import { detectServiceBoundaries, assignService } from './service-boundary-detector.js';
13
14
  import { writeContractRegistry } from './storage.js';
14
15
  export function stableRepoPoolId(entry, allEntries) {
@@ -70,6 +71,7 @@ export async function syncGroup(config, opts) {
70
71
  const resolve = opts?.resolveRepoHandle ?? defaultResolveHandle(entries);
71
72
  const httpEx = new HttpRouteExtractor();
72
73
  const grpcEx = new GrpcExtractor();
74
+ const thriftEx = new ThriftExtractor();
73
75
  const topicEx = new TopicExtractor();
74
76
  dbExecutors = new Map();
75
77
  const openPoolIds = [];
@@ -108,6 +110,16 @@ export async function syncGroup(config, opts) {
108
110
  });
109
111
  }
110
112
  }
113
+ if (config.detect.thrift) {
114
+ const extracted = await thriftEx.extract(executor, handle.repoPath, handle);
115
+ for (const c of extracted) {
116
+ autoContracts.push({
117
+ ...c,
118
+ repo: groupPath,
119
+ service: assignService(c.symbolRef.filePath, boundaries),
120
+ });
121
+ }
122
+ }
111
123
  if (config.detect.topics) {
112
124
  const extracted = await topicEx.extract(executor, handle.repoPath, handle);
113
125
  for (const c of extracted) {
@@ -191,12 +203,14 @@ export async function syncGroup(config, opts) {
191
203
  console.log(` manifest: ${manifestCrossLinks.length} cross-links from ${allLinks.length} links (${config.links.length} declared + ${allLinks.length - config.links.length} discovered)`);
192
204
  }
193
205
  }
194
- const { matched, unmatched } = runExactMatch(autoContracts, undefined, config.matching);
206
+ const providerIndex = buildProviderIndex(autoContracts, config.matching);
207
+ const { matched, unmatched } = runExactMatch(autoContracts, providerIndex, config.matching);
208
+ const wildcard = runWildcardMatch(unmatched, providerIndex);
195
209
  // Dedupe cross-links. Manifest contracts participate in runExactMatch, so a
196
210
  // manifest-declared link can also emit a matchType:'exact' CrossLink with the
197
211
  // same endpoints. Prefer the manifest version — it reflects operator intent
198
212
  // and carries matchType:'manifest' which downstream consumers may rely on.
199
- const crossLinks = dedupeCrossLinks([...manifestCrossLinks, ...matched]);
213
+ const crossLinks = dedupeCrossLinks([...manifestCrossLinks, ...matched, ...wildcard.matched]);
200
214
  const allContracts = autoContracts;
201
215
  const registry = {
202
216
  version: 1,
@@ -212,7 +226,7 @@ export async function syncGroup(config, opts) {
212
226
  return {
213
227
  contracts: allContracts,
214
228
  crossLinks,
215
- unmatched,
229
+ unmatched: wildcard.remaining,
216
230
  missingRepos,
217
231
  repoSnapshots,
218
232
  };
@@ -1,4 +1,4 @@
1
- export type ContractType = 'http' | 'grpc' | 'topic' | 'lib' | 'custom';
1
+ export type ContractType = 'http' | 'grpc' | 'thrift' | 'topic' | 'lib' | 'custom';
2
2
  export type MatchType = 'exact' | 'manifest' | 'wildcard' | 'bm25' | 'embedding';
3
3
  export type ContractRole = 'provider' | 'consumer';
4
4
  export interface GroupConfig {
@@ -21,6 +21,7 @@ export interface GroupManifestLink {
21
21
  export interface DetectConfig {
22
22
  http: boolean;
23
23
  grpc: boolean;
24
+ thrift: boolean;
24
25
  topics: boolean;
25
26
  shared_libs: boolean;
26
27
  embedding_fallback: boolean;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.4-rc.71",
3
+ "version": "1.6.4-rc.73",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",