gitnexus 1.6.0 → 1.6.2-rc.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -0
- package/dist/cli/analyze.js +50 -3
- package/dist/core/group/extractors/fs-utils.d.ts +10 -0
- package/dist/core/group/extractors/fs-utils.js +24 -0
- package/dist/core/group/extractors/grpc-extractor.d.ts +17 -8
- package/dist/core/group/extractors/grpc-extractor.js +328 -191
- package/dist/core/group/extractors/grpc-patterns/go.d.ts +2 -0
- package/dist/core/group/extractors/grpc-patterns/go.js +97 -0
- package/dist/core/group/extractors/grpc-patterns/index.d.ts +19 -0
- package/dist/core/group/extractors/grpc-patterns/index.js +46 -0
- package/dist/core/group/extractors/grpc-patterns/java.d.ts +2 -0
- package/dist/core/group/extractors/grpc-patterns/java.js +173 -0
- package/dist/core/group/extractors/grpc-patterns/node.d.ts +4 -0
- package/dist/core/group/extractors/grpc-patterns/node.js +290 -0
- package/dist/core/group/extractors/grpc-patterns/proto.d.ts +9 -0
- package/dist/core/group/extractors/grpc-patterns/proto.js +134 -0
- package/dist/core/group/extractors/grpc-patterns/python.d.ts +2 -0
- package/dist/core/group/extractors/grpc-patterns/python.js +67 -0
- package/dist/core/group/extractors/grpc-patterns/types.d.ts +50 -0
- package/dist/core/group/extractors/grpc-patterns/types.js +1 -0
- package/dist/core/group/extractors/http-patterns/go.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/go.js +215 -0
- package/dist/core/group/extractors/http-patterns/index.d.ts +17 -0
- package/dist/core/group/extractors/http-patterns/index.js +44 -0
- package/dist/core/group/extractors/http-patterns/java.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/java.js +253 -0
- package/dist/core/group/extractors/http-patterns/node.d.ts +4 -0
- package/dist/core/group/extractors/http-patterns/node.js +354 -0
- package/dist/core/group/extractors/http-patterns/php.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/php.js +70 -0
- package/dist/core/group/extractors/http-patterns/python.d.ts +2 -0
- package/dist/core/group/extractors/http-patterns/python.js +133 -0
- package/dist/core/group/extractors/http-patterns/types.d.ts +61 -0
- package/dist/core/group/extractors/http-patterns/types.js +1 -0
- package/dist/core/group/extractors/http-route-extractor.d.ts +10 -13
- package/dist/core/group/extractors/http-route-extractor.js +231 -238
- package/dist/core/group/extractors/manifest-extractor.d.ts +54 -0
- package/dist/core/group/extractors/manifest-extractor.js +277 -0
- package/dist/core/group/extractors/topic-extractor.d.ts +0 -1
- package/dist/core/group/extractors/topic-extractor.js +55 -192
- package/dist/core/group/extractors/topic-patterns/go.d.ts +2 -0
- package/dist/core/group/extractors/topic-patterns/go.js +120 -0
- package/dist/core/group/extractors/topic-patterns/index.d.ts +14 -0
- package/dist/core/group/extractors/topic-patterns/index.js +38 -0
- package/dist/core/group/extractors/topic-patterns/java.d.ts +2 -0
- package/dist/core/group/extractors/topic-patterns/java.js +80 -0
- package/dist/core/group/extractors/topic-patterns/node.d.ts +4 -0
- package/dist/core/group/extractors/topic-patterns/node.js +155 -0
- package/dist/core/group/extractors/topic-patterns/python.d.ts +2 -0
- package/dist/core/group/extractors/topic-patterns/python.js +116 -0
- package/dist/core/group/extractors/topic-patterns/types.d.ts +25 -0
- package/dist/core/group/extractors/topic-patterns/types.js +10 -0
- package/dist/core/group/extractors/tree-sitter-scanner.d.ts +113 -0
- package/dist/core/group/extractors/tree-sitter-scanner.js +94 -0
- package/dist/core/ingestion/binding-accumulator.d.ts +22 -17
- package/dist/core/ingestion/binding-accumulator.js +29 -25
- package/dist/core/ingestion/cobol-processor.d.ts +1 -1
- package/dist/core/ingestion/import-processor.js +1 -1
- package/dist/core/ingestion/language-config.js +1 -1
- package/dist/core/ingestion/language-provider.d.ts +32 -5
- package/dist/core/ingestion/languages/c-cpp.js +2 -2
- package/dist/core/ingestion/languages/dart.d.ts +1 -1
- package/dist/core/ingestion/languages/dart.js +2 -2
- package/dist/core/ingestion/languages/go.d.ts +1 -1
- package/dist/core/ingestion/languages/go.js +2 -2
- package/dist/core/ingestion/languages/ruby.js +16 -1
- package/dist/core/ingestion/languages/swift.d.ts +1 -1
- package/dist/core/ingestion/languages/swift.js +2 -2
- package/dist/core/ingestion/markdown-processor.d.ts +1 -1
- package/dist/core/ingestion/method-extractors/configs/jvm.js +1 -0
- package/dist/core/ingestion/method-extractors/configs/ruby.js +1 -0
- package/dist/core/ingestion/method-extractors/generic.d.ts +6 -0
- package/dist/core/ingestion/method-extractors/generic.js +48 -4
- package/dist/core/ingestion/method-types.d.ts +4 -0
- package/dist/core/ingestion/model/resolve.js +103 -48
- package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
- package/dist/core/ingestion/model/semantic-model.js +1 -1
- package/dist/core/ingestion/model/symbol-table.d.ts +7 -7
- package/dist/core/ingestion/model/symbol-table.js +7 -7
- package/dist/core/ingestion/mro-processor.d.ts +1 -1
- package/dist/core/ingestion/mro-processor.js +1 -1
- package/dist/core/ingestion/parsing-processor.js +54 -42
- package/dist/core/ingestion/pipeline-phases/cobol.d.ts +16 -0
- package/dist/core/ingestion/pipeline-phases/cobol.js +45 -0
- package/dist/core/ingestion/pipeline-phases/communities.d.ts +16 -0
- package/dist/core/ingestion/pipeline-phases/communities.js +62 -0
- package/dist/core/ingestion/pipeline-phases/cross-file-impl.d.ts +17 -0
- package/dist/core/ingestion/pipeline-phases/cross-file-impl.js +156 -0
- package/dist/core/ingestion/pipeline-phases/cross-file.d.ts +37 -0
- package/dist/core/ingestion/pipeline-phases/cross-file.js +63 -0
- package/dist/core/ingestion/pipeline-phases/index.d.ts +21 -0
- package/dist/core/ingestion/pipeline-phases/index.js +22 -0
- package/dist/core/ingestion/pipeline-phases/markdown.d.ts +17 -0
- package/dist/core/ingestion/pipeline-phases/markdown.js +33 -0
- package/dist/core/ingestion/pipeline-phases/mro.d.ts +18 -0
- package/dist/core/ingestion/pipeline-phases/mro.js +36 -0
- package/dist/core/ingestion/pipeline-phases/orm-extraction.d.ts +22 -0
- package/dist/core/ingestion/pipeline-phases/orm-extraction.js +92 -0
- package/dist/core/ingestion/pipeline-phases/orm.d.ts +15 -0
- package/dist/core/ingestion/pipeline-phases/orm.js +74 -0
- package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +47 -0
- package/dist/core/ingestion/pipeline-phases/parse-impl.js +437 -0
- package/dist/core/ingestion/pipeline-phases/parse.d.ts +49 -0
- package/dist/core/ingestion/pipeline-phases/parse.js +33 -0
- package/dist/core/ingestion/pipeline-phases/processes.d.ts +16 -0
- package/dist/core/ingestion/pipeline-phases/processes.js +143 -0
- package/dist/core/ingestion/pipeline-phases/routes.d.ts +21 -0
- package/dist/core/ingestion/pipeline-phases/routes.js +243 -0
- package/dist/core/ingestion/pipeline-phases/runner.d.ts +22 -0
- package/dist/core/ingestion/pipeline-phases/runner.js +203 -0
- package/dist/core/ingestion/pipeline-phases/scan.d.ts +21 -0
- package/dist/core/ingestion/pipeline-phases/scan.js +46 -0
- package/dist/core/ingestion/pipeline-phases/structure.d.ts +27 -0
- package/dist/core/ingestion/pipeline-phases/structure.js +35 -0
- package/dist/core/ingestion/pipeline-phases/tools.d.ts +20 -0
- package/dist/core/ingestion/pipeline-phases/tools.js +79 -0
- package/dist/core/ingestion/pipeline-phases/types.d.ts +79 -0
- package/dist/core/ingestion/pipeline-phases/types.js +37 -0
- package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +70 -0
- package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +312 -0
- package/dist/core/ingestion/pipeline.d.ts +16 -10
- package/dist/core/ingestion/pipeline.js +66 -1534
- package/dist/core/ingestion/process-processor.js +1 -1
- package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
- package/dist/core/ingestion/tree-sitter-queries.js +69 -0
- package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -3
- package/dist/core/ingestion/utils/ast-helpers.js +48 -21
- package/dist/core/ingestion/utils/env.d.ts +10 -0
- package/dist/core/ingestion/utils/env.js +10 -0
- package/dist/core/ingestion/utils/graph-sort.d.ts +58 -0
- package/dist/core/ingestion/utils/graph-sort.js +100 -0
- package/dist/core/ingestion/workers/parse-worker.js +12 -8
- package/dist/core/lbug/lbug-adapter.d.ts +28 -0
- package/dist/core/lbug/lbug-adapter.js +162 -57
- package/package.json +3 -3
- package/vendor/tree-sitter-proto/binding.gyp +30 -0
- package/vendor/tree-sitter-proto/bindings/node/binding.cc +20 -0
- package/vendor/tree-sitter-proto/bindings/node/index.d.ts +28 -0
- package/vendor/tree-sitter-proto/bindings/node/index.js +7 -0
- package/vendor/tree-sitter-proto/package.json +18 -0
- package/vendor/tree-sitter-proto/src/node-types.json +1145 -0
- package/vendor/tree-sitter-proto/src/parser.c +10149 -0
- package/vendor/tree-sitter-proto/src/tree_sitter/alloc.h +54 -0
- package/vendor/tree-sitter-proto/src/tree_sitter/array.h +291 -0
- package/vendor/tree-sitter-proto/src/tree_sitter/parser.h +266 -0
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
/**
 * Produce the canonical form of an HTTP path so it can be compared against
 * Route.name values in the graph. Per the original note this is meant to
 * mirror the `ensureSlash` semantics in core/ingestion/pipeline.ts.
 *
 * Rules, applied in order:
 *   1. Surrounding whitespace is removed; an empty result becomes "/".
 *   2. The path is made to start with "/".
 *   3. Runs of consecutive "/" are collapsed into one.
 *   4. A trailing "/" is dropped, unless the path is the root "/".
 *
 * Letter case is never changed (route matching is case-sensitive).
 *
 * @param {string} raw - Path as written by the user.
 * @returns {string} Canonical path, always beginning with "/".
 */
function normalizeRoutePath(raw) {
    const candidate = raw.trim();
    if (candidate.length === 0) {
        return '/';
    }
    // Prefixing unconditionally is safe: a doubled leading slash is squeezed
    // away by the collapse step right after.
    const squeezed = `/${candidate}`.replace(/\/+/g, '/');
    // After collapsing there can be at most one trailing slash left.
    const hasTrailingSlash = squeezed.length > 1 && squeezed.endsWith('/');
    return hasTrailingSlash ? squeezed.slice(0, -1) : squeezed;
}
|
|
19
|
+
/**
 * Split a manifest HTTP contract into its optional `METHOD::` prefix and
 * its path portion.
 *
 * `buildContractId` recommends the explicit-method form `GET::/api/orders`
 * in group.yaml; handing that raw string to `normalizeRoutePath` would give
 * `/GET::/api/orders`, which can never match `Route.name = "/api/orders"`
 * in the graph. This helper extracts the path portion so the Cypher lookup
 * uses the canonical route name.
 *
 * The method prefix is one or more ASCII letters followed by `::`, matched
 * case-insensitively, and the captured method is upper-cased. Leading
 * whitespace before the method is tolerated: `normalizeRoutePath` trims its
 * input, so an untrimmed `" GET::/api/orders"` would otherwise miss the
 * prefix here and then be canonicalized to `/GET::/api/orders`.
 *
 * Edge cases:
 *   - `"::/api/orders"`: no letters before `::`, so the whole string is
 *     returned as a bare path with `method: null`.
 *   - `"GET::"`: returns `{ method: 'GET', path: '' }`; callers rely on
 *     `normalizeRoutePath('')` resolving to `/`.
 *
 * @param {string} raw - Contract string from the manifest.
 * @returns {{ method: string | null, path: string }} Upper-cased method (or
 *   null when no prefix is present) and the remaining, un-normalized path.
 */
function parseHttpContract(raw) {
    // `\s*` lets a whitespace-padded manifest value still expose its prefix.
    const prefix = raw.match(/^\s*([A-Za-z]+)::/);
    if (!prefix) {
        return { method: null, path: raw };
    }
    return {
        method: prefix[1].toUpperCase(),
        // Everything after the matched "METHOD::" (including any padding
        // that preceded it) is the path.
        path: raw.slice(prefix[0].length),
    };
}
|
|
47
|
+
/**
 * Build the deterministic placeholder symbolUid used for a manifest-declared
 * contract whose target symbol could not be resolved in the per-repo graph
 * (i.e. `resolveSymbol` returned null).
 *
 * Why a placeholder instead of an empty uid (as recorded by the original
 * authors):
 *   1. Contract nodes are keyed in part by symbolUid, so an empty value
 *      would make distinct unresolved manifest contracts indistinguishable
 *      to queries that anchor on `provider.symbolUid`.
 *   2. The cross-impact bridge joins local impact results to bridge
 *      contracts on `provider.symbolUid IN $localUids`; both sides need a
 *      value they can derive independently, and (repo, contractId) is the
 *      only information they share.
 *
 * The result has the shape `manifest::<repo>::<contractId>`. It depends only
 * on its two arguments, so it is stable across syncs and scoped to one repo.
 * Real indexer uids are stated never to start with `manifest::`.
 *
 * @param {string} repo - Repo key the contract belongs to.
 * @param {string} contractId - Canonical contract id.
 * @returns {string} The synthetic uid.
 */
export function manifestSymbolUid(repo, contractId) {
    const separator = '::';
    return 'manifest'.concat(separator, repo, separator, contractId);
}
|
|
69
|
+
/**
 * Converts manifest-declared links into contract records and cross-repo
 * links, resolving each side against that repo's graph when an executor is
 * available.
 */
export class ManifestExtractor {
    /**
     * Expand every manifest link into a provider contract, a consumer
     * contract, and one cross-link pointing consumer -> provider.
     *
     * @param {Iterable<object>} links - Manifest links; each is read for
     *   `type`, `contract`, `role`, `from` and `to`.
     * @param {{ get(key: string): Function | undefined } | null | undefined} dbExecutors
     *   Lookup from repo key to a query executor (see `resolveSymbol`).
     * @returns {Promise<{ contracts: object[], crossLinks: object[] }>}
     */
    async extractFromManifest(links, dbExecutors) {
        const contracts = [];
        const crossLinks = [];
        for (const link of links) {
            const contractId = this.buildContractId(link.type, link.contract);
            // `role` describes the `from` repo; the other end takes the
            // opposite role.
            const providerRepo = link.role === 'provider' ? link.from : link.to;
            const consumerRepo = link.role === 'provider' ? link.to : link.from;
            const providerSymbol = await this.resolveSymbol(providerRepo, link, dbExecutors);
            const consumerSymbol = await this.resolveSymbol(consumerRepo, link, dbExecutors);
            const providerRef = providerSymbol || { filePath: '', name: link.contract };
            const consumerRef = consumerSymbol || { filePath: '', name: link.contract };
            // When the resolver finds a real graph symbol we keep its uid,
            // otherwise fall back to the deterministic synthetic uid (see
            // manifestSymbolUid). `||` also covers a resolved row whose uid
            // came back empty.
            const providerUid = providerSymbol?.uid || manifestSymbolUid(providerRepo, contractId);
            const consumerUid = consumerSymbol?.uid || manifestSymbolUid(consumerRepo, contractId);
            contracts.push({
                contractId,
                type: link.type,
                role: 'provider',
                symbolUid: providerUid,
                symbolRef: providerRef,
                symbolName: link.contract,
                confidence: 1.0,
                meta: { source: 'manifest' },
                repo: providerRepo,
            });
            contracts.push({
                contractId,
                type: link.type,
                role: 'consumer',
                symbolUid: consumerUid,
                symbolRef: consumerRef,
                symbolName: link.contract,
                confidence: 1.0,
                meta: { source: 'manifest' },
                repo: consumerRepo,
            });
            crossLinks.push({
                from: { repo: consumerRepo, symbolUid: consumerUid, symbolRef: consumerRef },
                to: { repo: providerRepo, symbolUid: providerUid, symbolRef: providerRef },
                type: link.type,
                contractId,
                matchType: 'manifest',
                confidence: 1.0,
            });
        }
        return { contracts, crossLinks };
    }
    /**
     * Look up the graph symbol a manifest link refers to inside one repo.
     *
     * Returns null when the repo has no executor, when the link type has no
     * lookup strategy, when nothing matches, or when the query throws (the
     * failure is logged with `console.warn` and swallowed).
     *
     * @param {string} repoPathKey - Key used to fetch the executor.
     * @param {object} link - Manifest link; `type` and `contract` are read.
     * @param {{ get(key: string): Function | undefined } | null | undefined} dbExecutors
     *   Lookup from repo key to `executor(query, params)`, which is awaited
     *   and expected to yield an array of rows with `uid`, `name`, `filePath`.
     * @returns {Promise<{ filePath: *, name: *, uid: string } | null>}
     */
    async resolveSymbol(repoPathKey, link, dbExecutors) {
        const executor = dbExecutors?.get(repoPathKey);
        if (!executor)
            return null;
        // NOTE: All lookups use EXACT equality on the relevant name field and
        // deterministic ORDER BY before LIMIT 1. Previous versions used
        // CONTAINS for fuzzy matching (plus an unconditional ".proto" fallback
        // for gRPC) which produced silent false positives: e.g. manifest
        // "/orders" would match "/suborders", and a gRPC manifest entry in a
        // repo with any .proto file would attach to a random proto symbol.
        //
        // If resolveSymbol returns null, the caller (extractFromManifest)
        // falls back to a deterministic synthetic uid via
        // `manifestSymbolUid(repo, contractId)`.
        try {
            let rows;
            if (link.type === 'http') {
                // Route.name is the canonicalized URL path (see
                // core/ingestion/pipeline.ts ensureSlash + generateId('Route', ...)).
                // Normalize the manifest contract the same way so that
                // slash-variant spellings of the same path still match.
                //
                // The contract may also use the explicit-method form
                // "GET::/api/orders" recommended by buildContractId. Strip the
                // METHOD:: prefix before normalizing, otherwise
                // `normalizeRoutePath('GET::/api/orders')` returns
                // `/GET::/api/orders` and never matches Route.name. The
                // captured method is not yet used to constrain the Cypher
                // query (method-aware HANDLES_ROUTE matching is a future
                // enhancement).
                const parsed = parseHttpContract(link.contract);
                const normalized = normalizeRoutePath(parsed.path);
                rows = await executor(`MATCH (handler)-[r:CodeRelation {type: 'HANDLES_ROUTE'}]->(route:Route)
             WHERE route.name = $normalized
             RETURN handler.id AS uid, handler.name AS name, handler.filePath AS filePath
             ORDER BY handler.filePath ASC
             LIMIT 1`, { normalized });
            }
            else if (link.type === 'topic') {
                // Topic names aren't a first-class NodeLabel in the graph;
                // topics are referenced by function/method symbols (Kafka
                // listeners, publishers). Restrict to symbol-like labels to
                // avoid cross-matching Files/Variables/Imports that happen to
                // share the topic name.
                rows = await executor(`MATCH (n:Function|Method|Class|Interface) WHERE n.name = $contract
             RETURN n.id AS uid, n.name AS name, n.filePath AS filePath
             ORDER BY n.filePath ASC
             LIMIT 1`, { contract: link.contract });
            }
            else if (link.type === 'grpc') {
                // Contract is "Service/Method" or just "Service" (or
                // package.Service variants). Prefer matching by method name
                // when present, otherwise by service name. NO .proto path
                // fallback: that's guaranteed to return a wrong symbol in any
                // repo with more than one proto file. Label filters scope
                // lookups: methods -> Function|Method, services ->
                // Class|Interface (no label match = no silent wrong hits on
                // File/Variable nodes that happen to share the name).
                //
                // Empty segments are discarded first so the canonical gRPC
                // wire form "/pkg.Service/Method" (leading slash) is read as
                // service "pkg.Service" + method "Method" rather than an
                // empty service and the service name in the method slot.
                const [serviceName = '', methodName = ''] = link.contract
                    .split('/')
                    .map((segment) => segment.trim())
                    .filter((segment) => segment.length > 0);
                if (methodName) {
                    rows = await executor(`MATCH (n:Function|Method) WHERE n.name = $methodName
               RETURN n.id AS uid, n.name AS name, n.filePath AS filePath
               ORDER BY n.filePath ASC
               LIMIT 1`, { methodName });
                }
                else if (serviceName) {
                    rows = await executor(`MATCH (n:Class|Interface) WHERE n.name = $serviceName
               RETURN n.id AS uid, n.name AS name, n.filePath AS filePath
               ORDER BY n.filePath ASC
               LIMIT 1`, { serviceName });
                }
                else {
                    rows = [];
                }
            }
            else if (link.type === 'lib') {
                // Only exact match on the symbol's name. Previous fallback to
                // CONTAINS on n.filePath would promote "react" to
                // "react-native" or "@types/react" (silent wrong
                // attribution). Restrict to package-level labels so we don't
                // return arbitrary symbols named after a library.
                rows = await executor(`MATCH (n:Package|Module) WHERE n.name = $contract
             RETURN n.id AS uid, n.name AS name, n.filePath AS filePath
             ORDER BY n.filePath ASC
             LIMIT 1`, { contract: link.contract });
            }
            else {
                // No lookup strategy for this type (e.g. 'custom').
                return null;
            }
            if (rows.length > 0) {
                return {
                    filePath: rows[0].filePath,
                    name: rows[0].name,
                    // A missing uid becomes '' so the caller's `||` fallback
                    // substitutes the synthetic uid.
                    uid: String(rows[0].uid ?? ''),
                };
            }
        }
        catch (err) {
            // Log but don't throw: a broken graph query in one repo shouldn't
            // fail the whole manifest extraction. The caller assigns a
            // synthetic symbolUid to unresolved contracts, so cross-impact
            // can proceed.
            const message = err instanceof Error ? err.message : String(err);
            console.warn(`[manifest-extractor] resolveSymbol failed for ${link.type}:${link.contract} ` +
                `in ${repoPathKey}: ${message}`);
        }
        return null;
    }
    /**
     * Build a canonical contract id for a manifest link.
     *
     * HTTP is the only type with two valid forms:
     *   - Explicit method: `"GET::/api/orders"` -> `"http::GET::/api/orders"`
     *     (intended to match `HttpRouteExtractor` provider/consumer
     *     contracts, which are keyed by `http::<METHOD>::<path>`).
     *   - Method-agnostic: `"/api/orders"` -> `"http::*::/api/orders"`.
     *     The `*` is a wildcard meant to match any concrete HTTP method on
     *     that path. Wildcard-aware matching is the responsibility of the
     *     sync / cross-impact layer (see #793); downstream code should treat
     *     `http::*::<path>` as matching every `http::<METHOD>::<path>` for
     *     the same path.
     *
     * Prefer the explicit-method form in group.yaml whenever the method is
     * known: it round-trips through exact equality matching without
     * requiring wildcard logic downstream.
     *
     * All other types are emitted as `<type>::<contract>` with the contract
     * string unchanged.
     *
     * @param {string} type - One of 'http', 'grpc', 'topic', 'lib', 'custom'.
     * @param {string} contract - Contract string from the manifest.
     * @returns {string} Canonical contract id.
     * @throws {Error} When `type` is not one of the handled variants.
     */
    buildContractId(type, contract) {
        switch (type) {
            case 'http': {
                // Canonicalize method casing and path separators so logically
                // equivalent inputs (`get::/api/orders` vs `GET::/api/orders`,
                // or trailing-slash variants) produce the same contractId and
                // matching `manifestSymbolUid` fallback. Without this, raw
                // user casing leaks into cross-impact join keys and fragments
                // matches across repos.
                const { method, path: rawPath } = parseHttpContract(contract);
                const normalizedPath = normalizeRoutePath(rawPath);
                return method ? `http::${method}::${normalizedPath}` : `http::*::${normalizedPath}`;
            }
            case 'grpc':
                return `grpc::${contract}`;
            case 'topic':
                return `topic::${contract}`;
            case 'lib':
                return `lib::${contract}`;
            case 'custom':
                return `custom::${contract}`;
            default: {
                // Residue of the TypeScript exhaustiveness check; at runtime
                // this only guards against an unexpected type string.
                const _exhaustive = type;
                throw new Error(`Unhandled ContractType: ${String(_exhaustive)}`);
            }
        }
    }
}
|
|
@@ -4,6 +4,5 @@ export declare class TopicExtractor implements ContractExtractor {
|
|
|
4
4
|
type: "topic";
|
|
5
5
|
canExtract(_repo: RepoHandle): Promise<boolean>;
|
|
6
6
|
extract(_dbExecutor: CypherExecutor | null, repoPath: string, _repo: RepoHandle): Promise<ExtractedContract[]>;
|
|
7
|
-
private scanFile;
|
|
8
7
|
private dedupe;
|
|
9
8
|
}
|
|
@@ -1,223 +1,86 @@
|
|
|
1
|
-
import * as fs from 'node:fs';
|
|
2
|
-
import * as path from 'node:path';
|
|
3
1
|
import { glob } from 'glob';
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
}
|
|
17
|
-
|
|
2
|
+
import Parser from 'tree-sitter';
|
|
3
|
+
import { readSafe } from './fs-utils.js';
|
|
4
|
+
import { scanFile, unquoteLiteral } from './tree-sitter-scanner.js';
|
|
5
|
+
import { TOPIC_SCAN_GLOB, getProviderForFile, } from './topic-patterns/index.js';
|
|
6
|
+
/**
|
|
7
|
+
* Language-agnostic orchestrator for topic (message broker) contract
|
|
8
|
+
* extraction. All grammar-specific knowledge lives in `topic-patterns/*`
|
|
9
|
+
* — this file must not import any tree-sitter grammar directly.
|
|
10
|
+
*
|
|
11
|
+
* Flow per file:
|
|
12
|
+
* 1. `getProviderForFile(rel)` → compiled plugin (or `undefined` if the
|
|
13
|
+
* file's extension isn't registered, in which case we skip it).
|
|
14
|
+
* 2. `scanFile(parser, provider, content)` → list of `{meta, valueText}`
|
|
15
|
+
* pairs, one per matched literal.
|
|
16
|
+
* 3. `unquoteLiteral(valueText)` → the raw topic string.
|
|
17
|
+
* 4. `makeContract(topic, meta, relPath)` → `ExtractedContract`.
|
|
18
|
+
*
|
|
19
|
+
* Adding a new language is a one-file edit in `topic-patterns/index.ts`.
|
|
20
|
+
*/
|
|
21
|
+
function makeContract(topicName, meta, filePath) {
|
|
18
22
|
return {
|
|
19
23
|
contractId: `topic::${topicName}`,
|
|
20
24
|
type: 'topic',
|
|
21
|
-
role,
|
|
25
|
+
role: meta.role,
|
|
22
26
|
symbolUid: '',
|
|
23
|
-
symbolRef: { filePath: filePath.replace(/\\/g, '/'), name: symbolName },
|
|
24
|
-
symbolName,
|
|
25
|
-
confidence,
|
|
27
|
+
symbolRef: { filePath: filePath.replace(/\\/g, '/'), name: meta.symbolName },
|
|
28
|
+
symbolName: meta.symbolName,
|
|
29
|
+
confidence: meta.confidence,
|
|
26
30
|
meta: {
|
|
27
|
-
broker,
|
|
31
|
+
broker: meta.broker,
|
|
28
32
|
topicName,
|
|
29
|
-
extractionStrategy: '
|
|
33
|
+
extractionStrategy: 'tree_sitter',
|
|
30
34
|
},
|
|
31
35
|
};
|
|
32
36
|
}
|
|
33
|
-
// --- Kafka patterns ---
|
|
34
|
-
const KAFKA_PATTERNS = [
|
|
35
|
-
// Java: @KafkaListener(topics = "xxx")
|
|
36
|
-
{
|
|
37
|
-
regex: /@KafkaListener\s*\(\s*topics\s*=\s*"([^"]+)"/g,
|
|
38
|
-
role: 'consumer',
|
|
39
|
-
broker: 'kafka',
|
|
40
|
-
confidence: 0.8,
|
|
41
|
-
topicGroup: 1,
|
|
42
|
-
symbolName: 'kafkaListener',
|
|
43
|
-
},
|
|
44
|
-
// Java: kafkaTemplate.send("xxx"
|
|
45
|
-
{
|
|
46
|
-
regex: /kafkaTemplate\.send\s*\(\s*"([^"]+)"/gi,
|
|
47
|
-
role: 'provider',
|
|
48
|
-
broker: 'kafka',
|
|
49
|
-
confidence: 0.8,
|
|
50
|
-
topicGroup: 1,
|
|
51
|
-
symbolName: 'kafkaTemplate.send',
|
|
52
|
-
},
|
|
53
|
-
// Node: producer.send({ topic: 'xxx'
|
|
54
|
-
{
|
|
55
|
-
regex: /producer\.send\s*\(\s*\{\s*topic:\s*['"]([^'"]+)['"]/g,
|
|
56
|
-
role: 'provider',
|
|
57
|
-
broker: 'kafka',
|
|
58
|
-
confidence: 0.8,
|
|
59
|
-
topicGroup: 1,
|
|
60
|
-
symbolName: 'producer.send',
|
|
61
|
-
},
|
|
62
|
-
// Node: consumer.subscribe({ topic: 'xxx'
|
|
63
|
-
{
|
|
64
|
-
regex: /consumer\.subscribe\s*\(\s*\{\s*topic:\s*['"]([^'"]+)['"]/g,
|
|
65
|
-
role: 'consumer',
|
|
66
|
-
broker: 'kafka',
|
|
67
|
-
confidence: 0.8,
|
|
68
|
-
topicGroup: 1,
|
|
69
|
-
symbolName: 'consumer.subscribe',
|
|
70
|
-
},
|
|
71
|
-
// Go: consumer.ConsumePartition("xxx"
|
|
72
|
-
{
|
|
73
|
-
regex: /\.ConsumePartition\s*\(\s*"([^"]+)"/g,
|
|
74
|
-
role: 'consumer',
|
|
75
|
-
broker: 'kafka',
|
|
76
|
-
confidence: 0.7,
|
|
77
|
-
topicGroup: 1,
|
|
78
|
-
symbolName: 'ConsumePartition',
|
|
79
|
-
},
|
|
80
|
-
// Python: KafkaConsumer('xxx'
|
|
81
|
-
{
|
|
82
|
-
regex: /KafkaConsumer\s*\(\s*['"]([^'"]+)['"]/g,
|
|
83
|
-
role: 'consumer',
|
|
84
|
-
broker: 'kafka',
|
|
85
|
-
confidence: 0.7,
|
|
86
|
-
topicGroup: 1,
|
|
87
|
-
symbolName: 'KafkaConsumer',
|
|
88
|
-
},
|
|
89
|
-
// Python: producer.send('xxx' or producer.produce('xxx'
|
|
90
|
-
{
|
|
91
|
-
regex: /producer\.(?:send|produce)\s*\(\s*['"]([^'"]+)['"]/g,
|
|
92
|
-
role: 'provider',
|
|
93
|
-
broker: 'kafka',
|
|
94
|
-
confidence: 0.7,
|
|
95
|
-
topicGroup: 1,
|
|
96
|
-
symbolName: 'producer.send',
|
|
97
|
-
},
|
|
98
|
-
];
|
|
99
|
-
// --- RabbitMQ patterns ---
|
|
100
|
-
const RABBITMQ_PATTERNS = [
|
|
101
|
-
// Java: @RabbitListener(queues = "xxx")
|
|
102
|
-
{
|
|
103
|
-
regex: /@RabbitListener\s*\(\s*queues\s*=\s*"([^"]+)"/g,
|
|
104
|
-
role: 'consumer',
|
|
105
|
-
broker: 'rabbitmq',
|
|
106
|
-
confidence: 0.8,
|
|
107
|
-
topicGroup: 1,
|
|
108
|
-
symbolName: 'rabbitListener',
|
|
109
|
-
},
|
|
110
|
-
// Java: rabbitTemplate.convertAndSend("xxx"
|
|
111
|
-
{
|
|
112
|
-
regex: /rabbitTemplate\.convertAndSend\s*\(\s*"([^"]+)"/gi,
|
|
113
|
-
role: 'provider',
|
|
114
|
-
broker: 'rabbitmq',
|
|
115
|
-
confidence: 0.8,
|
|
116
|
-
topicGroup: 1,
|
|
117
|
-
symbolName: 'rabbitTemplate.convertAndSend',
|
|
118
|
-
},
|
|
119
|
-
// Node: channel.consume("xxx"
|
|
120
|
-
{
|
|
121
|
-
regex: /channel\.consume\s*\(\s*"([^"]+)"/g,
|
|
122
|
-
role: 'consumer',
|
|
123
|
-
broker: 'rabbitmq',
|
|
124
|
-
confidence: 0.8,
|
|
125
|
-
topicGroup: 1,
|
|
126
|
-
symbolName: 'channel.consume',
|
|
127
|
-
},
|
|
128
|
-
// Node: channel.publish("xxx"
|
|
129
|
-
{
|
|
130
|
-
regex: /channel\.publish\s*\(\s*"([^"]+)"/g,
|
|
131
|
-
role: 'provider',
|
|
132
|
-
broker: 'rabbitmq',
|
|
133
|
-
confidence: 0.8,
|
|
134
|
-
topicGroup: 1,
|
|
135
|
-
symbolName: 'channel.publish',
|
|
136
|
-
},
|
|
137
|
-
// Node: channel.sendToQueue("xxx"
|
|
138
|
-
{
|
|
139
|
-
regex: /channel\.sendToQueue\s*\(\s*"([^"]+)"/g,
|
|
140
|
-
role: 'provider',
|
|
141
|
-
broker: 'rabbitmq',
|
|
142
|
-
confidence: 0.8,
|
|
143
|
-
topicGroup: 1,
|
|
144
|
-
symbolName: 'channel.sendToQueue',
|
|
145
|
-
},
|
|
146
|
-
// Python: channel.basic_consume(queue='xxx'
|
|
147
|
-
{
|
|
148
|
-
regex: /channel\.basic_consume\s*\(\s*queue\s*=\s*['"]([^'"]+)['"]/g,
|
|
149
|
-
role: 'consumer',
|
|
150
|
-
broker: 'rabbitmq',
|
|
151
|
-
confidence: 0.7,
|
|
152
|
-
topicGroup: 1,
|
|
153
|
-
symbolName: 'basic_consume',
|
|
154
|
-
},
|
|
155
|
-
// Python: channel.basic_publish(exchange='xxx'
|
|
156
|
-
{
|
|
157
|
-
regex: /channel\.basic_publish\s*\([^)]*exchange\s*=\s*['"]([^'"]+)['"]/g,
|
|
158
|
-
role: 'provider',
|
|
159
|
-
broker: 'rabbitmq',
|
|
160
|
-
confidence: 0.7,
|
|
161
|
-
topicGroup: 1,
|
|
162
|
-
symbolName: 'basic_publish',
|
|
163
|
-
},
|
|
164
|
-
];
|
|
165
|
-
// --- NATS patterns ---
|
|
166
|
-
const NATS_PATTERNS = [
|
|
167
|
-
// Go/Node: nc.Subscribe("xxx" or nc.subscribe("xxx"
|
|
168
|
-
{
|
|
169
|
-
regex: /nc\.(?:S|s)ubscribe\s*\(\s*"([^"]+)"/g,
|
|
170
|
-
role: 'consumer',
|
|
171
|
-
broker: 'nats',
|
|
172
|
-
confidence: 0.8,
|
|
173
|
-
topicGroup: 1,
|
|
174
|
-
symbolName: 'nc.Subscribe',
|
|
175
|
-
},
|
|
176
|
-
// Go/Node: nc.Publish("xxx" or nc.publish("xxx"
|
|
177
|
-
{
|
|
178
|
-
regex: /nc\.(?:P|p)ublish\s*\(\s*"([^"]+)"/g,
|
|
179
|
-
role: 'provider',
|
|
180
|
-
broker: 'nats',
|
|
181
|
-
confidence: 0.8,
|
|
182
|
-
topicGroup: 1,
|
|
183
|
-
symbolName: 'nc.Publish',
|
|
184
|
-
},
|
|
185
|
-
];
|
|
186
|
-
const ALL_PATTERNS = [...KAFKA_PATTERNS, ...RABBITMQ_PATTERNS, ...NATS_PATTERNS];
|
|
187
37
|
export class TopicExtractor {
|
|
188
38
|
type = 'topic';
|
|
189
39
|
async canExtract(_repo) {
|
|
190
40
|
return true;
|
|
191
41
|
}
|
|
192
42
|
async extract(_dbExecutor, repoPath, _repo) {
|
|
193
|
-
const files = await glob(
|
|
43
|
+
const files = await glob(TOPIC_SCAN_GLOB, {
|
|
194
44
|
cwd: repoPath,
|
|
195
|
-
ignore: [
|
|
45
|
+
ignore: [
|
|
46
|
+
'**/node_modules/**',
|
|
47
|
+
'**/.git/**',
|
|
48
|
+
'**/vendor/**',
|
|
49
|
+
'**/dist/**',
|
|
50
|
+
'**/build/**',
|
|
51
|
+
// Language-level test file conventions. Go test files
|
|
52
|
+
// `*_test.go` live next to source; other languages either use
|
|
53
|
+
// separate test directories (Python's `tests/`, Java's
|
|
54
|
+
// `src/test/`) or are already covered by the dist/build ignores.
|
|
55
|
+
// Pushed to the glob level so the orchestrator stays
|
|
56
|
+
// language-agnostic.
|
|
57
|
+
'**/*_test.go',
|
|
58
|
+
],
|
|
196
59
|
nodir: true,
|
|
197
60
|
});
|
|
61
|
+
// One parser reused across files; the scanner calls `setLanguage` per
|
|
62
|
+
// file based on which plugin the registry returns.
|
|
63
|
+
const parser = new Parser();
|
|
198
64
|
const out = [];
|
|
199
65
|
for (const rel of files) {
|
|
66
|
+
const provider = getProviderForFile(rel);
|
|
67
|
+
if (!provider)
|
|
68
|
+
continue;
|
|
200
69
|
const content = readSafe(repoPath, rel);
|
|
201
70
|
if (!content)
|
|
202
71
|
continue;
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
for (const pattern of ALL_PATTERNS) {
|
|
210
|
-
// Reset regex state for each file
|
|
211
|
-
const re = new RegExp(pattern.regex.source, pattern.regex.flags);
|
|
212
|
-
let m;
|
|
213
|
-
while ((m = re.exec(content)) !== null) {
|
|
214
|
-
const topicName = m[pattern.topicGroup];
|
|
72
|
+
const matches = scanFile(parser, provider, content);
|
|
73
|
+
for (const match of matches) {
|
|
74
|
+
const valueNode = match.captures.value;
|
|
75
|
+
if (!valueNode)
|
|
76
|
+
continue;
|
|
77
|
+
const topicName = unquoteLiteral(valueNode.text);
|
|
215
78
|
if (!topicName)
|
|
216
79
|
continue;
|
|
217
|
-
out.push(makeContract(topicName,
|
|
80
|
+
out.push(makeContract(topicName, match.meta, rel));
|
|
218
81
|
}
|
|
219
82
|
}
|
|
220
|
-
return out;
|
|
83
|
+
return this.dedupe(out);
|
|
221
84
|
}
|
|
222
85
|
dedupe(items) {
|
|
223
86
|
const seen = new Set();
|