gitnexus 1.6.0 → 1.6.2-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (145) hide show
  1. package/README.md +73 -0
  2. package/dist/cli/analyze.js +50 -3
  3. package/dist/core/group/extractors/fs-utils.d.ts +10 -0
  4. package/dist/core/group/extractors/fs-utils.js +24 -0
  5. package/dist/core/group/extractors/grpc-extractor.d.ts +17 -8
  6. package/dist/core/group/extractors/grpc-extractor.js +328 -191
  7. package/dist/core/group/extractors/grpc-patterns/go.d.ts +2 -0
  8. package/dist/core/group/extractors/grpc-patterns/go.js +97 -0
  9. package/dist/core/group/extractors/grpc-patterns/index.d.ts +19 -0
  10. package/dist/core/group/extractors/grpc-patterns/index.js +46 -0
  11. package/dist/core/group/extractors/grpc-patterns/java.d.ts +2 -0
  12. package/dist/core/group/extractors/grpc-patterns/java.js +173 -0
  13. package/dist/core/group/extractors/grpc-patterns/node.d.ts +4 -0
  14. package/dist/core/group/extractors/grpc-patterns/node.js +290 -0
  15. package/dist/core/group/extractors/grpc-patterns/proto.d.ts +9 -0
  16. package/dist/core/group/extractors/grpc-patterns/proto.js +134 -0
  17. package/dist/core/group/extractors/grpc-patterns/python.d.ts +2 -0
  18. package/dist/core/group/extractors/grpc-patterns/python.js +67 -0
  19. package/dist/core/group/extractors/grpc-patterns/types.d.ts +50 -0
  20. package/dist/core/group/extractors/grpc-patterns/types.js +1 -0
  21. package/dist/core/group/extractors/http-patterns/go.d.ts +2 -0
  22. package/dist/core/group/extractors/http-patterns/go.js +215 -0
  23. package/dist/core/group/extractors/http-patterns/index.d.ts +17 -0
  24. package/dist/core/group/extractors/http-patterns/index.js +44 -0
  25. package/dist/core/group/extractors/http-patterns/java.d.ts +2 -0
  26. package/dist/core/group/extractors/http-patterns/java.js +253 -0
  27. package/dist/core/group/extractors/http-patterns/node.d.ts +4 -0
  28. package/dist/core/group/extractors/http-patterns/node.js +354 -0
  29. package/dist/core/group/extractors/http-patterns/php.d.ts +2 -0
  30. package/dist/core/group/extractors/http-patterns/php.js +70 -0
  31. package/dist/core/group/extractors/http-patterns/python.d.ts +2 -0
  32. package/dist/core/group/extractors/http-patterns/python.js +133 -0
  33. package/dist/core/group/extractors/http-patterns/types.d.ts +61 -0
  34. package/dist/core/group/extractors/http-patterns/types.js +1 -0
  35. package/dist/core/group/extractors/http-route-extractor.d.ts +10 -13
  36. package/dist/core/group/extractors/http-route-extractor.js +231 -238
  37. package/dist/core/group/extractors/manifest-extractor.d.ts +54 -0
  38. package/dist/core/group/extractors/manifest-extractor.js +277 -0
  39. package/dist/core/group/extractors/topic-extractor.d.ts +0 -1
  40. package/dist/core/group/extractors/topic-extractor.js +55 -192
  41. package/dist/core/group/extractors/topic-patterns/go.d.ts +2 -0
  42. package/dist/core/group/extractors/topic-patterns/go.js +120 -0
  43. package/dist/core/group/extractors/topic-patterns/index.d.ts +14 -0
  44. package/dist/core/group/extractors/topic-patterns/index.js +38 -0
  45. package/dist/core/group/extractors/topic-patterns/java.d.ts +2 -0
  46. package/dist/core/group/extractors/topic-patterns/java.js +80 -0
  47. package/dist/core/group/extractors/topic-patterns/node.d.ts +4 -0
  48. package/dist/core/group/extractors/topic-patterns/node.js +155 -0
  49. package/dist/core/group/extractors/topic-patterns/python.d.ts +2 -0
  50. package/dist/core/group/extractors/topic-patterns/python.js +116 -0
  51. package/dist/core/group/extractors/topic-patterns/types.d.ts +25 -0
  52. package/dist/core/group/extractors/topic-patterns/types.js +10 -0
  53. package/dist/core/group/extractors/tree-sitter-scanner.d.ts +113 -0
  54. package/dist/core/group/extractors/tree-sitter-scanner.js +94 -0
  55. package/dist/core/ingestion/binding-accumulator.d.ts +22 -17
  56. package/dist/core/ingestion/binding-accumulator.js +29 -25
  57. package/dist/core/ingestion/cobol-processor.d.ts +1 -1
  58. package/dist/core/ingestion/import-processor.js +1 -1
  59. package/dist/core/ingestion/language-config.js +1 -1
  60. package/dist/core/ingestion/language-provider.d.ts +32 -5
  61. package/dist/core/ingestion/languages/c-cpp.js +2 -2
  62. package/dist/core/ingestion/languages/dart.d.ts +1 -1
  63. package/dist/core/ingestion/languages/dart.js +2 -2
  64. package/dist/core/ingestion/languages/go.d.ts +1 -1
  65. package/dist/core/ingestion/languages/go.js +2 -2
  66. package/dist/core/ingestion/languages/ruby.js +16 -1
  67. package/dist/core/ingestion/languages/swift.d.ts +1 -1
  68. package/dist/core/ingestion/languages/swift.js +2 -2
  69. package/dist/core/ingestion/markdown-processor.d.ts +1 -1
  70. package/dist/core/ingestion/method-extractors/configs/jvm.js +1 -0
  71. package/dist/core/ingestion/method-extractors/configs/ruby.js +1 -0
  72. package/dist/core/ingestion/method-extractors/generic.d.ts +6 -0
  73. package/dist/core/ingestion/method-extractors/generic.js +48 -4
  74. package/dist/core/ingestion/method-types.d.ts +4 -0
  75. package/dist/core/ingestion/model/resolve.js +103 -48
  76. package/dist/core/ingestion/model/semantic-model.d.ts +1 -1
  77. package/dist/core/ingestion/model/semantic-model.js +1 -1
  78. package/dist/core/ingestion/model/symbol-table.d.ts +7 -7
  79. package/dist/core/ingestion/model/symbol-table.js +7 -7
  80. package/dist/core/ingestion/mro-processor.d.ts +1 -1
  81. package/dist/core/ingestion/mro-processor.js +1 -1
  82. package/dist/core/ingestion/parsing-processor.js +54 -42
  83. package/dist/core/ingestion/pipeline-phases/cobol.d.ts +16 -0
  84. package/dist/core/ingestion/pipeline-phases/cobol.js +45 -0
  85. package/dist/core/ingestion/pipeline-phases/communities.d.ts +16 -0
  86. package/dist/core/ingestion/pipeline-phases/communities.js +62 -0
  87. package/dist/core/ingestion/pipeline-phases/cross-file-impl.d.ts +17 -0
  88. package/dist/core/ingestion/pipeline-phases/cross-file-impl.js +156 -0
  89. package/dist/core/ingestion/pipeline-phases/cross-file.d.ts +37 -0
  90. package/dist/core/ingestion/pipeline-phases/cross-file.js +63 -0
  91. package/dist/core/ingestion/pipeline-phases/index.d.ts +21 -0
  92. package/dist/core/ingestion/pipeline-phases/index.js +22 -0
  93. package/dist/core/ingestion/pipeline-phases/markdown.d.ts +17 -0
  94. package/dist/core/ingestion/pipeline-phases/markdown.js +33 -0
  95. package/dist/core/ingestion/pipeline-phases/mro.d.ts +18 -0
  96. package/dist/core/ingestion/pipeline-phases/mro.js +36 -0
  97. package/dist/core/ingestion/pipeline-phases/orm-extraction.d.ts +22 -0
  98. package/dist/core/ingestion/pipeline-phases/orm-extraction.js +92 -0
  99. package/dist/core/ingestion/pipeline-phases/orm.d.ts +15 -0
  100. package/dist/core/ingestion/pipeline-phases/orm.js +74 -0
  101. package/dist/core/ingestion/pipeline-phases/parse-impl.d.ts +47 -0
  102. package/dist/core/ingestion/pipeline-phases/parse-impl.js +437 -0
  103. package/dist/core/ingestion/pipeline-phases/parse.d.ts +49 -0
  104. package/dist/core/ingestion/pipeline-phases/parse.js +33 -0
  105. package/dist/core/ingestion/pipeline-phases/processes.d.ts +16 -0
  106. package/dist/core/ingestion/pipeline-phases/processes.js +143 -0
  107. package/dist/core/ingestion/pipeline-phases/routes.d.ts +21 -0
  108. package/dist/core/ingestion/pipeline-phases/routes.js +243 -0
  109. package/dist/core/ingestion/pipeline-phases/runner.d.ts +22 -0
  110. package/dist/core/ingestion/pipeline-phases/runner.js +203 -0
  111. package/dist/core/ingestion/pipeline-phases/scan.d.ts +21 -0
  112. package/dist/core/ingestion/pipeline-phases/scan.js +46 -0
  113. package/dist/core/ingestion/pipeline-phases/structure.d.ts +27 -0
  114. package/dist/core/ingestion/pipeline-phases/structure.js +35 -0
  115. package/dist/core/ingestion/pipeline-phases/tools.d.ts +20 -0
  116. package/dist/core/ingestion/pipeline-phases/tools.js +79 -0
  117. package/dist/core/ingestion/pipeline-phases/types.d.ts +79 -0
  118. package/dist/core/ingestion/pipeline-phases/types.js +37 -0
  119. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.d.ts +70 -0
  120. package/dist/core/ingestion/pipeline-phases/wildcard-synthesis.js +312 -0
  121. package/dist/core/ingestion/pipeline.d.ts +16 -10
  122. package/dist/core/ingestion/pipeline.js +66 -1534
  123. package/dist/core/ingestion/process-processor.js +1 -1
  124. package/dist/core/ingestion/tree-sitter-queries.d.ts +2 -2
  125. package/dist/core/ingestion/tree-sitter-queries.js +69 -0
  126. package/dist/core/ingestion/utils/ast-helpers.d.ts +1 -3
  127. package/dist/core/ingestion/utils/ast-helpers.js +48 -21
  128. package/dist/core/ingestion/utils/env.d.ts +10 -0
  129. package/dist/core/ingestion/utils/env.js +10 -0
  130. package/dist/core/ingestion/utils/graph-sort.d.ts +58 -0
  131. package/dist/core/ingestion/utils/graph-sort.js +100 -0
  132. package/dist/core/ingestion/workers/parse-worker.js +12 -8
  133. package/dist/core/lbug/lbug-adapter.d.ts +28 -0
  134. package/dist/core/lbug/lbug-adapter.js +162 -57
  135. package/package.json +3 -3
  136. package/vendor/tree-sitter-proto/binding.gyp +30 -0
  137. package/vendor/tree-sitter-proto/bindings/node/binding.cc +20 -0
  138. package/vendor/tree-sitter-proto/bindings/node/index.d.ts +28 -0
  139. package/vendor/tree-sitter-proto/bindings/node/index.js +7 -0
  140. package/vendor/tree-sitter-proto/package.json +18 -0
  141. package/vendor/tree-sitter-proto/src/node-types.json +1145 -0
  142. package/vendor/tree-sitter-proto/src/parser.c +10149 -0
  143. package/vendor/tree-sitter-proto/src/tree_sitter/alloc.h +54 -0
  144. package/vendor/tree-sitter-proto/src/tree_sitter/array.h +291 -0
  145. package/vendor/tree-sitter-proto/src/tree_sitter/parser.h +266 -0
@@ -0,0 +1,277 @@
1
/**
 * Produce the canonical form of an HTTP path so it can be compared against
 * Route.name in the graph. Intended to follow the same rules as
 * `ensureSlash` in core/ingestion/pipeline.ts:
 *
 * - surrounding whitespace is dropped; blank input becomes the root "/";
 * - the result always begins with exactly one "/";
 * - runs of "/" anywhere in the path collapse to a single "/";
 * - a trailing "/" is removed unless the path is the root itself;
 * - letter case is preserved (route matching is case-sensitive).
 *
 * @param raw path as written by the manifest author
 * @returns the canonical path
 */
function normalizeRoutePath(raw) {
    const candidate = raw.trim();
    if (candidate.length === 0) {
        return '/';
    }
    // Always prefix a slash: if one was already present, the collapse step
    // folds the duplicate away, so no explicit startsWith check is needed.
    const squeezed = `/${candidate}`.replace(/\/+/g, '/');
    return squeezed === '/' ? '/' : squeezed.replace(/\/+$/, '');
}
19
/**
 * Split a manifest HTTP contract into its optional `METHOD::` prefix and
 * its path portion.
 *
 * The explicit-method form `GET::/api/orders` must not be handed to
 * `normalizeRoutePath` as-is: that would yield `/GET::/api/orders`, which
 * can never equal a `Route.name` such as `/api/orders`. Callers use this
 * helper first and normalize only the returned `path`.
 *
 * A method prefix is one or more ASCII letters followed by `::`. Leading
 * whitespace before the prefix is tolerated, because `normalizeRoutePath`
 * only trims *after* this split; without that tolerance a padded
 * `" GET::/api/orders"` would be treated as a bare path and produce the
 * unmatched `/GET::/api/orders` described above.
 *
 * Edge cases:
 *  - `"::/api/orders"`: no letters before `::`, so the whole string is
 *    returned as a bare path with `method: null`.
 *  - `"GET::"`: returns `{ method: 'GET', path: '' }`; `normalizeRoutePath('')`
 *    then resolves the empty path to `/`.
 *
 * @param raw contract string as written in the manifest
 * @returns `{ method, path }`, where `method` is the upper-cased prefix or
 *          `null` when absent, and `path` is the remainder (or `raw`
 *          unchanged when there is no prefix)
 */
function parseHttpContract(raw) {
    const match = raw.match(/^\s*([A-Za-z]+)::/);
    if (!match)
        return { method: null, path: raw };
    // match[0] spans any leading whitespace plus "METHOD::", so slicing by
    // its length leaves exactly the path portion.
    return { method: match[1].toUpperCase(), path: raw.slice(match[0].length) };
}
47
+ /**
48
+ * Stable synthetic symbolUid for a manifest-declared contract whose target
49
+ * symbol could not be resolved against the per-repo graph (resolveSymbol
50
+ * returned null). Two reasons we don't leave the uid empty:
51
+ *
52
+ * 1. The bridge stores Contract nodes keyed in part by symbolUid; an empty
53
+ * uid means downstream Cypher queries that anchor on `provider.symbolUid`
54
+ * can't tell two different unresolved manifest contracts apart.
55
+ * 2. The cross-impact bridge query in cross-impact.ts joins local impact
56
+ * results to bridge contracts via `WHERE provider.symbolUid IN $localUids`.
57
+ * If the local impact engine produces a deterministic identifier for the
58
+ * unresolved target, it must agree with the value the bridge stored. A
59
+ * synthetic uid keyed off (repo, contractId) is the only thing both sides
60
+ * can derive without knowing about each other.
61
+ *
62
+ * Format: `manifest::<repo>::<contractId>`. Stable across syncs, scoped to a
63
+ * single repo within a group, and never collides with real indexer uids
64
+ * (which never start with `manifest::`).
65
+ */
66
+ export function manifestSymbolUid(repo, contractId) {
67
+ return `manifest::${repo}::${contractId}`;
68
+ }
69
+ export class ManifestExtractor {
70
+ async extractFromManifest(links, dbExecutors) {
71
+ const contracts = [];
72
+ const crossLinks = [];
73
+ for (const link of links) {
74
+ const contractId = this.buildContractId(link.type, link.contract);
75
+ const providerRepo = link.role === 'provider' ? link.from : link.to;
76
+ const consumerRepo = link.role === 'provider' ? link.to : link.from;
77
+ const providerSymbol = await this.resolveSymbol(providerRepo, link, dbExecutors);
78
+ const consumerSymbol = await this.resolveSymbol(consumerRepo, link, dbExecutors);
79
+ const providerRef = providerSymbol || { filePath: '', name: link.contract };
80
+ const consumerRef = consumerSymbol || { filePath: '', name: link.contract };
81
+ // When the resolver finds a real graph symbol we keep its uid, otherwise
82
+ // fall back to the deterministic synthetic uid (see manifestSymbolUid).
83
+ const providerUid = providerSymbol?.uid || manifestSymbolUid(providerRepo, contractId);
84
+ const consumerUid = consumerSymbol?.uid || manifestSymbolUid(consumerRepo, contractId);
85
+ contracts.push({
86
+ contractId,
87
+ type: link.type,
88
+ role: 'provider',
89
+ symbolUid: providerUid,
90
+ symbolRef: providerRef,
91
+ symbolName: link.contract,
92
+ confidence: 1.0,
93
+ meta: { source: 'manifest' },
94
+ repo: providerRepo,
95
+ });
96
+ contracts.push({
97
+ contractId,
98
+ type: link.type,
99
+ role: 'consumer',
100
+ symbolUid: consumerUid,
101
+ symbolRef: consumerRef,
102
+ symbolName: link.contract,
103
+ confidence: 1.0,
104
+ meta: { source: 'manifest' },
105
+ repo: consumerRepo,
106
+ });
107
+ crossLinks.push({
108
+ from: { repo: consumerRepo, symbolUid: consumerUid, symbolRef: consumerRef },
109
+ to: { repo: providerRepo, symbolUid: providerUid, symbolRef: providerRef },
110
+ type: link.type,
111
+ contractId,
112
+ matchType: 'manifest',
113
+ confidence: 1.0,
114
+ });
115
+ }
116
+ return { contracts, crossLinks };
117
+ }
118
+ async resolveSymbol(repoPathKey, link, dbExecutors) {
119
+ const executor = dbExecutors?.get(repoPathKey);
120
+ if (!executor)
121
+ return null;
122
+ // NOTE: All lookups use EXACT equality on the relevant name field and
123
+ // deterministic ORDER BY before LIMIT 1. Previous versions used CONTAINS
124
+ // for fuzzy matching (plus an unconditional ".proto" fallback for gRPC)
125
+ // which produced silent false positives: e.g. manifest "/orders" would
126
+ // match "/suborders", and a gRPC manifest entry in a repo with any
127
+ // .proto file would attach to a random proto symbol.
128
+ //
129
+ // If resolveSymbol returns null, the extractor falls back to a
130
+ // deterministic synthetic uid via `manifestSymbolUid(repo, contractId)`
131
+ // (see the function's docstring for why synthetic rather than empty).
132
+ // Cross-impact still works: the bridge query joins on the synthetic
133
+ // uid, and the local impact engine derives the same uid for the
134
+ // unresolved symbol — name-based hints are the additional safety net.
135
+ try {
136
+ let rows;
137
+ if (link.type === 'http') {
138
+ // Route.name is the canonicalized URL path (see
139
+ // core/ingestion/pipeline.ts ensureSlash + generateId('Route', ...)).
140
+ // Normalize the manifest contract the same way so a user-written
141
+ // "/api/orders" matches "api/orders" in the graph.
142
+ //
143
+ // The contract may also use the explicit-method form "GET::/api/orders"
144
+ // recommended by buildContractId. Strip the METHOD:: prefix before
145
+ // normalizing — otherwise `normalizeRoutePath('GET::/api/orders')`
146
+ // returns `/GET::/api/orders` and never matches Route.name. The
147
+ // captured method is not yet used to constrain the Cypher query
148
+ // (method-aware HANDLES_ROUTE matching is a future enhancement).
149
+ const parsed = parseHttpContract(link.contract);
150
+ const normalized = normalizeRoutePath(parsed.path);
151
+ rows = await executor(`MATCH (handler)-[r:CodeRelation {type: 'HANDLES_ROUTE'}]->(route:Route)
152
+ WHERE route.name = $normalized
153
+ RETURN handler.id AS uid, handler.name AS name, handler.filePath AS filePath
154
+ ORDER BY handler.filePath ASC
155
+ LIMIT 1`, { normalized });
156
+ }
157
+ else if (link.type === 'topic') {
158
+ // Topic names aren't a first-class NodeLabel in the graph —
159
+ // topics are referenced by function/method symbols (Kafka
160
+ // listeners, publishers). Restrict to symbol-like labels to
161
+ // avoid cross-matching Files/Variables/Imports that happen to
162
+ // share the topic name.
163
+ rows = await executor(`MATCH (n:Function|Method|Class|Interface) WHERE n.name = $contract
164
+ RETURN n.id AS uid, n.name AS name, n.filePath AS filePath
165
+ ORDER BY n.filePath ASC
166
+ LIMIT 1`, { contract: link.contract });
167
+ }
168
+ else if (link.type === 'grpc') {
169
+ // Contract is "Service/Method" or just "Service" (or package.Service
170
+ // variants). Prefer matching by method name when present, otherwise
171
+ // by service name. NO .proto path fallback — that's guaranteed to
172
+ // return a wrong symbol in any repo with more than one proto file.
173
+ // Label filters scope lookups: methods → Function|Method, services
174
+ // → Class|Interface (no label match = no silent wrong hits on
175
+ // File/Variable nodes that happen to share the name).
176
+ const parts = link.contract.split('/');
177
+ const serviceName = parts[0]?.trim() ?? '';
178
+ const methodName = parts[1]?.trim() ?? '';
179
+ if (methodName) {
180
+ rows = await executor(`MATCH (n:Function|Method) WHERE n.name = $methodName
181
+ RETURN n.id AS uid, n.name AS name, n.filePath AS filePath
182
+ ORDER BY n.filePath ASC
183
+ LIMIT 1`, { methodName });
184
+ }
185
+ else if (serviceName) {
186
+ rows = await executor(`MATCH (n:Class|Interface) WHERE n.name = $serviceName
187
+ RETURN n.id AS uid, n.name AS name, n.filePath AS filePath
188
+ ORDER BY n.filePath ASC
189
+ LIMIT 1`, { serviceName });
190
+ }
191
+ else {
192
+ rows = [];
193
+ }
194
+ }
195
+ else if (link.type === 'lib') {
196
+ // Only exact match on the symbol's name. Previous fallback to
197
+ // CONTAINS on n.filePath would promote "react" to "react-native"
198
+ // or "@types/react" — silent wrong attribution. Restrict to
199
+ // package-level labels so we don't return arbitrary symbols
200
+ // named after a library.
201
+ rows = await executor(`MATCH (n:Package|Module) WHERE n.name = $contract
202
+ RETURN n.id AS uid, n.name AS name, n.filePath AS filePath
203
+ ORDER BY n.filePath ASC
204
+ LIMIT 1`, { contract: link.contract });
205
+ }
206
+ else {
207
+ return null;
208
+ }
209
+ if (rows.length > 0) {
210
+ return {
211
+ filePath: rows[0].filePath,
212
+ name: rows[0].name,
213
+ uid: String(rows[0].uid ?? ''),
214
+ };
215
+ }
216
+ }
217
+ catch (err) {
218
+ // Log but don't throw: a broken graph query in one repo shouldn't
219
+ // fail the whole manifest extraction. Unresolved contracts still
220
+ // get a synthetic symbolUid below, so cross-impact can proceed.
221
+ const message = err instanceof Error ? err.message : String(err);
222
+ console.warn(`[manifest-extractor] resolveSymbol failed for ${link.type}:${link.contract} ` +
223
+ `in ${repoPathKey}: ${message}`);
224
+ }
225
+ return null;
226
+ }
227
+ /**
228
+ * Build a canonical contract id for a manifest link.
229
+ *
230
+ * HTTP is the only type with two valid forms:
231
+ * - Explicit method: `"GET::/api/orders"` → `"http::GET::/api/orders"`
232
+ * (matches exactly against `HttpRouteExtractor` provider/consumer
233
+ * contracts, which are also keyed by `http::<METHOD>::<path>`).
234
+ * - Method-agnostic: `"/api/orders"` → `"http::*::/api/orders"`
235
+ * — the `*` is a wildcard and is intended to match any concrete
236
+ * HTTP method on that path. Wildcard-aware matching is the
237
+ * responsibility of the sync / cross-impact layer (see #793);
238
+ * downstream code should treat `http::*::<path>` as matching
239
+ * every `http::<METHOD>::<path>` for the same path.
240
+ *
241
+ * Recommend the explicit-method form in group.yaml whenever the
242
+ * manifest author knows the method — it round-trips through exact
243
+ * equality matching without requiring wildcard logic downstream.
244
+ *
245
+ * NOTE on exhaustiveness: the switch covers every current
246
+ * `ContractType` variant and falls through to a `never` assertion so
247
+ * TypeScript fails the build if a new variant is added without a
248
+ * corresponding case.
249
+ */
250
+ buildContractId(type, contract) {
251
+ switch (type) {
252
+ case 'http': {
253
+ // Canonicalize method casing and path separators so logically
254
+ // equivalent inputs (`get::/api/orders` vs `GET::/api/orders`,
255
+ // or trailing-slash variants) produce the same contractId and
256
+ // matching `manifestSymbolUid` fallback. Without this, raw
257
+ // user casing leaks into cross-impact join keys and fragments
258
+ // matches across repos.
259
+ const { method, path: rawPath } = parseHttpContract(contract);
260
+ const normalizedPath = normalizeRoutePath(rawPath);
261
+ return method ? `http::${method}::${normalizedPath}` : `http::*::${normalizedPath}`;
262
+ }
263
+ case 'grpc':
264
+ return `grpc::${contract}`;
265
+ case 'topic':
266
+ return `topic::${contract}`;
267
+ case 'lib':
268
+ return `lib::${contract}`;
269
+ case 'custom':
270
+ return `custom::${contract}`;
271
+ default: {
272
+ const _exhaustive = type;
273
+ throw new Error(`Unhandled ContractType: ${String(_exhaustive)}`);
274
+ }
275
+ }
276
+ }
277
+ }
@@ -4,6 +4,5 @@ export declare class TopicExtractor implements ContractExtractor {
4
4
  type: "topic";
5
5
  canExtract(_repo: RepoHandle): Promise<boolean>;
6
6
  extract(_dbExecutor: CypherExecutor | null, repoPath: string, _repo: RepoHandle): Promise<ExtractedContract[]>;
7
- private scanFile;
8
7
  private dedupe;
9
8
  }
@@ -1,223 +1,86 @@
1
- import * as fs from 'node:fs';
2
- import * as path from 'node:path';
3
1
  import { glob } from 'glob';
4
- function readSafe(repoPath, rel) {
5
- const abs = path.resolve(repoPath, rel);
6
- const base = path.resolve(repoPath);
7
- const relToBase = path.relative(base, abs);
8
- if (relToBase.startsWith('..') || path.isAbsolute(relToBase))
9
- return null;
10
- try {
11
- return fs.readFileSync(abs, 'utf-8');
12
- }
13
- catch {
14
- return null;
15
- }
16
- }
17
- function makeContract(topicName, role, filePath, symbolName, confidence, broker) {
2
+ import Parser from 'tree-sitter';
3
+ import { readSafe } from './fs-utils.js';
4
+ import { scanFile, unquoteLiteral } from './tree-sitter-scanner.js';
5
+ import { TOPIC_SCAN_GLOB, getProviderForFile, } from './topic-patterns/index.js';
6
+ /**
7
+ * Language-agnostic orchestrator for topic (message broker) contract
8
+ * extraction. All grammar-specific knowledge lives in `topic-patterns/*`
9
+ * — this file must not import any tree-sitter grammar directly.
10
+ *
11
+ * Flow per file:
12
+ * 1. `getProviderForFile(rel)` → compiled plugin (or `undefined` if the
13
+ * file's extension isn't registered, in which case we skip it).
14
+ * 2. `scanFile(parser, provider, content)` → list of `{meta, valueText}`
15
+ * pairs, one per matched literal.
16
+ * 3. `unquoteLiteral(valueText)` → the raw topic string.
17
+ * 4. `makeContract(topic, meta, relPath)` → `ExtractedContract`.
18
+ *
19
+ * Adding a new language is a one-file edit in `topic-patterns/index.ts`.
20
+ */
21
+ function makeContract(topicName, meta, filePath) {
18
22
  return {
19
23
  contractId: `topic::${topicName}`,
20
24
  type: 'topic',
21
- role,
25
+ role: meta.role,
22
26
  symbolUid: '',
23
- symbolRef: { filePath: filePath.replace(/\\/g, '/'), name: symbolName },
24
- symbolName,
25
- confidence,
27
+ symbolRef: { filePath: filePath.replace(/\\/g, '/'), name: meta.symbolName },
28
+ symbolName: meta.symbolName,
29
+ confidence: meta.confidence,
26
30
  meta: {
27
- broker,
31
+ broker: meta.broker,
28
32
  topicName,
29
- extractionStrategy: 'source_scan',
33
+ extractionStrategy: 'tree_sitter',
30
34
  },
31
35
  };
32
36
  }
33
- // --- Kafka patterns ---
34
- const KAFKA_PATTERNS = [
35
- // Java: @KafkaListener(topics = "xxx")
36
- {
37
- regex: /@KafkaListener\s*\(\s*topics\s*=\s*"([^"]+)"/g,
38
- role: 'consumer',
39
- broker: 'kafka',
40
- confidence: 0.8,
41
- topicGroup: 1,
42
- symbolName: 'kafkaListener',
43
- },
44
- // Java: kafkaTemplate.send("xxx"
45
- {
46
- regex: /kafkaTemplate\.send\s*\(\s*"([^"]+)"/gi,
47
- role: 'provider',
48
- broker: 'kafka',
49
- confidence: 0.8,
50
- topicGroup: 1,
51
- symbolName: 'kafkaTemplate.send',
52
- },
53
- // Node: producer.send({ topic: 'xxx'
54
- {
55
- regex: /producer\.send\s*\(\s*\{\s*topic:\s*['"]([^'"]+)['"]/g,
56
- role: 'provider',
57
- broker: 'kafka',
58
- confidence: 0.8,
59
- topicGroup: 1,
60
- symbolName: 'producer.send',
61
- },
62
- // Node: consumer.subscribe({ topic: 'xxx'
63
- {
64
- regex: /consumer\.subscribe\s*\(\s*\{\s*topic:\s*['"]([^'"]+)['"]/g,
65
- role: 'consumer',
66
- broker: 'kafka',
67
- confidence: 0.8,
68
- topicGroup: 1,
69
- symbolName: 'consumer.subscribe',
70
- },
71
- // Go: consumer.ConsumePartition("xxx"
72
- {
73
- regex: /\.ConsumePartition\s*\(\s*"([^"]+)"/g,
74
- role: 'consumer',
75
- broker: 'kafka',
76
- confidence: 0.7,
77
- topicGroup: 1,
78
- symbolName: 'ConsumePartition',
79
- },
80
- // Python: KafkaConsumer('xxx'
81
- {
82
- regex: /KafkaConsumer\s*\(\s*['"]([^'"]+)['"]/g,
83
- role: 'consumer',
84
- broker: 'kafka',
85
- confidence: 0.7,
86
- topicGroup: 1,
87
- symbolName: 'KafkaConsumer',
88
- },
89
- // Python: producer.send('xxx' or producer.produce('xxx'
90
- {
91
- regex: /producer\.(?:send|produce)\s*\(\s*['"]([^'"]+)['"]/g,
92
- role: 'provider',
93
- broker: 'kafka',
94
- confidence: 0.7,
95
- topicGroup: 1,
96
- symbolName: 'producer.send',
97
- },
98
- ];
99
- // --- RabbitMQ patterns ---
100
- const RABBITMQ_PATTERNS = [
101
- // Java: @RabbitListener(queues = "xxx")
102
- {
103
- regex: /@RabbitListener\s*\(\s*queues\s*=\s*"([^"]+)"/g,
104
- role: 'consumer',
105
- broker: 'rabbitmq',
106
- confidence: 0.8,
107
- topicGroup: 1,
108
- symbolName: 'rabbitListener',
109
- },
110
- // Java: rabbitTemplate.convertAndSend("xxx"
111
- {
112
- regex: /rabbitTemplate\.convertAndSend\s*\(\s*"([^"]+)"/gi,
113
- role: 'provider',
114
- broker: 'rabbitmq',
115
- confidence: 0.8,
116
- topicGroup: 1,
117
- symbolName: 'rabbitTemplate.convertAndSend',
118
- },
119
- // Node: channel.consume("xxx"
120
- {
121
- regex: /channel\.consume\s*\(\s*"([^"]+)"/g,
122
- role: 'consumer',
123
- broker: 'rabbitmq',
124
- confidence: 0.8,
125
- topicGroup: 1,
126
- symbolName: 'channel.consume',
127
- },
128
- // Node: channel.publish("xxx"
129
- {
130
- regex: /channel\.publish\s*\(\s*"([^"]+)"/g,
131
- role: 'provider',
132
- broker: 'rabbitmq',
133
- confidence: 0.8,
134
- topicGroup: 1,
135
- symbolName: 'channel.publish',
136
- },
137
- // Node: channel.sendToQueue("xxx"
138
- {
139
- regex: /channel\.sendToQueue\s*\(\s*"([^"]+)"/g,
140
- role: 'provider',
141
- broker: 'rabbitmq',
142
- confidence: 0.8,
143
- topicGroup: 1,
144
- symbolName: 'channel.sendToQueue',
145
- },
146
- // Python: channel.basic_consume(queue='xxx'
147
- {
148
- regex: /channel\.basic_consume\s*\(\s*queue\s*=\s*['"]([^'"]+)['"]/g,
149
- role: 'consumer',
150
- broker: 'rabbitmq',
151
- confidence: 0.7,
152
- topicGroup: 1,
153
- symbolName: 'basic_consume',
154
- },
155
- // Python: channel.basic_publish(exchange='xxx'
156
- {
157
- regex: /channel\.basic_publish\s*\([^)]*exchange\s*=\s*['"]([^'"]+)['"]/g,
158
- role: 'provider',
159
- broker: 'rabbitmq',
160
- confidence: 0.7,
161
- topicGroup: 1,
162
- symbolName: 'basic_publish',
163
- },
164
- ];
165
- // --- NATS patterns ---
166
- const NATS_PATTERNS = [
167
- // Go/Node: nc.Subscribe("xxx" or nc.subscribe("xxx"
168
- {
169
- regex: /nc\.(?:S|s)ubscribe\s*\(\s*"([^"]+)"/g,
170
- role: 'consumer',
171
- broker: 'nats',
172
- confidence: 0.8,
173
- topicGroup: 1,
174
- symbolName: 'nc.Subscribe',
175
- },
176
- // Go/Node: nc.Publish("xxx" or nc.publish("xxx"
177
- {
178
- regex: /nc\.(?:P|p)ublish\s*\(\s*"([^"]+)"/g,
179
- role: 'provider',
180
- broker: 'nats',
181
- confidence: 0.8,
182
- topicGroup: 1,
183
- symbolName: 'nc.Publish',
184
- },
185
- ];
186
- const ALL_PATTERNS = [...KAFKA_PATTERNS, ...RABBITMQ_PATTERNS, ...NATS_PATTERNS];
187
37
  export class TopicExtractor {
188
38
  type = 'topic';
189
39
  async canExtract(_repo) {
190
40
  return true;
191
41
  }
192
42
  async extract(_dbExecutor, repoPath, _repo) {
193
- const files = await glob('**/*.{ts,tsx,js,jsx,java,go,py}', {
43
+ const files = await glob(TOPIC_SCAN_GLOB, {
194
44
  cwd: repoPath,
195
- ignore: ['**/node_modules/**', '**/.git/**', '**/vendor/**', '**/dist/**', '**/build/**'],
45
+ ignore: [
46
+ '**/node_modules/**',
47
+ '**/.git/**',
48
+ '**/vendor/**',
49
+ '**/dist/**',
50
+ '**/build/**',
51
+ // Language-level test file conventions. Go test files
52
+ // `*_test.go` live next to source; other languages either use
53
+ // separate test directories (Python's `tests/`, Java's
54
+ // `src/test/`) or are already covered by the dist/build ignores.
55
+ // Pushed to the glob level so the orchestrator stays
56
+ // language-agnostic.
57
+ '**/*_test.go',
58
+ ],
196
59
  nodir: true,
197
60
  });
61
+ // One parser reused across files; the scanner calls `setLanguage` per
62
+ // file based on which plugin the registry returns.
63
+ const parser = new Parser();
198
64
  const out = [];
199
65
  for (const rel of files) {
66
+ const provider = getProviderForFile(rel);
67
+ if (!provider)
68
+ continue;
200
69
  const content = readSafe(repoPath, rel);
201
70
  if (!content)
202
71
  continue;
203
- out.push(...this.scanFile(content, rel));
204
- }
205
- return this.dedupe(out);
206
- }
207
- scanFile(content, filePath) {
208
- const out = [];
209
- for (const pattern of ALL_PATTERNS) {
210
- // Reset regex state for each file
211
- const re = new RegExp(pattern.regex.source, pattern.regex.flags);
212
- let m;
213
- while ((m = re.exec(content)) !== null) {
214
- const topicName = m[pattern.topicGroup];
72
+ const matches = scanFile(parser, provider, content);
73
+ for (const match of matches) {
74
+ const valueNode = match.captures.value;
75
+ if (!valueNode)
76
+ continue;
77
+ const topicName = unquoteLiteral(valueNode.text);
215
78
  if (!topicName)
216
79
  continue;
217
- out.push(makeContract(topicName, pattern.role, filePath, pattern.symbolName, pattern.confidence, pattern.broker));
80
+ out.push(makeContract(topicName, match.meta, rel));
218
81
  }
219
82
  }
220
- return out;
83
+ return this.dedupe(out);
221
84
  }
222
85
  dedupe(items) {
223
86
  const seen = new Set();
@@ -0,0 +1,2 @@
1
+ import type { TopicMeta } from './types.js';
2
+ export declare const GO_TOPIC_PROVIDER: import("../tree-sitter-scanner.js").CompiledPatterns<TopicMeta>;