gitnexus 1.6.4-rc.41 → 1.6.4-rc.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,7 @@ const DEFAULT_DETECT = {
9
9
  topics: true,
10
10
  shared_libs: true,
11
11
  embedding_fallback: true,
12
+ workspace_deps: true,
12
13
  };
13
14
  const DEFAULT_MATCHING = {
14
15
  bm25_threshold: 0.7,
@@ -216,6 +216,17 @@ export class ManifestExtractor {
216
216
  rows = await executor(`MATCH (n:Package|Module) WHERE n.name = $contract
217
217
  RETURN n.id AS uid, n.name AS name, n.filePath AS filePath
218
218
  ORDER BY n.filePath ASC
219
+ LIMIT 1`, { contract: link.contract });
220
+ }
221
+ else if (link.type === 'custom') {
222
+ // V1: exact name-only match on code-definition nodes.
223
+ // Positive allowlist mirrors other contract types. If multiple code
224
+ // symbols share the same name, ORDER BY filePath ASC LIMIT 1 picks
225
+ // the alphabetically-first occurrence deterministically.
226
+ rows = await executor(`MATCH (n:Function|Method|Class|Interface|Struct|Enum|Trait|Constructor|TypeAlias|Impl|Macro|Union|Typedef|Property|Record|Delegate|Annotation|Template|Const|Static|CodeElement)
227
+ WHERE n.name = $contract
228
+ RETURN n.id AS uid, n.name AS name, n.filePath AS filePath
229
+ ORDER BY n.filePath ASC
219
230
  LIMIT 1`, { contract: link.contract });
220
231
  }
221
232
  else {
@@ -0,0 +1,30 @@
1
+ import type { CypherExecutor } from '../contract-extractor.js';
2
+ import type { GroupManifestLink } from '../types.js';
3
+ /**
4
+ * Discover cross-crate contracts in a Rust workspace by reading each
5
+ * member's `Cargo.toml` dependencies and scanning source files for
6
+ * `use <workspace_dep>::<Type>` imports.
7
+ *
8
+ * Emits `GroupManifestLink[]` with `type: 'custom'` that feed into the
9
+ * existing ManifestExtractor pipeline — no new matching logic needed.
10
+ *
11
+ * Designed for the group-level sync pipeline: it receives all repos in
12
+ * a group and produces cross-repo links between them.
13
+ */
14
+ interface CrateMeta {
15
+ name: string;
16
+ groupPath: string;
17
+ repoPath: string;
18
+ workspaceDeps: string[];
19
+ }
20
+ export interface RustWorkspaceResult {
21
+ links: GroupManifestLink[];
22
+ discoveredCrates: Map<string, CrateMeta>;
23
+ }
24
+ /**
25
+ * Discover cross-crate contracts across all Rust repos in a group.
26
+ *
27
+ * Returns `GroupManifestLink[]` ready to feed into `ManifestExtractor`.
28
+ */
29
+ export declare function extractRustWorkspaceLinks(repos: Record<string, string>, repoPaths: Map<string, string>, _dbExecutors?: Map<string, CypherExecutor>): Promise<RustWorkspaceResult>;
30
+ export {};
@@ -0,0 +1,214 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+ import { shouldIgnorePath } from '../../../config/ignore-service.js';
4
+ import { loadIgnoreRules } from '../../../config/ignore-service.js';
5
+ /**
6
+ * Parse a Cargo.toml to extract the crate name and workspace dependency
7
+ * names. Uses simple line-based parsing — no TOML library needed for
8
+ * the subset we care about.
9
+ */
10
+ async function parseCrateManifest(repoPath) {
11
+ const cargoPath = path.join(repoPath, 'Cargo.toml');
12
+ let content;
13
+ try {
14
+ content = await fs.readFile(cargoPath, 'utf-8');
15
+ }
16
+ catch {
17
+ return null;
18
+ }
19
+ let name = '';
20
+ const workspaceDeps = [];
21
+ const nameMatch = content.match(/^\[package\]\s*\n(?:[^\[]*?\n)*?name\s*=\s*"([^"]+)"/m);
22
+ if (nameMatch)
23
+ name = nameMatch[1];
24
+ // Match dependencies that use workspace = true, which indicates they
25
+ // are workspace-internal deps:
26
+ // dep_name = { workspace = true }
27
+ // dep_name.workspace = true
28
+ //
29
+ // Also match plain path dependencies:
30
+ // dep_name = { path = "../other" }
31
+ const depSections = content.matchAll(/\[(dependencies|dev-dependencies|build-dependencies)\]\s*\n([\s\S]*?)(?=\n\[|$)/g);
32
+ for (const section of depSections) {
33
+ const sectionBody = section[2];
34
+ // workspace = true style
35
+ const wsMatches = sectionBody.matchAll(/^(\w[\w-]*)\s*=\s*\{[^}]*workspace\s*=\s*true[^}]*\}/gm);
36
+ for (const m of wsMatches)
37
+ workspaceDeps.push(m[1]);
38
+ // dotted workspace style: dep_name.workspace = true
39
+ const dottedMatches = sectionBody.matchAll(/^(\w[\w-]*)\.workspace\s*=\s*true/gm);
40
+ for (const m of dottedMatches)
41
+ workspaceDeps.push(m[1]);
42
+ // path = "../other" style (local path deps within workspace)
43
+ const pathMatches = sectionBody.matchAll(/^(\w[\w-]*)\s*=\s*\{[^}]*path\s*=\s*"[^"]*"[^}]*\}/gm);
44
+ for (const m of pathMatches)
45
+ workspaceDeps.push(m[1]);
46
+ }
47
+ if (!name)
48
+ return null;
49
+ return { name, workspaceDeps: [...new Set(workspaceDeps)] };
50
+ }
51
+ /**
52
+ * Scan Rust source files for `use <crate>::<path>::<Symbol>` patterns
53
+ * where <crate> is a known workspace dependency.
54
+ */
55
+ async function scanImports(repoPath, knownCrates) {
56
+ const results = [];
57
+ const normalizedCrates = new Map();
58
+ for (const c of knownCrates) {
59
+ normalizedCrates.set(c.replace(/-/g, '_'), c);
60
+ }
61
+ const sourceFiles = await findRustFiles(repoPath);
62
+ for (const relFile of sourceFiles) {
63
+ const absPath = path.join(repoPath, relFile);
64
+ let content;
65
+ try {
66
+ content = await fs.readFile(absPath, 'utf-8');
67
+ }
68
+ catch {
69
+ continue;
70
+ }
71
+ // Match patterns:
72
+ // use crate_name::Type;
73
+ // use crate_name::module::Type;
74
+ // use crate_name::{Type1, Type2};
75
+ // use crate_name::module::{Type1, Type2};
76
+ const useRegex = /^use\s+(\w+)::(.+);/gm;
77
+ let match;
78
+ while ((match = useRegex.exec(content)) !== null) {
79
+ const crateName = match[1];
80
+ const originalCrateName = normalizedCrates.get(crateName);
81
+ if (!originalCrateName)
82
+ continue;
83
+ const importPath = match[2].trim();
84
+ // Handle grouped imports: {Type1, Type2, module::Type3}
85
+ const braceMatch = importPath.match(/\{([^}]+)\}/);
86
+ if (braceMatch) {
87
+ const items = braceMatch[1].split(',').map((s) => s.trim());
88
+ for (const item of items) {
89
+ const symbolName = extractSymbolName(item);
90
+ if (symbolName && isTypeName(symbolName)) {
91
+ results.push({ crateName: originalCrateName, symbolName, filePath: relFile });
92
+ }
93
+ }
94
+ }
95
+ else {
96
+ const symbolName = extractSymbolName(importPath);
97
+ if (symbolName && isTypeName(symbolName)) {
98
+ results.push({ crateName: originalCrateName, symbolName, filePath: relFile });
99
+ }
100
+ }
101
+ }
102
+ }
103
+ return results;
104
+ }
105
+ /** Extract the final symbol name from a path like `module::submod::TypeName`. */
106
+ function extractSymbolName(importPath) {
107
+ const trimmed = importPath.trim();
108
+ if (!trimmed || trimmed === '*' || trimmed === 'self')
109
+ return null;
110
+ const parts = trimmed.split('::');
111
+ return parts[parts.length - 1].trim() || null;
112
+ }
113
+ /**
114
+ * Heuristic: in Rust, types (structs, enums, traits) are PascalCase.
115
+ * Functions and modules are snake_case. We only want types as cross-crate
116
+ * contracts — functions are too granular and modules too broad.
117
+ */
118
+ function isTypeName(name) {
119
+ return /^[A-Z][A-Za-z0-9]*$/.test(name);
120
+ }
121
+ async function findRustFiles(repoPath) {
122
+ const results = [];
123
+ const ig = await loadIgnoreRules(repoPath);
124
+ async function walk(dir, rel) {
125
+ let entries;
126
+ try {
127
+ entries = await fs.readdir(dir, { withFileTypes: true });
128
+ }
129
+ catch {
130
+ return;
131
+ }
132
+ for (const entry of entries) {
133
+ const childRel = rel ? `${rel}/${entry.name}` : entry.name;
134
+ if (entry.isDirectory()) {
135
+ if (shouldIgnorePath(childRel))
136
+ continue;
137
+ if (ig && ig.ignores(childRel + '/'))
138
+ continue;
139
+ await walk(path.join(dir, entry.name), childRel);
140
+ }
141
+ else if (entry.name.endsWith('.rs')) {
142
+ if (shouldIgnorePath(childRel))
143
+ continue;
144
+ if (ig && ig.ignores(childRel))
145
+ continue;
146
+ results.push(childRel);
147
+ }
148
+ }
149
+ }
150
+ await walk(repoPath, '');
151
+ return results;
152
+ }
153
+ /**
154
+ * Discover cross-crate contracts across all Rust repos in a group.
155
+ *
156
+ * Returns `GroupManifestLink[]` ready to feed into `ManifestExtractor`.
157
+ */
158
+ export async function extractRustWorkspaceLinks(repos, repoPaths, _dbExecutors) {
159
+ // Phase 1: Parse all Cargo.toml files to build crate registry
160
+ const cratesByName = new Map();
161
+ const cratesByGroupPath = new Map();
162
+ for (const [groupPath] of Object.entries(repos)) {
163
+ const repoPath = repoPaths.get(groupPath);
164
+ if (!repoPath)
165
+ continue;
166
+ const manifest = await parseCrateManifest(repoPath);
167
+ if (!manifest)
168
+ continue;
169
+ const meta = {
170
+ name: manifest.name,
171
+ groupPath,
172
+ repoPath,
173
+ workspaceDeps: manifest.workspaceDeps,
174
+ };
175
+ const existing = cratesByName.get(manifest.name);
176
+ if (existing) {
177
+ console.warn(`[rust-workspace-extractor] duplicate crate name "${manifest.name}" in "${groupPath}" and "${existing.groupPath}" — skipping "${groupPath}"`);
178
+ continue;
179
+ }
180
+ cratesByName.set(manifest.name, meta);
181
+ cratesByGroupPath.set(groupPath, meta);
182
+ }
183
+ // Phase 2: For each crate, identify which of its workspace deps are
184
+ // also in this group (i.e., repos we can link to)
185
+ const links = [];
186
+ const seen = new Set();
187
+ for (const [, crate] of cratesByGroupPath) {
188
+ const groupCrateDeps = crate.workspaceDeps.filter((d) => cratesByName.has(d));
189
+ if (groupCrateDeps.length === 0)
190
+ continue;
191
+ // Phase 3: Scan source files for imports from workspace deps
192
+ const knownCrates = new Set(groupCrateDeps);
193
+ const imports = await scanImports(crate.repoPath, knownCrates);
194
+ for (const imp of imports) {
195
+ const providerCrate = cratesByName.get(imp.crateName);
196
+ if (!providerCrate)
197
+ continue;
198
+ const qualifiedContract = `${imp.crateName}::${imp.symbolName}`;
199
+ const key = `${crate.groupPath}→${providerCrate.groupPath}::${qualifiedContract}`;
200
+ if (seen.has(key))
201
+ continue;
202
+ seen.add(key);
203
+ const link = {
204
+ from: providerCrate.groupPath,
205
+ to: crate.groupPath,
206
+ type: 'custom',
207
+ contract: qualifiedContract,
208
+ role: 'provider',
209
+ };
210
+ links.push(link);
211
+ }
212
+ }
213
+ return { links, discoveredCrates: cratesByGroupPath };
214
+ }
@@ -7,6 +7,7 @@ import { HttpRouteExtractor } from './extractors/http-route-extractor.js';
7
7
  import { GrpcExtractor } from './extractors/grpc-extractor.js';
8
8
  import { TopicExtractor } from './extractors/topic-extractor.js';
9
9
  import { ManifestExtractor } from './extractors/manifest-extractor.js';
10
+ import { extractRustWorkspaceLinks } from './extractors/rust-workspace-extractor.js';
10
11
  import { runExactMatch } from './matching.js';
11
12
  import { detectServiceBoundaries, assignService } from './service-boundary-detector.js';
12
13
  import { writeContractRegistry } from './storage.js';
@@ -58,12 +59,14 @@ export async function syncGroup(config, opts) {
58
59
  let autoContracts = [];
59
60
  let manifestCrossLinks = [];
60
61
  let dbExecutors;
62
+ let registryEntries;
61
63
  const eo = opts?.extractorOverride;
62
64
  if (eo && eo.length === 0) {
63
65
  autoContracts = await eo();
64
66
  }
65
67
  else {
66
- const entries = await readRegistry();
68
+ registryEntries = await readRegistry();
69
+ const entries = registryEntries;
67
70
  const resolve = opts?.resolveRepoHandle ?? defaultResolveHandle(entries);
68
71
  const httpEx = new HttpRouteExtractor();
69
72
  const grpcEx = new GrpcExtractor();
@@ -143,29 +146,47 @@ export async function syncGroup(config, opts) {
143
146
  }
144
147
  }
145
148
  }
146
- // Process manifest links declared in group.yaml.
149
+ // Auto-discover workspace dependency contracts (Rust Cargo workspaces, etc.)
150
+ // and merge them with explicit manifest links. Discovered links use the same
151
+ // ManifestExtractor pipeline as hand-written links in group.yaml.
152
+ let allLinks = [...config.links];
153
+ if (config.detect.workspace_deps) {
154
+ const repoPaths = new Map();
155
+ if (!registryEntries)
156
+ registryEntries = await readRegistry();
157
+ for (const [groupPath, regName] of Object.entries(config.repos)) {
158
+ const e = registryEntries.find((en) => en.name === regName);
159
+ if (e)
160
+ repoPaths.set(groupPath, e.path);
161
+ }
162
+ const wsResult = await extractRustWorkspaceLinks(config.repos, repoPaths, dbExecutors);
163
+ if (wsResult.links.length > 0) {
164
+ allLinks = [...allLinks, ...wsResult.links];
165
+ if (opts?.verbose) {
166
+ console.log(` workspace-deps: discovered ${wsResult.links.length} cross-crate links from ${wsResult.discoveredCrates.size} Rust crates`);
167
+ }
168
+ }
169
+ }
170
+ // Process manifest links declared in group.yaml (plus any auto-discovered).
147
171
  // ManifestExtractor is fully implemented but was never wired into this
148
172
  // pipeline — config.links were parsed and validated but silently dropped.
149
173
  // Placed after the DB try/finally: resolveSymbol falls back to synthetic
150
174
  // UIDs when dbExecutors is undefined or a pool is closed, so cross-links
151
175
  // are always generated regardless of whether real DB executors are available.
152
- if (config.links.length > 0) {
153
- // Warn about dangling links that reference repos not declared in config.repos.
154
- // They still generate cross-links via synthetic UIDs (determinism is preserved),
155
- // but the operator probably meant something that now silently does nothing useful.
176
+ if (allLinks.length > 0) {
156
177
  const knownRepos = new Set(Object.keys(config.repos));
157
- for (const link of config.links) {
178
+ for (const link of allLinks) {
158
179
  const dangling = [link.from, link.to].filter((r) => !knownRepos.has(r));
159
180
  if (dangling.length > 0) {
160
181
  console.warn(`[group/sync] manifest link ${link.type}:${link.contract} references repos not in config.repos: ${dangling.join(', ')} — cross-links will use synthetic UIDs`);
161
182
  }
162
183
  }
163
184
  const manifestEx = new ManifestExtractor();
164
- const manifestResult = await manifestEx.extractFromManifest(config.links, dbExecutors);
185
+ const manifestResult = await manifestEx.extractFromManifest(allLinks, dbExecutors);
165
186
  autoContracts.push(...manifestResult.contracts);
166
187
  manifestCrossLinks = manifestResult.crossLinks;
167
188
  if (opts?.verbose) {
168
- console.log(` manifest: ${manifestCrossLinks.length} cross-links from ${config.links.length} declared links`);
189
+ console.log(` manifest: ${manifestCrossLinks.length} cross-links from ${allLinks.length} links (${config.links.length} declared + ${allLinks.length - config.links.length} discovered)`);
169
190
  }
170
191
  }
171
192
  const { matched, unmatched } = runExactMatch(autoContracts, undefined, config.matching);
@@ -24,6 +24,7 @@ export interface DetectConfig {
24
24
  topics: boolean;
25
25
  shared_libs: boolean;
26
26
  embedding_fallback: boolean;
27
+ workspace_deps: boolean;
27
28
  }
28
29
  export interface MatchingConfig {
29
30
  bm25_threshold: number;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.4-rc.41",
3
+ "version": "1.6.4-rc.43",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",