gitnexus 1.6.4-rc.42 → 1.6.4-rc.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/embeddings/embedder.js +6 -6
- package/dist/core/embeddings/hf-env.d.ts +46 -0
- package/dist/core/embeddings/hf-env.js +46 -0
- package/dist/core/group/config-parser.js +1 -0
- package/dist/core/group/extractors/rust-workspace-extractor.d.ts +30 -0
- package/dist/core/group/extractors/rust-workspace-extractor.js +214 -0
- package/dist/core/group/sync.js +30 -9
- package/dist/core/group/types.d.ts +1 -0
- package/dist/mcp/core/embedder.js +6 -7
- package/package.json +1 -1
package/dist/core/embeddings/embedder.js
CHANGED

@@ -13,7 +13,6 @@ if (!process.env.ORT_LOG_LEVEL) {
     process.env.ORT_LOG_LEVEL = '3';
 }
 import { pipeline, env } from '@huggingface/transformers';
-import os from 'os';
 import { existsSync } from 'fs';
 import { execFileSync } from 'child_process';
 import { join, dirname } from 'path';

@@ -21,6 +20,7 @@ import { createRequire } from 'module';
 import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
 import { isHttpMode, getHttpDimensions, httpEmbed } from './http-client.js';
 import { resolveEmbeddingConfig } from './config.js';
+import { applyHfEnvOverrides } from './hf-env.js';
 /**
  * Check whether the onnxruntime-node package that @huggingface/transformers
  * will actually load at runtime ships the CUDA execution provider.

@@ -132,11 +132,11 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
     try {
         // Configure transformers.js environment
         env.allowLocalModels = false;
-        //
-        //
-        //
-        //
-        env
+        // Bridge user-controlled env vars to transformers.js: HF_HOME →
+        // env.cacheDir, HF_ENDPOINT → env.remoteHost (#1205). Centralised in
+        // applyHfEnvOverrides so the MCP embedder entry point behaves
+        // identically.
+        applyHfEnvOverrides(env);
        const isDev = process.env.NODE_ENV === 'development';
        if (isDev) {
            console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
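
In effect, this hunk replaces per-entry-point cache and mirror wiring with one shared call. A minimal sketch of the call-site pattern, assuming transformers' module-level `env` object and the `hf-env.js` added later in this diff (the model id is the one the MCP embedder in this release uses):

import { pipeline, env } from '@huggingface/transformers';
import { applyHfEnvOverrides } from './hf-env.js';

// Bridge HF_* env vars once at startup, before the first pipeline() call
// can trigger a model download. `env` is the live module-level object from
// @huggingface/transformers, so mutating it affects all later downloads.
env.allowLocalModels = false;
applyHfEnvOverrides(env); // HF_HOME → env.cacheDir, HF_ENDPOINT → env.remoteHost

// With HF_ENDPOINT=https://hf-mirror.com set, this download goes to the
// mirror instead of huggingface.co, and the cache lands under HF_HOME.
const embed = await pipeline('feature-extraction', 'Snowflake/snowflake-arctic-embed-xs');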
package/dist/core/embeddings/hf-env.d.ts
ADDED

@@ -0,0 +1,46 @@
+/**
+ * @internal Exported only for unit tests and the two embedder entry points
+ * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
+ * public package API.
+ *
+ * Minimal subset of `@huggingface/transformers`' `env` object that gitnexus
+ * mutates. Defining a local structural type keeps this helper free of a
+ * transitive dependency on transformers' generated `.d.ts` while still
+ * giving full type-checking on the two fields we actually touch.
+ */
+export interface HfEnvSubset {
+    cacheDir: string;
+    remoteHost: string;
+}
+/**
+ * @internal Exported only for unit tests and the two embedder entry points
+ * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
+ * public package API.
+ *
+ * Apply user-controlled HuggingFace environment overrides to the
+ * `@huggingface/transformers` `env` object. Centralises the two env-var
+ * bridges so every gitnexus embedder entry point (the analyze pipeline
+ * and the MCP server) behaves identically.
+ *
+ * - **`HF_HOME`** → `env.cacheDir` (default: `~/.cache/huggingface`).
+ *   transformers.js otherwise defaults to `./node_modules/.cache` inside
+ *   its own install dir, which is unwritable when gitnexus is installed
+ *   globally (e.g. `/usr/lib/node_modules/`).
+ *
+ * - **`HF_ENDPOINT`** → `env.remoteHost` (#1205). transformers.js does
+ *   not read `HF_ENDPOINT` on its own — it reads `env.remoteHost` —
+ *   even though `HF_ENDPOINT` is the standard env var the upstream
+ *   `huggingface_hub` Python client and the official HF mirror docs
+ *   tell users to set. Bridging the two unblocks `--embeddings` for
+ *   users behind networks where `huggingface.co` is unreachable
+ *   (corporate proxies, the GFW, air-gapped mirrors). The trailing
+ *   slash is normalised because transformers.js builds URLs by string
+ *   concatenation and a missing slash silently falls through to its
+ *   default `huggingface.co/...` host.
+ *
+ * Mutation rather than return-and-apply because callers already hold a
+ * reference to the live `env` object imported from
+ * `@huggingface/transformers` — passing the same reference in keeps the
+ * call site a single line at each entry point.
+ */
+export declare function applyHfEnvOverrides(env: HfEnvSubset): void;
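
Because `HfEnvSubset` is structural, the same function accepts both the real transformers `env` and a bare test double. A minimal sketch, assuming transformers' `env` exposes string-typed `cacheDir`/`remoteHost` fields compatible with this subset:

import { env } from '@huggingface/transformers';
import { applyHfEnvOverrides, type HfEnvSubset } from './hf-env.js';

// Entry-point call against the live env object.
applyHfEnvOverrides(env);

// Unit tests can pass a plain object instead; no mocking of transformers needed.
const double: HfEnvSubset = { cacheDir: '', remoteHost: '' };
applyHfEnvOverrides(double);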
package/dist/core/embeddings/hf-env.js
ADDED

@@ -0,0 +1,46 @@
+import os from 'node:os';
+import { join } from 'node:path';
+/**
+ * @internal Exported only for unit tests and the two embedder entry points
+ * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
+ * public package API.
+ *
+ * Apply user-controlled HuggingFace environment overrides to the
+ * `@huggingface/transformers` `env` object. Centralises the two env-var
+ * bridges so every gitnexus embedder entry point (the analyze pipeline
+ * and the MCP server) behaves identically.
+ *
+ * - **`HF_HOME`** → `env.cacheDir` (default: `~/.cache/huggingface`).
+ *   transformers.js otherwise defaults to `./node_modules/.cache` inside
+ *   its own install dir, which is unwritable when gitnexus is installed
+ *   globally (e.g. `/usr/lib/node_modules/`).
+ *
+ * - **`HF_ENDPOINT`** → `env.remoteHost` (#1205). transformers.js does
+ *   not read `HF_ENDPOINT` on its own — it reads `env.remoteHost` —
+ *   even though `HF_ENDPOINT` is the standard env var the upstream
+ *   `huggingface_hub` Python client and the official HF mirror docs
+ *   tell users to set. Bridging the two unblocks `--embeddings` for
+ *   users behind networks where `huggingface.co` is unreachable
+ *   (corporate proxies, the GFW, air-gapped mirrors). The trailing
+ *   slash is normalised because transformers.js builds URLs by string
+ *   concatenation and a missing slash silently falls through to its
+ *   default `huggingface.co/...` host.
+ *
+ * Mutation rather than return-and-apply because callers already hold a
+ * reference to the live `env` object imported from
+ * `@huggingface/transformers` — passing the same reference in keeps the
+ * call site a single line at each entry point.
+ */
+export function applyHfEnvOverrides(env) {
+    env.cacheDir = process.env.HF_HOME ?? join(os.homedir(), '.cache', 'huggingface');
+    // `.trim()` guards against the common copy-paste failure mode of
+    // `HF_ENDPOINT=" https://hf-mirror.com "` (leading/trailing whitespace
+    // from shell scripts or docs) — without it, a whitespace-only value
+    // would be truthy and produce an invalid `env.remoteHost = ' /'` that
+    // silently misroutes downloads. Empty string remains falsy in JS so the
+    // truthy guard already handles the unset/empty cases.
+    const endpoint = process.env.HF_ENDPOINT?.trim();
+    if (endpoint) {
+        env.remoteHost = endpoint.endsWith('/') ? endpoint : endpoint + '/';
+    }
+}
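
The trim and trailing-slash rules above reduce to a small set of cases. A test sketch against the implementation as shown, using node:assert and a plain object for `env` (possible because the helper only touches the two fields):

import assert from 'node:assert';
import os from 'node:os';
import { join } from 'node:path';
import { applyHfEnvOverrides } from './hf-env.js';

function run(hfHome?: string, hfEndpoint?: string) {
    if (hfHome === undefined) delete process.env.HF_HOME;
    else process.env.HF_HOME = hfHome;
    if (hfEndpoint === undefined) delete process.env.HF_ENDPOINT;
    else process.env.HF_ENDPOINT = hfEndpoint;
    const env = { cacheDir: '', remoteHost: 'unchanged' };
    applyHfEnvOverrides(env);
    return env;
}

// HF_HOME unset: falls back to the per-user cache dir
assert.equal(run().cacheDir, join(os.homedir(), '.cache', 'huggingface'));
// Trailing slash is appended exactly once
assert.equal(run(undefined, 'https://hf-mirror.com').remoteHost, 'https://hf-mirror.com/');
assert.equal(run(undefined, 'https://hf-mirror.com/').remoteHost, 'https://hf-mirror.com/');
// Copy-pasted whitespace is trimmed before the slash check
assert.equal(run(undefined, ' https://hf-mirror.com ').remoteHost, 'https://hf-mirror.com/');
// Whitespace-only or unset endpoint leaves remoteHost untouched
assert.equal(run(undefined, '   ').remoteHost, 'unchanged');
assert.equal(run().remoteHost, 'unchanged');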
package/dist/core/group/extractors/rust-workspace-extractor.d.ts
ADDED

@@ -0,0 +1,30 @@
+import type { CypherExecutor } from '../contract-extractor.js';
+import type { GroupManifestLink } from '../types.js';
+/**
+ * Discover cross-crate contracts in a Rust workspace by reading each
+ * member's `Cargo.toml` dependencies and scanning source files for
+ * `use <workspace_dep>::<Type>` imports.
+ *
+ * Emits `GroupManifestLink[]` with `type: 'custom'` that feed into the
+ * existing ManifestExtractor pipeline — no new matching logic needed.
+ *
+ * Designed for the group-level sync pipeline: it receives all repos in
+ * a group and produces cross-repo links between them.
+ */
+interface CrateMeta {
+    name: string;
+    groupPath: string;
+    repoPath: string;
+    workspaceDeps: string[];
+}
+export interface RustWorkspaceResult {
+    links: GroupManifestLink[];
+    discoveredCrates: Map<string, CrateMeta>;
+}
+/**
+ * Discover cross-crate contracts across all Rust repos in a group.
+ *
+ * Returns `GroupManifestLink[]` ready to feed into `ManifestExtractor`.
+ */
+export declare function extractRustWorkspaceLinks(repos: Record<string, string>, repoPaths: Map<string, string>, _dbExecutors?: Map<string, CypherExecutor>): Promise<RustWorkspaceResult>;
+export {};
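
A call-site sketch against this signature. The repo names and paths are invented; in gitnexus the sync pipeline builds both maps from group.yaml and the repo registry, as the sync.js hunk further down shows:

import { extractRustWorkspaceLinks } from './rust-workspace-extractor.js';

// group path → registry name, mirroring config.repos in sync.js
const repos = { 'core-lib': 'reg-core-lib', 'api-server': 'reg-api-server' };
// group path → checkout path on disk
const repoPaths = new Map([
    ['core-lib', '/work/repos/core-lib'],
    ['api-server', '/work/repos/api-server'],
]);

const { links, discoveredCrates } = await extractRustWorkspaceLinks(repos, repoPaths);
console.log(`discovered ${discoveredCrates.size} crates, ${links.length} links`);
// Each link looks like:
// { from: 'core-lib', to: 'api-server', type: 'custom',
//   contract: 'core-lib::Config', role: 'provider' }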
package/dist/core/group/extractors/rust-workspace-extractor.js
ADDED

@@ -0,0 +1,214 @@
+import fs from 'node:fs/promises';
+import path from 'node:path';
+import { shouldIgnorePath } from '../../../config/ignore-service.js';
+import { loadIgnoreRules } from '../../../config/ignore-service.js';
+/**
+ * Parse a Cargo.toml to extract the crate name and workspace dependency
+ * names. Uses simple line-based parsing — no TOML library needed for
+ * the subset we care about.
+ */
+async function parseCrateManifest(repoPath) {
+    const cargoPath = path.join(repoPath, 'Cargo.toml');
+    let content;
+    try {
+        content = await fs.readFile(cargoPath, 'utf-8');
+    }
+    catch {
+        return null;
+    }
+    let name = '';
+    const workspaceDeps = [];
+    const nameMatch = content.match(/^\[package\]\s*\n(?:[^\[]*?\n)*?name\s*=\s*"([^"]+)"/m);
+    if (nameMatch)
+        name = nameMatch[1];
+    // Match dependencies that use workspace = true, which indicates they
+    // are workspace-internal deps:
+    //   dep_name = { workspace = true }
+    //   dep_name.workspace = true
+    //
+    // Also match plain path dependencies:
+    //   dep_name = { path = "../other" }
+    const depSections = content.matchAll(/\[(dependencies|dev-dependencies|build-dependencies)\]\s*\n([\s\S]*?)(?=\n\[|$)/g);
+    for (const section of depSections) {
+        const sectionBody = section[2];
+        // workspace = true style
+        const wsMatches = sectionBody.matchAll(/^(\w[\w-]*)\s*=\s*\{[^}]*workspace\s*=\s*true[^}]*\}/gm);
+        for (const m of wsMatches)
+            workspaceDeps.push(m[1]);
+        // dotted workspace style: dep_name.workspace = true
+        const dottedMatches = sectionBody.matchAll(/^(\w[\w-]*)\.workspace\s*=\s*true/gm);
+        for (const m of dottedMatches)
+            workspaceDeps.push(m[1]);
+        // path = "../other" style (local path deps within workspace)
+        const pathMatches = sectionBody.matchAll(/^(\w[\w-]*)\s*=\s*\{[^}]*path\s*=\s*"[^"]*"[^}]*\}/gm);
+        for (const m of pathMatches)
+            workspaceDeps.push(m[1]);
+    }
+    if (!name)
+        return null;
+    return { name, workspaceDeps: [...new Set(workspaceDeps)] };
+}
+/**
+ * Scan Rust source files for `use <crate>::<path>::<Symbol>` patterns
+ * where <crate> is a known workspace dependency.
+ */
+async function scanImports(repoPath, knownCrates) {
+    const results = [];
+    const normalizedCrates = new Map();
+    for (const c of knownCrates) {
+        normalizedCrates.set(c.replace(/-/g, '_'), c);
+    }
+    const sourceFiles = await findRustFiles(repoPath);
+    for (const relFile of sourceFiles) {
+        const absPath = path.join(repoPath, relFile);
+        let content;
+        try {
+            content = await fs.readFile(absPath, 'utf-8');
+        }
+        catch {
+            continue;
+        }
+        // Match patterns:
+        //   use crate_name::Type;
+        //   use crate_name::module::Type;
+        //   use crate_name::{Type1, Type2};
+        //   use crate_name::module::{Type1, Type2};
+        const useRegex = /^use\s+(\w+)::(.+);/gm;
+        let match;
+        while ((match = useRegex.exec(content)) !== null) {
+            const crateName = match[1];
+            const originalCrateName = normalizedCrates.get(crateName);
+            if (!originalCrateName)
+                continue;
+            const importPath = match[2].trim();
+            // Handle grouped imports: {Type1, Type2, module::Type3}
+            const braceMatch = importPath.match(/\{([^}]+)\}/);
+            if (braceMatch) {
+                const items = braceMatch[1].split(',').map((s) => s.trim());
+                for (const item of items) {
+                    const symbolName = extractSymbolName(item);
+                    if (symbolName && isTypeName(symbolName)) {
+                        results.push({ crateName: originalCrateName, symbolName, filePath: relFile });
+                    }
+                }
+            }
+            else {
+                const symbolName = extractSymbolName(importPath);
+                if (symbolName && isTypeName(symbolName)) {
+                    results.push({ crateName: originalCrateName, symbolName, filePath: relFile });
+                }
+            }
+        }
+    }
+    return results;
+}
+/** Extract the final symbol name from a path like `module::submod::TypeName`. */
+function extractSymbolName(importPath) {
+    const trimmed = importPath.trim();
+    if (!trimmed || trimmed === '*' || trimmed === 'self')
+        return null;
+    const parts = trimmed.split('::');
+    return parts[parts.length - 1].trim() || null;
+}
+/**
+ * Heuristic: in Rust, types (structs, enums, traits) are PascalCase.
+ * Functions and modules are snake_case. We only want types as cross-crate
+ * contracts — functions are too granular and modules too broad.
+ */
+function isTypeName(name) {
+    return /^[A-Z][A-Za-z0-9]*$/.test(name);
+}
+async function findRustFiles(repoPath) {
+    const results = [];
+    const ig = await loadIgnoreRules(repoPath);
+    async function walk(dir, rel) {
+        let entries;
+        try {
+            entries = await fs.readdir(dir, { withFileTypes: true });
+        }
+        catch {
+            return;
+        }
+        for (const entry of entries) {
+            const childRel = rel ? `${rel}/${entry.name}` : entry.name;
+            if (entry.isDirectory()) {
+                if (shouldIgnorePath(childRel))
+                    continue;
+                if (ig && ig.ignores(childRel + '/'))
+                    continue;
+                await walk(path.join(dir, entry.name), childRel);
+            }
+            else if (entry.name.endsWith('.rs')) {
+                if (shouldIgnorePath(childRel))
+                    continue;
+                if (ig && ig.ignores(childRel))
+                    continue;
+                results.push(childRel);
+            }
+        }
+    }
+    await walk(repoPath, '');
+    return results;
+}
+/**
+ * Discover cross-crate contracts across all Rust repos in a group.
+ *
+ * Returns `GroupManifestLink[]` ready to feed into `ManifestExtractor`.
+ */
+export async function extractRustWorkspaceLinks(repos, repoPaths, _dbExecutors) {
+    // Phase 1: Parse all Cargo.toml files to build crate registry
+    const cratesByName = new Map();
+    const cratesByGroupPath = new Map();
+    for (const [groupPath] of Object.entries(repos)) {
+        const repoPath = repoPaths.get(groupPath);
+        if (!repoPath)
+            continue;
+        const manifest = await parseCrateManifest(repoPath);
+        if (!manifest)
+            continue;
+        const meta = {
+            name: manifest.name,
+            groupPath,
+            repoPath,
+            workspaceDeps: manifest.workspaceDeps,
+        };
+        const existing = cratesByName.get(manifest.name);
+        if (existing) {
+            console.warn(`[rust-workspace-extractor] duplicate crate name "${manifest.name}" in "${groupPath}" and "${existing.groupPath}" — skipping "${groupPath}"`);
+            continue;
+        }
+        cratesByName.set(manifest.name, meta);
+        cratesByGroupPath.set(groupPath, meta);
+    }
+    // Phase 2: For each crate, identify which of its workspace deps are
+    // also in this group (i.e., repos we can link to)
+    const links = [];
+    const seen = new Set();
+    for (const [, crate] of cratesByGroupPath) {
+        const groupCrateDeps = crate.workspaceDeps.filter((d) => cratesByName.has(d));
+        if (groupCrateDeps.length === 0)
+            continue;
+        // Phase 3: Scan source files for imports from workspace deps
+        const knownCrates = new Set(groupCrateDeps);
+        const imports = await scanImports(crate.repoPath, knownCrates);
+        for (const imp of imports) {
+            const providerCrate = cratesByName.get(imp.crateName);
+            if (!providerCrate)
+                continue;
+            const qualifiedContract = `${imp.crateName}::${imp.symbolName}`;
+            const key = `${crate.groupPath}→${providerCrate.groupPath}::${qualifiedContract}`;
+            if (seen.has(key))
+                continue;
+            seen.add(key);
+            const link = {
+                from: providerCrate.groupPath,
+                to: crate.groupPath,
+                type: 'custom',
+                contract: qualifiedContract,
+                role: 'provider',
+            };
+            links.push(link);
+        }
+    }
+    return { links, discoveredCrates: cratesByGroupPath };
+}
package/dist/core/group/sync.js
CHANGED

@@ -7,6 +7,7 @@ import { HttpRouteExtractor } from './extractors/http-route-extractor.js';
 import { GrpcExtractor } from './extractors/grpc-extractor.js';
 import { TopicExtractor } from './extractors/topic-extractor.js';
 import { ManifestExtractor } from './extractors/manifest-extractor.js';
+import { extractRustWorkspaceLinks } from './extractors/rust-workspace-extractor.js';
 import { runExactMatch } from './matching.js';
 import { detectServiceBoundaries, assignService } from './service-boundary-detector.js';
 import { writeContractRegistry } from './storage.js';

@@ -58,12 +59,14 @@ export async function syncGroup(config, opts) {
     let autoContracts = [];
     let manifestCrossLinks = [];
     let dbExecutors;
+    let registryEntries;
     const eo = opts?.extractorOverride;
     if (eo && eo.length === 0) {
         autoContracts = await eo();
     }
     else {
-
+        registryEntries = await readRegistry();
+        const entries = registryEntries;
         const resolve = opts?.resolveRepoHandle ?? defaultResolveHandle(entries);
         const httpEx = new HttpRouteExtractor();
         const grpcEx = new GrpcExtractor();

@@ -143,29 +146,47 @@
             }
         }
     }
-    //
+    // Auto-discover workspace dependency contracts (Rust Cargo workspaces, etc.)
+    // and merge them with explicit manifest links. Discovered links use the same
+    // ManifestExtractor pipeline as hand-written links in group.yaml.
+    let allLinks = [...config.links];
+    if (config.detect.workspace_deps) {
+        const repoPaths = new Map();
+        if (!registryEntries)
+            registryEntries = await readRegistry();
+        for (const [groupPath, regName] of Object.entries(config.repos)) {
+            const e = registryEntries.find((en) => en.name === regName);
+            if (e)
+                repoPaths.set(groupPath, e.path);
+        }
+        const wsResult = await extractRustWorkspaceLinks(config.repos, repoPaths, dbExecutors);
+        if (wsResult.links.length > 0) {
+            allLinks = [...allLinks, ...wsResult.links];
+            if (opts?.verbose) {
+                console.log(`  workspace-deps: discovered ${wsResult.links.length} cross-crate links from ${wsResult.discoveredCrates.size} Rust crates`);
+            }
+        }
+    }
+    // Process manifest links declared in group.yaml (plus any auto-discovered).
     // ManifestExtractor is fully implemented but was never wired into this
     // pipeline — config.links were parsed and validated but silently dropped.
     // Placed after the DB try/finally: resolveSymbol falls back to synthetic
     // UIDs when dbExecutors is undefined or a pool is closed, so cross-links
     // are always generated regardless of whether real DB executors are available.
-    if (
-        // Warn about dangling links that reference repos not declared in config.repos.
-        // They still generate cross-links via synthetic UIDs (determinism is preserved),
-        // but the operator probably meant something that now silently does nothing useful.
+    if (allLinks.length > 0) {
         const knownRepos = new Set(Object.keys(config.repos));
-        for (const link of
+        for (const link of allLinks) {
             const dangling = [link.from, link.to].filter((r) => !knownRepos.has(r));
             if (dangling.length > 0) {
                 console.warn(`[group/sync] manifest link ${link.type}:${link.contract} references repos not in config.repos: ${dangling.join(', ')} — cross-links will use synthetic UIDs`);
             }
         }
         const manifestEx = new ManifestExtractor();
-        const manifestResult = await manifestEx.extractFromManifest(
+        const manifestResult = await manifestEx.extractFromManifest(allLinks, dbExecutors);
         autoContracts.push(...manifestResult.contracts);
         manifestCrossLinks = manifestResult.crossLinks;
         if (opts?.verbose) {
-            console.log(`  manifest: ${manifestCrossLinks.length} cross-links from ${config.links.length} declared links`);
+            console.log(`  manifest: ${manifestCrossLinks.length} cross-links from ${allLinks.length} links (${config.links.length} declared + ${allLinks.length - config.links.length} discovered)`);
         }
     }
     const { matched, unmatched } = runExactMatch(autoContracts, undefined, config.matching);
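
For reference, a hedged sketch of the parsed group config this hunk consumes. Only `repos`, `links`, and `detect.workspace_deps` are referenced by the code above (the one-line additions to config-parser.js and types.d.ts in the file list presumably register the new flag); the repo names and the exact group.yaml spelling are assumptions:

const config = {
    repos: {
        // group path → registry name; syncGroup resolves each to a disk path
        // via readRegistry() before calling extractRustWorkspaceLinks.
        'core-lib': 'reg-core-lib',
        'api-server': 'reg-api-server',
    },
    links: [
        // Hand-written manifest links keep flowing through ManifestExtractor
        // unchanged; discovered workspace links are appended to this list.
        { from: 'core-lib', to: 'api-server', type: 'custom',
          contract: 'core-lib::Config', role: 'provider' },
    ],
    detect: {
        workspace_deps: true, // opt in to Rust cross-crate auto-discovery
    },
};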
package/dist/mcp/core/embedder.js
CHANGED

@@ -5,10 +5,9 @@
  * For MCP, we only need to compute query embeddings, not batch embed.
  */
 import { pipeline, env } from '@huggingface/transformers';
-import os from 'os';
-import { join } from 'path';
 import { isHttpMode, getHttpDimensions, httpEmbedQuery, } from '../../core/embeddings/http-client.js';
 import { resolveEmbeddingConfig } from '../../core/embeddings/config.js';
+import { applyHfEnvOverrides } from '../../core/embeddings/hf-env.js';
 import { silenceStdout, restoreStdout, realStderrWrite } from '../../core/lbug/pool-adapter.js';
 // Model config
 const MODEL_ID = 'Snowflake/snowflake-arctic-embed-xs';

@@ -33,11 +32,11 @@ export const initEmbedder = async () => {
     initPromise = (async () => {
         try {
             env.allowLocalModels = false;
-            //
-            //
-            //
-            //
-            env
+            // Bridge user-controlled env vars to transformers.js: HF_HOME →
+            // env.cacheDir, HF_ENDPOINT → env.remoteHost (#1205). Centralised in
+            // applyHfEnvOverrides so this MCP entry point behaves identically to
+            // the analyze pipeline embedder.
+            applyHfEnvOverrides(env);
             const embeddingConfig = resolveEmbeddingConfig();
             console.error('GitNexus: Loading embedding model (first search may take a moment)...');
             const devicesToTry = embeddingConfig.device === 'dml' || embeddingConfig.device === 'cuda'
package/package.json
CHANGED