gitnexus 1.6.4-rc.43 → 1.6.4-rc.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/core/embeddings/embedder.js +6 -6
- package/dist/core/embeddings/hf-env.d.ts +46 -0
- package/dist/core/embeddings/hf-env.js +46 -0
- package/dist/mcp/core/embedder.js +6 -7
- package/dist/mcp/server.js +1 -0
- package/dist/mcp/tools.d.ts +2 -0
- package/dist/mcp/tools.js +33 -0
- package/package.json +1 -1
|
@@ -13,7 +13,6 @@ if (!process.env.ORT_LOG_LEVEL) {
|
|
|
13
13
|
process.env.ORT_LOG_LEVEL = '3';
|
|
14
14
|
}
|
|
15
15
|
import { pipeline, env } from '@huggingface/transformers';
|
|
16
|
-
import os from 'os';
|
|
17
16
|
import { existsSync } from 'fs';
|
|
18
17
|
import { execFileSync } from 'child_process';
|
|
19
18
|
import { join, dirname } from 'path';
|
|
@@ -21,6 +20,7 @@ import { createRequire } from 'module';
|
|
|
21
20
|
import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
|
|
22
21
|
import { isHttpMode, getHttpDimensions, httpEmbed } from './http-client.js';
|
|
23
22
|
import { resolveEmbeddingConfig } from './config.js';
|
|
23
|
+
import { applyHfEnvOverrides } from './hf-env.js';
|
|
24
24
|
/**
|
|
25
25
|
* Check whether the onnxruntime-node package that @huggingface/transformers
|
|
26
26
|
* will actually load at runtime ships the CUDA execution provider.
|
|
@@ -132,11 +132,11 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
|
|
|
132
132
|
try {
|
|
133
133
|
// Configure transformers.js environment
|
|
134
134
|
env.allowLocalModels = false;
|
|
135
|
-
//
|
|
136
|
-
//
|
|
137
|
-
//
|
|
138
|
-
//
|
|
139
|
-
env
|
|
135
|
+
// Bridge user-controlled env vars to transformers.js: HF_HOME →
|
|
136
|
+
// env.cacheDir, HF_ENDPOINT → env.remoteHost (#1205). Centralised in
|
|
137
|
+
// applyHfEnvOverrides so the MCP embedder entry point behaves
|
|
138
|
+
// identically.
|
|
139
|
+
applyHfEnvOverrides(env);
|
|
140
140
|
const isDev = process.env.NODE_ENV === 'development';
|
|
141
141
|
if (isDev) {
|
|
142
142
|
console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @internal Exported only for unit tests and the two embedder entry points
|
|
3
|
+
* (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
|
|
4
|
+
* public package API.
|
|
5
|
+
*
|
|
6
|
+
* Minimal subset of `@huggingface/transformers`' `env` object that gitnexus
|
|
7
|
+
* mutates. Defining a local structural type keeps this helper free of a
|
|
8
|
+
* transitive dependency on transformers' generated `.d.ts` while still
|
|
9
|
+
* giving full type-checking on the two fields we actually touch.
|
|
10
|
+
*/
|
|
11
|
+
export interface HfEnvSubset {
    /** Cache directory setting; `applyHfEnvOverrides` assigns it from `HF_HOME` or a per-user default. */
    cacheDir: string;
    /** Remote host base URL; `applyHfEnvOverrides` assigns it from `HF_ENDPOINT` with a trailing slash ensured. */
    remoteHost: string;
}
|
|
15
|
+
/**
|
|
16
|
+
* @internal Exported only for unit tests and the two embedder entry points
|
|
17
|
+
* (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
|
|
18
|
+
* public package API.
|
|
19
|
+
*
|
|
20
|
+
* Apply user-controlled HuggingFace environment overrides to the
|
|
21
|
+
* `@huggingface/transformers` `env` object. Centralises the two env-var
|
|
22
|
+
* bridges so every gitnexus embedder entry point (the analyze pipeline
|
|
23
|
+
* and the MCP server) behaves identically.
|
|
24
|
+
*
|
|
25
|
+
* - **`HF_HOME`** → `env.cacheDir` (default: `~/.cache/huggingface`).
|
|
26
|
+
* transformers.js otherwise defaults to `./node_modules/.cache` inside
|
|
27
|
+
* its own install dir, which is unwritable when gitnexus is installed
|
|
28
|
+
* globally (e.g. `/usr/lib/node_modules/`).
|
|
29
|
+
*
|
|
30
|
+
* - **`HF_ENDPOINT`** → `env.remoteHost` (#1205). transformers.js does
|
|
31
|
+
* not read `HF_ENDPOINT` on its own — it reads `env.remoteHost` —
|
|
32
|
+
* even though `HF_ENDPOINT` is the standard env var the upstream
|
|
33
|
+
* `huggingface_hub` Python client and the official HF mirror docs
|
|
34
|
+
* tell users to set. Bridging the two unblocks `--embeddings` for
|
|
35
|
+
* users behind networks where `huggingface.co` is unreachable
|
|
36
|
+
* (corporate proxies, the GFW, air-gapped mirrors). The trailing
|
|
37
|
+
* slash is normalised because transformers.js builds URLs by string
|
|
38
|
+
* concatenation and a missing slash silently falls through to its
|
|
39
|
+
* default `huggingface.co/...` host.
|
|
40
|
+
*
|
|
41
|
+
* Mutation rather than return-and-apply because callers already hold a
|
|
42
|
+
* reference to the live `env` object imported from
|
|
43
|
+
* `@huggingface/transformers` — passing the same reference in keeps the
|
|
44
|
+
* call site a single line at each entry point.
|
|
45
|
+
*/
|
|
46
|
+
export declare function applyHfEnvOverrides(env: HfEnvSubset): void;
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import os from 'node:os';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
/**
|
|
4
|
+
* @internal Exported only for unit tests and the two embedder entry points
|
|
5
|
+
* (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
|
|
6
|
+
* public package API.
|
|
7
|
+
*
|
|
8
|
+
* Apply user-controlled HuggingFace environment overrides to the
|
|
9
|
+
* `@huggingface/transformers` `env` object. Centralises the two env-var
|
|
10
|
+
* bridges so every gitnexus embedder entry point (the analyze pipeline
|
|
11
|
+
* and the MCP server) behaves identically.
|
|
12
|
+
*
|
|
13
|
+
* - **`HF_HOME`** → `env.cacheDir` (default: `~/.cache/huggingface`).
|
|
14
|
+
* transformers.js otherwise defaults to `./node_modules/.cache` inside
|
|
15
|
+
* its own install dir, which is unwritable when gitnexus is installed
|
|
16
|
+
* globally (e.g. `/usr/lib/node_modules/`).
|
|
17
|
+
*
|
|
18
|
+
* - **`HF_ENDPOINT`** → `env.remoteHost` (#1205). transformers.js does
|
|
19
|
+
* not read `HF_ENDPOINT` on its own — it reads `env.remoteHost` —
|
|
20
|
+
* even though `HF_ENDPOINT` is the standard env var the upstream
|
|
21
|
+
* `huggingface_hub` Python client and the official HF mirror docs
|
|
22
|
+
* tell users to set. Bridging the two unblocks `--embeddings` for
|
|
23
|
+
* users behind networks where `huggingface.co` is unreachable
|
|
24
|
+
* (corporate proxies, the GFW, air-gapped mirrors). The trailing
|
|
25
|
+
* slash is normalised because transformers.js builds URLs by string
|
|
26
|
+
* concatenation and a missing slash silently falls through to its
|
|
27
|
+
* default `huggingface.co/...` host.
|
|
28
|
+
*
|
|
29
|
+
* Mutation rather than return-and-apply because callers already hold a
|
|
30
|
+
* reference to the live `env` object imported from
|
|
31
|
+
* `@huggingface/transformers` — passing the same reference in keeps the
|
|
32
|
+
* call site a single line at each entry point.
|
|
33
|
+
*/
|
|
34
|
+
export function applyHfEnvOverrides(env) {
|
|
35
|
+
env.cacheDir = process.env.HF_HOME ?? join(os.homedir(), '.cache', 'huggingface');
|
|
36
|
+
// `.trim()` guards against the common copy-paste failure mode of
|
|
37
|
+
// `HF_ENDPOINT=" https://hf-mirror.com "` (leading/trailing whitespace
|
|
38
|
+
// from shell scripts or docs) — without it, a whitespace-only value
|
|
39
|
+
// would be truthy and produce an invalid `env.remoteHost = ' /'` that
|
|
40
|
+
// silently misroutes downloads. Empty string remains falsy in JS so the
|
|
41
|
+
// truthy guard already handles the unset/empty cases.
|
|
42
|
+
const endpoint = process.env.HF_ENDPOINT?.trim();
|
|
43
|
+
if (endpoint) {
|
|
44
|
+
env.remoteHost = endpoint.endsWith('/') ? endpoint : endpoint + '/';
|
|
45
|
+
}
|
|
46
|
+
}
|
|
@@ -5,10 +5,9 @@
|
|
|
5
5
|
* For MCP, we only need to compute query embeddings, not batch embed.
|
|
6
6
|
*/
|
|
7
7
|
import { pipeline, env } from '@huggingface/transformers';
|
|
8
|
-
import os from 'os';
|
|
9
|
-
import { join } from 'path';
|
|
10
8
|
import { isHttpMode, getHttpDimensions, httpEmbedQuery, } from '../../core/embeddings/http-client.js';
|
|
11
9
|
import { resolveEmbeddingConfig } from '../../core/embeddings/config.js';
|
|
10
|
+
import { applyHfEnvOverrides } from '../../core/embeddings/hf-env.js';
|
|
12
11
|
import { silenceStdout, restoreStdout, realStderrWrite } from '../../core/lbug/pool-adapter.js';
|
|
13
12
|
// Model config
|
|
14
13
|
const MODEL_ID = 'Snowflake/snowflake-arctic-embed-xs';
|
|
@@ -33,11 +32,11 @@ export const initEmbedder = async () => {
|
|
|
33
32
|
initPromise = (async () => {
|
|
34
33
|
try {
|
|
35
34
|
env.allowLocalModels = false;
|
|
36
|
-
//
|
|
37
|
-
//
|
|
38
|
-
//
|
|
39
|
-
//
|
|
40
|
-
env
|
|
35
|
+
// Bridge user-controlled env vars to transformers.js: HF_HOME →
|
|
36
|
+
// env.cacheDir, HF_ENDPOINT → env.remoteHost (#1205). Centralised in
|
|
37
|
+
// applyHfEnvOverrides so this MCP entry point behaves identically to
|
|
38
|
+
// the analyze pipeline embedder.
|
|
39
|
+
applyHfEnvOverrides(env);
|
|
41
40
|
const embeddingConfig = resolveEmbeddingConfig();
|
|
42
41
|
console.error('GitNexus: Loading embedding model (first search may take a moment)...');
|
|
43
42
|
const devicesToTry = embeddingConfig.device === 'dml' || embeddingConfig.device === 'cuda'
|
package/dist/mcp/server.js
CHANGED
|
@@ -130,6 +130,7 @@ export function createMCPServer(backend) {
|
|
|
130
130
|
name: tool.name,
|
|
131
131
|
description: tool.description,
|
|
132
132
|
inputSchema: tool.inputSchema,
|
|
133
|
+
annotations: tool.annotations,
|
|
133
134
|
})),
|
|
134
135
|
}));
|
|
135
136
|
// Handle tool calls — append next-step hints to guide agent workflow
|
package/dist/mcp/tools.d.ts
CHANGED
|
@@ -4,9 +4,11 @@
|
|
|
4
4
|
* Defines the tools that GitNexus exposes to external AI agents.
|
|
5
5
|
* All tools support an optional `repo` parameter for multi-repo setups.
|
|
6
6
|
*/
|
|
7
|
+
import type { ToolAnnotations } from '@modelcontextprotocol/sdk/types.js';
|
|
7
8
|
export interface ToolDefinition {
|
|
8
9
|
name: string;
|
|
9
10
|
description: string;
|
|
11
|
+
annotations: ToolAnnotations;
|
|
10
12
|
inputSchema: {
|
|
11
13
|
type: 'object';
|
|
12
14
|
properties: Record<string, {
|
package/dist/mcp/tools.js
CHANGED
|
@@ -4,6 +4,24 @@
|
|
|
4
4
|
* Defines the tools that GitNexus exposes to external AI agents.
|
|
5
5
|
* All tools support an optional `repo` parameter for multi-repo setups.
|
|
6
6
|
*/
|
|
7
|
+
const READ_ONLY_TOOL_ANNOTATIONS = {
|
|
8
|
+
readOnlyHint: true,
|
|
9
|
+
destructiveHint: false,
|
|
10
|
+
idempotentHint: true,
|
|
11
|
+
openWorldHint: false,
|
|
12
|
+
};
|
|
13
|
+
const QUERY_TOOL_ANNOTATIONS = {
|
|
14
|
+
readOnlyHint: true,
|
|
15
|
+
destructiveHint: false,
|
|
16
|
+
idempotentHint: true,
|
|
17
|
+
openWorldHint: true,
|
|
18
|
+
};
|
|
19
|
+
const DESTRUCTIVE_TOOL_ANNOTATIONS = {
|
|
20
|
+
readOnlyHint: false,
|
|
21
|
+
destructiveHint: true,
|
|
22
|
+
idempotentHint: false,
|
|
23
|
+
openWorldHint: false,
|
|
24
|
+
};
|
|
7
25
|
export const GITNEXUS_TOOLS = [
|
|
8
26
|
{
|
|
9
27
|
name: 'list_repos',
|
|
@@ -16,6 +34,7 @@ AFTER THIS: READ gitnexus://repo/{name}/context for the repo you want to work wi
|
|
|
16
34
|
|
|
17
35
|
When multiple repos are indexed, you MUST specify the "repo" parameter
|
|
18
36
|
on other tools (query, context, impact, etc.) to target the correct one.`,
|
|
37
|
+
annotations: READ_ONLY_TOOL_ANNOTATIONS,
|
|
19
38
|
inputSchema: {
|
|
20
39
|
type: 'object',
|
|
21
40
|
properties: {},
|
|
@@ -40,6 +59,7 @@ Hybrid ranking: BM25 keyword + semantic vector search, ranked by Reciprocal Rank
|
|
|
40
59
|
GROUP MODE: set "repo" to "@<groupName>" to search all member repos in that group (merged via RRF), or "@<groupName>/<groupRepoPath>" to run against a single member (same path keys as in group.yaml). If you use "@<groupName>" only, the member repo defaults to the lexicographically first key in group.yaml "repos". Prefer resources for contracts/status (see migration from legacy group_* tools).
|
|
41
60
|
|
|
42
61
|
SERVICE: optional monorepo path prefix (POSIX-style, case-sensitive segments). When "repo" starts with "@", only processes whose symbols fall under that prefix are included. For a normal indexed repo name (no leading @), this field is currently ignored by the server.`,
|
|
62
|
+
annotations: QUERY_TOOL_ANNOTATIONS,
|
|
43
63
|
inputSchema: {
|
|
44
64
|
type: 'object',
|
|
45
65
|
properties: {
|
|
@@ -130,6 +150,7 @@ TIPS:
|
|
|
130
150
|
- Community = auto-detected functional area (Leiden algorithm). Properties: heuristicLabel, cohesion, symbolCount, keywords, description, enrichedBy
|
|
131
151
|
- Process = execution flow trace from entry point to terminal. Properties: heuristicLabel, processType, stepCount, communities, entryPointId, terminalId
|
|
132
152
|
- Use heuristicLabel (not label) for human-readable community/process names`,
|
|
153
|
+
annotations: READ_ONLY_TOOL_ANNOTATIONS,
|
|
133
154
|
inputSchema: {
|
|
134
155
|
type: 'object',
|
|
135
156
|
properties: {
|
|
@@ -157,6 +178,7 @@ NOTE: ACCESSES edges (field read/write tracking) are included in context results
|
|
|
157
178
|
GROUP MODE: set "repo" to "@<groupName>" to run context in each member repo (aggregated list), or "@<groupName>/<groupRepoPath>" for one member. If you use "@<groupName>" only, the member defaults to the lexicographically first key in group.yaml "repos".
|
|
158
179
|
|
|
159
180
|
SERVICE: optional monorepo path prefix (case-sensitive path segments). When "repo" starts with "@", prefix-matches resolved symbol file paths; when a hit is outside the prefix, that member returns an empty payload for the symbol. Ignored for a normal indexed repo name.`,
|
|
181
|
+
annotations: READ_ONLY_TOOL_ANNOTATIONS,
|
|
160
182
|
inputSchema: {
|
|
161
183
|
type: 'object',
|
|
162
184
|
properties: {
|
|
@@ -197,6 +219,7 @@ WHEN TO USE: Before committing — to understand what your changes affect. Pre-c
|
|
|
197
219
|
AFTER THIS: Review affected processes. Use context() on high-risk symbols. READ gitnexus://repo/{name}/process/{name} for full traces.
|
|
198
220
|
|
|
199
221
|
Returns: changed symbols, affected processes, and a risk summary.`,
|
|
222
|
+
annotations: READ_ONLY_TOOL_ANNOTATIONS,
|
|
200
223
|
inputSchema: {
|
|
201
224
|
type: 'object',
|
|
202
225
|
properties: {
|
|
@@ -229,6 +252,7 @@ AFTER THIS: Run detect_changes() to verify no unexpected side effects.
|
|
|
229
252
|
Each edit is tagged with confidence:
|
|
230
253
|
- "graph": found via knowledge graph relationships (high confidence, safe to accept)
|
|
231
254
|
- "text_search": found via regex text search (lower confidence, review carefully)`,
|
|
255
|
+
annotations: DESTRUCTIVE_TOOL_ANNOTATIONS,
|
|
232
256
|
inputSchema: {
|
|
233
257
|
type: 'object',
|
|
234
258
|
properties: {
|
|
@@ -282,6 +306,7 @@ Confidence: 1.0 = certain, <0.8 = fuzzy match
|
|
|
282
306
|
GROUP MODE: set "repo" to "@<groupName>" for cross-repo impact anchored at the default member (lexicographically first key in group.yaml "repos"), or "@<groupName>/<groupRepoPath>" to choose the member (same path keys as in group.yaml). Phase-1 walk runs in that member; cross-boundary fan-out uses the group bridge.
|
|
283
307
|
|
|
284
308
|
SERVICE: optional monorepo path prefix (case-sensitive path segments). When "repo" starts with "@", scopes the local impact walk and cross-repo symbol paths to files under that prefix; ignored for a normal indexed repo name.`,
|
|
309
|
+
annotations: READ_ONLY_TOOL_ANNOTATIONS,
|
|
285
310
|
inputSchema: {
|
|
286
311
|
type: 'object',
|
|
287
312
|
properties: {
|
|
@@ -366,6 +391,7 @@ WHEN TO USE: Understanding API consumption patterns, finding orphaned routes. Fo
|
|
|
366
391
|
AFTER THIS: Use impact() on specific route handlers to see full blast radius.
|
|
367
392
|
|
|
368
393
|
Returns: route nodes with their handlers, middleware wrapper chains (e.g., withAuth, withRateLimit), and consumers.`,
|
|
394
|
+
annotations: READ_ONLY_TOOL_ANNOTATIONS,
|
|
369
395
|
inputSchema: {
|
|
370
396
|
type: 'object',
|
|
371
397
|
properties: {
|
|
@@ -388,6 +414,7 @@ Returns: route nodes with their handlers, middleware wrapper chains (e.g., withA
|
|
|
388
414
|
WHEN TO USE: Understanding tool APIs, finding tool implementations, impact analysis for tool changes.
|
|
389
415
|
|
|
390
416
|
Returns: tool nodes with their handler files and descriptions.`,
|
|
417
|
+
annotations: READ_ONLY_TOOL_ANNOTATIONS,
|
|
391
418
|
inputSchema: {
|
|
392
419
|
type: 'object',
|
|
393
420
|
properties: {
|
|
@@ -405,6 +432,7 @@ WHEN TO USE: Detecting mismatches between what an API route returns and what con
|
|
|
405
432
|
REQUIRES: Route nodes with responseKeys (extracted from .json({...}) calls during indexing).
|
|
406
433
|
|
|
407
434
|
Returns routes that have both detected response keys AND consumers. Shows top-level keys each endpoint returns (e.g., data, pagination, error) and what keys each consumer accesses. Reports MISMATCH status when a consumer accesses keys not present in the route's response shape.`,
|
|
435
|
+
annotations: READ_ONLY_TOOL_ANNOTATIONS,
|
|
408
436
|
inputSchema: {
|
|
409
437
|
type: 'object',
|
|
410
438
|
properties: {
|
|
@@ -429,6 +457,7 @@ WHEN TO USE: BEFORE modifying any API route handler. Shows what consumers depend
|
|
|
429
457
|
Risk levels: LOW (0-3 consumers), MEDIUM (4-9 or any mismatches), HIGH (10+ consumers or mismatches with 4+ consumers). Mismatches with confidence "low" indicate the consumer file fetches multiple routes — property attribution is approximate.
|
|
430
458
|
|
|
431
459
|
Returns: single route object when one match, or { routes: [...], total: N } for multiple matches. Combines route_map, shape_check, and impact data.`,
|
|
460
|
+
annotations: READ_ONLY_TOOL_ANNOTATIONS,
|
|
432
461
|
inputSchema: {
|
|
433
462
|
type: 'object',
|
|
434
463
|
properties: {
|
|
@@ -444,6 +473,7 @@ Returns: single route object when one match, or { routes: [...], total: N } for
|
|
|
444
473
|
description: `List all configured repository groups, or return details for one group (repos, manifest links).
|
|
445
474
|
|
|
446
475
|
WHEN TO USE: Discover groups before group_sync. Optional "name" returns a single group's config.`,
|
|
476
|
+
annotations: READ_ONLY_TOOL_ANNOTATIONS,
|
|
447
477
|
inputSchema: {
|
|
448
478
|
type: 'object',
|
|
449
479
|
properties: {
|
|
@@ -457,6 +487,9 @@ WHEN TO USE: Discover groups before group_sync. Optional "name" returns a single
|
|
|
457
487
|
description: `Rebuild the Contract Registry (contracts.json) for a group: extract HTTP contracts, apply manifest links, exact-match cross-links.
|
|
458
488
|
|
|
459
489
|
WHEN TO USE: After changing group.yaml or re-indexing member repos.`,
|
|
490
|
+
// Writes contracts.json on every call; conservatively non-idempotent
|
|
491
|
+
// even though output is deterministic for identical input.
|
|
492
|
+
annotations: DESTRUCTIVE_TOOL_ANNOTATIONS,
|
|
460
493
|
inputSchema: {
|
|
461
494
|
type: 'object',
|
|
462
495
|
properties: {
|
package/package.json
CHANGED