gitnexus 1.6.4-rc.43 → 1.6.4-rc.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,7 +13,6 @@ if (!process.env.ORT_LOG_LEVEL) {
13
13
  process.env.ORT_LOG_LEVEL = '3';
14
14
  }
15
15
  import { pipeline, env } from '@huggingface/transformers';
16
- import os from 'os';
17
16
  import { existsSync } from 'fs';
18
17
  import { execFileSync } from 'child_process';
19
18
  import { join, dirname } from 'path';
@@ -21,6 +20,7 @@ import { createRequire } from 'module';
21
20
  import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
22
21
  import { isHttpMode, getHttpDimensions, httpEmbed } from './http-client.js';
23
22
  import { resolveEmbeddingConfig } from './config.js';
23
+ import { applyHfEnvOverrides } from './hf-env.js';
24
24
  /**
25
25
  * Check whether the onnxruntime-node package that @huggingface/transformers
26
26
  * will actually load at runtime ships the CUDA execution provider.
@@ -132,11 +132,11 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
132
132
  try {
133
133
  // Configure transformers.js environment
134
134
  env.allowLocalModels = false;
135
- // Default cache to user-writable location. transformers.js defaults to
136
- // ./node_modules/.cache inside its own install dir, which is unwritable
137
- // when gitnexus is installed globally (e.g. /usr/lib/node_modules/).
138
- // Respect HF_HOME if set, otherwise fall back to ~/.cache/huggingface.
139
- env.cacheDir = process.env.HF_HOME ?? join(os.homedir(), '.cache', 'huggingface');
135
+ // Bridge user-controlled env vars to transformers.js: HF_HOME →
136
+ // env.cacheDir, HF_ENDPOINT → env.remoteHost (#1205). Centralised in
137
+ // applyHfEnvOverrides so the MCP embedder entry point behaves
138
+ // identically.
139
+ applyHfEnvOverrides(env);
140
140
  const isDev = process.env.NODE_ENV === 'development';
141
141
  if (isDev) {
142
142
  console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
@@ -0,0 +1,46 @@
1
+ /**
2
+ * @internal Exported only for unit tests and the two embedder entry points
3
+ * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
4
+ * public package API.
5
+ *
6
+ * Minimal subset of `@huggingface/transformers`' `env` object that gitnexus
7
+ * mutates. Defining a local structural type keeps this helper free of a
8
+ * transitive dependency on transformers' generated `.d.ts` while still
9
+ * giving full type-checking on the two fields we actually touch.
10
+ */
11
+ export interface HfEnvSubset { // the only two `env` fields applyHfEnvOverrides writes
12
+ cacheDir: string; // model cache directory; set from HF_HOME, else ~/.cache/huggingface
13
+ remoteHost: string; // download host; overwritten only when HF_ENDPOINT is non-blank, stored with a trailing '/'
14
+ }
15
+ /**
16
+ * @internal Exported only for unit tests and the two embedder entry points
17
+ * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
18
+ * public package API.
19
+ *
20
+ * Apply user-controlled HuggingFace environment overrides to the
21
+ * `@huggingface/transformers` `env` object. Centralises the two env-var
22
+ * bridges so every gitnexus embedder entry point (the analyze pipeline
23
+ * and the MCP server) behaves identically.
24
+ *
25
+ * - **`HF_HOME`** → `env.cacheDir` (default: `~/.cache/huggingface`).
26
+ * transformers.js otherwise defaults to `./node_modules/.cache` inside
27
+ * its own install dir, which is unwritable when gitnexus is installed
28
+ * globally (e.g. `/usr/lib/node_modules/`).
29
+ *
30
+ * - **`HF_ENDPOINT`** → `env.remoteHost` (#1205). transformers.js does
31
+ * not read `HF_ENDPOINT` on its own — it reads `env.remoteHost` —
32
+ * even though `HF_ENDPOINT` is the standard env var the upstream
33
+ * `huggingface_hub` Python client and the official HF mirror docs
34
+ * tell users to set. Bridging the two unblocks `--embeddings` for
35
+ * users behind networks where `huggingface.co` is unreachable
36
+ * (corporate proxies, the GFW, air-gapped mirrors). The trailing
37
+ * slash is normalised because transformers.js builds URLs by string
38
+ * concatenation and a missing slash silently falls through to its
39
+ * default `huggingface.co/...` host.
40
+ *
41
+ * Mutation rather than return-and-apply because callers already hold a
42
+ * reference to the live `env` object imported from
43
+ * `@huggingface/transformers` — passing the same reference in keeps the
44
+ * call site a single line at each entry point.
45
+ */
46
+ export declare function applyHfEnvOverrides(env: HfEnvSubset): void; // mutates `env` in place: always assigns cacheDir; assigns remoteHost only for a non-blank HF_ENDPOINT
@@ -0,0 +1,46 @@
1
+ import os from 'node:os';
2
+ import { join } from 'node:path';
3
/**
 * @internal Exported only for unit tests and the two embedder entry points
 * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
 * public package API.
 *
 * Apply user-controlled HuggingFace environment overrides to the
 * `@huggingface/transformers` `env` object. Centralises the two env-var
 * bridges so every gitnexus embedder entry point (the analyze pipeline
 * and the MCP server) behaves identically.
 *
 * - **`HF_HOME`** → `env.cacheDir` (default: `~/.cache/huggingface`).
 *   transformers.js otherwise defaults to `./node_modules/.cache` inside
 *   its own install dir, which is unwritable when gitnexus is installed
 *   globally (e.g. `/usr/lib/node_modules/`). An unset, empty, or
 *   whitespace-only `HF_HOME` selects the default; any other value is
 *   used verbatim (it is NOT trimmed, so unusual but valid paths survive).
 *
 * - **`HF_ENDPOINT`** → `env.remoteHost` (#1205). transformers.js does
 *   not read `HF_ENDPOINT` on its own — it reads `env.remoteHost` —
 *   even though `HF_ENDPOINT` is the standard env var the upstream
 *   `huggingface_hub` Python client and the official HF mirror docs
 *   tell users to set. The value is trimmed and a trailing slash is
 *   appended when missing. An unset or blank `HF_ENDPOINT` leaves
 *   `env.remoteHost` untouched.
 *
 * Mutation rather than return-and-apply because callers already hold a
 * reference to the live `env` object imported from
 * `@huggingface/transformers` — passing the same reference in keeps the
 * call site a single line at each entry point.
 *
 * @param env - Object exposing writable `cacheDir` and `remoteHost`
 *   string properties (the transformers.js `env`, or a test double).
 * @returns Nothing; `env` is updated in place.
 */
export function applyHfEnvOverrides(env) {
    // `??` alone would accept `HF_HOME=""` (or a whitespace-only value) and
    // hand transformers.js an empty/blank cache path. Treat blank the same
    // as unset, mirroring how HF_ENDPOINT is handled below.
    const hfHome = process.env.HF_HOME;
    const hasHfHome = typeof hfHome === 'string' && hfHome.trim() !== '';
    env.cacheDir = hasHfHome ? hfHome : join(os.homedir(), '.cache', 'huggingface');
    // `.trim()` guards against the common copy-paste failure mode of
    // `HF_ENDPOINT=" https://hf-mirror.com "` (leading/trailing whitespace
    // from shell scripts or docs); a whitespace-only value collapses to ''
    // and is skipped by the truthiness check together with unset/empty.
    const endpoint = process.env.HF_ENDPOINT?.trim();
    if (endpoint) {
        env.remoteHost = endpoint.endsWith('/') ? endpoint : `${endpoint}/`;
    }
}
@@ -5,10 +5,9 @@
5
5
  * For MCP, we only need to compute query embeddings, not batch embed.
6
6
  */
7
7
  import { pipeline, env } from '@huggingface/transformers';
8
- import os from 'os';
9
- import { join } from 'path';
10
8
  import { isHttpMode, getHttpDimensions, httpEmbedQuery, } from '../../core/embeddings/http-client.js';
11
9
  import { resolveEmbeddingConfig } from '../../core/embeddings/config.js';
10
+ import { applyHfEnvOverrides } from '../../core/embeddings/hf-env.js';
12
11
  import { silenceStdout, restoreStdout, realStderrWrite } from '../../core/lbug/pool-adapter.js';
13
12
  // Model config
14
13
  const MODEL_ID = 'Snowflake/snowflake-arctic-embed-xs';
@@ -33,11 +32,11 @@ export const initEmbedder = async () => {
33
32
  initPromise = (async () => {
34
33
  try {
35
34
  env.allowLocalModels = false;
36
- // Default cache to user-writable location. transformers.js defaults to
37
- // ./node_modules/.cache inside its own install dir, which is unwritable
38
- // when gitnexus is installed globally (e.g. /usr/lib/node_modules/).
39
- // Respect HF_HOME if set, otherwise fall back to ~/.cache/huggingface.
40
- env.cacheDir = process.env.HF_HOME ?? join(os.homedir(), '.cache', 'huggingface');
35
+ // Bridge user-controlled env vars to transformers.js: HF_HOME →
36
+ // env.cacheDir, HF_ENDPOINT → env.remoteHost (#1205). Centralised in
37
+ // applyHfEnvOverrides so this MCP entry point behaves identically to
38
+ // the analyze pipeline embedder.
39
+ applyHfEnvOverrides(env);
41
40
  const embeddingConfig = resolveEmbeddingConfig();
42
41
  console.error('GitNexus: Loading embedding model (first search may take a moment)...');
43
42
  const devicesToTry = embeddingConfig.device === 'dml' || embeddingConfig.device === 'cuda'
@@ -130,6 +130,7 @@ export function createMCPServer(backend) {
130
130
  name: tool.name,
131
131
  description: tool.description,
132
132
  inputSchema: tool.inputSchema,
133
+ annotations: tool.annotations,
133
134
  })),
134
135
  }));
135
136
  // Handle tool calls — append next-step hints to guide agent workflow
@@ -4,9 +4,11 @@
4
4
  * Defines the tools that GitNexus exposes to external AI agents.
5
5
  * All tools support an optional `repo` parameter for multi-repo setups.
6
6
  */
7
+ import type { ToolAnnotations } from '@modelcontextprotocol/sdk/types.js';
7
8
  export interface ToolDefinition {
8
9
  name: string;
9
10
  description: string;
11
+ annotations: ToolAnnotations;
10
12
  inputSchema: {
11
13
  type: 'object';
12
14
  properties: Record<string, {
package/dist/mcp/tools.js CHANGED
@@ -4,6 +4,24 @@
4
4
  * Defines the tools that GitNexus exposes to external AI agents.
5
5
  * All tools support an optional `repo` parameter for multi-repo setups.
6
6
  */
7
+ const READ_ONLY_TOOL_ANNOTATIONS = { // shared MCP annotation preset, attached below to list_repos and the other tools that use this constant
8
+ readOnlyHint: true, // tool does not modify its environment
9
+ destructiveHint: false, // no destructive updates
10
+ idempotentHint: true, // repeating the same call has no additional effect
11
+ openWorldHint: false, // does not interact with external entities
12
+ };
13
+ const QUERY_TOOL_ANNOTATIONS = { // preset for the hybrid-search tool; differs from the read-only preset only in openWorldHint
14
+ readOnlyHint: true, // tool does not modify its environment
15
+ destructiveHint: false, // no destructive updates
16
+ idempotentHint: true, // repeating the same call has no additional effect
17
+ openWorldHint: true, // NOTE(review): presumably because semantic search may call an external embedding backend — confirm
18
+ };
19
+ const DESTRUCTIVE_TOOL_ANNOTATIONS = { // preset for tools that write, e.g. the one below that rebuilds contracts.json
20
+ readOnlyHint: false, // tool may modify its environment
21
+ destructiveHint: true, // modifications may be destructive rather than purely additive
22
+ idempotentHint: false, // conservatively assume repeated calls are not side-effect free
23
+ openWorldHint: false, // does not interact with external entities
24
+ };
7
25
  export const GITNEXUS_TOOLS = [
8
26
  {
9
27
  name: 'list_repos',
@@ -16,6 +34,7 @@ AFTER THIS: READ gitnexus://repo/{name}/context for the repo you want to work wi
16
34
 
17
35
  When multiple repos are indexed, you MUST specify the "repo" parameter
18
36
  on other tools (query, context, impact, etc.) to target the correct one.`,
37
+ annotations: READ_ONLY_TOOL_ANNOTATIONS,
19
38
  inputSchema: {
20
39
  type: 'object',
21
40
  properties: {},
@@ -40,6 +59,7 @@ Hybrid ranking: BM25 keyword + semantic vector search, ranked by Reciprocal Rank
40
59
  GROUP MODE: set "repo" to "@<groupName>" to search all member repos in that group (merged via RRF), or "@<groupName>/<groupRepoPath>" to run against a single member (same path keys as in group.yaml). If you use "@<groupName>" only, the member repo defaults to the lexicographically first key in group.yaml "repos". Prefer resources for contracts/status (see migration from legacy group_* tools).
41
60
 
42
61
  SERVICE: optional monorepo path prefix (POSIX-style, case-sensitive segments). When "repo" starts with "@", only processes whose symbols fall under that prefix are included. For a normal indexed repo name (no leading @), this field is currently ignored by the server.`,
62
+ annotations: QUERY_TOOL_ANNOTATIONS,
43
63
  inputSchema: {
44
64
  type: 'object',
45
65
  properties: {
@@ -130,6 +150,7 @@ TIPS:
130
150
  - Community = auto-detected functional area (Leiden algorithm). Properties: heuristicLabel, cohesion, symbolCount, keywords, description, enrichedBy
131
151
  - Process = execution flow trace from entry point to terminal. Properties: heuristicLabel, processType, stepCount, communities, entryPointId, terminalId
132
152
  - Use heuristicLabel (not label) for human-readable community/process names`,
153
+ annotations: READ_ONLY_TOOL_ANNOTATIONS,
133
154
  inputSchema: {
134
155
  type: 'object',
135
156
  properties: {
@@ -157,6 +178,7 @@ NOTE: ACCESSES edges (field read/write tracking) are included in context results
157
178
  GROUP MODE: set "repo" to "@<groupName>" to run context in each member repo (aggregated list), or "@<groupName>/<groupRepoPath>" for one member. If you use "@<groupName>" only, the member defaults to the lexicographically first key in group.yaml "repos".
158
179
 
159
180
  SERVICE: optional monorepo path prefix (case-sensitive path segments). When "repo" starts with "@", prefix-matches resolved symbol file paths; when a hit is outside the prefix, that member returns an empty payload for the symbol. Ignored for a normal indexed repo name.`,
181
+ annotations: READ_ONLY_TOOL_ANNOTATIONS,
160
182
  inputSchema: {
161
183
  type: 'object',
162
184
  properties: {
@@ -197,6 +219,7 @@ WHEN TO USE: Before committing — to understand what your changes affect. Pre-c
197
219
  AFTER THIS: Review affected processes. Use context() on high-risk symbols. READ gitnexus://repo/{name}/process/{name} for full traces.
198
220
 
199
221
  Returns: changed symbols, affected processes, and a risk summary.`,
222
+ annotations: READ_ONLY_TOOL_ANNOTATIONS,
200
223
  inputSchema: {
201
224
  type: 'object',
202
225
  properties: {
@@ -229,6 +252,7 @@ AFTER THIS: Run detect_changes() to verify no unexpected side effects.
229
252
  Each edit is tagged with confidence:
230
253
  - "graph": found via knowledge graph relationships (high confidence, safe to accept)
231
254
  - "text_search": found via regex text search (lower confidence, review carefully)`,
255
+ annotations: DESTRUCTIVE_TOOL_ANNOTATIONS,
232
256
  inputSchema: {
233
257
  type: 'object',
234
258
  properties: {
@@ -282,6 +306,7 @@ Confidence: 1.0 = certain, <0.8 = fuzzy match
282
306
  GROUP MODE: set "repo" to "@<groupName>" for cross-repo impact anchored at the default member (lexicographically first key in group.yaml "repos"), or "@<groupName>/<groupRepoPath>" to choose the member (same path keys as in group.yaml). Phase-1 walk runs in that member; cross-boundary fan-out uses the group bridge.
283
307
 
284
308
  SERVICE: optional monorepo path prefix (case-sensitive path segments). When "repo" starts with "@", scopes the local impact walk and cross-repo symbol paths to files under that prefix; ignored for a normal indexed repo name.`,
309
+ annotations: READ_ONLY_TOOL_ANNOTATIONS,
285
310
  inputSchema: {
286
311
  type: 'object',
287
312
  properties: {
@@ -366,6 +391,7 @@ WHEN TO USE: Understanding API consumption patterns, finding orphaned routes. Fo
366
391
  AFTER THIS: Use impact() on specific route handlers to see full blast radius.
367
392
 
368
393
  Returns: route nodes with their handlers, middleware wrapper chains (e.g., withAuth, withRateLimit), and consumers.`,
394
+ annotations: READ_ONLY_TOOL_ANNOTATIONS,
369
395
  inputSchema: {
370
396
  type: 'object',
371
397
  properties: {
@@ -388,6 +414,7 @@ Returns: route nodes with their handlers, middleware wrapper chains (e.g., withA
388
414
  WHEN TO USE: Understanding tool APIs, finding tool implementations, impact analysis for tool changes.
389
415
 
390
416
  Returns: tool nodes with their handler files and descriptions.`,
417
+ annotations: READ_ONLY_TOOL_ANNOTATIONS,
391
418
  inputSchema: {
392
419
  type: 'object',
393
420
  properties: {
@@ -405,6 +432,7 @@ WHEN TO USE: Detecting mismatches between what an API route returns and what con
405
432
  REQUIRES: Route nodes with responseKeys (extracted from .json({...}) calls during indexing).
406
433
 
407
434
  Returns routes that have both detected response keys AND consumers. Shows top-level keys each endpoint returns (e.g., data, pagination, error) and what keys each consumer accesses. Reports MISMATCH status when a consumer accesses keys not present in the route's response shape.`,
435
+ annotations: READ_ONLY_TOOL_ANNOTATIONS,
408
436
  inputSchema: {
409
437
  type: 'object',
410
438
  properties: {
@@ -429,6 +457,7 @@ WHEN TO USE: BEFORE modifying any API route handler. Shows what consumers depend
429
457
  Risk levels: LOW (0-3 consumers), MEDIUM (4-9 or any mismatches), HIGH (10+ consumers or mismatches with 4+ consumers). Mismatches with confidence "low" indicate the consumer file fetches multiple routes — property attribution is approximate.
430
458
 
431
459
  Returns: single route object when one match, or { routes: [...], total: N } for multiple matches. Combines route_map, shape_check, and impact data.`,
460
+ annotations: READ_ONLY_TOOL_ANNOTATIONS,
432
461
  inputSchema: {
433
462
  type: 'object',
434
463
  properties: {
@@ -444,6 +473,7 @@ Returns: single route object when one match, or { routes: [...], total: N } for
444
473
  description: `List all configured repository groups, or return details for one group (repos, manifest links).
445
474
 
446
475
  WHEN TO USE: Discover groups before group_sync. Optional "name" returns a single group's config.`,
476
+ annotations: READ_ONLY_TOOL_ANNOTATIONS,
447
477
  inputSchema: {
448
478
  type: 'object',
449
479
  properties: {
@@ -457,6 +487,9 @@ WHEN TO USE: Discover groups before group_sync. Optional "name" returns a single
457
487
  description: `Rebuild the Contract Registry (contracts.json) for a group: extract HTTP contracts, apply manifest links, exact-match cross-links.
458
488
 
459
489
  WHEN TO USE: After changing group.yaml or re-indexing member repos.`,
490
+ // Writes contracts.json on every call; conservatively non-idempotent
491
+ // even though output is deterministic for identical input.
492
+ annotations: DESTRUCTIVE_TOOL_ANNOTATIONS,
460
493
  inputSchema: {
461
494
  type: 'object',
462
495
  properties: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "gitnexus",
3
- "version": "1.6.4-rc.43",
3
+ "version": "1.6.4-rc.45",
4
4
  "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
5
5
  "author": "Abhigyan Patwari",
6
6
  "license": "PolyForm-Noncommercial-1.0.0",