npm - gitnexus - Versions diffs - 1.6.4-rc.43 → 1.6.4-rc.45 - Mend

gitnexus 1.6.4-rc.43 → 1.6.4-rc.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

package/dist/core/embeddings/embedder.js +6 -6
package/dist/core/embeddings/hf-env.d.ts +46 -0
package/dist/core/embeddings/hf-env.js +46 -0
package/dist/mcp/core/embedder.js +6 -7
package/dist/mcp/server.js +1 -0
package/dist/mcp/tools.d.ts +2 -0
package/dist/mcp/tools.js +33 -0
package/package.json +1 -1

package/dist/core/embeddings/embedder.js CHANGED

@@ -13,7 +13,6 @@ if (!process.env.ORT_LOG_LEVEL) {
     process.env.ORT_LOG_LEVEL = '3';
 }
 import { pipeline, env } from '@huggingface/transformers';
-import os from 'os';
 import { existsSync } from 'fs';
 import { execFileSync } from 'child_process';
 import { join, dirname } from 'path';
@@ -21,6 +20,7 @@ import { createRequire } from 'module';
 import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
 import { isHttpMode, getHttpDimensions, httpEmbed } from './http-client.js';
 import { resolveEmbeddingConfig } from './config.js';
+import { applyHfEnvOverrides } from './hf-env.js';
 /**
  * Check whether the onnxruntime-node package that @huggingface/transformers
  * will actually load at runtime ships the CUDA execution provider.
@@ -132,11 +132,11 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
         try {
             // Configure transformers.js environment
             env.allowLocalModels = false;
-            // Default cache to user-writable location. transformers.js defaults to
-            // ./node_modules/.cache inside its own install dir, which is unwritable
-            // when gitnexus is installed globally (e.g. /usr/lib/node_modules/).
-            // Respect HF_HOME if set, otherwise fall back to ~/.cache/huggingface.
-            env.cacheDir = process.env.HF_HOME ?? join(os.homedir(), '.cache', 'huggingface');
+            // Bridge user-controlled env vars to transformers.js: HF_HOME →
+            // env.cacheDir, HF_ENDPOINT → env.remoteHost (#1205). Centralised in
+            // applyHfEnvOverrides so the MCP embedder entry point behaves
+            // identically.
+            applyHfEnvOverrides(env);
             const isDev = process.env.NODE_ENV === 'development';
             if (isDev) {
                 console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);

package/dist/core/embeddings/hf-env.d.ts ADDED

@@ -0,0 +1,46 @@
+/**
+ * @internal Exported only for unit tests and the two embedder entry points
+ * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
+ * public package API.
+ *
+ * Minimal subset of `@huggingface/transformers`' `env` object that gitnexus
+ * mutates. Defining a local structural type keeps this helper free of a
+ * transitive dependency on transformers' generated `.d.ts` while still
+ * giving full type-checking on the two fields we actually touch.
+ */
+export interface HfEnvSubset {
+    cacheDir: string;
+    remoteHost: string;
+}
+/**
+ * @internal Exported only for unit tests and the two embedder entry points
+ * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
+ * public package API.
+ *
+ * Apply user-controlled HuggingFace environment overrides to the
+ * `@huggingface/transformers` `env` object. Centralises the two env-var
+ * bridges so every gitnexus embedder entry point (the analyze pipeline
+ * and the MCP server) behaves identically.
+ *
+ * - **`HF_HOME`** → `env.cacheDir` (default: `~/.cache/huggingface`).
+ *   transformers.js otherwise defaults to `./node_modules/.cache` inside
+ *   its own install dir, which is unwritable when gitnexus is installed
+ *   globally (e.g. `/usr/lib/node_modules/`).
+ *
+ * - **`HF_ENDPOINT`** → `env.remoteHost` (#1205). transformers.js does
+ *   not read `HF_ENDPOINT` on its own — it reads `env.remoteHost` —
+ *   even though `HF_ENDPOINT` is the standard env var the upstream
+ *   `huggingface_hub` Python client and the official HF mirror docs
+ *   tell users to set. Bridging the two unblocks `--embeddings` for
+ *   users behind networks where `huggingface.co` is unreachable
+ *   (corporate proxies, the GFW, air-gapped mirrors). The trailing
+ *   slash is normalised because transformers.js builds URLs by string
+ *   concatenation and a missing slash silently falls through to its
+ *   default `huggingface.co/...` host.
+ *
+ * Mutation rather than return-and-apply because callers already hold a
+ * reference to the live `env` object imported from
+ * `@huggingface/transformers` — passing the same reference in keeps the
+ * call site a single line at each entry point.
+ */
+export declare function applyHfEnvOverrides(env: HfEnvSubset): void;

package/dist/core/embeddings/hf-env.js ADDED

@@ -0,0 +1,46 @@
+import os from 'node:os';
+import { join } from 'node:path';
+/**
+ * @internal Exported only for unit tests and the two embedder entry points
+ * (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
+ * public package API.
+ *
+ * Apply user-controlled HuggingFace environment overrides to the
+ * `@huggingface/transformers` `env` object. Centralises the two env-var
+ * bridges so every gitnexus embedder entry point (the analyze pipeline
+ * and the MCP server) behaves identically.
+ *
+ * - **`HF_HOME`** → `env.cacheDir` (default: `~/.cache/huggingface`).
+ *   transformers.js otherwise defaults to `./node_modules/.cache` inside
+ *   its own install dir, which is unwritable when gitnexus is installed
+ *   globally (e.g. `/usr/lib/node_modules/`).
+ *
+ * - **`HF_ENDPOINT`** → `env.remoteHost` (#1205). transformers.js does
+ *   not read `HF_ENDPOINT` on its own — it reads `env.remoteHost` —
+ *   even though `HF_ENDPOINT` is the standard env var the upstream
+ *   `huggingface_hub` Python client and the official HF mirror docs
+ *   tell users to set. Bridging the two unblocks `--embeddings` for
+ *   users behind networks where `huggingface.co` is unreachable
+ *   (corporate proxies, the GFW, air-gapped mirrors). The trailing
+ *   slash is normalised because transformers.js builds URLs by string
+ *   concatenation and a missing slash silently falls through to its
+ *   default `huggingface.co/...` host.
+ *
+ * Mutation rather than return-and-apply because callers already hold a
+ * reference to the live `env` object imported from
+ * `@huggingface/transformers` — passing the same reference in keeps the
+ * call site a single line at each entry point.
+ */
+export function applyHfEnvOverrides(env) {
+    env.cacheDir = process.env.HF_HOME ?? join(os.homedir(), '.cache', 'huggingface');
+    // `.trim()` guards against the common copy-paste failure mode of
+    // `HF_ENDPOINT="  https://hf-mirror.com  "` (leading/trailing whitespace
+    // from shell scripts or docs) — without it, a whitespace-only value
+    // would be truthy and produce an invalid `env.remoteHost = '   /'` that
+    // silently misroutes downloads. Empty string remains falsy in JS so the
+    // truthy guard already handles the unset/empty cases.
+    const endpoint = process.env.HF_ENDPOINT?.trim();
+    if (endpoint) {
+        env.remoteHost = endpoint.endsWith('/') ? endpoint : endpoint + '/';
+    }
+}

package/dist/mcp/core/embedder.js CHANGED

@@ -5,10 +5,9 @@
  * For MCP, we only need to compute query embeddings, not batch embed.
  */
 import { pipeline, env } from '@huggingface/transformers';
-import os from 'os';
-import { join } from 'path';
 import { isHttpMode, getHttpDimensions, httpEmbedQuery, } from '../../core/embeddings/http-client.js';
 import { resolveEmbeddingConfig } from '../../core/embeddings/config.js';
+import { applyHfEnvOverrides } from '../../core/embeddings/hf-env.js';
 import { silenceStdout, restoreStdout, realStderrWrite } from '../../core/lbug/pool-adapter.js';
 // Model config
 const MODEL_ID = 'Snowflake/snowflake-arctic-embed-xs';
@@ -33,11 +32,11 @@ export const initEmbedder = async () => {
     initPromise = (async () => {
         try {
             env.allowLocalModels = false;
-            // Default cache to user-writable location. transformers.js defaults to
-            // ./node_modules/.cache inside its own install dir, which is unwritable
-            // when gitnexus is installed globally (e.g. /usr/lib/node_modules/).
-            // Respect HF_HOME if set, otherwise fall back to ~/.cache/huggingface.
-            env.cacheDir = process.env.HF_HOME ?? join(os.homedir(), '.cache', 'huggingface');
+            // Bridge user-controlled env vars to transformers.js: HF_HOME →
+            // env.cacheDir, HF_ENDPOINT → env.remoteHost (#1205). Centralised in
+            // applyHfEnvOverrides so this MCP entry point behaves identically to
+            // the analyze pipeline embedder.
+            applyHfEnvOverrides(env);
             const embeddingConfig = resolveEmbeddingConfig();
             console.error('GitNexus: Loading embedding model (first search may take a moment)...');
             const devicesToTry = embeddingConfig.device === 'dml' || embeddingConfig.device === 'cuda'

package/dist/mcp/server.js CHANGED

@@ -130,6 +130,7 @@ export function createMCPServer(backend) {
             name: tool.name,
             description: tool.description,
             inputSchema: tool.inputSchema,
+            annotations: tool.annotations,
         })),
     }));
     // Handle tool calls — append next-step hints to guide agent workflow

package/dist/mcp/tools.d.ts CHANGED

@@ -4,9 +4,11 @@
  * Defines the tools that GitNexus exposes to external AI agents.
  * All tools support an optional `repo` parameter for multi-repo setups.
  */
+import type { ToolAnnotations } from '@modelcontextprotocol/sdk/types.js';
 export interface ToolDefinition {
     name: string;
     description: string;
+    annotations: ToolAnnotations;
     inputSchema: {
         type: 'object';
         properties: Record<string, {

package/dist/mcp/tools.js CHANGED

@@ -4,6 +4,24 @@
  * Defines the tools that GitNexus exposes to external AI agents.
  * All tools support an optional `repo` parameter for multi-repo setups.
  */
+const READ_ONLY_TOOL_ANNOTATIONS = {
+    readOnlyHint: true,
+    destructiveHint: false,
+    idempotentHint: true,
+    openWorldHint: false,
+};
+const QUERY_TOOL_ANNOTATIONS = {
+    readOnlyHint: true,
+    destructiveHint: false,
+    idempotentHint: true,
+    openWorldHint: true,
+};
+const DESTRUCTIVE_TOOL_ANNOTATIONS = {
+    readOnlyHint: false,
+    destructiveHint: true,
+    idempotentHint: false,
+    openWorldHint: false,
+};
 export const GITNEXUS_TOOLS = [
     {
         name: 'list_repos',
@@ -16,6 +34,7 @@ AFTER THIS: READ gitnexus://repo/{name}/context for the repo you want to work wi
 When multiple repos are indexed, you MUST specify the "repo" parameter
 on other tools (query, context, impact, etc.) to target the correct one.`,
+        annotations: READ_ONLY_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {},
@@ -40,6 +59,7 @@ Hybrid ranking: BM25 keyword + semantic vector search, ranked by Reciprocal Rank
 GROUP MODE: set "repo" to "@<groupName>" to search all member repos in that group (merged via RRF), or "@<groupName>/<groupRepoPath>" to run against a single member (same path keys as in group.yaml). If you use "@<groupName>" only, the member repo defaults to the lexicographically first key in group.yaml "repos". Prefer resources for contracts/status (see migration from legacy group_* tools).
 SERVICE: optional monorepo path prefix (POSIX-style, case-sensitive segments). When "repo" starts with "@", only processes whose symbols fall under that prefix are included. For a normal indexed repo name (no leading @), this field is currently ignored by the server.`,
+        annotations: QUERY_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {
@@ -130,6 +150,7 @@ TIPS:
 - Community = auto-detected functional area (Leiden algorithm). Properties: heuristicLabel, cohesion, symbolCount, keywords, description, enrichedBy
 - Process = execution flow trace from entry point to terminal. Properties: heuristicLabel, processType, stepCount, communities, entryPointId, terminalId
 - Use heuristicLabel (not label) for human-readable community/process names`,
+        annotations: READ_ONLY_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {
@@ -157,6 +178,7 @@ NOTE: ACCESSES edges (field read/write tracking) are included in context results
 GROUP MODE: set "repo" to "@<groupName>" to run context in each member repo (aggregated list), or "@<groupName>/<groupRepoPath>" for one member. If you use "@<groupName>" only, the member defaults to the lexicographically first key in group.yaml "repos".
 SERVICE: optional monorepo path prefix (case-sensitive path segments). When "repo" starts with "@", prefix-matches resolved symbol file paths; when a hit is outside the prefix, that member returns an empty payload for the symbol. Ignored for a normal indexed repo name.`,
+        annotations: READ_ONLY_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {
@@ -197,6 +219,7 @@ WHEN TO USE: Before committing — to understand what your changes affect. Pre-c
 AFTER THIS: Review affected processes. Use context() on high-risk symbols. READ gitnexus://repo/{name}/process/{name} for full traces.
 Returns: changed symbols, affected processes, and a risk summary.`,
+        annotations: READ_ONLY_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {
@@ -229,6 +252,7 @@ AFTER THIS: Run detect_changes() to verify no unexpected side effects.
 Each edit is tagged with confidence:
 - "graph": found via knowledge graph relationships (high confidence, safe to accept)
 - "text_search": found via regex text search (lower confidence, review carefully)`,
+        annotations: DESTRUCTIVE_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {
@@ -282,6 +306,7 @@ Confidence: 1.0 = certain, <0.8 = fuzzy match
 GROUP MODE: set "repo" to "@<groupName>" for cross-repo impact anchored at the default member (lexicographically first key in group.yaml "repos"), or "@<groupName>/<groupRepoPath>" to choose the member (same path keys as in group.yaml). Phase-1 walk runs in that member; cross-boundary fan-out uses the group bridge.
 SERVICE: optional monorepo path prefix (case-sensitive path segments). When "repo" starts with "@", scopes the local impact walk and cross-repo symbol paths to files under that prefix; ignored for a normal indexed repo name.`,
+        annotations: READ_ONLY_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {
@@ -366,6 +391,7 @@ WHEN TO USE: Understanding API consumption patterns, finding orphaned routes. Fo
 AFTER THIS: Use impact() on specific route handlers to see full blast radius.
 Returns: route nodes with their handlers, middleware wrapper chains (e.g., withAuth, withRateLimit), and consumers.`,
+        annotations: READ_ONLY_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {
@@ -388,6 +414,7 @@ Returns: route nodes with their handlers, middleware wrapper chains (e.g., withA
 WHEN TO USE: Understanding tool APIs, finding tool implementations, impact analysis for tool changes.
 Returns: tool nodes with their handler files and descriptions.`,
+        annotations: READ_ONLY_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {
@@ -405,6 +432,7 @@ WHEN TO USE: Detecting mismatches between what an API route returns and what con
 REQUIRES: Route nodes with responseKeys (extracted from .json({...}) calls during indexing).
 Returns routes that have both detected response keys AND consumers. Shows top-level keys each endpoint returns (e.g., data, pagination, error) and what keys each consumer accesses. Reports MISMATCH status when a consumer accesses keys not present in the route's response shape.`,
+        annotations: READ_ONLY_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {
@@ -429,6 +457,7 @@ WHEN TO USE: BEFORE modifying any API route handler. Shows what consumers depend
 Risk levels: LOW (0-3 consumers), MEDIUM (4-9 or any mismatches), HIGH (10+ consumers or mismatches with 4+ consumers). Mismatches with confidence "low" indicate the consumer file fetches multiple routes — property attribution is approximate.
 Returns: single route object when one match, or { routes: [...], total: N } for multiple matches. Combines route_map, shape_check, and impact data.`,
+        annotations: READ_ONLY_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {
@@ -444,6 +473,7 @@ Returns: single route object when one match, or { routes: [...], total: N } for
         description: `List all configured repository groups, or return details for one group (repos, manifest links).
 WHEN TO USE: Discover groups before group_sync. Optional "name" returns a single group's config.`,
+        annotations: READ_ONLY_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {
@@ -457,6 +487,9 @@ WHEN TO USE: Discover groups before group_sync. Optional "name" returns a single
         description: `Rebuild the Contract Registry (contracts.json) for a group: extract HTTP contracts, apply manifest links, exact-match cross-links.
 WHEN TO USE: After changing group.yaml or re-indexing member repos.`,
+        // Writes contracts.json on every call; conservatively non-idempotent
+        // even though output is deterministic for identical input.
+        annotations: DESTRUCTIVE_TOOL_ANNOTATIONS,
         inputSchema: {
             type: 'object',
             properties: {

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "gitnexus",
-  "version": "1.6.4-rc.43",
+  "version": "1.6.4-rc.45",
   "description": "Graph-powered code intelligence for AI agents. Index any codebase, query via MCP or CLI.",
   "author": "Abhigyan Patwari",
   "license": "PolyForm-Noncommercial-1.0.0",