gitnexus 1.6.4-rc.43 → 1.6.4-rc.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -13,7 +13,6 @@ if (!process.env.ORT_LOG_LEVEL) {
|
|
|
13
13
|
process.env.ORT_LOG_LEVEL = '3';
|
|
14
14
|
}
|
|
15
15
|
import { pipeline, env } from '@huggingface/transformers';
|
|
16
|
-
import os from 'os';
|
|
17
16
|
import { existsSync } from 'fs';
|
|
18
17
|
import { execFileSync } from 'child_process';
|
|
19
18
|
import { join, dirname } from 'path';
|
|
@@ -21,6 +20,7 @@ import { createRequire } from 'module';
|
|
|
21
20
|
import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
|
|
22
21
|
import { isHttpMode, getHttpDimensions, httpEmbed } from './http-client.js';
|
|
23
22
|
import { resolveEmbeddingConfig } from './config.js';
|
|
23
|
+
import { applyHfEnvOverrides } from './hf-env.js';
|
|
24
24
|
/**
|
|
25
25
|
* Check whether the onnxruntime-node package that @huggingface/transformers
|
|
26
26
|
* will actually load at runtime ships the CUDA execution provider.
|
|
@@ -132,11 +132,11 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
|
|
|
132
132
|
try {
|
|
133
133
|
// Configure transformers.js environment
|
|
134
134
|
env.allowLocalModels = false;
|
|
135
|
-
//
|
|
136
|
-
//
|
|
137
|
-
//
|
|
138
|
-
//
|
|
139
|
-
env
|
|
135
|
+
// Bridge user-controlled env vars to transformers.js: HF_HOME →
|
|
136
|
+
// env.cacheDir, HF_ENDPOINT → env.remoteHost (#1205). Centralised in
|
|
137
|
+
// applyHfEnvOverrides so the MCP embedder entry point behaves
|
|
138
|
+
// identically.
|
|
139
|
+
applyHfEnvOverrides(env);
|
|
140
140
|
const isDev = process.env.NODE_ENV === 'development';
|
|
141
141
|
if (isDev) {
|
|
142
142
|
console.log(`🧠 Loading embedding model: ${finalConfig.modelId}`);
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @internal Exported only for unit tests and the two embedder entry points
|
|
3
|
+
* (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
|
|
4
|
+
* public package API.
|
|
5
|
+
*
|
|
6
|
+
* Minimal subset of `@huggingface/transformers`' `env` object that gitnexus
|
|
7
|
+
* mutates. Defining a local structural type keeps this helper free of a
|
|
8
|
+
* transitive dependency on transformers' generated `.d.ts` while still
|
|
9
|
+
* giving full type-checking on the two fields we actually touch.
|
|
10
|
+
*/
|
|
11
|
+
export interface HfEnvSubset {
|
|
12
|
+
cacheDir: string;
|
|
13
|
+
remoteHost: string;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* @internal Exported only for unit tests and the two embedder entry points
|
|
17
|
+
* (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
|
|
18
|
+
* public package API.
|
|
19
|
+
*
|
|
20
|
+
* Apply user-controlled HuggingFace environment overrides to the
|
|
21
|
+
* `@huggingface/transformers` `env` object. Centralises the two env-var
|
|
22
|
+
* bridges so every gitnexus embedder entry point (the analyze pipeline
|
|
23
|
+
* and the MCP server) behaves identically.
|
|
24
|
+
*
|
|
25
|
+
* - **`HF_HOME`** → `env.cacheDir` (default: `~/.cache/huggingface`).
|
|
26
|
+
* transformers.js otherwise defaults to `./node_modules/.cache` inside
|
|
27
|
+
* its own install dir, which is unwritable when gitnexus is installed
|
|
28
|
+
* globally (e.g. `/usr/lib/node_modules/`).
|
|
29
|
+
*
|
|
30
|
+
* - **`HF_ENDPOINT`** → `env.remoteHost` (#1205). transformers.js does
|
|
31
|
+
* not read `HF_ENDPOINT` on its own — it reads `env.remoteHost` —
|
|
32
|
+
* even though `HF_ENDPOINT` is the standard env var the upstream
|
|
33
|
+
* `huggingface_hub` Python client and the official HF mirror docs
|
|
34
|
+
* tell users to set. Bridging the two unblocks `--embeddings` for
|
|
35
|
+
* users behind networks where `huggingface.co` is unreachable
|
|
36
|
+
* (corporate proxies, the GFW, air-gapped mirrors). The trailing
|
|
37
|
+
* slash is normalised because transformers.js builds URLs by string
|
|
38
|
+
* concatenation and a missing slash silently falls through to its
|
|
39
|
+
* default `huggingface.co/...` host.
|
|
40
|
+
*
|
|
41
|
+
* Mutation rather than return-and-apply because callers already hold a
|
|
42
|
+
* reference to the live `env` object imported from
|
|
43
|
+
* `@huggingface/transformers` — passing the same reference in keeps the
|
|
44
|
+
* call site a single line at each entry point.
|
|
45
|
+
*/
|
|
46
|
+
export declare function applyHfEnvOverrides(env: HfEnvSubset): void;
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
import os from 'node:os';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
/**
|
|
4
|
+
* @internal Exported only for unit tests and the two embedder entry points
|
|
5
|
+
* (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
|
|
6
|
+
* public package API.
|
|
7
|
+
*
|
|
8
|
+
* Apply user-controlled HuggingFace environment overrides to the
|
|
9
|
+
* `@huggingface/transformers` `env` object. Centralises the two env-var
|
|
10
|
+
* bridges so every gitnexus embedder entry point (the analyze pipeline
|
|
11
|
+
* and the MCP server) behaves identically.
|
|
12
|
+
*
|
|
13
|
+
* - **`HF_HOME`** → `env.cacheDir` (default: `~/.cache/huggingface`).
|
|
14
|
+
* transformers.js otherwise defaults to `./node_modules/.cache` inside
|
|
15
|
+
* its own install dir, which is unwritable when gitnexus is installed
|
|
16
|
+
* globally (e.g. `/usr/lib/node_modules/`).
|
|
17
|
+
*
|
|
18
|
+
* - **`HF_ENDPOINT`** → `env.remoteHost` (#1205). transformers.js does
|
|
19
|
+
* not read `HF_ENDPOINT` on its own — it reads `env.remoteHost` —
|
|
20
|
+
* even though `HF_ENDPOINT` is the standard env var the upstream
|
|
21
|
+
* `huggingface_hub` Python client and the official HF mirror docs
|
|
22
|
+
* tell users to set. Bridging the two unblocks `--embeddings` for
|
|
23
|
+
* users behind networks where `huggingface.co` is unreachable
|
|
24
|
+
* (corporate proxies, the GFW, air-gapped mirrors). The trailing
|
|
25
|
+
* slash is normalised because transformers.js builds URLs by string
|
|
26
|
+
* concatenation and a missing slash silently falls through to its
|
|
27
|
+
* default `huggingface.co/...` host.
|
|
28
|
+
*
|
|
29
|
+
* Mutation rather than return-and-apply because callers already hold a
|
|
30
|
+
* reference to the live `env` object imported from
|
|
31
|
+
* `@huggingface/transformers` — passing the same reference in keeps the
|
|
32
|
+
* call site a single line at each entry point.
|
|
33
|
+
*/
|
|
34
|
+
export function applyHfEnvOverrides(env) {
|
|
35
|
+
env.cacheDir = process.env.HF_HOME ?? join(os.homedir(), '.cache', 'huggingface');
|
|
36
|
+
// `.trim()` guards against the common copy-paste failure mode of
|
|
37
|
+
// `HF_ENDPOINT=" https://hf-mirror.com "` (leading/trailing whitespace
|
|
38
|
+
// from shell scripts or docs) — without it, a whitespace-only value
|
|
39
|
+
// would be truthy and produce an invalid `env.remoteHost = ' /'` that
|
|
40
|
+
// silently misroutes downloads. Empty string remains falsy in JS so the
|
|
41
|
+
// truthy guard already handles the unset/empty cases.
|
|
42
|
+
const endpoint = process.env.HF_ENDPOINT?.trim();
|
|
43
|
+
if (endpoint) {
|
|
44
|
+
env.remoteHost = endpoint.endsWith('/') ? endpoint : endpoint + '/';
|
|
45
|
+
}
|
|
46
|
+
}
|
|
@@ -5,10 +5,9 @@
|
|
|
5
5
|
* For MCP, we only need to compute query embeddings, not batch embed.
|
|
6
6
|
*/
|
|
7
7
|
import { pipeline, env } from '@huggingface/transformers';
|
|
8
|
-
import os from 'os';
|
|
9
|
-
import { join } from 'path';
|
|
10
8
|
import { isHttpMode, getHttpDimensions, httpEmbedQuery, } from '../../core/embeddings/http-client.js';
|
|
11
9
|
import { resolveEmbeddingConfig } from '../../core/embeddings/config.js';
|
|
10
|
+
import { applyHfEnvOverrides } from '../../core/embeddings/hf-env.js';
|
|
12
11
|
import { silenceStdout, restoreStdout, realStderrWrite } from '../../core/lbug/pool-adapter.js';
|
|
13
12
|
// Model config
|
|
14
13
|
const MODEL_ID = 'Snowflake/snowflake-arctic-embed-xs';
|
|
@@ -33,11 +32,11 @@ export const initEmbedder = async () => {
|
|
|
33
32
|
initPromise = (async () => {
|
|
34
33
|
try {
|
|
35
34
|
env.allowLocalModels = false;
|
|
36
|
-
//
|
|
37
|
-
//
|
|
38
|
-
//
|
|
39
|
-
//
|
|
40
|
-
env
|
|
35
|
+
// Bridge user-controlled env vars to transformers.js: HF_HOME →
|
|
36
|
+
// env.cacheDir, HF_ENDPOINT → env.remoteHost (#1205). Centralised in
|
|
37
|
+
// applyHfEnvOverrides so this MCP entry point behaves identically to
|
|
38
|
+
// the analyze pipeline embedder.
|
|
39
|
+
applyHfEnvOverrides(env);
|
|
41
40
|
const embeddingConfig = resolveEmbeddingConfig();
|
|
42
41
|
console.error('GitNexus: Loading embedding model (first search may take a moment)...');
|
|
43
42
|
const devicesToTry = embeddingConfig.device === 'dml' || embeddingConfig.device === 'cuda'
|
package/package.json
CHANGED