gitnexus 1.6.4-rc.89 → 1.6.4-rc.90
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli/analyze.js
CHANGED
|
@@ -20,6 +20,7 @@ import { warnMissingOptionalGrammars } from './optional-grammars.js';
|
|
|
20
20
|
import { glob } from 'glob';
|
|
21
21
|
import fs from 'fs/promises';
|
|
22
22
|
import { cliError } from './cli-message.js';
|
|
23
|
+
import { isHfDownloadFailure } from '../core/embeddings/hf-env.js';
|
|
23
24
|
// Capture stderr.write at module load BEFORE anything (LadybugDB native
|
|
24
25
|
// init, progress bar, console redirection) can monkey-patch it. The
|
|
25
26
|
// fatal handlers below MUST reach the user even when the analyze path
|
|
@@ -427,6 +428,22 @@ export const analyzeCommand = async (inputPath, options) => {
|
|
|
427
428
|
process.exitCode = 1;
|
|
428
429
|
return;
|
|
429
430
|
}
|
|
431
|
+
// HF download failure — show clean guidance without the raw stack trace.
|
|
432
|
+
// Checked before writeFatalToStderr so the user sees one focused message
|
|
433
|
+
// rather than a stack-trace dump followed by a second remediation block.
|
|
434
|
+
if (isHfDownloadFailure(msg) || msg.includes('Failed to download embedding model')) {
|
|
435
|
+
cliError(` The embedding model could not be downloaded.\n` +
|
|
436
|
+
` huggingface.co may be unreachable from your network\n` +
|
|
437
|
+
` (e.g. behind a corporate proxy or a regional firewall).\n` +
|
|
438
|
+
` Suggestions:\n` +
|
|
439
|
+
` 1. Set HF_ENDPOINT to a mirror and retry:\n` +
|
|
440
|
+
` HF_ENDPOINT=https://hf-mirror.com npx gitnexus analyze --embeddings\n` +
|
|
441
|
+
` (Windows: set HF_ENDPOINT=https://hf-mirror.com && npx gitnexus analyze --embeddings)\n` +
|
|
442
|
+
` 2. Check your proxy / VPN settings.\n` +
|
|
443
|
+
` 3. Once downloaded the model is cached — future runs work offline.\n`, { recoveryHint: 'hf-endpoint-unreachable' });
|
|
444
|
+
process.exitCode = 1;
|
|
445
|
+
return;
|
|
446
|
+
}
|
|
430
447
|
// Bypass the redirected console.error and write the full stack to
|
|
431
448
|
// the real stderr captured at module load. The redirected
|
|
432
449
|
// console.error wraps every line with `\\x1b[2K\\r` (ANSI clear-line)
|
|
@@ -12,7 +12,7 @@
|
|
|
12
12
|
if (!process.env.ORT_LOG_LEVEL) {
|
|
13
13
|
process.env.ORT_LOG_LEVEL = '3';
|
|
14
14
|
}
|
|
15
|
-
import { pipeline, env } from '@huggingface/transformers';
|
|
15
|
+
import { pipeline, env, } from '@huggingface/transformers';
|
|
16
16
|
import { existsSync } from 'fs';
|
|
17
17
|
import { execFileSync } from 'child_process';
|
|
18
18
|
import { join, dirname } from 'path';
|
|
@@ -20,7 +20,7 @@ import { createRequire } from 'module';
|
|
|
20
20
|
import { DEFAULT_EMBEDDING_CONFIG } from './types.js';
|
|
21
21
|
import { isHttpMode, getHttpDimensions, httpEmbed } from './http-client.js';
|
|
22
22
|
import { resolveEmbeddingConfig } from './config.js';
|
|
23
|
-
import { applyHfEnvOverrides } from './hf-env.js';
|
|
23
|
+
import { applyHfEnvOverrides, isHfDownloadFailure, withHfDownloadRetry } from './hf-env.js';
|
|
24
24
|
import { logger } from '../logger.js';
|
|
25
25
|
/**
|
|
26
26
|
* Check whether the onnxruntime-node package that @huggingface/transformers
|
|
@@ -145,11 +145,15 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
|
|
|
145
145
|
const progressCallback = onProgress
|
|
146
146
|
? (data) => {
|
|
147
147
|
const progress = {
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
148
|
+
// Map the `progress_total` aggregate event (not in ModelProgress.status)
|
|
149
|
+
// back to 'progress' so callers don't need to handle it separately.
|
|
150
|
+
status: data.status === 'progress_total'
|
|
151
|
+
? 'progress'
|
|
152
|
+
: (data.status ?? 'progress'),
|
|
153
|
+
file: 'file' in data ? data.file : undefined,
|
|
154
|
+
progress: 'progress' in data ? data.progress : undefined,
|
|
155
|
+
loaded: 'loaded' in data ? data.loaded : undefined,
|
|
156
|
+
total: 'total' in data ? data.total : undefined,
|
|
153
157
|
};
|
|
154
158
|
onProgress(progress);
|
|
155
159
|
}
|
|
@@ -173,7 +177,7 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
|
|
|
173
177
|
else if (isDev && device === 'wasm') {
|
|
174
178
|
logger.info('🔧 Using WASM backend (slower)...');
|
|
175
179
|
}
|
|
176
|
-
embedderInstance = await pipeline('feature-extraction', finalConfig.modelId, {
|
|
180
|
+
embedderInstance = await withHfDownloadRetry(() => pipeline('feature-extraction', finalConfig.modelId, {
|
|
177
181
|
device: device,
|
|
178
182
|
dtype: 'fp32',
|
|
179
183
|
progress_callback: progressCallback,
|
|
@@ -183,6 +187,10 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
|
|
|
183
187
|
interOpNumThreads: 1,
|
|
184
188
|
executionMode: 'sequential',
|
|
185
189
|
},
|
|
190
|
+
}), {
|
|
191
|
+
onRetry: isDev
|
|
192
|
+
? (attempt, max, err) => logger.warn({ attempt, max, err: err.message }, `⚠️ Model download network error (attempt ${attempt}/${max}), retrying…`)
|
|
193
|
+
: undefined,
|
|
186
194
|
});
|
|
187
195
|
currentDevice = device;
|
|
188
196
|
if (isDev) {
|
|
@@ -197,6 +205,20 @@ export const initEmbedder = async (onProgress, config = {}, forceDevice) => {
|
|
|
197
205
|
return embedderInstance;
|
|
198
206
|
}
|
|
199
207
|
catch (deviceError) {
|
|
208
|
+
// Network errors and circuit-open errors are not device-specific —
|
|
209
|
+
// they will fail the same way on every device. Rethrow immediately
|
|
210
|
+
// with actionable HF_ENDPOINT guidance rather than silently falling
|
|
211
|
+
// back to the next device.
|
|
212
|
+
const errMsg = deviceError instanceof Error ? deviceError.message : String(deviceError);
|
|
213
|
+
if (isHfDownloadFailure(errMsg)) {
|
|
214
|
+
const endpointHint = process.env.HF_ENDPOINT
|
|
215
|
+
? `The configured endpoint (${process.env.HF_ENDPOINT}) may be unreachable.`
|
|
216
|
+
: `huggingface.co may be unreachable from your network.\n` +
|
|
217
|
+
` Set HF_ENDPOINT to a mirror and retry:\n` +
|
|
218
|
+
` HF_ENDPOINT=https://hf-mirror.com npx gitnexus analyze --embeddings\n` +
|
|
219
|
+
` (Windows: set HF_ENDPOINT=https://hf-mirror.com && npx gitnexus analyze --embeddings)`;
|
|
220
|
+
throw new Error(`Failed to download embedding model: ${errMsg}\n ${endpointHint}`);
|
|
221
|
+
}
|
|
200
222
|
if (isDev && (device === 'cuda' || device === 'dml')) {
|
|
201
223
|
const gpuType = device === 'dml' ? 'DirectML' : 'CUDA';
|
|
202
224
|
logger.info(`⚠️ ${gpuType} not available, falling back to CPU...`);
|
|
@@ -1,3 +1,17 @@
|
|
|
1
|
+
/** Per-attempt timeout for the full model download (5 minutes). */
|
|
2
|
+
export declare const HF_DOWNLOAD_TIMEOUT_MS: number;
|
|
3
|
+
/** Maximum total download attempts (1 initial + N-1 retries). */
|
|
4
|
+
export declare const HF_MAX_ATTEMPTS = 3;
|
|
5
|
+
/** Initial delay between retry attempts; doubles on each subsequent retry. */
|
|
6
|
+
export declare const HF_BASE_DELAY_MS = 2000;
|
|
7
|
+
/** Number of consecutive failures required to open the circuit. */
|
|
8
|
+
export declare const CB_FAILURE_THRESHOLD = 3;
|
|
9
|
+
/** How long the circuit stays open before transitioning to half-open. */
|
|
10
|
+
export declare const CB_RESET_TIMEOUT_MS = 60000;
|
|
11
|
+
/** Upper bound clamped on the env-override per-attempt timeout (30 minutes). */
|
|
12
|
+
export declare const HF_MAX_TIMEOUT_MS: number;
|
|
13
|
+
/** Upper bound clamped on the env-override attempt count. */
|
|
14
|
+
export declare const HF_MAX_ATTEMPTS_CAP = 10;
|
|
1
15
|
/**
|
|
2
16
|
* @internal Exported only for unit tests and the two embedder entry points
|
|
3
17
|
* (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
|
|
@@ -44,3 +58,106 @@ export interface HfEnvSubset {
|
|
|
44
58
|
* call site a single line at each entry point.
|
|
45
59
|
*/
|
|
46
60
|
export declare function applyHfEnvOverrides(env: HfEnvSubset): void;
|
|
61
|
+
/**
|
|
62
|
+
* @internal Exported for unit tests and the two embedder entry points.
|
|
63
|
+
*
|
|
64
|
+
* Returns true when an error message indicates a network-level fetch failure
|
|
65
|
+
* during HuggingFace model download (e.g. `TypeError: fetch failed`,
|
|
66
|
+
* `ECONNREFUSED`, `ENOTFOUND`, `ETIMEDOUT`, `ECONNRESET`).
|
|
67
|
+
*
|
|
68
|
+
* These errors are not device-specific and cannot be fixed by falling back to
|
|
69
|
+
* a different ONNX device — the caller should rethrow immediately with
|
|
70
|
+
* guidance about `HF_ENDPOINT`.
|
|
71
|
+
*/
|
|
72
|
+
export declare function isNetworkFetchError(message: string): boolean;
|
|
73
|
+
/** @internal Used by `withHfDownloadRetry` to mark a circuit-open rejection. */
|
|
74
|
+
export declare const CIRCUIT_OPEN_TAG = "hf-circuit-open";
|
|
75
|
+
/** Circuit-breaker states. */
|
|
76
|
+
type CircuitState = 'closed' | 'open' | 'half-open';
|
|
77
|
+
/**
|
|
78
|
+
* Circuit breaker for HuggingFace model downloads.
|
|
79
|
+
*
|
|
80
|
+
* After `failureThreshold` consecutive network failures the circuit opens and
|
|
81
|
+
* all subsequent calls to `withHfDownloadRetry` fail immediately without
|
|
82
|
+
* issuing any network requests. After `resetTimeoutMs` the circuit enters the
|
|
83
|
+
* half-open state and the next call is attempted — if it succeeds the circuit
|
|
84
|
+
* closes again; if it fails the circuit re-opens.
|
|
85
|
+
*
|
|
86
|
+
* Exported for unit-testing; production code should use the module-level
|
|
87
|
+
* `hfDownloadCircuit` singleton.
|
|
88
|
+
*/
|
|
89
|
+
export declare class HfDownloadCircuitBreaker {
|
|
90
|
+
readonly failureThreshold: number;
|
|
91
|
+
readonly resetTimeoutMs: number;
|
|
92
|
+
private _state;
|
|
93
|
+
private _failures;
|
|
94
|
+
/** Timestamp of the last recorded failure (ms since epoch). */
|
|
95
|
+
lastFailureAt: number;
|
|
96
|
+
constructor(failureThreshold?: number, resetTimeoutMs?: number);
|
|
97
|
+
/** Effective state, factoring in the reset-timeout transition. */
|
|
98
|
+
get state(): CircuitState;
|
|
99
|
+
/** Returns true when the circuit is open and calls should be rejected. */
|
|
100
|
+
isOpen(): boolean;
|
|
101
|
+
/** Record a successful call — resets the failure counter and closes the circuit. */
|
|
102
|
+
recordSuccess(): void;
|
|
103
|
+
/** Record a failed call — increments the counter and opens the circuit when the threshold is reached. */
|
|
104
|
+
recordFailure(): void;
|
|
105
|
+
/** @internal Reset to initial state (used in tests). */
|
|
106
|
+
reset(): void;
|
|
107
|
+
}
|
|
108
|
+
/** Module-level singleton shared by both embedder entry points. */
|
|
109
|
+
export declare const hfDownloadCircuit: HfDownloadCircuitBreaker;
|
|
110
|
+
/** @internal Returns true for errors that should abort without retry (circuit-open). */
|
|
111
|
+
export declare function isHfCircuitOpenError(message: string): boolean;
|
|
112
|
+
/**
|
|
113
|
+
* Returns true for any HuggingFace download failure that warrants showing the
|
|
114
|
+
* `HF_ENDPOINT` remediation hint: either a raw network error or a
|
|
115
|
+
* circuit-open rejection (which itself was caused by repeated network errors).
|
|
116
|
+
*/
|
|
117
|
+
export declare function isHfDownloadFailure(message: string): boolean;
|
|
118
|
+
/** @internal Wraps `fn` in a hard time-limit. The timeout error contains
|
|
119
|
+
* `ETIMEDOUT` so that `isNetworkFetchError` classifies it correctly.
|
|
120
|
+
*/
|
|
121
|
+
export declare function withDownloadTimeout<T>(fn: () => Promise<T>, timeoutMs: number): Promise<T>;
|
|
122
|
+
/** @internal Async sleep (exposed for testing). */
|
|
123
|
+
export declare function sleep(ms: number): Promise<void>;
|
|
124
|
+
export interface HfRetryOptions {
|
|
125
|
+
/** Maximum total attempts including the initial one (default: `HF_MAX_ATTEMPTS`). */
|
|
126
|
+
maxAttempts?: number;
|
|
127
|
+
/** Delay before the first retry; doubles on each subsequent attempt (default: `HF_BASE_DELAY_MS`). */
|
|
128
|
+
baseDelayMs?: number;
|
|
129
|
+
/** Per-attempt wall-clock timeout in ms (default: `HF_DOWNLOAD_TIMEOUT_MS`). */
|
|
130
|
+
timeoutMs?: number;
|
|
131
|
+
/**
|
|
132
|
+
* Circuit-breaker instance to use. Defaults to the module-level
|
|
133
|
+
* `hfDownloadCircuit` singleton. Pass a fresh instance in tests.
|
|
134
|
+
*/
|
|
135
|
+
circuit?: HfDownloadCircuitBreaker;
|
|
136
|
+
/**
|
|
137
|
+
* Optional callback invoked before each retry (not the initial attempt).
|
|
138
|
+
* @param attempt - 1-based retry number
|
|
139
|
+
* @param max - total allowed attempts
|
|
140
|
+
* @param error - the error that triggered the retry
|
|
141
|
+
*/
|
|
142
|
+
onRetry?: (attempt: number, max: number, error: Error) => void;
|
|
143
|
+
}
|
|
144
|
+
/**
|
|
145
|
+
* Retry wrapper for HuggingFace model downloads with per-attempt timeout and
|
|
146
|
+
* circuit-breaker protection.
|
|
147
|
+
*
|
|
148
|
+
* Behaviour:
|
|
149
|
+
* - If the circuit is **open**, fails immediately with a `CIRCUIT_OPEN_TAG`
|
|
150
|
+
* message (so `isHfDownloadFailure` still returns true and the caller can
|
|
151
|
+
* show `HF_ENDPOINT` guidance).
|
|
152
|
+
* - Each attempt is wrapped in `withDownloadTimeout`.
|
|
153
|
+
* - On a network-level error (`isNetworkFetchError`) the attempt is retried
|
|
154
|
+
* with exponential back-off; non-network errors (e.g. ONNX device failure)
|
|
155
|
+
* are rethrown immediately without retry.
|
|
156
|
+
* - Every network failure is recorded on the circuit breaker; a success resets
|
|
157
|
+
* it.
|
|
158
|
+
* - After all attempts are exhausted, the last network error is rethrown
|
|
159
|
+
* so the existing `isNetworkFetchError` / `isHfDownloadFailure` guards in
|
|
160
|
+
* the calling code still fire.
|
|
161
|
+
*/
|
|
162
|
+
export declare function withHfDownloadRetry<T>(fn: () => Promise<T>, options?: HfRetryOptions): Promise<T>;
|
|
163
|
+
export {};
|
|
@@ -1,5 +1,22 @@
|
|
|
1
1
|
import os from 'node:os';
|
|
2
2
|
import { join } from 'node:path';
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Download resilience defaults
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
/** Wall-clock limit for a single full-model download attempt: 5 minutes, in ms. */
export const HF_DOWNLOAD_TIMEOUT_MS = 300_000;
/** Total number of download attempts allowed (the first try plus N-1 retries). */
export const HF_MAX_ATTEMPTS = 3;
/** Delay before the first retry, in ms; each later retry waits twice as long. */
export const HF_BASE_DELAY_MS = 2 * 1_000;
/** Consecutive failures after which the circuit breaker opens. */
export const CB_FAILURE_THRESHOLD = 3;
/** Time, in ms, an open circuit waits before it becomes half-open. */
export const CB_RESET_TIMEOUT_MS = 60 * 1_000;
/** Ceiling applied to an env-supplied per-attempt timeout: 30 minutes, in ms. */
export const HF_MAX_TIMEOUT_MS = 1_800_000;
/** Ceiling applied to an env-supplied attempt count. */
export const HF_MAX_ATTEMPTS_CAP = 10;
|
|
3
20
|
/**
|
|
4
21
|
* @internal Exported only for unit tests and the two embedder entry points
|
|
5
22
|
* (`core/embeddings/embedder.ts` + `mcp/core/embedder.ts`). Not part of the
|
|
@@ -44,3 +61,190 @@ export function applyHfEnvOverrides(env) {
|
|
|
44
61
|
env.remoteHost = endpoint.endsWith('/') ? endpoint : endpoint + '/';
|
|
45
62
|
}
|
|
46
63
|
}
|
|
64
|
+
/**
 * @internal Exported for unit tests and the two embedder entry points.
 *
 * Reports whether an error message looks like a network-level fetch failure
 * raised while downloading a HuggingFace model. The check is a plain,
 * case-sensitive substring match against `fetch failed`, `ECONNREFUSED`,
 * `ENOTFOUND`, `ETIMEDOUT` and `ECONNRESET`.
 *
 * Such failures are not device-specific, so falling back to another ONNX
 * device cannot fix them — the caller should rethrow immediately with
 * guidance about `HF_ENDPOINT`.
 *
 * @param {string} message - Error message text to classify.
 * @returns {boolean} `true` when any known network marker occurs in
 *   `message`; `false` otherwise, including when `message` is not a string.
 */
export function isNetworkFetchError(message) {
    // A thrown value without a usable `.message` must classify as
    // "not a network error" rather than crash the error-handling path.
    if (typeof message !== 'string') {
        return false;
    }
    const networkMarkers = ['fetch failed', 'ECONNREFUSED', 'ENOTFOUND', 'ETIMEDOUT', 'ECONNRESET'];
    return networkMarkers.some((marker) => message.includes(marker));
}
|
|
82
|
+
// ---------------------------------------------------------------------------
|
|
83
|
+
// Circuit breaker
|
|
84
|
+
// ---------------------------------------------------------------------------
|
|
85
|
+
/** @internal Used by `withHfDownloadRetry` to mark a circuit-open rejection. */
|
|
86
|
+
export const CIRCUIT_OPEN_TAG = 'hf-circuit-open';
|
|
87
|
+
/**
 * Circuit breaker guarding HuggingFace model downloads.
 *
 * Failures reported through `recordFailure()` are counted; once the count
 * reaches `failureThreshold` the breaker is `open`. Reading `state` (or calling
 * `isOpen()`) more than `resetTimeoutMs` after `lastFailureAt` moves an open
 * breaker to `half-open`. From there `recordSuccess()` closes it again, while
 * another `recordFailure()` re-opens it.
 *
 * Exported for unit-testing; production code should use the module-level
 * `hfDownloadCircuit` singleton.
 */
export class HfDownloadCircuitBreaker {
    failureThreshold;
    resetTimeoutMs;
    _state = 'closed';
    _failures = 0;
    /** Time of the most recent recorded failure (ms since epoch); 0 until one occurs. */
    lastFailureAt = 0;
    /**
     * @param failureThreshold - failure count at which the breaker opens
     *   (default: `CB_FAILURE_THRESHOLD`).
     * @param resetTimeoutMs - how long an open breaker waits before turning
     *   half-open (default: `CB_RESET_TIMEOUT_MS`).
     */
    constructor(failureThreshold = CB_FAILURE_THRESHOLD, resetTimeoutMs = CB_RESET_TIMEOUT_MS) {
        this.failureThreshold = failureThreshold;
        this.resetTimeoutMs = resetTimeoutMs;
    }
    /**
     * Effective state. Note that reading it performs the lazy
     * `open` → `half-open` transition once the reset timeout has elapsed.
     */
    get state() {
        if (this._state !== 'open') {
            return this._state;
        }
        const sinceLastFailure = Date.now() - this.lastFailureAt;
        if (sinceLastFailure > this.resetTimeoutMs) {
            this._state = 'half-open';
        }
        return this._state;
    }
    /** True only while the effective state is `open`, i.e. calls should be rejected. */
    isOpen() {
        const effective = this.state;
        return effective === 'open';
    }
    /** Note a successful call: close the circuit and zero the failure counter. */
    recordSuccess() {
        this._state = 'closed';
        this._failures = 0;
    }
    /** Note a failed call: stamp the time, bump the counter, open at the threshold. */
    recordFailure() {
        this.lastFailureAt = Date.now();
        this._failures += 1;
        const thresholdReached = this._failures >= this.failureThreshold;
        if (thresholdReached) {
            this._state = 'open';
        }
    }
    /** @internal Restore the freshly-constructed state (used in tests). */
    reset() {
        this.lastFailureAt = 0;
        this._state = 'closed';
        this._failures = 0;
    }
}
|
|
141
|
+
/** Module-level singleton shared by both embedder entry points. */
|
|
142
|
+
export const hfDownloadCircuit = new HfDownloadCircuitBreaker();
|
|
143
|
+
// ---------------------------------------------------------------------------
|
|
144
|
+
// Retry + timeout wrapper
|
|
145
|
+
// ---------------------------------------------------------------------------
|
|
146
|
+
/**
 * @internal Tells whether `message` carries the `CIRCUIT_OPEN_TAG` marker,
 * i.e. the error is a circuit-open rejection that should abort without retry.
 */
export function isHfCircuitOpenError(message) {
    const carriesTag = message.includes(CIRCUIT_OPEN_TAG);
    return carriesTag;
}
|
|
150
|
+
/**
 * Decides whether an error message describes a HuggingFace download failure
 * for which the `HF_ENDPOINT` remediation hint should be shown. That is the
 * case for a raw network error (`isNetworkFetchError`) and for a circuit-open
 * rejection (`isHfCircuitOpenError`), which is itself the result of repeated
 * network errors.
 */
export function isHfDownloadFailure(message) {
    if (isNetworkFetchError(message)) {
        return true;
    }
    return isHfCircuitOpenError(message);
}
|
|
158
|
+
/**
 * @internal Runs `fn` under a hard wall-clock limit.
 *
 * The returned promise settles with whatever `fn` produces, or rejects with an
 * error whose message starts with `ETIMEDOUT` (so `isNetworkFetchError`
 * classifies it as a network failure) when `timeoutMs` elapses first. The
 * timeout only stops waiting; it does not cancel the work `fn` started.
 *
 * The timer is cleared on every path where `fn` finishes first — including
 * when `fn` throws synchronously or returns a plain value instead of a
 * promise — so a fast failure never leaves a pending timer holding the
 * process open for the rest of `timeoutMs`.
 *
 * @param fn - Zero-argument function that starts the download.
 * @param timeoutMs - Time limit in milliseconds.
 * @returns A promise for the result of `fn`.
 */
export function withDownloadTimeout(fn, timeoutMs) {
    return new Promise((resolve, reject) => {
        const timer = setTimeout(() => reject(new Error(`ETIMEDOUT: model download timed out after ${Math.round(timeoutMs / 1000)}s — ` +
            `check your network speed or set HF_ENDPOINT to a faster mirror`)), timeoutMs);
        let pending;
        try {
            // Promise.resolve returns a native promise unchanged and wraps
            // thenables / plain values, so `.then` below is always safe.
            pending = Promise.resolve(fn());
        }
        catch (syncError) {
            // `fn` threw before returning a promise — release the timer now
            // instead of leaking it until the timeout fires.
            clearTimeout(timer);
            reject(syncError);
            return;
        }
        pending.then((value) => {
            clearTimeout(timer);
            resolve(value);
        }, (error) => {
            clearTimeout(timer);
            reject(error);
        });
    });
}
|
|
174
|
+
/**
 * @internal Promise-based delay (exposed for testing).
 * Resolves with no value once `ms` milliseconds have passed on a `setTimeout`.
 */
export function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
|
|
178
|
+
/**
 * Retry wrapper for HuggingFace model downloads with per-attempt timeout and
 * circuit-breaker protection.
 *
 * Behaviour:
 * - If the circuit is **open**, fails immediately with a `CIRCUIT_OPEN_TAG`
 *   message (so `isHfDownloadFailure` still returns true and the caller can
 *   show `HF_ENDPOINT` guidance).
 * - Each attempt is wrapped in `withDownloadTimeout`.
 * - On a network-level error (`isNetworkFetchError`) the attempt is retried
 *   with exponential back-off (`baseDelayMs * 2^attempt`); non-network errors
 *   (e.g. ONNX device failure) are rethrown immediately without retry.
 * - Every network failure is recorded on the circuit breaker; a success resets
 *   it. If a failure trips the breaker, a `CIRCUIT_OPEN_TAG` error is thrown
 *   at once, with the triggering network error attached as its `cause`.
 * - After all attempts are exhausted, the last network error is rethrown
 *   so the existing `isNetworkFetchError` / `isHfDownloadFailure` guards in
 *   the calling code still fire.
 *
 * @param fn - Starts one download attempt; called once per attempt.
 * @param options - Optional overrides, see `HfRetryOptions`. Explicit options
 *   win over the `HF_DOWNLOAD_TIMEOUT_MS` / `HF_MAX_ATTEMPTS` environment
 *   variables, which win over the built-in defaults.
 * @returns The value `fn` resolved with on the first successful attempt.
 * @throws The non-network error from `fn`, a `CIRCUIT_OPEN_TAG` error, or the
 *   last network error once attempts run out.
 */
export async function withHfDownloadRetry(fn, options = {}) {
    // Resolve effective values — explicit options take precedence over env vars,
    // which take precedence over built-in defaults. This lets users lower the
    // per-attempt timeout without rebuilding (e.g.
    //   HF_DOWNLOAD_TIMEOUT_MS=60000 npx gitnexus analyze --embeddings
    // reduces the worst-case wait from 15 minutes to ~3 minutes).
    //
    // Upper bounds are clamped to prevent accidental runaway configuration:
    //   - timeoutMs is capped at HF_MAX_TIMEOUT_MS (30 min)
    //   - maxAttempts is floored (fractional values → integer) and capped at
    //     HF_MAX_ATTEMPTS_CAP (10). Values below 1 after flooring, NaN, or
    //     Infinity fall back to the built-in defaults.
    const envTimeout = Number(process.env.HF_DOWNLOAD_TIMEOUT_MS);
    const resolvedTimeout = Number.isFinite(envTimeout) && envTimeout > 0
        ? Math.min(envTimeout, HF_MAX_TIMEOUT_MS)
        : HF_DOWNLOAD_TIMEOUT_MS;
    // Floor BEFORE validating: a value such as 0.5 is positive but floors to 0,
    // which would skip the loop entirely and fail without a single attempt.
    const envMaxAttempts = Math.floor(Number(process.env.HF_MAX_ATTEMPTS));
    const resolvedMaxAttempts = Number.isFinite(envMaxAttempts) && envMaxAttempts >= 1
        ? Math.min(envMaxAttempts, HF_MAX_ATTEMPTS_CAP)
        : HF_MAX_ATTEMPTS;
    const { maxAttempts = resolvedMaxAttempts, baseDelayMs = HF_BASE_DELAY_MS, timeoutMs = resolvedTimeout, circuit = hfDownloadCircuit, onRetry, } = options;
    if (circuit.isOpen()) {
        const secsUntilReset = Math.ceil((circuit.resetTimeoutMs - (Date.now() - circuit.lastFailureAt)) / 1000);
        throw new Error(`${CIRCUIT_OPEN_TAG}: HuggingFace download circuit is open after repeated network failures` +
            (secsUntilReset > 0 ? ` — will reset in ~${secsUntilReset}s` : ''));
    }
    let lastError = new Error('unknown error');
    for (let attempt = 0; attempt < maxAttempts; attempt++) {
        try {
            const result = await withDownloadTimeout(fn, timeoutMs);
            circuit.recordSuccess();
            return result;
        }
        catch (err) {
            lastError = err instanceof Error ? err : new Error(String(err));
            if (!isNetworkFetchError(lastError.message)) {
                // Non-network error (e.g. CUDA unavailable) — propagate without retry
                throw lastError;
            }
            circuit.recordFailure();
            if (circuit.isOpen()) {
                // Circuit just tripped — fail fast, no more retries. Keep the
                // network error that tripped it reachable via `cause` so the
                // root failure is not lost behind the generic circuit message.
                throw new Error(`${CIRCUIT_OPEN_TAG}: HuggingFace download circuit opened after ${circuit.failureThreshold} consecutive failures`, { cause: lastError });
            }
            if (attempt < maxAttempts - 1) {
                const delay = baseDelayMs * Math.pow(2, attempt);
                onRetry?.(attempt + 1, maxAttempts, lastError);
                await sleep(delay);
            }
        }
    }
    // All retries exhausted — throw the last network error so isNetworkFetchError
    // patterns in the calling code still match and surface HF_ENDPOINT guidance.
    throw lastError;
}
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
import { pipeline, env } from '@huggingface/transformers';
|
|
8
8
|
import { isHttpMode, getHttpDimensions, httpEmbedQuery, } from '../../core/embeddings/http-client.js';
|
|
9
9
|
import { resolveEmbeddingConfig } from '../../core/embeddings/config.js';
|
|
10
|
-
import { applyHfEnvOverrides } from '../../core/embeddings/hf-env.js';
|
|
10
|
+
import { applyHfEnvOverrides, isHfDownloadFailure, withHfDownloadRetry, } from '../../core/embeddings/hf-env.js';
|
|
11
11
|
import { silenceStdout, restoreStdout, realStderrWrite } from '../../core/lbug/pool-adapter.js';
|
|
12
12
|
import { logger } from '../../core/logger.js';
|
|
13
13
|
// Model config
|
|
@@ -53,7 +53,7 @@ export const initEmbedder = async () => {
|
|
|
53
53
|
silenceStdout();
|
|
54
54
|
process.stderr.write = (() => true);
|
|
55
55
|
try {
|
|
56
|
-
embedderInstance = await pipeline('feature-extraction', MODEL_ID, {
|
|
56
|
+
embedderInstance = await withHfDownloadRetry(() => pipeline('feature-extraction', MODEL_ID, {
|
|
57
57
|
device: device,
|
|
58
58
|
dtype: 'fp32',
|
|
59
59
|
session_options: {
|
|
@@ -62,7 +62,7 @@ export const initEmbedder = async () => {
|
|
|
62
62
|
interOpNumThreads: 1,
|
|
63
63
|
executionMode: 'sequential',
|
|
64
64
|
},
|
|
65
|
-
});
|
|
65
|
+
}));
|
|
66
66
|
}
|
|
67
67
|
finally {
|
|
68
68
|
restoreStdout();
|
|
@@ -71,7 +71,21 @@ export const initEmbedder = async () => {
|
|
|
71
71
|
logger.info({ device }, 'GitNexus: Embedding model loaded');
|
|
72
72
|
return embedderInstance;
|
|
73
73
|
}
|
|
74
|
-
catch {
|
|
74
|
+
catch (deviceError) {
|
|
75
|
+
// Network errors and circuit-open errors are not device-specific —
|
|
76
|
+
// they will fail the same way on every device. Rethrow immediately
|
|
77
|
+
// with actionable HF_ENDPOINT guidance rather than silently falling
|
|
78
|
+
// back to the next device.
|
|
79
|
+
const errMsg = deviceError instanceof Error ? deviceError.message : String(deviceError);
|
|
80
|
+
if (isHfDownloadFailure(errMsg)) {
|
|
81
|
+
const endpointHint = process.env.HF_ENDPOINT
|
|
82
|
+
? `The configured endpoint (${process.env.HF_ENDPOINT}) may be unreachable.`
|
|
83
|
+
: `huggingface.co may be unreachable from your network.\n` +
|
|
84
|
+
` Set HF_ENDPOINT to a mirror and retry:\n` +
|
|
85
|
+
` HF_ENDPOINT=https://hf-mirror.com npx gitnexus analyze --embeddings\n` +
|
|
86
|
+
` (Windows: set HF_ENDPOINT=https://hf-mirror.com && npx gitnexus analyze --embeddings)`;
|
|
87
|
+
throw new Error(`Failed to download embedding model: ${errMsg}\n ${endpointHint}`);
|
|
88
|
+
}
|
|
75
89
|
if (device === 'cpu')
|
|
76
90
|
throw new Error('Failed to load embedding model');
|
|
77
91
|
}
|
package/package.json
CHANGED