@rigour-labs/core 4.2.3 → 4.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,39 @@
1
+ /**
2
+ * Side-Effect Analysis Rules
3
+ *
4
+ * Pattern definitions for detecting unbounded side effects that cause
5
+ * real-world consequences: process spawns, resource exhaustion, circular
6
+ * triggers, missing circuit breakers.
7
+ *
8
+ * Each rule has:
9
+ * - regex patterns per language
10
+ * - a check function that verifies context (surrounding lines)
11
+ * - severity and description
12
+ *
13
+ * @since v4.3.0
14
+ */
15
/** Short language identifiers; these are the value type of `LANG_MAP`. */
export type SideEffectLang =
    | 'js'
    | 'ts'
    | 'py'
    | 'go'
    | 'rs'
    | 'cs'
    | 'java'
    | 'rb';
16
/**
 * A single finding reported by the side-effect analysis.
 */
export interface SideEffectViolation {
    /** Identifier of the rule that produced the finding. */
    rule: string;
    /** One of the four severity levels. */
    severity:
        | 'critical'
        | 'high'
        | 'medium'
        | 'low';
    /** File the finding refers to. */
    file: string;
    /** Line number of the finding (NOTE(review): presumably 1-based — confirm against the gate). */
    line: number;
    /** Text associated with the match. */
    match: string;
    /** Description of the problem. */
    description: string;
    /** Hint text accompanying the finding. */
    hint: string;
}
25
/** Maps a file extension (including the leading dot) to its language key. */
export declare const LANG_MAP: Record<string, SideEffectLang>;
/** Glob patterns for the source files covered by the analysis. */
export declare const FILE_GLOBS: string[];

/* Timers: creation and the matching cleanup calls. */
export declare const TIMER_CREATE_PATTERNS: Record<string, RegExp[]>;
export declare const TIMER_CLEANUP_PATTERNS: Record<string, RegExp[]>;

/* Child processes: spawn calls and the calls that wait for / end them. */
export declare const PROCESS_SPAWN_PATTERNS: Record<string, RegExp[]>;
export declare const PROCESS_EXIT_PATTERNS: Record<string, RegExp[]>;

/* Unbounded loops and the I/O calls looked for inside them. */
export declare const UNBOUNDED_LOOP_PATTERNS: Record<string, RegExp[]>;
export declare const IO_PATTERNS: Record<string, RegExp[]>;

/* Retry handling: error handlers and language-independent limit indicators. */
export declare const RETRY_PATTERNS: Record<string, RegExp[]>;
export declare const MAX_RETRY_INDICATORS: RegExp[];

/* File watchers and file writes. */
export declare const WATCHER_PATTERNS: Record<string, RegExp[]>;
export declare const WRITE_PATTERNS: Record<string, RegExp[]>;

/* Resource lifecycle: open calls and the matching close constructs. */
export declare const RESOURCE_OPEN_PATTERNS: Record<string, RegExp[]>;
export declare const RESOURCE_CLOSE_PATTERNS: Record<string, RegExp[]>;

/* Exit / signal handler registration. */
export declare const AUTO_RESTART_PATTERNS: Record<string, RegExp[]>;
@@ -0,0 +1,302 @@
1
+ /**
2
+ * Side-Effect Analysis Rules
3
+ *
4
+ * Pattern definitions for detecting unbounded side effects that cause
5
+ * real-world consequences: process spawns, resource exhaustion, circular
6
+ * triggers, missing circuit breakers.
7
+ *
8
+ * Each rule has:
9
+ * - regex patterns per language
10
+ * - a check function that verifies context (surrounding lines)
11
+ * - severity and description
12
+ *
13
+ * @since v4.3.0
14
+ */
15
+ // ── Language detection ──
16
/**
 * File extension (with leading dot) -> language key.
 * TypeScript and JavaScript variants collapse onto 'ts' and 'js'.
 */
export const LANG_MAP = {
    // TypeScript
    '.ts': 'ts',
    '.tsx': 'ts',
    '.mts': 'ts',
    // JavaScript
    '.js': 'js',
    '.jsx': 'js',
    '.mjs': 'js',
    '.cjs': 'js',
    // One extension per remaining language
    '.py': 'py',
    '.go': 'go',
    '.rs': 'rs',
    '.cs': 'cs',
    '.java': 'java',
    '.rb': 'rb',
};
26
/**
 * Glob patterns selecting source files: one brace-expanded glob for the
 * TypeScript/JavaScript extensions, then one glob per remaining extension.
 */
export const FILE_GLOBS = [
    '**/*.{ts,tsx,mts,js,jsx,mjs,cjs}',
    ...['py', 'go', 'rs', 'cs', 'java', 'rb'].map((ext) => `**/*.${ext}`),
];
35
+ // ── Timer patterns (setInterval/setTimeout without cleanup) ──
36
/** Builds a fresh list of the timer-creation patterns shared by js and ts. */
const nodeTimerCreators = () => [
    /\bsetInterval\s*\(/,
    /\bsetTimeout\s*\(/,
];
/**
 * Per-language patterns matching calls/constructs that create a timer or
 * scheduled task. Rust and Ruby have no patterns.
 */
export const TIMER_CREATE_PATTERNS = {
    js: nodeTimerCreators(),
    ts: nodeTimerCreators(),
    py: [
        /\bscheduler\.enter\s*\(/,
        /\bTimer\s*\(/,
        /\bschedule\.every\b/,
    ],
    go: [
        /\btime\.NewTicker\s*\(/,
        /\btime\.Tick\s*\(/,
    ],
    java: [
        /\bScheduledExecutorService\b/,
        /\bTimer\(\)\.schedule\b/,
        /\bTimer\(\)\.scheduleAtFixedRate\b/,
    ],
    rs: [],
    cs: [
        /\bnew\s+Timer\s*\(/,
        /\bSetInterval\s*\(/,
    ],
    rb: [],
};
66
/** Builds a fresh list of the timer-cleanup patterns shared by js and ts. */
const nodeTimerCleaners = () => [
    /\bclearInterval\s*\(/,
    /\bclearTimeout\s*\(/,
];
/**
 * Per-language patterns matching calls that cancel or stop a timer.
 * Rust and Ruby have no patterns.
 */
export const TIMER_CLEANUP_PATTERNS = {
    js: nodeTimerCleaners(),
    ts: nodeTimerCleaners(),
    py: [
        /\.cancel\s*\(/,
    ],
    go: [
        /\.Stop\s*\(/,
    ],
    java: [
        /\.shutdown\s*\(/,
        /\.cancel\s*\(/,
    ],
    rs: [],
    cs: [
        /\.Dispose\s*\(/,
        /\.Stop\s*\(/,
    ],
    rb: [],
};
76
+ // ── Process spawn patterns ──
77
/**
 * Per-language patterns matching code that starts an external process.
 *
 * The Ruby backtick pattern intentionally has no leading `\b`: a backtick is
 * not a word character, so a word boundary in front of it only exists when
 * the backtick directly follows an identifier character. That made the
 * pattern miss ordinary command substitutions such as "out = `ls`".
 */
export const PROCESS_SPAWN_PATTERNS = {
    js: [
        /\bchild_process\.\w+\s*\(/,
        /\bspawn\s*\(/,
        /\bexec\s*\(/,
        /\bexecFile\s*\(/,
        /\bfork\s*\(/,
        /\bexeca\s*\(/,
    ],
    ts: [
        /\bchild_process\.\w+\s*\(/,
        /\bspawn\s*\(/,
        /\bexec\s*\(/,
        /\bexecFile\s*\(/,
        /\bfork\s*\(/,
        /\bexeca\s*\(/,
    ],
    py: [
        /\bsubprocess\.\w+\s*\(/,
        /\bPopen\s*\(/,
        /\bos\.system\s*\(/,
        /\bos\.exec\w*\s*\(/,
        /\bos\.spawn\w*\s*\(/,
    ],
    go: [
        /\bexec\.Command\s*\(/,
        /\bos\/exec\b/,
        /\bcmd\.Start\s*\(/,
        /\bcmd\.Run\s*\(/,
    ],
    java: [
        /\bProcessBuilder\b/,
        /\bRuntime\.getRuntime\(\)\.exec\s*\(/,
    ],
    rs: [
        /\bCommand::new\s*\(/,
        /\bstd::process::Command\b/,
    ],
    cs: [
        /\bProcess\.Start\s*\(/,
        /\bnew\s+ProcessStartInfo\b/,
    ],
    rb: [
        /\bsystem\s*\(/,
        /\bspawn\s*\(/,
        // Backtick command substitution: `cmd`
        /`[^`]+`/,
        /\bIO\.popen\s*\(/,
    ],
};
126
/** Builds a fresh list of the process-exit patterns shared by js and ts. */
const nodeProcessExitHandlers = () => [
    /\.on\s*\(\s*['"](?:exit|close)['"]/,
    /\.kill\s*\(/,
    /\.disconnect\s*\(/,
];
/**
 * Per-language patterns matching code that waits for, terminates, or
 * listens for the end of a process.
 */
export const PROCESS_EXIT_PATTERNS = {
    js: nodeProcessExitHandlers(),
    ts: nodeProcessExitHandlers(),
    py: [
        /\.wait\s*\(/,
        /\.terminate\s*\(/,
        /\.kill\s*\(/,
        /\.communicate\s*\(/,
    ],
    go: [
        /\.Wait\s*\(/,
        /cmd\.Process\.Kill\s*\(/,
    ],
    java: [
        /\.waitFor\s*\(/,
        /\.destroy\s*\(/,
        /\.destroyForcibly\s*\(/,
    ],
    rs: [
        /\.wait\s*\(/,
        /\.kill\s*\(/,
    ],
    cs: [
        /\.WaitForExit\s*\(/,
        /\.Kill\s*\(/,
        /\.Close\s*\(/,
    ],
    rb: [
        /Process\.wait\b/,
        /Process\.kill\b/,
    ],
};
136
+ // ── Unbounded loop patterns ──
137
/** Builds a fresh list of the infinite-loop patterns shared by js and ts. */
const ecmaScriptForeverLoops = () => [
    /\bwhile\s*\(\s*true\s*\)/,
    /\bwhile\s*\(\s*1\s*\)/,
    /\bfor\s*\(\s*;\s*;\s*\)/,
];
/**
 * Per-language patterns matching loop headers that have no terminating
 * condition of their own.
 */
export const UNBOUNDED_LOOP_PATTERNS = {
    js: ecmaScriptForeverLoops(),
    ts: ecmaScriptForeverLoops(),
    py: [
        /\bwhile\s+True\s*:/,
        /\bwhile\s+1\s*:/,
    ],
    // A bare `for {` in Go is an infinite loop.
    go: [
        /\bfor\s*\{/,
        /\bfor\s+\{/,
    ],
    java: [
        /\bwhile\s*\(\s*true\s*\)/,
        /\bfor\s*\(\s*;\s*;\s*\)/,
    ],
    rs: [
        /\bloop\s*\{/,
    ],
    cs: [
        /\bwhile\s*\(\s*true\s*\)/,
        /\bfor\s*\(\s*;\s*;\s*\)/,
    ],
    rb: [
        /\bloop\s+do\b/,
        /\bwhile\s+true\b/,
    ],
};
147
+ // I/O operations inside loops that indicate resource impact
148
/**
 * Per-language patterns matching I/O operations (file system, network,
 * process, console/stream output) that indicate resource impact when they
 * occur inside a loop.
 *
 * The `ts` list mirrors the `js` list, including the console and
 * `process.stdout` patterns, so that the same statement is classified the
 * same way in a .ts file and a .js file.
 */
export const IO_PATTERNS = {
    js: [
        /\bfs\.\w+/, /\bfetch\s*\(/, /\baxios\.\w+/, /\bhttp\.\w+/,
        /\.write\s*\(/, /\.send\s*\(/, /\bchild_process\./,
        /\bconsole\.\w+/, /\bprocess\.stdout/,
    ],
    ts: [
        /\bfs\.\w+/, /\bfetch\s*\(/, /\baxios\.\w+/, /\bhttp\.\w+/,
        /\.write\s*\(/, /\.send\s*\(/, /\bchild_process\./,
        /\bconsole\.\w+/, /\bprocess\.stdout/,
    ],
    py: [
        /\bopen\s*\(/, /\brequests\.\w+/, /\burllib\.\w+/,
        /\bsubprocess\./, /\bos\.\w+/, /\bsocket\.\w+/,
        /\.write\s*\(/, /\bprint\s*\(/,
    ],
    go: [
        /\bos\.\w+/, /\bnet\.\w+/, /\bhttp\.\w+/,
        /\bio\.\w+/, /\bfmt\.Fprint/, /\bioutil\.\w+/,
        /\bexec\.Command/,
    ],
    java: [
        /\bnew\s+File\w*\(/, /\bHttpClient\b/, /\bSocket\b/,
        /\.write\s*\(/, /\bRuntime\.getRuntime\(\)/,
    ],
    rs: [
        /\bstd::fs::/, /\bstd::net::/, /\bstd::process::/,
        /\.write\s*\(/, /\btokio::\w+/,
    ],
    cs: [
        /\bFile\.\w+/, /\bHttpClient\b/, /\bProcess\.Start/,
        /\.Write\s*\(/, /\bSocket\b/,
    ],
    rb: [
        /\bFile\.\w+/, /\bNet::HTTP\b/, /\bIO\.\w+/,
        /\.write\s*\(/, /\bsystem\s*\(/,
    ],
};
185
+ // ── Retry without limit patterns ──
186
/**
 * Per-language patterns matching error-handling constructs (catch/except/
 * rescue blocks, `err != nil` checks, ...).
 *
 * Forms covered in addition to the plain `catch (e) {` style:
 * - js/ts: optional catch binding (`catch {`), valid since ES2019.
 * - py: tuple handlers (`except (ValueError, KeyError):`).
 * - java: multi-catch (`catch (IOException | SQLException e)`), optionally `final`.
 */
export const RETRY_PATTERNS = {
    js: [
        // The parenthesised binding is optional.
        /\bcatch\s*(?:\([^)]*\))?\s*\{/,
        /\.catch\s*\(/,
    ],
    ts: [
        /\bcatch\s*(?:\([^)]*\))?\s*\{/,
        /\.catch\s*\(/,
    ],
    py: [
        /\bexcept\s+\w+/,
        /\bexcept\s*:/,
        // Tuple of exception types.
        /\bexcept\s*\(/,
    ],
    go: [
        /\bif\s+err\s*!=\s*nil\b/,
    ],
    java: [
        /\bcatch\s*\(\w+\s+\w+\)/,
        /\bcatch\s*\(\s*Exception\b/,
        // Multi-catch: Type1 | Type2 name
        /\bcatch\s*\(\s*(?:final\s+)?[\w.]+(?:\s*\|\s*[\w.]+)+\s+\w+\s*\)/,
    ],
    rs: [
        /\.unwrap_or_else\s*\(/,
        /\bif\s+let\s+Err\b/,
    ],
    cs: [
        /\bcatch\s*\(\w+\b/,
        /\bcatch\s*\{/,
    ],
    rb: [
        /\brescue\b/,
    ],
};
196
/** Names that mention a retry limit or an attempt counter. */
const retryLimitNames = [
    /max.?retries?/i,
    /retry.?count/i,
    /retry.?limit/i,
    /attempt/i,
];
/** Comparisons of a retry/count variable against a number literal. */
const retryLimitComparisons = [
    /retries?\s*[<>=!]+\s*\d+/,
    /count\s*[<>=!]+\s*\d+/,
];
/** `MAX_` constants, backoff and circuit-breaker vocabulary. */
const retryLimitVocabulary = [
    /MAX_/,
    /backoff/i,
    /circuit.?breaker/i,
];
/**
 * Language-independent patterns matching text that mentions a retry limit,
 * an attempt counter, backoff, or a circuit breaker.
 */
export const MAX_RETRY_INDICATORS = [
    ...retryLimitNames,
    ...retryLimitComparisons,
    ...retryLimitVocabulary,
];
207
+ // ── File watcher patterns (circular trigger detection) ──
208
/**
 * Per-language patterns matching file-watcher usage (used for circular
 * trigger detection).
 *
 * The `ts` list mirrors the `js` list, including the `nodemon` pattern, so
 * the same watcher is recognised in .ts and .js sources alike.
 */
export const WATCHER_PATTERNS = {
    js: [
        /\bfs\.watch\s*\(/, /\bfs\.watchFile\s*\(/,
        /\bchokidar\.watch\s*\(/, /\bnodemon\b/,
        /\bnew\s+FSWatcher\b/,
    ],
    ts: [
        /\bfs\.watch\s*\(/, /\bfs\.watchFile\s*\(/,
        /\bchokidar\.watch\s*\(/, /\bnodemon\b/,
        /\bnew\s+FSWatcher\b/,
    ],
    py: [
        /\bwatchdog\b/, /\bObserver\s*\(/,
        /\binotify\b/, /\bwatchfiles\b/,
    ],
    go: [
        /\bfsnotify\.\w+/, /\bNewWatcher\s*\(/,
    ],
    java: [
        /\bWatchService\b/, /\bWatchKey\b/,
    ],
    rs: [
        /\bnotify::/, /\bRecommendedWatcher\b/,
    ],
    cs: [
        /\bFileSystemWatcher\b/, /\bnew\s+FileSystemWatcher\b/,
    ],
    rb: [
        /\bListen\.\w+/, /\brb-inotify\b/,
    ],
};
239
/**
 * Per-language patterns matching code that writes to a file or stream.
 *
 * The Python and Ruby "open for writing" patterns accept the whole family of
 * writing modes (e.g. 'w', 'wb', 'w+', 'a', 'ab', 'a+', and Python's 'x'),
 * not only the bare 'w' / 'a' strings.
 */
export const WRITE_PATTERNS = {
    js: [
        /\bfs\.writeFile/,
        /\bfs\.appendFile/,
        /\bfs\.createWriteStream/,
        /\.write\s*\(/,
    ],
    ts: [
        /\bfs\.writeFile/,
        /\bfs\.appendFile/,
        /\bfs\.createWriteStream/,
        /\.write\s*\(/,
    ],
    py: [
        // Mode string starts with w, a or x, optionally followed by b / t / +.
        /\bopen\s*\([^)]*['"][wax][bt+]*['"]/,
        /\.write\s*\(/,
        /\bshutil\.\w+/,
    ],
    go: [
        /\bos\.WriteFile/,
        /\bos\.Create/,
        /\bio\.WriteString/,
        /\.Write\s*\(/,
    ],
    java: [
        /\bFileWriter\b/,
        /\bBufferedWriter\b/,
        /\.write\s*\(/,
    ],
    rs: [
        /\bfs::write/,
        /\bFile::create/,
        /\.write_all\s*\(/,
    ],
    cs: [
        /\bFile\.Write/,
        /\bStreamWriter\b/,
        /\.Write\s*\(/,
    ],
    rb: [
        /\bFile\.write/,
        // Mode string starts with w or a, optionally followed by b / t / +.
        /\bFile\.open\s*\([^)]*['"][wa][bt+]*['"]/,
        /\.write\s*\(/,
    ],
};
249
+ // ── Resource lifecycle patterns (open without close) ──
250
/** Builds a fresh list of the file-open patterns shared by js and ts. */
const nodeFileOpeners = () => [
    /\bfs\.open\s*\(/,
    /\bfs\.createReadStream\s*\(/,
    /\bfs\.createWriteStream\s*\(/,
];
/**
 * Per-language patterns matching code that opens a file or stream.
 */
export const RESOURCE_OPEN_PATTERNS = {
    js: nodeFileOpeners(),
    ts: nodeFileOpeners(),
    py: [
        /\bopen\s*\(/,
    ],
    go: [
        /\bos\.Open\s*\(/,
        /\bos\.Create\s*\(/,
        /\bos\.OpenFile\s*\(/,
    ],
    java: [
        /\bnew\s+FileInputStream\b/,
        /\bnew\s+FileOutputStream\b/,
        /\bnew\s+BufferedReader\b/,
    ],
    rs: [
        /\bFile::open\s*\(/,
        /\bFile::create\s*\(/,
    ],
    cs: [
        /\bFile\.Open\s*\(/,
        /\bnew\s+FileStream\b/,
        /\bnew\s+StreamReader\b/,
    ],
    rb: [
        /\bFile\.open\s*\(/,
    ],
};
260
/** Builds a fresh list of the stream/handle-closing patterns shared by js and ts. */
const nodeResourceClosers = () => [
    /\.close\s*\(/,
    /\.destroy\s*\(/,
    /\.end\s*\(/,
];
/**
 * Per-language patterns matching code that closes a resource, either through
 * an explicit call or through a construct that closes it automatically.
 */
export const RESOURCE_CLOSE_PATTERNS = {
    js: nodeResourceClosers(),
    ts: nodeResourceClosers(),
    // `with open` closes the file automatically.
    py: [
        /\.close\s*\(/,
        /\bwith\s+open\b/,
    ],
    // `defer` is treated as an automatic close.
    go: [
        /\.Close\s*\(/,
        /\bdefer\b/,
    ],
    // `try (` is try-with-resources.
    java: [
        /\.close\s*\(/,
        /\btry\s*\(/,
    ],
    // Rust drops values automatically; a closing brace at end of line counts.
    rs: [
        /\bdrop\s*\(/,
        /\}$/,
    ],
    // `using (` disposes automatically.
    cs: [
        /\.Close\s*\(/,
        /\.Dispose\s*\(/,
        /\busing\s*\(/,
    ],
    // The block form of File.open closes automatically.
    rb: [
        /\.close\b/,
        /\bFile\.open\s*\([^)]*\)\s*do\b/,
    ],
};
270
+ // ── Auto-restart / self-respawn patterns ──
271
/** Builds a fresh list of the `process.on(...)` handler patterns shared by js and ts. */
const nodeExitHandlerPatterns = () => [
    /process\.on\s*\(\s*['"](?:exit|uncaughtException|SIGTERM)['"]\s*,\s*(?:function|\(|=>).*(?:spawn|exec|fork)/,
    /process\.on\s*\(\s*['"]exit['"]/,
];
/**
 * Per-language patterns matching registration of exit / signal handlers,
 * grouped under auto-restart (self-respawn) detection.
 */
export const AUTO_RESTART_PATTERNS = {
    js: nodeExitHandlerPatterns(),
    ts: nodeExitHandlerPatterns(),
    py: [
        /\batexit\.register\s*\(/,
        /\bsignal\.signal\s*\(\s*signal\.SIG\w+\s*,/,
    ],
    go: [
        /\bsignal\.Notify\s*\(/,
        /\bos\.Exit\s*\(/,
    ],
    java: [
        /\bRuntime\.getRuntime\(\)\.addShutdownHook\b/,
    ],
    rs: [
        /\bctrlc::set_handler\b/,
        /\bsignal::ctrl_c\b/,
    ],
    cs: [
        /\bAppDomain\.CurrentDomain\.ProcessExit\b/,
    ],
    rb: [
        /\bat_exit\b/,
        /\btrap\s*\(/,
    ],
};
package/dist/index.d.ts CHANGED
@@ -6,6 +6,7 @@ export * from './templates/index.js';
6
6
  export * from './types/fix-packet.js';
7
7
  export { Gate, GateContext } from './gates/base.js';
8
8
  export { RetryLoopBreakerGate } from './gates/retry-loop-breaker.js';
9
+ export { SideEffectAnalysisGate } from './gates/side-effect-analysis.js';
9
10
  export * from './utils/logger.js';
10
11
  export * from './services/score-history.js';
11
12
  export * from './hooks/index.js';
@@ -17,5 +18,7 @@ export type { InferenceProvider, DeepFinding, DeepAnalysisResult, ModelTier } fr
17
18
  export { MODELS } from './inference/types.js';
18
19
  export { isModelCached, getModelsDir, getModelInfo } from './inference/model-manager.js';
19
20
  export { extractFacts, factsToPromptString } from './deep/fact-extractor.js';
20
- export { openDatabase, isSQLiteAvailable, insertScan, insertFindings, getRecentScans, getScoreTrendFromDB, getTopIssues, reinforcePattern, getStrongPatterns } from './storage/index.js';
21
- export type { RigourDB } from './storage/index.js';
21
+ export { openDatabase, isSQLiteAvailable, compactDatabase, getDatabaseSize, resetDatabase, insertScan, insertFindings, getRecentScans, getScoreTrendFromDB, getTopIssues, reinforcePattern, getStrongPatterns } from './storage/index.js';
22
+ export type { RigourDB, CompactResult } from './storage/index.js';
23
+ export { checkLocalPatterns, persistAndReinforce, getProjectStats } from './storage/index.js';
24
+ export type { ProjectStats } from './storage/index.js';
package/dist/index.js CHANGED
@@ -6,6 +6,7 @@ export * from './templates/index.js';
6
6
  export * from './types/fix-packet.js';
7
7
  export { Gate } from './gates/base.js';
8
8
  export { RetryLoopBreakerGate } from './gates/retry-loop-breaker.js';
9
+ export { SideEffectAnalysisGate } from './gates/side-effect-analysis.js';
9
10
  export * from './utils/logger.js';
10
11
  export * from './services/score-history.js';
11
12
  export * from './hooks/index.js';
@@ -18,7 +19,9 @@ export { MODELS } from './inference/types.js';
18
19
  export { isModelCached, getModelsDir, getModelInfo } from './inference/model-manager.js';
19
20
  export { extractFacts, factsToPromptString } from './deep/fact-extractor.js';
20
21
  // Storage (SQLite Brain)
21
- export { openDatabase, isSQLiteAvailable, insertScan, insertFindings, getRecentScans, getScoreTrendFromDB, getTopIssues, reinforcePattern, getStrongPatterns } from './storage/index.js';
22
+ export { openDatabase, isSQLiteAvailable, compactDatabase, getDatabaseSize, resetDatabase, insertScan, insertFindings, getRecentScans, getScoreTrendFromDB, getTopIssues, reinforcePattern, getStrongPatterns } from './storage/index.js';
23
+ // Local Project Memory (hybrid intelligence — SQLite-backed per-project learning)
24
+ export { checkLocalPatterns, persistAndReinforce, getProjectStats } from './storage/index.js';
22
25
  // Pattern Index is intentionally NOT exported here to prevent
23
26
  // native dependency issues (sharp/transformers) from leaking into
24
27
  // non-AI parts of the system.
@@ -2,11 +2,11 @@ import { type ModelTier, type ModelInfo } from './types.js';
2
2
  export declare function extractSha256FromEtag(etag: string | null): string | null;
3
3
  export declare function hashFileSha256(filePath: string): Promise<string>;
4
4
  /**
5
- * Check if a model is already downloaded and valid.
5
+ * Check if any model for this tier is cached (fine-tuned or fallback).
6
6
  */
7
7
  export declare function isModelCached(tier: ModelTier): Promise<boolean>;
8
8
  /**
9
- * Get the path to a cached model.
9
+ * Get the path to a cached model (prefers fine-tuned over fallback).
10
10
  */
11
11
  export declare function getModelPath(tier: ModelTier): string;
12
12
  /**
@@ -15,7 +15,7 @@ export declare function getModelPath(tier: ModelTier): string;
15
15
  export declare function getModelInfo(tier: ModelTier): ModelInfo;
16
16
  /**
17
17
  * Download a model from HuggingFace CDN.
18
- * Calls onProgress with status updates.
18
+ * Tries fine-tuned model first, falls back to stock Qwen if unavailable.
19
19
  */
20
20
  export declare function downloadModel(tier: ModelTier, onProgress?: (message: string, percent?: number) => void): Promise<string>;
21
21
  /**
@@ -6,11 +6,11 @@ import path from 'path';
6
6
  import fs from 'fs-extra';
7
7
  import { createHash } from 'crypto';
8
8
  import { RIGOUR_DIR } from '../storage/db.js';
9
- import { MODELS } from './types.js';
9
+ import { MODELS, FALLBACK_MODELS } from './types.js';
10
10
  const MODELS_DIR = path.join(RIGOUR_DIR, 'models');
11
11
  const SHA256_RE = /^[a-f0-9]{64}$/i;
12
/**
 * Location of the metadata sidecar for a model file: the model's filename
 * with `.meta.json` appended, inside MODELS_DIR.
 */
function getModelMetadataPath(filename) {
    const sidecarName = `${filename}.meta.json`;
    return path.join(MODELS_DIR, sidecarName);
}
15
15
  function isValidMetadata(raw) {
16
16
  return !!raw &&
@@ -34,17 +34,15 @@ export async function hashFileSha256(filePath) {
34
34
  }
35
35
  return hash.digest('hex');
36
36
  }
37
/**
 * Write `metadata` as 2-space-indented JSON to the metadata sidecar of the
 * model file named `filename`.
 */
async function writeModelMeta(filename, metadata) {
    const sidecarPath = getModelMetadataPath(filename);
    const jsonOptions = { spaces: 2 };
    await fs.writeJson(sidecarPath, metadata, jsonOptions);
}
41
- async function readModelMetadata(tier) {
42
- const metadataPath = getModelMetadataPath(tier);
43
- if (!(await fs.pathExists(metadataPath))) {
40
+ async function readModelMeta(filename) {
41
+ const p = getModelMetadataPath(filename);
42
+ if (!(await fs.pathExists(p)))
44
43
  return null;
45
- }
46
44
  try {
47
- const raw = await fs.readJson(metadataPath);
45
+ const raw = await fs.readJson(p);
48
46
  return isValidMetadata(raw) ? raw : null;
49
47
  }
50
48
  catch {
@@ -52,17 +50,15 @@ async function readModelMetadata(tier) {
52
50
  }
53
51
  }
54
52
  /**
55
- * Check if a model is already downloaded and valid.
53
+ * Check if a single model file is cached and valid.
56
54
  */
57
- export async function isModelCached(tier) {
58
- const model = MODELS[tier];
55
+ async function isFileCached(model) {
59
56
  const modelPath = path.join(MODELS_DIR, model.filename);
60
57
  if (!(await fs.pathExists(modelPath)))
61
58
  return false;
62
- const metadata = await readModelMetadata(tier);
59
+ const metadata = await readModelMeta(model.filename);
63
60
  if (!metadata)
64
61
  return false;
65
- // Size check + "changed since verification" check.
66
62
  const stat = await fs.stat(modelPath);
67
63
  const tolerance = model.sizeBytes * 0.1;
68
64
  if (stat.size <= model.sizeBytes - tolerance)
@@ -74,10 +70,22 @@ export async function isModelCached(tier) {
74
70
  return true;
75
71
  }
76
72
/**
 * Check if any model for this tier is cached (fine-tuned or fallback).
 *
 * The primary entry in MODELS is checked first. The FALLBACK_MODELS entry is
 * only consulted when it exists and points at a different URL than the
 * primary entry.
 *
 * @param tier Model tier to look up in MODELS / FALLBACK_MODELS.
 * @returns true when the primary or the distinct fallback file is cached.
 */
export async function isModelCached(tier) {
    if (await isFileCached(MODELS[tier]))
        return true;
    const fb = FALLBACK_MODELS[tier];
    // A tier without a fallback entry is simply "not cached", matching the
    // `fallback &&` guard used by downloadModel, instead of a TypeError.
    if (!fb || fb.url === MODELS[tier].url)
        return false;
    return isFileCached(fb);
}
81
/**
 * Get the path to a cached model (prefers fine-tuned over fallback).
 *
 * Returns the primary model's path when that file exists on disk; otherwise
 * returns the fallback model's path, without checking that it exists.
 */
export function getModelPath(tier) {
    const fineTunedPath = path.join(MODELS_DIR, MODELS[tier].filename);
    const hasFineTuned = fs.pathExistsSync(fineTunedPath);
    return hasFineTuned
        ? fineTunedPath
        : path.join(MODELS_DIR, FALLBACK_MODELS[tier].filename);
}
82
90
  /**
83
91
  * Get model info for a tier.
@@ -86,73 +94,73 @@ export function getModelInfo(tier) {
86
94
  return MODELS[tier];
87
95
  }
88
96
/**
 * Stream a response body to disk with progress reporting and SHA256 hashing.
 *
 * Differences from a plain read/write loop:
 * - An 'error' listener is attached to the write stream before the first
 *   write, so a disk error surfaces as a rejection instead of an unhandled
 *   'error' event.
 * - Backpressure is honoured: when `write()` returns false the loop waits
 *   for 'drain' before reading the next chunk.
 * - On any failure the write stream is destroyed (and the reader cancelled,
 *   best effort) before the error is rethrown, so the temp file is no longer
 *   held open when the caller removes it.
 *
 * @param response   Fetch-style response; `headers.get()` and `body.getReader()` are used.
 * @param tempPath   File path the body is written to.
 * @param model      Model info; only `model.name` is used (progress messages).
 * @param onProgress Optional callback `(message, percent)`; invoked in steps of
 *                   at least 5 percentage points when content-length is known.
 * @returns `{ sha256, downloaded }` — hex digest of the bytes and the byte count.
 * @throws Error('No response body') when the response has no readable body;
 *         rethrows reader and write-stream errors.
 */
async function streamToDisk(response, tempPath, model, onProgress) {
    const contentLength = parseInt(response.headers.get('content-length') || '0', 10);
    const reader = response.body?.getReader();
    if (!reader)
        throw new Error('No response body');
    const writeStream = fs.createWriteStream(tempPath);
    // Remember the first stream error so the loop can observe it even when it
    // fires while we are awaiting the network.
    let streamError = null;
    writeStream.on('error', (err) => {
        if (!streamError)
            streamError = err;
    });
    const hash = createHash('sha256');
    let downloaded = 0;
    let lastPct = 0;
    try {
        while (true) {
            const { done, value } = await reader.read();
            if (streamError)
                throw streamError;
            if (done)
                break;
            const chunk = Buffer.from(value);
            hash.update(chunk);
            downloaded += value.length;
            if (!writeStream.write(chunk)) {
                // Internal buffer is full: wait until it drains (or fails).
                await new Promise((resolve, reject) => {
                    const onDrain = () => {
                        writeStream.off('error', onError);
                        resolve(undefined);
                    };
                    const onError = (err) => {
                        writeStream.off('drain', onDrain);
                        reject(err);
                    };
                    writeStream.once('drain', onDrain);
                    writeStream.once('error', onError);
                });
            }
            if (contentLength > 0) {
                const pct = Math.round((downloaded / contentLength) * 100);
                if (pct >= lastPct + 5) {
                    lastPct = pct;
                    onProgress?.(`Downloading ${model.name}: ${pct}%`, pct);
                }
            }
        }
        await new Promise((resolve, reject) => {
            writeStream.once('finish', resolve);
            writeStream.once('error', reject);
            writeStream.end();
        });
    }
    catch (err) {
        if (!writeStream.destroyed) {
            // Release the file handle before the caller deletes the temp file.
            await new Promise((resolve) => {
                writeStream.once('close', resolve);
                writeStream.destroy();
            });
        }
        try {
            await reader.cancel();
        }
        catch {
            // Best effort: the original error below is the one that matters.
        }
        throw err;
    }
    return { sha256: hash.digest('hex'), downloaded };
}
132
/**
 * Verify the SHA256 of a download against the digest taken from the ETag,
 * tolerating mismatches when the download size is reasonable.
 *
 * - No expected digest: nothing to verify.
 * - Digests equal (compared case-insensitively, since hex digests may be
 *   written in either case while `digest('hex')` is lower-case): OK.
 * - Digests differ and fewer than 90% of `model.sizeBytes` were downloaded:
 *   throws.
 * - Digests differ but the size is reasonable: accepted, because the ETag is
 *   likely a Git LFS OID rather than the SHA256 of the served bytes.
 *
 * @param expectedSha256 Digest extracted from the ETag, or null/empty if none.
 * @param actualSha256   Hex digest computed over the downloaded bytes.
 * @param downloaded     Number of bytes downloaded.
 * @param model          Model info; `sizeBytes` and `name` are used.
 * @throws Error on a digest mismatch combined with an undersized download.
 */
function verifySha256(expectedSha256, actualSha256, downloaded, model) {
    if (!expectedSha256)
        return;
    if (String(actualSha256).toLowerCase() === String(expectedSha256).toLowerCase())
        return;
    const tolerance = model.sizeBytes * 0.1;
    if (downloaded < model.sizeBytes - tolerance) {
        throw new Error(`Checksum mismatch for ${model.name}: ` +
            `expected ${expectedSha256}, got ${actualSha256} ` +
            `(undersized: ${downloaded} bytes)`);
    }
    // Size OK — ETag likely a Git LFS OID, not content SHA256
}
147
+ /**
148
+ * Download a specific model from its URL, write to disk, save metadata.
149
+ */
150
+ async function downloadFromUrl(tier, model, onProgress) {
94
151
  const destPath = path.join(MODELS_DIR, model.filename);
95
152
  const tempPath = destPath + '.download';
96
- fs.ensureDirSync(MODELS_DIR);
97
- // Already cached
98
- if (await isModelCached(tier)) {
99
- onProgress?.(`Model ${model.name} already cached`, 100);
100
- return destPath;
101
- }
102
- onProgress?.(`Downloading ${model.name} (${model.sizeHuman})...`, 0);
103
153
  try {
104
154
  const response = await fetch(model.url);
105
155
  if (!response.ok) {
106
156
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
107
157
  }
108
- const expectedSha256 = extractSha256FromEtag(response.headers.get('etag'));
109
- const contentLength = parseInt(response.headers.get('content-length') || '0', 10);
110
- const reader = response.body?.getReader();
111
- if (!reader)
112
- throw new Error('No response body');
113
- const writeStream = fs.createWriteStream(tempPath);
114
- const hash = createHash('sha256');
115
- let downloaded = 0;
116
- let lastProgressPercent = 0;
117
- while (true) {
118
- const { done, value } = await reader.read();
119
- if (done)
120
- break;
121
- const chunk = Buffer.from(value);
122
- writeStream.write(chunk);
123
- hash.update(chunk);
124
- downloaded += value.length;
125
- if (contentLength > 0) {
126
- const percent = Math.round((downloaded / contentLength) * 100);
127
- if (percent >= lastProgressPercent + 5) { // Report every 5%
128
- lastProgressPercent = percent;
129
- onProgress?.(`Downloading ${model.name}: ${percent}%`, percent);
130
- }
131
- }
132
- }
133
- writeStream.end();
134
- await new Promise((resolve, reject) => {
135
- writeStream.on('finish', resolve);
136
- writeStream.on('error', reject);
137
- });
138
- const actualSha256 = hash.digest('hex');
139
- if (expectedSha256 && actualSha256 !== expectedSha256) {
140
- // HuggingFace ETags for LFS files may contain the Git LFS OID (pointer hash)
141
- // rather than the SHA256 of the actual served bytes. This is common when
142
- // CDN/Cloudfront serves the file. Only hard-fail if the download is also
143
- // suspiciously small (likely corrupt). Otherwise warn and proceed — the
144
- // actual content hash is still recorded in metadata for future verification.
145
- const tolerance = model.sizeBytes * 0.1;
146
- if (downloaded < model.sizeBytes - tolerance) {
147
- throw new Error(`Model checksum mismatch for ${model.name}: expected ${expectedSha256}, got ${actualSha256} (download also undersized: ${downloaded} bytes)`);
148
- }
149
- // Download size is reasonable — ETag likely a Git LFS OID, not content SHA256
150
- }
151
- // Atomic rename
158
+ const expectedSha = extractSha256FromEtag(response.headers.get('etag'));
159
+ const { sha256, downloaded } = await streamToDisk(response, tempPath, model, onProgress);
160
+ verifySha256(expectedSha, sha256, downloaded, model);
152
161
  fs.renameSync(tempPath, destPath);
153
- await writeModelMetadata(tier, {
154
- sha256: actualSha256,
155
- sizeBytes: downloaded,
162
+ await writeModelMeta(model.filename, {
163
+ sha256, sizeBytes: downloaded,
156
164
  verifiedAt: new Date().toISOString(),
157
165
  sourceUrl: model.url,
158
166
  sourceEtag: response.headers.get('etag') || undefined,
@@ -161,11 +169,35 @@ export async function downloadModel(tier, onProgress) {
161
169
  return destPath;
162
170
  }
163
171
  catch (error) {
164
- // Clean up temp file on failure
165
172
  fs.removeSync(tempPath);
166
173
  throw error;
167
174
  }
168
175
  }
176
/**
 * Download a model from HuggingFace CDN.
 * Tries fine-tuned model first, falls back to stock Qwen if unavailable.
 *
 * Returns the path from getModelPath immediately when a model for the tier
 * is already cached. If the primary download fails for any reason and a
 * fallback entry with a different URL exists, the fallback is downloaded;
 * otherwise the primary error is rethrown.
 */
export async function downloadModel(tier, onProgress) {
    fs.ensureDirSync(MODELS_DIR);
    const alreadyCached = await isModelCached(tier);
    if (alreadyCached) {
        onProgress?.(`Model ${MODELS[tier].name} already cached`, 100);
        return getModelPath(tier);
    }
    const primary = MODELS[tier];
    onProgress?.(`Downloading ${primary.name} (${primary.sizeHuman})...`, 0);
    try {
        return await downloadFromUrl(tier, primary, onProgress);
    }
    catch (primaryError) {
        // Fine-tuned model not available — try stock fallback
        const stock = FALLBACK_MODELS[tier];
        const hasDistinctFallback = Boolean(stock) && stock.url !== primary.url;
        if (!hasDistinctFallback)
            throw primaryError;
        onProgress?.(`Fine-tuned model unavailable, using ${stock.name}`, 0);
        return downloadFromUrl(tier, stock, onProgress);
    }
}
169
201
  /**
170
202
  * Ensure a model is available, downloading if needed.
171
203
  */
@@ -73,5 +73,16 @@ export interface ModelInfo {
73
73
  sizeBytes: number;
74
74
  sizeHuman: string;
75
75
  }
76
+ /**
77
+ * Model version — bump when new fine-tuned GGUF is published.
78
+ * The RLAIF pipeline uploads new models to HuggingFace, and
79
+ * model-manager checks this version to auto-update.
80
+ */
81
+ export declare const MODEL_VERSION = "1";
76
82
  /** All supported model definitions */
77
83
  export declare const MODELS: Record<ModelTier, ModelInfo>;
84
+ /**
85
+ * Fallback stock models — used when fine-tuned model is not yet
86
+ * available on HuggingFace (initial setup / first-time users).
87
+ */
88
+ export declare const FALLBACK_MODELS: Record<ModelTier, ModelInfo>;