@rigour-labs/core 4.2.2 → 4.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,6 +11,7 @@
11
11
  import { Gate } from './base.js';
12
12
  import { createProvider } from '../inference/index.js';
13
13
  import { extractFacts, factsToPromptString, chunkFacts, buildAnalysisPrompt, buildCrossFilePrompt, verifyFindings } from '../deep/index.js';
14
+ import { checkLocalPatterns } from '../storage/local-memory.js';
14
15
  import { Logger } from '../utils/logger.js';
15
16
  /** Max files to analyze before truncating (prevents OOM on huge repos) */
16
17
  const MAX_ANALYZABLE_FILES = 500;
@@ -62,9 +63,15 @@ export class DeepAnalysisGate extends Gate {
62
63
  const agentCount = this.config.options.agents || 1;
63
64
  const isCloud = !!this.config.options.apiKey;
64
65
  onProgress?.(` Found ${allFacts.length} files to analyze${agentCount > 1 ? ` with ${agentCount} parallel agents` : ''}.`);
66
+ // Step 1.5: Check local project memory for known patterns (instant, no LLM)
67
+ const fileList = allFacts.map(f => f.path).filter(Boolean);
68
+ const localFindings = checkLocalPatterns(context.cwd, fileList);
69
+ if (localFindings.length > 0) {
70
+ onProgress?.(` 🧠 Local memory: ${localFindings.length} known pattern(s) matched instantly.`);
71
+ }
65
72
  // Step 2: LLM interprets facts (in chunks)
66
73
  const chunks = chunkFacts(allFacts);
67
- const allFindings = [];
74
+ const allFindings = [...localFindings];
68
75
  let failedChunks = 0;
69
76
  if (agentCount > 1 && isCloud) {
70
77
  // ── Multi-agent mode: partition chunks across N agents, analyze in parallel ──
@@ -1,5 +1,6 @@
1
1
  import { SEVERITY_WEIGHTS } from '../types/index.js';
2
2
  import { DeepAnalysisGate } from './deep-analysis.js';
3
+ import { persistAndReinforce } from '../storage/local-memory.js';
3
4
  import { FileGate } from './file.js';
4
5
  import { ContentGate } from './content.js';
5
6
  import { StructureGate } from './structure.js';
@@ -254,7 +255,8 @@ export class GateRunner {
254
255
  break;
255
256
  }
256
257
  }
257
- return {
258
+ // Persist findings + reinforce patterns in local SQLite (fire-and-forget)
259
+ const report = {
258
260
  status,
259
261
  summary,
260
262
  failures,
@@ -269,5 +271,16 @@ export class GateRunner {
269
271
  ...(deepStats ? { deep: deepStats } : {}),
270
272
  },
271
273
  };
274
+ // Store findings + reinforce patterns in local SQLite (non-blocking)
275
+ try {
276
+ persistAndReinforce(cwd, report, deepStats ? {
277
+ deepTier: deepStats.tier,
278
+ deepModel: deepStats.model,
279
+ } : undefined);
280
+ }
281
+ catch {
282
+ // Silent — local memory is advisory, never blocks scans
283
+ }
284
+ return report;
272
285
  }
273
286
  }
package/dist/index.d.ts CHANGED
@@ -17,5 +17,7 @@ export type { InferenceProvider, DeepFinding, DeepAnalysisResult, ModelTier } fr
17
17
  export { MODELS } from './inference/types.js';
18
18
  export { isModelCached, getModelsDir, getModelInfo } from './inference/model-manager.js';
19
19
  export { extractFacts, factsToPromptString } from './deep/fact-extractor.js';
20
- export { openDatabase, isSQLiteAvailable, insertScan, insertFindings, getRecentScans, getScoreTrendFromDB, getTopIssues, reinforcePattern, getStrongPatterns } from './storage/index.js';
21
- export type { RigourDB } from './storage/index.js';
20
+ export { openDatabase, isSQLiteAvailable, compactDatabase, getDatabaseSize, resetDatabase, insertScan, insertFindings, getRecentScans, getScoreTrendFromDB, getTopIssues, reinforcePattern, getStrongPatterns } from './storage/index.js';
21
+ export type { RigourDB, CompactResult } from './storage/index.js';
22
+ export { checkLocalPatterns, persistAndReinforce, getProjectStats } from './storage/index.js';
23
+ export type { ProjectStats } from './storage/index.js';
package/dist/index.js CHANGED
@@ -18,7 +18,9 @@ export { MODELS } from './inference/types.js';
18
18
  export { isModelCached, getModelsDir, getModelInfo } from './inference/model-manager.js';
19
19
  export { extractFacts, factsToPromptString } from './deep/fact-extractor.js';
20
20
  // Storage (SQLite Brain)
21
- export { openDatabase, isSQLiteAvailable, insertScan, insertFindings, getRecentScans, getScoreTrendFromDB, getTopIssues, reinforcePattern, getStrongPatterns } from './storage/index.js';
21
+ export { openDatabase, isSQLiteAvailable, compactDatabase, getDatabaseSize, resetDatabase, insertScan, insertFindings, getRecentScans, getScoreTrendFromDB, getTopIssues, reinforcePattern, getStrongPatterns } from './storage/index.js';
22
+ // Local Project Memory (hybrid intelligence — SQLite-backed per-project learning)
23
+ export { checkLocalPatterns, persistAndReinforce, getProjectStats } from './storage/index.js';
22
24
  // Pattern Index is intentionally NOT exported here to prevent
23
25
  // native dependency issues (sharp/transformers) from leaking into
24
26
  // non-AI parts of the system.
@@ -2,11 +2,11 @@ import { type ModelTier, type ModelInfo } from './types.js';
2
2
  export declare function extractSha256FromEtag(etag: string | null): string | null;
3
3
  export declare function hashFileSha256(filePath: string): Promise<string>;
4
4
  /**
5
- * Check if a model is already downloaded and valid.
5
+ * Check if any model for this tier is cached (fine-tuned or fallback).
6
6
  */
7
7
  export declare function isModelCached(tier: ModelTier): Promise<boolean>;
8
8
  /**
9
- * Get the path to a cached model.
9
+ * Get the path to a cached model (prefers fine-tuned over fallback).
10
10
  */
11
11
  export declare function getModelPath(tier: ModelTier): string;
12
12
  /**
@@ -15,7 +15,7 @@ export declare function getModelPath(tier: ModelTier): string;
15
15
  export declare function getModelInfo(tier: ModelTier): ModelInfo;
16
16
  /**
17
17
  * Download a model from HuggingFace CDN.
18
- * Calls onProgress with status updates.
18
+ * Tries fine-tuned model first, falls back to stock Qwen if unavailable.
19
19
  */
20
20
  export declare function downloadModel(tier: ModelTier, onProgress?: (message: string, percent?: number) => void): Promise<string>;
21
21
  /**
@@ -6,11 +6,11 @@ import path from 'path';
6
6
  import fs from 'fs-extra';
7
7
  import { createHash } from 'crypto';
8
8
  import { RIGOUR_DIR } from '../storage/db.js';
9
- import { MODELS } from './types.js';
9
+ import { MODELS, FALLBACK_MODELS } from './types.js';
10
10
  const MODELS_DIR = path.join(RIGOUR_DIR, 'models');
11
11
  const SHA256_RE = /^[a-f0-9]{64}$/i;
12
- function getModelMetadataPath(tier) {
13
- return path.join(MODELS_DIR, MODELS[tier].filename + '.meta.json');
12
+ function getModelMetadataPath(filename) {
13
+ return path.join(MODELS_DIR, filename + '.meta.json');
14
14
  }
15
15
  function isValidMetadata(raw) {
16
16
  return !!raw &&
@@ -34,17 +34,15 @@ export async function hashFileSha256(filePath) {
34
34
  }
35
35
  return hash.digest('hex');
36
36
  }
37
- async function writeModelMetadata(tier, metadata) {
38
- const metadataPath = getModelMetadataPath(tier);
39
- await fs.writeJson(metadataPath, metadata, { spaces: 2 });
37
+ async function writeModelMeta(filename, metadata) {
38
+ await fs.writeJson(getModelMetadataPath(filename), metadata, { spaces: 2 });
40
39
  }
41
- async function readModelMetadata(tier) {
42
- const metadataPath = getModelMetadataPath(tier);
43
- if (!(await fs.pathExists(metadataPath))) {
40
+ async function readModelMeta(filename) {
41
+ const p = getModelMetadataPath(filename);
42
+ if (!(await fs.pathExists(p)))
44
43
  return null;
45
- }
46
44
  try {
47
- const raw = await fs.readJson(metadataPath);
45
+ const raw = await fs.readJson(p);
48
46
  return isValidMetadata(raw) ? raw : null;
49
47
  }
50
48
  catch {
@@ -52,17 +50,15 @@ async function readModelMetadata(tier) {
52
50
  }
53
51
  }
54
52
  /**
55
- * Check if a model is already downloaded and valid.
53
+ * Check if a single model file is cached and valid.
56
54
  */
57
- export async function isModelCached(tier) {
58
- const model = MODELS[tier];
55
+ async function isFileCached(model) {
59
56
  const modelPath = path.join(MODELS_DIR, model.filename);
60
57
  if (!(await fs.pathExists(modelPath)))
61
58
  return false;
62
- const metadata = await readModelMetadata(tier);
59
+ const metadata = await readModelMeta(model.filename);
63
60
  if (!metadata)
64
61
  return false;
65
- // Size check + "changed since verification" check.
66
62
  const stat = await fs.stat(modelPath);
67
63
  const tolerance = model.sizeBytes * 0.1;
68
64
  if (stat.size <= model.sizeBytes - tolerance)
@@ -74,10 +70,22 @@ export async function isModelCached(tier) {
74
70
  return true;
75
71
  }
76
72
  /**
77
- * Get the path to a cached model.
73
+ * Check if any model for this tier is cached (fine-tuned or fallback).
74
+ */
75
+ export async function isModelCached(tier) {
76
+ if (await isFileCached(MODELS[tier]))
77
+ return true;
78
+ const fb = FALLBACK_MODELS[tier];
79
+ return fb.url !== MODELS[tier].url && await isFileCached(fb);
80
+ }
81
+ /**
82
+ * Get the path to a cached model (prefers fine-tuned over fallback).
78
83
  */
79
84
  export function getModelPath(tier) {
80
- return path.join(MODELS_DIR, MODELS[tier].filename);
85
+ const primary = path.join(MODELS_DIR, MODELS[tier].filename);
86
+ if (fs.pathExistsSync(primary))
87
+ return primary;
88
+ return path.join(MODELS_DIR, FALLBACK_MODELS[tier].filename);
81
89
  }
82
90
  /**
83
91
  * Get model info for a tier.
@@ -86,73 +94,73 @@ export function getModelInfo(tier) {
86
94
  return MODELS[tier];
87
95
  }
88
96
  /**
89
- * Download a model from HuggingFace CDN.
90
- * Calls onProgress with status updates.
97
+ * Stream a response body to disk with progress + SHA256.
98
+ * Returns { sha256, downloaded } on success.
91
99
  */
92
- export async function downloadModel(tier, onProgress) {
93
- const model = MODELS[tier];
100
+ async function streamToDisk(response, tempPath, model, onProgress) {
101
+ const contentLength = parseInt(response.headers.get('content-length') || '0', 10);
102
+ const reader = response.body?.getReader();
103
+ if (!reader)
104
+ throw new Error('No response body');
105
+ const writeStream = fs.createWriteStream(tempPath);
106
+ const hash = createHash('sha256');
107
+ let downloaded = 0;
108
+ let lastPct = 0;
109
+ while (true) {
110
+ const { done, value } = await reader.read();
111
+ if (done)
112
+ break;
113
+ const chunk = Buffer.from(value);
114
+ writeStream.write(chunk);
115
+ hash.update(chunk);
116
+ downloaded += value.length;
117
+ if (contentLength > 0) {
118
+ const pct = Math.round((downloaded / contentLength) * 100);
119
+ if (pct >= lastPct + 5) {
120
+ lastPct = pct;
121
+ onProgress?.(`Downloading ${model.name}: ${pct}%`, pct);
122
+ }
123
+ }
124
+ }
125
+ writeStream.end();
126
+ await new Promise((resolve, reject) => {
127
+ writeStream.on('finish', resolve);
128
+ writeStream.on('error', reject);
129
+ });
130
+ return { sha256: hash.digest('hex'), downloaded };
131
+ }
132
+ /**
133
+ * Verify SHA256 against ETag, allowing LFS OID mismatches
134
+ * if the download size is reasonable.
135
+ */
136
+ function verifySha256(expectedSha256, actualSha256, downloaded, model) {
137
+ if (!expectedSha256 || actualSha256 === expectedSha256)
138
+ return;
139
+ const tolerance = model.sizeBytes * 0.1;
140
+ if (downloaded < model.sizeBytes - tolerance) {
141
+ throw new Error(`Checksum mismatch for ${model.name}: ` +
142
+ `expected ${expectedSha256}, got ${actualSha256} ` +
143
+ `(undersized: ${downloaded} bytes)`);
144
+ }
145
+ // Size OK — ETag likely a Git LFS OID, not content SHA256
146
+ }
147
+ /**
148
+ * Download a specific model from its URL, write to disk, save metadata.
149
+ */
150
+ async function downloadFromUrl(tier, model, onProgress) {
94
151
  const destPath = path.join(MODELS_DIR, model.filename);
95
152
  const tempPath = destPath + '.download';
96
- fs.ensureDirSync(MODELS_DIR);
97
- // Already cached
98
- if (await isModelCached(tier)) {
99
- onProgress?.(`Model ${model.name} already cached`, 100);
100
- return destPath;
101
- }
102
- onProgress?.(`Downloading ${model.name} (${model.sizeHuman})...`, 0);
103
153
  try {
104
154
  const response = await fetch(model.url);
105
155
  if (!response.ok) {
106
156
  throw new Error(`HTTP ${response.status}: ${response.statusText}`);
107
157
  }
108
- const expectedSha256 = extractSha256FromEtag(response.headers.get('etag'));
109
- const contentLength = parseInt(response.headers.get('content-length') || '0', 10);
110
- const reader = response.body?.getReader();
111
- if (!reader)
112
- throw new Error('No response body');
113
- const writeStream = fs.createWriteStream(tempPath);
114
- const hash = createHash('sha256');
115
- let downloaded = 0;
116
- let lastProgressPercent = 0;
117
- while (true) {
118
- const { done, value } = await reader.read();
119
- if (done)
120
- break;
121
- const chunk = Buffer.from(value);
122
- writeStream.write(chunk);
123
- hash.update(chunk);
124
- downloaded += value.length;
125
- if (contentLength > 0) {
126
- const percent = Math.round((downloaded / contentLength) * 100);
127
- if (percent >= lastProgressPercent + 5) { // Report every 5%
128
- lastProgressPercent = percent;
129
- onProgress?.(`Downloading ${model.name}: ${percent}%`, percent);
130
- }
131
- }
132
- }
133
- writeStream.end();
134
- await new Promise((resolve, reject) => {
135
- writeStream.on('finish', resolve);
136
- writeStream.on('error', reject);
137
- });
138
- const actualSha256 = hash.digest('hex');
139
- if (expectedSha256 && actualSha256 !== expectedSha256) {
140
- // HuggingFace ETags for LFS files may contain the Git LFS OID (pointer hash)
141
- // rather than the SHA256 of the actual served bytes. This is common when
142
- // CDN/Cloudfront serves the file. Only hard-fail if the download is also
143
- // suspiciously small (likely corrupt). Otherwise warn and proceed — the
144
- // actual content hash is still recorded in metadata for future verification.
145
- const tolerance = model.sizeBytes * 0.1;
146
- if (downloaded < model.sizeBytes - tolerance) {
147
- throw new Error(`Model checksum mismatch for ${model.name}: expected ${expectedSha256}, got ${actualSha256} (download also undersized: ${downloaded} bytes)`);
148
- }
149
- // Download size is reasonable — ETag likely a Git LFS OID, not content SHA256
150
- }
151
- // Atomic rename
158
+ const expectedSha = extractSha256FromEtag(response.headers.get('etag'));
159
+ const { sha256, downloaded } = await streamToDisk(response, tempPath, model, onProgress);
160
+ verifySha256(expectedSha, sha256, downloaded, model);
152
161
  fs.renameSync(tempPath, destPath);
153
- await writeModelMetadata(tier, {
154
- sha256: actualSha256,
155
- sizeBytes: downloaded,
162
+ await writeModelMeta(model.filename, {
163
+ sha256, sizeBytes: downloaded,
156
164
  verifiedAt: new Date().toISOString(),
157
165
  sourceUrl: model.url,
158
166
  sourceEtag: response.headers.get('etag') || undefined,
@@ -161,11 +169,35 @@ export async function downloadModel(tier, onProgress) {
161
169
  return destPath;
162
170
  }
163
171
  catch (error) {
164
- // Clean up temp file on failure
165
172
  fs.removeSync(tempPath);
166
173
  throw error;
167
174
  }
168
175
  }
176
+ /**
177
+ * Download a model from HuggingFace CDN.
178
+ * Tries fine-tuned model first, falls back to stock Qwen if unavailable.
179
+ */
180
+ export async function downloadModel(tier, onProgress) {
181
+ fs.ensureDirSync(MODELS_DIR);
182
+ if (await isModelCached(tier)) {
183
+ onProgress?.(`Model ${MODELS[tier].name} already cached`, 100);
184
+ return getModelPath(tier);
185
+ }
186
+ const model = MODELS[tier];
187
+ onProgress?.(`Downloading ${model.name} (${model.sizeHuman})...`, 0);
188
+ try {
189
+ return await downloadFromUrl(tier, model, onProgress);
190
+ }
191
+ catch (error) {
192
+ // Fine-tuned model not available — try stock fallback
193
+ const fallback = FALLBACK_MODELS[tier];
194
+ if (fallback && fallback.url !== model.url) {
195
+ onProgress?.(`Fine-tuned model unavailable, using ${fallback.name}`, 0);
196
+ return downloadFromUrl(tier, fallback, onProgress);
197
+ }
198
+ throw error;
199
+ }
200
+ }
169
201
  /**
170
202
  * Ensure a model is available, downloading if needed.
171
203
  */
@@ -73,5 +73,16 @@ export interface ModelInfo {
73
73
  sizeBytes: number;
74
74
  sizeHuman: string;
75
75
  }
76
+ /**
77
+ * Model version — bump when new fine-tuned GGUF is published.
78
+ * The RLAIF pipeline uploads new models to HuggingFace, and
79
+ * model-manager checks this version to auto-update.
80
+ */
81
+ export declare const MODEL_VERSION = "1";
76
82
  /** All supported model definitions */
77
83
  export declare const MODELS: Record<ModelTier, ModelInfo>;
84
+ /**
85
+ * Fallback stock models — used when fine-tuned model is not yet
86
+ * available on HuggingFace (initial setup / first-time users).
87
+ */
88
+ export declare const FALLBACK_MODELS: Record<ModelTier, ModelInfo>;
@@ -1,8 +1,36 @@
1
+ /**
2
+ * Model version — bump when new fine-tuned GGUF is published.
3
+ * The RLAIF pipeline uploads new models to HuggingFace, and
4
+ * model-manager checks this version to auto-update.
5
+ */
6
+ export const MODEL_VERSION = '1';
1
7
  /** All supported model definitions */
2
8
  export const MODELS = {
3
9
  deep: {
4
10
  tier: 'deep',
5
- name: 'Qwen2.5-Coder-0.5B-Instruct',
11
+ name: 'Rigour-Deep-v1 (Qwen2.5-Coder-0.5B fine-tuned)',
12
+ filename: `rigour-deep-v${MODEL_VERSION}-q4_k_m.gguf`,
13
+ url: `https://huggingface.co/rigour-labs/rigour-deep-v1-gguf/resolve/main/rigour-deep-v${MODEL_VERSION}-q4_k_m.gguf`,
14
+ sizeBytes: 350_000_000,
15
+ sizeHuman: '350MB',
16
+ },
17
+ pro: {
18
+ tier: 'pro',
19
+ name: 'Rigour-Pro-v1 (Qwen2.5-Coder-1.5B fine-tuned)',
20
+ filename: `rigour-pro-v${MODEL_VERSION}-q4_k_m.gguf`,
21
+ url: `https://huggingface.co/rigour-labs/rigour-pro-v1-gguf/resolve/main/rigour-pro-v${MODEL_VERSION}-q4_k_m.gguf`,
22
+ sizeBytes: 900_000_000,
23
+ sizeHuman: '900MB',
24
+ },
25
+ };
26
+ /**
27
+ * Fallback stock models — used when fine-tuned model is not yet
28
+ * available on HuggingFace (initial setup / first-time users).
29
+ */
30
+ export const FALLBACK_MODELS = {
31
+ deep: {
32
+ tier: 'deep',
33
+ name: 'Qwen2.5-Coder-0.5B-Instruct (stock)',
6
34
  filename: 'qwen2.5-coder-0.5b-instruct-q4_k_m.gguf',
7
35
  url: 'https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-0.5b-instruct-q4_k_m.gguf',
8
36
  sizeBytes: 350_000_000,
@@ -10,7 +38,7 @@ export const MODELS = {
10
38
  },
11
39
  pro: {
12
40
  tier: 'pro',
13
- name: 'Qwen2.5-Coder-1.5B-Instruct',
41
+ name: 'Qwen2.5-Coder-1.5B-Instruct (stock)',
14
42
  filename: 'qwen2.5-coder-1.5b-instruct-q4_k_m.gguf',
15
43
  url: 'https://huggingface.co/Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF/resolve/main/qwen2.5-coder-1.5b-instruct-q4_k_m.gguf',
16
44
  sizeBytes: 900_000_000,
@@ -9,6 +9,25 @@ export interface RigourDB {
9
9
  * Returns null if better-sqlite3 is not available.
10
10
  */
11
11
  export declare function openDatabase(dbPath?: string): RigourDB | null;
12
+ /**
13
+ * Compact the database — prune old data, reclaim disk space.
14
+ * Retention policy: keep last `retainDays` of findings, merge old patterns.
15
+ */
16
+ export declare function compactDatabase(retainDays?: number): CompactResult;
17
+ export interface CompactResult {
18
+ pruned: number;
19
+ patternsDecayed: number;
20
+ sizeBefore: number;
21
+ sizeAfter: number;
22
+ }
23
+ /**
24
+ * Get database file size in bytes. Returns 0 if DB doesn't exist.
25
+ */
26
+ export declare function getDatabaseSize(): number;
27
+ /**
28
+ * Reset the database — delete and recreate from scratch.
29
+ */
30
+ export declare function resetDatabase(): void;
12
31
  /**
13
32
  * Check if SQLite is available (better-sqlite3 installed)
14
33
  */
@@ -26,7 +26,15 @@ function loadDatabase() {
26
26
  }
27
27
  const RIGOUR_DIR = path.join(os.homedir(), '.rigour');
28
28
  const DB_PATH = path.join(RIGOUR_DIR, 'rigour.db');
29
+ /** Current schema version — bump when adding migrations. */
30
+ const SCHEMA_VERSION = 2;
29
31
  const SCHEMA_SQL = `
32
+ -- Schema version tracking
33
+ CREATE TABLE IF NOT EXISTS meta (
34
+ key TEXT PRIMARY KEY,
35
+ value TEXT NOT NULL
36
+ );
37
+
30
38
  -- Every scan result, forever
31
39
  CREATE TABLE IF NOT EXISTS scans (
32
40
  id TEXT PRIMARY KEY,
@@ -114,8 +122,9 @@ export function openDatabase(dbPath) {
114
122
  // WAL mode for better concurrent read performance
115
123
  db.pragma('journal_mode = WAL');
116
124
  db.pragma('foreign_keys = ON');
117
- // Run schema migration
125
+ // Run schema creation + migrations
118
126
  db.exec(SCHEMA_SQL);
127
+ runMigrations(db);
119
128
  return {
120
129
  db,
121
130
  close() {
@@ -123,6 +132,84 @@ export function openDatabase(dbPath) {
123
132
  },
124
133
  };
125
134
  }
135
+ /**
136
+ * Run incremental schema migrations based on stored version.
137
+ */
138
+ function runMigrations(db) {
139
+ const row = db.prepare("SELECT value FROM meta WHERE key = 'schema_version'").get();
140
+ const current = row ? parseInt(row.value, 10) : 0;
141
+ if (current < 1) {
142
+ // v1: base schema (already created by SCHEMA_SQL)
143
+ db.prepare("INSERT OR REPLACE INTO meta (key, value) VALUES ('schema_version', '1')").run();
144
+ }
145
+ if (current < 2) {
146
+ // v2: retention indexes for compaction queries
147
+ db.exec(`
148
+ CREATE INDEX IF NOT EXISTS idx_findings_file ON findings(file);
149
+ CREATE INDEX IF NOT EXISTS idx_scans_repo_ts ON scans(repo, timestamp);
150
+ `);
151
+ db.prepare("INSERT OR REPLACE INTO meta (key, value) VALUES ('schema_version', '2')").run();
152
+ }
153
+ // Future: if (current < 3) { ... ALTER TABLE ... }
154
+ }
155
+ /**
156
+ * Compact the database — prune old data, reclaim disk space.
157
+ * Retention policy: keep last `retainDays` of findings, merge old patterns.
158
+ */
159
+ export function compactDatabase(retainDays = 90) {
160
+ const Db = loadDatabase();
161
+ if (!Db)
162
+ return { pruned: 0, patternsDecayed: 0, sizeBefore: 0, sizeAfter: 0 };
163
+ const resolvedPath = DB_PATH;
164
+ const sizeBefore = fs.existsSync(resolvedPath) ? fs.statSync(resolvedPath).size : 0;
165
+ const db = new Db(resolvedPath);
166
+ db.pragma('journal_mode = WAL');
167
+ const cutoff = Date.now() - (retainDays * 24 * 60 * 60 * 1000);
168
+ let pruned = 0;
169
+ let patternsDecayed = 0;
170
+ try {
171
+ db.transaction(() => {
172
+ // 1. Delete old findings (keep scan records for trend lines)
173
+ const r1 = db.prepare(`
174
+ DELETE FROM findings WHERE scan_id IN (
175
+ SELECT id FROM scans WHERE timestamp < ?
176
+ )
177
+ `).run(cutoff);
178
+ pruned += r1.changes;
179
+ // 2. Prune weak patterns (never grew, seen < 3 times)
180
+ const r2 = db.prepare("DELETE FROM patterns WHERE strength < 0.3 AND times_seen < 3").run();
181
+ patternsDecayed += r2.changes;
182
+ // 3. Prune orphaned feedback
183
+ db.prepare("DELETE FROM feedback WHERE finding_id NOT IN (SELECT id FROM findings)").run();
184
+ // 4. Prune old codebase index entries
185
+ db.prepare("DELETE FROM codebase WHERE last_indexed < ?").run(cutoff);
186
+ })();
187
+ // 5. Reclaim disk space
188
+ db.exec('VACUUM');
189
+ }
190
+ finally {
191
+ db.close();
192
+ }
193
+ const sizeAfter = fs.existsSync(resolvedPath) ? fs.statSync(resolvedPath).size : 0;
194
+ return { pruned, patternsDecayed, sizeBefore, sizeAfter };
195
+ }
196
+ /**
197
+ * Get database file size in bytes. Returns 0 if DB doesn't exist.
198
+ */
199
+ export function getDatabaseSize() {
200
+ return fs.existsSync(DB_PATH) ? fs.statSync(DB_PATH).size : 0;
201
+ }
202
+ /**
203
+ * Reset the database — delete and recreate from scratch.
204
+ */
205
+ export function resetDatabase() {
206
+ if (fs.existsSync(DB_PATH))
207
+ fs.removeSync(DB_PATH);
208
+ if (fs.existsSync(DB_PATH + '-wal'))
209
+ fs.removeSync(DB_PATH + '-wal');
210
+ if (fs.existsSync(DB_PATH + '-shm'))
211
+ fs.removeSync(DB_PATH + '-shm');
212
+ }
126
213
  /**
127
214
  * Check if SQLite is available (better-sqlite3 installed)
128
215
  */
@@ -9,6 +9,7 @@ export declare function insertFindings(store: RigourDB, scanId: string, failures
9
9
  */
10
10
  export declare function getFindingsForScan(store: RigourDB, scanId: string): any[];
11
11
  /**
12
- * Get all deep analysis findings for a repo.
12
+ * Get deep analysis and high-confidence AST findings for a repo.
13
+ * Used by local memory to match known patterns against new scans.
13
14
  */
14
15
  export declare function getDeepFindings(store: RigourDB, repo: string, limit?: number): any[];
@@ -25,13 +25,14 @@ export function getFindingsForScan(store, scanId) {
25
25
  return stmt.all(scanId);
26
26
  }
27
27
  /**
28
- * Get all deep analysis findings for a repo.
28
+ * Get deep analysis and high-confidence AST findings for a repo.
29
+ * Used by local memory to match known patterns against new scans.
29
30
  */
30
31
  export function getDeepFindings(store, repo, limit = 50) {
31
32
  const stmt = store.db.prepare(`
32
33
  SELECT f.* FROM findings f
33
34
  JOIN scans s ON f.scan_id = s.id
34
- WHERE s.repo = ? AND f.source = 'llm'
35
+ WHERE s.repo = ? AND (f.source = 'llm' OR f.source = 'hybrid' OR f.confidence >= 0.7)
35
36
  ORDER BY f.confidence DESC LIMIT ?
36
37
  `);
37
38
  return stmt.all(repo, limit);
@@ -2,8 +2,10 @@
2
2
  * Rigour Brain — SQLite storage layer.
3
3
  * Everything in one file: ~/.rigour/rigour.db
4
4
  */
5
- export { openDatabase, isSQLiteAvailable, RIGOUR_DIR, DB_PATH } from './db.js';
6
- export type { RigourDB } from './db.js';
5
+ export { openDatabase, isSQLiteAvailable, compactDatabase, getDatabaseSize, resetDatabase, RIGOUR_DIR, DB_PATH } from './db.js';
6
+ export type { RigourDB, CompactResult } from './db.js';
7
7
  export { insertScan, getRecentScans, getScoreTrendFromDB, getTopIssues } from './scans.js';
8
8
  export { insertFindings, getFindingsForScan, getDeepFindings } from './findings.js';
9
9
  export { reinforcePattern, decayPatterns, getStrongPatterns, getPatterns, getHardRules } from './patterns.js';
10
+ export { checkLocalPatterns, persistAndReinforce, getProjectStats } from './local-memory.js';
11
+ export type { ProjectStats } from './local-memory.js';
@@ -2,7 +2,8 @@
2
2
  * Rigour Brain — SQLite storage layer.
3
3
  * Everything in one file: ~/.rigour/rigour.db
4
4
  */
5
- export { openDatabase, isSQLiteAvailable, RIGOUR_DIR, DB_PATH } from './db.js';
5
+ export { openDatabase, isSQLiteAvailable, compactDatabase, getDatabaseSize, resetDatabase, RIGOUR_DIR, DB_PATH } from './db.js';
6
6
  export { insertScan, getRecentScans, getScoreTrendFromDB, getTopIssues } from './scans.js';
7
7
  export { insertFindings, getFindingsForScan, getDeepFindings } from './findings.js';
8
8
  export { reinforcePattern, decayPatterns, getStrongPatterns, getPatterns, getHardRules } from './patterns.js';
9
+ export { checkLocalPatterns, persistAndReinforce, getProjectStats } from './local-memory.js';
@@ -0,0 +1,37 @@
1
+ import type { Failure } from '../types/index.js';
2
+ import type { DeepFinding } from '../inference/types.js';
3
+ /**
4
+ * Pre-scan: check local SQLite for known patterns → produce instant findings.
5
+ * Returns findings that match known project patterns, WITHOUT model inference.
6
+ *
7
+ * @param cwd - Absolute project root path
8
+ * @param fileList - Relative file paths (from FileFacts.path or globby)
9
+ */
10
+ export declare function checkLocalPatterns(cwd: string, fileList: string[]): DeepFinding[];
11
+ /**
12
+ * Post-scan: persist findings and reinforce patterns.
13
+ * Wrapped in a single transaction for atomicity.
14
+ * Called after every scan (check, scan, scan --deep).
15
+ */
16
+ export declare function persistAndReinforce(cwd: string, report: {
17
+ status: string;
18
+ failures: Failure[];
19
+ stats: any;
20
+ }, meta?: {
21
+ deepTier?: string;
22
+ deepModel?: string;
23
+ }): void;
24
+ /**
25
+ * Get project learning stats (for display in scan output).
26
+ */
27
+ export declare function getProjectStats(cwd: string): ProjectStats | null;
28
+ export interface ProjectStats {
29
+ totalScans: number;
30
+ learnedPatterns: number;
31
+ hardRules: number;
32
+ topPatterns: Array<{
33
+ name: string;
34
+ strength: number;
35
+ timesSeen: number;
36
+ }>;
37
+ }
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Local Project Memory — the hybrid intelligence layer.
3
+ *
4
+ * Before deep scan: checks local SQLite for known patterns in this project.
5
+ * After any scan: stores verified findings and reinforces patterns.
6
+ * Result: Rigour gets smarter every time you use it, code never leaves the machine.
7
+ */
8
+ import path from 'path';
9
+ import { openDatabase } from './db.js';
10
+ import { insertScan, getRecentScans } from './scans.js';
11
+ import { insertFindings, getDeepFindings } from './findings.js';
12
+ import { reinforcePattern, getStrongPatterns, getHardRules, decayPatterns } from './patterns.js';
13
+ import { Logger } from '../utils/logger.js';
14
+ /** Minimum pattern strength to produce instant findings (skip LLM). */
15
+ const INSTANT_MATCH_THRESHOLD = 0.7;
16
+ /** Max local findings to inject per scan (avoid flooding). */
17
+ const MAX_LOCAL_FINDINGS = 15;
18
+ /** Min confidence to reuse a past finding from local memory. */
19
+ const MIN_REUSE_CONFIDENCE = 0.7;
20
+ /**
21
+ * Build a map of relative file path → known issues from past findings.
22
+ */
23
+ function buildFileIssueMap(recentFindings) {
24
+ const map = new Map();
25
+ for (const f of recentFindings) {
26
+ const existing = map.get(f.file) || [];
27
+ existing.push(f);
28
+ map.set(f.file, existing);
29
+ }
30
+ return map;
31
+ }
32
+ /**
33
+ * Pre-scan: check local SQLite for known patterns → produce instant findings.
34
+ * Returns findings that match known project patterns, WITHOUT model inference.
35
+ *
36
+ * @param cwd - Absolute project root path
37
+ * @param fileList - Relative file paths (from FileFacts.path or globby)
38
+ */
39
+ export function checkLocalPatterns(cwd, fileList) {
40
+ const db = openDatabase();
41
+ if (!db)
42
+ return [];
43
+ const repoName = path.basename(cwd);
44
+ const findings = [];
45
+ try {
46
+ const patterns = getStrongPatterns(db, repoName, INSTANT_MATCH_THRESHOLD);
47
+ if (patterns.length === 0)
48
+ return [];
49
+ // Include both AST and LLM findings from history
50
+ const recentFindings = getDeepFindings(db, repoName, 200);
51
+ if (recentFindings.length === 0)
52
+ return [];
53
+ const fileIssueMap = buildFileIssueMap(recentFindings);
54
+ // fileList contains relative paths — match directly against DB (also relative)
55
+ for (const relPath of fileList) {
56
+ const known = fileIssueMap.get(relPath);
57
+ if (!known)
58
+ continue;
59
+ for (const issue of known) {
60
+ if ((issue.confidence ?? 0) < MIN_REUSE_CONFIDENCE)
61
+ continue;
62
+ const matchingPattern = patterns.find((p) => p.pattern === issue.category && p.strength >= INSTANT_MATCH_THRESHOLD);
63
+ if (!matchingPattern)
64
+ continue;
65
+ findings.push({
66
+ category: issue.category,
67
+ severity: issue.severity || 'medium',
68
+ file: relPath,
69
+ line: issue.line ?? undefined,
70
+ description: `[Local Memory] ${issue.description}`,
71
+ suggestion: issue.suggestion || 'Review this known issue.',
72
+ confidence: Math.min(matchingPattern.strength, issue.confidence ?? 0.5),
73
+ });
74
+ if (findings.length >= MAX_LOCAL_FINDINGS)
75
+ break;
76
+ }
77
+ if (findings.length >= MAX_LOCAL_FINDINGS)
78
+ break;
79
+ }
80
+ }
81
+ catch (error) {
82
+ Logger.warn(`Local memory check failed: ${error}`);
83
+ }
84
+ finally {
85
+ db?.db.close();
86
+ }
87
+ return findings;
88
+ }
89
+ /**
90
+ * Post-scan: persist findings and reinforce patterns.
91
+ * Wrapped in a single transaction for atomicity.
92
+ * Called after every scan (check, scan, scan --deep).
93
+ */
94
+ export function persistAndReinforce(cwd, report, meta) {
95
+ const db = openDatabase();
96
+ if (!db)
97
+ return;
98
+ const repoName = path.basename(cwd);
99
+ try {
100
+ // Wrap all writes in a single transaction for atomicity
101
+ const persist = db.db.transaction(() => {
102
+ const scanId = insertScan(db, repoName, report, meta);
103
+ if (report.failures.length > 0) {
104
+ insertFindings(db, scanId, report.failures);
105
+ }
106
+ for (const f of report.failures) {
107
+ const category = f.category || f.id;
108
+ const source = f.source === 'llm' ? 'llm' : 'ast';
109
+ reinforcePattern(db, repoName, category, `${f.title}: ${f.details?.substring(0, 120)}`, source);
110
+ }
111
+ decayPatterns(db, 30);
112
+ });
113
+ persist();
114
+ Logger.info(`Local memory: stored ${report.failures.length} findings, ` +
115
+ `reinforced ${report.failures.length} patterns for ${repoName}`);
116
+ }
117
+ catch (error) {
118
+ Logger.warn(`Local memory persist failed: ${error}`);
119
+ }
120
+ finally {
121
+ db?.db.close();
122
+ }
123
+ }
124
+ /**
125
+ * Get project learning stats (for display in scan output).
126
+ */
127
+ export function getProjectStats(cwd) {
128
+ const db = openDatabase();
129
+ if (!db)
130
+ return null;
131
+ const repoName = path.basename(cwd);
132
+ try {
133
+ const scans = getRecentScans(db, repoName, 100);
134
+ const patterns = getStrongPatterns(db, repoName, 0.3);
135
+ const hardRules = getHardRules(db, repoName);
136
+ return {
137
+ totalScans: scans.length,
138
+ learnedPatterns: patterns.length,
139
+ hardRules: hardRules.length,
140
+ topPatterns: patterns.slice(0, 5).map(p => ({
141
+ name: p.pattern,
142
+ strength: p.strength,
143
+ timesSeen: p.times_seen,
144
+ })),
145
+ };
146
+ }
147
+ catch {
148
+ return null;
149
+ }
150
+ finally {
151
+ db?.db.close();
152
+ }
153
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rigour-labs/core",
3
- "version": "4.2.2",
3
+ "version": "4.3.0",
4
4
  "description": "Deterministic quality gate engine for AI-generated code. AST analysis, drift detection, and Fix Packet generation across TypeScript, JavaScript, Python, Go, Ruby, and C#.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://rigour.run",
@@ -59,11 +59,11 @@
59
59
  "@xenova/transformers": "^2.17.2",
60
60
  "better-sqlite3": "^11.0.0",
61
61
  "openai": "^4.104.0",
62
- "@rigour-labs/brain-darwin-arm64": "4.2.2",
63
- "@rigour-labs/brain-linux-arm64": "4.2.2",
64
- "@rigour-labs/brain-linux-x64": "4.2.2",
65
- "@rigour-labs/brain-win-x64": "4.2.2",
66
- "@rigour-labs/brain-darwin-x64": "4.2.2"
62
+ "@rigour-labs/brain-linux-arm64": "4.3.0",
63
+ "@rigour-labs/brain-linux-x64": "4.3.0",
64
+ "@rigour-labs/brain-darwin-x64": "4.3.0",
65
+ "@rigour-labs/brain-win-x64": "4.3.0",
66
+ "@rigour-labs/brain-darwin-arm64": "4.3.0"
67
67
  },
68
68
  "devDependencies": {
69
69
  "@types/better-sqlite3": "^7.6.12",