npm - @antodevs/groundtruth - Versions diffs - 0.2.5 → 0.3.1 - Mend

@antodevs/groundtruth 0.2.5 → 0.3.1

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in the public registry.

Files changed (17)

package/index.js CHANGED Viewed

@@ -3,6 +3,7 @@
  * @module index
  * @description Entry point runtime groundtruth delegazione CLI o proxy flow logic.
  */
+import './src/http-agent.js';
 import { chalk, label } from './src/logger.js';
 import { usePackageJson, antigravityMode, claudeCodeMode, uninstallMode, port, intervalMinutes, batchSize, version } from './src/cli.js';
 import { createServer } from './src/proxy.js';

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@antodevs/groundtruth",
-  "version": "0.2.5",
+  "version": "0.3.1",
   "description": "Lightweight Node.js proxy to intercept API requests from coding agents and inject fresh web context",
   "publishConfig": {
     "access": "public"
@@ -30,7 +30,6 @@
     "index.js",
     "src/",
     "assets/",
-    "specification.yaml",
     "README.md",
     "LICENSE"
   ],
@@ -44,10 +43,9 @@
     "release:minor": "git add . && git commit -m \"chore: auto-commit before release\" || true && npm version minor && git push origin main --tags && npm publish"
   },
   "dependencies": {
-    "@mozilla/readability": "^0.5.0",
+    "@mozilla/readability": "^0.6.0",
     "chalk": "^5.3.0",
-    "cheerio": "^1.0.0",
-    "linkedom": "^0.18.12",
-    "node-fetch": "^3.3.2"
+    "cheerio": "^1.2.0",
+    "linkedom": "^0.18.12"
   }
 }

package/src/cache.js CHANGED Viewed

@@ -105,3 +105,5 @@ class LRUCache {
 // Esporta singola cache istanza module globale (singleton per memory context node)
 export const searchCache = new LRUCache({ max: 500, ttl: 5 * 60 * 1000 });
+export { LRUCache };

package/src/circuit-breaker.js CHANGED Viewed

@@ -13,6 +13,7 @@ export class CircuitBreaker {
         this.state = 'CLOSED'; // CLOSED, OPEN, HALF_OPEN
         this.failures = 0;
         this.lastFailureTime = null;
+        this.halfOpenSuccesses = 0;
     }
     /**
@@ -57,6 +58,7 @@ export class CircuitBreaker {
         // 429 rate limit apre il circuito immediatamente
         if (err?.message?.includes('429')) {
             this.failures = this.failureThreshold;
+            this.state = 'OPEN';
         } else {
             this.failures++;
         }

package/src/config.js CHANGED Viewed

@@ -45,7 +45,7 @@ export async function loadConfig() {
         const parsed = JSON.parse(raw);
         return {
-            maxTokens: clamp(parsed.maxTokens ?? DEFAULTS.maxTokens, 500, 8000),
+            maxTokens: clamp(parsed.maxTokens || DEFAULTS.maxTokens, 500, 8000),
             quality: ['low', 'medium', 'high'].includes(parsed.quality) ? parsed.quality : DEFAULTS.quality,
             verbose: typeof parsed.verbose === 'boolean' ? parsed.verbose : DEFAULTS.verbose,
             sources: Array.isArray(parsed.sources) ? parsed.sources.filter(s => s && s.url) : DEFAULTS.sources,

package/src/env.js CHANGED Viewed

@@ -23,13 +23,12 @@ export async function autoSetEnv(p) {
         if (process.env.ANTHROPIC_BASE_URL === targetUrl) return;
         const homeDir = os.homedir();
-        // Test exist pattern specifico shell config di fish locale
-        const isFish = process.env.SHELL?.includes('fish') || existsSync(`${homeDir}/.config/fish/config.fish`);
+        const fishConfigFile = path.join(homeDir, '.config', 'fish', 'config.fish');
+        const isFish = process.env.SHELL?.includes('fish') || existsSync(fishConfigFile);
         let foundAny = false;
         const modifiedFiles = [];
         if (isFish) {
-            const fishConfigFile = path.join(homeDir, '.config', 'fish', 'config.fish');
             await fs.mkdir(path.dirname(fishConfigFile), { recursive: true });
             foundAny = true;
             try {

package/src/http-agent.js CHANGED Viewed

@@ -1,21 +1,20 @@
-/**
- * @module http-agent
- * @description Pool manager per connessioni API http e requests HTTPS in proxy context.
- */
+import { Agent, setGlobalDispatcher } from 'undici';
 import { Agent as HttpsAgent } from 'https';
 import { Agent as HttpAgent } from 'http';
-// Evita timeout TCP handshakes costanti per network node-fetch requests proxy target
-export const httpsAgent = new HttpsAgent({
-    keepAlive: true,
-    maxSockets: 10,
-    maxFreeSockets: 5,
-    timeout: 5000,
+const globalAgent = new Agent({
+    keepAliveTimeout: 20 * 1000,
+    keepAliveMaxTimeout: 60 * 1000,
+    connections: 10,
+    pipelining: 1,
 });
+setGlobalDispatcher(globalAgent);
+export { globalAgent };
+export const httpsAgent = new HttpsAgent({
+    keepAlive: true, maxSockets: 10, maxFreeSockets: 5, timeout: 5000,
+});
 export const httpAgent = new HttpAgent({
-    keepAlive: true,
-    maxSockets: 10,
-    maxFreeSockets: 5,
-    timeout: 5000,
+    keepAlive: true, maxSockets: 10, maxFreeSockets: 5, timeout: 5000,
 });

package/src/inject.js CHANGED Viewed

@@ -9,6 +9,14 @@ import os from 'os';
 import { chalk, log, LOG_WARN, LOG_REFRESH } from './logger.js';
 import { atomicWrite } from './utils/atomic-write.js';
+const fileLocks = new Map();
+async function withFileLock(filePath, fn) {
+    const previous = fileLocks.get(filePath) || Promise.resolve();
+    const next = previous.then(fn, fn);
+    fileLocks.set(filePath, next.catch(() => { }));
+    return next;
+}
 // ─── Document injection rules ────────────────────────
 /**
@@ -19,24 +27,30 @@ import { atomicWrite } from './utils/atomic-write.js';
  * @returns {Promise<void>}
  */
 export async function injectBlock(filePath, content, blockId) {
-    let fileContent = '';
-    if (existsSync(filePath)) {
-        fileContent = await fs.readFile(filePath, 'utf8');
-    }
-    const startTag = `<!-- groundtruth:block-${blockId}:start -->`;
-    const endTag = `<!-- groundtruth:block-${blockId}:end -->`;
-    const block = `${startTag}\n${content.trim()}\n${endTag}`;
+    return withFileLock(filePath, async () => {
+        let fileContent = '';
+        if (existsSync(filePath)) {
+            fileContent = await fs.readFile(filePath, 'utf8');
+        }
+        const startTag = `<!-- groundtruth:block-${blockId}:start -->`;
+        const endTag = `<!-- groundtruth:block-${blockId}:end -->`;
+        const block = `${startTag}\n${content.trim()}\n${endTag}`;
-    const startIndex = fileContent.indexOf(startTag);
-    const endIndex = fileContent.indexOf(endTag);
+        const startIndex = fileContent.indexOf(startTag);
+        const endIndex = fileContent.indexOf(endTag);
-    if (startIndex !== -1 && endIndex !== -1 && endIndex > startIndex) {
-        fileContent = fileContent.slice(0, startIndex) + block + fileContent.slice(endIndex + endTag.length);
-    } else {
-        fileContent = fileContent.trimEnd() + '\n\n' + block + '\n';
-    }
+        if (startIndex !== -1 && endIndex !== -1 && endIndex > startIndex) {
+            // Sostituisce il blocco esistente mantenendo il resto del file intatto
+            const before = fileContent.slice(0, startIndex);
+            const after = fileContent.slice(endIndex + endTag.length);
+            fileContent = before + block + after;
+        } else {
+            // Aggiunge in fondo se non esiste
+            fileContent = fileContent.trimEnd() + '\n\n' + block + '\n';
+        }
-    await atomicWrite(filePath, fileContent);
+        await atomicWrite(filePath, fileContent);
+    });
 }
 /**
@@ -48,7 +62,7 @@ export async function injectBlock(filePath, content, blockId) {
 export async function removeStaleBlocks(filePath, activeBlockIds) {
     if (!existsSync(filePath)) return;
     let fileContent = await fs.readFile(filePath, 'utf8');
-    const regex = /<!-- groundtruth:block-(\w+):start -->[\s\S]*?<!-- groundtruth:block-\w+:end -->/g;
+    const regex = /<!-- groundtruth:block-(\w+):start -->[\s\S]*?<!-- groundtruth:block-\1:end -->/g;
     let modified = false;
     fileContent = fileContent.replace(regex, (match, blockId) => {

package/src/packages.js CHANGED Viewed

@@ -23,24 +23,36 @@ export async function readPackageDeps() {
         }
         const pkg = JSON.parse(await fs.readFile(pkgPath, 'utf8'));
-        const excludeList = ["plugin", "adapter", "check", "eslint", "prettier", "vite", "rollup", "webpack", "babel"];
+        const EXACT_EXCLUDE = new Set(['eslint', 'prettier', 'vite', 'rollup', 'webpack', 'babel', 'turbo', 'esbuild']);
+        const SUBSTR_EXCLUDE = ['plugin', 'adapter', '-check', 'lint-staged'];
         const filterAndFormat = (depsObj) => {
             if (!depsObj) return [];
             return Object.entries(depsObj)
-                .filter(([n]) => !excludeList.some(ex => n.toLowerCase().includes(ex)))
+                .filter(([n]) => {
+                    const lower = n.toLowerCase();
+                    const base = lower.startsWith('@') ? lower.split('/')[1] : lower;
+                    if (EXACT_EXCLUDE.has(base)) return false;
+                    if (SUBSTR_EXCLUDE.some(ex => lower.includes(ex))) return false;
+                    return true;
+                })
                 .map(([n, v]) => {
                     let cleanName = n;
                     if (n === '@sveltejs/kit') cleanName = 'sveltekit';
-                    else if (n.startsWith('@')) cleanName = n.split('/')[1];
                     let cleanVersion = String(v).replace(/[\^~>=<]/g, '').split('.').slice(0, 2).join('.');
                     return `${cleanName} ${cleanVersion}`;
                 });
         };
-        let selected = filterAndFormat(pkg.dependencies);
-        selected = selected.concat(filterAndFormat(pkg.devDependencies));
+        const depMap = new Map();
+        for (const [n, v] of Object.entries(pkg.dependencies || {})) {
+            depMap.set(n, v);
+        }
+        for (const [n, v] of Object.entries(pkg.devDependencies || {})) {
+            if (!depMap.has(n)) depMap.set(n, v);
+        }
+        const selected = filterAndFormat(Object.fromEntries(depMap));
         return selected.length > 0 ? selected : null;
     } catch (err) {
         log(LOG_WARN, chalk.yellow, chalk.white('package.json parse error') + `  →  ${chalk.yellow(err.message)}`);

package/src/proxy.js CHANGED Viewed

@@ -9,6 +9,8 @@ import { readPackageDeps, buildQuery } from './packages.js';
 import { chalk, log, LOG_WARN, LOG_BOLT } from './logger.js';
 import { httpsAgent } from './http-agent.js';
 import { sanitizeWebContent } from './sanitize.js';
+import { watch } from 'fs';
+import path from 'path';
 import { maxTokens, qualitySettings, verbose } from './cli.js';
 // ─── HTTP Node server daemon ─────────────────────────
@@ -20,12 +22,32 @@ import { maxTokens, qualitySettings, verbose } from './cli.js';
  */
 export async function createServer(usePackageJson) {
     let packageQueryCache = null;
+    let cacheStale = true;
     if (usePackageJson) {
         const depEntries = await readPackageDeps();
-        if (depEntries) packageQueryCache = buildQuery(depEntries);
+        if (depEntries) {
+            packageQueryCache = buildQuery(depEntries);
+            cacheStale = false;
+        }
+        const pkgPath = path.resolve(process.cwd(), 'package.json');
+        try {
+            watch(pkgPath, { persistent: false }, () => {
+                cacheStale = true;
+                log(LOG_REFRESH, chalk.cyan, chalk.white('package.json changed — cache invalidated'));
+            });
+        } catch (_) { }
     }
     const server = http.createServer(async (req, res) => {
+        if (usePackageJson && cacheStale) {
+            const depEntries = await readPackageDeps();
+            if (depEntries) {
+                packageQueryCache = buildQuery(depEntries);
+                cacheStale = false;
+            }
+        }
         if (req.method !== 'POST') { res.writeHead(404); res.end(); return; }
         let protocol = null;
@@ -95,7 +117,7 @@ export async function createServer(usePackageJson) {
             try {
                 if (!query || query.trim() === String(new Date().getFullYear())) throw new Error('Empty query');
                 // parallel load in proxy app process to boost response load
-                const { results, pageText } = await webSearch(query, true, {
+                const { results, pageText } = await webSearch(query, false, {
                     ddgResults: qualitySettings.ddgResults,
                     maxLen: qualitySettings.charsPerPage,
                     jinaTimeout: qualitySettings.jinaTimeout,
@@ -155,7 +177,13 @@ export async function createServer(usePackageJson) {
             headers['content-length'] = Buffer.byteLength(reqBodyStr);
             const proxyReq = https.request(targetUrl, { method: req.method, headers, agent: httpsAgent }, (proxyRes) => {
-                res.writeHead(proxyRes.statusCode, proxyRes.headers);
+                const responseHeaders = { ...proxyRes.headers };
+                delete responseHeaders['content-security-policy'];
+                delete responseHeaders['x-content-type-options'];
+                delete responseHeaders['content-encoding'];
+                delete responseHeaders['content-length'];
+                res.writeHead(proxyRes.statusCode, responseHeaders);
                 proxyRes.pipe(res);
             });
             proxyReq.on('error', () => { if (!res.headersSent) { res.writeHead(502); res.end('Bad Gateway'); } });

package/src/registry.js CHANGED Viewed

@@ -2,12 +2,13 @@
  * @module registry
  * @description Interroga il Cloudflare Worker (Remote Registry) per risolvere URL docs ufficiali.
  */
-import fetch from 'node-fetch';
+import { LRUCache } from './cache.js';
 const REGISTRY_API_URL = 'https://groundtruth-registry.antony-flex01.workers.dev/lookup';
-// Cache in memoria per evitare query multiple allo stesso endpoint durante lo stesso run del watcher
-const lookupCache = new Map();
+// Cache in memoria con LRU per evitare query multiple allo stesso endpoint
+const registryCache = new LRUCache({ max: 1000, ttl: 60 * 60 * 1000 });
 /**
  * @description Interroga asincronamente l'API cloudflare per cercare URL docs nel registry remoto
@@ -18,38 +19,42 @@ export async function lookupRegistryUrl(depName) {
     if (!depName) return null;
     // Normalizzazione preventiva
-    const name = depName.split(' ')[0].toLowerCase().trim();
+    let name = depName.split(' ')[0].toLowerCase().trim();
+    // Alias mapping per framework comuni con scope npm
+    if (name === '@sveltejs/kit') name = 'sveltekit';
     // Check hit in memoria (ritorna subito)
-    if (lookupCache.has(name)) {
-        return lookupCache.get(name);
-    }
+    const cached = registryCache.get(name);
+    if (cached !== undefined) return cached;
     try {
         // Fetch asincrono con timeout stretto per evitare latenze di fallback
         const res = await fetch(`${REGISTRY_API_URL}?pkg=${encodeURIComponent(name)}`, {
-            signal: AbortSignal.timeout(1500), // Max 1.5s aspetta il Cloudflare worker
+            signal: AbortSignal.timeout(1500),
             headers: {
-                'Accept': 'application/json'
+                'Accept': 'application/json',
+                'X-GroundTruth-Key': process.env.GROUNDTRUTH_REGISTRY_KEY || ''
             }
         });
         if (res.ok) {
             const data = await res.json();
             if (data && data.found && data.url) {
-                lookupCache.set(name, data.url); // Cache hit success
+                registryCache.set(name, data.url); // Cache hit success
                 return data.url;
             }
         }
         // Se l'API restituisce 404/not found
-        lookupCache.set(name, null); // Cache negative (così non rifacciamo network)
+        registryCache.set(name, null); // Cache negative (così non rifacciamo network)
         return null;
     } catch (err) {
         // Failover silente! (timeout o worker rotto). Se Cloudflare fallisce,
         // noi non diamo errore all'utente ma facciamo DDG search fallback locale naturale.
-        lookupCache.set(name, null);
+        registryCache.set(name, null);
         return null;
     }
 }

package/src/sanitize.js CHANGED Viewed

@@ -19,17 +19,66 @@ const DANGEROUS_PATTERNS = [
     /HUMAN:\s/gi,
 ];
+const NOISE_PATTERNS = [
+    /Skip to content/gi,
+    /Navigation Menu/gi,
+    /Toggle navigation/gi,
+    /Appearance settings/gi,
+    /AI CODE CREATION/gi,
+    /GitHub Copilot Write better code with AI/gi,
+    /Sign in/gi,
+    /Sign up/gi,
+    /Notifications/gi,
+    /Fork\s+\d+/gi,
+    /Star\s+[\d.]+[kK]?/gi,
+    /Code/gi,
+    /Issues/gi,
+    /Pull requests/gi,
+    /Actions/gi,
+    /Projects/gi,
+    /Security/gi,
+    /Insights/gi,
+    /Why GitHub/gi,
+    /Solutions/gi,
+    /Resources/gi,
+    /Open Source/gi,
+    /Enterprises/gi,
+    /Startups/gi,
+    /Customer stories|Ebooks & reports|Events & webinars/gi,
+    /GitHub (Sponsors|Skills|Accelerator|Archive Program|Spark|Models)/gi,
+    /Weekly Downloads|Unpacked Size|Total Files|Collaborators/gi,
+    /Analyze with Socket|Check bundle size|View package health|Explore dependencies/gi,
+    /Skip to content|Skip to main content|skip to:\[content\]|package search/gi,
+    /\[Signing in\]\(https:\/\/github\.com\/login\)/gi,
+    /Performing verification|This website uses a service to protect against malicious bots/gi,
+    /Radix Primitives|Visually or semantically separates content/gi,
+    /View docs here|Check bundle size|View package health/gi,
+];
 /**
- * @description Filtra pattern pericolosi di prompt injection dal testo web scrappato.
+ * @description Filtra pattern pericolosi e rumore di navigazione dal testo web scrappato.
  * @param   {string} text - Testo raw proveniente da web scraping
  * @param   {number} maxLen - Lunghezza massima output (default 8000)
  * @returns {string} Testo sanitizzato
  */
 export function sanitizeWebContent(text, maxLen = 8000) {
     if (!text || typeof text !== 'string') return '';
     let cleaned = text;
+    // 1. Rimuoviamo il rumore di navigazione
+    for (const pattern of NOISE_PATTERNS) {
+        cleaned = cleaned.replace(pattern, '');
+    }
+    // 2. Rimuoviamo pattern pericolosi
     for (const p of DANGEROUS_PATTERNS) {
         cleaned = cleaned.replace(p, '[FILTERED]');
     }
+    // 3. Normalizzazione spazi bianchi per risparmiare token
+    cleaned = cleaned.replace(/\s+/g, ' ').trim();
     return cleaned.slice(0, maxLen);
 }

package/src/search.js CHANGED Viewed

@@ -2,13 +2,11 @@
  * @module search
  * @description Logica di scraping web: Jina Reader → fallback Readability, registry bypass, DDG search.
  */
-import fetch from 'node-fetch';
 import * as cheerio from 'cheerio';
 import { Readability } from '@mozilla/readability';
 import { DOMParser } from 'linkedom';
 import { searchCache } from './cache.js';
 import { CircuitBreaker } from './circuit-breaker.js';
-import { httpAgent, httpsAgent } from './http-agent.js';
 import { sanitizeWebContent } from './sanitize.js';
 import { lookupRegistryUrl } from './registry.js';
@@ -48,7 +46,7 @@ export async function fetchPageContent(url, userAgent, opts = {}) {
             const text = await jinaRes.text();
             if (text && text.length > 200) {
                 if (verbose) console.log(`    [jina] ✓ ${url} → ${text.length} chars`);
-                return sanitizeWebContent(text.replace(/\s+/g, ' '), maxLen);
+                return sanitizeWebContent(text, maxLen);
             }
         }
     } catch (_) {
@@ -59,8 +57,7 @@ export async function fetchPageContent(url, userAgent, opts = {}) {
     try {
         const pageRes = await fetch(url, {
             signal: AbortSignal.timeout(5000),
-            headers: { 'User-Agent': userAgent },
-            agent: url.startsWith('https:') ? httpsAgent : httpAgent
+            headers: { 'User-Agent': userAgent }
         });
         if (pageRes.ok) {
             const document = new DOMParser().parseFromString(await pageRes.text(), 'text/html');
@@ -71,10 +68,7 @@ export async function fetchPageContent(url, userAgent, opts = {}) {
             } catch (_) {
                 text = document.body?.textContent || '';
             }
-            if (text) {
-                if (verbose) console.log(`    [readability] ✓ ${url} → ${text.length} chars`);
-                return sanitizeWebContent(text.replace(/\s+/g, ' '), maxLen);
-            }
+            return sanitizeWebContent(text, maxLen);
         }
     } catch (_) { }
@@ -142,7 +136,7 @@ async function doSearch(query, resultsLimit = 3) {
     const userAgent = getRandomUA();
     const searchRes = await fetch(
         `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`,
-        { signal: AbortSignal.timeout(5000), headers: { 'User-Agent': userAgent }, agent: httpsAgent }
+        { signal: AbortSignal.timeout(5000), headers: { 'User-Agent': userAgent } }
     );
     if (!searchRes.ok) throw new Error(`DDG ${searchRes.status}`);

package/src/state.js CHANGED Viewed

@@ -13,26 +13,39 @@ const STATE_FILE = path.join(STATE_DIR, 'watcher-state.json');
 /**
  * @description Carica gli hash validati e memorizzati dallo schedule storage locale.
- * @returns {Promise<Map>} Restituisce le hash map entries persistite del cron logic stream precedente.
+ * @param {string} currentVersion - Versione attuale dell'applicazione per validare la cache.
+ * @returns {Promise<Map>} Restituisce le hash map entries persistite o una mappa vuota se la versione differisce.
  */
-export async function loadBatchState() {
+export async function loadBatchState(currentVersion) {
     try {
         if (!existsSync(STATE_FILE)) return new Map();
         const data = await readFile(STATE_FILE, 'utf8');
-        const parsed = JSON.parse(data);
-        return new Map(Object.entries(parsed));
+        const state = JSON.parse(data);
+        // Invalida la cache se la versione è differente (forza refresh dopo update)
+        if (state.version !== currentVersion) {
+            return new Map();
+        }
+        return new Map(Object.entries(state.hashes || {}));
     } catch {
         return new Map();
     }
 }
 /**
- * @description Sincronizza hash batches per fault tolerance cross process
- * @param {Map} map - Oggetto dei blocchi hashati validi in mem persist state map
+ * @description Sincronizza hash batches e versione per fault tolerance cross process.
+ * @param {Map} map - Oggetto dei blocchi hashati validi.
+ * @param {string} version - Versione attuale dell'applicazione.
  * @returns {Promise<void>}
  */
-export async function saveBatchState(map) {
+export async function saveBatchState(map, version) {
     await mkdir(STATE_DIR, { recursive: true });
-    const obj = Object.fromEntries(map);
-    await atomicWrite(STATE_FILE, JSON.stringify(obj, null, 2), { backup: false });
+    const state = {
+        version: version,
+        updatedAt: new Date().toISOString(),
+        hashes: Object.fromEntries(map)
+    };
+    await atomicWrite(STATE_FILE, JSON.stringify(state, null, 2), { backup: false });
 }

package/src/utils/atomic-write.js CHANGED Viewed

@@ -49,6 +49,7 @@ export async function atomicWrite(filePath, content, options = {}) {
             }
         }
+        await unlink(tempFile).catch(() => { });
         return { success: true, backupPath: backup ? backupPath : null };
     } catch (err) {
         // Cleanup temp in caso di errore catch

package/src/watcher.js CHANGED Viewed

@@ -11,7 +11,6 @@ import { updateGeminiFiles, removeStaleBlocks } from './inject.js';
 import { chalk, label, log, LOG_WARN, LOG_REFRESH } from './logger.js';
 import { version, maxTokens, quality, qualitySettings, verbose, customSources } from './cli.js';
 import { loadBatchState, saveBatchState } from './state.js';
-import { httpsAgent } from './http-agent.js';
 // ─── Scheduler Watcher Instance ──────────────────────
@@ -52,7 +51,7 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
     async function updateSkill() {
         if (previousBatchHashes.size === 0) {
-            previousBatchHashes = await loadBatchState();
+            previousBatchHashes = await loadBatchState(version);
         }
         const deps = await readPackageDeps();
         if (!deps || deps.length === 0) {
@@ -70,7 +69,7 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
         for (const batch of batches) {
             const promise = (async () => {
-                const blockId = batchHash(batch);
+                const blockId = batchHash(batch.map(d => d.split(' ')[0]));
                 activeBlockIds.add(blockId);
                 const currentHash = batchHash(batch);
@@ -160,13 +159,16 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
         // ── Custom sources from .groundtruth.json ──
         if (customSources.length > 0) {
-            for (const src of customSources) {
+            const CUSTOM_SOURCE_TTL_MS = 60 * 60 * 1000;
+            const customWork = customSources.map(async (src) => {
                 const blockId = 'src_' + Buffer.from(src.url).toString('base64url').slice(0, 8);
+                const tsKey = 'src_ts_' + blockId;
                 activeBlockIds.add(blockId);
-                if (previousBatchHashes.has(blockId)) {
+                const lastFetchTime = previousBatchHashes.get(tsKey) || 0;
+                if ((Date.now() - lastFetchTime) < CUSTOM_SOURCE_TTL_MS) {
                     skippedCount++;
-                    continue;
+                    return;
                 }
                 try {
@@ -180,37 +182,27 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
                             globalContent: `## ${srcLabel}\n${sanitizeWebContent(text, 500)}\n`,
                             workspaceContent: md
                         }]);
-                        previousBatchHashes.set(blockId, blockId);
+                        previousBatchHashes.set(tsKey, Date.now());
                         updatedCount++;
                         log(LOG_REFRESH, chalk.cyan, `custom source updated → ${srcLabel}`);
                     }
                 } catch (_) {
                     failedCount++;
                 }
-            }
+            });
+            await Promise.all(customWork);
         }
         await removeStaleBlocks(globalPath, activeBlockIds);
         await removeStaleBlocks(workspacePath, activeBlockIds);
-        await saveBatchState(previousBatchHashes);
+        await saveBatchState(previousBatchHashes, version);
         log(LOG_REFRESH, chalk.gray, `cycle done → ${activeBlockIds.size} blocks active, ${updatedCount} updated, ${skippedCount} skipped, ${failedCount} errors`);
     }
-    let cycleCount = 0;
-    process.on('SIGINT', async () => {
-        await saveBatchState(previousBatchHashes);
-        process.exit(0);
-    });
-    updateSkill();
+    updateSkill().catch(err => log(LOG_WARN, chalk.yellow, 'updateSkill error: ' + err.message));
     setInterval(() => {
-        cycleCount++;
-        if (cycleCount % 10 === 0) {
-            httpsAgent.destroy();
-        }
-        updateSkill();
+        updateSkill().catch(err => log(LOG_WARN, chalk.yellow, 'updateSkill error: ' + err.message));
     }, intervalMinutes * 60 * 1000);
 }

package/specification.yaml DELETED Viewed

@@ -1,143 +0,0 @@
-name: GroundTruth
-version: 0.1.4
-description: |
-  GroundTruth is a zero-configuration, transparent middleware context injection layer.
-  It is designed to bridge the deterministic knowledge cutoff gap of LLM-based coding agents
-  (such as Claude Code or Antigravity) by dynamically fetching and injecting live,
-  dependency-specific documentation right before inference or via out-of-band rule files.
-architecture:
-  modes:
-    - name: Proxy Intercept Mode
-      alias: claude-code
-      description: |
-        Operates as a local HTTP reverse-proxy interceptor that captures outgoing API payloads
-        targeting Anthropic or Google Gemini endpoints, mutating them in transit.
-      target_endpoints:
-        - "https://api.anthropic.com/v1/messages"
-        - "https://generativelanguage.googleapis.com/v1beta/models/*"
-      flow:
-        1: "Listen on localhost port (default: 8080) and capture POST requests."
-        2: "Extract the last user message from the JSON body (supports `messages` array for Anthropic and `contents` array for Gemini)."
-        3: "Determine search query: use `--use-package-json` AST parsing, or fallback to the user message text."
-        4: "Scrape DuckDuckGo concurrently to retrieve live context (title, snippet, Readability-parsed text up to 4000 chars)."
-        5: "Mutate the `system` instruction prompt in the JSON payload by appending the live context block."
-        6: "Forward the modified request to the actual LLM provider, streaming the response back to the client."
-      components:
-        - src/proxy.js
-    - name: File Watcher Mode
-      alias: antigravity
-      description: |
-        Runs as a persistent background daemon. It polls the local project's dependencies,
-        fetches up-to-date documentation, and generates synchronized knowledge base dotfiles
-        (`GEMINI.md`) that the agent natively reads on invocation.
-      flow:
-        1: "Parses `package.json` dependencies and filters out build/tooling packages (e.g., eslint, vite, adapter)."
-        2: "Groups the filtered dependencies into chunks (batching) of configurable size (default: 3, max: 5) using `groupIntoBatches`."
-        3: "Hashes each dependency chunk (`batchHash` md5 sliced to 8 chars) to uniquely identify rule blocks and track state."
-        4: "Checks previous state to avoid redundant network fetches if the batch hasn't changed (`previousDepsKey` mapping)."
-        5: "Fetches live DuckDuckGo results per batch asynchronously, filtering out low-quality pages (403, captcha, < 200 chars)."
-        6: "Injects distinct dependency rule blocks bounded by `<!-- groundtruth:block-{hash}:start/end -->` directly inside `~/.gemini/GEMINI.md` (global) and `./.gemini/GEMINI.md` (workspace)."
-        7: "Garbage-collects stale blocks (`removeStaleBlocks`) belonging to evicted or resolved dependencies by regex matching active block IDs."
-      components:
-        - src/watcher.js
-        - src/inject.js
-core_modules:
-  - name: cli.js
-    responsibilities:
-      - "Process `process.argv` argument parsing."
-      - "Validation and defaulting of arguments (`--port`, `--interval`, `--batch-size`, `--claude-code`, `--antigravity`)."
-      - "Help/Docs Screen rendering and early exit conditions with aesthetic formatting inspired by Claude Code."
-  - name: search.js
-    responsibilities:
-      - "DuckDuckGo HTML scraping using `cheerio`."
-      - "URL resolution from DuckDuckGo's `uddg` tracking links."
-      - "User-Agent rotation to mitigate scraping blocks."
-      - "Integration with `CircuitBreaker` pattern for rate-limit protection."
-      - "Integration with bounded custom O(1) `LRUCache` from `cache.js`."
-      - "Page content extraction using `linkedom` and Mozilla's `Readability`."
-      - "Integration with persistent connection pooling components from `http-agent.js`."
-  - name: packages.js
-    responsibilities:
-      - "Read local Node modules context (`package.json`)."
-      - "Clean semantic versions (e.g., `^1.2.3` -> `1.2`)."
-      - "Filter out non-informative tooling (`vite`, `prettier`, `eslint`, `plugin`, `adapter`, `check`)."
-      - "Group dependencies into manageable batches (default: 3) prioritizing core dependencies over devDependencies."
-      - "Generate deterministic MD5 identifiers per batch for block management (`batchHash`)."
-      - "Construct search queries based on dependency batches plus temporal identifiers (`latest 2026`)."
-  - name: logger.js
-    responsibilities:
-      - "Chalk-driven aesthetic terminal formatting."
-      - "Centralized status symbolizing constants (✓, ⚠, ⚡, ↻, ◆, ✻)."
-      - "Timestamp generation mapped to `it-IT` locale."
-  - name: env.js
-    responsibilities:
-      - "Shell configuration auto-instrumentation (`.zshrc`, `.bashrc`, `.bash_profile`, `config.fish`)."
-      - "Exporting `ANTHROPIC_BASE_URL` to route CLI tools (like Claude Code) through the proxy."
-      - "Cross-Platform Environment Override (Bypassing Windows systems safely)."
-  - name: inject.js
-    responsibilities:
-      - "File I/O operations for `GEMINI.md` in both `$HOME` and `$CWD`."
-      - "Regex-based block injection using exact start/end bounds matching."
-      - "Stale block eviction via `removeStaleBlocks`."
-      - "Uses `atomicWrite` for zero-corruption file replacements."
-  - name: cache.js
-    responsibilities:
-      - "Implements zero-dependency O(1) bounded LRU caching logic."
-      - "Provides getter/setter mechanisms tied to temporal eviction limits."
-  - name: circuit-breaker.js
-    responsibilities:
-      - "Manages DuckDuckGo fetch attempts via threshold-based error state wrapping (OPEN/HALF_OPEN/CLOSED)."
-  - name: state.js
-    responsibilities:
-      - "Persistent recovery system for dependency batch hashes mapping across system crash/restarts."
-      - "Reads and writes `.gemini/watcher-state.json`."
-  - name: http-agent.js
-    responsibilities:
-      - "Creates reusable Keep-Alive HTTP and HTTPS configuration agents to mitigate handshake overheads."
-  - name: utils/atomic-write.js
-    responsibilities:
-      - "Creates temporary file blocks inside the target's directory (to prevent EXDEV cross-device link errors) performing `fs.rename` (POSIX) or safe-copies (Windows)."
-dependencies:
-  runtime: "Node.js >= 18.0.0 (uses ES Modules)"
-  built_ins:
-    - fs
-    - path
-    - os
-    - http
-    - https
-    - crypto
-  third_party:
-    - chalk: "^5.3.0" # Terminal styling
-    - cheerio: "^1.0.0" # Fast HTML parsing for DDG results
-    - linkedom: "^0.18.5" # Lightweight DOM emulation for Mozilla Readability
-    - node-fetch: "^3.3.2" # WHATWG Fetch API polyfill for Node
-    - "@mozilla/readability": "^0.5.0" # Main content extraction
-mechanics:
-  caching_and_eviction:
-    - "Search level caching: Runtime searches are cached for 5 minutes (`CACHE_TTL`), matching identical queries to avoid redundant network transit."
-    - "Watcher level caching: the daemon uses a Map tracking `blockId` -> `JSON.stringify(batch)`. If the hash signature matches across cycles, the network layer is skipped."
-  quality_assurance:
-    - "Content verification: Extracted text is sanitized and evaluated. If a page returns < 200 characters, or contains indicators of bot protection (e.g., '403', 'captcha', 'access denied'), the result is flagged."
-    - "Fallback mechanism: If a result is flagged as low-quality, the watcher rolls back and retains the successfully injected markdown block from the previous cycle."
-  network_resilience:
-    - "Timeouts: All outbound `node-fetch` requests strictly adhere to a 5-second `AbortSignal.timeout(5000)`."
-    - "Retries & Bans: `search.js` relies on a `CircuitBreaker` class mitigating recursive DuckDuckGo IP bans."
-    - "Resource Connection: Avoids TCP handshakes through persistent keep-alive Agent dispatching."
-  shell_integration:
-    - "Darwin/Linux-first: Windows OS (`win32`) skips autoconfig cleanly."
-    - "Fish Shell paths uniquely utilize `set -gx` constructs unlike standard Bash/Zsh `export` syntax, appending recursively or mutating existing assignments."