@antodevs/groundtruth 0.2.5 → 0.3.1

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/index.js CHANGED
@@ -3,6 +3,7 @@
3
3
  * @module index
4
4
  * @description Entry point runtime groundtruth delegazione CLI o proxy flow logic.
5
5
  */
6
+ import './src/http-agent.js';
6
7
  import { chalk, label } from './src/logger.js';
7
8
  import { usePackageJson, antigravityMode, claudeCodeMode, uninstallMode, port, intervalMinutes, batchSize, version } from './src/cli.js';
8
9
  import { createServer } from './src/proxy.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@antodevs/groundtruth",
3
- "version": "0.2.5",
3
+ "version": "0.3.1",
4
4
  "description": "Lightweight Node.js proxy to intercept API requests from coding agents and inject fresh web context",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -30,7 +30,6 @@
30
30
  "index.js",
31
31
  "src/",
32
32
  "assets/",
33
- "specification.yaml",
34
33
  "README.md",
35
34
  "LICENSE"
36
35
  ],
@@ -44,10 +43,9 @@
44
43
  "release:minor": "git add . && git commit -m \"chore: auto-commit before release\" || true && npm version minor && git push origin main --tags && npm publish"
45
44
  },
46
45
  "dependencies": {
47
- "@mozilla/readability": "^0.5.0",
46
+ "@mozilla/readability": "^0.6.0",
48
47
  "chalk": "^5.3.0",
49
- "cheerio": "^1.0.0",
50
- "linkedom": "^0.18.12",
51
- "node-fetch": "^3.3.2"
48
+ "cheerio": "^1.2.0",
49
+ "linkedom": "^0.18.12"
52
50
  }
53
51
  }
package/src/cache.js CHANGED
@@ -105,3 +105,5 @@ class LRUCache {
105
105
 
106
106
  // Esporta singola cache istanza module globale (singleton per memory context node)
107
107
  export const searchCache = new LRUCache({ max: 500, ttl: 5 * 60 * 1000 });
108
+
109
+ export { LRUCache };
@@ -13,6 +13,7 @@ export class CircuitBreaker {
13
13
  this.state = 'CLOSED'; // CLOSED, OPEN, HALF_OPEN
14
14
  this.failures = 0;
15
15
  this.lastFailureTime = null;
16
+ this.halfOpenSuccesses = 0;
16
17
  }
17
18
 
18
19
  /**
@@ -57,6 +58,7 @@ export class CircuitBreaker {
57
58
  // 429 rate limit apre il circuito immediatamente
58
59
  if (err?.message?.includes('429')) {
59
60
  this.failures = this.failureThreshold;
61
+ this.state = 'OPEN';
60
62
  } else {
61
63
  this.failures++;
62
64
  }
package/src/config.js CHANGED
@@ -45,7 +45,7 @@ export async function loadConfig() {
45
45
  const parsed = JSON.parse(raw);
46
46
 
47
47
  return {
48
- maxTokens: clamp(parsed.maxTokens ?? DEFAULTS.maxTokens, 500, 8000),
48
+ maxTokens: clamp(parsed.maxTokens || DEFAULTS.maxTokens, 500, 8000),
49
49
  quality: ['low', 'medium', 'high'].includes(parsed.quality) ? parsed.quality : DEFAULTS.quality,
50
50
  verbose: typeof parsed.verbose === 'boolean' ? parsed.verbose : DEFAULTS.verbose,
51
51
  sources: Array.isArray(parsed.sources) ? parsed.sources.filter(s => s && s.url) : DEFAULTS.sources,
package/src/env.js CHANGED
@@ -23,13 +23,12 @@ export async function autoSetEnv(p) {
23
23
  if (process.env.ANTHROPIC_BASE_URL === targetUrl) return;
24
24
 
25
25
  const homeDir = os.homedir();
26
- // Test exist pattern specifico shell config di fish locale
27
- const isFish = process.env.SHELL?.includes('fish') || existsSync(`${homeDir}/.config/fish/config.fish`);
26
+ const fishConfigFile = path.join(homeDir, '.config', 'fish', 'config.fish');
27
+ const isFish = process.env.SHELL?.includes('fish') || existsSync(fishConfigFile);
28
28
  let foundAny = false;
29
29
  const modifiedFiles = [];
30
30
 
31
31
  if (isFish) {
32
- const fishConfigFile = path.join(homeDir, '.config', 'fish', 'config.fish');
33
32
  await fs.mkdir(path.dirname(fishConfigFile), { recursive: true });
34
33
  foundAny = true;
35
34
  try {
package/src/http-agent.js CHANGED
@@ -1,21 +1,20 @@
1
- /**
2
- * @module http-agent
3
- * @description Pool manager per connessioni API http e requests HTTPS in proxy context.
4
- */
1
+ import { Agent, setGlobalDispatcher } from 'undici';
5
2
  import { Agent as HttpsAgent } from 'https';
6
3
  import { Agent as HttpAgent } from 'http';
7
4
 
8
- // Evita timeout TCP handshakes costanti per network node-fetch requests proxy target
9
- export const httpsAgent = new HttpsAgent({
10
- keepAlive: true,
11
- maxSockets: 10,
12
- maxFreeSockets: 5,
13
- timeout: 5000,
5
+ const globalAgent = new Agent({
6
+ keepAliveTimeout: 20 * 1000,
7
+ keepAliveMaxTimeout: 60 * 1000,
8
+ connections: 10,
9
+ pipelining: 1,
14
10
  });
11
+ setGlobalDispatcher(globalAgent);
12
+
13
+ export { globalAgent };
15
14
 
15
+ export const httpsAgent = new HttpsAgent({
16
+ keepAlive: true, maxSockets: 10, maxFreeSockets: 5, timeout: 5000,
17
+ });
16
18
  export const httpAgent = new HttpAgent({
17
- keepAlive: true,
18
- maxSockets: 10,
19
- maxFreeSockets: 5,
20
- timeout: 5000,
19
+ keepAlive: true, maxSockets: 10, maxFreeSockets: 5, timeout: 5000,
21
20
  });
package/src/inject.js CHANGED
@@ -9,6 +9,14 @@ import os from 'os';
9
9
  import { chalk, log, LOG_WARN, LOG_REFRESH } from './logger.js';
10
10
  import { atomicWrite } from './utils/atomic-write.js';
11
11
 
12
+ const fileLocks = new Map();
13
+ async function withFileLock(filePath, fn) {
14
+ const previous = fileLocks.get(filePath) || Promise.resolve();
15
+ const next = previous.then(fn, fn);
16
+ fileLocks.set(filePath, next.catch(() => { }));
17
+ return next;
18
+ }
19
+
12
20
  // ─── Document injection rules ────────────────────────
13
21
 
14
22
  /**
@@ -19,24 +27,30 @@ import { atomicWrite } from './utils/atomic-write.js';
19
27
  * @returns {Promise<void>}
20
28
  */
21
29
  export async function injectBlock(filePath, content, blockId) {
22
- let fileContent = '';
23
- if (existsSync(filePath)) {
24
- fileContent = await fs.readFile(filePath, 'utf8');
25
- }
26
- const startTag = `<!-- groundtruth:block-${blockId}:start -->`;
27
- const endTag = `<!-- groundtruth:block-${blockId}:end -->`;
28
- const block = `${startTag}\n${content.trim()}\n${endTag}`;
30
+ return withFileLock(filePath, async () => {
31
+ let fileContent = '';
32
+ if (existsSync(filePath)) {
33
+ fileContent = await fs.readFile(filePath, 'utf8');
34
+ }
35
+ const startTag = `<!-- groundtruth:block-${blockId}:start -->`;
36
+ const endTag = `<!-- groundtruth:block-${blockId}:end -->`;
37
+ const block = `${startTag}\n${content.trim()}\n${endTag}`;
29
38
 
30
- const startIndex = fileContent.indexOf(startTag);
31
- const endIndex = fileContent.indexOf(endTag);
39
+ const startIndex = fileContent.indexOf(startTag);
40
+ const endIndex = fileContent.indexOf(endTag);
32
41
 
33
- if (startIndex !== -1 && endIndex !== -1 && endIndex > startIndex) {
34
- fileContent = fileContent.slice(0, startIndex) + block + fileContent.slice(endIndex + endTag.length);
35
- } else {
36
- fileContent = fileContent.trimEnd() + '\n\n' + block + '\n';
37
- }
42
+ if (startIndex !== -1 && endIndex !== -1 && endIndex > startIndex) {
43
+ // Sostituisce il blocco esistente mantenendo il resto del file intatto
44
+ const before = fileContent.slice(0, startIndex);
45
+ const after = fileContent.slice(endIndex + endTag.length);
46
+ fileContent = before + block + after;
47
+ } else {
48
+ // Aggiunge in fondo se non esiste
49
+ fileContent = fileContent.trimEnd() + '\n\n' + block + '\n';
50
+ }
38
51
 
39
- await atomicWrite(filePath, fileContent);
52
+ await atomicWrite(filePath, fileContent);
53
+ });
40
54
  }
41
55
 
42
56
  /**
@@ -48,7 +62,7 @@ export async function injectBlock(filePath, content, blockId) {
48
62
  export async function removeStaleBlocks(filePath, activeBlockIds) {
49
63
  if (!existsSync(filePath)) return;
50
64
  let fileContent = await fs.readFile(filePath, 'utf8');
51
- const regex = /<!-- groundtruth:block-(\w+):start -->[\s\S]*?<!-- groundtruth:block-\w+:end -->/g;
65
+ const regex = /<!-- groundtruth:block-(\w+):start -->[\s\S]*?<!-- groundtruth:block-\1:end -->/g;
52
66
 
53
67
  let modified = false;
54
68
  fileContent = fileContent.replace(regex, (match, blockId) => {
package/src/packages.js CHANGED
@@ -23,24 +23,36 @@ export async function readPackageDeps() {
23
23
  }
24
24
  const pkg = JSON.parse(await fs.readFile(pkgPath, 'utf8'));
25
25
 
26
- const excludeList = ["plugin", "adapter", "check", "eslint", "prettier", "vite", "rollup", "webpack", "babel"];
26
+ const EXACT_EXCLUDE = new Set(['eslint', 'prettier', 'vite', 'rollup', 'webpack', 'babel', 'turbo', 'esbuild']);
27
+ const SUBSTR_EXCLUDE = ['plugin', 'adapter', '-check', 'lint-staged'];
27
28
 
28
29
  const filterAndFormat = (depsObj) => {
29
30
  if (!depsObj) return [];
30
31
  return Object.entries(depsObj)
31
- .filter(([n]) => !excludeList.some(ex => n.toLowerCase().includes(ex)))
32
+ .filter(([n]) => {
33
+ const lower = n.toLowerCase();
34
+ const base = lower.startsWith('@') ? lower.split('/')[1] : lower;
35
+ if (EXACT_EXCLUDE.has(base)) return false;
36
+ if (SUBSTR_EXCLUDE.some(ex => lower.includes(ex))) return false;
37
+ return true;
38
+ })
32
39
  .map(([n, v]) => {
33
40
  let cleanName = n;
34
41
  if (n === '@sveltejs/kit') cleanName = 'sveltekit';
35
- else if (n.startsWith('@')) cleanName = n.split('/')[1];
36
42
  let cleanVersion = String(v).replace(/[\^~>=<]/g, '').split('.').slice(0, 2).join('.');
37
43
  return `${cleanName} ${cleanVersion}`;
38
44
  });
39
45
  };
40
46
 
41
- let selected = filterAndFormat(pkg.dependencies);
42
- selected = selected.concat(filterAndFormat(pkg.devDependencies));
47
+ const depMap = new Map();
48
+ for (const [n, v] of Object.entries(pkg.dependencies || {})) {
49
+ depMap.set(n, v);
50
+ }
51
+ for (const [n, v] of Object.entries(pkg.devDependencies || {})) {
52
+ if (!depMap.has(n)) depMap.set(n, v);
53
+ }
43
54
 
55
+ const selected = filterAndFormat(Object.fromEntries(depMap));
44
56
  return selected.length > 0 ? selected : null;
45
57
  } catch (err) {
46
58
  log(LOG_WARN, chalk.yellow, chalk.white('package.json parse error') + ` → ${chalk.yellow(err.message)}`);
package/src/proxy.js CHANGED
@@ -9,6 +9,8 @@ import { readPackageDeps, buildQuery } from './packages.js';
9
9
  import { chalk, log, LOG_WARN, LOG_BOLT } from './logger.js';
10
10
  import { httpsAgent } from './http-agent.js';
11
11
  import { sanitizeWebContent } from './sanitize.js';
12
+ import { watch } from 'fs';
13
+ import path from 'path';
12
14
  import { maxTokens, qualitySettings, verbose } from './cli.js';
13
15
 
14
16
  // ─── HTTP Node server daemon ─────────────────────────
@@ -20,12 +22,32 @@ import { maxTokens, qualitySettings, verbose } from './cli.js';
20
22
  */
21
23
  export async function createServer(usePackageJson) {
22
24
  let packageQueryCache = null;
25
+ let cacheStale = true;
26
+
23
27
  if (usePackageJson) {
24
28
  const depEntries = await readPackageDeps();
25
- if (depEntries) packageQueryCache = buildQuery(depEntries);
29
+ if (depEntries) {
30
+ packageQueryCache = buildQuery(depEntries);
31
+ cacheStale = false;
32
+ }
33
+
34
+ const pkgPath = path.resolve(process.cwd(), 'package.json');
35
+ try {
36
+ watch(pkgPath, { persistent: false }, () => {
37
+ cacheStale = true;
38
+ log(LOG_REFRESH, chalk.cyan, chalk.white('package.json changed — cache invalidated'));
39
+ });
40
+ } catch (_) { }
26
41
  }
27
42
 
28
43
  const server = http.createServer(async (req, res) => {
44
+ if (usePackageJson && cacheStale) {
45
+ const depEntries = await readPackageDeps();
46
+ if (depEntries) {
47
+ packageQueryCache = buildQuery(depEntries);
48
+ cacheStale = false;
49
+ }
50
+ }
29
51
  if (req.method !== 'POST') { res.writeHead(404); res.end(); return; }
30
52
 
31
53
  let protocol = null;
@@ -95,7 +117,7 @@ export async function createServer(usePackageJson) {
95
117
  try {
96
118
  if (!query || query.trim() === String(new Date().getFullYear())) throw new Error('Empty query');
97
119
  // parallel load in proxy app process to boost response load
98
- const { results, pageText } = await webSearch(query, true, {
120
+ const { results, pageText } = await webSearch(query, false, {
99
121
  ddgResults: qualitySettings.ddgResults,
100
122
  maxLen: qualitySettings.charsPerPage,
101
123
  jinaTimeout: qualitySettings.jinaTimeout,
@@ -155,7 +177,13 @@ export async function createServer(usePackageJson) {
155
177
  headers['content-length'] = Buffer.byteLength(reqBodyStr);
156
178
 
157
179
  const proxyReq = https.request(targetUrl, { method: req.method, headers, agent: httpsAgent }, (proxyRes) => {
158
- res.writeHead(proxyRes.statusCode, proxyRes.headers);
180
+ const responseHeaders = { ...proxyRes.headers };
181
+ delete responseHeaders['content-security-policy'];
182
+ delete responseHeaders['x-content-type-options'];
183
+ delete responseHeaders['content-encoding'];
184
+ delete responseHeaders['content-length'];
185
+
186
+ res.writeHead(proxyRes.statusCode, responseHeaders);
159
187
  proxyRes.pipe(res);
160
188
  });
161
189
  proxyReq.on('error', () => { if (!res.headersSent) { res.writeHead(502); res.end('Bad Gateway'); } });
package/src/registry.js CHANGED
@@ -2,12 +2,13 @@
2
2
  * @module registry
3
3
  * @description Interroga il Cloudflare Worker (Remote Registry) per risolvere URL docs ufficiali.
4
4
  */
5
- import fetch from 'node-fetch';
5
+
6
+ import { LRUCache } from './cache.js';
6
7
 
7
8
  const REGISTRY_API_URL = 'https://groundtruth-registry.antony-flex01.workers.dev/lookup';
8
9
 
9
- // Cache in memoria per evitare query multiple allo stesso endpoint durante lo stesso run del watcher
10
- const lookupCache = new Map();
10
+ // Cache in memoria con LRU per evitare query multiple allo stesso endpoint
11
+ const registryCache = new LRUCache({ max: 1000, ttl: 60 * 60 * 1000 });
11
12
 
12
13
  /**
13
14
  * @description Interroga asincronamente l'API cloudflare per cercare URL docs nel registry remoto
@@ -18,38 +19,42 @@ export async function lookupRegistryUrl(depName) {
18
19
  if (!depName) return null;
19
20
 
20
21
  // Normalizzazione preventiva
21
- const name = depName.split(' ')[0].toLowerCase().trim();
22
+ let name = depName.split(' ')[0].toLowerCase().trim();
23
+
24
+ // Alias mapping per framework comuni con scope npm
25
+ if (name === '@sveltejs/kit') name = 'sveltekit';
26
+
22
27
 
23
28
  // Check hit in memoria (ritorna subito)
24
- if (lookupCache.has(name)) {
25
- return lookupCache.get(name);
26
- }
29
+ const cached = registryCache.get(name);
30
+ if (cached !== undefined) return cached;
27
31
 
28
32
  try {
29
33
  // Fetch asincrono con timeout stretto per evitare latenze di fallback
30
34
  const res = await fetch(`${REGISTRY_API_URL}?pkg=${encodeURIComponent(name)}`, {
31
- signal: AbortSignal.timeout(1500), // Max 1.5s aspetta il Cloudflare worker
35
+ signal: AbortSignal.timeout(1500),
32
36
  headers: {
33
- 'Accept': 'application/json'
37
+ 'Accept': 'application/json',
38
+ 'X-GroundTruth-Key': process.env.GROUNDTRUTH_REGISTRY_KEY || ''
34
39
  }
35
40
  });
36
41
 
37
42
  if (res.ok) {
38
43
  const data = await res.json();
39
44
  if (data && data.found && data.url) {
40
- lookupCache.set(name, data.url); // Cache hit success
45
+ registryCache.set(name, data.url); // Cache hit success
41
46
  return data.url;
42
47
  }
43
48
  }
44
49
 
45
50
  // Se l'API restituisce 404/not found
46
- lookupCache.set(name, null); // Cache negative (così non rifacciamo network)
51
+ registryCache.set(name, null); // Cache negative (così non rifacciamo network)
47
52
  return null;
48
53
 
49
54
  } catch (err) {
50
55
  // Failover silente! (timeout o worker rotto). Se Cloudflare fallisce,
51
56
  // noi non diamo errore all'utente ma facciamo DDG search fallback locale naturale.
52
- lookupCache.set(name, null);
57
+ registryCache.set(name, null);
53
58
  return null;
54
59
  }
55
60
  }
package/src/sanitize.js CHANGED
@@ -19,17 +19,66 @@ const DANGEROUS_PATTERNS = [
19
19
  /HUMAN:\s/gi,
20
20
  ];
21
21
 
22
+ const NOISE_PATTERNS = [
23
+ /Skip to content/gi,
24
+ /Navigation Menu/gi,
25
+ /Toggle navigation/gi,
26
+ /Appearance settings/gi,
27
+ /AI CODE CREATION/gi,
28
+ /GitHub Copilot Write better code with AI/gi,
29
+ /Sign in/gi,
30
+ /Sign up/gi,
31
+ /Notifications/gi,
32
+ /Fork\s+\d+/gi,
33
+ /Star\s+[\d.]+[kK]?/gi,
34
+ /Code/gi,
35
+ /Issues/gi,
36
+ /Pull requests/gi,
37
+ /Actions/gi,
38
+ /Projects/gi,
39
+ /Security/gi,
40
+ /Insights/gi,
41
+ /Why GitHub/gi,
42
+ /Solutions/gi,
43
+ /Resources/gi,
44
+ /Open Source/gi,
45
+ /Enterprises/gi,
46
+ /Startups/gi,
47
+ /Customer stories|Ebooks & reports|Events & webinars/gi,
48
+ /GitHub (Sponsors|Skills|Accelerator|Archive Program|Spark|Models)/gi,
49
+ /Weekly Downloads|Unpacked Size|Total Files|Collaborators/gi,
50
+ /Analyze with Socket|Check bundle size|View package health|Explore dependencies/gi,
51
+ /Skip to content|Skip to main content|skip to:\[content\]|package search/gi,
52
+ /\[Signing in\]\(https:\/\/github\.com\/login\)/gi,
53
+ /Performing verification|This website uses a service to protect against malicious bots/gi,
54
+ /Radix Primitives|Visually or semantically separates content/gi,
55
+ /View docs here|Check bundle size|View package health/gi,
56
+ ];
57
+
58
+
22
59
  /**
23
- * @description Filtra pattern pericolosi di prompt injection dal testo web scrappato.
60
+ * @description Filtra pattern pericolosi e rumore di navigazione dal testo web scrappato.
24
61
  * @param {string} text - Testo raw proveniente da web scraping
25
62
  * @param {number} maxLen - Lunghezza massima output (default 8000)
26
63
  * @returns {string} Testo sanitizzato
27
64
  */
28
65
  export function sanitizeWebContent(text, maxLen = 8000) {
29
66
  if (!text || typeof text !== 'string') return '';
67
+
30
68
  let cleaned = text;
69
+
70
+ // 1. Rimuoviamo il rumore di navigazione
71
+ for (const pattern of NOISE_PATTERNS) {
72
+ cleaned = cleaned.replace(pattern, '');
73
+ }
74
+
75
+ // 2. Rimuoviamo pattern pericolosi
31
76
  for (const p of DANGEROUS_PATTERNS) {
32
77
  cleaned = cleaned.replace(p, '[FILTERED]');
33
78
  }
79
+
80
+ // 3. Normalizzazione spazi bianchi per risparmiare token
81
+ cleaned = cleaned.replace(/\s+/g, ' ').trim();
82
+
34
83
  return cleaned.slice(0, maxLen);
35
84
  }
package/src/search.js CHANGED
@@ -2,13 +2,11 @@
2
2
  * @module search
3
3
  * @description Logica di scraping web: Jina Reader → fallback Readability, registry bypass, DDG search.
4
4
  */
5
- import fetch from 'node-fetch';
6
5
  import * as cheerio from 'cheerio';
7
6
  import { Readability } from '@mozilla/readability';
8
7
  import { DOMParser } from 'linkedom';
9
8
  import { searchCache } from './cache.js';
10
9
  import { CircuitBreaker } from './circuit-breaker.js';
11
- import { httpAgent, httpsAgent } from './http-agent.js';
12
10
  import { sanitizeWebContent } from './sanitize.js';
13
11
  import { lookupRegistryUrl } from './registry.js';
14
12
 
@@ -48,7 +46,7 @@ export async function fetchPageContent(url, userAgent, opts = {}) {
48
46
  const text = await jinaRes.text();
49
47
  if (text && text.length > 200) {
50
48
  if (verbose) console.log(` [jina] ✓ ${url} → ${text.length} chars`);
51
- return sanitizeWebContent(text.replace(/\s+/g, ' '), maxLen);
49
+ return sanitizeWebContent(text, maxLen);
52
50
  }
53
51
  }
54
52
  } catch (_) {
@@ -59,8 +57,7 @@ export async function fetchPageContent(url, userAgent, opts = {}) {
59
57
  try {
60
58
  const pageRes = await fetch(url, {
61
59
  signal: AbortSignal.timeout(5000),
62
- headers: { 'User-Agent': userAgent },
63
- agent: url.startsWith('https:') ? httpsAgent : httpAgent
60
+ headers: { 'User-Agent': userAgent }
64
61
  });
65
62
  if (pageRes.ok) {
66
63
  const document = new DOMParser().parseFromString(await pageRes.text(), 'text/html');
@@ -71,10 +68,7 @@ export async function fetchPageContent(url, userAgent, opts = {}) {
71
68
  } catch (_) {
72
69
  text = document.body?.textContent || '';
73
70
  }
74
- if (text) {
75
- if (verbose) console.log(` [readability] ✓ ${url} → ${text.length} chars`);
76
- return sanitizeWebContent(text.replace(/\s+/g, ' '), maxLen);
77
- }
71
+ return sanitizeWebContent(text, maxLen);
78
72
  }
79
73
  } catch (_) { }
80
74
 
@@ -142,7 +136,7 @@ async function doSearch(query, resultsLimit = 3) {
142
136
  const userAgent = getRandomUA();
143
137
  const searchRes = await fetch(
144
138
  `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`,
145
- { signal: AbortSignal.timeout(5000), headers: { 'User-Agent': userAgent }, agent: httpsAgent }
139
+ { signal: AbortSignal.timeout(5000), headers: { 'User-Agent': userAgent } }
146
140
  );
147
141
  if (!searchRes.ok) throw new Error(`DDG ${searchRes.status}`);
148
142
 
package/src/state.js CHANGED
@@ -13,26 +13,39 @@ const STATE_FILE = path.join(STATE_DIR, 'watcher-state.json');
13
13
 
14
14
  /**
15
15
  * @description Carica gli hash validati e memorizzati dallo schedule storage locale.
16
- * @returns {Promise<Map>} Restituisce le hash map entries persistite del cron logic stream precedente.
16
+ * @param {string} currentVersion - Versione attuale dell'applicazione per validare la cache.
17
+ * @returns {Promise<Map>} Restituisce le hash map entries persistite o una mappa vuota se la versione differisce.
17
18
  */
18
- export async function loadBatchState() {
19
+ export async function loadBatchState(currentVersion) {
19
20
  try {
20
21
  if (!existsSync(STATE_FILE)) return new Map();
21
22
  const data = await readFile(STATE_FILE, 'utf8');
22
- const parsed = JSON.parse(data);
23
- return new Map(Object.entries(parsed));
23
+ const state = JSON.parse(data);
24
+
25
+ // Invalida la cache se la versione è differente (forza refresh dopo update)
26
+ if (state.version !== currentVersion) {
27
+ return new Map();
28
+ }
29
+
30
+ return new Map(Object.entries(state.hashes || {}));
24
31
  } catch {
25
32
  return new Map();
26
33
  }
27
34
  }
28
35
 
29
36
  /**
30
- * @description Sincronizza hash batches per fault tolerance cross process
31
- * @param {Map} map - Oggetto dei blocchi hashati validi in mem persist state map
37
+ * @description Sincronizza hash batches e versione per fault tolerance cross process.
38
+ * @param {Map} map - Oggetto dei blocchi hashati validi.
39
+ * @param {string} version - Versione attuale dell'applicazione.
32
40
  * @returns {Promise<void>}
33
41
  */
34
- export async function saveBatchState(map) {
42
+ export async function saveBatchState(map, version) {
35
43
  await mkdir(STATE_DIR, { recursive: true });
36
- const obj = Object.fromEntries(map);
37
- await atomicWrite(STATE_FILE, JSON.stringify(obj, null, 2), { backup: false });
44
+ const state = {
45
+ version: version,
46
+ updatedAt: new Date().toISOString(),
47
+ hashes: Object.fromEntries(map)
48
+ };
49
+ await atomicWrite(STATE_FILE, JSON.stringify(state, null, 2), { backup: false });
38
50
  }
51
+
@@ -49,6 +49,7 @@ export async function atomicWrite(filePath, content, options = {}) {
49
49
  }
50
50
  }
51
51
 
52
+ await unlink(tempFile).catch(() => { });
52
53
  return { success: true, backupPath: backup ? backupPath : null };
53
54
  } catch (err) {
54
55
  // Cleanup temp in caso di errore catch
package/src/watcher.js CHANGED
@@ -11,7 +11,6 @@ import { updateGeminiFiles, removeStaleBlocks } from './inject.js';
11
11
  import { chalk, label, log, LOG_WARN, LOG_REFRESH } from './logger.js';
12
12
  import { version, maxTokens, quality, qualitySettings, verbose, customSources } from './cli.js';
13
13
  import { loadBatchState, saveBatchState } from './state.js';
14
- import { httpsAgent } from './http-agent.js';
15
14
 
16
15
  // ─── Scheduler Watcher Instance ──────────────────────
17
16
 
@@ -52,7 +51,7 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
52
51
 
53
52
  async function updateSkill() {
54
53
  if (previousBatchHashes.size === 0) {
55
- previousBatchHashes = await loadBatchState();
54
+ previousBatchHashes = await loadBatchState(version);
56
55
  }
57
56
  const deps = await readPackageDeps();
58
57
  if (!deps || deps.length === 0) {
@@ -70,7 +69,7 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
70
69
 
71
70
  for (const batch of batches) {
72
71
  const promise = (async () => {
73
- const blockId = batchHash(batch);
72
+ const blockId = batchHash(batch.map(d => d.split(' ')[0]));
74
73
  activeBlockIds.add(blockId);
75
74
 
76
75
  const currentHash = batchHash(batch);
@@ -160,13 +159,16 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
160
159
 
161
160
  // ── Custom sources from .groundtruth.json ──
162
161
  if (customSources.length > 0) {
163
- for (const src of customSources) {
162
+ const CUSTOM_SOURCE_TTL_MS = 60 * 60 * 1000;
163
+ const customWork = customSources.map(async (src) => {
164
164
  const blockId = 'src_' + Buffer.from(src.url).toString('base64url').slice(0, 8);
165
+ const tsKey = 'src_ts_' + blockId;
165
166
  activeBlockIds.add(blockId);
166
167
 
167
- if (previousBatchHashes.has(blockId)) {
168
+ const lastFetchTime = previousBatchHashes.get(tsKey) || 0;
169
+ if ((Date.now() - lastFetchTime) < CUSTOM_SOURCE_TTL_MS) {
168
170
  skippedCount++;
169
- continue;
171
+ return;
170
172
  }
171
173
 
172
174
  try {
@@ -180,37 +182,27 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
180
182
  globalContent: `## ${srcLabel}\n${sanitizeWebContent(text, 500)}\n`,
181
183
  workspaceContent: md
182
184
  }]);
183
- previousBatchHashes.set(blockId, blockId);
185
+ previousBatchHashes.set(tsKey, Date.now());
184
186
  updatedCount++;
185
187
  log(LOG_REFRESH, chalk.cyan, `custom source updated → ${srcLabel}`);
186
188
  }
187
189
  } catch (_) {
188
190
  failedCount++;
189
191
  }
190
- }
192
+ });
193
+ await Promise.all(customWork);
191
194
  }
192
195
 
193
196
  await removeStaleBlocks(globalPath, activeBlockIds);
194
197
  await removeStaleBlocks(workspacePath, activeBlockIds);
195
198
 
196
- await saveBatchState(previousBatchHashes);
199
+ await saveBatchState(previousBatchHashes, version);
197
200
 
198
201
  log(LOG_REFRESH, chalk.gray, `cycle done → ${activeBlockIds.size} blocks active, ${updatedCount} updated, ${skippedCount} skipped, ${failedCount} errors`);
199
202
  }
200
203
 
201
- let cycleCount = 0;
202
-
203
- process.on('SIGINT', async () => {
204
- await saveBatchState(previousBatchHashes);
205
- process.exit(0);
206
- });
207
-
208
- updateSkill();
204
+ updateSkill().catch(err => log(LOG_WARN, chalk.yellow, 'updateSkill error: ' + err.message));
209
205
  setInterval(() => {
210
- cycleCount++;
211
- if (cycleCount % 10 === 0) {
212
- httpsAgent.destroy();
213
- }
214
- updateSkill();
206
+ updateSkill().catch(err => log(LOG_WARN, chalk.yellow, 'updateSkill error: ' + err.message));
215
207
  }, intervalMinutes * 60 * 1000);
216
208
  }
@@ -1,143 +0,0 @@
1
- name: GroundTruth
2
- version: 0.1.4
3
- description: |
4
- GroundTruth is a zero-configuration, transparent middleware context injection layer.
5
- It is designed to bridge the deterministic knowledge cutoff gap of LLM-based coding agents
6
- (such as Claude Code or Antigravity) by dynamically fetching and injecting live,
7
- dependency-specific documentation right before inference or via out-of-band rule files.
8
-
9
- architecture:
10
- modes:
11
- - name: Proxy Intercept Mode
12
- alias: claude-code
13
- description: |
14
- Operates as a local HTTP reverse-proxy interceptor that captures outgoing API payloads
15
- targeting Anthropic or Google Gemini endpoints, mutating them in transit.
16
- target_endpoints:
17
- - "https://api.anthropic.com/v1/messages"
18
- - "https://generativelanguage.googleapis.com/v1beta/models/*"
19
- flow:
20
- 1: "Listen on localhost port (default: 8080) and capture POST requests."
21
- 2: "Extract the last user message from the JSON body (supports `messages` array for Anthropic and `contents` array for Gemini)."
22
- 3: "Determine search query: use `--use-package-json` AST parsing, or fallback to the user message text."
23
- 4: "Scrape DuckDuckGo concurrently to retrieve live context (title, snippet, Readability-parsed text up to 4000 chars)."
24
- 5: "Mutate the `system` instruction prompt in the JSON payload by appending the live context block."
25
- 6: "Forward the modified request to the actual LLM provider, streaming the response back to the client."
26
- components:
27
- - src/proxy.js
28
-
29
- - name: File Watcher Mode
30
- alias: antigravity
31
- description: |
32
- Runs as a persistent background daemon. It polls the local project's dependencies,
33
- fetches up-to-date documentation, and generates synchronized knowledge base dotfiles
34
- (`GEMINI.md`) that the agent natively reads on invocation.
35
- flow:
36
- 1: "Parses `package.json` dependencies and filters out build/tooling packages (e.g., eslint, vite, adapter)."
37
- 2: "Groups the filtered dependencies into chunks (batching) of configurable size (default: 3, max: 5) using `groupIntoBatches`."
38
- 3: "Hashes each dependency chunk (`batchHash` md5 sliced to 8 chars) to uniquely identify rule blocks and track state."
39
- 4: "Checks previous state to avoid redundant network fetches if the batch hasn't changed (`previousDepsKey` mapping)."
40
- 5: "Fetches live DuckDuckGo results per batch asynchronously, filtering out low-quality pages (403, captcha, < 200 chars)."
41
- 6: "Injects distinct dependency rule blocks bounded by `<!-- groundtruth:block-{hash}:start/end -->` directly inside `~/.gemini/GEMINI.md` (global) and `./.gemini/GEMINI.md` (workspace)."
42
- 7: "Garbage-collects stale blocks (`removeStaleBlocks`) belonging to evicted or resolved dependencies by regex matching active block IDs."
43
- components:
44
- - src/watcher.js
45
- - src/inject.js
46
-
47
- core_modules:
48
- - name: cli.js
49
- responsibilities:
50
- - "Process `process.argv` argument parsing."
51
- - "Validation and defaulting of arguments (`--port`, `--interval`, `--batch-size`, `--claude-code`, `--antigravity`)."
52
- - "Help/Docs Screen rendering and early exit conditions with aesthetic formatting inspired by Claude Code."
53
-
54
- - name: search.js
55
- responsibilities:
56
- - "DuckDuckGo HTML scraping using `cheerio`."
57
- - "URL resolution from DuckDuckGo's `uddg` tracking links."
58
- - "User-Agent rotation to mitigate scraping blocks."
59
- - "Integration with `CircuitBreaker` pattern for rate-limit protection."
60
- - "Integration with bounded custom O(1) `LRUCache` from `cache.js`."
61
- - "Page content extraction using `linkedom` and Mozilla's `Readability`."
62
- - "Integration with persistent connection pooling components from `http-agent.js`."
63
-
64
- - name: packages.js
65
- responsibilities:
66
- - "Read local Node modules context (`package.json`)."
67
- - "Clean semantic versions (e.g., `^1.2.3` -> `1.2`)."
68
- - "Filter out non-informative tooling (`vite`, `prettier`, `eslint`, `plugin`, `adapter`, `check`)."
69
- - "Group dependencies into manageable batches (default: 3) prioritizing core dependencies over devDependencies."
70
- - "Generate deterministic MD5 identifiers per batch for block management (`batchHash`)."
71
- - "Construct search queries based on dependency batches plus temporal identifiers (`latest 2026`)."
72
-
73
- - name: logger.js
74
- responsibilities:
75
- - "Chalk-driven aesthetic terminal formatting."
76
- - "Centralized status symbolizing constants (✓, ⚠, ⚡, ↻, ◆, ✻)."
77
- - "Timestamp generation mapped to `it-IT` locale."
78
-
79
- - name: env.js
80
- responsibilities:
81
- - "Shell configuration auto-instrumentation (`.zshrc`, `.bashrc`, `.bash_profile`, `config.fish`)."
82
- - "Exporting `ANTHROPIC_BASE_URL` to route CLI tools (like Claude Code) through the proxy."
83
- - "Cross-Platform Environment Override (Bypassing Windows systems safely)."
84
-
85
- - name: inject.js
86
- responsibilities:
87
- - "File I/O operations for `GEMINI.md` in both `$HOME` and `$CWD`."
88
- - "Regex-based block injection using exact start/end bounds matching."
89
- - "Stale block eviction via `removeStaleBlocks`."
90
- - "Uses `atomicWrite` for zero-corruption file replacements."
91
-
92
- - name: cache.js
93
- responsibilities:
94
- - "Implements zero-dependency O(1) bounded LRU caching logic."
95
- - "Provides getter/setter mechanisms tied to temporal eviction limits."
96
-
97
- - name: circuit-breaker.js
98
- responsibilities:
99
- - "Manages DuckDuckGo fetch attempts via threshold-based error state wrapping (OPEN/HALF_OPEN/CLOSED)."
100
-
101
- - name: state.js
102
- responsibilities:
103
- - "Persistent recovery system for dependency batch hashes mapping across system crash/restarts."
104
- - "Reads and writes `.gemini/watcher-state.json`."
105
-
106
- - name: http-agent.js
107
- responsibilities:
108
- - "Creates reusable Keep-Alive HTTP and HTTPS configuration agents to mitigate handshake overheads."
109
-
110
- - name: utils/atomic-write.js
111
- responsibilities:
112
- - "Creates temporary file blocks inside the target's directory (to prevent EXDEV cross-device link errors) performing `fs.rename` (POSIX) or safe-copies (Windows)."
113
-
114
- dependencies:
115
- runtime: "Node.js >= 18.0.0 (uses ES Modules)"
116
- built_ins:
117
- - fs
118
- - path
119
- - os
120
- - http
121
- - https
122
- - crypto
123
- third_party:
124
- - chalk: "^5.3.0" # Terminal styling
125
- - cheerio: "^1.0.0" # Fast HTML parsing for DDG results
126
- - linkedom: "^0.18.5" # Lightweight DOM emulation for Mozilla Readability
127
- - node-fetch: "^3.3.2" # WHATWG Fetch API polyfill for Node
128
- - "@mozilla/readability": "^0.5.0" # Main content extraction
129
-
130
- mechanics:
131
- caching_and_eviction:
132
- - "Search level caching: Runtime searches are cached for 5 minutes (`CACHE_TTL`), matching identical queries to avoid redundant network transit."
133
- - "Watcher level caching: the daemon uses a Map tracking `blockId` -> `JSON.stringify(batch)`. If the hash signature matches across cycles, the network layer is skipped."
134
- quality_assurance:
135
- - "Content verification: Extracted text is sanitized and evaluated. If a page returns < 200 characters, or contains indicators of bot protection (e.g., '403', 'captcha', 'access denied'), the result is flagged."
136
- - "Fallback mechanism: If a result is flagged as low-quality, the watcher rolls back and retains the successfully injected markdown block from the previous cycle."
137
- network_resilience:
138
- - "Timeouts: All outbound `node-fetch` requests strictly adhere to a 5-second `AbortSignal.timeout(5000)`."
139
- - "Retries & Bans: `search.js` relies on a `CircuitBreaker` class mitigating recursive DuckDuckGo IP bans."
140
- - "Resource Connection: Avoids TCP handshakes through persistent keep-alive Agent dispatching."
141
- shell_integration:
142
- - "Darwin/Linux-first: Windows OS (`win32`) skips autoconfig cleanly."
143
- - "Fish Shell paths uniquely utilize `set -gx` constructs unlike standard Bash/Zsh `export` syntax, appending recursively or mutating existing assignments."