@antodevs/groundtruth 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +130 -0
- package/assets/banner.png +0 -0
- package/index.js +32 -0
- package/package.json +48 -0
- package/specification.yaml +143 -0
- package/src/cache.js +107 -0
- package/src/circuit-breaker.js +63 -0
- package/src/cli.js +58 -0
- package/src/env.js +120 -0
- package/src/http-agent.js +21 -0
- package/src/inject.js +93 -0
- package/src/logger.js +47 -0
- package/src/packages.js +87 -0
- package/src/proxy.js +164 -0
- package/src/search.js +157 -0
- package/src/state.js +37 -0
- package/src/utils/atomic-write.js +58 -0
- package/src/watcher.js +146 -0
package/src/search.js
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module search
|
|
3
|
+
* @description Logica di scraping web su DuckDuckGo tramite cheerio e linkedom.
|
|
4
|
+
*/
|
|
5
|
+
import fetch from 'node-fetch';
|
|
6
|
+
import * as cheerio from 'cheerio';
|
|
7
|
+
import { Readability } from '@mozilla/readability';
|
|
8
|
+
import { DOMParser } from 'linkedom';
|
|
9
|
+
import { searchCache } from './cache.js';
|
|
10
|
+
import { CircuitBreaker } from './circuit-breaker.js';
|
|
11
|
+
import { httpAgent, httpsAgent } from './http-agent.js';
|
|
12
|
+
|
|
13
|
+
// ─── Config & Cache ──────────────────────────────────
|
|
14
|
+
|
|
15
|
+
// Rotate common desktop Chrome user agents to reduce the chance of IP bans
const USER_AGENTS = [
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
  'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
];

/**
 * @description Picks one User-Agent at random from the pool above.
 * @returns {string} A User-Agent string
 */
function getRandomUA() {
  const index = Math.floor(Math.random() * USER_AGENTS.length);
  return USER_AGENTS[index];
}
|
|
29
|
+
|
|
30
|
+
const ddgCircuit = new CircuitBreaker({ failureThreshold: 3, resetTimeout: 30000 });
|
|
31
|
+
|
|
32
|
+
/**
 * @description Unwraps DuckDuckGo redirect links by extracting the `uddg`
 *   query-string parameter from the wrapped href.
 * @param {string} href - Possibly wrapped URL coming from a DDG result node
 * @returns {string} The decoded target URL, or the input unchanged when no
 *   wrapper parameter is present or the href cannot be parsed
 */
export function resolveDDGUrl(href) {
  try {
    const wrapped = new URL(href, 'https://duckduckgo.com');
    const target = wrapped.searchParams.get('uddg');
    if (target) {
      return decodeURIComponent(target);
    }
    return href;
  } catch {
    // Unparseable input: hand it back untouched rather than failing the caller.
    return href;
  }
}
|
|
46
|
+
|
|
47
|
+
/**
 * @description Performs the real HTTP request against the DuckDuckGo HTML
 *   endpoint and scrapes the result list.
 * @param {string} query - Formatted DDG search query
 * @returns {Promise<Object>} { results, userAgent }
 * @throws {Error} When the DDG request fails or yields no results
 */
async function doSearch(query) {
  const userAgent = getRandomUA();

  // Hit the JS-free HTML endpoint; 5s cap so a slow DDG node cannot stall us.
  const endpoint = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
  const searchRes = await fetch(endpoint, {
    signal: AbortSignal.timeout(5000),
    headers: { 'User-Agent': userAgent },
    agent: httpsAgent
  });
  if (!searchRes.ok) throw new Error(`DDG ${searchRes.status}`);

  const html = await searchRes.text();
  const $ = cheerio.load(html);

  const collected = [];
  $('.result__body').each((i, el) => {
    const node = $(el);
    const title = node.find('.result__title').text().trim();
    const snippet = node.find('.result__snippet').text().trim();
    const rawUrl = node.find('.result__url').attr('href') || node.find('a.result__url').attr('href');
    const resultUrl = rawUrl ? resolveDDGUrl(rawUrl) : '';
    if (title && resultUrl) collected.push({ title, snippet, url: resultUrl });
  });

  // Dedupe by URL, keeping the first three unique hits.
  const seenUrls = new Set();
  const results = [];
  for (const entry of collected) {
    if (!entry.url || seenUrls.has(entry.url)) continue;
    seenUrls.add(entry.url);
    results.push(entry);
    if (results.length === 3) break;
  }

  if (results.length === 0) throw new Error('No DDG results');
  return { results, userAgent };
}
|
|
78
|
+
|
|
79
|
+
/**
 * @description Extracts readable text from a single result page.
 *   Best-effort by design: any network, HTTP, or parse failure yields ''.
 * @param {string} url - Target page URL
 * @param {string} userAgent - User-Agent header reused from the DDG request
 * @returns {Promise<string>} Whitespace-collapsed text capped at 4000 chars, or ''
 */
async function fetchPageText(url, userAgent) {
  try {
    const pageRes = await fetch(url, {
      signal: AbortSignal.timeout(5000), // guard against node-fetch hanging on slow hosts
      headers: { 'User-Agent': userAgent },
      agent: url.startsWith('https:') ? httpsAgent : httpAgent
    });
    if (!pageRes.ok) return '';
    const document = new DOMParser().parseFromString(await pageRes.text(), 'text/html');
    let text = '';
    try {
      const article = new Readability(document).parse();
      text = article?.textContent || '';
    } catch (_) {
      // Readability can throw on unusual markup; fall back to the raw body text.
      text = document.body?.textContent || '';
    }
    return text ? text.replace(/\s+/g, ' ').slice(0, 4000) : '';
  } catch (_) {
    // Silent tolerance of third-party page failures (timeouts, DNS, TLS).
    return '';
  }
}

/**
 * @description Caching + circuit-breaker entry point for web search.
 * @param {string} query - User search input
 * @param {boolean} parallel - When true, scrape every result page concurrently
 *   (claude-code mode); otherwise only the first link is fetched (antigravity).
 * @returns {Promise<Object>} { results, pageText }
 * @throws {Error} Propagated from the circuit breaker / DDG request
 */
export async function webSearch(query, parallel = false) {
  // Cache hit: skip the expensive network round-trips entirely.
  const cached = searchCache.get(query);
  if (cached) {
    return { results: cached.results, pageText: cached.pageText };
  }

  // Circuit breaker wraps the DDG call; failures propagate to the caller.
  const { results, userAgent } = await ddgCircuit.execute(() => doSearch(query));

  let pageText = '';
  if (parallel) {
    // Fetch all result pages concurrently; empty strings are dropped.
    const pages = await Promise.all(results.map((r) => fetchPageText(r.url, userAgent)));
    pageText = pages.filter(Boolean).join('\n\n');
  } else if (results[0]) {
    // Single-page mode: only the top result is scraped.
    pageText = await fetchPageText(results[0].url, userAgent);
  }

  const resultData = { results, pageText };
  searchCache.set(query, resultData);
  return resultData;
}
|
package/src/state.js
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module state
|
|
3
|
+
* @description Persiste la memoria di antigravity prev-hash per fault tolleranza riavvii.
|
|
4
|
+
*/
|
|
5
|
+
import { readFile, writeFile, mkdir } from 'fs/promises';
|
|
6
|
+
import { existsSync } from 'fs';
|
|
7
|
+
import path from 'path';
|
|
8
|
+
import os from 'os';
|
|
9
|
+
|
|
10
|
+
const STATE_DIR = path.join(os.homedir(), '.groundtruth');
const STATE_FILE = path.join(STATE_DIR, 'watcher-state.json');

/**
 * @description Loads the batch-hash entries persisted by a previous run
 *   from the local schedule storage.
 * @returns {Promise<Map>} Map of persisted entries; empty when the state
 *   file is missing, corrupt, or unreadable.
 */
export async function loadBatchState() {
  try {
    if (!existsSync(STATE_FILE)) {
      return new Map();
    }
    const raw = await readFile(STATE_FILE, 'utf8');
    return new Map(Object.entries(JSON.parse(raw)));
  } catch {
    // Corrupt or inaccessible state is treated as a cold start.
    return new Map();
  }
}
|
|
27
|
+
|
|
28
|
+
/**
 * @description Persists the batch-hash map to disk so that restarts can
 *   resume without re-fetching every batch (cross-process fault tolerance).
 * @param {Map} map - Current in-memory map of validated batch hashes
 * @returns {Promise<void>}
 */
export async function saveBatchState(map) {
  await mkdir(STATE_DIR, { recursive: true });
  const serialized = JSON.stringify(Object.fromEntries(map), null, 2);
  await writeFile(STATE_FILE, serialized, 'utf8');
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module atomic-write
|
|
3
|
+
* @description Scrittura file atomica cross-platform con fallback.
|
|
4
|
+
*/
|
|
5
|
+
import { writeFile, rename, unlink, copyFile } from 'fs/promises';
|
|
6
|
+
import { existsSync } from 'fs';
|
|
7
|
+
import { tmpdir } from 'os';
|
|
8
|
+
import path from 'path';
|
|
9
|
+
|
|
10
|
+
/**
 * @description Writes a file atomically, with an optional automatic backup.
 *   The temp file is created in the SAME directory as the target so the final
 *   rename() stays on one filesystem — a temp file in os.tmpdir() can live on
 *   a different device, in which case rename() fails with EXDEV and the write
 *   aborts even though nothing is actually wrong with the target path.
 * @param {string} filePath - Destination path.
 * @param {string} content - Content to write.
 * @param {Object} options - { backup: boolean, mode: number }
 * @returns {Promise<Object>} { success: true, backupPath: string|null }
 * @throws {Error} On filesystem failure (temp file is cleaned up first)
 */
export async function atomicWrite(filePath, content, options = {}) {
  const { backup = true, mode = 0o644 } = options;
  // Same-directory temp file: guarantees rename() is a single-filesystem, atomic move.
  const tempFile = path.join(
    path.dirname(path.resolve(filePath)),
    `.gt-${Date.now()}-${Math.random().toString(36).slice(2)}.tmp`
  );
  const backupPath = `${filePath}.bak`;

  try {
    // Write the full content to the temp file first.
    await writeFile(tempFile, content, { mode, encoding: 'utf8' });

    // Preserve the current file if requested and it exists.
    if (backup && existsSync(filePath)) {
      await copyFile(filePath, backupPath);
    }

    // Atomic rename (POSIX) or best-effort retries on Windows sharing violations.
    try {
      await rename(tempFile, filePath);
    } catch (renameErr) {
      if (process.platform === 'win32' && (renameErr.code === 'EACCES' || renameErr.code === 'EPERM' || renameErr.code === 'EBUSY')) {
        let success = false;
        for (let i = 0; i < 5; i++) {
          // Exponential backoff: 100ms, 200ms, 400ms, 800ms, 1600ms.
          await new Promise(r => setTimeout(r, 100 * (2 ** i)));
          try {
            await rename(tempFile, filePath);
            success = true;
            break;
          } catch (_) { }
        }
        if (!success) throw new Error(`Rename failed on Windows after 5 retries`);
      } else {
        throw renameErr;
      }
    }

    return { success: true, backupPath: backup ? backupPath : null };
  } catch (err) {
    // Always clean up the temp file on failure; ignore cleanup errors.
    await unlink(tempFile).catch(() => { });
    throw err;
  }
}
|
package/src/watcher.js
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module watcher
|
|
3
|
+
* @description Timer poll di Antigravity update locale skill inject doc rules, ora con caching a batch blocchi separati.
|
|
4
|
+
*/
|
|
5
|
+
import os from 'os';
|
|
6
|
+
import path from 'path';
|
|
7
|
+
import { webSearch } from './search.js';
|
|
8
|
+
import { readPackageDeps, buildQuery, groupIntoBatches, batchHash } from './packages.js';
|
|
9
|
+
import { updateGeminiFiles, removeStaleBlocks } from './inject.js';
|
|
10
|
+
import { chalk, label, log, LOG_WARN, LOG_REFRESH } from './logger.js';
|
|
11
|
+
import { version } from './cli.js';
|
|
12
|
+
import { loadBatchState, saveBatchState } from './state.js';
|
|
13
|
+
import { httpsAgent } from './http-agent.js';
|
|
14
|
+
|
|
15
|
+
// ─── Scheduler Watcher Instance ──────────────────────
|
|
16
|
+
|
|
17
|
+
/**
 * @description Starts the polling scheduler that refreshes GEMINI.md context
 *   blocks from live web searches, one batch of dependencies at a time.
 *   Fixes vs. previous revision: scheduled cycles are wrapped in a rejection
 *   handler (a floating `updateSkill()` promise would crash modern Node on
 *   any error), and the redundant second `batchHash(batch)` computation is
 *   removed — the block id IS the batch hash.
 * @param {Object} config
 * @param {number} config.intervalMinutes - Minutes between refresh cycles.
 * @param {boolean} config.usePackageJson - Reserved flag (not read here).
 * @param {number} config.batchSize - Max dependencies per search batch.
 */
export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
  const homeDir = os.homedir();
  const globalPath = path.join(homeDir, '.gemini', 'GEMINI.md');
  const workspacePath = path.join(process.cwd(), '.gemini', 'GEMINI.md');

  const globalSkillFilePretty = '~/.gemini/GEMINI.md';
  const skillFilePretty = '.gemini/GEMINI.md';

  // Startup banner.
  console.log();
  console.log(` ${chalk.white.bold('GroundTruth')} ${chalk.gray(`v${version}`)} ${chalk.gray('[antigravity mode]')}`);
  console.log();
  console.log(label('◆', 'global', globalSkillFilePretty));
  console.log(label('◆', 'workspace', skillFilePretty));
  console.log(label('◆', 'interval', `every ${intervalMinutes} min`));
  console.log(label('◆', 'batch_size', `chunk limit ${batchSize}`));
  console.log(label('◆', 'context', 'DuckDuckGo → live'));
  console.log();
  console.log(` ${chalk.cyan('✻')} Running. Antigravity will load context automatically.`);
  console.log();

  let previousBatchHashes = new Map();

  async function updateSkill() {
    // Lazy-load persisted state on the first cycle (survives restarts).
    if (previousBatchHashes.size === 0) {
      previousBatchHashes = await loadBatchState();
    }
    const deps = await readPackageDeps(); // all declared dependencies
    if (!deps || deps.length === 0) {
      return; // nothing to ground — skip this cycle
    }

    const batches = groupIntoBatches(deps, batchSize);
    const activeBlockIds = new Set();
    let updatedCount = 0;
    let skippedCount = 0;
    let failedCount = 0;

    // Bounded concurrency: at most 3 batches in flight at once.
    const maxConcurrency = 3;
    const executing = new Set();

    for (const batch of batches) {
      const promise = (async () => {
        // The batch hash doubles as both the block id and the change marker.
        const blockId = batchHash(batch);
        activeBlockIds.add(blockId);

        if (previousBatchHashes.get(blockId) === blockId) {
          skippedCount++;
          return;
        }

        const query = buildQuery(batch);
        try {
          const { results, pageText } = await webSearch(query, false);
          // Heuristic quality gate: short pages or block/captcha markers are rejected.
          const badSignals = ['403', 'captcha', 'blocked', 'access denied', 'forbidden'];
          const isBad = !pageText || pageText.length < 200 || badSignals.some(s => pageText.toLowerCase().includes(s));
          if (isBad && previousBatchHashes.has(blockId)) {
            log(LOG_WARN, chalk.yellow, `low quality result for block ${blockId} → keeping previous context`);
            failedCount++;
            return;
          }

          const nowStr = new Date().toLocaleString('it-IT');
          const batchTitle = batch.map(b => b.split(' ')[0]).join(', ');

          // Global file gets a compact block (first result only).
          let globalMd = `## Live Context — ${batchTitle} (${nowStr})\n`;
          globalMd += `**Query:** ${query}\n\n`;
          if (results.length > 0) {
            globalMd += `### ${results[0].title}\n`;
            globalMd += `${results[0].snippet.slice(0, 300)} — ${results[0].url}\n`;
          }

          // Workspace file gets the full result list plus scraped page text.
          let md = `## Live Context — ${batchTitle} (${nowStr})\n`;
          md += `**Query:** ${query}\n\n`;
          for (const r of results) {
            md += `### ${r.title}\n${r.snippet} — ${r.url}\n\n`;
          }
          if (pageText) {
            md += `FULL TEXT: ${pageText}\n`;
          }

          await updateGeminiFiles([{
            blockId,
            globalContent: globalMd,
            workspaceContent: md
          }]);

          previousBatchHashes.set(blockId, blockId);
          updatedCount++;
          log(LOG_REFRESH, chalk.cyan, `block ${blockId} updated → ${batch.join(', ')}`);
        } catch (e) {
          failedCount++;
          log(LOG_WARN, chalk.yellow, `block ${blockId} fetch failed → keeping previous`);
        }
      })().then(() => executing.delete(promise));

      executing.add(promise);
      if (executing.size >= maxConcurrency) {
        await Promise.race(executing);
      }
    }
    await Promise.all(executing);

    // Drop blocks whose batches no longer exist.
    await removeStaleBlocks(globalPath, activeBlockIds);
    await removeStaleBlocks(workspacePath, activeBlockIds);

    await saveBatchState(previousBatchHashes);

    log(LOG_REFRESH, chalk.gray, `cycle done → ${activeBlockIds.size} blocks active, ${updatedCount} updated, ${skippedCount} skipped, ${failedCount} errors`);
  }

  let cycleCount = 0;

  // Persist state on Ctrl-C so a restart does not re-fetch everything.
  process.on('SIGINT', async () => {
    await saveBatchState(previousBatchHashes);
    process.exit(0);
  });

  // A failed cycle must never crash the process: unhandled promise
  // rejections are fatal in modern Node, so every scheduled run is caught.
  const runCycle = () => {
    updateSkill().catch((err) => {
      log(LOG_WARN, chalk.yellow, `cycle failed → ${err.message}`);
    });
  };

  // Immediate run at startup, then on the configured interval.
  runCycle();
  setInterval(() => {
    cycleCount++;
    if (cycleCount % 10 === 0) {
      httpsAgent.destroy(); // periodically force idle keep-alive connections closed
    }
    runCycle();
  }, intervalMinutes * 60 * 1000);
}
|