@antodevs/groundtruth 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/search.js ADDED
@@ -0,0 +1,157 @@
1
+ /**
2
+ * @module search
3
+ * @description Logica di scraping web su DuckDuckGo tramite cheerio e linkedom.
4
+ */
5
+ import fetch from 'node-fetch';
6
+ import * as cheerio from 'cheerio';
7
+ import { Readability } from '@mozilla/readability';
8
+ import { DOMParser } from 'linkedom';
9
+ import { searchCache } from './cache.js';
10
+ import { CircuitBreaker } from './circuit-breaker.js';
11
+ import { httpAgent, httpsAgent } from './http-agent.js';
12
+
13
+ // ─── Config & Cache ──────────────────────────────────
14
+
15
+ // Evitiamo IP bans ruotando UA comuni in Chrome desktop
16
+ const USER_AGENTS = [
17
+ 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
18
+ 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
19
+ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
20
+ ];
21
+
22
+ /**
23
+ * @description Seleziona uno User-Agent rnd dall'array disponibile
24
+ * @returns {string} Stringa di uno User Agent
25
+ */
26
+ function getRandomUA() {
27
+ return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
28
+ }
29
+
30
// Circuit breaker guarding all DuckDuckGo requests; presumably opens after
// 3 consecutive failures and half-opens after 30 s — semantics defined in
// ./circuit-breaker.js, confirm there.
const ddgCircuit = new CircuitBreaker({ failureThreshold: 3, resetTimeout: 30000 });
31
+
32
+ /**
33
+ * @description Decodifica link mascherati DuckDuckGo recuperando `uddg` querystring.
34
+ * @param {string} href - Url incapsulato proveniente da nodeDDG
35
+ * @returns {string} Url reale target in chiaro
36
+ */
37
+ export function resolveDDGUrl(href) {
38
+ try {
39
+ const url = new URL(href, 'https://duckduckgo.com');
40
+ const uddg = url.searchParams.get('uddg');
41
+ return uddg ? decodeURIComponent(uddg) : href;
42
+ } catch {
43
+ return href;
44
+ }
45
+ }
46
+
47
+ /**
48
+ * @description Esegue chiamata http reale su node DDG.
49
+ * @param {string} query - Ricerca DDG formattata
50
+ * @returns {Promise<Object>} { results, userAgent }
51
+ * @throws {Error} Fallimento http DDG request
52
+ */
53
+ async function doSearch(query) {
54
+ const userAgent = getRandomUA();
55
+ // Fetch DDG raw HTML search endpoint ignoring CSS/JS payloads
56
+ const searchRes = await fetch(
57
+ `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`,
58
+ { signal: AbortSignal.timeout(5000), headers: { 'User-Agent': userAgent }, agent: httpsAgent }
59
+ );
60
+ if (!searchRes.ok) throw new Error(`DDG ${searchRes.status}`);
61
+
62
+ const $ = cheerio.load(await searchRes.text());
63
+ let results = [];
64
+ $('.result__body').each((i, el) => {
65
+ const title = $(el).find('.result__title').text().trim();
66
+ const snippet = $(el).find('.result__snippet').text().trim();
67
+ let rawUrl = $(el).find('.result__url').attr('href') || $(el).find('a.result__url').attr('href');
68
+ const resultUrl = rawUrl ? resolveDDGUrl(rawUrl) : '';
69
+ if (title && resultUrl) results.push({ title, snippet, url: resultUrl });
70
+ });
71
+
72
+ const seen = new Set();
73
+ results = results.filter(r => r.url && !seen.has(r.url) && seen.add(r.url)).slice(0, 3);
74
+
75
+ if (results.length === 0) throw new Error('No DDG results');
76
+ return { results, userAgent };
77
+ }
78
+
79
+ /**
80
+ * @description Punto d'accesso caching+retry orchestrator web.
81
+ * @param {string} query - Input utente di ricerca convertibile web
82
+ * @param {boolean} parallel - Promise.all fast per multiple page scraping
83
+ * @returns {Promise<Object>} Oggetto risultati + pageText formattato str
84
+ */
85
+ export async function webSearch(query, parallel = false) {
86
+ const now = Date.now();
87
+ // In cache mode skip costose chiamate network
88
+ const cached = searchCache.get(query);
89
+ if (cached) {
90
+ return { results: cached.results, pageText: cached.pageText };
91
+ }
92
+
93
+ let results, userAgent;
94
+ try {
95
+ const res = await ddgCircuit.execute(() => doSearch(query));
96
+ results = res.results;
97
+ userAgent = res.userAgent;
98
+ } catch (err) {
99
+ throw err;
100
+ }
101
+
102
+ let pageText = '';
103
+ // Se claude-code usa parallel mode; altrimenti solo primo link (antigravity)
104
+ if (parallel) {
105
+ const pages = await Promise.all(results.map(async (r) => {
106
+ try {
107
+ const pageRes = await fetch(r.url, {
108
+ signal: AbortSignal.timeout(5000),
109
+ headers: { 'User-Agent': userAgent },
110
+ agent: r.url.startsWith('https:') ? httpsAgent : httpAgent
111
+ });
112
+ if (pageRes.ok) {
113
+ const document = new DOMParser().parseFromString(await pageRes.text(), 'text/html');
114
+ let text = '';
115
+ try {
116
+ const article = new Readability(document).parse();
117
+ text = article?.textContent || '';
118
+ } catch (_) {
119
+ text = document.body?.textContent || '';
120
+ }
121
+ if (text) return text.replace(/\s+/g, ' ').slice(0, 4000);
122
+ }
123
+ } catch (_) { // fail silenzioso parallelo tollerato per timeout link third-party
124
+ }
125
+ return '';
126
+ }));
127
+ pageText = pages.filter(Boolean).join('\n\n');
128
+ } else {
129
+ try {
130
+ if (results[0]) {
131
+ const pageRes = await fetch(results[0].url, {
132
+ signal: AbortSignal.timeout(5000), // node-fetch hang timeout catch
133
+ headers: { 'User-Agent': userAgent },
134
+ agent: results[0].url.startsWith('https:') ? httpsAgent : httpAgent
135
+ });
136
+ if (pageRes.ok) {
137
+ const document = new DOMParser().parseFromString(await pageRes.text(), 'text/html');
138
+ let text = '';
139
+ try {
140
+ const article = new Readability(document).parse();
141
+ text = article?.textContent || '';
142
+ } catch (_) {
143
+ text = document.body?.textContent || '';
144
+ }
145
+ if (text) {
146
+ pageText = text.replace(/\s+/g, ' ').slice(0, 4000);
147
+ }
148
+ }
149
+ }
150
+ } catch (_) { // bypass errore url target: fallback al contesto vuoto
151
+ }
152
+ }
153
+
154
+ const resultData = { results, pageText };
155
+ searchCache.set(query, resultData);
156
+ return resultData;
157
+ }
package/src/state.js ADDED
@@ -0,0 +1,37 @@
1
+ /**
2
+ * @module state
3
+ * @description Persiste la memoria di antigravity prev-hash per fault tolleranza riavvii.
4
+ */
5
+ import { readFile, writeFile, mkdir } from 'fs/promises';
6
+ import { existsSync } from 'fs';
7
+ import path from 'path';
8
+ import os from 'os';
9
+
10
+ const STATE_DIR = path.join(os.homedir(), '.groundtruth');
11
+ const STATE_FILE = path.join(STATE_DIR, 'watcher-state.json');
12
+
13
+ /**
14
+ * @description Carica gli hash validati e memorizzati dallo schedule storage locale.
15
+ * @returns {Promise<Map>} Restituisce le hash map entries persistite del cron logic stream precedente.
16
+ */
17
+ export async function loadBatchState() {
18
+ try {
19
+ if (!existsSync(STATE_FILE)) return new Map();
20
+ const data = await readFile(STATE_FILE, 'utf8');
21
+ const parsed = JSON.parse(data);
22
+ return new Map(Object.entries(parsed));
23
+ } catch {
24
+ return new Map();
25
+ }
26
+ }
27
+
28
+ /**
29
+ * @description Sincronizza hash batches per fault tolerance cross process
30
+ * @param {Map} map - Oggetto dei blocchi hashati validi in mem persist state map
31
+ * @returns {Promise<void>}
32
+ */
33
+ export async function saveBatchState(map) {
34
+ await mkdir(STATE_DIR, { recursive: true });
35
+ const obj = Object.fromEntries(map);
36
+ await writeFile(STATE_FILE, JSON.stringify(obj, null, 2), 'utf8');
37
+ }
@@ -0,0 +1,58 @@
1
+ /**
2
+ * @module atomic-write
3
+ * @description Scrittura file atomica cross-platform con fallback.
4
+ */
5
+ import { writeFile, rename, unlink, copyFile } from 'fs/promises';
6
+ import { existsSync } from 'fs';
7
+ import { tmpdir } from 'os';
8
+ import path from 'path';
9
+
10
+ /**
11
+ * @description Scrive file in modo atomico con backup automatico opzionale.
12
+ * @param {string} filePath - Path destinazione.
13
+ * @param {string} content - Contenuto da scrivere.
14
+ * @param {Object} options - { backup: boolean, mode: number }
15
+ * @returns {Promise<Object>} Esito operazione e path backup
16
+ * @throws {Error} In caso di fallimento filesystem
17
+ */
18
+ export async function atomicWrite(filePath, content, options = {}) {
19
+ const { backup = true, mode = 0o644 } = options;
20
+ const tempFile = path.join(tmpdir(), `.gt-${Date.now()}-${Math.random().toString(36).slice(2)}.tmp`);
21
+ const backupPath = `${filePath}.bak`;
22
+
23
+ try {
24
+ // Scrittura su file temporaneo
25
+ await writeFile(tempFile, content, { mode, encoding: 'utf8' });
26
+
27
+ // Backup esistente se richiesto
28
+ if (backup && existsSync(filePath)) {
29
+ await copyFile(filePath, backupPath);
30
+ }
31
+
32
+ // Atomic rename (POSIX) o safe best-effort (Windows)
33
+ try {
34
+ await rename(tempFile, filePath);
35
+ } catch (renameErr) {
36
+ if (process.platform === 'win32' && (renameErr.code === 'EACCES' || renameErr.code === 'EPERM' || renameErr.code === 'EBUSY')) {
37
+ let success = false;
38
+ for (let i = 0; i < 5; i++) {
39
+ await new Promise(r => setTimeout(r, 100 * (2 ** i)));
40
+ try {
41
+ await rename(tempFile, filePath);
42
+ success = true;
43
+ break;
44
+ } catch (_) { }
45
+ }
46
+ if (!success) throw new Error(`Rename failed on Windows after 5 retries`);
47
+ } else {
48
+ throw renameErr;
49
+ }
50
+ }
51
+
52
+ return { success: true, backupPath: backup ? backupPath : null };
53
+ } catch (err) {
54
+ // Cleanup temp in caso di errore catch
55
+ await unlink(tempFile).catch(() => { });
56
+ throw err;
57
+ }
58
+ }
package/src/watcher.js ADDED
@@ -0,0 +1,146 @@
1
+ /**
2
+ * @module watcher
3
+ * @description Timer poll di Antigravity update locale skill inject doc rules, ora con caching a batch blocchi separati.
4
+ */
5
+ import os from 'os';
6
+ import path from 'path';
7
+ import { webSearch } from './search.js';
8
+ import { readPackageDeps, buildQuery, groupIntoBatches, batchHash } from './packages.js';
9
+ import { updateGeminiFiles, removeStaleBlocks } from './inject.js';
10
+ import { chalk, label, log, LOG_WARN, LOG_REFRESH } from './logger.js';
11
+ import { version } from './cli.js';
12
+ import { loadBatchState, saveBatchState } from './state.js';
13
+ import { httpsAgent } from './http-agent.js';
14
+
15
+ // ─── Scheduler Watcher Instance ──────────────────────
16
+
/**
 * @description Starts the polling scheduler that refreshes Antigravity
 * GEMINI.md context blocks from live web searches, one dependency batch at
 * a time, skipping batches whose hash is already known.
 * @param {Object} opts - { intervalMinutes, usePackageJson, batchSize }
 */
export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
  const homeDir = os.homedir();
  const globalPath = path.join(homeDir, '.gemini', 'GEMINI.md');
  const workspacePath = path.join(process.cwd(), '.gemini', 'GEMINI.md');

  const globalSkillFilePretty = '~/.gemini/GEMINI.md';
  const skillFilePretty = '.gemini/GEMINI.md';

  console.log();
  console.log(` ${chalk.white.bold('GroundTruth')} ${chalk.gray(`v${version}`)} ${chalk.gray('[antigravity mode]')}`);
  console.log();
  console.log(label('◆', 'global', globalSkillFilePretty));
  console.log(label('◆', 'workspace', skillFilePretty));
  console.log(label('◆', 'interval', `every ${intervalMinutes} min`));
  console.log(label('◆', 'batch_size', `chunk limit ${batchSize}`));
  console.log(label('◆', 'context', 'DuckDuckGo → live'));
  console.log();
  console.log(` ${chalk.cyan('✻')} Running. Antigravity will load context automatically.`);
  console.log();

  let previousBatchHashes = new Map();
  let cycleRunning = false;

  // Guarded entry point: prevents overlapping cycles when one run outlasts
  // the polling interval (the original could stack concurrent cycles).
  async function updateSkill() {
    if (cycleRunning) return;
    cycleRunning = true;
    try {
      await runCycle();
    } finally {
      cycleRunning = false;
    }
  }

  // One full refresh cycle over every dependency batch.
  async function runCycle() {
    if (previousBatchHashes.size === 0) {
      previousBatchHashes = await loadBatchState();
    }
    const deps = await readPackageDeps(); // all deps
    if (!deps || deps.length === 0) {
      return; // nothing to contextualize: skip this cycle
    }

    const batches = groupIntoBatches(deps, batchSize);
    const activeBlockIds = new Set();
    let updatedCount = 0;
    let skippedCount = 0;
    let failedCount = 0;

    // Small bounded pool: at most 3 batches in flight at once.
    const maxConcurrency = 3;
    const executing = new Set();

    for (const batch of batches) {
      const promise = (async () => {
        // batchHash doubles as both the block id and the change-detection
        // hash (same function, same input), so compute it only once — the
        // original computed it twice per batch.
        const blockId = batchHash(batch);
        activeBlockIds.add(blockId);

        if (previousBatchHashes.get(blockId) === blockId) {
          skippedCount++;
          return;
        }

        const query = buildQuery(batch);
        try {
          const { results, pageText } = await webSearch(query, false);
          const badSignals = ['403', 'captcha', 'blocked', 'access denied', 'forbidden'];
          const isBad = !pageText || pageText.length < 200 || badSignals.some(s => pageText.toLowerCase().includes(s));
          if (isBad && previousBatchHashes.has(blockId)) {
            log(LOG_WARN, chalk.yellow, `low quality result for block ${blockId} → keeping previous context`);
            failedCount++;
            return;
          }

          const now = new Date();
          const nowStr = now.toLocaleString('it-IT');
          const batchTitle = batch.map(b => b.split(' ')[0]).join(', ');

          // Compact block for the global file: query plus the first hit only.
          let globalMd = `## Live Context — ${batchTitle} (${nowStr})\n`;
          globalMd += `**Query:** ${query}\n\n`;
          if (results.length > 0) {
            globalMd += `### ${results[0].title}\n`;
            globalMd += `${results[0].snippet.slice(0, 300)} — ${results[0].url}\n`;
          }

          // Full block for the workspace file: every hit plus the page text.
          let md = `## Live Context — ${batchTitle} (${nowStr})\n`;
          md += `**Query:** ${query}\n\n`;
          for (const r of results) {
            md += `### ${r.title}\n${r.snippet} — ${r.url}\n\n`;
          }
          if (pageText) {
            md += `FULL TEXT: ${pageText}\n`;
          }

          await updateGeminiFiles([{
            blockId,
            globalContent: globalMd,
            workspaceContent: md
          }]);

          previousBatchHashes.set(blockId, blockId);
          updatedCount++;
          log(LOG_REFRESH, chalk.cyan, `block ${blockId} updated → ${batch.join(', ')}`);
        } catch (e) {
          failedCount++;
          // Include the underlying error so failures are diagnosable; the
          // original message dropped it entirely.
          log(LOG_WARN, chalk.yellow, `block ${blockId} fetch failed (${e.message}) → keeping previous`);
        }
      })().then(() => executing.delete(promise));

      executing.add(promise);
      if (executing.size >= maxConcurrency) {
        await Promise.race(executing);
      }
    }
    await Promise.all(executing);

    await removeStaleBlocks(globalPath, activeBlockIds);
    await removeStaleBlocks(workspacePath, activeBlockIds);

    // Prune hashes for batches that no longer exist; without this the
    // persisted state file grows without bound as dependencies change.
    for (const staleId of previousBatchHashes.keys()) {
      if (!activeBlockIds.has(staleId)) previousBatchHashes.delete(staleId);
    }

    await saveBatchState(previousBatchHashes);

    log(LOG_REFRESH, chalk.gray, `cycle done → ${activeBlockIds.size} blocks active, ${updatedCount} updated, ${skippedCount} skipped, ${failedCount} errors`);
  }

  let cycleCount = 0;

  // Persist state on Ctrl-C so a restart does not lose batch bookkeeping.
  process.on('SIGINT', async () => {
    await saveBatchState(previousBatchHashes);
    process.exit(0);
  });

  // Immediate first run at startup. The promises were previously left
  // floating with no rejection handler, risking an unhandled-rejection
  // crash of the whole watcher process.
  updateSkill().catch((e) => log(LOG_WARN, chalk.yellow, `cycle failed → ${e.message}`));
  setInterval(() => {
    cycleCount++;
    if (cycleCount % 10 === 0) {
      httpsAgent.destroy(); // force idle keep-alive connections closed
    }
    updateSkill().catch((e) => log(LOG_WARN, chalk.yellow, `cycle failed → ${e.message}`));
  }, intervalMinutes * 60 * 1000);
}