@antodevs/groundtruth 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +7 -5
- package/package.json +4 -5
- package/src/registry.js +5 -2
- package/src/sanitize.js +40 -1
- package/src/search.js +2 -4
- package/src/state.js +22 -9
- package/src/watcher.js +3 -3
package/README.md
CHANGED
|
@@ -41,12 +41,14 @@ Current-generation AI coding assistants (Claude Code, Antigravity, Cursor) suffe
|
|
|
41
41
|
|
|
42
42
|
**GroundTruth** acts as a transparent middleware layer that resolves this by dynamically injecting real-time, stack-specific documentation directly into the agent's context window prior to inference.
|
|
43
43
|
|
|
44
|
-
### The v0.2
|
|
44
|
+
### The v0.2 Engine: Global Cloud Intelligence
|
|
45
|
+
|
|
46
|
+
GroundTruth v0.2 introduces a paradigm shift in context quality and scalability:
|
|
47
|
+
- **Global Cloud Registry**: Bypasses search engines by querying a high-performance **Cloudflare Worker** registry. It covers the top ~200 frameworks with "Golden List" manual precision and over **10,000+ npm packages** via automated background indexing.
|
|
48
|
+
- **Jina Reader API Integration**: Seamlessly parses dynamic, JavaScript-rendered SPAs (like Vercel AI SDK, Next.js, and Svelte docs) into clean, LLM-optimized Markdown.
|
|
49
|
+
- **Automated "Gentle" Indexer**: A remote bot periodically synchronizes the latest documentation URLs from the npm ecosystem directly to the cloud registry, ensuring your context is never stale.
|
|
50
|
+
- **Zero-Config Resilience**: Operates locally with a strictly enforced 1.5s cloud timeout. If the registry is unreachable, it silently falls back to local Readability extraction or search.
|
|
45
51
|
|
|
46
|
-
GroundTruth v0.2.0 introduces a massive upgrade to content quality:
|
|
47
|
-
- **Jina Reader API Integration**: Parses dynamic, JavaScript-rendered SPAs (like Vercel AI SDK, Next.js, and Svelte docs) into clean, LLM-optimized Markdown.
|
|
48
|
-
- **Smart Source Registry**: Automatically bypasses search engines for the top 20+ frameworks (React, Svelte, Vue, Astro, etc.) and fetches their official documentation directly.
|
|
49
|
-
- **Readability Fallback**: Ensures reliable extraction even if the primary engine fails.
|
|
50
52
|
|
|
51
53
|
---
|
|
52
54
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@antodevs/groundtruth",
|
|
3
|
-
"version": "0.2.4",
|
|
3
|
+
"version": "0.2.6",
|
|
4
4
|
"description": "Lightweight Node.js proxy to intercept API requests from coding agents and inject fresh web context",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -44,10 +44,9 @@
|
|
|
44
44
|
"release:minor": "git add . && git commit -m \"chore: auto-commit before release\" || true && npm version minor && git push origin main --tags && npm publish"
|
|
45
45
|
},
|
|
46
46
|
"dependencies": {
|
|
47
|
-
"@mozilla/readability": "^0.
|
|
47
|
+
"@mozilla/readability": "^0.6.0",
|
|
48
48
|
"chalk": "^5.3.0",
|
|
49
|
-
"cheerio": "^1.
|
|
50
|
-
"linkedom": "^0.18.12",
|
|
51
|
-
"node-fetch": "^3.3.2"
|
|
49
|
+
"cheerio": "^1.2.0",
|
|
50
|
+
"linkedom": "^0.18.12"
|
|
52
51
|
}
|
|
53
52
|
}
|
package/src/registry.js
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
* @module registry
|
|
3
3
|
* @description Interroga il Cloudflare Worker (Remote Registry) per risolvere URL docs ufficiali.
|
|
4
4
|
*/
|
|
5
|
-
import fetch from 'node-fetch';
|
|
6
5
|
|
|
7
6
|
const REGISTRY_API_URL = 'https://groundtruth-registry.antony-flex01.workers.dev/lookup';
|
|
8
7
|
|
|
@@ -18,7 +17,11 @@ export async function lookupRegistryUrl(depName) {
|
|
|
18
17
|
if (!depName) return null;
|
|
19
18
|
|
|
20
19
|
// Normalizzazione preventiva
|
|
21
|
-
|
|
20
|
+
let name = depName.split(' ')[0].toLowerCase().trim();
|
|
21
|
+
|
|
22
|
+
// Alias mapping per framework comuni con scope npm
|
|
23
|
+
if (name === '@sveltejs/kit') name = 'sveltekit';
|
|
24
|
+
|
|
22
25
|
|
|
23
26
|
// Check hit in memoria (ritorna subito)
|
|
24
27
|
if (lookupCache.has(name)) {
|
package/src/sanitize.js
CHANGED
|
@@ -19,17 +19,56 @@ const DANGEROUS_PATTERNS = [
|
|
|
19
19
|
/HUMAN:\s/gi,
|
|
20
20
|
];
|
|
21
21
|
|
|
22
|
+
const NOISE_PATTERNS = [
|
|
23
|
+
/Skip to content/gi,
|
|
24
|
+
/Navigation Menu/gi,
|
|
25
|
+
/Toggle navigation/gi,
|
|
26
|
+
/Appearance settings/gi,
|
|
27
|
+
/AI CODE CREATION/gi,
|
|
28
|
+
/GitHub Copilot Write better code with AI/gi,
|
|
29
|
+
/Sign in/gi,
|
|
30
|
+
/Sign up/gi,
|
|
31
|
+
/Notifications/gi,
|
|
32
|
+
/Fork\s+\d+/gi,
|
|
33
|
+
/Star\s+[\d.]+[kK]?/gi,
|
|
34
|
+
/Code/gi,
|
|
35
|
+
/Issues/gi,
|
|
36
|
+
/Pull requests/gi,
|
|
37
|
+
/Actions/gi,
|
|
38
|
+
/Projects/gi,
|
|
39
|
+
/Security/gi,
|
|
40
|
+
/Insights/gi,
|
|
41
|
+
/Why GitHub/gi,
|
|
42
|
+
/Solutions/gi,
|
|
43
|
+
/Resources/gi,
|
|
44
|
+
/Open Source/gi,
|
|
45
|
+
/Enterprises/gi,
|
|
46
|
+
/Startups/gi,
|
|
47
|
+
];
|
|
48
|
+
|
|
22
49
|
/**
|
|
23
|
-
* @description Filtra pattern pericolosi di
|
|
50
|
+
* @description Filtra pattern pericolosi e rumore di navigazione dal testo web scrappato.
|
|
24
51
|
* @param {string} text - Testo raw proveniente da web scraping
|
|
25
52
|
* @param {number} maxLen - Lunghezza massima output (default 8000)
|
|
26
53
|
* @returns {string} Testo sanitizzato
|
|
27
54
|
*/
|
|
28
55
|
export function sanitizeWebContent(text, maxLen = 8000) {
|
|
29
56
|
if (!text || typeof text !== 'string') return '';
|
|
57
|
+
|
|
30
58
|
let cleaned = text;
|
|
59
|
+
|
|
60
|
+
// 1. Rimuoviamo il rumore di navigazione
|
|
61
|
+
for (const pattern of NOISE_PATTERNS) {
|
|
62
|
+
cleaned = cleaned.replace(pattern, '');
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
// 2. Rimuoviamo pattern pericolosi
|
|
31
66
|
for (const p of DANGEROUS_PATTERNS) {
|
|
32
67
|
cleaned = cleaned.replace(p, '[FILTERED]');
|
|
33
68
|
}
|
|
69
|
+
|
|
70
|
+
// 3. Normalizzazione spazi bianchi per risparmiare token
|
|
71
|
+
cleaned = cleaned.replace(/\s+/g, ' ').trim();
|
|
72
|
+
|
|
34
73
|
return cleaned.slice(0, maxLen);
|
|
35
74
|
}
|
package/src/search.js
CHANGED
|
@@ -2,7 +2,6 @@
|
|
|
2
2
|
* @module search
|
|
3
3
|
* @description Logica di scraping web: Jina Reader → fallback Readability, registry bypass, DDG search.
|
|
4
4
|
*/
|
|
5
|
-
import fetch from 'node-fetch';
|
|
6
5
|
import * as cheerio from 'cheerio';
|
|
7
6
|
import { Readability } from '@mozilla/readability';
|
|
8
7
|
import { DOMParser } from 'linkedom';
|
|
@@ -59,8 +58,7 @@ export async function fetchPageContent(url, userAgent, opts = {}) {
|
|
|
59
58
|
try {
|
|
60
59
|
const pageRes = await fetch(url, {
|
|
61
60
|
signal: AbortSignal.timeout(5000),
|
|
62
|
-
headers: { 'User-Agent': userAgent },
|
|
63
|
-
agent: url.startsWith('https:') ? httpsAgent : httpAgent
|
|
61
|
+
headers: { 'User-Agent': userAgent }
|
|
64
62
|
});
|
|
65
63
|
if (pageRes.ok) {
|
|
66
64
|
const document = new DOMParser().parseFromString(await pageRes.text(), 'text/html');
|
|
@@ -142,7 +140,7 @@ async function doSearch(query, resultsLimit = 3) {
|
|
|
142
140
|
const userAgent = getRandomUA();
|
|
143
141
|
const searchRes = await fetch(
|
|
144
142
|
`https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`,
|
|
145
|
-
{ signal: AbortSignal.timeout(5000), headers: { 'User-Agent': userAgent }
|
|
143
|
+
{ signal: AbortSignal.timeout(5000), headers: { 'User-Agent': userAgent } }
|
|
146
144
|
);
|
|
147
145
|
if (!searchRes.ok) throw new Error(`DDG ${searchRes.status}`);
|
|
148
146
|
|
package/src/state.js
CHANGED
|
@@ -13,26 +13,39 @@ const STATE_FILE = path.join(STATE_DIR, 'watcher-state.json');
|
|
|
13
13
|
|
|
14
14
|
/**
|
|
15
15
|
* @description Carica gli hash validati e memorizzati dallo schedule storage locale.
|
|
16
|
-
* @
|
|
16
|
+
* @param {string} currentVersion - Versione attuale dell'applicazione per validare la cache.
|
|
17
|
+
* @returns {Promise<Map>} Restituisce le hash map entries persistite o una mappa vuota se la versione differisce.
|
|
17
18
|
*/
|
|
18
|
-
export async function loadBatchState() {
|
|
19
|
+
export async function loadBatchState(currentVersion) {
|
|
19
20
|
try {
|
|
20
21
|
if (!existsSync(STATE_FILE)) return new Map();
|
|
21
22
|
const data = await readFile(STATE_FILE, 'utf8');
|
|
22
|
-
const
|
|
23
|
-
|
|
23
|
+
const state = JSON.parse(data);
|
|
24
|
+
|
|
25
|
+
// Invalida la cache se la versione è differente (forza refresh dopo update)
|
|
26
|
+
if (state.version !== currentVersion) {
|
|
27
|
+
return new Map();
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
return new Map(Object.entries(state.hashes || {}));
|
|
24
31
|
} catch {
|
|
25
32
|
return new Map();
|
|
26
33
|
}
|
|
27
34
|
}
|
|
28
35
|
|
|
29
36
|
/**
|
|
30
|
-
* @description Sincronizza hash batches per fault tolerance cross process
|
|
31
|
-
* @param {Map} map - Oggetto dei blocchi hashati validi
|
|
37
|
+
* @description Sincronizza hash batches e versione per fault tolerance cross process.
|
|
38
|
+
* @param {Map} map - Oggetto dei blocchi hashati validi.
|
|
39
|
+
* @param {string} version - Versione attuale dell'applicazione.
|
|
32
40
|
* @returns {Promise<void>}
|
|
33
41
|
*/
|
|
34
|
-
export async function saveBatchState(map) {
|
|
42
|
+
export async function saveBatchState(map, version) {
|
|
35
43
|
await mkdir(STATE_DIR, { recursive: true });
|
|
36
|
-
const
|
|
37
|
-
|
|
44
|
+
const state = {
|
|
45
|
+
version: version,
|
|
46
|
+
updatedAt: new Date().toISOString(),
|
|
47
|
+
hashes: Object.fromEntries(map)
|
|
48
|
+
};
|
|
49
|
+
await atomicWrite(STATE_FILE, JSON.stringify(state, null, 2), { backup: false });
|
|
38
50
|
}
|
|
51
|
+
|
package/src/watcher.js
CHANGED
|
@@ -52,7 +52,7 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
|
|
|
52
52
|
|
|
53
53
|
async function updateSkill() {
|
|
54
54
|
if (previousBatchHashes.size === 0) {
|
|
55
|
-
previousBatchHashes = await loadBatchState();
|
|
55
|
+
previousBatchHashes = await loadBatchState(version);
|
|
56
56
|
}
|
|
57
57
|
const deps = await readPackageDeps();
|
|
58
58
|
if (!deps || deps.length === 0) {
|
|
@@ -193,7 +193,7 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
|
|
|
193
193
|
await removeStaleBlocks(globalPath, activeBlockIds);
|
|
194
194
|
await removeStaleBlocks(workspacePath, activeBlockIds);
|
|
195
195
|
|
|
196
|
-
await saveBatchState(previousBatchHashes);
|
|
196
|
+
await saveBatchState(previousBatchHashes, version);
|
|
197
197
|
|
|
198
198
|
log(LOG_REFRESH, chalk.gray, `cycle done → ${activeBlockIds.size} blocks active, ${updatedCount} updated, ${skippedCount} skipped, ${failedCount} errors`);
|
|
199
199
|
}
|
|
@@ -201,7 +201,7 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
|
|
|
201
201
|
let cycleCount = 0;
|
|
202
202
|
|
|
203
203
|
process.on('SIGINT', async () => {
|
|
204
|
-
await saveBatchState(previousBatchHashes);
|
|
204
|
+
await saveBatchState(previousBatchHashes, version);
|
|
205
205
|
process.exit(0);
|
|
206
206
|
});
|
|
207
207
|
|