@antodevs/groundtruth 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -41,12 +41,14 @@ Current-generation AI coding assistants (Claude Code, Antigravity, Cursor) suffe
41
41
 
42
42
  **GroundTruth** acts as a transparent middleware layer that resolves this by dynamically injecting real-time, stack-specific documentation directly into the agent's context window prior to inference.
43
43
 
44
- ### The v0.2.0 Engine: Jina Reader & Source Registry
44
+ ### The v0.2 Engine: Global Cloud Intelligence
45
+
46
+ GroundTruth v0.2 introduces a paradigm shift in context quality and scalability:
47
+ - **Global Cloud Registry**: Bypasses search engines by querying a high-performance **Cloudflare Worker** registry. It covers the top ~200 frameworks with "Golden List" manual precision and more than **10,000 npm packages** via automated background indexing.
48
+ - **Jina Reader API Integration**: Seamlessly parses dynamic, JavaScript-rendered SPAs (like Vercel AI SDK, Next.js, and Svelte docs) into clean, LLM-optimized Markdown.
49
+ - **Automated "Gentle" Indexer**: A remote bot periodically synchronizes the latest documentation URLs from the npm ecosystem directly to the cloud registry, ensuring your context is never stale.
50
+ - **Zero-Config Resilience**: Operates locally with a strictly enforced 1.5s cloud timeout. If the registry is unreachable, it silently falls back to local Readability extraction or search.
45
51
 
46
- GroundTruth v0.2.0 introduces a massive upgrade to content quality:
47
- - **Jina Reader API Integration**: Parses dynamic, JavaScript-rendered SPAs (like Vercel AI SDK, Next.js, and Svelte docs) into clean, LLM-optimized Markdown.
48
- - **Smart Source Registry**: Automatically bypasses search engines for the top 20+ frameworks (React, Svelte, Vue, Astro, etc.) and fetches their official documentation directly.
49
- - **Readability Fallback**: Ensures reliable extraction even if the primary engine fails.
50
52
 
51
53
  ---
52
54
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@antodevs/groundtruth",
3
- "version": "0.2.4",
3
+ "version": "0.2.6",
4
4
  "description": "Lightweight Node.js proxy to intercept API requests from coding agents and inject fresh web context",
5
5
  "publishConfig": {
6
6
  "access": "public"
@@ -44,10 +44,9 @@
44
44
  "release:minor": "git add . && git commit -m \"chore: auto-commit before release\" || true && npm version minor && git push origin main --tags && npm publish"
45
45
  },
46
46
  "dependencies": {
47
- "@mozilla/readability": "^0.5.0",
47
+ "@mozilla/readability": "^0.6.0",
48
48
  "chalk": "^5.3.0",
49
- "cheerio": "^1.0.0",
50
- "linkedom": "^0.18.12",
51
- "node-fetch": "^3.3.2"
49
+ "cheerio": "^1.2.0",
50
+ "linkedom": "^0.18.12"
52
51
  }
53
52
  }
package/src/registry.js CHANGED
@@ -2,7 +2,6 @@
2
2
  * @module registry
3
3
  * @description Interroga il Cloudflare Worker (Remote Registry) per risolvere URL docs ufficiali.
4
4
  */
5
- import fetch from 'node-fetch';
6
5
 
7
6
  const REGISTRY_API_URL = 'https://groundtruth-registry.antony-flex01.workers.dev/lookup';
8
7
 
@@ -18,7 +17,11 @@ export async function lookupRegistryUrl(depName) {
18
17
  if (!depName) return null;
19
18
 
20
19
  // Normalizzazione preventiva
21
- const name = depName.split(' ')[0].toLowerCase().trim();
20
+ let name = depName.split(' ')[0].toLowerCase().trim();
21
+
22
+ // Alias mapping per framework comuni con scope npm
23
+ if (name === '@sveltejs/kit') name = 'sveltekit';
24
+
22
25
 
23
26
  // Check hit in memoria (ritorna subito)
24
27
  if (lookupCache.has(name)) {
package/src/sanitize.js CHANGED
@@ -19,17 +19,56 @@ const DANGEROUS_PATTERNS = [
19
19
  /HUMAN:\s/gi,
20
20
  ];
21
21
 
22
+ const NOISE_PATTERNS = [
23
+ /Skip to content/gi,
24
+ /Navigation Menu/gi,
25
+ /Toggle navigation/gi,
26
+ /Appearance settings/gi,
27
+ /AI CODE CREATION/gi,
28
+ /GitHub Copilot Write better code with AI/gi,
29
+ /Sign in/gi,
30
+ /Sign up/gi,
31
+ /Notifications/gi,
32
+ /Fork\s+\d+/gi,
33
+ /Star\s+[\d.]+[kK]?/gi,
34
+ /Code/gi,
35
+ /Issues/gi,
36
+ /Pull requests/gi,
37
+ /Actions/gi,
38
+ /Projects/gi,
39
+ /Security/gi,
40
+ /Insights/gi,
41
+ /Why GitHub/gi,
42
+ /Solutions/gi,
43
+ /Resources/gi,
44
+ /Open Source/gi,
45
+ /Enterprises/gi,
46
+ /Startups/gi,
47
+ ];
48
+
22
49
  /**
23
- * @description Filtra pattern pericolosi di prompt injection dal testo web scrappato.
50
+ * @description Filtra pattern pericolosi e rumore di navigazione dal testo web scrappato.
24
51
  * @param {string} text - Testo raw proveniente da web scraping
25
52
  * @param {number} maxLen - Lunghezza massima output (default 8000)
26
53
  * @returns {string} Testo sanitizzato
27
54
  */
28
55
  export function sanitizeWebContent(text, maxLen = 8000) {
29
56
  if (!text || typeof text !== 'string') return '';
57
+
30
58
  let cleaned = text;
59
+
60
+ // 1. Rimuoviamo il rumore di navigazione
61
+ for (const pattern of NOISE_PATTERNS) {
62
+ cleaned = cleaned.replace(pattern, '');
63
+ }
64
+
65
+ // 2. Rimuoviamo pattern pericolosi
31
66
  for (const p of DANGEROUS_PATTERNS) {
32
67
  cleaned = cleaned.replace(p, '[FILTERED]');
33
68
  }
69
+
70
+ // 3. Normalizzazione spazi bianchi per risparmiare token
71
+ cleaned = cleaned.replace(/\s+/g, ' ').trim();
72
+
34
73
  return cleaned.slice(0, maxLen);
35
74
  }
package/src/search.js CHANGED
@@ -2,7 +2,6 @@
2
2
  * @module search
3
3
  * @description Logica di scraping web: Jina Reader → fallback Readability, registry bypass, DDG search.
4
4
  */
5
- import fetch from 'node-fetch';
6
5
  import * as cheerio from 'cheerio';
7
6
  import { Readability } from '@mozilla/readability';
8
7
  import { DOMParser } from 'linkedom';
@@ -59,8 +58,7 @@ export async function fetchPageContent(url, userAgent, opts = {}) {
59
58
  try {
60
59
  const pageRes = await fetch(url, {
61
60
  signal: AbortSignal.timeout(5000),
62
- headers: { 'User-Agent': userAgent },
63
- agent: url.startsWith('https:') ? httpsAgent : httpAgent
61
+ headers: { 'User-Agent': userAgent }
64
62
  });
65
63
  if (pageRes.ok) {
66
64
  const document = new DOMParser().parseFromString(await pageRes.text(), 'text/html');
@@ -142,7 +140,7 @@ async function doSearch(query, resultsLimit = 3) {
142
140
  const userAgent = getRandomUA();
143
141
  const searchRes = await fetch(
144
142
  `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`,
145
- { signal: AbortSignal.timeout(5000), headers: { 'User-Agent': userAgent }, agent: httpsAgent }
143
+ { signal: AbortSignal.timeout(5000), headers: { 'User-Agent': userAgent } }
146
144
  );
147
145
  if (!searchRes.ok) throw new Error(`DDG ${searchRes.status}`);
148
146
 
package/src/state.js CHANGED
@@ -13,26 +13,39 @@ const STATE_FILE = path.join(STATE_DIR, 'watcher-state.json');
13
13
 
14
14
  /**
15
15
  * @description Carica gli hash validati e memorizzati dallo schedule storage locale.
16
- * @returns {Promise<Map>} Restituisce le hash map entries persistite del cron logic stream precedente.
16
+ * @param {string} currentVersion - Versione attuale dell'applicazione per validare la cache.
17
+ * @returns {Promise<Map>} Restituisce le hash map entries persistite o una mappa vuota se la versione differisce.
17
18
  */
18
- export async function loadBatchState() {
19
+ export async function loadBatchState(currentVersion) {
19
20
  try {
20
21
  if (!existsSync(STATE_FILE)) return new Map();
21
22
  const data = await readFile(STATE_FILE, 'utf8');
22
- const parsed = JSON.parse(data);
23
- return new Map(Object.entries(parsed));
23
+ const state = JSON.parse(data);
24
+
25
+ // Invalida la cache se la versione è differente (forza refresh dopo update)
26
+ if (state.version !== currentVersion) {
27
+ return new Map();
28
+ }
29
+
30
+ return new Map(Object.entries(state.hashes || {}));
24
31
  } catch {
25
32
  return new Map();
26
33
  }
27
34
  }
28
35
 
29
36
  /**
30
- * @description Sincronizza hash batches per fault tolerance cross process
31
- * @param {Map} map - Oggetto dei blocchi hashati validi in mem persist state map
37
+ * @description Sincronizza hash batches e versione per fault tolerance cross process.
38
+ * @param {Map} map - Oggetto dei blocchi hashati validi.
39
+ * @param {string} version - Versione attuale dell'applicazione.
32
40
  * @returns {Promise<void>}
33
41
  */
34
- export async function saveBatchState(map) {
42
+ export async function saveBatchState(map, version) {
35
43
  await mkdir(STATE_DIR, { recursive: true });
36
- const obj = Object.fromEntries(map);
37
- await atomicWrite(STATE_FILE, JSON.stringify(obj, null, 2), { backup: false });
44
+ const state = {
45
+ version: version,
46
+ updatedAt: new Date().toISOString(),
47
+ hashes: Object.fromEntries(map)
48
+ };
49
+ await atomicWrite(STATE_FILE, JSON.stringify(state, null, 2), { backup: false });
38
50
  }
51
+
package/src/watcher.js CHANGED
@@ -52,7 +52,7 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
52
52
 
53
53
  async function updateSkill() {
54
54
  if (previousBatchHashes.size === 0) {
55
- previousBatchHashes = await loadBatchState();
55
+ previousBatchHashes = await loadBatchState(version);
56
56
  }
57
57
  const deps = await readPackageDeps();
58
58
  if (!deps || deps.length === 0) {
@@ -193,7 +193,7 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
193
193
  await removeStaleBlocks(globalPath, activeBlockIds);
194
194
  await removeStaleBlocks(workspacePath, activeBlockIds);
195
195
 
196
- await saveBatchState(previousBatchHashes);
196
+ await saveBatchState(previousBatchHashes, version);
197
197
 
198
198
  log(LOG_REFRESH, chalk.gray, `cycle done → ${activeBlockIds.size} blocks active, ${updatedCount} updated, ${skippedCount} skipped, ${failedCount} errors`);
199
199
  }
@@ -201,7 +201,7 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
201
201
  let cycleCount = 0;
202
202
 
203
203
  process.on('SIGINT', async () => {
204
- await saveBatchState(previousBatchHashes);
204
+ await saveBatchState(previousBatchHashes, version);
205
205
  process.exit(0);
206
206
  });
207
207