@antodevs/groundtruth 0.1.4 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -51
- package/index.js +7 -3
- package/package.json +1 -1
- package/specification.yaml +1 -1
- package/src/circuit-breaker.js +9 -3
- package/src/cli.js +41 -4
- package/src/config.js +67 -0
- package/src/env.js +40 -0
- package/src/inject.js +1 -0
- package/src/packages.js +3 -1
- package/src/proxy.js +10 -3
- package/src/registry.js +62 -0
- package/src/sanitize.js +35 -0
- package/src/search.js +112 -62
- package/src/state.js +3 -2
- package/src/watcher.js +90 -20
- package/assets/banner.png +0 -0
package/README.md
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-

|
|
2
|
-
|
|
3
1
|
# GroundTruth
|
|
4
2
|
|
|
5
3
|
> Zero-configuration context injection layer for LLM-based coding agents.
|
|
@@ -43,6 +41,13 @@ Current-generation AI coding assistants (Claude Code, Antigravity, Cursor) suffe
|
|
|
43
41
|
|
|
44
42
|
**GroundTruth** acts as a transparent middleware layer that resolves this by dynamically injecting real-time, stack-specific documentation directly into the agent's context window prior to inference.
|
|
45
43
|
|
|
44
|
+
### The v0.2.0 Engine: Jina Reader & Source Registry
|
|
45
|
+
|
|
46
|
+
GroundTruth v0.2.0 introduces a massive upgrade to content quality:
|
|
47
|
+
- **Jina Reader API Integration**: Parses dynamic, JavaScript-rendered SPAs (like Vercel AI SDK, Next.js, and Svelte docs) into clean, LLM-optimized Markdown.
|
|
48
|
+
- **Smart Source Registry**: Automatically bypasses search engines for the top 20+ frameworks (React, Svelte, Vue, Astro, etc.) and fetches their official documentation directly.
|
|
49
|
+
- **Readability Fallback**: Ensures reliable extraction even if the primary engine fails.
|
|
50
|
+
|
|
46
51
|
---
|
|
47
52
|
|
|
48
53
|
## Architecture & Operational Mechanics
|
|
@@ -56,22 +61,18 @@ In this mode, GroundTruth provisions a local HTTP proxy that intercepts outbound
|
|
|
56
61
|
```mermaid
|
|
57
62
|
sequenceDiagram
|
|
58
63
|
participant Agent as Claude Code
|
|
59
|
-
participant Proxy as GroundTruth
|
|
60
|
-
participant
|
|
64
|
+
participant Proxy as GroundTruth
|
|
65
|
+
participant Jina as Jina Reader API
|
|
61
66
|
participant API as Anthropic API
|
|
62
67
|
|
|
63
|
-
Agent->>Proxy: Send Prompt
|
|
64
|
-
Proxy->>
|
|
65
|
-
|
|
68
|
+
Agent->>Proxy: Send Prompt
|
|
69
|
+
Proxy->>Jina: Fetch docs (Direct Registry / DDG)
|
|
70
|
+
Jina-->>Proxy: Return clean Markdown
|
|
66
71
|
Note over Proxy: Injects live context<br/>into System Prompt
|
|
67
72
|
Proxy->>API: Forward mutated request
|
|
68
|
-
API-->>Agent: Return
|
|
73
|
+
API-->>Agent: Return response
|
|
69
74
|
```
|
|
70
75
|
|
|
71
|
-
- **Query Extraction**: Parses the user prompt to identify context dependencies.
|
|
72
|
-
- **Data Hydration**: Orchestrates an automated DuckDuckGo search to fetch the most recent documentation. It relies on a deterministic `LRUCache`, TCP keep-alive Pool configurations, and a 429-aware `CircuitBreaker` pattern to safeguard network operations safely.
|
|
73
|
-
- **Payload Mutation**: Mutates the outgoing system prompt to inject the scraped live context before forwarding the request to the Anthropic completion endpoint. (It includes type-guard structures making it safe from undocumented Gemini system changes).
|
|
74
|
-
|
|
75
76
|
### 2. File Watcher Mode (Designed for `antigravity` / `gemini`)
|
|
76
77
|
|
|
77
78
|
For agents that support side-channel context ingestion via dotfiles (like Antigravity Rules), GroundTruth runs as a background daemon.
|
|
@@ -79,42 +80,33 @@ For agents that support side-channel context ingestion via dotfiles (like Antigr
|
|
|
79
80
|
```mermaid
|
|
80
81
|
flowchart TD
|
|
81
82
|
pkg([package.json]) -->|Parse Dependencies| GT{GroundTruth Watcher}
|
|
82
|
-
GT -->|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
classDef core fill:#3B82F6,stroke:#fff,stroke-width:2px,color:#fff;
|
|
89
|
-
class GT,Agent core;
|
|
83
|
+
GT -->|Smart Routing| Map{Registry?}
|
|
84
|
+
Map -->|Yes| Jina[Jina Reader API]
|
|
85
|
+
Map -->|No| DDG[DuckDuckGo Search] --> Jina
|
|
86
|
+
Jina -->|Clean Markdown| Gen[Write to ~/.gemini/GEMINI.md]
|
|
87
|
+
Gen --> Agent(Coding Assistant)
|
|
90
88
|
```
|
|
91
89
|
|
|
92
|
-
- **Stack Introspection**: Analyzes the local `package.json` to infer the project's dependency graph.
|
|
93
|
-
- **Intelligent Chunking**: Groups the filtered dependencies in configurable size batches (default 3) and uniquely hashes them to avoid redundant context-fetching loops unless changes are detected.
|
|
94
|
-
- **Automated Polling**: Periodically fetches updated documentation for the detected stack chunks in parallel.
|
|
95
|
-
- **State Persistence**: Hashes are serialized persistently avoiding redundant DuckDuckGo scraping operations across application crashes.
|
|
96
|
-
- **Block-Based Synchronization**: Writes the parsed context discretely into hash-oriented blocks inside `~/.gemini/GEMINI.md`. Native POSIX bindings and intra-device temporary files are leveraged ensuring `Atomic Writes` without EXDEV link errors. Stale contexts are efficiently garbage-collected via regex matching over tracked batch hashes.
|
|
97
|
-
|
|
98
90
|
---
|
|
99
91
|
|
|
100
|
-
|
|
101
|
-
```bash
|
|
102
|
-
# Initialize GroundTruth in proxy mode (auto-exports ANTHROPIC_BASE_URL)
|
|
103
|
-
npx @antodevs/groundtruth --claude-code
|
|
104
|
-
|
|
105
|
-
# Execute your agent in a separate TTY
|
|
106
|
-
claude
|
|
107
|
-
```
|
|
108
|
-
> **Note:** The daemon automatically mutates your shell environment (`~/.zshrc`, `~/.bashrc`, `~/.bash_profile`, `~/.config/fish/config.fish`) to route traffic through the localhost proxy.
|
|
92
|
+
## Configuration (`.groundtruth.json`)
|
|
109
93
|
|
|
110
|
-
|
|
111
|
-
```bash
|
|
112
|
-
cd /workspace/your-project
|
|
94
|
+
You can configure GroundTruth per project by creating a `.groundtruth.json` file in the directory you run it from:
|
|
113
95
|
|
|
114
|
-
|
|
115
|
-
|
|
96
|
+
```json
|
|
97
|
+
{
|
|
98
|
+
"maxTokens": 4000,
|
|
99
|
+
"quality": "high",
|
|
100
|
+
"verbose": true,
|
|
101
|
+
"sources": [
|
|
102
|
+
{ "url": "https://svelte.dev/docs/kit/introduction", "label": "SvelteKit Docs" }
|
|
103
|
+
]
|
|
104
|
+
}
|
|
116
105
|
```
|
|
117
|
-
|
|
106
|
+
|
|
107
|
+
- **`maxTokens`**: The maximum number of characters (not model tokens, despite the name) injected for a single page.
|
|
108
|
+
- **`quality`**: `low`, `medium`, or `high`. Controls how many search results are retrieved, the per-page character budget, and the Jina Reader timeout.
|
|
109
|
+
- **`sources`**: Useful for custom, internal, or highly specific documentation that GroundTruth should always inject.
|
|
118
110
|
|
|
119
111
|
---
|
|
120
112
|
|
|
@@ -124,24 +116,27 @@ npx @antodevs/groundtruth --antigravity
|
|
|
124
116
|
|------|------|-------------|
|
|
125
117
|
| `--claude-code` | Proxy | Initializes HTTP interceptor for Anthropic API payloads. |
|
|
126
118
|
| `--antigravity` | Rules | Initializes background daemon for dotfile synchronization. |
|
|
127
|
-
| `--
|
|
119
|
+
| `--uninstall` | Cleanup | Removes `ANTHROPIC_BASE_URL` from all shell config files. |
|
|
128
120
|
| `--port <n>` | Proxy | Overrides default proxy listener port (Default: `8080`). |
|
|
121
|
+
| `--quality <level>`| Both | `low`, `medium`, or `high` quality preset (Default: `medium`). |
|
|
122
|
+
| `--max-tokens <n>` | Both | Modifies the character limit per injected context block (Default: `4000`). |
|
|
129
123
|
| `--interval <n>` | Rules | Overrides the polling interval for documentation refresh in minutes (Default: `5`). |
|
|
130
|
-
| `--batch-size <n>` | Rules | Changes the amount of dependencies per query chunk for block fetching
|
|
124
|
+
| `--batch-size <n>` | Rules | Changes the amount of dependencies per query chunk for block fetching. |
|
|
125
|
+
| `--verbose` | Both | Enables verbose logging output. |
|
|
131
126
|
|
|
132
127
|
---
|
|
133
128
|
|
|
134
129
|
## Benchmark & Comparison
|
|
135
130
|
|
|
136
|
-
GroundTruth is
|
|
131
|
+
GroundTruth is optimized for zero-configuration deployments and minimal token overhead compared to existing MCP solutions.
|
|
137
132
|
|
|
138
|
-
| Feature | GroundTruth |
|
|
139
|
-
|
|
140
|
-
| **
|
|
141
|
-
| **
|
|
142
|
-
| **
|
|
143
|
-
| **
|
|
144
|
-
| **
|
|
133
|
+
| Feature | GroundTruth | Jina Reader (Direct) | Crawl4AI / Playwright | Firecrawl |
|
|
134
|
+
|---------|-------------|----------------------|-----------------------|-----------|
|
|
135
|
+
| **Setup Required** | None (1 command) | Scripting needed | High (Docker/Deps) | High (API Key) |
|
|
136
|
+
| **JS Rendering** | ✅ Yes (via Jina) | ✅ Yes | ✅ Yes | ✅ Yes |
|
|
137
|
+
| **Agent Injection** | ✅ Auto (Proxy/File) | ❌ Manual integration | ❌ Manual integration | ❌ Manual integration |
|
|
138
|
+
| **Cost** | Free | Rate limits apply | Free | Paid |
|
|
139
|
+
| **Runtime Footprint** | < 1MB | N/A | ~200MB | N/A |
|
|
145
140
|
|
|
146
141
|
---
|
|
147
142
|
|
package/index.js
CHANGED
|
@@ -4,14 +4,18 @@
|
|
|
4
4
|
* @description Entry point runtime groundtruth delegazione CLI o proxy flow logic.
|
|
5
5
|
*/
|
|
6
6
|
import { chalk, label } from './src/logger.js';
|
|
7
|
-
import { usePackageJson, antigravityMode, claudeCodeMode, port, intervalMinutes, batchSize, version } from './src/cli.js';
|
|
7
|
+
import { usePackageJson, antigravityMode, claudeCodeMode, uninstallMode, port, intervalMinutes, batchSize, version } from './src/cli.js';
|
|
8
8
|
import { createServer } from './src/proxy.js';
|
|
9
|
-
import { autoSetEnv } from './src/env.js';
|
|
9
|
+
import { autoSetEnv, removeEnv } from './src/env.js';
|
|
10
10
|
import { startWatcher } from './src/watcher.js';
|
|
11
11
|
|
|
12
12
|
// ─── Dispatcher start app logic ──────────────────────
|
|
13
13
|
|
|
14
|
-
if (
|
|
14
|
+
if (uninstallMode) {
|
|
15
|
+
console.log(`\n ${chalk.white.bold('GroundTruth')} ${chalk.gray(`v${version}`)} ${chalk.gray('[uninstall]')}\n`);
|
|
16
|
+
await removeEnv();
|
|
17
|
+
process.exit(0);
|
|
18
|
+
} else if (antigravityMode) {
|
|
15
19
|
startWatcher({ intervalMinutes, usePackageJson, batchSize });
|
|
16
20
|
} else if (claudeCodeMode) {
|
|
17
21
|
const server = await createServer(usePackageJson);
|
package/package.json
CHANGED
package/specification.yaml
CHANGED
package/src/circuit-breaker.js
CHANGED
|
@@ -35,7 +35,7 @@ export class CircuitBreaker {
|
|
|
35
35
|
this.onSuccess();
|
|
36
36
|
return result;
|
|
37
37
|
} catch (err) {
|
|
38
|
-
this.onFailure();
|
|
38
|
+
this.onFailure(err);
|
|
39
39
|
throw err;
|
|
40
40
|
}
|
|
41
41
|
}
|
|
@@ -53,9 +53,15 @@ export class CircuitBreaker {
|
|
|
53
53
|
}
|
|
54
54
|
}
|
|
55
55
|
|
|
56
|
-
onFailure() {
|
|
57
|
-
|
|
56
|
+
onFailure(err) {
|
|
57
|
+
// 429 rate limit apre il circuito immediatamente
|
|
58
|
+
if (err?.message?.includes('429')) {
|
|
59
|
+
this.failures = this.failureThreshold;
|
|
60
|
+
} else {
|
|
61
|
+
this.failures++;
|
|
62
|
+
}
|
|
58
63
|
this.lastFailureTime = Date.now();
|
|
64
|
+
this.halfOpenSuccesses = 0; // reset per evitare accumulo tra cicli HALF_OPEN
|
|
59
65
|
if (this.failures >= this.failureThreshold) {
|
|
60
66
|
this.state = 'OPEN';
|
|
61
67
|
}
|
package/src/cli.js
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import { chalk } from './logger.js';
|
|
6
6
|
import { createRequire } from 'module';
|
|
7
|
+
import { loadConfig, resolveQuality } from './config.js';
|
|
7
8
|
|
|
8
9
|
const { version } = createRequire(import.meta.url)('../package.json');
|
|
9
10
|
|
|
@@ -13,21 +14,29 @@ const args = process.argv.slice(2);
|
|
|
13
14
|
const usePackageJson = args.includes('--use-package-json');
|
|
14
15
|
const antigravityMode = args.includes('--antigravity');
|
|
15
16
|
const claudeCodeMode = args.includes('--claude-code');
|
|
17
|
+
const uninstallMode = args.includes('--uninstall');
|
|
16
18
|
|
|
17
19
|
// Stop immediato se nessun mode definito
|
|
18
|
-
if (!antigravityMode && !claudeCodeMode) {
|
|
20
|
+
if (!antigravityMode && !claudeCodeMode && !uninstallMode) {
|
|
19
21
|
console.log();
|
|
20
22
|
console.log(` ${chalk.white.bold('GroundTruth')} ${chalk.gray(`v${version}`)}`);
|
|
21
23
|
console.log();
|
|
22
24
|
console.log(` Usage:`);
|
|
23
25
|
console.log(` groundtruth --claude-code proxy mode (Claude Code)`);
|
|
24
26
|
console.log(` groundtruth --antigravity rules mode (Antigravity/Gemini)`);
|
|
27
|
+
console.log(` groundtruth --uninstall remove shell env config`);
|
|
25
28
|
console.log();
|
|
26
29
|
console.log(` Options:`);
|
|
27
30
|
console.log(` --use-package-json use package.json as search query`);
|
|
28
31
|
console.log(` --port <n> custom port, default 8080 (claude-code only)`);
|
|
29
32
|
console.log(` --interval <n> refresh in minutes, default 5 (antigravity only)`);
|
|
30
33
|
console.log(` --batch-size <n> deps per search batch (default: 3)`);
|
|
34
|
+
console.log(` --max-tokens <n> max tokens per context block (default: 4000)`);
|
|
35
|
+
console.log(` --quality <level> low | medium | high (default: medium)`);
|
|
36
|
+
console.log(` --verbose enable detailed extraction logging`);
|
|
37
|
+
console.log();
|
|
38
|
+
console.log(` Config:`);
|
|
39
|
+
console.log(` Place a .groundtruth.json in your project root for persistent settings.`);
|
|
31
40
|
console.log();
|
|
32
41
|
console.log(` Docs:`);
|
|
33
42
|
console.log(` Claude Code → export ANTHROPIC_BASE_URL=http://localhost:8080`);
|
|
@@ -38,13 +47,13 @@ if (!antigravityMode && !claudeCodeMode) {
|
|
|
38
47
|
|
|
39
48
|
// ─── Default params override ─────────────────────────
|
|
40
49
|
|
|
41
|
-
let port = 8080;
|
|
50
|
+
let port = 8080;
|
|
42
51
|
const portArgIndex = args.indexOf('--port');
|
|
43
52
|
if (portArgIndex !== -1 && args[portArgIndex + 1]) {
|
|
44
53
|
port = parseInt(args[portArgIndex + 1], 10);
|
|
45
54
|
}
|
|
46
55
|
|
|
47
|
-
let intervalMinutes = 5;
|
|
56
|
+
let intervalMinutes = 5;
|
|
48
57
|
const intervalArgIndex = args.indexOf('--interval');
|
|
49
58
|
if (intervalArgIndex !== -1 && args[intervalArgIndex + 1]) {
|
|
50
59
|
intervalMinutes = parseInt(args[intervalArgIndex + 1], 10) || 5;
|
|
@@ -55,4 +64,32 @@ const batchSize = batchSizeIndex !== -1
|
|
|
55
64
|
? Math.max(2, Math.min(parseInt(args[batchSizeIndex + 1]) || 3, 5))
|
|
56
65
|
: 3;
|
|
57
66
|
|
|
58
|
-
|
|
67
|
+
// ─── New v0.2 flags ──────────────────────────────────
|
|
68
|
+
|
|
69
|
+
const maxTokensIndex = args.indexOf('--max-tokens');
|
|
70
|
+
const cliMaxTokens = maxTokensIndex !== -1
|
|
71
|
+
? Math.max(500, Math.min(parseInt(args[maxTokensIndex + 1]) || 4000, 8000))
|
|
72
|
+
: null;
|
|
73
|
+
|
|
74
|
+
const qualityIndex = args.indexOf('--quality');
|
|
75
|
+
const cliQuality = qualityIndex !== -1 && ['low', 'medium', 'high'].includes(args[qualityIndex + 1])
|
|
76
|
+
? args[qualityIndex + 1]
|
|
77
|
+
: null;
|
|
78
|
+
|
|
79
|
+
const cliVerbose = args.includes('--verbose');
|
|
80
|
+
|
|
81
|
+
// ─── Merge CLI + .groundtruth.json ───────────────────
|
|
82
|
+
|
|
83
|
+
const fileConfig = await loadConfig();
|
|
84
|
+
|
|
85
|
+
const maxTokens = cliMaxTokens ?? fileConfig.maxTokens;
|
|
86
|
+
const quality = cliQuality ?? fileConfig.quality;
|
|
87
|
+
const verbose = cliVerbose || fileConfig.verbose;
|
|
88
|
+
const qualitySettings = resolveQuality(quality);
|
|
89
|
+
const customSources = fileConfig.sources;
|
|
90
|
+
|
|
91
|
+
export {
|
|
92
|
+
args, usePackageJson, antigravityMode, claudeCodeMode, uninstallMode,
|
|
93
|
+
port, intervalMinutes, batchSize, version,
|
|
94
|
+
maxTokens, quality, qualitySettings, verbose, customSources
|
|
95
|
+
};
|
package/src/config.js
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module config
|
|
3
|
+
* @description Carica configurazione opzionale da .groundtruth.json nella cwd.
|
|
4
|
+
*/
|
|
5
|
+
import { readFile } from 'fs/promises';
|
|
6
|
+
import { existsSync } from 'fs';
|
|
7
|
+
import path from 'path';
|
|
8
|
+
|
|
9
|
+
// ─── Quality Presets ─────────────────────────────────
|
|
10
|
+
|
|
11
|
+
// Operational parameters per quality level.
//   ddgResults   – result count handed to the search layer
//   charsPerPage – per-page character budget
//   jinaTimeout  – Jina Reader request timeout, in milliseconds
const QUALITY_PRESETS = {
  low: { ddgResults: 1, charsPerPage: 2000, jinaTimeout: 5000 },
  medium: { ddgResults: 3, charsPerPage: 4000, jinaTimeout: 8000 },
  high: { ddgResults: 5, charsPerPage: 8000, jinaTimeout: 12000 },
};

/**
 * @description Resolves a quality level name to its operational parameters.
 * @param {string} level - "low" | "medium" | "high"
 * @returns {Object} { ddgResults, charsPerPage, jinaTimeout }; the "medium"
 *   preset when the level is not one of the known names
 */
export function resolveQuality(level) {
  // Own-property check: a bare `QUALITY_PRESETS[level]` lookup would hand back
  // inherited members (e.g. `constructor`, `toString`) for those level names.
  return Object.hasOwn(QUALITY_PRESETS, level)
    ? QUALITY_PRESETS[level]
    : QUALITY_PRESETS.medium;
}
|
|
25
|
+
|
|
26
|
+
// ─── Config Defaults ─────────────────────────────────
|
|
27
|
+
|
|
28
|
+
// Values used when .groundtruth.json is missing, unusable, or omits a field.
const DEFAULTS = {
  maxTokens: 4000,
  quality: 'medium',
  verbose: false,
  sources: [],
};

/**
 * @description Loads .groundtruth.json from the current working directory and
 *   merges it with the defaults. Never throws: a missing file yields the
 *   defaults silently, an unreadable or invalid file yields the defaults after
 *   a warning on stderr.
 * @returns {Promise<Object>} Final merged configuration
 *   { maxTokens, quality, verbose, sources }
 */
export async function loadConfig() {
  const configPath = path.resolve(process.cwd(), '.groundtruth.json');

  // Hand out a fresh `sources` array every time so a caller mutating the
  // result cannot alter the shared DEFAULTS object.
  const defaults = () => ({ ...DEFAULTS, sources: [...DEFAULTS.sources] });

  if (!existsSync(configPath)) return defaults();

  let parsed;
  try {
    parsed = JSON.parse(await readFile(configPath, 'utf8'));
  } catch (err) {
    // Best-effort: keep running on defaults, but tell the user their file was ignored.
    console.warn(`[groundtruth] ignoring .groundtruth.json → ${err.message}`);
    return defaults();
  }

  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    console.warn('[groundtruth] ignoring .groundtruth.json → top-level value must be a JSON object');
    return defaults();
  }

  // A missing or unparsable maxTokens falls back to the default (as the CLI
  // flag does) instead of being clamped up to the minimum.
  const hasUsableMaxTokens = !Number.isNaN(Number.parseInt(parsed.maxTokens, 10));

  return {
    maxTokens: clamp(hasUsableMaxTokens ? parsed.maxTokens : DEFAULTS.maxTokens, 500, 8000),
    quality: ['low', 'medium', 'high'].includes(parsed.quality) ? parsed.quality : DEFAULTS.quality,
    verbose: typeof parsed.verbose === 'boolean' ? parsed.verbose : DEFAULTS.verbose,
    // Keep only entries that carry a non-empty string URL.
    sources: Array.isArray(parsed.sources)
      ? parsed.sources.filter((s) => typeof s?.url === 'string' && s.url !== '')
      : defaults().sources,
  };
}
|
|
57
|
+
|
|
58
|
+
/**
 * @description Coerces a value to an integer and clamps it to [min, max].
 * @param {*} val - Number or numeric string to clamp
 * @param {number} min - Lower bound, also returned when val cannot be parsed
 * @param {number} max - Upper bound
 * @returns {number} The clamped integer
 */
function clamp(val, min, max) {
  // Numbers are truncated directly: parseInt() stringifies its argument first,
  // so 1e21 ("1e+21") would parse as 1 and Infinity as NaN.
  const n = typeof val === 'number' ? Math.trunc(val) : Number.parseInt(val, 10);
  if (Number.isNaN(n)) return min;
  return Math.max(min, Math.min(n, max));
}
|
|
66
|
+
|
|
67
|
+
export { QUALITY_PRESETS };
|
package/src/env.js
CHANGED
|
@@ -118,3 +118,43 @@ export async function autoSetEnv(p) {
|
|
|
118
118
|
log(LOG_WARN, chalk.yellow, chalk.white('env setup error') + ` → ${chalk.yellow(err.message)}`);
|
|
119
119
|
}
|
|
120
120
|
}
|
|
121
|
+
|
|
122
|
+
/**
 * @description Removes ANTHROPIC_BASE_URL from the user's shell configuration
 *   files (~/.zshrc, ~/.bashrc, ~/.bash_profile, ~/.profile and
 *   ~/.config/fish/config.fish), logs one line per cleaned file plus a
 *   summary, then unsets the variable in the current process.
 *   Per-file failures are logged as warnings and do not abort the run.
 * @returns {Promise<void>}
 */
export async function removeEnv() {
  const homeDir = os.homedir();

  // One matcher per shell syntax; each removes the whole line including its newline.
  const posixExportLine = /^export ANTHROPIC_BASE_URL=.*\n?/gm;
  const fishExportLine = /^set -gx ANTHROPIC_BASE_URL .*\n?/gm;

  const targets = [
    { file: path.join(homeDir, '.zshrc'), pattern: posixExportLine },
    { file: path.join(homeDir, '.bashrc'), pattern: posixExportLine },
    { file: path.join(homeDir, '.bash_profile'), pattern: posixExportLine },
    { file: path.join(homeDir, '.profile'), pattern: posixExportLine },
    { file: path.join(homeDir, '.config', 'fish', 'config.fish'), pattern: fishExportLine },
  ];

  let cleaned = 0;
  for (const t of targets) {
    if (!existsSync(t.file)) continue;
    try {
      const content = await fs.readFile(t.file, 'utf8');
      const stripped = content.replace(t.pattern, '');

      // Decide on the variable removal alone. Comparing after the blank-line
      // collapse would rewrite (and report as cleaned) files that merely
      // contain three or more consecutive newlines.
      if (stripped === content) continue;

      // Tidy the gap left behind by the removed line(s).
      const result = stripped.replace(/\n{3,}/g, '\n\n');
      await atomicWrite(t.file, result);
      const rel = t.file.replace(homeDir, '~');
      log(LOG_OK, chalk.green, chalk.white('removed ANTHROPIC_BASE_URL from') + ' ' + chalk.white(rel));
      cleaned++;
    } catch (e) {
      log(LOG_WARN, chalk.yellow, chalk.white(`cannot clean ${path.basename(t.file)}`) + ` → ${chalk.yellow(e.message)}`);
    }
  }

  if (cleaned === 0) {
    log(LOG_WARN, chalk.yellow, chalk.white('nothing to clean') + ` → ${chalk.yellow('no ANTHROPIC_BASE_URL found in shell configs')}`);
  } else {
    log(LOG_OK, chalk.green, chalk.white(`cleaned ${cleaned} file(s)`));
  }

  delete process.env.ANTHROPIC_BASE_URL;
}
|
package/src/inject.js
CHANGED
|
@@ -33,6 +33,7 @@ export async function injectBlock(filePath, content, blockId) {
|
|
|
33
33
|
if (startIndex !== -1 && endIndex !== -1 && endIndex > startIndex) {
|
|
34
34
|
fileContent = fileContent.slice(0, startIndex) + block + fileContent.slice(endIndex + endTag.length);
|
|
35
35
|
} else {
|
|
36
|
+
fileContent = fileContent.trimEnd() + '\n\n' + block + '\n';
|
|
36
37
|
}
|
|
37
38
|
|
|
38
39
|
await atomicWrite(filePath, fileContent);
|
package/src/packages.js
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
import fs from 'fs/promises';
|
|
6
6
|
import path from 'path';
|
|
7
7
|
import { createHash } from 'crypto';
|
|
8
|
+
import { chalk, log, LOG_WARN } from './logger.js';
|
|
8
9
|
|
|
9
10
|
// ─── Logica Dipendenze ───────────────────────────────
|
|
10
11
|
|
|
@@ -41,7 +42,8 @@ export async function readPackageDeps() {
|
|
|
41
42
|
selected = selected.concat(filterAndFormat(pkg.devDependencies));
|
|
42
43
|
|
|
43
44
|
return selected.length > 0 ? selected : null;
|
|
44
|
-
} catch (
|
|
45
|
+
} catch (err) {
|
|
46
|
+
log(LOG_WARN, chalk.yellow, chalk.white('package.json parse error') + ` → ${chalk.yellow(err.message)}`);
|
|
45
47
|
return null;
|
|
46
48
|
}
|
|
47
49
|
}
|
package/src/proxy.js
CHANGED
|
@@ -8,6 +8,8 @@ import { webSearch } from './search.js';
|
|
|
8
8
|
import { readPackageDeps, buildQuery } from './packages.js';
|
|
9
9
|
import { chalk, log, LOG_WARN, LOG_BOLT } from './logger.js';
|
|
10
10
|
import { httpsAgent } from './http-agent.js';
|
|
11
|
+
import { sanitizeWebContent } from './sanitize.js';
|
|
12
|
+
import { maxTokens, qualitySettings, verbose } from './cli.js';
|
|
11
13
|
|
|
12
14
|
// ─── HTTP Node server daemon ─────────────────────────
|
|
13
15
|
|
|
@@ -93,14 +95,19 @@ export async function createServer(usePackageJson) {
|
|
|
93
95
|
try {
|
|
94
96
|
if (!query || query.trim() === String(new Date().getFullYear())) throw new Error('Empty query');
|
|
95
97
|
// parallel load in proxy app process to boost response load
|
|
96
|
-
const { results, pageText } = await webSearch(query, true
|
|
98
|
+
const { results, pageText } = await webSearch(query, true, {
|
|
99
|
+
ddgResults: qualitySettings.ddgResults,
|
|
100
|
+
maxLen: qualitySettings.charsPerPage,
|
|
101
|
+
jinaTimeout: qualitySettings.jinaTimeout,
|
|
102
|
+
verbose,
|
|
103
|
+
});
|
|
97
104
|
resultsCount = results.length;
|
|
98
105
|
|
|
99
106
|
contextBlock = `\n\n--- WEB CONTEXT (live, ${new Date().toISOString()}) ---\n`;
|
|
100
107
|
results.forEach((r, i) => {
|
|
101
|
-
contextBlock += `${i + 1}. ${r.title}: ${r.snippet} (${r.url})\n`;
|
|
108
|
+
contextBlock += `${i + 1}. ${r.title}: ${sanitizeWebContent(r.snippet, 500)} (${r.url})\n`;
|
|
102
109
|
});
|
|
103
|
-
if (pageText) contextBlock += `\nFULL TEXT:\n${pageText}\n`;
|
|
110
|
+
if (pageText) contextBlock += `\nFULL TEXT:\n${sanitizeWebContent(pageText, maxTokens)}\n`;
|
|
104
111
|
contextBlock += `--- END WEB CONTEXT ---\n`;
|
|
105
112
|
didInject = true;
|
|
106
113
|
} catch (_) {
|
package/src/registry.js
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module registry
|
|
3
|
+
* @description Mappa hardcodata dipendenza → URL docs ufficiale per bypass DDG su framework noti.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// ─── Docs URL Registry ──────────────────────────────
|
|
7
|
+
|
|
8
|
+
// Lower-case dependency name → official documentation URL.
const DOCS_REGISTRY = {
  'svelte': 'https://svelte.dev/docs/svelte/overview',
  'sveltekit': 'https://svelte.dev/docs/kit/introduction',
  'react': 'https://react.dev/reference/react',
  'react-dom': 'https://react.dev/reference/react-dom',
  'next': 'https://nextjs.org/docs',
  'nextjs': 'https://nextjs.org/docs',
  'vue': 'https://vuejs.org/api/',
  'nuxt': 'https://nuxt.com/docs/api',
  'angular': 'https://angular.dev/overview',
  'astro': 'https://docs.astro.build/en/reference/configuration-reference/',
  'tailwindcss': 'https://tailwindcss.com/docs',
  'typescript': 'https://www.typescriptlang.org/docs/',
  'express': 'https://expressjs.com/en/5x/api.html',
  'fastify': 'https://fastify.dev/docs/latest/',
  'hono': 'https://hono.dev/docs/',
  'solid-js': 'https://docs.solidjs.com/',
  'qwik': 'https://qwik.dev/docs/',
  'remix': 'https://remix.run/docs/en/main',
  'prisma': 'https://www.prisma.io/docs',
  'drizzle-orm': 'https://orm.drizzle.team/docs/overview',
  'three': 'https://threejs.org/docs/',
  'zod': 'https://zod.dev/',
  'trpc': 'https://trpc.io/docs',
  'tanstack-query': 'https://tanstack.com/query/latest/docs/overview',
};

/**
 * @description Normalizes a dependency name and looks up its docs URL in the registry.
 *   Tries, in order: the exact name, two hard-coded scoped aliases, the name
 *   with its `@scope/` prefix removed, and that name without a trailing `-js`.
 * @param {string} depName - Dependency name, optionally followed by a version
 *   (e.g. "svelte 5.51" or "@sveltejs/kit")
 * @returns {string|null} Direct docs URL, or null when nothing matches or the
 *   input is not a usable string
 */
export function lookupRegistryUrl(depName) {
  if (typeof depName !== 'string') return null;

  // Keep only the name, dropping the version ("svelte 5.51" → "svelte").
  const name = depName.trim().split(' ')[0].toLowerCase();
  if (name === '') return null;

  // Own-property lookup so names like "constructor" or "__proto__" cannot
  // resolve to members inherited from Object.prototype.
  const urlFor = (key) => (Object.hasOwn(DOCS_REGISTRY, key) ? DOCS_REGISTRY[key] : null);

  // Direct match
  const direct = urlFor(name);
  if (direct) return direct;

  // Scoped packages whose registry key differs from the part after the scope.
  if (name === '@sveltejs/kit') return DOCS_REGISTRY['sveltekit'];
  if (name === '@next/core') return DOCS_REGISTRY['next'];

  // Generic scope strip ("@types/react" → "react"); a scope with no "/name"
  // part yields an empty string instead of undefined.
  const stripped = name.startsWith('@') ? (name.split('/')[1] ?? '') : name;
  const unscoped = urlFor(stripped);
  if (unscoped) return unscoped;

  // Strip -js suffix ("three-js" → "three")
  const noJs = stripped.replace(/-js$/, '');
  if (noJs !== stripped) return urlFor(noJs);

  return null;
}
|
|
61
|
+
|
|
62
|
+
export { DOCS_REGISTRY };
|
package/src/sanitize.js
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module sanitize
|
|
3
|
+
* @description Sanitizzazione contenuto web contro prompt injection attacks.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// Known prompt-injection phrases and chat-template markers to be filtered out.
const DANGEROUS_PATTERNS = [
  /ignore\s+(all\s+)?previous\s+instructions?/gi,
  /disregard\s+(all\s+)?previous/gi,
  /you\s+are\s+now\s+/gi,
  /forget\s+(all\s+)?(your\s+)?instructions?/gi,
  /new\s+instructions?\s*:/gi,
  /system\s*prompt\s*:/gi,
  /\[INST\]/gi,
  /<\|im_start\|>/gi,
  /<\|im_end\|>/gi,
  /```system/gi,
  /ASSISTANT:\s/gi,
  /HUMAN:\s/gi,
];

/**
 * @description Replaces known prompt-injection patterns in scraped web text
 *   with "[FILTERED]" and truncates the result.
 * @param {string} text - Raw text coming from web scraping
 * @param {number} maxLen - Maximum output length in UTF-16 code units (default 8000)
 * @returns {string} Sanitized text; an empty string for empty or non-string input
 */
export function sanitizeWebContent(text, maxLen = 8000) {
  if (!text || typeof text !== 'string') return '';

  let cleaned = text;
  for (const p of DANGEROUS_PATTERNS) {
    cleaned = cleaned.replace(p, '[FILTERED]');
  }

  // Filtering runs before truncation so a pattern straddling the cut point is
  // still caught.
  const truncated = cleaned.slice(0, maxLen);

  // If the cut landed between the two halves of a surrogate pair, drop the
  // dangling high surrogate so the output stays well-formed.
  const lastUnit = truncated.charCodeAt(truncated.length - 1);
  const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
  if (truncated.length < cleaned.length && endsOnHighSurrogate) {
    return truncated.slice(0, -1);
  }
  return truncated;
}
|
package/src/search.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @module search
|
|
3
|
-
* @description Logica di scraping web
|
|
3
|
+
* @description Logica di scraping web: Jina Reader → fallback Readability, registry bypass, DDG search.
|
|
4
4
|
*/
|
|
5
5
|
import fetch from 'node-fetch';
|
|
6
6
|
import * as cheerio from 'cheerio';
|
|
@@ -9,26 +9,114 @@ import { DOMParser } from 'linkedom';
|
|
|
9
9
|
import { searchCache } from './cache.js';
|
|
10
10
|
import { CircuitBreaker } from './circuit-breaker.js';
|
|
11
11
|
import { httpAgent, httpsAgent } from './http-agent.js';
|
|
12
|
+
import { sanitizeWebContent } from './sanitize.js';
|
|
13
|
+
import { lookupRegistryUrl } from './registry.js';
|
|
12
14
|
|
|
13
15
|
// ─── Config & Cache ──────────────────────────────────
|
|
14
16
|
|
|
15
|
-
// Evitiamo IP bans ruotando UA comuni in Chrome desktop
|
|
16
17
|
const USER_AGENTS = [
|
|
17
18
|
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
|
|
18
19
|
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
|
|
19
20
|
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
|
|
20
21
|
];
|
|
21
22
|
|
|
22
|
-
/**
|
|
23
|
-
* @description Seleziona uno User-Agent rnd dall'array disponibile
|
|
24
|
-
* @returns {string} Stringa di uno User Agent
|
|
25
|
-
*/
|
|
26
23
|
function getRandomUA() {
|
|
27
24
|
return USER_AGENTS[Math.floor(Math.random() * USER_AGENTS.length)];
|
|
28
25
|
}
|
|
29
26
|
|
|
30
27
|
const ddgCircuit = new CircuitBreaker({ failureThreshold: 3, resetTimeout: 30000 });
|
|
31
28
|
|
|
29
|
+
// ─── Jina Reader + Readability Fallback ──────────────
|
|
30
|
+
|
|
31
|
+
/**
 * @description Fetches page content: Jina Reader first, then a direct fetch
 *   parsed with Readability as fallback. Never throws; every failure path ends
 *   in an empty string.
 * @param {string} url - Page URL
 * @param {string} userAgent - User-Agent header for the fallback fetch
 * @param {Object} opts - { jinaTimeout, maxLen, verbose }
 *   jinaTimeout: Jina request timeout in ms (default 8000);
 *   maxLen: maximum length of the returned text (default 4000);
 *   verbose: log each extraction step to stdout (default false)
 * @returns {Promise<string>} Extracted, sanitized text with whitespace runs
 *   collapsed to single spaces, or '' when nothing could be extracted
 */
export async function fetchPageContent(url, userAgent, opts = {}) {
  const { jinaTimeout = 8000, maxLen = 4000, verbose = false } = opts;

  // ── Try Jina Reader API first ──
  try {
    const jinaRes = await fetch(`https://r.jina.ai/${url}`, {
      signal: AbortSignal.timeout(jinaTimeout),
      headers: { 'Accept': 'text/markdown', 'X-No-Cache': 'true' }
    });
    if (jinaRes.ok) {
      const text = await jinaRes.text();
      // Very short bodies are treated as a failed extraction.
      if (text && text.length > 200) {
        if (verbose) console.log(` [jina] ✓ ${url} → ${text.length} chars`);
        // NOTE(review): collapsing all whitespace also flattens Markdown line
        // structure (headings, code blocks) — confirm this is intended.
        return sanitizeWebContent(text.replace(/\s+/g, ' '), maxLen);
      }
    }
    // Non-OK status or too little text: report the fallback just like a thrown error.
    if (verbose) console.log(` [jina] ✗ ${url} → fallback readability`);
  } catch (_) {
    if (verbose) console.log(` [jina] ✗ ${url} → fallback readability`);
  }

  // ── Fallback: fetch + Readability ──
  try {
    const pageRes = await fetch(url, {
      signal: AbortSignal.timeout(5000),
      headers: { 'User-Agent': userAgent },
      agent: url.startsWith('https:') ? httpsAgent : httpAgent
    });
    if (pageRes.ok) {
      const document = new DOMParser().parseFromString(await pageRes.text(), 'text/html');
      let text = '';
      try {
        text = new Readability(document).parse()?.textContent || '';
      } catch (_) {
        // Readability failed outright; the body-text fallback below takes over.
      }
      // parse() reports "no article found" by returning null rather than
      // throwing, so the raw body text must cover that case as well.
      if (!text) text = document.body?.textContent || '';

      if (text) {
        if (verbose) console.log(` [readability] ✓ ${url} → ${text.length} chars`);
        return sanitizeWebContent(text.replace(/\s+/g, ' '), maxLen);
      }
    }
  } catch (err) {
    // Best-effort: the caller only needs '' on failure, but surface the reason when asked.
    if (verbose) console.log(` [readability] ✗ ${url} → ${err.message}`);
  }

  return '';
}
|
|
83
|
+
|
|
84
|
+
// ─── Registry Direct Fetch ───────────────────────────
|
|
85
|
+
|
|
86
|
+
/**
 * @description Fetch official documentation directly for every dependency that
 *   has an entry in the source registry (as resolved by lookupRegistryUrl).
 *   Dependencies are processed one at a time, in input order.
 * @param {Array} deps - Dependency strings such as "svelte 5.51", "sveltekit 2.50".
 * @param {Object} opts - { jinaTimeout, maxLen, verbose }, forwarded to fetchPageContent.
 * @returns {Promise<Object>} { registryText, coveredDeps } — the concatenated
 *   markdown sections and the Set of dependency strings that produced one.
 */
export async function registryFetch(deps, opts = {}) {
  const { verbose: isVerbose = false } = opts;
  const ua = getRandomUA();
  const sections = [];
  const coveredDeps = new Set();

  for (const dep of deps) {
    const docUrl = lookupRegistryUrl(dep);
    if (docUrl) {
      // The package name is everything before the first space ("svelte 5.51" → "svelte").
      const [depName] = dep.split(' ');
      try {
        const content = await fetchPageContent(docUrl, ua, opts);
        // Only content longer than 100 chars counts as covering the dependency.
        const usable = Boolean(content) && content.length > 100;
        if (usable) {
          sections.push(`\n### ${depName} (official docs)\n${content}\n`);
          coveredDeps.add(dep);
          if (isVerbose) console.log(`  [registry] ✓ ${depName} → ${docUrl}`);
        }
      } catch (_) {
        if (isVerbose) console.log(`  [registry] ✗ ${depName} → fetch failed`);
      }
    }
  }

  return { registryText: sections.join(''), coveredDeps };
}
|
|
117
|
+
|
|
118
|
+
// ─── DDG Search ──────────────────────────────────────
|
|
119
|
+
|
|
32
120
|
/**
|
|
33
121
|
* @description Decodifica link mascherati DuckDuckGo recuperando `uddg` querystring.
|
|
34
122
|
* @param {string} href - Url incapsulato proveniente da nodeDDG
|
|
@@ -46,13 +134,12 @@ export function resolveDDGUrl(href) {
|
|
|
46
134
|
|
|
47
135
|
/**
|
|
48
136
|
* @description Esegue chiamata http reale su node DDG.
|
|
49
|
-
* @param {string} query
|
|
137
|
+
* @param {string} query - Ricerca DDG formattata
|
|
138
|
+
* @param {number} resultsLimit - Max risultati da ritornare
|
|
50
139
|
* @returns {Promise<Object>} { results, userAgent }
|
|
51
|
-
* @throws {Error} Fallimento http DDG request
|
|
52
140
|
*/
|
|
53
|
-
async function doSearch(query) {
|
|
141
|
+
async function doSearch(query, resultsLimit = 3) {
|
|
54
142
|
const userAgent = getRandomUA();
|
|
55
|
-
// Fetch DDG raw HTML search endpoint ignoring CSS/JS payloads
|
|
56
143
|
const searchRes = await fetch(
|
|
57
144
|
`https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`,
|
|
58
145
|
{ signal: AbortSignal.timeout(5000), headers: { 'User-Agent': userAgent }, agent: httpsAgent }
|
|
@@ -70,21 +157,24 @@ async function doSearch(query) {
|
|
|
70
157
|
});
|
|
71
158
|
|
|
72
159
|
const seen = new Set();
|
|
73
|
-
results = results.filter(r => r.url && !seen.has(r.url) && seen.add(r.url)).slice(0,
|
|
160
|
+
results = results.filter(r => r.url && !seen.has(r.url) && seen.add(r.url)).slice(0, resultsLimit);
|
|
74
161
|
|
|
75
162
|
if (results.length === 0) throw new Error('No DDG results');
|
|
76
163
|
return { results, userAgent };
|
|
77
164
|
}
|
|
78
165
|
|
|
166
|
+
// ─── Main Web Search ─────────────────────────────────
|
|
167
|
+
|
|
79
168
|
/**
|
|
80
169
|
* @description Punto d'accesso caching+retry orchestrator web.
|
|
81
|
-
* @param {string} query
|
|
82
|
-
* @param {boolean} parallel
|
|
170
|
+
* @param {string} query - Input utente di ricerca convertibile web
|
|
171
|
+
* @param {boolean} parallel - Promise.all fast per multiple page scraping
|
|
172
|
+
* @param {Object} opts - { ddgResults, maxLen, jinaTimeout, verbose }
|
|
83
173
|
* @returns {Promise<Object>} Oggetto risultati + pageText formattato str
|
|
84
174
|
*/
|
|
85
|
-
export async function webSearch(query, parallel = false) {
|
|
86
|
-
const
|
|
87
|
-
|
|
175
|
+
export async function webSearch(query, parallel = false, opts = {}) {
|
|
176
|
+
const { ddgResults = 3, maxLen = 4000, jinaTimeout = 8000, verbose = false } = opts;
|
|
177
|
+
|
|
88
178
|
const cached = searchCache.get(query);
|
|
89
179
|
if (cached) {
|
|
90
180
|
return { results: cached.results, pageText: cached.pageText };
|
|
@@ -92,62 +182,22 @@ export async function webSearch(query, parallel = false) {
|
|
|
92
182
|
|
|
93
183
|
let results, userAgent;
|
|
94
184
|
try {
|
|
95
|
-
const res = await ddgCircuit.execute(() => doSearch(query));
|
|
185
|
+
const res = await ddgCircuit.execute(() => doSearch(query, ddgResults));
|
|
96
186
|
results = res.results;
|
|
97
187
|
userAgent = res.userAgent;
|
|
98
188
|
} catch (err) {
|
|
99
189
|
throw err;
|
|
100
190
|
}
|
|
101
191
|
|
|
192
|
+
const fetchOpts = { jinaTimeout, maxLen, verbose };
|
|
102
193
|
let pageText = '';
|
|
103
|
-
|
|
194
|
+
|
|
104
195
|
if (parallel) {
|
|
105
|
-
const pages = await Promise.all(results.map(
|
|
106
|
-
try {
|
|
107
|
-
const pageRes = await fetch(r.url, {
|
|
108
|
-
signal: AbortSignal.timeout(5000),
|
|
109
|
-
headers: { 'User-Agent': userAgent },
|
|
110
|
-
agent: r.url.startsWith('https:') ? httpsAgent : httpAgent
|
|
111
|
-
});
|
|
112
|
-
if (pageRes.ok) {
|
|
113
|
-
const document = new DOMParser().parseFromString(await pageRes.text(), 'text/html');
|
|
114
|
-
let text = '';
|
|
115
|
-
try {
|
|
116
|
-
const article = new Readability(document).parse();
|
|
117
|
-
text = article?.textContent || '';
|
|
118
|
-
} catch (_) {
|
|
119
|
-
text = document.body?.textContent || '';
|
|
120
|
-
}
|
|
121
|
-
if (text) return text.replace(/\s+/g, ' ').slice(0, 4000);
|
|
122
|
-
}
|
|
123
|
-
} catch (_) { // fail silenzioso parallelo tollerato per timeout link third-party
|
|
124
|
-
}
|
|
125
|
-
return '';
|
|
126
|
-
}));
|
|
196
|
+
const pages = await Promise.all(results.map(r => fetchPageContent(r.url, userAgent, fetchOpts)));
|
|
127
197
|
pageText = pages.filter(Boolean).join('\n\n');
|
|
128
198
|
} else {
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
const pageRes = await fetch(results[0].url, {
|
|
132
|
-
signal: AbortSignal.timeout(5000), // node-fetch hang timeout catch
|
|
133
|
-
headers: { 'User-Agent': userAgent },
|
|
134
|
-
agent: results[0].url.startsWith('https:') ? httpsAgent : httpAgent
|
|
135
|
-
});
|
|
136
|
-
if (pageRes.ok) {
|
|
137
|
-
const document = new DOMParser().parseFromString(await pageRes.text(), 'text/html');
|
|
138
|
-
let text = '';
|
|
139
|
-
try {
|
|
140
|
-
const article = new Readability(document).parse();
|
|
141
|
-
text = article?.textContent || '';
|
|
142
|
-
} catch (_) {
|
|
143
|
-
text = document.body?.textContent || '';
|
|
144
|
-
}
|
|
145
|
-
if (text) {
|
|
146
|
-
pageText = text.replace(/\s+/g, ' ').slice(0, 4000);
|
|
147
|
-
}
|
|
148
|
-
}
|
|
149
|
-
}
|
|
150
|
-
} catch (_) { // bypass errore url target: fallback al contesto vuoto
|
|
199
|
+
if (results[0]) {
|
|
200
|
+
pageText = await fetchPageContent(results[0].url, userAgent, fetchOpts);
|
|
151
201
|
}
|
|
152
202
|
}
|
|
153
203
|
|
package/src/state.js
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
* @module state
|
|
3
3
|
* @description Persiste la memoria di antigravity prev-hash per fault tolleranza riavvii.
|
|
4
4
|
*/
|
|
5
|
-
import { readFile,
|
|
5
|
+
import { readFile, mkdir } from 'fs/promises';
|
|
6
|
+
import { atomicWrite } from './utils/atomic-write.js';
|
|
6
7
|
import { existsSync } from 'fs';
|
|
7
8
|
import path from 'path';
|
|
8
9
|
import os from 'os';
|
|
@@ -33,5 +34,5 @@ export async function loadBatchState() {
|
|
|
33
34
|
export async function saveBatchState(map) {
|
|
34
35
|
await mkdir(STATE_DIR, { recursive: true });
|
|
35
36
|
const obj = Object.fromEntries(map);
|
|
36
|
-
await
|
|
37
|
+
await atomicWrite(STATE_FILE, JSON.stringify(obj, null, 2), { backup: false });
|
|
37
38
|
}
|
package/src/watcher.js
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* @module watcher
|
|
3
|
-
* @description Timer poll di Antigravity update locale skill inject doc rules,
|
|
3
|
+
* @description Timer poll di Antigravity update locale skill inject doc rules, con registry bypass e quality settings.
|
|
4
4
|
*/
|
|
5
5
|
import os from 'os';
|
|
6
6
|
import path from 'path';
|
|
7
|
-
import { webSearch } from './search.js';
|
|
7
|
+
import { webSearch, registryFetch, fetchPageContent } from './search.js';
|
|
8
8
|
import { readPackageDeps, buildQuery, groupIntoBatches, batchHash } from './packages.js';
|
|
9
|
+
import { sanitizeWebContent } from './sanitize.js';
|
|
9
10
|
import { updateGeminiFiles, removeStaleBlocks } from './inject.js';
|
|
10
11
|
import { chalk, label, log, LOG_WARN, LOG_REFRESH } from './logger.js';
|
|
11
|
-
import { version } from './cli.js';
|
|
12
|
+
import { version, maxTokens, quality, qualitySettings, verbose, customSources } from './cli.js';
|
|
12
13
|
import { loadBatchState, saveBatchState } from './state.js';
|
|
13
14
|
import { httpsAgent } from './http-agent.js';
|
|
14
15
|
|
|
@@ -29,20 +30,33 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
|
|
|
29
30
|
console.log(label('◆', 'workspace', skillFilePretty));
|
|
30
31
|
console.log(label('◆', 'interval', `every ${intervalMinutes} min`));
|
|
31
32
|
console.log(label('◆', 'batch_size', `chunk limit ${batchSize}`));
|
|
32
|
-
console.log(label('◆', '
|
|
33
|
+
console.log(label('◆', 'engine', 'Jina Reader → Readability fallback'));
|
|
34
|
+
console.log(label('◆', 'quality', `${quality} (${qualitySettings.ddgResults} results, ${qualitySettings.charsPerPage} chars)`));
|
|
35
|
+
console.log(label('◆', 'max_tokens', `${maxTokens}`));
|
|
36
|
+
if (customSources.length > 0) {
|
|
37
|
+
console.log(label('◆', 'sources', `${customSources.length} custom URL(s)`));
|
|
38
|
+
}
|
|
39
|
+
if (verbose) console.log(label('◆', 'verbose', 'enabled'));
|
|
33
40
|
console.log();
|
|
34
41
|
console.log(` ${chalk.cyan('✻')} Running. Antigravity will load context automatically.`);
|
|
35
42
|
console.log();
|
|
36
43
|
|
|
37
44
|
let previousBatchHashes = new Map();
|
|
38
45
|
|
|
46
|
+
const searchOpts = {
|
|
47
|
+
ddgResults: qualitySettings.ddgResults,
|
|
48
|
+
maxLen: qualitySettings.charsPerPage,
|
|
49
|
+
jinaTimeout: qualitySettings.jinaTimeout,
|
|
50
|
+
verbose,
|
|
51
|
+
};
|
|
52
|
+
|
|
39
53
|
async function updateSkill() {
|
|
40
54
|
if (previousBatchHashes.size === 0) {
|
|
41
55
|
previousBatchHashes = await loadBatchState();
|
|
42
56
|
}
|
|
43
|
-
const deps = await readPackageDeps();
|
|
57
|
+
const deps = await readPackageDeps();
|
|
44
58
|
if (!deps || deps.length === 0) {
|
|
45
|
-
return;
|
|
59
|
+
return;
|
|
46
60
|
}
|
|
47
61
|
|
|
48
62
|
const batches = groupIntoBatches(deps, batchSize);
|
|
@@ -65,11 +79,30 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
|
|
|
65
79
|
return;
|
|
66
80
|
}
|
|
67
81
|
|
|
68
|
-
const query = buildQuery(batch);
|
|
69
82
|
try {
|
|
70
|
-
|
|
83
|
+
// ── Registry fetch per dipendenze note ──
|
|
84
|
+
const { registryText, coveredDeps } = await registryFetch(batch, searchOpts);
|
|
85
|
+
|
|
86
|
+
// ── DDG search per dipendenze non coperte dal registry ──
|
|
87
|
+
const uncoveredBatch = batch.filter(d => !coveredDeps.has(d));
|
|
88
|
+
let ddgText = '';
|
|
89
|
+
let results = [];
|
|
90
|
+
|
|
91
|
+
if (uncoveredBatch.length > 0) {
|
|
92
|
+
const query = buildQuery(uncoveredBatch);
|
|
93
|
+
try {
|
|
94
|
+
const res = await webSearch(query, false, searchOpts);
|
|
95
|
+
results = res.results;
|
|
96
|
+
ddgText = res.pageText;
|
|
97
|
+
} catch (_) {
|
|
98
|
+
if (verbose) log(LOG_WARN, chalk.yellow, `DDG search failed for: ${uncoveredBatch.join(', ')}`);
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
const combinedText = registryText + (ddgText || '');
|
|
71
103
|
const badSignals = ['403', 'captcha', 'blocked', 'access denied', 'forbidden'];
|
|
72
|
-
const isBad = !
|
|
104
|
+
const isBad = !combinedText || combinedText.length < 200 || badSignals.some(s => combinedText.toLowerCase().includes(s));
|
|
105
|
+
|
|
73
106
|
if (isBad && previousBatchHashes.has(blockId)) {
|
|
74
107
|
log(LOG_WARN, chalk.yellow, `low quality result for block ${blockId} → keeping previous context`);
|
|
75
108
|
failedCount++;
|
|
@@ -81,19 +114,22 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
|
|
|
81
114
|
const batchTitle = batch.map(b => b.split(' ')[0]).join(', ');
|
|
82
115
|
|
|
83
116
|
let globalMd = `## Live Context — ${batchTitle} (${nowStr})\n`;
|
|
84
|
-
|
|
85
|
-
|
|
117
|
+
if (registryText) {
|
|
118
|
+
globalMd += sanitizeWebContent(registryText, 500) + '\n';
|
|
119
|
+
} else if (results.length > 0) {
|
|
86
120
|
globalMd += `### ${results[0].title}\n`;
|
|
87
|
-
globalMd += `${results[0].snippet
|
|
121
|
+
globalMd += `${sanitizeWebContent(results[0].snippet, 300)} — ${results[0].url}\n`;
|
|
88
122
|
}
|
|
89
123
|
|
|
90
124
|
let md = `## Live Context — ${batchTitle} (${nowStr})\n`;
|
|
91
|
-
|
|
125
|
+
if (registryText) {
|
|
126
|
+
md += sanitizeWebContent(registryText, maxTokens) + '\n\n';
|
|
127
|
+
}
|
|
92
128
|
for (const r of results) {
|
|
93
|
-
md += `### ${r.title}\n${r.snippet} — ${r.url}\n\n`;
|
|
129
|
+
md += `### ${r.title}\n${sanitizeWebContent(r.snippet, 500)} — ${r.url}\n\n`;
|
|
94
130
|
}
|
|
95
|
-
if (
|
|
96
|
-
md += `FULL TEXT: ${
|
|
131
|
+
if (ddgText) {
|
|
132
|
+
md += `FULL TEXT: ${sanitizeWebContent(ddgText, maxTokens)}\n`;
|
|
97
133
|
}
|
|
98
134
|
|
|
99
135
|
await updateGeminiFiles([{
|
|
@@ -104,7 +140,11 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
|
|
|
104
140
|
|
|
105
141
|
previousBatchHashes.set(blockId, currentHash);
|
|
106
142
|
updatedCount++;
|
|
107
|
-
|
|
143
|
+
|
|
144
|
+
const sources = [];
|
|
145
|
+
if (coveredDeps.size > 0) sources.push(`registry:${coveredDeps.size}`);
|
|
146
|
+
if (results.length > 0) sources.push(`ddg:${results.length}`);
|
|
147
|
+
log(LOG_REFRESH, chalk.cyan, `block ${blockId} updated → ${batch.join(', ')} [${sources.join(', ')}]`);
|
|
108
148
|
} catch (e) {
|
|
109
149
|
failedCount++;
|
|
110
150
|
log(LOG_WARN, chalk.yellow, `block ${blockId} fetch failed → keeping previous`);
|
|
@@ -118,6 +158,38 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
|
|
|
118
158
|
}
|
|
119
159
|
await Promise.all(executing);
|
|
120
160
|
|
|
161
|
+
// ── Custom sources from .groundtruth.json ──
|
|
162
|
+
if (customSources.length > 0) {
|
|
163
|
+
for (const src of customSources) {
|
|
164
|
+
const blockId = 'src_' + Buffer.from(src.url).toString('base64url').slice(0, 8);
|
|
165
|
+
activeBlockIds.add(blockId);
|
|
166
|
+
|
|
167
|
+
if (previousBatchHashes.has(blockId)) {
|
|
168
|
+
skippedCount++;
|
|
169
|
+
continue;
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
try {
|
|
173
|
+
const text = await fetchPageContent(src.url, '', searchOpts);
|
|
174
|
+
if (text && text.length > 100) {
|
|
175
|
+
const srcLabel = src.label || new URL(src.url).hostname;
|
|
176
|
+
const md = `## Custom Source — ${srcLabel}\n${sanitizeWebContent(text, maxTokens)}\n`;
|
|
177
|
+
|
|
178
|
+
await updateGeminiFiles([{
|
|
179
|
+
blockId,
|
|
180
|
+
globalContent: `## ${srcLabel}\n${sanitizeWebContent(text, 500)}\n`,
|
|
181
|
+
workspaceContent: md
|
|
182
|
+
}]);
|
|
183
|
+
previousBatchHashes.set(blockId, blockId);
|
|
184
|
+
updatedCount++;
|
|
185
|
+
log(LOG_REFRESH, chalk.cyan, `custom source updated → ${srcLabel}`);
|
|
186
|
+
}
|
|
187
|
+
} catch (_) {
|
|
188
|
+
failedCount++;
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
|
|
121
193
|
await removeStaleBlocks(globalPath, activeBlockIds);
|
|
122
194
|
await removeStaleBlocks(workspacePath, activeBlockIds);
|
|
123
195
|
|
|
@@ -128,18 +200,16 @@ export function startWatcher({ intervalMinutes, usePackageJson, batchSize }) {
|
|
|
128
200
|
|
|
129
201
|
let cycleCount = 0;
|
|
130
202
|
|
|
131
|
-
// Periodical state persistence on process exit to avoid total crash data loss
|
|
132
203
|
process.on('SIGINT', async () => {
|
|
133
204
|
await saveBatchState(previousBatchHashes);
|
|
134
205
|
process.exit(0);
|
|
135
206
|
});
|
|
136
207
|
|
|
137
|
-
// Lancio a startup immediato
|
|
138
208
|
updateSkill();
|
|
139
209
|
setInterval(() => {
|
|
140
210
|
cycleCount++;
|
|
141
211
|
if (cycleCount % 10 === 0) {
|
|
142
|
-
httpsAgent.destroy();
|
|
212
|
+
httpsAgent.destroy();
|
|
143
213
|
}
|
|
144
214
|
updateSkill();
|
|
145
215
|
}, intervalMinutes * 60 * 1000);
|
package/assets/banner.png
DELETED
|
Binary file
|