@agentprojectcontext/apx 1.10.4 → 1.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,290 @@
1
+ // daemon/tools/search.js
2
+ // WebSearch tool for APX — 3 modes:
3
+ // 1. DuckDuckGo HTML scraping (no API key, uses node-fetch)
4
+ // 2. Brave Search API (requires BRAVE_API_KEY env)
5
+ // 3. Puppeteer Google fallback (requires puppeteer installed)
6
+ //
7
+ // Endpoint: POST /tools/search
8
+ // Body: { query, mode: "auto"|"ddg"|"brave"|"browser", limit? }
9
+
10
+ import { browser_navigate, browser_get_text, browser_evaluate } from "./browser.js";
11
+
12
+ // ---------------------------------------------------------------------------
13
+ // Helpers
14
+ // ---------------------------------------------------------------------------
15
+
16
/**
 * Build a proxy agent for `url` from the process environment.
 *
 * The proxy address is the first non-empty value among HTTPS_PROXY,
 * https_proxy, HTTP_PROXY and http_proxy. Only the agent package that matches
 * the URL scheme is imported, so an https request is still proxied when
 * `http-proxy-agent` is not installed (and the other way round).
 *
 * @param {string} url - Request URL; a value starting with "https" selects
 *   HttpsProxyAgent, anything else selects HttpProxyAgent.
 * @returns {Promise<object|undefined>} The agent instance, or `undefined` when
 *   no proxy variable is set or the agent could not be loaded or constructed.
 */
async function buildAgent(url) {
  const proxyUrl =
    process.env.HTTPS_PROXY ||
    process.env.https_proxy ||
    process.env.HTTP_PROXY ||
    process.env.http_proxy;
  if (!proxyUrl) return undefined;

  try {
    if (String(url).startsWith("https")) {
      const { HttpsProxyAgent } = await import("https-proxy-agent");
      return new HttpsProxyAgent(proxyUrl);
    }
    const { HttpProxyAgent } = await import("http-proxy-agent");
    return new HttpProxyAgent(proxyUrl);
  } catch {
    // Deliberate best effort: a missing optional package or an unparsable
    // proxy address falls back to a direct (unproxied) request.
    return undefined;
  }
}
28
+
29
/**
 * Issue a request with the lazily imported `node-fetch` package.
 *
 * When the caller passes no `agent` option, buildAgent(url) is consulted and
 * its agent (if any) is added to a copy of the options; the caller's object
 * is never modified.
 *
 * @param {string} url - Request URL.
 * @param {object} [opts] - Options forwarded to node-fetch.
 * @returns {Promise<object>} Whatever node-fetch's default export returns.
 */
async function nodeFetch(url, opts = {}) {
  const fetchModule = await import("node-fetch");
  const fetchImpl = fetchModule.default;

  let requestOptions = opts;
  if (!requestOptions.agent) {
    const proxyAgent = await buildAgent(url);
    if (proxyAgent) {
      requestOptions = { ...requestOptions, agent: proxyAgent };
    }
  }

  return fetchImpl(url, requestOptions);
}
37
+
38
/**
 * Very small regex-based HTML-to-text extractor (avoids a full HTML parser).
 *
 * Steps: drop <script> and <style> elements, replace every remaining tag with
 * a space, decode a small set of character references, collapse whitespace
 * runs to one space, and trim.
 *
 * Character references are decoded in ONE pass so already-escaped text is not
 * decoded twice (`&amp;lt;` yields the literal text `&lt;`, not `<`).
 * Supported: amp, lt, gt, quot, apos, nbsp, plus decimal (`&#39;`) and
 * hexadecimal (`&#x27;`) numeric references. Unknown or out-of-range
 * references are left untouched.
 *
 * @param {string} html - Markup to flatten; null/undefined is treated as "".
 * @returns {string} Plain text.
 */
function extractText(html) {
  const namedEntities = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'", nbsp: " " };

  const decodeEntity = (match, body) => {
    if (body[0] !== "#") return namedEntities[body];
    const isHex = body[1] === "x" || body[1] === "X";
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // String.fromCodePoint throws outside 0..0x10FFFF; keep such input verbatim.
    if (!(codePoint > 0 && codePoint <= 0x10ffff)) return match;
    // A numeric no-break space gets the same treatment as &nbsp;.
    return codePoint === 0xa0 ? " " : String.fromCodePoint(codePoint);
  };

  return String(html ?? "")
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<[^>]+>/g, " ")
    .replace(/&(#[xX][0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos|nbsp);/g, decodeEntity)
    .replace(/\s{2,}/g, " ")
    .trim();
}
53
+
54
/**
 * Parse a DuckDuckGo HTML results page into `{ title, url, snippet }` records.
 *
 * Result links are anchors whose class contains `result__a`; snippets are
 * anchors whose class contains `result__snippet`.
 *
 * - An href on a duckduckgo.com host is treated as a redirect wrapper: the
 *   destination is read from its `uddg` query parameter. Such links without a
 *   usable http(s) `uddg` value (internal or ad links) are skipped.
 *   NOTE(review): the `uddg` parameter name reflects markup seen from the
 *   HTML endpoint and is not shown in this file — confirm against live output.
 * - `&amp;` inside an href is decoded before the URL is interpreted.
 * - A snippet is attached to the result link that precedes it in the page
 *   (and comes before the next result link), so a skipped link cannot shift
 *   the snippets of later results.
 *
 * When no structured result is found, a looser scan collects absolute
 * http(s) hrefs followed by visible text as title-only results.
 *
 * @param {string} html - Raw HTML of the results page.
 * @param {number} [limit=5] - Maximum number of results to return.
 * @returns {Array<{title: string, url: string, snippet: string}>}
 */
function parseDdgResults(html, limit = 5) {
  const page = String(html ?? "");
  const anchorRe = /<a[^>]+class="[^"]*result__a[^"]*"[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gi;
  const snippetRe = /<a[^>]+class="[^"]*result__snippet[^"]*"[^>]*>([\s\S]*?)<\/a>/gi;

  // Turn a raw href attribute into the destination URL, or null to skip it.
  const resolveResultUrl = (rawHref) => {
    const href = rawHref.replace(/&amp;/g, "&");
    let parsed;
    try {
      // The base only matters for relative and protocol-relative hrefs.
      parsed = new URL(href, "https://duckduckgo.com/");
    } catch {
      return null;
    }
    if (!/^https?:$/.test(parsed.protocol)) return null;
    if (!/(^|\.)duckduckgo\.com$/i.test(parsed.hostname)) return href;
    const target = parsed.searchParams.get("uddg");
    return target && /^https?:\/\//i.test(target) ? target : null;
  };

  const anchors = [];
  for (const match of page.matchAll(anchorRe)) {
    anchors.push({
      at: match.index,
      url: resolveResultUrl(match[1]),
      title: extractText(match[2]).trim(),
    });
  }

  const snippets = [];
  for (const match of page.matchAll(snippetRe)) {
    snippets.push({ at: match.index, text: extractText(match[1]).trim() });
  }

  const results = [];
  let cursor = 0; // index of the first snippet not yet passed
  for (let i = 0; i < anchors.length && results.length < limit; i++) {
    const { at, url, title } = anchors[i];
    const nextAt = i + 1 < anchors.length ? anchors[i + 1].at : Infinity;
    while (cursor < snippets.length && snippets[cursor].at < at) cursor++;
    const hasSnippet = cursor < snippets.length && snippets[cursor].at < nextAt;
    if (url && title) {
      results.push({ title, url, snippet: hasSnippet ? snippets[cursor].text : "" });
    }
  }

  // Fallback: if no structured results found, try simpler extraction
  if (results.length === 0) {
    const hrefRe = /href="(https?:\/\/[^"]+)"[^>]*>([^<]{5,})/gi;
    let m;
    while ((m = hrefRe.exec(page)) !== null && results.length < limit) {
      const url = m[1];
      const text = extractText(m[2]).trim();
      if (!url.includes("duckduckgo") && text.length > 10) {
        results.push({ title: text.slice(0, 120), url, snippet: "" });
      }
    }
  }

  return results;
}
98
+
99
+ // ---------------------------------------------------------------------------
100
+ // Mode 1: DuckDuckGo scraping
101
+ // ---------------------------------------------------------------------------
102
+
103
/**
 * Mode "ddg": fetch DuckDuckGo's HTML endpoint and parse the results.
 *
 * @param {string} query - Search terms.
 * @param {number} [limit=5] - Maximum number of results.
 * @returns {Promise<object>} `{ mode, query, results }`; when nothing could be
 *   parsed, `results` is empty and `raw_excerpt` carries the first 2000
 *   characters of the page text.
 * @throws {Error} When the response status is not OK.
 */
async function searchDdg(query, limit = 5) {
  const endpoint = `https://html.duckduckgo.com/html/?q=${encodeURIComponent(query)}`;
  const requestHeaders = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Accept": "text/html",
    "Accept-Language": "en-US,en;q=0.9",
  };

  const response = await nodeFetch(endpoint, { headers: requestHeaders });
  if (!response.ok) {
    throw new Error(`DuckDuckGo returned ${response.status}`);
  }

  const markup = await response.text();
  const parsed = parseDdgResults(markup, limit);
  if (parsed.length > 0) {
    return { mode: "ddg", query, results: parsed };
  }

  // Nothing structured was found: hand back a plain-text excerpt instead.
  const excerpt = extractText(markup).slice(0, 2000);
  return { mode: "ddg", query, results: [], raw_excerpt: excerpt };
}
125
+
126
+ // ---------------------------------------------------------------------------
127
+ // Mode 2: Brave Search API
128
+ // ---------------------------------------------------------------------------
129
+
130
/**
 * Mode "brave": query the Brave Search web endpoint.
 *
 * @param {string} query - Search terms.
 * @param {number} [limit=5] - Sent as `count` and used to cap the result list.
 * @returns {Promise<object>} `{ mode: "brave", query, results }` where each
 *   result is `{ title, url, snippet, age }`.
 * @throws {Error} When BRAVE_API_KEY is unset or the response is not OK (the
 *   message includes up to 200 characters of the response body).
 */
async function searchBrave(query, limit = 5) {
  const token = process.env.BRAVE_API_KEY;
  if (!token) {
    throw new Error("BRAVE_API_KEY not set in environment");
  }

  const endpoint = `https://api.search.brave.com/res/v1/web/search?q=${encodeURIComponent(query)}&count=${limit}`;
  const requestHeaders = {
    "Accept": "application/json",
    "Accept-Encoding": "gzip",
    "X-Subscription-Token": token,
  };
  const response = await nodeFetch(endpoint, { headers: requestHeaders });

  if (!response.ok) {
    // Reading the body is best effort; an unreadable body becomes "".
    const detail = await response.text().catch(() => "");
    throw new Error(`Brave API returned ${response.status}: ${detail.slice(0, 200)}`);
  }

  const payload = await response.json();
  const hits = (payload?.web?.results || []).slice(0, limit);

  const results = [];
  for (const hit of hits) {
    results.push({
      title: hit.title || "",
      url: hit.url || "",
      snippet: hit.description || "",
      age: hit.age || null,
    });
  }

  return { mode: "brave", query, results };
}
159
+
160
+ // ---------------------------------------------------------------------------
161
+ // Mode 3: Puppeteer Google fallback
162
+ // ---------------------------------------------------------------------------
163
+
164
/**
 * Mode "browser": load a Google results page through the browser tool and
 * extract results with a script evaluated in the page.
 *
 * `limit` is coerced to a positive integer (falling back to 5) BEFORE it is
 * interpolated into the evaluated source, so only a numeric literal can ever
 * be embedded there.
 *
 * @param {string} query - Search terms.
 * @param {number} [limit=5] - Maximum number of results.
 * @returns {Promise<object>} `{ mode: "browser", query, results }`; when the
 *   page script yields nothing, `results` is empty and `raw_excerpt` holds up
 *   to 2000 characters of page text ("" when no text is available).
 * @throws {Error} "Browser search failed: …" with the underlying error
 *   attached as `cause`.
 */
async function searchBrowser(query, limit = 5) {
  const maxItems = Math.max(1, Math.trunc(Number(limit)) || 5);

  try {
    // Navigate to Google
    await browser_navigate({ url: `https://www.google.com/search?q=${encodeURIComponent(query)}&hl=en` });

    // Extract search results via JS in page context
    const evaluated = await browser_evaluate({
      code: `
        (function() {
          const items = [];
          const cards = document.querySelectorAll('div.g, div[data-sokoban-container]');
          for (const card of cards) {
            const a = card.querySelector('a[href^="http"]');
            const h3 = card.querySelector('h3');
            const snippet = card.querySelector('.VwiC3b, [data-sncf], .s3v9rd');
            if (a && h3) {
              items.push({
                title: h3.innerText || h3.textContent || '',
                url: a.href || '',
                snippet: snippet ? (snippet.innerText || snippet.textContent || '') : '',
              });
            }
            if (items.length >= ${maxItems}) break;
          }
          return items;
        })()
      `,
    });

    const found = evaluated?.result;
    const results = Array.isArray(found) ? found.slice(0, maxItems) : [];

    if (results.length === 0) {
      // Fallback to page text; tolerate a missing or non-string `text`.
      const pageText = await browser_get_text();
      const text = typeof pageText?.text === "string" ? pageText.text : "";
      return { mode: "browser", query, results: [], raw_excerpt: text.slice(0, 2000) };
    }

    return { mode: "browser", query, results };
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    // Keep the original error reachable for debugging.
    throw new Error(`Browser search failed: ${reason}`, { cause: e });
  }
}
206
+
207
+ // ---------------------------------------------------------------------------
208
+ // Auto mode: tries DDG → Brave → Browser
209
+ // ---------------------------------------------------------------------------
210
+
211
/**
 * Mode "auto": try DuckDuckGo, then Brave (only when BRAVE_API_KEY is set),
 * then the browser, returning the first usable answer.
 *
 * DuckDuckGo and Brave answers count only when they contain at least one
 * result; the browser answer is returned as-is, even when empty.
 *
 * @param {string} query - Search terms.
 * @param {number} [limit=5] - Maximum number of results.
 * @returns {Promise<object>} The result object of the first successful mode.
 * @throws {Error} "All search modes failed: …" listing each mode's failure.
 */
async function searchAuto(query, limit = 5) {
  const failures = [];

  // Runs one mode; resolves to { outcome } on success or null after
  // recording why the mode was rejected.
  const attempt = async (label, searchFn, acceptEmpty) => {
    try {
      const outcome = await searchFn(query, limit);
      if (acceptEmpty || (outcome.results && outcome.results.length > 0)) {
        return { outcome };
      }
      failures.push(`${label}: 0 results`);
    } catch (err) {
      failures.push(`${label}: ${err.message}`);
    }
    return null;
  };

  // 1. DuckDuckGo
  const viaDdg = await attempt("ddg", searchDdg, false);
  if (viaDdg) return viaDdg.outcome;

  // 2. Brave, only when a key is configured
  if (process.env.BRAVE_API_KEY) {
    const viaBrave = await attempt("brave", searchBrave, false);
    if (viaBrave) return viaBrave.outcome;
  }

  // 3. Browser, accepted even with zero results
  const viaBrowser = await attempt("browser", searchBrowser, true);
  if (viaBrowser) return viaBrowser.outcome;

  throw new Error(`All search modes failed: ${failures.join("; ")}`);
}
244
+
245
+ // ---------------------------------------------------------------------------
246
+ // Main search dispatcher
247
+ // ---------------------------------------------------------------------------
248
+
249
/**
 * Main search dispatcher.
 *
 * @param {object} [params]
 * @param {string|number} params.query - Search terms. Falsy or whitespace-only
 *   values are rejected; numbers are converted to strings; any other
 *   non-string value is rejected.
 * @param {string|null} [params.mode="auto"] - One of "auto", "ddg", "brave",
 *   "browser". `null` (e.g. from a JSON body) is treated like an omitted mode.
 * @param {number|string} [params.limit=5] - Parsed as a base-10 integer
 *   (unparsable or 0 becomes 5) and clamped to the range 1..20.
 * @returns {Promise<object>} The result object of the selected mode.
 * @throws {Error} "query required", "query must be a string", or
 *   'Unknown mode "…"'; plus whatever the selected mode throws.
 */
export async function webSearch({ query, mode = "auto", limit = 5 } = {}) {
  if (!query) throw new Error("query required");
  const text = typeof query === "number" ? String(query) : query;
  if (typeof text !== "string") throw new Error("query must be a string");
  if (text.trim() === "") throw new Error("query required");

  const n = Math.min(Math.max(Number.parseInt(limit, 10) || 5, 1), 20);
  // The parameter default only covers `undefined`; map `null` to "auto" too.
  const selected = mode ?? "auto";

  switch (selected) {
    case "ddg": return searchDdg(text, n);
    case "brave": return searchBrave(text, n);
    case "browser": return searchBrowser(text, n);
    case "auto": return searchAuto(text, n);
    default: throw new Error(`Unknown mode "${selected}". Use: auto, ddg, brave, browser`);
  }
}
261
+
262
+ // ---------------------------------------------------------------------------
263
+ // Express router factory
264
+ // ---------------------------------------------------------------------------
265
+
266
/**
 * Express router factory for the search tool.
 *
 * Routes (relative to the mount point):
 * - POST `/` — parameters are taken from the request body.
 * - GET `/` — parameters are taken from the query string
 *   (`?q=...&mode=auto&limit=5`; `query` is accepted as an alias of `q`).
 *
 * Both routes share one handler. A request without a query is a client error
 * and is answered with 400; a failure raised by webSearch is answered with
 * 500. In both cases the body is `{ error: <message> }`.
 *
 * @param {object} express - The express module (only `express.Router()` is used).
 * @returns {object} The configured router.
 */
export function buildSearchRouter(express) {
  const router = express.Router();

  // Run the search for `params` and write the JSON response.
  const respond = async (params, res) => {
    if (!params || !params.query) {
      res.status(400).json({ error: "query required" });
      return;
    }
    try {
      res.json(await webSearch(params));
    } catch (e) {
      const message = e instanceof Error ? e.message : String(e);
      res.status(500).json({ error: message });
    }
  };

  router.post("/", (req, res) => respond(req.body || {}, res));

  // GET convenience: /tools/search?q=...&mode=auto&limit=5
  router.get("/", (req, res) => {
    const { q, query, mode, limit } = req.query;
    return respond({ query: q || query, mode, limit }, res);
  });

  return router;
}