shmakk 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/web.js ADDED
@@ -0,0 +1,228 @@
1
+ // Web search and URL fetch helpers extracted from agent.js.
2
+ // Uses DuckDuckGo Lite (no API key required) and plain http(s) fetch.
3
+
4
+ const MAX_FETCH_BYTES = 128 * 1024; // Cap used by fetchUrl on the returned text; it is compared against string length (characters), not encoded bytes.
5
+
6
/**
 * Unwrap a DuckDuckGo redirect link into the target URL it carries.
 *
 * When `url` points at duckduckgo.com (or a subdomain) and has a `uddg` query
 * parameter holding an http(s) URL, that URL is returned. In every other case
 * (other host, missing or non-http `uddg`, unparsable input) the `url`
 * argument is returned exactly as given.
 *
 * @param {string} url - href attribute value; may be HTML-escaped or protocol-relative.
 * @returns {string} The wrapped target URL, or `url` unchanged.
 */
function decodeDdgUrl(url) {
  try {
    let href = String(url || '');
    // Handle protocol-relative URLs (//duckduckgo.com/...)
    if (href.startsWith('//')) href = `https:${href}`;
    // Decode HTML entities that appear in href attribute values. `&amp;` goes
    // last so that an escaped entity such as `&amp;lt;` is unescaped only once.
    href = href
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#39;/g, "'")
      .replace(/&amp;/g, '&');

    const parsed = new URL(href);
    const host = parsed.hostname;
    if (host === 'duckduckgo.com' || host.endsWith('.duckduckgo.com')) {
      // searchParams.get() already percent-decodes the value. Decoding a second
      // time would turn escapes that belong to the target (e.g. %20, %25) into
      // raw characters or throw on a literal '%'.
      const target = parsed.searchParams.get('uddg');
      if (target && /^https?:\/\//i.test(target)) return target;
    }
    return url;
  } catch {
    return url;
  }
}
26
+
27
/**
 * Reduce an HTML fragment to plain text.
 *
 * Tags are removed first and the common entities are decoded afterwards, so
 * escaped angle brackets in the text (`&lt;` / `&gt;`) survive as literal
 * characters instead of being mistaken for markup. Runs of whitespace are
 * collapsed to a single space and the result is trimmed.
 *
 * @param {string} html - HTML fragment; falsy values are treated as ''.
 * @returns {string} Plain text.
 */
function stripTags(html) {
  return String(html || '')
    .replace(/<[^>]*>/g, '')
    .replace(/&nbsp;/g, ' ')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&#39;/g, "'")
    // `&amp;` last: decoding it earlier would unescape `&amp;lt;` twice.
    .replace(/&amp;/g, '&')
    .replace(/\s+/g, ' ')
    .trim();
}
39
+
40
/**
 * Extract search results from a DuckDuckGo Lite / HTML results page.
 *
 * First pass: every <tr> row is searched for a result anchor (one carrying
 * class="result-link" or rel="nofollow" ahead of its href) and, in the same
 * row, an optional result-snippet cell. Second pass, run only while fewer than
 * `maxResults` entries were collected: every anchor in the page is considered,
 * skipping DuckDuckGo's own lite/html/query pages and titles shorter than
 * three characters. URLs are de-duplicated across both passes.
 *
 * @param {string} html - Raw page markup.
 * @param {number} [maxResults=5] - Maximum number of results to return.
 * @returns {{title: string, url: string, snippet: string}[]}
 */
function parseDdgLite(html, maxResults = 5) {
  const results = [];
  const seen = new Set();
  const isHttp = (candidate) => /^https?:\/\//i.test(candidate);
  const record = (title, url, snippet) => {
    seen.add(url);
    results.push({ title, url, snippet });
  };

  // Primary: table rows of the lite layout.
  const rowRe = /<tr[^>]*>([\s\S]*?)<\/tr>/gi;
  for (let row = rowRe.exec(html); row && results.length < maxResults; row = rowRe.exec(html)) {
    const cells = row[1];
    const anchor = /<a[^>]+(?:class="result-link"|rel="nofollow")[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/i.exec(cells);
    if (!anchor) continue;
    const url = decodeDdgUrl(anchor[1]);
    if (!isHttp(url) || seen.has(url)) continue;
    const title = stripTags(anchor[2]);
    if (!title) continue;
    const snippetCell = /<td[^>]+class="result-snippet"[^>]*>([\s\S]*?)<\/td>/i.exec(cells);
    record(title, url, snippetCell ? stripTags(snippetCell[1]) : '');
  }

  // Fallback: any anchor at all, for pages whose markup no longer matches.
  if (results.length < maxResults) {
    const anchorRe = /<a[^>]+href="([^"]+)"[^>]*>([\s\S]*?)<\/a>/gi;
    for (let a = anchorRe.exec(html); a && results.length < maxResults; a = anchorRe.exec(html)) {
      const url = decodeDdgUrl(a[1]);
      if (!isHttp(url) || seen.has(url)) continue;
      // Skip links back into DuckDuckGo itself.
      if (/duckduckgo\.com\/(?:lite|html|\?|$)/i.test(url)) continue;
      const title = stripTags(a[2]);
      if (title.length < 3) continue;
      record(title, url, '');
    }
  }

  return results;
}
81
+
82
/**
 * `fetch` with a timeout and optional caller-supplied cancellation.
 *
 * A private AbortController drives the request. It is aborted with
 * Error('timeout') after `timeoutMs`, or with the caller signal's reason when
 * `opts.signal` aborts (or is already aborted). Once the fetch promise
 * settles, the timer is cleared and the listener on the caller's signal is
 * removed.
 *
 * @param {string} url - Request URL.
 * @param {object} [opts={}] - fetch options; `opts.signal` is bridged, not passed through.
 * @param {number} [timeoutMs=12000] - Milliseconds before the request is aborted.
 * @returns {Promise<Response>} The promise returned by `fetch`.
 */
function fetchWithTimeout(url, opts = {}, timeoutMs = 12000) {
  const ctrl = new AbortController();
  const timer = setTimeout(() => ctrl.abort(new Error('timeout')), timeoutMs);

  const cleanups = [() => clearTimeout(timer)];
  const upstream = opts.signal;
  if (upstream && upstream.aborted) {
    ctrl.abort(upstream.reason);
  } else if (upstream) {
    const forwardAbort = () => ctrl.abort(upstream.reason);
    upstream.addEventListener('abort', forwardAbort, { once: true });
    cleanups.push(() => upstream.removeEventListener('abort', forwardAbort));
  }

  const request = fetch(url, { ...opts, signal: ctrl.signal });
  return request.finally(() => {
    for (const cleanup of cleanups) cleanup();
  });
}
99
+
100
/**
 * Search the web through DuckDuckGo, trying the Lite endpoint first and the
 * HTML endpoint second.
 *
 * The second endpoint is tried whenever the first yields no results, including
 * when the first request throws (network error, timeout), so a failure of one
 * endpoint no longer hides results available from the other. If the caller's
 * `signal` is aborted the search stops immediately.
 *
 * @param {string} query - Search terms; blank input yields `{ error: 'query required' }`.
 * @param {number} [maxResults] - Desired result count, clamped to 1..10 (default 5).
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<object>} `{ query, results, source }` on success,
 *   `{ query, results: [], source, note }` when nothing could be parsed, or
 *   `{ error }` when a request threw and no endpoint produced results.
 */
async function webSearch(query, maxResults, signal) {
  const q = String(query || '').trim();
  if (!q) return { error: 'query required' };
  const limit = Math.max(1, Math.min(10, Number(maxResults) || 5));
  const encoded = encodeURIComponent(q);
  const headers = { 'user-agent': 'Mozilla/5.0 (compatible; shmakk/0.1; +https://duckduckgo.com)' };
  const endpoints = [
    { url: `https://lite.duckduckgo.com/lite/?q=${encoded}`, source: 'ddg-lite' },
    { url: `https://html.duckduckgo.com/html/?q=${encoded}`, source: 'ddg-html-fallback' },
  ];

  let httpError = '';   // first non-2xx status seen
  let thrownError = ''; // first exception message seen

  for (const { url, source } of endpoints) {
    try {
      const resp = await fetchWithTimeout(url, { signal, headers });
      const html = await resp.text();
      if (!resp.ok) {
        if (!httpError) httpError = `search failed: HTTP ${resp.status}`;
        continue;
      }
      const results = parseDdgLite(html, limit);
      if (results.length) return { query: q, results, source };
    } catch (e) {
      const message = String(e?.message || e);
      // The caller cancelled: do not start another request.
      if (signal?.aborted) return { error: message };
      if (!thrownError) thrownError = message;
    }
  }

  if (thrownError) return { error: thrownError };
  return { query: q, results: [], source: 'ddg-lite+html', note: httpError || 'no results parsed' };
}
127
+
128
/**
 * Fetch an http(s) URL and return its body as tag-stripped text.
 *
 * `truncated` describes the returned `text`: it is true only when the stripped
 * text was longer than MAX_FETCH_BYTES characters and had to be cut.
 *
 * NOTE(review): no host filtering is done here, so loopback and private
 * addresses are fetched like any other; confirm callers gate this if needed.
 *
 * @param {string} url - Absolute http(s) URL.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<object>} `{ url, status, contentType, text, truncated }`,
 *   or `{ error }` for an invalid URL, unsupported scheme or failed request.
 */
async function fetchUrl(url, signal) {
  let parsed;
  try {
    parsed = new URL(String(url || ''));
  } catch {
    return { error: 'invalid URL' };
  }
  if (!/^https?:$/.test(parsed.protocol)) return { error: 'only http(s) URLs are supported' };

  try {
    const resp = await fetchWithTimeout(parsed.href, {
      signal,
      headers: { 'user-agent': 'shmakk/0.1' },
    });
    const body = await resp.text();
    // Measure the text that is actually returned, not the raw markup.
    const plain = stripTags(body);
    return {
      url: parsed.href,
      status: resp.status,
      contentType: resp.headers.get('content-type') || '',
      text: plain.slice(0, MAX_FETCH_BYTES),
      truncated: plain.length > MAX_FETCH_BYTES,
    };
  } catch (e) {
    return { error: String(e?.message || e) };
  }
}
149
+
150
+ // ── JSON fallback action extraction ─────────────────────────────────────────
151
+
152
/**
 * Remove a Markdown code fence that wraps the whole (trimmed) input.
 *
 * Input that is not entirely one fenced block, optionally tagged `json`, is
 * returned trimmed but otherwise untouched.
 *
 * @param {string} s - Text that may be wrapped in a code fence.
 * @returns {string} The fence's inner text, or the trimmed input.
 */
function stripJsonFence(s) {
  const trimmed = String(s || '').trim();
  const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
  if (!fenced) return trimmed;
  return fenced[1].trim();
}
157
+
158
/**
 * Extract tool actions from model output that embeds a JSON object of the form
 * `{ "shmakk_actions": [ { "tool" | "name": ..., "args": {...} }, ... ] }`.
 *
 * The text is parsed as JSON after removing a surrounding code fence. If that
 * fails, the span from the first '{' to the last '}' is tried. Entries whose
 * tool name is not in the allow-list are dropped. `args` is kept only when it
 * is a plain object; arrays and other values are replaced by `{}`.
 *
 * @param {string} content - Raw model output.
 * @returns {{name: string, args: object}[]} Recognised actions, possibly empty.
 */
function parseFallbackActions(content) {
  const text = stripJsonFence(content);
  if (!text) return [];

  let obj = null;
  try {
    obj = JSON.parse(text);
  } catch {
    // Not pure JSON: try the outermost brace-delimited span instead.
    const start = text.indexOf('{');
    const end = text.lastIndexOf('}');
    if (start === -1 || end <= start) return [];
    try {
      obj = JSON.parse(text.slice(start, end + 1));
    } catch {
      return [];
    }
  }

  const rawActions = Array.isArray(obj?.shmakk_actions) ? obj.shmakk_actions : [];
  const allowed = new Set(['read_file', 'list_dir', 'web_search', 'fetch_url',
    'write_file', 'edit_file', 'make_dir', 'delete_file', 'run']);
  const actions = [];
  for (const a of rawActions) {
    const name = a?.tool || a?.name;
    if (!allowed.has(name)) continue;
    // Arguments are named, so an array is not a usable argument bag.
    const hasArgObject = a.args !== null && typeof a.args === 'object' && !Array.isArray(a.args);
    actions.push({ name, args: hasArgObject ? a.args : {} });
  }
  return actions;
}
183
+
184
+ // ── XML fallback action extraction ──────────────────────────────────────────
185
+
186
/**
 * Extract tool actions from XML-style model output:
 *
 *   <tool_call><function=NAME><parameter=KEY>VALUE</parameter>...</function></tool_call>
 *
 * Only allow-listed tool names are kept. Parameter values are trimmed; values
 * spelling true/false (any case) become booleans, plain decimal numbers become
 * numbers, and everything else stays a string.
 *
 * @param {string} content - Raw model output.
 * @returns {{name: string, args: object}[]} Recognised actions, possibly empty.
 */
function parseXmlFallbackActions(content) {
  const text = String(content || '');
  if (!text) return [];

  const allowed = new Set(['read_file', 'list_dir', 'web_search', 'fetch_url',
    'write_file', 'edit_file', 'make_dir', 'delete_file', 'run']);

  const coerce = (raw) => {
    if (/^(true|false)$/i.test(raw)) return /^true$/i.test(raw);
    if (/^-?\d+(?:\.\d+)?$/.test(raw)) return Number(raw);
    return raw;
  };

  const actions = [];
  for (const call of text.matchAll(/<tool_call>([\s\S]*?)<\/tool_call>/gi)) {
    const fn = /<function\s*=\s*([a-zA-Z0-9_]+)\s*>([\s\S]*?)<\/function>/i.exec(call[1]);
    if (!fn || !allowed.has(fn[1])) continue;

    const args = {};
    const body = fn[2] || '';
    for (const param of body.matchAll(/<parameter\s*=\s*([a-zA-Z0-9_]+)\s*>([\s\S]*?)<\/parameter>/gi)) {
      args[param[1]] = coerce((param[2] || '').trim());
    }
    actions.push({ name: fn[1], args });
  }
  return actions;
}
217
+
218
+ module.exports = {
219
+ webSearch,
220
+ fetchUrl,
221
+ parseDdgLite,
222
+ stripTags,
223
+ decodeDdgUrl,
224
+ fetchWithTimeout,
225
+ stripJsonFence,
226
+ parseFallbackActions,
227
+ parseXmlFallbackActions,
228
+ };
@@ -0,0 +1,213 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+
4
// Directory names that are never descended into while walking the project.
// '.shmakk' is listed because the index is stored under it
// (.shmakk/state/index.json); walking it would make the index describe, and
// embed a snippet of, its own state file on every refresh.
const SKIP_DIRS = new Set(['.git', 'node_modules', '.next', 'dist', 'build', 'coverage', '.shmakk']);
// Upper bound on how much of each file's content is analysed (applied to the
// decoded string, so it counts characters rather than bytes).
const MAX_FILE_BYTES = 96 * 1024;
// File extensions regarded as source code.
// NOTE(review): not referenced in the visible part of this module; confirm before removing.
const CODE_EXTS = new Set(['.js', '.cjs', '.mjs', '.ts', '.tsx', '.jsx', '.py', '.go', '.rs']);
7
+
8
/**
 * Read a file as UTF-8 text, treating any failure as an empty file.
 *
 * @param {string} p - Path of the file to read.
 * @returns {string} File contents, or '' when the file cannot be read.
 */
function safeRead(p) {
  let text = '';
  try {
    text = fs.readFileSync(p, 'utf8');
  } catch {
    // Best effort: unreadable files contribute no content.
  }
  return text;
}
11
+
12
/**
 * Return the leading non-blank lines of a text.
 *
 * Lines are split on LF or CRLF, trailing whitespace is removed, and lines
 * consisting only of whitespace are dropped.
 *
 * @param {string} content - Text to sample; falsy values are treated as ''.
 * @param {number} [limit=20] - End index passed to `slice(0, limit)`.
 * @returns {string[]} The retained lines.
 */
function firstNonEmptyLines(content, limit = 20) {
  const kept = [];
  for (const rawLine of String(content || '').split(/\r?\n/)) {
    if (rawLine.trim()) kept.push(rawLine.trimEnd());
  }
  return kept.slice(0, limit);
}
19
+
20
/**
 * Collect lightweight structural hints from source text with line-based
 * regular expressions (no real parsing). Only the first 400 lines are scanned.
 *
 * - symbols: names of `function` / `export function` / `class` declarations
 *   and of `const name = (` arrow functions.
 * - imports: module specifiers of `import ... from '...'` and
 *   `const ... = require('...')`.
 * - exports: `module.exports = name`, `export default name` (including named
 *   default functions and classes), and each individual name of single-line
 *   `export { a, b as c }` and `module.exports = { a, b: value }` lists.
 *
 * @param {string} content - Source text; falsy values are treated as ''.
 * @returns {{symbols: string[], imports: string[], exports: string[]}} At most 20 entries each.
 */
function extractHints(content) {
  const symbols = [];
  const imports = [];
  const exports = [];

  // Words the "single name" patterns can capture that are not export names.
  const notAName = new Set(['function', 'class', 'async', 'new']);

  // "a, b as c, d: 1" -> ['a', 'c', 'd']; spreads and other non-identifiers are dropped.
  const listedNames = (list) => list
    .split(',')
    .map((entry) => {
      const item = entry.trim();
      const alias = /\sas\s+([a-zA-Z0-9_$]+)$/.exec(item);
      return alias ? alias[1] : item.split(':')[0].trim();
    })
    .filter((name) => /^[a-zA-Z0-9_$]+$/.test(name));

  const lines = String(content || '').split(/\r?\n/).slice(0, 400);
  for (const l of lines) {
    const s = l.trim();

    const sym = /^export\s+(?:async\s+)?function\s+([a-zA-Z0-9_]+)/.exec(s)
      || /^(?:async\s+)?function\s+([a-zA-Z0-9_]+)/.exec(s)
      || /^class\s+([a-zA-Z0-9_]+)/.exec(s)
      || /^const\s+([a-zA-Z0-9_]+)\s*=\s*\(/.exec(s);
    if (sym) symbols.push(sym[1]);

    const single = /^export\s+default\s+(?:async\s+)?(?:function\*?|class)\s+([a-zA-Z0-9_]+)/.exec(s)
      || /^module\.exports\s*=\s*([a-zA-Z0-9_]+)/.exec(s)
      || /^export\s+default\s+([a-zA-Z0-9_]+)/.exec(s);
    if (single) {
      if (!notAName.has(single[1])) exports.push(single[1]);
    } else {
      const list = /^export\s*\{([^}]+)\}/.exec(s)
        || /^module\.exports\s*=\s*\{([^}]*)\}/.exec(s);
      if (list) exports.push(...listedNames(list[1]));
    }

    const imp = /^import\s+.*?from\s+['"]([^'"]+)['"]/.exec(s)
      || /^const\s+.*?=\s*require\(['"]([^'"]+)['"]\)/.exec(s);
    if (imp) imports.push(imp[1]);
  }

  return {
    symbols: symbols.slice(0, 20),
    imports: imports.slice(0, 20),
    exports: exports.slice(0, 20),
  };
}
46
+
47
/**
 * Classify a project-relative path into a coarse role by name heuristics.
 *
 * Backslashes are normalised to forward slashes first, so relative paths
 * produced on Windows are classified the same way as POSIX ones. Checks run
 * in this order and the first match wins: config, test, entry, hook, service,
 * source, then the default 'file'.
 *
 * @param {string} rel - Path relative to the project root.
 * @returns {'config'|'test'|'entry'|'hook'|'service'|'source'|'file'}
 */
function detectRole(rel) {
  const posixRel = rel.replace(/\\/g, '/');
  const base = path.posix.basename(posixRel).toLowerCase();
  const dir = path.posix.dirname(posixRel).toLowerCase();
  if (base === 'package.json' || /tsconfig|vite\.config|next\.config|dockerfile|readme/.test(base)) return 'config';
  if (dir.includes('test') || base.includes('.test.') || base.includes('.spec.')) return 'test';
  if (base === 'index.js' || base === 'main.js' || posixRel.startsWith('bin/')) return 'entry';
  if (dir.includes('hooks')) return 'hook';
  if (dir.includes('services')) return 'service';
  if (dir.includes('src')) return 'source';
  return 'file';
}
58
+
59
/**
 * Resolve a relative import specifier to a known project file.
 *
 * Candidates are tried in order: the path itself, the path with each of
 * .js/.cjs/.mjs/.ts/.tsx/.jsx appended, then index.js/index.ts/index.tsx
 * inside it. Specifiers that do not start with '.' are not resolved.
 *
 * @param {string} rel - Project-relative path of the importing file.
 * @param {string} imp - Import specifier as written in the source.
 * @param {Set<string>} allFiles - Known project-relative file paths.
 * @returns {string|null} The matching entry of `allFiles`, or null.
 */
function resolveImportTarget(rel, imp, allFiles) {
  if (!imp || !imp.startsWith('.')) return null;

  const raw = path.normalize(path.join(path.dirname(rel), imp));
  const withExtension = ['.js', '.cjs', '.mjs', '.ts', '.tsx', '.jsx'].map((ext) => `${raw}${ext}`);
  const asDirectory = ['index.js', 'index.ts', 'index.tsx'].map((name) => path.join(raw, name));

  const hit = [raw, ...withExtension, ...asDirectory].find((candidate) => allFiles.has(candidate));
  return hit === undefined ? null : hit;
}
73
+
74
/**
 * Recursively list regular files below `root` as root-relative paths.
 *
 * Directories named in SKIP_DIRS are not entered, directory entries that are
 * neither regular files nor directories are ignored, and directories that
 * cannot be read contribute nothing.
 *
 * @param {string} root - Directory the returned paths are relative to.
 * @param {string} [dir=root] - Directory currently being scanned.
 * @param {string[]} [out=[]] - Accumulator; also the return value.
 * @returns {string[]} `out`, with the files found appended.
 */
function walkFiles(root, dir = root, out = []) {
  let entries;
  try {
    entries = fs.readdirSync(dir, { withFileTypes: true });
  } catch {
    return out;
  }

  entries.forEach((entry) => {
    const abs = path.join(dir, entry.name);
    const rel = path.relative(root, abs);
    if (!rel) return;
    if (entry.isDirectory()) {
      if (!SKIP_DIRS.has(entry.name)) walkFiles(root, abs, out);
    } else if (entry.isFile()) {
      out.push(rel);
    }
  });
  return out;
}
91
+
92
/**
 * Location of the persisted index for a project.
 *
 * @param {string} root - Project root directory.
 * @returns {string} `<root>/.shmakk/state/index.json`.
 */
function indexFilePath(root) {
  const stateDir = path.join(root, '.shmakk', 'state');
  return path.join(stateDir, 'index.json');
}
95
+
96
/**
 * Load the persisted index for a project.
 *
 * The parsed value is accepted only if it is an object whose `files` property
 * is a non-array object. Anything else is treated like a missing index, so a
 * truncated or hand-edited state file cannot make later code fail on
 * `index.files`.
 *
 * @param {string} root - Project root directory.
 * @returns {object|null} The stored index, or null when absent, unreadable or malformed.
 */
function loadIndex(root) {
  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(indexFilePath(root), 'utf8'));
  } catch {
    return null;
  }
  const isObject = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);
  return isObject(parsed) && isObject(parsed.files) ? parsed : null;
}
100
+
101
/**
 * Persist the index as JSON, creating the state directory when needed.
 *
 * @param {string} root - Project root directory.
 * @param {object} index - Index to serialise.
 * @returns {void}
 */
function saveIndex(root, index) {
  const target = indexFilePath(root);
  const stateDir = path.dirname(target);
  fs.mkdirSync(stateDir, { recursive: true });
  fs.writeFileSync(target, JSON.stringify(index), 'utf8');
}
106
+
107
/**
 * Build the project index, or refresh the stored one, and persist it.
 *
 * Steps: (1) drop entries for files that no longer exist, (2) re-analyse every
 * file whose mtime or size differs from the stored entry, (3) recompute the
 * import edges of all entries, (4) stamp `updatedAt` and save.
 *
 * @param {string} root - Project root directory.
 * @returns {object} The refreshed index (`{ root, files, updatedAt }`).
 */
function buildOrRefreshIndex(root) {
  const now = Date.now();
  const existing = loadIndex(root) || { root, files: {}, updatedAt: 0 };
  const seen = new Set(walkFiles(root));

  // (1) Forget files that disappeared from disk.
  Object.keys(existing.files)
    .filter((rel) => !seen.has(rel))
    .forEach((rel) => { delete existing.files[rel]; });

  // (2) Analyse new and modified files.
  for (const rel of seen) {
    const abs = path.join(root, rel);
    let st;
    try {
      st = fs.statSync(abs);
    } catch {
      continue;
    }
    const { mtimeMs, size } = st;
    const prev = existing.files[rel];
    const unchanged = Boolean(prev) && prev.mtimeMs === mtimeMs && prev.size === size;
    if (unchanged) continue;

    const sample = safeRead(abs).slice(0, MAX_FILE_BYTES);
    const { symbols, imports, exports } = extractHints(sample);
    existing.files[rel] = {
      path: rel,
      mtimeMs,
      size,
      ext: path.extname(rel).toLowerCase(),
      role: detectRole(rel),
      symbols,
      imports,
      exports,
      snippet: firstNonEmptyLines(sample, 12),
      edges: [],
    };
  }

  // (3) Edges depend on the full file set, so recompute them for every entry.
  const allFiles = new Set(Object.keys(existing.files));
  for (const [rel, entry] of Object.entries(existing.files)) {
    entry.edges = (entry.imports || [])
      .map((imp) => resolveImportTarget(rel, imp, allFiles))
      .filter(Boolean)
      .slice(0, 30);
  }

  // (4) Persist.
  existing.updatedAt = now;
  saveIndex(root, existing);
  return existing;
}
156
+
157
/**
 * Split text into lowercase search tokens.
 *
 * A token is a run of letters, digits, '_', '.', '/' or '-'; every other
 * character separates tokens.
 *
 * @param {string} s - Text to tokenise; falsy values are treated as ''.
 * @returns {string[]} Non-empty tokens in order of appearance.
 */
function tokenize(s) {
  const lowered = String(s || '').toLowerCase();
  const tokens = [];
  for (const piece of lowered.split(/[^a-z0-9_./-]+/)) {
    if (piece) tokens.push(piece);
  }
  return tokens;
}
160
+
161
/**
 * Rank indexed files by how well they match a free-text query.
 *
 * For each query token a file scores 2 when the token occurs in its path,
 * symbols or imports, plus 3 when it occurs in the path. Files scoring 0 are
 * omitted; ties are ordered by path.
 *
 * @param {object} index - Index whose `files` maps paths to entries.
 * @param {string} query - Free-text query.
 * @param {number} [limit=20] - Maximum number of paths to return.
 * @returns {string[]} Paths ordered from best to worst match.
 */
function relevantFiles(index, query, limit = 20) {
  const terms = tokenize(query);
  if (terms.length === 0) return [];

  const ranked = Object.values(index.files || {})
    .map((f) => {
      const haystack = `${f.path} ${(f.symbols || []).join(' ')} ${(f.imports || []).join(' ')}`.toLowerCase();
      const lowerPath = f.path.toLowerCase();
      const score = terms.reduce(
        (sum, term) => sum + (haystack.includes(term) ? 2 : 0) + (lowerPath.includes(term) ? 3 : 0),
        0,
      );
      return { file: f.path, score };
    })
    .filter((entry) => entry.score > 0);

  ranked.sort((a, b) => b.score - a.score || a.file.localeCompare(b.file));
  return ranked.slice(0, limit).map((entry) => entry.file);
}
177
+
178
/**
 * Collect the files most relevant to a query together with the files they
 * import, breadth-first, up to `maxHops` edges away from a seed.
 *
 * Seeds come from `relevantFiles`, using between 4 and 8 of them depending on
 * `limit`. Each file appears at most once.
 *
 * @param {object} index - Index whose `files` maps paths to entries.
 * @param {string} query - Free-text query.
 * @param {number} [limit=12] - Maximum number of nodes returned.
 * @param {number} [maxHops=1] - Maximum import distance from a seed.
 * @returns {object[]} Node summaries (`path`, `role`, `symbols`, `imports`,
 *   `exports`, `snippet`, `edges`) in visiting order.
 */
function relevantSubgraph(index, query, limit = 12, maxHops = 1) {
  const seedCount = Math.max(4, Math.min(limit, 8));
  const seeds = relevantFiles(index, query, seedCount);
  const files = index.files || {};
  const visited = new Set(seeds);
  const pending = seeds.map((file) => ({ file, hop: 0 }));
  const out = [];

  // `pending` is only ever appended to, so a cursor walks it in FIFO order.
  for (let cursor = 0; cursor < pending.length && out.length < limit; cursor += 1) {
    const { file, hop } = pending[cursor];
    const node = files[file];
    if (!node) continue;

    out.push({
      path: node.path,
      role: node.role,
      symbols: node.symbols || [],
      imports: node.imports || [],
      exports: node.exports || [],
      snippet: node.snippet || [],
      edges: node.edges || [],
    });

    if (hop >= maxHops) continue;
    for (const next of node.edges || []) {
      if (!visited.has(next)) {
        visited.add(next);
        pending.push({ file: next, hop: hop + 1 });
      }
    }
  }

  return out;
}
208
+
209
/**
 * Delete the persisted index file, ignoring every failure.
 *
 * @param {string} root - Project root directory.
 * @returns {void}
 */
function clearIndex(root) {
  try {
    const target = indexFilePath(root);
    fs.rmSync(target, { force: true });
  } catch {
    // Best effort: nothing to do when the file cannot be removed.
  }
}
212
+
213
+ module.exports = { buildOrRefreshIndex, clearIndex, relevantFiles, relevantSubgraph };