cowork-cli 2.1.0 → 2.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -2
- package/src/engine/models/BaseModel.js +30 -7
- package/src/engine/tools/gitDiff.js +76 -0
- package/src/engine/tools/gitLog.js +60 -0
- package/src/engine/tools/gitStatus.js +92 -0
- package/src/engine/tools/index.js +50 -1
- package/src/engine/tools/listTools.js +18 -0
- package/src/engine/tools/webFetch.js +7 -10
- package/src/engine/tools/webSearch.js +11 -12
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "cowork-cli",
|
|
3
|
-
"version": "2.1.0",
|
|
3
|
+
"version": "2.2.2",
|
|
4
4
|
"description": "work with cowork",
|
|
5
5
|
"bin": {
|
|
6
6
|
"cwk": "bin/cli.js"
|
|
@@ -33,6 +33,7 @@
|
|
|
33
33
|
"dependencies": {
|
|
34
34
|
"dotenv": "^17.4.2",
|
|
35
35
|
"ipaddr.js": "^2.4.0",
|
|
36
|
+
"node-html-parser": "^7.1.0",
|
|
36
37
|
"openai": "^6.38.0"
|
|
37
38
|
}
|
|
38
|
-
}
|
|
39
|
+
}
|
|
@@ -121,20 +121,33 @@ export default class BaseModel {
|
|
|
121
121
|
return response;
|
|
122
122
|
|
|
123
123
|
} catch (err) {
|
|
124
|
-
|
|
124
|
+
// Transient HTTP status codes (rate-limit, server errors)
|
|
125
|
+
const isHttpTransient = [429, 500, 502, 503, 504].includes(err.status);
|
|
126
|
+
// Transient Node.js-level network errors (flaky connections, DNS hiccups)
|
|
127
|
+
const TRANSIENT_NET_CODES = new Set([
|
|
128
|
+
'ECONNRESET', // Connection forcibly closed by the remote side
|
|
129
|
+
'ETIMEDOUT', // Connection or operation timed out
|
|
130
|
+
'ECONNREFUSED', // Remote host actively refused the connection
|
|
131
|
+
'EAI_AGAIN', // Temporary DNS resolution failure
|
|
132
|
+
'ENETUNREACH', // Network is unreachable
|
|
133
|
+
'EHOSTUNREACH', // Host is unreachable
|
|
134
|
+
]);
|
|
135
|
+
const isNetTransient = TRANSIENT_NET_CODES.has(err.code);
|
|
136
|
+
const isTransient = isHttpTransient || isNetTransient;
|
|
137
|
+
|
|
125
138
|
if (isTransient && retries < maxRetries) {
|
|
126
139
|
retries++;
|
|
127
|
-
|
|
140
|
+
|
|
128
141
|
let delay = Math.pow(2, retries) * 1000;
|
|
129
|
-
|
|
130
|
-
// 2. Adhere to Retry-After header if present
|
|
142
|
+
|
|
143
|
+
// 2. Adhere to Retry-After header if present (HTTP errors only)
|
|
131
144
|
const retryAfter = err.headers?.['retry-after'];
|
|
132
145
|
if (retryAfter) {
|
|
133
146
|
const seconds = parseInt(retryAfter);
|
|
134
147
|
if (!isNaN(seconds)) {
|
|
135
148
|
delay = seconds * 1000;
|
|
136
149
|
} else {
|
|
137
|
-
// Handle Date string
|
|
150
|
+
// Handle Date string format
|
|
138
151
|
const retryDate = new Date(retryAfter);
|
|
139
152
|
if (!isNaN(retryDate.getTime())) {
|
|
140
153
|
delay = Math.max(0, retryDate.getTime() - Date.now());
|
|
@@ -146,7 +159,9 @@ export default class BaseModel {
|
|
|
146
159
|
const jitter = Math.random() * 500;
|
|
147
160
|
const finalDelay = delay + jitter;
|
|
148
161
|
|
|
149
|
-
|
|
162
|
+
// Show a meaningful label: HTTP status or Node error code
|
|
163
|
+
const errLabel = err.status ?? err.code ?? 'Network error';
|
|
164
|
+
ui.update(`${errLabel}. Retrying in ${(finalDelay/1000).toFixed(1)}s`);
|
|
150
165
|
await new Promise(resolve => setTimeout(resolve, finalDelay));
|
|
151
166
|
ui.update('Thinking');
|
|
152
167
|
continue;
|
|
@@ -184,7 +199,10 @@ export default class BaseModel {
|
|
|
184
199
|
webSearch: 'searching web',
|
|
185
200
|
findFile: 'finding',
|
|
186
201
|
findDir: 'finding',
|
|
187
|
-
listTools: 'listing'
|
|
202
|
+
listTools: 'listing',
|
|
203
|
+
gitDiff: 'git diff',
|
|
204
|
+
gitLog: 'git log',
|
|
205
|
+
gitStatus: 'git status',
|
|
188
206
|
};
|
|
189
207
|
|
|
190
208
|
const label = toolLabels[name] || name;
|
|
@@ -197,6 +215,11 @@ export default class BaseModel {
|
|
|
197
215
|
else if (name === 'webSearch') displayArg = `'${args.query}'`;
|
|
198
216
|
else if (name === 'findFile' || name === 'findDir') displayArg = `'${args.pattern}' in ${args.dirPath || '.'}`;
|
|
199
217
|
else if (name === 'readFileChunk') displayArg = `${args.filePath} [L${args.startLine}-${args.endLine}]`;
|
|
218
|
+
else if (name === 'gitDiff') {
|
|
219
|
+
const scope = args.staged ? 'staged' : 'unstaged';
|
|
220
|
+
displayArg = args.filePath ? `${scope} · ${args.filePath}` : scope;
|
|
221
|
+
}
|
|
222
|
+
else if (name === 'gitLog') displayArg = `last ${args.limit ?? 10} commits`;
|
|
200
223
|
else displayArg = args.url || args.filePath || args.dirPath || args.path || args.pattern || JSON.stringify(args);
|
|
201
224
|
|
|
202
225
|
// ui.start() handles terminal-aware truncation internally.
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import { execFile } from 'node:child_process';
|
|
2
|
+
import { promisify } from 'node:util';
|
|
3
|
+
import { safePath } from '../../utils/fsUtils.js';
|
|
4
|
+
|
|
5
|
+
const execFileAsync = promisify(execFile);
|
|
6
|
+
|
|
7
|
+
const MAX_LINES = 500;
|
|
8
|
+
const DEFAULT_MAX_LINES = 300;
|
|
9
|
+
const GIT_TIMEOUT_MS = 10_000;
|
|
10
|
+
const GIT_MAX_BUFFER = 1024 * 1024; // 1 MB raw output cap
|
|
11
|
+
|
|
12
|
+
/**
 * Runs a git command safely using execFile (no shell — args are an array).
 *
 * Failures are reported through the returned object instead of being thrown:
 *  - `ENOENT`: the git executable could not be spawned.
 *  - stdout larger than GIT_MAX_BUFFER: the partial stdout captured before the
 *    child was terminated is returned as a success, with a warning line
 *    appended, so the caller can still apply its own line limit.
 *  - timeout (child killed after GIT_TIMEOUT_MS): explicit timeout message.
 *  - anything else: git's stderr, falling back to the error message.
 *
 * @param {string[]} args Git sub-command and flags.
 * @param {string} cwd Working directory.
 * @returns {Promise<{ok: boolean, output?: string, error?: string}>}
 */
async function runGit(args, cwd = process.cwd()) {
  try {
    const { stdout } = await execFileAsync('git', args, {
      cwd,
      timeout: GIT_TIMEOUT_MS,
      maxBuffer: GIT_MAX_BUFFER,
    });
    return { ok: true, output: stdout };
  } catch (err) {
    if (err.code === 'ENOENT') {
      return { ok: false, error: 'Error: Git is not installed or not found in PATH.' };
    }

    if (err.code === 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER') {
      const limitKb = GIT_MAX_BUFFER / 1024;
      if (typeof err.stdout === 'string' && err.stdout.length > 0) {
        // The final line was most likely cut mid-way when the buffer filled up; drop it.
        const lastNewline = err.stdout.lastIndexOf('\n');
        const partial = lastNewline > 0 ? err.stdout.slice(0, lastNewline) : err.stdout;
        return {
          ok: true,
          output: `${partial}\n\n[Warning: git output exceeded ${limitKb} KB and was cut off]`,
        };
      }
      return { ok: false, error: `Error: git output exceeded the ${limitKb} KB limit.` };
    }

    // execFile sets `killed` when it terminated the child itself (here: the timeout).
    if (err.killed) {
      return { ok: false, error: `Error: git timed out after ${GIT_TIMEOUT_MS / 1000}s.` };
    }

    const msg = (err.stderr || err.message || '').trim();
    return { ok: false, error: `Error: ${msg || 'Unknown git error.'}` };
  }
}
|
|
34
|
+
|
|
35
|
+
/**
 * gitDiff tool: Shows unstaged or staged file changes as a unified diff.
 *
 * Always resolves to a string: the diff text (possibly truncated), a
 * "no changes" notice, or an `Error: …` message. It does not throw for
 * git failures or for a path rejected by safePath.
 *
 * @param {Object} [args]
 * @param {boolean} [args.staged=false] If true, diffs the staging area (--staged).
 * @param {string} [args.filePath] Limit diff to a specific file (validated with safePath).
 * @param {number} [args.maxLines=300] Maximum output lines to return (max: 500).
 * @returns {Promise<string>}
 */
export default async function gitDiff({ staged = false, filePath, maxLines = DEFAULT_MAX_LINES } = {}) {
  // Clamp maxLines to a whole number in [1, MAX_LINES]. A non-numeric value would
  // otherwise become NaN and silently disable truncation, so fall back to the default.
  const requested = Number(maxLines);
  const limit = Number.isFinite(requested)
    ? Math.min(Math.max(1, Math.trunc(requested)), MAX_LINES)
    : DEFAULT_MAX_LINES;

  // Build git args — all discrete, never shell-interpolated
  const args = ['diff', '--no-color'];
  if (staged) args.push('--staged');

  // Validate and append optional path filter
  if (filePath) {
    let resolved;
    try {
      resolved = safePath(filePath);
    } catch (err) {
      return `Error: ${err.message}`;
    }
    // `--` keeps git from interpreting the path as an option or revision.
    args.push('--', resolved);
  }

  const result = await runGit(args);
  if (!result.ok) return result.error;

  const output = result.output.trim();
  if (!output) {
    return staged
      ? 'No staged changes found.'
      : 'No unstaged changes found. Working tree is clean.';
  }

  const lines = output.split('\n');
  if (lines.length > limit) {
    return lines.slice(0, limit).join('\n') + `\n\n[Warning: Truncated at ${limit} lines]`;
  }
  return output;
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { execFile } from 'node:child_process';
|
|
2
|
+
import { promisify } from 'node:util';
|
|
3
|
+
|
|
4
|
+
const execFileAsync = promisify(execFile);
|
|
5
|
+
|
|
6
|
+
const MAX_LIMIT = 50;
|
|
7
|
+
const DEFAULT_LIMIT = 10;
|
|
8
|
+
const GIT_TIMEOUT_MS = 10_000;
|
|
9
|
+
const GIT_MAX_BUFFER = 1024 * 1024; // 1 MB raw output cap
|
|
10
|
+
|
|
11
|
+
/**
 * Runs a git command safely using execFile (no shell — args are an array).
 *
 * Failures are reported through the returned object instead of being thrown:
 *  - `ENOENT`: the git executable could not be spawned.
 *  - output larger than GIT_MAX_BUFFER: explicit size-limit message.
 *  - timeout (child killed after GIT_TIMEOUT_MS): explicit timeout message.
 *  - anything else: git's stderr, falling back to the error message.
 *
 * @param {string[]} args Git sub-command and flags.
 * @param {string} cwd Working directory.
 * @returns {Promise<{ok: boolean, output?: string, error?: string}>}
 */
async function runGit(args, cwd = process.cwd()) {
  try {
    const { stdout } = await execFileAsync('git', args, {
      cwd,
      timeout: GIT_TIMEOUT_MS,
      maxBuffer: GIT_MAX_BUFFER,
    });
    return { ok: true, output: stdout };
  } catch (err) {
    if (err.code === 'ENOENT') {
      return { ok: false, error: 'Error: Git is not installed or not found in PATH.' };
    }
    if (err.code === 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER') {
      return { ok: false, error: `Error: git output exceeded the ${GIT_MAX_BUFFER / 1024} KB limit.` };
    }
    // execFile sets `killed` when it terminated the child itself (here: the timeout).
    if (err.killed) {
      return { ok: false, error: `Error: git timed out after ${GIT_TIMEOUT_MS / 1000}s.` };
    }
    const msg = (err.stderr || err.message || '').trim();
    return { ok: false, error: `Error: ${msg || 'Unknown git error.'}` };
  }
}
|
|
33
|
+
|
|
34
|
+
/**
 * gitLog tool: Returns recent commit history in a readable format.
 *
 * Always resolves to a string: the log text, a "no commits" notice, or an
 * `Error: …` message produced by runGit.
 *
 * @param {Object} [args]
 * @param {number} [args.limit=10] Number of commits to retrieve (max: 50).
 * @param {boolean} [args.oneline=false] Compact single-line format per commit.
 * @returns {Promise<string>}
 */
export default async function gitLog({ limit = DEFAULT_LIMIT, oneline = false } = {}) {
  // Clamp limit to a whole number in [1, MAX_LIMIT]. `git log -n` rejects
  // non-integer values, so truncate fractions and fall back to the default on NaN.
  const requested = Number(limit);
  const count = Number.isFinite(requested)
    ? Math.min(Math.max(1, Math.trunc(requested)), MAX_LIMIT)
    : DEFAULT_LIMIT;

  const args = ['log', '--no-color', '-n', String(count)];
  if (oneline) {
    args.push('--oneline');
  } else {
    // Human-readable multi-line format: hash | author | date | subject
    args.push('--pretty=format:commit %H%nauthor: %an <%ae>%ndate: %ad%n%n %s%n%b%n---');
    args.push('--date=short');
  }

  const result = await runGit(args);
  if (!result.ok) return result.error;

  const output = result.output.trim();
  if (!output) return 'No commits found in this repository.';

  return output;
}
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { execFile } from 'node:child_process';
|
|
2
|
+
import { promisify } from 'node:util';
|
|
3
|
+
|
|
4
|
+
const execFileAsync = promisify(execFile);
|
|
5
|
+
|
|
6
|
+
const GIT_TIMEOUT_MS = 10_000;
|
|
7
|
+
const GIT_MAX_BUFFER = 1024 * 1024; // 1 MB raw output cap
|
|
8
|
+
|
|
9
|
+
/**
 * Runs a git command safely using execFile (no shell — args are an array).
 *
 * Failures are reported through the returned object instead of being thrown:
 *  - `ENOENT`: the git executable could not be spawned.
 *  - output larger than GIT_MAX_BUFFER: explicit size-limit message (a partial
 *    status listing is not returned, because it would omit files).
 *  - timeout (child killed after GIT_TIMEOUT_MS): explicit timeout message.
 *  - anything else: git's stderr, falling back to the error message.
 *
 * @param {string[]} args Git sub-command and flags.
 * @param {string} cwd Working directory.
 * @returns {Promise<{ok: boolean, output?: string, error?: string}>}
 */
async function runGit(args, cwd = process.cwd()) {
  try {
    const { stdout } = await execFileAsync('git', args, {
      cwd,
      timeout: GIT_TIMEOUT_MS,
      maxBuffer: GIT_MAX_BUFFER,
    });
    return { ok: true, output: stdout };
  } catch (err) {
    if (err.code === 'ENOENT') {
      return { ok: false, error: 'Error: Git is not installed or not found in PATH.' };
    }
    if (err.code === 'ERR_CHILD_PROCESS_STDIO_MAXBUFFER') {
      return { ok: false, error: `Error: git output exceeded the ${GIT_MAX_BUFFER / 1024} KB limit.` };
    }
    // execFile sets `killed` when it terminated the child itself (here: the timeout).
    if (err.killed) {
      return { ok: false, error: `Error: git timed out after ${GIT_TIMEOUT_MS / 1000}s.` };
    }
    const msg = (err.stderr || err.message || '').trim();
    return { ok: false, error: `Error: ${msg || 'Unknown git error.'}` };
  }
}
|
|
31
|
+
|
|
32
|
+
// Porcelain v1 single-character status codes → human description
const STATUS_LABELS = {
  M: 'modified',
  T: 'type changed',
  A: 'added',
  D: 'deleted',
  R: 'renamed',
  C: 'copied',
  U: 'unmerged',
  '?': 'untracked',
  '!': 'ignored',
};

// Two-character XY pairs that git uses for merge conflicts. These must be
// matched as a pair: read column by column they look like ordinary
// staged/unstaged changes (e.g. "AA" would show up as "added" twice).
const UNMERGED_LABELS = {
  DD: 'both deleted',
  AU: 'added by us',
  UD: 'deleted by them',
  UA: 'added by them',
  DU: 'deleted by us',
  AA: 'both added',
  UU: 'both modified',
};

/**
 * Parses `git status --porcelain` output into grouped sections.
 *
 * Each non-empty line is read as `XY <path>`: X is the index (staged) status,
 * Y is the worktree (unstaged) status, and the path starts at column 3.
 * Sections are emitted in the order staged, unstaged, unmerged, untracked,
 * and empty sections are omitted.
 *
 * @param {string} raw Raw porcelain output.
 * @returns {string} Formatted status string.
 */
function parsePorcelain(raw) {
  if (!raw.trim()) return 'Working tree is clean. Nothing to report.';

  const staged = [];
  const unstaged = [];
  const unmerged = [];
  const untracked = [];

  for (const line of raw.split('\n')) {
    if (!line) continue;

    const x = line[0]; // index (staged) status
    const y = line[1]; // worktree (unstaged) status
    const file = line.slice(3);

    if (x === '?' && y === '?') {
      untracked.push(file);
      continue;
    }

    // Conflicted paths get their own section instead of being split across
    // the staged and unstaged lists.
    const conflict = UNMERGED_LABELS[`${x}${y}`];
    if (conflict) {
      unmerged.push(` ${conflict} ${file}`);
      continue;
    }

    if (x !== ' ' && x !== '?') {
      staged.push(` ${STATUS_LABELS[x] ?? x} ${file}`);
    }
    if (y !== ' ' && y !== '?') {
      unstaged.push(` ${STATUS_LABELS[y] ?? y} ${file}`);
    }
  }

  const sections = [];
  if (staged.length) sections.push(`Staged changes:\n${staged.join('\n')}`);
  if (unstaged.length) sections.push(`Unstaged changes:\n${unstaged.join('\n')}`);
  if (unmerged.length) sections.push(`Unmerged paths:\n${unmerged.join('\n')}`);
  if (untracked.length) sections.push(`Untracked files:\n${untracked.map((f) => ` ${f}`).join('\n')}`);

  return sections.join('\n\n');
}
|
|
82
|
+
|
|
83
|
+
/**
 * gitStatus tool: reports the working tree status as grouped text sections.
 * Takes no parameters; runGit is called without a cwd, so its default
 * (process.cwd()) applies.
 *
 * @returns {Promise<string>} The formatted status from parsePorcelain, or the
 *   error string reported by runGit when the git command fails.
 */
export default async function gitStatus() {
  const { ok, error, output } = await runGit(['status', '--porcelain']);
  return ok ? parsePorcelain(output) : error;
}
|
|
@@ -10,6 +10,9 @@ import findFile from './findFile.js';
|
|
|
10
10
|
import findDir from './findDir.js';
|
|
11
11
|
import askUser from './askUser.js';
|
|
12
12
|
import askConfirm from './askConfirm.js';
|
|
13
|
+
import gitDiff from './gitDiff.js';
|
|
14
|
+
import gitLog from './gitLog.js';
|
|
15
|
+
import gitStatus from './gitStatus.js';
|
|
13
16
|
|
|
14
17
|
export const toolDefinitions = [
|
|
15
18
|
{
|
|
@@ -189,6 +192,49 @@ export const toolDefinitions = [
|
|
|
189
192
|
required: ["question"]
|
|
190
193
|
}
|
|
191
194
|
}
|
|
195
|
+
},
|
|
196
|
+
{
|
|
197
|
+
type: "function",
|
|
198
|
+
function: {
|
|
199
|
+
name: "gitDiff",
|
|
200
|
+
description: "Show file changes as a unified diff. Use staged:true to see staged changes, or omit for unstaged changes. Optionally limit to a single file.",
|
|
201
|
+
parameters: {
|
|
202
|
+
type: "object",
|
|
203
|
+
properties: {
|
|
204
|
+
staged: { type: "boolean", description: "If true, shows staged (index) changes. Default: false (unstaged)." },
|
|
205
|
+
filePath: { type: "string", description: "Limit diff to this specific file path (optional)." },
|
|
206
|
+
maxLines: { type: "number", description: "Maximum lines of diff output to return (default: 300, max: 500)." }
|
|
207
|
+
},
|
|
208
|
+
required: []
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
},
|
|
212
|
+
{
|
|
213
|
+
type: "function",
|
|
214
|
+
function: {
|
|
215
|
+
name: "gitLog",
|
|
216
|
+
description: "Show recent commit history. Returns commit hash, author, date, and message.",
|
|
217
|
+
parameters: {
|
|
218
|
+
type: "object",
|
|
219
|
+
properties: {
|
|
220
|
+
limit: { type: "number", description: "Number of commits to retrieve (default: 10, max: 50)." },
|
|
221
|
+
oneline: { type: "boolean", description: "If true, shows each commit as a compact single line. Default: false." }
|
|
222
|
+
},
|
|
223
|
+
required: []
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
},
|
|
227
|
+
{
|
|
228
|
+
type: "function",
|
|
229
|
+
function: {
|
|
230
|
+
name: "gitStatus",
|
|
231
|
+
description: "Show the working tree status grouped into staged changes, unstaged changes, and untracked files.",
|
|
232
|
+
parameters: {
|
|
233
|
+
type: "object",
|
|
234
|
+
properties: {},
|
|
235
|
+
required: []
|
|
236
|
+
}
|
|
237
|
+
}
|
|
192
238
|
}
|
|
193
239
|
];
|
|
194
240
|
|
|
@@ -204,7 +250,10 @@ const toolImplementations = {
|
|
|
204
250
|
findFile,
|
|
205
251
|
findDir,
|
|
206
252
|
askUser,
|
|
207
|
-
askConfirm
|
|
253
|
+
askConfirm,
|
|
254
|
+
gitDiff,
|
|
255
|
+
gitLog,
|
|
256
|
+
gitStatus,
|
|
208
257
|
};
|
|
209
258
|
|
|
210
259
|
/**
|
|
@@ -75,6 +75,24 @@ export default async function listTools() {
|
|
|
75
75
|
usage: "askConfirm({ question: 'Should I proceed with deleting this file?' })",
|
|
76
76
|
description: "Asks the user a yes/no question using an interactive toggle. Returns { confirmed: true } for yes, { confirmed: false } for no, or { confirmed: false, dismissed: true } on cancellation.",
|
|
77
77
|
whenToUse: "When only a boolean decision is needed from the user. Prefer this over askUser for simple yes/no choices."
|
|
78
|
+
},
|
|
79
|
+
{
|
|
80
|
+
name: "gitDiff",
|
|
81
|
+
usage: "gitDiff({ staged: false, filePath: 'src/main.js', maxLines: 300 })",
|
|
82
|
+
description: "Shows file changes as a unified diff. Defaults to unstaged changes. Pass staged:true for the staging area. Optionally limit to a single validated file path.",
|
|
83
|
+
whenToUse: "To review what has changed in the working tree or staging area before a commit, or to analyse a specific file's modifications."
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
name: "gitLog",
|
|
87
|
+
usage: "gitLog({ limit: 10, oneline: false })",
|
|
88
|
+
description: "Returns recent commit history with hash, author, date, and message. Use oneline:true for a compact summary.",
|
|
89
|
+
whenToUse: "To understand the project's commit history, identify when a change was introduced, or generate a changelog summary."
|
|
90
|
+
},
|
|
91
|
+
{
|
|
92
|
+
name: "gitStatus",
|
|
93
|
+
usage: "gitStatus({})",
|
|
94
|
+
description: "Shows the working tree status grouped into staged changes, unstaged changes, and untracked files.",
|
|
95
|
+
whenToUse: "To get a quick overview of the current repository state before reviewing diffs or logs."
|
|
78
96
|
}
|
|
79
97
|
];
|
|
80
98
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { lookup } from 'node:dns/promises';
|
|
2
2
|
import { URL } from 'node:url';
|
|
3
3
|
import ipaddr from 'ipaddr.js';
|
|
4
|
+
import { parse } from 'node-html-parser';
|
|
4
5
|
|
|
5
6
|
const MAX_CHARS = 15000;
|
|
6
7
|
const TIMEOUT_MS = 10000;
|
|
@@ -117,16 +118,12 @@ export default async function webFetch({ url }) {
|
|
|
117
118
|
|
|
118
119
|
// 5. HTML Stripping (Aggressive for context awareness)
|
|
119
120
|
if (contentType.includes('text/html')) {
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
.replace(/<aside\b[^<]*(?:(?!<\/aside>)<[^<]*)*<\/aside>/gi, '')
|
|
127
|
-
.replace(/<[^>]+>/g, ' ')
|
|
128
|
-
.replace(/\s+/g, ' ')
|
|
129
|
-
.trim();
|
|
121
|
+
const root = parse(text);
|
|
122
|
+
root.querySelectorAll('script, style, nav, header, footer, aside').forEach(el => el.remove());
|
|
123
|
+
text = root.textContent;
|
|
124
|
+
text = text.replace(/<!DOCTYPE\b[^>]*>/gi, '');
|
|
125
|
+
text = text.replace(/<\?xml\b[^>]*>/gi, '');
|
|
126
|
+
text = text.replace(/\s+/g, ' ').trim();
|
|
130
127
|
} else if (contentType.includes('application/json')) {
|
|
131
128
|
try {
|
|
132
129
|
text = JSON.stringify(JSON.parse(text), null, 2);
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import { parse } from 'node-html-parser';
|
|
2
|
+
|
|
1
3
|
const TIMEOUT_MS = 10000;
|
|
2
4
|
const MAX_RESULTS_HARD_LIMIT = 20;
|
|
3
5
|
|
|
@@ -38,23 +40,20 @@ export default async function webSearch({ query, limit = 5 }) {
|
|
|
38
40
|
const html = await response.text();
|
|
39
41
|
const results = [];
|
|
40
42
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
const blocks = html.split('class="links_main links_deep result__body"').slice(1);
|
|
43
|
+
const root = parse(html);
|
|
44
|
+
const resultNodes = root.querySelectorAll('.result__body');
|
|
44
45
|
|
|
45
|
-
for (const
|
|
46
|
+
for (const node of resultNodes) {
|
|
46
47
|
if (results.length >= maxLimit) break;
|
|
47
48
|
|
|
48
|
-
const
|
|
49
|
-
const
|
|
49
|
+
const titleEl = node.querySelector('.result__title a');
|
|
50
|
+
const snippetEl = node.querySelector('.result__snippet');
|
|
50
51
|
|
|
51
|
-
if (
|
|
52
|
-
|
|
53
|
-
const
|
|
54
|
-
const snippet = snippetMatch[2].replace(/<[^>]+>/g, '').trim();
|
|
52
|
+
if (titleEl && snippetEl) {
|
|
53
|
+
const title = titleEl.textContent.replace(/\s+/g, ' ').trim();
|
|
54
|
+
const snippet = snippetEl.textContent.replace(/\s+/g, ' ').trim();
|
|
55
55
|
|
|
56
|
-
|
|
57
|
-
let url = snippetMatch[1];
|
|
56
|
+
let url = titleEl.getAttribute('href') || '';
|
|
58
57
|
if (url.startsWith('//duckduckgo.com/l/?uddg=')) {
|
|
59
58
|
url = decodeURIComponent(url.split('uddg=')[1].split('&')[0]);
|
|
60
59
|
}
|