ddg-search 2026.2.15-1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 camo@hiddendj.com
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,78 @@
1
+ # ddg-search
2
+
3
+ DuckDuckGo HTML search scraper with multiple output formats. Provides a CLI and small library helpers to fetch result pages, handle pagination, and emit OpenSearch-style structured data.
4
+
5
+ ## Requirements
6
+ - Node.js 22 or newer
7
+
8
+ ## Installation
9
+ - Global CLI (npm): `npm install -g ddg-search`
10
+ - One-off run (npx): `npx ddg-search --help`
11
+ - Project dependency: `npm install ddg-search`
12
+ - Local dev from this repo: `pnpm install` then `pnpm link --global` or `pnpm install -g .` (enable via `corepack enable` if needed)
13
+
14
+ ## CLI usage
15
+ ```
16
+ Usage: ddg-search [options] <query>
17
+
18
+ Search DuckDuckGo and output results in structured formats.
19
+
20
+ Options:
21
+ -f, --format <fmt> Output format (default: json). See formats below.
22
+ -p, --pages <n> Maximum pages to scrape, 0 for unlimited (default: 5)
23
+ -r, --region <code> Region code, e.g. us-en, uk-en (default: all regions)
24
+ -t, --time <range> Time filter: d (day), w (week), m (month), y (year)
25
+ -h, --help Show this help message
26
+
27
+ Formats:
28
+ json OpenSearch 1.1 response conventions in JSON
29
+ jsonl One JSON object per result line (streaming-friendly)
30
+ csv CSV with headers
31
+ opensearch OpenSearch 1.1 Atom XML
32
+ markdown Numbered markdown list (LLM-friendly)
33
+ compact Minimal token format for LLM context windows
34
+
35
+ Results are written to stdout; progress is written to stderr.
36
+ ```
37
+
38
+ ## Examples
39
+ - `ddg-search "node.js tutorial"`
40
+ - `ddg-search -f csv -p 3 "linux kernel"`
41
+ - `ddg-search -f opensearch "rust programming" > results.xml`
42
+ - `ddg-search -f compact "api docs" | llm "summarize these results"`
43
+ - `ddg-search -p 0 "scrape everything"`
44
+ - `ddg-search -r us-en -t w "recent news"`
45
+ - `ddg-search "rust programming" | jq '.items[].link'`
46
+
47
+ ## Programmatic usage
48
+ ```js
49
+ import { search, formatJson } from 'ddg-search';
50
+
51
+ const { results, spelling, zeroClick } = await search('rust programming', {
52
+ maxPages: 2,
53
+ region: 'us-en',
54
+ time: 'w',
55
+ });
56
+
57
+ // Convert to OpenSearch-style JSON
58
+ const output = formatJson({ results, spelling, zeroClick });
59
+ console.log(output);
60
+ ```
61
+ Exports also include `fetchPage`, `parsePage`, and formatters like `formatCsv`, `formatJsonl`, `formatMarkdown`, `formatOpenSearch`, and `formatCompact`.
62
+
63
+ ## Notes
64
+ - DuckDuckGo may present a bot-detection challenge. If that happens, the scraper stops early and returns the results collected so far.
65
+ - Respect site terms of use and rate-limit your requests; `search()` inserts random delays between pages by default.
66
+
67
+ ## Development
68
+ - Run tests: `pnpm test`
69
+ - Coverage: `pnpm run coverage`
70
+ - Lint: `pnpm run lint`
71
+ - Format check: `pnpm run format`; auto-fix: `pnpm run format:write`
72
+
73
+ ## Links
74
+ - npm: https://www.npmjs.com/package/ddg-search
75
+ - GitHub: https://github.com/camohiddendj/ddg-search
76
+
77
+ ## Contributing
78
+ Please see [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines on issues and pull requests.
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env node
2
+ import { main } from '../src/index.js';
3
+
4
+ main();
package/package.json ADDED
@@ -0,0 +1,52 @@
1
+ {
2
+ "name": "ddg-search",
3
+ "version": "2026.2.15-1",
4
+ "description": "DuckDuckGo HTML search scraper with multiple output formats",
5
+ "license": "MIT",
6
+ "repository": {
7
+ "type": "git",
8
+ "url": "https://github.com/camohiddendj/ddg-search.git"
9
+ },
10
+ "bugs": {
11
+ "url": "https://github.com/camohiddendj/ddg-search/issues"
12
+ },
13
+ "homepage": "https://github.com/camohiddendj/ddg-search#readme",
14
+ "type": "module",
15
+ "main": "src/index.js",
16
+ "files": [
17
+ "bin",
18
+ "src",
19
+ "README.md",
20
+ "LICENSE"
21
+ ],
22
+ "bin": {
23
+ "ddg-search": "bin/ddg-search.js"
24
+ },
25
+ "scripts": {
26
+ "test": "node --test",
27
+ "coverage": "c8 node --test",
28
+ "lint": "eslint .",
29
+ "format": "prettier --check .",
30
+ "format:write": "prettier --write ."
31
+ },
32
+ "engines": {
33
+ "node": ">=22"
34
+ },
35
+ "keywords": [
36
+ "duckduckgo",
37
+ "search",
38
+ "cli",
39
+ "scraper",
40
+ "opensearch"
41
+ ],
42
+ "dependencies": {
43
+ "cheerio": "^1.2.0"
44
+ },
45
+ "devDependencies": {
46
+ "c8": "^9.1.0",
47
+ "eslint": "^8.57.1",
48
+ "eslint-config-prettier": "^9.1.0",
49
+ "prettier": "^3.3.3"
50
+ },
51
+ "packageManager": "pnpm@10.29.3+sha512.498e1fb4cca5aa06c1dcf2611e6fafc50972ffe7189998c409e90de74566444298ffe43e6cd2acdc775ba1aa7cc5e092a8b7054c811ba8c5770f84693d33d2dc"
52
+ }
package/src/args.js ADDED
@@ -0,0 +1,57 @@
1
+ import { parseArgs } from 'node:util';
2
+ import { usage } from './usage.js';
3
+
4
+ const SUPPORTED_FORMATS = ['json', 'jsonl', 'csv', 'opensearch', 'markdown', 'compact'];
5
+
6
+ export function parseCliArgs(argv = process.argv.slice(2), exitFn = process.exit) {
7
+ let parsed;
8
+ try {
9
+ parsed = parseArgs({
10
+ allowPositionals: true,
11
+ args: argv,
12
+ options: {
13
+ format: { type: 'string', short: 'f', default: 'json' },
14
+ pages: { type: 'string', short: 'p' },
15
+ region: { type: 'string', short: 'r' },
16
+ time: { type: 'string', short: 't' },
17
+ help: { type: 'boolean', short: 'h', default: false },
18
+ },
19
+ });
20
+ } catch (e) {
21
+ console.error(`Error: ${e.message}`);
22
+ exitFn(1);
23
+ return { query: '', maxPages: 0, format: 'json', region: '', time: '' };
24
+ }
25
+
26
+ const { values, positionals } = parsed;
27
+
28
+ if (values.help || positionals.length === 0) {
29
+ usage(exitFn);
30
+ }
31
+
32
+ const query = positionals.join(' ');
33
+ const parsedPages = values.pages != null ? parseInt(values.pages, 10) : 5;
34
+ const maxPages = parsedPages === 0 ? Infinity : parsedPages;
35
+ const format = values.format;
36
+ const region = values.region || '';
37
+ const time = values.time || '';
38
+
39
+ if (!SUPPORTED_FORMATS.includes(format)) {
40
+ console.error(`Unknown format: ${format}. Supported: ${SUPPORTED_FORMATS.join(', ')}`);
41
+ exitFn(1);
42
+ }
43
+
44
+ if (Number.isNaN(parsedPages) || parsedPages < 0) {
45
+ console.error('--pages must be a non-negative integer (0 for unlimited)');
46
+ exitFn(1);
47
+ }
48
+
49
+ if (time && !['d', 'w', 'm', 'y'].includes(time)) {
50
+ console.error('Unknown time range: d, w, m, y');
51
+ exitFn(1);
52
+ }
53
+
54
+ return { query, maxPages, format, region, time };
55
+ }
56
+
57
+ export { usage };
package/src/cli.js ADDED
@@ -0,0 +1,50 @@
1
+ import { parseCliArgs } from './args.js';
2
+ import {
3
+ formatCompact,
4
+ formatCsv,
5
+ formatJson,
6
+ formatJsonl,
7
+ formatMarkdown,
8
+ formatOpenSearch,
9
+ } from './formatters.js';
10
+ import { search } from './search.js';
11
+
12
+ export async function main(
13
+ argv = process.argv.slice(2),
14
+ { searchImpl = search, stdout = process.stdout, exit = process.exit } = {},
15
+ ) {
16
+ const { query, maxPages, format, region, time } = parseCliArgs(argv, exit);
17
+
18
+ try {
19
+ const data = await searchImpl(query, { maxPages, region, time });
20
+
21
+ let output;
22
+ switch (format) {
23
+ case 'json':
24
+ output = formatJson(data);
25
+ break;
26
+ case 'jsonl':
27
+ output = formatJsonl(data);
28
+ break;
29
+ case 'csv':
30
+ output = formatCsv(data);
31
+ break;
32
+ case 'opensearch':
33
+ output = formatOpenSearch(data);
34
+ break;
35
+ case 'markdown':
36
+ output = formatMarkdown(data);
37
+ break;
38
+ case 'compact':
39
+ output = formatCompact(data);
40
+ break;
41
+ default:
42
+ throw new Error(`Unsupported format: ${format}`);
43
+ }
44
+
45
+ stdout.write(output + '\n');
46
+ } catch (err) {
47
+ console.error(`Error: ${err.message}`);
48
+ exit(1);
49
+ }
50
+ }
@@ -0,0 +1,4 @@
1
+ export const USER_AGENT =
2
+ 'Mozilla/5.0 (U; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36';
3
+
4
+ export const BASE_URL = 'https://html.duckduckgo.com/html/';
@@ -0,0 +1,169 @@
1
+ import { BASE_URL } from './constants.js';
2
+
3
+ export function escapeCsv(str) {
4
+ if (str.includes('"') || str.includes(',') || str.includes('\n')) {
5
+ return '"' + str.replace(/"/g, '""') + '"';
6
+ }
7
+ return str;
8
+ }
9
+
10
+ export function escapeXml(str) {
11
+ return str
12
+ .replace(/&/g, '&amp;')
13
+ .replace(/</g, '&lt;')
14
+ .replace(/>/g, '&gt;')
15
+ .replace(/"/g, '&quot;')
16
+ .replace(/'/g, '&apos;');
17
+ }
18
+
19
+ export function formatJson(data) {
20
+ const output = {
21
+ 'opensearch:totalResults': data.results.length,
22
+ 'opensearch:startIndex': 1,
23
+ 'opensearch:itemsPerPage': data.results.length,
24
+ 'opensearch:Query': {
25
+ role: 'request',
26
+ searchTerms: data.query,
27
+ },
28
+ pagesScraped: data.pagesScraped,
29
+ };
30
+
31
+ if (data.spelling) {
32
+ output.spelling = data.spelling;
33
+ }
34
+
35
+ if (data.zeroClick) {
36
+ output.zeroClick = data.zeroClick;
37
+ }
38
+
39
+ output.items = data.results.map((r, i) => ({
40
+ position: i + 1,
41
+ title: r.title,
42
+ link: r.url,
43
+ description: r.description,
44
+ displayUrl: r.displayUrl,
45
+ }));
46
+
47
+ return JSON.stringify(output, null, 2);
48
+ }
49
+
50
+ export function formatJsonl(data) {
51
+ const lines = [];
52
+ if (data.zeroClick) {
53
+ lines.push(JSON.stringify({ type: 'zeroClick', ...data.zeroClick }));
54
+ }
55
+ lines.push(
56
+ ...data.results.map((r, i) =>
57
+ JSON.stringify({
58
+ position: i + 1,
59
+ title: r.title,
60
+ link: r.url,
61
+ description: r.description,
62
+ }),
63
+ ),
64
+ );
65
+ return lines.join('\n');
66
+ }
67
+
68
+ export function formatCsv(data) {
69
+ const lines = ['position,title,link,description'];
70
+ for (let i = 0; i < data.results.length; i++) {
71
+ const r = data.results[i];
72
+ lines.push([i + 1, escapeCsv(r.title), escapeCsv(r.url), escapeCsv(r.description)].join(','));
73
+ }
74
+ return lines.join('\n');
75
+ }
76
+
77
+ export function formatOpenSearch(data) {
78
+ const now = new Date().toISOString();
79
+ const searchUrl = `${BASE_URL}?q=${encodeURIComponent(data.query)}`;
80
+
81
+ let xml = '<?xml version="1.0" encoding="UTF-8"?>\n';
82
+ xml += '<feed xmlns="http://www.w3.org/2005/Atom"\n';
83
+ xml += ' xmlns:opensearch="http://a9.com/-/spec/opensearch/1.1/">\n';
84
+ xml += ` <title>DuckDuckGo: ${escapeXml(data.query)}</title>\n`;
85
+ xml += ` <link href="${escapeXml(searchUrl)}"/>\n`;
86
+ xml += ` <updated>${now}</updated>\n`;
87
+ xml += ` <id>${escapeXml(searchUrl)}</id>\n`;
88
+ xml += ` <opensearch:totalResults>${data.results.length}</opensearch:totalResults>\n`;
89
+ xml += ' <opensearch:startIndex>1</opensearch:startIndex>\n';
90
+ xml += ` <opensearch:itemsPerPage>${data.results.length}</opensearch:itemsPerPage>\n`;
91
+ xml += ` <opensearch:Query role="request" searchTerms="${escapeXml(data.query)}"/>\n`;
92
+
93
+ if (data.zeroClick) {
94
+ const zc = data.zeroClick;
95
+ xml += ' <entry>\n';
96
+ xml += ` <title type="text">${escapeXml(zc.heading)}</title>\n`;
97
+ xml += ` <link href="${escapeXml(zc.url)}"/>\n`;
98
+ xml += ` <id>${escapeXml(zc.url)}</id>\n`;
99
+ xml += ` <summary>${escapeXml(zc.abstract)}</summary>\n`;
100
+ xml += ' <category term="zeroClick"/>\n';
101
+ xml += ' </entry>\n';
102
+ }
103
+
104
+ for (const r of data.results) {
105
+ xml += ' <entry>\n';
106
+ xml += ` <title>${escapeXml(r.title)}</title>\n`;
107
+ xml += ` <link href="${escapeXml(r.url)}"/>\n`;
108
+ xml += ` <id>${escapeXml(r.url)}</id>\n`;
109
+ xml += ` <summary>${escapeXml(r.description)}</summary>\n`;
110
+ xml += ' </entry>\n';
111
+ }
112
+
113
+ xml += '</feed>';
114
+ return xml;
115
+ }
116
+
117
+ export function formatMarkdown(data) {
118
+ const lines = [];
119
+ lines.push(`# Search: ${data.query}`);
120
+ lines.push(`${data.results.length} results from ${data.pagesScraped} page(s)\n`);
121
+
122
+ if (data.spelling) {
123
+ lines.push(`> **Did you mean:** ${data.spelling.corrected}\n`);
124
+ }
125
+
126
+ if (data.zeroClick) {
127
+ const zc = data.zeroClick;
128
+ lines.push(`> **${zc.heading}** — ${zc.abstract}`);
129
+ const suffix = zc.source ? ` (${zc.source})` : '';
130
+ lines.push(`> [Read more](${zc.url})${suffix}\n`);
131
+ }
132
+
133
+ for (let i = 0; i < data.results.length; i++) {
134
+ const r = data.results[i];
135
+ lines.push(`${i + 1}. [${r.title}](${r.url})`);
136
+ if (r.description) {
137
+ lines.push(` ${r.description}`);
138
+ }
139
+ lines.push('');
140
+ }
141
+
142
+ return lines.join('\n');
143
+ }
144
+
145
+ export function formatCompact(data) {
146
+ const lines = [];
147
+ lines.push(`query: ${data.query}`);
148
+ lines.push(`results: ${data.results.length}`);
149
+ if (data.spelling) {
150
+ lines.push(`did_you_mean: ${data.spelling.corrected}`);
151
+ }
152
+ if (data.zeroClick) {
153
+ lines.push(`zero_click: ${data.zeroClick.heading}`);
154
+ lines.push(` ${data.zeroClick.url}`);
155
+ lines.push(` ${data.zeroClick.abstract}`);
156
+ }
157
+ lines.push('---');
158
+
159
+ for (let i = 0; i < data.results.length; i++) {
160
+ const r = data.results[i];
161
+ lines.push(`[${i + 1}] ${r.title}`);
162
+ lines.push(` ${r.url}`);
163
+ if (r.description) {
164
+ lines.push(` ${r.description}`);
165
+ }
166
+ }
167
+
168
+ return lines.join('\n');
169
+ }
package/src/index.js ADDED
@@ -0,0 +1,16 @@
1
+ export { BASE_URL, USER_AGENT } from './constants.js';
2
+ export { usage } from './usage.js';
3
+ export { parseCliArgs } from './args.js';
4
+ export { parsePage, isBotDetection } from './parser.js';
5
+ export { fetchPage, randomDelay, search } from './search.js';
6
+ export {
7
+ escapeCsv,
8
+ escapeXml,
9
+ formatCompact,
10
+ formatCsv,
11
+ formatJson,
12
+ formatJsonl,
13
+ formatMarkdown,
14
+ formatOpenSearch,
15
+ } from './formatters.js';
16
+ export { main } from './cli.js';
package/src/parser.js ADDED
@@ -0,0 +1,87 @@
1
+ import * as cheerio from 'cheerio';
2
+
3
+ export function isBotDetection(html) {
4
+ return html.includes('anomaly-modal') || html.includes('challenge-form');
5
+ }
6
+
7
+ export function parsePage(html) {
8
+ const $ = cheerio.load(html);
9
+
10
+ let spelling = null;
11
+ const didYouMean = $('#did_you_mean');
12
+ if (didYouMean.length) {
13
+ const links = didYouMean.find('a');
14
+ const correctedLink = links.first();
15
+ if (correctedLink.length) {
16
+ spelling = { corrected: correctedLink.text().trim() };
17
+ const originalLink = links.eq(1);
18
+ if (originalLink.length) {
19
+ spelling.original = originalLink.text().trim().replace(/^"|"$/g, '');
20
+ }
21
+ }
22
+ }
23
+
24
+ let zeroClick = null;
25
+ const zciEl = $('.zci-wrapper .zci');
26
+ if (zciEl.length) {
27
+ const headingAnchor = zciEl.find('.zci__heading a');
28
+ const abstractEl = zciEl.find('#zero_click_abstract');
29
+ const imageEl = abstractEl.find('.zci__image');
30
+ const sourceLink = abstractEl.find('a q');
31
+
32
+ const heading = headingAnchor.text().trim();
33
+ const url = headingAnchor.attr('href') || '';
34
+
35
+ const abstractClone = abstractEl.clone();
36
+ abstractClone.find('a').remove();
37
+ const abstract = abstractClone.text().trim();
38
+
39
+ if (heading) {
40
+ zeroClick = { heading, url, abstract };
41
+ const imageSrc = imageEl.attr('src');
42
+ if (imageSrc) zeroClick.image = imageSrc;
43
+ const sourceName = sourceLink.text().trim();
44
+ if (sourceName) zeroClick.source = sourceName;
45
+ }
46
+ }
47
+
48
+ const results = [];
49
+ $('.result.web-result')
50
+ .not('.result--ad')
51
+ .not('.result--no-result')
52
+ .each((_i, el) => {
53
+ const $el = $(el);
54
+ const titleEl = $el.find('.result__a');
55
+ const snippetEl = $el.find('.result__snippet');
56
+ const urlEl = $el.find('.result__url');
57
+
58
+ const title = titleEl.text().trim();
59
+ const url = titleEl.attr('href') || '';
60
+ const description = snippetEl.text().trim();
61
+ const displayUrl = urlEl.text().trim();
62
+
63
+ if (title && url) {
64
+ results.push({ title, url, description, displayUrl });
65
+ }
66
+ });
67
+
68
+ const noMoreResults = $('.result--no-result').length > 0;
69
+
70
+ let nextPageData = null;
71
+ $('.nav-link').each((_i, el) => {
72
+ const $form = $(el).find('form');
73
+ const submitBtn = $form.find('input[type="submit"]');
74
+ if (submitBtn.val() === 'Next') {
75
+ nextPageData = {};
76
+ $form.find('input[type="hidden"]').each((_j, input) => {
77
+ const $input = $(input);
78
+ const name = $input.attr('name');
79
+ if (name) {
80
+ nextPageData[name] = $input.attr('value') || '';
81
+ }
82
+ });
83
+ }
84
+ });
85
+
86
+ return { results, spelling, zeroClick, noMoreResults, nextPageData };
87
+ }
package/src/search.js ADDED
@@ -0,0 +1,84 @@
1
+ import { BASE_URL, USER_AGENT } from './constants.js';
2
+ import { isBotDetection, parsePage } from './parser.js';
3
+
4
+ export async function fetchPage(url, postData, fetchImpl = fetch) {
5
+ const opts = {
6
+ headers: { 'User-Agent': USER_AGENT },
7
+ };
8
+
9
+ if (postData) {
10
+ opts.method = 'POST';
11
+ opts.headers['Content-Type'] = 'application/x-www-form-urlencoded';
12
+ opts.body = new URLSearchParams(postData).toString();
13
+ }
14
+
15
+ const resp = await fetchImpl(url, opts);
16
+ if (!resp.ok) {
17
+ throw new Error(`HTTP ${resp.status} ${resp.statusText}`);
18
+ }
19
+ return resp.text();
20
+ }
21
+
22
+ export function randomDelay() {
23
+ const ms = 800 + Math.random() * 2100;
24
+ return new Promise((resolve) => setTimeout(resolve, ms));
25
+ }
26
+
27
+ export async function search(
28
+ query,
29
+ { maxPages, region, time, fetchImpl = fetch, delay = randomDelay, stderr = process.stderr },
30
+ ) {
31
+ const allResults = [];
32
+ let spelling = null;
33
+ let zeroClick = null;
34
+ let page = 0;
35
+ const showProgress = stderr.isTTY;
36
+
37
+ const params = new URLSearchParams({ q: query });
38
+ if (region) params.set('kl', region);
39
+ if (time) params.set('df', time);
40
+
41
+ const firstHtml = await fetchPage(`${BASE_URL}?${params}`, null, fetchImpl);
42
+
43
+ if (isBotDetection(firstHtml)) {
44
+ throw new Error('Anti-bot detection triggered on first request. Try again later.');
45
+ }
46
+
47
+ let parsed = parsePage(firstHtml);
48
+ allResults.push(...parsed.results);
49
+ spelling = parsed.spelling;
50
+ zeroClick = parsed.zeroClick;
51
+ page++;
52
+
53
+ if (showProgress) {
54
+ stderr.write(`\rPage ${page}: ${parsed.results.length} results (${allResults.length} total)`);
55
+ }
56
+
57
+ while (parsed.nextPageData && !parsed.noMoreResults && page < maxPages) {
58
+ await delay();
59
+
60
+ const html = await fetchPage(BASE_URL, parsed.nextPageData, fetchImpl);
61
+
62
+ if (isBotDetection(html)) {
63
+ if (showProgress) {
64
+ stderr.write('\n');
65
+ stderr.write('Anti-bot detection hit. Returning results collected so far.\n');
66
+ }
67
+ break;
68
+ }
69
+
70
+ parsed = parsePage(html);
71
+ allResults.push(...parsed.results);
72
+ page++;
73
+
74
+ if (showProgress) {
75
+ stderr.write(`\rPage ${page}: ${parsed.results.length} results (${allResults.length} total)`);
76
+ }
77
+ }
78
+
79
+ if (showProgress) {
80
+ stderr.write('\n');
81
+ }
82
+
83
+ return { results: allResults, spelling, zeroClick, pagesScraped: page, query };
84
+ }
package/src/usage.js ADDED
@@ -0,0 +1,33 @@
1
+ export function usage(exitFn = process.exit) {
2
+ const text = `Usage: ddg-search [options] <query>
3
+
4
+ Search DuckDuckGo and output results in structured formats.
5
+
6
+ Options:
7
+ -f, --format <fmt> Output format (default: json). See formats below.
8
+ -p, --pages <n> Maximum pages to scrape, 0 for unlimited (default: 5)
9
+ -r, --region <code> Region code, e.g. us-en, uk-en (default: all regions)
10
+ -t, --time <range> Time filter: d (day), w (week), m (month), y (year)
11
+ -h, --help Show this help message
12
+
13
+ Formats:
14
+ json OpenSearch 1.1 response conventions in JSON
15
+ jsonl One JSON object per result line (streaming-friendly)
16
+ csv CSV with headers
17
+ opensearch OpenSearch 1.1 Atom XML
18
+ markdown Numbered markdown list (AI/LLM-friendly)
19
+ compact Minimal token format for LLM context windows
20
+
21
+ Results are written to stdout; progress is written to stderr.
22
+
23
+ Examples:
24
+ ddg-search "node.js tutorial"
25
+ ddg-search -f csv -p 3 "linux kernel"
26
+ ddg-search -f opensearch "rust programming" > results.xml
27
+ ddg-search -f compact "api docs" | llm "summarize these results"
28
+ ddg-search -p 0 "scrape everything"
29
+ ddg-search -r us-en -t w "recent news"
30
+ ddg-search "rust programming" | jq '.items[].link'`;
31
+ console.error(text);
32
+ exitFn(1);
33
+ }