nothing-browser 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +375 -0
- package/package.json +56 -0
- package/piggy/cache/memory.ts +38 -0
- package/piggy/client/index.ts +330 -0
- package/piggy/human/index.ts +53 -0
- package/piggy/launch/detect.ts +33 -0
- package/piggy/launch/spawn.ts +101 -0
- package/piggy/logger/index.ts +59 -0
- package/piggy/open/index.ts +5 -0
- package/piggy/register/index.ts +316 -0
- package/piggy/server/index.ts +137 -0
- package/piggy.ts +135 -0
package/README.md
ADDED
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="nothing_browser_pig_pink.svg" width="160" alt="Nothing Browser logo"/>
|
|
3
|
+
</p>
|
|
4
|
+
|
|
5
|
+
<h1 align="center">nothing-browser</h1>
|
|
6
|
+
<p align="center"><em>Does nothing... except everything that matters.</em></p>
|
|
7
|
+
|
|
8
|
+
<p align="center">
|
|
9
|
+
<a href="https://www.npmjs.com/package/nothing-browser"><img src="https://img.shields.io/npm/v/nothing-browser" alt="npm version"/></a>
|
|
10
|
+
<a href="LICENSE"><img src="https://img.shields.io/github/license/BunElysiaReact/nothing-browser" alt="license"/></a>
|
|
11
|
+
<a href="https://github.com/BunElysiaReact/nothing-browser/releases"><img src="https://img.shields.io/github/v/release/BunElysiaReact/nothing-browser" alt="releases"/></a>
|
|
12
|
+
</p>
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
A scraper-first headless browser library powered by the Nothing Browser Qt6/Chromium engine. Control real browser tabs, intercept network traffic, spoof fingerprints, capture WebSockets — all from Bun + TypeScript.
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
## Requirements
|
|
21
|
+
|
|
22
|
+
- [Bun](https://bun.sh) ≥ 1.0
|
|
23
|
+
- Nothing Browser headless binary (see below)
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## Install
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
bun add nothing-browser
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Then download the **headless binary** for your platform from [GitHub Releases](https://github.com/BunElysiaReact/nothing-browser/releases) and place it in your project root.
|
|
34
|
+
|
|
35
|
+
**Linux**
|
|
36
|
+
```bash
|
|
37
|
+
tar -xzf nothing-browser-headless-*-linux-x86_64.tar.gz
|
|
38
|
+
chmod +x nothing-browser-headless
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
**Windows** — extract the `.zip` and place `nothing-browser-headless.exe` in your project root.
|
|
42
|
+
|
|
43
|
+
**macOS** — extract the `.tar.gz` and place `nothing-browser-headless` in your project root.
|
|
44
|
+
|
|
45
|
+
---
|
|
46
|
+
|
|
47
|
+
## Quick Start
|
|
48
|
+
|
|
49
|
+
```ts
|
|
50
|
+
import piggy from "nothing-browser";
|
|
51
|
+
|
|
52
|
+
await piggy.launch({ mode: "tab" });
|
|
53
|
+
await piggy.register("books", "https://books.toscrape.com");
|
|
54
|
+
|
|
55
|
+
await piggy.books.navigate();
|
|
56
|
+
await piggy.books.waitForSelector(".product_pod");
|
|
57
|
+
|
|
58
|
+
const books = await piggy.books.evaluate(() =>
|
|
59
|
+
Array.from(document.querySelectorAll(".product_pod")).map(el => ({
|
|
60
|
+
title: el.querySelector("h3 a")?.getAttribute("title") ?? "",
|
|
61
|
+
price: el.querySelector(".price_color")?.textContent?.trim() ?? "",
|
|
62
|
+
}))
|
|
63
|
+
);
|
|
64
|
+
|
|
65
|
+
console.log(books);
|
|
66
|
+
await piggy.close();
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
## Modes
|
|
72
|
+
|
|
73
|
+
### Tab mode (default)
|
|
74
|
+
All sites share one browser process, each in its own tab.
|
|
75
|
+
|
|
76
|
+
```ts
|
|
77
|
+
await piggy.launch({ mode: "tab" });
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### Process mode
|
|
81
|
+
Each site gets its own browser process on a dedicated socket. This gives more isolation at the cost of higher RAM usage.
|
|
82
|
+
|
|
83
|
+
```ts
|
|
84
|
+
await piggy.launch({ mode: "process" });
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## Examples
|
|
90
|
+
|
|
91
|
+
### Scrape a site and expose it as an API
|
|
92
|
+
|
|
93
|
+
```ts
|
|
94
|
+
import piggy from "nothing-browser";
|
|
95
|
+
|
|
96
|
+
await piggy.launch({ mode: "tab" });
|
|
97
|
+
await piggy.register("books", "https://books.toscrape.com");
|
|
98
|
+
|
|
99
|
+
// Block ads/trackers before any navigation
|
|
100
|
+
await piggy.books.intercept.block("*google-analytics*");
|
|
101
|
+
await piggy.books.intercept.block("*doubleclick*");
|
|
102
|
+
await piggy.books.intercept.block("*facebook*");
|
|
103
|
+
|
|
104
|
+
piggy.books.api("/list", async (_params, query) => {
|
|
105
|
+
const page = query.page ? parseInt(query.page) : 1;
|
|
106
|
+
const url = page === 1
|
|
107
|
+
? "https://books.toscrape.com"
|
|
108
|
+
: `https://books.toscrape.com/catalogue/page-${page}.html`;
|
|
109
|
+
|
|
110
|
+
await piggy.books.navigate(url);
|
|
111
|
+
await piggy.books.waitForSelector(".product_pod", 10000);
|
|
112
|
+
|
|
113
|
+
const books = await piggy.books.evaluate(() => {
|
|
114
|
+
const ratingMap: Record<string, number> = {
|
|
115
|
+
One: 1, Two: 2, Three: 3, Four: 4, Five: 5,
|
|
116
|
+
};
|
|
117
|
+
return Array.from(document.querySelectorAll(".product_pod")).map(el => ({
|
|
118
|
+
title: el.querySelector("h3 a")?.getAttribute("title") ?? "",
|
|
119
|
+
price: el.querySelector(".price_color")?.textContent?.trim() ?? "",
|
|
120
|
+
rating: ratingMap[el.querySelector(".star-rating")?.className.replace("star-rating","").trim() ?? ""] ?? 0,
|
|
121
|
+
available: el.querySelector(".availability")?.textContent?.trim() ?? "",
|
|
122
|
+
}));
|
|
123
|
+
});
|
|
124
|
+
|
|
125
|
+
return { page, count: books.length, books };
|
|
126
|
+
}, { ttl: 300_000 });
|
|
127
|
+
|
|
128
|
+
piggy.books.noclose();
|
|
129
|
+
await piggy.serve(3000);
|
|
130
|
+
// GET http://localhost:3000/books/list
|
|
131
|
+
// GET http://localhost:3000/books/list?page=2
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
---
|
|
135
|
+
|
|
136
|
+
### Middleware — auth + logging
|
|
137
|
+
|
|
138
|
+
```ts
|
|
139
|
+
const logMiddleware = async ({ query, params }: any) => {
|
|
140
|
+
console.log("[middleware] incoming request", { params, query });
|
|
141
|
+
};
|
|
142
|
+
|
|
143
|
+
const authMiddleware = async ({ headers, set }: any) => {
|
|
144
|
+
const key = headers["x-api-key"];
|
|
145
|
+
if (!key || key !== "piggy-secret") {
|
|
146
|
+
set.status = 401;
|
|
147
|
+
throw new Error("Unauthorized: missing or invalid x-api-key");
|
|
148
|
+
}
|
|
149
|
+
};
|
|
150
|
+
|
|
151
|
+
piggy.books.api("/search", async (_params, query) => {
|
|
152
|
+
if (!query.q) return { error: "query param 'q' required" };
|
|
153
|
+
|
|
154
|
+
await piggy.books.navigate("https://books.toscrape.com");
|
|
155
|
+
await piggy.books.waitForSelector(".product_pod", 10000);
|
|
156
|
+
|
|
157
|
+
const books = await piggy.books.evaluate((q: string) =>
|
|
158
|
+
Array.from(document.querySelectorAll(".product_pod"))
|
|
159
|
+
.filter(el =>
|
|
160
|
+
el.querySelector("h3 a")?.getAttribute("title")?.toLowerCase().includes(q.toLowerCase())
|
|
161
|
+
)
|
|
162
|
+
.map(el => ({
|
|
163
|
+
title: el.querySelector("h3 a")?.getAttribute("title") ?? "",
|
|
164
|
+
price: el.querySelector(".price_color")?.textContent?.trim() ?? "",
|
|
165
|
+
}))
|
|
166
|
+
, query.q);
|
|
167
|
+
|
|
168
|
+
return { query: query.q, count: books.length, books };
|
|
169
|
+
}, { ttl: 120_000, before: [logMiddleware, authMiddleware] });
|
|
170
|
+
|
|
171
|
+
// curl -H 'x-api-key: piggy-secret' 'http://localhost:3000/books/search?q=light'
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
### Network capture
|
|
177
|
+
|
|
178
|
+
```ts
|
|
179
|
+
await piggy.books.capture.clear();
|
|
180
|
+
await piggy.books.capture.start();
|
|
181
|
+
await piggy.books.wait(300); // ensure capture is active before nav
|
|
182
|
+
|
|
183
|
+
await piggy.books.navigate("https://books.toscrape.com");
|
|
184
|
+
await piggy.books.waitForSelector("body", 10000);
|
|
185
|
+
await piggy.books.wait(2000); // let async XHR/fetch calls settle
|
|
186
|
+
|
|
187
|
+
await piggy.books.capture.stop();
|
|
188
|
+
|
|
189
|
+
const requests = await piggy.books.capture.requests();
|
|
190
|
+
const ws = await piggy.books.capture.ws();
|
|
191
|
+
const storage = await piggy.books.capture.storage();
|
|
192
|
+
const cookies = await piggy.books.capture.cookies();
|
|
193
|
+
|
|
194
|
+
console.log(`${requests.length} requests, ${ws.length} WS frames`);
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
### Session persistence
|
|
200
|
+
|
|
201
|
+
```ts
|
|
202
|
+
import { existsSync, readFileSync, writeFileSync } from "fs";
|
|
203
|
+
|
|
204
|
+
const SESSION_FILE = "./session.json";
|
|
205
|
+
|
|
206
|
+
// Restore on startup
|
|
207
|
+
if (existsSync(SESSION_FILE)) {
|
|
208
|
+
const saved = JSON.parse(readFileSync(SESSION_FILE, "utf8"));
|
|
209
|
+
await piggy.books.session.import(saved);
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
// Save on shutdown — always BEFORE piggy.close()
|
|
213
|
+
process.on("SIGINT", async () => {
|
|
214
|
+
const session = await piggy.books.session.export();
|
|
215
|
+
writeFileSync(SESSION_FILE, JSON.stringify(session, null, 2));
|
|
216
|
+
await piggy.close({ force: true });
|
|
217
|
+
process.exit(0);
|
|
218
|
+
});
|
|
219
|
+
```
|
|
220
|
+
|
|
221
|
+
---
|
|
222
|
+
|
|
223
|
+
### Human mode
|
|
224
|
+
|
|
225
|
+
Makes interactions look less robotic — random delays, simulated typos + self-correction.
|
|
226
|
+
|
|
227
|
+
```ts
|
|
228
|
+
piggy.actHuman(true);
|
|
229
|
+
|
|
230
|
+
await piggy.books.click(".product_pod h3 a");
|
|
231
|
+
await piggy.books.type("#search", "mystery novels");
|
|
232
|
+
await piggy.books.scroll.by(400);
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
Affects: `click`, `type`, `hover`, `scroll.by`, `wait`.
|
|
236
|
+
|
|
237
|
+
---
|
|
238
|
+
|
|
239
|
+
### Screenshot / PDF
|
|
240
|
+
|
|
241
|
+
```ts
|
|
242
|
+
// Save to disk
|
|
243
|
+
await piggy.books.screenshot("./out/page.png");
|
|
244
|
+
await piggy.books.pdf("./out/page.pdf");
|
|
245
|
+
|
|
246
|
+
// Or get base64
|
|
247
|
+
const b64 = await piggy.books.screenshot();
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
---
|
|
251
|
+
|
|
252
|
+
### Multi-site parallel scraping
|
|
253
|
+
|
|
254
|
+
```ts
|
|
255
|
+
await piggy.register("site1", "https://example.com");
|
|
256
|
+
await piggy.register("site2", "https://example.org");
|
|
257
|
+
|
|
258
|
+
// Same method on both sites in parallel
|
|
259
|
+
const titles = await piggy.all([piggy.site1, piggy.site2]).title();
|
|
260
|
+
|
|
261
|
+
// Keyed results by site name
|
|
262
|
+
const h1s = await piggy.diff([piggy.site1, piggy.site2]).fetchText("h1");
|
|
263
|
+
// → { site1: "...", site2: "..." }
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
---
|
|
267
|
+
|
|
268
|
+
## API Reference
|
|
269
|
+
|
|
270
|
+
### `piggy.launch(opts?)`
|
|
271
|
+
|
|
272
|
+
| Option | Type | Default |
|
|
273
|
+
|--------|------|---------|
|
|
274
|
+
| `mode` | `"tab" \| "process"` | `"tab"` |
|
|
275
|
+
|
|
276
|
+
### `piggy.register(name, url)`
|
|
277
|
+
Registers a site, which is then accessible as `piggy.<name>`.
|
|
278
|
+
|
|
279
|
+
### `piggy.actHuman(enable)`
|
|
280
|
+
Toggles human-like interaction timing globally.
|
|
281
|
+
|
|
282
|
+
### `piggy.serve(port, opts?)`
|
|
283
|
+
Starts the Elysia HTTP server. Built-in routes: `GET /health`, `GET /cache/keys`, `DELETE /cache`.
|
|
284
|
+
|
|
285
|
+
### `piggy.routes()`
|
|
286
|
+
Returns all registered API routes with method, path, TTL, and middleware count.
|
|
287
|
+
|
|
288
|
+
### `piggy.close(opts?)`
|
|
289
|
+
|
|
290
|
+
```ts
|
|
291
|
+
await piggy.close(); // graceful — respects noclose()
|
|
292
|
+
await piggy.close({ force: true }); // kills everything immediately
|
|
293
|
+
```
|
|
294
|
+
|
|
295
|
+
### Site methods
|
|
296
|
+
|
|
297
|
+
#### Navigation
|
|
298
|
+
```ts
|
|
299
|
+
site.navigate(url?)
|
|
300
|
+
site.reload() / site.goBack() / site.goForward()
|
|
301
|
+
site.waitForNavigation()
|
|
302
|
+
site.waitForSelector(selector, timeout?)
|
|
303
|
+
site.waitForResponse(urlPattern, timeout?)
|
|
304
|
+
site.title() / site.url() / site.content()
|
|
305
|
+
site.wait(ms)
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
#### Interactions
|
|
309
|
+
```ts
|
|
310
|
+
site.click(selector, opts?)
|
|
311
|
+
site.doubleClick(selector) / site.hover(selector)
|
|
312
|
+
site.type(selector, text, opts?) // opts: { delay?, wpm?, fact? }
|
|
313
|
+
site.select(selector, value)
|
|
314
|
+
site.keyboard.press(key)
|
|
315
|
+
site.keyboard.combo(combo) // e.g. "Ctrl+A"
|
|
316
|
+
site.mouse.move(x, y)
|
|
317
|
+
site.mouse.drag(from, to)
|
|
318
|
+
site.scroll.to(selector) / site.scroll.by(px)
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
#### Data
|
|
322
|
+
```ts
|
|
323
|
+
site.fetchText(selector) // → string | null
|
|
324
|
+
site.fetchLinks(selector) // → string[]
|
|
325
|
+
site.fetchImages(selector) // → string[]
|
|
326
|
+
site.search.css(query) / site.search.id(query)
|
|
327
|
+
site.evaluate(js | fn, ...args)
|
|
328
|
+
```
|
|
329
|
+
|
|
330
|
+
#### Network
|
|
331
|
+
```ts
|
|
332
|
+
site.capture.start() / .stop() / .clear()
|
|
333
|
+
site.capture.requests() / .ws() / .cookies() / .storage()
|
|
334
|
+
site.intercept.block(pattern)
|
|
335
|
+
site.intercept.redirect(pattern, redirectUrl)
|
|
336
|
+
site.intercept.headers(pattern, headers)
|
|
337
|
+
site.intercept.clear()
|
|
338
|
+
site.blockImages() / site.unblockImages()
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
#### Cookies & Session
|
|
342
|
+
```ts
|
|
343
|
+
site.cookies.set(name, value, domain, path?)
|
|
344
|
+
site.cookies.get(name) / .delete(name) / .list()
|
|
345
|
+
site.session.export() / site.session.import(data)
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
#### API
|
|
349
|
+
```ts
|
|
350
|
+
site.api(path, handler, opts?)
|
|
351
|
+
// opts: { ttl?, method?, before?: middleware[] }
|
|
352
|
+
// handler: (params, query, body) => Promise<any>
|
|
353
|
+
|
|
354
|
+
site.noclose() // keep site alive across piggy.close()
|
|
355
|
+
site.screenshot(filePath?) / site.pdf(filePath?)
|
|
356
|
+
```
|
|
357
|
+
|
|
358
|
+
---
|
|
359
|
+
|
|
360
|
+
## Binary download
|
|
361
|
+
|
|
362
|
+
| Platform | File |
|
|
363
|
+
|----------|------|
|
|
364
|
+
| Linux x86_64 (deb) | `nothing-browser-headless_*_amd64.deb` |
|
|
365
|
+
| Linux x86_64 (tar.gz) | `nothing-browser-headless-*-linux-x86_64.tar.gz` |
|
|
366
|
+
| Windows x64 | `nothing-browser-headless-*.zip` |
|
|
367
|
+
| macOS | `nothing-browser-headless-*.tar.gz` |
|
|
368
|
+
|
|
369
|
+
→ [All releases](https://github.com/BunElysiaReact/nothing-browser/releases)
|
|
370
|
+
|
|
371
|
+
---
|
|
372
|
+
|
|
373
|
+
## License
|
|
374
|
+
|
|
375
|
+
MIT © [Ernest Tech House](https://github.com/BunElysiaReact)
|
package/package.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "nothing-browser",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"description": "Scraper-first headless browser library — control real tabs, intercept network traffic, capture WebSockets, spoof fingerprints. Powered by Qt6/Chromium.",
|
|
5
|
+
"module": "piggy.ts",
|
|
6
|
+
"main": "piggy.ts",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"import": "./piggy.ts",
|
|
11
|
+
"types": "./piggy.ts"
|
|
12
|
+
}
|
|
13
|
+
},
|
|
14
|
+
"keywords": [
|
|
15
|
+
"scraper",
|
|
16
|
+
"headless-browser",
|
|
17
|
+
"browser-automation",
|
|
18
|
+
"web-scraping",
|
|
19
|
+
"chromium",
|
|
20
|
+
"qt6",
|
|
21
|
+
"bun",
|
|
22
|
+
"elysia",
|
|
23
|
+
"websocket-capture",
|
|
24
|
+
"fingerprint-spoofing",
|
|
25
|
+
"network-interception"
|
|
26
|
+
],
|
|
27
|
+
"author": "Ernest Tech House <BunElysiaReact>",
|
|
28
|
+
"license": "MIT",
|
|
29
|
+
"homepage": "https://github.com/BunElysiaReact/nothing-browser#readme",
|
|
30
|
+
"repository": {
|
|
31
|
+
"type": "git",
|
|
32
|
+
"url": "https://github.com/BunElysiaReact/nothing-browser.git"
|
|
33
|
+
},
|
|
34
|
+
"bugs": {
|
|
35
|
+
"url": "https://github.com/BunElysiaReact/nothing-browser/issues"
|
|
36
|
+
},
|
|
37
|
+
"files": [
|
|
38
|
+
"piggy.ts",
|
|
39
|
+
"piggy/",
|
|
40
|
+
"README.md",
|
|
41
|
+
"LICENSE"
|
|
42
|
+
],
|
|
43
|
+
"devDependencies": {
|
|
44
|
+
"@types/bun": "latest"
|
|
45
|
+
},
|
|
46
|
+
"peerDependencies": {
|
|
47
|
+
"typescript": "^5"
|
|
48
|
+
},
|
|
49
|
+
"dependencies": {
|
|
50
|
+
"elysia": "^1.4.28",
|
|
51
|
+
"ernest-logger": "^2.0.4"
|
|
52
|
+
},
|
|
53
|
+
"engines": {
|
|
54
|
+
"bun": ">=1.0.0"
|
|
55
|
+
}
|
|
56
|
+
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// piggy/cache/memory.ts
//
// Process-local, in-memory TTL cache backed by a single module-level Map.
// Entries are evicted lazily: on `get` of an expired key, and in bulk whenever
// `size()` or `keys()` is called, so those two never report stale entries.

/** A cached value together with its absolute expiry time. */
interface CacheEntry {
  /** The cached payload, stored and returned as-is (no cloning). */
  data: any;
  /** Timestamp on the `Date.now()` scale after which the entry is stale. */
  expires: number;
}

const store = new Map<string, CacheEntry>();

/** Returns true when `entry` is past its expiry relative to `now`. */
function isExpired(entry: CacheEntry, now: number): boolean {
  return now > entry.expires;
}

/** Removes every expired entry from the store. */
function pruneExpired(): void {
  const now = Date.now();
  // Deleting from a Map while iterating it is well-defined in JavaScript.
  for (const [key, entry] of store) {
    if (isExpired(entry, now)) store.delete(key);
  }
}

/**
 * Looks up a cached value.
 *
 * @param key Cache key.
 * @returns The stored data, or `null` when the key is absent or expired.
 *          An expired entry is deleted as a side effect of the lookup.
 */
export function get(key: string): any | null {
  const entry = store.get(key);
  if (!entry) return null;
  if (isExpired(entry, Date.now())) {
    store.delete(key);
    return null;
  }
  return entry.data;
}

/**
 * Stores `data` under `key` for `ttlMs` milliseconds, replacing any previous
 * value for that key.
 *
 * @param key   Cache key.
 * @param data  Value to cache.
 * @param ttlMs Time-to-live in milliseconds. `Infinity` keeps the entry until
 *              it is deleted explicitly. `NaN` is treated as "do not cache".
 */
export function set(key: string, data: any, ttlMs: number) {
  if (Number.isNaN(ttlMs)) {
    // A NaN expiry would never compare as expired, silently creating an
    // immortal entry. Drop the write and any older value for this key so a
    // later `get` cannot serve outdated data.
    store.delete(key);
    return;
  }
  store.set(key, { data, expires: Date.now() + ttlMs });
}

/**
 * Removes `key` from the cache. Does nothing if the key is absent.
 *
 * @param key Cache key.
 */
export function del(key: string) {
  store.delete(key);
}

/** Removes every entry from the cache. */
export function clear() {
  store.clear();
}

/**
 * @returns The number of entries that have not expired. Expired entries are
 *          evicted before counting.
 */
export function size() {
  pruneExpired();
  return store.size;
}

/**
 * @returns The keys of all entries that have not expired, in insertion order.
 *          Expired entries are evicted before listing.
 */
export function keys() {
  pruneExpired();
  return Array.from(store.keys());
}
|