@pablovitasso/szkrabok 1.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +114 -0
- package/package.json +124 -0
- package/packages/runtime/config.js +173 -0
- package/packages/runtime/index.js +10 -0
- package/packages/runtime/launch.js +240 -0
- package/packages/runtime/logger.js +42 -0
- package/packages/runtime/mcp-client/adapters/szkrabok-session.js +69 -0
- package/packages/runtime/mcp-client/codegen/generate-mcp-tools.mjs +66 -0
- package/packages/runtime/mcp-client/codegen/render-tools.js +219 -0
- package/packages/runtime/mcp-client/codegen/schema-to-jsdoc.js +60 -0
- package/packages/runtime/mcp-client/mcp-tools.d.ts +92 -0
- package/packages/runtime/mcp-client/mcp-tools.js +99 -0
- package/packages/runtime/mcp-client/runtime/invoker.js +95 -0
- package/packages/runtime/mcp-client/runtime/logger.js +145 -0
- package/packages/runtime/mcp-client/runtime/transport.js +35 -0
- package/packages/runtime/package.json +25 -0
- package/packages/runtime/pool.js +59 -0
- package/packages/runtime/scripts/patch-playwright.js +736 -0
- package/packages/runtime/sessions.js +77 -0
- package/packages/runtime/stealth.js +232 -0
- package/packages/runtime/storage.js +64 -0
- package/scripts/detect_browsers.sh +147 -0
- package/scripts/patch-playwright.js +736 -0
- package/scripts/postinstall.js +47 -0
- package/scripts/release-publish.js +19 -0
- package/scripts/release-reminder.js +14 -0
- package/scripts/setup.js +17 -0
- package/src/cli.js +166 -0
- package/src/config.js +36 -0
- package/src/index.js +53 -0
- package/src/server.js +40 -0
- package/src/tools/registry.js +171 -0
- package/src/tools/scaffold.js +133 -0
- package/src/tools/szkrabok_browser.js +227 -0
- package/src/tools/szkrabok_session.js +174 -0
- package/src/tools/templates/automation/example.mcp.spec.js +54 -0
- package/src/tools/templates/automation/example.spec.js +29 -0
- package/src/tools/templates/automation/fixtures.js +59 -0
- package/src/tools/templates/playwright.config.js +10 -0
- package/src/tools/templates/szkrabok.config.local.toml.example +12 -0
- package/src/tools/workflow.js +45 -0
- package/src/utils/errors.js +36 -0
- package/src/utils/logger.js +64 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# Szkrabok
|
|
2
|
+
|
|
3
|
+
MCP server supplementing [microsoft/playwright-mcp](https://github.com/microsoft/playwright-mcp) with persistent sessions, stealth mode, and scripted automation.
|
|
4
|
+
|
|
5
|
+
**Core Enhancements:**
|
|
6
|
+
|
|
7
|
+
* **Named Sessions:** Persistent cookies, localStorage, and Chromium profiles.
|
|
8
|
+
* **Stealth:** Integrated `playwright-extra` + stealth plugin and anti-bot CDP patches.
|
|
9
|
+
* **Deterministic Ports:** Fixed CDP ports per session for `connectOverCDP()`.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Tools & Capabilities
|
|
14
|
+
|
|
15
|
+
| Tool | Description |
|
|
16
|
+
| --- | --- |
|
|
17
|
+
| `session_manage` | Manage sessions: `open` (launch/resume), `close`, `list`, `delete`, `endpoint` (CDP/WS URLs). |
|
|
18
|
+
| `workflow.scrape` | Auto-scrape current page into LLM-ready text (headings, content, links, tables). Optional CSS selectors to target specific areas. |
|
|
19
|
+
| `browser_run` | Execute Playwright JS on session page: pass `code` (inline snippet) or `path` (named export from `.mjs` file). |
|
|
20
|
+
| `browser.run_test` | Run `.spec.js` tests via CDP (requires `scaffold.init`). |
|
|
21
|
+
| `scaffold.init` | Bootstrap project with `playwright.config.js` and templates. |
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Setup
|
|
26
|
+
|
|
27
|
+
**1. Install**
|
|
28
|
+
|
|
29
|
+
| Use Case | Command |
|
|
30
|
+
| --- | --- |
|
|
31
|
+
| **Scaffolding a new project** | `npx @pablovitasso/szkrabok init` |
|
|
32
|
+
| **Claude Code** | `claude mcp add szkrabok -- npx -y @pablovitasso/szkrabok` |
|
|
33
|
+
| **Claude Desktop** | See config snippet below |
|
|
34
|
+
| **Development (from source)** | `npm ci && claude mcp add szkrabok node /path/to/szkrabok/src/index.js` |
|
|
35
|
+
|
|
36
|
+
Claude Desktop — add to `claude_desktop_config.json`:
|
|
37
|
+
```json
|
|
38
|
+
{
|
|
39
|
+
"mcpServers": {
|
|
40
|
+
"szkrabok": {
|
|
41
|
+
"command": "npx",
|
|
42
|
+
"args": ["-y", "@pablovitasso/szkrabok"]
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
> **Browser not installed?** Run `npx @pablovitasso/szkrabok --setup` once in your terminal, then restart Claude.
|
|
49
|
+
> Set `CI=true` or `SZKRABOK_SKIP_BROWSER_INSTALL=1` to suppress the auto-install in CI / Docker.
|
|
50
|
+
|
|
51
|
+
**2. Configure**
|
|
52
|
+
|
|
53
|
+
Optionally create `szkrabok.config.local.toml` to set a custom browser binary or user agent:
|
|
54
|
+
|
|
55
|
+
```toml
|
|
56
|
+
[default]
|
|
57
|
+
executablePath = "/path/to/your/chrome"
|
|
58
|
+
overrideUserAgent = true
|
|
59
|
+
userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/145.0.0.0 Safari/537.36"
|
|
60
|
+
log_level = "debug"
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Usage
|
|
66
|
+
|
|
67
|
+
### Claude Code (LLM)
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
session_manage { "action": "open", "sessionName": "my-session", "url": "https://example.com" }
|
|
71
|
+
workflow.scrape { "sessionName": "my-session" }
|
|
72
|
+
session_manage { "action": "close", "sessionName": "my-session" }
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
Bootstrap a new project, then run tests:
|
|
76
|
+
|
|
77
|
+
```
|
|
78
|
+
scaffold.init { "dir": "/path/to/project", "preset": "full" }
|
|
79
|
+
session_manage { "action": "open", "sessionName": "my-session" }
|
|
80
|
+
browser.run_test { "sessionName": "my-session", "files": ["automation/example.spec.js"] }
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### Bebok (CLI)
|
|
84
|
+
|
|
85
|
+
`bebok` is the human/shell interface — calls the same handlers as MCP tools:
|
|
86
|
+
|
|
87
|
+
```bash
|
|
88
|
+
bebok open <profile> # Launch browser, print CDP endpoint, stay alive
|
|
89
|
+
bebok session list # Show all sessions (active + stored)
|
|
90
|
+
bebok session inspect <id> # Dump cookie/localStorage counts
|
|
91
|
+
bebok session delete <id> # Delete a session
|
|
92
|
+
bebok session cleanup --days 30 # Delete sessions unused for N days
|
|
93
|
+
bebok endpoint <sessionName> # Print CDP + WS endpoints
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
---
|
|
97
|
+
|
|
98
|
+
## Project Structure
|
|
99
|
+
|
|
100
|
+
* **`@pablovitasso/szkrabok/runtime`** (`packages/runtime/`): Browser bootstrap, stealth, session pool, MCP client (`mcpConnect`, `spawnClient`, codegen).
|
|
101
|
+
* **Config**: `szkrabok.config.toml` (defaults) deep-merged with `szkrabok.config.local.toml` (machine-specific, gitignored).
|
|
102
|
+
* **Release**: `npm run release:patch` bumps version, then `npm publish --access public`.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## Documentation
|
|
107
|
+
|
|
108
|
+
| Doc | Contents |
|
|
109
|
+
|-----|----------|
|
|
110
|
+
| [docs/architecture.md](./docs/architecture.md) | Layer map, file layout, session lifecycle, invariants |
|
|
111
|
+
| [docs/development.md](./docs/development.md) | Adding tools, CLI design, release workflow |
|
|
112
|
+
| [docs/testing.md](./docs/testing.md) | Test categories, how to run, writing specs |
|
|
113
|
+
| [docs/mcp-client-library.md](./docs/mcp-client-library.md) | MCP client library and codegen |
|
|
114
|
+
| [docs/scaffold-init.md](./docs/scaffold-init.md) | scaffold.init presets and template structure |
|
package/package.json
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@pablovitasso/szkrabok",
|
|
3
|
+
"version": "1.0.10",
|
|
4
|
+
"description": "Production-grade MCP browser automation layer with persistent sessions and stealth capabilities",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "src/index.js",
|
|
7
|
+
"exports": {
|
|
8
|
+
".": "./src/index.js",
|
|
9
|
+
"./runtime": "./packages/runtime/index.js"
|
|
10
|
+
},
|
|
11
|
+
"imports": {
|
|
12
|
+
"#runtime": "./packages/runtime/index.js"
|
|
13
|
+
},
|
|
14
|
+
"files": [
|
|
15
|
+
"src",
|
|
16
|
+
"packages/runtime",
|
|
17
|
+
"scripts",
|
|
18
|
+
"README.md"
|
|
19
|
+
],
|
|
20
|
+
"bin": {
|
|
21
|
+
"szkrabok": "./src/index.js",
|
|
22
|
+
"bebok": "./src/cli.js"
|
|
23
|
+
},
|
|
24
|
+
"engines": {
|
|
25
|
+
"node": ">=18.0.0"
|
|
26
|
+
},
|
|
27
|
+
"workspaces": [
|
|
28
|
+
"packages/runtime"
|
|
29
|
+
],
|
|
30
|
+
"scripts": {
|
|
31
|
+
"start": "node src/index.js",
|
|
32
|
+
"dev": "node --watch src/index.js",
|
|
33
|
+
"lint": "eslint .",
|
|
34
|
+
"lint:fix": "eslint . --fix",
|
|
35
|
+
"prettier": "prettier --write \"{src,tests}/**/*.{js,json,md}\" \"*.{js,json,md}\"",
|
|
36
|
+
"format": "npm run prettier",
|
|
37
|
+
"format:check": "prettier --check \"{src,tests}/**/*.{js,json,md}\" \"*.{js,json,md}\"",
|
|
38
|
+
"codegen:mcp": "node packages/runtime/mcp-client/codegen/generate-mcp-tools.mjs",
|
|
39
|
+
"prepack": "git describe --exact-match --tags HEAD 2>/dev/null || (echo 'ERROR: HEAD is not tagged. Run npm run release:patch or release:minor first.' && exit 1)",
|
|
40
|
+
"pack:runtime": "npm pack --workspace=packages/runtime --pack-destination=dist/",
|
|
41
|
+
"pack": "mkdir -p dist && npm run pack:runtime",
|
|
42
|
+
"release:patch": "npm version patch --workspaces --include-workspace-root --ignore-scripts && npm run pack && node scripts/release-reminder.js",
|
|
43
|
+
"release:minor": "npm version minor --workspaces --include-workspace-root --ignore-scripts && npm run pack && node scripts/release-reminder.js",
|
|
44
|
+
"release:publish": "node scripts/release-publish.js",
|
|
45
|
+
"test": "npm run test:self && npm run test:auto",
|
|
46
|
+
"test:node": "node --test tests/node/*.test.js",
|
|
47
|
+
"test:runtime:unit": "node --test tests/node/runtime/unit.test.js",
|
|
48
|
+
"test:runtime:integration": "node --test tests/node/runtime/integration.test.js",
|
|
49
|
+
"test:contracts": "node --test tests/node/contracts.test.js",
|
|
50
|
+
"test:playwright": "playwright test --project=integration",
|
|
51
|
+
"prepare": "node scripts/setup.js",
|
|
52
|
+
"postinstall": "node scripts/patch-playwright.js && node scripts/postinstall.js",
|
|
53
|
+
"test:self": "playwright test --project=integration && node --test tests/node/*.test.js",
|
|
54
|
+
"test:auto": "PLAYWRIGHT_PROJECT=e2e playwright test --project=e2e"
|
|
55
|
+
},
|
|
56
|
+
"keywords": [
|
|
57
|
+
"mcp",
|
|
58
|
+
"playwright",
|
|
59
|
+
"browser-automation",
|
|
60
|
+
"stealth",
|
|
61
|
+
"session-management",
|
|
62
|
+
"llm-tools",
|
|
63
|
+
"web-scraping",
|
|
64
|
+
"model-context-protocol"
|
|
65
|
+
],
|
|
66
|
+
"author": "",
|
|
67
|
+
"license": "MIT",
|
|
68
|
+
"repository": {
|
|
69
|
+
"type": "git",
|
|
70
|
+
"url": "git+https://github.com/PabloVitasso/szkrabok.git"
|
|
71
|
+
},
|
|
72
|
+
"dependencies": {
|
|
73
|
+
"@modelcontextprotocol/sdk": "^1.0.4",
|
|
74
|
+
"dotenv": "^17.3.1",
|
|
75
|
+
"playwright": "^1.49.1",
|
|
76
|
+
"playwright-core": "^1.58.2",
|
|
77
|
+
"playwright-extra": "^4.3.6",
|
|
78
|
+
"puppeteer": "^24.34.0",
|
|
79
|
+
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
|
80
|
+
"smol-toml": "^1.6.0",
|
|
81
|
+
"zod": "^3.24.1"
|
|
82
|
+
},
|
|
83
|
+
"devDependencies": {
|
|
84
|
+
"@eslint/js": "^9.17.0",
|
|
85
|
+
"@types/puppeteer": "^5.4.7",
|
|
86
|
+
"ajv": "^8.17.1",
|
|
87
|
+
"ajv-formats": "^3.0.1",
|
|
88
|
+
"eslint": "^9.17.0",
|
|
89
|
+
"eslint-config-prettier": "^9.1.0",
|
|
90
|
+
"eslint-plugin-node": "^11.1.0",
|
|
91
|
+
"globals": "^15.13.0",
|
|
92
|
+
"prettier": "^3.4.2",
|
|
93
|
+
"puppeteer-extra": "^3.3.6"
|
|
94
|
+
},
|
|
95
|
+
"eslintConfig": {
|
|
96
|
+
"env": {
|
|
97
|
+
"node": true,
|
|
98
|
+
"es2022": true
|
|
99
|
+
},
|
|
100
|
+
"extends": [
|
|
101
|
+
"eslint:recommended",
|
|
102
|
+
"prettier"
|
|
103
|
+
],
|
|
104
|
+
"parserOptions": {
|
|
105
|
+
"ecmaVersion": "latest",
|
|
106
|
+
"sourceType": "module"
|
|
107
|
+
},
|
|
108
|
+
"rules": {
|
|
109
|
+
"no-unused-vars": [
|
|
110
|
+
"warn",
|
|
111
|
+
{
|
|
112
|
+
"argsIgnorePattern": "^_",
|
|
113
|
+
"varsIgnorePattern": "^_"
|
|
114
|
+
}
|
|
115
|
+
],
|
|
116
|
+
"no-console": "off",
|
|
117
|
+
"prefer-const": "error",
|
|
118
|
+
"no-var": "error",
|
|
119
|
+
"object-shorthand": "error",
|
|
120
|
+
"prefer-template": "error",
|
|
121
|
+
"prefer-arrow-callback": "error"
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
import { homedir } from 'os';
|
|
2
|
+
import { join, resolve, dirname } from 'path';
|
|
3
|
+
import { readdirSync, existsSync, readFileSync } from 'fs';
|
|
4
|
+
import { fileURLToPath } from 'url';
|
|
5
|
+
import { parse } from 'smol-toml';
|
|
6
|
+
|
|
7
|
+
// TOML config resolution:
|
|
8
|
+
// 1. SZKRABOK_CONFIG env var (absolute path)
|
|
9
|
+
// 2. CWD/szkrabok.config.toml + CWD/szkrabok.config.local.toml
|
|
10
|
+
// 3. package dir / szkrabok.config.toml (fallback for monorepo root)
|
|
11
|
+
|
|
12
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
13
|
+
const PACKAGE_ROOT = resolve(__dirname, '..', '..');
|
|
14
|
+
|
|
15
|
+
/** True for non-null, non-array objects (the only values deepMerge recurses into). */
const isPlainObject = v => v !== null && typeof v === 'object' && !Array.isArray(v);

/**
 * Recursively merge `override` on top of `base` without mutating either input.
 *
 * Plain objects present on both sides are merged key by key; every other value
 * (arrays, scalars, or an object facing a non-object) is taken from `override`.
 *
 * @param {object} base      Lower-priority values.
 * @param {object} override  Higher-priority values.
 * @returns {object} A new object holding the merged result.
 */
const deepMerge = (base, override) => {
  const result = { ...base };
  for (const key of Object.keys(override)) {
    // Assigning result['__proto__'] would replace the result's prototype instead of
    // creating a data property, so a config key with that name is ignored.
    if (key === '__proto__') continue;

    // Only own properties of base count; inherited ones (e.g. Object.prototype members)
    // must never be merged into.
    const baseValue = Object.hasOwn(base, key) ? base[key] : undefined;
    const overrideValue = override[key];

    result[key] =
      isPlainObject(baseValue) && isPlainObject(overrideValue)
        ? deepMerge(baseValue, overrideValue)
        : overrideValue;
  }
  return result;
};
|
|
27
|
+
|
|
28
|
+
/**
 * Load the TOML configuration.
 *
 * When SZKRABOK_CONFIG is set and points at an existing file, that file alone is
 * parsed and returned. Otherwise the base file (szkrabok.config.toml) and the local
 * file (szkrabok.config.local.toml) are each looked up in the current working
 * directory first and in PACKAGE_ROOT second; the local table is deep-merged over
 * the base table. A missing file contributes an empty table.
 *
 * @returns {object} Parsed (and possibly merged) configuration.
 */
const loadToml = () => {
  const readToml = file => parse(readFileSync(file, 'utf8'));

  const explicitPath = process.env.SZKRABOK_CONFIG;
  if (explicitPath && existsSync(explicitPath)) {
    return readToml(explicitPath);
  }

  // First candidate that exists on disk, or null when none does.
  const firstExisting = candidates => candidates.find(file => existsSync(file)) ?? null;
  const searchRoots = [process.cwd(), PACKAGE_ROOT];

  const basePath = firstExisting(searchRoots.map(root => join(root, 'szkrabok.config.toml')));
  const localPath = firstExisting(
    searchRoots.map(root => join(root, 'szkrabok.config.local.toml'))
  );

  const baseTable = basePath ? readToml(basePath) : {};
  const localTable = localPath ? readToml(localPath) : {};
  return deepMerge(baseTable, localTable);
};
|
|
46
|
+
|
|
47
|
+
// Parsed configuration and the two tables that presets are resolved from.
const toml = loadToml();
const tomlDefault = toml.default ?? {};
const tomlPresets = toml.preset ?? {};

/**
 * Resolve a preset by name: [default] values overlaid with [preset.<name>].
 *
 * A missing name, the name "default", or a name with no own entry in the preset
 * table all yield the [default] values tagged as preset "chromium-honest".
 *
 * @param {string} [name] Preset name from the TOML `preset` table.
 * @returns {{ preset: string, label: string, userAgent: *, overrideUserAgent: *,
 *   viewport: *, locale: *, timezone: *, headless: * }} Resolved preset; fields not
 *   configured anywhere are null.
 */
export const resolvePreset = name => {
  const base = {
    label: tomlDefault.label ?? 'Default',
    userAgent: tomlDefault.userAgent ?? null,
    overrideUserAgent: tomlDefault.overrideUserAgent ?? null,
    viewport: tomlDefault.viewport ?? null,
    locale: tomlDefault.locale ?? null,
    timezone: tomlDefault.timezone ?? null,
    headless: tomlDefault.headless ?? null,
  };

  // Own-property check: names such as "toString" or "constructor" must not resolve
  // to members inherited from Object.prototype and be mistaken for a preset table.
  const isNamedPreset = Boolean(name) && name !== 'default' && Object.hasOwn(tomlPresets, name);
  const override = isNamedPreset ? tomlPresets[name] : null;

  if (!override) {
    return { preset: 'chromium-honest', ...base };
  }

  return {
    preset: name,
    label: override.label ?? base.label,
    userAgent: override.userAgent ?? base.userAgent,
    overrideUserAgent: override.overrideUserAgent ?? base.overrideUserAgent,
    viewport: override.viewport ?? base.viewport,
    locale: override.locale ?? base.locale,
    timezone: override.timezone ?? base.timezone,
    headless: override.headless ?? base.headless,
  };
};

// Names of every preset declared in the TOML `preset` table.
export const PRESETS = Object.keys(tomlPresets);
|
|
85
|
+
|
|
86
|
+
// ── stealth config ──────────────────────────────────────────────────────────
|
|
87
|
+
|
|
88
|
+
// Settings from the [puppeteer-extra-plugin-stealth] TOML table (empty when absent).
const tomlStealth = toml['puppeteer-extra-plugin-stealth'] ?? {};
const tomlStealthEvasions = tomlStealth.evasions ?? {};

// Stealth is on unless the TOML table sets `enabled` explicitly.
export const STEALTH_ENABLED = tomlStealth.enabled ?? true;

// Value of one stealth sub-table, or the built-in fallback when it is not configured.
const stealthSection = (key, fallback) => tomlStealth[key] ?? fallback;

// Per-evasion settings: each entry is the configured table or its built-in default.
export const STEALTH_CONFIG = {
  evasions: tomlStealthEvasions,
  'user-agent-override': stealthSection('user-agent-override', {
    enabled: true,
    mask_linux: true,
  }),
  'navigator.vendor': stealthSection('navigator.vendor', {
    enabled: true,
    vendor: 'Google Inc.',
  }),
  'navigator.hardwareConcurrency': stealthSection('navigator.hardwareConcurrency', {
    enabled: true,
    hardware_concurrency: 4,
  }),
  'navigator.languages': stealthSection('navigator.languages', { enabled: true }),
  'webgl.vendor': stealthSection('webgl.vendor', {
    enabled: true,
    vendor: 'Intel Inc.',
    renderer: 'Intel Iris OpenGL Engine',
  }),
};
|
|
108
|
+
|
|
109
|
+
// ── resolved defaults ───────────────────────────────────────────────────────
|
|
110
|
+
|
|
111
|
+
// Values of the [default] TOML table, resolved once at module load.
const defaults = resolvePreset('default');

export const DEFAULT_TIMEOUT = 30_000;
export const TIMEOUT = tomlDefault.timeout ?? DEFAULT_TIMEOUT;

// Headless resolution order:
//   1. HEADLESS env var, when defined (only the exact string 'true' means headless);
//   2. no DISPLAY env var -> headless;
//   3. otherwise the TOML default, falling back to headed.
const resolveHeadless = () => {
  const { HEADLESS: headlessFlag, DISPLAY: display } = process.env;
  if (headlessFlag !== undefined) return headlessFlag === 'true';
  if (!display) return true;
  return defaults.headless ?? false;
};

export const HEADLESS = resolveHeadless();

export const DISABLE_WEBGL = tomlDefault.disable_webgl ?? false;
export const LOG_LEVEL = tomlDefault.log_level ?? 'info';

export const VIEWPORT = defaults.viewport ?? { width: 1280, height: 800 };

// Used when the TOML default supplies no (or an empty) user agent.
const FALLBACK_USER_AGENT =
  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36';

export const USER_AGENT = defaults.userAgent || FALLBACK_USER_AGENT;

export const LOCALE = defaults.locale || 'en-US';
export const TIMEZONE = defaults.timezone || 'America/New_York';
|
|
134
|
+
|
|
135
|
+
// ── Chromium path resolution ────────────────────────────────────────────────
|
|
136
|
+
|
|
137
|
+
/**
 * Locate a Chromium executable.
 *
 * Lookup order:
 *   1. `executablePath` from the [default] TOML table (returned as-is, not checked);
 *   2. `chromium-*` directories under ~/.cache/ms-playwright, highest revision first;
 *   3. a short list of system-wide Chromium/Chrome locations under /usr/bin.
 *
 * @returns {string|null} Path to the executable, or null when none was found.
 */
export const findChromiumPath = () => {
  if (tomlDefault.executablePath) {
    return tomlDefault.executablePath;
  }

  const playwrightCache = join(homedir(), '.cache', 'ms-playwright');

  if (existsSync(playwrightCache)) {
    // Trailing numeric revision of a directory name ("chromium-1140" -> 1140);
    // -1 when the name does not end in digits, so such directories sort last.
    const revisionOf = dir => {
      const match = /-(\d+)$/.exec(dir);
      return match ? Number.parseInt(match[1], 10) : -1;
    };

    // Compare revisions numerically: a plain string sort would rank "chromium-999"
    // above "chromium-1140". Ties fall back to reverse lexicographic order.
    const byRevisionDesc = (a, b) => {
      const diff = revisionOf(b) - revisionOf(a);
      if (diff !== 0) return diff;
      if (a === b) return 0;
      return a < b ? 1 : -1;
    };

    const dirs = readdirSync(playwrightCache)
      .filter(d => d.startsWith('chromium-'))
      .sort(byRevisionDesc);

    for (const dir of dirs) {
      const paths = [
        join(playwrightCache, dir, 'chrome-linux', 'chrome'),
        join(playwrightCache, dir, 'chrome-linux64', 'chrome'),
      ];

      for (const path of paths) {
        if (existsSync(path)) return path;
      }
    }
  }

  const systemChromiums = [
    '/usr/bin/chromium',
    '/usr/bin/chromium-browser',
    '/usr/bin/google-chrome',
  ];

  for (const path of systemChromiums) {
    if (existsSync(path)) return path;
  }

  return null;
};
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// @szkrabok/runtime — public API
|
|
2
|
+
// Do NOT expose: pool internals, storage internals, stealth utilities
|
|
3
|
+
|
|
4
|
+
export { launch, connect } from './launch.js';
|
|
5
|
+
export { closeSession, getSession, listSessions as listRuntimeSessions, listStoredSessions, updateSessionMeta, deleteStoredSession, updateSessionPage, closeAllSessions } from './sessions.js';
|
|
6
|
+
export { resolvePreset, PRESETS } from './config.js';
|
|
7
|
+
|
|
8
|
+
// MCP client
|
|
9
|
+
export { mcpConnect } from './mcp-client/mcp-tools.js';
|
|
10
|
+
export { spawnClient } from './mcp-client/runtime/transport.js';
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
// launch.js — the one true browser bootstrap entry point.
|
|
2
|
+
// Only this file calls launchPersistentContext.
|
|
3
|
+
|
|
4
|
+
import { chromium } from 'playwright';
|
|
5
|
+
import {
|
|
6
|
+
resolvePreset,
|
|
7
|
+
findChromiumPath,
|
|
8
|
+
HEADLESS,
|
|
9
|
+
VIEWPORT,
|
|
10
|
+
USER_AGENT,
|
|
11
|
+
LOCALE,
|
|
12
|
+
TIMEZONE,
|
|
13
|
+
STEALTH_ENABLED,
|
|
14
|
+
} from './config.js';
|
|
15
|
+
import { enhanceWithStealth, applyStealthToExistingPage } from './stealth.js';
|
|
16
|
+
import * as storage from './storage.js';
|
|
17
|
+
import * as pool from './pool.js';
|
|
18
|
+
import { log } from './logger.js';
|
|
19
|
+
|
|
20
|
+
// Map a session id to a stable CDP port in 20000–29999 (10 000 slots, clear of
// common service ports). The id is folded into a 32-bit rolling hash
// (hash * 31 + UTF-16 code unit), whose absolute value picks the slot.
const cdpPortForId = id => {
  const PORT_BASE = 20000;
  const PORT_SLOTS = 10000;

  let hash = 0;
  let pos = 0;
  while (pos < id.length) {
    hash = (Math.imul(31, hash) + id.charCodeAt(pos)) | 0;
    pos += 1;
  }

  return PORT_BASE + (Math.abs(hash) % PORT_SLOTS);
};
|
|
27
|
+
|
|
28
|
+
// _launchPersistentContext — internal helper, not exported; launch() below is its
// only caller.
//
// `options` is forwarded to Playwright's launchPersistentContext after removing the
// internal keys: `stealth` and `presetConfig` are consumed here, and a truthy
// `cdpPort` is translated into a --remote-debugging-port argument.
const _launchPersistentContext = async (userDataDir, options = {}) => {
  const { stealth: wantStealth, presetConfig: rawPresetConfig, ...passthrough } = options;
  const presetConfig = rawPresetConfig ?? {};
  const engine = wantStealth ? enhanceWithStealth(chromium, presetConfig) : chromium;

  const executablePath = findChromiumPath();
  if (executablePath) {
    log('Using existing Chromium for persistent context', { path: executablePath });
  }

  const launchOptions = {
    ...passthrough,
    headless: options.headless ?? HEADLESS,
    executablePath,
    viewport: options.viewport,
    locale: options.locale,
    timezoneId: options.timezoneId,
    userAgent: options.userAgent,
  };

  // Fixed flags come first, followed by any caller-supplied arguments.
  const chromiumArgs = [
    '--hide-crash-restore-bubble',
    '--disable-features=PortalActivationDelegate',
    ...(launchOptions.args || []),
  ];

  if (launchOptions.cdpPort) {
    chromiumArgs.push(`--remote-debugging-port=${launchOptions.cdpPort}`);
    delete launchOptions.cdpPort;
  }

  launchOptions.args = chromiumArgs;

  const context = await engine.launchPersistentContext(userDataDir, launchOptions);

  if (wantStealth) {
    // Only the first page that already exists at launch gets the stealth treatment here.
    const [firstPage] = context.pages();
    if (firstPage) {
      await applyStealthToExistingPage(firstPage, presetConfig);
    }
  }

  return context;
};
|
|
77
|
+
|
|
78
|
+
/**
 * Launch a browser session.
 *
 * When `reuse` is true and the profile is already in the pool, a handle to the
 * existing context is returned and nothing is launched. Otherwise a persistent
 * context is started, saved cookies/localStorage are restored, the session is
 * registered in the pool and its metadata is persisted.
 *
 * If any step after the browser has started fails, the context is closed again
 * before the error is rethrown, so a failed launch does not leave a browser running.
 *
 * @param {object}  [options]
 * @param {string}  [options.profile]    Session name / profile dir key
 * @param {string}  [options.preset]     TOML preset name (default: "default")
 * @param {boolean} [options.headless]   Overrides TOML + env
 * @param {boolean} [options.stealth]    Overrides TOML stealth setting
 * @param {string}  [options.userAgent]  Overrides TOML + preset userAgent
 * @param {object}  [options.viewport]   Overrides TOML + preset viewport { width, height }
 * @param {string}  [options.locale]     Overrides TOML + preset locale
 * @param {string}  [options.timezone]   Overrides TOML + preset timezone
 * @param {boolean} [options.reuse]      Return existing if profile already open (default: true)
 * @returns {Promise<{ browser: import('playwright').Browser, context: import('playwright').BrowserContext, cdpEndpoint: string, close(): Promise<void> }>}
 */
export const launch = async (options = {}) => {
  const {
    profile = 'default',
    preset: presetName,
    headless,
    stealth,
    userAgent,
    viewport,
    locale,
    timezone,
    reuse = true,
  } = options;

  // Public handle shared by the reuse path and the fresh-launch path.
  // close() persists the storage state and lastUsed timestamp, then closes the
  // context and removes the profile from the pool.
  const makeHandle = (ctx, port) => ({
    browser: ctx.browser(),
    context: ctx,
    cdpEndpoint: `http://localhost:${port}`,
    close: async () => {
      const state = await ctx.storageState();
      await storage.saveState(profile, state);
      await storage.updateMeta(profile, { lastUsed: Date.now() });
      await ctx.close();
      pool.remove(profile);
    },
  });

  // Idempotency: return existing handle when reuse=true and profile is open
  if (reuse && pool.has(profile)) {
    log(`Reusing existing session: ${profile}`);
    const existing = pool.get(profile);
    return makeHandle(existing.context, existing.cdpPort);
  }

  await storage.ensureSessionsDir();

  const savedMeta = await storage.loadMeta(profile);
  const savedConfig = savedMeta?.config ?? {};

  // If an explicit preset is given, it resets the baseline — savedConfig is bypassed
  // for preset-derived fields. Individual field overrides (userAgent etc.) always win.
  const resolved = resolvePreset(presetName ?? savedMeta?.preset);
  const base = presetName ? {} : savedConfig;

  const effectiveViewport = viewport || base.viewport || resolved.viewport || VIEWPORT;
  const effectiveUserAgent = userAgent || base.userAgent || resolved.userAgent || USER_AGENT;
  const effectiveLocale = locale || base.locale || resolved.locale || LOCALE;
  const effectiveTimezone = timezone || base.timezone || resolved.timezone || TIMEZONE;
  // Stealth and headless are not preset-derived, so the saved values still apply
  // even when an explicit preset was requested.
  const effectiveStealth = stealth ?? savedConfig.stealth ?? STEALTH_ENABLED;
  const effectiveHeadless = headless ?? savedConfig.headless ?? HEADLESS;

  const presetConfig = {
    userAgent: effectiveUserAgent,
    locale: effectiveLocale,
    overrideUserAgent: resolved.overrideUserAgent,
  };

  const userDataDir = storage.getUserDataDir(profile);
  const cdpPort = cdpPortForId(profile);

  const context = await _launchPersistentContext(userDataDir, {
    stealth: effectiveStealth,
    presetConfig,
    viewport: effectiveViewport,
    userAgent: effectiveUserAgent,
    locale: effectiveLocale,
    timezoneId: effectiveTimezone,
    headless: effectiveHeadless,
    cdpPort,
  });

  try {
    // Restore saved state (cookies + localStorage)
    const savedState = await storage.loadState(profile);
    if (savedState) {
      if (savedState.cookies?.length) {
        // Best effort: a failed cookie restore is logged and the launch continues.
        try {
          await context.addCookies(savedState.cookies);
          log(`Restored ${savedState.cookies.length} cookies for ${profile}`);
        } catch (err) {
          log(`Cookie restore failed for ${profile}: ${err.message}`);
        }
      }
      if (savedState.origins?.length) {
        const origins = savedState.origins;
        // Runs in the page: re-seeds localStorage for the entry matching the page's origin.
        await context.addInitScript(savedOrigins => {
          const origin = location.origin;
          const entry = savedOrigins.find(o => o.origin === origin);
          if (!entry?.localStorage?.length) return;
          for (const { name, value } of entry.localStorage) {
            try { localStorage.setItem(name, value); } catch {}
          }
        }, origins);
        log(`Restored localStorage for ${savedState.origins.length} origin(s) in ${profile}`);
      }
    }

    // Mask iframe fingerprints
    await context.addInitScript(() => {
      const originalCreateElement = document.createElement;
      // Forward the receiver and every argument unchanged so the optional second
      // `options` argument (e.g. { is: 'my-element' }) still reaches the native method.
      document.createElement = function (...args) {
        return originalCreateElement.apply(this, args);
      };
    });

    context.on('close', () => {
      log(`Context ${profile} was closed`);
      if (pool.has(profile)) pool.remove(profile);
    });

    const pages = context.pages();
    const page = pages.length > 0 ? pages[0] : await context.newPage();

    pool.add(profile, context, page, cdpPort, resolved.preset, resolved.label);

    const meta = {
      sessionName: profile,
      created: savedMeta?.created ?? Date.now(),
      lastUsed: Date.now(),
      preset: resolved.preset,
      label: resolved.label,
      config: {
        userAgent: effectiveUserAgent,
        viewport: effectiveViewport,
        locale: effectiveLocale,
        timezone: effectiveTimezone,
        stealth: effectiveStealth,
        headless: effectiveHeadless,
      },
      userDataDir,
    };
    await storage.saveMeta(profile, meta);
  } catch (err) {
    // Setup failed after the browser started: shut it down so the browser and its
    // profile directory are not left in use by a session nobody holds a handle to.
    try {
      await context.close();
    } catch (closeErr) {
      log(`Failed to close context for ${profile} after launch error: ${closeErr.message}`);
    }
    if (pool.has(profile)) pool.remove(profile);
    throw err;
  }

  return makeHandle(context, cdpPort);
};
|
|
228
|
+
|
|
229
|
+
/**
 * Attach to a browser that is already running, using its CDP endpoint.
 *
 * The first context reported by the browser is used; a new context is created
 * only when the browser reports none.
 *
 * @param {string} cdpEndpoint
 * @returns {Promise<{ browser: import('playwright').Browser, context: import('playwright').BrowserContext }>}
 */
export const connect = async cdpEndpoint => {
  const browser = await chromium.connectOverCDP(cdpEndpoint);
  const [existingContext] = browser.contexts();
  const context = existingContext ?? (await browser.newContext());
  return { browser, context };
};
|