lionscraper 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lionscraper-1.0.2/PKG-INFO +130 -0
- lionscraper-1.0.2/README.md +105 -0
- lionscraper-1.0.2/lionscraper.egg-info/PKG-INFO +130 -0
- lionscraper-1.0.2/lionscraper.egg-info/SOURCES.txt +56 -0
- lionscraper-1.0.2/lionscraper.egg-info/dependency_links.txt +1 -0
- lionscraper-1.0.2/lionscraper.egg-info/entry_points.txt +3 -0
- lionscraper-1.0.2/lionscraper.egg-info/requires.txt +9 -0
- lionscraper-1.0.2/lionscraper.egg-info/top_level.txt +1 -0
- lionscraper-1.0.2/pyproject.toml +58 -0
- lionscraper-1.0.2/setup.cfg +4 -0
- lionscraper-1.0.2/src/__init__.py +6 -0
- lionscraper-1.0.2/src/__main__.py +4 -0
- lionscraper-1.0.2/src/bridge/__init__.py +0 -0
- lionscraper-1.0.2/src/bridge/protocol.py +148 -0
- lionscraper-1.0.2/src/bridge/session.py +101 -0
- lionscraper-1.0.2/src/bridge/timeout.py +49 -0
- lionscraper-1.0.2/src/bridge/websocket.py +388 -0
- lionscraper-1.0.2/src/cli/__init__.py +0 -0
- lionscraper-1.0.2/src/cli/build_tool_args.py +316 -0
- lionscraper-1.0.2/src/cli/entry.py +42 -0
- lionscraper-1.0.2/src/cli/mcp_bin_argv.py +5 -0
- lionscraper-1.0.2/src/cli/router.py +206 -0
- lionscraper-1.0.2/src/client/__init__.py +0 -0
- lionscraper-1.0.2/src/client/daemon_client.py +230 -0
- lionscraper-1.0.2/src/client/daemon_lifecycle.py +65 -0
- lionscraper-1.0.2/src/core/__init__.py +0 -0
- lionscraper-1.0.2/src/core/bridge_service.py +78 -0
- lionscraper-1.0.2/src/daemon/__init__.py +0 -0
- lionscraper-1.0.2/src/daemon/daemon_main.py +69 -0
- lionscraper-1.0.2/src/daemon/http_api.py +214 -0
- lionscraper-1.0.2/src/i18n/__init__.py +0 -0
- lionscraper-1.0.2/src/i18n/lang.py +159 -0
- lionscraper-1.0.2/src/locale/en-US.json +195 -0
- lionscraper-1.0.2/src/locale/zh-CN.json +195 -0
- lionscraper-1.0.2/src/mcp/__init__.py +0 -0
- lionscraper-1.0.2/src/mcp/handler.py +411 -0
- lionscraper-1.0.2/src/mcp/mcp_prompts.py +118 -0
- lionscraper-1.0.2/src/mcp/mcp_resources.py +56 -0
- lionscraper-1.0.2/src/mcp/mcp_stdio_app.py +31 -0
- lionscraper-1.0.2/src/mcp/thin_mcp_server.py +189 -0
- lionscraper-1.0.2/src/mcp/tools.py +182 -0
- lionscraper-1.0.2/src/mcp/validate_tool_input.py +26 -0
- lionscraper-1.0.2/src/types/__init__.py +0 -0
- lionscraper-1.0.2/src/types/bridge.py +100 -0
- lionscraper-1.0.2/src/types/errors.py +142 -0
- lionscraper-1.0.2/src/utils/__init__.py +0 -0
- lionscraper-1.0.2/src/utils/browser_env.py +217 -0
- lionscraper-1.0.2/src/utils/config.py +48 -0
- lionscraper-1.0.2/src/utils/daemon_config.py +14 -0
- lionscraper-1.0.2/src/utils/logger.py +65 -0
- lionscraper-1.0.2/src/utils/port.py +234 -0
- lionscraper-1.0.2/src/version.py +6 -0
- lionscraper-1.0.2/tests/test_cli_build_tool_args.py +78 -0
- lionscraper-1.0.2/tests/test_mcp_bin_argv.py +7 -0
- lionscraper-1.0.2/tests/test_port.py +36 -0
- lionscraper-1.0.2/tests/test_protocol.py +39 -0
- lionscraper-1.0.2/tests/test_timeout.py +80 -0
- lionscraper-1.0.2/tests/test_validate_tool_input.py +16 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lionscraper
|
|
3
|
+
Version: 1.0.2
|
|
4
|
+
Summary: LionScraper bridge daemon, thin MCP stdio, and CLI — local HTTP + WebSocket to Chrome extension (Python)
|
|
5
|
+
Author: LionScraper
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: lionscraper,mcp,cli,web-scraping,chrome-extension
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: aiohttp>=3.9.0
|
|
18
|
+
Requires-Dist: httpx>=0.27.0
|
|
19
|
+
Requires-Dist: pydantic>=2.6.0
|
|
20
|
+
Requires-Dist: mcp>=1.8.0
|
|
21
|
+
Requires-Dist: websockets>=12.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest-asyncio>=0.24.0; extra == "dev"
|
|
25
|
+
|
|
26
|
+
# LionScraper (Python)
|
|
27
|
+
|
|
28
|
+
Python edition of **LionScraper**: a local **HTTP + WebSocket** daemon that talks to the **Chrome/Edge LionScraper extension**, plus a **thin MCP server** over **stdio** that forwards tool calls to the daemon. Behavior is intended to match the npm/`packages/node` implementation in this repository.
|
|
29
|
+
|
|
30
|
+
## Requirements
|
|
31
|
+
|
|
32
|
+
- Python **3.10+**
|
|
33
|
+
- The LionScraper browser extension installed and configured to use the same **PORT** as the daemon (default **13808**).
|
|
34
|
+
|
|
35
|
+
## Install
|
|
36
|
+
|
|
37
|
+
From PyPI (when published):
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install lionscraper
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
From a checkout of this repo:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
cd packages/python
|
|
47
|
+
pip install -e ".[dev]"
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Commands
|
|
51
|
+
|
|
52
|
+
| Command | Role |
|
|
53
|
+
|--------|------|
|
|
54
|
+
| `lionscraper` | Full CLI (`daemon`, `stop`, `scrape`, `ping`, …). |
|
|
55
|
+
| `lionscraper-mcp` | If **no** extra arguments: thin MCP over stdio. **Any** extra argument delegates to the same CLI as `lionscraper`. |
|
|
56
|
+
|
|
57
|
+
Equivalent module entry:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
python -m lionscraper --help
|
|
61
|
+
python -m lionscraper daemon
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## MCP client configuration
|
|
65
|
+
|
|
66
|
+
Use **`lionscraper-mcp`** as the MCP server command (no arguments) so the host spawns thin MCP stdio:
|
|
67
|
+
|
|
68
|
+
```json
|
|
69
|
+
{
|
|
70
|
+
"mcpServers": {
|
|
71
|
+
"lionscraper": {
|
|
72
|
+
"command": "lionscraper-mcp",
|
|
73
|
+
"env": {
|
|
74
|
+
"PORT": "13808",
|
|
75
|
+
"LANG": "en-US"
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
To run the same routing as `lionscraper-mcp` via Python (stdio MCP when there are **no** extra arguments after the module name):
|
|
83
|
+
|
|
84
|
+
```json
|
|
85
|
+
"command": "python",
|
|
86
|
+
"args": ["-m", "lionscraper"]
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Thin stdio mode is selected only when no arguments follow the program name (i.e. `sys.argv[1:]` is empty), same as the Node implementation. Any extra token (including `--debug`) selects the CLI path instead.
|
|
90
|
+
|
|
91
|
+
## Environment variables
|
|
92
|
+
|
|
93
|
+
| Variable | Meaning |
|
|
94
|
+
|----------|---------|
|
|
95
|
+
| `PORT` | HTTP + WebSocket listen port (default **13808**). Must match the extension bridge port. |
|
|
96
|
+
| `TOKEN` | Optional bearer token for `Authorization` on loopback HTTP. |
|
|
97
|
+
| `DAEMON` | Set to `0` to disable auto-start of the daemon from CLI / thin MCP. |
|
|
98
|
+
| `TIMEOUT` | Default timeout hints (see root repo docs). |
|
|
99
|
+
| `LANG` | Locale for log and tool metadata (`en-US` / `zh-CN`). |
|
|
100
|
+
|
|
101
|
+
## PyPI release (maintainers)
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
cd packages/python
|
|
105
|
+
python -m pip install build twine
|
|
106
|
+
python -m build
|
|
107
|
+
python -m twine upload dist/*
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Project name on PyPI: **`lionscraper`** (single package providing both console scripts).
|
|
111
|
+
|
|
112
|
+
## Parity with Node
|
|
113
|
+
|
|
114
|
+
This package mirrors `packages/node`: bridge protocol, daemon HTTP API (`/v1/health`, `/v1/daemon/shutdown`, `/v1/tools/call` with optional NDJSON progress), port probing, and tool input validation (done with Pydantic here, in place of Zod in the Node package). Locale JSON is copied under `src/locale/` so the Python wheel does not depend on the Node tree.
|
|
115
|
+
|
|
116
|
+
## Development tests
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
cd packages/python
|
|
120
|
+
pytest
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Manual smoke test
|
|
124
|
+
|
|
125
|
+
1. Start the daemon: `lionscraper daemon` (or let CLI / `lionscraper-mcp` auto-spawn with default `DAEMON`).
|
|
126
|
+
2. Connect the extension to `ws://127.0.0.1:<PORT>` (same as HTTP).
|
|
127
|
+
3. `curl -s http://127.0.0.1:13808/v1/health` — expect JSON with `"ok": true` and `identity` **lionscraper** when ready.
|
|
128
|
+
4. Run `lionscraper-mcp` with no args under an MCP host and list tools — expect `ping`, `scrape`, `scrape_article`, etc.
|
|
129
|
+
|
|
130
|
+
Chinese documentation: [README_cn.md](README_cn.md).
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# LionScraper (Python)
|
|
2
|
+
|
|
3
|
+
Python edition of **LionScraper**: a local **HTTP + WebSocket** daemon that talks to the **Chrome/Edge LionScraper extension**, plus a **thin MCP server** over **stdio** that forwards tool calls to the daemon. Behavior is intended to match the npm/`packages/node` implementation in this repository.
|
|
4
|
+
|
|
5
|
+
## Requirements
|
|
6
|
+
|
|
7
|
+
- Python **3.10+**
|
|
8
|
+
- The LionScraper browser extension installed and configured to use the same **PORT** as the daemon (default **13808**).
|
|
9
|
+
|
|
10
|
+
## Install
|
|
11
|
+
|
|
12
|
+
From PyPI (when published):
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
pip install lionscraper
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
From a checkout of this repo:
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
cd packages/python
|
|
22
|
+
pip install -e ".[dev]"
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Commands
|
|
26
|
+
|
|
27
|
+
| Command | Role |
|
|
28
|
+
|--------|------|
|
|
29
|
+
| `lionscraper` | Full CLI (`daemon`, `stop`, `scrape`, `ping`, …). |
|
|
30
|
+
| `lionscraper-mcp` | If **no** extra arguments: thin MCP over stdio. **Any** extra argument delegates to the same CLI as `lionscraper`. |
|
|
31
|
+
|
|
32
|
+
Equivalent module entry:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
python -m lionscraper --help
|
|
36
|
+
python -m lionscraper daemon
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
## MCP client configuration
|
|
40
|
+
|
|
41
|
+
Use **`lionscraper-mcp`** as the MCP server command (no arguments) so the host spawns thin MCP stdio:
|
|
42
|
+
|
|
43
|
+
```json
|
|
44
|
+
{
|
|
45
|
+
"mcpServers": {
|
|
46
|
+
"lionscraper": {
|
|
47
|
+
"command": "lionscraper-mcp",
|
|
48
|
+
"env": {
|
|
49
|
+
"PORT": "13808",
|
|
50
|
+
"LANG": "en-US"
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
To run the same routing as `lionscraper-mcp` via Python (stdio MCP when there are **no** extra arguments after the module name):
|
|
58
|
+
|
|
59
|
+
```json
|
|
60
|
+
"command": "python",
|
|
61
|
+
"args": ["-m", "lionscraper"]
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
Thin stdio mode is selected only when no arguments follow the program name (i.e. `sys.argv[1:]` is empty), same as the Node implementation. Any extra token (including `--debug`) selects the CLI path instead.
|
|
65
|
+
|
|
66
|
+
## Environment variables
|
|
67
|
+
|
|
68
|
+
| Variable | Meaning |
|
|
69
|
+
|----------|---------|
|
|
70
|
+
| `PORT` | HTTP + WebSocket listen port (default **13808**). Must match the extension bridge port. |
|
|
71
|
+
| `TOKEN` | Optional bearer token for `Authorization` on loopback HTTP. |
|
|
72
|
+
| `DAEMON` | Set to `0` to disable auto-start of the daemon from CLI / thin MCP. |
|
|
73
|
+
| `TIMEOUT` | Default timeout hints (see root repo docs). |
|
|
74
|
+
| `LANG` | Locale for log and tool metadata (`en-US` / `zh-CN`). |
|
|
75
|
+
|
|
76
|
+
## PyPI release (maintainers)
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
cd packages/python
|
|
80
|
+
python -m pip install build twine
|
|
81
|
+
python -m build
|
|
82
|
+
python -m twine upload dist/*
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
Project name on PyPI: **`lionscraper`** (single package providing both console scripts).
|
|
86
|
+
|
|
87
|
+
## Parity with Node
|
|
88
|
+
|
|
89
|
+
This package mirrors `packages/node`: bridge protocol, daemon HTTP API (`/v1/health`, `/v1/daemon/shutdown`, `/v1/tools/call` with optional NDJSON progress), port probing, and tool input validation (done with Pydantic here, in place of Zod in the Node package). Locale JSON is copied under `src/locale/` so the Python wheel does not depend on the Node tree.
|
|
90
|
+
|
|
91
|
+
## Development tests
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
cd packages/python
|
|
95
|
+
pytest
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## Manual smoke test
|
|
99
|
+
|
|
100
|
+
1. Start the daemon: `lionscraper daemon` (or let CLI / `lionscraper-mcp` auto-spawn with default `DAEMON`).
|
|
101
|
+
2. Connect the extension to `ws://127.0.0.1:<PORT>` (same as HTTP).
|
|
102
|
+
3. `curl -s http://127.0.0.1:13808/v1/health` — expect JSON with `"ok": true` and `identity` **lionscraper** when ready.
|
|
103
|
+
4. Run `lionscraper-mcp` with no args under an MCP host and list tools — expect `ping`, `scrape`, `scrape_article`, etc.
|
|
104
|
+
|
|
105
|
+
Chinese documentation: [README_cn.md](README_cn.md).
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lionscraper
|
|
3
|
+
Version: 1.0.2
|
|
4
|
+
Summary: LionScraper bridge daemon, thin MCP stdio, and CLI — local HTTP + WebSocket to Chrome extension (Python)
|
|
5
|
+
Author: LionScraper
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: lionscraper,mcp,cli,web-scraping,chrome-extension
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: aiohttp>=3.9.0
|
|
18
|
+
Requires-Dist: httpx>=0.27.0
|
|
19
|
+
Requires-Dist: pydantic>=2.6.0
|
|
20
|
+
Requires-Dist: mcp>=1.8.0
|
|
21
|
+
Requires-Dist: websockets>=12.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest-asyncio>=0.24.0; extra == "dev"
|
|
25
|
+
|
|
26
|
+
# LionScraper (Python)
|
|
27
|
+
|
|
28
|
+
Python edition of **LionScraper**: a local **HTTP + WebSocket** daemon that talks to the **Chrome/Edge LionScraper extension**, plus a **thin MCP server** over **stdio** that forwards tool calls to the daemon. Behavior is intended to match the npm/`packages/node` implementation in this repository.
|
|
29
|
+
|
|
30
|
+
## Requirements
|
|
31
|
+
|
|
32
|
+
- Python **3.10+**
|
|
33
|
+
- The LionScraper browser extension installed and configured to use the same **PORT** as the daemon (default **13808**).
|
|
34
|
+
|
|
35
|
+
## Install
|
|
36
|
+
|
|
37
|
+
From PyPI (when published):
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install lionscraper
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
From a checkout of this repo:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
cd packages/python
|
|
47
|
+
pip install -e ".[dev]"
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Commands
|
|
51
|
+
|
|
52
|
+
| Command | Role |
|
|
53
|
+
|--------|------|
|
|
54
|
+
| `lionscraper` | Full CLI (`daemon`, `stop`, `scrape`, `ping`, …). |
|
|
55
|
+
| `lionscraper-mcp` | If **no** extra arguments: thin MCP over stdio. **Any** extra argument delegates to the same CLI as `lionscraper`. |
|
|
56
|
+
|
|
57
|
+
Equivalent module entry:
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
python -m lionscraper --help
|
|
61
|
+
python -m lionscraper daemon
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## MCP client configuration
|
|
65
|
+
|
|
66
|
+
Use **`lionscraper-mcp`** as the MCP server command (no arguments) so the host spawns thin MCP stdio:
|
|
67
|
+
|
|
68
|
+
```json
|
|
69
|
+
{
|
|
70
|
+
"mcpServers": {
|
|
71
|
+
"lionscraper": {
|
|
72
|
+
"command": "lionscraper-mcp",
|
|
73
|
+
"env": {
|
|
74
|
+
"PORT": "13808",
|
|
75
|
+
"LANG": "en-US"
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
To run the same routing as `lionscraper-mcp` via Python (stdio MCP when there are **no** extra arguments after the module name):
|
|
83
|
+
|
|
84
|
+
```json
|
|
85
|
+
"command": "python",
|
|
86
|
+
"args": ["-m", "lionscraper"]
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
Thin stdio mode is selected only when no arguments follow the program name (i.e. `sys.argv[1:]` is empty), same as the Node implementation. Any extra token (including `--debug`) selects the CLI path instead.
|
|
90
|
+
|
|
91
|
+
## Environment variables
|
|
92
|
+
|
|
93
|
+
| Variable | Meaning |
|
|
94
|
+
|----------|---------|
|
|
95
|
+
| `PORT` | HTTP + WebSocket listen port (default **13808**). Must match the extension bridge port. |
|
|
96
|
+
| `TOKEN` | Optional bearer token for `Authorization` on loopback HTTP. |
|
|
97
|
+
| `DAEMON` | Set to `0` to disable auto-start of the daemon from CLI / thin MCP. |
|
|
98
|
+
| `TIMEOUT` | Default timeout hints (see root repo docs). |
|
|
99
|
+
| `LANG` | Locale for log and tool metadata (`en-US` / `zh-CN`). |
|
|
100
|
+
|
|
101
|
+
## PyPI release (maintainers)
|
|
102
|
+
|
|
103
|
+
```bash
|
|
104
|
+
cd packages/python
|
|
105
|
+
python -m pip install build twine
|
|
106
|
+
python -m build
|
|
107
|
+
python -m twine upload dist/*
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
Project name on PyPI: **`lionscraper`** (single package providing both console scripts).
|
|
111
|
+
|
|
112
|
+
## Parity with Node
|
|
113
|
+
|
|
114
|
+
This package mirrors `packages/node`: bridge protocol, daemon HTTP API (`/v1/health`, `/v1/daemon/shutdown`, `/v1/tools/call` with optional NDJSON progress), port probing, and tool input validation (done with Pydantic here, in place of Zod in the Node package). Locale JSON is copied under `src/locale/` so the Python wheel does not depend on the Node tree.
|
|
115
|
+
|
|
116
|
+
## Development tests
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
cd packages/python
|
|
120
|
+
pytest
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Manual smoke test
|
|
124
|
+
|
|
125
|
+
1. Start the daemon: `lionscraper daemon` (or let CLI / `lionscraper-mcp` auto-spawn with default `DAEMON`).
|
|
126
|
+
2. Connect the extension to `ws://127.0.0.1:<PORT>` (same as HTTP).
|
|
127
|
+
3. `curl -s http://127.0.0.1:13808/v1/health` — expect JSON with `"ok": true` and `identity` **lionscraper** when ready.
|
|
128
|
+
4. Run `lionscraper-mcp` with no args under an MCP host and list tools — expect `ping`, `scrape`, `scrape_article`, etc.
|
|
129
|
+
|
|
130
|
+
Chinese documentation: [README_cn.md](README_cn.md).
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
README.md
|
|
2
|
+
pyproject.toml
|
|
3
|
+
lionscraper.egg-info/PKG-INFO
|
|
4
|
+
lionscraper.egg-info/SOURCES.txt
|
|
5
|
+
lionscraper.egg-info/dependency_links.txt
|
|
6
|
+
lionscraper.egg-info/entry_points.txt
|
|
7
|
+
lionscraper.egg-info/requires.txt
|
|
8
|
+
lionscraper.egg-info/top_level.txt
|
|
9
|
+
src/__init__.py
|
|
10
|
+
src/__main__.py
|
|
11
|
+
src/version.py
|
|
12
|
+
src/bridge/__init__.py
|
|
13
|
+
src/bridge/protocol.py
|
|
14
|
+
src/bridge/session.py
|
|
15
|
+
src/bridge/timeout.py
|
|
16
|
+
src/bridge/websocket.py
|
|
17
|
+
src/cli/__init__.py
|
|
18
|
+
src/cli/build_tool_args.py
|
|
19
|
+
src/cli/entry.py
|
|
20
|
+
src/cli/mcp_bin_argv.py
|
|
21
|
+
src/cli/router.py
|
|
22
|
+
src/client/__init__.py
|
|
23
|
+
src/client/daemon_client.py
|
|
24
|
+
src/client/daemon_lifecycle.py
|
|
25
|
+
src/core/__init__.py
|
|
26
|
+
src/core/bridge_service.py
|
|
27
|
+
src/daemon/__init__.py
|
|
28
|
+
src/daemon/daemon_main.py
|
|
29
|
+
src/daemon/http_api.py
|
|
30
|
+
src/i18n/__init__.py
|
|
31
|
+
src/i18n/lang.py
|
|
32
|
+
src/locale/en-US.json
|
|
33
|
+
src/locale/zh-CN.json
|
|
34
|
+
src/mcp/__init__.py
|
|
35
|
+
src/mcp/handler.py
|
|
36
|
+
src/mcp/mcp_prompts.py
|
|
37
|
+
src/mcp/mcp_resources.py
|
|
38
|
+
src/mcp/mcp_stdio_app.py
|
|
39
|
+
src/mcp/thin_mcp_server.py
|
|
40
|
+
src/mcp/tools.py
|
|
41
|
+
src/mcp/validate_tool_input.py
|
|
42
|
+
src/types/__init__.py
|
|
43
|
+
src/types/bridge.py
|
|
44
|
+
src/types/errors.py
|
|
45
|
+
src/utils/__init__.py
|
|
46
|
+
src/utils/browser_env.py
|
|
47
|
+
src/utils/config.py
|
|
48
|
+
src/utils/daemon_config.py
|
|
49
|
+
src/utils/logger.py
|
|
50
|
+
src/utils/port.py
|
|
51
|
+
tests/test_cli_build_tool_args.py
|
|
52
|
+
tests/test_mcp_bin_argv.py
|
|
53
|
+
tests/test_port.py
|
|
54
|
+
tests/test_protocol.py
|
|
55
|
+
tests/test_timeout.py
|
|
56
|
+
tests/test_validate_tool_input.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
lionscraper
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=69", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "lionscraper"
|
|
7
|
+
version = "1.0.2"
|
|
8
|
+
description = "LionScraper bridge daemon, thin MCP stdio, and CLI — local HTTP + WebSocket to Chrome extension (Python)"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { text = "MIT" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{ name = "LionScraper" }]
|
|
13
|
+
keywords = ["lionscraper", "mcp", "cli", "web-scraping", "chrome-extension"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 4 - Beta",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"License :: OSI Approved :: MIT License",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
]
|
|
23
|
+
dependencies = [
|
|
24
|
+
"aiohttp>=3.9.0",
|
|
25
|
+
"httpx>=0.27.0",
|
|
26
|
+
"pydantic>=2.6.0",
|
|
27
|
+
"mcp>=1.8.0",
|
|
28
|
+
"websockets>=12.0",
|
|
29
|
+
]
|
|
30
|
+
|
|
31
|
+
[project.optional-dependencies]
|
|
32
|
+
dev = ["pytest>=8.0", "pytest-asyncio>=0.24.0"]
|
|
33
|
+
|
|
34
|
+
[project.scripts]
|
|
35
|
+
lionscraper = "lionscraper.cli.entry:main_cli"
|
|
36
|
+
"lionscraper-mcp" = "lionscraper.cli.entry:main_mcp_or_cli"
|
|
37
|
+
|
|
38
|
+
[tool.setuptools]
|
|
39
|
+
package-dir = { "lionscraper" = "src" }
|
|
40
|
+
packages = [
|
|
41
|
+
"lionscraper",
|
|
42
|
+
"lionscraper.i18n",
|
|
43
|
+
"lionscraper.utils",
|
|
44
|
+
"lionscraper.types",
|
|
45
|
+
"lionscraper.bridge",
|
|
46
|
+
"lionscraper.core",
|
|
47
|
+
"lionscraper.daemon",
|
|
48
|
+
"lionscraper.client",
|
|
49
|
+
"lionscraper.cli",
|
|
50
|
+
"lionscraper.mcp",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
[tool.setuptools.package-data]
|
|
54
|
+
lionscraper = ["locale/*.json"]
|
|
55
|
+
|
|
56
|
+
[tool.pytest.ini_options]
|
|
57
|
+
asyncio_mode = "auto"
|
|
58
|
+
testpaths = ["tests"]
|
|
File without changes
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import uuid
|
|
5
|
+
from typing import Any, Callable
|
|
6
|
+
|
|
7
|
+
from lionscraper.i18n.lang import SupportedLang, t
|
|
8
|
+
from lionscraper.types.bridge import BridgeMethod, BridgeProgressParams, BridgeRequest, BridgeResponse
|
|
9
|
+
from lionscraper.types.errors import BridgeErrorCode, LionScraperError, create_error
|
|
10
|
+
from lionscraper.utils.logger import logger
|
|
11
|
+
|
|
12
|
+
# Default bridge request timeout, in milliseconds (60 s); used as the default
# ``timeout_ms`` of ``PendingRequestManager.begin_request``.
DEFAULT_TIMEOUT_MS = 60_000
|
|
13
|
+
|
|
14
|
+
# Signature of a progress callback: receives the progress params and returns nothing.
BridgeProgressHandler = Callable[[BridgeProgressParams], None]
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def create_bridge_request(method: BridgeMethod, params: dict[str, Any] | None = None) -> BridgeRequest:
    """Build a JSON-RPC 2.0 request envelope with a freshly generated id.

    Args:
        method: Bridge method name placed in the ``"method"`` field.
        params: Optional parameters. The ``"params"`` key is included only
            when this is not ``None`` (an empty dict is still included).

    Returns:
        A dict with ``"jsonrpc"``, ``"id"`` (a random UUID4 string),
        ``"method"`` and, when given, ``"params"``.
    """
    request_id = str(uuid.uuid4())
    envelope: BridgeRequest = {"jsonrpc": "2.0", "id": request_id, "method": method}
    if params is None:
        return envelope
    envelope["params"] = params
    return envelope
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def create_bridge_response(req_id: str, result: Any) -> BridgeResponse:
    """Build a JSON-RPC 2.0 success response for the request ``req_id``.

    Args:
        req_id: Id of the request being answered.
        result: Payload stored unchanged under ``"result"``.

    Returns:
        A dict with the keys ``"jsonrpc"``, ``"id"`` and ``"result"``.
    """
    response: BridgeResponse = {
        "jsonrpc": "2.0",
        "id": req_id,
        "result": result,
    }
    return response
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def create_bridge_error_response(
    req_id: str,
    code: int,
    message: str,
    data: dict[str, Any] | None = None,
) -> BridgeResponse:
    """Build a JSON-RPC 2.0 error response for the request ``req_id``.

    Args:
        req_id: Id of the request being answered.
        code: Numeric error code stored under ``error["code"]``.
        message: Error text stored under ``error["message"]``.
        data: Optional extra details. Attached as ``error["data"]`` only when
            truthy, so ``None`` and an empty dict are both omitted.

    Returns:
        A dict with the keys ``"jsonrpc"``, ``"id"`` and ``"error"``.
    """
    extra: dict[str, Any] = {"data": data} if data else {}
    error_body: dict[str, Any] = {"code": code, "message": message, **extra}
    return {"jsonrpc": "2.0", "id": req_id, "error": error_body}
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# Plain alias of ``str``; not referenced elsewhere in the visible part of this module.
# NOTE(review): presumably names request ids rejected on disconnect — confirm against callers.
DisconnectRejectId = str
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class PendingRequest:
    """Bookkeeping record for one in-flight bridge request.

    Attributes:
        id: Request id the record belongs to.
        method: Bridge method name of the request.
        fut: Future associated with the request.
        timer_handle: Handle of the scheduled timeout callback.
        created_at: Wall-clock creation time from ``time.time()``
            (seconds since the epoch), set when the record is constructed.
        lang: Language associated with the request.
        on_progress: Optional progress callback, or ``None``.
    """

    __slots__ = ("id", "method", "fut", "timer_handle", "created_at", "lang", "on_progress")

    def __init__(
        self,
        id: str,
        method: str,
        fut: asyncio.Future[Any],
        timer_handle: asyncio.TimerHandle,
        lang: SupportedLang,
        on_progress: BridgeProgressHandler | None,
    ):
        # Function-scope import keeps this block self-contained; the module's
        # top-level imports do not include ``time``.
        import time

        self.id = id
        self.method = method
        self.fut = fut
        self.timer_handle = timer_handle
        # ``created_at`` is declared in ``__slots__``; without this assignment
        # any read of it would raise AttributeError.
        self.created_at = time.time()
        self.lang = lang
        self.on_progress = on_progress
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class PendingRequestManager:
    """Track in-flight bridge requests by id and settle their futures.

    Each registered request owns a future and a timeout timer. The request is
    removed from the table when it is resolved, rejected, timed out, or
    dropped by ``reject_all_disconnected`` / ``clear``.
    """

    def __init__(self) -> None:
        # Maps request id -> bookkeeping record for every unsettled request.
        self._pending: dict[str, PendingRequest] = {}

    def begin_request(
        self,
        req_id: str,
        method: BridgeMethod,
        timeout_ms: int = DEFAULT_TIMEOUT_MS,
        lang: SupportedLang = "en-US",
        on_progress: BridgeProgressHandler | None = None,
    ) -> asyncio.Future[Any]:
        """Register a pending request and arm its timeout timer.

        Must be called while an event loop is running, because it uses
        ``asyncio.get_running_loop()``.

        Args:
            req_id: Id under which the request is tracked.
            method: Bridge method name, used in the timeout log message.
            timeout_ms: Timeout in milliseconds before the future is failed.
            lang: Language used to localize error messages for this request.
            on_progress: Optional callback invoked by ``dispatch_progress``.

        Returns:
            A future that receives the result via ``resolve`` or an exception
            via ``reject``, timeout, or ``reject_all_disconnected``.
        """
        loop = asyncio.get_running_loop()
        fut: asyncio.Future[Any] = loop.create_future()

        def on_timeout() -> None:
            # Drop the entry first so a late response is reported as unknown.
            self._pending.pop(req_id, None)
            if not fut.done():
                logger.warn(f"Bridge request timed out: {req_id} ({method})")
                fut.set_exception(
                    create_error(
                        BridgeErrorCode.BRIDGE_TIMEOUT,
                        t(lang, "bridge_timeout", {"ms": timeout_ms}),
                    )
                )

        # call_later takes seconds; the public API is in milliseconds.
        handle = loop.call_later(timeout_ms / 1000.0, on_timeout)
        self._pending[req_id] = PendingRequest(req_id, method, fut, handle, lang, on_progress)
        return fut

    def dispatch_progress(self, request_id: str, params: BridgeProgressParams) -> bool:
        """Forward a progress notification to the request's handler, if any.

        Returns:
            ``False`` when ``request_id`` is not pending; ``True`` otherwise,
            including when the request has no handler or the handler raised.
        """
        pending = self._pending.get(request_id)
        if not pending:
            return False
        if pending.on_progress:
            try:
                pending.on_progress(params)
            except Exception as exc:
                # Progress delivery stays best-effort (a failing handler must
                # not break the request), but the failure is now logged
                # instead of being silently discarded.
                logger.warn(
                    f"Bridge progress handler failed: {request_id} ({pending.method}): {exc!r}"
                )
        return True

    def resolve(self, req_id: str, result: Any) -> bool:
        """Complete the pending request ``req_id`` with ``result``.

        Returns:
            ``True`` if the request was pending (its timer is cancelled and it
            is removed), ``False`` if no such request is tracked.
        """
        pending = self._pending.pop(req_id, None)
        if not pending:
            return False
        pending.timer_handle.cancel()
        if not pending.fut.done():
            pending.fut.set_result(result)
        return True

    def reject(self, req_id: str, error: LionScraperError) -> bool:
        """Fail the pending request ``req_id`` with ``error``.

        Returns:
            ``True`` if the request was pending (its timer is cancelled and it
            is removed), ``False`` if no such request is tracked.
        """
        pending = self._pending.pop(req_id, None)
        if not pending:
            return False
        pending.timer_handle.cancel()
        if not pending.fut.done():
            pending.fut.set_exception(error)
        return True

    def reject_all_disconnected(self, reason_key: str) -> None:
        """Fail every pending request with a ``BRIDGE_DISCONNECTED`` error.

        Args:
            reason_key: Translation key passed to ``t`` together with each
                request's own language to build the error message.
        """
        # Iterate over a snapshot because entries are removed inside the loop.
        for req_id, pending in list(self._pending.items()):
            pending.timer_handle.cancel()
            if not pending.fut.done():
                pending.fut.set_exception(
                    create_error(
                        BridgeErrorCode.BRIDGE_DISCONNECTED,
                        t(pending.lang, reason_key),
                    )
                )
            self._pending.pop(req_id, None)

    def has(self, req_id: str) -> bool:
        """Return whether ``req_id`` is currently pending."""
        return req_id in self._pending

    @property
    def size(self) -> int:
        """Number of requests currently pending."""
        return len(self._pending)

    def clear(self) -> None:
        """Cancel all timeout timers and forget every pending request.

        The futures of the dropped requests are neither resolved nor rejected
        here.
        """
        for p in self._pending.values():
            p.timer_handle.cancel()
        self._pending.clear()
|