lightpanda-py 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: lightpanda-py
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python wrapper for Lightpanda browser with CDP server and Playwright integration for lightweight web scraping and automation
|
|
5
|
+
Author-email: Tom Clesius <tomclesius@gmail.com>
|
|
6
|
+
Requires-Python: >=3.13
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
|
|
9
|
+
# lightpanda-py
|
|
10
|
+
|
|
11
|
+
Python bindings for [Lightpanda](https://github.com/lightpanda-io/browser), a fast headless browser for AI agents and web automation.
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install lightpanda-py
|
|
17
|
+
# or
|
|
18
|
+
uv add lightpanda-py
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
### Quick fetch
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
import lightpanda
|
|
27
|
+
|
|
28
|
+
response = lightpanda.fetch("https://example.com")
|
|
29
|
+
print(response.text)
|
|
30
|
+
|
|
31
|
+
# JSON APIs
|
|
32
|
+
response = lightpanda.fetch("https://httpbin.org/ip")
|
|
33
|
+
data = response.json()
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
### CDP Server
|
|
37
|
+
|
|
38
|
+
Start a CDP server to use with Playwright, Puppeteer, or any CDP client:
|
|
39
|
+
|
|
40
|
+
```python
|
|
41
|
+
import lightpanda
|
|
42
|
+
|
|
43
|
+
proc = lightpanda.serve(host="127.0.0.1", port=9222)
|
|
44
|
+
# 🐼 Running Lightpanda's CDP server... { pid: 12345 }
|
|
45
|
+
|
|
46
|
+
# Connect with your favorite CDP client...
|
|
47
|
+
|
|
48
|
+
proc.kill()
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
With Playwright:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
import lightpanda
|
|
55
|
+
from playwright.sync_api import sync_playwright
|
|
56
|
+
|
|
57
|
+
proc = lightpanda.serve()
|
|
58
|
+
|
|
59
|
+
with sync_playwright() as p:
|
|
60
|
+
browser = p.chromium.connect_over_cdp("http://127.0.0.1:9222")
|
|
61
|
+
page = browser.new_page()
|
|
62
|
+
page.goto("https://example.com")
|
|
63
|
+
print(page.content())
|
|
64
|
+
browser.close()
|
|
65
|
+
|
|
66
|
+
proc.kill()
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## License
|
|
70
|
+
|
|
71
|
+
MIT
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
# lightpanda-py
|
|
2
|
+
|
|
3
|
+
Python bindings for [Lightpanda](https://github.com/lightpanda-io/browser), a fast headless browser for AI agents and web automation.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install lightpanda-py
|
|
9
|
+
# or
|
|
10
|
+
uv add lightpanda-py
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Usage
|
|
14
|
+
|
|
15
|
+
### Quick fetch
|
|
16
|
+
|
|
17
|
+
```python
|
|
18
|
+
import lightpanda
|
|
19
|
+
|
|
20
|
+
response = lightpanda.fetch("https://example.com")
|
|
21
|
+
print(response.text)
|
|
22
|
+
|
|
23
|
+
# JSON APIs
|
|
24
|
+
response = lightpanda.fetch("https://httpbin.org/ip")
|
|
25
|
+
data = response.json()
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
### CDP Server
|
|
29
|
+
|
|
30
|
+
Start a CDP server to use with Playwright, Puppeteer, or any CDP client:
|
|
31
|
+
|
|
32
|
+
```python
|
|
33
|
+
import lightpanda
|
|
34
|
+
|
|
35
|
+
proc = lightpanda.serve(host="127.0.0.1", port=9222)
|
|
36
|
+
# 🐼 Running Lightpanda's CDP server... { pid: 12345 }
|
|
37
|
+
|
|
38
|
+
# Connect with your favorite CDP client...
|
|
39
|
+
|
|
40
|
+
proc.kill()
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
With Playwright:
|
|
44
|
+
|
|
45
|
+
```python
|
|
46
|
+
import lightpanda
|
|
47
|
+
from playwright.sync_api import sync_playwright
|
|
48
|
+
|
|
49
|
+
proc = lightpanda.serve()
|
|
50
|
+
|
|
51
|
+
with sync_playwright() as p:
|
|
52
|
+
browser = p.chromium.connect_over_cdp("http://127.0.0.1:9222")
|
|
53
|
+
page = browser.new_page()
|
|
54
|
+
page.goto("https://example.com")
|
|
55
|
+
print(page.content())
|
|
56
|
+
browser.close()
|
|
57
|
+
|
|
58
|
+
proc.kill()
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## License
|
|
62
|
+
|
|
63
|
+
MIT
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import platform
|
|
3
|
+
import urllib.request
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from hatchling.builders.hooks.plugin.interface import BuildHookInterface
|
|
6
|
+
|
|
7
|
+
# GitHub REST endpoint for the releases of the upstream Lightpanda browser.
GITHUB_RELEASES = "https://api.github.com/repos/lightpanda-io/browser/releases"
# Tag of the upstream release whose assets are downloaded at build time.
RELEASE_TAG = "nightly"

# platform.system() value -> OS component of the release asset name.
PLATFORMS = dict(Darwin="macos", Linux="linux")

# platform.machine() value -> architecture component of the release asset name.
ARCHITECTURES = dict(
    arm64="aarch64",
    aarch64="aarch64",
    x86_64="x86_64",
    AMD64="x86_64",
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class BinaryDownloadHook(BuildHookInterface):
    """Hatch build hook that bundles the Lightpanda binary for the build host.

    Before a build runs, the hook makes sure
    ``src/lightpanda/bin/lightpanda-<arch>-<os>`` exists, downloading it from
    the ``RELEASE_TAG`` GitHub release when it is missing or empty.
    """

    PLUGIN_NAME = "custom"

    def initialize(self, version: str, build_data: dict) -> None:
        """Ensure the platform binary is present and tag the wheel accordingly.

        Args:
            version: Build version passed in by hatchling (unused here).
            build_data: Mutable build data shared with the builder.

        Raises:
            RuntimeError: If the host platform is unsupported, or the release
                or the matching asset cannot be found.
        """
        os_name, arch = self._detect_platform()
        bin_dir = Path(self.root) / "src" / "lightpanda" / "bin"
        bin_dir.mkdir(parents=True, exist_ok=True)
        binary_path = bin_dir / f"lightpanda-{arch}-{os_name}"

        # A zero-byte file is treated as missing so a broken earlier attempt
        # does not get packaged.
        if not binary_path.is_file() or binary_path.stat().st_size == 0:
            url = self._find_release_url(os_name, arch)
            self._download_file(url, binary_path)

        # The wheel ships a native executable, so it must not be tagged as a
        # pure-Python ``py3-none-any`` wheel that installs on any platform.
        if self.target_name == "wheel":
            build_data["pure_python"] = False
            build_data["infer_tag"] = True

    def _detect_platform(self) -> tuple[str, str]:
        """Return the ``(os_name, arch)`` pair used in release asset names.

        Raises:
            RuntimeError: If the OS or CPU architecture has no known mapping.
        """
        system, machine = platform.system(), platform.machine()
        if system not in PLATFORMS:
            raise RuntimeError(f"Unsupported OS: {system}")
        if machine not in ARCHITECTURES:
            raise RuntimeError(f"Unsupported arch: {machine}")
        return PLATFORMS[system], ARCHITECTURES[machine]

    def _find_release_url(self, os_name: str, arch: str) -> str:
        """Return the download URL of the binary asset for this platform.

        The release is fetched directly by tag rather than by scanning the
        paginated release list, so it is found regardless of how many newer
        releases exist.

        Raises:
            RuntimeError: If the release or the matching asset does not exist.
        """
        import urllib.error

        binary_name = f"lightpanda-{arch}-{os_name}"
        request = urllib.request.Request(f"{GITHUB_RELEASES}/tags/{RELEASE_TAG}")
        request.add_header("Accept", "application/vnd.github.v3+json")
        try:
            with urllib.request.urlopen(request, timeout=30) as response:
                release = json.loads(response.read().decode())
        except urllib.error.HTTPError as err:
            if err.code == 404:
                raise RuntimeError(f"Release '{RELEASE_TAG}' not found") from err
            raise

        for asset in release.get("assets", []):
            if asset.get("name") == binary_name:
                return asset["browser_download_url"]
        raise RuntimeError(f"Binary '{binary_name}' not found in release")

    def _download_file(self, url: str, dest: Path) -> None:
        """Download ``url`` to ``dest`` and make the result executable.

        The payload is written to a temporary sibling file and moved into
        place only once it is complete and executable, so an interrupted build
        never leaves a broken file at ``dest``.

        Raises:
            RuntimeError: If the server returns an empty body.
        """
        # response.read() without a size is kept on purpose: it raises on a
        # truncated body instead of silently returning fewer bytes.
        with urllib.request.urlopen(url, timeout=300) as response:
            payload = response.read()
        if not payload:
            raise RuntimeError(f"Downloaded binary from '{url}' is empty")

        # The dot prefix keeps the temporary file out of the "lightpanda-*"
        # globs used for packaging and for runtime lookup.
        partial = dest.with_name(f".{dest.name}.part")
        try:
            partial.write_bytes(payload)
            partial.chmod(0o755)
            partial.replace(dest)
        finally:
            partial.unlink(missing_ok=True)
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "lightpanda-py"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Python wrapper for Lightpanda browser with CDP server and Playwright integration for lightweight web scraping and automation"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [{ name = "Tom Clesius", email = "tomclesius@gmail.com" }]
|
|
7
|
+
requires-python = ">=3.13"
|
|
8
|
+
dependencies = []
|
|
9
|
+
|
|
10
|
+
[build-system]
|
|
11
|
+
requires = ["hatchling>=1.21.0"]
|
|
12
|
+
build-backend = "hatchling.build"
|
|
13
|
+
|
|
14
|
+
[tool.hatch.build.hooks.custom]
|
|
15
|
+
|
|
16
|
+
[tool.hatch.build.targets.sdist]
|
|
17
|
+
only-include = ["src/lightpanda", "hatch_build.py", "README.md"]
|
|
18
|
+
|
|
19
|
+
[tool.hatch.build.targets.wheel]
|
|
20
|
+
packages = ["src/lightpanda"]
|
|
21
|
+
artifacts = ["src/lightpanda/bin/lightpanda-*"]
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import subprocess
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Literal
|
|
5
|
+
import os
|
|
6
|
+
|
|
7
|
+
# Disable Lightpanda telemetry by default; an explicit user setting wins.
os.environ.setdefault("LIGHTPANDA_DISABLE_TELEMETRY", "true")

# Locate the bundled Lightpanda executable next to this module. Candidates are
# sorted so the choice is deterministic if more than one binary is present.
_BIN_DIR = Path(__file__).parent / "bin"
_BINARIES = sorted(_BIN_DIR.glob("lightpanda-*"))
if not _BINARIES:
    raise FileNotFoundError(
        f"No Lightpanda binary found in '{_BIN_DIR}'. "
        "The package may have been built for an unsupported platform."
    )
BINARY = str(_BINARIES[0])
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def build_cmd(*args, **options):
    """Assemble the argument list for invoking the Lightpanda binary.

    Args:
        *args: Positional arguments placed right after the binary path.
        **options: Command-line options. ``True`` yields a bare ``--key``
            flag, ``None`` and ``False`` are omitted, and any other value
            yields ``--key`` followed by ``str(value)``.

    Returns:
        The command as a list, starting with ``BINARY``.
    """
    command = [BINARY]
    command.extend(args)
    for name, setting in options.items():
        # Unset and disabled options contribute nothing to the command line.
        if setting is None or setting is False:
            continue
        flag = f"--{name}"
        if setting is True:
            command.append(flag)
        else:
            command += [flag, str(setting)]
    return command
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class Response:
    """Convenience wrapper around the text dumped by a page fetch."""

    def __init__(self, html: str) -> None:
        """Store the raw dump.

        Args:
            html: The page dump as text.
        """
        self.html = html

    @property
    def text(self):
        """Return the raw dump unchanged."""
        return self.html

    def json(self):
        """Parse the response body as JSON.

        The JSON text is taken from the first ``<pre>`` element when one is
        present (with or without attributes), and HTML entities in it are
        decoded before parsing. When there is no ``<pre>`` element, the whole
        body is parsed as JSON.

        Returns:
            The decoded JSON value.

        Raises:
            json.JSONDecodeError: If the extracted text is not valid JSON.
        """
        import re
        from html import unescape

        # Content runs to the first closing tag, or to the end of the text
        # when the closing tag is missing.
        found = re.search(
            r"<pre\b[^>]*>(.*?)(?:</pre>|\Z)",
            self.html,
            re.DOTALL | re.IGNORECASE,
        )
        if found is None:
            return json.loads(self.html)
        # Serialized HTML escapes "&", "<" and ">" in text; undo that so JSON
        # string values come back as they were served.
        return json.loads(unescape(found.group(1)))
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def serve(
    host: str = "127.0.0.1",
    port: int = 9222,
    timeout: int | None = 10,
    log_level: Literal["debug", "info", "warning", "error"] = "error",
    http_proxy: str | None = None,
    http_timeout: int | None = None,
) -> subprocess.Popen:
    """
    Launch the Lightpanda binary in ``serve`` mode (CDP server).

    The call returns as soon as the process has been spawned; it does not
    wait for the server to start accepting connections. The caller owns the
    returned process and is responsible for stopping it.

    Args:
        host: Host to bind the CDP server to (default: "127.0.0.1")
        port: Port to bind the CDP server to (default: 9222)
        timeout: Connection timeout in seconds (default: 10)
        log_level: Logging level (default: "error")
        http_proxy: HTTP proxy URL (optional)
        http_timeout: HTTP request timeout in seconds (optional)

    Returns:
        subprocess.Popen: Handle of the spawned Lightpanda process

    Example:
        >>> proc = lightpanda.serve(host='127.0.0.1', port=9222)
        >>> # Talk to the server with a CDP client
        >>> proc.kill()
    """
    # Options left as None are dropped by build_cmd.
    options = {
        "host": host,
        "port": port,
        "timeout": timeout,
        "log_level": log_level,
        "http_proxy": http_proxy,
        "http_timeout": http_timeout,
    }
    server = subprocess.Popen(build_cmd("serve", **options))
    print(f"🐼 Running Lightpanda's CDP server... {{ pid: {server.pid} }}")
    return server
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def fetch(
    url: str,
    *,
    with_base: bool = False,
    log_level: Literal["debug", "info", "warning", "error"] = "error",
    http_proxy: str | None = None,
    http_timeout: int | None = None,
) -> Response:
    """
    Fetch a page with an ephemeral Lightpanda browser.

    Runs the binary's ``fetch`` command with ``--dump`` and wraps what it
    prints on standard output.

    Args:
        url: The url to fetch
        with_base: Pass the ``--with_base`` flag to the binary (default: False)
        log_level: Logging level (default: "error")
        http_proxy: HTTP proxy URL (optional)
        http_timeout: HTTP request timeout in seconds (optional)

    Returns:
        Response: A convenience wrapper around the text response

    Raises:
        RuntimeError: If the Lightpanda process exits with a non-zero status

    Example:
        >>> response = lightpanda.fetch("http://example.com")
        >>> data = response.json()
    """
    cmd = build_cmd(
        "fetch",
        url,
        dump=True,
        with_base=with_base,
        log_level=log_level,
        http_proxy=http_proxy,
        http_timeout=http_timeout,
    )
    # Decode explicitly as UTF-8 so the result does not depend on the locale
    # of the calling process.
    # NOTE(review): assumes the binary emits UTF-8 — confirm against upstream.
    result = subprocess.run(cmd, capture_output=True, text=True, encoding="utf-8")
    if result.returncode != 0:
        raise RuntimeError(f"lightpanda failed: {result.stderr}")
    return Response(result.stdout)
|