pwbase 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(python -m pytest:*)",
5
+ "Bash(uv run pytest:*)"
6
+ ]
7
+ }
8
+ }
@@ -0,0 +1,11 @@
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
11
+ settings.json
@@ -0,0 +1 @@
1
+ 3.12
pwbase-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Floyd
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
pwbase-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,266 @@
1
+ Metadata-Version: 2.4
2
+ Name: pwbase
3
+ Version: 0.1.0
4
+ Summary: A lightweight async Playwright wrapper for Python that supports three browser launch strategies and can intercept authenticated HTTP sessions from live browser traffic.
5
+ Project-URL: Homepage, https://github.com/virgotagle/pwbase
6
+ Project-URL: Repository, https://github.com/virgotagle/pwbase
7
+ Project-URL: Issues, https://github.com/virgotagle/pwbase/issues
8
+ Author-email: Floyd <pagarfloyd@gmail.com>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2025 Floyd
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: automation,browser,cdp,http,playwright,scraping,stealth
32
+ Classifier: Development Status :: 3 - Alpha
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: License :: OSI Approved :: MIT License
35
+ Classifier: Programming Language :: Python :: 3
36
+ Classifier: Programming Language :: Python :: 3.12
37
+ Classifier: Programming Language :: Python :: 3.13
38
+ Classifier: Topic :: Internet :: WWW/HTTP :: Browsers
39
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
40
+ Requires-Python: >=3.12
41
+ Requires-Dist: playwright-stealth>=2.0.2
42
+ Requires-Dist: playwright>=1.58.0
43
+ Requires-Dist: python-dotenv>=1.2.1
44
+ Requires-Dist: requests>=2.32.5
45
+ Provides-Extra: dev
46
+ Requires-Dist: build>=1.4.0; extra == 'dev'
47
+ Requires-Dist: pytest-asyncio>=1.3.0; extra == 'dev'
48
+ Requires-Dist: pytest-mock>=3.15.1; extra == 'dev'
49
+ Requires-Dist: pytest>=9.0.2; extra == 'dev'
50
+ Requires-Dist: twine>=6.2.0; extra == 'dev'
51
+ Description-Content-Type: text/markdown
52
+
53
+ # pwbase
54
+
55
+ A lightweight async Playwright wrapper for Python that supports three browser launch strategies and can intercept authenticated HTTP sessions from live browser traffic.
56
+
57
+ ## Features
58
+
59
+ - Three browser modes: plain Playwright, stealth (bot-detection evasion), and CDP attachment
60
+ - Persistent browser state (cookies + localStorage) via `save_state` / `state_path`
61
+ - `BrowserSessionExtractor` — intercepts JSON responses and converts them into authenticated `requests.Session` objects
62
+ - Fully async, context-manager-friendly API
63
+
64
+ ## Requirements
65
+
66
+ - Python 3.12+
67
+ - [uv](https://github.com/astral-sh/uv) (recommended) or pip
68
+
69
+ ## Installation
70
+
71
+ ```bash
72
+ uv add pwbase
73
+ # or
74
+ pip install pwbase
75
+ ```
76
+
77
+ Install Playwright browsers after installing the package:
78
+
79
+ ```bash
80
+ playwright install chromium
81
+ ```
82
+
83
+ ## Quick Start
84
+
85
+ ```python
86
+ import asyncio
87
+ from pwbase import Browser, BrowserConfig, BrowserType
88
+
89
+ async def main():
90
+ async with Browser(BrowserConfig(type=BrowserType.STEALTH)) as browser:
91
+ page = await browser.get_page()
92
+ await page.goto("https://example.com")
93
+ print(await page.title())
94
+
95
+ asyncio.run(main())
96
+ ```
97
+
98
+ ## Browser Modes
99
+
100
+ | Mode | `BrowserType` | Description |
101
+ |---|---|---|
102
+ | Default | `DEFAULT` | Pure Playwright, no extras |
103
+ | Stealth | `STEALTH` | Applies `playwright-stealth` to reduce bot detection signals |
104
+ | CDP | `CDP` | Attaches to an existing Chrome instance via Chrome DevTools Protocol |
105
+
106
+ ### Default
107
+
108
+ ```python
109
+ Browser(BrowserConfig(type=BrowserType.DEFAULT))
110
+ ```
111
+
112
+ ### Stealth
113
+
114
+ ```python
115
+ Browser(BrowserConfig(type=BrowserType.STEALTH))
116
+ ```
117
+
118
+ ### CDP
119
+
120
+ Start Chrome with remote debugging enabled:
121
+
122
+ ```bash
123
+ google-chrome --remote-debugging-port=9222
124
+ ```
125
+
126
+ Then attach:
127
+
128
+ ```python
129
+ Browser(BrowserConfig(type=BrowserType.CDP, cdp_url="http://localhost:9222"))
130
+ ```
131
+
132
+ > **Note:** `headless`, `state_path`, `viewport`, and related options are ignored in CDP mode. `save_state()` is not available in CDP mode.
133
+
134
+ ## BrowserConfig Reference
135
+
136
+ ```python
137
+ @dataclass
138
+ class BrowserConfig:
139
+ type: BrowserType = BrowserType.DEFAULT
140
+ headless: bool = True
141
+ state_path: Path | None = None # Load/save cookies + localStorage
142
+ channel: str = "chrome" # Browser channel for STEALTH mode
143
+ cdp_url: str = "http://localhost:9222"
144
+ viewport: tuple[int, int] = (1920, 1080)
145
+ user_agent: str = "..." # Windows Chrome UA by default
146
+ locale: str = "en-US"
147
+ timezone: str = "America/New_York"
148
+ args: list[str] = [ # Extra Chromium flags
149
+ "--disable-blink-features=AutomationControlled",
150
+ "--no-sandbox",
151
+ ]
152
+ ```
153
+
154
+ ## Saving and Restoring Browser State
155
+
156
+ ```python
157
+ from pathlib import Path
158
+ from pwbase import Browser, BrowserConfig, BrowserType
159
+
160
+ config = BrowserConfig(
161
+ type=BrowserType.STEALTH,
162
+ state_path=Path("state.json"),
163
+ )
164
+
165
+ # First run — log in and save session
166
+ async with Browser(config) as browser:
167
+ page = await browser.get_page()
168
+ await page.goto("https://example.com/login")
169
+ # ... perform login ...
170
+ await browser.save_state()
171
+
172
+ # Subsequent runs — state is restored automatically
173
+ async with Browser(config) as browser:
174
+ page = await browser.get_page()
175
+ await page.goto("https://example.com/dashboard")
176
+ ```
177
+
178
+ ## Session Extraction
179
+
180
+ `BrowserSessionExtractor` extends `Browser` and intercepts JSON responses in real time. Use it to capture authenticated sessions without manually copying cookies or headers.
181
+
182
+ ```python
183
+ from pwbase import BrowserSessionExtractor, BrowserConfig, BrowserType
184
+
185
+ async with BrowserSessionExtractor(BrowserConfig(type=BrowserType.STEALTH)) as browser:
186
+ page = await browser.get_page()
187
+ await browser.start_recording(page)
188
+
189
+ await page.goto("https://example.com")
190
+ # Trigger the API call you want to capture, then:
191
+
192
+ response = browser.find_response("api/data")
193
+ if response:
194
+ session = browser.to_session(response)
195
+ r = session.get("https://example.com/api/data")
196
+ print(r.json())
197
+ ```
198
+
199
+ ### API
200
+
201
+ | Method | Description |
202
+ |---|---|
203
+ | `start_recording(page)` | Begin intercepting JSON responses on `page` |
204
+ | `stop_recording()` | Stop intercepting; safe to call if never started |
205
+ | `find_response(url_contains)` | Return the most recent captured response matching the substring |
206
+ | `find_all_responses(url_contains)` | Return all captured responses matching the substring |
207
+ | `wait_for_response(url_contains, timeout)` | Poll until a matching response is captured |
208
+ | `to_session(response)` | Build an authenticated `requests.Session` from a `CapturedResponse` |
209
+
210
+ ### CapturedResponse Fields
211
+
212
+ ```python
213
+ @dataclass
214
+ class CapturedResponse:
215
+ url: str
216
+ method: str
217
+ headers: dict[str, str] # Response headers
218
+ body: dict | list | None # Parsed JSON body
219
+ request_headers: dict[str, str] # Request headers (HTTP/2 pseudo-headers excluded from session)
220
+ request_post_data: str | None
221
+ cookies: list[Cookie]
222
+ ```
223
+
224
+ ## Manual Lifecycle
225
+
226
+ If you prefer not to use the context manager:
227
+
228
+ ```python
229
+ browser = Browser(BrowserConfig())
230
+ await browser.start()
231
+ page = await browser.get_page()
232
+ # ... do work ...
233
+ await browser.stop()
234
+ ```
235
+
236
+ ## Development
237
+
238
+ ```bash
239
+ # Install with dev dependencies
240
+ uv sync --extra dev
241
+
242
+ # Run tests
243
+ uv run pytest
244
+
245
+ # Run tests with output
246
+ uv run pytest -v
247
+ ```
248
+
249
+ ### Project Structure
250
+
251
+ ```
252
+ src/pwbase/
253
+ ├── __init__.py # Public API surface
254
+ ├── browser.py # Browser — core async Playwright wrapper
255
+ ├── browser_config.py # BrowserConfig dataclass
256
+ ├── browser_type.py # BrowserType enum
257
+ └── browser_session_extractor.py # BrowserSessionExtractor + CapturedResponse
258
+ tests/
259
+ ├── conftest.py # Shared async mock fixtures
260
+ ├── test_browser.py # Unit tests for Browser (all three modes)
261
+ └── test_browser_session_extractor.py
262
+ ```
263
+
264
+ ## License
265
+
266
+ MIT
pwbase-0.1.0/README.md ADDED
@@ -0,0 +1,214 @@
1
+ # pwbase
2
+
3
+ A lightweight async Playwright wrapper for Python that supports three browser launch strategies and can intercept authenticated HTTP sessions from live browser traffic.
4
+
5
+ ## Features
6
+
7
+ - Three browser modes: plain Playwright, stealth (bot-detection evasion), and CDP attachment
8
+ - Persistent browser state (cookies + localStorage) via `save_state` / `state_path`
9
+ - `BrowserSessionExtractor` — intercepts JSON responses and converts them into authenticated `requests.Session` objects
10
+ - Fully async, context-manager-friendly API
11
+
12
+ ## Requirements
13
+
14
+ - Python 3.12+
15
+ - [uv](https://github.com/astral-sh/uv) (recommended) or pip
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ uv add pwbase
21
+ # or
22
+ pip install pwbase
23
+ ```
24
+
25
+ Install Playwright browsers after installing the package:
26
+
27
+ ```bash
28
+ playwright install chromium
29
+ ```
30
+
31
+ ## Quick Start
32
+
33
+ ```python
34
+ import asyncio
35
+ from pwbase import Browser, BrowserConfig, BrowserType
36
+
37
+ async def main():
38
+ async with Browser(BrowserConfig(type=BrowserType.STEALTH)) as browser:
39
+ page = await browser.get_page()
40
+ await page.goto("https://example.com")
41
+ print(await page.title())
42
+
43
+ asyncio.run(main())
44
+ ```
45
+
46
+ ## Browser Modes
47
+
48
+ | Mode | `BrowserType` | Description |
49
+ |---|---|---|
50
+ | Default | `DEFAULT` | Pure Playwright, no extras |
51
+ | Stealth | `STEALTH` | Applies `playwright-stealth` to reduce bot detection signals |
52
+ | CDP | `CDP` | Attaches to an existing Chrome instance via Chrome DevTools Protocol |
53
+
54
+ ### Default
55
+
56
+ ```python
57
+ Browser(BrowserConfig(type=BrowserType.DEFAULT))
58
+ ```
59
+
60
+ ### Stealth
61
+
62
+ ```python
63
+ Browser(BrowserConfig(type=BrowserType.STEALTH))
64
+ ```
65
+
66
+ ### CDP
67
+
68
+ Start Chrome with remote debugging enabled:
69
+
70
+ ```bash
71
+ google-chrome --remote-debugging-port=9222
72
+ ```
73
+
74
+ Then attach:
75
+
76
+ ```python
77
+ Browser(BrowserConfig(type=BrowserType.CDP, cdp_url="http://localhost:9222"))
78
+ ```
79
+
80
+ > **Note:** `headless`, `state_path`, `viewport`, and related options are ignored in CDP mode. `save_state()` is not available in CDP mode.
81
+
82
+ ## BrowserConfig Reference
83
+
84
+ ```python
85
+ @dataclass
86
+ class BrowserConfig:
87
+ type: BrowserType = BrowserType.DEFAULT
88
+ headless: bool = True
89
+ state_path: Path | None = None # Load/save cookies + localStorage
90
+ channel: str = "chrome" # Browser channel for STEALTH mode
91
+ cdp_url: str = "http://localhost:9222"
92
+ viewport: tuple[int, int] = (1920, 1080)
93
+ user_agent: str = "..." # Windows Chrome UA by default
94
+ locale: str = "en-US"
95
+ timezone: str = "America/New_York"
96
+ args: list[str] = [ # Extra Chromium flags
97
+ "--disable-blink-features=AutomationControlled",
98
+ "--no-sandbox",
99
+ ]
100
+ ```
101
+
102
+ ## Saving and Restoring Browser State
103
+
104
+ ```python
105
+ from pathlib import Path
106
+ from pwbase import Browser, BrowserConfig, BrowserType
107
+
108
+ config = BrowserConfig(
109
+ type=BrowserType.STEALTH,
110
+ state_path=Path("state.json"),
111
+ )
112
+
113
+ # First run — log in and save session
114
+ async with Browser(config) as browser:
115
+ page = await browser.get_page()
116
+ await page.goto("https://example.com/login")
117
+ # ... perform login ...
118
+ await browser.save_state()
119
+
120
+ # Subsequent runs — state is restored automatically
121
+ async with Browser(config) as browser:
122
+ page = await browser.get_page()
123
+ await page.goto("https://example.com/dashboard")
124
+ ```
125
+
126
+ ## Session Extraction
127
+
128
+ `BrowserSessionExtractor` extends `Browser` and intercepts JSON responses in real time. Use it to capture authenticated sessions without manually copying cookies or headers.
129
+
130
+ ```python
131
+ from pwbase import BrowserSessionExtractor, BrowserConfig, BrowserType
132
+
133
+ async with BrowserSessionExtractor(BrowserConfig(type=BrowserType.STEALTH)) as browser:
134
+ page = await browser.get_page()
135
+ await browser.start_recording(page)
136
+
137
+ await page.goto("https://example.com")
138
+ # Trigger the API call you want to capture, then:
139
+
140
+ response = browser.find_response("api/data")
141
+ if response:
142
+ session = browser.to_session(response)
143
+ r = session.get("https://example.com/api/data")
144
+ print(r.json())
145
+ ```
146
+
147
+ ### API
148
+
149
+ | Method | Description |
150
+ |---|---|
151
+ | `start_recording(page)` | Begin intercepting JSON responses on `page` |
152
+ | `stop_recording()` | Stop intercepting; safe to call if never started |
153
+ | `find_response(url_contains)` | Return the most recent captured response matching the substring |
154
+ | `find_all_responses(url_contains)` | Return all captured responses matching the substring |
155
+ | `wait_for_response(url_contains, timeout)` | Poll until a matching response is captured |
156
+ | `to_session(response)` | Build an authenticated `requests.Session` from a `CapturedResponse` |
157
+
158
+ ### CapturedResponse Fields
159
+
160
+ ```python
161
+ @dataclass
162
+ class CapturedResponse:
163
+ url: str
164
+ method: str
165
+ headers: dict[str, str] # Response headers
166
+ body: dict | list | None # Parsed JSON body
167
+ request_headers: dict[str, str] # Request headers (HTTP/2 pseudo-headers excluded from session)
168
+ request_post_data: str | None
169
+ cookies: list[Cookie]
170
+ ```
171
+
172
+ ## Manual Lifecycle
173
+
174
+ If you prefer not to use the context manager:
175
+
176
+ ```python
177
+ browser = Browser(BrowserConfig())
178
+ await browser.start()
179
+ page = await browser.get_page()
180
+ # ... do work ...
181
+ await browser.stop()
182
+ ```
183
+
184
+ ## Development
185
+
186
+ ```bash
187
+ # Install with dev dependencies
188
+ uv sync --extra dev
189
+
190
+ # Run tests
191
+ uv run pytest
192
+
193
+ # Run tests with output
194
+ uv run pytest -v
195
+ ```
196
+
197
+ ### Project Structure
198
+
199
+ ```
200
+ src/pwbase/
201
+ ├── __init__.py # Public API surface
202
+ ├── browser.py # Browser — core async Playwright wrapper
203
+ ├── browser_config.py # BrowserConfig dataclass
204
+ ├── browser_type.py # BrowserType enum
205
+ └── browser_session_extractor.py # BrowserSessionExtractor + CapturedResponse
206
+ tests/
207
+ ├── conftest.py # Shared async mock fixtures
208
+ ├── test_browser.py # Unit tests for Browser (all three modes)
209
+ └── test_browser_session_extractor.py
210
+ ```
211
+
212
+ ## License
213
+
214
+ MIT
@@ -0,0 +1,57 @@
1
+ [project]
2
+ name = "pwbase"
3
+ version = "0.1.0"
4
+ description = "A lightweight async Playwright wrapper for Python that supports three browser launch strategies and can intercept authenticated HTTP sessions from live browser traffic."
5
+ readme = "README.md"
6
+ license = { file = "LICENSE" }
7
+ authors = [
8
+ { name = "Floyd", email = "pagarfloyd@gmail.com" }
9
+ ]
10
+ requires-python = ">=3.12"
11
+ keywords = ["playwright", "browser", "automation", "stealth", "cdp", "http", "scraping"]
12
+ classifiers = [
13
+ "Development Status :: 3 - Alpha",
14
+ "Intended Audience :: Developers",
15
+ "License :: OSI Approved :: MIT License",
16
+ "Programming Language :: Python :: 3",
17
+ "Programming Language :: Python :: 3.12",
18
+ "Programming Language :: Python :: 3.13",
19
+ "Topic :: Software Development :: Libraries :: Python Modules",
20
+ "Topic :: Internet :: WWW/HTTP :: Browsers",
21
+ ]
22
+ dependencies = [
23
+ "playwright>=1.58.0",
24
+ "playwright-stealth>=2.0.2",
25
+ "python-dotenv>=1.2.1",
26
+ "requests>=2.32.5",
27
+ ]
28
+
29
+ # Remove [project.scripts] entirely unless you have a real CLI entrypoint.
30
+ # If you do, make sure pwbase:main exists:
31
+ # src/pwbase/__main__.py or src/pwbase/__init__.py with a main() function.
32
+ #
33
+ # [project.scripts]
34
+ # pwbase = "pwbase:main"
35
+
36
+ [project.urls]
37
+ Homepage = "https://github.com/virgotagle/pwbase"
38
+ Repository = "https://github.com/virgotagle/pwbase"
39
+ Issues = "https://github.com/virgotagle/pwbase/issues"
40
+
41
+ [build-system]
42
+ requires = ["hatchling"]
43
+ build-backend = "hatchling.build"
44
+
45
+ [tool.hatch.build.targets.wheel]
46
+ packages = ["src/pwbase"] # adjust if your package is not under src/
47
+
48
+ # dev dependencies belong here, not in [dependency-groups]
49
+ # [dependency-groups] (PEP 735) is not exported to built package metadata, so build/twine and `pip install "pwbase[dev]"` never see it
50
+ [project.optional-dependencies]
51
+ dev = [
52
+ "build>=1.4.0",
53
+ "pytest>=9.0.2",
54
+ "pytest-asyncio>=1.3.0",
55
+ "pytest-mock>=3.15.1",
56
+ "twine>=6.2.0",
57
+ ]
@@ -0,0 +1,14 @@
1
+ """pwbase — Playwright browser toolkit."""
2
+
3
+ from .browser import Browser
4
+ from .browser_config import BrowserConfig
5
+ from .browser_session_extractor import BrowserSessionExtractor, CapturedResponse
6
+ from .browser_type import BrowserType
7
+
8
+ __all__ = [
9
+ "Browser",
10
+ "BrowserConfig",
11
+ "BrowserType",
12
+ "BrowserSessionExtractor",
13
+ "CapturedResponse",
14
+ ]