orcheems 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- orcheems-0.1.0/PKG-INFO +383 -0
- orcheems-0.1.0/README.pypi.md +359 -0
- orcheems-0.1.0/orcheems/__init__.py +22 -0
- orcheems-0.1.0/orcheems/browser.py +234 -0
- orcheems-0.1.0/orcheems/config.py +13 -0
- orcheems-0.1.0/orcheems/events.py +15 -0
- orcheems-0.1.0/orcheems/log.py +234 -0
- orcheems-0.1.0/orcheems/login/__init__.py +28 -0
- orcheems-0.1.0/orcheems/login/base.py +583 -0
- orcheems-0.1.0/orcheems/login/register.py +79 -0
- orcheems-0.1.0/orcheems/login/schema.py +86 -0
- orcheems-0.1.0/orcheems/operator.py +386 -0
- orcheems-0.1.0/orcheems/session/__init__.py +2 -0
- orcheems-0.1.0/orcheems/session/manager.py +373 -0
- orcheems-0.1.0/orcheems/session/schema.py +55 -0
- orcheems-0.1.0/orcheems/storage/__init__.py +3 -0
- orcheems-0.1.0/orcheems/storage/base.py +61 -0
- orcheems-0.1.0/orcheems/storage/local.py +97 -0
- orcheems-0.1.0/orcheems/storage/redis.py +200 -0
- orcheems-0.1.0/orcheems/task/__init__.py +2 -0
- orcheems-0.1.0/orcheems/task/base.py +210 -0
- orcheems-0.1.0/orcheems/task/decorators.py +61 -0
- orcheems-0.1.0/pyproject.toml +53 -0
orcheems-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,383 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: orcheems
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Centralized browser session orchestration framework built on Playwright and FastAPI
|
|
5
|
+
Project-URL: Homepage, https://github.com/yourname/orcheems
|
|
6
|
+
Project-URL: Repository, https://github.com/yourname/orcheems
|
|
7
|
+
License: MIT
|
|
8
|
+
Keywords: browser-automation,fastapi,playwright,session-management
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Framework :: FastAPI
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Internet :: WWW/HTTP
|
|
15
|
+
Classifier: Topic :: Software Development :: Libraries :: Application Frameworks
|
|
16
|
+
Requires-Python: >=3.12
|
|
17
|
+
Requires-Dist: fastapi>=0.115.0
|
|
18
|
+
Requires-Dist: playwright>=1.40.0
|
|
19
|
+
Requires-Dist: pydantic>=2.0.0
|
|
20
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
21
|
+
Requires-Dist: redis>=5.0.0
|
|
22
|
+
Requires-Dist: uvicorn>=0.30.0
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
|
|
25
|
+
# Orcheems
|
|
26
|
+
|
|
27
|
+
**Centralized browser session orchestration for Playwright + FastAPI.**
|
|
28
|
+
|
|
29
|
+
Orcheems solves one problem and solves it well: managing shared browser sessions across concurrent tasks without login conflicts, resource leaks, or race conditions. Built for internal automation services that need to stay alive under load.
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
pip install orcheems
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
---
|
|
36
|
+
|
|
37
|
+
## Why Orcheems
|
|
38
|
+
|
|
39
|
+
Running multiple Playwright tasks against the same authenticated site is harder than it looks. Naive implementations either log in on every request (slow, rate-limited) or share browser contexts between tasks (race conditions, session corruption). Orcheems sits in the middle: one session per credential, one task at a time per session, automatic cookie reuse, and a TTL watcher that cleans up idle contexts before they leak RAM.
|
|
40
|
+
|
|
41
|
+
The core idea: tasks declare what they want to do with a page. Orcheems handles everything else.
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## How it works
|
|
46
|
+
|
|
47
|
+
Three layers, each with a single responsibility:
|
|
48
|
+
|
|
49
|
+
```
|
|
50
|
+
BrowserManager — one shared Chromium process per worker
|
|
51
|
+
└── SessionManager — one context per credential, PENDING → READY ↔ LOCKED
|
|
52
|
+
└── BaseTask — where you write business logic, nothing else
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
**Session states:**
|
|
56
|
+
|
|
57
|
+
| State | Meaning |
|
|
58
|
+
|---|---|
|
|
59
|
+
| `PENDING` | Login in progress — slot reserved, all requests rejected |
|
|
60
|
+
| `READY` | Idle, available for the next task |
|
|
61
|
+
| `LOCKED` | Task running — no concurrent access allowed |
|
|
62
|
+
|
|
63
|
+
When a task calls `with_page()`, Orcheems logs in if needed, locks the session, runs your code, then releases the lock. If another request arrives while the session is LOCKED, it gets a `409` immediately — no queuing, no silent waiting, no corrupted state.
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Installation
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install orcheems
|
|
71
|
+
|
|
72
|
+
# Install Playwright browsers after
|
|
73
|
+
playwright install chromium
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
**Requirements:** Python 3.12+
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Quickstart
|
|
81
|
+
|
|
82
|
+
### 1. Implement a login service for your site
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
# app/sites/vnpt.py
|
|
86
|
+
from orcheems import BaseLoginService, SiteLoginServiceRegister
|
|
87
|
+
from orcheems.login.base import cookie_incomplete_handler
|
|
88
|
+
from playwright.async_api import BrowserContext, Page
|
|
89
|
+
|
|
90
|
+
@SiteLoginServiceRegister.register
|
|
91
|
+
class VNPTLoginService(BaseLoginService):
|
|
92
|
+
SITE = "vnpt"
|
|
93
|
+
|
|
94
|
+
async def _perform_login(
|
|
95
|
+
self,
|
|
96
|
+
page: Page,
|
|
97
|
+
context: BrowserContext,
|
|
98
|
+
credential,
|
|
99
|
+
) -> Page:
|
|
100
|
+
await page.goto(credential.base_url, wait_until="networkidle")
|
|
101
|
+
await page.fill("#UserName", credential.data["username"])
|
|
102
|
+
await page.fill("#Password", credential.data["password"])
|
|
103
|
+
await page.click("button[type='submit']")
|
|
104
|
+
return page
|
|
105
|
+
|
|
106
|
+
async def _is_session_valid(self, page: Page) -> bool:
|
|
107
|
+
try:
|
|
108
|
+
return await page.wait_for_selector("#logted", timeout=5000) is not None
|
|
109
|
+
except Exception:
|
|
110
|
+
return False
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Two methods to implement — that's it. `_perform_login` runs your login steps. `_is_session_valid` checks whether the resulting page is actually authenticated.
|
|
114
|
+
|
|
115
|
+
### 2. Write a task
|
|
116
|
+
|
|
117
|
+
```python
|
|
118
|
+
# app/tasks/invoice.py
|
|
119
|
+
from orcheems import BaseTask, Credential, task_registration
|
|
120
|
+
from fastapi import APIRouter
|
|
121
|
+
from pydantic import BaseModel
|
|
122
|
+
|
|
123
|
+
@task_registration(prefix="/vnpt", tags=["vnpt"])
|
|
124
|
+
class InvoiceDownloadTask(BaseTask):
|
|
125
|
+
|
|
126
|
+
def register_route(self, router: APIRouter):
|
|
127
|
+
|
|
128
|
+
class Body(BaseModel):
|
|
129
|
+
credential: Credential
|
|
130
|
+
invoice_id: str
|
|
131
|
+
|
|
132
|
+
@router.post("/download")
|
|
133
|
+
async def download(body: Body):
|
|
134
|
+
result = await self.with_page(
|
|
135
|
+
body.credential,
|
|
136
|
+
lambda page: self._fetch_invoice(page, body.invoice_id),
|
|
137
|
+
using_state=True, # try saved cookies first
|
|
138
|
+
ttl_seconds=120, # keep context alive for 2 min after task
|
|
139
|
+
)
|
|
140
|
+
return {"status": "ok", "data": result}
|
|
141
|
+
|
|
142
|
+
async def _fetch_invoice(self, page, invoice_id: str):
|
|
143
|
+
await page.goto(f"/invoices/{invoice_id}")
|
|
144
|
+
return await page.inner_text(".invoice-total")
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### 3. Add auto-discovery to each app package
|
|
148
|
+
|
|
149
|
+
`import app.sites` only runs `app/sites/__init__.py` — it does **not**
|
|
150
|
+
automatically import `vnpt.py`, `wfx.py`, or any other file inside the
|
|
151
|
+
package. Without auto-discovery, the decorators in those files never run
|
|
152
|
+
and both registries stay empty.
|
|
153
|
+
|
|
154
|
+
Add this to `app/sites/__init__.py` and `app/tasks/__init__.py`:
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
# app/sites/__init__.py (repeat identically for app/tasks/__init__.py)
|
|
158
|
+
import importlib
|
|
159
|
+
import pkgutil
|
|
160
|
+
from pathlib import Path
|
|
161
|
+
|
|
162
|
+
for _, module_name, _ in pkgutil.iter_modules([str(Path(__file__).parent)]):
|
|
163
|
+
importlib.import_module(f"{__name__}.{module_name}")
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Now `import app.sites` triggers every `@SiteLoginServiceRegister.register`
|
|
167
|
+
in the package, and `import app.tasks` triggers every `@task_registration`.
|
|
168
|
+
Adding a new site or task is just adding a new file — no other changes needed.
|
|
169
|
+
|
|
170
|
+
### 4. Wire everything together
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
# main.py
|
|
174
|
+
import app.sites # triggers @SiteLoginServiceRegister.register
|
|
175
|
+
import app.tasks # triggers @task_registration(...)
|
|
176
|
+
|
|
177
|
+
from orcheems import Orcheemstrator
|
|
178
|
+
from orcheems.storage import RedisStateStorage
|
|
179
|
+
from fastapi.middleware.cors import CORSMiddleware
|
|
180
|
+
|
|
181
|
+
operator = Orcheemstrator(
|
|
182
|
+
state_storage=RedisStateStorage(), # or LocalStateStorage(".cookies")
|
|
183
|
+
)
|
|
184
|
+
app = operator.auto_register_and_build()
|
|
185
|
+
|
|
186
|
+
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
uvicorn main:app --reload
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## Credential identity
|
|
196
|
+
|
|
197
|
+
Orcheems identifies accounts using a **UUIDv5** derived deterministically from `(site, base_url, data)`:
|
|
198
|
+
|
|
199
|
+
```python
|
|
200
|
+
from orcheems import Credential
|
|
201
|
+
|
|
202
|
+
credential = Credential(
|
|
203
|
+
site = "vnpt",
|
|
204
|
+
base_url = "https://example-tt78.vnpt-invoice.com.vn/",
|
|
205
|
+
data = {"username": "admin", "password": "secret"},
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
print(credential.credential_id)
|
|
209
|
+
# → "3f2a1b4c-..." — always the same for the same input
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
The same credential values (`site`, `base_url`, `data`) sent from any client always map to the same session slot. No external ID management is needed.
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## Session lifecycle
|
|
217
|
+
|
|
218
|
+
### Cookie reuse (bypass login)
|
|
219
|
+
|
|
220
|
+
Pass `using_state=True` to attempt login via saved cookies before triggering a full browser login:
|
|
221
|
+
|
|
222
|
+
```python
|
|
223
|
+
result = await self.with_page(
|
|
224
|
+
credential,
|
|
225
|
+
lambda page: do_work(page),
|
|
226
|
+
using_state=True,
|
|
227
|
+
)
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
If the saved state is invalid, Orcheems falls back to full login automatically.
|
|
231
|
+
|
|
232
|
+
### Multi-step cookie recovery
|
|
233
|
+
|
|
234
|
+
Some sites require an extra step (OTP, captcha re-entry) when cookies are partially valid. Use `@cookie_incomplete_handler`:
|
|
235
|
+
|
|
236
|
+
```python
|
|
237
|
+
from orcheems.login.base import cookie_incomplete_handler
|
|
238
|
+
|
|
239
|
+
class MyLoginService(BaseLoginService):
|
|
240
|
+
SITE = "mysite"
|
|
241
|
+
|
|
242
|
+
@cookie_incomplete_handler
|
|
243
|
+
async def handle_otp(self, context, page, credential):
|
|
244
|
+
await page.fill("#otp", credential.data["otp"])
|
|
245
|
+
await page.click("#submit")
|
|
246
|
+
return page
|
|
247
|
+
|
|
248
|
+
async def _perform_login(self, page, context, credential) -> Page:
|
|
249
|
+
...
|
|
250
|
+
|
|
251
|
+
async def _is_session_valid(self, page) -> bool:
|
|
252
|
+
...
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
### Keep-alive and TTL
|
|
256
|
+
|
|
257
|
+
By default, the browser context is closed immediately after a task completes. Use `keep_alive` or `ttl_seconds` to hold it open for reuse:
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
# Keep alive indefinitely until manually closed or server restart
|
|
261
|
+
await self.with_page(credential, work, keep_alive=True)
|
|
262
|
+
|
|
263
|
+
# Keep alive for 90 seconds, then auto-close
|
|
264
|
+
await self.with_page(credential, work, ttl_seconds=90)
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
The TTL watcher runs every 5 seconds in the background and only closes `READY` sessions — it never interrupts a running task.
|
|
268
|
+
|
|
269
|
+
---
|
|
270
|
+
|
|
271
|
+
## SSE streaming
|
|
272
|
+
|
|
273
|
+
For long-running tasks, use `with_page_stream()` to push progress events back to the client:
|
|
274
|
+
|
|
275
|
+
```python
|
|
276
|
+
@router.post("/crawl")
|
|
277
|
+
async def crawl(body: Body):
|
|
278
|
+
|
|
279
|
+
async def work(page, emit):
|
|
280
|
+
await emit("progress", {"step": "navigating"})
|
|
281
|
+
await page.goto("/data")
|
|
282
|
+
|
|
283
|
+
await emit("progress", {"step": "extracting"})
|
|
284
|
+
rows = await page.query_selector_all("tr")
|
|
285
|
+
|
|
286
|
+
return {"count": len(rows)}
|
|
287
|
+
|
|
288
|
+
return self.with_page_stream(body.credential, work, using_state=True)
|
|
289
|
+
```
|
|
290
|
+
|
|
291
|
+
The client receives a Server-Sent Events (SSE) stream in which each `data:` line carries one JSON-encoded event:
|
|
292
|
+
|
|
293
|
+
```
|
|
294
|
+
data: {"type": "progress", "data": {"step": "navigating"}}
|
|
295
|
+
data: {"type": "progress", "data": {"step": "extracting"}}
|
|
296
|
+
data: {"type": "done", "data": {"count": 42}}
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
---
|
|
300
|
+
|
|
301
|
+
## Storage backends
|
|
302
|
+
|
|
303
|
+
```python
|
|
304
|
+
from orcheems.storage import LocalStateStorage, RedisStateStorage
|
|
305
|
+
|
|
306
|
+
# Local files — good for development
|
|
307
|
+
LocalStateStorage(".cookies") # layout: .cookies/{site}/{credential_id}.json
|
|
308
|
+
|
|
309
|
+
# Redis — recommended for production
|
|
310
|
+
RedisStateStorage() # reads REDIS_URL from environment
|
|
311
|
+
RedisStateStorage("redis://localhost:6379/0", ttl_seconds=10800)
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
Implement `BaseStateStorage` to add your own backend (S3, database, etc.).
|
|
315
|
+
|
|
316
|
+
---
|
|
317
|
+
|
|
318
|
+
## Management API
|
|
319
|
+
|
|
320
|
+
Orcheems mounts a built-in management router on every app:
|
|
321
|
+
|
|
322
|
+
| Method | Path | Description |
|
|
323
|
+
|---|---|---|
|
|
324
|
+
| `GET` | `/health` | Liveness check + storage status |
|
|
325
|
+
| `GET` | `/sessions` | List all active sessions |
|
|
326
|
+
| `POST` | `/sessions/status` | Check one session by Credential |
|
|
327
|
+
| `DELETE` | `/sessions/{credential_id}` | Force-close a READY session |
|
|
328
|
+
|
|
329
|
+
**Guard pattern** — call `/sessions/status` before sending a task request to detect conflicts cheaply, before any browser resource is allocated:
|
|
330
|
+
|
|
331
|
+
```bash
|
|
332
|
+
POST /sessions/status
|
|
333
|
+
{"site": "vnpt", "base_url": "https://...", "data": {...}}
|
|
334
|
+
|
|
335
|
+
# 200 → {"action": "proceed", "ready": true}
|
|
336
|
+
# 409 → {"action": "wait", "ready": false} # LOCKED or PENDING
|
|
337
|
+
# 404 → {"action": "login_required", "ready": false} # no session yet
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
---
|
|
341
|
+
|
|
342
|
+
## Manual registration
|
|
343
|
+
|
|
344
|
+
Auto-discovery via `auto_register_and_build()` is the recommended pattern, but you can register tasks manually:
|
|
345
|
+
|
|
346
|
+
```python
|
|
347
|
+
from orcheems import Orcheemstrator
|
|
348
|
+
from orcheems.storage import LocalStateStorage
|
|
349
|
+
from app.tasks.invoice import InvoiceDownloadTask
|
|
350
|
+
from app.tasks.stock import StockTask
|
|
351
|
+
|
|
352
|
+
app = (
|
|
353
|
+
Orcheemstrator(state_storage=LocalStateStorage(".cookies"))
|
|
354
|
+
.register_task(InvoiceDownloadTask(), prefix="/invoice", tags=["invoice"])
|
|
355
|
+
.register_task(StockTask(), prefix="/stock", tags=["stock"])
|
|
356
|
+
.build()
|
|
357
|
+
)
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
---
|
|
361
|
+
|
|
362
|
+
## Project layout
|
|
363
|
+
|
|
364
|
+
```
|
|
365
|
+
your-project/
|
|
366
|
+
├── orcheems/ # the framework — don't edit
|
|
367
|
+
├── app/
|
|
368
|
+
│ ├── sites/
|
|
369
|
+
│ │ ├── __init__.py # auto-discovers all site modules
|
|
370
|
+
│ │ ├── vnpt.py # @SiteLoginServiceRegister.register
|
|
371
|
+
│ │ └── wfx.py
|
|
372
|
+
│ └── tasks/
|
|
373
|
+
│ ├── __init__.py # auto-discovers all task modules
|
|
374
|
+
│ └── invoice.py # @task_registration(...)
|
|
375
|
+
├── main.py # entry point
|
|
376
|
+
└── pyproject.toml
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
---
|
|
380
|
+
|
|
381
|
+
## License
|
|
382
|
+
|
|
383
|
+
MIT
|