orcheems 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
orcheems/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ from .operator import Orcheemstrator
2
+ from .task.base import BaseTask
3
+ from .task.decorators import task_registration
4
+ from .login.schema import Credential, LoginResult
5
+ from .login.base import BaseLoginService, cookie_incomplete_handler
6
+ from .login.register import SiteLoginServiceRegister
7
+ from .session.manager import SessionManager
8
+ from .session.schema import SessionStatus, SessionResources
9
+
10
+ __all__ = [
11
+ "Orcheemstrator",
12
+ "BaseTask",
13
+ "task_registration",
14
+ "Credential",
15
+ "LoginResult",
16
+ "BaseLoginService",
17
+ "cookie_incomplete_handler",
18
+ "SiteLoginServiceRegister",
19
+ "SessionManager",
20
+ "SessionStatus",
21
+ "SessionResources",
22
+ ]
orcheems/browser.py ADDED
@@ -0,0 +1,234 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from dotenv import load_dotenv
5
+ from playwright.async_api import async_playwright, Browser, BrowserContext, Playwright
6
+ from typing import Any, Optional, Set
7
+
8
+ from .config import BROWSER
9
+
10
+ load_dotenv()
11
+
12
+
13
class BrowserManager:
    """
    Manage the Playwright browser lifecycle.

    Design principles:
    - The Browser can be shared within a worker/process to save resources.
    - A BrowserContext is NOT shared between users/tasks/sessions.
    - Pages are NOT shared between users/tasks/sessions.
    - A new context should be created using `new_context()` each time a task runs.
    - If an old session needs to be loaded, pass `storage_state`.
    - After the task is complete, save the storage_state if necessary and close the context.

    Recommended layout:
        BrowserManager
        └── Browser singleton for worker
            ├── BrowserContext separated for task A
            ├── BrowserContext separated for task B
            └── BrowserContext separated for task C
    """

    def __init__(self, max_concurrent_contexts: int = 50):
        """
        Args:
            max_concurrent_contexts: Upper bound on contexts tracked at once;
                `new_context()` raises once this many are open.
        """
        self._browser_cfg: dict[str, Any] = BROWSER

        # Playwright runtime; started at most once per BrowserManager lifecycle.
        self._playwright: Optional[Playwright] = None

        # Browser instance; None until `start()` succeeds.
        self._browser: Optional[Browser] = None

        # Track active contexts so `close()` can clean them up.
        self._contexts: Set[BrowserContext] = set()
        self._max_contexts = max_concurrent_contexts

    @property
    def is_dev_env(self) -> bool:
        """True when the APP_ENV environment variable is "DEV" (also the default when unset)."""
        return os.getenv("APP_ENV", "DEV") == "DEV"

    @property
    def is_started(self) -> bool:
        """True when a browser has been launched and is still connected."""
        return self._browser is not None and self._browser.is_connected()

    @property
    def browser(self) -> Browser:
        """
        Return the current browser.

        Read-only (no setter) so that the lifecycle stays managed by BrowserManager.

        Raises:
            RuntimeError: If the browser has not been started or is disconnected.
        """
        if not self._browser or not self._browser.is_connected():
            raise RuntimeError(
                "Browser has not been started. Call `await browser_manager.start()` first."
            )

        return self._browser

    async def start(self):
        """
        Start Playwright and launch the browser.

        Does nothing if the browser is already started, so calling it
        repeatedly is safe.

        Raises:
            RuntimeError: If Playwright cannot be started or the browser cannot be launched.
        """
        if self.is_started:
            return

        try:
            if self._playwright is None:
                self._playwright = await async_playwright().start()

            self._browser = await self._launch_browser()
        except Exception as e:
            raise RuntimeError(f"Failed to start browser: {e}") from e

    async def close(self):
        """
        Close all tracked contexts, then the browser, then the Playwright runtime.

        Should be called when shutting down the worker/app. It is not necessary
        to call it after each task, as the browser can be reused. Errors while
        closing are printed as warnings and do not abort the remaining steps.
        """
        # Close all active contexts first; iterate over a copy because
        # close_context() removes entries from the set.
        for context in list(self._contexts):
            await self.close_context(context)

        self._contexts.clear()

        # Then close the browser.
        if self._browser is not None:
            try:
                await self._browser.close()
            except Exception as e:
                print(f"Warning: Error closing browser: {e}")
            finally:
                self._browser = None

        # Finally stop Playwright.
        if self._playwright is not None:
            try:
                await self._playwright.stop()
            except Exception as e:
                print(f"Warning: Error stopping playwright: {e}")
            finally:
                self._playwright = None

    async def __aenter__(self):
        """
        Start the browser when entering the block:

            async with BrowserManager() as browser_manager:
                ...
        """
        await self.start()
        return self

    async def __aexit__(self, *args: object):
        """Close the browser when exiting the `async with` block."""
        await self.close()

    async def _launch_browser(self) -> Browser:
        """
        Launch the Chromium browser using the configured launch arguments.

        Note:
        - The browser is shared within the worker/process.
        - Cookies/sessions are not shared.
        - Each session creates/uses its own BrowserContext.

        Raises:
            RuntimeError: If Playwright is not started or the launch fails.
        """
        if self._playwright is None:
            raise RuntimeError("Playwright is not started. Call `await browser_manager.start()` first.")

        # The config defines its keys in upper case ("LAUNCH_ARGS"). The
        # lower-case spelling is still honoured as a fallback so that configs
        # written against the previous lookup keep working.
        launch_args = self._browser_cfg.get("LAUNCH_ARGS")
        if launch_args is None:
            launch_args = self._browser_cfg.get("launch_args", [])

        # If HEADLESS is set in config, use it.
        # If not, DEV shows the browser (headless=False); other environments run headless.
        headless = self._browser_cfg.get("HEADLESS")
        if headless is None:
            headless = not self.is_dev_env

        try:
            return await self._playwright.chromium.launch(
                headless=headless,
                args=launch_args,
            )
        except Exception as e:
            raise RuntimeError(f"Failed to launch browser: {e}") from e

    async def new_context(self, **kwargs: Any) -> BrowserContext:
        """
        Create a new BrowserContext and track it.

        This is the most important point for session isolation. Each new
        context has its own environment:
        - its own cookies
        - its own localStorage
        - its own sessionStorage
        - its own permissions
        - its own context-specific cache
        - its own pages

        To load a saved session, pass:

            context = await browser_manager.new_context(
                storage_state="sessions/vnpt/user_001.json"
            )

        Do not share a context between multiple tasks/users.

        Args:
            **kwargs: Options forwarded to `Browser.new_context()`; they
                override the defaults set here.

        Raises:
            RuntimeError: If the concurrency limit is reached or the context
                cannot be created.
        """
        if not self.is_started:
            await self.start()

        # Check concurrency limit.
        if len(self._contexts) >= self._max_contexts:
            raise RuntimeError(
                f"Too many concurrent contexts: {len(self._contexts)}/{self._max_contexts}"
            )

        # Default params for the browser context.
        defaults: dict[str, Any] = {
            "viewport": {"width": 1280, "height": 800},
            "ignore_https_errors": True,
            "accept_downloads": True,
        }

        user_agent = self._browser_cfg.get("USER_AGENT")
        if user_agent:
            defaults["user_agent"] = user_agent

        # Caller-supplied options win over the defaults.
        context_options = {**defaults, **kwargs}

        try:
            context = await self.browser.new_context(**context_options)
            self._contexts.add(context)
            return context
        except Exception as e:
            raise RuntimeError(f"Failed to create new context: {e}") from e

    async def close_context(self, context: BrowserContext):
        """
        Close a specific BrowserContext and remove it from tracking.

        Should be called immediately after a task/session is complete to free
        up a concurrency slot and memory. Errors while closing are printed as
        a warning; the context is removed from tracking either way.
        """
        if not context:
            return

        try:
            await context.close()
        except Exception as e:
            print(f"Warning: Error closing context: {e}")
        finally:
            self._contexts.discard(context)
orcheems/config.py ADDED
@@ -0,0 +1,13 @@
1
+ # App environment: decides whether to run with a browser or something else
2
+ APP_ENV = "PROD"
3
+
4
+ BROWSER = {
5
+ "USER_AGENT": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
6
+ "LAUNCH_ARGS": [
7
+ "--no-sandbox",
8
+ "--disable-setuid-sandbox",
9
+ "--disable-dev-shm-usage",
10
+ "--disable-blink-features=AutomationControlled",
11
+ "--disable-gpu",
12
+ ]
13
+ }
orcheems/events.py ADDED
@@ -0,0 +1,15 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from typing import Any
6
+
7
+
8
@dataclass
class SseEvent:
    """A typed event that can be serialized as one server-sent-event frame."""

    # Event name, emitted under the "type" key of the JSON payload.
    type: str
    # Arbitrary JSON-serializable body, emitted under the "data" key.
    data: Any = None

    def encode(self) -> str:
        """Return the event as a single `data:` frame terminated by a blank line."""
        body = {"type": self.type, "data": self.data}
        # ensure_ascii=False keeps non-ASCII text readable instead of \u-escaping it.
        serialized = json.dumps(body, ensure_ascii=False)
        return "data: " + serialized + "\n\n"
orcheems/log.py ADDED
@@ -0,0 +1,234 @@
1
+ """
2
+ Colored logging for everyflow-automation.
3
+ No external libraries required — uses plain ANSI escape codes.
4
+
5
+ Usage:
6
+ # main.py or any other entry point
7
+ from core.logging_config import setup_logging
8
+
9
+ setup_logging() # default: INFO, color enabled if the terminal supports it
10
+ setup_logging(level="DEBUG")
11
+ setup_logging(level="DEBUG", force_color=True) # force color even when piped/redirected
12
+ setup_logging(json=True) # JSON mode for production/k8s
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ import logging
19
+ import os
20
+ import sys
21
+ from datetime import datetime, timezone
22
+ from typing import Literal, Optional
23
+
24
+
25
+ # ──────────────────────────────────────────────
26
+ # ANSI color codes
27
+ # ──────────────────────────────────────────────
28
class _C:
    """ANSI SGR escape sequences used to colorize log output."""

    RESET = "\033[0m"
    BOLD = "\033[1m"
    DIM = "\033[2m"

    # text colors
    WHITE = "\033[97m"
    GRAY = "\033[90m"

    BLUE = "\033[34m"
    CYAN = "\033[36m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    RED = "\033[31m"
    PURPLE = "\033[35m"

    # bright variants
    B_GREEN = "\033[92m"
    B_YELLOW = "\033[93m"
    B_RED = "\033[91m"
    B_CYAN = "\033[96m"
    B_BLUE = "\033[94m"


# Width every level label is padded to, so the level column lines up.
_LEVEL_WIDTH = 7

# Maps a logging level to (ANSI color, fixed-width label).
_LEVEL_STYLE: dict[int, tuple[str, str]] = {
    logging.DEBUG: (_C.PURPLE, "DEBUG".ljust(_LEVEL_WIDTH)),
    logging.INFO: (_C.B_GREEN, "INFO".ljust(_LEVEL_WIDTH)),
    logging.WARNING: (_C.B_YELLOW, "WARNING".ljust(_LEVEL_WIDTH)),
    logging.ERROR: (_C.B_RED, "ERROR".ljust(_LEVEL_WIDTH)),
    # "CRITICAL" is one character wider than the column; it is kept whole
    # rather than truncated.
    logging.CRITICAL: (_C.B_RED, "CRITICAL"),
}


# ──────────────────────────────────────────────
# Colored formatter
# ──────────────────────────────────────────────
class ColoredFormatter(logging.Formatter):
    """
    Human-readable, optionally colorized single-line formatter.

    Format:
        10:42 24/06/26 │ INFO    │ module_name            │ message key=value

    The timestamp is the record's creation time in UTC. Any non-standard
    attributes on the record (e.g. passed via `extra={...}`) are appended as
    `key=value` pairs. Exception and stack information, when present, follow
    on subsequent lines.
    """

    MOD_WIDTH = 22
    # Pre-colored separator, kept for backward compatibility with code that
    # reads this attribute; format() builds its separator through _c() so that
    # use_color=False really produces escape-free output.
    SEP = f"{_C.GRAY} │ {_C.RESET}"

    # Standard LogRecord attributes that must not be printed as extras.
    # "asctime" is set on the record by any stdlib Formatter that handled the
    # same record earlier, so it has to be skipped as well.
    _SKIP = frozenset({
        "name", "msg", "args", "levelname", "levelno", "pathname",
        "filename", "module", "exc_info", "exc_text", "stack_info",
        "lineno", "funcName", "created", "msecs", "relativeCreated",
        "thread", "threadName", "processName", "process", "message",
        "taskName", "asctime",
    })

    def __init__(self, use_color: bool = True) -> None:
        """
        Args:
            use_color: When False, no ANSI escape codes are emitted at all.
        """
        super().__init__()
        self.use_color = use_color

    def _c(self, code: str, text: str) -> str:
        """Wrap *text* in the ANSI *code* and a reset, or return it unchanged when color is off."""
        if not self.use_color:
            return text
        return f"{code}{text}{_C.RESET}"

    def format(self, record: logging.LogRecord) -> str:
        """Render *record* as one line, plus traceback/stack lines if present."""
        # Use the record's own creation time rather than "now", so the
        # timestamp stays correct when the record is formatted later
        # (queued or buffered handlers).
        created = datetime.fromtimestamp(record.created, tz=timezone.utc)
        ts_str = self._c(_C.BLUE, created.strftime("%H:%M %d/%m/%y"))

        level_color, level_label = _LEVEL_STYLE.get(
            record.levelno,
            (_C.WHITE, record.levelname[:_LEVEL_WIDTH].ljust(_LEVEL_WIDTH)),
        )
        level_str = self._c(level_color, level_label)

        # Module name: last component of the logger name (e.g. "manager"),
        # truncated and padded to a fixed width.
        mod_raw = record.name.split(".")[-1]
        mod_padded = mod_raw[:self.MOD_WIDTH].ljust(self.MOD_WIDTH)
        mod_str = self._c(_C.CYAN, mod_padded)

        msg_str = self._c(_C.WHITE, record.getMessage())

        # Extra key=value pairs attached via logger.info("...", extra={...})
        # or set manually as attributes on the LogRecord.
        extras = self._format_extras(record)

        sep = self._c(_C.GRAY, " │ ")
        line = f"{ts_str}{sep}{level_str}{sep}{mod_str}{sep}{msg_str}{extras}"

        # Exception traceback, if any.
        if record.exc_info:
            exc = self.formatException(record.exc_info)
            line = f"{line}\n{self._c(_C.GRAY, exc)}"

        # Stack trace requested with stack_info=True, if any.
        if record.stack_info:
            stack = self.formatStack(record.stack_info)
            line = f"{line}\n{self._c(_C.GRAY, stack)}"

        return line

    def _format_extras(self, record: logging.LogRecord) -> str:
        """
        Collect the attributes that are not standard LogRecord fields and
        render them as ` key=value` pairs (empty string when there are none).

        Usage:
            logger.info("session locked", extra={"credential_id": "abc-123", "seconds": 3})
        """
        parts: list[str] = []
        for k, v in record.__dict__.items():
            if k.startswith("_") or k in self._SKIP:
                continue
            key_str = self._c(_C.B_CYAN, k)
            if isinstance(v, str):
                val_str = self._c(_C.B_GREEN, f"'{v}'")
            elif isinstance(v, (int, float)):
                val_str = self._c(_C.YELLOW, str(v))
            else:
                val_str = self._c(_C.PURPLE, repr(v))
            parts.append(f"{key_str}={val_str}")

        return (" " + " ".join(parts)) if parts else ""
142
+
143
+
144
+ # ──────────────────────────────────────────────
145
+ # JSON formatter (production / k8s / Graylog)
146
+ # ──────────────────────────────────────────────
147
class JsonFormatter(logging.Formatter):
    """Structured JSON — one line per record, easy to ingest into Graylog / Loki."""

    # Standard LogRecord attributes that are not copied into the payload.
    # "asctime" is set on the record by any stdlib Formatter that handled the
    # same record earlier, so it is skipped as well.
    SKIP = {
        "name", "msg", "args", "levelname", "levelno", "pathname",
        "filename", "module", "exc_info", "exc_text", "stack_info",
        "lineno", "funcName", "created", "msecs", "relativeCreated",
        "thread", "threadName", "processName", "process", "message",
        "taskName", "asctime",
    }

    def format(self, record: logging.LogRecord) -> str:
        """
        Serialize *record* to a single-line JSON object.

        The object always has "ts" (record creation time, UTC, ISO 8601),
        "level", "logger" and "message". Non-standard record attributes
        (e.g. from `extra={...}`) are added as further keys; "exc" and
        "stack" are added when the record carries exception or stack info.
        """
        # Use the record's own creation time rather than "now", so the
        # timestamp stays correct when the record is formatted later.
        created = datetime.fromtimestamp(record.created, tz=timezone.utc)
        payload: dict = {
            "ts": created.isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }
        for k, v in record.__dict__.items():
            if not k.startswith("_") and k not in self.SKIP:
                payload[k] = v
        if record.exc_info:
            payload["exc"] = self.formatException(record.exc_info)
        if record.stack_info:
            payload["stack"] = self.formatStack(record.stack_info)
        # default=repr: an extra value that is not JSON-serializable is logged
        # as its repr instead of raising and losing the whole log line.
        return json.dumps(payload, ensure_ascii=False, default=repr)
171
+
172
+
173
+ # ──────────────────────────────────────────────
174
+ # Setup helper
175
+ # ──────────────────────────────────────────────
176
def _supports_color() -> bool:
    """
    Decide whether colored output should be used.

    A non-empty NO_COLOR environment variable disables color; otherwise a
    non-empty FORCE_COLOR enables it; otherwise color is used only when
    stdout reports that it is a terminal.
    """
    env = os.environ
    if env.get("NO_COLOR"):
        return False
    if env.get("FORCE_COLOR"):
        return True

    stream = sys.stdout
    if not hasattr(stream, "isatty"):
        return False
    return stream.isatty()
183
+
184
+
185
def setup_logging(
    level: str = "INFO",
    json: bool = False,
    force_color: Optional[bool] = None,
    loggers: Optional[list[str]] = None,
) -> None:
    """
    Configure the root logger (and optionally some specific loggers).

    Replaces all handlers on the root logger with a single stdout handler.

    Args:
        level: Log level name — "DEBUG" | "INFO" | "WARNING" | "ERROR"
            (case-insensitive). An unrecognized name falls back to INFO.
        json: True → use JsonFormatter (production/k8s).
        force_color: None = auto-detect, True = on, False = off.
        loggers: Names of loggers to set to DEBUG individually, even if
            the root logger is at INFO.
            Example: ["task.app_operator", "session.manager"]

    Usage:
        # development
        setup_logging(level="DEBUG")

        # production / k8s
        setup_logging(json=True)

        # only debug two specific modules
        setup_logging(level="INFO", loggers=["session.manager", "task.base_task"])
    """
    # NOTE: the `json` parameter shadows the `json` module inside this
    # function; it is part of the public signature, so it is kept as is.
    if json:
        formatter: logging.Formatter = JsonFormatter()
    else:
        use_color = force_color if force_color is not None else _supports_color()
        formatter = ColoredFormatter(use_color=use_color)

    handler = logging.StreamHandler(sys.stdout)
    handler.setFormatter(formatter)

    # Only accept real numeric levels: getattr() would otherwise also return
    # unrelated attributes of the logging module (e.g. level="root"), which
    # setLevel() rejects with an exception.
    resolved_level = getattr(logging, level.upper(), None)
    if not isinstance(resolved_level, int):
        resolved_level = logging.INFO

    root = logging.getLogger()
    root.handlers.clear()
    root.addHandler(handler)
    root.setLevel(resolved_level)

    # Reduce noise from third-party libraries.
    for noisy in ("httpx", "httpcore", "uvicorn.access", "playwright"):
        logging.getLogger(noisy).setLevel(logging.WARNING)

    # Per-logger override.
    if loggers:
        for name in loggers:
            logging.getLogger(name).setLevel(logging.DEBUG)
@@ -0,0 +1,28 @@
1
+ # from .register import SiteLoginServiceRegister
2
+
3
+ # import importlib
4
+ # import pkgutil
5
+ # from . import sites as sites_pkg
6
+
7
+ # for _, module_name, _ in pkgutil.iter_modules(sites_pkg.__path__):
8
+ # importlib.import_module(f"login_service.sites.{module_name}")
9
+
10
+ # __all__ = ["SiteLoginServiceRegister"]
11
+
12
+ # from .schema import Credential, LoginResult
13
+
14
+ from .register import SiteLoginServiceRegister
15
+ from .schema import Credential, LoginResult
16
+ from .base import BaseLoginService, cookie_incomplete_handler
17
+
18
+ __all__ = [
19
+ "SiteLoginServiceRegister",
20
+ "Credential",
21
+ "LoginResult",
22
+ "BaseLoginService",
23
+ "cookie_incomplete_handler",
24
+ ]
25
+
26
+ # Site implementations live in app/sites/ and are discovered by the
27
+ # application at startup — not auto-imported here.
28
+ # In main.py: import app.sites (triggers @SiteLoginServiceRegister.register)