omnius 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4959 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +630665 -0
- package/dist/launcher.cjs +78 -0
- package/dist/postinstall-daemon.cjs +776 -0
- package/dist/preinstall.cjs +92 -0
- package/dist/scripts/autoresearch-prepare.py +459 -0
- package/dist/scripts/autoresearch-train.py +661 -0
- package/dist/scripts/crawlee-scraper.py +358 -0
- package/dist/scripts/live-nemotron.py +478 -0
- package/dist/scripts/live-whisper.py +242 -0
- package/dist/scripts/ocr-advanced.py +571 -0
- package/dist/scripts/start-moondream.py +112 -0
- package/dist/scripts/tor/UPSTREAM-README.md +148 -0
- package/dist/scripts/tor/destroy_tor.sh +29 -0
- package/dist/scripts/tor/tor_setup.sh +163 -0
- package/dist/scripts/transcribe-file.py +63 -0
- package/dist/scripts/web_scrape.py +1295 -0
- package/npm-shrinkwrap.json +7412 -0
- package/package.json +142 -0
- package/prompts/agentic/system-large.md +569 -0
- package/prompts/agentic/system-medium.md +211 -0
- package/prompts/agentic/system-small.md +114 -0
- package/prompts/compaction/context-compaction.md +44 -0
- package/prompts/personality/level-1-minimal.md +3 -0
- package/prompts/personality/level-2-concise.md +3 -0
- package/prompts/personality/level-4-explanatory.md +3 -0
- package/prompts/personality/level-5-thorough.md +3 -0
- package/prompts/personality/level-autist.md +3 -0
- package/prompts/personality/level-stark.md +3 -0
- package/prompts/runners/dispatcher.md +24 -0
- package/prompts/runners/editor.md +44 -0
- package/prompts/runners/evaluator.md +30 -0
- package/prompts/runners/merge-summary.md +9 -0
- package/prompts/runners/normalizer.md +23 -0
- package/prompts/runners/planner.md +33 -0
- package/prompts/runners/scout.md +39 -0
- package/prompts/runners/verifier.md +36 -0
- package/prompts/skill-builder/seed-analysis.md +30 -0
- package/prompts/skill-builder/skill-expansion.md +76 -0
- package/prompts/skill-builder/skill-validation.md +31 -0
- package/prompts/templates/analysis.md +14 -0
- package/prompts/templates/code-review.md +16 -0
- package/prompts/templates/code.md +13 -0
- package/prompts/templates/document.md +13 -0
- package/prompts/templates/error-diagnosis.md +14 -0
- package/prompts/templates/general.md +9 -0
- package/prompts/templates/plan.md +15 -0
- package/prompts/templates/system.md +16 -0
- package/prompts/tui/dmn-gather.md +128 -0
- package/prompts/tui/dream-consolidate.md +48 -0
- package/prompts/tui/dream-lucid-eval.md +17 -0
- package/prompts/tui/dream-lucid-implement.md +14 -0
- package/prompts/tui/dream-stages.md +19 -0
- package/prompts/tui/emotion-behavioral.md +2 -0
- package/prompts/tui/emotion-center.md +12 -0
- package/voices/personaplex/OverBarn.pt +0 -0
- package/voices/personaplex/clone-voice.py +384 -0
- package/voices/personaplex/dequant-loader.py +174 -0
- package/voices/personaplex/quantize-weights.py +167 -0
|
@@ -0,0 +1,1295 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Chrome automation microservice for Hydra.
|
|
5
|
+
|
|
6
|
+
Boot behavior mirrors other Hydra services:
|
|
7
|
+
- Ensures a private virtualenv and installs dependencies on first run.
|
|
8
|
+
- Persists configuration in .env with sane defaults.
|
|
9
|
+
- Applies global rate limiting and concurrency guard to Selenium operations.
|
|
10
|
+
- Exposes REST endpoints for browser lifecycle, navigation, DOM capture, and screenshots.
|
|
11
|
+
- Streams structured events over Server-Sent Events so clients can react in real time.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import base64
|
|
17
|
+
import ipaddress
|
|
18
|
+
import json
|
|
19
|
+
import os
|
|
20
|
+
import platform
|
|
21
|
+
import random
|
|
22
|
+
import shutil
|
|
23
|
+
import subprocess
|
|
24
|
+
import sys
|
|
25
|
+
import threading
|
|
26
|
+
import time
|
|
27
|
+
import uuid
|
|
28
|
+
from pathlib import Path
|
|
29
|
+
from queue import Empty, Queue
|
|
30
|
+
from typing import Dict, Optional
|
|
31
|
+
|
|
32
|
+
# ──────────────────────────────────────────────────────────────
|
|
33
|
+
# 0) Embedded venv bootstrap (same pattern as other services)
|
|
34
|
+
# ──────────────────────────────────────────────────────────────
|
|
35
|
+
# Private virtualenv used by the bootstrap below. It is resolved against the
# *current working directory*, so its location depends on where the script is
# launched from.
VENV_DIR = Path.cwd().joinpath(".venv")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _in_venv() -> bool:
|
|
39
|
+
base = getattr(sys, "base_prefix", None)
|
|
40
|
+
return base is not None and sys.prefix != base
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _ensure_venv_and_reexec() -> None:
    """Create the private virtualenv if needed and re-exec this script in it.

    Exits with status 1 on Python older than 3.9. Returns immediately when the
    interpreter is already running inside a virtualenv. Otherwise the venv at
    ``VENV_DIR`` is created (or repaired when its interpreter is missing), pip
    is upgraded inside it, and the current process is replaced via
    ``os.execve`` with the venv interpreter running the same ``sys.argv``.

    Raises:
        subprocess.CalledProcessError: if venv creation or the pip upgrade fails.
    """
    if sys.version_info < (3, 9):
        print("ERROR: Python 3.9+ required.", file=sys.stderr)
        sys.exit(1)
    if _in_venv():
        return

    if os.name == "nt":
        bin_dir = VENV_DIR / "Scripts"
        python_bin = bin_dir / "python.exe"
    else:
        bin_dir = VENV_DIR / "bin"
        python_bin = bin_dir / "python"

    # Key on the interpreter rather than the directory so that a half-created
    # venv (directory present, interpreter missing) is rebuilt instead of
    # making the execve below fail.
    if not python_bin.exists():
        print(f"[bootstrap] creating virtualenv at {VENV_DIR}", file=sys.stderr)
        subprocess.check_call([sys.executable, "-m", "venv", str(VENV_DIR)])
        # Upgrade pip through "python -m pip": invoking the pip executable to
        # replace itself fails on Windows because the file is in use.
        subprocess.check_call([str(python_bin), "-m", "pip", "install", "--upgrade", "pip"])

    new_env = os.environ.copy()
    new_env["VIRTUAL_ENV"] = str(VENV_DIR)
    # Prepend the venv's executable directory; skip an empty PATH so we never
    # produce a trailing separator (which POSIX shells treat as ".").
    inherited_path = new_env.get("PATH", "")
    path_parts = [str(bin_dir)]
    if inherited_path:
        path_parts.append(inherited_path)
    new_env["PATH"] = os.pathsep.join(path_parts)

    os.execve(str(python_bin), [str(python_bin), *sys.argv], new_env)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
_ensure_venv_and_reexec()
|
|
66
|
+
|
|
67
|
+
# ──────────────────────────────────────────────────────────────
|
|
68
|
+
# 1) One-time dependency install + config scaffold
|
|
69
|
+
# ──────────────────────────────────────────────────────────────
|
|
70
|
+
import subprocess # noqa: E402 (re-import after re-exec)
|
|
71
|
+
|
|
72
|
+
# Absolute location of this script and the directory that holds its data
# files (.env, captured frames).
SCRIPT_PATH = Path(__file__).resolve()
SCRIPT_DIR = SCRIPT_PATH.parent
# The "dependencies installed" marker is stored inside the active environment
# (sys.prefix) rather than next to the script. The private venv is created
# relative to the current working directory, so a marker beside the script
# would make a launch from a different directory build a fresh venv yet skip
# the dependency install, and the runtime imports would then fail.
SETUP_MARKER = Path(sys.prefix) / ".scrape_setup_complete"
OUT_DIR = SCRIPT_DIR / "frames"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _pip_install(*pkgs: str) -> None:
    """Run ``pip install`` for *pkgs* with the current interpreter.

    The arguments are passed through verbatim, so pip options such as
    ``--upgrade`` may be mixed with package names.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    command = [sys.executable, "-m", "pip", "install"]
    command.extend(pkgs)
    subprocess.check_call(command)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# First run only: install runtime dependencies, scaffold .env and the frames
# directory, drop the marker, then restart so the imports below resolve.
if not SETUP_MARKER.exists():
    runtime_packages = (
        "Flask",
        "Flask-Cors",
        "python-dotenv",
        "requests",
        "beautifulsoup4",
        "lxml",
        "selenium",
        "webdriver-manager",
        "pillow",
    )
    _pip_install("--upgrade", "pip", *runtime_packages)

    env_path = SCRIPT_DIR / ".env"
    if not env_path.exists():
        # NOTE(review): these defaults bind to every interface with
        # authentication switched off; confirm that is intended for any
        # deployment reachable from other hosts.
        default_settings = (
            f"SCRAPE_API_KEY={uuid.uuid4().hex}",
            "SCRAPE_BIND=0.0.0.0",
            "SCRAPE_PORT=8130",
            "SCRAPE_REQUIRE_AUTH=0",
            "SCRAPE_MAX_CONCURRENCY=4",
            "SCRAPE_QUEUE_TIMEOUT_S=0",
            "SCRAPE_RATE_LIMIT_RPS=60",
            "SCRAPE_RATE_LIMIT_BURST=180",
            "SCRAPE_RATE_LIMIT_LOCAL_BYPASS=1",
            "SCRAPE_RATE_LIMIT_DISABLED=0",
            "SCRAPE_RATE_LIMIT_WHITELIST=",
            "SCRAPE_FILE_TTL_S=900",
            "SCRAPE_FRAME_KEEPALIVE_S=45",
            "SCRAPE_HEADLESS_DEFAULT=1",
        )
        env_text = "".join(f"{setting}\n" for setting in default_settings)
        env_path.write_text(env_text, encoding="utf-8")

    OUT_DIR.mkdir(parents=True, exist_ok=True)
    SETUP_MARKER.write_text("ok", encoding="utf-8")
    # Replace the process so the freshly installed packages are importable.
    os.execv(sys.executable, [sys.executable, *sys.argv])
|
|
118
|
+
|
|
119
|
+
# ──────────────────────────────────────────────────────────────
|
|
120
|
+
# 2) Runtime imports (after env ready)
|
|
121
|
+
# ──────────────────────────────────────────────────────────────
|
|
122
|
+
from flask import Flask, Response, jsonify, request, send_from_directory, g # noqa: E402
|
|
123
|
+
from flask_cors import CORS # noqa: E402
|
|
124
|
+
from dotenv import load_dotenv # noqa: E402
|
|
125
|
+
from PIL import Image # noqa: E402
|
|
126
|
+
from selenium import webdriver # noqa: E402
|
|
127
|
+
from selenium.common.exceptions import TimeoutException, WebDriverException # noqa: E402
|
|
128
|
+
from selenium.webdriver.common.by import By # noqa: E402
|
|
129
|
+
from selenium.webdriver.common.keys import Keys # noqa: E402
|
|
130
|
+
from selenium.webdriver.chrome.options import Options # noqa: E402
|
|
131
|
+
from selenium.webdriver.chrome.service import Service # noqa: E402
|
|
132
|
+
from selenium.webdriver.support import expected_conditions as EC # noqa: E402
|
|
133
|
+
from selenium.webdriver.support.ui import WebDriverWait # noqa: E402
|
|
134
|
+
from webdriver_manager.chrome import ChromeDriverManager # noqa: E402
|
|
135
|
+
from selenium.webdriver.common.action_chains import ActionChains # noqa: E402
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def log_message(msg: str, level: str = "INFO") -> None:
    """Print *msg* to stdout prefixed with a local timestamp and the level.

    The level is upper-cased; output has the form
    ``[YYYY-MM-DD HH:MM:SS] [LEVEL] message``.
    """
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    severity = level.upper()
    print("[{}] [{}] {}".format(stamp, severity, msg))
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class Tools:
    """Static helpers that drive a single shared Selenium Chrome session.

    All methods operate on the class-level ``_driver``. Interaction helpers
    return a human-readable status string; failures are reported as strings
    starting with ``"Error"`` rather than raised, except where noted.
    """

    # Process-wide driver; None whenever no browser session is active.
    _driver: Optional["webdriver.Chrome"] = None

    # Chromium locations probed when no browser is found through CHROME_BIN/PATH.
    _CHROME_FALLBACK_PATHS = (
        "/snap/bin/chromium",
        "/usr/bin/chromium-browser",
        "/usr/bin/chromium",
    )

    @staticmethod
    def _find_system_chromedriver() -> Optional[str]:
        """Return the first chromedriver that exists, is executable and runs.

        Candidates are the PATH lookup followed by well-known install
        locations. Each one is verified by running ``--version``.
        """
        candidates = [
            shutil.which("chromedriver"),
            "/usr/bin/chromedriver",
            "/usr/local/bin/chromedriver",
            "/snap/bin/chromium.chromedriver",
            "/usr/lib/chromium-browser/chromedriver",
            "/opt/homebrew/bin/chromedriver",
        ]
        for path in filter(None, candidates):
            if not (os.path.isfile(path) and os.access(path, os.X_OK)):
                continue
            try:
                subprocess.run(
                    [path, "--version"],
                    check=True,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )
            except (OSError, subprocess.SubprocessError):
                continue
            return path
        return None

    @staticmethod
    def _resolve_chrome_binary() -> Optional[str]:
        """Pick the Chrome/Chromium binary to launch, or None if none is found.

        ``CHROME_BIN`` is honoured verbatim as an explicit override. Otherwise
        PATH is searched, then the known install paths are probed, and only a
        path that actually exists is returned. (A chain of ``or`` over literal
        paths would always stop at the first literal, existing or not.)
        """
        override = os.getenv("CHROME_BIN")
        if override:
            return override
        for name in ("google-chrome", "chromium-browser", "chromium"):
            found = shutil.which(name)
            if found:
                return found
        for path in Tools._CHROME_FALLBACK_PATHS:
            if os.path.exists(path):
                return path
        return None

    @staticmethod
    def _parse_chrome_version(raw: str) -> Optional[str]:
        """Extract the dotted version from ``<browser> --version`` output.

        Handles both ``Chromium 119.0.6045.159 snap`` and
        ``Google Chrome 120.0.6099.109`` (where the version is the third word).
        Returns None when no version-looking token is present.
        """
        for token in (raw or "").split():
            if token[0].isdigit() and "." in token:
                return token
        return None

    @staticmethod
    def open_browser(headless: bool = False, force_new: bool = False) -> str:
        """Launch Chrome, trying several driver sources in order.

        Order: Selenium-Manager, snap chromedriver, system chromedriver, an
        architecture-specific download (ARM64) or webdriver-manager (x86_64),
        and finally ``sudo snap install chromium``.

        Args:
            headless: start Chrome with ``--headless=new``.
            force_new: quit any existing session first instead of reusing it.

        Returns:
            A status string naming the strategy that succeeded, or
            ``"Browser already open"``.

        Raises:
            RuntimeError: when every strategy fails.
        """
        if force_new and Tools._driver:
            try:
                Tools._driver.quit()
            except Exception as exc:
                # Best effort: the old session may already be dead.
                log_message(f"[open_browser] Ignoring error while closing previous browser: {exc}", "WARNING")
            Tools._driver = None

        if Tools._driver:
            return "Browser already open"

        chrome_bin = Tools._resolve_chrome_binary()

        opts = Options()
        if chrome_bin:
            opts.binary_location = chrome_bin
        if headless:
            opts.add_argument("--headless=new")
        opts.add_argument("--window-size=1920,1080")
        opts.add_argument("--disable-gpu")
        opts.add_argument("--no-sandbox")
        opts.add_argument("--disable-dev-shm-usage")
        opts.add_argument("--remote-allow-origins=*")
        opts.add_argument(f"--remote-debugging-port={random.randint(45000, 65000)}")

        try:
            log_message("[open_browser] Trying Selenium-Manager…", "DEBUG")
            Tools._driver = webdriver.Chrome(options=opts)
            log_message("[open_browser] Launched via Selenium-Manager.", "SUCCESS")
            return "Browser launched (selenium-manager)"
        except WebDriverException as e:
            log_message(f"[open_browser] Selenium-Manager failed: {e}", "WARNING")

        snap_drv = "/snap/chromium/current/usr/lib/chromium-browser/chromedriver"
        if os.path.exists(snap_drv):
            try:
                log_message(f"[open_browser] Using snap chromedriver at {snap_drv}", "DEBUG")
                Tools._driver = webdriver.Chrome(service=Service(snap_drv), options=opts)
                log_message("[open_browser] Launched via snap chromedriver.", "SUCCESS")
                return "Browser launched (snap chromedriver)"
            except WebDriverException as e:
                log_message(f"[open_browser] Snap chromedriver failed: {e}", "WARNING")

        sys_drv = Tools._find_system_chromedriver()
        if sys_drv:
            try:
                log_message(f"[open_browser] Trying system chromedriver at {sys_drv}", "DEBUG")
                Tools._driver = webdriver.Chrome(service=Service(sys_drv), options=opts)
                log_message("[open_browser] Launched via system chromedriver.", "SUCCESS")
                return "Browser launched (system chromedriver)"
            except WebDriverException as e:
                log_message(f"[open_browser] System chromedriver failed: {e}", "WARNING")

        arch = (platform.machine() or "").lower()
        if arch in ("aarch64", "arm64", "armv8l", "armv7l") and chrome_bin:
            try:
                raw = subprocess.check_output([chrome_bin, "--version"]).decode().strip()
                ver = Tools._parse_chrome_version(raw)
                if not ver:
                    raise ValueError(f"cannot parse browser version from {raw!r}")
                url = (
                    f"https://edgedl.me.gvt1.com/edgedl/chrome/chrome-for-testing/"
                    f"{ver}/linux-arm64/chromedriver-linux-arm64.zip"
                )
                # NOTE(review): fixed, predictable paths under /tmp plus sudo;
                # also confirm the archive really unpacks to /tmp/chromedriver.
                tmp_zip = "/tmp/chromedriver_arm64.zip"
                log_message(f"[open_browser] Downloading ARM64 driver from {url}", "DEBUG")
                subprocess.check_call(["wget", "-qO", tmp_zip, url])
                subprocess.check_call(["unzip", "-o", tmp_zip, "-d", "/tmp"])
                subprocess.check_call(["sudo", "mv", "/tmp/chromedriver", "/usr/local/bin/chromedriver"])
                subprocess.check_call(["sudo", "chmod", "+x", "/usr/local/bin/chromedriver"])
                # Fall back to the path just installed if it is not on PATH.
                drv = shutil.which("chromedriver") or "/usr/local/bin/chromedriver"
                log_message(f"[open_browser] Installed ARM64 driver at {drv}", "DEBUG")
                Tools._driver = webdriver.Chrome(service=Service(drv), options=opts)
                log_message("[open_browser] Launched via downloaded ARM64 chromedriver.", "SUCCESS")
                return "Browser launched (downloaded ARM64 chromedriver)"
            except Exception as e:
                log_message(f"[open_browser] ARM64 download/install failed: {e}", "WARNING")

        if arch in ("x86_64", "amd64") and chrome_bin:
            try:
                raw = subprocess.check_output([chrome_bin, "--version"]).decode().strip()
                ver = Tools._parse_chrome_version(raw)
                browser_major = ver.split(".")[0] if ver else "latest"
            except Exception:
                browser_major = "latest"
            try:
                log_message(f"[open_browser] Installing ChromeDriver {browser_major} via webdriver-manager", "DEBUG")
                drv_path = ChromeDriverManager(driver_version=browser_major).install()
                Tools._driver = webdriver.Chrome(service=Service(drv_path), options=opts)
                log_message("[open_browser] Launched via webdriver-manager.", "SUCCESS")
                return "Browser launched (webdriver-manager)"
            except Exception as e:
                log_message(f"[open_browser] webdriver-manager failed: {e}", "ERROR")

        try:
            log_message("[open_browser] Attempting `sudo snap install chromium`…", "DEBUG")
            subprocess.check_call(["sudo", "snap", "install", "chromium"])
            # If no browser was known before, point at the one just installed.
            if not chrome_bin and os.path.exists("/snap/bin/chromium"):
                opts.binary_location = "/snap/bin/chromium"
            Tools._driver = webdriver.Chrome(service=Service(snap_drv), options=opts)
            log_message("[open_browser] Launched via newly-installed snap chromium.", "SUCCESS")
            return "Browser launched (snap install fallback)"
        except Exception as e:
            log_message(f"[open_browser] Auto-snap install failed or Chrome still not found: {e}", "ERROR")

        raise RuntimeError(
            "No usable Chrome/Chromium driver. Install Chrome and a matching chromedriver, "
            "or set CHROME_BIN and ensure chromedriver is on PATH."
        )

    @staticmethod
    def close_browser() -> str:
        """Quit the shared browser, if any, and forget the driver."""
        if not Tools._driver:
            return "No browser to close"
        try:
            Tools._driver.quit()
            log_message("[close_browser] Browser closed.", "DEBUG")
        except Exception as exc:
            # Still drop the reference below; the session is unusable anyway.
            log_message(f"[close_browser] Ignoring error while quitting: {exc}", "WARNING")
        Tools._driver = None
        return "Browser closed"

    @staticmethod
    def is_browser_open() -> bool:
        """Return True when a driver object is currently held."""
        return Tools._driver is not None

    @staticmethod
    def navigate(url: str) -> str:
        """Load *url* in the shared browser.

        Driver exceptions are not caught here and propagate to the caller.
        """
        if not Tools._driver:
            return "Error: browser not open"
        log_message(f"[navigate] → {url}", "DEBUG")
        Tools._driver.get(url)
        return f"Navigated to {url}"

    @staticmethod
    def click(selector: str, timeout: int = 8) -> str:
        """Wait up to *timeout* seconds for a CSS *selector* to be clickable, then click it."""
        if not Tools._driver:
            return "Error: browser not open"
        try:
            drv = Tools._driver
            el = WebDriverWait(drv, timeout).until(EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
            drv.execute_script("arguments[0].scrollIntoView({block:'center'});", el)
            el.click()
            focused = drv.execute_script("return document.activeElement === arguments[0];", el)
            log_message(f"[click] {selector} clicked (focused={focused})", "DEBUG")
            return f"Clicked {selector}"
        except Exception as e:
            log_message(f"[click] Error clicking {selector}: {e}", "ERROR")
            return f"Error clicking {selector}: {e}"

    @staticmethod
    def input(selector: str, text: str, timeout: int = 8) -> str:
        """Clear the element matching *selector*, type *text* and press Return."""
        if not Tools._driver:
            return "Error: browser not open"
        try:
            drv = Tools._driver
            el = WebDriverWait(drv, timeout).until(EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
            drv.execute_script("arguments[0].scrollIntoView({block:'center'});", el)
            el.clear()
            el.send_keys(text + Keys.RETURN)
            log_message(f"[input] Sent {text!r} to {selector}", "DEBUG")
            return f"Sent {text!r} to {selector}"
        except Exception as e:
            log_message(f"[input] Error typing into {selector}: {e}", "ERROR")
            return f"Error typing into {selector}: {e}"

    @staticmethod
    def get_dom_snapshot(max_chars: int = 200_000) -> str:
        """Return the page's outer HTML truncated to *max_chars*, or "" on any failure."""
        if not Tools._driver:
            return ""
        try:
            dom = Tools._driver.execute_script("return document.documentElement.outerHTML;")
            if dom and len(dom) > max_chars:
                dom = dom[:max_chars]
            return dom or ""
        except Exception as exc:
            log_message(f"[dom] snapshot failed: {exc}", "WARNING")
            return ""

    @staticmethod
    def scroll(amount: int = 600) -> str:
        """Scroll the window vertically by *amount* pixels."""
        if not Tools._driver:
            return "Error: browser not open"
        try:
            Tools._driver.execute_script("window.scrollBy(0, arguments[0]);", amount)
            return f"Scrolled by {amount}"
        except Exception as exc:
            log_message(f"[scroll] failed: {exc}", "WARNING")
            return f"Error scrolling: {exc}"

    @staticmethod
    def screenshot(filename: str = "screenshot.png") -> str:
        """Save a screenshot to *filename* and return that filename.

        Driver exceptions are not caught here and propagate to the caller.
        """
        if not Tools._driver:
            return "Error: browser not open"
        Tools._driver.save_screenshot(filename)
        return filename

    @staticmethod
    def go_back() -> str:
        """Navigate one step back in the browser history."""
        if not Tools._driver:
            return "Error: browser not open"
        try:
            Tools._driver.back()
            log_message("[history] Navigated back", "DEBUG")
            return "Navigated back"
        except Exception as exc:
            log_message(f"[history] back failed: {exc}", "ERROR")
            return f"Error navigating back: {exc}"

    @staticmethod
    def go_forward() -> str:
        """Navigate one step forward in the browser history."""
        if not Tools._driver:
            return "Error: browser not open"
        try:
            Tools._driver.forward()
            log_message("[history] Navigated forward", "DEBUG")
            return "Navigated forward"
        except Exception as exc:
            log_message(f"[history] forward failed: {exc}", "ERROR")
            return f"Error navigating forward: {exc}"

    @staticmethod
    def drag(start_x: float, start_y: float, end_x: float, end_y: float) -> str:
        """Press at (start_x, start_y), move to (end_x, end_y) and release.

        Coordinates are rounded to integers and applied as offsets relative to
        the ``<body>`` element.
        """
        if not Tools._driver:
            return "Error: browser not open"
        try:
            drv = Tools._driver
            body = drv.find_element(By.TAG_NAME, "body")
            sx = int(round(start_x))
            sy = int(round(start_y))
            ex = int(round(end_x))
            ey = int(round(end_y))
            actions = ActionChains(drv)
            # NOTE(review): recent Selenium releases measure this offset from
            # the element's centre, not its top-left corner — confirm the
            # coordinate convention callers use against the installed version.
            actions.move_to_element_with_offset(body, sx, sy)
            actions.click_and_hold()
            actions.move_by_offset(ex - sx, ey - sy)
            actions.release()
            actions.perform()
            msg = f"Dragged from ({sx},{sy}) to ({ex},{ey})"
            log_message(f"[drag] {msg}", "DEBUG")
            return msg
        except Exception as exc:
            log_message(f"[drag] failed: {exc}", "ERROR")
            return f"Error dragging: {exc}"

    @staticmethod
    def scroll_point(x: float, y: float, delta_x: float, delta_y: float) -> str:
        """Dispatch a wheel event at viewport point (x, y).

        The event goes to the element under the point (or ``document.body``);
        unless a page handler cancels it, the window is also scrolled by the
        same deltas.
        """
        if not Tools._driver:
            return "Error: browser not open"
        try:
            script = """
            const x = arguments[0];
            const y = arguments[1];
            const dx = arguments[2] || 0;
            const dy = arguments[3] || 0;
            const target = document.elementFromPoint(x, y) || document.body;
            if (!target) return { ok: false, reason: 'element_from_point_null' };
            const evt = new WheelEvent('wheel', {
              clientX: x,
              clientY: y,
              deltaX: dx,
              deltaY: dy,
              bubbles: true,
              cancelable: true
            });
            const cancelled = !target.dispatchEvent(evt);
            if (!cancelled && typeof window !== 'undefined') {
              window.scrollBy(dx, dy);
            }
            return { ok: true, cancelled };
            """
            res = Tools._driver.execute_script(script, float(x), float(y), float(delta_x), float(delta_y))
            if not isinstance(res, dict) or not res.get("ok"):
                reason = res.get("reason") if isinstance(res, dict) else "unknown"
                return f"Error scrolling at point: {reason}"
            log_message(f"[scroll_at] wheel dx={delta_x:.2f} dy={delta_y:.2f} at ({x:.1f},{y:.1f})", "DEBUG")
            return "Scrolled at point"
        except Exception as exc:
            log_message(f"[scroll_at] failed: {exc}", "ERROR")
            return f"Error scrolling at point: {exc}"

    @staticmethod
    def sync_input(value: str, selector: str = "", submit: bool = False, input_type: str = "", data: Optional[str] = None) -> str:
        """Set the value of the focused (or selected) text field from script.

        The currently focused element is used when it accepts text; otherwise
        *selector* is tried. The value is written through the native setter, an
        ``input`` event is dispatched, and with *submit* a ``change`` event is
        fired followed by a form submit (or synthetic Enter key events when the
        field has no form).

        Returns:
            ``"Input submitted"`` / ``"Input synced"`` on success, otherwise a
            string starting with ``"Error syncing input:"``.
        """
        if not Tools._driver:
            return "Error: browser not open"
        try:
            drv = Tools._driver

            script = """
            const selector = arguments[0];
            const value = arguments[1];
            const submit = !!arguments[2];
            const inputType = arguments[3] || '';
            const data = arguments[4] === null ? null : arguments[4];

            const isTextTarget = (el) => {
              if (!el) return false;
              const tag = (el.tagName || '').toUpperCase();
              if (tag === 'TEXTAREA') return true;
              if (el.isContentEditable) return true;
              if (tag !== 'INPUT') return false;
              const type = (el.type || '').toLowerCase();
              return !['button','checkbox','radio','submit','reset','file','image','range','color','hidden'].includes(type);
            };

            let target = document.activeElement;
            if (!isTextTarget(target) && selector) {
              const found = document.querySelector(selector);
              if (isTextTarget(found)) {
                target = found;
                try { target.focus({ preventScroll: false }); } catch (_) { target.focus(); }
              }
            }
            if (!isTextTarget(target)) {
              return { ok: false, reason: 'no_focusable_input' };
            }
            if (target !== document.activeElement && typeof target.focus === 'function') {
              try { target.focus({ preventScroll: false }); } catch (_) { target.focus(); }
            }

            const setValue = (el, next) => {
              if (!el) return;
              if (el.isContentEditable) {
                el.textContent = next;
                return;
              }
              const proto = Object.getPrototypeOf(el);
              const descriptor = proto && Object.getOwnPropertyDescriptor(proto, 'value');
              const setter =
                descriptor?.set ||
                Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, 'value')?.set ||
                Object.getOwnPropertyDescriptor(HTMLTextAreaElement.prototype, 'value')?.set;
              if (setter) setter.call(el, next);
              else el.value = next;
            };

            setValue(target, value);

            const eventInit = { bubbles: true, cancelable: true };
            try {
              if (typeof InputEvent === 'function') {
                const inputEvt = new InputEvent('input', { ...eventInit, inputType: inputType || 'insertText', data });
                target.dispatchEvent(inputEvt);
              } else {
                target.dispatchEvent(new Event('input', eventInit));
              }
            } catch (_) {
              target.dispatchEvent(new Event('input', eventInit));
            }

            if (submit) {
              target.dispatchEvent(new Event('change', eventInit));
              const form = target.form;
              if (form) {
                if (typeof form.requestSubmit === 'function') form.requestSubmit();
                else form.submit();
              } else {
                const down = new KeyboardEvent('keydown', { key: 'Enter', code: 'Enter', bubbles: true, cancelable: true });
                target.dispatchEvent(down);
                const up = new KeyboardEvent('keyup', { key: 'Enter', code: 'Enter', bubbles: true, cancelable: true });
                target.dispatchEvent(up);
              }
            }

            return {
              ok: true,
              tag: target.tagName || '',
              id: target.id || '',
              name: target.name || '',
              value: target.isContentEditable ? target.textContent || '' : target.value || ''
            };
            """

            res = drv.execute_script(script, selector or '', value or '', bool(submit), input_type or '', data)
            if not isinstance(res, dict) or not res.get("ok"):
                reason = res.get("reason") if isinstance(res, dict) else "sync_failed"
                return f"Error syncing input: {reason}"
            return "Input submitted" if submit else "Input synced"
        except Exception as exc:
            log_message(f"[sync_input] failed: {exc}", "ERROR")
            return f"Error syncing input: {exc}"
|
|
551
|
+
|
|
552
|
+
# ──────────────────────────────────────────────────────────────
|
|
553
|
+
# 3) Environment configuration
|
|
554
|
+
# ──────────────────────────────────────────────────────────────
|
|
555
|
+
load_dotenv(SCRIPT_DIR / ".env")
|
|
556
|
+
|
|
557
|
+
def _env_flag(name: str, default: str = "0") -> bool:
    """Read a boolean environment variable.

    Accepts ``1``, ``true``, ``yes`` and ``on`` in any letter case, ignoring
    surrounding whitespace; anything else is False. Using one parser for every
    flag keeps spellings such as ``True`` or ``yes`` from silently leaving a
    switch (notably authentication) turned off.
    """
    return os.getenv(name, default).strip().lower() in ("1", "true", "yes", "on")


# Shared secret for API requests; empty string when unset.
API_KEY = (os.getenv("SCRAPE_API_KEY") or "").strip()
BIND = os.getenv("SCRAPE_BIND", "0.0.0.0")
PORT = int(os.getenv("SCRAPE_PORT", "8130"))
AUTH_REQUIRED = _env_flag("SCRAPE_REQUIRE_AUTH", "0")
# NOTE(review): the max(...) calls below act as floors, so configured values
# smaller than 4 / 60 / 180 are ignored — confirm that is intended.
MAX_CONCURRENCY = max(4, int(os.getenv("SCRAPE_MAX_CONCURRENCY", "4")))
QUEUE_TIMEOUT_S = float(os.getenv("SCRAPE_QUEUE_TIMEOUT_S", "2.0"))
RATE_LIMIT_RPS = max(60, int(os.getenv("SCRAPE_RATE_LIMIT_RPS", "60")))
RATE_LIMIT_BURST = max(180, int(os.getenv("SCRAPE_RATE_LIMIT_BURST", "180")))
RATE_LIMIT_DISABLED = _env_flag("SCRAPE_RATE_LIMIT_DISABLED", "0")
RATE_LIMIT_LOCAL_BYPASS = _env_flag("SCRAPE_RATE_LIMIT_LOCAL_BYPASS", "1")
# Comma-separated entries, trimmed; blanks are dropped.
RATE_LIMIT_WHITELIST = {
    entry.strip() for entry in os.getenv("SCRAPE_RATE_LIMIT_WHITELIST", "").split(",") if entry.strip()
}
FILE_TTL_S = max(60, int(os.getenv("SCRAPE_FILE_TTL_S", "900")))
FRAME_KEEPALIVE_S = max(10, int(os.getenv("SCRAPE_FRAME_KEEPALIVE_S", "45")))
HEADLESS_DEFAULT = _env_flag("SCRAPE_HEADLESS_DEFAULT", "1")
|
|
573
|
+
|
|
574
|
+
app = Flask(__name__)
|
|
575
|
+
CORS(app, resources={r"/*": {"origins": "*"}})
|
|
576
|
+
|
|
577
|
+
# ──────────────────────────────────────────────────────────────
|
|
578
|
+
# 4) Service state
|
|
579
|
+
# ──────────────────────────────────────────────────────────────
|
|
580
|
+
# Locks first: _GLOBAL_LOCK guards the two session registries, _RATE_LOCK is
# declared alongside the rate-limit buckets.
_GLOBAL_LOCK = threading.Lock()
_RATE_LOCK = threading.Lock()

# Per-session metadata and per-session event queues, both keyed by session id.
_SESSIONS: Dict[str, dict] = dict()
_SESSION_EVENTS: Dict[str, Queue] = dict()

# Rate-limit bookkeeping, keyed by string.
_RATE_BUCKETS: Dict[str, dict] = dict()

# Caps how many guarded operations may run at the same time.
_CONC_SEM = threading.BoundedSemaphore(MAX_CONCURRENCY)
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def _slot(timeout: Optional[float] = None):
    """Return a context manager that holds one concurrency slot.

    Args:
        timeout: seconds to wait for a free slot; ``None`` uses
            ``QUEUE_TIMEOUT_S``. A value of zero or less waits indefinitely.

    Raises (on ``__enter__``):
        TimeoutError: no slot became free within the timeout.
    """
    wait_seconds = float(timeout if timeout is not None else QUEUE_TIMEOUT_S)

    class _Slot:
        def __init__(self, timeout_val: float):
            self.timeout = timeout_val
            self.acquired = False

        def __enter__(self):
            if self.timeout <= 0:
                # Non-positive timeout means "block until a slot frees up".
                _CONC_SEM.acquire()
                self.acquired = True
                return self
            self.acquired = _CONC_SEM.acquire(timeout=self.timeout)
            if self.acquired:
                return self
            raise TimeoutError("scrape at capacity")

        def __exit__(self, exc_type, exc, tb):
            if not self.acquired:
                return
            try:
                _CONC_SEM.release()
            except Exception:
                pass

    return _Slot(wait_seconds)
|
|
612
|
+
|
|
613
|
+
|
|
614
|
+
def _session_meta(sid: str) -> Optional[dict]:
    """Look up the metadata dict for session *sid*; None when unknown."""
    with _GLOBAL_LOCK:
        meta = _SESSIONS.get(sid)
    return meta
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def _ensure_session(sid: str) -> Queue:
    """Return the event queue for *sid*, creating the session if necessary.

    A new session starts with creation/last-seen timestamps, the default
    headless flag and an empty ``frames`` mapping. The last-seen timestamp is
    refreshed on every call. Event queues are bounded to 256 entries.
    """
    with _GLOBAL_LOCK:
        if sid not in _SESSIONS:
            _SESSIONS[sid] = {
                "created": time.time(),
                "last": time.time(),
                "headless": HEADLESS_DEFAULT,
                "frames": {},
            }
        _SESSIONS[sid]["last"] = time.time()

        events = _SESSION_EVENTS.get(sid)
        if events is None:
            events = Queue(maxsize=256)
            _SESSION_EVENTS[sid] = events
        return events
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
def _touch_session(sid: str) -> None:
    """Refresh the last-seen timestamp of *sid*; unknown sessions are ignored."""
    with _GLOBAL_LOCK:
        meta = _SESSIONS.get(sid)
        if meta is None:
            return
        meta["last"] = time.time()
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
def _session_ids() -> list[str]:
    """Return a snapshot list of all known session ids."""
    with _GLOBAL_LOCK:
        ids = [sid for sid in _SESSIONS]
    return ids
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
def _clear_sessions() -> None:
    """Drop every session record and every session event queue in one locked step."""
    with _GLOBAL_LOCK:
        for store in (_SESSIONS, _SESSION_EVENTS):
            store.clear()
|
|
652
|
+
|
|
653
|
+
|
|
654
|
+
def _queue_event(sid: str, payload: dict) -> None:
|
|
655
|
+
q = _ensure_session(sid)
|
|
656
|
+
try:
|
|
657
|
+
q.put_nowait(payload)
|
|
658
|
+
except Exception:
|
|
659
|
+
try:
|
|
660
|
+
q.get_nowait()
|
|
661
|
+
except Exception:
|
|
662
|
+
pass
|
|
663
|
+
try:
|
|
664
|
+
q.put_nowait(payload)
|
|
665
|
+
except Exception:
|
|
666
|
+
pass
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _result_ok(message: str) -> bool:
|
|
670
|
+
msg = (message or "").strip().lower()
|
|
671
|
+
return not msg.startswith("error")
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def _sanitize_ip(raw_ip: str) -> str:
|
|
675
|
+
ip = (raw_ip or "").strip()
|
|
676
|
+
if not ip:
|
|
677
|
+
return "0.0.0.0"
|
|
678
|
+
# Handle IPv4 addresses that include a port (e.g. "127.0.0.1:8080")
|
|
679
|
+
if ip.count(":") == 1 and ip.rsplit(":", 1)[1].isdigit():
|
|
680
|
+
ip = ip.rsplit(":", 1)[0]
|
|
681
|
+
ip = ip.strip()
|
|
682
|
+
try:
|
|
683
|
+
ipaddress.ip_address(ip)
|
|
684
|
+
except ValueError:
|
|
685
|
+
return "0.0.0.0"
|
|
686
|
+
return ip
|
|
687
|
+
|
|
688
|
+
|
|
689
|
+
def _is_local_ip(ip: str) -> bool:
|
|
690
|
+
try:
|
|
691
|
+
addr = ipaddress.ip_address(ip)
|
|
692
|
+
except ValueError:
|
|
693
|
+
return False
|
|
694
|
+
return addr.is_loopback or addr.is_private
|
|
695
|
+
|
|
696
|
+
|
|
697
|
+
# ──────────────────────────────────────────────────────────────
|
|
698
|
+
# 5) Rate limit & auth helpers
|
|
699
|
+
# ──────────────────────────────────────────────────────────────
|
|
700
|
+
def _now() -> float:
|
|
701
|
+
return time.time()
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
@app.before_request
def _apply_rate_limit():
    """Token-bucket rate limiter applied to every request, keyed by client IP.

    The client IP is the first ``X-Forwarded-For`` entry when present,
    otherwise the socket peer; it is stored on ``g.client_ip``. Requests are
    exempt when rate limiting is disabled, when the IP is local and the local
    bypass is enabled, or when the IP is whitelisted.

    Returns:
        ``None`` to let the request proceed, or a 429 JSON response with a
        ``Retry-After`` header when the client's bucket holds less than one
        token.
    """
    # NOTE(review): X-Forwarded-For is client-controlled unless a trusted
    # proxy overwrites it. A direct caller can pick its own bucket, or claim
    # a local/whitelisted address, by sending this header. Confirm the
    # deployment always sits behind such a proxy.
    forwarded = request.headers.get("X-Forwarded-For", "").split(",")[0].strip()
    ip = _sanitize_ip(forwarded or request.remote_addr or "0.0.0.0")
    g.client_ip = ip
    if RATE_LIMIT_DISABLED or (RATE_LIMIT_LOCAL_BYPASS and _is_local_ip(ip)) or ip in RATE_LIMIT_WHITELIST:
        return None
    now = _now()
    with _RATE_LOCK:
        bucket = _RATE_BUCKETS.get(ip)
        if not bucket:
            bucket = {"tokens": float(RATE_LIMIT_BURST), "ts": now}
            _RATE_BUCKETS[ip] = bucket
        elapsed = max(0.0, now - bucket.get("ts", now))
        bucket["ts"] = now
        # Refill proportionally to the elapsed time, capped at the burst size.
        tokens = min(float(RATE_LIMIT_BURST), float(bucket.get("tokens", RATE_LIMIT_BURST)) + elapsed * RATE_LIMIT_RPS)
        if tokens < 1.0:
            # Persist the partial refill. "ts" was just advanced, so dropping
            # it here would throw away the accrued time and starve a client
            # that keeps polling faster than the refill rate.
            bucket["tokens"] = tokens
            return jsonify({"ok": False, "error": "rate limit"}), 429, {"Retry-After": "1"}
        bucket["tokens"] = tokens - 1.0
    return None
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
def _auth_ok(req) -> bool:
    """Decide whether *req* carries valid API credentials.

    Args:
        req: Request-like object exposing a ``headers`` mapping with ``get``.

    Returns:
        ``True`` when authentication is not required, or when the configured
        ``API_KEY`` matches either the ``X-API-Key`` header or an
        ``Authorization: Bearer <key>`` header. ``False`` otherwise,
        including when auth is required but no API key is configured.
    """
    # Function-scope import keeps this block self-contained.
    import hmac

    if not AUTH_REQUIRED:
        return True
    if not API_KEY:
        return False
    expected = API_KEY.encode("utf-8")

    def _matches(candidate: str) -> bool:
        # Constant-time comparison so response timing does not reveal how
        # many leading characters of the key were guessed. Compared as bytes
        # because compare_digest rejects non-ASCII str.
        return hmac.compare_digest(candidate.encode("utf-8"), expected)

    header_key = (req.headers.get("X-API-Key") or "").strip()
    if header_key and _matches(header_key):
        return True
    auth = (req.headers.get("Authorization") or "").strip()
    if auth.lower().startswith("bearer "):
        # After strip(), the "bearer " prefix guarantees a second field.
        return _matches(auth.split(None, 1)[1].strip())
    return False
|
|
736
|
+
|
|
737
|
+
|
|
738
|
+
# ──────────────────────────────────────────────────────────────
|
|
739
|
+
# 6) Background cleaners
|
|
740
|
+
# ──────────────────────────────────────────────────────────────
|
|
741
|
+
# Set on shutdown/termination to make the _cleanup_old_frames loop exit.
_CLEAN_STOP = threading.Event()
|
|
742
|
+
|
|
743
|
+
|
|
744
|
+
def _cleanup_old_frames() -> None:
    """Background loop that removes expired screenshots and idle sessions.

    Roughly every 30 seconds, until ``_CLEAN_STOP`` is set:
      * delete ``*.png`` files in ``OUT_DIR`` older than ``FILE_TTL_S``;
      * evict sessions whose "last" timestamp is older than
        ``max(FILE_TTL_S, 2 * FRAME_KEEPALIVE_S)``, together with their
        event queues.
    """
    while not _CLEAN_STOP.is_set():
        now = time.time()
        for path in OUT_DIR.glob("*.png"):
            try:
                age = now - path.stat().st_mtime
            except OSError:
                # Vanished or unreadable file: skip it rather than letting
                # the error kill the cleaner thread.
                continue
            if age > FILE_TTL_S:
                with contextlib.suppress(Exception):
                    path.unlink()
        session_ttl = max(FILE_TTL_S, 2 * FRAME_KEEPALIVE_S)
        # Check staleness and evict under one lock acquisition, so a session
        # touched in between cannot be removed on stale information.
        with _GLOBAL_LOCK:
            stale = [
                sid
                for sid, meta in _SESSIONS.items()
                if meta and now - meta.get("last", 0) > session_ttl
            ]
            for sid in stale:
                _SESSIONS.pop(sid, None)
                _SESSION_EVENTS.pop(sid, None)
        _CLEAN_STOP.wait(30.0)
|
|
765
|
+
|
|
766
|
+
|
|
767
|
+
import atexit # noqa: E402
|
|
768
|
+
import contextlib # noqa: E402
|
|
769
|
+
|
|
770
|
+
# Start the frame/session cleaner at import time. It is a daemon thread, so
# it does not keep the interpreter alive on exit.
_clean_thread = threading.Thread(target=_cleanup_old_frames, daemon=True)
_clean_thread.start()
|
|
772
|
+
|
|
773
|
+
|
|
774
|
+
@atexit.register
def _shutdown_cleanup():
    """Clean up ALL resources: Chrome browser, frame cache thread."""
    # Ask the cleaner loop to stop before tearing anything else down.
    _CLEAN_STOP.set()
    # CRITICAL: Close the Chrome browser to prevent orphaned Chrome processes
    with contextlib.suppress(Exception):
        Tools.close_browser()
    # Give the cleaner thread a short grace period; failures are ignored.
    try:
        _clean_thread.join(timeout=2.0)
    except Exception:
        pass
|
|
785
|
+
|
|
786
|
+
|
|
787
|
+
# Signal handlers: ensure Chrome is killed on SIGTERM/SIGINT
|
|
788
|
+
import signal as _signal
|
|
789
|
+
|
|
790
|
+
def _handle_terminate(signum, frame):
    """Graceful shutdown on SIGTERM/SIGINT — close Chrome then exit."""
    # Closing the browser is best effort; a failure must not block the exit.
    with contextlib.suppress(Exception):
        Tools.close_browser()
    _CLEAN_STOP.set()
    raise SystemExit(0)
|
|
798
|
+
|
|
799
|
+
# Route both SIGTERM and SIGINT through _handle_terminate.
# Registered at import time.
_signal.signal(_signal.SIGTERM, _handle_terminate)
_signal.signal(_signal.SIGINT, _handle_terminate)
|
|
801
|
+
|
|
802
|
+
|
|
803
|
+
# ──────────────────────────────────────────────────────────────
|
|
804
|
+
# 7) Utility responses
|
|
805
|
+
# ──────────────────────────────────────────────────────────────
|
|
806
|
+
def _ok(**kwargs):
    """Build a JSON success response: ``{"ok": true}`` merged with *kwargs*.

    Keyword arguments are applied after the ``ok`` flag, so a caller-supplied
    ``ok`` overrides it.
    """
    return jsonify({"ok": True, **kwargs})
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
def _error(message: str, status: int = 400):
    """Build a JSON error response tuple ``(body, status)``.

    The body is ``{"ok": false, "error": <message as str>}``.
    """
    body = {"ok": False, "error": str(message)}
    return jsonify(body), status
|
|
814
|
+
|
|
815
|
+
|
|
816
|
+
# ──────────────────────────────────────────────────────────────
|
|
817
|
+
# 8) Routes
|
|
818
|
+
# ──────────────────────────────────────────────────────────────
|
|
819
|
+
@app.get("/health")
def health():
    """Report service liveness, whether the browser is open, and the session count."""
    report = {
        "status": "ok",
        "browser_open": Tools.is_browser_open(),
        "sessions": len(_SESSIONS),
    }
    return jsonify(report)
|
|
822
|
+
|
|
823
|
+
|
|
824
|
+
@app.post("/session/start")
def session_start():
    """Open a fresh browser and register it as the single active session.

    JSON body: optional ``headless`` flag (defaults to ``HEADLESS_DEFAULT``).
    On success all previous sessions and event queues are discarded, a new
    session id is created, a "browser_started" status event is queued, and
    the id is returned. A failed browser start yields a 500 with the tool's
    message.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    body = request.get_json(silent=True) or {}
    headless = bool(body.get("headless", HEADLESS_DEFAULT))
    with _slot():
        outcome = Tools.open_browser(headless=headless, force_new=True)
    if not _result_ok(outcome):
        return _error(outcome, 500)
    sid = uuid.uuid4().hex
    with _GLOBAL_LOCK:
        # Only one browser exists, so starting a session replaces all others.
        for store in (_SESSIONS, _SESSION_EVENTS):
            store.clear()
        _SESSIONS[sid] = {
            "created": time.time(),
            "last": time.time(),
            "headless": headless,
            "frames": {},
        }
    started_event = {
        "type": "status",
        "msg": "browser_started",
        "detail": outcome,
        "sid": sid,
        "ts": int(time.time() * 1000),
    }
    _queue_event(sid, started_event)
    return _ok(session_id=sid, message=outcome, headless=headless)
|
|
846
|
+
|
|
847
|
+
|
|
848
|
+
@app.post("/session/close")
def session_close():
    """Close the browser, forget every session, and return the tool's message."""
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    with _slot():
        outcome = Tools.close_browser()
    _clear_sessions()
    return _ok(message=outcome)
|
|
856
|
+
|
|
857
|
+
|
|
858
|
+
@app.post("/navigate")
def navigate():
    """Navigate the browser to the ``url`` given in the JSON body.

    A status event carrying the tool's message is queued for the session
    (``sid`` from the body, else the first known session) whether or not the
    navigation succeeded. Responds 400 for a missing URL and 500 when the
    tool reports an error.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    body = request.get_json(silent=True) or {}
    target = (body.get("url") or "").strip()
    if target == "":
        return _error("missing url", 400)
    with _slot():
        outcome = Tools.navigate(target)
    event_sid = body.get("sid") or next(iter(_SESSIONS), "")
    _queue_event(event_sid, {"type": "status", "msg": outcome, "ts": int(time.time() * 1000)})
    if _result_ok(outcome):
        return _ok(message=outcome)
    return _error(outcome, 500)
|
|
872
|
+
|
|
873
|
+
|
|
874
|
+
@app.post("/click")
def click_selector():
    """Click the element matching the CSS ``selector`` from the JSON body.

    Responds 400 for a missing selector and 500 when the tool reports an
    error; on success a status event is queued and the message returned.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    body = request.get_json(silent=True) or {}
    css = (body.get("selector") or "").strip()
    if css == "":
        return _error("missing selector", 400)
    with _slot():
        outcome = Tools.click(css)
    if not _result_ok(outcome):
        return _error(outcome, 500)
    event_sid = body.get("sid") or next(iter(_SESSIONS), "")
    _queue_event(event_sid, {"type": "status", "msg": outcome, "ts": int(time.time() * 1000)})
    return _ok(message=outcome)
|
|
888
|
+
|
|
889
|
+
|
|
890
|
+
@app.post("/type")
def type_text():
    """Type ``text`` into the element matching ``selector`` (both from the JSON body).

    ``text`` may be empty but must be present; it is converted with ``str``
    before being handed to the tool. Responds 400 for a missing field and
    500 when the tool reports an error.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    body = request.get_json(silent=True) or {}
    css = (body.get("selector") or "").strip()
    raw_text = body.get("text")
    if css == "":
        return _error("missing selector", 400)
    if raw_text is None:
        return _error("missing text", 400)
    with _slot():
        outcome = Tools.input(css, str(raw_text))
    if not _result_ok(outcome):
        return _error(outcome, 500)
    event_sid = body.get("sid") or next(iter(_SESSIONS), "")
    _queue_event(event_sid, {"type": "status", "msg": outcome, "ts": int(time.time() * 1000)})
    return _ok(message=outcome)
|
|
907
|
+
|
|
908
|
+
|
|
909
|
+
@app.post("/scroll")
def scroll():
    """Scroll the page by the signed ``amount`` from the JSON body (default 600).

    Responds 400 when ``amount`` is not convertible to an integer and 500
    when the tool reports an error; on success a status event is queued.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    data = request.get_json(silent=True) or {}
    try:
        amount = int(data.get("amount", 600))
    except (TypeError, ValueError, OverflowError):
        # Bad client input is a 400, not an "internal error" 500.
        return _error("invalid amount", 400)
    with _slot():
        msg = Tools.scroll(amount)
    if not _result_ok(msg):
        return _error(msg, 500)
    _queue_event(data.get("sid") or next(iter(_SESSIONS), ""), {"type": "status", "msg": msg, "ts": int(time.time() * 1000)})
    return _ok(message=msg)
|
|
921
|
+
|
|
922
|
+
|
|
923
|
+
@app.post("/scroll/up")
def scroll_up():
    """Scroll the page up by ``amount`` from the JSON body (default 600).

    The sign of ``amount`` is ignored. Responds 400 when ``amount`` is not
    convertible to an integer and 500 when the tool reports an error.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    data = request.get_json(silent=True) or {}
    try:
        amount = abs(int(data.get("amount", 600)))
    except (TypeError, ValueError, OverflowError):
        # Bad client input is a 400, not an "internal error" 500.
        return _error("invalid amount", 400)
    with _slot():
        msg = Tools.scroll(-amount)
    if not _result_ok(msg):
        return _error(msg, 500)
    _queue_event(data.get("sid") or next(iter(_SESSIONS), ""), {"type": "status", "msg": msg, "ts": int(time.time() * 1000)})
    return _ok(message=msg)
|
|
935
|
+
|
|
936
|
+
|
|
937
|
+
@app.post("/scroll/down")
def scroll_down():
    """Scroll the page down by ``amount`` from the JSON body (default 600).

    The sign of ``amount`` is ignored. Responds 400 when ``amount`` is not
    convertible to an integer and 500 when the tool reports an error.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    data = request.get_json(silent=True) or {}
    try:
        amount = abs(int(data.get("amount", 600)))
    except (TypeError, ValueError, OverflowError):
        # Bad client input is a 400, not an "internal error" 500.
        return _error("invalid amount", 400)
    with _slot():
        msg = Tools.scroll(amount)
    if not _result_ok(msg):
        return _error(msg, 500)
    _queue_event(data.get("sid") or next(iter(_SESSIONS), ""), {"type": "status", "msg": msg, "ts": int(time.time() * 1000)})
    return _ok(message=msg)
|
|
949
|
+
|
|
950
|
+
|
|
951
|
+
@app.post("/scroll/point")
def scroll_point():
    """Scroll by a wheel delta at a point given in displayed-image coordinates.

    JSON body: ``x``/``y``, ``deltaX``/``deltaY`` (or snake_case, default 0),
    the displayed size ``viewportW``/``viewportH`` (or ``viewport_width`` /
    ``viewport_height``) and optionally ``naturalW``/``naturalH`` (or
    ``naturalWidth``/``naturalHeight``), which default to the displayed size.
    The point is rescaled by natural/displayed before being passed to the
    tool. Responds 400 for unparsable numbers or a non-positive displayed
    size, 500 when the tool reports an error.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    body = request.get_json(silent=True) or {}
    try:
        x = float(body.get("x"))
        y = float(body.get("y"))
        delta_x = float(body.get("deltaX") or body.get("delta_x") or 0.0)
        delta_y = float(body.get("deltaY") or body.get("delta_y") or 0.0)
        viewport_w = float(body.get("viewportW") or body.get("viewport_width"))
        viewport_h = float(body.get("viewportH") or body.get("viewport_height"))
        natural_w = float(body.get("naturalW") or body.get("naturalWidth") or viewport_w)
        natural_h = float(body.get("naturalH") or body.get("naturalHeight") or viewport_h)
    except Exception:
        return _error("invalid scroll coordinates", 400)
    if viewport_w <= 0 or viewport_h <= 0:
        return _error("invalid viewport dimensions", 400)
    # Map displayed-image coordinates onto the page's natural size.
    vx = x * (natural_w / max(1.0, viewport_w))
    vy = y * (natural_h / max(1.0, viewport_h))
    log_message(
        f"[scroll_point] ({x:.1f},{y:.1f}) scaled ({vx:.1f},{vy:.1f}) delta ({delta_x:.2f},{delta_y:.2f})",
        "DEBUG"
    )
    with _slot():
        outcome = Tools.scroll_point(vx, vy, delta_x, delta_y)
    if not _result_ok(outcome):
        return _error(outcome, 500)
    event_sid = body.get("sid") or next(iter(_SESSIONS), "")
    status_event = {
        "type": "status",
        "msg": outcome,
        "detail": {"x": vx, "y": vy, "delta": [delta_x, delta_y]},
        "ts": int(time.time() * 1000),
    }
    _queue_event(event_sid, status_event)
    return _ok(message=outcome)
|
|
992
|
+
|
|
993
|
+
|
|
994
|
+
@app.post("/history/back")
def history_back():
    """Go one step back in the browser history.

    Responds 500 when the tool reports an error; otherwise queues a status
    event for the session and returns the tool's message.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    body = request.get_json(silent=True) or {}
    with _slot():
        outcome = Tools.go_back()
    if not _result_ok(outcome):
        return _error(outcome, 500)
    event_sid = body.get("sid") or next(iter(_SESSIONS), "")
    status_event = {"type": "status", "msg": outcome, "ts": int(time.time() * 1000)}
    _queue_event(event_sid, status_event)
    return _ok(message=outcome)
|
|
1006
|
+
|
|
1007
|
+
|
|
1008
|
+
@app.post("/history/forward")
def history_forward():
    """Go one step forward in the browser history.

    Responds 500 when the tool reports an error; otherwise queues a status
    event for the session and returns the tool's message.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    body = request.get_json(silent=True) or {}
    with _slot():
        outcome = Tools.go_forward()
    if not _result_ok(outcome):
        return _error(outcome, 500)
    event_sid = body.get("sid") or next(iter(_SESSIONS), "")
    status_event = {"type": "status", "msg": outcome, "ts": int(time.time() * 1000)}
    _queue_event(event_sid, status_event)
    return _ok(message=outcome)
|
|
1020
|
+
|
|
1021
|
+
|
|
1022
|
+
@app.post("/click_xy")
def click_xy():
    """Click whatever element lies under a point given in displayed-image coordinates.

    JSON body: ``x``/``y``, the displayed size ``viewportW``/``viewportH``
    (or ``viewport_width``/``viewport_height``) and optionally the natural
    size ``naturalW``/``naturalH`` (or ``naturalWidth``/``naturalHeight``),
    which defaults to the displayed size. The point is rescaled by
    natural/displayed and resolved in the page via
    ``document.elementFromPoint``; the element is scrolled into view,
    clicked and focused.

    Responses: 400 for unparsable numbers or a non-positive displayed size,
    409 when no browser driver is available, 500 when the in-page script
    reports failure; otherwise a success body whose ``detail`` is the script
    result (tag, bounding rect, id/name/type/role, derived selector, current
    value).
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    data = request.get_json(silent=True) or {}
    try:
        x = float(data.get("x"))
        y = float(data.get("y"))
        viewport_w = float(data.get("viewportW") or data.get("viewport_width"))
        viewport_h = float(data.get("viewportH") or data.get("viewport_height"))
        natural_w = float(data.get("naturalW") or data.get("naturalWidth") or viewport_w)
        natural_h = float(data.get("naturalH") or data.get("naturalHeight") or viewport_h)
    except Exception:
        return _error("invalid coordinates", 400)
    if viewport_w <= 0 or viewport_h <= 0:
        return _error("invalid viewport dimensions", 400)
    # Map displayed-image coordinates onto the page's natural size.
    scale_x = natural_w / max(1.0, viewport_w)
    scale_y = natural_h / max(1.0, viewport_h)
    vx = x * scale_x
    vy = y * scale_y
    log_message(f"[click_xy] requested ({x:.1f}, {y:.1f}) → viewport ({vx:.1f},{vy:.1f})", "DEBUG")
    with _slot():
        # Reaches into the private driver handle; the attribute may be absent.
        try:
            drv = Tools._driver  # type: ignore[attr-defined]
        except AttributeError:
            drv = None
        if not drv:
            return _error("browser not open", 409)
        # The script returns {ok: true, tag, rect, detail} on success or
        # {ok: false, reason} on failure.
        # NOTE(review): this is a non-raw Python string, so the backslash
        # sequences in the fallback escaper are processed by Python before
        # the browser sees them. Confirm the resulting JS regex is intended.
        result = drv.execute_script(
            """
            const x = arguments[0];
            const y = arguments[1];
            const el = document.elementFromPoint(x, y);
            if (!el) return { ok: false, reason: 'element_from_point_null' };
            try { el.scrollIntoView({ block: 'center', inline: 'center' }); } catch (_) {}
            const rect = el.getBoundingClientRect();
            const detail = {
            tag: el.tagName || '',
            rect: { x: rect.x, y: rect.y, width: rect.width, height: rect.height },
            id: el.id || '',
            name: el.getAttribute('name') || '',
            type: el.getAttribute('type') || '',
            role: el.getAttribute('role') || '',
            contentEditable: !!el.isContentEditable,
            selector: '',
            value: ''
            };
            const esc = (val) => {
            if (typeof CSS !== 'undefined' && CSS.escape) return CSS.escape(val);
            return String(val).replace(/([ !"#$%&'()*+,./:;<=>?@[\\\]^`{|}~])/g, '\\\\$1');
            };
            if (detail.id) {
            detail.selector = `#${esc(detail.id)}`;
            } else if (detail.name && detail.tag === 'INPUT') {
            detail.selector = `${detail.tag.toLowerCase()}[name="${detail.name.replace(/"/g, '\\"')}"]`;
            }
            if (detail.tag === 'INPUT' || detail.tag === 'TEXTAREA') {
            detail.value = el.value || '';
            } else if (el.isContentEditable) {
            detail.value = el.textContent || '';
            }
            try {
            el.click();
            if (typeof el.focus === 'function') el.focus();
            return { ok: true, tag: detail.tag, rect: detail.rect, detail };
            } catch (err) {
            return { ok: false, reason: err && err.message ? err.message : String(err) };
            }
            """,
            float(vx),
            float(vy),
        )
    if not result or not result.get("ok"):
        return _error(result.get("reason") if isinstance(result, dict) else "click failed", 500)
    # Publish the click outcome to the session's event stream.
    _queue_event(
        data.get("sid") or next(iter(_SESSIONS), ""),
        {
            "type": "status",
            "msg": "click_xy",
            "detail": result,
            "ts": int(time.time() * 1000),
        },
    )
    return _ok(message="click_xy", detail=result)
|
|
1106
|
+
|
|
1107
|
+
|
|
1108
|
+
@app.post("/input/sync")
def input_sync():
    """Mirror a client-side input change into the browser.

    JSON body: required ``sid``; optional ``value`` (default ""),
    ``selector``, ``submit``, ``inputType`` and ``data``, all forwarded to
    ``Tools.sync_input``. The session's last-seen time is refreshed first.
    Responds 400 for a missing sid and 500 when the tool reports an error.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    body = request.get_json(silent=True) or {}
    sid = (body.get("sid") or "").strip()
    if sid == "":
        return _error("missing sid", 400)
    sync_kwargs = {
        "selector": (body.get("selector") or "").strip(),
        "submit": bool(body.get("submit")),
        "input_type": (body.get("inputType") or "").strip(),
        "data": body.get("data"),
    }
    value = body.get("value", "")
    _touch_session(sid)
    with _slot():
        outcome = Tools.sync_input(value, **sync_kwargs)
    if _result_ok(outcome):
        return _ok(message=outcome)
    return _error(outcome, 500)
|
|
1127
|
+
|
|
1128
|
+
|
|
1129
|
+
@app.post("/drag")
def drag():
    """Drag from a start point to an end point given in displayed-image coordinates.

    JSON body: ``startX``/``startY``/``endX``/``endY``, the displayed size
    ``viewportW``/``viewportH`` (or ``viewport_width``/``viewport_height``)
    and optionally ``naturalW``/``naturalH`` (or ``naturalWidth`` /
    ``naturalHeight``), defaulting to the displayed size. Both points are
    rescaled by natural/displayed before being handed to the tool. Responds
    400 for unparsable numbers or a non-positive displayed size, 500 when
    the tool reports an error.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    body = request.get_json(silent=True) or {}
    try:
        start_x = float(body.get("startX"))
        start_y = float(body.get("startY"))
        end_x = float(body.get("endX"))
        end_y = float(body.get("endY"))
        viewport_w = float(body.get("viewportW") or body.get("viewport_width"))
        viewport_h = float(body.get("viewportH") or body.get("viewport_height"))
        natural_w = float(body.get("naturalW") or body.get("naturalWidth") or viewport_w)
        natural_h = float(body.get("naturalH") or body.get("naturalHeight") or viewport_h)
    except Exception:
        return _error("invalid drag coordinates", 400)
    if viewport_w <= 0 or viewport_h <= 0:
        return _error("invalid viewport dimensions", 400)
    # Map displayed-image coordinates onto the page's natural size.
    scale_x = natural_w / max(1.0, viewport_w)
    scale_y = natural_h / max(1.0, viewport_h)
    start_vx, start_vy = start_x * scale_x, start_y * scale_y
    end_vx, end_vy = end_x * scale_x, end_y * scale_y
    log_message(
        f"[drag] ({start_x:.1f},{start_y:.1f})→({end_x:.1f},{end_y:.1f}) viewport ({start_vx:.1f},{start_vy:.1f})→({end_vx:.1f},{end_vy:.1f})",
        "DEBUG"
    )
    with _slot():
        outcome = Tools.drag(start_vx, start_vy, end_vx, end_vy)
    if not _result_ok(outcome):
        return _error(outcome, 500)
    event_sid = body.get("sid") or next(iter(_SESSIONS), "")
    status_event = {
        "type": "status",
        "msg": outcome,
        "detail": {
            "start": [start_vx, start_vy],
            "end": [end_vx, end_vy]
        },
        "ts": int(time.time() * 1000),
    }
    _queue_event(event_sid, status_event)
    return _ok(message=outcome)
|
|
1175
|
+
|
|
1176
|
+
|
|
1177
|
+
@app.get("/dom")
def dom_snapshot():
    """Return the current page DOM, capped at 200,000 characters.

    Responds 409 when no DOM is available. On success a "dom" event carrying
    the character count is queued for the session (``sid`` query argument,
    else the first known session).
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    markup = Tools.get_dom_snapshot(max_chars=200_000)
    if not markup:
        return _error("no dom (browser closed?)", 409)
    event_sid = request.args.get("sid") or next(iter(_SESSIONS), "")
    dom_event = {"type": "dom", "chars": len(markup), "ts": int(time.time() * 1000)}
    _queue_event(event_sid, dom_event)
    return _ok(dom=markup, length=len(markup))
|
|
1187
|
+
|
|
1188
|
+
|
|
1189
|
+
def _record_frame_meta(sid: str, fname: str, width: int, height: int) -> None:
    """Record a captured frame's timestamp (ms) and size in session *sid*'s metadata.

    Unknown sessions are ignored. The entry is stored under the session's
    "frames" map, keyed by *fname*.
    """
    with _GLOBAL_LOCK:
        record = _SESSIONS.get(sid)
        if record:
            entry = {"ts": int(time.time() * 1000), "width": width, "height": height}
            record.setdefault("frames", {})[fname] = entry
|
|
1196
|
+
|
|
1197
|
+
|
|
1198
|
+
@app.get("/screenshot")
def screenshot():
    """Capture a PNG screenshot and return it both as a file path and inline base64.

    The image is written to ``OUT_DIR`` under a random name. Its dimensions
    and base64 payload are read back best-effort (0x0 and "" on failure).
    The frame is recorded in the session metadata and published as a "frame"
    event. Responds 500 when the tool reports an error.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    sid = request.args.get("sid") or next(iter(_SESSIONS), "")
    fname = f"{uuid.uuid4().hex}.png"
    fpath = OUT_DIR / fname
    with _slot():
        outcome = Tools.screenshot(str(fpath))
    if not _result_ok(outcome):
        return _error(outcome, 500)
    # Image size: fall back to 0x0 when the file cannot be opened or decoded.
    width = height = 0
    try:
        with Image.open(fpath) as im:
            width, height = im.size
    except Exception:
        width = height = 0
    # Inline payload: an unreadable file yields an empty string.
    try:
        encoded = base64.b64encode(fpath.read_bytes()).decode("ascii")
    except Exception:
        encoded = ""
    frame = {
        "file": f"/frames/{fname}",
        "width": width,
        "height": height,
        "mime": "image/png",
        "b64": encoded,
    }
    _record_frame_meta(sid, fname, width, height)
    _queue_event(sid, {"type": "frame", **frame, "ts": int(time.time() * 1000)})
    return _ok(**frame)
|
|
1234
|
+
|
|
1235
|
+
|
|
1236
|
+
@app.get("/frames/<path:filename>")
def frames(filename):
    """Serve a previously captured frame from ``OUT_DIR``, uncached.

    Args:
        filename: Path of the frame relative to ``OUT_DIR``, taken from the URL.

    Responds 401 when the request is not authorized.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    # `max_age` replaced `cache_timeout` in Flask 2.0 and the old name was
    # removed in 2.2. The route decorators used in this file already require
    # Flask >= 2.0, so `max_age` is always available.
    return send_from_directory(OUT_DIR, filename, as_attachment=False, max_age=0)
|
|
1241
|
+
|
|
1242
|
+
|
|
1243
|
+
def _sse_iter(sid: str):
    """Yield Server-Sent-Events lines for session *sid* until the client disconnects.

    Each queued event is emitted as ``data: <compact JSON>`` followed by a
    blank line. When no event arrives for ``FRAME_KEEPALIVE_S`` seconds
    (checked every 5 seconds), an SSE comment line (``:``) is sent as a
    keepalive and the session's last-seen time is refreshed. Without that
    refresh the background cleaner evicts an idle session that still has a
    connected subscriber, and later events go to a new queue this generator
    does not read. The session is touched once more when the stream ends.
    """
    q = _ensure_session(sid)
    keepalive_deadline = time.time() + FRAME_KEEPALIVE_S
    try:
        while True:
            try:
                payload = q.get(timeout=5.0)
            except Empty:
                now = time.time()
                if now >= keepalive_deadline:
                    keepalive_deadline = now + FRAME_KEEPALIVE_S
                    # A connected subscriber counts as session activity.
                    _touch_session(sid)
                    yield ":\n\n"
                continue
            keepalive_deadline = time.time() + FRAME_KEEPALIVE_S
            data = json.dumps(payload, separators=(",", ":"))
            yield f"data: {data}\n\n"
    finally:
        # Runs on client disconnect (GeneratorExit) as well as on errors.
        _touch_session(sid)
|
|
1262
|
+
|
|
1263
|
+
|
|
1264
|
+
@app.get("/events")
def events():
    """Open a Server-Sent-Events stream for the session given by the ``sid`` query argument.

    Responds 401 when unauthorized and 400 when ``sid`` is missing.
    """
    if not _auth_ok(request):
        return _error("unauthorized", 401)
    sid = request.args.get("sid") or ""
    if sid == "":
        return _error("missing sid", 400)
    _touch_session(sid)
    stream = _sse_iter(sid)
    return Response(stream, mimetype="text/event-stream")
|
|
1273
|
+
|
|
1274
|
+
|
|
1275
|
+
@app.errorhandler(TimeoutError)
def _timeout_handler(exc):
    """Translate a ``TimeoutError`` (e.g. no free scrape slot) into a 503 JSON error."""
    reason = str(exc)
    return _error(reason, 503)
|
|
1278
|
+
|
|
1279
|
+
|
|
1280
|
+
@app.errorhandler(Exception)
def _unhandled(exc):
    """Last-resort error handler: log unexpected exceptions and answer 500.

    A handler registered for ``Exception`` also receives HTTP errors such as
    404 or 405. Those are passed through with their own status code instead
    of being reported as an internal error.
    """
    status = getattr(exc, "code", None)
    # Duck-typed check for werkzeug's HTTPException (integer ``code`` plus a
    # ``get_response`` method); avoids adding an import to this file.
    if isinstance(status, int) and callable(getattr(exc, "get_response", None)):
        if status >= 400:
            return _error(getattr(exc, "name", None) or "error", status)
        # Non-error HTTP exceptions (e.g. redirects) are valid responses.
        return exc
    print(f"[error] {exc}", file=sys.stderr)
    return _error("internal error", 500)
|
|
1284
|
+
|
|
1285
|
+
|
|
1286
|
+
@app.after_request
def _default_headers(resp):
    """Add default cache and CORS-header values to *resp* unless already set."""
    defaults = (
        ("Cache-Control", "no-store, max-age=0"),
        ("Access-Control-Allow-Headers", "Content-Type, Authorization, X-API-Key"),
    )
    for header_name, header_value in defaults:
        resp.headers.setdefault(header_name, header_value)
    return resp
|
|
1291
|
+
|
|
1292
|
+
|
|
1293
|
+
if __name__ == "__main__":
    # Script entry point: announce the bind address on stderr, then start
    # the Flask server with debug off and threaded request handling on.
    print(f"[service] starting web_scrape on {BIND}:{PORT}", file=sys.stderr)
    app.run(host=BIND, port=PORT, debug=False, threaded=True)
|