kanibako-cli 1.5.0.dev14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. kanibako/__init__.py +3 -0
  2. kanibako/__main__.py +6 -0
  3. kanibako/auth_browser.py +296 -0
  4. kanibako/auth_parser.py +51 -0
  5. kanibako/browser_sidecar.py +183 -0
  6. kanibako/browser_state.py +103 -0
  7. kanibako/bun_sea.py +144 -0
  8. kanibako/cli.py +344 -0
  9. kanibako/commands/__init__.py +0 -0
  10. kanibako/commands/archive.py +228 -0
  11. kanibako/commands/box/__init__.py +22 -0
  12. kanibako/commands/box/_duplicate.py +395 -0
  13. kanibako/commands/box/_migrate.py +574 -0
  14. kanibako/commands/box/_parser.py +1178 -0
  15. kanibako/commands/clean.py +166 -0
  16. kanibako/commands/crab_cmd.py +480 -0
  17. kanibako/commands/diagnose.py +239 -0
  18. kanibako/commands/fork_cmd.py +51 -0
  19. kanibako/commands/helper_cmd.py +669 -0
  20. kanibako/commands/image.py +1300 -0
  21. kanibako/commands/install.py +152 -0
  22. kanibako/commands/refresh_credentials.py +67 -0
  23. kanibako/commands/restore.py +298 -0
  24. kanibako/commands/setup_cmd.py +89 -0
  25. kanibako/commands/start.py +1600 -0
  26. kanibako/commands/stop.py +116 -0
  27. kanibako/commands/system_cmd.py +224 -0
  28. kanibako/commands/upgrade.py +161 -0
  29. kanibako/commands/vault_cmd.py +199 -0
  30. kanibako/commands/workset_cmd.py +552 -0
  31. kanibako/config.py +514 -0
  32. kanibako/config_interface.py +573 -0
  33. kanibako/config_io.py +36 -0
  34. kanibako/container.py +607 -0
  35. kanibako/containerfiles.py +58 -0
  36. kanibako/containers/Containerfile.kanibako +99 -0
  37. kanibako/containers/Containerfile.template-android +55 -0
  38. kanibako/containers/Containerfile.template-dotnet +29 -0
  39. kanibako/containers/Containerfile.template-js +43 -0
  40. kanibako/containers/Containerfile.template-jvm +27 -0
  41. kanibako/containers/Containerfile.template-systems +46 -0
  42. kanibako/containers/__init__.py +0 -0
  43. kanibako/crabs.py +89 -0
  44. kanibako/errors.py +33 -0
  45. kanibako/freshness.py +67 -0
  46. kanibako/git.py +114 -0
  47. kanibako/helper_client.py +132 -0
  48. kanibako/helper_listener.py +538 -0
  49. kanibako/helpers.py +339 -0
  50. kanibako/hygiene.py +296 -0
  51. kanibako/image_sharing.py +133 -0
  52. kanibako/instructions.py +160 -0
  53. kanibako/log.py +31 -0
  54. kanibako/names.py +248 -0
  55. kanibako/paths.py +1483 -0
  56. kanibako/plugins/__init__.py +10 -0
  57. kanibako/registry.py +71 -0
  58. kanibako/rig_bundle.py +121 -0
  59. kanibako/rig_meta.py +92 -0
  60. kanibako/rig_registry.py +132 -0
  61. kanibako/rig_resolve.py +182 -0
  62. kanibako/rig_source.py +245 -0
  63. kanibako/scripts/__init__.py +0 -0
  64. kanibako/scripts/helper-init.sh +45 -0
  65. kanibako/scripts/kanibako-entry +12 -0
  66. kanibako/settings_resolve.py +312 -0
  67. kanibako/settings_seeds.py +154 -0
  68. kanibako/settings_shares.py +154 -0
  69. kanibako/shellenv.py +75 -0
  70. kanibako/snapshots.py +281 -0
  71. kanibako/targets/__init__.py +173 -0
  72. kanibako/targets/base.py +243 -0
  73. kanibako/targets/no_agent.py +58 -0
  74. kanibako/templates.py +60 -0
  75. kanibako/templates_image.py +224 -0
  76. kanibako/tweakcc.py +140 -0
  77. kanibako/tweakcc_cache.py +171 -0
  78. kanibako/utils.py +136 -0
  79. kanibako/workset.py +347 -0
  80. kanibako_cli-1.5.0.dev14.dist-info/METADATA +15 -0
  81. kanibako_cli-1.5.0.dev14.dist-info/RECORD +85 -0
  82. kanibako_cli-1.5.0.dev14.dist-info/WHEEL +5 -0
  83. kanibako_cli-1.5.0.dev14.dist-info/entry_points.txt +5 -0
  84. kanibako_cli-1.5.0.dev14.dist-info/licenses/LICENSE.md +594 -0
  85. kanibako_cli-1.5.0.dev14.dist-info/top_level.txt +1 -0
kanibako/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ """kanibako: Run AI coding agents in rootless containers with per-project isolation."""
2
+
3
+ __version__ = "1.5.0.dev14"
kanibako/__main__.py ADDED
@@ -0,0 +1,6 @@
1
+ """Allow running kanibako as `python -m kanibako`."""
2
+
3
+ from kanibako.cli import main
4
+
5
+ if __name__ == "__main__":
6
+ main()
@@ -0,0 +1,296 @@
1
+ """Automated OAuth refresh via headless browser.
2
+
3
+ Uses Playwright (optional dependency) to navigate the Claude Code OAuth
4
+ authorization page and click "Authorize" when the IdP session is still
5
+ valid. Falls back to manual login when the session is stale.
6
+
7
+ Requires: ``pip install playwright && playwright install chromium``
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from dataclasses import dataclass
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ from kanibako.browser_state import (
17
+ from_playwright_context,
18
+ load_state,
19
+ save_state,
20
+ to_playwright_context,
21
+ )
22
+ from kanibako.log import get_logger
23
+
24
+ logger = get_logger("auth_browser")
25
+
26
+ _AUTHORIZE_TIMEOUT_MS = 30_000
27
+ _NAVIGATION_TIMEOUT_MS = 30_000
28
+
29
+ # Lazy-loaded Playwright symbols. Populated by _check_playwright() so that
30
+ # tests can patch them at the module level without actually importing Playwright.
31
+ sync_playwright: Any = None
32
+ PWTimeout: type[Exception] = Exception # fallback type for except clauses
33
+
34
+
35
@dataclass
class AuthResult:
    """Outcome of one automated OAuth refresh attempt.

    Produced by the browser-automation helpers in this module and handed
    back to the caller unchanged.
    """

    # True when the authorize button was found and clicked.
    success: bool
    # Authorization key scraped from the post-authorize page, if any.
    key: str | None = None
    # Human-readable failure reason; ``None`` on success.
    error: str | None = None
42
+
43
+
44
+ def _check_playwright() -> bool:
45
+ """Check if Playwright is available and populate module-level symbols."""
46
+ global sync_playwright, PWTimeout # noqa: PLW0603
47
+ try:
48
+ from playwright.sync_api import ( # type: ignore[import-not-found]
49
+ sync_playwright as _sp,
50
+ TimeoutError as _te,
51
+ )
52
+ sync_playwright = _sp
53
+ PWTimeout = _te
54
+ return True
55
+ except ImportError:
56
+ return False
57
+
58
+
59
def refresh_auth(
    url: str,
    data_path: Path,
    *,
    headless: bool = True,
) -> AuthResult:
    """Attempt automated OAuth re-authorization via headless browser.

    1. Load stored browser state (cookies from previous sessions)
    2. Navigate to the OAuth URL
    3. If authorize button is visible → click it → extract key
    4. If IdP login form is shown → abort (manual login required)
    5. Save updated browser state on success (best effort)

    Args:
        url: OAuth authorization URL to open.
        data_path: Directory under which the browser state is stored.
        headless: Whether Chromium is launched without a visible window.

    Returns :class:`AuthResult` with success status and optional key.
    Never raises for browser failures; they are reported via ``error``.
    """
    if not _check_playwright():
        return AuthResult(
            success=False,
            error="Playwright not installed. Run: pip install playwright && playwright install chromium",
        )

    state = load_state(data_path)
    storage_state = to_playwright_context(state) if state.cookies else None

    try:
        with sync_playwright() as pw:
            browser = pw.chromium.launch(headless=headless)
            try:
                if storage_state:
                    context = browser.new_context(storage_state=storage_state)
                else:
                    context = browser.new_context()

                page = context.new_page()
                page.set_default_timeout(_NAVIGATION_TIMEOUT_MS)

                logger.debug("Navigating to OAuth URL: %s", url)
                page.goto(url, wait_until="networkidle")

                # Detect page state
                result = _handle_auth_page(page)

                if result.success:
                    # Persisting the session is an optimisation for the next
                    # refresh.  The authorization itself has already happened,
                    # so a failure here must not be reported as an auth
                    # failure (the caller would abort a login that worked).
                    try:
                        ctx_data = context.storage_state()
                        save_state(data_path, from_playwright_context(ctx_data))
                    except Exception as exc:
                        logger.warning(
                            "OAuth refresh succeeded but browser state could not be saved: %s",
                            exc,
                        )
                    logger.info("OAuth refresh succeeded")

                context.close()
                return result

            finally:
                # Closing the browser also tears down any context left open
                # by an exception above.
                browser.close()

    except PWTimeout:
        return AuthResult(success=False, error="OAuth page timed out")
    except Exception as exc:
        logger.warning("Browser automation failed: %s", exc)
        return AuthResult(success=False, error=str(exc))
119
+
120
+
121
def _handle_auth_page(page) -> AuthResult:
    """Classify the loaded OAuth page and act on it.

    Three outcomes are possible:

    * a consent button is visible → click it, wait for the network to go
      idle, and return success with whatever key can be scraped;
    * an identity-provider login field is visible → return a failure
      saying manual login is required;
    * neither is found → return a failure for an unrecognized page.
    """
    # Consent controls (Anthropic consent screen), probed in this order.
    consent_buttons = (
        'button:has-text("Authorize")',
        'button:has-text("Allow")',
        'button:has-text("Approve")',
        'input[type="submit"][value*="Authorize"]',
        'input[type="submit"][value*="Allow"]',
    )
    # Fields that indicate an IdP login form (Google, GitHub, etc.).
    login_markers = (
        'input[type="email"]',
        'input[type="password"]',
        '#identifierId',  # Google
        '#login_field',  # GitHub
    )

    for css in consent_buttons:
        try:
            control = page.wait_for_selector(css, timeout=3000)
            if not (control and control.is_visible()):
                continue
            logger.debug("Found authorize button: %s", css)
            control.click()

            # Let the post-authorization redirect settle before scraping.
            page.wait_for_load_state("networkidle")

            return AuthResult(success=True, key=_extract_key(page))
        except PWTimeout:
            # A timeout anywhere in this attempt moves on to the next selector.
            continue

    for css in login_markers:
        try:
            login_field = page.wait_for_selector(css, timeout=2000)
            if login_field and login_field.is_visible():
                return AuthResult(
                    success=False,
                    error="IdP session expired — manual login required",
                )
        except PWTimeout:
            continue

    # Neither a consent button nor a login form was recognized.
    body_text = page.text_content("body") or ""
    logger.debug("Unrecognized page state. Body preview: %s", body_text[:200])
    return AuthResult(
        success=False,
        error="Unrecognized OAuth page — manual login required",
    )
180
+
181
+
182
def auto_refresh_auth(
    claude_path: str,
    data_path: Path,
    *,
    headless: bool = True,
    login_timeout: float = 60,
) -> AuthResult:
    """Orchestrate fully automated OAuth: start login, parse URL, automate browser.

    1. Start ``claude auth login`` capturing stdout
    2. Parse the OAuth URL from the output
    3. Use :func:`refresh_auth` to navigate with stored cookies
    4. If the browser clicks "Authorize", the redirect completes the login
    5. Wait for ``claude auth login`` to finish

    Args:
        claude_path: Path to the ``claude`` executable.
        data_path: Directory under which the browser state is stored.
        headless: Passed through to :func:`refresh_auth`.
        login_timeout: Seconds to wait for ``claude auth login`` to exit
            after the browser step succeeded.

    Returns :class:`AuthResult` indicating success or failure.  A login
    process that does not exit within *login_timeout* is reported as a
    failure, because the login cannot be assumed to have completed.
    """
    import subprocess
    import threading

    from kanibako.auth_parser import parse_auth_output

    if not _check_playwright():
        return AuthResult(
            success=False,
            error="Playwright not installed",
        )

    # Start claude auth login, capturing output to find the URL.
    try:
        proc = subprocess.Popen(
            [claude_path, "auth", "login"],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            stdin=subprocess.PIPE,
            text=True,
        )
    except OSError as exc:  # includes FileNotFoundError
        return AuthResult(success=False, error=f"Failed to start auth: {exc}")

    output_lines: list[str] = []
    url: str | None = None
    code: str | None = None
    # Set as soon as a URL was parsed *or* the child's output ended.
    reader_ready = threading.Event()

    def _read_output() -> None:
        nonlocal url, code
        assert proc.stdout is not None
        try:
            for line in proc.stdout:
                if url is not None:
                    # Keep draining so the child never blocks on a full pipe.
                    continue
                output_lines.append(line)
                prompt = parse_auth_output("".join(output_lines))
                if prompt:
                    code = prompt.code
                    url = prompt.url
                    reader_ready.set()
        finally:
            reader_ready.set()

    def _kill() -> None:
        proc.kill()
        proc.wait()

    reader = threading.Thread(target=_read_output, daemon=True)
    reader.start()
    reader_ready.wait(timeout=15)

    try:
        if not url:
            # No URL found — kill process and bail.
            _kill()
            return AuthResult(success=False, error="No OAuth URL found in auth output")

        logger.info("Auto-auth: navigating to %s", url)
        result = refresh_auth(url, data_path, headless=headless)

        if not result.success:
            _kill()
            return result

        # If we got a key and the process is waiting for input, feed it.
        key = result.key or code
        if key and proc.poll() is None and proc.stdin:
            try:
                proc.stdin.write(key + "\n")
                proc.stdin.flush()
            except OSError as exc:
                # The process may already have completed via the redirect.
                logger.debug("Could not feed key to claude auth login: %s", exc)

        # Wait for claude auth login to complete.
        try:
            proc.wait(timeout=login_timeout)
        except subprocess.TimeoutExpired:
            _kill()
            return AuthResult(
                success=False,
                error=f"claude auth login did not finish within {login_timeout}s",
            )

        if proc.returncode != 0:
            logger.warning("claude auth login exited with status %s", proc.returncode)
        return result
    finally:
        # Never leave the child running or its pipes open.
        if proc.poll() is None:
            _kill()
        reader.join(timeout=1)
        for stream in (proc.stdin, proc.stdout):
            if stream is None:
                continue
            if stream is proc.stdout and reader.is_alive():
                # Closing while the reader is mid-read could block; the
                # daemon thread will release it when the pipe hits EOF.
                continue
            try:
                stream.close()
            except OSError:
                pass  # e.g. broken pipe while flushing stdin on close
273
+
274
+
275
+ def _extract_key(page) -> str | None:
276
+ """Try to extract the authorization key from the post-authorize page."""
277
+ # Look for common patterns: displayed code, input field with key, etc.
278
+ key_selectors = [
279
+ 'code',
280
+ '.authorization-code',
281
+ 'input[readonly]',
282
+ 'pre',
283
+ ]
284
+
285
+ for selector in key_selectors:
286
+ try:
287
+ el = page.wait_for_selector(selector, timeout=3000)
288
+ if el:
289
+ text = el.text_content() or el.get_attribute("value") or ""
290
+ text = text.strip()
291
+ if text and len(text) < 200: # reasonable key length
292
+ return text
293
+ except Exception:
294
+ continue
295
+
296
+ return None
@@ -0,0 +1,51 @@
1
+ """Parse Claude Code auth command output to extract OAuth URLs and codes.
2
+
3
+ Used by the automated OAuth refresh flow to extract the authorization URL
4
+ from ``claude auth login`` output and feed back the authorization code.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import re
10
+ from dataclasses import dataclass
11
+
12
+
13
@dataclass
class AuthPrompt:
    """Parsed auth prompt from ``claude auth login`` output."""

    url: str
    code: str | None = None  # verification code (if displayed)


# OAuth URL: an http(s) link on console.anthropic.com or claude.ai.
_URL_RE = re.compile(
    r"(https?://(?:console\.anthropic\.com|claude\.ai)[^\s\"'<>]+)",
)

# Any URL at all.  URLs are blanked out before looking for a verification
# code so that query parameters such as ``?code=true`` are not mistaken
# for a displayed code.
_ANY_URL_RE = re.compile(r"https?://\S+")

# Sentence punctuation that may directly follow a URL in prose output.
_URL_TRAILING_PUNCTUATION = ".,;:!?)]}"

# Verification code: typically 4-8 character alphanumeric.
# Matches patterns like "code: ABCD1234", "code is: XY12AB", "key = WXYZ99"
# Requires a colon or equals as separator to avoid false positives, and a
# word boundary before the keyword so "barcode:" / "hotkey=" do not match.
_CODE_RE = re.compile(
    r"\b(?:verification\s+code|code|key)\s*(?:is)?[:=]\s*([A-Z0-9]{4,8})\b",
    re.IGNORECASE,
)


def parse_auth_output(output: str) -> AuthPrompt | None:
    """Extract OAuth URL and optional code from claude auth login output.

    The URL is the first match of ``_URL_RE`` with trailing sentence
    punctuation removed.  The code is searched for in *output* with every
    URL removed, so URL query parameters never count as a code.

    Returns *None* if no recognizable URL is found.
    """
    url_match = _URL_RE.search(output)
    if not url_match:
        return None

    url = url_match.group(1).rstrip(_URL_TRAILING_PUNCTUATION)

    code_match = _CODE_RE.search(_ANY_URL_RE.sub(" ", output))
    code = code_match.group(1) if code_match else None

    return AuthPrompt(url=url, code=code)
@@ -0,0 +1,183 @@
1
+ """On-demand browser sidecar for AI agents.
2
+
3
+ Launches a headless Chrome container (``chromedp/headless-shell``) that
4
+ agents can connect to via the Chrome DevTools Protocol over WebSocket.
5
+ The agent receives the ``BROWSER_WS_ENDPOINT`` environment variable
6
+ pointing to the sidecar's DevTools port.
7
+
8
+ The sidecar is started before the agent container and stopped after it
9
+ exits. It is *not* a long-running service — it only lives for the
10
+ duration of one agent session.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import subprocess
17
+ import time
18
+ from dataclasses import dataclass
19
+
20
+ from kanibako.container import ContainerRuntime
21
+ from kanibako.log import get_logger
22
+
23
+ logger = get_logger("browser_sidecar")
24
+
25
+ _DEFAULT_IMAGE = "chromedp/headless-shell:latest"
26
+ _CDP_PORT = 9222
27
+ _STARTUP_TIMEOUT = 30
28
+ _HEALTH_CHECK_INTERVAL = 0.5
29
+
30
+
31
@dataclass
class BrowserSidecar:
    """Manages a headless browser container for agent web access.

    The sidecar publishes Chrome DevTools Protocol on a host port.
    The agent container connects via the host gateway IP.
    """

    runtime: ContainerRuntime
    container_name: str
    image: str = _DEFAULT_IMAGE
    host_port: int = 0  # 0 = auto-assign
    _started: bool = False

    def start(self) -> str:
        """Start the browser sidecar and return the WebSocket endpoint URL.

        Blocks until the container is healthy or *_STARTUP_TIMEOUT* elapses.
        If the container was launched but never became reachable, it is
        stopped again before the error propagates.

        Returns the ``ws://`` URL suitable for ``BROWSER_WS_ENDPOINT``.

        Raises :class:`BrowserSidecarError` on failure.
        """
        if self._started:
            raise BrowserSidecarError("Sidecar already started")

        port_spec = (
            f"{self.host_port}:{_CDP_PORT}"
            if self.host_port
            else str(_CDP_PORT)
        )

        # NOTE(review): ``-p`` without a host IP publishes on all host
        # interfaces — presumably needed so the agent container can reach
        # the port through the host gateway; confirm this exposure is intended.
        cmd = [
            self.runtime.cmd,
            "run",
            "-d",
            "--rm",
            "--name",
            self.container_name,
            "--shm-size=2g",
            "-p",
            port_spec,
            self.image,
            # Ensure Chrome listens on all interfaces inside the container.
            "--remote-debugging-address=0.0.0.0",
            f"--remote-debugging-port={_CDP_PORT}",
        ]

        logger.debug("Starting browser sidecar: %s", cmd)
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
        except OSError as exc:
            # e.g. the container runtime binary is missing or not executable.
            raise BrowserSidecarError(f"Failed to start sidecar: {exc}") from exc
        if result.returncode != 0:
            raise BrowserSidecarError(
                f"Failed to start sidecar: {result.stderr.strip()}"
            )

        self._started = True

        try:
            # Resolve the actual host port (if auto-assigned).
            actual_port = self._resolve_port()
            # Wait for the DevTools endpoint to be ready.
            ws_url = self._wait_for_endpoint(actual_port)
        except BaseException:
            # The container is detached; without this it would keep running
            # after a failed start unless the caller remembered to stop it.
            try:
                self.stop()
            except Exception as stop_exc:
                logger.warning(
                    "Failed to clean up browser sidecar %s: %s",
                    self.container_name,
                    stop_exc,
                )
            raise

        logger.info("Browser sidecar ready: %s", ws_url)
        return ws_url

    def stop(self) -> None:
        """Stop and remove the browser sidecar.  No-op if it was not started."""
        if not self._started:
            return

        logger.debug("Stopping browser sidecar: %s", self.container_name)
        self.runtime.stop(self.container_name)
        # --rm flag means container is auto-removed after stop, but
        # call rm() defensively in case stop fails to clean up.
        self.runtime.rm(self.container_name)
        self._started = False

    def _resolve_port(self) -> int:
        """Discover the host port assigned to the sidecar's CDP port.

        Returns ``host_port`` when one was configured; otherwise asks the
        runtime (``<runtime> port <name> <cdp-port>``) and parses the reply.

        Raises :class:`BrowserSidecarError` if the command fails or no
        port can be parsed from its output.
        """
        if self.host_port:
            return self.host_port

        cmd = [
            self.runtime.cmd,
            "port",
            self.container_name,
            str(_CDP_PORT),
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise BrowserSidecarError(
                f"Failed to resolve sidecar port: {result.stderr.strip()}"
            )

        # Output format: "0.0.0.0:PORT\n" or "[::]:PORT\n"
        for line in result.stdout.splitlines():
            line = line.strip()
            if ":" in line:
                port_str = line.rsplit(":", 1)[-1]
                try:
                    return int(port_str)
                except ValueError:
                    continue

        raise BrowserSidecarError(
            f"Could not parse port from: {result.stdout.strip()}"
        )

    @staticmethod
    def _host_ws_url(ws_url: str, port: int) -> str:
        """Return *ws_url* with its host:port replaced by ``127.0.0.1:<port>``.

        Chrome reports the address it sees from inside the container (for
        example ``ws://0.0.0.0:9222/devtools/browser/<id>``); only the
        network location is rewritten, the path is kept as-is.
        """
        from urllib.parse import urlsplit, urlunsplit

        parts = urlsplit(ws_url)
        return urlunsplit(parts._replace(netloc=f"127.0.0.1:{port}"))

    def _wait_for_endpoint(self, port: int) -> str:
        """Poll the DevTools endpoint until it returns a WebSocket URL.

        Chrome's ``/json/version`` endpoint returns the browser's WS URL.

        Raises :class:`BrowserSidecarError` if no URL is obtained within
        *_STARTUP_TIMEOUT* seconds.
        """
        import urllib.request

        url = f"http://127.0.0.1:{port}/json/version"
        deadline = time.monotonic() + _STARTUP_TIMEOUT

        while time.monotonic() < deadline:
            try:
                with urllib.request.urlopen(url, timeout=2) as resp:
                    data = json.loads(resp.read())
                ws_url = data.get("webSocketDebuggerUrl", "")
            except Exception:
                # Deliberately broad: while Chrome is still starting, any
                # connection/HTTP/JSON error just means "not ready yet".
                ws_url = ""

            if ws_url:
                return self._host_ws_url(ws_url, port)

            # Sleep on every unsuccessful attempt, including a well-formed
            # reply without a URL, so the loop never busy-spins.
            time.sleep(_HEALTH_CHECK_INTERVAL)

        raise BrowserSidecarError(
            f"Sidecar did not become ready within {_STARTUP_TIMEOUT}s"
        )
170
+
171
+
172
def ws_endpoint_for_container(ws_url: str) -> str:
    """Rewrite a host-local WS URL so a container can reach it.

    Every occurrence of ``127.0.0.1`` in *ws_url* is replaced with
    ``host.containers.internal``.  In rootless Podman the host gateway is
    typically ``10.0.2.2`` (slirp4netns) or ``host.containers.internal``
    (pasta); the latter name is used because it works with both.
    """
    loopback = "127.0.0.1"
    gateway = "host.containers.internal"
    return gateway.join(ws_url.split(loopback))
180
+
181
+
182
class BrowserSidecarError(Exception):
    """Raised when the browser sidecar cannot be started or managed."""
@@ -0,0 +1,103 @@
1
+ """Persistent browser state for automated OAuth refresh.
2
+
3
+ Stores Playwright browser context (cookies, localStorage) so that the
4
+ OAuth provider recognizes the session on subsequent refreshes without
5
+ requiring a full re-login.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import time
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+
15
+ from kanibako.log import get_logger
16
+
17
+ logger = get_logger("browser_state")
18
+
19
+
20
@dataclass
class BrowserState:
    """Browser context kept between runs so OAuth sessions can be reused.

    Stored as JSON at ``{data_path}/browser-state/context.json``.
    """

    cookies: list[dict] = field(default_factory=list)
    origins: list[dict] = field(default_factory=list)  # localStorage per origin
    updated_at: float = 0.0

    def is_fresh(self, max_age_days: float = 30.0) -> bool:
        """Return whether the state has cookies and is younger than *max_age_days*."""
        if self.cookies:
            max_age_seconds = max_age_days * 86400
            return time.time() - self.updated_at < max_age_seconds
        # Without cookies there is no session to reuse, whatever the age.
        return False
37
+
38
+
39
def state_path(data_path: Path) -> Path:
    """Return ``<data_path>/browser-state/context.json``."""
    return data_path.joinpath("browser-state", "context.json")
42
+
43
+
44
def load_state(data_path: Path) -> BrowserState:
    """Load browser state from disk. Returns empty state on missing/corrupt file.

    "Corrupt" covers unreadable files, invalid JSON, a non-object top
    level, and an ``updated_at`` that cannot be converted to a float.
    ``cookies`` / ``origins`` values that are not lists are replaced by
    empty lists so callers can always treat them as lists.
    """
    path = state_path(data_path)
    if not path.is_file():
        return BrowserState()

    try:
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        if not isinstance(data, dict):
            return BrowserState()

        cookies = data.get("cookies", [])
        origins = data.get("origins", [])
        return BrowserState(
            cookies=cookies if isinstance(cookies, list) else [],
            origins=origins if isinstance(origins, list) else [],
            # ``or 0`` maps an explicit null to 0; float() raises TypeError
            # for other non-numeric JSON values, which is handled below.
            updated_at=float(data.get("updated_at") or 0),
        )
    except (json.JSONDecodeError, OSError, ValueError, TypeError) as exc:
        logger.warning("Failed to load browser state: %s", exc)
        return BrowserState()
63
+
64
+
65
def save_state(data_path: Path, state: BrowserState) -> None:
    """Persist browser state to disk.

    The file holds session cookies, so it is created readable by the
    owner only, and written via a temporary file plus atomic rename so a
    crash mid-write cannot leave a truncated ``context.json`` behind.

    Side effect: ``state.updated_at`` is set to the current time.

    Raises :class:`OSError` if the directory or file cannot be written.
    """
    import os

    path = state_path(data_path)
    path.parent.mkdir(mode=0o700, parents=True, exist_ok=True)

    state.updated_at = time.time()
    data = {
        "cookies": state.cookies,
        "origins": state.origins,
        "updated_at": state.updated_at,
    }

    tmp_path = path.with_name(path.name + ".tmp")
    fd = os.open(tmp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        # The mode passed to os.open() is ignored when a stale temp file
        # already existed, so tighten it explicitly before publishing.
        os.chmod(tmp_path, 0o600)
        os.replace(tmp_path, path)
    except BaseException:
        try:
            os.unlink(tmp_path)
        except OSError:
            pass  # nothing left to clean up
        raise

    logger.debug("Saved browser state: %d cookies", len(state.cookies))
79
+
80
+
81
def clear_state(data_path: Path) -> None:
    """Delete the stored browser state file, if there is one.

    Intended for logout or credential invalidation.  Does nothing when
    the state file does not exist.
    """
    target = state_path(data_path)
    if not target.is_file():
        return
    target.unlink()
    logger.debug("Cleared browser state")
87
+
88
+
89
def to_playwright_context(state: BrowserState) -> dict:
    """Build a Playwright ``storageState`` dict from *state*.

    The result references the same ``cookies`` and ``origins`` lists as
    *state*; nothing is copied.
    """
    return dict(cookies=state.cookies, origins=state.origins)
95
+
96
+
97
def from_playwright_context(context: dict) -> BrowserState:
    """Build a :class:`BrowserState` from Playwright ``storageState`` output.

    Missing ``cookies`` / ``origins`` keys default to empty lists, and
    ``updated_at`` is stamped with the current time.
    """
    cookie_jar = context.get("cookies", [])
    origin_stores = context.get("origins", [])
    stamped_at = time.time()
    return BrowserState(
        cookies=cookie_jar,
        origins=origin_stores,
        updated_at=stamped_at,
    )