cookiesync-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cookiesync/__init__.py +3 -0
- cookiesync/__main__.py +6 -0
- cookiesync/cli.py +339 -0
- cookiesync/cookie/__init__.py +20 -0
- cookiesync/cookie/backend.py +100 -0
- cookiesync/cookie/browsers.py +57 -0
- cookiesync/cookie/consent.py +128 -0
- cookiesync/cookie/crypto.py +85 -0
- cookiesync/cookie/domains.py +38 -0
- cookiesync/cookie/getcookie.py +113 -0
- cookiesync/cookie/merge.py +74 -0
- cookiesync/cookie/models.py +90 -0
- cookiesync/cookie/pipeline.py +101 -0
- cookiesync/cookie/serialize.py +132 -0
- cookiesync/cookie/stores.py +218 -0
- cookiesync/daemon/__init__.py +13 -0
- cookiesync/daemon/backend_ssh.py +70 -0
- cookiesync/daemon/cache.py +113 -0
- cookiesync/daemon/engine.py +195 -0
- cookiesync/daemon/rpc.py +153 -0
- cookiesync/daemon/server.py +378 -0
- cookiesync/daemon/session.py +117 -0
- cookiesync/daemon/sync.py +241 -0
- cookiesync/daemon/wire.py +90 -0
- cookiesync/helper.py +112 -0
- cookiesync/paths.py +87 -0
- cookiesync/py.typed +0 -0
- cookiesync/registry.py +79 -0
- cookiesync/service.py +214 -0
- cookiesync/state.py +173 -0
- cookiesync/transport.py +108 -0
- cookiesync_cli-0.1.0.dist-info/METADATA +120 -0
- cookiesync_cli-0.1.0.dist-info/RECORD +36 -0
- cookiesync_cli-0.1.0.dist-info/WHEEL +4 -0
- cookiesync_cli-0.1.0.dist-info/entry_points.txt +3 -0
- cookiesync_cli-0.1.0.dist-info/licenses/LICENSE +133 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Chrome macOS cookie encryption and decryption (the ``v10`` scheme).
|
|
2
|
+
|
|
3
|
+
key = PBKDF2-HMAC-SHA1(safe_storage_password, b"saltysalt", 1003, dklen=16)
|
|
4
|
+
value = AES-128-CBC(key, iv=16x 0x20) over the ciphertext, PKCS7-(un)padded, with a
|
|
5
|
+
32-byte SHA256(host_key) domain-hash prefix Chrome v24+ prepends — verified on
|
|
6
|
+
decrypt (a hash mismatch is a wrong key, not garbage), and committed on encrypt
|
|
7
|
+
against the exact stored ``host_key`` (leading dot included).
|
|
8
|
+
|
|
9
|
+
``v20`` (app-bound) values cannot be (de)crypted with the Safe Storage key and are rejected.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import hashlib
|
|
15
|
+
|
|
16
|
+
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
|
|
17
|
+
|
|
18
|
+
from cookiesync.cookie.models import AesKey, HostKey, SafeStorageKey
|
|
19
|
+
|
|
20
|
+
# Fixed PBKDF2 salt of the ``v10`` scheme.
SALT = b"saltysalt"
# PBKDF2-HMAC-SHA1 iteration count.
ITERATIONS = 1003
# Length in bytes of the derived key (AES-128).
KEY_LENGTH = 16
# Constant CBC initialisation vector: sixteen 0x20 bytes.
IV = b"\x20" * 16
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DecryptError(Exception):
    """Raised when a cookie value cannot be decrypted.

    Covers ``v20`` blobs, malformed ciphertext or padding, and results that point to
    a wrong key.
    """
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def derive_key(password: SafeStorageKey) -> AesKey:
    """Turn the raw 'Safe Storage' password into the 16-byte AES key.

    Runs PBKDF2-HMAC-SHA1 over the UTF-8 encoded password using the module's
    ``SALT``, ``ITERATIONS`` and ``KEY_LENGTH``.
    """
    secret = password.encode("utf-8")
    derived = hashlib.pbkdf2_hmac("sha1", secret, SALT, ITERATIONS, dklen=KEY_LENGTH)
    return AesKey(derived)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def pkcs7_pad(data: bytes) -> bytes:
    """Append PKCS7 padding so ``data`` fills whole 16-byte blocks.

    Input that is already block-aligned gains one full block of padding.
    """
    remainder = len(data) % 16
    fill = 16 - remainder
    # Each padding byte carries the padding length itself.
    return data + fill.to_bytes(1, "big") * fill
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def pkcs7_unpad(data: bytes) -> bytes:
    """Validate and strip PKCS7 padding from ``data``.

    Raises:
        DecryptError: if ``data`` is empty, the final byte is not a pad length in
            1..16 that fits inside ``data``, or the trailing bytes are not all equal
            to that length.
    """
    if len(data) == 0:
        raise DecryptError("empty plaintext")
    pad = data[-1]
    if not 1 <= pad <= min(16, len(data)):
        raise DecryptError(f"bad PKCS7 padding length {pad}")
    body, tail = data[:-pad], data[-pad:]
    if any(byte != pad for byte in tail):
        raise DecryptError("inconsistent PKCS7 padding")
    return body
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def domain_hash(host_key: HostKey) -> bytes:
    """Return the 32-byte SHA-256 digest of the UTF-8 encoded ``host_key``."""
    hasher = hashlib.sha256()
    hasher.update(host_key.encode("utf-8"))
    return hasher.digest()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def decrypt_value(encrypted: bytes, key: AesKey, host_key: HostKey) -> str:
    """Decrypt one Chrome cookie ``encrypted_value`` blob into its text value.

    An empty blob yields ``""``. A ``v20`` blob is rejected, a ``v10`` blob is
    AES-CBC decrypted and verified, and any other blob is decoded as plain UTF-8.

    Raises:
        DecryptError: for a ``v20`` blob, an untagged blob that is not UTF-8,
            ciphertext that is not a positive multiple of 16 bytes, bad padding, a
            32-byte prefix matching neither the hash of ``host_key`` nor of its
            dot-stripped form, or a payload that is not valid UTF-8.
    """
    if not encrypted:
        return ""
    version = encrypted[:3]
    if version == b"v20":
        raise DecryptError("v20 app-bound cookie (not decryptable with the Safe Storage key)")
    if version != b"v10":
        # No recognised version tag: the blob is taken to be plain text.
        try:
            return encrypted.decode("utf-8")
        except UnicodeDecodeError as exc:
            raise DecryptError("unrecognized cookie encoding") from exc

    ciphertext = encrypted[3:]
    if len(ciphertext) == 0 or len(ciphertext) % 16:
        raise DecryptError("ciphertext is not a positive multiple of the block size")
    cipher = Cipher(algorithms.AES(key), modes.CBC(IV))
    decryptor = cipher.decryptor()
    padded = decryptor.update(ciphertext) + decryptor.finalize()
    plain = pkcs7_unpad(padded)
    # The first 32 bytes must be the domain hash; the dot-stripped host_key is accepted too.
    if len(plain) < 32 or plain[:32] not in (
        domain_hash(host_key),
        domain_hash(HostKey(host_key.lstrip("."))),
    ):
        raise DecryptError("domain-hash prefix mismatch (wrong key)")
    payload = plain[32:]
    try:
        return payload.decode("utf-8")
    except UnicodeDecodeError as exc:
        raise DecryptError("decrypted value is not valid UTF-8 (likely wrong key)") from exc
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def encrypt_value(plaintext: str, key: AesKey, host_key: HostKey) -> bytes:
    """Build Chrome's ``v10`` blob for ``plaintext``, bound to the exact ``host_key``.

    The SHA-256 of ``host_key`` is prepended to the UTF-8 value, the result is
    PKCS7-padded and AES-CBC encrypted with the fixed ``IV``, and ``b"v10"`` is
    put in front of the ciphertext.
    """
    cipher = Cipher(algorithms.AES(key), modes.CBC(IV))
    encryptor = cipher.encryptor()
    payload = domain_hash(host_key) + plaintext.encode("utf-8")
    ciphertext = encryptor.update(pkcs7_pad(payload)) + encryptor.finalize()
    return b"v10" + ciphertext
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Host parsing and the cookie send-rule: what the browser would send to a host.
|
|
2
|
+
|
|
3
|
+
No public-suffix list: ``cookie_applies`` implements the actual domain-match the
|
|
4
|
+
browser uses, which is all we need to pick the cookies for one target host.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from cookiesync.cookie.models import Host, HostKey
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def normalize_host(url: str) -> Host:
    """Lowercase bare host from a URL or domain.

    Strips the scheme, userinfo, port, path, query, fragment and any leading or
    trailing dots. A bracketed IPv6 literal is returned with its brackets.

    Example:
        >>> normalize_host("https://user@Example.com:8443/a?b#c")
        'example.com'
    """
    v = url.strip().lower()
    # Only a leading ``scheme://`` counts as a scheme. A "://" that shows up after a
    # "/", "?" or "#" belongs to the path/query/fragment of a bare domain
    # (e.g. ``x.com/cb?next=http://y.com``) and must not be split on.
    head, sep, rest = v.partition("://")
    if sep and not any(ch in head for ch in "/?#"):
        v = rest
    # The authority component ends at the first "/", "?" or "#".
    for delimiter in "/?#":
        v = v.split(delimiter, 1)[0]
    # Userinfo is everything up to the last "@" of the authority.
    v = v.rpartition("@")[2]
    closing = v.find("]") if v.startswith("[") else -1
    if closing != -1:
        # Bracketed IPv6 literal: the colons inside the brackets are not a port separator.
        v = v[: closing + 1]
    else:
        v = v.split(":", 1)[0]
    return Host(v.strip("."))
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def url_scheme(url: str, *, default: str = "https") -> str:
    """Scheme of a URL, lowercased, or ``default`` for a bare domain.

    A ``://`` only marks a scheme when nothing before it contains ``/``, ``?`` or
    ``#``; otherwise it belongs to the path, query or fragment of a bare domain
    (e.g. ``x.com/cb?next=http://y.com``) and ``default`` is returned. Surrounding
    whitespace is ignored.

    Example:
        >>> url_scheme("HTTP://x.com")
        'http'
    """
    head, sep, _rest = url.strip().partition("://")
    if sep and head and not any(ch in head for ch in "/?#"):
        return head.lower()
    return default
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def cookie_applies(host_key: HostKey, host: Host) -> bool:
    """Decide whether a cookie stored under ``host_key`` is sent to ``host``.

    Comparison is case-insensitive. A ``host_key`` with a leading dot matches the
    base host itself and anything ending in the dotted key; a ``host_key`` without
    one matches only the identical host.
    """
    cookie_domain = host_key.lower()
    request_host = host.lower()
    if not cookie_domain.startswith("."):
        # Host-only cookie: exact match required.
        return request_host == cookie_domain
    return request_host == cookie_domain[1:] or request_host.endswith(cookie_domain)
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
"""Cross-browser cookie fallback via ``@mherod/get-cookie``, swept across every browser.
|
|
2
|
+
|
|
3
|
+
Used when Chrome self-decrypt finds nothing — the user is logged in via
|
|
4
|
+
Brave/Arc/Edge/Safari/Firefox, or the cookies are app-bound (v20). We deliberately
|
|
5
|
+
omit ``--browser`` so get-cookie queries every browser. The package is lazily
|
|
6
|
+
``bun add``-ed once into a persistent data dir (it needs the native better-sqlite3
|
|
7
|
+
module) and reused; ``bunx`` is the last-resort path.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import os
|
|
14
|
+
import shutil
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import TYPE_CHECKING
|
|
17
|
+
|
|
18
|
+
import anyio
|
|
19
|
+
|
|
20
|
+
from cookiesync.cookie.models import Host
|
|
21
|
+
from cookiesync.cookie.serialize import normalize_getcookie_record
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from cookiesync.cookie.models import Cookie
|
|
25
|
+
|
|
26
|
+
# Pinned version of the get-cookie package.
GETCOOKIE_VERSION = "4.4.3"
# Versioned package spec handed to ``bun add`` / ``bunx``.
PACKAGE = "@mherod/get-cookie@" + GETCOOKIE_VERSION
# Minimal manifest written into the install directory before ``bun add`` runs.
PACKAGE_JSON = '{"name":"cookiesync-getcookie-cache","private":true}\n'
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class GetCookieError(Exception):
    """Raised when the get-cookie fallback cannot be run or its output cannot be parsed."""
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def data_dir() -> Path:
    """Directory that holds the lazily installed get-cookie package.

    Uses ``$XDG_CACHE_HOME`` when it is set and non-empty, otherwise ``~/.cache``,
    and appends ``cookiesync``.
    """
    cache_root = os.environ.get("XDG_CACHE_HOME")
    if not cache_root:
        cache_root = Path.home() / ".cache"
    return Path(cache_root) / "cookiesync"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def cached_cli() -> Path | None:
    """Path of the installed get-cookie CLI script, or ``None`` when it is not on disk."""
    cli = data_dir().joinpath("node_modules", "@mherod", "get-cookie", "dist", "cli.cjs")
    if cli.is_file():
        return cli
    return None
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
async def ensure_installed() -> Path | None:
    """Make sure get-cookie is installed in the data dir and return its CLI path.

    Returns the existing CLI immediately when present. Otherwise, if ``bun`` is on
    ``PATH``, creates the data dir, writes a minimal ``package.json`` when missing,
    runs ``bun add`` there and re-checks for the CLI. Returns ``None`` when ``bun``
    is unavailable or the CLI is still absent afterwards. ``bun add`` runs with
    ``check=True``, so a non-zero exit raises.
    """
    existing = cached_cli()
    if existing:
        return existing
    bun = shutil.which("bun")
    if not bun:
        return None
    data = data_dir()
    data.mkdir(parents=True, exist_ok=True)
    manifest = data / "package.json"
    if not manifest.is_file():
        manifest.write_text(PACKAGE_JSON)
    await anyio.run_process([bun, "add", PACKAGE], cwd=str(data), check=True)
    return cached_cli()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
async def command(host: Host) -> list[str]:
    """The argv that runs get-cookie for ``host`` across all browsers.

    Prefers the cached CLI run through ``bun``; otherwise falls back to ``bunx``
    with the pinned package. A failed install (``bun add`` exiting non-zero) no
    longer aborts the lookup: it is treated as "no cached CLI" so the ``bunx``
    last-resort path still gets its chance.

    Raises:
        GetCookieError: when neither a cached CLI plus ``bun`` nor ``bunx`` is
            available. A preceding install failure is attached as the cause.
    """
    import subprocess  # local import: only needed to recognise a failed ``bun add``

    install_error: subprocess.CalledProcessError | None = None
    try:
        cli = await ensure_installed()
    except subprocess.CalledProcessError as exc:
        cli, install_error = None, exc
    bun = shutil.which("bun")
    if cli and bun:
        return [bun, str(cli), "%", host, "--output", "json"]
    bunx = shutil.which("bunx")
    if bunx:
        return [bunx, PACKAGE, "%", host, "--output", "json"]
    raise GetCookieError("neither a cached get-cookie nor bun/bunx is available") from install_error
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _decode_anywhere(text: str) -> object | None:
    """Return the first JSON value that decodes starting at a ``[`` or ``{`` in ``text``.

    Candidate offsets are tried left to right; one that fails to decode is skipped.
    Returns ``None`` when no offset yields valid JSON.
    """
    decoder = json.JSONDecoder()
    candidates = (offset for offset, char in enumerate(text) if char in "[{")
    for offset in candidates:
        try:
            value, _end = decoder.raw_decode(text, offset)
        except json.JSONDecodeError:
            continue
        return value
    return None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def parse(stdout: str) -> list[dict]:
    """Extract the cookie records from get-cookie's stdout.

    Leading log noise is tolerated: the first ``[``/``{`` offset that decodes as
    JSON wins, so bracketed log prefixes such as ``[get-cookie] ...`` are skipped.
    Blank output yields ``[]``. An object with a list under ``cookies`` or ``data``
    yields that list, any other object is wrapped in a one-element list, and a
    top-level array is returned as is.

    Raises:
        GetCookieError: when non-blank output contains no decodable JSON value.
    """
    out = stdout.strip()
    if not out:
        return []
    data = _decode_anywhere(out)
    if data is None:
        raise GetCookieError("could not parse get-cookie JSON output")
    if isinstance(data, dict):
        # ``cookies`` takes precedence over ``data`` when both hold lists.
        for envelope in ("cookies", "data"):
            if envelope in data and isinstance(data[envelope], list):
                return data[envelope]
        return [data]
    if isinstance(data, list):
        return data
    return []
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
async def fetch_cookies(host: Host) -> list[Cookie]:
    """Run get-cookie for ``host`` and convert every record to a ``Cookie``.

    The process runs with ``check=True``, so a non-zero exit raises. Its stdout is
    decoded as UTF-8 and parsed.

    Example:
        >>> await fetch_cookies(Host("github.com"))
    """
    argv = await command(host)
    proc = await anyio.run_process(argv, check=True)
    records = parse(proc.stdout.decode("utf-8"))
    cookies = []
    for record in records:
        # NOTE(review): the normalizer's ``url`` argument receives ``host`` here,
        # which presumably carries no scheme - confirm that is intended.
        cookies.append(normalize_getcookie_record(record, host))
    return cookies
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Pure union merge of cookie sets across machines: newest-wins, no tombstones.
|
|
2
|
+
|
|
3
|
+
Per the product decision, deletions are union-only — a cookie absent from one source is
|
|
4
|
+
never treated as a delete, so the merge is a plain union keyed by the cookie's logical
|
|
5
|
+
identity. The key is the *schema-superset* uniqueness tuple
|
|
6
|
+
``(host_key, top_frame_site_key, name, path, source_scheme, source_port,
|
|
7
|
+
has_cross_site_ancestor)``; a ``Cookie`` from a v18 store (which lacks the last three
|
|
8
|
+
columns) already carries the model's sentinel defaults for them, so heterogeneous
|
|
9
|
+
v18/v24 cookies share one logical key space. Within a key, the winner is the max by
|
|
10
|
+
``(last_update_utc, content_hash)``: ``content_hash`` breaks a timestamp tie
|
|
11
|
+
deterministically from the cookie's value and flags, so the result is independent of
|
|
12
|
+
source order — and two cookies with identical content collapse to the same stored row.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import hashlib
|
|
18
|
+
from typing import TYPE_CHECKING
|
|
19
|
+
|
|
20
|
+
from cookiesync.cookie.models import Cookie
|
|
21
|
+
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
from collections.abc import Iterable
|
|
24
|
+
|
|
25
|
+
# Shape of the value returned by ``merge_key``.
MergeKey = tuple[str, str, str, str, int, int, int]
# Shape of the value returned by ``merge_rank``: (last_update_utc, content_hash).
MergeRank = tuple[int, str]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def merge_key(cookie: Cookie) -> MergeKey:
    """Logical identity of ``cookie`` used to group cookies during a merge.

    Returns ``(host_key, top_frame_site_key, name, path, source_scheme,
    source_port, has_cross_site_ancestor)``.
    """
    where = (cookie.host_key, cookie.top_frame_site_key)
    what = (cookie.name, cookie.path)
    origin = (cookie.source_scheme, cookie.source_port, cookie.has_cross_site_ancestor)
    return where + what + origin
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def content_hash(cookie: Cookie) -> str:
    """Hex SHA-256 over the cookie's value, expiry, sameSite and flag fields.

    The fields are stringified (booleans as ``0``/``1``), joined with NUL bytes
    and UTF-8 encoded before hashing.
    """
    parts = [
        cookie.value,
        str(cookie.expires_utc),
        str(cookie.samesite),
        str(int(cookie.is_secure)),
        str(int(cookie.is_httponly)),
    ]
    hasher = hashlib.sha256()
    hasher.update("\x00".join(parts).encode("utf-8"))
    return hasher.hexdigest()
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def merge_rank(cookie: Cookie) -> MergeRank:
    """Ordering key for picking a winner: update time first, content hash as tie-break."""
    updated = int(cookie.last_update_utc)
    return updated, content_hash(cookie)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def merge(*sources: Iterable[Cookie]) -> tuple[Cookie, ...]:
    """Union every source into one cookie set with a single winner per logical key.

    Cookies are grouped by ``merge_key``. Within a group, the cookie with the
    greatest ``merge_rank`` (``last_update_utc``, then ``content_hash``) is kept.
    Absence from a source never deletes anything. The result lists winners in the
    order their keys were first seen.

    Example:
        >>> merge(machine_a_cookies, machine_b_cookies)
    """
    winners: dict[MergeKey, Cookie] = {}
    for source in sources:
        for candidate in source:
            key = merge_key(candidate)
            current = winners.get(key)
            # A challenger replaces the holder only with a strictly greater rank.
            if current is None or merge_rank(candidate) > merge_rank(current):
                winners[key] = candidate
    return tuple(winners.values())
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Cookie data model: branded primitives, the decrypted ``Cookie``, its raw DB row, and storage state.
|
|
2
|
+
|
|
3
|
+
Timestamps stay Chrome-native (``ChromeMicros``, µs since 1601) throughout the model;
|
|
4
|
+
conversion to Unix seconds and Playwright sameSite strings happens only at serialize time.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from typing import NewType
|
|
11
|
+
|
|
12
|
+
# Branded primitives: distinct names for the type checker, plain str/bytes/int at runtime.
Host = NewType("Host", str)
HostKey = NewType("HostKey", str)
SafeStorageKey = NewType("SafeStorageKey", str)
AesKey = NewType("AesKey", bytes)
ChromeMicros = NewType("ChromeMicros", int)

# Seconds between the 1601 epoch of Chrome timestamps and the Unix epoch.
WINDOWS_EPOCH_OFFSET = 11_644_473_600

# Chrome-native sameSite int -> Playwright string; -1 and 1 both render as "Lax".
SAMESITE_PLAYWRIGHT = {-1: "Lax", 0: "None", 1: "Lax", 2: "Strict"}
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def chrome_micros_to_unix(micros: ChromeMicros) -> float:
    """Convert a Chrome timestamp (µs since 1601) to Unix seconds.

    A non-positive timestamp marks a session cookie and yields ``-1``.
    """
    if micros <= 0:
        return -1
    seconds_since_1601 = micros / 1_000_000
    return seconds_since_1601 - WINDOWS_EPOCH_OFFSET
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def unix_to_chrome_micros(seconds: float) -> ChromeMicros:
    """Convert Unix seconds to a Chrome timestamp (µs since 1601), rounded to an int."""
    seconds_since_1601 = seconds + WINDOWS_EPOCH_OFFSET
    return ChromeMicros(round(seconds_since_1601 * 1_000_000))
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def samesite_to_playwright(samesite: int) -> str:
    """Look up the Playwright sameSite string for a Chrome-native int (-1/0/1/2).

    An int outside the ``SAMESITE_PLAYWRIGHT`` table raises ``KeyError``.
    """
    label = SAMESITE_PLAYWRIGHT[samesite]
    return label
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass(frozen=True, slots=True)
|
|
39
|
+
class Cookie:
|
|
40
|
+
"""One decrypted cookie, with Chrome-native column values.
|
|
41
|
+
|
|
42
|
+
Example:
|
|
43
|
+
>>> Cookie(HostKey(".x.com"), "sid", "abc", "/", ChromeMicros(0), ...)
|
|
44
|
+
"""
|
|
45
|
+
|
|
46
|
+
host_key: HostKey
|
|
47
|
+
name: str
|
|
48
|
+
value: str
|
|
49
|
+
path: str
|
|
50
|
+
expires_utc: ChromeMicros
|
|
51
|
+
last_update_utc: ChromeMicros
|
|
52
|
+
creation_utc: ChromeMicros
|
|
53
|
+
is_secure: bool
|
|
54
|
+
is_httponly: bool
|
|
55
|
+
samesite: int
|
|
56
|
+
source_scheme: int = 2
|
|
57
|
+
source_port: int = 443
|
|
58
|
+
top_frame_site_key: str = ""
|
|
59
|
+
has_cross_site_ancestor: int = 0
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass(frozen=True, slots=True)
|
|
63
|
+
class EncryptedRow:
|
|
64
|
+
"""A raw, pre-decrypt cookie row straight off the Chrome SQLite store.
|
|
65
|
+
|
|
66
|
+
Carries both the ``encrypted_value`` blob and the legacy plaintext ``value`` column.
|
|
67
|
+
"""
|
|
68
|
+
|
|
69
|
+
host_key: HostKey
|
|
70
|
+
name: str
|
|
71
|
+
encrypted_value: bytes
|
|
72
|
+
value: str
|
|
73
|
+
path: str
|
|
74
|
+
expires_utc: ChromeMicros
|
|
75
|
+
last_update_utc: ChromeMicros
|
|
76
|
+
creation_utc: ChromeMicros
|
|
77
|
+
is_secure: bool
|
|
78
|
+
is_httponly: bool
|
|
79
|
+
samesite: int
|
|
80
|
+
source_scheme: int = 2
|
|
81
|
+
source_port: int = 443
|
|
82
|
+
top_frame_site_key: str = ""
|
|
83
|
+
has_cross_site_ancestor: int = 0
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
@dataclass(frozen=True, slots=True)
|
|
87
|
+
class StorageState:
|
|
88
|
+
"""A bundle of decrypted cookies, ready to seed a browser session."""
|
|
89
|
+
|
|
90
|
+
cookies: tuple[Cookie, ...]
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""The extract/apply orchestration over a ``CookieBackend``.
|
|
2
|
+
|
|
3
|
+
``extract`` decrypts a host's cookies from a backend with an already-obtained key — the
|
|
4
|
+
consent gate lives in the backend, not here, so ``extract`` is pure given its key. It
|
|
5
|
+
filters to the host (in the backend), decrypts each row, drops expired cookies, and falls
|
|
6
|
+
back to the cross-browser ``get-cookie`` sweep when self-decrypt yields nothing. ``apply``
|
|
7
|
+
writes a cookie set back through the backend.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import time
|
|
13
|
+
from typing import TYPE_CHECKING
|
|
14
|
+
|
|
15
|
+
from cookiesync.cookie import getcookie
|
|
16
|
+
from cookiesync.cookie.crypto import DecryptError, decrypt_value
|
|
17
|
+
from cookiesync.cookie.domains import normalize_host
|
|
18
|
+
from cookiesync.cookie.models import (
|
|
19
|
+
Cookie,
|
|
20
|
+
StorageState,
|
|
21
|
+
chrome_micros_to_unix,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from collections.abc import Sequence
|
|
26
|
+
|
|
27
|
+
from cookiesync.cookie.backend import CookieBackend
|
|
28
|
+
from cookiesync.cookie.browsers import Browser
|
|
29
|
+
from cookiesync.cookie.models import AesKey, EncryptedRow
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _decrypt_row(row: EncryptedRow, key: AesKey, counts: dict[str, int]) -> Cookie | None:
    """Decrypt one stored row into a ``Cookie``, or tally the failure and return ``None``.

    On ``DecryptError`` the ``"v20"`` counter is bumped when the error message
    mentions ``v20``; otherwise the ``"failed"`` counter is bumped. ``counts`` is
    mutated in place.
    """
    try:
        value = decrypt_value(row.encrypted_value, key, row.host_key)
    except DecryptError as exc:
        bucket = "v20" if "v20" in str(exc) else "failed"
        counts[bucket] += 1
        return None
    # Every field except the value is carried over from the row unchanged.
    carried = {
        field: getattr(row, field)
        for field in (
            "host_key",
            "name",
            "path",
            "expires_utc",
            "last_update_utc",
            "creation_utc",
            "is_secure",
            "is_httponly",
            "samesite",
            "source_scheme",
            "source_port",
            "top_frame_site_key",
            "has_cross_site_ancestor",
        )
    }
    return Cookie(value=value, **carried)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _is_live(cookie: Cookie, *, now: float, include_expired: bool) -> bool:
    """Whether ``cookie`` should be kept at Unix time ``now``.

    Always true when ``include_expired`` is set. Otherwise true for session cookies
    (expiry converts to ``-1``) and for cookies that expire at or after ``now``.
    """
    if include_expired:
        return True
    expires = chrome_micros_to_unix(cookie.expires_utc)
    return expires == -1 or expires >= now
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
async def extract(
    url: str,
    *,
    browser: Browser,
    key: AesKey,
    backend: CookieBackend,
    profile: str | None = None,
    include_expired: bool = False,
    fallback: bool = True,
) -> StorageState:
    """Read and decrypt the cookies for ``url``'s host through ``backend``.

    ``key`` is supplied by the caller and never obtained here. Rows come from
    ``backend.read_rows`` for the normalized host. Rows that fail to decrypt are
    skipped, and expired cookies are dropped unless ``include_expired`` is set.
    When no cookie remains and ``fallback`` is true, the result of the get-cookie
    sweep for the host is returned instead.

    Example:
        >>> await extract("https://x.com", browser=chrome, key=key, backend=backend)
    """
    host = normalize_host(url)
    # Failure tallies filled in by ``_decrypt_row``; they are not read in this function.
    counts = {"v20": 0, "failed": 0}
    now = time.time()
    rows = await backend.read_rows(browser, host, profile=profile)
    kept = []
    for row in rows:
        cookie = _decrypt_row(row, key, counts)
        if cookie is None:
            continue
        if _is_live(cookie, now=now, include_expired=include_expired):
            kept.append(cookie)
    cookies = tuple(kept)
    if fallback and not cookies:
        swept = await getcookie.fetch_cookies(host)
        return StorageState(tuple(swept))
    return StorageState(cookies)
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
async def apply(cookies: Sequence[Cookie], *, browser: Browser, key: AesKey, backend: CookieBackend) -> int:
    """Hand ``cookies`` to ``backend.write_rows`` and return the count it reports.

    Example:
        >>> await apply(state.cookies, browser=chrome, key=key, backend=backend)
    """
    written = await backend.write_rows(browser, cookies, key)
    return written
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""Render a ``StorageState`` to stdout in one of four cookie wire formats.
|
|
2
|
+
|
|
3
|
+
The Playwright/agent-browser format is the load-bearing one: ``agent-browser
|
|
4
|
+
--state -`` consumes the standard ``{"cookies": [...], "origins": []}`` storageState
|
|
5
|
+
shape. We only carry cookies (the local store has no localStorage), so ``origins``
|
|
6
|
+
is always empty. The other formats serve cookies.txt (netscape), a ``Cookie:``
|
|
7
|
+
request header, and a raw JSON array of the same per-cookie dicts.
|
|
8
|
+
|
|
9
|
+
``render`` yields lines so the caller can stream straight to stdout.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
from enum import StrEnum
|
|
16
|
+
from typing import TYPE_CHECKING
|
|
17
|
+
|
|
18
|
+
from cookiesync.cookie.domains import normalize_host, url_scheme
|
|
19
|
+
from cookiesync.cookie.models import (
|
|
20
|
+
ChromeMicros,
|
|
21
|
+
Cookie,
|
|
22
|
+
HostKey,
|
|
23
|
+
chrome_micros_to_unix,
|
|
24
|
+
samesite_to_playwright,
|
|
25
|
+
unix_to_chrome_micros,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from collections.abc import Iterator
|
|
30
|
+
|
|
31
|
+
from cookiesync.cookie.models import StorageState
|
|
32
|
+
|
|
33
|
+
# Lowercased get-cookie sameSite string -> Chrome-native sameSite int.
SAMESITE_GETCOOKIE = {"strict": 2, "lax": 1, "none": 0}
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class OutputFormat(StrEnum):
|
|
37
|
+
"""The wire format ``render`` emits a ``StorageState`` in.
|
|
38
|
+
|
|
39
|
+
Example:
|
|
40
|
+
>>> "".join(render(state, OutputFormat.PLAYWRIGHT))
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
PLAYWRIGHT = "playwright"
|
|
44
|
+
NETSCAPE = "netscape"
|
|
45
|
+
HEADER = "header"
|
|
46
|
+
JSON = "json"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def playwright_cookie(cookie: Cookie) -> dict:
    """Build the Playwright-shaped dict for one Chrome-native ``Cookie``.

    ``expires`` is converted to Unix seconds and ``sameSite`` to its Playwright
    string. ``secure`` is forced true whenever ``sameSite`` is ``"None"``, since
    browsers reject the pair otherwise.
    """
    same_site = samesite_to_playwright(cookie.samesite)
    secure = cookie.is_secure or same_site == "None"
    rendered = {
        "name": cookie.name,
        "value": cookie.value,
        "domain": cookie.host_key,
        "path": cookie.path,
        "expires": chrome_micros_to_unix(cookie.expires_utc),
        "httpOnly": cookie.is_httponly,
        "secure": secure,
        "sameSite": same_site,
    }
    return rendered
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def netscape_line(cookie: Cookie) -> str:
    """Format one cookies.txt row: seven tab-separated columns.

    Columns are host key, subdomain flag (``TRUE`` when the host key starts with a
    dot), path, secure flag, expiry in whole Unix seconds (``0`` when the converted
    expiry is negative), name and value.
    """

    def flag(condition: bool) -> str:
        return "TRUE" if condition else "FALSE"

    expires = chrome_micros_to_unix(cookie.expires_utc)
    expiry_column = "0" if expires < 0 else str(int(expires))
    columns = [
        cookie.host_key,
        flag(cookie.host_key.startswith(".")),
        cookie.path,
        flag(cookie.is_secure),
        expiry_column,
        cookie.name,
        cookie.value,
    ]
    return "\t".join(columns)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def render(state: StorageState, fmt: OutputFormat) -> Iterator[str]:
    """Yield ``state`` rendered in ``fmt``, piece by piece, for streaming to stdout.

    Playwright and JSON each yield a single JSON document. Netscape yields a header
    line followed by one row per cookie. Header yields one ``name=value; ...``
    string. An unrecognised ``fmt`` yields nothing.
    """
    if fmt == OutputFormat.PLAYWRIGHT:
        document = {"cookies": [playwright_cookie(c) for c in state.cookies], "origins": []}
        yield json.dumps(document)
    elif fmt == OutputFormat.JSON:
        yield json.dumps(list(map(playwright_cookie, state.cookies)))
    elif fmt == OutputFormat.NETSCAPE:
        yield "# Netscape HTTP Cookie File"
        for cookie in state.cookies:
            yield netscape_line(cookie)
    elif fmt == OutputFormat.HEADER:
        pairs = [f"{c.name}={c.value}" for c in state.cookies]
        yield "; ".join(pairs)
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def normalize_getcookie_record(record: dict, url: str) -> Cookie:
    """Convert one ``@mherod/get-cookie`` JSON record into a ``Cookie``.

    ``name`` and ``value`` are required (``KeyError`` otherwise). ``domain`` falls
    back to the host of ``url`` and ``path`` to ``/``. ``secure``, ``httpOnly`` and
    ``sameSite`` are read from the optional ``meta`` block: ``secure`` defaults to
    whether ``url``'s scheme is ``https``, ``httpOnly`` to false, ``sameSite`` to
    lax. Update and creation times are set to ``ChromeMicros(0)``, and a record
    without a usable expiry also gets ``ChromeMicros(0)``.
    """
    meta = record.get("meta") or {}  # assumes ``meta``, when present, is a mapping
    host = normalize_host(url)
    domain = record.get("domain")
    host_key = HostKey(domain if domain else host)

    name = record["name"]
    value = record["value"]
    path = record.get("path") or "/"
    expires = _record_expiry(record)

    scheme_is_https = url_scheme(url) == "https"
    secure = bool(meta.get("secure", scheme_is_https))
    # Both the camelCase and the lowercase spelling of each meta key are honoured.
    http_only = bool(meta.get("httpOnly", meta.get("httponly", False)))
    same_site_raw = meta.get("sameSite") or meta.get("samesite") or "lax"
    same_site = SAMESITE_GETCOOKIE.get(str(same_site_raw).lower(), 1)

    return Cookie(
        host_key=host_key,
        name=name,
        value=value,
        path=path,
        expires_utc=expires,
        last_update_utc=ChromeMicros(0),
        creation_utc=ChromeMicros(0),
        is_secure=secure,
        is_httponly=http_only,
        samesite=same_site,
    )
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def _record_expiry(record: dict) -> ChromeMicros:
    """Read a record's expiry as a Chrome timestamp; ``ChromeMicros(0)`` means none.

    ``expiry`` is used when the key is present, otherwise ``expires``. Numbers and
    numeric strings are taken as Unix seconds. Booleans, missing values,
    non-numeric strings and non-finite numbers (``inf``/``nan``) all yield
    ``ChromeMicros(0)`` instead of raising, so one malformed record cannot abort
    the whole conversion.
    """
    import math  # local import: only needed for the finiteness check

    raw = record.get("expiry", record.get("expires"))
    # ``bool`` is an ``int`` subclass, so it has to be excluded explicitly.
    if isinstance(raw, bool) or not isinstance(raw, (int, float, str)):
        return ChromeMicros(0)
    try:
        seconds = float(raw)
    except (ValueError, OverflowError):
        # Non-numeric text (e.g. a date string or "--5") or an int too large for a float.
        return ChromeMicros(0)
    if not math.isfinite(seconds):
        # ``round()`` cannot convert inf/nan to an int.
        return ChromeMicros(0)
    return unix_to_chrome_micros(seconds)
|