durable-sync 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- durable_sync/__init__.py +26 -0
- durable_sync/activities.py +156 -0
- durable_sync/auth/__init__.py +8 -0
- durable_sync/auth/oauth/__init__.py +18 -0
- durable_sync/auth/oauth/flow.py +183 -0
- durable_sync/auth/oauth/refresh.py +58 -0
- durable_sync/auth/oauth/store.py +36 -0
- durable_sync/auth/oauth/token.py +36 -0
- durable_sync/auth/oauth/workflow.py +172 -0
- durable_sync/bootstrap.py +44 -0
- durable_sync/codec.py +80 -0
- durable_sync/config.py +35 -0
- durable_sync/connectors/__init__.py +14 -0
- durable_sync/connectors/asana/__init__.py +13 -0
- durable_sync/connectors/asana/destination.py +213 -0
- durable_sync/connectors/content.py +80 -0
- durable_sync/connectors/contentful/__init__.py +25 -0
- durable_sync/connectors/contentful/api.py +285 -0
- durable_sync/connectors/contentful/bootstrap.py +102 -0
- durable_sync/connectors/contentful/describe.py +61 -0
- durable_sync/connectors/contentful/destination.py +145 -0
- durable_sync/connectors/contentful/encode.py +49 -0
- durable_sync/connectors/contentful/introspect.py +69 -0
- durable_sync/connectors/contentful/mcp.py +95 -0
- durable_sync/connectors/contentful/mcp_destination.py +137 -0
- durable_sync/connectors/contentful/oauth.py +27 -0
- durable_sync/connectors/contentful/prove.py +51 -0
- durable_sync/connectors/contentful/source.py +192 -0
- durable_sync/connectors/contentful/start.py +46 -0
- durable_sync/connectors/contentful/store.py +25 -0
- durable_sync/connectors/contentful/token.py +13 -0
- durable_sync/connectors/contentful/token_check.py +42 -0
- durable_sync/connectors/github/__init__.py +33 -0
- durable_sync/connectors/github/api.py +169 -0
- durable_sync/connectors/github/source.py +230 -0
- durable_sync/connectors/luma/__init__.py +20 -0
- durable_sync/connectors/luma/api.py +121 -0
- durable_sync/connectors/luma/destination.py +128 -0
- durable_sync/connectors/luma/source.py +155 -0
- durable_sync/connectors/multi.py +78 -0
- durable_sync/connectors/notion/__init__.py +20 -0
- durable_sync/connectors/notion/bootstrap.py +97 -0
- durable_sync/connectors/notion/client.py +133 -0
- durable_sync/connectors/notion/destination.py +270 -0
- durable_sync/connectors/notion/oauth.py +25 -0
- durable_sync/connectors/notion/prove.py +57 -0
- durable_sync/connectors/notion/source.py +136 -0
- durable_sync/connectors/notion/start.py +46 -0
- durable_sync/connectors/notion/store.py +25 -0
- durable_sync/connectors/notion/token.py +13 -0
- durable_sync/connectors/youtube/__init__.py +13 -0
- durable_sync/connectors/youtube/api.py +122 -0
- durable_sync/connectors/youtube/source.py +152 -0
- durable_sync/core.py +210 -0
- durable_sync/env.py +55 -0
- durable_sync/http.py +71 -0
- durable_sync/linkstore.py +88 -0
- durable_sync/route.py +86 -0
- durable_sync/temporal_client.py +48 -0
- durable_sync/transport/__init__.py +12 -0
- durable_sync/transport/mcp.py +77 -0
- durable_sync/worker.py +109 -0
- durable_sync/workflows/__init__.py +9 -0
- durable_sync/workflows/sync.py +208 -0
- durable_sync-0.1.0.dist-info/METADATA +310 -0
- durable_sync-0.1.0.dist-info/RECORD +69 -0
- durable_sync-0.1.0.dist-info/WHEEL +5 -0
- durable_sync-0.1.0.dist-info/licenses/LICENSE +21 -0
- durable_sync-0.1.0.dist-info/top_level.txt +1 -0
durable_sync/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""durable-sync: durable, idempotent source -> destination sync on Temporal.
|
|
2
|
+
|
|
3
|
+
Public API — implement `Source` for your data, `Destination` for your target;
|
|
4
|
+
the spine (entity workflow, idempotent upsert, OAuth refresh, backoff) is
|
|
5
|
+
inherited. See `connectors/` (one subpackage per system — GitHub/Luma/YouTube/
|
|
6
|
+
Contentful sources, Notion/Asana destinations) for reference implementations.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from durable_sync.core import (
|
|
11
|
+
Destination,
|
|
12
|
+
DestinationSession,
|
|
13
|
+
Record,
|
|
14
|
+
Source,
|
|
15
|
+
SourceSpec,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"Record",
|
|
20
|
+
"SourceSpec",
|
|
21
|
+
"Source",
|
|
22
|
+
"Destination",
|
|
23
|
+
"DestinationSession",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
__version__ = "0.1.0"
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Generic, source/destination-agnostic activities.
|
|
2
|
+
|
|
3
|
+
A library can't hardcode `from pipeline import SOURCE, DESTINATION` the way a
|
|
4
|
+
single app would, so the activities are produced by a FACTORY the app calls once
|
|
5
|
+
with its wired Source + Destination:
|
|
6
|
+
|
|
7
|
+
worker = Worker(..., activities=make_activities(SOURCE, DESTINATION))
|
|
8
|
+
|
|
9
|
+
The activities are registered under stable string names (FETCH_SOURCE /
|
|
10
|
+
SYNC_RECORDS); the workflow refers to them by those names, so it never imports
|
|
11
|
+
these closures and stays sandbox-clean.
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import datetime as dt
|
|
16
|
+
import inspect
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from typing import Awaitable, Callable, Union
|
|
19
|
+
|
|
20
|
+
from temporalio import activity
|
|
21
|
+
from temporalio.exceptions import ApplicationError
|
|
22
|
+
|
|
23
|
+
from durable_sync.core import Destination, Record, Source, SourceSpec
|
|
24
|
+
|
|
25
|
+
# Stable activity names — the workflow executes by name (see workflows/sync.py).
|
|
26
|
+
FETCH_SOURCE = "fetch_source"
|
|
27
|
+
SYNC_RECORDS = "sync_records"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class FetchPage:
    """Result of one `fetch_source` call: a single page of records.

    `records` holds the fetched (and already transformed) records for this
    page; `next_cursor` is the cursor to request the following page with, or
    None when this is the last page. A Source that implements `fetch_page`
    is read page by page, so neither the fetch result nor the upsert payload
    has to be the whole dataset; a Source without it yields exactly one page
    with `next_cursor=None`.
    """

    # Records of this page; every instance gets its own fresh list by default.
    records: list[Record] = field(default_factory=list)
    # Cursor for the NEXT page, or None when there is nothing left to fetch.
    next_cursor: str | None = None
|
|
39
|
+
|
|
40
|
+
# The GENERIC transform seam: Record -> Record (mutate/derive/rename) or None
|
|
41
|
+
# (drop it — so transform doubles as a filter). Source- and destination-agnostic;
|
|
42
|
+
# may be sync or async. For transforms that need source internals use the source's
|
|
43
|
+
# enrich hook; for ones that read the destination use its session_enrich.
|
|
44
|
+
Transform = Callable[[Record], Union[Record, None, Awaitable[Union[Record, None]]]]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def make_activities(
    source: Source, destination: Destination, *, transform: Transform | None = None
) -> list:
    """Create the `fetch_source` and `sync_records` activities for one app.

    Both activities are closures over `source`, `destination` and the optional
    generic `transform`, and are registered under the stable names
    FETCH_SOURCE and SYNC_RECORDS. The returned list is meant to be passed as
    a Temporal Worker's `activities`.
    """

    async def _apply_transform(records: list[Record]) -> list[Record]:
        """Run the generic transform over `records`, dropping None results."""
        # No transform wired: hand the input list back untouched.
        if transform is None:
            return records
        kept: list[Record] = []
        for item in records:
            outcome = transform(item)
            # The transform may be sync or async; await only real awaitables.
            if inspect.isawaitable(outcome):
                outcome = await outcome
            # None means "drop this record", so the transform doubles as a filter.
            if outcome is None:
                continue
            kept.append(outcome)
        return kept

    @activity.defn(name=FETCH_SOURCE)
    async def fetch_source(
        spec: SourceSpec, only_items: list[str] | None = None, cursor: str | None = None
    ) -> FetchPage:
        """Fetch one page of a source unit and return it as a FetchPage.

        When the Source exposes a callable `fetch_page(spec, only_items,
        cursor)` it is awaited and expected to return `(records,
        next_cursor)`. Otherwise `source.fetch(spec, only_items)` is awaited
        once and its result is returned as a single page with
        `next_cursor=None`. The generic transform is applied to the records
        in both cases.
        """
        paged_fetch = getattr(source, "fetch_page", None)
        if not callable(paged_fetch):
            fetched = await source.fetch(spec, only_items)
            following = None
        else:
            fetched, following = await paged_fetch(spec, only_items, cursor)
        transformed = await _apply_transform(fetched)
        return FetchPage(records=transformed, next_cursor=following)

    @activity.defn(name=SYNC_RECORDS)
    async def sync_records(records: list[Record]) -> dict:
        """Upsert `records` into the Destination, keyed on `primary_key`.

        Returns a stats dict with `total`, `created`, `updated` and `skipped`
        counts. Raises a non-retryable ApplicationError of type "ConfigError"
        when the destination is not configured, and one of type "AuthError"
        when the destination classifies a failure as an auth error; any other
        exception propagates unchanged.
        """
        if not destination.configured:
            raise ApplicationError(
                f"Destination is not configured ({destination.config_hint})",
                type="ConfigError", non_retryable=True,
            )

        synced_at = dt.datetime.now(dt.timezone.utc)
        created = 0
        updated = 0
        skipped = 0

        # Protect the idempotency key before anything is written. Both kinds
        # of rejects are folded into `skipped`, which keeps
        # created + updated + skipped == total:
        #   * records with a falsy primary_key all share the same empty key,
        #     so they cannot be upserted idempotently and are dropped;
        #   * repeated primary_keys inside one batch would each look "not
        #     existing" (the existing ids are queried once, up front) and so
        #     create duplicate rows — only the LAST occurrence is kept.
        by_key: dict[str, Record] = {}
        for candidate in records:
            if candidate.primary_key:
                by_key[candidate.primary_key] = candidate
                continue
            skipped += 1
            activity.logger.warning(
                "Skipping record with empty primary_key (not idempotent): %r",
                candidate.properties,
            )
        to_sync = list(by_key.values())
        in_batch_dupes = (len(records) - skipped) - len(to_sync)
        if in_batch_dupes:
            skipped += in_batch_dupes
            activity.logger.warning(
                "Collapsed %d in-batch duplicate primary_key(s) (last-wins)", in_batch_dupes
            )

        try:
            async with destination.connect() as session:
                known_ids = await session.query_existing_ids()  # primary_key -> dest id
                for rec in to_sync:
                    dest_id = known_ids.get(rec.primary_key)
                    if dest_id:
                        wrote = await session.update(dest_id, rec, synced_at)
                        if wrote:
                            updated += 1
                    else:
                        wrote = await session.create(rec, synced_at)
                        if wrote:
                            created += 1
                    if not wrote:
                        skipped += 1  # dropped by a destination-side filter
                    activity.heartbeat(rec.primary_key)
        except Exception as exc:
            # ApplicationErrors pass through untouched. Of the rest, only auth
            # failures become non-retryable (a human has to re-authorize, so
            # the workflow can pause instead of hammering a dead credential);
            # everything else stays retryable.
            if isinstance(exc, ApplicationError) or not destination.is_auth_error(exc):
                raise
            raise ApplicationError(
                "Destination authorization is no longer valid (token refresh "
                "failed or was revoked). Re-authorize, then send `resume`.",
                type="AuthError", non_retryable=True,
            ) from exc

        stats = {"total": len(records), "created": created, "updated": updated, "skipped": skipped}
        activity.logger.info("Sync complete: %s", stats)
        return stats

    return [fetch_source, sync_records]
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""Authentication mechanisms for destinations.
|
|
2
|
+
|
|
3
|
+
A cross-cutting toolkit (not a connector), organized by mechanism. Today there's
|
|
4
|
+
one: `oauth/` — the OAuth-as-a-workflow toolkit (token-owner workflow + flow +
|
|
5
|
+
store) for no-admin providers. Mechanisms that need NO shared code (e.g. a
|
|
6
|
+
self-serve PAT, which is a one-liner) live inline in their connector, so they get
|
|
7
|
+
no package here until there's something to share.
|
|
8
|
+
"""
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Generic OAuth-as-a-workflow toolkit (provider-agnostic).
|
|
2
|
+
|
|
3
|
+
For destinations whose API offers no admin-free static token: authorize as an
|
|
4
|
+
individual via OAuth 2.1 + PKCE + dynamic client registration, then let a Temporal
|
|
5
|
+
workflow OWN the rotating refresh token — refreshing on a timer and serving fresh
|
|
6
|
+
access tokens via query (so the secret never enters event history). Standards-based
|
|
7
|
+
(RFC 8414 discovery, RFC 7591 dynamic registration, PKCE).
|
|
8
|
+
|
|
9
|
+
Import from the SUBMODULES, not this package. This __init__ deliberately imports
|
|
10
|
+
nothing: `flow` pulls in `requests`, and the Temporal workflow sandbox forbids
|
|
11
|
+
that — so it must only ever be loaded via the workflow's pass-through import, never
|
|
12
|
+
eagerly here (an eager re-export here breaks `OAuthTokenWorkflow` sandbox validation).
|
|
13
|
+
|
|
14
|
+
from durable_sync.auth.oauth.workflow import OAuthTokenWorkflow, AuthParams
|
|
15
|
+
from durable_sync.auth.oauth.refresh import refresh_oauth_token
|
|
16
|
+
from durable_sync.auth.oauth.token import current_access_token
|
|
17
|
+
from durable_sync.auth.oauth import flow, store
|
|
18
|
+
"""
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
"""OAuth 2.1 (PKCE + dynamic client registration) — provider-agnostic HTTP
|
|
2
|
+
helpers. No Temporal, no browser, no file IO, no hardcoded provider: every
|
|
3
|
+
endpoint is passed in (discover() takes the server base URL). Reusable from an
|
|
4
|
+
interactive bootstrap AND from the refresh activity.
|
|
5
|
+
|
|
6
|
+
Public clients (token_endpoint_auth_method="none"): no client secret, PKCE
|
|
7
|
+
mandatory. Endpoints are discovered, not hardcoded, so this keeps working if a
|
|
8
|
+
provider moves them.
|
|
9
|
+
|
|
10
|
+
Deliberately NOT the MCP SDK's OAuthClientProvider: we own the token lifecycle
|
|
11
|
+
(the auth workflow does) and pass a plain Bearer header to the transport, which
|
|
12
|
+
sidesteps that SDK's cross-version auth API churn.
|
|
13
|
+
"""
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import base64
|
|
17
|
+
import hashlib
|
|
18
|
+
import os
|
|
19
|
+
import secrets
|
|
20
|
+
from typing import Any
|
|
21
|
+
from urllib.parse import urlsplit
|
|
22
|
+
|
|
23
|
+
import requests
|
|
24
|
+
|
|
25
|
+
# Timeout passed as `timeout=` to every `requests` call in this module.
_TIMEOUT = 30
|
|
26
|
+
# Default `client_name` sent by register_client() during dynamic client registration.
DEFAULT_CLIENT_NAME = "durable-sync"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _registrable_domain(host: str) -> str:
    """Return the comparison key used to pin OAuth endpoints to one site.

    For DNS names this is the last two labels (heuristic, no public-suffix
    list): `mcp.notion.com` -> `notion.com`. A single-label host is returned
    as is. The host is lower-cased and a trailing root dot is ignored.

    IP literals are returned whole. Applying the last-two-labels rule to them
    would make unrelated addresses compare equal (`10.0.0.1` and
    `192.168.0.1` would both become `0.1`).

    Being a heuristic, names under multi-label public suffixes (`co.uk`, ...)
    still collapse to the suffix.
    """
    host = host.rstrip(".").lower()
    labels = host.split(".")
    # IPv6 literals contain ':'; an all-digit last label means IPv4, since no
    # top-level domain is purely numeric.
    if ":" in host or labels[-1].isdigit():
        return host
    return ".".join(labels[-2:]) if len(labels) >= 2 else host
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def _validate_endpoint(url: str, base_url: str, *, same_site: bool) -> str:
    """Reject a discovered OAuth endpoint that could exfiltrate the refresh token.

    The refresh token is POSTed to whatever the discovery documents name, on
    every refresh, unattended — so a tampered or compromised discovery
    response must not be able to point us at an attacker host.

    Checks, in order:
      * the scheme must be https (always);
      * the URL must name a host (always);
      * when `same_site` is true, the host must share its registrable domain
        with `base_url`. Providers whose authorization server is on another
        domain pass `same_site=False`.

    Returns `url` unchanged when it passes; raises ValueError otherwise.
    """
    parts = urlsplit(url)
    if parts.scheme != "https":
        raise ValueError(f"Refusing non-https OAuth endpoint from discovery: {url!r}")
    host = parts.hostname or ""
    # "https:///token" parses with an https scheme but no host; it can never be
    # a usable endpoint, so fail here instead of at request time.
    if not host:
        raise ValueError(f"Refusing OAuth endpoint with no host from discovery: {url!r}")
    if same_site:
        base_host = urlsplit(base_url).hostname or ""
        if _registrable_domain(host) != _registrable_domain(base_host):
            raise ValueError(
                f"Discovered OAuth endpoint {host!r} is off-domain from {base_host!r}; "
                f"refusing (pass same_site=False if this provider's auth server is "
                f"intentionally on another domain)."
            )
    return url
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def discover(base_url: str, *, same_site: bool = True) -> dict[str, str]:
    """Run two-step OAuth discovery against `base_url`.

    Step 1 fetches the RFC 9728 protected-resource document and takes its
    first `authorization_servers` entry; step 2 fetches that server's RFC 8414
    authorization-server metadata.

    Returns a dict with `authorization_endpoint`, `token_endpoint` and
    `registration_endpoint`. The authorization server URL and every returned
    endpoint are passed through `_validate_endpoint` (https, plus same-domain
    unless `same_site=False`), because the token endpoint later receives the
    refresh token unattended.

    Raises whatever `requests` raises for transport/HTTP errors, ValueError
    for an endpoint that fails validation, and KeyError/IndexError when a
    discovery document lacks an expected field.
    """
    # A terminating "/" on the base URL or issuer would yield
    # "//.well-known/..."; strip it before appending the well-known suffix.
    resource_root = base_url.rstrip("/")
    pr = requests.get(f"{resource_root}/.well-known/oauth-protected-resource", timeout=_TIMEOUT)
    pr.raise_for_status()
    auth_server = _validate_endpoint(
        pr.json()["authorization_servers"][0], base_url, same_site=same_site
    )

    # NOTE(review): for issuers that carry a path component, RFC 8414 inserts
    # the well-known segment between host and path; this appends it instead,
    # as the original did.
    issuer_root = auth_server.rstrip("/")
    md = requests.get(f"{issuer_root}/.well-known/oauth-authorization-server", timeout=_TIMEOUT)
    md.raise_for_status()
    data = md.json()
    return {
        "authorization_endpoint": _validate_endpoint(data["authorization_endpoint"], base_url, same_site=same_site),
        "token_endpoint": _validate_endpoint(data["token_endpoint"], base_url, same_site=same_site),
        "registration_endpoint": _validate_endpoint(data["registration_endpoint"], base_url, same_site=same_site),
    }
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def register_client(
    registration_endpoint: str, redirect_uri: str, *, client_name: str = DEFAULT_CLIENT_NAME
) -> dict[str, Any]:
    """Register a public client via Dynamic Client Registration (RFC 7591).

    POSTs a JSON registration request (authorization-code + refresh-token
    grants, `token_endpoint_auth_method` "none") to `registration_endpoint`
    and returns the parsed JSON response. Raises the `requests` HTTP error
    for a non-success status.
    """
    registration = {
        "client_name": client_name,
        "redirect_uris": [redirect_uri],
        "grant_types": ["authorization_code", "refresh_token"],
        "response_types": ["code"],
        # Public client: no secret is issued.
        "token_endpoint_auth_method": "none",
    }
    response = requests.post(registration_endpoint, json=registration, timeout=_TIMEOUT)
    response.raise_for_status()
    return response.json()
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def gen_pkce() -> tuple[str, str]:
    """Generate a PKCE pair for the S256 method.

    Returns `(verifier, challenge)`: the verifier is 32 random bytes encoded
    as unpadded URL-safe base64, and the challenge is the unpadded URL-safe
    base64 of the SHA-256 digest of the verifier.
    """

    def _b64url(raw: bytes) -> str:
        # URL-safe base64 with the "=" padding removed.
        return base64.urlsafe_b64encode(raw).rstrip(b"=").decode()

    verifier = _b64url(os.urandom(32))
    challenge = _b64url(hashlib.sha256(verifier.encode()).digest())
    return verifier, challenge
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def new_state() -> str:
    """Return a fresh random URL-safe string (from 16 random bytes) to use as OAuth `state`."""
    state = secrets.token_urlsafe(16)
    return state
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def build_authorize_url(
    authorization_endpoint: str, client_id: str, redirect_uri: str,
    code_challenge: str, state: str,
) -> str:
    """Build the browser URL for an authorization-code + PKCE (S256) request.

    The OAuth parameters are URL-encoded and appended to
    `authorization_endpoint`. An endpoint that already carries a query
    component keeps it: the new parameters are joined with "&" rather than a
    second "?".
    """
    from urllib.parse import urlencode

    params = {
        "response_type": "code",
        "client_id": client_id,
        "redirect_uri": redirect_uri,
        "code_challenge": code_challenge,
        "code_challenge_method": "S256",
        "state": state,
    }
    # Pick the joiner from what the endpoint already ends with, so an existing
    # query component is retained instead of producing "...?a=b?response_type=".
    if "?" not in authorization_endpoint:
        joiner = "?"
    elif authorization_endpoint.endswith(("?", "&")):
        joiner = ""
    else:
        joiner = "&"
    return f"{authorization_endpoint}{joiner}{urlencode(params)}"
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def exchange_code(
    token_endpoint: str, client_id: str, code: str, redirect_uri: str, code_verifier: str
) -> dict[str, Any]:
    """Trade an authorization code for tokens.

    POSTs a form-encoded `authorization_code` grant (including the PKCE
    `code_verifier`) to `token_endpoint` and returns the parsed JSON response
    (access_token, refresh_token, expires_in). Raises the `requests` HTTP
    error for a non-success status.
    """
    form = {
        "grant_type": "authorization_code",
        "code": code,
        "client_id": client_id,
        "redirect_uri": redirect_uri,
        "code_verifier": code_verifier,
    }
    response = requests.post(token_endpoint, data=form, timeout=_TIMEOUT)
    response.raise_for_status()
    return response.json()
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def refresh_access_token(token_endpoint: str, client_id: str, refresh_token: str) -> dict[str, Any]:
    """Refresh token -> a fresh access_token (and possibly a ROTATED refresh_token).

    POSTs a form-encoded `refresh_token` grant to `token_endpoint` and returns
    the parsed JSON response on success.

    Providers like Notion rotate the refresh token on every use, so the caller
    MUST persist the returned refresh_token; an `invalid_grant` means the
    stored token was already spent -> re-bootstrap.

    Raises RuntimeError for any response with status >= 400. The message
    always carries the status code and the first 600 characters of the body;
    for `invalid_grant` / `invalid_client` or a 400/401 status it also
    explains that the stored refresh token is no longer valid.
    """
    resp = requests.post(
        token_endpoint,
        data={
            "grant_type": "refresh_token",
            "refresh_token": refresh_token,
            "client_id": client_id,
        },
        timeout=_TIMEOUT,
    )
    if resp.status_code >= 400:
        # OAuth errors are JSON: {"error": "...", "error_description": "..."}. Surface
        # the body, and turn the common "your token is dead" cases into a plain-English
        # hint instead of a bare HTTPError. Keep `invalid_grant`/401 in the message so
        # is_auth_error still classifies it.
        body = resp.text[:600]
        try:
            payload = resp.json()
        except ValueError:
            payload = None
        # The body may be valid JSON without being an object (a list, a bare
        # string, ...); only an object can carry an "error" field, and calling
        # .get() on anything else would raise AttributeError instead of the
        # RuntimeError callers expect.
        err = payload.get("error", "") if isinstance(payload, dict) else ""
        if err in ("invalid_grant", "invalid_client") or resp.status_code in (400, 401):
            raise RuntimeError(
                f"OAuth token refresh rejected ({resp.status_code} {err or 'error'}). The stored "
                f"refresh token is no longer valid — expired, revoked, or already spent (providers "
                f"that rotate the refresh token on every use, e.g. Notion, invalidate the old one each "
                f"refresh). Re-authorize to mint a fresh token by re-running your provider's bootstrap. "
                f"Server said: {body}"
            )
        raise RuntimeError(f"OAuth token refresh failed ({resp.status_code}): {body}")
    return resp.json()
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""The refresh activity — the only IO in the OAuth auth hot path.
|
|
2
|
+
|
|
3
|
+
Wraps oauth.refresh_access_token so OAuthTokenWorkflow stays deterministic (no
|
|
4
|
+
network in the workflow). Returns the new access token AND the rotated refresh
|
|
5
|
+
token; the workflow persists both in its state. Provider-agnostic — the token
|
|
6
|
+
endpoint + client id come in via the input. Kept in its own module so the
|
|
7
|
+
workflow imports it pass-through without dragging `requests` into the sandbox.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
|
|
13
|
+
from temporalio import activity
|
|
14
|
+
from temporalio.exceptions import ApplicationError
|
|
15
|
+
|
|
16
|
+
from durable_sync.auth.oauth import flow as oauth
|
|
17
|
+
from durable_sync.core import auth_error_in_chain
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass
class RefreshInput:
    """Input of the `refresh_oauth_token` activity."""

    # OAuth client id sent with the refresh grant.
    client_id: str
    # Token endpoint the refresh grant is POSTed to.
    token_endpoint: str
    # Refresh token to spend on this refresh.
    refresh_token: str
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
|
|
28
|
+
class RefreshOutput:
|
|
29
|
+
access_token: str
|
|
30
|
+
refresh_token: str # rotated — the workflow MUST store this
|
|
31
|
+
expires_in: int
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@activity.defn
def refresh_oauth_token(inp: RefreshInput) -> RefreshOutput:
    """Spend the refresh token in `inp` and return the new token set.

    A failure that `auth_error_in_chain` recognises as an auth error is
    re-raised as a non-retryable ApplicationError of type "AuthError"; any
    other failure propagates unchanged and so stays retryable.
    """
    try:
        tokens = oauth.refresh_access_token(inp.token_endpoint, inp.client_id, inp.refresh_token)
    except Exception as exc:
        # Transient failures stay retryable: re-raise them untouched.
        if not auth_error_in_chain(exc):
            raise
        # A revoked/expired/spent refresh token can't be fixed by retrying — only
        # a human re-bootstrap mints a new one. Typed + non-retryable lets the
        # OAuthTokenWorkflow PAUSE (stays queryable + resumable) instead of
        # burning retries and then terminating.
        raise ApplicationError(
            "OAuth refresh token is no longer valid (expired, revoked, or spent). "
            "Re-bootstrap to mint a fresh token, then send the `reauthorize` signal.",
            type="AuthError", non_retryable=True,
        ) from exc

    access = tokens["access_token"]
    # Many providers omit `refresh_token` from the response when it did not
    # change. Reusing the one we sent keeps the chain alive instead of failing
    # the activity on a missing key.
    next_refresh = tokens.get("refresh_token") or inp.refresh_token
    lifetime = int(tokens.get("expires_in", 3600))
    return RefreshOutput(access_token=access, refresh_token=next_refresh, expires_in=lifetime)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Local credential store for the OAuth bootstrap handoff (provider-agnostic).
|
|
2
|
+
|
|
3
|
+
BOOTSTRAP/PROOF persistence only. In the running system the refresh token lives
|
|
4
|
+
in OAuthTokenWorkflow's state (durable, single-owner) — but bootstrap needs
|
|
5
|
+
somewhere to hand off the initial token, and prove reads it back. The file is
|
|
6
|
+
gitignored; never commit it. Each provider passes its own `file` path.
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def load(file: str) -> dict[str, Any] | None:
    """Read the JSON credential file at `file`; return None when it does not exist."""
    path = Path(file)
    return json.loads(path.read_text()) if path.exists() else None
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def save(file: str, data: dict[str, Any]) -> None:
    """Write `data` as JSON to `file` atomically and with owner-only access.

    The JSON is written to a sibling temp file (`file` with its suffix
    replaced by ".tmp") which is then moved over the target with
    `os.replace`, so a crash mid-write can't corrupt the rotating token.

    The temp file is created with mode 0o600 from the start, so the token is
    never on disk with umask-default permissions. If writing or replacing
    fails, the temp file is removed and the error propagates.
    """
    p = Path(file)
    tmp = p.with_suffix(".tmp")
    # Serialize first: a non-serializable payload must fail before any file is touched.
    payload = json.dumps(data, indent=2)
    fd = os.open(tmp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    replaced = False
    try:
        with os.fdopen(fd, "w") as fh:
            # The mode given to os.open only applies when the file is created;
            # tighten a stale temp file left by an earlier crash BEFORE the
            # secret is written. Best effort, as some filesystems reject chmod.
            try:
                tmp.chmod(0o600)
            except OSError:
                pass
            fh.write(payload)
        os.replace(tmp, p)
        replaced = True
    finally:
        if not replaced:
            # Don't leave a partially written secret behind.
            try:
                tmp.unlink()
            except OSError:
                pass
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def resolve(file: str) -> Path:
    """Return `file` as a `Path` (no filesystem access, no normalization)."""
    location = Path(file)
    return location
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Access-token accessor for use INSIDE activities (a destination's session).
|
|
2
|
+
|
|
3
|
+
current_access_token(workflow_id) queries the OAuthTokenWorkflow with that id for
|
|
4
|
+
a valid token. The query result is used locally and never returned from the
|
|
5
|
+
activity, so the token stays out of Temporal event history.
|
|
6
|
+
|
|
7
|
+
Not workflow code — safe to do IO and cache a client.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from temporalio.client import Client
|
|
12
|
+
|
|
13
|
+
from durable_sync.auth.oauth.workflow import OAuthTokenWorkflow
|
|
14
|
+
from durable_sync.temporal_client import connect
|
|
15
|
+
|
|
16
|
+
# Module-level cache for the Temporal client; stays None until _get_client() first connects.
_client: Client | None = None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def _get_client() -> Client:
    """Return the module-wide Temporal client, connecting on first use."""
    global _client
    cached = _client
    if cached is None:
        # First call: connect once and keep the client for later calls.
        cached = _client = await connect()
    return cached
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
async def current_access_token(workflow_id: str) -> str:
    """Ask the OAuthTokenWorkflow with id `workflow_id` for its access token.

    Uses the `get_access_token` query. Raises RuntimeError when the workflow
    answers with an empty token.
    """
    handle = (await _get_client()).get_workflow_handle(workflow_id)
    token = await handle.query(OAuthTokenWorkflow.get_access_token)
    if token:
        return token
    raise RuntimeError(
        f"OAuthTokenWorkflow '{workflow_id}' returned an empty access token — "
        f"is it running? Bootstrap + start it (see the destination's docs)."
    )
|