durable-sync 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- durable_sync/__init__.py +26 -0
- durable_sync/activities.py +156 -0
- durable_sync/auth/__init__.py +8 -0
- durable_sync/auth/oauth/__init__.py +18 -0
- durable_sync/auth/oauth/flow.py +183 -0
- durable_sync/auth/oauth/refresh.py +58 -0
- durable_sync/auth/oauth/store.py +36 -0
- durable_sync/auth/oauth/token.py +36 -0
- durable_sync/auth/oauth/workflow.py +172 -0
- durable_sync/bootstrap.py +44 -0
- durable_sync/codec.py +80 -0
- durable_sync/config.py +35 -0
- durable_sync/connectors/__init__.py +14 -0
- durable_sync/connectors/asana/__init__.py +13 -0
- durable_sync/connectors/asana/destination.py +213 -0
- durable_sync/connectors/content.py +80 -0
- durable_sync/connectors/contentful/__init__.py +25 -0
- durable_sync/connectors/contentful/api.py +285 -0
- durable_sync/connectors/contentful/bootstrap.py +102 -0
- durable_sync/connectors/contentful/describe.py +61 -0
- durable_sync/connectors/contentful/destination.py +145 -0
- durable_sync/connectors/contentful/encode.py +49 -0
- durable_sync/connectors/contentful/introspect.py +69 -0
- durable_sync/connectors/contentful/mcp.py +95 -0
- durable_sync/connectors/contentful/mcp_destination.py +137 -0
- durable_sync/connectors/contentful/oauth.py +27 -0
- durable_sync/connectors/contentful/prove.py +51 -0
- durable_sync/connectors/contentful/source.py +192 -0
- durable_sync/connectors/contentful/start.py +46 -0
- durable_sync/connectors/contentful/store.py +25 -0
- durable_sync/connectors/contentful/token.py +13 -0
- durable_sync/connectors/contentful/token_check.py +42 -0
- durable_sync/connectors/github/__init__.py +33 -0
- durable_sync/connectors/github/api.py +169 -0
- durable_sync/connectors/github/source.py +230 -0
- durable_sync/connectors/luma/__init__.py +20 -0
- durable_sync/connectors/luma/api.py +121 -0
- durable_sync/connectors/luma/destination.py +128 -0
- durable_sync/connectors/luma/source.py +155 -0
- durable_sync/connectors/multi.py +78 -0
- durable_sync/connectors/notion/__init__.py +20 -0
- durable_sync/connectors/notion/bootstrap.py +97 -0
- durable_sync/connectors/notion/client.py +133 -0
- durable_sync/connectors/notion/destination.py +270 -0
- durable_sync/connectors/notion/oauth.py +25 -0
- durable_sync/connectors/notion/prove.py +57 -0
- durable_sync/connectors/notion/source.py +136 -0
- durable_sync/connectors/notion/start.py +46 -0
- durable_sync/connectors/notion/store.py +25 -0
- durable_sync/connectors/notion/token.py +13 -0
- durable_sync/connectors/youtube/__init__.py +13 -0
- durable_sync/connectors/youtube/api.py +122 -0
- durable_sync/connectors/youtube/source.py +152 -0
- durable_sync/core.py +210 -0
- durable_sync/env.py +55 -0
- durable_sync/http.py +71 -0
- durable_sync/linkstore.py +88 -0
- durable_sync/route.py +86 -0
- durable_sync/temporal_client.py +48 -0
- durable_sync/transport/__init__.py +12 -0
- durable_sync/transport/mcp.py +77 -0
- durable_sync/worker.py +109 -0
- durable_sync/workflows/__init__.py +9 -0
- durable_sync/workflows/sync.py +208 -0
- durable_sync-0.1.0.dist-info/METADATA +310 -0
- durable_sync-0.1.0.dist-info/RECORD +69 -0
- durable_sync-0.1.0.dist-info/WHEEL +5 -0
- durable_sync-0.1.0.dist-info/licenses/LICENSE +21 -0
- durable_sync-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
"""OAuthTokenWorkflow — the entity workflow that owns a rotating OAuth refresh
|
|
2
|
+
token (provider-agnostic).
|
|
3
|
+
|
|
4
|
+
Why a workflow and not a cron job + a file:
|
|
5
|
+
- It's the SINGLE owner of the rotating refresh token, so refreshes are
|
|
6
|
+
serialized by construction — the concurrent-refresh `invalid_grant` race that
|
|
7
|
+
rotating-refresh-token providers warn about can't happen.
|
|
8
|
+
- Its state (the refresh token) is durable across worker restarts.
|
|
9
|
+
- It hands out fresh access tokens via @workflow.query, which is NOT recorded in
|
|
10
|
+
history — so activities fetch a token without it touching the event log. (Pair
|
|
11
|
+
with the encryption codec to protect the token in workflow state.)
|
|
12
|
+
|
|
13
|
+
Start one per provider/account, with the id the destination expects (e.g.
|
|
14
|
+
config.NOTION_AUTH_WORKFLOW_ID). A bootstrap captures the initial refresh token;
|
|
15
|
+
from then on this runs unattended, refreshing ~5 min before expiry.
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
from datetime import timedelta
|
|
21
|
+
|
|
22
|
+
from temporalio import workflow
|
|
23
|
+
from temporalio.common import RetryPolicy
|
|
24
|
+
from temporalio.exceptions import ApplicationError
|
|
25
|
+
|
|
26
|
+
with workflow.unsafe.imports_passed_through():
|
|
27
|
+
from durable_sync.auth.oauth.refresh import RefreshInput, RefreshOutput, refresh_oauth_token
|
|
28
|
+
|
|
29
|
+
# Refresh this long before the access token's stated expiry.
|
|
30
|
+
_REFRESH_SKEW = timedelta(minutes=5)
|
|
31
|
+
# Continue-as-new after this many refreshes to keep event history small.
|
|
32
|
+
_REFRESHES_BEFORE_CONTINUE = 24
|
|
33
|
+
# Back-off between retries when a refresh fails for a TRANSIENT reason (the token
|
|
34
|
+
# endpoint is down) — so the workflow self-heals instead of giving up.
|
|
35
|
+
_TRANSIENT_BACKOFF = timedelta(seconds=60)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _is_auth_failure(err: BaseException | None) -> bool:
|
|
39
|
+
"""The refresh activity raises a non-retryable ApplicationError(type=AuthError)
|
|
40
|
+
when the refresh token is dead (mirrors sync_records). Type-only check, so it's
|
|
41
|
+
pure/deterministic and safe in the workflow."""
|
|
42
|
+
while err is not None:
|
|
43
|
+
if isinstance(err, ApplicationError) and err.type == "AuthError":
|
|
44
|
+
return True
|
|
45
|
+
err = err.__cause__
|
|
46
|
+
return False
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
@dataclass
class AuthParams:
    """Input of OAuthTokenWorkflow, also used as its continue-as-new payload."""

    # OAuth client id and token endpoint passed to the refresh activity.
    client_id: str
    token_endpoint: str
    # The current (rotating) refresh token.
    refresh_token: str
    # Refresh counter handed to the next run on continue-as-new, so the count
    # is not lost when history is truncated.
    refreshes_so_far: int = 0
    # Latest access token, handed to the next run on continue-as-new so the
    # token query keeps answering at the boundary; without it the new run would
    # report an empty token until its first refresh completes. The encryption
    # codec is what protects this value in history.
    access_token: str = ""
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
@workflow.defn
class OAuthTokenWorkflow:
    """Entity workflow that owns one rotating OAuth refresh token.

    The run loop calls the `refresh_oauth_token` activity, stores the returned
    access/refresh tokens, sleeps until shortly before expiry, and repeats.
    Callers read the token through the `get_access_token` query. A refresh
    rejected as an auth failure parks the workflow until `reauthorize` is
    signalled; any other failure is retried after `_TRANSIENT_BACKOFF`.
    """

    @workflow.init
    def __init__(self, params: AuthParams) -> None:
        # Seed state from the params so a continued-as-new run starts with the
        # previous run's tokens and counter.
        self._access_token = params.access_token
        self._refresh_token = params.refresh_token
        self._refreshes = params.refreshes_so_far
        # Pause/recover state: a dead refresh token parks the workflow instead
        # of crashing it, so it stays queryable and can be resumed via the
        # `reauthorize` signal without re-creating it.
        self._paused = False
        self._last_error: str | None = None
        self._last_refresh: str | None = None
        self._new_refresh_token = ""  # supplied by reauthorize after a re-bootstrap

    @workflow.run
    async def run(self, params: AuthParams) -> None:
        """Refresh forever; continue-as-new after `_REFRESHES_BEFORE_CONTINUE`
        successful refreshes."""
        while True:
            try:
                out: RefreshOutput = await workflow.execute_activity(
                    refresh_oauth_token,
                    RefreshInput(
                        client_id=params.client_id,
                        token_endpoint=params.token_endpoint,
                        refresh_token=self._refresh_token,
                    ),
                    start_to_close_timeout=timedelta(seconds=30),
                    retry_policy=RetryPolicy(maximum_attempts=5),
                )
            except Exception as e:  # noqa: BLE001 - classify, never let the workflow die
                self._last_error = str(e)
                if _is_auth_failure(e):
                    # Refresh token revoked/expired/spent. If `reauthorize`
                    # already delivered a replacement (the signal can land
                    # before this failure is observed), use it right away:
                    # parking here would discard that wake-up and block until a
                    # second signal arrives.
                    if not self._new_refresh_token:
                        self._paused = True
                        workflow.logger.error(
                            "OAuth refresh permanently rejected for %s — pausing until "
                            "`reauthorize` with a fresh refresh token.", params.client_id,
                        )
                        await workflow.wait_condition(lambda: not self._paused)
                    if self._new_refresh_token:
                        # Consume the pending token exactly once; if it is also
                        # rejected, the next pass finds none pending and parks.
                        self._refresh_token = self._new_refresh_token
                        self._new_refresh_token = ""
                else:
                    # Transient (endpoint down, network) — back off and retry
                    # rather than terminating the only source of access tokens.
                    workflow.logger.warning(
                        "OAuth refresh transient failure for %s; retrying after backoff.",
                        params.client_id,
                    )
                    await workflow.sleep(_TRANSIENT_BACKOFF)
                continue

            self._access_token = out.access_token
            self._refresh_token = out.refresh_token  # rotated — keep the newest
            self._refreshes += 1
            self._last_refresh = workflow.now().isoformat()
            self._last_error = None

            # Wake up `_REFRESH_SKEW` before expiry; for tokens that live no
            # longer than the skew, fall back to half the lifetime (min 30 s).
            sleep_for = timedelta(seconds=out.expires_in) - _REFRESH_SKEW
            if sleep_for <= timedelta(0):
                sleep_for = timedelta(seconds=max(out.expires_in // 2, 30))
            await workflow.sleep(sleep_for)

            # Roll history only AFTER sleeping until the token is near expiry,
            # so the fresh run's immediate refresh is the one that is actually
            # due. The latest refresh AND access token are carried over so the
            # new run answers token queries from its first moment.
            if self._refreshes >= _REFRESHES_BEFORE_CONTINUE:
                await workflow.wait_condition(workflow.all_handlers_finished)
                workflow.continue_as_new(
                    AuthParams(
                        client_id=params.client_id,
                        token_endpoint=params.token_endpoint,
                        refresh_token=self._refresh_token,
                        refreshes_so_far=0,
                        access_token=self._access_token,
                    )
                )

    # --- Signals (flip flags only; non-async; tolerate stray payloads) -------

    @workflow.signal
    def reauthorize(self, refresh_token: str = "", *_: object) -> None:
        """Resume after a pause. Pass a fresh refresh token (from re-running the
        provider's bootstrap) when the old one was revoked/expired; a bare signal
        just retries with the current token (e.g. to recover from a long outage)."""
        if refresh_token:
            self._new_refresh_token = refresh_token
        self._paused = False

    # --- Queries (read-only) -------------------------------------------------

    @workflow.query
    def get_access_token(self) -> str:
        """Return the most recently stored access token (empty string if no
        refresh has succeeded and none was carried in via the params)."""
        return self._access_token

    @workflow.query
    def status(self) -> dict:
        """Operational state (no secret) — is it healthy, and if not, why."""
        return {
            "paused": self._paused,
            "refreshes": self._refreshes,
            "last_refresh": self._last_refresh,
            "last_error": self._last_error,
            "has_token": bool(self._access_token),
        }
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
"""Start one entity workflow per source unit. Idempotent: re-running won't
|
|
2
|
+
disturb a workflow that's already up (USE_EXISTING), so it doubles as a reconcile.
|
|
3
|
+
|
|
4
|
+
from durable_sync.bootstrap import start_sources
|
|
5
|
+
await start_sources(SOURCE)
|
|
6
|
+
|
|
7
|
+
No Schedule is needed — each workflow's own timer loop is the periodicity. Drive
|
|
8
|
+
or inspect them by id, e.g.:
|
|
9
|
+
|
|
10
|
+
temporal workflow signal --workflow-id "durable-sync:org:temporal-community" \
|
|
11
|
+
--name sync_now --input '[]'
|
|
12
|
+
temporal workflow query --workflow-id "durable-sync:org:temporal-community" \
|
|
13
|
+
--type status
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
from temporalio.client import Client
|
|
18
|
+
from temporalio.common import WorkflowIDConflictPolicy
|
|
19
|
+
|
|
20
|
+
from durable_sync import config
|
|
21
|
+
from durable_sync.core import Source
|
|
22
|
+
from durable_sync.temporal_client import connect
|
|
23
|
+
from durable_sync.workflows.sync import SourceState, SourceSyncWorkflow
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
async def start_sources(
    source: Source,
    *,
    client: Client | None = None,
    task_queue: str | None = None,
    id_prefix: str = "durable-sync",
) -> None:
    """Ensure one SourceSyncWorkflow is running for every spec of `source`.

    Args:
        source: provider of the specs (`source.specs()`), one workflow each.
        client: Temporal client to use; when omitted, `connect()` supplies one.
        task_queue: queue to start on; falls back to `config.TASK_QUEUE`.
        id_prefix: workflow ids are built as "<id_prefix>:<spec.key>".

    Starts use the USE_EXISTING id-conflict policy. One line is printed per
    workflow id.
    """
    temporal = client or await connect()
    queue = task_queue or config.TASK_QUEUE
    for spec in source.specs():
        workflow_id = f"{id_prefix}:{spec.key}"
        await temporal.start_workflow(
            SourceSyncWorkflow.run,
            SourceState(spec=spec),
            id=workflow_id,
            task_queue=queue,
            id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING,
        )
        print(f"ensured entity workflow: {workflow_id}")
|
durable_sync/codec.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Encryption codec for Temporal payloads (AES-256-GCM).
|
|
2
|
+
|
|
3
|
+
When a destination's auth uses a workflow-owned token (e.g. the Notion
|
|
4
|
+
auth workflow), the refresh token lives in that workflow's state and in the
|
|
5
|
+
refresh activity's input/output — all of which Temporal persists in event
|
|
6
|
+
history. This codec encrypts every payload's bytes before they leave the worker,
|
|
7
|
+
so secrets are ciphertext at rest in the cluster and the Web UI.
|
|
8
|
+
|
|
9
|
+
Opt-in: set DURABLE_SYNC_ENC_KEY to a base64-encoded 32-byte key (generate one
|
|
10
|
+
with `python -m durable_sync.codec`). With no key set, payloads are unencrypted
|
|
11
|
+
(fine for local dev). Wire `encryption_codec()` into your Temporal client's
|
|
12
|
+
data_converter so encode/decode stays consistent across the whole system.
|
|
13
|
+
|
|
14
|
+
Requires the `crypto` extra: pip install "durable-sync[crypto]"
|
|
15
|
+
"""
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import base64
|
|
19
|
+
import os
|
|
20
|
+
from typing import Iterable
|
|
21
|
+
|
|
22
|
+
from temporalio.api.common.v1 import Payload
|
|
23
|
+
from temporalio.converter import PayloadCodec
|
|
24
|
+
|
|
25
|
+
# Metadata "encoding" value that marks a payload as encrypted by this codec.
_ENCODING = b"binary/encrypted"
# Environment variable holding the base64-encoded 32-byte AES key.
_KEY_ENV = "DURABLE_SYNC_ENC_KEY"


def load_key() -> bytes | None:
    """Read the encryption key from the environment.

    Returns:
        The 32 decoded key bytes, or None when the variable is unset or empty.

    Raises:
        ValueError: the value is not valid base64, or does not decode to
            exactly 32 bytes.
    """
    raw = os.getenv(_KEY_ENV, "")
    if not raw:
        return None
    # Whitespace (a trailing newline, wrapped lines) is harmless and removed;
    # any other non-base64 character is rejected. The lenient default decoder
    # would silently discard such characters and could accept a corrupted key.
    compact = "".join(raw.split())
    try:
        key = base64.b64decode(compact, validate=True)
    except ValueError as err:  # binascii.Error is a ValueError subclass
        raise ValueError(f"{_KEY_ENV} is not valid base64: {err}") from err
    if len(key) != 32:
        raise ValueError(f"{_KEY_ENV} must decode to 32 bytes (got {len(key)}).")
    return key
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class EncryptionCodec(PayloadCodec):
    """Payload codec that wraps each payload in an AES-GCM encrypted envelope.

    An encoded payload carries `encoding = binary/encrypted` metadata and, as
    data, a 12-byte random nonce followed by the AES-GCM output for the
    serialized original payload.
    """

    def __init__(self, key: bytes) -> None:
        # Imported lazily so the module can be imported without `cryptography`.
        from cryptography.hazmat.primitives.ciphers.aead import AESGCM

        self._aesgcm = AESGCM(key)

    async def encode(self, payloads: Iterable[Payload]) -> list[Payload]:
        """Encrypt every payload (metadata included) into a new envelope."""
        sealed: list[Payload] = []
        for plain in payloads:
            ciphertext = self._encrypt(plain.SerializeToString())
            sealed.append(Payload(metadata={"encoding": _ENCODING}, data=ciphertext))
        return sealed

    async def decode(self, payloads: Iterable[Payload]) -> list[Payload]:
        """Decrypt envelopes written by `encode`; pass anything else through."""
        restored: list[Payload] = []
        for item in payloads:
            if item.metadata.get("encoding") == _ENCODING:
                inner = Payload()
                inner.ParseFromString(self._decrypt(item.data))
                restored.append(inner)
            else:
                # not ours (e.g. written before encryption was on)
                restored.append(item)
        return restored

    def _encrypt(self, data: bytes) -> bytes:
        # A fresh random nonce per message, prepended so _decrypt can find it.
        nonce = os.urandom(12)
        sealed = self._aesgcm.encrypt(nonce, data, None)
        return nonce + sealed

    def _decrypt(self, data: bytes) -> bytes:
        nonce, sealed = data[:12], data[12:]
        return self._aesgcm.decrypt(nonce, sealed, None)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def encryption_codec() -> EncryptionCodec | None:
    """Build the codec from the key `load_key()` returns; None when there is no
    key (DURABLE_SYNC_ENC_KEY unset — dev mode)."""
    key = load_key()
    if key:
        return EncryptionCodec(key)
    return None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
if __name__ == "__main__":
    # Print a freshly generated random 32-byte key, base64-encoded, ready to be
    # exported as DURABLE_SYNC_ENC_KEY=...
    fresh_key = os.urandom(32)
    print(base64.b64encode(fresh_key).decode())
|
durable_sync/config.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Runtime + connection config (generic). Side-effect-free: imported indirectly
|
|
2
|
+
into the workflow sandbox, so no IO / no import-time failures.
|
|
3
|
+
|
|
4
|
+
Integration-specific config (which orgs, which Notion DB, Asana project) lives in
|
|
5
|
+
the Source/Destination you wire up — not here.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
|
|
11
|
+
# Task queue shared by the worker and the workflow starters.
TASK_QUEUE = os.getenv("DURABLE_SYNC_TASK_QUEUE", "durable-sync")

# Worker Versioning is OPT-IN and off by default, so local/simple runs need no
# setup. In production set DURABLE_SYNC_BUILD_ID (e.g. a git SHA): a redeploy
# with changed workflow code then only affects NEW/continued executions while
# in-flight histories drain on the old build. That is the safe way to evolve the
# long-lived entity workflows (SourceSyncWorkflow / OAuthTokenWorkflow) without
# non-determinism errors. When unset, everything runs unversioned. For in-place
# changes the alternative is workflow.patched() (see CONTRIBUTING).
BUILD_ID = os.getenv("DURABLE_SYNC_BUILD_ID", "")
DEPLOYMENT_NAME = os.getenv("DURABLE_SYNC_DEPLOYMENT_NAME", "durable-sync")

# Temporal connection; the defaults point at a local dev server.
TEMPORAL_ADDRESS = os.getenv("TEMPORAL_ADDRESS", "localhost:7233")
TEMPORAL_NAMESPACE = os.getenv("TEMPORAL_NAMESPACE", "default")
TEMPORAL_API_KEY = os.getenv("TEMPORAL_API_KEY")  # None unless set (Temporal Cloud)

# Ids of the workflows that own each provider's OAuth token
# (durable_sync.auth.oauth.workflow.OAuthTokenWorkflow, started via
# connectors.<provider>.start).
NOTION_AUTH_WORKFLOW_ID = os.getenv(
    "DURABLE_SYNC_NOTION_AUTH_WORKFLOW_ID",
    "durable-sync:notion-auth",
)
CONTENTFUL_AUTH_WORKFLOW_ID = os.getenv(
    "DURABLE_SYNC_CONTENTFUL_AUTH_WORKFLOW_ID",
    "durable-sync:contentful-auth",
)
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Connectors — one subpackage per external system, each exposing the halves it
|
|
2
|
+
supports: a `source.py` (read: implements `Source`), a `destination.py` (write:
|
|
3
|
+
implements `Destination`), or both, sharing one client + auth.
|
|
4
|
+
|
|
5
|
+
Grouped by SYSTEM rather than by direction because a system is often both (Notion
|
|
6
|
+
is read in one route and written in another), and its read/write sides share a
|
|
7
|
+
transport (e.g. Notion's MCP client + OAuth). The neutral `Source`/`Destination`
|
|
8
|
+
protocols still live in `durable_sync.core`; this package is only packaging.
|
|
9
|
+
|
|
10
|
+
Reference systems: github / youtube (sources), luma / contentful / notion (source
|
|
11
|
+
and destination), asana (destination). `content.py` holds the shared neutral column
|
|
12
|
+
vocabulary for content-style sources; `multi.py` fans several sources onto one
|
|
13
|
+
worker. Import-free on purpose (a connector may contain workflow code).
|
|
14
|
+
"""
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Asana destination: direct REST + a self-serve Personal Access Token.
|
|
2
|
+
|
|
3
|
+
The second reference destination, deliberately different from Notion's MCP/OAuth:
|
|
4
|
+
plain REST, a PAT any user can mint (no admin, no workflow). If the Destination
|
|
5
|
+
protocol holds here too, it's neither transport- nor auth-shaped.
|
|
6
|
+
|
|
7
|
+
Requires the `asana` extra: pip install "durable-sync[asana]"
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from durable_sync.connectors.asana.destination import AsanaDestination
|
|
12
|
+
|
|
13
|
+
__all__ = ["AsanaDestination"]
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
"""Asana destination — direct REST, self-serve PAT.
|
|
2
|
+
|
|
3
|
+
Why this exists: it's the abstraction's stress test. Notion let property names BE
|
|
4
|
+
column names; Asana tasks have a FIXED schema (name, notes, due_on, completed) plus
|
|
5
|
+
custom fields addressed by gid — so a neutral Record needs an explicit
|
|
6
|
+
destination-owned `field_map`. That mapping living here (not in the source) is
|
|
7
|
+
exactly the seam working as intended.
|
|
8
|
+
|
|
9
|
+
Idempotency: the source's primary_key is stored in the task's `external.gid`
|
|
10
|
+
(Asana's purpose-built external-system handle). `query_existing_ids` lists the
|
|
11
|
+
project's tasks with `opt_fields=external` and maps external.gid -> task gid.
|
|
12
|
+
|
|
13
|
+
Auth: a Personal Access Token (Bearer). Self-serve, no admin, no auth workflow —
|
|
14
|
+
so this destination defines no aux_workflows/aux_activities.
|
|
15
|
+
|
|
16
|
+
Requires the `asana` extra: pip install "durable-sync[asana]"
|
|
17
|
+
"""
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import asyncio
|
|
21
|
+
import datetime as dt
|
|
22
|
+
import os
|
|
23
|
+
from contextlib import asynccontextmanager
|
|
24
|
+
from typing import Any, Awaitable, Callable, AsyncIterator
|
|
25
|
+
|
|
26
|
+
import httpx
|
|
27
|
+
|
|
28
|
+
from durable_sync.core import DestinationHTTPError, Record, auth_error_in_chain
|
|
29
|
+
from durable_sync.http import request_with_retry
|
|
30
|
+
|
|
31
|
+
ASANA_API = "https://app.asana.com/api/1.0"
|
|
32
|
+
_MAX_NOTES = 65000
|
|
33
|
+
_MAX_RETRIES = 6
|
|
34
|
+
_BACKOFF_BASE_SECONDS = 1.0
|
|
35
|
+
|
|
36
|
+
# Native task fields a field_map value may target directly (everything else must
|
|
37
|
+
# be a custom field). Kept small + explicit on purpose.
|
|
38
|
+
_NATIVE_FIELDS = {"name", "notes", "html_notes", "due_on", "due_at",
|
|
39
|
+
"start_on", "completed", "assignee", "resource_subtype"}
|
|
40
|
+
|
|
41
|
+
TokenProvider = Callable[[], Awaitable[str]]
|
|
42
|
+
# A field_map value: a native field name ("due_on") OR {"custom_field": "<gid>"}.
|
|
43
|
+
FieldTarget = Any
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class AsanaDestination:
    """Configuration and connection factory for writing records to one Asana
    project over REST with a bearer token.

    `connect()` yields an `_AsanaSession` bound to an `httpx.AsyncClient`
    pointed at `ASANA_API`.
    """

    name = "asana"

    def __init__(
        self,
        project_gid: str,
        *,
        title_property: str = "Name",
        body_field: str = "notes",  # native field record.body maps to
        field_map: dict[str, FieldTarget] | None = None,
        create_only_properties: set[str] | None = None,
        token_provider: TokenProvider | None = None,
        token_env: str = "ASANA_PAT",
        synced_custom_field_gid: str | None = None,  # optional date CF to stamp
        pacing_seconds: float = 0.0,
    ):
        self.project_gid = project_gid
        self.title_property = title_property
        self.body_field = body_field
        # record-property -> Asana target. Properties without an entry are
        # DROPPED when encoding; title and body are handled separately.
        self.field_map = field_map if field_map else {}
        self.create_only_properties = (
            create_only_properties if create_only_properties else set()
        )
        self.token_env = token_env
        # Without an explicit provider the token is read from `token_env`.
        self._token_provider = token_provider if token_provider else self._env_token
        self.synced_custom_field_gid = synced_custom_field_gid
        self.pacing_seconds = pacing_seconds

    async def _env_token(self) -> str:
        """Default token provider: the `token_env` variable, or "" if unset."""
        return os.environ.get(self.token_env, "")

    @property
    def configured(self) -> bool:
        """True when a project gid was supplied (the token is not checked)."""
        return bool(self.project_gid)

    @property
    def config_hint(self) -> str:
        """Human-readable hint naming the settings this destination needs."""
        return f"ASANA project gid / {self.token_env} unset"

    @asynccontextmanager
    async def connect(self) -> AsyncIterator["_AsanaSession"]:
        """Fetch a token, open an HTTP client for the session's lifetime, and
        yield a session wrapping it."""
        bearer = await self._token_provider()
        auth_headers = {
            "Authorization": f"Bearer {bearer}",
            "Accept": "application/json",
        }
        async with httpx.AsyncClient(
            base_url=ASANA_API, headers=auth_headers, timeout=30
        ) as http:
            yield _AsanaSession(http, self)

    @staticmethod
    def is_auth_error(err: BaseException) -> bool:
        """Classify `err` as an auth failure via the shared
        `auth_error_in_chain` matcher, additionally matching the phrase
        "not authorized"."""
        return auth_error_in_chain(err, extra_needles=("not authorized",))
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class _AsanaSession:
|
|
102
|
+
def __init__(self, client: httpx.AsyncClient, dest: AsanaDestination):
|
|
103
|
+
self._client = client
|
|
104
|
+
self._d = dest
|
|
105
|
+
|
|
106
|
+
async def _request(self, method: str, path: str, *, params=None, json=None) -> dict:
|
|
107
|
+
# Shared backoff (honors Retry-After); we keep the raise here so the error
|
|
108
|
+
# text carries the status for is_auth_error to classify.
|
|
109
|
+
r = await request_with_retry(
|
|
110
|
+
self._client, method, path, params=params, json=json,
|
|
111
|
+
max_attempts=_MAX_RETRIES, base_delay=_BACKOFF_BASE_SECONDS,
|
|
112
|
+
)
|
|
113
|
+
if r.status_code >= 400:
|
|
114
|
+
raise DestinationHTTPError(
|
|
115
|
+
r.status_code, f"Asana {method} {path} -> {r.status_code}: {r.text[:600]}"
|
|
116
|
+
)
|
|
117
|
+
return r.json() if r.content else {}
|
|
118
|
+
|
|
119
|
+
async def query_existing_ids(self) -> dict[str, str]:
|
|
120
|
+
"""{ external.gid (== our primary_key) -> task gid } for the project."""
|
|
121
|
+
mapping: dict[str, str] = {}
|
|
122
|
+
params: dict[str, Any] = {
|
|
123
|
+
"project": self._d.project_gid, "opt_fields": "external", "limit": 100,
|
|
124
|
+
}
|
|
125
|
+
while True:
|
|
126
|
+
resp = await self._request("GET", "/tasks", params=params)
|
|
127
|
+
for t in resp.get("data", []):
|
|
128
|
+
ext = t.get("external") or {}
|
|
129
|
+
gid = ext.get("gid")
|
|
130
|
+
if gid and t.get("gid"):
|
|
131
|
+
mapping[gid] = t["gid"]
|
|
132
|
+
nxt = resp.get("next_page")
|
|
133
|
+
if not nxt or not nxt.get("offset"):
|
|
134
|
+
break
|
|
135
|
+
params["offset"] = nxt["offset"]
|
|
136
|
+
return mapping
|
|
137
|
+
|
|
138
|
+
async def create(self, record: Record, synced_at: dt.datetime) -> bool:
|
|
139
|
+
data = _encode_task(self._d, record, synced_at, creating=True)
|
|
140
|
+
await self._request("POST", "/tasks", json={"data": data})
|
|
141
|
+
await self._pace()
|
|
142
|
+
return True
|
|
143
|
+
|
|
144
|
+
async def update(self, existing_id: str, record: Record, synced_at: dt.datetime) -> bool:
|
|
145
|
+
data = _encode_task(self._d, record, synced_at, creating=False)
|
|
146
|
+
await self._request("PUT", f"/tasks/{existing_id}", json={"data": data})
|
|
147
|
+
await self._pace()
|
|
148
|
+
return True
|
|
149
|
+
|
|
150
|
+
async def _pace(self) -> None:
|
|
151
|
+
if self._d.pacing_seconds > 0:
|
|
152
|
+
await asyncio.sleep(self._d.pacing_seconds)
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def _encode_task(
|
|
156
|
+
dest: AsanaDestination, record: Record, synced_at: dt.datetime, *, creating: bool
|
|
157
|
+
) -> dict[str, Any]:
|
|
158
|
+
"""Neutral Record -> Asana task `data`. Pure (no IO) so it's unit-testable.
|
|
159
|
+
|
|
160
|
+
title_property -> name; record.body -> body_field; mapped props -> native
|
|
161
|
+
fields or custom fields; UNMAPPED props are dropped (Asana has no arbitrary
|
|
162
|
+
columns). On create we also set projects + external (idempotency key)."""
|
|
163
|
+
props = record.properties
|
|
164
|
+
data: dict[str, Any] = {}
|
|
165
|
+
custom: dict[str, Any] = {}
|
|
166
|
+
|
|
167
|
+
name = props.get(dest.title_property)
|
|
168
|
+
if name is not None:
|
|
169
|
+
data["name"] = str(name)
|
|
170
|
+
if record.body:
|
|
171
|
+
data[dest.body_field] = record.body[:_MAX_NOTES]
|
|
172
|
+
|
|
173
|
+
for key, val in props.items():
|
|
174
|
+
if key == dest.title_property or val is None:
|
|
175
|
+
continue
|
|
176
|
+
if not creating and key in dest.create_only_properties:
|
|
177
|
+
continue
|
|
178
|
+
target = dest.field_map.get(key)
|
|
179
|
+
if target is None:
|
|
180
|
+
continue # unmapped -> dropped (logged at debug by caller if desired)
|
|
181
|
+
if isinstance(target, dict) and "custom_field" in target:
|
|
182
|
+
custom[target["custom_field"]] = _coerce(val)
|
|
183
|
+
elif target in _NATIVE_FIELDS:
|
|
184
|
+
data[target] = _coerce_native(target, val)
|
|
185
|
+
|
|
186
|
+
if dest.synced_custom_field_gid:
|
|
187
|
+
custom[dest.synced_custom_field_gid] = synced_at.date().isoformat()
|
|
188
|
+
if custom:
|
|
189
|
+
data["custom_fields"] = custom
|
|
190
|
+
if creating:
|
|
191
|
+
data["projects"] = [dest.project_gid]
|
|
192
|
+
data["external"] = {"gid": record.primary_key} # idempotency handle
|
|
193
|
+
return data
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _coerce(val: Any) -> Any:
|
|
197
|
+
"""Custom-field value: numbers pass through; lists join (enum option gids are
|
|
198
|
+
app-specific, out of scope); everything else stringifies."""
|
|
199
|
+
if isinstance(val, bool):
|
|
200
|
+
return str(val)
|
|
201
|
+
if isinstance(val, (int, float)):
|
|
202
|
+
return val
|
|
203
|
+
if isinstance(val, (list, tuple)):
|
|
204
|
+
return ", ".join(str(v) for v in val)
|
|
205
|
+
return str(val)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def _coerce_native(field: str, val: Any) -> Any:
|
|
209
|
+
if field == "completed":
|
|
210
|
+
return bool(val)
|
|
211
|
+
if field in ("due_on", "start_on"):
|
|
212
|
+
return str(val)[:10] # YYYY-MM-DD
|
|
213
|
+
return str(val)
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Shared property vocabulary for content-style sources (events, videos, CMS
|
|
2
|
+
entries, …).
|
|
3
|
+
|
|
4
|
+
These sources all map an external item onto the SAME neutral columns, so the
|
|
5
|
+
names live here ONCE instead of being hand-typed in each source's `_to_record` —
|
|
6
|
+
where they would silently drift (the same failure mode we hit with `is_auth_error`,
|
|
7
|
+
now fixed by one shared matcher). A destination or transform can import the `P_*`
|
|
8
|
+
constants to address the same columns without re-typing the strings.
|
|
9
|
+
|
|
10
|
+
Opt-in: `GitHubSource` deliberately does NOT use this — its columns (Stars, Forks,
|
|
11
|
+
License, …) are repo-specific. A source uses `content_record` only when the shared
|
|
12
|
+
content shape genuinely fits; per-source logic (URL building, status rules, author
|
|
13
|
+
resolution) still lives in that source — that's real variation, not duplication.
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from durable_sync.core import Record
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# --- paging cursor (shared by the windowed content sources) -----------------
|
|
24
|
+
# Each content source paginates a rolling time window, so its `fetch_page` cursor
|
|
25
|
+
# carries the window start (`after`, frozen on the first page so all pages query
|
|
26
|
+
# the same window) plus whatever native pagination token the API uses (Luma's
|
|
27
|
+
# next_cursor, YouTube's pageToken + resolved playlist, Contentful's skip). It's a
|
|
28
|
+
# small JSON blob threaded through Temporal as the spine's opaque cursor string.
|
|
29
|
+
|
|
30
|
+
def pack_cursor(**fields: Any) -> str:
    """Serialize the given keyword fields into one JSON cursor string."""
    encoded = json.dumps(fields)
    return encoded
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def unpack_cursor(cursor: str) -> dict[str, Any]:
    """Parse a JSON cursor string (as written by `pack_cursor`) back into its
    fields."""
    decoded = json.loads(cursor)
    return decoded
|
|
36
|
+
|
|
37
|
+
# Canonical neutral property names every content-style source emits.
|
|
38
|
+
P_TYPE = "Type"
|
|
39
|
+
P_SOURCE = "Source"
|
|
40
|
+
P_SOURCE_ID = "Source ID"
|
|
41
|
+
P_URL = "URL"
|
|
42
|
+
P_DATE = "Date"
|
|
43
|
+
P_STATUS = "Status"
|
|
44
|
+
P_AUTHOR = "Author"
|
|
45
|
+
P_AUTHORS = "Authors"
|
|
46
|
+
|
|
47
|
+
_MAX_TEXT = 2000
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def content_record(
    *,
    primary_key: str,
    title_property: str,
    title: str,
    item_type: str,
    source: str,
    url: str | None = None,
    date: str | None = None,
    status: str = "Published",
    author: str = "",
    authors: list[str] | None = None,
    extra: dict[str, Any] | None = None,
) -> Record:
    """Assemble a Record carrying the shared content columns.

    The title is stored under `title_property`; `primary_key` is stored both as
    the record key and in the Source ID column. `title` and `author` are
    truncated to `_MAX_TEXT` characters (a falsy value becomes ""), and a
    missing `authors` becomes an empty list. Entries of `extra` are applied
    last, so they can add columns or override the shared ones.
    """
    capped_title = (title or "")[:_MAX_TEXT]
    capped_author = (author or "")[:_MAX_TEXT]

    # Filled in the same order as the column list above; a later assignment to
    # the same key overrides the earlier value.
    columns: dict[str, Any] = {}
    columns[title_property] = capped_title
    columns[P_TYPE] = item_type
    columns[P_SOURCE] = source
    columns[P_SOURCE_ID] = primary_key
    columns[P_URL] = url
    columns[P_DATE] = date
    columns[P_STATUS] = status
    columns[P_AUTHOR] = capped_author
    columns[P_AUTHORS] = authors or []
    columns.update(extra or {})

    return Record(primary_key=primary_key, properties=columns)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Contentful connector: entries by content type, BOTH directions.
|
|
2
|
+
|
|
3
|
+
`ContentfulSource` reads (CDA preferred, CMA fallback — the only mode that sees
|
|
4
|
+
drafts); `ContentfulDestination` writes via the CMA (create/update/publish).
|
|
5
|
+
Source keep/drop policy for shared types belongs in your enrich/transform hook
|
|
6
|
+
(see ContentfulEntryContext). The destination takes a required `LinkStore`
|
|
7
|
+
(primary_key -> entry id) since neutral property names don't match content-model
|
|
8
|
+
field ids — see the boundary doctrine in CONTRIBUTING.
|
|
9
|
+
|
|
10
|
+
Requires the `contentful` extra: pip install "durable-sync[contentful]"
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from durable_sync.connectors.contentful.destination import ContentfulDestination
|
|
15
|
+
from durable_sync.connectors.contentful.mcp_destination import ContentfulMcpDestination
|
|
16
|
+
from durable_sync.connectors.contentful.source import (
|
|
17
|
+
ContentfulConfig,
|
|
18
|
+
ContentfulEntryContext,
|
|
19
|
+
ContentfulSource,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"ContentfulSource", "ContentfulConfig", "ContentfulEntryContext",
|
|
24
|
+
"ContentfulDestination", "ContentfulMcpDestination",
|
|
25
|
+
]
|