durable-sync 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. durable_sync/__init__.py +26 -0
  2. durable_sync/activities.py +156 -0
  3. durable_sync/auth/__init__.py +8 -0
  4. durable_sync/auth/oauth/__init__.py +18 -0
  5. durable_sync/auth/oauth/flow.py +183 -0
  6. durable_sync/auth/oauth/refresh.py +58 -0
  7. durable_sync/auth/oauth/store.py +36 -0
  8. durable_sync/auth/oauth/token.py +36 -0
  9. durable_sync/auth/oauth/workflow.py +172 -0
  10. durable_sync/bootstrap.py +44 -0
  11. durable_sync/codec.py +80 -0
  12. durable_sync/config.py +35 -0
  13. durable_sync/connectors/__init__.py +14 -0
  14. durable_sync/connectors/asana/__init__.py +13 -0
  15. durable_sync/connectors/asana/destination.py +213 -0
  16. durable_sync/connectors/content.py +80 -0
  17. durable_sync/connectors/contentful/__init__.py +25 -0
  18. durable_sync/connectors/contentful/api.py +285 -0
  19. durable_sync/connectors/contentful/bootstrap.py +102 -0
  20. durable_sync/connectors/contentful/describe.py +61 -0
  21. durable_sync/connectors/contentful/destination.py +145 -0
  22. durable_sync/connectors/contentful/encode.py +49 -0
  23. durable_sync/connectors/contentful/introspect.py +69 -0
  24. durable_sync/connectors/contentful/mcp.py +95 -0
  25. durable_sync/connectors/contentful/mcp_destination.py +137 -0
  26. durable_sync/connectors/contentful/oauth.py +27 -0
  27. durable_sync/connectors/contentful/prove.py +51 -0
  28. durable_sync/connectors/contentful/source.py +192 -0
  29. durable_sync/connectors/contentful/start.py +46 -0
  30. durable_sync/connectors/contentful/store.py +25 -0
  31. durable_sync/connectors/contentful/token.py +13 -0
  32. durable_sync/connectors/contentful/token_check.py +42 -0
  33. durable_sync/connectors/github/__init__.py +33 -0
  34. durable_sync/connectors/github/api.py +169 -0
  35. durable_sync/connectors/github/source.py +230 -0
  36. durable_sync/connectors/luma/__init__.py +20 -0
  37. durable_sync/connectors/luma/api.py +121 -0
  38. durable_sync/connectors/luma/destination.py +128 -0
  39. durable_sync/connectors/luma/source.py +155 -0
  40. durable_sync/connectors/multi.py +78 -0
  41. durable_sync/connectors/notion/__init__.py +20 -0
  42. durable_sync/connectors/notion/bootstrap.py +97 -0
  43. durable_sync/connectors/notion/client.py +133 -0
  44. durable_sync/connectors/notion/destination.py +270 -0
  45. durable_sync/connectors/notion/oauth.py +25 -0
  46. durable_sync/connectors/notion/prove.py +57 -0
  47. durable_sync/connectors/notion/source.py +136 -0
  48. durable_sync/connectors/notion/start.py +46 -0
  49. durable_sync/connectors/notion/store.py +25 -0
  50. durable_sync/connectors/notion/token.py +13 -0
  51. durable_sync/connectors/youtube/__init__.py +13 -0
  52. durable_sync/connectors/youtube/api.py +122 -0
  53. durable_sync/connectors/youtube/source.py +152 -0
  54. durable_sync/core.py +210 -0
  55. durable_sync/env.py +55 -0
  56. durable_sync/http.py +71 -0
  57. durable_sync/linkstore.py +88 -0
  58. durable_sync/route.py +86 -0
  59. durable_sync/temporal_client.py +48 -0
  60. durable_sync/transport/__init__.py +12 -0
  61. durable_sync/transport/mcp.py +77 -0
  62. durable_sync/worker.py +109 -0
  63. durable_sync/workflows/__init__.py +9 -0
  64. durable_sync/workflows/sync.py +208 -0
  65. durable_sync-0.1.0.dist-info/METADATA +310 -0
  66. durable_sync-0.1.0.dist-info/RECORD +69 -0
  67. durable_sync-0.1.0.dist-info/WHEEL +5 -0
  68. durable_sync-0.1.0.dist-info/licenses/LICENSE +21 -0
  69. durable_sync-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,172 @@
1
+ """OAuthTokenWorkflow — the entity workflow that owns a rotating OAuth refresh
2
+ token (provider-agnostic).
3
+
4
+ Why a workflow and not a cron job + a file:
5
+ - It's the SINGLE owner of the rotating refresh token, so refreshes are
6
+ serialized by construction — the concurrent-refresh `invalid_grant` race that
7
+ rotating-refresh-token providers warn about can't happen.
8
+ - Its state (the refresh token) is durable across worker restarts.
9
+ - It hands out fresh access tokens via @workflow.query, which is NOT recorded in
10
+ history — so activities fetch a token without it touching the event log. (Pair
11
+ with the encryption codec to protect the token in workflow state.)
12
+
13
+ Start one per provider/account, with the id the destination expects (e.g.
14
+ config.NOTION_AUTH_WORKFLOW_ID). A bootstrap captures the initial refresh token;
15
+ from then on this runs unattended, refreshing ~5 min before expiry.
16
+ """
17
+ from __future__ import annotations
18
+
19
+ from dataclasses import dataclass
20
+ from datetime import timedelta
21
+
22
+ from temporalio import workflow
23
+ from temporalio.common import RetryPolicy
24
+ from temporalio.exceptions import ApplicationError
25
+
26
+ with workflow.unsafe.imports_passed_through():
27
+ from durable_sync.auth.oauth.refresh import RefreshInput, RefreshOutput, refresh_oauth_token
28
+
29
+ # Refresh this long before the access token's stated expiry.
30
+ _REFRESH_SKEW = timedelta(minutes=5)
31
+ # Continue-as-new after this many refreshes to keep event history small.
32
+ _REFRESHES_BEFORE_CONTINUE = 24
33
+ # Back-off between retries when a refresh fails for a TRANSIENT reason (the token
34
+ # endpoint is down) — so the workflow self-heals instead of giving up.
35
+ _TRANSIENT_BACKOFF = timedelta(seconds=60)
36
+
37
+
38
+ def _is_auth_failure(err: BaseException | None) -> bool:
39
+ """The refresh activity raises a non-retryable ApplicationError(type=AuthError)
40
+ when the refresh token is dead (mirrors sync_records). Type-only check, so it's
41
+ pure/deterministic and safe in the workflow."""
42
+ while err is not None:
43
+ if isinstance(err, ApplicationError) and err.type == "AuthError":
44
+ return True
45
+ err = err.__cause__
46
+ return False
47
+
48
+
49
@dataclass
class AuthParams:
    """Input for one run of ``OAuthTokenWorkflow``.

    The first three fields identify the OAuth client and seed the refresh token;
    the defaulted fields carry state from a previous run across continue-as-new.
    """

    # OAuth client identifier, forwarded unchanged to the refresh activity.
    client_id: str
    # Token endpoint, forwarded unchanged to the refresh activity.
    token_endpoint: str
    # Refresh token the run starts with; replaced after every successful refresh.
    refresh_token: str
    # Refresh count the run starts from. NOTE(review): the workflow passes 0
    # here when it continues-as-new, so the count restarts with each run.
    refreshes_so_far: int = 0
    # Access token handed over at continue-as-new so the `get_access_token`
    # query is not empty between the start of the new run and its first refresh.
    # It travels as workflow input, hence through event history; the project's
    # encryption codec is what keeps it from being stored in the clear.
    access_token: str = ""
61
+
62
+
63
@workflow.defn
class OAuthTokenWorkflow:
    """Entity workflow that owns one rotating OAuth refresh token.

    Each loop iteration runs the ``refresh_oauth_token`` activity, stores the
    returned access/refresh tokens, sleeps until shortly before the reported
    expiry, and — once enough refreshes have accumulated — continues-as-new with
    the latest tokens. A refresh rejected as an auth failure parks the workflow
    until the ``reauthorize`` signal arrives; any other failure is retried after
    a fixed back-off.
    """

    @workflow.init
    def __init__(self, params: AuthParams) -> None:
        # Token state is seeded from the input so a continued run resumes with
        # whatever the previous run handed over.
        self._refresh_token = params.refresh_token
        self._access_token = params.access_token
        self._refreshes = params.refreshes_so_far
        # Pause/recover bookkeeping: a rejected refresh token parks the workflow
        # (still queryable) instead of failing it.
        self._paused = False
        # Replacement refresh token delivered by `reauthorize`, if any.
        self._new_refresh_token = ""
        # Surfaced through the `status` query.
        self._last_refresh: str | None = None
        self._last_error: str | None = None

    @workflow.run
    async def run(self, params: AuthParams) -> None:
        """Refresh forever; never returns normally (it continues-as-new)."""
        while True:
            try:
                out: RefreshOutput = await workflow.execute_activity(
                    refresh_oauth_token,
                    RefreshInput(
                        client_id=params.client_id,
                        token_endpoint=params.token_endpoint,
                        refresh_token=self._refresh_token,
                    ),
                    start_to_close_timeout=timedelta(seconds=30),
                    retry_policy=RetryPolicy(maximum_attempts=5),
                )
            except Exception as exc:  # noqa: BLE001 - classify, never let the workflow die
                await self._recover_from_failure(exc, params.client_id)
                continue

            # Success: adopt the new tokens (the refresh token rotates, so the
            # newest one must replace the one just spent).
            self._access_token = out.access_token
            self._refresh_token = out.refresh_token
            self._refreshes += 1
            self._last_refresh = workflow.now().isoformat()
            self._last_error = None

            await workflow.sleep(self._time_until_refresh(out.expires_in))

            # History is rolled only AFTER the sleep, so the new run's immediate
            # refresh is the one that is actually due rather than a back-to-back
            # rotation.
            if self._refreshes < _REFRESHES_BEFORE_CONTINUE:
                continue
            await workflow.wait_condition(workflow.all_handlers_finished)
            workflow.continue_as_new(
                AuthParams(
                    client_id=params.client_id,
                    token_endpoint=params.token_endpoint,
                    refresh_token=self._refresh_token,
                    refreshes_so_far=0,
                    access_token=self._access_token,
                )
            )

    async def _recover_from_failure(self, exc: Exception, client_id: str) -> None:
        """Record a failed refresh, then either back off or park until resumed."""
        self._last_error = str(exc)
        if not _is_auth_failure(exc):
            # Transient (endpoint down, network): wait, then let the loop retry.
            workflow.logger.warning(
                "OAuth refresh transient failure for %s; retrying after backoff.",
                client_id,
            )
            await workflow.sleep(_TRANSIENT_BACKOFF)
            return

        # The refresh token itself was rejected: block here until `reauthorize`
        # clears the pause flag.
        self._paused = True
        workflow.logger.error(
            "OAuth refresh permanently rejected for %s — pausing until "
            "`reauthorize` with a fresh refresh token.", client_id,
        )
        await workflow.wait_condition(lambda: not self._paused)
        if self._new_refresh_token:
            self._refresh_token = self._new_refresh_token
            self._new_refresh_token = ""

    @staticmethod
    def _time_until_refresh(expires_in: int) -> timedelta:
        """Sleep duration for a token valid for ``expires_in`` seconds: expiry
        minus the skew, or — when that is not positive — half the lifetime with
        a 30-second floor."""
        remaining = timedelta(seconds=expires_in) - _REFRESH_SKEW
        if remaining > timedelta(0):
            return remaining
        return timedelta(seconds=max(expires_in // 2, 30))

    # --- Signals (flip flags only; non-async; tolerate stray payloads) -------

    @workflow.signal
    def reauthorize(self, refresh_token: str = "", *_: object) -> None:
        """Release a pause, optionally supplying a replacement refresh token.

        A non-empty ``refresh_token`` is stored for the run loop to adopt when
        it resumes; a bare signal just retries with the current token. Extra
        positional arguments are accepted and ignored.

        NOTE(review): the stored token is only consumed on the resume path after
        an auth-failure pause — sent while the workflow is not paused it stays
        pending until a later pause is released.
        """
        if refresh_token:
            self._new_refresh_token = refresh_token
        self._paused = False

    # --- Queries (read-only) -------------------------------------------------

    @workflow.query
    def get_access_token(self) -> str:
        """Return the most recently stored access token (empty string if none
        has been obtained or carried over yet)."""
        return self._access_token

    @workflow.query
    def status(self) -> dict:
        """Operational snapshot without secrets: pause flag, refresh count for
        this run, timestamp of the last success, last error text, and whether an
        access token is currently held."""
        return {
            "paused": self._paused,
            "refreshes": self._refreshes,
            "last_refresh": self._last_refresh,
            "last_error": self._last_error,
            "has_token": bool(self._access_token),
        }
@@ -0,0 +1,44 @@
1
+ """Start one entity workflow per source unit. Idempotent: re-running won't
2
+ disturb a workflow that's already up (USE_EXISTING), so it doubles as a reconcile.
3
+
4
+ from durable_sync.bootstrap import start_sources
5
+ await start_sources(SOURCE)
6
+
7
+ No Schedule is needed — each workflow's own timer loop is the periodicity. Drive
8
+ or inspect them by id, e.g.:
9
+
10
+ temporal workflow signal --workflow-id "durable-sync:org:temporal-community" \
11
+ --name sync_now --input '[]'
12
+ temporal workflow query --workflow-id "durable-sync:org:temporal-community" \
13
+ --type status
14
+ """
15
+ from __future__ import annotations
16
+
17
+ from temporalio.client import Client
18
+ from temporalio.common import WorkflowIDConflictPolicy
19
+
20
+ from durable_sync import config
21
+ from durable_sync.core import Source
22
+ from durable_sync.temporal_client import connect
23
+ from durable_sync.workflows.sync import SourceState, SourceSyncWorkflow
24
+
25
+
26
async def start_sources(
    source: Source,
    *,
    client: Client | None = None,
    task_queue: str | None = None,
    id_prefix: str = "durable-sync",
) -> None:
    """Ensure one ``SourceSyncWorkflow`` exists for every spec of ``source``.

    Args:
        source: Object whose ``specs()`` yields the units to sync; each spec's
            ``key`` becomes part of the workflow id.
        client: Temporal client to use; when falsy, one is obtained from
            ``connect()``.
        task_queue: Task queue for the workflows; falls back to
            ``config.TASK_QUEUE`` when falsy.
        id_prefix: Prefix for workflow ids, which take the form
            ``"<id_prefix>:<spec.key>"``.

    Workflows are started with ``USE_EXISTING`` as the id-conflict policy. One
    line is printed per workflow id.
    """
    if not client:
        client = await connect()
    queue = task_queue if task_queue else config.TASK_QUEUE

    for spec in source.specs():
        workflow_id = f"{id_prefix}:{spec.key}"
        initial_state = SourceState(spec=spec)
        await client.start_workflow(
            SourceSyncWorkflow.run,
            initial_state,
            id=workflow_id,
            task_queue=queue,
            id_conflict_policy=WorkflowIDConflictPolicy.USE_EXISTING,
        )
        print(f"ensured entity workflow: {workflow_id}")
durable_sync/codec.py ADDED
@@ -0,0 +1,80 @@
1
+ """Encryption codec for Temporal payloads (AES-256-GCM).
2
+
3
+ When a destination's auth uses a workflow-owned token (e.g. the Notion
4
+ auth workflow), the refresh token lives in that workflow's state and in the
5
+ refresh activity's input/output — all of which Temporal persists in event
6
+ history. This codec encrypts every payload's bytes before they leave the worker,
7
+ so secrets are ciphertext at rest in the cluster and the Web UI.
8
+
9
+ Opt-in: set DURABLE_SYNC_ENC_KEY to a base64-encoded 32-byte key (generate one
10
+ with `python -m durable_sync.codec`). With no key set, payloads are unencrypted
11
+ (fine for local dev). Wire `encryption_codec()` into your Temporal client's
12
+ data_converter so encode/decode stays consistent across the whole system.
13
+
14
+ Requires the `crypto` extra: pip install "durable-sync[crypto]"
15
+ """
16
+ from __future__ import annotations
17
+
18
+ import base64
19
+ import os
20
+ from typing import Iterable
21
+
22
+ from temporalio.api.common.v1 import Payload
23
+ from temporalio.converter import PayloadCodec
24
+
25
+ _ENCODING = b"binary/encrypted"
26
+ _KEY_ENV = "DURABLE_SYNC_ENC_KEY"
27
+
28
+
29
def load_key() -> bytes | None:
    """Read the encryption key from the environment.

    Returns:
        The decoded 32-byte key, or ``None`` when the variable named by
        ``_KEY_ENV`` is unset or empty.

    Raises:
        ValueError: If the value decodes to anything other than 32 bytes.
    """
    encoded = os.getenv(_KEY_ENV, "")
    if encoded:
        key = base64.b64decode(encoded)
        if len(key) == 32:
            return key
        raise ValueError(f"{_KEY_ENV} must decode to 32 bytes (got {len(key)}).")
    return None
37
+
38
+
39
class EncryptionCodec(PayloadCodec):
    """Payload codec that wraps each payload in an AES-GCM encrypted envelope.

    An encoded payload's ``data`` is a fresh 12-byte nonce followed by the
    ciphertext of the original payload's serialized bytes; its metadata carries
    only the ``_ENCODING`` marker.
    """

    def __init__(self, key: bytes) -> None:
        # Imported lazily so the module can be imported without the optional
        # `cryptography` dependency installed.
        from cryptography.hazmat.primitives.ciphers.aead import AESGCM
        self._aesgcm = AESGCM(key)

    async def encode(self, payloads: Iterable[Payload]) -> list[Payload]:
        """Encrypt every payload, returning one envelope per input, in order."""
        envelopes: list[Payload] = []
        for payload in payloads:
            sealed = self._encrypt(payload.SerializeToString())
            envelopes.append(Payload(metadata={"encoding": _ENCODING}, data=sealed))
        return envelopes

    async def decode(self, payloads: Iterable[Payload]) -> list[Payload]:
        """Decrypt envelopes produced by ``encode``; payloads without this
        codec's encoding marker (e.g. written before encryption was on) pass
        through unchanged."""
        restored: list[Payload] = []
        for payload in payloads:
            if payload.metadata.get("encoding") == _ENCODING:
                original = Payload()
                original.ParseFromString(self._decrypt(payload.data))
                restored.append(original)
            else:
                restored.append(payload)
        return restored

    def _encrypt(self, data: bytes) -> bytes:
        """Return ``nonce || ciphertext`` using a fresh random 12-byte nonce."""
        nonce = os.urandom(12)
        ciphertext = self._aesgcm.encrypt(nonce, data, None)
        return nonce + ciphertext

    def _decrypt(self, data: bytes) -> bytes:
        """Split off the leading 12-byte nonce and decrypt the remainder."""
        nonce, ciphertext = data[:12], data[12:]
        return self._aesgcm.decrypt(nonce, ciphertext, None)
70
+
71
+
72
def encryption_codec() -> EncryptionCodec | None:
    """Build the codec from the environment-provided key.

    Returns ``None`` when ``load_key()`` yields no key (DURABLE_SYNC_ENC_KEY is
    unset — dev mode); otherwise an ``EncryptionCodec`` using that key.
    """
    key = load_key()
    if not key:
        return None
    return EncryptionCodec(key)
76
+
77
+
78
if __name__ == "__main__":
    # Print a freshly generated random 32-byte key, base64-encoded, ready to
    # put in the environment as DURABLE_SYNC_ENC_KEY=...
    fresh_key = os.urandom(32)
    print(base64.b64encode(fresh_key).decode())
durable_sync/config.py ADDED
@@ -0,0 +1,35 @@
1
+ """Runtime + connection config (generic). Side-effect-free: imported indirectly
2
+ into the workflow sandbox, so no IO / no import-time failures.
3
+
4
+ Integration-specific config (which orgs, which Notion DB, Asana project) lives in
5
+ the Source/Destination you wire up — not here.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import os
10
+
11
# Task queue the worker polls and workflows are started on.
TASK_QUEUE = os.getenv("DURABLE_SYNC_TASK_QUEUE", "durable-sync")

# Worker Versioning — OPT-IN; empty by default so local/simple runs need zero
# setup. In production set DURABLE_SYNC_BUILD_ID (e.g. a git SHA): a redeploy
# whose workflow code changed then only affects NEW/continued executions while
# in-flight histories drain on the old build — the safe way to evolve the
# long-lived entity workflows (SourceSyncWorkflow / OAuthTokenWorkflow) without
# non-determinism errors. When unset, all workflows run unversioned. The
# alternative for in-place changes is workflow.patched() (see CONTRIBUTING).
BUILD_ID = os.getenv("DURABLE_SYNC_BUILD_ID", "")
DEPLOYMENT_NAME = os.getenv("DURABLE_SYNC_DEPLOYMENT_NAME", "durable-sync")

# Temporal connection. The defaults target a local dev server; override these
# for Temporal Cloud.
TEMPORAL_ADDRESS = os.getenv("TEMPORAL_ADDRESS", "localhost:7233")
TEMPORAL_NAMESPACE = os.getenv("TEMPORAL_NAMESPACE", "default")
# None unless set (set it for Temporal Cloud).
TEMPORAL_API_KEY = os.getenv("TEMPORAL_API_KEY")

# Ids of the workflows that own each provider's OAuth token
# (durable_sync.auth.oauth.workflow.OAuthTokenWorkflow, started via
# connectors.<provider>.start).
NOTION_AUTH_WORKFLOW_ID = os.getenv(
    "DURABLE_SYNC_NOTION_AUTH_WORKFLOW_ID",
    "durable-sync:notion-auth",
)
CONTENTFUL_AUTH_WORKFLOW_ID = os.getenv(
    "DURABLE_SYNC_CONTENTFUL_AUTH_WORKFLOW_ID",
    "durable-sync:contentful-auth",
)
@@ -0,0 +1,14 @@
1
+ """Connectors — one subpackage per external system, each exposing the halves it
2
+ supports: a `source.py` (read: implements `Source`), a `destination.py` (write:
3
+ implements `Destination`), or both, sharing one client + auth.
4
+
5
+ Grouped by SYSTEM rather than by direction because a system is often both (Notion
6
+ is read in one route and written in another), and its read/write sides share a
7
+ transport (e.g. Notion's MCP client + OAuth). The neutral `Source`/`Destination`
8
+ protocols still live in `durable_sync.core`; this package is only packaging.
9
+
10
+ Reference systems: github / luma / youtube / contentful (sources today),
11
+ notion / asana (destinations today). `content.py` holds the shared neutral column
12
+ vocabulary for content-style sources; `multi.py` fans several sources onto one
13
+ worker. Import-free on purpose (a connector may contain workflow code).
14
+ """
@@ -0,0 +1,13 @@
1
+ """Asana destination: direct REST + a self-serve Personal Access Token.
2
+
3
+ The second reference destination, deliberately different from Notion's MCP/OAuth:
4
+ plain REST, a PAT any user can mint (no admin, no workflow). If the Destination
5
+ protocol holds here too, it's neither transport- nor auth-shaped.
6
+
7
+ Requires the `asana` extra: pip install "durable-sync[asana]"
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from durable_sync.connectors.asana.destination import AsanaDestination
12
+
13
+ __all__ = ["AsanaDestination"]
@@ -0,0 +1,213 @@
1
+ """Asana destination — direct REST, self-serve PAT.
2
+
3
+ Why this exists: it's the abstraction's stress test. Notion let property names BE
4
+ column names; Asana tasks have a FIXED schema (name, notes, due_on, completed) plus
5
+ custom fields addressed by gid — so a neutral Record needs an explicit
6
+ destination-owned `field_map`. That mapping living here (not in the source) is
7
+ exactly the seam working as intended.
8
+
9
+ Idempotency: the source's primary_key is stored in the task's `external.gid`
10
+ (Asana's purpose-built external-system handle). `query_existing_ids` lists the
11
+ project's tasks with `opt_fields=external` and maps external.gid -> task gid.
12
+
13
+ Auth: a Personal Access Token (Bearer). Self-serve, no admin, no auth workflow —
14
+ so this destination defines no aux_workflows/aux_activities.
15
+
16
+ Requires the `asana` extra: pip install "durable-sync[asana]"
17
+ """
18
+ from __future__ import annotations
19
+
20
+ import asyncio
21
+ import datetime as dt
22
+ import os
23
+ from contextlib import asynccontextmanager
24
+ from typing import Any, Awaitable, Callable, AsyncIterator
25
+
26
+ import httpx
27
+
28
+ from durable_sync.core import DestinationHTTPError, Record, auth_error_in_chain
29
+ from durable_sync.http import request_with_retry
30
+
31
# Base URL every request path is resolved against.
ASANA_API = "https://app.asana.com/api/1.0"
# Maximum number of characters of record.body sent as the task body.
_MAX_NOTES = 65000
# Retry budget and base delay handed to the shared request helper.
_MAX_RETRIES = 6
_BACKOFF_BASE_SECONDS = 1.0

# Native task fields a field_map value may target directly (everything else
# must be a custom field). Kept small + explicit on purpose.
_NATIVE_FIELDS = {
    "name",
    "notes",
    "html_notes",
    "due_on",
    "due_at",
    "start_on",
    "completed",
    "assignee",
    "resource_subtype",
}

# Async zero-argument callable that yields the bearer token.
TokenProvider = Callable[[], Awaitable[str]]
# A field_map value: a native field name ("due_on") OR {"custom_field": "<gid>"}.
FieldTarget = Any
44
+
45
+
46
class AsanaDestination:
    """Configuration for writing records into one Asana project as tasks.

    Holds the project gid, the record-property -> task-field mapping, and the
    token source; ``connect()`` opens an HTTP session bound to this
    configuration.
    """

    name = "asana"

    def __init__(
        self,
        project_gid: str,
        *,
        title_property: str = "Name",
        body_field: str = "notes",  # native field record.body maps to
        field_map: dict[str, FieldTarget] | None = None,
        create_only_properties: set[str] | None = None,
        token_provider: TokenProvider | None = None,
        token_env: str = "ASANA_PAT",
        synced_custom_field_gid: str | None = None,  # optional date CF to stamp
        pacing_seconds: float = 0.0,
    ):
        """Store the destination settings.

        Args:
            project_gid: Project new tasks are added to and existing ones are
                listed from.
            title_property: Record property used as the task name.
            body_field: Task field that receives ``record.body``.
            field_map: Record property -> Asana target. Properties without an
                entry are DROPPED (Asana can't hold arbitrary columns);
                title/body are handled separately.
            create_only_properties: Properties written on create but skipped on
                update.
            token_provider: Async callable returning the bearer token; defaults
                to reading the ``token_env`` environment variable.
            token_env: Environment variable consulted by the default provider.
            synced_custom_field_gid: Optional custom field stamped with the sync
                date on every write.
            pacing_seconds: Delay inserted after each create/update when > 0.
        """
        self.project_gid = project_gid
        self.pacing_seconds = pacing_seconds
        self.synced_custom_field_gid = synced_custom_field_gid

        self.title_property = title_property
        self.body_field = body_field
        self.field_map = field_map or {}
        self.create_only_properties = create_only_properties or set()

        self.token_env = token_env
        self._token_provider = token_provider or self._env_token

    async def _env_token(self) -> str:
        """Default token provider: the ``token_env`` variable, or ``""``."""
        return os.environ.get(self.token_env, "")

    @property
    def configured(self) -> bool:
        """True when a project gid was supplied (the token is not checked)."""
        return bool(self.project_gid)

    @property
    def config_hint(self) -> str:
        """Human-readable hint naming the settings to check."""
        return f"ASANA project gid / {self.token_env} unset"

    @asynccontextmanager
    async def connect(self) -> AsyncIterator["_AsanaSession"]:
        """Yield a session backed by an ``httpx.AsyncClient`` that carries the
        bearer token; the client is closed when the context exits."""
        token = await self._token_provider()
        auth_headers = {"Authorization": f"Bearer {token}", "Accept": "application/json"}
        http_client = httpx.AsyncClient(base_url=ASANA_API, headers=auth_headers, timeout=30)
        async with http_client as client:
            yield _AsanaSession(client, self)

    @staticmethod
    def is_auth_error(err: BaseException) -> bool:
        """Classify ``err`` as a rejected token (401/403) by delegating to the
        shared ``auth_error_in_chain`` matcher, adding "not authorized" as an
        extra needle. (Per the original note: the shared matcher does a
        word-boundary status-code check, which matters because Asana error text
        carries gids/request-ids that a bare ``"401" in msg`` could match.)"""
        return auth_error_in_chain(err, extra_needles=("not authorized",))
99
+
100
+
101
+ class _AsanaSession:
102
+ def __init__(self, client: httpx.AsyncClient, dest: AsanaDestination):
103
+ self._client = client
104
+ self._d = dest
105
+
106
+ async def _request(self, method: str, path: str, *, params=None, json=None) -> dict:
107
+ # Shared backoff (honors Retry-After); we keep the raise here so the error
108
+ # text carries the status for is_auth_error to classify.
109
+ r = await request_with_retry(
110
+ self._client, method, path, params=params, json=json,
111
+ max_attempts=_MAX_RETRIES, base_delay=_BACKOFF_BASE_SECONDS,
112
+ )
113
+ if r.status_code >= 400:
114
+ raise DestinationHTTPError(
115
+ r.status_code, f"Asana {method} {path} -> {r.status_code}: {r.text[:600]}"
116
+ )
117
+ return r.json() if r.content else {}
118
+
119
+ async def query_existing_ids(self) -> dict[str, str]:
120
+ """{ external.gid (== our primary_key) -> task gid } for the project."""
121
+ mapping: dict[str, str] = {}
122
+ params: dict[str, Any] = {
123
+ "project": self._d.project_gid, "opt_fields": "external", "limit": 100,
124
+ }
125
+ while True:
126
+ resp = await self._request("GET", "/tasks", params=params)
127
+ for t in resp.get("data", []):
128
+ ext = t.get("external") or {}
129
+ gid = ext.get("gid")
130
+ if gid and t.get("gid"):
131
+ mapping[gid] = t["gid"]
132
+ nxt = resp.get("next_page")
133
+ if not nxt or not nxt.get("offset"):
134
+ break
135
+ params["offset"] = nxt["offset"]
136
+ return mapping
137
+
138
+ async def create(self, record: Record, synced_at: dt.datetime) -> bool:
139
+ data = _encode_task(self._d, record, synced_at, creating=True)
140
+ await self._request("POST", "/tasks", json={"data": data})
141
+ await self._pace()
142
+ return True
143
+
144
+ async def update(self, existing_id: str, record: Record, synced_at: dt.datetime) -> bool:
145
+ data = _encode_task(self._d, record, synced_at, creating=False)
146
+ await self._request("PUT", f"/tasks/{existing_id}", json={"data": data})
147
+ await self._pace()
148
+ return True
149
+
150
+ async def _pace(self) -> None:
151
+ if self._d.pacing_seconds > 0:
152
+ await asyncio.sleep(self._d.pacing_seconds)
153
+
154
+
155
+ def _encode_task(
156
+ dest: AsanaDestination, record: Record, synced_at: dt.datetime, *, creating: bool
157
+ ) -> dict[str, Any]:
158
+ """Neutral Record -> Asana task `data`. Pure (no IO) so it's unit-testable.
159
+
160
+ title_property -> name; record.body -> body_field; mapped props -> native
161
+ fields or custom fields; UNMAPPED props are dropped (Asana has no arbitrary
162
+ columns). On create we also set projects + external (idempotency key)."""
163
+ props = record.properties
164
+ data: dict[str, Any] = {}
165
+ custom: dict[str, Any] = {}
166
+
167
+ name = props.get(dest.title_property)
168
+ if name is not None:
169
+ data["name"] = str(name)
170
+ if record.body:
171
+ data[dest.body_field] = record.body[:_MAX_NOTES]
172
+
173
+ for key, val in props.items():
174
+ if key == dest.title_property or val is None:
175
+ continue
176
+ if not creating and key in dest.create_only_properties:
177
+ continue
178
+ target = dest.field_map.get(key)
179
+ if target is None:
180
+ continue # unmapped -> dropped (logged at debug by caller if desired)
181
+ if isinstance(target, dict) and "custom_field" in target:
182
+ custom[target["custom_field"]] = _coerce(val)
183
+ elif target in _NATIVE_FIELDS:
184
+ data[target] = _coerce_native(target, val)
185
+
186
+ if dest.synced_custom_field_gid:
187
+ custom[dest.synced_custom_field_gid] = synced_at.date().isoformat()
188
+ if custom:
189
+ data["custom_fields"] = custom
190
+ if creating:
191
+ data["projects"] = [dest.project_gid]
192
+ data["external"] = {"gid": record.primary_key} # idempotency handle
193
+ return data
194
+
195
+
196
+ def _coerce(val: Any) -> Any:
197
+ """Custom-field value: numbers pass through; lists join (enum option gids are
198
+ app-specific, out of scope); everything else stringifies."""
199
+ if isinstance(val, bool):
200
+ return str(val)
201
+ if isinstance(val, (int, float)):
202
+ return val
203
+ if isinstance(val, (list, tuple)):
204
+ return ", ".join(str(v) for v in val)
205
+ return str(val)
206
+
207
+
208
+ def _coerce_native(field: str, val: Any) -> Any:
209
+ if field == "completed":
210
+ return bool(val)
211
+ if field in ("due_on", "start_on"):
212
+ return str(val)[:10] # YYYY-MM-DD
213
+ return str(val)
@@ -0,0 +1,80 @@
1
+ """Shared property vocabulary for content-style sources (events, videos, CMS
2
+ entries, …).
3
+
4
+ These sources all map an external item onto the SAME neutral columns, so the
5
+ names live here ONCE instead of being hand-typed in each source's `_to_record` —
6
+ where they would silently drift (the same failure mode we hit with `is_auth_error`,
7
+ now fixed by one shared matcher). A destination or transform can import the `P_*`
8
+ constants to address the same columns without re-typing the strings.
9
+
10
+ Opt-in: `GitHubSource` deliberately does NOT use this — its columns (Stars, Forks,
11
+ License, …) are repo-specific. A source uses `content_record` only when the shared
12
+ content shape genuinely fits; per-source logic (URL building, status rules, author
13
+ resolution) still lives in that source — that's real variation, not duplication.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import json
18
+ from typing import Any
19
+
20
+ from durable_sync.core import Record
21
+
22
+
23
+ # --- paging cursor (shared by the windowed content sources) -----------------
24
+ # Each content source paginates a rolling time window, so its `fetch_page` cursor
25
+ # carries the window start (`after`, frozen on the first page so all pages query
26
+ # the same window) plus whatever native pagination token the API uses (Luma's
27
+ # next_cursor, YouTube's pageToken + resolved playlist, Contentful's skip). It's a
28
+ # small JSON blob threaded through Temporal as the spine's opaque cursor string.
29
+
30
def pack_cursor(**fields: Any) -> str:
    """Serialize the given keyword fields into a JSON object string, suitable
    for use as an opaque paging cursor."""
    encoded = json.dumps(fields)
    return encoded
32
+
33
+
34
def unpack_cursor(cursor: str) -> dict[str, Any]:
    """Parse a cursor string produced by ``pack_cursor`` back into its fields.

    Raises whatever ``json.loads`` raises when ``cursor`` is not valid JSON
    (including the empty string).
    """
    fields = json.loads(cursor)
    return fields
36
+
37
# Canonical neutral property names every content-style source emits. Import
# these instead of re-typing the strings so the column names cannot drift.
P_TYPE = "Type"
P_SOURCE = "Source"
P_SOURCE_ID = "Source ID"  # receives the record's primary_key
P_URL = "URL"
P_DATE = "Date"
P_STATUS = "Status"
P_AUTHOR = "Author"
P_AUTHORS = "Authors"

# Character cap applied to the title and author text.
_MAX_TEXT = 2000
48
+
49
+
50
def content_record(
    *,
    primary_key: str,
    title_property: str,
    title: str,
    item_type: str,
    source: str,
    url: str | None = None,
    date: str | None = None,
    status: str = "Published",
    author: str = "",
    authors: list[str] | None = None,
    extra: dict[str, Any] | None = None,
) -> Record:
    """Assemble a ``Record`` carrying the shared content columns.

    Args:
        primary_key: Record key; also written to the Source ID column.
        title_property: Name of the column that receives ``title``.
        title: Item title, capped at ``_MAX_TEXT`` characters (falsy -> "").
        item_type: Value for the Type column.
        source: Value for the Source column.
        url: Value for the URL column.
        date: Value for the Date column.
        status: Value for the Status column.
        author: Author text, capped at ``_MAX_TEXT`` characters (falsy -> "").
        authors: Value for the Authors column (falsy -> empty list).
        extra: Source-specific properties merged in last, so they override a
            shared column of the same name.

    Returns:
        ``Record(primary_key=..., properties=...)``.
    """
    capped_title = (title or "")[:_MAX_TEXT]
    capped_author = (author or "")[:_MAX_TEXT]
    shared: dict[str, Any] = {
        title_property: capped_title,
        P_TYPE: item_type,
        P_SOURCE: source,
        P_SOURCE_ID: primary_key,
        P_URL: url,
        P_DATE: date,
        P_STATUS: status,
        P_AUTHOR: capped_author,
        P_AUTHORS: authors or [],
    }
    properties = {**shared, **extra} if extra else shared
    return Record(primary_key=primary_key, properties=properties)
@@ -0,0 +1,25 @@
1
+ """Contentful connector: entries by content type, BOTH directions.
2
+
3
+ `ContentfulSource` reads (CDA preferred, CMA fallback — the only mode that sees
4
+ drafts); `ContentfulDestination` writes via the CMA (create/update/publish).
5
+ Source keep/drop policy for shared types belongs in your enrich/transform hook
6
+ (see ContentfulEntryContext). The destination takes a required `LinkStore`
7
+ (primary_key -> entry id) since neutral property names don't match content-model
8
+ field ids — see the boundary doctrine in CONTRIBUTING.
9
+
10
+ Requires the `contentful` extra: pip install "durable-sync[contentful]"
11
+ """
12
+ from __future__ import annotations
13
+
14
+ from durable_sync.connectors.contentful.destination import ContentfulDestination
15
+ from durable_sync.connectors.contentful.mcp_destination import ContentfulMcpDestination
16
+ from durable_sync.connectors.contentful.source import (
17
+ ContentfulConfig,
18
+ ContentfulEntryContext,
19
+ ContentfulSource,
20
+ )
21
+
22
+ __all__ = [
23
+ "ContentfulSource", "ContentfulConfig", "ContentfulEntryContext",
24
+ "ContentfulDestination", "ContentfulMcpDestination",
25
+ ]