durable-sync 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. durable_sync/__init__.py +26 -0
  2. durable_sync/activities.py +156 -0
  3. durable_sync/auth/__init__.py +8 -0
  4. durable_sync/auth/oauth/__init__.py +18 -0
  5. durable_sync/auth/oauth/flow.py +183 -0
  6. durable_sync/auth/oauth/refresh.py +58 -0
  7. durable_sync/auth/oauth/store.py +36 -0
  8. durable_sync/auth/oauth/token.py +36 -0
  9. durable_sync/auth/oauth/workflow.py +172 -0
  10. durable_sync/bootstrap.py +44 -0
  11. durable_sync/codec.py +80 -0
  12. durable_sync/config.py +35 -0
  13. durable_sync/connectors/__init__.py +14 -0
  14. durable_sync/connectors/asana/__init__.py +13 -0
  15. durable_sync/connectors/asana/destination.py +213 -0
  16. durable_sync/connectors/content.py +80 -0
  17. durable_sync/connectors/contentful/__init__.py +25 -0
  18. durable_sync/connectors/contentful/api.py +285 -0
  19. durable_sync/connectors/contentful/bootstrap.py +102 -0
  20. durable_sync/connectors/contentful/describe.py +61 -0
  21. durable_sync/connectors/contentful/destination.py +145 -0
  22. durable_sync/connectors/contentful/encode.py +49 -0
  23. durable_sync/connectors/contentful/introspect.py +69 -0
  24. durable_sync/connectors/contentful/mcp.py +95 -0
  25. durable_sync/connectors/contentful/mcp_destination.py +137 -0
  26. durable_sync/connectors/contentful/oauth.py +27 -0
  27. durable_sync/connectors/contentful/prove.py +51 -0
  28. durable_sync/connectors/contentful/source.py +192 -0
  29. durable_sync/connectors/contentful/start.py +46 -0
  30. durable_sync/connectors/contentful/store.py +25 -0
  31. durable_sync/connectors/contentful/token.py +13 -0
  32. durable_sync/connectors/contentful/token_check.py +42 -0
  33. durable_sync/connectors/github/__init__.py +33 -0
  34. durable_sync/connectors/github/api.py +169 -0
  35. durable_sync/connectors/github/source.py +230 -0
  36. durable_sync/connectors/luma/__init__.py +20 -0
  37. durable_sync/connectors/luma/api.py +121 -0
  38. durable_sync/connectors/luma/destination.py +128 -0
  39. durable_sync/connectors/luma/source.py +155 -0
  40. durable_sync/connectors/multi.py +78 -0
  41. durable_sync/connectors/notion/__init__.py +20 -0
  42. durable_sync/connectors/notion/bootstrap.py +97 -0
  43. durable_sync/connectors/notion/client.py +133 -0
  44. durable_sync/connectors/notion/destination.py +270 -0
  45. durable_sync/connectors/notion/oauth.py +25 -0
  46. durable_sync/connectors/notion/prove.py +57 -0
  47. durable_sync/connectors/notion/source.py +136 -0
  48. durable_sync/connectors/notion/start.py +46 -0
  49. durable_sync/connectors/notion/store.py +25 -0
  50. durable_sync/connectors/notion/token.py +13 -0
  51. durable_sync/connectors/youtube/__init__.py +13 -0
  52. durable_sync/connectors/youtube/api.py +122 -0
  53. durable_sync/connectors/youtube/source.py +152 -0
  54. durable_sync/core.py +210 -0
  55. durable_sync/env.py +55 -0
  56. durable_sync/http.py +71 -0
  57. durable_sync/linkstore.py +88 -0
  58. durable_sync/route.py +86 -0
  59. durable_sync/temporal_client.py +48 -0
  60. durable_sync/transport/__init__.py +12 -0
  61. durable_sync/transport/mcp.py +77 -0
  62. durable_sync/worker.py +109 -0
  63. durable_sync/workflows/__init__.py +9 -0
  64. durable_sync/workflows/sync.py +208 -0
  65. durable_sync-0.1.0.dist-info/METADATA +310 -0
  66. durable_sync-0.1.0.dist-info/RECORD +69 -0
  67. durable_sync-0.1.0.dist-info/WHEEL +5 -0
  68. durable_sync-0.1.0.dist-info/licenses/LICENSE +21 -0
  69. durable_sync-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,69 @@
1
+ """List a Contentful space's content types + fields, so you can fill in the
2
+ destination's content_type / field_map (and the smoke's CONTENTFUL_SMOKE_* vars).
3
+
4
+ Uses whichever token is set: a Management (CMA) token (api.contentful.com) or a
5
+ read-only Delivery (CDA) token (cdn.contentful.com) — both expose the content
6
+ model. Prints each content type's id and its title field (Contentful's
7
+ `displayField`) in copy-paste-ready form, plus every field and type.
8
+
9
+ CONTENTFUL_SPACE_ID=... CONTENTFUL_CMA_TOKEN=... \
10
+ PYTHONPATH=. python -m durable_sync.connectors.contentful.introspect
11
+
12
+ Requires the `contentful` extra.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import os
17
+ import sys
18
+
19
+ import httpx
20
+
21
+ from durable_sync.env import load_env
22
+
23
+ CDA_BASE = "https://cdn.contentful.com"
24
+ CMA_BASE = "https://api.contentful.com"
25
+
26
+
27
+ def _describe(field: dict) -> str:
28
+ ftype = field.get("type", "?")
29
+ if ftype == "Array":
30
+ items = field.get("items", {})
31
+ ftype = f"Array<{items.get('linkType') or items.get('type') or '?'}>"
32
+ elif ftype == "Link":
33
+ ftype = f"Link<{field.get('linkType', '?')}>"
34
+ return ftype
35
+
36
+
37
def main() -> None:
    """Print every content type of a Contentful space with its fields.

    Reads CONTENTFUL_SPACE_ID, CONTENTFUL_ENVIRONMENT (default ``master``),
    CONTENTFUL_CMA_TOKEN and CONTENTFUL_DELIVERY_TOKEN from the environment
    (after ``load_env()``). Exits with a message when the space id or both
    tokens are missing, or when the HTTP response status is >= 400.
    """
    load_env()
    environ = os.environ
    space = environ.get("CONTENTFUL_SPACE_ID")
    env = environ.get("CONTENTFUL_ENVIRONMENT", "master")
    cma = environ.get("CONTENTFUL_CMA_TOKEN")
    cda = environ.get("CONTENTFUL_DELIVERY_TOKEN")
    if not (space and (cma or cda)):
        sys.exit("Set CONTENTFUL_SPACE_ID and CONTENTFUL_CMA_TOKEN (or CONTENTFUL_DELIVERY_TOKEN).")

    # A Management token, when set, wins over the Delivery token.
    if cma:
        base, token, api_label = CMA_BASE, cma, "CMA"
    else:
        base, token, api_label = CDA_BASE, cda, "CDA"

    url = f"{base}/spaces/{space}/environments/{env}/content_types"
    response = httpx.get(
        url,
        headers={"Authorization": f"Bearer {token}"},
        params={"limit": 1000},
        timeout=30,
    )
    if response.status_code >= 400:
        sys.exit(f"Contentful {response.status_code}: {response.text[:400]}")

    content_types = response.json().get("items", [])
    print(f"# {len(content_types)} content type(s) in space {space} (env {env}) via {api_label}\n")

    def _by_id(content_type: dict) -> str:
        return content_type.get("sys", {}).get("id", "")

    for ct in sorted(content_types, key=_by_id):
        ct_id = ct.get("sys", {}).get("id", "?")
        # displayField names the field used as the entry title.
        display = ct.get("displayField") or "?"
        print(f"## {ct.get('name', '?')}")
        print(f"CONTENTFUL_SMOKE_CONTENT_TYPE={ct_id}")
        print(f"CONTENTFUL_SMOKE_TITLE_FIELD={display}")
        for field in ct.get("fields", []):
            fid = field.get("id", "?")
            mark = " <- title" if fid == display else ""
            print(f" {fid} : {_describe(field)}{mark}")
        print()
66
+
67
+
68
+ if __name__ == "__main__":
69
+ main()
@@ -0,0 +1,95 @@
1
+ """Contentful over its hosted MCP server (mcp.contentful.com) — the no-admin / SSO
2
+ path, when a static CMA token is blocked and the MCP-OAuth token doesn't work
3
+ against the REST API.
4
+
5
+ Contentful's MCP is AGENT-oriented: tools take clean JSON but return LLM-formatted
6
+ pseudo-XML (with prose prefixes, arrays as repeated elements). So:
7
+ * WRITES are reliable — inputs are clean JSON (fields = {fieldId:{locale:value}},
8
+ same shape as the REST encoder), and we only scrape two scalars from responses:
9
+ the new entry's sys.id (create) and sys.version (for the optimistic-lock update).
10
+ * READS over MCP are fragile (multi-entry XML) — prefer the REST source when you
11
+ have CMA access; this module is the write path.
12
+
13
+ Pairs the generic MCP transport (durable_sync.transport.mcp) with the OAuth binding
14
+ (connectors.contentful.oauth). get_initial_context is called once on open (the
15
+ server requires it first).
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import re
20
+ from contextlib import asynccontextmanager
21
+ from typing import Any, AsyncIterator
22
+
23
+ from durable_sync.transport.mcp import TokenProvider, open_session as _open_session
24
+ from durable_sync.connectors.contentful import oauth
25
+
26
+
27
+ class ContentfulMcp:
28
+ """Write-oriented wrapper over an open Contentful-MCP session."""
29
+
30
+ def __init__(self, session, space_id: str, environment: str):
31
+ self._s = session
32
+ self.space_id = space_id
33
+ self.environment = environment
34
+
35
+ def _base(self) -> dict[str, str]:
36
+ return {"spaceId": self.space_id, "environmentId": self.environment}
37
+
38
+ async def call_raw(self, tool: str, args: dict[str, Any]) -> str:
39
+ """Escape hatch for discovery/smokes: call a tool, return the raw text."""
40
+ return await self._s.call(tool, {**self._base(), **args})
41
+
42
+ async def create_entry(self, content_type: str, fields: dict[str, Any]) -> str | None:
43
+ """Create an entry; return its new sys.id (None if it couldn't be scraped)."""
44
+ raw = await self._s.call(
45
+ "create_entry", {**self._base(), "contentTypeId": content_type, "fields": fields}
46
+ )
47
+ return entry_id(raw)
48
+
49
+ async def entry_version(self, entry_id_: str) -> int | None:
50
+ """sys.version of an entry (required for the optimistic-lock update)."""
51
+ raw = await self._s.call("get_entry", {**self._base(), "entryId": entry_id_})
52
+ return entry_version_of(raw)
53
+
54
+ async def update_entry(self, entry_id_: str, fields: dict[str, Any], version: int) -> None:
55
+ await self._s.call(
56
+ "update_entry",
57
+ {**self._base(), "entryId": entry_id_, "version": version, "fields": fields},
58
+ )
59
+
60
+ async def publish_entry(self, entry_id_: str) -> None:
61
+ await self._s.call("publish_entry", {**self._base(), "entryId": [entry_id_]})
62
+
63
+
64
+ @asynccontextmanager
65
+ async def open_contentful(
66
+ space_id: str, environment: str, token_provider: TokenProvider
67
+ ) -> AsyncIterator[ContentfulMcp]:
68
+ """Open a Contentful-MCP session (calls get_initial_context first, as required)."""
69
+ async with _open_session(oauth.MCP_ENDPOINT, token_provider) as session:
70
+ cf = ContentfulMcp(session, space_id, environment)
71
+ await session.call("get_initial_context", {})
72
+ yield cf
73
+
74
+
75
+ # --- response scraping ------------------------------------------------------
76
+ # Contentful's MCP returns prose-prefixed pseudo-XML, and it is NOT reliably
77
+ # parseable: it contains invalid tags (e.g. `<fieldStatus><*>…`) and unescaped
78
+ # content, so an XML parser chokes. We don't need the whole document — only two
79
+ # scalars — so we scrape them with anchored regexes:
80
+ # * the entry id from the sys URN (…/entries/<id>), which is unambiguous (the
81
+ # bare <id> elements are dangerous — space/environment/contentType ids appear
82
+ # first), with a post-</space> fallback for any URN-less response;
83
+ # * the version from the lone <version> tag (distinct from <publishedVersion>).
84
+
85
def entry_id(raw: str) -> str | None:
    """Scrape an entry id out of a raw MCP response, or return None.

    Two patterns are tried in order: the ``/entries/<id>`` segment of the sys
    URN, then an ``<id>`` element that directly follows ``</space>``.
    """
    patterns = (
        r"/entries/([A-Za-z0-9_-]+)",  # from the sys URN
        r"</space>\s*<id>\s*([A-Za-z0-9_-]+)\s*</id>",  # sys order: space, then id
    )
    for pattern in patterns:
        found = re.search(pattern, raw)
        if found is not None:
            return found.group(1)
    return None
91
+
92
+
93
def entry_version_of(raw: str) -> int | None:
    """Return the integer inside the first ``<version>`` tag, or None.

    The pattern requires the exact tag name, so ``<publishedVersion>`` is
    never matched.
    """
    found = re.search(r"<version>\s*(\d+)\s*</version>", raw)
    if found is None:
        return None
    return int(found.group(1))
@@ -0,0 +1,137 @@
1
+ """ContentfulMcpDestination — write entries to Contentful over its MCP server.
2
+
3
+ The no-admin / SSO-blocked path: where a static CMA token can't reach the space,
4
+ OAuth-as-an-individual through the MCP server can. Create + idempotent update are
5
+ live-verified; `publish` is optional and TOLERANT — the Contentful MCP app
6
+ installation has its own per-tool permission layer (separate from OAuth scopes),
7
+ so `publish_entry` may be disallowed by a space admin. We create/update the entry
8
+ regardless and only skip publishing with a warning when it's gated.
9
+
10
+ Idempotency uses a LinkStore (primary_key -> entry id), like the REST destination
11
+ and Luma — Contentful field ids don't match neutral property names. Reuses the
12
+ shared `encode_fields` for the wire shape. `token_provider` yields the OAuth access
13
+ token (e.g. a query to the workflow-owned token; the smoke passes one directly).
14
+
15
+ Requires the `contentful` extra.
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import datetime as dt
20
+ import logging
21
+ from contextlib import asynccontextmanager
22
+ from typing import AsyncIterator
23
+
24
+ from durable_sync.core import Record, auth_error_in_chain
25
+ from durable_sync.linkstore import LinkStore
26
+ from durable_sync.transport.mcp import TokenProvider
27
+ from durable_sync.connectors.contentful.encode import encode_fields
28
+ from durable_sync.connectors.contentful.mcp import ContentfulMcp, open_contentful
29
+ from durable_sync.connectors.contentful.token import current_access_token
30
+
31
+ log = logging.getLogger("durable_sync.connectors.contentful.mcp_destination")
32
+
33
+
34
class ContentfulMcpDestination:
    """Destination that writes entries to Contentful through its MCP server.

    Holds only configuration; ``connect()`` opens the MCP session and yields
    a per-connection ``_McpSession`` that performs the writes.
    """

    name = "contentful"

    def __init__(
        self,
        *,
        space_id: str,
        content_type: str,
        field_map: dict[str, str],
        link_store: LinkStore,
        token_provider: TokenProvider | None = None,
        environment: str = "master",
        default_locale: str = "en-US",
        create_only_properties: set[str] | None = None,
        publish: bool = False,
    ):
        self.space_id = space_id
        self.content_type = content_type
        self.field_map = field_map
        self.link_store = link_store
        self.environment = environment
        self.default_locale = default_locale
        self.create_only_properties = create_only_properties if create_only_properties else set()
        self.publish = publish
        # With no explicit provider, fall back to the accessor that queries the
        # workflow owning the Contentful OAuth token (started via
        # connectors.contentful.start), so a worker can run unattended.
        self._token_provider = token_provider if token_provider else current_access_token

    @property
    def configured(self) -> bool:
        """True when both the space id and the content type are non-empty."""
        return bool(self.space_id) and bool(self.content_type)

    @property
    def config_hint(self) -> str:
        """Short explanation of what is missing when not configured."""
        return "Contentful space id / content type unset"

    @asynccontextmanager
    async def connect(self) -> AsyncIterator["_McpSession"]:
        """Open the Contentful MCP session and yield a write session over it."""
        opened = open_contentful(self.space_id, self.environment, self._token_provider)
        async with opened as cf:
            yield _McpSession(cf, self)

    @staticmethod
    def is_auth_error(err: BaseException) -> bool:
        """Delegate to ``auth_error_in_chain`` to classify ``err``."""
        return auth_error_in_chain(err)

    # The worker registers these so the Contentful token-owner workflow runs
    # alongside the sync (same OAuth-as-a-workflow toolkit as Notion).
    def aux_workflows(self) -> list:
        """Extra workflow classes to register: the OAuth token workflow."""
        from durable_sync.auth.oauth.workflow import OAuthTokenWorkflow

        return [OAuthTokenWorkflow]

    def aux_activities(self) -> list:
        """Extra activities to register: the OAuth refresh activity."""
        from durable_sync.auth.oauth.refresh import refresh_oauth_token

        return [refresh_oauth_token]
88
+
89
+
90
+ class _McpSession:
91
+ def __init__(self, cf: ContentfulMcp, dest: ContentfulMcpDestination):
92
+ self._cf = cf
93
+ self._d = dest
94
+
95
+ async def query_existing_ids(self) -> dict[str, str]:
96
+ return await self._d.link_store.get_all()
97
+
98
+ def _fields(self, record: Record, *, creating: bool):
99
+ return encode_fields(
100
+ record, field_map=self._d.field_map, default_locale=self._d.default_locale,
101
+ create_only_properties=self._d.create_only_properties, creating=creating,
102
+ )
103
+
104
+ async def create(self, record: Record, synced_at: dt.datetime) -> bool:
105
+ entry_id = await self._cf.create_entry(self.content_type, self._fields(record, creating=True))
106
+ if not entry_id:
107
+ raise RuntimeError("Contentful MCP create_entry: could not determine the new entry id")
108
+ await self._d.link_store.put(record.primary_key, entry_id)
109
+ await self._maybe_publish(entry_id)
110
+ return True
111
+
112
+ async def update(self, existing_id: str, record: Record, synced_at: dt.datetime) -> bool:
113
+ version = await self._cf.entry_version(existing_id)
114
+ if version is None:
115
+ raise RuntimeError(f"Contentful MCP: could not read version for entry {existing_id}")
116
+ await self._cf.update_entry(existing_id, self._fields(record, creating=False), version)
117
+ await self._maybe_publish(existing_id)
118
+ return True
119
+
120
+ @property
121
+ def content_type(self) -> str:
122
+ return self._d.content_type
123
+
124
+ async def _maybe_publish(self, entry_id: str) -> None:
125
+ """Publish if asked — but tolerate the MCP app's per-tool permission gate:
126
+ the entry is already created/updated, so a forbidden publish_entry leaves a
127
+ draft + a warning rather than failing the whole sync."""
128
+ if not self._d.publish:
129
+ return
130
+ try:
131
+ await self._cf.publish_entry(entry_id)
132
+ except RuntimeError as e:
133
+ if "publish_entry" in str(e) or "permission" in str(e).lower():
134
+ log.warning("Contentful publish_entry not permitted for %s (MCP app config) — left as draft: %s",
135
+ entry_id, e)
136
+ return
137
+ raise
@@ -0,0 +1,27 @@
1
+ """Contentful binding of the generic OAuth client (durable_sync.auth.oauth).
2
+
3
+ Same flow as Notion — OAuth 2.1 + PKCE + dynamic client registration — just
4
+ pinned to Contentful's hosted MCP server. Confirmed via discover(): Contentful
5
+ exposes /authorize, /token, and /register (DCR), so no admin / no pre-registered
6
+ app is needed; you authorize as yourself (through your org's SSO, which is what
7
+ makes a static CFPAT unnecessary here).
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from durable_sync.auth.oauth.flow import ( # noqa: F401 (re-exported for callers)
12
+ build_authorize_url,
13
+ exchange_code,
14
+ gen_pkce,
15
+ new_state,
16
+ refresh_access_token,
17
+ register_client,
18
+ )
19
+ from durable_sync.auth.oauth import flow as _generic
20
+
21
+ MCP_BASE = "https://mcp.contentful.com"
22
+ MCP_ENDPOINT = f"{MCP_BASE}/mcp" # Streamable HTTP transport
23
+
24
+
25
def discover() -> dict[str, str]:
    """Run the generic OAuth endpoint discovery against Contentful's MCP base URL."""
    endpoints = _generic.discover(MCP_BASE)
    return endpoints
@@ -0,0 +1,51 @@
1
+ """Headless proof + tool discovery for Contentful's MCP server. NO browser.
2
+
3
+ Loads the saved refresh token, mints a fresh access token, opens the MCP session,
4
+ and lists the tools Contentful exposes — which is exactly what we need to build the
5
+ MCP source/destination (their tool names + schemas, instead of guessing).
6
+
7
+ PYTHONPATH=. python -m durable_sync.connectors.contentful.prove
8
+
9
+ If this lists tools, the no-admin OAuth path works headlessly — the Temporal auth
10
+ workflow can mint tokens unattended, same as Notion.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import asyncio
15
+
16
+ from durable_sync.connectors.contentful import oauth, store
17
+ from durable_sync.transport.mcp import open_session
18
+
19
+
20
async def _list_tools(access_token: str) -> list[str]:
    """Open an MCP session with a fixed access token and return its tool names."""

    async def token_provider() -> str:
        # The session expects an async provider; always hand back the same token.
        return access_token

    session_cm = open_session(oauth.MCP_ENDPOINT, token_provider)
    async with session_cm as session:
        names = await session.tool_names()
    return names
25
+
26
+
27
def main() -> None:
    """Refresh the saved Contentful credentials headlessly and list the MCP tools.

    Exits with instructions when no saved credentials are found. When the
    refresh response carries a new refresh token, it is written back to the
    store before the MCP session is opened.
    """
    creds = store.load()
    if not creds:
        raise SystemExit(
            f"No credentials at {store.path()}. Run the bootstrap first:\n"
            f" PYTHONPATH=. python -m durable_sync.connectors.contentful.bootstrap"
        )

    print("Refreshing access token (headless, no browser)...")
    tokens = oauth.refresh_access_token(
        creds["token_endpoint"], creds["client_id"], creds["refresh_token"]
    )
    # Persist a rotated refresh token now (providers rotate on every use).
    rotated = tokens.get("refresh_token")
    if rotated:
        creds["refresh_token"] = rotated
        store.save(creds)
    print(f" Got access token (expires in {tokens.get('expires_in')}s).\n")

    tool_names = asyncio.run(_list_tools(tokens["access_token"]))
    print(f"SUCCESS — authenticated headlessly. Contentful MCP exposes {len(tool_names)} tool(s):")
    for tool_name in tool_names:
        print(f" - {tool_name}")
    print("\nHeadless auth proven — paste this tool list and we'll build the connector against it.")
48
+
49
+
50
+ if __name__ == "__main__":
51
+ main()
@@ -0,0 +1,192 @@
1
+ """ContentfulSource — entries of chosen content types -> Records, with an
2
+ enrichment hook.
3
+
4
+ Contentful is usually shared across teams, so a Source here is scoped by CONTENT
5
+ TYPE: `content_types` maps each content-type id you care about to the "Type" label
6
+ it should carry (e.g. {"blogPost": "Blog"}). One entity workflow per content type.
7
+ Whether a *shared* type's entries are kept (e.g. only when an author matches your
8
+ own directory) is app policy — do it in your `enrich`/transform hook, which gets
9
+ the resolved authors via `ContentfulEntryContext`.
10
+
11
+ Auth: prefer a read-only Delivery (CDA) token; a self-serve Management (CMA) PAT
12
+ is the fallback (and the only mode that sees drafts). Each is read from the env
13
+ var named in `ContentfulConfig`. Requires the `contentful` extra.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import inspect
18
+ import logging
19
+ import os
20
+ from dataclasses import dataclass, field
21
+ from datetime import datetime, timedelta, timezone
22
+ from typing import Any, Awaitable, Callable, Union
23
+
24
+ import httpx
25
+ from temporalio import activity
26
+
27
+ from durable_sync.core import Record, SourceSpec
28
+ from durable_sync.connectors import content
29
+ from durable_sync.connectors.contentful import api
30
+ from durable_sync.connectors.contentful.api import ContentfulSpace
31
+
32
+ log = logging.getLogger("durable_sync.connectors.contentful")
33
+
34
+ EnrichHook = Callable[[Record, "ContentfulEntryContext"], Union[Record, Awaitable[Record]]]
35
+
36
+
37
@dataclass
class ContentfulConfig:
    """Deployment-supplied settings for the Contentful source.

    ``content_types`` maps a content-type id to the "Type" label its entries
    carry, and doubles as the allowlist of what is fetched. ``url_prefixes``
    maps a content-type id to the public URL prefix an entry's slug is
    appended to.
    """

    space_id: str
    # {ct_id: type_label}
    content_types: dict[str, str]
    # {ct_id: url_prefix}
    url_prefixes: dict[str, str] = field(default_factory=dict)
    environment: str = "master"
    default_locale: str = "en-US"
    # Name of the env var holding the Delivery (CDA) token — the preferred one.
    delivery_token_env: str = "CONTENTFUL_DELIVERY_TOKEN"
    # Name of the env var holding the Management (CMA) PAT — the fallback.
    cma_token_env: str = "CONTENTFUL_CMA_TOKEN"
    lookback_days: int = 21
    interval_minutes: int = 360
    title_property: str = "Name"
53
+
54
+
55
@dataclass
class ContentfulEntryContext:
    """What the enrich hook receives alongside the record.

    Carries the raw (flattened) entry, the resolved authors ({name, email}),
    the content type being fetched, and the live HTTP client.
    """

    raw_entry: dict
    authors: list[dict]
    content_type: str
    client: httpx.AsyncClient
63
+
64
+
65
def _heartbeat(detail: str) -> None:
    """Send an activity heartbeat with ``detail``; do nothing outside an activity."""
    if not activity.in_activity():
        return
    activity.heartbeat(detail)
68
+
69
+
70
class ContentfulSource:
    """Source that turns entries of the configured content types into Records.

    An optional ``enrich`` hook (sync or async) may replace each record; it
    receives the record plus a ``ContentfulEntryContext``.
    """

    name = "contentful"

    def __init__(self, config: ContentfulConfig, *, enrich: EnrichHook | None = None):
        self._config = config
        self._enrich = enrich

    def specs(self) -> list[SourceSpec]:
        """Build one spec per configured content type.

        One spec means one entity workflow, so each type syncs and retries
        independently.
        """
        cfg = self._config
        built = []
        for ct_id, label in cfg.content_types.items():
            built.append(
                SourceSpec(
                    key=f"type:{ct_id}",
                    interval_minutes=cfg.interval_minutes,
                    params={"content_type": ct_id, "item_type": label},
                )
            )
        return built

    def _space(self) -> ContentfulSpace:
        """Describe the space, reading both tokens from the configured env vars."""
        cfg = self._config
        environ = os.environ
        return ContentfulSpace(
            space_id=cfg.space_id,
            environment=cfg.environment,
            default_locale=cfg.default_locale,
            delivery_token=environ.get(cfg.delivery_token_env, ""),
            cma_token=environ.get(cfg.cma_token_env, ""),
        )

    async def _enriched(
        self, record: Record, entry: dict, authors: list[dict],
        content_type: str, client: httpx.AsyncClient,
    ) -> Record:
        """Run the enrich hook (if any) and return its result, awaited when needed."""
        if self._enrich is None:
            return record
        ctx = ContentfulEntryContext(raw_entry=entry, authors=authors,
                                     content_type=content_type, client=client)
        result = self._enrich(record, ctx)
        if inspect.isawaitable(result):
            return await result
        return result

    async def fetch_page(
        self, spec: SourceSpec, only_items: list[str] | None, cursor: str | None
    ) -> tuple[list[Record], str | None]:
        """Fetch ONE page of entries; return the records and the next cursor.

        The next cursor is None on the last page. A cursor packs the frozen
        window start (``after``) together with the ``skip`` offset. When
        ``only_items`` is given, each page is filtered down to those ids, but
        the window is still walked page by page. Entries without a title or
        name are skipped.
        """
        cfg = self._config
        content_type = spec.params["content_type"]
        item_type = spec.params.get("item_type") or cfg.content_types.get(content_type, content_type)
        space = self._space()
        wanted = set(only_items or [])

        if cursor is not None:
            state = content.unpack_cursor(cursor)
            after_iso, skip = state["after"], state["skip"]
        else:
            # First page: freeze the window start so later pages agree on it.
            window_start = datetime.now(timezone.utc) - timedelta(days=cfg.lookback_days)
            after_iso, skip = window_start.isoformat(), 0

        records: list[Record] = []
        async with httpx.AsyncClient(timeout=30) as client:
            pairs, next_skip = await api.iter_entries_page(
                client, space, content_type, after_iso, skip=skip)
            for entry, authors in pairs:
                source_id = entry.get("sys", {}).get("id", "")
                if wanted and source_id not in wanted:
                    continue
                if not _has_title(entry):
                    continue  # empty-shell draft, no title yet -> not a real item
                record = self._to_record(entry, item_type, authors)
                record = await self._enriched(record, entry, authors, content_type, client)
                records.append(record)
                _heartbeat(source_id)

        if next_skip is None:
            next_cursor = None
        else:
            next_cursor = content.pack_cursor(after=after_iso, skip=next_skip)
        log.info("Fetched %d Contentful %s entries for %s (skip=%s)", len(records), content_type, spec.key, skip)
        return records, next_cursor

    async def fetch(self, spec: SourceSpec, only_items: list[str] | None = None) -> list[Record]:
        """Return the whole window as one list by draining ``fetch_page``."""
        collected: list[Record] = []
        cursor: str | None = None
        while True:
            page, cursor = await self.fetch_page(spec, only_items, cursor)
            collected.extend(page)
            if cursor is None:
                break
        return collected

    def _to_record(self, entry: dict, item_type: str, authors: list[dict]) -> Record:
        """Map one Contentful entry (plus resolved authors) to a neutral Record."""
        cfg = self._config
        meta = entry.get("sys", {})
        fields = entry.get("fields", {})

        source_id = meta.get("id", "")
        title = fields.get("title") or fields.get("name") or "(untitled entry)"
        # Prefer an explicit publish-date field, then `date`, then createdAt.
        item_date = fields.get("publishDate") or fields.get("date") or meta.get("createdAt")
        if entry.get("_published", True):
            status = "Published"
        else:
            status = "Draft"

        slug = fields.get("slug")
        ct_id = meta.get("contentType", {}).get("sys", {}).get("id", "")
        # A URL is only built when both a slug and a prefix for this type exist.
        url = None
        if slug:
            prefix = cfg.url_prefixes.get(ct_id)
            if prefix:
                url = f"{prefix}{slug}"

        host_names = [person["name"] for person in authors if person.get("name")]
        # authorOverwriteText (a community author with no `person`) wins for the
        # human-readable label; resolved names still drive any author matching.
        author = fields.get("authorOverwriteText") or ", ".join(host_names)
        raw_tags = fields.get("tags") or []
        tags = [tag for tag in raw_tags if isinstance(tag, str)]

        return content.content_record(
            primary_key=source_id,
            title_property=cfg.title_property,
            title=str(title),
            item_type=item_type,
            source="Contentful",
            url=url,
            date=item_date,
            status=status,
            author=str(author),
            authors=host_names,
            extra={"Tags": tags},
        )
186
+
187
+
188
+ def _has_title(entry: dict[str, Any]) -> bool:
189
+ """True if the entry has a real title/name (titled drafts count; blank ones
190
+ don't). Keeps us from writing '(untitled entry)' placeholder rows. Pure."""
191
+ fields = entry.get("fields", {})
192
+ return bool(fields.get("title") or fields.get("name"))
@@ -0,0 +1,46 @@
1
+ """Launch OAuthTokenWorkflow from the saved Contentful bootstrap credentials.
2
+
3
+ PYTHONPATH=. python -m durable_sync.connectors.contentful.start
4
+
5
+ Reads the bootstrap creds, starts the long-running auth workflow that owns the
6
+ rotating refresh token, and serves fresh access tokens via query. After this, a
7
+ worker hosting ContentfulMcpDestination keeps tokens fresh unattended.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import asyncio
12
+
13
+ from durable_sync import config
14
+ from durable_sync.auth.oauth.workflow import AuthParams, OAuthTokenWorkflow
15
+ from durable_sync.connectors.contentful import store
16
+ from durable_sync.temporal_client import connect
17
+
18
+
19
async def main() -> None:
    """Start the OAuth token workflow from the saved Contentful credentials.

    Exits with instructions when no saved credentials are found; otherwise
    connects, starts ``OAuthTokenWorkflow`` under the configured workflow id
    and task queue, and prints how to verify it.
    """
    creds = store.load()
    if not creds:
        raise SystemExit(
            f"No credentials at {store.path()}. Run the bootstrap first:\n"
            f" PYTHONPATH=. python -m durable_sync.connectors.contentful.bootstrap"
        )

    client = await connect()
    params = AuthParams(
        client_id=creds["client_id"],
        token_endpoint=creds["token_endpoint"],
        refresh_token=creds["refresh_token"],
    )
    handle = await client.start_workflow(
        OAuthTokenWorkflow.run,
        params,
        id=config.CONTENTFUL_AUTH_WORKFLOW_ID,
        task_queue=config.TASK_QUEUE,
    )
    summary = (
        f"Started OAuthTokenWorkflow (id={handle.id}). It now owns the Contentful "
        f"refresh token and keeps access tokens fresh.\n"
        f"Verify: temporal workflow query --workflow-id {handle.id} --type get_access_token"
    )
    print(summary)
43
+
44
+
45
+ if __name__ == "__main__":
46
+ asyncio.run(main())
@@ -0,0 +1,25 @@
1
+ """Contentful binding of the generic creds store (durable_sync.auth.oauth.store).
2
+
3
+ Pins Contentful's auth file path; bootstrap/prove call load()/save()/path().
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import os
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from durable_sync.auth.oauth import store as _store
12
+
13
+ _FILE = os.getenv("DURABLE_SYNC_CONTENTFUL_AUTH_FILE", ".contentful_auth.json")
14
+
15
+
16
def load() -> dict[str, Any] | None:
    """Load the Contentful credentials from the pinned auth file via the generic store."""
    creds = _store.load(_FILE)
    return creds
18
+
19
+
20
def save(data: dict[str, Any]) -> None:
    """Write ``data`` to the pinned Contentful auth file via the generic store."""
    _store.save(_FILE, data)
    return None
22
+
23
+
24
def path() -> Path:
    """Return the generic store's resolved location for the Contentful auth file."""
    resolved = _store.resolve(_FILE)
    return resolved
@@ -0,0 +1,13 @@
1
+ """Contentful binding of the generic token accessor (durable_sync.auth.oauth.token).
2
+
3
+ The default token_provider for ContentfulMcpDestination: query the OAuthTokenWorkflow
4
+ running under config.CONTENTFUL_AUTH_WORKFLOW_ID for a fresh access token.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from durable_sync import config
9
+ from durable_sync.auth.oauth.token import current_access_token as _current
10
+
11
+
12
async def current_access_token() -> str:
    """Ask the generic accessor for a token under the Contentful auth workflow id."""
    token = await _current(config.CONTENTFUL_AUTH_WORKFLOW_ID)
    return token