durable-sync 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- durable_sync/__init__.py +26 -0
- durable_sync/activities.py +156 -0
- durable_sync/auth/__init__.py +8 -0
- durable_sync/auth/oauth/__init__.py +18 -0
- durable_sync/auth/oauth/flow.py +183 -0
- durable_sync/auth/oauth/refresh.py +58 -0
- durable_sync/auth/oauth/store.py +36 -0
- durable_sync/auth/oauth/token.py +36 -0
- durable_sync/auth/oauth/workflow.py +172 -0
- durable_sync/bootstrap.py +44 -0
- durable_sync/codec.py +80 -0
- durable_sync/config.py +35 -0
- durable_sync/connectors/__init__.py +14 -0
- durable_sync/connectors/asana/__init__.py +13 -0
- durable_sync/connectors/asana/destination.py +213 -0
- durable_sync/connectors/content.py +80 -0
- durable_sync/connectors/contentful/__init__.py +25 -0
- durable_sync/connectors/contentful/api.py +285 -0
- durable_sync/connectors/contentful/bootstrap.py +102 -0
- durable_sync/connectors/contentful/describe.py +61 -0
- durable_sync/connectors/contentful/destination.py +145 -0
- durable_sync/connectors/contentful/encode.py +49 -0
- durable_sync/connectors/contentful/introspect.py +69 -0
- durable_sync/connectors/contentful/mcp.py +95 -0
- durable_sync/connectors/contentful/mcp_destination.py +137 -0
- durable_sync/connectors/contentful/oauth.py +27 -0
- durable_sync/connectors/contentful/prove.py +51 -0
- durable_sync/connectors/contentful/source.py +192 -0
- durable_sync/connectors/contentful/start.py +46 -0
- durable_sync/connectors/contentful/store.py +25 -0
- durable_sync/connectors/contentful/token.py +13 -0
- durable_sync/connectors/contentful/token_check.py +42 -0
- durable_sync/connectors/github/__init__.py +33 -0
- durable_sync/connectors/github/api.py +169 -0
- durable_sync/connectors/github/source.py +230 -0
- durable_sync/connectors/luma/__init__.py +20 -0
- durable_sync/connectors/luma/api.py +121 -0
- durable_sync/connectors/luma/destination.py +128 -0
- durable_sync/connectors/luma/source.py +155 -0
- durable_sync/connectors/multi.py +78 -0
- durable_sync/connectors/notion/__init__.py +20 -0
- durable_sync/connectors/notion/bootstrap.py +97 -0
- durable_sync/connectors/notion/client.py +133 -0
- durable_sync/connectors/notion/destination.py +270 -0
- durable_sync/connectors/notion/oauth.py +25 -0
- durable_sync/connectors/notion/prove.py +57 -0
- durable_sync/connectors/notion/source.py +136 -0
- durable_sync/connectors/notion/start.py +46 -0
- durable_sync/connectors/notion/store.py +25 -0
- durable_sync/connectors/notion/token.py +13 -0
- durable_sync/connectors/youtube/__init__.py +13 -0
- durable_sync/connectors/youtube/api.py +122 -0
- durable_sync/connectors/youtube/source.py +152 -0
- durable_sync/core.py +210 -0
- durable_sync/env.py +55 -0
- durable_sync/http.py +71 -0
- durable_sync/linkstore.py +88 -0
- durable_sync/route.py +86 -0
- durable_sync/temporal_client.py +48 -0
- durable_sync/transport/__init__.py +12 -0
- durable_sync/transport/mcp.py +77 -0
- durable_sync/worker.py +109 -0
- durable_sync/workflows/__init__.py +9 -0
- durable_sync/workflows/sync.py +208 -0
- durable_sync-0.1.0.dist-info/METADATA +310 -0
- durable_sync-0.1.0.dist-info/RECORD +69 -0
- durable_sync-0.1.0.dist-info/WHEEL +5 -0
- durable_sync-0.1.0.dist-info/licenses/LICENSE +21 -0
- durable_sync-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""List a Contentful space's content types + fields, so you can fill in the
|
|
2
|
+
destination's content_type / field_map (and the smoke's CONTENTFUL_SMOKE_* vars).
|
|
3
|
+
|
|
4
|
+
Uses whichever token is set: a Management (CMA) token (api.contentful.com) or a
|
|
5
|
+
read-only Delivery (CDA) token (cdn.contentful.com) — both expose the content
|
|
6
|
+
model. Prints each content type's id and its title field (Contentful's
|
|
7
|
+
`displayField`) in copy-paste-ready form, plus every field and type.
|
|
8
|
+
|
|
9
|
+
CONTENTFUL_SPACE_ID=... CONTENTFUL_CMA_TOKEN=... \
|
|
10
|
+
PYTHONPATH=. python -m durable_sync.connectors.contentful.introspect
|
|
11
|
+
|
|
12
|
+
Requires the `contentful` extra.
|
|
13
|
+
"""
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import sys
|
|
18
|
+
|
|
19
|
+
import httpx
|
|
20
|
+
|
|
21
|
+
from durable_sync.env import load_env
|
|
22
|
+
|
|
23
|
+
CDA_BASE = "https://cdn.contentful.com"
|
|
24
|
+
CMA_BASE = "https://api.contentful.com"
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _describe(field: dict) -> str:
|
|
28
|
+
ftype = field.get("type", "?")
|
|
29
|
+
if ftype == "Array":
|
|
30
|
+
items = field.get("items", {})
|
|
31
|
+
ftype = f"Array<{items.get('linkType') or items.get('type') or '?'}>"
|
|
32
|
+
elif ftype == "Link":
|
|
33
|
+
ftype = f"Link<{field.get('linkType', '?')}>"
|
|
34
|
+
return ftype
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def main() -> None:
    """Print every content type (id, title field, fields) of a Contentful space.

    Configuration comes from the environment after ``load_env()``:
    CONTENTFUL_SPACE_ID, CONTENTFUL_ENVIRONMENT (default "master") and either
    CONTENTFUL_CMA_TOKEN or CONTENTFUL_DELIVERY_TOKEN. The CMA token wins when
    both are set. Exits with a message if configuration is missing or if
    Contentful answers with a status of 400 or above.
    """
    load_env()
    environ = os.environ
    space = environ.get("CONTENTFUL_SPACE_ID")
    env = environ.get("CONTENTFUL_ENVIRONMENT", "master")
    cma = environ.get("CONTENTFUL_CMA_TOKEN")
    cda = environ.get("CONTENTFUL_DELIVERY_TOKEN")
    if not (space and (cma or cda)):
        sys.exit("Set CONTENTFUL_SPACE_ID and CONTENTFUL_CMA_TOKEN (or CONTENTFUL_DELIVERY_TOKEN).")

    # Pick the API host that matches whichever token we are going to send.
    if cma:
        base, token, api_label = CMA_BASE, cma, "CMA"
    else:
        base, token, api_label = CDA_BASE, cda, "CDA"

    response = httpx.get(
        f"{base}/spaces/{space}/environments/{env}/content_types",
        headers={"Authorization": f"Bearer {token}"},
        params={"limit": 1000},
        timeout=30,
    )
    if response.status_code >= 400:
        sys.exit(f"Contentful {response.status_code}: {response.text[:400]}")

    content_types = response.json().get("items", [])
    print(f"# {len(content_types)} content type(s) in space {space} (env {env}) via {api_label}\n")

    def _sort_id(content_type: dict) -> str:
        return content_type.get("sys", {}).get("id", "")

    for ct in sorted(content_types, key=_sort_id):
        ct_id = ct.get("sys", {}).get("id", "?")
        # displayField is the field Contentful uses as the entry title.
        display = ct.get("displayField") or "?"
        print(f"## {ct.get('name', '?')}")
        print(f"CONTENTFUL_SMOKE_CONTENT_TYPE={ct_id}")
        print(f"CONTENTFUL_SMOKE_TITLE_FIELD={display}")
        for field in ct.get("fields", []):
            fid = field.get("id", "?")
            if fid == display:
                mark = " <- title"
            else:
                mark = ""
            print(f" {fid} : {_describe(field)}{mark}")
        print()
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
if __name__ == "__main__":
|
|
69
|
+
main()
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
"""Contentful over its hosted MCP server (mcp.contentful.com) — the no-admin / SSO
|
|
2
|
+
path, when a static CMA token is blocked and the MCP-OAuth token doesn't work
|
|
3
|
+
against the REST API.
|
|
4
|
+
|
|
5
|
+
Contentful's MCP is AGENT-oriented: tools take clean JSON but return LLM-formatted
|
|
6
|
+
pseudo-XML (with prose prefixes, arrays as repeated elements). So:
|
|
7
|
+
* WRITES are reliable — inputs are clean JSON (fields = {fieldId:{locale:value}},
|
|
8
|
+
same shape as the REST encoder), and we only scrape two scalars from responses:
|
|
9
|
+
the new entry's sys.id (create) and sys.version (for the optimistic-lock update).
|
|
10
|
+
* READS over MCP are fragile (multi-entry XML) — prefer the REST source when you
|
|
11
|
+
have CMA access; this module is the write path.
|
|
12
|
+
|
|
13
|
+
Pairs the generic MCP transport (durable_sync.transport.mcp) with the OAuth binding
|
|
14
|
+
(connectors.contentful.oauth). get_initial_context is called once on open (the
|
|
15
|
+
server requires it first).
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import re
|
|
20
|
+
from contextlib import asynccontextmanager
|
|
21
|
+
from typing import Any, AsyncIterator
|
|
22
|
+
|
|
23
|
+
from durable_sync.transport.mcp import TokenProvider, open_session as _open_session
|
|
24
|
+
from durable_sync.connectors.contentful import oauth
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class ContentfulMcp:
    """Write-focused facade over an already-open Contentful MCP session.

    Every tool call is scoped to one space/environment: their ids are merged
    into the arguments of each request.
    """

    def __init__(self, session, space_id: str, environment: str):
        self._s = session
        self.space_id = space_id
        self.environment = environment

    def _base(self) -> dict[str, str]:
        """Arguments shared by every tool call: the space and environment ids."""
        return dict(spaceId=self.space_id, environmentId=self.environment)

    async def call_raw(self, tool: str, args: dict[str, Any]) -> str:
        """Call an arbitrary tool and hand back its raw text (for discovery/smokes).

        Keys in ``args`` override the space/environment defaults.
        """
        payload: dict[str, Any] = self._base()
        payload.update(args)
        return await self._s.call(tool, payload)

    async def create_entry(self, content_type: str, fields: dict[str, Any]) -> str | None:
        """Create an entry and return its sys.id, or None if it couldn't be scraped."""
        payload: dict[str, Any] = self._base()
        payload["contentTypeId"] = content_type
        payload["fields"] = fields
        response_text = await self._s.call("create_entry", payload)
        return entry_id(response_text)

    async def entry_version(self, entry_id_: str) -> int | None:
        """Fetch an entry and return its sys.version (needed for the locked update)."""
        payload: dict[str, Any] = self._base()
        payload["entryId"] = entry_id_
        response_text = await self._s.call("get_entry", payload)
        return entry_version_of(response_text)

    async def update_entry(self, entry_id_: str, fields: dict[str, Any], version: int) -> None:
        """Update an entry's fields, passing ``version`` for the optimistic lock."""
        payload: dict[str, Any] = self._base()
        payload["entryId"] = entry_id_
        payload["version"] = version
        payload["fields"] = fields
        await self._s.call("update_entry", payload)

    async def publish_entry(self, entry_id_: str) -> None:
        """Publish one entry; the tool is called with a one-element id list."""
        payload: dict[str, Any] = self._base()
        payload["entryId"] = [entry_id_]
        await self._s.call("publish_entry", payload)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@asynccontextmanager
async def open_contentful(
    space_id: str, environment: str, token_provider: TokenProvider
) -> AsyncIterator[ContentfulMcp]:
    """Open a Contentful-MCP session and yield a ContentfulMcp bound to it.

    ``get_initial_context`` is called once before yielding, because the server
    requires it first.
    """
    async with _open_session(oauth.MCP_ENDPOINT, token_provider) as mcp_session:
        wrapper = ContentfulMcp(mcp_session, space_id, environment)
        # Mandatory handshake before any other tool call.
        await mcp_session.call("get_initial_context", {})
        yield wrapper
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
# --- response scraping ------------------------------------------------------
|
|
76
|
+
# Contentful's MCP returns prose-prefixed pseudo-XML, and it is NOT reliably
|
|
77
|
+
# parseable: it contains invalid tags (e.g. `<fieldStatus><*>…`) and unescaped
|
|
78
|
+
# content, so an XML parser chokes. We don't need the whole document — only two
|
|
79
|
+
# scalars — so we scrape them with anchored regexes:
|
|
80
|
+
# * the entry id from the sys URN (…/entries/<id>), which is unambiguous (the
|
|
81
|
+
# bare <id> elements are dangerous — space/environment/contentType ids appear
|
|
82
|
+
# first), with a post-</space> fallback for any URN-less response;
|
|
83
|
+
# * the version from the lone <version> tag (distinct from <publishedVersion>).
|
|
84
|
+
|
|
85
|
+
def entry_id(raw: str) -> str | None:
    """Scrape the entry's sys.id out of a raw Contentful-MCP response.

    Tries the sys URN first (``…/entries/<id>``), then falls back to the
    ``<id>`` element that directly follows ``</space>``.

    Args:
        raw: The raw text returned by an MCP tool call.

    Returns:
        The entry id, or None when neither pattern matches.
    """
    # Contentful resource ids may contain letters, digits, "_", "-" and ".";
    # leaving "." out of the class would truncate a dotted id at the first dot.
    m = re.search(r"/entries/([A-Za-z0-9_.-]+)", raw)  # from the sys URN
    if m:
        return m.group(1)
    # sys order: space, then id
    m = re.search(r"</space>\s*<id>\s*([A-Za-z0-9_.-]+)\s*</id>", raw)
    return m.group(1) if m else None
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def entry_version_of(raw: str) -> int | None:
    """Scrape sys.version from a raw Contentful-MCP response.

    Only a literal ``<version>`` element matches, so ``<publishedVersion>`` is
    ignored. Returns None when no such element is present.
    """
    found = re.search(r"<version>\s*(\d+)\s*</version>", raw)
    if found is None:
        return None
    return int(found.group(1))
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""ContentfulMcpDestination — write entries to Contentful over its MCP server.
|
|
2
|
+
|
|
3
|
+
The no-admin / SSO-blocked path: where a static CMA token can't reach the space,
|
|
4
|
+
OAuth-as-an-individual through the MCP server can. Create + idempotent update are
|
|
5
|
+
live-verified; `publish` is optional and TOLERANT — the Contentful MCP app
|
|
6
|
+
installation has its own per-tool permission layer (separate from OAuth scopes),
|
|
7
|
+
so `publish_entry` may be disallowed by a space admin. We create/update the entry
|
|
8
|
+
regardless and only skip publishing with a warning when it's gated.
|
|
9
|
+
|
|
10
|
+
Idempotency uses a LinkStore (primary_key -> entry id), like the REST destination
|
|
11
|
+
and Luma — Contentful field ids don't match neutral property names. Reuses the
|
|
12
|
+
shared `encode_fields` for the wire shape. `token_provider` yields the OAuth access
|
|
13
|
+
token (e.g. a query to the workflow-owned token; the smoke passes one directly).
|
|
14
|
+
|
|
15
|
+
Requires the `contentful` extra.
|
|
16
|
+
"""
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
import datetime as dt
|
|
20
|
+
import logging
|
|
21
|
+
from contextlib import asynccontextmanager
|
|
22
|
+
from typing import AsyncIterator
|
|
23
|
+
|
|
24
|
+
from durable_sync.core import Record, auth_error_in_chain
|
|
25
|
+
from durable_sync.linkstore import LinkStore
|
|
26
|
+
from durable_sync.transport.mcp import TokenProvider
|
|
27
|
+
from durable_sync.connectors.contentful.encode import encode_fields
|
|
28
|
+
from durable_sync.connectors.contentful.mcp import ContentfulMcp, open_contentful
|
|
29
|
+
from durable_sync.connectors.contentful.token import current_access_token
|
|
30
|
+
|
|
31
|
+
log = logging.getLogger("durable_sync.connectors.contentful.mcp_destination")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class ContentfulMcpDestination:
    """Destination that writes entries to Contentful through its MCP server.

    Holds the static configuration (space, content type, field mapping, link
    store, publish flag) and opens per-run sessions via ``connect()``.
    """

    name = "contentful"

    def __init__(
        self,
        *,
        space_id: str,
        content_type: str,
        field_map: dict[str, str],
        link_store: LinkStore,
        token_provider: TokenProvider | None = None,
        environment: str = "master",
        default_locale: str = "en-US",
        create_only_properties: set[str] | None = None,
        publish: bool = False,
    ):
        # Where to write.
        self.space_id = space_id
        self.environment = environment
        self.content_type = content_type
        # How to write it.
        self.field_map = field_map
        self.default_locale = default_locale
        self.create_only_properties = create_only_properties if create_only_properties else set()
        self.publish = publish
        self.link_store = link_store
        # Without an explicit provider, fall back to querying the workflow that
        # owns the Contentful OAuth token (started via connectors.contentful.start),
        # so a worker can run unattended.
        self._token_provider = token_provider or current_access_token

    @property
    def configured(self) -> bool:
        """True when both the space id and the content type are set."""
        return bool(self.space_id) and bool(self.content_type)

    @property
    def config_hint(self) -> str:
        """Message describing what is missing when ``configured`` is False."""
        return "Contentful space id / content type unset"

    @asynccontextmanager
    async def connect(self) -> AsyncIterator["_McpSession"]:
        """Open a Contentful-MCP session and yield a write session bound to it."""
        async with open_contentful(self.space_id, self.environment, self._token_provider) as mcp:
            yield _McpSession(mcp, self)

    @staticmethod
    def is_auth_error(err: BaseException) -> bool:
        """Delegate to ``auth_error_in_chain`` to classify ``err``."""
        return auth_error_in_chain(err)

    # The worker registers these so the Contentful token-owner workflow runs
    # alongside the sync (same OAuth-as-a-workflow toolkit as Notion).
    def aux_workflows(self) -> list:
        """Extra workflows to register: the OAuth token-owner workflow."""
        # Imported lazily, as in the rest of this class's aux_* hooks.
        from durable_sync.auth.oauth.workflow import OAuthTokenWorkflow

        return [OAuthTokenWorkflow]

    def aux_activities(self) -> list:
        """Extra activities to register: the OAuth refresh activity."""
        from durable_sync.auth.oauth.refresh import refresh_oauth_token

        return [refresh_oauth_token]
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class _McpSession:
|
|
91
|
+
def __init__(self, cf: ContentfulMcp, dest: ContentfulMcpDestination):
|
|
92
|
+
self._cf = cf
|
|
93
|
+
self._d = dest
|
|
94
|
+
|
|
95
|
+
async def query_existing_ids(self) -> dict[str, str]:
|
|
96
|
+
return await self._d.link_store.get_all()
|
|
97
|
+
|
|
98
|
+
def _fields(self, record: Record, *, creating: bool):
|
|
99
|
+
return encode_fields(
|
|
100
|
+
record, field_map=self._d.field_map, default_locale=self._d.default_locale,
|
|
101
|
+
create_only_properties=self._d.create_only_properties, creating=creating,
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
async def create(self, record: Record, synced_at: dt.datetime) -> bool:
|
|
105
|
+
entry_id = await self._cf.create_entry(self.content_type, self._fields(record, creating=True))
|
|
106
|
+
if not entry_id:
|
|
107
|
+
raise RuntimeError("Contentful MCP create_entry: could not determine the new entry id")
|
|
108
|
+
await self._d.link_store.put(record.primary_key, entry_id)
|
|
109
|
+
await self._maybe_publish(entry_id)
|
|
110
|
+
return True
|
|
111
|
+
|
|
112
|
+
async def update(self, existing_id: str, record: Record, synced_at: dt.datetime) -> bool:
|
|
113
|
+
version = await self._cf.entry_version(existing_id)
|
|
114
|
+
if version is None:
|
|
115
|
+
raise RuntimeError(f"Contentful MCP: could not read version for entry {existing_id}")
|
|
116
|
+
await self._cf.update_entry(existing_id, self._fields(record, creating=False), version)
|
|
117
|
+
await self._maybe_publish(existing_id)
|
|
118
|
+
return True
|
|
119
|
+
|
|
120
|
+
@property
|
|
121
|
+
def content_type(self) -> str:
|
|
122
|
+
return self._d.content_type
|
|
123
|
+
|
|
124
|
+
async def _maybe_publish(self, entry_id: str) -> None:
|
|
125
|
+
"""Publish if asked — but tolerate the MCP app's per-tool permission gate:
|
|
126
|
+
the entry is already created/updated, so a forbidden publish_entry leaves a
|
|
127
|
+
draft + a warning rather than failing the whole sync."""
|
|
128
|
+
if not self._d.publish:
|
|
129
|
+
return
|
|
130
|
+
try:
|
|
131
|
+
await self._cf.publish_entry(entry_id)
|
|
132
|
+
except RuntimeError as e:
|
|
133
|
+
if "publish_entry" in str(e) or "permission" in str(e).lower():
|
|
134
|
+
log.warning("Contentful publish_entry not permitted for %s (MCP app config) — left as draft: %s",
|
|
135
|
+
entry_id, e)
|
|
136
|
+
return
|
|
137
|
+
raise
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Contentful binding of the generic OAuth client (durable_sync.auth.oauth).
|
|
2
|
+
|
|
3
|
+
Same flow as Notion — OAuth 2.1 + PKCE + dynamic client registration — just
|
|
4
|
+
pinned to Contentful's hosted MCP server. Confirmed via discover(): Contentful
|
|
5
|
+
exposes /authorize, /token, and /register (DCR), so no admin / no pre-registered
|
|
6
|
+
app is needed; you authorize as yourself (through your org's SSO, which is what
|
|
7
|
+
makes a static CFPAT unnecessary here).
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from durable_sync.auth.oauth.flow import ( # noqa: F401 (re-exported for callers)
|
|
12
|
+
build_authorize_url,
|
|
13
|
+
exchange_code,
|
|
14
|
+
gen_pkce,
|
|
15
|
+
new_state,
|
|
16
|
+
refresh_access_token,
|
|
17
|
+
register_client,
|
|
18
|
+
)
|
|
19
|
+
from durable_sync.auth.oauth import flow as _generic
|
|
20
|
+
|
|
21
|
+
MCP_BASE = "https://mcp.contentful.com"
|
|
22
|
+
MCP_ENDPOINT = f"{MCP_BASE}/mcp" # Streamable HTTP transport
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def discover() -> dict[str, str]:
    """Run the generic OAuth endpoint discovery against Contentful's MCP base URL."""
    endpoints = _generic.discover(MCP_BASE)
    return endpoints
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Headless proof + tool discovery for Contentful's MCP server. NO browser.
|
|
2
|
+
|
|
3
|
+
Loads the saved refresh token, mints a fresh access token, opens the MCP session,
|
|
4
|
+
and lists the tools Contentful exposes — which is exactly what we need to build the
|
|
5
|
+
MCP source/destination (their tool names + schemas, instead of guessing).
|
|
6
|
+
|
|
7
|
+
PYTHONPATH=. python -m durable_sync.connectors.contentful.prove
|
|
8
|
+
|
|
9
|
+
If this lists tools, the no-admin OAuth path works headlessly — the Temporal auth
|
|
10
|
+
workflow can mint tokens unattended, same as Notion.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import asyncio
|
|
15
|
+
|
|
16
|
+
from durable_sync.connectors.contentful import oauth, store
|
|
17
|
+
from durable_sync.transport.mcp import open_session
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
async def _list_tools(access_token: str) -> list[str]:
    """Open an MCP session authenticated with ``access_token`` and list its tool names."""

    async def _fixed_token() -> str:
        # The session takes an async provider; always hand back the given token.
        return access_token

    async with open_session(oauth.MCP_ENDPOINT, _fixed_token) as mcp_session:
        names = await mcp_session.tool_names()
    return names
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def main() -> None:
    """Prove headless auth: refresh the token, open the MCP session, list tools.

    Exits when no bootstrap credentials have been saved. A refresh token
    returned by the refresh call is written back to the store before the
    session is opened.
    """
    creds = store.load()
    if not creds:
        raise SystemExit(
            f"No credentials at {store.path()}. Run the bootstrap first:\n"
            f" PYTHONPATH=. python -m durable_sync.connectors.contentful.bootstrap"
        )

    print("Refreshing access token (headless, no browser)...")
    tokens = oauth.refresh_access_token(
        creds["token_endpoint"],
        creds["client_id"],
        creds["refresh_token"],
    )
    # Persist a rotated refresh token now (providers rotate on every use).
    rotated = tokens.get("refresh_token")
    if rotated:
        creds["refresh_token"] = rotated
        store.save(creds)
    print(f" Got access token (expires in {tokens.get('expires_in')}s).\n")

    tool_names = asyncio.run(_list_tools(tokens["access_token"]))
    print(f"SUCCESS — authenticated headlessly. Contentful MCP exposes {len(tool_names)} tool(s):")
    for tool in tool_names:
        print(f" - {tool}")
    print("\nHeadless auth proven — paste this tool list and we'll build the connector against it.")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
if __name__ == "__main__":
|
|
51
|
+
main()
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
"""ContentfulSource — entries of chosen content types -> Records, with an
|
|
2
|
+
enrichment hook.
|
|
3
|
+
|
|
4
|
+
Contentful is usually shared across teams, so a Source here is scoped by CONTENT
|
|
5
|
+
TYPE: `content_types` maps each content-type id you care about to the "Type" label
|
|
6
|
+
it should carry (e.g. {"blogPost": "Blog"}). One entity workflow per content type.
|
|
7
|
+
Whether a *shared* type's entries are kept (e.g. only when an author matches your
|
|
8
|
+
own directory) is app policy — do it in your `enrich`/transform hook, which gets
|
|
9
|
+
the resolved authors via `ContentfulEntryContext`.
|
|
10
|
+
|
|
11
|
+
Auth: prefer a read-only Delivery (CDA) token; a self-serve Management (CMA) PAT
|
|
12
|
+
is the fallback (and the only mode that sees drafts). Each is read from the env
|
|
13
|
+
var named in `ContentfulConfig`. Requires the `contentful` extra.
|
|
14
|
+
"""
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import inspect
|
|
18
|
+
import logging
|
|
19
|
+
import os
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from datetime import datetime, timedelta, timezone
|
|
22
|
+
from typing import Any, Awaitable, Callable, Union
|
|
23
|
+
|
|
24
|
+
import httpx
|
|
25
|
+
from temporalio import activity
|
|
26
|
+
|
|
27
|
+
from durable_sync.core import Record, SourceSpec
|
|
28
|
+
from durable_sync.connectors import content
|
|
29
|
+
from durable_sync.connectors.contentful import api
|
|
30
|
+
from durable_sync.connectors.contentful.api import ContentfulSpace
|
|
31
|
+
|
|
32
|
+
log = logging.getLogger("durable_sync.connectors.contentful")
|
|
33
|
+
|
|
34
|
+
EnrichHook = Callable[[Record, "ContentfulEntryContext"], Union[Record, Awaitable[Record]]]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass
class ContentfulConfig:
    """Everything Contentful-specific a deployment supplies.

    `content_types` maps content-type id -> the "Type" label its entries carry
    (and is the allowlist of what gets fetched). `url_prefixes` maps
    content-type id -> a public URL prefix the entry slug is appended to.
    """
    space_id: str
    content_types: dict[str, str]  # {ct_id: type_label}
    url_prefixes: dict[str, str] = field(default_factory=dict)  # {ct_id: url_prefix}
    environment: str = "master"
    default_locale: str = "en-US"
    # Names of the environment variables the tokens are read from (not the tokens).
    delivery_token_env: str = "CONTENTFUL_DELIVERY_TOKEN"  # CDA, preferred
    cma_token_env: str = "CONTENTFUL_CMA_TOKEN"  # CMA PAT, fallback
    # The fetch window starts this many days before "now" on the first page.
    lookback_days: int = 21
    # Handed to each SourceSpec as its interval.
    interval_minutes: int = 360
    # Passed to content.content_record as the title property name.
    title_property: str = "Name"
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class ContentfulEntryContext:
    """Handed to the enrich hook: the raw (flattened) entry, its resolved authors
    ({name, email}), the content type, and the live client."""
    raw_entry: dict  # the entry as returned by the page fetch
    authors: list[dict]  # resolved authors paired with the entry
    content_type: str  # content-type id of the spec being fetched
    client: httpx.AsyncClient  # the client used for the page fetch
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _heartbeat(detail: str) -> None:
    """Send an activity heartbeat carrying ``detail``; do nothing outside an activity."""
    if not activity.in_activity():
        return
    activity.heartbeat(detail)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class ContentfulSource:
    """Source that turns entries of the configured content types into Records."""

    name = "contentful"

    def __init__(self, config: ContentfulConfig, *, enrich: EnrichHook | None = None):
        self._config = config
        self._enrich = enrich

    def specs(self) -> list[SourceSpec]:
        """One spec (=> one entity workflow) per configured content type, so each
        type syncs and retries independently."""
        cfg = self._config
        result: list[SourceSpec] = []
        for ct_id, label in cfg.content_types.items():
            result.append(
                SourceSpec(
                    key=f"type:{ct_id}",
                    interval_minutes=cfg.interval_minutes,
                    params={"content_type": ct_id, "item_type": label},
                )
            )
        return result

    def _space(self) -> ContentfulSpace:
        """Build the ContentfulSpace, reading both tokens from their env vars
        (an unset variable becomes an empty string)."""
        cfg = self._config
        delivery_token = os.environ.get(cfg.delivery_token_env, "")
        cma_token = os.environ.get(cfg.cma_token_env, "")
        return ContentfulSpace(
            space_id=cfg.space_id,
            environment=cfg.environment,
            default_locale=cfg.default_locale,
            delivery_token=delivery_token,
            cma_token=cma_token,
        )

    async def fetch_page(
        self, spec: SourceSpec, only_items: list[str] | None, cursor: str | None
    ) -> tuple[list[Record], str | None]:
        """Fetch ONE page of entries and the next cursor (None on the last page).

        The cursor carries the frozen window start plus the `skip` offset. A
        targeted (`only_items`) refresh filters each page down to the named ids
        but still walks the whole window.
        """
        cfg = self._config
        content_type = spec.params["content_type"]
        item_type = spec.params.get("item_type") or cfg.content_types.get(content_type, content_type)
        space = self._space()
        targeted = set(only_items or [])

        if cursor is not None:
            state = content.unpack_cursor(cursor)
            after_iso, skip = state["after"], state["skip"]
        else:
            # First page: freeze the window start so later pages reuse it.
            window_start = datetime.now(timezone.utc) - timedelta(days=cfg.lookback_days)
            after_iso, skip = window_start.isoformat(), 0

        async with httpx.AsyncClient(timeout=30) as client:
            pairs, next_skip = await api.iter_entries_page(
                client, space, content_type, after_iso, skip=skip)
            out: list[Record] = []
            for entry, authors in pairs:
                source_id = entry.get("sys", {}).get("id", "")
                # Skip ids outside a targeted refresh, and empty-shell drafts
                # that have no title yet (not a real item).
                if (targeted and source_id not in targeted) or not _has_title(entry):
                    continue
                record = self._to_record(entry, item_type, authors)
                if self._enrich is not None:
                    enriched = self._enrich(
                        record,
                        ContentfulEntryContext(raw_entry=entry, authors=authors,
                                               content_type=content_type, client=client),
                    )
                    # The hook may be sync or async.
                    if inspect.isawaitable(enriched):
                        enriched = await enriched
                    record = enriched
                out.append(record)
                _heartbeat(source_id)

        if next_skip is None:
            next_cursor = None
        else:
            next_cursor = content.pack_cursor(after=after_iso, skip=next_skip)
        log.info("Fetched %d Contentful %s entries for %s (skip=%s)", len(out), content_type, spec.key, skip)
        return out, next_cursor

    async def fetch(self, spec: SourceSpec, only_items: list[str] | None = None) -> list[Record]:
        """Whole window as one list — drains fetch_page (standalone/non-Temporal)."""
        records: list[Record] = []
        page, cursor = await self.fetch_page(spec, only_items, None)
        records.extend(page)
        while cursor is not None:
            page, cursor = await self.fetch_page(spec, only_items, cursor)
            records.extend(page)
        return records

    def _to_record(self, entry: dict, item_type: str, authors: list[dict]) -> Record:
        """Map one Contentful entry (+ resolved authors) to a neutral Record. Pure."""
        cfg = self._config
        meta = entry.get("sys", {})
        fields = entry.get("fields", {})

        source_id = meta.get("id", "")
        name = fields.get("title") or fields.get("name") or "(untitled entry)"
        # Date = explicit publish-date field if present, else createdAt.
        item_date = fields.get("publishDate") or fields.get("date") or meta.get("createdAt")
        if entry.get("_published", True):
            status = "Published"
        else:
            status = "Draft"

        # Public URL = the content type's prefix + slug, when both exist.
        slug = fields.get("slug")
        ct_id = meta.get("contentType", {}).get("sys", {}).get("id", "")
        url = None
        if slug:
            prefix = cfg.url_prefixes.get(ct_id)
            if prefix:
                url = f"{prefix}{slug}"

        host_names = [person["name"] for person in authors if person.get("name")]
        # authorOverwriteText (a community author with no `person`) wins for the
        # human-readable label; resolved names still drive any author matching.
        author = fields.get("authorOverwriteText") or ", ".join(host_names)
        tags = [tag for tag in (fields.get("tags") or []) if isinstance(tag, str)]

        return content.content_record(
            primary_key=source_id,
            title_property=cfg.title_property,
            title=str(name),
            item_type=item_type,
            source="Contentful",
            url=url,
            date=item_date,
            status=status,
            author=str(author),
            authors=host_names,
            extra={"Tags": tags},
        )
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _has_title(entry: dict[str, Any]) -> bool:
|
|
189
|
+
"""True if the entry has a real title/name (titled drafts count; blank ones
|
|
190
|
+
don't). Keeps us from writing '(untitled entry)' placeholder rows. Pure."""
|
|
191
|
+
fields = entry.get("fields", {})
|
|
192
|
+
return bool(fields.get("title") or fields.get("name"))
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Launch OAuthTokenWorkflow from the saved Contentful bootstrap credentials.
|
|
2
|
+
|
|
3
|
+
PYTHONPATH=. python -m durable_sync.connectors.contentful.start
|
|
4
|
+
|
|
5
|
+
Reads the bootstrap creds, starts the long-running auth workflow that owns the
|
|
6
|
+
rotating refresh token, and serves fresh access tokens via query. After this, a
|
|
7
|
+
worker hosting ContentfulMcpDestination keeps tokens fresh unattended.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
|
|
13
|
+
from durable_sync import config
|
|
14
|
+
from durable_sync.auth.oauth.workflow import AuthParams, OAuthTokenWorkflow
|
|
15
|
+
from durable_sync.connectors.contentful import store
|
|
16
|
+
from durable_sync.temporal_client import connect
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def main() -> None:
    """Start OAuthTokenWorkflow from the saved Contentful bootstrap credentials.

    Exits when no credentials have been saved. On success, prints the workflow
    id and a command for querying it.
    """
    creds = store.load()
    if not creds:
        raise SystemExit(
            f"No credentials at {store.path()}. Run the bootstrap first:\n"
            f" PYTHONPATH=. python -m durable_sync.connectors.contentful.bootstrap"
        )

    client = await connect()
    auth_params = AuthParams(
        client_id=creds["client_id"],
        token_endpoint=creds["token_endpoint"],
        refresh_token=creds["refresh_token"],
    )
    handle = await client.start_workflow(
        OAuthTokenWorkflow.run,
        auth_params,
        id=config.CONTENTFUL_AUTH_WORKFLOW_ID,
        task_queue=config.TASK_QUEUE,
    )
    workflow_id = handle.id
    print(
        f"Started OAuthTokenWorkflow (id={workflow_id}). It now owns the Contentful "
        f"refresh token and keeps access tokens fresh.\n"
        f"Verify: temporal workflow query --workflow-id {workflow_id} --type get_access_token"
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
if __name__ == "__main__":
|
|
46
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Contentful binding of the generic creds store (durable_sync.auth.oauth.store).
|
|
2
|
+
|
|
3
|
+
Pins Contentful's auth file path; bootstrap/prove call load()/save()/path().
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from durable_sync.auth.oauth import store as _store
|
|
12
|
+
|
|
13
|
+
_FILE = os.getenv("DURABLE_SYNC_CONTENTFUL_AUTH_FILE", ".contentful_auth.json")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def load() -> dict[str, Any] | None:
    """Load the Contentful credentials from the pinned auth file via the generic store."""
    creds = _store.load(_FILE)
    return creds
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def save(data: dict[str, Any]) -> None:
    """Write ``data`` to the pinned Contentful auth file via the generic store."""
    _store.save(_FILE, data)
    return None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def path() -> Path:
    """Return the pinned Contentful auth file's path, as resolved by the generic store."""
    resolved = _store.resolve(_FILE)
    return resolved
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Contentful binding of the generic token accessor (durable_sync.auth.oauth.token).
|
|
2
|
+
|
|
3
|
+
The default token_provider for ContentfulMcpDestination: query the OAuthTokenWorkflow
|
|
4
|
+
running under config.CONTENTFUL_AUTH_WORKFLOW_ID for a fresh access token.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from durable_sync import config
|
|
9
|
+
from durable_sync.auth.oauth.token import current_access_token as _current
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def current_access_token() -> str:
    """Ask the generic accessor for the access token of the workflow
    running under config.CONTENTFUL_AUTH_WORKFLOW_ID."""
    workflow_id = config.CONTENTFUL_AUTH_WORKFLOW_ID
    token = await _current(workflow_id)
    return token
|