durable-sync 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. durable_sync/__init__.py +26 -0
  2. durable_sync/activities.py +156 -0
  3. durable_sync/auth/__init__.py +8 -0
  4. durable_sync/auth/oauth/__init__.py +18 -0
  5. durable_sync/auth/oauth/flow.py +183 -0
  6. durable_sync/auth/oauth/refresh.py +58 -0
  7. durable_sync/auth/oauth/store.py +36 -0
  8. durable_sync/auth/oauth/token.py +36 -0
  9. durable_sync/auth/oauth/workflow.py +172 -0
  10. durable_sync/bootstrap.py +44 -0
  11. durable_sync/codec.py +80 -0
  12. durable_sync/config.py +35 -0
  13. durable_sync/connectors/__init__.py +14 -0
  14. durable_sync/connectors/asana/__init__.py +13 -0
  15. durable_sync/connectors/asana/destination.py +213 -0
  16. durable_sync/connectors/content.py +80 -0
  17. durable_sync/connectors/contentful/__init__.py +25 -0
  18. durable_sync/connectors/contentful/api.py +285 -0
  19. durable_sync/connectors/contentful/bootstrap.py +102 -0
  20. durable_sync/connectors/contentful/describe.py +61 -0
  21. durable_sync/connectors/contentful/destination.py +145 -0
  22. durable_sync/connectors/contentful/encode.py +49 -0
  23. durable_sync/connectors/contentful/introspect.py +69 -0
  24. durable_sync/connectors/contentful/mcp.py +95 -0
  25. durable_sync/connectors/contentful/mcp_destination.py +137 -0
  26. durable_sync/connectors/contentful/oauth.py +27 -0
  27. durable_sync/connectors/contentful/prove.py +51 -0
  28. durable_sync/connectors/contentful/source.py +192 -0
  29. durable_sync/connectors/contentful/start.py +46 -0
  30. durable_sync/connectors/contentful/store.py +25 -0
  31. durable_sync/connectors/contentful/token.py +13 -0
  32. durable_sync/connectors/contentful/token_check.py +42 -0
  33. durable_sync/connectors/github/__init__.py +33 -0
  34. durable_sync/connectors/github/api.py +169 -0
  35. durable_sync/connectors/github/source.py +230 -0
  36. durable_sync/connectors/luma/__init__.py +20 -0
  37. durable_sync/connectors/luma/api.py +121 -0
  38. durable_sync/connectors/luma/destination.py +128 -0
  39. durable_sync/connectors/luma/source.py +155 -0
  40. durable_sync/connectors/multi.py +78 -0
  41. durable_sync/connectors/notion/__init__.py +20 -0
  42. durable_sync/connectors/notion/bootstrap.py +97 -0
  43. durable_sync/connectors/notion/client.py +133 -0
  44. durable_sync/connectors/notion/destination.py +270 -0
  45. durable_sync/connectors/notion/oauth.py +25 -0
  46. durable_sync/connectors/notion/prove.py +57 -0
  47. durable_sync/connectors/notion/source.py +136 -0
  48. durable_sync/connectors/notion/start.py +46 -0
  49. durable_sync/connectors/notion/store.py +25 -0
  50. durable_sync/connectors/notion/token.py +13 -0
  51. durable_sync/connectors/youtube/__init__.py +13 -0
  52. durable_sync/connectors/youtube/api.py +122 -0
  53. durable_sync/connectors/youtube/source.py +152 -0
  54. durable_sync/core.py +210 -0
  55. durable_sync/env.py +55 -0
  56. durable_sync/http.py +71 -0
  57. durable_sync/linkstore.py +88 -0
  58. durable_sync/route.py +86 -0
  59. durable_sync/temporal_client.py +48 -0
  60. durable_sync/transport/__init__.py +12 -0
  61. durable_sync/transport/mcp.py +77 -0
  62. durable_sync/worker.py +109 -0
  63. durable_sync/workflows/__init__.py +9 -0
  64. durable_sync/workflows/sync.py +208 -0
  65. durable_sync-0.1.0.dist-info/METADATA +310 -0
  66. durable_sync-0.1.0.dist-info/RECORD +69 -0
  67. durable_sync-0.1.0.dist-info/WHEEL +5 -0
  68. durable_sync-0.1.0.dist-info/licenses/LICENSE +21 -0
  69. durable_sync-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,270 @@
1
+ """Reference Destination: Notion via the hosted MCP server.
2
+
3
+ Merges the two lineages:
4
+ * clean neutral-Record encoding + paginated idempotent upsert (ex-devrel-demos),
5
+ * Bearer-token transport (NO MCP SDK OAuthClientProvider), 429 backoff, and
6
+ inter-write pacing (ex-devrel-ships).
7
+
8
+ Auth: the access token comes from `token_provider` (an async () -> str). The
9
+ default queries OAuthTokenWorkflow, which owns the rotating refresh token; the
10
+ token never enters event history. We pass it as a plain `Authorization: Bearer`
11
+ header to the streamable-HTTP transport.
12
+
13
+ Property encoding quirks (live-server facts): dates expand to
14
+ `date:{prop}:start` (+ `:is_datetime`); multi-selects are JSON arrays (options
15
+ must pre-exist); checkboxes are `__YES__`/`__NO__`; a property literally named
16
+ `id`/`url` must be addressed `userDefined:{name}` (declare via
17
+ `user_defined_properties`).
18
+
19
+ Requires the `notion` extra: pip install "durable-sync[notion]"
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import asyncio
24
+ import datetime as dt
25
+ import json
26
+ from contextlib import asynccontextmanager
27
+ from typing import Any, Awaitable, Callable, AsyncIterator
28
+
29
+ from mcp.client.session import ClientSession
30
+
31
+ from durable_sync.core import Record, auth_error_in_chain
32
+ from durable_sync.connectors.notion import client as mcp
33
+ from durable_sync.connectors.notion.client import NotionMCP, TokenProvider
34
+ from durable_sync.connectors.notion.token import current_access_token
35
+
36
# Upper bound (in characters) on the page body sent on create; longer bodies
# are sliced to this length to keep create snappy.
_MAX_BODY = 50000
# Notion's hard per-rich_text limit; a longer value is rejected by the API and
# the whole record silently fails every sync, so the destination truncates.
_MAX_TEXT = 2000

# Optional hooks (app-supplied), kept out of the generic core:
#   TokenProvider is imported from client.py (shared with the source).
# SessionEnrich runs inside the open MCP session before each write — for
# DESTINATION-SIDE enrichment that must read Notion (e.g. resolving author
# handles to a relation). It gets the live session + the record and returns the
# (possibly mutated) record; _NotionSession._maybe_enrich also accepts None,
# which drops the record.
SessionEnrich = Callable[[ClientSession, Record], Awaitable[Record]]
# IconFor maps a record to a page icon (emoji or URL), or None. Keeps Notion's
# icon concept off the neutral Record.
IconFor = Callable[[Record], "str | None"]
50
+
51
+
52
class NotionDestination:
    """Destination that writes Records to Notion through the hosted MCP server.

    Carries the static configuration only: the target data source id and which
    property plays the role of title, idempotency key and sync heartbeat. The
    per-connection work lives in the session object yielded by `connect()`.
    """

    name = "notion"

    def __init__(
        self,
        data_source_id: str,
        *,
        title_property: str = "Name",
        key_property: str = "Repo ID",
        synced_property: str | None = "Last synced",
        date_properties: set[str] | None = None,
        create_only_properties: set[str] | None = None,
        user_defined_properties: set[str] | None = None,
        token_provider: TokenProvider | None = None,
        session_enrich: SessionEnrich | None = None,
        icon_for: IconFor | None = None,
        pacing_seconds: float = 0.3,
        resolve_data_source: bool = True,
    ):
        # Target: either a data source id or a database id/URL. When
        # `resolve_data_source` is on (the default) connect() resolves the
        # latter to a data source id automatically.
        self.data_source_id = data_source_id
        self._resolve_ds = resolve_data_source

        # Column roles.
        self.title_property = title_property
        self.key_property = key_property
        self.synced_property = synced_property

        # Property-name sets; a missing (or empty) argument becomes a fresh set.
        self.date_properties = date_properties if date_properties else set()
        # Seeded on CREATE only: objective fields refresh on every run, while
        # these are written once so later human edits stick.
        self.create_only_properties = (
            create_only_properties if create_only_properties else set()
        )
        # Names that have to be addressed as `userDefined:{name}` because Notion
        # reserves bare `id`/`url`. The built-in schemas avoid those names, but a
        # bring-your-own schema may need e.g. {"URL"}.
        self.user_defined_properties = (
            user_defined_properties if user_defined_properties else set()
        )

        # App-supplied hooks and pacing.
        self._token_provider = token_provider if token_provider else current_access_token
        self._session_enrich = session_enrich
        self._icon_for = icon_for
        self.pacing_seconds = pacing_seconds

    @property
    def configured(self) -> bool:
        """True when a (non-empty) data source id was supplied."""
        return True if self.data_source_id else False

    @property
    def config_hint(self) -> str:
        """Short message naming the setting to provide when not configured."""
        return "NOTION_DATA_SOURCE_ID unset"

    @asynccontextmanager
    async def connect(self) -> AsyncIterator["_NotionSession"]:
        """Open an MCP session and yield a `_NotionSession` bound to it.

        With `resolve_data_source` on, the configured id is first passed through
        `resolve_data_source_id`; otherwise it is used as given.
        """
        async with mcp.open_session(self._token_provider) as session:
            if self._resolve_ds:
                target = await mcp.resolve_data_source_id(session, self.data_source_id)
            else:
                target = self.data_source_id
            yield _NotionSession(session, self, data_source_id=target)

    # Optional hooks: the worker auto-registers what these return so the
    # token-owner workflow runs alongside the sync. Destinations without
    # auxiliary work simply omit them.
    def aux_workflows(self) -> list:
        """Workflows to register next to the sync (the OAuth token owner)."""
        from durable_sync.auth.oauth.workflow import OAuthTokenWorkflow

        workflows = [OAuthTokenWorkflow]
        return workflows

    def aux_activities(self) -> list:
        """Activities to register next to the sync (the token refresh)."""
        from durable_sync.auth.oauth.refresh import refresh_oauth_token

        activities = [refresh_oauth_token]
        return activities

    @staticmethod
    def is_auth_error(err: BaseException) -> bool:
        """Report whether `err` signals a rejected token / broken refresh chain.

        A revoked or expired Bearer token means the connector must be
        re-bootstrapped. The default signatures (401/403, unauthorized,
        forbidden, invalid_token/grant) cover every Notion auth failure seen so
        far, so the decision is delegated to the shared, word-boundary-correct
        matcher in the spine.
        """
        matched = auth_error_in_chain(err)
        return matched
127
+
128
+
129
class _NotionSession:
    """One open MCP connection. Implements the DestinationSession protocol.

    Created by `NotionDestination.connect()`; holds the live MCP session, the
    owning destination (for configuration and hooks) and the already-resolved
    data source id.
    """

    def __init__(self, session: NotionMCP, destination: NotionDestination, *, data_source_id: str):
        self._mcp = session
        self._destination = destination
        self._ds = data_source_id  # already resolved (database id -> data source id)

    async def call(self, name: str, arguments: dict[str, Any]) -> str:
        """Invoke the MCP tool `name` with `arguments` and return its raw result."""
        return await self._mcp.call(name, arguments)

    async def query_existing_ids(self) -> dict[str, str]:
        """{ key-property value -> page id } for rows already in the DB.

        Paginates LIMIT/OFFSET with ORDER BY the key property; unordered OFFSET
        reshuffles under concurrent edits and skips rows -> duplicates, so the
        ORDER BY is REQUIRED. Rows lacking either a key value or a page id are
        left out of the mapping.
        """
        ds = self._ds
        key = self._destination.key_property
        PAGE = 100
        mapping: dict[str, str] = {}
        offset = 0
        while True:
            sql = mcp.query_sql(ds, order_by=key, limit=PAGE, offset=offset)
            raw = await self.call(
                "notion-query-data-sources",
                {"data": {"data_source_urls": [f"collection://{ds}"], "query": sql}},
            )
            rows = mcp.rows_from_result(raw)
            for row in rows:
                kval = str(row.get(key) or "").strip()
                page_id = mcp.page_id_from_row(row)
                if kval and page_id:
                    mapping[kval] = page_id
            # A short page means the last page was reached.
            if len(rows) < PAGE:
                break
            offset += PAGE
        return mapping

    async def create(self, record: Record, synced_at: dt.datetime) -> bool:
        """Create a page for `record`.

        Returns False when the enrich hook dropped the record, True once the
        page-create call has been issued (followed by the pacing delay).
        """
        record = await self._maybe_enrich(record)
        if record is None:
            return False  # session_enrich dropped it (out of scope)
        page: dict[str, Any] = {"properties": self._encode(record.properties, synced_at)}
        if record.body:
            page["content"] = record.body[:_MAX_BODY]
        icon = self._icon(record)
        if icon:
            page["icon"] = icon
        await self.call(
            "notion-create-pages",
            {"parent": {"data_source_id": self._ds}, "pages": [page]},
        )
        await self._pace()
        return True

    async def update(self, existing_id: str, record: Record, synced_at: dt.datetime) -> bool:
        """Refresh the properties of the existing page `existing_id`.

        Returns False when the enrich hook dropped the record, True once the
        update call has been issued (followed by the pacing delay).
        """
        record = await self._maybe_enrich(record)
        if record is None:
            return False  # session_enrich dropped it (out of scope)
        # Skip create-only seeds (enrichment) so human edits to them survive;
        # refresh the rest. Page body is written on create, not refreshed.
        props = {
            k: v for k, v in record.properties.items()
            if k not in self._destination.create_only_properties
        }
        args: dict[str, Any] = {
            "page_id": existing_id,
            "command": "update_properties",
            "properties": self._encode(props, synced_at),
        }
        icon = self._icon(record)
        if icon:
            args["icon"] = icon
        await self.call("notion-update-page", args)
        await self._pace()
        return True

    async def _maybe_enrich(self, record: Record) -> Record | None:
        """Run the destination-side enrich hook (if any). It may return None to
        DROP the record (an out-of-scope filter)."""
        if self._destination._session_enrich is not None:
            return await self._destination._session_enrich(self._mcp.session, record)
        return record

    def _icon(self, record: Record) -> str | None:
        """Return the icon chosen by the destination's `icon_for` hook, if set."""
        fn = self._destination._icon_for
        return fn(record) if fn else None

    async def _pace(self) -> None:
        """Sleep `pacing_seconds` between writes (no-op when it is <= 0)."""
        # Stay under Notion's MCP rate limit (~few req/s). Backoff handles the
        # residual; this keeps us from hitting it in the first place.
        if self._destination.pacing_seconds > 0:
            await asyncio.sleep(self._destination.pacing_seconds)

    def _encode(self, properties: dict[str, Any], synced_at: dt.datetime) -> dict[str, Any]:
        """Neutral Python values -> Notion MCP wire format.

        None values are omitted. Date properties expand to `date:{name}:start`
        plus `date:{name}:is_datetime`; bools become `__YES__`/`__NO__` (bool is
        checked before int because bool subclasses int); numbers pass through;
        non-empty lists/tuples become JSON arrays; everything else is
        stringified and truncated to `_MAX_TEXT`. When a synced property is
        configured, the heartbeat date is appended.
        """
        dest = self._destination
        out: dict[str, Any] = {}
        for name, val in properties.items():
            if val is None:
                continue
            if name in dest.date_properties:
                if val:
                    start, is_dt = _encode_date(val)
                    out[f"date:{name}:start"] = start
                    out[f"date:{name}:is_datetime"] = is_dt
            elif isinstance(val, bool):
                out[_key(name, dest)] = "__YES__" if val else "__NO__"
            elif isinstance(val, (int, float)):
                out[_key(name, dest)] = val
            elif isinstance(val, (list, tuple)):
                if val:  # multi-selects are JSON arrays; options must pre-exist
                    out[_key(name, dest)] = json.dumps(list(val))
            else:
                # Notion rejects any rich_text/title over 2000 chars; a long
                # value (e.g. a verbose repo description) would 400 and the record
                # would silently fail to sync every run. Truncate as a backstop —
                # the destination owns wire limits (per the core contract).
                out[_key(name, dest)] = str(val)[:_MAX_TEXT]
        # Sync heartbeat: "Last synced" is a DATE column -> stamp the UTC date.
        if dest.synced_property:
            stamp = synced_at
            # An aware timestamp in another zone must be shifted to UTC first,
            # otherwise .date() yields that zone's calendar day, not the UTC one.
            # Naive timestamps are taken as-is.
            if stamp.tzinfo is not None and stamp.utcoffset() is not None:
                stamp = stamp.astimezone(dt.timezone.utc)
            out[f"date:{dest.synced_property}:start"] = stamp.date().isoformat()
            out[f"date:{dest.synced_property}:is_datetime"] = 0
        return out
255
+
256
+
257
def _key(name: str, dest: NotionDestination) -> str:
    """Return the wire name for property `name`.

    Names listed in the destination's `user_defined_properties` collide with
    Notion's reserved id/url addressing and get the `userDefined:` prefix; all
    other names are returned unchanged.
    """
    if name in dest.user_defined_properties:
        return f"userDefined:{name}"
    return name
260
+
261
+
262
def _encode_date(val: Any) -> tuple[str, int]:
    """Encode a date-like value as (start-string, is_datetime).

    `is_datetime` is 1 when the value carries a time — a datetime, or a string
    containing 'T' — and 0 for a plain date. Anything that is not a date or
    datetime is stringified first.
    """
    if isinstance(val, dt.date):
        # datetime subclasses date, so one branch serves both; the flag tells
        # them apart.
        return val.isoformat(), int(isinstance(val, dt.datetime))
    text = str(val)
    return text, int("T" in text)
@@ -0,0 +1,25 @@
1
+ """Notion binding of the generic OAuth client (durable_sync.auth.oauth).
2
+
3
+ The OAuth 2.1 + PKCE + DCR flow is provider-agnostic and lives in auth/oauth/flow.py;
4
+ this module just pins Notion's hosted MCP server and re-exports the flow so the
5
+ Notion bootstrap/prove/destination can keep importing `oauth.*`.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from durable_sync.auth.oauth.flow import ( # noqa: F401 (re-exported for callers)
10
+ build_authorize_url,
11
+ exchange_code,
12
+ gen_pkce,
13
+ new_state,
14
+ refresh_access_token,
15
+ register_client,
16
+ )
17
+ from durable_sync.auth.oauth import flow as _generic
18
+
19
# Base URL of Notion's hosted MCP server; discover() hands it to the generic flow.
MCP_BASE = "https://mcp.notion.com"
# MCP endpoint under that base, used as the connection URL by the headless proof.
MCP_ENDPOINT = f"{MCP_BASE}/mcp"  # Streamable HTTP transport
21
+
22
+
23
def discover() -> dict[str, str]:
    """Run the generic OAuth endpoint discovery against Notion's MCP base URL."""
    endpoints = _generic.discover(MCP_BASE)
    return endpoints
@@ -0,0 +1,57 @@
1
+ """Headless proof: NO browser. Loads the saved refresh token, mints a fresh
2
+ access token, and uses it to actually talk to the Notion MCP server.
3
+
4
+ PYTHONPATH=. python -m durable_sync.connectors.notion.prove
5
+
6
+ The de-risking step for the whole architecture: if this works, the Temporal
7
+ auth workflow can do exactly the same on a timer with no human present.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import asyncio
12
+
13
+ from mcp import ClientSession
14
+ from mcp.client.streamable_http import streamablehttp_client
15
+
16
+ from durable_sync.connectors.notion import oauth, store
17
+
18
+
19
async def _call_mcp(access_token: str) -> list[str]:
    """Open an MCP session authenticated with `access_token`; return tool names.

    A successful list_tools() is the proof that the token authenticated the
    session.
    """
    bearer = {"Authorization": f"Bearer {access_token}"}
    transport = streamablehttp_client(oauth.MCP_ENDPOINT, headers=bearer)
    async with transport as (reader, writer, _):
        async with ClientSession(reader, writer) as session:
            await session.initialize()
            listing = await session.list_tools()
            names = [tool.name for tool in listing.tools]
            return names
28
+
29
+
30
def main() -> None:
    """Prove headless auth end to end: refresh the token, then call Notion MCP.

    Loads the saved credentials, mints a fresh access token from the stored
    refresh token, persists the rotated refresh token, and lists the MCP tools
    using the new access token. Progress is printed to stdout.

    Raises:
        SystemExit: when no saved credentials are found.
    """
    creds = store.load()
    if not creds:
        raise SystemExit(
            f"No credentials at {store.path()}. Run the bootstrap first:\n"
            f" PYTHONPATH=. python -m durable_sync.connectors.notion.bootstrap"
        )

    print("Refreshing access token (headless, no browser)...")
    tokens = oauth.refresh_access_token(
        creds["token_endpoint"], creds["client_id"], creds["refresh_token"]
    )
    # Notion ROTATES the refresh token on every use — persist the new one now,
    # atomically, or the next run fails with invalid_grant.
    # A token response is not required to carry a new refresh token, though; if
    # it is absent, keep the stored one instead of dying with a KeyError after
    # the refresh itself already succeeded.
    rotated = tokens.get("refresh_token")
    if rotated:
        creds["refresh_token"] = rotated
        store.save(creds)
        rotation_note = "Rotated refresh token persisted."
    else:
        rotation_note = "No new refresh token returned; stored one kept."
    print(f" Got access token (expires in {tokens.get('expires_in')}s). {rotation_note}")

    print("Calling Notion MCP with the minted access token...")
    tool_names = asyncio.run(_call_mcp(tokens["access_token"]))
    print(f"\nSUCCESS — authenticated headlessly. MCP exposed {len(tool_names)} tools:")
    for name in tool_names:
        print(f" - {name}")
    print("\nHeadless auth proven — the Temporal auth workflow can run this unattended.")
54
+
55
+
56
+ if __name__ == "__main__":
57
+ main()
@@ -0,0 +1,136 @@
1
+ """NotionSource — read rows from a Notion data source -> Records.
2
+
3
+ The read half of the Notion connector; shares the MCP client + OAuth with
4
+ NotionDestination (see client.py), which is the whole reason connectors are
5
+ grouped by system. Each row becomes a Record keyed on its Notion page id — the
6
+ immutable, sync-safe id when Notion is the system of record.
7
+
8
+ Column values come back as the query renders them (text); for precise typing or
9
+ to pull page body content, use the `enrich` hook — it gets the raw row plus the
10
+ live MCP session for extra calls. Requires the `notion` extra.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import inspect
15
+ import logging
16
+ from dataclasses import dataclass
17
+ from typing import Awaitable, Callable, Union
18
+
19
+ from temporalio import activity
20
+
21
+ from durable_sync.core import Record, SourceSpec
22
+ from durable_sync.connectors.notion import client as mcp
23
+ from durable_sync.connectors.notion.client import NotionMCP, TokenProvider
24
+ from durable_sync.connectors.notion.token import current_access_token
25
+
26
# Module logger, named after this module's dotted path.
log = logging.getLogger("durable_sync.connectors.notion.source")

# App-supplied per-row hook: receives the mapped Record plus a NotionRowContext
# and returns the Record to emit, either directly or as an awaitable.
EnrichHook = Callable[[Record, "NotionRowContext"], Union[Record, Awaitable[Record]]]

# Rows requested per LIMIT/OFFSET query; a shorter result ends the pagination.
_PAGE = 100
31
+
32
+
33
@dataclass
class NotionRowContext:
    """Handed to the enrich hook: the raw queried row + the live MCP session, so
    enrich can type-coerce columns or fetch page content without re-connecting."""
    # The row exactly as the query returned it (before mapping to a Record).
    raw_row: dict
    # The open MCP session the row was fetched with; usable for extra calls.
    session: NotionMCP
39
+
40
+
41
def _heartbeat(detail: str) -> None:
    """Send an activity heartbeat carrying `detail`; do nothing outside an activity."""
    if not activity.in_activity():
        return
    activity.heartbeat(detail)
44
+
45
+
46
class NotionSource:
    """Source that reads the rows of a Notion data source and emits Records.

    Each row becomes a Record keyed on its Notion page id. An optional `enrich`
    hook can post-process every Record with access to the raw row and the live
    MCP session.
    """

    name = "notion"

    def __init__(
        self,
        data_source_id: str,
        *,
        order_property: str | None = None,
        interval_minutes: int = 30,
        token_provider: TokenProvider | None = None,
        enrich: EnrichHook | None = None,
        resolve_data_source: bool = True,
        decode: bool = True,
    ):
        # `data_source_id` may be a data source id OR a database id/URL — with
        # resolve_data_source on (default) the latter is resolved automatically.
        self.data_source_id = data_source_id
        # Pagination is LIMIT/OFFSET; ordering by a STABLE column keeps pages from
        # reshuffling under concurrent edits (else a run can skip/dupe rows — self-
        # corrects next run since the upsert is idempotent, but order if you can).
        self.order_property = order_property
        self.interval_minutes = interval_minutes
        self._token_provider = token_provider or current_access_token
        self._enrich = enrich
        self._resolve_ds = resolve_data_source
        self._decode = decode
        # Resolution cache: `_resolved_ds` is the data source id obtained for the
        # requested id remembered in `_resolved_for`. Keeping the requested id
        # alongside prevents a spec with a different id from reusing a stale
        # resolution.
        self._resolved_ds: str | None = None
        self._resolved_for: str | None = None

    def specs(self) -> list[SourceSpec]:
        """Return the single SourceSpec describing this data source."""
        return [SourceSpec(key=f"ds:{self.data_source_id}", interval_minutes=self.interval_minutes,
                           params={"data_source_id": self.data_source_id})]

    # The Notion OAuth token workflow must run alongside ANY route that touches
    # Notion — source or destination. The worker registers a source's aux work too
    # (and dedupes, so a Notion->Notion route registers it once).
    def aux_workflows(self) -> list:
        """Workflows to register next to the sync (the OAuth token owner)."""
        from durable_sync.auth.oauth.workflow import OAuthTokenWorkflow
        return [OAuthTokenWorkflow]

    def aux_activities(self) -> list:
        """Activities to register next to the sync (the token refresh)."""
        from durable_sync.auth.oauth.refresh import refresh_oauth_token
        return [refresh_oauth_token]

    async def fetch(self, spec: SourceSpec, only_items: list[str] | None = None) -> list[Record]:
        """Query every row of the spec's data source and return them as Records.

        Args:
            spec: the SourceSpec to fetch; its `data_source_id` param overrides
                the id given at construction.
            only_items: optional page ids for a targeted refresh. The filter is
                applied to the queried rows, so all pages are still read.

        Returns:
            The Records built from the rows, after the enrich hook (if any).
            Rows without a resolvable page id are skipped.
        """
        ds = spec.params.get("data_source_id", self.data_source_id)
        targeted = set(only_items or [])  # page ids for a targeted refresh
        out: list[Record] = []
        async with mcp.open_session(self._token_provider) as session:
            if self._resolve_ds:
                # Resolve once per requested id; re-resolve if a spec asks for a
                # different id than the one the cache was filled from.
                if self._resolved_ds is None or self._resolved_for != ds:
                    resolved = await mcp.resolve_data_source_id(session, ds)
                    if resolved != ds:
                        log.info("Resolved database %s -> data source %s", ds, resolved)
                    self._resolved_for = ds
                    self._resolved_ds = resolved
                ds = self._resolved_ds
            offset = 0
            while True:
                sql = mcp.query_sql(ds, order_by=self.order_property, limit=_PAGE, offset=offset)
                raw = await session.call(
                    "notion-query-data-sources",
                    {"data": {"data_source_urls": [f"collection://{ds}"], "query": sql}},
                )
                rows = mcp.rows_from_result(raw)
                for row in rows:
                    record = self._to_record(row)
                    if record is None:
                        continue
                    if targeted and record.primary_key not in targeted:
                        continue
                    if self._enrich is not None:
                        ctx = NotionRowContext(raw_row=row, session=session)
                        result = self._enrich(record, ctx)
                        # The hook may be sync or async; await only when needed.
                        record = await result if inspect.isawaitable(result) else result
                    out.append(record)
                    _heartbeat(record.primary_key)
                # A short page means the last page was reached.
                if len(rows) < _PAGE:
                    break
                offset += _PAGE
        log.info("Fetched %d Notion rows for %s", len(out), spec.key)
        return out

    def _to_record(self, row: dict) -> Record | None:
        """Map one queried Notion row to a neutral Record. Pure (no IO). Returns
        None for a row with no resolvable page id — it can't be keyed idempotently
        (primary_key must be the immutable page id, never a column value)."""
        page_id = mcp.page_id_from_row(row)
        if not page_id:
            return None
        columns = mcp.row_columns(row)
        if self._decode:
            columns = mcp.decode_row(columns)
        return Record(primary_key=page_id, properties=columns)
@@ -0,0 +1,46 @@
1
+ """Launch OAuthTokenWorkflow from the credentials that bootstrap saved.
2
+
3
+ PYTHONPATH=. python -m durable_sync.connectors.notion.start
4
+
5
+ Reads the bootstrap creds, starts the single long-running auth workflow, and
6
+ hands ownership of the refresh token to it. After this, the worker keeps access
7
+ tokens fresh unattended; the local file is no longer the source of truth.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import asyncio
12
+
13
+ from durable_sync import config
14
+ from durable_sync.auth.oauth.workflow import AuthParams, OAuthTokenWorkflow
15
+ from durable_sync.connectors.notion import store
16
+ from durable_sync.temporal_client import connect
17
+
18
+
19
async def main() -> None:
    """Start the OAuthTokenWorkflow, seeded with the saved bootstrap credentials.

    Exits with an instruction to run the bootstrap when no credentials are
    saved; otherwise connects to Temporal, starts the workflow under the
    configured id and task queue, and prints how to verify it.
    """
    creds = store.load()
    if not creds:
        missing = (
            f"No credentials at {store.path()}. Run the bootstrap first:\n"
            f" PYTHONPATH=. python -m durable_sync.connectors.notion.bootstrap"
        )
        raise SystemExit(missing)

    client = await connect()
    params = AuthParams(
        client_id=creds["client_id"],
        token_endpoint=creds["token_endpoint"],
        refresh_token=creds["refresh_token"],
    )
    handle = await client.start_workflow(
        OAuthTokenWorkflow.run,
        params,
        id=config.NOTION_AUTH_WORKFLOW_ID,
        task_queue=config.TASK_QUEUE,
    )
    summary = (
        f"Started OAuthTokenWorkflow (id={handle.id}). It now owns the refresh "
        f"token and keeps access tokens fresh.\n"
        f"Verify: temporal workflow query --workflow-id {handle.id} --type get_access_token"
    )
    print(summary)
43
+
44
+
45
+ if __name__ == "__main__":
46
+ asyncio.run(main())
@@ -0,0 +1,25 @@
1
+ """Notion binding of the generic creds store (durable_sync.auth.oauth.store).
2
+
3
+ Pins Notion's auth file path; bootstrap/prove/start call load()/save()/path().
4
+ """
5
+ from __future__ import annotations
6
+
7
+ import os
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+ from durable_sync.auth.oauth import store as _store
12
+
13
# Auth file location: taken from DURABLE_SYNC_NOTION_AUTH_FILE when set,
# otherwise ".notion_auth.json". Read once, at import time.
_FILE = os.getenv("DURABLE_SYNC_NOTION_AUTH_FILE", ".notion_auth.json")
14
+
15
+
16
def load() -> dict[str, Any] | None:
    """Load the Notion credentials from the auth file via the generic store.

    Callers treat a falsy result as "no credentials saved".
    """
    creds = _store.load(_FILE)
    return creds
18
+
19
+
20
def save(data: dict[str, Any]) -> None:
    """Write `data` to the Notion auth file via the generic store."""
    _store.save(_FILE, data)
    return None
22
+
23
+
24
def path() -> Path:
    """Return the Notion auth file location as resolved by the generic store."""
    resolved = _store.resolve(_FILE)
    return resolved
@@ -0,0 +1,13 @@
1
+ """Notion binding of the generic token accessor (durable_sync.auth.oauth.token).
2
+
3
+ The default token_provider for NotionDestination and NotionSource: query the
4
+ OAuthTokenWorkflow running under config.NOTION_AUTH_WORKFLOW_ID for a fresh access token.
5
+ """
6
+ from __future__ import annotations
7
+
8
+ from durable_sync import config
9
+ from durable_sync.auth.oauth.token import current_access_token as _current
10
+
11
+
12
async def current_access_token() -> str:
    """Return the access token obtained from the generic accessor for the
    workflow id configured as config.NOTION_AUTH_WORKFLOW_ID."""
    token = await _current(config.NOTION_AUTH_WORKFLOW_ID)
    return token
@@ -0,0 +1,13 @@
1
+ """YouTube source: a channel's uploads -> Records.
2
+
3
+ YouTube exposes no per-video author, so attribution (if you need it) is an
4
+ app-side concern: the Record carries a "Scan Text" field and the enrich hook gets
5
+ a YouTubeVideoContext for inverted name-matching against your own directory of people.
6
+
7
+ Requires the `youtube` extra: pip install "durable-sync[youtube]"
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from durable_sync.connectors.youtube.source import YouTubeConfig, YouTubeSource, YouTubeVideoContext
12
+
13
+ __all__ = ["YouTubeSource", "YouTubeConfig", "YouTubeVideoContext"]