durable-sync 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- durable_sync/__init__.py +26 -0
- durable_sync/activities.py +156 -0
- durable_sync/auth/__init__.py +8 -0
- durable_sync/auth/oauth/__init__.py +18 -0
- durable_sync/auth/oauth/flow.py +183 -0
- durable_sync/auth/oauth/refresh.py +58 -0
- durable_sync/auth/oauth/store.py +36 -0
- durable_sync/auth/oauth/token.py +36 -0
- durable_sync/auth/oauth/workflow.py +172 -0
- durable_sync/bootstrap.py +44 -0
- durable_sync/codec.py +80 -0
- durable_sync/config.py +35 -0
- durable_sync/connectors/__init__.py +14 -0
- durable_sync/connectors/asana/__init__.py +13 -0
- durable_sync/connectors/asana/destination.py +213 -0
- durable_sync/connectors/content.py +80 -0
- durable_sync/connectors/contentful/__init__.py +25 -0
- durable_sync/connectors/contentful/api.py +285 -0
- durable_sync/connectors/contentful/bootstrap.py +102 -0
- durable_sync/connectors/contentful/describe.py +61 -0
- durable_sync/connectors/contentful/destination.py +145 -0
- durable_sync/connectors/contentful/encode.py +49 -0
- durable_sync/connectors/contentful/introspect.py +69 -0
- durable_sync/connectors/contentful/mcp.py +95 -0
- durable_sync/connectors/contentful/mcp_destination.py +137 -0
- durable_sync/connectors/contentful/oauth.py +27 -0
- durable_sync/connectors/contentful/prove.py +51 -0
- durable_sync/connectors/contentful/source.py +192 -0
- durable_sync/connectors/contentful/start.py +46 -0
- durable_sync/connectors/contentful/store.py +25 -0
- durable_sync/connectors/contentful/token.py +13 -0
- durable_sync/connectors/contentful/token_check.py +42 -0
- durable_sync/connectors/github/__init__.py +33 -0
- durable_sync/connectors/github/api.py +169 -0
- durable_sync/connectors/github/source.py +230 -0
- durable_sync/connectors/luma/__init__.py +20 -0
- durable_sync/connectors/luma/api.py +121 -0
- durable_sync/connectors/luma/destination.py +128 -0
- durable_sync/connectors/luma/source.py +155 -0
- durable_sync/connectors/multi.py +78 -0
- durable_sync/connectors/notion/__init__.py +20 -0
- durable_sync/connectors/notion/bootstrap.py +97 -0
- durable_sync/connectors/notion/client.py +133 -0
- durable_sync/connectors/notion/destination.py +270 -0
- durable_sync/connectors/notion/oauth.py +25 -0
- durable_sync/connectors/notion/prove.py +57 -0
- durable_sync/connectors/notion/source.py +136 -0
- durable_sync/connectors/notion/start.py +46 -0
- durable_sync/connectors/notion/store.py +25 -0
- durable_sync/connectors/notion/token.py +13 -0
- durable_sync/connectors/youtube/__init__.py +13 -0
- durable_sync/connectors/youtube/api.py +122 -0
- durable_sync/connectors/youtube/source.py +152 -0
- durable_sync/core.py +210 -0
- durable_sync/env.py +55 -0
- durable_sync/http.py +71 -0
- durable_sync/linkstore.py +88 -0
- durable_sync/route.py +86 -0
- durable_sync/temporal_client.py +48 -0
- durable_sync/transport/__init__.py +12 -0
- durable_sync/transport/mcp.py +77 -0
- durable_sync/worker.py +109 -0
- durable_sync/workflows/__init__.py +9 -0
- durable_sync/workflows/sync.py +208 -0
- durable_sync-0.1.0.dist-info/METADATA +310 -0
- durable_sync-0.1.0.dist-info/RECORD +69 -0
- durable_sync-0.1.0.dist-info/WHEEL +5 -0
- durable_sync-0.1.0.dist-info/licenses/LICENSE +21 -0
- durable_sync-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""Reference Destination: Notion via the hosted MCP server.
|
|
2
|
+
|
|
3
|
+
Merges the two lineages:
|
|
4
|
+
* clean neutral-Record encoding + paginated idempotent upsert (ex-devrel-demos),
|
|
5
|
+
* Bearer-token transport (NO MCP SDK OAuthClientProvider), 429 backoff, and
|
|
6
|
+
inter-write pacing (ex-devrel-ships).
|
|
7
|
+
|
|
8
|
+
Auth: the access token comes from `token_provider` (an async () -> str). The
|
|
9
|
+
default queries OAuthTokenWorkflow, which owns the rotating refresh token; the
|
|
10
|
+
token never enters event history. We pass it as a plain `Authorization: Bearer`
|
|
11
|
+
header to the streamable-HTTP transport.
|
|
12
|
+
|
|
13
|
+
Property encoding quirks (live-server facts): dates expand to
|
|
14
|
+
`date:{prop}:start` (+ `:is_datetime`); multi-selects are JSON arrays (options
|
|
15
|
+
must pre-exist); checkboxes are `__YES__`/`__NO__`; a property literally named
|
|
16
|
+
`id`/`url` must be addressed `userDefined:{name}` (declare via
|
|
17
|
+
`user_defined_properties`).
|
|
18
|
+
|
|
19
|
+
Requires the `notion` extra: pip install "durable-sync[notion]"
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import asyncio
|
|
24
|
+
import datetime as dt
|
|
25
|
+
import json
|
|
26
|
+
from contextlib import asynccontextmanager
|
|
27
|
+
from typing import Any, Awaitable, Callable, AsyncIterator
|
|
28
|
+
|
|
29
|
+
from mcp.client.session import ClientSession
|
|
30
|
+
|
|
31
|
+
from durable_sync.core import Record, auth_error_in_chain
|
|
32
|
+
from durable_sync.connectors.notion import client as mcp
|
|
33
|
+
from durable_sync.connectors.notion.client import NotionMCP, TokenProvider
|
|
34
|
+
from durable_sync.connectors.notion.token import current_access_token
|
|
35
|
+
|
|
36
|
+
_MAX_BODY = 50000   # cap page body length to keep create snappy
_MAX_TEXT = 2000    # Notion's hard per-rich_text limit; a longer value is
                    # rejected by the API and the whole record silently
                    # fails every sync, so the destination truncates.

# Optional hooks (app-supplied), kept out of the generic core:
#   TokenProvider is imported from client.py (shared with the source).
#
#   SessionEnrich runs inside the open MCP session before each write — for
#   DESTINATION-SIDE enrichment that must read Notion (e.g. resolving author
#   handles to a relation). It gets the live session + the record and returns
#   the (possibly mutated) record, or None to DROP the record (create/update
#   then report False). The return type is a string forward reference so the
#   alias does not need `X | None` to be evaluable at import time — the same
#   approach IconFor uses below.
SessionEnrich = Callable[[ClientSession, Record], Awaitable["Record | None"]]
#   IconFor maps a record to a page icon (emoji or URL), or None. Keeps
#   Notion's icon concept off the neutral Record.
IconFor = Callable[[Record], "str | None"]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class NotionDestination:
    """Destination that writes Records into a Notion data source over MCP.

    The instance only carries configuration: the target data source, which
    properties act as title / idempotency key / sync heartbeat, encoding hints,
    and the optional app-supplied hooks. `connect()` opens the live session
    object that performs the reads and writes.
    """

    name = "notion"

    def __init__(
        self,
        data_source_id: str,
        *,
        title_property: str = "Name",
        key_property: str = "Repo ID",
        synced_property: str | None = "Last synced",
        date_properties: set[str] | None = None,
        create_only_properties: set[str] | None = None,
        user_defined_properties: set[str] | None = None,
        token_provider: TokenProvider | None = None,
        session_enrich: SessionEnrich | None = None,
        icon_for: IconFor | None = None,
        pacing_seconds: float = 0.3,
        resolve_data_source: bool = True,
    ):
        # --- target --------------------------------------------------------
        # The id may name a data source OR a database (id/URL); when
        # resolve_data_source is on (the default), connect() resolves it.
        self.data_source_id = data_source_id
        self._resolve_ds = resolve_data_source

        # --- property roles ------------------------------------------------
        self.title_property = title_property
        self.key_property = key_property
        self.synced_property = synced_property

        # --- encoding hints (a falsy argument becomes a fresh empty set) ----
        self.date_properties = date_properties if date_properties else set()
        # Seeded on CREATE only: update() filters these out so human edits to
        # them survive later runs, while the other fields refresh every run.
        self.create_only_properties = (
            create_only_properties if create_only_properties else set()
        )
        # Names that have to be sent as `userDefined:{name}` because Notion
        # reserves bare `id`/`url`; a bring-your-own schema may need e.g. {"URL"}.
        self.user_defined_properties = (
            user_defined_properties if user_defined_properties else set()
        )

        # --- hooks and pacing ----------------------------------------------
        self._token_provider = token_provider if token_provider else current_access_token
        self._session_enrich = session_enrich
        self._icon_for = icon_for
        self.pacing_seconds = pacing_seconds

    @property
    def configured(self) -> bool:
        """True when a non-empty data source id was supplied."""
        return True if self.data_source_id else False

    @property
    def config_hint(self) -> str:
        """Fixed hint text naming the setting to fill in."""
        return "NOTION_DATA_SOURCE_ID unset"

    @asynccontextmanager
    async def connect(self) -> AsyncIterator["_NotionSession"]:
        """Open an MCP session and yield a `_NotionSession` bound to it.

        With resolve_data_source enabled the configured id is passed through
        `resolve_data_source_id` first; otherwise it is used verbatim.
        """
        async with mcp.open_session(self._token_provider) as live:
            target = (
                await mcp.resolve_data_source_id(live, self.data_source_id)
                if self._resolve_ds
                else self.data_source_id
            )
            yield _NotionSession(live, self, data_source_id=target)

    # Optional hooks the worker auto-registers so the token-owner workflow runs
    # alongside the sync. Destinations without auxiliary work omit them.
    def aux_workflows(self) -> list:
        """Workflows to register next to the sync workflow."""
        from durable_sync.auth.oauth.workflow import OAuthTokenWorkflow

        return [OAuthTokenWorkflow]

    def aux_activities(self) -> list:
        """Activities to register next to the sync activities."""
        from durable_sync.auth.oauth.refresh import refresh_oauth_token

        return [refresh_oauth_token]

    @staticmethod
    def is_auth_error(err: BaseException) -> bool:
        """Report whether `err` is an auth failure that needs a re-bootstrap.

        A rejected Bearer token or a broken refresh chain (revoked/expired)
        counts. Detection is delegated to the shared `auth_error_in_chain`
        matcher from the core module.
        """
        return auth_error_in_chain(err)
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class _NotionSession:
    """One open MCP connection. Implements the DestinationSession protocol.

    Created by `NotionDestination.connect()`; holds the live MCP wrapper, the
    owning destination (for its configuration and hooks) and the already
    resolved data source id.
    """

    def __init__(self, session: NotionMCP, destination: NotionDestination, *, data_source_id: str):
        self._mcp = session
        self._destination = destination
        self._ds = data_source_id  # already resolved (database id -> data source id)

    async def call(self, name: str, arguments: dict[str, Any]) -> str:
        """Invoke MCP tool `name` with `arguments` and return its raw result."""
        return await self._mcp.call(name, arguments)

    async def query_existing_ids(self) -> dict[str, str]:
        """{ key-property value -> page id } for rows already in the DB.

        Paginates LIMIT/OFFSET with ORDER BY the key property; unordered OFFSET
        reshuffles under concurrent edits and skips rows -> duplicates, so the
        ORDER BY is REQUIRED. Rows with an empty key value or no page id are
        left out of the mapping.
        """
        ds = self._ds
        key = self._destination.key_property
        PAGE = 100
        mapping: dict[str, str] = {}
        offset = 0
        while True:
            sql = mcp.query_sql(ds, order_by=key, limit=PAGE, offset=offset)
            raw = await self.call(
                "notion-query-data-sources",
                {"data": {"data_source_urls": [f"collection://{ds}"], "query": sql}},
            )
            rows = mcp.rows_from_result(raw)
            for row in rows:
                kval = str(row.get(key) or "").strip()
                page_id = mcp.page_id_from_row(row)
                if kval and page_id:
                    mapping[kval] = page_id
            # A short page means there is nothing left to read.
            if len(rows) < PAGE:
                break
            offset += PAGE
        return mapping

    async def create(self, record: Record, synced_at: dt.datetime) -> bool:
        """Create a page for `record`.

        Returns False when the enrich hook dropped the record, True once the
        create call has been issued. The body is capped at `_MAX_BODY` chars.
        """
        record = await self._maybe_enrich(record)
        if record is None:
            return False  # session_enrich dropped it (out of scope)
        page: dict[str, Any] = {"properties": self._encode(record.properties, synced_at)}
        if record.body:
            page["content"] = record.body[:_MAX_BODY]
        icon = self._icon(record)
        if icon:
            page["icon"] = icon
        await self.call(
            "notion-create-pages",
            {"parent": {"data_source_id": self._ds}, "pages": [page]},
        )
        await self._pace()
        return True

    async def update(self, existing_id: str, record: Record, synced_at: dt.datetime) -> bool:
        """Refresh the properties of page `existing_id` from `record`.

        Returns False when the enrich hook dropped the record, True once the
        update call has been issued.
        """
        record = await self._maybe_enrich(record)
        if record is None:
            return False  # session_enrich dropped it (out of scope)
        # Skip create-only seeds (enrichment) so human edits to them survive;
        # refresh the rest. Page body is written on create, not refreshed.
        props = {
            k: v for k, v in record.properties.items()
            if k not in self._destination.create_only_properties
        }
        args: dict[str, Any] = {
            "page_id": existing_id,
            "command": "update_properties",
            "properties": self._encode(props, synced_at),
        }
        icon = self._icon(record)
        if icon:
            args["icon"] = icon
        await self.call("notion-update-page", args)
        await self._pace()
        return True

    async def _maybe_enrich(self, record: Record) -> Record | None:
        """Run the destination-side enrich hook (if any). It may return None to
        DROP the record (an out-of-scope filter)."""
        if self._destination._session_enrich is not None:
            return await self._destination._session_enrich(self._mcp.session, record)
        return record

    def _icon(self, record: Record) -> str | None:
        """Return the icon chosen by the `icon_for` hook, or None without one."""
        fn = self._destination._icon_for
        return fn(record) if fn else None

    async def _pace(self) -> None:
        """Sleep `pacing_seconds` after a write (no-op when it is <= 0)."""
        # Stay under Notion's MCP rate limit (~few req/s). Backoff handles the
        # residual; this keeps us from hitting it in the first place.
        if self._destination.pacing_seconds > 0:
            await asyncio.sleep(self._destination.pacing_seconds)

    def _encode(self, properties: dict[str, Any], synced_at: dt.datetime) -> dict[str, Any]:
        """Neutral Python values -> Notion MCP wire format.

        None values are skipped. Date properties expand to
        `date:{name}:start` / `date:{name}:is_datetime`; bools become
        `__YES__`/`__NO__` (bool is checked before int because bool subclasses
        int); numbers pass through; non-empty lists/tuples become JSON arrays;
        anything else is stringified and truncated to `_MAX_TEXT` chars. When
        a heartbeat property is configured it is stamped with the UTC calendar
        date of `synced_at`.
        """
        dest = self._destination
        out: dict[str, Any] = {}
        for name, val in properties.items():
            if val is None:
                continue
            if name in dest.date_properties:
                if val:
                    start, is_dt = _encode_date(val)
                    out[f"date:{name}:start"] = start
                    out[f"date:{name}:is_datetime"] = is_dt
            elif isinstance(val, bool):
                out[_key(name, dest)] = "__YES__" if val else "__NO__"
            elif isinstance(val, (int, float)):
                out[_key(name, dest)] = val
            elif isinstance(val, (list, tuple)):
                if val:  # multi-selects are JSON arrays; options must pre-exist
                    out[_key(name, dest)] = json.dumps(list(val))
            else:
                # Notion rejects any rich_text/title over 2000 chars; a long
                # value (e.g. a verbose repo description) would 400 and the record
                # would silently fail to sync every run. Truncate as a backstop —
                # the destination owns wire limits (per the core contract).
                out[_key(name, dest)] = str(val)[:_MAX_TEXT]
        # Sync heartbeat: "Last synced" is a DATE column -> stamp the UTC date.
        if dest.synced_property:
            stamp = synced_at
            # An aware datetime in another zone can fall on a different
            # calendar day than UTC, so convert before taking the date. A naive
            # datetime carries no offset and is used as given.
            if stamp.utcoffset() is not None:
                stamp = stamp.astimezone(dt.timezone.utc)
            out[f"date:{dest.synced_property}:start"] = stamp.date().isoformat()
            out[f"date:{dest.synced_property}:is_datetime"] = 0
        return out
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
def _key(name: str, dest: NotionDestination) -> str:
    """Return the wire name for property `name`.

    Names listed in `dest.user_defined_properties` are sent as
    `userDefined:{name}` so they do not collide with Notion's reserved
    id/url addressing; every other name is returned unchanged.
    """
    if name in dest.user_defined_properties:
        return f"userDefined:{name}"
    return name
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _encode_date(val: Any) -> tuple[str, int]:
    """Return `(start_string, is_datetime)` for a date-like value.

    `is_datetime` is 1 when the value carries a time component — a
    `datetime`, or any other value whose string form contains a 'T' — and 0
    for a plain date.
    """
    if isinstance(val, dt.date):
        # datetime subclasses date, so one branch covers both; only the flag
        # differs between them.
        has_time = isinstance(val, dt.datetime)
        return val.isoformat(), int(has_time)
    text = str(val)
    return text, int("T" in text)
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Notion binding of the generic OAuth client (durable_sync.auth.oauth).
|
|
2
|
+
|
|
3
|
+
The OAuth 2.1 + PKCE + DCR flow is provider-agnostic and lives in auth/oauth/flow.py;
|
|
4
|
+
this module just pins Notion's hosted MCP server and re-exports the flow so the
|
|
5
|
+
Notion bootstrap/prove/destination can keep importing `oauth.*`.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from durable_sync.auth.oauth.flow import ( # noqa: F401 (re-exported for callers)
|
|
10
|
+
build_authorize_url,
|
|
11
|
+
exchange_code,
|
|
12
|
+
gen_pkce,
|
|
13
|
+
new_state,
|
|
14
|
+
refresh_access_token,
|
|
15
|
+
register_client,
|
|
16
|
+
)
|
|
17
|
+
from durable_sync.auth.oauth import flow as _generic
|
|
18
|
+
|
|
19
|
+
# Base URL of Notion's hosted MCP server, and its Streamable HTTP endpoint.
MCP_BASE = "https://mcp.notion.com"
MCP_ENDPOINT = MCP_BASE + "/mcp"  # Streamable HTTP transport


def discover() -> dict[str, str]:
    """Run the generic OAuth endpoint discovery against Notion's MCP base URL."""
    endpoints = _generic.discover(MCP_BASE)
    return endpoints
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""Headless proof: NO browser. Loads the saved refresh token, mints a fresh
|
|
2
|
+
access token, and uses it to actually talk to the Notion MCP server.
|
|
3
|
+
|
|
4
|
+
PYTHONPATH=. python -m durable_sync.connectors.notion.prove
|
|
5
|
+
|
|
6
|
+
The de-risking step for the whole architecture: if this works, the Temporal
|
|
7
|
+
auth workflow can do exactly the same on a timer with no human present.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
|
|
13
|
+
from mcp import ClientSession
|
|
14
|
+
from mcp.client.streamable_http import streamablehttp_client
|
|
15
|
+
|
|
16
|
+
from durable_sync.connectors.notion import oauth, store
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def _call_mcp(access_token: str) -> list[str]:
    """Open an MCP session with `access_token` and return the tool names.

    The token is sent as a plain `Authorization: Bearer` header. A successful
    `list_tools()` call is the evidence that the token authenticated.
    """
    bearer = {"Authorization": f"Bearer {access_token}"}
    async with streamablehttp_client(oauth.MCP_ENDPOINT, headers=bearer) as streams:
        # The transport yields three values; only the two streams are used.
        reader, writer, _unused = streams
        async with ClientSession(reader, writer) as session:
            await session.initialize()
            listing = await session.list_tools()
            return [tool.name for tool in listing.tools]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def main() -> None:
    """Prove headless auth: refresh the token, persist it, call the MCP server.

    Loads the saved credentials, exchanges the refresh token for an access
    token, stores a newly issued refresh token when the response contains one,
    then lists the MCP tools with the fresh access token.

    Raises:
        SystemExit: when no saved credentials are found.
    """
    creds = store.load()
    if not creds:
        raise SystemExit(
            f"No credentials at {store.path()}. Run the bootstrap first:\n"
            " PYTHONPATH=. python -m durable_sync.connectors.notion.bootstrap"
        )

    print("Refreshing access token (headless, no browser)...")
    tokens = oauth.refresh_access_token(
        creds["token_endpoint"], creds["client_id"], creds["refresh_token"]
    )
    # Notion ROTATES the refresh token on every use — persist the new one now,
    # or the next run fails with invalid_grant. A token response is allowed to
    # omit a new refresh token, though; in that case the stored one stays
    # valid, so keep it instead of failing with KeyError.
    rotated = tokens.get("refresh_token")
    if rotated:
        creds["refresh_token"] = rotated
        store.save(creds)
        refresh_note = "Rotated refresh token persisted."
    else:
        refresh_note = "No new refresh token issued; stored one kept."
    print(f" Got access token (expires in {tokens.get('expires_in')}s). {refresh_note}")

    print("Calling Notion MCP with the minted access token...")
    tool_names = asyncio.run(_call_mcp(tokens["access_token"]))
    print(f"\nSUCCESS — authenticated headlessly. MCP exposed {len(tool_names)} tools:")
    for name in tool_names:
        print(f" - {name}")
    print("\nHeadless auth proven — the Temporal auth workflow can run this unattended.")
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
if __name__ == "__main__":
|
|
57
|
+
main()
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""NotionSource — read rows from a Notion data source -> Records.
|
|
2
|
+
|
|
3
|
+
The read half of the Notion connector; shares the MCP client + OAuth with
|
|
4
|
+
NotionDestination (see client.py), which is the whole reason connectors are
|
|
5
|
+
grouped by system. Each row becomes a Record keyed on its Notion page id — the
|
|
6
|
+
immutable, sync-safe id when Notion is the system of record.
|
|
7
|
+
|
|
8
|
+
Column values come back as the query renders them (text); for precise typing or
|
|
9
|
+
to pull page body content, use the `enrich` hook — it gets the raw row plus the
|
|
10
|
+
live MCP session for extra calls. Requires the `notion` extra.
|
|
11
|
+
"""
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import inspect
|
|
15
|
+
import logging
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from typing import Awaitable, Callable, Union
|
|
18
|
+
|
|
19
|
+
from temporalio import activity
|
|
20
|
+
|
|
21
|
+
from durable_sync.core import Record, SourceSpec
|
|
22
|
+
from durable_sync.connectors.notion import client as mcp
|
|
23
|
+
from durable_sync.connectors.notion.client import NotionMCP, TokenProvider
|
|
24
|
+
from durable_sync.connectors.notion.token import current_access_token
|
|
25
|
+
|
|
26
|
+
log = logging.getLogger("durable_sync.connectors.notion.source")
|
|
27
|
+
|
|
28
|
+
EnrichHook = Callable[[Record, "NotionRowContext"], Union[Record, Awaitable[Record]]]
|
|
29
|
+
|
|
30
|
+
_PAGE = 100
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class NotionRowContext:
    """Extra context passed to the enrich hook alongside each Record.

    Bundles the queried row and the open MCP session so a hook can coerce
    column types or fetch page content without opening another connection.
    """

    # The row as produced by the query, before it was mapped to a Record.
    raw_row: dict
    # The live MCP session the row was read through.
    session: NotionMCP
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _heartbeat(detail: str) -> None:
    """Send an activity heartbeat carrying `detail`; no-op outside an activity."""
    if not activity.in_activity():
        return
    activity.heartbeat(detail)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class NotionSource:
    """Source that reads the rows of a Notion data source as Records.

    Each row becomes a Record keyed on its Notion page id. The optional
    `enrich` hook receives every Record together with a `NotionRowContext`.
    """

    name = "notion"

    def __init__(
        self,
        data_source_id: str,
        *,
        order_property: str | None = None,
        interval_minutes: int = 30,
        token_provider: TokenProvider | None = None,
        enrich: EnrichHook | None = None,
        resolve_data_source: bool = True,
        decode: bool = True,
    ):
        # `data_source_id` may be a data source id OR a database id/URL — with
        # resolve_data_source on (default) the latter is resolved automatically.
        self.data_source_id = data_source_id
        # Pagination is LIMIT/OFFSET; ordering by a STABLE column keeps pages from
        # reshuffling under concurrent edits (else a run can skip/dupe rows — self-
        # corrects next run since the upsert is idempotent, but order if you can).
        self.order_property = order_property
        self.interval_minutes = interval_minutes
        self._token_provider = token_provider or current_access_token
        self._enrich = enrich
        self._resolve_ds = resolve_data_source
        self._decode = decode
        # Last resolved data source id, plus the requested id it was resolved
        # from. The pair lets fetch() reuse the resolution only for the same
        # request instead of for whatever id comes in next.
        self._resolved_ds: str | None = None
        self._resolved_from: str | None = None

    def specs(self) -> list[SourceSpec]:
        """Return the single spec describing this data source."""
        return [SourceSpec(key=f"ds:{self.data_source_id}", interval_minutes=self.interval_minutes,
                           params={"data_source_id": self.data_source_id})]

    # The Notion OAuth token workflow must run alongside ANY route that touches
    # Notion — source or destination. The worker registers a source's aux work too
    # (and dedupes, so a Notion->Notion route registers it once).
    def aux_workflows(self) -> list:
        """Workflows to register next to the sync workflow."""
        from durable_sync.auth.oauth.workflow import OAuthTokenWorkflow
        return [OAuthTokenWorkflow]

    def aux_activities(self) -> list:
        """Activities to register next to the sync activities."""
        from durable_sync.auth.oauth.refresh import refresh_oauth_token
        return [refresh_oauth_token]

    def _cached_resolution(self, requested: str) -> str | None:
        """Return the cached data source id for `requested`, or None on a miss."""
        if self._resolved_ds is not None and self._resolved_from == requested:
            return self._resolved_ds
        return None

    async def fetch(self, spec: SourceSpec, only_items: list[str] | None = None) -> list[Record]:
        """Read every row of the data source and return the mapped Records.

        Args:
            spec: the spec to fetch; `spec.params["data_source_id"]` overrides
                the configured id when present.
            only_items: optional page ids; when given, only rows whose page id
                is listed are returned (a targeted refresh).

        Rows that cannot be keyed (see `_to_record`) are skipped.
        """
        ds = spec.params.get("data_source_id", self.data_source_id)
        targeted = set(only_items or [])  # page ids for a targeted refresh
        out: list[Record] = []
        async with mcp.open_session(self._token_provider) as session:
            if self._resolve_ds:
                resolved = self._cached_resolution(ds)
                if resolved is None:
                    # Cache miss: first fetch, or a different id was requested
                    # than the one the cached value belongs to.
                    resolved = await mcp.resolve_data_source_id(session, ds)
                    self._resolved_ds, self._resolved_from = resolved, ds
                    if resolved != ds:
                        log.info("Resolved database %s -> data source %s", ds, resolved)
                ds = resolved
            offset = 0
            while True:
                sql = mcp.query_sql(ds, order_by=self.order_property, limit=_PAGE, offset=offset)
                raw = await session.call(
                    "notion-query-data-sources",
                    {"data": {"data_source_urls": [f"collection://{ds}"], "query": sql}},
                )
                rows = mcp.rows_from_result(raw)
                for row in rows:
                    record = self._to_record(row)
                    if record is None:
                        continue
                    if targeted and record.primary_key not in targeted:
                        continue
                    if self._enrich is not None:
                        ctx = NotionRowContext(raw_row=row, session=session)
                        result = self._enrich(record, ctx)
                        # The hook may be sync or async; await only when needed.
                        record = await result if inspect.isawaitable(result) else result
                    out.append(record)
                    _heartbeat(record.primary_key)
                # A short page means there is nothing left to read.
                if len(rows) < _PAGE:
                    break
                offset += _PAGE
        log.info("Fetched %d Notion rows for %s", len(out), spec.key)
        return out

    def _to_record(self, row: dict) -> Record | None:
        """Map one queried Notion row to a neutral Record. Pure (no IO). Returns
        None for a row with no resolvable page id — it can't be keyed idempotently
        (primary_key must be the immutable page id, never a column value)."""
        page_id = mcp.page_id_from_row(row)
        if not page_id:
            return None
        columns = mcp.row_columns(row)
        if self._decode:
            columns = mcp.decode_row(columns)
        return Record(primary_key=page_id, properties=columns)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Launch OAuthTokenWorkflow from the credentials bootstrap saved.
|
|
2
|
+
|
|
3
|
+
PYTHONPATH=. python -m durable_sync.connectors.notion.start
|
|
4
|
+
|
|
5
|
+
Reads the bootstrap creds, starts the single long-running auth workflow, and
|
|
6
|
+
hands ownership of the refresh token to it. After this, the worker keeps access
|
|
7
|
+
tokens fresh unattended; the local file is no longer the source of truth.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import asyncio
|
|
12
|
+
|
|
13
|
+
from durable_sync import config
|
|
14
|
+
from durable_sync.auth.oauth.workflow import AuthParams, OAuthTokenWorkflow
|
|
15
|
+
from durable_sync.connectors.notion import store
|
|
16
|
+
from durable_sync.temporal_client import connect
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def main() -> None:
    """Start the OAuthTokenWorkflow from the credentials saved by the bootstrap.

    Exits with a hint to run the bootstrap when no credentials are stored;
    otherwise connects to Temporal, starts the workflow under the configured
    id and task queue, and prints how to verify it.
    """
    saved = store.load()
    if not saved:
        raise SystemExit(
            f"No credentials at {store.path()}. Run the bootstrap first:\n"
            " PYTHONPATH=. python -m durable_sync.connectors.notion.bootstrap"
        )

    temporal = await connect()
    auth_params = AuthParams(
        client_id=saved["client_id"],
        token_endpoint=saved["token_endpoint"],
        refresh_token=saved["refresh_token"],
    )
    handle = await temporal.start_workflow(
        OAuthTokenWorkflow.run,
        auth_params,
        id=config.NOTION_AUTH_WORKFLOW_ID,
        task_queue=config.TASK_QUEUE,
    )

    summary = (
        f"Started OAuthTokenWorkflow (id={handle.id}). It now owns the refresh "
        "token and keeps access tokens fresh.\n"
        f"Verify: temporal workflow query --workflow-id {handle.id} --type get_access_token"
    )
    print(summary)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
if __name__ == "__main__":
|
|
46
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""Notion binding of the generic creds store (durable_sync.auth.store).
|
|
2
|
+
|
|
3
|
+
Pins Notion's auth file path; bootstrap/prove/start call load()/save()/path().
|
|
4
|
+
"""
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import os
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
from durable_sync.auth.oauth import store as _store
|
|
12
|
+
|
|
13
|
+
_FILE = os.getenv("DURABLE_SYNC_NOTION_AUTH_FILE", ".notion_auth.json")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def load() -> dict[str, Any] | None:
    """Load the Notion credentials via the generic store, using the pinned file."""
    stored = _store.load(_FILE)
    return stored
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def save(data: dict[str, Any]) -> None:
    """Write `data` to the pinned Notion auth file via the generic store."""
    _store.save(_FILE, data)
    return None
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def path() -> Path:
    """Return the location of the Notion auth file as resolved by the generic store."""
    location = _store.resolve(_FILE)
    return location
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Notion binding of the generic token accessor (durable_sync.auth.oauth.token).
|
|
2
|
+
|
|
3
|
+
The default token_provider for NotionDestination and NotionSource: query the OAuthTokenWorkflow
|
|
4
|
+
running under config.NOTION_AUTH_WORKFLOW_ID for a fresh access token.
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
from durable_sync import config
|
|
9
|
+
from durable_sync.auth.oauth.token import current_access_token as _current
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
async def current_access_token() -> str:
    """Fetch the access token for the workflow id `config.NOTION_AUTH_WORKFLOW_ID`."""
    workflow_id = config.NOTION_AUTH_WORKFLOW_ID
    token = await _current(workflow_id)
    return token
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""YouTube source: a channel's uploads -> Records.
|
|
2
|
+
|
|
3
|
+
YouTube exposes no per-video author, so attribution (if you need it) is an
|
|
4
|
+
app-side concern: the Record carries a "Scan Text" field and the enrich hook gets
|
|
5
|
+
a YouTubeVideoContext for inverted name-matching against your own directory of people.
|
|
6
|
+
|
|
7
|
+
Requires the `youtube` extra: pip install "durable-sync[youtube]"
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from durable_sync.connectors.youtube.source import YouTubeConfig, YouTubeSource, YouTubeVideoContext
|
|
12
|
+
|
|
13
|
+
__all__ = ["YouTubeSource", "YouTubeConfig", "YouTubeVideoContext"]
|