durable-sync 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- durable_sync-0.1.0/LICENSE +21 -0
- durable_sync-0.1.0/PKG-INFO +310 -0
- durable_sync-0.1.0/README.md +261 -0
- durable_sync-0.1.0/durable_sync/__init__.py +26 -0
- durable_sync-0.1.0/durable_sync/activities.py +156 -0
- durable_sync-0.1.0/durable_sync/auth/__init__.py +8 -0
- durable_sync-0.1.0/durable_sync/auth/oauth/__init__.py +18 -0
- durable_sync-0.1.0/durable_sync/auth/oauth/flow.py +183 -0
- durable_sync-0.1.0/durable_sync/auth/oauth/refresh.py +58 -0
- durable_sync-0.1.0/durable_sync/auth/oauth/store.py +36 -0
- durable_sync-0.1.0/durable_sync/auth/oauth/token.py +36 -0
- durable_sync-0.1.0/durable_sync/auth/oauth/workflow.py +172 -0
- durable_sync-0.1.0/durable_sync/bootstrap.py +44 -0
- durable_sync-0.1.0/durable_sync/codec.py +80 -0
- durable_sync-0.1.0/durable_sync/config.py +35 -0
- durable_sync-0.1.0/durable_sync/connectors/__init__.py +14 -0
- durable_sync-0.1.0/durable_sync/connectors/asana/__init__.py +13 -0
- durable_sync-0.1.0/durable_sync/connectors/asana/destination.py +213 -0
- durable_sync-0.1.0/durable_sync/connectors/content.py +80 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/__init__.py +25 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/api.py +285 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/bootstrap.py +102 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/describe.py +61 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/destination.py +145 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/encode.py +49 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/introspect.py +69 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/mcp.py +95 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/mcp_destination.py +137 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/oauth.py +27 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/prove.py +51 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/source.py +192 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/start.py +46 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/store.py +25 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/token.py +13 -0
- durable_sync-0.1.0/durable_sync/connectors/contentful/token_check.py +42 -0
- durable_sync-0.1.0/durable_sync/connectors/github/__init__.py +33 -0
- durable_sync-0.1.0/durable_sync/connectors/github/api.py +169 -0
- durable_sync-0.1.0/durable_sync/connectors/github/source.py +230 -0
- durable_sync-0.1.0/durable_sync/connectors/luma/__init__.py +20 -0
- durable_sync-0.1.0/durable_sync/connectors/luma/api.py +121 -0
- durable_sync-0.1.0/durable_sync/connectors/luma/destination.py +128 -0
- durable_sync-0.1.0/durable_sync/connectors/luma/source.py +155 -0
- durable_sync-0.1.0/durable_sync/connectors/multi.py +78 -0
- durable_sync-0.1.0/durable_sync/connectors/notion/__init__.py +20 -0
- durable_sync-0.1.0/durable_sync/connectors/notion/bootstrap.py +97 -0
- durable_sync-0.1.0/durable_sync/connectors/notion/client.py +133 -0
- durable_sync-0.1.0/durable_sync/connectors/notion/destination.py +270 -0
- durable_sync-0.1.0/durable_sync/connectors/notion/oauth.py +25 -0
- durable_sync-0.1.0/durable_sync/connectors/notion/prove.py +57 -0
- durable_sync-0.1.0/durable_sync/connectors/notion/source.py +136 -0
- durable_sync-0.1.0/durable_sync/connectors/notion/start.py +46 -0
- durable_sync-0.1.0/durable_sync/connectors/notion/store.py +25 -0
- durable_sync-0.1.0/durable_sync/connectors/notion/token.py +13 -0
- durable_sync-0.1.0/durable_sync/connectors/youtube/__init__.py +13 -0
- durable_sync-0.1.0/durable_sync/connectors/youtube/api.py +122 -0
- durable_sync-0.1.0/durable_sync/connectors/youtube/source.py +152 -0
- durable_sync-0.1.0/durable_sync/core.py +210 -0
- durable_sync-0.1.0/durable_sync/env.py +55 -0
- durable_sync-0.1.0/durable_sync/http.py +71 -0
- durable_sync-0.1.0/durable_sync/linkstore.py +88 -0
- durable_sync-0.1.0/durable_sync/route.py +86 -0
- durable_sync-0.1.0/durable_sync/temporal_client.py +48 -0
- durable_sync-0.1.0/durable_sync/transport/__init__.py +12 -0
- durable_sync-0.1.0/durable_sync/transport/mcp.py +77 -0
- durable_sync-0.1.0/durable_sync/worker.py +109 -0
- durable_sync-0.1.0/durable_sync/workflows/__init__.py +9 -0
- durable_sync-0.1.0/durable_sync/workflows/sync.py +208 -0
- durable_sync-0.1.0/durable_sync.egg-info/PKG-INFO +310 -0
- durable_sync-0.1.0/durable_sync.egg-info/SOURCES.txt +92 -0
- durable_sync-0.1.0/durable_sync.egg-info/dependency_links.txt +1 -0
- durable_sync-0.1.0/durable_sync.egg-info/requires.txt +33 -0
- durable_sync-0.1.0/durable_sync.egg-info/top_level.txt +1 -0
- durable_sync-0.1.0/pyproject.toml +53 -0
- durable_sync-0.1.0/setup.cfg +4 -0
- durable_sync-0.1.0/tests/test_asana_encode.py +83 -0
- durable_sync-0.1.0/tests/test_auth_errors.py +85 -0
- durable_sync-0.1.0/tests/test_contentful_destination.py +69 -0
- durable_sync-0.1.0/tests/test_contentful_idempotent.py +91 -0
- durable_sync-0.1.0/tests/test_contentful_mcp_destination.py +96 -0
- durable_sync-0.1.0/tests/test_contentful_mcp_parse.py +50 -0
- durable_sync-0.1.0/tests/test_contentful_normalize.py +93 -0
- durable_sync-0.1.0/tests/test_endpoint_validation.py +34 -0
- durable_sync-0.1.0/tests/test_fetch_page.py +66 -0
- durable_sync-0.1.0/tests/test_github_fetch_page.py +73 -0
- durable_sync-0.1.0/tests/test_linkstore.py +44 -0
- durable_sync-0.1.0/tests/test_luma_destination.py +72 -0
- durable_sync-0.1.0/tests/test_luma_fetch_page.py +69 -0
- durable_sync-0.1.0/tests/test_luma_normalize.py +51 -0
- durable_sync-0.1.0/tests/test_multi_source.py +68 -0
- durable_sync-0.1.0/tests/test_notion_parse.py +128 -0
- durable_sync-0.1.0/tests/test_oauth_refresh_errors.py +55 -0
- durable_sync-0.1.0/tests/test_route.py +63 -0
- durable_sync-0.1.0/tests/test_sync_records.py +63 -0
- durable_sync-0.1.0/tests/test_youtube_normalize.py +42 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Temporal Technologies Inc. and durable-sync contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: durable-sync
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Keep your tools in sync on autopilot — durable, idempotent data sync from any source into any destination, built on Temporal.
|
|
5
|
+
Author-email: Angie Byron <angela.byron@temporal.io>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/temporal-community/durable-sync
|
|
8
|
+
Project-URL: Repository, https://github.com/temporal-community/durable-sync
|
|
9
|
+
Project-URL: Issues, https://github.com/temporal-community/durable-sync/issues
|
|
10
|
+
Keywords: temporal,sync,etl,idempotent,workflow,oauth,integration
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
+
Classifier: Framework :: AsyncIO
|
|
19
|
+
Classifier: Topic :: Software Development :: Libraries
|
|
20
|
+
Classifier: Topic :: System :: Distributed Computing
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: temporalio>=1.7
|
|
25
|
+
Provides-Extra: notion
|
|
26
|
+
Requires-Dist: mcp>=1.2; extra == "notion"
|
|
27
|
+
Requires-Dist: requests>=2.31; extra == "notion"
|
|
28
|
+
Provides-Extra: github
|
|
29
|
+
Requires-Dist: httpx>=0.27; extra == "github"
|
|
30
|
+
Provides-Extra: asana
|
|
31
|
+
Requires-Dist: httpx>=0.27; extra == "asana"
|
|
32
|
+
Provides-Extra: luma
|
|
33
|
+
Requires-Dist: httpx>=0.27; extra == "luma"
|
|
34
|
+
Provides-Extra: youtube
|
|
35
|
+
Requires-Dist: httpx>=0.27; extra == "youtube"
|
|
36
|
+
Provides-Extra: contentful
|
|
37
|
+
Requires-Dist: httpx>=0.27; extra == "contentful"
|
|
38
|
+
Provides-Extra: crypto
|
|
39
|
+
Requires-Dist: cryptography>=42; extra == "crypto"
|
|
40
|
+
Provides-Extra: dev
|
|
41
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
42
|
+
Requires-Dist: python-dotenv>=1.0; extra == "dev"
|
|
43
|
+
Provides-Extra: all
|
|
44
|
+
Requires-Dist: mcp>=1.2; extra == "all"
|
|
45
|
+
Requires-Dist: requests>=2.31; extra == "all"
|
|
46
|
+
Requires-Dist: httpx>=0.27; extra == "all"
|
|
47
|
+
Requires-Dist: cryptography>=42; extra == "all"
|
|
48
|
+
Dynamic: license-file
|
|
49
|
+
|
|
50
|
+
# durable-sync
|
|
51
|
+
|
|
52
|
+
**Keep the tools your team lives in automatically in sync — your events, videos,
|
|
53
|
+
repos, and published content flowing into the catalog or tracker you actually use —
|
|
54
|
+
without the brittle script that silently dies at 2am.**
|
|
55
|
+
|
|
56
|
+
Teams end up copy-pasting between tools, or babysitting a homegrown script that
|
|
57
|
+
breaks the moment an API hiccups or a token expires. durable-sync is a small Python
|
|
58
|
+
library for building syncs that just keep running: pull records from a **source**
|
|
59
|
+
(your YouTube channel, your Luma events, a Contentful CMS, a GitHub org) and keep
|
|
60
|
+
them continuously, accurately mirrored into a **destination** (a Notion database, an
|
|
61
|
+
Asana project). For example —
|
|
62
|
+
|
|
63
|
+
- every new **YouTube** video shows up as a row in your **Notion** content database,
|
|
64
|
+
- your **Luma** events stay mirrored into an **Asana** project,
|
|
65
|
+
- your published **Contentful** articles land in a marketing calendar.
|
|
66
|
+
|
|
67
|
+
You write a little Python to say *where to read* and *where to write*; the library
|
|
68
|
+
makes it durable. Built on [Temporal](https://temporal.io). GitHub → Notion is the
|
|
69
|
+
reference wiring.
|
|
70
|
+
|
|
71
|
+
## Why bother (vs. a quick script)
|
|
72
|
+
|
|
73
|
+
A weekend script works until it doesn't. durable-sync gives you, out of the box:
|
|
74
|
+
|
|
75
|
+
- **It just stays current.** Each sync runs on its own schedule, forever, keeping
|
|
76
|
+
itself up to date — no cron job to babysit.
|
|
77
|
+
- **No duplicates, ever.** Re-runs and retries update the existing row instead of
|
|
78
|
+
creating a second copy (every record carries a stable id).
|
|
79
|
+
- **It survives outages.** If your machine restarts or a service goes down
|
|
80
|
+
mid-sync, it resumes exactly where it left off.
|
|
81
|
+
- **It waits instead of flailing.** When a login expires or is revoked, the sync
|
|
82
|
+
pauses and tells you — rather than hammering a dead credential.
|
|
83
|
+
- **No admin required.** For tools like Notion you can authorize as *yourself* (no
|
|
84
|
+
IT-issued API key), and your login is refreshed safely in the background.
|
|
85
|
+
- **It scales.** From 10 records to hundreds of thousands, it pages through them
|
|
86
|
+
without falling over.
|
|
87
|
+
|
|
88
|
+
(Under the hood that's durable orchestration, idempotent upserts, headless OAuth,
|
|
89
|
+
and rate-limit backoff — all inherited from the library, none of it your problem.)
|
|
90
|
+
|
|
91
|
+
## The mental model: two seams
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
Source.fetch(spec) ─► [Record, …] ─► Destination upserts (idempotent, keyed on primary_key)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
- **`Record`** = `{primary_key, properties, body}`. `properties` are *neutral*
|
|
98
|
+
Python values (`str`/`int`/`bool`/`list`/`date`/`datetime`); the **destination**
|
|
99
|
+
owns all wire-encoding, so a source author never learns a destination's quirks.
|
|
100
|
+
- **`primary_key`** is the immutable idempotency key (a repo id, an event id) —
|
|
101
|
+
never a name or URL. This is the single most important field: it's what makes
|
|
102
|
+
retries safe.
|
|
103
|
+
|
|
104
|
+
Everything else — orchestration, OAuth, backoff — lives in the "spine" and is
|
|
105
|
+
shared by every connector.
|
|
106
|
+
|
|
107
|
+
## Requirements
|
|
108
|
+
|
|
109
|
+
- Python 3.11+
|
|
110
|
+
- A Temporal server. For local dev: [`temporal server
|
|
111
|
+
start-dev`](https://docs.temporal.io/cli#start-dev-server) (from the Temporal
|
|
112
|
+
CLI). For production: a self-hosted cluster or [Temporal
|
|
113
|
+
Cloud](https://temporal.io/cloud).
|
|
114
|
+
|
|
115
|
+
## Quickstart: see it run in two minutes
|
|
116
|
+
|
|
117
|
+
This runs the whole spine end-to-end with a network-free in-memory destination —
|
|
118
|
+
no tokens, no external services.
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
pip install "durable-sync[all,dev]"
|
|
122
|
+
|
|
123
|
+
# In one terminal: a local Temporal dev server
|
|
124
|
+
temporal server start-dev
|
|
125
|
+
|
|
126
|
+
# In another, from a checkout of the repository: the offline spine smoke test (fetches fake records, upserts them twice,
|
|
127
|
+
# proves the second pass updates instead of duplicating)
|
|
128
|
+
PYTHONPATH=. python tests/smoke_spine.py
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
You should see a first pass *create* rows and a second pass *update* the same rows
|
|
132
|
+
— idempotency in action. Open the Temporal UI (http://localhost:8233) to watch the
|
|
133
|
+
workflow.
|
|
134
|
+
|
|
135
|
+
## Wire your own sync
|
|
136
|
+
|
|
137
|
+
A source and a destination are just two small classes. Here's a complete,
|
|
138
|
+
runnable sketch (`my_api` is a placeholder for whatever client reads your data):
|
|
139
|
+
|
|
140
|
+
```python
|
|
141
|
+
import asyncio
|
|
142
|
+
from contextlib import asynccontextmanager
|
|
143
|
+
|
|
144
|
+
from durable_sync.core import Record, SourceSpec
|
|
145
|
+
from durable_sync.worker import run_worker
|
|
146
|
+
from durable_sync.bootstrap import start_sources
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# 1) A SOURCE: produce neutral Records, keyed on a stable primary_key.
|
|
150
|
+
class TasksSource:
|
|
151
|
+
name = "tasks"
|
|
152
|
+
|
|
153
|
+
def specs(self):
|
|
154
|
+
# One SourceSpec per independent unit of work — each gets its own workflow.
|
|
155
|
+
return [SourceSpec(key="all", interval_minutes=15)]
|
|
156
|
+
|
|
157
|
+
async def fetch(self, spec, only_items=None):
|
|
158
|
+
rows = await my_api.list_tasks() # however you read your data
|
|
159
|
+
return [
|
|
160
|
+
Record(primary_key=str(r["id"]), # immutable id — NOT the title
|
|
161
|
+
properties={"Title": r["title"], "Done": r["completed"]})
|
|
162
|
+
for r in rows
|
|
163
|
+
]
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# 2) A DESTINATION: idempotent upsert. query_existing_ids() decides create vs update.
|
|
167
|
+
class PrinterDestination:
|
|
168
|
+
name = "printer"
|
|
169
|
+
configured = True # spine refuses to sync if False
|
|
170
|
+
config_hint = "(always configured)"
|
|
171
|
+
create_only_properties = set() # props written once, never overwritten
|
|
172
|
+
|
|
173
|
+
@asynccontextmanager
|
|
174
|
+
async def connect(self):
|
|
175
|
+
yield self # this object is also the session
|
|
176
|
+
|
|
177
|
+
async def query_existing_ids(self):
|
|
178
|
+
return {} # {primary_key: destination_id} already present
|
|
179
|
+
|
|
180
|
+
async def create(self, record, synced_at):
|
|
181
|
+
print("CREATE", record.primary_key, record.properties); return True
|
|
182
|
+
|
|
183
|
+
async def update(self, existing_id, record, synced_at):
|
|
184
|
+
print("UPDATE", existing_id, record.properties); return True
|
|
185
|
+
|
|
186
|
+
@staticmethod
|
|
187
|
+
def is_auth_error(err):
|
|
188
|
+
return False # no interactive auth to break
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
SOURCE, DESTINATION = TasksSource(), PrinterDestination()
|
|
192
|
+
|
|
193
|
+
async def main():
|
|
194
|
+
await start_sources(SOURCE) # ensure one entity workflow per spec (idempotent)
|
|
195
|
+
await run_worker(SOURCE, DESTINATION) # host the workflow + activities; runs forever
|
|
196
|
+
|
|
197
|
+
asyncio.run(main())
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
Operate the running sync from the Temporal CLI — the workflow id is
|
|
201
|
+
`durable-sync:<spec.key>`:
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
# Trigger a sync now instead of waiting for the interval:
|
|
205
|
+
temporal workflow signal --workflow-id "durable-sync:all" --name sync_now --input '[]'
|
|
206
|
+
|
|
207
|
+
# See when it last ran, its stats, and any error:
|
|
208
|
+
temporal workflow query --workflow-id "durable-sync:all" --type status
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
That's the whole contract. For the real interfaces (optional `body`, the
|
|
212
|
+
destination session split, source enrichment hooks, paginated `fetch_page`, the
|
|
213
|
+
`transform` seam), see [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
214
|
+
|
|
215
|
+
## Connectors
|
|
216
|
+
|
|
217
|
+
Reuse a built-in connector instead of writing your own. Each lives in
|
|
218
|
+
`durable_sync/connectors/<system>/`:
|
|
219
|
+
|
|
220
|
+
| System | Source | Destination | Notes |
|
|
221
|
+
|---------------|:------:|:-----------:|-------|
|
|
222
|
+
| **GitHub** | ✅ | | Orgs + named repos; per-repo enrichment hook |
|
|
223
|
+
| **YouTube** | ✅ | | A channel's uploads |
|
|
224
|
+
| **Luma** | ✅ | ✅ | Calendar events (REST); destination needs a `LinkStore` |
|
|
225
|
+
| **Contentful**| ✅ | ✅ | REST source (CDA/CMA); destination via REST CMA *or* MCP-over-OAuth for SSO-blocked spaces |
|
|
226
|
+
| **Notion** | ✅ | ✅ | MCP transport + workflow-owned OAuth (no admin token needed) |
|
|
227
|
+
| **Asana** | | ✅ | Direct REST + a self-serve personal token |
|
|
228
|
+
|
|
229
|
+
A connector is grouped by **system**, not direction, because a system is often both
|
|
230
|
+
a source and a destination and the two sides share a client + auth. Under the hood,
|
|
231
|
+
a connector composes a **transport** (MCP or REST/`http.py`) with an **auth
|
|
232
|
+
mechanism** (workflow-owned OAuth, or an inline token) — the two axes are
|
|
233
|
+
independent.
|
|
234
|
+
|
|
235
|
+
## Key concepts
|
|
236
|
+
|
|
237
|
+
- **One workflow per source unit.** `Source.specs()` returns a list of
|
|
238
|
+
`SourceSpec`s; each becomes a long-lived [entity
|
|
239
|
+
workflow](https://docs.temporal.io/encyclopedia/temporal-clients#entity-workflow)
|
|
240
|
+
that *is its own timer* (sleeps `interval_minutes`, wakes early on a `sync_now`
|
|
241
|
+
signal) and uses continue-as-new to bound history. No external scheduler.
|
|
242
|
+
- **Idempotency is keyed, never inferred.** The upsert does
|
|
243
|
+
`query_existing_ids()` → update-or-create per `primary_key`. Sync only ever
|
|
244
|
+
creates/updates rows it fetched — **it never deletes** — so hand-added data
|
|
245
|
+
survives.
|
|
246
|
+
- **OAuth as a workflow.** For services where you can't get an admin token, a
|
|
247
|
+
`OAuthTokenWorkflow` owns the rotating refresh token, serializes refreshes (no
|
|
248
|
+
rotation race), and serves fresh access tokens via query so the secret stays out
|
|
249
|
+
of history. (Pair with the opt-in AES-GCM payload codec to encrypt secrets at
|
|
250
|
+
rest in history too.)
|
|
251
|
+
- **`LinkStore` for FK-less destinations.** Some systems (Luma, Contentful over
|
|
252
|
+
MCP) can't store your `primary_key` on their own objects, so the correspondence
|
|
253
|
+
lives in an app-provided durable store. In-memory and SQLite references ship; use
|
|
254
|
+
a real datastore in production.
|
|
255
|
+
- **Scales by paging.** Large sources implement `fetch_page` so the spine fetches +
|
|
256
|
+
upserts page-by-page, keeping every payload under Temporal's limits. See the
|
|
257
|
+
Scaling section of [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
258
|
+
|
|
259
|
+
## Install
|
|
260
|
+
|
|
261
|
+
```bash
|
|
262
|
+
pip install "durable-sync[notion]" # a destination: notion / asana
|
|
263
|
+
pip install "durable-sync[github]" # a source: github / luma / youtube / contentful
|
|
264
|
+
pip install "durable-sync[crypto]" # opt-in AES-GCM payload encryption
|
|
265
|
+
pip install "durable-sync[all,dev]" # everything + test deps
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## Configuration
|
|
269
|
+
|
|
270
|
+
All runtime config is environment variables (see `durable_sync/config.py`):
|
|
271
|
+
|
|
272
|
+
| Variable | Purpose |
|
|
273
|
+
|----------|---------|
|
|
274
|
+
| `TEMPORAL_ADDRESS` / `TEMPORAL_NAMESPACE` | Cluster to connect to (defaults to `localhost:7233` / `default`) |
|
|
275
|
+
| `TEMPORAL_API_KEY` | Set for Temporal Cloud (enables TLS) |
|
|
276
|
+
| `DURABLE_SYNC_TASK_QUEUE` | Task queue name |
|
|
277
|
+
| `DURABLE_SYNC_ENC_KEY` | base64 AES-256 key to encrypt payloads in history (`python -m durable_sync.codec` generates one) |
|
|
278
|
+
| `DURABLE_SYNC_BUILD_ID` | Opt-in Worker Versioning for safe redeploys of the long-lived workflows |
|
|
279
|
+
|
|
280
|
+
Connector-specific config (which org, which Notion database, which token env var)
|
|
281
|
+
lives in the source/destination you wire up — never in `config.py`.
|
|
282
|
+
|
|
283
|
+
## Project layout
|
|
284
|
+
|
|
285
|
+
```
|
|
286
|
+
durable_sync/
|
|
287
|
+
├── core.py Record + Source/Destination protocols (the contract)
|
|
288
|
+
├── activities.py generic fetch_source / sync_records
|
|
289
|
+
├── workflows/sync.py SourceSyncWorkflow — one durable entity workflow per source unit
|
|
290
|
+
├── worker.py run_worker(SOURCE, DESTINATION)
|
|
291
|
+
├── bootstrap.py start_sources(SOURCE) — one workflow per spec (idempotent)
|
|
292
|
+
├── codec.py opt-in AES-GCM payload codec
|
|
293
|
+
├── auth/oauth/ OAuth-as-a-workflow toolkit (token-owner workflow + flow)
|
|
294
|
+
├── transport/mcp.py generic Model Context Protocol transport (Notion + Contentful)
|
|
295
|
+
├── http.py shared httpx retry/backoff for REST connectors
|
|
296
|
+
├── linkstore.py idempotency map for FK-less destinations
|
|
297
|
+
├── route.py Route = source -> (transform, field ownership) -> destination
|
|
298
|
+
└── connectors/ one subpackage per system (github, youtube, luma, contentful, notion, asana)
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
## Contributing
|
|
302
|
+
|
|
303
|
+
[CONTRIBUTING.md](CONTRIBUTING.md) is the authoritative guide for adding a source,
|
|
304
|
+
destination, auth mechanism, or transformation — with real signatures, the testing
|
|
305
|
+
pattern, and the hard-won gotchas (workflow determinism, signal handlers, history
|
|
306
|
+
limits, scaling).
|
|
307
|
+
|
|
308
|
+
## License
|
|
309
|
+
|
|
310
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
# durable-sync
|
|
2
|
+
|
|
3
|
+
**Keep the tools your team lives in automatically in sync — your events, videos,
|
|
4
|
+
repos, and published content flowing into the catalog or tracker you actually use —
|
|
5
|
+
without the brittle script that silently dies at 2am.**
|
|
6
|
+
|
|
7
|
+
Teams end up copy-pasting between tools, or babysitting a homegrown script that
|
|
8
|
+
breaks the moment an API hiccups or a token expires. durable-sync is a small Python
|
|
9
|
+
library for building syncs that just keep running: pull records from a **source**
|
|
10
|
+
(your YouTube channel, your Luma events, a Contentful CMS, a GitHub org) and keep
|
|
11
|
+
them continuously, accurately mirrored into a **destination** (a Notion database, an
|
|
12
|
+
Asana project). For example —
|
|
13
|
+
|
|
14
|
+
- every new **YouTube** video shows up as a row in your **Notion** content database,
|
|
15
|
+
- your **Luma** events stay mirrored into an **Asana** project,
|
|
16
|
+
- your published **Contentful** articles land in a marketing calendar.
|
|
17
|
+
|
|
18
|
+
You write a little Python to say *where to read* and *where to write*; the library
|
|
19
|
+
makes it durable. Built on [Temporal](https://temporal.io). GitHub → Notion is the
|
|
20
|
+
reference wiring.
|
|
21
|
+
|
|
22
|
+
## Why bother (vs. a quick script)
|
|
23
|
+
|
|
24
|
+
A weekend script works until it doesn't. durable-sync gives you, out of the box:
|
|
25
|
+
|
|
26
|
+
- **It just stays current.** Each sync runs on its own schedule, forever, keeping
|
|
27
|
+
itself up to date — no cron job to babysit.
|
|
28
|
+
- **No duplicates, ever.** Re-runs and retries update the existing row instead of
|
|
29
|
+
creating a second copy (every record carries a stable id).
|
|
30
|
+
- **It survives outages.** If your machine restarts or a service goes down
|
|
31
|
+
mid-sync, it resumes exactly where it left off.
|
|
32
|
+
- **It waits instead of flailing.** When a login expires or is revoked, the sync
|
|
33
|
+
pauses and tells you — rather than hammering a dead credential.
|
|
34
|
+
- **No admin required.** For tools like Notion you can authorize as *yourself* (no
|
|
35
|
+
IT-issued API key), and your login is refreshed safely in the background.
|
|
36
|
+
- **It scales.** From 10 records to hundreds of thousands, it pages through them
|
|
37
|
+
without falling over.
|
|
38
|
+
|
|
39
|
+
(Under the hood that's durable orchestration, idempotent upserts, headless OAuth,
|
|
40
|
+
and rate-limit backoff — all inherited from the library, none of it your problem.)
|
|
41
|
+
|
|
42
|
+
## The mental model: two seams
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
Source.fetch(spec) ─► [Record, …] ─► Destination upserts (idempotent, keyed on primary_key)
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
- **`Record`** = `{primary_key, properties, body}`. `properties` are *neutral*
|
|
49
|
+
Python values (`str`/`int`/`bool`/`list`/`date`/`datetime`); the **destination**
|
|
50
|
+
owns all wire-encoding, so a source author never learns a destination's quirks.
|
|
51
|
+
- **`primary_key`** is the immutable idempotency key (a repo id, an event id) —
|
|
52
|
+
never a name or URL. This is the single most important field: it's what makes
|
|
53
|
+
retries safe.
|
|
54
|
+
|
|
55
|
+
Everything else — orchestration, OAuth, backoff — lives in the "spine" and is
|
|
56
|
+
shared by every connector.
|
|
57
|
+
|
|
58
|
+
## Requirements
|
|
59
|
+
|
|
60
|
+
- Python 3.11+
|
|
61
|
+
- A Temporal server. For local dev: [`temporal server
|
|
62
|
+
start-dev`](https://docs.temporal.io/cli#start-dev-server) (from the Temporal
|
|
63
|
+
CLI). For production: a self-hosted cluster or [Temporal
|
|
64
|
+
Cloud](https://temporal.io/cloud).
|
|
65
|
+
|
|
66
|
+
## Quickstart: see it run in two minutes
|
|
67
|
+
|
|
68
|
+
This runs the whole spine end-to-end with a network-free in-memory destination —
|
|
69
|
+
no tokens, no external services.
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
pip install "durable-sync[all,dev]"
|
|
73
|
+
|
|
74
|
+
# In one terminal: a local Temporal dev server
|
|
75
|
+
temporal server start-dev
|
|
76
|
+
|
|
77
|
+
# In another, from a checkout of the repository: the offline spine smoke test (fetches fake records, upserts them twice,
|
|
78
|
+
# proves the second pass updates instead of duplicating)
|
|
79
|
+
PYTHONPATH=. python tests/smoke_spine.py
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
You should see a first pass *create* rows and a second pass *update* the same rows
|
|
83
|
+
— idempotency in action. Open the Temporal UI (http://localhost:8233) to watch the
|
|
84
|
+
workflow.
|
|
85
|
+
|
|
86
|
+
## Wire your own sync
|
|
87
|
+
|
|
88
|
+
A source and a destination are just two small classes. Here's a complete,
|
|
89
|
+
runnable sketch (`my_api` is a placeholder for whatever client reads your data):
|
|
90
|
+
|
|
91
|
+
```python
|
|
92
|
+
import asyncio
|
|
93
|
+
from contextlib import asynccontextmanager
|
|
94
|
+
|
|
95
|
+
from durable_sync.core import Record, SourceSpec
|
|
96
|
+
from durable_sync.worker import run_worker
|
|
97
|
+
from durable_sync.bootstrap import start_sources
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# 1) A SOURCE: produce neutral Records, keyed on a stable primary_key.
|
|
101
|
+
class TasksSource:
|
|
102
|
+
name = "tasks"
|
|
103
|
+
|
|
104
|
+
def specs(self):
|
|
105
|
+
# One SourceSpec per independent unit of work — each gets its own workflow.
|
|
106
|
+
return [SourceSpec(key="all", interval_minutes=15)]
|
|
107
|
+
|
|
108
|
+
async def fetch(self, spec, only_items=None):
|
|
109
|
+
rows = await my_api.list_tasks() # however you read your data
|
|
110
|
+
return [
|
|
111
|
+
Record(primary_key=str(r["id"]), # immutable id — NOT the title
|
|
112
|
+
properties={"Title": r["title"], "Done": r["completed"]})
|
|
113
|
+
for r in rows
|
|
114
|
+
]
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
# 2) A DESTINATION: idempotent upsert. query_existing_ids() decides create vs update.
|
|
118
|
+
class PrinterDestination:
|
|
119
|
+
name = "printer"
|
|
120
|
+
configured = True # spine refuses to sync if False
|
|
121
|
+
config_hint = "(always configured)"
|
|
122
|
+
create_only_properties = set() # props written once, never overwritten
|
|
123
|
+
|
|
124
|
+
@asynccontextmanager
|
|
125
|
+
async def connect(self):
|
|
126
|
+
yield self # this object is also the session
|
|
127
|
+
|
|
128
|
+
async def query_existing_ids(self):
|
|
129
|
+
return {} # {primary_key: destination_id} already present
|
|
130
|
+
|
|
131
|
+
async def create(self, record, synced_at):
|
|
132
|
+
print("CREATE", record.primary_key, record.properties); return True
|
|
133
|
+
|
|
134
|
+
async def update(self, existing_id, record, synced_at):
|
|
135
|
+
print("UPDATE", existing_id, record.properties); return True
|
|
136
|
+
|
|
137
|
+
@staticmethod
|
|
138
|
+
def is_auth_error(err):
|
|
139
|
+
return False # no interactive auth to break
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
SOURCE, DESTINATION = TasksSource(), PrinterDestination()
|
|
143
|
+
|
|
144
|
+
async def main():
|
|
145
|
+
await start_sources(SOURCE) # ensure one entity workflow per spec (idempotent)
|
|
146
|
+
await run_worker(SOURCE, DESTINATION) # host the workflow + activities; runs forever
|
|
147
|
+
|
|
148
|
+
asyncio.run(main())
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Operate the running sync from the Temporal CLI — the workflow id is
|
|
152
|
+
`durable-sync:<spec.key>`:
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
# Trigger a sync now instead of waiting for the interval:
|
|
156
|
+
temporal workflow signal --workflow-id "durable-sync:all" --name sync_now --input '[]'
|
|
157
|
+
|
|
158
|
+
# See when it last ran, its stats, and any error:
|
|
159
|
+
temporal workflow query --workflow-id "durable-sync:all" --type status
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
That's the whole contract. For the real interfaces (optional `body`, the
|
|
163
|
+
destination session split, source enrichment hooks, paginated `fetch_page`, the
|
|
164
|
+
`transform` seam), see [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
165
|
+
|
|
166
|
+
## Connectors
|
|
167
|
+
|
|
168
|
+
Reuse a built-in connector instead of writing your own. Each lives in
|
|
169
|
+
`durable_sync/connectors/<system>/`:
|
|
170
|
+
|
|
171
|
+
| System | Source | Destination | Notes |
|
|
172
|
+
|---------------|:------:|:-----------:|-------|
|
|
173
|
+
| **GitHub** | ✅ | | Orgs + named repos; per-repo enrichment hook |
|
|
174
|
+
| **YouTube** | ✅ | | A channel's uploads |
|
|
175
|
+
| **Luma** | ✅ | ✅ | Calendar events (REST); destination needs a `LinkStore` |
|
|
176
|
+
| **Contentful**| ✅ | ✅ | REST source (CDA/CMA); destination via REST CMA *or* MCP-over-OAuth for SSO-blocked spaces |
|
|
177
|
+
| **Notion** | ✅ | ✅ | MCP transport + workflow-owned OAuth (no admin token needed) |
|
|
178
|
+
| **Asana** | | ✅ | Direct REST + a self-serve personal token |
|
|
179
|
+
|
|
180
|
+
A connector is grouped by **system**, not direction, because a system is often both
|
|
181
|
+
a source and a destination and the two sides share a client + auth. Under the hood,
|
|
182
|
+
a connector composes a **transport** (MCP or REST/`http.py`) with an **auth
|
|
183
|
+
mechanism** (workflow-owned OAuth, or an inline token) — the two axes are
|
|
184
|
+
independent.
|
|
185
|
+
|
|
186
|
+
## Key concepts
|
|
187
|
+
|
|
188
|
+
- **One workflow per source unit.** `Source.specs()` returns a list of
|
|
189
|
+
`SourceSpec`s; each becomes a long-lived [entity
|
|
190
|
+
workflow](https://docs.temporal.io/encyclopedia/temporal-clients#entity-workflow)
|
|
191
|
+
that *is its own timer* (sleeps `interval_minutes`, wakes early on a `sync_now`
|
|
192
|
+
signal) and uses continue-as-new to bound history. No external scheduler.
|
|
193
|
+
- **Idempotency is keyed, never inferred.** The upsert does
|
|
194
|
+
`query_existing_ids()` → update-or-create per `primary_key`. Sync only ever
|
|
195
|
+
creates/updates rows it fetched — **it never deletes** — so hand-added data
|
|
196
|
+
survives.
|
|
197
|
+
- **OAuth as a workflow.** For services where you can't get an admin token, a
|
|
198
|
+
`OAuthTokenWorkflow` owns the rotating refresh token, serializes refreshes (no
|
|
199
|
+
rotation race), and serves fresh access tokens via query so the secret stays out
|
|
200
|
+
of history. (Pair with the opt-in AES-GCM payload codec to encrypt secrets at
|
|
201
|
+
rest in history too.)
|
|
202
|
+
- **`LinkStore` for foreign-key-less (FK-less) destinations.** Some systems (Luma, Contentful over
|
|
203
|
+
MCP) can't store your `primary_key` on their own objects, so the correspondence
|
|
204
|
+
lives in an app-provided durable store. In-memory and SQLite references ship; use
|
|
205
|
+
a real datastore in production.
|
|
206
|
+
- **Scales by paging.** Large sources implement `fetch_page` so the spine fetches +
|
|
207
|
+
upserts page-by-page, keeping every payload under Temporal's limits. See the
|
|
208
|
+
Scaling section of [CONTRIBUTING.md](CONTRIBUTING.md).
|
|
209
|
+
|
|
210
|
+
## Install
|
|
211
|
+
|
|
212
|
+
```bash
|
|
213
|
+
pip install "durable-sync[notion]" # a destination: notion / asana
|
|
214
|
+
pip install "durable-sync[github]" # a source: github / luma / youtube / contentful
|
|
215
|
+
pip install "durable-sync[crypto]" # opt-in AES-GCM payload encryption
|
|
216
|
+
pip install "durable-sync[all,dev]" # everything + test deps
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## Configuration
|
|
220
|
+
|
|
221
|
+
All runtime config is environment variables (see `durable_sync/config.py`):
|
|
222
|
+
|
|
223
|
+
| Variable | Purpose |
|
|
224
|
+
|----------|---------|
|
|
225
|
+
| `TEMPORAL_ADDRESS` / `TEMPORAL_NAMESPACE` | Cluster to connect to (defaults to `localhost:7233` / `default`) |
|
|
226
|
+
| `TEMPORAL_API_KEY` | Set for Temporal Cloud (enables TLS) |
|
|
227
|
+
| `DURABLE_SYNC_TASK_QUEUE` | Task queue name |
|
|
228
|
+
| `DURABLE_SYNC_ENC_KEY` | base64 AES-256 key to encrypt payloads in history (`python -m durable_sync.codec` generates one) |
|
|
229
|
+
| `DURABLE_SYNC_BUILD_ID` | Opt-in Worker Versioning for safe redeploys of the long-lived workflows |
|
|
230
|
+
|
|
231
|
+
Connector-specific config (which org, which Notion database, which token env var)
|
|
232
|
+
lives in the source/destination you wire up — never in `config.py`.
|
|
233
|
+
|
|
234
|
+
## Project layout
|
|
235
|
+
|
|
236
|
+
```
|
|
237
|
+
durable_sync/
|
|
238
|
+
├── core.py Record + Source/Destination protocols (the contract)
|
|
239
|
+
├── activities.py generic fetch_source / sync_records
|
|
240
|
+
├── workflows/sync.py SourceSyncWorkflow — one durable entity workflow per source unit
|
|
241
|
+
├── worker.py run_worker(SOURCE, DESTINATION)
|
|
242
|
+
├── bootstrap.py start_sources(SOURCE) — one workflow per spec (idempotent)
|
|
243
|
+
├── codec.py opt-in AES-GCM payload codec
|
|
244
|
+
├── auth/oauth/ OAuth-as-a-workflow toolkit (token-owner workflow + flow)
|
|
245
|
+
├── transport/mcp.py generic Model Context Protocol transport (Notion + Contentful)
|
|
246
|
+
├── http.py shared httpx retry/backoff for REST connectors
|
|
247
|
+
├── linkstore.py idempotency map for FK-less destinations
|
|
248
|
+
├── route.py Route = source -> (transform, field ownership) -> destination
|
|
249
|
+
└── connectors/ one subpackage per system (github, youtube, luma, contentful, notion, asana)
|
|
250
|
+
```
|
|
251
|
+
|
|
252
|
+
## Contributing
|
|
253
|
+
|
|
254
|
+
[CONTRIBUTING.md](CONTRIBUTING.md) is the authoritative guide for adding a source,
|
|
255
|
+
destination, auth mechanism, or transformation — with real signatures, the testing
|
|
256
|
+
pattern, and the hard-won gotchas (workflow determinism, signal handlers, history
|
|
257
|
+
limits, scaling).
|
|
258
|
+
|
|
259
|
+
## License
|
|
260
|
+
|
|
261
|
+
MIT — see [LICENSE](LICENSE).
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""durable-sync: durable, idempotent source -> destination sync on Temporal.
|
|
2
|
+
|
|
3
|
+
Public API — implement `Source` for your data, `Destination` for your target;
|
|
4
|
+
the spine (entity workflow, idempotent upsert, OAuth refresh, backoff) is
|
|
5
|
+
inherited. See `connectors/` for reference implementations (one subpackage per
|
|
6
|
+
system — GitHub/YouTube/Luma/Contentful/Notion/Asana; some are source and destination).
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from durable_sync.core import (
|
|
11
|
+
Destination,
|
|
12
|
+
DestinationSession,
|
|
13
|
+
Record,
|
|
14
|
+
Source,
|
|
15
|
+
SourceSpec,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"Record",
|
|
20
|
+
"SourceSpec",
|
|
21
|
+
"Source",
|
|
22
|
+
"Destination",
|
|
23
|
+
"DestinationSession",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
__version__ = "0.1.0"
|