durable-sync 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. durable_sync-0.1.0/LICENSE +21 -0
  2. durable_sync-0.1.0/PKG-INFO +310 -0
  3. durable_sync-0.1.0/README.md +261 -0
  4. durable_sync-0.1.0/durable_sync/__init__.py +26 -0
  5. durable_sync-0.1.0/durable_sync/activities.py +156 -0
  6. durable_sync-0.1.0/durable_sync/auth/__init__.py +8 -0
  7. durable_sync-0.1.0/durable_sync/auth/oauth/__init__.py +18 -0
  8. durable_sync-0.1.0/durable_sync/auth/oauth/flow.py +183 -0
  9. durable_sync-0.1.0/durable_sync/auth/oauth/refresh.py +58 -0
  10. durable_sync-0.1.0/durable_sync/auth/oauth/store.py +36 -0
  11. durable_sync-0.1.0/durable_sync/auth/oauth/token.py +36 -0
  12. durable_sync-0.1.0/durable_sync/auth/oauth/workflow.py +172 -0
  13. durable_sync-0.1.0/durable_sync/bootstrap.py +44 -0
  14. durable_sync-0.1.0/durable_sync/codec.py +80 -0
  15. durable_sync-0.1.0/durable_sync/config.py +35 -0
  16. durable_sync-0.1.0/durable_sync/connectors/__init__.py +14 -0
  17. durable_sync-0.1.0/durable_sync/connectors/asana/__init__.py +13 -0
  18. durable_sync-0.1.0/durable_sync/connectors/asana/destination.py +213 -0
  19. durable_sync-0.1.0/durable_sync/connectors/content.py +80 -0
  20. durable_sync-0.1.0/durable_sync/connectors/contentful/__init__.py +25 -0
  21. durable_sync-0.1.0/durable_sync/connectors/contentful/api.py +285 -0
  22. durable_sync-0.1.0/durable_sync/connectors/contentful/bootstrap.py +102 -0
  23. durable_sync-0.1.0/durable_sync/connectors/contentful/describe.py +61 -0
  24. durable_sync-0.1.0/durable_sync/connectors/contentful/destination.py +145 -0
  25. durable_sync-0.1.0/durable_sync/connectors/contentful/encode.py +49 -0
  26. durable_sync-0.1.0/durable_sync/connectors/contentful/introspect.py +69 -0
  27. durable_sync-0.1.0/durable_sync/connectors/contentful/mcp.py +95 -0
  28. durable_sync-0.1.0/durable_sync/connectors/contentful/mcp_destination.py +137 -0
  29. durable_sync-0.1.0/durable_sync/connectors/contentful/oauth.py +27 -0
  30. durable_sync-0.1.0/durable_sync/connectors/contentful/prove.py +51 -0
  31. durable_sync-0.1.0/durable_sync/connectors/contentful/source.py +192 -0
  32. durable_sync-0.1.0/durable_sync/connectors/contentful/start.py +46 -0
  33. durable_sync-0.1.0/durable_sync/connectors/contentful/store.py +25 -0
  34. durable_sync-0.1.0/durable_sync/connectors/contentful/token.py +13 -0
  35. durable_sync-0.1.0/durable_sync/connectors/contentful/token_check.py +42 -0
  36. durable_sync-0.1.0/durable_sync/connectors/github/__init__.py +33 -0
  37. durable_sync-0.1.0/durable_sync/connectors/github/api.py +169 -0
  38. durable_sync-0.1.0/durable_sync/connectors/github/source.py +230 -0
  39. durable_sync-0.1.0/durable_sync/connectors/luma/__init__.py +20 -0
  40. durable_sync-0.1.0/durable_sync/connectors/luma/api.py +121 -0
  41. durable_sync-0.1.0/durable_sync/connectors/luma/destination.py +128 -0
  42. durable_sync-0.1.0/durable_sync/connectors/luma/source.py +155 -0
  43. durable_sync-0.1.0/durable_sync/connectors/multi.py +78 -0
  44. durable_sync-0.1.0/durable_sync/connectors/notion/__init__.py +20 -0
  45. durable_sync-0.1.0/durable_sync/connectors/notion/bootstrap.py +97 -0
  46. durable_sync-0.1.0/durable_sync/connectors/notion/client.py +133 -0
  47. durable_sync-0.1.0/durable_sync/connectors/notion/destination.py +270 -0
  48. durable_sync-0.1.0/durable_sync/connectors/notion/oauth.py +25 -0
  49. durable_sync-0.1.0/durable_sync/connectors/notion/prove.py +57 -0
  50. durable_sync-0.1.0/durable_sync/connectors/notion/source.py +136 -0
  51. durable_sync-0.1.0/durable_sync/connectors/notion/start.py +46 -0
  52. durable_sync-0.1.0/durable_sync/connectors/notion/store.py +25 -0
  53. durable_sync-0.1.0/durable_sync/connectors/notion/token.py +13 -0
  54. durable_sync-0.1.0/durable_sync/connectors/youtube/__init__.py +13 -0
  55. durable_sync-0.1.0/durable_sync/connectors/youtube/api.py +122 -0
  56. durable_sync-0.1.0/durable_sync/connectors/youtube/source.py +152 -0
  57. durable_sync-0.1.0/durable_sync/core.py +210 -0
  58. durable_sync-0.1.0/durable_sync/env.py +55 -0
  59. durable_sync-0.1.0/durable_sync/http.py +71 -0
  60. durable_sync-0.1.0/durable_sync/linkstore.py +88 -0
  61. durable_sync-0.1.0/durable_sync/route.py +86 -0
  62. durable_sync-0.1.0/durable_sync/temporal_client.py +48 -0
  63. durable_sync-0.1.0/durable_sync/transport/__init__.py +12 -0
  64. durable_sync-0.1.0/durable_sync/transport/mcp.py +77 -0
  65. durable_sync-0.1.0/durable_sync/worker.py +109 -0
  66. durable_sync-0.1.0/durable_sync/workflows/__init__.py +9 -0
  67. durable_sync-0.1.0/durable_sync/workflows/sync.py +208 -0
  68. durable_sync-0.1.0/durable_sync.egg-info/PKG-INFO +310 -0
  69. durable_sync-0.1.0/durable_sync.egg-info/SOURCES.txt +92 -0
  70. durable_sync-0.1.0/durable_sync.egg-info/dependency_links.txt +1 -0
  71. durable_sync-0.1.0/durable_sync.egg-info/requires.txt +33 -0
  72. durable_sync-0.1.0/durable_sync.egg-info/top_level.txt +1 -0
  73. durable_sync-0.1.0/pyproject.toml +53 -0
  74. durable_sync-0.1.0/setup.cfg +4 -0
  75. durable_sync-0.1.0/tests/test_asana_encode.py +83 -0
  76. durable_sync-0.1.0/tests/test_auth_errors.py +85 -0
  77. durable_sync-0.1.0/tests/test_contentful_destination.py +69 -0
  78. durable_sync-0.1.0/tests/test_contentful_idempotent.py +91 -0
  79. durable_sync-0.1.0/tests/test_contentful_mcp_destination.py +96 -0
  80. durable_sync-0.1.0/tests/test_contentful_mcp_parse.py +50 -0
  81. durable_sync-0.1.0/tests/test_contentful_normalize.py +93 -0
  82. durable_sync-0.1.0/tests/test_endpoint_validation.py +34 -0
  83. durable_sync-0.1.0/tests/test_fetch_page.py +66 -0
  84. durable_sync-0.1.0/tests/test_github_fetch_page.py +73 -0
  85. durable_sync-0.1.0/tests/test_linkstore.py +44 -0
  86. durable_sync-0.1.0/tests/test_luma_destination.py +72 -0
  87. durable_sync-0.1.0/tests/test_luma_fetch_page.py +69 -0
  88. durable_sync-0.1.0/tests/test_luma_normalize.py +51 -0
  89. durable_sync-0.1.0/tests/test_multi_source.py +68 -0
  90. durable_sync-0.1.0/tests/test_notion_parse.py +128 -0
  91. durable_sync-0.1.0/tests/test_oauth_refresh_errors.py +55 -0
  92. durable_sync-0.1.0/tests/test_route.py +63 -0
  93. durable_sync-0.1.0/tests/test_sync_records.py +63 -0
  94. durable_sync-0.1.0/tests/test_youtube_normalize.py +42 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Temporal Technologies Inc. and durable-sync contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,310 @@
1
+ Metadata-Version: 2.4
2
+ Name: durable-sync
3
+ Version: 0.1.0
4
+ Summary: Keep your tools in sync on autopilot — durable, idempotent data sync from any source into any destination, built on Temporal.
5
+ Author-email: Angie Byron <angela.byron@temporal.io>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/temporal-community/durable-sync
8
+ Project-URL: Repository, https://github.com/temporal-community/durable-sync
9
+ Project-URL: Issues, https://github.com/temporal-community/durable-sync/issues
10
+ Keywords: temporal,sync,etl,idempotent,workflow,oauth,integration
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Programming Language :: Python :: 3.13
17
+ Classifier: Programming Language :: Python :: 3.14
18
+ Classifier: Framework :: AsyncIO
19
+ Classifier: Topic :: Software Development :: Libraries
20
+ Classifier: Topic :: System :: Distributed Computing
21
+ Requires-Python: >=3.11
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: temporalio>=1.7
25
+ Provides-Extra: notion
26
+ Requires-Dist: mcp>=1.2; extra == "notion"
27
+ Requires-Dist: requests>=2.31; extra == "notion"
28
+ Provides-Extra: github
29
+ Requires-Dist: httpx>=0.27; extra == "github"
30
+ Provides-Extra: asana
31
+ Requires-Dist: httpx>=0.27; extra == "asana"
32
+ Provides-Extra: luma
33
+ Requires-Dist: httpx>=0.27; extra == "luma"
34
+ Provides-Extra: youtube
35
+ Requires-Dist: httpx>=0.27; extra == "youtube"
36
+ Provides-Extra: contentful
37
+ Requires-Dist: httpx>=0.27; extra == "contentful"
38
+ Provides-Extra: crypto
39
+ Requires-Dist: cryptography>=42; extra == "crypto"
40
+ Provides-Extra: dev
41
+ Requires-Dist: pytest>=8.0; extra == "dev"
42
+ Requires-Dist: python-dotenv>=1.0; extra == "dev"
43
+ Provides-Extra: all
44
+ Requires-Dist: mcp>=1.2; extra == "all"
45
+ Requires-Dist: requests>=2.31; extra == "all"
46
+ Requires-Dist: httpx>=0.27; extra == "all"
47
+ Requires-Dist: cryptography>=42; extra == "all"
48
+ Dynamic: license-file
49
+
50
+ # durable-sync
51
+
52
+ **Keep the tools your team lives in automatically in sync — your events, videos,
53
+ repos, and published content flowing into the catalog or tracker you actually use —
54
+ without the brittle script that silently dies at 2am.**
55
+
56
+ Teams end up copy-pasting between tools, or babysitting a homegrown script that
57
+ breaks the moment an API hiccups or a token expires. durable-sync is a small Python
58
+ library for building syncs that just keep running: pull records from a **source**
59
+ (your YouTube channel, your Luma events, a Contentful CMS, a GitHub org) and keep
60
+ them continuously, accurately mirrored into a **destination** (a Notion database, an
61
+ Asana project). For example —
62
+
63
+ - every new **YouTube** video shows up as a row in your **Notion** content database,
64
+ - your **Luma** events stay mirrored into an **Asana** project,
65
+ - your published **Contentful** articles land in a marketing calendar.
66
+
67
+ You write a little Python to say *where to read* and *where to write*; the library
68
+ makes it durable. Built on [Temporal](https://temporal.io). GitHub → Notion is the
69
+ reference wiring.
70
+
71
+ ## Why bother (vs. a quick script)
72
+
73
+ A weekend script works until it doesn't. durable-sync gives you, out of the box:
74
+
75
+ - **It just stays current.** Each sync runs on its own schedule, forever, keeping
76
+ itself up to date — no cron job to babysit.
77
+ - **No duplicates, ever.** Re-runs and retries update the existing row instead of
78
+ creating a second copy (every record carries a stable id).
79
+ - **It survives outages.** If your machine restarts or a service goes down
80
+ mid-sync, it resumes exactly where it left off.
81
+ - **It waits instead of flailing.** When a login expires or is revoked, the sync
82
+ pauses and tells you — rather than hammering a dead credential.
83
+ - **No admin required.** For tools like Notion you can authorize as *yourself* (no
84
+ IT-issued API key), and your login is refreshed safely in the background.
85
+ - **It scales.** From 10 records to hundreds of thousands, it pages through them
86
+ without falling over.
87
+
88
+ (Under the hood that's durable orchestration, idempotent upserts, headless OAuth,
89
+ and rate-limit backoff — all inherited from the library, none of it your problem.)
90
+
91
+ ## The mental model: two seams
92
+
93
+ ```
94
+ Source.fetch(spec) ─► [Record, …] ─► Destination upserts (idempotent, keyed on primary_key)
95
+ ```
96
+
97
+ - **`Record`** = `{primary_key, properties, body}`. `properties` are *neutral*
98
+ Python values (`str`/`int`/`bool`/`list`/`date`/`datetime`); the **destination**
99
+ owns all wire-encoding, so a source author never learns a destination's quirks.
100
+ - **`primary_key`** is the immutable idempotency key (a repo id, an event id) —
101
+ never a name or URL. This is the single most important field: it's what makes
102
+ retries safe.
103
+
104
+ Everything else — orchestration, OAuth, backoff — lives in the "spine" and is
105
+ shared by every connector.
106
+
107
+ ## Requirements
108
+
109
+ - Python 3.11+
110
+ - A Temporal server. For local dev: [`temporal server
111
+ start-dev`](https://docs.temporal.io/cli#start-dev-server) (from the Temporal
112
+ CLI). For production: a self-hosted cluster or [Temporal
113
+ Cloud](https://temporal.io/cloud).
114
+
115
+ ## Quickstart: see it run in two minutes
116
+
117
+ This runs the whole spine end-to-end with a network-free in-memory destination —
118
+ no tokens, no external services.
119
+
120
+ ```bash
121
+ pip install "durable-sync[all,dev]"
122
+
123
+ # In one terminal: a local Temporal dev server
124
+ temporal server start-dev
125
+
126
+ # In another: the offline spine smoke (fetches fake records, upserts them twice,
127
+ # proves the second pass updates instead of duplicating)
128
+ PYTHONPATH=. python tests/smoke_spine.py
129
+ ```
130
+
131
+ You should see a first pass *create* rows and a second pass *update* the same rows
132
+ — idempotency in action. Open the Temporal UI (http://localhost:8233) to watch the
133
+ workflow.
134
+
135
+ ## Wire your own sync
136
+
137
+ A source and a destination are just two small classes. Here's a complete
138
+ sketch (replace the placeholder `my_api` with your own API client):
139
+
140
+ ```python
141
+ import asyncio
142
+ from contextlib import asynccontextmanager
143
+
144
+ from durable_sync.core import Record, SourceSpec
145
+ from durable_sync.worker import run_worker
146
+ from durable_sync.bootstrap import start_sources
147
+
148
+
149
+ # 1) A SOURCE: produce neutral Records, keyed on a stable primary_key.
150
+ class TasksSource:
151
+ name = "tasks"
152
+
153
+ def specs(self):
154
+ # One SourceSpec per independent unit of work — each gets its own workflow.
155
+ return [SourceSpec(key="all", interval_minutes=15)]
156
+
157
+ async def fetch(self, spec, only_items=None):
158
+ rows = await my_api.list_tasks() # however you read your data
159
+ return [
160
+ Record(primary_key=str(r["id"]), # immutable id — NOT the title
161
+ properties={"Title": r["title"], "Done": r["completed"]})
162
+ for r in rows
163
+ ]
164
+
165
+
166
+ # 2) A DESTINATION: idempotent upsert. query_existing_ids() decides create vs update.
167
+ class PrinterDestination:
168
+ name = "printer"
169
+ configured = True # spine refuses to sync if False
170
+ config_hint = "(always configured)"
171
+ create_only_properties = set() # props written once, never overwritten
172
+
173
+ @asynccontextmanager
174
+ async def connect(self):
175
+ yield self # this object is also the session
176
+
177
+ async def query_existing_ids(self):
178
+ return {} # {primary_key: destination_id} already present
179
+
180
+ async def create(self, record, synced_at):
181
+ print("CREATE", record.primary_key, record.properties); return True
182
+
183
+ async def update(self, existing_id, record, synced_at):
184
+ print("UPDATE", existing_id, record.properties); return True
185
+
186
+ @staticmethod
187
+ def is_auth_error(err):
188
+ return False # no interactive auth to break
189
+
190
+
191
+ SOURCE, DESTINATION = TasksSource(), PrinterDestination()
192
+
193
+ async def main():
194
+ await start_sources(SOURCE) # ensure one entity workflow per spec (idempotent)
195
+ await run_worker(SOURCE, DESTINATION) # host the workflow + activities; runs forever
196
+
197
+ asyncio.run(main())
198
+ ```
199
+
200
+ Operate the running sync from the Temporal CLI — the workflow id is
201
+ `durable-sync:<spec.key>`:
202
+
203
+ ```bash
204
+ # Trigger a sync now instead of waiting for the interval:
205
+ temporal workflow signal --workflow-id "durable-sync:all" --name sync_now --input '[]'
206
+
207
+ # See when it last ran, its stats, and any error:
208
+ temporal workflow query --workflow-id "durable-sync:all" --type status
209
+ ```
210
+
211
+ That's the whole contract. For the real interfaces (optional `body`, the
212
+ destination session split, source enrichment hooks, paginated `fetch_page`, the
213
+ `transform` seam), see [CONTRIBUTING.md](CONTRIBUTING.md).
214
+
215
+ ## Connectors
216
+
217
+ Reuse a built-in connector instead of writing your own. Each lives in
218
+ `durable_sync/connectors/<system>/`:
219
+
220
+ | System | Source | Destination | Notes |
221
+ |---------------|:------:|:-----------:|-------|
222
+ | **GitHub** | ✅ | | Orgs + named repos; per-repo enrichment hook |
223
+ | **YouTube** | ✅ | | A channel's uploads |
224
+ | **Luma** | ✅ | ✅ | Calendar events (REST); destination needs a `LinkStore` |
225
+ | **Contentful**| ✅ | ✅ | REST source (CDA/CMA); destination via REST CMA *or* MCP-over-OAuth for SSO-blocked spaces |
226
+ | **Notion** | ✅ | ✅ | MCP transport + workflow-owned OAuth (no admin token needed) |
227
+ | **Asana** | | ✅ | Direct REST + a self-serve personal token |
228
+
229
+ A connector is grouped by **system**, not direction, because a system is often both
230
+ a source and a destination and the two sides share a client + auth. Under the hood,
231
+ a connector composes a **transport** (MCP or REST/`http.py`) with an **auth
232
+ mechanism** (workflow-owned OAuth, or an inline token) — the two axes are
233
+ independent.
234
+
235
+ ## Key concepts
236
+
237
+ - **One workflow per source unit.** `Source.specs()` returns a list of
238
+ `SourceSpec`s; each becomes a long-lived [entity
239
+ workflow](https://docs.temporal.io/encyclopedia/temporal-clients#entity-workflow)
240
+ that *is its own timer* (sleeps `interval_minutes`, wakes early on a `sync_now`
241
+ signal) and uses continue-as-new to bound history. No external scheduler.
242
+ - **Idempotency is keyed, never inferred.** The upsert does
243
+ `query_existing_ids()` → update-or-create per `primary_key`. Sync only ever
244
+ creates/updates rows it fetched — **it never deletes** — so hand-added data
245
+ survives.
246
+ - **OAuth as a workflow.** For services where you can't get an admin token, a
247
+ `OAuthTokenWorkflow` owns the rotating refresh token, serializes refreshes (no
248
+ rotation race), and serves fresh access tokens via query so the secret stays out
249
+ of history. (Pair with the opt-in AES-GCM payload codec to encrypt secrets at
250
+ rest in history too.)
251
+ - **`LinkStore` for FK-less destinations.** Some systems (Luma, Contentful over
252
+ MCP) can't store your `primary_key` on their own objects, so the correspondence
253
+ lives in an app-provided durable store. In-memory and SQLite references ship; use
254
+ a real datastore in production.
255
+ - **Scales by paging.** Large sources implement `fetch_page` so the spine fetches +
256
+ upserts page-by-page, keeping every payload under Temporal's limits. See the
257
+ Scaling section of [CONTRIBUTING.md](CONTRIBUTING.md).
258
+
259
+ ## Install
260
+
261
+ ```bash
262
+ pip install "durable-sync[notion]" # a destination: notion / asana
263
+ pip install "durable-sync[github]" # a source: github / luma / youtube / contentful
264
+ pip install "durable-sync[crypto]" # opt-in AES-GCM payload encryption
265
+ pip install "durable-sync[all,dev]" # everything + test deps
266
+ ```
267
+
268
+ ## Configuration
269
+
270
+ All runtime config is environment variables (see `durable_sync/config.py`):
271
+
272
+ | Variable | Purpose |
273
+ |----------|---------|
274
+ | `TEMPORAL_ADDRESS` / `TEMPORAL_NAMESPACE` | Cluster to connect to (defaults to `localhost:7233` / `default`) |
275
+ | `TEMPORAL_API_KEY` | Set for Temporal Cloud (enables TLS) |
276
+ | `DURABLE_SYNC_TASK_QUEUE` | Task queue name |
277
+ | `DURABLE_SYNC_ENC_KEY` | base64 AES-256 key to encrypt payloads in history (`python -m durable_sync.codec` generates one) |
278
+ | `DURABLE_SYNC_BUILD_ID` | Opt-in Worker Versioning for safe redeploys of the long-lived workflows |
279
+
280
+ Connector-specific config (which org, which Notion database, which token env var)
281
+ lives in the source/destination you wire up — never in `config.py`.
282
+
283
+ ## Project layout
284
+
285
+ ```
286
+ durable_sync/
287
+ ├── core.py Record + Source/Destination protocols (the contract)
288
+ ├── activities.py generic fetch_source / sync_records
289
+ ├── workflows/sync.py SourceSyncWorkflow — one durable entity workflow per source unit
290
+ ├── worker.py run_worker(SOURCE, DESTINATION)
291
+ ├── bootstrap.py start_sources(SOURCE) — one workflow per spec (idempotent)
292
+ ├── codec.py opt-in AES-GCM payload codec
293
+ ├── auth/oauth/ OAuth-as-a-workflow toolkit (token-owner workflow + flow)
294
+ ├── transport/mcp.py generic Model Context Protocol transport (Notion + Contentful)
295
+ ├── http.py shared httpx retry/backoff for REST connectors
296
+ ├── linkstore.py idempotency map for FK-less destinations
297
+ ├── route.py Route = source -> (transform, field ownership) -> destination
298
+ └── connectors/ one subpackage per system (github, youtube, luma, contentful, notion, asana)
299
+ ```
300
+
301
+ ## Contributing
302
+
303
+ [CONTRIBUTING.md](CONTRIBUTING.md) is the authoritative guide for adding a source,
304
+ destination, auth mechanism, or transformation — with real signatures, the testing
305
+ pattern, and the hard-won gotchas (workflow determinism, signal handlers, history
306
+ limits, scaling).
307
+
308
+ ## License
309
+
310
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,261 @@
1
+ # durable-sync
2
+
3
+ **Keep the tools your team lives in automatically in sync — your events, videos,
4
+ repos, and published content flowing into the catalog or tracker you actually use —
5
+ without the brittle script that silently dies at 2am.**
6
+
7
+ Teams end up copy-pasting between tools, or babysitting a homegrown script that
8
+ breaks the moment an API hiccups or a token expires. durable-sync is a small Python
9
+ library for building syncs that just keep running: pull records from a **source**
10
+ (your YouTube channel, your Luma events, a Contentful CMS, a GitHub org) and keep
11
+ them continuously, accurately mirrored into a **destination** (a Notion database, an
12
+ Asana project). For example —
13
+
14
+ - every new **YouTube** video shows up as a row in your **Notion** content database,
15
+ - your **Luma** events stay mirrored into an **Asana** project,
16
+ - your published **Contentful** articles land in a marketing calendar.
17
+
18
+ You write a little Python to say *where to read* and *where to write*; the library
19
+ makes it durable. Built on [Temporal](https://temporal.io). GitHub → Notion is the
20
+ reference wiring.
21
+
22
+ ## Why bother (vs. a quick script)
23
+
24
+ A weekend script works until it doesn't. durable-sync gives you, out of the box:
25
+
26
+ - **It just stays current.** Each sync runs on its own schedule, forever, keeping
27
+ itself up to date — no cron job to babysit.
28
+ - **No duplicates, ever.** Re-runs and retries update the existing row instead of
29
+ creating a second copy (every record carries a stable id).
30
+ - **It survives outages.** If your machine restarts or a service goes down
31
+ mid-sync, it resumes exactly where it left off.
32
+ - **It waits instead of flailing.** When a login expires or is revoked, the sync
33
+ pauses and tells you — rather than hammering a dead credential.
34
+ - **No admin required.** For tools like Notion you can authorize as *yourself* (no
35
+ IT-issued API key), and your login is refreshed safely in the background.
36
+ - **It scales.** From 10 records to hundreds of thousands, it pages through them
37
+ without falling over.
38
+
39
+ (Under the hood that's durable orchestration, idempotent upserts, headless OAuth,
40
+ and rate-limit backoff — all inherited from the library, none of it your problem.)
41
+
42
+ ## The mental model: two seams
43
+
44
+ ```
45
+ Source.fetch(spec) ─► [Record, …] ─► Destination upserts (idempotent, keyed on primary_key)
46
+ ```
47
+
48
+ - **`Record`** = `{primary_key, properties, body}`. `properties` are *neutral*
49
+ Python values (`str`/`int`/`bool`/`list`/`date`/`datetime`); the **destination**
50
+ owns all wire-encoding, so a source author never learns a destination's quirks.
51
+ - **`primary_key`** is the immutable idempotency key (a repo id, an event id) —
52
+ never a name or URL. This is the single most important field: it's what makes
53
+ retries safe.
54
+
55
+ Everything else — orchestration, OAuth, backoff — lives in the "spine" and is
56
+ shared by every connector.
57
+
58
+ ## Requirements
59
+
60
+ - Python 3.11+
61
+ - A Temporal server. For local dev: [`temporal server
62
+ start-dev`](https://docs.temporal.io/cli#start-dev-server) (from the Temporal
63
+ CLI). For production: a self-hosted cluster or [Temporal
64
+ Cloud](https://temporal.io/cloud).
65
+
66
+ ## Quickstart: see it run in two minutes
67
+
68
+ This runs the whole spine end-to-end with a network-free in-memory destination —
69
+ no tokens, no external services.
70
+
71
+ ```bash
72
+ pip install "durable-sync[all,dev]"
73
+
74
+ # In one terminal: a local Temporal dev server
75
+ temporal server start-dev
76
+
77
+ # In another: the offline spine smoke (fetches fake records, upserts them twice,
78
+ # proves the second pass updates instead of duplicating)
79
+ PYTHONPATH=. python tests/smoke_spine.py
80
+ ```
81
+
82
+ You should see a first pass *create* rows and a second pass *update* the same rows
83
+ — idempotency in action. Open the Temporal UI (http://localhost:8233) to watch the
84
+ workflow.
85
+
86
+ ## Wire your own sync
87
+
88
+ A source and a destination are just two small classes. Here's a complete
89
+ sketch (replace the placeholder `my_api` with your own API client):
90
+
91
+ ```python
92
+ import asyncio
93
+ from contextlib import asynccontextmanager
94
+
95
+ from durable_sync.core import Record, SourceSpec
96
+ from durable_sync.worker import run_worker
97
+ from durable_sync.bootstrap import start_sources
98
+
99
+
100
+ # 1) A SOURCE: produce neutral Records, keyed on a stable primary_key.
101
+ class TasksSource:
102
+ name = "tasks"
103
+
104
+ def specs(self):
105
+ # One SourceSpec per independent unit of work — each gets its own workflow.
106
+ return [SourceSpec(key="all", interval_minutes=15)]
107
+
108
+ async def fetch(self, spec, only_items=None):
109
+ rows = await my_api.list_tasks() # however you read your data
110
+ return [
111
+ Record(primary_key=str(r["id"]), # immutable id — NOT the title
112
+ properties={"Title": r["title"], "Done": r["completed"]})
113
+ for r in rows
114
+ ]
115
+
116
+
117
+ # 2) A DESTINATION: idempotent upsert. query_existing_ids() decides create vs update.
118
+ class PrinterDestination:
119
+ name = "printer"
120
+ configured = True # spine refuses to sync if False
121
+ config_hint = "(always configured)"
122
+ create_only_properties = set() # props written once, never overwritten
123
+
124
+ @asynccontextmanager
125
+ async def connect(self):
126
+ yield self # this object is also the session
127
+
128
+ async def query_existing_ids(self):
129
+ return {} # {primary_key: destination_id} already present
130
+
131
+ async def create(self, record, synced_at):
132
+ print("CREATE", record.primary_key, record.properties); return True
133
+
134
+ async def update(self, existing_id, record, synced_at):
135
+ print("UPDATE", existing_id, record.properties); return True
136
+
137
+ @staticmethod
138
+ def is_auth_error(err):
139
+ return False # no interactive auth to break
140
+
141
+
142
+ SOURCE, DESTINATION = TasksSource(), PrinterDestination()
143
+
144
+ async def main():
145
+ await start_sources(SOURCE) # ensure one entity workflow per spec (idempotent)
146
+ await run_worker(SOURCE, DESTINATION) # host the workflow + activities; runs forever
147
+
148
+ asyncio.run(main())
149
+ ```
150
+
151
+ Operate the running sync from the Temporal CLI — the workflow id is
152
+ `durable-sync:<spec.key>`:
153
+
154
+ ```bash
155
+ # Trigger a sync now instead of waiting for the interval:
156
+ temporal workflow signal --workflow-id "durable-sync:all" --name sync_now --input '[]'
157
+
158
+ # See when it last ran, its stats, and any error:
159
+ temporal workflow query --workflow-id "durable-sync:all" --type status
160
+ ```
161
+
162
+ That's the whole contract. For the real interfaces (optional `body`, the
163
+ destination session split, source enrichment hooks, paginated `fetch_page`, the
164
+ `transform` seam), see [CONTRIBUTING.md](CONTRIBUTING.md).
165
+
166
+ ## Connectors
167
+
168
+ Reuse a built-in connector instead of writing your own. Each lives in
169
+ `durable_sync/connectors/<system>/`:
170
+
171
+ | System | Source | Destination | Notes |
172
+ |---------------|:------:|:-----------:|-------|
173
+ | **GitHub** | ✅ | | Orgs + named repos; per-repo enrichment hook |
174
+ | **YouTube** | ✅ | | A channel's uploads |
175
+ | **Luma** | ✅ | ✅ | Calendar events (REST); destination needs a `LinkStore` |
176
+ | **Contentful**| ✅ | ✅ | REST source (CDA/CMA); destination via REST CMA *or* MCP-over-OAuth for SSO-blocked spaces |
177
+ | **Notion** | ✅ | ✅ | MCP transport + workflow-owned OAuth (no admin token needed) |
178
+ | **Asana** | | ✅ | Direct REST + a self-serve personal token |
179
+
180
+ A connector is grouped by **system**, not direction, because a system is often both
181
+ a source and a destination and the two sides share a client + auth. Under the hood,
182
+ a connector composes a **transport** (MCP or REST/`http.py`) with an **auth
183
+ mechanism** (workflow-owned OAuth, or an inline token) — the two axes are
184
+ independent.
185
+
186
+ ## Key concepts
187
+
188
+ - **One workflow per source unit.** `Source.specs()` returns a list of
189
+ `SourceSpec`s; each becomes a long-lived [entity
190
+ workflow](https://docs.temporal.io/encyclopedia/temporal-clients#entity-workflow)
191
+ that *is its own timer* (sleeps `interval_minutes`, wakes early on a `sync_now`
192
+ signal) and uses continue-as-new to bound history. No external scheduler.
193
+ - **Idempotency is keyed, never inferred.** The upsert does
194
+ `query_existing_ids()` → update-or-create per `primary_key`. Sync only ever
195
+ creates/updates rows it fetched — **it never deletes** — so hand-added data
196
+ survives.
197
+ - **OAuth as a workflow.** For services where you can't get an admin token, a
198
+ `OAuthTokenWorkflow` owns the rotating refresh token, serializes refreshes (no
199
+ rotation race), and serves fresh access tokens via query so the secret stays out
200
+ of history. (Pair with the opt-in AES-GCM payload codec to encrypt secrets at
201
+ rest in history too.)
202
+ - **`LinkStore` for FK-less destinations.** Some systems (Luma, Contentful over
203
+ MCP) can't store your `primary_key` on their own objects, so the correspondence
204
+ lives in an app-provided durable store. In-memory and SQLite references ship; use
205
+ a real datastore in production.
206
+ - **Scales by paging.** Large sources implement `fetch_page` so the spine fetches +
207
+ upserts page-by-page, keeping every payload under Temporal's limits. See the
208
+ Scaling section of [CONTRIBUTING.md](CONTRIBUTING.md).
209
+
210
+ ## Install
211
+
212
+ ```bash
213
+ pip install "durable-sync[notion]" # a destination: notion / asana
214
+ pip install "durable-sync[github]" # a source: github / luma / youtube / contentful
215
+ pip install "durable-sync[crypto]" # opt-in AES-GCM payload encryption
216
+ pip install "durable-sync[all,dev]" # everything + test deps
217
+ ```
218
+
219
+ ## Configuration
220
+
221
+ All runtime config is environment variables (see `durable_sync/config.py`):
222
+
223
+ | Variable | Purpose |
224
+ |----------|---------|
225
+ | `TEMPORAL_ADDRESS` / `TEMPORAL_NAMESPACE` | Cluster to connect to (defaults to `localhost:7233` / `default`) |
226
+ | `TEMPORAL_API_KEY` | Set for Temporal Cloud (enables TLS) |
227
+ | `DURABLE_SYNC_TASK_QUEUE` | Task queue name |
228
+ | `DURABLE_SYNC_ENC_KEY` | base64 AES-256 key to encrypt payloads in history (`python -m durable_sync.codec` generates one) |
229
+ | `DURABLE_SYNC_BUILD_ID` | Opt-in Worker Versioning for safe redeploys of the long-lived workflows |
230
+
231
+ Connector-specific config (which org, which Notion database, which token env var)
232
+ lives in the source/destination you wire up — never in `config.py`.
233
+
234
+ ## Project layout
235
+
236
+ ```
237
+ durable_sync/
238
+ ├── core.py Record + Source/Destination protocols (the contract)
239
+ ├── activities.py generic fetch_source / sync_records
240
+ ├── workflows/sync.py SourceSyncWorkflow — one durable entity workflow per source unit
241
+ ├── worker.py run_worker(SOURCE, DESTINATION)
242
+ ├── bootstrap.py start_sources(SOURCE) — one workflow per spec (idempotent)
243
+ ├── codec.py opt-in AES-GCM payload codec
244
+ ├── auth/oauth/ OAuth-as-a-workflow toolkit (token-owner workflow + flow)
245
+ ├── transport/mcp.py generic Model Context Protocol transport (Notion + Contentful)
246
+ ├── http.py shared httpx retry/backoff for REST connectors
247
+ ├── linkstore.py idempotency map for FK-less destinations
248
+ ├── route.py Route = source -> (transform, field ownership) -> destination
249
+ └── connectors/ one subpackage per system (github, youtube, luma, contentful, notion, asana)
250
+ ```
251
+
252
+ ## Contributing
253
+
254
+ [CONTRIBUTING.md](CONTRIBUTING.md) is the authoritative guide for adding a source,
255
+ destination, auth mechanism, or transformation — with real signatures, the testing
256
+ pattern, and the hard-won gotchas (workflow determinism, signal handlers, history
257
+ limits, scaling).
258
+
259
+ ## License
260
+
261
+ MIT — see [LICENSE](LICENSE).
@@ -0,0 +1,26 @@
1
+ """durable-sync: durable, idempotent source -> destination sync on Temporal.
2
+
3
+ Public API — implement `Source` for your data, `Destination` for your target;
4
+ the spine (entity workflow, idempotent upsert, OAuth refresh, backoff) is
5
+ inherited. See `connectors/` (one subpackage per system — GitHub/Luma/YouTube/
6
+ Contentful/Notion sources, Luma/Contentful/Notion/Asana destinations) for reference implementations.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ from durable_sync.core import (
11
+ Destination,
12
+ DestinationSession,
13
+ Record,
14
+ Source,
15
+ SourceSpec,
16
+ )
17
+
18
+ __all__ = [
19
+ "Record",
20
+ "SourceSpec",
21
+ "Source",
22
+ "Destination",
23
+ "DestinationSession",
24
+ ]
25
+
26
+ __version__ = "0.1.0"