durable-sync 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. durable_sync/__init__.py +26 -0
  2. durable_sync/activities.py +156 -0
  3. durable_sync/auth/__init__.py +8 -0
  4. durable_sync/auth/oauth/__init__.py +18 -0
  5. durable_sync/auth/oauth/flow.py +183 -0
  6. durable_sync/auth/oauth/refresh.py +58 -0
  7. durable_sync/auth/oauth/store.py +36 -0
  8. durable_sync/auth/oauth/token.py +36 -0
  9. durable_sync/auth/oauth/workflow.py +172 -0
  10. durable_sync/bootstrap.py +44 -0
  11. durable_sync/codec.py +80 -0
  12. durable_sync/config.py +35 -0
  13. durable_sync/connectors/__init__.py +14 -0
  14. durable_sync/connectors/asana/__init__.py +13 -0
  15. durable_sync/connectors/asana/destination.py +213 -0
  16. durable_sync/connectors/content.py +80 -0
  17. durable_sync/connectors/contentful/__init__.py +25 -0
  18. durable_sync/connectors/contentful/api.py +285 -0
  19. durable_sync/connectors/contentful/bootstrap.py +102 -0
  20. durable_sync/connectors/contentful/describe.py +61 -0
  21. durable_sync/connectors/contentful/destination.py +145 -0
  22. durable_sync/connectors/contentful/encode.py +49 -0
  23. durable_sync/connectors/contentful/introspect.py +69 -0
  24. durable_sync/connectors/contentful/mcp.py +95 -0
  25. durable_sync/connectors/contentful/mcp_destination.py +137 -0
  26. durable_sync/connectors/contentful/oauth.py +27 -0
  27. durable_sync/connectors/contentful/prove.py +51 -0
  28. durable_sync/connectors/contentful/source.py +192 -0
  29. durable_sync/connectors/contentful/start.py +46 -0
  30. durable_sync/connectors/contentful/store.py +25 -0
  31. durable_sync/connectors/contentful/token.py +13 -0
  32. durable_sync/connectors/contentful/token_check.py +42 -0
  33. durable_sync/connectors/github/__init__.py +33 -0
  34. durable_sync/connectors/github/api.py +169 -0
  35. durable_sync/connectors/github/source.py +230 -0
  36. durable_sync/connectors/luma/__init__.py +20 -0
  37. durable_sync/connectors/luma/api.py +121 -0
  38. durable_sync/connectors/luma/destination.py +128 -0
  39. durable_sync/connectors/luma/source.py +155 -0
  40. durable_sync/connectors/multi.py +78 -0
  41. durable_sync/connectors/notion/__init__.py +20 -0
  42. durable_sync/connectors/notion/bootstrap.py +97 -0
  43. durable_sync/connectors/notion/client.py +133 -0
  44. durable_sync/connectors/notion/destination.py +270 -0
  45. durable_sync/connectors/notion/oauth.py +25 -0
  46. durable_sync/connectors/notion/prove.py +57 -0
  47. durable_sync/connectors/notion/source.py +136 -0
  48. durable_sync/connectors/notion/start.py +46 -0
  49. durable_sync/connectors/notion/store.py +25 -0
  50. durable_sync/connectors/notion/token.py +13 -0
  51. durable_sync/connectors/youtube/__init__.py +13 -0
  52. durable_sync/connectors/youtube/api.py +122 -0
  53. durable_sync/connectors/youtube/source.py +152 -0
  54. durable_sync/core.py +210 -0
  55. durable_sync/env.py +55 -0
  56. durable_sync/http.py +71 -0
  57. durable_sync/linkstore.py +88 -0
  58. durable_sync/route.py +86 -0
  59. durable_sync/temporal_client.py +48 -0
  60. durable_sync/transport/__init__.py +12 -0
  61. durable_sync/transport/mcp.py +77 -0
  62. durable_sync/worker.py +109 -0
  63. durable_sync/workflows/__init__.py +9 -0
  64. durable_sync/workflows/sync.py +208 -0
  65. durable_sync-0.1.0.dist-info/METADATA +310 -0
  66. durable_sync-0.1.0.dist-info/RECORD +69 -0
  67. durable_sync-0.1.0.dist-info/WHEEL +5 -0
  68. durable_sync-0.1.0.dist-info/licenses/LICENSE +21 -0
  69. durable_sync-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,42 @@
1
+ """Does the MCP-minted OAuth token also work against the plain CMA REST API?
2
+
3
+ The Contentful MCP returns LLM-oriented XML, awkward to parse for a sync pipeline.
4
+ But if the OAuth token we mint for the MCP server ALSO authenticates the CMA REST
5
+ API, we can skip the XML entirely: reuse the existing (clean-JSON) REST
6
+ ContentfulSource/Destination with a durable, workflow-owned OAuth token — no-admin
7
+ auth AND clean JSON. This probe answers that decisively.
8
+
9
+ PYTHONPATH=. python -m durable_sync.connectors.contentful.token_check
10
+ """
11
+ from __future__ import annotations
12
+
13
+ import httpx
14
+
15
+ from durable_sync.env import load_env
16
+ from durable_sync.connectors.contentful import oauth, store
17
+
18
+
19
def main() -> None:
    """Probe whether the refreshed OAuth access token is accepted by the CMA REST API.

    Loads the environment and the stored credentials, refreshes the access
    token (persisting a rotated refresh token when one is returned), then GETs
    two CMA endpoints and prints each status code plus the first 300 characters
    of the body, followed by a short guide for reading the result.

    Raises:
        SystemExit: when no stored credentials are available.
    """
    load_env()
    creds = store.load()
    if not creds:
        raise SystemExit("No credentials — run connectors.contentful.bootstrap first.")

    tokens = oauth.refresh_access_token(
        creds["token_endpoint"], creds["client_id"], creds["refresh_token"]
    )
    # Keep the stored credentials usable if the server rotated the refresh token.
    if tokens.get("refresh_token"):
        creds["refresh_token"] = tokens["refresh_token"]
        store.save(creds)
    token = tokens["access_token"]
    auth_headers = {"Authorization": f"Bearer {token}"}

    probe_urls = (
        "https://api.contentful.com/users/me",
        "https://api.contentful.com/spaces",
    )
    for url in probe_urls:
        response = httpx.get(url, headers=auth_headers, timeout=30)
        print(f"GET {url} -> {response.status_code}")
        print(f" {response.text[:300].strip()}")
        print()

    print("Verdict:")
    print(" /spaces 200 (lists spaces) -> MCP-OAuth token works on CMA REST: reuse the REST connector.")
    print(" 401 / 403 / empty -> token is MCP-scoped only: we parse MCP tool output instead.")
39
+
40
+
41
+ if __name__ == "__main__":
42
+ main()
@@ -0,0 +1,33 @@
1
+ """GitHub source: orgs + named repos -> Records.
2
+
3
+ The reference Source. Ships the GitHub *mechanism* (HTTP fetchers + generic
4
+ helpers); the *policy/vocab* (which topics mean what, language->SDK maps,
5
+ static analysis) belongs in your app's `enrich` hook — see RepoContext.
6
+
7
+ Requires the `github` extra: pip install "durable-sync[github]"
8
+ """
9
+ from __future__ import annotations
10
+
11
+ from durable_sync.connectors.github.api import (
12
+ build_headers,
13
+ classify,
14
+ fetch_org_members,
15
+ is_member,
16
+ raw_languages,
17
+ )
18
+ from durable_sync.connectors.github.source import (
19
+ GitHubConfig,
20
+ GitHubSource,
21
+ RepoContext,
22
+ )
23
+
24
+ __all__ = [
25
+ "GitHubSource",
26
+ "GitHubConfig",
27
+ "RepoContext",
28
+ "is_member",
29
+ "classify",
30
+ "raw_languages",
31
+ "fetch_org_members",
32
+ "build_headers",
33
+ ]
@@ -0,0 +1,169 @@
1
+ """GitHub REST helpers — pure HTTP + small pure transforms. No Temporal, no
2
+ config globals: every call takes its `headers`. Reusable from the Source's
3
+ fetch loop AND from an app's enrich hook (which gets the live client via
4
+ RepoContext).
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+
10
+ import httpx
11
+
12
+ from durable_sync.http import request_with_retry
13
+
14
+ GITHUB_API = "https://api.github.com"
15
+ PER_PAGE = 100
16
+ log = logging.getLogger("durable_sync.connectors.github")
17
+
18
+
19
def build_headers(token: str | None, *, user_agent: str = "durable-sync") -> dict[str, str]:
    """Return the request headers used for GitHub REST calls.

    Always sets Accept, X-GitHub-Api-Version and User-Agent; adds a Bearer
    Authorization header only when `token` is truthy.
    """
    base = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "User-Agent": user_agent,
    }
    if not token:
        return base
    return {**base, "Authorization": f"Bearer {token}"}
28
+
29
+
30
+ # --- pure transforms -------------------------------------------------------
31
+
32
def raw_languages(byte_counts: dict[str, int]) -> list[str]:
    """Return every language name in `byte_counts`, largest byte count first.

    Languages with equal counts keep their original mapping order.
    """
    # reverse=True keeps the sort stable, so ties stay in insertion order.
    return sorted(byte_counts, key=byte_counts.__getitem__, reverse=True)
35
+
36
+
37
def classify(topics: list[str], mapping: dict[str, str]) -> list[str]:
    """Translate topics into labels via `mapping`, keyed by lower-cased topic.

    Topics without a (truthy) label are dropped; each label appears once, in
    order of first appearance. The mapping is supplied by the caller — this
    function only applies it.
    """
    # A dict doubles as an insertion-ordered set of the labels seen so far.
    labels: dict[str, None] = {}
    for topic in topics:
        label = mapping.get(topic.lower())
        if label:
            labels.setdefault(label, None)
    return list(labels)
46
+
47
+
48
def is_member(handle: str, members: set[str]) -> bool:
    """Report whether `handle` is present in the `members` set.

    Deliberately a bare boolean: the caller (e.g. an app's enrich hook) decides
    which labels to derive from it.
    """
    found = handle in members
    return found
53
+
54
+
55
def iso_date(s: str | None) -> str | None:
    """Return the first ten characters of `s` (the YYYY-MM-DD part of an ISO
    timestamp), or None when `s` is empty or None."""
    if not s:
        return None
    return s[:10]
58
+
59
+
60
+ # --- HTTP fetchers ---------------------------------------------------------
61
+ # All go through request_with_retry, which backs off on 429 + GitHub's
62
+ # rate-limited 403 (honoring Retry-After). The enrichment fetchers below tolerate
63
+ # a failed call by returning empty, but LOG it first — a silently empty languages
64
+ # list (because we got rate-limited) reads as "this repo has no languages", which
65
+ # is a data-quality landmine on a large org sweep.
66
+
67
async def get_repo(client: httpx.AsyncClient, full_name: str, headers: dict) -> dict | None:
    """Fetch one repository's JSON by "owner/repo" name.

    Returns None (after logging a warning) on HTTP 404; any other error status
    is raised via `raise_for_status`.
    """
    url = f"{GITHUB_API}/repos/{full_name}"
    response = await request_with_retry(client, "GET", url, headers=headers)
    if response.status_code != 404:
        response.raise_for_status()
        return response.json()
    log.warning("Repo not found, skipping: %s", full_name)
    return None
74
+
75
+
76
async def fetch_org_repos_page(
    client: httpx.AsyncClient, org: str, headers: dict, *, page: int, per_page: int = PER_PAGE
) -> tuple[list[dict], bool]:
    """Fetch a single page of an org's public repos, sorted by full_name.

    Returns `(batch, has_more)`; `has_more` is True exactly when the page came
    back full (`len(batch) == per_page`). Inclusion gating is left to the
    caller. Error statuses are raised via `raise_for_status`.
    """
    query = {
        "per_page": per_page,
        "page": page,
        "type": "public",
        "sort": "full_name",
    }
    response = await request_with_retry(
        client, "GET", f"{GITHUB_API}/orgs/{org}/repos", headers=headers, params=query
    )
    response.raise_for_status()
    batch = response.json()
    # A full page is the only hint that another page may follow.
    has_more = len(batch) == per_page
    return batch, has_more
91
+
92
+
93
async def fetch_org_repos(
    client: httpx.AsyncClient, org: str, headers: dict, *, per_page: int = PER_PAGE
) -> list[dict]:
    """Collect every public repo of an org by draining `fetch_org_repos_page`
    from page 1 until a page reports no more results."""
    collected: list[dict] = []
    page_no = 1
    has_more = True
    while has_more:
        batch, has_more = await fetch_org_repos_page(
            client, org, headers, page=page_no, per_page=per_page
        )
        collected.extend(batch)
        page_no += 1
    return collected
106
+
107
+
108
async def fetch_readme(client: httpx.AsyncClient, full_name: str, headers: dict) -> str | None:
    """Fetch a repo's README as raw text.

    Returns the body on HTTP 200 and None otherwise. A 404 is treated as
    "no README" and stays silent; every other non-200 status is logged.
    """
    # Same headers, but ask GitHub for the raw file instead of JSON.
    raw_headers = {**headers, "Accept": "application/vnd.github.raw"}
    response = await request_with_retry(
        client, "GET", f"{GITHUB_API}/repos/{full_name}/readme", headers=raw_headers
    )
    status = response.status_code
    if status == 200:
        return response.text
    if status == 404:
        return None
    log.warning("README fetch for %s failed: HTTP %s", full_name, status)
    return None
116
+
117
+
118
async def fetch_languages(client: httpx.AsyncClient, full_name: str, headers: dict) -> dict[str, int]:
    """Fetch the language -> byte-count mapping for a repo.

    Any non-200 response is logged and yields an empty dict, so a failed call
    is visible in the logs rather than silently read as "no languages".
    """
    url = f"{GITHUB_API}/repos/{full_name}/languages"
    response = await request_with_retry(client, "GET", url, headers=headers)
    if response.status_code != 200:
        log.warning(
            "Languages fetch for %s failed: HTTP %s — record will list none",
            full_name, response.status_code,
        )
        return {}
    return response.json()
124
+
125
+
126
async def fetch_contributors(
    client: httpx.AsyncClient, full_name: str, headers: dict, *, limit: int = 5
) -> list[str]:
    """Return up to `limit` contributor logins for a repo, bots filtered out.

    Logins are kept in the order the contributors endpoint returns them;
    entries without a login and logins ending in "[bot]" are skipped.

    Args:
        client: HTTP client used for the request.
        full_name: Repository in "owner/repo" form.
        headers: Request headers (auth etc.).
        limit: Maximum number of handles to return. Zero or a negative value
            returns an empty list without making a request.

    Returns:
        A list of at most `limit` logins; empty on any non-200 response or
        unexpected payload shape.
    """
    # The limit check below runs only after an append, so without this guard
    # limit=0 could still return one handle.
    if limit <= 0:
        return []

    # Ask for enough rows to be able to satisfy `limit` (at least the previous
    # fixed 25; GitHub caps per_page at 100).
    page_size = min(100, max(25, limit))
    r = await request_with_retry(
        client, "GET", f"{GITHUB_API}/repos/{full_name}/contributors",
        headers=headers, params={"per_page": page_size},
    )
    if r.status_code == 204:
        # Empty repository: no contributors, and not a failure worth a warning.
        return []
    if r.status_code != 200:
        log.warning("Contributors fetch for %s failed: HTTP %s", full_name, r.status_code)
        return []

    data = r.json()
    if not isinstance(data, list):
        return []

    out: list[str] = []
    for c in data:
        login = c.get("login")
        if not login or login.endswith("[bot]"):
            continue
        out.append(login)
        if len(out) >= limit:
            break
    return out
148
+
149
+
150
async def fetch_org_members(client: httpx.AsyncClient, org: str, headers: dict) -> set[str]:
    """Collect member logins for an org, 100 per page.

    Paging stops at the first non-200 response (logged), at a payload that is
    not a list, or at a short page; whatever was gathered so far is returned.
    """
    logins: set[str] = set()
    page_no = 1
    while True:
        response = await request_with_retry(
            client, "GET", f"{GITHUB_API}/orgs/{org}/members",
            headers=headers, params={"per_page": 100, "page": page_no},
        )
        if response.status_code != 200:
            log.warning("Org members fetch for %s failed: HTTP %s", org, response.status_code)
            return logins
        payload = response.json()
        if not isinstance(payload, list):
            return logins
        for member in payload:
            if member.get("login"):
                logins.add(member["login"])
        # Fewer than a full page means this was the last one.
        if len(payload) < 100:
            return logins
        page_no += 1
@@ -0,0 +1,230 @@
1
+ """GitHubSource — the reference Source, with a source-side enrichment hook.
2
+
3
+ Config is injected (no module globals), so the same code serves any orgs/repos.
4
+ The base fetch produces a raw Record per repo. If you pass an `enrich` hook, the
5
+ source ALSO hands it a `RepoContext` (the raw repo + readme + language bytes +
6
+ authors + employee members + the live HTTP client) so your app can layer on
7
+ domain enrichment WITHOUT importing the source's internals.
8
+ """
9
+ from __future__ import annotations
10
+
11
+ import inspect
12
+ import logging
13
+ import os
14
+ from dataclasses import dataclass, field
15
+ from typing import Any, Awaitable, Callable, Union
16
+
17
+ import httpx
18
+ from temporalio import activity
19
+
20
+ from durable_sync.core import Record, SourceSpec
21
+ from durable_sync.connectors.github import api
22
+
23
+ log = logging.getLogger("durable_sync.connectors.github")
24
+
25
+ # enrich(record, ctx) -> Record (sync) or Awaitable[Record] (async); both ok.
26
+ EnrichHook = Callable[[Record, "RepoContext"], Union[Record, Awaitable[Record]]]
27
+
28
+
29
@dataclass
class GitHubConfig:
    """Deployment-supplied settings for the GitHub source.

    `sources` is a list of ("org", "name") and/or ("repos", ["owner/repo", ...])
    entries. Org entries are filtered by `inclusion_topic` unless that is None
    or `discovery_mode` is on; named repos are included simply because they
    are named.
    """
    sources: list[tuple[str, Any]]
    # GitHub topic an org repo must carry to be included. None disables the
    # topic filter, so every non-archived repo is included. No default is
    # assumed — set it to whatever topic your org uses for in-scope repos.
    inclusion_topic: str | None = None
    # When True, the org sweep ignores the topic filter and skips README fetches.
    discovery_mode: bool = False
    # Orgs whose member logins are passed to the enrich hook as
    # RepoContext.members. The source only attaches the set; the hook decides
    # what membership means.
    member_orgs: list[str] = field(default_factory=list)
    title_property: str = "Name"
    interval_minutes: int = 30
    per_page: int = api.PER_PAGE
    # Name of the environment variable the token is read from.
    token_env: str = "GITHUB_TOKEN"
    contributor_limit: int = 5
52
+
53
+
54
@dataclass
class RepoContext:
    """Per-repo bundle passed to the enrich hook.

    Carries what the source already fetched for one repo (raw repo JSON,
    README, language byte counts, author handles, org-member logins) together
    with the live HTTP client and headers, so the hook can issue further
    requests without re-authenticating or re-fetching.
    """
    raw_repo: dict
    readme: str | None
    language_bytes: dict[str, int]
    authors: list[str]
    members: set[str]
    client: httpx.AsyncClient
    headers: dict[str, str]
66
+
67
+
68
def _heartbeat(detail: str) -> None:
    """Send a Temporal activity heartbeat carrying `detail`.

    Does nothing when not running inside an activity, which keeps the source
    usable and testable on its own.
    """
    if not activity.in_activity():
        return
    activity.heartbeat(detail)
73
+
74
+
75
class GitHubSource:
    """Source that turns GitHub repos (org sweeps and named repos) into Records.

    Settings are injected through `GitHubConfig`. An optional `enrich` hook is
    called with each base Record plus a `RepoContext` and returns the Record to
    emit, either directly or as an awaitable.
    """

    name = "github"

    def __init__(self, config: GitHubConfig, *, enrich: EnrichHook | None = None):
        self._config = config
        self._enrich = enrich

    # --- Source protocol ---------------------------------------------------

    def specs(self) -> list[SourceSpec]:
        """Build one SourceSpec per configured source entry.

        ("org", name) entries are keyed "org:<name>"; any other kind is treated
        as a named-repo list and keyed "repos:named".
        """
        interval = self._config.interval_minutes
        result: list[SourceSpec] = []
        for kind, value in self._config.sources:
            if kind == "org":
                key = f"org:{value}"
                params = {"kind": "org", "org": str(value)}
            else:  # "repos"
                key = "repos:named"
                params = {"kind": "repos", "repos": list(value)}
            result.append(SourceSpec(key=key, interval_minutes=interval, params=params))
        return result

    async def fetch_page(
        self, spec: SourceSpec, only_items: list[str] | None, cursor: str | None
    ) -> tuple[list[Record], str | None]:
        """Fetch one page of Records and the cursor for the next page.

        For an org sweep the cursor is the GitHub page number (as a string);
        the named-repo and targeted (`only_items`) paths return everything as
        a single page. The returned cursor is None on the last page.
        """
        token = os.environ.get(self._config.token_env)
        headers = api.build_headers(token)
        kind = spec.params.get("kind")

        async with httpx.AsyncClient(timeout=30) as client:
            members = await self._members(client, headers)
            repos, next_cursor = await self._select_repos_page(
                client, headers, spec, kind, only_items, cursor
            )
            records = await self._records_for_repos(client, headers, repos, members)

        log.info("Fetched %d records for %s (cursor=%s -> %s)", len(records), spec.key, cursor, next_cursor)
        return records, next_cursor

    async def fetch(
        self, spec: SourceSpec, only_items: list[str] | None = None
    ) -> list[Record]:
        """Return the whole unit as one list by draining `fetch_page` until it
        reports no further cursor."""
        collected: list[Record] = []
        cursor: str | None = None
        while True:
            page, cursor = await self.fetch_page(spec, only_items, cursor)
            collected += page
            if cursor is None:
                break
        return collected

    # --- internals ---------------------------------------------------------

    async def _members(self, client, headers) -> set[str]:
        """Union of member logins across `member_orgs`, for the enrich hook.

        Returns an empty set unless both an enrich hook and at least one member
        org are configured. Called once per `fetch_page`.
        """
        if not self._enrich or not self._config.member_orgs:
            return set()
        logins: set[str] = set()
        for org in self._config.member_orgs:
            logins |= await api.fetch_org_members(client, org, headers)
        return logins

    async def _records_for_repos(self, client, headers, repos, members) -> list[Record]:
        """Turn raw repo dicts into Records, fetching per-repo extras.

        Repos are de-duplicated by id within this call. The README fetch is
        skipped in discovery mode. When an enrich hook is set, its (possibly
        awaited) result replaces the base Record. A heartbeat is sent after
        each repo.
        """
        cfg = self._config
        records: list[Record] = []
        handled_ids: set[str] = set()
        for repo in repos:
            repo_id = str(repo["id"])
            if repo_id in handled_ids:
                # Same repo listed twice in this batch: emit it only once.
                continue
            handled_ids.add(repo_id)

            if cfg.discovery_mode:
                readme = None
            else:
                readme = await api.fetch_readme(client, repo["full_name"], headers)
            lang_bytes = await api.fetch_languages(client, repo["full_name"], headers)
            authors = await api.fetch_contributors(
                client, repo["full_name"], headers, limit=cfg.contributor_limit
            )

            record = self._base_record(repo, readme, lang_bytes, authors)
            if self._enrich is not None:
                ctx = RepoContext(
                    raw_repo=repo,
                    readme=readme,
                    language_bytes=lang_bytes,
                    authors=authors,
                    members=members,
                    client=client,
                    headers=headers,
                )
                enriched = self._enrich(record, ctx)
                # The hook may be sync or async; await only when needed.
                if inspect.isawaitable(enriched):
                    enriched = await enriched
                record = enriched
            records.append(record)
            _heartbeat(repo["full_name"])
        return records

    async def _select_repos_page(
        self, client, headers, spec, kind, only_items, cursor
    ) -> tuple[list[dict], str | None]:
        """Pick the raw repos for this page and the next cursor.

        Targeted refreshes (`only_items`) and named-repo specs are resolved by
        name in one page; org specs fetch one GitHub page and apply the
        inclusion gate.
        """
        if only_items:
            # Targeted refresh: gate only when the spec is an org sweep.
            targeted = await self._repos_by_name(
                client, headers, only_items, gate=(kind == "org")
            )
            return targeted, None

        if kind != "org":
            # Named repos are included by virtue of being named.
            named = await self._repos_by_name(
                client, headers, spec.params.get("repos", []), gate=False
            )
            return named, None

        page_no = int(cursor) if cursor else 1
        batch, has_more = await api.fetch_org_repos_page(
            client, spec.params.get("org", ""), headers,
            page=page_no, per_page=self._config.per_page,
        )
        next_cursor = str(page_no + 1) if has_more else None
        return list(filter(self._passes_gate, batch)), next_cursor

    async def _repos_by_name(self, client, headers, names, *, gate: bool) -> list[dict]:
        """Fetch each named repo, dropping ones that are missing or (when
        `gate` is set) fail the inclusion gate."""
        selected: list[dict] = []
        for full_name in names:
            repo = await api.get_repo(client, full_name, headers)
            if repo is not None and (not gate or self._passes_gate(repo)):
                selected.append(repo)
        return selected

    def _passes_gate(self, repo: dict) -> bool:
        """Inclusion gate: archived repos never pass; otherwise everything
        passes in discovery mode or with no inclusion topic, else the repo
        must carry the inclusion topic (case-insensitive)."""
        if repo.get("archived"):
            return False
        cfg = self._config
        if cfg.discovery_mode or cfg.inclusion_topic is None:
            return True
        repo_topics = {topic.lower() for topic in (repo.get("topics") or [])}
        return cfg.inclusion_topic.lower() in repo_topics

    def _base_record(
        self, repo: dict, readme: str | None, lang_bytes: dict[str, int], authors: list[str]
    ) -> Record:
        """Build the raw Record for one repo, keyed by the repo id, with the
        README as its body."""
        language_names = api.raw_languages(lang_bytes)
        license_info = repo.get("license") or {}
        spdx = license_info.get("spdx_id")
        # NOASSERTION means GitHub found no recognized license -> leave blank.
        license_value = spdx if spdx and spdx != "NOASSERTION" else None
        last_activity = repo.get("pushed_at") or repo.get("created_at")

        props = {
            self._config.title_property: repo["name"],
            "Repo ID": str(repo["id"]),
            "Repo URL": repo["html_url"],
            "Description": repo.get("description") or "",
            "Languages": ", ".join(language_names),
            "Topics (raw)": ", ".join(repo.get("topics") or []),
            "Authors": ", ".join(authors),
            "Stars": int(repo.get("stargazers_count") or 0),
            "Forks": int(repo.get("forks_count") or 0),
            "Open issues": int(repo.get("open_issues_count") or 0),
            "Is fork": bool(repo.get("fork")),
            "License": license_value,
            "Created": api.iso_date(repo.get("created_at")),
            "Last updated": api.iso_date(last_activity),
        }
        return Record(primary_key=str(repo["id"]), properties=props, body=readme)
@@ -0,0 +1,20 @@
1
+ """Luma connector: a Luma calendar's events, BOTH directions.
2
+
3
+ `LumaSource` reads events -> Records; `LumaDestination` creates/updates events
4
+ (e.g. cross-posting from Notion), sharing api.py. Source policy (e.g. matching
5
+ hosts against your own directory) belongs in the source's `enrich` hook — see
6
+ LumaEventContext. Because Luma events can't hold a foreign key, the destination
7
+ takes a required `LinkStore` (app-owned correspondence; see the boundary doctrine).
8
+
9
+ Requires the `luma` extra: pip install "durable-sync[luma]"
10
+ """
11
+ from __future__ import annotations
12
+
13
+ from durable_sync.linkstore import InMemoryLinkStore, LinkStore # re-export for convenience
14
+ from durable_sync.connectors.luma.destination import LumaDestination
15
+ from durable_sync.connectors.luma.source import LumaConfig, LumaEventContext, LumaSource
16
+
17
+ __all__ = [
18
+ "LumaSource", "LumaConfig", "LumaEventContext",
19
+ "LumaDestination", "LinkStore", "InMemoryLinkStore",
20
+ ]
@@ -0,0 +1,121 @@
1
+ """Luma API helpers — pure async HTTP + small pure transforms. No Temporal, no
2
+ config globals: every call takes its `headers`. Reusable from the Source's fetch
3
+ loop AND from an app's enrich hook (which gets the live client via the context).
4
+
5
+ Verify paths/params against Luma's docs as they evolve:
6
+ https://docs.luma.com/reference/get_v1-calendar-list-events
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import logging
11
+ from typing import Any
12
+
13
+ import httpx
14
+
15
+ from durable_sync.core import DestinationHTTPError
16
+ from durable_sync.http import request_with_retry
17
+
18
+ BASE_URL = "https://public-api.luma.com/v1"
19
+ LIST_EVENTS_PATH = "/calendar/list-events"
20
+ GET_EVENT_PATH = "/event/get"
21
+ PAGE_LIMIT = 50
22
+ log = logging.getLogger("durable_sync.connectors.luma")
23
+
24
+
25
def build_headers(api_key: str | None) -> dict[str, str]:
    """Return the headers for Luma API calls; a missing key is sent as an
    empty string."""
    key_value = api_key if api_key else ""
    return {"x-luma-api-key": key_value, "Accept": "application/json"}
27
+
28
+
29
async def list_event_entries_page(
    client: httpx.AsyncClient, headers: dict, after_iso: str, *,
    cursor: str | None = None, page_limit: int = PAGE_LIMIT,
) -> tuple[list[dict[str, Any]], str | None]:
    """Fetch a single page of raw Luma event entries on/after `after_iso`.

    Returns `(entries, next_cursor)`. Entries are read from the response's
    "entries" key, falling back to "events". `next_cursor` is the response's
    "next_cursor" when "has_more" is truthy, otherwise None. Error statuses
    are raised via `raise_for_status`.
    """
    query: dict[str, Any] = {"after": after_iso, "pagination_limit": page_limit}
    if cursor:
        query["pagination_cursor"] = cursor

    response = await request_with_retry(
        client, "GET", f"{BASE_URL}{LIST_EVENTS_PATH}", headers=headers, params=query
    )
    response.raise_for_status()
    body = response.json()

    fallback = body.get("events", [])
    entries = body.get("entries", fallback)
    next_cursor = None
    if body.get("has_more"):
        next_cursor = body.get("next_cursor")
    return entries, next_cursor
48
+
49
+
50
async def list_event_entries(
    client: httpx.AsyncClient, headers: dict, after_iso: str, *, page_limit: int = PAGE_LIMIT
) -> list[dict[str, Any]]:
    """Collect every raw Luma event entry on/after `after_iso` by draining
    `list_event_entries_page` until it returns no further cursor."""
    collected: list[dict[str, Any]] = []
    cursor: str | None = None
    while True:
        batch, cursor = await list_event_entries_page(
            client, headers, after_iso, cursor=cursor, page_limit=page_limit
        )
        collected += batch
        if cursor is None:
            break
    return collected
63
+
64
+
65
async def get_event(client: httpx.AsyncClient, headers: dict, api_id: str) -> dict[str, Any] | None:
    """Fetch one event by id and return the response JSON.

    Returns None without a request when `api_id` is empty, and None (after a
    warning) on HTTP 404. Other error statuses are raised.
    """
    if not api_id:
        return None
    response = await request_with_retry(
        client, "GET", f"{BASE_URL}{GET_EVENT_PATH}",
        headers=headers, params={"api_id": api_id},
    )
    if response.status_code != 404:
        response.raise_for_status()
        return response.json()
    log.warning("Luma event not found, skipping: %s", api_id)
    return None
78
+
79
+
80
async def get_event_hosts(client: httpx.AsyncClient, headers: dict, api_id: str) -> list[dict[str, Any]]:
    """Fetch one event and return the "hosts" value of the response.

    Returns an empty list without a request when `api_id` is empty, and an
    empty list on HTTP 404 or when the response has no "hosts" key. Other
    error statuses are raised. This is one extra request per event.
    """
    if not api_id:
        return []
    response = await request_with_retry(
        client, "GET", f"{BASE_URL}{GET_EVENT_PATH}",
        headers=headers, params={"api_id": api_id},
    )
    if response.status_code == 404:
        return []
    response.raise_for_status()
    event_payload = response.json()
    return event_payload.get("hosts", [])
92
+
93
+
94
+ # --- write side (used by LumaDestination) -----------------------------------
95
+ # Verify paths/payload keys against Luma's docs — the write API evolves:
96
+ # https://docs.luma.com/reference/post_v1-event-create
97
+
98
+ CREATE_EVENT_PATH = "/event/create"
99
+ UPDATE_EVENT_PATH = "/event/update"
100
+
101
+
102
async def _write(client: httpx.AsyncClient, path: str, payload: dict[str, Any]) -> dict[str, Any]:
    """POST `payload` as JSON to a Luma path and return the decoded response.

    No headers are passed here; per the module's own note, the client is
    expected to already carry the API-key header. Returns an empty dict when
    the response has no body.

    Raises:
        DestinationHTTPError: for any status >= 400, carrying the status code
            and up to 600 characters of the response text.
    """
    response = await request_with_retry(client, "POST", f"{BASE_URL}{path}", json=payload)
    status = response.status_code
    if status >= 400:
        raise DestinationHTTPError(status, f"Luma POST {path} -> {status}: {response.text[:600]}")
    if response.content:
        return response.json()
    return {}
109
+
110
+
111
async def create_event(client: httpx.AsyncClient, payload: dict[str, Any]) -> str:
    """Create an event and return its api_id.

    The id is looked up under the response's "event" object first (or the
    response itself when there is no "event" key), then at the top level; an
    empty string is returned when neither has one.
    """
    response_body = await _write(client, CREATE_EVENT_PATH, payload)
    event = response_body.get("event", response_body)
    nested_id = event.get("api_id")
    if nested_id:
        return nested_id
    top_level_id = response_body.get("api_id")
    if top_level_id:
        return top_level_id
    return ""
116
+
117
+
118
async def update_event(client: httpx.AsyncClient, api_id: str, payload: dict[str, Any]) -> None:
    """Update an existing event in place.

    The update endpoint takes the identifier under the key `event_id`, so
    `api_id` is sent under that name, merged with `payload` (keys in `payload`
    take precedence on a clash).
    """
    body: dict[str, Any] = {"event_id": api_id}
    body.update(payload)
    await _write(client, UPDATE_EVENT_PATH, body)