durable-sync 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- durable_sync/__init__.py +26 -0
- durable_sync/activities.py +156 -0
- durable_sync/auth/__init__.py +8 -0
- durable_sync/auth/oauth/__init__.py +18 -0
- durable_sync/auth/oauth/flow.py +183 -0
- durable_sync/auth/oauth/refresh.py +58 -0
- durable_sync/auth/oauth/store.py +36 -0
- durable_sync/auth/oauth/token.py +36 -0
- durable_sync/auth/oauth/workflow.py +172 -0
- durable_sync/bootstrap.py +44 -0
- durable_sync/codec.py +80 -0
- durable_sync/config.py +35 -0
- durable_sync/connectors/__init__.py +14 -0
- durable_sync/connectors/asana/__init__.py +13 -0
- durable_sync/connectors/asana/destination.py +213 -0
- durable_sync/connectors/content.py +80 -0
- durable_sync/connectors/contentful/__init__.py +25 -0
- durable_sync/connectors/contentful/api.py +285 -0
- durable_sync/connectors/contentful/bootstrap.py +102 -0
- durable_sync/connectors/contentful/describe.py +61 -0
- durable_sync/connectors/contentful/destination.py +145 -0
- durable_sync/connectors/contentful/encode.py +49 -0
- durable_sync/connectors/contentful/introspect.py +69 -0
- durable_sync/connectors/contentful/mcp.py +95 -0
- durable_sync/connectors/contentful/mcp_destination.py +137 -0
- durable_sync/connectors/contentful/oauth.py +27 -0
- durable_sync/connectors/contentful/prove.py +51 -0
- durable_sync/connectors/contentful/source.py +192 -0
- durable_sync/connectors/contentful/start.py +46 -0
- durable_sync/connectors/contentful/store.py +25 -0
- durable_sync/connectors/contentful/token.py +13 -0
- durable_sync/connectors/contentful/token_check.py +42 -0
- durable_sync/connectors/github/__init__.py +33 -0
- durable_sync/connectors/github/api.py +169 -0
- durable_sync/connectors/github/source.py +230 -0
- durable_sync/connectors/luma/__init__.py +20 -0
- durable_sync/connectors/luma/api.py +121 -0
- durable_sync/connectors/luma/destination.py +128 -0
- durable_sync/connectors/luma/source.py +155 -0
- durable_sync/connectors/multi.py +78 -0
- durable_sync/connectors/notion/__init__.py +20 -0
- durable_sync/connectors/notion/bootstrap.py +97 -0
- durable_sync/connectors/notion/client.py +133 -0
- durable_sync/connectors/notion/destination.py +270 -0
- durable_sync/connectors/notion/oauth.py +25 -0
- durable_sync/connectors/notion/prove.py +57 -0
- durable_sync/connectors/notion/source.py +136 -0
- durable_sync/connectors/notion/start.py +46 -0
- durable_sync/connectors/notion/store.py +25 -0
- durable_sync/connectors/notion/token.py +13 -0
- durable_sync/connectors/youtube/__init__.py +13 -0
- durable_sync/connectors/youtube/api.py +122 -0
- durable_sync/connectors/youtube/source.py +152 -0
- durable_sync/core.py +210 -0
- durable_sync/env.py +55 -0
- durable_sync/http.py +71 -0
- durable_sync/linkstore.py +88 -0
- durable_sync/route.py +86 -0
- durable_sync/temporal_client.py +48 -0
- durable_sync/transport/__init__.py +12 -0
- durable_sync/transport/mcp.py +77 -0
- durable_sync/worker.py +109 -0
- durable_sync/workflows/__init__.py +9 -0
- durable_sync/workflows/sync.py +208 -0
- durable_sync-0.1.0.dist-info/METADATA +310 -0
- durable_sync-0.1.0.dist-info/RECORD +69 -0
- durable_sync-0.1.0.dist-info/WHEEL +5 -0
- durable_sync-0.1.0.dist-info/licenses/LICENSE +21 -0
- durable_sync-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
"""Does the MCP-minted OAuth token also work against the plain CMA REST API?
|
|
2
|
+
|
|
3
|
+
The Contentful MCP returns LLM-oriented XML, awkward to parse for a sync pipeline.
|
|
4
|
+
But if the OAuth token we mint for the MCP server ALSO authenticates the CMA REST
|
|
5
|
+
API, we can skip the XML entirely: reuse the existing (clean-JSON) REST
|
|
6
|
+
ContentfulSource/Destination with a durable, workflow-owned OAuth token — no-admin
|
|
7
|
+
auth AND clean JSON. This probe answers that decisively.
|
|
8
|
+
|
|
9
|
+
PYTHONPATH=. python -m durable_sync.connectors.contentful.token_check
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
|
|
15
|
+
from durable_sync.env import load_env
|
|
16
|
+
from durable_sync.connectors.contentful import oauth, store
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def main() -> None:
    """Refresh the stored OAuth token and probe the CMA REST API with it.

    Loads the environment and the stored credentials, exchanges the refresh
    token for an access token, then issues a GET against two CMA endpoints and
    prints each status code plus the first 300 characters of the response body.
    Ends by printing a short legend for interpreting the result.

    Raises:
        SystemExit: when no stored credentials are available.
    """
    load_env()
    creds = store.load()
    if not creds:
        raise SystemExit("No credentials — run connectors.contentful.bootstrap first.")

    tokens = oauth.refresh_access_token(
        creds["token_endpoint"], creds["client_id"], creds["refresh_token"]
    )
    rotated = tokens.get("refresh_token")
    if rotated:
        # The token endpoint handed back a refresh token: store it so the
        # saved credentials carry the latest one.
        creds["refresh_token"] = rotated
        store.save(creds)

    auth_header = {"Authorization": f"Bearer {tokens['access_token']}"}
    probe_urls = (
        "https://api.contentful.com/users/me",
        "https://api.contentful.com/spaces",
    )
    for url in probe_urls:
        response = httpx.get(url, headers=auth_header, timeout=30)
        print(f"GET {url} -> {response.status_code}")
        print(f" {response.text[:300].strip()}")
        print()

    print("Verdict:")
    print(" /spaces 200 (lists spaces) -> MCP-OAuth token works on CMA REST: reuse the REST connector.")
    print(" 401 / 403 / empty -> token is MCP-scoped only: we parse MCP tool output instead.")
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
if __name__ == "__main__":
|
|
42
|
+
main()
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""GitHub source: orgs + named repos -> Records.
|
|
2
|
+
|
|
3
|
+
The reference Source. Ships the GitHub *mechanism* (HTTP fetchers + generic
|
|
4
|
+
helpers); the *policy/vocab* (which topics mean what, language->SDK maps,
|
|
5
|
+
static analysis) belongs in your app's `enrich` hook — see RepoContext.
|
|
6
|
+
|
|
7
|
+
Requires the `github` extra: pip install "durable-sync[github]"
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from durable_sync.connectors.github.api import (
|
|
12
|
+
build_headers,
|
|
13
|
+
classify,
|
|
14
|
+
fetch_org_members,
|
|
15
|
+
is_member,
|
|
16
|
+
raw_languages,
|
|
17
|
+
)
|
|
18
|
+
from durable_sync.connectors.github.source import (
|
|
19
|
+
GitHubConfig,
|
|
20
|
+
GitHubSource,
|
|
21
|
+
RepoContext,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"GitHubSource",
|
|
26
|
+
"GitHubConfig",
|
|
27
|
+
"RepoContext",
|
|
28
|
+
"is_member",
|
|
29
|
+
"classify",
|
|
30
|
+
"raw_languages",
|
|
31
|
+
"fetch_org_members",
|
|
32
|
+
"build_headers",
|
|
33
|
+
]
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
"""GitHub REST helpers — pure HTTP + small pure transforms. No Temporal, no
|
|
2
|
+
config globals: every call takes its `headers`. Reusable from the Source's
|
|
3
|
+
fetch loop AND from an app's enrich hook (which gets the live client via
|
|
4
|
+
RepoContext).
|
|
5
|
+
"""
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from durable_sync.http import request_with_retry
|
|
13
|
+
|
|
14
|
+
GITHUB_API = "https://api.github.com"
|
|
15
|
+
PER_PAGE = 100
|
|
16
|
+
log = logging.getLogger("durable_sync.connectors.github")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def build_headers(token: str | None, *, user_agent: str = "durable-sync") -> dict[str, str]:
    """Build the header dict sent with every GitHub REST call.

    Accept, X-GitHub-Api-Version and User-Agent are always present; a Bearer
    Authorization header is appended only when `token` is truthy, so an
    unset or empty token produces headers without credentials.
    """
    auth = {"Authorization": f"Bearer {token}"} if token else {}
    return {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "User-Agent": user_agent,
        **auth,
    }
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# --- pure transforms -------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
def raw_languages(byte_counts: dict[str, int]) -> list[str]:
    """Return the language names ordered by descending byte count.

    The sort is stable, so languages with equal byte counts keep the order
    they have in `byte_counts`.
    """
    return sorted(byte_counts, key=byte_counts.__getitem__, reverse=True)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def classify(topics: list[str], mapping: dict[str, str]) -> list[str]:
    """Translate topics into labels via a {topic_lower: label} mapping.

    Each topic is lower-cased before lookup. Topics with no entry (or an empty
    label) are dropped, and repeated labels are collapsed to their first
    occurrence so the result keeps the order in which labels were first seen.
    The mapping itself is app vocab — the library just applies it.
    """
    looked_up = (mapping.get(topic.lower()) for topic in topics)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(label for label in looked_up if label))
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def is_member(handle: str, members: set[str]) -> bool:
    """Whether a contributor handle belongs to the org-member set (insider-or-not).

    Neutral primitive: an app's enrich hook picks its own labels
    (e.g. Employee/Community, Staff/External) from this boolean.

    The check is a plain membership test, so `handle` must match an entry of
    `members` exactly (including letter case).
    """
    return handle in members
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def iso_date(s: str | None) -> str | None:
    """Keep only the first 10 characters of a timestamp string.

    For an ISO-8601 timestamp that is the YYYY-MM-DD date part (the destination
    handles date_properties specially). `None` or an empty string yields `None`.
    """
    if not s:
        return None
    return s[:10]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
# --- HTTP fetchers ---------------------------------------------------------
|
|
61
|
+
# All go through request_with_retry, which backs off on 429 + GitHub's
|
|
62
|
+
# rate-limited 403 (honoring Retry-After). The enrichment fetchers below tolerate
|
|
63
|
+
# a failed call by returning empty, but LOG it first — a silently empty languages
|
|
64
|
+
# list (because we got rate-limited) reads as "this repo has no languages", which
|
|
65
|
+
# is a data-quality landmine on a large org sweep.
|
|
66
|
+
|
|
67
|
+
async def get_repo(client: httpx.AsyncClient, full_name: str, headers: dict) -> dict | None:
    """Fetch one repository by its "owner/repo" name.

    Returns the decoded JSON body, or `None` (after logging a warning) when
    GitHub answers 404. Any other error status is raised via
    `raise_for_status()`.
    """
    url = f"{GITHUB_API}/repos/{full_name}"
    response = await request_with_retry(client, "GET", url, headers=headers)
    if response.status_code != 404:
        response.raise_for_status()
        return response.json()
    log.warning("Repo not found, skipping: %s", full_name)
    return None
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
async def fetch_org_repos_page(
    client: httpx.AsyncClient, org: str, headers: dict, *, page: int, per_page: int = PER_PAGE
) -> tuple[list[dict], bool]:
    """ONE page of an org's public repos. Returns (batch, has_more).

    The page number is the pagination cursor the spine threads through
    `GitHubSource.fetch_page`, so the fetch result never passes through workflow
    history as one oversized payload. Caller applies inclusion gating. Ordered
    by full_name (stable across pages).

    Args:
        client: live HTTP client used for the request.
        org: organisation login.
        headers: request headers (see `build_headers`).
        page: 1-based page number to fetch.
        per_page: requested page size; clamped to the 1..100 range GitHub accepts.

    Returns:
        The decoded page and a flag that is True when the page came back full,
        i.e. another page may exist.

    Raises:
        httpx.HTTPStatusError: on any error status (via `raise_for_status()`).
    """
    # GitHub serves at most 100 items per page. Without the clamp, a larger
    # per_page makes a full 100-item page compare unequal to per_page, so
    # has_more turns False and the sweep silently stops after the first page.
    page_size = max(1, min(per_page, 100))
    r = await request_with_retry(
        client, "GET", f"{GITHUB_API}/orgs/{org}/repos",
        headers=headers,
        params={"per_page": page_size, "page": page, "type": "public", "sort": "full_name"},
    )
    r.raise_for_status()
    batch = r.json()
    return batch, len(batch) == page_size
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
async def fetch_org_repos(
    client: httpx.AsyncClient, org: str, headers: dict, *, per_page: int = PER_PAGE
) -> list[dict]:
    """Collect every public repo of an org by draining `fetch_org_repos_page`.

    Intended for non-Temporal callers (an enrich hook, a script); the spine
    uses the paged form directly. Pages are requested from 1 upwards until a
    page reports that nothing follows it.
    """
    collected: list[dict] = []
    page_no = 1
    more_pages = True
    while more_pages:
        batch, more_pages = await fetch_org_repos_page(
            client, org, headers, page=page_no, per_page=per_page
        )
        collected.extend(batch)
        page_no += 1
    return collected
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
async def fetch_readme(client: httpx.AsyncClient, full_name: str, headers: dict) -> str | None:
    """Fetch a repo's README as raw text.

    Sends the caller's headers with Accept overridden to the raw media type.
    Returns the body on HTTP 200 and `None` otherwise; a 404 (no README) is
    treated as normal, every other non-200 status is logged as a warning.
    """
    raw_headers = {**headers, "Accept": "application/vnd.github.raw"}
    response = await request_with_retry(
        client, "GET", f"{GITHUB_API}/repos/{full_name}/readme", headers=raw_headers
    )
    status = response.status_code
    if status == 200:
        return response.text
    if status == 404:
        # No README is an ordinary outcome, not worth a log line.
        return None
    log.warning("README fetch for %s failed: HTTP %s", full_name, status)
    return None
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
async def fetch_languages(client: httpx.AsyncClient, full_name: str, headers: dict) -> dict[str, int]:
    """Fetch the per-language byte counts GitHub reports for a repo.

    Returns the decoded JSON on HTTP 200. Any other status is logged as a
    warning and an empty dict is returned, so the record will list no
    languages rather than failing the sweep.
    """
    url = f"{GITHUB_API}/repos/{full_name}/languages"
    response = await request_with_retry(client, "GET", url, headers=headers)
    if response.status_code != 200:
        log.warning(
            "Languages fetch for %s failed: HTTP %s — record will list none",
            full_name, response.status_code,
        )
        return {}
    return response.json()
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
async def fetch_contributors(
    client: httpx.AsyncClient, full_name: str, headers: dict, *, limit: int = 5
) -> list[str]:
    """Top contributor handles, most-commits-first, bots filtered out.

    Args:
        client: live HTTP client used for the request.
        full_name: "owner/repo" name of the repository.
        headers: request headers (see `build_headers`).
        limit: maximum number of handles to return. Zero or a negative value
            returns an empty list without calling the API.

    Returns:
        Up to `limit` logins in the order GitHub returned them, skipping entries
        without a login and logins ending in "[bot]". Fewer than `limit` may come
        back because only a single page is requested. A non-200 response is
        logged as a warning and yields an empty list.
    """
    if limit <= 0:
        # Nothing was asked for; the old loop appended before checking the
        # limit and so returned one handle here.
        return []
    # Ask for at least 25 so bot filtering still leaves enough humans, and grow
    # the page (up to the 100-item maximum) when the caller wants more than 25.
    page_size = min(100, max(25, limit))
    r = await request_with_retry(
        client, "GET", f"{GITHUB_API}/repos/{full_name}/contributors",
        headers=headers, params={"per_page": page_size},
    )
    if r.status_code != 200:
        log.warning("Contributors fetch for %s failed: HTTP %s", full_name, r.status_code)
        return []
    data = r.json()
    if not isinstance(data, list):
        return []
    logins = (c.get("login") for c in data)
    humans = [login for login in logins if login and not login.endswith("[bot]")]
    return humans[:limit]
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
async def fetch_org_members(client: httpx.AsyncClient, org: str, headers: dict) -> set[str]:
    """Member logins for an org (needs read:org to see private members).

    Walks the members listing 100 entries at a time. Paging stops at the first
    short page, at a non-200 response (logged as a warning) or at a body that
    is not a list; whatever was gathered up to that point is returned.
    """
    logins: set[str] = set()
    page_no = 1
    while True:
        response = await request_with_retry(
            client, "GET", f"{GITHUB_API}/orgs/{org}/members",
            headers=headers, params={"per_page": 100, "page": page_no},
        )
        if response.status_code != 200:
            log.warning("Org members fetch for %s failed: HTTP %s", org, response.status_code)
            return logins
        batch = response.json()
        if not isinstance(batch, list):
            return logins
        for member in batch:
            if member.get("login"):
                logins.add(member["login"])
        if len(batch) < 100:
            # A short page is the last page.
            return logins
        page_no += 1
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""GitHubSource — the reference Source, with a source-side enrichment hook.
|
|
2
|
+
|
|
3
|
+
Config is injected (no module globals), so the same code serves any orgs/repos.
|
|
4
|
+
The base fetch produces a raw Record per repo. If you pass an `enrich` hook, the
|
|
5
|
+
source ALSO hands it a `RepoContext` (the raw repo + readme + language bytes +
|
|
6
|
+
authors + employee members + the live HTTP client) so your app can layer on
|
|
7
|
+
domain enrichment WITHOUT importing the source's internals.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import inspect
|
|
12
|
+
import logging
|
|
13
|
+
import os
|
|
14
|
+
from dataclasses import dataclass, field
|
|
15
|
+
from typing import Any, Awaitable, Callable, Union
|
|
16
|
+
|
|
17
|
+
import httpx
|
|
18
|
+
from temporalio import activity
|
|
19
|
+
|
|
20
|
+
from durable_sync.core import Record, SourceSpec
|
|
21
|
+
from durable_sync.connectors.github import api
|
|
22
|
+
|
|
23
|
+
log = logging.getLogger("durable_sync.connectors.github")
|
|
24
|
+
|
|
25
|
+
# enrich(record, ctx) -> Record (sync) or Awaitable[Record] (async); both ok.
|
|
26
|
+
EnrichHook = Callable[[Record, "RepoContext"], Union[Record, Awaitable[Record]]]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class GitHubConfig:
    """Everything GitHub-specific a deployment supplies.

    sources: list of ("org", "name") and/or ("repos", ["owner/repo", ...]).
    org sources are gated by inclusion_topic (unless it's None / discovery_mode);
    named repos are included by virtue of being named.
    """
    sources: list[tuple[str, Any]]
    # Include only org repos carrying this GitHub topic. None = no topic gate
    # (include every non-archived repo). There's no universal default, so set the
    # topic your org uses to mark in-scope repos.
    inclusion_topic: str | None = None
    discovery_mode: bool = False  # org sweep ignores topic + skips README
    # Orgs whose member logins are surfaced to the enrich hook as RepoContext.members
    # (e.g. to distinguish insiders from outside contributors). The source attaches
    # the set; YOUR hook decides what membership means.
    member_orgs: list[str] = field(default_factory=list)
    # Property key under which each Record stores the repository name.
    title_property: str = "Name"
    # Passed through unchanged to every SourceSpec the source emits.
    interval_minutes: int = 30
    # Page size requested from GitHub during an org sweep.
    per_page: int = api.PER_PAGE
    # Name of the environment variable the GitHub token is read from.
    token_env: str = "GITHUB_TOKEN"
    # Maximum number of contributor handles fetched per repo.
    contributor_limit: int = 5
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class RepoContext:
    """Handed to the enrich hook: everything already fetched for one repo, plus
    the live client + headers so enrich can make extra calls (e.g. download a
    tarball for static analysis) without re-authenticating or re-fetching."""
    # Repository JSON object as returned by the GitHub API.
    raw_repo: dict
    # README text; None in discovery mode or when no README could be fetched.
    readme: str | None
    # Language name -> byte count; empty when the languages call failed.
    language_bytes: dict[str, int]
    # Top contributor logins with bot accounts filtered out.
    authors: list[str]
    # Union of member logins across the configured member_orgs (may be empty).
    members: set[str]
    # The HTTP client the source is currently using for this fetch.
    client: httpx.AsyncClient
    # The request headers the source built for this fetch.
    headers: dict[str, str]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _heartbeat(detail: str) -> None:
    """Send a Temporal activity heartbeat carrying `detail`.

    Does nothing when called outside an activity, which keeps the Source
    runnable and testable standalone.
    """
    if not activity.in_activity():
        return
    activity.heartbeat(detail)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class GitHubSource:
    """Source that turns GitHub repositories into Records.

    Configuration is injected through `GitHubConfig`. When an `enrich` hook is
    supplied it is called once per repo with the base Record and a `RepoContext`
    and its return value (awaited if awaitable) replaces the Record.
    """

    name = "github"

    def __init__(self, config: GitHubConfig, *, enrich: EnrichHook | None = None):
        self._config = config
        self._enrich = enrich

    # --- Source protocol ---------------------------------------------------

    def specs(self) -> list[SourceSpec]:
        """One SourceSpec per entry in `config.sources`.

        ("org", name) entries become `org:<name>` specs; every other entry is
        treated as a named-repo list.
        """
        cfg = self._config
        specs: list[SourceSpec] = []
        for kind, value in cfg.sources:
            if kind == "org":
                specs.append(SourceSpec(
                    key=f"org:{value}",
                    interval_minutes=cfg.interval_minutes,
                    params={"kind": "org", "org": str(value)},
                ))
            else:  # "repos"
                # NOTE(review): every non-org entry gets the same key, so several
                # ("repos", [...]) entries would share "repos:named" — confirm
                # that a single named-repos entry is the intended usage.
                specs.append(SourceSpec(
                    key="repos:named",
                    interval_minutes=cfg.interval_minutes,
                    params={"kind": "repos", "repos": list(value)},
                ))
        return specs

    async def fetch_page(
        self, spec: SourceSpec, only_items: list[str] | None, cursor: str | None
    ) -> tuple[list[Record], str | None]:
        """ONE page of records + the next cursor (None on the last page). For an org
        sweep the cursor is the GitHub page number, so the spine bounds history even
        for a huge org. The named-repos / targeted (`only_items`) paths are small and
        bounded, so they return everything as a single page (next_cursor=None)."""
        cfg = self._config
        kind = spec.params.get("kind")
        headers = api.build_headers(os.environ.get(cfg.token_env))

        async with httpx.AsyncClient(timeout=30) as client:
            repos, next_cursor = await self._select_repos_page(
                client, headers, spec, kind, only_items, cursor)
            # Members are only consumed while building records, so skip the
            # (paginated) member listing when this page has no in-scope repos —
            # common on a topic-gated sweep of a large org.
            members: set[str] = await self._members(client, headers) if repos else set()
            records = await self._records_for_repos(client, headers, repos, members)

        log.info("Fetched %d records for %s (cursor=%s -> %s)", len(records), spec.key, cursor, next_cursor)
        return records, next_cursor

    async def fetch(
        self, spec: SourceSpec, only_items: list[str] | None = None
    ) -> list[Record]:
        """Whole unit as one list — drains fetch_page. Convenience for standalone /
        non-Temporal callers; the spine drives fetch_page page-by-page instead."""
        records: list[Record] = []
        cursor: str | None = None
        while True:
            page, cursor = await self.fetch_page(spec, only_items, cursor)
            records.extend(page)
            if cursor is None:
                return records

    # --- internals ---------------------------------------------------------

    async def _members(self, client, headers) -> set[str]:
        """Org member logins for the enrich hook — only when a hook can use them.
        Re-fetched per page in the paged path (activities are stateless); members
        change rarely and member_orgs is opt-in, so the extra calls are acceptable."""
        members: set[str] = set()
        if self._enrich and self._config.member_orgs:
            for org in self._config.member_orgs:
                members |= await api.fetch_org_members(client, org, headers)
        return members

    async def _records_for_repos(self, client, headers, repos, members) -> list[Record]:
        """Build one Record per distinct repo, fetching README, languages and
        contributors for each and running the enrich hook when one is set."""
        cfg = self._config
        out: list[Record] = []
        seen: set[str] = set()
        for repo in repos:
            rid = str(repo["id"])
            if rid in seen:  # de-dupe within the page (cross-page dups resolve to
                continue     # updates in the idempotent upsert, so per-page is enough)
            seen.add(rid)
            # Discovery skips READMEs (hundreds of calls).
            readme = None if cfg.discovery_mode else await api.fetch_readme(
                client, repo["full_name"], headers)
            lang_bytes = await api.fetch_languages(client, repo["full_name"], headers)
            authors = await api.fetch_contributors(
                client, repo["full_name"], headers, limit=cfg.contributor_limit)

            record = self._base_record(repo, readme, lang_bytes, authors)
            if self._enrich is not None:
                ctx = RepoContext(
                    raw_repo=repo, readme=readme, language_bytes=lang_bytes,
                    authors=authors, members=members, client=client, headers=headers,
                )
                result = self._enrich(record, ctx)
                # The hook may be sync or async; await only when it returned an awaitable.
                record = await result if inspect.isawaitable(result) else result
            out.append(record)
            _heartbeat(repo["full_name"])
        return out

    async def _select_repos_page(
        self, client, headers, spec, kind, only_items, cursor
    ) -> tuple[list[dict], str | None]:
        """Pick the raw repos for this page and the cursor of the next one."""
        if only_items:  # targeted refresh — bounded, one page (gate org repos)
            return await self._repos_by_name(client, headers, only_items, gate=(kind == "org")), None
        if kind == "org":
            page = int(cursor) if cursor else 1
            batch, has_more = await api.fetch_org_repos_page(
                client, spec.params.get("org", ""), headers, page=page, per_page=self._config.per_page)
            gated = [r for r in batch if self._passes_gate(r)]
            return gated, (str(page + 1) if has_more else None)
        # named repos — included by virtue of being named, bounded, one page
        return await self._repos_by_name(client, headers, spec.params.get("repos", []), gate=False), None

    async def _repos_by_name(self, client, headers, names, *, gate: bool) -> list[dict]:
        """Fetch each named repo, dropping missing ones and (when `gate` is set)
        those that fail the inclusion gate."""
        repos: list[dict] = []
        for full in names:
            repo = await api.get_repo(client, full, headers)
            if repo is None:
                continue
            if gate and not self._passes_gate(repo):
                continue
            repos.append(repo)
        return repos

    def _passes_gate(self, repo: dict) -> bool:
        """Inclusion gate: archived repos never pass; otherwise everything passes
        in discovery mode or without an inclusion topic, else the repo must carry
        the topic (compared case-insensitively)."""
        if repo.get("archived"):
            return False
        if self._config.discovery_mode or self._config.inclusion_topic is None:
            return True
        topics = [t.lower() for t in (repo.get("topics") or [])]
        return self._config.inclusion_topic.lower() in topics

    def _base_record(
        self, repo: dict, readme: str | None, lang_bytes: dict[str, int], authors: list[str]
    ) -> Record:
        """Map raw repo data onto the un-enriched Record (README becomes the body)."""
        languages = api.raw_languages(lang_bytes)
        spdx = (repo.get("license") or {}).get("spdx_id")
        props = {
            self._config.title_property: repo["name"],
            "Repo ID": str(repo["id"]),
            "Repo URL": repo["html_url"],
            "Description": repo.get("description") or "",
            "Languages": ", ".join(languages),
            "Topics (raw)": ", ".join(repo.get("topics") or []),
            "Authors": ", ".join(authors),
            "Stars": int(repo.get("stargazers_count") or 0),
            "Forks": int(repo.get("forks_count") or 0),
            "Open issues": int(repo.get("open_issues_count") or 0),
            "Is fork": bool(repo.get("fork")),
            # NOASSERTION = no recognized license -> blank (itself a signal)
            "License": spdx if spdx and spdx != "NOASSERTION" else None,
            "Created": api.iso_date(repo.get("created_at")),
            "Last updated": api.iso_date(repo.get("pushed_at") or repo.get("created_at")),
        }
        return Record(primary_key=str(repo["id"]), properties=props, body=readme)
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Luma connector: a Luma calendar's events, BOTH directions.
|
|
2
|
+
|
|
3
|
+
`LumaSource` reads events -> Records; `LumaDestination` creates/updates events
|
|
4
|
+
(e.g. cross-posting from Notion), sharing api.py. Source policy (e.g. matching
|
|
5
|
+
hosts against your own directory) belongs in the source's `enrich` hook — see
|
|
6
|
+
LumaEventContext. Because Luma events can't hold a foreign key, the destination
|
|
7
|
+
takes a required `LinkStore` (app-owned correspondence; see the boundary doctrine).
|
|
8
|
+
|
|
9
|
+
Requires the `luma` extra: pip install "durable-sync[luma]"
|
|
10
|
+
"""
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from durable_sync.linkstore import InMemoryLinkStore, LinkStore # re-export for convenience
|
|
14
|
+
from durable_sync.connectors.luma.destination import LumaDestination
|
|
15
|
+
from durable_sync.connectors.luma.source import LumaConfig, LumaEventContext, LumaSource
|
|
16
|
+
|
|
17
|
+
__all__ = [
|
|
18
|
+
"LumaSource", "LumaConfig", "LumaEventContext",
|
|
19
|
+
"LumaDestination", "LinkStore", "InMemoryLinkStore",
|
|
20
|
+
]
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
"""Luma API helpers — pure async HTTP + small pure transforms. No Temporal, no
|
|
2
|
+
config globals: every call takes its `headers`. Reusable from the Source's fetch
|
|
3
|
+
loop AND from an app's enrich hook (which gets the live client via the context).
|
|
4
|
+
|
|
5
|
+
Verify paths/params against Luma's docs as they evolve:
|
|
6
|
+
https://docs.luma.com/reference/get_v1-calendar-list-events
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import httpx
|
|
14
|
+
|
|
15
|
+
from durable_sync.core import DestinationHTTPError
|
|
16
|
+
from durable_sync.http import request_with_retry
|
|
17
|
+
|
|
18
|
+
BASE_URL = "https://public-api.luma.com/v1"
|
|
19
|
+
LIST_EVENTS_PATH = "/calendar/list-events"
|
|
20
|
+
GET_EVENT_PATH = "/event/get"
|
|
21
|
+
PAGE_LIMIT = 50
|
|
22
|
+
log = logging.getLogger("durable_sync.connectors.luma")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def build_headers(api_key: str | None) -> dict[str, str]:
    """Build the headers for Luma API calls.

    A missing (`None`) or empty key is sent as an empty `x-luma-api-key`
    value; `Accept` always asks for JSON.
    """
    key = api_key if api_key else ""
    return {"x-luma-api-key": key, "Accept": "application/json"}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
async def list_event_entries_page(
    client: httpx.AsyncClient, headers: dict, after_iso: str, *,
    cursor: str | None = None, page_limit: int = PAGE_LIMIT,
) -> tuple[list[dict[str, Any]], str | None]:
    """Fetch ONE page of raw Luma event entries on/after `after_iso`.

    Returns (entries, next_cursor). `next_cursor` is the response's
    `next_cursor` value when the response reports `has_more`, otherwise None —
    this is the cursor the spine threads through `LumaSource.fetch_page`.
    Entries are read from the `entries` key, falling back to `events`.
    `list-events` does NOT include hosts.

    Raises:
        httpx.HTTPStatusError: on any error status (via `raise_for_status()`).
    """
    query: dict[str, Any] = {"after": after_iso, "pagination_limit": page_limit}
    if cursor:
        query["pagination_cursor"] = cursor

    response = await request_with_retry(
        client, "GET", f"{BASE_URL}{LIST_EVENTS_PATH}", headers=headers, params=query
    )
    response.raise_for_status()
    payload = response.json()

    if "entries" in payload:
        entries = payload["entries"]
    else:
        entries = payload.get("events", [])

    if not payload.get("has_more"):
        return entries, None
    return entries, payload.get("next_cursor")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
async def list_event_entries(
    client: httpx.AsyncClient, headers: dict, after_iso: str, *, page_limit: int = PAGE_LIMIT
) -> list[dict[str, Any]]:
    """Collect every raw Luma event entry on/after `after_iso`.

    Drains `list_event_entries_page`, following the returned cursor until it
    comes back as None. For non-Temporal callers; the spine pages directly.
    """
    collected: list[dict[str, Any]] = []
    batch, next_cursor = await list_event_entries_page(
        client, headers, after_iso, cursor=None, page_limit=page_limit)
    collected.extend(batch)
    while next_cursor is not None:
        batch, next_cursor = await list_event_entries_page(
            client, headers, after_iso, cursor=next_cursor, page_limit=page_limit)
        collected.extend(batch)
    return collected
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
async def get_event(client: httpx.AsyncClient, headers: dict, api_id: str) -> dict[str, Any] | None:
    """Fetch one event by id, as a list-style entry ({event, hosts, ...}).

    Returns None without calling the API when `api_id` is empty, and None
    (after a warning) when Luma answers 404. Used for targeted refreshes
    (only_items).

    Raises:
        httpx.HTTPStatusError: on any other error status.
    """
    if not api_id:
        return None
    response = await request_with_retry(
        client, "GET", f"{BASE_URL}{GET_EVENT_PATH}", headers=headers, params={"api_id": api_id}
    )
    if response.status_code != 404:
        response.raise_for_status()
        return response.json()
    log.warning("Luma event not found, skipping: %s", api_id)
    return None
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
async def get_event_hosts(client: httpx.AsyncClient, headers: dict, api_id: str) -> list[dict[str, Any]]:
    """Hosts for one event: [{name, email, ...}].

    Reads the `hosts` key of the event-get response. Returns an empty list when
    `api_id` is empty (no request made), when Luma answers 404, or when the
    response has no `hosts` key. This is N+1 against the list call (fine at
    current volume; gate behind a change-token if a source grows high-volume).

    Raises:
        httpx.HTTPStatusError: on any error status other than 404.
    """
    if not api_id:
        return []
    url = f"{BASE_URL}{GET_EVENT_PATH}"
    response = await request_with_retry(
        client, "GET", url, headers=headers, params={"api_id": api_id}
    )
    if response.status_code == 404:
        return []
    response.raise_for_status()
    payload = response.json()
    return payload.get("hosts", [])
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
# --- write side (used by LumaDestination) -----------------------------------
|
|
95
|
+
# Verify paths/payload keys against Luma's docs — the write API evolves:
|
|
96
|
+
# https://docs.luma.com/reference/post_v1-event-create
|
|
97
|
+
|
|
98
|
+
CREATE_EVENT_PATH = "/event/create"
|
|
99
|
+
UPDATE_EVENT_PATH = "/event/update"
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
async def _write(client: httpx.AsyncClient, path: str, payload: dict[str, Any]) -> dict[str, Any]:
    """POST `payload` as JSON to a Luma path and return the decoded response.

    No headers are passed here: the client is expected to carry the
    x-luma-api-key header already (set in connect). An empty response body
    yields an empty dict.

    Raises:
        DestinationHTTPError: for any status >= 400, carrying the status code
            and the first 600 characters of the body (so is_auth_error can
            classify a 401).
    """
    response = await request_with_retry(client, "POST", f"{BASE_URL}{path}", json=payload)
    status = response.status_code
    if status >= 400:
        raise DestinationHTTPError(status, f"Luma POST {path} -> {status}: {response.text[:600]}")
    if not response.content:
        return {}
    return response.json()
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
async def create_event(client: httpx.AsyncClient, payload: dict[str, Any]) -> str:
    """Create an event and return its api_id.

    The id is looked up first on the response's `event` object (or on the
    response itself when there is no `event` key), then on the top-level
    response. An empty string is returned when neither carries an `api_id`.
    """
    response = await _write(client, CREATE_EVENT_PATH, payload)
    event = response["event"] if "event" in response else response
    nested_id = event.get("api_id")
    if nested_id:
        return nested_id
    return response.get("api_id") or ""
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
async def update_event(client: httpx.AsyncClient, api_id: str, payload: dict[str, Any]) -> None:
    """Update an existing event in place.

    NB: /event/update names the identifier `event_id` (create returns it as
    `api_id`) — confirmed against the live API. The payload's own keys are
    applied after `event_id`, so they win on a name clash.
    """
    body: dict[str, Any] = {"event_id": api_id}
    body.update(payload)
    await _write(client, UPDATE_EVENT_PATH, body)
|