github-dkg 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
github_dkg/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """github-dkg: Ingest GitHub issues, PRs, and reviews into DKG v10 Working Memory."""
2
+
3
+ from .client import DKGClient
4
+ from .github_client import GitHubClient
5
+ from .ingestor import GitHubDKGIngestor
6
+
7
+ __all__ = ["DKGClient", "GitHubClient", "GitHubDKGIngestor"]
8
+ __version__ = "0.1.0"
github_dkg/cli.py ADDED
@@ -0,0 +1,187 @@
1
+ """CLI entry point for github-dkg."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import sys
7
+
8
+ import click
9
+
10
+ from .client import DKGClient
11
+ from .github_client import GitHubClient
12
+ from .ingestor import GitHubDKGIngestor
13
+
14
+
15
def _make_clients(
    dkg_token: str | None,
    dkg_url: str | None,
    github_token: str | None,
) -> tuple[DKGClient, GitHubClient]:
    """Build the DKG and GitHub API clients shared by the ingest commands.

    Args:
        dkg_token: Bearer token forwarded to ``DKGClient``.
        dkg_url: Base URL forwarded to ``DKGClient``.
        github_token: Token forwarded to ``GitHubClient``.

    Returns:
        A ``(DKGClient, GitHubClient)`` pair, constructed in that order.
    """
    return (
        DKGClient(base_url=dkg_url, token=dkg_token),
        GitHubClient(token=github_token),
    )
23
+
24
+
25
# Root click command group. The package's console-script entry point
# (``github-dkg = github_dkg.cli:main``) resolves to this function, and the
# commands in this module attach to it through ``@main.command()``.
# The docstring is shown by click as the group's help text, so it is kept
# verbatim.
@click.group()
def main() -> None:
    """github-dkg: Ingest GitHub knowledge into DKG v10 Working Memory."""
28
+
29
+
30
@main.command()
@click.argument("repo")  # owner/repo
@click.option("--context-graph", required=True, envvar="DKG_CONTEXT_GRAPH", help="Context Graph ID")
@click.option("--layer", default="wm", show_default=True, type=click.Choice(["wm", "swm"]))
@click.option("--since", default=None, help="ISO 8601 date — only ingest items updated after this date")
@click.option("--no-issues", is_flag=True, default=False, help="Skip issues")
@click.option("--no-pulls", is_flag=True, default=False, help="Skip pull requests")
@click.option(
    "--concurrency",
    default=5,
    show_default=True,
    # The value sizes an asyncio.Semaphore in the ingestor; 0 would create a
    # semaphore that no task can ever acquire, so reject it at parse time.
    type=click.IntRange(min=1),
    help="Parallel DKG writes",
)
@click.option("--dkg-token", envvar="DKG_TOKEN", default=None)
@click.option("--dkg-url", envvar="DKG_BASE_URL", default=None)
@click.option("--github-token", envvar="GITHUB_TOKEN", default=None)
def ingest(
    repo: str,
    context_graph: str,
    layer: str,
    since: str | None,
    no_issues: bool,
    no_pulls: bool,
    concurrency: int,
    dkg_token: str | None,
    dkg_url: str | None,
    github_token: str | None,
) -> None:
    """Bulk-ingest all issues and PRs from OWNER/REPO into Working Memory."""
    # The docstring above doubles as the command's --help text.
    #
    # Exit status: 0 when every item was ingested, 1 on invalid arguments,
    # missing credentials, an unreachable DKG node, or any per-item error.

    # Both halves must be non-empty: "owner/" or "/repo" would otherwise build
    # a malformed GitHub API path further down.
    owner, _, repo_name = repo.partition("/")
    if not owner or not repo_name:
        click.echo("Error: REPO must be in owner/repo format", err=True)
        sys.exit(1)

    async def run() -> int:
        # The client constructors raise ValueError when a token is missing;
        # report that as a normal CLI error instead of a traceback.
        try:
            dkg, gh = _make_clients(dkg_token, dkg_url, github_token)
        except ValueError as exc:
            click.echo(f"Error: {exc}", err=True)
            return 1

        click.echo("Connecting to DKG node...")
        if not await dkg.ping():
            click.echo("Error: DKG node unreachable or token invalid", err=True)
            return 1

        ingestor = GitHubDKGIngestor(
            dkg=dkg,
            github=gh,
            context_graph_id=context_graph,
            layer=layer,
            concurrency=concurrency,
        )

        click.echo(f"Ingesting {owner}/{repo_name} → context graph '{context_graph}' (layer={layer})")
        result = await ingestor.ingest_repo(
            owner=owner,
            repo=repo_name,
            since=since,
            include_issues=not no_issues,
            include_pulls=not no_pulls,
        )

        click.echo(f"Done: {result.issues_ingested} issues, {result.pulls_ingested} PRs ingested")
        if result.errors:
            # Header and details both go to stderr so the error report stays
            # together when stdout is redirected.
            click.echo(f"Errors ({len(result.errors)}):", err=True)
            for err in result.errors:
                click.echo(f" {err}", err=True)
            return 1
        return 0

    sys.exit(asyncio.run(run()))
93
+
94
+
95
@main.command()
@click.argument("repo")  # owner/repo
# Issue and PR numbers start at 1; reject 0 and negatives before any request.
@click.argument("number", type=click.IntRange(min=1))
@click.option("--type", "item_type", required=True, type=click.Choice(["issue", "pr"]))
@click.option("--context-graph", required=True, envvar="DKG_CONTEXT_GRAPH")
@click.option("--layer", default="wm", show_default=True, type=click.Choice(["wm", "swm"]))
@click.option("--dkg-token", envvar="DKG_TOKEN", default=None)
@click.option("--dkg-url", envvar="DKG_BASE_URL", default=None)
@click.option("--github-token", envvar="GITHUB_TOKEN", default=None)
def ingest_one(
    repo: str,
    number: int,
    item_type: str,
    context_graph: str,
    layer: str,
    dkg_token: str | None,
    dkg_url: str | None,
    github_token: str | None,
) -> None:
    """Ingest a single issue or PR by number."""
    # The docstring above doubles as the command's --help text.
    #
    # Exit status: 0 on success, 1 on invalid arguments or missing
    # credentials. HTTP failures from GitHub or the DKG node still propagate
    # as exceptions.

    # Both halves must be non-empty, matching the check done by `ingest`.
    owner, _, repo_name = repo.partition("/")
    if not owner or not repo_name:
        click.echo("Error: REPO must be in owner/repo format", err=True)
        sys.exit(1)

    async def run() -> int:
        # The client constructors raise ValueError when a token is missing;
        # report that as a normal CLI error instead of a traceback.
        try:
            dkg, gh = _make_clients(dkg_token, dkg_url, github_token)
        except ValueError as exc:
            click.echo(f"Error: {exc}", err=True)
            return 1

        ingestor = GitHubDKGIngestor(dkg=dkg, github=gh, context_graph_id=context_graph, layer=layer)
        if item_type == "issue":
            resp = await ingestor.ingest_issue(owner, repo_name, number)
        else:
            resp = await ingestor.ingest_pull(owner, repo_name, number)
        turn_uri = resp.get("turnUri", "")
        click.echo(f"Ingested: {turn_uri}")
        return 0

    sys.exit(asyncio.run(run()))
131
+
132
+
133
@main.command()
@click.argument("turn-uri")
@click.option("--context-graph", required=True, envvar="DKG_CONTEXT_GRAPH")
@click.option("--dkg-token", envvar="DKG_TOKEN", default=None)
@click.option("--dkg-url", envvar="DKG_BASE_URL", default=None)
def promote(
    turn_uri: str,
    context_graph: str,
    dkg_token: str | None,
    dkg_url: str | None,
) -> None:
    """Promote a Working Memory Knowledge Asset to Shared Working Memory (SHARE)."""
    # The docstring above doubles as the command's --help text.

    async def _promote_asset() -> None:
        # Promotion only talks to the DKG node, so the ingestor is built
        # without a GitHub client.
        ingestor = GitHubDKGIngestor(
            dkg=DKGClient(base_url=dkg_url, token=dkg_token),
            context_graph_id=context_graph,
        )
        resp = await ingestor.promote(turn_uri)
        click.echo(f"Promoted: {resp}")

    asyncio.run(_promote_asset())
153
+
154
+
155
@main.command()
@click.argument("query")
@click.option("--context-graph", required=True, envvar="DKG_CONTEXT_GRAPH")
@click.option("--limit", default=10, show_default=True)
@click.option("--dkg-token", envvar="DKG_TOKEN", default=None)
@click.option("--dkg-url", envvar="DKG_BASE_URL", default=None)
def search(
    query: str,
    context_graph: str,
    limit: int,
    dkg_token: str | None,
    dkg_url: str | None,
) -> None:
    """Search ingested GitHub knowledge in Working Memory."""
    # The docstring above doubles as the command's --help text.

    async def _run_search() -> None:
        client = DKGClient(base_url=dkg_url, token=dkg_token)
        payload = await client.memory_search(
            context_graph_id=context_graph,
            query=query,
            limit=limit,
        )

        # Summary line first, then one entry per hit.
        count = payload.get("resultCount", 0)
        click.echo(f"{count} result(s) for '{query}':")

        for hit in payload.get("results", []):
            # Fall back to the entity URI when a hit has no label.
            label = hit.get("label", hit.get("entityUri", ""))
            snippet = hit.get("snippet", "")
            layer = hit.get("memoryLayer", "")
            click.echo(f" [{layer}] {label}")
            if not snippet:
                continue
            # Snippets are capped at 120 characters.
            click.echo(f" {snippet[:120]}")

    asyncio.run(_run_search())
github_dkg/client.py ADDED
@@ -0,0 +1,139 @@
1
+ """HTTP client for the DKG v10 node API."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from typing import Any
7
+
8
+ import httpx
9
+
10
+
11
class DKGClient:
    """Thin async wrapper around the DKG v10 HTTP API (port 9200).

    Every method except :meth:`ping` raises ``httpx.HTTPStatusError`` on a
    non-2xx response; ``ping`` reports failure by returning ``False``.
    Each call opens its own short-lived ``httpx.AsyncClient``.
    """

    def __init__(
        self,
        base_url: str | None = None,
        token: str | None = None,
        timeout: float = 30.0,
    ) -> None:
        """Configure the client.

        Args:
            base_url: Node URL. Falls back to the ``DKG_BASE_URL`` environment
                variable, then to ``http://localhost:9200``. Trailing slashes
                are stripped.
            token: Bearer token. Falls back to the ``DKG_TOKEN`` environment
                variable.
            timeout: Per-request timeout in seconds (``ping`` uses its own
                fixed 5 second timeout).

        Raises:
            ValueError: If no token is given and ``DKG_TOKEN`` is unset/empty.
        """
        self.base_url = (
            base_url or os.environ.get("DKG_BASE_URL", "http://localhost:9200")
        ).rstrip("/")
        token = token or os.environ.get("DKG_TOKEN", "")
        if not token:
            raise ValueError(
                "DKG bearer token required. Pass token= or set DKG_TOKEN env var."
            )
        self._headers = {"Authorization": f"Bearer {token}"}
        self._timeout = timeout

    async def ping(self) -> bool:
        """Return True when ``GET /api/agents`` answers with HTTP 200."""
        try:
            async with httpx.AsyncClient(timeout=5.0) as http:
                r = await http.get(
                    f"{self.base_url}/api/agents", headers=self._headers
                )
                return r.status_code == 200
        except Exception:
            # Deliberately broad: this is a best-effort health probe, and any
            # failure (connection, timeout, bad URL) simply means "not OK".
            return False

    async def _post(self, path: str, body: dict[str, Any]) -> dict[str, Any]:
        """POST ``body`` as JSON to ``base_url + path`` and return the decoded JSON reply.

        Raises:
            httpx.HTTPStatusError: On a non-2xx response.
        """
        async with httpx.AsyncClient(timeout=self._timeout) as http:
            r = await http.post(
                f"{self.base_url}{path}",
                headers=self._headers,
                json=body,
            )
            r.raise_for_status()
            return r.json()

    async def create_context_graph(self, name: str) -> dict[str, Any]:
        """Create a context graph called ``name`` and return the node's JSON reply."""
        return await self._post("/api/context-graph/create", {"name": name})

    async def memory_turn(
        self,
        context_graph_id: str,
        markdown: str,
        session_uri: str | None = None,
        layer: str = "wm",
        sub_graph_name: str | None = None,
    ) -> dict[str, Any]:
        """Ingest a markdown artifact as a Knowledge Asset in Working Memory.

        ``session_uri`` and ``sub_graph_name`` are only sent when truthy.
        Returns the node's JSON reply.
        """
        body: dict[str, Any] = {
            "contextGraphId": context_graph_id,
            "markdown": markdown,
            "layer": layer,
        }
        if session_uri:
            body["sessionUri"] = session_uri
        if sub_graph_name:
            body["subGraphName"] = sub_graph_name
        return await self._post("/api/memory/turn", body)

    async def memory_search(
        self,
        context_graph_id: str,
        query: str,
        limit: int = 20,
        memory_layers: list[str] | None = None,
    ) -> dict[str, Any]:
        """Search a context graph and return the node's JSON reply.

        ``memory_layers`` is only sent when non-empty.
        """
        body: dict[str, Any] = {
            "contextGraphId": context_graph_id,
            "query": query,
            "limit": limit,
        }
        if memory_layers:
            body["memoryLayers"] = memory_layers
        return await self._post("/api/memory/search", body)

    async def assertion_promote(
        self,
        name: str,
        context_graph_id: str,
        entities: list[str] | None = None,
    ) -> dict[str, Any]:
        """Promote a Working Memory assertion to Shared Working Memory (SHARE).

        Args:
            name: Assertion name; it becomes a single URL path segment.
            context_graph_id: Context graph the assertion belongs to.
            entities: Optional entity list, only sent when non-empty.

        Raises:
            ValueError: If ``name`` is empty.
            httpx.HTTPStatusError: On a non-2xx response.
        """
        from urllib.parse import quote

        if not name:
            # An empty name would produce "/api/assertion//promote".
            raise ValueError("assertion name must not be empty")
        body: dict[str, Any] = {"contextGraphId": context_graph_id}
        if entities:
            body["entities"] = entities
        # Percent-encode the name (including "/") so it cannot alter the
        # request path or query string.
        return await self._post(f"/api/assertion/{quote(name, safe='')}/promote", body)

    async def query(
        self,
        sparql: str,
        include_workspace: bool = True,
    ) -> dict[str, Any]:
        """Run a SPARQL query via ``/api/query`` and return the node's JSON reply."""
        body: dict[str, Any] = {
            "sparql": sparql,
            "includeWorkspace": include_workspace,
        }
        return await self._post("/api/query", body)
@@ -0,0 +1,135 @@
1
+ """Format GitHub items as Markdown Knowledge Assets for DKG v10 Working Memory."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+
8
+ def _label_names(labels: list[dict[str, Any]]) -> str:
9
+ names = [lbl.get("name", "") for lbl in labels if lbl.get("name")]
10
+ return ", ".join(names) if names else "none"
11
+
12
+
13
+ def _username(user: dict[str, Any] | None) -> str:
14
+ if not user:
15
+ return "unknown"
16
+ return user.get("login", "unknown")
17
+
18
+
19
def format_issue(
    issue: dict[str, Any],
    comments: list[dict[str, Any]],
    owner: str,
    repo: str,
) -> str:
    """Render an issue and its comments as a Markdown document.

    The output starts with a metadata header (title, repository, author,
    labels, state, dates, URL), followed by an optional description section
    and an optional comments section. Dates are cut to their first ten
    characters.

    Args:
        issue: Issue object; ``number`` is required, other keys are optional.
        comments: Comment objects; comments with an empty body are skipped.
        owner: Repository owner, used in the header and the fallback URL.
        repo: Repository name, used in the header and the fallback URL.

    Returns:
        The Markdown text, lines joined with ``"\\n"``.

    Raises:
        KeyError: If ``issue`` has no ``number``.
    """
    number = issue["number"]
    title = issue.get("title", "")
    author = _username(issue.get("user"))
    labels = _label_names(issue.get("labels", []))
    state = issue.get("state", "unknown")
    state_reason = issue.get("state_reason") or ""
    created = (issue.get("created_at") or "")[:10]
    closed = (issue.get("closed_at") or "")[:10]
    description = (issue.get("body") or "").strip()
    url = issue.get("html_url", f"https://github.com/{owner}/{repo}/issues/{number}")

    # Optional suffixes are appended only when the underlying value exists.
    status_line = f"**Author:** {author} | **Labels:** {labels} | **State:** {state}"
    if state_reason:
        status_line += f" ({state_reason})"

    dates_line = f"**Created:** {created}"
    if closed:
        dates_line += f" | **Closed:** {closed}"

    out = [
        f"**GitHub Issue #{number}:** {title}",
        f"**Repository:** {owner}/{repo}",
        status_line,
        dates_line,
        f"**URL:** {url}",
    ]

    if description:
        out.extend(["", "**Description:**", description])

    if comments:
        # The section heading is emitted even if every comment turns out empty.
        out.extend(["", "**Comments:**"])
        for entry in comments:
            commenter = _username(entry.get("user"))
            when = (entry.get("created_at") or "")[:10]
            text = (entry.get("body") or "").strip()
            if not text:
                continue
            out.append(f"- **{commenter}** ({when}): {text}")

    return "\n".join(out)
58
+
59
+
60
def format_pull_request(
    pr: dict[str, Any],
    reviews: list[dict[str, Any]],
    inline_comments: list[dict[str, Any]],
    owner: str,
    repo: str,
) -> str:
    """Render a pull request, its reviews and inline comments as Markdown.

    The output starts with a metadata header (title, repository, author,
    labels, state, requested reviewers, branches, dates, URL), followed by
    optional description, reviews, and inline-comment sections. Dates are cut
    to their first ten characters. Keys that are present but null are treated
    the same as missing keys.

    Args:
        pr: Pull request object; ``number`` is required, other keys optional.
        reviews: Review objects, rendered one bullet each.
        inline_comments: Inline review comments, grouped by file path in
            first-seen order; comments with an empty body are skipped.
        owner: Repository owner, used in the header and the fallback URL.
        repo: Repository name, used in the header and the fallback URL.

    Returns:
        The Markdown text, lines joined with ``"\\n"``.

    Raises:
        KeyError: If ``pr`` has no ``number``.
    """
    number = pr["number"]
    title = pr.get("title", "")
    author = _username(pr.get("user"))
    # `or []` / `or {}` guard against keys that exist with a null value, which
    # a plain .get(key, default) does not cover.
    labels = _label_names(pr.get("labels") or [])
    state = pr.get("state", "unknown")
    draft = " (draft)" if pr.get("draft") else ""
    created = (pr.get("created_at") or "")[:10]
    merged = (pr.get("merged_at") or "")[:10]
    closed = (pr.get("closed_at") or "")[:10]
    body = (pr.get("body") or "").strip()
    url = pr.get("html_url", f"https://github.com/{owner}/{repo}/pull/{number}")

    base_ref = (pr.get("base") or {}).get("ref") or ""
    head_ref = (pr.get("head") or {}).get("ref") or ""
    branch_line = f"**Branch:** {head_ref} → {base_ref}" if base_ref or head_ref else ""

    requested_reviewers = [
        _username(r) for r in pr.get("requested_reviewers") or []
    ]
    reviewer_str = ", ".join(requested_reviewers)

    lines = [
        f"**GitHub PR #{number}:** {title}{draft}",
        f"**Repository:** {owner}/{repo}",
        f"**Author:** {author} | **Labels:** {labels} | **State:** {state}",
    ]
    if reviewer_str:
        lines.append(f"**Requested reviewers:** {reviewer_str}")
    if branch_line:
        lines.append(branch_line)
    # A merged PR also has a close date; show only the merge date in that case.
    lines.append(
        f"**Created:** {created}"
        + (f" | **Merged:** {merged}" if merged else "")
        + (f" | **Closed:** {closed}" if closed and not merged else "")
    )
    lines.append(f"**URL:** {url}")

    if body:
        lines += ["", "**Description:**", body]

    if reviews:
        lines += ["", "**Reviews:**"]
        for rev in reviews:
            reviewer = _username(rev.get("user"))
            rev_state = rev.get("state", "")
            rev_body = (rev.get("body") or "").strip()
            submitted = (rev.get("submitted_at") or "")[:10]
            summary = f"- **{reviewer}** {rev_state} ({submitted})"
            if rev_body:
                summary += f": {rev_body}"
            lines.append(summary)

    # Aggregate inline review comments by file path
    if inline_comments:
        by_path: dict[str, list[str]] = {}
        for ic in inline_comments:
            path = ic.get("path") or "unknown"
            commenter = _username(ic.get("user"))
            text = (ic.get("body") or "").strip()
            if text:
                by_path.setdefault(path, []).append(f"{commenter}: {text}")
        if by_path:
            lines += ["", "**Inline review comments:**"]
            for path, cmts in by_path.items():
                lines.append(f"- `{path}`:")
                lines.extend(f" - {cmt}" for cmt in cmts)

    return "\n".join(lines)
@@ -0,0 +1,182 @@
1
+ """Thin async wrapper around the GitHub REST API v3."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ from typing import Any, AsyncIterator
7
+
8
+ import httpx
9
+
10
+ _BASE = "https://api.github.com"
11
+ _PER_PAGE = 100
12
+
13
+
14
class GitHubRateLimitError(RuntimeError):
    """Signals that the GitHub API rate limit is used up.

    Attributes:
        reset_at: Unix timestamp at which the rate limit resets, or ``None``
            when the response did not provide one.
    """

    def __init__(self, reset_at: int | None) -> None:
        self.reset_at = reset_at
        # The reset time is mentioned in the message only when it is known.
        suffix = f" (resets at unix={reset_at})" if reset_at else ""
        super().__init__("GitHub API rate limit exhausted" + suffix)
27
+
28
+
29
+ def _check_rate_limit(response: httpx.Response) -> None:
30
+ """Raise GitHubRateLimitError if the response indicates rate-limit exhaustion."""
31
+ if response.status_code in (403, 429):
32
+ remaining = response.headers.get("X-RateLimit-Remaining")
33
+ if remaining == "0":
34
+ reset = response.headers.get("X-RateLimit-Reset")
35
+ raise GitHubRateLimitError(int(reset) if reset and reset.isdigit() else None)
36
+
37
+
38
class GitHubClient:
    """Async client for the parts of the GitHub REST API used by the ingestor.

    Every request goes through ``_check_rate_limit`` and then
    ``raise_for_status``, so callers see ``GitHubRateLimitError`` or
    ``httpx.HTTPStatusError`` on failure.
    """

    def __init__(self, token: str | None = None, timeout: float = 30.0) -> None:
        """Store the request headers and timeout.

        Args:
            token: GitHub token; falls back to the ``GITHUB_TOKEN``
                environment variable.
            timeout: Per-request timeout in seconds.

        Raises:
            ValueError: If no token is given and ``GITHUB_TOKEN`` is
                unset/empty.
        """
        token = token or os.environ.get("GITHUB_TOKEN", "")
        if not token:
            raise ValueError(
                "GitHub token required. Pass token= or set GITHUB_TOKEN env var."
            )
        self._timeout = timeout
        self._headers = {
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }

    # ------------------------------------------------------------------
    # Issues
    # ------------------------------------------------------------------

    async def list_issues(
        self,
        owner: str,
        repo: str,
        state: str = "all",
        since: str | None = None,
    ) -> AsyncIterator[dict[str, Any]]:
        """Yield all issues (excluding PRs) page by page.

        ``since`` is forwarded to GitHub as the ``since`` query parameter
        when truthy.
        """
        query: dict[str, Any] = {"state": state, "per_page": _PER_PAGE, "page": 1}
        if since:
            query["since"] = since
        endpoint = f"{_BASE}/repos/{owner}/{repo}/issues"
        async for entry in self._paginate(endpoint, query):
            # GitHub returns PRs in the issues endpoint; filter them out
            if "pull_request" in entry:
                continue
            yield entry

    async def get_issue(
        self, owner: str, repo: str, number: int
    ) -> dict[str, Any]:
        """Fetch a single issue by number."""
        return await self._get(f"{_BASE}/repos/{owner}/{repo}/issues/{number}")

    async def list_issue_comments(
        self, owner: str, repo: str, issue_number: int
    ) -> list[dict[str, Any]]:
        """Return every comment on an issue, across all pages."""
        endpoint = f"{_BASE}/repos/{owner}/{repo}/issues/{issue_number}/comments"
        return await self._collect(endpoint)

    # ------------------------------------------------------------------
    # Pull Requests
    # ------------------------------------------------------------------

    async def list_pulls(
        self,
        owner: str,
        repo: str,
        state: str = "all",
        since: str | None = None,
    ) -> AsyncIterator[dict[str, Any]]:
        """Yield PRs page by page, most recently updated first.

        ``since`` is an ISO 8601 timestamp. It is not sent to GitHub; instead
        the results are requested sorted by ``updated`` descending, and
        iteration ends at the first PR whose ``updated_at`` string compares
        lower than ``since``.
        """
        query: dict[str, Any] = {
            "state": state,
            "per_page": _PER_PAGE,
            "page": 1,
            "sort": "updated",
            "direction": "desc",
        }
        endpoint = f"{_BASE}/repos/{owner}/{repo}/pulls"
        async for pull in self._paginate(endpoint, query):
            updated = pull.get("updated_at") or ""
            if since and updated < since:
                # Everything after this point is older still; stop here.
                return
            yield pull

    async def get_pull(
        self, owner: str, repo: str, number: int
    ) -> dict[str, Any]:
        """Fetch a single pull request by number."""
        return await self._get(f"{_BASE}/repos/{owner}/{repo}/pulls/{number}")

    async def list_pull_reviews(
        self, owner: str, repo: str, pull_number: int
    ) -> list[dict[str, Any]]:
        """Return every review on a PR, across all pages."""
        endpoint = f"{_BASE}/repos/{owner}/{repo}/pulls/{pull_number}/reviews"
        return await self._collect(endpoint)

    async def list_pull_comments(
        self, owner: str, repo: str, pull_number: int
    ) -> list[dict[str, Any]]:
        """Inline review comments on a PR."""
        endpoint = f"{_BASE}/repos/{owner}/{repo}/pulls/{pull_number}/comments"
        return await self._collect(endpoint)

    # ------------------------------------------------------------------
    # Repository metadata
    # ------------------------------------------------------------------

    async def get_repo(self, owner: str, repo: str) -> dict[str, Any]:
        """Fetch the repository object."""
        return await self._get(f"{_BASE}/repos/{owner}/{repo}")

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    async def _collect(self, url: str) -> list[dict[str, Any]]:
        """Gather all pages of ``url`` into one list, starting at page 1."""
        first_page: dict[str, Any] = {"per_page": _PER_PAGE, "page": 1}
        return [entry async for entry in self._paginate(url, first_page)]

    async def _get(self, url: str) -> dict[str, Any]:
        """GET ``url`` and return the decoded JSON body."""
        async with httpx.AsyncClient(timeout=self._timeout) as session:
            response = await session.get(url, headers=self._headers)
            _check_rate_limit(response)
            response.raise_for_status()
            return response.json()

    async def _paginate(
        self, url: str, params: dict[str, Any]
    ) -> AsyncIterator[dict[str, Any]]:
        """Yield items from ``url`` page by page.

        Starts at ``params["page"]`` and advances by one until a page is
        empty or holds fewer than ``_PER_PAGE`` items. ``params`` itself is
        not modified.
        """
        query = params
        async with httpx.AsyncClient(timeout=self._timeout) as session:
            while True:
                response = await session.get(url, headers=self._headers, params=query)
                _check_rate_limit(response)
                response.raise_for_status()
                batch = response.json()
                if not batch:
                    break
                for entry in batch:
                    yield entry
                # A short page means there is nothing further to fetch.
                if len(batch) < _PER_PAGE:
                    break
                query = {**query, "page": query["page"] + 1}
github_dkg/ingestor.py ADDED
@@ -0,0 +1,203 @@
1
+ """Orchestrates fetching from GitHub and writing to DKG v10 Working Memory."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from dataclasses import dataclass, field
7
+ from typing import Any
8
+
9
+ from .client import DKGClient
10
+ from .formatter import format_issue, format_pull_request
11
+ from .github_client import GitHubClient
12
+
13
+
14
@dataclass
class IngestResult:
    """Running tally of a bulk ingest."""

    # Number of issues written successfully.
    issues_ingested: int = 0
    # Number of pull requests written successfully.
    pulls_ingested: int = 0
    # One human-readable message per item that failed.
    errors: list[str] = field(default_factory=list)
    # ``turnUri`` values reported by the DKG node for successful writes.
    turn_uris: list[str] = field(default_factory=list)

    @property
    def total(self) -> int:
        """Combined count of ingested issues and pull requests."""
        return sum((self.issues_ingested, self.pulls_ingested))
24
+
25
+
26
class GitHubDKGIngestor:
    """Fetch GitHub items and write them to DKG v10 Working Memory.

    Each issue and PR becomes one Knowledge Asset (via /api/memory/turn).
    All assets for a repo are scoped to a single Context Graph.

    The ``github`` client is optional — required for ingest, but ``promote``
    only touches DKG so it can be omitted there.
    """

    def __init__(
        self,
        dkg: DKGClient,
        github: GitHubClient | None = None,
        context_graph_id: str = "",
        layer: str = "wm",
        max_comments_per_issue: int = 20,
        max_reviews_per_pr: int = 10,
        concurrency: int = 5,
    ) -> None:
        """Configure the ingestor.

        Args:
            dkg: Client used for all DKG writes.
            github: Client used to read from GitHub; may be omitted when only
                ``promote`` is used.
            context_graph_id: Context graph every write is scoped to.
            layer: Memory layer passed to each write.
            max_comments_per_issue: Only this many comments (from the start
                of the fetched list) are included per issue.
            max_reviews_per_pr: Only this many reviews (from the start of the
                fetched list) are included per PR.
            concurrency: Maximum number of items processed at once during a
                bulk ingest.

        Raises:
            ValueError: If ``concurrency`` is less than 1.
        """
        if concurrency < 1:
            # Semaphore(0) is constructible but never lets a task in, so a
            # bulk ingest would hang forever.
            raise ValueError("concurrency must be at least 1")
        self._dkg = dkg
        self._gh = github
        self._context_graph_id = context_graph_id
        self._layer = layer
        self._max_comments = max_comments_per_issue
        self._max_reviews = max_reviews_per_pr
        self._sem = asyncio.Semaphore(concurrency)

    def _require_github(self) -> GitHubClient:
        """Return the GitHub client, or raise RuntimeError if none was given."""
        if self._gh is None:
            raise RuntimeError(
                "This operation requires a GitHubClient. "
                "Pass github=... when constructing GitHubDKGIngestor."
            )
        return self._gh

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def ingest_repo(
        self,
        owner: str,
        repo: str,
        since: str | None = None,
        include_issues: bool = True,
        include_pulls: bool = True,
    ) -> IngestResult:
        """Ingest the issues and/or PRs of a repository.

        Items are listed first, then processed concurrently (bounded by the
        configured concurrency). A failure while processing one item is
        recorded in ``IngestResult.errors``; a failure while listing
        propagates to the caller.

        Args:
            owner: Repository owner.
            repo: Repository name.
            since: Optional cutoff passed to the GitHub listing calls.
            include_issues: Whether to ingest issues.
            include_pulls: Whether to ingest pull requests.

        Returns:
            Counts, error messages and turn URIs for the run.

        Raises:
            RuntimeError: If no GitHub client was configured.
        """
        gh = self._require_github()
        result = IngestResult()
        pending: list[Any] = []

        try:
            if include_issues:
                async for issue in gh.list_issues(owner, repo, since=since):
                    pending.append(self._ingest_issue(owner, repo, issue, result))

            if include_pulls:
                async for pr in gh.list_pulls(owner, repo, since=since):
                    pending.append(self._ingest_pull(owner, repo, pr, result))
        except BaseException:
            # Listing failed part-way: the coroutines collected so far will
            # never be awaited, so close them to release them cleanly instead
            # of leaving "never awaited" warnings behind.
            for coro in pending:
                coro.close()
            raise

        await asyncio.gather(*pending)
        return result

    async def ingest_issue(
        self, owner: str, repo: str, issue_number: int
    ) -> dict[str, Any]:
        """Ingest a single issue by number. Returns the DKG turn response."""
        gh = self._require_github()
        issue = await gh.get_issue(owner, repo, issue_number)
        comments = await gh.list_issue_comments(owner, repo, issue_number)
        markdown = format_issue(issue, comments[: self._max_comments], owner, repo)
        return await self._write_turn(owner, repo, markdown)

    async def ingest_pull(
        self, owner: str, repo: str, pull_number: int
    ) -> dict[str, Any]:
        """Ingest a single PR by number. Returns the DKG turn response."""
        gh = self._require_github()
        pr, reviews, inline = await asyncio.gather(
            gh.get_pull(owner, repo, pull_number),
            gh.list_pull_reviews(owner, repo, pull_number),
            gh.list_pull_comments(owner, repo, pull_number),
        )
        markdown = format_pull_request(
            pr,
            reviews[: self._max_reviews],
            inline,
            owner,
            repo,
        )
        return await self._write_turn(owner, repo, markdown)

    async def promote(self, turn_uri: str) -> dict[str, Any]:
        """Promote a Working Memory Knowledge Asset to Shared Working Memory (SHARE).

        The assertion name is the last ``/``-separated segment of
        ``turn_uri``; trailing slashes are ignored.

        Raises:
            ValueError: If no name can be derived from ``turn_uri``.
        """
        name = turn_uri.rstrip("/").split("/")[-1]
        if not name:
            raise ValueError(f"Cannot derive an assertion name from {turn_uri!r}")
        return await self._dkg.assertion_promote(
            name=name,
            context_graph_id=self._context_graph_id,
        )

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    async def _write_turn(self, owner: str, repo: str, markdown: str) -> dict[str, Any]:
        """Write one Markdown document to the configured graph and layer."""
        return await self._dkg.memory_turn(
            context_graph_id=self._context_graph_id,
            markdown=markdown,
            layer=self._layer,
            session_uri=_repo_session_uri(owner, repo),
        )

    async def _ingest_issue(
        self,
        owner: str,
        repo: str,
        issue: dict[str, Any],
        result: IngestResult,
    ) -> None:
        """Ingest one already-fetched issue, recording the outcome in ``result``."""
        gh = self._require_github()
        async with self._sem:
            try:
                number = issue["number"]
                comments = await gh.list_issue_comments(owner, repo, number)
                markdown = format_issue(
                    issue, comments[: self._max_comments], owner, repo
                )
                resp = await self._write_turn(owner, repo, markdown)
                result.issues_ingested += 1
                if uri := resp.get("turnUri"):
                    result.turn_uris.append(uri)
            except Exception as exc:
                # Best-effort bulk run: one bad item must not abort the rest.
                result.errors.append(f"issue #{issue.get('number')}: {exc}")

    async def _ingest_pull(
        self,
        owner: str,
        repo: str,
        pr: dict[str, Any],
        result: IngestResult,
    ) -> None:
        """Ingest one already-fetched PR, recording the outcome in ``result``."""
        gh = self._require_github()
        async with self._sem:
            try:
                number = pr["number"]
                reviews, inline = await asyncio.gather(
                    gh.list_pull_reviews(owner, repo, number),
                    gh.list_pull_comments(owner, repo, number),
                )
                markdown = format_pull_request(
                    pr,
                    reviews[: self._max_reviews],
                    inline,
                    owner,
                    repo,
                )
                resp = await self._write_turn(owner, repo, markdown)
                result.pulls_ingested += 1
                if uri := resp.get("turnUri"):
                    result.turn_uris.append(uri)
            except Exception as exc:
                # Best-effort bulk run: one bad item must not abort the rest.
                result.errors.append(f"PR #{pr.get('number')}: {exc}")
200
+
201
+
202
+ def _repo_session_uri(owner: str, repo: str) -> str:
203
+ return f"https://github.com/{owner}/{repo}"
@@ -0,0 +1,143 @@
1
+ Metadata-Version: 2.4
2
+ Name: github-dkg
3
+ Version: 0.1.0
4
+ Summary: Ingest GitHub issues, PRs, and review comments into DKG v10 Working Memory
5
+ Project-URL: Repository, https://github.com/haroldboom/github-dkg
6
+ License: MIT
7
+ License-File: LICENSE
8
+ Requires-Python: >=3.10
9
+ Requires-Dist: click>=8.1
10
+ Requires-Dist: httpx>=0.27
11
+ Provides-Extra: dev
12
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
13
+ Requires-Dist: pytest>=8; extra == 'dev'
14
+ Requires-Dist: respx>=0.21; extra == 'dev'
15
+ Description-Content-Type: text/markdown
16
+
17
+ # github-dkg
18
+
19
+ Ingest GitHub issues, pull requests, and review comments into [DKG v10](https://docs.origintrail.io) Working Memory as Knowledge Assets.
20
+
21
+ Every issue and PR becomes a queryable, attributable Knowledge Asset in your DKG v10 node. Key decisions can be promoted to Shared Working Memory — making your team's engineering knowledge accessible to agents.
22
+
23
+ ## Demo
24
+
25
+ - **Walkthrough notebook:** [`demo.ipynb`](demo.ipynb) — runs end-to-end against a built-in mock of GitHub and the DKG node, no tokens required. Open in [Colab](https://colab.research.google.com/github/haroldboom/github-dkg/blob/master/demo.ipynb).
26
+ - **Live recording script:** [`examples/demo_video.py`](examples/demo_video.py) — drives all three demos against a real DKG node and the GitHub API; this is the script behind the bounty walkthrough video.
27
+
28
+ ## Install
29
+
30
+ ```bash
31
+ pip install github-dkg
32
+ ```
33
+
34
+ ## Quickstart
35
+
36
+ ```bash
37
+ export DKG_TOKEN=your-dkg-token
38
+ export DKG_BASE_URL=http://localhost:9200
39
+ export DKG_CONTEXT_GRAPH=your-context-graph-id
40
+ export GITHUB_TOKEN=your-github-token
41
+
42
+ # Bulk-ingest all issues and PRs from a repository
43
+ github-dkg ingest owner/repo --context-graph $DKG_CONTEXT_GRAPH
44
+
45
+ # Ingest a single issue
46
+ github-dkg ingest-one owner/repo 42 --type issue --context-graph $DKG_CONTEXT_GRAPH
47
+
48
+ # Ingest a single PR
49
+ github-dkg ingest-one owner/repo 99 --type pr --context-graph $DKG_CONTEXT_GRAPH
50
+
51
+ # Search ingested knowledge
52
+ github-dkg search "authentication bug" --context-graph $DKG_CONTEXT_GRAPH
53
+
54
+ # Promote a Working Memory asset to Shared Working Memory (SHARE)
55
+ github-dkg promote dkg://wm/turn/abc123 --context-graph $DKG_CONTEXT_GRAPH
56
+ ```
57
+
58
+ ## GitHub Action
59
+
60
+ Automatically ingest issues and PRs as they are created or updated. Add to `.github/workflows/dkg-ingest.yml`:
61
+
62
+ ```yaml
63
+ on:
64
+ issues:
65
+ types: [opened, edited, closed]
66
+ pull_request:
67
+ types: [opened, edited, closed]
68
+ pull_request_review:
69
+ types: [submitted]
70
+
71
+ jobs:
72
+ ingest:
73
+ runs-on: ubuntu-latest
74
+ steps:
75
+ - uses: haroldboom/github-dkg@v0.1.0
76
+ id: ingest
77
+ with:
78
+ dkg-token: ${{ secrets.DKG_TOKEN }}
79
+ dkg-base-url: ${{ secrets.DKG_BASE_URL }}
80
+ dkg-context-graph: ${{ secrets.DKG_CONTEXT_GRAPH }}
81
+ ```
82
+
83
+ See `examples/workflow.yml` for a complete example including automatic promotion of architecture-decision PRs to Shared Working Memory.
84
+
85
+ ## Python API
86
+
87
+ ```python
88
+ import asyncio
89
+ from github_dkg import DKGClient, GitHubClient, GitHubDKGIngestor
90
+
91
+ async def main():
92
+ dkg = DKGClient(base_url="http://localhost:9200", token="your-token")
93
+ gh = GitHubClient(token="your-github-token")
94
+ ingestor = GitHubDKGIngestor(dkg=dkg, github=gh, context_graph_id="cg-123")
95
+
96
+ # Bulk ingest
97
+ result = await ingestor.ingest_repo("owner", "repo", since="2024-01-01")
98
+ print(f"Ingested {result.total} items ({len(result.errors)} errors)")
99
+
100
+ # Single item
101
+ resp = await ingestor.ingest_issue("owner", "repo", 42)
102
+ print(f"Turn URI: {resp['turnUri']}")
103
+
104
+ # Promote to Shared Working Memory
105
+ await ingestor.promote(resp["turnUri"])
106
+
107
+ asyncio.run(main())
108
+ ```
109
+
110
+ ## `--since` filtering
111
+
112
+ `--since` accepts an ISO 8601 timestamp and limits ingest to items updated after that point.
113
+
114
+ - **Issues:** filtered server-side by GitHub via the `since` parameter on `/issues`.
115
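+ - **Pull requests:** GitHub's `/pulls` endpoint has no `since` filter, so the package requests `sort=updated&direction=desc` and stops paginating once results fall below the cutoff. Net result: only PRs touched after `--since` are fetched and ingested. The cutoff is compared as a plain string against each PR's `updated_at` value, so pass `--since` in the same UTC form GitHub uses (for example `2024-01-01T00:00:00Z`) rather than with a timezone offset.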
116
+
117
+ Comment-only updates (a new comment without an issue/PR body edit) still bump `updated_at`, so they're included.
118
+
119
+ ## Rate limiting
120
+
121
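+ `GitHubClient` raises `github_dkg.github_client.GitHubRateLimitError` when GitHub returns `403`/`429` with `X-RateLimit-Remaining: 0`. The exception carries `reset_at` (unix timestamp) so callers can decide whether to back off, sleep, or fail. During `ingest_repo`, a rate-limit error hit while listing issues or PRs propagates to the caller, whereas one hit while fetching the comments or reviews of an individual item is recorded in `result.errors` instead. Authenticated tokens get 5,000 requests/hour; bulk-ingesting a large repo with many comment-heavy PRs can approach this limit.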
122
+
123
+ ```python
124
+ from github_dkg.github_client import GitHubRateLimitError
125
+
126
+ try:
127
+ result = await ingestor.ingest_repo("OriginTrail", "dkg-v9")
128
+ except GitHubRateLimitError as e:
129
+ print(f"Rate limited; resets at unix={e.reset_at}")
130
+ ```
131
+
132
+ ## Memory layers
133
+
134
+ | Layer | Flag | Visibility |
135
+ |---|---|---|
136
+ | Working Memory | `--layer wm` (default) | Private to your node |
137
+ | Shared Working Memory | `--layer swm` | Gossiped across the paranet |
138
+
139
+ Promotion from Working Memory to Shared Working Memory is always explicit — nothing is shared automatically.
140
+
141
+ ## License
142
+
143
+ MIT
@@ -0,0 +1,11 @@
1
+ github_dkg/__init__.py,sha256=pInaVjlLzKX6l_tq0wkik3cQDBrBgyYtOsB2XmgkuHE,281
2
+ github_dkg/cli.py,sha256=tJulW7owe6O0Ex1xTlLRQjRWluFWSORb1KAbpiZOiBM,6391
3
+ github_dkg/client.py,sha256=B_o3XielJTQSV1_fE6TPD1SuQrdQXiQZ372V8b_-IDE,4424
4
+ github_dkg/formatter.py,sha256=II0vHLby0o0RoXWiRb7nI1-8zPdYFp-2lnC7020CVhk,4636
5
+ github_dkg/github_client.py,sha256=9G85qBDAZJMRJtg9GsMUlquwC8egM6pvLbem5VMVu24,6505
6
+ github_dkg/ingestor.py,sha256=xESZxR4ehXEv38ULeWSdXPzykIJvrnmlwgAn7oLRjXQ,7062
7
+ github_dkg-0.1.0.dist-info/METADATA,sha256=260qj5RC7ZtTwes54yyhXQPhrVQ4nqhUVF0gxgxMgD4,5128
8
+ github_dkg-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
9
+ github_dkg-0.1.0.dist-info/entry_points.txt,sha256=RRzXzSjXBtEKMnfQ2oLMLfrTv4h8hXUXbOVhgF7COdU,51
10
+ github_dkg-0.1.0.dist-info/licenses/LICENSE,sha256=Btzdu2kIoMbdSp6OyCLupB1aRgpTCJ_szMimgEnpkkE,1056
11
+ github_dkg-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.29.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ github-dkg = github_dkg.cli:main
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.