github-dkg 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- github_dkg/__init__.py +8 -0
- github_dkg/cli.py +187 -0
- github_dkg/client.py +139 -0
- github_dkg/formatter.py +135 -0
- github_dkg/github_client.py +182 -0
- github_dkg/ingestor.py +203 -0
- github_dkg-0.1.0.dist-info/METADATA +143 -0
- github_dkg-0.1.0.dist-info/RECORD +11 -0
- github_dkg-0.1.0.dist-info/WHEEL +4 -0
- github_dkg-0.1.0.dist-info/entry_points.txt +2 -0
- github_dkg-0.1.0.dist-info/licenses/LICENSE +21 -0
github_dkg/__init__.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
"""github-dkg: Ingest GitHub issues, PRs, and reviews into DKG v10 Working Memory."""
|
|
2
|
+
|
|
3
|
+
from .client import DKGClient
|
|
4
|
+
from .github_client import GitHubClient
|
|
5
|
+
from .ingestor import GitHubDKGIngestor
|
|
6
|
+
|
|
7
|
+
__all__ = ["DKGClient", "GitHubClient", "GitHubDKGIngestor"]
|
|
8
|
+
__version__ = "0.1.0"
|
github_dkg/cli.py
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
"""CLI entry point for github-dkg."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
import click
|
|
9
|
+
|
|
10
|
+
from .client import DKGClient
|
|
11
|
+
from .github_client import GitHubClient
|
|
12
|
+
from .ingestor import GitHubDKGIngestor
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _make_clients(
    dkg_token: str | None,
    dkg_url: str | None,
    github_token: str | None,
) -> tuple[DKGClient, GitHubClient]:
    """Build the DKG and GitHub clients from CLI-supplied settings.

    Each argument may be ``None``; it is forwarded unchanged to the
    corresponding client constructor.

    Returns:
        A ``(DKGClient, GitHubClient)`` pair, in that order.
    """
    # The DKG client is constructed first, matching the order of the result.
    dkg_client = DKGClient(base_url=dkg_url, token=dkg_token)
    return dkg_client, GitHubClient(token=github_token)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# Root command group: the commands in this module attach to it through
# @main.command(). The docstring below doubles as the group's help text,
# so it is kept verbatim.
@click.group()
def main() -> None:
    """github-dkg: Ingest GitHub knowledge into DKG v10 Working Memory."""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@main.command()
@click.argument("repo")  # owner/repo
@click.option("--context-graph", required=True, envvar="DKG_CONTEXT_GRAPH", help="Context Graph ID")
@click.option("--layer", default="wm", show_default=True, type=click.Choice(["wm", "swm"]))
@click.option("--since", default=None, help="ISO 8601 date — only ingest items updated after this date")
@click.option("--no-issues", is_flag=True, default=False, help="Skip issues")
@click.option("--no-pulls", is_flag=True, default=False, help="Skip pull requests")
@click.option("--concurrency", default=5, show_default=True, help="Parallel DKG writes")
@click.option("--dkg-token", envvar="DKG_TOKEN", default=None)
@click.option("--dkg-url", envvar="DKG_BASE_URL", default=None)
@click.option("--github-token", envvar="GITHUB_TOKEN", default=None)
def ingest(
    repo: str,
    context_graph: str,
    layer: str,
    since: str | None,
    no_issues: bool,
    no_pulls: bool,
    concurrency: int,
    dkg_token: str | None,
    dkg_url: str | None,
    github_token: str | None,
) -> None:
    """Bulk-ingest all issues and PRs from OWNER/REPO into Working Memory."""
    # The docstring above is the command's help text; keep it short.
    #
    # Exit status: 0 when everything was ingested, 1 on a malformed REPO,
    # missing credentials, an unreachable DKG node, or any per-item error.

    # Both halves of owner/repo must be non-empty and the repository name may
    # not contain a further "/", otherwise the GitHub URLs built from them
    # would be malformed.
    owner, _, repo_name = repo.partition("/")
    if not owner or not repo_name or "/" in repo_name:
        click.echo("Error: REPO must be in owner/repo format", err=True)
        sys.exit(1)

    async def run() -> int:
        # The client constructors raise ValueError when a required token is
        # missing; report that as a normal CLI error instead of a traceback.
        try:
            dkg, gh = _make_clients(dkg_token, dkg_url, github_token)
        except ValueError as exc:
            click.echo(f"Error: {exc}", err=True)
            return 1

        click.echo("Connecting to DKG node...")
        if not await dkg.ping():
            click.echo("Error: DKG node unreachable or token invalid", err=True)
            return 1

        ingestor = GitHubDKGIngestor(
            dkg=dkg,
            github=gh,
            context_graph_id=context_graph,
            layer=layer,
            concurrency=concurrency,
        )

        click.echo(f"Ingesting {owner}/{repo_name} → context graph '{context_graph}' (layer={layer})")
        result = await ingestor.ingest_repo(
            owner=owner,
            repo=repo_name,
            since=since,
            include_issues=not no_issues,
            include_pulls=not no_pulls,
        )

        click.echo(f"Done: {result.issues_ingested} issues, {result.pulls_ingested} PRs ingested")
        if result.errors:
            # Header and details both go to stderr so they stay together when
            # stdout is redirected.
            click.echo(f"Errors ({len(result.errors)}):", err=True)
            for err in result.errors:
                click.echo(f" {err}", err=True)
            return 1
        return 0

    sys.exit(asyncio.run(run()))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@main.command()
@click.argument("repo")  # owner/repo
@click.argument("number", type=int)
@click.option("--type", "item_type", required=True, type=click.Choice(["issue", "pr"]))
@click.option("--context-graph", required=True, envvar="DKG_CONTEXT_GRAPH")
@click.option("--layer", default="wm", show_default=True, type=click.Choice(["wm", "swm"]))
@click.option("--dkg-token", envvar="DKG_TOKEN", default=None)
@click.option("--dkg-url", envvar="DKG_BASE_URL", default=None)
@click.option("--github-token", envvar="GITHUB_TOKEN", default=None)
def ingest_one(
    repo: str,
    number: int,
    item_type: str,
    context_graph: str,
    layer: str,
    dkg_token: str | None,
    dkg_url: str | None,
    github_token: str | None,
) -> None:
    """Ingest a single issue or PR by number."""
    # The docstring above is the command's help text; keep it short.

    # Reject "owner/", "/repo" and "a/b/c": each half must be non-empty and
    # the repository name may not contain another "/".
    owner, _, repo_name = repo.partition("/")
    if not owner or not repo_name or "/" in repo_name:
        click.echo("Error: REPO must be in owner/repo format", err=True)
        sys.exit(1)

    async def run() -> None:
        # A missing token makes the client constructors raise ValueError;
        # surface it as a regular click error rather than a traceback.
        try:
            dkg, gh = _make_clients(dkg_token, dkg_url, github_token)
        except ValueError as exc:
            raise click.ClickException(str(exc)) from exc

        ingestor = GitHubDKGIngestor(dkg=dkg, github=gh, context_graph_id=context_graph, layer=layer)
        # --type is restricted to "issue" or "pr" by click.Choice above.
        if item_type == "issue":
            resp = await ingestor.ingest_issue(owner, repo_name, number)
        else:
            resp = await ingestor.ingest_pull(owner, repo_name, number)
        turn_uri = resp.get("turnUri", "")
        click.echo(f"Ingested: {turn_uri}")

    asyncio.run(run())
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@main.command()
@click.argument("turn-uri")
@click.option("--context-graph", required=True, envvar="DKG_CONTEXT_GRAPH")
@click.option("--dkg-token", envvar="DKG_TOKEN", default=None)
@click.option("--dkg-url", envvar="DKG_BASE_URL", default=None)
def promote(
    turn_uri: str,
    context_graph: str,
    dkg_token: str | None,
    dkg_url: str | None,
) -> None:
    """Promote a Working Memory Knowledge Asset to Shared Working Memory (SHARE)."""
    # The docstring above is the command's help text.

    async def _promote_turn() -> None:
        # No GitHub client is built here: the ingestor is constructed with a
        # DKG client only.
        client = DKGClient(base_url=dkg_url, token=dkg_token)
        worker = GitHubDKGIngestor(dkg=client, context_graph_id=context_graph)
        outcome = await worker.promote(turn_uri)
        click.echo(f"Promoted: {outcome}")

    asyncio.run(_promote_turn())
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
@main.command()
@click.argument("query")
@click.option("--context-graph", required=True, envvar="DKG_CONTEXT_GRAPH")
@click.option("--limit", default=10, show_default=True)
@click.option("--dkg-token", envvar="DKG_TOKEN", default=None)
@click.option("--dkg-url", envvar="DKG_BASE_URL", default=None)
def search(
    query: str,
    context_graph: str,
    limit: int,
    dkg_token: str | None,
    dkg_url: str | None,
) -> None:
    """Search ingested GitHub knowledge in Working Memory."""
    # The docstring above is the command's help text.

    async def _run_search() -> None:
        client = DKGClient(base_url=dkg_url, token=dkg_token)
        payload = await client.memory_search(
            context_graph_id=context_graph,
            query=query,
            limit=limit,
        )
        hit_count = payload.get("resultCount", 0)
        click.echo(f"{hit_count} result(s) for '{query}':")
        for hit in payload.get("results", []):
            # Fall back to the entity URI when the hit carries no label.
            title = hit.get("label", hit.get("entityUri", ""))
            excerpt = hit.get("snippet", "")
            memory_layer = hit.get("memoryLayer", "")
            click.echo(f" [{memory_layer}] {title}")
            if not excerpt:
                continue
            # Snippets are cut to 120 characters for display.
            click.echo(f" {excerpt[:120]}")

    asyncio.run(_run_search())
|
github_dkg/client.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""HTTP client for the DKG v10 node API."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class DKGClient:
    """Thin async wrapper around the DKG v10 HTTP API (port 9200).

    Every request carries an ``Authorization: Bearer <token>`` header and
    opens a short-lived ``httpx.AsyncClient``.

    All methods except :meth:`ping` raise httpx.HTTPStatusError on non-2xx
    responses; :meth:`ping` reports failure by returning ``False``.
    """

    def __init__(
        self,
        base_url: str | None = None,
        token: str | None = None,
        timeout: float = 30.0,
    ) -> None:
        """Resolve the node URL and bearer token.

        Args:
            base_url: Node base URL. Falls back to the ``DKG_BASE_URL``
                environment variable, then ``http://localhost:9200``.
                Trailing slashes are stripped.
            token: Bearer token. Falls back to the ``DKG_TOKEN``
                environment variable.
            timeout: Per-request timeout in seconds (not used by ``ping``).

        Raises:
            ValueError: If no token is passed and ``DKG_TOKEN`` is unset or
                empty.
        """
        self.base_url = (
            base_url or os.environ.get("DKG_BASE_URL", "http://localhost:9200")
        ).rstrip("/")
        token = token or os.environ.get("DKG_TOKEN", "")
        if not token:
            raise ValueError(
                "DKG bearer token required. Pass token= or set DKG_TOKEN env var."
            )
        self._headers = {"Authorization": f"Bearer {token}"}
        self._timeout = timeout

    async def ping(self) -> bool:
        """Return True when ``GET /api/agents`` answers with HTTP 200.

        Uses a fixed 5 second timeout. Any exception (connection failure,
        timeout, invalid URL, ...) is deliberately reported as ``False`` so
        callers can treat this as a plain reachability/credential check.
        """
        try:
            async with httpx.AsyncClient(timeout=5.0) as http:
                r = await http.get(
                    f"{self.base_url}/api/agents", headers=self._headers
                )
                return r.status_code == 200
        except Exception:
            return False

    async def _post(self, path: str, body: dict[str, Any]) -> dict[str, Any]:
        """POST ``body`` as JSON to ``path`` and return the decoded JSON reply.

        ``path`` is appended to ``base_url`` and must start with ``/``.
        Raises httpx.HTTPStatusError on a non-2xx response.
        """
        async with httpx.AsyncClient(timeout=self._timeout) as http:
            r = await http.post(
                f"{self.base_url}{path}",
                headers=self._headers,
                json=body,
            )
            r.raise_for_status()
            return r.json()

    async def create_context_graph(self, name: str) -> dict[str, Any]:
        """Create a context graph called ``name`` and return the node's reply."""
        return await self._post("/api/context-graph/create", {"name": name})

    async def memory_turn(
        self,
        context_graph_id: str,
        markdown: str,
        session_uri: str | None = None,
        layer: str = "wm",
        sub_graph_name: str | None = None,
    ) -> dict[str, Any]:
        """Ingest a markdown artifact as a Knowledge Asset in Working Memory.

        ``sessionUri`` and ``subGraphName`` are only sent when the matching
        argument is truthy.
        """
        body: dict[str, Any] = {
            "contextGraphId": context_graph_id,
            "markdown": markdown,
            "layer": layer,
        }
        if session_uri:
            body["sessionUri"] = session_uri
        if sub_graph_name:
            body["subGraphName"] = sub_graph_name
        return await self._post("/api/memory/turn", body)

    async def memory_search(
        self,
        context_graph_id: str,
        query: str,
        limit: int = 20,
        memory_layers: list[str] | None = None,
    ) -> dict[str, Any]:
        """Search a context graph and return the node's JSON reply.

        ``memoryLayers`` is only sent when ``memory_layers`` is non-empty.
        """
        body: dict[str, Any] = {
            "contextGraphId": context_graph_id,
            "query": query,
            "limit": limit,
        }
        if memory_layers:
            body["memoryLayers"] = memory_layers
        return await self._post("/api/memory/search", body)

    async def assertion_promote(
        self,
        name: str,
        context_graph_id: str,
        entities: list[str] | None = None,
    ) -> dict[str, Any]:
        """Promote a Working Memory assertion to Shared Working Memory (SHARE).

        ``name`` becomes a single URL path segment, so it is percent-encoded;
        characters such as ``/``, ``?`` or ``#`` can therefore no longer
        change which endpoint is hit. ``entities`` is only sent when
        non-empty.
        """
        # Imported locally because the module's import block lies outside
        # this class.
        from urllib.parse import quote

        body: dict[str, Any] = {"contextGraphId": context_graph_id}
        if entities:
            body["entities"] = entities
        return await self._post(
            f"/api/assertion/{quote(name, safe='')}/promote", body
        )

    async def query(
        self,
        sparql: str,
        include_workspace: bool = True,
    ) -> dict[str, Any]:
        """Send a SPARQL query to ``/api/query`` and return the JSON reply."""
        body: dict[str, Any] = {
            "sparql": sparql,
            "includeWorkspace": include_workspace,
        }
        return await self._post("/api/query", body)
|
github_dkg/formatter.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""Format GitHub items as Markdown Knowledge Assets for DKG v10 Working Memory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _label_names(labels: list[dict[str, Any] | str] | None) -> str:
    """Return the label names joined by ``", "``, or ``"none"`` if there are none.

    Each label may be a dict with a ``name`` key or a plain string (GitHub's
    issue schema allows both). Entries without a usable name are skipped, and
    ``None`` is treated like an empty list.
    """
    names: list[str] = []
    for label in labels or []:
        if isinstance(label, str):
            name = label
        elif isinstance(label, dict):
            name = label.get("name") or ""
        else:
            # Unknown shape: ignore it rather than fail the whole document.
            continue
        if name:
            names.append(name)
    return ", ".join(names) if names else "none"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _username(user: dict[str, Any] | None) -> str:
    """Return the ``login`` of a user dict, or ``"unknown"``.

    ``"unknown"`` is used when ``user`` is ``None``/empty or has no ``login``
    key.
    """
    return user.get("login", "unknown") if user else "unknown"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def format_issue(
    issue: dict[str, Any],
    comments: list[dict[str, Any]],
    owner: str,
    repo: str,
) -> str:
    """Render an issue and its comments as a Markdown document.

    The result starts with a header (title, repository, author/labels/state,
    dates, URL), followed by the description and a bullet per non-empty
    comment when present.

    Args:
        issue: Issue dict; ``number`` is required, every other key is
            optional.
        comments: Comment dicts; entries with a blank body are left out.
        owner: Repository owner, used in the header and the fallback URL.
        repo: Repository name, used in the header and the fallback URL.

    Returns:
        The Markdown text, lines joined with ``"\\n"``.
    """
    issue_no = issue["number"]
    author = _username(issue.get("user"))
    label_text = _label_names(issue.get("labels", []))
    reason = issue.get("state_reason") or ""
    # Timestamps are cut to their first 10 characters (the date part).
    created_on = (issue.get("created_at") or "")[:10]
    closed_on = (issue.get("closed_at") or "")[:10]
    description = (issue.get("body") or "").strip()
    fallback_url = f"https://github.com/{owner}/{repo}/issues/{issue_no}"

    status_line = (
        f"**Author:** {author} | **Labels:** {label_text} "
        f"| **State:** {issue.get('state', 'unknown')}"
    )
    if reason:
        status_line += f" ({reason})"

    dates_line = f"**Created:** {created_on}"
    if closed_on:
        dates_line += f" | **Closed:** {closed_on}"

    out = [
        f"**GitHub Issue #{issue_no}:** {issue.get('title', '')}",
        f"**Repository:** {owner}/{repo}",
        status_line,
        dates_line,
        f"**URL:** {issue.get('html_url', fallback_url)}",
    ]

    if description:
        out.extend(["", "**Description:**", description])

    if comments:
        out.extend(["", "**Comments:**"])
        for comment in comments:
            who = _username(comment.get("user"))
            when = (comment.get("created_at") or "")[:10]
            text = (comment.get("body") or "").strip()
            if not text:
                continue
            out.append(f"- **{who}** ({when}): {text}")

    return "\n".join(out)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def format_pull_request(
    pr: dict[str, Any],
    reviews: list[dict[str, Any]],
    inline_comments: list[dict[str, Any]],
    owner: str,
    repo: str,
) -> str:
    """Render a pull request, its reviews and inline comments as Markdown.

    The result starts with a header (title, repository, author/labels/state,
    optional requested reviewers and branch line, dates, URL), followed by
    the description, one bullet per review, and the inline review comments
    grouped by file path.

    Args:
        pr: Pull-request dict; ``number`` is required, other keys optional.
        reviews: Review dicts, rendered in the given order.
        inline_comments: Inline review comment dicts; blank bodies are
            skipped.
        owner: Repository owner, used in the header and the fallback URL.
        repo: Repository name, used in the header and the fallback URL.

    Returns:
        The Markdown text, lines joined with ``"\\n"``.
    """
    pr_no = pr["number"]
    author = _username(pr.get("user"))
    label_text = _label_names(pr.get("labels", []))
    draft_tag = " (draft)" if pr.get("draft") else ""
    # Timestamps are cut to their first 10 characters (the date part).
    created_on = (pr.get("created_at") or "")[:10]
    merged_on = (pr.get("merged_at") or "")[:10]
    closed_on = (pr.get("closed_at") or "")[:10]
    description = (pr.get("body") or "").strip()
    fallback_url = f"https://github.com/{owner}/{repo}/pull/{pr_no}"

    target_ref = pr.get("base", {}).get("ref", "")
    source_ref = pr.get("head", {}).get("ref", "")
    reviewer_text = ", ".join(
        _username(person) for person in pr.get("requested_reviewers", [])
    )

    out = [
        f"**GitHub PR #{pr_no}:** {pr.get('title', '')}{draft_tag}",
        f"**Repository:** {owner}/{repo}",
        f"**Author:** {author} | **Labels:** {label_text} "
        f"| **State:** {pr.get('state', 'unknown')}",
    ]
    if reviewer_text:
        out.append(f"**Requested reviewers:** {reviewer_text}")
    if target_ref or source_ref:
        out.append(f"**Branch:** {source_ref} → {target_ref}")

    # A merge date takes precedence: the closed date is only shown for PRs
    # that have no merge date.
    timeline = f"**Created:** {created_on}"
    if merged_on:
        timeline += f" | **Merged:** {merged_on}"
    elif closed_on:
        timeline += f" | **Closed:** {closed_on}"
    out.append(timeline)
    out.append(f"**URL:** {pr.get('html_url', fallback_url)}")

    if description:
        out.extend(["", "**Description:**", description])

    if reviews:
        out.extend(["", "**Reviews:**"])
        for review in reviews:
            who = _username(review.get("user"))
            verdict = review.get("state", "")
            note = (review.get("body") or "").strip()
            when = (review.get("submitted_at") or "")[:10]
            entry = f"- **{who}** {verdict} ({when})"
            out.append(f"{entry}: {note}" if note else entry)

    # Group inline review comments under the file they were left on.
    if inline_comments:
        grouped: dict[str, list[str]] = {}
        for item in inline_comments:
            file_path = item.get("path", "unknown")
            who = _username(item.get("user"))
            text = (item.get("body") or "").strip()
            if text:
                grouped.setdefault(file_path, []).append(f"{who}: {text}")
        if grouped:
            out.extend(["", "**Inline review comments:**"])
            for file_path, notes in grouped.items():
                out.append(f"- `{file_path}`:")
                out.extend(f" - {note}" for note in notes)

    return "\n".join(out)
|
github_dkg/github_client.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Thin async wrapper around the GitHub REST API v3."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from typing import Any, AsyncIterator
|
|
7
|
+
|
|
8
|
+
import httpx
|
|
9
|
+
|
|
10
|
+
_BASE = "https://api.github.com"
|
|
11
|
+
_PER_PAGE = 100
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class GitHubRateLimitError(RuntimeError):
    """Signal that the GitHub API rate limit has been used up.

    Attributes:
        reset_at: Unix timestamp at which the rate limit resets, or ``None``
            if it was not supplied.
    """

    def __init__(self, reset_at: int | None) -> None:
        self.reset_at = reset_at
        # A falsy reset time (None or 0) leaves the message without a suffix.
        suffix = f" (resets at unix={reset_at})" if reset_at else ""
        super().__init__("GitHub API rate limit exhausted" + suffix)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _check_rate_limit(response: httpx.Response) -> None:
    """Raise GitHubRateLimitError if the response indicates rate-limit exhaustion.

    That is the case when the status is 403 or 429 *and* the
    ``X-RateLimit-Remaining`` header equals ``"0"``. The error carries the
    ``X-RateLimit-Reset`` value as an int when that header is all digits,
    otherwise ``None``. Any other response passes through untouched.
    """
    if response.status_code not in (403, 429):
        return
    if response.headers.get("X-RateLimit-Remaining") != "0":
        return
    reset_header = response.headers.get("X-RateLimit-Reset")
    reset_at = (
        int(reset_header) if reset_header and reset_header.isdigit() else None
    )
    raise GitHubRateLimitError(reset_at)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class GitHubClient:
    """Async GitHub REST client for issues, pull requests and repo metadata.

    Every request sends a bearer token plus the ``Accept`` and
    ``X-GitHub-Api-Version`` headers set in ``__init__`` and is checked with
    ``_check_rate_limit`` before ``raise_for_status``.
    """

    def __init__(self, token: str | None = None, timeout: float = 30.0) -> None:
        """Resolve the token (argument first, then ``GITHUB_TOKEN``).

        Raises:
            ValueError: If neither source provides a non-empty token.
        """
        resolved = token or os.environ.get("GITHUB_TOKEN", "")
        if not resolved:
            raise ValueError(
                "GitHub token required. Pass token= or set GITHUB_TOKEN env var."
            )
        self._timeout = timeout
        self._headers = {
            "Authorization": f"Bearer {resolved}",
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }

    # ------------------------------------------------------------------
    # Issues
    # ------------------------------------------------------------------

    async def list_issues(
        self,
        owner: str,
        repo: str,
        state: str = "all",
        since: str | None = None,
    ) -> AsyncIterator[dict[str, Any]]:
        """Yield all issues (excluding PRs) page by page."""
        query: dict[str, Any] = {"state": state, "per_page": _PER_PAGE, "page": 1}
        if since:
            query["since"] = since
        endpoint = f"{_BASE}/repos/{owner}/{repo}/issues"
        async for entry in self._paginate(endpoint, query):
            # The issues endpoint also returns PRs; those carry a
            # "pull_request" key and are skipped.
            if "pull_request" in entry:
                continue
            yield entry

    async def get_issue(
        self, owner: str, repo: str, number: int
    ) -> dict[str, Any]:
        """Fetch a single issue by number."""
        endpoint = f"{_BASE}/repos/{owner}/{repo}/issues/{number}"
        return await self._get(endpoint)

    async def list_issue_comments(
        self, owner: str, repo: str, issue_number: int
    ) -> list[dict[str, Any]]:
        """Return every comment on an issue, across all pages."""
        endpoint = f"{_BASE}/repos/{owner}/{repo}/issues/{issue_number}/comments"
        first_page: dict[str, Any] = {"per_page": _PER_PAGE, "page": 1}
        return [entry async for entry in self._paginate(endpoint, first_page)]

    # ------------------------------------------------------------------
    # Pull Requests
    # ------------------------------------------------------------------

    async def list_pulls(
        self,
        owner: str,
        repo: str,
        state: str = "all",
        since: str | None = None,
    ) -> AsyncIterator[dict[str, Any]]:
        """Yield PRs page by page.

        ``since`` is an ISO 8601 timestamp. The /pulls endpoint does not
        support a server-side ``since`` filter, so results are requested
        sorted by ``updated`` descending and iteration stops at the first PR
        whose ``updated_at`` string compares lower than ``since``.
        """
        query: dict[str, Any] = {
            "state": state,
            "per_page": _PER_PAGE,
            "page": 1,
            "sort": "updated",
            "direction": "desc",
        }
        endpoint = f"{_BASE}/repos/{owner}/{repo}/pulls"
        async for pull in self._paginate(endpoint, query):
            # Plain string comparison of the two timestamps.
            if since and (pull.get("updated_at") or "") < since:
                return
            yield pull

    async def get_pull(
        self, owner: str, repo: str, number: int
    ) -> dict[str, Any]:
        """Fetch a single pull request by number."""
        endpoint = f"{_BASE}/repos/{owner}/{repo}/pulls/{number}"
        return await self._get(endpoint)

    async def list_pull_reviews(
        self, owner: str, repo: str, pull_number: int
    ) -> list[dict[str, Any]]:
        """Return every review on a PR, across all pages."""
        endpoint = f"{_BASE}/repos/{owner}/{repo}/pulls/{pull_number}/reviews"
        first_page: dict[str, Any] = {"per_page": _PER_PAGE, "page": 1}
        return [entry async for entry in self._paginate(endpoint, first_page)]

    async def list_pull_comments(
        self, owner: str, repo: str, pull_number: int
    ) -> list[dict[str, Any]]:
        """Inline review comments on a PR."""
        endpoint = f"{_BASE}/repos/{owner}/{repo}/pulls/{pull_number}/comments"
        first_page: dict[str, Any] = {"per_page": _PER_PAGE, "page": 1}
        return [entry async for entry in self._paginate(endpoint, first_page)]

    # ------------------------------------------------------------------
    # Repository metadata
    # ------------------------------------------------------------------

    async def get_repo(self, owner: str, repo: str) -> dict[str, Any]:
        """Fetch the repository's metadata."""
        return await self._get(f"{_BASE}/repos/{owner}/{repo}")

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    async def _get(self, url: str) -> dict[str, Any]:
        """GET ``url`` and return the decoded JSON body."""
        async with httpx.AsyncClient(timeout=self._timeout) as session:
            response = await session.get(url, headers=self._headers)
            _check_rate_limit(response)
            response.raise_for_status()
            return response.json()

    async def _paginate(
        self, url: str, params: dict[str, Any]
    ) -> AsyncIterator[dict[str, Any]]:
        """Yield items from ``url`` page by page, starting at ``params["page"]``.

        Stops on an empty page or on a page shorter than ``_PER_PAGE``. The
        caller's ``params`` dict is never mutated.
        """
        async with httpx.AsyncClient(timeout=self._timeout) as session:
            query = params
            while True:
                response = await session.get(
                    url, headers=self._headers, params=query
                )
                _check_rate_limit(response)
                response.raise_for_status()
                batch = response.json()
                if not batch:
                    return
                for entry in batch:
                    yield entry
                # A short page is the last one.
                if len(batch) < _PER_PAGE:
                    return
                # Build a fresh dict for the next page instead of mutating.
                query = dict(query, page=query["page"] + 1)
|
github_dkg/ingestor.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
"""Orchestrates fetching from GitHub and writing to DKG v10 Working Memory."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from .client import DKGClient
|
|
10
|
+
from .formatter import format_issue, format_pull_request
|
|
11
|
+
from .github_client import GitHubClient
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class IngestResult:
    """Running tally of an ingest run: counters, error messages and turn URIs."""

    # Number of issues / pull requests counted as ingested.
    issues_ingested: int = 0
    pulls_ingested: int = 0
    # One message per item that failed.
    errors: list[str] = field(default_factory=list)
    # "turnUri" values collected from the DKG responses.
    turn_uris: list[str] = field(default_factory=list)

    @property
    def total(self) -> int:
        """Combined number of issues and pull requests ingested."""
        return sum((self.issues_ingested, self.pulls_ingested))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class GitHubDKGIngestor:
|
|
27
|
+
"""Fetch GitHub items and write them to DKG v10 Working Memory.
|
|
28
|
+
|
|
29
|
+
Each issue and PR becomes one Knowledge Asset (via /api/memory/turn).
|
|
30
|
+
All assets for a repo are scoped to a single Context Graph.
|
|
31
|
+
|
|
32
|
+
The ``github`` client is optional — required for ingest, but ``promote``
|
|
33
|
+
only touches DKG so it can be omitted there.
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
def __init__(
    self,
    dkg: DKGClient,
    github: GitHubClient | None = None,
    context_graph_id: str = "",
    layer: str = "wm",
    max_comments_per_issue: int = 20,
    max_reviews_per_pr: int = 10,
    concurrency: int = 5,
) -> None:
    """Store the clients and ingest settings.

    Args:
        dkg: Client used for all DKG writes.
        github: Client used to read from GitHub; may be omitted when only
            ``promote`` is used.
        context_graph_id: Context Graph that every write is sent to.
        layer: Memory layer passed on with each write.
        max_comments_per_issue: Only this many leading comments are
            included per issue.
        max_reviews_per_pr: Only this many leading reviews are included
            per pull request.
        concurrency: Size of the semaphore that bounds parallel item
            ingests during a bulk run; must be at least 1.

    Raises:
        ValueError: If ``concurrency`` is smaller than 1.
    """
    # A semaphore of size 0 is never released, so every bulk-ingest task
    # would wait on it forever; reject it up front.
    if concurrency < 1:
        raise ValueError(f"concurrency must be at least 1, got {concurrency}")
    self._dkg = dkg
    self._gh = github
    self._context_graph_id = context_graph_id
    self._layer = layer
    self._max_comments = max_comments_per_issue
    self._max_reviews = max_reviews_per_pr
    self._sem = asyncio.Semaphore(concurrency)
|
|
53
|
+
|
|
54
|
+
def _require_github(self) -> GitHubClient:
    """Return the configured GitHub client.

    Raises:
        RuntimeError: If the ingestor was built without a GitHub client.
    """
    client = self._gh
    if client is not None:
        return client
    raise RuntimeError(
        "This operation requires a GitHubClient. "
        "Pass github=... when constructing GitHubDKGIngestor."
    )
|
|
61
|
+
|
|
62
|
+
# ------------------------------------------------------------------
|
|
63
|
+
# Public API
|
|
64
|
+
# ------------------------------------------------------------------
|
|
65
|
+
|
|
66
|
+
async def ingest_repo(
    self,
    owner: str,
    repo: str,
    since: str | None = None,
    include_issues: bool = True,
    include_pulls: bool = True,
) -> IngestResult:
    """Ingest the issues and/or pull requests of ``owner/repo``.

    The item listings are read in full first; the per-item ingests are then
    awaited together with ``asyncio.gather``. Per-item failures are recorded
    in the returned result by the helper coroutines.

    Args:
        owner: Repository owner.
        repo: Repository name.
        since: Optional cutoff passed on to both listings.
        include_issues: Whether issues are ingested.
        include_pulls: Whether pull requests are ingested.

    Returns:
        The ``IngestResult`` shared by all per-item ingests.

    Raises:
        RuntimeError: If no GitHub client was configured.
    """
    gh = self._require_github()
    summary = IngestResult()
    pending: list[Any] = []

    if include_issues:
        pending.extend(
            [
                self._ingest_issue(owner, repo, issue, summary)
                async for issue in gh.list_issues(owner, repo, since=since)
            ]
        )

    if include_pulls:
        pending.extend(
            [
                self._ingest_pull(owner, repo, pull, summary)
                async for pull in gh.list_pulls(owner, repo, since=since)
            ]
        )

    await asyncio.gather(*pending)
    return summary
|
|
88
|
+
|
|
89
|
+
async def ingest_issue(
    self, owner: str, repo: str, issue_number: int
) -> dict[str, Any]:
    """Ingest a single issue by number. Returns the DKG turn response.

    The issue is fetched first, then its comments; only the first
    ``max_comments_per_issue`` comments are included in the document.

    Raises:
        RuntimeError: If no GitHub client was configured.
    """
    gh = self._require_github()
    issue = await gh.get_issue(owner, repo, issue_number)
    all_comments = await gh.list_issue_comments(owner, repo, issue_number)
    kept_comments = all_comments[: self._max_comments]
    document = format_issue(issue, kept_comments, owner, repo)
    response = await self._dkg.memory_turn(
        context_graph_id=self._context_graph_id,
        markdown=document,
        layer=self._layer,
        session_uri=_repo_session_uri(owner, repo),
    )
    return response
|
|
103
|
+
|
|
104
|
+
async def ingest_pull(
    self, owner: str, repo: str, pull_number: int
) -> dict[str, Any]:
    """Ingest a single PR by number. Returns the DKG turn response.

    The PR, its reviews and its inline comments are requested together;
    only the first ``max_reviews_per_pr`` reviews are included in the
    document.

    Raises:
        RuntimeError: If no GitHub client was configured.
    """
    gh = self._require_github()
    pull, all_reviews, inline_notes = await asyncio.gather(
        gh.get_pull(owner, repo, pull_number),
        gh.list_pull_reviews(owner, repo, pull_number),
        gh.list_pull_comments(owner, repo, pull_number),
    )
    kept_reviews = all_reviews[: self._max_reviews]
    document = format_pull_request(pull, kept_reviews, inline_notes, owner, repo)
    response = await self._dkg.memory_turn(
        context_graph_id=self._context_graph_id,
        markdown=document,
        layer=self._layer,
        session_uri=_repo_session_uri(owner, repo),
    )
    return response
|
|
127
|
+
|
|
128
|
+
async def promote(self, turn_uri: str) -> dict[str, Any]:
    """Promote a Working Memory Knowledge Asset to Shared Working Memory (SHARE).

    The assertion name sent to the DKG client is the last ``/``-separated
    segment of ``turn_uri``. Surrounding whitespace and trailing slashes
    are ignored, so ``".../abc/"`` resolves to ``"abc"`` rather than to an
    empty name.

    Args:
        turn_uri: URI of the turn to promote.

    Returns:
        The DKG client's promote response.

    Raises:
        ValueError: If no assertion name can be derived from ``turn_uri``.
    """
    name = turn_uri.strip().rstrip("/").rsplit("/", 1)[-1]
    if not name:
        raise ValueError(
            f"Cannot derive an assertion name from turn URI {turn_uri!r}"
        )
    return await self._dkg.assertion_promote(
        name=name,
        context_graph_id=self._context_graph_id,
    )
|
|
135
|
+
|
|
136
|
+
# ------------------------------------------------------------------
|
|
137
|
+
# Internal helpers
|
|
138
|
+
# ------------------------------------------------------------------
|
|
139
|
+
|
|
140
|
+
async def _ingest_issue(
    self,
    owner: str,
    repo: str,
    issue: dict[str, Any],
    result: IngestResult,
) -> None:
    """Ingest one already-fetched issue and tally the outcome on ``result``.

    The work runs while holding ``self._sem``. Any exception raised along
    the way is caught and appended to ``result.errors`` instead of being
    propagated to the caller.
    """
    github = self._require_github()
    async with self._sem:
        try:
            issue_number = issue["number"]
            thread = await github.list_issue_comments(
                owner, repo, issue_number
            )
            # Render at most ``_max_comments`` comments.
            capped_thread = thread[: self._max_comments]
            document = format_issue(issue, capped_thread, owner, repo)
            response = await self._dkg.memory_turn(
                context_graph_id=self._context_graph_id,
                markdown=document,
                layer=self._layer,
                session_uri=_repo_session_uri(owner, repo),
            )
            result.issues_ingested += 1
            turn_uri = response.get("turnUri")
            if turn_uri:
                result.turn_uris.append(turn_uri)
        except Exception as exc:
            # ``.get`` here because the failure may be the missing
            # "number" key itself.
            result.errors.append(f"issue #{issue.get('number')}: {exc}")
|
|
166
|
+
|
|
167
|
+
async def _ingest_pull(
    self,
    owner: str,
    repo: str,
    pr: dict[str, Any],
    result: IngestResult,
) -> None:
    """Ingest one already-fetched pull request and tally it on ``result``.

    The work runs while holding ``self._sem``. Reviews and inline comments
    are requested together via ``asyncio.gather``. Any exception is caught
    and appended to ``result.errors`` instead of being propagated.
    """
    github = self._require_github()
    async with self._sem:
        try:
            pull_number = pr["number"]
            all_reviews, inline_comments = await asyncio.gather(
                github.list_pull_reviews(owner, repo, pull_number),
                github.list_pull_comments(owner, repo, pull_number),
            )
            # Render at most ``_max_reviews`` reviews; inline comments are
            # handed to the formatter uncapped.
            capped_reviews = all_reviews[: self._max_reviews]
            document = format_pull_request(
                pr, capped_reviews, inline_comments, owner, repo
            )
            response = await self._dkg.memory_turn(
                context_graph_id=self._context_graph_id,
                markdown=document,
                layer=self._layer,
                session_uri=_repo_session_uri(owner, repo),
            )
            result.pulls_ingested += 1
            turn_uri = response.get("turnUri")
            if turn_uri:
                result.turn_uris.append(turn_uri)
        except Exception as exc:
            # ``.get`` here because the failure may be the missing
            # "number" key itself.
            result.errors.append(f"PR #{pr.get('number')}: {exc}")
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _repo_session_uri(owner: str, repo: str) -> str:
|
|
203
|
+
return f"https://github.com/{owner}/{repo}"
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: github-dkg
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Ingest GitHub issues, PRs, and review comments into DKG v10 Working Memory
|
|
5
|
+
Project-URL: Repository, https://github.com/haroldboom/github-dkg
|
|
6
|
+
License: MIT
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Requires-Dist: click>=8.1
|
|
10
|
+
Requires-Dist: httpx>=0.27
|
|
11
|
+
Provides-Extra: dev
|
|
12
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
|
|
13
|
+
Requires-Dist: pytest>=8; extra == 'dev'
|
|
14
|
+
Requires-Dist: respx>=0.21; extra == 'dev'
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
|
|
17
|
+
# github-dkg
|
|
18
|
+
|
|
19
|
+
Ingest GitHub issues, pull requests, and review comments into [DKG v10](https://docs.origintrail.io) Working Memory as Knowledge Assets.
|
|
20
|
+
|
|
21
|
+
Every issue and PR becomes a queryable, attributable Knowledge Asset in your DKG v10 node. Key decisions can be promoted to Shared Working Memory — making your team's engineering knowledge accessible to agents.
|
|
22
|
+
|
|
23
|
+
## Demo
|
|
24
|
+
|
|
25
|
+
- **Walkthrough notebook:** [`demo.ipynb`](demo.ipynb) — runs end-to-end against a built-in mock of GitHub and the DKG node, no tokens required. Open in [Colab](https://colab.research.google.com/github/haroldboom/github-dkg/blob/master/demo.ipynb).
|
|
26
|
+
- **Live recording script:** [`examples/demo_video.py`](examples/demo_video.py) — drives all three demos against a real DKG node and the GitHub API; this is the script behind the bounty walkthrough video.
|
|
27
|
+
|
|
28
|
+
## Install
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install github-dkg
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quickstart
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
export DKG_TOKEN=your-dkg-token
|
|
38
|
+
export DKG_BASE_URL=http://localhost:9200
|
|
39
|
+
export DKG_CONTEXT_GRAPH=your-context-graph-id
|
|
40
|
+
export GITHUB_TOKEN=your-github-token
|
|
41
|
+
|
|
42
|
+
# Bulk-ingest all issues and PRs from a repository
|
|
43
|
+
github-dkg ingest owner/repo --context-graph $DKG_CONTEXT_GRAPH
|
|
44
|
+
|
|
45
|
+
# Ingest a single issue
|
|
46
|
+
github-dkg ingest-one owner/repo 42 --type issue --context-graph $DKG_CONTEXT_GRAPH
|
|
47
|
+
|
|
48
|
+
# Ingest a single PR
|
|
49
|
+
github-dkg ingest-one owner/repo 99 --type pr --context-graph $DKG_CONTEXT_GRAPH
|
|
50
|
+
|
|
51
|
+
# Search ingested knowledge
|
|
52
|
+
github-dkg search "authentication bug" --context-graph $DKG_CONTEXT_GRAPH
|
|
53
|
+
|
|
54
|
+
# Promote a Working Memory asset to Shared Working Memory (SHARE)
|
|
55
|
+
github-dkg promote dkg://wm/turn/abc123 --context-graph $DKG_CONTEXT_GRAPH
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## GitHub Action
|
|
59
|
+
|
|
60
|
+
Automatically ingest issues and PRs as they are created or updated. Add to `.github/workflows/dkg-ingest.yml`:
|
|
61
|
+
|
|
62
|
+
```yaml
|
|
63
|
+
on:
|
|
64
|
+
issues:
|
|
65
|
+
types: [opened, edited, closed]
|
|
66
|
+
pull_request:
|
|
67
|
+
types: [opened, edited, closed]
|
|
68
|
+
pull_request_review:
|
|
69
|
+
types: [submitted]
|
|
70
|
+
|
|
71
|
+
jobs:
|
|
72
|
+
ingest:
|
|
73
|
+
runs-on: ubuntu-latest
|
|
74
|
+
steps:
|
|
75
|
+
- uses: haroldboom/github-dkg@v0.1.0
|
|
76
|
+
id: ingest
|
|
77
|
+
with:
|
|
78
|
+
dkg-token: ${{ secrets.DKG_TOKEN }}
|
|
79
|
+
dkg-base-url: ${{ secrets.DKG_BASE_URL }}
|
|
80
|
+
dkg-context-graph: ${{ secrets.DKG_CONTEXT_GRAPH }}
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
See `examples/workflow.yml` for a complete example including automatic promotion of architecture-decision PRs to Shared Working Memory.
|
|
84
|
+
|
|
85
|
+
## Python API
|
|
86
|
+
|
|
87
|
+
```python
|
|
88
|
+
import asyncio
|
|
89
|
+
from github_dkg import DKGClient, GitHubClient, GitHubDKGIngestor
|
|
90
|
+
|
|
91
|
+
async def main():
|
|
92
|
+
dkg = DKGClient(base_url="http://localhost:9200", token="your-token")
|
|
93
|
+
gh = GitHubClient(token="your-github-token")
|
|
94
|
+
ingestor = GitHubDKGIngestor(dkg=dkg, github=gh, context_graph_id="cg-123")
|
|
95
|
+
|
|
96
|
+
# Bulk ingest
|
|
97
|
+
result = await ingestor.ingest_repo("owner", "repo", since="2024-01-01")
|
|
98
|
+
print(f"Ingested {result.total} items ({len(result.errors)} errors)")
|
|
99
|
+
|
|
100
|
+
# Single item
|
|
101
|
+
resp = await ingestor.ingest_issue("owner", "repo", 42)
|
|
102
|
+
print(f"Turn URI: {resp['turnUri']}")
|
|
103
|
+
|
|
104
|
+
# Promote to Shared Working Memory
|
|
105
|
+
await ingestor.promote(resp["turnUri"])
|
|
106
|
+
|
|
107
|
+
asyncio.run(main())
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## `--since` filtering
|
|
111
|
+
|
|
112
|
+
`--since` accepts an ISO 8601 timestamp and limits ingest to items updated after that point.
|
|
113
|
+
|
|
114
|
+
- **Issues:** filtered server-side by GitHub via the `since` parameter on `/issues`.
|
|
115
|
+
- **Pull requests:** GitHub's `/pulls` endpoint has no `since` filter, so the package requests `sort=updated&direction=desc` and stops paginating once results fall below the cutoff. Net result: only PRs touched after `--since` are fetched and ingested.
|
|
116
|
+
|
|
117
|
+
Comment-only updates (a new comment without an issue/PR body edit) still bump `updated_at`, so they're included.
|
|
118
|
+
|
|
119
|
+
## Rate limiting
|
|
120
|
+
|
|
121
|
+
`GitHubClient` raises `github_dkg.github_client.GitHubRateLimitError` when GitHub returns `403`/`429` with `X-RateLimit-Remaining: 0`. The exception carries `reset_at` (unix timestamp) so callers can decide whether to back off, sleep, or fail. Authenticated tokens get 5,000 requests/hour; bulk-ingesting a large repo with many comment-heavy PRs can approach this limit.
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
from github_dkg.github_client import GitHubRateLimitError
|
|
125
|
+
|
|
126
|
+
try:
|
|
127
|
+
result = await ingestor.ingest_repo("OriginTrail", "dkg-v9")
|
|
128
|
+
except GitHubRateLimitError as e:
|
|
129
|
+
print(f"Rate limited; resets at unix={e.reset_at}")
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## Memory layers
|
|
133
|
+
|
|
134
|
+
| Layer | Flag | Visibility |
|
|
135
|
+
|---|---|---|
|
|
136
|
+
| Working Memory | `--layer wm` (default) | Private to your node |
|
|
137
|
+
| Shared Working Memory | `--layer swm` | Gossiped across the paranet |
|
|
138
|
+
|
|
139
|
+
Promotion from Working Memory to Shared Working Memory is always explicit — nothing is shared automatically.
|
|
140
|
+
|
|
141
|
+
## License
|
|
142
|
+
|
|
143
|
+
MIT
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
github_dkg/__init__.py,sha256=pInaVjlLzKX6l_tq0wkik3cQDBrBgyYtOsB2XmgkuHE,281
|
|
2
|
+
github_dkg/cli.py,sha256=tJulW7owe6O0Ex1xTlLRQjRWluFWSORb1KAbpiZOiBM,6391
|
|
3
|
+
github_dkg/client.py,sha256=B_o3XielJTQSV1_fE6TPD1SuQrdQXiQZ372V8b_-IDE,4424
|
|
4
|
+
github_dkg/formatter.py,sha256=II0vHLby0o0RoXWiRb7nI1-8zPdYFp-2lnC7020CVhk,4636
|
|
5
|
+
github_dkg/github_client.py,sha256=9G85qBDAZJMRJtg9GsMUlquwC8egM6pvLbem5VMVu24,6505
|
|
6
|
+
github_dkg/ingestor.py,sha256=xESZxR4ehXEv38ULeWSdXPzykIJvrnmlwgAn7oLRjXQ,7062
|
|
7
|
+
github_dkg-0.1.0.dist-info/METADATA,sha256=260qj5RC7ZtTwes54yyhXQPhrVQ4nqhUVF0gxgxMgD4,5128
|
|
8
|
+
github_dkg-0.1.0.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
9
|
+
github_dkg-0.1.0.dist-info/entry_points.txt,sha256=RRzXzSjXBtEKMnfQ2oLMLfrTv4h8hXUXbOVhgF7COdU,51
|
|
10
|
+
github_dkg-0.1.0.dist-info/licenses/LICENSE,sha256=Btzdu2kIoMbdSp6OyCLupB1aRgpTCJ_szMimgEnpkkE,1056
|
|
11
|
+
github_dkg-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|