kestrel-feature-github 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,541 @@
1
+ """GitHub API client for repository access."""
2
+ import base64
3
+ import logging
4
+ import os
5
+ from typing import Optional
6
+ from urllib.parse import quote
7
+
8
+ import httpx
9
+
10
+ from .models import FileContent, FileType, RepoFile, SearchResult
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ # Default HTTP timeout in seconds
15
+ HTTP_TIMEOUT_DEFAULT = 30
16
+
17
+
18
class GitHubClientError(Exception):
    """Raised when a GitHub API call fails or cannot be attempted.

    Attributes:
        status_code: HTTP-style status associated with the failure.
            Defaults to 0 when the caller does not supply one.
    """

    def __init__(self, message: str, status_code: int = 0):
        """Store the status code alongside the human-readable message.

        Args:
            message: Description of the failure; becomes ``str(error)``.
            status_code: Status code to expose to callers (0 if omitted).
        """
        # Attach the code first so it is available even to code that
        # inspects the instance during base-class initialisation.
        self.status_code = status_code
        super().__init__(message)
23
+
24
+
25
class GitHubClient:
    """Asynchronous client for a subset of the GitHub REST API.

    The client can be constructed without a token. In that state every API
    method raises ``GitHubClientError`` (status 503) until a token becomes
    available through the optional key resolver.

    A single ``httpx.AsyncClient`` is created lazily and reused; call
    ``close()`` (or use ``async with``) to release it.
    """

    BASE_URL = "https://api.github.com"

    def __init__(
        self,
        token: Optional[str] = None,
        key_resolver: Optional["KeyResolutionService"] = None,
    ):
        """Initialize with optional token.

        If no token is provided, the client will be created in a limited mode
        where all API calls return an error explaining that GITHUB_PAT is
        required. This allows the application to start without GITHUB_PAT
        configured.

        Args:
            token: GitHub Personal Access Token. When falsy, the environment
                variables GITHUB_PAT, GITHUB_TOKEN and GH_TOKEN are tried in
                that order.
            key_resolver: Optional KeyResolutionService for dynamic key
                resolution; consulted lazily on the first API call when no
                token was found here.
        """
        self._key_resolver = key_resolver
        self.token = (
            token
            or os.getenv("GITHUB_PAT")
            or os.getenv("GITHUB_TOKEN")
            or os.getenv("GH_TOKEN")
        )
        self._configured = bool(self.token)

        if self._configured:
            self.headers = self._build_headers(self.token)
        else:
            self.headers = {}
            logger.warning("GitHub client initialized without token - GitHub features will be unavailable")

        self._client: Optional["httpx.AsyncClient"] = None

    async def __aenter__(self) -> "GitHubClient":
        """Allow ``async with GitHubClient(...) as client:`` usage."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the underlying HTTP client when leaving the ``async with``."""
        await self.close()

    @staticmethod
    def _build_headers(token: str) -> dict:
        """Return the default request headers for an authenticated client."""
        return {
            "Accept": "application/vnd.github.v3+json",
            "Authorization": f"Bearer {token}",
            "X-GitHub-Api-Version": "2022-11-28",
        }

    @staticmethod
    def _clamp_per_page(requested: int) -> int:
        """Clamp a page size into the 1..100 range sent as ``per_page``."""
        return max(1, min(requested, 100))

    async def _ensure_configured(self):
        """Ensure client is configured, using key resolver if available.

        Does nothing when a token is already set or no resolver was given.
        Resolver failures are logged and swallowed on purpose so that the
        subsequent ``_check_configured`` call reports the missing token.
        """
        if self._configured or not self._key_resolver:
            return

        # Only the resolver call is best-effort; keep the try body minimal so
        # unrelated failures are not misreported as resolver failures.
        try:
            token = await self._key_resolver.resolve_key("github", require=False)
        except Exception as e:
            logger.warning("Key resolver failed for GitHub: %s", e)
            return

        if not token:
            return

        self.token = token
        self._configured = True
        self.headers = self._build_headers(token)
        # Reset client to pick up new headers
        await self.close()
        logger.info("GitHub client configured via key resolver")

    def _check_configured(self):
        """Check if client is properly configured with a token.

        Raises:
            GitHubClientError: With status 503 when no token is available.
        """
        if not self._configured:
            raise GitHubClientError(
                "GitHub feature not available: No GITHUB_PAT, GITHUB_TOKEN, or GH_TOKEN environment variable set. "
                "Contact your administrator to enable GitHub integration.",
                status_code=503,
            )

    async def _get_client(self) -> "httpx.AsyncClient":
        """Get or create the shared HTTP client.

        Raises:
            GitHubClientError: With status 503 when no token is available.
        """
        await self._ensure_configured()
        self._check_configured()
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                base_url=self.BASE_URL,
                headers=self.headers,
                timeout=HTTP_TIMEOUT_DEFAULT,
            )
        return self._client

    async def close(self):
        """Close the HTTP client if one is open and forget it."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()
        self._client = None

    def _parse_repo(self, repo: str) -> tuple[str, str]:
        """Split an ``owner/repo`` string into ``(owner, repo_name)``.

        Only the first ``/`` separates the two parts.

        Raises:
            GitHubClientError: If there is no ``/`` or either side is empty.
        """
        owner, separator, repo_name = repo.partition("/")
        # An empty owner or name would produce a malformed URL such as
        # "/repos//name", so reject it up front.
        if not separator or not owner or not repo_name:
            raise GitHubClientError(f"Invalid repo format: {repo}. Expected 'owner/repo'.")
        return owner, repo_name

    async def get_file_content(
        self,
        repo: str,
        path: str,
        ref: str = "main",
    ) -> "FileContent":
        """Get content of a file from repository.

        Args:
            repo: Repository in 'owner/repo' format
            path: Path to file within repository
            ref: Branch, tag, or commit SHA

        Returns:
            FileContent with the content decoded as UTF-8 text

        Raises:
            GitHubClientError: If the path is missing (404), access is denied
                or rate limited (403), the API returns another non-200
                status, the path is not a regular file, or the content
                cannot be decoded.
        """
        owner, repo_name = self._parse_repo(repo)
        client = await self._get_client()

        # URL encode the path
        encoded_path = quote(path, safe="")
        url = f"/repos/{owner}/{repo_name}/contents/{encoded_path}"

        response = await client.get(url, params={"ref": ref})

        if response.status_code == 404:
            raise GitHubClientError(f"File not found: {path} in {repo}", 404)
        elif response.status_code == 403:
            raise GitHubClientError("Rate limited or access denied", 403)
        elif response.status_code != 200:
            raise GitHubClientError(f"GitHub API error: {response.text}", response.status_code)

        data = response.json()

        # A directory path yields a JSON array rather than an object, so the
        # payload type must be checked before any ``.get`` call.
        if not isinstance(data, dict) or data.get("type") != "file":
            raise GitHubClientError(f"Path is not a file: {path}")

        # Anything other than base64 cannot be decoded below; fail loudly
        # instead of returning an empty or corrupt string.
        encoding = data.get("encoding", "base64")
        if encoding != "base64":
            raise GitHubClientError(
                f"Cannot decode {path}: unsupported content encoding '{encoding}' "
                "(file may exceed the contents API size limit)"
            )

        # Decode base64 content
        content_b64 = data.get("content", "")
        try:
            content = base64.b64decode(content_b64).decode("utf-8")
        except (ValueError, TypeError) as e:
            # ValueError covers binascii.Error and UnicodeDecodeError;
            # TypeError covers a null/non-string content field.
            raise GitHubClientError(f"Failed to decode file content: {e}") from e

        return FileContent(
            path=path,
            content=content,
            sha=data.get("sha", ""),
            size=data.get("size", 0),
            encoding="utf-8",
            repo=repo,
            ref=ref,
        )

    async def list_directory(
        self,
        repo: str,
        path: str = "",
        ref: str = "main",
    ) -> "list[RepoFile]":
        """List contents of a directory.

        Args:
            repo: Repository in 'owner/repo' format
            path: Path to directory (empty for root)
            ref: Branch, tag, or commit SHA

        Returns:
            List of files and directories. If the path is a single file, a
            one-element list describing that file.

        Raises:
            GitHubClientError: If the path is missing (404) or the API
                returns any other non-200 status.
        """
        owner, repo_name = self._parse_repo(repo)
        client = await self._get_client()

        # URL encode the path
        encoded_path = quote(path, safe="") if path else ""
        url = f"/repos/{owner}/{repo_name}/contents/{encoded_path}"

        response = await client.get(url, params={"ref": ref})

        if response.status_code == 404:
            raise GitHubClientError(f"Path not found: {path} in {repo}", 404)
        elif response.status_code != 200:
            raise GitHubClientError(f"GitHub API error: {response.text}", response.status_code)

        data = response.json()

        # Handle single file case
        if isinstance(data, dict):
            return [self._parse_repo_file(data)]

        # Parse directory listing
        return [self._parse_repo_file(item) for item in data]

    def _parse_repo_file(self, data: dict) -> "RepoFile":
        """Parse one contents-API entry into a RepoFile.

        Unknown or missing ``type`` values are treated as regular files.
        """
        type_map = {
            "dir": FileType.DIR,
            "symlink": FileType.SYMLINK,
            "submodule": FileType.SUBMODULE,
        }
        file_type = type_map.get(data.get("type", "file"), FileType.FILE)

        return RepoFile(
            path=data.get("path", ""),
            name=data.get("name", ""),
            type=file_type,
            size=data.get("size", 0),
            sha=data.get("sha", ""),
            download_url=data.get("download_url"),
        )

    async def search_code(
        self,
        query: str,
        repo: Optional[str] = None,
        path: Optional[str] = None,
        extension: Optional[str] = None,
        max_results: int = 30,
    ) -> "list[SearchResult]":
        """Search for code in repositories.

        Only a single page is requested, so at most 100 results are returned
        regardless of ``max_results``.

        Args:
            query: Search query
            repo: Limit to specific repo (owner/repo format)
            path: Limit to path prefix
            extension: Limit to file extension
            max_results: Maximum results to return; zero or negative yields
                an empty list without contacting the API

        Returns:
            List of search results

        Raises:
            GitHubClientError: On 403 (rate limit / access denied), 422
                (invalid query) or any other non-200 status.
        """
        client = await self._get_client()

        # Nothing was asked for: skip the request instead of sending a
        # non-positive page size.
        if max_results <= 0:
            return []

        # Build search query
        q_parts = [query]
        if repo:
            q_parts.append(f"repo:{repo}")
        if path:
            q_parts.append(f"path:{path}")
        if extension:
            q_parts.append(f"extension:{extension}")

        q = " ".join(q_parts)

        response = await client.get(
            "/search/code",
            params={
                "q": q,
                "per_page": self._clamp_per_page(max_results),
            },
            headers={
                **self.headers,
                # Ask GitHub to include matched fragments in each item.
                "Accept": "application/vnd.github.text-match+json",
            },
        )

        if response.status_code == 403:
            raise GitHubClientError("Rate limited or access denied", 403)
        elif response.status_code == 422:
            raise GitHubClientError(f"Invalid search query: {query}", 422)
        elif response.status_code != 200:
            raise GitHubClientError(f"GitHub API error: {response.text}", response.status_code)

        data = response.json()
        items = data.get("items", [])

        return [
            SearchResult(
                path=item.get("path", ""),
                repo=item.get("repository", {}).get("full_name", ""),
                name=item.get("name", ""),
                sha=item.get("sha", ""),
                score=item.get("score", 0.0),
                html_url=item.get("html_url", ""),
                text_matches=item.get("text_matches", []),
            )
            for item in items[:max_results]
        ]

    async def get_repo_info(self, repo: str) -> dict:
        """Get repository metadata.

        Args:
            repo: Repository in 'owner/repo' format

        Returns:
            Repository metadata dict

        Raises:
            GitHubClientError: If the repository is missing (404) or the API
                returns any other non-200 status.
        """
        owner, repo_name = self._parse_repo(repo)
        client = await self._get_client()

        response = await client.get(f"/repos/{owner}/{repo_name}")

        if response.status_code == 404:
            raise GitHubClientError(f"Repository not found: {repo}", 404)
        elif response.status_code != 200:
            raise GitHubClientError(f"GitHub API error: {response.text}", response.status_code)

        return response.json()

    async def get_tree(
        self,
        repo: str,
        ref: str = "main",
        recursive: bool = True,
    ) -> "list[RepoFile]":
        """Get full repository tree.

        More efficient than recursive list_directory calls.

        Args:
            repo: Repository in 'owner/repo' format
            ref: Branch, tag, or commit SHA
            recursive: Whether to get full tree recursively

        Returns:
            List of tree entries. ``blob`` entries map to files, ``commit``
            entries to submodules, everything else to directories.

        Raises:
            GitHubClientError: If the tree is missing (404) or the API
                returns any other non-200 status.
        """
        owner, repo_name = self._parse_repo(repo)
        client = await self._get_client()

        # Escape characters such as '#' or '?' that would otherwise be read
        # as URL syntax; '/' is kept because branch names may contain it.
        url = f"/repos/{owner}/{repo_name}/git/trees/{quote(ref, safe='/')}"
        params = {"recursive": "1"} if recursive else {}

        response = await client.get(url, params=params)

        if response.status_code == 404:
            raise GitHubClientError(f"Tree not found: {ref} in {repo}", 404)
        elif response.status_code != 200:
            raise GitHubClientError(f"GitHub API error: {response.text}", response.status_code)

        data = response.json()
        if data.get("truncated"):
            # The API flags listings it had to cut short; surface that so
            # callers do not mistake a partial listing for the full tree.
            logger.warning("Tree listing for %s@%s was truncated by GitHub", repo, ref)

        type_map = {"blob": FileType.FILE, "commit": FileType.SUBMODULE}

        results = []
        for item in data.get("tree", []):
            item_path = item.get("path", "")
            results.append(RepoFile(
                path=item_path,
                name=item_path.rsplit("/", 1)[-1],
                type=type_map.get(item.get("type"), FileType.DIR),
                size=item.get("size", 0),
                sha=item.get("sha", ""),
            ))

        return results

    async def create_issue(
        self,
        repo: str,
        title: str,
        body: str,
        labels: Optional[list[str]] = None,
        assignees: Optional[list[str]] = None,
    ) -> dict:
        """Create a GitHub issue.

        Args:
            repo: Repository in 'owner/repo' format
            title: Issue title
            body: Issue body (markdown supported)
            labels: Optional list of label names
            assignees: Optional list of GitHub usernames to assign

        Returns:
            Created issue data including 'html_url', 'number', 'id'

        Raises:
            GitHubClientError: If issue creation fails
        """
        owner, repo_name = self._parse_repo(repo)
        client = await self._get_client()

        data = {"title": title, "body": body}
        if labels:
            data["labels"] = labels
        if assignees:
            data["assignees"] = assignees

        response = await client.post(
            f"/repos/{owner}/{repo_name}/issues",
            json=data,
        )

        if response.status_code == 201:
            result = response.json()
            logger.info("Created issue #%s in %s: %s", result.get("number"), repo, title)
            return result
        elif response.status_code == 403:
            raise GitHubClientError("Rate limited or access denied", 403)
        elif response.status_code == 404:
            raise GitHubClientError(f"Repository not found or no permission: {repo}", 404)
        elif response.status_code == 410:
            raise GitHubClientError("Issues are disabled for this repository", 410)
        elif response.status_code == 422:
            raise GitHubClientError(f"Validation failed: {response.text}", 422)
        else:
            raise GitHubClientError(f"Issue creation failed: {response.text}", response.status_code)

    async def list_issues(
        self,
        repo: str,
        state: str = "open",
        labels: Optional[list[str]] = None,
        per_page: int = 30,
    ) -> list[dict]:
        """List issues in a repository.

        Only the first page is fetched, and pull requests are removed after
        fetching, so fewer than ``per_page`` items may be returned.

        Args:
            repo: Repository in 'owner/repo' format
            state: Issue state filter ('open', 'closed', 'all')
            labels: Optional list of label names to filter by
            per_page: Number of results per page (clamped to 1..100)

        Returns:
            List of issue dicts (excludes pull requests)

        Raises:
            GitHubClientError: If request fails
        """
        owner, repo_name = self._parse_repo(repo)
        client = await self._get_client()

        params = {
            "state": state,
            "per_page": self._clamp_per_page(per_page),
        }
        if labels:
            params["labels"] = ",".join(labels)

        response = await client.get(
            f"/repos/{owner}/{repo_name}/issues",
            params=params,
        )

        if response.status_code == 404:
            raise GitHubClientError(f"Repository not found: {repo}", 404)
        elif response.status_code == 403:
            raise GitHubClientError("Rate limited or access denied", 403)
        elif response.status_code != 200:
            raise GitHubClientError(f"GitHub API error: {response.text}", response.status_code)

        # GitHub's issues endpoint also returns PRs; filter them out
        items = response.json()
        return [i for i in items if "pull_request" not in i]

    async def get_issue(
        self,
        repo: str,
        issue_number: int,
    ) -> dict:
        """Get a specific issue by number.

        Args:
            repo: Repository in 'owner/repo' format
            issue_number: Issue number

        Returns:
            Issue data dict

        Raises:
            GitHubClientError: If request fails
        """
        owner, repo_name = self._parse_repo(repo)
        client = await self._get_client()

        response = await client.get(
            f"/repos/{owner}/{repo_name}/issues/{issue_number}",
        )

        if response.status_code == 404:
            raise GitHubClientError(f"Issue #{issue_number} not found in {repo}", 404)
        elif response.status_code == 403:
            raise GitHubClientError("Rate limited or access denied", 403)
        elif response.status_code != 200:
            raise GitHubClientError(f"GitHub API error: {response.text}", response.status_code)

        return response.json()

    async def get_issue_comments(
        self,
        repo: str,
        issue_number: int,
        per_page: int = 30,
    ) -> list[dict]:
        """Get comments on an issue.

        Only the first page of comments is fetched.

        Args:
            repo: Repository in 'owner/repo' format
            issue_number: Issue number
            per_page: Number of results per page (clamped to 1..100)

        Returns:
            List of comment dicts

        Raises:
            GitHubClientError: If request fails
        """
        owner, repo_name = self._parse_repo(repo)
        client = await self._get_client()

        response = await client.get(
            f"/repos/{owner}/{repo_name}/issues/{issue_number}/comments",
            params={"per_page": self._clamp_per_page(per_page)},
        )

        if response.status_code == 404:
            raise GitHubClientError(f"Issue #{issue_number} not found in {repo}", 404)
        elif response.status_code == 403:
            raise GitHubClientError("Rate limited or access denied", 403)
        elif response.status_code != 200:
            raise GitHubClientError(f"GitHub API error: {response.text}", response.status_code)

        return response.json()