devs-webhook 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
1
+ """Content-based deduplication cache."""
2
+
3
+ import time
4
+ from typing import Dict, Tuple
5
+ import structlog
6
+
7
+ logger = structlog.get_logger()
8
+
9
+
10
class DeduplicationCache:
    """In-memory cache for content-based deduplication with TTL."""

    def __init__(self, ttl_seconds: int = 300):
        """Create a cache whose entries expire after *ttl_seconds*.

        Args:
            ttl_seconds: Time-to-live for cache entries in seconds
                (default 300, i.e. five minutes).
        """
        self.ttl_seconds = ttl_seconds
        # content hash -> (time the hash was first seen, description)
        self._cache: Dict[str, Tuple[float, str]] = {}

    def is_duplicate(self, content_hash: str, description: str = "") -> bool:
        """Report whether *content_hash* was seen within the TTL window.

        A miss records the hash (with *description*) so later calls can
        detect it; a hit leaves the cached entry untouched.

        Args:
            content_hash: Hash of the content to check
            description: Optional description for logging

        Returns:
            True if this is a duplicate within the TTL window
        """
        now = time.time()

        # Purge anything that has outlived its TTL before looking up.
        self._cleanup_expired(now)

        entry = self._cache.get(content_hash)
        if entry is not None:
            first_seen, earlier_description = entry
            logger.info("Duplicate content detected",
                        content_hash=content_hash,
                        age_seconds=round(now - first_seen, 1),
                        ttl_seconds=self.ttl_seconds,
                        description=description,
                        cached_description=earlier_description,
                        is_duplicate=True)
            return True

        # First sighting: remember it for the next ttl_seconds.
        self._cache[content_hash] = (now, description)

        logger.info("New content hash cached",
                    content_hash=content_hash,
                    cache_size=len(self._cache),
                    description=description,
                    is_duplicate=False)

        return False

    def _cleanup_expired(self, current_time: float) -> None:
        """Drop every cache entry older than the TTL."""
        stale = [content_hash
                 for content_hash, (stamp, _) in self._cache.items()
                 if current_time - stamp > self.ttl_seconds]

        for content_hash in stale:
            self._cache.pop(content_hash)

        if stale:
            logger.debug("Cleaned up expired cache entries",
                         expired_count=len(stale),
                         remaining_count=len(self._cache))

    def get_stats(self) -> Dict[str, int]:
        """Return total/valid entry counts plus the configured TTL."""
        now = time.time()
        live = [1 for stamp, _ in self._cache.values()
                if now - stamp <= self.ttl_seconds]

        return {
            "total_entries": len(self._cache),
            "valid_entries": len(live),
            "ttl_seconds": self.ttl_seconds,
        }

    def clear(self) -> None:
        """Forget every cached hash."""
        dropped = len(self._cache)
        self._cache.clear()
        logger.info("Deduplication cache cleared", cleared_count=dropped)
95
+
96
+
97
# Global cache instance
# Module-level singleton created at import time with the default TTL
# (300 s); shared by the helper functions in this module.
_global_cache = DeduplicationCache()
99
+
100
+
101
def is_duplicate_content(content_hash: str, description: str = "") -> bool:
    """Return True when *content_hash* was already seen recently.

    Thin delegate to the module-level DeduplicationCache singleton.
    """
    cache = _global_cache
    return cache.is_duplicate(content_hash, description)
104
+
105
+
106
def get_cache_stats() -> Dict[str, int]:
    """Expose the statistics of the shared deduplication cache."""
    cache = _global_cache
    return cache.get_stats()
109
+
110
+
111
def clear_cache() -> None:
    """Drop every entry from the shared deduplication cache."""
    cache = _global_cache
    cache.clear()
@@ -0,0 +1,197 @@
1
+ """Repository management for webhook handler."""
2
+
3
+ import asyncio
4
+ from pathlib import Path
5
+ from typing import Optional, Dict
6
+ import structlog
7
+
8
+ from ..config import get_config
9
+ from ..github.client import GitHubClient
10
+ from ..utils.async_utils import run_git_async
11
+
12
+ logger = structlog.get_logger()
13
+
14
+
15
class RepositoryManager:
    """Manages repository cloning and caching for webhook tasks."""

    def __init__(self):
        """Initialize repository manager."""
        self.config = get_config()

        self.github_client = GitHubClient(self.config)

        # One lock per repository so concurrent tasks never clone/update
        # the same working copy at the same time.
        self.repo_locks: Dict[str, asyncio.Lock] = {}

        logger.info("Repository manager initialized",
                    cache_dir=str(self.config.repo_cache_dir))

    async def ensure_repository(
        self,
        repo_name: str,
        clone_url: str
    ) -> Optional[Path]:
        """Ensure repository is available locally and up to date.

        Args:
            repo_name: Repository name in format "owner/repo"
            clone_url: Repository clone URL

        Returns:
            Path to local repository or None if failed
        """
        # setdefault keeps the lookup-and-insert in one step with no await
        # in between, so two coroutines cannot end up with different locks.
        lock = self.repo_locks.setdefault(repo_name, asyncio.Lock())

        async with lock:
            return await self._ensure_repository_locked(repo_name, clone_url)

    async def _ensure_repository_locked(
        self,
        repo_name: str,
        clone_url: str
    ) -> Optional[Path]:
        """Ensure repository is available (called with lock held).

        Args:
            repo_name: Repository name
            clone_url: Repository clone URL

        Returns:
            Path to local repository or None if failed
        """
        # Flatten "owner/repo" into a single cache directory name.
        repo_dir = self.config.repo_cache_dir / repo_name.replace("/", "-")

        try:
            if repo_dir.exists():
                # Repository exists, update it
                logger.info("Updating existing repository",
                            repo=repo_name, path=str(repo_dir))

                if await self._update_repository(repo_dir):
                    return repo_dir

                # Update failed: drop the stale cache and fall through to a
                # fresh clone below.
                logger.warning("Update failed, recloning repository",
                               repo=repo_name)
                await self._remove_repository(repo_dir)

            # Clone repository
            logger.info("Cloning repository",
                        repo=repo_name, path=str(repo_dir))

            success = await self.github_client.clone_repository(
                repo_name, repo_dir
            )

            return repo_dir if success else None

        except Exception as e:
            logger.error("Failed to ensure repository",
                         repo=repo_name,
                         error=str(e))
            return None

    async def _update_repository(self, repo_dir: Path) -> bool:
        """Update an existing repository.

        Fetches all remotes, then hard-resets to origin/main, falling back
        to origin/master.

        Args:
            repo_dir: Path to repository directory

        Returns:
            True if update successful
        """
        try:
            # Fetch all remotes using async git
            success, _, stderr = await run_git_async(
                ["fetch", "--all"],
                str(repo_dir)
            )

            if not success:
                logger.warning("Git fetch failed",
                               path=str(repo_dir),
                               error=stderr)
                return False

            # Reset to origin/main or origin/master
            for branch in ("main", "master"):
                success, _, _ = await run_git_async(
                    ["reset", "--hard", f"origin/{branch}"],
                    str(repo_dir)
                )

                if success:
                    logger.info("Repository updated",
                                path=str(repo_dir),
                                branch=branch)
                    return True

            logger.warning("Could not reset to main or master branch",
                           path=str(repo_dir))
            return False

        except Exception as e:
            logger.error("Error updating repository",
                         path=str(repo_dir),
                         error=str(e))
            return False

    async def _remove_repository(self, repo_dir: Path) -> None:
        """Remove a repository directory.

        Errors are logged, not raised: a failed removal must not abort the
        caller's clone/update flow.

        Args:
            repo_dir: Path to repository directory
        """
        try:
            import shutil
            shutil.rmtree(repo_dir)
            logger.info("Repository removed", path=str(repo_dir))
        except Exception as e:
            logger.error("Failed to remove repository",
                         path=str(repo_dir),
                         error=str(e))

    async def get_repository_info(self, repo_name: str) -> Optional[Dict]:
        """Get information about a repository.

        Args:
            repo_name: Repository name

        Returns:
            Repository info dict or None if not found
        """
        return await self.github_client.get_repository_info(repo_name)

    async def cleanup_old_repositories(self, max_age_days: int = 7) -> None:
        """Clean up repository caches older than *max_age_days*.

        Args:
            max_age_days: Maximum age in days before cleanup
        """
        try:
            import time

            cutoff_time = time.time() - (max_age_days * 24 * 60 * 60)
            cache_dir = self.config.repo_cache_dir

            # Nothing has been cloned yet - nothing to clean up.
            if not cache_dir.exists():
                return

            for repo_dir in cache_dir.iterdir():
                if not repo_dir.is_dir():
                    continue

                # Use the directory's last modification time as its age.
                mtime = repo_dir.stat().st_mtime
                if mtime < cutoff_time:
                    logger.info("Cleaning up old repository cache",
                                repo=repo_dir.name,
                                age_days=(time.time() - mtime) / (24 * 60 * 60))
                    await self._remove_repository(repo_dir)

        except Exception as e:
            logger.error("Error during repository cleanup", error=str(e))
@@ -0,0 +1,286 @@
1
+ """Core task processor that handles webhook event processing.
2
+
3
+ This module provides the TaskProcessor class which is decoupled from the task source
4
+ (webhook, SQS, etc.) and handles all the business logic of processing GitHub events.
5
+ """
6
+
7
+ from typing import Dict, Any
8
+ import structlog
9
+
10
+ from ..config import get_config
11
+ from ..github.parser import WebhookParser
12
+ from ..github.client import GitHubClient
13
+ from ..github.models import IssueEvent, PullRequestEvent, CommentEvent
14
+ from .container_pool import ContainerPool
15
+ from .deduplication import is_duplicate_content, get_cache_stats
16
+
17
+ logger = structlog.get_logger()
18
+
19
+
20
class TaskProcessor:
    """Core task processor that handles webhook event processing.

    This class is decoupled from the task source (webhook endpoint, SQS, etc.)
    and contains all the business logic for processing GitHub webhook events.
    """

    def __init__(self, container_pool: "ContainerPool | None" = None):
        """Initialize task processor.

        Args:
            container_pool: Optional container pool instance. If not provided,
                a new one will be created.
        """
        self.config = get_config()
        self.container_pool = container_pool or ContainerPool()
        self.github_client = GitHubClient(self.config)

        logger.info("Task processor initialized",
                    mentioned_user=self.config.github_mentioned_user,
                    container_pool=self.config.get_container_pool_list())

    async def _add_eyes_reaction(self, event: Any, repo_name: str) -> None:
        """Add an eyes reaction to indicate we're processing the event.

        Best-effort: any failure is logged and swallowed so a reaction
        problem never aborts webhook processing.

        Args:
            event: The webhook event object
            repo_name: Repository in format "owner/repo"
        """
        try:
            reaction_added = False

            # Determine what to react to based on event type.
            # Event types matching none of the branches fall through with
            # reaction_added still False and are logged as warnings below.
            if isinstance(event, CommentEvent):
                # React to the comment itself
                reaction_added = await self.github_client.add_reaction_to_comment(
                    repo=repo_name,
                    comment_id=event.comment.id,
                    reaction="eyes"
                )
                logger.info("Attempting to add reaction to comment",
                            comment_id=event.comment.id,
                            repo=repo_name)
            elif isinstance(event, IssueEvent):
                # React to the issue
                reaction_added = await self.github_client.add_reaction_to_issue(
                    repo=repo_name,
                    issue_number=event.issue.number,
                    reaction="eyes"
                )
                logger.info("Attempting to add reaction to issue",
                            issue_number=event.issue.number,
                            repo=repo_name)
            elif isinstance(event, PullRequestEvent):
                # React to the PR (PRs are issues in GitHub API)
                reaction_added = await self.github_client.add_reaction_to_pr(
                    repo=repo_name,
                    pr_number=event.pull_request.number,
                    reaction="eyes"
                )
                logger.info("Attempting to add reaction to PR",
                            pr_number=event.pull_request.number,
                            repo=repo_name)

            if reaction_added:
                logger.info("Successfully added eyes reaction",
                            event_type=type(event).__name__,
                            repo=repo_name)
            else:
                logger.warning("Could not add eyes reaction",
                               event_type=type(event).__name__,
                               repo=repo_name)

        except Exception as e:
            # Log the error but don't fail the webhook processing
            logger.error("Error adding reaction to event - continuing anyway",
                         error=str(e),
                         event_type=type(event).__name__,
                         repo=repo_name,
                         exc_info=True)

    async def process_webhook(
        self,
        headers: Dict[str, str],
        payload: bytes,
        delivery_id: str
    ) -> None:
        """Process a GitHub webhook event.

        This is the main entry point for processing webhook events, regardless
        of the source (FastAPI endpoint, SQS, etc.).

        Pipeline: parse -> authorize sender -> check repo allowlist ->
        decide CI/mention processing -> deduplicate -> queue task(s).
        Every early return is logged; all exceptions are caught and logged
        so the caller never sees a failure from this coroutine.

        Args:
            headers: HTTP headers from webhook
            payload: Raw webhook payload
            delivery_id: Unique delivery ID for tracking
        """
        try:
            # Parse webhook event
            event = WebhookParser.parse_webhook(headers, payload)

            if event is None:
                logger.info("Unsupported webhook event type",
                            event_type=headers.get("x-github-event"),
                            delivery_id=delivery_id)
                return

            # Check if the user who triggered the event is authorized
            trigger_user = event.sender.login
            if not self.config.is_user_authorized_to_trigger(trigger_user):
                logger.warning("User not authorized to trigger webhook processing",
                               user=trigger_user,
                               repo=event.repository.full_name,
                               delivery_id=delivery_id,
                               event_type=type(event).__name__)
                return

            # Check if repository is allowed
            repo_owner = event.repository.owner.login
            if not self.config.is_repository_allowed(event.repository.full_name, repo_owner):
                logger.warning("Repository not in allowlist - rejecting webhook",
                               repo=event.repository.full_name,
                               owner=repo_owner,
                               delivery_id=delivery_id,
                               event_type=type(event).__name__)
                return

            # Load repository configuration to check for CI mode.
            # NOTE(review): devs_options may be None (the ci_enabled access
            # below guards for it) — confirm ensure_repo_config's contract.
            devs_options = await self.container_pool.ensure_repo_config(event.repository.full_name)

            # Check if we should process this event for CI
            process_for_ci = WebhookParser.should_process_event_for_ci(event, devs_options)

            # Check if we should process this event for mentions
            process_for_mentions = WebhookParser.should_process_event(event, self.config.github_mentioned_user)

            # Skip if neither CI nor mentions apply
            if not process_for_ci and not process_for_mentions:
                logger.info("Event does not trigger CI or contain target mentions",
                            event_type=type(event).__name__,
                            mentioned_user=self.config.github_mentioned_user,
                            ci_enabled=devs_options.ci_enabled if devs_options else False,
                            delivery_id=delivery_id)
                return

            logger.info("Event processing mode determined",
                        event_type=type(event).__name__,
                        process_for_ci=process_for_ci,
                        process_for_mentions=process_for_mentions,
                        delivery_id=delivery_id)

            # Check for duplicate content (content-based dedup with TTL),
            # since GitHub may redeliver the same payload under a new
            # delivery id.
            content_hash = event.get_content_hash()
            if content_hash:
                event_description = f"{type(event).__name__}({event.action}) {event.repository.full_name}"
                if hasattr(event, 'issue'):
                    event_description += f" issue#{event.issue.number}"
                elif hasattr(event, 'pull_request'):
                    event_description += f" pr#{event.pull_request.number}"

                if is_duplicate_content(content_hash, event_description):
                    logger.info("Duplicate content detected, skipping processing",
                                event_type=type(event).__name__,
                                action=event.action,
                                content_hash=content_hash,
                                event_description=event_description,
                                delivery_id=delivery_id)
                    return

            logger.info("Processing webhook event",
                        event_type=type(event).__name__,
                        repo=event.repository.full_name,
                        action=event.action,
                        delivery_id=delivery_id)

            # Human-readable labels of tasks queued, used for the summary log.
            tasks_queued = []

            # Queue CI task if applicable
            if process_for_ci:
                ci_task_id = f"{delivery_id}-ci"
                ci_success = await self.container_pool.queue_task(
                    task_id=ci_task_id,
                    repo_name=event.repository.full_name,
                    task_description="",  # Not used for CI tasks
                    event=event,
                    task_type='tests'
                )

                if ci_success:
                    tasks_queued.append("CI tests")
                    logger.info("CI task queued successfully",
                                delivery_id=ci_task_id,
                                repo=event.repository.full_name)
                else:
                    logger.error("Failed to queue CI task",
                                 delivery_id=ci_task_id,
                                 repo=event.repository.full_name)

            # Queue mention-based task if applicable
            if process_for_mentions:
                # Get context from the event for Claude
                task_description = event.get_context_for_claude()

                # When a CI task was also queued for this delivery, suffix the
                # Claude task id so the two task ids stay distinct.
                mention_task_id = f"{delivery_id}-claude" if process_for_ci else delivery_id
                mention_success = await self.container_pool.queue_task(
                    task_id=mention_task_id,
                    repo_name=event.repository.full_name,
                    task_description=task_description,
                    event=event,
                    task_type='claude'
                )

                if mention_success:
                    tasks_queued.append("Claude processing")
                    logger.info("Claude task queued successfully",
                                delivery_id=mention_task_id,
                                repo=event.repository.full_name)

                    # Try to add "eyes" reaction to indicate we're looking into it
                    await self._add_eyes_reaction(event, event.repository.full_name)
                else:
                    logger.error("Failed to queue Claude task",
                                 delivery_id=mention_task_id,
                                 repo=event.repository.full_name)

            if tasks_queued:
                logger.info("Tasks queued successfully",
                            delivery_id=delivery_id,
                            repo=event.repository.full_name,
                            tasks=tasks_queued)
            else:
                logger.error("Failed to queue any tasks",
                             delivery_id=delivery_id,
                             repo=event.repository.full_name)

        except Exception as e:
            logger.error("Error processing webhook",
                         error=str(e),
                         delivery_id=delivery_id,
                         exc_info=True)

    async def get_status(self) -> Dict[str, Any]:
        """Get current processor status.

        Returns a dict with queue depth, pool size, per-container status,
        configuration summary, and deduplication cache statistics.
        """
        container_status = await self.container_pool.get_status()

        # Calculate total queued tasks across all containers.
        # NOTE(review): assumes container_queues has an entry for every name
        # in the configured pool — confirm ContainerPool creates them eagerly.
        total_queued = sum(
            self.container_pool.container_queues[container].qsize()
            for container in self.config.get_container_pool_list()
        )

        return {
            "queued_tasks": total_queued,
            "container_pool_size": len(self.config.get_container_pool_list()),
            "containers": container_status,
            "mentioned_user": self.config.github_mentioned_user,
            "authorized_trigger_users": self.config.get_authorized_trigger_users_list(),
            "deduplication_cache": get_cache_stats(),
        }

    async def stop_container(self, container_name: str) -> bool:
        """Manually stop a container.

        Returns:
            True if the container pool reports the stop succeeded.
        """
        return await self.container_pool.force_stop_container(container_name)

    async def list_containers(self) -> Dict[str, Any]:
        """List all managed containers (delegates to the pool's status)."""
        return await self.container_pool.get_status()