devs-webhook 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,306 @@
1
+ """SQS task source for receiving webhook events from AWS SQS.
2
+
3
+ This module provides the SQS task source that polls an AWS SQS queue for
4
+ webhook events and forwards them to the task processor.
5
+ """
6
+
7
+ import asyncio
8
+ import json
9
+ import uuid
10
+ from typing import Optional, Dict, Any
11
+ import structlog
12
+
13
+ from .base import TaskSource
14
+ from ..core.task_processor import TaskProcessor
15
+ from ..config import get_config
16
+ from ..utils.github import verify_github_signature
17
+
18
+ logger = structlog.get_logger()
19
+
20
+
21
class SQSTaskSource(TaskSource):
    """Task source that polls AWS SQS for GitHub webhook events.

    This allows decoupling the webhook receiver (which can be a separate
    service) from the task processor. The webhook receiver puts messages
    into SQS, and this source polls them for processing.

    Expected SQS message format:
        {
            "headers": {
                "x-github-event": "issues",
                "x-github-delivery": "...",
                ...
            },
            "payload": "<base64-encoded-or-raw-json>"
        }
    """

    def __init__(self, task_processor: Optional[TaskProcessor] = None):
        """Initialize SQS task source.

        Args:
            task_processor: Optional task processor instance. If not provided,
                a new one will be created.

        Raises:
            ImportError: If boto3 is not installed.
        """
        self.task_processor = task_processor or TaskProcessor()
        self.config = get_config()
        self._running = False
        self._poll_task: Optional[asyncio.Task] = None

        # Import boto3 lazily to avoid requiring it for webhook-only deployments
        try:
            import boto3
            self.sqs_client = boto3.client(
                'sqs',
                region_name=self.config.aws_region
            )
        except ImportError as e:
            logger.error("boto3 not installed - required for SQS task source")
            # Chain the original ImportError so the traceback shows the
            # real missing module.
            raise ImportError(
                "boto3 is required for SQS task source. "
                "Install with: pip install boto3"
            ) from e

        logger.info(
            "SQS task source initialized",
            queue_url=self.config.aws_sqs_queue_url,
            region=self.config.aws_region,
        )

    async def start(self) -> None:
        """Start polling SQS for webhook events.

        This method blocks until the source is stopped.
        """
        logger.info("Starting SQS task source")
        self._running = True

        try:
            while self._running:
                await self._poll_and_process_messages()
        except asyncio.CancelledError:
            logger.info("SQS polling cancelled")
            raise
        except Exception as e:
            logger.error("SQS polling error", error=str(e), exc_info=True)
            raise
        finally:
            self._running = False

    async def stop(self) -> None:
        """Stop polling SQS."""
        logger.info("Stopping SQS task source")
        self._running = False

        if self._poll_task and not self._poll_task.done():
            self._poll_task.cancel()
            try:
                await self._poll_task
            except asyncio.CancelledError:
                pass

        logger.info("SQS task source stopped")

    async def _delete_message(self, receipt_handle: str) -> None:
        """Delete a message from the main queue.

        The blocking boto3 call runs in the default executor so the
        event loop is not stalled.

        Args:
            receipt_handle: SQS receipt handle identifying the message.
        """
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(
            None,
            lambda: self.sqs_client.delete_message(
                QueueUrl=self.config.aws_sqs_queue_url,
                ReceiptHandle=receipt_handle
            )
        )

    @staticmethod
    def _decode_payload(payload_data: Any) -> bytes:
        """Normalize the message's 'payload' field to raw bytes.

        Producers may send the payload as a raw JSON string, a
        base64-encoded string, or an already-parsed JSON object.

        Args:
            payload_data: Value of the 'payload' key from the SQS body.

        Returns:
            Payload as bytes, suitable for signature verification.
        """
        if isinstance(payload_data, str):
            if payload_data.startswith('{') or payload_data.startswith('['):
                # Raw JSON string
                return payload_data.encode('utf-8')
            # Assume base64 encoded. NOTE(review): this is a heuristic —
            # a string that is neither JSON nor valid base64 raises here
            # and is routed through the caller's error/DLQ path.
            import base64
            return base64.b64decode(payload_data)
        if isinstance(payload_data, dict):
            # Already parsed JSON; re-encode to bytes.
            # NOTE(review): re-serialized JSON will generally not be
            # byte-identical to what GitHub signed, so signature checks
            # on dict payloads may fail — confirm producers send the raw
            # string form when signatures are enforced.
            return json.dumps(payload_data).encode('utf-8')
        return payload_data

    async def _poll_and_process_messages(self) -> None:
        """Poll SQS and process any available messages.

        This uses long polling to efficiently wait for messages.
        """
        try:
            # Run SQS receive in executor to avoid blocking the event loop
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None,
                lambda: self.sqs_client.receive_message(
                    QueueUrl=self.config.aws_sqs_queue_url,
                    MaxNumberOfMessages=1,  # Process one at a time
                    WaitTimeSeconds=self.config.sqs_wait_time_seconds,
                    AttributeNames=['All'],
                    MessageAttributeNames=['All']
                )
            )

            messages = response.get('Messages', [])

            if not messages:
                # No messages available, will poll again
                return

            for message in messages:
                await self._process_message(message)

        except Exception as e:
            logger.error("Error polling SQS", error=str(e), exc_info=True)
            # Back off briefly so a persistent failure (bad credentials,
            # missing queue) does not spin the polling loop.
            await asyncio.sleep(5)

    async def _process_message(self, message: Dict[str, Any]) -> None:
        """Process a single SQS message.

        Verifies the GitHub signature, forwards the event to the task
        processor, and deletes the message from the queue. Failed
        messages are copied to the DLQ (if configured) and then deleted.

        Args:
            message: SQS message containing webhook event
        """
        receipt_handle = message['ReceiptHandle']
        message_id = message['MessageId']

        try:
            # Parse message body and normalize the payload to bytes
            body = json.loads(message['Body'])
            headers = body.get('headers', {})
            payload = self._decode_payload(body.get('payload'))

            # Generate delivery ID if not present
            delivery_id = headers.get('x-github-delivery', f'sqs-{message_id}')

            # Verify GitHub webhook signature (defense in depth)
            signature = headers.get('x-hub-signature-256', '')
            if not verify_github_signature(payload, signature, self.config.github_webhook_secret):
                error_msg = "Invalid GitHub webhook signature - possible security breach or misconfiguration"
                logger.error(
                    "Invalid webhook signature from SQS message",
                    message_id=message_id,
                    delivery_id=delivery_id,
                    signature_present=bool(signature),
                )
                # Send to DLQ for investigation
                if self.config.aws_sqs_dlq_url:
                    await self._send_to_dlq(message, error_msg)
                # Delete from main queue (don't retry invalid signatures)
                await self._delete_message(receipt_handle)
                logger.warning(
                    "Rejected SQS message with invalid signature",
                    message_id=message_id,
                )
                return

            logger.info(
                "Processing SQS message",
                message_id=message_id,
                delivery_id=delivery_id,
                event_type=headers.get('x-github-event', 'unknown'),
            )

            # Process the webhook event
            await self.task_processor.process_webhook(
                headers=headers,
                payload=payload,
                delivery_id=delivery_id
            )

            # Delete message from queue on success
            await self._delete_message(receipt_handle)

            logger.info(
                "SQS message processed successfully",
                message_id=message_id,
                delivery_id=delivery_id,
            )

        except Exception as e:
            error_msg = f"Failed to process SQS message: {str(e)}"
            logger.error(
                "Error processing SQS message",
                message_id=message_id,
                error=error_msg,
                exc_info=True,
            )

            # Send to DLQ if configured
            if self.config.aws_sqs_dlq_url:
                await self._send_to_dlq(message, error_msg)

            # Delete message from main queue to prevent reprocessing.
            # NOTE(review): when no DLQ is configured, a transiently
            # failing event is dropped entirely here (only logged) —
            # confirm that is the intended at-most-once behavior.
            try:
                await self._delete_message(receipt_handle)
                logger.info(
                    "Deleted failed message from queue",
                    message_id=message_id,
                )
            except Exception as delete_error:
                logger.error(
                    "Failed to delete message after error",
                    message_id=message_id,
                    error=str(delete_error),
                )

    async def _send_to_dlq(self, message: Dict[str, Any], error_msg: str) -> None:
        """Send a failed message to the dead-letter queue.

        Args:
            message: Original SQS message
            error_msg: Error message describing the failure
        """
        try:
            import time

            message_id = message['MessageId']

            # Add error information to the message
            dlq_body = {
                'original_message': message['Body'],
                'error': error_msg,
                # Wall-clock epoch seconds. The previous implementation
                # used asyncio's monotonic loop clock, whose value is
                # relative to an arbitrary origin and therefore useless
                # as a "failed at" timestamp across processes/restarts.
                'failed_at': time.time(),
                'original_message_id': message_id,
            }

            loop = asyncio.get_running_loop()
            await loop.run_in_executor(
                None,
                lambda: self.sqs_client.send_message(
                    QueueUrl=self.config.aws_sqs_dlq_url,
                    MessageBody=json.dumps(dlq_body)
                )
            )

            logger.info(
                "Sent failed message to DLQ",
                message_id=message_id,
                dlq_url=self.config.aws_sqs_dlq_url,
            )

        except Exception as e:
            logger.error(
                "Failed to send message to DLQ",
                message_id=message.get('MessageId', 'unknown'),
                error=str(e),
                exc_info=True,
            )
@@ -0,0 +1,82 @@
1
+ """Webhook task source using FastAPI.
2
+
3
+ This module provides the webhook task source that receives GitHub webhooks
4
+ via a FastAPI HTTP endpoint and forwards them to the task processor.
5
+ """
6
+
7
+ import asyncio
8
+ import structlog
9
+ from typing import Optional
10
+
11
+ from .base import TaskSource
12
+ from ..core.task_processor import TaskProcessor
13
+
14
+ logger = structlog.get_logger()
15
+
16
+
17
class WebhookTaskSource(TaskSource):
    """Task source that receives GitHub webhooks via FastAPI.

    This is the traditional webhook endpoint approach that receives
    HTTP POST requests from GitHub and processes them.
    """

    def __init__(self, task_processor: Optional[TaskProcessor] = None):
        """Initialize webhook task source.

        Args:
            task_processor: Optional task processor instance. If not provided,
                a new one will be created.
        """
        self.task_processor = task_processor or TaskProcessor()
        # uvicorn.Server instance; created in start() and used by stop()
        # to request a graceful shutdown.
        self._server = None
        self._server_task: Optional[asyncio.Task] = None
        logger.info("Webhook task source initialized")

    async def start(self) -> None:
        """Start the FastAPI webhook server.

        This method blocks until the server is stopped.
        """
        logger.info("Starting webhook task source (FastAPI)")

        # Import uvicorn here to avoid import issues
        import uvicorn
        from ..config import get_config

        config = get_config()

        # Create uvicorn config
        uvicorn_config = uvicorn.Config(
            "devs_webhook.app:app",
            host=config.webhook_host,
            port=config.webhook_port,
            log_config=None,  # Use our structlog config
        )

        # Keep the server instance so stop() can signal it. The previous
        # implementation discarded it, which made stop() a no-op: nothing
        # ever told uvicorn to exit serve().
        self._server = uvicorn.Server(uvicorn_config)

        logger.info(
            "Webhook server starting",
            host=config.webhook_host,
            port=config.webhook_port,
        )

        # Run the server (blocks until stopped)
        await self._server.serve()

    async def stop(self) -> None:
        """Stop the webhook server."""
        logger.info("Stopping webhook task source")

        # Ask uvicorn for a graceful shutdown; serve() returns once all
        # in-flight requests finish.
        if self._server is not None:
            self._server.should_exit = True

        if self._server_task and not self._server_task.done():
            self._server_task.cancel()
            try:
                await self._server_task
            except asyncio.CancelledError:
                pass

        logger.info("Webhook task source stopped")
@@ -0,0 +1 @@
1
+ """Utility modules."""
@@ -0,0 +1,86 @@
1
+ """Async utilities for non-blocking operations."""
2
+
3
+ import asyncio
4
+ import subprocess
5
+ from typing import Optional, Tuple
6
+ import structlog
7
+
8
+ logger = structlog.get_logger()
9
+
10
+
11
async def run_subprocess_async(
    cmd: list[str],
    cwd: Optional[str] = None,
    timeout: Optional[float] = None
) -> Tuple[int, str, str]:
    """Run a subprocess asynchronously without blocking the event loop.

    Args:
        cmd: Command and arguments to run
        cwd: Working directory
        timeout: Timeout in seconds

    Returns:
        Tuple of (return_code, stdout, stderr). On timeout or launch
        failure the return code is -1 and stderr carries the reason.
    """
    try:
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            cwd=cwd,
        )

        # Wait for the process to finish, enforcing the optional timeout.
        try:
            raw_out, raw_err = await asyncio.wait_for(
                proc.communicate(), timeout=timeout
            )
        except asyncio.TimeoutError:
            # Kill the stalled process and report the timeout as stderr.
            proc.kill()
            await proc.wait()
            return -1, "", f"Command timed out after {timeout} seconds"

        decoded_out = raw_out.decode('utf-8', errors='replace') if raw_out else ""
        decoded_err = raw_err.decode('utf-8', errors='replace') if raw_err else ""
        return proc.returncode or 0, decoded_out, decoded_err

    except Exception as e:
        # Launch failures (missing binary, bad cwd, ...) are reported the
        # same way as timeouts: -1 plus the error text.
        logger.error("Subprocess execution failed",
                    cmd=cmd,
                    error=str(e))
        return -1, "", str(e)
57
+
58
+
59
async def run_git_async(
    args: list[str],
    cwd: str,
    timeout: float = 30.0
) -> Tuple[bool, str, str]:
    """Run a git command asynchronously.

    Args:
        args: Git command arguments (without 'git' prefix)
        cwd: Working directory
        timeout: Timeout in seconds

    Returns:
        Tuple of (success, stdout, stderr)
    """
    code, out, err = await run_subprocess_async(["git", *args], cwd, timeout)

    succeeded = code == 0
    if not succeeded:
        # Debug level: callers decide whether a failed git call is an error.
        logger.debug("Git command failed",
                    args=args,
                    cwd=cwd,
                    returncode=code,
                    stderr=err)

    return succeeded, out, err
@@ -0,0 +1,43 @@
1
+ """GitHub webhook utilities."""
2
+
3
+ import hmac
4
+ import hashlib
5
+
6
+
7
def verify_github_signature(payload: bytes, signature: str, secret: str) -> bool:
    """Verify GitHub webhook signature using HMAC-SHA256.

    GitHub signs webhook payloads with HMAC-SHA256 using the webhook secret.
    The signature is sent in the X-Hub-Signature-256 header as 'sha256=<hex>'.

    Args:
        payload: Raw webhook payload bytes (must be the exact bytes received)
        signature: GitHub signature header (e.g., 'sha256=abc123...')
        secret: Webhook secret configured in GitHub

    Returns:
        True if signature is valid, False otherwise

    Example:
        >>> payload = b'{"action": "opened", ...}'
        >>> signature = request.headers.get('X-Hub-Signature-256')
        >>> secret = os.environ['GITHUB_WEBHOOK_SECRET']
        >>> if verify_github_signature(payload, signature, secret):
        ...     # Process webhook
        ...     pass
    """
    # Reject missing or malformed headers up front.
    if not signature or not signature.startswith("sha256="):
        return False

    digest = hmac.new(
        secret.encode("utf-8"),
        payload,
        hashlib.sha256
    ).hexdigest()

    # Constant-time comparison to prevent timing attacks.
    return hmac.compare_digest(signature, f"sha256={digest}")
@@ -0,0 +1,34 @@
1
+ """Logging configuration."""
2
+
3
+ import sys
4
+ import logging
5
+ import structlog
6
+ from typing import Any, Dict
7
+
8
+ from ..config import get_config
9
+
10
+
11
def setup_logging() -> None:
    """Set up structured logging.

    Configures structlog globally from application config: minimum log
    level and output renderer (human-readable console vs JSON).
    """
    config = get_config()

    # Choose the final renderer from config.
    renderer = (
        structlog.dev.ConsoleRenderer()
        if config.log_format == "console"
        else structlog.processors.JSONRenderer()
    )

    # Configure structlog
    structlog.configure(
        processors=[
            structlog.contextvars.merge_contextvars,
            structlog.processors.add_log_level,
            # Must be lowercase "iso": structlog special-cases that value
            # for ISO-8601 output; any other string (like the previous
            # "ISO") is treated as a strftime format, so every log line
            # got the literal text "ISO" as its timestamp.
            structlog.processors.TimeStamper(fmt="iso"),
            renderer,
        ],
        # Drop events below the configured level; unknown names fall back
        # to INFO.
        wrapper_class=structlog.make_filtering_bound_logger(
            getattr(logging, config.log_level.upper(), logging.INFO)
        ),
        logger_factory=structlog.WriteLoggerFactory(sys.stdout),
        cache_logger_on_first_use=True,
    )
30
+
31
+
32
def get_logger(**context: Any) -> structlog.BoundLogger:
    """Return a structlog logger pre-bound with the given key/value context."""
    base = structlog.get_logger()
    return base.bind(**context)
@@ -0,0 +1,102 @@
1
+ """JSON serialization utilities for webhook objects."""
2
+
3
+ import json
4
+ import base64
5
+ from typing import Any, Dict
6
+ from pathlib import Path
7
+ from datetime import datetime
8
+
9
+ from ..github.models import WebhookEvent, DevsOptions, IssueEvent, PullRequestEvent, CommentEvent
10
+
11
+
12
class WebhookEventEncoder(json.JSONEncoder):
    """Custom JSON encoder for WebhookEvent objects.

    Handles datetimes, pydantic models, and Path objects. Known event
    classes are wrapped in a {'__type__', '__data__'} envelope so the
    matching deserializer can restore the exact class.
    """

    def default(self, obj):
        # Handle datetime objects
        if isinstance(obj, datetime):
            return obj.isoformat()
        # Tag known event types for exact round-tripping. This check must
        # come BEFORE the generic model_dump branch: these pydantic models
        # all have model_dump, so the generic branch used to shadow this
        # one and the __type__ envelope was never emitted (forcing the
        # deserializer onto its structural-guessing fallback).
        if isinstance(obj, (IssueEvent, PullRequestEvent, CommentEvent)):
            return {
                '__type__': obj.__class__.__name__,
                '__data__': obj.model_dump()
            }
        # Any other pydantic BaseModel instance
        if hasattr(obj, 'model_dump'):
            return obj.model_dump()
        if isinstance(obj, Path):
            return str(obj)
        return super().default(obj)
30
+
31
+
32
def serialize_webhook_event(event: WebhookEvent) -> str:
    """Serialize WebhookEvent to base64-encoded JSON string.

    Args:
        event: WebhookEvent to serialize

    Returns:
        Base64-encoded JSON string
    """
    encoded = json.dumps(event, cls=WebhookEventEncoder, ensure_ascii=True)
    raw = encoded.encode()
    return base64.b64encode(raw).decode('ascii')
43
+
44
+
45
def deserialize_webhook_event(data: str) -> WebhookEvent:
    """Deserialize WebhookEvent from base64-encoded JSON string.

    Args:
        data: Base64-encoded JSON string

    Returns:
        WebhookEvent instance
    """
    decoded = base64.b64decode(data.encode('ascii')).decode()
    raw = json.loads(decoded)

    # Prefer the explicit type tag written by the encoder.
    tag = raw.get('__type__')
    payload = raw.get('__data__', raw)

    by_tag = {
        'IssueEvent': IssueEvent,
        'PullRequestEvent': PullRequestEvent,
        'CommentEvent': CommentEvent,
    }
    model = by_tag.get(tag)

    if model is None:
        # Fallback - infer the event type from the data structure.
        if 'issue' in payload:
            model = IssueEvent
        elif 'pull_request' in payload:
            model = PullRequestEvent
        else:
            model = CommentEvent

    return model.model_validate(payload)
75
+
76
+
77
def serialize_devs_options(options: DevsOptions) -> str:
    """Serialize DevsOptions to base64-encoded JSON string.

    Args:
        options: DevsOptions to serialize

    Returns:
        Base64-encoded JSON string
    """
    as_dict = options.model_dump()
    encoded = json.dumps(as_dict, ensure_ascii=True).encode()
    return base64.b64encode(encoded).decode('ascii')
88
+
89
+
90
def deserialize_devs_options(data: str) -> DevsOptions:
    """Deserialize DevsOptions from base64-encoded JSON string.

    Args:
        data: Base64-encoded JSON string

    Returns:
        DevsOptions instance
    """
    decoded = base64.b64decode(data.encode('ascii')).decode()
    return DevsOptions.model_validate(json.loads(decoded))