devs_webhook-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1109 @@
+ """Container pool management for webhook tasks."""
+
+ import asyncio
+ import json
+ import os
+ import shutil
+ import sys
+ from datetime import datetime, timedelta, timezone
+ from typing import Dict, Optional, Any, NamedTuple
+ from pathlib import Path
+
+ import structlog
+ import yaml
+
+ from devs_common.core.project import Project
+ from devs_common.core.container import ContainerManager
+ from devs_common.core.workspace import WorkspaceManager
+
+ from ..config import get_config
+ from ..github.models import WebhookEvent, DevsOptions, IssueEvent, PullRequestEvent, CommentEvent
+ from .claude_dispatcher import ClaudeDispatcher, TaskResult
+ from ..github.client import GitHubClient
+
+ logger = structlog.get_logger()
+
+
+ class QueuedTask(NamedTuple):
+     """A task queued for execution in a container."""
+     task_id: str
+     repo_name: str
+     task_description: str
+     event: WebhookEvent
+     task_type: str = 'claude'
+
+
+ class ContainerPool:
+     """Manages a pool of named containers for webhook tasks."""
+
+     def __init__(self):
+         """Initialize container pool."""
+         self.config = get_config()
+         self.claude_dispatcher = ClaudeDispatcher()
+
+         # Track running containers for idle cleanup
+         self.running_containers: Dict[str, Dict[str, Any]] = {}
+         self._lock = asyncio.Lock()
+
+         # Task queues - one per dev name
+         self.container_queues: Dict[str, asyncio.Queue] = {
+             dev_name: asyncio.Queue() for dev_name in self.config.get_container_pool_list()
+         }
+
+         # Container workers - one per dev name
+         self.container_workers: Dict[str, asyncio.Task] = {}
+
+         # Cache DEVS.yml configuration for repositories
+         self.repo_configs: Dict[str, DevsOptions] = {}  # repo_name -> DevsOptions
+
+         # Track which container is assigned to single-queue repos
+         self.single_queue_assignments: Dict[str, str] = {}  # repo_name -> container_name
+
+         # Start worker tasks for each container
+         self._start_workers()
+
+         # Start the idle container cleanup task
+         self.cleanup_worker = asyncio.create_task(self._idle_cleanup_worker())
+
+         logger.info("Container pool initialized",
+                     containers=self.config.get_container_pool_list())
+
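+     # Typical lifecycle, as a sketch (the pool is normally created once by the
+     # webhook application): construct ContainerPool() inside a running event
+     # loop, call `await pool.queue_task(...)` per webhook event, and call
+     # `await pool.shutdown()` on exit. Note that __init__ uses
+     # asyncio.create_task, so it must be called from within a running loop.
+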
+     def get_repo_config(self, repo_name: str) -> Optional[DevsOptions]:
+         """Get cached repository configuration.
+
+         Args:
+             repo_name: Repository name (owner/repo)
+
+         Returns:
+             DevsOptions if cached, None if not yet loaded
+         """
+         return self.repo_configs.get(repo_name)
+
+     async def ensure_repo_config(self, repo_name: str) -> DevsOptions:
+         """Ensure repository configuration is loaded and cached.
+
+         Checks user-specific DEVS.yml files first, and only clones the
+         repository if needed.
+
+         Args:
+             repo_name: Repository name (owner/repo)
+
+         Returns:
+             DevsOptions from cache or newly loaded from DEVS.yml
+         """
+         # Check if already cached
+         if repo_name in self.repo_configs:
+             return self.repo_configs[repo_name]
+
+         # Try to load from user-specific configuration first (no cloning needed)
+         devs_options = self._try_load_user_config(repo_name)
+
+         if devs_options is not None:
+             # Found user configuration, cache it
+             logger.info("Repository config loaded from user-specific DEVS.yml (no cloning needed)",
+                         repo=repo_name)
+             self.repo_configs[repo_name] = devs_options
+         else:
+             # No user config found, need to clone and read repository DEVS.yml
+             logger.info("No user-specific config found, cloning repository to read DEVS.yml",
+                         repo=repo_name)
+
+             # Calculate repo path
+             repo_path = self.config.repo_cache_dir / repo_name.replace('/', '-')
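+             # e.g. (illustrative name) "octocat/hello-world" is cached at
+             # <repo_cache_dir>/octocat-hello-world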
+
+             # Clone repository and read config
+             devs_options = await self._ensure_repository_cloned(repo_name, repo_path)
+
+             # Cache the config
+             self.repo_configs[repo_name] = devs_options
+
+         # Update single-queue assignment tracking if needed
+         if devs_options.single_queue and repo_name not in self.single_queue_assignments:
+             # We'll assign a container when the first task is actually queued
+             pass
+
+         logger.info("Repository config cached",
+                     repo=repo_name,
+                     single_queue=devs_options.single_queue,
+                     ci_enabled=devs_options.ci_enabled)
+
+         return devs_options
+
+     def _try_load_user_config(self, repo_name: str) -> Optional[DevsOptions]:
+         """Try to load configuration from user-specific DEVS.yml files only.
+
+         Checks for user-specific configuration without cloning the repository.
+
+         Args:
+             repo_name: Repository name (owner/repo)
+
+         Returns:
+             DevsOptions if user-specific config exists, None otherwise
+         """
+         def _load_devs_yml(file_path: Path) -> dict:
+             """Load and parse a DEVS.yml file, returning empty dict if not found."""
+             if not file_path.exists():
+                 return {}
+
+             try:
+                 with open(file_path, 'r') as f:
+                     data = yaml.safe_load(f)
+                 return data if data else {}
+             except Exception as e:
+                 logger.warning("Failed to parse user DEVS.yml",
+                                file_path=str(file_path),
+                                error=str(e))
+                 return {}
+
+         # Check for user-specific configuration files
+         user_envs_dir = Path.home() / ".devs" / "envs"
+         default_devs_yml = user_envs_dir / "default" / "DEVS.yml"
+         project_name = repo_name.replace('/', '-')  # Convert org/repo to org-repo
+         project_devs_yml = user_envs_dir / project_name / "DEVS.yml"
+
+         # If no user configuration files exist, return None
+         if not default_devs_yml.exists() and not project_devs_yml.exists():
+             return None
+
+         # Load user configuration
+         devs_options = DevsOptions()  # Start with defaults
+
+         # 1. Load user default DEVS.yml
+         default_data = _load_devs_yml(default_devs_yml)
+
+         # 2. Load user project-specific DEVS.yml (higher priority)
+         project_data = _load_devs_yml(project_devs_yml)
+
+         # Merge data in priority order
+         all_data = {}
+         all_data.update(default_data)
+         all_data.update(project_data)
+
+         # Update devs_options with merged values
+         if all_data:
+             if 'default_branch' in all_data:
+                 devs_options.default_branch = all_data['default_branch']
+             if 'prompt_extra' in all_data:
+                 devs_options.prompt_extra = all_data['prompt_extra']
+             if 'prompt_override' in all_data:
+                 devs_options.prompt_override = all_data['prompt_override']
+             if 'direct_commit' in all_data:
+                 devs_options.direct_commit = all_data['direct_commit']
+             if 'single_queue' in all_data:
+                 devs_options.single_queue = all_data['single_queue']
+             if 'ci_enabled' in all_data:
+                 devs_options.ci_enabled = all_data['ci_enabled']
+             if 'ci_test_command' in all_data:
+                 devs_options.ci_test_command = all_data['ci_test_command']
+             if 'ci_branches' in all_data:
+                 devs_options.ci_branches = all_data['ci_branches']
+
+             # Merge env_vars from both sources
+             merged_env_vars = {}
+             for source_data in [default_data, project_data]:
+                 if 'env_vars' in source_data and source_data['env_vars']:
+                     for container_name, env_dict in source_data['env_vars'].items():
+                         if container_name not in merged_env_vars:
+                             merged_env_vars[container_name] = {}
+                         merged_env_vars[container_name].update(env_dict)
+
+             if merged_env_vars:
+                 devs_options.env_vars = merged_env_vars
+
+             logger.info("Loaded user-specific DEVS.yml configuration",
+                         repo=repo_name,
+                         default_file_exists=default_devs_yml.exists(),
+                         project_file_exists=project_devs_yml.exists(),
+                         default_branch=devs_options.default_branch,
+                         single_queue=devs_options.single_queue,
+                         ci_enabled=devs_options.ci_enabled,
+                         env_vars_containers=list(devs_options.env_vars.keys()) if devs_options.env_vars else [])
+
+             return devs_options
+
+         # No meaningful configuration found
+         return None
+
+     def _read_devs_options(self, repo_path: Path, repo_name: str) -> DevsOptions:
+         """Read and parse DEVS.yml options from multiple sources.
+
+         Loads from multiple sources in priority order:
+         1. ~/.devs/envs/{org-repo}/DEVS.yml (user-specific overrides)
+         2. ~/.devs/envs/default/DEVS.yml (user defaults)
+         3. {repo_path}/DEVS.yml (repository configuration)
+
+         Args:
+             repo_path: Path to repository
+             repo_name: Repository name (org/repo format)
+
+         Returns:
+             DevsOptions with values from DEVS.yml files or defaults
+         """
+         devs_options = DevsOptions()  # Start with defaults
+
+         def _load_devs_yml(file_path: Path) -> dict:
+             """Load and parse a DEVS.yml file, returning empty dict if not found."""
+             if not file_path.exists():
+                 return {}
+
+             try:
+                 with open(file_path, 'r') as f:
+                     data = yaml.safe_load(f)
+                 return data if data else {}
+             except Exception as e:
+                 logger.warning("Failed to parse DEVS.yml",
+                                file_path=str(file_path),
+                                error=str(e))
+                 return {}
+
+         # 1. Load repository DEVS.yml (lowest priority)
+         repo_devs_yml = repo_path / "DEVS.yml"
+         repo_data = _load_devs_yml(repo_devs_yml)
+
+         # 2. Load user default DEVS.yml
+         user_envs_dir = Path.home() / ".devs" / "envs"
+         default_devs_yml = user_envs_dir / "default" / "DEVS.yml"
+         default_data = _load_devs_yml(default_devs_yml)
+
+         # 3. Load user project-specific DEVS.yml (highest priority)
+         project_name = repo_name.replace('/', '-')  # Convert org/repo to org-repo
+         project_devs_yml = user_envs_dir / project_name / "DEVS.yml"
+         project_data = _load_devs_yml(project_devs_yml)
+
+         # Merge data in priority order (later updates override earlier ones)
+         all_data = {}
+         for source_data in [repo_data, default_data, project_data]:
+             all_data.update(source_data)
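+         # Worked example of the precedence (illustrative values): if the repo
+         # DEVS.yml sets single_queue: false, the user default sets it to true,
+         # and the project override sets it back to false, the merged value is
+         # false -- later sources in the list win, key by key.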
+
+         # Update devs_options with merged values
+         if all_data:
+             if 'default_branch' in all_data:
+                 devs_options.default_branch = all_data['default_branch']
+             if 'prompt_extra' in all_data:
+                 devs_options.prompt_extra = all_data['prompt_extra']
+             if 'prompt_override' in all_data:
+                 devs_options.prompt_override = all_data['prompt_override']
+             if 'direct_commit' in all_data:
+                 devs_options.direct_commit = all_data['direct_commit']
+             if 'single_queue' in all_data:
+                 devs_options.single_queue = all_data['single_queue']
+             if 'ci_enabled' in all_data:
+                 devs_options.ci_enabled = all_data['ci_enabled']
+             if 'ci_test_command' in all_data:
+                 devs_options.ci_test_command = all_data['ci_test_command']
+             if 'ci_branches' in all_data:
+                 devs_options.ci_branches = all_data['ci_branches']
+
+         # Merge env_vars from all sources (repository < default < project)
+         merged_env_vars = {}
+         for source_data in [repo_data, default_data, project_data]:
+             if 'env_vars' in source_data and source_data['env_vars']:
+                 for container_name, env_dict in source_data['env_vars'].items():
+                     if container_name not in merged_env_vars:
+                         merged_env_vars[container_name] = {}
+                     merged_env_vars[container_name].update(env_dict)
+
+         if merged_env_vars:
+             devs_options.env_vars = merged_env_vars
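+         # env_vars merges per container name, e.g. (illustrative) a default
+         # file with {dev1: {FOO: "1"}} plus a project file with
+         # {dev1: {BAR: "2"}} yields {dev1: {FOO: "1", BAR: "2"}}.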
+
+         logger.info("Loaded DEVS.yml configuration from multiple sources",
+                     repo=repo_name,
+                     repo_file_exists=repo_devs_yml.exists(),
+                     default_file_exists=default_devs_yml.exists(),
+                     project_file_exists=project_devs_yml.exists(),
+                     default_branch=devs_options.default_branch,
+                     has_prompt_extra=bool(devs_options.prompt_extra),
+                     has_prompt_override=bool(devs_options.prompt_override),
+                     direct_commit=devs_options.direct_commit,
+                     single_queue=devs_options.single_queue,
+                     ci_enabled=devs_options.ci_enabled,
+                     ci_test_command=devs_options.ci_test_command,
+                     ci_branches=devs_options.ci_branches,
+                     env_vars_containers=list(devs_options.env_vars.keys()) if devs_options.env_vars else [])
+
+         return devs_options
+
+     async def _ensure_repository_files_available(self, repo_name: str, repo_path: Path) -> None:
+         """Ensure repository files are available locally without re-reading config.
+
+         This is used when we already have the DEVS.yml config cached but need
+         to ensure the actual repository files are available for the worker.
+
+         Args:
+             repo_name: Repository name (owner/repo)
+             repo_path: Path where repository should be cloned
+         """
+         logger.info("Ensuring repository files are available",
+                     repo=repo_name,
+                     repo_path=str(repo_path),
+                     exists=repo_path.exists())
+
+         if repo_path.exists():
+             # Repository already exists, try to pull latest changes
+             try:
+                 logger.info("Repository exists, attempting to pull latest changes",
+                             repo=repo_name,
+                             repo_path=str(repo_path))
+
+                 # Set up authentication for private repos
+                 if self.config.github_token:
+                     set_remote_cmd = ["git", "-C", str(repo_path), "remote", "set-url", "origin",
+                                       f"https://x-access-token:{self.config.github_token}@github.com/{repo_name}.git"]
+
+                     process = await asyncio.create_subprocess_exec(*set_remote_cmd)
+                     await process.wait()
+
+                 # Pull latest changes
+                 pull_cmd = ["git", "-C", str(repo_path), "pull", "origin"]
+                 process = await asyncio.create_subprocess_exec(
+                     *pull_cmd,
+                     stdout=asyncio.subprocess.PIPE,
+                     stderr=asyncio.subprocess.PIPE
+                 )
+                 stdout, stderr = await process.communicate()
+
+                 if process.returncode != 0:
+                     error_msg = stderr.decode('utf-8', errors='replace') if stderr else "Unknown error"
+                     logger.warning("Failed to pull repository, will try fresh clone",
+                                    repo=repo_name,
+                                    error=error_msg)
+
+                     # Remove the directory and fall through to fresh clone
+                     # (shutil is already imported at module level)
+                     shutil.rmtree(repo_path)
+                 else:
+                     logger.info("Repository pull successful",
+                                 repo=repo_name)
+                     return  # Success, repository is up to date
+
+             except Exception as e:
+                 logger.warning("Error during repository pull, will try fresh clone",
+                                repo=repo_name,
+                                error=str(e))
+                 # Remove the directory and fall through to fresh clone
+                 if repo_path.exists():
+                     shutil.rmtree(repo_path)
+
+         # Clone repository fresh (either first time or after failed pull)
+         try:
+             logger.info("Cloning repository",
+                         repo=repo_name,
+                         repo_path=str(repo_path))
+
+             # Ensure parent directory exists
+             repo_path.parent.mkdir(parents=True, exist_ok=True)
+
+             # Clone with authentication if we have a token
+             if self.config.github_token:
+                 clone_url = f"https://x-access-token:{self.config.github_token}@github.com/{repo_name}.git"
+             else:
+                 clone_url = f"https://github.com/{repo_name}.git"
+
+             clone_cmd = ["git", "clone", "--depth", "1", clone_url, str(repo_path)]
+             process = await asyncio.create_subprocess_exec(
+                 *clone_cmd,
+                 stdout=asyncio.subprocess.PIPE,
+                 stderr=asyncio.subprocess.PIPE
+             )
+             stdout, stderr = await process.communicate()
+
+             if process.returncode == 0:
+                 logger.info("Repository cloned successfully",
+                             repo=repo_name)
+             else:
+                 error_msg = stderr.decode('utf-8', errors='replace') if stderr else stdout.decode('utf-8', errors='replace')
+                 logger.error("Git clone failed",
+                              repo=repo_name,
+                              error=error_msg)
+                 raise Exception(f"Git clone failed: {error_msg}")
+
+         except Exception as e:
+             logger.error("Repository cloning failed",
+                          repo=repo_name,
+                          error=str(e))
+             raise
+
+     async def queue_task(
+         self,
+         task_id: str,
+         repo_name: str,
+         task_description: str,
+         event: WebhookEvent,
+         task_type: str = 'claude'
+     ) -> bool:
+         """Queue a task for execution in the next available container.
+
+         For repositories with single_queue enabled in DEVS.yml, all tasks
+         are routed to the same container to avoid conflicts. The single_queue
+         setting is detected after the first clone and cached in memory.
+
+         Args:
+             task_id: Unique task identifier
+             repo_name: Repository name (owner/repo)
+             task_description: Task description for Claude (unused for 'tests' tasks)
+             event: Original webhook event
+             task_type: Task type ('claude' or 'tests')
+
+         Returns:
+             True if the task was queued successfully
+         """
+         try:
+             # Get repository configuration (cached or load it)
+             repo_config = self.get_repo_config(repo_name)
+             single_queue_required = repo_config.single_queue if repo_config else False
+
+             # Determine which container to use
+             best_container = None
+
+             if single_queue_required:
+                 # Use the previously assigned container for this single-queue repo
+                 if repo_name in self.single_queue_assignments:
+                     best_container = self.single_queue_assignments[repo_name]
+                     logger.info("Using previously assigned container for single-queue repo",
+                                 repo=repo_name,
+                                 container=best_container)
+                 else:
+                     # First time for this single-queue repo, assign a container
+                     min_queue_size = float('inf')
+                     for dev_name in self.config.get_container_pool_list():
+                         queue_size = self.container_queues[dev_name].qsize()
+                         if queue_size < min_queue_size:
+                             min_queue_size = queue_size
+                             best_container = dev_name
+
+                     if best_container:
+                         self.single_queue_assignments[repo_name] = best_container
+                         logger.info("Assigned container for single-queue repo",
+                                     repo=repo_name,
+                                     container=best_container)
+             else:
+                 # Normal load balancing - find container with shortest queue
+                 min_queue_size = float('inf')
+                 for dev_name in self.config.get_container_pool_list():
+                     queue_size = self.container_queues[dev_name].qsize()
+                     if queue_size < min_queue_size:
+                         min_queue_size = queue_size
+                         best_container = dev_name
+
+             if best_container is None:
+                 logger.error("No containers available for task queuing")
+                 return False
+
+             # Create queued task
+             queued_task = QueuedTask(
+                 task_id=task_id,
+                 repo_name=repo_name,
+                 task_description=task_description,
+                 event=event,
+                 task_type=task_type
+             )
+
+             # Add to queue
+             await self.container_queues[best_container].put(queued_task)
+
+             queue_size = self.container_queues[best_container].qsize()
+             logger.info("Task queued successfully",
+                         task_id=task_id,
+                         container=best_container,
+                         queue_size=queue_size,
+                         repo=repo_name,
+                         single_queue=single_queue_required)
+
+             return True
+
+         except Exception as e:
+             logger.error("Failed to queue task",
+                          task_id=task_id,
+                          error=str(e))
+             return False
+
+     def _start_workers(self) -> None:
+         """Start worker tasks for each container."""
+         for dev_name in self.config.get_container_pool_list():
+             worker_task = asyncio.create_task(
+                 self._container_worker(dev_name)
+             )
+             self.container_workers[dev_name] = worker_task
+
+             logger.info("Started worker for container", container=dev_name)
+
+     async def _container_worker(self, dev_name: str) -> None:
+         """Worker process for a specific container.
+
+         Args:
+             dev_name: Name of the container this worker manages
+         """
+         logger.info("Container worker started", container=dev_name)
+
+         try:
+             while True:
+                 # Wait for a task from the queue
+                 try:
+                     queued_task = await self.container_queues[dev_name].get()
+
+                     try:
+                         logger.info("Worker processing task",
+                                     container=dev_name,
+                                     task_id=queued_task.task_id,
+                                     repo=queued_task.repo_name)
+
+                         # Process the task via subprocess for Docker safety
+                         await self._process_task_subprocess(dev_name, queued_task)
+
+                     finally:
+                         # Always mark task as done, regardless of success/failure
+                         self.container_queues[dev_name].task_done()
+
+                 except asyncio.CancelledError:
+                     logger.info("Container worker cancelled", container=dev_name)
+                     break
+                 except Exception as e:
+                     logger.error("Error in container worker",
+                                  container=dev_name,
+                                  error=str(e))
+                     # Continue processing other tasks
+                     continue
+
+         except Exception as e:
+             logger.error("Container worker failed",
+                          container=dev_name,
+                          error=str(e))
+
+     async def _process_task_subprocess(self, dev_name: str, queued_task: QueuedTask) -> None:
+         """Process a single task via subprocess for Docker safety.
+
+         Args:
+             dev_name: Name of container to execute in
+             queued_task: Task to process
+         """
+         repo_name = queued_task.repo_name
+         repo_path = self.config.repo_cache_dir / repo_name.replace("/", "-")
+
+         logger.info("Starting task processing via subprocess",
+                     task_id=queued_task.task_id,
+                     container=dev_name,
+                     repo_name=repo_name,
+                     repo_path=str(repo_path))
+
+         try:
+             # Get cached config or ensure it's loaded
+             devs_options = self.get_repo_config(repo_name)
+             if devs_options is None:
+                 # Not cached yet, need to clone and read config
+                 logger.info("Repository config not cached, cloning to read DEVS.yml",
+                             task_id=queued_task.task_id,
+                             container=dev_name,
+                             repo_name=repo_name)
+
+                 devs_options = await self._ensure_repository_cloned(repo_name, repo_path)
+
+                 # Cache the repository configuration for future use
+                 self.repo_configs[repo_name] = devs_options
+
+                 # Handle single-queue container assignment
+                 if devs_options and devs_options.single_queue:
+                     if repo_name not in self.single_queue_assignments:
+                         # This is the first time we've seen this repo needs single-queue
+                         # Register it with the current container
+                         self.single_queue_assignments[repo_name] = dev_name
+                         logger.info("Assigned container for single_queue repo after first clone",
+                                     repo=repo_name,
+                                     container=dev_name)
+                 elif repo_name in self.single_queue_assignments:
+                     # The repo was previously single-queue but no longer is
+                     # Remove it from the assignments tracking
+                     previous_container = self.single_queue_assignments[repo_name]
+                     del self.single_queue_assignments[repo_name]
+                     logger.info("Removed single_queue assignment - DEVS.yml no longer has single_queue=true",
+                                 repo=repo_name,
+                                 previously_assigned_container=previous_container)
+             else:
+                 # Config already cached, just ensure repository is cloned without re-reading config
+                 logger.info("Using cached repository config, ensuring repo is cloned",
+                             task_id=queued_task.task_id,
+                             container=dev_name,
+                             repo_name=repo_name)
+
+                 # Still need to ensure the repository files are available locally
+                 # but we can skip re-reading DEVS.yml
+                 await self._ensure_repository_files_available(repo_name, repo_path)
+
+             logger.info("Repository cloning completed, launching worker subprocess",
+                         task_id=queued_task.task_id,
+                         container=dev_name,
+                         devs_options_present=devs_options is not None)
+
+             # Build JSON payload for stdin (no base64 encoding needed)
+             stdin_payload = {
+                 "task_description": queued_task.task_description,
+                 "event": queued_task.event.model_dump(mode='json'),  # Use JSON mode for datetime serialization
+             }
+             if devs_options:
+                 stdin_payload["devs_options"] = devs_options.model_dump(mode='json')
+
+             stdin_json = json.dumps(stdin_payload)
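+             # Example payload shape (field values illustrative):
+             #   {"task_description": "Fix the failing build",
+             #    "event": {...serialized WebhookEvent...},
+             #    "devs_options": {...serialized DevsOptions...}}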
+
+             # Build subprocess command (only basic args, large data via stdin)
+             cmd = [
+                 sys.executable, "-m", "devs_webhook.cli.worker",
+                 "--task-id", queued_task.task_id,
+                 "--dev-name", dev_name,
+                 "--repo-name", repo_name,
+                 "--repo-path", str(repo_path),
+                 "--task-type", queued_task.task_type,
+                 "--timeout", str(3600)  # 60 minute timeout
+             ]
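+             # Note: the 3600s passed to the worker must stay in sync with the
+             # asyncio.wait_for timeout below; both enforce the same 60-minute
+             # ceiling.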
+
+             logger.info("Launching worker subprocess",
+                         task_id=queued_task.task_id,
+                         container=dev_name,
+                         command_length=len(' '.join(cmd)),
+                         stdin_payload_size=len(stdin_json))
+
+             # Launch subprocess with timeout
+             # Set environment to suppress console output
+             env = os.environ.copy()
+             env['DEVS_WEBHOOK_MODE'] = '1'
+
+             process = await asyncio.create_subprocess_exec(
+                 *cmd,
+                 stdin=asyncio.subprocess.PIPE,
+                 stdout=asyncio.subprocess.PIPE,
+                 stderr=asyncio.subprocess.PIPE,
+                 env=env
+             )
+
+             try:
+                 # Wait for subprocess with timeout, sending JSON via stdin
+                 stdout, stderr = await asyncio.wait_for(
+                     process.communicate(input=stdin_json.encode('utf-8')),
+                     timeout=3600  # 60 minute timeout
+                 )
+
+                 # Check result based on exit code
+                 if process.returncode == 0:
+                     # Success - task completed
+                     stdout_content = stdout.decode('utf-8', errors='replace') if stdout else ''
+                     stderr_content = stderr.decode('utf-8', errors='replace') if stderr else ''
+
+                     logger.info("Subprocess task completed successfully",
+                                 task_id=queued_task.task_id,
+                                 container=dev_name,
+                                 return_code=process.returncode)
+
+                     # Log stdout and stderr for debugging (even on success)
+                     if stdout_content:
+                         logger.info("Subprocess stdout",
+                                     task_id=queued_task.task_id,
+                                     container=dev_name,
+                                     stdout=stdout_content[:2000])  # First 2000 chars
+
+                     if stderr_content:
+                         logger.info("Subprocess stderr",
+                                     task_id=queued_task.task_id,
+                                     container=dev_name,
+                                     stderr=stderr_content[:8000])  # First 8000 chars for debugging
+
+                     # Try to extract Claude's output from JSON if possible (for logging)
+                     try:
+                         result_data = json.loads(stdout_content)
+                         output_preview = result_data.get('output', '')[:200]
+                         logger.info("Task output preview",
+                                     task_id=queued_task.task_id,
+                                     output_preview=output_preview)
+                     except Exception:
+                         # If JSON parsing fails, just log that task succeeded
+                         pass
+                 else:
+                     # Failure - post error to GitHub
+                     stdout_content = stdout.decode('utf-8', errors='replace') if stdout else ''
+                     stderr_content = stderr.decode('utf-8', errors='replace') if stderr else ''
+
+                     # Try to extract error from JSON if possible
+                     error_msg = f"Task failed with exit code {process.returncode}"
+                     try:
+                         error_data = json.loads(stdout_content)
+                         if error_data.get('error'):
+                             error_msg = error_data['error']
+                     except Exception:
+                         pass
+
+                     logger.error("Subprocess task failed",
+                                  task_id=queued_task.task_id,
+                                  container=dev_name,
+                                  return_code=process.returncode,
+                                  error=error_msg)
+
+                     # Log stdout and stderr for debugging
+                     if stdout_content:
+                         logger.error("Subprocess stdout",
+                                      task_id=queued_task.task_id,
+                                      container=dev_name,
+                                      stdout=stdout_content[:2000])  # First 2000 chars
+
+                     if stderr_content:
+                         logger.error("Subprocess stderr",
+                                      task_id=queued_task.task_id,
+                                      container=dev_name,
+                                      stderr=stderr_content[:2000])  # First 2000 chars
+
+                     # Post error to GitHub with both stdout and stderr
+                     error_details = f"Task processing failed with exit code {process.returncode}\n\n"
+                     if error_msg != f"Task failed with exit code {process.returncode}":
+                         error_details += f"Error: {error_msg}\n\n"
+                     if stderr_content:
+                         error_details += f"Stderr output:\n```\n{stderr_content[:1500]}\n```\n\n"
+                     if stdout_content and not stdout_content.startswith('{'):
+                         # Include stdout if it's not JSON
+                         error_details += f"Stdout output:\n```\n{stdout_content[:1500]}\n```"
+
+                     await self._post_subprocess_error_to_github(
+                         queued_task,
+                         error_details
+                     )
+
+             except asyncio.TimeoutError:
+                 logger.error("Subprocess task timed out",
+                              task_id=queued_task.task_id,
+                              container=dev_name,
+                              timeout_seconds=3600)
+
+                 # Kill the subprocess
+                 process.kill()
+                 await process.wait()
+
+                 # Post timeout error to GitHub
+                 await self._post_subprocess_error_to_github(
+                     queued_task,
+                     "Task processing timed out after 60 minutes. The task may have been too complex or encountered an issue."
+                 )
+
+                 # Don't raise exception - just log the timeout
+
+         except Exception as e:
+             logger.error("Subprocess task processing failed",
+                          task_id=queued_task.task_id,
+                          container=dev_name,
+                          repo_name=repo_name,
+                          repo_path=str(repo_path),
+                          error=str(e),
+                          error_type=type(e).__name__,
+                          exc_info=True)
+
+             # Post error to GitHub for any other exceptions
+             await self._post_subprocess_error_to_github(
+                 queued_task,
+                 f"Task processing encountered an error: {type(e).__name__}\n\n{str(e)}"
+             )
+
+             # Task execution failed, but we've logged it - don't re-raise
+
+     async def _ensure_repository_cloned(
+         self,
+         repo_name: str,
+         repo_path: Path
+     ) -> DevsOptions:
+         """Ensure repository is cloned to the workspace directory.
+
+         Uses a simple strategy: if the repository exists but the pull fails,
+         remove it and do a fresh clone.
+
+         Args:
+             repo_name: Repository name (owner/repo)
+             repo_path: Path where repository should be cloned
+
+         Returns:
+             DevsOptions parsed from DEVS.yml or defaults
+         """
+         logger.info("Checking repository status",
+                     repo=repo_name,
+                     repo_path=str(repo_path),
+                     exists=repo_path.exists())
+
+         if repo_path.exists():
+             # Repository already exists, try to pull latest changes
+             try:
+                 logger.info("Repository exists, attempting to pull latest changes",
+                             repo=repo_name,
+                             repo_path=str(repo_path))
+
+                 # Set up authentication for private repos
+                 if self.config.github_token:
+                     # Configure the token for this specific repo; wait for the
+                     # subprocess so it cannot race the pull below
+                     remote_url = f"https://{self.config.github_token}@github.com/{repo_name}.git"
+                     set_remote_cmd = ["git", "-C", str(repo_path), "remote", "set-url", "origin", remote_url]
+                     process = await asyncio.create_subprocess_exec(*set_remote_cmd)
+                     await process.wait()
+
+                 # Try to pull - uses the current branch's upstream, which might fail
+                 cmd = ["git", "-C", str(repo_path), "pull"]
+                 process = await asyncio.create_subprocess_exec(
+                     *cmd,
+                     stdout=asyncio.subprocess.PIPE,
+                     stderr=asyncio.subprocess.PIPE
+                 )
+                 stdout, stderr = await process.communicate()
+
+                 if process.returncode == 0:
+                     logger.info("Git pull succeeded",
+                                 repo=repo_name,
+                                 stdout=stdout.decode()[:200] if stdout else "")
+                     logger.info("Repository updated", repo=repo_name, path=str(repo_path))
+                 else:
+                     # Pull failed - remove and re-clone
+                     logger.warning("Git pull failed, removing and re-cloning",
+                                    repo=repo_name,
+                                    return_code=process.returncode,
+                                    stderr=stderr.decode()[:200] if stderr else "")
+
+                     # Remove the existing directory
+                     logger.info("Removing existing repository directory",
+                                 repo=repo_name,
+                                 repo_path=str(repo_path))
+                     shutil.rmtree(repo_path)
+
+                     # Now fall through to clone logic
+
+             except Exception as e:
+                 logger.warning("Failed to update repository, removing and re-cloning",
+                                repo=repo_name,
+                                error=str(e),
+                                error_type=type(e).__name__)
+
+                 # Remove the existing directory
+                 try:
+                     shutil.rmtree(repo_path)
+                     logger.info("Removed existing repository directory",
+                                 repo=repo_name,
+                                 repo_path=str(repo_path))
+                 except Exception as rm_error:
+                     logger.error("Failed to remove repository directory",
+                                  repo=repo_name,
+                                  repo_path=str(repo_path),
+                                  error=str(rm_error))
+                     raise
+
+         # If we get here, either the repo didn't exist or we removed it
+         if not repo_path.exists():
+             # Clone the repository
+             try:
+                 logger.info("Repository does not exist, cloning",
+                             repo=repo_name,
+                             repo_path=str(repo_path))
+
+                 repo_path.parent.mkdir(parents=True, exist_ok=True)
+
+                 # Use GitHub token for authentication
+                 if self.config.github_token:
+                     clone_url = f"https://{self.config.github_token}@github.com/{repo_name}.git"
+                 else:
+                     clone_url = f"https://github.com/{repo_name}.git"
+
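+                 # Because the token is embedded in the clone URL, git stores it
+                 # in the origin remote, so later `git pull` calls on this
+                 # checkout authenticate without extra configuration.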
+                 cmd = ["git", "clone", clone_url, str(repo_path)]
+
+                 # Don't log the token!
+                 safe_url = f"https://github.com/{repo_name}.git"
+                 logger.info("Starting git clone",
+                             repo=repo_name,
+                             clone_url=safe_url,
+                             target_path=str(repo_path))
+
+                 process = await asyncio.create_subprocess_exec(
+                     *cmd,
+                     stdout=asyncio.subprocess.PIPE,
+                     stderr=asyncio.subprocess.PIPE
+                 )
+
+                 stdout, stderr = await process.communicate()
+
+                 logger.info("Git clone completed",
+                             repo=repo_name,
+                             return_code=process.returncode,
+                             stdout=stdout.decode()[:200] if stdout else "",
+                             stderr=stderr.decode()[:200] if stderr else "")
+
+                 if process.returncode == 0:
+                     logger.info("Repository cloned successfully",
+                                 repo=repo_name,
+                                 path=str(repo_path))
+                 else:
+                     error_msg = stderr.decode('utf-8', errors='replace')
+                     logger.error("Failed to clone repository",
+                                  repo=repo_name,
+                                  error=error_msg)
+                     raise Exception(f"Git clone failed: {error_msg}")
+
+             except Exception as e:
+                 logger.error("Repository cloning failed",
+                              repo=repo_name,
+                              error=str(e))
+                 raise
+
+         # Read DEVS.yml configuration using shared method
+         devs_options = self._read_devs_options(repo_path, repo_name)
+         return devs_options
+
+     async def shutdown(self) -> None:
+         """Shutdown the container pool and all workers."""
+         logger.info("Shutting down container pool")
+
+         # Cancel the cleanup worker
+         self.cleanup_worker.cancel()
+         try:
+             await self.cleanup_worker
+         except asyncio.CancelledError:
+             pass
+
+         # Cancel all worker tasks
+         for dev_name, worker_task in self.container_workers.items():
+             worker_task.cancel()
+
+             try:
+                 await worker_task
+             except asyncio.CancelledError:
+                 pass
+
+             logger.info("Worker shut down", container=dev_name)
+
+         # Clean up any remaining running containers
+         async with self._lock:
+             for dev_name, info in self.running_containers.items():
+                 await self._cleanup_container(dev_name, info["repo_path"])
+
+         logger.info("Container pool shutdown complete")
+
+     async def _post_subprocess_error_to_github(self, queued_task: QueuedTask, error_message: str) -> None:
+         """Post an error message to GitHub when subprocess fails.
+
+         Args:
+             queued_task: The task that failed
+             error_message: Error message to post
+         """
+         try:
+             # Skip GitHub operations for test events
+             if queued_task.event.is_test:
+                 logger.info("Skipping GitHub error comment for test event",
+                             error=error_message[:200])
+                 return
+
+             # Create GitHub client
+             github_client = GitHubClient(self.config)
+
+             # Build error comment
+             comment = f"""I encountered an error while processing your request:
+
+ {error_message}
+
+ Please check the webhook handler logs for more details, or try mentioning me again."""
+
+             # Post comment based on event type
+             repo_name = queued_task.event.repository.full_name
+
+             if isinstance(queued_task.event, IssueEvent):
+                 await github_client.comment_on_issue(
+                     repo_name, queued_task.event.issue.number, comment
+                 )
+             elif isinstance(queued_task.event, PullRequestEvent):
+                 await github_client.comment_on_pr(
+                     repo_name, queued_task.event.pull_request.number, comment
+                 )
+             elif isinstance(queued_task.event, CommentEvent):
+                 if queued_task.event.issue:
+                     await github_client.comment_on_issue(
+                         repo_name, queued_task.event.issue.number, comment
+                     )
+                 elif queued_task.event.pull_request:
+                     await github_client.comment_on_pr(
+                         repo_name, queued_task.event.pull_request.number, comment
+                     )
+
+             logger.info("Posted error comment to GitHub",
+                         task_id=queued_task.task_id,
+                         repo=repo_name)
+
+         except Exception as e:
+             logger.error("Failed to post error to GitHub",
+                          task_id=queued_task.task_id,
+                          error=str(e))
+
+     async def get_status(self) -> Dict[str, Any]:
+         """Get current pool status."""
+         async with self._lock:
+             return {
+                 "container_queues": {
+                     name: queue.qsize()
+                     for name, queue in self.container_queues.items()
+                 },
+                 "running_containers": {
+                     name: {
+                         "repo_path": str(info["repo_path"]),
+                         "last_used": info["last_used"].isoformat(),
+                     }
+                     for name, info in self.running_containers.items()
+                 },
+                 "total_containers": len(self.config.get_container_pool_list()),
+                 "single_queue_assignments": self.single_queue_assignments.copy(),
+                 "cached_repo_configs": list(self.repo_configs.keys()),
+             }
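+         # Example return value (illustrative):
+         #   {"container_queues": {"dev1": 0, "dev2": 3},
+         #    "running_containers": {"dev1": {"repo_path": "...", "last_used": "..."}},
+         #    "total_containers": 2,
+         #    "single_queue_assignments": {"org/repo": "dev1"},
+         #    "cached_repo_configs": ["org/repo"]}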
+
+     async def _idle_cleanup_worker(self) -> None:
+         """Periodically clean up idle containers."""
+         while True:
+             try:
+                 await asyncio.sleep(60)  # Check every minute
+
+                 async with self._lock:
+                     now = datetime.now(tz=timezone.utc)
+                     idle_timeout = timedelta(minutes=self.config.container_timeout_minutes)
+
+                     idle_containers = []
+                     for dev_name, info in self.running_containers.items():
+                         if now - info["last_used"] > idle_timeout:
+                             idle_containers.append((dev_name, info["repo_path"]))
+
+                     for dev_name, repo_path in idle_containers:
+                         logger.info("Container idle, cleaning up", container=dev_name)
+                         await self._cleanup_container(dev_name, repo_path)
+                         del self.running_containers[dev_name]
+
+             except asyncio.CancelledError:
+                 logger.info("Idle cleanup worker cancelled")
+                 break
+             except Exception as e:
+                 logger.error("Error in idle cleanup worker", error=str(e))
+
+     async def _cleanup_container(self, dev_name: str, repo_path: Path) -> None:
+         """Clean up a container after use.
+
+         Args:
+             dev_name: Name of container to clean up
+             repo_path: Path to repository on host
+         """
+         try:
+             # Create project and managers for cleanup
+             project = Project(repo_path)
+
+             # Use the same config as the rest of the webhook handler
+             workspace_manager = WorkspaceManager(project, self.config)
+             container_manager = ContainerManager(project, self.config)
+
+             # Stop container
+             logger.info("Starting container stop", container=dev_name)
+             stop_success = container_manager.stop_container(dev_name)
+             logger.info("Container stop result", container=dev_name, success=stop_success)
+
+             # Remove workspace
+             logger.info("Starting workspace removal", container=dev_name)
+             workspace_success = workspace_manager.remove_workspace(dev_name)
+             logger.info("Workspace removal result", container=dev_name, success=workspace_success)
+
+             logger.info("Container cleanup complete",
+                         container=dev_name,
+                         container_stopped=stop_success,
+                         workspace_removed=workspace_success)
+
+         except Exception as e:
+             logger.error("Container cleanup failed",
+                          container=dev_name,
+                          error=str(e))