caption-flow 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
caption_flow/cli.py ADDED
@@ -0,0 +1,709 @@
1
+ """Command-line interface for CaptionFlow with smart configuration handling."""
2
+
3
+ import asyncio
4
+ import json
5
+ import logging
6
+ import os
7
+ import sys
8
+ from pathlib import Path
9
+ from typing import Optional, Dict, Any, List
10
+
11
+ import click
12
+ import yaml
13
+ from rich.console import Console
14
+ from rich.logging import RichHandler
15
+ from datetime import datetime
16
+
17
+ from .orchestrator import Orchestrator
18
+ from .worker import Worker
19
+ from .monitor import Monitor
20
+ from .utils.certificates import CertificateManager
21
+
22
# Shared Rich console: used for all user-facing CLI output in this module and
# handed to the RichHandler configured in setup_logging().
console = Console()
23
+
24
+
25
class ConfigManager:
    """Smart configuration discovery and management following XDG Base Directory spec.

    All methods are classmethods; the class only groups the helpers and the
    component-name to file-name table.
    """

    # Component name -> config file name looked up in every search directory.
    CONFIG_NAMES = {
        "orchestrator": "orchestrator.yaml",
        "worker": "worker.yaml",
        "monitor": "monitor.yaml",
    }

    @classmethod
    def get_xdg_config_home(cls) -> Path:
        """Return $XDG_CONFIG_HOME, or ~/.config when it is unset or empty."""
        xdg_config = os.environ.get("XDG_CONFIG_HOME")
        if xdg_config:
            return Path(xdg_config)
        return Path.home() / ".config"

    @classmethod
    def get_xdg_config_dirs(cls) -> List[Path]:
        """Return the directories listed in $XDG_CONFIG_DIRS.

        An unset *or empty* variable falls back to ``/etc/xdg``. Empty entries
        (e.g. from ``/a::/b``) are skipped: ``Path("")`` is the current
        directory, which must not silently become a system-wide search path.
        """
        raw = os.environ.get("XDG_CONFIG_DIRS") or "/etc/xdg"
        dirs = [Path(entry) for entry in raw.split(":") if entry]
        return dirs or [Path("/etc/xdg")]

    @classmethod
    def find_config(
        cls, component: str, explicit_path: Optional[str] = None
    ) -> Optional[Dict[str, Any]]:
        """
        Find and load configuration for a component.

        Search order:
        1. Explicit path if provided (no further searching is done)
        2. ./<component_config>.yaml (current directory)
        3. ./config/<component_config>.yaml
        4. ~/.caption-flow/<component_config>.yaml
        5. $XDG_CONFIG_HOME/caption-flow/<component_config>.yaml
        6. /etc/caption-flow/<component_config>.yaml (system-wide)
        7. $XDG_CONFIG_DIRS/caption-flow/<component_config>.yaml
        8. ./examples/<component_config>.yaml (fallback)

        Unknown components fall back to the file name ``config.yaml``.

        Returns:
            The parsed config of the first existing file, or None when no file
            exists or the file that was found could not be loaded.
        """
        config_name = cls.CONFIG_NAMES.get(component, "config.yaml")

        # If explicit path provided, use only that
        if explicit_path:
            path = Path(explicit_path)
            if path.exists():
                console.print(f"[dim]Using config: {path}[/dim]")
                return cls.load_yaml(path)
            console.print(f"[yellow]Config not found: {path}[/yellow]")
            return None

        # Search paths in order
        search_paths = [
            Path.cwd() / config_name,  # Current directory
            Path.cwd() / "config" / config_name,  # Current directory / config subdir
            Path.home() / ".caption-flow" / config_name,  # Home directory
            cls.get_xdg_config_home() / "caption-flow" / config_name,  # XDG config home
            Path("/etc/caption-flow") / config_name,  # System-wide
        ]

        # Add XDG config dirs
        search_paths.extend(
            xdg_dir / "caption-flow" / config_name for xdg_dir in cls.get_xdg_config_dirs()
        )

        # Fallback to examples (relative to the current working directory)
        search_paths.append(Path("examples") / config_name)

        # The first existing file wins, even if it then fails to parse.
        for path in search_paths:
            if path.exists():
                console.print(f"[dim]Found config: {path}[/dim]")
                return cls.load_yaml(path)

        return None

    @classmethod
    def load_yaml(cls, path: Path) -> Optional[Dict[str, Any]]:
        """Load and parse a YAML config file.

        Returns:
            The parsed mapping, ``{}`` for an empty file, or None (after
            printing an error) when the file cannot be read or parsed or its
            top-level value is not a mapping.
        """
        try:
            # Explicit encoding: YAML is UTF-8 regardless of the platform locale.
            with open(path, encoding="utf-8") as f:
                data = yaml.safe_load(f) or {}
        except Exception as e:
            # Deliberately broad: this is a reporting boundary for the CLI.
            console.print(f"[red]Error loading {path}: {e}[/red]")
            return None

        # Callers use the result as a dict (`in`, `.get`); reject lists/scalars here
        # instead of letting them fail later with an unrelated AttributeError.
        if not isinstance(data, dict):
            console.print(
                f"[red]Error loading {path}: top-level YAML value must be a mapping[/red]"
            )
            return None
        return data

    @classmethod
    def merge_configs(cls, base: Dict[str, Any], override: Dict[str, Any]) -> Dict[str, Any]:
        """Deep merge override config into base config.

        Nested dicts present on both sides are merged recursively; any other
        value from ``override`` replaces the one in ``base``. ``base`` itself
        is not modified.
        """
        result = base.copy()

        for key, value in override.items():
            if key in result and isinstance(result[key], dict) and isinstance(value, dict):
                result[key] = cls.merge_configs(result[key], value)
            else:
                result[key] = value

        return result
121
+
122
+
123
def setup_logging(verbose: bool = False):
    """Configure root logging with a Rich handler.

    Args:
        verbose: Use DEBUG level when true, INFO otherwise.

    Safe to call more than once: the handler is only installed on the first
    call, but the requested level is applied on every call.
    """
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        level=level,
        format="%(message)s",
        handlers=[
            RichHandler(console=console, rich_tracebacks=True, show_path=False, show_time=False)
        ],
    )
    # basicConfig() does nothing once the root logger already has handlers, so a
    # later call (e.g. `monitor --debug` after the group callback ran) would
    # otherwise never change the verbosity. Set the level explicitly.
    logging.getLogger().setLevel(level)
133
+
134
+
135
def apply_cli_overrides(config: Dict[str, Any], **kwargs) -> Dict[str, Any]:
    """Deep-merge the CLI options that were actually supplied into ``config``.

    Keyword arguments whose value is None are treated as "not given" and are
    ignored; everything else overrides the corresponding config entry.
    """
    supplied: Dict[str, Any] = {}
    for option, value in kwargs.items():
        if value is None:
            continue
        supplied[option] = value
    return ConfigManager.merge_configs(config, supplied)
139
+
140
+
141
@click.group()
@click.option("--verbose", is_flag=True, help="Enable verbose logging")
@click.pass_context
def main(ctx, verbose: bool):
    """CaptionFlow - Distributed community captioning system."""
    # NOTE: the docstring above is the help text click shows for the group.
    # Configure logging first, then stash the flag on the click context object.
    setup_logging(verbose)
    shared_state = dict(verbose=verbose)
    ctx.obj = shared_state
148
+
149
+
150
@main.command()
@click.option("--config", type=click.Path(exists=True), help="Configuration file")
@click.option("--port", type=int, help="WebSocket server port")
@click.option("--host", help="Bind address")
@click.option("--data-dir", help="Storage directory")
@click.option("--cert", help="SSL certificate path")
@click.option("--key", help="SSL key path")
@click.option("--no-ssl", is_flag=True, help="Disable SSL (development only)")
@click.option("--vllm", is_flag=True, help="Use vLLM orchestrator for WebDataset/HF datasets")
@click.pass_context
def orchestrator(ctx, config: Optional[str], **kwargs):
    """Start the orchestrator server."""
    # NOTE: the docstring above is the help text click shows for this command.
    #
    # Flow: discover/load the config, apply CLI overrides, resolve SSL settings,
    # then run the Orchestrator until it exits or the user presses Ctrl-C.
    # Raises ValueError when --vllm is given but the config has no "vllm" section,
    # and click.UsageError when only one of --cert/--key is supplied.
    base_config = ConfigManager.find_config("orchestrator", config) or {}

    # Accept both a file wrapped in a top-level "orchestrator" section and a bare
    # config. An empty section parses to None, so fall back to an empty dict.
    if "orchestrator" in base_config:
        config_data = base_config["orchestrator"] or {}
    else:
        config_data = base_config

    # Never echo the config itself: it can carry tokens and key paths.
    logging.getLogger(__name__).debug(
        "Loaded orchestrator config sections: %s", list(config_data)
    )

    # Apply CLI overrides. `is not None` so that an explicit `--port 0` is honoured.
    if kwargs.get("port") is not None:
        config_data["port"] = kwargs["port"]
    if kwargs.get("host"):
        config_data["host"] = kwargs["host"]
    if kwargs.get("data_dir"):
        config_data.setdefault("storage", {})["data_dir"] = kwargs["data_dir"]

    # Handle SSL configuration
    cert, key = kwargs.get("cert"), kwargs.get("key")
    if kwargs.get("no_ssl"):
        # --no-ssl must also win over an "ssl" section coming from the config file.
        # NOTE(review): assumes Orchestrator enables TLS based on this section - confirm.
        config_data.pop("ssl", None)
    elif cert and key:
        ssl_section = config_data.setdefault("ssl", {})
        ssl_section["cert"] = cert
        ssl_section["key"] = key
    elif cert or key:
        # Half a key pair used to be dropped silently; make the mistake visible.
        raise click.UsageError("--cert and --key must be provided together")
    elif not config_data.get("ssl"):
        console.print(
            "[yellow]Warning: Running without SSL. Use --cert and --key for production.[/yellow]"
        )

    if kwargs.get("vllm") and "vllm" not in config_data:
        raise ValueError("Must provide vLLM config.")

    orchestrator_instance = Orchestrator(config_data)

    try:
        asyncio.run(orchestrator_instance.start())
    except KeyboardInterrupt:
        console.print("\n[yellow]Shutting down orchestrator...[/yellow]")
        asyncio.run(orchestrator_instance.shutdown())
202
+
203
+
204
@main.command()
@click.option("--config", type=click.Path(exists=True), help="Configuration file")
@click.option("--server", help="Orchestrator WebSocket URL")
@click.option("--token", help="Worker authentication token")
@click.option("--name", help="Worker display name")
@click.option("--batch-size", type=int, help="Inference batch size")
@click.option("--no-verify-ssl", is_flag=True, help="Skip SSL verification")
@click.option("--vllm", is_flag=True, help="Use vLLM worker for GPU inference")
@click.option("--gpu-id", type=int, help="GPU device ID (for vLLM)")
@click.option("--precision", help="Model precision (for vLLM)")
@click.option("--model", help="Model name (for vLLM)")
@click.pass_context
def worker(ctx, config: Optional[str], **kwargs):
    """Start a worker node."""
    # NOTE: the docstring above is the help text click shows for this command.
    #
    # Flow: discover/load the config, apply CLI overrides, validate that a server
    # URL and token are available (exit code 1 otherwise), then run the worker
    # until it exits or the user presses Ctrl-C.
    base_config = ConfigManager.find_config("worker", config) or {}

    # Accept both a file wrapped in a top-level "worker" section and a bare config.
    # An empty section parses to None, so fall back to an empty dict instead of
    # crashing on the first assignment below.
    if "worker" in base_config:
        config_data = base_config["worker"] or {}
    else:
        config_data = base_config

    # Apply CLI overrides (only non-None values, so 0 is a valid --gpu-id)
    for key in ("server", "token", "name", "batch_size", "gpu_id", "precision", "model"):
        if kwargs.get(key) is not None:
            config_data[key] = kwargs[key]

    # The flag can only turn verification off; absence leaves the config value alone.
    if kwargs.get("no_verify_ssl"):
        config_data["verify_ssl"] = False

    # Validate required fields
    if not config_data.get("server"):
        console.print("[red]Error: --server required (or set in config)[/red]")
        sys.exit(1)
    if not config_data.get("token"):
        console.print("[red]Error: --token required (or set in config)[/red]")
        sys.exit(1)

    # Choose worker type. The vLLM worker is imported lazily so its module is
    # only loaded when that mode is requested.
    if kwargs.get("vllm") or config_data.get("vllm"):
        from .worker_vllm import VLLMWorker

        worker_instance = VLLMWorker(config_data)
    else:
        worker_instance = Worker(config_data)

    try:
        asyncio.run(worker_instance.start())
    except KeyboardInterrupt:
        console.print("\n[yellow]Shutting down worker...[/yellow]")
        asyncio.run(worker_instance.shutdown())
256
+
257
+
258
@main.command()
@click.option("--config", type=click.Path(exists=True), help="Configuration file")
@click.option("--server", help="Orchestrator WebSocket URL")
@click.option("--token", help="Authentication token")
@click.option("--no-verify-ssl", is_flag=True, help="Skip SSL verification")
@click.option("--debug", is_flag=True, help="Enable debug output")
@click.pass_context
def monitor(ctx, config: Optional[str], server: Optional[str], token: Optional[str],
            no_verify_ssl: bool, debug: bool):
    """Start the monitoring TUI."""
    # NOTE: the docstring above is the help text click shows for this command.
    #
    # Flow: load monitor settings (monitor.yaml, else the "monitor" section of the
    # orchestrator config, else CLI only), apply CLI overrides, validate, then run
    # the Monitor. Exits with status 1 on missing settings or startup failure.

    def _print_example_config():
        # Shared hint shown when a required setting is missing.
        console.print("\n[dim]Example monitor.yaml:[/dim]")
        console.print("server: wss://localhost:8765")
        console.print("token: your-token-here")

    # Enable debug logging if requested
    if debug:
        setup_logging(verbose=True)
        # The group callback has already configured logging, and basicConfig() is
        # a no-op the second time, so raise the root level explicitly as well.
        logging.getLogger().setLevel(logging.DEBUG)
        console.print("[yellow]Debug mode enabled[/yellow]")

    # Load configuration
    base_config = ConfigManager.find_config('monitor', config)

    if not base_config:
        # Try to find monitor config in orchestrator config as fallback
        orch_config = ConfigManager.find_config('orchestrator')
        if orch_config and 'monitor' in orch_config:
            base_config = {'monitor': orch_config['monitor']}
            console.print("[dim]Using monitor config from orchestrator.yaml[/dim]")
        else:
            base_config = {}
            if not server or not token:
                console.print("[yellow]No monitor config found, using CLI args[/yellow]")

    # Handle different config structures
    if 'monitor' in base_config:
        # Case 1: top-level 'monitor' section. An empty section parses to None.
        config_data = base_config['monitor'] or {}
    else:
        # Case 2: the file IS the monitor config (no wrapper)
        config_data = base_config

    # Apply CLI overrides (CLI always wins)
    if server:
        config_data['server'] = server
    if token:
        config_data['token'] = token
    if no_verify_ssl:
        config_data['verify_ssl'] = False

    # Debug output (only the last four characters of the token are shown)
    if debug:
        console.print("\n[cyan]Final monitor configuration:[/cyan]")
        console.print(f" Server: {config_data.get('server', 'NOT SET')}")
        console.print(f" Token: {'***' + config_data.get('token', '')[-4:] if config_data.get('token') else 'NOT SET'}")
        console.print(f" Verify SSL: {config_data.get('verify_ssl', True)}")
        console.print()

    # Validate required fields
    if not config_data.get('server'):
        console.print("[red]Error: --server required (or set 'server' in monitor.yaml)[/red]")
        _print_example_config()
        sys.exit(1)

    if not config_data.get('token'):
        console.print("[red]Error: --token required (or set 'token' in monitor.yaml)[/red]")
        _print_example_config()
        sys.exit(1)

    # Set defaults for optional settings
    config_data.setdefault('refresh_interval', 1.0)
    config_data.setdefault('show_inactive_workers', False)
    config_data.setdefault('max_log_lines', 100)

    # Create and start monitor
    try:
        monitor_instance = Monitor(config_data)

        if debug:
            console.print("[green]Starting monitor...[/green]")
            console.print(f"[dim]Connecting to: {config_data['server']}[/dim]")
            # A leftover `sys.exit(1)` used to sit here and terminated the
            # command before the monitor was ever started.

        asyncio.run(monitor_instance.start())

    except KeyboardInterrupt:
        console.print("\n[yellow]Closing monitor...[/yellow]")
    except ConnectionRefusedError:
        console.print(f"\n[red]Error: Cannot connect to {config_data['server']}[/red]")
        console.print("[yellow]Check that the orchestrator is running and accessible[/yellow]")
        sys.exit(1)
    except Exception as e:
        console.print(f"\n[red]Error starting monitor: {e}[/red]")
        if debug:
            import traceback
            traceback.print_exc()
        sys.exit(1)
355
+
356
@main.command()
@click.option("--config", type=click.Path(exists=True), help="Configuration file")
@click.option("--server", help="Orchestrator WebSocket URL")
@click.option("--token", help="Admin authentication token")
@click.option(
    "--new-config", type=click.Path(exists=True), required=True, help="New configuration file"
)
@click.option("--no-verify-ssl", is_flag=True, help="Skip SSL verification")
def reload_config(
    config: Optional[str],
    server: Optional[str],
    token: Optional[str],
    new_config: str,
    no_verify_ssl: bool,
):
    """Reload orchestrator configuration via admin connection."""
    # NOTE: the docstring above is the help text click shows for this command.
    #
    # Connects to the orchestrator as an admin, sends the contents of --new-config
    # and reports the outcome. Exits with status 1 when server/token are missing,
    # the new file cannot be loaded (or is empty), or the reload does not succeed.
    import websockets
    import ssl

    # Fall back to the "admin" section of the orchestrator config for whatever
    # the CLI did not supply.
    if not (server and token):
        admin_settings = (ConfigManager.find_config("orchestrator", config) or {}).get(
            "admin", {}
        )
        server = server or admin_settings.get("server")
        token = token or admin_settings.get("token")

    if not server:
        console.print("[red]Error: --server required (or set in config)[/red]")
        sys.exit(1)
    if not token:
        console.print("[red]Error: --token required (or set in config)[/red]")
        sys.exit(1)

    console.print(f"[cyan]Loading configuration from {new_config}...[/cyan]")

    # Read the replacement configuration that will be pushed to the server.
    new_cfg = ConfigManager.load_yaml(Path(new_config))
    if not new_cfg:
        console.print("[red]Failed to load configuration[/red]")
        sys.exit(1)

    # TLS is only set up for wss:// URLs; --no-verify-ssl disables hostname and
    # certificate checks on an otherwise default context.
    ssl_context = None
    if server.startswith("wss://"):
        ssl_context = ssl.create_default_context()
        if no_verify_ssl:
            ssl_context.check_hostname = False
            ssl_context.verify_mode = ssl.CERT_NONE

    async def send_reload():
        # Returns True when the server acknowledges the reload, False otherwise.
        try:
            async with websockets.connect(server, ssl=ssl_context) as websocket:
                # Step 1: authenticate as admin.
                auth_request = json.dumps({"token": token, "role": "admin"})
                await websocket.send(auth_request)
                auth_response = json.loads(await websocket.recv())

                if "error" in auth_response:
                    console.print(f"[red]Authentication failed: {auth_response['error']}[/red]")
                    return False

                console.print("[green]✓ Authenticated as admin[/green]")

                # Step 2: push the new configuration and wait for the verdict.
                reload_request = json.dumps({"type": "reload_config", "config": new_cfg})
                await websocket.send(reload_request)
                reload_response = json.loads(await websocket.recv())

                if reload_response.get("type") != "reload_complete":
                    error = reload_response.get("error", "Unknown error")
                    console.print(f"[red]Reload failed: {error}[/red]")
                    return False

                if "message" in reload_response and "No changes" in reload_response["message"]:
                    console.print(f"[yellow]{reload_response['message']}[/yellow]")
                    return True

                console.print("[green]✓ Configuration reloaded successfully![/green]")

                updated_sections = reload_response.get("updated")
                if updated_sections:
                    console.print("\n[cyan]Updated sections:[/cyan]")
                    for section in updated_sections:
                        console.print(f" • {section}")

                reload_warnings = reload_response.get("warnings")
                if reload_warnings:
                    console.print("\n[yellow]Warnings:[/yellow]")
                    for warning in reload_warnings:
                        console.print(f" ⚠ {warning}")

                return True

        except Exception as e:
            console.print(f"[red]Error: {e}[/red]")
            return False

    if not asyncio.run(send_reload()):
        sys.exit(1)
460
+
461
+
462
@main.command()
@click.option("--data-dir", default="./caption_data", help="Storage directory")
@click.option("--checkpoint-dir", default="./checkpoints", help="Checkpoint directory")
@click.option("--fix", is_flag=True, help="Fix issues by resetting abandoned chunks")
@click.option("--verbose", is_flag=True, help="Show detailed information")
def scan_chunks(data_dir: str, checkpoint_dir: str, fix: bool, verbose: bool):
    """Scan for sparse or abandoned chunks and optionally fix them."""
    # NOTE: the docstring above is the help text click shows for this command.
    #
    # Three checks are run against <checkpoint-dir>/chunks.json:
    #   1. chunks that have been "assigned" for more than an hour (abandoned),
    #   2. incomplete shards with index gaps or failed chunks (sparse),
    #   3. with --verbose: tracker state vs. chunk ids stored in the captions file.
    # With --fix, abandoned chunks and chunks missing from storage are passed to
    # tracker.mark_failed() and the checkpoint is saved at the end.
    from .utils.chunk_tracker import ChunkTracker
    from .storage import StorageManager
    import pyarrow.parquet as pq

    console.print("[bold cyan]Scanning for sparse/abandoned chunks...[/bold cyan]\n")

    checkpoint_path = Path(checkpoint_dir) / "chunks.json"
    if not checkpoint_path.exists():
        console.print("[red]No chunk checkpoint found![/red]")
        return

    tracker = ChunkTracker(checkpoint_path)
    storage = StorageManager(Path(data_dir))

    # Get and display stats
    stats = tracker.get_stats()
    console.print(f"[green]Total chunks:[/green] {stats['total']}")
    console.print(f"[green]Completed:[/green] {stats['completed']}")
    console.print(f"[yellow]Pending:[/yellow] {stats['pending']}")
    console.print(f"[yellow]Assigned:[/yellow] {stats['assigned']}")
    console.print(f"[red]Failed:[/red] {stats['failed']}\n")

    # Find abandoned chunks
    abandoned_chunks = []
    stale_threshold = 3600  # 1 hour
    # NOTE(review): naive UTC "now"; presumably assigned_at is naive UTC too - confirm.
    current_time = datetime.utcnow()

    for chunk_id, chunk_state in tracker.chunks.items():
        if chunk_state.status == "assigned" and chunk_state.assigned_at:
            age = (current_time - chunk_state.assigned_at).total_seconds()
            if age > stale_threshold:
                abandoned_chunks.append((chunk_id, chunk_state, age))

    if abandoned_chunks:
        console.print(f"[red]Found {len(abandoned_chunks)} abandoned chunks:[/red]")
        # Only the first ten are listed to keep the output readable.
        for chunk_id, chunk_state, age in abandoned_chunks[:10]:
            age_str = f"{age/3600:.1f} hours" if age > 3600 else f"{age/60:.1f} minutes"
            console.print(f" • {chunk_id} (assigned to {chunk_state.assigned_to} {age_str} ago)")

        if len(abandoned_chunks) > 10:
            console.print(f" ... and {len(abandoned_chunks) - 10} more")

        if fix:
            console.print("\n[yellow]Resetting abandoned chunks to pending...[/yellow]")
            # NOTE(review): the message says "pending" but the call is mark_failed();
            # confirm that ChunkTracker.mark_failed() re-queues the chunk.
            for chunk_id, _, _ in abandoned_chunks:
                tracker.mark_failed(chunk_id)
            console.print(f"[green]✓ Reset {len(abandoned_chunks)} chunks[/green]")

    # Check for sparse shards
    console.print("\n[bold cyan]Checking for sparse shards...[/bold cyan]")

    shards_summary = tracker.get_shards_summary()
    sparse_shards = []

    for shard_name, shard_info in shards_summary.items():
        if shard_info["is_complete"]:
            continue

        # Walk the chunks in index order; each one must start where the previous ended.
        chunks = sorted(shard_info["chunks"], key=lambda c: c.start_index)
        expected_index = 0
        has_gaps = False

        for chunk in chunks:
            if chunk.start_index != expected_index:
                has_gaps = True
                break
            expected_index = chunk.start_index + chunk.chunk_size

        if has_gaps or shard_info["failed_chunks"] > 0:
            sparse_shards.append((shard_name, shard_info, has_gaps))

    if sparse_shards:
        console.print(f"\n[yellow]Found {len(sparse_shards)} sparse/incomplete shards:[/yellow]")
        # Only the first five are listed to keep the output readable.
        for shard_name, shard_info, has_gaps in sparse_shards[:5]:
            status = []
            if shard_info["pending_chunks"] > 0:
                status.append(f"{shard_info['pending_chunks']} pending")
            if shard_info["assigned_chunks"] > 0:
                status.append(f"{shard_info['assigned_chunks']} assigned")
            if shard_info["failed_chunks"] > 0:
                status.append(f"{shard_info['failed_chunks']} failed")
            if has_gaps:
                status.append("has gaps")

            console.print(f" • {shard_name}: {', '.join(status)}")
            console.print(
                f" Progress: {shard_info['completed_chunks']}/{shard_info['total_chunks']} chunks"
            )

        if len(sparse_shards) > 5:
            console.print(f" ... and {len(sparse_shards) - 5} more")

    # Mismatches found by the storage cross-check. They are counted in the summary
    # so it cannot claim "No issues found!" right after reporting some.
    storage_issues = 0

    # Cross-check with storage if verbose
    if storage.captions_path.exists() and verbose:
        console.print("\n[bold cyan]Cross-checking with stored captions...[/bold cyan]")

        try:
            table = pq.read_table(storage.captions_path, columns=["chunk_id"])
            stored_chunk_ids = {c for c in table["chunk_id"].to_pylist() if c}

            tracker_completed = {c for c, s in tracker.chunks.items() if s.status == "completed"}

            missing_in_storage = tracker_completed - stored_chunk_ids
            missing_in_tracker = stored_chunk_ids - set(tracker.chunks)
            storage_issues = len(missing_in_storage) + len(missing_in_tracker)

            if missing_in_storage:
                console.print(
                    f"\n[red]Chunks marked complete but missing from storage:[/red] {len(missing_in_storage)}"
                )
                for chunk_id in list(missing_in_storage)[:5]:
                    console.print(f" • {chunk_id}")

                if fix:
                    console.print("[yellow]Resetting these chunks to pending...[/yellow]")
                    for chunk_id in missing_in_storage:
                        tracker.mark_failed(chunk_id)
                    console.print(f"[green]✓ Reset {len(missing_in_storage)} chunks[/green]")

            if missing_in_tracker:
                console.print(
                    f"\n[yellow]Chunks in storage but not tracked:[/yellow] {len(missing_in_tracker)}"
                )

        except Exception as e:
            # Best effort: a broken captions file must not abort the rest of the scan.
            console.print(f"[red]Error reading storage: {e}[/red]")

    # Summary
    console.print("\n[bold cyan]Summary:[/bold cyan]")

    total_issues = len(abandoned_chunks) + len(sparse_shards) + storage_issues
    if total_issues == 0:
        console.print("[green]✓ No issues found![/green]")
    else:
        console.print(f"[yellow]Found {total_issues} total issues[/yellow]")

        if not fix:
            console.print(
                "\n[cyan]Run with --fix flag to automatically reset abandoned chunks[/cyan]"
            )
        else:
            console.print(
                "\n[green]✓ Issues have been fixed. Restart orchestrator to reprocess.[/green]"
            )

    # Persist the tracker once, after all resets have been applied.
    if fix:
        tracker.save_checkpoint()
613
+
614
+
615
@main.command()
@click.option("--domain", help="Domain for Let's Encrypt certificate")
@click.option("--email", help="Email for Let's Encrypt registration")
@click.option("--self-signed", is_flag=True, help="Generate self-signed certificate")
@click.option("--output-dir", default="./certs", help="Output directory for certificates")
@click.option("--staging", is_flag=True, help="Use Let's Encrypt staging server (for testing)")
def generate_cert(
    domain: Optional[str], email: Optional[str], self_signed: bool, output_dir: str, staging: bool
):
    """Generate SSL certificates."""
    # NOTE: the docstring above is the help text click shows for this command.
    #
    # --self-signed takes precedence; otherwise both --domain and --email are
    # needed for a Let's Encrypt request. Anything else is a usage error (exit 1).
    cert_manager = CertificateManager()

    def _report_paths(cert_file, key_file):
        # Show where the files ended up and how to pass them to the CLI.
        console.print(f"[green]✓[/green] Certificate: {cert_file}")
        console.print(f"[green]✓[/green] Key: {key_file}")
        console.print("\n[cyan]Use these paths in your config or CLI:[/cyan]")
        console.print(f" --cert {cert_file}")
        console.print(f" --key {key_file}")

    if self_signed:
        console.print("[yellow]Generating self-signed certificate...[/yellow]")
        cert_domain = domain if domain else "localhost"
        generated = cert_manager.generate_self_signed(Path(output_dir), cert_domain)
        cert_path, key_path = generated
        _report_paths(cert_path, key_path)
        return

    if not (domain and email):
        console.print("[red]Error: Specify either --self-signed or --domain with --email[/red]")
        sys.exit(1)

    mode = "production"
    if staging:
        mode = "staging"
    console.print(
        f"[yellow]Requesting Let's Encrypt {mode} certificate for {domain}...[/yellow]"
    )

    # The default --output-dir is passed on as None; only an explicit directory
    # is forwarded to generate_letsencrypt().
    le_output = None if output_dir == "./certs" else Path(output_dir)

    try:
        cert_path, key_path = cert_manager.generate_letsencrypt(
            domain, email, output_dir=le_output, staging=staging
        )
        _report_paths(cert_path, key_path)

        if staging:
            console.print(
                "\n[yellow]⚠ This is a staging certificate (not trusted by browsers)[/yellow]"
            )
            console.print(
                "[yellow] Remove --staging flag for production certificates[/yellow]"
            )
    except RuntimeError as e:
        console.print(f"[red]Error: {e}[/red]")
        console.print("\n[yellow]Troubleshooting:[/yellow]")
        for hint in (
            " • Ensure port 80 is accessible for Let's Encrypt validation",
            " • Check that the domain points to this server",
            " • Try --staging flag for testing",
        ):
            console.print(hint)
        sys.exit(1)
671
+
672
+
673
@main.command()
@click.argument("cert_path", type=click.Path(exists=True))
def inspect_cert(cert_path: str):
    """Inspect an SSL certificate."""
    # NOTE: the docstring above is the help text click shows for this command.
    #
    # Prints the fields returned by CertificateManager.get_cert_info() followed by
    # an expiry verdict. Any failure is reported and ends with exit status 1.
    cert_manager = CertificateManager()

    try:
        info = cert_manager.get_cert_info(Path(cert_path))

        console.print("\n[bold cyan]Certificate Information[/bold cyan]")
        for label, field in (
            ("Subject", "subject"),
            ("Issuer", "issuer"),
            ("Valid From", "not_before"),
            ("Valid Until", "not_after"),
            ("Serial Number", "serial_number"),
        ):
            console.print(f"[green]{label}:[/green] {info[field]}")

        if info["is_self_signed"]:
            console.print("[yellow]⚠ This is a self-signed certificate[/yellow]")

        from datetime import datetime

        # NOTE(review): compared against naive UTC "now"; presumably not_after is a
        # naive UTC datetime as well - confirm against get_cert_info().
        expires_at = info["not_after"]
        now = datetime.utcnow()

        if expires_at < now:
            console.print("[red]✗ Certificate has expired![/red]")
        else:
            days_left = (expires_at - now).days
            if days_left < 30:
                console.print(f"[yellow]⚠ Certificate expires in {days_left} days[/yellow]")
            else:
                console.print(f"[green]✓ Certificate valid for {days_left} more days[/green]")

    except Exception as e:
        console.print(f"[red]Error reading certificate: {e}[/red]")
        sys.exit(1)
706
+
707
+
708
# Allow running this module directly as a script; invokes the click group.
if __name__ == "__main__":
    main()