ado_git_repo_insights-1.2.1-py3-none-any.whl

@@ -0,0 +1,3 @@
+ """ado-git-repo-insights: Azure DevOps PR metrics extraction and CSV generation."""
+
+ __version__ = "0.0.0"  # Managed by setuptools_scm
@@ -0,0 +1,354 @@
+ """CLI entry point for ado-git-repo-insights."""
+
+ from __future__ import annotations
+
+ import argparse
+ import logging
+ import sys
+ import time
+ from datetime import date
+ from pathlib import Path
+ from typing import TYPE_CHECKING
+
+ from .config import ConfigurationError, load_config
+ from .extractor.ado_client import ADOClient, ExtractionError
+ from .extractor.pr_extractor import PRExtractor
+ from .persistence.database import DatabaseError, DatabaseManager
+ from .transform.csv_generator import CSVGenerationError, CSVGenerator
+ from .utils.logging_config import LoggingConfig, setup_logging
+ from .utils.run_summary import (
+     RunCounts,
+     RunSummary,
+     RunTimings,
+     create_minimal_summary,
+     get_git_sha,
+     get_tool_version,
+ )
+
+ if TYPE_CHECKING:
+     from argparse import Namespace
+
+ logger = logging.getLogger(__name__)
+
+
+ def create_parser() -> argparse.ArgumentParser:  # pragma: no cover
+     """Create the argument parser for the CLI."""
+     parser = argparse.ArgumentParser(
+         prog="ado-insights",
+         description="Extract Azure DevOps PR metrics and generate PowerBI-compatible CSVs.",
+     )
+
+     # Global options
+     parser.add_argument(
+         "--log-format",
+         type=str,
+         choices=["console", "jsonl"],
+         default="console",
+         help="Log format: console (human-readable) or jsonl (structured)",
+     )
+     parser.add_argument(
+         "--artifacts-dir",
+         type=Path,
+         default=Path("run_artifacts"),
+         help="Directory for run artifacts (summary, logs)",
+     )
+
+     subparsers = parser.add_subparsers(dest="command", required=True)
+
+     # Extract command
+     extract_parser = subparsers.add_parser(
+         "extract",
+         help="Extract PR data from Azure DevOps",
+     )
+     extract_parser.add_argument(
+         "--organization",
+         type=str,
+         help="Azure DevOps organization name",
+     )
+     extract_parser.add_argument(
+         "--projects",
+         type=str,
+         help="Comma-separated list of project names",
+     )
+     extract_parser.add_argument(
+         "--pat",
+         type=str,
+         required=True,
+         help="Personal Access Token with Code (Read) scope",
+     )
+     extract_parser.add_argument(
+         "--config",
+         type=Path,
+         help="Path to config.yaml file",
+     )
+     extract_parser.add_argument(
+         "--database",
+         type=Path,
+         default=Path("ado-insights.sqlite"),
+         help="Path to SQLite database file",
+     )
+     extract_parser.add_argument(
+         "--start-date",
+         type=str,
+         help="Override start date (YYYY-MM-DD)",
+     )
+     extract_parser.add_argument(
+         "--end-date",
+         type=str,
+         help="Override end date (YYYY-MM-DD)",
+     )
+     extract_parser.add_argument(
+         "--backfill-days",
+         type=int,
+         help="Number of days to backfill for convergence",
+     )
+
+     # Generate CSV command
+     csv_parser = subparsers.add_parser(
+         "generate-csv",
+         help="Generate CSV files from SQLite database",
+     )
+     csv_parser.add_argument(
+         "--database",
+         type=Path,
+         required=True,
+         help="Path to SQLite database file",
+     )
+     csv_parser.add_argument(
+         "--output",
+         type=Path,
+         default=Path("csv_output"),
+         help="Output directory for CSV files",
+     )
+
+     return parser
+
+
+ def cmd_extract(args: Namespace) -> int:
+     """Execute the extract command."""
+     start_time = time.perf_counter()
+     timing = RunTimings()
+     counts = RunCounts()
+     warnings_list: list[str] = []
+     per_project_status: dict[str, str] = {}
+     first_fatal_error: str | None = None
+
+     try:
+         # Load and validate configuration
+         config = load_config(
+             config_path=args.config,
+             organization=args.organization,
+             projects=args.projects,
+             pat=args.pat,
+             database=args.database,
+             start_date=args.start_date,
+             end_date=args.end_date,
+             backfill_days=args.backfill_days,
+         )
+         config.log_summary()
+
+         # Connect to database
+         extract_start = time.perf_counter()
+         db = DatabaseManager(config.database)
+         db.connect()
+
+         try:
+             # Create ADO client
+             client = ADOClient(
+                 organization=config.organization,
+                 pat=config.pat,  # Invariant 19: PAT handled securely
+                 config=config.api,
+             )
+
+             # Test connection
+             client.test_connection(config.projects[0])
+
+             # Run extraction
+             extractor = PRExtractor(client, db, config)
+             summary = extractor.extract_all(backfill_days=args.backfill_days)
+
+             # Collect timing
+             timing.extract_seconds = time.perf_counter() - extract_start
+
+             # Collect counts and warnings
+             counts.prs_fetched = summary.total_prs
+             if hasattr(summary, "warnings"):
+                 warnings_list.extend(summary.warnings)
+
+             # Collect per-project status
+             for project_result in summary.projects:
+                 status = "success" if project_result.success else "failed"
+                 per_project_status[project_result.project] = status
+
+                 # Capture first fatal error
+                 if not project_result.success and first_fatal_error is None:
+                     first_fatal_error = (
+                         project_result.error
+                         or f"Extraction failed for project: {project_result.project}"
+                     )
+
+             # Fail-fast: any project failure = exit 1
+             if not summary.success:
+                 logger.error("Extraction failed")
+                 timing.total_seconds = time.perf_counter() - start_time
+
+                 # Write failure summary
+                 run_summary = RunSummary(
+                     tool_version=get_tool_version(),
+                     git_sha=get_git_sha(),
+                     organization=config.organization,
+                     projects=config.projects,
+                     date_range_start=str(config.date_range.start or date.today()),
+                     date_range_end=str(config.date_range.end or date.today()),
+                     counts=counts,
+                     timings=timing,
+                     warnings=warnings_list,
+                     final_status="failed",
+                     per_project_status=per_project_status,
+                     first_fatal_error=first_fatal_error,
+                 )
+                 run_summary.write(args.artifacts_dir / "run_summary.json")
+                 run_summary.print_final_line()
+                 run_summary.emit_ado_commands()
+                 return 1
+
+             logger.info(f"Extraction complete: {summary.total_prs} PRs")
+             timing.total_seconds = time.perf_counter() - start_time
+
+             # Write success summary
+             run_summary = RunSummary(
+                 tool_version=get_tool_version(),
+                 git_sha=get_git_sha(),
+                 organization=config.organization,
+                 projects=config.projects,
+                 date_range_start=str(config.date_range.start or date.today()),
+                 date_range_end=str(config.date_range.end or date.today()),
+                 counts=counts,
+                 timings=timing,
+                 warnings=warnings_list,
+                 final_status="success",
+                 per_project_status=per_project_status,
+                 first_fatal_error=None,
+             )
+             run_summary.write(args.artifacts_dir / "run_summary.json")
+             run_summary.print_final_line()
+             run_summary.emit_ado_commands()
+             return 0
+
+         finally:
+             db.close()
+
+     except ConfigurationError as e:
+         logger.error(f"Configuration error: {e}")
+         # P2 Fix: Write minimal summary for caught errors
+         minimal_summary = create_minimal_summary(
+             f"Configuration error: {e}", args.artifacts_dir
+         )
+         minimal_summary.write(args.artifacts_dir / "run_summary.json")
+         return 1
+     except DatabaseError as e:
+         logger.error(f"Database error: {e}")
+         # P2 Fix: Write minimal summary for caught errors
+         minimal_summary = create_minimal_summary(
+             f"Database error: {e}", args.artifacts_dir
+         )
+         minimal_summary.write(args.artifacts_dir / "run_summary.json")
+         return 1
+     except ExtractionError as e:
+         logger.error(f"Extraction error: {e}")
+         # P2 Fix: Write minimal summary for caught errors
+         minimal_summary = create_minimal_summary(
+             f"Extraction error: {e}", args.artifacts_dir
+         )
+         minimal_summary.write(args.artifacts_dir / "run_summary.json")
+         return 1
+
+
+ def cmd_generate_csv(args: Namespace) -> int:
+     """Execute the generate-csv command."""
+     logger.info("Generating CSV files...")
+     logger.info(f"Database: {args.database}")
+     logger.info(f"Output: {args.output}")
+
+     if not args.database.exists():
+         logger.error(f"Database not found: {args.database}")
+         return 1
+
+     try:
+         db = DatabaseManager(args.database)
+         db.connect()
+
+         try:
+             generator = CSVGenerator(db, args.output)
+             results = generator.generate_all()
+
+             # Validate schemas (Invariant 1)
+             generator.validate_schemas()
+
+             logger.info("CSV generation complete:")
+             for table, count in results.items():
+                 logger.info(f"  {table}: {count} rows")
+
+             return 0
+
+         finally:
+             db.close()
+
+     except DatabaseError as e:
+         logger.error(f"Database error: {e}")
+         return 1
+     except CSVGenerationError as e:
+         logger.error(f"CSV generation error: {e}")
+         return 1
+
+
+ def main() -> int:
+     """Main entry point for the CLI."""
+     parser = create_parser()
+     args = parser.parse_args()
+
+     # Setup logging early
+     log_config = LoggingConfig(
+         format=getattr(args, "log_format", "console"),
+         artifacts_dir=getattr(args, "artifacts_dir", Path("run_artifacts")),
+     )
+     setup_logging(log_config)
+
+     # Ensure artifacts directory exists
+     artifacts_dir = getattr(args, "artifacts_dir", Path("run_artifacts"))
+     artifacts_dir.mkdir(parents=True, exist_ok=True)
+
+     summary_path = artifacts_dir / "run_summary.json"
+
+     try:
+         if args.command == "extract":
+             return cmd_extract(args)
+         elif args.command == "generate-csv":
+             return cmd_generate_csv(args)
+         else:
+             parser.print_help()
+             return 1
+     except KeyboardInterrupt:
+         logger.info("Operation cancelled by user")
+
+         # Write minimal failure summary if success summary doesn't exist
+         if not summary_path.exists():
+             minimal_summary = create_minimal_summary(
+                 "Operation cancelled by user", artifacts_dir
+             )
+             minimal_summary.write(summary_path)
+
+         return 130
+     except Exception as e:
+         logger.exception(f"Unexpected error: {e}")
+
+         # Write minimal failure summary if success summary doesn't exist
+         if not summary_path.exists():
+             minimal_summary = create_minimal_summary(str(e), artifacts_dir)
+             minimal_summary.write(summary_path)
+
+         return 1
+
+
+ if __name__ == "__main__":
+     sys.exit(main())
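
The module above defines two subcommands, extract and generate-csv, plus global --log-format and --artifacts-dir options that must precede the subcommand name. A minimal sketch of driving the CLI programmatically; the import path and all argument values below are placeholders, not taken from the package:

import sys

# Hypothetical import path; the diff does not show where this module lives.
from ado_git_repo_insights.cli import main

sys.argv = [
    "ado-insights",
    "--log-format", "jsonl",             # global options come before the subcommand
    "extract",
    "--organization", "my-org",          # placeholder organization
    "--projects", "ProjectA,ProjectB",   # comma-separated, split by load_config
    "--pat", "<personal-access-token>",  # required; placeholder value
]
raise SystemExit(main())

The handlers return shell-style exit codes: 0 on success, 1 on any failure (fail-fast across projects), and 130 on Ctrl-C.
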
@@ -0,0 +1,186 @@
+ """Configuration loader for ado-git-repo-insights.
+
+ Loads and validates configuration from YAML files or CLI arguments.
+ """
+
+ from __future__ import annotations
+
+ import logging
+ import os
+ from dataclasses import dataclass, field
+ from datetime import date
+ from pathlib import Path
+ from typing import Any
+
+ import yaml
+
+ logger = logging.getLogger(__name__)
+
+
+ class ConfigurationError(Exception):
+     """Configuration validation error."""
+
+
+ @dataclass
+ class APIConfig:
+     """API configuration settings."""
+
+     base_url: str = "https://dev.azure.com"
+     version: str = "7.1-preview.1"
+     rate_limit_sleep_seconds: float = 0.5
+     max_retries: int = 3
+     retry_delay_seconds: float = 5.0
+     retry_backoff_multiplier: float = 2.0
+
+
+ @dataclass
+ class BackfillConfig:
+     """Backfill configuration settings (Adjustment 1)."""
+
+     enabled: bool = True
+     window_days: int = 60  # Default: 60 days (configurable 30-90)
+
+
+ @dataclass
+ class DateRangeConfig:
+     """Optional date range override."""
+
+     start: date | None = None
+     end: date | None = None
+
+
+ @dataclass
+ class Config:
+     """Main configuration for ado-git-repo-insights."""
+
+     organization: str
+     projects: list[str]
+     pat: str  # Will be masked in logs
+     database: Path = field(default_factory=lambda: Path("ado-insights.sqlite"))
+     api: APIConfig = field(default_factory=APIConfig)
+     backfill: BackfillConfig = field(default_factory=BackfillConfig)
+     date_range: DateRangeConfig = field(default_factory=DateRangeConfig)
+
+     def __post_init__(self) -> None:
+         """Validate configuration after initialization."""
+         if not self.organization:
+             raise ConfigurationError("organization is required")
+         if not self.projects:
+             raise ConfigurationError("At least one project is required")
+         if not self.pat:
+             raise ConfigurationError("PAT is required")
+
+     def __repr__(self) -> str:
+         """Repr with masked PAT (Invariant 19: Never expose secrets)."""
+         return (
+             f"Config(organization={self.organization!r}, "
+             f"projects={self.projects!r}, "
+             f"pat='********', "  # Masked
+             f"database={self.database!r}, "
+             f"api={self.api!r}, "
+             f"backfill={self.backfill!r}, "
+             f"date_range={self.date_range!r})"
+         )
+
+     def log_summary(self) -> None:
+         """Log configuration summary (with PAT masked)."""
+         logger.info(f"Organization: {self.organization}")
+         logger.info(f"Projects: {', '.join(self.projects)}")
+         logger.info(f"Database: {self.database}")
+         logger.info(f"PAT: {'*' * 8}...{'*' * 4}")  # Invariant 19: Never log PAT
+         if self.date_range.start or self.date_range.end:
+             logger.info(f"Date range: {self.date_range.start} → {self.date_range.end}")
+         if self.backfill.enabled:
+             logger.info(f"Backfill: {self.backfill.window_days} days")
+
+
+ def load_config(
+     config_path: Path | None = None,
+     organization: str | None = None,
+     projects: str | None = None,
+     pat: str | None = None,
+     database: Path | None = None,
+     start_date: str | None = None,
+     end_date: str | None = None,
+     backfill_days: int | None = None,
+ ) -> Config:
+     """Load configuration from file and/or CLI arguments.
+
+     CLI arguments override file values.
+
+     Args:
+         config_path: Path to config.yaml file.
+         organization: Organization name (CLI override).
+         projects: Comma-separated project names (CLI override).
+         pat: Personal Access Token (CLI override).
+         database: Database path (CLI override).
+         start_date: Start date YYYY-MM-DD (CLI override).
+         end_date: End date YYYY-MM-DD (CLI override).
+         backfill_days: Backfill window in days (CLI override).
+
+     Returns:
+         Validated Config instance.
+
+     Raises:
+         ConfigurationError: If configuration is invalid.
+     """
+     # Start with defaults
+     config_data: dict[str, Any] = {}
+
+     # Load from file if provided
+     if config_path and config_path.exists():
+         logger.info(f"Loading configuration from {config_path}")
+         with config_path.open() as f:
+             config_data = yaml.safe_load(f) or {}
+
+     # Apply CLI overrides
+     if organization:
+         config_data["organization"] = organization
+     if projects:
+         config_data["projects"] = [p.strip() for p in projects.split(",")]
+     if pat:
+         config_data["pat"] = pat
+     elif not config_data.get("pat"):
+         # Try environment variable
+         config_data["pat"] = os.environ.get("ADO_PAT", "")
+
+     # Build API config
+     api_data = config_data.get("api", {})
+     api_config = APIConfig(
+         base_url=api_data.get("base_url", "https://dev.azure.com"),
+         version=api_data.get("version", "7.1-preview.1"),
+         rate_limit_sleep_seconds=api_data.get("rate_limit_sleep_seconds", 0.5),
+         max_retries=api_data.get("max_retries", 3),
+         retry_delay_seconds=api_data.get("retry_delay_seconds", 5.0),
+         retry_backoff_multiplier=api_data.get("retry_backoff_multiplier", 2.0),
+     )
+
+     # Build backfill config
+     backfill_data = config_data.get("backfill", {})
+     backfill_config = BackfillConfig(
+         enabled=backfill_data.get("enabled", True),
+         window_days=backfill_days or backfill_data.get("window_days", 60),
+     )
+
+     # Build date range config
+     date_range = DateRangeConfig()
+     if start_date:
+         date_range.start = date.fromisoformat(start_date)
+     elif config_data.get("date_range", {}).get("start"):
+         date_range.start = date.fromisoformat(config_data["date_range"]["start"])
+
+     if end_date:
+         date_range.end = date.fromisoformat(end_date)
+     elif config_data.get("date_range", {}).get("end"):
+         date_range.end = date.fromisoformat(config_data["date_range"]["end"])
+
+     # Build main config
+     return Config(
+         organization=config_data.get("organization", ""),
+         projects=config_data.get("projects", []),
+         pat=config_data.get("pat", ""),
+         database=database or Path(config_data.get("database", "ado-insights.sqlite")),
+         api=api_config,
+         backfill=backfill_config,
+         date_range=date_range,
+     )
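
Precedence in load_config is: CLI argument first, then file value, then (for the PAT only) the ADO_PAT environment variable. A minimal sketch of that precedence, assuming the hypothetical import path below; all values are placeholders:

import os
from pathlib import Path

# Hypothetical import path; the diff does not show where this module lives.
from ado_git_repo_insights.config import load_config

os.environ["ADO_PAT"] = "env-token"  # placeholder; used only if no CLI/file PAT

config = load_config(
    config_path=Path("config.yaml"),  # optional; silently skipped if absent
    organization="my-org",            # CLI override beats any file value
    projects="ProjectA,ProjectB",     # split into ["ProjectA", "ProjectB"]
    pat=None,                         # no CLI PAT -> falls back to ADO_PAT
    start_date="2024-01-01",          # parsed with date.fromisoformat
    backfill_days=30,                 # overrides backfill.window_days
)
print(config)  # Config.__repr__ masks the PAT as '********'
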
@@ -0,0 +1 @@
+ """Extractor module for Azure DevOps API interactions."""