ado_git_repo_insights-1.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,101 @@
+ """Datetime utilities for ado-git-repo-insights.
+
+ Ported from the original generate_raw_data.py to ensure identical behavior.
+ """
+
+ from __future__ import annotations
+
+ import logging
+ from datetime import datetime
+
+ logger = logging.getLogger(__name__)
+
+
+ def parse_iso_datetime(date_str: str | None) -> datetime | None:
+     """Parse ISO 8601 datetime strings from ADO API.
+
+     Handles 7-digit microseconds and 'Z' suffix quirks from ADO API responses.
+     Preserved from original implementation for compatibility.
+
+     Args:
+         date_str: ISO 8601 datetime string, or None.
+
+     Returns:
+         Parsed datetime, or None if parsing fails or input is None.
+
+     Examples:
+         >>> parse_iso_datetime("2024-01-15T10:30:45.1234567Z")
+         datetime.datetime(2024, 1, 15, 10, 30, 45, 123456)
+         >>> parse_iso_datetime(None) is None
+         True
+     """
+     if not date_str:
+         return None
+
+     try:
+         # Remove trailing 'Z' (Zulu/UTC indicator)
+         date_str = date_str.rstrip("Z")
+
+         if "." in date_str:
+             # ADO API sometimes returns 7-digit microseconds, Python only supports 6
+             date_part, microseconds = date_str.split(".")
+             microseconds = microseconds[:6]  # Truncate to 6 digits
+             date_str = f"{date_part}.{microseconds}"
+             return datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S.%f")
+         else:
+             # No microseconds
+             return datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%S")
+
+     except ValueError as e:
+         logger.warning(f"Failed to parse date '{date_str}': {e}")
+         return None
+
+
+ def calculate_cycle_time_minutes(
+     creation_date: str | None, closed_date: str | None
+ ) -> float | None:
+     """Calculate PR cycle time in minutes.
+
+     Cycle time is the duration from PR creation to closure.
+     Minimum value is 1 minute to avoid zero/negative values.
+
+     Args:
+         creation_date: ISO 8601 creation date string.
+         closed_date: ISO 8601 closed date string.
+
+     Returns:
+         Cycle time in minutes (minimum 1.0), or None if dates are invalid.
+
+     Examples:
+         >>> calculate_cycle_time_minutes(
+         ...     "2024-01-15T10:00:00Z",
+         ...     "2024-01-15T10:30:00Z"
+         ... )
+         30.0
+     """
+     created = parse_iso_datetime(creation_date)
+     closed = parse_iso_datetime(closed_date)
+
+     if created and closed:
+         delta_seconds = (closed - created).total_seconds()
+         minutes = delta_seconds / 60
+         # Minimum 1 minute, rounded to 2 decimal places
+         return max(1.0, round(minutes, 2))
+
+     return None
+
+
+ def format_date_for_api(dt: datetime) -> str:
+     """Format a datetime for ADO API queries.
+
+     Args:
+         dt: Datetime to format.
+
+     Returns:
+         ISO 8601 formatted string with 'Z' suffix.
+
+     Examples:
+         >>> format_date_for_api(datetime(2024, 1, 15, 10, 30, 0))
+         '2024-01-15T10:30:00Z'
+     """
+     return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
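
These helpers compose cleanly: `format_date_for_api` emits the same second-precision `...Z` form that `parse_iso_datetime` accepts, and `calculate_cycle_time_minutes` clamps sub-minute deltas to the 1.0 floor. A minimal sketch; the import path is hypothetical, since the diff omits filenames:

```python
# Sketch exercising the datetime helpers above.
from datetime import datetime

from ado_git_repo_insights.datetime_utils import (  # hypothetical import path
    calculate_cycle_time_minutes,
    format_date_for_api,
    parse_iso_datetime,
)

# ADO's 7-digit fractional seconds are truncated to Python's 6-digit microseconds.
dt = parse_iso_datetime("2024-01-15T10:30:45.1234567Z")
assert dt == datetime(2024, 1, 15, 10, 30, 45, 123456)

# A 10-second PR still reports 1.0: sub-minute deltas hit the floor.
assert calculate_cycle_time_minutes(
    "2024-01-15T10:00:00Z", "2024-01-15T10:00:10Z"
) == 1.0

# Round trip: the API query format parses back at second precision.
assert parse_iso_datetime(format_date_for_api(dt)) == dt.replace(microsecond=0)
```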
@@ -0,0 +1,172 @@
+ """Logging configuration with selective secret redaction.
+
+ Provides console and JSONL logging formats with precise redaction rules.
+ """
+
+ from __future__ import annotations
+
+ import json
+ import logging
+ import os
+ import re
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Any
+
+
+ @dataclass
+ class RedactionConfig:
+     """Configuration for selective secret redaction."""
+
+     # Known secret value patterns (regex)
+     value_patterns: list[str] = field(
+         default_factory=lambda: [
+             r"[A-Za-z0-9]{52}",  # Azure DevOps PAT format (52 chars)
+             r"Bearer\s+[A-Za-z0-9\-._~+/]+=*",  # Bearer tokens
+         ]
+     )
+
+     # Explicit key deny-list (exact matches, case-insensitive)
+     key_denylist: set[str] = field(
+         default_factory=lambda: {
+             "pat",
+             "personal_access_token",
+             "auth_header",
+             "authorization",
+             "webhook_url",
+             "secret",
+             "password",
+         }
+     )
+
+     def should_redact_key(self, key: str) -> bool:
+         """Check if a key should be redacted based on deny-list."""
+         return key.lower() in self.key_denylist
+
+     def redact_value(self, value: str) -> str:
+         """Redact known secret patterns in a value."""
+         result = value
+         for pattern in self.value_patterns:
+             result = re.sub(pattern, "***REDACTED***", result)
+         return result
+
+
+ class RedactingFormatter(logging.Formatter):
+     """Formatter that redacts sensitive information."""
+
+     def __init__(self, fmt: str | None = None, datefmt: str | None = None) -> None:
+         super().__init__(fmt, datefmt)
+         self.redaction_config = RedactionConfig()
+
+     def format(self, record: logging.LogRecord) -> str:
+         # Redact message
+         if isinstance(record.msg, str):
+             record.msg = self.redaction_config.redact_value(record.msg)
+
+         # Redact args
+         if record.args:
+             record.args = tuple(
+                 self.redaction_config.redact_value(str(arg))
+                 if isinstance(arg, str)
+                 else arg
+                 for arg in record.args
+             )
+
+         return super().format(record)
+
+
+ class JsonlHandler(logging.Handler):
+     """Handler that writes structured JSONL log entries with redaction."""
+
+     def __init__(self, log_file: Path) -> None:
+         super().__init__()
+         self.log_file = log_file
+         self.redaction_config = RedactionConfig()
+
+         # Set a basic formatter for timestamp formatting
+         self.setFormatter(logging.Formatter())
+
+         # Ensure parent directory exists
+         self.log_file.parent.mkdir(parents=True, exist_ok=True)
+
+     def emit(self, record: logging.LogRecord) -> None:
+         try:
+             # P1 Fix: Redact the message before writing to JSONL
+             message = record.getMessage()
+             redacted_message = self.redaction_config.redact_value(message)
+
+             log_entry: dict[str, Any] = {
+                 "timestamp": self.formatter.formatTime(record)
+                 if self.formatter
+                 else "",
+                 "level": record.levelname,
+                 "logger": record.name,
+                 "message": redacted_message,
+             }
+
+             # Add extra context fields when the caller logs with extra={"extra": {...}}
+             if hasattr(record, "extra") and isinstance(record.extra, dict):
+                 log_entry["context"] = self._redact_dict(record.extra)
+
+             with self.log_file.open("a", encoding="utf-8") as f:
+                 f.write(json.dumps(log_entry) + "\n")
+
+         except Exception:
+             self.handleError(record)
+
+     def _redact_dict(self, data: dict[str, Any]) -> dict[str, Any]:
+         """Recursively redact sensitive keys/values in a dictionary."""
+         result: dict[str, Any] = {}
+         for key, value in data.items():
+             if self.redaction_config.should_redact_key(key):
+                 result[key] = "***REDACTED***"
+             elif isinstance(value, str):
+                 result[key] = self.redaction_config.redact_value(value)
+             elif isinstance(value, dict):
+                 result[key] = self._redact_dict(value)  # Recursive call
+             else:
+                 result[key] = value
+         return result
+
+
+ @dataclass
+ class LoggingConfig:
+     """Configuration for logging setup."""
+
+     format: str = "console"  # "console" or "jsonl"
+     artifacts_dir: Path = field(default_factory=lambda: Path("run_artifacts"))
+     log_file: Path | None = None
+
+
+ def setup_logging(config: LoggingConfig) -> None:
+     """Configure logging based on format selection.
+
+     Args:
+         config: Logging configuration.
+     """
+     # Get root logger
+     root_logger = logging.getLogger()
+     root_logger.setLevel(logging.INFO)
+
+     # Remove existing handlers
+     root_logger.handlers.clear()
+
+     if config.format == "console":
+         # Console handler with redaction
+         handler = logging.StreamHandler()
+         formatter = RedactingFormatter(
+             "%(asctime)s - %(levelname)s - %(message)s",
+         )
+         handler.setFormatter(formatter)
+         root_logger.addHandler(handler)
+
+     elif config.format == "jsonl":
+         # JSONL file handler with redaction
+         if config.log_file is None:
+             config.log_file = config.artifacts_dir / f"run_{os.getpid()}.log.jsonl"
+
+         jsonl_handler: logging.Handler = JsonlHandler(config.log_file)
+         root_logger.addHandler(jsonl_handler)
+
+     else:
+         raise ValueError(f"Invalid log format: {config.format}")
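
A short usage sketch of the two formats, with a hypothetical import path (the diff omits filenames): console mode masks PAT-shaped strings inline via `RedactingFormatter`, while JSONL mode appends one redacted JSON object per line.

```python
# Sketch of both logging modes.
import logging
from pathlib import Path

from ado_git_repo_insights.logging_config import (  # hypothetical import path
    LoggingConfig,
    setup_logging,
)

setup_logging(LoggingConfig(format="console"))

# Any 52-char alphanumeric run (the ADO PAT shape) is masked before output.
fake_pat = "x" * 52
logging.getLogger(__name__).warning("auth failed for %s", fake_pat)
# -> ... - WARNING - auth failed for ***REDACTED***

# JSONL mode derives run_artifacts/run_<pid>.log.jsonl unless log_file is set.
setup_logging(LoggingConfig(format="jsonl", log_file=Path("out/run.log.jsonl")))
logging.getLogger(__name__).info(
    "extract finished", extra={"extra": {"pat": fake_pat, "project": "MyOrg"}}
)
# "pat" is on the key deny-list, so the JSONL context records ***REDACTED***.
```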
@@ -0,0 +1,206 @@
+ """Run summary tracking with enriched error diagnostics.
+
+ Captures comprehensive run telemetry including per-project status and first fatal error.
+ """
+ # ruff: noqa: S603, S607
+
+ from __future__ import annotations
+
+ import json
+ import os
+ import re
+ import subprocess
+ from dataclasses import dataclass, field
+ from datetime import date
+ from pathlib import Path
+ from typing import Any, Literal
+
+
+ def normalize_error_message(error: str, max_length: int = 500) -> str:
+     """Normalize and bound error messages to prevent secret leakage.
+
+     Args:
+         error: Raw error message.
+         max_length: Maximum length for bounded message.
+
+     Returns:
+         Normalized error message.
+     """
+     # Strip URLs with query strings (can contain secrets)
+     error = re.sub(r"https?://[^\s]+\?[^\s]+", "[URL_WITH_PARAMS]", error)
+
+     # Strip full URLs (can contain hostnames/paths)
+     error = re.sub(r"https?://[^\s]+", "[URL]", error)
+
+     # Truncate to max length
+     if len(error) > max_length:
+         error = error[:max_length] + "...[truncated]"
+
+     return error
+
+
+ @dataclass
+ class RunCounts:
+     """Counts of extracted/generated items."""
+
+     prs_fetched: int = 0
+     prs_updated: int = 0
+     rows_per_csv: dict[str, int] = field(default_factory=dict)
+
+
+ @dataclass
+ class RunTimings:
+     """Timing information for run phases."""
+
+     total_seconds: float = 0.0
+     extract_seconds: float = 0.0
+     persist_seconds: float = 0.0
+     export_seconds: float = 0.0
+
+
+ @dataclass
+ class RunSummary:
+     """Comprehensive run summary with forensic diagnostics."""
+
+     tool_version: str
+     git_sha: str | None
+     organization: str
+     projects: list[str]
+     date_range_start: str  # ISO format date
+     date_range_end: str  # ISO format date
+     counts: RunCounts
+     timings: RunTimings
+     warnings: list[str]
+     final_status: Literal["success", "failed"]
+     per_project_status: dict[str, str] = field(default_factory=dict)
+     first_fatal_error: str | None = None
+
+     def __post_init__(self) -> None:
+         """Normalize error message on initialization."""
+         if self.first_fatal_error:
+             self.first_fatal_error = normalize_error_message(self.first_fatal_error)
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary for JSON serialization."""
+         return {
+             "tool_version": self.tool_version,
+             "git_sha": self.git_sha,
+             "organization": self.organization,
+             "projects": self.projects,
+             "date_range": {
+                 "start": self.date_range_start,
+                 "end": self.date_range_end,
+             },
+             "counts": {
+                 "prs_fetched": self.counts.prs_fetched,
+                 "prs_updated": self.counts.prs_updated,
+                 "rows_per_csv": self.counts.rows_per_csv,
+             },
+             "timings": {
+                 "total_seconds": self.timings.total_seconds,
+                 "extract_seconds": self.timings.extract_seconds,
+                 "persist_seconds": self.timings.persist_seconds,
+                 "export_seconds": self.timings.export_seconds,
+             },
+             "warnings": self.warnings,
+             "final_status": self.final_status,
+             "per_project_status": self.per_project_status,
+             "first_fatal_error": self.first_fatal_error,
+         }
+
+     def write(self, path: Path) -> None:
+         """Write summary to JSON file.
+
+         Args:
+             path: Path to write summary file.
+         """
+         path.parent.mkdir(parents=True, exist_ok=True)
+         with path.open("w", encoding="utf-8") as f:
+             json.dump(self.to_dict(), f, indent=2)
+
+     def print_final_line(self) -> None:
+         """Print one-liner summary to stdout."""
+         status_symbol = "✓" if self.final_status == "success" else "✗"
+         print(
+             f"{status_symbol} {self.final_status.upper()}: "
+             f"{self.counts.prs_fetched} PRs extracted, "
+             f"{len(self.counts.rows_per_csv)} CSVs written "
+             f"({self.timings.total_seconds:.1f}s)"
+         )
+
+     def emit_ado_commands(self) -> None:
+         """Emit Azure Pipelines logging commands."""
+         # Only emit if running in Azure Pipelines
+         if os.environ.get("TF_BUILD") != "true":
+             return
+
+         if self.final_status == "failed":
+             if self.first_fatal_error:
+                 print(f"##vso[task.logissue type=error]{self.first_fatal_error}")
+             print("##vso[task.complete result=Failed]")
+         elif self.warnings:
+             for warning in self.warnings:
+                 print(f"##vso[task.logissue type=warning]{warning}")
+
+
+ def get_tool_version() -> str:
+     """Get tool version from VERSION file."""
+     version_file = Path(__file__).parent.parent.parent.parent / "VERSION"
+     if version_file.exists():
+         return version_file.read_text().strip()
+     return "unknown"
+
+
+ def get_git_sha() -> str | None:
+     """Get Git SHA from VERSION file or git command.
+
+     Returns:
+         Git SHA or None if unavailable.
+     """
+     # Try VERSION file first
+     version_file = Path(__file__).parent.parent.parent.parent / "VERSION"
+     if version_file.exists():
+         version = version_file.read_text().strip()
+         if "+" in version:  # Version format like "1.0.7+8d88fb4"
+             return version.split("+")[1]
+
+     # Fallback to git command
+     try:
+         result = subprocess.run(  # noqa: S603, S607
+             ["git", "rev-parse", "--short", "HEAD"],
+             capture_output=True,
+             text=True,
+             check=True,
+             timeout=5,
+         )
+         return result.stdout.strip()
+     except Exception:
+         return None
+
+
+ def create_minimal_summary(
+     error_message: str,
+     artifacts_dir: Path = Path("run_artifacts"),
+ ) -> RunSummary:
+     """Create a partial summary for early failures.
+
+     Args:
+         error_message: Error message describing the failure.
+         artifacts_dir: Directory for artifacts.
+
+     Returns:
+         Minimal RunSummary with failure status.
+     """
+     return RunSummary(
+         tool_version=get_tool_version(),
+         git_sha=get_git_sha(),
+         organization="unknown",
+         projects=[],
+         date_range_start=str(date.today()),
+         date_range_end=str(date.today()),
+         counts=RunCounts(),
+         timings=RunTimings(),
+         warnings=[],
+         final_status="failed",
+         first_fatal_error=normalize_error_message(error_message),
+     )
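
The failure path ties these pieces together: URLs are scrubbed before the message is bounded, and the resulting summary is both written to disk and surfaced as pipeline output. A sketch with a hypothetical import path (the diff omits filenames):

```python
# Sketch of the early-failure path.
from pathlib import Path

from ado_git_repo_insights.run_summary import (  # hypothetical import path
    create_minimal_summary,
    normalize_error_message,
)

# Query strings are the riskiest part of a URL, so they are scrubbed first.
msg = normalize_error_message("401 from https://dev.azure.com/org/_apis/git?pat=abc123")
assert msg == "401 from [URL_WITH_PARAMS]"

summary = create_minimal_summary("config file not found")
summary.write(Path("run_artifacts/summary.json"))
summary.print_final_line()   # ✗ FAILED: 0 PRs extracted, 0 CSVs written (0.0s)
summary.emit_ado_commands()  # emits ##vso commands only when TF_BUILD=true
```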
@@ -0,0 +1,225 @@
+ Metadata-Version: 2.4
+ Name: ado-git-repo-insights
+ Version: 1.2.1
+ Summary: Extract Azure DevOps Pull Request metrics to SQLite and generate PowerBI-compatible CSVs.
+ Author-email: "Odd Essentials, LLC" <admin@oddessentials.com>
+ License: MIT
+ Classifier: Development Status :: 4 - Beta
+ Classifier: Intended Audience :: Developers
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Programming Language :: Python :: 3.12
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: requests>=2.28.0
+ Requires-Dist: pyyaml>=6.0
+ Requires-Dist: pandas>=2.0.0
+ Requires-Dist: azure-storage-blob>=12.0.0
+ Provides-Extra: dev
+ Requires-Dist: pytest>=7.0; extra == "dev"
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
+ Requires-Dist: mypy>=1.0; extra == "dev"
+ Requires-Dist: pre-commit>=3.0; extra == "dev"
+ Requires-Dist: types-requests>=2.28.0; extra == "dev"
+ Requires-Dist: types-PyYAML>=6.0; extra == "dev"
+ Requires-Dist: pandas-stubs>=2.0.0; extra == "dev"
+ Dynamic: license-file
+
+ # ado-git-repo-insights
+
+ ![CI](https://github.com/oddessentials/ado-git-repo-insights/actions/workflows/ci.yml/badge.svg)
+ [![codecov](https://codecov.io/gh/oddessentials/ado-git-repo-insights/graph/badge.svg)](https://codecov.io/gh/oddessentials/ado-git-repo-insights)
+ ![Python](https://img.shields.io/badge/python-3.10%20%7C%203.11%20%7C%203.12-blue)
+ ![License](https://img.shields.io/badge/license-MIT-green)
+
+ Extract Azure DevOps Pull Request metrics to SQLite and generate PowerBI-compatible CSVs.
+
+ ## Overview
+
+ This tool replaces the MongoDB-based `ado-pull-request-metrics` with a lightweight, file-based solution that:
+
+ - **Stores data in SQLite** - No external database required
+ - **Runs as an Azure DevOps Pipeline Task** - Scheduled daily extraction
+ - **Preserves the PowerBI CSV contract** - Same filenames, columns, and ordering
+ - **Supports incremental + backfill extraction** - Efficient daily updates with periodic convergence
+
+ ## Quick Start
+
+ ### Installation
+
+ ```bash
+ pip install ado-git-repo-insights
+ ```
+
+ ## Usage Options
+
+ This tool provides **two ways** to extract Azure DevOps Pull Request metrics:
+
+ | Aspect | CLI (Option 1) | Extension (Option 2) |
+ |--------|----------------|----------------------|
+ | **Requires Python** | Yes | No (bundled) |
+ | **Installation** | `pip install` | Upload VSIX to ADO |
+ | **Pipeline syntax** | Script steps | Task step |
+ | **Works outside ADO** | Yes | No (ADO only) |
+ | **Flexibility** | Higher | Standard |
+
+ ### Option 1: Python CLI
+
+ Best for users comfortable with Python/pip, custom scripts, and non-ADO CI/CD systems.
+
+
+ #### First Run (Extract Data)
+
+ ```bash
+ ado-insights extract \
+   --organization MyOrg \
+   --projects "ProjectOne,ProjectTwo" \
+   --pat $ADO_PAT \
+   --database ./ado-insights.sqlite
+ ```
+
+ > **Note**: The end date defaults to yesterday, to avoid capturing incomplete data.
+ > To include today, pass `--end-date $(date +%Y-%m-%d)` (Bash) or `--end-date (Get-Date -Format yyyy-MM-dd)` (PowerShell).
+
+ #### Generate CSVs
+
+ ```bash
+ ado-insights generate-csv \
+   --database ./ado-insights.sqlite \
+   --output ./csv_output
+ ```
+
+ #### Backfill Mode (Weekly Convergence)
+
+ ```bash
+ ado-insights extract \
+   --organization MyOrg \
+   --projects "ProjectOne,ProjectTwo" \
+   --pat $ADO_PAT \
+   --database ./ado-insights.sqlite \
+   --backfill-days 60
+ ```
+
+ ### Option 2: Azure DevOps Extension
+
+ Best for teams that prefer the ADO pipeline editor UI or want a self-contained task without managing Python dependencies.
+
+ ```yaml
+ steps:
+   - task: ExtractPullRequests@1
+     inputs:
+       organization: 'MyOrg'
+       projects: 'Project1,Project2'
+       pat: '$(PAT_SECRET)'
+       database: '$(Pipeline.Workspace)/data/ado-insights.sqlite'
+       outputDir: '$(Pipeline.Workspace)/csv_output'
+ ```
+
+ **Installation:**
+ 1. Download the `.vsix` from [GitHub Releases](https://github.com/oddessentials/ado-git-repo-insights/releases)
+ 2. Install it in your ADO organization: Organization Settings → Extensions → Browse local extensions
+
+ ## Configuration
+
+ Create a `config.yaml` file:
+
+ ```yaml
+ organization: MyOrg
+
+ projects:
+   - ProjectOne
+   - ProjectTwo
+   - Project%20Three  # URL-encoded names supported
+
+ api:
+   base_url: https://dev.azure.com
+   version: 7.1-preview.1
+   rate_limit_sleep_seconds: 0.5
+   max_retries: 3
+   retry_delay_seconds: 5
+   retry_backoff_multiplier: 2.0
+
+ backfill:
+   enabled: true
+   window_days: 60
+ ```
+
+ Then run:
+
+ ```bash
+ ado-insights extract --config config.yaml --pat $ADO_PAT
+ ```
+
+ ## Azure DevOps Pipeline Integration
+
+ See [sample-pipeline.yml](sample-pipeline.yml) for a complete example.
+
+ ### Scheduled Daily Extraction
+
+ ```yaml
+ schedules:
+   - cron: "0 6 * * *"  # Daily at 6 AM UTC
+     displayName: "Daily PR Extraction"
+     branches:
+       include: [main]
+     always: true
+ ```
+
+ ### Weekly Backfill
+
+ ```yaml
+ schedules:
+   - cron: "0 6 * * 0"  # Weekly on Sunday
+     displayName: "Weekly Backfill"
+     branches:
+       include: [main]
+     always: true
+ ```
+
+ ## CSV Output Contract
+
+ The following CSVs are generated with **exact schema and column order** for PowerBI compatibility:
+
+ | File | Columns |
+ |------|---------|
+ | `organizations.csv` | `organization_name` |
+ | `projects.csv` | `organization_name`, `project_name` |
+ | `repositories.csv` | `repository_id`, `repository_name`, `project_name`, `organization_name` |
+ | `pull_requests.csv` | `pull_request_uid`, `pull_request_id`, `organization_name`, `project_name`, `repository_id`, `user_id`, `title`, `status`, `description`, `creation_date`, `closed_date`, `cycle_time_minutes` |
+ | `users.csv` | `user_id`, `display_name`, `email` |
+ | `reviewers.csv` | `pull_request_uid`, `user_id`, `vote`, `repository_id` |
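
Because the schema and column order are fixed, the CSVs are also easy to consume outside PowerBI. A hedged sketch with pandas (a declared dependency), assuming the `./csv_output` directory from the generate-csv example above:

```python
# Sketch: reading the CSV contract with pandas; paths assume the
# generate-csv example's ./csv_output directory.
import pandas as pd

prs = pd.read_csv(
    "./csv_output/pull_requests.csv",
    parse_dates=["creation_date", "closed_date"],
)

# Median cycle time per repository (minutes; the extractor floors values at 1.0).
print(prs.groupby("repository_id")["cycle_time_minutes"].median())

# Reviewer votes joined back to PR titles via the stable pull_request_uid key.
reviewers = pd.read_csv("./csv_output/reviewers.csv")
joined = reviewers.merge(prs[["pull_request_uid", "title"]], on="pull_request_uid")
print(joined[["title", "vote"]].head())
```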
+
+ ## Governance
+
+ This project is governed by authoritative documents in `agents/`:
+
+ - [INVARIANTS.md](agents/INVARIANTS.md) - 25 non-negotiable invariants
+ - [definition-of-done.md](agents/definition-of-done.md) - Completion criteria
+ - [victory-gates.md](agents/victory-gates.md) - Verification gates
+
+ ## Development
+
+ ```bash
+ # Setup
+ python -m venv .venv
+ source .venv/bin/activate  # or .venv\Scripts\activate on Windows
+ pip install -e .[dev]
+
+ # Lint + Format
+ ruff check .
+ ruff format .
+
+ # Type Check
+ mypy src/
+
+ # Test
+ pytest
+ ```
+
+ ## License
+
+ MIT