ado-git-repo-insights 1.2.1__py3-none-any.whl → 2.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ado_git_repo_insights/__init__.py +3 -3
- ado_git_repo_insights/cli.py +703 -354
- ado_git_repo_insights/config.py +186 -186
- ado_git_repo_insights/extractor/__init__.py +1 -1
- ado_git_repo_insights/extractor/ado_client.py +452 -246
- ado_git_repo_insights/extractor/pr_extractor.py +239 -239
- ado_git_repo_insights/ml/__init__.py +13 -0
- ado_git_repo_insights/ml/date_utils.py +70 -0
- ado_git_repo_insights/ml/forecaster.py +288 -0
- ado_git_repo_insights/ml/insights.py +497 -0
- ado_git_repo_insights/persistence/__init__.py +1 -1
- ado_git_repo_insights/persistence/database.py +193 -193
- ado_git_repo_insights/persistence/models.py +207 -145
- ado_git_repo_insights/persistence/repository.py +662 -376
- ado_git_repo_insights/transform/__init__.py +1 -1
- ado_git_repo_insights/transform/aggregators.py +950 -0
- ado_git_repo_insights/transform/csv_generator.py +132 -132
- ado_git_repo_insights/utils/__init__.py +1 -1
- ado_git_repo_insights/utils/datetime_utils.py +101 -101
- ado_git_repo_insights/utils/logging_config.py +172 -172
- ado_git_repo_insights/utils/run_summary.py +207 -206
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/METADATA +56 -15
- ado_git_repo_insights-2.7.4.dist-info/RECORD +27 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/licenses/LICENSE +21 -21
- ado_git_repo_insights-1.2.1.dist-info/RECORD +0 -22
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/WHEEL +0 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/entry_points.txt +0 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/top_level.txt +0 -0
ado_git_repo_insights/config.py
CHANGED
|
@@ -1,186 +1,186 @@
|
|
|
1
|
-
"""Configuration loader for ado-git-repo-insights.
|
|
2
|
-
|
|
3
|
-
Loads and validates configuration from YAML files or CLI arguments.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
from __future__ import annotations
|
|
7
|
-
|
|
8
|
-
import logging
|
|
9
|
-
import os
|
|
10
|
-
from dataclasses import dataclass, field
|
|
11
|
-
from datetime import date
|
|
12
|
-
from pathlib import Path
|
|
13
|
-
from typing import Any
|
|
14
|
-
|
|
15
|
-
import yaml
|
|
16
|
-
|
|
17
|
-
logger = logging.getLogger(__name__)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class ConfigurationError(Exception):
    """Configuration validation error.

    Raised by ``Config.__post_init__`` when ``organization``, ``projects`` or
    ``pat`` is missing or empty.
    """
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
@dataclass
class APIConfig:
    """API configuration settings.

    ``load_config`` populates these fields from the optional ``api`` section
    of the configuration file; a key that is absent there ends up with the
    default shown below.
    """

    base_url: str = "https://dev.azure.com"
    # presumably sent as the REST ``api-version`` value -- TODO confirm in the client
    version: str = "7.1-preview.1"
    # presumably a pause between consecutive requests -- TODO confirm in the client
    rate_limit_sleep_seconds: float = 0.5
    max_retries: int = 3
    retry_delay_seconds: float = 5.0
    # presumably multiplies the delay after each failed attempt -- TODO confirm
    retry_backoff_multiplier: float = 2.0
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
@dataclass
class BackfillConfig:
    """Backfill configuration settings (Adjustment 1).

    ``load_config`` reads these from the optional ``backfill`` section of the
    configuration file; ``window_days`` can also be supplied on the command
    line.
    """

    # When true, ``Config.log_summary`` reports the backfill window.
    enabled: bool = True
    # NOTE(review): the 30-90 range mentioned below is not enforced in this module.
    window_days: int = 60  # Default: 60 days (configurable 30-90)
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
@dataclass
class DateRangeConfig:
    """Optional date range override.

    Both bounds default to ``None``, meaning no override was given.
    ``load_config`` fills them from ``YYYY-MM-DD`` values passed on the command
    line or found in the ``date_range`` section of the configuration file.
    """

    # NOTE(review): whether the bounds are inclusive is decided by the consumer -- confirm there.
    start: date | None = None
    end: date | None = None
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
@dataclass
class Config:
    """Main configuration for ado-git-repo-insights.

    Attributes:
        organization: Organization name; required.
        projects: Project names; at least one non-blank name is required.
        pat: Personal Access Token; required, and never shown by ``repr()``
            or ``log_summary``.
        database: Database file path (defaults to ``ado-insights.sqlite``).
        api: API settings.
        backfill: Backfill settings.
        date_range: Optional date range override.

    Raises:
        ConfigurationError: If ``organization``, ``projects`` or ``pat`` is
            missing or blank.
    """

    organization: str
    projects: list[str]
    pat: str  # Will be masked in logs
    database: Path = field(default_factory=lambda: Path("ado-insights.sqlite"))
    api: APIConfig = field(default_factory=APIConfig)
    backfill: BackfillConfig = field(default_factory=BackfillConfig)
    date_range: DateRangeConfig = field(default_factory=DateRangeConfig)

    def __post_init__(self) -> None:
        """Validate configuration after initialization."""
        if self._is_blank(self.organization):
            raise ConfigurationError("organization is required")

        # A bare string would otherwise be consumed character by character
        # (for example by ``log_summary``), so treat it as one project name.
        if isinstance(self.projects, str):
            self.projects = [self.projects]
        # Drop blank entries such as the one produced by a trailing comma.
        self.projects = [
            name for name in (self.projects or []) if not self._is_blank(name)
        ]
        if not self.projects:
            raise ConfigurationError("At least one project is required")

        if self._is_blank(self.pat):
            raise ConfigurationError("PAT is required")

    @staticmethod
    def _is_blank(value: object) -> bool:
        """Return True for empty values and whitespace-only strings."""
        return not value or (isinstance(value, str) and not value.strip())

    def __repr__(self) -> str:
        """Repr with masked PAT (Invariant 19: Never expose secrets)."""
        return (
            f"Config(organization={self.organization!r}, "
            f"projects={self.projects!r}, "
            f"pat='********', "  # Masked
            f"database={self.database!r}, "
            f"api={self.api!r}, "
            f"backfill={self.backfill!r}, "
            f"date_range={self.date_range!r})"
        )

    def log_summary(self) -> None:
        """Log configuration summary (with PAT masked)."""
        # Lazy %-style arguments: the message is only formatted when the
        # record is actually emitted.
        logger.info("Organization: %s", self.organization)
        logger.info("Projects: %s", ", ".join(self.projects))
        logger.info("Database: %s", self.database)
        logger.info("PAT: ********...****")  # Invariant 19: Never log PAT
        if self.date_range.start or self.date_range.end:
            logger.info(
                "Date range: %s → %s", self.date_range.start, self.date_range.end
            )
        if self.backfill.enabled:
            logger.info("Backfill: %s days", self.backfill.window_days)
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
def load_config(
    config_path: Path | None = None,
    organization: str | None = None,
    projects: str | None = None,
    pat: str | None = None,
    database: Path | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    backfill_days: int | None = None,
) -> Config:
    """Load configuration from file and/or CLI arguments.

    CLI arguments override file values. When no PAT is given on the command
    line or in the file, the ``ADO_PAT`` environment variable is used.

    Args:
        config_path: Path to config.yaml file. Ignored if it does not exist.
        organization: Organization name (CLI override).
        projects: Comma-separated project names (CLI override).
        pat: Personal Access Token (CLI override).
        database: Database path (CLI override).
        start_date: Start date YYYY-MM-DD (CLI override).
        end_date: End date YYYY-MM-DD (CLI override).
        backfill_days: Backfill window in days (CLI override).

    Returns:
        Validated Config instance.

    Raises:
        ConfigurationError: If configuration is invalid, including a
            non-mapping file or section and an unparsable date.
    """
    # Start with defaults
    config_data: dict[str, Any] = {}

    # Load from file if provided
    if config_path and config_path.exists():
        logger.info("Loading configuration from %s", config_path)
        # Read as UTF-8 explicitly rather than with the platform default.
        with config_path.open(encoding="utf-8") as f:
            loaded = yaml.safe_load(f) or {}
        if not isinstance(loaded, dict):
            raise ConfigurationError(
                f"{config_path} must contain a mapping at the top level"
            )
        config_data = loaded

    # Apply CLI overrides
    if organization:
        config_data["organization"] = organization
    if projects:
        config_data["projects"] = _split_project_names(projects)
    elif isinstance(config_data.get("projects"), str):
        # ``projects: A, B`` in YAML is a plain string, not a list.
        config_data["projects"] = _split_project_names(config_data["projects"])
    if pat:
        config_data["pat"] = pat
    elif not config_data.get("pat"):
        # Try environment variable
        config_data["pat"] = os.environ.get("ADO_PAT", "")

    # Build API config: forward only the keys present in the file so the
    # defaults live in one place, on the APIConfig dataclass.
    api_data = _mapping_section(config_data, "api")
    api_keys = (
        "base_url",
        "version",
        "rate_limit_sleep_seconds",
        "max_retries",
        "retry_delay_seconds",
        "retry_backoff_multiplier",
    )
    api_config = APIConfig(
        **{key: api_data[key] for key in api_keys if key in api_data}
    )

    # Build backfill config. Compare against None so that an explicit 0 from
    # the command line is not silently replaced by the file/default value.
    backfill_data = _mapping_section(config_data, "backfill")
    if backfill_days is not None:
        window_days = backfill_days
    else:
        window_days = backfill_data.get("window_days", 60)
    backfill_config = BackfillConfig(
        enabled=backfill_data.get("enabled", True),
        window_days=window_days,
    )

    # Build date range config (CLI value wins over the file value)
    date_data = _mapping_section(config_data, "date_range")
    date_range = DateRangeConfig(
        start=_parse_config_date(start_date or date_data.get("start"), "start date"),
        end=_parse_config_date(end_date or date_data.get("end"), "end date"),
    )

    # Build main config
    return Config(
        organization=config_data.get("organization") or "",
        projects=config_data.get("projects") or [],
        pat=config_data.get("pat") or "",
        database=database
        or Path(config_data.get("database") or "ado-insights.sqlite"),
        api=api_config,
        backfill=backfill_config,
        date_range=date_range,
    )


def _split_project_names(raw: str) -> list[str]:
    """Split a comma-separated project list, dropping blank entries."""
    return [name for name in (part.strip() for part in raw.split(",")) if name]


def _mapping_section(config_data: dict[str, Any], key: str) -> dict[str, Any]:
    """Return the nested mapping stored under *key*, or ``{}`` when unset/null."""
    section = config_data.get(key)
    if section is None:
        return {}
    if not isinstance(section, dict):
        raise ConfigurationError(
            f"'{key}' must be a mapping, got {type(section).__name__}"
        )
    return section


def _parse_config_date(value: Any, label: str) -> date | None:
    """Convert a CLI string or YAML scalar to a ``date``; falsy means unset."""
    if not value:
        return None
    if isinstance(value, date):
        # PyYAML resolves unquoted YYYY-MM-DD scalars to ``date`` objects and
        # full timestamps to ``datetime`` (a ``date`` subclass).
        return date(value.year, value.month, value.day)
    try:
        return date.fromisoformat(str(value))
    except ValueError as err:
        raise ConfigurationError(
            f"Invalid {label} {value!r}: expected YYYY-MM-DD"
        ) from err
|
|
1
|
+
"""Configuration loader for ado-git-repo-insights.
|
|
2
|
+
|
|
3
|
+
Loads and validates configuration from YAML files or CLI arguments.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from datetime import date
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
import yaml
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ConfigurationError(Exception):
    """Configuration validation error.

    Raised by ``Config.__post_init__`` when ``organization``, ``projects`` or
    ``pat`` is missing or empty.
    """
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class APIConfig:
    """API configuration settings.

    ``load_config`` populates these fields from the optional ``api`` section
    of the configuration file; a key that is absent there ends up with the
    default shown below.
    """

    base_url: str = "https://dev.azure.com"
    # presumably sent as the REST ``api-version`` value -- TODO confirm in the client
    version: str = "7.1-preview.1"
    # presumably a pause between consecutive requests -- TODO confirm in the client
    rate_limit_sleep_seconds: float = 0.5
    max_retries: int = 3
    retry_delay_seconds: float = 5.0
    # presumably multiplies the delay after each failed attempt -- TODO confirm
    retry_backoff_multiplier: float = 2.0
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class BackfillConfig:
    """Backfill configuration settings (Adjustment 1).

    ``load_config`` reads these from the optional ``backfill`` section of the
    configuration file; ``window_days`` can also be supplied on the command
    line.
    """

    # When true, ``Config.log_summary`` reports the backfill window.
    enabled: bool = True
    # NOTE(review): the 30-90 range mentioned below is not enforced in this module.
    window_days: int = 60  # Default: 60 days (configurable 30-90)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class DateRangeConfig:
    """Optional date range override.

    Both bounds default to ``None``, meaning no override was given.
    ``load_config`` fills them from ``YYYY-MM-DD`` values passed on the command
    line or found in the ``date_range`` section of the configuration file.
    """

    # NOTE(review): whether the bounds are inclusive is decided by the consumer -- confirm there.
    start: date | None = None
    end: date | None = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class Config:
    """Main configuration for ado-git-repo-insights.

    Attributes:
        organization: Organization name; required.
        projects: Project names; at least one non-blank name is required.
        pat: Personal Access Token; required, and never shown by ``repr()``
            or ``log_summary``.
        database: Database file path (defaults to ``ado-insights.sqlite``).
        api: API settings.
        backfill: Backfill settings.
        date_range: Optional date range override.

    Raises:
        ConfigurationError: If ``organization``, ``projects`` or ``pat`` is
            missing or blank.
    """

    organization: str
    projects: list[str]
    pat: str  # Will be masked in logs
    database: Path = field(default_factory=lambda: Path("ado-insights.sqlite"))
    api: APIConfig = field(default_factory=APIConfig)
    backfill: BackfillConfig = field(default_factory=BackfillConfig)
    date_range: DateRangeConfig = field(default_factory=DateRangeConfig)

    def __post_init__(self) -> None:
        """Validate configuration after initialization."""
        if self._is_blank(self.organization):
            raise ConfigurationError("organization is required")

        # A bare string would otherwise be consumed character by character
        # (for example by ``log_summary``), so treat it as one project name.
        if isinstance(self.projects, str):
            self.projects = [self.projects]
        # Drop blank entries such as the one produced by a trailing comma.
        self.projects = [
            name for name in (self.projects or []) if not self._is_blank(name)
        ]
        if not self.projects:
            raise ConfigurationError("At least one project is required")

        if self._is_blank(self.pat):
            raise ConfigurationError("PAT is required")

    @staticmethod
    def _is_blank(value: object) -> bool:
        """Return True for empty values and whitespace-only strings."""
        return not value or (isinstance(value, str) and not value.strip())

    def __repr__(self) -> str:
        """Repr with masked PAT (Invariant 19: Never expose secrets)."""
        return (
            f"Config(organization={self.organization!r}, "
            f"projects={self.projects!r}, "
            f"pat='********', "  # Masked
            f"database={self.database!r}, "
            f"api={self.api!r}, "
            f"backfill={self.backfill!r}, "
            f"date_range={self.date_range!r})"
        )

    def log_summary(self) -> None:
        """Log configuration summary (with PAT masked)."""
        # Lazy %-style arguments: the message is only formatted when the
        # record is actually emitted.
        logger.info("Organization: %s", self.organization)
        logger.info("Projects: %s", ", ".join(self.projects))
        logger.info("Database: %s", self.database)
        logger.info("PAT: ********...****")  # Invariant 19: Never log PAT
        if self.date_range.start or self.date_range.end:
            logger.info(
                "Date range: %s → %s", self.date_range.start, self.date_range.end
            )
        if self.backfill.enabled:
            logger.info("Backfill: %s days", self.backfill.window_days)
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def load_config(
    config_path: Path | None = None,
    organization: str | None = None,
    projects: str | None = None,
    pat: str | None = None,
    database: Path | None = None,
    start_date: str | None = None,
    end_date: str | None = None,
    backfill_days: int | None = None,
) -> Config:
    """Load configuration from file and/or CLI arguments.

    CLI arguments override file values. When no PAT is given on the command
    line or in the file, the ``ADO_PAT`` environment variable is used.

    Args:
        config_path: Path to config.yaml file. Ignored if it does not exist.
        organization: Organization name (CLI override).
        projects: Comma-separated project names (CLI override).
        pat: Personal Access Token (CLI override).
        database: Database path (CLI override).
        start_date: Start date YYYY-MM-DD (CLI override).
        end_date: End date YYYY-MM-DD (CLI override).
        backfill_days: Backfill window in days (CLI override).

    Returns:
        Validated Config instance.

    Raises:
        ConfigurationError: If configuration is invalid, including a
            non-mapping file or section and an unparsable date.
    """
    # Start with defaults
    config_data: dict[str, Any] = {}

    # Load from file if provided
    if config_path and config_path.exists():
        logger.info("Loading configuration from %s", config_path)
        # Read as UTF-8 explicitly rather than with the platform default.
        with config_path.open(encoding="utf-8") as f:
            loaded = yaml.safe_load(f) or {}
        if not isinstance(loaded, dict):
            raise ConfigurationError(
                f"{config_path} must contain a mapping at the top level"
            )
        config_data = loaded

    # Apply CLI overrides
    if organization:
        config_data["organization"] = organization
    if projects:
        config_data["projects"] = _split_project_names(projects)
    elif isinstance(config_data.get("projects"), str):
        # ``projects: A, B`` in YAML is a plain string, not a list.
        config_data["projects"] = _split_project_names(config_data["projects"])
    if pat:
        config_data["pat"] = pat
    elif not config_data.get("pat"):
        # Try environment variable
        config_data["pat"] = os.environ.get("ADO_PAT", "")

    # Build API config: forward only the keys present in the file so the
    # defaults live in one place, on the APIConfig dataclass.
    api_data = _mapping_section(config_data, "api")
    api_keys = (
        "base_url",
        "version",
        "rate_limit_sleep_seconds",
        "max_retries",
        "retry_delay_seconds",
        "retry_backoff_multiplier",
    )
    api_config = APIConfig(
        **{key: api_data[key] for key in api_keys if key in api_data}
    )

    # Build backfill config. Compare against None so that an explicit 0 from
    # the command line is not silently replaced by the file/default value.
    backfill_data = _mapping_section(config_data, "backfill")
    if backfill_days is not None:
        window_days = backfill_days
    else:
        window_days = backfill_data.get("window_days", 60)
    backfill_config = BackfillConfig(
        enabled=backfill_data.get("enabled", True),
        window_days=window_days,
    )

    # Build date range config (CLI value wins over the file value)
    date_data = _mapping_section(config_data, "date_range")
    date_range = DateRangeConfig(
        start=_parse_config_date(start_date or date_data.get("start"), "start date"),
        end=_parse_config_date(end_date or date_data.get("end"), "end date"),
    )

    # Build main config
    return Config(
        organization=config_data.get("organization") or "",
        projects=config_data.get("projects") or [],
        pat=config_data.get("pat") or "",
        database=database
        or Path(config_data.get("database") or "ado-insights.sqlite"),
        api=api_config,
        backfill=backfill_config,
        date_range=date_range,
    )


def _split_project_names(raw: str) -> list[str]:
    """Split a comma-separated project list, dropping blank entries."""
    return [name for name in (part.strip() for part in raw.split(",")) if name]


def _mapping_section(config_data: dict[str, Any], key: str) -> dict[str, Any]:
    """Return the nested mapping stored under *key*, or ``{}`` when unset/null."""
    section = config_data.get(key)
    if section is None:
        return {}
    if not isinstance(section, dict):
        raise ConfigurationError(
            f"'{key}' must be a mapping, got {type(section).__name__}"
        )
    return section


def _parse_config_date(value: Any, label: str) -> date | None:
    """Convert a CLI string or YAML scalar to a ``date``; falsy means unset."""
    if not value:
        return None
    if isinstance(value, date):
        # PyYAML resolves unquoted YYYY-MM-DD scalars to ``date`` objects and
        # full timestamps to ``datetime`` (a ``date`` subclass).
        return date(value.year, value.month, value.day)
    try:
        return date.fromisoformat(str(value))
    except ValueError as err:
        raise ConfigurationError(
            f"Invalid {label} {value!r}: expected YYYY-MM-DD"
        ) from err
|
|
@@ -1 +1 @@
|
|
|
1
|
-
"""Extractor module for Azure DevOps API interactions."""
|
|
1
|
+
"""Extractor module for Azure DevOps API interactions."""
|