ado-git-repo-insights 1.2.1__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. ado_git_repo_insights/__init__.py +3 -3
  2. ado_git_repo_insights/cli.py +703 -354
  3. ado_git_repo_insights/config.py +186 -186
  4. ado_git_repo_insights/extractor/__init__.py +1 -1
  5. ado_git_repo_insights/extractor/ado_client.py +452 -246
  6. ado_git_repo_insights/extractor/pr_extractor.py +239 -239
  7. ado_git_repo_insights/ml/__init__.py +13 -0
  8. ado_git_repo_insights/ml/date_utils.py +70 -0
  9. ado_git_repo_insights/ml/forecaster.py +288 -0
  10. ado_git_repo_insights/ml/insights.py +497 -0
  11. ado_git_repo_insights/persistence/__init__.py +1 -1
  12. ado_git_repo_insights/persistence/database.py +193 -193
  13. ado_git_repo_insights/persistence/models.py +207 -145
  14. ado_git_repo_insights/persistence/repository.py +662 -376
  15. ado_git_repo_insights/transform/__init__.py +1 -1
  16. ado_git_repo_insights/transform/aggregators.py +950 -0
  17. ado_git_repo_insights/transform/csv_generator.py +132 -132
  18. ado_git_repo_insights/utils/__init__.py +1 -1
  19. ado_git_repo_insights/utils/datetime_utils.py +101 -101
  20. ado_git_repo_insights/utils/logging_config.py +172 -172
  21. ado_git_repo_insights/utils/run_summary.py +207 -206
  22. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/METADATA +56 -15
  23. ado_git_repo_insights-2.7.4.dist-info/RECORD +27 -0
  24. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/licenses/LICENSE +21 -21
  25. ado_git_repo_insights-1.2.1.dist-info/RECORD +0 -22
  26. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/WHEEL +0 -0
  27. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/entry_points.txt +0 -0
  28. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/top_level.txt +0 -0
@@ -1,239 +1,239 @@
1
- """Pull Request extractor orchestration.
2
-
3
- Coordinates extraction across multiple projects with incremental and backfill support.
4
- """
5
-
6
- from __future__ import annotations
7
-
8
- import logging
9
- from dataclasses import dataclass, field
10
- from datetime import date, timedelta
11
-
12
- from ..config import Config
13
- from ..persistence.database import DatabaseManager
14
- from ..persistence.repository import PRRepository
15
- from .ado_client import ADOClient, ExtractionError
16
-
17
- logger = logging.getLogger(__name__)
18
-
19
-
20
@dataclass
class ProjectExtractionResult:
    """Outcome of one extraction pass over a single project.

    Instances are produced per project and collected by the run summary.
    """

    # Name of the project the pass ran against.
    project: str
    # First day of the extraction window.
    start_date: date
    # Last day of the extraction window.
    end_date: date
    # Number of pull requests counted for this pass.
    prs_extracted: int
    # True when the pass completed without an extraction error.
    success: bool
    # Error text for a failed pass; None otherwise.
    error: str | None = None
30
-
31
-
32
@dataclass
class ExtractionSummary:
    """Aggregate outcome of an extraction run across projects."""

    # Per-project results, in the order they were added.
    projects: list[ProjectExtractionResult] = field(default_factory=list)
    # Running sum of prs_extracted over all added results.
    total_prs: int = 0
    # Stays True until any added result reports failure.
    success: bool = True

    def add_result(self, result: ProjectExtractionResult) -> None:
        """Record a project result and fold it into the run totals.

        Args:
            result: Result of a single project's extraction pass.
        """
        self.projects.append(result)
        self.total_prs += result.prs_extracted
        if result.success:
            return
        # One failed project marks the whole run as failed.
        self.success = False

    def log_summary(self) -> None:
        """Write a per-project breakdown and the run totals to the module logger."""
        banner = "=" * 50
        logger.info(banner)
        logger.info("Extraction Summary")
        logger.info(banner)
        for entry in self.projects:
            status = "✗" if not entry.success else "✓"
            logger.info(
                f" {status} {entry.project}: "
                f"{entry.prs_extracted} PRs ({entry.start_date} → {entry.end_date})"
            )
            if entry.error:
                logger.error(f" Error: {entry.error}")
        logger.info(f"Total: {self.total_prs} PRs")
        outcome = "SUCCESS" if self.success else "FAILED"
        logger.info(f"Status: {outcome}")
        logger.info(banner)
63
-
64
-
65
class PRExtractor:
    """Orchestrates PR extraction across multiple projects.

    Invariant 10: Daily incremental extraction is the default mode.
    Invariant 11: Periodic backfill is required to prevent drift.
    """

    def __init__(
        self,
        client: ADOClient,
        db: DatabaseManager,
        config: Config,
    ) -> None:
        """Initialize the PR extractor.

        Args:
            client: ADO API client used to fetch pull requests.
            db: Database manager; wrapped in a PRRepository for persistence.
            config: Extraction configuration (organization, projects, date range).
        """
        self.client = client
        self.db = db
        self.repository = PRRepository(db)
        self.config = config

    def extract_all(self, backfill_days: int | None = None) -> ExtractionSummary:
        """Extract PRs for all configured projects.

        Projects are processed in configuration order. The run stops at the
        first project whose extraction fails (Adjustment 4), so later projects
        are not attempted and do not appear in the summary.

        Args:
            backfill_days: If provided, re-extract the last N days (Adjustment 1).

        Returns:
            Summary of the results gathered before the run finished or aborted.
        """
        summary = ExtractionSummary()

        for project in self.config.projects:
            result = self._extract_project(project, backfill_days)
            summary.add_result(result)

            # Adjustment 4: Fail fast on any project failure
            if not result.success:
                logger.error(f"Extraction failed for {project}, aborting run")
                break

        summary.log_summary()
        return summary

    def _extract_project(
        self,
        project: str,
        backfill_days: int | None,
    ) -> ProjectExtractionResult:
        """Extract PRs for a single project.

        Determines the date window, streams pull requests from the client into
        the repository, and records the end date as extraction metadata once
        the whole window has been processed.

        Args:
            project: Project name.
            backfill_days: Optional backfill window.

        Returns:
            Extraction result for this project. An ExtractionError is reported
            as a failed result (success=False, prs_extracted=0) rather than
            propagated.
        """
        # Pre-bind the window so the error handler can tell which bounds were
        # computed before the failure, without introspecting locals via dir().
        start_date: date | None = None
        end_date: date | None = None

        try:
            start_date = self._determine_start_date(project, backfill_days)
            end_date = self._determine_end_date()

            if start_date > end_date:
                # NOTE(review): the message labels the value "last", but what
                # is logged is the computed start date.
                logger.info(f"{project}: Already up to date (last: {start_date})")
                return ProjectExtractionResult(
                    project=project,
                    start_date=start_date,
                    end_date=end_date,
                    prs_extracted=0,
                    success=True,
                )

            logger.info(
                f"Extracting {self.config.organization}/{project}: "
                f"{start_date} → {end_date}"
            )

            count = 0
            for pr_data in self.client.get_pull_requests(project, start_date, end_date):
                self.repository.upsert_pr_with_related(
                    pr_data=pr_data,
                    organization_name=self.config.organization,
                    project_name=project,
                )
                count += 1

            # Update extraction metadata only on success
            self.repository.update_extraction_metadata(
                self.config.organization,
                project,
                end_date,
            )

            logger.info(f"{project}: Extracted {count} PRs")
            return ProjectExtractionResult(
                project=project,
                start_date=start_date,
                end_date=end_date,
                prs_extracted=count,
                success=True,
            )

        except ExtractionError as e:
            logger.error(f"{project}: Extraction failed: {e}")
            # Bounds that were never computed fall back to today's date.
            return ProjectExtractionResult(
                project=project,
                start_date=start_date if start_date is not None else date.today(),
                end_date=end_date if end_date is not None else date.today(),
                prs_extracted=0,
                success=False,
                error=str(e),
            )

    def _determine_start_date(
        self,
        project: str,
        backfill_days: int | None,
    ) -> date:
        """Determine the start date for extraction.

        Invariant 10: Incremental by default.
        Invariant 11: Backfill for convergence.

        Resolution order: explicit configured start, then backfill window,
        then the day after the last recorded extraction, then January 1 of
        the current year.

        Args:
            project: Project name.
            backfill_days: Optional backfill window.

        Returns:
            Start date for extraction.
        """
        # Priority 1: Explicit date range from config
        if self.config.date_range.start:
            return self.config.date_range.start

        # Priority 2: Backfill mode (a value of 0 or None falls through)
        if backfill_days:
            backfill_start = date.today() - timedelta(days=backfill_days)
            logger.info(f"{project}: Backfill mode - {backfill_days} days")
            return backfill_start

        # Priority 3: Incremental from last extraction
        last_date = self.repository.get_last_extraction_date(
            self.config.organization,
            project,
        )
        if last_date:
            # Start from day after last extraction
            return last_date + timedelta(days=1)

        # Default: Start of current year (first run)
        default_start = date(date.today().year, 1, 1)
        logger.info(f"{project}: First run - starting from {default_start}")
        return default_start

    def _determine_end_date(self) -> date:
        """Determine the end date for extraction.

        Returns:
            The configured end date when set; otherwise yesterday.
        """
        if self.config.date_range.end:
            return self.config.date_range.end

        # Default: yesterday (avoids incomplete day data)
        return date.today() - timedelta(days=1)
1
+ """Pull Request extractor orchestration.
2
+
3
+ Coordinates extraction across multiple projects with incremental and backfill support.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from dataclasses import dataclass, field
10
+ from datetime import date, timedelta
11
+
12
+ from ..config import Config
13
+ from ..persistence.database import DatabaseManager
14
+ from ..persistence.repository import PRRepository
15
+ from .ado_client import ADOClient, ExtractionError
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
@dataclass
class ProjectExtractionResult:
    """Outcome of one extraction pass over a single project.

    Instances are produced per project and collected by the run summary.
    """

    # Name of the project the pass ran against.
    project: str
    # First day of the extraction window.
    start_date: date
    # Last day of the extraction window.
    end_date: date
    # Number of pull requests counted for this pass.
    prs_extracted: int
    # True when the pass completed without an extraction error.
    success: bool
    # Error text for a failed pass; None otherwise.
    error: str | None = None
30
+
31
+
32
@dataclass
class ExtractionSummary:
    """Aggregate outcome of an extraction run across projects."""

    # Per-project results, in the order they were added.
    projects: list[ProjectExtractionResult] = field(default_factory=list)
    # Running sum of prs_extracted over all added results.
    total_prs: int = 0
    # Stays True until any added result reports failure.
    success: bool = True

    def add_result(self, result: ProjectExtractionResult) -> None:
        """Record a project result and fold it into the run totals.

        Args:
            result: Result of a single project's extraction pass.
        """
        self.projects.append(result)
        self.total_prs += result.prs_extracted
        if result.success:
            return
        # One failed project marks the whole run as failed.
        self.success = False

    def log_summary(self) -> None:
        """Write a per-project breakdown and the run totals to the module logger."""
        banner = "=" * 50
        logger.info(banner)
        logger.info("Extraction Summary")
        logger.info(banner)
        for entry in self.projects:
            status = "✗" if not entry.success else "✓"
            logger.info(
                f" {status} {entry.project}: "
                f"{entry.prs_extracted} PRs ({entry.start_date} → {entry.end_date})"
            )
            if entry.error:
                logger.error(f" Error: {entry.error}")
        logger.info(f"Total: {self.total_prs} PRs")
        outcome = "SUCCESS" if self.success else "FAILED"
        logger.info(f"Status: {outcome}")
        logger.info(banner)
63
+
64
+
65
class PRExtractor:
    """Orchestrates PR extraction across multiple projects.

    Invariant 10: Daily incremental extraction is the default mode.
    Invariant 11: Periodic backfill is required to prevent drift.
    """

    def __init__(
        self,
        client: ADOClient,
        db: DatabaseManager,
        config: Config,
    ) -> None:
        """Initialize the PR extractor.

        Args:
            client: ADO API client used to fetch pull requests.
            db: Database manager; wrapped in a PRRepository for persistence.
            config: Extraction configuration (organization, projects, date range).
        """
        self.client = client
        self.db = db
        self.repository = PRRepository(db)
        self.config = config

    def extract_all(self, backfill_days: int | None = None) -> ExtractionSummary:
        """Extract PRs for all configured projects.

        Projects are processed in configuration order. The run stops at the
        first project whose extraction fails (Adjustment 4), so later projects
        are not attempted and do not appear in the summary.

        Args:
            backfill_days: If provided, re-extract the last N days (Adjustment 1).

        Returns:
            Summary of the results gathered before the run finished or aborted.
        """
        summary = ExtractionSummary()

        for project in self.config.projects:
            result = self._extract_project(project, backfill_days)
            summary.add_result(result)

            # Adjustment 4: Fail fast on any project failure
            if not result.success:
                logger.error(f"Extraction failed for {project}, aborting run")
                break

        summary.log_summary()
        return summary

    def _extract_project(
        self,
        project: str,
        backfill_days: int | None,
    ) -> ProjectExtractionResult:
        """Extract PRs for a single project.

        Determines the date window, streams pull requests from the client into
        the repository, and records the end date as extraction metadata once
        the whole window has been processed.

        Args:
            project: Project name.
            backfill_days: Optional backfill window.

        Returns:
            Extraction result for this project. An ExtractionError is reported
            as a failed result (success=False, prs_extracted=0) rather than
            propagated.
        """
        # Pre-bind the window so the error handler can tell which bounds were
        # computed before the failure, without introspecting locals via dir().
        start_date: date | None = None
        end_date: date | None = None

        try:
            start_date = self._determine_start_date(project, backfill_days)
            end_date = self._determine_end_date()

            if start_date > end_date:
                # NOTE(review): the message labels the value "last", but what
                # is logged is the computed start date.
                logger.info(f"{project}: Already up to date (last: {start_date})")
                return ProjectExtractionResult(
                    project=project,
                    start_date=start_date,
                    end_date=end_date,
                    prs_extracted=0,
                    success=True,
                )

            logger.info(
                f"Extracting {self.config.organization}/{project}: "
                f"{start_date} → {end_date}"
            )

            count = 0
            for pr_data in self.client.get_pull_requests(project, start_date, end_date):
                self.repository.upsert_pr_with_related(
                    pr_data=pr_data,
                    organization_name=self.config.organization,
                    project_name=project,
                )
                count += 1

            # Update extraction metadata only on success
            self.repository.update_extraction_metadata(
                self.config.organization,
                project,
                end_date,
            )

            logger.info(f"{project}: Extracted {count} PRs")
            return ProjectExtractionResult(
                project=project,
                start_date=start_date,
                end_date=end_date,
                prs_extracted=count,
                success=True,
            )

        except ExtractionError as e:
            logger.error(f"{project}: Extraction failed: {e}")
            # Bounds that were never computed fall back to today's date.
            return ProjectExtractionResult(
                project=project,
                start_date=start_date if start_date is not None else date.today(),
                end_date=end_date if end_date is not None else date.today(),
                prs_extracted=0,
                success=False,
                error=str(e),
            )

    def _determine_start_date(
        self,
        project: str,
        backfill_days: int | None,
    ) -> date:
        """Determine the start date for extraction.

        Invariant 10: Incremental by default.
        Invariant 11: Backfill for convergence.

        Resolution order: explicit configured start, then backfill window,
        then the day after the last recorded extraction, then January 1 of
        the current year.

        Args:
            project: Project name.
            backfill_days: Optional backfill window.

        Returns:
            Start date for extraction.
        """
        # Priority 1: Explicit date range from config
        if self.config.date_range.start:
            return self.config.date_range.start

        # Priority 2: Backfill mode (a value of 0 or None falls through)
        if backfill_days:
            backfill_start = date.today() - timedelta(days=backfill_days)
            logger.info(f"{project}: Backfill mode - {backfill_days} days")
            return backfill_start

        # Priority 3: Incremental from last extraction
        last_date = self.repository.get_last_extraction_date(
            self.config.organization,
            project,
        )
        if last_date:
            # Start from day after last extraction
            return last_date + timedelta(days=1)

        # Default: Start of current year (first run)
        default_start = date(date.today().year, 1, 1)
        logger.info(f"{project}: First run - starting from {default_start}")
        return default_start

    def _determine_end_date(self) -> date:
        """Determine the end date for extraction.

        Returns:
            The configured end date when set; otherwise yesterday.
        """
        if self.config.date_range.end:
            return self.config.date_range.end

        # Default: yesterday (avoids incomplete day data)
        return date.today() - timedelta(days=1)
@@ -0,0 +1,13 @@
1
+ """ML package for Advanced Analytics & ML features (Phase 5).
2
+
3
+ This package contains:
4
+ - ProphetForecaster: Prophet-based trend forecasting
5
+ - LLMInsightsGenerator: OpenAI-based insights generation
6
+
7
+ Note: These modules require the [ml] optional dependencies.
8
+ Install with: pip install -e ".[ml]"
9
+ """
10
+
11
# Lazy imports only - no heavy module imports at package level
# to avoid breaking base installs without [ml] extras.
__all__ = ["ProphetForecaster", "LLMInsightsGenerator"]

# Public name -> submodule expected to define it.
# NOTE(review): mapping inferred from the package layout (ml/forecaster.py,
# ml/insights.py) - confirm each class lives in the module named here.
_LAZY_EXPORTS = {
    "ProphetForecaster": "forecaster",
    "LLMInsightsGenerator": "insights",
}


def __getattr__(name: str):
    """Resolve the names listed in ``__all__`` on first access (PEP 562).

    Without this hook the package declares names in ``__all__`` that it never
    defines, so ``from <package>.ml import *`` fails with AttributeError.
    Importing the package itself still loads no submodule.

    Args:
        name: Attribute being looked up on the package.

    Returns:
        The attribute of that name from the mapped submodule.

    Raises:
        AttributeError: If ``name`` is not one of the lazily exported names.
        ImportError: If the mapped submodule cannot be imported, e.g. its
            optional dependencies are missing.
    """
    submodule = _LAZY_EXPORTS.get(name)
    if submodule is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    # Function-scope import keeps the package import itself dependency-free.
    from importlib import import_module

    value = getattr(import_module(f".{submodule}", __name__), name)
    # Cache on the module so later lookups bypass __getattr__.
    globals()[name] = value
    return value
@@ -0,0 +1,70 @@
1
+ """Date utility functions for ML forecasting.
2
+
3
+ Provides ISO-week-aware date alignment functions with edge-case handling
4
+ for year boundaries and week 53 scenarios.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from datetime import date, timedelta
10
+
11
+
12
def align_to_monday(d: date) -> date:
    """Return the Monday that opens the ISO 8601 week containing ``d``.

    The ISO calendar is used for the lookup, so dates near a year boundary
    resolve to the Monday of their ISO week even when that Monday falls in
    the neighbouring calendar year:

    - early-January dates can belong to the last ISO week of the prior year
    - late-December dates can belong to ISO week 1 of the following year

    Args:
        d: Date to align.

    Returns:
        Monday of the ISO week containing ``d``.

    Examples:
        >>> align_to_monday(date(2026, 1, 1))  # Thursday
        datetime.date(2025, 12, 29)

        >>> align_to_monday(date(2026, 12, 28))  # Monday of week 53
        datetime.date(2026, 12, 28)

        >>> align_to_monday(date(2026, 12, 30))  # Wednesday of week 53
        datetime.date(2026, 12, 28)
    """
    calendar_parts = d.isocalendar()
    # Indices 0 and 1 are the ISO year and ISO week; weekday 1 is Monday.
    return date.fromisocalendar(calendar_parts[0], calendar_parts[1], 1)
43
+
44
+
45
def get_next_monday(d: date | None = None) -> date:
    """Return the first Monday on or after a date.

    Args:
        d: Reference date; today's date is used when omitted.

    Returns:
        ``d`` itself when it is already a Monday, otherwise the next Monday.

    Examples:
        >>> get_next_monday(date(2026, 1, 15))  # Thursday
        datetime.date(2026, 1, 19)

        >>> get_next_monday(date(2026, 1, 19))  # Monday
        datetime.date(2026, 1, 19)
    """
    anchor = date.today() if d is None else d

    # weekday() is 0 for Monday, so the modulo gives 0 on a Monday and the
    # number of days left in the week otherwise.
    days_ahead = (7 - anchor.weekday()) % 7
    if not days_ahead:
        return anchor
    return anchor + timedelta(days=days_ahead)