ado-git-repo-insights 1.2.1__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. ado_git_repo_insights/__init__.py +3 -3
  2. ado_git_repo_insights/cli.py +703 -354
  3. ado_git_repo_insights/config.py +186 -186
  4. ado_git_repo_insights/extractor/__init__.py +1 -1
  5. ado_git_repo_insights/extractor/ado_client.py +452 -246
  6. ado_git_repo_insights/extractor/pr_extractor.py +239 -239
  7. ado_git_repo_insights/ml/__init__.py +13 -0
  8. ado_git_repo_insights/ml/date_utils.py +70 -0
  9. ado_git_repo_insights/ml/forecaster.py +288 -0
  10. ado_git_repo_insights/ml/insights.py +497 -0
  11. ado_git_repo_insights/persistence/__init__.py +1 -1
  12. ado_git_repo_insights/persistence/database.py +193 -193
  13. ado_git_repo_insights/persistence/models.py +207 -145
  14. ado_git_repo_insights/persistence/repository.py +662 -376
  15. ado_git_repo_insights/transform/__init__.py +1 -1
  16. ado_git_repo_insights/transform/aggregators.py +950 -0
  17. ado_git_repo_insights/transform/csv_generator.py +132 -132
  18. ado_git_repo_insights/utils/__init__.py +1 -1
  19. ado_git_repo_insights/utils/datetime_utils.py +101 -101
  20. ado_git_repo_insights/utils/logging_config.py +172 -172
  21. ado_git_repo_insights/utils/run_summary.py +207 -206
  22. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/METADATA +56 -15
  23. ado_git_repo_insights-2.7.4.dist-info/RECORD +27 -0
  24. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/licenses/LICENSE +21 -21
  25. ado_git_repo_insights-1.2.1.dist-info/RECORD +0 -22
  26. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/WHEEL +0 -0
  27. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/entry_points.txt +0 -0
  28. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/top_level.txt +0 -0
@@ -1,376 +1,662 @@
1
- """Data access layer for ado-git-repo-insights.
2
-
3
- This module implements UPSERT operations and state tracking per Invariant 8
4
- (idempotent and convergent state updates).
5
- """
6
-
7
- from __future__ import annotations
8
-
9
- import json
10
- import logging
11
- from dataclasses import dataclass
12
- from datetime import date, datetime, timezone
13
- from typing import TYPE_CHECKING, Any
14
-
15
- if TYPE_CHECKING:
16
- from .database import DatabaseManager
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
-
21
- @dataclass
22
- class ExtractionMetadata:
23
- """Metadata about the last extraction for a project."""
24
-
25
- organization_name: str
26
- project_name: str
27
- last_extraction_date: date
28
- last_extraction_timestamp: datetime
29
-
30
-
31
- class PRRepository:
32
- """Data access layer for Pull Request data.
33
-
34
- Invariant 8: State updates must be idempotent and converge.
35
- Invariant 14: Stable identifiers are required for UPSERT keys.
36
- Invariant 15: All entities must be scoped to organization + project.
37
- """
38
-
39
- def __init__(self, db: DatabaseManager) -> None:
40
- """Initialize the repository.
41
-
42
- Args:
43
- db: Database manager instance.
44
- """
45
- self.db = db
46
-
47
- # --- Extraction Metadata ---
48
-
49
- def get_last_extraction_date(self, organization: str, project: str) -> date | None:
50
- """Get the last successful extraction date for a project.
51
-
52
- Args:
53
- organization: Organization name.
54
- project: Project name.
55
-
56
- Returns:
57
- Last extraction date, or None if never extracted or metadata is corrupt.
58
- """
59
- cursor = self.db.execute(
60
- """
61
- SELECT last_extraction_date FROM extraction_metadata
62
- WHERE organization_name = ? AND project_name = ?
63
- """,
64
- (organization, project),
65
- )
66
- row = cursor.fetchone()
67
- if row:
68
- date_value = row["last_extraction_date"]
69
- # Handle NULL or empty string
70
- if not date_value:
71
- return None
72
- # Handle corrupt date format gracefully (warn + fallback)
73
- try:
74
- return date.fromisoformat(date_value)
75
- except (ValueError, TypeError) as e:
76
- logger.warning(
77
- f"Invalid/corrupt extraction metadata date for "
78
- f"{organization}/{project}: '{date_value}' - {e}"
79
- )
80
- return None
81
- return None
82
-
83
- def update_extraction_metadata(
84
- self, organization: str, project: str, extraction_date: date
85
- ) -> None:
86
- """Record successful extraction for the given date.
87
-
88
- Args:
89
- organization: Organization name.
90
- project: Project name.
91
- extraction_date: Date that was extracted.
92
- """
93
- self.db.execute(
94
- """
95
- INSERT OR REPLACE INTO extraction_metadata
96
- (organization_name, project_name, last_extraction_date, last_extraction_timestamp)
97
- VALUES (?, ?, ?, ?)
98
- """,
99
- (
100
- organization,
101
- project,
102
- extraction_date.isoformat(),
103
- datetime.now(timezone.utc).isoformat(),
104
- ),
105
- )
106
- logger.debug(
107
- f"Updated extraction metadata: {organization}/{project} = {extraction_date}"
108
- )
109
-
110
- # --- Organizations ---
111
-
112
- def upsert_organization(self, organization_name: str) -> None:
113
- """Insert or update an organization.
114
-
115
- Args:
116
- organization_name: Organization name.
117
- """
118
- self.db.execute(
119
- "INSERT OR IGNORE INTO organizations (organization_name) VALUES (?)",
120
- (organization_name,),
121
- )
122
-
123
- # --- Projects ---
124
-
125
- def upsert_project(self, organization_name: str, project_name: str) -> None:
126
- """Insert or update a project.
127
-
128
- Args:
129
- organization_name: Organization name.
130
- project_name: Project name.
131
- """
132
- # Ensure organization exists first
133
- self.upsert_organization(organization_name)
134
-
135
- self.db.execute(
136
- """
137
- INSERT OR IGNORE INTO projects (organization_name, project_name)
138
- VALUES (?, ?)
139
- """,
140
- (organization_name, project_name),
141
- )
142
-
143
- # --- Repositories ---
144
-
145
- def upsert_repository(
146
- self,
147
- repository_id: str,
148
- repository_name: str,
149
- project_name: str,
150
- organization_name: str,
151
- ) -> None:
152
- """Insert or update a repository.
153
-
154
- Invariant 14: repository_id is the stable ADO ID.
155
- Invariant 16: repository_name is a mutable label.
156
-
157
- Args:
158
- repository_id: Stable ADO repository ID.
159
- repository_name: Current repository name.
160
- project_name: Project name.
161
- organization_name: Organization name.
162
- """
163
- # Ensure project exists first
164
- self.upsert_project(organization_name, project_name)
165
-
166
- self.db.execute(
167
- """
168
- INSERT OR REPLACE INTO repositories
169
- (repository_id, repository_name, project_name, organization_name)
170
- VALUES (?, ?, ?, ?)
171
- """,
172
- (repository_id, repository_name, project_name, organization_name),
173
- )
174
-
175
- # --- Users ---
176
-
177
- def upsert_user(
178
- self, user_id: str, display_name: str, email: str | None = None
179
- ) -> None:
180
- """Insert or update a user.
181
-
182
- Invariant 16: user_id is stable, display_name/email are mutable.
183
-
184
- Args:
185
- user_id: Stable ADO user ID.
186
- display_name: Current display name.
187
- email: Current email (optional).
188
- """
189
- self.db.execute(
190
- """
191
- INSERT OR REPLACE INTO users (user_id, display_name, email)
192
- VALUES (?, ?, ?)
193
- """,
194
- (user_id, display_name, email),
195
- )
196
-
197
- # --- Pull Requests ---
198
-
199
- def upsert_pull_request(
200
- self,
201
- pull_request_uid: str,
202
- pull_request_id: int,
203
- organization_name: str,
204
- project_name: str,
205
- repository_id: str,
206
- user_id: str,
207
- title: str,
208
- status: str,
209
- description: str | None,
210
- creation_date: str,
211
- closed_date: str | None,
212
- cycle_time_minutes: float | None,
213
- raw_json: dict[str, Any] | None = None,
214
- ) -> None:
215
- """Insert or update a pull request.
216
-
217
- Invariant 8: UPSERT semantics ensure idempotent updates.
218
- Invariant 14: pull_request_uid = {repository_id}-{pull_request_id}.
219
-
220
- Args:
221
- pull_request_uid: Unique identifier (repo_id-pr_id).
222
- pull_request_id: ADO PR ID.
223
- organization_name: Organization name.
224
- project_name: Project name.
225
- repository_id: Repository ID.
226
- user_id: Author user ID.
227
- title: PR title.
228
- status: PR status.
229
- description: PR description.
230
- creation_date: ISO 8601 creation date.
231
- closed_date: ISO 8601 closed date.
232
- cycle_time_minutes: Calculated cycle time.
233
- raw_json: Original ADO API response for auditing.
234
- """
235
- self.db.execute(
236
- """
237
- INSERT OR REPLACE INTO pull_requests (
238
- pull_request_uid, pull_request_id, organization_name, project_name,
239
- repository_id, user_id, title, status, description,
240
- creation_date, closed_date, cycle_time_minutes, raw_json
241
- ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
242
- """,
243
- (
244
- pull_request_uid,
245
- pull_request_id,
246
- organization_name,
247
- project_name,
248
- repository_id,
249
- user_id,
250
- title,
251
- status,
252
- description,
253
- creation_date,
254
- closed_date,
255
- cycle_time_minutes,
256
- json.dumps(raw_json) if raw_json else None,
257
- ),
258
- )
259
-
260
- # --- Reviewers ---
261
-
262
- def upsert_reviewer(
263
- self,
264
- pull_request_uid: str,
265
- user_id: str,
266
- vote: int,
267
- repository_id: str,
268
- ) -> None:
269
- """Insert or update a reviewer.
270
-
271
- Args:
272
- pull_request_uid: PR unique identifier.
273
- user_id: Reviewer user ID.
274
- vote: Vote value.
275
- repository_id: Repository ID.
276
- """
277
- self.db.execute(
278
- """
279
- INSERT OR REPLACE INTO reviewers
280
- (pull_request_uid, user_id, vote, repository_id)
281
- VALUES (?, ?, ?, ?)
282
- """,
283
- (pull_request_uid, user_id, vote, repository_id),
284
- )
285
-
286
- # --- Bulk Operations ---
287
-
288
- def upsert_pr_with_related(
289
- self,
290
- pr_data: dict[str, Any],
291
- organization_name: str,
292
- project_name: str,
293
- ) -> None:
294
- """Insert or update a PR and all related entities.
295
-
296
- This is the main entry point for processing a PR from the ADO API.
297
- Handles repository, user, reviewers, and the PR itself.
298
-
299
- Args:
300
- pr_data: Raw PR data from ADO API.
301
- organization_name: Organization name.
302
- project_name: Project name.
303
- """
304
- from ..utils.datetime_utils import calculate_cycle_time_minutes
305
-
306
- # Extract repository
307
- repo = pr_data.get("repository", {})
308
- repository_id = repo.get("id", "")
309
- repository_name = repo.get("name", "")
310
-
311
- self.upsert_repository(
312
- repository_id=repository_id,
313
- repository_name=repository_name,
314
- project_name=project_name,
315
- organization_name=organization_name,
316
- )
317
-
318
- # Extract author
319
- created_by = pr_data.get("createdBy", {})
320
- user_id = created_by.get("id", "")
321
- display_name = created_by.get("displayName", "")
322
- email = created_by.get("uniqueName")
323
-
324
- self.upsert_user(
325
- user_id=user_id,
326
- display_name=display_name,
327
- email=email,
328
- )
329
-
330
- # Build PR UID (Invariant 14)
331
- pr_id = pr_data.get("pullRequestId", 0)
332
- pull_request_uid = f"{repository_id}-{pr_id}"
333
-
334
- # Calculate cycle time
335
- creation_date = pr_data.get("creationDate", "")
336
- closed_date = pr_data.get("closedDate")
337
- cycle_time = calculate_cycle_time_minutes(creation_date, closed_date)
338
-
339
- # Upsert PR
340
- self.upsert_pull_request(
341
- pull_request_uid=pull_request_uid,
342
- pull_request_id=pr_id,
343
- organization_name=organization_name,
344
- project_name=project_name,
345
- repository_id=repository_id,
346
- user_id=user_id,
347
- title=pr_data.get("title", ""),
348
- status=pr_data.get("status", ""),
349
- description=pr_data.get("description"),
350
- creation_date=creation_date,
351
- closed_date=closed_date,
352
- cycle_time_minutes=cycle_time,
353
- raw_json=pr_data,
354
- )
355
-
356
- # Upsert reviewers
357
- for reviewer in pr_data.get("reviewers", []):
358
- reviewer_id = reviewer.get("id", "")
359
- reviewer_name = reviewer.get("displayName", "")
360
- reviewer_email = reviewer.get("uniqueName")
361
- vote = reviewer.get("vote", 0)
362
-
363
- self.upsert_user(
364
- user_id=reviewer_id,
365
- display_name=reviewer_name,
366
- email=reviewer_email,
367
- )
368
-
369
- self.upsert_reviewer(
370
- pull_request_uid=pull_request_uid,
371
- user_id=reviewer_id,
372
- vote=vote,
373
- repository_id=repository_id,
374
- )
375
-
376
- logger.debug(f"Upserted PR: {pull_request_uid}")
1
+ """Data access layer for ado-git-repo-insights.
2
+
3
+ This module implements UPSERT operations and state tracking per Invariant 8
4
+ (idempotent and convergent state updates).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ from dataclasses import dataclass
12
+ from datetime import date, datetime, timezone
13
+ from typing import TYPE_CHECKING, Any
14
+
15
+ if TYPE_CHECKING:
16
+ from .database import DatabaseManager
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
@dataclass
class ExtractionMetadata:
    """Record describing the most recent extraction of a single project.

    The project is identified by its organization and project names; the
    remaining fields capture which date was last extracted and when that
    extraction was recorded.
    """

    # Organization that owns the project.
    organization_name: str
    # Project this record belongs to.
    project_name: str
    # Date of the last extraction for the project.
    last_extraction_date: date
    # Timestamp associated with the last extraction.
    last_extraction_timestamp: datetime
29
+
30
+
31
+ class PRRepository:
32
+ """Data access layer for Pull Request data.
33
+
34
+ Invariant 8: State updates must be idempotent and converge.
35
+ Invariant 14: Stable identifiers are required for UPSERT keys.
36
+ Invariant 15: All entities must be scoped to organization + project.
37
+ """
38
+
39
+ def __init__(self, db: DatabaseManager) -> None:
40
+ """Initialize the repository.
41
+
42
+ Args:
43
+ db: Database manager instance.
44
+ """
45
+ self.db = db
46
+
47
+ # --- Extraction Metadata ---
48
+
49
def get_last_extraction_date(self, organization: str, project: str) -> date | None:
    """Look up the date of the last recorded extraction for a project.

    Args:
        organization: Organization name.
        project: Project name.

    Returns:
        The stored extraction date, or None when no metadata row exists,
        the stored value is NULL/empty, or the stored value cannot be
        parsed as an ISO date.
    """
    query = """
        SELECT last_extraction_date FROM extraction_metadata
        WHERE organization_name = ? AND project_name = ?
        """
    record = self.db.execute(query, (organization, project)).fetchone()

    # No metadata row: nothing has been recorded for this project.
    if not record:
        return None

    stored = record["last_extraction_date"]
    # NULL and empty string are both treated as "no date".
    if not stored:
        return None

    try:
        return date.fromisoformat(stored)
    except (ValueError, TypeError) as e:
        # Corrupt metadata is tolerated: warn and fall back to "no date".
        logger.warning(
            f"Invalid/corrupt extraction metadata date for "
            f"{organization}/{project}: '{stored}' - {e}"
        )
        return None
82
+
83
def update_extraction_metadata(
    self, organization: str, project: str, extraction_date: date
) -> None:
    """Store the given date as the project's last extraction.

    The row for the organization/project pair is written with
    INSERT OR REPLACE, and the current UTC time is stored alongside the
    date as the extraction timestamp.

    Args:
        organization: Organization name.
        project: Project name.
        extraction_date: Date that was extracted.
    """
    statement = """
        INSERT OR REPLACE INTO extraction_metadata
        (organization_name, project_name, last_extraction_date, last_extraction_timestamp)
        VALUES (?, ?, ?, ?)
        """
    values = (
        organization,
        project,
        extraction_date.isoformat(),
        datetime.now(timezone.utc).isoformat(),
    )
    self.db.execute(statement, values)

    logger.debug(
        f"Updated extraction metadata: {organization}/{project} = {extraction_date}"
    )
109
+
110
+ # --- Organizations ---
111
+
112
def upsert_organization(self, organization_name: str) -> None:
    """Make sure a row exists for the given organization.

    Uses INSERT OR IGNORE, so an organization that is already present is
    left untouched.

    Args:
        organization_name: Organization name.
    """
    statement = "INSERT OR IGNORE INTO organizations (organization_name) VALUES (?)"
    self.db.execute(statement, (organization_name,))
122
+
123
+ # --- Projects ---
124
+
125
def upsert_project(self, organization_name: str, project_name: str) -> None:
    """Make sure a row exists for the given project.

    The owning organization is upserted first, then the project is added
    with INSERT OR IGNORE (an existing project row is left untouched).

    Args:
        organization_name: Organization name.
        project_name: Project name.
    """
    # The parent organization row must be written before the project row.
    self.upsert_organization(organization_name)

    statement = """
        INSERT OR IGNORE INTO projects (organization_name, project_name)
        VALUES (?, ?)
        """
    self.db.execute(statement, (organization_name, project_name))
142
+
143
+ # --- Repositories ---
144
+
145
def upsert_repository(
    self,
    repository_id: str,
    repository_name: str,
    project_name: str,
    organization_name: str,
) -> None:
    """Write (or overwrite) the row for a repository.

    Invariant 14: repository_id is the stable ADO ID.
    Invariant 16: repository_name is a mutable label.

    The owning project is upserted first; the repository row is then
    written with INSERT OR REPLACE.

    Args:
        repository_id: Stable ADO repository ID.
        repository_name: Current repository name.
        project_name: Project name.
        organization_name: Organization name.
    """
    # The parent project row must be written before the repository row.
    self.upsert_project(organization_name, project_name)

    statement = """
        INSERT OR REPLACE INTO repositories
        (repository_id, repository_name, project_name, organization_name)
        VALUES (?, ?, ?, ?)
        """
    values = (repository_id, repository_name, project_name, organization_name)
    self.db.execute(statement, values)
174
+
175
+ # --- Users ---
176
+
177
def upsert_user(
    self, user_id: str, display_name: str, email: str | None = None
) -> None:
    """Write (or overwrite) the row for a user.

    Invariant 16: user_id is stable, display_name/email are mutable.

    Args:
        user_id: Stable ADO user ID.
        display_name: Current display name.
        email: Current email (optional).
    """
    statement = """
        INSERT OR REPLACE INTO users (user_id, display_name, email)
        VALUES (?, ?, ?)
        """
    self.db.execute(statement, (user_id, display_name, email))
196
+
197
+ # --- Pull Requests ---
198
+
199
def upsert_pull_request(
    self,
    pull_request_uid: str,
    pull_request_id: int,
    organization_name: str,
    project_name: str,
    repository_id: str,
    user_id: str,
    title: str,
    status: str,
    description: str | None,
    creation_date: str,
    closed_date: str | None,
    cycle_time_minutes: float | None,
    raw_json: dict[str, Any] | None = None,
) -> None:
    """Write (or overwrite) the row for a pull request.

    Invariant 8: UPSERT semantics ensure idempotent updates.
    Invariant 14: pull_request_uid = {repository_id}-{pull_request_id}.

    Args:
        pull_request_uid: Unique identifier (repo_id-pr_id).
        pull_request_id: ADO PR ID.
        organization_name: Organization name.
        project_name: Project name.
        repository_id: Repository ID.
        user_id: Author user ID.
        title: PR title.
        status: PR status.
        description: PR description.
        creation_date: ISO 8601 creation date.
        closed_date: ISO 8601 closed date.
        cycle_time_minutes: Calculated cycle time.
        raw_json: Original ADO API response for auditing; serialized with
            json.dumps, or stored as NULL when falsy (None or empty).
    """
    # A falsy payload (None or an empty dict) is persisted as NULL.
    if raw_json:
        serialized_payload = json.dumps(raw_json)
    else:
        serialized_payload = None

    statement = """
        INSERT OR REPLACE INTO pull_requests (
            pull_request_uid, pull_request_id, organization_name, project_name,
            repository_id, user_id, title, status, description,
            creation_date, closed_date, cycle_time_minutes, raw_json
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """
    values = (
        pull_request_uid,
        pull_request_id,
        organization_name,
        project_name,
        repository_id,
        user_id,
        title,
        status,
        description,
        creation_date,
        closed_date,
        cycle_time_minutes,
        serialized_payload,
    )
    self.db.execute(statement, values)
259
+
260
+ # --- Reviewers ---
261
+
262
def upsert_reviewer(
    self,
    pull_request_uid: str,
    user_id: str,
    vote: int,
    repository_id: str,
) -> None:
    """Write (or overwrite) a reviewer's row for a pull request.

    Args:
        pull_request_uid: PR unique identifier.
        user_id: Reviewer user ID.
        vote: Vote value.
        repository_id: Repository ID.
    """
    statement = """
        INSERT OR REPLACE INTO reviewers
        (pull_request_uid, user_id, vote, repository_id)
        VALUES (?, ?, ?, ?)
        """
    values = (pull_request_uid, user_id, vote, repository_id)
    self.db.execute(statement, values)
285
+
286
+ # --- Bulk Operations ---
287
+
288
def upsert_pr_with_related(
    self,
    pr_data: dict[str, Any],
    organization_name: str,
    project_name: str,
) -> None:
    """Insert or update a PR and all related entities.

    This is the main entry point for processing a PR from the ADO API.
    Rows are written in this order: repository, author, the PR itself,
    then for each reviewer the reviewer's user row followed by the
    reviewer row.

    Nested objects that are absent *or explicitly null* in ``pr_data``
    (``repository``, ``createdBy``, ``reviewers``) are treated as empty
    instead of raising.

    Args:
        pr_data: Raw PR data from ADO API.
        organization_name: Organization name.
        project_name: Project name.
    """
    from ..utils.datetime_utils import calculate_cycle_time_minutes

    # NOTE: `dict.get(key, default)` only applies the default when the key
    # is missing; `or {}` / `or []` additionally covers a JSON null value.
    repo = pr_data.get("repository") or {}
    repository_id = repo.get("id", "")
    repository_name = repo.get("name", "")

    self.upsert_repository(
        repository_id=repository_id,
        repository_name=repository_name,
        project_name=project_name,
        organization_name=organization_name,
    )

    # Author of the PR.
    created_by = pr_data.get("createdBy") or {}
    user_id = created_by.get("id", "")

    self.upsert_user(
        user_id=user_id,
        display_name=created_by.get("displayName", ""),
        email=created_by.get("uniqueName"),
    )

    # Build PR UID (Invariant 14): {repository_id}-{pull_request_id}.
    pr_id = pr_data.get("pullRequestId", 0)
    pull_request_uid = f"{repository_id}-{pr_id}"

    # Cycle time is derived from the creation and (optional) closed dates.
    creation_date = pr_data.get("creationDate", "")
    closed_date = pr_data.get("closedDate")
    cycle_time = calculate_cycle_time_minutes(creation_date, closed_date)

    self.upsert_pull_request(
        pull_request_uid=pull_request_uid,
        pull_request_id=pr_id,
        organization_name=organization_name,
        project_name=project_name,
        repository_id=repository_id,
        user_id=user_id,
        title=pr_data.get("title", ""),
        status=pr_data.get("status", ""),
        description=pr_data.get("description"),
        creation_date=creation_date,
        closed_date=closed_date,
        cycle_time_minutes=cycle_time,
        raw_json=pr_data,
    )

    # Each reviewer's user row is written before the reviewer row itself.
    for reviewer in pr_data.get("reviewers") or []:
        reviewer_id = reviewer.get("id", "")

        self.upsert_user(
            user_id=reviewer_id,
            display_name=reviewer.get("displayName", ""),
            email=reviewer.get("uniqueName"),
        )

        self.upsert_reviewer(
            pull_request_uid=pull_request_uid,
            user_id=reviewer_id,
            vote=reviewer.get("vote", 0),
            repository_id=repository_id,
        )

    logger.debug(f"Upserted PR: {pull_request_uid}")
377
+
378
+ # --- Phase 3.3: Team Operations ---
379
+
380
def upsert_team(
    self,
    team_id: str,
    team_name: str,
    project_name: str,
    organization_name: str,
    description: str | None = None,
) -> None:
    """Insert or update a team.

    §5: Teams are project-scoped, represent current state.

    On a ``team_id`` conflict only ``team_name``, ``description`` and
    ``last_updated`` are refreshed; the stored project and organization
    names are left as they were. ``last_updated`` is set to the current
    UTC time on every call.

    Args:
        team_id: Stable team identifier.
        team_name: Team name.
        project_name: Project name.
        organization_name: Organization name.
        description: Optional team description.
    """
    # Uses the module-level datetime/timezone imports (no local re-import).
    now = datetime.now(timezone.utc).isoformat()

    self.db.execute(
        """
        INSERT INTO teams (team_id, team_name, project_name, organization_name, description, last_updated)
        VALUES (?, ?, ?, ?, ?, ?)
        ON CONFLICT(team_id) DO UPDATE SET
            team_name = excluded.team_name,
            description = excluded.description,
            last_updated = excluded.last_updated
        """,
        (team_id, team_name, project_name, organization_name, description, now),
    )
414
+
415
def upsert_team_member(
    self,
    team_id: str,
    user_id: str,
    display_name: str,
    email: str | None = None,
    is_team_admin: bool = False,
) -> None:
    """Record that a user belongs to a team, updating the admin flag if known.

    §5: Represents current membership, not historical.

    The user row is upserted from the team API data before the membership
    row is written, because a team member may never have authored a PR
    and therefore may not yet exist in the users table.

    Args:
        team_id: Team identifier.
        user_id: User identifier.
        display_name: User display name from team API.
        email: User email from team API.
        is_team_admin: Whether user is a team admin.
    """
    # User first, membership second (P2 fix: avoid FK violation).
    self.upsert_user(user_id=user_id, display_name=display_name, email=email)

    # The admin flag is stored as an integer 1/0.
    if is_team_admin:
        admin_flag = 1
    else:
        admin_flag = 0

    statement = """
        INSERT INTO team_members (team_id, user_id, is_team_admin)
        VALUES (?, ?, ?)
        ON CONFLICT(team_id, user_id) DO UPDATE SET
            is_team_admin = excluded.is_team_admin
        """
    self.db.execute(statement, (team_id, user_id, admin_flag))
449
+
450
def clear_team_members(self, team_id: str) -> None:
    """Delete every membership row belonging to a team.

    Intended to run before a refresh so that the stored membership
    reflects the current state on each run.

    Args:
        team_id: Team identifier.
    """
    statement = "DELETE FROM team_members WHERE team_id = ?"
    self.db.execute(statement, (team_id,))
462
+
463
def get_teams_for_project(
    self, organization_name: str, project_name: str
) -> list[dict[str, Any]]:
    """Fetch the teams stored for a project, ordered by team name.

    Args:
        organization_name: Organization name.
        project_name: Project name.

    Returns:
        One dictionary per team with the keys team_id, team_name,
        description and last_updated.
    """
    query = """
        SELECT team_id, team_name, description, last_updated
        FROM teams
        WHERE organization_name = ? AND project_name = ?
        ORDER BY team_name
        """
    rows = self.db.execute(query, (organization_name, project_name)).fetchall()
    # Convert each row object into a plain dict for callers.
    return [dict(entry) for entry in rows]
485
+
486
def get_team_members(self, team_id: str) -> list[dict[str, Any]]:
    """Fetch the members of a team, ordered by display name.

    Membership rows are LEFT JOINed to the users table, so a member
    without a matching user row is still returned (with NULL user info).

    Args:
        team_id: Team identifier.

    Returns:
        One dictionary per member with the keys user_id, display_name,
        email and is_team_admin.
    """
    query = """
        SELECT tm.user_id, u.display_name, u.email, tm.is_team_admin
        FROM team_members tm
        LEFT JOIN users u ON tm.user_id = u.user_id
        WHERE tm.team_id = ?
        ORDER BY u.display_name
        """
    rows = self.db.execute(query, (team_id,)).fetchall()
    # Convert each row object into a plain dict for callers.
    return [dict(entry) for entry in rows]
506
+
507
+ # --- Phase 3.4: Thread/Comment Operations ---
508
+
509
def upsert_thread(
    self,
    thread_id: str,
    pull_request_uid: str,
    status: str | None,
    thread_context: str | None,
    last_updated: str,
    created_at: str,
    is_deleted: bool = False,
) -> None:
    """Insert a PR thread, or refresh it when the thread already exists.

    §6: Indexed by last_updated for incremental sync.

    On a thread_id conflict only status, thread_context, last_updated and
    is_deleted are overwritten; pull_request_uid and created_at keep
    their stored values.

    Args:
        thread_id: Thread identifier.
        pull_request_uid: PR unique identifier.
        status: Thread status (active, fixed, closed).
        thread_context: JSON context (file, line range).
        last_updated: ISO 8601 timestamp.
        created_at: ISO 8601 timestamp.
        is_deleted: Whether thread is deleted.
    """
    # The deleted flag is stored as an integer 1/0.
    if is_deleted:
        deleted_flag = 1
    else:
        deleted_flag = 0

    statement = """
        INSERT INTO pr_threads (
            thread_id, pull_request_uid, status, thread_context,
            last_updated, created_at, is_deleted
        ) VALUES (?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(thread_id) DO UPDATE SET
            status = excluded.status,
            thread_context = excluded.thread_context,
            last_updated = excluded.last_updated,
            is_deleted = excluded.is_deleted
        """
    values = (
        thread_id,
        pull_request_uid,
        status,
        thread_context,
        last_updated,
        created_at,
        deleted_flag,
    )
    self.db.execute(statement, values)
554
+
555
def upsert_comment(
    self,
    comment_id: str,
    thread_id: str,
    pull_request_uid: str,
    author_id: str,
    content: str | None,
    comment_type: str | None,
    created_at: str,
    last_updated: str | None = None,
    is_deleted: bool = False,
) -> None:
    """Insert a PR comment, or refresh it when the comment already exists.

    On a comment_id conflict only content, last_updated and is_deleted
    are overwritten; every other column keeps its stored value.

    Args:
        comment_id: Comment identifier.
        thread_id: Parent thread identifier.
        pull_request_uid: PR unique identifier.
        author_id: Author user ID.
        content: Comment text content.
        comment_type: Type (text, codeChange, system).
        created_at: ISO 8601 timestamp.
        last_updated: ISO 8601 timestamp.
        is_deleted: Whether comment is deleted.
    """
    # The deleted flag is stored as an integer 1/0.
    if is_deleted:
        deleted_flag = 1
    else:
        deleted_flag = 0

    statement = """
        INSERT INTO pr_comments (
            comment_id, thread_id, pull_request_uid, author_id,
            content, comment_type, created_at, last_updated, is_deleted
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(comment_id) DO UPDATE SET
            content = excluded.content,
            last_updated = excluded.last_updated,
            is_deleted = excluded.is_deleted
        """
    values = (
        comment_id,
        thread_id,
        pull_request_uid,
        author_id,
        content,
        comment_type,
        created_at,
        last_updated,
        deleted_flag,
    )
    self.db.execute(statement, values)
603
+
604
def get_thread_last_updated(self, pull_request_uid: str) -> str | None:
    """Return the newest ``last_updated`` value among a PR's threads.

    §6: Used for incremental sync to avoid refetching unchanged threads.

    Args:
        pull_request_uid: PR unique identifier.

    Returns:
        The maximum stored last_updated value for the PR's threads, or
        None when there is no row or the maximum is NULL/empty.
    """
    query = """
        SELECT MAX(last_updated) as max_updated
        FROM pr_threads
        WHERE pull_request_uid = ?
        """
    record = self.db.execute(query, (pull_request_uid,)).fetchone()
    if not record:
        return None

    # MAX() over zero rows yields NULL; normalise any falsy value to None.
    newest = record["max_updated"]
    return newest if newest else None
625
+
626
def get_thread_count(self, pull_request_uid: str | None = None) -> int:
    """Get thread count, optionally filtered by PR.

    Only ``None`` means "no filter". Any string, including the empty
    string, is used as a filter value, so an empty UID can no longer be
    mistaken for a request to count every thread.

    Args:
        pull_request_uid: Optional PR filter; None counts all threads.

    Returns:
        Thread count (0 when the query yields no row).
    """
    # Identity check rather than truthiness: "" is a filter, not "no filter".
    if pull_request_uid is not None:
        cursor = self.db.execute(
            "SELECT COUNT(*) FROM pr_threads WHERE pull_request_uid = ?",
            (pull_request_uid,),
        )
    else:
        cursor = self.db.execute("SELECT COUNT(*) FROM pr_threads")
    row = cursor.fetchone()
    return int(row[0]) if row else 0
644
+
645
def get_comment_count(self, pull_request_uid: str | None = None) -> int:
    """Get comment count, optionally filtered by PR.

    Only ``None`` means "no filter". Any string, including the empty
    string, is used as a filter value, so an empty UID can no longer be
    mistaken for a request to count every comment.

    Args:
        pull_request_uid: Optional PR filter; None counts all comments.

    Returns:
        Comment count (0 when the query yields no row).
    """
    # Identity check rather than truthiness: "" is a filter, not "no filter".
    if pull_request_uid is not None:
        cursor = self.db.execute(
            "SELECT COUNT(*) FROM pr_comments WHERE pull_request_uid = ?",
            (pull_request_uid,),
        )
    else:
        cursor = self.db.execute("SELECT COUNT(*) FROM pr_comments")
    row = cursor.fetchone()
    return int(row[0]) if row else 0