ado-git-repo-insights 1.2.1__py3-none-any.whl → 2.7.4__py3-none-any.whl

This diff compares the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (28)
  1. ado_git_repo_insights/__init__.py +3 -3
  2. ado_git_repo_insights/cli.py +703 -354
  3. ado_git_repo_insights/config.py +186 -186
  4. ado_git_repo_insights/extractor/__init__.py +1 -1
  5. ado_git_repo_insights/extractor/ado_client.py +452 -246
  6. ado_git_repo_insights/extractor/pr_extractor.py +239 -239
  7. ado_git_repo_insights/ml/__init__.py +13 -0
  8. ado_git_repo_insights/ml/date_utils.py +70 -0
  9. ado_git_repo_insights/ml/forecaster.py +288 -0
  10. ado_git_repo_insights/ml/insights.py +497 -0
  11. ado_git_repo_insights/persistence/__init__.py +1 -1
  12. ado_git_repo_insights/persistence/database.py +193 -193
  13. ado_git_repo_insights/persistence/models.py +207 -145
  14. ado_git_repo_insights/persistence/repository.py +662 -376
  15. ado_git_repo_insights/transform/__init__.py +1 -1
  16. ado_git_repo_insights/transform/aggregators.py +950 -0
  17. ado_git_repo_insights/transform/csv_generator.py +132 -132
  18. ado_git_repo_insights/utils/__init__.py +1 -1
  19. ado_git_repo_insights/utils/datetime_utils.py +101 -101
  20. ado_git_repo_insights/utils/logging_config.py +172 -172
  21. ado_git_repo_insights/utils/run_summary.py +207 -206
  22. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/METADATA +56 -15
  23. ado_git_repo_insights-2.7.4.dist-info/RECORD +27 -0
  24. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/licenses/LICENSE +21 -21
  25. ado_git_repo_insights-1.2.1.dist-info/RECORD +0 -22
  26. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/WHEEL +0 -0
  27. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/entry_points.txt +0 -0
  28. {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/top_level.txt +0 -0
@@ -1,145 +1,207 @@
1
- """SQLite database schema and models for ado-git-repo-insights.
2
-
3
- This module defines the SQLite schema that maps directly to the CSV output contract.
4
- Schema changes must preserve invariants 1-4, 14-16 from INVARIANTS.md.
5
- """
6
-
7
- from __future__ import annotations
8
-
9
- # SQL schema that will be executed to create tables
10
- # Mirrors the CSV output contract exactly
11
-
12
- SCHEMA_SQL = """
13
- -- Metadata table for incremental extraction state (Invariant 6)
14
- CREATE TABLE IF NOT EXISTS extraction_metadata (
15
- id INTEGER PRIMARY KEY,
16
- organization_name TEXT NOT NULL,
17
- project_name TEXT NOT NULL,
18
- last_extraction_date TEXT NOT NULL, -- ISO 8601 (YYYY-MM-DD)
19
- last_extraction_timestamp TEXT NOT NULL, -- ISO 8601 with time
20
- UNIQUE(organization_name, project_name)
21
- );
22
-
23
- -- Core entity tables (matching CSV output contract - Invariants 1-4)
24
-
25
- -- organizations.csv: organization_name
26
- CREATE TABLE IF NOT EXISTS organizations (
27
- organization_name TEXT PRIMARY KEY
28
- );
29
-
30
- -- projects.csv: organization_name, project_name
31
- CREATE TABLE IF NOT EXISTS projects (
32
- organization_name TEXT NOT NULL,
33
- project_name TEXT NOT NULL,
34
- PRIMARY KEY (organization_name, project_name),
35
- FOREIGN KEY (organization_name) REFERENCES organizations(organization_name)
36
- );
37
-
38
- -- repositories.csv: repository_id, repository_name, project_name, organization_name
39
- -- Invariant 14: repository_id is the stable ADO ID
40
- CREATE TABLE IF NOT EXISTS repositories (
41
- repository_id TEXT PRIMARY KEY,
42
- repository_name TEXT NOT NULL,
43
- project_name TEXT NOT NULL,
44
- organization_name TEXT NOT NULL,
45
- FOREIGN KEY (organization_name, project_name)
46
- REFERENCES projects(organization_name, project_name)
47
- );
48
- CREATE INDEX IF NOT EXISTS idx_repositories_project
49
- ON repositories(organization_name, project_name);
50
-
51
- -- users.csv: user_id, display_name, email
52
- -- Invariant 16: user_id is stable ADO ID, display_name/email are mutable labels
53
- CREATE TABLE IF NOT EXISTS users (
54
- user_id TEXT PRIMARY KEY,
55
- display_name TEXT NOT NULL,
56
- email TEXT
57
- );
58
-
59
- -- pull_requests.csv: pull_request_uid, pull_request_id, organization_name, project_name,
60
- -- repository_id, user_id, title, status, description,
61
- -- creation_date, closed_date, cycle_time_minutes
62
- -- Invariant 14: pull_request_uid = {repository_id}-{pull_request_id}
63
- CREATE TABLE IF NOT EXISTS pull_requests (
64
- pull_request_uid TEXT PRIMARY KEY,
65
- pull_request_id INTEGER NOT NULL,
66
- organization_name TEXT NOT NULL,
67
- project_name TEXT NOT NULL,
68
- repository_id TEXT NOT NULL,
69
- user_id TEXT NOT NULL,
70
- title TEXT NOT NULL,
71
- status TEXT NOT NULL,
72
- description TEXT,
73
- creation_date TEXT NOT NULL, -- ISO 8601
74
- closed_date TEXT, -- ISO 8601
75
- cycle_time_minutes REAL,
76
- raw_json TEXT, -- Original ADO response for auditing
77
- FOREIGN KEY (repository_id) REFERENCES repositories(repository_id),
78
- FOREIGN KEY (user_id) REFERENCES users(user_id)
79
- );
80
- CREATE INDEX IF NOT EXISTS idx_pull_requests_closed_date
81
- ON pull_requests(closed_date);
82
- CREATE INDEX IF NOT EXISTS idx_pull_requests_org_project
83
- ON pull_requests(organization_name, project_name);
84
-
85
- -- reviewers.csv: pull_request_uid, user_id, vote, repository_id
86
- CREATE TABLE IF NOT EXISTS reviewers (
87
- id INTEGER PRIMARY KEY AUTOINCREMENT,
88
- pull_request_uid TEXT NOT NULL,
89
- user_id TEXT NOT NULL,
90
- vote INTEGER NOT NULL,
91
- repository_id TEXT NOT NULL,
92
- FOREIGN KEY (pull_request_uid) REFERENCES pull_requests(pull_request_uid),
93
- FOREIGN KEY (user_id) REFERENCES users(user_id),
94
- UNIQUE(pull_request_uid, user_id) -- One vote per reviewer per PR
95
- );
96
- CREATE INDEX IF NOT EXISTS idx_reviewers_pr ON reviewers(pull_request_uid);
97
-
98
- -- Schema version for future migrations
99
- CREATE TABLE IF NOT EXISTS schema_version (
100
- version INTEGER PRIMARY KEY,
101
- applied_at TEXT NOT NULL
102
- );
103
-
104
- -- Insert initial schema version
105
- INSERT OR IGNORE INTO schema_version (version, applied_at)
106
- VALUES (1, datetime('now'));
107
- """
108
-
109
- # CSV column order contract (NON-NEGOTIABLE per Invariants 1-4)
110
- CSV_SCHEMAS: dict[str, list[str]] = {
111
- "organizations": ["organization_name"],
112
- "projects": ["organization_name", "project_name"],
113
- "repositories": [
114
- "repository_id",
115
- "repository_name",
116
- "project_name",
117
- "organization_name",
118
- ],
119
- "pull_requests": [
120
- "pull_request_uid",
121
- "pull_request_id",
122
- "organization_name",
123
- "project_name",
124
- "repository_id",
125
- "user_id",
126
- "title",
127
- "status",
128
- "description",
129
- "creation_date",
130
- "closed_date",
131
- "cycle_time_minutes",
132
- ],
133
- "users": ["user_id", "display_name", "email"],
134
- "reviewers": ["pull_request_uid", "user_id", "vote", "repository_id"],
135
- }
136
-
137
- # Deterministic row ordering: primary key + tie-breaker (Adjustment 3)
138
- SORT_KEYS: dict[str, list[str]] = {
139
- "organizations": ["organization_name"],
140
- "projects": ["organization_name", "project_name"],
141
- "repositories": ["repository_id"],
142
- "pull_requests": ["pull_request_uid", "creation_date"],
143
- "users": ["user_id"],
144
- "reviewers": ["pull_request_uid", "user_id"],
145
- }
1
+ """SQLite database schema and models for ado-git-repo-insights.
2
+
3
+ This module defines the SQLite schema that maps directly to the CSV output contract.
4
+ Schema changes must preserve invariants 1-4, 14-16 from INVARIANTS.md.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ # SQL schema that will be executed to create tables
10
+ # Mirrors the CSV output contract exactly
11
+
12
+ SCHEMA_SQL = """
13
+ -- Metadata table for incremental extraction state (Invariant 6)
14
+ CREATE TABLE IF NOT EXISTS extraction_metadata (
15
+ id INTEGER PRIMARY KEY,
16
+ organization_name TEXT NOT NULL,
17
+ project_name TEXT NOT NULL,
18
+ last_extraction_date TEXT NOT NULL, -- ISO 8601 (YYYY-MM-DD)
19
+ last_extraction_timestamp TEXT NOT NULL, -- ISO 8601 with time
20
+ UNIQUE(organization_name, project_name)
21
+ );
22
+
23
+ -- Core entity tables (matching CSV output contract - Invariants 1-4)
24
+
25
+ -- organizations.csv: organization_name
26
+ CREATE TABLE IF NOT EXISTS organizations (
27
+ organization_name TEXT PRIMARY KEY
28
+ );
29
+
30
+ -- projects.csv: organization_name, project_name
31
+ CREATE TABLE IF NOT EXISTS projects (
32
+ organization_name TEXT NOT NULL,
33
+ project_name TEXT NOT NULL,
34
+ PRIMARY KEY (organization_name, project_name),
35
+ FOREIGN KEY (organization_name) REFERENCES organizations(organization_name)
36
+ );
37
+
38
+ -- repositories.csv: repository_id, repository_name, project_name, organization_name
39
+ -- Invariant 14: repository_id is the stable ADO ID
40
+ CREATE TABLE IF NOT EXISTS repositories (
41
+ repository_id TEXT PRIMARY KEY,
42
+ repository_name TEXT NOT NULL,
43
+ project_name TEXT NOT NULL,
44
+ organization_name TEXT NOT NULL,
45
+ FOREIGN KEY (organization_name, project_name)
46
+ REFERENCES projects(organization_name, project_name)
47
+ );
48
+ CREATE INDEX IF NOT EXISTS idx_repositories_project
49
+ ON repositories(organization_name, project_name);
50
+
51
+ -- users.csv: user_id, display_name, email
52
+ -- Invariant 16: user_id is stable ADO ID, display_name/email are mutable labels
53
+ CREATE TABLE IF NOT EXISTS users (
54
+ user_id TEXT PRIMARY KEY,
55
+ display_name TEXT NOT NULL,
56
+ email TEXT
57
+ );
58
+
59
+ -- pull_requests.csv: pull_request_uid, pull_request_id, organization_name, project_name,
60
+ -- repository_id, user_id, title, status, description,
61
+ -- creation_date, closed_date, cycle_time_minutes
62
+ -- Invariant 14: pull_request_uid = {repository_id}-{pull_request_id}
63
+ CREATE TABLE IF NOT EXISTS pull_requests (
64
+ pull_request_uid TEXT PRIMARY KEY,
65
+ pull_request_id INTEGER NOT NULL,
66
+ organization_name TEXT NOT NULL,
67
+ project_name TEXT NOT NULL,
68
+ repository_id TEXT NOT NULL,
69
+ user_id TEXT NOT NULL,
70
+ title TEXT NOT NULL,
71
+ status TEXT NOT NULL,
72
+ description TEXT,
73
+ creation_date TEXT NOT NULL, -- ISO 8601
74
+ closed_date TEXT, -- ISO 8601
75
+ cycle_time_minutes REAL,
76
+ raw_json TEXT, -- Original ADO response for auditing
77
+ FOREIGN KEY (repository_id) REFERENCES repositories(repository_id),
78
+ FOREIGN KEY (user_id) REFERENCES users(user_id)
79
+ );
80
+ CREATE INDEX IF NOT EXISTS idx_pull_requests_closed_date
81
+ ON pull_requests(closed_date);
82
+ CREATE INDEX IF NOT EXISTS idx_pull_requests_org_project
83
+ ON pull_requests(organization_name, project_name);
84
+
85
+ -- reviewers.csv: pull_request_uid, user_id, vote, repository_id
86
+ CREATE TABLE IF NOT EXISTS reviewers (
87
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
88
+ pull_request_uid TEXT NOT NULL,
89
+ user_id TEXT NOT NULL,
90
+ vote INTEGER NOT NULL,
91
+ repository_id TEXT NOT NULL,
92
+ FOREIGN KEY (pull_request_uid) REFERENCES pull_requests(pull_request_uid),
93
+ FOREIGN KEY (user_id) REFERENCES users(user_id),
94
+ UNIQUE(pull_request_uid, user_id) -- One vote per reviewer per PR
95
+ );
96
+ CREATE INDEX IF NOT EXISTS idx_reviewers_pr ON reviewers(pull_request_uid);
97
+
98
+ -- Phase 3.3: Teams (current-state membership)
99
+ -- Teams are project-scoped and fetched per run
100
+ CREATE TABLE IF NOT EXISTS teams (
101
+ team_id TEXT PRIMARY KEY,
102
+ team_name TEXT NOT NULL,
103
+ project_name TEXT NOT NULL,
104
+ organization_name TEXT NOT NULL,
105
+ description TEXT,
106
+ last_updated TEXT NOT NULL, -- ISO 8601 timestamp of last fetch
107
+ FOREIGN KEY (organization_name, project_name)
108
+ REFERENCES projects(organization_name, project_name)
109
+ );
110
+ CREATE INDEX IF NOT EXISTS idx_teams_project
111
+ ON teams(organization_name, project_name);
112
+
113
+ -- Team membership mapping (team_id ↔ user_id)
114
+ -- Represents current membership, not historical snapshots
115
+ CREATE TABLE IF NOT EXISTS team_members (
116
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
117
+ team_id TEXT NOT NULL,
118
+ user_id TEXT NOT NULL,
119
+ is_team_admin INTEGER DEFAULT 0,
120
+ FOREIGN KEY (team_id) REFERENCES teams(team_id),
121
+ FOREIGN KEY (user_id) REFERENCES users(user_id),
122
+ UNIQUE(team_id, user_id)
123
+ );
124
+ CREATE INDEX IF NOT EXISTS idx_team_members_team ON team_members(team_id);
125
+ CREATE INDEX IF NOT EXISTS idx_team_members_user ON team_members(user_id);
126
+
127
+ -- Phase 3.4: PR Threads/Comments (feature-flagged)
128
+ -- Normalized tables indexed by PR UID and update time
129
+ CREATE TABLE IF NOT EXISTS pr_threads (
130
+ thread_id TEXT PRIMARY KEY,
131
+ pull_request_uid TEXT NOT NULL,
132
+ status TEXT, -- active, fixed, closed, etc.
133
+ thread_context TEXT, -- JSON: file path, line range, etc.
134
+ last_updated TEXT NOT NULL, -- ISO 8601, used for incremental sync
135
+ created_at TEXT NOT NULL,
136
+ is_deleted INTEGER DEFAULT 0,
137
+ FOREIGN KEY (pull_request_uid) REFERENCES pull_requests(pull_request_uid)
138
+ );
139
+ CREATE INDEX IF NOT EXISTS idx_pr_threads_pr ON pr_threads(pull_request_uid);
140
+ CREATE INDEX IF NOT EXISTS idx_pr_threads_updated ON pr_threads(last_updated);
141
+
142
+ CREATE TABLE IF NOT EXISTS pr_comments (
143
+ comment_id TEXT PRIMARY KEY,
144
+ thread_id TEXT NOT NULL,
145
+ pull_request_uid TEXT NOT NULL,
146
+ author_id TEXT NOT NULL,
147
+ content TEXT,
148
+ comment_type TEXT, -- text, codeChange, system
149
+ created_at TEXT NOT NULL,
150
+ last_updated TEXT,
151
+ is_deleted INTEGER DEFAULT 0,
152
+ FOREIGN KEY (thread_id) REFERENCES pr_threads(thread_id),
153
+ FOREIGN KEY (pull_request_uid) REFERENCES pull_requests(pull_request_uid),
154
+ FOREIGN KEY (author_id) REFERENCES users(user_id)
155
+ );
156
+ CREATE INDEX IF NOT EXISTS idx_pr_comments_thread ON pr_comments(thread_id);
157
+ CREATE INDEX IF NOT EXISTS idx_pr_comments_pr ON pr_comments(pull_request_uid);
158
+ CREATE INDEX IF NOT EXISTS idx_pr_comments_author ON pr_comments(author_id);
159
+
160
+ -- Schema version for future migrations
161
+ CREATE TABLE IF NOT EXISTS schema_version (
162
+ version INTEGER PRIMARY KEY,
163
+ applied_at TEXT NOT NULL
164
+ );
165
+
166
+ -- Insert initial schema version
167
+ INSERT OR IGNORE INTO schema_version (version, applied_at)
168
+ VALUES (1, datetime('now'));
169
+ """
170
+
171
+ # CSV column order contract (NON-NEGOTIABLE per Invariants 1-4)
172
+ CSV_SCHEMAS: dict[str, list[str]] = {
173
+ "organizations": ["organization_name"],
174
+ "projects": ["organization_name", "project_name"],
175
+ "repositories": [
176
+ "repository_id",
177
+ "repository_name",
178
+ "project_name",
179
+ "organization_name",
180
+ ],
181
+ "pull_requests": [
182
+ "pull_request_uid",
183
+ "pull_request_id",
184
+ "organization_name",
185
+ "project_name",
186
+ "repository_id",
187
+ "user_id",
188
+ "title",
189
+ "status",
190
+ "description",
191
+ "creation_date",
192
+ "closed_date",
193
+ "cycle_time_minutes",
194
+ ],
195
+ "users": ["user_id", "display_name", "email"],
196
+ "reviewers": ["pull_request_uid", "user_id", "vote", "repository_id"],
197
+ }
198
+
199
+ # Deterministic row ordering: primary key + tie-breaker (Adjustment 3)
200
+ SORT_KEYS: dict[str, list[str]] = {
201
+ "organizations": ["organization_name"],
202
+ "projects": ["organization_name", "project_name"],
203
+ "repositories": ["repository_id"],
204
+ "pull_requests": ["pull_request_uid", "creation_date"],
205
+ "users": ["user_id"],
206
+ "reviewers": ["pull_request_uid", "user_id"],
207
+ }