ado_git_repo_insights-1.2.1-py3-none-any.whl → ado_git_repo_insights-2.7.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ado_git_repo_insights/__init__.py +3 -3
- ado_git_repo_insights/cli.py +703 -354
- ado_git_repo_insights/config.py +186 -186
- ado_git_repo_insights/extractor/__init__.py +1 -1
- ado_git_repo_insights/extractor/ado_client.py +452 -246
- ado_git_repo_insights/extractor/pr_extractor.py +239 -239
- ado_git_repo_insights/ml/__init__.py +13 -0
- ado_git_repo_insights/ml/date_utils.py +70 -0
- ado_git_repo_insights/ml/forecaster.py +288 -0
- ado_git_repo_insights/ml/insights.py +497 -0
- ado_git_repo_insights/persistence/__init__.py +1 -1
- ado_git_repo_insights/persistence/database.py +193 -193
- ado_git_repo_insights/persistence/models.py +207 -145
- ado_git_repo_insights/persistence/repository.py +662 -376
- ado_git_repo_insights/transform/__init__.py +1 -1
- ado_git_repo_insights/transform/aggregators.py +950 -0
- ado_git_repo_insights/transform/csv_generator.py +132 -132
- ado_git_repo_insights/utils/__init__.py +1 -1
- ado_git_repo_insights/utils/datetime_utils.py +101 -101
- ado_git_repo_insights/utils/logging_config.py +172 -172
- ado_git_repo_insights/utils/run_summary.py +207 -206
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/METADATA +56 -15
- ado_git_repo_insights-2.7.4.dist-info/RECORD +27 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/licenses/LICENSE +21 -21
- ado_git_repo_insights-1.2.1.dist-info/RECORD +0 -22
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/WHEEL +0 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/entry_points.txt +0 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/top_level.txt +0 -0
|
ado_git_repo_insights/persistence/models.py
@@ -1,145 +1,207 @@
|
|
|
1
|
-
"""SQLite database schema and models for ado-git-repo-insights.
|
|
2
|
-
|
|
3
|
-
This module defines the SQLite schema that maps directly to the CSV output contract.
|
|
4
|
-
Schema changes must preserve invariants 1-4, 14-16 from INVARIANTS.md.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from __future__ import annotations
|
|
8
|
-
|
|
9
|
-
# SQL schema that will be executed to create tables
|
|
10
|
-
# Mirrors the CSV output contract exactly
|
|
11
|
-
|
|
12
|
-
SCHEMA_SQL = """
|
|
13
|
-
-- Metadata table for incremental extraction state (Invariant 6)
|
|
14
|
-
CREATE TABLE IF NOT EXISTS extraction_metadata (
|
|
15
|
-
id INTEGER PRIMARY KEY,
|
|
16
|
-
organization_name TEXT NOT NULL,
|
|
17
|
-
project_name TEXT NOT NULL,
|
|
18
|
-
last_extraction_date TEXT NOT NULL, -- ISO 8601 (YYYY-MM-DD)
|
|
19
|
-
last_extraction_timestamp TEXT NOT NULL, -- ISO 8601 with time
|
|
20
|
-
UNIQUE(organization_name, project_name)
|
|
21
|
-
);
|
|
22
|
-
|
|
23
|
-
-- Core entity tables (matching CSV output contract - Invariants 1-4)
|
|
24
|
-
|
|
25
|
-
-- organizations.csv: organization_name
|
|
26
|
-
CREATE TABLE IF NOT EXISTS organizations (
|
|
27
|
-
organization_name TEXT PRIMARY KEY
|
|
28
|
-
);
|
|
29
|
-
|
|
30
|
-
-- projects.csv: organization_name, project_name
|
|
31
|
-
CREATE TABLE IF NOT EXISTS projects (
|
|
32
|
-
organization_name TEXT NOT NULL,
|
|
33
|
-
project_name TEXT NOT NULL,
|
|
34
|
-
PRIMARY KEY (organization_name, project_name),
|
|
35
|
-
FOREIGN KEY (organization_name) REFERENCES organizations(organization_name)
|
|
36
|
-
);
|
|
37
|
-
|
|
38
|
-
-- repositories.csv: repository_id, repository_name, project_name, organization_name
|
|
39
|
-
-- Invariant 14: repository_id is the stable ADO ID
|
|
40
|
-
CREATE TABLE IF NOT EXISTS repositories (
|
|
41
|
-
repository_id TEXT PRIMARY KEY,
|
|
42
|
-
repository_name TEXT NOT NULL,
|
|
43
|
-
project_name TEXT NOT NULL,
|
|
44
|
-
organization_name TEXT NOT NULL,
|
|
45
|
-
FOREIGN KEY (organization_name, project_name)
|
|
46
|
-
REFERENCES projects(organization_name, project_name)
|
|
47
|
-
);
|
|
48
|
-
CREATE INDEX IF NOT EXISTS idx_repositories_project
|
|
49
|
-
ON repositories(organization_name, project_name);
|
|
50
|
-
|
|
51
|
-
-- users.csv: user_id, display_name, email
|
|
52
|
-
-- Invariant 16: user_id is stable ADO ID, display_name/email are mutable labels
|
|
53
|
-
CREATE TABLE IF NOT EXISTS users (
|
|
54
|
-
user_id TEXT PRIMARY KEY,
|
|
55
|
-
display_name TEXT NOT NULL,
|
|
56
|
-
email TEXT
|
|
57
|
-
);
|
|
58
|
-
|
|
59
|
-
-- pull_requests.csv: pull_request_uid, pull_request_id, organization_name, project_name,
|
|
60
|
-
-- repository_id, user_id, title, status, description,
|
|
61
|
-
-- creation_date, closed_date, cycle_time_minutes
|
|
62
|
-
-- Invariant 14: pull_request_uid = {repository_id}-{pull_request_id}
|
|
63
|
-
CREATE TABLE IF NOT EXISTS pull_requests (
|
|
64
|
-
pull_request_uid TEXT PRIMARY KEY,
|
|
65
|
-
pull_request_id INTEGER NOT NULL,
|
|
66
|
-
organization_name TEXT NOT NULL,
|
|
67
|
-
project_name TEXT NOT NULL,
|
|
68
|
-
repository_id TEXT NOT NULL,
|
|
69
|
-
user_id TEXT NOT NULL,
|
|
70
|
-
title TEXT NOT NULL,
|
|
71
|
-
status TEXT NOT NULL,
|
|
72
|
-
description TEXT,
|
|
73
|
-
creation_date TEXT NOT NULL, -- ISO 8601
|
|
74
|
-
closed_date TEXT, -- ISO 8601
|
|
75
|
-
cycle_time_minutes REAL,
|
|
76
|
-
raw_json TEXT, -- Original ADO response for auditing
|
|
77
|
-
FOREIGN KEY (repository_id) REFERENCES repositories(repository_id),
|
|
78
|
-
FOREIGN KEY (user_id) REFERENCES users(user_id)
|
|
79
|
-
);
|
|
80
|
-
CREATE INDEX IF NOT EXISTS idx_pull_requests_closed_date
|
|
81
|
-
ON pull_requests(closed_date);
|
|
82
|
-
CREATE INDEX IF NOT EXISTS idx_pull_requests_org_project
|
|
83
|
-
ON pull_requests(organization_name, project_name);
|
|
84
|
-
|
|
85
|
-
-- reviewers.csv: pull_request_uid, user_id, vote, repository_id
|
|
86
|
-
CREATE TABLE IF NOT EXISTS reviewers (
|
|
87
|
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
88
|
-
pull_request_uid TEXT NOT NULL,
|
|
89
|
-
user_id TEXT NOT NULL,
|
|
90
|
-
vote INTEGER NOT NULL,
|
|
91
|
-
repository_id TEXT NOT NULL,
|
|
92
|
-
FOREIGN KEY (pull_request_uid) REFERENCES pull_requests(pull_request_uid),
|
|
93
|
-
FOREIGN KEY (user_id) REFERENCES users(user_id),
|
|
94
|
-
UNIQUE(pull_request_uid, user_id) -- One vote per reviewer per PR
|
|
95
|
-
);
|
|
96
|
-
CREATE INDEX IF NOT EXISTS idx_reviewers_pr ON reviewers(pull_request_uid);
|
|
97
|
-
|
|
98
|
-
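-- Schema version for future migrations
|
|
99
|
-
CREATE TABLE IF NOT EXISTS schema_version (
|
|
100
|
-
version INTEGER PRIMARY KEY,
|
|
101
|
-
applied_at TEXT NOT NULL
|
|
102
|
-
);
|
|
103
|
-
|
|
104
|
-
-- Insert initial schema version
|
|
105
|
-
INSERT OR IGNORE INTO schema_version (version, applied_at)
|
|
106
|
-
VALUES (1, datetime('now'));
|
|
107
|
-
"""
|
|
108
|
-
|
|
109
|
-
# CSV column order contract (NON-NEGOTIABLE per Invariants 1-4)
|
|
110
|
-
CSV_SCHEMAS: dict[str, list[str]] = {
|
|
111
|
-
"organizations": ["organization_name"],
|
|
112
|
-
"projects": ["organization_name", "project_name"],
|
|
113
|
-
"repositories": [
|
|
114
|
-
"repository_id",
|
|
115
|
-
"repository_name",
|
|
116
|
-
"project_name",
|
|
117
|
-
"organization_name",
|
|
118
|
-
],
|
|
119
|
-
"pull_requests": [
|
|
120
|
-
"pull_request_uid",
|
|
121
|
-
"pull_request_id",
|
|
122
|
-
"organization_name",
|
|
123
|
-
"project_name",
|
|
124
|
-
"repository_id",
|
|
125
|
-
"user_id",
|
|
126
|
-
"title",
|
|
127
|
-
"status",
|
|
128
|
-
"description",
|
|
129
|
-
"creation_date",
|
|
130
|
-
"closed_date",
|
|
131
|
-
"cycle_time_minutes",
|
|
132
|
-
],
|
|
133
|
-
"users": ["user_id", "display_name", "email"],
|
|
134
|
-
"reviewers": ["pull_request_uid", "user_id", "vote", "repository_id"],
|
|
135
|
-
}
|
|
136
|
-
|
|
137
|
-
# Deterministic row ordering: primary key + tie-breaker (Adjustment 3)
|
|
138
|
-
SORT_KEYS: dict[str, list[str]] = {
|
|
139
|
-
"organizations": ["organization_name"],
|
|
140
|
-
"projects": ["organization_name", "project_name"],
|
|
141
|
-
"repositories": ["repository_id"],
|
|
142
|
-
"pull_requests": ["pull_request_uid", "creation_date"],
|
|
143
|
-
"users": ["user_id"],
|
|
144
|
-
"reviewers": ["pull_request_uid", "user_id"],
|
|
145
|
-
}
|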
|
1
|
+
"""SQLite database schema and models for ado-git-repo-insights.
|
|
2
|
+
|
|
3
|
+
This module defines the SQLite schema that maps directly to the CSV output contract.
|
|
4
|
+
Schema changes must preserve invariants 1-4, 14-16 from INVARIANTS.md.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
# SQL schema that will be executed to create tables
|
|
10
|
+
# Mirrors the CSV output contract exactly
|
|
11
|
+
|
|
12
|
+
SCHEMA_SQL = """
|
|
13
|
+
-- Metadata table for incremental extraction state (Invariant 6)
|
|
14
|
+
CREATE TABLE IF NOT EXISTS extraction_metadata (
|
|
15
|
+
id INTEGER PRIMARY KEY,
|
|
16
|
+
organization_name TEXT NOT NULL,
|
|
17
|
+
project_name TEXT NOT NULL,
|
|
18
|
+
last_extraction_date TEXT NOT NULL, -- ISO 8601 (YYYY-MM-DD)
|
|
19
|
+
last_extraction_timestamp TEXT NOT NULL, -- ISO 8601 with time
|
|
20
|
+
UNIQUE(organization_name, project_name)
|
|
21
|
+
);
|
|
22
|
+
|
|
23
|
+
-- Core entity tables (matching CSV output contract - Invariants 1-4)
|
|
24
|
+
|
|
25
|
+
-- organizations.csv: organization_name
|
|
26
|
+
CREATE TABLE IF NOT EXISTS organizations (
|
|
27
|
+
organization_name TEXT PRIMARY KEY
|
|
28
|
+
);
|
|
29
|
+
|
|
30
|
+
-- projects.csv: organization_name, project_name
|
|
31
|
+
CREATE TABLE IF NOT EXISTS projects (
|
|
32
|
+
organization_name TEXT NOT NULL,
|
|
33
|
+
project_name TEXT NOT NULL,
|
|
34
|
+
PRIMARY KEY (organization_name, project_name),
|
|
35
|
+
FOREIGN KEY (organization_name) REFERENCES organizations(organization_name)
|
|
36
|
+
);
|
|
37
|
+
|
|
38
|
+
-- repositories.csv: repository_id, repository_name, project_name, organization_name
|
|
39
|
+
-- Invariant 14: repository_id is the stable ADO ID
|
|
40
|
+
CREATE TABLE IF NOT EXISTS repositories (
|
|
41
|
+
repository_id TEXT PRIMARY KEY,
|
|
42
|
+
repository_name TEXT NOT NULL,
|
|
43
|
+
project_name TEXT NOT NULL,
|
|
44
|
+
organization_name TEXT NOT NULL,
|
|
45
|
+
FOREIGN KEY (organization_name, project_name)
|
|
46
|
+
REFERENCES projects(organization_name, project_name)
|
|
47
|
+
);
|
|
48
|
+
CREATE INDEX IF NOT EXISTS idx_repositories_project
|
|
49
|
+
ON repositories(organization_name, project_name);
|
|
50
|
+
|
|
51
|
+
-- users.csv: user_id, display_name, email
|
|
52
|
+
-- Invariant 16: user_id is stable ADO ID, display_name/email are mutable labels
|
|
53
|
+
CREATE TABLE IF NOT EXISTS users (
|
|
54
|
+
user_id TEXT PRIMARY KEY,
|
|
55
|
+
display_name TEXT NOT NULL,
|
|
56
|
+
email TEXT
|
|
57
|
+
);
|
|
58
|
+
|
|
59
|
+
-- pull_requests.csv: pull_request_uid, pull_request_id, organization_name, project_name,
|
|
60
|
+
-- repository_id, user_id, title, status, description,
|
|
61
|
+
-- creation_date, closed_date, cycle_time_minutes
|
|
62
|
+
-- Invariant 14: pull_request_uid = {repository_id}-{pull_request_id}
|
|
63
|
+
CREATE TABLE IF NOT EXISTS pull_requests (
|
|
64
|
+
pull_request_uid TEXT PRIMARY KEY,
|
|
65
|
+
pull_request_id INTEGER NOT NULL,
|
|
66
|
+
organization_name TEXT NOT NULL,
|
|
67
|
+
project_name TEXT NOT NULL,
|
|
68
|
+
repository_id TEXT NOT NULL,
|
|
69
|
+
user_id TEXT NOT NULL,
|
|
70
|
+
title TEXT NOT NULL,
|
|
71
|
+
status TEXT NOT NULL,
|
|
72
|
+
description TEXT,
|
|
73
|
+
creation_date TEXT NOT NULL, -- ISO 8601
|
|
74
|
+
closed_date TEXT, -- ISO 8601
|
|
75
|
+
cycle_time_minutes REAL,
|
|
76
|
+
raw_json TEXT, -- Original ADO response for auditing
|
|
77
|
+
FOREIGN KEY (repository_id) REFERENCES repositories(repository_id),
|
|
78
|
+
FOREIGN KEY (user_id) REFERENCES users(user_id)
|
|
79
|
+
);
|
|
80
|
+
CREATE INDEX IF NOT EXISTS idx_pull_requests_closed_date
|
|
81
|
+
ON pull_requests(closed_date);
|
|
82
|
+
CREATE INDEX IF NOT EXISTS idx_pull_requests_org_project
|
|
83
|
+
ON pull_requests(organization_name, project_name);
|
|
84
|
+
|
|
85
|
+
-- reviewers.csv: pull_request_uid, user_id, vote, repository_id
|
|
86
|
+
CREATE TABLE IF NOT EXISTS reviewers (
|
|
87
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
88
|
+
pull_request_uid TEXT NOT NULL,
|
|
89
|
+
user_id TEXT NOT NULL,
|
|
90
|
+
vote INTEGER NOT NULL,
|
|
91
|
+
repository_id TEXT NOT NULL,
|
|
92
|
+
FOREIGN KEY (pull_request_uid) REFERENCES pull_requests(pull_request_uid),
|
|
93
|
+
FOREIGN KEY (user_id) REFERENCES users(user_id),
|
|
94
|
+
UNIQUE(pull_request_uid, user_id) -- One vote per reviewer per PR
|
|
95
|
+
);
|
|
96
|
+
CREATE INDEX IF NOT EXISTS idx_reviewers_pr ON reviewers(pull_request_uid);
|
|
97
|
+
|
|
98
|
+
-- Phase 3.3: Teams (current-state membership)
|
|
99
|
+
-- Teams are project-scoped and fetched per run
|
|
100
|
+
CREATE TABLE IF NOT EXISTS teams (
|
|
101
|
+
team_id TEXT PRIMARY KEY,
|
|
102
|
+
team_name TEXT NOT NULL,
|
|
103
|
+
project_name TEXT NOT NULL,
|
|
104
|
+
organization_name TEXT NOT NULL,
|
|
105
|
+
description TEXT,
|
|
106
|
+
last_updated TEXT NOT NULL, -- ISO 8601 timestamp of last fetch
|
|
107
|
+
FOREIGN KEY (organization_name, project_name)
|
|
108
|
+
REFERENCES projects(organization_name, project_name)
|
|
109
|
+
);
|
|
110
|
+
CREATE INDEX IF NOT EXISTS idx_teams_project
|
|
111
|
+
ON teams(organization_name, project_name);
|
|
112
|
+
|
|
113
|
+
-- Team membership mapping (team_id ↔ user_id)
|
|
114
|
+
-- Represents current membership, not historical snapshots
|
|
115
|
+
CREATE TABLE IF NOT EXISTS team_members (
|
|
116
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
117
|
+
team_id TEXT NOT NULL,
|
|
118
|
+
user_id TEXT NOT NULL,
|
|
119
|
+
is_team_admin INTEGER DEFAULT 0,
|
|
120
|
+
FOREIGN KEY (team_id) REFERENCES teams(team_id),
|
|
121
|
+
FOREIGN KEY (user_id) REFERENCES users(user_id),
|
|
122
|
+
UNIQUE(team_id, user_id)
|
|
123
|
+
);
|
|
124
|
+
CREATE INDEX IF NOT EXISTS idx_team_members_team ON team_members(team_id);
|
|
125
|
+
CREATE INDEX IF NOT EXISTS idx_team_members_user ON team_members(user_id);
|
|
126
|
+
|
|
127
|
+
-- Phase 3.4: PR Threads/Comments (feature-flagged)
|
|
128
|
+
-- Normalized tables indexed by PR UID and update time
|
|
129
|
+
CREATE TABLE IF NOT EXISTS pr_threads (
|
|
130
|
+
thread_id TEXT PRIMARY KEY,
|
|
131
|
+
pull_request_uid TEXT NOT NULL,
|
|
132
|
+
status TEXT, -- active, fixed, closed, etc.
|
|
133
|
+
thread_context TEXT, -- JSON: file path, line range, etc.
|
|
134
|
+
last_updated TEXT NOT NULL, -- ISO 8601, used for incremental sync
|
|
135
|
+
created_at TEXT NOT NULL,
|
|
136
|
+
is_deleted INTEGER DEFAULT 0,
|
|
137
|
+
FOREIGN KEY (pull_request_uid) REFERENCES pull_requests(pull_request_uid)
|
|
138
|
+
);
|
|
139
|
+
CREATE INDEX IF NOT EXISTS idx_pr_threads_pr ON pr_threads(pull_request_uid);
|
|
140
|
+
CREATE INDEX IF NOT EXISTS idx_pr_threads_updated ON pr_threads(last_updated);
|
|
141
|
+
|
|
142
|
+
CREATE TABLE IF NOT EXISTS pr_comments (
|
|
143
|
+
comment_id TEXT PRIMARY KEY,
|
|
144
|
+
thread_id TEXT NOT NULL,
|
|
145
|
+
pull_request_uid TEXT NOT NULL,
|
|
146
|
+
author_id TEXT NOT NULL,
|
|
147
|
+
content TEXT,
|
|
148
|
+
comment_type TEXT, -- text, codeChange, system
|
|
149
|
+
created_at TEXT NOT NULL,
|
|
150
|
+
last_updated TEXT,
|
|
151
|
+
is_deleted INTEGER DEFAULT 0,
|
|
152
|
+
FOREIGN KEY (thread_id) REFERENCES pr_threads(thread_id),
|
|
153
|
+
FOREIGN KEY (pull_request_uid) REFERENCES pull_requests(pull_request_uid),
|
|
154
|
+
FOREIGN KEY (author_id) REFERENCES users(user_id)
|
|
155
|
+
);
|
|
156
|
+
CREATE INDEX IF NOT EXISTS idx_pr_comments_thread ON pr_comments(thread_id);
|
|
157
|
+
CREATE INDEX IF NOT EXISTS idx_pr_comments_pr ON pr_comments(pull_request_uid);
|
|
158
|
+
CREATE INDEX IF NOT EXISTS idx_pr_comments_author ON pr_comments(author_id);
|
|
159
|
+
|
|
160
|
+
-- Schema version for future migrations
|
|
161
|
+
CREATE TABLE IF NOT EXISTS schema_version (
|
|
162
|
+
version INTEGER PRIMARY KEY,
|
|
163
|
+
applied_at TEXT NOT NULL
|
|
164
|
+
);
|
|
165
|
+
|
|
166
|
+
-- Insert initial schema version
|
|
167
|
+
INSERT OR IGNORE INTO schema_version (version, applied_at)
|
|
168
|
+
VALUES (1, datetime('now'));
|
|
169
|
+
"""
|
|
170
|
+
|
|
171
|
+
# CSV column order contract (NON-NEGOTIABLE per Invariants 1-4)
|
|
172
|
+
CSV_SCHEMAS: dict[str, list[str]] = {
|
|
173
|
+
"organizations": ["organization_name"],
|
|
174
|
+
"projects": ["organization_name", "project_name"],
|
|
175
|
+
"repositories": [
|
|
176
|
+
"repository_id",
|
|
177
|
+
"repository_name",
|
|
178
|
+
"project_name",
|
|
179
|
+
"organization_name",
|
|
180
|
+
],
|
|
181
|
+
"pull_requests": [
|
|
182
|
+
"pull_request_uid",
|
|
183
|
+
"pull_request_id",
|
|
184
|
+
"organization_name",
|
|
185
|
+
"project_name",
|
|
186
|
+
"repository_id",
|
|
187
|
+
"user_id",
|
|
188
|
+
"title",
|
|
189
|
+
"status",
|
|
190
|
+
"description",
|
|
191
|
+
"creation_date",
|
|
192
|
+
"closed_date",
|
|
193
|
+
"cycle_time_minutes",
|
|
194
|
+
],
|
|
195
|
+
"users": ["user_id", "display_name", "email"],
|
|
196
|
+
"reviewers": ["pull_request_uid", "user_id", "vote", "repository_id"],
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
# Deterministic row ordering: primary key + tie-breaker (Adjustment 3)
|
|
200
|
+
SORT_KEYS: dict[str, list[str]] = {
|
|
201
|
+
"organizations": ["organization_name"],
|
|
202
|
+
"projects": ["organization_name", "project_name"],
|
|
203
|
+
"repositories": ["repository_id"],
|
|
204
|
+
"pull_requests": ["pull_request_uid", "creation_date"],
|
|
205
|
+
"users": ["user_id"],
|
|
206
|
+
"reviewers": ["pull_request_uid", "user_id"],
|
|
207
|
+
}
|