ado-git-repo-insights 1.2.1__py3-none-any.whl → 2.7.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ado_git_repo_insights/__init__.py +3 -3
- ado_git_repo_insights/cli.py +703 -354
- ado_git_repo_insights/config.py +186 -186
- ado_git_repo_insights/extractor/__init__.py +1 -1
- ado_git_repo_insights/extractor/ado_client.py +452 -246
- ado_git_repo_insights/extractor/pr_extractor.py +239 -239
- ado_git_repo_insights/ml/__init__.py +13 -0
- ado_git_repo_insights/ml/date_utils.py +70 -0
- ado_git_repo_insights/ml/forecaster.py +288 -0
- ado_git_repo_insights/ml/insights.py +497 -0
- ado_git_repo_insights/persistence/__init__.py +1 -1
- ado_git_repo_insights/persistence/database.py +193 -193
- ado_git_repo_insights/persistence/models.py +207 -145
- ado_git_repo_insights/persistence/repository.py +662 -376
- ado_git_repo_insights/transform/__init__.py +1 -1
- ado_git_repo_insights/transform/aggregators.py +950 -0
- ado_git_repo_insights/transform/csv_generator.py +132 -132
- ado_git_repo_insights/utils/__init__.py +1 -1
- ado_git_repo_insights/utils/datetime_utils.py +101 -101
- ado_git_repo_insights/utils/logging_config.py +172 -172
- ado_git_repo_insights/utils/run_summary.py +207 -206
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/METADATA +56 -15
- ado_git_repo_insights-2.7.4.dist-info/RECORD +27 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/licenses/LICENSE +21 -21
- ado_git_repo_insights-1.2.1.dist-info/RECORD +0 -22
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/WHEEL +0 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/entry_points.txt +0 -0
- {ado_git_repo_insights-1.2.1.dist-info → ado_git_repo_insights-2.7.4.dist-info}/top_level.txt +0 -0
|
@@ -1,193 +1,193 @@
|
|
|
1
|
-
"""SQLite database connection and management.
|
|
2
|
-
|
|
3
|
-
This module handles database connections, schema initialization, and
|
|
4
|
-
ensures safe transaction handling per Invariant 7 (no publish-on-failure).
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from __future__ import annotations
|
|
8
|
-
|
|
9
|
-
import logging
|
|
10
|
-
import sqlite3
|
|
11
|
-
from collections.abc import Iterator
|
|
12
|
-
from contextlib import contextmanager
|
|
13
|
-
from pathlib import Path
|
|
14
|
-
from typing import TYPE_CHECKING, Any
|
|
15
|
-
|
|
16
|
-
from .models import SCHEMA_SQL
|
|
17
|
-
|
|
18
|
-
if TYPE_CHECKING:
|
|
19
|
-
from sqlite3 import Connection, Cursor
|
|
20
|
-
|
|
21
|
-
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
class DatabaseError(Exception):
    """Raised when a database operation cannot be completed."""
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class DatabaseManager:
    """Manages a single SQLite connection and its schema.

    Invariant 5: SQLite is the source of truth for derived outputs.
    Invariant 9: Persistence must be recoverable.

    The connection is opened with ``isolation_level=None`` (autocommit), so
    multi-statement atomicity is provided explicitly by :meth:`transaction`.
    """

    def __init__(self, db_path: Path) -> None:
        """Initialize the database manager.

        No connection is opened here; call :meth:`connect` first.

        Args:
            db_path: Path to the SQLite database file.
        """
        self.db_path = db_path
        self._connection: Connection | None = None

    @property
    def connection(self) -> Connection:
        """Get the active database connection.

        Returns:
            The open SQLite connection.

        Raises:
            DatabaseError: If not connected.
        """
        if self._connection is None:
            raise DatabaseError("Database not connected. Call connect() first.")
        return self._connection

    def connect(self) -> None:
        """Open a connection to the database.

        Creates the parent directories if they don't exist. A database file
        that did not exist before the call gets the schema created; an
        existing file is validated instead. If a connection is already open
        it is closed first so the old handle is not leaked.

        Raises:
            DatabaseError: If the connection cannot be opened, the schema
                cannot be created, or an existing database is missing
                required tables. The connection is closed before raising.
        """
        # Reconnecting must not leak the handle opened by an earlier call.
        self.close()

        # Ensure parent directory exists
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

        # Must be sampled before connecting, since connecting may create the file.
        is_new_db = not self.db_path.exists()

        try:
            self._connection = sqlite3.connect(
                str(self.db_path),
                isolation_level=None,  # Autocommit; we'll manage transactions explicitly
            )
            self._connection.row_factory = sqlite3.Row

            # Enable foreign keys
            self._connection.execute("PRAGMA foreign_keys = ON")

            if is_new_db:
                logger.info("Creating new database at %s", self.db_path)
                self._initialize_schema()
            else:
                logger.info("Connected to existing database at %s", self.db_path)
                self._validate_schema()

        except sqlite3.Error as e:
            self.close()  # Ensure connection is closed on error
            raise DatabaseError(f"Failed to connect to database: {e}") from e
        except DatabaseError:
            self.close()  # Ensure connection is closed on validation error
            raise

    def close(self) -> None:
        """Close the database connection; a no-op when not connected."""
        if self._connection is not None:
            self._connection.close()
            self._connection = None
            logger.debug("Database connection closed")

    def _initialize_schema(self) -> None:
        """Create all tables and indexes by running ``SCHEMA_SQL``.

        Raises:
            DatabaseError: If not connected or if the schema script fails.
        """
        try:
            self.connection.executescript(SCHEMA_SQL)
        except sqlite3.Error as e:
            raise DatabaseError(f"Failed to initialize schema: {e}") from e
        logger.info("Database schema initialized")

    def _validate_schema(self) -> None:
        """Validate that required tables exist.

        Invariant 9: If schema is invalid, fail fast with clear error.

        Raises:
            DatabaseError: If any required table is missing.
        """
        required_tables = [
            "extraction_metadata",
            "organizations",
            "projects",
            "repositories",
            "users",
            "pull_requests",
            "reviewers",
        ]

        cursor = self.connection.execute(
            "SELECT name FROM sqlite_master WHERE type='table'"
        )
        existing_tables = {row["name"] for row in cursor.fetchall()}

        missing = set(required_tables) - existing_tables
        if missing:
            raise DatabaseError(
                f"Database schema invalid. Missing tables: {missing}. "
                "Consider creating a fresh database."
            )

    @contextmanager
    def transaction(self) -> Iterator[Cursor]:
        """Execute operations within a transaction.

        Invariant 7: On failure, changes are rolled back.

        The transaction is committed when the ``with`` body finishes normally.
        If the body or the commit raises, the transaction is rolled back and
        the original exception propagates unchanged.

        Yields:
            Database cursor for executing queries.

        Raises:
            DatabaseError: If not connected.
            sqlite3.Error: If BEGIN or COMMIT fails.
        """
        conn = self.connection
        cursor = conn.cursor()

        try:
            # A failed BEGIN (for example because a transaction is already
            # open) propagates as-is: this call started nothing, so it must
            # not roll back work that belongs to someone else.
            cursor.execute("BEGIN TRANSACTION")
            try:
                yield cursor
                cursor.execute("COMMIT")
            except BaseException:
                # BaseException so that interrupts do not leave a transaction
                # open on the shared connection. Roll back only while a
                # transaction is still active, and never let a failing
                # ROLLBACK replace the error that is already propagating.
                if conn.in_transaction:
                    try:
                        conn.execute("ROLLBACK")
                    except sqlite3.Error:
                        logger.exception(
                            "ROLLBACK failed; re-raising the original error"
                        )
                raise
        finally:
            cursor.close()

    def execute(self, sql: str, parameters: tuple[Any, ...] = ()) -> Cursor:  # noqa: UP006
        """Execute a single SQL statement.

        Args:
            sql: SQL statement to execute.
            parameters: Parameters for the statement.

        Returns:
            Cursor with results.

        Raises:
            DatabaseError: If not connected.
        """
        return self.connection.execute(sql, parameters)

    def executemany(
        self,
        sql: str,
        parameters: list[tuple[Any, ...]],  # noqa: UP006
    ) -> Cursor:
        """Execute a SQL statement with multiple parameter sets.

        Args:
            sql: SQL statement to execute.
            parameters: List of parameter tuples.

        Returns:
            Cursor with results.

        Raises:
            DatabaseError: If not connected.
        """
        return self.connection.executemany(sql, parameters)

    def get_schema_version(self) -> int:
        """Get the current schema version.

        Best-effort lookup: any SQLite error (such as a missing
        ``schema_version`` table) is reported as version 0 rather than raised.

        Returns:
            The highest value in ``schema_version.version``, or 0 when the
            table is empty or cannot be queried.

        Raises:
            DatabaseError: If not connected.
        """
        try:
            cursor = self.execute("SELECT MAX(version) as version FROM schema_version")
            row = cursor.fetchone()
            return int(row["version"]) if row and row["version"] is not None else 0
        except sqlite3.Error:
            return 0
|
|
1
|
+
"""SQLite database connection and management.
|
|
2
|
+
|
|
3
|
+
This module handles database connections, schema initialization, and
|
|
4
|
+
ensures safe transaction handling per Invariant 7 (no publish-on-failure).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import logging
|
|
10
|
+
import sqlite3
|
|
11
|
+
from collections.abc import Iterator
|
|
12
|
+
from contextlib import contextmanager
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import TYPE_CHECKING, Any
|
|
15
|
+
|
|
16
|
+
from .models import SCHEMA_SQL
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from sqlite3 import Connection, Cursor
|
|
20
|
+
|
|
21
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class DatabaseError(Exception):
    """Raised when a database operation cannot be completed."""
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DatabaseManager:
    """Manages a single SQLite connection and its schema.

    Invariant 5: SQLite is the source of truth for derived outputs.
    Invariant 9: Persistence must be recoverable.

    The connection is opened with ``isolation_level=None`` (autocommit), so
    multi-statement atomicity is provided explicitly by :meth:`transaction`.
    """

    def __init__(self, db_path: Path) -> None:
        """Initialize the database manager.

        No connection is opened here; call :meth:`connect` first.

        Args:
            db_path: Path to the SQLite database file.
        """
        self.db_path = db_path
        self._connection: Connection | None = None

    @property
    def connection(self) -> Connection:
        """Get the active database connection.

        Returns:
            The open SQLite connection.

        Raises:
            DatabaseError: If not connected.
        """
        if self._connection is None:
            raise DatabaseError("Database not connected. Call connect() first.")
        return self._connection

    def connect(self) -> None:
        """Open a connection to the database.

        Creates the parent directories if they don't exist. A database file
        that did not exist before the call gets the schema created; an
        existing file is validated instead. If a connection is already open
        it is closed first so the old handle is not leaked.

        Raises:
            DatabaseError: If the connection cannot be opened, the schema
                cannot be created, or an existing database is missing
                required tables. The connection is closed before raising.
        """
        # Reconnecting must not leak the handle opened by an earlier call.
        self.close()

        # Ensure parent directory exists
        self.db_path.parent.mkdir(parents=True, exist_ok=True)

        # Must be sampled before connecting, since connecting may create the file.
        is_new_db = not self.db_path.exists()

        try:
            self._connection = sqlite3.connect(
                str(self.db_path),
                isolation_level=None,  # Autocommit; we'll manage transactions explicitly
            )
            self._connection.row_factory = sqlite3.Row

            # Enable foreign keys
            self._connection.execute("PRAGMA foreign_keys = ON")

            if is_new_db:
                logger.info("Creating new database at %s", self.db_path)
                self._initialize_schema()
            else:
                logger.info("Connected to existing database at %s", self.db_path)
                self._validate_schema()

        except sqlite3.Error as e:
            self.close()  # Ensure connection is closed on error
            raise DatabaseError(f"Failed to connect to database: {e}") from e
        except DatabaseError:
            self.close()  # Ensure connection is closed on validation error
            raise

    def close(self) -> None:
        """Close the database connection; a no-op when not connected."""
        if self._connection is not None:
            self._connection.close()
            self._connection = None
            logger.debug("Database connection closed")

    def _initialize_schema(self) -> None:
        """Create all tables and indexes by running ``SCHEMA_SQL``.

        Raises:
            DatabaseError: If not connected or if the schema script fails.
        """
        try:
            self.connection.executescript(SCHEMA_SQL)
        except sqlite3.Error as e:
            raise DatabaseError(f"Failed to initialize schema: {e}") from e
        logger.info("Database schema initialized")

    def _validate_schema(self) -> None:
        """Validate that required tables exist.

        Invariant 9: If schema is invalid, fail fast with clear error.

        Raises:
            DatabaseError: If any required table is missing.
        """
        required_tables = [
            "extraction_metadata",
            "organizations",
            "projects",
            "repositories",
            "users",
            "pull_requests",
            "reviewers",
        ]

        cursor = self.connection.execute(
            "SELECT name FROM sqlite_master WHERE type='table'"
        )
        existing_tables = {row["name"] for row in cursor.fetchall()}

        missing = set(required_tables) - existing_tables
        if missing:
            raise DatabaseError(
                f"Database schema invalid. Missing tables: {missing}. "
                "Consider creating a fresh database."
            )

    @contextmanager
    def transaction(self) -> Iterator[Cursor]:
        """Execute operations within a transaction.

        Invariant 7: On failure, changes are rolled back.

        The transaction is committed when the ``with`` body finishes normally.
        If the body or the commit raises, the transaction is rolled back and
        the original exception propagates unchanged.

        Yields:
            Database cursor for executing queries.

        Raises:
            DatabaseError: If not connected.
            sqlite3.Error: If BEGIN or COMMIT fails.
        """
        conn = self.connection
        cursor = conn.cursor()

        try:
            # A failed BEGIN (for example because a transaction is already
            # open) propagates as-is: this call started nothing, so it must
            # not roll back work that belongs to someone else.
            cursor.execute("BEGIN TRANSACTION")
            try:
                yield cursor
                cursor.execute("COMMIT")
            except BaseException:
                # BaseException so that interrupts do not leave a transaction
                # open on the shared connection. Roll back only while a
                # transaction is still active, and never let a failing
                # ROLLBACK replace the error that is already propagating.
                if conn.in_transaction:
                    try:
                        conn.execute("ROLLBACK")
                    except sqlite3.Error:
                        logger.exception(
                            "ROLLBACK failed; re-raising the original error"
                        )
                raise
        finally:
            cursor.close()

    def execute(self, sql: str, parameters: tuple[Any, ...] = ()) -> Cursor:  # noqa: UP006
        """Execute a single SQL statement.

        Args:
            sql: SQL statement to execute.
            parameters: Parameters for the statement.

        Returns:
            Cursor with results.

        Raises:
            DatabaseError: If not connected.
        """
        return self.connection.execute(sql, parameters)

    def executemany(
        self,
        sql: str,
        parameters: list[tuple[Any, ...]],  # noqa: UP006
    ) -> Cursor:
        """Execute a SQL statement with multiple parameter sets.

        Args:
            sql: SQL statement to execute.
            parameters: List of parameter tuples.

        Returns:
            Cursor with results.

        Raises:
            DatabaseError: If not connected.
        """
        return self.connection.executemany(sql, parameters)

    def get_schema_version(self) -> int:
        """Get the current schema version.

        Best-effort lookup: any SQLite error (such as a missing
        ``schema_version`` table) is reported as version 0 rather than raised.

        Returns:
            The highest value in ``schema_version.version``, or 0 when the
            table is empty or cannot be queried.

        Raises:
            DatabaseError: If not connected.
        """
        try:
            cursor = self.execute("SELECT MAX(version) as version FROM schema_version")
            row = cursor.fetchone()
            return int(row["version"]) if row and row["version"] is not None else 0
        except sqlite3.Error:
            return 0
|