gitflow-analytics 3.6.1-py3-none-any.whl → 3.7.0-py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in their public registries, and is provided for informational purposes only.
- gitflow_analytics/__init__.py +8 -12
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/cli.py +156 -175
- gitflow_analytics/cli_wizards/install_wizard.py +5 -5
- gitflow_analytics/core/cache.py +3 -3
- gitflow_analytics/models/database.py +279 -45
- gitflow_analytics/security/reports/__init__.py +5 -0
- gitflow_analytics/security/reports/security_report.py +358 -0
- {gitflow_analytics-3.6.1.dist-info → gitflow_analytics-3.7.0.dist-info}/METADATA +2 -4
- {gitflow_analytics-3.6.1.dist-info → gitflow_analytics-3.7.0.dist-info}/RECORD +14 -25
- gitflow_analytics/tui/__init__.py +0 -5
- gitflow_analytics/tui/app.py +0 -726
- gitflow_analytics/tui/progress_adapter.py +0 -313
- gitflow_analytics/tui/screens/__init__.py +0 -8
- gitflow_analytics/tui/screens/analysis_progress_screen.py +0 -857
- gitflow_analytics/tui/screens/configuration_screen.py +0 -523
- gitflow_analytics/tui/screens/loading_screen.py +0 -348
- gitflow_analytics/tui/screens/main_screen.py +0 -321
- gitflow_analytics/tui/screens/results_screen.py +0 -735
- gitflow_analytics/tui/widgets/__init__.py +0 -7
- gitflow_analytics/tui/widgets/data_table.py +0 -255
- gitflow_analytics/tui/widgets/export_modal.py +0 -301
- gitflow_analytics/tui/widgets/progress_widget.py +0 -187
- {gitflow_analytics-3.6.1.dist-info → gitflow_analytics-3.7.0.dist-info}/WHEEL +0 -0
- {gitflow_analytics-3.6.1.dist-info → gitflow_analytics-3.7.0.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-3.6.1.dist-info → gitflow_analytics-3.7.0.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-3.6.1.dist-info → gitflow_analytics-3.7.0.dist-info}/top_level.txt +0 -0
--- a/gitflow_analytics/models/database.py
+++ b/gitflow_analytics/models/database.py
@@ -3,9 +3,9 @@
 import logging
 import os
 import tempfile
-from datetime import datetime
+from datetime import datetime, timezone
 from pathlib import Path
-from typing import Any
+from typing import Any, Optional
 
 from sqlalchemy import (
     JSON,
@@ -28,6 +28,19 @@ logger = logging.getLogger(__name__)
 Base: Any = declarative_base()
 
 
+def utcnow_tz_aware() -> datetime:
+    """Return current UTC time as timezone-aware datetime.
+
+    WHY: SQLAlchemy DateTime(timezone=True) requires timezone-aware datetimes.
+    Using timezone-naive datetime.utcnow() causes query mismatches when filtering
+    by timezone-aware date ranges.
+
+    Returns:
+        Timezone-aware datetime in UTC
+    """
+    return datetime.now(timezone.utc)
+
+
 class CachedCommit(Base):
     """Cached commit analysis results."""
 
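A note on the hunk above, which is the crux of this release: Python refuses to order naive datetimes against aware ones, which is the root of the "query mismatches" the docstring mentions. A minimal standalone sketch (not from the package) of that rule:

    from datetime import datetime, timezone

    naive = datetime.utcnow()           # what the old v1.0 schema stored
    aware = datetime.now(timezone.utc)  # what date-range filters pass in

    try:
        naive < aware
    except TypeError as exc:
        print(exc)  # can't compare offset-naive and offset-aware datetimes
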
@@ -44,7 +57,7 @@ class CachedCommit(Base):
     author_name = Column(String)
     author_email = Column(String)
     message = Column(String)
-    timestamp = Column(DateTime)
+    timestamp = Column(DateTime(timezone=True))  # CRITICAL: Preserve timezone for date filtering
     branch = Column(String)
     is_merge = Column(Boolean, default=False)
 
@@ -62,7 +75,7 @@
     ticket_references = Column(JSON)  # List of ticket IDs
 
     # Cache metadata
-    cached_at = Column(DateTime, default=datetime.utcnow)
+    cached_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
     cache_version = Column(String, default="1.0")
 
     # Indexes for performance
@@ -87,12 +100,12 @@ class DeveloperIdentity(Base):
     # Statistics
     total_commits = Column(Integer, default=0)
     total_story_points = Column(Integer, default=0)
-    first_seen = Column(DateTime, default=datetime.utcnow)
-    last_seen = Column(DateTime, default=datetime.utcnow)
+    first_seen = Column(DateTime(timezone=True), default=utcnow_tz_aware)
+    last_seen = Column(DateTime(timezone=True), default=utcnow_tz_aware)
 
     # Metadata
-    created_at = Column(DateTime, default=datetime.utcnow)
-    updated_at = Column(DateTime, default=datetime.utcnow)
+    created_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
+    updated_at = Column(DateTime(timezone=True), default=utcnow_tz_aware, onupdate=utcnow_tz_aware)
 
     __table_args__ = (
         Index("idx_primary_email", "primary_email"),
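For readers unfamiliar with the pattern in these hunks: default= and onupdate= take the function object itself (utcnow_tz_aware, with no parentheses), and SQLAlchemy invokes it once per INSERT or UPDATE. A self-contained sketch with a throwaway model — the Row class and in-memory engine are illustrative, not part of the package:

    from datetime import datetime, timezone

    from sqlalchemy import Column, DateTime, Integer, create_engine
    from sqlalchemy.orm import Session, declarative_base

    Base = declarative_base()

    def utcnow_tz_aware() -> datetime:
        return datetime.now(timezone.utc)

    class Row(Base):  # hypothetical table for illustration
        __tablename__ = "rows"
        id = Column(Integer, primary_key=True)
        updated_at = Column(
            DateTime(timezone=True), default=utcnow_tz_aware, onupdate=utcnow_tz_aware
        )

    engine = create_engine("sqlite:///:memory:")
    Base.metadata.create_all(engine)
    with Session(engine) as session:
        session.add(Row())
        session.flush()  # the default callable fires here, once per row
        print(session.query(Row).one().updated_at.tzinfo)  # UTC
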
@@ -130,8 +143,8 @@ class PullRequestCache(Base):
     title = Column(String)
     description = Column(String)
     author = Column(String)
-    created_at = Column(DateTime)
-    merged_at = Column(DateTime, nullable=True)
+    created_at = Column(DateTime(timezone=True))
+    merged_at = Column(DateTime(timezone=True), nullable=True)
 
     # Extracted data
     story_points = Column(Integer, nullable=True)
@@ -141,7 +154,7 @@
     commit_hashes = Column(JSON)  # List of commit hashes
 
     # Cache metadata
-    cached_at = Column(DateTime, default=datetime.utcnow)
+    cached_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
 
     __table_args__ = (Index("idx_repo_pr", "repo_path", "pr_number", unique=True),)
 
@@ -163,9 +176,9 @@ class IssueCache(Base):
     description = Column(String)
     status = Column(String)
     assignee = Column(String, nullable=True)
-    created_at = Column(DateTime)
-    updated_at = Column(DateTime)
-    resolved_at = Column(DateTime, nullable=True)
+    created_at = Column(DateTime(timezone=True))
+    updated_at = Column(DateTime(timezone=True))
+    resolved_at = Column(DateTime(timezone=True), nullable=True)
 
     # Extracted data
     story_points = Column(Integer, nullable=True)
@@ -175,7 +188,7 @@
     platform_data = Column(JSON)  # Additional platform-specific fields
 
     # Cache metadata
-    cached_at = Column(DateTime, default=datetime.utcnow)
+    cached_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
 
     __table_args__ = (
         Index("idx_platform_issue", "platform", "issue_id", unique=True),
@@ -215,7 +228,7 @@ class QualitativeCommitData(Base):
     confidence_score = Column(Float, nullable=False)
 
     # Timestamps
-    analyzed_at = Column(DateTime, default=datetime.utcnow)
+    analyzed_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
     analysis_version = Column(String, default="1.0")
 
     # Indexes for efficient querying
@@ -250,8 +263,8 @@ class PatternCache(Base):
 
     # Usage tracking for cache management
     hit_count = Column(Integer, default=1)
-    last_used = Column(DateTime, default=datetime.utcnow)
-    created_at = Column(DateTime, default=datetime.utcnow)
+    last_used = Column(DateTime(timezone=True), default=utcnow_tz_aware)
+    created_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
 
     # Source tracking
     source_method = Column(String, nullable=False)  # 'nlp' or 'llm'
@@ -284,7 +297,7 @@ class LLMUsageStats(Base):
     # API call metadata
     model_name = Column(String, nullable=False)
     api_provider = Column(String, default="openrouter")
-    timestamp = Column(DateTime, default=datetime.utcnow)
+    timestamp = Column(DateTime(timezone=True), default=utcnow_tz_aware)
 
     # Usage metrics
     input_tokens = Column(Integer, nullable=False)
@@ -342,8 +355,8 @@ class TrainingData(Base):
 
     # Training metadata
     training_session_id = Column(String, nullable=False)  # Groups related training data
-    created_at = Column(DateTime, default=datetime.utcnow)
-    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    created_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
+    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=utcnow_tz_aware)
 
     # Quality assurance
     validated = Column(Boolean, default=False)  # Human validation flag
@@ -399,7 +412,7 @@ class RepositoryAnalysisStatus(Base):
     unique_developers = Column(Integer, default=0)
 
     # Analysis metadata
-    last_updated = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    last_updated = Column(DateTime, default=datetime.utcnow, onupdate=utcnow_tz_aware)
     analysis_version = Column(String, default="2.0")  # For tracking schema changes
 
     # Configuration hash to detect config changes
@@ -438,8 +451,8 @@ class TrainingSession(Base):
     session_id = Column(String, unique=True, nullable=False)
 
     # Session metadata
-    started_at = Column(DateTime, default=datetime.utcnow)
-    completed_at = Column(DateTime)
+    started_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
+    completed_at = Column(DateTime(timezone=True))
     status = Column(String, default="running")  # running, completed, failed
 
     # Configuration
@@ -500,7 +513,7 @@ class ClassificationModel(Base):
     name = Column(String, nullable=False)
     version = Column(String, nullable=False)
     model_type = Column(String, nullable=False)  # 'sklearn', 'spacy', 'custom'
-    created_at = Column(DateTime, default=datetime.utcnow)
+    created_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
 
     # Training information
     training_session_id = Column(String, ForeignKey("training_sessions.session_id"))
@@ -521,7 +534,7 @@ class ClassificationModel(Base):
     # Usage tracking
     active = Column(Boolean, default=True)  # Whether model is active
     usage_count = Column(Integer, default=0)  # Number of times used
-    last_used = Column(DateTime)
+    last_used = Column(DateTime(timezone=True))
 
     # Model validation
     cross_validation_scores = Column(JSON)  # Cross-validation results
@@ -564,11 +577,11 @@ class DailyCommitBatch(Base):
     unique_tickets = Column(JSON)  # List of ticket IDs referenced on this day
 
     # Processing status
-    fetched_at = Column(DateTime, default=datetime.utcnow)
+    fetched_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
     classification_status = Column(
         String, default="pending"
     )  # pending, processing, completed, failed
-    classified_at = Column(DateTime, nullable=True)
+    classified_at = Column(DateTime(timezone=True), nullable=True)
 
     # Batch context for LLM classification
     context_summary = Column(String, nullable=True)  # Brief summary of day's activity
@@ -613,9 +626,9 @@ class DetailedTicketData(Base):
     # People and dates
     assignee = Column(String, nullable=True)
     reporter = Column(String, nullable=True)
-    created_at = Column(DateTime)
-    updated_at = Column(DateTime)
-    resolved_at = Column(DateTime, nullable=True)
+    created_at = Column(DateTime(timezone=True))
+    updated_at = Column(DateTime(timezone=True))
+    resolved_at = Column(DateTime(timezone=True), nullable=True)
 
     # Metrics for classification context
     story_points = Column(Integer, nullable=True)
@@ -636,7 +649,7 @@ class DetailedTicketData(Base):
     platform_data = Column(JSON)  # Additional platform-specific fields
 
     # Fetch metadata
-    fetched_at = Column(DateTime, default=datetime.utcnow)
+    fetched_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
     fetch_version = Column(String, default="2.0")  # Version for schema evolution
 
     # Indexes for efficient lookup and context building
@@ -683,8 +696,8 @@ class CommitClassificationBatch(Base):
 
     # Processing results
     processing_status = Column(String, default="pending")  # pending, processing, completed, failed
-    started_at = Column(DateTime, default=datetime.utcnow)
-    completed_at = Column(DateTime, nullable=True)
+    started_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
+    completed_at = Column(DateTime(timezone=True), nullable=True)
     processing_time_ms = Column(Float, nullable=True)
 
     # Quality metrics
@@ -741,7 +754,7 @@ class CommitTicketCorrelation(Base):
     matching_pattern = Column(String, nullable=True)  # Regex pattern that matched
 
     # Timestamps
-    created_at = Column(DateTime, default=datetime.utcnow)
+    created_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
     validated = Column(Boolean, default=False)  # Manual validation flag
 
     # Indexes for efficient correlation lookup
@@ -801,8 +814,8 @@ class DailyMetrics(Base):
     complex_commits = Column(Integer, default=0)  # Commits with >5 files changed
 
     # Metadata
-    created_at = Column(DateTime, default=datetime.utcnow)
-    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    created_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
+    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=utcnow_tz_aware)
 
     # Indexes for efficient querying
     __table_args__ = (
@@ -846,7 +859,7 @@ class WeeklyTrends(Base):
     avg_commits_per_day = Column(Float, default=0.0)
 
     # Metadata
-    calculated_at = Column(DateTime, default=datetime.utcnow)
+    calculated_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
 
     # Indexes for trend queries
     __table_args__ = (
@@ -856,9 +869,30 @@ class WeeklyTrends(Base):
     )
 
 
+class SchemaVersion(Base):
+    """Track database schema versions for automatic migrations.
+
+    WHY: Schema changes (like timezone-aware timestamps) require migration
+    to ensure old cache databases work correctly without user intervention.
+    This table tracks the current schema version to trigger automatic upgrades.
+    """
+
+    __tablename__ = "schema_version"
+
+    id = Column(Integer, primary_key=True)
+    version = Column(String, nullable=False)  # e.g., "2.0"
+    upgraded_at = Column(DateTime(timezone=True), default=utcnow_tz_aware)
+    previous_version = Column(String, nullable=True)
+    migration_notes = Column(String, nullable=True)
+
+
 class Database:
     """Database connection manager with robust permission handling."""
 
+    # Schema version constants
+    CURRENT_SCHEMA_VERSION = "2.0"  # Timezone-aware timestamps
+    LEGACY_SCHEMA_VERSION = "1.0"  # Timezone-naive timestamps
+
     def __init__(self, db_path: Path):
         """
         Initialize database connection with proper error handling.
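The new schema_version table is effectively append-only: each upgrade inserts a row, and the newest row (highest id) is the effective version. A hedged ORM sketch of reading it back — it assumes the SchemaVersion model from the hunk above is importable, and the cache path is hypothetical:

    from sqlalchemy import create_engine, select
    from sqlalchemy.orm import Session

    engine = create_engine("sqlite:///gitflow_cache.db")  # hypothetical path
    with Session(engine) as session:
        latest = session.execute(
            select(SchemaVersion).order_by(SchemaVersion.id.desc()).limit(1)
        ).scalar_one_or_none()
        print(latest.version if latest else "no version recorded")
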
@@ -936,10 +970,21 @@ class Database:
                 },
             )
 
-            #
-            Base.metadata.create_all(self.engine)
+            # Check schema version BEFORE creating tables to detect legacy databases
             self.SessionLocal = sessionmaker(bind=self.engine)
-
+            needs_migration = self._check_schema_version_before_create()
+
+            # Create/update tables
+            Base.metadata.create_all(self.engine)
+
+            # Perform migration if needed (after tables are created/updated)
+            if needs_migration:
+                self._perform_schema_migration()
+            else:
+                # No migration needed - record current schema version if not already recorded
+                self._ensure_schema_version_recorded()
+
+            # Apply other migrations for existing databases
             self._apply_migrations()
 
             # Test that we can actually write to the database
@@ -975,9 +1020,21 @@ class Database:
                 },
             )
 
-
+            # Check schema version BEFORE creating tables to detect legacy databases
             self.SessionLocal = sessionmaker(bind=self.engine)
-
+            needs_migration = self._check_schema_version_before_create()
+
+            # Create/update tables
+            Base.metadata.create_all(self.engine)
+
+            # Perform migration if needed (after tables are created/updated)
+            if needs_migration:
+                self._perform_schema_migration()
+            else:
+                # No migration needed - record current schema version if not already recorded
+                self._ensure_schema_version_recorded()
+
+            # Apply other migrations for existing databases
             self._apply_migrations()
 
             # Test write capability
@@ -1010,9 +1067,21 @@ class Database:
                 "sqlite:///:memory:", connect_args={"check_same_thread": False}
             )
 
-
+            # Check schema version BEFORE creating tables to detect legacy databases
             self.SessionLocal = sessionmaker(bind=self.engine)
-
+            needs_migration = self._check_schema_version_before_create()
+
+            # Create/update tables
+            Base.metadata.create_all(self.engine)
+
+            # Perform migration if needed (after tables are created/updated)
+            if needs_migration:
+                self._perform_schema_migration()
+            else:
+                # No migration needed - record current schema version if not already recorded
+                self._ensure_schema_version_recorded()
+
+            # Apply other migrations for existing databases
             self._apply_migrations()
 
             self.is_readonly_fallback = True
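All three constructor paths above (normal open, fallback open, and the in-memory last resort) repeat the same sequence: probe the schema version before create_all can mask a legacy database, create or update tables, then either migrate or stamp the version. The probe itself reduces to three SQLite queries; a standalone stdlib approximation (table names and the "2.0" constant come from the diff, the path is hypothetical):

    import sqlite3

    def needs_migration(db_path: str = "gitflow_cache.db", current: str = "2.0") -> bool:
        conn = sqlite3.connect(db_path)
        try:
            def table_exists(name: str) -> bool:
                return conn.execute(
                    "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                    (name,),
                ).fetchone() is not None

            if table_exists("schema_version"):
                row = conn.execute(
                    "SELECT version FROM schema_version ORDER BY id DESC LIMIT 1"
                ).fetchone()
                return bool(row) and row[0] != current  # version mismatch -> migrate
            if table_exists("cached_commits"):
                (count,) = conn.execute("SELECT COUNT(*) FROM cached_commits").fetchone()
                return count > 0  # legacy v1.0 database with data
            return False  # fresh or empty database
        finally:
            conn.close()
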
@@ -1104,9 +1173,174 @@ class Database:
 
     def init_db(self) -> None:
         """Initialize database tables and apply migrations."""
+        needs_migration = self._check_schema_version_before_create()
         Base.metadata.create_all(self.engine)
+        if needs_migration:
+            self._perform_schema_migration()
+        else:
+            self._ensure_schema_version_recorded()
         self._apply_migrations()
 
+    def _check_schema_version_before_create(self) -> bool:
+        """Check if database needs migration BEFORE create_all is called.
+
+        WHY: We need to check for legacy databases BEFORE creating new tables,
+        otherwise we can't distinguish between a fresh database and a legacy one.
+
+        Returns:
+            True if migration is needed, False otherwise
+        """
+        try:
+            with self.engine.connect() as conn:
+                # Check if schema_version table exists
+                result = conn.execute(
+                    text(
+                        "SELECT name FROM sqlite_master WHERE type='table' AND name='schema_version'"
+                    )
+                )
+                schema_table_exists = result.fetchone() is not None
+
+                if schema_table_exists:
+                    # Check current version
+                    result = conn.execute(
+                        text("SELECT version FROM schema_version ORDER BY id DESC LIMIT 1")
+                    )
+                    row = result.fetchone()
+
+                    if row and row[0] != self.CURRENT_SCHEMA_VERSION:
+                        # Version mismatch - needs migration
+                        logger.warning(
+                            f"⚠️ Schema version mismatch: {row[0]} → {self.CURRENT_SCHEMA_VERSION}"
+                        )
+                        return True
+                    # else: Already at current version or no version record yet
+                    return False
+                else:
+                    # No schema_version table - check if this is legacy or new
+                    result = conn.execute(
+                        text(
+                            "SELECT name FROM sqlite_master WHERE type='table' AND name='cached_commits'"
+                        )
+                    )
+                    has_cached_commits = result.fetchone() is not None
+
+                    if has_cached_commits:
+                        # Check if table has data
+                        result = conn.execute(text("SELECT COUNT(*) FROM cached_commits"))
+                        commit_count = result.fetchone()[0]
+
+                        if commit_count > 0:
+                            # Legacy database with data - needs migration
+                            logger.warning("⚠️ Old cache schema detected (v1.0 → v2.0)")
+                            logger.info("   This is a one-time operation due to timezone fix")
+                            return True
+
+                    # New database or empty legacy database - no migration needed
+                    return False
+
+        except Exception as e:
+            # Don't fail initialization due to schema check issues
+            logger.debug(f"Schema version check failed: {e}")
+            return False
+
+    def _perform_schema_migration(self) -> None:
+        """Perform the actual schema migration after tables are created.
+
+        WHY: Separating migration from detection allows us to update table schemas
+        via create_all before clearing/migrating data.
+        """
+        try:
+            with self.engine.connect() as conn:
+                logger.info("🔄 Automatically upgrading cache database...")
+                logger.info("   Clearing old cache data (timezone schema incompatible)...")
+
+                # Clear cached data tables
+                conn.execute(text("DELETE FROM cached_commits"))
+                conn.execute(text("DELETE FROM pull_request_cache"))
+                conn.execute(text("DELETE FROM issue_cache"))
+                conn.execute(text("DELETE FROM repository_analysis_status"))
+
+                # Also clear qualitative analysis data if it exists
+                try:
+                    conn.execute(text("DELETE FROM qualitative_commits"))
+                    conn.execute(text("DELETE FROM pattern_cache"))
+                except Exception:
+                    # These tables might not exist in all databases
+                    pass
+
+                conn.commit()
+
+                # Record the schema upgrade
+                self._record_schema_version(
+                    conn,
+                    self.CURRENT_SCHEMA_VERSION,
+                    self.LEGACY_SCHEMA_VERSION,
+                    "Migrated to timezone-aware timestamps (v2.0)",
+                )
+
+                logger.info("   Migration complete - cache will be rebuilt on next analysis")
+                logger.info("✅ Cache database upgraded successfully")
+
+        except Exception as e:
+            logger.error(f"Migration failed: {e}")
+            # Don't raise - let the system continue and rebuild cache from scratch
+
+    def _ensure_schema_version_recorded(self) -> None:
+        """Ensure schema version is recorded for databases that didn't need migration.
+
+        WHY: Fresh databases and already-migrated databases need to have their
+        schema version recorded for future migration detection.
+        """
+        try:
+            with self.engine.connect() as conn:
+                # Check if version is already recorded
+                result = conn.execute(text("SELECT COUNT(*) FROM schema_version"))
+                count = result.fetchone()[0]
+
+                if count == 0:
+                    # No version recorded - this is a fresh database
+                    self._record_schema_version(
+                        conn, self.CURRENT_SCHEMA_VERSION, None, "Initial schema creation"
+                    )
+                    logger.debug(f"Recorded initial schema version: {self.CURRENT_SCHEMA_VERSION}")
+
+        except Exception as e:
+            # Don't fail if we can't record version
+            logger.debug(f"Could not ensure schema version recorded: {e}")
+
+    def _record_schema_version(
+        self, conn, version: str, previous_version: Optional[str], notes: Optional[str]
+    ) -> None:
+        """Record schema version in the database.
+
+        Args:
+            conn: Database connection
+            version: New schema version
+            previous_version: Previous schema version (None for initial)
+            notes: Migration notes
+        """
+        try:
+            from datetime import datetime, timezone
+
+            # Insert new schema version record
+            conn.execute(
+                text(
+                    """
+                    INSERT INTO schema_version (version, upgraded_at, previous_version, migration_notes)
+                    VALUES (:version, :upgraded_at, :previous_version, :notes)
+                    """
+                ),
+                {
+                    "version": version,
+                    "upgraded_at": datetime.now(timezone.utc),
+                    "previous_version": previous_version,
+                    "notes": notes,
+                },
+            )
+            conn.commit()
+        except Exception as e:
+            logger.debug(f"Could not record schema version: {e}")
+
     def _apply_migrations(self) -> None:
         """Apply database migrations for backward compatibility.
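_record_schema_version above is a plain parameterized INSERT through sqlalchemy.text. A minimal sketch of the same pattern against a scratch in-memory database — the CREATE TABLE is inlined so the snippet runs on its own, and the timestamp is bound as an ISO string to stay driver-neutral:

    from datetime import datetime, timezone

    from sqlalchemy import create_engine, text

    engine = create_engine("sqlite:///:memory:")
    with engine.begin() as conn:  # transaction commits on exit
        conn.execute(text(
            "CREATE TABLE schema_version (id INTEGER PRIMARY KEY,"
            " version TEXT NOT NULL, upgraded_at TEXT,"
            " previous_version TEXT, migration_notes TEXT)"
        ))
        conn.execute(
            text(
                "INSERT INTO schema_version"
                " (version, upgraded_at, previous_version, migration_notes)"
                " VALUES (:version, :upgraded_at, :previous_version, :notes)"
            ),
            {
                "version": "2.0",
                "upgraded_at": datetime.now(timezone.utc).isoformat(),
                "previous_version": "1.0",
                "notes": "Migrated to timezone-aware timestamps (v2.0)",
            },
        )
        row = conn.execute(
            text("SELECT version, previous_version FROM schema_version")
        ).fetchone()
        print(row)  # ('2.0', '1.0')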