gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/classification/__init__.py +31 -0
  4. gitflow_analytics/classification/batch_classifier.py +752 -0
  5. gitflow_analytics/classification/classifier.py +464 -0
  6. gitflow_analytics/classification/feature_extractor.py +725 -0
  7. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  8. gitflow_analytics/classification/model.py +455 -0
  9. gitflow_analytics/cli.py +4490 -378
  10. gitflow_analytics/cli_rich.py +503 -0
  11. gitflow_analytics/config/__init__.py +43 -0
  12. gitflow_analytics/config/errors.py +261 -0
  13. gitflow_analytics/config/loader.py +904 -0
  14. gitflow_analytics/config/profiles.py +264 -0
  15. gitflow_analytics/config/repository.py +124 -0
  16. gitflow_analytics/config/schema.py +441 -0
  17. gitflow_analytics/config/validator.py +154 -0
  18. gitflow_analytics/config.py +44 -398
  19. gitflow_analytics/core/analyzer.py +1320 -172
  20. gitflow_analytics/core/branch_mapper.py +132 -132
  21. gitflow_analytics/core/cache.py +1554 -175
  22. gitflow_analytics/core/data_fetcher.py +1193 -0
  23. gitflow_analytics/core/identity.py +571 -185
  24. gitflow_analytics/core/metrics_storage.py +526 -0
  25. gitflow_analytics/core/progress.py +372 -0
  26. gitflow_analytics/core/schema_version.py +269 -0
  27. gitflow_analytics/extractors/base.py +13 -11
  28. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  29. gitflow_analytics/extractors/story_points.py +77 -59
  30. gitflow_analytics/extractors/tickets.py +841 -89
  31. gitflow_analytics/identity_llm/__init__.py +6 -0
  32. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  33. gitflow_analytics/identity_llm/analyzer.py +464 -0
  34. gitflow_analytics/identity_llm/models.py +76 -0
  35. gitflow_analytics/integrations/github_integration.py +258 -87
  36. gitflow_analytics/integrations/jira_integration.py +572 -123
  37. gitflow_analytics/integrations/orchestrator.py +206 -82
  38. gitflow_analytics/metrics/activity_scoring.py +322 -0
  39. gitflow_analytics/metrics/branch_health.py +470 -0
  40. gitflow_analytics/metrics/dora.py +542 -179
  41. gitflow_analytics/models/database.py +986 -59
  42. gitflow_analytics/pm_framework/__init__.py +115 -0
  43. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  44. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  45. gitflow_analytics/pm_framework/base.py +406 -0
  46. gitflow_analytics/pm_framework/models.py +211 -0
  47. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  48. gitflow_analytics/pm_framework/registry.py +333 -0
  49. gitflow_analytics/qualitative/__init__.py +29 -0
  50. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  51. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  52. gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
  53. gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
  54. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
  55. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  56. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  57. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  58. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  59. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  60. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  61. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  62. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  63. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  64. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
  65. gitflow_analytics/qualitative/core/__init__.py +13 -0
  66. gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
  67. gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
  68. gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
  69. gitflow_analytics/qualitative/core/processor.py +673 -0
  70. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  71. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  72. gitflow_analytics/qualitative/models/__init__.py +25 -0
  73. gitflow_analytics/qualitative/models/schemas.py +306 -0
  74. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  75. gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
  76. gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
  77. gitflow_analytics/qualitative/utils/metrics.py +361 -0
  78. gitflow_analytics/qualitative/utils/text_processing.py +285 -0
  79. gitflow_analytics/reports/__init__.py +100 -0
  80. gitflow_analytics/reports/analytics_writer.py +550 -18
  81. gitflow_analytics/reports/base.py +648 -0
  82. gitflow_analytics/reports/branch_health_writer.py +322 -0
  83. gitflow_analytics/reports/classification_writer.py +924 -0
  84. gitflow_analytics/reports/cli_integration.py +427 -0
  85. gitflow_analytics/reports/csv_writer.py +1700 -216
  86. gitflow_analytics/reports/data_models.py +504 -0
  87. gitflow_analytics/reports/database_report_generator.py +427 -0
  88. gitflow_analytics/reports/example_usage.py +344 -0
  89. gitflow_analytics/reports/factory.py +499 -0
  90. gitflow_analytics/reports/formatters.py +698 -0
  91. gitflow_analytics/reports/html_generator.py +1116 -0
  92. gitflow_analytics/reports/interfaces.py +489 -0
  93. gitflow_analytics/reports/json_exporter.py +2770 -0
  94. gitflow_analytics/reports/narrative_writer.py +2289 -158
  95. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  96. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  97. gitflow_analytics/training/__init__.py +5 -0
  98. gitflow_analytics/training/model_loader.py +377 -0
  99. gitflow_analytics/training/pipeline.py +550 -0
  100. gitflow_analytics/tui/__init__.py +5 -0
  101. gitflow_analytics/tui/app.py +724 -0
  102. gitflow_analytics/tui/screens/__init__.py +8 -0
  103. gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
  104. gitflow_analytics/tui/screens/configuration_screen.py +523 -0
  105. gitflow_analytics/tui/screens/loading_screen.py +348 -0
  106. gitflow_analytics/tui/screens/main_screen.py +321 -0
  107. gitflow_analytics/tui/screens/results_screen.py +722 -0
  108. gitflow_analytics/tui/widgets/__init__.py +7 -0
  109. gitflow_analytics/tui/widgets/data_table.py +255 -0
  110. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  111. gitflow_analytics/tui/widgets/progress_widget.py +187 -0
  112. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  113. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  114. gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
  115. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  116. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  117. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  118. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  119. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -1,24 +1,45 @@
1
1
  """Database models for GitFlow Analytics using SQLAlchemy."""
2
+
3
+ import logging
4
+ import os
5
+ import tempfile
2
6
  from datetime import datetime
3
7
  from pathlib import Path
8
+ from typing import Any
9
+
10
+ from sqlalchemy import (
11
+ JSON,
12
+ Boolean,
13
+ Column,
14
+ DateTime,
15
+ Float,
16
+ ForeignKey,
17
+ Index,
18
+ Integer,
19
+ String,
20
+ create_engine,
21
+ text,
22
+ )
23
+ from sqlalchemy.exc import OperationalError
24
+ from sqlalchemy.orm import Session, declarative_base, sessionmaker
25
+
26
+ logger = logging.getLogger(__name__)
4
27
 
5
- from sqlalchemy import JSON, Boolean, Column, DateTime, Float, Index, Integer, String, create_engine
6
- from sqlalchemy.ext.declarative import declarative_base
7
- from sqlalchemy.orm import Session, sessionmaker
28
+ Base: Any = declarative_base()
8
29
 
9
- Base = declarative_base()
10
30
 
11
31
  class CachedCommit(Base):
12
32
  """Cached commit analysis results."""
13
- __tablename__ = 'cached_commits'
14
-
33
+
34
+ __tablename__ = "cached_commits"
35
+
15
36
  # Primary key
16
37
  id = Column(Integer, primary_key=True)
17
-
38
+
18
39
  # Commit identification
19
40
  repo_path = Column(String, nullable=False)
20
41
  commit_hash = Column(String, nullable=False)
21
-
42
+
22
43
  # Commit data
23
44
  author_name = Column(String)
24
45
  author_email = Column(String)
@@ -26,108 +47,114 @@ class CachedCommit(Base):
26
47
  timestamp = Column(DateTime)
27
48
  branch = Column(String)
28
49
  is_merge = Column(Boolean, default=False)
29
-
50
+
30
51
  # Metrics
31
52
  files_changed = Column(Integer)
32
53
  insertions = Column(Integer)
33
54
  deletions = Column(Integer)
34
55
  complexity_delta = Column(Float)
35
-
56
+
36
57
  # Extracted data
37
58
  story_points = Column(Integer, nullable=True)
38
59
  ticket_references = Column(JSON) # List of ticket IDs
39
-
60
+
40
61
  # Cache metadata
41
62
  cached_at = Column(DateTime, default=datetime.utcnow)
42
63
  cache_version = Column(String, default="1.0")
43
-
64
+
44
65
  # Indexes for performance
45
66
  __table_args__ = (
46
- Index('idx_repo_commit', 'repo_path', 'commit_hash', unique=True),
47
- Index('idx_timestamp', 'timestamp'),
48
- Index('idx_cached_at', 'cached_at'),
67
+ Index("idx_repo_commit", "repo_path", "commit_hash", unique=True),
68
+ Index("idx_timestamp", "timestamp"),
69
+ Index("idx_cached_at", "cached_at"),
49
70
  )
50
71
 
72
+
51
73
  class DeveloperIdentity(Base):
52
74
  """Developer identity mappings."""
53
- __tablename__ = 'developer_identities'
54
-
75
+
76
+ __tablename__ = "developer_identities"
77
+
55
78
  id = Column(Integer, primary_key=True)
56
79
  canonical_id = Column(String, unique=True, nullable=False)
57
80
  primary_name = Column(String, nullable=False)
58
81
  primary_email = Column(String, nullable=False)
59
82
  github_username = Column(String, nullable=True)
60
-
83
+
61
84
  # Statistics
62
85
  total_commits = Column(Integer, default=0)
63
86
  total_story_points = Column(Integer, default=0)
64
87
  first_seen = Column(DateTime, default=datetime.utcnow)
65
88
  last_seen = Column(DateTime, default=datetime.utcnow)
66
-
89
+
67
90
  # Metadata
68
91
  created_at = Column(DateTime, default=datetime.utcnow)
69
92
  updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
70
-
93
+
71
94
  __table_args__ = (
72
- Index('idx_primary_email', 'primary_email'),
73
- Index('idx_canonical_id', 'canonical_id'),
95
+ Index("idx_primary_email", "primary_email"),
96
+ Index("idx_canonical_id", "canonical_id"),
74
97
  )
75
98
 
99
+
76
100
  class DeveloperAlias(Base):
77
101
  """Alternative names/emails for developers."""
78
- __tablename__ = 'developer_aliases'
79
-
102
+
103
+ __tablename__ = "developer_aliases"
104
+
80
105
  id = Column(Integer, primary_key=True)
81
106
  canonical_id = Column(String, nullable=False) # Foreign key to DeveloperIdentity
82
107
  name = Column(String, nullable=False)
83
108
  email = Column(String, nullable=False)
84
-
109
+
85
110
  __table_args__ = (
86
- Index('idx_alias_email', 'email'),
87
- Index('idx_alias_canonical_id', 'canonical_id'),
88
- Index('idx_name_email', 'name', 'email', unique=True),
111
+ Index("idx_alias_email", "email"),
112
+ Index("idx_alias_canonical_id", "canonical_id"),
113
+ Index("idx_name_email", "name", "email", unique=True),
89
114
  )
90
115
 
116
+
91
117
  class PullRequestCache(Base):
92
118
  """Cached pull request data."""
93
- __tablename__ = 'pull_request_cache'
94
-
119
+
120
+ __tablename__ = "pull_request_cache"
121
+
95
122
  id = Column(Integer, primary_key=True)
96
123
  repo_path = Column(String, nullable=False)
97
124
  pr_number = Column(Integer, nullable=False)
98
-
125
+
99
126
  # PR data
100
127
  title = Column(String)
101
128
  description = Column(String)
102
129
  author = Column(String)
103
130
  created_at = Column(DateTime)
104
131
  merged_at = Column(DateTime, nullable=True)
105
-
132
+
106
133
  # Extracted data
107
134
  story_points = Column(Integer, nullable=True)
108
135
  labels = Column(JSON) # List of labels
109
-
136
+
110
137
  # Associated commits
111
138
  commit_hashes = Column(JSON) # List of commit hashes
112
-
139
+
113
140
  # Cache metadata
114
141
  cached_at = Column(DateTime, default=datetime.utcnow)
115
-
116
- __table_args__ = (
117
- Index('idx_repo_pr', 'repo_path', 'pr_number', unique=True),
118
- )
142
+
143
+ __table_args__ = (Index("idx_repo_pr", "repo_path", "pr_number", unique=True),)
144
+
119
145
 
120
146
  class IssueCache(Base):
121
147
  """Cached issue data from various platforms."""
122
- __tablename__ = 'issue_cache'
123
-
148
+
149
+ __tablename__ = "issue_cache"
150
+
124
151
  id = Column(Integer, primary_key=True)
125
-
152
+
126
153
  # Issue identification
127
154
  platform = Column(String, nullable=False) # 'jira', 'github', 'clickup', 'linear'
128
155
  issue_id = Column(String, nullable=False)
129
156
  project_key = Column(String, nullable=False)
130
-
157
+
131
158
  # Issue data
132
159
  title = Column(String)
133
160
  description = Column(String)
@@ -136,36 +163,936 @@ class IssueCache(Base):
136
163
  created_at = Column(DateTime)
137
164
  updated_at = Column(DateTime)
138
165
  resolved_at = Column(DateTime, nullable=True)
139
-
166
+
140
167
  # Extracted data
141
168
  story_points = Column(Integer, nullable=True)
142
169
  labels = Column(JSON)
143
-
170
+
144
171
  # Platform-specific data
145
172
  platform_data = Column(JSON) # Additional platform-specific fields
146
-
173
+
147
174
  # Cache metadata
148
175
  cached_at = Column(DateTime, default=datetime.utcnow)
149
-
176
+
177
+ __table_args__ = (
178
+ Index("idx_platform_issue", "platform", "issue_id", unique=True),
179
+ Index("idx_project_key", "project_key"),
180
+ )
181
+
182
+
183
+ class QualitativeCommitData(Base):
184
+ """Extended commit data with qualitative analysis results.
185
+
186
+ This table stores the results of qualitative analysis performed on commits,
187
+ including change type classification, domain analysis, risk assessment,
188
+ and processing metadata.
189
+ """
190
+
191
+ __tablename__ = "qualitative_commits"
192
+
193
+ # Link to existing commit
194
+ commit_id = Column(Integer, ForeignKey("cached_commits.id"), primary_key=True)
195
+
196
+ # Classification results
197
+ change_type = Column(String, nullable=False)
198
+ change_type_confidence = Column(Float, nullable=False)
199
+ business_domain = Column(String, nullable=False)
200
+ domain_confidence = Column(Float, nullable=False)
201
+ risk_level = Column(String, nullable=False)
202
+ risk_factors = Column(JSON) # List of risk factors
203
+
204
+ # Intent and context analysis
205
+ intent_signals = Column(JSON) # Intent analysis results
206
+ collaboration_patterns = Column(JSON) # Team interaction patterns
207
+ technical_context = Column(JSON) # Technical context information
208
+
209
+ # Processing metadata
210
+ processing_method = Column(String, nullable=False) # 'nlp' or 'llm'
211
+ processing_time_ms = Column(Float)
212
+ confidence_score = Column(Float, nullable=False)
213
+
214
+ # Timestamps
215
+ analyzed_at = Column(DateTime, default=datetime.utcnow)
216
+ analysis_version = Column(String, default="1.0")
217
+
218
+ # Indexes for efficient querying
219
+ __table_args__ = (
220
+ Index("idx_change_type", "change_type"),
221
+ Index("idx_business_domain", "business_domain"),
222
+ Index("idx_risk_level", "risk_level"),
223
+ Index("idx_qualitative_confidence", "confidence_score"),
224
+ Index("idx_processing_method", "processing_method"),
225
+ Index("idx_analyzed_at", "analyzed_at"),
226
+ )
227
+
228
+
229
+ class PatternCache(Base):
230
+ """Cache for learned patterns and classifications.
231
+
232
+ This table stores frequently occurring patterns to avoid reprocessing
233
+ similar commits and to improve classification accuracy over time.
234
+ """
235
+
236
+ __tablename__ = "pattern_cache"
237
+
238
+ id = Column(Integer, primary_key=True)
239
+
240
+ # Pattern identification
241
+ message_hash = Column(String, nullable=False, unique=True)
242
+ semantic_fingerprint = Column(String, nullable=False)
243
+
244
+ # Cached classification results
245
+ classification_result = Column(JSON, nullable=False)
246
+ confidence_score = Column(Float, nullable=False)
247
+
248
+ # Usage tracking for cache management
249
+ hit_count = Column(Integer, default=1)
250
+ last_used = Column(DateTime, default=datetime.utcnow)
251
+ created_at = Column(DateTime, default=datetime.utcnow)
252
+
253
+ # Source tracking
254
+ source_method = Column(String, nullable=False) # 'nlp' or 'llm'
255
+ source_model = Column(String) # Model/method that created this pattern
256
+
257
+ # Performance tracking
258
+ avg_processing_time_ms = Column(Float)
259
+
260
+ # Indexes for pattern matching and cleanup
261
+ __table_args__ = (
262
+ Index("idx_semantic_fingerprint", "semantic_fingerprint"),
263
+ Index("idx_pattern_confidence", "confidence_score"),
264
+ Index("idx_hit_count", "hit_count"),
265
+ Index("idx_last_used", "last_used"),
266
+ Index("idx_source_method", "source_method"),
267
+ )
268
+
269
+
270
+ class LLMUsageStats(Base):
271
+ """Track LLM usage statistics for cost monitoring and optimization.
272
+
273
+ This table helps monitor LLM API usage, costs, and performance to
274
+ optimize the balance between speed, accuracy, and cost.
275
+ """
276
+
277
+ __tablename__ = "llm_usage_stats"
278
+
279
+ id = Column(Integer, primary_key=True)
280
+
281
+ # API call metadata
282
+ model_name = Column(String, nullable=False)
283
+ api_provider = Column(String, default="openrouter")
284
+ timestamp = Column(DateTime, default=datetime.utcnow)
285
+
286
+ # Usage metrics
287
+ input_tokens = Column(Integer, nullable=False)
288
+ output_tokens = Column(Integer, nullable=False)
289
+ processing_time_ms = Column(Float, nullable=False)
290
+
291
+ # Cost tracking
292
+ estimated_cost_usd = Column(Float)
293
+ cost_per_token = Column(Float)
294
+
295
+ # Batch information
296
+ batch_size = Column(Integer, default=1) # Number of commits processed
297
+ batch_id = Column(String) # Group related calls
298
+
299
+ # Quality metrics
300
+ avg_confidence_score = Column(Float)
301
+ success = Column(Boolean, default=True)
302
+ error_message = Column(String)
303
+
304
+ # Indexes for analysis and monitoring
305
+ __table_args__ = (
306
+ Index("idx_model_timestamp", "model_name", "timestamp"),
307
+ Index("idx_llm_timestamp", "timestamp"),
308
+ Index("idx_llm_batch_id", "batch_id"),
309
+ Index("idx_success", "success"),
310
+ )
311
+
312
+
313
+ class TrainingData(Base):
314
+ """Training data for commit classification models.
315
+
316
+ This table stores labeled training examples collected from PM platforms
317
+ and manual annotations for training and improving classification models.
318
+ """
319
+
320
+ __tablename__ = "training_data"
321
+
322
+ id = Column(Integer, primary_key=True)
323
+
324
+ # Commit identification
325
+ commit_hash = Column(String, nullable=False)
326
+ commit_message = Column(String, nullable=False)
327
+ files_changed = Column(JSON) # List of changed files
328
+ repo_path = Column(String, nullable=False)
329
+
330
+ # Classification labels
331
+ category = Column(String, nullable=False) # feature, bug_fix, refactor, etc.
332
+ confidence = Column(Float, nullable=False, default=1.0) # Label confidence (0-1)
333
+
334
+ # Source information
335
+ source_type = Column(String, nullable=False) # 'pm_platform', 'manual', 'inferred'
336
+ source_platform = Column(String) # 'jira', 'github', 'clickup', etc.
337
+ source_ticket_id = Column(String) # Original ticket/issue ID
338
+ source_ticket_type = Column(String) # Bug, Story, Task, etc.
339
+
340
+ # Training metadata
341
+ training_session_id = Column(String, nullable=False) # Groups related training data
342
+ created_at = Column(DateTime, default=datetime.utcnow)
343
+ updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
344
+
345
+ # Quality assurance
346
+ validated = Column(Boolean, default=False) # Human validation flag
347
+ validation_notes = Column(String) # Notes from validation process
348
+
349
+ # Feature extraction (for ML training)
350
+ extracted_features = Column(JSON) # Pre-computed features for ML
351
+
352
+ # Indexes for efficient querying and training
353
+ __table_args__ = (
354
+ Index("idx_training_commit_hash", "commit_hash"),
355
+ Index("idx_training_category", "category"),
356
+ Index("idx_training_source", "source_type", "source_platform"),
357
+ Index("idx_training_session", "training_session_id"),
358
+ Index("idx_training_created", "created_at"),
359
+ Index("idx_training_validated", "validated"),
360
+ Index("idx_commit_repo", "commit_hash", "repo_path", unique=True),
361
+ )
362
+
363
+
364
+ class RepositoryAnalysisStatus(Base):
365
+ """Track repository-level analysis completion status for cache-first workflow.
366
+
367
+ WHY: This table enables "fetch once, report many" behavior by tracking
368
+ which repositories have been fully analyzed for specific time periods.
369
+ Prevents re-fetching Git data when only generating different reports.
370
+ """
371
+
372
+ __tablename__ = "repository_analysis_status"
373
+
374
+ id = Column(Integer, primary_key=True)
375
+
376
+ # Repository identification
377
+ repo_path = Column(String, nullable=False)
378
+ repo_name = Column(String, nullable=False) # For display purposes
379
+ project_key = Column(String, nullable=False)
380
+
381
+ # Analysis period
382
+ analysis_start = Column(DateTime, nullable=False) # Start of analysis period
383
+ analysis_end = Column(DateTime, nullable=False) # End of analysis period
384
+ weeks_analyzed = Column(Integer, nullable=False) # Number of weeks
385
+
386
+ # Completion tracking
387
+ git_analysis_complete = Column(Boolean, default=False)
388
+ commit_count = Column(Integer, default=0)
389
+ pr_analysis_complete = Column(Boolean, default=False)
390
+ pr_count = Column(Integer, default=0)
391
+ ticket_analysis_complete = Column(Boolean, default=False)
392
+ ticket_count = Column(Integer, default=0)
393
+
394
+ # Developer identity resolution
395
+ identity_resolution_complete = Column(Boolean, default=False)
396
+ unique_developers = Column(Integer, default=0)
397
+
398
+ # Analysis metadata
399
+ last_updated = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
400
+ analysis_version = Column(String, default="2.0") # For tracking schema changes
401
+
402
+ # Configuration hash to detect config changes
403
+ config_hash = Column(String, nullable=True) # MD5 hash of relevant config
404
+
405
+ # Analysis performance metrics
406
+ processing_time_seconds = Column(Float, nullable=True)
407
+ cache_hit_rate_percent = Column(Float, nullable=True)
408
+
409
+ # Status tracking
410
+ status = Column(String, default="pending") # pending, in_progress, completed, failed
411
+ error_message = Column(String, nullable=True)
412
+
413
+ # Indexes for efficient querying
414
+ __table_args__ = (
415
+ Index("idx_repo_analysis_path", "repo_path"),
416
+ Index("idx_repo_analysis_period", "analysis_start", "analysis_end"),
417
+ Index("idx_repo_analysis_status", "status"),
418
+ Index(
419
+ "idx_repo_analysis_unique", "repo_path", "analysis_start", "analysis_end", unique=True
420
+ ),
421
+ Index("idx_repo_analysis_updated", "last_updated"),
422
+ )
423
+
424
+
425
+ class TrainingSession(Base):
426
+ """Training session metadata and results.
427
+
428
+ This table tracks individual training runs, their configurations,
429
+ and performance metrics for model versioning and comparison.
430
+ """
431
+
432
+ __tablename__ = "training_sessions"
433
+
434
+ id = Column(Integer, primary_key=True)
435
+ session_id = Column(String, unique=True, nullable=False)
436
+
437
+ # Session metadata
438
+ started_at = Column(DateTime, default=datetime.utcnow)
439
+ completed_at = Column(DateTime)
440
+ status = Column(String, default="running") # running, completed, failed
441
+
442
+ # Configuration
443
+ config = Column(JSON, nullable=False) # Training configuration
444
+ weeks_analyzed = Column(Integer) # Time period covered
445
+ repositories = Column(JSON) # List of repositories analyzed
446
+
447
+ # Data statistics
448
+ total_commits = Column(Integer, default=0)
449
+ labeled_commits = Column(Integer, default=0)
450
+ training_examples = Column(Integer, default=0)
451
+ validation_examples = Column(Integer, default=0)
452
+
453
+ # PM platform coverage
454
+ pm_platforms = Column(JSON) # List of PM platforms used
455
+ ticket_coverage_pct = Column(Float) # Percentage of commits with tickets
456
+
457
+ # Training results
458
+ model_accuracy = Column(Float) # Overall accuracy
459
+ category_metrics = Column(JSON) # Per-category precision/recall/f1
460
+ validation_loss = Column(Float) # Validation loss
461
+
462
+ # Model storage
463
+ model_path = Column(String) # Path to saved model
464
+ model_version = Column(String) # Version identifier
465
+ model_size_mb = Column(Float) # Model file size
466
+
467
+ # Performance metrics
468
+ training_time_minutes = Column(Float)
469
+ prediction_time_ms = Column(Float) # Average prediction time
470
+
471
+ # Notes and errors
472
+ notes = Column(String)
473
+ error_message = Column(String)
474
+
475
+ # Indexes for session management
476
+ __table_args__ = (
477
+ Index("idx_session_id", "session_id"),
478
+ Index("idx_session_status", "status"),
479
+ Index("idx_session_started", "started_at"),
480
+ Index("idx_session_model_version", "model_version"),
481
+ )
482
+
483
+
484
+ class ClassificationModel(Base):
485
+ """Versioned storage for trained classification models.
486
+
487
+ This table manages different versions of trained models with
488
+ metadata for model selection and performance tracking.
489
+ """
490
+
491
+ __tablename__ = "classification_models"
492
+
493
+ id = Column(Integer, primary_key=True)
494
+ model_id = Column(String, unique=True, nullable=False)
495
+
496
+ # Model metadata
497
+ name = Column(String, nullable=False)
498
+ version = Column(String, nullable=False)
499
+ model_type = Column(String, nullable=False) # 'sklearn', 'spacy', 'custom'
500
+ created_at = Column(DateTime, default=datetime.utcnow)
501
+
502
+ # Training information
503
+ training_session_id = Column(String, ForeignKey("training_sessions.session_id"))
504
+ trained_on_commits = Column(Integer, nullable=False)
505
+ training_accuracy = Column(Float, nullable=False)
506
+ validation_accuracy = Column(Float, nullable=False)
507
+
508
+ # Model performance
509
+ categories = Column(JSON, nullable=False) # List of supported categories
510
+ performance_metrics = Column(JSON) # Detailed performance metrics
511
+ feature_importance = Column(JSON) # Feature importance scores
512
+
513
+ # Model storage and configuration
514
+ model_binary = Column(JSON) # Serialized model (for small models)
515
+ model_file_path = Column(String) # Path to model file (for large models)
516
+ model_config = Column(JSON) # Model hyperparameters and settings
517
+
518
+ # Usage tracking
519
+ active = Column(Boolean, default=True) # Whether model is active
520
+ usage_count = Column(Integer, default=0) # Number of times used
521
+ last_used = Column(DateTime)
522
+
523
+ # Model validation
524
+ cross_validation_scores = Column(JSON) # Cross-validation results
525
+ test_accuracy = Column(Float) # Hold-out test set accuracy
526
+
527
+ # Indexes for model management
528
+ __table_args__ = (
529
+ Index("idx_model_id", "model_id"),
530
+ Index("idx_model_version", "version"),
531
+ Index("idx_model_active", "active"),
532
+ Index("idx_model_accuracy", "validation_accuracy"),
533
+ Index("idx_model_created", "created_at"),
534
+ )
535
+
536
+
537
+ class DailyCommitBatch(Base):
538
+ """Daily batches of commits organized for efficient data collection and retrieval.
539
+
540
+ WHY: This table enables the two-step fetch/analyze process by storing raw commit data
541
+ in daily batches with full metadata before classification. Each row represents
542
+ one day's worth of commits for a specific project, enabling efficient batch retrieval.
543
+ """
544
+
545
+ __tablename__ = "daily_commit_batches"
546
+
547
+ # Primary key components
548
+ id = Column(Integer, primary_key=True)
549
+ date = Column(DateTime, nullable=False) # Date for the commit batch (YYYY-MM-DD)
550
+ project_key = Column(String, nullable=False) # Project identifier
551
+ repo_path = Column(String, nullable=False) # Repository path for identification
552
+
553
+ # Batch metadata
554
+ commit_count = Column(Integer, default=0) # Number of commits in this batch
555
+ total_files_changed = Column(Integer, default=0)
556
+ total_lines_added = Column(Integer, default=0)
557
+ total_lines_deleted = Column(Integer, default=0)
558
+
559
+ # Developers active on this day
560
+ active_developers = Column(JSON) # List of developer canonical IDs
561
+ unique_tickets = Column(JSON) # List of ticket IDs referenced on this day
562
+
563
+ # Processing status
564
+ fetched_at = Column(DateTime, default=datetime.utcnow)
565
+ classification_status = Column(
566
+ String, default="pending"
567
+ ) # pending, processing, completed, failed
568
+ classified_at = Column(DateTime, nullable=True)
569
+
570
+ # Batch context for LLM classification
571
+ context_summary = Column(String, nullable=True) # Brief summary of day's activity
572
+
573
+ # Indexes for efficient retrieval by date range and project
574
+ __table_args__ = (
575
+ Index("idx_batch_date", "date"),
576
+ Index("idx_daily_batch_project", "project_key"),
577
+ Index("idx_batch_repo", "repo_path"),
578
+ Index("idx_daily_batch_status", "classification_status"),
579
+ Index("idx_batch_unique", "date", "project_key", "repo_path", unique=True),
580
+ Index("idx_batch_date_range", "date", "project_key"),
581
+ )
582
+
583
+
584
class DetailedTicketData(Base):
    """Enhanced ticket storage with full metadata for context-aware classification.

    WHY: The two-step process requires full ticket context (descriptions, types, etc.)
    to improve classification accuracy. This extends the existing IssueCache with
    fields specifically needed for classification context.

    Each row is one ticket from one platform; uniqueness is enforced per
    (platform, ticket_id) by ``idx_detailed_platform_ticket`` below.
    """

    __tablename__ = "detailed_tickets"

    id = Column(Integer, primary_key=True)

    # Ticket identification (enhanced from IssueCache)
    platform = Column(String, nullable=False)  # 'jira', 'github', 'clickup', 'linear'
    ticket_id = Column(String, nullable=False)
    project_key = Column(String, nullable=False)

    # Core ticket data
    title = Column(String)
    description = Column(String)  # Full description for context
    summary = Column(String)  # Brief summary extracted from description
    ticket_type = Column(String)  # Bug, Story, Task, Epic, etc.
    status = Column(String)
    priority = Column(String)
    labels = Column(JSON)  # List of labels/tags

    # People and dates
    assignee = Column(String, nullable=True)
    reporter = Column(String, nullable=True)
    created_at = Column(DateTime)
    updated_at = Column(DateTime)
    resolved_at = Column(DateTime, nullable=True)

    # Metrics for classification context
    story_points = Column(Integer, nullable=True)
    original_estimate = Column(String, nullable=True)  # Time estimate (platform-native string)
    time_spent = Column(String, nullable=True)

    # Relationships for context
    epic_key = Column(String, nullable=True)  # Parent epic
    parent_key = Column(String, nullable=True)  # Parent issue
    subtasks = Column(JSON)  # List of subtask keys
    linked_issues = Column(JSON)  # List of linked issue keys

    # Classification hints from ticket type/labels
    classification_hints = Column(JSON)  # Extracted hints for commit classification
    business_domain = Column(String, nullable=True)  # Domain extracted from ticket

    # Platform-specific data
    platform_data = Column(JSON)  # Additional platform-specific fields

    # Fetch metadata
    # NOTE: datetime.utcnow stores a *naive* UTC timestamp; callers comparing
    # against timezone-aware datetimes must normalize first.
    fetched_at = Column(DateTime, default=datetime.utcnow)
    fetch_version = Column(String, default="2.0")  # Version for schema evolution

    # Indexes for efficient lookup and context building
    __table_args__ = (
        Index("idx_detailed_platform_ticket", "platform", "ticket_id", unique=True),
        Index("idx_detailed_project", "project_key"),
        Index("idx_detailed_type", "ticket_type"),
        Index("idx_detailed_epic", "epic_key"),
        Index("idx_detailed_created", "created_at"),
        Index("idx_detailed_status", "status"),
    )
648
+
649
+
650
class CommitClassificationBatch(Base):
    """Batch classification results with context and confidence tracking.

    WHY: This table stores the results of batch LLM classification with full
    context about what information was used and confidence levels achieved.
    Enables iterative improvement and debugging of classification quality.

    One row per (project, week) batch submitted to the LLM; ``batch_id`` is
    the externally generated unique handle for the batch.
    """

    __tablename__ = "classification_batches"

    id = Column(Integer, primary_key=True)
    batch_id = Column(String, unique=True, nullable=False)  # UUID for this batch

    # Batch context
    project_key = Column(String, nullable=False)
    week_start = Column(DateTime, nullable=False)  # Monday of the week
    week_end = Column(DateTime, nullable=False)  # Sunday of the week
    commit_count = Column(Integer, nullable=False)

    # Context provided to LLM
    ticket_context = Column(JSON)  # Tickets included in context
    developer_context = Column(JSON)  # Active developers in this batch
    project_context = Column(String)  # Project description/domain

    # LLM processing details
    model_used = Column(String, nullable=False)  # Model identifier
    prompt_template = Column(String, nullable=False)  # Template used
    context_tokens = Column(Integer, default=0)  # Tokens used for context
    completion_tokens = Column(Integer, default=0)  # Tokens in response
    total_tokens = Column(Integer, default=0)

    # Processing results
    processing_status = Column(String, default="pending")  # pending, processing, completed, failed
    started_at = Column(DateTime, default=datetime.utcnow)  # naive UTC (see datetime.utcnow)
    completed_at = Column(DateTime, nullable=True)
    processing_time_ms = Column(Float, nullable=True)

    # Quality metrics
    avg_confidence = Column(Float, nullable=True)  # Average confidence across commits
    low_confidence_count = Column(Integer, default=0)  # Commits with confidence < 0.7
    fallback_count = Column(Integer, default=0)  # Commits that fell back to rules

    # Cost tracking
    estimated_cost_usd = Column(Float, nullable=True)
    cost_per_commit = Column(Float, nullable=True)

    # Error handling
    error_message = Column(String, nullable=True)
    retry_count = Column(Integer, default=0)

    # Indexes for batch management and analysis
    # NOTE(review): idx_classification_batch_id duplicates the implicit unique
    # index created by unique=True on batch_id — harmless, but likely redundant;
    # confirm before removing since dropping it changes emitted DDL.
    __table_args__ = (
        Index("idx_classification_batch_id", "batch_id"),
        Index("idx_classification_batch_project", "project_key"),
        Index("idx_batch_week", "week_start", "week_end"),
        Index("idx_classification_batch_status", "processing_status"),
        Index("idx_batch_completed", "completed_at"),
        Index("idx_batch_model", "model_used"),
    )
709
+
710
+
711
class CommitTicketCorrelation(Base):
    """Correlations between commits and tickets for context-aware classification.

    WHY: This table explicitly tracks which commits reference which tickets,
    enabling the batch classifier to include relevant ticket context when
    classifying related commits. Improves accuracy by providing business context.

    Uniqueness is per (commit_hash, repo_path, ticket_id, platform) — the same
    commit may legitimately reference multiple tickets, and the same ticket may
    be referenced by many commits.
    """

    __tablename__ = "commit_ticket_correlations"

    id = Column(Integer, primary_key=True)

    # Commit identification
    commit_hash = Column(String, nullable=False)
    repo_path = Column(String, nullable=False)

    # Ticket identification
    ticket_id = Column(String, nullable=False)
    platform = Column(String, nullable=False)
    project_key = Column(String, nullable=False)

    # Correlation metadata
    correlation_type = Column(String, default="direct")  # direct, inferred, related
    confidence = Column(Float, default=1.0)  # Confidence in correlation
    extracted_from = Column(String, nullable=False)  # commit_message, branch_name, pr_title

    # Pattern that created this correlation
    matching_pattern = Column(String, nullable=True)  # Regex pattern that matched

    # Timestamps
    created_at = Column(DateTime, default=datetime.utcnow)  # naive UTC (see datetime.utcnow)
    validated = Column(Boolean, default=False)  # Manual validation flag

    # Indexes for efficient correlation lookup
    __table_args__ = (
        Index("idx_corr_commit", "commit_hash", "repo_path"),
        Index("idx_corr_ticket", "ticket_id", "platform"),
        Index("idx_corr_project", "project_key"),
        Index("idx_corr_unique", "commit_hash", "repo_path", "ticket_id", "platform", unique=True),
    )
751
+
752
+
753
class DailyMetrics(Base):
    """Daily activity metrics per developer per project with classification data.

    WHY: This table stores daily aggregated metrics for each developer-project combination,
    enabling quick retrieval by date range for reporting and trend analysis.
    Each row represents one developer's activity in one project for one day;
    uniqueness is enforced per (date, developer_id, project_key) below.
    """

    __tablename__ = "daily_metrics"

    # Primary key components
    id = Column(Integer, primary_key=True)
    date = Column(DateTime, nullable=False)  # Date for the metrics (YYYY-MM-DD)
    developer_id = Column(String, nullable=False)  # Canonical developer ID
    project_key = Column(String, nullable=False)  # Project identifier

    # Developer information (denormalized here so reports need no identity join)
    developer_name = Column(String, nullable=False)  # Display name for reports
    developer_email = Column(String, nullable=False)  # Primary email

    # Classification counts - commit counts by category
    feature_commits = Column(Integer, default=0)
    bug_fix_commits = Column(Integer, default=0)
    refactor_commits = Column(Integer, default=0)
    documentation_commits = Column(Integer, default=0)
    maintenance_commits = Column(Integer, default=0)
    test_commits = Column(Integer, default=0)
    style_commits = Column(Integer, default=0)
    build_commits = Column(Integer, default=0)
    other_commits = Column(Integer, default=0)

    # Aggregate metrics
    total_commits = Column(Integer, default=0)
    files_changed = Column(Integer, default=0)
    lines_added = Column(Integer, default=0)
    lines_deleted = Column(Integer, default=0)
    story_points = Column(Integer, default=0)

    # Ticket tracking metrics
    tracked_commits = Column(Integer, default=0)  # Commits with ticket references
    untracked_commits = Column(Integer, default=0)  # Commits without ticket references
    unique_tickets = Column(Integer, default=0)  # Number of unique tickets referenced

    # Work pattern indicators
    merge_commits = Column(Integer, default=0)
    complex_commits = Column(Integer, default=0)  # Commits with >5 files changed

    # Metadata (naive UTC timestamps; updated_at refreshes on every UPDATE)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Indexes for efficient querying
    __table_args__ = (
        Index("idx_daily_date", "date"),
        Index("idx_daily_developer", "developer_id"),
        Index("idx_daily_project", "project_key"),
        Index("idx_daily_date_range", "date", "developer_id", "project_key"),
        Index("idx_daily_unique", "date", "developer_id", "project_key", unique=True),
    )
812
+
813
+
814
class WeeklyTrends(Base):
    """Weekly trend analysis for developer-project combinations.

    WHY: Pre-calculated weekly trends improve report performance by avoiding
    repeated calculations. Stores week-over-week changes in activity patterns.

    One row per (week_start, developer_id, project_key); uniqueness enforced
    by ``idx_weekly_unique`` below.
    """

    __tablename__ = "weekly_trends"

    id = Column(Integer, primary_key=True)
    week_start = Column(DateTime, nullable=False)  # Monday of the week
    week_end = Column(DateTime, nullable=False)  # Sunday of the week
    developer_id = Column(String, nullable=False)
    project_key = Column(String, nullable=False)

    # Week totals
    total_commits = Column(Integer, default=0)
    feature_commits = Column(Integer, default=0)
    bug_fix_commits = Column(Integer, default=0)
    refactor_commits = Column(Integer, default=0)

    # Week-over-week changes (percentage)
    total_commits_change = Column(Float, default=0.0)
    feature_commits_change = Column(Float, default=0.0)
    bug_fix_commits_change = Column(Float, default=0.0)
    refactor_commits_change = Column(Float, default=0.0)

    # Activity indicators
    days_active = Column(Integer, default=0)  # Number of days with commits
    avg_commits_per_day = Column(Float, default=0.0)

    # Metadata (naive UTC timestamp; see datetime.utcnow)
    calculated_at = Column(DateTime, default=datetime.utcnow)

    # Indexes for trend queries
    __table_args__ = (
        Index("idx_weekly_start", "week_start"),
        Index("idx_weekly_dev_proj", "developer_id", "project_key"),
        Index("idx_weekly_unique", "week_start", "developer_id", "project_key", unique=True),
    )
154
854
 
855
+
155
856
class Database:
    """Database connection manager with robust permission handling.

    Tries to open the SQLite database at the requested path, falling back to a
    per-session temp-directory database and finally to an in-memory database so
    the application keeps working in readonly environments (containers, CI).
    """

    def __init__(self, db_path: Path):
        """
        Initialize database connection with proper error handling.

        WHY: This method handles various permission scenarios that can occur
        in different deployment environments:
        - Readonly filesystems (Docker containers, CI/CD)
        - Permission denied on directory creation
        - Database file creation failures
        - Fallback to memory database when persistence isn't possible

        DESIGN DECISION: Uses fallback mechanisms rather than failing hard,
        allowing the application to continue running even in restricted environments.

        Args:
            db_path: Path to the SQLite database file

        Raises:
            RuntimeError: If database initialization fails completely
        """
        self.db_path = db_path
        self.is_readonly_fallback = False  # True only when the in-memory fallback is active
        self.engine = None
        self.SessionLocal = None

        # Try primary location, then temp dir, then in-memory.
        self._initialize_database()

    def _initialize_database(self) -> None:
        """
        Initialize database with comprehensive error handling.

        WHY: Database initialization can fail for multiple reasons:
        1. Directory doesn't exist and can't be created (permissions)
        2. Directory exists but database file can't be created (readonly filesystem)
        3. Database file exists but is readonly
        4. Filesystem is completely readonly (containers, CI)

        APPROACH: Try primary location first, then fallback strategies
        """
        # Strategy 1: Try primary database location
        if self._try_primary_database():
            return

        # Strategy 2: Try temp directory fallback
        if self._try_temp_database_fallback():
            return

        # Strategy 3: Use in-memory database as last resort
        self._use_memory_database_fallback()

    def _try_primary_database(self) -> bool:
        """
        Attempt to create database at the primary location.

        Returns:
            True if successful, False if fallback needed
        """
        try:
            # Check if we can create the directory
            if not self._ensure_directory_writable(self.db_path.parent):
                return False

            # Check if database file can be created/accessed
            if not self._ensure_database_writable(self.db_path):
                return False

            # Try to create the database
            self.engine = create_engine(
                f"sqlite:///{self.db_path}",
                # Add connection args to handle locked databases better
                connect_args={
                    "timeout": 30,  # 30 second timeout for database locks
                    "check_same_thread": False,  # Allow multi-threading
                },
            )

            # Test the connection and create tables
            Base.metadata.create_all(self.engine)
            self.SessionLocal = sessionmaker(bind=self.engine)

            # Test that we can actually write to the database
            self._test_database_write()

            logger.info(f"Database initialized successfully at: {self.db_path}")
            return True

        except (OperationalError, OSError, PermissionError) as e:
            logger.warning(f"Failed to initialize primary database at {self.db_path}: {e}")
            return False

    def _try_temp_database_fallback(self) -> bool:
        """
        Try to create database in system temp directory as fallback.

        Returns:
            True if successful, False if fallback needed
        """
        try:
            # Create a temp file that will persist for the session
            temp_dir = Path(tempfile.gettempdir()) / "gitflow-analytics-cache"
            temp_dir.mkdir(exist_ok=True, parents=True)

            # Use the same filename but in temp directory
            temp_db_path = temp_dir / self.db_path.name

            self.engine = create_engine(
                f"sqlite:///{temp_db_path}",
                connect_args={
                    "timeout": 30,
                    "check_same_thread": False,
                },
            )

            Base.metadata.create_all(self.engine)
            self.SessionLocal = sessionmaker(bind=self.engine)

            # Test write capability
            self._test_database_write()

            logger.warning(
                f"Primary database location not writable. Using temp fallback: {temp_db_path}"
            )
            self.db_path = temp_db_path  # Update path for reference
            return True

        except (OperationalError, OSError, PermissionError) as e:
            logger.warning(f"Temp database fallback failed: {e}")
            return False

    def _use_memory_database_fallback(self) -> None:
        """
        Use in-memory SQLite database as last resort.

        This allows the application to function even in completely readonly environments,
        but data will not persist between runs.
        """
        try:
            logger.warning(
                "All persistent database options failed. Using in-memory database. "
                "Data will not persist between runs."
            )

            # WHY StaticPool: with the default pool, each new pooled connection to
            # "sqlite:///:memory:" opens a *fresh, empty* database, so sessions on
            # other connections/threads would not see the tables created below.
            # StaticPool keeps one shared connection for all sessions.
            from sqlalchemy.pool import StaticPool

            self.engine = create_engine(
                "sqlite:///:memory:",
                connect_args={"check_same_thread": False},
                poolclass=StaticPool,
            )

            Base.metadata.create_all(self.engine)
            self.SessionLocal = sessionmaker(bind=self.engine)

            self.is_readonly_fallback = True

            # Test that memory database works
            self._test_database_write()

        except Exception as e:
            raise RuntimeError(
                f"Failed to initialize any database (including in-memory fallback): {e}. "
                "This may indicate a deeper system issue."
            ) from e

    def _ensure_directory_writable(self, directory: Path) -> bool:
        """
        Ensure directory exists and is writable.

        Args:
            directory: Directory to check/create

        Returns:
            True if directory is writable, False otherwise
        """
        try:
            # Create directory if it doesn't exist
            directory.mkdir(parents=True, exist_ok=True)

            # Test write permissions by creating a temporary file
            test_file = directory / ".write_test"
            test_file.touch()
            test_file.unlink()  # Clean up

            return True

        except (PermissionError, OSError) as e:
            logger.debug(f"Directory {directory} is not writable: {e}")
            return False

    def _ensure_database_writable(self, db_path: Path) -> bool:
        """
        Check if database file can be created or is writable if it exists.

        Args:
            db_path: Path to the database file

        Returns:
            True if database file is writable, False otherwise
        """
        try:
            if db_path.exists():
                # Check if existing file is writable
                if not os.access(db_path, os.W_OK):
                    logger.debug(f"Database file {db_path} exists but is not writable")
                    return False
            else:
                # Test if we can create the file
                db_path.touch()
                db_path.unlink()  # Clean up test file

            return True

        except (PermissionError, OSError) as e:
            logger.debug(f"Cannot create/write database file {db_path}: {e}")
            return False

    def _test_database_write(self) -> None:
        """
        Test that we can actually write to the database.

        WHY: A plain ``SELECT 1`` succeeds even on a readonly database file, so
        it cannot detect the situations this probe exists for. Creating and
        dropping a scratch table forces SQLite to take a write lock and fails
        immediately ("attempt to write a readonly database") when the file or
        filesystem is not writable, which lets the fallback chain fire.

        Raises:
            OperationalError: If database write test fails
        """
        try:
            session = self.get_session()
            try:
                # DDL forces a real write; create-then-drop leaves the schema
                # unchanged on success.
                session.execute(text("CREATE TABLE IF NOT EXISTS _write_probe (probe INTEGER)"))
                session.execute(text("DROP TABLE IF EXISTS _write_probe"))
                session.commit()
            finally:
                session.close()

        except Exception as e:
            raise OperationalError(f"Database write test failed: {e}", None, None) from e

    def get_session(self) -> Session:
        """Get a new database session."""
        return self.SessionLocal()

    def init_db(self) -> None:
        """Initialize database tables (idempotent; safe to call repeatedly)."""
        Base.metadata.create_all(self.engine)