gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4158 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +905 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +444 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1285 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
gitflow_analytics/models/database.py
@@ -1,12 +1,30 @@
 """Database models for GitFlow Analytics using SQLAlchemy."""

+import logging
+import os
+import tempfile
 from datetime import datetime
 from pathlib import Path
-
-from sqlalchemy import JSON, Boolean, Column, DateTime, Float, ForeignKey, Index, Integer, String, create_engine
-from sqlalchemy.orm import Session, sessionmaker, declarative_base
 from typing import Any

+from sqlalchemy import (
+    JSON,
+    Boolean,
+    Column,
+    DateTime,
+    Float,
+    ForeignKey,
+    Index,
+    Integer,
+    String,
+    create_engine,
+    text,
+)
+from sqlalchemy.exc import OperationalError
+from sqlalchemy.orm import Session, declarative_base, sessionmaker
+
+logger = logging.getLogger(__name__)
+
 Base: Any = declarative_base()


@@ -164,140 +182,912 @@ class IssueCache(Base):

 class QualitativeCommitData(Base):
     """Extended commit data with qualitative analysis results.
-
+
     This table stores the results of qualitative analysis performed on commits,
     including change type classification, domain analysis, risk assessment,
     and processing metadata.
     """
-
-
+
+    __tablename__ = "qualitative_commits"
+
     # Link to existing commit
-    commit_id = Column(Integer, ForeignKey(
-
+    commit_id = Column(Integer, ForeignKey("cached_commits.id"), primary_key=True)
+
     # Classification results
     change_type = Column(String, nullable=False)
     change_type_confidence = Column(Float, nullable=False)
-    business_domain = Column(String, nullable=False)
+    business_domain = Column(String, nullable=False)
     domain_confidence = Column(Float, nullable=False)
     risk_level = Column(String, nullable=False)
     risk_factors = Column(JSON)  # List of risk factors
-
+
     # Intent and context analysis
     intent_signals = Column(JSON)  # Intent analysis results
     collaboration_patterns = Column(JSON)  # Team interaction patterns
     technical_context = Column(JSON)  # Technical context information
-
+
     # Processing metadata
     processing_method = Column(String, nullable=False)  # 'nlp' or 'llm'
     processing_time_ms = Column(Float)
     confidence_score = Column(Float, nullable=False)
-
+
     # Timestamps
     analyzed_at = Column(DateTime, default=datetime.utcnow)
     analysis_version = Column(String, default="1.0")
-
+
     # Indexes for efficient querying
     __table_args__ = (
-        Index(
-        Index(
-        Index(
-        Index(
-        Index(
-        Index(
+        Index("idx_change_type", "change_type"),
+        Index("idx_business_domain", "business_domain"),
+        Index("idx_risk_level", "risk_level"),
+        Index("idx_qualitative_confidence", "confidence_score"),
+        Index("idx_processing_method", "processing_method"),
+        Index("idx_analyzed_at", "analyzed_at"),
     )


 class PatternCache(Base):
     """Cache for learned patterns and classifications.
-
+
     This table stores frequently occurring patterns to avoid reprocessing
     similar commits and to improve classification accuracy over time.
     """
-
-
+
+    __tablename__ = "pattern_cache"
+
     id = Column(Integer, primary_key=True)
-
+
     # Pattern identification
     message_hash = Column(String, nullable=False, unique=True)
     semantic_fingerprint = Column(String, nullable=False)
-
+
     # Cached classification results
     classification_result = Column(JSON, nullable=False)
     confidence_score = Column(Float, nullable=False)
-
+
     # Usage tracking for cache management
     hit_count = Column(Integer, default=1)
     last_used = Column(DateTime, default=datetime.utcnow)
     created_at = Column(DateTime, default=datetime.utcnow)
-
+
     # Source tracking
     source_method = Column(String, nullable=False)  # 'nlp' or 'llm'
     source_model = Column(String)  # Model/method that created this pattern
-
+
     # Performance tracking
     avg_processing_time_ms = Column(Float)
-
+
     # Indexes for pattern matching and cleanup
     __table_args__ = (
-        Index(
-        Index(
-        Index(
-        Index(
-        Index(
+        Index("idx_semantic_fingerprint", "semantic_fingerprint"),
+        Index("idx_pattern_confidence", "confidence_score"),
+        Index("idx_hit_count", "hit_count"),
+        Index("idx_last_used", "last_used"),
+        Index("idx_source_method", "source_method"),
     )


 class LLMUsageStats(Base):
     """Track LLM usage statistics for cost monitoring and optimization.
-
+
     This table helps monitor LLM API usage, costs, and performance to
     optimize the balance between speed, accuracy, and cost.
     """
-
-
+
+    __tablename__ = "llm_usage_stats"
+
     id = Column(Integer, primary_key=True)
-
+
     # API call metadata
     model_name = Column(String, nullable=False)
-    api_provider = Column(String, default=
+    api_provider = Column(String, default="openrouter")
     timestamp = Column(DateTime, default=datetime.utcnow)
-
+
     # Usage metrics
     input_tokens = Column(Integer, nullable=False)
     output_tokens = Column(Integer, nullable=False)
     processing_time_ms = Column(Float, nullable=False)
-
+
     # Cost tracking
     estimated_cost_usd = Column(Float)
     cost_per_token = Column(Float)
-
+
     # Batch information
     batch_size = Column(Integer, default=1)  # Number of commits processed
     batch_id = Column(String)  # Group related calls
-
+
     # Quality metrics
     avg_confidence_score = Column(Float)
     success = Column(Boolean, default=True)
     error_message = Column(String)
-
+
     # Indexes for analysis and monitoring
     __table_args__ = (
-        Index(
-        Index(
-        Index(
-        Index(
+        Index("idx_model_timestamp", "model_name", "timestamp"),
+        Index("idx_llm_timestamp", "timestamp"),
+        Index("idx_llm_batch_id", "batch_id"),
+        Index("idx_success", "success"),
+    )
+
+
+class TrainingData(Base):
+    """Training data for commit classification models.
+
+    This table stores labeled training examples collected from PM platforms
+    and manual annotations for training and improving classification models.
+    """
+
+    __tablename__ = "training_data"
+
+    id = Column(Integer, primary_key=True)
+
+    # Commit identification
+    commit_hash = Column(String, nullable=False)
+    commit_message = Column(String, nullable=False)
+    files_changed = Column(JSON)  # List of changed files
+    repo_path = Column(String, nullable=False)
+
+    # Classification labels
+    category = Column(String, nullable=False)  # feature, bug_fix, refactor, etc.
+    confidence = Column(Float, nullable=False, default=1.0)  # Label confidence (0-1)
+
+    # Source information
+    source_type = Column(String, nullable=False)  # 'pm_platform', 'manual', 'inferred'
+    source_platform = Column(String)  # 'jira', 'github', 'clickup', etc.
+    source_ticket_id = Column(String)  # Original ticket/issue ID
+    source_ticket_type = Column(String)  # Bug, Story, Task, etc.
+
+    # Training metadata
+    training_session_id = Column(String, nullable=False)  # Groups related training data
+    created_at = Column(DateTime, default=datetime.utcnow)
+    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+
+    # Quality assurance
+    validated = Column(Boolean, default=False)  # Human validation flag
+    validation_notes = Column(String)  # Notes from validation process
+
+    # Feature extraction (for ML training)
+    extracted_features = Column(JSON)  # Pre-computed features for ML
+
+    # Indexes for efficient querying and training
+    __table_args__ = (
+        Index("idx_training_commit_hash", "commit_hash"),
+        Index("idx_training_category", "category"),
+        Index("idx_training_source", "source_type", "source_platform"),
+        Index("idx_training_session", "training_session_id"),
+        Index("idx_training_created", "created_at"),
+        Index("idx_training_validated", "validated"),
+        Index("idx_commit_repo", "commit_hash", "repo_path", unique=True),
+    )
+
+
+class RepositoryAnalysisStatus(Base):
+    """Track repository-level analysis completion status for cache-first workflow.
+
+    WHY: This table enables "fetch once, report many" behavior by tracking
+    which repositories have been fully analyzed for specific time periods.
+    Prevents re-fetching Git data when only generating different reports.
+    """
+
+    __tablename__ = "repository_analysis_status"
+
+    id = Column(Integer, primary_key=True)
+
+    # Repository identification
+    repo_path = Column(String, nullable=False)
+    repo_name = Column(String, nullable=False)  # For display purposes
+    project_key = Column(String, nullable=False)
+
+    # Analysis period
+    analysis_start = Column(DateTime, nullable=False)  # Start of analysis period
+    analysis_end = Column(DateTime, nullable=False)  # End of analysis period
+    weeks_analyzed = Column(Integer, nullable=False)  # Number of weeks
+
+    # Completion tracking
+    git_analysis_complete = Column(Boolean, default=False)
+    commit_count = Column(Integer, default=0)
+    pr_analysis_complete = Column(Boolean, default=False)
+    pr_count = Column(Integer, default=0)
+    ticket_analysis_complete = Column(Boolean, default=False)
+    ticket_count = Column(Integer, default=0)
+
+    # Developer identity resolution
+    identity_resolution_complete = Column(Boolean, default=False)
+    unique_developers = Column(Integer, default=0)
+
+    # Analysis metadata
+    last_updated = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+    analysis_version = Column(String, default="2.0")  # For tracking schema changes
+
+    # Configuration hash to detect config changes
+    config_hash = Column(String, nullable=True)  # MD5 hash of relevant config
+
+    # Analysis performance metrics
+    processing_time_seconds = Column(Float, nullable=True)
+    cache_hit_rate_percent = Column(Float, nullable=True)
+
+    # Status tracking
+    status = Column(String, default="pending")  # pending, in_progress, completed, failed
+    error_message = Column(String, nullable=True)
+
+    # Indexes for efficient querying
+    __table_args__ = (
+        Index("idx_repo_analysis_path", "repo_path"),
+        Index("idx_repo_analysis_period", "analysis_start", "analysis_end"),
+        Index("idx_repo_analysis_status", "status"),
+        Index(
+            "idx_repo_analysis_unique", "repo_path", "analysis_start", "analysis_end", unique=True
+        ),
+        Index("idx_repo_analysis_updated", "last_updated"),
+    )
+
+
+class TrainingSession(Base):
+    """Training session metadata and results.
+
+    This table tracks individual training runs, their configurations,
+    and performance metrics for model versioning and comparison.
+    """
+
+    __tablename__ = "training_sessions"
+
+    id = Column(Integer, primary_key=True)
+    session_id = Column(String, unique=True, nullable=False)
+
+    # Session metadata
+    started_at = Column(DateTime, default=datetime.utcnow)
+    completed_at = Column(DateTime)
+    status = Column(String, default="running")  # running, completed, failed
+
+    # Configuration
+    config = Column(JSON, nullable=False)  # Training configuration
+    weeks_analyzed = Column(Integer)  # Time period covered
+    repositories = Column(JSON)  # List of repositories analyzed
+
+    # Data statistics
+    total_commits = Column(Integer, default=0)
+    labeled_commits = Column(Integer, default=0)
+    training_examples = Column(Integer, default=0)
+    validation_examples = Column(Integer, default=0)
+
+    # PM platform coverage
+    pm_platforms = Column(JSON)  # List of PM platforms used
+    ticket_coverage_pct = Column(Float)  # Percentage of commits with tickets
+
+    # Training results
+    model_accuracy = Column(Float)  # Overall accuracy
+    category_metrics = Column(JSON)  # Per-category precision/recall/f1
+    validation_loss = Column(Float)  # Validation loss
+
+    # Model storage
+    model_path = Column(String)  # Path to saved model
+    model_version = Column(String)  # Version identifier
+    model_size_mb = Column(Float)  # Model file size
+
+    # Performance metrics
+    training_time_minutes = Column(Float)
+    prediction_time_ms = Column(Float)  # Average prediction time
+
+    # Notes and errors
+    notes = Column(String)
+    error_message = Column(String)
+
+    # Indexes for session management
+    __table_args__ = (
+        Index("idx_session_id", "session_id"),
+        Index("idx_session_status", "status"),
+        Index("idx_session_started", "started_at"),
+        Index("idx_session_model_version", "model_version"),
+    )
+
+
+class ClassificationModel(Base):
+    """Versioned storage for trained classification models.
+
+    This table manages different versions of trained models with
+    metadata for model selection and performance tracking.
+    """
+
+    __tablename__ = "classification_models"
+
+    id = Column(Integer, primary_key=True)
+    model_id = Column(String, unique=True, nullable=False)
+
+    # Model metadata
+    name = Column(String, nullable=False)
+    version = Column(String, nullable=False)
+    model_type = Column(String, nullable=False)  # 'sklearn', 'spacy', 'custom'
+    created_at = Column(DateTime, default=datetime.utcnow)
+
+    # Training information
+    training_session_id = Column(String, ForeignKey("training_sessions.session_id"))
+    trained_on_commits = Column(Integer, nullable=False)
+    training_accuracy = Column(Float, nullable=False)
+    validation_accuracy = Column(Float, nullable=False)
+
+    # Model performance
+    categories = Column(JSON, nullable=False)  # List of supported categories
+    performance_metrics = Column(JSON)  # Detailed performance metrics
+    feature_importance = Column(JSON)  # Feature importance scores
+
+    # Model storage and configuration
+    model_binary = Column(JSON)  # Serialized model (for small models)
+    model_file_path = Column(String)  # Path to model file (for large models)
+    model_config = Column(JSON)  # Model hyperparameters and settings
+
+    # Usage tracking
+    active = Column(Boolean, default=True)  # Whether model is active
+    usage_count = Column(Integer, default=0)  # Number of times used
+    last_used = Column(DateTime)
+
+    # Model validation
+    cross_validation_scores = Column(JSON)  # Cross-validation results
+    test_accuracy = Column(Float)  # Hold-out test set accuracy
+
+    # Indexes for model management
+    __table_args__ = (
+        Index("idx_model_id", "model_id"),
+        Index("idx_model_version", "version"),
+        Index("idx_model_active", "active"),
+        Index("idx_model_accuracy", "validation_accuracy"),
+        Index("idx_model_created", "created_at"),
+    )
+
+
+class DailyCommitBatch(Base):
+    """Daily batches of commits organized for efficient data collection and retrieval.
+
+    WHY: This table enables the two-step fetch/analyze process by storing raw commit data
+    in daily batches with full metadata before classification. Each row represents
+    one day's worth of commits for a specific project, enabling efficient batch retrieval.
+    """
+
+    __tablename__ = "daily_commit_batches"
+
+    # Primary key components
+    id = Column(Integer, primary_key=True)
+    date = Column(DateTime, nullable=False)  # Date for the commit batch (YYYY-MM-DD)
+    project_key = Column(String, nullable=False)  # Project identifier
+    repo_path = Column(String, nullable=False)  # Repository path for identification
+
+    # Batch metadata
+    commit_count = Column(Integer, default=0)  # Number of commits in this batch
+    total_files_changed = Column(Integer, default=0)
+    total_lines_added = Column(Integer, default=0)
+    total_lines_deleted = Column(Integer, default=0)
+
+    # Developers active on this day
+    active_developers = Column(JSON)  # List of developer canonical IDs
+    unique_tickets = Column(JSON)  # List of ticket IDs referenced on this day
+
+    # Processing status
+    fetched_at = Column(DateTime, default=datetime.utcnow)
+    classification_status = Column(
+        String, default="pending"
+    )  # pending, processing, completed, failed
+    classified_at = Column(DateTime, nullable=True)
+
+    # Batch context for LLM classification
+    context_summary = Column(String, nullable=True)  # Brief summary of day's activity
+
+    # Indexes for efficient retrieval by date range and project
+    __table_args__ = (
+        Index("idx_batch_date", "date"),
+        Index("idx_daily_batch_project", "project_key"),
+        Index("idx_batch_repo", "repo_path"),
+        Index("idx_daily_batch_status", "classification_status"),
+        Index("idx_batch_unique", "date", "project_key", "repo_path", unique=True),
+        Index("idx_batch_date_range", "date", "project_key"),
+    )
+
+
+class DetailedTicketData(Base):
+    """Enhanced ticket storage with full metadata for context-aware classification.
+
+    WHY: The two-step process requires full ticket context (descriptions, types, etc.)
+    to improve classification accuracy. This extends the existing IssueCache with
+    fields specifically needed for classification context.
+    """
+
+    __tablename__ = "detailed_tickets"
+
+    id = Column(Integer, primary_key=True)
+
+    # Ticket identification (enhanced from IssueCache)
+    platform = Column(String, nullable=False)  # 'jira', 'github', 'clickup', 'linear'
+    ticket_id = Column(String, nullable=False)
+    project_key = Column(String, nullable=False)
+
+    # Core ticket data
+    title = Column(String)
+    description = Column(String)  # Full description for context
+    summary = Column(String)  # Brief summary extracted from description
+    ticket_type = Column(String)  # Bug, Story, Task, Epic, etc.
+    status = Column(String)
+    priority = Column(String)
+    labels = Column(JSON)  # List of labels/tags
+
+    # People and dates
+    assignee = Column(String, nullable=True)
+    reporter = Column(String, nullable=True)
+    created_at = Column(DateTime)
+    updated_at = Column(DateTime)
+    resolved_at = Column(DateTime, nullable=True)
+
+    # Metrics for classification context
+    story_points = Column(Integer, nullable=True)
+    original_estimate = Column(String, nullable=True)  # Time estimate
+    time_spent = Column(String, nullable=True)
+
+    # Relationships for context
+    epic_key = Column(String, nullable=True)  # Parent epic
+    parent_key = Column(String, nullable=True)  # Parent issue
+    subtasks = Column(JSON)  # List of subtask keys
+    linked_issues = Column(JSON)  # List of linked issue keys
+
+    # Classification hints from ticket type/labels
+    classification_hints = Column(JSON)  # Extracted hints for commit classification
+    business_domain = Column(String, nullable=True)  # Domain extracted from ticket
+
+    # Platform-specific data
+    platform_data = Column(JSON)  # Additional platform-specific fields
+
+    # Fetch metadata
+    fetched_at = Column(DateTime, default=datetime.utcnow)
+    fetch_version = Column(String, default="2.0")  # Version for schema evolution
+
+    # Indexes for efficient lookup and context building
+    __table_args__ = (
+        Index("idx_detailed_platform_ticket", "platform", "ticket_id", unique=True),
+        Index("idx_detailed_project", "project_key"),
+        Index("idx_detailed_type", "ticket_type"),
+        Index("idx_detailed_epic", "epic_key"),
+        Index("idx_detailed_created", "created_at"),
+        Index("idx_detailed_status", "status"),
+    )
+
+
+class CommitClassificationBatch(Base):
+    """Batch classification results with context and confidence tracking.
+
+    WHY: This table stores the results of batch LLM classification with full
+    context about what information was used and confidence levels achieved.
+    Enables iterative improvement and debugging of classification quality.
+    """
+
+    __tablename__ = "classification_batches"
+
+    id = Column(Integer, primary_key=True)
+    batch_id = Column(String, unique=True, nullable=False)  # UUID for this batch
+
+    # Batch context
+    project_key = Column(String, nullable=False)
+    week_start = Column(DateTime, nullable=False)  # Monday of the week
+    week_end = Column(DateTime, nullable=False)  # Sunday of the week
+    commit_count = Column(Integer, nullable=False)
+
+    # Context provided to LLM
+    ticket_context = Column(JSON)  # Tickets included in context
+    developer_context = Column(JSON)  # Active developers in this batch
+    project_context = Column(String)  # Project description/domain
+
+    # LLM processing details
+    model_used = Column(String, nullable=False)  # Model identifier
+    prompt_template = Column(String, nullable=False)  # Template used
+    context_tokens = Column(Integer, default=0)  # Tokens used for context
+    completion_tokens = Column(Integer, default=0)  # Tokens in response
+    total_tokens = Column(Integer, default=0)
+
+    # Processing results
+    processing_status = Column(String, default="pending")  # pending, processing, completed, failed
+    started_at = Column(DateTime, default=datetime.utcnow)
+    completed_at = Column(DateTime, nullable=True)
+    processing_time_ms = Column(Float, nullable=True)
+
+    # Quality metrics
+    avg_confidence = Column(Float, nullable=True)  # Average confidence across commits
+    low_confidence_count = Column(Integer, default=0)  # Commits with confidence < 0.7
+    fallback_count = Column(Integer, default=0)  # Commits that fell back to rules
+
+    # Cost tracking
+    estimated_cost_usd = Column(Float, nullable=True)
+    cost_per_commit = Column(Float, nullable=True)
+
+    # Error handling
+    error_message = Column(String, nullable=True)
+    retry_count = Column(Integer, default=0)
+
+    # Indexes for batch management and analysis
+    __table_args__ = (
+        Index("idx_classification_batch_id", "batch_id"),
+        Index("idx_classification_batch_project", "project_key"),
+        Index("idx_batch_week", "week_start", "week_end"),
+        Index("idx_classification_batch_status", "processing_status"),
+        Index("idx_batch_completed", "completed_at"),
+        Index("idx_batch_model", "model_used"),
+    )
+
+
+class CommitTicketCorrelation(Base):
+    """Correlations between commits and tickets for context-aware classification.
+
+    WHY: This table explicitly tracks which commits reference which tickets,
+    enabling the batch classifier to include relevant ticket context when
+    classifying related commits. Improves accuracy by providing business context.
+    """
+
+    __tablename__ = "commit_ticket_correlations"
+
+    id = Column(Integer, primary_key=True)
+
+    # Commit identification
+    commit_hash = Column(String, nullable=False)
+    repo_path = Column(String, nullable=False)
+
+    # Ticket identification
+    ticket_id = Column(String, nullable=False)
+    platform = Column(String, nullable=False)
+    project_key = Column(String, nullable=False)
+
+    # Correlation metadata
+    correlation_type = Column(String, default="direct")  # direct, inferred, related
+    confidence = Column(Float, default=1.0)  # Confidence in correlation
+    extracted_from = Column(String, nullable=False)  # commit_message, branch_name, pr_title
+
+    # Pattern that created this correlation
+    matching_pattern = Column(String, nullable=True)  # Regex pattern that matched
+
+    # Timestamps
+    created_at = Column(DateTime, default=datetime.utcnow)
+    validated = Column(Boolean, default=False)  # Manual validation flag
+
+    # Indexes for efficient correlation lookup
+    __table_args__ = (
+        Index("idx_corr_commit", "commit_hash", "repo_path"),
+        Index("idx_corr_ticket", "ticket_id", "platform"),
+        Index("idx_corr_project", "project_key"),
+        Index("idx_corr_unique", "commit_hash", "repo_path", "ticket_id", "platform", unique=True),
+    )
+
+
+class DailyMetrics(Base):
+    """Daily activity metrics per developer per project with classification data.
+
+    WHY: This table stores daily aggregated metrics for each developer-project combination,
+    enabling quick retrieval by date range for reporting and trend analysis.
+    Each row represents one developer's activity in one project for one day.
+    """
+
+    __tablename__ = "daily_metrics"
+
+    # Primary key components
+    id = Column(Integer, primary_key=True)
+    date = Column(DateTime, nullable=False)  # Date for the metrics (YYYY-MM-DD)
+    developer_id = Column(String, nullable=False)  # Canonical developer ID
+    project_key = Column(String, nullable=False)  # Project identifier
+
+    # Developer information
+    developer_name = Column(String, nullable=False)  # Display name for reports
+    developer_email = Column(String, nullable=False)  # Primary email
+
+    # Classification counts - commit counts by category
+    feature_commits = Column(Integer, default=0)
+    bug_fix_commits = Column(Integer, default=0)
+    refactor_commits = Column(Integer, default=0)
+    documentation_commits = Column(Integer, default=0)
+    maintenance_commits = Column(Integer, default=0)
+    test_commits = Column(Integer, default=0)
+    style_commits = Column(Integer, default=0)
+    build_commits = Column(Integer, default=0)
+    other_commits = Column(Integer, default=0)
+
+    # Aggregate metrics
+    total_commits = Column(Integer, default=0)
+    files_changed = Column(Integer, default=0)
+    lines_added = Column(Integer, default=0)
+    lines_deleted = Column(Integer, default=0)
+    story_points = Column(Integer, default=0)
+
+    # Ticket tracking metrics
+    tracked_commits = Column(Integer, default=0)  # Commits with ticket references
+    untracked_commits = Column(Integer, default=0)  # Commits without ticket references
+    unique_tickets = Column(Integer, default=0)  # Number of unique tickets referenced
+
+    # Work pattern indicators
+    merge_commits = Column(Integer, default=0)
+    complex_commits = Column(Integer, default=0)  # Commits with >5 files changed
+
+    # Metadata
+    created_at = Column(DateTime, default=datetime.utcnow)
+    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
+
+    # Indexes for efficient querying
+    __table_args__ = (
+        Index("idx_daily_date", "date"),
+        Index("idx_daily_developer", "developer_id"),
+        Index("idx_daily_project", "project_key"),
+        Index("idx_daily_date_range", "date", "developer_id", "project_key"),
+        Index("idx_daily_unique", "date", "developer_id", "project_key", unique=True),
+    )
+
+
+class WeeklyTrends(Base):
+    """Weekly trend analysis for developer-project combinations.
+
+    WHY: Pre-calculated weekly trends improve report performance by avoiding
+    repeated calculations. Stores week-over-week changes in activity patterns.
+    """
+
+    __tablename__ = "weekly_trends"
+
+    id = Column(Integer, primary_key=True)
+    week_start = Column(DateTime, nullable=False)  # Monday of the week
+    week_end = Column(DateTime, nullable=False)  # Sunday of the week
+    developer_id = Column(String, nullable=False)
+    project_key = Column(String, nullable=False)
+
+    # Week totals
+    total_commits = Column(Integer, default=0)
+    feature_commits = Column(Integer, default=0)
+    bug_fix_commits = Column(Integer, default=0)
+    refactor_commits = Column(Integer, default=0)
+
+    # Week-over-week changes (percentage)
+    total_commits_change = Column(Float, default=0.0)
+    feature_commits_change = Column(Float, default=0.0)
+    bug_fix_commits_change = Column(Float, default=0.0)
+    refactor_commits_change = Column(Float, default=0.0)
+
+    # Activity indicators
+    days_active = Column(Integer, default=0)  # Number of days with commits
+    avg_commits_per_day = Column(Float, default=0.0)
+
+    # Metadata
+    calculated_at = Column(DateTime, default=datetime.utcnow)
+
+    # Indexes for trend queries
+    __table_args__ = (
+        Index("idx_weekly_start", "week_start"),
+        Index("idx_weekly_dev_proj", "developer_id", "project_key"),
+        Index("idx_weekly_unique", "week_start", "developer_id", "project_key", unique=True),
     )


 class Database:
-    """Database connection manager."""
+    """Database connection manager with robust permission handling."""

     def __init__(self, db_path: Path):
-        """
-
-
-
-
+        """
+        Initialize database connection with proper error handling.
+
+        WHY: This method handles various permission scenarios that can occur
+        in different deployment environments:
+        - Readonly filesystems (Docker containers, CI/CD)
+        - Permission denied on directory creation
+        - Database file creation failures
+        - Fallback to memory database when persistence isn't possible
+
+        DESIGN DECISION: Uses fallback mechanisms rather than failing hard,
+        allowing the application to continue running even in restricted environments.
+
+        Args:
+            db_path: Path to the SQLite database file
+
+        Raises:
+            RuntimeError: If database initialization fails completely
+        """
+        self.db_path = db_path
+        self.is_readonly_fallback = False
+        self.engine = None
+        self.SessionLocal = None
+
+        # Try to create database with proper error handling
+        self._initialize_database()
+
+    def _initialize_database(self) -> None:
+        """
+        Initialize database with comprehensive error handling.
+
+        WHY: Database initialization can fail for multiple reasons:
+        1. Directory doesn't exist and can't be created (permissions)
+        2. Directory exists but database file can't be created (readonly filesystem)
+        3. Database file exists but is readonly
+        4. Filesystem is completely readonly (containers, CI)
+
+        APPROACH: Try primary location first, then fallback strategies
+        """
+        # Strategy 1: Try primary database location
+        if self._try_primary_database():
+            return
+
+        # Strategy 2: Try temp directory fallback
+        if self._try_temp_database_fallback():
+            return
+
+        # Strategy 3: Use in-memory database as last resort
+        self._use_memory_database_fallback()
+
+    def _try_primary_database(self) -> bool:
+        """
+        Attempt to create database at the primary location.
+
+        Returns:
+            True if successful, False if fallback needed
+        """
+        try:
+            # Check if we can create the directory
+            if not self._ensure_directory_writable(self.db_path.parent):
+                return False
+
+            # Check if database file can be created/accessed
+            if not self._ensure_database_writable(self.db_path):
+                return False
+
+            # Try to create the database
+            self.engine = create_engine(
+                f"sqlite:///{self.db_path}",
+                # Add connection args to handle locked databases better
+                connect_args={
+                    "timeout": 30,  # 30 second timeout for database locks
+                    "check_same_thread": False,  # Allow multi-threading
+                },
+            )
+
+            # Test the connection and create tables
+            Base.metadata.create_all(self.engine)
+            self.SessionLocal = sessionmaker(bind=self.engine)
+
+            # Test that we can actually write to the database
+            self._test_database_write()
+
+            logger.info(f"Database initialized successfully at: {self.db_path}")
+            return True
+
+        except (OperationalError, OSError, PermissionError) as e:
+            logger.warning(f"Failed to initialize primary database at {self.db_path}: {e}")
+            return False
+
+    def _try_temp_database_fallback(self) -> bool:
+        """
+        Try to create database in system temp directory as fallback.
+
+        Returns:
+            True if successful, False if fallback needed
+        """
+        try:
+            # Create a temp file that will persist for the session
+            temp_dir = Path(tempfile.gettempdir()) / "gitflow-analytics-cache"
+            temp_dir.mkdir(exist_ok=True, parents=True)
+
+            # Use the same filename but in temp directory
+            temp_db_path = temp_dir / self.db_path.name
+
+            self.engine = create_engine(
+                f"sqlite:///{temp_db_path}",
+                connect_args={
+                    "timeout": 30,
+                    "check_same_thread": False,
+                },
+            )
+
+            Base.metadata.create_all(self.engine)
+            self.SessionLocal = sessionmaker(bind=self.engine)
+
+            # Test write capability
+            self._test_database_write()
+
+            logger.warning(
+                f"Primary database location not writable. Using temp fallback: {temp_db_path}"
+            )
+            self.db_path = temp_db_path  # Update path for reference
+            return True
+
+        except (OperationalError, OSError, PermissionError) as e:
+            logger.warning(f"Temp database fallback failed: {e}")
+            return False
+
+    def _use_memory_database_fallback(self) -> None:
+        """
+        Use in-memory SQLite database as last resort.
+
+        This allows the application to function even in completely readonly environments,
+        but data will not persist between runs.
+        """
+        try:
+            logger.warning(
+                "All persistent database options failed. Using in-memory database. "
+                "Data will not persist between runs."
+            )
+
+            self.engine = create_engine(
+                "sqlite:///:memory:", connect_args={"check_same_thread": False}
+            )
+
+            Base.metadata.create_all(self.engine)
+            self.SessionLocal = sessionmaker(bind=self.engine)
+
+            self.is_readonly_fallback = True
+
+            # Test that memory database works
+            self._test_database_write()
+
+        except Exception as e:
+            raise RuntimeError(
+                f"Failed to initialize any database (including in-memory fallback): {e}. "
+                "This may indicate a deeper system issue."
+            ) from e
+
+    def _ensure_directory_writable(self, directory: Path) -> bool:
+        """
+        Ensure directory exists and is writable.
+
+        Args:
+            directory: Directory to check/create
+
+        Returns:
+            True if directory is writable, False otherwise
+        """
+        try:
+            # Create directory if it doesn't exist
+            directory.mkdir(parents=True, exist_ok=True)
+
+            # Test write permissions by creating a temporary file
+            test_file = directory / ".write_test"
+            test_file.touch()
+            test_file.unlink()  # Clean up
+
+            return True
+
+        except (PermissionError, OSError) as e:
+            logger.debug(f"Directory {directory} is not writable: {e}")
+            return False
+
+    def _ensure_database_writable(self, db_path: Path) -> bool:
+        """
+        Check if database file can be created or is writable if it exists.
+
+        Args:
+            db_path: Path to the database file
+
+        Returns:
+            True if database file is writable, False otherwise
+        """
+        try:
+            if db_path.exists():
+                # Check if existing file is writable
+                if not os.access(db_path, os.W_OK):
+                    logger.debug(f"Database file {db_path} exists but is not writable")
+                    return False
+            else:
+                # Test if we can create the file
+                db_path.touch()
+                db_path.unlink()  # Clean up test file
+
+            return True
+
+        except (PermissionError, OSError) as e:
+            logger.debug(f"Cannot create/write database file {db_path}: {e}")
+            return False
+
+    def _test_database_write(self) -> None:
+        """
+        Test that we can actually write to the database.
+
+        Raises:
+            OperationalError: If database write test fails
+        """
+        try:
+            # Try a simple write operation to verify database is writable
+            session = self.get_session()
+            try:
+                # Just test that we can begin a transaction and rollback
+                session.execute(text("SELECT 1"))
+                session.rollback()
+            finally:
+                session.close()
+
+        except Exception as e:
+            raise OperationalError(f"Database write test failed: {e}", None, None) from e

     def get_session(self) -> Session:
         """Get a new database session."""