gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4158 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +905 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +444 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1285 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.11.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.11.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.11.dist-info}/top_level.txt +0 -0
gitflow_analytics/training/pipeline.py (new file)
@@ -0,0 +1,550 @@
+"""Training pipeline for commit classification using PM platform data."""
+
+import logging
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any, Optional
+
+import pandas as pd
+from sqlalchemy import Column, DateTime, Float, ForeignKey, Integer, String, Text, create_engine
+from sqlalchemy.orm import declarative_base, sessionmaker
+
+from ..classification.classifier import CommitClassifier
+from ..config import Config
+from ..core.analyzer import GitAnalyzer
+from ..core.cache import GitAnalysisCache
+
+logger = logging.getLogger(__name__)
+
+# Database models for training data
+TrainingBase = declarative_base()
+
+
+class TrainingSession(TrainingBase):
+    """Store training session metadata."""
+
+    __tablename__ = "training_sessions"
+
+    id = Column(String, primary_key=True)
+    name = Column(String)
+    created_at = Column(DateTime)
+    model_type = Column(String)
+    training_examples = Column(Integer)
+    validation_split = Column(Float)
+    accuracy = Column(Float)
+    precision = Column(Float)
+    recall = Column(Float)
+    f1_score = Column(Float)
+    model_path = Column(String)
+    config_hash = Column(String)
+
+
+class TrainingData(TrainingBase):
+    """Store individual training examples."""
+
+    __tablename__ = "training_data"
+
+    id = Column(Integer, primary_key=True, autoincrement=True)
+    session_id = Column(String, ForeignKey("training_sessions.id"))
+    commit_hash = Column(String, index=True)
+    repository = Column(String)
+    message = Column(Text)
+    author = Column(String)
+    timestamp = Column(DateTime)
+    files_changed = Column(Integer)
+    insertions = Column(Integer)
+    deletions = Column(Integer)
+    ticket_id = Column(String)
+    ticket_type = Column(String)
+    ticket_platform = Column(String)
+    label = Column(String)
+    confidence = Column(Float)
+    created_at = Column(DateTime)
+
+
+class CommitClassificationTrainer:
+    """Train commit classification models using PM platform data."""
+
+    # Mapping from PM ticket types to classification categories
+    TICKET_TYPE_MAPPING = {
+        # Bug types
+        "bug": "bug_fix",
+        "defect": "bug_fix",
+        "issue": "bug_fix",
+        "incident": "bug_fix",
+        "problem": "bug_fix",
+        # Feature types
+        "feature": "feature",
+        "story": "feature",
+        "user story": "feature",
+        "new feature": "feature",
+        "enhancement": "feature",
+        "epic": "feature",
+        "historia": "feature",  # EWTN custom type (Spanish for Story)
+        # Task/maintenance types
+        "task": "maintenance",
+        "chore": "maintenance",
+        "subtask": "maintenance",
+        "sub-task": "maintenance",
+        # Documentation types
+        "documentation": "documentation",
+        "docs": "documentation",
+        # Improvement/refactoring types
+        "improvement": "refactor",
+        "refactoring": "refactor",
+        "technical debt": "refactor",
+        "optimization": "refactor",
+        # Test types
+        "test": "test",
+        "testing": "test",
+        "qa": "test",
+        # Other types
+        "security": "security",
+        "hotfix": "hotfix",
+        "research": "other",
+        "spike": "other",
+    }
+
+    def __init__(
+        self,
+        config: Config,
+        cache: GitAnalysisCache,
+        orchestrator: Any,
+        training_config: Optional[dict[str, Any]] = None,
+    ):
+        """Initialize the training pipeline.
+
+        Args:
+            config: GitFlow Analytics configuration
+            cache: Cache instance
+            orchestrator: Integration orchestrator with PM platforms
+            training_config: Training-specific configuration
+        """
+        self.config = config
+        self.cache = cache
+        self.orchestrator = orchestrator
+        self.training_config = training_config or {}
+
+        # Initialize database for training data
+        self.db_path = cache.cache_dir / "training_data.db"
+        self.engine = create_engine(f"sqlite:///{self.db_path}")
+        TrainingBase.metadata.create_all(self.engine)
+        self.Session = sessionmaker(bind=self.engine)
+
+        # Initialize classifier
+        self.classifier = CommitClassifier(
+            config=(
+                config.analysis.commit_classification.__dict__
+                if hasattr(config.analysis, "commit_classification")
+                else {}
+            ),
+            cache_dir=cache.cache_dir,
+        )
+
+        logger.info(f"Initialized training pipeline with cache at {cache.cache_dir}")
+
+    def train(
+        self, repositories: list[Any], since: datetime, session_name: Optional[str] = None
+    ) -> dict[str, Any]:
+        """Train a classification model using PM platform data.
+
+        Args:
+            repositories: List of repository configurations
+            since: Start date for commit extraction
+            session_name: Optional name for this training session
+
+        Returns:
+            Training results dictionary
+        """
+        session_id = self._create_training_session(session_name)
+
+        try:
+            # Step 1: Extract commits with ticket references
+            logger.info("Extracting commits with ticket references...")
+            labeled_commits = self._extract_labeled_commits(repositories, since)
+
+            if len(labeled_commits) < self.training_config.get("min_training_examples", 50):
+                raise ValueError(
+                    f"Insufficient training data: {len(labeled_commits)} examples found, "
+                    f"minimum {self.training_config.get('min_training_examples', 50)} required"
+                )
+
+            # Step 2: Store training data
+            logger.info(f"Storing {len(labeled_commits)} training examples...")
+            self._store_training_data(session_id, labeled_commits)
+
+            # Step 3: Train the model
+            logger.info("Training classification model...")
+            training_data = [(commit["commit_data"], commit["label"]) for commit in labeled_commits]
+            results = self.classifier.train_model(
+                training_data, validation_split=self.training_config.get("validation_split", 0.2)
+            )
+
+            # Step 4: Update session with results
+            self._update_training_session(session_id, results, len(labeled_commits))
+
+            # Step 5: Save training data CSV if requested
+            if self.training_config.get("save_training_data", False):
+                self._export_training_data(session_id)
+
+            return {
+                "session_id": session_id,
+                "training_examples": len(labeled_commits),
+                "accuracy": results.get("accuracy", 0.0),
+                "results": results,
+            }
+
+        except Exception as e:
+            logger.error(f"Training failed: {e}")
+            self._mark_session_failed(session_id, str(e))
+            raise
+
+    def _extract_labeled_commits(
+        self, repositories: list[Any], since: datetime
+    ) -> list[dict[str, Any]]:
+        """Extract commits with PM platform labels.
+
+        Args:
+            repositories: List of repository configurations
+            since: Start date for commit extraction
+
+        Returns:
+            List of labeled commit dictionaries
+        """
+        labeled_commits = []
+        analyzer = GitAnalyzer(
+            self.cache,
+            batch_size=getattr(self.config.analysis, "batch_size", 1000),
+            allowed_ticket_platforms=getattr(
+                self.config.analysis, "allowed_ticket_platforms", None
+            ),
+            story_point_patterns=getattr(self.config.analysis, "story_point_patterns", None),
+        )
+
+        for repo_config in repositories:
+            if not repo_config.path.exists():
+                logger.warning(f"Repository path does not exist: {repo_config.path}")
+                continue
+
+            logger.info(f"Analyzing repository: {repo_config.path}")
+
+            # Extract commits
+            try:
+                commits = analyzer.analyze_repository(
+                    repo_config.path, since=since, branch=repo_config.branch
+                )
+
+                # Filter commits with ticket references
+                for commit in commits:
+                    ticket_refs = commit.get("ticket_references", [])
+                    if not ticket_refs:
+                        continue
+
+                    # Get ticket data from PM platforms
+                    ticket_data = self._fetch_ticket_data(ticket_refs)
+                    if not ticket_data:
+                        continue
+
+                    # Determine label from ticket type
+                    label = self._determine_label(ticket_data)
+                    if label:
+                        # Normalize commit data to ensure files_changed is a list
+                        normalized_commit = self._normalize_commit_data(commit)
+                        labeled_commits.append(
+                            {
+                                "commit_data": normalized_commit,
+                                "ticket_data": ticket_data,
+                                "label": label,
+                                "repository": repo_config.name,
+                            }
+                        )
+
+            except Exception as e:
+                logger.error(f"Failed to analyze repository {repo_config.path}: {e}")
+                continue
+
+        return labeled_commits
+
+    def _normalize_commit_data(self, commit: dict[str, Any]) -> dict[str, Any]:
+        """Normalize commit data to ensure consistency.
+
+        Args:
+            commit: Original commit data
+
+        Returns:
+            Normalized commit data with files_changed as a list
+        """
+        normalized = commit.copy()
+
+        # Ensure files_changed is a list
+        files_changed = commit.get("files_changed", [])
+        if isinstance(files_changed, int):
+            # If it's an integer count, we can't reconstruct the file list
+            # Store the count separately and use empty list for files
+            normalized["files_changed_count"] = files_changed
+            normalized["files_changed"] = []
+        elif isinstance(files_changed, list):
+            # If it's already a list, keep it and also store the count
+            normalized["files_changed"] = files_changed
+            normalized["files_changed_count"] = len(files_changed)
+        else:
+            # Fallback for unexpected types
+            normalized["files_changed"] = []
+            normalized["files_changed_count"] = 0
+
+        return normalized
+
+    def _fetch_ticket_data(self, ticket_refs: list[dict[str, str]]) -> list[dict[str, Any]]:
+        """Fetch ticket data from PM platforms.
+
+        Args:
+            ticket_refs: List of ticket references
+
+        Returns:
+            List of ticket data dictionaries
+        """
+        if not self.orchestrator.pm_orchestrator:
+            return []
+
+        # Get list of configured platforms
+        configured_platforms = self.orchestrator.pm_orchestrator.get_active_platforms()
+        ticket_data = []
+
+        for ref in ticket_refs:
+            platform = ref.get("platform", "")
+            ticket_id = ref.get("id", "")
+
+            if not platform or not ticket_id:
+                continue
+
+            # Skip platforms that aren't configured
+            if platform not in configured_platforms:
+                logger.debug(f"Skipping ticket {ticket_id} from unconfigured platform {platform}")
+                continue
+
+            try:
+                # Fetch ticket from PM platform
+                tickets = self.orchestrator.pm_orchestrator.get_issues_by_keys(
+                    platform, [ticket_id]
+                )
+
+                if tickets and ticket_id in tickets:
+                    ticket = tickets[ticket_id]
+                    ticket_data.append(
+                        {
+                            "id": ticket_id,
+                            "platform": platform,
+                            "type": ticket.issue_type.value if ticket.issue_type else "unknown",
+                            "title": ticket.title,
+                            "status": ticket.status.value if ticket.status else "unknown",
+                        }
+                    )
+
+            except Exception as e:
+                logger.warning(f"Failed to fetch ticket {ticket_id} from {platform}: {e}")
+                continue
+
+        return ticket_data
+
+    def _determine_label(self, ticket_data: list[dict[str, Any]]) -> Optional[str]:
+        """Determine classification label from ticket data.
+
+        Args:
+            ticket_data: List of ticket data dictionaries
+
+        Returns:
+            Classification label or None
+        """
+        if not ticket_data:
+            return None
+
+        # Count ticket types
+        type_counts = {}
+        for ticket in ticket_data:
+            ticket_type = ticket.get("type", "").lower()
+            mapped_type = self.TICKET_TYPE_MAPPING.get(ticket_type, None)
+
+            if mapped_type:
+                type_counts[mapped_type] = type_counts.get(mapped_type, 0) + 1
+
+        if not type_counts:
+            return None
+
+        # Return most common type
+        return max(type_counts.items(), key=lambda x: x[1])[0]
+
+    def _create_training_session(self, name: Optional[str] = None) -> str:
+        """Create a new training session.
+
+        Args:
+            name: Optional session name
+
+        Returns:
+            Session ID
+        """
+        import uuid
+
+        session_id = str(uuid.uuid4())
+        session = TrainingSession(
+            id=session_id,
+            name=name or f"training_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
+            created_at=datetime.now(timezone.utc),
+            model_type=self.training_config.get("model_type", "random_forest"),
+            validation_split=self.training_config.get("validation_split", 0.2),
+        )
+
+        with self.Session() as db_session:
+            db_session.add(session)
+            db_session.commit()
+
+        return session_id
+
+    def _store_training_data(self, session_id: str, labeled_commits: list[dict[str, Any]]) -> None:
+        """Store training data in database.
+
+        Args:
+            session_id: Training session ID
+            labeled_commits: List of labeled commit data
+        """
+        with self.Session() as db_session:
+            for item in labeled_commits:
+                commit = item["commit_data"]
+                ticket_data = item["ticket_data"]
+
+                # Use first ticket for primary data
+                primary_ticket = ticket_data[0] if ticket_data else {}
+
+                # Handle files_changed being either int or list
+                files_changed_value = commit.get("files_changed", 0)
+                if isinstance(files_changed_value, int):
+                    files_changed_count = files_changed_value
+                elif isinstance(files_changed_value, list):
+                    files_changed_count = len(files_changed_value)
+                else:
+                    files_changed_count = 0
+
+                training_example = TrainingData(
+                    session_id=session_id,
+                    commit_hash=commit.get("hash", ""),
+                    repository=item["repository"],
+                    message=commit.get("message", ""),
+                    author=commit.get("author_name", ""),
+                    timestamp=commit.get("timestamp"),
+                    files_changed=files_changed_count,
+                    insertions=commit.get("insertions", 0),
+                    deletions=commit.get("deletions", 0),
+                    ticket_id=primary_ticket.get("id", ""),
+                    ticket_type=primary_ticket.get("type", ""),
+                    ticket_platform=primary_ticket.get("platform", ""),
+                    label=item["label"],
+                    confidence=1.0,  # High confidence for PM-based labels
+                    created_at=datetime.now(timezone.utc),
+                )
+
+                db_session.add(training_example)
+
+            db_session.commit()
+
+    def _update_training_session(
+        self, session_id: str, results: dict[str, Any], num_examples: int
+    ) -> None:
+        """Update training session with results.
+
+        Args:
+            session_id: Training session ID
+            results: Training results
+            num_examples: Number of training examples
+        """
+        with self.Session() as db_session:
+            session = db_session.query(TrainingSession).filter_by(id=session_id).first()
+            if session:
+                session.training_examples = num_examples
+                session.accuracy = results.get("accuracy", 0.0)
+                session.precision = results.get("precision", 0.0)
+                session.recall = results.get("recall", 0.0)
+                session.f1_score = results.get("f1_score", 0.0)
+                session.model_path = str(self.classifier.model_path)
+                db_session.commit()
+
+    def _mark_session_failed(self, session_id: str, error: str) -> None:
+        """Mark a training session as failed.
+
+        Args:
+            session_id: Training session ID
+            error: Error message
+        """
+        with self.Session() as db_session:
+            session = db_session.query(TrainingSession).filter_by(id=session_id).first()
+            if session:
+                session.accuracy = -1.0  # Indicates failure
+                db_session.commit()
+
+    def _export_training_data(self, session_id: str) -> Path:
+        """Export training data to CSV.
+
+        Args:
+            session_id: Training session ID
+
+        Returns:
+            Path to exported CSV file
+        """
+        output_path = self.cache.cache_dir / f"training_data_{session_id[:8]}.csv"
+
+        with self.Session() as db_session:
+            data = db_session.query(TrainingData).filter_by(session_id=session_id).all()
+
+            rows = []
+            for item in data:
+                rows.append(
+                    {
+                        "commit_hash": item.commit_hash,
+                        "repository": item.repository,
+                        "message": item.message,
+                        "author": item.author,
+                        "timestamp": item.timestamp,
+                        "files_changed": item.files_changed,
+                        "insertions": item.insertions,
+                        "deletions": item.deletions,
+                        "ticket_id": item.ticket_id,
+                        "ticket_type": item.ticket_type,
+                        "ticket_platform": item.ticket_platform,
+                        "label": item.label,
+                        "confidence": item.confidence,
+                    }
+                )
+
+            df = pd.DataFrame(rows)
+            df.to_csv(output_path, index=False)
+
+        logger.info(f"Exported training data to {output_path}")
+
+        return output_path
+
+    def get_training_history(self) -> list[dict[str, Any]]:
+        """Get history of training sessions.
+
+        Returns:
+            List of training session summaries
+        """
+        with self.Session() as db_session:
+            sessions = (
+                db_session.query(TrainingSession).order_by(TrainingSession.created_at.desc()).all()
+            )
+
+            history = []
+            for session in sessions:
+                history.append(
+                    {
+                        "id": session.id,
+                        "name": session.name,
+                        "created_at": session.created_at,
+                        "model_type": session.model_type,
+                        "training_examples": session.training_examples,
+                        "accuracy": session.accuracy,
+                        "precision": session.precision,
+                        "recall": session.recall,
+                        "f1_score": session.f1_score,
+                    }
+                )
+
+        return history
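
For orientation, the sketch below shows how the new CommitClassificationTrainer could be driven, based only on the constructor and train() signatures added in this hunk. It is illustrative and not part of the diff; the config, cache, orchestrator, and repositories objects are assumed to be supplied by the surrounding application wiring.

# Illustrative sketch only (not part of the released code). Assumes `config` is a
# loaded gitflow_analytics Config, `cache` is a GitAnalysisCache, `orchestrator`
# is the integration orchestrator with PM platforms, and `repositories` is a list
# of repository configurations with .path/.branch/.name attributes.
from datetime import datetime, timedelta, timezone

from gitflow_analytics.training.pipeline import CommitClassificationTrainer

trainer = CommitClassificationTrainer(
    config=config,
    cache=cache,
    orchestrator=orchestrator,
    training_config={
        "min_training_examples": 50,  # defaults mirrored from the code above
        "validation_split": 0.2,
        "save_training_data": True,   # also exports a CSV next to the training DB
    },
)

results = trainer.train(
    repositories=repositories,
    since=datetime.now(timezone.utc) - timedelta(weeks=12),  # 12-week training window
)
print(f"{results['training_examples']} examples, accuracy {results['accuracy']:.2f}")

# Previous sessions are recorded in training_data.db and can be listed:
for session in trainer.get_training_history():
    print(session["name"], session["accuracy"])
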
gitflow_analytics/tui/__init__.py
@@ -2,4 +2,4 @@
 
 from .app import GitFlowAnalyticsApp
 
-__all__ = ["GitFlowAnalyticsApp"]
+__all__ = ["GitFlowAnalyticsApp"]