gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff compares publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/classification/model.py (new file)
@@ -0,0 +1,455 @@
+"""Machine learning model for commit classification.
+
+This module implements a Random Forest-based commit classification model with
+comprehensive training, validation, and prediction capabilities. The model is
+designed for production use with robust error handling, model persistence,
+and performance monitoring.
+"""
+
+import logging
+import pickle
+from datetime import datetime, timedelta
+from pathlib import Path
+from typing import Any, Optional
+
+import joblib
+import numpy as np
+
+try:
+    from sklearn.ensemble import RandomForestClassifier
+    from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
+    from sklearn.model_selection import cross_val_score, train_test_split
+    from sklearn.preprocessing import LabelEncoder
+
+    SKLEARN_AVAILABLE = True
+except ImportError:
+    SKLEARN_AVAILABLE = False
+    RandomForestClassifier = None
+    LabelEncoder = None
+
+from .feature_extractor import FeatureExtractor
+
+logger = logging.getLogger(__name__)
+
+
+class CommitClassificationModel:
+    """Random Forest-based commit classification model.
+
+    This model provides comprehensive commit classification using a Random Forest
+    classifier trained on 68-dimensional feature vectors. It includes:
+
+    - Robust training pipeline with cross-validation
+    - Model persistence and versioning
+    - Batch prediction capabilities
+    - Performance monitoring and metrics
+    - Graceful fallback when scikit-learn is unavailable
+
+    The model is designed to classify commits into categories such as:
+    - feature: New functionality
+    - bugfix: Bug fixes and corrections
+    - refactor: Code restructuring
+    - docs: Documentation changes
+    - test: Testing-related changes
+    - config: Configuration changes
+    - chore: Maintenance tasks
+    - security: Security-related changes
+    - hotfix: Emergency fixes
+    """
+
+    def __init__(self, model_path: Optional[Path] = None, config: Optional[dict[str, Any]] = None):
+        """Initialize the commit classification model.
+
+        Args:
+            model_path: Path to save/load model files
+            config: Configuration dictionary with model parameters
+        """
+        if not SKLEARN_AVAILABLE:
+            logger.warning("scikit-learn not available. Model functionality will be limited.")
+            self.model = None
+            self.label_encoder = None
+            self.feature_extractor = None
+            # Initialize metadata so get_model_info() and retrain_needed() remain usable.
+            self.model_path = model_path or Path(".gitflow-cache/classification")
+            self.is_trained = False
+            self.training_timestamp = None
+            self.class_names = None
+            self.training_metrics = {}
+            return
+
+        self.model_path = model_path or Path(".gitflow-cache/classification")
+        self.model_path.mkdir(parents=True, exist_ok=True)
+
+        # Configuration with defaults
+        self.config = config or {}
+        self.n_estimators = self.config.get("n_estimators", 100)
+        self.max_depth = self.config.get("max_depth", 20)
+        self.min_samples_split = self.config.get("min_samples_split", 5)
+        self.min_samples_leaf = self.config.get("min_samples_leaf", 2)
+        self.random_state = self.config.get("random_state", 42)
+        self.n_jobs = self.config.get("n_jobs", -1)  # Use all available cores
+
+        # Initialize components
+        self.model = RandomForestClassifier(
+            n_estimators=self.n_estimators,
+            max_depth=self.max_depth,
+            min_samples_split=self.min_samples_split,
+            min_samples_leaf=self.min_samples_leaf,
+            random_state=self.random_state,
+            n_jobs=self.n_jobs,
+            class_weight="balanced",  # Handle class imbalance
+        )
+        self.label_encoder = LabelEncoder()
+        self.feature_extractor = FeatureExtractor()
+
+        # Model metadata
+        self.is_trained = False
+        self.training_timestamp = None
+        self.feature_importance = None
+        self.class_names = None
+        self.training_metrics = {}
+
+        # Load existing model if available
+        self._load_model()
+
+    def train(
+        self, commits: list[dict[str, Any]], labels: list[str], validation_split: float = 0.2
+    ) -> dict[str, Any]:
+        """Train the classification model on labeled commit data.
+
+        Args:
+            commits: List of commit data dictionaries
+            labels: List of corresponding classification labels
+            validation_split: Fraction of data to use for validation
+
+        Returns:
+            Dictionary containing training metrics and results
+        """
+        if not SKLEARN_AVAILABLE:
+            raise RuntimeError("scikit-learn is required for model training")
+
+        if len(commits) != len(labels):
+            raise ValueError("Number of commits must match number of labels")
+
+        if len(commits) < 10:
+            raise ValueError("Need at least 10 samples for training")
+
+        logger.info(f"Training classification model on {len(commits)} commits")
+
+        # Extract features from commits
+        logger.info("Extracting features from commits...")
+        features = self.feature_extractor.extract_batch_features(commits)
+
+        # Encode labels
+        encoded_labels = self.label_encoder.fit_transform(labels)
+        self.class_names = self.label_encoder.classes_.tolist()
+
+        # Split data for validation
+        if validation_split > 0:
+            X_train, X_val, y_train, y_val = train_test_split(
+                features,
+                encoded_labels,
+                test_size=validation_split,
+                random_state=self.random_state,
+                stratify=encoded_labels,
+            )
+        else:
+            X_train, y_train = features, encoded_labels
+            X_val, y_val = None, None
+
+        # Train the model
+        logger.info("Training Random Forest classifier...")
+        self.model.fit(X_train, y_train)
+        self.is_trained = True
+        self.training_timestamp = datetime.now()
+
+        # Calculate feature importance
+        self.feature_importance = self.model.feature_importances_
+
+        # Evaluate the model
+        training_metrics = self._evaluate_model(X_train, y_train, X_val, y_val)
+        self.training_metrics = training_metrics
+
+        # Save the trained model
+        self._save_model()
+
+        logger.info(f"Model training completed. Accuracy: {training_metrics['accuracy']:.3f}")
+        return training_metrics
+
+    def predict(self, commits: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """Predict classifications for a batch of commits.
+
+        Args:
+            commits: List of commit data dictionaries
+
+        Returns:
+            List of prediction dictionaries containing:
+            - predicted_class: Predicted classification
+            - confidence: Prediction confidence (0-1)
+            - class_probabilities: Probabilities for all classes
+        """
+        if not SKLEARN_AVAILABLE or not self.is_trained:
+            logger.warning("Model not available or not trained. Using fallback classification.")
+            return self._fallback_predictions(commits)
+
+        if not commits:
+            return []
+
+        # Extract features
+        features = self.feature_extractor.extract_batch_features(commits)
+
+        # Make predictions
+        predictions = self.model.predict(features)
+        probabilities = self.model.predict_proba(features)
+
+        # Format results
+        results = []
+        for i, commit in enumerate(commits):
+            predicted_label = self.label_encoder.inverse_transform([predictions[i]])[0]
+            max_prob = np.max(probabilities[i])
+
+            # Create probability dictionary for all classes
+            class_probs = dict(zip(self.class_names, probabilities[i]))
+
+            results.append(
+                {
+                    "commit_hash": commit.get("hash", ""),
+                    "predicted_class": predicted_label,
+                    "confidence": float(max_prob),
+                    "class_probabilities": class_probs,
+                }
+            )
+
+        return results
+
+    def predict_single(self, commit: dict[str, Any]) -> dict[str, Any]:
+        """Predict classification for a single commit.
+
+        Args:
+            commit: Commit data dictionary
+
+        Returns:
+            Prediction dictionary with class and confidence
+        """
+        results = self.predict([commit])
+        return results[0] if results else {"predicted_class": "unknown", "confidence": 0.0}
+
+    def _evaluate_model(
+        self,
+        X_train: np.ndarray,
+        y_train: np.ndarray,
+        X_val: Optional[np.ndarray] = None,
+        y_val: Optional[np.ndarray] = None,
+    ) -> dict[str, Any]:
+        """Evaluate model performance with comprehensive metrics.
+
+        Args:
+            X_train: Training features
+            y_train: Training labels
+            X_val: Validation features (optional)
+            y_val: Validation labels (optional)
+
+        Returns:
+            Dictionary with evaluation metrics
+        """
+        metrics = {}
+
+        # Cross-validation on training data
+        cv_scores = cross_val_score(self.model, X_train, y_train, cv=5, scoring="accuracy")
+        metrics["cv_accuracy_mean"] = float(np.mean(cv_scores))
+        metrics["cv_accuracy_std"] = float(np.std(cv_scores))
+
+        # Training accuracy
+        train_pred = self.model.predict(X_train)
+        metrics["train_accuracy"] = float(accuracy_score(y_train, train_pred))
+
+        # Validation metrics if validation data provided
+        if X_val is not None and y_val is not None:
+            val_pred = self.model.predict(X_val)
+            metrics["val_accuracy"] = float(accuracy_score(y_val, val_pred))
+
+            # Detailed classification report
+            class_names = [
+                self.label_encoder.inverse_transform([i])[0]
+                for i in range(len(self.label_encoder.classes_))
+            ]
+
+            val_report = classification_report(
+                y_val, val_pred, target_names=class_names, output_dict=True
+            )
+            metrics["classification_report"] = val_report
+
+            # Confusion matrix
+            conf_matrix = confusion_matrix(y_val, val_pred)
+            metrics["confusion_matrix"] = conf_matrix.tolist()
+
+        # Overall accuracy for reporting
+        metrics["accuracy"] = metrics.get("val_accuracy", metrics["train_accuracy"])
+
+        return metrics
+
+    def get_feature_importance(self, top_n: int = 20) -> list[tuple[str, float]]:
+        """Get top feature importances from the trained model.
+
+        Args:
+            top_n: Number of top features to return
+
+        Returns:
+            List of (feature_name, importance) tuples, sorted by importance
+        """
+        if not self.is_trained or self.feature_importance is None:
+            return []
+
+        feature_names = self.feature_extractor.get_feature_names()
+        importance_pairs = list(zip(feature_names, self.feature_importance))
+
+        # Sort by importance descending
+        importance_pairs.sort(key=lambda x: x[1], reverse=True)
+
+        return importance_pairs[:top_n]
+
+    def _save_model(self) -> None:
+        """Save the trained model to disk."""
+        if not self.is_trained:
+            return
+
+        model_file = self.model_path / "commit_classifier.joblib"
+        metadata_file = self.model_path / "model_metadata.pkl"
+
+        try:
+            # Save the scikit-learn model
+            joblib.dump(self.model, model_file)
+
+            # Save metadata
+            metadata = {
+                "label_encoder": self.label_encoder,
+                "is_trained": self.is_trained,
+                "training_timestamp": self.training_timestamp,
+                "feature_importance": self.feature_importance,
+                "class_names": self.class_names,
+                "training_metrics": self.training_metrics,
+                "config": self.config,
+            }
+
+            with open(metadata_file, "wb") as f:
+                pickle.dump(metadata, f)
+
+            logger.info(f"Model saved to {model_file}")
+
+        except Exception as e:
+            logger.error(f"Failed to save model: {e}")
+
+    def _load_model(self) -> bool:
+        """Load a previously trained model from disk.
+
+        Returns:
+            True if model loaded successfully, False otherwise
+        """
+        if not SKLEARN_AVAILABLE:
+            return False
+
+        model_file = self.model_path / "commit_classifier.joblib"
+        metadata_file = self.model_path / "model_metadata.pkl"
+
+        if not (model_file.exists() and metadata_file.exists()):
+            return False
+
+        try:
+            # Load the scikit-learn model
+            self.model = joblib.load(model_file)
+
+            # Load metadata
+            with open(metadata_file, "rb") as f:
+                metadata = pickle.load(f)
+
+            self.label_encoder = metadata["label_encoder"]
+            self.is_trained = metadata["is_trained"]
+            self.training_timestamp = metadata["training_timestamp"]
+            self.feature_importance = metadata["feature_importance"]
+            self.class_names = metadata["class_names"]
+            self.training_metrics = metadata["training_metrics"]
+
+            # Check if model is too old (older than 30 days)
+            if self.training_timestamp:
+                age = datetime.now() - self.training_timestamp
+                if age > timedelta(days=30):
+                    logger.warning(f"Loaded model is {age.days} days old. Consider retraining.")
+
+            logger.info(f"Model loaded from {model_file}")
+            return True
+
+        except Exception as e:
+            logger.error(f"Failed to load model: {e}")
+            return False
+
+    def _fallback_predictions(self, commits: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """Provide fallback predictions when ML model is not available.
+
+        Args:
+            commits: List of commit data dictionaries
+
+        Returns:
+            List of basic prediction dictionaries
+        """
+        results = []
+
+        for commit in commits:
+            message = commit.get("message", "").lower()
+
+            # Simple rule-based fallback classification
+            predicted_class = "chore"  # Default
+            confidence = 0.3  # Low confidence for rule-based
+
+            if any(word in message for word in ["fix", "bug", "error", "issue"]):
+                predicted_class = "bugfix"
+                confidence = 0.6
+            elif any(word in message for word in ["feat", "add", "implement", "new"]):
+                predicted_class = "feature"
+                confidence = 0.6
+            elif any(word in message for word in ["doc", "readme", "comment"]):
+                predicted_class = "docs"
+                confidence = 0.7
+            elif any(word in message for word in ["test", "spec", "coverage"]):
+                predicted_class = "test"
+                confidence = 0.7
+            elif any(word in message for word in ["refactor", "cleanup", "optimize"]):
+                predicted_class = "refactor"
+                confidence = 0.6
+            elif any(word in message for word in ["config", "setting", "env"]):
+                predicted_class = "config"
+                confidence = 0.6
+
+            results.append(
+                {
+                    "commit_hash": commit.get("hash", ""),
+                    "predicted_class": predicted_class,
+                    "confidence": confidence,
+                    "class_probabilities": {predicted_class: confidence},
+                }
+            )
+
+        return results
+
+    def get_model_info(self) -> dict[str, Any]:
+        """Get information about the current model state.
+
+        Returns:
+            Dictionary with model information
+        """
+        return {
+            "is_trained": self.is_trained,
+            "sklearn_available": SKLEARN_AVAILABLE,
+            "training_timestamp": self.training_timestamp,
+            "class_names": self.class_names,
+            "n_classes": len(self.class_names) if self.class_names else 0,
+            "training_metrics": self.training_metrics,
+            "model_path": str(self.model_path),
+        }
+
+    def retrain_needed(self, days_old: int = 30) -> bool:
+        """Check if model retraining is recommended.
+
+        Args:
+            days_old: Age threshold in days
+
+        Returns:
+            True if retraining is recommended
+        """
+        if not self.is_trained or not self.training_timestamp:
+            return True
+
+        age = datetime.now() - self.training_timestamp
+        return age.days > days_old
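
For orientation, here is a minimal usage sketch of the CommitClassificationModel API added in this file. It is illustrative only, not part of the package diff: the commit dictionaries and labels below are placeholders, the exact commit fields FeatureExtractor expects are defined in feature_extractor.py (also new in this release), and train() requires at least 10 labeled samples.

# Hypothetical usage sketch; names and return shapes follow
# gitflow_analytics/classification/model.py as shown above.
from pathlib import Path

from gitflow_analytics.classification.model import CommitClassificationModel

# Placeholder training data: real callers supply many labeled commits,
# with whatever fields FeatureExtractor reads (hash and message at minimum).
commits = [
    {"hash": "abc1234", "message": "fix: handle empty config file"},
    {"hash": "def5678", "message": "feat: add weekly trends report"},
    # ... at least 10 labeled commits in practice ...
]
labels = ["bugfix", "feature"]  # one label per commit

model = CommitClassificationModel(model_path=Path(".gitflow-cache/classification"))
metrics = model.train(commits, labels, validation_split=0.2)  # raises without scikit-learn
print(f"accuracy: {metrics['accuracy']:.3f}")

# Batch prediction; falls back to keyword rules if no trained model is available.
for pred in model.predict(commits):
    print(pred["commit_hash"], pred["predicted_class"], round(pred["confidence"], 2))

# Inspect which of the 68 features drive the Random Forest's decisions.
for name, importance in model.get_feature_importance(top_n=5):
    print(f"{name}: {importance:.4f}")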