gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in their respective public registries.
Files changed (119)
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/classification/__init__.py +31 -0
  4. gitflow_analytics/classification/batch_classifier.py +752 -0
  5. gitflow_analytics/classification/classifier.py +464 -0
  6. gitflow_analytics/classification/feature_extractor.py +725 -0
  7. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  8. gitflow_analytics/classification/model.py +455 -0
  9. gitflow_analytics/cli.py +4490 -378
  10. gitflow_analytics/cli_rich.py +503 -0
  11. gitflow_analytics/config/__init__.py +43 -0
  12. gitflow_analytics/config/errors.py +261 -0
  13. gitflow_analytics/config/loader.py +904 -0
  14. gitflow_analytics/config/profiles.py +264 -0
  15. gitflow_analytics/config/repository.py +124 -0
  16. gitflow_analytics/config/schema.py +441 -0
  17. gitflow_analytics/config/validator.py +154 -0
  18. gitflow_analytics/config.py +44 -398
  19. gitflow_analytics/core/analyzer.py +1320 -172
  20. gitflow_analytics/core/branch_mapper.py +132 -132
  21. gitflow_analytics/core/cache.py +1554 -175
  22. gitflow_analytics/core/data_fetcher.py +1193 -0
  23. gitflow_analytics/core/identity.py +571 -185
  24. gitflow_analytics/core/metrics_storage.py +526 -0
  25. gitflow_analytics/core/progress.py +372 -0
  26. gitflow_analytics/core/schema_version.py +269 -0
  27. gitflow_analytics/extractors/base.py +13 -11
  28. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  29. gitflow_analytics/extractors/story_points.py +77 -59
  30. gitflow_analytics/extractors/tickets.py +841 -89
  31. gitflow_analytics/identity_llm/__init__.py +6 -0
  32. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  33. gitflow_analytics/identity_llm/analyzer.py +464 -0
  34. gitflow_analytics/identity_llm/models.py +76 -0
  35. gitflow_analytics/integrations/github_integration.py +258 -87
  36. gitflow_analytics/integrations/jira_integration.py +572 -123
  37. gitflow_analytics/integrations/orchestrator.py +206 -82
  38. gitflow_analytics/metrics/activity_scoring.py +322 -0
  39. gitflow_analytics/metrics/branch_health.py +470 -0
  40. gitflow_analytics/metrics/dora.py +542 -179
  41. gitflow_analytics/models/database.py +986 -59
  42. gitflow_analytics/pm_framework/__init__.py +115 -0
  43. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  44. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  45. gitflow_analytics/pm_framework/base.py +406 -0
  46. gitflow_analytics/pm_framework/models.py +211 -0
  47. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  48. gitflow_analytics/pm_framework/registry.py +333 -0
  49. gitflow_analytics/qualitative/__init__.py +29 -0
  50. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  51. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  52. gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
  53. gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
  54. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
  55. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  56. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  57. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  58. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  59. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  60. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  61. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  62. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  63. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  64. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
  65. gitflow_analytics/qualitative/core/__init__.py +13 -0
  66. gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
  67. gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
  68. gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
  69. gitflow_analytics/qualitative/core/processor.py +673 -0
  70. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  71. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  72. gitflow_analytics/qualitative/models/__init__.py +25 -0
  73. gitflow_analytics/qualitative/models/schemas.py +306 -0
  74. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  75. gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
  76. gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
  77. gitflow_analytics/qualitative/utils/metrics.py +361 -0
  78. gitflow_analytics/qualitative/utils/text_processing.py +285 -0
  79. gitflow_analytics/reports/__init__.py +100 -0
  80. gitflow_analytics/reports/analytics_writer.py +550 -18
  81. gitflow_analytics/reports/base.py +648 -0
  82. gitflow_analytics/reports/branch_health_writer.py +322 -0
  83. gitflow_analytics/reports/classification_writer.py +924 -0
  84. gitflow_analytics/reports/cli_integration.py +427 -0
  85. gitflow_analytics/reports/csv_writer.py +1700 -216
  86. gitflow_analytics/reports/data_models.py +504 -0
  87. gitflow_analytics/reports/database_report_generator.py +427 -0
  88. gitflow_analytics/reports/example_usage.py +344 -0
  89. gitflow_analytics/reports/factory.py +499 -0
  90. gitflow_analytics/reports/formatters.py +698 -0
  91. gitflow_analytics/reports/html_generator.py +1116 -0
  92. gitflow_analytics/reports/interfaces.py +489 -0
  93. gitflow_analytics/reports/json_exporter.py +2770 -0
  94. gitflow_analytics/reports/narrative_writer.py +2289 -158
  95. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  96. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  97. gitflow_analytics/training/__init__.py +5 -0
  98. gitflow_analytics/training/model_loader.py +377 -0
  99. gitflow_analytics/training/pipeline.py +550 -0
  100. gitflow_analytics/tui/__init__.py +5 -0
  101. gitflow_analytics/tui/app.py +724 -0
  102. gitflow_analytics/tui/screens/__init__.py +8 -0
  103. gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
  104. gitflow_analytics/tui/screens/configuration_screen.py +523 -0
  105. gitflow_analytics/tui/screens/loading_screen.py +348 -0
  106. gitflow_analytics/tui/screens/main_screen.py +321 -0
  107. gitflow_analytics/tui/screens/results_screen.py +722 -0
  108. gitflow_analytics/tui/widgets/__init__.py +7 -0
  109. gitflow_analytics/tui/widgets/data_table.py +255 -0
  110. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  111. gitflow_analytics/tui/widgets/progress_widget.py +187 -0
  112. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  113. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  114. gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
  115. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  116. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  117. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  118. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  119. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
--- /dev/null
+++ gitflow_analytics/qualitative/core/pattern_cache.py
@@ -0,0 +1,479 @@
+ """Pattern caching system for qualitative analysis optimization."""
+
+ import hashlib
+ import json
+ import logging
+ from collections import defaultdict
+ from datetime import datetime, timedelta
+ from pathlib import Path
+ from typing import Any, Optional
+
+ from sqlalchemy import and_, desc
+
+ from ...models.database import Database
+ from ...models.database import PatternCache as PatternCacheModel
+ from ..models.schemas import CacheConfig, QualitativeCommitData
+ from ..utils.text_processing import TextProcessor
+
+
+ class PatternCache:
+     """Intelligent caching system for qualitative analysis patterns.
+
+     This system learns from successful classifications to speed up future
+     processing and improve accuracy through pattern recognition.
+
+     Features:
+     - Semantic fingerprinting for pattern matching
+     - Hit count tracking for popular patterns
+     - Automatic cache cleanup and optimization
+     - Pattern learning from successful classifications
+     """
+
+     def __init__(self, config: CacheConfig, database: Database):
+         """Initialize pattern cache.
+
+         Args:
+             config: Cache configuration
+             database: Database instance for persistence
+         """
+         self.config = config
+         self.database = database
+         self.logger = logging.getLogger(__name__)
+
+         # Initialize text processor for fingerprinting
+         self.text_processor = TextProcessor()
+
+         # In-memory cache for frequently accessed patterns
+         self._memory_cache: dict[str, dict[str, Any]] = {}
+         self._memory_cache_hits = defaultdict(int)
+
+         # Statistics
+         self.cache_hits = 0
+         self.cache_misses = 0
+         self.pattern_learning_count = 0
+
+         # Initialize cache directory if using file-based caching
+         cache_dir = Path(config.cache_dir)
+         cache_dir.mkdir(exist_ok=True, parents=True)
+
+         self.logger.info(f"Pattern cache initialized with TTL: {config.pattern_cache_ttl_hours}h")
+
+     def lookup_pattern(self, message: str, files: list[str]) -> Optional[dict[str, Any]]:
+         """Look up cached classification for a commit pattern.
+
+         Args:
+             message: Commit message
+             files: List of changed files
+
+         Returns:
+             Cached classification result or None if not found
+         """
+         # Generate semantic fingerprint
+         fingerprint = self.text_processor.create_semantic_fingerprint(message, files)
+
+         # Check in-memory cache first
+         if fingerprint in self._memory_cache:
+             self._memory_cache_hits[fingerprint] += 1
+             self.cache_hits += 1
+             self.logger.debug(f"Memory cache hit for pattern: {fingerprint[:8]}")
+             return self._memory_cache[fingerprint]
+
+         # Check database cache
+         with self.database.get_session() as session:
+             cached_pattern = (
+                 session.query(PatternCacheModel)
+                 .filter(PatternCacheModel.semantic_fingerprint == fingerprint)
+                 .first()
+             )
+
+             if cached_pattern:
+                 # Check if pattern is still valid (not expired)
+                 cutoff_time = datetime.utcnow() - timedelta(
+                     hours=self.config.pattern_cache_ttl_hours
+                 )
+
+                 if cached_pattern.created_at > cutoff_time:
+                     # Update hit count and last used
+                     cached_pattern.hit_count += 1
+                     cached_pattern.last_used = datetime.utcnow()
+                     session.commit()
+
+                     # Add to memory cache for faster future access
+                     result = cached_pattern.classification_result
+                     self._add_to_memory_cache(fingerprint, result)
+
+                     self.cache_hits += 1
+                     self.logger.debug(f"Database cache hit for pattern: {fingerprint[:8]}")
+                     return result
+                 else:
+                     # Pattern is expired, remove it
+                     session.delete(cached_pattern)
+                     session.commit()
+                     self.logger.debug(f"Expired pattern removed: {fingerprint[:8]}")
+
+         self.cache_misses += 1
+         return None
+
+     def store_pattern(
+         self,
+         message: str,
+         files: list[str],
+         classification_result: dict[str, Any],
+         confidence_score: float,
+         source_method: str,
+         processing_time_ms: float = 0.0,
+     ) -> None:
+         """Store a new pattern in the cache.
+
+         Args:
+             message: Commit message
+             files: List of changed files
+             classification_result: Classification results to cache
+             confidence_score: Confidence in the classification
+             source_method: Method that produced this result ('nlp' or 'llm')
+             processing_time_ms: Time taken to process
+         """
+         # Only cache high-confidence results
+         if confidence_score < 0.6:
+             return
+
+         fingerprint = self.text_processor.create_semantic_fingerprint(message, files)
+         message_hash = hashlib.md5(message.encode()).hexdigest()
+
+         # Add to memory cache
+         self._add_to_memory_cache(fingerprint, classification_result)
+
+         # Store in database
+         with self.database.get_session() as session:
+             # Check if pattern already exists
+             existing_pattern = (
+                 session.query(PatternCacheModel)
+                 .filter(PatternCacheModel.semantic_fingerprint == fingerprint)
+                 .first()
+             )
+
+             if existing_pattern:
+                 # Update existing pattern with new data
+                 existing_pattern.hit_count += 1
+                 existing_pattern.last_used = datetime.utcnow()
+
+                 # Update confidence if new result is more confident
+                 if confidence_score > existing_pattern.confidence_score:
+                     existing_pattern.classification_result = classification_result
+                     existing_pattern.confidence_score = confidence_score
+                     existing_pattern.source_method = source_method
+
+                 # Update average processing time
+                 if processing_time_ms > 0:
+                     if existing_pattern.avg_processing_time_ms:
+                         existing_pattern.avg_processing_time_ms = (
+                             existing_pattern.avg_processing_time_ms + processing_time_ms
+                         ) / 2
+                     else:
+                         existing_pattern.avg_processing_time_ms = processing_time_ms
+             else:
+                 # Create new pattern
+                 new_pattern = PatternCacheModel(
+                     message_hash=message_hash,
+                     semantic_fingerprint=fingerprint,
+                     classification_result=classification_result,
+                     confidence_score=confidence_score,
+                     source_method=source_method,
+                     avg_processing_time_ms=processing_time_ms,
+                 )
+                 session.add(new_pattern)
+                 self.pattern_learning_count += 1
+
+             session.commit()
+
+         self.logger.debug(
+             f"Stored pattern: {fingerprint[:8]} "
+             f"(confidence: {confidence_score:.2f}, method: {source_method})"
+         )
+
+     def learn_from_results(self, results: list[QualitativeCommitData]) -> None:
+         """Learn patterns from successful classification results.
+
+         Args:
+             results: List of classification results to learn from
+         """
+         learned_patterns = 0
+
+         for result in results:
+             if result.confidence_score >= 0.7:  # Only learn from high-confidence results
+                 classification_data = {
+                     "change_type": result.change_type,
+                     "change_type_confidence": result.change_type_confidence,
+                     "business_domain": result.business_domain,
+                     "domain_confidence": result.domain_confidence,
+                     "risk_level": result.risk_level,
+                     "confidence_score": result.confidence_score,
+                 }
+
+                 self.store_pattern(
+                     message=result.message,
+                     files=result.files_changed,
+                     classification_result=classification_data,
+                     confidence_score=result.confidence_score,
+                     source_method=result.processing_method,
+                     processing_time_ms=result.processing_time_ms,
+                 )
+                 learned_patterns += 1
+
+         if learned_patterns > 0:
+             self.logger.info(f"Learned {learned_patterns} new patterns from results")
+
+     def _add_to_memory_cache(self, fingerprint: str, result: dict[str, Any]) -> None:
+         """Add result to in-memory cache with size management.
+
+         Args:
+             fingerprint: Pattern fingerprint
+             result: Classification result
+         """
+         # Manage memory cache size
+         if len(self._memory_cache) >= self.config.semantic_cache_size:
+             # Remove least recently used items
+             sorted_items = sorted(self._memory_cache_hits.items(), key=lambda x: x[1])
+
+             # Remove bottom 20% of items
+             items_to_remove = len(sorted_items) // 5
+             for fingerprint_to_remove, _ in sorted_items[:items_to_remove]:
+                 self._memory_cache.pop(fingerprint_to_remove, None)
+                 self._memory_cache_hits.pop(fingerprint_to_remove, None)
+
+         self._memory_cache[fingerprint] = result
+         self._memory_cache_hits[fingerprint] = 1
+
+     def cleanup_cache(self) -> dict[str, int]:
+         """Clean up expired and low-quality cache entries.
+
+         Returns:
+             Dictionary with cleanup statistics
+         """
+         stats = {
+             "expired_removed": 0,
+             "low_confidence_removed": 0,
+             "low_usage_removed": 0,
+             "total_remaining": 0,
+         }
+
+         cutoff_time = datetime.utcnow() - timedelta(hours=self.config.pattern_cache_ttl_hours)
+
+         with self.database.get_session() as session:
+             # Remove expired patterns
+             expired_patterns = session.query(PatternCacheModel).filter(
+                 PatternCacheModel.created_at < cutoff_time
+             )
+             stats["expired_removed"] = expired_patterns.count()
+             expired_patterns.delete()
+
+             # Remove very low confidence patterns (< 0.4)
+             low_confidence_patterns = session.query(PatternCacheModel).filter(
+                 PatternCacheModel.confidence_score < 0.4
+             )
+             stats["low_confidence_removed"] = low_confidence_patterns.count()
+             low_confidence_patterns.delete()
+
+             # Remove patterns with very low usage (hit_count = 1 and older than 7 days)
+             week_ago = datetime.utcnow() - timedelta(days=7)
+             low_usage_patterns = session.query(PatternCacheModel).filter(
+                 and_(PatternCacheModel.hit_count == 1, PatternCacheModel.created_at < week_ago)
+             )
+             stats["low_usage_removed"] = low_usage_patterns.count()
+             low_usage_patterns.delete()
+
+             # Count remaining patterns
+             stats["total_remaining"] = session.query(PatternCacheModel).count()
+
+             session.commit()
+
+         # Clear memory cache to force refresh
+         self._memory_cache.clear()
+         self._memory_cache_hits.clear()
+
+         self.logger.info(
+             f"Cache cleanup completed: {stats['expired_removed']} expired, "
+             f"{stats['low_confidence_removed']} low-confidence, "
+             f"{stats['low_usage_removed']} low-usage removed. "
+             f"{stats['total_remaining']} patterns remaining."
+         )
+
+         return stats
+
+     def get_cache_statistics(self) -> dict[str, Any]:
+         """Get comprehensive cache statistics.
+
+         Returns:
+             Dictionary with cache statistics
+         """
+         with self.database.get_session() as session:
+             # Basic counts
+             total_patterns = session.query(PatternCacheModel).count()
+
+             # Method breakdown
+             nlp_patterns = (
+                 session.query(PatternCacheModel)
+                 .filter(PatternCacheModel.source_method == "nlp")
+                 .count()
+             )
+
+             llm_patterns = (
+                 session.query(PatternCacheModel)
+                 .filter(PatternCacheModel.source_method == "llm")
+                 .count()
+             )
+
+             # Confidence distribution
+             high_confidence = (
+                 session.query(PatternCacheModel)
+                 .filter(PatternCacheModel.confidence_score > 0.8)
+                 .count()
+             )
+
+             medium_confidence = (
+                 session.query(PatternCacheModel)
+                 .filter(
+                     and_(
+                         PatternCacheModel.confidence_score >= 0.6,
+                         PatternCacheModel.confidence_score <= 0.8,
+                     )
+                 )
+                 .count()
+             )
+
+             # Usage statistics
+             top_patterns = (
+                 session.query(PatternCacheModel)
+                 .order_by(desc(PatternCacheModel.hit_count))
+                 .limit(10)
+                 .all()
+             )
+
+             # Age statistics
+             week_ago = datetime.utcnow() - timedelta(days=7)
+             recent_patterns = (
+                 session.query(PatternCacheModel)
+                 .filter(PatternCacheModel.created_at > week_ago)
+                 .count()
+             )
+
+             # Calculate hit rate
+             total_requests = self.cache_hits + self.cache_misses
+             hit_rate = (self.cache_hits / total_requests) if total_requests > 0 else 0.0
+
+             return {
+                 "total_patterns": total_patterns,
+                 "method_breakdown": {"nlp_patterns": nlp_patterns, "llm_patterns": llm_patterns},
+                 "confidence_distribution": {
+                     "high_confidence": high_confidence,
+                     "medium_confidence": medium_confidence,
+                     "low_confidence": total_patterns - high_confidence - medium_confidence,
+                 },
+                 "usage_stats": {
+                     "cache_hits": self.cache_hits,
+                     "cache_misses": self.cache_misses,
+                     "hit_rate": hit_rate,
+                     "memory_cache_size": len(self._memory_cache),
+                 },
+                 "top_patterns": [
+                     {
+                         "fingerprint": p.semantic_fingerprint[:8],
+                         "hit_count": p.hit_count,
+                         "confidence": p.confidence_score,
+                         "method": p.source_method,
+                     }
+                     for p in top_patterns
+                 ],
+                 "recent_patterns": recent_patterns,
+                 "patterns_learned": self.pattern_learning_count,
+                 "config": {
+                     "ttl_hours": self.config.pattern_cache_ttl_hours,
+                     "max_memory_size": self.config.semantic_cache_size,
+                     "learning_enabled": self.config.enable_pattern_learning,
+                 },
+             }
+
+     def optimize_cache(self) -> dict[str, Any]:
+         """Optimize cache for better performance.
+
+         Returns:
+             Dictionary with optimization results
+         """
+         optimization_stats = {}
+
+         # Step 1: Cleanup expired and low-quality entries
+         cleanup_stats = self.cleanup_cache()
+         optimization_stats["cleanup"] = cleanup_stats
+
+         # Step 2: Promote high-usage patterns to memory cache
+         with self.database.get_session() as session:
+             # Get top patterns by hit count
+             popular_patterns = (
+                 session.query(PatternCacheModel)
+                 .filter(PatternCacheModel.hit_count >= 5)
+                 .order_by(desc(PatternCacheModel.hit_count))
+                 .limit(self.config.semantic_cache_size // 2)  # Fill half of memory cache
+                 .all()
+             )
+
+             promoted_count = 0
+             for pattern in popular_patterns:
+                 if pattern.semantic_fingerprint not in self._memory_cache:
+                     self._add_to_memory_cache(
+                         pattern.semantic_fingerprint, pattern.classification_result
+                     )
+                     promoted_count += 1
+
+         optimization_stats["promoted_to_memory"] = promoted_count
+
+         # Step 3: Update learning threshold based on cache size
+         total_patterns = cleanup_stats["total_remaining"]
+         if total_patterns > 1000:
+             # Increase learning threshold for large caches
+             self.config.learning_threshold = min(20, self.config.learning_threshold + 2)
+         elif total_patterns < 100:
+             # Decrease learning threshold for small caches
+             self.config.learning_threshold = max(5, self.config.learning_threshold - 1)
+
+         optimization_stats["learning_threshold"] = self.config.learning_threshold
+
+         self.logger.info(f"Cache optimization completed: {optimization_stats}")
+         return optimization_stats
+
+     def export_patterns(self, output_path: Path, min_confidence: float = 0.8) -> int:
+         """Export high-quality patterns for analysis or backup.
+
+         Args:
+             output_path: Path to export file
+             min_confidence: Minimum confidence threshold for export
+
+         Returns:
+             Number of patterns exported
+         """
+         with self.database.get_session() as session:
+             patterns = (
+                 session.query(PatternCacheModel)
+                 .filter(PatternCacheModel.confidence_score >= min_confidence)
+                 .order_by(desc(PatternCacheModel.hit_count))
+                 .all()
+             )
+
+             export_data = []
+             for pattern in patterns:
+                 export_data.append(
+                     {
+                         "semantic_fingerprint": pattern.semantic_fingerprint,
+                         "classification_result": pattern.classification_result,
+                         "confidence_score": pattern.confidence_score,
+                         "hit_count": pattern.hit_count,
+                         "source_method": pattern.source_method,
+                         "created_at": pattern.created_at.isoformat(),
+                         "last_used": pattern.last_used.isoformat(),
+                     }
+                 )
+
+         with open(output_path, "w") as f:
+             json.dump(export_data, f, indent=2)
+
+         self.logger.info(f"Exported {len(patterns)} patterns to {output_path}")
+         return len(patterns)
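
Usage note: a minimal sketch of how the new PatternCache could be exercised, written only against the method signatures visible in this hunk. The CacheConfig field and the Database constructor argument shown here are illustrative assumptions, not confirmed package API.

# Hypothetical usage sketch -- constructor arguments and CacheConfig fields
# are assumptions inferred from attributes referenced in pattern_cache.py.
from pathlib import Path

from gitflow_analytics.models.database import Database
from gitflow_analytics.qualitative.core.pattern_cache import PatternCache
from gitflow_analytics.qualitative.models.schemas import CacheConfig

config = CacheConfig(cache_dir=".qualitative_cache")  # assumed field name
database = Database(Path(".qualitative_cache/cache.db"))  # assumed signature

cache = PatternCache(config, database)

# Consult the cache before paying for NLP/LLM classification.
message = "fix: handle null session in login flow"
files = ["src/auth/login.py"]
cached = cache.lookup_pattern(message, files)

if cached is None:
    # ...run the real classifier here, then cache the high-confidence result.
    result = {"change_type": "bugfix", "confidence_score": 0.85}
    cache.store_pattern(
        message,
        files,
        classification_result=result,
        confidence_score=0.85,
        source_method="nlp",
    )

print(cache.get_cache_statistics()["usage_stats"])

The two-tier design (an in-memory dict in front of a SQLAlchemy-backed table) keeps lookup_pattern cheap on hot paths while persisting learned patterns across runs, and the TTL plus confidence thresholds bound how stale or noisy the cached classifications can get.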