gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -3,36 +3,35 @@
  import hashlib
  import json
  import logging
- import time
+ from collections import defaultdict
  from datetime import datetime, timedelta
  from pathlib import Path
- from typing import Dict, List, Optional, Any, Tuple
- from collections import defaultdict
+ from typing import Any, Optional

- from sqlalchemy.orm import Session
  from sqlalchemy import and_, desc

- from ...models.database import Database, PatternCache as PatternCacheModel
+ from ...models.database import Database
+ from ...models.database import PatternCache as PatternCacheModel
  from ..models.schemas import CacheConfig, QualitativeCommitData
  from ..utils.text_processing import TextProcessor


  class PatternCache:
  """Intelligent caching system for qualitative analysis patterns.
-
+
  This system learns from successful classifications to speed up future
  processing and improve accuracy through pattern recognition.
-
+
  Features:
  - Semantic fingerprinting for pattern matching
  - Hit count tracking for popular patterns
  - Automatic cache cleanup and optimization
  - Pattern learning from successful classifications
  """
-
+
  def __init__(self, config: CacheConfig, database: Database):
  """Initialize pattern cache.
-
+
  Args:
  config: Cache configuration
  database: Database instance for persistence
@@ -40,65 +39,69 @@ class PatternCache:
  self.config = config
  self.database = database
  self.logger = logging.getLogger(__name__)
-
+
  # Initialize text processor for fingerprinting
  self.text_processor = TextProcessor()
-
+
  # In-memory cache for frequently accessed patterns
- self._memory_cache: Dict[str, Dict[str, Any]] = {}
+ self._memory_cache: dict[str, dict[str, Any]] = {}
  self._memory_cache_hits = defaultdict(int)
-
+
  # Statistics
  self.cache_hits = 0
  self.cache_misses = 0
  self.pattern_learning_count = 0
-
+
  # Initialize cache directory if using file-based caching
  cache_dir = Path(config.cache_dir)
  cache_dir.mkdir(exist_ok=True, parents=True)
-
+
  self.logger.info(f"Pattern cache initialized with TTL: {config.pattern_cache_ttl_hours}h")
-
- def lookup_pattern(self, message: str, files: List[str]) -> Optional[Dict[str, Any]]:
+
+ def lookup_pattern(self, message: str, files: list[str]) -> Optional[dict[str, Any]]:
  """Look up cached classification for a commit pattern.
-
+
  Args:
  message: Commit message
  files: List of changed files
-
+
  Returns:
  Cached classification result or None if not found
  """
  # Generate semantic fingerprint
  fingerprint = self.text_processor.create_semantic_fingerprint(message, files)
-
+
  # Check in-memory cache first
  if fingerprint in self._memory_cache:
  self._memory_cache_hits[fingerprint] += 1
  self.cache_hits += 1
  self.logger.debug(f"Memory cache hit for pattern: {fingerprint[:8]}")
  return self._memory_cache[fingerprint]
-
+
  # Check database cache
  with self.database.get_session() as session:
- cached_pattern = session.query(PatternCacheModel).filter(
- PatternCacheModel.semantic_fingerprint == fingerprint
- ).first()
-
+ cached_pattern = (
+ session.query(PatternCacheModel)
+ .filter(PatternCacheModel.semantic_fingerprint == fingerprint)
+ .first()
+ )
+
  if cached_pattern:
  # Check if pattern is still valid (not expired)
- cutoff_time = datetime.utcnow() - timedelta(hours=self.config.pattern_cache_ttl_hours)
-
+ cutoff_time = datetime.utcnow() - timedelta(
+ hours=self.config.pattern_cache_ttl_hours
+ )
+
  if cached_pattern.created_at > cutoff_time:
  # Update hit count and last used
  cached_pattern.hit_count += 1
  cached_pattern.last_used = datetime.utcnow()
  session.commit()
-
+
  # Add to memory cache for faster future access
  result = cached_pattern.classification_result
  self._add_to_memory_cache(fingerprint, result)
-
+
  self.cache_hits += 1
  self.logger.debug(f"Database cache hit for pattern: {fingerprint[:8]}")
  return result
@@ -107,16 +110,21 @@ class PatternCache:
  session.delete(cached_pattern)
  session.commit()
  self.logger.debug(f"Expired pattern removed: {fingerprint[:8]}")
-
+
  self.cache_misses += 1
  return None
-
- def store_pattern(self, message: str, files: List[str],
- classification_result: Dict[str, Any],
- confidence_score: float, source_method: str,
- processing_time_ms: float = 0.0) -> None:
+
+ def store_pattern(
+ self,
+ message: str,
+ files: list[str],
+ classification_result: dict[str, Any],
+ confidence_score: float,
+ source_method: str,
+ processing_time_ms: float = 0.0,
+ ) -> None:
  """Store a new pattern in the cache.
-
+
  Args:
  message: Commit message
  files: List of changed files
@@ -128,31 +136,33 @@
  # Only cache high-confidence results
  if confidence_score < 0.6:
  return
-
+
  fingerprint = self.text_processor.create_semantic_fingerprint(message, files)
  message_hash = hashlib.md5(message.encode()).hexdigest()
-
+
  # Add to memory cache
  self._add_to_memory_cache(fingerprint, classification_result)
-
+
  # Store in database
  with self.database.get_session() as session:
  # Check if pattern already exists
- existing_pattern = session.query(PatternCacheModel).filter(
- PatternCacheModel.semantic_fingerprint == fingerprint
- ).first()
-
+ existing_pattern = (
+ session.query(PatternCacheModel)
+ .filter(PatternCacheModel.semantic_fingerprint == fingerprint)
+ .first()
+ )
+
  if existing_pattern:
  # Update existing pattern with new data
  existing_pattern.hit_count += 1
  existing_pattern.last_used = datetime.utcnow()
-
+
  # Update confidence if new result is more confident
  if confidence_score > existing_pattern.confidence_score:
  existing_pattern.classification_result = classification_result
  existing_pattern.confidence_score = confidence_score
  existing_pattern.source_method = source_method
-
+
  # Update average processing time
  if processing_time_ms > 0:
  if existing_pattern.avg_processing_time_ms:
@@ -169,53 +179,53 @@
  classification_result=classification_result,
  confidence_score=confidence_score,
  source_method=source_method,
- avg_processing_time_ms=processing_time_ms
+ avg_processing_time_ms=processing_time_ms,
  )
  session.add(new_pattern)
  self.pattern_learning_count += 1
-
+
  session.commit()
-
+
  self.logger.debug(
  f"Stored pattern: {fingerprint[:8]} "
  f"(confidence: {confidence_score:.2f}, method: {source_method})"
  )
-
- def learn_from_results(self, results: List[QualitativeCommitData]) -> None:
+
+ def learn_from_results(self, results: list[QualitativeCommitData]) -> None:
  """Learn patterns from successful classification results.
-
+
  Args:
  results: List of classification results to learn from
  """
  learned_patterns = 0
-
+
  for result in results:
  if result.confidence_score >= 0.7: # Only learn from high-confidence results
  classification_data = {
- 'change_type': result.change_type,
- 'change_type_confidence': result.change_type_confidence,
- 'business_domain': result.business_domain,
- 'domain_confidence': result.domain_confidence,
- 'risk_level': result.risk_level,
- 'confidence_score': result.confidence_score
+ "change_type": result.change_type,
+ "change_type_confidence": result.change_type_confidence,
+ "business_domain": result.business_domain,
+ "domain_confidence": result.domain_confidence,
+ "risk_level": result.risk_level,
+ "confidence_score": result.confidence_score,
  }
-
+
  self.store_pattern(
  message=result.message,
  files=result.files_changed,
  classification_result=classification_data,
  confidence_score=result.confidence_score,
  source_method=result.processing_method,
- processing_time_ms=result.processing_time_ms
+ processing_time_ms=result.processing_time_ms,
  )
  learned_patterns += 1
-
+
  if learned_patterns > 0:
  self.logger.info(f"Learned {learned_patterns} new patterns from results")
-
- def _add_to_memory_cache(self, fingerprint: str, result: Dict[str, Any]) -> None:
+
+ def _add_to_memory_cache(self, fingerprint: str, result: dict[str, Any]) -> None:
  """Add result to in-memory cache with size management.
-
+
  Args:
  fingerprint: Pattern fingerprint
  result: Classification result
@@ -223,235 +233,247 @@
  # Manage memory cache size
  if len(self._memory_cache) >= self.config.semantic_cache_size:
  # Remove least recently used items
- sorted_items = sorted(
- self._memory_cache_hits.items(),
- key=lambda x: x[1]
- )
-
+ sorted_items = sorted(self._memory_cache_hits.items(), key=lambda x: x[1])
+
  # Remove bottom 20% of items
  items_to_remove = len(sorted_items) // 5
  for fingerprint_to_remove, _ in sorted_items[:items_to_remove]:
  self._memory_cache.pop(fingerprint_to_remove, None)
  self._memory_cache_hits.pop(fingerprint_to_remove, None)
-
+
  self._memory_cache[fingerprint] = result
  self._memory_cache_hits[fingerprint] = 1
-
- def cleanup_cache(self) -> Dict[str, int]:
+
+ def cleanup_cache(self) -> dict[str, int]:
  """Clean up expired and low-quality cache entries.
-
+
  Returns:
  Dictionary with cleanup statistics
  """
  stats = {
- 'expired_removed': 0,
- 'low_confidence_removed': 0,
- 'low_usage_removed': 0,
- 'total_remaining': 0
+ "expired_removed": 0,
+ "low_confidence_removed": 0,
+ "low_usage_removed": 0,
+ "total_remaining": 0,
  }
-
+
  cutoff_time = datetime.utcnow() - timedelta(hours=self.config.pattern_cache_ttl_hours)
-
+
  with self.database.get_session() as session:
  # Remove expired patterns
  expired_patterns = session.query(PatternCacheModel).filter(
  PatternCacheModel.created_at < cutoff_time
  )
- stats['expired_removed'] = expired_patterns.count()
+ stats["expired_removed"] = expired_patterns.count()
  expired_patterns.delete()
-
+
  # Remove very low confidence patterns (< 0.4)
  low_confidence_patterns = session.query(PatternCacheModel).filter(
  PatternCacheModel.confidence_score < 0.4
  )
- stats['low_confidence_removed'] = low_confidence_patterns.count()
+ stats["low_confidence_removed"] = low_confidence_patterns.count()
  low_confidence_patterns.delete()
-
+
  # Remove patterns with very low usage (hit_count = 1 and older than 7 days)
  week_ago = datetime.utcnow() - timedelta(days=7)
  low_usage_patterns = session.query(PatternCacheModel).filter(
- and_(
- PatternCacheModel.hit_count == 1,
- PatternCacheModel.created_at < week_ago
- )
+ and_(PatternCacheModel.hit_count == 1, PatternCacheModel.created_at < week_ago)
  )
- stats['low_usage_removed'] = low_usage_patterns.count()
+ stats["low_usage_removed"] = low_usage_patterns.count()
  low_usage_patterns.delete()
-
+
  # Count remaining patterns
- stats['total_remaining'] = session.query(PatternCacheModel).count()
-
+ stats["total_remaining"] = session.query(PatternCacheModel).count()
+
  session.commit()
-
+
  # Clear memory cache to force refresh
  self._memory_cache.clear()
  self._memory_cache_hits.clear()
-
+
  self.logger.info(
  f"Cache cleanup completed: {stats['expired_removed']} expired, "
  f"{stats['low_confidence_removed']} low-confidence, "
  f"{stats['low_usage_removed']} low-usage removed. "
  f"{stats['total_remaining']} patterns remaining."
  )
-
+
  return stats
-
- def get_cache_statistics(self) -> Dict[str, Any]:
+
+ def get_cache_statistics(self) -> dict[str, Any]:
  """Get comprehensive cache statistics.
-
+
  Returns:
  Dictionary with cache statistics
  """
  with self.database.get_session() as session:
  # Basic counts
  total_patterns = session.query(PatternCacheModel).count()
-
+
  # Method breakdown
- nlp_patterns = session.query(PatternCacheModel).filter(
- PatternCacheModel.source_method == 'nlp'
- ).count()
-
- llm_patterns = session.query(PatternCacheModel).filter(
- PatternCacheModel.source_method == 'llm'
- ).count()
-
+ nlp_patterns = (
+ session.query(PatternCacheModel)
+ .filter(PatternCacheModel.source_method == "nlp")
+ .count()
+ )
+
+ llm_patterns = (
+ session.query(PatternCacheModel)
+ .filter(PatternCacheModel.source_method == "llm")
+ .count()
+ )
+
  # Confidence distribution
- high_confidence = session.query(PatternCacheModel).filter(
- PatternCacheModel.confidence_score > 0.8
- ).count()
-
- medium_confidence = session.query(PatternCacheModel).filter(
- and_(
- PatternCacheModel.confidence_score >= 0.6,
- PatternCacheModel.confidence_score <= 0.8
+ high_confidence = (
+ session.query(PatternCacheModel)
+ .filter(PatternCacheModel.confidence_score > 0.8)
+ .count()
+ )
+
+ medium_confidence = (
+ session.query(PatternCacheModel)
+ .filter(
+ and_(
+ PatternCacheModel.confidence_score >= 0.6,
+ PatternCacheModel.confidence_score <= 0.8,
+ )
  )
- ).count()
-
+ .count()
+ )
+
  # Usage statistics
- top_patterns = session.query(PatternCacheModel).order_by(
- desc(PatternCacheModel.hit_count)
- ).limit(10).all()
-
+ top_patterns = (
+ session.query(PatternCacheModel)
+ .order_by(desc(PatternCacheModel.hit_count))
+ .limit(10)
+ .all()
+ )
+
  # Age statistics
  week_ago = datetime.utcnow() - timedelta(days=7)
- recent_patterns = session.query(PatternCacheModel).filter(
- PatternCacheModel.created_at > week_ago
- ).count()
-
+ recent_patterns = (
+ session.query(PatternCacheModel)
+ .filter(PatternCacheModel.created_at > week_ago)
+ .count()
+ )
+
  # Calculate hit rate
  total_requests = self.cache_hits + self.cache_misses
  hit_rate = (self.cache_hits / total_requests) if total_requests > 0 else 0.0
-
+
  return {
- 'total_patterns': total_patterns,
- 'method_breakdown': {
- 'nlp_patterns': nlp_patterns,
- 'llm_patterns': llm_patterns
+ "total_patterns": total_patterns,
+ "method_breakdown": {"nlp_patterns": nlp_patterns, "llm_patterns": llm_patterns},
+ "confidence_distribution": {
+ "high_confidence": high_confidence,
+ "medium_confidence": medium_confidence,
+ "low_confidence": total_patterns - high_confidence - medium_confidence,
  },
- 'confidence_distribution': {
- 'high_confidence': high_confidence,
- 'medium_confidence': medium_confidence,
- 'low_confidence': total_patterns - high_confidence - medium_confidence
+ "usage_stats": {
+ "cache_hits": self.cache_hits,
+ "cache_misses": self.cache_misses,
+ "hit_rate": hit_rate,
+ "memory_cache_size": len(self._memory_cache),
  },
- 'usage_stats': {
- 'cache_hits': self.cache_hits,
- 'cache_misses': self.cache_misses,
- 'hit_rate': hit_rate,
- 'memory_cache_size': len(self._memory_cache)
- },
- 'top_patterns': [
+ "top_patterns": [
  {
- 'fingerprint': p.semantic_fingerprint[:8],
- 'hit_count': p.hit_count,
- 'confidence': p.confidence_score,
- 'method': p.source_method
+ "fingerprint": p.semantic_fingerprint[:8],
+ "hit_count": p.hit_count,
+ "confidence": p.confidence_score,
+ "method": p.source_method,
  }
  for p in top_patterns
  ],
- 'recent_patterns': recent_patterns,
- 'patterns_learned': self.pattern_learning_count,
- 'config': {
- 'ttl_hours': self.config.pattern_cache_ttl_hours,
- 'max_memory_size': self.config.semantic_cache_size,
- 'learning_enabled': self.config.enable_pattern_learning
- }
+ "recent_patterns": recent_patterns,
+ "patterns_learned": self.pattern_learning_count,
+ "config": {
+ "ttl_hours": self.config.pattern_cache_ttl_hours,
+ "max_memory_size": self.config.semantic_cache_size,
+ "learning_enabled": self.config.enable_pattern_learning,
+ },
  }
-
- def optimize_cache(self) -> Dict[str, Any]:
+
+ def optimize_cache(self) -> dict[str, Any]:
  """Optimize cache for better performance.
-
+
  Returns:
  Dictionary with optimization results
  """
  optimization_stats = {}
-
+
  # Step 1: Cleanup expired and low-quality entries
  cleanup_stats = self.cleanup_cache()
- optimization_stats['cleanup'] = cleanup_stats
-
+ optimization_stats["cleanup"] = cleanup_stats
+
  # Step 2: Promote high-usage patterns to memory cache
  with self.database.get_session() as session:
  # Get top patterns by hit count
- popular_patterns = session.query(PatternCacheModel).filter(
- PatternCacheModel.hit_count >= 5
- ).order_by(desc(PatternCacheModel.hit_count)).limit(
- self.config.semantic_cache_size // 2 # Fill half of memory cache
- ).all()
-
+ popular_patterns = (
+ session.query(PatternCacheModel)
+ .filter(PatternCacheModel.hit_count >= 5)
+ .order_by(desc(PatternCacheModel.hit_count))
+ .limit(self.config.semantic_cache_size // 2) # Fill half of memory cache
+ .all()
+ )
+
  promoted_count = 0
  for pattern in popular_patterns:
  if pattern.semantic_fingerprint not in self._memory_cache:
  self._add_to_memory_cache(
- pattern.semantic_fingerprint,
- pattern.classification_result
+ pattern.semantic_fingerprint, pattern.classification_result
  )
  promoted_count += 1
-
- optimization_stats['promoted_to_memory'] = promoted_count
-
+
+ optimization_stats["promoted_to_memory"] = promoted_count
+
  # Step 3: Update learning threshold based on cache size
- total_patterns = cleanup_stats['total_remaining']
+ total_patterns = cleanup_stats["total_remaining"]
  if total_patterns > 1000:
  # Increase learning threshold for large caches
  self.config.learning_threshold = min(20, self.config.learning_threshold + 2)
  elif total_patterns < 100:
  # Decrease learning threshold for small caches
  self.config.learning_threshold = max(5, self.config.learning_threshold - 1)
-
- optimization_stats['learning_threshold'] = self.config.learning_threshold
-
+
+ optimization_stats["learning_threshold"] = self.config.learning_threshold
+
  self.logger.info(f"Cache optimization completed: {optimization_stats}")
  return optimization_stats
-
+
  def export_patterns(self, output_path: Path, min_confidence: float = 0.8) -> int:
  """Export high-quality patterns for analysis or backup.
-
+
  Args:
  output_path: Path to export file
  min_confidence: Minimum confidence threshold for export
-
+
  Returns:
  Number of patterns exported
  """
  with self.database.get_session() as session:
- patterns = session.query(PatternCacheModel).filter(
- PatternCacheModel.confidence_score >= min_confidence
- ).order_by(desc(PatternCacheModel.hit_count)).all()
-
+ patterns = (
+ session.query(PatternCacheModel)
+ .filter(PatternCacheModel.confidence_score >= min_confidence)
+ .order_by(desc(PatternCacheModel.hit_count))
+ .all()
+ )
+
  export_data = []
  for pattern in patterns:
- export_data.append({
- 'semantic_fingerprint': pattern.semantic_fingerprint,
- 'classification_result': pattern.classification_result,
- 'confidence_score': pattern.confidence_score,
- 'hit_count': pattern.hit_count,
- 'source_method': pattern.source_method,
- 'created_at': pattern.created_at.isoformat(),
- 'last_used': pattern.last_used.isoformat()
- })
-
- with open(output_path, 'w') as f:
+ export_data.append(
+ {
+ "semantic_fingerprint": pattern.semantic_fingerprint,
+ "classification_result": pattern.classification_result,
+ "confidence_score": pattern.confidence_score,
+ "hit_count": pattern.hit_count,
+ "source_method": pattern.source_method,
+ "created_at": pattern.created_at.isoformat(),
+ "last_used": pattern.last_used.isoformat(),
+ }
+ )
+
+ with open(output_path, "w") as f:
  json.dump(export_data, f, indent=2)
-
+
  self.logger.info(f"Exported {len(patterns)} patterns to {output_path}")
- return len(patterns)
+ return len(patterns)
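
For orientation, the pattern_cache.py shown in this diff is a two-tier cache: an in-memory dict in front of a SQLAlchemy-backed PatternCache table, keyed by a semantic fingerprint of the commit message and changed files. The snippet below is a minimal usage sketch only: the absolute import paths, the CacheConfig defaults, and the Database(...) constructor argument are assumptions inferred from the relative imports in this file, not verified against the released 1.3.6 API.

# Illustrative sketch only. Import paths and constructor arguments are assumptions
# based on the relative imports visible in this diff, not verified package API.
from gitflow_analytics.models.database import Database  # assumed absolute module path
from gitflow_analytics.qualitative.core.pattern_cache import PatternCache
from gitflow_analytics.qualitative.models.schemas import CacheConfig

config = CacheConfig(cache_dir=".qualitative_cache")  # other fields assumed to default
database = Database(".qualitative_cache/patterns.db")  # hypothetical constructor argument
cache = PatternCache(config, database)

message = "fix: handle empty commit messages in ticket extractor"
files = ["gitflow_analytics/extractors/tickets.py"]

# lookup_pattern returns the cached classification dict, or None on a miss.
cached = cache.lookup_pattern(message, files)
if cached is None:
    # Pretend a classifier produced this result; store_pattern only persists
    # results with confidence_score >= 0.6, per the code above.
    result = {"change_type": "bugfix", "risk_level": "low", "confidence_score": 0.85}
    cache.store_pattern(
        message=message,
        files=files,
        classification_result=result,
        confidence_score=0.85,
        source_method="nlp",
        processing_time_ms=12.0,
    )

print(cache.get_cache_statistics()["usage_stats"])  # hits, misses, hit_rate, memory size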