gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
gitflow_analytics/qualitative/core/pattern_cache.py

@@ -3,36 +3,35 @@
 import hashlib
 import json
 import logging
-import
+from collections import defaultdict
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import
-from collections import defaultdict
+from typing import Any, Optional

-from sqlalchemy.orm import Session
 from sqlalchemy import and_, desc

-from ...models.database import Database
+from ...models.database import Database
+from ...models.database import PatternCache as PatternCacheModel
 from ..models.schemas import CacheConfig, QualitativeCommitData
 from ..utils.text_processing import TextProcessor


 class PatternCache:
     """Intelligent caching system for qualitative analysis patterns.
-
+
     This system learns from successful classifications to speed up future
     processing and improve accuracy through pattern recognition.
-
+
     Features:
     - Semantic fingerprinting for pattern matching
     - Hit count tracking for popular patterns
     - Automatic cache cleanup and optimization
     - Pattern learning from successful classifications
     """
-
+
     def __init__(self, config: CacheConfig, database: Database):
         """Initialize pattern cache.
-
+
         Args:
             config: Cache configuration
             database: Database instance for persistence
@@ -40,65 +39,69 @@ class PatternCache:
         self.config = config
         self.database = database
         self.logger = logging.getLogger(__name__)
-
+
         # Initialize text processor for fingerprinting
         self.text_processor = TextProcessor()
-
+
         # In-memory cache for frequently accessed patterns
-        self._memory_cache:
+        self._memory_cache: dict[str, dict[str, Any]] = {}
         self._memory_cache_hits = defaultdict(int)
-
+
         # Statistics
         self.cache_hits = 0
         self.cache_misses = 0
         self.pattern_learning_count = 0
-
+
         # Initialize cache directory if using file-based caching
         cache_dir = Path(config.cache_dir)
         cache_dir.mkdir(exist_ok=True, parents=True)
-
+
         self.logger.info(f"Pattern cache initialized with TTL: {config.pattern_cache_ttl_hours}h")
-
-    def lookup_pattern(self, message: str, files:
+
+    def lookup_pattern(self, message: str, files: list[str]) -> Optional[dict[str, Any]]:
         """Look up cached classification for a commit pattern.
-
+
         Args:
             message: Commit message
             files: List of changed files
-
+
         Returns:
             Cached classification result or None if not found
         """
         # Generate semantic fingerprint
         fingerprint = self.text_processor.create_semantic_fingerprint(message, files)
-
+
         # Check in-memory cache first
         if fingerprint in self._memory_cache:
            self._memory_cache_hits[fingerprint] += 1
            self.cache_hits += 1
            self.logger.debug(f"Memory cache hit for pattern: {fingerprint[:8]}")
            return self._memory_cache[fingerprint]
-
+
         # Check database cache
         with self.database.get_session() as session:
-            cached_pattern =
-            PatternCacheModel
-
-
+            cached_pattern = (
+                session.query(PatternCacheModel)
+                .filter(PatternCacheModel.semantic_fingerprint == fingerprint)
+                .first()
+            )
+
             if cached_pattern:
                 # Check if pattern is still valid (not expired)
-                cutoff_time = datetime.utcnow() - timedelta(
-
+                cutoff_time = datetime.utcnow() - timedelta(
+                    hours=self.config.pattern_cache_ttl_hours
+                )
+
                 if cached_pattern.created_at > cutoff_time:
                     # Update hit count and last used
                     cached_pattern.hit_count += 1
                     cached_pattern.last_used = datetime.utcnow()
                     session.commit()
-
+
                     # Add to memory cache for faster future access
                     result = cached_pattern.classification_result
                     self._add_to_memory_cache(fingerprint, result)
-
+
                     self.cache_hits += 1
                     self.logger.debug(f"Database cache hit for pattern: {fingerprint[:8]}")
                     return result
@@ -107,16 +110,21 @@ class PatternCache:
                     session.delete(cached_pattern)
                     session.commit()
                     self.logger.debug(f"Expired pattern removed: {fingerprint[:8]}")
-
+
         self.cache_misses += 1
         return None
-
-    def store_pattern(
-
-
-
+
+    def store_pattern(
+        self,
+        message: str,
+        files: list[str],
+        classification_result: dict[str, Any],
+        confidence_score: float,
+        source_method: str,
+        processing_time_ms: float = 0.0,
+    ) -> None:
         """Store a new pattern in the cache.
-
+
         Args:
             message: Commit message
             files: List of changed files
@@ -128,31 +136,33 @@ class PatternCache:
         # Only cache high-confidence results
         if confidence_score < 0.6:
             return
-
+
         fingerprint = self.text_processor.create_semantic_fingerprint(message, files)
         message_hash = hashlib.md5(message.encode()).hexdigest()
-
+
         # Add to memory cache
         self._add_to_memory_cache(fingerprint, classification_result)
-
+
         # Store in database
         with self.database.get_session() as session:
             # Check if pattern already exists
-            existing_pattern =
-            PatternCacheModel
-
-
+            existing_pattern = (
+                session.query(PatternCacheModel)
+                .filter(PatternCacheModel.semantic_fingerprint == fingerprint)
+                .first()
+            )
+
             if existing_pattern:
                 # Update existing pattern with new data
                 existing_pattern.hit_count += 1
                 existing_pattern.last_used = datetime.utcnow()
-
+
                 # Update confidence if new result is more confident
                 if confidence_score > existing_pattern.confidence_score:
                     existing_pattern.classification_result = classification_result
                     existing_pattern.confidence_score = confidence_score
                     existing_pattern.source_method = source_method
-
+
                 # Update average processing time
                 if processing_time_ms > 0:
                     if existing_pattern.avg_processing_time_ms:
@@ -169,53 +179,53 @@ class PatternCache:
                     classification_result=classification_result,
                     confidence_score=confidence_score,
                     source_method=source_method,
-                    avg_processing_time_ms=processing_time_ms
+                    avg_processing_time_ms=processing_time_ms,
                 )
                 session.add(new_pattern)
                 self.pattern_learning_count += 1
-
+
             session.commit()
-
+
         self.logger.debug(
             f"Stored pattern: {fingerprint[:8]} "
             f"(confidence: {confidence_score:.2f}, method: {source_method})"
         )
-
-    def learn_from_results(self, results:
+
+    def learn_from_results(self, results: list[QualitativeCommitData]) -> None:
         """Learn patterns from successful classification results.
-
+
         Args:
             results: List of classification results to learn from
         """
         learned_patterns = 0
-
+
         for result in results:
             if result.confidence_score >= 0.7:  # Only learn from high-confidence results
                 classification_data = {
-
-
-
-
-
-
+                    "change_type": result.change_type,
+                    "change_type_confidence": result.change_type_confidence,
+                    "business_domain": result.business_domain,
+                    "domain_confidence": result.domain_confidence,
+                    "risk_level": result.risk_level,
+                    "confidence_score": result.confidence_score,
                 }
-
+
                 self.store_pattern(
                     message=result.message,
                     files=result.files_changed,
                     classification_result=classification_data,
                     confidence_score=result.confidence_score,
                     source_method=result.processing_method,
-                    processing_time_ms=result.processing_time_ms
+                    processing_time_ms=result.processing_time_ms,
                 )
                 learned_patterns += 1
-
+
         if learned_patterns > 0:
             self.logger.info(f"Learned {learned_patterns} new patterns from results")
-
-    def _add_to_memory_cache(self, fingerprint: str, result:
+
+    def _add_to_memory_cache(self, fingerprint: str, result: dict[str, Any]) -> None:
         """Add result to in-memory cache with size management.
-
+
         Args:
             fingerprint: Pattern fingerprint
             result: Classification result
@@ -223,235 +233,247 @@ class PatternCache:
         # Manage memory cache size
         if len(self._memory_cache) >= self.config.semantic_cache_size:
             # Remove least recently used items
-            sorted_items = sorted(
-
-                key=lambda x: x[1]
-            )
-
+            sorted_items = sorted(self._memory_cache_hits.items(), key=lambda x: x[1])
+
             # Remove bottom 20% of items
             items_to_remove = len(sorted_items) // 5
             for fingerprint_to_remove, _ in sorted_items[:items_to_remove]:
                 self._memory_cache.pop(fingerprint_to_remove, None)
                 self._memory_cache_hits.pop(fingerprint_to_remove, None)
-
+
         self._memory_cache[fingerprint] = result
         self._memory_cache_hits[fingerprint] = 1
-
-    def cleanup_cache(self) ->
+
+    def cleanup_cache(self) -> dict[str, int]:
         """Clean up expired and low-quality cache entries.
-
+
         Returns:
             Dictionary with cleanup statistics
         """
         stats = {
-
-
-
-
+            "expired_removed": 0,
+            "low_confidence_removed": 0,
+            "low_usage_removed": 0,
+            "total_remaining": 0,
         }
-
+
         cutoff_time = datetime.utcnow() - timedelta(hours=self.config.pattern_cache_ttl_hours)
-
+
         with self.database.get_session() as session:
             # Remove expired patterns
             expired_patterns = session.query(PatternCacheModel).filter(
                 PatternCacheModel.created_at < cutoff_time
             )
-            stats[
+            stats["expired_removed"] = expired_patterns.count()
             expired_patterns.delete()
-
+
             # Remove very low confidence patterns (< 0.4)
             low_confidence_patterns = session.query(PatternCacheModel).filter(
                 PatternCacheModel.confidence_score < 0.4
             )
-            stats[
+            stats["low_confidence_removed"] = low_confidence_patterns.count()
             low_confidence_patterns.delete()
-
+
             # Remove patterns with very low usage (hit_count = 1 and older than 7 days)
             week_ago = datetime.utcnow() - timedelta(days=7)
             low_usage_patterns = session.query(PatternCacheModel).filter(
-                and_(
-                    PatternCacheModel.hit_count == 1,
-                    PatternCacheModel.created_at < week_ago
-                )
+                and_(PatternCacheModel.hit_count == 1, PatternCacheModel.created_at < week_ago)
             )
-            stats[
+            stats["low_usage_removed"] = low_usage_patterns.count()
             low_usage_patterns.delete()
-
+
             # Count remaining patterns
-            stats[
-
+            stats["total_remaining"] = session.query(PatternCacheModel).count()
+
             session.commit()
-
+
         # Clear memory cache to force refresh
         self._memory_cache.clear()
         self._memory_cache_hits.clear()
-
+
         self.logger.info(
             f"Cache cleanup completed: {stats['expired_removed']} expired, "
             f"{stats['low_confidence_removed']} low-confidence, "
            f"{stats['low_usage_removed']} low-usage removed. "
            f"{stats['total_remaining']} patterns remaining."
        )
-
+
         return stats
-
-    def get_cache_statistics(self) ->
+
+    def get_cache_statistics(self) -> dict[str, Any]:
         """Get comprehensive cache statistics.
-
+
         Returns:
             Dictionary with cache statistics
         """
         with self.database.get_session() as session:
             # Basic counts
             total_patterns = session.query(PatternCacheModel).count()
-
+
             # Method breakdown
-            nlp_patterns =
-            PatternCacheModel
-
-
-
-
-
-
+            nlp_patterns = (
+                session.query(PatternCacheModel)
+                .filter(PatternCacheModel.source_method == "nlp")
+                .count()
+            )
+
+            llm_patterns = (
+                session.query(PatternCacheModel)
+                .filter(PatternCacheModel.source_method == "llm")
+                .count()
+            )
+
             # Confidence distribution
-            high_confidence =
-            PatternCacheModel
-
-
-
-
-
-
+            high_confidence = (
+                session.query(PatternCacheModel)
+                .filter(PatternCacheModel.confidence_score > 0.8)
+                .count()
+            )
+
+            medium_confidence = (
+                session.query(PatternCacheModel)
+                .filter(
+                    and_(
+                        PatternCacheModel.confidence_score >= 0.6,
+                        PatternCacheModel.confidence_score <= 0.8,
+                    )
                 )
-
-
+                .count()
+            )
+
             # Usage statistics
-            top_patterns =
-
-
-
+            top_patterns = (
+                session.query(PatternCacheModel)
+                .order_by(desc(PatternCacheModel.hit_count))
+                .limit(10)
+                .all()
+            )
+
             # Age statistics
             week_ago = datetime.utcnow() - timedelta(days=7)
-            recent_patterns =
-            PatternCacheModel
-
-
+            recent_patterns = (
+                session.query(PatternCacheModel)
+                .filter(PatternCacheModel.created_at > week_ago)
+                .count()
+            )
+
             # Calculate hit rate
             total_requests = self.cache_hits + self.cache_misses
             hit_rate = (self.cache_hits / total_requests) if total_requests > 0 else 0.0
-
+
             return {
-
-
-
-
+                "total_patterns": total_patterns,
+                "method_breakdown": {"nlp_patterns": nlp_patterns, "llm_patterns": llm_patterns},
+                "confidence_distribution": {
+                    "high_confidence": high_confidence,
+                    "medium_confidence": medium_confidence,
+                    "low_confidence": total_patterns - high_confidence - medium_confidence,
                 },
-
-
-
-
+                "usage_stats": {
+                    "cache_hits": self.cache_hits,
+                    "cache_misses": self.cache_misses,
+                    "hit_rate": hit_rate,
+                    "memory_cache_size": len(self._memory_cache),
                 },
-
-                'cache_hits': self.cache_hits,
-                'cache_misses': self.cache_misses,
-                'hit_rate': hit_rate,
-                'memory_cache_size': len(self._memory_cache)
-            },
-            'top_patterns': [
+                "top_patterns": [
                     {
-
-
-
-
+                        "fingerprint": p.semantic_fingerprint[:8],
+                        "hit_count": p.hit_count,
+                        "confidence": p.confidence_score,
+                        "method": p.source_method,
                     }
                     for p in top_patterns
                 ],
-
-
-
-
-
-
-            }
+                "recent_patterns": recent_patterns,
+                "patterns_learned": self.pattern_learning_count,
+                "config": {
+                    "ttl_hours": self.config.pattern_cache_ttl_hours,
+                    "max_memory_size": self.config.semantic_cache_size,
+                    "learning_enabled": self.config.enable_pattern_learning,
+                },
             }
-
-    def optimize_cache(self) ->
+
+    def optimize_cache(self) -> dict[str, Any]:
         """Optimize cache for better performance.
-
+
         Returns:
             Dictionary with optimization results
         """
         optimization_stats = {}
-
+
         # Step 1: Cleanup expired and low-quality entries
         cleanup_stats = self.cleanup_cache()
-        optimization_stats[
-
+        optimization_stats["cleanup"] = cleanup_stats
+
         # Step 2: Promote high-usage patterns to memory cache
         with self.database.get_session() as session:
             # Get top patterns by hit count
-            popular_patterns =
-            PatternCacheModel
-
-
-
-
+            popular_patterns = (
+                session.query(PatternCacheModel)
+                .filter(PatternCacheModel.hit_count >= 5)
+                .order_by(desc(PatternCacheModel.hit_count))
+                .limit(self.config.semantic_cache_size // 2)  # Fill half of memory cache
+                .all()
+            )
+
             promoted_count = 0
             for pattern in popular_patterns:
                 if pattern.semantic_fingerprint not in self._memory_cache:
                     self._add_to_memory_cache(
-                        pattern.semantic_fingerprint,
-                        pattern.classification_result
+                        pattern.semantic_fingerprint, pattern.classification_result
                     )
                     promoted_count += 1
-
-        optimization_stats[
-
+
+        optimization_stats["promoted_to_memory"] = promoted_count
+
         # Step 3: Update learning threshold based on cache size
-        total_patterns = cleanup_stats[
+        total_patterns = cleanup_stats["total_remaining"]
         if total_patterns > 1000:
             # Increase learning threshold for large caches
             self.config.learning_threshold = min(20, self.config.learning_threshold + 2)
         elif total_patterns < 100:
             # Decrease learning threshold for small caches
             self.config.learning_threshold = max(5, self.config.learning_threshold - 1)
-
-        optimization_stats[
-
+
+        optimization_stats["learning_threshold"] = self.config.learning_threshold
+
         self.logger.info(f"Cache optimization completed: {optimization_stats}")
         return optimization_stats
-
+
     def export_patterns(self, output_path: Path, min_confidence: float = 0.8) -> int:
         """Export high-quality patterns for analysis or backup.
-
+
         Args:
             output_path: Path to export file
             min_confidence: Minimum confidence threshold for export
-
+
         Returns:
             Number of patterns exported
         """
         with self.database.get_session() as session:
-            patterns =
-            PatternCacheModel
-
-
+            patterns = (
+                session.query(PatternCacheModel)
+                .filter(PatternCacheModel.confidence_score >= min_confidence)
+                .order_by(desc(PatternCacheModel.hit_count))
+                .all()
+            )
+
             export_data = []
             for pattern in patterns:
-                export_data.append(
-
-
-
-
-
-
-
-
-
-
+                export_data.append(
+                    {
+                        "semantic_fingerprint": pattern.semantic_fingerprint,
+                        "classification_result": pattern.classification_result,
+                        "confidence_score": pattern.confidence_score,
+                        "hit_count": pattern.hit_count,
+                        "source_method": pattern.source_method,
+                        "created_at": pattern.created_at.isoformat(),
+                        "last_used": pattern.last_used.isoformat(),
+                    }
+                )
+
+        with open(output_path, "w") as f:
             json.dump(export_data, f, indent=2)
-
+
         self.logger.info(f"Exported {len(patterns)} patterns to {output_path}")
-
-        return len(patterns)
+
+        return len(patterns)
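For orientation, the refactored PatternCache surface in the diff above can be exercised roughly as follows. This is a minimal sketch based only on the method signatures and module paths visible in this diff; the CacheConfig constructor arguments, the Database construction, and the example commit data are assumptions, not confirmed gitflow-analytics API.

# Usage sketch (assumed wiring): only the PatternCache method calls are taken from
# the diff; CacheConfig fields and Database setup below are illustrative placeholders.
from pathlib import Path

from gitflow_analytics.models.database import Database
from gitflow_analytics.qualitative.core.pattern_cache import PatternCache
from gitflow_analytics.qualitative.models.schemas import CacheConfig

config = CacheConfig()    # fields such as cache_dir and pattern_cache_ttl_hours are referenced above
database = Database(...)  # constructor arguments are not shown in this diff

cache = PatternCache(config, database)

commit_message = "fix: handle empty author emails"          # hypothetical example commit
changed_files = ["gitflow_analytics/core/identity.py"]

cached = cache.lookup_pattern(commit_message, changed_files)
if cached is None:
    # Classify by other means, then store the result; entries below 0.6 confidence are skipped.
    cache.store_pattern(
        message=commit_message,
        files=changed_files,
        classification_result={"change_type": "bugfix", "confidence_score": 0.85},
        confidence_score=0.85,
        source_method="nlp",
        processing_time_ms=12.5,
    )

# Periodic maintenance and reporting.
print(cache.get_cache_statistics()["usage_stats"])
cache.cleanup_cache()
cache.optimize_cache()
cache.export_patterns(Path("patterns_export.json"), min_confidence=0.8)

The sketch mirrors the intended flow suggested by the new code: look up a semantic fingerprint first, store only high-confidence classifications, and run cleanup/optimize/export as maintenance passes.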
|