gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +11 -11
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4490 -378
- gitflow_analytics/cli_rich.py +503 -0
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -398
- gitflow_analytics/core/analyzer.py +1320 -172
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +1554 -175
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +571 -185
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +77 -59
- gitflow_analytics/extractors/tickets.py +841 -89
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +258 -87
- gitflow_analytics/integrations/jira_integration.py +572 -123
- gitflow_analytics/integrations/orchestrator.py +206 -82
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +542 -179
- gitflow_analytics/models/database.py +986 -59
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +29 -0
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
- gitflow_analytics/qualitative/core/processor.py +673 -0
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +306 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
- gitflow_analytics/qualitative/utils/metrics.py +361 -0
- gitflow_analytics/qualitative/utils/text_processing.py +285 -0
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +550 -18
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1700 -216
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2289 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +724 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
- gitflow_analytics/tui/screens/configuration_screen.py +523 -0
- gitflow_analytics/tui/screens/loading_screen.py +348 -0
- gitflow_analytics/tui/screens/main_screen.py +321 -0
- gitflow_analytics/tui/screens/results_screen.py +722 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +255 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +187 -0
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
- gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
"""Centralized progress reporting service for GitFlow Analytics.
|
|
2
|
+
|
|
3
|
+
This module provides a unified interface for progress reporting across the application,
|
|
4
|
+
replacing scattered tqdm usage with a centralized, testable, and configurable service.
|
|
5
|
+
|
|
6
|
+
WHY: Progress reporting was scattered across multiple modules (analyzer.py, data_fetcher.py,
|
|
7
|
+
batch_classifier.py, etc.), violating DRY principles and making it difficult to maintain
|
|
8
|
+
consistent progress UX. This service centralizes all progress management.
|
|
9
|
+
|
|
10
|
+
DESIGN DECISIONS:
|
|
11
|
+
- Context-based API: Each progress bar gets a context object for clean lifecycle management
|
|
12
|
+
- Thread-safe: Uses threading locks to ensure safe concurrent access
|
|
13
|
+
- Testable: Can be globally disabled for testing, with event capture capability
|
|
14
|
+
- Nested support: Handles nested progress contexts with proper positioning
|
|
15
|
+
- Consistent styling: All progress bars follow the same formatting rules
|
|
16
|
+
|
|
17
|
+
USAGE:
|
|
18
|
+
from gitflow_analytics.core.progress import get_progress_service
|
|
19
|
+
|
|
20
|
+
progress = get_progress_service()
|
|
21
|
+
context = progress.create_progress(100, "Processing items")
|
|
22
|
+
for item in items:
|
|
23
|
+
# Process item
|
|
24
|
+
progress.update(context)
|
|
25
|
+
progress.complete(context)
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
import os
|
|
29
|
+
import sys
|
|
30
|
+
import threading
|
|
31
|
+
from contextlib import contextmanager
|
|
32
|
+
from dataclasses import dataclass
|
|
33
|
+
from typing import Any, Optional
|
|
34
|
+
|
|
35
|
+
from tqdm import tqdm
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class ProgressContext:
    """Context object for a single progress operation.

    Encapsulates all state for a progress bar, allowing clean lifecycle management
    and preventing resource leaks.
    """

    # Live tqdm instance, or None when progress display is disabled.
    progress_bar: Optional[Any]
    # Text shown next to the bar.
    description: str
    # Total number of items expected.
    total: int
    # Unit label for items (e.g. "commits", "repos", "files").
    unit: str
    # tqdm screen row; None when no explicit slot was assigned
    # (ProgressService.create_progress leaves it None for non-nested bars).
    position: Optional[int]
    # Items completed so far; maintained by ProgressService.update().
    current: int = 0
    # True when this bar renders below a parent bar.
    is_nested: bool = False
    # Optional link to an enclosing context; not populated by ProgressService
    # itself in the visible code — presumably for callers that track nesting.
    parent_context: Optional["ProgressContext"] = None
|
|
56
|
+
@dataclass
class ProgressEvent:
    """Event captured during progress operations for testing.

    Allows tests to verify that progress operations occurred without
    actually displaying progress bars.
    """

    # One of 'create', 'update', 'complete'.
    event_type: str
    # Description of the progress bar the event belongs to.
    description: str
    # Total item count; populated on 'create' events.
    total: Optional[int] = None
    # Step size; populated on 'update' events.
    increment: Optional[int] = None
    # Cumulative count; populated on 'update' and 'complete' events.
    current: Optional[int] = None
|
|
71
|
+
class ProgressService:
    """Centralized service for managing progress reporting.

    This service provides a unified interface for creating and managing progress bars
    throughout the application. It supports nested progress contexts, global disable
    for testing, and event capture for verification.

    Thread-safety: a single lock guards all mutable state, so methods may be
    called concurrently from worker threads.
    """

    def __init__(self):
        """Initialize the progress service."""
        self._enabled = True
        self._lock = threading.Lock()  # guards every mutable attribute below
        self._active_contexts: list[ProgressContext] = []
        self._position_counter = 0  # next tqdm row handed out to nested bars
        self._capture_events = False
        self._captured_events: list[ProgressEvent] = []

        # Check environment for testing mode
        self._check_testing_environment()

    def _check_testing_environment(self):
        """Check if running in a testing environment and disable if needed.

        WHY: Progress bars interfere with test output and can cause issues in CI/CD.
        This automatically detects common testing scenarios and disables progress.
        """
        # Disable in pytest
        if "pytest" in sys.modules:
            self._enabled = False

        # Disable if explicitly requested via environment
        if os.environ.get("GITFLOW_DISABLE_PROGRESS", "").lower() in ("1", "true", "yes"):
            self._enabled = False

        # Disable if not in a TTY (e.g., CI/CD, piped output)
        if not sys.stdout.isatty():
            self._enabled = False

    def create_progress(
        self,
        total: int,
        description: str,
        unit: str = "items",
        nested: bool = False,
        leave: bool = True,
        position: Optional[int] = None,
    ) -> ProgressContext:
        """Create a new progress context.

        Args:
            total: Total number of items to process
            description: Description shown next to the progress bar
            unit: Unit label for items (e.g., "commits", "repos", "files")
            nested: Whether this is a nested progress bar
            leave: Whether to leave the progress bar on screen after completion
            position: Explicit position for the progress bar (for nested contexts)

        Returns:
            ProgressContext object to use for updates

        DESIGN: Returns a context object rather than the tqdm instance directly
        to provide better lifecycle management and prevent resource leaks.
        """
        with self._lock:
            # Capture event if needed
            if self._capture_events:
                self._captured_events.append(ProgressEvent("create", description, total=total))

            # Determine position for nested progress bars; top-level bars keep
            # position=None so tqdm uses its default placement.
            if position is None:
                if nested:
                    self._position_counter += 1
                    position = self._position_counter

            # Create context
            context = ProgressContext(
                progress_bar=None,
                description=description,
                total=total,
                unit=unit,
                position=position,
                is_nested=nested,
            )

            # Create actual progress bar only when display is enabled
            if self._enabled:
                context.progress_bar = tqdm(
                    total=total,
                    desc=description,
                    unit=unit,
                    position=position,
                    leave=leave,
                    # Consistent styling across all bars in the application
                    bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]",
                    dynamic_ncols=True,
                )

            self._active_contexts.append(context)
            return context

    def update(
        self, context: ProgressContext, increment: int = 1, description: Optional[str] = None
    ):
        """Update progress for a given context.

        Args:
            context: The progress context to update
            increment: Number of items completed (default: 1)
            description: Optional new description to set

        WHY: Centralizes update logic and ensures consistent behavior across
        all progress bars in the application.
        """
        with self._lock:
            context.current += increment

            # BUGFIX: keep the context's own description in sync when a new one
            # is supplied, matching set_description(). Previously only the tqdm
            # bar was updated, so later captured events and complete() reported
            # a stale description.
            if description:
                context.description = description

            # Capture event if needed
            if self._capture_events:
                self._captured_events.append(
                    ProgressEvent(
                        "update",
                        description or context.description,
                        increment=increment,
                        current=context.current,
                    )
                )

            # Update actual progress bar if it exists
            if context.progress_bar:
                context.progress_bar.update(increment)
                if description:
                    context.progress_bar.set_description(description)

    def set_description(self, context: ProgressContext, description: str):
        """Update the description of a progress context.

        Args:
            context: The progress context to update
            description: New description to display
        """
        with self._lock:
            context.description = description
            if context.progress_bar:
                context.progress_bar.set_description(description)

    def complete(self, context: ProgressContext):
        """Mark a progress context as complete and clean up resources.

        Args:
            context: The progress context to complete

        IMPORTANT: Always call this method when done with a progress context
        to ensure proper resource cleanup.
        """
        with self._lock:
            # Capture event if needed
            if self._capture_events:
                self._captured_events.append(
                    ProgressEvent("complete", context.description, current=context.current)
                )

            # Remove from active contexts BEFORE modifying progress_bar
            # to avoid comparison issues with None
            if context in self._active_contexts:
                self._active_contexts.remove(context)

            # Close actual progress bar if it exists
            if context.progress_bar:
                context.progress_bar.close()
                context.progress_bar = None

            # Reset position counter if no nested contexts remain
            if context.is_nested and not any(c.is_nested for c in self._active_contexts):
                self._position_counter = 0

    @contextmanager
    def progress(
        self,
        total: int,
        description: str,
        unit: str = "items",
        nested: bool = False,
        leave: bool = True,
    ):
        """Context manager for progress operations.

        Args:
            total: Total number of items to process
            description: Description shown next to the progress bar
            unit: Unit label for items
            nested: Whether this is a nested progress bar
            leave: Whether to leave the progress bar on screen

        Yields:
            ProgressContext object for updates

        Example:
            with progress.progress(100, "Processing") as ctx:
                for item in items:
                    process(item)
                    progress.update(ctx)
        """
        context = self.create_progress(total, description, unit, nested, leave)
        try:
            yield context
        finally:
            # Guarantees cleanup even if the caller's loop raises.
            self.complete(context)

    def disable(self):
        """Disable all progress reporting globally.

        Useful for testing or quiet mode operation.
        """
        with self._lock:
            self._enabled = False
            # Close any active progress bars. Iterate a copy: contexts stay in
            # the list (only complete() removes them), but we mutate their bars.
            for context in self._active_contexts[:]:
                if context.progress_bar:
                    context.progress_bar.close()
                    context.progress_bar = None

    def enable(self):
        """Enable progress reporting globally."""
        with self._lock:
            self._enabled = True

    def is_enabled(self) -> bool:
        """Check if progress reporting is enabled."""
        # Single attribute read; atomic enough without the lock.
        return self._enabled

    def start_event_capture(self):
        """Start capturing progress events for testing.

        WHY: Allows tests to verify that progress operations occurred
        without actually displaying progress bars.
        """
        with self._lock:
            self._capture_events = True
            self._captured_events = []

    def stop_event_capture(self) -> list[ProgressEvent]:
        """Stop capturing events and return captured events.

        Returns:
            List of ProgressEvent objects that were captured
        """
        with self._lock:
            self._capture_events = False
            events = self._captured_events[:]
            self._captured_events = []
            return events

    def get_captured_events(self) -> list[ProgressEvent]:
        """Get currently captured events without stopping capture.

        Returns:
            List of ProgressEvent objects captured so far (a copy, so callers
            cannot mutate internal state)
        """
        with self._lock:
            return self._captured_events[:]

    def clear_captured_events(self):
        """Clear captured events without stopping capture."""
        with self._lock:
            self._captured_events = []
|
+
|
|
338
|
+
# Global singleton instance
|
|
339
|
+
_progress_service: Optional[ProgressService] = None
|
|
340
|
+
_service_lock = threading.Lock()
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def get_progress_service() -> ProgressService:
    """Return the process-wide ProgressService singleton.

    Uses double-checked locking: an unlocked fast path returns the existing
    instance without contention, while the locked re-check guarantees that at
    most one instance is ever constructed under concurrent first calls.
    """
    global _progress_service

    existing = _progress_service
    if existing is not None:
        # Fast path: already initialized, no lock needed.
        return existing

    with _service_lock:
        if _progress_service is None:
            _progress_service = ProgressService()
        return _progress_service
|
+
|
|
361
|
+
def reset_progress_service():
    """Tear down the global progress service.

    WARNING: Intended for tests and application shutdown only. Any active
    progress bars are closed (via disable()) and the singleton slot is
    cleared, so the next get_progress_service() call builds a fresh instance.
    """
    global _progress_service

    with _service_lock:
        service = _progress_service
        if service:
            service.disable()
            _progress_service = None
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
"""Schema versioning for tracking data structure changes."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
from datetime import datetime, timezone
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Optional
|
|
8
|
+
|
|
9
|
+
from sqlalchemy import Column, DateTime, String, Text, create_engine
|
|
10
|
+
from sqlalchemy.orm import declarative_base, sessionmaker
|
|
11
|
+
|
|
12
|
+
Base = declarative_base()
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SchemaVersion(Base):
    """Track schema versions for incremental data processing.

    One row per component, recording the schema hash the component's data was
    last processed with and the most recent processed date.
    """

    __tablename__ = "schema_versions"

    # Component name, e.g. 'qualitative', 'identity', 'core'.
    component = Column(String, primary_key=True)
    # Hash of schema definition (SchemaVersionManager stores a truncated
    # SHA-256 hex digest here).
    version_hash = Column(String, nullable=False)
    # JSON schema definition, serialized with sorted keys.
    schema_definition = Column(Text, nullable=False)
    # NOTE(review): datetime.utcnow yields a *naive* timestamp (and is
    # deprecated since Python 3.12); readers must not compare it against
    # timezone-aware datetimes without attaching a tzinfo first.
    created_at = Column(DateTime, default=datetime.utcnow)
    # Last date we processed data with this schema; may be stored naive, so
    # consumers re-attach UTC before comparing.
    last_processed_date = Column(
        DateTime, nullable=True
    )
|
+
|
|
29
|
+
class SchemaVersionManager:
    """Manages schema versions and determines if incremental processing is possible.

    Persists one SchemaVersion row per component in a SQLite database inside
    the cache directory. A component's data needs full reprocessing whenever
    the hash of its schema definition (plus relevant configuration) changes.
    """

    # Define current schema versions for each component. Changing any entry
    # here changes the hash and therefore invalidates incremental state for
    # that component.
    CURRENT_SCHEMAS = {
        "qualitative": {
            "version": "2.0",
            "fields": [
                "change_type",
                "change_type_confidence",
                "business_domain",
                "domain_confidence",
                "risk_level",
                "risk_factors",
                "intent_signals",
                "collaboration_patterns",
                "technical_context",
                "processing_method",
                "processing_time_ms",
                "confidence_score",
            ],
            # Config keys whose values are folded into the hash (see
            # get_schema_hash); only 'qualitative' declares these.
            "config_fields": [
                "nlp_config",
                "llm_config",
                "cache_config",
                "confidence_threshold",
                "max_llm_fallback_pct",
            ],
        },
        "identity": {
            "version": "1.3",
            "fields": [
                "canonical_id",
                "primary_name",
                "primary_email",
                "manual_mappings",
                "similarity_threshold",
                "auto_analysis",
                "display_names",
                "preferred_name_field",
            ],
        },
        "core": {
            "version": "1.0",
            "fields": [
                "story_points",
                "ticket_references",
                "files_changed",
                "insertions",
                "deletions",
                "complexity_delta",
                "branch_mapping_rules",
            ],
        },
        "github": {
            "version": "1.0",
            "fields": [
                "pr_data",
                "pr_metrics",
                "issue_data",
                "rate_limit_retries",
                "backoff_factor",
                "allowed_ticket_platforms",
            ],
        },
        "jira": {
            "version": "1.0",
            "fields": ["story_point_fields", "project_keys", "base_url", "issue_data"],
        },
    }

    def __init__(self, cache_dir: Path):
        """Initialize schema version manager.

        Args:
            cache_dir: Existing directory that will hold schema_versions.db.
        """
        self.cache_dir = cache_dir
        self.db_path = cache_dir / "schema_versions.db"
        self.engine = create_engine(f"sqlite:///{self.db_path}")
        # Idempotent: creates the schema_versions table only if missing.
        Base.metadata.create_all(self.engine)
        self.session_factory = sessionmaker(bind=self.engine)

    def get_schema_hash(self, component: str, config: Optional[dict[str, Any]] = None) -> str:
        """Generate hash for a component's schema including configuration.

        Args:
            component: Key into CURRENT_SCHEMAS.
            config: Optional runtime configuration; only keys listed in the
                component's 'config_fields' affect the hash.

        Returns:
            16-hex-character truncated SHA-256 digest of the schema definition.

        Raises:
            ValueError: If the component is not in CURRENT_SCHEMAS.
        """
        if component not in self.CURRENT_SCHEMAS:
            raise ValueError(f"Unknown component: {component}")

        # Shallow copy is enough: we only add a new top-level "config" key.
        schema_def = self.CURRENT_SCHEMAS[component].copy()

        # Include relevant configuration in the hash
        if config and "config_fields" in schema_def:
            relevant_config = {}
            for field in schema_def["config_fields"]:
                if field in config:
                    relevant_config[field] = self._normalize_config_value(config[field])
            schema_def["config"] = relevant_config

        # Create deterministic hash (sort_keys makes the JSON canonical)
        schema_json = json.dumps(schema_def, sort_keys=True)
        return hashlib.sha256(schema_json.encode()).hexdigest()[:16]

    def _normalize_config_value(self, value: Any) -> Any:
        """Normalize config values for consistent hashing.

        Dicts are key-sorted recursively, lists are sorted, scalars pass
        through, and anything else is stringified.

        NOTE(review): sorted() on a list of mixed or unorderable normalized
        values (e.g. dicts) would raise TypeError — assumes config lists hold
        homogeneous orderable scalars; confirm against real configs.
        """
        if isinstance(value, dict):
            return {k: self._normalize_config_value(v) for k, v in sorted(value.items())}
        elif isinstance(value, list):
            return sorted([self._normalize_config_value(v) for v in value])
        elif isinstance(value, (int, float, str, bool, type(None))):
            return value
        else:
            # Convert complex objects to string representation
            return str(value)

    def has_schema_changed(self, component: str, config: Optional[dict[str, Any]] = None) -> bool:
        """Check if schema has changed since last processing.

        Returns True when no version is stored yet or the stored hash differs
        from the current one.
        """
        current_hash = self.get_schema_hash(component, config)

        with self.session_factory() as session:
            stored_version = session.query(SchemaVersion).filter_by(component=component).first()

            if not stored_version:
                return True  # No previous schema, consider changed

            return stored_version.version_hash != current_hash

    def update_schema_version(
        self,
        component: str,
        config: Optional[dict[str, Any]] = None,
        last_processed_date: Optional[datetime] = None,
    ):
        """Update stored schema version.

        Upserts the SchemaVersion row for the component with the current hash
        and definition; last_processed_date is only overwritten when provided.
        """
        current_hash = self.get_schema_hash(component, config)
        schema_def = json.dumps(self.CURRENT_SCHEMAS[component], sort_keys=True)

        # Ensure date is timezone-aware before storing
        if last_processed_date and last_processed_date.tzinfo is None:
            last_processed_date = last_processed_date.replace(tzinfo=timezone.utc)

        with self.session_factory() as session:
            stored_version = session.query(SchemaVersion).filter_by(component=component).first()

            if stored_version:
                stored_version.version_hash = current_hash
                stored_version.schema_definition = schema_def
                if last_processed_date:
                    stored_version.last_processed_date = last_processed_date
            else:
                stored_version = SchemaVersion(
                    component=component,
                    version_hash=current_hash,
                    schema_definition=schema_def,
                    last_processed_date=last_processed_date,
                )
                session.add(stored_version)

            session.commit()

    def get_last_processed_date(self, component: str) -> Optional[datetime]:
        """Get the last date data was processed for this component.

        Returns None when the component has no stored version. The value may
        be naive (SQLite DateTime); callers normalize before comparing.
        """
        with self.session_factory() as session:
            stored_version = session.query(SchemaVersion).filter_by(component=component).first()
            return stored_version.last_processed_date if stored_version else None

    def should_process_date(
        self, component: str, date: datetime, config: Optional[dict[str, Any]] = None
    ) -> bool:
        """Determine if we should process data for a given date.

        Returns True when the schema changed, when nothing was processed yet,
        or when `date` is strictly after the last processed date.
        """
        # Always process if schema has changed
        if self.has_schema_changed(component, config):
            return True

        # Check if we've already processed this date
        last_processed = self.get_last_processed_date(component)
        if not last_processed:
            return True

        # Ensure both dates are timezone-aware for comparison
        # (naive values are assumed to be UTC)
        if date.tzinfo is None:
            date = date.replace(tzinfo=timezone.utc)
        if last_processed.tzinfo is None:
            last_processed = last_processed.replace(tzinfo=timezone.utc)

        # Process if date is after last processed date
        return date > last_processed

    def mark_date_processed(
        self, component: str, date: datetime, config: Optional[dict[str, Any]] = None
    ):
        """Mark a date as processed for incremental tracking.

        Only advances last_processed_date forward; an older `date` than the
        stored one is ignored. Creates the version row if missing.
        """
        # Ensure date is timezone-aware before storing
        if date.tzinfo is None:
            date = date.replace(tzinfo=timezone.utc)

        with self.session_factory() as session:
            stored_version = session.query(SchemaVersion).filter_by(component=component).first()

            if stored_version:
                # Update to the latest processed date
                if not stored_version.last_processed_date:
                    stored_version.last_processed_date = date
                    session.commit()
                else:
                    # Ensure stored date is timezone-aware for comparison
                    stored_date = stored_version.last_processed_date
                    if stored_date.tzinfo is None:
                        stored_date = stored_date.replace(tzinfo=timezone.utc)

                    if date > stored_date:
                        stored_version.last_processed_date = date
                        session.commit()
            else:
                # Create new entry (also writes current hash/definition)
                self.update_schema_version(component, config, date)

    def get_schema_info(self, component: str) -> dict[str, Any]:
        """Get detailed schema information for debugging.

        NOTE: the hash here is computed without config, so for components with
        'config_fields' it can differ from a config-aware get_schema_hash call.
        """
        with self.session_factory() as session:
            stored_version = session.query(SchemaVersion).filter_by(component=component).first()

            current_hash = self.get_schema_hash(component)

            return {
                "component": component,
                "current_schema_hash": current_hash,
                "stored_schema_hash": stored_version.version_hash if stored_version else None,
                "schema_changed": self.has_schema_changed(component),
                "last_processed": stored_version.last_processed_date if stored_version else None,
                "created_at": stored_version.created_at if stored_version else None,
            }

    def reset_component(self, component: str):
        """Reset schema version for a component (forces full reprocessing)."""
        with self.session_factory() as session:
            stored_version = session.query(SchemaVersion).filter_by(component=component).first()
            if stored_version:
                session.delete(stored_version)
                session.commit()
265
|
+
|
|
266
|
+
def create_schema_manager(cache_dir: Path) -> SchemaVersionManager:
    """Factory function to create a schema version manager.

    Args:
        cache_dir: Directory that will hold the ``schema_versions.db`` SQLite
            file. Created (with parents) if missing. Accepts anything
            ``pathlib.Path`` accepts, so plain string paths also work —
            a backward-compatible generalization.

    Returns:
        A SchemaVersionManager backed by a database inside ``cache_dir``.
    """
    # Normalize so str callers work too; Path(Path(...)) is a cheap no-op.
    cache_path = Path(cache_dir)
    cache_path.mkdir(parents=True, exist_ok=True)
    return SchemaVersionManager(cache_path)
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
"""Base classes for pluggable extractors."""
|
|
2
|
+
|
|
2
3
|
from abc import ABC, abstractmethod
|
|
3
|
-
from typing import Any,
|
|
4
|
+
from typing import Any, Optional
|
|
4
5
|
|
|
5
6
|
|
|
6
7
|
class ExtractorBase(ABC):
|
|
7
8
|
"""Base class for all extractors."""
|
|
8
|
-
|
|
9
|
+
|
|
9
10
|
@abstractmethod
|
|
10
11
|
def extract_from_text(self, text: str) -> Any:
|
|
11
12
|
"""Extract information from text."""
|
|
@@ -14,28 +15,29 @@ class ExtractorBase(ABC):
|
|
|
14
15
|
|
|
15
16
|
class StoryPointExtractorBase(ExtractorBase):
    """Base class for story point extractors.

    Subclasses implement both text-level and pull-request-level extraction;
    either may return None when no story points are found.
    """

    @abstractmethod
    def extract_from_text(self, text: str) -> Optional[int]:
        """Extract story points from text.

        Args:
            text: Free-form text (e.g. a commit message) to scan.

        Returns:
            The story point value, or None if none could be extracted.
        """
        pass

    @abstractmethod
    def extract_from_pr(
        self, pr_data: dict[str, Any], commit_messages: Optional[list[str]] = None
    ) -> Optional[int]:
        """Extract story points from pull request.

        Args:
            pr_data: Pull request payload as a dict (schema depends on the
                integration supplying it — not constrained here).
            commit_messages: Optional commit messages associated with the PR.

        Returns:
            The story point value, or None if none could be extracted.
        """
        pass
31
|
|
|
30
32
|
class TicketExtractorBase(ExtractorBase):
    """Base class for ticket extractors.

    Subclasses locate issue-tracker references (e.g. ticket IDs) in free-form
    text, either as a flat list or grouped by platform.
    """

    @abstractmethod
    def extract_from_text(self, text: str) -> list[dict[str, str]]:
        """Extract ticket references from text.

        Args:
            text: Free-form text to scan.

        Returns:
            A list of dicts, one per reference (key layout is defined by the
            concrete extractor); empty list when nothing is found.
        """
        pass

    @abstractmethod
    def extract_by_platform(self, text: str) -> dict[str, list[str]]:
        """Extract tickets grouped by platform.

        Args:
            text: Free-form text to scan.

        Returns:
            Mapping of platform name to the ticket identifiers found for it.
        """
        pass