gitflow-analytics 1.0.0__py3-none-any.whl → 1.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. gitflow_analytics/__init__.py +11 -9
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/cli.py +691 -243
  4. gitflow_analytics/cli_rich.py +353 -0
  5. gitflow_analytics/config.py +389 -96
  6. gitflow_analytics/core/analyzer.py +175 -78
  7. gitflow_analytics/core/branch_mapper.py +132 -132
  8. gitflow_analytics/core/cache.py +242 -173
  9. gitflow_analytics/core/identity.py +214 -178
  10. gitflow_analytics/extractors/base.py +13 -11
  11. gitflow_analytics/extractors/story_points.py +70 -59
  12. gitflow_analytics/extractors/tickets.py +111 -88
  13. gitflow_analytics/integrations/github_integration.py +91 -77
  14. gitflow_analytics/integrations/jira_integration.py +284 -0
  15. gitflow_analytics/integrations/orchestrator.py +99 -72
  16. gitflow_analytics/metrics/dora.py +183 -179
  17. gitflow_analytics/models/database.py +191 -54
  18. gitflow_analytics/qualitative/__init__.py +30 -0
  19. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  20. gitflow_analytics/qualitative/classifiers/change_type.py +468 -0
  21. gitflow_analytics/qualitative/classifiers/domain_classifier.py +399 -0
  22. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +436 -0
  23. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +412 -0
  24. gitflow_analytics/qualitative/core/__init__.py +13 -0
  25. gitflow_analytics/qualitative/core/llm_fallback.py +653 -0
  26. gitflow_analytics/qualitative/core/nlp_engine.py +373 -0
  27. gitflow_analytics/qualitative/core/pattern_cache.py +457 -0
  28. gitflow_analytics/qualitative/core/processor.py +540 -0
  29. gitflow_analytics/qualitative/models/__init__.py +25 -0
  30. gitflow_analytics/qualitative/models/schemas.py +272 -0
  31. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  32. gitflow_analytics/qualitative/utils/batch_processor.py +326 -0
  33. gitflow_analytics/qualitative/utils/cost_tracker.py +343 -0
  34. gitflow_analytics/qualitative/utils/metrics.py +347 -0
  35. gitflow_analytics/qualitative/utils/text_processing.py +243 -0
  36. gitflow_analytics/reports/analytics_writer.py +25 -8
  37. gitflow_analytics/reports/csv_writer.py +60 -32
  38. gitflow_analytics/reports/narrative_writer.py +21 -15
  39. gitflow_analytics/tui/__init__.py +5 -0
  40. gitflow_analytics/tui/app.py +721 -0
  41. gitflow_analytics/tui/screens/__init__.py +8 -0
  42. gitflow_analytics/tui/screens/analysis_progress_screen.py +487 -0
  43. gitflow_analytics/tui/screens/configuration_screen.py +547 -0
  44. gitflow_analytics/tui/screens/loading_screen.py +358 -0
  45. gitflow_analytics/tui/screens/main_screen.py +304 -0
  46. gitflow_analytics/tui/screens/results_screen.py +698 -0
  47. gitflow_analytics/tui/widgets/__init__.py +7 -0
  48. gitflow_analytics/tui/widgets/data_table.py +257 -0
  49. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  50. gitflow_analytics/tui/widgets/progress_widget.py +192 -0
  51. gitflow_analytics-1.0.3.dist-info/METADATA +490 -0
  52. gitflow_analytics-1.0.3.dist-info/RECORD +62 -0
  53. gitflow_analytics-1.0.0.dist-info/METADATA +0 -201
  54. gitflow_analytics-1.0.0.dist-info/RECORD +0 -30
  55. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/WHEEL +0 -0
  56. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/entry_points.txt +0 -0
  57. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/licenses/LICENSE +0 -0
  58. {gitflow_analytics-1.0.0.dist-info → gitflow_analytics-1.0.3.dist-info}/top_level.txt +0 -0
@@ -1,24 +1,27 @@
1
1
  """Database models for GitFlow Analytics using SQLAlchemy."""
2
+
2
3
  from datetime import datetime
3
- from typing import Optional
4
- from sqlalchemy import create_engine, Column, String, Integer, Float, DateTime, Boolean, JSON, Index
5
- from sqlalchemy.ext.declarative import declarative_base
6
- from sqlalchemy.orm import sessionmaker, Session
7
4
  from pathlib import Path
8
5
 
9
- Base = declarative_base()
6
+ from sqlalchemy import JSON, Boolean, Column, DateTime, Float, ForeignKey, Index, Integer, String, create_engine
7
+ from sqlalchemy.orm import Session, sessionmaker, declarative_base
8
+ from typing import Any
9
+
10
+ Base: Any = declarative_base()
11
+
10
12
 
11
13
  class CachedCommit(Base):
12
14
  """Cached commit analysis results."""
13
- __tablename__ = 'cached_commits'
14
-
15
+
16
+ __tablename__ = "cached_commits"
17
+
15
18
  # Primary key
16
19
  id = Column(Integer, primary_key=True)
17
-
20
+
18
21
  # Commit identification
19
22
  repo_path = Column(String, nullable=False)
20
23
  commit_hash = Column(String, nullable=False)
21
-
24
+
22
25
  # Commit data
23
26
  author_name = Column(String)
24
27
  author_email = Column(String)
@@ -26,108 +29,114 @@ class CachedCommit(Base):
26
29
  timestamp = Column(DateTime)
27
30
  branch = Column(String)
28
31
  is_merge = Column(Boolean, default=False)
29
-
32
+
30
33
  # Metrics
31
34
  files_changed = Column(Integer)
32
35
  insertions = Column(Integer)
33
36
  deletions = Column(Integer)
34
37
  complexity_delta = Column(Float)
35
-
38
+
36
39
  # Extracted data
37
40
  story_points = Column(Integer, nullable=True)
38
41
  ticket_references = Column(JSON) # List of ticket IDs
39
-
42
+
40
43
  # Cache metadata
41
44
  cached_at = Column(DateTime, default=datetime.utcnow)
42
45
  cache_version = Column(String, default="1.0")
43
-
46
+
44
47
  # Indexes for performance
45
48
  __table_args__ = (
46
- Index('idx_repo_commit', 'repo_path', 'commit_hash', unique=True),
47
- Index('idx_timestamp', 'timestamp'),
48
- Index('idx_cached_at', 'cached_at'),
49
+ Index("idx_repo_commit", "repo_path", "commit_hash", unique=True),
50
+ Index("idx_timestamp", "timestamp"),
51
+ Index("idx_cached_at", "cached_at"),
49
52
  )
50
53
 
54
+
51
55
  class DeveloperIdentity(Base):
52
56
  """Developer identity mappings."""
53
- __tablename__ = 'developer_identities'
54
-
57
+
58
+ __tablename__ = "developer_identities"
59
+
55
60
  id = Column(Integer, primary_key=True)
56
61
  canonical_id = Column(String, unique=True, nullable=False)
57
62
  primary_name = Column(String, nullable=False)
58
63
  primary_email = Column(String, nullable=False)
59
64
  github_username = Column(String, nullable=True)
60
-
65
+
61
66
  # Statistics
62
67
  total_commits = Column(Integer, default=0)
63
68
  total_story_points = Column(Integer, default=0)
64
69
  first_seen = Column(DateTime, default=datetime.utcnow)
65
70
  last_seen = Column(DateTime, default=datetime.utcnow)
66
-
71
+
67
72
  # Metadata
68
73
  created_at = Column(DateTime, default=datetime.utcnow)
69
74
  updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
70
-
75
+
71
76
  __table_args__ = (
72
- Index('idx_primary_email', 'primary_email'),
73
- Index('idx_canonical_id', 'canonical_id'),
77
+ Index("idx_primary_email", "primary_email"),
78
+ Index("idx_canonical_id", "canonical_id"),
74
79
  )
75
80
 
81
+
76
82
  class DeveloperAlias(Base):
77
83
  """Alternative names/emails for developers."""
78
- __tablename__ = 'developer_aliases'
79
-
84
+
85
+ __tablename__ = "developer_aliases"
86
+
80
87
  id = Column(Integer, primary_key=True)
81
88
  canonical_id = Column(String, nullable=False) # Foreign key to DeveloperIdentity
82
89
  name = Column(String, nullable=False)
83
90
  email = Column(String, nullable=False)
84
-
91
+
85
92
  __table_args__ = (
86
- Index('idx_alias_email', 'email'),
87
- Index('idx_alias_canonical_id', 'canonical_id'),
88
- Index('idx_name_email', 'name', 'email', unique=True),
93
+ Index("idx_alias_email", "email"),
94
+ Index("idx_alias_canonical_id", "canonical_id"),
95
+ Index("idx_name_email", "name", "email", unique=True),
89
96
  )
90
97
 
98
+
91
99
  class PullRequestCache(Base):
92
100
  """Cached pull request data."""
93
- __tablename__ = 'pull_request_cache'
94
-
101
+
102
+ __tablename__ = "pull_request_cache"
103
+
95
104
  id = Column(Integer, primary_key=True)
96
105
  repo_path = Column(String, nullable=False)
97
106
  pr_number = Column(Integer, nullable=False)
98
-
107
+
99
108
  # PR data
100
109
  title = Column(String)
101
110
  description = Column(String)
102
111
  author = Column(String)
103
112
  created_at = Column(DateTime)
104
113
  merged_at = Column(DateTime, nullable=True)
105
-
114
+
106
115
  # Extracted data
107
116
  story_points = Column(Integer, nullable=True)
108
117
  labels = Column(JSON) # List of labels
109
-
118
+
110
119
  # Associated commits
111
120
  commit_hashes = Column(JSON) # List of commit hashes
112
-
121
+
113
122
  # Cache metadata
114
123
  cached_at = Column(DateTime, default=datetime.utcnow)
115
-
116
- __table_args__ = (
117
- Index('idx_repo_pr', 'repo_path', 'pr_number', unique=True),
118
- )
124
+
125
+ __table_args__ = (Index("idx_repo_pr", "repo_path", "pr_number", unique=True),)
126
+
119
127
 
120
128
  class IssueCache(Base):
121
129
  """Cached issue data from various platforms."""
122
- __tablename__ = 'issue_cache'
123
-
130
+
131
+ __tablename__ = "issue_cache"
132
+
124
133
  id = Column(Integer, primary_key=True)
125
-
134
+
126
135
  # Issue identification
127
136
  platform = Column(String, nullable=False) # 'jira', 'github', 'clickup', 'linear'
128
137
  issue_id = Column(String, nullable=False)
129
138
  project_key = Column(String, nullable=False)
130
-
139
+
131
140
  # Issue data
132
141
  title = Column(String)
133
142
  description = Column(String)
@@ -136,36 +145,164 @@ class IssueCache(Base):
136
145
  created_at = Column(DateTime)
137
146
  updated_at = Column(DateTime)
138
147
  resolved_at = Column(DateTime, nullable=True)
139
-
148
+
140
149
  # Extracted data
141
150
  story_points = Column(Integer, nullable=True)
142
151
  labels = Column(JSON)
143
-
152
+
144
153
  # Platform-specific data
145
154
  platform_data = Column(JSON) # Additional platform-specific fields
146
-
155
+
147
156
  # Cache metadata
148
157
  cached_at = Column(DateTime, default=datetime.utcnow)
158
+
159
+ __table_args__ = (
160
+ Index("idx_platform_issue", "platform", "issue_id", unique=True),
161
+ Index("idx_project_key", "project_key"),
162
+ )
163
+
164
+
165
+ class QualitativeCommitData(Base):
166
+ """Extended commit data with qualitative analysis results.
149
167
 
168
+ This table stores the results of qualitative analysis performed on commits,
169
+ including change type classification, domain analysis, risk assessment,
170
+ and processing metadata.
171
+ """
172
+ __tablename__ = 'qualitative_commits'
173
+
174
+ # Link to existing commit
175
+ commit_id = Column(Integer, ForeignKey('cached_commits.id'), primary_key=True)
176
+
177
+ # Classification results
178
+ change_type = Column(String, nullable=False)
179
+ change_type_confidence = Column(Float, nullable=False)
180
+ business_domain = Column(String, nullable=False)
181
+ domain_confidence = Column(Float, nullable=False)
182
+ risk_level = Column(String, nullable=False)
183
+ risk_factors = Column(JSON) # List of risk factors
184
+
185
+ # Intent and context analysis
186
+ intent_signals = Column(JSON) # Intent analysis results
187
+ collaboration_patterns = Column(JSON) # Team interaction patterns
188
+ technical_context = Column(JSON) # Technical context information
189
+
190
+ # Processing metadata
191
+ processing_method = Column(String, nullable=False) # 'nlp' or 'llm'
192
+ processing_time_ms = Column(Float)
193
+ confidence_score = Column(Float, nullable=False)
194
+
195
+ # Timestamps
196
+ analyzed_at = Column(DateTime, default=datetime.utcnow)
197
+ analysis_version = Column(String, default="1.0")
198
+
199
+ # Indexes for efficient querying
150
200
  __table_args__ = (
151
- Index('idx_platform_issue', 'platform', 'issue_id', unique=True),
152
- Index('idx_project_key', 'project_key'),
201
+ Index('idx_change_type', 'change_type'),
202
+ Index('idx_business_domain', 'business_domain'),
203
+ Index('idx_risk_level', 'risk_level'),
204
+ Index('idx_qualitative_confidence', 'confidence_score'),
205
+ Index('idx_processing_method', 'processing_method'),
206
+ Index('idx_analyzed_at', 'analyzed_at'),
153
207
  )
154
208
 
209
+
210
+ class PatternCache(Base):
211
+ """Cache for learned patterns and classifications.
212
+
213
+ This table stores frequently occurring patterns to avoid reprocessing
214
+ similar commits and to improve classification accuracy over time.
215
+ """
216
+ __tablename__ = 'pattern_cache'
217
+
218
+ id = Column(Integer, primary_key=True)
219
+
220
+ # Pattern identification
221
+ message_hash = Column(String, nullable=False, unique=True)
222
+ semantic_fingerprint = Column(String, nullable=False)
223
+
224
+ # Cached classification results
225
+ classification_result = Column(JSON, nullable=False)
226
+ confidence_score = Column(Float, nullable=False)
227
+
228
+ # Usage tracking for cache management
229
+ hit_count = Column(Integer, default=1)
230
+ last_used = Column(DateTime, default=datetime.utcnow)
231
+ created_at = Column(DateTime, default=datetime.utcnow)
232
+
233
+ # Source tracking
234
+ source_method = Column(String, nullable=False) # 'nlp' or 'llm'
235
+ source_model = Column(String) # Model/method that created this pattern
236
+
237
+ # Performance tracking
238
+ avg_processing_time_ms = Column(Float)
239
+
240
+ # Indexes for pattern matching and cleanup
241
+ __table_args__ = (
242
+ Index('idx_semantic_fingerprint', 'semantic_fingerprint'),
243
+ Index('idx_pattern_confidence', 'confidence_score'),
244
+ Index('idx_hit_count', 'hit_count'),
245
+ Index('idx_last_used', 'last_used'),
246
+ Index('idx_source_method', 'source_method'),
247
+ )
248
+
249
+
250
+ class LLMUsageStats(Base):
251
+ """Track LLM usage statistics for cost monitoring and optimization.
252
+
253
+ This table helps monitor LLM API usage, costs, and performance to
254
+ optimize the balance between speed, accuracy, and cost.
255
+ """
256
+ __tablename__ = 'llm_usage_stats'
257
+
258
+ id = Column(Integer, primary_key=True)
259
+
260
+ # API call metadata
261
+ model_name = Column(String, nullable=False)
262
+ api_provider = Column(String, default='openrouter')
263
+ timestamp = Column(DateTime, default=datetime.utcnow)
264
+
265
+ # Usage metrics
266
+ input_tokens = Column(Integer, nullable=False)
267
+ output_tokens = Column(Integer, nullable=False)
268
+ processing_time_ms = Column(Float, nullable=False)
269
+
270
+ # Cost tracking
271
+ estimated_cost_usd = Column(Float)
272
+ cost_per_token = Column(Float)
273
+
274
+ # Batch information
275
+ batch_size = Column(Integer, default=1) # Number of commits processed
276
+ batch_id = Column(String) # Group related calls
277
+
278
+ # Quality metrics
279
+ avg_confidence_score = Column(Float)
280
+ success = Column(Boolean, default=True)
281
+ error_message = Column(String)
282
+
283
+ # Indexes for analysis and monitoring
284
+ __table_args__ = (
285
+ Index('idx_model_timestamp', 'model_name', 'timestamp'),
286
+ Index('idx_llm_timestamp', 'timestamp'),
287
+ Index('idx_batch_id', 'batch_id'),
288
+ Index('idx_success', 'success'),
289
+ )
290
+
291
+
155
292
  class Database:
156
293
  """Database connection manager."""
157
-
294
+
158
295
  def __init__(self, db_path: Path):
159
296
  """Initialize database connection."""
160
297
  db_path.parent.mkdir(parents=True, exist_ok=True)
161
- self.engine = create_engine(f'sqlite:///{db_path}')
298
+ self.engine = create_engine(f"sqlite:///{db_path}")
162
299
  Base.metadata.create_all(self.engine)
163
300
  self.SessionLocal = sessionmaker(bind=self.engine)
164
-
301
+
165
302
  def get_session(self) -> Session:
166
303
  """Get a new database session."""
167
304
  return self.SessionLocal()
168
-
169
- def init_db(self):
305
+
306
+ def init_db(self) -> None:
170
307
  """Initialize database tables."""
171
- Base.metadata.create_all(self.engine)
308
+ Base.metadata.create_all(self.engine)
@@ -0,0 +1,30 @@
1
+ """Qualitative data extraction module for GitFlow Analytics.
2
+
3
+ This module provides NLP-based analysis of Git commits to extract semantic meaning,
4
+ change types, domain classification, and risk assessment from commit messages and
5
+ file changes.
6
+
7
+ Key Components:
8
+ - QualitativeProcessor: Main orchestrator for qualitative analysis
9
+ - NLPEngine: spaCy-based fast processing for most commits
10
+ - LLMFallback: Strategic use of LLMs for uncertain cases
11
+ - Various classifiers for change type, domain, risk, and intent analysis
12
+ """
13
+
14
+ from .core.processor import QualitativeProcessor
15
+ from .models.schemas import (
16
+ QualitativeCommitData,
17
+ QualitativeConfig,
18
+ NLPConfig,
19
+ LLMConfig,
20
+ CacheConfig as QualitativeCacheConfig,
21
+ )
22
+
23
+ __all__ = [
24
+ "QualitativeProcessor",
25
+ "QualitativeCommitData",
26
+ "QualitativeConfig",
27
+ "NLPConfig",
28
+ "LLMConfig",
29
+ "QualitativeCacheConfig",
30
+ ]
@@ -0,0 +1,13 @@
1
+ """Classification components for qualitative analysis."""
2
+
3
+ from .change_type import ChangeTypeClassifier
4
+ from .domain_classifier import DomainClassifier
5
+ from .intent_analyzer import IntentAnalyzer
6
+ from .risk_analyzer import RiskAnalyzer
7
+
8
+ __all__ = [
9
+ "ChangeTypeClassifier",
10
+ "DomainClassifier",
11
+ "IntentAnalyzer",
12
+ "RiskAnalyzer",
13
+ ]