gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/_version.py +1 -1
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4108 -350
- gitflow_analytics/cli_rich.py +198 -48
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -508
- gitflow_analytics/core/analyzer.py +1209 -98
- gitflow_analytics/core/cache.py +1337 -29
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +363 -14
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +8 -1
- gitflow_analytics/extractors/tickets.py +749 -11
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +175 -11
- gitflow_analytics/integrations/jira_integration.py +461 -24
- gitflow_analytics/integrations/orchestrator.py +124 -1
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +379 -20
- gitflow_analytics/models/database.py +843 -53
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +9 -10
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
- gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
- gitflow_analytics/qualitative/core/__init__.py +4 -4
- gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
- gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
- gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
- gitflow_analytics/qualitative/core/processor.py +381 -248
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +7 -7
- gitflow_analytics/qualitative/models/schemas.py +155 -121
- gitflow_analytics/qualitative/utils/__init__.py +4 -4
- gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
- gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
- gitflow_analytics/qualitative/utils/metrics.py +172 -158
- gitflow_analytics/qualitative/utils/text_processing.py +146 -104
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +539 -14
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1676 -212
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2287 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +1 -1
- gitflow_analytics/tui/app.py +129 -126
- gitflow_analytics/tui/screens/__init__.py +3 -3
- gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
- gitflow_analytics/tui/screens/configuration_screen.py +154 -178
- gitflow_analytics/tui/screens/loading_screen.py +100 -110
- gitflow_analytics/tui/screens/main_screen.py +89 -72
- gitflow_analytics/tui/screens/results_screen.py +305 -281
- gitflow_analytics/tui/widgets/__init__.py +2 -2
- gitflow_analytics/tui/widgets/data_table.py +67 -69
- gitflow_analytics/tui/widgets/export_modal.py +76 -76
- gitflow_analytics/tui/widgets/progress_widget.py +41 -46
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
- gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1015 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gitflow-analytics
|
|
3
|
+
Version: 1.3.6
|
|
4
|
+
Summary: Analyze Git repositories for developer productivity insights
|
|
5
|
+
Author-email: Bob Matyas <bobmatnyc@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/bobmatnyc/gitflow-analytics
|
|
8
|
+
Project-URL: Documentation, https://github.com/bobmatnyc/gitflow-analytics/blob/main/README.md
|
|
9
|
+
Project-URL: Repository, https://github.com/bobmatnyc/gitflow-analytics
|
|
10
|
+
Project-URL: Issues, https://github.com/bobmatnyc/gitflow-analytics/issues
|
|
11
|
+
Keywords: git,analytics,productivity,metrics,development
|
|
12
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Software Development :: Version Control :: Git
|
|
20
|
+
Classifier: Topic :: Software Development :: Quality Assurance
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: click>=8.1
|
|
25
|
+
Requires-Dist: gitpython>=3.1
|
|
26
|
+
Requires-Dist: pygithub>=2.0
|
|
27
|
+
Requires-Dist: tqdm>=4.65
|
|
28
|
+
Requires-Dist: sqlalchemy>=2.0
|
|
29
|
+
Requires-Dist: pandas>=2.0
|
|
30
|
+
Requires-Dist: pyyaml>=6.0
|
|
31
|
+
Requires-Dist: python-dateutil>=2.8
|
|
32
|
+
Requires-Dist: python-dotenv>=1.0
|
|
33
|
+
Requires-Dist: rich>=13.0.0
|
|
34
|
+
Requires-Dist: spacy>=3.7.0
|
|
35
|
+
Requires-Dist: scikit-learn>=1.3.0
|
|
36
|
+
Requires-Dist: openai>=1.30.0
|
|
37
|
+
Requires-Dist: tiktoken>=0.7.0
|
|
38
|
+
Requires-Dist: numpy>=1.24.0
|
|
39
|
+
Provides-Extra: dev
|
|
40
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
41
|
+
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
42
|
+
Requires-Dist: pytest-mock>=3.0; extra == "dev"
|
|
43
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
44
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
45
|
+
Requires-Dist: black>=23.0; extra == "dev"
|
|
46
|
+
Requires-Dist: isort>=5.0; extra == "dev"
|
|
47
|
+
Requires-Dist: bandit[toml]>=1.7; extra == "dev"
|
|
48
|
+
Requires-Dist: safety>=2.0; extra == "dev"
|
|
49
|
+
Requires-Dist: python-semantic-release>=8.0.0; extra == "dev"
|
|
50
|
+
Requires-Dist: types-PyYAML>=6.0; extra == "dev"
|
|
51
|
+
Requires-Dist: types-requests>=2.28; extra == "dev"
|
|
52
|
+
Provides-Extra: github
|
|
53
|
+
Requires-Dist: pygithub>=1.58; extra == "github"
|
|
54
|
+
Provides-Extra: tui
|
|
55
|
+
Requires-Dist: textual>=0.41.0; extra == "tui"
|
|
56
|
+
Provides-Extra: all
|
|
57
|
+
Requires-Dist: gitflow-analytics[github,tui]; extra == "all"
|
|
58
|
+
Dynamic: license-file
|
|
59
|
+
|
|
60
|
+
# GitFlow Analytics
|
|
61
|
+
|
|
62
|
+
[PyPI version](https://badge.fury.io/py/gitflow-analytics)
|
|
63
|
+
[Supported Python versions](https://pypi.org/project/gitflow-analytics/)
|
|
64
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
65
|
+
[Documentation](https://github.com/bobmatnyc/gitflow-analytics/tree/main/docs)
|
|
66
|
+
[CI status](https://github.com/bobmatnyc/gitflow-analytics/actions)
|
|
67
|
+
|
|
68
|
+
A comprehensive Python package for analyzing Git repositories to generate developer productivity insights without requiring external project management tools. Extract actionable metrics directly from Git history with ML-enhanced commit categorization, automated developer identity resolution, and professional reporting.
|
|
69
|
+
|
|
70
|
+
## 🚀 Key Features
|
|
71
|
+
|
|
72
|
+
- **🔍 Zero PM-Tool Dependencies**: Analyze productivity without requiring JIRA, Linear, or other PM tools
|
|
73
|
+
- **🧠 ML-Powered Intelligence**: Advanced commit categorization with 85-95% accuracy
|
|
74
|
+
- **👥 Smart Identity Resolution**: Automatically consolidate developer identities across email addresses
|
|
75
|
+
- **🏢 Enterprise Ready**: Organization-wide repository discovery with intelligent caching
|
|
76
|
+
- **📊 Professional Reports**: Rich markdown narratives and CSV exports for executive dashboards
|
|
77
|
+
|
|
78
|
+
## 🎯 Quick Start
|
|
79
|
+
|
|
80
|
+
Get up and running in 5 minutes:
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
# 1. Install GitFlow Analytics
|
|
84
|
+
pip install gitflow-analytics
|
|
85
|
+
|
|
86
|
+
# 2. Install ML dependencies (optional but recommended)
|
|
87
|
+
python -m spacy download en_core_web_sm
|
|
88
|
+
|
|
89
|
+
# 3. Create a simple configuration
|
|
90
|
+
echo 'version: "1.0"
|
|
91
|
+
github:
|
|
92
|
+
  token: "${GITHUB_TOKEN}"
|
|
93
|
+
  organization: "your-org"' > config.yaml
|
|
94
|
+
|
|
95
|
+
# 4. Set your GitHub token
|
|
96
|
+
echo 'GITHUB_TOKEN=ghp_your_token_here' > .env
|
|
97
|
+
|
|
98
|
+
# 5. Run analysis
|
|
99
|
+
gitflow-analytics -c config.yaml --weeks 8
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
**What you get:**
|
|
103
|
+
- 📈 Weekly metrics CSV with developer productivity trends
|
|
104
|
+
- 👥 Developer profiles with project distribution and work styles
|
|
105
|
+
- 🔍 Untracked work analysis with ML-powered categorization
|
|
106
|
+
- 📋 Executive summary with actionable insights
|
|
107
|
+
- 📊 Rich markdown report ready for stakeholders
|
|
108
|
+
|
|
109
|
+
### Sample Output Preview
|
|
110
|
+
|
|
111
|
+
```markdown
|
|
112
|
+
## Executive Summary
|
|
113
|
+
- **Total Commits**: 156 across 3 projects
|
|
114
|
+
- **Active Developers**: 5 team members
|
|
115
|
+
- **Ticket Coverage**: 73.2% (industry benchmark: 60-80%)
|
|
116
|
+
- **Top Contributor**: Sarah Chen (32 commits, FRONTEND focus)
|
|
117
|
+
|
|
118
|
+
## Key Insights
|
|
119
|
+
🎯 **High Productivity**: Team averaged 31 commits/week
|
|
120
|
+
📊 **Balanced Workload**: No single developer >40% of total work
|
|
121
|
+
✅ **Good Process**: 73% ticket coverage shows strong tracking
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## ✨ Latest Features (v1.2.x)
|
|
125
|
+
|
|
126
|
+
- **🚀 Two-Step Processing**: Optimized fetch-then-classify workflow for better performance
|
|
127
|
+
- **💰 Cost Tracking**: Monitor LLM API usage with detailed token and cost reporting
|
|
128
|
+
- **⚡ Smart Caching**: Intelligent caching reduces analysis time by up to 90%
|
|
129
|
+
- **🔄 Automatic Updates**: Repositories automatically fetch latest commits before analysis
|
|
130
|
+
- **📊 Weekly Trends**: Track classification pattern changes over time
|
|
131
|
+
- **🎯 Enhanced Categorization**: All commits properly categorized with confidence scores
|
|
132
|
+
|
|
133
|
+
## 🔥 Core Capabilities
|
|
134
|
+
|
|
135
|
+
**📊 Analysis & Insights**
|
|
136
|
+
- Multi-repository analysis with intelligent project grouping
|
|
137
|
+
- ML-enhanced commit categorization (85-95% accuracy)
|
|
138
|
+
- Developer productivity metrics and work pattern analysis
|
|
139
|
+
- Story point extraction from commits and PRs
|
|
140
|
+
- Ticket tracking across JIRA, GitHub, ClickUp, and Linear
|
|
141
|
+
|
|
142
|
+
**🏢 Enterprise Features**
|
|
143
|
+
- Organization-wide repository discovery from GitHub
|
|
144
|
+
- Automated developer identity resolution and consolidation
|
|
145
|
+
- Database-backed caching for sub-second report generation
|
|
146
|
+
- Data anonymization for secure external sharing
|
|
147
|
+
- Batch processing optimized for large repositories
|
|
148
|
+
|
|
149
|
+
**📈 Professional Reporting**
|
|
150
|
+
- Rich markdown narratives with executive summaries
|
|
151
|
+
- Weekly CSV exports with trend analysis
|
|
152
|
+
- Customizable output formats and filtering
|
|
153
|
+
- Performance benchmarking and team comparisons
|
|
154
|
+
|
|
155
|
+
## 📚 Documentation
|
|
156
|
+
|
|
157
|
+
Comprehensive guides for every use case:
|
|
158
|
+
|
|
159
|
+
| **Getting Started** | **Advanced Usage** | **Integration** |
|
|
160
|
+
|-------------------|------------------|---------------|
|
|
161
|
+
| [Installation](docs/getting-started/installation.md) | [Complete Configuration](docs/guides/configuration.md) | [CLI Reference](docs/reference/cli-commands.md) |
|
|
162
|
+
| [5-Minute Tutorial](docs/getting-started/quickstart.md) | [ML Categorization](docs/guides/ml-categorization.md) | [JSON Export Schema](docs/reference/json-export-schema.md) |
|
|
163
|
+
| [First Analysis](docs/getting-started/first-analysis.md) | [Enterprise Setup](docs/examples/enterprise-setup.md) | [CI Integration](docs/examples/ci-integration.md) |
|
|
164
|
+
|
|
165
|
+
**🎯 Quick Links:**
|
|
166
|
+
- 📖 [**Documentation Hub**](docs/README.md) - Complete guide index
|
|
167
|
+
- 🚀 [**Quick Start**](docs/getting-started/quickstart.md) - Get running in 5 minutes
|
|
168
|
+
- ⚙️ [**Configuration**](docs/guides/configuration.md) - Full reference
|
|
169
|
+
- 🤝 [**Contributing**](docs/developer/contributing.md) - Join the project
|
|
170
|
+
|
|
171
|
+
## ⚡ Installation Options
|
|
172
|
+
|
|
173
|
+
### Standard Installation
|
|
174
|
+
```bash
|
|
175
|
+
pip install gitflow-analytics
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### With ML Enhancement (Recommended)
|
|
179
|
+
```bash
|
|
180
|
+
pip install gitflow-analytics
|
|
181
|
+
python -m spacy download en_core_web_sm
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
### Development Installation
|
|
185
|
+
```bash
|
|
186
|
+
git clone https://github.com/bobmatnyc/gitflow-analytics.git
|
|
187
|
+
cd gitflow-analytics
|
|
188
|
+
pip install -e ".[dev]"
|
|
189
|
+
python -m spacy download en_core_web_sm
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
## 🔧 Configuration
|
|
193
|
+
|
|
194
|
+
### Option 1: Organization Analysis (Recommended)
|
|
195
|
+
```yaml
|
|
196
|
+
# config.yaml
|
|
197
|
+
version: "1.0"
|
|
198
|
+
github:
|
|
199
|
+
  token: "${GITHUB_TOKEN}"
|
|
200
|
+
  organization: "your-org"  # Auto-discovers all repositories
|
|
201
|
+
|
|
202
|
+
analysis:
|
|
203
|
+
  ml_categorization:
|
|
204
|
+
    enabled: true
|
|
205
|
+
    min_confidence: 0.7
|
|
206
|
+
```
|
|
207
|
+
|
|
208
|
+
### Option 2: Specific Repositories
|
|
209
|
+
```yaml
|
|
210
|
+
# config.yaml
|
|
211
|
+
version: "1.0"
|
|
212
|
+
github:
|
|
213
|
+
  token: "${GITHUB_TOKEN}"
|
|
214
|
+
|
|
215
|
+
repositories:
|
|
216
|
+
  - name: "my-app"
|
|
217
|
+
    path: "~/code/my-app"
|
|
218
|
+
    github_repo: "myorg/my-app"
|
|
219
|
+
    project_key: "APP"
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
### Environment Setup
|
|
223
|
+
```bash
|
|
224
|
+
# .env (same directory as config.yaml)
|
|
225
|
+
GITHUB_TOKEN=ghp_your_token_here
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
### Run Analysis
|
|
229
|
+
```bash
|
|
230
|
+
# Analyze last 8 weeks
|
|
231
|
+
gitflow-analytics -c config.yaml --weeks 8
|
|
232
|
+
|
|
233
|
+
# With custom output directory
|
|
234
|
+
gitflow-analytics -c config.yaml --weeks 8 --output ./reports
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
> 💡 **Need more configuration options?** See the [Complete Configuration Guide](docs/guides/configuration.md) for advanced features, integrations, and customization.
|
|
238
|
+
|
|
239
|
+
## 📊 Generated Reports
|
|
240
|
+
|
|
241
|
+
GitFlow Analytics generates comprehensive reports for different audiences:
|
|
242
|
+
|
|
243
|
+
### 📈 CSV Data Files
|
|
244
|
+
- **weekly_metrics.csv** - Developer productivity trends by week
|
|
245
|
+
- **weekly_velocity.csv** - Lines-per-story-point velocity analysis
|
|
246
|
+
- **developers.csv** - Complete team profiles and statistics
|
|
247
|
+
- **summary.csv** - Project-wide statistics and benchmarks
|
|
248
|
+
- **untracked_commits.csv** - ML-categorized analysis of untracked work (commits without ticket references)
|
|
249
|
+
|
|
250
|
+
### 📋 Executive Reports
|
|
251
|
+
- **narrative_summary.md** - Rich markdown report with:
|
|
252
|
+
- Executive summary with key metrics
|
|
253
|
+
- Team composition and work distribution
|
|
254
|
+
- Project activity breakdown
|
|
255
|
+
- Development patterns and recommendations
|
|
256
|
+
- Weekly trend analysis
|
|
257
|
+
|
|
258
|
+
### Sample Executive Summary
|
|
259
|
+
```markdown
|
|
260
|
+
## Executive Summary
|
|
261
|
+
- **Total Commits**: 324 commits across 4 projects
|
|
262
|
+
- **Active Developers**: 8 team members
|
|
263
|
+
- **Ticket Coverage**: 78.4% (above industry benchmark)
|
|
264
|
+
- **Top Areas**: Frontend (45%), API (32%), Infrastructure (23%)
|
|
265
|
+
|
|
266
|
+
## Key Insights
|
|
267
|
+
✅ **Strong Process Adherence**: 78% ticket coverage
|
|
268
|
+
🎯 **Balanced Team**: No developer >35% of total work
|
|
269
|
+
📈 **Growth Trend**: +15% productivity vs last quarter
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
## 🛠️ Common Use Cases
|
|
273
|
+
|
|
274
|
+
**👥 Team Lead Dashboard**
|
|
275
|
+
- Track individual developer productivity and growth
|
|
276
|
+
- Identify workload distribution and potential burnout
|
|
277
|
+
- Monitor code quality trends and technical debt
|
|
278
|
+
|
|
279
|
+
**📈 Engineering Management**
|
|
280
|
+
- Generate executive reports on team velocity
|
|
281
|
+
- Analyze process adherence and ticket coverage
|
|
282
|
+
- Benchmark performance across projects and quarters
|
|
283
|
+
|
|
284
|
+
**🔍 Process Optimization**
|
|
285
|
+
- Identify untracked work patterns that should be formalized
|
|
286
|
+
- Optimize developer focus and reduce context switching
|
|
287
|
+
- Improve estimation accuracy with historical data
|
|
288
|
+
|
|
289
|
+
**🏢 Enterprise Analytics**
|
|
290
|
+
- Organization-wide repository analysis across dozens of projects
|
|
291
|
+
- Automated identity resolution for large, distributed teams
|
|
292
|
+
- Cost-effective analysis without expensive PM tool dependencies
|
|
293
|
+
|
|
294
|
+
## Command Line Interface
|
|
295
|
+
|
|
296
|
+
### Main Commands
|
|
297
|
+
|
|
298
|
+
```bash
|
|
299
|
+
# Analyze repositories (default command)
|
|
300
|
+
gitflow-analytics -c config.yaml --weeks 12 --output ./reports
|
|
301
|
+
|
|
302
|
+
# Explicit analyze command (backward compatibility)
|
|
303
|
+
gitflow-analytics analyze -c config.yaml --weeks 12 --output ./reports
|
|
304
|
+
|
|
305
|
+
# Show cache statistics
|
|
306
|
+
gitflow-analytics cache-stats -c config.yaml
|
|
307
|
+
|
|
308
|
+
# List known developers
|
|
309
|
+
gitflow-analytics list-developers -c config.yaml
|
|
310
|
+
|
|
311
|
+
# Analyze developer identities
|
|
312
|
+
gitflow-analytics identities -c config.yaml
|
|
313
|
+
|
|
314
|
+
# Merge developer identities
|
|
315
|
+
gitflow-analytics merge-identity -c config.yaml dev1_id dev2_id
|
|
316
|
+
|
|
317
|
+
# Discover story point fields in your PM platform
|
|
318
|
+
gitflow-analytics discover-storypoint-fields -c config.yaml
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
### Options
|
|
322
|
+
|
|
323
|
+
- `--weeks, -w`: Number of weeks to analyze (default: 12)
|
|
324
|
+
- `--output, -o`: Output directory for reports (default: ./reports)
|
|
325
|
+
- `--anonymize`: Anonymize developer information
|
|
326
|
+
- `--no-cache`: Disable caching for fresh analysis
|
|
327
|
+
- `--clear-cache`: Clear cache before analysis
|
|
328
|
+
- `--validate-only`: Validate configuration without running
|
|
329
|
+
- `--skip-identity-analysis`: Skip automatic identity analysis
|
|
330
|
+
- `--apply-identity-suggestions`: Apply identity suggestions without prompting
|
|
331
|
+
|
|
332
|
+
## Complete Configuration Example
|
|
333
|
+
|
|
334
|
+
Here's a complete example showing a `.env` file and the corresponding YAML configuration:
|
|
335
|
+
|
|
336
|
+
### `.env` file
|
|
337
|
+
```bash
|
|
338
|
+
# GitHub Configuration
|
|
339
|
+
GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxx
|
|
340
|
+
GITHUB_ORG=your-organization
|
|
341
|
+
|
|
342
|
+
# JIRA Configuration
|
|
343
|
+
JIRA_ACCESS_USER=developer@company.com
|
|
344
|
+
JIRA_ACCESS_TOKEN=ATATT3xxxxxxxxxxx
|
|
345
|
+
|
|
346
|
+
# Optional: Other integrations
|
|
347
|
+
# CLICKUP_TOKEN=pk_xxxxxxxxxxxx
|
|
348
|
+
# LINEAR_TOKEN=lin_api_xxxxxxxxxxxx
|
|
349
|
+
```
|
|
350
|
+
|
|
351
|
+
### `config.yaml` file
|
|
352
|
+
```yaml
|
|
353
|
+
version: "1.0"
|
|
354
|
+
|
|
355
|
+
# GitHub configuration with organization discovery
|
|
356
|
+
github:
|
|
357
|
+
token: "${GITHUB_TOKEN}"
|
|
358
|
+
organization: "${GITHUB_ORG}"
|
|
359
|
+
|
|
360
|
+
# JIRA integration for story points
|
|
361
|
+
jira:
|
|
362
|
+
access_user: "${JIRA_ACCESS_USER}"
|
|
363
|
+
access_token: "${JIRA_ACCESS_TOKEN}"
|
|
364
|
+
base_url: "https://company.atlassian.net"
|
|
365
|
+
|
|
366
|
+
jira_integration:
|
|
367
|
+
enabled: true
|
|
368
|
+
fetch_story_points: true
|
|
369
|
+
story_point_fields:
|
|
370
|
+
- "Story point estimate" # Your field name
|
|
371
|
+
- "customfield_10016" # Fallback field ID
|
|
372
|
+
|
|
373
|
+
# Analysis configuration
|
|
374
|
+
analysis:
|
|
375
|
+
# Only track JIRA tickets (ignore GitHub issues, etc.)
|
|
376
|
+
ticket_platforms:
|
|
377
|
+
- jira
|
|
378
|
+
|
|
379
|
+
# Exclude bot commits and boilerplate files
|
|
380
|
+
exclude:
|
|
381
|
+
authors:
|
|
382
|
+
- "dependabot[bot]"
|
|
383
|
+
- "renovate[bot]"
|
|
384
|
+
paths:
|
|
385
|
+
- "**/node_modules/**"
|
|
386
|
+
- "**/*.min.js"
|
|
387
|
+
- "**/package-lock.json"
|
|
388
|
+
|
|
389
|
+
# Developer identity consolidation
|
|
390
|
+
identity:
|
|
391
|
+
similarity_threshold: 0.85
|
|
392
|
+
manual_mappings:
|
|
393
|
+
- name: "John Doe"
|
|
394
|
+
primary_email: "john.doe@company.com"
|
|
395
|
+
aliases:
|
|
396
|
+
- "jdoe@oldcompany.com"
|
|
397
|
+
- "john@personal.com"
|
|
398
|
+
|
|
399
|
+
# Output configuration
|
|
400
|
+
output:
|
|
401
|
+
directory: "./reports"
|
|
402
|
+
formats:
|
|
403
|
+
- csv
|
|
404
|
+
- markdown
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
## Output Reports
|
|
408
|
+
|
|
409
|
+
The tool generates comprehensive CSV reports and markdown summaries:
|
|
410
|
+
|
|
411
|
+
### CSV Reports
|
|
412
|
+
|
|
413
|
+
1. **Weekly Metrics** (`weekly_metrics_YYYYMMDD.csv`)
|
|
414
|
+
- Week-by-week developer productivity
|
|
415
|
+
- Story points, commits, lines changed
|
|
416
|
+
- Ticket coverage percentages
|
|
417
|
+
- Per-project breakdown
|
|
418
|
+
|
|
419
|
+
2. **Weekly Velocity** (`weekly_velocity_YYYYMMDD.csv`)
|
|
420
|
+
- Lines of code per story point analysis
|
|
421
|
+
- Efficiency trends and velocity patterns
|
|
422
|
+
- PR-based vs commit-based story points breakdown
|
|
423
|
+
- Team velocity benchmarking and week-over-week trends
|
|
424
|
+
|
|
425
|
+
3. **Summary Statistics** (`summary_YYYYMMDD.csv`)
|
|
426
|
+
- Overall project statistics
|
|
427
|
+
- Platform-specific ticket counts
|
|
428
|
+
- Top contributors
|
|
429
|
+
|
|
430
|
+
4. **Developer Report** (`developers_YYYYMMDD.csv`)
|
|
431
|
+
- Complete developer profiles
|
|
432
|
+
- Total contributions
|
|
433
|
+
- Identity aliases
|
|
434
|
+
|
|
435
|
+
5. **Untracked Commits Report** (`untracked_commits_YYYYMMDD.csv`)
|
|
436
|
+
- Detailed analysis of commits without ticket references
|
|
437
|
+
- Commit categorization (bug_fix, feature, refactor, documentation, maintenance, test, style, build)
|
|
438
|
+
- Enhanced metadata: commit hash, author, timestamp, project, message, file/line changes
|
|
439
|
+
- Configurable file change threshold for filtering significant commits
|
|
440
|
+
|
|
441
|
+
### Enhanced Untracked Commit Analysis
|
|
442
|
+
|
|
443
|
+
The untracked commits report provides deep insights into work that bypasses ticket tracking:
|
|
444
|
+
|
|
445
|
+
**CSV Columns:**
|
|
446
|
+
- `commit_hash` / `short_hash`: Full and abbreviated commit identifiers
|
|
447
|
+
- `author` / `author_email` / `canonical_id`: Developer identification (with anonymization support)
|
|
448
|
+
- `date`: Commit timestamp
|
|
449
|
+
- `project`: Project key for multi-repository analysis
|
|
450
|
+
- `message`: Commit message (truncated for readability)
|
|
451
|
+
- `category`: Automated categorization of work type
|
|
452
|
+
- `files_changed` / `lines_added` / `lines_removed` / `lines_changed`: Change metrics
|
|
453
|
+
- `is_merge`: Boolean flag for merge commits
|
|
454
|
+
|
|
455
|
+
**Automatic Categorization:**
|
|
456
|
+
- **Feature**: New functionality development (`add`, `new`, `implement`, `create`)
|
|
457
|
+
- **Bug Fix**: Error corrections (`fix`, `bug`, `error`, `resolve`, `hotfix`)
|
|
458
|
+
- **Refactor**: Code restructuring (`refactor`, `optimize`, `improve`, `cleanup`)
|
|
459
|
+
- **Documentation**: Documentation updates (`doc`, `readme`, `comment`, `guide`)
|
|
460
|
+
- **Maintenance**: Routine upkeep (`update`, `upgrade`, `dependency`, `config`)
|
|
461
|
+
- **Test**: Testing-related changes (`test`, `spec`, `mock`, `fixture`)
|
|
462
|
+
- **Style**: Formatting changes (`format`, `lint`, `prettier`, `whitespace`)
|
|
463
|
+
- **Build**: Build system changes (`build`, `compile`, `ci`, `docker`)
|
|
464
|
+
|
|
465
|
+
### Markdown Reports
|
|
466
|
+
|
|
467
|
+
5. **Narrative Summary** (`narrative_summary_YYYYMMDD.md`)
|
|
468
|
+
- **Executive Summary**: High-level metrics and team overview
|
|
469
|
+
- **Team Composition**: Developer profiles with project percentages and work patterns
|
|
470
|
+
- **Project Activity**: Detailed breakdown by project with contributor percentages and **commit classifications**
|
|
471
|
+
- **Development Patterns**: Key insights from productivity and collaboration analysis
|
|
472
|
+
- **Pull Request Analysis**: PR metrics including size, lifetime, and review activity
|
|
473
|
+
- **Weekly Trends** (v1.1.0+): Week-over-week changes in classification patterns
|
|
474
|
+
|
|
475
|
+
6. **Database-Backed Qualitative Report** (`database_qualitative_report_YYYYMMDD.md`) (v1.1.0+)
|
|
476
|
+
- Generated directly from SQLite storage for fast retrieval
|
|
477
|
+
- Includes weekly trend analysis per developer/project
|
|
478
|
+
- Shows classification changes over time (e.g., "Features: +15%, Bug Fixes: -5%")
|
|
479
|
+
- **Issue Tracking**: Platform usage and coverage analysis with simplified display
|
|
480
|
+
- **Enhanced Untracked Work Analysis**: Comprehensive categorization with dual percentage metrics
|
|
481
|
+
- **PM Platform Integration**: Story point tracking and correlation insights (when available)
|
|
482
|
+
- **Recommendations**: Actionable insights based on analysis patterns
|
|
483
|
+
|
|
484
|
+
### Enhanced Narrative Report Sections
|
|
485
|
+
|
|
486
|
+
The narrative report provides comprehensive insights through multiple detailed sections:
|
|
487
|
+
|
|
488
|
+
#### Team Composition Section
|
|
489
|
+
- **Developer Profiles**: Individual developer statistics with commit counts
|
|
490
|
+
- **Project Distribution**: Shows ALL projects each developer works on with precise percentages
|
|
491
|
+
- **Work Style Classification**: Categorizes developers as "Focused", "Multi-project", or "Highly Focused"
|
|
492
|
+
- **Activity Patterns**: Identifies time patterns like "Standard Hours" or "Extended Hours"
|
|
493
|
+
|
|
494
|
+
**Example developer profile:**
|
|
495
|
+
```markdown
|
|
496
|
+
**John Developer**
|
|
497
|
+
- Commits: 15
|
|
498
|
+
- Projects: FRONTEND (85.0%), SERVICE_TS (15.0%)
|
|
499
|
+
- Work Style: Focused
|
|
500
|
+
- Active Pattern: Standard Hours
|
|
501
|
+
```
|
|
502
|
+
|
|
503
|
+
#### Project Activity Section
|
|
504
|
+
- **Activity by Project**: Commits and percentage of total activity per project
|
|
505
|
+
- **Contributor Breakdown**: Shows each developer's contribution percentage within each project
|
|
506
|
+
- **Lines Changed**: Quantifies the scale of changes per project
|
|
507
|
+
|
|
508
|
+
#### Issue Tracking with Simplified Display
|
|
509
|
+
- **Platform Usage**: Clean display of ticket platform distribution (JIRA, GitHub, etc.)
|
|
510
|
+
- **Coverage Analysis**: Percentage of commits that reference tickets
|
|
511
|
+
- **Enhanced Untracked Work Analysis**: Detailed categorization and recommendations
|
|
512
|
+
|
|
513
|
+
### Interpreting Dual Percentage Metrics
|
|
514
|
+
|
|
515
|
+
The enhanced untracked work analysis provides two key percentage metrics for better context:
|
|
516
|
+
|
|
517
|
+
1. **Percentage of Total Untracked Work**: Shows how much each developer contributes to the overall untracked work pool
|
|
518
|
+
2. **Percentage of Developer's Individual Work**: Shows what proportion of a specific developer's commits are untracked
|
|
519
|
+
|
|
520
|
+
**Example interpretation:**
|
|
521
|
+
```
|
|
522
|
+
- John Doe: 25 commits (40% of untracked, 15% of their work) - maintenance, style
|
|
523
|
+
```
|
|
524
|
+
|
|
525
|
+
This means:
|
|
526
|
+
- John contributed 25 untracked commits
|
|
527
|
+
- These represent 40% of all untracked commits in the analysis period
|
|
528
|
+
- Only 15% of John's total work was untracked (85% was properly tracked)
|
|
529
|
+
- Most untracked work was maintenance and style changes (acceptable categories)
|
|
530
|
+
|
|
531
|
+
**Process Insights:**
|
|
532
|
+
- High "% of untracked" + low "% of their work" = Developer doing most of the acceptable maintenance work
|
|
533
|
+
- Low "% of untracked" + high "% of their work" = Developer needs process guidance
|
|
534
|
+
- High percentages in feature/bug_fix categories = Process improvement opportunity
|
|
535
|
+
|
|
536
|
+
### Example Report Outputs
|
|
537
|
+
|
|
538
|
+
#### Untracked Commits CSV Sample
|
|
539
|
+
```csv
|
|
540
|
+
commit_hash,short_hash,author,author_email,canonical_id,date,project,message,category,files_changed,lines_added,lines_removed,lines_changed,is_merge
|
|
541
|
+
a1b2c3d4e5f6...,a1b2c3d,John Doe,john@company.com,ID0001,2024-01-15 14:30:22,FRONTEND,Update dependency versions for security patches,maintenance,2,45,12,57,false
|
|
542
|
+
f6e5d4c3b2a1...,f6e5d4c,Jane Smith,jane@company.com,ID0002,2024-01-15 09:15:10,BACKEND,Fix typo in error message,bug_fix,1,1,1,2,false
|
|
543
|
+
9876543210ab...,9876543,Bob Wilson,bob@company.com,ID0003,2024-01-14 16:45:33,FRONTEND,Add JSDoc comments to utility functions,documentation,3,28,0,28,false
|
|
544
|
+
```
|
|
545
|
+
|
|
546
|
+
#### Complete Narrative Report Sample
|
|
547
|
+
```markdown
|
|
548
|
+
# GitFlow Analytics Report
|
|
549
|
+
|
|
550
|
+
**Generated**: 2025-08-04 14:27:47
|
|
551
|
+
**Analysis Period**: Last 4 weeks
|
|
552
|
+
|
|
553
|
+
## Executive Summary
|
|
554
|
+
|
|
555
|
+
- **Total Commits**: 35
|
|
556
|
+
- **Active Developers**: 3
|
|
557
|
+
- **Lines Changed**: 910
|
|
558
|
+
- **Ticket Coverage**: 71.4%
|
|
559
|
+
- **Active Projects**: FRONTEND, SERVICE_TS, SERVICES
|
|
560
|
+
- **Top Contributor**: John Developer with 15 commits
|
|
561
|
+
|
|
562
|
+
## Team Composition
|
|
563
|
+
|
|
564
|
+
### Developer Profiles
|
|
565
|
+
|
|
566
|
+
**John Developer**
|
|
567
|
+
- Commits: 15
|
|
568
|
+
- Projects: FRONTEND (85.0%), SERVICE_TS (15.0%)
|
|
569
|
+
- Work Style: Focused
|
|
570
|
+
- Active Pattern: Standard Hours
|
|
571
|
+
|
|
572
|
+
**Jane Smith**
|
|
573
|
+
- Commits: 12
|
|
574
|
+
- Projects: SERVICE_TS (70.0%), FRONTEND (30.0%)
|
|
575
|
+
- Work Style: Multi-project
|
|
576
|
+
- Active Pattern: Extended Hours
|
|
577
|
+
|
|
578
|
+
## Project Activity
|
|
579
|
+
|
|
580
|
+
### Activity by Project
|
|
581
|
+
|
|
582
|
+
**FRONTEND**
|
|
583
|
+
- Commits: 14 (50.0% of total)
|
|
584
|
+
- Lines Changed: 450
|
|
585
|
+
- Contributors: John Developer (71.4%), Jane Smith (28.6%)
|
|
586
|
+
|
|
587
|
+
**SERVICE_TS**
|
|
588
|
+
- Commits: 8 (28.6% of total)
|
|
589
|
+
- Lines Changed: 280
|
|
590
|
+
- Contributors: Jane Smith (100.0%)
|
|
591
|
+
|
|
592
|
+
## Issue Tracking
|
|
593
|
+
|
|
594
|
+
### Platform Usage
|
|
595
|
+
|
|
596
|
+
- **Jira**: 15 tickets (60.0%)
|
|
597
|
+
- **Github**: 8 tickets (32.0%)
|
|
598
|
+
- **Clickup**: 2 tickets (8.0%)
|
|
599
|
+
|
|
600
|
+
### Untracked Work Analysis
|
|
601
|
+
|
|
602
|
+
**Summary**: 10 commits (28.6% of total) lack ticket references.
|
|
603
|
+
|
|
604
|
+
#### Work Categories
|
|
605
|
+
|
|
606
|
+
- **Maintenance**: 4 commits (40.0%), avg 23 lines *(acceptable untracked)*
|
|
607
|
+
- **Bug Fix**: 3 commits (30.0%), avg 15 lines *(should be tracked)*
|
|
608
|
+
- **Documentation**: 2 commits (20.0%), avg 12 lines *(acceptable untracked)*
|
|
609
|
+
|
|
610
|
+
#### Top Contributors (Untracked Work)
|
|
611
|
+
|
|
612
|
+
- **John Developer**: 1 commit (50.0% of untracked, 6.7% of their work) - *refactor*
|
|
613
|
+
- **Jane Smith**: 1 commit (50.0% of untracked, 8.3% of their work) - *style*
|
|
614
|
+
|
|
615
|
+
#### Recommendations for Untracked Work
|
|
616
|
+
|
|
617
|
+
🎯 **Excellent tracking**: Less than 20% of commits are untracked - the team shows strong process adherence.
|
|
618
|
+
|
|
619
|
+
## Recommendations
|
|
620
|
+
|
|
621
|
+
✅ The team shows healthy development patterns. Continue current practices while monitoring for changes.
|
|
622
|
+
```
|
|
623
|
+
|
|
624
|
+
### Configuration for Enhanced Narrative Reports
|
|
625
|
+
|
|
626
|
+
The narrative reports automatically include all available sections based on your configuration and data availability:
|
|
627
|
+
|
|
628
|
+
**Always Generated:**
|
|
629
|
+
- Executive Summary, Team Composition, Project Activity, Development Patterns, Issue Tracking, Recommendations
|
|
630
|
+
|
|
631
|
+
**Conditionally Generated:**
|
|
632
|
+
- **Pull Request Analysis**: Requires GitHub integration with PR data
|
|
633
|
+
- **PM Platform Integration**: Requires JIRA or other PM platform configuration
|
|
634
|
+
- **Qualitative Analysis**: Requires ChatGPT integration setup
|
|
635
|
+
|
|
636
|
+
**Customizing Report Content:**
|
|
637
|
+
```yaml
|
|
638
|
+
# config.yaml
|
|
639
|
+
output:
|
|
640
|
+
formats:
|
|
641
|
+
- csv
|
|
642
|
+
- markdown # Enables narrative report generation
|
|
643
|
+
|
|
644
|
+
# Optional: Enhance narrative reports with additional data
|
|
645
|
+
jira:
|
|
646
|
+
access_user: "${JIRA_ACCESS_USER}"
|
|
647
|
+
access_token: "${JIRA_ACCESS_TOKEN}"
|
|
648
|
+
base_url: "https://company.atlassian.net"
|
|
649
|
+
|
|
650
|
+
# Optional: Add qualitative insights
|
|
651
|
+
analysis:
|
|
652
|
+
chatgpt:
|
|
653
|
+
enabled: true
|
|
654
|
+
api_key: "${OPENAI_API_KEY}"
|
|
655
|
+
```
|
|
656
|
+
|
|
657
|
+
## Story Point Patterns
|
|
658
|
+
|
|
659
|
+
Configure custom regex patterns to match your team's story point format:
|
|
660
|
+
|
|
661
|
+
```yaml
|
|
662
|
+
story_point_patterns:
|
|
663
|
+
- "SP: (\\d+)" # SP: 5
|
|
664
|
+
- "\\[([0-9]+) pts\\]" # [3 pts]
|
|
665
|
+
- "estimate: (\\d+)" # estimate: 8
|
|
666
|
+
```
|
|
667
|
+
|
|
668
|
+
## Ticket Platform Support
|
|
669
|
+
|
|
670
|
+
Automatically detects and tracks tickets from:
|
|
671
|
+
- **JIRA**: `PROJ-123`
|
|
672
|
+
- **GitHub**: `#123`, `GH-123`
|
|
673
|
+
- **ClickUp**: `CU-abc123`
|
|
674
|
+
- **Linear**: `ENG-123`
|
|
675
|
+
|
|
676
|
+
### JIRA Integration
|
|
677
|
+
|
|
678
|
+
GitFlow Analytics can fetch story points directly from JIRA tickets. Configure your JIRA instance:
|
|
679
|
+
|
|
680
|
+
```yaml
|
|
681
|
+
jira:
|
|
682
|
+
access_user: "${JIRA_ACCESS_USER}"
|
|
683
|
+
access_token: "${JIRA_ACCESS_TOKEN}"
|
|
684
|
+
base_url: "https://your-company.atlassian.net"
|
|
685
|
+
|
|
686
|
+
jira_integration:
|
|
687
|
+
enabled: true
|
|
688
|
+
story_point_fields:
|
|
689
|
+
- "Story point estimate" # Your custom field name
|
|
690
|
+
- "customfield_10016" # Or use field ID
|
|
691
|
+
```
|
|
692
|
+
|
|
693
|
+
To discover your JIRA story point fields:
|
|
694
|
+
```bash
|
|
695
|
+
gitflow-analytics discover-storypoint-fields -c config.yaml
|
|
696
|
+
```
|
|
697
|
+
|
|
698
|
+
## Caching
|
|
699
|
+
|
|
700
|
+
The tool uses SQLite for intelligent caching:
|
|
701
|
+
- Commit analysis results
|
|
702
|
+
- Developer identity mappings
|
|
703
|
+
- Pull request data
|
|
704
|
+
|
|
705
|
+
Cache is automatically managed with configurable TTL.
|
|
706
|
+
|
|
707
|
+
## Developer Identity Resolution
|
|
708
|
+
|
|
709
|
+
GitFlow Analytics intelligently consolidates developer identities across different email addresses and name variations:
|
|
710
|
+
|
|
711
|
+
### Automatic Identity Analysis (New!)
|
|
712
|
+
|
|
713
|
+
Identity analysis now runs **automatically by default** when no manual mappings exist. The system will:
|
|
714
|
+
|
|
715
|
+
1. **Analyze all developer identities** in your commits
|
|
716
|
+
2. **Show suggested consolidations** with a clear preview
|
|
717
|
+
3. **Prompt for approval** with a simple Y/n
|
|
718
|
+
4. **Update your configuration** automatically
|
|
719
|
+
5. **Continue analysis** with consolidated identities
|
|
720
|
+
|
|
721
|
+
Example of the interactive prompt:
|
|
722
|
+
```
|
|
723
|
+
🔍 Analyzing developer identities...
|
|
724
|
+
|
|
725
|
+
⚠️ Found 3 potential identity clusters:
|
|
726
|
+
|
|
727
|
+
📋 Suggested identity mappings:
|
|
728
|
+
john.doe@company.com
|
|
729
|
+
→ 123456+johndoe@users.noreply.github.com
|
|
730
|
+
→ jdoe@personal.email.com
|
|
731
|
+
|
|
732
|
+
🤖 Found 2 bot accounts to exclude:
|
|
733
|
+
- dependabot[bot]
|
|
734
|
+
- renovate[bot]
|
|
735
|
+
|
|
736
|
+
────────────────────────────────────────────────────────────
|
|
737
|
+
Apply these identity mappings to your configuration? [Y/n]:
|
|
738
|
+
```
|
|
739
|
+
|
|
740
|
+
This prompt appears at most once every 7 days.
|
|
741
|
+
|
|
742
|
+
To skip automatic identity analysis:
|
|
743
|
+
```bash
|
|
744
|
+
# Simplified syntax (default)
|
|
745
|
+
gitflow-analytics -c config.yaml --skip-identity-analysis
|
|
746
|
+
|
|
747
|
+
# Explicit analyze command
|
|
748
|
+
gitflow-analytics analyze -c config.yaml --skip-identity-analysis
|
|
749
|
+
```
|
|
750
|
+
|
|
751
|
+
To manually run identity analysis:
|
|
752
|
+
```bash
|
|
753
|
+
gitflow-analytics identities -c config.yaml
|
|
754
|
+
```
|
|
755
|
+
|
|
756
|
+
### Smart Identity Matching
|
|
757
|
+
|
|
758
|
+
The system automatically detects:
|
|
759
|
+
- **GitHub noreply emails** (e.g., `150280367+username@users.noreply.github.com`)
|
|
760
|
+
- **Name variations** (e.g., "John Doe" vs "John D" vs "jdoe")
|
|
761
|
+
- **Common email patterns** across domains
|
|
762
|
+
- **Bot accounts** for automatic exclusion
|
|
763
|
+
|
|
764
|
+
### Manual Configuration
|
|
765
|
+
|
|
766
|
+
You can also manually configure identity mappings in your YAML:
|
|
767
|
+
|
|
768
|
+
```yaml
|
|
769
|
+
analysis:
|
|
770
|
+
identity:
|
|
771
|
+
manual_mappings:
|
|
772
|
+
- name: "John Doe" # Optional: preferred display name for reports
|
|
773
|
+
primary_email: john.doe@company.com
|
|
774
|
+
aliases:
|
|
775
|
+
- jdoe@personal.email.com
|
|
776
|
+
- 123456+johndoe@users.noreply.github.com
|
|
777
|
+
- name: "Sarah Smith"
|
|
778
|
+
primary_email: sarah.smith@company.com
|
|
779
|
+
aliases:
|
|
780
|
+
- s.smith@oldcompany.com
|
|
781
|
+
```
|
|
782
|
+
|
|
783
|
+
### Display Name Control
|
|
784
|
+
|
|
785
|
+
The optional `name` field in manual mappings allows you to control how developer names appear in reports. This is particularly useful for:
|
|
786
|
+
|
|
787
|
+
- **Standardizing display names** across different email formats
|
|
788
|
+
- **Resolving duplicates** when the same person appears with slight name variations
|
|
789
|
+
- **Using preferred names** instead of technical email formats
|
|
790
|
+
|
|
791
|
+
**Example use cases:**
|
|
792
|
+
```yaml
|
|
793
|
+
analysis:
|
|
794
|
+
identity:
|
|
795
|
+
manual_mappings:
|
|
796
|
+
# Consolidate John Smith identities
|
|
797
|
+
- name: "John Smith"
|
|
798
|
+
primary_email: "john.smith@company.com"
|
|
799
|
+
aliases:
|
|
800
|
+
- "150280367+jsmith@users.noreply.github.com"
|
|
801
|
+
- "jsmith-company@users.noreply.github.com"
|
|
802
|
+
|
|
803
|
+
# Standardize name variations
|
|
804
|
+
- name: "John Doe" # Consistent display across all reports
|
|
805
|
+
primary_email: "john.doe@company.com"
|
|
806
|
+
aliases:
|
|
807
|
+
- "johndoe@company.com"
|
|
808
|
+
- "j.doe@company.com"
|
|
809
|
+
```
|
|
810
|
+
|
|
811
|
+
Without the `name` field, the system uses the canonical email's associated name, which might not be ideal for reporting.
|
|
812
|
+
|
|
813
|
+
### Disabling Automatic Analysis
|
|
814
|
+
|
|
815
|
+
To disable the automatic identity prompt:
|
|
816
|
+
```yaml
|
|
817
|
+
analysis:
|
|
818
|
+
identity:
|
|
819
|
+
auto_analysis: false
|
|
820
|
+
```
|
|
821
|
+
|
|
822
|
+
## ML-Enhanced Commit Categorization
|
|
823
|
+
|
|
824
|
+
GitFlow Analytics includes sophisticated machine learning capabilities for categorizing commits with high accuracy and confidence scoring.
|
|
825
|
+
|
|
826
|
+
### How It Works
|
|
827
|
+
|
|
828
|
+
The ML categorization system uses a **hybrid approach** combining:
|
|
829
|
+
|
|
830
|
+
1. **Semantic Analysis**: Uses spaCy NLP models to understand commit message meaning
|
|
831
|
+
2. **File Pattern Recognition**: Analyzes changed files for additional context signals
|
|
832
|
+
3. **Rule-based Fallback**: Falls back to traditional regex patterns when ML confidence is low
|
|
833
|
+
4. **Confidence Scoring**: Provides confidence metrics for all categorizations
|
|
834
|
+
|
|
835
|
+
### Categories Detected
|
|
836
|
+
|
|
837
|
+
The system automatically categorizes commits into:
|
|
838
|
+
|
|
839
|
+
- **Feature**: New functionality development (`add`, `implement`, `create`)
|
|
840
|
+
- **Bug Fix**: Error corrections (`fix`, `resolve`, `correct`)
|
|
841
|
+
- **Refactor**: Code restructuring (`refactor`, `optimize`, `improve`)
|
|
842
|
+
- **Documentation**: Documentation updates (`docs`, `readme`, `comment`)
|
|
843
|
+
- **Maintenance**: Routine upkeep (`update`, `upgrade`, `dependency`)
|
|
844
|
+
- **Test**: Testing-related changes (`test`, `spec`, `coverage`)
|
|
845
|
+
- **Style**: Formatting changes (`format`, `lint`, `prettier`)
|
|
846
|
+
- **Build**: Build system changes (`build`, `ci`, `docker`)
|
|
847
|
+
- **Security**: Security-related fixes (`security`, `vulnerability`)
|
|
848
|
+
- **Hotfix**: Urgent production fixes (`hotfix`, `critical`, `emergency`)
|
|
849
|
+
- **Config**: Configuration changes (`config`, `settings`, `environment`)
|
|
850
|
+
|
|
851
|
+
### Configuration
|
|
852
|
+
|
|
853
|
+
```yaml
|
|
854
|
+
analysis:
|
|
855
|
+
ml_categorization:
|
|
856
|
+
# Enable/disable ML categorization (default: true)
|
|
857
|
+
enabled: true
|
|
858
|
+
|
|
859
|
+
# Minimum confidence for ML predictions (0.0-1.0, default: 0.6)
|
|
860
|
+
min_confidence: 0.6
|
|
861
|
+
|
|
862
|
+
# Semantic vs file pattern weighting (default: 0.7 vs 0.3)
|
|
863
|
+
semantic_weight: 0.7
|
|
864
|
+
file_pattern_weight: 0.3
|
|
865
|
+
|
|
866
|
+
# Confidence threshold for ML vs rule-based (default: 0.5)
|
|
867
|
+
hybrid_threshold: 0.5
|
|
868
|
+
|
|
869
|
+
# Caching for performance
|
|
870
|
+
enable_caching: true
|
|
871
|
+
cache_duration_days: 30
|
|
872
|
+
|
|
873
|
+
# Processing settings
|
|
874
|
+
batch_size: 100
|
|
875
|
+
```
|
|
876
|
+
|
|
877
|
+
### Installation Requirements
|
|
878
|
+
|
|
879
|
+
For ML categorization, install the spaCy English model:
|
|
880
|
+
|
|
881
|
+
```bash
|
|
882
|
+
python -m spacy download en_core_web_sm
|
|
883
|
+
```
|
|
884
|
+
|
|
885
|
+
**Alternative models** (if the default is unavailable):
|
|
886
|
+
```bash
|
|
887
|
+
# Medium model (more accurate, larger)
|
|
888
|
+
python -m spacy download en_core_web_md
|
|
889
|
+
|
|
890
|
+
# Large model (most accurate, largest)
|
|
891
|
+
python -m spacy download en_core_web_lg
|
|
892
|
+
```
|
|
893
|
+
|
|
894
|
+
### Performance Expectations
|
|
895
|
+
|
|
896
|
+
- **Accuracy**: 85-95% accuracy on typical commit messages
|
|
897
|
+
- **Speed**: ~50-100 commits/second with caching enabled
|
|
898
|
+
- **Fallback**: Graceful degradation to rule-based categorization when ML is unavailable
|
|
899
|
+
- **Memory**: ~200MB additional memory usage for spaCy models
|
|
900
|
+
|
|
901
|
+
### Enhanced Reports
|
|
902
|
+
|
|
903
|
+
With ML categorization enabled, reports include:
|
|
904
|
+
|
|
905
|
+
- **Confidence scores** for each categorization
|
|
906
|
+
- **Method indicators** (ML, rules, or cached)
|
|
907
|
+
- **Alternative predictions** for uncertain cases
|
|
908
|
+
- **ML performance statistics** in analysis summaries
|
|
909
|
+
|
|
910
|
+
### Example Enhanced Output
|
|
911
|
+
|
|
912
|
+
```csv
|
|
913
|
+
commit_hash,category,ml_confidence,ml_method,message
|
|
914
|
+
a1b2c3d,feature,0.89,ml,"Add user authentication system"
|
|
915
|
+
f6e5d4c,bug_fix,0.92,ml,"Fix memory leak in cache cleanup"
|
|
916
|
+
9876543,maintenance,0.74,rules,"Update dependency versions"
|
|
917
|
+
```
|
|
918
|
+
|
|
919
|
+
## Troubleshooting
|
|
920
|
+
|
|
921
|
+
### YAML Configuration Errors
|
|
922
|
+
|
|
923
|
+
GitFlow Analytics provides helpful error messages when YAML configuration issues are encountered. Here are common errors and their solutions:
|
|
924
|
+
|
|
925
|
+
#### Tab Characters Not Allowed
|
|
926
|
+
```
|
|
927
|
+
❌ YAML configuration error at line 3, column 1:
|
|
928
|
+
🚫 Tab characters are not allowed in YAML files!
|
|
929
|
+
```
|
|
930
|
+
**Fix**: Replace all tabs with spaces (use 2 or 4 spaces for indentation)
|
|
931
|
+
- Most editors can show whitespace characters and convert tabs to spaces
|
|
932
|
+
- In VS Code: View → Render Whitespace, then Edit → Convert Indentation to Spaces
|
|
933
|
+
|
|
934
|
+
#### Missing Colons
|
|
935
|
+
```
|
|
936
|
+
❌ YAML configuration error at line 5, column 10:
|
|
937
|
+
🚫 Missing colon (:) after a key name!
|
|
938
|
+
```
|
|
939
|
+
**Fix**: Add a colon after each key name (followed by a space when the value is on the same line)
|
|
940
|
+
```yaml
|
|
941
|
+
# Correct:
|
|
942
|
+
repositories:
|
|
943
|
+
- name: my-repo
|
|
944
|
+
|
|
945
|
+
# Incorrect:
|
|
946
|
+
repositories
|
|
947
|
+
- name my-repo
|
|
948
|
+
```
|
|
949
|
+
|
|
950
|
+
#### Unclosed Quotes
|
|
951
|
+
```
|
|
952
|
+
❌ YAML configuration error at line 8, column 15:
|
|
953
|
+
🚫 Unclosed quoted string!
|
|
954
|
+
```
|
|
955
|
+
**Fix**: Ensure all quotes are properly closed
|
|
956
|
+
```yaml
|
|
957
|
+
# Correct:
|
|
958
|
+
token: "my-token-value"
|
|
959
|
+
|
|
960
|
+
# Incorrect:
|
|
961
|
+
token: "my-token-value
|
|
962
|
+
```
|
|
963
|
+
|
|
964
|
+
#### Invalid Indentation
|
|
965
|
+
```
|
|
966
|
+
❌ YAML configuration error:
|
|
967
|
+
🚫 Indentation error or invalid structure!
|
|
968
|
+
```
|
|
969
|
+
**Fix**: Use consistent indentation (either 2 or 4 spaces)
|
|
970
|
+
```yaml
|
|
971
|
+
# Correct:
|
|
972
|
+
analysis:
|
|
973
|
+
exclude:
|
|
974
|
+
paths:
|
|
975
|
+
- "vendor/**"
|
|
976
|
+
|
|
977
|
+
# Incorrect:
|
|
978
|
+
analysis:
|
|
979
|
+
exclude:
|
|
980
|
+
paths: # 3 spaces - inconsistent!
|
|
981
|
+
- "vendor/**"
|
|
982
|
+
```
|
|
983
|
+
|
|
984
|
+
### Tips for Valid YAML
|
|
985
|
+
|
|
986
|
+
1. **Use a YAML validator**: Check your configuration with an online YAML validator before using it
|
|
987
|
+
2. **Enable whitespace display**: Make tabs and spaces visible in your editor
|
|
988
|
+
3. **Use quotes for special characters**: Wrap values containing `:`, `#`, `@`, etc. in quotes
|
|
989
|
+
4. **Consistent indentation**: Pick 2 or 4 spaces and stick to it throughout the file
|
|
990
|
+
5. **Check the sample config**: Reference `config-sample.yaml` for proper structure
|
|
991
|
+
|
|
992
|
+
### Configuration Validation
|
|
993
|
+
|
|
994
|
+
Beyond YAML syntax, GitFlow Analytics validates:
|
|
995
|
+
- Required fields (`repositories` must have `name` and `path`)
|
|
996
|
+
- Environment variable resolution
|
|
997
|
+
- File path existence
|
|
998
|
+
- Valid configuration structure
|
|
999
|
+
|
|
1000
|
+
If you encounter persistent issues, run with `--debug` for detailed error information:
|
|
1001
|
+
```bash
|
|
1002
|
+
# Simplified syntax (default)
|
|
1003
|
+
gitflow-analytics -c config.yaml --debug
|
|
1004
|
+
|
|
1005
|
+
# Explicit analyze command
|
|
1006
|
+
gitflow-analytics analyze -c config.yaml --debug
|
|
1007
|
+
```
|
|
1008
|
+
|
|
1009
|
+
## Contributing
|
|
1010
|
+
|
|
1011
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
1012
|
+
|
|
1013
|
+
## License
|
|
1014
|
+
|
|
1015
|
+
This project is licensed under the MIT License - see the LICENSE file for details.
|