gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. gitflow_analytics/__init__.py +11 -11
  2. gitflow_analytics/_version.py +2 -2
  3. gitflow_analytics/classification/__init__.py +31 -0
  4. gitflow_analytics/classification/batch_classifier.py +752 -0
  5. gitflow_analytics/classification/classifier.py +464 -0
  6. gitflow_analytics/classification/feature_extractor.py +725 -0
  7. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  8. gitflow_analytics/classification/model.py +455 -0
  9. gitflow_analytics/cli.py +4490 -378
  10. gitflow_analytics/cli_rich.py +503 -0
  11. gitflow_analytics/config/__init__.py +43 -0
  12. gitflow_analytics/config/errors.py +261 -0
  13. gitflow_analytics/config/loader.py +904 -0
  14. gitflow_analytics/config/profiles.py +264 -0
  15. gitflow_analytics/config/repository.py +124 -0
  16. gitflow_analytics/config/schema.py +441 -0
  17. gitflow_analytics/config/validator.py +154 -0
  18. gitflow_analytics/config.py +44 -398
  19. gitflow_analytics/core/analyzer.py +1320 -172
  20. gitflow_analytics/core/branch_mapper.py +132 -132
  21. gitflow_analytics/core/cache.py +1554 -175
  22. gitflow_analytics/core/data_fetcher.py +1193 -0
  23. gitflow_analytics/core/identity.py +571 -185
  24. gitflow_analytics/core/metrics_storage.py +526 -0
  25. gitflow_analytics/core/progress.py +372 -0
  26. gitflow_analytics/core/schema_version.py +269 -0
  27. gitflow_analytics/extractors/base.py +13 -11
  28. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  29. gitflow_analytics/extractors/story_points.py +77 -59
  30. gitflow_analytics/extractors/tickets.py +841 -89
  31. gitflow_analytics/identity_llm/__init__.py +6 -0
  32. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  33. gitflow_analytics/identity_llm/analyzer.py +464 -0
  34. gitflow_analytics/identity_llm/models.py +76 -0
  35. gitflow_analytics/integrations/github_integration.py +258 -87
  36. gitflow_analytics/integrations/jira_integration.py +572 -123
  37. gitflow_analytics/integrations/orchestrator.py +206 -82
  38. gitflow_analytics/metrics/activity_scoring.py +322 -0
  39. gitflow_analytics/metrics/branch_health.py +470 -0
  40. gitflow_analytics/metrics/dora.py +542 -179
  41. gitflow_analytics/models/database.py +986 -59
  42. gitflow_analytics/pm_framework/__init__.py +115 -0
  43. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  44. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  45. gitflow_analytics/pm_framework/base.py +406 -0
  46. gitflow_analytics/pm_framework/models.py +211 -0
  47. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  48. gitflow_analytics/pm_framework/registry.py +333 -0
  49. gitflow_analytics/qualitative/__init__.py +29 -0
  50. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  51. gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
  52. gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
  53. gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
  54. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
  55. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  56. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  57. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  58. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  59. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  60. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  61. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  62. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  63. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  64. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
  65. gitflow_analytics/qualitative/core/__init__.py +13 -0
  66. gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
  67. gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
  68. gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
  69. gitflow_analytics/qualitative/core/processor.py +673 -0
  70. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  71. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  72. gitflow_analytics/qualitative/models/__init__.py +25 -0
  73. gitflow_analytics/qualitative/models/schemas.py +306 -0
  74. gitflow_analytics/qualitative/utils/__init__.py +13 -0
  75. gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
  76. gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
  77. gitflow_analytics/qualitative/utils/metrics.py +361 -0
  78. gitflow_analytics/qualitative/utils/text_processing.py +285 -0
  79. gitflow_analytics/reports/__init__.py +100 -0
  80. gitflow_analytics/reports/analytics_writer.py +550 -18
  81. gitflow_analytics/reports/base.py +648 -0
  82. gitflow_analytics/reports/branch_health_writer.py +322 -0
  83. gitflow_analytics/reports/classification_writer.py +924 -0
  84. gitflow_analytics/reports/cli_integration.py +427 -0
  85. gitflow_analytics/reports/csv_writer.py +1700 -216
  86. gitflow_analytics/reports/data_models.py +504 -0
  87. gitflow_analytics/reports/database_report_generator.py +427 -0
  88. gitflow_analytics/reports/example_usage.py +344 -0
  89. gitflow_analytics/reports/factory.py +499 -0
  90. gitflow_analytics/reports/formatters.py +698 -0
  91. gitflow_analytics/reports/html_generator.py +1116 -0
  92. gitflow_analytics/reports/interfaces.py +489 -0
  93. gitflow_analytics/reports/json_exporter.py +2770 -0
  94. gitflow_analytics/reports/narrative_writer.py +2289 -158
  95. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  96. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  97. gitflow_analytics/training/__init__.py +5 -0
  98. gitflow_analytics/training/model_loader.py +377 -0
  99. gitflow_analytics/training/pipeline.py +550 -0
  100. gitflow_analytics/tui/__init__.py +5 -0
  101. gitflow_analytics/tui/app.py +724 -0
  102. gitflow_analytics/tui/screens/__init__.py +8 -0
  103. gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
  104. gitflow_analytics/tui/screens/configuration_screen.py +523 -0
  105. gitflow_analytics/tui/screens/loading_screen.py +348 -0
  106. gitflow_analytics/tui/screens/main_screen.py +321 -0
  107. gitflow_analytics/tui/screens/results_screen.py +722 -0
  108. gitflow_analytics/tui/widgets/__init__.py +7 -0
  109. gitflow_analytics/tui/widgets/data_table.py +255 -0
  110. gitflow_analytics/tui/widgets/export_modal.py +301 -0
  111. gitflow_analytics/tui/widgets/progress_widget.py +187 -0
  112. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  113. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  114. gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
  115. gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
  116. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  117. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  118. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  119. {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1015 @@
1
+ Metadata-Version: 2.4
2
+ Name: gitflow-analytics
3
+ Version: 1.3.6
4
+ Summary: Analyze Git repositories for developer productivity insights
5
+ Author-email: Bob Matyas <bobmatnyc@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/bobmatnyc/gitflow-analytics
8
+ Project-URL: Documentation, https://github.com/bobmatnyc/gitflow-analytics/blob/main/README.md
9
+ Project-URL: Repository, https://github.com/bobmatnyc/gitflow-analytics
10
+ Project-URL: Issues, https://github.com/bobmatnyc/gitflow-analytics/issues
11
+ Keywords: git,analytics,productivity,metrics,development
12
+ Classifier: Development Status :: 5 - Production/Stable
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Topic :: Software Development :: Version Control :: Git
20
+ Classifier: Topic :: Software Development :: Quality Assurance
21
+ Requires-Python: >=3.9
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: click>=8.1
25
+ Requires-Dist: gitpython>=3.1
26
+ Requires-Dist: pygithub>=2.0
27
+ Requires-Dist: tqdm>=4.65
28
+ Requires-Dist: sqlalchemy>=2.0
29
+ Requires-Dist: pandas>=2.0
30
+ Requires-Dist: pyyaml>=6.0
31
+ Requires-Dist: python-dateutil>=2.8
32
+ Requires-Dist: python-dotenv>=1.0
33
+ Requires-Dist: rich>=13.0.0
34
+ Requires-Dist: spacy>=3.7.0
35
+ Requires-Dist: scikit-learn>=1.3.0
36
+ Requires-Dist: openai>=1.30.0
37
+ Requires-Dist: tiktoken>=0.7.0
38
+ Requires-Dist: numpy>=1.24.0
39
+ Provides-Extra: dev
40
+ Requires-Dist: pytest>=7.0; extra == "dev"
41
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
42
+ Requires-Dist: pytest-mock>=3.0; extra == "dev"
43
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
44
+ Requires-Dist: mypy>=1.0; extra == "dev"
45
+ Requires-Dist: black>=23.0; extra == "dev"
46
+ Requires-Dist: isort>=5.0; extra == "dev"
47
+ Requires-Dist: bandit[toml]>=1.7; extra == "dev"
48
+ Requires-Dist: safety>=2.0; extra == "dev"
49
+ Requires-Dist: python-semantic-release>=8.0.0; extra == "dev"
50
+ Requires-Dist: types-PyYAML>=6.0; extra == "dev"
51
+ Requires-Dist: types-requests>=2.28; extra == "dev"
52
+ Provides-Extra: github
53
+ Requires-Dist: pygithub>=1.58; extra == "github"
54
+ Provides-Extra: tui
55
+ Requires-Dist: textual>=0.41.0; extra == "tui"
56
+ Provides-Extra: all
57
+ Requires-Dist: gitflow-analytics[github,tui]; extra == "all"
58
+ Dynamic: license-file
59
+
60
+ # GitFlow Analytics
61
+
62
+ [![PyPI version](https://badge.fury.io/py/gitflow-analytics.svg)](https://badge.fury.io/py/gitflow-analytics)
63
+ [![Python Support](https://img.shields.io/pypi/pyversions/gitflow-analytics.svg)](https://pypi.org/project/gitflow-analytics/)
64
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
65
+ [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg)](https://github.com/bobmatnyc/gitflow-analytics/tree/main/docs)
66
+ [![Tests](https://github.com/bobmatnyc/gitflow-analytics/workflows/Tests/badge.svg)](https://github.com/bobmatnyc/gitflow-analytics/actions)
67
+
68
+ A comprehensive Python package for analyzing Git repositories to generate developer productivity insights without requiring external project management tools. Extract actionable metrics directly from Git history with ML-enhanced commit categorization, automated developer identity resolution, and professional reporting.
69
+
70
+ ## 🚀 Key Features
71
+
72
+ - **🔍 No PM Tool Dependencies**: Analyze productivity without requiring JIRA, Linear, or other PM tools
73
+ - **🧠 ML-Powered Intelligence**: Advanced commit categorization with 85-95% accuracy
74
+ - **👥 Smart Identity Resolution**: Automatically consolidate developer identities across email addresses
75
+ - **🏢 Enterprise Ready**: Organization-wide repository discovery with intelligent caching
76
+ - **📊 Professional Reports**: Rich markdown narratives and CSV exports for executive dashboards
77
+
78
+ ## 🎯 Quick Start
79
+
80
+ Get up and running in 5 minutes:
81
+
82
+ ```bash
83
+ # 1. Install GitFlow Analytics
84
+ pip install gitflow-analytics
85
+
86
+ # 2. Install ML dependencies (optional but recommended)
87
+ python -m spacy download en_core_web_sm
88
+
89
+ # 3. Create a simple configuration
90
+ echo 'version: "1.0"
91
+ github:
92
+   token: "${GITHUB_TOKEN}"
93
+   organization: "your-org"' > config.yaml
94
+
95
+ # 4. Set your GitHub token
96
+ echo 'GITHUB_TOKEN=ghp_your_token_here' > .env
97
+
98
+ # 5. Run analysis
99
+ gitflow-analytics -c config.yaml --weeks 8
100
+ ```
101
+
102
+ **What you get:**
103
+ - 📈 Weekly metrics CSV with developer productivity trends
104
+ - 👥 Developer profiles with project distribution and work styles
105
+ - 🔍 Untracked work analysis with ML-powered categorization
106
+ - 📋 Executive summary with actionable insights
107
+ - 📊 Rich markdown report ready for stakeholders
108
+
109
+ ### Sample Output Preview
110
+
111
+ ```markdown
112
+ ## Executive Summary
113
+ - **Total Commits**: 156 across 3 projects
114
+ - **Active Developers**: 5 team members
115
+ - **Ticket Coverage**: 73.2% (industry benchmark: 60-80%)
116
+ - **Top Contributor**: Sarah Chen (32 commits, FRONTEND focus)
117
+
118
+ ## Key Insights
119
+ 🎯 **High Productivity**: Team averaged 31 commits/week
120
+ 📊 **Balanced Workload**: No single developer >40% of total work
121
+ ✅ **Good Process**: 73% ticket coverage shows strong tracking
122
+ ```
123
+
124
+ ## ✨ Latest Features (v1.2.x)
125
+
126
+ - **🚀 Two-Step Processing**: Optimized fetch-then-classify workflow for better performance
127
+ - **💰 Cost Tracking**: Monitor LLM API usage with detailed token and cost reporting
128
+ - **⚡ Smart Caching**: Intelligent caching reduces analysis time by up to 90%
129
+ - **🔄 Automatic Updates**: Repositories automatically fetch latest commits before analysis
130
+ - **📊 Weekly Trends**: Track classification pattern changes over time
131
+ - **🎯 Enhanced Categorization**: All commits properly categorized with confidence scores
132
+
133
+ ## 🔥 Core Capabilities
134
+
135
+ **📊 Analysis & Insights**
136
+ - Multi-repository analysis with intelligent project grouping
137
+ - ML-enhanced commit categorization (85-95% accuracy)
138
+ - Developer productivity metrics and work pattern analysis
139
+ - Story point extraction from commits and PRs
140
+ - Ticket tracking across JIRA, GitHub, ClickUp, and Linear
141
+
142
+ **🏢 Enterprise Features**
143
+ - Organization-wide repository discovery from GitHub
144
+ - Automated developer identity resolution and consolidation
145
+ - Database-backed caching for sub-second report generation
146
+ - Data anonymization for secure external sharing
147
+ - Batch processing optimized for large repositories
148
+
149
+ **📈 Professional Reporting**
150
+ - Rich markdown narratives with executive summaries
151
+ - Weekly CSV exports with trend analysis
152
+ - Customizable output formats and filtering
153
+ - Performance benchmarking and team comparisons
154
+
155
+ ## 📚 Documentation
156
+
157
+ Comprehensive guides for every use case:
158
+
159
+ | **Getting Started** | **Advanced Usage** | **Integration** |
160
+ |-------------------|------------------|---------------|
161
+ | [Installation](docs/getting-started/installation.md) | [Complete Configuration](docs/guides/configuration.md) | [CLI Reference](docs/reference/cli-commands.md) |
162
+ | [5-Minute Tutorial](docs/getting-started/quickstart.md) | [ML Categorization](docs/guides/ml-categorization.md) | [JSON Export Schema](docs/reference/json-export-schema.md) |
163
+ | [First Analysis](docs/getting-started/first-analysis.md) | [Enterprise Setup](docs/examples/enterprise-setup.md) | [CI Integration](docs/examples/ci-integration.md) |
164
+
165
+ **🎯 Quick Links:**
166
+ - 📖 [**Documentation Hub**](docs/README.md) - Complete guide index
167
+ - 🚀 [**Quick Start**](docs/getting-started/quickstart.md) - Get running in 5 minutes
168
+ - ⚙️ [**Configuration**](docs/guides/configuration.md) - Full reference
169
+ - 🤝 [**Contributing**](docs/developer/contributing.md) - Join the project
170
+
171
+ ## ⚡ Installation Options
172
+
173
+ ### Standard Installation
174
+ ```bash
175
+ pip install gitflow-analytics
176
+ ```
177
+
178
+ ### With ML Enhancement (Recommended)
179
+ ```bash
180
+ pip install gitflow-analytics
181
+ python -m spacy download en_core_web_sm
182
+ ```
183
+
184
+ ### Development Installation
185
+ ```bash
186
+ git clone https://github.com/bobmatnyc/gitflow-analytics.git
187
+ cd gitflow-analytics
188
+ pip install -e ".[dev]"
189
+ python -m spacy download en_core_web_sm
190
+ ```
191
+
192
+ ## 🔧 Configuration
193
+
194
+ ### Option 1: Organization Analysis (Recommended)
195
+ ```yaml
196
+ # config.yaml
197
+ version: "1.0"
198
+ github:
199
+   token: "${GITHUB_TOKEN}"
200
+   organization: "your-org" # Auto-discovers all repositories
201
+
202
+ analysis:
203
+   ml_categorization:
204
+     enabled: true
205
+     min_confidence: 0.7
206
+ ```
207
+
208
+ ### Option 2: Specific Repositories
209
+ ```yaml
210
+ # config.yaml
211
+ version: "1.0"
212
+ github:
213
+   token: "${GITHUB_TOKEN}"
214
+
215
+ repositories:
216
+   - name: "my-app"
217
+     path: "~/code/my-app"
218
+     github_repo: "myorg/my-app"
219
+     project_key: "APP"
220
+ ```
221
+
222
+ ### Environment Setup
223
+ ```bash
224
+ # .env (same directory as config.yaml)
225
+ GITHUB_TOKEN=ghp_your_token_here
226
+ ```
227
+
228
+ ### Run Analysis
229
+ ```bash
230
+ # Analyze last 8 weeks
231
+ gitflow-analytics -c config.yaml --weeks 8
232
+
233
+ # With custom output directory
234
+ gitflow-analytics -c config.yaml --weeks 8 --output ./reports
235
+ ```
236
+
237
+ > 💡 **Need more configuration options?** See the [Complete Configuration Guide](docs/guides/configuration.md) for advanced features, integrations, and customization.
238
+
239
+ ## 📊 Generated Reports
240
+
241
+ GitFlow Analytics generates comprehensive reports for different audiences:
242
+
243
+ ### 📈 CSV Data Files
244
+ - **weekly_metrics.csv** - Developer productivity trends by week
245
+ - **weekly_velocity.csv** - Lines-per-story-point velocity analysis
246
+ - **developers.csv** - Complete team profiles and statistics
247
+ - **summary.csv** - Project-wide statistics and benchmarks
248
+ - **untracked_commits.csv** - ML-categorized analysis of untracked work (commits without ticket references)
249
+
250
+ ### 📋 Executive Reports
251
+ - **narrative_summary.md** - Rich markdown report with:
252
+ - Executive summary with key metrics
253
+ - Team composition and work distribution
254
+ - Project activity breakdown
255
+ - Development patterns and recommendations
256
+ - Weekly trend analysis
257
+
258
+ ### Sample Executive Summary
259
+ ```markdown
260
+ ## Executive Summary
261
+ - **Total Commits**: 324 commits across 4 projects
262
+ - **Active Developers**: 8 team members
263
+ - **Ticket Coverage**: 78.4% (above industry benchmark)
264
+ - **Top Areas**: Frontend (45%), API (32%), Infrastructure (23%)
265
+
266
+ ## Key Insights
267
+ ✅ **Strong Process Adherence**: 78% ticket coverage
268
+ 🎯 **Balanced Team**: No developer >35% of total work
269
+ 📈 **Growth Trend**: +15% productivity vs last quarter
270
+ ```
271
+
272
+ ## 🛠️ Common Use Cases
273
+
274
+ **👥 Team Lead Dashboard**
275
+ - Track individual developer productivity and growth
276
+ - Identify workload distribution and potential burnout
277
+ - Monitor code quality trends and technical debt
278
+
279
+ **📈 Engineering Management**
280
+ - Generate executive reports on team velocity
281
+ - Analyze process adherence and ticket coverage
282
+ - Benchmark performance across projects and quarters
283
+
284
+ **🔍 Process Optimization**
285
+ - Identify untracked work patterns that should be formalized
286
+ - Optimize developer focus and reduce context switching
287
+ - Improve estimation accuracy with historical data
288
+
289
+ **🏢 Enterprise Analytics**
290
+ - Organization-wide repository analysis across dozens of projects
291
+ - Automated identity resolution for large, distributed teams
292
+ - Cost-effective analysis without expensive PM tool dependencies
293
+
294
+ ## Command Line Interface
295
+
296
+ ### Main Commands
297
+
298
+ ```bash
299
+ # Analyze repositories (default command)
300
+ gitflow-analytics -c config.yaml --weeks 12 --output ./reports
301
+
302
+ # Explicit analyze command (backward compatibility)
303
+ gitflow-analytics analyze -c config.yaml --weeks 12 --output ./reports
304
+
305
+ # Show cache statistics
306
+ gitflow-analytics cache-stats -c config.yaml
307
+
308
+ # List known developers
309
+ gitflow-analytics list-developers -c config.yaml
310
+
311
+ # Analyze developer identities
312
+ gitflow-analytics identities -c config.yaml
313
+
314
+ # Merge developer identities
315
+ gitflow-analytics merge-identity -c config.yaml dev1_id dev2_id
316
+
317
+ # Discover story point fields in your PM platform
318
+ gitflow-analytics discover-storypoint-fields -c config.yaml
319
+ ```
320
+
321
+ ### Options
322
+
323
+ - `--weeks, -w`: Number of weeks to analyze (default: 12)
324
+ - `--output, -o`: Output directory for reports (default: ./reports)
325
+ - `--anonymize`: Anonymize developer information
326
+ - `--no-cache`: Disable caching for fresh analysis
327
+ - `--clear-cache`: Clear cache before analysis
328
+ - `--validate-only`: Validate configuration without running
329
+ - `--skip-identity-analysis`: Skip automatic identity analysis
330
+ - `--apply-identity-suggestions`: Apply identity suggestions without prompting
331
+
332
+ ## Complete Configuration Example
333
+
334
+ Here's a complete example showing a `.env` file and the corresponding YAML configuration:
335
+
336
+ ### `.env` file
337
+ ```bash
338
+ # GitHub Configuration
339
+ GITHUB_TOKEN=ghp_xxxxxxxxxxxxxxxxxxxx
340
+ GITHUB_ORG=your-organization
341
+
342
+ # JIRA Configuration
343
+ JIRA_ACCESS_USER=developer@company.com
344
+ JIRA_ACCESS_TOKEN=ATATT3xxxxxxxxxxx
345
+
346
+ # Optional: Other integrations
347
+ # CLICKUP_TOKEN=pk_xxxxxxxxxxxx
348
+ # LINEAR_TOKEN=lin_api_xxxxxxxxxxxx
349
+ ```
350
+
351
+ ### `config.yaml` file
352
+ ```yaml
353
+ version: "1.0"
354
+
355
+ # GitHub configuration with organization discovery
356
+ github:
357
+ token: "${GITHUB_TOKEN}"
358
+ organization: "${GITHUB_ORG}"
359
+
360
+ # JIRA integration for story points
361
+ jira:
362
+ access_user: "${JIRA_ACCESS_USER}"
363
+ access_token: "${JIRA_ACCESS_TOKEN}"
364
+ base_url: "https://company.atlassian.net"
365
+
366
+ jira_integration:
367
+ enabled: true
368
+ fetch_story_points: true
369
+ story_point_fields:
370
+ - "Story point estimate" # Your field name
371
+ - "customfield_10016" # Fallback field ID
372
+
373
+ # Analysis configuration
374
+ analysis:
375
+ # Only track JIRA tickets (ignore GitHub issues, etc.)
376
+ ticket_platforms:
377
+ - jira
378
+
379
+ # Exclude bot commits and boilerplate files
380
+ exclude:
381
+ authors:
382
+ - "dependabot[bot]"
383
+ - "renovate[bot]"
384
+ paths:
385
+ - "**/node_modules/**"
386
+ - "**/*.min.js"
387
+ - "**/package-lock.json"
388
+
389
+ # Developer identity consolidation
390
+ identity:
391
+ similarity_threshold: 0.85
392
+ manual_mappings:
393
+ - name: "John Doe"
394
+ primary_email: "john.doe@company.com"
395
+ aliases:
396
+ - "jdoe@oldcompany.com"
397
+ - "john@personal.com"
398
+
399
+ # Output configuration
400
+ output:
401
+ directory: "./reports"
402
+ formats:
403
+ - csv
404
+ - markdown
405
+ ```
406
+
407
+ ## Output Reports
408
+
409
+ The tool generates comprehensive CSV reports and markdown summaries:
410
+
411
+ ### CSV Reports
412
+
413
+ 1. **Weekly Metrics** (`weekly_metrics_YYYYMMDD.csv`)
414
+ - Week-by-week developer productivity
415
+ - Story points, commits, lines changed
416
+ - Ticket coverage percentages
417
+ - Per-project breakdown
418
+
419
+ 2. **Weekly Velocity** (`weekly_velocity_YYYYMMDD.csv`)
420
+ - Lines of code per story point analysis
421
+ - Efficiency trends and velocity patterns
422
+ - PR-based vs commit-based story points breakdown
423
+ - Team velocity benchmarking and week-over-week trends
424
+
425
+ 3. **Summary Statistics** (`summary_YYYYMMDD.csv`)
426
+ - Overall project statistics
427
+ - Platform-specific ticket counts
428
+ - Top contributors
429
+
430
+ 4. **Developer Report** (`developers_YYYYMMDD.csv`)
431
+ - Complete developer profiles
432
+ - Total contributions
433
+ - Identity aliases
434
+
435
+ 5. **Untracked Commits Report** (`untracked_commits_YYYYMMDD.csv`)
436
+ - Detailed analysis of commits without ticket references
437
+ - Commit categorization (bug_fix, feature, refactor, documentation, maintenance, test, style, build)
438
+ - Enhanced metadata: commit hash, author, timestamp, project, message, file/line changes
439
+ - Configurable file change threshold for filtering significant commits
440
+
441
+ ### Enhanced Untracked Commit Analysis
442
+
443
+ The untracked commits report provides deep insights into work that bypasses ticket tracking:
444
+
445
+ **CSV Columns:**
446
+ - `commit_hash` / `short_hash`: Full and abbreviated commit identifiers
447
+ - `author` / `author_email` / `canonical_id`: Developer identification (with anonymization support)
448
+ - `date`: Commit timestamp
449
+ - `project`: Project key for multi-repository analysis
450
+ - `message`: Commit message (truncated for readability)
451
+ - `category`: Automated categorization of work type
452
+ - `files_changed` / `lines_added` / `lines_removed` / `lines_changed`: Change metrics
453
+ - `is_merge`: Boolean flag for merge commits
454
+
455
+ **Automatic Categorization:**
456
+ - **Feature**: New functionality development (`add`, `new`, `implement`, `create`)
457
+ - **Bug Fix**: Error corrections (`fix`, `bug`, `error`, `resolve`, `hotfix`)
458
+ - **Refactor**: Code restructuring (`refactor`, `optimize`, `improve`, `cleanup`)
459
+ - **Documentation**: Documentation updates (`doc`, `readme`, `comment`, `guide`)
460
+ - **Maintenance**: Routine upkeep (`update`, `upgrade`, `dependency`, `config`)
461
+ - **Test**: Testing-related changes (`test`, `spec`, `mock`, `fixture`)
462
+ - **Style**: Formatting changes (`format`, `lint`, `prettier`, `whitespace`)
463
+ - **Build**: Build system changes (`build`, `compile`, `ci`, `docker`)
464
+
465
+ ### Markdown Reports
466
+
467
+ 6. **Narrative Summary** (`narrative_summary_YYYYMMDD.md`)
468
+ - **Executive Summary**: High-level metrics and team overview
469
+ - **Team Composition**: Developer profiles with project percentages and work patterns
470
+ - **Project Activity**: Detailed breakdown by project with contributor percentages and **commit classifications**
471
+ - **Development Patterns**: Key insights from productivity and collaboration analysis
472
+ - **Pull Request Analysis**: PR metrics including size, lifetime, and review activity
473
+ - **Weekly Trends** (v1.1.0+): Week-over-week changes in classification patterns
474
+
475
+ 7. **Database-Backed Qualitative Report** (`database_qualitative_report_YYYYMMDD.md`) (v1.1.0+)
476
+ - Generated directly from SQLite storage for fast retrieval
477
+ - Includes weekly trend analysis per developer/project
478
+ - Shows classification changes over time (e.g., "Features: +15%, Bug Fixes: -5%")
479
+ - **Issue Tracking**: Platform usage and coverage analysis with simplified display
480
+ - **Enhanced Untracked Work Analysis**: Comprehensive categorization with dual percentage metrics
481
+ - **PM Platform Integration**: Story point tracking and correlation insights (when available)
482
+ - **Recommendations**: Actionable insights based on analysis patterns
483
+
484
+ ### Enhanced Narrative Report Sections
485
+
486
+ The narrative report provides comprehensive insights through multiple detailed sections:
487
+
488
+ #### Team Composition Section
489
+ - **Developer Profiles**: Individual developer statistics with commit counts
490
+ - **Project Distribution**: Shows ALL projects each developer works on with precise percentages
491
+ - **Work Style Classification**: Categorizes developers as "Focused", "Multi-project", or "Highly Focused"
492
+ - **Activity Patterns**: Identifies time patterns like "Standard Hours" or "Extended Hours"
493
+
494
+ **Example developer profile:**
495
+ ```markdown
496
+ **John Developer**
497
+ - Commits: 15
498
+ - Projects: FRONTEND (85.0%), SERVICE_TS (15.0%)
499
+ - Work Style: Focused
500
+ - Active Pattern: Standard Hours
501
+ ```
502
+
503
+ #### Project Activity Section
504
+ - **Activity by Project**: Commits and percentage of total activity per project
505
+ - **Contributor Breakdown**: Shows each developer's contribution percentage within each project
506
+ - **Lines Changed**: Quantifies the scale of changes per project
507
+
508
+ #### Issue Tracking with Simplified Display
509
+ - **Platform Usage**: Clean display of ticket platform distribution (JIRA, GitHub, etc.)
510
+ - **Coverage Analysis**: Percentage of commits that reference tickets
511
+ - **Enhanced Untracked Work Analysis**: Detailed categorization and recommendations
512
+
513
+ ### Interpreting Dual Percentage Metrics
514
+
515
+ The enhanced untracked work analysis provides two key percentage metrics for better context:
516
+
517
+ 1. **Percentage of Total Untracked Work**: Shows how much each developer contributes to the overall untracked work pool
518
+ 2. **Percentage of Developer's Individual Work**: Shows what proportion of a specific developer's commits are untracked
519
+
520
+ **Example interpretation:**
521
+ ```
522
+ - John Doe: 25 commits (40% of untracked, 15% of their work) - maintenance, style
523
+ ```
524
+
525
+ This means:
526
+ - John contributed 25 untracked commits
527
+ - These represent 40% of all untracked commits in the analysis period
528
+ - Only 15% of John's total work was untracked (85% was properly tracked)
529
+ - Most untracked work was maintenance and style changes (acceptable categories)
530
+
531
+ **Process Insights:**
532
+ - High "% of untracked" + low "% of their work" = Developer doing most of the acceptable maintenance work
533
+ - Low "% of untracked" + high "% of their work" = Developer needs process guidance
534
+ - High percentages in feature/bug_fix categories = Process improvement opportunity
535
+
536
+ ### Example Report Outputs
537
+
538
+ #### Untracked Commits CSV Sample
539
+ ```csv
540
+ commit_hash,short_hash,author,author_email,canonical_id,date,project,message,category,files_changed,lines_added,lines_removed,lines_changed,is_merge
541
+ a1b2c3d4e5f6...,a1b2c3d,John Doe,john@company.com,ID0001,2024-01-15 14:30:22,FRONTEND,Update dependency versions for security patches,maintenance,2,45,12,57,false
542
+ f6e5d4c3b2a1...,f6e5d4c,Jane Smith,jane@company.com,ID0002,2024-01-15 09:15:10,BACKEND,Fix typo in error message,bug_fix,1,1,1,2,false
543
+ 9876543210ab...,9876543,Bob Wilson,bob@company.com,ID0003,2024-01-14 16:45:33,FRONTEND,Add JSDoc comments to utility functions,documentation,3,28,0,28,false
544
+ ```
545
+
546
+ #### Complete Narrative Report Sample
547
+ ```markdown
548
+ # GitFlow Analytics Report
549
+
550
+ **Generated**: 2025-08-04 14:27:47
551
+ **Analysis Period**: Last 4 weeks
552
+
553
+ ## Executive Summary
554
+
555
+ - **Total Commits**: 35
556
+ - **Active Developers**: 3
557
+ - **Lines Changed**: 910
558
+ - **Ticket Coverage**: 71.4%
559
+ - **Active Projects**: FRONTEND, SERVICE_TS, SERVICES
560
+ - **Top Contributor**: John Developer with 15 commits
561
+
562
+ ## Team Composition
563
+
564
+ ### Developer Profiles
565
+
566
+ **John Developer**
567
+ - Commits: 15
568
+ - Projects: FRONTEND (85.0%), SERVICE_TS (15.0%)
569
+ - Work Style: Focused
570
+ - Active Pattern: Standard Hours
571
+
572
+ **Jane Smith**
573
+ - Commits: 12
574
+ - Projects: SERVICE_TS (70.0%), FRONTEND (30.0%)
575
+ - Work Style: Multi-project
576
+ - Active Pattern: Extended Hours
577
+
578
+ ## Project Activity
579
+
580
+ ### Activity by Project
581
+
582
+ **FRONTEND**
583
+ - Commits: 14 (50.0% of total)
584
+ - Lines Changed: 450
585
+ - Contributors: John Developer (71.4%), Jane Smith (28.6%)
586
+
587
+ **SERVICE_TS**
588
+ - Commits: 8 (28.6% of total)
589
+ - Lines Changed: 280
590
+ - Contributors: Jane Smith (100.0%)
591
+
592
+ ## Issue Tracking
593
+
594
+ ### Platform Usage
595
+
596
+ - **Jira**: 15 tickets (60.0%)
597
+ - **Github**: 8 tickets (32.0%)
598
+ - **Clickup**: 2 tickets (8.0%)
599
+
600
+ ### Untracked Work Analysis
601
+
602
+ **Summary**: 10 commits (28.6% of total) lack ticket references.
603
+
604
+ #### Work Categories
605
+
606
+ - **Maintenance**: 4 commits (40.0%), avg 23 lines *(acceptable untracked)*
607
+ - **Bug Fix**: 3 commits (30.0%), avg 15 lines *(should be tracked)*
608
+ - **Documentation**: 2 commits (20.0%), avg 12 lines *(acceptable untracked)*
609
+
610
+ #### Top Contributors (Untracked Work)
611
+
612
+ - **John Developer**: 1 commits (50.0% of untracked, 6.7% of their work) - *refactor*
613
+ - **Jane Smith**: 1 commits (50.0% of untracked, 8.3% of their work) - *style*
614
+
615
+ #### Recommendations for Untracked Work
616
+
617
+ 🎯 **Excellent tracking**: Less than 20% of commits are untracked - the team shows strong process adherence.
618
+
619
+ ## Recommendations
620
+
621
+ ✅ The team shows healthy development patterns. Continue current practices while monitoring for changes.
622
+ ```
623
+
624
+ ### Configuration for Enhanced Narrative Reports
625
+
626
+ The narrative reports automatically include all available sections based on your configuration and data availability:
627
+
628
+ **Always Generated:**
629
+ - Executive Summary, Team Composition, Project Activity, Development Patterns, Issue Tracking, Recommendations
630
+
631
+ **Conditionally Generated:**
632
+ - **Pull Request Analysis**: Requires GitHub integration with PR data
633
+ - **PM Platform Integration**: Requires JIRA or other PM platform configuration
634
+ - **Qualitative Analysis**: Requires ChatGPT integration setup
635
+
636
+ **Customizing Report Content:**
637
+ ```yaml
638
+ # config.yaml
639
+ output:
640
+ formats:
641
+ - csv
642
+ - markdown # Enables narrative report generation
643
+
644
+ # Optional: Enhance narrative reports with additional data
645
+ jira:
646
+ access_user: "${JIRA_ACCESS_USER}"
647
+ access_token: "${JIRA_ACCESS_TOKEN}"
648
+ base_url: "https://company.atlassian.net"
649
+
650
+ # Optional: Add qualitative insights
651
+ analysis:
652
+ chatgpt:
653
+ enabled: true
654
+ api_key: "${OPENAI_API_KEY}"
655
+ ```
656
+
657
+ ## Story Point Patterns
658
+
659
+ Configure custom regex patterns to match your team's story point format:
660
+
661
+ ```yaml
662
+ story_point_patterns:
663
+ - "SP: (\\d+)" # SP: 5
664
+ - "\\[([0-9]+) pts\\]" # [3 pts]
665
+ - "estimate: (\\d+)" # estimate: 8
666
+ ```
667
+
668
+ ## Ticket Platform Support
669
+
670
+ Automatically detects and tracks tickets from:
671
+ - **JIRA**: `PROJ-123`
672
+ - **GitHub**: `#123`, `GH-123`
673
+ - **ClickUp**: `CU-abc123`
674
+ - **Linear**: `ENG-123`
675
+
676
+ ### JIRA Integration
677
+
678
+ GitFlow Analytics can fetch story points directly from JIRA tickets. Configure your JIRA instance:
679
+
680
+ ```yaml
681
+ jira:
682
+ access_user: "${JIRA_ACCESS_USER}"
683
+ access_token: "${JIRA_ACCESS_TOKEN}"
684
+ base_url: "https://your-company.atlassian.net"
685
+
686
+ jira_integration:
687
+ enabled: true
688
+ story_point_fields:
689
+ - "Story point estimate" # Your custom field name
690
+ - "customfield_10016" # Or use field ID
691
+ ```
692
+
693
+ To discover your JIRA story point fields:
694
+ ```bash
695
+ gitflow-analytics discover-storypoint-fields -c config.yaml
696
+ ```
697
+
698
+ ## Caching
699
+
700
+ The tool uses SQLite for intelligent caching:
701
+ - Commit analysis results
702
+ - Developer identity mappings
703
+ - Pull request data
704
+
705
+ The cache is managed automatically with a configurable TTL.
706
+
707
+ ## Developer Identity Resolution
708
+
709
+ GitFlow Analytics intelligently consolidates developer identities across different email addresses and name variations.
710
+
711
+ ### Automatic Identity Analysis (New!)
712
+
713
+ Identity analysis now runs **automatically by default** when no manual mappings exist. The system will:
714
+
715
+ 1. **Analyze all developer identities** in your commits
716
+ 2. **Show suggested consolidations** with a clear preview
717
+ 3. **Prompt for approval** with a simple Y/n
718
+ 4. **Update your configuration** automatically
719
+ 5. **Continue analysis** with consolidated identities
720
+
721
+ Example of the interactive prompt:
722
+ ```
723
+ 🔍 Analyzing developer identities...
724
+
725
+ ⚠️ Found 3 potential identity clusters:
726
+
727
+ 📋 Suggested identity mappings:
728
+ john.doe@company.com
729
+ → 123456+johndoe@users.noreply.github.com
730
+ → jdoe@personal.email.com
731
+
732
+ 🤖 Found 2 bot accounts to exclude:
733
+ - dependabot[bot]
734
+ - renovate[bot]
735
+
736
+ ────────────────────────────────────────────────────────────
737
+ Apply these identity mappings to your configuration? [Y/n]:
738
+ ```
739
+
740
+ This prompt appears at most once every 7 days.
741
+
742
+ To skip automatic identity analysis:
743
+ ```bash
744
+ # Simplified syntax (default)
745
+ gitflow-analytics -c config.yaml --skip-identity-analysis
746
+
747
+ # Explicit analyze command
748
+ gitflow-analytics analyze -c config.yaml --skip-identity-analysis
749
+ ```
750
+
751
+ To manually run identity analysis:
752
+ ```bash
753
+ gitflow-analytics identities -c config.yaml
754
+ ```
755
+
756
+ ### Smart Identity Matching
757
+
758
+ The system automatically detects:
759
+ - **GitHub noreply emails** (e.g., `150280367+username@users.noreply.github.com`)
760
+ - **Name variations** (e.g., "John Doe" vs "John D" vs "jdoe")
761
+ - **Common email patterns** across domains
762
+ - **Bot accounts** for automatic exclusion
763
+
764
+ ### Manual Configuration
765
+
766
+ You can also manually configure identity mappings in your YAML:
767
+
768
+ ```yaml
769
+ analysis:
770
+ identity:
771
+ manual_mappings:
772
+ - name: "John Doe" # Optional: preferred display name for reports
773
+ primary_email: john.doe@company.com
774
+ aliases:
775
+ - jdoe@personal.email.com
776
+ - 123456+johndoe@users.noreply.github.com
777
+ - name: "Sarah Smith"
778
+ primary_email: sarah.smith@company.com
779
+ aliases:
780
+ - s.smith@oldcompany.com
781
+ ```
782
+
783
+ ### Display Name Control
784
+
785
+ The optional `name` field in manual mappings allows you to control how developer names appear in reports. This is particularly useful for:
786
+
787
+ - **Standardizing display names** across different email formats
788
+ - **Resolving duplicates** when the same person appears with slight name variations
789
+ - **Using preferred names** instead of technical email formats
790
+
791
+ **Example use cases:**
792
+ ```yaml
793
+ analysis:
794
+ identity:
795
+ manual_mappings:
796
+ # Consolidate John Smith identities
797
+ - name: "John Smith"
798
+ primary_email: "john.smith@company.com"
799
+ aliases:
800
+ - "150280367+jsmith@users.noreply.github.com"
801
+ - "jsmith-company@users.noreply.github.com"
802
+
803
+ # Standardize name variations
804
+ - name: "John Doe" # Consistent display across all reports
805
+ primary_email: "john.doe@company.com"
806
+ aliases:
807
+ - "johndoe@company.com"
808
+ - "j.doe@company.com"
809
+ ```
810
+
811
+ Without the `name` field, the system uses the canonical email's associated name, which might not be ideal for reporting.
812
+
813
+ ### Disabling Automatic Analysis
814
+
815
+ To disable the automatic identity prompt:
816
+ ```yaml
817
+ analysis:
818
+ identity:
819
+ auto_analysis: false
820
+ ```
821
+
822
+ ## ML-Enhanced Commit Categorization
823
+
824
+ GitFlow Analytics includes sophisticated machine learning capabilities for categorizing commits with high accuracy and confidence scoring.
825
+
826
+ ### How It Works
827
+
828
+ The ML categorization system uses a **hybrid approach** combining:
829
+
830
+ 1. **Semantic Analysis**: Uses spaCy NLP models to understand commit message meaning
831
+ 2. **File Pattern Recognition**: Analyzes changed files for additional context signals
832
+ 3. **Rule-based Fallback**: Falls back to traditional regex patterns when ML confidence is low
833
+ 4. **Confidence Scoring**: Provides confidence metrics for all categorizations
834
+
835
+ ### Categories Detected
836
+
837
+ The system automatically categorizes commits into:
838
+
839
+ - **Feature**: New functionality development (`add`, `implement`, `create`)
840
+ - **Bug Fix**: Error corrections (`fix`, `resolve`, `correct`)
841
+ - **Refactor**: Code restructuring (`refactor`, `optimize`, `improve`)
842
+ - **Documentation**: Documentation updates (`docs`, `readme`, `comment`)
843
+ - **Maintenance**: Routine upkeep (`update`, `upgrade`, `dependency`)
844
+ - **Test**: Testing-related changes (`test`, `spec`, `coverage`)
845
+ - **Style**: Formatting changes (`format`, `lint`, `prettier`)
846
+ - **Build**: Build system changes (`build`, `ci`, `docker`)
847
+ - **Security**: Security-related fixes (`security`, `vulnerability`)
848
+ - **Hotfix**: Urgent production fixes (`hotfix`, `critical`, `emergency`)
849
+ - **Config**: Configuration changes (`config`, `settings`, `environment`)
850
+
851
+ ### Configuration
852
+
853
+ ```yaml
854
+ analysis:
855
+ ml_categorization:
856
+ # Enable/disable ML categorization (default: true)
857
+ enabled: true
858
+
859
+ # Minimum confidence for ML predictions (0.0-1.0, default: 0.6)
860
+ min_confidence: 0.6
861
+
862
+ # Semantic vs file pattern weighting (default: 0.7 vs 0.3)
863
+ semantic_weight: 0.7
864
+ file_pattern_weight: 0.3
865
+
866
+ # Confidence threshold for ML vs rule-based (default: 0.5)
867
+ hybrid_threshold: 0.5
868
+
869
+ # Caching for performance
870
+ enable_caching: true
871
+ cache_duration_days: 30
872
+
873
+ # Processing settings
874
+ batch_size: 100
875
+ ```
876
+
877
+ ### Installation Requirements
878
+
879
+ For ML categorization, install the spaCy English model:
880
+
881
+ ```bash
882
+ python -m spacy download en_core_web_sm
883
+ ```
884
+
885
+ **Alternative models** (if the default is unavailable):
886
+ ```bash
887
+ # Medium model (more accurate, larger)
888
+ python -m spacy download en_core_web_md
889
+
890
+ # Large model (most accurate, largest)
891
+ python -m spacy download en_core_web_lg
892
+ ```
893
+
894
+ ### Performance Expectations
895
+
896
+ - **Accuracy**: 85-95% accuracy on typical commit messages
897
+ - **Speed**: ~50-100 commits/second with caching enabled
898
+ - **Fallback**: Graceful degradation to rule-based when ML unavailable
899
+ - **Memory**: ~200MB additional memory usage for spaCy models
900
+
901
+ ### Enhanced Reports
902
+
903
+ With ML categorization enabled, reports include:
904
+
905
+ - **Confidence scores** for each categorization
906
+ - **Method indicators** (ML, rules, or cached)
907
+ - **Alternative predictions** for uncertain cases
908
+ - **ML performance statistics** in analysis summaries
909
+
910
+ ### Example Enhanced Output
911
+
912
+ ```csv
913
+ commit_hash,category,ml_confidence,ml_method,message
914
+ a1b2c3d,feature,0.89,ml,"Add user authentication system"
915
+ f6e5d4c,bug_fix,0.92,ml,"Fix memory leak in cache cleanup"
916
+ 9876543,maintenance,0.74,rules,"Update dependency versions"
917
+ ```
918
+
919
+ ## Troubleshooting
920
+
921
+ ### YAML Configuration Errors
922
+
923
+ GitFlow Analytics provides helpful error messages when YAML configuration issues are encountered. Here are common errors and their solutions:
924
+
925
+ #### Tab Characters Not Allowed
926
+ ```
927
+ ❌ YAML configuration error at line 3, column 1:
928
+ 🚫 Tab characters are not allowed in YAML files!
929
+ ```
930
+ **Fix**: Replace all tabs with spaces (use 2 or 4 spaces for indentation)
931
+ - Most editors can show whitespace characters and convert tabs to spaces
932
+ - In VS Code: View → Render Whitespace, then Edit → Convert Indentation to Spaces
933
+
934
+ #### Missing Colons
935
+ ```
936
+ ❌ YAML configuration error at line 5, column 10:
937
+ 🚫 Missing colon (:) after a key name!
938
+ ```
939
+ **Fix**: Add a colon and space after each key name
940
+ ```yaml
941
+ # Correct:
942
+ repositories:
943
+ - name: my-repo
944
+
945
+ # Incorrect:
946
+ repositories
947
+ - name my-repo
948
+ ```
949
+
950
+ #### Unclosed Quotes
951
+ ```
952
+ ❌ YAML configuration error at line 8, column 15:
953
+ 🚫 Unclosed quoted string!
954
+ ```
955
+ **Fix**: Ensure all quotes are properly closed
956
+ ```yaml
957
+ # Correct:
958
+ token: "my-token-value"
959
+
960
+ # Incorrect:
961
+ token: "my-token-value
962
+ ```
963
+
964
+ #### Invalid Indentation
965
+ ```
966
+ ❌ YAML configuration error:
967
+ 🚫 Indentation error or invalid structure!
968
+ ```
969
+ **Fix**: Use consistent indentation (either 2 or 4 spaces)
970
+ ```yaml
971
+ # Correct:
972
+ analysis:
973
+   exclude:
973
+     paths:
974
+       - "vendor/**"
975
+
976
+ # Incorrect:
977
+ analysis:
978
+   exclude:
979
+      paths: # 3 spaces - inconsistent!
980
+        - "vendor/**"
982
+ ```
983
+
984
+ ### Tips for Valid YAML
985
+
986
+ 1. **Use a YAML validator**: Check your configuration with an online YAML validator before running an analysis
987
+ 2. **Enable whitespace display**: Make tabs and spaces visible in your editor
988
+ 3. **Use quotes for special characters**: Wrap values containing `:`, `#`, `@`, etc. in quotes
989
+ 4. **Consistent indentation**: Pick 2 or 4 spaces and stick to it throughout the file
990
+ 5. **Check the sample config**: Reference `config-sample.yaml` for proper structure
991
+
992
+ ### Configuration Validation
993
+
994
+ Beyond YAML syntax, GitFlow Analytics validates:
995
+ - Required fields (`repositories` must have `name` and `path`)
996
+ - Environment variable resolution
997
+ - File path existence
998
+ - Valid configuration structure
999
+
1000
+ If you encounter persistent issues, run with `--debug` for detailed error information:
1001
+ ```bash
1002
+ # Simplified syntax (default)
1003
+ gitflow-analytics -c config.yaml --debug
1004
+
1005
+ # Explicit analyze command
1006
+ gitflow-analytics analyze -c config.yaml --debug
1007
+ ```
1008
+
1009
+ ## Contributing
1010
+
1011
+ Contributions are welcome! Please feel free to submit a Pull Request.
1012
+
1013
+ ## License
1014
+
1015
+ This project is licensed under the MIT License - see the LICENSE file for details.