gitflow-analytics 1.0.1__py3-none-any.whl → 1.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gitflow_analytics/__init__.py +11 -11
- gitflow_analytics/_version.py +2 -2
- gitflow_analytics/classification/__init__.py +31 -0
- gitflow_analytics/classification/batch_classifier.py +752 -0
- gitflow_analytics/classification/classifier.py +464 -0
- gitflow_analytics/classification/feature_extractor.py +725 -0
- gitflow_analytics/classification/linguist_analyzer.py +574 -0
- gitflow_analytics/classification/model.py +455 -0
- gitflow_analytics/cli.py +4490 -378
- gitflow_analytics/cli_rich.py +503 -0
- gitflow_analytics/config/__init__.py +43 -0
- gitflow_analytics/config/errors.py +261 -0
- gitflow_analytics/config/loader.py +904 -0
- gitflow_analytics/config/profiles.py +264 -0
- gitflow_analytics/config/repository.py +124 -0
- gitflow_analytics/config/schema.py +441 -0
- gitflow_analytics/config/validator.py +154 -0
- gitflow_analytics/config.py +44 -398
- gitflow_analytics/core/analyzer.py +1320 -172
- gitflow_analytics/core/branch_mapper.py +132 -132
- gitflow_analytics/core/cache.py +1554 -175
- gitflow_analytics/core/data_fetcher.py +1193 -0
- gitflow_analytics/core/identity.py +571 -185
- gitflow_analytics/core/metrics_storage.py +526 -0
- gitflow_analytics/core/progress.py +372 -0
- gitflow_analytics/core/schema_version.py +269 -0
- gitflow_analytics/extractors/base.py +13 -11
- gitflow_analytics/extractors/ml_tickets.py +1100 -0
- gitflow_analytics/extractors/story_points.py +77 -59
- gitflow_analytics/extractors/tickets.py +841 -89
- gitflow_analytics/identity_llm/__init__.py +6 -0
- gitflow_analytics/identity_llm/analysis_pass.py +231 -0
- gitflow_analytics/identity_llm/analyzer.py +464 -0
- gitflow_analytics/identity_llm/models.py +76 -0
- gitflow_analytics/integrations/github_integration.py +258 -87
- gitflow_analytics/integrations/jira_integration.py +572 -123
- gitflow_analytics/integrations/orchestrator.py +206 -82
- gitflow_analytics/metrics/activity_scoring.py +322 -0
- gitflow_analytics/metrics/branch_health.py +470 -0
- gitflow_analytics/metrics/dora.py +542 -179
- gitflow_analytics/models/database.py +986 -59
- gitflow_analytics/pm_framework/__init__.py +115 -0
- gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
- gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
- gitflow_analytics/pm_framework/base.py +406 -0
- gitflow_analytics/pm_framework/models.py +211 -0
- gitflow_analytics/pm_framework/orchestrator.py +652 -0
- gitflow_analytics/pm_framework/registry.py +333 -0
- gitflow_analytics/qualitative/__init__.py +29 -0
- gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
- gitflow_analytics/qualitative/classifiers/__init__.py +13 -0
- gitflow_analytics/qualitative/classifiers/change_type.py +742 -0
- gitflow_analytics/qualitative/classifiers/domain_classifier.py +506 -0
- gitflow_analytics/qualitative/classifiers/intent_analyzer.py +535 -0
- gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
- gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
- gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
- gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
- gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
- gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
- gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
- gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
- gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
- gitflow_analytics/qualitative/classifiers/risk_analyzer.py +438 -0
- gitflow_analytics/qualitative/core/__init__.py +13 -0
- gitflow_analytics/qualitative/core/llm_fallback.py +657 -0
- gitflow_analytics/qualitative/core/nlp_engine.py +382 -0
- gitflow_analytics/qualitative/core/pattern_cache.py +479 -0
- gitflow_analytics/qualitative/core/processor.py +673 -0
- gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
- gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
- gitflow_analytics/qualitative/models/__init__.py +25 -0
- gitflow_analytics/qualitative/models/schemas.py +306 -0
- gitflow_analytics/qualitative/utils/__init__.py +13 -0
- gitflow_analytics/qualitative/utils/batch_processor.py +339 -0
- gitflow_analytics/qualitative/utils/cost_tracker.py +345 -0
- gitflow_analytics/qualitative/utils/metrics.py +361 -0
- gitflow_analytics/qualitative/utils/text_processing.py +285 -0
- gitflow_analytics/reports/__init__.py +100 -0
- gitflow_analytics/reports/analytics_writer.py +550 -18
- gitflow_analytics/reports/base.py +648 -0
- gitflow_analytics/reports/branch_health_writer.py +322 -0
- gitflow_analytics/reports/classification_writer.py +924 -0
- gitflow_analytics/reports/cli_integration.py +427 -0
- gitflow_analytics/reports/csv_writer.py +1700 -216
- gitflow_analytics/reports/data_models.py +504 -0
- gitflow_analytics/reports/database_report_generator.py +427 -0
- gitflow_analytics/reports/example_usage.py +344 -0
- gitflow_analytics/reports/factory.py +499 -0
- gitflow_analytics/reports/formatters.py +698 -0
- gitflow_analytics/reports/html_generator.py +1116 -0
- gitflow_analytics/reports/interfaces.py +489 -0
- gitflow_analytics/reports/json_exporter.py +2770 -0
- gitflow_analytics/reports/narrative_writer.py +2289 -158
- gitflow_analytics/reports/story_point_correlation.py +1144 -0
- gitflow_analytics/reports/weekly_trends_writer.py +389 -0
- gitflow_analytics/training/__init__.py +5 -0
- gitflow_analytics/training/model_loader.py +377 -0
- gitflow_analytics/training/pipeline.py +550 -0
- gitflow_analytics/tui/__init__.py +5 -0
- gitflow_analytics/tui/app.py +724 -0
- gitflow_analytics/tui/screens/__init__.py +8 -0
- gitflow_analytics/tui/screens/analysis_progress_screen.py +496 -0
- gitflow_analytics/tui/screens/configuration_screen.py +523 -0
- gitflow_analytics/tui/screens/loading_screen.py +348 -0
- gitflow_analytics/tui/screens/main_screen.py +321 -0
- gitflow_analytics/tui/screens/results_screen.py +722 -0
- gitflow_analytics/tui/widgets/__init__.py +7 -0
- gitflow_analytics/tui/widgets/data_table.py +255 -0
- gitflow_analytics/tui/widgets/export_modal.py +301 -0
- gitflow_analytics/tui/widgets/progress_widget.py +187 -0
- gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
- gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
- gitflow_analytics-1.0.1.dist-info/METADATA +0 -463
- gitflow_analytics-1.0.1.dist-info/RECORD +0 -31
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
- {gitflow_analytics-1.0.1.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,574 @@
|
|
|
1
|
+
"""File language and activity analysis inspired by GitHub Linguist.
|
|
2
|
+
|
|
3
|
+
This module provides capabilities to analyze file changes in commits to determine:
|
|
4
|
+
- Programming languages involved
|
|
5
|
+
- Development activities (UI, API, database, etc.)
|
|
6
|
+
- Generated/binary file detection
|
|
7
|
+
- Directory-based activity patterns
|
|
8
|
+
|
|
9
|
+
The analysis helps understand the technical context of commits for better classification.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import logging
import re
from collections import Counter
from pathlib import Path
from typing import Any
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class LinguistAnalyzer:
    """Analyzes files to determine programming languages and development activities.

    This class provides GitHub Linguist-inspired analysis of file changes,
    mapping file extensions to languages and directory patterns to activities.
    It's designed to work with commit file lists to provide context for ML classification.

    Only path strings are inspected; file contents are never opened.
    """

    # Lookup tables for get_language_category(). They are checked in the order
    # frontend -> backend -> mobile -> data, so a language listed in more than
    # one table (Kotlin, Python) resolves to the first table that contains it.
    _FRONTEND_LANGUAGES = frozenset(
        {
            "JavaScript",
            "TypeScript",
            "HTML",
            "CSS",
            "SCSS",
            "Sass",
            "Less",
            "Vue",
        }
    )
    _BACKEND_LANGUAGES = frozenset(
        {
            "Python",
            "Java",
            "Go",
            "Rust",
            "Ruby",
            "PHP",
            "C#",
            "C++",
            "C",
            "Scala",
            "Kotlin",
        }
    )
    _MOBILE_LANGUAGES = frozenset({"Swift", "Objective-C", "Objective-C++", "Kotlin", "Dart"})
    _DATA_LANGUAGES = frozenset({"SQL", "R", "Julia", "Python"})

    # Extension-less build files whose language name is the title-cased filename.
    _SPECIAL_FILENAMES = frozenset({"dockerfile", "makefile", "rakefile", "gemfile"})

    def __init__(self) -> None:
        """Initialize the linguist analyzer with language and activity mappings."""
        # File extension to programming language mappings
        # Based on GitHub Linguist but simplified for common cases
        self.language_mappings = {
            # Web Frontend
            ".js": "JavaScript",
            ".jsx": "JavaScript",
            ".ts": "TypeScript",
            ".tsx": "TypeScript",
            ".vue": "Vue",
            ".html": "HTML",
            ".htm": "HTML",
            ".css": "CSS",
            ".scss": "SCSS",
            ".sass": "Sass",
            ".less": "Less",
            # Backend Languages
            ".py": "Python",
            ".java": "Java",
            ".kt": "Kotlin",
            ".scala": "Scala",
            ".go": "Go",
            ".rs": "Rust",
            ".rb": "Ruby",
            ".php": "PHP",
            ".cs": "C#",
            ".fs": "F#",
            ".vb": "Visual Basic",
            ".cpp": "C++",
            ".cc": "C++",
            ".cxx": "C++",
            ".c": "C",
            ".h": "C/C++",
            ".hpp": "C++",
            # Mobile
            ".swift": "Swift",
            ".m": "Objective-C",
            ".mm": "Objective-C++",
            ".dart": "Dart",
            # Data & Config
            ".sql": "SQL",
            ".json": "JSON",
            ".yaml": "YAML",
            ".yml": "YAML",
            ".xml": "XML",
            ".toml": "TOML",
            ".ini": "INI",
            ".env": "Environment",
            ".properties": "Properties",
            # Shell & Scripting
            ".sh": "Shell",
            ".bash": "Bash",
            ".zsh": "Zsh",
            ".fish": "Fish",
            ".ps1": "PowerShell",
            ".bat": "Batch",
            ".cmd": "Batch",
            # Documentation
            ".md": "Markdown",
            ".rst": "reStructuredText",
            ".txt": "Text",
            ".adoc": "AsciiDoc",
            # Build & CI
            ".dockerfile": "Dockerfile",
            ".gradle": "Gradle",
            ".maven": "Maven",
            ".cmake": "CMake",
            ".make": "Makefile",
            # Misc
            ".r": "R",
            ".jl": "Julia",
            ".ex": "Elixir",
            ".exs": "Elixir",
            ".erl": "Erlang",
            ".hrl": "Erlang",
            ".clj": "Clojure",
            ".cljs": "ClojureScript",
            ".hs": "Haskell",
            ".elm": "Elm",
            ".lua": "Lua",
            ".pl": "Perl",
            ".pm": "Perl",
        }

        # Directory patterns to activity type mappings
        self.directory_activity_patterns = {
            # Frontend/UI patterns
            "ui": [
                "ui/",
                "frontend/",
                "client/",
                "web/",
                "www/",
                "public/",
                "assets/",
                "static/",
                "components/",
                "views/",
                "pages/",
                "templates/",
                "layouts/",
                "styles/",
                "css/",
                "js/",
                "javascript/",
                "typescript/",
                "react/",
                "vue/",
                "angular/",
            ],
            # Backend/API patterns
            "api": [
                "api/",
                "backend/",
                "server/",
                "service/",
                "services/",
                "controllers/",
                "handlers/",
                "routes/",
                "endpoints/",
                "middleware/",
                "auth/",
                "authentication/",
                "authorization/",
                "business/",
                "domain/",
                "core/",
                "logic/",
            ],
            # Database patterns
            "database": [
                "database/",
                "db/",
                "data/",
                "models/",
                "entities/",
                "repositories/",
                "dao/",
                "migrations/",
                "schema/",
                "seeds/",
                "fixtures/",
                "sql/",
                "queries/",
            ],
            # Testing patterns
            "test": [
                "test/",
                "tests/",
                "testing/",
                "spec/",
                "specs/",
                "__tests__/",
                "e2e/",
                "integration/",
                "unit/",
                "fixtures/",
                "mocks/",
                "stubs/",
            ],
            # Documentation patterns
            "docs": [
                "docs/",
                "doc/",
                "documentation/",
                "readme/",
                "guides/",
                "tutorials/",
                "examples/",
                "samples/",
                "wiki/",
                "help/",
                "manual/",
            ],
            # Infrastructure/DevOps patterns
            "infrastructure": [
                "infrastructure/",
                "infra/",
                "ops/",
                "devops/",
                "deploy/",
                "deployment/",
                "k8s/",
                "kubernetes/",
                "docker/",
                "terraform/",
                "ansible/",
                "helm/",
                "ci/",
                "cd/",
                ".github/",
                ".gitlab/",
                "jenkins/",
                "scripts/",
                "tools/",
                "utilities/",
                "bin/",
            ],
            # Configuration patterns
            "config": [
                "config/",
                "configuration/",
                "settings/",
                "env/",
                "environment/",
                "properties/",
                "resources/",
                "assets/config/",
                "etc/",
            ],
            # Build patterns
            "build": [
                "build/",
                "dist/",
                "target/",
                "out/",
                "output/",
                "generated/",
                "artifacts/",
                "release/",
                "gradle/",
                "maven/",
                "npm/",
                "node_modules/",
            ],
            # Mobile patterns
            "mobile": [
                "mobile/",
                "app/",
                "android/",
                "ios/",
                "flutter/",
                "react-native/",
                "cordova/",
                "phonegap/",
                "ionic/",
            ],
        }

        # File patterns for generated/binary content detection
        self.generated_patterns = [
            # Compiled/Generated files
            r"\.min\.(js|css)$",
            r"\.bundle\.(js|css)$",
            r"\.generated\.",
            r"\.g\.(cs|java|py)$",
            r"_pb2\.py$",  # Protocol buffer generated files
            r"\.pb\.go$",
            # Build artifacts
            r"\.(class|o|obj|exe|dll|so|dylib)$",
            r"\.a$",  # Static libraries
            r"\.jar$",
            r"\.war$",
            r"\.ear$",
            # Package files
            r"package-lock\.json$",
            r"yarn\.lock$",
            r"Gemfile\.lock$",
            r"composer\.lock$",
            r"Pipfile\.lock$",
            # IDE/Editor files
            r"\.(idea|vscode|settings)/",
            r"\.swp$",
            r"\.swo$",
            r"~$",
            # OS files
            r"\.DS_Store$",
            r"Thumbs\.db$",
            r"desktop\.ini$",
            # Log files
            r"\.(log|logs)$",
            r"\.log\.",
        ]

        # Binary file extensions
        self.binary_extensions = {
            ".jpg",
            ".jpeg",
            ".png",
            ".gif",
            ".bmp",
            ".ico",
            ".svg",
            ".pdf",
            ".doc",
            ".docx",
            ".xls",
            ".xlsx",
            ".ppt",
            ".pptx",
            ".zip",
            ".tar",
            ".gz",
            ".bz2",
            ".7z",
            ".rar",
            ".mp3",
            ".mp4",
            ".avi",
            ".mov",
            ".wav",
            ".flv",
            ".ttf",
            ".otf",
            ".woff",
            ".woff2",
            ".eot",
            ".bin",
            ".dat",
            ".db",
            ".sqlite",
            ".sqlite3",
        }

        # Compile regex patterns for efficiency
        self._compile_patterns()

    def _compile_patterns(self) -> None:
        """Compile ``self.generated_patterns`` into case-insensitive regex objects.

        The result is stored in ``self.compiled_generated_patterns``. Call this
        again after modifying ``self.generated_patterns`` to refresh the cache.
        """
        self.compiled_generated_patterns = [
            re.compile(pattern, re.IGNORECASE) for pattern in self.generated_patterns
        ]

    def analyze_commit_files(self, file_paths: list[str]) -> dict[str, Any]:
        """Analyze a list of file paths from a commit.

        This method provides comprehensive analysis of files changed in a commit,
        including language detection, activity classification, and metadata extraction.

        Args:
            file_paths: List of file paths from a git commit

        Returns:
            Dictionary containing:
            - languages: Counter of programming languages
            - activities: Counter of development activities
            - primary_language: Most common language (or None)
            - primary_activity: Most common activity (or None)
            - file_count: Total number of files
            - generated_count: Number of generated/binary files
            - generated_ratio: Ratio of generated to total files
            - language_diversity: Number of unique languages
            - activity_diversity: Number of unique activities
            - file_types: Counter of file extensions
            - is_multilingual: Whether multiple languages are involved
            - is_cross_functional: Whether multiple activities are involved
        """
        if not file_paths:
            return self._empty_analysis_result()

        languages = Counter()
        activities = Counter()
        file_types = Counter()
        generated_count = 0

        for file_path in file_paths:
            file_analysis = self._analyze_single_file(file_path)

            # Files with no recognised language are left out of the language tally
            if file_analysis["language"]:
                languages[file_analysis["language"]] += 1

            # One file may contribute to several activities
            activities.update(file_analysis["activities"])

            # Extension-less files are counted under the empty string
            file_types[file_analysis["extension"]] += 1

            # Generated and binary files share a single counter
            if file_analysis["is_generated"] or file_analysis["is_binary"]:
                generated_count += 1

        # file_paths is non-empty here, so the division is always safe
        total_files = len(file_paths)
        generated_ratio = generated_count / total_files

        primary_language = languages.most_common(1)[0][0] if languages else None
        primary_activity = activities.most_common(1)[0][0] if activities else None

        language_diversity = len(languages)
        activity_diversity = len(activities)

        return {
            "languages": languages,
            "activities": activities,
            "primary_language": primary_language,
            "primary_activity": primary_activity,
            "file_count": total_files,
            "generated_count": generated_count,
            "generated_ratio": generated_ratio,
            "language_diversity": language_diversity,
            "activity_diversity": activity_diversity,
            "file_types": file_types,
            "is_multilingual": language_diversity > 1,
            "is_cross_functional": activity_diversity > 1,
        }

    def _analyze_single_file(self, file_path: str) -> dict[str, Any]:
        """Analyze a single file path.

        Args:
            file_path: Path to analyze

        Returns:
            Dictionary with keys ``language`` (str or None), ``activities``
            (list of str), ``extension`` (lower-cased suffix, possibly empty),
            ``is_generated``, ``is_binary``, ``filename`` and ``directory``
            (empty string for files at the repository root).
        """
        path_obj = Path(file_path)
        extension = path_obj.suffix.lower()

        # Detect language from extension
        language = self.language_mappings.get(extension)

        if not language:
            filename = path_obj.name.lower()

            # Dotfiles such as ".env" have an empty suffix, so the extension
            # lookup above misses them; retry with the whole filename.
            if not extension and filename.startswith("."):
                language = self.language_mappings.get(filename)

            if not language:
                if filename in self._SPECIAL_FILENAMES:
                    language = filename.title()
                elif filename.startswith("dockerfile.") or (
                    not extension and filename.startswith("dockerfile")
                ):
                    # Covers variants such as "Dockerfile.prod" and "Dockerfile-dev"
                    language = "Dockerfile"

        # Detect activities from directory patterns
        activities = self._classify_directory_activities(file_path)

        # Check if file is generated or binary
        is_generated = any(
            pattern.search(file_path) for pattern in self.compiled_generated_patterns
        )
        is_binary = extension in self.binary_extensions

        return {
            "language": language,
            "activities": activities,
            "extension": extension,
            "is_generated": is_generated,
            "is_binary": is_binary,
            "filename": path_obj.name,
            "directory": str(path_obj.parent) if path_obj.parent != Path(".") else "",
        }

    def _classify_directory_activities(self, file_path: str) -> list[str]:
        """Classify development activities based on directory patterns.

        Patterns are matched against whole path segments, case-insensitively
        and with backslashes treated as forward slashes. Matching on segments
        prevents a pattern such as ``out/`` from firing on ``checkout/``.

        Args:
            file_path: File path to analyze

        Returns:
            List of activity types that match the file path, each at most once
            and in the order they appear in ``directory_activity_patterns``;
            ``["general"]`` when nothing matches.
        """
        # The leading "/" lets "/<pattern>" match at the start of the path too
        anchored_path = "/" + file_path.lower().replace("\\", "/")

        activities = [
            activity
            for activity, patterns in self.directory_activity_patterns.items()
            if any(("/" + pattern.lstrip("/")) in anchored_path for pattern in patterns)
        ]

        # If no specific activity detected, classify as 'general'
        return activities or ["general"]

    def _empty_analysis_result(self) -> dict[str, Any]:
        """Return the result structure used when there are no files to analyze."""
        return {
            "languages": Counter(),
            "activities": Counter(),
            "primary_language": None,
            "primary_activity": None,
            "file_count": 0,
            "generated_count": 0,
            "generated_ratio": 0.0,
            "language_diversity": 0,
            "activity_diversity": 0,
            "file_types": Counter(),
            "is_multilingual": False,
            "is_cross_functional": False,
        }

    def get_language_category(self, language: str) -> str:
        """Get high-level category for a programming language.

        Categories are tested in the order frontend, backend, mobile, data.
        Kotlin and Python each appear in two tables and therefore both
        resolve to ``"backend"``.

        Args:
            language: Programming language name

        Returns:
            One of ``"frontend"``, ``"backend"``, ``"mobile"``, ``"data"`` or
            ``"other"``.
        """
        if language in self._FRONTEND_LANGUAGES:
            return "frontend"
        if language in self._BACKEND_LANGUAGES:
            return "backend"
        if language in self._MOBILE_LANGUAGES:
            return "mobile"
        if language in self._DATA_LANGUAGES:
            return "data"
        return "other"

    def get_supported_languages(self) -> list[str]:
        """Get list of all supported programming languages.

        Returns:
            Sorted list of supported language names, without duplicates
        """
        return sorted(set(self.language_mappings.values()))

    def get_supported_activities(self) -> list[str]:
        """Get list of all supported activity types.

        Returns:
            Sorted list of supported activity types
        """
        return sorted(self.directory_activity_patterns.keys())
|