gitflow-analytics 1.0.3__py3-none-any.whl → 1.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116)
  1. gitflow_analytics/_version.py +1 -1
  2. gitflow_analytics/classification/__init__.py +31 -0
  3. gitflow_analytics/classification/batch_classifier.py +752 -0
  4. gitflow_analytics/classification/classifier.py +464 -0
  5. gitflow_analytics/classification/feature_extractor.py +725 -0
  6. gitflow_analytics/classification/linguist_analyzer.py +574 -0
  7. gitflow_analytics/classification/model.py +455 -0
  8. gitflow_analytics/cli.py +4108 -350
  9. gitflow_analytics/cli_rich.py +198 -48
  10. gitflow_analytics/config/__init__.py +43 -0
  11. gitflow_analytics/config/errors.py +261 -0
  12. gitflow_analytics/config/loader.py +904 -0
  13. gitflow_analytics/config/profiles.py +264 -0
  14. gitflow_analytics/config/repository.py +124 -0
  15. gitflow_analytics/config/schema.py +441 -0
  16. gitflow_analytics/config/validator.py +154 -0
  17. gitflow_analytics/config.py +44 -508
  18. gitflow_analytics/core/analyzer.py +1209 -98
  19. gitflow_analytics/core/cache.py +1337 -29
  20. gitflow_analytics/core/data_fetcher.py +1193 -0
  21. gitflow_analytics/core/identity.py +363 -14
  22. gitflow_analytics/core/metrics_storage.py +526 -0
  23. gitflow_analytics/core/progress.py +372 -0
  24. gitflow_analytics/core/schema_version.py +269 -0
  25. gitflow_analytics/extractors/ml_tickets.py +1100 -0
  26. gitflow_analytics/extractors/story_points.py +8 -1
  27. gitflow_analytics/extractors/tickets.py +749 -11
  28. gitflow_analytics/identity_llm/__init__.py +6 -0
  29. gitflow_analytics/identity_llm/analysis_pass.py +231 -0
  30. gitflow_analytics/identity_llm/analyzer.py +464 -0
  31. gitflow_analytics/identity_llm/models.py +76 -0
  32. gitflow_analytics/integrations/github_integration.py +175 -11
  33. gitflow_analytics/integrations/jira_integration.py +461 -24
  34. gitflow_analytics/integrations/orchestrator.py +124 -1
  35. gitflow_analytics/metrics/activity_scoring.py +322 -0
  36. gitflow_analytics/metrics/branch_health.py +470 -0
  37. gitflow_analytics/metrics/dora.py +379 -20
  38. gitflow_analytics/models/database.py +843 -53
  39. gitflow_analytics/pm_framework/__init__.py +115 -0
  40. gitflow_analytics/pm_framework/adapters/__init__.py +50 -0
  41. gitflow_analytics/pm_framework/adapters/jira_adapter.py +1845 -0
  42. gitflow_analytics/pm_framework/base.py +406 -0
  43. gitflow_analytics/pm_framework/models.py +211 -0
  44. gitflow_analytics/pm_framework/orchestrator.py +652 -0
  45. gitflow_analytics/pm_framework/registry.py +333 -0
  46. gitflow_analytics/qualitative/__init__.py +9 -10
  47. gitflow_analytics/qualitative/chatgpt_analyzer.py +259 -0
  48. gitflow_analytics/qualitative/classifiers/__init__.py +3 -3
  49. gitflow_analytics/qualitative/classifiers/change_type.py +518 -244
  50. gitflow_analytics/qualitative/classifiers/domain_classifier.py +272 -165
  51. gitflow_analytics/qualitative/classifiers/intent_analyzer.py +321 -222
  52. gitflow_analytics/qualitative/classifiers/llm/__init__.py +35 -0
  53. gitflow_analytics/qualitative/classifiers/llm/base.py +193 -0
  54. gitflow_analytics/qualitative/classifiers/llm/batch_processor.py +383 -0
  55. gitflow_analytics/qualitative/classifiers/llm/cache.py +479 -0
  56. gitflow_analytics/qualitative/classifiers/llm/cost_tracker.py +435 -0
  57. gitflow_analytics/qualitative/classifiers/llm/openai_client.py +403 -0
  58. gitflow_analytics/qualitative/classifiers/llm/prompts.py +373 -0
  59. gitflow_analytics/qualitative/classifiers/llm/response_parser.py +287 -0
  60. gitflow_analytics/qualitative/classifiers/llm_commit_classifier.py +607 -0
  61. gitflow_analytics/qualitative/classifiers/risk_analyzer.py +215 -189
  62. gitflow_analytics/qualitative/core/__init__.py +4 -4
  63. gitflow_analytics/qualitative/core/llm_fallback.py +239 -235
  64. gitflow_analytics/qualitative/core/nlp_engine.py +157 -148
  65. gitflow_analytics/qualitative/core/pattern_cache.py +214 -192
  66. gitflow_analytics/qualitative/core/processor.py +381 -248
  67. gitflow_analytics/qualitative/enhanced_analyzer.py +2236 -0
  68. gitflow_analytics/qualitative/example_enhanced_usage.py +420 -0
  69. gitflow_analytics/qualitative/models/__init__.py +7 -7
  70. gitflow_analytics/qualitative/models/schemas.py +155 -121
  71. gitflow_analytics/qualitative/utils/__init__.py +4 -4
  72. gitflow_analytics/qualitative/utils/batch_processor.py +136 -123
  73. gitflow_analytics/qualitative/utils/cost_tracker.py +142 -140
  74. gitflow_analytics/qualitative/utils/metrics.py +172 -158
  75. gitflow_analytics/qualitative/utils/text_processing.py +146 -104
  76. gitflow_analytics/reports/__init__.py +100 -0
  77. gitflow_analytics/reports/analytics_writer.py +539 -14
  78. gitflow_analytics/reports/base.py +648 -0
  79. gitflow_analytics/reports/branch_health_writer.py +322 -0
  80. gitflow_analytics/reports/classification_writer.py +924 -0
  81. gitflow_analytics/reports/cli_integration.py +427 -0
  82. gitflow_analytics/reports/csv_writer.py +1676 -212
  83. gitflow_analytics/reports/data_models.py +504 -0
  84. gitflow_analytics/reports/database_report_generator.py +427 -0
  85. gitflow_analytics/reports/example_usage.py +344 -0
  86. gitflow_analytics/reports/factory.py +499 -0
  87. gitflow_analytics/reports/formatters.py +698 -0
  88. gitflow_analytics/reports/html_generator.py +1116 -0
  89. gitflow_analytics/reports/interfaces.py +489 -0
  90. gitflow_analytics/reports/json_exporter.py +2770 -0
  91. gitflow_analytics/reports/narrative_writer.py +2287 -158
  92. gitflow_analytics/reports/story_point_correlation.py +1144 -0
  93. gitflow_analytics/reports/weekly_trends_writer.py +389 -0
  94. gitflow_analytics/training/__init__.py +5 -0
  95. gitflow_analytics/training/model_loader.py +377 -0
  96. gitflow_analytics/training/pipeline.py +550 -0
  97. gitflow_analytics/tui/__init__.py +1 -1
  98. gitflow_analytics/tui/app.py +129 -126
  99. gitflow_analytics/tui/screens/__init__.py +3 -3
  100. gitflow_analytics/tui/screens/analysis_progress_screen.py +188 -179
  101. gitflow_analytics/tui/screens/configuration_screen.py +154 -178
  102. gitflow_analytics/tui/screens/loading_screen.py +100 -110
  103. gitflow_analytics/tui/screens/main_screen.py +89 -72
  104. gitflow_analytics/tui/screens/results_screen.py +305 -281
  105. gitflow_analytics/tui/widgets/__init__.py +2 -2
  106. gitflow_analytics/tui/widgets/data_table.py +67 -69
  107. gitflow_analytics/tui/widgets/export_modal.py +76 -76
  108. gitflow_analytics/tui/widgets/progress_widget.py +41 -46
  109. gitflow_analytics-1.3.6.dist-info/METADATA +1015 -0
  110. gitflow_analytics-1.3.6.dist-info/RECORD +122 -0
  111. gitflow_analytics-1.0.3.dist-info/METADATA +0 -490
  112. gitflow_analytics-1.0.3.dist-info/RECORD +0 -62
  113. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/WHEEL +0 -0
  114. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/entry_points.txt +0 -0
  115. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/licenses/LICENSE +0 -0
  116. {gitflow_analytics-1.0.3.dist-info → gitflow_analytics-1.3.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,574 @@
1
+ """File language and activity analysis inspired by GitHub Linguist.
2
+
3
+ This module provides capabilities to analyze file changes in commits to determine:
4
+ - Programming languages involved
5
+ - Development activities (UI, API, database, etc.)
6
+ - Generated/binary file detection
7
+ - Directory-based activity patterns
8
+
9
+ The analysis helps understand the technical context of commits for better classification.
10
+ """
11
+
12
import logging
import re
from collections import Counter
from pathlib import Path
from typing import Any
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class LinguistAnalyzer:
    """Analyzes files to determine programming languages and development activities.

    This class provides GitHub Linguist-inspired analysis of file changes,
    mapping file extensions to languages and directory patterns to activities.
    It's designed to work with commit file lists to provide context for ML classification.

    Only the path strings are inspected; file contents are never read.
    """

    def __init__(self):
        """Initialize the linguist analyzer with language and activity mappings."""
        # File extension to programming language mappings
        # Based on GitHub Linguist but simplified for common cases
        self.language_mappings = {
            # Web Frontend
            ".js": "JavaScript",
            ".jsx": "JavaScript",
            ".ts": "TypeScript",
            ".tsx": "TypeScript",
            ".vue": "Vue",
            ".html": "HTML",
            ".htm": "HTML",
            ".css": "CSS",
            ".scss": "SCSS",
            ".sass": "Sass",
            ".less": "Less",
            # Backend Languages
            ".py": "Python",
            ".java": "Java",
            ".kt": "Kotlin",
            ".scala": "Scala",
            ".go": "Go",
            ".rs": "Rust",
            ".rb": "Ruby",
            ".php": "PHP",
            ".cs": "C#",
            ".fs": "F#",
            ".vb": "Visual Basic",
            ".cpp": "C++",
            ".cc": "C++",
            ".cxx": "C++",
            ".c": "C",
            ".h": "C/C++",
            ".hpp": "C++",
            # Mobile
            ".swift": "Swift",
            ".m": "Objective-C",
            ".mm": "Objective-C++",
            ".dart": "Dart",
            # Data & Config
            ".sql": "SQL",
            ".json": "JSON",
            ".yaml": "YAML",
            ".yml": "YAML",
            ".xml": "XML",
            ".toml": "TOML",
            ".ini": "INI",
            ".env": "Environment",
            ".properties": "Properties",
            # Shell & Scripting
            ".sh": "Shell",
            ".bash": "Bash",
            ".zsh": "Zsh",
            ".fish": "Fish",
            ".ps1": "PowerShell",
            ".bat": "Batch",
            ".cmd": "Batch",
            # Documentation
            ".md": "Markdown",
            ".rst": "reStructuredText",
            ".txt": "Text",
            ".adoc": "AsciiDoc",
            # Build & CI
            ".dockerfile": "Dockerfile",
            ".gradle": "Gradle",
            ".maven": "Maven",
            ".cmake": "CMake",
            ".make": "Makefile",
            # Misc
            ".r": "R",
            ".jl": "Julia",
            ".ex": "Elixir",
            ".exs": "Elixir",
            ".erl": "Erlang",
            ".hrl": "Erlang",
            ".clj": "Clojure",
            ".cljs": "ClojureScript",
            ".hs": "Haskell",
            ".elm": "Elm",
            ".lua": "Lua",
            ".pl": "Perl",
            ".pm": "Perl",
        }

        # Directory patterns to activity type mappings.
        # Each pattern names one or more whole directory components and ends
        # with "/"; see _classify_directory_activities for how they are matched.
        self.directory_activity_patterns = {
            # Frontend/UI patterns
            "ui": [
                "ui/",
                "frontend/",
                "client/",
                "web/",
                "www/",
                "public/",
                "assets/",
                "static/",
                "components/",
                "views/",
                "pages/",
                "templates/",
                "layouts/",
                "styles/",
                "css/",
                "js/",
                "javascript/",
                "typescript/",
                "react/",
                "vue/",
                "angular/",
            ],
            # Backend/API patterns
            "api": [
                "api/",
                "backend/",
                "server/",
                "service/",
                "services/",
                "controllers/",
                "handlers/",
                "routes/",
                "endpoints/",
                "middleware/",
                "auth/",
                "authentication/",
                "authorization/",
                "business/",
                "domain/",
                "core/",
                "logic/",
            ],
            # Database patterns
            "database": [
                "database/",
                "db/",
                "data/",
                "models/",
                "entities/",
                "repositories/",
                "dao/",
                "migrations/",
                "schema/",
                "seeds/",
                "fixtures/",
                "sql/",
                "queries/",
            ],
            # Testing patterns
            "test": [
                "test/",
                "tests/",
                "testing/",
                "spec/",
                "specs/",
                "__tests__/",
                "e2e/",
                "integration/",
                "unit/",
                "fixtures/",
                "mocks/",
                "stubs/",
            ],
            # Documentation patterns
            "docs": [
                "docs/",
                "doc/",
                "documentation/",
                "readme/",
                "guides/",
                "tutorials/",
                "examples/",
                "samples/",
                "wiki/",
                "help/",
                "manual/",
            ],
            # Infrastructure/DevOps patterns
            "infrastructure": [
                "infrastructure/",
                "infra/",
                "ops/",
                "devops/",
                "deploy/",
                "deployment/",
                "k8s/",
                "kubernetes/",
                "docker/",
                "terraform/",
                "ansible/",
                "helm/",
                "ci/",
                "cd/",
                ".github/",
                ".gitlab/",
                "jenkins/",
                "scripts/",
                "tools/",
                "utilities/",
                "bin/",
            ],
            # Configuration patterns
            "config": [
                "config/",
                "configuration/",
                "settings/",
                "env/",
                "environment/",
                "properties/",
                "resources/",
                "assets/config/",
                "etc/",
            ],
            # Build patterns
            "build": [
                "build/",
                "dist/",
                "target/",
                "out/",
                "output/",
                "generated/",
                "artifacts/",
                "release/",
                "gradle/",
                "maven/",
                "npm/",
                "node_modules/",
            ],
            # Mobile patterns
            "mobile": [
                "mobile/",
                "app/",
                "android/",
                "ios/",
                "flutter/",
                "react-native/",
                "cordova/",
                "phonegap/",
                "ionic/",
            ],
        }

        # File patterns for generated/binary content detection
        self.generated_patterns = [
            # Compiled/Generated files
            r"\.min\.(js|css)$",
            r"\.bundle\.(js|css)$",
            r"\.generated\.",
            r"\.g\.(cs|java|py)$",
            r"_pb2\.py$",  # Protocol buffer generated files
            r"\.pb\.go$",
            # Build artifacts
            r"\.(class|o|obj|exe|dll|so|dylib)$",
            r"\.a$",  # Static libraries
            r"\.jar$",
            r"\.war$",
            r"\.ear$",
            # Package files
            r"package-lock\.json$",
            r"yarn\.lock$",
            r"Gemfile\.lock$",
            r"composer\.lock$",
            r"Pipfile\.lock$",
            # IDE/Editor files
            r"\.(idea|vscode|settings)/",
            r"\.swp$",
            r"\.swo$",
            r"~$",
            # OS files
            r"\.DS_Store$",
            r"Thumbs\.db$",
            r"desktop\.ini$",
            # Log files
            r"\.(log|logs)$",
            r"\.log\.",
        ]

        # Binary file extensions
        self.binary_extensions = {
            ".jpg",
            ".jpeg",
            ".png",
            ".gif",
            ".bmp",
            ".ico",
            ".svg",
            ".pdf",
            ".doc",
            ".docx",
            ".xls",
            ".xlsx",
            ".ppt",
            ".pptx",
            ".zip",
            ".tar",
            ".gz",
            ".bz2",
            ".7z",
            ".rar",
            ".mp3",
            ".mp4",
            ".avi",
            ".mov",
            ".wav",
            ".flv",
            ".ttf",
            ".otf",
            ".woff",
            ".woff2",
            ".eot",
            ".bin",
            ".dat",
            ".db",
            ".sqlite",
            ".sqlite3",
        }

        # Compile regex patterns for efficiency
        self._compile_patterns()

    def _compile_patterns(self) -> None:
        """Compile regex patterns for efficient matching.

        Populates ``self.compiled_generated_patterns`` from
        ``self.generated_patterns``; matching is case-insensitive.
        """
        self.compiled_generated_patterns = [
            re.compile(pattern, re.IGNORECASE) for pattern in self.generated_patterns
        ]

    def analyze_commit_files(self, file_paths: list[str]) -> dict[str, Any]:
        """Analyze a list of file paths from a commit.

        This method provides comprehensive analysis of files changed in a commit,
        including language detection, activity classification, and metadata extraction.

        Args:
            file_paths: List of file paths from a git commit

        Returns:
            Dictionary containing:
            - languages: Counter of programming languages
            - activities: Counter of development activities
            - primary_language: Most common language (or None)
            - primary_activity: Most common activity (or None)
            - file_count: Total number of files
            - generated_count: Number of generated/binary files
            - generated_ratio: Ratio of generated to total files
            - language_diversity: Number of unique languages
            - activity_diversity: Number of unique activities
            - file_types: Counter of file extensions
            - is_multilingual: Whether multiple languages are involved
            - is_cross_functional: Whether multiple activities are involved
        """
        if not file_paths:
            return self._empty_analysis_result()

        languages = Counter()
        activities = Counter()
        file_types = Counter()
        generated_count = 0

        for file_path in file_paths:
            file_analysis = self._analyze_single_file(file_path)

            # Files with no recognised language are left out of the language counts.
            if file_analysis["language"]:
                languages[file_analysis["language"]] += 1

            # A single file can contribute to several activities.
            activities.update(file_analysis["activities"])

            # Extensionless files are counted under the empty-string key.
            file_types[file_analysis["extension"]] += 1

            # Generated and binary files share one counter.
            if file_analysis["is_generated"] or file_analysis["is_binary"]:
                generated_count += 1

        # file_paths is non-empty here (see the early return), so dividing is safe.
        total_files = len(file_paths)
        generated_ratio = generated_count / total_files

        # most_common() breaks ties by first-seen order.
        primary_language = languages.most_common(1)[0][0] if languages else None
        primary_activity = activities.most_common(1)[0][0] if activities else None

        language_diversity = len(languages)
        activity_diversity = len(activities)

        return {
            "languages": languages,
            "activities": activities,
            "primary_language": primary_language,
            "primary_activity": primary_activity,
            "file_count": total_files,
            "generated_count": generated_count,
            "generated_ratio": generated_ratio,
            "language_diversity": language_diversity,
            "activity_diversity": activity_diversity,
            "file_types": file_types,
            "is_multilingual": language_diversity > 1,
            "is_cross_functional": activity_diversity > 1,
        }

    def _analyze_single_file(self, file_path: str) -> dict[str, Any]:
        """Analyze a single file path.

        Args:
            file_path: Path to analyze

        Returns:
            Dictionary with keys ``language`` (name or None), ``activities``
            (list of activity names), ``extension`` (lower-cased suffix, ""
            when there is none), ``is_generated``, ``is_binary``, ``filename``
            and ``directory`` ("" for files at the top level).
        """
        path_obj = Path(file_path)
        extension = path_obj.suffix.lower()

        # Detect language from extension
        language = self.language_mappings.get(extension)

        if language is None:
            filename = path_obj.name.lower()

            if not extension:
                if filename in ("dockerfile", "makefile", "rakefile", "gemfile"):
                    # Well-known build files that carry no extension.
                    language = filename.title()
                elif filename.startswith("."):
                    # pathlib reports an empty suffix for dotfiles such as
                    # ".env", so look the whole name up as if it were the
                    # extension.
                    language = self.language_mappings.get(filename)

            # Variants such as "Dockerfile.dev" or "dockerfile-prod" are still
            # Dockerfiles even though their suffix is not a known extension.
            if language is None and filename.startswith("dockerfile"):
                language = "Dockerfile"

        # Detect activities from directory patterns
        activities = self._classify_directory_activities(file_path)

        # Check if file is generated or binary
        is_generated = any(
            pattern.search(file_path) for pattern in self.compiled_generated_patterns
        )
        is_binary = extension in self.binary_extensions

        return {
            "language": language,
            "activities": activities,
            "extension": extension,
            "is_generated": is_generated,
            "is_binary": is_binary,
            "filename": path_obj.name,
            "directory": str(path_obj.parent) if path_obj.parent != Path(".") else "",
        }

    def _classify_directory_activities(self, file_path: str) -> list[str]:
        """Classify development activities based on directory patterns.

        A pattern matches only when it lines up with whole directory
        components, so "out/" matches "out/report.txt" and "src/out/a.js"
        but not "checkout/index.js".

        Args:
            file_path: File path to analyze

        Returns:
            List of activity types that match the file path, in the order
            they are declared in ``directory_activity_patterns``; each
            activity appears at most once. ``["general"]`` when none match.
        """
        # Lower-case, unify separators, and anchor with a leading "/" so the
        # first directory component is preceded by "/" like every other one.
        normalized_path = "/" + file_path.lower().replace("\\", "/").lstrip("/")

        activities = [
            activity
            for activity, patterns in self.directory_activity_patterns.items()
            if any("/" + pattern in normalized_path for pattern in patterns)
        ]

        # If no specific activity detected, classify as 'general'
        return activities or ["general"]

    def _empty_analysis_result(self) -> dict[str, Any]:
        """Return empty analysis result structure.

        The keys mirror those returned by ``analyze_commit_files``.
        """
        return {
            "languages": Counter(),
            "activities": Counter(),
            "primary_language": None,
            "primary_activity": None,
            "file_count": 0,
            "generated_count": 0,
            "generated_ratio": 0.0,
            "language_diversity": 0,
            "activity_diversity": 0,
            "file_types": Counter(),
            "is_multilingual": False,
            "is_cross_functional": False,
        }

    def get_language_category(self, language: str) -> str:
        """Get high-level category for a programming language.

        Categories are checked in the order frontend, backend, mobile, data,
        and the first match wins. Languages listed in more than one set are
        therefore reported under the earlier category (Kotlin and Python
        both resolve to "backend").

        Args:
            language: Programming language name

        Returns:
            One of "frontend", "backend", "mobile", "data" or "other".
        """
        frontend_languages = {
            "JavaScript",
            "TypeScript",
            "HTML",
            "CSS",
            "SCSS",
            "Sass",
            "Less",
            "Vue",
        }
        backend_languages = {
            "Python",
            "Java",
            "Go",
            "Rust",
            "Ruby",
            "PHP",
            "C#",
            "C++",
            "C",
            "Scala",
            "Kotlin",
        }
        mobile_languages = {"Swift", "Objective-C", "Objective-C++", "Kotlin", "Dart"}
        data_languages = {"SQL", "R", "Julia", "Python"}  # Python can be both backend and data

        if language in frontend_languages:
            return "frontend"
        elif language in backend_languages:
            return "backend"
        elif language in mobile_languages:
            return "mobile"
        elif language in data_languages:
            return "data"
        else:
            return "other"

    def get_supported_languages(self) -> list[str]:
        """Get list of all supported programming languages.

        Returns:
            Sorted list of supported language names
        """
        return sorted(set(self.language_mappings.values()))

    def get_supported_activities(self) -> list[str]:
        """Get list of all supported activity types.

        Returns:
            Sorted list of supported activity types
        """
        return sorted(self.directory_activity_patterns.keys())