greenmining 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,9 @@ from __future__ import annotations
 import os
 import re
 import shutil
+import subprocess
 import tempfile
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from dataclasses import dataclass, field
 from datetime import datetime, timedelta
 from pathlib import Path
@@ -25,10 +27,62 @@ from greenmining.gsf_patterns import get_pattern_by_keywords, is_green_aware, GS
 from greenmining.utils import colored_print


+@dataclass
+class MethodMetrics:
+    # Per-method analysis metrics from Lizard integration.
+
+    name: str
+    long_name: str
+    filename: str
+    nloc: int = 0
+    complexity: int = 0
+    token_count: int = 0
+    parameters: int = 0
+    start_line: int = 0
+    end_line: int = 0
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "name": self.name,
+            "long_name": self.long_name,
+            "filename": self.filename,
+            "nloc": self.nloc,
+            "complexity": self.complexity,
+            "token_count": self.token_count,
+            "parameters": self.parameters,
+            "start_line": self.start_line,
+            "end_line": self.end_line,
+        }
+
+
+@dataclass
+class SourceCodeChange:
+    # Source code before/after a commit for refactoring detection.
+
+    filename: str
+    source_code_before: Optional[str] = None
+    source_code_after: Optional[str] = None
+    diff: Optional[str] = None
+    added_lines: int = 0
+    deleted_lines: int = 0
+    change_type: str = ""  # ADD, DELETE, MODIFY, RENAME
+
+    def to_dict(self) -> Dict[str, Any]:
+        return {
+            "filename": self.filename,
+            "source_code_before": self.source_code_before,
+            "source_code_after": self.source_code_after,
+            "diff": self.diff,
+            "added_lines": self.added_lines,
+            "deleted_lines": self.deleted_lines,
+            "change_type": self.change_type,
+        }
+
+
 @dataclass
 class CommitAnalysis:
     # Analysis result for a single commit.
-
+
     hash: str
     message: str
     author: str
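
Both new dataclasses serialize through to_dict() for JSON-friendly output. A quick sketch of that round-trip, not part of the diff; it assumes MethodMetrics is in scope (the module path is not shown here) and the field values are invented:

# Hypothetical usage; assumes MethodMetrics is importable, values are invented.
m = MethodMetrics(
    name="traverse",
    long_name="Walker::traverse",
    filename="walker.py",
    nloc=42,
    complexity=7,
)
assert m.to_dict()["complexity"] == 7
assert m.to_dict()["token_count"] == 0  # unset fields keep their declared defaults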
@@ -42,21 +96,31 @@ class CommitAnalysis:
     files_modified: List[str]
     insertions: int
     deletions: int
-
+
     # PyDriller DMM metrics
     dmm_unit_size: Optional[float] = None
     dmm_unit_complexity: Optional[float] = None
     dmm_unit_interfacing: Optional[float] = None
-
+
     # Structural metrics (Lizard)
     total_nloc: int = 0
     total_complexity: int = 0
     max_complexity: int = 0
     methods_count: int = 0
-
+
+    # Method-level analysis (Phase 3.2)
+    methods: List[MethodMetrics] = field(default_factory=list)
+
+    # Source code access (Phase 3.3)
+    source_changes: List[SourceCodeChange] = field(default_factory=list)
+
+    # Energy metrics (Phase 2.2 - populated when energy_tracking=True)
+    energy_joules: Optional[float] = None
+    energy_watts_avg: Optional[float] = None
+
     def to_dict(self) -> Dict[str, Any]:
         # Convert to dictionary.
-        return {
+        result = {
             "commit_hash": self.hash,
             "message": self.message,
             "author": self.author,
@@ -79,11 +143,23 @@ class CommitAnalysis:
             "methods_count": self.methods_count,
         }

+        if self.methods:
+            result["methods"] = [m.to_dict() for m in self.methods]
+
+        if self.source_changes:
+            result["source_changes"] = [s.to_dict() for s in self.source_changes]
+
+        if self.energy_joules is not None:
+            result["energy_joules"] = self.energy_joules
+            result["energy_watts_avg"] = self.energy_watts_avg
+
+        return result
+

 @dataclass
 class RepositoryAnalysis:
     # Complete analysis result for a repository.
-
+
     url: str
     name: str
     total_commits: int
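
The serialization change is backward compatible: to_dict() still builds the fixed pre-1.0.6 key set first and attaches methods, source_changes, and the energy keys only when they are populated. Consumers should therefore read the new keys defensively; a sketch, where analysis stands for any CommitAnalysis instance:

data = analysis.to_dict()
methods = data.get("methods", [])   # absent unless method-level analysis was enabled
energy = data.get("energy_joules")  # absent unless energy tracking produced a value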
@@ -91,10 +167,11 @@ class RepositoryAnalysis:
     green_commit_rate: float
     commits: List[CommitAnalysis] = field(default_factory=list)
     process_metrics: Dict[str, Any] = field(default_factory=dict)
-
+    energy_metrics: Optional[Dict[str, Any]] = None
+
     def to_dict(self) -> Dict[str, Any]:
         # Convert to dictionary.
-        return {
+        result = {
             "url": self.url,
             "name": self.name,
             "total_commits": self.total_commits,
@@ -103,11 +180,15 @@ class RepositoryAnalysis:
             "commits": [c.to_dict() for c in self.commits],
             "process_metrics": self.process_metrics,
         }
+        if self.energy_metrics:
+            result["energy_metrics"] = self.energy_metrics
+        return result


 class LocalRepoAnalyzer:
     # Analyze repositories directly from GitHub URLs using PyDriller.
-
+    # Supports HTTPS URLs, SSH URLs, and private repositories.
+
     def __init__(
         self,
         clone_path: Optional[Path] = None,
@@ -116,8 +197,29 @@ class LocalRepoAnalyzer:
         skip_merges: bool = True,
         compute_process_metrics: bool = True,
         cleanup_after: bool = True,
+        ssh_key_path: Optional[str] = None,
+        github_token: Optional[str] = None,
+        energy_tracking: bool = False,
+        energy_backend: str = "rapl",
+        method_level_analysis: bool = False,
+        include_source_code: bool = False,
+        process_metrics: str = "standard",
     ):
         # Initialize the local repository analyzer.
+        # Args:
+        #     clone_path: Directory to clone repos into
+        #     max_commits: Maximum commits to analyze per repo
+        #     days_back: How far back to analyze
+        #     skip_merges: Skip merge commits
+        #     compute_process_metrics: Compute PyDriller process metrics
+        #     cleanup_after: Remove cloned repos after analysis
+        #     ssh_key_path: Path to SSH private key for private repos
+        #     github_token: GitHub token for private HTTPS repos
+        #     energy_tracking: Enable automatic energy measurement
+        #     energy_backend: Energy measurement backend (rapl, codecarbon)
+        #     method_level_analysis: Extract per-method metrics via Lizard
+        #     include_source_code: Include source code before/after in results
+        #     process_metrics: "standard" or "full" PyDriller process metrics
         self.clone_path = clone_path or Path(tempfile.gettempdir()) / "greenmining_repos"
         self.clone_path.mkdir(parents=True, exist_ok=True)
         self.max_commits = max_commits
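
Every new capability is gated behind a keyword argument with a conservative default, so pre-1.0.6 call sites keep working unchanged. A hedged construction sketch; the import path is a guess and the token is a placeholder:

from greenmining import LocalRepoAnalyzer  # import path assumed, not shown in this diff

analyzer = LocalRepoAnalyzer(
    max_commits=200,
    github_token="<token>",       # placeholder; enables private HTTPS clones
    energy_tracking=True,         # needs a usable RAPL or codecarbon backend
    method_level_analysis=True,   # per-method Lizard metrics for each commit
    include_source_code=False,    # before/after sources would bloat the results
    process_metrics="full",       # "standard" or "full"
)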
@@ -126,7 +228,53 @@ class LocalRepoAnalyzer:
         self.compute_process_metrics = compute_process_metrics
         self.cleanup_after = cleanup_after
         self.gsf_patterns = GSF_PATTERNS
-
+
+        # Phase 1.3: Private repository support
+        self.ssh_key_path = ssh_key_path
+        self.github_token = github_token
+
+        # Phase 2.2: Integrated energy tracking
+        self.energy_tracking = energy_tracking
+        self.energy_backend = energy_backend
+        self._energy_meter = None
+        if energy_tracking:
+            self._init_energy_meter()
+
+        # Phase 3.2: Method-level analysis
+        self.method_level_analysis = method_level_analysis
+
+        # Phase 3.3: Source code access
+        self.include_source_code = include_source_code
+
+        # Phase 3.1: Full process metrics mode
+        self.process_metrics_mode = process_metrics
+
+    def _init_energy_meter(self):
+        # Initialize the energy measurement backend.
+        try:
+            from greenmining.energy.base import get_energy_meter
+
+            self._energy_meter = get_energy_meter(self.energy_backend)
+        except Exception as e:
+            colored_print(f" Warning: Energy tracking unavailable: {e}", "yellow")
+            self.energy_tracking = False
+
+    def _prepare_auth_url(self, url: str) -> str:
+        # Prepare authenticated URL for private repositories.
+        if self.github_token and url.startswith("https://"):
+            # Inject token into HTTPS URL for private repo access
+            return url.replace("https://", f"https://x-access-token:{self.github_token}@")
+        return url
+
+    def _setup_ssh_env(self) -> Dict[str, str]:
+        # Set up SSH environment for private repository cloning.
+        env = os.environ.copy()
+        if self.ssh_key_path:
+            ssh_key = os.path.expanduser(self.ssh_key_path)
+            if os.path.exists(ssh_key):
+                env["GIT_SSH_COMMAND"] = f"ssh -i {ssh_key} -o StrictHostKeyChecking=no"
+        return env
+
     def _parse_repo_url(self, url: str) -> tuple[str, str]:
         # Parse repository URL to extract owner and name.
         # Handle HTTPS URLs
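
The token-injection logic is simple enough to verify in isolation. A standalone restatement of _prepare_auth_url with hypothetical names, runnable as-is:

from typing import Optional


def prepare_auth_url(url: str, token: Optional[str]) -> str:
    # Mirrors LocalRepoAnalyzer._prepare_auth_url above.
    if token and url.startswith("https://"):
        return url.replace("https://", f"https://x-access-token:{token}@")
    return url


assert (
    prepare_auth_url("https://github.com/octocat/hello.git", "TOKEN")
    == "https://x-access-token:TOKEN@github.com/octocat/hello.git"
)
# SSH URLs pass through unchanged; they are handled via GIT_SSH_COMMAND instead.
assert prepare_auth_url("git@github.com:octocat/hello.git", "TOKEN").startswith("git@")

One caveat: embedding the token in the remote URL means it lands in the clone's .git/config for as long as the clone exists, which is one more reason cleanup_after matters for private repositories.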
@@ -134,66 +282,111 @@ class LocalRepoAnalyzer:
         match = re.search(https_pattern, url)
         if match:
             return match.group(1), match.group(2).replace(".git", "")
-
+
         # Handle SSH URLs
         ssh_pattern = r"git@github\.com:([^/]+)/([^/\.]+)"
         match = re.search(ssh_pattern, url)
         if match:
             return match.group(1), match.group(2).replace(".git", "")
-
+
         raise ValueError(f"Could not parse GitHub URL: {url}")
-
+
     def _get_pattern_details(self, matched_patterns: List[str]) -> List[Dict[str, Any]]:
         # Get detailed pattern information.
         details = []
         for pattern_id, pattern in self.gsf_patterns.items():
             if pattern["name"] in matched_patterns:
-                details.append({
-                    "name": pattern["name"],
-                    "category": pattern["category"],
-                    "description": pattern["description"],
-                    "sci_impact": pattern["sci_impact"],
-                })
+                details.append(
+                    {
+                        "name": pattern["name"],
+                        "category": pattern["category"],
+                        "description": pattern["description"],
+                        "sci_impact": pattern["sci_impact"],
+                    }
+                )
         return details
-
+
+    def _extract_method_metrics(self, commit) -> List[MethodMetrics]:
+        # Extract per-method metrics from modified files using Lizard (via PyDriller).
+        methods = []
+        try:
+            for mod in commit.modified_files:
+                if mod.methods:
+                    for method in mod.methods:
+                        methods.append(
+                            MethodMetrics(
+                                name=method.name,
+                                long_name=method.long_name,
+                                filename=mod.filename,
+                                nloc=method.nloc,
+                                complexity=method.complexity,
+                                token_count=method.token_count,
+                                parameters=len(method.parameters),
+                                start_line=method.start_line,
+                                end_line=method.end_line,
+                            )
+                        )
+        except Exception:
+            pass
+        return methods
+
+    def _extract_source_changes(self, commit) -> List[SourceCodeChange]:
+        # Extract source code before/after for each modified file.
+        changes = []
+        try:
+            for mod in commit.modified_files:
+                change = SourceCodeChange(
+                    filename=mod.filename,
+                    source_code_before=mod.source_code_before if mod.source_code_before else None,
+                    source_code_after=mod.source_code if mod.source_code else None,
+                    diff=mod.diff if mod.diff else None,
+                    added_lines=mod.added_lines,
+                    deleted_lines=mod.deleted_lines,
+                    change_type=mod.change_type.name if mod.change_type else "",
+                )
+                changes.append(change)
+        except Exception:
+            pass
+        return changes
+
     def analyze_commit(self, commit) -> CommitAnalysis:
         # Analyze a single PyDriller commit object.
         message = commit.msg or ""
-
+
         # Green awareness check
         green_aware = is_green_aware(message)
-
+
         # GSF pattern matching
         matched_patterns = get_pattern_by_keywords(message)
         pattern_details = self._get_pattern_details(matched_patterns)
-
+
         # Confidence calculation
         pattern_count = len(matched_patterns)
         confidence = "high" if pattern_count >= 2 else "medium" if pattern_count == 1 else "low"
-
+
         # File modifications
         files_modified = [mod.filename for mod in commit.modified_files]
         insertions = sum(mod.added_lines for mod in commit.modified_files)
         deletions = sum(mod.deleted_lines for mod in commit.modified_files)
-
+
         # Delta Maintainability Model (if available)
         dmm_unit_size = None
         dmm_unit_complexity = None
         dmm_unit_interfacing = None
-
+
         try:
             dmm_unit_size = commit.dmm_unit_size
             dmm_unit_complexity = commit.dmm_unit_complexity
             dmm_unit_interfacing = commit.dmm_unit_interfacing
         except Exception:
             pass # DMM may not be available for all commits
-
+
         # Structural metrics from Lizard (via PyDriller)
         total_nloc = 0
         total_complexity = 0
         max_complexity = 0
         methods_count = 0
-
+
         try:
             for mod in commit.modified_files:
                 if mod.nloc:
@@ -206,7 +399,17 @@ class LocalRepoAnalyzer:
                     methods_count += len(mod.methods)
         except Exception:
             pass # Structural metrics may fail for some files
-
+
+        # Phase 3.2: Method-level analysis
+        methods = []
+        if self.method_level_analysis:
+            methods = self._extract_method_metrics(commit)
+
+        # Phase 3.3: Source code access
+        source_changes = []
+        if self.include_source_code:
+            source_changes = self._extract_source_changes(commit)
+
         return CommitAnalysis(
             hash=commit.hash,
             message=message,
@@ -228,66 +431,93 @@ class LocalRepoAnalyzer:
             total_complexity=total_complexity,
             max_complexity=max_complexity,
             methods_count=methods_count,
+            methods=methods,
+            source_changes=source_changes,
         )
-
+
     def analyze_repository(self, url: str) -> RepositoryAnalysis:
         # Analyze a repository from its URL.
         owner, repo_name = self._parse_repo_url(url)
         full_name = f"{owner}/{repo_name}"
-
+
         colored_print(f"\n Analyzing repository: {full_name}", "cyan")
-
+
+        # Phase 1.3: Prepare authenticated URL for private repos
+        auth_url = self._prepare_auth_url(url)
+
         # Calculate date range
         since_date = datetime.now() - timedelta(days=self.days_back)
-
+
         # Configure PyDriller Repository
         repo_config = {
-            "path_to_repo": url,
+            "path_to_repo": auth_url,
             "since": since_date,
             "only_no_merge": self.skip_merges,
         }
-
+
         # Clone to specific path if needed
         local_path = self.clone_path / repo_name
         if local_path.exists():
             shutil.rmtree(local_path)
-
+
         repo_config["clone_repo_to"] = str(self.clone_path)
-
+
         colored_print(f" Cloning to: {local_path}", "cyan")
-
+
+        # Phase 2.2: Start energy measurement if enabled
+        energy_result = None
+        if self.energy_tracking and self._energy_meter:
+            try:
+                self._energy_meter.start()
+            except Exception as e:
+                colored_print(f" Warning: Energy measurement start failed: {e}", "yellow")
+
         commits_analyzed = []
         commit_count = 0
-
+
         try:
             for commit in Repository(**repo_config).traverse_commits():
                 if commit_count >= self.max_commits:
                     break
-
+
                 try:
                     analysis = self.analyze_commit(commit)
                     commits_analyzed.append(analysis)
                     commit_count += 1
-
+
                     if commit_count % 50 == 0:
                         colored_print(f" Processed {commit_count} commits...", "cyan")
-
+
                 except Exception as e:
-                    colored_print(f" Warning: Error analyzing commit {commit.hash[:8]}: {e}", "yellow")
+                    colored_print(
+                        f" Warning: Error analyzing commit {commit.hash[:8]}: {e}", "yellow"
+                    )
                     continue
-
+
             colored_print(f" Analyzed {len(commits_analyzed)} commits", "green")
-
+
+            # Phase 2.2: Stop energy measurement
+            if self.energy_tracking and self._energy_meter:
+                try:
+                    energy_result = self._energy_meter.stop()
+                except Exception as e:
+                    colored_print(f" Warning: Energy measurement stop failed: {e}", "yellow")
+
             # Compute process metrics if enabled
             process_metrics = {}
             if self.compute_process_metrics and local_path.exists():
                 colored_print(" Computing process metrics...", "cyan")
                 process_metrics = self._compute_process_metrics(str(local_path))
-
+
             # Calculate summary
             green_commits = sum(1 for c in commits_analyzed if c.green_aware)
             green_rate = green_commits / len(commits_analyzed) if commits_analyzed else 0
-
+
+            # Build energy metrics dict
+            energy_dict = None
+            if energy_result:
+                energy_dict = energy_result.to_dict()
+
             result = RepositoryAnalysis(
                 url=url,
                 name=full_name,
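
Note that analyze_repository touches the meter through exactly three calls: start(), stop(), and to_dict() on whatever stop() returns, each wrapped in its own try/except so a failing backend degrades to a warning instead of aborting the run. A minimal stand-in that satisfies this protocol; this sketches the shape implied by the call sites above, not the actual greenmining.energy.base API, and the dict keys are invented:

class NullEnergyMeter:
    # Interface inferred from the call sites above; not the real backend class.
    def start(self) -> None:
        pass

    def stop(self) -> "NullEnergyMeter":
        return self  # stands in for a result object exposing to_dict()

    def to_dict(self) -> dict:
        return {"energy_joules": 0.0, "energy_watts_avg": 0.0}  # invented keys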
@@ -296,22 +526,23 @@ class LocalRepoAnalyzer:
                 green_commit_rate=green_rate,
                 commits=commits_analyzed,
                 process_metrics=process_metrics,
+                energy_metrics=energy_dict,
             )
-
+
             return result
-
+
         finally:
             # Cleanup if requested
             if self.cleanup_after and local_path.exists():
                 colored_print(f" Cleaning up: {local_path}", "cyan")
                 shutil.rmtree(local_path, ignore_errors=True)
-
+
    def _compute_process_metrics(self, repo_path: str) -> Dict[str, Any]:
        # Compute PyDriller process metrics for the repository.
        metrics = {}
        since_date = datetime.now() - timedelta(days=self.days_back)
        to_date = datetime.now()
-
+
        try:
            # ChangeSet metrics
            cs = ChangeSet(repo_path, since=since_date, to=to_date)
@@ -319,62 +550,76 @@ class LocalRepoAnalyzer:
             metrics["change_set_avg"] = cs.avg()
         except Exception as e:
             colored_print(f" Warning: ChangeSet metrics failed: {e}", "yellow")
-
+
         try:
             # CodeChurn metrics
             churn = CodeChurn(repo_path, since=since_date, to=to_date)
             metrics["code_churn"] = churn.count()
         except Exception as e:
             colored_print(f" Warning: CodeChurn metrics failed: {e}", "yellow")
-
+
         try:
             # CommitsCount metrics
             cc = CommitsCount(repo_path, since=since_date, to=to_date)
             metrics["commits_per_file"] = cc.count()
         except Exception as e:
             colored_print(f" Warning: CommitsCount metrics failed: {e}", "yellow")
-
+
         try:
             # ContributorsCount metrics
             contrib = ContributorsCount(repo_path, since=since_date, to=to_date)
             metrics["contributors_per_file"] = contrib.count()
         except Exception as e:
             colored_print(f" Warning: ContributorsCount metrics failed: {e}", "yellow")
-
+
         try:
             # ContributorsExperience metrics
             exp = ContributorsExperience(repo_path, since=since_date, to=to_date)
             metrics["contributors_experience"] = exp.count()
         except Exception as e:
             colored_print(f" Warning: ContributorsExperience metrics failed: {e}", "yellow")
-
+
         try:
             # HistoryComplexity metrics
             hc = HistoryComplexity(repo_path, since=since_date, to=to_date)
             metrics["history_complexity"] = hc.count()
         except Exception as e:
             colored_print(f" Warning: HistoryComplexity metrics failed: {e}", "yellow")
-
+
         try:
             # HunksCount metrics
             hunks = HunksCount(repo_path, since=since_date, to=to_date)
             metrics["hunks_count"] = hunks.count()
         except Exception as e:
             colored_print(f" Warning: HunksCount metrics failed: {e}", "yellow")
-
+
         try:
             # LinesCount metrics
             lines = LinesCount(repo_path, since=since_date, to=to_date)
             metrics["lines_count"] = lines.count()
         except Exception as e:
             colored_print(f" Warning: LinesCount metrics failed: {e}", "yellow")
-
+
         return metrics
-
-    def analyze_repositories(self, urls: List[str]) -> List[RepositoryAnalysis]:
+
+    def analyze_repositories(
+        self,
+        urls: List[str],
+        parallel_workers: int = 1,
+        output_format: str = "dict",
+    ) -> List[RepositoryAnalysis]:
         # Analyze multiple repositories from URLs.
+        # Args:
+        #     urls: List of repository URLs to analyze
+        #     parallel_workers: Number of concurrent workers (1 = sequential)
+        #     output_format: Output format (dict, json, csv)
+        if parallel_workers <= 1:
+            return self._analyze_sequential(urls)
+        return self._analyze_parallel(urls, parallel_workers)
+
+    def _analyze_sequential(self, urls: List[str]) -> List[RepositoryAnalysis]:
+        # Analyze repositories sequentially.
         results = []
-
         for i, url in enumerate(urls, 1):
             colored_print(f"\n[{i}/{len(urls)}] Processing repository...", "cyan")
             try:
@@ -383,5 +628,22 @@ class LocalRepoAnalyzer:
             except Exception as e:
                 colored_print(f" Error analyzing {url}: {e}", "red")
                 continue
-
+        return results
+
+    def _analyze_parallel(self, urls: List[str], max_workers: int) -> List[RepositoryAnalysis]:
+        # Analyze repositories in parallel using thread pool.
+        results = []
+        colored_print(f"\n Analyzing {len(urls)} repositories with {max_workers} workers", "cyan")
+
+        with ThreadPoolExecutor(max_workers=max_workers) as executor:
+            future_to_url = {executor.submit(self.analyze_repository, url): url for url in urls}
+            for future in as_completed(future_to_url):
+                url = future_to_url[future]
+                try:
+                    result = future.result()
+                    results.append(result)
+                    colored_print(f" Completed: {result.name}", "green")
+                except Exception as e:
+                    colored_print(f" Error analyzing {url}: {e}", "red")
+
         return results
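
With the split into _analyze_sequential and _analyze_parallel, the public entry point now dispatches on parallel_workers. Hypothetical call sites, reusing the analyzer constructed earlier and placeholder URLs:

urls = ["https://github.com/example/repo-a", "https://github.com/example/repo-b"]

seq = analyzer.analyze_repositories(urls)                      # sequential path
par = analyzer.analyze_repositories(urls, parallel_workers=4)  # ThreadPoolExecutor path

Two details are visible in the hunk itself: output_format is accepted but never consumed anywhere in this diff, and parallel results are appended in completion order rather than input order, so callers that need stable ordering must sort the results themselves. The remaining hunks belong to the report generation module; they add a module docstring and reflow two long f-string append calls, with no behavioral change.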
@@ -1,4 +1,5 @@
 # Report generation for green mining analysis.
+"""Report generation module for GreenMining analysis results."""

 from __future__ import annotations

@@ -228,12 +229,10 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
         # Pattern descriptions
         pattern_details = []
         for i, pattern in enumerate(patterns[:10], 1):
-            pattern_details.append(
-                f"""**{i}. {pattern['pattern_name']}**
+            pattern_details.append(f"""**{i}. {pattern['pattern_name']}**
 - Frequency: {format_number(pattern['count'])} commits ({format_percentage(pattern['percentage'])})
 - Confidence Distribution: HIGH={conf['HIGH']}, MEDIUM={conf['MEDIUM']}, LOW={conf['LOW']}
-- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}"""
-            )
+- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}""")

         return f"""#### 2.2 Known Green Patterns & Tactics Applied

@@ -258,12 +257,10 @@ No novel microservice-specific green practices were automatically detected. Manu

         pattern_list = []
         for pattern in emergent:
-            pattern_list.append(
-                f"""**Pattern:** {pattern['pattern_name']}
+            pattern_list.append(f"""**Pattern:** {pattern['pattern_name']}
 - Occurrences: {pattern['count']}
 - Description: {pattern['description']}
-- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}"""
-            )
+- Example Commits: {', '.join([c[:8] for c in pattern['example_commits'][:3]])}""")

         return f"""#### 2.3 Emerging Practices Discovered
