greenmining 0.1.11__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
greenmining/cli.py CHANGED
@@ -29,8 +29,27 @@ def cli(config_file, verbose):
 @click.option(
     "--languages", default="Python,Java,Go,JavaScript,TypeScript", help="Comma-separated languages"
 )
-def fetch(max_repos, min_stars, languages):
-    """Fetch top microservice repositories from GitHub."""
+@click.option(
+    "--keywords",
+    default="microservices",
+    type=str,
+    help="Search keywords (e.g., 'kubernetes', 'docker', 'cloud-native')",
+)
+@click.option("--created-after", type=str, help="Repository created after (YYYY-MM-DD)")
+@click.option("--created-before", type=str, help="Repository created before (YYYY-MM-DD)")
+@click.option("--pushed-after", type=str, help="Repository pushed after (YYYY-MM-DD)")
+@click.option("--pushed-before", type=str, help="Repository pushed before (YYYY-MM-DD)")
+def fetch(
+    max_repos,
+    min_stars,
+    languages,
+    keywords,
+    created_after,
+    created_before,
+    pushed_after,
+    pushed_before,
+):
+    """Fetch repositories from GitHub based on custom search keywords."""
     presenter.show_banner()
     colored_print(f"\n🎯 Target: {max_repos} repositories\n", "cyan")
 
@@ -39,7 +58,14 @@ def fetch(max_repos, min_stars, languages):
 
     try:
         repositories = controller.fetch_repositories(
-            max_repos=max_repos, min_stars=min_stars, languages=lang_list
+            max_repos=max_repos,
+            min_stars=min_stars,
+            languages=lang_list,
+            keywords=keywords,
+            created_after=created_after,
+            created_before=created_before,
+            pushed_after=pushed_after,
+            pushed_before=pushed_before,
         )
 
         # Show results
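The new fetch options map directly onto GitHub search filters. A minimal usage sketch, exercising the command through click's test runner (assuming the `cli` group from greenmining/cli.py; pre-existing options such as --max-repos keep their defaults):

    from click.testing import CliRunner
    from greenmining.cli import cli

    runner = CliRunner()
    result = runner.invoke(
        cli,
        [
            "fetch",
            "--keywords", "kubernetes",
            "--created-after", "2022-01-01",
            "--pushed-after", "2024-01-01",
        ],
    )
    print(result.output)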
@@ -61,11 +87,12 @@ def fetch(max_repos, min_stars, languages):
 @click.option("--max-commits", default=50, type=int, help="Max commits per repository")
 @click.option("--skip-merges", is_flag=True, default=True, help="Skip merge commits")
 @click.option("--days-back", default=730, type=int, help="Days to look back (default: 2 years)")
-def extract(max_commits, skip_merges, days_back):
+@click.option("--timeout", default=60, type=int, help="Timeout per repo in seconds (default: 60)")
+def extract(max_commits, skip_merges, days_back, timeout):
     """Extract commits from fetched repositories."""
     presenter.show_banner()
 
-    from services.commit_extractor import CommitExtractor
+    from greenmining.services.commit_extractor import CommitExtractor
 
     try:
         # Load repositories
@@ -80,14 +107,14 @@ def extract(max_commits, skip_merges, days_back):
 
         # Extract commits
         extractor = CommitExtractor(
-            max_commits=max_commits, skip_merges=skip_merges, days_back=days_back
+            max_commits=max_commits, skip_merges=skip_merges, days_back=days_back, timeout=timeout
         )
         commits = extractor.extract_from_repositories(
             repositories=[r.to_dict() for r in repositories]
         )
 
         # Save commits
-        from utils import save_json_file
+        from greenmining.utils import save_json_file
 
         save_json_file(commits, config.COMMITS_FILE)
         colored_print(f" Saved to: {config.COMMITS_FILE}", "cyan")
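The same extractor can be constructed directly, mirroring the call above. A sketch; the repository dicts are whatever Repository.to_dict() produces, and only the "full_name" key is visible in this diff:

    from greenmining.services.commit_extractor import CommitExtractor

    extractor = CommitExtractor(max_commits=50, skip_merges=True, days_back=365, timeout=120)
    commits = extractor.extract_from_repositories(repositories=repo_dicts)  # repo_dicts: list of repository dicts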
@@ -113,12 +140,15 @@ def extract(max_commits, skip_merges, days_back):
 
 @cli.command()
 @click.option("--batch-size", default=10, type=int, help="Batch size for processing")
-def analyze(batch_size):
+@click.option("--enable-diff-analysis", is_flag=True, help="Enable code diff analysis (slower)")
+@click.option("--enable-nlp", is_flag=True, help="Enable NLP-enhanced pattern detection")
+@click.option("--enable-ml-features", is_flag=True, help="Enable ML feature extraction")
+def analyze(batch_size, enable_diff_analysis, enable_nlp, enable_ml_features):
     """Analyze commits for green software patterns."""
     presenter.show_banner()
 
-    from services.data_analyzer import DataAnalyzer
-    from utils import save_json_file
+    from greenmining.services.data_analyzer import DataAnalyzer
+    from greenmining.utils import save_json_file
 
     try:
         # Load commits
@@ -127,12 +157,27 @@ def analyze(batch_size):
 
         commits = load_json_file(config.COMMITS_FILE)
         colored_print(f"\n🔬 Analyzing {len(commits)} commits for green patterns...\n", "cyan")
-        colored_print(" Method: Keyword-based heuristic analysis\n", "cyan")
+
+        # Show enabled methods
+        methods = ["Keyword"]
+        if enable_diff_analysis:
+            methods.append("Code Diff")
+        if enable_nlp:
+            methods.append("NLP")
+        if enable_ml_features:
+            methods.append("ML Features")
+
+        colored_print(f" Methods: {' + '.join(methods)}\n", "cyan")
         colored_print(f" Batch size: {batch_size}\n", "cyan")
 
         # Analyze
-        analyzer = DataAnalyzer()
-        results = analyzer.analyze_commits_batch(commits, batch_size=batch_size)
+        analyzer = DataAnalyzer(
+            batch_size=batch_size,
+            enable_diff_analysis=enable_diff_analysis,
+            enable_nlp=enable_nlp,
+            enable_ml_features=enable_ml_features,
+        )
+        results = analyzer.analyze_commits(commits)
 
         # Save results
         save_json_file(results, config.ANALYSIS_FILE)
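The analyzer can also be driven outside the CLI. A sketch, assuming load_json_file is importable from greenmining.utils alongside save_json_file and that the commits file path is passed explicitly:

    from greenmining.services.data_analyzer import DataAnalyzer
    from greenmining.utils import load_json_file  # assumed counterpart of save_json_file

    commits = load_json_file("data/commits.json")  # illustrative path; the CLI uses config.COMMITS_FILE
    analyzer = DataAnalyzer(
        batch_size=10,
        enable_diff_analysis=True,   # slower: inspects code diffs
        enable_nlp=False,
        enable_ml_features=False,
    )
    results = analyzer.analyze_commits(commits)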
@@ -159,12 +204,20 @@ def analyze(batch_size):
 
 
 @cli.command()
-def aggregate():
+@click.option("--enable-enhanced-stats", is_flag=True, help="Enable enhanced statistical analysis")
+@click.option("--enable-temporal", is_flag=True, help="Enable temporal trend analysis")
+@click.option(
+    "--temporal-granularity",
+    default="quarter",
+    type=click.Choice(["day", "week", "month", "quarter", "year"]),
+    help="Temporal analysis granularity",
+)
+def aggregate(enable_enhanced_stats, enable_temporal, temporal_granularity):
     """Aggregate analysis results and generate statistics."""
     presenter.show_banner()
 
-    from services.data_aggregator import DataAggregator
-    from utils import save_json_file
+    from greenmining.services.data_aggregator import DataAggregator
+    from greenmining.utils import save_json_file
 
     try:
         # Load data
@@ -176,8 +229,20 @@ def aggregate():
 
         colored_print(f"\n📊 Aggregating results from {len(results)} commits...\n", "cyan")
 
+        # Show enabled features
+        if enable_enhanced_stats:
+            colored_print(" Enhanced statistics: Enabled\n", "cyan")
+        if enable_temporal:
+            colored_print(
+                f" Temporal analysis: Enabled (granularity: {temporal_granularity})\n", "cyan"
+            )
+
         # Aggregate
-        aggregator = DataAggregator()
+        aggregator = DataAggregator(
+            enable_enhanced_stats=enable_enhanced_stats,
+            enable_temporal=enable_temporal,
+            temporal_granularity=temporal_granularity,
+        )
         aggregated = aggregator.aggregate(results, repos)
 
         # Save
@@ -187,7 +252,15 @@ def aggregate():
         presenter.show_analysis_results(aggregated)
 
         if aggregated.get("known_patterns"):
-            presenter.show_pattern_distribution(aggregated["known_patterns"], limit=10)
+            # Convert list format to dict format expected by presenter
+            patterns_dict = {}
+            for pattern in aggregated["known_patterns"]:
+                patterns_dict[pattern["pattern_name"]] = {
+                    "count": pattern["count"],
+                    "percentage": pattern["percentage"],
+                    "confidence_distribution": pattern.get("confidence_breakdown", {}),
+                }
+            presenter.show_pattern_distribution(patterns_dict, limit=10)
 
         presenter.show_success(f"Aggregation complete! Results saved to {config.AGGREGATED_FILE}")
 
@@ -202,20 +275,48 @@ def report(output):
     """Generate comprehensive markdown report."""
     presenter.show_banner()
 
-    from services.reports import ReportGenerator
+    from greenmining.services.reports import ReportGenerator
 
     try:
         # Load aggregated data
         if not config.AGGREGATED_FILE.exists():
             raise FileNotFoundError("No aggregated data found. Run 'aggregate' first.")
 
+        # Load analysis results
+        if not config.ANALYSIS_FILE.exists():
+            raise FileNotFoundError("No analysis results found. Run 'analyze' first.")
+
+        # Load repository data
+        if not config.REPOS_FILE.exists():
+            raise FileNotFoundError("No repository data found. Run 'fetch' first.")
+
         aggregated = load_json_file(config.AGGREGATED_FILE)
+        analysis_results = load_json_file(config.ANALYSIS_FILE)
+        repos_data = load_json_file(config.REPOS_FILE)
+
+        # Wrap analysis results if it's a list
+        if isinstance(analysis_results, list):
+            analysis = {"results": analysis_results, "total": len(analysis_results)}
+        else:
+            analysis = analysis_results
+
+        # Wrap repos data if it's a list
+        if isinstance(repos_data, list):
+            repos = {"repositories": repos_data, "total": len(repos_data)}
+        else:
+            repos = repos_data
 
         colored_print("\n📄 Generating comprehensive report...\n", "cyan")
 
         # Generate report
         generator = ReportGenerator()
-        report_path = generator.generate_report(aggregated, output)
+        report_content = generator.generate_report(aggregated, analysis, repos)
+
+        # Save report
+        from pathlib import Path
+
+        report_path = Path(output)
+        report_path.write_text(report_content)
 
         presenter.show_success(f"Report generated: {report_path}")
         colored_print("\n📖 The report includes:", "cyan")
@@ -317,8 +418,8 @@ def pipeline(max_repos, skip_fetch):
 
     # Phase 2: Extract
     colored_print("\n[2/5] 📝 Extracting commits...", "cyan")
-    from services.commit_extractor import CommitExtractor
-    from utils import save_json_file
+    from greenmining.services.commit_extractor import CommitExtractor
+    from greenmining.utils import save_json_file
 
     controller = RepositoryController(config)
     repos = controller.load_repositories()
@@ -329,7 +430,7 @@ def pipeline(max_repos, skip_fetch):
 
     # Phase 3: Analyze
     colored_print("\n[3/5] 🔬 Analyzing commits...", "cyan")
-    from services.data_analyzer import DataAnalyzer
+    from greenmining.services.data_analyzer import DataAnalyzer
 
     commits = load_json_file(config.COMMITS_FILE)
     analyzer = DataAnalyzer()
@@ -341,7 +442,7 @@ def pipeline(max_repos, skip_fetch):
 
     # Phase 4: Aggregate
     colored_print("\n[4/5] 📊 Aggregating results...", "cyan")
-    from services.data_aggregator import DataAggregator
+    from greenmining.services.data_aggregator import DataAggregator
 
     aggregator = DataAggregator()
     aggregated = aggregator.aggregate(results, [r.to_dict() for r in repos])
@@ -349,7 +450,7 @@ def pipeline(max_repos, skip_fetch):
 
     # Phase 5: Report
     colored_print("\n[5/5] 📄 Generating report...", "cyan")
-    from services.reports import ReportGenerator
+    from greenmining.services.reports import ReportGenerator
 
     generator = ReportGenerator()
     generator.generate_report(aggregated)
greenmining/config.py CHANGED
@@ -49,6 +49,27 @@ class Config:
         self.COMMITS_PER_REPO = int(os.getenv("COMMITS_PER_REPO", "50"))
         self.DAYS_BACK = int(os.getenv("DAYS_BACK", "730"))  # 2 years
 
+        # Advanced Analyzer Configuration
+        self.ENABLE_NLP_ANALYSIS = os.getenv("ENABLE_NLP_ANALYSIS", "false").lower() == "true"
+        self.ENABLE_TEMPORAL_ANALYSIS = (
+            os.getenv("ENABLE_TEMPORAL_ANALYSIS", "false").lower() == "true"
+        )
+        self.TEMPORAL_GRANULARITY = os.getenv(
+            "TEMPORAL_GRANULARITY", "quarter"
+        )  # day, week, month, quarter, year
+        self.ENABLE_ML_FEATURES = os.getenv("ENABLE_ML_FEATURES", "false").lower() == "true"
+        self.VALIDATION_SAMPLE_SIZE = int(os.getenv("VALIDATION_SAMPLE_SIZE", "30"))
+
+        # Temporal Filtering (NEW)
+        self.CREATED_AFTER = os.getenv("CREATED_AFTER")  # YYYY-MM-DD
+        self.CREATED_BEFORE = os.getenv("CREATED_BEFORE")  # YYYY-MM-DD
+        self.PUSHED_AFTER = os.getenv("PUSHED_AFTER")  # YYYY-MM-DD
+        self.PUSHED_BEFORE = os.getenv("PUSHED_BEFORE")  # YYYY-MM-DD
+        self.COMMIT_DATE_FROM = os.getenv("COMMIT_DATE_FROM")  # YYYY-MM-DD
+        self.COMMIT_DATE_TO = os.getenv("COMMIT_DATE_TO")  # YYYY-MM-DD
+        self.MIN_COMMITS = int(os.getenv("MIN_COMMITS", "0"))
+        self.ACTIVITY_WINDOW_DAYS = int(os.getenv("ACTIVITY_WINDOW_DAYS", "730"))
+
         # Analysis Configuration
         self.BATCH_SIZE = int(os.getenv("BATCH_SIZE", "10"))
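All of the new settings default to off or unset. A minimal sketch of enabling some of them through the environment before the config is constructed (assuming Config() can be built with no arguments, which this diff does not show):

    import os

    os.environ["ENABLE_NLP_ANALYSIS"] = "true"
    os.environ["TEMPORAL_GRANULARITY"] = "month"   # day, week, month, quarter, year
    os.environ["CREATED_AFTER"] = "2022-01-01"     # YYYY-MM-DD
    os.environ["MIN_COMMITS"] = "100"

    from greenmining.config import Config

    config = Config()  # reads os.getenv at construction time
    assert config.ENABLE_NLP_ANALYSIS is True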
 
@@ -17,7 +17,15 @@ class RepositoryController:
         self.github = Github(config.GITHUB_TOKEN)
 
     def fetch_repositories(
-        self, max_repos: int = None, min_stars: int = None, languages: list[str] = None
+        self,
+        max_repos: int = None,
+        min_stars: int = None,
+        languages: list[str] = None,
+        keywords: str = None,
+        created_after: str = None,
+        created_before: str = None,
+        pushed_after: str = None,
+        pushed_before: str = None,
     ) -> list[Repository]:
         """Fetch repositories from GitHub.
 
@@ -25,6 +33,11 @@ class RepositoryController:
             max_repos: Maximum number of repositories to fetch
             min_stars: Minimum stars filter
             languages: List of programming languages to filter
+            keywords: Custom search keywords (default: "microservices")
+            created_after: Repository created after date (YYYY-MM-DD)
+            created_before: Repository created before date (YYYY-MM-DD)
+            pushed_after: Repository pushed after date (YYYY-MM-DD)
+            pushed_before: Repository pushed before date (YYYY-MM-DD)
 
         Returns:
             List of Repository model instances
@@ -32,12 +45,23 @@ class RepositoryController:
         max_repos = max_repos or self.config.MAX_REPOS
         min_stars = min_stars or self.config.MIN_STARS
         languages = languages or self.config.SUPPORTED_LANGUAGES
+        keywords = keywords or "microservices"
 
         colored_print(f"🔍 Fetching up to {max_repos} repositories...", "cyan")
+        colored_print(f" Keywords: {keywords}", "cyan")
         colored_print(f" Filters: min_stars={min_stars}", "cyan")
 
-        # Build search query - simpler approach
-        query = f"microservices stars:>={min_stars}"
+        if created_after or created_before:
+            colored_print(
+                f" Created: {created_after or 'any'} to {created_before or 'any'}", "cyan"
+            )
+        if pushed_after or pushed_before:
+            colored_print(f" Pushed: {pushed_after or 'any'} to {pushed_before or 'any'}", "cyan")
+
+        # Build search query with temporal filters
+        query = self._build_temporal_query(
+            keywords, min_stars, created_after, created_before, pushed_after, pushed_before
+        )
 
         try:
             # Execute search
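For a concrete sense of the search string, the helper added in the next hunk (_build_temporal_query) returns, for illustrative arguments on a RepositoryController instance:

    query = controller._build_temporal_query(
        "microservices",
        50,
        created_after="2022-01-01",
        created_before="2023-12-31",
        pushed_after="2024-01-01",
    )
    # query == "microservices stars:>=50 created:2022-01-01..2023-12-31 pushed:>=2024-01-01"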
@@ -76,6 +100,37 @@ class RepositoryController:
             colored_print(f"❌ Error fetching repositories: {e}", "red")
             raise
 
+    def _build_temporal_query(
+        self,
+        keywords: str,
+        min_stars: int,
+        created_after: str = None,
+        created_before: str = None,
+        pushed_after: str = None,
+        pushed_before: str = None,
+    ) -> str:
+        """Build GitHub search query with temporal constraints."""
+        query_parts = [keywords, f"stars:>={min_stars}"]
+
+        # Temporal filters
+        if created_after and created_before:
+            query_parts.append(f"created:{created_after}..{created_before}")
+        elif created_after:
+            query_parts.append(f"created:>={created_after}")
+        elif created_before:
+            query_parts.append(f"created:<={created_before}")
+
+        if pushed_after and pushed_before:
+            query_parts.append(f"pushed:{pushed_after}..{pushed_before}")
+        elif pushed_after:
+            query_parts.append(f"pushed:>={pushed_after}")
+        elif pushed_before:
+            query_parts.append(f"pushed:<={pushed_before}")
+
+        query = " ".join(query_parts)
+        colored_print(f" Query: {query}", "cyan")
+        return query
+
     def load_repositories(self) -> list[Repository]:
         """Load repositories from file.
 
@@ -356,7 +356,12 @@ GSF_PATTERNS = {
     "compress_ml_models": {
         "name": "Compress ML Models for Inference",
         "category": "ai",
-        "keywords": ["compress", "model", "quantiz", "prune", "distill"],
+        "keywords": [
+            "model compression",
+            "quantization",
+            "model pruning",
+            "knowledge distillation",
+        ],
         "description": "Reduce model size through quantization, pruning, distillation",
         "sci_impact": "Dramatically reduces inference energy and memory",
     },
@@ -370,14 +375,14 @@ GSF_PATTERNS = {
     "energy_efficient_ai_edge": {
         "name": "Energy Efficient AI at Edge",
         "category": "ai",
-        "keywords": ["edge", "ai", "inference", "local", "device"],
+        "keywords": ["edge inference", "edge ai", "edge ml", "tflite", "onnx runtime"],
         "description": "Run inference on edge devices when possible",
         "sci_impact": "Eliminates network transfer, uses local compute",
     },
     "energy_efficient_framework": {
         "name": "Energy Efficient Framework",
         "category": "ai",
-        "keywords": ["framework", "tensorflow", "pytorch", "efficient"],
+        "keywords": ["tensorflow", "pytorch", "onnx", "jax", "huggingface"],
         "description": "Choose ML frameworks optimized for efficiency",
         "sci_impact": "Different frameworks have different energy profiles",
     },
@@ -405,14 +410,14 @@ GSF_PATTERNS = {
     "right_hardware_ai": {
         "name": "Right Hardware Type for AI",
         "category": "ai",
-        "keywords": ["hardware", "gpu", "tpu", "accelerator", "ai"],
+        "keywords": ["gpu training", "tpu", "cuda", "nvidia ai", "ml accelerator"],
         "description": "Use appropriate hardware (GPU/TPU) for AI workloads",
         "sci_impact": "Specialized hardware is more energy efficient",
     },
     "serverless_ml": {
         "name": "Serverless Model Development",
         "category": "ai",
-        "keywords": ["serverless", "ml", "sagemaker", "vertex", "lambda"],
+        "keywords": ["sagemaker", "vertex ai", "azure ml", "lambda inference", "serverless ml"],
         "description": "Use serverless platforms for ML development",
         "sci_impact": "Pay-per-use, no idle resources",
     },
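The move from single tokens to multi-word phrases narrows matching: a bare stem such as "model" or "edge" fires on almost any ML-related commit, while a phrase only fires on targeted wording. The matching code itself is not part of this diff; a rough case-insensitive substring check illustrates the effect:

    msg = "Apply INT8 quantization and model pruning to cut inference cost".lower()
    old = ["compress", "model", "quantiz", "prune", "distill"]
    new = ["model compression", "quantization", "model pruning", "knowledge distillation"]
    [kw for kw in old if kw in msg]  # ['model', 'quantiz'] -- the bare stem 'model' matches broadly
    [kw for kw in new if kw in msg]  # ['quantization', 'model pruning'] -- phrase-level matches only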
@@ -1,7 +1,9 @@
 """Aggregated Statistics Model - Represents aggregated analysis data."""
 
+from __future__ import annotations
+
 from dataclasses import dataclass, field
-from typing import Optional
+from typing import Dict, List, Optional
 
 
 @dataclass
@@ -1,6 +1,9 @@
 """Commit Model - Represents a Git commit."""
 
+from __future__ import annotations
+
 from dataclasses import dataclass, field
+from typing import List
 
 
 @dataclass
@@ -1,7 +1,9 @@
 """Repository Model - Represents a GitHub repository."""
 
+from __future__ import annotations
+
 from dataclasses import dataclass, field
-from typing import Optional
+from typing import List, Optional
 
 
 @dataclass
@@ -1,6 +1,8 @@
 """Console Presenter - Handles console output formatting."""
 
-from typing import Any
+from __future__ import annotations
+
+from typing import Any, Dict, List
 
 from tabulate import tabulate
 
@@ -1,9 +1,11 @@
 """Commit extractor for green microservices mining."""
 
+from __future__ import annotations
+
 import json
 from datetime import datetime, timedelta
 from pathlib import Path
-from typing import Any, Optional
+from typing import Any, Dict, List, Optional
 
 import click
 from github import Github
@@ -29,6 +31,7 @@ class CommitExtractor:
         skip_merges: bool = True,
         days_back: int = 730,
         github_token: str | None = None,
+        timeout: int = 60,
     ):
         """Initialize commit extractor.
 
@@ -37,12 +40,14 @@ class CommitExtractor:
             skip_merges: Skip merge commits
             days_back: Only analyze commits from last N days
             github_token: GitHub API token (optional)
+            timeout: Timeout in seconds per repository (default: 60)
         """
         self.max_commits = max_commits
         self.skip_merges = skip_merges
         self.days_back = days_back
         self.cutoff_date = datetime.now() - timedelta(days=days_back)
         self.github = Github(github_token) if github_token else None
+        self.timeout = timeout
 
     def extract_from_repositories(self, repositories: list[dict[str, Any]]) -> list[dict[str, Any]]:
         """Extract commits from list of repositories.
@@ -62,14 +67,35 @@ class CommitExtractor:
                 "cyan",
             )
 
+        import signal
+
+        def timeout_handler(signum, frame):
+            raise TimeoutError("Repository extraction timeout")
+
         with tqdm(total=len(repositories), desc="Processing repositories", unit="repo") as pbar:
             for repo in repositories:
                 try:
+                    # Set timeout alarm
+                    signal.signal(signal.SIGALRM, timeout_handler)
+                    signal.alarm(self.timeout)
+
                     commits = self._extract_repo_commits(repo)
                     all_commits.extend(commits)
+
+                    # Cancel alarm
+                    signal.alarm(0)
+
                     pbar.set_postfix({"commits": len(all_commits), "failed": len(failed_repos)})
                     pbar.update(1)
+                except TimeoutError:
+                    signal.alarm(0)  # Cancel alarm
+                    colored_print(
+                        f"\nTimeout processing {repo['full_name']} (>{self.timeout}s)", "yellow"
+                    )
+                    failed_repos.append(repo["full_name"])
+                    pbar.update(1)
                 except Exception as e:
+                    signal.alarm(0)  # Cancel alarm
                     colored_print(f"\nError processing {repo['full_name']}: {e}", "yellow")
                     failed_repos.append(repo["full_name"])
                     pbar.update(1)
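A portability note on this timeout mechanism: signal.alarm and SIGALRM are POSIX-only, the handler must be installed in the main thread, and the resolution is whole seconds. The core pattern, isolated from the extractor (do_slow_work is a placeholder for the per-repository extraction):

    import signal

    def _handler(signum, frame):
        raise TimeoutError("operation timed out")

    signal.signal(signal.SIGALRM, _handler)  # install handler (main thread only)
    signal.alarm(60)                         # arm: deliver SIGALRM after 60 seconds
    try:
        do_slow_work()                       # placeholder; TimeoutError is raised if the alarm fires first
    finally:
        signal.alarm(0)                      # always disarm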