greenmining 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
greenmining/__init__.py ADDED
@@ -0,0 +1,20 @@
+ """Green Microservices Mining - GSF Pattern Analysis Tool."""
+
+ from greenmining.config import Config
+ from greenmining.gsf_patterns import (
+     GREEN_KEYWORDS,
+     GSF_PATTERNS,
+     get_pattern_by_keywords,
+     is_green_aware,
+ )
+
+ __version__ = "0.1.4"
+
+ __all__ = [
+     "Config",
+     "GSF_PATTERNS",
+     "GREEN_KEYWORDS",
+     "is_green_aware",
+     "get_pattern_by_keywords",
+     "__version__",
+ ]
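
For orientation, here is a minimal usage sketch of the names this `__init__.py` exports. The exact signatures of `is_green_aware` and `get_pattern_by_keywords` are not shown in this diff, so the string-in shapes below are assumptions:

    # Hypothetical usage sketch; signatures are assumed, not confirmed by this diff.
    from greenmining import GREEN_KEYWORDS, get_pattern_by_keywords, is_green_aware

    message = "reduce idle CPU usage by batching requests"
    if is_green_aware(message):                  # assumed: commit message -> bool
        print(get_pattern_by_keywords(message))  # assumed: commit message -> matched pattern(s)
    print(f"{len(GREEN_KEYWORDS)} green keywords loaded")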
greenmining/__main__.py ADDED
@@ -0,0 +1,6 @@
+ """Allow running greenmining as a module: python -m greenmining"""
+
+ from greenmining.cli import cli
+
+ if __name__ == "__main__":
+     cli()
@@ -0,0 +1,3 @@
+ """Version information for greenmining."""
+
+ __version__ = "0.1.4"
greenmining/cli.py ADDED
@@ -0,0 +1,370 @@
+ """Green Microservices Mining CLI"""
+
+ import click
+
+ from greenmining.config import Config
+ from greenmining.controllers.repository_controller import RepositoryController
+ from greenmining.presenters.console_presenter import ConsolePresenter
+ from greenmining.utils import colored_print, load_json_file
+
+ # Initialize configuration
+ config = Config()
+
+ # Initialize presenter
+ presenter = ConsolePresenter()
+
+
+ @click.group()
+ @click.option("--config-file", default=".env", help="Path to configuration file")
+ @click.option("--verbose", is_flag=True, help="Enable verbose output")
+ def cli(config_file, verbose):
+     """Green Microservices Mining"""
+     global config
+     config = Config(config_file)  # honor --config-file instead of silently ignoring it
+     if verbose:
+         config.VERBOSE = True
+
+
+ @cli.command()
+ @click.option("--max-repos", default=100, type=int, help="Maximum repositories to fetch")
+ @click.option("--min-stars", default=100, type=int, help="Minimum stars required")
+ @click.option(
+     "--languages", default="Python,Java,Go,JavaScript,TypeScript", help="Comma-separated languages"
+ )
+ def fetch(max_repos, min_stars, languages):
+     """Fetch top microservice repositories from GitHub."""
+     presenter.show_banner()
+     colored_print(f"\nšŸŽÆ Target: {max_repos} repositories\n", "cyan")
+
+     controller = RepositoryController(config)
+     lang_list = [lang.strip() for lang in languages.split(",")]
+
+     try:
+         repositories = controller.fetch_repositories(
+             max_repos=max_repos, min_stars=min_stars, languages=lang_list
+         )
+
+         # Show results
+         repo_dicts = [r.to_dict() for r in repositories]
+         presenter.show_repositories(repo_dicts, limit=10)
+
+         stats = controller.get_repository_stats(repositories)
+         colored_print(f"\nšŸ“Š Total Stars: {stats.get('total_stars', 0):,}", "green")
+         colored_print(f"šŸ“ˆ Average Stars: {stats.get('avg_stars', 0):.0f}", "green")
+
+         presenter.show_success(f"Fetched {len(repositories)} repositories successfully!")
+
+     except Exception as e:
+         presenter.show_error(str(e))
+         raise click.Abort() from e
+
+
+ @cli.command()
+ @click.option("--max-commits", default=50, type=int, help="Max commits per repository")
+ @click.option("--skip-merges/--no-skip-merges", default=True, help="Skip merge commits")
+ @click.option("--days-back", default=730, type=int, help="Days to look back (default: 2 years)")
+ def extract(max_commits, skip_merges, days_back):
+     """Extract commits from fetched repositories."""
+     presenter.show_banner()
+
+     from greenmining.services.commit_extractor import CommitExtractor
+
+     try:
+         # Load repositories
+         controller = RepositoryController(config)
+         repositories = controller.load_repositories()
+
+         colored_print(f"\nšŸ“ Extracting commits from {len(repositories)} repositories...\n", "cyan")
+         colored_print(
+             f" Settings: max={max_commits}/repo, skip_merges={skip_merges}, days_back={days_back}\n",
+             "cyan",
+         )
+
+         # Extract commits
+         extractor = CommitExtractor(
+             max_commits=max_commits, skip_merges=skip_merges, days_back=days_back
+         )
+         commits = extractor.extract_from_repositories(
+             repositories=[r.to_dict() for r in repositories]
+         )
+
+         # Save commits
+         from greenmining.utils import save_json_file
+
+         save_json_file(commits, config.COMMITS_FILE)
+         colored_print(f" Saved to: {config.COMMITS_FILE}", "cyan")
+
+         # Show stats
+         stats = {
+             "total_commits": len(commits),
+             "total_repos": len(repositories),
+             "avg_per_repo": len(commits) / len(repositories) if repositories else 0,
+         }
+
+         presenter.show_commit_stats(stats)
+         presenter.show_success(f"Extracted {len(commits)} commits successfully!")
+
+     except FileNotFoundError as e:
+         presenter.show_error(str(e))
+         colored_print("šŸ’” Run 'fetch' command first to get repositories", "yellow")
+         raise click.Abort() from e
+     except Exception as e:
+         presenter.show_error(str(e))
+         raise click.Abort() from e
+
+
+ @cli.command()
+ @click.option("--batch-size", default=10, type=int, help="Batch size for processing")
+ def analyze(batch_size):
+     """Analyze commits for green software patterns."""
+     presenter.show_banner()
+
+     from greenmining.services.data_analyzer import DataAnalyzer
+     from greenmining.utils import save_json_file
+
+     try:
+         # Load commits
+         if not config.COMMITS_FILE.exists():
+             raise FileNotFoundError("No commits file found. Run 'extract' first.")
+
+         commits = load_json_file(config.COMMITS_FILE)
+         colored_print(f"\nšŸ”¬ Analyzing {len(commits)} commits for green patterns...\n", "cyan")
+         colored_print(" Method: Keyword-based heuristic analysis\n", "cyan")
+         colored_print(f" Batch size: {batch_size}\n", "cyan")
+
+         # Analyze
+         analyzer = DataAnalyzer()
+         results = analyzer.analyze_commits_batch(commits, batch_size=batch_size)
+
+         # Save results
+         save_json_file(results, config.ANALYSIS_FILE)
+
+         # Show results
+         green_count = sum(1 for r in results if r.get("green_aware", False))
+         green_rate = (green_count / len(results)) if results else 0
+
+         results_dict = {
+             "summary": {
+                 "total_commits": len(results),
+                 "green_commits": green_count,
+                 "green_commit_rate": green_rate,
+             },
+             "known_patterns": {},
+         }
+
+         presenter.show_analysis_results(results_dict)
+         presenter.show_success(f"Analysis complete! Results saved to {config.ANALYSIS_FILE}")
+
+     except Exception as e:
+         presenter.show_error(str(e))
+         raise click.Abort() from e
+
+
+ @cli.command()
+ def aggregate():
+     """Aggregate analysis results and generate statistics."""
+     presenter.show_banner()
+
+     from greenmining.services.data_aggregator import DataAggregator
+     from greenmining.utils import save_json_file
+
+     try:
+         # Load data
+         if not config.ANALYSIS_FILE.exists():
+             raise FileNotFoundError("No analysis file found. Run 'analyze' first.")
+
+         results = load_json_file(config.ANALYSIS_FILE)
+         repos = load_json_file(config.REPOS_FILE) if config.REPOS_FILE.exists() else []
+
+         colored_print(f"\nšŸ“Š Aggregating results from {len(results)} commits...\n", "cyan")
+
+         # Aggregate
+         aggregator = DataAggregator()
+         aggregated = aggregator.aggregate(results, repos)
+
+         # Save
+         save_json_file(aggregated, config.AGGREGATED_FILE)
+
+         # Show results
+         presenter.show_analysis_results(aggregated)
+
+         if aggregated.get("known_patterns"):
+             presenter.show_pattern_distribution(aggregated["known_patterns"], limit=10)
+
+         presenter.show_success(f"Aggregation complete! Results saved to {config.AGGREGATED_FILE}")
+
+     except Exception as e:
+         presenter.show_error(str(e))
+         raise click.Abort() from e
+
+
+ @cli.command()
+ @click.option("--output", default="green_microservices_analysis.md", help="Output filename")
+ def report(output):
+     """Generate comprehensive markdown report."""
+     presenter.show_banner()
+
+     from greenmining.services.reports import ReportGenerator
+
+     try:
+         # Load aggregated data
+         if not config.AGGREGATED_FILE.exists():
+             raise FileNotFoundError("No aggregated data found. Run 'aggregate' first.")
+
+         aggregated = load_json_file(config.AGGREGATED_FILE)
+
+         colored_print("\nšŸ“„ Generating comprehensive report...\n", "cyan")
+
+         # Generate report
+         generator = ReportGenerator()
+         report_path = generator.generate_report(aggregated, output)
+
+         presenter.show_success(f"Report generated: {report_path}")
+         colored_print("\nšŸ“– The report includes:", "cyan")
+         colored_print(" • Executive Summary", "white")
+         colored_print(" • Methodology", "white")
+         colored_print(" • Results & Statistics", "white")
+         colored_print(" • Pattern Analysis", "white")
+         colored_print(" • Per-Repository Breakdown", "white")
+         colored_print(" • Discussion & Conclusions", "white")
+
+     except Exception as e:
+         presenter.show_error(str(e))
+         raise click.Abort() from e
+
+
+ @cli.command()
+ def status():
+     """Show current pipeline status."""
+     presenter.show_banner()
+
+     phase_files = {
+         "1. Fetch Repositories": config.REPOS_FILE,
+         "2. Extract Commits": config.COMMITS_FILE,
+         "3. Analyze Commits": config.ANALYSIS_FILE,
+         "4. Aggregate Results": config.AGGREGATED_FILE,
+         "5. Generate Report": config.REPORT_FILE,
+     }
+     phases = {
+         name: {
+             "file": str(path),
+             "completed": path.exists(),
+             "size": f"{path.stat().st_size / 1024:.1f} KB" if path.exists() else "N/A",
+         }
+         for name, path in phase_files.items()
+     }
+
+     presenter.show_pipeline_status(phases)
+
+     # Show next step (for/else: the else branch runs only if no phase is incomplete)
+     for phase_name, info in phases.items():
+         if not info["completed"]:
+             colored_print(f"\nšŸ’” Next step: {phase_name}", "yellow")
+             break
+     else:
+         colored_print("\nāœ… All phases complete!", "green")
+
+
+ @cli.command()
+ @click.option("--max-repos", default=100, type=int, help="Maximum repositories to analyze")
+ @click.option("--skip-fetch", is_flag=True, help="Skip fetch phase if data exists")
+ def pipeline(max_repos, skip_fetch):
+     """Run full pipeline: fetch → extract → analyze → aggregate → report."""
+     presenter.show_banner()
+
+     colored_print("\nšŸš€ Starting Full Pipeline...\n", "green")
+     colored_print(f" Target: {max_repos} repositories", "cyan")
+     colored_print(" Phases: fetch → extract → analyze → aggregate → report\n", "cyan")
+
+     try:
+         # Phase 1: Fetch
+         if not skip_fetch or not config.REPOS_FILE.exists():
+             colored_print("\n[1/5] šŸ” Fetching repositories...", "cyan")
+             controller = RepositoryController(config)
+             controller.fetch_repositories(max_repos=max_repos)
+         else:
+             colored_print("\n[1/5] ā­ļø Skipping fetch (using existing data)", "yellow")
+
+         # Phase 2: Extract
+         colored_print("\n[2/5] šŸ“ Extracting commits...", "cyan")
+         from greenmining.services.commit_extractor import CommitExtractor
+         from greenmining.utils import save_json_file
+
+         controller = RepositoryController(config)
+         repos = controller.load_repositories()
+         extractor = CommitExtractor()
+         commits = extractor.extract_from_repositories([r.to_dict() for r in repos])
+         save_json_file(commits, config.COMMITS_FILE)
+         colored_print(f" Saved {len(commits)} commits to: {config.COMMITS_FILE}", "green")
+
+         # Phase 3: Analyze
+         colored_print("\n[3/5] šŸ”¬ Analyzing commits...", "cyan")
+         from greenmining.services.data_analyzer import DataAnalyzer
+
+         commits = load_json_file(config.COMMITS_FILE)
+         analyzer = DataAnalyzer()
+         results = analyzer.analyze_commits_batch(commits)
+         save_json_file(results, config.ANALYSIS_FILE)
+         colored_print(
+             f" Analyzed {len(results)} commits, saved to: {config.ANALYSIS_FILE}", "green"
+         )
+
+         # Phase 4: Aggregate
+         colored_print("\n[4/5] šŸ“Š Aggregating results...", "cyan")
+         from greenmining.services.data_aggregator import DataAggregator
+
+         aggregator = DataAggregator()
+         aggregated = aggregator.aggregate(results, [r.to_dict() for r in repos])
+         save_json_file(aggregated, config.AGGREGATED_FILE)
+
+         # Phase 5: Report
+         colored_print("\n[5/5] šŸ“„ Generating report...", "cyan")
+         from greenmining.services.reports import ReportGenerator
+
+         generator = ReportGenerator()
+         generator.generate_report(aggregated)
+
+         colored_print("\n" + "=" * 60, "green")
+         colored_print("āœ… Pipeline Complete!", "green")
+         colored_print("=" * 60, "green")
+
+         presenter.show_success(f"All results saved to {config.OUTPUT_DIR}")
+         colored_print(f"\nšŸ“– View report: {config.REPORT_FILE}", "cyan")
+
+     except Exception as e:
+         presenter.show_error(str(e))
+         raise click.Abort() from e
+
+
+ if __name__ == "__main__":
+     cli()
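
Because `cli` is a standard `click.Group`, the commands above can be exercised in-process with click's built-in test runner. A minimal sketch, assuming a `GITHUB_TOKEN` is set before import (the module instantiates `Config()` at import time; the token below is a placeholder):

    # Minimal sketch: driving the CLI in-process with click's test runner.
    import os
    os.environ.setdefault("GITHUB_TOKEN", "ghp_placeholder")  # placeholder, not a real token

    from click.testing import CliRunner
    from greenmining.cli import cli

    runner = CliRunner()
    result = runner.invoke(cli, ["status"])  # runs the status command in-process
    print(result.output)
    print("exit code:", result.exit_code)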
greenmining/config.py ADDED
@@ -0,0 +1,120 @@
+ """Configuration management for green microservices mining CLI."""
+
+ import os
+ from pathlib import Path
+
+ from dotenv import load_dotenv
+
+
+ class Config:
+     """Configuration class for loading and validating environment variables."""
+
+     def __init__(self, env_file: str = ".env"):
+         """Initialize configuration from environment file.
+
+         Args:
+             env_file: Path to .env file
+         """
+         # Load environment variables
+         env_path = Path(env_file)
+         if env_path.exists():
+             load_dotenv(env_path)
+         else:
+             load_dotenv()  # Fall back to default .env discovery
+
+         # GitHub API Configuration
+         self.GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
+         if not self.GITHUB_TOKEN or self.GITHUB_TOKEN == "your_github_pat_here":
+             raise ValueError("GITHUB_TOKEN not set. Please set it in .env file or environment.")
+
+         # Analysis type: keyword-based heuristic analysis
+         self.ANALYSIS_TYPE = "keyword_heuristic"
+
+         # Search and Processing Configuration
+         self.GITHUB_SEARCH_KEYWORDS = ["microservices", "microservice-architecture", "cloud-native"]
+
+         self.SUPPORTED_LANGUAGES = [
+             "Java",
+             "Python",
+             "Go",
+             "JavaScript",
+             "TypeScript",
+             "C#",
+             "Rust",
+         ]
+
+         # Repository and Commit Limits
+         self.MIN_STARS = int(os.getenv("MIN_STARS", "100"))
+         self.MAX_REPOS = int(os.getenv("MAX_REPOS", "100"))
+         self.COMMITS_PER_REPO = int(os.getenv("COMMITS_PER_REPO", "50"))
+         self.DAYS_BACK = int(os.getenv("DAYS_BACK", "730"))  # 2 years
+
+         # Analysis Configuration
+         self.BATCH_SIZE = int(os.getenv("BATCH_SIZE", "10"))
+
+         # Processing Configuration
+         self.TIMEOUT_SECONDS = int(os.getenv("TIMEOUT_SECONDS", "30"))
+         self.MAX_RETRIES = int(os.getenv("MAX_RETRIES", "3"))
+         self.RETRY_DELAY = 2  # seconds
+         self.EXPONENTIAL_BACKOFF = True
+
+         # Output Configuration
+         self.OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", "./data"))
+         self.OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+         # File Paths
+         self.REPOS_FILE = self.OUTPUT_DIR / "repositories.json"
+         self.COMMITS_FILE = self.OUTPUT_DIR / "commits.json"
+         self.ANALYSIS_FILE = self.OUTPUT_DIR / "analysis_results.json"
+         self.AGGREGATED_FILE = self.OUTPUT_DIR / "aggregated_statistics.json"
+         self.CSV_FILE = self.OUTPUT_DIR / "green_analysis_results.csv"
+         self.REPORT_FILE = self.OUTPUT_DIR / "green_microservices_analysis.md"
+         self.CHECKPOINT_FILE = self.OUTPUT_DIR / "checkpoint.json"
+
+         # Logging
+         self.VERBOSE = os.getenv("VERBOSE", "false").lower() == "true"
+         self.LOG_FILE = self.OUTPUT_DIR / "mining.log"
+
+     def validate(self) -> bool:
+         """Validate that all required configuration is present.
+
+         Returns:
+             True if configuration is valid
+         """
+         # Only attributes this class actually sets; requiring an unset attribute
+         # (e.g. CLAUDE_API_KEY) would make validate() fail unconditionally.
+         required_attrs = ["GITHUB_TOKEN", "MAX_REPOS", "COMMITS_PER_REPO"]
+
+         for attr in required_attrs:
+             if not getattr(self, attr, None):
+                 raise ValueError(f"Missing required configuration: {attr}")
+
+         return True
+
+     def __repr__(self) -> str:
+         """String representation of configuration (hiding sensitive data)."""
+         return (
+             f"Config("
+             f"MAX_REPOS={self.MAX_REPOS}, "
+             f"COMMITS_PER_REPO={self.COMMITS_PER_REPO}, "
+             f"BATCH_SIZE={self.BATCH_SIZE}, "
+             f"OUTPUT_DIR={self.OUTPUT_DIR}"
+             f")"
+         )
+
+
+ # Global config instance
+ _config_instance = None
+
+
+ def get_config(env_file: str = ".env") -> Config:
+     """Get or create global configuration instance.
+
+     Args:
+         env_file: Path to .env file
+
+     Returns:
+         Config instance
+     """
+     global _config_instance
+     if _config_instance is None:
+         _config_instance = Config(env_file)
+     return _config_instance
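
Since every limit falls back to `os.getenv`, runs can be tuned without touching code. A short sketch of the override flow, with a placeholder token:

    # Minimal sketch: overriding defaults via environment variables.
    import os

    os.environ["GITHUB_TOKEN"] = "ghp_placeholder"  # placeholder; Config() raises without one
    os.environ["MAX_REPOS"] = "25"                  # overrides the default of 100
    os.environ["OUTPUT_DIR"] = "./tmp_data"         # output files land under this directory

    from greenmining.config import get_config

    config = get_config()     # first call builds the singleton, later calls reuse it
    print(config)             # Config(MAX_REPOS=25, COMMITS_PER_REPO=50, ...)
    print(config.REPOS_FILE)  # tmp_data/repositories.json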
greenmining/controllers/__init__.py ADDED
@@ -0,0 +1,11 @@
+ """
+ Controllers Package - Business logic and orchestration for mining operations.
+
+ Controllers coordinate between models, services, and presenters following MCP architecture.
+ """
+
+ from .repository_controller import RepositoryController
+
+ __all__ = [
+     "RepositoryController",
+ ]
greenmining/controllers/repository_controller.py ADDED
@@ -0,0 +1,117 @@
+ """Repository Controller - Handles repository fetching operations."""
+
+ from github import Github, GithubException
+ from tqdm import tqdm
+
+ from greenmining.config import Config
+ from greenmining.models.repository import Repository
+ from greenmining.utils import colored_print, load_json_file, save_json_file
+
+
+ class RepositoryController:
+     """Controller for GitHub repository operations."""
+
+     def __init__(self, config: Config):
+         """Initialize controller with configuration."""
+         self.config = config
+         self.github = Github(config.GITHUB_TOKEN)
+
+     def fetch_repositories(
+         self,
+         max_repos: int | None = None,
+         min_stars: int | None = None,
+         languages: list[str] | None = None,
+     ) -> list[Repository]:
+         """Fetch repositories from GitHub.
+
+         Args:
+             max_repos: Maximum number of repositories to fetch
+             min_stars: Minimum stars filter
+             languages: List of programming languages to filter
+
+         Returns:
+             List of Repository model instances
+         """
+         max_repos = max_repos or self.config.MAX_REPOS
+         min_stars = min_stars or self.config.MIN_STARS
+         languages = languages or self.config.SUPPORTED_LANGUAGES
+
+         colored_print(f"šŸ” Fetching up to {max_repos} repositories...", "cyan")
+         colored_print(f" Filters: min_stars={min_stars}", "cyan")
+
+         # Build search query; the languages filter is applied client-side below,
+         # since a single GitHub search query cannot OR multiple languages.
+         query = f"microservices stars:>={min_stars}"
+
+         try:
+             # Execute search
+             search_results = self.github.search_repositories(
+                 query=query, sort="stars", order="desc"
+             )
+
+             total_found = search_results.totalCount
+             colored_print(f" Found {total_found} repositories", "green")
+
+             # Fetch repositories
+             repositories = []
+             with tqdm(total=min(max_repos, total_found), desc="Fetching", unit="repo") as pbar:
+                 for repo in search_results:
+                     if len(repositories) >= max_repos:
+                         break
+                     if languages and repo.language not in languages:
+                         continue  # enforce the languages filter
+
+                     try:
+                         repo_model = Repository.from_github_repo(repo, len(repositories) + 1)
+                         repositories.append(repo_model)
+                         pbar.update(1)
+                     except GithubException as e:
+                         colored_print(f" Error: {repo.full_name}: {e}", "yellow")
+                         continue
+
+             # Save to file
+             repo_dicts = [r.to_dict() for r in repositories]
+             save_json_file(repo_dicts, self.config.REPOS_FILE)
+
+             colored_print(f"āœ… Fetched {len(repositories)} repositories", "green")
+             colored_print(f" Saved to: {self.config.REPOS_FILE}", "cyan")
+
+             return repositories
+
+         except Exception as e:
+             colored_print(f"āŒ Error fetching repositories: {e}", "red")
+             raise
+
+     def load_repositories(self) -> list[Repository]:
+         """Load repositories from file.
+
+         Returns:
+             List of Repository model instances
+         """
+         if not self.config.REPOS_FILE.exists():
+             raise FileNotFoundError(f"No repositories file found at {self.config.REPOS_FILE}")
+
+         repo_dicts = load_json_file(self.config.REPOS_FILE)
+         return [Repository.from_dict(r) for r in repo_dicts]
+
+     def get_repository_stats(self, repositories: list[Repository]) -> dict:
+         """Get statistics about fetched repositories.
+
+         Args:
+             repositories: List of Repository instances
+
+         Returns:
+             Dictionary with statistics
+         """
+         if not repositories:
+             return {}
+
+         return {
+             "total": len(repositories),
+             "by_language": self._count_by_language(repositories),
+             "total_stars": sum(r.stars for r in repositories),
+             "avg_stars": sum(r.stars for r in repositories) / len(repositories),
+             "top_repo": max(repositories, key=lambda r: r.stars).full_name,
+         }
+
+     def _count_by_language(self, repositories: list[Repository]) -> dict:
+         """Count repositories by language."""
+         counts = {}
+         for repo in repositories:
+             lang = repo.language or "Unknown"
+             counts[lang] = counts.get(lang, 0) + 1
+         return counts
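
Finally, a minimal end-to-end sketch of the controller shown above, fetching a small sample and reusing the saved snapshot; it assumes a real `GITHUB_TOKEN` is configured:

    # Minimal sketch: fetching a handful of repositories and printing stats.
    from greenmining.config import Config
    from greenmining.controllers.repository_controller import RepositoryController

    config = Config()  # raises if GITHUB_TOKEN is missing
    controller = RepositoryController(config)

    repos = controller.fetch_repositories(
        max_repos=5, min_stars=500, languages=["Python", "Go"]
    )
    stats = controller.get_repository_stats(repos)
    print(stats["total"], "repos,", stats["total_stars"], "total stars")
    print("top repo:", stats["top_repo"])

    # Later runs can reload the saved repositories.json instead of hitting the API:
    cached = controller.load_repositories()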