greenmining 1.0.3__py3-none-any.whl → 1.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- greenmining/__init__.py +11 -29
- greenmining/__main__.py +9 -3
- greenmining/__version__.py +2 -2
- greenmining/analyzers/__init__.py +3 -7
- greenmining/analyzers/code_diff_analyzer.py +151 -61
- greenmining/analyzers/qualitative_analyzer.py +15 -81
- greenmining/analyzers/statistical_analyzer.py +8 -69
- greenmining/analyzers/temporal_analyzer.py +16 -72
- greenmining/config.py +105 -58
- greenmining/controllers/__init__.py +1 -5
- greenmining/controllers/repository_controller.py +153 -94
- greenmining/energy/__init__.py +13 -0
- greenmining/energy/base.py +165 -0
- greenmining/energy/codecarbon_meter.py +146 -0
- greenmining/energy/rapl.py +157 -0
- greenmining/gsf_patterns.py +4 -26
- greenmining/models/__init__.py +1 -5
- greenmining/models/aggregated_stats.py +4 -4
- greenmining/models/analysis_result.py +4 -4
- greenmining/models/commit.py +5 -5
- greenmining/models/repository.py +5 -5
- greenmining/presenters/__init__.py +1 -5
- greenmining/presenters/console_presenter.py +24 -24
- greenmining/services/__init__.py +10 -6
- greenmining/services/commit_extractor.py +8 -152
- greenmining/services/data_aggregator.py +45 -175
- greenmining/services/data_analyzer.py +9 -202
- greenmining/services/github_fetcher.py +212 -323
- greenmining/services/github_graphql_fetcher.py +371 -0
- greenmining/services/local_repo_analyzer.py +387 -0
- greenmining/services/reports.py +33 -137
- greenmining/utils.py +21 -149
- {greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/METADATA +61 -151
- greenmining-1.0.4.dist-info/RECORD +37 -0
- {greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/WHEEL +1 -1
- greenmining/analyzers/ml_feature_extractor.py +0 -512
- greenmining/analyzers/nlp_analyzer.py +0 -365
- greenmining/cli.py +0 -471
- greenmining/main.py +0 -37
- greenmining-1.0.3.dist-info/RECORD +0 -36
- greenmining-1.0.3.dist-info/entry_points.txt +0 -2
- {greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/licenses/LICENSE +0 -0
- {greenmining-1.0.3.dist-info → greenmining-1.0.4.dist-info}/top_level.txt +0 -0
greenmining/services/reports.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
# Report generation for green mining analysis.
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
@@ -7,8 +7,6 @@ from datetime import datetime
|
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from typing import Any, Dict, Optional
|
|
9
9
|
|
|
10
|
-
import click
|
|
11
|
-
|
|
12
10
|
from greenmining.config import get_config
|
|
13
11
|
from greenmining.utils import (
|
|
14
12
|
colored_print,
|
|
@@ -20,10 +18,10 @@ from greenmining.utils import (
|
|
|
20
18
|
|
|
21
19
|
|
|
22
20
|
class ReportGenerator:
|
|
23
|
-
|
|
21
|
+
# Generates markdown report from aggregated statistics.
|
|
24
22
|
|
|
25
23
|
def __init__(self):
|
|
26
|
-
|
|
24
|
+
# Initialize report generator.
|
|
27
25
|
pass
|
|
28
26
|
|
|
29
27
|
def generate_report(
|
|
@@ -32,16 +30,7 @@ class ReportGenerator:
|
|
|
32
30
|
analysis_data: dict[str, Any],
|
|
33
31
|
repos_data: dict[str, Any],
|
|
34
32
|
) -> str:
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
Args:
|
|
38
|
-
aggregated_data: Aggregated statistics
|
|
39
|
-
analysis_data: Original analysis results
|
|
40
|
-
repos_data: Repository metadata
|
|
41
|
-
|
|
42
|
-
Returns:
|
|
43
|
-
Markdown report content
|
|
44
|
-
"""
|
|
33
|
+
# Generate comprehensive markdown report.
|
|
45
34
|
report_sections = []
|
|
46
35
|
|
|
47
36
|
# Title and metadata
|
|
@@ -68,7 +57,7 @@ class ReportGenerator:
|
|
|
68
57
|
return "\n\n".join(report_sections)
|
|
69
58
|
|
|
70
59
|
def _generate_header(self) -> str:
|
|
71
|
-
|
|
60
|
+
# Generate report header.
|
|
72
61
|
return f"""# Mining Software Repositories for Green Microservices
|
|
73
62
|
## Comprehensive Analysis Report
|
|
74
63
|
|
|
@@ -78,7 +67,7 @@ class ReportGenerator:
|
|
|
78
67
|
---"""
|
|
79
68
|
|
|
80
69
|
def _generate_executive_summary(self, data: dict[str, Any]) -> str:
|
|
81
|
-
|
|
70
|
+
# Generate executive summary.
|
|
82
71
|
summary = data["summary"]
|
|
83
72
|
top_patterns = data["known_patterns"][:3] if data["known_patterns"] else []
|
|
84
73
|
|
|
@@ -106,7 +95,7 @@ These findings suggest that while green software practices are present in micros
|
|
|
106
95
|
def _generate_methodology(
|
|
107
96
|
self, repos_data: dict[str, Any], analysis_data: dict[str, Any]
|
|
108
97
|
) -> str:
|
|
109
|
-
|
|
98
|
+
# Generate methodology section.
|
|
110
99
|
metadata = repos_data.get("metadata", {})
|
|
111
100
|
analysis_metadata = analysis_data.get("metadata", {})
|
|
112
101
|
|
|
@@ -167,7 +156,7 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
|
|
|
167
156
|
- 2-year time window may not capture all historical practices"""
|
|
168
157
|
|
|
169
158
|
def _generate_results(self, data: dict[str, Any]) -> str:
|
|
170
|
-
|
|
159
|
+
# Generate results section.
|
|
171
160
|
sections = []
|
|
172
161
|
|
|
173
162
|
# 2.1 Green Awareness
|
|
@@ -182,15 +171,15 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
|
|
|
182
171
|
# 2.4 Per-Repository Analysis
|
|
183
172
|
sections.append(self._generate_repo_analysis_section(data))
|
|
184
173
|
|
|
185
|
-
# 2.5
|
|
186
|
-
|
|
187
|
-
if
|
|
188
|
-
sections.append(
|
|
174
|
+
# 2.5 Statistics (if available)
|
|
175
|
+
stats_section = self._generate_statistics_section(data)
|
|
176
|
+
if stats_section:
|
|
177
|
+
sections.append(stats_section)
|
|
189
178
|
|
|
190
179
|
return "### 2. Results\n\n" + "\n\n".join(sections)
|
|
191
180
|
|
|
192
181
|
def _generate_green_awareness_section(self, data: dict[str, Any]) -> str:
|
|
193
|
-
|
|
182
|
+
# Generate green awareness subsection.
|
|
194
183
|
summary = data["summary"]
|
|
195
184
|
per_lang = data["per_language_stats"]
|
|
196
185
|
per_repo = data["per_repo_stats"]
|
|
@@ -219,7 +208,7 @@ Commits were analyzed using a keyword and heuristic-based classification framewo
|
|
|
219
208
|
{lang_table}"""
|
|
220
209
|
|
|
221
210
|
def _generate_known_patterns_section(self, data: dict[str, Any]) -> str:
|
|
222
|
-
|
|
211
|
+
# Generate known patterns subsection.
|
|
223
212
|
patterns = data["known_patterns"]
|
|
224
213
|
|
|
225
214
|
if not patterns:
|
|
@@ -259,7 +248,7 @@ The following table summarizes the known green software patterns detected in the
|
|
|
259
248
|
{chr(10).join(pattern_details)}"""
|
|
260
249
|
|
|
261
250
|
def _generate_emergent_patterns_section(self, data: dict[str, Any]) -> str:
|
|
262
|
-
|
|
251
|
+
# Generate emergent patterns subsection.
|
|
263
252
|
emergent = data["emergent_patterns"]
|
|
264
253
|
|
|
265
254
|
if not emergent:
|
|
@@ -281,7 +270,7 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
281
270
|
{chr(10).join(pattern_list)}"""
|
|
282
271
|
|
|
283
272
|
def _generate_repo_analysis_section(self, data: dict[str, Any]) -> str:
|
|
284
|
-
|
|
273
|
+
# Generate per-repository analysis subsection.
|
|
285
274
|
per_repo = data["per_repo_stats"]
|
|
286
275
|
|
|
287
276
|
# Top 10 greenest
|
|
@@ -307,37 +296,30 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
307
296
|
|
|
308
297
|
**Repositories with No Green Mentions:** {no_green_count} out of {len(per_repo)} repositories had zero green-aware commits."""
|
|
309
298
|
|
|
310
|
-
def
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
Args:
|
|
314
|
-
data: Aggregated data containing enhanced_statistics field
|
|
315
|
-
|
|
316
|
-
Returns:
|
|
317
|
-
Markdown section with enhanced statistics
|
|
318
|
-
"""
|
|
319
|
-
enhanced_stats = data.get("enhanced_statistics")
|
|
299
|
+
def _generate_statistics_section(self, data: dict[str, Any]) -> str:
|
|
300
|
+
# Generate statistical analysis subsection.
|
|
301
|
+
stats = data.get("statistics")
|
|
320
302
|
|
|
321
|
-
if not
|
|
303
|
+
if not stats:
|
|
322
304
|
return ""
|
|
323
305
|
|
|
324
306
|
# Handle error case
|
|
325
|
-
if "error" in
|
|
326
|
-
return f"""#### 2.5
|
|
307
|
+
if "error" in stats:
|
|
308
|
+
return f"""#### 2.5 Statistical Analysis
|
|
327
309
|
|
|
328
|
-
**Note:**
|
|
310
|
+
**Note:** Statistical analysis encountered an error: {stats['error']}
|
|
329
311
|
"""
|
|
330
312
|
|
|
331
313
|
sections = []
|
|
332
|
-
sections.append("#### 2.5
|
|
314
|
+
sections.append("#### 2.5 Statistical Analysis")
|
|
333
315
|
sections.append("")
|
|
334
316
|
sections.append(
|
|
335
|
-
"This section presents
|
|
317
|
+
"This section presents statistical analyses of green software engineering patterns."
|
|
336
318
|
)
|
|
337
319
|
sections.append("")
|
|
338
320
|
|
|
339
321
|
# Temporal trends
|
|
340
|
-
temporal =
|
|
322
|
+
temporal = stats.get("temporal_trends", {})
|
|
341
323
|
if temporal and "error" not in temporal:
|
|
342
324
|
sections.append("##### Temporal Trends")
|
|
343
325
|
sections.append("")
|
|
@@ -362,7 +344,7 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
362
344
|
sections.append("")
|
|
363
345
|
|
|
364
346
|
# Pattern correlations
|
|
365
|
-
correlations =
|
|
347
|
+
correlations = stats.get("pattern_correlations", {})
|
|
366
348
|
if correlations and "error" not in correlations:
|
|
367
349
|
sections.append("##### Pattern Correlations")
|
|
368
350
|
sections.append("")
|
|
@@ -383,7 +365,7 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
383
365
|
sections.append("")
|
|
384
366
|
|
|
385
367
|
# Effect sizes
|
|
386
|
-
effect_sizes =
|
|
368
|
+
effect_sizes = stats.get("effect_size", {})
|
|
387
369
|
if effect_sizes and "error" not in effect_sizes:
|
|
388
370
|
sections.append("##### Effect Size Analysis")
|
|
389
371
|
sections.append("")
|
|
@@ -398,7 +380,7 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
398
380
|
sections.append("")
|
|
399
381
|
|
|
400
382
|
# Descriptive statistics
|
|
401
|
-
descriptive =
|
|
383
|
+
descriptive = stats.get("descriptive", {})
|
|
402
384
|
if descriptive and "error" not in descriptive:
|
|
403
385
|
sections.append("##### Descriptive Statistics")
|
|
404
386
|
sections.append("")
|
|
@@ -422,7 +404,7 @@ No novel microservice-specific green practices were automatically detected. Manu
|
|
|
422
404
|
return "\n".join(sections)
|
|
423
405
|
|
|
424
406
|
def _generate_discussion(self, data: dict[str, Any]) -> str:
|
|
425
|
-
|
|
407
|
+
# Generate discussion section.
|
|
426
408
|
summary = data["summary"]
|
|
427
409
|
green_pct = summary["green_aware_percentage"]
|
|
428
410
|
|
|
@@ -473,7 +455,7 @@ Based on the detected patterns, microservice developers primarily focus on:
|
|
|
473
455
|
4. **Best practices dissemination:** Green microservices patterns should be documented and promoted in the community"""
|
|
474
456
|
|
|
475
457
|
def _generate_limitations(self) -> str:
|
|
476
|
-
|
|
458
|
+
# Generate limitations section.
|
|
477
459
|
return """### 4. Limitations
|
|
478
460
|
|
|
479
461
|
#### 4.1 Sample Size and Selection Bias
|
|
@@ -505,7 +487,7 @@ Based on the detected patterns, microservice developers primarily focus on:
|
|
|
505
487
|
5. **Energy measurement:** Correlate detected patterns with actual energy consumption data"""
|
|
506
488
|
|
|
507
489
|
def _generate_conclusion(self, data: dict[str, Any]) -> str:
|
|
508
|
-
|
|
490
|
+
# Generate conclusion section.
|
|
509
491
|
summary = data["summary"]
|
|
510
492
|
top_patterns = (
|
|
511
493
|
[p["pattern_name"] for p in data["known_patterns"][:5]]
|
|
@@ -559,94 +541,8 @@ Answer: Automated keyword analysis found limited evidence of novel patterns. Man
|
|
|
559
541
|
*For questions or additional analysis, please refer to the accompanying data files: `green_analysis_results.csv` and `aggregated_statistics.json`*"""
|
|
560
542
|
|
|
561
543
|
def save_report(self, report_content: str, output_file: Path):
|
|
562
|
-
|
|
544
|
+
# Save report to markdown file.
|
|
563
545
|
output_file.parent.mkdir(parents=True, exist_ok=True)
|
|
564
546
|
with open(output_file, "w", encoding="utf-8") as f:
|
|
565
547
|
f.write(report_content)
|
|
566
548
|
colored_print(f"Saved report to {output_file}", "green")
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
@click.command()
|
|
570
|
-
@click.option("--aggregated-file", default=None, help="Input aggregated statistics file")
|
|
571
|
-
@click.option("--analysis-file", default=None, help="Input analysis results file")
|
|
572
|
-
@click.option("--repos-file", default=None, help="Input repositories file")
|
|
573
|
-
@click.option(
|
|
574
|
-
"--output-file",
|
|
575
|
-
default=None,
|
|
576
|
-
help="Output markdown file (default: data/green_microservices_analysis.md)",
|
|
577
|
-
)
|
|
578
|
-
@click.option("--config-file", default=".env", help="Path to .env configuration file")
|
|
579
|
-
def report(
|
|
580
|
-
aggregated_file: Optional[str],
|
|
581
|
-
analysis_file: Optional[str],
|
|
582
|
-
repos_file: Optional[str],
|
|
583
|
-
output_file: Optional[str],
|
|
584
|
-
config_file: str,
|
|
585
|
-
):
|
|
586
|
-
"""Generate comprehensive markdown report."""
|
|
587
|
-
print_banner("Report Generator")
|
|
588
|
-
|
|
589
|
-
try:
|
|
590
|
-
# Load configuration
|
|
591
|
-
config = get_config(config_file)
|
|
592
|
-
|
|
593
|
-
# Determine input/output files
|
|
594
|
-
agg_input = Path(aggregated_file) if aggregated_file else config.AGGREGATED_FILE
|
|
595
|
-
analysis_input = Path(analysis_file) if analysis_file else config.ANALYSIS_FILE
|
|
596
|
-
repos_input = Path(repos_file) if repos_file else config.REPOS_FILE
|
|
597
|
-
output = Path(output_file) if output_file else config.REPORT_FILE
|
|
598
|
-
|
|
599
|
-
# Check if input files exist
|
|
600
|
-
missing_files = []
|
|
601
|
-
if not agg_input.exists():
|
|
602
|
-
missing_files.append(str(agg_input))
|
|
603
|
-
if not analysis_input.exists():
|
|
604
|
-
missing_files.append(str(analysis_input))
|
|
605
|
-
if not repos_input.exists():
|
|
606
|
-
missing_files.append(str(repos_input))
|
|
607
|
-
|
|
608
|
-
if missing_files:
|
|
609
|
-
colored_print("Missing required input files:", "red")
|
|
610
|
-
for f in missing_files:
|
|
611
|
-
colored_print(f" - {f}", "red")
|
|
612
|
-
colored_print(
|
|
613
|
-
"\nPlease run the full pipeline first: fetch → extract → analyze → aggregate",
|
|
614
|
-
"yellow",
|
|
615
|
-
)
|
|
616
|
-
exit(1)
|
|
617
|
-
|
|
618
|
-
# Load data
|
|
619
|
-
colored_print("Loading data files...", "blue")
|
|
620
|
-
aggregated_data = load_json_file(agg_input)
|
|
621
|
-
analysis_data = load_json_file(analysis_input)
|
|
622
|
-
repos_data = load_json_file(repos_input)
|
|
623
|
-
colored_print("✓ Data loaded successfully", "green")
|
|
624
|
-
|
|
625
|
-
# Generate report
|
|
626
|
-
colored_print("\nGenerating report...", "blue")
|
|
627
|
-
generator = ReportGenerator()
|
|
628
|
-
report_content = generator.generate_report(aggregated_data, analysis_data, repos_data)
|
|
629
|
-
|
|
630
|
-
# Save report
|
|
631
|
-
generator.save_report(report_content, output)
|
|
632
|
-
|
|
633
|
-
colored_print("\n✓ Report generated successfully!", "green")
|
|
634
|
-
colored_print(f"Output: {output}", "green")
|
|
635
|
-
colored_print(f"Report size: {len(report_content):,} characters", "white")
|
|
636
|
-
|
|
637
|
-
except FileNotFoundError as e:
|
|
638
|
-
colored_print(f"File not found: {e}", "red")
|
|
639
|
-
exit(1)
|
|
640
|
-
except json.JSONDecodeError as e:
|
|
641
|
-
colored_print(f"Invalid JSON: {e}", "red")
|
|
642
|
-
exit(1)
|
|
643
|
-
except Exception as e:
|
|
644
|
-
colored_print(f"Error: {e}", "red")
|
|
645
|
-
import traceback
|
|
646
|
-
|
|
647
|
-
traceback.print_exc()
|
|
648
|
-
exit(1)
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
if __name__ == "__main__":
|
|
652
|
-
report()
|
greenmining/utils.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
1
|
+
# Utility functions for green microservices mining CLI.
|
|
2
2
|
|
|
3
3
|
import json
|
|
4
4
|
import time
|
|
@@ -15,32 +15,14 @@ init(autoreset=True)
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def format_timestamp(dt: Optional[datetime] = None) -> str:
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
Args:
|
|
21
|
-
dt: Datetime object, defaults to now
|
|
22
|
-
|
|
23
|
-
Returns:
|
|
24
|
-
ISO formatted timestamp string
|
|
25
|
-
"""
|
|
18
|
+
# Format timestamp in ISO 8601 format.
|
|
26
19
|
if dt is None:
|
|
27
20
|
dt = datetime.utcnow()
|
|
28
21
|
return dt.strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
29
22
|
|
|
30
23
|
|
|
31
24
|
def load_json_file(path: Path) -> dict[str, Any]:
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
Args:
|
|
35
|
-
path: Path to JSON file
|
|
36
|
-
|
|
37
|
-
Returns:
|
|
38
|
-
Parsed JSON data
|
|
39
|
-
|
|
40
|
-
Raises:
|
|
41
|
-
FileNotFoundError: If file doesn't exist
|
|
42
|
-
json.JSONDecodeError: If file is not valid JSON
|
|
43
|
-
"""
|
|
25
|
+
# Load JSON data from file.
|
|
44
26
|
if not path.exists():
|
|
45
27
|
raise FileNotFoundError(f"File not found: {path}")
|
|
46
28
|
|
|
@@ -49,13 +31,7 @@ def load_json_file(path: Path) -> dict[str, Any]:
|
|
|
49
31
|
|
|
50
32
|
|
|
51
33
|
def save_json_file(data: dict[str, Any], path: Path, indent: int = 2) -> None:
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
Args:
|
|
55
|
-
data: Data to save
|
|
56
|
-
path: Output file path
|
|
57
|
-
indent: JSON indentation level
|
|
58
|
-
"""
|
|
34
|
+
# Save data to JSON file.
|
|
59
35
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
60
36
|
|
|
61
37
|
with open(path, "w", encoding="utf-8") as f:
|
|
@@ -63,17 +39,7 @@ def save_json_file(data: dict[str, Any], path: Path, indent: int = 2) -> None:
|
|
|
63
39
|
|
|
64
40
|
|
|
65
41
|
def load_csv_file(path: Path) -> pd.DataFrame:
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
Args:
|
|
69
|
-
path: Path to CSV file
|
|
70
|
-
|
|
71
|
-
Returns:
|
|
72
|
-
DataFrame with CSV data
|
|
73
|
-
|
|
74
|
-
Raises:
|
|
75
|
-
FileNotFoundError: If file doesn't exist
|
|
76
|
-
"""
|
|
42
|
+
# Load CSV file as pandas DataFrame.
|
|
77
43
|
if not path.exists():
|
|
78
44
|
raise FileNotFoundError(f"File not found: {path}")
|
|
79
45
|
|
|
@@ -81,40 +47,18 @@ def load_csv_file(path: Path) -> pd.DataFrame:
|
|
|
81
47
|
|
|
82
48
|
|
|
83
49
|
def save_csv_file(df: pd.DataFrame, path: Path) -> None:
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
Args:
|
|
87
|
-
df: DataFrame to save
|
|
88
|
-
path: Output file path
|
|
89
|
-
"""
|
|
50
|
+
# Save DataFrame to CSV file.
|
|
90
51
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
91
52
|
df.to_csv(path, index=False, encoding="utf-8")
|
|
92
53
|
|
|
93
54
|
|
|
94
55
|
def estimate_tokens(text: str) -> int:
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
Uses rough approximation: 1 token ≈ 4 characters
|
|
98
|
-
|
|
99
|
-
Args:
|
|
100
|
-
text: Input text
|
|
101
|
-
|
|
102
|
-
Returns:
|
|
103
|
-
Estimated token count
|
|
104
|
-
"""
|
|
56
|
+
# Estimate number of tokens in text.
|
|
105
57
|
return len(text) // 4
|
|
106
58
|
|
|
107
59
|
|
|
108
60
|
def estimate_cost(tokens: int, model: str = "claude-sonnet-4-20250514") -> float:
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
Args:
|
|
112
|
-
tokens: Number of tokens
|
|
113
|
-
model: Model name
|
|
114
|
-
|
|
115
|
-
Returns:
|
|
116
|
-
Estimated cost in USD
|
|
117
|
-
"""
|
|
61
|
+
# Estimate API cost based on token usage.
|
|
118
62
|
# Claude Sonnet 4 pricing (as of Dec 2024)
|
|
119
63
|
# Input: $3 per million tokens
|
|
120
64
|
# Output: $15 per million tokens
|
|
@@ -135,17 +79,7 @@ def retry_on_exception(
|
|
|
135
79
|
exponential_backoff: bool = True,
|
|
136
80
|
exceptions: tuple = (Exception,),
|
|
137
81
|
) -> Callable:
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
Args:
|
|
141
|
-
max_retries: Maximum number of retry attempts
|
|
142
|
-
delay: Initial delay between retries in seconds
|
|
143
|
-
exponential_backoff: Use exponential backoff for delays
|
|
144
|
-
exceptions: Tuple of exception types to catch
|
|
145
|
-
|
|
146
|
-
Returns:
|
|
147
|
-
Decorated function
|
|
148
|
-
"""
|
|
82
|
+
# Decorator to retry function on exception.
|
|
149
83
|
|
|
150
84
|
def decorator(func: Callable) -> Callable:
|
|
151
85
|
@wraps(func)
|
|
@@ -175,12 +109,7 @@ def retry_on_exception(
|
|
|
175
109
|
|
|
176
110
|
|
|
177
111
|
def colored_print(text: str, color: str = "white") -> None:
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
Args:
|
|
181
|
-
text: Text to print
|
|
182
|
-
color: Color name (red, green, yellow, blue, magenta, cyan, white)
|
|
183
|
-
"""
|
|
112
|
+
# Print colored text to console.
|
|
184
113
|
color_map = {
|
|
185
114
|
"red": Fore.RED,
|
|
186
115
|
"green": Fore.GREEN,
|
|
@@ -196,14 +125,7 @@ def colored_print(text: str, color: str = "white") -> None:
|
|
|
196
125
|
|
|
197
126
|
|
|
198
127
|
def handle_github_rate_limit(response) -> None:
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
Args:
|
|
202
|
-
response: GitHub API response object
|
|
203
|
-
|
|
204
|
-
Raises:
|
|
205
|
-
Exception: If rate limit is exceeded
|
|
206
|
-
"""
|
|
128
|
+
# Handle GitHub API rate limiting.
|
|
207
129
|
if hasattr(response, "status") and response.status == 403:
|
|
208
130
|
colored_print("GitHub API rate limit exceeded!", "red")
|
|
209
131
|
colored_print("Please wait or use an authenticated token.", "yellow")
|
|
@@ -211,39 +133,17 @@ def handle_github_rate_limit(response) -> None:
|
|
|
211
133
|
|
|
212
134
|
|
|
213
135
|
def format_number(num: int) -> str:
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
Args:
|
|
217
|
-
num: Number to format
|
|
218
|
-
|
|
219
|
-
Returns:
|
|
220
|
-
Formatted string
|
|
221
|
-
"""
|
|
136
|
+
# Format large numbers with thousand separators.
|
|
222
137
|
return f"{num:,}"
|
|
223
138
|
|
|
224
139
|
|
|
225
140
|
def format_percentage(value: float, decimals: int = 1) -> str:
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
Args:
|
|
229
|
-
value: Percentage value (0-100)
|
|
230
|
-
decimals: Number of decimal places
|
|
231
|
-
|
|
232
|
-
Returns:
|
|
233
|
-
Formatted percentage string
|
|
234
|
-
"""
|
|
141
|
+
# Format percentage value.
|
|
235
142
|
return f"{value:.{decimals}f}%"
|
|
236
143
|
|
|
237
144
|
|
|
238
145
|
def format_duration(seconds: float) -> str:
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
Args:
|
|
242
|
-
seconds: Duration in seconds
|
|
243
|
-
|
|
244
|
-
Returns:
|
|
245
|
-
Formatted duration string (e.g., "2h 15m")
|
|
246
|
-
"""
|
|
146
|
+
# Format duration in human-readable format.
|
|
247
147
|
if seconds < 60:
|
|
248
148
|
return f"{int(seconds)}s"
|
|
249
149
|
elif seconds < 3600:
|
|
@@ -257,40 +157,20 @@ def format_duration(seconds: float) -> str:
|
|
|
257
157
|
|
|
258
158
|
|
|
259
159
|
def truncate_text(text: str, max_length: int = 100) -> str:
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
Args:
|
|
263
|
-
text: Input text
|
|
264
|
-
max_length: Maximum length
|
|
265
|
-
|
|
266
|
-
Returns:
|
|
267
|
-
Truncated text with ellipsis if needed
|
|
268
|
-
"""
|
|
160
|
+
# Truncate text to maximum length.
|
|
269
161
|
if len(text) <= max_length:
|
|
270
162
|
return text
|
|
271
163
|
return text[: max_length - 3] + "..."
|
|
272
164
|
|
|
273
165
|
|
|
274
166
|
def create_checkpoint(checkpoint_file: Path, data: dict[str, Any]) -> None:
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
Args:
|
|
278
|
-
checkpoint_file: Path to checkpoint file
|
|
279
|
-
data: Checkpoint data
|
|
280
|
-
"""
|
|
167
|
+
# Create checkpoint file for resuming operations.
|
|
281
168
|
save_json_file(data, checkpoint_file)
|
|
282
169
|
colored_print(f"Checkpoint saved: {checkpoint_file}", "green")
|
|
283
170
|
|
|
284
171
|
|
|
285
172
|
def load_checkpoint(checkpoint_file: Path) -> Optional[dict[str, Any]]:
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
Args:
|
|
289
|
-
checkpoint_file: Path to checkpoint file
|
|
290
|
-
|
|
291
|
-
Returns:
|
|
292
|
-
Checkpoint data or None if doesn't exist
|
|
293
|
-
"""
|
|
173
|
+
# Load checkpoint data if exists.
|
|
294
174
|
if checkpoint_file.exists():
|
|
295
175
|
try:
|
|
296
176
|
return load_json_file(checkpoint_file)
|
|
@@ -300,21 +180,13 @@ def load_checkpoint(checkpoint_file: Path) -> Optional[dict[str, Any]]:
|
|
|
300
180
|
|
|
301
181
|
|
|
302
182
|
def print_banner(title: str) -> None:
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
Args:
|
|
306
|
-
title: Banner title
|
|
307
|
-
"""
|
|
183
|
+
# Print formatted banner.
|
|
308
184
|
colored_print("\n" + "=" * 60, "cyan")
|
|
309
|
-
colored_print(f"
|
|
185
|
+
colored_print(f" {title}", "cyan")
|
|
310
186
|
colored_print("=" * 60 + "\n", "cyan")
|
|
311
187
|
|
|
312
188
|
|
|
313
189
|
def print_section(title: str) -> None:
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
Args:
|
|
317
|
-
title: Section title
|
|
318
|
-
"""
|
|
319
|
-
colored_print(f"\n📌 {title}", "blue")
|
|
190
|
+
# Print section header.
|
|
191
|
+
colored_print(f"\n {title}", "blue")
|
|
320
192
|
colored_print("-" * 60, "blue")
|