github-ai-scraper 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. ai_scraper/__init__.py +3 -0
  2. ai_scraper/api/__init__.py +6 -0
  3. ai_scraper/api/github.py +340 -0
  4. ai_scraper/api/gitlab.py +418 -0
  5. ai_scraper/api/rate_limiter.py +120 -0
  6. ai_scraper/api_server.py +196 -0
  7. ai_scraper/auth.py +68 -0
  8. ai_scraper/backup.py +112 -0
  9. ai_scraper/cache.py +95 -0
  10. ai_scraper/classifier.py +135 -0
  11. ai_scraper/cli.py +747 -0
  12. ai_scraper/config.py +237 -0
  13. ai_scraper/config_watcher.py +82 -0
  14. ai_scraper/dedup.py +148 -0
  15. ai_scraper/filters/__init__.py +5 -0
  16. ai_scraper/filters/ai_filter.py +93 -0
  17. ai_scraper/health.py +155 -0
  18. ai_scraper/i18n.py +141 -0
  19. ai_scraper/interactive.py +96 -0
  20. ai_scraper/keywords/__init__.py +5 -0
  21. ai_scraper/keywords/extractor.py +274 -0
  22. ai_scraper/logging_config.py +74 -0
  23. ai_scraper/models/__init__.py +5 -0
  24. ai_scraper/models/repository.py +72 -0
  25. ai_scraper/output/__init__.py +6 -0
  26. ai_scraper/output/excel.py +79 -0
  27. ai_scraper/output/html.py +152 -0
  28. ai_scraper/output/markdown.py +338 -0
  29. ai_scraper/output/rss.py +82 -0
  30. ai_scraper/output/translator.py +303 -0
  31. ai_scraper/plugin_system.py +146 -0
  32. ai_scraper/plugins/__init__.py +5 -0
  33. ai_scraper/retry.py +134 -0
  34. ai_scraper/scheduler.py +84 -0
  35. ai_scraper/scrape_progress.py +99 -0
  36. ai_scraper/secure_storage.py +127 -0
  37. ai_scraper/storage/__init__.py +5 -0
  38. ai_scraper/storage/async_database.py +237 -0
  39. ai_scraper/storage/database.py +456 -0
  40. ai_scraper/webhooks.py +95 -0
  41. github_ai_scraper-0.1.2.dist-info/METADATA +299 -0
  42. github_ai_scraper-0.1.2.dist-info/RECORD +44 -0
  43. github_ai_scraper-0.1.2.dist-info/WHEEL +4 -0
  44. github_ai_scraper-0.1.2.dist-info/entry_points.txt +2 -0
ai_scraper/health.py ADDED
@@ -0,0 +1,155 @@
1
+ """Repository health assessment."""
2
+
3
+ from dataclasses import dataclass
4
+ from datetime import datetime, timedelta
5
+ from typing import Optional
6
+
7
+ from ai_scraper.models.repository import Repository
8
+
9
+
10
@dataclass
class HealthScore:
    """Result of a repository health assessment.

    Bundles the overall score, the four component scores it is built
    from, and the letter grade derived from the overall score.
    """

    overall: float  # weighted combination of the four components below
    activity: float  # component based on how recently the repo was pushed to
    popularity: float  # component based on the star count
    maintenance: float  # component based on open issues relative to stars
    community: float  # component based on the fork count
    grade: str  # letter grade corresponding to ``overall``
20
+
21
+
22
class HealthAssessor:
    """Assess repository health based on multiple factors.

    Four component scores (activity, popularity, maintenance, community),
    each on a 0-100 scale, are combined into a weighted overall score and
    mapped to a letter grade.
    """

    # (maximum days since the last push, score); first matching row wins.
    _ACTIVITY_STEPS = ((7, 100), (30, 80), (90, 60), (180, 40), (365, 20))
    # (minimum star count, score); first matching row wins.
    _POPULARITY_STEPS = (
        (10000, 100), (5000, 85), (1000, 70), (500, 55), (100, 40), (50, 25),
    )
    # (exclusive upper bound on open_issues / stars, score).
    _MAINTENANCE_STEPS = ((0.01, 100), (0.05, 80), (0.1, 60), (0.2, 40))
    # (minimum fork count, score); first matching row wins.
    _COMMUNITY_STEPS = (
        (1000, 100), (500, 85), (100, 70), (50, 55), (10, 40), (5, 25),
    )
    # (minimum overall score, letter grade); anything lower is an "F".
    _GRADE_STEPS = ((90, "A"), (80, "B"), (70, "C"), (60, "D"))

    def assess(self, repo: Repository) -> HealthScore:
        """Assess repository health.

        Args:
            repo: Repository to assess.

        Returns:
            Health score breakdown.
        """
        activity = self._score_activity(repo)
        popularity = self._score_popularity(repo)
        maintenance = self._score_maintenance(repo)
        community = self._score_community(repo)

        # Weighted overall score (the weights sum to 1.0).
        overall = (
            activity * 0.3 +
            popularity * 0.25 +
            maintenance * 0.25 +
            community * 0.2
        )

        return HealthScore(
            overall=overall,
            activity=activity,
            popularity=popularity,
            maintenance=maintenance,
            community=community,
            grade=self.get_grade(overall),
        )

    def _score_activity(self, repo: Repository) -> float:
        """Score repository activity (0-100) from the age of the last push.

        Returns 0 when the repository has no ``pushed_at`` timestamp or the
        last push is more than 365 days old.
        """
        pushed_at = repo.pushed_at
        if not pushed_at:
            return 0

        # Build "now" with the same timezone awareness as the timestamp:
        # subtracting a naive datetime from an aware one raises TypeError.
        # With tzinfo=None this is the plain naive local time, as before.
        now = datetime.now(tz=pushed_at.tzinfo)
        days_since_push = (now - pushed_at).days

        for max_days, score in self._ACTIVITY_STEPS:
            if days_since_push <= max_days:
                return score
        return 0

    def _score_popularity(self, repo: Repository) -> float:
        """Score repository popularity (0-100) from the star count.

        A missing star count is treated as zero stars.
        """
        stars = repo.stars or 0

        for min_stars, score in self._POPULARITY_STEPS:
            if stars >= min_stars:
                return score
        return 10

    def _score_maintenance(self, repo: Repository) -> float:
        """Score repository maintenance (0-100) from open issues per star.

        A lower ratio is better. Returns the neutral score 50 when the
        ratio cannot be computed: the open-issue count is missing, or the
        repository has no stars.
        """
        open_issues = repo.open_issues
        stars = repo.stars or 0

        # Only a missing count is "unknown"; zero open issues is a real
        # value and yields the best possible ratio.
        if open_issues is None or stars <= 0:
            return 50

        issue_ratio = open_issues / stars
        for upper_bound, score in self._MAINTENANCE_STEPS:
            if issue_ratio < upper_bound:
                return score
        return 20

    def _score_community(self, repo: Repository) -> float:
        """Score community engagement (0-100) from the fork count.

        A missing fork count is treated as zero forks.
        """
        forks = repo.forks or 0

        for min_forks, score in self._COMMUNITY_STEPS:
            if forks >= min_forks:
                return score
        return 10

    def get_grade(self, score: float) -> str:
        """Convert score to letter grade.

        Args:
            score: Score (0-100).

        Returns:
            Letter grade: "A", "B", "C", "D" or "F".
        """
        for min_score, grade in self._GRADE_STEPS:
            if score >= min_score:
                return grade
        return "F"
ai_scraper/i18n.py ADDED
@@ -0,0 +1,141 @@
1
+ """Internationalization support for multi-language search."""
2
+
3
+ from typing import Optional
4
+
5
+
6
# Built-in keyword translations: language code -> {lower-case English term
# -> term in that language}.
_ZH_TERMS = {
    "ai": "人工智能",
    "artificial intelligence": "人工智能",
    "machine learning": "机器学习",
    "deep learning": "深度学习",
    "neural network": "神经网络",
    "llm": "大语言模型",
    "large language model": "大语言模型",
    "gpt": "GPT",
    "transformer": "Transformer",
    "nlp": "自然语言处理",
    "natural language processing": "自然语言处理",
    "computer vision": "计算机视觉",
    "reinforcement learning": "强化学习",
    "pytorch": "PyTorch",
    "tensorflow": "TensorFlow",
    "huggingface": "Hugging Face",
}

DEFAULT_TRANSLATIONS = {
    # English maps every known term to itself.
    "en": {term: term for term in _ZH_TERMS},
    "zh": _ZH_TERMS,
}
45
+
46
+
47
class I18nManager:
    """Manage internationalization for keywords.

    Translations are kept per target language as a mapping from the
    lower-case source term to its translation in that language.
    """

    def __init__(self):
        """Initialize i18n manager with its own copy of the default translations."""
        # Copy the per-language dicts too. A shallow dict() copy would share
        # them with DEFAULT_TRANSLATIONS, so add_translation() on one manager
        # would silently change the defaults seen by every other manager.
        self._translations = {
            lang: dict(terms) for lang, terms in DEFAULT_TRANSLATIONS.items()
        }

    def get_keywords(self, language: str) -> set[str]:
        """Get all keywords for a language.

        Args:
            language: Language code (e.g., "en", "zh").

        Returns:
            Set of keywords for the language. Falls back to the English
            keywords when the language is unknown.
        """
        terms = self._translations.get(language)
        if terms is None:
            # Fallback to English
            terms = self._translations.get("en", {})
        return set(terms.values())

    def add_translation(
        self,
        source_lang: str,
        source_term: str,
        target_lang: str,
        target_term: str,
    ) -> None:
        """Add a custom translation.

        Args:
            source_lang: Source language code. Currently unused: entries are
                keyed by target language only.
            source_term: Term in source language (matched case-insensitively).
            target_lang: Target language code.
            target_term: Translation in target language.
        """
        # translate() looks terms up in lower case, so store them that way;
        # otherwise a term added with capitals could never be found again.
        self._translations.setdefault(target_lang, {})[source_term.lower()] = target_term

    def translate(
        self,
        term: str,
        source_lang: str = "en",
        target_lang: str = "zh",
    ) -> Optional[str]:
        """Translate a term between languages.

        Args:
            term: Term to translate (matched case-insensitively).
            source_lang: Source language code. Currently unused: entries are
                keyed by target language only.
            target_lang: Target language code.

        Returns:
            Translated term or None if not found.
        """
        if target_lang not in self._translations:
            return None

        return self._translations[target_lang].get(term.lower())
108
+
109
+
110
def get_translated_keywords(
    keywords: list[str],
    languages: Optional[list[str]] = None,
) -> list[str]:
    """Get keywords translated to multiple languages.

    For "en" the keyword is passed through unchanged; for every other
    language the lower-cased keyword is looked up in the default
    translations and added only when a translation exists.

    Args:
        keywords: List of keywords to translate.
        languages: List of target language codes. Defaults to ["en", "zh"].

    Returns:
        List of keywords in all specified languages, without duplicates,
        in order of first appearance.
    """
    if languages is None:
        languages = ["en", "zh"]

    i18n = I18nManager()
    # Dict keys de-duplicate while keeping insertion order, so the result is
    # the same on every run (a set would give an arbitrary order).
    unique: dict[str, None] = {}

    for keyword in keywords:
        keyword_lower = keyword.lower()
        for lang in languages:
            if lang == "en":
                # Keep the original keyword as given.
                unique[keyword] = None
                continue
            # Add translation if available
            translated = i18n.translate(keyword_lower, "en", lang)
            if translated:
                unique[translated] = None

    return list(unique)
@@ -0,0 +1,96 @@
1
+ """Interactive CLI mode."""
2
+
3
+ from rich.console import Console
4
+ from rich.prompt import Prompt
5
+ from rich.panel import Panel
6
+
7
# Shared rich console that the interactive helpers in this module print to.
console = Console()
8
+
9
+
10
def show_main_menu() -> str:
    """Show main menu and get user choice.

    Returns:
        User's menu choice: one of "1" to "7", or "q" to quit.
    """
    console.print(Panel.fit(
        "[bold cyan]GitHub AI Scraper[/bold cyan]\n"
        "AI Repository Discovery Tool",
        border_style="cyan"
    ))

    console.print("\n[bold]What would you like to do?[/bold]\n")

    menu_lines = (
        " [1] Quick Scrape - Fetch top AI repos (fast)",
        " [2] Deep Scrape - Comprehensive search (slow)",
        " [3] Custom Scrape - Set your own parameters",
        " [4] View Results - List scraped repositories",
        " [5] Trending - See trending repos",
        " [6] Export Data - Export to CSV/JSON",
        " [7] Settings - Configure options",
        # rich parses a bracket that starts with a letter as a markup tag,
        # so a bare "[q]" would vanish from the output; the backslash
        # escapes it so it is printed literally like the numeric keys.
        " \\[q] Quit\n",
    )
    for line in menu_lines:
        console.print(line)

    return Prompt.ask("Select an option", choices=["1", "2", "3", "4", "5", "6", "7", "q"])
33
+
34
+
35
def get_scrape_params() -> dict:
    """Ask the user for the parameters of a custom scrape.

    Each numeric question is repeated until the answer is a valid integer
    in the accepted range.

    Returns:
        Dictionary with "min_stars", "max_results" and "language"
        (None when the language filter was left empty).
    """
    console.print("\n[bold]Custom Scrape Configuration[/bold]\n")

    def ask_int(question, is_acceptable, rejection):
        """Prompt until the answer parses as an int that passes ``is_acceptable``."""
        while True:
            try:
                answer = int(Prompt.ask(question, default="100"))
            except ValueError:
                console.print("[red]Please enter a valid number[/red]")
                continue
            if is_acceptable(answer):
                return answer
            console.print(rejection)

    min_stars = ask_int(
        "Minimum stars",
        lambda value: value >= 0,
        "[red]Minimum stars must be non-negative[/red]",
    )
    max_results = ask_int(
        "Maximum results",
        lambda value: value > 0,
        "[red]Maximum results must be greater than 0[/red]",
    )
    language = Prompt.ask("Language filter (leave empty for all)", default="")

    params = {
        "min_stars": min_stars,
        "max_results": max_results,
        "language": language if language else None,
    }
    return params
72
+
73
+
74
def show_scrape_progress(current: int, total: int, repo_name: str):
    """Print one progress line for the repository being scraped.

    Args:
        current: Current count.
        total: Total expected; a non-positive total is reported as 0%.
        repo_name: Name of current repo (only the first 40 characters are shown).
    """
    if total > 0:
        percent = current / total * 100
    else:
        percent = 0
    shown_name = repo_name[:40]
    console.print(f" [{current}/{total}] {percent:.0f}% - {shown_name}")
84
+
85
+
86
def confirm_action(message: str) -> bool:
    """Ask the user a yes/no question.

    Args:
        message: Confirmation message.

    Returns:
        True if confirmed.
    """
    # Imported here rather than at module level, as in the original.
    from rich.prompt import Confirm

    confirmed = Confirm.ask(message)
    return confirmed
@@ -0,0 +1,5 @@
1
+ """Keywords extraction module."""
2
+
3
+ from ai_scraper.keywords.extractor import KeywordExtractor
4
+
5
+ __all__ = ["KeywordExtractor"]
@@ -0,0 +1,274 @@
1
+ """Keyword extraction from repository metadata."""
2
+
3
+ import re
4
+ from pathlib import Path
5
+
6
+ from ai_scraper.models import Repository
7
+
8
+
9
# Tokens known to be noise that should never become keywords.
LOW_QUALITY_KEYWORDS = {"aaif"}

# Common English stopwords to filter out.
STOPWORDS: set[str] = set(
    # articles, conjunctions, auxiliaries and modals
    "a an the and or but is are was were "
    "be been being have has had do does did "
    "will would could should may might must shall "
    "can need dare ought used "
    # prepositions and adverbs of place/time
    "to of in for on with at by from as into through during "
    "before after above below between under again "
    "further then once here there when where why "
    # quantifiers, determiners and connectives
    "how all each few more most other some such "
    "no nor not only own same so than too "
    "very just also now that this these those "
    "what which who whom whose if else because "
    "while although though since until unless however "
    "therefore thus hence either neither both "
    "even still already yet "
    # pronouns
    "i me my myself we our ours ourselves "
    "you your yours yourself yourselves he him "
    "his himself she her hers herself it its "
    "itself they them their theirs themselves".split()
)

# Patterns of invalid keywords (these are filtered out).
INVALID_PATTERNS = [
    r"^\d+/\w+$",  # number/word, e.g. "0/zero", "112/ai"
    r"^[\w-]+/[\w-]+$",  # path-like, e.g. "owner/repo"
    r"^\d+$",  # digits only
]

# Minimum keyword length (well-known short AI abbreviations are exempt).
MIN_KEYWORD_LENGTH = 3
VALID_SHORT_KEYWORDS = set(
    "ai ml dl nlp cv llm gpt rag mcp rnn cnn gan vae rl cl asr tts".split()
)
48
+
49
+
50
class KeywordExtractor:
    """Extract and manage keywords from repository metadata.

    Keywords are harvested from repository topics, descriptions and names,
    passed through a quality filter, and persisted one per line in a text
    file.
    """

    def __init__(self, keywords_file: Path, max_keywords: int = 100):
        """Initialize the extractor.

        Args:
            keywords_file: Path to file for persisting keywords.
            max_keywords: Maximum number of keywords to keep.
        """
        self.keywords_file = keywords_file
        self.max_keywords = max_keywords

    def load_keywords(self) -> set[str]:
        """Load keywords from file.

        Returns:
            Set of keywords (blank lines skipped), or empty set if the file
            doesn't exist.
        """
        if not self.keywords_file.exists():
            return set()

        with open(self.keywords_file, "r", encoding="utf-8") as f:
            return {keyword for line in f if (keyword := line.strip())}

    def save_keywords(self, keywords: set[str]) -> None:
        """Save keywords to file, one per line in sorted order.

        Args:
            keywords: Set of keywords to save.
        """
        # Ensure parent directory exists
        self.keywords_file.parent.mkdir(parents=True, exist_ok=True)

        with open(self.keywords_file, "w", encoding="utf-8") as f:
            f.writelines(f"{keyword}\n" for keyword in sorted(keywords))

    def extract_from_repos(self, repos: list[Repository]) -> set[str]:
        """Extract keywords from a list of repositories.

        Args:
            repos: List of repositories to extract from.

        Returns:
            Set of extracted keywords that passed the quality filter.
        """
        keywords: set[str] = set()
        for repo in repos:
            keywords.update(self._extract_from_topics(repo))
            keywords.update(self._extract_from_description(repo))
            keywords.update(self._extract_from_name(repo))

        # Apply quality filter
        return self._filter_keywords(keywords)

    def _filter_keywords(self, keywords: set[str]) -> set[str]:
        """Filter out low-quality keywords.

        Args:
            keywords: Set of keywords to filter.

        Returns:
            Filtered set of high-quality keywords (stripped, lower-case).
        """
        filtered: set[str] = set()

        for keyword in keywords:
            candidate = keyword.strip().lower()
            if not candidate:
                continue

            # Skip known low-quality tokens discovered from repository noise
            if candidate in LOW_QUALITY_KEYWORDS:
                continue

            # Skip if matches invalid patterns
            if any(re.match(pattern, candidate) for pattern in INVALID_PATTERNS):
                continue

            # Too short, unless it is a known short AI term
            if (
                len(candidate) < MIN_KEYWORD_LENGTH
                and candidate not in VALID_SHORT_KEYWORDS
            ):
                continue

            # Skip if it looks like a file path with extension
            if "." in candidate and not candidate.startswith("."):
                continue

            # Skip if at least half of the characters are digits
            digit_count = sum(1 for c in candidate if c.isdigit())
            if digit_count >= len(candidate) * 0.5:
                continue

            filtered.add(candidate)

        return filtered

    @staticmethod
    def _clean_tokens(tokens) -> set[str]:
        """Keep tokens of at least 2 chars that are not stopwords or pure digits."""
        return {
            token
            for token in tokens
            if len(token) >= 2 and token not in STOPWORDS and not token.isdigit()
        }

    def _extract_from_topics(self, repo: Repository) -> set[str]:
        """Extract keywords from repository topics.

        Args:
            repo: Repository to extract from.

        Returns:
            Set of keywords from topics (lowercase); empty when the
            repository has no topics.
        """
        return {topic.lower() for topic in (repo.topics or ())}

    def _extract_from_description(self, repo: Repository) -> set[str]:
        """Extract keywords from repository description.

        Splits on anything other than ASCII letters and digits, filters
        stopwords, requires min 2 chars, and excludes pure digits.

        Args:
            repo: Repository to extract from.

        Returns:
            Set of keywords from description.
        """
        if not repo.description:
            return set()

        # Split on non-alphanumeric characters
        words = re.split(r"[^a-zA-Z0-9]+", repo.description.lower())
        return self._clean_tokens(words)

    def _extract_from_name(self, repo: Repository) -> set[str]:
        """Extract keywords from repository name.

        Splits on hyphens and underscores, filters stopwords,
        requires min 2 chars, and excludes pure digits.

        Args:
            repo: Repository to extract from.

        Returns:
            Set of keywords from name; empty when the name is missing.
        """
        if not repo.name:
            return set()

        # Split on hyphens and underscores
        parts = re.split(r"[-_]+", repo.name.lower())
        return self._clean_tokens(parts)

    def merge_keywords(
        self, existing: set[str], new: set[str]
    ) -> set[str]:
        """Merge new keywords with existing, respecting max_keywords limit.

        Existing keywords take priority. When the union is too large, new
        keywords are added in sorted order until the limit is reached; if
        the existing keywords alone exceed the limit, they are trimmed in
        sorted order.

        Args:
            existing: Existing set of keywords.
            new: New keywords to merge.

        Returns:
            Merged set of keywords, never larger than max_keywords.
        """
        merged = existing | new
        if len(merged) <= self.max_keywords:
            return merged

        # Rank existing keywords ahead of new ones and cut at the limit.
        # Slicing cannot overshoot, unlike add-then-check, which returned
        # one keyword too many when `existing` was already full.
        ranked = sorted(existing) + sorted(new - existing)
        return set(ranked[: max(self.max_keywords, 0)])

    def get_keywords_for_search(self) -> list[str]:
        """Get keywords as a sorted list for search queries.

        Returns:
            Sorted list of keywords.
        """
        return sorted(self.load_keywords())