local_deep_research-0.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_deep_research/__init__.py +24 -0
- local_deep_research/citation_handler.py +113 -0
- local_deep_research/config.py +166 -0
- local_deep_research/defaults/__init__.py +44 -0
- local_deep_research/defaults/llm_config.py +269 -0
- local_deep_research/defaults/local_collections.toml +47 -0
- local_deep_research/defaults/main.toml +57 -0
- local_deep_research/defaults/search_engines.toml +244 -0
- local_deep_research/local_collections.py +141 -0
- local_deep_research/main.py +113 -0
- local_deep_research/report_generator.py +206 -0
- local_deep_research/search_system.py +241 -0
- local_deep_research/utilties/__init__.py +0 -0
- local_deep_research/utilties/enums.py +9 -0
- local_deep_research/utilties/llm_utils.py +116 -0
- local_deep_research/utilties/search_utilities.py +115 -0
- local_deep_research/utilties/setup_utils.py +6 -0
- local_deep_research/web/__init__.py +2 -0
- local_deep_research/web/app.py +1209 -0
- local_deep_research/web/static/css/styles.css +1008 -0
- local_deep_research/web/static/js/app.js +2078 -0
- local_deep_research/web/templates/api_keys_config.html +82 -0
- local_deep_research/web/templates/collections_config.html +90 -0
- local_deep_research/web/templates/index.html +312 -0
- local_deep_research/web/templates/llm_config.html +120 -0
- local_deep_research/web/templates/main_config.html +89 -0
- local_deep_research/web/templates/search_engines_config.html +154 -0
- local_deep_research/web/templates/settings.html +519 -0
- local_deep_research/web/templates/settings_dashboard.html +207 -0
- local_deep_research/web_search_engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/__init__.py +0 -0
- local_deep_research/web_search_engines/engines/full_search.py +128 -0
- local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
- local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
- local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
- local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
- local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
- local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
- local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
- local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
- local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
- local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
- local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
- local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
- local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
- local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
- local_deep_research/web_search_engines/full_search.py +254 -0
- local_deep_research/web_search_engines/search_engine_base.py +197 -0
- local_deep_research/web_search_engines/search_engine_factory.py +233 -0
- local_deep_research/web_search_engines/search_engines_config.py +54 -0
- local_deep_research-0.1.0.dist-info/LICENSE +21 -0
- local_deep_research-0.1.0.dist-info/METADATA +328 -0
- local_deep_research-0.1.0.dist-info/RECORD +56 -0
- local_deep_research-0.1.0.dist-info/WHEEL +5 -0
- local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
- local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,663 @@
import requests
import logging
import base64
import os
import time
from typing import Dict, List, Any, Optional, Union
from langchain_core.language_models import BaseLLM

from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
from local_deep_research import config

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class GitHubSearchEngine(BaseSearchEngine):
    """
    GitHub search engine implementation.
    Provides search across GitHub repositories, code, issues, and users.
    """

    def __init__(self,
                 max_results: int = 15,
                 api_key: Optional[str] = None,
                 search_type: str = "repositories",
                 include_readme: bool = True,
                 include_issues: bool = False,
                 llm: Optional[BaseLLM] = None,
                 max_filtered_results: Optional[int] = None):
        """
        Initialize the GitHub search engine.

        Args:
            max_results: Maximum number of search results
            api_key: GitHub API token (can also be set in GITHUB_API_KEY env)
            search_type: Type of GitHub search ("repositories", "code", "issues", "users")
            include_readme: Whether to include README content for repositories
            include_issues: Whether to include recent issues for repositories
            llm: Language model for relevance filtering
            max_filtered_results: Maximum number of results to keep after filtering
        """
        # Initialize the BaseSearchEngine with the LLM and max_filtered_results
        super().__init__(llm=llm, max_filtered_results=max_filtered_results)

        self.max_results = max_results
        self.api_key = api_key or os.getenv("GITHUB_API_KEY")
        self.search_type = search_type
        self.include_readme = include_readme
        self.include_issues = include_issues

        # API endpoints
        self.api_base = "https://api.github.com"
        self.search_endpoint = f"{self.api_base}/search/{search_type}"

        # Set up API headers
        self.headers = {
            "Accept": "application/vnd.github.v3+json",
            "User-Agent": "Local-Deep-Research-Agent"
        }

        # Add authentication if API key provided
        if self.api_key:
            self.headers["Authorization"] = f"token {self.api_key}"
            logger.info("Using authenticated GitHub API requests")
        else:
            logger.warning("No GitHub API key provided. Rate limits will be restricted.")

    def _handle_rate_limits(self, response):
        """Handle GitHub API rate limits by logging warnings and sleeping if necessary"""
        remaining = int(response.headers.get("X-RateLimit-Remaining", 60))
        reset_time = int(response.headers.get("X-RateLimit-Reset", 0))

        if remaining < 5:
            current_time = time.time()
            wait_time = max(reset_time - current_time, 0)
            logger.warning(f"GitHub API rate limit almost reached. {remaining} requests remaining.")

            if wait_time > 0 and remaining == 0:
                logger.warning(f"GitHub API rate limit exceeded. Waiting {wait_time:.0f} seconds.")
                time.sleep(min(wait_time, 60))  # Wait at most 60 seconds

    def _search_github(self, query: str) -> List[Dict[str, Any]]:
        """
        Perform a GitHub search based on the configured search type.

        Args:
            query: The search query

        Returns:
            List of GitHub search result items
        """
        results = []

        try:
            # Optimize GitHub query format
            github_query = query

            # For long queries, focus on keywords and add filters for better results
            if len(query) > 80:
                # Extract key terms if it's a recommendation request
                if "recommend" in query.lower() or "looking for" in query.lower():
                    github_query = "stars:>100 " + " ".join([
                        word for word in query.split()
                        if len(word) > 3 and word.lower() not in
                        ["recommend", "recommended", "github", "repositories", "looking", "developers"]
                    ])

                logger.info(f"Optimized GitHub query: {github_query}")

            # Construct search parameters
            params = {
                "q": github_query,
                "per_page": min(self.max_results, 100),  # GitHub API max is 100 per page
                "page": 1
            }

            # Add sort parameters based on search type
            if self.search_type == "repositories":
                params["sort"] = "stars"
                params["order"] = "desc"
            elif self.search_type == "code":
                params["sort"] = "indexed"
                params["order"] = "desc"
            elif self.search_type == "issues":
                params["sort"] = "updated"
                params["order"] = "desc"
            elif self.search_type == "users":
                params["sort"] = "followers"
                params["order"] = "desc"

            # Execute the API request
            response = requests.get(
                self.search_endpoint,
                headers=self.headers,
                params=params
            )

            # Check for rate limiting
            self._handle_rate_limits(response)

            # Handle response with detailed logging
            if response.status_code == 200:
                data = response.json()
                total_count = data.get("total_count", 0)
                results = data.get("items", [])
                logger.info(f"GitHub search returned {len(results)} results (total available: {total_count})")

                # Log the rate limit information
                rate_limit_remaining = response.headers.get("X-RateLimit-Remaining", "unknown")
                rate_limit_reset = response.headers.get("X-RateLimit-Reset", "unknown")
                logger.info(f"GitHub API rate limit: {rate_limit_remaining} requests remaining")

                # If no results, try to provide more guidance
                if not results:
                    logger.warning(f"No results found. Consider these search tips:")
                    logger.warning(f"1. Use shorter, more specific queries")
                    logger.warning(f"2. For repositories, try adding 'stars:>100' or 'language:python'")
                    logger.warning(f"3. For contribution opportunities, search for 'good-first-issue' or 'help-wanted'")
            else:
                logger.error(f"GitHub API error: {response.status_code} - {response.text}")

        except Exception as e:
            logger.error(f"Error searching GitHub: {e}")

        return results

    def _get_readme_content(self, repo_full_name: str) -> str:
        """
        Get README content for a repository.

        Args:
            repo_full_name: Full name of the repository (owner/repo)

        Returns:
            Decoded README content or empty string if not found
        """
        try:
            # Get README
            response = requests.get(
                f"{self.api_base}/repos/{repo_full_name}/readme",
                headers=self.headers
            )

            # Check for rate limiting
            self._handle_rate_limits(response)

            if response.status_code == 200:
                data = response.json()
                content = data.get("content", "")
                encoding = data.get("encoding", "")

                if encoding == "base64" and content:
                    return base64.b64decode(content).decode('utf-8', errors='replace')
                return content
            else:
                logger.warning(f"Could not get README for {repo_full_name}: {response.status_code}")
                return ""

        except Exception as e:
            logger.error(f"Error getting README for {repo_full_name}: {e}")
            return ""

    def _get_recent_issues(self, repo_full_name: str, limit: int = 5) -> List[Dict[str, Any]]:
        """
        Get recent issues for a repository.

        Args:
            repo_full_name: Full name of the repository (owner/repo)
            limit: Maximum number of issues to return

        Returns:
            List of recent issues
        """
        issues = []

        try:
            # Get recent issues
            response = requests.get(
                f"{self.api_base}/repos/{repo_full_name}/issues",
                headers=self.headers,
                params={
                    "state": "all",
                    "per_page": limit,
                    "sort": "updated",
                    "direction": "desc"
                }
            )

            # Check for rate limiting
            self._handle_rate_limits(response)

            if response.status_code == 200:
                issues = response.json()
                logger.info(f"Got {len(issues)} recent issues for {repo_full_name}")
            else:
                logger.warning(f"Could not get issues for {repo_full_name}: {response.status_code}")

        except Exception as e:
            logger.error(f"Error getting issues for {repo_full_name}: {e}")

        return issues

    def _get_file_content(self, file_url: str) -> str:
        """
        Get content of a file from GitHub.

        Args:
            file_url: API URL for the file

        Returns:
            Decoded file content or empty string if not found
        """
        try:
            # Get file content
            response = requests.get(
                file_url,
                headers=self.headers
            )

            # Check for rate limiting
            self._handle_rate_limits(response)

            if response.status_code == 200:
                data = response.json()
                content = data.get("content", "")
                encoding = data.get("encoding", "")

                if encoding == "base64" and content:
                    return base64.b64decode(content).decode('utf-8', errors='replace')
                return content
            else:
                logger.warning(f"Could not get file content: {response.status_code}")
                return ""

        except Exception as e:
            logger.error(f"Error getting file content: {e}")
            return ""

    def _format_repository_preview(self, repo: Dict[str, Any]) -> Dict[str, Any]:
        """Format repository search result as preview"""
        return {
            "id": str(repo.get("id", "")),
            "title": repo.get("full_name", ""),
            "link": repo.get("html_url", ""),
            "snippet": repo.get("description", "No description provided"),
            "stars": repo.get("stargazers_count", 0),
            "forks": repo.get("forks_count", 0),
            "language": repo.get("language", ""),
            "updated_at": repo.get("updated_at", ""),
            "created_at": repo.get("created_at", ""),
            "topics": repo.get("topics", []),
            "owner": repo.get("owner", {}).get("login", ""),
            "is_fork": repo.get("fork", False),
            "search_type": "repository",
            "repo_full_name": repo.get("full_name", "")
        }

    def _format_code_preview(self, code: Dict[str, Any]) -> Dict[str, Any]:
        """Format code search result as preview"""
        repo = code.get("repository", {})
        return {
            "id": f"code_{code.get('sha', '')}",
            "title": f"{code.get('name', '')} in {repo.get('full_name', '')}",
            "link": code.get("html_url", ""),
            "snippet": f"Match in {code.get('path', '')}",
            "path": code.get("path", ""),
            "repo_name": repo.get("full_name", ""),
            "repo_url": repo.get("html_url", ""),
            "search_type": "code",
            "file_url": code.get("url", "")
        }

    def _format_issue_preview(self, issue: Dict[str, Any]) -> Dict[str, Any]:
        """Format issue search result as preview"""
        repo = issue.get("repository", {}) if "repository" in issue else {"full_name": ""}
        return {
            "id": f"issue_{issue.get('number', '')}",
            "title": issue.get("title", ""),
            "link": issue.get("html_url", ""),
            "snippet": issue.get("body", "")[:200] + "..." if len(issue.get("body", "")) > 200 else issue.get("body", ""),
            "state": issue.get("state", ""),
            "created_at": issue.get("created_at", ""),
            "updated_at": issue.get("updated_at", ""),
            "user": issue.get("user", {}).get("login", ""),
            "comments": issue.get("comments", 0),
            "search_type": "issue",
            "repo_name": repo.get("full_name", "")
        }

    def _format_user_preview(self, user: Dict[str, Any]) -> Dict[str, Any]:
        """Format user search result as preview"""
        return {
            "id": f"user_{user.get('id', '')}",
            "title": user.get("login", ""),
            "link": user.get("html_url", ""),
            "snippet": user.get("bio", "No bio provided"),
            "name": user.get("name", ""),
            "followers": user.get("followers", 0),
            "public_repos": user.get("public_repos", 0),
            "location": user.get("location", ""),
            "search_type": "user",
            "user_login": user.get("login", "")
        }

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information for GitHub search results.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries
        """
        logger.info(f"Getting GitHub previews for query: {query}")

        # For contribution-focused queries, automatically adjust search type and add filters
        if any(term in query.lower() for term in ["contribute", "contributing", "contribution", "beginner", "newcomer"]):
            # Use repositories search with help-wanted or good-first-issue labels
            original_search_type = self.search_type
            self.search_type = "repositories"
            self.search_endpoint = f"{self.api_base}/search/repositories"

            # Create a specialized query for finding beginner-friendly projects
            specialized_query = "good-first-issues:>5 is:public archived:false"

            # Extract language preferences if present
            languages = []
            for lang in ["python", "javascript", "java", "rust", "go", "typescript", "c#", "c++", "ruby"]:
                if lang in query.lower():
                    languages.append(lang)

            if languages:
                specialized_query += f" language:{' language:'.join(languages)}"

            # Extract keywords
            keywords = [word for word in query.split() if len(word) > 3 and word.lower() not in
                        ["recommend", "recommended", "github", "repositories", "looking",
                         "developers", "contribute", "contributing", "beginner", "newcomer"]]

            if keywords:
                specialized_query += " " + " ".join(keywords[:5])  # Add up to 5 keywords

            logger.info(f"Using specialized contribution query: {specialized_query}")

            # Perform GitHub search with specialized query
            results = self._search_github(specialized_query)

            # Restore original search type
            self.search_type = original_search_type
            self.search_endpoint = f"{self.api_base}/search/{self.search_type}"
        else:
            # Perform standard GitHub search
            results = self._search_github(query)

        if not results:
            logger.warning(f"No GitHub results found for query: {query}")
            return []

        # Format results as previews
        previews = []
        for result in results:
            # Format based on search type
            if self.search_type == "repositories":
                preview = self._format_repository_preview(result)
            elif self.search_type == "code":
                preview = self._format_code_preview(result)
            elif self.search_type == "issues":
                preview = self._format_issue_preview(result)
            elif self.search_type == "users":
                preview = self._format_user_preview(result)
            else:
                logger.warning(f"Unknown search type: {self.search_type}")
                continue

            previews.append(preview)

        logger.info(f"Formatted {len(previews)} GitHub preview results")
        return previews

    def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant GitHub search results.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content
        """
        # Check if we should add full content
        if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
            logger.info("Snippet-only mode, skipping full content retrieval")
            return relevant_items

        logger.info(f"Getting full content for {len(relevant_items)} GitHub results")

        results = []
        for item in relevant_items:
            result = item.copy()
            search_type = item.get("search_type", "")

            # Add content based on search type
            if search_type == "repository" and self.include_readme:
                repo_full_name = item.get("repo_full_name", "")
                if repo_full_name:
                    # Get README content
                    readme_content = self._get_readme_content(repo_full_name)
                    result["full_content"] = readme_content
                    result["content_type"] = "readme"

                    # Get recent issues if requested
                    if self.include_issues:
                        issues = self._get_recent_issues(repo_full_name)
                        result["recent_issues"] = issues

            elif search_type == "code":
                file_url = item.get("file_url", "")
                if file_url:
                    # Get file content
                    file_content = self._get_file_content(file_url)
                    result["full_content"] = file_content
                    result["content_type"] = "file"

            elif search_type == "issue":
                # For issues, the snippet usually contains a summary already
                # We'll just keep it as is
                result["full_content"] = item.get("snippet", "")
                result["content_type"] = "issue"

            elif search_type == "user":
                # For users, construct a profile summary
                user_login = item.get("user_login", "")
                profile_summary = f"GitHub user: {item.get('title', '')}\n"

                if item.get("name"):
                    profile_summary += f"Name: {item.get('name')}\n"

                if item.get("location"):
                    profile_summary += f"Location: {item.get('location')}\n"

                profile_summary += f"Followers: {item.get('followers', 0)}\n"
                profile_summary += f"Public repositories: {item.get('public_repos', 0)}\n"

                if item.get("snippet") and item.get("snippet") != "No bio provided":
                    profile_summary += f"\nBio: {item.get('snippet')}\n"

                result["full_content"] = profile_summary
                result["content_type"] = "user_profile"

            results.append(result)

        return results

    def search_repository(self, repo_owner: str, repo_name: str) -> Dict[str, Any]:
        """
        Get detailed information about a specific repository.

        Args:
            repo_owner: Owner of the repository
            repo_name: Name of the repository

        Returns:
            Dictionary with repository information
        """
        repo_full_name = f"{repo_owner}/{repo_name}"
        logger.info(f"Getting details for repository: {repo_full_name}")

        try:
            # Get repository details
            response = requests.get(
                f"{self.api_base}/repos/{repo_full_name}",
                headers=self.headers
            )

            # Check for rate limiting
            self._handle_rate_limits(response)

            if response.status_code == 200:
                repo = response.json()

                # Format as repository preview
                result = self._format_repository_preview(repo)

                # Add README content if requested
                if self.include_readme:
                    readme_content = self._get_readme_content(repo_full_name)
                    result["full_content"] = readme_content
                    result["content_type"] = "readme"

                # Add recent issues if requested
                if self.include_issues:
                    issues = self._get_recent_issues(repo_full_name)
                    result["recent_issues"] = issues

                return result
            else:
                logger.error(f"Error getting repository details: {response.status_code} - {response.text}")
                return {}

        except Exception as e:
            logger.error(f"Error getting repository details: {e}")
            return {}

    def search_code(self, query: str, language: Optional[str] = None, user: Optional[str] = None) -> List[Dict[str, Any]]:
        """
        Search for code with more specific parameters.

        Args:
            query: Code search query
            language: Filter by programming language
            user: Filter by GitHub username/organization

        Returns:
            List of code search results
        """
        # Build advanced query
        advanced_query = query

        if language:
            advanced_query += f" language:{language}"

        if user:
            advanced_query += f" user:{user}"

        # Save current search type
        original_search_type = self.search_type

        try:
            # Set search type to code
            self.search_type = "code"
            self.search_endpoint = f"{self.api_base}/search/code"

            # Perform search
            results = self._search_github(advanced_query)

            # Format results
            previews = [self._format_code_preview(result) for result in results]

            # Get full content if requested
            if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and not config.SEARCH_SNIPPETS_ONLY:
                return self._get_full_content(previews)

            return previews

        finally:
            # Restore original search type
            self.search_type = original_search_type
            self.search_endpoint = f"{self.api_base}/search/{self.search_type}"

    def search_issues(self, query: str, state: str = "open", sort: str = "updated") -> List[Dict[str, Any]]:
        """
        Search for issues with more specific parameters.

        Args:
            query: Issue search query
            state: Filter by issue state ("open", "closed", "all")
            sort: Sort order ("updated", "created", "comments")

        Returns:
            List of issue search results
        """
        # Build advanced query
        advanced_query = query + f" state:{state}"

        # Save current search type
        original_search_type = self.search_type

        try:
            # Set search type to issues
            self.search_type = "issues"
            self.search_endpoint = f"{self.api_base}/search/issues"

            # Set sort parameter
            params = {
                "q": advanced_query,
                "per_page": min(self.max_results, 100),
                "page": 1,
                "sort": sort,
                "order": "desc"
            }

            # Perform search
            response = requests.get(
                self.search_endpoint,
                headers=self.headers,
                params=params
            )

            # Check for rate limiting
            self._handle_rate_limits(response)

            if response.status_code == 200:
                data = response.json()
                results = data.get("items", [])

                # Format results
                previews = [self._format_issue_preview(result) for result in results]

                # For issues, we don't need to get full content
                return previews
            else:
                logger.error(f"GitHub API error: {response.status_code} - {response.text}")
                return []

        finally:
            # Restore original search type
            self.search_type = original_search_type
            self.search_endpoint = f"{self.api_base}/search/{self.search_type}"

    def set_search_type(self, search_type: str):
        """
        Set the search type for subsequent searches.

        Args:
            search_type: Type of GitHub search ("repositories", "code", "issues", "users")
        """
        if search_type in ["repositories", "code", "issues", "users"]:
            self.search_type = search_type
            self.search_endpoint = f"{self.api_base}/search/{search_type}"
            logger.info(f"Set GitHub search type to: {search_type}")
        else:
            logger.error(f"Invalid GitHub search type: {search_type}")