local-deep-research 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. local_deep_research/__init__.py +24 -0
  2. local_deep_research/citation_handler.py +113 -0
  3. local_deep_research/config.py +166 -0
  4. local_deep_research/defaults/__init__.py +44 -0
  5. local_deep_research/defaults/llm_config.py +269 -0
  6. local_deep_research/defaults/local_collections.toml +47 -0
  7. local_deep_research/defaults/main.toml +57 -0
  8. local_deep_research/defaults/search_engines.toml +244 -0
  9. local_deep_research/local_collections.py +141 -0
  10. local_deep_research/main.py +113 -0
  11. local_deep_research/report_generator.py +206 -0
  12. local_deep_research/search_system.py +241 -0
  13. local_deep_research/utilties/__init__.py +0 -0
  14. local_deep_research/utilties/enums.py +9 -0
  15. local_deep_research/utilties/llm_utils.py +116 -0
  16. local_deep_research/utilties/search_utilities.py +115 -0
  17. local_deep_research/utilties/setup_utils.py +6 -0
  18. local_deep_research/web/__init__.py +2 -0
  19. local_deep_research/web/app.py +1209 -0
  20. local_deep_research/web/static/css/styles.css +1008 -0
  21. local_deep_research/web/static/js/app.js +2078 -0
  22. local_deep_research/web/templates/api_keys_config.html +82 -0
  23. local_deep_research/web/templates/collections_config.html +90 -0
  24. local_deep_research/web/templates/index.html +312 -0
  25. local_deep_research/web/templates/llm_config.html +120 -0
  26. local_deep_research/web/templates/main_config.html +89 -0
  27. local_deep_research/web/templates/search_engines_config.html +154 -0
  28. local_deep_research/web/templates/settings.html +519 -0
  29. local_deep_research/web/templates/settings_dashboard.html +207 -0
  30. local_deep_research/web_search_engines/__init__.py +0 -0
  31. local_deep_research/web_search_engines/engines/__init__.py +0 -0
  32. local_deep_research/web_search_engines/engines/full_search.py +128 -0
  33. local_deep_research/web_search_engines/engines/meta_search_engine.py +274 -0
  34. local_deep_research/web_search_engines/engines/search_engine_arxiv.py +367 -0
  35. local_deep_research/web_search_engines/engines/search_engine_brave.py +245 -0
  36. local_deep_research/web_search_engines/engines/search_engine_ddg.py +123 -0
  37. local_deep_research/web_search_engines/engines/search_engine_github.py +663 -0
  38. local_deep_research/web_search_engines/engines/search_engine_google_pse.py +283 -0
  39. local_deep_research/web_search_engines/engines/search_engine_guardian.py +337 -0
  40. local_deep_research/web_search_engines/engines/search_engine_local.py +901 -0
  41. local_deep_research/web_search_engines/engines/search_engine_local_all.py +153 -0
  42. local_deep_research/web_search_engines/engines/search_engine_medrxiv.py +623 -0
  43. local_deep_research/web_search_engines/engines/search_engine_pubmed.py +992 -0
  44. local_deep_research/web_search_engines/engines/search_engine_serpapi.py +230 -0
  45. local_deep_research/web_search_engines/engines/search_engine_wayback.py +474 -0
  46. local_deep_research/web_search_engines/engines/search_engine_wikipedia.py +242 -0
  47. local_deep_research/web_search_engines/full_search.py +254 -0
  48. local_deep_research/web_search_engines/search_engine_base.py +197 -0
  49. local_deep_research/web_search_engines/search_engine_factory.py +233 -0
  50. local_deep_research/web_search_engines/search_engines_config.py +54 -0
  51. local_deep_research-0.1.0.dist-info/LICENSE +21 -0
  52. local_deep_research-0.1.0.dist-info/METADATA +328 -0
  53. local_deep_research-0.1.0.dist-info/RECORD +56 -0
  54. local_deep_research-0.1.0.dist-info/WHEEL +5 -0
  55. local_deep_research-0.1.0.dist-info/entry_points.txt +3 -0
  56. local_deep_research-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,663 @@
1
+ import requests
2
+ import logging
3
+ import base64
4
+ import os
5
+ import time
6
+ from typing import Dict, List, Any, Optional, Union
7
+ from langchain_core.language_models import BaseLLM
8
+
9
+ from local_deep_research.web_search_engines.search_engine_base import BaseSearchEngine
10
+ from local_deep_research import config
11
+
12
# Setup logging
# NOTE(review): basicConfig() is executed when this module is imported, so
# importing the engine may configure the process-wide root logger. Libraries
# usually leave that decision to the application - confirm this is intended.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module; used by the class below.
logger = logging.getLogger(__name__)
15
+
16
+ class GitHubSearchEngine(BaseSearchEngine):
17
+ """
18
+ GitHub search engine implementation.
19
+ Provides search across GitHub repositories, code, issues, and users.
20
+ """
21
+
22
+ def __init__(self,
23
+ max_results: int = 15,
24
+ api_key: Optional[str] = None,
25
+ search_type: str = "repositories",
26
+ include_readme: bool = True,
27
+ include_issues: bool = False,
28
+ llm: Optional[BaseLLM] = None,
29
+ max_filtered_results: Optional[int] = None):
30
+ """
31
+ Initialize the GitHub search engine.
32
+
33
+ Args:
34
+ max_results: Maximum number of search results
35
+ api_key: GitHub API token (can also be set in GITHUB_API_KEY env)
36
+ search_type: Type of GitHub search ("repositories", "code", "issues", "users")
37
+ include_readme: Whether to include README content for repositories
38
+ include_issues: Whether to include recent issues for repositories
39
+ llm: Language model for relevance filtering
40
+ max_filtered_results: Maximum number of results to keep after filtering
41
+ """
42
+ # Initialize the BaseSearchEngine with the LLM and max_filtered_results
43
+ super().__init__(llm=llm, max_filtered_results=max_filtered_results)
44
+
45
+ self.max_results = max_results
46
+ self.api_key = api_key or os.getenv("GITHUB_API_KEY")
47
+ self.search_type = search_type
48
+ self.include_readme = include_readme
49
+ self.include_issues = include_issues
50
+
51
+ # API endpoints
52
+ self.api_base = "https://api.github.com"
53
+ self.search_endpoint = f"{self.api_base}/search/{search_type}"
54
+
55
+ # Set up API headers
56
+ self.headers = {
57
+ "Accept": "application/vnd.github.v3+json",
58
+ "User-Agent": "Local-Deep-Research-Agent"
59
+ }
60
+
61
+ # Add authentication if API key provided
62
+ if self.api_key:
63
+ self.headers["Authorization"] = f"token {self.api_key}"
64
+ logger.info("Using authenticated GitHub API requests")
65
+ else:
66
+ logger.warning("No GitHub API key provided. Rate limits will be restricted.")
67
+
68
+ def _handle_rate_limits(self, response):
69
+ """Handle GitHub API rate limits by logging warnings and sleeping if necessary"""
70
+ remaining = int(response.headers.get("X-RateLimit-Remaining", 60))
71
+ reset_time = int(response.headers.get("X-RateLimit-Reset", 0))
72
+
73
+ if remaining < 5:
74
+ current_time = time.time()
75
+ wait_time = max(reset_time - current_time, 0)
76
+ logger.warning(f"GitHub API rate limit almost reached. {remaining} requests remaining.")
77
+
78
+ if wait_time > 0 and remaining == 0:
79
+ logger.warning(f"GitHub API rate limit exceeded. Waiting {wait_time:.0f} seconds.")
80
+ time.sleep(min(wait_time, 60)) # Wait at most 60 seconds
81
+
82
+ def _search_github(self, query: str) -> List[Dict[str, Any]]:
83
+ """
84
+ Perform a GitHub search based on the configured search type.
85
+
86
+ Args:
87
+ query: The search query
88
+
89
+ Returns:
90
+ List of GitHub search result items
91
+ """
92
+ results = []
93
+
94
+ try:
95
+ # Optimize GitHub query format
96
+ github_query = query
97
+
98
+ # For long queries, focus on keywords and add filters for better results
99
+ if len(query) > 80:
100
+ # Extract key terms if it's a recommendation request
101
+ if "recommend" in query.lower() or "looking for" in query.lower():
102
+ github_query = "stars:>100 " + " ".join([
103
+ word for word in query.split()
104
+ if len(word) > 3 and word.lower() not in
105
+ ["recommend", "recommended", "github", "repositories", "looking", "developers"]
106
+ ])
107
+
108
+ logger.info(f"Optimized GitHub query: {github_query}")
109
+
110
+ # Construct search parameters
111
+ params = {
112
+ "q": github_query,
113
+ "per_page": min(self.max_results, 100), # GitHub API max is 100 per page
114
+ "page": 1
115
+ }
116
+
117
+ # Add sort parameters based on search type
118
+ if self.search_type == "repositories":
119
+ params["sort"] = "stars"
120
+ params["order"] = "desc"
121
+ elif self.search_type == "code":
122
+ params["sort"] = "indexed"
123
+ params["order"] = "desc"
124
+ elif self.search_type == "issues":
125
+ params["sort"] = "updated"
126
+ params["order"] = "desc"
127
+ elif self.search_type == "users":
128
+ params["sort"] = "followers"
129
+ params["order"] = "desc"
130
+
131
+ # Execute the API request
132
+ response = requests.get(
133
+ self.search_endpoint,
134
+ headers=self.headers,
135
+ params=params
136
+ )
137
+
138
+ # Check for rate limiting
139
+ self._handle_rate_limits(response)
140
+
141
+ # Handle response with detailed logging
142
+ if response.status_code == 200:
143
+ data = response.json()
144
+ total_count = data.get("total_count", 0)
145
+ results = data.get("items", [])
146
+ logger.info(f"GitHub search returned {len(results)} results (total available: {total_count})")
147
+
148
+ # Log the rate limit information
149
+ rate_limit_remaining = response.headers.get("X-RateLimit-Remaining", "unknown")
150
+ rate_limit_reset = response.headers.get("X-RateLimit-Reset", "unknown")
151
+ logger.info(f"GitHub API rate limit: {rate_limit_remaining} requests remaining")
152
+
153
+ # If no results, try to provide more guidance
154
+ if not results:
155
+ logger.warning(f"No results found. Consider these search tips:")
156
+ logger.warning(f"1. Use shorter, more specific queries")
157
+ logger.warning(f"2. For repositories, try adding 'stars:>100' or 'language:python'")
158
+ logger.warning(f"3. For contribution opportunities, search for 'good-first-issue' or 'help-wanted'")
159
+ else:
160
+ logger.error(f"GitHub API error: {response.status_code} - {response.text}")
161
+
162
+ except Exception as e:
163
+ logger.error(f"Error searching GitHub: {e}")
164
+
165
+ return results
166
+
167
+ def _get_readme_content(self, repo_full_name: str) -> str:
168
+ """
169
+ Get README content for a repository.
170
+
171
+ Args:
172
+ repo_full_name: Full name of the repository (owner/repo)
173
+
174
+ Returns:
175
+ Decoded README content or empty string if not found
176
+ """
177
+ try:
178
+ # Get README
179
+ response = requests.get(
180
+ f"{self.api_base}/repos/{repo_full_name}/readme",
181
+ headers=self.headers
182
+ )
183
+
184
+ # Check for rate limiting
185
+ self._handle_rate_limits(response)
186
+
187
+ if response.status_code == 200:
188
+ data = response.json()
189
+ content = data.get("content", "")
190
+ encoding = data.get("encoding", "")
191
+
192
+ if encoding == "base64" and content:
193
+ return base64.b64decode(content).decode('utf-8', errors='replace')
194
+ return content
195
+ else:
196
+ logger.warning(f"Could not get README for {repo_full_name}: {response.status_code}")
197
+ return ""
198
+
199
+ except Exception as e:
200
+ logger.error(f"Error getting README for {repo_full_name}: {e}")
201
+ return ""
202
+
203
+ def _get_recent_issues(self, repo_full_name: str, limit: int = 5) -> List[Dict[str, Any]]:
204
+ """
205
+ Get recent issues for a repository.
206
+
207
+ Args:
208
+ repo_full_name: Full name of the repository (owner/repo)
209
+ limit: Maximum number of issues to return
210
+
211
+ Returns:
212
+ List of recent issues
213
+ """
214
+ issues = []
215
+
216
+ try:
217
+ # Get recent issues
218
+ response = requests.get(
219
+ f"{self.api_base}/repos/{repo_full_name}/issues",
220
+ headers=self.headers,
221
+ params={
222
+ "state": "all",
223
+ "per_page": limit,
224
+ "sort": "updated",
225
+ "direction": "desc"
226
+ }
227
+ )
228
+
229
+ # Check for rate limiting
230
+ self._handle_rate_limits(response)
231
+
232
+ if response.status_code == 200:
233
+ issues = response.json()
234
+ logger.info(f"Got {len(issues)} recent issues for {repo_full_name}")
235
+ else:
236
+ logger.warning(f"Could not get issues for {repo_full_name}: {response.status_code}")
237
+
238
+ except Exception as e:
239
+ logger.error(f"Error getting issues for {repo_full_name}: {e}")
240
+
241
+ return issues
242
+
243
+ def _get_file_content(self, file_url: str) -> str:
244
+ """
245
+ Get content of a file from GitHub.
246
+
247
+ Args:
248
+ file_url: API URL for the file
249
+
250
+ Returns:
251
+ Decoded file content or empty string if not found
252
+ """
253
+ try:
254
+ # Get file content
255
+ response = requests.get(
256
+ file_url,
257
+ headers=self.headers
258
+ )
259
+
260
+ # Check for rate limiting
261
+ self._handle_rate_limits(response)
262
+
263
+ if response.status_code == 200:
264
+ data = response.json()
265
+ content = data.get("content", "")
266
+ encoding = data.get("encoding", "")
267
+
268
+ if encoding == "base64" and content:
269
+ return base64.b64decode(content).decode('utf-8', errors='replace')
270
+ return content
271
+ else:
272
+ logger.warning(f"Could not get file content: {response.status_code}")
273
+ return ""
274
+
275
+ except Exception as e:
276
+ logger.error(f"Error getting file content: {e}")
277
+ return ""
278
+
279
+ def _format_repository_preview(self, repo: Dict[str, Any]) -> Dict[str, Any]:
280
+ """Format repository search result as preview"""
281
+ return {
282
+ "id": str(repo.get("id", "")),
283
+ "title": repo.get("full_name", ""),
284
+ "link": repo.get("html_url", ""),
285
+ "snippet": repo.get("description", "No description provided"),
286
+ "stars": repo.get("stargazers_count", 0),
287
+ "forks": repo.get("forks_count", 0),
288
+ "language": repo.get("language", ""),
289
+ "updated_at": repo.get("updated_at", ""),
290
+ "created_at": repo.get("created_at", ""),
291
+ "topics": repo.get("topics", []),
292
+ "owner": repo.get("owner", {}).get("login", ""),
293
+ "is_fork": repo.get("fork", False),
294
+ "search_type": "repository",
295
+ "repo_full_name": repo.get("full_name", "")
296
+ }
297
+
298
+ def _format_code_preview(self, code: Dict[str, Any]) -> Dict[str, Any]:
299
+ """Format code search result as preview"""
300
+ repo = code.get("repository", {})
301
+ return {
302
+ "id": f"code_{code.get('sha', '')}",
303
+ "title": f"{code.get('name', '')} in {repo.get('full_name', '')}",
304
+ "link": code.get("html_url", ""),
305
+ "snippet": f"Match in {code.get('path', '')}",
306
+ "path": code.get("path", ""),
307
+ "repo_name": repo.get("full_name", ""),
308
+ "repo_url": repo.get("html_url", ""),
309
+ "search_type": "code",
310
+ "file_url": code.get("url", "")
311
+ }
312
+
313
+ def _format_issue_preview(self, issue: Dict[str, Any]) -> Dict[str, Any]:
314
+ """Format issue search result as preview"""
315
+ repo = issue.get("repository", {}) if "repository" in issue else {"full_name": ""}
316
+ return {
317
+ "id": f"issue_{issue.get('number', '')}",
318
+ "title": issue.get("title", ""),
319
+ "link": issue.get("html_url", ""),
320
+ "snippet": issue.get("body", "")[:200] + "..." if len(issue.get("body", "")) > 200 else issue.get("body", ""),
321
+ "state": issue.get("state", ""),
322
+ "created_at": issue.get("created_at", ""),
323
+ "updated_at": issue.get("updated_at", ""),
324
+ "user": issue.get("user", {}).get("login", ""),
325
+ "comments": issue.get("comments", 0),
326
+ "search_type": "issue",
327
+ "repo_name": repo.get("full_name", "")
328
+ }
329
+
330
+ def _format_user_preview(self, user: Dict[str, Any]) -> Dict[str, Any]:
331
+ """Format user search result as preview"""
332
+ return {
333
+ "id": f"user_{user.get('id', '')}",
334
+ "title": user.get("login", ""),
335
+ "link": user.get("html_url", ""),
336
+ "snippet": user.get("bio", "No bio provided"),
337
+ "name": user.get("name", ""),
338
+ "followers": user.get("followers", 0),
339
+ "public_repos": user.get("public_repos", 0),
340
+ "location": user.get("location", ""),
341
+ "search_type": "user",
342
+ "user_login": user.get("login", "")
343
+ }
344
+
345
+ def _get_previews(self, query: str) -> List[Dict[str, Any]]:
346
+ """
347
+ Get preview information for GitHub search results.
348
+
349
+ Args:
350
+ query: The search query
351
+
352
+ Returns:
353
+ List of preview dictionaries
354
+ """
355
+ logger.info(f"Getting GitHub previews for query: {query}")
356
+
357
+ # For contribution-focused queries, automatically adjust search type and add filters
358
+ if any(term in query.lower() for term in ["contribute", "contributing", "contribution", "beginner", "newcomer"]):
359
+ # Use repositories search with help-wanted or good-first-issue labels
360
+ original_search_type = self.search_type
361
+ self.search_type = "repositories"
362
+ self.search_endpoint = f"{self.api_base}/search/repositories"
363
+
364
+ # Create a specialized query for finding beginner-friendly projects
365
+ specialized_query = "good-first-issues:>5 is:public archived:false"
366
+
367
+ # Extract language preferences if present
368
+ languages = []
369
+ for lang in ["python", "javascript", "java", "rust", "go", "typescript", "c#", "c++", "ruby"]:
370
+ if lang in query.lower():
371
+ languages.append(lang)
372
+
373
+ if languages:
374
+ specialized_query += f" language:{' language:'.join(languages)}"
375
+
376
+ # Extract keywords
377
+ keywords = [word for word in query.split() if len(word) > 3 and word.lower() not in
378
+ ["recommend", "recommended", "github", "repositories", "looking",
379
+ "developers", "contribute", "contributing", "beginner", "newcomer"]]
380
+
381
+ if keywords:
382
+ specialized_query += " " + " ".join(keywords[:5]) # Add up to 5 keywords
383
+
384
+ logger.info(f"Using specialized contribution query: {specialized_query}")
385
+
386
+ # Perform GitHub search with specialized query
387
+ results = self._search_github(specialized_query)
388
+
389
+ # Restore original search type
390
+ self.search_type = original_search_type
391
+ self.search_endpoint = f"{self.api_base}/search/{self.search_type}"
392
+ else:
393
+ # Perform standard GitHub search
394
+ results = self._search_github(query)
395
+
396
+ if not results:
397
+ logger.warning(f"No GitHub results found for query: {query}")
398
+ return []
399
+
400
+ # Format results as previews
401
+ previews = []
402
+ for result in results:
403
+ # Format based on search type
404
+ if self.search_type == "repositories":
405
+ preview = self._format_repository_preview(result)
406
+ elif self.search_type == "code":
407
+ preview = self._format_code_preview(result)
408
+ elif self.search_type == "issues":
409
+ preview = self._format_issue_preview(result)
410
+ elif self.search_type == "users":
411
+ preview = self._format_user_preview(result)
412
+ else:
413
+ logger.warning(f"Unknown search type: {self.search_type}")
414
+ continue
415
+
416
+ previews.append(preview)
417
+
418
+ logger.info(f"Formatted {len(previews)} GitHub preview results")
419
+ return previews
420
+
421
+ def _get_full_content(self, relevant_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
422
+ """
423
+ Get full content for the relevant GitHub search results.
424
+
425
+ Args:
426
+ relevant_items: List of relevant preview dictionaries
427
+
428
+ Returns:
429
+ List of result dictionaries with full content
430
+ """
431
+ # Check if we should add full content
432
+ if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and config.SEARCH_SNIPPETS_ONLY:
433
+ logger.info("Snippet-only mode, skipping full content retrieval")
434
+ return relevant_items
435
+
436
+ logger.info(f"Getting full content for {len(relevant_items)} GitHub results")
437
+
438
+ results = []
439
+ for item in relevant_items:
440
+ result = item.copy()
441
+ search_type = item.get("search_type", "")
442
+
443
+ # Add content based on search type
444
+ if search_type == "repository" and self.include_readme:
445
+ repo_full_name = item.get("repo_full_name", "")
446
+ if repo_full_name:
447
+ # Get README content
448
+ readme_content = self._get_readme_content(repo_full_name)
449
+ result["full_content"] = readme_content
450
+ result["content_type"] = "readme"
451
+
452
+ # Get recent issues if requested
453
+ if self.include_issues:
454
+ issues = self._get_recent_issues(repo_full_name)
455
+ result["recent_issues"] = issues
456
+
457
+ elif search_type == "code":
458
+ file_url = item.get("file_url", "")
459
+ if file_url:
460
+ # Get file content
461
+ file_content = self._get_file_content(file_url)
462
+ result["full_content"] = file_content
463
+ result["content_type"] = "file"
464
+
465
+ elif search_type == "issue":
466
+ # For issues, the snippet usually contains a summary already
467
+ # We'll just keep it as is
468
+ result["full_content"] = item.get("snippet", "")
469
+ result["content_type"] = "issue"
470
+
471
+ elif search_type == "user":
472
+ # For users, construct a profile summary
473
+ user_login = item.get("user_login", "")
474
+ profile_summary = f"GitHub user: {item.get('title', '')}\n"
475
+
476
+ if item.get("name"):
477
+ profile_summary += f"Name: {item.get('name')}\n"
478
+
479
+ if item.get("location"):
480
+ profile_summary += f"Location: {item.get('location')}\n"
481
+
482
+ profile_summary += f"Followers: {item.get('followers', 0)}\n"
483
+ profile_summary += f"Public repositories: {item.get('public_repos', 0)}\n"
484
+
485
+ if item.get("snippet") and item.get("snippet") != "No bio provided":
486
+ profile_summary += f"\nBio: {item.get('snippet')}\n"
487
+
488
+ result["full_content"] = profile_summary
489
+ result["content_type"] = "user_profile"
490
+
491
+ results.append(result)
492
+
493
+ return results
494
+
495
+ def search_repository(self, repo_owner: str, repo_name: str) -> Dict[str, Any]:
496
+ """
497
+ Get detailed information about a specific repository.
498
+
499
+ Args:
500
+ repo_owner: Owner of the repository
501
+ repo_name: Name of the repository
502
+
503
+ Returns:
504
+ Dictionary with repository information
505
+ """
506
+ repo_full_name = f"{repo_owner}/{repo_name}"
507
+ logger.info(f"Getting details for repository: {repo_full_name}")
508
+
509
+ try:
510
+ # Get repository details
511
+ response = requests.get(
512
+ f"{self.api_base}/repos/{repo_full_name}",
513
+ headers=self.headers
514
+ )
515
+
516
+ # Check for rate limiting
517
+ self._handle_rate_limits(response)
518
+
519
+ if response.status_code == 200:
520
+ repo = response.json()
521
+
522
+ # Format as repository preview
523
+ result = self._format_repository_preview(repo)
524
+
525
+ # Add README content if requested
526
+ if self.include_readme:
527
+ readme_content = self._get_readme_content(repo_full_name)
528
+ result["full_content"] = readme_content
529
+ result["content_type"] = "readme"
530
+
531
+ # Add recent issues if requested
532
+ if self.include_issues:
533
+ issues = self._get_recent_issues(repo_full_name)
534
+ result["recent_issues"] = issues
535
+
536
+ return result
537
+ else:
538
+ logger.error(f"Error getting repository details: {response.status_code} - {response.text}")
539
+ return {}
540
+
541
+ except Exception as e:
542
+ logger.error(f"Error getting repository details: {e}")
543
+ return {}
544
+
545
+ def search_code(self, query: str, language: Optional[str] = None, user: Optional[str] = None) -> List[Dict[str, Any]]:
546
+ """
547
+ Search for code with more specific parameters.
548
+
549
+ Args:
550
+ query: Code search query
551
+ language: Filter by programming language
552
+ user: Filter by GitHub username/organization
553
+
554
+ Returns:
555
+ List of code search results
556
+ """
557
+ # Build advanced query
558
+ advanced_query = query
559
+
560
+ if language:
561
+ advanced_query += f" language:{language}"
562
+
563
+ if user:
564
+ advanced_query += f" user:{user}"
565
+
566
+ # Save current search type
567
+ original_search_type = self.search_type
568
+
569
+ try:
570
+ # Set search type to code
571
+ self.search_type = "code"
572
+ self.search_endpoint = f"{self.api_base}/search/code"
573
+
574
+ # Perform search
575
+ results = self._search_github(advanced_query)
576
+
577
+ # Format results
578
+ previews = [self._format_code_preview(result) for result in results]
579
+
580
+ # Get full content if requested
581
+ if hasattr(config, 'SEARCH_SNIPPETS_ONLY') and not config.SEARCH_SNIPPETS_ONLY:
582
+ return self._get_full_content(previews)
583
+
584
+ return previews
585
+
586
+ finally:
587
+ # Restore original search type
588
+ self.search_type = original_search_type
589
+ self.search_endpoint = f"{self.api_base}/search/{self.search_type}"
590
+
591
+ def search_issues(self, query: str, state: str = "open", sort: str = "updated") -> List[Dict[str, Any]]:
592
+ """
593
+ Search for issues with more specific parameters.
594
+
595
+ Args:
596
+ query: Issue search query
597
+ state: Filter by issue state ("open", "closed", "all")
598
+ sort: Sort order ("updated", "created", "comments")
599
+
600
+ Returns:
601
+ List of issue search results
602
+ """
603
+ # Build advanced query
604
+ advanced_query = query + f" state:{state}"
605
+
606
+ # Save current search type
607
+ original_search_type = self.search_type
608
+
609
+ try:
610
+ # Set search type to issues
611
+ self.search_type = "issues"
612
+ self.search_endpoint = f"{self.api_base}/search/issues"
613
+
614
+ # Set sort parameter
615
+ params = {
616
+ "q": advanced_query,
617
+ "per_page": min(self.max_results, 100),
618
+ "page": 1,
619
+ "sort": sort,
620
+ "order": "desc"
621
+ }
622
+
623
+ # Perform search
624
+ response = requests.get(
625
+ self.search_endpoint,
626
+ headers=self.headers,
627
+ params=params
628
+ )
629
+
630
+ # Check for rate limiting
631
+ self._handle_rate_limits(response)
632
+
633
+ if response.status_code == 200:
634
+ data = response.json()
635
+ results = data.get("items", [])
636
+
637
+ # Format results
638
+ previews = [self._format_issue_preview(result) for result in results]
639
+
640
+ # For issues, we don't need to get full content
641
+ return previews
642
+ else:
643
+ logger.error(f"GitHub API error: {response.status_code} - {response.text}")
644
+ return []
645
+
646
+ finally:
647
+ # Restore original search type
648
+ self.search_type = original_search_type
649
+ self.search_endpoint = f"{self.api_base}/search/{self.search_type}"
650
+
651
+ def set_search_type(self, search_type: str):
652
+ """
653
+ Set the search type for subsequent searches.
654
+
655
+ Args:
656
+ search_type: Type of GitHub search ("repositories", "code", "issues", "users")
657
+ """
658
+ if search_type in ["repositories", "code", "issues", "users"]:
659
+ self.search_type = search_type
660
+ self.search_endpoint = f"{self.api_base}/search/{search_type}"
661
+ logger.info(f"Set GitHub search type to: {search_type}")
662
+ else:
663
+ logger.error(f"Invalid GitHub search type: {search_type}")