ngpt-3.5.0-py3-none-any.whl → ngpt-3.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ngpt/utils/web_search.py CHANGED
@@ -1,5 +1,5 @@
 """
-Web search utilities for nGPT using duckduckgo-search and BeautifulSoup4.
+Web search utilities for nGPT using BeautifulSoup4.
 
 This module provides functionality to search the web and extract
 information from search results to enhance AI prompts.
@@ -7,8 +7,7 @@ information from search results to enhance AI prompts.
 
 import re
 from typing import List, Dict, Any, Optional
-from duckduckgo_search import DDGS
-from urllib.parse import urlparse
+from urllib.parse import urlparse, parse_qs
 import requests
 import sys
 import datetime
@@ -42,7 +41,7 @@ def get_logger():
 
 def perform_web_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
     """
-    Search the web using DuckDuckGo and return relevant results.
+    Search DuckDuckGo directly and return relevant results.
 
     Args:
         query: The search query
@@ -53,8 +52,48 @@ def perform_web_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]
     """
     logger = get_logger()
     try:
-        ddgs = DDGS()
-        results = list(ddgs.text(query, max_results=max_results))
+        # Headers to mimic a browser request
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
+        }
+
+        # DuckDuckGo search URL
+        encoded_query = requests.utils.quote(query)
+        url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
+
+        # Fetch search results
+        response = requests.get(url, headers=headers, timeout=10)
+        response.raise_for_status()
+
+        # Parse HTML response with html.parser (no lxml dependency)
+        soup = BeautifulSoup(response.text, 'html.parser')
+        results = []
+
+        # Extract search results
+        for result in soup.select('.result')[:max_results]:
+            title_elem = result.select_one('.result__title')
+            snippet_elem = result.select_one('.result__snippet')
+            url_elem = result.select_one('.result__url')
+
+            # Extract actual URL from DDG's redirect URL if needed
+            href = title_elem.find('a')['href'] if title_elem and title_elem.find('a') else None
+            if href and href.startswith('/'):
+                # Parse DDG redirect URL to get actual URL
+                parsed_url = urlparse(href)
+                query_params = parse_qs(parsed_url.query)
+                actual_url = query_params.get('uddg', [None])[0]
+            else:
+                actual_url = href
+
+            # Add result to list
+            if title_elem and actual_url:
+                results.append({
+                    'title': title_elem.get_text(strip=True),
+                    'href': actual_url,
+                    'body': snippet_elem.get_text(strip=True) if snippet_elem else ''
+                })
+
         return results
     except Exception as e:
         logger.error(f"Error performing web search: {str(e)}")
@@ -112,18 +151,15 @@ def extract_article_content(url: str, max_chars: int = 5000) -> Optional[str]:
         # Default to UTF-8 if we can't detect
         response.encoding = 'utf-8'
 
-        # Parse with BeautifulSoup using lxml parser for better results if available
-        try:
-            soup = BeautifulSoup(response.text, 'lxml')
-        except ImportError:
-            soup = BeautifulSoup(response.text, 'html.parser')
+        # Parse with BeautifulSoup using html.parser
+        soup = BeautifulSoup(response.text, 'html.parser')
 
         # Extract main content using multiple strategies
         extracted_content = None
 
         # ---------- PREPROCESSING ----------
         # Clone the soup before preprocessing
-        processed_soup = BeautifulSoup(str(soup), 'lxml' if 'lxml' in str(type(soup.parser)) else 'html.parser')
+        processed_soup = BeautifulSoup(str(soup), 'html.parser')
 
         # Remove all script, style tags and comments
         for element in processed_soup.find_all(['script', 'style', 'noscript']):
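Both parse sites now pass the stdlib-backed 'html.parser' name to BeautifulSoup instead of probing for lxml, so beautifulsoup4 is the only parsing dependency left. Worth noting: the removed fallback caught ImportError, but bs4 signals a missing parser with bs4.FeatureNotFound (a ValueError subclass), so that except clause would not have fired anyway. A small sketch of the behavior, assuming only beautifulsoup4 is installed:

from bs4 import BeautifulSoup

html = "<html><body><p class='lead'>Hello</p></body></html>"

# 'html.parser' ships with CPython, so this needs no extra packages;
# passing 'lxml' here raises bs4.FeatureNotFound unless lxml is installed.
soup = BeautifulSoup(html, 'html.parser')
print(soup.select_one('p.lead').get_text(strip=True))  # Hello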
ngpt-3.5.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ngpt
-Version: 3.5.0
+Version: 3.5.1
 Summary: Swiss army knife for LLMs: powerful CLI and interactive chatbot in one package. Seamlessly work with OpenAI, Ollama, Groq, Claude, Gemini, or any OpenAI-compatible API to generate code, craft git commits, rewrite text, and execute shell commands.
 Project-URL: Homepage, https://github.com/nazdridoy/ngpt
 Project-URL: Repository, https://github.com/nazdridoy/ngpt
@@ -29,7 +29,6 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: Utilities
 Requires-Python: >=3.8
 Requires-Dist: beautifulsoup4>=4.12.0
-Requires-Dist: duckduckgo-search>=3.0.0
 Requires-Dist: prompt-toolkit>=3.0.0
 Requires-Dist: pyperclip>=1.8.0
 Requires-Dist: requests>=2.31.0
ngpt-3.5.1.dist-info/RECORD CHANGED
@@ -20,9 +20,9 @@ ngpt/utils/__init__.py,sha256=qu_66I1Vtav2f1LDiPn5J3DUsbK7o1CSScMcTkYqxoM,1179
 ngpt/utils/cli_config.py,sha256=Ug8cECBTIuzOwkBWidLTfs-OAdOsCMJ2bNa70pOADfw,11195
 ngpt/utils/config.py,sha256=wsArA4osnh8fKqOvtsPqqBxAz3DpdjtaWUFaRtnUdyc,10452
 ngpt/utils/log.py,sha256=f1jg2iFo35PAmsarH8FVL_62plq4VXH0Mu2QiP6RJGw,15934
-ngpt/utils/web_search.py,sha256=_e78fk-6WDVqHDIaAMP9CDoX4FyOWM9XhdSZcr5KasI,29163
-ngpt-3.5.0.dist-info/METADATA,sha256=BEcmK1bv4EZ_Q6W7MDnDPo304e-_L0zk1bDgH4mmp8Q,23926
-ngpt-3.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-ngpt-3.5.0.dist-info/entry_points.txt,sha256=SqAAvLhMrsEpkIr4YFRdUeyuXQ9o0IBCeYgE6AVojoI,44
-ngpt-3.5.0.dist-info/licenses/LICENSE,sha256=mQkpWoADxbHqE0HRefYLJdm7OpdrXBr3vNv5bZ8w72M,1065
-ngpt-3.5.0.dist-info/RECORD,,
+ngpt/utils/web_search.py,sha256=TK_c2U8MYM86f9J_oEzi0UZ46JohvyxdjfonHZZZqfY,30718
+ngpt-3.5.1.dist-info/METADATA,sha256=djECxREmLWeO-ugYuco3gi2xEJqaGuFpjJopqa3veLI,23886
+ngpt-3.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ngpt-3.5.1.dist-info/entry_points.txt,sha256=SqAAvLhMrsEpkIr4YFRdUeyuXQ9o0IBCeYgE6AVojoI,44
+ngpt-3.5.1.dist-info/licenses/LICENSE,sha256=mQkpWoADxbHqE0HRefYLJdm7OpdrXBr3vNv5bZ8w72M,1065
+ngpt-3.5.1.dist-info/RECORD,,
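Each RECORD row above has the form path,sha256=<digest>,<size>, where the digest is the unpadded URL-safe base64 encoding of the file's SHA-256 hash, per the wheel RECORD format. A minimal sketch for recomputing a row to verify an installed file; the helper name is illustrative:

import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    # Build "path,sha256=<digest>,<size>" the way wheel RECORD files do:
    # SHA-256 over the raw bytes, URL-safe base64, '=' padding stripped.
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b'=').decode('ascii')
    return f"{path},sha256={digest},{len(data)}"

# e.g. record_entry('ngpt/utils/web_search.py') should reproduce the
# "ngpt/utils/web_search.py,sha256=TK_c2U8...,30718" row above.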