ngpt 3.5.0__py3-none-any.whl → 3.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngpt/utils/web_search.py +48 -12
- {ngpt-3.5.0.dist-info → ngpt-3.5.1.dist-info}/METADATA +1 -2
- {ngpt-3.5.0.dist-info → ngpt-3.5.1.dist-info}/RECORD +6 -6
- {ngpt-3.5.0.dist-info → ngpt-3.5.1.dist-info}/WHEEL +0 -0
- {ngpt-3.5.0.dist-info → ngpt-3.5.1.dist-info}/entry_points.txt +0 -0
- {ngpt-3.5.0.dist-info → ngpt-3.5.1.dist-info}/licenses/LICENSE +0 -0
ngpt/utils/web_search.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
"""
|
2
|
-
Web search utilities for nGPT using
|
2
|
+
Web search utilities for nGPT using BeautifulSoup4.
|
3
3
|
|
4
4
|
This module provides functionality to search the web and extract
|
5
5
|
information from search results to enhance AI prompts.
|
@@ -7,8 +7,7 @@ information from search results to enhance AI prompts.
|
|
7
7
|
|
8
8
|
import re
|
9
9
|
from typing import List, Dict, Any, Optional
|
10
|
-
from
|
11
|
-
from urllib.parse import urlparse
|
10
|
+
from urllib.parse import urlparse, parse_qs
|
12
11
|
import requests
|
13
12
|
import sys
|
14
13
|
import datetime
|
@@ -42,7 +41,7 @@ def get_logger():
|
|
42
41
|
|
43
42
|
def perform_web_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
|
44
43
|
"""
|
45
|
-
Search
|
44
|
+
Search DuckDuckGo directly and return relevant results.
|
46
45
|
|
47
46
|
Args:
|
48
47
|
query: The search query
|
@@ -53,8 +52,48 @@ def perform_web_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]
|
|
53
52
|
"""
|
54
53
|
logger = get_logger()
|
55
54
|
try:
|
56
|
-
|
57
|
-
|
55
|
+
# Headers to mimic a browser request
|
56
|
+
headers = {
|
57
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
|
58
|
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
59
|
+
}
|
60
|
+
|
61
|
+
# DuckDuckGo search URL
|
62
|
+
encoded_query = requests.utils.quote(query)
|
63
|
+
url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
|
64
|
+
|
65
|
+
# Fetch search results
|
66
|
+
response = requests.get(url, headers=headers, timeout=10)
|
67
|
+
response.raise_for_status()
|
68
|
+
|
69
|
+
# Parse HTML response with html.parser (no lxml dependency)
|
70
|
+
soup = BeautifulSoup(response.text, 'html.parser')
|
71
|
+
results = []
|
72
|
+
|
73
|
+
# Extract search results
|
74
|
+
for result in soup.select('.result')[:max_results]:
|
75
|
+
title_elem = result.select_one('.result__title')
|
76
|
+
snippet_elem = result.select_one('.result__snippet')
|
77
|
+
url_elem = result.select_one('.result__url')
|
78
|
+
|
79
|
+
# Extract actual URL from DDG's redirect URL if needed
|
80
|
+
href = title_elem.find('a')['href'] if title_elem and title_elem.find('a') else None
|
81
|
+
if href and href.startswith('/'):
|
82
|
+
# Parse DDG redirect URL to get actual URL
|
83
|
+
parsed_url = urlparse(href)
|
84
|
+
query_params = parse_qs(parsed_url.query)
|
85
|
+
actual_url = query_params.get('uddg', [None])[0]
|
86
|
+
else:
|
87
|
+
actual_url = href
|
88
|
+
|
89
|
+
# Add result to list
|
90
|
+
if title_elem and actual_url:
|
91
|
+
results.append({
|
92
|
+
'title': title_elem.get_text(strip=True),
|
93
|
+
'href': actual_url,
|
94
|
+
'body': snippet_elem.get_text(strip=True) if snippet_elem else ''
|
95
|
+
})
|
96
|
+
|
58
97
|
return results
|
59
98
|
except Exception as e:
|
60
99
|
logger.error(f"Error performing web search: {str(e)}")
|
@@ -112,18 +151,15 @@ def extract_article_content(url: str, max_chars: int = 5000) -> Optional[str]:
|
|
112
151
|
# Default to UTF-8 if we can't detect
|
113
152
|
response.encoding = 'utf-8'
|
114
153
|
|
115
|
-
# Parse with BeautifulSoup using
|
116
|
-
|
117
|
-
soup = BeautifulSoup(response.text, 'lxml')
|
118
|
-
except ImportError:
|
119
|
-
soup = BeautifulSoup(response.text, 'html.parser')
|
154
|
+
# Parse with BeautifulSoup using html.parser
|
155
|
+
soup = BeautifulSoup(response.text, 'html.parser')
|
120
156
|
|
121
157
|
# Extract main content using multiple strategies
|
122
158
|
extracted_content = None
|
123
159
|
|
124
160
|
# ---------- PREPROCESSING ----------
|
125
161
|
# Clone the soup before preprocessing
|
126
|
-
processed_soup = BeautifulSoup(str(soup), 'lxml')
|
162
|
+
processed_soup = BeautifulSoup(str(soup), 'html.parser')
|
127
163
|
|
128
164
|
# Remove all script, style tags and comments
|
129
165
|
for element in processed_soup.find_all(['script', 'style', 'noscript']):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: ngpt
|
3
|
-
Version: 3.5.0
|
3
|
+
Version: 3.5.1
|
4
4
|
Summary: Swiss army knife for LLMs: powerful CLI and interactive chatbot in one package. Seamlessly work with OpenAI, Ollama, Groq, Claude, Gemini, or any OpenAI-compatible API to generate code, craft git commits, rewrite text, and execute shell commands.
|
5
5
|
Project-URL: Homepage, https://github.com/nazdridoy/ngpt
|
6
6
|
Project-URL: Repository, https://github.com/nazdridoy/ngpt
|
@@ -29,7 +29,6 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
29
29
|
Classifier: Topic :: Utilities
|
30
30
|
Requires-Python: >=3.8
|
31
31
|
Requires-Dist: beautifulsoup4>=4.12.0
|
32
|
-
Requires-Dist: duckduckgo-search>=3.0.0
|
33
32
|
Requires-Dist: prompt-toolkit>=3.0.0
|
34
33
|
Requires-Dist: pyperclip>=1.8.0
|
35
34
|
Requires-Dist: requests>=2.31.0
|
@@ -20,9 +20,9 @@ ngpt/utils/__init__.py,sha256=qu_66I1Vtav2f1LDiPn5J3DUsbK7o1CSScMcTkYqxoM,1179
|
|
20
20
|
ngpt/utils/cli_config.py,sha256=Ug8cECBTIuzOwkBWidLTfs-OAdOsCMJ2bNa70pOADfw,11195
|
21
21
|
ngpt/utils/config.py,sha256=wsArA4osnh8fKqOvtsPqqBxAz3DpdjtaWUFaRtnUdyc,10452
|
22
22
|
ngpt/utils/log.py,sha256=f1jg2iFo35PAmsarH8FVL_62plq4VXH0Mu2QiP6RJGw,15934
|
23
|
-
ngpt/utils/web_search.py,sha256=
|
24
|
-
ngpt-3.5.
|
25
|
-
ngpt-3.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
26
|
-
ngpt-3.5.0.dist-info/entry_points.txt,sha256=SqAAvLhMrsEpkIr4YFRdUeyuXQ9o0IBCeYgE6AVojoI,44
|
27
|
-
ngpt-3.5.0.dist-info/licenses/LICENSE,sha256=mQkpWoADxbHqE0HRefYLJdm7OpdrXBr3vNv5bZ8w72M,1065
|
28
|
-
ngpt-3.5.0.dist-info/RECORD,,
|
23
|
+
ngpt/utils/web_search.py,sha256=TK_c2U8MYM86f9J_oEzi0UZ46JohvyxdjfonHZZZqfY,30718
|
24
|
+
ngpt-3.5.1.dist-info/METADATA,sha256=djECxREmLWeO-ugYuco3gi2xEJqaGuFpjJopqa3veLI,23886
|
25
|
+
ngpt-3.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
26
|
+
ngpt-3.5.1.dist-info/entry_points.txt,sha256=SqAAvLhMrsEpkIr4YFRdUeyuXQ9o0IBCeYgE6AVojoI,44
|
27
|
+
ngpt-3.5.1.dist-info/licenses/LICENSE,sha256=mQkpWoADxbHqE0HRefYLJdm7OpdrXBr3vNv5bZ8w72M,1065
|
28
|
+
ngpt-3.5.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|