ngpt 3.4.5__py3-none-any.whl → 3.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ngpt/utils/web_search.py +345 -61
- {ngpt-3.4.5.dist-info → ngpt-3.5.0.dist-info}/METADATA +94 -124
- {ngpt-3.4.5.dist-info → ngpt-3.5.0.dist-info}/RECORD +6 -6
- {ngpt-3.4.5.dist-info → ngpt-3.5.0.dist-info}/WHEEL +0 -0
- {ngpt-3.4.5.dist-info → ngpt-3.5.0.dist-info}/entry_points.txt +0 -0
- {ngpt-3.4.5.dist-info → ngpt-3.5.0.dist-info}/licenses/LICENSE +0 -0
ngpt/utils/web_search.py
CHANGED
@@ -1,5 +1,5 @@
 """
-Web search utilities for nGPT using duckduckgo-search and
+Web search utilities for nGPT using duckduckgo-search and BeautifulSoup4.
 
 This module provides functionality to search the web and extract
 information from search results to enhance AI prompts.
@@ -12,6 +12,9 @@ from urllib.parse import urlparse
 import requests
 import sys
 import datetime
+from bs4 import BeautifulSoup
+from bs4.element import Comment, Declaration, Doctype, ProcessingInstruction
+import json
 
 # Get actual logger from global context instead of using standard logging
 from . import log
@@ -58,9 +61,10 @@ def perform_web_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]
         logger.info("Web search encountered an issue, but will continue with available results")
         return []
 
-def extract_article_content(url: str, max_chars: int = 2000) -> Optional[str]:
+def extract_article_content(url: str, max_chars: int = 5000) -> Optional[str]:
     """
-    Extract and clean content from a webpage URL
+    Extract and clean content from a webpage URL using a hybrid approach
+    inspired by trafilatura and readability algorithms.
 
     Args:
         url: The URL to extract content from
@@ -81,82 +85,336 @@ def extract_article_content(url: str, max_chars: int = 2000) -> Optional[str]:
             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
             'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
             'Accept-Language': 'en-US,en;q=0.5',
+            'DNT': '1', # Do Not Track
             'Connection': 'keep-alive',
             'Upgrade-Insecure-Requests': '1',
+            'Cache-Control': 'max-age=0',
+            'Sec-Fetch-Dest': 'document',
+            'Sec-Fetch-Mode': 'navigate',
+            'Sec-Fetch-Site': 'none',
+            'Pragma': 'no-cache',
         }
 
         logger.info(f"Fetching content from {url}")
 
         try:
-            #
-
+            # Fetch the page content
+            response = requests.get(url, headers=headers, timeout=15)
 
-
-
-
-
-
-
-
-
-
+            if response.status_code == 200:
+                # Try to detect the encoding if not properly specified
+                if response.encoding == 'ISO-8859-1':
+                    # Try to detect encoding from content
+                    possible_encoding = re.search(r'charset=["\'](.*?)["\']', response.text)
+                    if possible_encoding:
+                        response.encoding = possible_encoding.group(1)
+                    else:
+                        # Default to UTF-8 if we can't detect
+                        response.encoding = 'utf-8'
+
+                # Parse with BeautifulSoup using lxml parser for better results if available
+                try:
+                    soup = BeautifulSoup(response.text, 'lxml')
+                except ImportError:
+                    soup = BeautifulSoup(response.text, 'html.parser')
+
+                # Extract main content using multiple strategies
+                extracted_content = None
+
+                # ---------- PREPROCESSING ----------
+                # Clone the soup before preprocessing
+                processed_soup = BeautifulSoup(str(soup), 'lxml' if 'lxml' in str(type(soup.parser)) else 'html.parser')
+
+                # Remove all script, style tags and comments
+                for element in processed_soup.find_all(['script', 'style', 'noscript']):
+                    element.decompose()
+
+                # Remove HTML comments
+                for comment in processed_soup.find_all(text=lambda text: isinstance(text, Comment)):
+                    comment.extract()
+
+                # Remove hidden elements
+                for hidden in processed_soup.find_all(style=lambda s: s and isinstance(s, str) and ('display:none' in s.lower() or 'visibility:hidden' in s.lower())):
+                    hidden.decompose()
+                for hidden in processed_soup.find_all(hidden=True):
+                    hidden.decompose()
+                for hidden in processed_soup.find_all(class_=lambda c: c and isinstance(c, str) and any(x in c.lower() for x in ['hidden', 'invisible'])):
+                    hidden.decompose()
+
+                # Handle iframes and frames
+                for frame in processed_soup.find_all(['iframe', 'frame']):
+                    frame.decompose()
+
+                # ---------- SITE-SPECIFIC HANDLING ----------
+                domain = parsed_url.netloc.lower()
+
+                # Wikipedia-specific extraction
+                if 'wikipedia.org' in domain:
+                    content_div = processed_soup.select_one('#mw-content-text')
+                    if content_div:
+                        # Remove tables, references, navigation elements
+                        for unwanted in content_div.select('table, .reference, .reflist, .navbox, .vertical-navbox, .thumbcaption, .mw-editsection, .mw-headline, .toc, #toc'):
+                            unwanted.decompose()
+                        extracted_content = content_div.get_text(separator=' ', strip=True)
+
+                # News site specific handling
+                news_sites = {
+                    'cnn.com': ['article', '.article__content', '.l-container', '.body-text', '#body-text'],
+                    'bbc.com': ['.article__body-content', '.story-body__inner', '[data-component="text-block"]'],
+                    'nytimes.com': ['article', '.meteredContent', '.StoryBodyCompanionColumn', '.article-body'],
+                    'reuters.com': ['article', '.ArticleBody__content___3MtHP', '.article-body'],
+                    'theguardian.com': ['.article-body-commercial-selector', '.content__article-body', '.dcr-1cas96z'],
+                    'washingtonpost.com': ['.article-body', '.teaser-content'],
+                    'apnews.com': ['.Article', '.RichTextStoryBody'],
+                    'indiatimes.com': ['.article-body', '.article_content', '.article-desc', '.Normal'],
+                    'cnbc.com': ['.ArticleBody-articleBody', '.group-article-body', '.article-body'],
+                    'thehindu.com': ['.article-body', '.article-text', '#content-body-14269002']
+                }
+
+                if not extracted_content:
+                    # Check if we're on a known news site
+                    for site, selectors in news_sites.items():
+                        if site in domain:
+                            for selector in selectors:
+                                content_element = processed_soup.select_one(selector)
+                                if content_element:
+                                    # Clean the news content
+                                    for unwanted in content_element.select('aside, figure, .ad, .ads, .advertisement, .social, .share, .related, .newsletter, .more-on, .read-more, .promotions'):
+                                        unwanted.decompose()
+                                    extracted_content = content_element.get_text(separator=' ', strip=True)
+                                    break
+                            if extracted_content:
+                                break
+
+                # ---------- JSON-LD EXTRACTION ----------
+                if not extracted_content:
+                    # Look for structured data in JSON-LD format
+                    json_ld = processed_soup.find_all('script', type='application/ld+json')
+                    for script in json_ld:
+                        try:
+                            script_content = script.string
+                            if not script_content: # Skip empty scripts
+                                continue
+
+                            # Clean the JSON string (some sites have invalid JSON)
+                            script_content = re.sub(r'[\n\t\r]', '', script_content)
+                            script_content = script_content.strip()
+
+                            data = json.loads(script_content)
+                            # Handle both single objects and arrays of objects
+                            if isinstance(data, list):
+                                data_list = data
+                            else:
+                                data_list = [data]
+
+                            for item in data_list:
+                                article_body = None
+                                # Try to find articleBody or various content fields
+                                if isinstance(item, dict):
+                                    # Check for common content fields directly
+                                    for field in ['articleBody', 'description', 'text', 'mainEntityOfPage']:
+                                        if field in item and isinstance(item[field], str) and len(item[field]) > 200:
+                                            article_body = item[field]
+                                            break
+
+                                    # Check in nested objects
+                                    if not article_body and '@graph' in item and isinstance(item['@graph'], list):
+                                        for graph_item in item['@graph']:
+                                            if isinstance(graph_item, dict):
+                                                for field in ['articleBody', 'description', 'text']:
+                                                    if field in graph_item and isinstance(graph_item[field], str) and len(graph_item[field]) > 200:
+                                                        article_body = graph_item[field]
+                                                        break
+                                            if article_body:
+                                                break
+
+                                if article_body:
+                                    extracted_content = article_body
+                                    break
+
+                            if extracted_content:
+                                break
+                        except (json.JSONDecodeError, TypeError, AttributeError, ValueError) as e:
+                            logger.debug(f"Error parsing JSON-LD: {str(e)}")
+                            continue
+
+                # ---------- META DESCRIPTION EXTRACTION ----------
+                meta_description = None
+                meta_tag = processed_soup.find('meta', attrs={'name': 'description'}) or processed_soup.find('meta', attrs={'property': 'og:description'})
+                if meta_tag and meta_tag.get('content'):
+                    meta_description = meta_tag.get('content')
 
-
-
-            content
+                # ---------- CONTENT ANALYSIS ----------
+                if not extracted_content:
+                    # Get all content blocks (divs, sections, articles)
+                    content_blocks = []
 
-            #
-
-
+                    # Prioritize semantic tags
+                    for tag in ['article', 'main', 'section', 'div']:
+                        blocks = processed_soup.find_all(tag)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
+                        for block in blocks:
+                            # Skip if too small
+                            text = block.get_text(strip=True)
+                            if len(text) < 200:
+                                continue
+
+                            # Calculate content metrics
+                            char_count = len(text)
+                            link_density = calculate_link_density(block)
+                            p_count = len(block.find_all('p'))
+                            p_text_length = sum(len(p.get_text(strip=True)) for p in block.find_all('p'))
+                            p_density = p_text_length / char_count if char_count > 0 else 0
+
+                            # Skip blocks with high link density (likely navigation)
+                            if link_density > 0.5:
+                                continue
+
+                            # Calculate readability scores
+                            text_density = char_count / (len(str(block)) + 1) # Text to HTML ratio
+
+                            # Score content blocks
+                            score = 0
+
+                            # Prefer blocks with many paragraphs
+                            score += min(p_count * 5, 50) # Max 50 points for paragraphs
+
+                            # Prefer blocks with high paragraph text density
+                            score += min(int(p_density * 100), 50) # Max 50 points for paragraph density
+
+                            # Penalize high link density
+                            score -= int(link_density * 100)
+
+                            # Boost for high text density
+                            score += min(int(text_density * 30), 30) # Max 30 points for text density
+
+                            # Boost for certain attributes and classes
+                            content_indicators = ['content', 'article', 'story', 'post', 'text', 'body', 'entry']
+
+                            # Check class and id attributes
+                            for attr in ['class', 'id']:
+                                attr_val = block.get(attr, '')
+                                if attr_val:
+                                    if isinstance(attr_val, list):
+                                        attr_val = ' '.join(attr_val)
+                                    for indicator in content_indicators:
+                                        if indicator in attr_val.lower():
+                                            score += 30
+                                            break
+
+                            # Penalty for boilerplate indicators
+                            boilerplate_indicators = ['sidebar', 'menu', 'nav', 'banner', 'ad', 'footer', 'header', 'comment', 'share', 'related']
+                            for attr in ['class', 'id']:
+                                attr_val = block.get(attr, '')
+                                if attr_val:
+                                    if isinstance(attr_val, list):
+                                        attr_val = ' '.join(attr_val)
+                                    for indicator in boilerplate_indicators:
+                                        if indicator in attr_val.lower():
+                                            score -= 50
+                                            break
+
+                            # Add to content blocks if score is positive
+                            if score > 0:
+                                content_blocks.append({
+                                    'element': block,
+                                    'score': score,
+                                    'char_count': char_count,
+                                    'text': text
+                                })
+
+                    # Sort content blocks by score
+                    if content_blocks:
+                        content_blocks.sort(key=lambda x: x['score'], reverse=True)
+                        best_block = content_blocks[0]['element']
+
+                        # Clean up the best block
+                        for unwanted in best_block.find_all(['aside', 'nav', 'footer', 'header']):
+                            unwanted.decompose()
+
+                        extracted_content = best_block.get_text(separator=' ', strip=True)
 
-
-
-
-
+                # ---------- PARAGRAPH EXTRACTION FALLBACK ----------
+                if not extracted_content:
+                    # Get all paragraphs with substantial content
+                    paragraphs = []
+                    for p in processed_soup.find_all('p'):
+                        text = p.get_text(strip=True)
+                        if len(text) > 40: # Only consider substantial paragraphs
+                            # Calculate link density
+                            link_density = calculate_link_density(p)
+                            if link_density < 0.25: # Skip if too many links
+                                paragraphs.append(text)
+
+                    if paragraphs:
+                        extracted_content = ' '.join(paragraphs)
 
+                # If we have content, clean it up
+                if extracted_content:
+                    # Clean whitespace
+                    extracted_content = re.sub(r'\s+', ' ', extracted_content).strip()
+
+                    # Remove URLs
+                    extracted_content = re.sub(r'https?://\S+', '', extracted_content)
+
+                    # Remove email addresses
+                    extracted_content = re.sub(r'\S+@\S+', '', extracted_content)
+
+                    # Remove social media handles
+                    extracted_content = re.sub(r'@\w+', '', extracted_content)
+
+                    # Replace multiple spaces with single space
+                    extracted_content = re.sub(r' +', ' ', extracted_content)
+
+                    # Normalize quotes and apostrophes
+                    extracted_content = extracted_content.replace('“', '"').replace('”', '"')
+                    extracted_content = extracted_content.replace("’", "'").replace("‘", "'")
+
+                    # Remove any remaining HTML entities
+                    extracted_content = re.sub(r'&[a-zA-Z]+;', ' ', extracted_content)
+
+                    # Remove short lines that are likely navigation/menu items
+                    lines = extracted_content.split('\n')
+                    extracted_content = ' '.join([line for line in lines if len(line) > 40 or '.' in line])
+
+                    # Combine with meta description if available and content is short
+                    if meta_description and len(extracted_content) < 500:
+                        extracted_content = meta_description + " " + extracted_content
+
+                    # Truncate if needed
+                    if len(extracted_content) > max_chars:
+                        # Try to break at a sentence boundary
+                        cutoff_point = max_chars
+                        for i in range(max_chars - 1, max_chars - 300, -1):
+                            if i < len(extracted_content) and extracted_content[i] in ['.', '!', '?']:
+                                cutoff_point = i + 1
+                                break
+
+                        extracted_content = extracted_content[:cutoff_point]
+
+                    return extracted_content
+                else:
+                    # Return meta description if nothing else was found
+                    if meta_description:
+                        return meta_description
+
+                    logger.error(f"No content extracted from {url}")
+                    return None
             else:
                 logger.error(f"Request to {url} returned status code {response.status_code}")
-
-        except ImportError:
-            logger.error("Trafilatura not installed. Install with 'pip install trafilatura'")
-            # Try direct requests only
-            try:
-                response = requests.get(url, headers=headers, timeout=10)
-                if response.status_code == 200:
-                    # Very basic HTML cleaning
-                    html_content = response.text
-                    text = re.sub(r'<[^>]+>', ' ', html_content)
-                    text = re.sub(r'\s+', ' ', text).strip()
-
-                    if text:
-                        if len(text) > max_chars:
-                            text = text[:max_chars] + "..."
-                        return text
-            except Exception as req_error:
-                logger.error(f"Direct request fallback failed: {str(req_error)}")
+                return None
 
         except Exception as e:
-            logger.error(f"Error extracting content with
-            # Try
+            logger.error(f"Error extracting content with hybrid approach: {str(e)}")
+            # Try a basic fallback
             try:
                 response = requests.get(url, headers=headers, timeout=10)
                 if response.status_code == 200:
-
-                    text
+                    soup = BeautifulSoup(response.text, 'html.parser')
+                    # Just get the text without images, scripts, styles, etc.
+                    for tag in soup(['script', 'style', 'img', 'nav', 'footer', 'header']):
+                        tag.decompose()
+                    text = soup.get_text(separator=' ', strip=True)
                     text = re.sub(r'\s+', ' ', text).strip()
 
                     if text:
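The content-analysis pass in the hunk above ranks candidate blocks by paragraph count, paragraph text density, text-to-HTML ratio, and link density, then keeps the highest-scoring block. The snippet below is a trimmed-down illustration of that scoring idea on a toy HTML string; the helper and weights are simplified stand-ins for what the diff adds, not the shipped implementation.

# Illustration only: simplified block scoring in the spirit of the new
# CONTENT ANALYSIS section of extract_article_content().
from bs4 import BeautifulSoup

HTML = """
<html><body>
  <div class="nav"><a href="/">Home</a> <a href="/news">News</a></div>
  <article class="story-content">
    <p>Qubits can hold superpositions of states, unlike classical bits.</p>
    <p>That property lets some algorithms explore many inputs at once.</p>
  </article>
</body></html>
"""

def link_density(el):
    # Ratio of link text to all text; high values usually mean navigation.
    total = len(el.get_text(strip=True)) or 1
    linked = sum(len(a.get_text(strip=True)) for a in el.find_all("a"))
    return linked / total

soup = BeautifulSoup(HTML, "html.parser")
for block in soup.find_all(["article", "div"]):
    p_count = len(block.find_all("p"))
    score = min(p_count * 5, 50)                 # reward paragraphs
    score -= int(link_density(block) * 100)      # penalize link-heavy blocks
    classes = " ".join(block.get("class", []))
    if any(hint in classes for hint in ("content", "article", "story")):
        score += 30                              # boost content-like class names
    print(f"<{block.name} class={classes!r}> score={score}")

The navigation div scores negative (all of its text is link text), while the article block wins on paragraphs and its content-like class name, which is exactly the behaviour the new heuristic relies on.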
@@ -164,14 +422,40 @@ def extract_article_content(url: str, max_chars: int = 2000) -> Optional[str]:
                             text = text[:max_chars] + "..."
                         return text
             except Exception as req_error:
-                logger.error(f"
+                logger.error(f"Basic fallback failed: {str(req_error)}")
 
             return None
     except Exception as e:
         logger.error(f"Error extracting content from {url}: {str(e)}")
         return None
 
-def
+def calculate_link_density(element):
+    """
+    Calculate the ratio of link text to all text in an element.
+    Used to identify navigation-heavy areas.
+
+    Args:
+        element: BeautifulSoup element
+
+    Returns:
+        Float between 0 and 1 indicating link density
+    """
+    try:
+        if element is None:
+            return 0
+
+        text_length = len(element.get_text(strip=True))
+        if text_length == 0:
+            return 0
+
+        links = element.find_all('a')
+        link_text_length = sum(len(a.get_text(strip=True)) for a in links)
+
+        return link_text_length / text_length
+    except Exception:
+        return 0
+
+def get_web_search_results(query: str, max_results: int = 5, max_chars_per_result: int = 5000) -> Dict[str, Any]:
     """
     Get formatted web search results ready to be included in AI prompts.
 
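Taken together, the module keeps the same public entry points while swapping the extraction engine from trafilatura to BeautifulSoup4. A rough usage sketch follows, assuming the module path shown in the RECORD below and working network access; the shape of the dictionaries returned by perform_web_search() is not visible in these hunks, so the key lookup is a guess.

# Usage sketch based on the signatures visible in this diff; the result
# dictionary keys returned by perform_web_search() are an assumption.
from ngpt.utils.web_search import perform_web_search, extract_article_content

results = perform_web_search("quantum computing basics", max_results=3)
for result in results:
    url = result.get("href") or result.get("url")  # assumed key names
    if not url:
        continue
    content = extract_article_content(url, max_chars=5000)  # new 5000-char default
    if content:
        print(url, "->", content[:120], "...")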
{ngpt-3.4.5.dist-info → ngpt-3.5.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ngpt
-Version: 3.4.5
+Version: 3.5.0
 Summary: Swiss army knife for LLMs: powerful CLI and interactive chatbot in one package. Seamlessly work with OpenAI, Ollama, Groq, Claude, Gemini, or any OpenAI-compatible API to generate code, craft git commits, rewrite text, and execute shell commands.
 Project-URL: Homepage, https://github.com/nazdridoy/ngpt
 Project-URL: Repository, https://github.com/nazdridoy/ngpt
@@ -28,12 +28,12 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Classifier: Topic :: Utilities
 Requires-Python: >=3.8
+Requires-Dist: beautifulsoup4>=4.12.0
 Requires-Dist: duckduckgo-search>=3.0.0
 Requires-Dist: prompt-toolkit>=3.0.0
 Requires-Dist: pyperclip>=1.8.0
 Requires-Dist: requests>=2.31.0
 Requires-Dist: rich>=10.0.0
-Requires-Dist: trafilatura>=1.6.0
 Description-Content-Type: text/markdown
 
 # nGPT
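The dependency change above (trafilatura dropped, beautifulsoup4 added) pairs with the lxml-or-html.parser fallback in web_search.py. A standalone check like the following, which is not part of the package, shows which parser BeautifulSoup will actually use in a given environment:

# Standalone environment check (not shipped with ngpt): verifies that
# beautifulsoup4 imports and whether the optional lxml parser is available,
# mirroring the try/except fallback used in ngpt/utils/web_search.py.
from bs4 import BeautifulSoup

def available_parser() -> str:
    try:
        BeautifulSoup("<p>probe</p>", "lxml")
        return "lxml"
    except Exception:  # bs4 raises FeatureNotFound when lxml is missing
        return "html.parser"

print("BeautifulSoup will use:", available_parser())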
@@ -62,10 +62,10 @@ Description-Content-Type: text/markdown
 
 - ✅ **Versatile**: Powerful and easy-to-use CLI tool for various AI tasks
 - 🪶 **Lightweight**: Minimal dependencies with everything you need included
-- 🔄 **API Flexibility**: Works with OpenAI, Ollama, Groq, Claude, Gemini, and any compatible endpoint
+- 🔄 **API Flexibility**: Works with OpenAI, Ollama, Groq, Claude, Gemini, and any OpenAI-compatible endpoint
 - 💬 **Interactive Chat**: Continuous conversation with memory in modern UI
 - 📊 **Streaming Responses**: Real-time output for better user experience
-- 🔍 **Web Search**: Enhance any model with contextual information from the web
+- 🔍 **Web Search**: Enhance any model with contextual information from the web, using advanced content extraction to identify the most relevant information from web pages
 - 📥 **Stdin Processing**: Process piped content by using `{}` placeholder in prompts
 - 🎨 **Markdown Rendering**: Beautiful formatting of markdown and code with syntax highlighting
 - ⚡ **Real-time Markdown**: Stream responses with live updating syntax highlighting and formatting
@@ -85,37 +85,47 @@ See the [Feature Overview](https://nazdridoy.github.io/ngpt/overview/) for more
 
 
 ## Table of Contents
-- [Quick Start](#quick-start)
 - [Features](#features)
-- [Documentation](#documentation)
 - [Installation](#installation)
+- [Quick Start](#quick-start)
 - [Usage](#usage)
+- [Command Line Options](#command-line-options)
 - [Documentation](https://nazdridoy.github.io/ngpt/)
-
+- [Documentation](#documentation)
 - [Configuration](#configuration)
-- [
+- [API Key Setup](#api-key-setup)
+- [OpenAI API Key](#openai-api-key)
+- [Google Gemini API Key](#google-gemini-api-key)
 - [CLI Configuration](#cli-configuration)
 - [Interactive Configuration](#interactive-configuration)
 - [Configuration File](#configuration-file)
 - [Configuration Priority](#configuration-priority)
-- [API Key Setup](#api-key-setup)
-- [OpenAI API Key](#openai-api-key)
-- [Google Gemini API Key](#google-gemini-api-key)
 - [Contributing](#contributing)
 - [License](#license)
 
-##
+## Installation
 
 ```bash
-#
+# Installation with pip
 pip install ngpt
 
-# Or install with uv (faster)
+# Or install with uv (faster installation)
 uv pip install ngpt
 
-# Or install globally as a CLI tool (recommended)
+# Or install globally as a CLI tool (recommended for command-line usage)
 uv tool install ngpt
 
+# Arch Linux: install from AUR
+paru -S ngpt
+```
+
+Requires Python 3.8 or newer.
+
+For detailed installation instructions, see the [Installation Guide](https://nazdridoy.github.io/ngpt/installation/).
+
+## Quick Start
+
+```bash
 # Chat with default settings
 ngpt "Tell me about quantum computing"
 
@@ -200,58 +210,6 @@ ngpt --provider Groq "Explain quantum computing"
 # Compare outputs from different providers
 ngpt --provider OpenAI "Explain quantum physics" > openai_response.txt
 ngpt --provider Ollama "Explain quantum physics" > ollama_response.txt
-```
-
-For more examples and detailed usage, visit the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
-
-## Documentation
-
-Comprehensive documentation, including usage guides and examples, is available at:
-
-**[https://nazdridoy.github.io/ngpt/](https://nazdridoy.github.io/ngpt/)**
-
-Key documentation sections:
-- [Installation Guide](https://nazdridoy.github.io/ngpt/installation/)
-- [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/)
-- [Configuration Guide](https://nazdridoy.github.io/ngpt/configuration/)
-- [Examples & Tutorials](https://nazdridoy.github.io/ngpt/examples/basic/)
-
-## Installation
-
-```bash
-# Installation with pip
-pip install ngpt
-
-# Or install with uv (faster installation)
-uv pip install ngpt
-
-# Or install globally as a CLI tool (recommended for command-line usage)
-uv tool install ngpt
-
-# Arch Linux: install from AUR
-paru -S ngpt
-```
-
-Requires Python 3.8 or newer.
-
-For detailed installation instructions, see the [Installation Guide](https://nazdridoy.github.io/ngpt/installation/).
-
-## Usage
-
-### As a CLI Tool
-
-```bash
-# Basic chat (default mode)
-ngpt "Hello, how are you?"
-
-# Interactive chat session with conversation history
-ngpt -i
-
-# Log conversation to a file
-ngpt --interactive --log conversation.log
-
-# Use custom system prompt to guide AI behavior
-ngpt --preprompt "You are a Python programming tutor" "Explain decorators"
 
 # Show all API configurations
 ngpt --show-config --all
@@ -277,47 +235,16 @@ ngpt -s "list all files in current directory"
 # On Windows generates: dir
 # On Linux/macOS generates: ls -la
 
-# Generate
-# Returns only code without markdown formatting or explanations
+# Generate code (using -c or --code flag)
 ngpt -c "create a python function that calculates fibonacci numbers"
 
 # Use multiline text editor for complex prompts (using -t or --text flag)
-# Opens an interactive editor with syntax highlighting and intuitive controls
 ngpt -t
 ```
 
-For more
-
-## Configuration
-
-### API Key Setup
-
-#### OpenAI API Key
-1. Create an account at [OpenAI](https://platform.openai.com/)
-2. Navigate to API keys: https://platform.openai.com/api-keys
-3. Click "Create new secret key" and copy your API key
-4. Configure nGPT with your key:
-```bash
-ngpt --config
-# Enter provider: OpenAI
-# Enter API key: your-openai-api-key
-# Enter base URL: https://api.openai.com/v1/
-# Enter model: gpt-3.5-turbo (or other model)
-```
+For more examples and detailed usage, visit the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
 
-
-1. Create or use an existing Google account
-2. Go to [Google AI Studio](https://aistudio.google.com/)
-3. Navigate to API keys in the left sidebar (or visit https://aistudio.google.com/app/apikey)
-4. Create an API key and copy it
-5. Configure nGPT with your key:
-```bash
-ngpt --config
-# Enter provider: Gemini
-# Enter API key: your-gemini-api-key
-# Enter base URL: https://generativelanguage.googleapis.com/v1beta/openai
-# Enter model: gemini-2.0-flash
-```
+## Usage
 
 ### Command Line Options
 
@@ -326,8 +253,8 @@ For more CLI examples and detailed usage information, see the [CLI Usage Guide](
 usage: ngpt [-h] [-v] [--language LANGUAGE] [--config [CONFIG]] [--config-index CONFIG_INDEX] [--provider PROVIDER]
 [--remove] [--show-config] [--all] [--list-models] [--list-renderers] [--cli-config [COMMAND ...]]
 [--api-key API_KEY] [--base-url BASE_URL] [--model MODEL] [--web-search] [--temperature TEMPERATURE]
-[--top_p TOP_P] [--max_tokens MAX_TOKENS] [--log [FILE]] [--preprompt PREPROMPT] [--no-stream
-
+[--top_p TOP_P] [--max_tokens MAX_TOKENS] [--log [FILE]] [--preprompt PREPROMPT] [--no-stream | --prettify |
+--stream-prettify] [--renderer {auto,rich,glow}] [--rec-chunk] [--diff [FILE]] [--chunk-size CHUNK_SIZE]
 [--analyses-chunk-size ANALYSES_CHUNK_SIZE] [--max-msg-lines MAX_MSG_LINES]
 [--max-recursion-depth MAX_RECURSION_DEPTH] [-i | -s | -c | -t | -p | -r | -g]
 [prompt]
@@ -346,12 +273,10 @@ options::
 
 Configuration Options::
 
---config [CONFIG] Path to a custom config file or, if no value provided, enter interactive
-configuration mode to create a new config
+--config [CONFIG] Path to a custom config file or, if no value provided, enter interactive configuration mode to create a new config
 --config-index CONFIG_INDEX Index of the configuration to use or edit (default: 0)
 --provider PROVIDER Provider name to identify the configuration to use
---remove Remove the configuration at the specified index (requires --config and
---config-index or --provider)
+--remove Remove the configuration at the specified index (requires --config and --config-index or --provider)
 --show-config Show the current configuration(s) and exit
 --all Show details for all configurations (requires --show-config)
 --list-models List all available models for the current configuration and exit
@@ -363,30 +288,28 @@ Global Options::
 --api-key API_KEY API key for the service
 --base-url BASE_URL Base URL for the API
 --model MODEL Model to use
---web-search Enable web search capability
-feature)
+--web-search Enable web search capability using DuckDuckGo to enhance prompts with relevant information
 --temperature TEMPERATURE Set temperature (controls randomness, default: 0.7)
 --top_p TOP_P Set top_p (controls diversity, default: 1.0)
 --max_tokens MAX_TOKENS Set max response length in tokens
---log [FILE] Set filepath to log conversation to, or create a temporary log file if no path
-provided
+--log [FILE] Set filepath to log conversation to, or create a temporary log file if no path provided
 --preprompt PREPROMPT Set custom system prompt to control AI behavior
---
-
-
-
+--renderer {auto,rich,glow} Select which markdown renderer to use with --prettify or --stream-prettify (auto, rich, or glow)
+
+Output Display Options (mutually exclusive)::
+
+--no-stream Return the whole response without streaming or formatting
+--prettify Render complete response with markdown and code formatting (non-streaming)
+--stream-prettify Stream response with real-time markdown rendering (default)
 
 Git Commit Message Options::
 
 --rec-chunk Process large diffs in chunks with recursive analysis if needed
---diff [FILE] Use diff from specified file instead of staged changes. If used without a path,
-uses the path from CLI config.
+--diff [FILE] Use diff from specified file instead of staged changes. If used without a path, uses the path from CLI config.
 --chunk-size CHUNK_SIZE Number of lines per chunk when chunking is enabled (default: 200)
---analyses-chunk-size ANALYSES_CHUNK_SIZE
-Number of lines per chunk when recursively chunking analyses (default: 200)
+--analyses-chunk-size ANALYSES_CHUNK_SIZE Number of lines per chunk when recursively chunking analyses (default: 200)
 --max-msg-lines MAX_MSG_LINES Maximum number of lines in commit message before condensing (default: 20)
---max-recursion-depth MAX_RECURSION_DEPTH
-Maximum recursion depth for commit message condensing (default: 3)
+--max-recursion-depth MAX_RECURSION_DEPTH Maximum recursion depth for commit message condensing (default: 3)
 
 Modes (mutually exclusive)::
 
@@ -394,15 +317,62 @@ Modes (mutually exclusive)::
 -s, --shell Generate and execute shell commands
 -c, --code Generate code
 -t, --text Enter multi-line text input (submit with Ctrl+D)
--p, --pipe Read from stdin and use content with prompt. Use {} in prompt as placeholder
-for stdin content
+-p, --pipe Read from stdin and use content with prompt. Use {} in prompt as placeholder for stdin content
 -r, --rewrite Rewrite text from stdin to be more natural while preserving tone and meaning
 -g, --gitcommsg Generate AI-powered git commit messages from staged changes or diff file
 ```
 
 > **Note**: For better visualization of conventional commit messages on GitHub, you can use the [GitHub Commit Labels](https://greasyfork.org/en/scripts/526153-github-commit-labels) userscript, which adds colorful labels to your commits.
 
-For a complete reference of all available options, see the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
+For a complete reference of all available options, detailed CLI examples and usage information, see the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
+
+
+## Documentation
+
+Comprehensive documentation, including usage guides and examples, is available at:
+
+**[https://nazdridoy.github.io/ngpt/](https://nazdridoy.github.io/ngpt/)**
+
+Key documentation sections:
+- [Installation Guide](https://nazdridoy.github.io/ngpt/installation/)
+- [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/)
+- [Configuration Guide](https://nazdridoy.github.io/ngpt/configuration/)
+- [Examples & Tutorials](https://nazdridoy.github.io/ngpt/examples/basic/)
+- [Git Commit Message Guide](https://nazdridoy.github.io/ngpt/usage/gitcommsg/)
+
+
+## Configuration
+
+### API Key Setup
+
+#### OpenAI API Key
+1. Create an account at [OpenAI](https://platform.openai.com/)
+2. Navigate to API keys: https://platform.openai.com/api-keys
+3. Click "Create new secret key" and copy your API key
+4. Configure nGPT with your key:
+```bash
+ngpt --config
+# Enter provider: OpenAI
+# Enter API key: your-openai-api-key
+# Enter base URL: https://api.openai.com/v1/
+# Enter model: gpt-3.5-turbo (or other model)
+```
+
+#### Google Gemini API Key
+1. Create or use an existing Google account
+2. Go to [Google AI Studio](https://aistudio.google.com/)
+3. Navigate to API keys in the left sidebar (or visit https://aistudio.google.com/app/apikey)
+4. Create an API key and copy it
+5. Configure nGPT with your key:
+```bash
+ngpt --config
+# Enter provider: Gemini
+# Enter API key: your-gemini-api-key
+# Enter base URL: https://generativelanguage.googleapis.com/v1beta/openai
+# Enter model: gemini-2.0-flash
+```
+
+For more detailed information, refer to the [API Key Setup documentation](https://nazdridoy.github.io/ngpt/configuration/#api-key-setup).
 
 ### CLI Configuration
 
{ngpt-3.4.5.dist-info → ngpt-3.5.0.dist-info}/RECORD
CHANGED
@@ -20,9 +20,9 @@ ngpt/utils/__init__.py,sha256=qu_66I1Vtav2f1LDiPn5J3DUsbK7o1CSScMcTkYqxoM,1179
 ngpt/utils/cli_config.py,sha256=Ug8cECBTIuzOwkBWidLTfs-OAdOsCMJ2bNa70pOADfw,11195
 ngpt/utils/config.py,sha256=wsArA4osnh8fKqOvtsPqqBxAz3DpdjtaWUFaRtnUdyc,10452
 ngpt/utils/log.py,sha256=f1jg2iFo35PAmsarH8FVL_62plq4VXH0Mu2QiP6RJGw,15934
-ngpt/utils/web_search.py,sha256=
-ngpt-3.
-ngpt-3.
-ngpt-3.
-ngpt-3.
-ngpt-3.
+ngpt/utils/web_search.py,sha256=_e78fk-6WDVqHDIaAMP9CDoX4FyOWM9XhdSZcr5KasI,29163
+ngpt-3.5.0.dist-info/METADATA,sha256=BEcmK1bv4EZ_Q6W7MDnDPo304e-_L0zk1bDgH4mmp8Q,23926
+ngpt-3.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ngpt-3.5.0.dist-info/entry_points.txt,sha256=SqAAvLhMrsEpkIr4YFRdUeyuXQ9o0IBCeYgE6AVojoI,44
+ngpt-3.5.0.dist-info/licenses/LICENSE,sha256=mQkpWoADxbHqE0HRefYLJdm7OpdrXBr3vNv5bZ8w72M,1065
+ngpt-3.5.0.dist-info/RECORD,,
{ngpt-3.4.5.dist-info → ngpt-3.5.0.dist-info}/WHEEL
File without changes
{ngpt-3.4.5.dist-info → ngpt-3.5.0.dist-info}/entry_points.txt
File without changes
{ngpt-3.4.5.dist-info → ngpt-3.5.0.dist-info}/licenses/LICENSE
File without changes