ngpt 3.4.5-py3-none-any.whl → 3.5.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ngpt/utils/web_search.py CHANGED
@@ -1,5 +1,5 @@
1
1
  """
2
- Web search utilities for nGPT using duckduckgo-search and trafilatura.
2
+ Web search utilities for nGPT using BeautifulSoup4.
3
3
 
4
4
  This module provides functionality to search the web and extract
5
5
  information from search results to enhance AI prompts.
@@ -7,11 +7,13 @@ information from search results to enhance AI prompts.
7
7
 
8
8
  import re
9
9
  from typing import List, Dict, Any, Optional
10
- from duckduckgo_search import DDGS
11
- from urllib.parse import urlparse
10
+ from urllib.parse import urlparse, parse_qs
12
11
  import requests
13
12
  import sys
14
13
  import datetime
14
+ from bs4 import BeautifulSoup
15
+ from bs4.element import Comment, Declaration, Doctype, ProcessingInstruction
16
+ import json
15
17
 
16
18
  # Get actual logger from global context instead of using standard logging
17
19
  from . import log
@@ -39,7 +41,7 @@ def get_logger():
39
41
 
40
42
  def perform_web_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]:
41
43
  """
42
- Search the web using DuckDuckGo and return relevant results.
44
+ Search DuckDuckGo directly and return relevant results.
43
45
 
44
46
  Args:
45
47
  query: The search query
@@ -50,17 +52,58 @@ def perform_web_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]
50
52
  """
51
53
  logger = get_logger()
52
54
  try:
53
- ddgs = DDGS()
54
- results = list(ddgs.text(query, max_results=max_results))
55
+ # Headers to mimic a browser request
56
+ headers = {
57
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
58
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
59
+ }
60
+
61
+ # DuckDuckGo search URL
62
+ encoded_query = requests.utils.quote(query)
63
+ url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
64
+
65
+ # Fetch search results
66
+ response = requests.get(url, headers=headers, timeout=10)
67
+ response.raise_for_status()
68
+
69
+ # Parse HTML response with html.parser (no lxml dependency)
70
+ soup = BeautifulSoup(response.text, 'html.parser')
71
+ results = []
72
+
73
+ # Extract search results
74
+ for result in soup.select('.result')[:max_results]:
75
+ title_elem = result.select_one('.result__title')
76
+ snippet_elem = result.select_one('.result__snippet')
77
+ url_elem = result.select_one('.result__url')
78
+
79
+ # Extract actual URL from DDG's redirect URL if needed
80
+ href = title_elem.find('a')['href'] if title_elem and title_elem.find('a') else None
81
+ if href and href.startswith('/'):
82
+ # Parse DDG redirect URL to get actual URL
83
+ parsed_url = urlparse(href)
84
+ query_params = parse_qs(parsed_url.query)
85
+ actual_url = query_params.get('uddg', [None])[0]
86
+ else:
87
+ actual_url = href
88
+
89
+ # Add result to list
90
+ if title_elem and actual_url:
91
+ results.append({
92
+ 'title': title_elem.get_text(strip=True),
93
+ 'href': actual_url,
94
+ 'body': snippet_elem.get_text(strip=True) if snippet_elem else ''
95
+ })
96
+
55
97
  return results
56
98
  except Exception as e:
57
99
  logger.error(f"Error performing web search: {str(e)}")
58
100
  logger.info("Web search encountered an issue, but will continue with available results")
59
101
  return []
60
102
 
61
- def extract_article_content(url: str, max_chars: int = 2000) -> Optional[str]:
103
+ def extract_article_content(url: str, max_chars: int = 5000) -> Optional[str]:
62
104
  """
63
- Extract and clean content from a webpage URL.
105
+ Extract and clean content from a webpage URL using a hybrid approach
106
+ inspired by trafilatura and readability algorithms.
64
107
 
65
108
  Args:
66
109
  url: The URL to extract content from
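
The rewritten `perform_web_search` above scrapes DuckDuckGo's HTML endpoint and unwraps its redirect links via the `uddg` query parameter. A minimal sketch of that decoding step, using a hypothetical result link (illustrative only, not code from the package):

```python
from urllib.parse import urlparse, parse_qs

# A typical DuckDuckGo HTML result link wraps the target URL in a redirect
# (hypothetical example value):
href = "/l/?uddg=https%3A%2F%2Fexample.com%2Fpage&rut=abc123"

# parse_qs percent-decodes query values, so 'uddg' yields the real URL
params = parse_qs(urlparse(href).query)
print(params.get('uddg', [None])[0])  # -> https://example.com/page
```
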
@@ -81,82 +124,333 @@ def extract_article_content(url: str, max_chars: int = 2000) -> Optional[str]:
81
124
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
82
125
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
83
126
  'Accept-Language': 'en-US,en;q=0.5',
127
+ 'DNT': '1', # Do Not Track
84
128
  'Connection': 'keep-alive',
85
129
  'Upgrade-Insecure-Requests': '1',
130
+ 'Cache-Control': 'max-age=0',
131
+ 'Sec-Fetch-Dest': 'document',
132
+ 'Sec-Fetch-Mode': 'navigate',
133
+ 'Sec-Fetch-Site': 'none',
134
+ 'Pragma': 'no-cache',
86
135
  }
87
136
 
88
137
  logger.info(f"Fetching content from {url}")
89
138
 
90
139
  try:
91
- # Try using trafilatura
92
- import trafilatura
140
+ # Fetch the page content
141
+ response = requests.get(url, headers=headers, timeout=15)
93
142
 
94
- # Download with correct parameters
95
- # trafilatura handles user-agent internally
96
- downloaded = trafilatura.fetch_url(url)
97
-
98
- if downloaded:
99
- # Extract main content
100
- content = trafilatura.extract(downloaded, include_comments=False,
101
- include_tables=False,
102
- no_fallback=False)
143
+ if response.status_code == 200:
144
+ # Try to detect the encoding if not properly specified
145
+ if response.encoding == 'ISO-8859-1':
146
+ # Try to detect encoding from content
147
+ possible_encoding = re.search(r'charset=["\'](.*?)["\']', response.text)
148
+ if possible_encoding:
149
+ response.encoding = possible_encoding.group(1)
150
+ else:
151
+ # Default to UTF-8 if we can't detect
152
+ response.encoding = 'utf-8'
153
+
154
+ # Parse with BeautifulSoup using html.parser
155
+ soup = BeautifulSoup(response.text, 'html.parser')
156
+
157
+ # Extract main content using multiple strategies
158
+ extracted_content = None
159
+
160
+ # ---------- PREPROCESSING ----------
161
+ # Clone the soup before preprocessing
162
+ processed_soup = BeautifulSoup(str(soup), 'html.parser')
163
+
164
+ # Remove all script, style tags and comments
165
+ for element in processed_soup.find_all(['script', 'style', 'noscript']):
166
+ element.decompose()
103
167
 
104
- if content:
105
- # Clean up content if needed
106
- content = content.strip()
168
+ # Remove HTML comments
169
+ for comment in processed_soup.find_all(text=lambda text: isinstance(text, Comment)):
170
+ comment.extract()
171
+
172
+ # Remove hidden elements
173
+ for hidden in processed_soup.find_all(style=lambda s: s and isinstance(s, str) and ('display:none' in s.lower() or 'visibility:hidden' in s.lower())):
174
+ hidden.decompose()
175
+ for hidden in processed_soup.find_all(hidden=True):
176
+ hidden.decompose()
177
+ for hidden in processed_soup.find_all(class_=lambda c: c and isinstance(c, str) and any(x in c.lower() for x in ['hidden', 'invisible'])):
178
+ hidden.decompose()
179
+
180
+ # Handle iframes and frames
181
+ for frame in processed_soup.find_all(['iframe', 'frame']):
182
+ frame.decompose()
183
+
184
+ # ---------- SITE-SPECIFIC HANDLING ----------
185
+ domain = parsed_url.netloc.lower()
186
+
187
+ # Wikipedia-specific extraction
188
+ if 'wikipedia.org' in domain:
189
+ content_div = processed_soup.select_one('#mw-content-text')
190
+ if content_div:
191
+ # Remove tables, references, navigation elements
192
+ for unwanted in content_div.select('table, .reference, .reflist, .navbox, .vertical-navbox, .thumbcaption, .mw-editsection, .mw-headline, .toc, #toc'):
193
+ unwanted.decompose()
194
+ extracted_content = content_div.get_text(separator=' ', strip=True)
195
+
196
+ # News site specific handling
197
+ news_sites = {
198
+ 'cnn.com': ['article', '.article__content', '.l-container', '.body-text', '#body-text'],
199
+ 'bbc.com': ['.article__body-content', '.story-body__inner', '[data-component="text-block"]'],
200
+ 'nytimes.com': ['article', '.meteredContent', '.StoryBodyCompanionColumn', '.article-body'],
201
+ 'reuters.com': ['article', '.ArticleBody__content___3MtHP', '.article-body'],
202
+ 'theguardian.com': ['.article-body-commercial-selector', '.content__article-body', '.dcr-1cas96z'],
203
+ 'washingtonpost.com': ['.article-body', '.teaser-content'],
204
+ 'apnews.com': ['.Article', '.RichTextStoryBody'],
205
+ 'indiatimes.com': ['.article-body', '.article_content', '.article-desc', '.Normal'],
206
+ 'cnbc.com': ['.ArticleBody-articleBody', '.group-article-body', '.article-body'],
207
+ 'thehindu.com': ['.article-body', '.article-text', '#content-body-14269002']
208
+ }
209
+
210
+ if not extracted_content:
211
+ # Check if we're on a known news site
212
+ for site, selectors in news_sites.items():
213
+ if site in domain:
214
+ for selector in selectors:
215
+ content_element = processed_soup.select_one(selector)
216
+ if content_element:
217
+ # Clean the news content
218
+ for unwanted in content_element.select('aside, figure, .ad, .ads, .advertisement, .social, .share, .related, .newsletter, .more-on, .read-more, .promotions'):
219
+ unwanted.decompose()
220
+ extracted_content = content_element.get_text(separator=' ', strip=True)
221
+ break
222
+ if extracted_content:
223
+ break
224
+
225
+ # ---------- JSON-LD EXTRACTION ----------
226
+ if not extracted_content:
227
+ # Look for structured data in JSON-LD format
228
+ json_ld = processed_soup.find_all('script', type='application/ld+json')
229
+ for script in json_ld:
230
+ try:
231
+ script_content = script.string
232
+ if not script_content: # Skip empty scripts
233
+ continue
234
+
235
+ # Clean the JSON string (some sites have invalid JSON)
236
+ script_content = re.sub(r'[\n\t\r]', '', script_content)
237
+ script_content = script_content.strip()
238
+
239
+ data = json.loads(script_content)
240
+ # Handle both single objects and arrays of objects
241
+ if isinstance(data, list):
242
+ data_list = data
243
+ else:
244
+ data_list = [data]
245
+
246
+ for item in data_list:
247
+ article_body = None
248
+ # Try to find articleBody or various content fields
249
+ if isinstance(item, dict):
250
+ # Check for common content fields directly
251
+ for field in ['articleBody', 'description', 'text', 'mainEntityOfPage']:
252
+ if field in item and isinstance(item[field], str) and len(item[field]) > 200:
253
+ article_body = item[field]
254
+ break
255
+
256
+ # Check in nested objects
257
+ if not article_body and '@graph' in item and isinstance(item['@graph'], list):
258
+ for graph_item in item['@graph']:
259
+ if isinstance(graph_item, dict):
260
+ for field in ['articleBody', 'description', 'text']:
261
+ if field in graph_item and isinstance(graph_item[field], str) and len(graph_item[field]) > 200:
262
+ article_body = graph_item[field]
263
+ break
264
+ if article_body:
265
+ break
266
+
267
+ if article_body:
268
+ extracted_content = article_body
269
+ break
270
+
271
+ if extracted_content:
272
+ break
273
+ except (json.JSONDecodeError, TypeError, AttributeError, ValueError) as e:
274
+ logger.debug(f"Error parsing JSON-LD: {str(e)}")
275
+ continue
276
+
277
+ # ---------- META DESCRIPTION EXTRACTION ----------
278
+ meta_description = None
279
+ meta_tag = processed_soup.find('meta', attrs={'name': 'description'}) or processed_soup.find('meta', attrs={'property': 'og:description'})
280
+ if meta_tag and meta_tag.get('content'):
281
+ meta_description = meta_tag.get('content')
282
+
283
+ # ---------- CONTENT ANALYSIS ----------
284
+ if not extracted_content:
285
+ # Get all content blocks (divs, sections, articles)
286
+ content_blocks = []
107
287
 
108
- # Truncate if needed
109
- if len(content) > max_chars:
110
- content = content[:max_chars] + "..."
288
+ # Prioritize semantic tags
289
+ for tag in ['article', 'main', 'section', 'div']:
290
+ blocks = processed_soup.find_all(tag)
111
291
 
112
- return content
113
-
114
- # If trafilatura failed, try direct requests
115
- logger.info(f"Trafilatura extraction failed for {url}, trying fallback method")
116
- response = requests.get(url, headers=headers, timeout=10)
117
-
118
- if response.status_code == 200:
119
- # Very basic HTML cleaning
120
- html_content = response.text
121
- # Remove HTML tags
122
- text = re.sub(r'<[^>]+>', ' ', html_content)
123
- # Remove excess whitespace
124
- text = re.sub(r'\s+', ' ', text).strip()
292
+ for block in blocks:
293
+ # Skip if too small
294
+ text = block.get_text(strip=True)
295
+ if len(text) < 200:
296
+ continue
297
+
298
+ # Calculate content metrics
299
+ char_count = len(text)
300
+ link_density = calculate_link_density(block)
301
+ p_count = len(block.find_all('p'))
302
+ p_text_length = sum(len(p.get_text(strip=True)) for p in block.find_all('p'))
303
+ p_density = p_text_length / char_count if char_count > 0 else 0
304
+
305
+ # Skip blocks with high link density (likely navigation)
306
+ if link_density > 0.5:
307
+ continue
308
+
309
+ # Calculate readability scores
310
+ text_density = char_count / (len(str(block)) + 1) # Text to HTML ratio
311
+
312
+ # Score content blocks
313
+ score = 0
314
+
315
+ # Prefer blocks with many paragraphs
316
+ score += min(p_count * 5, 50) # Max 50 points for paragraphs
317
+
318
+ # Prefer blocks with high paragraph text density
319
+ score += min(int(p_density * 100), 50) # Max 50 points for paragraph density
320
+
321
+ # Penalize high link density
322
+ score -= int(link_density * 100)
323
+
324
+ # Boost for high text density
325
+ score += min(int(text_density * 30), 30) # Max 30 points for text density
326
+
327
+ # Boost for certain attributes and classes
328
+ content_indicators = ['content', 'article', 'story', 'post', 'text', 'body', 'entry']
329
+
330
+ # Check class and id attributes
331
+ for attr in ['class', 'id']:
332
+ attr_val = block.get(attr, '')
333
+ if attr_val:
334
+ if isinstance(attr_val, list):
335
+ attr_val = ' '.join(attr_val)
336
+ for indicator in content_indicators:
337
+ if indicator in attr_val.lower():
338
+ score += 30
339
+ break
340
+
341
+ # Penalty for boilerplate indicators
342
+ boilerplate_indicators = ['sidebar', 'menu', 'nav', 'banner', 'ad', 'footer', 'header', 'comment', 'share', 'related']
343
+ for attr in ['class', 'id']:
344
+ attr_val = block.get(attr, '')
345
+ if attr_val:
346
+ if isinstance(attr_val, list):
347
+ attr_val = ' '.join(attr_val)
348
+ for indicator in boilerplate_indicators:
349
+ if indicator in attr_val.lower():
350
+ score -= 50
351
+ break
352
+
353
+ # Add to content blocks if score is positive
354
+ if score > 0:
355
+ content_blocks.append({
356
+ 'element': block,
357
+ 'score': score,
358
+ 'char_count': char_count,
359
+ 'text': text
360
+ })
361
+
362
+ # Sort content blocks by score
363
+ if content_blocks:
364
+ content_blocks.sort(key=lambda x: x['score'], reverse=True)
365
+ best_block = content_blocks[0]['element']
366
+
367
+ # Clean up the best block
368
+ for unwanted in best_block.find_all(['aside', 'nav', 'footer', 'header']):
369
+ unwanted.decompose()
370
+
371
+ extracted_content = best_block.get_text(separator=' ', strip=True)
125
372
 
126
- if text:
127
- if len(text) > max_chars:
128
- text = text[:max_chars] + "..."
129
- return text
373
+ # ---------- PARAGRAPH EXTRACTION FALLBACK ----------
374
+ if not extracted_content:
375
+ # Get all paragraphs with substantial content
376
+ paragraphs = []
377
+ for p in processed_soup.find_all('p'):
378
+ text = p.get_text(strip=True)
379
+ if len(text) > 40: # Only consider substantial paragraphs
380
+ # Calculate link density
381
+ link_density = calculate_link_density(p)
382
+ if link_density < 0.25: # Skip if too many links
383
+ paragraphs.append(text)
384
+
385
+ if paragraphs:
386
+ extracted_content = ' '.join(paragraphs)
130
387
 
388
+ # If we have content, clean it up
389
+ if extracted_content:
390
+ # Clean whitespace
391
+ extracted_content = re.sub(r'\s+', ' ', extracted_content).strip()
392
+
393
+ # Remove URLs
394
+ extracted_content = re.sub(r'https?://\S+', '', extracted_content)
395
+
396
+ # Remove email addresses
397
+ extracted_content = re.sub(r'\S+@\S+', '', extracted_content)
398
+
399
+ # Remove social media handles
400
+ extracted_content = re.sub(r'@\w+', '', extracted_content)
401
+
402
+ # Replace multiple spaces with single space
403
+ extracted_content = re.sub(r' +', ' ', extracted_content)
404
+
405
+ # Normalize quotes and apostrophes
406
+ extracted_content = extracted_content.replace('“', '"').replace('”', '"')
407
+ extracted_content = extracted_content.replace("’", "'").replace("‘", "'")
408
+
409
+ # Remove any remaining HTML entities
410
+ extracted_content = re.sub(r'&[a-zA-Z]+;', ' ', extracted_content)
411
+
412
+ # Remove short lines that are likely navigation/menu items
413
+ lines = extracted_content.split('\n')
414
+ extracted_content = ' '.join([line for line in lines if len(line) > 40 or '.' in line])
415
+
416
+ # Combine with meta description if available and content is short
417
+ if meta_description and len(extracted_content) < 500:
418
+ extracted_content = meta_description + " " + extracted_content
419
+
420
+ # Truncate if needed
421
+ if len(extracted_content) > max_chars:
422
+ # Try to break at a sentence boundary
423
+ cutoff_point = max_chars
424
+ for i in range(max_chars - 1, max_chars - 300, -1):
425
+ if i < len(extracted_content) and extracted_content[i] in ['.', '!', '?']:
426
+ cutoff_point = i + 1
427
+ break
428
+
429
+ extracted_content = extracted_content[:cutoff_point]
430
+
431
+ return extracted_content
432
+ else:
433
+ # Return meta description if nothing else was found
434
+ if meta_description:
435
+ return meta_description
436
+
437
+ logger.error(f"No content extracted from {url}")
438
+ return None
131
439
  else:
132
440
  logger.error(f"Request to {url} returned status code {response.status_code}")
133
-
134
- except ImportError:
135
- logger.error("Trafilatura not installed. Install with 'pip install trafilatura'")
136
- # Try direct requests only
137
- try:
138
- response = requests.get(url, headers=headers, timeout=10)
139
- if response.status_code == 200:
140
- # Very basic HTML cleaning
141
- html_content = response.text
142
- text = re.sub(r'<[^>]+>', ' ', html_content)
143
- text = re.sub(r'\s+', ' ', text).strip()
144
-
145
- if text:
146
- if len(text) > max_chars:
147
- text = text[:max_chars] + "..."
148
- return text
149
- except Exception as req_error:
150
- logger.error(f"Direct request fallback failed: {str(req_error)}")
441
+ return None
151
442
 
152
443
  except Exception as e:
153
- logger.error(f"Error extracting content with trafilatura: {str(e)}")
154
- # Try the requests fallback
444
+ logger.error(f"Error extracting content with hybrid approach: {str(e)}")
445
+ # Try a basic fallback
155
446
  try:
156
447
  response = requests.get(url, headers=headers, timeout=10)
157
448
  if response.status_code == 200:
158
- html_content = response.text
159
- text = re.sub(r'<[^>]+>', ' ', html_content)
449
+ soup = BeautifulSoup(response.text, 'html.parser')
450
+ # Just get the text without images, scripts, styles, etc.
451
+ for tag in soup(['script', 'style', 'img', 'nav', 'footer', 'header']):
452
+ tag.decompose()
453
+ text = soup.get_text(separator=' ', strip=True)
160
454
  text = re.sub(r'\s+', ' ', text).strip()
161
455
 
162
456
  if text:
@@ -164,14 +458,40 @@ def extract_article_content(url: str, max_chars: int = 2000) -> Optional[str]:
164
458
  text = text[:max_chars] + "..."
165
459
  return text
166
460
  except Exception as req_error:
167
- logger.error(f"Direct request fallback failed: {str(req_error)}")
461
+ logger.error(f"Basic fallback failed: {str(req_error)}")
168
462
 
169
463
  return None
170
464
  except Exception as e:
171
465
  logger.error(f"Error extracting content from {url}: {str(e)}")
172
466
  return None
173
467
 
174
- def get_web_search_results(query: str, max_results: int = 3, max_chars_per_result: int = 2000) -> Dict[str, Any]:
468
+ def calculate_link_density(element):
469
+ """
470
+ Calculate the ratio of link text to all text in an element.
471
+ Used to identify navigation-heavy areas.
472
+
473
+ Args:
474
+ element: BeautifulSoup element
475
+
476
+ Returns:
477
+ Float between 0 and 1 indicating link density
478
+ """
479
+ try:
480
+ if element is None:
481
+ return 0
482
+
483
+ text_length = len(element.get_text(strip=True))
484
+ if text_length == 0:
485
+ return 0
486
+
487
+ links = element.find_all('a')
488
+ link_text_length = sum(len(a.get_text(strip=True)) for a in links)
489
+
490
+ return link_text_length / text_length
491
+ except Exception:
492
+ return 0
493
+
494
+ def get_web_search_results(query: str, max_results: int = 5, max_chars_per_result: int = 5000) -> Dict[str, Any]:
175
495
  """
176
496
  Get formatted web search results ready to be included in AI prompts.
177
497
 
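
A minimal sketch of how the reworked module's public functions, as shown in this diff, might be exercised (assumed usage based on the signatures above, not code from the package):

```python
from ngpt.utils.web_search import perform_web_search, extract_article_content

# Each result dict carries 'title', 'href' and 'body', per perform_web_search above
for result in perform_web_search("latest python release", max_results=3):
    print(result['title'], '-', result['href'])

    # extract_article_content returns None when nothing useful could be extracted
    content = extract_article_content(result['href'], max_chars=1000)
    if content:
        print(content[:200])
```
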
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ngpt
3
- Version: 3.4.5
3
+ Version: 3.5.1
4
4
  Summary: Swiss army knife for LLMs: powerful CLI and interactive chatbot in one package. Seamlessly work with OpenAI, Ollama, Groq, Claude, Gemini, or any OpenAI-compatible API to generate code, craft git commits, rewrite text, and execute shell commands.
5
5
  Project-URL: Homepage, https://github.com/nazdridoy/ngpt
6
6
  Project-URL: Repository, https://github.com/nazdridoy/ngpt
@@ -28,12 +28,11 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
28
28
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
29
29
  Classifier: Topic :: Utilities
30
30
  Requires-Python: >=3.8
31
- Requires-Dist: duckduckgo-search>=3.0.0
31
+ Requires-Dist: beautifulsoup4>=4.12.0
32
32
  Requires-Dist: prompt-toolkit>=3.0.0
33
33
  Requires-Dist: pyperclip>=1.8.0
34
34
  Requires-Dist: requests>=2.31.0
35
35
  Requires-Dist: rich>=10.0.0
36
- Requires-Dist: trafilatura>=1.6.0
37
36
  Description-Content-Type: text/markdown
38
37
 
39
38
  # nGPT
@@ -62,10 +61,10 @@ Description-Content-Type: text/markdown
62
61
 
63
62
  - ✅ **Versatile**: Powerful and easy-to-use CLI tool for various AI tasks
64
63
  - 🪶 **Lightweight**: Minimal dependencies with everything you need included
65
- - 🔄 **API Flexibility**: Works with OpenAI, Ollama, Groq, Claude, Gemini, and any compatible endpoint
64
+ - 🔄 **API Flexibility**: Works with OpenAI, Ollama, Groq, Claude, Gemini, and any OpenAI-compatible endpoint
66
65
  - 💬 **Interactive Chat**: Continuous conversation with memory in modern UI
67
66
  - 📊 **Streaming Responses**: Real-time output for better user experience
68
- - 🔍 **Web Search**: Enhance any model with contextual information from the web
67
+ - 🔍 **Web Search**: Enhance any model with contextual information from the web, using advanced content extraction to identify the most relevant information from web pages
69
68
  - 📥 **Stdin Processing**: Process piped content by using `{}` placeholder in prompts
70
69
  - 🎨 **Markdown Rendering**: Beautiful formatting of markdown and code with syntax highlighting
71
70
  - ⚡ **Real-time Markdown**: Stream responses with live updating syntax highlighting and formatting
@@ -85,37 +84,47 @@ See the [Feature Overview](https://nazdridoy.github.io/ngpt/overview/) for more
85
84
 
86
85
 
87
86
  ## Table of Contents
88
- - [Quick Start](#quick-start)
89
87
  - [Features](#features)
90
- - [Documentation](#documentation)
91
88
  - [Installation](#installation)
89
+ - [Quick Start](#quick-start)
92
90
  - [Usage](#usage)
91
+ - [Command Line Options](#command-line-options)
93
92
  - [Documentation](https://nazdridoy.github.io/ngpt/)
94
- - [CLI Tool](#as-a-cli-tool)
93
+ - [Documentation](#documentation)
95
94
  - [Configuration](#configuration)
96
- - [Command Line Options](#command-line-options)
95
+ - [API Key Setup](#api-key-setup)
96
+ - [OpenAI API Key](#openai-api-key)
97
+ - [Google Gemini API Key](#google-gemini-api-key)
97
98
  - [CLI Configuration](#cli-configuration)
98
99
  - [Interactive Configuration](#interactive-configuration)
99
100
  - [Configuration File](#configuration-file)
100
101
  - [Configuration Priority](#configuration-priority)
101
- - [API Key Setup](#api-key-setup)
102
- - [OpenAI API Key](#openai-api-key)
103
- - [Google Gemini API Key](#google-gemini-api-key)
104
102
  - [Contributing](#contributing)
105
103
  - [License](#license)
106
104
 
107
- ## Quick Start
105
+ ## Installation
108
106
 
109
107
  ```bash
110
- # Install with pip
108
+ # Installation with pip
111
109
  pip install ngpt
112
110
 
113
- # Or install with uv (faster)
111
+ # Or install with uv (faster installation)
114
112
  uv pip install ngpt
115
113
 
116
- # Or install globally as a CLI tool (recommended)
114
+ # Or install globally as a CLI tool (recommended for command-line usage)
117
115
  uv tool install ngpt
118
116
 
117
+ # Arch Linux: install from AUR
118
+ paru -S ngpt
119
+ ```
120
+
121
+ Requires Python 3.8 or newer.
122
+
123
+ For detailed installation instructions, see the [Installation Guide](https://nazdridoy.github.io/ngpt/installation/).
124
+
125
+ ## Quick Start
126
+
127
+ ```bash
119
128
  # Chat with default settings
120
129
  ngpt "Tell me about quantum computing"
121
130
 
@@ -200,58 +209,6 @@ ngpt --provider Groq "Explain quantum computing"
200
209
  # Compare outputs from different providers
201
210
  ngpt --provider OpenAI "Explain quantum physics" > openai_response.txt
202
211
  ngpt --provider Ollama "Explain quantum physics" > ollama_response.txt
203
- ```
204
-
205
- For more examples and detailed usage, visit the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
206
-
207
- ## Documentation
208
-
209
- Comprehensive documentation, including usage guides and examples, is available at:
210
-
211
- **[https://nazdridoy.github.io/ngpt/](https://nazdridoy.github.io/ngpt/)**
212
-
213
- Key documentation sections:
214
- - [Installation Guide](https://nazdridoy.github.io/ngpt/installation/)
215
- - [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/)
216
- - [Configuration Guide](https://nazdridoy.github.io/ngpt/configuration/)
217
- - [Examples & Tutorials](https://nazdridoy.github.io/ngpt/examples/basic/)
218
-
219
- ## Installation
220
-
221
- ```bash
222
- # Installation with pip
223
- pip install ngpt
224
-
225
- # Or install with uv (faster installation)
226
- uv pip install ngpt
227
-
228
- # Or install globally as a CLI tool (recommended for command-line usage)
229
- uv tool install ngpt
230
-
231
- # Arch Linux: install from AUR
232
- paru -S ngpt
233
- ```
234
-
235
- Requires Python 3.8 or newer.
236
-
237
- For detailed installation instructions, see the [Installation Guide](https://nazdridoy.github.io/ngpt/installation/).
238
-
239
- ## Usage
240
-
241
- ### As a CLI Tool
242
-
243
- ```bash
244
- # Basic chat (default mode)
245
- ngpt "Hello, how are you?"
246
-
247
- # Interactive chat session with conversation history
248
- ngpt -i
249
-
250
- # Log conversation to a file
251
- ngpt --interactive --log conversation.log
252
-
253
- # Use custom system prompt to guide AI behavior
254
- ngpt --preprompt "You are a Python programming tutor" "Explain decorators"
255
212
 
256
213
  # Show all API configurations
257
214
  ngpt --show-config --all
@@ -277,47 +234,16 @@ ngpt -s "list all files in current directory"
277
234
  # On Windows generates: dir
278
235
  # On Linux/macOS generates: ls -la
279
236
 
280
- # Generate clean code (using -c or --code flag)
281
- # Returns only code without markdown formatting or explanations
237
+ # Generate code (using -c or --code flag)
282
238
  ngpt -c "create a python function that calculates fibonacci numbers"
283
239
 
284
240
  # Use multiline text editor for complex prompts (using -t or --text flag)
285
- # Opens an interactive editor with syntax highlighting and intuitive controls
286
241
  ngpt -t
287
242
  ```
288
243
 
289
- For more CLI examples and detailed usage information, see the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
290
-
291
- ## Configuration
292
-
293
- ### API Key Setup
294
-
295
- #### OpenAI API Key
296
- 1. Create an account at [OpenAI](https://platform.openai.com/)
297
- 2. Navigate to API keys: https://platform.openai.com/api-keys
298
- 3. Click "Create new secret key" and copy your API key
299
- 4. Configure nGPT with your key:
300
- ```bash
301
- ngpt --config
302
- # Enter provider: OpenAI
303
- # Enter API key: your-openai-api-key
304
- # Enter base URL: https://api.openai.com/v1/
305
- # Enter model: gpt-3.5-turbo (or other model)
306
- ```
244
+ For more examples and detailed usage, visit the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
307
245
 
308
- #### Google Gemini API Key
309
- 1. Create or use an existing Google account
310
- 2. Go to [Google AI Studio](https://aistudio.google.com/)
311
- 3. Navigate to API keys in the left sidebar (or visit https://aistudio.google.com/app/apikey)
312
- 4. Create an API key and copy it
313
- 5. Configure nGPT with your key:
314
- ```bash
315
- ngpt --config
316
- # Enter provider: Gemini
317
- # Enter API key: your-gemini-api-key
318
- # Enter base URL: https://generativelanguage.googleapis.com/v1beta/openai
319
- # Enter model: gemini-2.0-flash
320
- ```
246
+ ## Usage
321
247
 
322
248
  ### Command Line Options
323
249
 
@@ -326,8 +252,8 @@ For more CLI examples and detailed usage information, see the [CLI Usage Guide](
326
252
  usage: ngpt [-h] [-v] [--language LANGUAGE] [--config [CONFIG]] [--config-index CONFIG_INDEX] [--provider PROVIDER]
327
253
  [--remove] [--show-config] [--all] [--list-models] [--list-renderers] [--cli-config [COMMAND ...]]
328
254
  [--api-key API_KEY] [--base-url BASE_URL] [--model MODEL] [--web-search] [--temperature TEMPERATURE]
329
- [--top_p TOP_P] [--max_tokens MAX_TOKENS] [--log [FILE]] [--preprompt PREPROMPT] [--no-stream] [--prettify]
330
- [--stream-prettify] [--renderer {auto,rich,glow}] [--rec-chunk] [--diff [FILE]] [--chunk-size CHUNK_SIZE]
255
+ [--top_p TOP_P] [--max_tokens MAX_TOKENS] [--log [FILE]] [--preprompt PREPROMPT] [--no-stream | --prettify |
256
+ --stream-prettify] [--renderer {auto,rich,glow}] [--rec-chunk] [--diff [FILE]] [--chunk-size CHUNK_SIZE]
331
257
  [--analyses-chunk-size ANALYSES_CHUNK_SIZE] [--max-msg-lines MAX_MSG_LINES]
332
258
  [--max-recursion-depth MAX_RECURSION_DEPTH] [-i | -s | -c | -t | -p | -r | -g]
333
259
  [prompt]
@@ -346,12 +272,10 @@ options::
346
272
 
347
273
  Configuration Options::
348
274
 
349
- --config [CONFIG] Path to a custom config file or, if no value provided, enter interactive
350
- configuration mode to create a new config
275
+ --config [CONFIG] Path to a custom config file or, if no value provided, enter interactive configuration mode to create a new config
351
276
  --config-index CONFIG_INDEX Index of the configuration to use or edit (default: 0)
352
277
  --provider PROVIDER Provider name to identify the configuration to use
353
- --remove Remove the configuration at the specified index (requires --config and
354
- --config-index or --provider)
278
+ --remove Remove the configuration at the specified index (requires --config and --config-index or --provider)
355
279
  --show-config Show the current configuration(s) and exit
356
280
  --all Show details for all configurations (requires --show-config)
357
281
  --list-models List all available models for the current configuration and exit
@@ -363,30 +287,28 @@ Global Options::
363
287
  --api-key API_KEY API key for the service
364
288
  --base-url BASE_URL Base URL for the API
365
289
  --model MODEL Model to use
366
- --web-search Enable web search capability (Note: Your API endpoint must support this
367
- feature)
290
+ --web-search Enable web search capability using DuckDuckGo to enhance prompts with relevant information
368
291
  --temperature TEMPERATURE Set temperature (controls randomness, default: 0.7)
369
292
  --top_p TOP_P Set top_p (controls diversity, default: 1.0)
370
293
  --max_tokens MAX_TOKENS Set max response length in tokens
371
- --log [FILE] Set filepath to log conversation to, or create a temporary log file if no path
372
- provided
294
+ --log [FILE] Set filepath to log conversation to, or create a temporary log file if no path provided
373
295
  --preprompt PREPROMPT Set custom system prompt to control AI behavior
374
- --no-stream Return the whole response without streaming
375
- --prettify Render markdown responses and code with syntax highlighting and formatting
376
- --stream-prettify Enable streaming with markdown rendering (automatically uses Rich renderer)
377
- --renderer {auto,rich,glow} Select which markdown renderer to use with --prettify (auto, rich, or glow)
296
+ --renderer {auto,rich,glow} Select which markdown renderer to use with --prettify or --stream-prettify (auto, rich, or glow)
297
+
298
+ Output Display Options (mutually exclusive)::
299
+
300
+ --no-stream Return the whole response without streaming or formatting
301
+ --prettify Render complete response with markdown and code formatting (non-streaming)
302
+ --stream-prettify Stream response with real-time markdown rendering (default)
378
303
 
379
304
  Git Commit Message Options::
380
305
 
381
306
  --rec-chunk Process large diffs in chunks with recursive analysis if needed
382
- --diff [FILE] Use diff from specified file instead of staged changes. If used without a path,
383
- uses the path from CLI config.
307
+ --diff [FILE] Use diff from specified file instead of staged changes. If used without a path, uses the path from CLI config.
384
308
  --chunk-size CHUNK_SIZE Number of lines per chunk when chunking is enabled (default: 200)
385
- --analyses-chunk-size ANALYSES_CHUNK_SIZE
386
- Number of lines per chunk when recursively chunking analyses (default: 200)
309
+ --analyses-chunk-size ANALYSES_CHUNK_SIZE Number of lines per chunk when recursively chunking analyses (default: 200)
387
310
  --max-msg-lines MAX_MSG_LINES Maximum number of lines in commit message before condensing (default: 20)
388
- --max-recursion-depth MAX_RECURSION_DEPTH
389
- Maximum recursion depth for commit message condensing (default: 3)
311
+ --max-recursion-depth MAX_RECURSION_DEPTH Maximum recursion depth for commit message condensing (default: 3)
390
312
 
391
313
  Modes (mutually exclusive)::
392
314
 
@@ -394,15 +316,62 @@ Modes (mutually exclusive)::
394
316
  -s, --shell Generate and execute shell commands
395
317
  -c, --code Generate code
396
318
  -t, --text Enter multi-line text input (submit with Ctrl+D)
397
- -p, --pipe Read from stdin and use content with prompt. Use {} in prompt as placeholder
398
- for stdin content
319
+ -p, --pipe Read from stdin and use content with prompt. Use {} in prompt as placeholder for stdin content
399
320
  -r, --rewrite Rewrite text from stdin to be more natural while preserving tone and meaning
400
321
  -g, --gitcommsg Generate AI-powered git commit messages from staged changes or diff file
401
322
  ```
402
323
 
403
324
  > **Note**: For better visualization of conventional commit messages on GitHub, you can use the [GitHub Commit Labels](https://greasyfork.org/en/scripts/526153-github-commit-labels) userscript, which adds colorful labels to your commits.
404
325
 
405
- For a complete reference of all available options, see the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
326
+ For a complete reference of all available options, detailed CLI examples and usage information, see the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
327
+
328
+
329
+ ## Documentation
330
+
331
+ Comprehensive documentation, including usage guides and examples, is available at:
332
+
333
+ **[https://nazdridoy.github.io/ngpt/](https://nazdridoy.github.io/ngpt/)**
334
+
335
+ Key documentation sections:
336
+ - [Installation Guide](https://nazdridoy.github.io/ngpt/installation/)
337
+ - [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/)
338
+ - [Configuration Guide](https://nazdridoy.github.io/ngpt/configuration/)
339
+ - [Examples & Tutorials](https://nazdridoy.github.io/ngpt/examples/basic/)
340
+ - [Git Commit Message Guide](https://nazdridoy.github.io/ngpt/usage/gitcommsg/)
341
+
342
+
343
+ ## Configuration
344
+
345
+ ### API Key Setup
346
+
347
+ #### OpenAI API Key
348
+ 1. Create an account at [OpenAI](https://platform.openai.com/)
349
+ 2. Navigate to API keys: https://platform.openai.com/api-keys
350
+ 3. Click "Create new secret key" and copy your API key
351
+ 4. Configure nGPT with your key:
352
+ ```bash
353
+ ngpt --config
354
+ # Enter provider: OpenAI
355
+ # Enter API key: your-openai-api-key
356
+ # Enter base URL: https://api.openai.com/v1/
357
+ # Enter model: gpt-3.5-turbo (or other model)
358
+ ```
359
+
360
+ #### Google Gemini API Key
361
+ 1. Create or use an existing Google account
362
+ 2. Go to [Google AI Studio](https://aistudio.google.com/)
363
+ 3. Navigate to API keys in the left sidebar (or visit https://aistudio.google.com/app/apikey)
364
+ 4. Create an API key and copy it
365
+ 5. Configure nGPT with your key:
366
+ ```bash
367
+ ngpt --config
368
+ # Enter provider: Gemini
369
+ # Enter API key: your-gemini-api-key
370
+ # Enter base URL: https://generativelanguage.googleapis.com/v1beta/openai
371
+ # Enter model: gemini-2.0-flash
372
+ ```
373
+
374
+ For more detailed information, refer to the [API Key Setup documentation](https://nazdridoy.github.io/ngpt/configuration/#api-key-setup).
406
375
 
407
376
  ### CLI Configuration
408
377
 
@@ -20,9 +20,9 @@ ngpt/utils/__init__.py,sha256=qu_66I1Vtav2f1LDiPn5J3DUsbK7o1CSScMcTkYqxoM,1179
20
20
  ngpt/utils/cli_config.py,sha256=Ug8cECBTIuzOwkBWidLTfs-OAdOsCMJ2bNa70pOADfw,11195
21
21
  ngpt/utils/config.py,sha256=wsArA4osnh8fKqOvtsPqqBxAz3DpdjtaWUFaRtnUdyc,10452
22
22
  ngpt/utils/log.py,sha256=f1jg2iFo35PAmsarH8FVL_62plq4VXH0Mu2QiP6RJGw,15934
23
- ngpt/utils/web_search.py,sha256=yvCUDNhwcIcKZ_hWESFQQ-vB-LKsDDCDT17YFzFcGR4,12598
24
- ngpt-3.4.5.dist-info/METADATA,sha256=ofAuga74DmRxU0xIJ_FNHMYIEu-wAzVZPpjpiKwPHqw,24585
25
- ngpt-3.4.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
26
- ngpt-3.4.5.dist-info/entry_points.txt,sha256=SqAAvLhMrsEpkIr4YFRdUeyuXQ9o0IBCeYgE6AVojoI,44
27
- ngpt-3.4.5.dist-info/licenses/LICENSE,sha256=mQkpWoADxbHqE0HRefYLJdm7OpdrXBr3vNv5bZ8w72M,1065
28
- ngpt-3.4.5.dist-info/RECORD,,
23
+ ngpt/utils/web_search.py,sha256=TK_c2U8MYM86f9J_oEzi0UZ46JohvyxdjfonHZZZqfY,30718
24
+ ngpt-3.5.1.dist-info/METADATA,sha256=djECxREmLWeO-ugYuco3gi2xEJqaGuFpjJopqa3veLI,23886
25
+ ngpt-3.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
26
+ ngpt-3.5.1.dist-info/entry_points.txt,sha256=SqAAvLhMrsEpkIr4YFRdUeyuXQ9o0IBCeYgE6AVojoI,44
27
+ ngpt-3.5.1.dist-info/licenses/LICENSE,sha256=mQkpWoADxbHqE0HRefYLJdm7OpdrXBr3vNv5bZ8w72M,1065
28
+ ngpt-3.5.1.dist-info/RECORD,,
File without changes