ngpt 3.4.5__py3-none-any.whl → 3.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ngpt/utils/web_search.py CHANGED
@@ -1,5 +1,5 @@
  """
- Web search utilities for nGPT using duckduckgo-search and trafilatura.
+ Web search utilities for nGPT using duckduckgo-search and BeautifulSoup4.

  This module provides functionality to search the web and extract
  information from search results to enhance AI prompts.
@@ -12,6 +12,9 @@ from urllib.parse import urlparse
  import requests
  import sys
  import datetime
+ from bs4 import BeautifulSoup
+ from bs4.element import Comment, Declaration, Doctype, ProcessingInstruction
+ import json

  # Get actual logger from global context instead of using standard logging
  from . import log
@@ -58,9 +61,10 @@ def perform_web_search(query: str, max_results: int = 5) -> List[Dict[str, Any]]
  logger.info("Web search encountered an issue, but will continue with available results")
  return []

- def extract_article_content(url: str, max_chars: int = 2000) -> Optional[str]:
+ def extract_article_content(url: str, max_chars: int = 5000) -> Optional[str]:
  """
- Extract and clean content from a webpage URL.
+ Extract and clean content from a webpage URL using a hybrid approach
+ inspired by trafilatura and readability algorithms.

  Args:
  url: The URL to extract content from
@@ -81,82 +85,336 @@ def extract_article_content(url: str, max_chars: int = 2000) -> Optional[str]
  'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
  'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
  'Accept-Language': 'en-US,en;q=0.5',
+ 'DNT': '1', # Do Not Track
  'Connection': 'keep-alive',
  'Upgrade-Insecure-Requests': '1',
+ 'Cache-Control': 'max-age=0',
+ 'Sec-Fetch-Dest': 'document',
+ 'Sec-Fetch-Mode': 'navigate',
+ 'Sec-Fetch-Site': 'none',
+ 'Pragma': 'no-cache',
  }

  logger.info(f"Fetching content from {url}")

  try:
- # Try using trafilatura
- import trafilatura
+ # Fetch the page content
+ response = requests.get(url, headers=headers, timeout=15)

- # Download with correct parameters
- # trafilatura handles user-agent internally
- downloaded = trafilatura.fetch_url(url)
-
- if downloaded:
- # Extract main content
- content = trafilatura.extract(downloaded, include_comments=False,
- include_tables=False,
- no_fallback=False)
+ if response.status_code == 200:
+ # Try to detect the encoding if not properly specified
+ if response.encoding == 'ISO-8859-1':
+ # Try to detect encoding from content
+ possible_encoding = re.search(r'charset=["\'](.*?)["\']', response.text)
+ if possible_encoding:
+ response.encoding = possible_encoding.group(1)
+ else:
+ # Default to UTF-8 if we can't detect
+ response.encoding = 'utf-8'
+
+ # Parse with BeautifulSoup using lxml parser for better results if available
+ try:
+ soup = BeautifulSoup(response.text, 'lxml')
+ except ImportError:
+ soup = BeautifulSoup(response.text, 'html.parser')
+
+ # Extract main content using multiple strategies
+ extracted_content = None
+
+ # ---------- PREPROCESSING ----------
+ # Clone the soup before preprocessing
+ processed_soup = BeautifulSoup(str(soup), 'lxml' if 'lxml' in str(type(soup.parser)) else 'html.parser')
+
+ # Remove all script, style tags and comments
+ for element in processed_soup.find_all(['script', 'style', 'noscript']):
+ element.decompose()
+
+ # Remove HTML comments
+ for comment in processed_soup.find_all(text=lambda text: isinstance(text, Comment)):
+ comment.extract()
+
+ # Remove hidden elements
+ for hidden in processed_soup.find_all(style=lambda s: s and isinstance(s, str) and ('display:none' in s.lower() or 'visibility:hidden' in s.lower())):
+ hidden.decompose()
+ for hidden in processed_soup.find_all(hidden=True):
+ hidden.decompose()
+ for hidden in processed_soup.find_all(class_=lambda c: c and isinstance(c, str) and any(x in c.lower() for x in ['hidden', 'invisible'])):
+ hidden.decompose()
+
+ # Handle iframes and frames
+ for frame in processed_soup.find_all(['iframe', 'frame']):
+ frame.decompose()
+
+ # ---------- SITE-SPECIFIC HANDLING ----------
+ domain = parsed_url.netloc.lower()
+
+ # Wikipedia-specific extraction
+ if 'wikipedia.org' in domain:
+ content_div = processed_soup.select_one('#mw-content-text')
+ if content_div:
+ # Remove tables, references, navigation elements
+ for unwanted in content_div.select('table, .reference, .reflist, .navbox, .vertical-navbox, .thumbcaption, .mw-editsection, .mw-headline, .toc, #toc'):
+ unwanted.decompose()
+ extracted_content = content_div.get_text(separator=' ', strip=True)
+
+ # News site specific handling
+ news_sites = {
+ 'cnn.com': ['article', '.article__content', '.l-container', '.body-text', '#body-text'],
+ 'bbc.com': ['.article__body-content', '.story-body__inner', '[data-component="text-block"]'],
+ 'nytimes.com': ['article', '.meteredContent', '.StoryBodyCompanionColumn', '.article-body'],
+ 'reuters.com': ['article', '.ArticleBody__content___3MtHP', '.article-body'],
+ 'theguardian.com': ['.article-body-commercial-selector', '.content__article-body', '.dcr-1cas96z'],
+ 'washingtonpost.com': ['.article-body', '.teaser-content'],
+ 'apnews.com': ['.Article', '.RichTextStoryBody'],
+ 'indiatimes.com': ['.article-body', '.article_content', '.article-desc', '.Normal'],
+ 'cnbc.com': ['.ArticleBody-articleBody', '.group-article-body', '.article-body'],
+ 'thehindu.com': ['.article-body', '.article-text', '#content-body-14269002']
+ }
+
+ if not extracted_content:
+ # Check if we're on a known news site
+ for site, selectors in news_sites.items():
+ if site in domain:
+ for selector in selectors:
+ content_element = processed_soup.select_one(selector)
+ if content_element:
+ # Clean the news content
+ for unwanted in content_element.select('aside, figure, .ad, .ads, .advertisement, .social, .share, .related, .newsletter, .more-on, .read-more, .promotions'):
+ unwanted.decompose()
+ extracted_content = content_element.get_text(separator=' ', strip=True)
+ break
+ if extracted_content:
+ break
+
+ # ---------- JSON-LD EXTRACTION ----------
+ if not extracted_content:
+ # Look for structured data in JSON-LD format
+ json_ld = processed_soup.find_all('script', type='application/ld+json')
+ for script in json_ld:
+ try:
+ script_content = script.string
+ if not script_content: # Skip empty scripts
+ continue
+
+ # Clean the JSON string (some sites have invalid JSON)
+ script_content = re.sub(r'[\n\t\r]', '', script_content)
+ script_content = script_content.strip()
+
+ data = json.loads(script_content)
+ # Handle both single objects and arrays of objects
+ if isinstance(data, list):
+ data_list = data
+ else:
+ data_list = [data]
+
+ for item in data_list:
+ article_body = None
+ # Try to find articleBody or various content fields
+ if isinstance(item, dict):
+ # Check for common content fields directly
+ for field in ['articleBody', 'description', 'text', 'mainEntityOfPage']:
+ if field in item and isinstance(item[field], str) and len(item[field]) > 200:
+ article_body = item[field]
+ break
+
+ # Check in nested objects
+ if not article_body and '@graph' in item and isinstance(item['@graph'], list):
+ for graph_item in item['@graph']:
+ if isinstance(graph_item, dict):
+ for field in ['articleBody', 'description', 'text']:
+ if field in graph_item and isinstance(graph_item[field], str) and len(graph_item[field]) > 200:
+ article_body = graph_item[field]
+ break
+ if article_body:
+ break
+
+ if article_body:
+ extracted_content = article_body
+ break
+
+ if extracted_content:
+ break
+ except (json.JSONDecodeError, TypeError, AttributeError, ValueError) as e:
+ logger.debug(f"Error parsing JSON-LD: {str(e)}")
+ continue
+
+ # ---------- META DESCRIPTION EXTRACTION ----------
+ meta_description = None
+ meta_tag = processed_soup.find('meta', attrs={'name': 'description'}) or processed_soup.find('meta', attrs={'property': 'og:description'})
+ if meta_tag and meta_tag.get('content'):
+ meta_description = meta_tag.get('content')

- if content:
- # Clean up content if needed
- content = content.strip()
+ # ---------- CONTENT ANALYSIS ----------
+ if not extracted_content:
+ # Get all content blocks (divs, sections, articles)
+ content_blocks = []

- # Truncate if needed
- if len(content) > max_chars:
- content = content[:max_chars] + "..."
+ # Prioritize semantic tags
+ for tag in ['article', 'main', 'section', 'div']:
+ blocks = processed_soup.find_all(tag)

- return content
-
- # If trafilatura failed, try direct requests
- logger.info(f"Trafilatura extraction failed for {url}, trying fallback method")
- response = requests.get(url, headers=headers, timeout=10)
-
- if response.status_code == 200:
- # Very basic HTML cleaning
- html_content = response.text
- # Remove HTML tags
- text = re.sub(r'<[^>]+>', ' ', html_content)
- # Remove excess whitespace
- text = re.sub(r'\s+', ' ', text).strip()
+ for block in blocks:
+ # Skip if too small
+ text = block.get_text(strip=True)
+ if len(text) < 200:
+ continue
+
+ # Calculate content metrics
+ char_count = len(text)
+ link_density = calculate_link_density(block)
+ p_count = len(block.find_all('p'))
+ p_text_length = sum(len(p.get_text(strip=True)) for p in block.find_all('p'))
+ p_density = p_text_length / char_count if char_count > 0 else 0
+
+ # Skip blocks with high link density (likely navigation)
+ if link_density > 0.5:
+ continue
+
+ # Calculate readability scores
+ text_density = char_count / (len(str(block)) + 1) # Text to HTML ratio
+
+ # Score content blocks
+ score = 0
+
+ # Prefer blocks with many paragraphs
+ score += min(p_count * 5, 50) # Max 50 points for paragraphs
+
+ # Prefer blocks with high paragraph text density
+ score += min(int(p_density * 100), 50) # Max 50 points for paragraph density
+
+ # Penalize high link density
+ score -= int(link_density * 100)
+
+ # Boost for high text density
+ score += min(int(text_density * 30), 30) # Max 30 points for text density
+
+ # Boost for certain attributes and classes
+ content_indicators = ['content', 'article', 'story', 'post', 'text', 'body', 'entry']
+
+ # Check class and id attributes
+ for attr in ['class', 'id']:
+ attr_val = block.get(attr, '')
+ if attr_val:
+ if isinstance(attr_val, list):
+ attr_val = ' '.join(attr_val)
+ for indicator in content_indicators:
+ if indicator in attr_val.lower():
+ score += 30
+ break
+
+ # Penalty for boilerplate indicators
+ boilerplate_indicators = ['sidebar', 'menu', 'nav', 'banner', 'ad', 'footer', 'header', 'comment', 'share', 'related']
+ for attr in ['class', 'id']:
+ attr_val = block.get(attr, '')
+ if attr_val:
+ if isinstance(attr_val, list):
+ attr_val = ' '.join(attr_val)
+ for indicator in boilerplate_indicators:
+ if indicator in attr_val.lower():
+ score -= 50
+ break
+
+ # Add to content blocks if score is positive
+ if score > 0:
+ content_blocks.append({
+ 'element': block,
+ 'score': score,
+ 'char_count': char_count,
+ 'text': text
+ })
+
+ # Sort content blocks by score
+ if content_blocks:
+ content_blocks.sort(key=lambda x: x['score'], reverse=True)
+ best_block = content_blocks[0]['element']
+
+ # Clean up the best block
+ for unwanted in best_block.find_all(['aside', 'nav', 'footer', 'header']):
+ unwanted.decompose()
+
+ extracted_content = best_block.get_text(separator=' ', strip=True)

- if text:
- if len(text) > max_chars:
- text = text[:max_chars] + "..."
- return text
+ # ---------- PARAGRAPH EXTRACTION FALLBACK ----------
+ if not extracted_content:
+ # Get all paragraphs with substantial content
+ paragraphs = []
+ for p in processed_soup.find_all('p'):
+ text = p.get_text(strip=True)
+ if len(text) > 40: # Only consider substantial paragraphs
+ # Calculate link density
+ link_density = calculate_link_density(p)
+ if link_density < 0.25: # Skip if too many links
+ paragraphs.append(text)
+
+ if paragraphs:
+ extracted_content = ' '.join(paragraphs)

+ # If we have content, clean it up
+ if extracted_content:
+ # Clean whitespace
+ extracted_content = re.sub(r'\s+', ' ', extracted_content).strip()
+
+ # Remove URLs
+ extracted_content = re.sub(r'https?://\S+', '', extracted_content)
+
+ # Remove email addresses
+ extracted_content = re.sub(r'\S+@\S+', '', extracted_content)
+
+ # Remove social media handles
+ extracted_content = re.sub(r'@\w+', '', extracted_content)
+
+ # Replace multiple spaces with single space
+ extracted_content = re.sub(r' +', ' ', extracted_content)
+
+ # Normalize quotes and apostrophes
+ extracted_content = extracted_content.replace('"', '"').replace('"', '"')
+ extracted_content = extracted_content.replace("'", "'").replace("'", "'")
+
+ # Remove any remaining HTML entities
+ extracted_content = re.sub(r'&[a-zA-Z]+;', ' ', extracted_content)
+
+ # Remove short lines that are likely navigation/menu items
+ lines = extracted_content.split('\n')
+ extracted_content = ' '.join([line for line in lines if len(line) > 40 or '.' in line])
+
+ # Combine with meta description if available and content is short
+ if meta_description and len(extracted_content) < 500:
+ extracted_content = meta_description + " " + extracted_content
+
+ # Truncate if needed
+ if len(extracted_content) > max_chars:
+ # Try to break at a sentence boundary
+ cutoff_point = max_chars
+ for i in range(max_chars - 1, max_chars - 300, -1):
+ if i < len(extracted_content) and extracted_content[i] in ['.', '!', '?']:
+ cutoff_point = i + 1
+ break
+
+ extracted_content = extracted_content[:cutoff_point]
+
+ return extracted_content
+ else:
+ # Return meta description if nothing else was found
+ if meta_description:
+ return meta_description
+
+ logger.error(f"No content extracted from {url}")
+ return None
  else:
  logger.error(f"Request to {url} returned status code {response.status_code}")
-
- except ImportError:
- logger.error("Trafilatura not installed. Install with 'pip install trafilatura'")
- # Try direct requests only
- try:
- response = requests.get(url, headers=headers, timeout=10)
- if response.status_code == 200:
- # Very basic HTML cleaning
- html_content = response.text
- text = re.sub(r'<[^>]+>', ' ', html_content)
- text = re.sub(r'\s+', ' ', text).strip()
-
- if text:
- if len(text) > max_chars:
- text = text[:max_chars] + "..."
- return text
- except Exception as req_error:
- logger.error(f"Direct request fallback failed: {str(req_error)}")
+ return None

  except Exception as e:
- logger.error(f"Error extracting content with trafilatura: {str(e)}")
- # Try the requests fallback
+ logger.error(f"Error extracting content with hybrid approach: {str(e)}")
+ # Try a basic fallback
  try:
  response = requests.get(url, headers=headers, timeout=10)
  if response.status_code == 200:
- html_content = response.text
- text = re.sub(r'<[^>]+>', ' ', html_content)
+ soup = BeautifulSoup(response.text, 'html.parser')
+ # Just get the text without images, scripts, styles, etc.
+ for tag in soup(['script', 'style', 'img', 'nav', 'footer', 'header']):
+ tag.decompose()
+ text = soup.get_text(separator=' ', strip=True)
  text = re.sub(r'\s+', ' ', text).strip()

  if text:
@@ -164,14 +422,40 @@ def extract_article_content(url: str, max_chars: int = 2000) -> Optional[str]:
  text = text[:max_chars] + "..."
  return text
  except Exception as req_error:
- logger.error(f"Direct request fallback failed: {str(req_error)}")
+ logger.error(f"Basic fallback failed: {str(req_error)}")

  return None
  except Exception as e:
  logger.error(f"Error extracting content from {url}: {str(e)}")
  return None

- def get_web_search_results(query: str, max_results: int = 3, max_chars_per_result: int = 2000) -> Dict[str, Any]:
+ def calculate_link_density(element):
+ """
+ Calculate the ratio of link text to all text in an element.
+ Used to identify navigation-heavy areas.
+
+ Args:
+ element: BeautifulSoup element
+
+ Returns:
+ Float between 0 and 1 indicating link density
+ """
+ try:
+ if element is None:
+ return 0
+
+ text_length = len(element.get_text(strip=True))
+ if text_length == 0:
+ return 0
+
+ links = element.find_all('a')
+ link_text_length = sum(len(a.get_text(strip=True)) for a in links)
+
+ return link_text_length / text_length
+ except Exception:
+ return 0
+
+ def get_web_search_results(query: str, max_results: int = 5, max_chars_per_result: int = 5000) -> Dict[str, Any]:
  """
  Get formatted web search results ready to be included in AI prompts.

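To make the reworked module above easier to follow, here is a minimal usage sketch (illustrative only, not part of the package diff). The import path and the signatures of `perform_web_search` and `extract_article_content`, including the new `max_chars=5000` default, are taken from the diff; the `href` key on the search-result dictionaries is an assumption about the duckduckgo-search result format.

```python
# Illustrative sketch only; assumes duckduckgo-search result dicts carry the
# URL under "href", which the diff itself does not confirm.
from ngpt.utils.web_search import perform_web_search, extract_article_content

results = perform_web_search("quantum error correction", max_results=5)
for result in results:
    url = result.get("href")  # assumed key name
    if not url:
        continue
    # 3.5.0 extracts with BeautifulSoup and caps output at 5000 chars by default
    content = extract_article_content(url, max_chars=5000)
    if content:
        print(f"{url}\n{content[:200]}...\n")
```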
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ngpt
- Version: 3.4.5
+ Version: 3.5.0
  Summary: Swiss army knife for LLMs: powerful CLI and interactive chatbot in one package. Seamlessly work with OpenAI, Ollama, Groq, Claude, Gemini, or any OpenAI-compatible API to generate code, craft git commits, rewrite text, and execute shell commands.
  Project-URL: Homepage, https://github.com/nazdridoy/ngpt
  Project-URL: Repository, https://github.com/nazdridoy/ngpt
@@ -28,12 +28,12 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Classifier: Topic :: Utilities
  Requires-Python: >=3.8
+ Requires-Dist: beautifulsoup4>=4.12.0
  Requires-Dist: duckduckgo-search>=3.0.0
  Requires-Dist: prompt-toolkit>=3.0.0
  Requires-Dist: pyperclip>=1.8.0
  Requires-Dist: requests>=2.31.0
  Requires-Dist: rich>=10.0.0
- Requires-Dist: trafilatura>=1.6.0
  Description-Content-Type: text/markdown

  # nGPT
@@ -62,10 +62,10 @@ Description-Content-Type: text/markdown

  - ✅ **Versatile**: Powerful and easy-to-use CLI tool for various AI tasks
  - 🪶 **Lightweight**: Minimal dependencies with everything you need included
- - 🔄 **API Flexibility**: Works with OpenAI, Ollama, Groq, Claude, Gemini, and any compatible endpoint
+ - 🔄 **API Flexibility**: Works with OpenAI, Ollama, Groq, Claude, Gemini, and any OpenAI-compatible endpoint
  - 💬 **Interactive Chat**: Continuous conversation with memory in modern UI
  - 📊 **Streaming Responses**: Real-time output for better user experience
- - 🔍 **Web Search**: Enhance any model with contextual information from the web
+ - 🔍 **Web Search**: Enhance any model with contextual information from the web, using advanced content extraction to identify the most relevant information from web pages
  - 📥 **Stdin Processing**: Process piped content by using `{}` placeholder in prompts
  - 🎨 **Markdown Rendering**: Beautiful formatting of markdown and code with syntax highlighting
  - ⚡ **Real-time Markdown**: Stream responses with live updating syntax highlighting and formatting
@@ -85,37 +85,47 @@ See the [Feature Overview](https://nazdridoy.github.io/ngpt/overview/) for more


  ## Table of Contents
- - [Quick Start](#quick-start)
  - [Features](#features)
- - [Documentation](#documentation)
  - [Installation](#installation)
+ - [Quick Start](#quick-start)
  - [Usage](#usage)
+ - [Command Line Options](#command-line-options)
  - [Documentation](https://nazdridoy.github.io/ngpt/)
- - [CLI Tool](#as-a-cli-tool)
+ - [Documentation](#documentation)
  - [Configuration](#configuration)
- - [Command Line Options](#command-line-options)
+ - [API Key Setup](#api-key-setup)
+ - [OpenAI API Key](#openai-api-key)
+ - [Google Gemini API Key](#google-gemini-api-key)
  - [CLI Configuration](#cli-configuration)
  - [Interactive Configuration](#interactive-configuration)
  - [Configuration File](#configuration-file)
  - [Configuration Priority](#configuration-priority)
- - [API Key Setup](#api-key-setup)
- - [OpenAI API Key](#openai-api-key)
- - [Google Gemini API Key](#google-gemini-api-key)
  - [Contributing](#contributing)
  - [License](#license)

- ## Quick Start
+ ## Installation

  ```bash
- # Install with pip
+ # Installation with pip
  pip install ngpt

- # Or install with uv (faster)
+ # Or install with uv (faster installation)
  uv pip install ngpt

- # Or install globally as a CLI tool (recommended)
+ # Or install globally as a CLI tool (recommended for command-line usage)
  uv tool install ngpt

+ # Arch Linux: install from AUR
+ paru -S ngpt
+ ```
+
+ Requires Python 3.8 or newer.
+
+ For detailed installation instructions, see the [Installation Guide](https://nazdridoy.github.io/ngpt/installation/).
+
+ ## Quick Start
+
+ ```bash
  # Chat with default settings
  ngpt "Tell me about quantum computing"

@@ -200,58 +210,6 @@ ngpt --provider Groq "Explain quantum computing"
  # Compare outputs from different providers
  ngpt --provider OpenAI "Explain quantum physics" > openai_response.txt
  ngpt --provider Ollama "Explain quantum physics" > ollama_response.txt
- ```
-
- For more examples and detailed usage, visit the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
-
- ## Documentation
-
- Comprehensive documentation, including usage guides and examples, is available at:
-
- **[https://nazdridoy.github.io/ngpt/](https://nazdridoy.github.io/ngpt/)**
-
- Key documentation sections:
- - [Installation Guide](https://nazdridoy.github.io/ngpt/installation/)
- - [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/)
- - [Configuration Guide](https://nazdridoy.github.io/ngpt/configuration/)
- - [Examples & Tutorials](https://nazdridoy.github.io/ngpt/examples/basic/)
-
- ## Installation
-
- ```bash
- # Installation with pip
- pip install ngpt
-
- # Or install with uv (faster installation)
- uv pip install ngpt
-
- # Or install globally as a CLI tool (recommended for command-line usage)
- uv tool install ngpt
-
- # Arch Linux: install from AUR
- paru -S ngpt
- ```
-
- Requires Python 3.8 or newer.
-
- For detailed installation instructions, see the [Installation Guide](https://nazdridoy.github.io/ngpt/installation/).
-
- ## Usage
-
- ### As a CLI Tool
-
- ```bash
- # Basic chat (default mode)
- ngpt "Hello, how are you?"
-
- # Interactive chat session with conversation history
- ngpt -i
-
- # Log conversation to a file
- ngpt --interactive --log conversation.log
-
- # Use custom system prompt to guide AI behavior
- ngpt --preprompt "You are a Python programming tutor" "Explain decorators"

  # Show all API configurations
  ngpt --show-config --all
@@ -277,47 +235,16 @@ ngpt -s "list all files in current directory"
  # On Windows generates: dir
  # On Linux/macOS generates: ls -la

- # Generate clean code (using -c or --code flag)
- # Returns only code without markdown formatting or explanations
+ # Generate code (using -c or --code flag)
  ngpt -c "create a python function that calculates fibonacci numbers"

  # Use multiline text editor for complex prompts (using -t or --text flag)
- # Opens an interactive editor with syntax highlighting and intuitive controls
  ngpt -t
  ```

- For more CLI examples and detailed usage information, see the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
-
- ## Configuration
-
- ### API Key Setup
-
- #### OpenAI API Key
- 1. Create an account at [OpenAI](https://platform.openai.com/)
- 2. Navigate to API keys: https://platform.openai.com/api-keys
- 3. Click "Create new secret key" and copy your API key
- 4. Configure nGPT with your key:
- ```bash
- ngpt --config
- # Enter provider: OpenAI
- # Enter API key: your-openai-api-key
- # Enter base URL: https://api.openai.com/v1/
- # Enter model: gpt-3.5-turbo (or other model)
- ```
+ For more examples and detailed usage, visit the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).

- #### Google Gemini API Key
- 1. Create or use an existing Google account
- 2. Go to [Google AI Studio](https://aistudio.google.com/)
- 3. Navigate to API keys in the left sidebar (or visit https://aistudio.google.com/app/apikey)
- 4. Create an API key and copy it
- 5. Configure nGPT with your key:
- ```bash
- ngpt --config
- # Enter provider: Gemini
- # Enter API key: your-gemini-api-key
- # Enter base URL: https://generativelanguage.googleapis.com/v1beta/openai
- # Enter model: gemini-2.0-flash
- ```
+ ## Usage

  ### Command Line Options

@@ -326,8 +253,8 @@ For more CLI examples and detailed usage information, see the [CLI Usage Guide](
  usage: ngpt [-h] [-v] [--language LANGUAGE] [--config [CONFIG]] [--config-index CONFIG_INDEX] [--provider PROVIDER]
  [--remove] [--show-config] [--all] [--list-models] [--list-renderers] [--cli-config [COMMAND ...]]
  [--api-key API_KEY] [--base-url BASE_URL] [--model MODEL] [--web-search] [--temperature TEMPERATURE]
- [--top_p TOP_P] [--max_tokens MAX_TOKENS] [--log [FILE]] [--preprompt PREPROMPT] [--no-stream] [--prettify]
- [--stream-prettify] [--renderer {auto,rich,glow}] [--rec-chunk] [--diff [FILE]] [--chunk-size CHUNK_SIZE]
+ [--top_p TOP_P] [--max_tokens MAX_TOKENS] [--log [FILE]] [--preprompt PREPROMPT] [--no-stream | --prettify |
+ --stream-prettify] [--renderer {auto,rich,glow}] [--rec-chunk] [--diff [FILE]] [--chunk-size CHUNK_SIZE]
  [--analyses-chunk-size ANALYSES_CHUNK_SIZE] [--max-msg-lines MAX_MSG_LINES]
  [--max-recursion-depth MAX_RECURSION_DEPTH] [-i | -s | -c | -t | -p | -r | -g]
  [prompt]
@@ -346,12 +273,10 @@ options::

  Configuration Options::

- --config [CONFIG] Path to a custom config file or, if no value provided, enter interactive
- configuration mode to create a new config
+ --config [CONFIG] Path to a custom config file or, if no value provided, enter interactive configuration mode to create a new config
  --config-index CONFIG_INDEX Index of the configuration to use or edit (default: 0)
  --provider PROVIDER Provider name to identify the configuration to use
- --remove Remove the configuration at the specified index (requires --config and
- --config-index or --provider)
+ --remove Remove the configuration at the specified index (requires --config and --config-index or --provider)
  --show-config Show the current configuration(s) and exit
  --all Show details for all configurations (requires --show-config)
  --list-models List all available models for the current configuration and exit
@@ -363,30 +288,28 @@ Global Options::
  --api-key API_KEY API key for the service
  --base-url BASE_URL Base URL for the API
  --model MODEL Model to use
- --web-search Enable web search capability (Note: Your API endpoint must support this
- feature)
+ --web-search Enable web search capability using DuckDuckGo to enhance prompts with relevant information
  --temperature TEMPERATURE Set temperature (controls randomness, default: 0.7)
  --top_p TOP_P Set top_p (controls diversity, default: 1.0)
  --max_tokens MAX_TOKENS Set max response length in tokens
- --log [FILE] Set filepath to log conversation to, or create a temporary log file if no path
- provided
+ --log [FILE] Set filepath to log conversation to, or create a temporary log file if no path provided
  --preprompt PREPROMPT Set custom system prompt to control AI behavior
- --no-stream Return the whole response without streaming
- --prettify Render markdown responses and code with syntax highlighting and formatting
- --stream-prettify Enable streaming with markdown rendering (automatically uses Rich renderer)
- --renderer {auto,rich,glow} Select which markdown renderer to use with --prettify (auto, rich, or glow)
+ --renderer {auto,rich,glow} Select which markdown renderer to use with --prettify or --stream-prettify (auto, rich, or glow)
+
+ Output Display Options (mutually exclusive)::
+
+ --no-stream Return the whole response without streaming or formatting
+ --prettify Render complete response with markdown and code formatting (non-streaming)
+ --stream-prettify Stream response with real-time markdown rendering (default)

  Git Commit Message Options::

  --rec-chunk Process large diffs in chunks with recursive analysis if needed
- --diff [FILE] Use diff from specified file instead of staged changes. If used without a path,
- uses the path from CLI config.
+ --diff [FILE] Use diff from specified file instead of staged changes. If used without a path, uses the path from CLI config.
  --chunk-size CHUNK_SIZE Number of lines per chunk when chunking is enabled (default: 200)
- --analyses-chunk-size ANALYSES_CHUNK_SIZE
- Number of lines per chunk when recursively chunking analyses (default: 200)
+ --analyses-chunk-size ANALYSES_CHUNK_SIZE Number of lines per chunk when recursively chunking analyses (default: 200)
  --max-msg-lines MAX_MSG_LINES Maximum number of lines in commit message before condensing (default: 20)
- --max-recursion-depth MAX_RECURSION_DEPTH
- Maximum recursion depth for commit message condensing (default: 3)
+ --max-recursion-depth MAX_RECURSION_DEPTH Maximum recursion depth for commit message condensing (default: 3)

  Modes (mutually exclusive)::

@@ -394,15 +317,62 @@ Modes (mutually exclusive)::
  -s, --shell Generate and execute shell commands
  -c, --code Generate code
  -t, --text Enter multi-line text input (submit with Ctrl+D)
- -p, --pipe Read from stdin and use content with prompt. Use {} in prompt as placeholder
- for stdin content
+ -p, --pipe Read from stdin and use content with prompt. Use {} in prompt as placeholder for stdin content
  -r, --rewrite Rewrite text from stdin to be more natural while preserving tone and meaning
  -g, --gitcommsg Generate AI-powered git commit messages from staged changes or diff file
  ```

  > **Note**: For better visualization of conventional commit messages on GitHub, you can use the [GitHub Commit Labels](https://greasyfork.org/en/scripts/526153-github-commit-labels) userscript, which adds colorful labels to your commits.

- For a complete reference of all available options, see the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
+ For a complete reference of all available options, detailed CLI examples and usage information, see the [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/).
+
+
+ ## Documentation
+
+ Comprehensive documentation, including usage guides and examples, is available at:
+
+ **[https://nazdridoy.github.io/ngpt/](https://nazdridoy.github.io/ngpt/)**
+
+ Key documentation sections:
+ - [Installation Guide](https://nazdridoy.github.io/ngpt/installation/)
+ - [CLI Usage Guide](https://nazdridoy.github.io/ngpt/usage/cli_usage/)
+ - [Configuration Guide](https://nazdridoy.github.io/ngpt/configuration/)
+ - [Examples & Tutorials](https://nazdridoy.github.io/ngpt/examples/basic/)
+ - [Git Commit Message Guide](https://nazdridoy.github.io/ngpt/usage/gitcommsg/)
+
+
+ ## Configuration
+
+ ### API Key Setup
+
+ #### OpenAI API Key
+ 1. Create an account at [OpenAI](https://platform.openai.com/)
+ 2. Navigate to API keys: https://platform.openai.com/api-keys
+ 3. Click "Create new secret key" and copy your API key
+ 4. Configure nGPT with your key:
+ ```bash
+ ngpt --config
+ # Enter provider: OpenAI
+ # Enter API key: your-openai-api-key
+ # Enter base URL: https://api.openai.com/v1/
+ # Enter model: gpt-3.5-turbo (or other model)
+ ```
+
+ #### Google Gemini API Key
+ 1. Create or use an existing Google account
+ 2. Go to [Google AI Studio](https://aistudio.google.com/)
+ 3. Navigate to API keys in the left sidebar (or visit https://aistudio.google.com/app/apikey)
+ 4. Create an API key and copy it
+ 5. Configure nGPT with your key:
+ ```bash
+ ngpt --config
+ # Enter provider: Gemini
+ # Enter API key: your-gemini-api-key
+ # Enter base URL: https://generativelanguage.googleapis.com/v1beta/openai
+ # Enter model: gemini-2.0-flash
+ ```
+
+ For more detailed information, refer to the [API Key Setup documentation](https://nazdridoy.github.io/ngpt/configuration/#api-key-setup).

  ### CLI Configuration

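As a rough illustration of what the reworked `--web-search` flag documented above relies on, here is a hedged sketch built on the `get_web_search_results` helper from the web_search.py diff. The call signature and its new defaults come from the diff; treating the return value as an opaque block of prompt context is an assumption, since the diff does not show its exact structure.

```python
# Hedged sketch: assemble web search context for a prompt. The exact structure
# of the returned dictionary is not shown in the diff, so it is only embedded
# here as plain text rather than picked apart by key.
from ngpt.utils.web_search import get_web_search_results

question = "What changed in the latest stable Linux kernel release?"
web_context = get_web_search_results(question)  # defaults: 5 results, 5000 chars each

enhanced_prompt = (
    "Answer using the following web search results.\n\n"
    f"{web_context}\n\n"
    f"Question: {question}"
)
print(enhanced_prompt[:500])
```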
@@ -20,9 +20,9 @@ ngpt/utils/__init__.py,sha256=qu_66I1Vtav2f1LDiPn5J3DUsbK7o1CSScMcTkYqxoM,1179
  ngpt/utils/cli_config.py,sha256=Ug8cECBTIuzOwkBWidLTfs-OAdOsCMJ2bNa70pOADfw,11195
  ngpt/utils/config.py,sha256=wsArA4osnh8fKqOvtsPqqBxAz3DpdjtaWUFaRtnUdyc,10452
  ngpt/utils/log.py,sha256=f1jg2iFo35PAmsarH8FVL_62plq4VXH0Mu2QiP6RJGw,15934
- ngpt/utils/web_search.py,sha256=yvCUDNhwcIcKZ_hWESFQQ-vB-LKsDDCDT17YFzFcGR4,12598
- ngpt-3.4.5.dist-info/METADATA,sha256=ofAuga74DmRxU0xIJ_FNHMYIEu-wAzVZPpjpiKwPHqw,24585
- ngpt-3.4.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- ngpt-3.4.5.dist-info/entry_points.txt,sha256=SqAAvLhMrsEpkIr4YFRdUeyuXQ9o0IBCeYgE6AVojoI,44
- ngpt-3.4.5.dist-info/licenses/LICENSE,sha256=mQkpWoADxbHqE0HRefYLJdm7OpdrXBr3vNv5bZ8w72M,1065
- ngpt-3.4.5.dist-info/RECORD,,
+ ngpt/utils/web_search.py,sha256=_e78fk-6WDVqHDIaAMP9CDoX4FyOWM9XhdSZcr5KasI,29163
+ ngpt-3.5.0.dist-info/METADATA,sha256=BEcmK1bv4EZ_Q6W7MDnDPo304e-_L0zk1bDgH4mmp8Q,23926
+ ngpt-3.5.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ ngpt-3.5.0.dist-info/entry_points.txt,sha256=SqAAvLhMrsEpkIr4YFRdUeyuXQ9o0IBCeYgE6AVojoI,44
+ ngpt-3.5.0.dist-info/licenses/LICENSE,sha256=mQkpWoADxbHqE0HRefYLJdm7OpdrXBr3vNv5bZ8w72M,1065
+ ngpt-3.5.0.dist-info/RECORD,,