janito 0.12.0__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. janito/__init__.py +1 -1
  2. janito/cli/agent/__init__.py +7 -0
  3. janito/cli/agent/conversation.py +149 -0
  4. janito/cli/agent/initialization.py +172 -0
  5. janito/cli/agent/query.py +108 -0
  6. janito/cli/agent.py +7 -282
  7. janito/cli/app.py +105 -9
  8. janito/cli/commands/__init__.py +12 -0
  9. janito/cli/commands/config.py +242 -0
  10. janito/cli/commands/history.py +119 -0
  11. janito/cli/commands/profile.py +72 -0
  12. janito/cli/commands/validation.py +24 -0
  13. janito/cli/commands/workspace.py +31 -0
  14. janito/cli/commands.py +9 -326
  15. janito/config.py +37 -0
  16. janito/data/instructions_template.txt +9 -5
  17. janito/tools/__init__.py +8 -2
  18. janito/tools/bash/bash.py +3 -1
  19. janito/tools/bash/unix_persistent_bash.py +183 -181
  20. janito/tools/bash/win_persistent_bash.py +4 -2
  21. janito/tools/fetch_webpage/__init__.py +22 -33
  22. janito/tools/fetch_webpage/core.py +182 -155
  23. janito/tools/rich_console.py +46 -9
  24. janito/tools/search_text.py +225 -238
  25. janito/tools/str_replace_editor/handlers/str_replace.py +3 -1
  26. janito/tools/str_replace_editor/handlers/view.py +14 -8
  27. janito/tools/think.py +37 -0
  28. janito/tools/usage_tracker.py +1 -0
  29. janito-0.14.0.dist-info/METADATA +396 -0
  30. janito-0.14.0.dist-info/RECORD +53 -0
  31. janito/test_file.py +0 -4
  32. janito/tools/fetch_webpage/chunking.py +0 -76
  33. janito/tools/fetch_webpage/extractors.py +0 -276
  34. janito/tools/fetch_webpage/news.py +0 -137
  35. janito/tools/fetch_webpage/utils.py +0 -108
  36. janito-0.12.0.dist-info/METADATA +0 -203
  37. janito-0.12.0.dist-info/RECORD +0 -47
  38. {janito-0.12.0.dist-info → janito-0.14.0.dist-info}/WHEEL +0 -0
  39. {janito-0.12.0.dist-info → janito-0.14.0.dist-info}/entry_points.txt +0 -0
  40. {janito-0.12.0.dist-info → janito-0.14.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,155 +1,182 @@
1
- """
2
- Core functionality for fetching web pages and extracting content.
3
- """
4
-
5
- import requests
6
- from typing import Tuple, List, Optional
7
- from urllib.parse import urlparse
8
- from janito.tools.rich_console import print_info, print_success, print_error, print_warning
9
- from janito.tools.usage_tracker import track_usage
10
-
11
- from janito.tools.fetch_webpage.extractors import extract_clean_text
12
- # Import moved to fetch_and_extract function to avoid circular imports
13
- from janito.tools.fetch_webpage.utils import SITE_SPECIFIC_STRATEGIES
14
-
15
-
16
- @track_usage('web_requests')
17
- def fetch_webpage(url: str, headers: dict = None, timeout: int = 30, max_size: int = 5000000,
18
- target_strings: List[str] = None) -> Tuple[str, bool]:
19
- """
20
- Fetch the content of a web page from a given URL.
21
-
22
- Args:
23
- url: The URL of the web page to fetch
24
- headers: Optional HTTP headers to include in the request (default: None)
25
- timeout: Request timeout in seconds (default: 30)
26
- max_size: Maximum size in bytes to download (default: 5MB)
27
- target_strings: Optional list of strings to target specific content sections
28
-
29
- Returns:
30
- A tuple containing (message, is_error)
31
- """
32
- print_info(f"Fetching content from URL: {url}", "Web Fetch")
33
-
34
- try:
35
- # Set default headers if none provided
36
- if headers is None:
37
- headers = {
38
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
39
- }
40
-
41
- # Make the HTTP request with streaming enabled
42
- response = requests.get(url, headers=headers, timeout=timeout, stream=True)
43
-
44
- # Raise an exception for HTTP errors
45
- response.raise_for_status()
46
-
47
- # Check content length before downloading fully
48
- content_length = response.headers.get('Content-Length')
49
- if content_length and int(content_length) > max_size:
50
- warning_msg = f"Web Fetch: Content size ({int(content_length)/1000000:.1f}MB) exceeds max size ({max_size/1000000:.1f}MB). Aborting download."
51
- print_warning(warning_msg)
52
- return warning_msg, True
53
-
54
- # Download content with size limit
55
- content_bytes = b''
56
- for chunk in response.iter_content(chunk_size=1024 * 1024): # 1MB chunks
57
- content_bytes += chunk
58
- if len(content_bytes) > max_size:
59
- warning_msg = f"Web Fetch: Download exceeded max size ({max_size/1000000:.1f}MB). Truncating."
60
- print_warning(warning_msg)
61
- break
62
-
63
- # Get the content
64
- content = content_bytes.decode('utf-8', errors='replace')
65
-
66
- # If target strings are provided, extract only the relevant sections
67
- if target_strings and len(target_strings) > 0:
68
- print_info(f"Targeting specific content using {len(target_strings)} search strings", "Web Fetch")
69
- from janito.tools.fetch_webpage.extractors import extract_targeted_content
70
- targeted_content = extract_targeted_content(content, target_strings)
71
-
72
- if targeted_content:
73
- print_success(f"Successfully targeted specific content based on search strings", "Web Fetch")
74
- # Create a summary with first 300 chars of targeted content
75
- content_preview = targeted_content[:300] + "..." if len(targeted_content) > 300 else targeted_content
76
- summary = f"Successfully fetched targeted content from {url}\n\nContent preview:\n{content_preview}"
77
- print_success(f"Successfully fetched targeted content from {url} ({len(targeted_content)} bytes)", "Web Fetch")
78
- return targeted_content, False
79
- else:
80
- print_warning(f"Web Fetch: Could not find content matching the target strings. Returning full content.")
81
-
82
- # Create a summary message with first 300 chars of content
83
- content_preview = content[:300] + "..." if len(content) > 300 else content
84
-
85
- print_success(f"({len(content)} bytes)", "Web Fetch")
86
-
87
- # Return the full content
88
- return content, False
89
-
90
- except requests.exceptions.RequestException as e:
91
- error_msg = f"Error fetching web page: {str(e)}"
92
- print_error(error_msg, "Web Fetch Error")
93
- return error_msg, True
94
-
95
-
96
- @track_usage('web_content')
97
- def fetch_and_extract(url: str, extract_method: str = 'trafilatura',
98
- max_length: int = 10000,
99
- target_strings: List[str] = None) -> Tuple[str, bool]:
100
- """
101
- Fetch a webpage and extract its main content in a format suitable for LLM processing.
102
-
103
- Args:
104
- url: The URL to fetch
105
- extract_method: Content extraction method ('trafilatura', 'newspaper', 'beautifulsoup', 'all')
106
- max_length: Maximum length of text to return
107
- target_strings: Optional list of strings to target specific content sections
108
-
109
- Returns:
110
- A tuple containing (extracted_content, is_error)
111
- """
112
- # Check if this is a news aggregator site that needs special handling
113
- domain = urlparse(url).netloc
114
- for site_domain in SITE_SPECIFIC_STRATEGIES.keys():
115
- if site_domain in domain:
116
- print_info(f"Detected news aggregator site: {domain}. Using specialized extraction.", "Content Extraction")
117
- # Import here to avoid circular imports
118
- from janito.tools.fetch_webpage.news import fetch_and_extract_news_aggregator
119
- return fetch_and_extract_news_aggregator(url)
120
-
121
- # If target strings are provided, pass them directly to fetch_webpage for efficiency
122
- if target_strings and len(target_strings) > 0:
123
- html_content, is_error = fetch_webpage(url, target_strings=target_strings)
124
- else:
125
- html_content, is_error = fetch_webpage(url)
126
-
127
- if is_error:
128
- return html_content, True
129
-
130
- extracted_text = extract_clean_text(html_content, method=extract_method, url=url)
131
-
132
- if not extracted_text or len(extracted_text) < 100:
133
- return f"Could not extract meaningful content from {url}", True
134
-
135
- # If target strings were provided but not already handled by fetch_webpage
136
- if target_strings and len(target_strings) > 0 and not any(target in extracted_text for target in target_strings if len(target) > 3):
137
- from janito.tools.fetch_webpage.extractors import extract_targeted_content
138
- targeted_content = extract_targeted_content(html_content, target_strings)
139
- if targeted_content:
140
- print_success(f"Successfully extracted targeted content based on {len(target_strings)} search strings",
141
- "Targeted Extraction")
142
- extracted_text = targeted_content
143
-
144
- # Truncate if needed
145
- if len(extracted_text) > max_length:
146
- print_info(f"Truncating content from {len(extracted_text)} to {max_length} characters", "Content Extraction")
147
- extracted_text = extracted_text[:max_length] + "..."
148
-
149
- # Check if the content is still too large for an LLM (rough estimate)
150
- estimated_tokens = len(extracted_text.split())
151
- if estimated_tokens > 10000: # Conservative estimate for token limits
152
- print_warning(f"Content Extraction: Extracted content still very large (~{estimated_tokens} words). Consider using chunk_large_content()")
153
-
154
- print_success(f"Successfully extracted {len(extracted_text)} characters of content", "Content Extraction")
155
- return extracted_text, False
1
+ """
2
+ Core functionality for fetching web pages and extracting content.
3
+ """
4
+
5
+ import requests
6
+ from typing import Tuple, List, Optional
7
+ from urllib.parse import urlparse, unquote
8
+ from janito.tools.rich_console import print_info, print_success, print_error, print_warning
9
+ from janito.tools.usage_tracker import track_usage
10
+ from bs4 import BeautifulSoup
11
+
12
+ @track_usage('web_requests')
13
+ def fetch_webpage(url: str, headers: dict = None, timeout: int = 30, max_size: int = 5000000) -> Tuple[str, bool]:
14
+ """
15
+ Fetch the content of a web page from a given URL.
16
+
17
+ Args:
18
+ url: The URL of the web page to fetch
19
+ headers: Optional HTTP headers to include in the request (default: None)
20
+ timeout: Request timeout in seconds (default: 30)
21
+ max_size: Maximum size in bytes to download (default: 5MB)
22
+
23
+ Returns:
24
+ A tuple containing (message, is_error)
25
+ """
26
+ print_info(f"Fetching content from URL: {url}", "Web Fetch")
27
+
28
+ try:
29
+ # Set default headers if none provided
30
+ if headers is None:
31
+ headers = {
32
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
33
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
34
+ 'Accept-Language': 'en-US,en;q=0.9',
35
+ 'Accept-Encoding': 'gzip, deflate, br',
36
+ 'Referer': 'https://www.google.com/',
37
+ 'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
38
+ 'Sec-Ch-Ua-Mobile': '?0',
39
+ 'Sec-Ch-Ua-Platform': '"Windows"',
40
+ 'Sec-Fetch-Dest': 'document',
41
+ 'Sec-Fetch-Mode': 'navigate',
42
+ 'Sec-Fetch-Site': 'cross-site',
43
+ 'Sec-Fetch-User': '?1',
44
+ 'Upgrade-Insecure-Requests': '1'
45
+ }
46
+
47
+ # Make the HTTP request with streaming enabled
48
+ response = requests.get(url, headers=headers, timeout=timeout, stream=True)
49
+
50
+ # Raise an exception for HTTP errors
51
+ response.raise_for_status()
52
+
53
+ # Check content length before downloading fully
54
+ content_length = response.headers.get('Content-Length')
55
+ if content_length and int(content_length) > max_size:
56
+ warning_msg = f"Web Fetch: Content size ({int(content_length)/1000000:.1f}MB) exceeds max size ({max_size/1000000:.1f}MB). Aborting download."
57
+ print_warning(warning_msg)
58
+ return warning_msg, True
59
+
60
+ # Download content with size limit
61
+ content_bytes = b''
62
+ for chunk in response.iter_content(chunk_size=1024 * 1024): # 1MB chunks
63
+ content_bytes += chunk
64
+ if len(content_bytes) > max_size:
65
+ warning_msg = f"Web Fetch: Download exceeded max size ({max_size/1000000:.1f}MB). Truncating."
66
+ print_warning(warning_msg)
67
+ break
68
+
69
+ # Get the content
70
+ content = content_bytes.decode('utf-8', errors='replace')
71
+
72
+ print_success(f"Successfully fetched content ({len(content)} bytes)", "Web Fetch")
73
+
74
+ # Return the full content
75
+ return content, False
76
+
77
+ except requests.exceptions.RequestException as e:
78
+ error_msg = f"Error fetching web page: {str(e)}"
79
+ print_error(error_msg, "Web Fetch Error")
80
+ return error_msg, True
81
+
82
+
83
+ @track_usage('web_content')
84
+ def fetch_and_extract(url: str, max_length: int = 10000, keywords: List[str] = None) -> Tuple[str, bool]:
85
+ """
86
+ Fetch a webpage and extract its main content using BeautifulSoup.
87
+
88
+ Args:
89
+ url: The URL to fetch
90
+ max_length: Maximum length of text to return
91
+ keywords: Optional list of URL-encoded keywords to prioritize content containing these terms
92
+
93
+ Returns:
94
+ A tuple containing (extracted_content, is_error)
95
+ """
96
+ html_content, is_error = fetch_webpage(url)
97
+
98
+ if is_error:
99
+ return html_content, True
100
+
101
+ try:
102
+ # Use BeautifulSoup to parse and extract content
103
+ soup = BeautifulSoup(html_content, 'html.parser')
104
+
105
+ # Remove script, style, and other non-content elements
106
+ for element in soup(['script', 'style', 'header', 'footer', 'nav', 'aside']):
107
+ element.decompose()
108
+
109
+ # URL-decode keywords if provided
110
+ decoded_keywords = []
111
+ if keywords:
112
+ decoded_keywords = [unquote(keyword).lower() for keyword in keywords]
113
+ print_info(f"Prioritizing content with keywords: {', '.join(decoded_keywords)}", "Content Extraction")
114
+
115
+ # Extract text from main content elements
116
+ paragraphs = []
117
+ keyword_paragraphs = []
118
+
119
+ for tag in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'article', 'section', 'div']):
120
+ text = tag.get_text(strip=True)
121
+ if text and len(text) > 20: # Skip very short pieces that might be UI elements
122
+ # Check if the paragraph contains any of the keywords
123
+ if decoded_keywords and any(keyword in text.lower() for keyword in decoded_keywords):
124
+ keyword_paragraphs.append(text)
125
+ else:
126
+ paragraphs.append(text)
127
+
128
+ # Join paragraphs, prioritizing those with keywords
129
+ if keyword_paragraphs:
130
+ print_info(f"Found {len(keyword_paragraphs)} paragraphs containing keywords", "Content Extraction")
131
+ extracted_text = "\n\n".join(keyword_paragraphs + paragraphs)
132
+ else:
133
+ extracted_text = "\n\n".join(paragraphs)
134
+
135
+ # If no paragraphs found, fall back to all text
136
+ if not extracted_text or len(extracted_text) < 100:
137
+ extracted_text = soup.get_text(separator='\n\n')
138
+
139
+ # Clean up extra whitespace
140
+ extracted_text = ' '.join(extracted_text.split())
141
+ extracted_text = extracted_text.replace('. ', '.\n\n')
142
+
143
+ # Truncate if needed
144
+ if len(extracted_text) > max_length:
145
+ print_info(f"Truncating content from {len(extracted_text)} to {max_length} characters", "Content Extraction")
146
+ extracted_text = extracted_text[:max_length] + "..."
147
+
148
+ print_success(f"Successfully extracted {len(extracted_text)} characters of content", "Content Extraction")
149
+ return extracted_text, False
150
+
151
+ except Exception as e:
152
+ error_msg = f"Error extracting content: {str(e)}"
153
+ print_error(error_msg, "Content Extraction Error")
154
+ return error_msg, True
155
+
156
+
157
+ def chunk_content(content: str, chunk_size: int = 2000, overlap: int = 200) -> List[str]:
158
+ """
159
+ Split content into overlapping chunks of a specified size.
160
+
161
+ Args:
162
+ content: The text content to chunk
163
+ chunk_size: Maximum size of each chunk
164
+ overlap: Number of characters to overlap between chunks
165
+
166
+ Returns:
167
+ List of text chunks
168
+ """
169
+ if not content:
170
+ return []
171
+
172
+ chunks = []
173
+
174
+ # Simple chunking with overlap
175
+ for i in range(0, len(content), chunk_size - overlap):
176
+ chunk_end = min(i + chunk_size, len(content))
177
+ chunks.append(content[i:chunk_end])
178
+ if chunk_end == len(content):
179
+ break
180
+
181
+ print_success(f"Content successfully chunked into {len(chunks)} parts", "Content Chunking")
182
+ return chunks
@@ -4,6 +4,7 @@ Utility module for rich console printing in tools.
4
4
  from rich.console import Console
5
5
  from rich.text import Text
6
6
  from typing import Optional
7
+ from janito.config import get_config
7
8
 
8
9
  # Create a shared console instance
9
10
  console = Console()
@@ -16,6 +17,9 @@ def print_info(message: str, title: Optional[str] = None):
16
17
  message: The message to print
17
18
  title: Optional title for the panel
18
19
  """
20
+ # Skip printing if trust mode is enabled
21
+ if get_config().trust_mode:
22
+ return
19
23
  # Map titles to specific icons
20
24
  icon_map = {
21
25
  # File operations
@@ -82,20 +86,22 @@ def print_info(message: str, title: Optional[str] = None):
82
86
  elif "Undoing last edit" in title:
83
87
  icon = "↩️" # Undo icon
84
88
 
89
+ # Add indentation to all tool messages
90
+ indent = " "
85
91
  text = Text(message)
86
92
  if title:
87
93
  # Special case for Bash Run commands
88
94
  if title == "Bash Run":
89
95
  console.print("\n" + "-"*50)
90
- console.print(f"{icon} {title}", style="bold white on blue")
96
+ console.print(f"{indent}{icon} {title}", style="bold white on blue")
91
97
  console.print("-"*50)
92
- console.print(f"$ {text}", style="white on dark_blue")
98
+ console.print(f"{indent}$ {text}", style="white on dark_blue")
93
99
  # Make sure we're not returning anything
94
100
  return
95
101
  else:
96
- console.print(f"{icon} {message}", style="blue", end="")
102
+ console.print(f"{indent}{icon} {message}", style="blue", end="")
97
103
  else:
98
- console.print(f"{icon} {text}", style="blue", end="")
104
+ console.print(f"{indent}{icon} {text}", style="blue", end="")
99
105
 
100
106
  def print_success(message: str, title: Optional[str] = None):
101
107
  """
@@ -105,6 +111,9 @@ def print_success(message: str, title: Optional[str] = None):
105
111
  message: The message to print
106
112
  title: Optional title for the panel
107
113
  """
114
+ # Skip printing if trust mode is enabled
115
+ if get_config().trust_mode:
116
+ return
108
117
  text = Text(message)
109
118
  if title:
110
119
  console.print(f" ✅ {message}", style="green")
@@ -114,26 +123,54 @@ def print_success(message: str, title: Optional[str] = None):
114
123
  def print_error(message: str, title: Optional[str] = None):
115
124
  """
116
125
  Print an error message with rich formatting.
126
+ In trust mode, error messages are suppressed.
117
127
 
118
128
  Args:
119
129
  message: The message to print
120
130
  title: Optional title for the panel
121
131
  """
132
+ # Skip printing if trust mode is enabled
133
+ if get_config().trust_mode:
134
+ return
135
+
122
136
  text = Text(message)
123
- if title:
124
- # Special case for File View - print without header
137
+
138
+ # Check if message starts with question mark emoji (❓)
139
+ # If it does, use warning styling (yellow) instead of error styling (red)
140
+ starts_with_question_mark = message.startswith("❓")
141
+
142
+ if starts_with_question_mark:
143
+ # Use warning styling for question mark emoji errors
144
+ # For question mark emoji errors, don't include the title (like "Error")
145
+ # Just print the message with the emoji
125
146
  if title == "File View":
126
- console.print(f"\n {message}", style="red")
147
+ console.print(f"\n {message}", style="yellow")
127
148
  else:
128
- console.print(f"{title} {text}")
149
+ console.print(f"{message}", style="yellow")
129
150
  else:
130
- console.print(f"\n❌ {text}", style="red")
151
+ # Regular error styling
152
+ if title:
153
+ # Special case for File View - print without header
154
+ if title == "File View":
155
+ console.print(f"\n ❌ {message}", style="red")
156
+ # Special case for Search Error
157
+ elif title == "Search Error":
158
+ console.print(f"❌ {message}", style="red")
159
+ else:
160
+ console.print(f"❌ {title} {text}", style="red")
161
+ else:
162
+ console.print(f"\n❌ {text}", style="red")
131
163
 
132
164
  def print_warning(message: str):
133
165
  """
134
166
  Print a warning message with rich formatting.
167
+ In trust mode, warning messages are suppressed.
135
168
 
136
169
  Args:
137
170
  message: The message to print
138
171
  """
172
+ # Skip printing if trust mode is enabled
173
+ if get_config().trust_mode:
174
+ return
175
+
139
176
  console.print(f"⚠️ {message}", style="yellow")