janito 0.12.0__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janito/__init__.py +1 -1
- janito/cli/agent/__init__.py +7 -0
- janito/cli/agent/conversation.py +149 -0
- janito/cli/agent/initialization.py +172 -0
- janito/cli/agent/query.py +108 -0
- janito/cli/agent.py +7 -282
- janito/cli/app.py +105 -9
- janito/cli/commands/__init__.py +12 -0
- janito/cli/commands/config.py +242 -0
- janito/cli/commands/history.py +119 -0
- janito/cli/commands/profile.py +72 -0
- janito/cli/commands/validation.py +24 -0
- janito/cli/commands/workspace.py +31 -0
- janito/cli/commands.py +9 -326
- janito/config.py +37 -0
- janito/data/instructions_template.txt +9 -5
- janito/tools/__init__.py +8 -2
- janito/tools/bash/bash.py +3 -1
- janito/tools/bash/unix_persistent_bash.py +183 -181
- janito/tools/bash/win_persistent_bash.py +4 -2
- janito/tools/fetch_webpage/__init__.py +22 -33
- janito/tools/fetch_webpage/core.py +182 -155
- janito/tools/rich_console.py +46 -9
- janito/tools/search_text.py +225 -238
- janito/tools/str_replace_editor/handlers/str_replace.py +3 -1
- janito/tools/str_replace_editor/handlers/view.py +14 -8
- janito/tools/think.py +37 -0
- janito/tools/usage_tracker.py +1 -0
- janito-0.14.0.dist-info/METADATA +396 -0
- janito-0.14.0.dist-info/RECORD +53 -0
- janito/test_file.py +0 -4
- janito/tools/fetch_webpage/chunking.py +0 -76
- janito/tools/fetch_webpage/extractors.py +0 -276
- janito/tools/fetch_webpage/news.py +0 -137
- janito/tools/fetch_webpage/utils.py +0 -108
- janito-0.12.0.dist-info/METADATA +0 -203
- janito-0.12.0.dist-info/RECORD +0 -47
- {janito-0.12.0.dist-info → janito-0.14.0.dist-info}/WHEEL +0 -0
- {janito-0.12.0.dist-info → janito-0.14.0.dist-info}/entry_points.txt +0 -0
- {janito-0.12.0.dist-info → janito-0.14.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,155 +1,182 @@
|
|
1
|
-
"""
|
2
|
-
Core functionality for fetching web pages and extracting content.
|
3
|
-
"""
|
4
|
-
|
5
|
-
import requests
|
6
|
-
from typing import Tuple, List, Optional
|
7
|
-
from urllib.parse import urlparse
|
8
|
-
from janito.tools.rich_console import print_info, print_success, print_error, print_warning
|
9
|
-
from janito.tools.usage_tracker import track_usage
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
'
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
#
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
1
|
+
"""
|
2
|
+
Core functionality for fetching web pages and extracting content.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import requests
|
6
|
+
from typing import Tuple, List, Optional
|
7
|
+
from urllib.parse import urlparse, unquote
|
8
|
+
from janito.tools.rich_console import print_info, print_success, print_error, print_warning
|
9
|
+
from janito.tools.usage_tracker import track_usage
|
10
|
+
from bs4 import BeautifulSoup
|
11
|
+
|
12
|
+
@track_usage('web_requests')
def fetch_webpage(url: str, headers: Optional[dict] = None, timeout: int = 30, max_size: int = 5000000) -> Tuple[str, bool]:
    """
    Fetch the content of a web page from a given URL.

    The body is streamed so that oversized responses can be rejected (when the
    server declares a Content-Length) or cut off (when it does not) without
    downloading them in full.

    Args:
        url: The URL of the web page to fetch
        headers: Optional HTTP headers to include in the request (default: None,
            in which case a browser-like header set is sent)
        timeout: Request timeout in seconds (default: 30)
        max_size: Maximum size in bytes to download (default: 5MB)

    Returns:
        A tuple containing (message, is_error). On success the message is the
        page content decoded as UTF-8 (undecodable bytes are replaced) and
        limited to max_size bytes; on failure it is a description of the problem.
    """
    print_info(f"Fetching content from URL: {url}", "Web Fetch")

    try:
        # Set default headers if none provided
        if headers is None:
            # NOTE(review): 'br' is advertised below; confirm a Brotli decoder is
            # available in the runtime, otherwise a Brotli-compressed body may
            # come back undecoded.
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
                'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
                'Accept-Language': 'en-US,en;q=0.9',
                'Accept-Encoding': 'gzip, deflate, br',
                'Referer': 'https://www.google.com/',
                'Sec-Ch-Ua': '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
                'Sec-Ch-Ua-Mobile': '?0',
                'Sec-Ch-Ua-Platform': '"Windows"',
                'Sec-Fetch-Dest': 'document',
                'Sec-Fetch-Mode': 'navigate',
                'Sec-Fetch-Site': 'cross-site',
                'Sec-Fetch-User': '?1',
                'Upgrade-Insecure-Requests': '1'
            }

        # Stream the response inside a context manager so the connection is
        # always released, including on the early-return and break paths below.
        with requests.get(url, headers=headers, timeout=timeout, stream=True) as response:
            # Raise an exception for HTTP errors
            response.raise_for_status()

            # Check the declared content length before downloading fully.
            # A malformed header is treated as "unknown size" instead of
            # escaping as an unhandled ValueError.
            content_length = response.headers.get('Content-Length')
            try:
                declared_size = int(content_length) if content_length else None
            except ValueError:
                declared_size = None

            if declared_size is not None and declared_size > max_size:
                warning_msg = f"Web Fetch: Content size ({declared_size/1000000:.1f}MB) exceeds max size ({max_size/1000000:.1f}MB). Aborting download."
                print_warning(warning_msg)
                return warning_msg, True

            # Download content with size limit, collecting chunks in a list so
            # the buffer is assembled once instead of being re-copied per chunk.
            chunks = []
            received = 0
            for chunk in response.iter_content(chunk_size=1024 * 1024):  # 1MB chunks
                chunks.append(chunk)
                received += len(chunk)
                if received > max_size:
                    warning_msg = f"Web Fetch: Download exceeded max size ({max_size/1000000:.1f}MB). Truncating."
                    print_warning(warning_msg)
                    break

        # Enforce the limit announced above: the last chunk may overshoot it.
        content_bytes = b''.join(chunks)[:max_size]

        # Decode leniently; a multi-byte sequence cut by the truncation is replaced.
        content = content_bytes.decode('utf-8', errors='replace')

        print_success(f"Successfully fetched content ({len(content)} bytes)", "Web Fetch")

        # Return the full content
        return content, False

    except requests.exceptions.RequestException as e:
        error_msg = f"Error fetching web page: {str(e)}"
        print_error(error_msg, "Web Fetch Error")
        return error_msg, True
|
81
|
+
|
82
|
+
|
83
|
+
@track_usage('web_content')
def fetch_and_extract(url: str, max_length: int = 10000, keywords: Optional[List[str]] = None) -> Tuple[str, bool]:
    """
    Fetch a webpage and extract its main content using BeautifulSoup.

    Args:
        url: The URL to fetch
        max_length: Maximum number of characters of extracted text to keep
            (a trailing "..." is appended when the text is truncated)
        keywords: Optional list of URL-encoded keywords to prioritize content containing these terms

    Returns:
        A tuple containing (extracted_content, is_error). When the fetch or the
        extraction fails, the first element is the error message instead.
    """
    html_content, is_error = fetch_webpage(url)

    if is_error:
        return html_content, True

    try:
        # Use BeautifulSoup to parse and extract content
        soup = BeautifulSoup(html_content, 'html.parser')

        # Remove script, style, and other non-content elements
        for element in soup(['script', 'style', 'header', 'footer', 'nav', 'aside']):
            element.decompose()

        # URL-decode keywords if provided. Keywords that decode to an empty or
        # whitespace-only string are dropped: they would match every paragraph
        # and make the prioritisation meaningless.
        decoded_keywords = []
        if keywords:
            decoded_keywords = [
                decoded
                for decoded in (unquote(keyword).lower() for keyword in keywords)
                if decoded.strip()
            ]
        if decoded_keywords:
            print_info(f"Prioritizing content with keywords: {', '.join(decoded_keywords)}", "Content Extraction")

        # Extract text from main content elements
        paragraphs = []
        keyword_paragraphs = []
        # Container tags (div/section/article) often wrap a single child and
        # yield exactly the same text; remember what was already collected so
        # identical text is emitted only once.
        seen_texts = set()

        for tag in soup.find_all(['p', 'h1', 'h2', 'h3', 'h4', 'article', 'section', 'div']):
            text = tag.get_text(strip=True)
            # Skip very short pieces that might be UI elements
            if not text or len(text) <= 20:
                continue
            if text in seen_texts:
                continue
            seen_texts.add(text)

            # Check if the paragraph contains any of the keywords
            lowered = text.lower()
            if any(keyword in lowered for keyword in decoded_keywords):
                keyword_paragraphs.append(text)
            else:
                paragraphs.append(text)

        # Join paragraphs, prioritizing those with keywords
        if keyword_paragraphs:
            print_info(f"Found {len(keyword_paragraphs)} paragraphs containing keywords", "Content Extraction")
            extracted_text = "\n\n".join(keyword_paragraphs + paragraphs)
        else:
            extracted_text = "\n\n".join(paragraphs)

        # If no paragraphs found, fall back to all text
        if not extracted_text or len(extracted_text) < 100:
            extracted_text = soup.get_text(separator='\n\n')

        # Clean up extra whitespace, then start a new paragraph after each sentence
        extracted_text = ' '.join(extracted_text.split())
        extracted_text = extracted_text.replace('. ', '.\n\n')

        # Truncate if needed
        if len(extracted_text) > max_length:
            print_info(f"Truncating content from {len(extracted_text)} to {max_length} characters", "Content Extraction")
            extracted_text = extracted_text[:max_length] + "..."

        print_success(f"Successfully extracted {len(extracted_text)} characters of content", "Content Extraction")
        return extracted_text, False

    except Exception as e:
        # Extraction is best-effort: report any parser failure to the caller
        # through the (message, is_error) contract instead of raising.
        error_msg = f"Error extracting content: {str(e)}"
        print_error(error_msg, "Content Extraction Error")
        return error_msg, True
|
155
|
+
|
156
|
+
|
157
|
+
def chunk_content(content: str, chunk_size: int = 2000, overlap: int = 200) -> List[str]:
    """
    Split content into overlapping chunks of a specified size.

    Args:
        content: The text content to chunk
        chunk_size: Maximum size of each chunk (must be positive)
        overlap: Number of characters to overlap between chunks
            (must satisfy 0 <= overlap < chunk_size)

    Returns:
        List of text chunks; an empty list when content is empty

    Raises:
        ValueError: If chunk_size is not positive, or overlap is negative or
            not smaller than chunk_size. Such values would make the window
            stand still, move backwards, or skip text between chunks.
    """
    if not content:
        return []

    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must be between 0 and chunk_size - 1 ({chunk_size - 1}), got {overlap}"
        )

    total = len(content)
    # Each window starts this many characters after the previous one.
    step = chunk_size - overlap
    chunks = []

    # Simple chunking with overlap
    for start in range(0, total, step):
        end = min(start + chunk_size, total)
        chunks.append(content[start:end])
        # Stop once a window reaches the end, so no trailing chunk made only
        # of already-covered overlap is emitted.
        if end == total:
            break

    print_success(f"Content successfully chunked into {len(chunks)} parts", "Content Chunking")
    return chunks
|
janito/tools/rich_console.py
CHANGED
@@ -4,6 +4,7 @@ Utility module for rich console printing in tools.
|
|
4
4
|
from rich.console import Console
|
5
5
|
from rich.text import Text
|
6
6
|
from typing import Optional
|
7
|
+
from janito.config import get_config
|
7
8
|
|
8
9
|
# Create a shared console instance
|
9
10
|
console = Console()
|
@@ -16,6 +17,9 @@ def print_info(message: str, title: Optional[str] = None):
|
|
16
17
|
message: The message to print
|
17
18
|
title: Optional title for the panel
|
18
19
|
"""
|
20
|
+
# Skip printing if trust mode is enabled
|
21
|
+
if get_config().trust_mode:
|
22
|
+
return
|
19
23
|
# Map titles to specific icons
|
20
24
|
icon_map = {
|
21
25
|
# File operations
|
@@ -82,20 +86,22 @@ def print_info(message: str, title: Optional[str] = None):
|
|
82
86
|
elif "Undoing last edit" in title:
|
83
87
|
icon = "↩️" # Undo icon
|
84
88
|
|
89
|
+
# Add indentation to all tool messages
|
90
|
+
indent = " "
|
85
91
|
text = Text(message)
|
86
92
|
if title:
|
87
93
|
# Special case for Bash Run commands
|
88
94
|
if title == "Bash Run":
|
89
95
|
console.print("\n" + "-"*50)
|
90
|
-
console.print(f"{icon} {title}", style="bold white on blue")
|
96
|
+
console.print(f"{indent}{icon} {title}", style="bold white on blue")
|
91
97
|
console.print("-"*50)
|
92
|
-
console.print(f"$ {text}", style="white on dark_blue")
|
98
|
+
console.print(f"{indent}$ {text}", style="white on dark_blue")
|
93
99
|
# Make sure we're not returning anything
|
94
100
|
return
|
95
101
|
else:
|
96
|
-
console.print(f"{icon} {message}", style="blue", end="")
|
102
|
+
console.print(f"{indent}{icon} {message}", style="blue", end="")
|
97
103
|
else:
|
98
|
-
console.print(f"{icon} {text}", style="blue", end="")
|
104
|
+
console.print(f"{indent}{icon} {text}", style="blue", end="")
|
99
105
|
|
100
106
|
def print_success(message: str, title: Optional[str] = None):
|
101
107
|
"""
|
@@ -105,6 +111,9 @@ def print_success(message: str, title: Optional[str] = None):
|
|
105
111
|
message: The message to print
|
106
112
|
title: Optional title for the panel
|
107
113
|
"""
|
114
|
+
# Skip printing if trust mode is enabled
|
115
|
+
if get_config().trust_mode:
|
116
|
+
return
|
108
117
|
text = Text(message)
|
109
118
|
if title:
|
110
119
|
console.print(f" ✅ {message}", style="green")
|
@@ -114,26 +123,54 @@ def print_success(message: str, title: Optional[str] = None):
|
|
114
123
|
def print_error(message: str, title: Optional[str] = None):
|
115
124
|
"""
|
116
125
|
Print an error message with rich formatting.
|
126
|
+
In trust mode, error messages are suppressed.
|
117
127
|
|
118
128
|
Args:
|
119
129
|
message: The message to print
|
120
130
|
title: Optional title for the panel
|
121
131
|
"""
|
132
|
+
# Skip printing if trust mode is enabled
|
133
|
+
if get_config().trust_mode:
|
134
|
+
return
|
135
|
+
|
122
136
|
text = Text(message)
|
123
|
-
|
124
|
-
|
137
|
+
|
138
|
+
# Check if message starts with question mark emoji (❓)
|
139
|
+
# If it does, use warning styling (yellow) instead of error styling (red)
|
140
|
+
starts_with_question_mark = message.startswith("❓")
|
141
|
+
|
142
|
+
if starts_with_question_mark:
|
143
|
+
# Use warning styling for question mark emoji errors
|
144
|
+
# For question mark emoji errors, don't include the title (like "Error")
|
145
|
+
# Just print the message with the emoji
|
125
146
|
if title == "File View":
|
126
|
-
console.print(f"\n
|
147
|
+
console.print(f"\n {message}", style="yellow")
|
127
148
|
else:
|
128
|
-
console.print(f"
|
149
|
+
console.print(f"{message}", style="yellow")
|
129
150
|
else:
|
130
|
-
|
151
|
+
# Regular error styling
|
152
|
+
if title:
|
153
|
+
# Special case for File View - print without header
|
154
|
+
if title == "File View":
|
155
|
+
console.print(f"\n ❌ {message}", style="red")
|
156
|
+
# Special case for Search Error
|
157
|
+
elif title == "Search Error":
|
158
|
+
console.print(f"❌ {message}", style="red")
|
159
|
+
else:
|
160
|
+
console.print(f"❌ {title} {text}", style="red")
|
161
|
+
else:
|
162
|
+
console.print(f"\n❌ {text}", style="red")
|
131
163
|
|
132
164
|
def print_warning(message: str):
|
133
165
|
"""
|
134
166
|
Print a warning message with rich formatting.
|
167
|
+
In trust mode, warning messages are suppressed.
|
135
168
|
|
136
169
|
Args:
|
137
170
|
message: The message to print
|
138
171
|
"""
|
172
|
+
# Skip printing if trust mode is enabled
|
173
|
+
if get_config().trust_mode:
|
174
|
+
return
|
175
|
+
|
139
176
|
console.print(f"⚠️ {message}", style="yellow")
|