janito 0.13.0__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- janito/__init__.py +1 -1
- janito/cli/agent/__init__.py +7 -0
- janito/cli/agent/conversation.py +149 -0
- janito/cli/agent/initialization.py +168 -0
- janito/cli/agent/query.py +112 -0
- janito/cli/agent.py +7 -395
- janito/cli/app.py +103 -19
- janito/cli/commands/__init__.py +12 -0
- janito/cli/commands/config.py +30 -0
- janito/cli/commands/history.py +119 -0
- janito/cli/commands/profile.py +93 -0
- janito/cli/commands/validation.py +24 -0
- janito/cli/commands/workspace.py +31 -0
- janito/cli/commands.py +9 -326
- janito/config/README.md +104 -0
- janito/config/__init__.py +16 -0
- janito/config/cli/__init__.py +28 -0
- janito/config/cli/commands.py +397 -0
- janito/config/cli/validators.py +77 -0
- janito/config/core/__init__.py +23 -0
- janito/config/core/file_operations.py +90 -0
- janito/config/core/properties.py +316 -0
- janito/config/core/singleton.py +282 -0
- janito/config/profiles/__init__.py +8 -0
- janito/config/profiles/definitions.py +38 -0
- janito/config/profiles/manager.py +80 -0
- janito/data/instructions_template.txt +12 -6
- janito/tools/__init__.py +8 -2
- janito/tools/bash/bash.py +80 -7
- janito/tools/bash/unix_persistent_bash.py +32 -1
- janito/tools/bash/win_persistent_bash.py +34 -1
- janito/tools/fetch_webpage/__init__.py +22 -33
- janito/tools/fetch_webpage/core.py +182 -155
- janito/tools/move_file.py +1 -1
- janito/tools/search_text.py +225 -239
- janito/tools/str_replace_editor/handlers/view.py +14 -8
- janito/tools/think.py +37 -0
- janito/tools/usage_tracker.py +1 -0
- {janito-0.13.0.dist-info → janito-0.15.0.dist-info}/METADATA +204 -23
- janito-0.15.0.dist-info/RECORD +64 -0
- janito/config.py +0 -358
- janito/test_file.py +0 -4
- janito/tools/fetch_webpage/chunking.py +0 -76
- janito/tools/fetch_webpage/extractors.py +0 -276
- janito/tools/fetch_webpage/news.py +0 -137
- janito/tools/fetch_webpage/utils.py +0 -108
- janito-0.13.0.dist-info/RECORD +0 -47
- {janito-0.13.0.dist-info → janito-0.15.0.dist-info}/WHEEL +0 -0
- {janito-0.13.0.dist-info → janito-0.15.0.dist-info}/entry_points.txt +0 -0
- {janito-0.13.0.dist-info → janito-0.15.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,38 @@
|
|
1
|
+
"""
|
2
|
+
Predefined parameter profiles for Janito.
|
3
|
+
"""
|
4
|
+
from typing import Dict, Any
|
5
|
+
|
6
|
+
# Predefined parameter profiles
|
7
|
+
# Each row is (profile name, temperature, top_p, top_k, description); the
# trailing four columns line up with _PROFILE_FIELDS.
_PROFILE_FIELDS = ("temperature", "top_p", "top_k", "description")
_PROFILE_ROWS = (
    ("precise", 0.2, 0.85, 20,
     "Factual answers, documentation, structured data, avoiding hallucinations"),
    ("balanced", 0.5, 0.9, 40,
     "Professional writing, summarization, everyday tasks with moderate creativity"),
    ("conversational", 0.7, 0.9, 45,
     "Natural dialogue, educational content, support conversations"),
    ("creative", 0.9, 0.95, 70,
     "Storytelling, brainstorming, marketing copy, poetry"),
    ("technical", 0.3, 0.95, 15,
     "Code generation, debugging, decision analysis, technical problem-solving"),
)

# Mapping of profile name -> settings dict, in the row order above.
PROFILES: Dict[str, Dict[str, Any]] = {
    profile_name: dict(zip(_PROFILE_FIELDS, settings))
    for profile_name, *settings in _PROFILE_ROWS
}
|
@@ -0,0 +1,80 @@
|
|
1
|
+
"""
|
2
|
+
Profile management functions for Janito configuration.
|
3
|
+
"""
|
4
|
+
from typing import Dict, Any
|
5
|
+
|
6
|
+
from .definitions import PROFILES
|
7
|
+
|
8
|
+
def get_available_profiles() -> Dict[str, Dict[str, Any]]:
    """
    Get all available predefined profiles.

    Returns:
        Dictionary of profile names to profile settings. Both the outer
        dictionary and each settings dictionary are fresh copies, so callers
        may modify the result without altering the predefined profiles.
    """
    # PROFILES is shared module-level state; handing out the object itself
    # would let one caller's mutation leak into every later lookup.
    return {name: dict(settings) for name, settings in PROFILES.items()}
|
16
|
+
|
17
|
+
def get_profile(profile_name: str) -> Dict[str, Any]:
    """
    Get a specific profile by name.

    The lookup ignores letter case and surrounding whitespace.

    Args:
        profile_name: Name of the profile to retrieve

    Returns:
        Dict containing the profile settings. The dict is a copy, so callers
        may modify it without altering the predefined profile.

    Raises:
        ValueError: If the profile name is not recognized
    """
    # Normalise before the lookup so input such as " Precise " still matches.
    profile_name = profile_name.strip().lower()
    if profile_name not in PROFILES:
        valid_profiles = ", ".join(PROFILES)
        raise ValueError(f"Unknown profile: {profile_name}. Valid profiles are: {valid_profiles}")

    # PROFILES is shared module-level state; return a copy so a caller tweaking
    # the settings cannot change what later lookups see.
    return dict(PROFILES[profile_name])
|
36
|
+
|
37
|
+
def create_custom_profile(name: str, temperature: float, description: str = None) -> Dict[str, Any]:
    """
    Create a custom profile with the given parameters.

    top_p, top_k and the default description are chosen from the temperature
    tier:

        temperature <= 0.3  -> top_p 0.85, top_k 15, "Custom precise profile"
        temperature <= 0.6  -> top_p 0.9,  top_k 40, "Custom balanced profile"
        otherwise           -> top_p 0.95, top_k 60, "Custom creative profile"

    Args:
        name: Name for the custom profile. It is accepted for the caller's
            bookkeeping only; it is not stored in the returned settings.
        temperature: Temperature value (0.0 to 1.0, inclusive)
        description: Optional description for the profile. When None, the
            tier's default description is used.

    Returns:
        Dict containing the profile settings, with the keys "temperature",
        "top_p", "top_k" and "description"

    Raises:
        ValueError: If temperature is not between 0.0 and 1.0 (NaN included)
    """
    # A chained comparison is False for NaN, so NaN is rejected as well;
    # separate "< 0.0" / "> 1.0" checks would both be False and let it through.
    if not 0.0 <= temperature <= 1.0:
        raise ValueError("Temperature must be between 0.0 and 1.0")

    # Resolve the tier once so the sampling parameters and the default
    # description can never drift apart.
    if temperature <= 0.3:
        top_p, top_k, default_description = 0.85, 15, "Custom precise profile"
    elif temperature <= 0.6:
        top_p, top_k, default_description = 0.9, 40, "Custom balanced profile"
    else:
        top_p, top_k, default_description = 0.95, 60, "Custom creative profile"

    return {
        "temperature": temperature,
        "top_p": top_p,
        "top_k": top_k,
        # Only None triggers the default; an explicit empty string is kept.
        "description": default_description if description is None else description,
    }
|
@@ -1,28 +1,34 @@
|
|
1
1
|
You are a {{ role }}, using the name Janito .
|
2
2
|
You will be assisting a user using a computer system on a {{ platform }} platform.
|
3
3
|
You can find more about the current project using the tools in the workspace directory.
|
4
|
-
If the question is related to the project, use the tools using the relative path
|
4
|
+
If the question is related to the project, use the tools using the relative path, ./filename instead of /filename.
|
5
5
|
|
6
6
|
If creating or editing files with a large number of lines, organize them into smaller files.
|
7
7
|
If creating or editing files in an existing directory check surrounding files for the used patterns.
|
8
8
|
|
9
|
-
# Structure Discovery (
|
10
|
-
Always start exploring the project by viewing for the file
|
9
|
+
# Structure Discovery (docs/STRUCTURE.md from current directory)
|
10
|
+
Always start exploring the project by viewing the file docs/STRUCTURE.md.
|
11
11
|
Do not track files or directories which are in .gitignore in the structure.
|
12
12
|
At the end of responding to the user, update the structure file based on the files and directories you have interacted with,
|
13
13
|
be precise focusing on the most important files and directories, avoid adding extra information like architecture or design patterns.
|
14
14
|
|
15
15
|
# Tools
|
16
16
|
The bash tool does not support commands which will require user input.
|
17
|
+
Use the bash tool to get the current date or time when needed.
|
17
18
|
Prefer the str_replace_editor tool to view directories and file contents.
|
18
19
|
|
19
|
-
|
20
|
+
<IMPORTANT>
|
20
21
|
Call the user_prompt tool when:
|
21
22
|
- There are multiple options to apply a certain change
|
22
23
|
- The risk of the next operation is moderate or high
|
23
24
|
- The implementation plan is complex, requiring a review
|
24
25
|
Proceed according to the user answer.
|
25
|
-
|
26
|
+
</IMPORTANT>
|
26
27
|
|
27
28
|
When changing code in Python files, be mindful about the need to review the imports, especially when new type hints are used (e.g. Optional, Tuple, List, Dict, etc).
|
28
|
-
After performing changes to a project in interfaces which are exposed to the user,
|
29
|
+
After performing changes to a project in interfaces which are exposed to the user, respond to the user with a short summary on how to verify the changes. eg. "run cmd xpto", prefer to provide a command to run instead of a description.
|
30
|
+
When displaying commands in instructions to the user, consider their platform.
|
31
|
+
When creating html pages which refer to images that should be manually placed by the user, instead of broken links provide a frame with a placeholder image.
|
32
|
+
|
33
|
+
If STRUCTURE.md was updated add it to the list of files to be committed.
|
34
|
+
After significant changes, run git commit with a message describing the changes made.
|
janito/tools/__init__.py
CHANGED
@@ -10,11 +10,12 @@ from .replace_file import replace_file
|
|
10
10
|
from .prompt_user import prompt_user
|
11
11
|
from .move_file import move_file
|
12
12
|
from janito.tools.fetch_webpage import fetch_webpage
|
13
|
+
from .think import think
|
13
14
|
from .usage_tracker import get_tracker, reset_tracker, print_usage_stats
|
14
15
|
from janito.config import get_config
|
15
16
|
|
16
17
|
__all__ = ["str_replace_editor", "find_files", "delete_file", "search_text", "replace_file",
|
17
|
-
"prompt_user", "move_file", "fetch_webpage", "get_tools",
|
18
|
+
"prompt_user", "move_file", "fetch_webpage", "think", "get_tools",
|
18
19
|
"get_tracker", "reset_tracker", "print_usage_stats"]
|
19
20
|
|
20
21
|
def get_tools():
|
@@ -23,10 +24,15 @@ def get_tools():
|
|
23
24
|
|
24
25
|
Returns:
|
25
26
|
List of tool functions (excluding str_replace_editor which is passed separately)
|
27
|
+
If no_tools mode is enabled, returns an empty list
|
26
28
|
If ask_mode is enabled, only returns tools that don't perform changes
|
27
29
|
"""
|
30
|
+
# If no_tools mode is enabled, return an empty list
|
31
|
+
if get_config().no_tools:
|
32
|
+
return []
|
33
|
+
|
28
34
|
# Tools that only read or view but don't modify anything
|
29
|
-
read_only_tools = [find_files, search_text, prompt_user, fetch_webpage]
|
35
|
+
read_only_tools = [find_files, search_text, prompt_user, fetch_webpage, think]
|
30
36
|
|
31
37
|
# Tools that modify the filesystem
|
32
38
|
write_tools = [delete_file, replace_file, move_file]
|
janito/tools/bash/bash.py
CHANGED
@@ -3,6 +3,9 @@ from typing import Tuple
|
|
3
3
|
import threading
|
4
4
|
import platform
|
5
5
|
import re
|
6
|
+
import queue
|
7
|
+
import signal
|
8
|
+
import time
|
6
9
|
from janito.config import get_config
|
7
10
|
from janito.tools.usage_tracker import get_tracker
|
8
11
|
from janito.tools.rich_console import console, print_info
|
@@ -16,6 +19,42 @@ else:
|
|
16
19
|
# Global instance of PersistentBash to maintain state between calls
|
17
20
|
_bash_session = None
|
18
21
|
_session_lock = threading.RLock() # Use RLock to allow reentrant locking
|
22
|
+
_current_bash_thread = None
|
23
|
+
_command_interrupted = False
|
24
|
+
|
25
|
+
def _execute_bash_command(command, result_queue):
    """
    Thread target: run one command on the shared bash session.

    Exactly one of three things happens:
      * success and no interrupt flag -> (output, False) is queued
      * success but the interrupt flag is set -> nothing is queued
      * any exception -> the error is printed and (message, True) is queued

    Args:
        command: The bash command to execute
        result_queue: Queue that receives the (text, is_error) tuple
    """
    try:
        captured = _bash_session.execute(command)

        # A result produced after the interrupt flag was raised is dropped.
        if _command_interrupted:
            return
        result_queue.put((captured, False))
    except Exception as exc:
        # Report the failure on the console and through the queue.
        failure = f"Error executing bash command: {str(exc)}"
        console.print(failure, style="red bold")
        result_queue.put((failure, True))
|
47
|
+
|
48
|
+
def _keyboard_interrupt_handler(signum, frame):
    """
    SIGINT handler used while a bash command is running.

    Raises the module-level interrupt flag, tells the user, and then puts the
    previously saved SIGINT handler back in place.

    Args:
        signum: Signal number passed in by the signal machinery (unused)
        frame: Current stack frame passed in by the signal machinery (unused)
    """
    global _command_interrupted

    # Flag first, so code polling the flag sees it before anything else runs.
    _command_interrupted = True
    console.print("\n[bold red]Command interrupted by user (Ctrl+C)[/bold red]")

    # Hand SIGINT back to the handler that was saved in original_sigint_handler.
    signal.signal(signal.SIGINT, original_sigint_handler)
|
19
58
|
|
20
59
|
def bash_tool(command: str, restart: Optional[bool] = False) -> Tuple[str, bool]:
|
21
60
|
"""
|
@@ -23,6 +62,7 @@ def bash_tool(command: str, restart: Optional[bool] = False) -> Tuple[str, bool]
|
|
23
62
|
The appropriate implementation (Windows or Unix) is selected based on the detected platform.
|
24
63
|
When in ask mode, only read-only commands are allowed.
|
25
64
|
Output is printed to the console in real-time as it's received.
|
65
|
+
Command runs in a background thread, allowing Ctrl+C to interrupt just the command.
|
26
66
|
|
27
67
|
Args:
|
28
68
|
command: The bash command to execute
|
@@ -37,7 +77,9 @@ def bash_tool(command: str, restart: Optional[bool] = False) -> Tuple[str, bool]
|
|
37
77
|
# Only print command if not in trust mode
|
38
78
|
if not get_config().trust_mode:
|
39
79
|
print_info(f"{command}", "Bash Run")
|
40
|
-
|
80
|
+
|
81
|
+
global _bash_session, _current_bash_thread, _command_interrupted, original_sigint_handler
|
82
|
+
_command_interrupted = False
|
41
83
|
|
42
84
|
# Check if in ask mode and if the command might modify files
|
43
85
|
if get_config().ask_mode:
|
@@ -65,20 +107,51 @@ def bash_tool(command: str, restart: Optional[bool] = False) -> Tuple[str, bool]
|
|
65
107
|
_bash_session = PersistentBash(bash_path=gitbash_path)
|
66
108
|
|
67
109
|
try:
|
68
|
-
#
|
69
|
-
|
110
|
+
# Create a queue to get the result from the thread
|
111
|
+
result_queue = queue.Queue()
|
112
|
+
|
113
|
+
# Save the original SIGINT handler
|
114
|
+
original_sigint_handler = signal.getsignal(signal.SIGINT)
|
115
|
+
|
116
|
+
# Set our custom SIGINT handler
|
117
|
+
signal.signal(signal.SIGINT, _keyboard_interrupt_handler)
|
118
|
+
|
119
|
+
# Create and start the thread
|
120
|
+
_current_bash_thread = threading.Thread(
|
121
|
+
target=_execute_bash_command,
|
122
|
+
args=(command, result_queue)
|
123
|
+
)
|
124
|
+
_current_bash_thread.daemon = True
|
125
|
+
_current_bash_thread.start()
|
126
|
+
|
127
|
+
# Wait for the thread to complete or for an interrupt
|
128
|
+
while _current_bash_thread.is_alive() and not _command_interrupted:
|
129
|
+
_current_bash_thread.join(0.1) # Check every 100ms
|
130
|
+
|
131
|
+
# If the command was interrupted, return a message
|
132
|
+
if _command_interrupted:
|
133
|
+
# Restore the original signal handler
|
134
|
+
signal.signal(signal.SIGINT, original_sigint_handler)
|
135
|
+
return ("Command was interrupted by Ctrl+C", True)
|
136
|
+
|
137
|
+
# Get the result from the queue
|
138
|
+
output, is_error = result_queue.get(timeout=1)
|
139
|
+
|
140
|
+
# Restore the original signal handler
|
141
|
+
signal.signal(signal.SIGINT, original_sigint_handler)
|
70
142
|
|
71
143
|
# Track bash command execution
|
72
144
|
get_tracker().increment('bash_commands')
|
73
145
|
|
74
|
-
#
|
75
|
-
is_error = False
|
76
|
-
|
77
|
-
# Return the output as a string (even though it was already printed in real-time)
|
146
|
+
# Return the output
|
78
147
|
return output, is_error
|
79
148
|
|
80
149
|
except Exception as e:
|
81
150
|
# Handle any exceptions that might occur
|
82
151
|
error_message = f"Error executing bash command: {str(e)}"
|
83
152
|
console.print(error_message, style="red bold")
|
153
|
+
|
154
|
+
# Restore the original signal handler
|
155
|
+
signal.signal(signal.SIGINT, original_sigint_handler)
|
156
|
+
|
84
157
|
return error_message, True
|
@@ -132,7 +132,38 @@ class PersistentBash:
|
|
132
132
|
start_time = time.time()
|
133
133
|
max_wait = timeout if timeout is not None else 3600 # Default to 1 hour if no timeout
|
134
134
|
|
135
|
+
# Check if we're being run from the main bash_tool function
|
136
|
+
# which will handle interruption
|
137
|
+
try:
|
138
|
+
from janito.tools.bash.bash import _command_interrupted
|
139
|
+
except ImportError:
|
140
|
+
_command_interrupted = False
|
141
|
+
|
135
142
|
while time.time() - start_time < max_wait + 5: # Add buffer time
|
143
|
+
# Check if we've been interrupted
|
144
|
+
if '_command_interrupted' in globals() and _command_interrupted:
|
145
|
+
# Send Ctrl+C to the running process
|
146
|
+
if self.process and self.process.poll() is None:
|
147
|
+
try:
|
148
|
+
# Send interrupt signal to the process group
|
149
|
+
import os
|
150
|
+
import signal
|
151
|
+
pgid = os.getpgid(self.process.pid)
|
152
|
+
os.killpg(pgid, signal.SIGINT)
|
153
|
+
except:
|
154
|
+
pass
|
155
|
+
|
156
|
+
# Add message to output
|
157
|
+
interrupt_msg = "Command interrupted by user (Ctrl+C)"
|
158
|
+
console.print(f"[bold red]{interrupt_msg}[/bold red]")
|
159
|
+
output_lines.append(interrupt_msg)
|
160
|
+
|
161
|
+
# Reset the bash session
|
162
|
+
self.close()
|
163
|
+
self.start_process()
|
164
|
+
|
165
|
+
break
|
166
|
+
|
136
167
|
try:
|
137
168
|
line = self.process.stdout.readline().rstrip('\r\n')
|
138
169
|
if end_marker in line:
|
@@ -152,7 +183,7 @@ class PersistentBash:
|
|
152
183
|
continue
|
153
184
|
|
154
185
|
# Check for timeout
|
155
|
-
if time.time() - start_time >= max_wait + 5:
|
186
|
+
if time.time() - start_time >= max_wait + 5 and not _command_interrupted:
|
156
187
|
timeout_msg = f"Error: Command timed out after {max_wait} seconds"
|
157
188
|
console.print(timeout_msg, style="red bold")
|
158
189
|
output_lines.append(timeout_msg)
|
@@ -216,7 +216,40 @@ class PersistentBash:
|
|
216
216
|
start_time = time.time()
|
217
217
|
max_wait = timeout if timeout is not None else 3600 # Default to 1 hour if no timeout
|
218
218
|
|
219
|
+
# Check if we're being run from the main bash_tool function
|
220
|
+
# which will handle interruption
|
221
|
+
try:
|
222
|
+
from janito.tools.bash.bash import _command_interrupted
|
223
|
+
except ImportError:
|
224
|
+
_command_interrupted = False
|
225
|
+
|
219
226
|
while time.time() - start_time < max_wait + 5: # Add buffer time
|
227
|
+
# Check if we've been interrupted
|
228
|
+
if '_command_interrupted' in globals() and _command_interrupted:
|
229
|
+
# Send Ctrl+C to the running process
|
230
|
+
if self.process and self.process.poll() is None:
|
231
|
+
try:
|
232
|
+
# On Windows, we need to use CTRL_C_EVENT
|
233
|
+
import signal
|
234
|
+
self.process.send_signal(signal.CTRL_C_EVENT)
|
235
|
+
except:
|
236
|
+
# If that fails, try to terminate the process
|
237
|
+
try:
|
238
|
+
self.process.terminate()
|
239
|
+
except:
|
240
|
+
pass
|
241
|
+
|
242
|
+
# Add message to output
|
243
|
+
interrupt_msg = "Command interrupted by user (Ctrl+C)"
|
244
|
+
console.print(f"[bold red]{interrupt_msg}[/bold red]")
|
245
|
+
output_lines.append(interrupt_msg)
|
246
|
+
|
247
|
+
# Reset the bash session
|
248
|
+
self.close()
|
249
|
+
self.start_process()
|
250
|
+
|
251
|
+
break
|
252
|
+
|
220
253
|
try:
|
221
254
|
line = self.stdout.readline().rstrip('\r\n')
|
222
255
|
if end_marker in line:
|
@@ -243,7 +276,7 @@ class PersistentBash:
|
|
243
276
|
continue
|
244
277
|
|
245
278
|
# Check for timeout
|
246
|
-
if time.time() - start_time >= max_wait + 5:
|
279
|
+
if time.time() - start_time >= max_wait + 5 and not _command_interrupted:
|
247
280
|
timeout_msg = f"Error: Command timed out after {max_wait} seconds"
|
248
281
|
console.print(timeout_msg, style="red bold")
|
249
282
|
output_lines.append(timeout_msg)
|
@@ -1,34 +1,23 @@
|
|
1
|
-
"""
|
2
|
-
Webpage Content Extractor Package
|
3
|
-
|
4
|
-
A
|
5
|
-
for processing with LLMs. Features include:
|
6
|
-
-
|
7
|
-
-
|
8
|
-
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
from janito.tools.fetch_webpage.extractors import extract_clean_text, extract_targeted_content, extract_structured_content
|
24
|
-
from janito.tools.fetch_webpage.chunking import chunk_large_content
|
25
|
-
|
26
|
-
__all__ = [
|
27
|
-
'fetch_webpage',
|
28
|
-
'fetch_and_extract',
|
29
|
-
'fetch_and_extract_news_aggregator',
|
30
|
-
'extract_clean_text',
|
31
|
-
'extract_targeted_content',
|
32
|
-
'extract_structured_content',
|
33
|
-
'chunk_large_content'
|
1
|
+
"""
|
2
|
+
Webpage Content Extractor Package
|
3
|
+
|
4
|
+
A simplified tool for extracting clean, relevant content from web pages
|
5
|
+
for processing with LLMs. Features include:
|
6
|
+
- Streamlined content extraction using BeautifulSoup
|
7
|
+
- Clean HTML text extraction
|
8
|
+
- Efficient content chunking
|
9
|
+
|
10
|
+
Dependencies:
|
11
|
+
- requests
|
12
|
+
- beautifulsoup4
|
13
|
+
|
14
|
+
Author: Claude (Anthropic)
|
15
|
+
"""
|
16
|
+
|
17
|
+
from janito.tools.fetch_webpage.core import fetch_webpage, fetch_and_extract, chunk_content
|
18
|
+
|
19
|
+
__all__ = [
|
20
|
+
'fetch_webpage',
|
21
|
+
'fetch_and_extract',
|
22
|
+
'chunk_content'
|
34
23
|
]
|