janito 0.10.1__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. janito/__init__.py +1 -1
  2. janito/__main__.py +3 -147
  3. janito/callbacks.py +13 -109
  4. janito/cli/__init__.py +6 -0
  5. janito/cli/agent.py +287 -0
  6. janito/cli/app.py +86 -0
  7. janito/cli/commands.py +329 -0
  8. janito/cli/output.py +29 -0
  9. janito/cli/utils.py +22 -0
  10. janito/config.py +338 -63
  11. janito/data/instructions_template.txt +27 -0
  12. janito/token_report.py +124 -43
  13. janito/tools/__init__.py +29 -1
  14. janito/tools/bash/bash.py +82 -0
  15. janito/tools/bash/unix_persistent_bash.py +182 -0
  16. janito/tools/bash/win_persistent_bash.py +306 -0
  17. janito/tools/decorators.py +90 -84
  18. janito/tools/delete_file.py +65 -44
  19. janito/tools/fetch_webpage/__init__.py +34 -0
  20. janito/tools/fetch_webpage/chunking.py +76 -0
  21. janito/tools/fetch_webpage/core.py +155 -0
  22. janito/tools/fetch_webpage/extractors.py +276 -0
  23. janito/tools/fetch_webpage/news.py +137 -0
  24. janito/tools/fetch_webpage/utils.py +108 -0
  25. janito/tools/find_files.py +108 -42
  26. janito/tools/move_file.py +72 -0
  27. janito/tools/prompt_user.py +57 -0
  28. janito/tools/replace_file.py +63 -0
  29. janito/tools/rich_console.py +139 -0
  30. janito/tools/search_text.py +33 -21
  31. janito/tools/str_replace_editor/editor.py +55 -43
  32. janito/tools/str_replace_editor/handlers/__init__.py +16 -0
  33. janito/tools/str_replace_editor/handlers/create.py +60 -0
  34. janito/tools/str_replace_editor/handlers/insert.py +100 -0
  35. janito/tools/str_replace_editor/handlers/str_replace.py +92 -0
  36. janito/tools/str_replace_editor/handlers/undo.py +64 -0
  37. janito/tools/str_replace_editor/handlers/view.py +153 -0
  38. janito/tools/str_replace_editor/utils.py +7 -62
  39. janito/tools/usage_tracker.py +136 -0
  40. janito-0.12.0.dist-info/METADATA +203 -0
  41. janito-0.12.0.dist-info/RECORD +47 -0
  42. janito/cli.py +0 -202
  43. janito/data/instructions.txt +0 -4
  44. janito/tools/str_replace_editor/handlers.py +0 -338
  45. janito-0.10.1.dist-info/METADATA +0 -86
  46. janito-0.10.1.dist-info/RECORD +0 -23
  47. {janito-0.10.1.dist-info → janito-0.12.0.dist-info}/WHEEL +0 -0
  48. {janito-0.10.1.dist-info → janito-0.12.0.dist-info}/entry_points.txt +0 -0
  49. {janito-0.10.1.dist-info → janito-0.12.0.dist-info}/licenses/LICENSE +0 -0
@@ -1,84 +1,90 @@
1
- """
2
- Decorators for janito tools.
3
- """
4
- import functools
5
- import inspect
6
- import string
7
- from typing import Any, Callable, Dict, Optional, Tuple
8
-
9
-
10
- class ToolMetaFormatter(string.Formatter):
11
- """Custom string formatter that handles conditional expressions in format strings."""
12
-
13
- def get_value(self, key, args, kwargs):
14
- """Override to handle conditional expressions."""
15
- if key in kwargs:
16
- return kwargs[key]
17
-
18
- # Try to evaluate the key as a Python expression
19
- try:
20
- # Create a safe local namespace with only the parameters
21
- return eval(key, {"__builtins__": {}}, kwargs)
22
- except Exception:
23
- return f"[{key}]"
24
-
25
-
26
- def tool_meta(label: str):
27
- """
28
- Decorator to add metadata to a tool function.
29
-
30
- Args:
31
- label: A format string that can reference function parameters.
32
- Example: "Finding files {pattern}, on {root_dir}"
33
-
34
- Returns:
35
- Decorated function with metadata attached
36
- """
37
- def decorator(func: Callable):
38
- @functools.wraps(func)
39
- def wrapper(*args, **kwargs):
40
- return func(*args, **kwargs)
41
-
42
- # Attach metadata to the function
43
- wrapper._tool_meta = {
44
- 'label': label
45
- }
46
-
47
- return wrapper
48
-
49
- return decorator
50
-
51
-
52
- def format_tool_label(func: Callable, tool_input: Dict[str, Any]) -> Optional[str]:
53
- """
54
- Format the tool label using the function's parameters.
55
-
56
- Args:
57
- func: The tool function
58
- tool_input: Input parameters for the tool
59
-
60
- Returns:
61
- Formatted label string or None if no label is defined
62
- """
63
- if not hasattr(func, '_tool_meta') or 'label' not in func._tool_meta:
64
- return None
65
-
66
- # Get the label template
67
- label_template = func._tool_meta['label']
68
-
69
- # Special handling for str_replace_editor which uses **kwargs
70
- if func.__name__ == 'str_replace_editor':
71
- # Extract command and file_path from tool_input if they exist
72
- command = tool_input.get('command', 'unknown')
73
- file_path = tool_input.get('file_path', '')
74
-
75
- # Simple string replacement for the common case
76
- if '{command}' in label_template and '{file_path}' in label_template:
77
- return label_template.replace('{command}', command).replace('{file_path}', file_path)
78
-
79
- # Format the label with the parameters
80
- try:
81
- formatter = ToolMetaFormatter()
82
- return formatter.format(label_template, **tool_input)
83
- except Exception as e:
84
- return f"{func.__name__}"
1
+ """
2
+ Decorators for janito tools.
3
+ """
4
+ import functools
5
+ import string
6
+ from typing import Any, Callable, Dict, Optional
7
+
8
+
9
class ToolMetaFormatter(string.Formatter):
    """String formatter whose replacement fields may be small Python expressions.

    A field that names a keyword argument is substituted directly.  Any other
    string field (for example ``{a + b}`` or ``{x if flag else y}``) is
    evaluated as an expression with the keyword arguments as its local
    namespace.  Fields that cannot be resolved render as ``[field]`` instead
    of raising.
    """

    def get_value(self, key, args, kwargs):
        """Resolve a single replacement field.

        Args:
            key: The field name. ``string.Formatter`` passes an ``int`` for
                positional or auto-numbered fields and a ``str`` otherwise.
            args: Positional arguments given to ``format``.
            kwargs: Keyword arguments given to ``format``.

        Returns:
            The resolved value, or the placeholder string ``"[key]"`` when the
            field cannot be resolved.
        """
        # Positional ("{0}") and auto-numbered ("{}") fields arrive as ints.
        # They must be looked up in ``args``; handing an int to eval() raises
        # TypeError, which previously turned every positional field into "[0]".
        if isinstance(key, int):
            try:
                return args[key]
            except IndexError:
                return f"[{key}]"

        if key in kwargs:
            return kwargs[key]

        # SECURITY NOTE(review): emptying ``__builtins__`` does not make eval()
        # a sandbox -- attribute access on the supplied values is still
        # possible.  Only the *template* text is evaluated here, so templates
        # must come from trusted code, never from user- or model-supplied input.
        try:
            return eval(key, {"__builtins__": {}}, kwargs)
        except Exception:
            # Best effort: an unresolvable field must not break label rendering.
            return f"[{key}]"
23
+
24
+
25
def tool_meta(label: str):
    """
    Build a decorator that tags a tool function with a display label.

    Args:
        label: A format string that can reference function parameters.
               Example: "Finding files {pattern}, on {root_dir}"

    Returns:
        A decorator. Applying it yields a pass-through wrapper around the
        function whose ``_tool_meta`` attribute is ``{'label': label}``.
    """
    def attach_metadata(func: Callable):
        @functools.wraps(func)
        def passthrough(*args, **kwargs):
            # The wrapper adds no behaviour; it only carries the metadata.
            return func(*args, **kwargs)

        # A fresh dict per decorated function, so wrappers never share state.
        setattr(passthrough, '_tool_meta', {'label': label})
        return passthrough

    return attach_metadata
49
+
50
+
51
def tool(func: Callable):
    """
    Minimal decorator for tool functions that need no extra metadata.

    The returned wrapper forwards every call to ``func`` unchanged and, via
    ``functools.wraps``, keeps the wrapped function's name and docstring.

    Returns:
        Decorated function
    """
    def forward(*args, **kwargs):
        return func(*args, **kwargs)

    # Equivalent to decorating ``forward`` with @functools.wraps(func).
    return functools.wraps(func)(forward)
66
+
67
+
68
def format_tool_label(func: Callable, tool_input: Dict[str, Any]) -> Optional[str]:
    """
    Render the label attached to a tool function for a given set of inputs.

    Args:
        func: The tool function
        tool_input: Input parameters for the tool

    Returns:
        The formatted label, ``None`` when the function carries no
        ``_tool_meta['label']``, or the function's name when formatting fails.
    """
    # Guard clauses: nothing to format without label metadata.
    if not hasattr(func, '_tool_meta'):
        return None
    meta = func._tool_meta
    if 'label' not in meta:
        return None

    template = meta['label']

    try:
        return ToolMetaFormatter().format(template, **tool_input)
    except Exception:
        # Any formatting problem degrades to the bare function name.
        return f"{func.__name__}"
@@ -1,44 +1,65 @@
1
- """
2
- Tool for deleting files through the claudine agent.
3
- """
4
- import os
5
- from pathlib import Path
6
- from typing import Dict, Any, Tuple
7
- from janito.config import get_config
8
- from janito.tools.str_replace_editor.utils import normalize_path
9
- from janito.tools.decorators import tool_meta
10
-
11
-
12
- @tool_meta(label="Deleting file {file_path}")
13
- def delete_file(
14
- file_path: str,
15
- ) -> Tuple[str, bool]:
16
- """
17
- Delete an existing file.
18
-
19
- Args:
20
- file_path: Path to the file to delete, relative to the workspace directory
21
-
22
- Returns:
23
- A tuple containing (message, is_error)
24
- """
25
- # Normalize the file path
26
- path = normalize_path(file_path)
27
-
28
- # Convert to Path object for better path handling
29
- path_obj = Path(path)
30
-
31
- # Check if the file exists
32
- if not path_obj.exists():
33
- return (f"File {path} does not exist.", True)
34
-
35
- # Check if it's a directory
36
- if path_obj.is_dir():
37
- return (f"{path} is a directory, not a file. Use delete_directory for directories.", True)
38
-
39
- # Delete the file
40
- try:
41
- path_obj.unlink()
42
- return (f"Successfully deleted file {path}", False)
43
- except Exception as e:
44
- return (f"Error deleting file {path}: {str(e)}", True)
1
+ """
2
+ Tool for deleting files through the claudine agent.
3
+ """
4
+ from pathlib import Path
5
+ from typing import Tuple
6
+ from janito.tools.str_replace_editor.utils import normalize_path
7
+ from janito.tools.rich_console import print_info, print_success, print_error
8
+ from janito.tools.usage_tracker import track_usage, get_tracker
9
+
10
+
11
@track_usage('files_deleted')
def delete_file(
    file_path: str,
) -> Tuple[str, bool]:
    """
    Delete an existing file.

    The file's line count is read before deletion, but the negative
    ``lines_delta`` is recorded with the usage tracker only after the file has
    actually been removed, so a failed deletion never skews the statistics.

    Args:
        file_path: Path to the file to delete, relative to the workspace directory

    Returns:
        A tuple containing (message, is_error)
    """
    print_info(f"Deleting file {file_path}", "Delete Operation")
    # Messages show the path as the caller wrote it, not the normalized form.
    original_path = file_path

    # Normalize the file path and wrap it for filesystem operations
    path_obj = Path(normalize_path(file_path))

    if not path_obj.exists():
        error_msg = f"File {original_path} does not exist."
        print_error(error_msg, "Error")
        return (error_msg, True)

    if path_obj.is_dir():
        error_msg = f"{original_path} is a directory, not a file. Use delete_directory for directories."
        print_error(error_msg, "Error")
        return (error_msg, True)

    # Count lines while the file still exists. Iterating the handle avoids
    # loading the whole file into memory just to count it.
    line_count = None
    try:
        with open(path_obj, 'r', encoding='utf-8') as f:
            line_count = sum(1 for _ in f)
    except (OSError, UnicodeError):
        # Unreadable or non-UTF-8 file: skip line accounting, still delete.
        line_count = None

    try:
        path_obj.unlink()
    except Exception as e:
        error_msg = f"Error deleting file {original_path}: {str(e)}"
        print_error(error_msg, "Error")
        return (error_msg, True)

    # Record the negative delta only now that the deletion has succeeded.
    if line_count is not None:
        try:
            get_tracker().increment('lines_delta', -line_count)
        except Exception:
            # Usage tracking is best effort and must not fail the deletion.
            pass

    success_msg = f"Successfully deleted file {original_path}"
    print_success("", "Success")
    return (success_msg, False)
@@ -0,0 +1,34 @@
1
+ """
2
+ Webpage Content Extractor Package
3
+
4
+ A comprehensive tool for extracting clean, relevant content from web pages
5
+ for processing with LLMs. Features include:
6
+ - General content extraction with multiple methods
7
+ - Specialized handling for news aggregator sites
8
+ - Targeted extraction based on specific search strings
9
+ - Chunking for large content
10
+ - Structured content extraction
11
+
12
+ Dependencies:
13
+ - requests
14
+ - beautifulsoup4
15
+ - trafilatura
16
+ - newspaper3k
17
+
18
+ Author: Claude (Anthropic)
19
+ """
20
+
21
+ from janito.tools.fetch_webpage.core import fetch_webpage, fetch_and_extract
22
+ from janito.tools.fetch_webpage.news import fetch_and_extract_news_aggregator
23
+ from janito.tools.fetch_webpage.extractors import extract_clean_text, extract_targeted_content, extract_structured_content
24
+ from janito.tools.fetch_webpage.chunking import chunk_large_content
25
+
26
+ __all__ = [
27
+ 'fetch_webpage',
28
+ 'fetch_and_extract',
29
+ 'fetch_and_extract_news_aggregator',
30
+ 'extract_clean_text',
31
+ 'extract_targeted_content',
32
+ 'extract_structured_content',
33
+ 'chunk_large_content'
34
+ ]
@@ -0,0 +1,76 @@
1
+ """
2
+ Functions for chunking large content into manageable pieces.
3
+ """
4
+
5
+ from typing import List
6
+ from janito.tools.rich_console import print_info, print_success
7
+
8
+
9
def _split_paragraph_on_words(para: str, chunk_size: int, overlap: int) -> List[str]:
    """Split one oversized paragraph on whitespace into pieces of about chunk_size."""
    pieces: List[str] = []
    piece = ""
    for word in para.split():
        if not piece:
            # Never flush an empty piece: a first word longer than chunk_size
            # used to produce a spurious "" chunk.
            piece = word
        elif len(piece) + len(word) + 1 > chunk_size:
            pieces.append(piece)
            # Start the next piece with the tail of the one just flushed.
            if overlap > 0 and len(piece) > overlap:
                piece = piece[-overlap:] + " " + word
            else:
                piece = word
        else:
            piece += " " + word
    if piece:
        pieces.append(piece)
    return pieces


def chunk_large_content(text: str, chunk_size: int = 4000, overlap: int = 500) -> List[str]:
    """
    Split very large text content into manageable chunks suitable for LLM processing.

    Text is split on blank lines first; paragraphs are packed into a chunk
    until the next one would push it past ``chunk_size``.  A paragraph that is
    itself longer than ``chunk_size`` is always split on whitespace, whether
    or not a chunk is already in progress.  ``chunk_size`` is a target, not a
    hard limit: the overlap prefix, or a single word longer than
    ``chunk_size``, can make a chunk exceed it.

    Args:
        text: The text to chunk
        chunk_size: Target size for each chunk in characters
        overlap: Number of characters to overlap between chunks

    Returns:
        List of text chunks (empty list for empty input)
    """
    if not text:
        return []
    if len(text) <= chunk_size:
        return [text]

    print_info(f"Chunking {len(text)} characters of text into ~{chunk_size} character chunks", "Content Chunking")

    chunks: List[str] = []
    current_chunk = ""

    for para in text.split('\n\n'):
        # Paragraph fits: extend the running chunk (the 2 is the "\n\n" joiner).
        if len(current_chunk) + len(para) + 2 <= chunk_size:
            current_chunk = current_chunk + "\n\n" + para if current_chunk else para
            continue

        # Paragraph does not fit: flush the running chunk, keep its tail.
        carry = ""
        if current_chunk:
            chunks.append(current_chunk)
            if overlap > 0 and len(current_chunk) > overlap:
                carry = current_chunk[-overlap:]

        if len(para) > chunk_size:
            # Oversized paragraph: split it even when a chunk was in progress
            # (previously it was emitted whole in that case).
            pieces = _split_paragraph_on_words(para, chunk_size, overlap)
            if carry and pieces:
                pieces[0] = carry + "\n\n" + pieces[0]
            chunks.extend(pieces[:-1])
            # The last piece stays open so following paragraphs can join it.
            current_chunk = pieces[-1] if pieces else ""
        else:
            current_chunk = carry + "\n\n" + para if carry else para

    # Don't forget the last chunk
    if current_chunk:
        chunks.append(current_chunk)

    print_success(f"Text chunked into {len(chunks)} segments", "Content Chunking")
    return chunks
@@ -0,0 +1,155 @@
1
+ """
2
+ Core functionality for fetching web pages and extracting content.
3
+ """
4
+
5
+ import requests
6
+ from typing import Tuple, List, Optional
7
+ from urllib.parse import urlparse
8
+ from janito.tools.rich_console import print_info, print_success, print_error, print_warning
9
+ from janito.tools.usage_tracker import track_usage
10
+
11
+ from janito.tools.fetch_webpage.extractors import extract_clean_text
12
+ # Import moved to fetch_and_extract function to avoid circular imports
13
+ from janito.tools.fetch_webpage.utils import SITE_SPECIFIC_STRATEGIES
14
+
15
+
16
@track_usage('web_requests')
def fetch_webpage(url: str, headers: dict = None, timeout: int = 30, max_size: int = 5000000,
                  target_strings: List[str] = None) -> Tuple[str, bool]:
    """
    Fetch the content of a web page from a given URL.

    The body is streamed and decoded as UTF-8 with undecodable bytes replaced.
    A download that grows past ``max_size`` is cut to exactly ``max_size``
    bytes.  The HTTP response is closed on every exit path.

    Args:
        url: The URL of the web page to fetch
        headers: Optional HTTP headers to include in the request (default: None)
        timeout: Request timeout in seconds (default: 30)
        max_size: Maximum size in bytes to download (default: 5MB)
        target_strings: Optional list of strings to target specific content sections

    Returns:
        A tuple containing (message, is_error). On success the message is the
        page content, or the targeted content when ``target_strings`` matched.
    """
    print_info(f"Fetching content from URL: {url}", "Web Fetch")

    # Set default headers if none provided
    if headers is None:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }

    try:
        # With stream=True the connection stays open until the response is
        # closed; the context manager releases it on early returns too.
        with requests.get(url, headers=headers, timeout=timeout, stream=True) as response:
            # Raise an exception for HTTP errors
            response.raise_for_status()

            # Check the declared length before downloading. A malformed header
            # is ignored rather than allowed to raise ValueError.
            content_length = response.headers.get('Content-Length')
            try:
                declared_size = int(content_length) if content_length else None
            except ValueError:
                declared_size = None
            if declared_size is not None and declared_size > max_size:
                warning_msg = f"Web Fetch: Content size ({declared_size/1000000:.1f}MB) exceeds max size ({max_size/1000000:.1f}MB). Aborting download."
                print_warning(warning_msg)
                return warning_msg, True

            # A bytearray grows in place, so accumulation is not quadratic.
            received = bytearray()
            for chunk in response.iter_content(chunk_size=1024 * 1024):  # 1MB chunks
                received += chunk
                if len(received) > max_size:
                    warning_msg = f"Web Fetch: Download exceeded max size ({max_size/1000000:.1f}MB). Truncating."
                    print_warning(warning_msg)
                    # Actually truncate: drop what the last chunk added past the limit.
                    del received[max_size:]
                    break

        # errors='replace' also covers a multi-byte character cut by truncation.
        content = received.decode('utf-8', errors='replace')

        # If target strings are provided, extract only the relevant sections
        if target_strings and len(target_strings) > 0:
            print_info(f"Targeting specific content using {len(target_strings)} search strings", "Web Fetch")
            # Imported here rather than at module level, as in the original.
            from janito.tools.fetch_webpage.extractors import extract_targeted_content
            targeted_content = extract_targeted_content(content, target_strings)

            if targeted_content:
                print_success("Successfully targeted specific content based on search strings", "Web Fetch")
                print_success(f"Successfully fetched targeted content from {url} ({len(targeted_content)} bytes)", "Web Fetch")
                return targeted_content, False

            print_warning("Web Fetch: Could not find content matching the target strings. Returning full content.")

        print_success(f"({len(content)} bytes)", "Web Fetch")

        # Return the full content
        return content, False

    except requests.exceptions.RequestException as e:
        error_msg = f"Error fetching web page: {str(e)}"
        print_error(error_msg, "Web Fetch Error")
        return error_msg, True
94
+
95
+
96
@track_usage('web_content')
def fetch_and_extract(url: str, extract_method: str = 'trafilatura',
                      max_length: int = 10000,
                      target_strings: List[str] = None) -> Tuple[str, bool]:
    """
    Download a page and reduce it to text suitable for LLM processing.

    URLs whose host contains a key of ``SITE_SPECIFIC_STRATEGIES`` are handed
    to ``fetch_and_extract_news_aggregator``. Every other URL is fetched with
    ``fetch_webpage``, cleaned with ``extract_clean_text`` and truncated to
    ``max_length`` characters plus a trailing "...".

    Args:
        url: The URL to fetch
        extract_method: Content extraction method ('trafilatura', 'newspaper', 'beautifulsoup', 'all')
        max_length: Maximum length of text to return
        target_strings: Optional list of strings to target specific content sections

    Returns:
        A tuple containing (extracted_content, is_error)
    """
    host = urlparse(url).netloc

    # Hosts with a site-specific strategy take the specialised news path.
    if any(site_domain in host for site_domain in SITE_SPECIFIC_STRATEGIES.keys()):
        print_info(f"Detected news aggregator site: {host}. Using specialized extraction.", "Content Extraction")
        # Import here to avoid circular imports
        from janito.tools.fetch_webpage.news import fetch_and_extract_news_aggregator
        return fetch_and_extract_news_aggregator(url)

    has_targets = bool(target_strings and len(target_strings) > 0)

    # Pass target strings through only when there are some, so the call made
    # in each case is exactly the one the two-branch form made.
    fetch_kwargs = {'target_strings': target_strings} if has_targets else {}
    html_content, is_error = fetch_webpage(url, **fetch_kwargs)
    if is_error:
        return html_content, True

    extracted_text = extract_clean_text(html_content, method=extract_method, url=url)
    if not extracted_text or len(extracted_text) < 100:
        return f"Could not extract meaningful content from {url}", True

    if has_targets:
        # Only targets longer than three characters count towards a hit.
        found_target = any(
            target in extracted_text for target in target_strings if len(target) > 3
        )
        if not found_target:
            from janito.tools.fetch_webpage.extractors import extract_targeted_content
            targeted_content = extract_targeted_content(html_content, target_strings)
            if targeted_content:
                print_success(f"Successfully extracted targeted content based on {len(target_strings)} search strings",
                              "Targeted Extraction")
                extracted_text = targeted_content

    # Truncate if needed
    if len(extracted_text) > max_length:
        print_info(f"Truncating content from {len(extracted_text)} to {max_length} characters", "Content Extraction")
        extracted_text = extracted_text[:max_length] + "..."

    # Whitespace-separated word count as a rough size estimate
    estimated_tokens = len(extracted_text.split())
    if estimated_tokens > 10000:  # Conservative estimate for token limits
        print_warning(f"Content Extraction: Extracted content still very large (~{estimated_tokens} words). Consider using chunk_large_content()")

    print_success(f"Successfully extracted {len(extracted_text)} characters of content", "Content Extraction")
    return extracted_text, False