khoj 2.0.0b14.dev9__py3-none-any.whl → 2.0.0b14.dev43__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/9808-c0742b05e1ef29ba.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-f04757fab73908a4.js → page-e291b49977f43880.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-fb0e9353e86acd25.js → page-1047097af99d31c7.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-fd693f65831a2f97.js → page-1b4893b1a9957220.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-89f5654035b07c00.js → page-1567cac7b79a7c59.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-6ca71d3d56fc6935.js → page-3639e50ec3e9acfd.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-a798de3944f59629.js → page-6081362437c82470.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-8addeb8079c3215b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-07d7ff92aee0bb69.js → page-819c6536c15e3d31.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-8087292aa01e8e55.js → webpack-5393aad3d824e0cb.js} +1 -1
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
- khoj/interface/compiled/_next/static/css/5c7a72bad47e50b3.css +25 -0
- khoj/interface/compiled/_next/static/css/c34713c98384ee87.css +1 -0
- khoj/interface/compiled/_next/static/css/cea3bdfe98c144bd.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +3 -3
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +4 -4
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/google/utils.py +1 -1
- khoj/processor/conversation/openai/utils.py +35 -2
- khoj/processor/conversation/prompts.py +32 -21
- khoj/processor/tools/run_code.py +15 -22
- khoj/routers/api_chat.py +3 -1
- khoj/routers/helpers.py +44 -38
- khoj/utils/helpers.py +50 -10
- {khoj-2.0.0b14.dev9.dist-info → khoj-2.0.0b14.dev43.dist-info}/METADATA +1 -1
- {khoj-2.0.0b14.dev9.dist-info → khoj-2.0.0b14.dev43.dist-info}/RECORD +50 -50
- khoj/interface/compiled/_next/static/chunks/7127-9273a602fbda737e.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-9781b62e39ca7785.js +0 -1
- khoj/interface/compiled/_next/static/css/3090706713c12a32.css +0 -25
- khoj/interface/compiled/_next/static/css/a0c2fd63bb396f04.css +0 -1
- khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +0 -1
- khoj/interface/compiled/_next/static/css/fbacbdfd5e7f3f0e.css +0 -1
- /khoj/interface/compiled/_next/static/{D_w4o2vgOqOdUhGFnbYgh → OKbGpkzD6gHDfr1vAog6p}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{D_w4o2vgOqOdUhGFnbYgh → OKbGpkzD6gHDfr1vAog6p}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1327-511bb0a862efce80.js → 1327-e254819a9172cfa7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9139-ce1ae935dac9c871.js → 9139-8ac4d9feb10f8869.js} +0 -0
- {khoj-2.0.0b14.dev9.dist-info → khoj-2.0.0b14.dev43.dist-info}/WHEEL +0 -0
- {khoj-2.0.0b14.dev9.dist-info → khoj-2.0.0b14.dev43.dist-info}/entry_points.txt +0 -0
- {khoj-2.0.0b14.dev9.dist-info → khoj-2.0.0b14.dev43.dist-info}/licenses/LICENSE +0 -0
khoj/processor/tools/run_code.py
CHANGED

```diff
@@ -49,7 +49,7 @@ class GeneratedCode(NamedTuple):


 async def run_code(
-
+    instructions: str,
     conversation_history: List[ChatMessageModel],
     context: str,
     location_data: LocationData,
@@ -63,12 +63,12 @@ async def run_code(
 ):
     # Generate Code
     if send_status_func:
-        async for event in send_status_func(f"**Generate code snippet** for {
+        async for event in send_status_func(f"**Generate code snippet** for {instructions}"):
             yield {ChatEvent.STATUS: event}
     try:
         with timer("Chat actor: Generate programs to execute", logger):
             generated_code = await generate_python_code(
-
+                instructions,
                 conversation_history,
                 context,
                 location_data,
@@ -79,7 +79,7 @@ async def run_code(
                 query_files,
             )
     except Exception as e:
-        raise ValueError(f"Failed to generate code for {
+        raise ValueError(f"Failed to generate code for {instructions} with error: {e}")

     # Prepare Input Data
     input_data = []
@@ -101,21 +101,21 @@ async def run_code(
         code = result.pop("code")
         cleaned_result = truncate_code_context({"cleaned": {"results": result}})["cleaned"]["results"]
         logger.info(f"Executed Code\n----\n{code}\n----\nResult\n----\n{cleaned_result}\n----")
-        yield {
+        yield {instructions: {"code": code, "results": result}}
     except asyncio.TimeoutError as e:
         # Call the sandbox_url/stop GET API endpoint to stop the code sandbox
-        error = f"Failed to run code for {
+        error = f"Failed to run code for {instructions} with Timeout error: {e}"
         try:
             await aiohttp.ClientSession().get(f"{sandbox_url}/stop", timeout=5)
         except Exception as e:
             error += f"\n\nFailed to stop code sandbox with error: {e}"
         raise ValueError(error)
     except Exception as e:
-        raise ValueError(f"Failed to run code for {
+        raise ValueError(f"Failed to run code for {instructions} with error: {e}")


 async def generate_python_code(
-
+    instructions: str,
     chat_history: List[ChatMessageModel],
     context: str,
     location_data: LocationData,
@@ -142,7 +142,7 @@ async def generate_python_code(
     network_access_context = "**NO** " if not is_e2b_code_sandbox_enabled() else ""

     code_generation_prompt = prompts.python_code_generation_prompt.format(
-
+        instructions=instructions,
         chat_history=chat_history_str,
         context=context,
         has_network_access=network_access_context,
@@ -252,8 +252,12 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:

     # Identify new files created during execution
     new_files = set(E2bFile(f.name, f.path) for f in await sandbox.files.list("~")) - original_files
+
     # Read newly created files in parallel
-
+    def read_format(f):
+        return "bytes" if Path(f.name).suffix in image_file_ext else "text"
+
+    download_tasks = [sandbox.files.read(f.path, format=read_format(f), request_timeout=30) for f in new_files]
     downloaded_files = await asyncio.gather(*download_tasks)
     for f, content in zip(new_files, downloaded_files):
         if isinstance(content, bytes):
@@ -261,23 +265,12 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
             b64_data = base64.b64encode(content).decode("utf-8")
         elif Path(f.name).suffix in image_file_ext:
             # Ignore image files as they are extracted from execution results below for inline display
-
+            b64_data = base64.b64encode(content).decode("utf-8")
         else:
             # Text files - encode utf-8 string as base64
             b64_data = content
         output_files.append({"filename": f.name, "b64_data": b64_data})

-    # Collect output files from execution results
-    # Repect ordering of output result types to disregard text output associated with images
-    output_result_types = ["png", "jpeg", "svg", "text", "markdown", "json"]
-    for idx, result in enumerate(execution.results):
-        if getattr(result, "chart", None):
-            continue
-        for result_type in output_result_types:
-            if b64_data := getattr(result, result_type, None):
-                output_files.append({"filename": f"{idx}.{result_type}", "b64_data": b64_data})
-                break
-
     # collect logs
     success = not execution.error and not execution.logs.stderr
     stdout = "\n".join(execution.logs.stdout)
```
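The last two hunks switch `execute_e2b` to picking a read format per file, fetching image files as raw bytes and everything else as text, and downloading them concurrently. A minimal sketch of that pattern with the E2B sandbox call stubbed out; `image_file_ext` and the fake read below are assumptions for illustration, not the module's actual definitions:

```python
import asyncio
import base64
from pathlib import Path

# Assumed stand-in for the image_file_ext set referenced in the diff.
image_file_ext = {".png", ".jpg", ".jpeg", ".webp"}


def read_format(name: str) -> str:
    # Read images as raw bytes so they can be base64 encoded; read everything else as text.
    return "bytes" if Path(name).suffix in image_file_ext else "text"


async def fake_sandbox_read(path: str, format: str):
    # Stand-in for the sandbox file read; returns bytes or str depending on the requested format.
    return b"\x89PNG..." if format == "bytes" else "hello world"


async def download_all(paths: list[str]) -> list[dict]:
    # Dispatch all reads concurrently, as the new code does with asyncio.gather.
    contents = await asyncio.gather(*(fake_sandbox_read(p, read_format(p)) for p in paths))
    output_files = []
    for path, content in zip(paths, contents):
        b64_data = base64.b64encode(content).decode("utf-8") if isinstance(content, bytes) else content
        output_files.append({"filename": Path(path).name, "b64_data": b64_data})
    return output_files


print(asyncio.run(download_all(["~/plot.png", "~/report.txt"])))
```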
khoj/routers/api_chat.py
CHANGED

```diff
@@ -1526,6 +1526,8 @@ async def chat_ws(
                 ack_type = "interrupt_acknowledged"
                 await websocket.send_text(json.dumps({"type": ack_type}))
             else:
+                ack_type = "interrupt_acknowledged"
+                await websocket.send_text(json.dumps({"type": ack_type}))
                 logger.info(f"No ongoing task to interrupt for user {websocket.scope['user'].object.id}")
                 continue

@@ -1704,8 +1706,8 @@ async def process_chat_request(
         logger.debug(f"Chat request cancelled for user {websocket.scope['user'].object.id}")
         raise
     except Exception as e:
-        logger.error(f"Error processing chat request: {e}", exc_info=True)
         await websocket.send_text(json.dumps({"error": "Internal server error"}))
+        logger.error(f"Error processing chat request: {e}", exc_info=True)
         raise

```
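The first hunk makes the websocket handler acknowledge an interrupt request even when there is no ongoing task to cancel, so the client always receives an `interrupt_acknowledged` message; the second sends the error payload to the client before logging it. A rough sketch of the always-acknowledge pattern using plain asyncio and a stubbed send function rather than the actual FastAPI WebSocket types:

```python
import asyncio
import json


async def handle_interrupt(ongoing_task, send_text):
    # Cancel the task if one is running, but acknowledge the interrupt either way.
    if ongoing_task is not None and not ongoing_task.done():
        ongoing_task.cancel()
    await send_text(json.dumps({"type": "interrupt_acknowledged"}))


async def main():
    async def send_text(payload: str):
        print("sent:", payload)

    task = asyncio.create_task(asyncio.sleep(10))
    await handle_interrupt(task, send_text)  # cancels the task and acknowledges
    await handle_interrupt(None, send_text)  # nothing to cancel, still acknowledges


asyncio.run(main())
```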
khoj/routers/helpers.py
CHANGED

```diff
@@ -1625,6 +1625,7 @@ async def agenerate_chat_response(
         deepthought = True

     chat_model = await ConversationAdapters.aget_valid_chat_model(user, conversation, is_subscribed)
+    max_prompt_size = await ConversationAdapters.aget_max_context_size(chat_model, user)
     vision_available = chat_model.vision_enabled
     if not vision_available and query_images:
         vision_enabled_config = await ConversationAdapters.aget_vision_enabled_config()
@@ -1656,7 +1657,7 @@ async def agenerate_chat_response(
             model=chat_model_name,
             api_key=api_key,
             api_base_url=openai_chat_config.api_base_url,
-            max_prompt_size=
+            max_prompt_size=max_prompt_size,
             tokenizer_name=chat_model.tokenizer,
             agent=agent,
             vision_available=vision_available,
@@ -1687,7 +1688,7 @@ async def agenerate_chat_response(
             model=chat_model.name,
             api_key=api_key,
             api_base_url=api_base_url,
-            max_prompt_size=
+            max_prompt_size=max_prompt_size,
             tokenizer_name=chat_model.tokenizer,
             agent=agent,
             vision_available=vision_available,
@@ -1717,7 +1718,7 @@ async def agenerate_chat_response(
             model=chat_model.name,
             api_key=api_key,
             api_base_url=api_base_url,
-            max_prompt_size=
+            max_prompt_size=max_prompt_size,
             tokenizer_name=chat_model.tokenizer,
             agent=agent,
             vision_available=vision_available,
@@ -2738,7 +2739,9 @@ def configure_content(

     try:
         # Initialize Org Notes Search
-        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Org.value) and files
+        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Org.value) and files.get(
+            "org"
+        ):
             logger.info("🦄 Setting up search for orgmode notes")
             # Extract Entries, Generate Notes Embeddings
             text_search.setup(
@@ -2753,9 +2756,9 @@ def configure_content(

     try:
         # Initialize Markdown Search
-        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Markdown.value) and files
+        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Markdown.value) and files.get(
             "markdown"
-
+        ):
             logger.info("💎 Setting up search for markdown notes")
             # Extract Entries, Generate Markdown Embeddings
             text_search.setup(
@@ -2771,7 +2774,9 @@ def configure_content(

     try:
         # Initialize PDF Search
-        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Pdf.value) and files
+        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Pdf.value) and files.get(
+            "pdf"
+        ):
             logger.info("🖨️ Setting up search for pdf")
             # Extract Entries, Generate PDF Embeddings
             text_search.setup(
@@ -2787,9 +2792,9 @@ def configure_content(

     try:
         # Initialize Plaintext Search
-        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Plaintext.value) and files
+        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Plaintext.value) and files.get(
             "plaintext"
-
+        ):
             logger.info("📄 Setting up search for plaintext")
             # Extract Entries, Generate Plaintext Embeddings
             text_search.setup(
```
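The `configure_content` hunks tighten each guard from `and files`, which is true for any non-empty dict, to `and files.get("org")` and friends, so an indexer only runs when its own content type is actually present. A toy illustration of the difference (the real `files` mapping holds parsed file contents, not plain lists):

```python
files = {"org": [], "markdown": ["notes.md"]}

print(bool(files))                  # True: the old guard fires even with no org files
print(bool(files.get("org")))       # False: the new guard skips the org indexer
print(bool(files.get("markdown")))  # True: markdown indexing still runs
```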
```diff
@@ -2915,35 +2920,34 @@ async def view_file_content(
     raw_text = file_object.raw_text

     # Apply line range filtering if specified
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        logger.warning(error_msg)
-        yield [{"query": query, "file": path, "compiled": error_msg}]
-        return
+    lines = raw_text.split("\n")
+    start_line = start_line or 1
+    end_line = end_line or len(lines)
+
+    # Validate line range
+    if start_line < 1 or end_line < 1 or start_line > end_line:
+        error_msg = f"Invalid line range: {start_line}-{end_line}"
+        logger.warning(error_msg)
+        yield [{"query": query, "file": path, "compiled": error_msg}]
+        return
+    if start_line > len(lines):
+        error_msg = f"Start line {start_line} exceeds total number of lines {len(lines)}"
+        logger.warning(error_msg)
+        yield [{"query": query, "file": path, "compiled": error_msg}]
+        return

-
-
-
+    # Convert from 1-based to 0-based indexing and ensure bounds
+    start_idx = max(0, start_line - 1)
+    end_idx = min(len(lines), end_line)

-
-
+    # Limit to first 50 lines if more than 50 lines are requested
+    truncation_message = ""
+    if end_idx - start_idx > 50:
+        truncation_message = "\n\n[Truncated after 50 lines! Use narrower line range to view complete section.]"
+        end_idx = start_idx + 50

-
-
-        filtered_text = filtered_text[:10000] + "\n\n[Truncated. Use line numbers to view specific sections.]"
+    selected_lines = lines[start_idx:end_idx]
+    filtered_text = "\n".join(selected_lines) + truncation_message

     # Format the result as a document reference
     document_results = [
```
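The rewritten filtering in `view_file_content` converts the requested 1-based line range into 0-based slice indices, validates it, and caps the returned view at 50 lines in place of the old 10,000-character cut. A standalone sketch of just the slicing arithmetic, with the validation and yield plumbing from the hunk omitted:

```python
def slice_lines(raw_text: str, start_line: int | None = None, end_line: int | None = None) -> str:
    lines = raw_text.split("\n")
    start_line = start_line or 1
    end_line = end_line or len(lines)

    # Convert from 1-based to 0-based indexing and clamp to the file bounds.
    start_idx = max(0, start_line - 1)
    end_idx = min(len(lines), end_line)

    # Cap the view at 50 lines, mirroring the truncation notice in the hunk.
    truncation_message = ""
    if end_idx - start_idx > 50:
        truncation_message = "\n\n[Truncated after 50 lines! Use narrower line range to view complete section.]"
        end_idx = start_idx + 50

    return "\n".join(lines[start_idx:end_idx]) + truncation_message


text = "\n".join(f"line {i}" for i in range(1, 201))
print(slice_lines(text, start_line=10, end_line=120))  # lines 10-59 plus the truncation notice
```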
```diff
@@ -3022,6 +3026,7 @@ async def grep_files(
     file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, db_pattern, path_prefix)

     line_matches = []
+    line_matches_count = 0
     for file_object in file_matches:
         lines = file_object.raw_text.split("\n")
         matched_line_numbers = []
@@ -3030,6 +3035,7 @@ async def grep_files(
         for i, line in enumerate(lines, 1):
             if regex.search(line):
                 matched_line_numbers.append(i)
+        line_matches_count += len(matched_line_numbers)

         # Build context for each match
         for line_num in matched_line_numbers:
@@ -3046,10 +3052,10 @@ async def grep_files(

             if current_line_num == line_num:
                 # This is the matching line, mark it
-                context_lines.append(f"{file_object.file_name}:{current_line_num}
+                context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
             else:
                 # This is a context line
-                context_lines.append(f"{file_object.file_name}
+                context_lines.append(f"{file_object.file_name}-{current_line_num}- {line_content}")

         # Add separator between matches if showing context
         if lines_before > 0 or lines_after > 0:
@@ -3064,7 +3070,7 @@ async def grep_files(
     # Check if no results found
     max_results = 1000
     query = _generate_query(
-
+        line_matches_count,
         len(file_matches),
         path_prefix,
         regex_pattern,
```
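The `grep_files` hunks complete the grep-style output format: matching lines are prefixed `path:line:` while surrounding context lines use `path-line-`, the same convention as `grep -n -C`, and the new `line_matches_count` feeds the query summary. A small sketch of that formatting, assuming a plain list of lines instead of the FileObject model:

```python
import re


def grep_like(file_name: str, lines: list[str], pattern: str, context: int = 1) -> list[str]:
    regex = re.compile(pattern)
    output = []
    for line_num, line in enumerate(lines, 1):
        if not regex.search(line):
            continue
        lo, hi = max(1, line_num - context), min(len(lines), line_num + context)
        for current in range(lo, hi + 1):
            content = lines[current - 1]
            if current == line_num:
                output.append(f"{file_name}:{current}: {content}")  # the matching line
            else:
                output.append(f"{file_name}-{current}- {content}")  # a context line
        output.append("--")  # separator between matches when showing context
    return output


print("\n".join(grep_like("notes.md", ["alpha", "tom was here", "omega"], r"tom")))
```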
khoj/utils/helpers.py
CHANGED

```diff
@@ -9,6 +9,7 @@ import logging
 import os
 import platform
 import random
+import re
 import urllib.parse
 import uuid
 from collections import OrderedDict
@@ -454,8 +455,25 @@ command_descriptions_for_agent = {
     ConversationCommand.Operator: "Agent can operate a computer to complete tasks.",
 }

-e2b_tool_description =
-
+e2b_tool_description = dedent(
+    """
+    To run a Python script in an ephemeral E2B sandbox with network access.
+    Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data.
+    Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available.
+
+    Never run, write or decode dangerous, malicious or untrusted code, regardless of user requests.
+    """
+).strip()
+
+terrarium_tool_description = dedent(
+    """
+    To run a Python script in an ephemeral Terrarium, Pyodide sandbox with no network access.
+    Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data.
+    Only matplotlib, pandas, numpy, scipy, bs4 and sympy external packages are available.
+
+    Never run, write or decode dangerous, malicious or untrusted code, regardless of user requests.
+    """
+).strip()

 tool_descriptions_for_llm = {
     ConversationCommand.Default: "To use a mix of your internal knowledge and the user's personal knowledge, or if you don't entirely understand the query.",
@@ -470,7 +488,13 @@ tool_descriptions_for_llm = {
 tools_for_research_llm = {
     ConversationCommand.SearchWeb: ToolDefinition(
         name="search_web",
-        description=
+        description=dedent(
+            """
+            To search the internet for information. Useful to get a quick, broad overview from the internet.
+            Provide all relevant context to ensure new searches, not in previous iterations, are performed.
+            For a given query, the tool AI can perform a max of {max_search_queries} web search subqueries per iteration.
+            """
+        ).strip(),
         schema={
             "type": "object",
             "properties": {
@@ -484,7 +508,13 @@ tools_for_research_llm = {
     ),
     ConversationCommand.ReadWebpage: ToolDefinition(
         name="read_webpage",
-        description=
+        description=dedent(
+            """
+            To extract information from webpages. Useful for more detailed research from the internet.
+            Usually used when you know the webpage links to refer to.
+            Share upto {max_webpages_to_read} webpage links and what information to extract from them in your query.
+            """
+        ).strip(),
         schema={
             "type": "object",
             "properties": {
@@ -509,12 +539,12 @@ tools_for_research_llm = {
         schema={
             "type": "object",
             "properties": {
-                "
+                "instructions": {
                     "type": "string",
-                    "description": "Detailed
+                    "description": "Detailed instructions and all input data required for the Python Coder to generate and execute code in the sandbox.",
                 },
             },
-            "required": ["
+            "required": ["instructions"],
         },
     ),
     ConversationCommand.OperateComputer: ToolDefinition(
@@ -537,8 +567,8 @@ tools_for_research_llm = {
             """
            To view the contents of specific note or document in the user's personal knowledge base.
            Especially helpful if the question expects context from the user's notes or documents.
-            It can be used after finding the document path with
-
+            It can be used after finding the document path with other document search tools.
+            Specify a line range to efficiently read relevant sections of a file. You can view up to 50 lines at a time.
            """
        ).strip(),
        schema={
@@ -613,9 +643,12 @@ tools_for_research_llm = {
            Helpful to answer questions for which all relevant notes or documents are needed to complete the search. Example: "Notes that mention Tom".
            You need to know all the correct keywords or regex patterns for this tool to be useful.

-
+            IMPORTANT:
            - The regex pattern will ONLY match content on a single line. Multi-line matches are NOT supported (even if you use \\n).

+            TIPS:
+            - The output follows a grep-like format. Matches are prefixed with the file path and line number. Useful to combine with viewing file around specific line numbers.
+
            An optional path prefix can restrict search to specific files/directories.
            Use lines_before, lines_after to show context around matches.
            """
```
```diff
@@ -862,6 +895,13 @@ def truncate_code_context(original_code_results: dict[str, Any], max_chars=10000
                 "filename": output_file["filename"],
                 "b64_data": output_file["b64_data"][:max_chars] + "...",
             }
+        # Truncate long "words" in stdout, stderr. Words are alphanumeric strings not separated by whitespace.
+        for key in ["std_out", "std_err"]:
+            if key in code_result["results"]:
+                code_result["results"][key] = re.sub(
+                    r"\S{1000,}", lambda m: m.group(0)[:1000] + "...", code_result["results"][key]
+                )
+
     return code_results
```
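The new stdout/stderr cleanup in `truncate_code_context` trims any single whitespace-free run of 1000 characters or more, for example a base64 blob printed by generated code, while leaving ordinary output untouched. The regex from the hunk can be exercised on its own:

```python
import re


def truncate_long_words(text: str, limit: int = 1000) -> str:
    # Cut any run of non-whitespace characters at or above the limit and mark it with "...".
    return re.sub(rf"\S{{{limit},}}", lambda m: m.group(0)[:limit] + "...", text)


std_out = "result ready " + "A" * 5000 + " done"
print(len(truncate_long_words(std_out)))  # 1021: the 5000-char blob shrinks to 1000 chars plus "..."
```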