khoj 2.0.0b14.dev9__py3-none-any.whl → 2.0.0b14.dev43__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. khoj/interface/compiled/404/index.html +2 -2
  2. khoj/interface/compiled/_next/static/chunks/9808-c0742b05e1ef29ba.js +1 -0
  3. khoj/interface/compiled/_next/static/chunks/app/agents/{page-f04757fab73908a4.js → page-e291b49977f43880.js} +1 -1
  4. khoj/interface/compiled/_next/static/chunks/app/automations/{page-fb0e9353e86acd25.js → page-1047097af99d31c7.js} +1 -1
  5. khoj/interface/compiled/_next/static/chunks/app/chat/{page-fd693f65831a2f97.js → page-1b4893b1a9957220.js} +1 -1
  6. khoj/interface/compiled/_next/static/chunks/app/{page-89f5654035b07c00.js → page-1567cac7b79a7c59.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/search/{page-6ca71d3d56fc6935.js → page-3639e50ec3e9acfd.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/settings/{page-a798de3944f59629.js → page-6081362437c82470.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-8addeb8079c3215b.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-07d7ff92aee0bb69.js → page-819c6536c15e3d31.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/{webpack-8087292aa01e8e55.js → webpack-5393aad3d824e0cb.js} +1 -1
  12. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  13. khoj/interface/compiled/_next/static/css/5c7a72bad47e50b3.css +25 -0
  14. khoj/interface/compiled/_next/static/css/c34713c98384ee87.css +1 -0
  15. khoj/interface/compiled/_next/static/css/cea3bdfe98c144bd.css +1 -0
  16. khoj/interface/compiled/agents/index.html +2 -2
  17. khoj/interface/compiled/agents/index.txt +2 -2
  18. khoj/interface/compiled/automations/index.html +2 -2
  19. khoj/interface/compiled/automations/index.txt +3 -3
  20. khoj/interface/compiled/chat/index.html +2 -2
  21. khoj/interface/compiled/chat/index.txt +3 -3
  22. khoj/interface/compiled/index.html +2 -2
  23. khoj/interface/compiled/index.txt +2 -2
  24. khoj/interface/compiled/search/index.html +2 -2
  25. khoj/interface/compiled/search/index.txt +2 -2
  26. khoj/interface/compiled/settings/index.html +2 -2
  27. khoj/interface/compiled/settings/index.txt +4 -4
  28. khoj/interface/compiled/share/chat/index.html +2 -2
  29. khoj/interface/compiled/share/chat/index.txt +2 -2
  30. khoj/processor/conversation/google/utils.py +1 -1
  31. khoj/processor/conversation/openai/utils.py +35 -2
  32. khoj/processor/conversation/prompts.py +32 -21
  33. khoj/processor/tools/run_code.py +15 -22
  34. khoj/routers/api_chat.py +3 -1
  35. khoj/routers/helpers.py +44 -38
  36. khoj/utils/helpers.py +50 -10
  37. {khoj-2.0.0b14.dev9.dist-info → khoj-2.0.0b14.dev43.dist-info}/METADATA +1 -1
  38. {khoj-2.0.0b14.dev9.dist-info → khoj-2.0.0b14.dev43.dist-info}/RECORD +50 -50
  39. khoj/interface/compiled/_next/static/chunks/7127-9273a602fbda737e.js +0 -1
  40. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-9781b62e39ca7785.js +0 -1
  41. khoj/interface/compiled/_next/static/css/3090706713c12a32.css +0 -25
  42. khoj/interface/compiled/_next/static/css/a0c2fd63bb396f04.css +0 -1
  43. khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +0 -1
  44. khoj/interface/compiled/_next/static/css/fbacbdfd5e7f3f0e.css +0 -1
  45. /khoj/interface/compiled/_next/static/{D_w4o2vgOqOdUhGFnbYgh → OKbGpkzD6gHDfr1vAog6p}/_buildManifest.js +0 -0
  46. /khoj/interface/compiled/_next/static/{D_w4o2vgOqOdUhGFnbYgh → OKbGpkzD6gHDfr1vAog6p}/_ssgManifest.js +0 -0
  47. /khoj/interface/compiled/_next/static/chunks/{1327-511bb0a862efce80.js → 1327-e254819a9172cfa7.js} +0 -0
  48. /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
  49. /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
  50. /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
  51. /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
  52. /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
  53. /khoj/interface/compiled/_next/static/chunks/{9139-ce1ae935dac9c871.js → 9139-8ac4d9feb10f8869.js} +0 -0
  54. {khoj-2.0.0b14.dev9.dist-info → khoj-2.0.0b14.dev43.dist-info}/WHEEL +0 -0
  55. {khoj-2.0.0b14.dev9.dist-info → khoj-2.0.0b14.dev43.dist-info}/entry_points.txt +0 -0
  56. {khoj-2.0.0b14.dev9.dist-info → khoj-2.0.0b14.dev43.dist-info}/licenses/LICENSE +0 -0
khoj/processor/tools/run_code.py CHANGED
@@ -49,7 +49,7 @@ class GeneratedCode(NamedTuple):
 
 
 async def run_code(
-    query: str,
+    instructions: str,
     conversation_history: List[ChatMessageModel],
     context: str,
     location_data: LocationData,
@@ -63,12 +63,12 @@ async def run_code(
 ):
     # Generate Code
     if send_status_func:
-        async for event in send_status_func(f"**Generate code snippet** for {query}"):
+        async for event in send_status_func(f"**Generate code snippet** for {instructions}"):
             yield {ChatEvent.STATUS: event}
     try:
         with timer("Chat actor: Generate programs to execute", logger):
             generated_code = await generate_python_code(
-                query,
+                instructions,
                 conversation_history,
                 context,
                 location_data,
@@ -79,7 +79,7 @@ async def run_code(
                 query_files,
             )
     except Exception as e:
-        raise ValueError(f"Failed to generate code for {query} with error: {e}")
+        raise ValueError(f"Failed to generate code for {instructions} with error: {e}")
 
     # Prepare Input Data
     input_data = []
@@ -101,21 +101,21 @@ async def run_code(
             code = result.pop("code")
             cleaned_result = truncate_code_context({"cleaned": {"results": result}})["cleaned"]["results"]
             logger.info(f"Executed Code\n----\n{code}\n----\nResult\n----\n{cleaned_result}\n----")
-            yield {query: {"code": code, "results": result}}
+            yield {instructions: {"code": code, "results": result}}
     except asyncio.TimeoutError as e:
         # Call the sandbox_url/stop GET API endpoint to stop the code sandbox
-        error = f"Failed to run code for {query} with Timeout error: {e}"
+        error = f"Failed to run code for {instructions} with Timeout error: {e}"
         try:
             await aiohttp.ClientSession().get(f"{sandbox_url}/stop", timeout=5)
         except Exception as e:
            error += f"\n\nFailed to stop code sandbox with error: {e}"
         raise ValueError(error)
     except Exception as e:
-        raise ValueError(f"Failed to run code for {query} with error: {e}")
+        raise ValueError(f"Failed to run code for {instructions} with error: {e}")
 
 
 async def generate_python_code(
-    q: str,
+    instructions: str,
     chat_history: List[ChatMessageModel],
     context: str,
     location_data: LocationData,
@@ -142,7 +142,7 @@ async def generate_python_code(
     network_access_context = "**NO** " if not is_e2b_code_sandbox_enabled() else ""
 
     code_generation_prompt = prompts.python_code_generation_prompt.format(
-        query=q,
+        instructions=instructions,
         chat_history=chat_history_str,
         context=context,
         has_network_access=network_access_context,
@@ -252,8 +252,12 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
 
     # Identify new files created during execution
     new_files = set(E2bFile(f.name, f.path) for f in await sandbox.files.list("~")) - original_files
+
     # Read newly created files in parallel
-    download_tasks = [sandbox.files.read(f.path, request_timeout=30) for f in new_files]
+    def read_format(f):
+        return "bytes" if Path(f.name).suffix in image_file_ext else "text"
+
+    download_tasks = [sandbox.files.read(f.path, format=read_format(f), request_timeout=30) for f in new_files]
     downloaded_files = await asyncio.gather(*download_tasks)
     for f, content in zip(new_files, downloaded_files):
         if isinstance(content, bytes):
@@ -261,23 +265,12 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
             b64_data = base64.b64encode(content).decode("utf-8")
         elif Path(f.name).suffix in image_file_ext:
             # Ignore image files as they are extracted from execution results below for inline display
-            continue
+            b64_data = base64.b64encode(content).decode("utf-8")
         else:
             # Text files - encode utf-8 string as base64
             b64_data = content
         output_files.append({"filename": f.name, "b64_data": b64_data})
 
-    # Collect output files from execution results
-    # Repect ordering of output result types to disregard text output associated with images
-    output_result_types = ["png", "jpeg", "svg", "text", "markdown", "json"]
-    for idx, result in enumerate(execution.results):
-        if getattr(result, "chart", None):
-            continue
-        for result_type in output_result_types:
-            if b64_data := getattr(result, result_type, None):
-                output_files.append({"filename": f"{idx}.{result_type}", "b64_data": b64_data})
-                break
-
     # collect logs
     success = not execution.error and not execution.logs.stderr
     stdout = "\n".join(execution.logs.stdout)
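Taken together, the execute_e2b changes above mean newly created sandbox files are now downloaded in a format chosen per file extension, and image files are base64-encoded alongside other binary output instead of being skipped. A minimal sketch of that selection rule, using an illustrative image_file_ext set (khoj defines its own list):

from pathlib import Path

image_file_ext = {".png", ".jpg", ".jpeg", ".webp"}  # illustrative, not khoj's exact set


def read_format(filename: str) -> str:
    # Images are fetched as raw bytes so they can be base64-encoded for inline display;
    # everything else is fetched as text.
    return "bytes" if Path(filename).suffix in image_file_ext else "text"


assert read_format("chart.png") == "bytes"
assert read_format("report.txt") == "text"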
khoj/routers/api_chat.py CHANGED
@@ -1526,6 +1526,8 @@ async def chat_ws(
                 ack_type = "interrupt_acknowledged"
                 await websocket.send_text(json.dumps({"type": ack_type}))
             else:
+                ack_type = "interrupt_acknowledged"
+                await websocket.send_text(json.dumps({"type": ack_type}))
                 logger.info(f"No ongoing task to interrupt for user {websocket.scope['user'].object.id}")
                 continue
 
@@ -1704,8 +1706,8 @@ async def process_chat_request(
         logger.debug(f"Chat request cancelled for user {websocket.scope['user'].object.id}")
         raise
     except Exception as e:
-        logger.error(f"Error processing chat request: {e}", exc_info=True)
         await websocket.send_text(json.dumps({"error": "Internal server error"}))
+        logger.error(f"Error processing chat request: {e}", exc_info=True)
         raise
 
 
khoj/routers/helpers.py CHANGED
@@ -1625,6 +1625,7 @@ async def agenerate_chat_response(
         deepthought = True
 
     chat_model = await ConversationAdapters.aget_valid_chat_model(user, conversation, is_subscribed)
+    max_prompt_size = await ConversationAdapters.aget_max_context_size(chat_model, user)
     vision_available = chat_model.vision_enabled
     if not vision_available and query_images:
         vision_enabled_config = await ConversationAdapters.aget_vision_enabled_config()
@@ -1656,7 +1657,7 @@
             model=chat_model_name,
             api_key=api_key,
             api_base_url=openai_chat_config.api_base_url,
-            max_prompt_size=chat_model.max_prompt_size,
+            max_prompt_size=max_prompt_size,
             tokenizer_name=chat_model.tokenizer,
             agent=agent,
             vision_available=vision_available,
@@ -1687,7 +1688,7 @@
             model=chat_model.name,
             api_key=api_key,
             api_base_url=api_base_url,
-            max_prompt_size=chat_model.max_prompt_size,
+            max_prompt_size=max_prompt_size,
             tokenizer_name=chat_model.tokenizer,
             agent=agent,
             vision_available=vision_available,
@@ -1717,7 +1718,7 @@
             model=chat_model.name,
             api_key=api_key,
             api_base_url=api_base_url,
-            max_prompt_size=chat_model.max_prompt_size,
+            max_prompt_size=max_prompt_size,
             tokenizer_name=chat_model.tokenizer,
             agent=agent,
             vision_available=vision_available,
@@ -2738,7 +2739,9 @@ def configure_content(
 
     try:
         # Initialize Org Notes Search
-        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Org.value) and files["org"]:
+        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Org.value) and files.get(
+            "org"
+        ):
             logger.info("🦄 Setting up search for orgmode notes")
             # Extract Entries, Generate Notes Embeddings
             text_search.setup(
@@ -2753,9 +2756,9 @@
 
     try:
         # Initialize Markdown Search
-        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Markdown.value) and files[
+        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Markdown.value) and files.get(
             "markdown"
-        ]:
+        ):
             logger.info("💎 Setting up search for markdown notes")
             # Extract Entries, Generate Markdown Embeddings
             text_search.setup(
@@ -2771,7 +2774,9 @@
 
     try:
         # Initialize PDF Search
-        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Pdf.value) and files["pdf"]:
+        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Pdf.value) and files.get(
+            "pdf"
+        ):
             logger.info("🖨️ Setting up search for pdf")
             # Extract Entries, Generate PDF Embeddings
             text_search.setup(
@@ -2787,9 +2792,9 @@
 
     try:
         # Initialize Plaintext Search
-        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Plaintext.value) and files[
+        if (search_type == state.SearchType.All.value or search_type == state.SearchType.Plaintext.value) and files.get(
             "plaintext"
-        ]:
+        ):
             logger.info("📄 Setting up search for plaintext")
             # Extract Entries, Generate Plaintext Embeddings
             text_search.setup(
@@ -2915,35 +2920,34 @@ async def view_file_content(
         raw_text = file_object.raw_text
 
         # Apply line range filtering if specified
-        if start_line is None and end_line is None:
-            filtered_text = raw_text
-        else:
-            lines = raw_text.split("\n")
-            start_line = start_line or 1
-            end_line = end_line or len(lines)
-
-            # Validate line range
-            if start_line < 1 or end_line < 1 or start_line > end_line:
-                error_msg = f"Invalid line range: {start_line}-{end_line}"
-                logger.warning(error_msg)
-                yield [{"query": query, "file": path, "compiled": error_msg}]
-                return
-            if start_line > len(lines):
-                error_msg = f"Start line {start_line} exceeds total number of lines {len(lines)}"
-                logger.warning(error_msg)
-                yield [{"query": query, "file": path, "compiled": error_msg}]
-                return
+        lines = raw_text.split("\n")
+        start_line = start_line or 1
+        end_line = end_line or len(lines)
+
+        # Validate line range
+        if start_line < 1 or end_line < 1 or start_line > end_line:
+            error_msg = f"Invalid line range: {start_line}-{end_line}"
+            logger.warning(error_msg)
+            yield [{"query": query, "file": path, "compiled": error_msg}]
+            return
+        if start_line > len(lines):
+            error_msg = f"Start line {start_line} exceeds total number of lines {len(lines)}"
+            logger.warning(error_msg)
+            yield [{"query": query, "file": path, "compiled": error_msg}]
+            return
 
-            # Convert from 1-based to 0-based indexing and ensure bounds
-            start_idx = max(0, start_line - 1)
-            end_idx = min(len(lines), end_line)
+        # Convert from 1-based to 0-based indexing and ensure bounds
+        start_idx = max(0, start_line - 1)
+        end_idx = min(len(lines), end_line)
 
-            selected_lines = lines[start_idx:end_idx]
-            filtered_text = "\n".join(selected_lines)
+        # Limit to first 50 lines if more than 50 lines are requested
+        truncation_message = ""
+        if end_idx - start_idx > 50:
+            truncation_message = "\n\n[Truncated after 50 lines! Use narrower line range to view complete section.]"
+            end_idx = start_idx + 50
 
-        # Truncate the text if it's too long
-        if len(filtered_text) > 10000:
-            filtered_text = filtered_text[:10000] + "\n\n[Truncated. Use line numbers to view specific sections.]"
+        selected_lines = lines[start_idx:end_idx]
+        filtered_text = "\n".join(selected_lines) + truncation_message
 
         # Format the result as a document reference
         document_results = [
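The rewritten view_file_content above replaces the old 10,000 character cap with a 50 line window: any requested range longer than 50 lines is cut and a truncation notice is appended. A small sketch of that rule as a standalone, hypothetical helper (not part of khoj):

def clamp_line_window(start_line: int, end_line: int, max_lines: int = 50):
    # Convert a 1-based inclusive line range into slice indices, capping the window size.
    start_idx, end_idx = max(0, start_line - 1), end_line
    message = ""
    if end_idx - start_idx > max_lines:
        message = f"\n\n[Truncated after {max_lines} lines! Use narrower line range to view complete section.]"
        end_idx = start_idx + max_lines
    return start_idx, end_idx, message


print(clamp_line_window(1, 200))  # (0, 50, '\n\n[Truncated after 50 lines! ...]')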
@@ -3022,6 +3026,7 @@ async def grep_files(
         file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, db_pattern, path_prefix)
 
         line_matches = []
+        line_matches_count = 0
         for file_object in file_matches:
             lines = file_object.raw_text.split("\n")
             matched_line_numbers = []
@@ -3030,6 +3035,7 @@
 
             for i, line in enumerate(lines, 1):
                 if regex.search(line):
                     matched_line_numbers.append(i)
+            line_matches_count += len(matched_line_numbers)
 
             # Build context for each match
             for line_num in matched_line_numbers:
@@ -3046,10 +3052,10 @@
 
                     if current_line_num == line_num:
                         # This is the matching line, mark it
-                        context_lines.append(f"{file_object.file_name}:{current_line_num}:> {line_content}")
+                        context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
                     else:
                         # This is a context line
-                        context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
+                        context_lines.append(f"{file_object.file_name}-{current_line_num}- {line_content}")
 
                 # Add separator between matches if showing context
                 if lines_before > 0 or lines_after > 0:
@@ -3064,7 +3070,7 @@
         # Check if no results found
         max_results = 1000
         query = _generate_query(
-            len([m for m in line_matches if ":>" in m]),
+            line_matches_count,
             len(file_matches),
             path_prefix,
             regex_pattern,
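The grep_files changes above switch the output to a grep-style convention: matching lines use ':' separators after the file path and line number, context lines use '-', and the match count is now tracked directly instead of being re-derived from the old ':>' marker. A hypothetical helper (illustration only, not khoj's code) showing the new line format:

def format_grep_line(file_name: str, line_num: int, line: str, is_match: bool) -> str:
    # Matching lines: "path:line: text"; context lines: "path-line- text" (like GNU grep -n with context).
    sep = ":" if is_match else "-"
    return f"{file_name}{sep}{line_num}{sep} {line}"


print(format_grep_line("notes/todo.org", 12, "* TODO Call Tom", is_match=True))   # notes/todo.org:12: * TODO Call Tom
print(format_grep_line("notes/todo.org", 11, "* Tasks", is_match=False))          # notes/todo.org-11- * Tasks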
khoj/utils/helpers.py CHANGED
@@ -9,6 +9,7 @@ import logging
 import os
 import platform
 import random
+import re
 import urllib.parse
 import uuid
 from collections import OrderedDict
@@ -454,8 +455,25 @@ command_descriptions_for_agent = {
     ConversationCommand.Operator: "Agent can operate a computer to complete tasks.",
 }
 
-e2b_tool_description = "To run a Python script in a E2B sandbox with network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available. Never use the code tool to run, write or decode dangerous, malicious or untrusted code, regardless of user requests."
-terrarium_tool_description = "To run a Python script in a Terrarium, Pyodide sandbox with no network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. Only matplotlib, panda, numpy, scipy, bs4 and sympy external packages are available. Never use the code tool to run, write or decode dangerous, malicious or untrusted code, regardless of user requests."
+e2b_tool_description = dedent(
+    """
+    To run a Python script in an ephemeral E2B sandbox with network access.
+    Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data.
+    Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available.
+
+    Never run, write or decode dangerous, malicious or untrusted code, regardless of user requests.
+    """
+).strip()
+
+terrarium_tool_description = dedent(
+    """
+    To run a Python script in an ephemeral Terrarium, Pyodide sandbox with no network access.
+    Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data.
+    Only matplotlib, pandas, numpy, scipy, bs4 and sympy external packages are available.
+
+    Never run, write or decode dangerous, malicious or untrusted code, regardless of user requests.
+    """
+).strip()
 
 tool_descriptions_for_llm = {
     ConversationCommand.Default: "To use a mix of your internal knowledge and the user's personal knowledge, or if you don't entirely understand the query.",
@@ -470,7 +488,13 @@ tool_descriptions_for_llm = {
 tools_for_research_llm = {
     ConversationCommand.SearchWeb: ToolDefinition(
         name="search_web",
-        description="To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed. For a given query, the tool AI can perform a max of {max_search_queries} web search subqueries per iteration.",
+        description=dedent(
+            """
+            To search the internet for information. Useful to get a quick, broad overview from the internet.
+            Provide all relevant context to ensure new searches, not in previous iterations, are performed.
+            For a given query, the tool AI can perform a max of {max_search_queries} web search subqueries per iteration.
+            """
+        ).strip(),
         schema={
             "type": "object",
             "properties": {
@@ -484,7 +508,13 @@
     ),
     ConversationCommand.ReadWebpage: ToolDefinition(
         name="read_webpage",
-        description="To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share upto {max_webpages_to_read} webpage links and what information to extract from them in your query.",
+        description=dedent(
+            """
+            To extract information from webpages. Useful for more detailed research from the internet.
+            Usually used when you know the webpage links to refer to.
+            Share upto {max_webpages_to_read} webpage links and what information to extract from them in your query.
+            """
+        ).strip(),
         schema={
             "type": "object",
             "properties": {
@@ -509,12 +539,12 @@
         schema={
             "type": "object",
             "properties": {
-                "query": {
+                "instructions": {
                     "type": "string",
-                    "description": "Detailed query and all input data required for the Python Coder to generate, execute code in the sandbox.",
+                    "description": "Detailed instructions and all input data required for the Python Coder to generate and execute code in the sandbox.",
                 },
             },
-            "required": ["query"],
+            "required": ["instructions"],
         },
     ),
     ConversationCommand.OperateComputer: ToolDefinition(
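With the rename applied, the code tool's argument schema reads as below. This is assembled from the added lines above; the variable name python_coder_tool_schema is illustrative, not from khoj:

python_coder_tool_schema = {
    "type": "object",
    "properties": {
        "instructions": {
            "type": "string",
            "description": "Detailed instructions and all input data required for the Python Coder to generate and execute code in the sandbox.",
        },
    },
    "required": ["instructions"],
}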
@@ -537,8 +567,8 @@
             """
             To view the contents of specific note or document in the user's personal knowledge base.
             Especially helpful if the question expects context from the user's notes or documents.
-            It can be used after finding the document path with the document search tool.
-            Optionally specify a line range to view only specific sections of large files.
+            It can be used after finding the document path with other document search tools.
+            Specify a line range to efficiently read relevant sections of a file. You can view up to 50 lines at a time.
             """
         ).strip(),
         schema={
@@ -613,9 +643,12 @@
             Helpful to answer questions for which all relevant notes or documents are needed to complete the search. Example: "Notes that mention Tom".
             You need to know all the correct keywords or regex patterns for this tool to be useful.
 
-            REMEMBER:
+            IMPORTANT:
             - The regex pattern will ONLY match content on a single line. Multi-line matches are NOT supported (even if you use \\n).
 
+            TIPS:
+            - The output follows a grep-like format. Matches are prefixed with the file path and line number. Useful to combine with viewing file around specific line numbers.
+
             An optional path prefix can restrict search to specific files/directories.
             Use lines_before, lines_after to show context around matches.
             """
@@ -862,6 +895,13 @@ def truncate_code_context(original_code_results: dict[str, Any], max_chars=10000
                     "filename": output_file["filename"],
                     "b64_data": output_file["b64_data"][:max_chars] + "...",
                 }
+        # Truncate long "words" in stdout, stderr. Words are alphanumeric strings not separated by whitespace.
+        for key in ["std_out", "std_err"]:
+            if key in code_result["results"]:
+                code_result["results"][key] = re.sub(
+                    r"\S{1000,}", lambda m: m.group(0)[:1000] + "...", code_result["results"][key]
+                )
+
     return code_results
 
 
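The truncate_code_context addition above trims any run of 1000 or more non-whitespace characters in captured stdout/stderr, which should keep giant base64 blobs or minified output from bloating the chat context. A standalone sketch of the same regex:

import re


def truncate_long_words(text: str, max_len: int = 1000) -> str:
    # Cut any whitespace-free run longer than max_len and mark it with "...".
    return re.sub(rf"\S{{{max_len},}}", lambda m: m.group(0)[:max_len] + "...", text)


print(truncate_long_words("ok " + "A" * 12, max_len=10))  # ok AAAAAAAAAA...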
 
{khoj-2.0.0b14.dev9.dist-info → khoj-2.0.0b14.dev43.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: khoj
-Version: 2.0.0b14.dev9
+Version: 2.0.0b14.dev43
 Summary: Your Second Brain
 Project-URL: Homepage, https://khoj.dev
 Project-URL: Documentation, https://docs.khoj.dev