khoj 1.41.1.dev25__py3-none-any.whl → 1.41.1.dev34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. khoj/interface/compiled/404/index.html +2 -2
  2. khoj/interface/compiled/_next/static/chunks/{2327-f03b2a77f67b8f8c.js → 2327-aa22697ed9c8d54a.js} +1 -1
  3. khoj/interface/compiled/_next/static/chunks/{8515-010dd769c584b672.js → 8515-f305779d95dd5780.js} +1 -1
  4. khoj/interface/compiled/_next/static/chunks/app/agents/layout-2e626327abfbe612.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/app/agents/{page-ceeb9a91edea74ce.js → page-c9ceb9b94e24b94a.js} +1 -1
  6. khoj/interface/compiled/_next/static/chunks/app/automations/{page-e3cb78747ab98cc7.js → page-3dc59a0df3827dc7.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/chat/layout-d6acbba22ccac0ff.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/chat/{page-ee1cd53e1a794ca3.js → page-2dd1b200be6be11d.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/{page-bde9dd79a8cc9b0e.js → page-7a7f336908a76b8b.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/search/layout-94c76c3a41db42a2.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/app/search/{page-8973da2f4c076fe1.js → page-26d4492fb1200e0e.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/app/settings/{page-375136dbb400525b.js → page-bf1a4e488b29fceb.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-95998f0bdc22bb13.js +1 -0
  14. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-442bbe50b75beda4.js → page-585c39865f6f0c16.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/{webpack-bb0a4ef4d6cb32e1.js → webpack-7576ea771214e321.js} +1 -1
  16. khoj/interface/compiled/_next/static/css/{303de34ca0f84c11.css → 95b31be535d74c4e.css} +1 -1
  17. khoj/interface/compiled/_next/static/css/bb7ea98028b368f3.css +1 -0
  18. khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +1 -0
  19. khoj/interface/compiled/agents/index.html +2 -2
  20. khoj/interface/compiled/agents/index.txt +2 -2
  21. khoj/interface/compiled/automations/index.html +2 -2
  22. khoj/interface/compiled/automations/index.txt +2 -2
  23. khoj/interface/compiled/chat/index.html +2 -2
  24. khoj/interface/compiled/chat/index.txt +2 -2
  25. khoj/interface/compiled/index.html +2 -2
  26. khoj/interface/compiled/index.txt +2 -2
  27. khoj/interface/compiled/search/index.html +2 -2
  28. khoj/interface/compiled/search/index.txt +2 -2
  29. khoj/interface/compiled/settings/index.html +2 -2
  30. khoj/interface/compiled/settings/index.txt +2 -2
  31. khoj/interface/compiled/share/chat/index.html +2 -2
  32. khoj/interface/compiled/share/chat/index.txt +2 -2
  33. khoj/processor/content/text_to_entries.py +1 -1
  34. khoj/processor/conversation/anthropic/anthropic_chat.py +1 -1
  35. khoj/processor/conversation/anthropic/utils.py +8 -3
  36. khoj/processor/conversation/google/gemini_chat.py +1 -1
  37. khoj/processor/conversation/google/utils.py +8 -3
  38. khoj/processor/conversation/offline/chat_model.py +1 -1
  39. khoj/processor/conversation/openai/gpt.py +1 -1
  40. khoj/processor/conversation/openai/utils.py +3 -1
  41. khoj/processor/conversation/prompts.py +23 -17
  42. khoj/processor/conversation/utils.py +152 -50
  43. khoj/processor/tools/online_search.py +4 -2
  44. khoj/routers/api_chat.py +2 -1
  45. khoj/routers/helpers.py +2 -2
  46. khoj/routers/research.py +41 -25
  47. khoj/utils/constants.py +1 -0
  48. khoj/utils/helpers.py +4 -4
  49. khoj/utils/state.py +2 -1
  50. {khoj-1.41.1.dev25.dist-info → khoj-1.41.1.dev34.dist-info}/METADATA +4 -5
  51. {khoj-1.41.1.dev25.dist-info → khoj-1.41.1.dev34.dist-info}/RECORD +56 -56
  52. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e3d72f0edda6aa0c.js +0 -1
  53. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +0 -1
  54. khoj/interface/compiled/_next/static/chunks/app/search/layout-4505b79deb734a30.js +0 -1
  55. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-6fb51c5c80f8ec67.js +0 -1
  56. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +0 -1
  57. khoj/interface/compiled/_next/static/css/f29752d6e1be7624.css +0 -1
  58. /khoj/interface/compiled/_next/static/{Hs-Zg1aPUjGuDO_G2SDUE → OSj4ew4_YXxd8J7Kq3Czx}/_buildManifest.js +0 -0
  59. /khoj/interface/compiled/_next/static/{Hs-Zg1aPUjGuDO_G2SDUE → OSj4ew4_YXxd8J7Kq3Czx}/_ssgManifest.js +0 -0
  60. {khoj-1.41.1.dev25.dist-info → khoj-1.41.1.dev34.dist-info}/WHEEL +0 -0
  61. {khoj-1.41.1.dev25.dist-info → khoj-1.41.1.dev34.dist-info}/entry_points.txt +0 -0
  62. {khoj-1.41.1.dev25.dist-info → khoj-1.41.1.dev34.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/prompts.py CHANGED
@@ -1,4 +1,4 @@
- from langchain.prompts import PromptTemplate
+ from langchain_core.prompts import PromptTemplate
 
  ## Personality
  ## --
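The only change here is the import path: `PromptTemplate` now comes from `langchain_core`, which `langchain.prompts` had been re-exporting. A minimal sketch of the unchanged interface under the new import:

```python
# Hedged sketch: the langchain_core import exposes the same PromptTemplate API.
from langchain_core.prompts import PromptTemplate

greeting = PromptTemplate.from_template("Hello {name}")
print(greeting.format(name="Khoj"))  # -> Hello Khoj
```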
@@ -666,21 +666,25 @@ As a professional analyst, your job is to extract all pertinent information from
  You will be provided raw text directly from within the document.
  Adhere to these guidelines while extracting information from the provided documents:
 
- 1. Extract all relevant text and links from the document that can assist with further research or answer the user's query.
+ 1. Extract all relevant text and links from the document that can assist with further research or answer the target query.
  2. Craft a comprehensive but compact report with all the necessary data from the document to generate an informed response.
  3. Rely strictly on the provided text to generate your summary, without including external information.
  4. Provide specific, important snippets from the document in your report to establish trust in your summary.
+ 5. Verbatim quote all necessary text, code or data from the provided document to answer the target query.
  """.strip()
 
  extract_relevant_information = PromptTemplate.from_template(
  """
  {personality_context}
- Target Query: {query}
+ <target_query>
+ {query}
+ </target_query>
 
- Document:
+ <document>
  {corpus}
+ </document>
 
- Collate only relevant information from the document to answer the target query.
+ Collate all relevant information from the document to answer the target query.
  """.strip()
  )
 
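For reference, this is how the retagged template renders once formatted; the filler values below are illustrative:

```python
# Sketch of the new XML-tagged extraction prompt, using the template text from this hunk.
from langchain_core.prompts import PromptTemplate

extract_relevant_information = PromptTemplate.from_template(
    """
{personality_context}
<target_query>
{query}
</target_query>

<document>
{corpus}
</document>

Collate all relevant information from the document to answer the target query.
""".strip()
)
print(extract_relevant_information.format(personality_context="", query="What changed?", corpus="<raw document text>"))
```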
@@ -758,29 +762,32 @@ Assuming you can search the user's notes and the internet.
  - User Name: {username}
 
  # Available Tool AIs
- Which of the tool AIs listed below would you use to answer the user's question? You **only** have access to the following tool AIs:
+ You decide which of the tool AIs listed below would you use to answer the user's question. You **only** have access to the following tool AIs:
 
  {tools}
 
- # Previous Iterations
- {previous_iterations}
-
- # Chat History:
- {chat_history}
-
- Return the next tool AI to use and the query to ask it. Your response should always be a valid JSON object. Do not say anything else.
+ Your response should always be a valid JSON object. Do not say anything else.
 
  Response format:
  {{"scratchpad": "<your_scratchpad_to_reason_about_which_tool_to_use>", "tool": "<name_of_tool_ai>", "query": "<your_detailed_query_for_the_tool_ai>"}}
  """.strip()
  )
 
+ plan_function_execution_next_tool = PromptTemplate.from_template(
+ """
+ Given the results of your previous iterations, which tool AI will you use next to answer the target query?
+
+ # Target Query:
+ {query}
+ """.strip()
+ )
+
  previous_iteration = PromptTemplate.from_template(
  """
- ## Iteration {index}:
+ # Iteration {index}:
  - tool: {tool}
  - query: {query}
  - result: {result}
- """
+ """.strip()
  )
 
  pick_relevant_tools = PromptTemplate.from_template(
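The planner's contract is unchanged: one JSON object with `scratchpad`, `tool`, and `query` keys. A hedged sketch of consuming that response format (the raw string is illustrative, not real model output; Khoj itself parses planner output with its `load_complex_json` helper):

```python
import json

raw = '{"scratchpad": "The user asks about current events, so search online.", "tool": "online", "query": "khoj 1.41 release notes"}'
plan = json.loads(raw)
assert {"scratchpad", "tool", "query"} <= plan.keys()
print(plan["tool"], "->", plan["query"])
```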
@@ -858,8 +865,7 @@ infer_webpages_to_read = PromptTemplate.from_template(
  You are Khoj, an advanced web page reading assistant. You are to construct **up to {max_webpages}, valid** webpage urls to read before answering the user's question.
  - You will receive the conversation history as context.
  - Add as much context from the previous questions and answers as required to construct the webpage urls.
- - Use multiple web page urls if required to retrieve the relevant information.
- - You have access to the the whole internet to retrieve information.
+ - You have access to the whole internet to retrieve information.
  {personality_context}
  Which webpages will you need to read to answer the user's question?
  Provide web page links as a list of strings in a JSON object.
khoj/processor/conversation/utils.py CHANGED
@@ -4,14 +4,12 @@ import logging
  import math
  import mimetypes
  import os
- import queue
  import re
  import uuid
  from dataclasses import dataclass
  from datetime import datetime
  from enum import Enum
  from io import BytesIO
- from time import perf_counter
  from typing import Any, Callable, Dict, List, Optional
 
  import PIL.Image
@@ -19,9 +17,10 @@ import pyjson5
  import requests
  import tiktoken
  import yaml
- from langchain.schema import ChatMessage
+ from langchain_core.messages.chat import ChatMessage
+ from llama_cpp import LlamaTokenizer
  from llama_cpp.llama import Llama
- from transformers import AutoTokenizer
+ from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
 
  from khoj.database.adapters import ConversationAdapters
  from khoj.database.models import ChatModel, ClientApplication, KhojUser
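`ChatMessage` likewise moves to its `langchain_core` home. It accepts both plain-string content and the list-of-parts content the rest of this diff standardizes on, which the truncation changes below rely on:

```python
# Small sketch of the two content shapes ChatMessage accepts after this import swap.
from langchain_core.messages.chat import ChatMessage

plain = ChatMessage(role="user", content="hello")
multipart = ChatMessage(role="user", content=[{"type": "text", "text": "hello"}])
```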
@@ -52,7 +51,7 @@ except ImportError:
  model_to_prompt_size = {
  # OpenAI Models
  "gpt-4o": 60000,
- "gpt-4o-mini": 120000,
+ "gpt-4o-mini": 60000,
  "gpt-4.1": 60000,
  "gpt-4.1-mini": 120000,
  "gpt-4.1-nano": 120000,
@@ -105,9 +104,9 @@ class InformationCollectionIteration:
 
 
  def construct_iteration_history(
- previous_iterations: List[InformationCollectionIteration], previous_iteration_prompt: str
- ) -> str:
- previous_iterations_history = ""
+ query: str, previous_iterations: List[InformationCollectionIteration], previous_iteration_prompt: str
+ ) -> list[dict]:
+ previous_iterations_history = []
  for idx, iteration in enumerate(previous_iterations):
  iteration_data = previous_iteration_prompt.format(
  tool=iteration.tool,
@@ -116,8 +115,23 @@ def construct_iteration_history(
  index=idx + 1,
  )
 
- previous_iterations_history += iteration_data
- return previous_iterations_history
+ previous_iterations_history.append(iteration_data)
+
+ return (
+ [
+ {
+ "by": "you",
+ "message": query,
+ },
+ {
+ "by": "khoj",
+ "intent": {"type": "remember", "query": query},
+ "message": previous_iterations_history,
+ },
+ ]
+ if previous_iterations_history
+ else []
+ )
 
 
  def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
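Instead of one flat string, `construct_iteration_history` now returns synthetic chat turns that can be appended to a conversation log. A sketch of the returned shape, with illustrative values:

```python
# Sketch of the chat-log shaped output, built from the dict literals in this hunk.
history = [
    {"by": "you", "message": "What changed in khoj 1.41?"},
    {
        "by": "khoj",
        "intent": {"type": "remember", "query": "What changed in khoj 1.41?"},
        "message": ["# Iteration 1:\n- tool: online\n- query: ...\n- result: ..."],
    },
]
```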
@@ -152,19 +166,35 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
  def construct_tool_chat_history(
  previous_iterations: List[InformationCollectionIteration], tool: ConversationCommand = None
  ) -> Dict[str, list]:
+ """
+ Construct chat history from previous iterations for a specific tool
+
+ If a tool is provided, only the inferred queries for that tool is added.
+ If no tool is provided inferred query for all tools used are added.
+ """
  chat_history: list = []
- inferred_query_extractor: Callable[[InformationCollectionIteration], List[str]] = lambda x: []
- if tool == ConversationCommand.Notes:
- inferred_query_extractor = (
+ base_extractor: Callable[[InformationCollectionIteration], List[str]] = lambda x: []
+ extract_inferred_query_map: Dict[ConversationCommand, Callable[[InformationCollectionIteration], List[str]]] = {
+ ConversationCommand.Notes: (
  lambda iteration: [c["query"] for c in iteration.context] if iteration.context else []
- )
- elif tool == ConversationCommand.Online:
- inferred_query_extractor = (
+ ),
+ ConversationCommand.Online: (
  lambda iteration: list(iteration.onlineContext.keys()) if iteration.onlineContext else []
- )
- elif tool == ConversationCommand.Code:
- inferred_query_extractor = lambda iteration: list(iteration.codeContext.keys()) if iteration.codeContext else []
+ ),
+ ConversationCommand.Webpage: (
+ lambda iteration: list(iteration.onlineContext.keys()) if iteration.onlineContext else []
+ ),
+ ConversationCommand.Code: (
+ lambda iteration: list(iteration.codeContext.keys()) if iteration.codeContext else []
+ ),
+ }
  for iteration in previous_iterations:
+ # If a tool is provided use the inferred query extractor for that tool if available
+ # If no tool is provided, use inferred query extractor for the tool used in the iteration
+ # Fallback to base extractor if the tool does not have an inferred query extractor
+ inferred_query_extractor = extract_inferred_query_map.get(
+ tool or ConversationCommand(iteration.tool), base_extractor
+ )
  chat_history += [
  {
  "by": "you",
@@ -300,7 +330,11 @@ Khoj: "{chat_response}"
 
 
  def construct_structured_message(
- message: str, images: list[str], model_type: str, vision_enabled: bool, attached_file_context: str = None
+ message: list[str] | str,
+ images: list[str],
+ model_type: str,
+ vision_enabled: bool,
+ attached_file_context: str = None,
  ):
  """
  Format messages into appropriate multimedia format for supported chat model types
@@ -310,10 +344,11 @@ def construct_structured_message(
  ChatModel.ModelType.GOOGLE,
  ChatModel.ModelType.ANTHROPIC,
  ]:
- if not attached_file_context and not (vision_enabled and images):
- return message
+ message = [message] if isinstance(message, str) else message
 
- constructed_messages: List[Any] = [{"type": "text", "text": message}]
+ constructed_messages: List[dict[str, Any]] = [
+ {"type": "text", "text": message_part} for message_part in message
+ ]
 
  if not is_none_or_empty(attached_file_context):
  constructed_messages.append({"type": "text", "text": attached_file_context})
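Condensed, the normalization this hunk applies: a bare string becomes a one-element list, and every part is wrapped as a chatml text block:

```python
# Minimal sketch of the string-to-parts normalization above.
message = "describe the attached file"
message = [message] if isinstance(message, str) else message
constructed = [{"type": "text", "text": part} for part in message]
# -> [{'type': 'text', 'text': 'describe the attached file'}]
```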
@@ -346,7 +381,7 @@ def gather_raw_query_files(
 
  def generate_chatml_messages_with_context(
  user_message,
- system_message=None,
+ system_message: str = None,
  conversation_log={},
  model_name="gpt-4o-mini",
  loaded_model: Optional[Llama] = None,
@@ -409,6 +444,9 @@ def generate_chatml_messages_with_context(
  if not is_none_or_empty(chat.get("onlineContext")):
  message_context += f"{prompts.online_search_conversation.format(online_results=chat.get('onlineContext'))}"
 
+ if not is_none_or_empty(chat.get("codeContext")):
+ message_context += f"{prompts.code_executed_context.format(online_results=chat.get('codeContext'))}"
+
  if not is_none_or_empty(message_context):
  reconstructed_context_message = ChatMessage(content=message_context, role="user")
  chatml_messages.insert(0, reconstructed_context_message)
@@ -441,7 +479,7 @@ def generate_chatml_messages_with_context(
  if len(chatml_messages) >= 3 * lookback_turns:
  break
 
- messages = []
+ messages: list[ChatMessage] = []
 
  if not is_none_or_empty(generated_asset_results):
  messages.append(
@@ -478,6 +516,11 @@ def generate_chatml_messages_with_context(
  if not is_none_or_empty(system_message):
  messages.append(ChatMessage(content=system_message, role="system"))
 
+ # Normalize message content to list of chatml dictionaries
+ for message in messages:
+ if isinstance(message.content, str):
+ message.content = [{"type": "text", "text": message.content}]
+
  # Truncate oldest messages from conversation history until under max supported prompt size by model
  messages = truncate_messages(messages, max_prompt_size, model_name, loaded_model, tokenizer_name)
 
@@ -485,14 +528,11 @@ def generate_chatml_messages_with_context(
  return messages[::-1]
 
 
- def truncate_messages(
- messages: list[ChatMessage],
- max_prompt_size: int,
+ def get_encoder(
  model_name: str,
  loaded_model: Optional[Llama] = None,
  tokenizer_name=None,
- ) -> list[ChatMessage]:
- """Truncate messages to fit within max prompt size supported by model"""
+ ) -> tiktoken.Encoding | PreTrainedTokenizer | PreTrainedTokenizerFast | LlamaTokenizer:
  default_tokenizer = "gpt-4o"
 
  try:
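The encoder-selection logic `truncate_messages` used inline is factored out as `get_encoder`. A hedged sketch of the tiktoken path with its `gpt-4o` fallback (the unknown model name is hypothetical):

```python
# Sketch of encoder selection along the lines of get_encoder: try the
# model-specific tiktoken encoding, fall back to the gpt-4o default on failure.
import tiktoken

try:
    encoder = tiktoken.encoding_for_model("my-custom-model")  # hypothetical model name
except KeyError:
    encoder = tiktoken.encoding_for_model("gpt-4o")  # the default_tokenizer fallback
```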
@@ -515,6 +555,48 @@
  logger.debug(
  f"Fallback to default chat model tokenizer: {default_tokenizer}.\nConfigure tokenizer for model: {model_name} in Khoj settings to improve context stuffing."
  )
+ return encoder
+
+
+ def count_tokens(
+ message_content: str | list[str | dict],
+ encoder: PreTrainedTokenizer | PreTrainedTokenizerFast | LlamaTokenizer | tiktoken.Encoding,
+ ) -> int:
+ """
+ Count the total number of tokens in a list of messages.
+
+ Assumes each images takes 500 tokens for approximation.
+ """
+ if isinstance(message_content, list):
+ image_count = 0
+ message_content_parts: list[str] = []
+ # Collate message content into single string to ease token counting
+ for part in message_content:
+ if isinstance(part, dict) and part.get("type") == "text":
+ message_content_parts.append(part["text"])
+ elif isinstance(part, dict) and part.get("type") == "image_url":
+ image_count += 1
+ elif isinstance(part, str):
+ message_content_parts.append(part)
+ else:
+ logger.warning(f"Unknown message type: {part}. Skipping.")
+ message_content = "\n".join(message_content_parts).rstrip()
+ return len(encoder.encode(message_content)) + image_count * 500
+ elif isinstance(message_content, str):
+ return len(encoder.encode(message_content))
+ else:
+ return len(encoder.encode(json.dumps(message_content)))
+
+
+ def truncate_messages(
+ messages: list[ChatMessage],
+ max_prompt_size: int,
+ model_name: str,
+ loaded_model: Optional[Llama] = None,
+ tokenizer_name=None,
+ ) -> list[ChatMessage]:
+ """Truncate messages to fit within max prompt size supported by model"""
+ encoder = get_encoder(model_name, loaded_model, tokenizer_name)
 
  # Extract system message from messages
  system_message = None
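A usage sketch of the counting scheme `count_tokens` implements: text parts are collated and encoded, and each image part is approximated as a flat 500 tokens:

```python
# Sketch of the 500-tokens-per-image approximation from count_tokens above.
import tiktoken

encoder = tiktoken.encoding_for_model("gpt-4o")
content = [
    {"type": "text", "text": "What is in this screenshot?"},
    {"type": "image_url", "image_url": {"url": "https://example.com/shot.png"}},
]
text = "\n".join(p["text"] for p in content if p.get("type") == "text").rstrip()
total = len(encoder.encode(text)) + 500 * sum(1 for p in content if p.get("type") == "image_url")
```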
@@ -523,35 +605,55 @@ def truncate_messages(
  system_message = messages.pop(idx)
  break
 
- # TODO: Handle truncation of multi-part message.content, i.e when message.content is a list[dict] rather than a string
- system_message_tokens = (
- len(encoder.encode(system_message.content)) if system_message and type(system_message.content) == str else 0
- )
-
- tokens = sum([len(encoder.encode(message.content)) for message in messages if type(message.content) == str])
-
  # Drop older messages until under max supported prompt size by model
  # Reserves 4 tokens to demarcate each message (e.g <|im_start|>user, <|im_end|>, <|endoftext|> etc.)
- while (tokens + system_message_tokens + 4 * len(messages)) > max_prompt_size and len(messages) > 1:
- messages.pop()
- tokens = sum([len(encoder.encode(message.content)) for message in messages if type(message.content) == str])
+ system_message_tokens = count_tokens(system_message.content, encoder) if system_message else 0
+ tokens = sum([count_tokens(message.content, encoder) for message in messages])
+ total_tokens = tokens + system_message_tokens + 4 * len(messages)
+
+ while total_tokens > max_prompt_size and (len(messages) > 1 or len(messages[0].content) > 1):
+ if len(messages[-1].content) > 1:
+ # The oldest content part is earlier in content list. So pop from the front.
+ messages[-1].content.pop(0)
+ else:
+ # The oldest message is the last one. So pop from the back.
+ messages.pop()
+ tokens = sum([count_tokens(message.content, encoder) for message in messages])
+ total_tokens = tokens + system_message_tokens + 4 * len(messages)
 
  # Truncate current message if still over max supported prompt size by model
- if (tokens + system_message_tokens) > max_prompt_size:
- current_message = "\n".join(messages[0].content.split("\n")[:-1]) if type(messages[0].content) == str else ""
- original_question = "\n".join(messages[0].content.split("\n")[-1:]) if type(messages[0].content) == str else ""
- original_question = f"\n{original_question}"
- original_question_tokens = len(encoder.encode(original_question))
+ total_tokens = tokens + system_message_tokens + 4 * len(messages)
+ if total_tokens > max_prompt_size:
+ # At this point, a single message with a single content part of type dict should remain
+ assert (
+ len(messages) == 1 and len(messages[0].content) == 1 and isinstance(messages[0].content[0], dict)
+ ), "Expected a single message with a single content part remaining at this point in truncation"
+
+ # Collate message content into single string to ease truncation
+ part = messages[0].content[0]
+ message_content: str = part["text"] if part["type"] == "text" else json.dumps(part)
+ message_role = messages[0].role
+
+ remaining_context = "\n".join(message_content.split("\n")[:-1])
+ original_question = "\n" + "\n".join(message_content.split("\n")[-1:])
+
+ original_question_tokens = count_tokens(original_question, encoder)
  remaining_tokens = max_prompt_size - system_message_tokens
  if remaining_tokens > original_question_tokens:
  remaining_tokens -= original_question_tokens
- truncated_message = encoder.decode(encoder.encode(current_message)[:remaining_tokens]).strip()
- messages = [ChatMessage(content=truncated_message + original_question, role=messages[0].role)]
+ truncated_context = encoder.decode(encoder.encode(remaining_context)[:remaining_tokens]).strip()
+ truncated_content = truncated_context + original_question
  else:
- truncated_message = encoder.decode(encoder.encode(original_question)[:remaining_tokens]).strip()
- messages = [ChatMessage(content=truncated_message, role=messages[0].role)]
+ truncated_content = encoder.decode(encoder.encode(original_question)[:remaining_tokens]).strip()
+ messages = [ChatMessage(content=[{"type": "text", "text": truncated_content}], role=message_role)]
+
+ truncated_snippet = (
+ f"{truncated_content[:1000]}\n...\n{truncated_content[-1000:]}"
+ if len(truncated_content) > 2000
+ else truncated_content
+ )
  logger.debug(
- f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {truncated_message[:1000]}..."
+ f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {truncated_snippet}"
  )
 
  if system_message:
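The key behavioral change: with multi-part content, truncation sheds the oldest content part of the oldest message before dropping whole messages. A toy model of that order, assuming dict messages with list content and an arbitrary `cost` function:

```python
# Toy sketch of the new truncation order, not the actual implementation.
def truncate(messages: list[dict], budget: int, cost) -> list[dict]:
    # Messages are ordered newest-first here, so the oldest message is last;
    # within a message, the oldest content part sits at the front.
    while cost(messages) > budget and (len(messages) > 1 or len(messages[0]["content"]) > 1):
        if len(messages[-1]["content"]) > 1:
            messages[-1]["content"].pop(0)  # shed the oldest content part first
        else:
            messages.pop()  # then whole oldest messages
    return messages
```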
khoj/processor/tools/online_search.py CHANGED
@@ -64,11 +64,12 @@ async def search_online(
  user: KhojUser,
  send_status_func: Optional[Callable] = None,
  custom_filters: List[str] = [],
+ max_online_searches: int = 3,
  max_webpages_to_read: int = 1,
  query_images: List[str] = None,
+ query_files: str = None,
  previous_subqueries: Set = set(),
  agent: Agent = None,
- query_files: str = None,
  tracer: dict = {},
  ):
  query += " ".join(custom_filters)
@@ -84,9 +85,10 @@
  location,
  user,
  query_images=query_images,
+ query_files=query_files,
+ max_queries=max_online_searches,
  agent=agent,
  tracer=tracer,
- query_files=query_files,
  )
  subqueries = list(new_subqueries - previous_subqueries)
  response_dict: Dict[str, Dict[str, List[Dict] | Dict]] = {}
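The `previous_subqueries` parameter feeds the set difference on the `subqueries` line above, so repeated research iterations skip already-searched subqueries:

```python
# Sketch of the subquery deduplication via set difference; values are illustrative.
previous_subqueries = {"khoj 1.41 release notes"}
new_subqueries = {"khoj 1.41 release notes", "khoj research mode changes"}
subqueries = list(new_subqueries - previous_subqueries)
# -> ['khoj research mode changes']: only the unseen subquery is searched
```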
khoj/routers/api_chat.py CHANGED
@@ -1129,9 +1129,10 @@ async def chat(
  user,
  partial(send_event, ChatEvent.STATUS),
  custom_filters,
+ max_online_searches=3,
  query_images=uploaded_images,
- agent=agent,
  query_files=attached_file_context,
+ agent=agent,
  tracer=tracer,
  ):
  if isinstance(result, dict) and ChatEvent.STATUS in result:
khoj/routers/helpers.py CHANGED
@@ -523,8 +523,9 @@ async def generate_online_subqueries(
  location_data: LocationData,
  user: KhojUser,
  query_images: List[str] = None,
- agent: Agent = None,
  query_files: str = None,
+ max_queries: int = 3,
+ agent: Agent = None,
  tracer: dict = {},
  ) -> Set[str]:
  """
@@ -534,7 +535,6 @@ async def generate_online_subqueries(
  username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else ""
  chat_history = construct_chat_history(conversation_history)
 
- max_queries = 3
  utc_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
  personality_context = (
  prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
khoj/routers/research.py CHANGED
@@ -6,7 +6,6 @@ from enum import Enum
  from typing import Callable, Dict, List, Optional, Type
 
  import yaml
- from fastapi import Request
  from pydantic import BaseModel, Field
 
  from khoj.database.adapters import AgentAdapters, EntryAdapters
@@ -14,7 +13,6 @@ from khoj.database.models import Agent, KhojUser
  from khoj.processor.conversation import prompts
  from khoj.processor.conversation.utils import (
  InformationCollectionIteration,
- construct_chat_history,
  construct_iteration_history,
  construct_tool_chat_history,
  load_complex_json,
@@ -29,9 +27,9 @@ from khoj.routers.helpers import (
  )
  from khoj.utils.helpers import (
  ConversationCommand,
- function_calling_description_for_llm,
  is_none_or_empty,
  timer,
+ tool_description_for_research_llm,
  truncate_code_context,
  )
  from khoj.utils.rawconfig import LocationData
@@ -79,15 +77,18 @@ async def apick_next_tool(
  query: str,
  conversation_history: dict,
  user: KhojUser = None,
- query_images: List[str] = [],
  location: LocationData = None,
  user_name: str = None,
  agent: Agent = None,
  previous_iterations: List[InformationCollectionIteration] = [],
  max_iterations: int = 5,
+ query_images: List[str] = [],
+ query_files: str = None,
+ max_document_searches: int = 7,
+ max_online_searches: int = 3,
+ max_webpages_to_read: int = 1,
  send_status_func: Optional[Callable] = None,
  tracer: dict = {},
- query_files: str = None,
  ):
  """Given a query, determine which of the available tools the agent should use in order to answer appropriately."""
 
@@ -96,10 +97,16 @@
  tool_options_str = ""
  agent_tools = agent.input_tools if agent else []
  user_has_entries = await EntryAdapters.auser_has_entries(user)
- for tool, description in function_calling_description_for_llm.items():
+ for tool, description in tool_description_for_research_llm.items():
  # Skip showing Notes tool as an option if user has no entries
- if tool == ConversationCommand.Notes and not user_has_entries:
- continue
+ if tool == ConversationCommand.Notes:
+ if not user_has_entries:
+ continue
+ description = description.format(max_search_queries=max_document_searches)
+ if tool == ConversationCommand.Webpage:
+ description = description.format(max_webpages_to_read=max_webpages_to_read)
+ if tool == ConversationCommand.Online:
+ description = description.format(max_search_queries=max_online_searches)
  # Add tool if agent does not have any tools defined or the tool is supported by the agent.
  if len(agent_tools) == 0 or tool.value in agent_tools:
  tool_options[tool.name] = tool.value
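Each tool description is now templated with its search budget before being shown to the planner. A hedged sketch of that substitution; the description text is illustrative, not the actual `tool_description_for_research_llm` entry:

```python
max_document_searches = 7
description = "To search the user's documents. Can take up to {max_search_queries} search queries."  # illustrative
print(description.format(max_search_queries=max_document_searches))
```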
@@ -108,13 +115,6 @@
  # Create planning reponse model with dynamically populated tool enum class
  planning_response_model = PlanningResponse.create_model_with_enum(tool_options)
 
- # Construct chat history with user and iteration history with researcher agent for context
- chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")
- previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
-
- if query_images:
- query = f"[placeholder for user attached images]\n{query}"
-
  today = datetime.today()
  location_data = f"{location}" if location else "Unknown"
  agent_chat_model = AgentAdapters.get_agent_chat_model(agent, user) if agent else None
@@ -124,21 +124,30 @@
 
  function_planning_prompt = prompts.plan_function_execution.format(
  tools=tool_options_str,
- chat_history=chat_history,
  personality_context=personality_context,
  current_date=today.strftime("%Y-%m-%d"),
  day_of_week=today.strftime("%A"),
  username=user_name or "Unknown",
  location=location_data,
- previous_iterations=previous_iterations_history,
  max_iterations=max_iterations,
  )
 
+ if query_images:
+ query = f"[placeholder for user attached images]\n{query}"
+
+ # Construct chat history with user and iteration history with researcher agent for context
+ previous_iterations_history = construct_iteration_history(query, previous_iterations, prompts.previous_iteration)
+ iteration_chat_log = {"chat": conversation_history.get("chat", []) + previous_iterations_history}
+
+ # Plan function execution for the next tool
+ query = prompts.plan_function_execution_next_tool.format(query=query) if previous_iterations_history else query
+
  try:
  with timer("Chat actor: Infer information sources to refer", logger):
  response = await send_message_to_model_wrapper(
  query=query,
- context=function_planning_prompt,
+ system_message=function_planning_prompt,
+ conversation_log=iteration_chat_log,
  response_type="json_object",
  response_schema=planning_response_model,
  deepthought=True,
@@ -208,6 +217,9 @@
  query_files: str = None,
  cancellation_event: Optional[asyncio.Event] = None,
  ):
+ max_document_searches = 7
+ max_online_searches = 3
+ max_webpages_to_read = 1
  current_iteration = 0
  MAX_ITERATIONS = int(os.getenv("KHOJ_RESEARCH_ITERATIONS", 5))
  previous_iterations: List[InformationCollectionIteration] = []
@@ -227,15 +239,18 @@
  query,
  conversation_history,
  user,
- query_images,
  location,
  user_name,
  agent,
  previous_iterations,
  MAX_ITERATIONS,
- send_status_func,
- tracer=tracer,
+ query_images=query_images,
  query_files=query_files,
+ max_document_searches=max_document_searches,
+ max_online_searches=max_online_searches,
+ max_webpages_to_read=max_webpages_to_read,
+ send_status_func=send_status_func,
+ tracer=tracer,
  ):
  if isinstance(result, dict) and ChatEvent.STATUS in result:
  yield result[ChatEvent.STATUS]
@@ -260,7 +275,7 @@
  user,
  construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
  this_iteration.query,
- 7,
+ max_document_searches,
  None,
  conversation_id,
  [ConversationCommand.Default],
@@ -307,6 +322,7 @@
  user,
  send_status_func,
  [],
+ max_online_searches=max_online_searches,
  max_webpages_to_read=0,
  query_images=query_images,
  previous_subqueries=previous_subqueries,
@@ -332,7 +348,7 @@
  location,
  user,
  send_status_func,
- max_webpages_to_read=1,
+ max_webpages_to_read=max_webpages_to_read,
  query_images=query_images,
  agent=agent,
  tracer=tracer,
@@ -361,7 +377,7 @@
  try:
  async for result in run_code(
  this_iteration.query,
- construct_tool_chat_history(previous_iterations, ConversationCommand.Webpage),
+ construct_tool_chat_history(previous_iterations, ConversationCommand.Code),
  "",
  location,
  user,
@@ -388,7 +404,7 @@
  this_iteration.query,
  user,
  file_filters,
- construct_tool_chat_history(previous_iterations),
+ construct_tool_chat_history(previous_iterations, ConversationCommand.Summarize),
  query_images=query_images,
  agent=agent,
  send_status_func=send_status_func,
khoj/utils/constants.py CHANGED
@@ -52,6 +52,7 @@ model_to_cost: Dict[str, Dict[str, float]] = {
  "gemini-1.5-pro": {"input": 1.25, "output": 5.00},
  "gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
  "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
+ "gemini-2.0-flash-lite": {"input": 0.0075, "output": 0.30},
  "gemini-2.5-flash-preview-04-17": {"input": 0.15, "output": 0.60, "thought": 3.50},
  "gemini-2.5-pro-preview-03-25": {"input": 1.25, "output": 10.0},
  # Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api
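A sketch of how this cost table is read, assuming prices are USD per million tokens, consistent with the Gemini and Anthropic pricing pages the comments link to:

```python
model_to_cost = {"gemini-2.0-flash-lite": {"input": 0.0075, "output": 0.30}}

def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    # Prices assumed to be USD per 1M tokens
    price = model_to_cost[model]
    return (input_tokens * price["input"] + output_tokens * price["output"]) / 1e6

print(f"${estimate_cost('gemini-2.0-flash-lite', 100_000, 10_000):.6f}")  # $0.003750
```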