khoj 1.41.1.dev25__py3-none-any.whl → 1.41.1.dev34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{2327-f03b2a77f67b8f8c.js → 2327-aa22697ed9c8d54a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{8515-010dd769c584b672.js → 8515-f305779d95dd5780.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-2e626327abfbe612.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-ceeb9a91edea74ce.js → page-c9ceb9b94e24b94a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-e3cb78747ab98cc7.js → page-3dc59a0df3827dc7.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-d6acbba22ccac0ff.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-ee1cd53e1a794ca3.js → page-2dd1b200be6be11d.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-bde9dd79a8cc9b0e.js → page-7a7f336908a76b8b.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-94c76c3a41db42a2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-8973da2f4c076fe1.js → page-26d4492fb1200e0e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-375136dbb400525b.js → page-bf1a4e488b29fceb.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-95998f0bdc22bb13.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-442bbe50b75beda4.js → page-585c39865f6f0c16.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-bb0a4ef4d6cb32e1.js → webpack-7576ea771214e321.js} +1 -1
- khoj/interface/compiled/_next/static/css/{303de34ca0f84c11.css → 95b31be535d74c4e.css} +1 -1
- khoj/interface/compiled/_next/static/css/bb7ea98028b368f3.css +1 -0
- khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/content/text_to_entries.py +1 -1
- khoj/processor/conversation/anthropic/anthropic_chat.py +1 -1
- khoj/processor/conversation/anthropic/utils.py +8 -3
- khoj/processor/conversation/google/gemini_chat.py +1 -1
- khoj/processor/conversation/google/utils.py +8 -3
- khoj/processor/conversation/offline/chat_model.py +1 -1
- khoj/processor/conversation/openai/gpt.py +1 -1
- khoj/processor/conversation/openai/utils.py +3 -1
- khoj/processor/conversation/prompts.py +23 -17
- khoj/processor/conversation/utils.py +152 -50
- khoj/processor/tools/online_search.py +4 -2
- khoj/routers/api_chat.py +2 -1
- khoj/routers/helpers.py +2 -2
- khoj/routers/research.py +41 -25
- khoj/utils/constants.py +1 -0
- khoj/utils/helpers.py +4 -4
- khoj/utils/state.py +2 -1
- {khoj-1.41.1.dev25.dist-info → khoj-1.41.1.dev34.dist-info}/METADATA +4 -5
- {khoj-1.41.1.dev25.dist-info → khoj-1.41.1.dev34.dist-info}/RECORD +56 -56
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e3d72f0edda6aa0c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-4505b79deb734a30.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-6fb51c5c80f8ec67.js +0 -1
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +0 -1
- khoj/interface/compiled/_next/static/css/f29752d6e1be7624.css +0 -1
- /khoj/interface/compiled/_next/static/{Hs-Zg1aPUjGuDO_G2SDUE → OSj4ew4_YXxd8J7Kq3Czx}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{Hs-Zg1aPUjGuDO_G2SDUE → OSj4ew4_YXxd8J7Kq3Czx}/_ssgManifest.js +0 -0
- {khoj-1.41.1.dev25.dist-info → khoj-1.41.1.dev34.dist-info}/WHEEL +0 -0
- {khoj-1.41.1.dev25.dist-info → khoj-1.41.1.dev34.dist-info}/entry_points.txt +0 -0
- {khoj-1.41.1.dev25.dist-info → khoj-1.41.1.dev34.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/prompts.py
CHANGED
@@ -1,4 +1,4 @@
-from …
+from langchain_core.prompts import PromptTemplate
 
 ## Personality
 ## --
@@ -666,21 +666,25 @@ As a professional analyst, your job is to extract all pertinent information from
 You will be provided raw text directly from within the document.
 Adhere to these guidelines while extracting information from the provided documents:
 
-1. Extract all relevant text and links from the document that can assist with further research or answer the …
+1. Extract all relevant text and links from the document that can assist with further research or answer the target query.
 2. Craft a comprehensive but compact report with all the necessary data from the document to generate an informed response.
 3. Rely strictly on the provided text to generate your summary, without including external information.
 4. Provide specific, important snippets from the document in your report to establish trust in your summary.
+5. Verbatim quote all necessary text, code or data from the provided document to answer the target query.
 """.strip()
 
 extract_relevant_information = PromptTemplate.from_template(
     """
 {personality_context}
-
+<target_query>
+{query}
+</target_query>
 
-
+<document>
 {corpus}
+</document>
 
-Collate …
+Collate all relevant information from the document to answer the target query.
 """.strip()
 )
 
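The reworked extraction prompt wraps its inputs in explicit tags. A minimal sketch of formatting the updated template, assuming langchain_core is installed; the query and corpus values are illustrative:

from langchain_core.prompts import PromptTemplate

# Mirrors the extract_relevant_information template from the hunk above
extract_relevant_information = PromptTemplate.from_template(
    """
{personality_context}
<target_query>
{query}
</target_query>

<document>
{corpus}
</document>

Collate all relevant information from the document to answer the target query.
""".strip()
)

prompt = extract_relevant_information.format(
    personality_context="",
    query="What changed between these package versions?",
    corpus="Raw text scraped from a release notes page...",
)
print(prompt)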
@@ -758,29 +762,32 @@ Assuming you can search the user's notes and the internet.
 - User Name: {username}
 
 # Available Tool AIs
-…
+You decide which of the tool AIs listed below would you use to answer the user's question. You **only** have access to the following tool AIs:
 
 {tools}
 
-
-{previous_iterations}
-
-# Chat History:
-{chat_history}
-
-Return the next tool AI to use and the query to ask it. Your response should always be a valid JSON object. Do not say anything else.
+Your response should always be a valid JSON object. Do not say anything else.
 Response format:
 {{"scratchpad": "<your_scratchpad_to_reason_about_which_tool_to_use>", "tool": "<name_of_tool_ai>", "query": "<your_detailed_query_for_the_tool_ai>"}}
 """.strip()
 )
 
+plan_function_execution_next_tool = PromptTemplate.from_template(
+    """
+Given the results of your previous iterations, which tool AI will you use next to answer the target query?
+
+# Target Query:
+{query}
+""".strip()
+)
+
 previous_iteration = PromptTemplate.from_template(
     """
-
+# Iteration {index}:
 - tool: {tool}
 - query: {query}
 - result: {result}
-"""
+""".strip()
 )
 
 pick_relevant_tools = PromptTemplate.from_template(
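The planner prompt no longer interpolates {chat_history} and {previous_iterations}; that context now travels as chat messages, and a short follow-up template re-asks for the next tool. A sketch of how the two new templates render, with made-up iteration values:

from langchain_core.prompts import PromptTemplate

previous_iteration = PromptTemplate.from_template(
    """
# Iteration {index}:
- tool: {tool}
- query: {query}
- result: {result}
""".strip()
)

plan_function_execution_next_tool = PromptTemplate.from_template(
    """
Given the results of your previous iterations, which tool AI will you use next to answer the target query?

# Target Query:
{query}
""".strip()
)

print(previous_iteration.format(index=1, tool="online", query="khoj 1.41 release notes", result="Found the changelog..."))
print(plan_function_execution_next_tool.format(query="Summarize what changed in khoj 1.41"))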
@@ -858,8 +865,7 @@ infer_webpages_to_read = PromptTemplate.from_template(
 You are Khoj, an advanced web page reading assistant. You are to construct **up to {max_webpages}, valid** webpage urls to read before answering the user's question.
 - You will receive the conversation history as context.
 - Add as much context from the previous questions and answers as required to construct the webpage urls.
-…
-- You have access to the the whole internet to retrieve information.
+- You have access to the whole internet to retrieve information.
 {personality_context}
 Which webpages will you need to read to answer the user's question?
 Provide web page links as a list of strings in a JSON object.
khoj/processor/conversation/utils.py
CHANGED
@@ -4,14 +4,12 @@ import logging
 import math
 import mimetypes
 import os
-import queue
 import re
 import uuid
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
 from io import BytesIO
-from time import perf_counter
 from typing import Any, Callable, Dict, List, Optional
 
 import PIL.Image
@@ -19,9 +17,10 @@ import pyjson5
 import requests
 import tiktoken
 import yaml
-from …
+from langchain_core.messages.chat import ChatMessage
+from llama_cpp import LlamaTokenizer
 from llama_cpp.llama import Llama
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
 
 from khoj.database.adapters import ConversationAdapters
 from khoj.database.models import ChatModel, ClientApplication, KhojUser
@@ -52,7 +51,7 @@ except ImportError:
 model_to_prompt_size = {
     # OpenAI Models
     "gpt-4o": 60000,
-    "gpt-4o-mini": …
+    "gpt-4o-mini": 60000,
     "gpt-4.1": 60000,
     "gpt-4.1-mini": 120000,
     "gpt-4.1-nano": 120000,
@@ -105,9 +104,9 @@ class InformationCollectionIteration:
 
 
 def construct_iteration_history(
-    previous_iterations: List[InformationCollectionIteration], previous_iteration_prompt: str
-) -> …
-    previous_iterations_history = …
+    query: str, previous_iterations: List[InformationCollectionIteration], previous_iteration_prompt: str
+) -> list[dict]:
+    previous_iterations_history = []
     for idx, iteration in enumerate(previous_iterations):
         iteration_data = previous_iteration_prompt.format(
             tool=iteration.tool,
@@ -116,8 +115,23 @@ def construct_iteration_history(
             index=idx + 1,
         )
 
-        previous_iterations_history …
-…
+        previous_iterations_history.append(iteration_data)
+
+    return (
+        [
+            {
+                "by": "you",
+                "message": query,
+            },
+            {
+                "by": "khoj",
+                "intent": {"type": "remember", "query": query},
+                "message": previous_iterations_history,
+            },
+        ]
+        if previous_iterations_history
+        else []
+    )
 
 
 def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
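construct_iteration_history now takes the target query and returns chat-log style turns instead of a flat prompt string, so past research iterations can be spliced into the conversation log. A standalone sketch of the new contract; the Iteration dataclass and its result field are simplified stand-ins for InformationCollectionIteration:

from dataclasses import dataclass
from typing import List

@dataclass
class Iteration:  # stand-in for InformationCollectionIteration
    tool: str
    query: str
    result: str

TEMPLATE = "# Iteration {index}:\n- tool: {tool}\n- query: {query}\n- result: {result}"

def construct_iteration_history(query: str, previous_iterations: List[Iteration], template: str) -> list[dict]:
    history = [
        template.format(index=idx + 1, tool=it.tool, query=it.query, result=it.result)
        for idx, it in enumerate(previous_iterations)
    ]
    # One user turn carrying the target query, one khoj turn carrying the iteration log
    return (
        [
            {"by": "you", "message": query},
            {"by": "khoj", "intent": {"type": "remember", "query": query}, "message": history},
        ]
        if history
        else []
    )

print(construct_iteration_history("What changed?", [Iteration("online", "khoj changelog", "...")], TEMPLATE))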
@@ -152,19 +166,35 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
 def construct_tool_chat_history(
     previous_iterations: List[InformationCollectionIteration], tool: ConversationCommand = None
 ) -> Dict[str, list]:
+    """
+    Construct chat history from previous iterations for a specific tool
+
+    If a tool is provided, only the inferred queries for that tool is added.
+    If no tool is provided inferred query for all tools used are added.
+    """
     chat_history: list = []
-…
-…
-…
+    base_extractor: Callable[[InformationCollectionIteration], List[str]] = lambda x: []
+    extract_inferred_query_map: Dict[ConversationCommand, Callable[[InformationCollectionIteration], List[str]]] = {
+        ConversationCommand.Notes: (
             lambda iteration: [c["query"] for c in iteration.context] if iteration.context else []
-        )
-…
-    inferred_query_extractor = (
+        ),
+        ConversationCommand.Online: (
             lambda iteration: list(iteration.onlineContext.keys()) if iteration.onlineContext else []
-        )
-…
-…
+        ),
+        ConversationCommand.Webpage: (
+            lambda iteration: list(iteration.onlineContext.keys()) if iteration.onlineContext else []
+        ),
+        ConversationCommand.Code: (
+            lambda iteration: list(iteration.codeContext.keys()) if iteration.codeContext else []
+        ),
+    }
     for iteration in previous_iterations:
+        # If a tool is provided use the inferred query extractor for that tool if available
+        # If no tool is provided, use inferred query extractor for the tool used in the iteration
+        # Fallback to base extractor if the tool does not have an inferred query extractor
+        inferred_query_extractor = extract_inferred_query_map.get(
+            tool or ConversationCommand(iteration.tool), base_extractor
+        )
         chat_history += [
             {
                 "by": "you",
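The per-tool if/elif chain becomes a dispatch map of extractor lambdas with a no-op fallback, so unknown tools degrade gracefully. The pattern in isolation; the enum and the dict-shaped iteration are simplified stand-ins for the khoj types:

from enum import Enum
from typing import Callable, Dict, List

class ConversationCommand(str, Enum):
    Notes = "notes"
    Online = "online"
    Webpage = "webpage"
    Code = "code"

base_extractor: Callable[[dict], List[str]] = lambda iteration: []
extract_inferred_query_map: Dict[ConversationCommand, Callable[[dict], List[str]]] = {
    ConversationCommand.Notes: lambda i: [c["query"] for c in (i.get("context") or [])],
    ConversationCommand.Online: lambda i: list((i.get("onlineContext") or {}).keys()),
    ConversationCommand.Webpage: lambda i: list((i.get("onlineContext") or {}).keys()),
    ConversationCommand.Code: lambda i: list((i.get("codeContext") or {}).keys()),
}

iteration = {"tool": "online", "onlineContext": {"khoj 1.41 changelog": {}}}
extractor = extract_inferred_query_map.get(ConversationCommand(iteration["tool"]), base_extractor)
print(extractor(iteration))  # ['khoj 1.41 changelog']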
@@ -300,7 +330,11 @@ Khoj: "{chat_response}"
 
 
 def construct_structured_message(
-    message: …
+    message: list[str] | str,
+    images: list[str],
+    model_type: str,
+    vision_enabled: bool,
+    attached_file_context: str = None,
 ):
     """
     Format messages into appropriate multimedia format for supported chat model types
@@ -310,10 +344,11 @@ def construct_structured_message(
         ChatModel.ModelType.GOOGLE,
         ChatModel.ModelType.ANTHROPIC,
     ]:
-…
-        return message
+        message = [message] if isinstance(message, str) else message
 
-        constructed_messages: List[Any] = [
+        constructed_messages: List[dict[str, Any]] = [
+            {"type": "text", "text": message_part} for message_part in message
+        ]
 
         if not is_none_or_empty(attached_file_context):
             constructed_messages.append({"type": "text", "text": attached_file_context})
|
|
346
381
|
|
347
382
|
def generate_chatml_messages_with_context(
|
348
383
|
user_message,
|
349
|
-
system_message=None,
|
384
|
+
system_message: str = None,
|
350
385
|
conversation_log={},
|
351
386
|
model_name="gpt-4o-mini",
|
352
387
|
loaded_model: Optional[Llama] = None,
|
@@ -409,6 +444,9 @@ def generate_chatml_messages_with_context(
         if not is_none_or_empty(chat.get("onlineContext")):
             message_context += f"{prompts.online_search_conversation.format(online_results=chat.get('onlineContext'))}"
 
+        if not is_none_or_empty(chat.get("codeContext")):
+            message_context += f"{prompts.code_executed_context.format(online_results=chat.get('codeContext'))}"
+
         if not is_none_or_empty(message_context):
             reconstructed_context_message = ChatMessage(content=message_context, role="user")
             chatml_messages.insert(0, reconstructed_context_message)
@@ -441,7 +479,7 @@ def generate_chatml_messages_with_context(
         if len(chatml_messages) >= 3 * lookback_turns:
             break
 
-    messages = []
+    messages: list[ChatMessage] = []
 
     if not is_none_or_empty(generated_asset_results):
         messages.append(
@@ -478,6 +516,11 @@ def generate_chatml_messages_with_context(
     if not is_none_or_empty(system_message):
         messages.append(ChatMessage(content=system_message, role="system"))
 
+    # Normalize message content to list of chatml dictionaries
+    for message in messages:
+        if isinstance(message.content, str):
+            message.content = [{"type": "text", "text": message.content}]
+
     # Truncate oldest messages from conversation history until under max supported prompt size by model
     messages = truncate_messages(messages, max_prompt_size, model_name, loaded_model, tokenizer_name)
 
@@ -485,14 +528,11 @@ def generate_chatml_messages_with_context(
     return messages[::-1]
 
 
-def truncate_messages(
-    messages: list[ChatMessage],
-    max_prompt_size: int,
+def get_encoder(
     model_name: str,
     loaded_model: Optional[Llama] = None,
     tokenizer_name=None,
-) -> list[ChatMessage]:
-    """Truncate messages to fit within max prompt size supported by model"""
+) -> tiktoken.Encoding | PreTrainedTokenizer | PreTrainedTokenizerFast | LlamaTokenizer:
     default_tokenizer = "gpt-4o"
 
     try:
@@ -515,6 +555,48 @@ def truncate_messages(
         logger.debug(
             f"Fallback to default chat model tokenizer: {default_tokenizer}.\nConfigure tokenizer for model: {model_name} in Khoj settings to improve context stuffing."
         )
+    return encoder
+
+
+def count_tokens(
+    message_content: str | list[str | dict],
+    encoder: PreTrainedTokenizer | PreTrainedTokenizerFast | LlamaTokenizer | tiktoken.Encoding,
+) -> int:
+    """
+    Count the total number of tokens in a list of messages.
+
+    Assumes each images takes 500 tokens for approximation.
+    """
+    if isinstance(message_content, list):
+        image_count = 0
+        message_content_parts: list[str] = []
+        # Collate message content into single string to ease token counting
+        for part in message_content:
+            if isinstance(part, dict) and part.get("type") == "text":
+                message_content_parts.append(part["text"])
+            elif isinstance(part, dict) and part.get("type") == "image_url":
+                image_count += 1
+            elif isinstance(part, str):
+                message_content_parts.append(part)
+            else:
+                logger.warning(f"Unknown message type: {part}. Skipping.")
+        message_content = "\n".join(message_content_parts).rstrip()
+        return len(encoder.encode(message_content)) + image_count * 500
+    elif isinstance(message_content, str):
+        return len(encoder.encode(message_content))
+    else:
+        return len(encoder.encode(json.dumps(message_content)))
+
+
+def truncate_messages(
+    messages: list[ChatMessage],
+    max_prompt_size: int,
+    model_name: str,
+    loaded_model: Optional[Llama] = None,
+    tokenizer_name=None,
+) -> list[ChatMessage]:
+    """Truncate messages to fit within max prompt size supported by model"""
+    encoder = get_encoder(model_name, loaded_model, tokenizer_name)
 
     # Extract system message from messages
     system_message = None
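count_tokens joins the text parts of a multi-part message for a single encode call and charges a flat 500 tokens per image part. A runnable approximation of the same accounting, assuming tiktoken is installed:

import tiktoken

encoder = tiktoken.encoding_for_model("gpt-4o")

def count_tokens(message_content) -> int:
    if isinstance(message_content, str):
        return len(encoder.encode(message_content))
    texts, image_count = [], 0
    for part in message_content:
        if isinstance(part, dict) and part.get("type") == "image_url":
            image_count += 1  # flat 500-token approximation per image
        elif isinstance(part, dict) and part.get("type") == "text":
            texts.append(part["text"])
        elif isinstance(part, str):
            texts.append(part)
    return len(encoder.encode("\n".join(texts).rstrip())) + image_count * 500

parts = [
    {"type": "text", "text": "Describe this image"},
    {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
]
print(count_tokens(parts))  # a few text tokens plus 500 for the image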
@@ -523,35 +605,55 @@ def truncate_messages(
             system_message = messages.pop(idx)
             break
 
-    # TODO: Handle truncation of multi-part message.content, i.e when message.content is a list[dict] rather than a string
-    system_message_tokens = (
-        len(encoder.encode(system_message.content)) if system_message and type(system_message.content) == str else 0
-    )
-
-    tokens = sum([len(encoder.encode(message.content)) for message in messages if type(message.content) == str])
-
     # Drop older messages until under max supported prompt size by model
     # Reserves 4 tokens to demarcate each message (e.g <|im_start|>user, <|im_end|>, <|endoftext|> etc.)
-…
-…
-…
+    system_message_tokens = count_tokens(system_message.content, encoder) if system_message else 0
+    tokens = sum([count_tokens(message.content, encoder) for message in messages])
+    total_tokens = tokens + system_message_tokens + 4 * len(messages)
+
+    while total_tokens > max_prompt_size and (len(messages) > 1 or len(messages[0].content) > 1):
+        if len(messages[-1].content) > 1:
+            # The oldest content part is earlier in content list. So pop from the front.
+            messages[-1].content.pop(0)
+        else:
+            # The oldest message is the last one. So pop from the back.
+            messages.pop()
+        tokens = sum([count_tokens(message.content, encoder) for message in messages])
+        total_tokens = tokens + system_message_tokens + 4 * len(messages)
 
     # Truncate current message if still over max supported prompt size by model
-…
-…
-…
-…
-…
+    total_tokens = tokens + system_message_tokens + 4 * len(messages)
+    if total_tokens > max_prompt_size:
+        # At this point, a single message with a single content part of type dict should remain
+        assert (
+            len(messages) == 1 and len(messages[0].content) == 1 and isinstance(messages[0].content[0], dict)
+        ), "Expected a single message with a single content part remaining at this point in truncation"
+
+        # Collate message content into single string to ease truncation
+        part = messages[0].content[0]
+        message_content: str = part["text"] if part["type"] == "text" else json.dumps(part)
+        message_role = messages[0].role
+
+        remaining_context = "\n".join(message_content.split("\n")[:-1])
+        original_question = "\n" + "\n".join(message_content.split("\n")[-1:])
+
+        original_question_tokens = count_tokens(original_question, encoder)
         remaining_tokens = max_prompt_size - system_message_tokens
         if remaining_tokens > original_question_tokens:
             remaining_tokens -= original_question_tokens
-…
-…
+            truncated_context = encoder.decode(encoder.encode(remaining_context)[:remaining_tokens]).strip()
+            truncated_content = truncated_context + original_question
         else:
-…
-…
+            truncated_content = encoder.decode(encoder.encode(original_question)[:remaining_tokens]).strip()
+        messages = [ChatMessage(content=[{"type": "text", "text": truncated_content}], role=message_role)]
+
+        truncated_snippet = (
+            f"{truncated_content[:1000]}\n...\n{truncated_content[-1000:]}"
+            if len(truncated_content) > 2000
+            else truncated_content
+        )
         logger.debug(
-            f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {…
+            f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {truncated_snippet}"
         )
 
     if system_message:
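The truncation loop is now multi-part aware: messages are ordered newest-first, so it pops the oldest content part off the oldest (last) message before dropping whole messages. A toy walkthrough with character counts standing in for real token counts:

# Newest message first, as in generate_chatml_messages_with_context
messages = [
    {"role": "user", "content": [{"type": "text", "text": "latest question"}]},
    {"role": "user", "content": [{"type": "text", "text": "old context"}, {"type": "text", "text": "old question"}]},
]

def total_cost(msgs):
    # len() stands in for count_tokens; 4 tokens reserved per message
    return sum(len(p["text"]) for m in msgs for p in m["content"]) + 4 * len(msgs)

max_prompt_size = 30
while total_cost(messages) > max_prompt_size and (len(messages) > 1 or len(messages[0]["content"]) > 1):
    if len(messages[-1]["content"]) > 1:
        messages[-1]["content"].pop(0)  # oldest part of the oldest message goes first
    else:
        messages.pop()  # then the whole oldest message
print(messages)  # only the latest question survives at this budget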
khoj/processor/tools/online_search.py
CHANGED
@@ -64,11 +64,12 @@ async def search_online(
     user: KhojUser,
     send_status_func: Optional[Callable] = None,
     custom_filters: List[str] = [],
+    max_online_searches: int = 3,
     max_webpages_to_read: int = 1,
     query_images: List[str] = None,
+    query_files: str = None,
     previous_subqueries: Set = set(),
     agent: Agent = None,
-    query_files: str = None,
     tracer: dict = {},
 ):
     query += " ".join(custom_filters)
@@ -84,9 +85,10 @@ async def search_online(
         location,
         user,
         query_images=query_images,
+        query_files=query_files,
+        max_queries=max_online_searches,
         agent=agent,
         tracer=tracer,
-        query_files=query_files,
     )
     subqueries = list(new_subqueries - previous_subqueries)
     response_dict: Dict[str, Dict[str, List[Dict] | Dict]] = {}
khoj/routers/api_chat.py
CHANGED
@@ -1129,9 +1129,10 @@ async def chat(
                 user,
                 partial(send_event, ChatEvent.STATUS),
                 custom_filters,
+                max_online_searches=3,
                 query_images=uploaded_images,
-                agent=agent,
                 query_files=attached_file_context,
+                agent=agent,
                 tracer=tracer,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
khoj/routers/helpers.py
CHANGED
@@ -523,8 +523,9 @@ async def generate_online_subqueries(
     location_data: LocationData,
     user: KhojUser,
     query_images: List[str] = None,
-    agent: Agent = None,
     query_files: str = None,
+    max_queries: int = 3,
+    agent: Agent = None,
     tracer: dict = {},
 ) -> Set[str]:
     """
@@ -534,7 +535,6 @@ async def generate_online_subqueries(
     username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else ""
     chat_history = construct_chat_history(conversation_history)
 
-    max_queries = 3
     utc_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
     personality_context = (
         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
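The hard-coded max_queries local becomes a keyword argument with the same default, so research mode can pass its own budget while regular chat keeps the old behavior. The pattern in isolation, with a heavily trimmed hypothetical signature:

# Before: max_queries = 3 was fixed inside the function body.
# After: callers may override it; the default preserves existing behavior.
async def generate_online_subqueries(query: str, max_queries: int = 3) -> set[str]:
    # ... prompt the model for up to max_queries subqueries ...
    return {query}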
khoj/routers/research.py
CHANGED
@@ -6,7 +6,6 @@ from enum import Enum
 from typing import Callable, Dict, List, Optional, Type
 
 import yaml
-from fastapi import Request
 from pydantic import BaseModel, Field
 
 from khoj.database.adapters import AgentAdapters, EntryAdapters
@@ -14,7 +13,6 @@ from khoj.database.models import Agent, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.utils import (
     InformationCollectionIteration,
-    construct_chat_history,
     construct_iteration_history,
     construct_tool_chat_history,
     load_complex_json,
@@ -29,9 +27,9 @@ from khoj.routers.helpers import (
 )
 from khoj.utils.helpers import (
     ConversationCommand,
-    function_calling_description_for_llm,
     is_none_or_empty,
     timer,
+    tool_description_for_research_llm,
     truncate_code_context,
 )
 from khoj.utils.rawconfig import LocationData
@@ -79,15 +77,18 @@ async def apick_next_tool(
     query: str,
     conversation_history: dict,
     user: KhojUser = None,
-    query_images: List[str] = [],
     location: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
     previous_iterations: List[InformationCollectionIteration] = [],
     max_iterations: int = 5,
+    query_images: List[str] = [],
+    query_files: str = None,
+    max_document_searches: int = 7,
+    max_online_searches: int = 3,
+    max_webpages_to_read: int = 1,
     send_status_func: Optional[Callable] = None,
     tracer: dict = {},
-    query_files: str = None,
 ):
     """Given a query, determine which of the available tools the agent should use in order to answer appropriately."""
 
@@ -96,10 +97,16 @@ async def apick_next_tool(
     tool_options_str = ""
     agent_tools = agent.input_tools if agent else []
     user_has_entries = await EntryAdapters.auser_has_entries(user)
-    for tool, description in function_calling_description_for_llm.items():
+    for tool, description in tool_description_for_research_llm.items():
         # Skip showing Notes tool as an option if user has no entries
-        if tool == ConversationCommand.Notes …
-…
+        if tool == ConversationCommand.Notes:
+            if not user_has_entries:
+                continue
+            description = description.format(max_search_queries=max_document_searches)
+        if tool == ConversationCommand.Webpage:
+            description = description.format(max_webpages_to_read=max_webpages_to_read)
+        if tool == ConversationCommand.Online:
+            description = description.format(max_search_queries=max_online_searches)
         # Add tool if agent does not have any tools defined or the tool is supported by the agent.
         if len(agent_tools) == 0 or tool.value in agent_tools:
             tool_options[tool.name] = tool.value
@@ -108,13 +115,6 @@ async def apick_next_tool(
     # Create planning reponse model with dynamically populated tool enum class
     planning_response_model = PlanningResponse.create_model_with_enum(tool_options)
 
-    # Construct chat history with user and iteration history with researcher agent for context
-    chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")
-    previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
-
-    if query_images:
-        query = f"[placeholder for user attached images]\n{query}"
-
     today = datetime.today()
     location_data = f"{location}" if location else "Unknown"
     agent_chat_model = AgentAdapters.get_agent_chat_model(agent, user) if agent else None
@@ -124,21 +124,30 @@ async def apick_next_tool(
 
     function_planning_prompt = prompts.plan_function_execution.format(
         tools=tool_options_str,
-        chat_history=chat_history,
         personality_context=personality_context,
         current_date=today.strftime("%Y-%m-%d"),
         day_of_week=today.strftime("%A"),
         username=user_name or "Unknown",
        location=location_data,
-        previous_iterations=previous_iterations_history,
         max_iterations=max_iterations,
     )
 
+    if query_images:
+        query = f"[placeholder for user attached images]\n{query}"
+
+    # Construct chat history with user and iteration history with researcher agent for context
+    previous_iterations_history = construct_iteration_history(query, previous_iterations, prompts.previous_iteration)
+    iteration_chat_log = {"chat": conversation_history.get("chat", []) + previous_iterations_history}
+
+    # Plan function execution for the next tool
+    query = prompts.plan_function_execution_next_tool.format(query=query) if previous_iterations_history else query
+
     try:
         with timer("Chat actor: Infer information sources to refer", logger):
             response = await send_message_to_model_wrapper(
                 query=query,
-…
+                system_message=function_planning_prompt,
+                conversation_log=iteration_chat_log,
                 response_type="json_object",
                 response_schema=planning_response_model,
                 deepthought=True,
@@ -208,6 +217,9 @@ async def execute_information_collection(
     query_files: str = None,
     cancellation_event: Optional[asyncio.Event] = None,
 ):
+    max_document_searches = 7
+    max_online_searches = 3
+    max_webpages_to_read = 1
     current_iteration = 0
     MAX_ITERATIONS = int(os.getenv("KHOJ_RESEARCH_ITERATIONS", 5))
     previous_iterations: List[InformationCollectionIteration] = []
@@ -227,15 +239,18 @@ async def execute_information_collection(
         query,
         conversation_history,
         user,
-        query_images,
         location,
         user_name,
         agent,
         previous_iterations,
         MAX_ITERATIONS,
-…
-        tracer=tracer,
+        query_images=query_images,
         query_files=query_files,
+        max_document_searches=max_document_searches,
+        max_online_searches=max_online_searches,
+        max_webpages_to_read=max_webpages_to_read,
+        send_status_func=send_status_func,
+        tracer=tracer,
     ):
         if isinstance(result, dict) and ChatEvent.STATUS in result:
             yield result[ChatEvent.STATUS]
@@ -260,7 +275,7 @@ async def execute_information_collection(
                 user,
                 construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
                 this_iteration.query,
-…
+                max_document_searches,
                 None,
                 conversation_id,
                 [ConversationCommand.Default],
@@ -307,6 +322,7 @@ async def execute_information_collection(
                 user,
                 send_status_func,
                 [],
+                max_online_searches=max_online_searches,
                 max_webpages_to_read=0,
                 query_images=query_images,
                 previous_subqueries=previous_subqueries,
@@ -332,7 +348,7 @@ async def execute_information_collection(
                 location,
                 user,
                 send_status_func,
-                max_webpages_to_read=…
+                max_webpages_to_read=max_webpages_to_read,
                 query_images=query_images,
                 agent=agent,
                 tracer=tracer,
@@ -361,7 +377,7 @@ async def execute_information_collection(
             try:
                 async for result in run_code(
                     this_iteration.query,
-                    construct_tool_chat_history(previous_iterations, ConversationCommand.…
+                    construct_tool_chat_history(previous_iterations, ConversationCommand.Code),
                     "",
                     location,
                     user,
@@ -388,7 +404,7 @@ async def execute_information_collection(
                     this_iteration.query,
                     user,
                     file_filters,
-                    construct_tool_chat_history(previous_iterations),
+                    construct_tool_chat_history(previous_iterations, ConversationCommand.Summarize),
                     query_images=query_images,
                     agent=agent,
                     send_status_func=send_status_func,
khoj/utils/constants.py
CHANGED
@@ -52,6 +52,7 @@ model_to_cost: Dict[str, Dict[str, float]] = {
     "gemini-1.5-pro": {"input": 1.25, "output": 5.00},
     "gemini-1.5-pro-002": {"input": 1.25, "output": 5.00},
     "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
+    "gemini-2.0-flash-lite": {"input": 0.0075, "output": 0.30},
     "gemini-2.5-flash-preview-04-17": {"input": 0.15, "output": 0.60, "thought": 3.50},
     "gemini-2.5-pro-preview-03-25": {"input": 1.25, "output": 10.0},
     # Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api
"gemini-2.5-pro-preview-03-25": {"input": 1.25, "output": 10.0},
|
57
58
|
# Anthropic Pricing: https://www.anthropic.com/pricing#anthropic-api
|