khoj 1.41.1.dev23__py3-none-any.whl → 1.41.1.dev34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-2e626327abfbe612.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-ceeb9a91edea74ce.js → page-c9ceb9b94e24b94a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-e3cb78747ab98cc7.js → page-3dc59a0df3827dc7.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-d6acbba22ccac0ff.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-7e780dc11eb5e5d3.js → page-2dd1b200be6be11d.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-a4053e1bb578b2ce.js → page-7a7f336908a76b8b.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-94c76c3a41db42a2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-8973da2f4c076fe1.js → page-26d4492fb1200e0e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-375136dbb400525b.js → page-bf1a4e488b29fceb.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-95998f0bdc22bb13.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-384b54fc953b18f2.js → page-585c39865f6f0c16.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-dd173f234b134d52.js → webpack-7576ea771214e321.js} +1 -1
- khoj/interface/compiled/_next/static/css/440ae0f0f650dc35.css +1 -0
- khoj/interface/compiled/_next/static/css/95b31be535d74c4e.css +1 -0
- khoj/interface/compiled/_next/static/css/bb7ea98028b368f3.css +1 -0
- khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/content/text_to_entries.py +1 -1
- khoj/processor/conversation/anthropic/anthropic_chat.py +1 -1
- khoj/processor/conversation/anthropic/utils.py +8 -3
- khoj/processor/conversation/google/gemini_chat.py +1 -1
- khoj/processor/conversation/google/utils.py +8 -3
- khoj/processor/conversation/offline/chat_model.py +1 -1
- khoj/processor/conversation/openai/gpt.py +1 -1
- khoj/processor/conversation/openai/utils.py +3 -1
- khoj/processor/conversation/prompts.py +23 -17
- khoj/processor/conversation/utils.py +152 -50
- khoj/processor/tools/online_search.py +4 -2
- khoj/processor/tools/run_code.py +4 -2
- khoj/routers/api_chat.py +2 -1
- khoj/routers/helpers.py +2 -2
- khoj/routers/research.py +41 -25
- khoj/utils/constants.py +1 -0
- khoj/utils/helpers.py +4 -4
- khoj/utils/state.py +2 -1
- {khoj-1.41.1.dev23.dist-info → khoj-1.41.1.dev34.dist-info}/METADATA +4 -5
- {khoj-1.41.1.dev23.dist-info → khoj-1.41.1.dev34.dist-info}/RECORD +56 -56
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +0 -1
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +0 -1
- khoj/interface/compiled/_next/static/css/76c658ee459140a9.css +0 -1
- khoj/interface/compiled/_next/static/css/f29752d6e1be7624.css +0 -1
- khoj/interface/compiled/_next/static/css/fca983d49c3dd1a3.css +0 -1
- /khoj/interface/compiled/_next/static/{Nynu88UDualErdYuh1DDw → OSj4ew4_YXxd8J7Kq3Czx}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{Nynu88UDualErdYuh1DDw → OSj4ew4_YXxd8J7Kq3Czx}/_ssgManifest.js +0 -0
- {khoj-1.41.1.dev23.dist-info → khoj-1.41.1.dev34.dist-info}/WHEEL +0 -0
- {khoj-1.41.1.dev23.dist-info → khoj-1.41.1.dev34.dist-info}/entry_points.txt +0 -0
- {khoj-1.41.1.dev23.dist-info → khoj-1.41.1.dev34.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/prompts.py
CHANGED
@@ -1,4 +1,4 @@
-from …
+from langchain_core.prompts import PromptTemplate
 
 ## Personality
 ## --
@@ -666,21 +666,25 @@ As a professional analyst, your job is to extract all pertinent information from
 You will be provided raw text directly from within the document.
 Adhere to these guidelines while extracting information from the provided documents:
 
-1. Extract all relevant text and links from the document that can assist with further research or answer the …
+1. Extract all relevant text and links from the document that can assist with further research or answer the target query.
 2. Craft a comprehensive but compact report with all the necessary data from the document to generate an informed response.
 3. Rely strictly on the provided text to generate your summary, without including external information.
 4. Provide specific, important snippets from the document in your report to establish trust in your summary.
+5. Verbatim quote all necessary text, code or data from the provided document to answer the target query.
 """.strip()
 
 extract_relevant_information = PromptTemplate.from_template(
     """
 {personality_context}
-
+<target_query>
+{query}
+</target_query>
 
-
+<document>
 {corpus}
+</document>
 
-Collate …
+Collate all relevant information from the document to answer the target query.
 """.strip()
 )
 
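
The reworked extraction prompt is query-aware: callers now supply the target query alongside the document corpus. A minimal sketch of rendering the updated template with langchain_core (the query and corpus values are illustrative):

    from langchain_core.prompts import PromptTemplate

    extract_relevant_information = PromptTemplate.from_template(
        "{personality_context}\n"
        "<target_query>\n{query}\n</target_query>\n\n"
        "<document>\n{corpus}\n</document>\n\n"
        "Collate all relevant information from the document to answer the target query."
    )

    prompt = extract_relevant_information.format(
        personality_context="",
        query="What license does the project use?",
        corpus="...raw text extracted from the project README...",
    )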
@@ -758,29 +762,32 @@ Assuming you can search the user's notes and the internet.
 - User Name: {username}
 
 # Available Tool AIs
-
+You decide which of the tool AIs listed below would you use to answer the user's question. You **only** have access to the following tool AIs:
 
 {tools}
 
-
-{previous_iterations}
-
-# Chat History:
-{chat_history}
-
-Return the next tool AI to use and the query to ask it. Your response should always be a valid JSON object. Do not say anything else.
+Your response should always be a valid JSON object. Do not say anything else.
 Response format:
 {{"scratchpad": "<your_scratchpad_to_reason_about_which_tool_to_use>", "tool": "<name_of_tool_ai>", "query": "<your_detailed_query_for_the_tool_ai>"}}
 """.strip()
 )
 
+plan_function_execution_next_tool = PromptTemplate.from_template(
+    """
+Given the results of your previous iterations, which tool AI will you use next to answer the target query?
+
+# Target Query:
+{query}
+""".strip()
+)
+
 previous_iteration = PromptTemplate.from_template(
     """
-
+# Iteration {index}:
 - tool: {tool}
 - query: {query}
 - result: {result}
-"""
+""".strip()
 )
 
 pick_relevant_tools = PromptTemplate.from_template(
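
The planner prompt now ends at the JSON response contract, while the new plan_function_execution_next_tool prompt re-states the target query on follow-up iterations. A sketch of how a planner response in the documented format could be consumed (the tool name and parsing code here are assumptions, not khoj's actual implementation; utils.py imports pyjson5, which tolerates looser JSON than the stdlib parser):

    import json

    raw_response = '{"scratchpad": "Notes lack this; search the web.", "tool": "online", "query": "khoj research mode design"}'
    decision = json.loads(raw_response)
    next_tool = decision["tool"]    # name of the tool AI to invoke next
    tool_query = decision["query"]  # detailed query to hand to that tool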
@@ -858,8 +865,7 @@ infer_webpages_to_read = PromptTemplate.from_template(
 You are Khoj, an advanced web page reading assistant. You are to construct **up to {max_webpages}, valid** webpage urls to read before answering the user's question.
 - You will receive the conversation history as context.
 - Add as much context from the previous questions and answers as required to construct the webpage urls.
--
-- You have access to the the whole internet to retrieve information.
+- You have access to the whole internet to retrieve information.
 {personality_context}
 Which webpages will you need to read to answer the user's question?
 Provide web page links as a list of strings in a JSON object.
khoj/processor/conversation/utils.py
CHANGED
@@ -4,14 +4,12 @@ import logging
 import math
 import mimetypes
 import os
-import queue
 import re
 import uuid
 from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
 from io import BytesIO
-from time import perf_counter
 from typing import Any, Callable, Dict, List, Optional
 
 import PIL.Image
@@ -19,9 +17,10 @@ import pyjson5
 import requests
 import tiktoken
 import yaml
-from …
+from langchain_core.messages.chat import ChatMessage
+from llama_cpp import LlamaTokenizer
 from llama_cpp.llama import Llama
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
 
 from khoj.database.adapters import ConversationAdapters
 from khoj.database.models import ChatModel, ClientApplication, KhojUser
@@ -52,7 +51,7 @@ except ImportError:
 model_to_prompt_size = {
     # OpenAI Models
     "gpt-4o": 60000,
-    "gpt-4o-mini": …
+    "gpt-4o-mini": 60000,
     "gpt-4.1": 60000,
     "gpt-4.1-mini": 120000,
     "gpt-4.1-nano": 120000,
@@ -105,9 +104,9 @@ class InformationCollectionIteration:
 
 
 def construct_iteration_history(
-    previous_iterations: List[InformationCollectionIteration], previous_iteration_prompt: str
-) -> …
-    previous_iterations_history = …
+    query: str, previous_iterations: List[InformationCollectionIteration], previous_iteration_prompt: str
+) -> list[dict]:
+    previous_iterations_history = []
     for idx, iteration in enumerate(previous_iterations):
         iteration_data = previous_iteration_prompt.format(
             tool=iteration.tool,
@@ -116,8 +115,23 @@ def construct_iteration_history(
             index=idx + 1,
         )
 
-        previous_iterations_history …
-
+        previous_iterations_history.append(iteration_data)
+
+    return (
+        [
+            {
+                "by": "you",
+                "message": query,
+            },
+            {
+                "by": "khoj",
+                "intent": {"type": "remember", "query": query},
+                "message": previous_iterations_history,
+            },
+        ]
+        if previous_iterations_history
+        else []
+    )
 
 
 def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
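
construct_iteration_history now folds every prior iteration into a single faux chat exchange keyed on the target query. Based on the code above and the previous_iteration template, the returned structure looks roughly like this (placeholder values):

    [
        {"by": "you", "message": "<target query>"},
        {
            "by": "khoj",
            "intent": {"type": "remember", "query": "<target query>"},
            "message": ["# Iteration 1:\n- tool: ...\n- query: ...\n- result: ..."],
        },
    ]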
@@ -152,19 +166,35 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
 def construct_tool_chat_history(
     previous_iterations: List[InformationCollectionIteration], tool: ConversationCommand = None
 ) -> Dict[str, list]:
+    """
+    Construct chat history from previous iterations for a specific tool
+
+    If a tool is provided, only the inferred queries for that tool is added.
+    If no tool is provided inferred query for all tools used are added.
+    """
     chat_history: list = []
-
-
-
+    base_extractor: Callable[[InformationCollectionIteration], List[str]] = lambda x: []
+    extract_inferred_query_map: Dict[ConversationCommand, Callable[[InformationCollectionIteration], List[str]]] = {
+        ConversationCommand.Notes: (
             lambda iteration: [c["query"] for c in iteration.context] if iteration.context else []
-        )
-
-    inferred_query_extractor = (
+        ),
+        ConversationCommand.Online: (
             lambda iteration: list(iteration.onlineContext.keys()) if iteration.onlineContext else []
-        )
-
-
+        ),
+        ConversationCommand.Webpage: (
+            lambda iteration: list(iteration.onlineContext.keys()) if iteration.onlineContext else []
+        ),
+        ConversationCommand.Code: (
+            lambda iteration: list(iteration.codeContext.keys()) if iteration.codeContext else []
+        ),
+    }
     for iteration in previous_iterations:
+        # If a tool is provided use the inferred query extractor for that tool if available
+        # If no tool is provided, use inferred query extractor for the tool used in the iteration
+        # Fallback to base extractor if the tool does not have an inferred query extractor
+        inferred_query_extractor = extract_inferred_query_map.get(
+            tool or ConversationCommand(iteration.tool), base_extractor
+        )
         chat_history += [
             {
                 "by": "you",
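
The per-tool if/else selection is replaced with a dispatch table, so supporting a new tool's context only needs one more map entry. The lookup-with-fallback pattern in isolation (tool names and context shapes here are illustrative, not khoj's):

    from typing import Callable

    extractors: dict[str, Callable[[dict], list[str]]] = {
        "online": lambda iteration: list(iteration.get("onlineContext", {}).keys()),
    }
    # Tools without a registered extractor fall back to yielding nothing
    extract = extractors.get("code", lambda iteration: [])
    queries = extract({"onlineContext": {"https://example.com": {}}})  # -> []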
@@ -300,7 +330,11 @@ Khoj: "{chat_response}"
 
 
 def construct_structured_message(
-    message: …
+    message: list[str] | str,
+    images: list[str],
+    model_type: str,
+    vision_enabled: bool,
+    attached_file_context: str = None,
 ):
     """
     Format messages into appropriate multimedia format for supported chat model types
@@ -310,10 +344,11 @@ def construct_structured_message(
         ChatModel.ModelType.GOOGLE,
         ChatModel.ModelType.ANTHROPIC,
     ]:
-
-        return message
+        message = [message] if isinstance(message, str) else message
 
-    constructed_messages: List[Any] = [
+    constructed_messages: List[dict[str, Any]] = [
+        {"type": "text", "text": message_part} for message_part in message
+    ]
 
     if not is_none_or_empty(attached_file_context):
         constructed_messages.append({"type": "text", "text": attached_file_context})
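
construct_structured_message no longer short-circuits with the raw string; every message is normalized into a list of typed content parts. Under this change, a text question with attached file context would come out shaped like this (a sketch; only the part shape is taken from the diff):

    constructed_messages = [
        {"type": "text", "text": "What does the attached file say?"},
        {"type": "text", "text": "File: notes.md\n..."},
    ]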
@@ -346,7 +381,7 @@ def gather_raw_query_files(
 
 def generate_chatml_messages_with_context(
     user_message,
-    system_message=None,
+    system_message: str = None,
     conversation_log={},
     model_name="gpt-4o-mini",
     loaded_model: Optional[Llama] = None,
@@ -409,6 +444,9 @@ def generate_chatml_messages_with_context(
         if not is_none_or_empty(chat.get("onlineContext")):
             message_context += f"{prompts.online_search_conversation.format(online_results=chat.get('onlineContext'))}"
 
+        if not is_none_or_empty(chat.get("codeContext")):
+            message_context += f"{prompts.code_executed_context.format(online_results=chat.get('codeContext'))}"
+
         if not is_none_or_empty(message_context):
             reconstructed_context_message = ChatMessage(content=message_context, role="user")
             chatml_messages.insert(0, reconstructed_context_message)
@@ -441,7 +479,7 @@ def generate_chatml_messages_with_context(
         if len(chatml_messages) >= 3 * lookback_turns:
             break
 
-    messages = []
+    messages: list[ChatMessage] = []
 
     if not is_none_or_empty(generated_asset_results):
         messages.append(
@@ -478,6 +516,11 @@ def generate_chatml_messages_with_context(
     if not is_none_or_empty(system_message):
         messages.append(ChatMessage(content=system_message, role="system"))
 
+    # Normalize message content to list of chatml dictionaries
+    for message in messages:
+        if isinstance(message.content, str):
+            message.content = [{"type": "text", "text": message.content}]
+
     # Truncate oldest messages from conversation history until under max supported prompt size by model
     messages = truncate_messages(messages, max_prompt_size, model_name, loaded_model, tokenizer_name)
 
@@ -485,14 +528,11 @@ def generate_chatml_messages_with_context(
     return messages[::-1]
 
 
-def …
-    messages: list[ChatMessage],
-    max_prompt_size: int,
+def get_encoder(
     model_name: str,
     loaded_model: Optional[Llama] = None,
     tokenizer_name=None,
-) -> …
-    """Truncate messages to fit within max prompt size supported by model"""
+) -> tiktoken.Encoding | PreTrainedTokenizer | PreTrainedTokenizerFast | LlamaTokenizer:
     default_tokenizer = "gpt-4o"
 
     try:
@@ -515,6 +555,48 @@ def truncate_messages(
         logger.debug(
             f"Fallback to default chat model tokenizer: {default_tokenizer}.\nConfigure tokenizer for model: {model_name} in Khoj settings to improve context stuffing."
         )
+    return encoder
+
+
+def count_tokens(
+    message_content: str | list[str | dict],
+    encoder: PreTrainedTokenizer | PreTrainedTokenizerFast | LlamaTokenizer | tiktoken.Encoding,
+) -> int:
+    """
+    Count the total number of tokens in a list of messages.
+
+    Assumes each images takes 500 tokens for approximation.
+    """
+    if isinstance(message_content, list):
+        image_count = 0
+        message_content_parts: list[str] = []
+        # Collate message content into single string to ease token counting
+        for part in message_content:
+            if isinstance(part, dict) and part.get("type") == "text":
+                message_content_parts.append(part["text"])
+            elif isinstance(part, dict) and part.get("type") == "image_url":
+                image_count += 1
+            elif isinstance(part, str):
+                message_content_parts.append(part)
+            else:
+                logger.warning(f"Unknown message type: {part}. Skipping.")
+        message_content = "\n".join(message_content_parts).rstrip()
+        return len(encoder.encode(message_content)) + image_count * 500
+    elif isinstance(message_content, str):
+        return len(encoder.encode(message_content))
+    else:
+        return len(encoder.encode(json.dumps(message_content)))
+
+
+def truncate_messages(
+    messages: list[ChatMessage],
+    max_prompt_size: int,
+    model_name: str,
+    loaded_model: Optional[Llama] = None,
+    tokenizer_name=None,
+) -> list[ChatMessage]:
+    """Truncate messages to fit within max prompt size supported by model"""
+    encoder = get_encoder(model_name, loaded_model, tokenizer_name)
 
     # Extract system message from messages
     system_message = None
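
count_tokens flattens typed content parts into one string before encoding and charges a flat 500 tokens per image part. A small usage sketch with a tiktoken encoder (the image payload is illustrative):

    import tiktoken

    encoder = tiktoken.encoding_for_model("gpt-4o")
    content = [
        {"type": "text", "text": "Describe this screenshot"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
    ]
    n_tokens = count_tokens(content, encoder)  # text tokens + 1 image * 500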
@@ -523,35 +605,55 @@ def truncate_messages(
             system_message = messages.pop(idx)
             break
 
-    # TODO: Handle truncation of multi-part message.content, i.e when message.content is a list[dict] rather than a string
-    system_message_tokens = (
-        len(encoder.encode(system_message.content)) if system_message and type(system_message.content) == str else 0
-    )
-
-    tokens = sum([len(encoder.encode(message.content)) for message in messages if type(message.content) == str])
-
     # Drop older messages until under max supported prompt size by model
     # Reserves 4 tokens to demarcate each message (e.g <|im_start|>user, <|im_end|>, <|endoftext|> etc.)
-
-
-
+    system_message_tokens = count_tokens(system_message.content, encoder) if system_message else 0
+    tokens = sum([count_tokens(message.content, encoder) for message in messages])
+    total_tokens = tokens + system_message_tokens + 4 * len(messages)
+
+    while total_tokens > max_prompt_size and (len(messages) > 1 or len(messages[0].content) > 1):
+        if len(messages[-1].content) > 1:
+            # The oldest content part is earlier in content list. So pop from the front.
+            messages[-1].content.pop(0)
+        else:
+            # The oldest message is the last one. So pop from the back.
+            messages.pop()
+        tokens = sum([count_tokens(message.content, encoder) for message in messages])
+        total_tokens = tokens + system_message_tokens + 4 * len(messages)
 
     # Truncate current message if still over max supported prompt size by model
-
-
-
-
-
+    total_tokens = tokens + system_message_tokens + 4 * len(messages)
+    if total_tokens > max_prompt_size:
+        # At this point, a single message with a single content part of type dict should remain
+        assert (
+            len(messages) == 1 and len(messages[0].content) == 1 and isinstance(messages[0].content[0], dict)
+        ), "Expected a single message with a single content part remaining at this point in truncation"
+
+        # Collate message content into single string to ease truncation
+        part = messages[0].content[0]
+        message_content: str = part["text"] if part["type"] == "text" else json.dumps(part)
+        message_role = messages[0].role
+
+        remaining_context = "\n".join(message_content.split("\n")[:-1])
+        original_question = "\n" + "\n".join(message_content.split("\n")[-1:])
+
+        original_question_tokens = count_tokens(original_question, encoder)
         remaining_tokens = max_prompt_size - system_message_tokens
         if remaining_tokens > original_question_tokens:
             remaining_tokens -= original_question_tokens
-
-
+            truncated_context = encoder.decode(encoder.encode(remaining_context)[:remaining_tokens]).strip()
+            truncated_content = truncated_context + original_question
         else:
-
-
+            truncated_content = encoder.decode(encoder.encode(original_question)[:remaining_tokens]).strip()
+        messages = [ChatMessage(content=[{"type": "text", "text": truncated_content}], role=message_role)]
+
+        truncated_snippet = (
+            f"{truncated_content[:1000]}\n...\n{truncated_content[-1000:]}"
+            if len(truncated_content) > 2000
+            else truncated_content
+        )
         logger.debug(
-            f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {…
+            f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {truncated_snippet}"
        )
 
    if system_message:
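
The truncation loop now understands multi-part content: messages are ordered newest-first at this point, so whole messages are popped from the back (oldest first), and within the last surviving message content parts are popped from the front. A hedged sketch of calling the new helper (message values are illustrative):

    from langchain_core.messages.chat import ChatMessage

    messages = [
        ChatMessage(role="user", content=[{"type": "text", "text": "latest question"}]),
        ChatMessage(role="user", content=[{"type": "text", "text": "older notes"}, {"type": "text", "text": "old notes"}]),
    ]
    # With a tight budget, "older notes" is dropped before "old notes"; if the final
    # message alone still exceeds the budget, its leading context is clipped while the
    # last line (the question itself) is preserved where possible.
    trimmed = truncate_messages(messages, max_prompt_size=32, model_name="gpt-4o")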
khoj/processor/tools/online_search.py
CHANGED
@@ -64,11 +64,12 @@ async def search_online(
     user: KhojUser,
     send_status_func: Optional[Callable] = None,
     custom_filters: List[str] = [],
+    max_online_searches: int = 3,
     max_webpages_to_read: int = 1,
     query_images: List[str] = None,
+    query_files: str = None,
     previous_subqueries: Set = set(),
     agent: Agent = None,
-    query_files: str = None,
     tracer: dict = {},
 ):
     query += " ".join(custom_filters)
@@ -84,9 +85,10 @@ async def search_online(
         location,
         user,
         query_images=query_images,
+        query_files=query_files,
+        max_queries=max_online_searches,
         agent=agent,
         tracer=tracer,
-        query_files=query_files,
     )
     subqueries = list(new_subqueries - previous_subqueries)
     response_dict: Dict[str, Dict[str, List[Dict] | Dict]] = {}
khoj/processor/tools/run_code.py
CHANGED
@@ -9,8 +9,8 @@ from pathlib import Path
 from typing import Any, Callable, List, NamedTuple, Optional
 
 import aiohttp
+import httpx
 from asgiref.sync import sync_to_async
-from httpx import RemoteProtocolError
 from tenacity import (
     before_sleep_log,
     retry,
@@ -192,7 +192,9 @@ async def generate_python_code(
         | retry_if_exception_type(aiohttp.ClientTimeout)
         | retry_if_exception_type(asyncio.TimeoutError)
         | retry_if_exception_type(ConnectionError)
-        | retry_if_exception_type(RemoteProtocolError)
+        | retry_if_exception_type(httpx.RemoteProtocolError)
+        | retry_if_exception_type(httpx.NetworkError)
+        | retry_if_exception_type(httpx.TimeoutException)
     ),
     wait=wait_random_exponential(min=1, max=5),
     stop=stop_after_attempt(3),
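
Catching httpx.NetworkError and httpx.TimeoutException alongside RemoteProtocolError makes the sandbox call retry on dropped connections and timeouts, not just protocol violations. The assembled policy as a standalone tenacity decorator (condensed to the httpx conditions; the function name and body are placeholders):

    import httpx
    from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_random_exponential

    @retry(
        retry=(
            retry_if_exception_type(httpx.RemoteProtocolError)
            | retry_if_exception_type(httpx.NetworkError)
            | retry_if_exception_type(httpx.TimeoutException)
        ),
        wait=wait_random_exponential(min=1, max=5),
        stop=stop_after_attempt(3),
    )
    async def call_code_sandbox(code: str) -> dict:
        ...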
khoj/routers/api_chat.py
CHANGED
@@ -1129,9 +1129,10 @@ async def chat(
                 user,
                 partial(send_event, ChatEvent.STATUS),
                 custom_filters,
+                max_online_searches=3,
                 query_images=uploaded_images,
-                agent=agent,
                 query_files=attached_file_context,
+                agent=agent,
                 tracer=tracer,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
khoj/routers/helpers.py
CHANGED
@@ -523,8 +523,9 @@ async def generate_online_subqueries(
     location_data: LocationData,
     user: KhojUser,
     query_images: List[str] = None,
-    agent: Agent = None,
     query_files: str = None,
+    max_queries: int = 3,
+    agent: Agent = None,
     tracer: dict = {},
 ) -> Set[str]:
     """
@@ -534,7 +535,6 @@ async def generate_online_subqueries(
     username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else ""
     chat_history = construct_chat_history(conversation_history)
 
-    max_queries = 3
     utc_date = datetime.now(timezone.utc).strftime("%Y-%m-%d")
     personality_context = (
         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
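
With max_queries promoted to a parameter (defaulting to the previously hard-coded 3), callers such as search_online can now size the subquery budget via max_online_searches. A hedged call-site sketch; the leading positional arguments are elided because they are not visible in this hunk:

    subqueries = await generate_online_subqueries(
        ...,  # query, conversation history, etc. (signature head not shown in this diff)
        location_data=location_data,
        user=user,
        max_queries=2,  # tighter budget than the old hard-coded 3
    )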