khoj 1.27.2.dev29__py3-none-any.whl → 1.28.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +1 -1
- khoj/database/adapters/__init__.py +50 -12
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3423-ff7402ae1dd66592.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-e80647edf6c92c27.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-5ae1e540bb5be8a9.js → page-2beaba7c9bb750bd.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-774ae3e033f938cd.js → page-9b5c77e0b0dd772c.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-bfc70b16ba5e51b4.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-340bcf53abf6a2cc.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-4dc472cf6d674004.js → page-f249666a0cbdaa0d.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-9b64f61caa5bd7f9.js → page-ab2995529ece3140.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-7a8c382af2a7e870.js → page-89e6737b2cc9fb3a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-eb9e282691858f2e.js → page-505b07bce608b34e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-2b720658ccc746f2.js → webpack-878569182b3af4c6.js} +1 -1
- khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
- khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +1 -0
- khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +25 -0
- khoj/interface/compiled/_next/static/css/d738728883c68af8.css +1 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +14 -10
- khoj/processor/conversation/anthropic/utils.py +13 -2
- khoj/processor/conversation/google/gemini_chat.py +15 -11
- khoj/processor/conversation/offline/chat_model.py +18 -10
- khoj/processor/conversation/openai/gpt.py +11 -8
- khoj/processor/conversation/openai/utils.py +7 -0
- khoj/processor/conversation/prompts.py +156 -49
- khoj/processor/conversation/utils.py +146 -13
- khoj/processor/embeddings.py +4 -4
- khoj/processor/tools/online_search.py +13 -7
- khoj/processor/tools/run_code.py +144 -0
- khoj/routers/api.py +6 -6
- khoj/routers/api_chat.py +193 -112
- khoj/routers/helpers.py +107 -48
- khoj/routers/research.py +320 -0
- khoj/search_filter/date_filter.py +1 -3
- khoj/search_filter/file_filter.py +1 -2
- khoj/search_type/text_search.py +3 -3
- khoj/utils/helpers.py +24 -2
- khoj/utils/yaml.py +4 -0
- {khoj-1.27.2.dev29.dist-info → khoj-1.28.1.dev1.dist-info}/METADATA +3 -3
- {khoj-1.27.2.dev29.dist-info → khoj-1.28.1.dev1.dist-info}/RECORD +66 -63
- khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +0 -1
- khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-0b533af8bf6ac218.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9479-ff7d8c4dae2014d1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-97f5b61aaf46d364.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d82403db2866bad8.js +0 -1
- khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
- khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
- khoj/interface/compiled/_next/static/css/ddcc0cf73e062476.css +0 -1
- /khoj/interface/compiled/_next/static/{atzIseFarmC7TIwq2BgHC → K7ZigmRDrBfpIN7jxKQsA}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{atzIseFarmC7TIwq2BgHC → K7ZigmRDrBfpIN7jxKQsA}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1970-60c96aed937a4928.js → 1970-90dd510762d820ba.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9417-2ca87207387fc790.js → 9417-951f46451a8dd6d7.js} +0 -0
- {khoj-1.27.2.dev29.dist-info → khoj-1.28.1.dev1.dist-info}/WHEEL +0 -0
- {khoj-1.27.2.dev29.dist-info → khoj-1.28.1.dev1.dist-info}/entry_points.txt +0 -0
- {khoj-1.27.2.dev29.dist-info → khoj-1.28.1.dev1.dist-info}/licenses/LICENSE +0 -0
khoj/routers/helpers.py
CHANGED
```diff
@@ -43,6 +43,7 @@ from khoj.database.adapters import (
     AutomationAdapters,
     ConversationAdapters,
     EntryAdapters,
+    FileObjectAdapters,
     ais_user_subscribed,
     create_khoj_token,
     get_khoj_tokens,
@@ -87,9 +88,11 @@ from khoj.processor.conversation.offline.chat_model import (
 )
 from khoj.processor.conversation.openai.gpt import converse, send_message_to_model
 from khoj.processor.conversation.utils import (
+    ChatEvent,
     ThreadedGenerator,
+    clean_json,
+    construct_chat_history,
     generate_chatml_messages_with_context,
-    remove_json_codeblock,
     save_to_conversation_log,
 )
 from khoj.processor.speech.text_to_speech import is_eleven_labs_enabled
@@ -137,7 +140,7 @@ def validate_conversation_config(user: KhojUser):
 async def is_ready_to_chat(user: KhojUser):
     user_conversation_config = await ConversationAdapters.aget_user_conversation_config(user)
     if user_conversation_config == None:
-        user_conversation_config = await ConversationAdapters.aget_default_conversation_config()
+        user_conversation_config = await ConversationAdapters.aget_default_conversation_config(user)

     if user_conversation_config and user_conversation_config.model_type == ChatModelOptions.ModelType.OFFLINE:
         chat_model = user_conversation_config.chat_model
@@ -210,21 +213,6 @@ def get_next_url(request: Request) -> str:
     return urljoin(str(request.base_url).rstrip("/"), next_path)


-def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
-    chat_history = ""
-    for chat in conversation_history.get("chat", [])[-n:]:
-        if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]:
-            chat_history += f"User: {chat['intent']['query']}\n"
-            chat_history += f"{agent_name}: {chat['message']}\n"
-        elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
-            chat_history += f"User: {chat['intent']['query']}\n"
-            chat_history += f"{agent_name}: [generated image redacted for space]\n"
-        elif chat["by"] == "khoj" and ("excalidraw" in chat["intent"].get("type")):
-            chat_history += f"User: {chat['intent']['query']}\n"
-            chat_history += f"{agent_name}: {chat['intent']['inferred-queries'][0]}\n"
-    return chat_history
-
-
 def get_conversation_command(query: str, any_references: bool = False) -> ConversationCommand:
     if query.startswith("/notes"):
         return ConversationCommand.Notes
@@ -244,6 +232,10 @@ def get_conversation_command(query: str, any_references: bool = False) -> Conver
         return ConversationCommand.Summarize
     elif query.startswith("/diagram"):
         return ConversationCommand.Diagram
+    elif query.startswith("/code"):
+        return ConversationCommand.Code
+    elif query.startswith("/research"):
+        return ConversationCommand.Research
     # If no relevant notes found for the given query
     elif not any_references:
         return ConversationCommand.General
```
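The two new slash commands extend the existing prefix routing in `get_conversation_command`. A quick sketch of the resulting behavior (the example queries are made up):

```python
from khoj.routers.helpers import get_conversation_command
from khoj.utils.helpers import ConversationCommand

# New in this release: explicit code-execution and multi-step research modes.
assert get_conversation_command("/code plot my sleep data") == ConversationCommand.Code
assert get_conversation_command("/research compare vector databases") == ConversationCommand.Research
# Unprefixed queries still fall through to General when no references are found.
assert get_conversation_command("hello") == ConversationCommand.General
```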
```diff
@@ -342,8 +334,7 @@ async def aget_relevant_information_sources(
     )

     try:
-        response = response.strip()
-        response = remove_json_codeblock(response)
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["source"] if q.strip()]
         if not isinstance(response, list) or not response or len(response) == 0:
```
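This is the first of several call sites that swap the old two-step cleanup (`response.strip()` followed by `remove_json_codeblock(response)`) for a single `clean_json` helper now imported from `khoj.processor.conversation.utils`. The real implementation lives in the `utils.py` diff (+146 -13, not expanded here); a minimal sketch of the behavior these call sites depend on:

```python
def clean_json(response: str) -> str:
    # Sketch only: trim whitespace and any wrapping markdown code fence
    # (``` or ```json) so the payload is safe to hand to json.loads.
    response = response.strip()
    if response.startswith("```"):
        # Drop the opening fence line, e.g. "```json"
        response = response.split("\n", 1)[1] if "\n" in response else ""
    if response.endswith("```"):
        response = response[: -len("```")]
    return response.strip()
```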
```diff
@@ -421,8 +412,7 @@ async def aget_relevant_output_modes(
     )

     try:
-        response = response.strip()
-        response = remove_json_codeblock(response)
+        response = clean_json(response)
         response = json.loads(response)

         if is_none_or_empty(response):
@@ -483,11 +473,14 @@ async def infer_webpage_urls(

     # Validate that the response is a non-empty, JSON-serializable list of URLs
     try:
-        response = response.strip()
+        response = clean_json(response)
         urls = json.loads(response)
         valid_unique_urls = {str(url).strip() for url in urls["links"] if is_valid_url(url)}
         if is_none_or_empty(valid_unique_urls):
             raise ValueError(f"Invalid list of urls: {response}")
+        if len(valid_unique_urls) == 0:
+            logger.error(f"No valid URLs found in response: {response}")
+            return []
         return list(valid_unique_urls)
     except Exception:
         raise ValueError(f"Invalid list of urls: {response}")
@@ -534,8 +527,7 @@ async def generate_online_subqueries(

     # Validate that the response is a non-empty, JSON-serializable list
     try:
-        response = response.strip()
-        response = remove_json_codeblock(response)
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response or len(response) == 0:
@@ -644,6 +636,53 @@ async def extract_relevant_summary(
     return response.strip()


+async def generate_summary_from_files(
+    q: str,
+    user: KhojUser,
+    file_filters: List[str],
+    meta_log: dict,
+    query_images: List[str] = None,
+    agent: Agent = None,
+    send_status_func: Optional[Callable] = None,
+    tracer: dict = {},
+):
+    try:
+        file_object = None
+        if await EntryAdapters.aagent_has_entries(agent):
+            file_names = await EntryAdapters.aget_agent_entry_filepaths(agent)
+            if len(file_names) > 0:
+                file_object = await FileObjectAdapters.async_get_file_objects_by_name(None, file_names.pop(), agent)
+
+        if len(file_filters) > 0:
+            file_object = await FileObjectAdapters.async_get_file_objects_by_name(user, file_filters[0])
+
+        if len(file_object) == 0:
+            response_log = "Sorry, I couldn't find the full text of this file."
+            yield response_log
+            return
+        contextual_data = " ".join([file.raw_text for file in file_object])
+        if not q:
+            q = "Create a general summary of the file"
+        async for result in send_status_func(f"**Constructing Summary Using:** {file_object[0].file_name}"):
+            yield {ChatEvent.STATUS: result}
+
+        response = await extract_relevant_summary(
+            q,
+            contextual_data,
+            conversation_history=meta_log,
+            query_images=query_images,
+            user=user,
+            agent=agent,
+            tracer=tracer,
+        )
+
+        yield str(response)
+    except Exception as e:
+        response_log = "Error summarizing file. Please try again, or contact support."
+        logger.error(f"Error summarizing file for {user.email}: {e}", exc_info=True)
+        yield result
+
+
 async def generate_excalidraw_diagram(
     q: str,
     conversation_history: Dict[str, Any],
```
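`generate_summary_from_files` is an async generator rather than a plain coroutine: it yields `{ChatEvent.STATUS: ...}` progress events while it locates the file (agent-attached entries first, then the user's `file_filters`), and a plain string carrying the summary (or an apology) at the end, so callers discriminate on yield type. A minimal consumption sketch; the `collect_summary` wrapper, status sink, and file path here are hypothetical:

```python
from khoj.processor.conversation.utils import ChatEvent
from khoj.routers.helpers import generate_summary_from_files

async def collect_summary(user, meta_log: dict) -> str:
    async def send_status(message: str):
        yield message  # hypothetical sink; the real one streams SSE status events

    summary = ""
    async for item in generate_summary_from_files(
        q="Summarize the key decisions",
        user=user,
        file_filters=["notes/meeting.md"],  # hypothetical file filter
        meta_log=meta_log,
        send_status_func=send_status,
    ):
        if isinstance(item, dict):  # a {ChatEvent.STATUS: ...} progress event
            print("status:", item[ChatEvent.STATUS])
        else:
            summary = item  # the final summary (or error) string
    return summary
```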
```diff
@@ -759,10 +798,9 @@ async def generate_excalidraw_diagram_from_description(

     with timer("Chat actor: Generate excalidraw diagram", logger):
         raw_response = await send_message_to_model_wrapper(
-            message=excalidraw_diagram_generation, user=user, tracer=tracer
+            query=excalidraw_diagram_generation, user=user, tracer=tracer
         )
-        raw_response = raw_response.strip()
-        raw_response = remove_json_codeblock(raw_response)
+        raw_response = clean_json(raw_response)
         response: Dict[str, str] = json.loads(raw_response)
         if not response or not isinstance(response, List) or not isinstance(response[0], Dict):
             # TODO Some additional validation here that it's a valid Excalidraw diagram
@@ -839,11 +877,12 @@ async def generate_better_image_prompt(


 async def send_message_to_model_wrapper(
-    message: str,
+    query: str,
     system_message: str = "",
     response_type: str = "text",
     user: KhojUser = None,
     query_images: List[str] = None,
+    context: str = "",
     tracer: dict = {},
 ):
     conversation_config: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config(user)
```
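Alongside the rename of the wrapper's first parameter (`message` becomes `query`), the new `context` argument is threaded into `generate_chatml_messages_with_context` as `context_message` in every provider branch below, which lets callers keep scaffolding text out of the user turn itself. The new research planner calls it exactly like this sketch (the `plan_next_step` wrapper name is hypothetical):

```python
from khoj.routers.helpers import send_message_to_model_wrapper

async def plan_next_step(user, user_query: str, function_planning_prompt: str, tracer: dict) -> str:
    # Mirrors the call pattern used by the new planner in khoj/routers/research.py.
    return await send_message_to_model_wrapper(
        query=user_query,                  # the user-facing question
        context=function_planning_prompt,  # planning scaffold, kept out of the user turn
        response_type="json_object",       # now also forwarded to the Anthropic branch
        user=user,
        tracer=tracer,
    )
```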
```diff
@@ -874,7 +913,8 @@ async def send_message_to_model_wrapper(

         loaded_model = state.offline_chat_processor_config.loaded_model
         truncated_messages = generate_chatml_messages_with_context(
-            user_message=message,
+            user_message=query,
+            context_message=context,
             system_message=system_message,
             model_name=chat_model,
             loaded_model=loaded_model,
@@ -899,7 +939,8 @@ async def send_message_to_model_wrapper(
         api_key = openai_chat_config.api_key
         api_base_url = openai_chat_config.api_base_url
         truncated_messages = generate_chatml_messages_with_context(
-            user_message=message,
+            user_message=query,
+            context_message=context,
             system_message=system_message,
             model_name=chat_model,
             max_prompt_size=max_tokens,
@@ -920,7 +961,8 @@ async def send_message_to_model_wrapper(
     elif model_type == ChatModelOptions.ModelType.ANTHROPIC:
         api_key = conversation_config.openai_config.api_key
         truncated_messages = generate_chatml_messages_with_context(
-            user_message=message,
+            user_message=query,
+            context_message=context,
             system_message=system_message,
             model_name=chat_model,
             max_prompt_size=max_tokens,
@@ -934,12 +976,14 @@ async def send_message_to_model_wrapper(
             messages=truncated_messages,
             api_key=api_key,
             model=chat_model,
+            response_type=response_type,
             tracer=tracer,
         )
     elif model_type == ChatModelOptions.ModelType.GOOGLE:
         api_key = conversation_config.openai_config.api_key
         truncated_messages = generate_chatml_messages_with_context(
-            user_message=message,
+            user_message=query,
+            context_message=context,
             system_message=system_message,
             model_name=chat_model,
             max_prompt_size=max_tokens,
@@ -1033,6 +1077,7 @@ def send_message_to_model_wrapper_sync(
             messages=truncated_messages,
             api_key=api_key,
             model=chat_model,
+            response_type=response_type,
             tracer=tracer,
         )

@@ -1064,6 +1109,7 @@ def generate_chat_response(
     conversation: Conversation,
     compiled_references: List[Dict] = [],
     online_results: Dict[str, Dict] = {},
+    code_results: Dict[str, Dict] = {},
     inferred_queries: List[str] = [],
     conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
     user: KhojUser = None,
@@ -1071,8 +1117,10 @@ def generate_chat_response(
     conversation_id: str = None,
     location_data: LocationData = None,
     user_name: Optional[str] = None,
+    meta_research: str = "",
     query_images: Optional[List[str]] = None,
     tracer: dict = {},
+    train_of_thought: List[Any] = [],
 ) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]:
     # Initialize Variables
     chat_response = None
@@ -1080,6 +1128,9 @@ def generate_chat_response(

     metadata = {}
     agent = AgentAdapters.get_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
+    query_to_run = q
+    if meta_research:
+        query_to_run = f"AI Research: {meta_research} {q}"
     try:
         partial_completion = partial(
             save_to_conversation_log,
@@ -1088,11 +1139,13 @@ def generate_chat_response(
             meta_log=meta_log,
             compiled_references=compiled_references,
             online_results=online_results,
+            code_results=code_results,
             inferred_queries=inferred_queries,
             client_application=client_application,
             conversation_id=conversation_id,
             query_images=query_images,
             tracer=tracer,
+            train_of_thought=train_of_thought,
         )

         conversation_config = ConversationAdapters.get_valid_conversation_config(user, conversation)
@@ -1106,9 +1159,9 @@ def generate_chat_response(
         if conversation_config.model_type == "offline":
             loaded_model = state.offline_chat_processor_config.loaded_model
             chat_response = converse_offline(
+                user_query=query_to_run,
                 references=compiled_references,
                 online_results=online_results,
-                user_query=q,
                 loaded_model=loaded_model,
                 conversation_log=meta_log,
                 completion_func=partial_completion,
@@ -1128,9 +1181,10 @@ def generate_chat_response(
             chat_model = conversation_config.chat_model
             chat_response = converse(
                 compiled_references,
-                q,
+                query_to_run,
                 query_images=query_images,
                 online_results=online_results,
+                code_results=code_results,
                 conversation_log=meta_log,
                 model=chat_model,
                 api_key=api_key,
@@ -1150,9 +1204,10 @@ def generate_chat_response(
             api_key = conversation_config.openai_config.api_key
             chat_response = converse_anthropic(
                 compiled_references,
-                q,
+                query_to_run,
                 query_images=query_images,
                 online_results=online_results,
+                code_results=code_results,
                 conversation_log=meta_log,
                 model=conversation_config.chat_model,
                 api_key=api_key,
@@ -1170,10 +1225,10 @@ def generate_chat_response(
             api_key = conversation_config.openai_config.api_key
             chat_response = converse_gemini(
                 compiled_references,
-                q,
-                query_images=query_images,
-                online_results=online_results,
-                conversation_log=meta_log,
+                query_to_run,
+                online_results,
+                code_results,
+                meta_log,
                 model=conversation_config.chat_model,
                 api_key=api_key,
                 completion_func=partial_completion,
@@ -1203,6 +1258,7 @@ class ChatRequestBody(BaseModel):
     stream: Optional[bool] = False
     title: Optional[str] = None
     conversation_id: Optional[str] = None
+    turn_id: Optional[str] = None
     city: Optional[str] = None
     region: Optional[str] = None
     country: Optional[str] = None
@@ -1212,6 +1268,17 @@ class ChatRequestBody(BaseModel):
     create_new: Optional[bool] = False


+class DeleteMessageRequestBody(BaseModel):
+    conversation_id: str
+    turn_id: str
+
+
+class FeedbackData(BaseModel):
+    uquery: str
+    kquery: str
+    sentiment: str
+
+
 class ApiUserRateLimiter:
     def __init__(self, requests: int, subscribed_requests: int, window: int, slug: str):
         self.requests = requests
```
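Both new Pydantic models back chat endpoints added in `api_chat.py` (+193 -112): one for deleting a conversation turn, one for rating a response. Presumably `uquery` and `kquery` carry the user's query and Khoj's reply; the field values in this sketch are illustrative:

```python
from khoj.routers.helpers import DeleteMessageRequestBody, FeedbackData

delete_request = DeleteMessageRequestBody(conversation_id="42", turn_id="a1b2c3")
feedback = FeedbackData(
    uquery="What did I write about gardening?",    # the user's query (illustrative)
    kquery="You noted planting tomatoes in May.",  # Khoj's reply (illustrative)
    sentiment="positive",
)
```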
```diff
@@ -1314,7 +1381,7 @@ class ConversationCommandRateLimiter:
         self.slug = slug
         self.trial_rate_limit = trial_rate_limit
         self.subscribed_rate_limit = subscribed_rate_limit
-        self.restricted_commands = [ConversationCommand.
+        self.restricted_commands = [ConversationCommand.Research]

     async def update_and_check_if_valid(self, request: Request, conversation_command: ConversationCommand):
         if state.billing_enabled is False:
@@ -1627,14 +1694,6 @@ Manage your automations [here](/automations).
 """.strip()


-class ChatEvent(Enum):
-    START_LLM_RESPONSE = "start_llm_response"
-    END_LLM_RESPONSE = "end_llm_response"
-    MESSAGE = "message"
-    REFERENCES = "references"
-    STATUS = "status"
-
-
 class MessageProcessor:
     def __init__(self):
         self.references = {}
```
khoj/routers/research.py
ADDED
@@ -0,0 +1,320 @@
```python
import json
import logging
from datetime import datetime
from typing import Callable, Dict, List, Optional

import yaml
from fastapi import Request

from khoj.database.models import Agent, KhojUser
from khoj.processor.conversation import prompts
from khoj.processor.conversation.utils import (
    InformationCollectionIteration,
    clean_json,
    construct_iteration_history,
    construct_tool_chat_history,
)
from khoj.processor.tools.online_search import read_webpages, search_online
from khoj.processor.tools.run_code import run_code
from khoj.routers.api import extract_references_and_questions
from khoj.routers.helpers import (
    ChatEvent,
    construct_chat_history,
    extract_relevant_info,
    generate_summary_from_files,
    send_message_to_model_wrapper,
)
from khoj.utils.helpers import (
    ConversationCommand,
    function_calling_description_for_llm,
    is_none_or_empty,
    timer,
)
from khoj.utils.rawconfig import LocationData

logger = logging.getLogger(__name__)


async def apick_next_tool(
    query: str,
    conversation_history: dict,
    user: KhojUser = None,
    query_images: List[str] = [],
    location: LocationData = None,
    user_name: str = None,
    agent: Agent = None,
    previous_iterations_history: str = None,
    max_iterations: int = 5,
    send_status_func: Optional[Callable] = None,
    tracer: dict = {},
):
    """
    Given a query, determine which of the available tools the agent should use in order to answer appropriately. One at a time, and it's able to use subsequent iterations to refine the answer.
    """

    tool_options = dict()
    tool_options_str = ""

    agent_tools = agent.input_tools if agent else []

    for tool, description in function_calling_description_for_llm.items():
        tool_options[tool.value] = description
        if len(agent_tools) == 0 or tool.value in agent_tools:
            tool_options_str += f'- "{tool.value}": "{description}"\n'

    chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")

    if query_images:
        query = f"[placeholder for user attached images]\n{query}"

    personality_context = (
        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
    )

    # Extract Past User Message and Inferred Questions from Conversation Log
    today = datetime.today()
    location_data = f"{location}" if location else "Unknown"

    function_planning_prompt = prompts.plan_function_execution.format(
        tools=tool_options_str,
        chat_history=chat_history,
        personality_context=personality_context,
        current_date=today.strftime("%Y-%m-%d"),
        day_of_week=today.strftime("%A"),
        username=user_name or "Unknown",
        location=location_data,
        previous_iterations=previous_iterations_history,
        max_iterations=max_iterations,
    )

    with timer("Chat actor: Infer information sources to refer", logger):
        response = await send_message_to_model_wrapper(
            query=query,
            context=function_planning_prompt,
            response_type="json_object",
            user=user,
            query_images=query_images,
            tracer=tracer,
        )

    try:
        response = clean_json(response)
        response = json.loads(response)
        selected_tool = response.get("tool", None)
        generated_query = response.get("query", None)
        scratchpad = response.get("scratchpad", None)
        logger.info(f"Response for determining relevant tools: {response}")
        if send_status_func:
            determined_tool_message = "**Determined Tool**: "
            determined_tool_message += f"{selected_tool}({generated_query})." if selected_tool else "respond."
            determined_tool_message += f"\nReason: {scratchpad}" if scratchpad else ""
            async for event in send_status_func(f"{scratchpad}"):
                yield {ChatEvent.STATUS: event}

        yield InformationCollectionIteration(
            tool=selected_tool,
            query=generated_query,
        )

    except Exception as e:
        logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
        yield InformationCollectionIteration(
            tool=None,
            query=None,
        )


async def execute_information_collection(
    request: Request,
    user: KhojUser,
    query: str,
    conversation_id: str,
    conversation_history: dict,
    query_images: List[str],
    agent: Agent = None,
    send_status_func: Optional[Callable] = None,
    user_name: str = None,
    location: LocationData = None,
    file_filters: List[str] = [],
    tracer: dict = {},
):
    current_iteration = 0
    MAX_ITERATIONS = 5
    previous_iterations: List[InformationCollectionIteration] = []
    while current_iteration < MAX_ITERATIONS:
        online_results: Dict = dict()
        code_results: Dict = dict()
        document_results: List[Dict[str, str]] = []
        summarize_files: str = ""
        this_iteration = InformationCollectionIteration(tool=None, query=query)
        previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)

        async for result in apick_next_tool(
            query,
            conversation_history,
            user,
            query_images,
            location,
            user_name,
            agent,
            previous_iterations_history,
            MAX_ITERATIONS,
            send_status_func,
            tracer=tracer,
        ):
            if isinstance(result, dict) and ChatEvent.STATUS in result:
                yield result[ChatEvent.STATUS]
            elif isinstance(result, InformationCollectionIteration):
                this_iteration = result

        if this_iteration.tool == ConversationCommand.Notes:
            this_iteration.context = []
            document_results = []
            async for result in extract_references_and_questions(
                request,
                construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
                this_iteration.query,
                7,
                None,
                conversation_id,
                [ConversationCommand.Default],
                location,
                send_status_func,
                query_images,
                agent=agent,
                tracer=tracer,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                elif isinstance(result, tuple):
                    document_results = result[0]
                    this_iteration.context += document_results

            if not is_none_or_empty(document_results):
                try:
                    distinct_files = {d["file"] for d in document_results}
                    distinct_headings = set([d["compiled"].split("\n")[0] for d in document_results if "compiled" in d])
                    # Strip only leading # from headings
                    headings_str = "\n- " + "\n- ".join(distinct_headings).replace("#", "")
                    async for result in send_status_func(
                        f"**Found {len(distinct_headings)} Notes Across {len(distinct_files)} Files**: {headings_str}"
                    ):
                        yield result
                except Exception as e:
                    logger.error(f"Error extracting document references: {e}", exc_info=True)

        elif this_iteration.tool == ConversationCommand.Online:
            async for result in search_online(
                this_iteration.query,
                construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
                location,
                user,
                send_status_func,
                [],
                max_webpages_to_read=0,
                query_images=query_images,
                agent=agent,
                tracer=tracer,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                else:
                    online_results: Dict[str, Dict] = result  # type: ignore
                    this_iteration.onlineContext = online_results

        elif this_iteration.tool == ConversationCommand.Webpage:
            try:
                async for result in read_webpages(
                    this_iteration.query,
                    construct_tool_chat_history(previous_iterations, ConversationCommand.Webpage),
                    location,
                    user,
                    send_status_func,
                    query_images=query_images,
                    agent=agent,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        direct_web_pages: Dict[str, Dict] = result  # type: ignore

                        webpages = []
                        for web_query in direct_web_pages:
                            if online_results.get(web_query):
                                online_results[web_query]["webpages"] = direct_web_pages[web_query]["webpages"]
                            else:
                                online_results[web_query] = {"webpages": direct_web_pages[web_query]["webpages"]}

                            for webpage in direct_web_pages[web_query]["webpages"]:
                                webpages.append(webpage["link"])
                        this_iteration.onlineContext = online_results
            except Exception as e:
                logger.error(f"Error reading webpages: {e}", exc_info=True)

        elif this_iteration.tool == ConversationCommand.Code:
            try:
                async for result in run_code(
                    this_iteration.query,
                    construct_tool_chat_history(previous_iterations, ConversationCommand.Webpage),
                    "",
                    location,
                    user,
                    send_status_func,
                    query_images=query_images,
                    agent=agent,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        code_results: Dict[str, Dict] = result  # type: ignore
                        this_iteration.codeContext = code_results
                async for result in send_status_func(f"**Ran code snippets**: {len(this_iteration.codeContext)}"):
                    yield result
            except ValueError as e:
                logger.warning(
                    f"Failed to use code tool: {e}. Attempting to respond without code results",
                    exc_info=True,
                )

        elif this_iteration.tool == ConversationCommand.Summarize:
            try:
                async for result in generate_summary_from_files(
                    this_iteration.query,
                    user,
                    file_filters,
                    construct_tool_chat_history(previous_iterations),
                    query_images=query_images,
                    agent=agent,
                    send_status_func=send_status_func,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        summarize_files = result  # type: ignore
            except Exception as e:
                logger.error(f"Error generating summary: {e}", exc_info=True)

        else:
            # No valid tools. This is our exit condition.
            current_iteration = MAX_ITERATIONS

        current_iteration += 1

        if document_results or online_results or code_results or summarize_files:
            results_data = f"**Results**:\n"
            if document_results:
                results_data += f"**Document References**:\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
            if online_results:
                results_data += f"**Online Results**:\n{yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
            if code_results:
                results_data += f"**Code Results**:\n{yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
            if summarize_files:
                results_data += f"**Summarized Files**:\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"

            # intermediate_result = await extract_relevant_info(this_iteration.query, results_data, agent)
            this_iteration.summarizedResult = results_data

        previous_iterations.append(this_iteration)
        yield this_iteration
```