khoj 1.27.2.dev29__py3-none-any.whl → 1.27.2.dev130__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +34 -10
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1467-5a191c1cd5bf0b83.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-5d70d9dfcdcb1f10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3423-fa918f4e5365a35e.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-3ad0bfb299801220.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-5ae1e540bb5be8a9.js → page-2beaba7c9bb750bd.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-774ae3e033f938cd.js → page-9b5c77e0b0dd772c.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-7dc98df9c88828f0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d887f55fe6d4f35d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-4dc472cf6d674004.js → page-d46244282af16509.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-9b64f61caa5bd7f9.js → page-ab2995529ece3140.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-7a8c382af2a7e870.js → page-89e6737b2cc9fb3a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-eb9e282691858f2e.js → page-505b07bce608b34e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-2b720658ccc746f2.js → webpack-8ae5ce45161bd98e.js} +1 -1
- khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
- khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +1 -0
- khoj/interface/compiled/_next/static/css/b70402177a7c3207.css +1 -0
- khoj/interface/compiled/_next/static/css/e9c5fe555dd3050b.css +25 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +14 -10
- khoj/processor/conversation/anthropic/utils.py +13 -2
- khoj/processor/conversation/google/gemini_chat.py +15 -11
- khoj/processor/conversation/offline/chat_model.py +10 -9
- khoj/processor/conversation/openai/gpt.py +11 -8
- khoj/processor/conversation/prompts.py +131 -22
- khoj/processor/conversation/utils.py +132 -6
- khoj/processor/tools/online_search.py +5 -3
- khoj/processor/tools/run_code.py +144 -0
- khoj/routers/api.py +6 -6
- khoj/routers/api_chat.py +156 -88
- khoj/routers/helpers.py +91 -47
- khoj/routers/research.py +321 -0
- khoj/search_filter/date_filter.py +1 -3
- khoj/search_filter/file_filter.py +1 -2
- khoj/search_type/text_search.py +3 -3
- khoj/utils/helpers.py +15 -2
- khoj/utils/yaml.py +4 -0
- {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/METADATA +1 -1
- {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/RECORD +63 -60
- khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +0 -1
- khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-0b533af8bf6ac218.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9479-ff7d8c4dae2014d1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-97f5b61aaf46d364.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d82403db2866bad8.js +0 -1
- khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
- khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
- khoj/interface/compiled/_next/static/css/ddcc0cf73e062476.css +0 -1
- /khoj/interface/compiled/_next/static/{atzIseFarmC7TIwq2BgHC → N19uqHAJYqRAVxvuVwHfE}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{atzIseFarmC7TIwq2BgHC → N19uqHAJYqRAVxvuVwHfE}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1970-60c96aed937a4928.js → 1970-444843bea1d17d61.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9417-2ca87207387fc790.js → 9417-19cfd1a9cb758e71.js} +0 -0
- {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/WHEEL +0 -0
- {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/entry_points.txt +0 -0
- {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/licenses/LICENSE +0 -0
khoj/routers/helpers.py
CHANGED
@@ -43,6 +43,7 @@ from khoj.database.adapters import (
|
|
43
43
|
AutomationAdapters,
|
44
44
|
ConversationAdapters,
|
45
45
|
EntryAdapters,
|
46
|
+
FileObjectAdapters,
|
46
47
|
ais_user_subscribed,
|
47
48
|
create_khoj_token,
|
48
49
|
get_khoj_tokens,
|
@@ -87,9 +88,11 @@ from khoj.processor.conversation.offline.chat_model import (
|
|
87
88
|
)
|
88
89
|
from khoj.processor.conversation.openai.gpt import converse, send_message_to_model
|
89
90
|
from khoj.processor.conversation.utils import (
|
91
|
+
ChatEvent,
|
90
92
|
ThreadedGenerator,
|
93
|
+
clean_json,
|
94
|
+
construct_chat_history,
|
91
95
|
generate_chatml_messages_with_context,
|
92
|
-
remove_json_codeblock,
|
93
96
|
save_to_conversation_log,
|
94
97
|
)
|
95
98
|
from khoj.processor.speech.text_to_speech import is_eleven_labs_enabled
|
@@ -137,7 +140,7 @@ def validate_conversation_config(user: KhojUser):
|
|
137
140
|
async def is_ready_to_chat(user: KhojUser):
|
138
141
|
user_conversation_config = await ConversationAdapters.aget_user_conversation_config(user)
|
139
142
|
if user_conversation_config == None:
|
140
|
-
user_conversation_config = await ConversationAdapters.aget_default_conversation_config()
|
143
|
+
user_conversation_config = await ConversationAdapters.aget_default_conversation_config(user)
|
141
144
|
|
142
145
|
if user_conversation_config and user_conversation_config.model_type == ChatModelOptions.ModelType.OFFLINE:
|
143
146
|
chat_model = user_conversation_config.chat_model
|
@@ -210,21 +213,6 @@ def get_next_url(request: Request) -> str:
|
|
210
213
|
return urljoin(str(request.base_url).rstrip("/"), next_path)
|
211
214
|
|
212
215
|
|
213
|
-
def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
|
214
|
-
chat_history = ""
|
215
|
-
for chat in conversation_history.get("chat", [])[-n:]:
|
216
|
-
if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]:
|
217
|
-
chat_history += f"User: {chat['intent']['query']}\n"
|
218
|
-
chat_history += f"{agent_name}: {chat['message']}\n"
|
219
|
-
elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
|
220
|
-
chat_history += f"User: {chat['intent']['query']}\n"
|
221
|
-
chat_history += f"{agent_name}: [generated image redacted for space]\n"
|
222
|
-
elif chat["by"] == "khoj" and ("excalidraw" in chat["intent"].get("type")):
|
223
|
-
chat_history += f"User: {chat['intent']['query']}\n"
|
224
|
-
chat_history += f"{agent_name}: {chat['intent']['inferred-queries'][0]}\n"
|
225
|
-
return chat_history
|
226
|
-
|
227
|
-
|
228
216
|
def get_conversation_command(query: str, any_references: bool = False) -> ConversationCommand:
|
229
217
|
if query.startswith("/notes"):
|
230
218
|
return ConversationCommand.Notes
|
@@ -244,6 +232,10 @@ def get_conversation_command(query: str, any_references: bool = False) -> Conver
|
|
244
232
|
return ConversationCommand.Summarize
|
245
233
|
elif query.startswith("/diagram"):
|
246
234
|
return ConversationCommand.Diagram
|
235
|
+
elif query.startswith("/code"):
|
236
|
+
return ConversationCommand.Code
|
237
|
+
elif query.startswith("/research"):
|
238
|
+
return ConversationCommand.Research
|
247
239
|
# If no relevant notes found for the given query
|
248
240
|
elif not any_references:
|
249
241
|
return ConversationCommand.General
|
@@ -342,8 +334,7 @@ async def aget_relevant_information_sources(
|
|
342
334
|
)
|
343
335
|
|
344
336
|
try:
|
345
|
-
response = response
|
346
|
-
response = remove_json_codeblock(response)
|
337
|
+
response = clean_json(response)
|
347
338
|
response = json.loads(response)
|
348
339
|
response = [q.strip() for q in response["source"] if q.strip()]
|
349
340
|
if not isinstance(response, list) or not response or len(response) == 0:
|
@@ -421,8 +412,7 @@ async def aget_relevant_output_modes(
|
|
421
412
|
)
|
422
413
|
|
423
414
|
try:
|
424
|
-
response = response
|
425
|
-
response = remove_json_codeblock(response)
|
415
|
+
response = clean_json(response)
|
426
416
|
response = json.loads(response)
|
427
417
|
|
428
418
|
if is_none_or_empty(response):
|
@@ -483,7 +473,7 @@ async def infer_webpage_urls(
|
|
483
473
|
|
484
474
|
# Validate that the response is a non-empty, JSON-serializable list of URLs
|
485
475
|
try:
|
486
|
-
response = response
|
476
|
+
response = clean_json(response)
|
487
477
|
urls = json.loads(response)
|
488
478
|
valid_unique_urls = {str(url).strip() for url in urls["links"] if is_valid_url(url)}
|
489
479
|
if is_none_or_empty(valid_unique_urls):
|
@@ -534,8 +524,7 @@ async def generate_online_subqueries(
|
|
534
524
|
|
535
525
|
# Validate that the response is a non-empty, JSON-serializable list
|
536
526
|
try:
|
537
|
-
response = response
|
538
|
-
response = remove_json_codeblock(response)
|
527
|
+
response = clean_json(response)
|
539
528
|
response = json.loads(response)
|
540
529
|
response = [q.strip() for q in response["queries"] if q.strip()]
|
541
530
|
if not isinstance(response, list) or not response or len(response) == 0:
|
@@ -644,6 +633,53 @@ async def extract_relevant_summary(
|
|
644
633
|
return response.strip()
|
645
634
|
|
646
635
|
|
636
|
+
async def generate_summary_from_files(
    q: str,
    user: KhojUser,
    file_filters: List[str],
    meta_log: dict,
    query_images: List[str] = None,
    agent: Agent = None,
    send_status_func: Optional[Callable] = None,
    tracer: dict = {},
):
    """
    Summarize the full text of a single file, as an async generator.

    The file is taken from the agent's entries when the agent has any, and is
    overridden by the first entry of `file_filters` when that list is non-empty.

    Yields:
        - `{ChatEvent.STATUS: event}` dicts for progress updates
          (only when `send_status_func` is provided).
        - A final string: the summary, a "file not found" message, or an
          error message if summarization failed.
    """
    try:
        # Start from an empty list so the "nothing found" check below is safe
        # when neither the agent nor the file filters supply a file.
        file_objects = []
        if await EntryAdapters.aagent_has_entries(agent):
            file_names = await EntryAdapters.aget_agent_entry_filepaths(agent)
            if len(file_names) > 0:
                file_objects = await FileObjectAdapters.async_get_file_objects_by_name(None, file_names.pop(), agent)

        # An explicit file filter from the user takes precedence over the agent's file
        if len(file_filters) > 0:
            file_objects = await FileObjectAdapters.async_get_file_objects_by_name(user, file_filters[0])

        if file_objects is None or len(file_objects) == 0:
            response_log = "Sorry, I couldn't find the full text of this file."
            yield response_log
            return

        contextual_data = " ".join([file.raw_text for file in file_objects])
        if not q:
            q = "Create a general summary of the file"

        # The status callback is optional; only stream progress when one was given
        if send_status_func:
            async for result in send_status_func(f"**Constructing Summary Using:** {file_objects[0].file_name}"):
                yield {ChatEvent.STATUS: result}

        response = await extract_relevant_summary(
            q,
            contextual_data,
            conversation_history=meta_log,
            query_images=query_images,
            user=user,
            agent=agent,
            tracer=tracer,
        )

        yield str(response)
    except Exception as e:
        response_log = "Error summarizing file. Please try again, or contact support."
        logger.error(f"Error summarizing file for {user.email}: {e}", exc_info=True)
        # Surface the error message to the caller (not a stale/unbound status event)
        yield response_log
|
681
|
+
|
682
|
+
|
647
683
|
async def generate_excalidraw_diagram(
|
648
684
|
q: str,
|
649
685
|
conversation_history: Dict[str, Any],
|
@@ -759,10 +795,9 @@ async def generate_excalidraw_diagram_from_description(
|
|
759
795
|
|
760
796
|
with timer("Chat actor: Generate excalidraw diagram", logger):
|
761
797
|
raw_response = await send_message_to_model_wrapper(
|
762
|
-
|
798
|
+
query=excalidraw_diagram_generation, user=user, tracer=tracer
|
763
799
|
)
|
764
|
-
raw_response = raw_response
|
765
|
-
raw_response = remove_json_codeblock(raw_response)
|
800
|
+
raw_response = clean_json(raw_response)
|
766
801
|
response: Dict[str, str] = json.loads(raw_response)
|
767
802
|
if not response or not isinstance(response, List) or not isinstance(response[0], Dict):
|
768
803
|
# TODO Some additional validation here that it's a valid Excalidraw diagram
|
@@ -839,11 +874,12 @@ async def generate_better_image_prompt(
|
|
839
874
|
|
840
875
|
|
841
876
|
async def send_message_to_model_wrapper(
|
842
|
-
|
877
|
+
query: str,
|
843
878
|
system_message: str = "",
|
844
879
|
response_type: str = "text",
|
845
880
|
user: KhojUser = None,
|
846
881
|
query_images: List[str] = None,
|
882
|
+
context: str = "",
|
847
883
|
tracer: dict = {},
|
848
884
|
):
|
849
885
|
conversation_config: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config(user)
|
@@ -874,7 +910,8 @@ async def send_message_to_model_wrapper(
|
|
874
910
|
|
875
911
|
loaded_model = state.offline_chat_processor_config.loaded_model
|
876
912
|
truncated_messages = generate_chatml_messages_with_context(
|
877
|
-
user_message=
|
913
|
+
user_message=query,
|
914
|
+
context_message=context,
|
878
915
|
system_message=system_message,
|
879
916
|
model_name=chat_model,
|
880
917
|
loaded_model=loaded_model,
|
@@ -899,7 +936,8 @@ async def send_message_to_model_wrapper(
|
|
899
936
|
api_key = openai_chat_config.api_key
|
900
937
|
api_base_url = openai_chat_config.api_base_url
|
901
938
|
truncated_messages = generate_chatml_messages_with_context(
|
902
|
-
user_message=
|
939
|
+
user_message=query,
|
940
|
+
context_message=context,
|
903
941
|
system_message=system_message,
|
904
942
|
model_name=chat_model,
|
905
943
|
max_prompt_size=max_tokens,
|
@@ -920,7 +958,8 @@ async def send_message_to_model_wrapper(
|
|
920
958
|
elif model_type == ChatModelOptions.ModelType.ANTHROPIC:
|
921
959
|
api_key = conversation_config.openai_config.api_key
|
922
960
|
truncated_messages = generate_chatml_messages_with_context(
|
923
|
-
user_message=
|
961
|
+
user_message=query,
|
962
|
+
context_message=context,
|
924
963
|
system_message=system_message,
|
925
964
|
model_name=chat_model,
|
926
965
|
max_prompt_size=max_tokens,
|
@@ -934,12 +973,14 @@ async def send_message_to_model_wrapper(
|
|
934
973
|
messages=truncated_messages,
|
935
974
|
api_key=api_key,
|
936
975
|
model=chat_model,
|
976
|
+
response_type=response_type,
|
937
977
|
tracer=tracer,
|
938
978
|
)
|
939
979
|
elif model_type == ChatModelOptions.ModelType.GOOGLE:
|
940
980
|
api_key = conversation_config.openai_config.api_key
|
941
981
|
truncated_messages = generate_chatml_messages_with_context(
|
942
|
-
user_message=
|
982
|
+
user_message=query,
|
983
|
+
context_message=context,
|
943
984
|
system_message=system_message,
|
944
985
|
model_name=chat_model,
|
945
986
|
max_prompt_size=max_tokens,
|
@@ -1033,6 +1074,7 @@ def send_message_to_model_wrapper_sync(
|
|
1033
1074
|
messages=truncated_messages,
|
1034
1075
|
api_key=api_key,
|
1035
1076
|
model=chat_model,
|
1077
|
+
response_type=response_type,
|
1036
1078
|
tracer=tracer,
|
1037
1079
|
)
|
1038
1080
|
|
@@ -1064,6 +1106,7 @@ def generate_chat_response(
|
|
1064
1106
|
conversation: Conversation,
|
1065
1107
|
compiled_references: List[Dict] = [],
|
1066
1108
|
online_results: Dict[str, Dict] = {},
|
1109
|
+
code_results: Dict[str, Dict] = {},
|
1067
1110
|
inferred_queries: List[str] = [],
|
1068
1111
|
conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
|
1069
1112
|
user: KhojUser = None,
|
@@ -1071,8 +1114,10 @@ def generate_chat_response(
|
|
1071
1114
|
conversation_id: str = None,
|
1072
1115
|
location_data: LocationData = None,
|
1073
1116
|
user_name: Optional[str] = None,
|
1117
|
+
meta_research: str = "",
|
1074
1118
|
query_images: Optional[List[str]] = None,
|
1075
1119
|
tracer: dict = {},
|
1120
|
+
train_of_thought: List[Any] = [],
|
1076
1121
|
) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]:
|
1077
1122
|
# Initialize Variables
|
1078
1123
|
chat_response = None
|
@@ -1080,6 +1125,9 @@ def generate_chat_response(
|
|
1080
1125
|
|
1081
1126
|
metadata = {}
|
1082
1127
|
agent = AgentAdapters.get_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
|
1128
|
+
query_to_run = q
|
1129
|
+
if meta_research:
|
1130
|
+
query_to_run = f"AI Research: {meta_research} {q}"
|
1083
1131
|
try:
|
1084
1132
|
partial_completion = partial(
|
1085
1133
|
save_to_conversation_log,
|
@@ -1088,11 +1136,13 @@ def generate_chat_response(
|
|
1088
1136
|
meta_log=meta_log,
|
1089
1137
|
compiled_references=compiled_references,
|
1090
1138
|
online_results=online_results,
|
1139
|
+
code_results=code_results,
|
1091
1140
|
inferred_queries=inferred_queries,
|
1092
1141
|
client_application=client_application,
|
1093
1142
|
conversation_id=conversation_id,
|
1094
1143
|
query_images=query_images,
|
1095
1144
|
tracer=tracer,
|
1145
|
+
train_of_thought=train_of_thought,
|
1096
1146
|
)
|
1097
1147
|
|
1098
1148
|
conversation_config = ConversationAdapters.get_valid_conversation_config(user, conversation)
|
@@ -1106,9 +1156,9 @@ def generate_chat_response(
|
|
1106
1156
|
if conversation_config.model_type == "offline":
|
1107
1157
|
loaded_model = state.offline_chat_processor_config.loaded_model
|
1108
1158
|
chat_response = converse_offline(
|
1159
|
+
user_query=query_to_run,
|
1109
1160
|
references=compiled_references,
|
1110
1161
|
online_results=online_results,
|
1111
|
-
user_query=q,
|
1112
1162
|
loaded_model=loaded_model,
|
1113
1163
|
conversation_log=meta_log,
|
1114
1164
|
completion_func=partial_completion,
|
@@ -1128,9 +1178,10 @@ def generate_chat_response(
|
|
1128
1178
|
chat_model = conversation_config.chat_model
|
1129
1179
|
chat_response = converse(
|
1130
1180
|
compiled_references,
|
1131
|
-
|
1181
|
+
query_to_run,
|
1132
1182
|
query_images=query_images,
|
1133
1183
|
online_results=online_results,
|
1184
|
+
code_results=code_results,
|
1134
1185
|
conversation_log=meta_log,
|
1135
1186
|
model=chat_model,
|
1136
1187
|
api_key=api_key,
|
@@ -1150,9 +1201,10 @@ def generate_chat_response(
|
|
1150
1201
|
api_key = conversation_config.openai_config.api_key
|
1151
1202
|
chat_response = converse_anthropic(
|
1152
1203
|
compiled_references,
|
1153
|
-
|
1204
|
+
query_to_run,
|
1154
1205
|
query_images=query_images,
|
1155
1206
|
online_results=online_results,
|
1207
|
+
code_results=code_results,
|
1156
1208
|
conversation_log=meta_log,
|
1157
1209
|
model=conversation_config.chat_model,
|
1158
1210
|
api_key=api_key,
|
@@ -1170,10 +1222,10 @@ def generate_chat_response(
|
|
1170
1222
|
api_key = conversation_config.openai_config.api_key
|
1171
1223
|
chat_response = converse_gemini(
|
1172
1224
|
compiled_references,
|
1173
|
-
|
1174
|
-
|
1175
|
-
|
1176
|
-
|
1225
|
+
query_to_run,
|
1226
|
+
online_results,
|
1227
|
+
code_results,
|
1228
|
+
meta_log,
|
1177
1229
|
model=conversation_config.chat_model,
|
1178
1230
|
api_key=api_key,
|
1179
1231
|
completion_func=partial_completion,
|
@@ -1627,14 +1679,6 @@ Manage your automations [here](/automations).
|
|
1627
1679
|
""".strip()
|
1628
1680
|
|
1629
1681
|
|
1630
|
-
class ChatEvent(Enum):
|
1631
|
-
START_LLM_RESPONSE = "start_llm_response"
|
1632
|
-
END_LLM_RESPONSE = "end_llm_response"
|
1633
|
-
MESSAGE = "message"
|
1634
|
-
REFERENCES = "references"
|
1635
|
-
STATUS = "status"
|
1636
|
-
|
1637
|
-
|
1638
1682
|
class MessageProcessor:
|
1639
1683
|
def __init__(self):
|
1640
1684
|
self.references = {}
|
khoj/routers/research.py
ADDED
@@ -0,0 +1,321 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
from datetime import datetime
|
4
|
+
from typing import Any, Callable, Dict, List, Optional
|
5
|
+
|
6
|
+
import yaml
|
7
|
+
from fastapi import Request
|
8
|
+
|
9
|
+
from khoj.database.adapters import ConversationAdapters, EntryAdapters
|
10
|
+
from khoj.database.models import Agent, KhojUser
|
11
|
+
from khoj.processor.conversation import prompts
|
12
|
+
from khoj.processor.conversation.utils import (
|
13
|
+
InformationCollectionIteration,
|
14
|
+
clean_json,
|
15
|
+
construct_iteration_history,
|
16
|
+
construct_tool_chat_history,
|
17
|
+
)
|
18
|
+
from khoj.processor.tools.online_search import read_webpages, search_online
|
19
|
+
from khoj.processor.tools.run_code import run_code
|
20
|
+
from khoj.routers.api import extract_references_and_questions
|
21
|
+
from khoj.routers.helpers import (
|
22
|
+
ChatEvent,
|
23
|
+
construct_chat_history,
|
24
|
+
extract_relevant_info,
|
25
|
+
generate_summary_from_files,
|
26
|
+
send_message_to_model_wrapper,
|
27
|
+
)
|
28
|
+
from khoj.utils.helpers import (
|
29
|
+
ConversationCommand,
|
30
|
+
function_calling_description_for_llm,
|
31
|
+
is_none_or_empty,
|
32
|
+
timer,
|
33
|
+
)
|
34
|
+
from khoj.utils.rawconfig import LocationData
|
35
|
+
|
36
|
+
logger = logging.getLogger(__name__)
|
37
|
+
|
38
|
+
|
39
|
+
async def apick_next_tool(
    query: str,
    conversation_history: dict,
    user: KhojUser = None,
    query_images: List[str] = [],
    location: LocationData = None,
    user_name: str = None,
    agent: Agent = None,
    previous_iterations_history: str = None,
    max_iterations: int = 5,
    send_status_func: Optional[Callable] = None,
    tracer: dict = {},
):
    """
    Given a query, determine which of the available tools the agent should use in order to answer appropriately.
    One tool is picked per call; the caller can use subsequent iterations to refine the answer.

    This is an async generator. It yields:
        - `{ChatEvent.STATUS: event}` dicts when `send_status_func` is provided.
        - One `InformationCollectionIteration` carrying the selected tool and generated query.
          Both are None when the model response could not be parsed.
    """
    # Offer every known tool unless the agent restricts itself to a subset
    agent_tools = agent.input_tools if agent else []
    tool_options_str = "".join(
        f'- "{tool.value}": "{description}"\n'
        for tool, description in function_calling_description_for_llm.items()
        if len(agent_tools) == 0 or tool.value in agent_tools
    )

    chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")

    if query_images:
        query = f"[placeholder for user attached images]\n{query}"

    personality_context = (
        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
    )

    today = datetime.today()
    location_data = f"{location}" if location else "Unknown"

    function_planning_prompt = prompts.plan_function_execution.format(
        tools=tool_options_str,
        chat_history=chat_history,
        personality_context=personality_context,
        current_date=today.strftime("%Y-%m-%d"),
        day_of_week=today.strftime("%A"),
        username=user_name or "Unknown",
        location=location_data,
        previous_iterations=previous_iterations_history,
        max_iterations=max_iterations,
    )

    with timer("Chat actor: Infer information sources to refer", logger):
        response = await send_message_to_model_wrapper(
            query=query,
            context=function_planning_prompt,
            response_type="json_object",
            user=user,
            query_images=query_images,
            tracer=tracer,
        )

    try:
        response = clean_json(response)
        response = json.loads(response)
        selected_tool = response.get("tool", None)
        generated_query = response.get("query", None)
        scratchpad = response.get("scratchpad", None)
        logger.info(f"Response for determining relevant tools: {response}")
        if send_status_func:
            if scratchpad:
                # Show the model's reasoning as the status update
                status_message = f"{scratchpad}"
            else:
                # No reasoning returned: fall back to naming the chosen action
                # rather than streaming the literal text "None" to the user.
                status_message = "**Determined Tool**: "
                status_message += f"{selected_tool}({generated_query})." if selected_tool else "respond."
            async for event in send_status_func(status_message):
                yield {ChatEvent.STATUS: event}

        yield InformationCollectionIteration(
            tool=selected_tool,
            query=generated_query,
        )

    except Exception as e:
        logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
        # Signal "no tool" so the caller can stop iterating
        yield InformationCollectionIteration(
            tool=None,
            query=None,
        )
|
126
|
+
|
127
|
+
|
128
|
+
async def execute_information_collection(
    request: Request,
    user: KhojUser,
    query: str,
    conversation_id: str,
    conversation_history: dict,
    query_images: List[str],
    agent: Agent = None,
    send_status_func: Optional[Callable] = None,
    user_name: str = None,
    location: LocationData = None,
    file_filters: List[str] = [],
    tracer: dict = {},
):
    """
    Iteratively pick and run tools to collect information for answering `query`.

    Each pass asks `apick_next_tool` for the next tool, runs it (notes search, online search,
    webpage read, code run or file summary) and records what was found on the iteration.
    The loop ends after MAX_ITERATIONS passes or as soon as no valid tool is selected.

    This is an async generator. It yields:
        - Status events forwarded from the tool pickers and tools.
        - One `InformationCollectionIteration` per pass, with its results attached.
    """
    current_iteration = 0
    MAX_ITERATIONS = 5
    previous_iterations: List[InformationCollectionIteration] = []
    while current_iteration < MAX_ITERATIONS:
        online_results: Dict = dict()
        code_results: Dict = dict()
        document_results: List[Dict[str, str]] = []
        summarize_files: str = ""
        this_iteration = InformationCollectionIteration(tool=None, query=query)
        previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)

        async for result in apick_next_tool(
            query,
            conversation_history,
            user,
            query_images,
            location,
            user_name,
            agent,
            previous_iterations_history,
            MAX_ITERATIONS,
            send_status_func,
            tracer=tracer,
        ):
            if isinstance(result, dict) and ChatEvent.STATUS in result:
                yield result[ChatEvent.STATUS]
            elif isinstance(result, InformationCollectionIteration):
                this_iteration = result

        if this_iteration.tool == ConversationCommand.Notes:
            this_iteration.context = []
            document_results = []
            async for result in extract_references_and_questions(
                request,
                construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
                this_iteration.query,
                7,
                None,
                conversation_id,
                [ConversationCommand.Default],
                location,
                send_status_func,
                query_images,
                agent=agent,
                tracer=tracer,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                elif isinstance(result, tuple):
                    document_results = result[0]
                    this_iteration.context += document_results

            if send_status_func and not is_none_or_empty(document_results):
                try:
                    distinct_files = {d["file"] for d in document_results}
                    distinct_headings = set([d["compiled"].split("\n")[0] for d in document_results if "compiled" in d])
                    # Strip only leading # from headings
                    headings_str = "\n- " + "\n- ".join(distinct_headings).replace("#", "")
                    async for result in send_status_func(
                        f"**Found {len(distinct_headings)} Notes Across {len(distinct_files)} Files**: {headings_str}"
                    ):
                        yield result
                except Exception as e:
                    logger.error(f"Error extracting document references: {e}", exc_info=True)

        elif this_iteration.tool == ConversationCommand.Online:
            async for result in search_online(
                this_iteration.query,
                construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
                location,
                user,
                send_status_func,
                [],
                max_webpages_to_read=0,
                query_images=query_images,
                agent=agent,
                tracer=tracer,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                else:
                    online_results = result  # type: ignore
                    this_iteration.onlineContext = online_results

        elif this_iteration.tool == ConversationCommand.Webpage:
            try:
                async for result in read_webpages(
                    this_iteration.query,
                    construct_tool_chat_history(previous_iterations, ConversationCommand.Webpage),
                    location,
                    user,
                    send_status_func,
                    query_images=query_images,
                    agent=agent,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        direct_web_pages: Dict[str, Dict] = result  # type: ignore

                        # Merge the pages read into the online results, keyed by query
                        for web_query in direct_web_pages:
                            if online_results.get(web_query):
                                online_results[web_query]["webpages"] = direct_web_pages[web_query]["webpages"]
                            else:
                                online_results[web_query] = {"webpages": direct_web_pages[web_query]["webpages"]}
                        this_iteration.onlineContext = online_results
            except Exception as e:
                logger.error(f"Error reading webpages: {e}", exc_info=True)

        elif this_iteration.tool == ConversationCommand.Code:
            try:
                async for result in run_code(
                    this_iteration.query,
                    # Use the code tool's own history, like every other tool branch does.
                    # NOTE(review): confirm construct_tool_chat_history handles ConversationCommand.Code
                    construct_tool_chat_history(previous_iterations, ConversationCommand.Code),
                    "",
                    location,
                    user,
                    send_status_func,
                    query_images=query_images,
                    agent=agent,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        code_results = result  # type: ignore
                        this_iteration.codeContext = code_results
                if send_status_func:
                    # Count from the local dict so this is safe when no code result was produced
                    async for result in send_status_func(f"**Ran code snippets**: {len(code_results)}"):
                        yield result
            except ValueError as e:
                logger.warning(
                    f"Failed to use code tool: {e}. Attempting to respond without code results",
                    exc_info=True,
                )

        elif this_iteration.tool == ConversationCommand.Summarize:
            try:
                async for result in generate_summary_from_files(
                    this_iteration.query,
                    user,
                    file_filters,
                    construct_tool_chat_history(previous_iterations),
                    query_images=query_images,
                    agent=agent,
                    send_status_func=send_status_func,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        summarize_files = result  # type: ignore
            except Exception as e:
                logger.error(f"Error generating summary: {e}", exc_info=True)

        else:
            # No valid tools. This is our exit condition.
            current_iteration = MAX_ITERATIONS

        current_iteration += 1

        if document_results or online_results or code_results or summarize_files:
            results_data = "**Results**:\n"
            if document_results:
                results_data += f"**Document References**: {yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
            if online_results:
                results_data += f"**Online Results**: {yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
            if code_results:
                results_data += f"**Code Results**: {yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
            if summarize_files:
                results_data += f"**Summarized Files**: {yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"

            this_iteration.summarizedResult = results_data

        previous_iterations.append(this_iteration)
        yield this_iteration
|
@@ -7,8 +7,6 @@ from math import inf
|
|
7
7
|
from typing import List, Tuple
|
8
8
|
|
9
9
|
import dateparser as dtparse
|
10
|
-
from dateparser.search import search_dates
|
11
|
-
from dateparser_data.settings import default_parsers
|
12
10
|
from dateutil.relativedelta import relativedelta
|
13
11
|
|
14
12
|
from khoj.search_filter.base_filter import BaseFilter
|
@@ -23,7 +21,7 @@ class DateFilter(BaseFilter):
|
|
23
21
|
# - dt>="yesterday" dt<"tomorrow"
|
24
22
|
# - dt>="last week"
|
25
23
|
# - dt:"2 years ago"
|
26
|
-
date_regex = r"dt([:><=]{1,2})[\"'](.*?)[\"']"
|
24
|
+
date_regex = r"dt([:><=]{1,2})[\"'‘’](.*?)[\"'‘’]"
|
27
25
|
|
28
26
|
def __init__(self, entry_key="compiled"):
|
29
27
|
self.entry_key = entry_key
|
@@ -1,11 +1,10 @@
|
|
1
|
-
import fnmatch
|
2
1
|
import logging
|
3
2
|
import re
|
4
3
|
from collections import defaultdict
|
5
4
|
from typing import List
|
6
5
|
|
7
6
|
from khoj.search_filter.base_filter import BaseFilter
|
8
|
-
from khoj.utils.helpers import LRU
|
7
|
+
from khoj.utils.helpers import LRU
|
9
8
|
|
10
9
|
logger = logging.getLogger(__name__)
|
11
10
|
|