khoj 1.27.2.dev29__py3-none-any.whl → 1.27.2.dev130__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. khoj/database/adapters/__init__.py +34 -10
  2. khoj/interface/compiled/404/index.html +1 -1
  3. khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
  4. khoj/interface/compiled/_next/static/chunks/1467-5a191c1cd5bf0b83.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/1603-5d70d9dfcdcb1f10.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/3423-fa918f4e5365a35e.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/8423-3ad0bfb299801220.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/agents/{page-5ae1e540bb5be8a9.js → page-2beaba7c9bb750bd.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/automations/{page-774ae3e033f938cd.js → page-9b5c77e0b0dd772c.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/chat/page-7dc98df9c88828f0.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d887f55fe6d4f35d.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/app/{page-4dc472cf6d674004.js → page-d46244282af16509.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/search/{page-9b64f61caa5bd7f9.js → page-ab2995529ece3140.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/settings/{page-7a8c382af2a7e870.js → page-89e6737b2cc9fb3a.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-eb9e282691858f2e.js → page-505b07bce608b34e.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/{webpack-2b720658ccc746f2.js → webpack-8ae5ce45161bd98e.js} +1 -1
  17. khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
  18. khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +1 -0
  19. khoj/interface/compiled/_next/static/css/b70402177a7c3207.css +1 -0
  20. khoj/interface/compiled/_next/static/css/e9c5fe555dd3050b.css +25 -0
  21. khoj/interface/compiled/agents/index.html +1 -1
  22. khoj/interface/compiled/agents/index.txt +2 -2
  23. khoj/interface/compiled/automations/index.html +1 -1
  24. khoj/interface/compiled/automations/index.txt +2 -2
  25. khoj/interface/compiled/chat/index.html +1 -1
  26. khoj/interface/compiled/chat/index.txt +2 -2
  27. khoj/interface/compiled/factchecker/index.html +1 -1
  28. khoj/interface/compiled/factchecker/index.txt +2 -2
  29. khoj/interface/compiled/index.html +1 -1
  30. khoj/interface/compiled/index.txt +2 -2
  31. khoj/interface/compiled/search/index.html +1 -1
  32. khoj/interface/compiled/search/index.txt +2 -2
  33. khoj/interface/compiled/settings/index.html +1 -1
  34. khoj/interface/compiled/settings/index.txt +2 -2
  35. khoj/interface/compiled/share/chat/index.html +1 -1
  36. khoj/interface/compiled/share/chat/index.txt +2 -2
  37. khoj/processor/conversation/anthropic/anthropic_chat.py +14 -10
  38. khoj/processor/conversation/anthropic/utils.py +13 -2
  39. khoj/processor/conversation/google/gemini_chat.py +15 -11
  40. khoj/processor/conversation/offline/chat_model.py +10 -9
  41. khoj/processor/conversation/openai/gpt.py +11 -8
  42. khoj/processor/conversation/prompts.py +131 -22
  43. khoj/processor/conversation/utils.py +132 -6
  44. khoj/processor/tools/online_search.py +5 -3
  45. khoj/processor/tools/run_code.py +144 -0
  46. khoj/routers/api.py +6 -6
  47. khoj/routers/api_chat.py +156 -88
  48. khoj/routers/helpers.py +91 -47
  49. khoj/routers/research.py +321 -0
  50. khoj/search_filter/date_filter.py +1 -3
  51. khoj/search_filter/file_filter.py +1 -2
  52. khoj/search_type/text_search.py +3 -3
  53. khoj/utils/helpers.py +15 -2
  54. khoj/utils/yaml.py +4 -0
  55. {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/METADATA +1 -1
  56. {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/RECORD +63 -60
  57. khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +0 -1
  58. khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
  59. khoj/interface/compiled/_next/static/chunks/3423-0b533af8bf6ac218.js +0 -1
  60. khoj/interface/compiled/_next/static/chunks/9479-ff7d8c4dae2014d1.js +0 -1
  61. khoj/interface/compiled/_next/static/chunks/app/chat/page-97f5b61aaf46d364.js +0 -1
  62. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d82403db2866bad8.js +0 -1
  63. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
  64. khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
  65. khoj/interface/compiled/_next/static/css/ddcc0cf73e062476.css +0 -1
  66. /khoj/interface/compiled/_next/static/{atzIseFarmC7TIwq2BgHC → N19uqHAJYqRAVxvuVwHfE}/_buildManifest.js +0 -0
  67. /khoj/interface/compiled/_next/static/{atzIseFarmC7TIwq2BgHC → N19uqHAJYqRAVxvuVwHfE}/_ssgManifest.js +0 -0
  68. /khoj/interface/compiled/_next/static/chunks/{1970-60c96aed937a4928.js → 1970-444843bea1d17d61.js} +0 -0
  69. /khoj/interface/compiled/_next/static/chunks/{9417-2ca87207387fc790.js → 9417-19cfd1a9cb758e71.js} +0 -0
  70. {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/WHEEL +0 -0
  71. {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/entry_points.txt +0 -0
  72. {khoj-1.27.2.dev29.dist-info → khoj-1.27.2.dev130.dist-info}/licenses/LICENSE +0 -0
khoj/routers/helpers.py CHANGED
@@ -43,6 +43,7 @@ from khoj.database.adapters import (
43
43
  AutomationAdapters,
44
44
  ConversationAdapters,
45
45
  EntryAdapters,
46
+ FileObjectAdapters,
46
47
  ais_user_subscribed,
47
48
  create_khoj_token,
48
49
  get_khoj_tokens,
@@ -87,9 +88,11 @@ from khoj.processor.conversation.offline.chat_model import (
87
88
  )
88
89
  from khoj.processor.conversation.openai.gpt import converse, send_message_to_model
89
90
  from khoj.processor.conversation.utils import (
91
+ ChatEvent,
90
92
  ThreadedGenerator,
93
+ clean_json,
94
+ construct_chat_history,
91
95
  generate_chatml_messages_with_context,
92
- remove_json_codeblock,
93
96
  save_to_conversation_log,
94
97
  )
95
98
  from khoj.processor.speech.text_to_speech import is_eleven_labs_enabled
@@ -137,7 +140,7 @@ def validate_conversation_config(user: KhojUser):
137
140
  async def is_ready_to_chat(user: KhojUser):
138
141
  user_conversation_config = await ConversationAdapters.aget_user_conversation_config(user)
139
142
  if user_conversation_config == None:
140
- user_conversation_config = await ConversationAdapters.aget_default_conversation_config()
143
+ user_conversation_config = await ConversationAdapters.aget_default_conversation_config(user)
141
144
 
142
145
  if user_conversation_config and user_conversation_config.model_type == ChatModelOptions.ModelType.OFFLINE:
143
146
  chat_model = user_conversation_config.chat_model
@@ -210,21 +213,6 @@ def get_next_url(request: Request) -> str:
210
213
  return urljoin(str(request.base_url).rstrip("/"), next_path)
211
214
 
212
215
 
213
- def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
214
- chat_history = ""
215
- for chat in conversation_history.get("chat", [])[-n:]:
216
- if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]:
217
- chat_history += f"User: {chat['intent']['query']}\n"
218
- chat_history += f"{agent_name}: {chat['message']}\n"
219
- elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
220
- chat_history += f"User: {chat['intent']['query']}\n"
221
- chat_history += f"{agent_name}: [generated image redacted for space]\n"
222
- elif chat["by"] == "khoj" and ("excalidraw" in chat["intent"].get("type")):
223
- chat_history += f"User: {chat['intent']['query']}\n"
224
- chat_history += f"{agent_name}: {chat['intent']['inferred-queries'][0]}\n"
225
- return chat_history
226
-
227
-
228
216
  def get_conversation_command(query: str, any_references: bool = False) -> ConversationCommand:
229
217
  if query.startswith("/notes"):
230
218
  return ConversationCommand.Notes
@@ -244,6 +232,10 @@ def get_conversation_command(query: str, any_references: bool = False) -> Conver
244
232
  return ConversationCommand.Summarize
245
233
  elif query.startswith("/diagram"):
246
234
  return ConversationCommand.Diagram
235
+ elif query.startswith("/code"):
236
+ return ConversationCommand.Code
237
+ elif query.startswith("/research"):
238
+ return ConversationCommand.Research
247
239
  # If no relevant notes found for the given query
248
240
  elif not any_references:
249
241
  return ConversationCommand.General
@@ -342,8 +334,7 @@ async def aget_relevant_information_sources(
342
334
  )
343
335
 
344
336
  try:
345
- response = response.strip()
346
- response = remove_json_codeblock(response)
337
+ response = clean_json(response)
347
338
  response = json.loads(response)
348
339
  response = [q.strip() for q in response["source"] if q.strip()]
349
340
  if not isinstance(response, list) or not response or len(response) == 0:
@@ -421,8 +412,7 @@ async def aget_relevant_output_modes(
421
412
  )
422
413
 
423
414
  try:
424
- response = response.strip()
425
- response = remove_json_codeblock(response)
415
+ response = clean_json(response)
426
416
  response = json.loads(response)
427
417
 
428
418
  if is_none_or_empty(response):
@@ -483,7 +473,7 @@ async def infer_webpage_urls(
483
473
 
484
474
  # Validate that the response is a non-empty, JSON-serializable list of URLs
485
475
  try:
486
- response = response.strip()
476
+ response = clean_json(response)
487
477
  urls = json.loads(response)
488
478
  valid_unique_urls = {str(url).strip() for url in urls["links"] if is_valid_url(url)}
489
479
  if is_none_or_empty(valid_unique_urls):
@@ -534,8 +524,7 @@ async def generate_online_subqueries(
534
524
 
535
525
  # Validate that the response is a non-empty, JSON-serializable list
536
526
  try:
537
- response = response.strip()
538
- response = remove_json_codeblock(response)
527
+ response = clean_json(response)
539
528
  response = json.loads(response)
540
529
  response = [q.strip() for q in response["queries"] if q.strip()]
541
530
  if not isinstance(response, list) or not response or len(response) == 0:
@@ -644,6 +633,53 @@ async def extract_relevant_summary(
644
633
  return response.strip()
645
634
 
646
635
 
636
async def generate_summary_from_files(
    q: str,
    user: KhojUser,
    file_filters: List[str],
    meta_log: dict,
    query_images: List[str] = None,
    agent: Agent = None,
    send_status_func: Optional[Callable] = None,
    tracer: dict = {},
):
    """Summarize the full text of one attached file for the user's query.

    Async generator. Picks the file to summarize from the agent's attached
    entries first, then from the user's file filters (user filters win when
    both exist). Yields ``{ChatEvent.STATUS: ...}`` progress events (when
    ``send_status_func`` is provided) followed by the summary string.
    On failure it yields a user-facing error message instead of raising.

    Args:
        q: User query to focus the summary on; defaults to a general summary.
        user: Owner of the files referenced by ``file_filters``.
        file_filters: File names selected by the user; only the first is used.
        meta_log: Conversation history forwarded to the summarizer.
        query_images: Optional images attached to the query.
        agent: Optional agent whose own attached files take part.
        send_status_func: Optional async status-event emitter.
        tracer: Telemetry/tracing context passed through to the LLM call.
    """
    try:
        # Prefer the agent's own attached files; user-selected filters override.
        file_objects = None
        if await EntryAdapters.aagent_has_entries(agent):
            file_names = await EntryAdapters.aget_agent_entry_filepaths(agent)
            if len(file_names) > 0:
                file_objects = await FileObjectAdapters.async_get_file_objects_by_name(None, file_names.pop(), agent)

        if len(file_filters) > 0:
            file_objects = await FileObjectAdapters.async_get_file_objects_by_name(user, file_filters[0])

        # Covers both "no matching file objects" and "no candidate files at all"
        # (file_objects still None) — the original `len(file_object) == 0` check
        # raised TypeError on None and fell through to the generic error path.
        if not file_objects:
            yield "Sorry, I couldn't find the full text of this file."
            return

        contextual_data = " ".join([file.raw_text for file in file_objects])
        if not q:
            q = "Create a general summary of the file"

        if send_status_func:
            async for result in send_status_func(f"**Constructing Summary Using:** {file_objects[0].file_name}"):
                yield {ChatEvent.STATUS: result}

        response = await extract_relevant_summary(
            q,
            contextual_data,
            conversation_history=meta_log,
            query_images=query_images,
            user=user,
            agent=agent,
            tracer=tracer,
        )

        yield str(response)
    except Exception as e:
        # Fix: previously yielded the undefined/stale name `result` here,
        # so callers never saw the error message composed below.
        response_log = "Error summarizing file. Please try again, or contact support."
        logger.error(f"Error summarizing file for {user.email}: {e}", exc_info=True)
        yield response_log
+
647
683
  async def generate_excalidraw_diagram(
648
684
  q: str,
649
685
  conversation_history: Dict[str, Any],
@@ -759,10 +795,9 @@ async def generate_excalidraw_diagram_from_description(
759
795
 
760
796
  with timer("Chat actor: Generate excalidraw diagram", logger):
761
797
  raw_response = await send_message_to_model_wrapper(
762
- message=excalidraw_diagram_generation, user=user, tracer=tracer
798
+ query=excalidraw_diagram_generation, user=user, tracer=tracer
763
799
  )
764
- raw_response = raw_response.strip()
765
- raw_response = remove_json_codeblock(raw_response)
800
+ raw_response = clean_json(raw_response)
766
801
  response: Dict[str, str] = json.loads(raw_response)
767
802
  if not response or not isinstance(response, List) or not isinstance(response[0], Dict):
768
803
  # TODO Some additional validation here that it's a valid Excalidraw diagram
@@ -839,11 +874,12 @@ async def generate_better_image_prompt(
839
874
 
840
875
 
841
876
  async def send_message_to_model_wrapper(
842
- message: str,
877
+ query: str,
843
878
  system_message: str = "",
844
879
  response_type: str = "text",
845
880
  user: KhojUser = None,
846
881
  query_images: List[str] = None,
882
+ context: str = "",
847
883
  tracer: dict = {},
848
884
  ):
849
885
  conversation_config: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config(user)
@@ -874,7 +910,8 @@ async def send_message_to_model_wrapper(
874
910
 
875
911
  loaded_model = state.offline_chat_processor_config.loaded_model
876
912
  truncated_messages = generate_chatml_messages_with_context(
877
- user_message=message,
913
+ user_message=query,
914
+ context_message=context,
878
915
  system_message=system_message,
879
916
  model_name=chat_model,
880
917
  loaded_model=loaded_model,
@@ -899,7 +936,8 @@ async def send_message_to_model_wrapper(
899
936
  api_key = openai_chat_config.api_key
900
937
  api_base_url = openai_chat_config.api_base_url
901
938
  truncated_messages = generate_chatml_messages_with_context(
902
- user_message=message,
939
+ user_message=query,
940
+ context_message=context,
903
941
  system_message=system_message,
904
942
  model_name=chat_model,
905
943
  max_prompt_size=max_tokens,
@@ -920,7 +958,8 @@ async def send_message_to_model_wrapper(
920
958
  elif model_type == ChatModelOptions.ModelType.ANTHROPIC:
921
959
  api_key = conversation_config.openai_config.api_key
922
960
  truncated_messages = generate_chatml_messages_with_context(
923
- user_message=message,
961
+ user_message=query,
962
+ context_message=context,
924
963
  system_message=system_message,
925
964
  model_name=chat_model,
926
965
  max_prompt_size=max_tokens,
@@ -934,12 +973,14 @@ async def send_message_to_model_wrapper(
934
973
  messages=truncated_messages,
935
974
  api_key=api_key,
936
975
  model=chat_model,
976
+ response_type=response_type,
937
977
  tracer=tracer,
938
978
  )
939
979
  elif model_type == ChatModelOptions.ModelType.GOOGLE:
940
980
  api_key = conversation_config.openai_config.api_key
941
981
  truncated_messages = generate_chatml_messages_with_context(
942
- user_message=message,
982
+ user_message=query,
983
+ context_message=context,
943
984
  system_message=system_message,
944
985
  model_name=chat_model,
945
986
  max_prompt_size=max_tokens,
@@ -1033,6 +1074,7 @@ def send_message_to_model_wrapper_sync(
1033
1074
  messages=truncated_messages,
1034
1075
  api_key=api_key,
1035
1076
  model=chat_model,
1077
+ response_type=response_type,
1036
1078
  tracer=tracer,
1037
1079
  )
1038
1080
 
@@ -1064,6 +1106,7 @@ def generate_chat_response(
1064
1106
  conversation: Conversation,
1065
1107
  compiled_references: List[Dict] = [],
1066
1108
  online_results: Dict[str, Dict] = {},
1109
+ code_results: Dict[str, Dict] = {},
1067
1110
  inferred_queries: List[str] = [],
1068
1111
  conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
1069
1112
  user: KhojUser = None,
@@ -1071,8 +1114,10 @@ def generate_chat_response(
1071
1114
  conversation_id: str = None,
1072
1115
  location_data: LocationData = None,
1073
1116
  user_name: Optional[str] = None,
1117
+ meta_research: str = "",
1074
1118
  query_images: Optional[List[str]] = None,
1075
1119
  tracer: dict = {},
1120
+ train_of_thought: List[Any] = [],
1076
1121
  ) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]:
1077
1122
  # Initialize Variables
1078
1123
  chat_response = None
@@ -1080,6 +1125,9 @@ def generate_chat_response(
1080
1125
 
1081
1126
  metadata = {}
1082
1127
  agent = AgentAdapters.get_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
1128
+ query_to_run = q
1129
+ if meta_research:
1130
+ query_to_run = f"AI Research: {meta_research} {q}"
1083
1131
  try:
1084
1132
  partial_completion = partial(
1085
1133
  save_to_conversation_log,
@@ -1088,11 +1136,13 @@ def generate_chat_response(
1088
1136
  meta_log=meta_log,
1089
1137
  compiled_references=compiled_references,
1090
1138
  online_results=online_results,
1139
+ code_results=code_results,
1091
1140
  inferred_queries=inferred_queries,
1092
1141
  client_application=client_application,
1093
1142
  conversation_id=conversation_id,
1094
1143
  query_images=query_images,
1095
1144
  tracer=tracer,
1145
+ train_of_thought=train_of_thought,
1096
1146
  )
1097
1147
 
1098
1148
  conversation_config = ConversationAdapters.get_valid_conversation_config(user, conversation)
@@ -1106,9 +1156,9 @@ def generate_chat_response(
1106
1156
  if conversation_config.model_type == "offline":
1107
1157
  loaded_model = state.offline_chat_processor_config.loaded_model
1108
1158
  chat_response = converse_offline(
1159
+ user_query=query_to_run,
1109
1160
  references=compiled_references,
1110
1161
  online_results=online_results,
1111
- user_query=q,
1112
1162
  loaded_model=loaded_model,
1113
1163
  conversation_log=meta_log,
1114
1164
  completion_func=partial_completion,
@@ -1128,9 +1178,10 @@ def generate_chat_response(
1128
1178
  chat_model = conversation_config.chat_model
1129
1179
  chat_response = converse(
1130
1180
  compiled_references,
1131
- q,
1181
+ query_to_run,
1132
1182
  query_images=query_images,
1133
1183
  online_results=online_results,
1184
+ code_results=code_results,
1134
1185
  conversation_log=meta_log,
1135
1186
  model=chat_model,
1136
1187
  api_key=api_key,
@@ -1150,9 +1201,10 @@ def generate_chat_response(
1150
1201
  api_key = conversation_config.openai_config.api_key
1151
1202
  chat_response = converse_anthropic(
1152
1203
  compiled_references,
1153
- q,
1204
+ query_to_run,
1154
1205
  query_images=query_images,
1155
1206
  online_results=online_results,
1207
+ code_results=code_results,
1156
1208
  conversation_log=meta_log,
1157
1209
  model=conversation_config.chat_model,
1158
1210
  api_key=api_key,
@@ -1170,10 +1222,10 @@ def generate_chat_response(
1170
1222
  api_key = conversation_config.openai_config.api_key
1171
1223
  chat_response = converse_gemini(
1172
1224
  compiled_references,
1173
- q,
1174
- query_images=query_images,
1175
- online_results=online_results,
1176
- conversation_log=meta_log,
1225
+ query_to_run,
1226
+ online_results,
1227
+ code_results,
1228
+ meta_log,
1177
1229
  model=conversation_config.chat_model,
1178
1230
  api_key=api_key,
1179
1231
  completion_func=partial_completion,
@@ -1627,14 +1679,6 @@ Manage your automations [here](/automations).
1627
1679
  """.strip()
1628
1680
 
1629
1681
 
1630
- class ChatEvent(Enum):
1631
- START_LLM_RESPONSE = "start_llm_response"
1632
- END_LLM_RESPONSE = "end_llm_response"
1633
- MESSAGE = "message"
1634
- REFERENCES = "references"
1635
- STATUS = "status"
1636
-
1637
-
1638
1682
  class MessageProcessor:
1639
1683
  def __init__(self):
1640
1684
  self.references = {}
@@ -0,0 +1,321 @@
1
+ import json
2
+ import logging
3
+ from datetime import datetime
4
+ from typing import Any, Callable, Dict, List, Optional
5
+
6
+ import yaml
7
+ from fastapi import Request
8
+
9
+ from khoj.database.adapters import ConversationAdapters, EntryAdapters
10
+ from khoj.database.models import Agent, KhojUser
11
+ from khoj.processor.conversation import prompts
12
+ from khoj.processor.conversation.utils import (
13
+ InformationCollectionIteration,
14
+ clean_json,
15
+ construct_iteration_history,
16
+ construct_tool_chat_history,
17
+ )
18
+ from khoj.processor.tools.online_search import read_webpages, search_online
19
+ from khoj.processor.tools.run_code import run_code
20
+ from khoj.routers.api import extract_references_and_questions
21
+ from khoj.routers.helpers import (
22
+ ChatEvent,
23
+ construct_chat_history,
24
+ extract_relevant_info,
25
+ generate_summary_from_files,
26
+ send_message_to_model_wrapper,
27
+ )
28
+ from khoj.utils.helpers import (
29
+ ConversationCommand,
30
+ function_calling_description_for_llm,
31
+ is_none_or_empty,
32
+ timer,
33
+ )
34
+ from khoj.utils.rawconfig import LocationData
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+
39
async def apick_next_tool(
    query: str,
    conversation_history: dict,
    user: KhojUser = None,
    query_images: List[str] = [],
    location: LocationData = None,
    user_name: str = None,
    agent: Agent = None,
    previous_iterations_history: str = None,
    max_iterations: int = 5,
    send_status_func: Optional[Callable] = None,
    tracer: dict = {},
):
    """Pick the single tool the research agent should use next for this query.

    Async generator. Asks the planner model to choose one tool (and a query
    for it), yielding ``{ChatEvent.STATUS: ...}`` progress events followed by
    an InformationCollectionIteration describing the choice. On any parsing
    failure it yields an iteration with ``tool=None`` so the caller can exit
    the research loop gracefully.
    """
    # Build the tool menu, filtered to the agent's allowed input tools when set.
    tool_options = dict()
    tool_options_str = ""
    agent_tools = agent.input_tools if agent else []
    for tool, description in function_calling_description_for_llm.items():
        tool_options[tool.value] = description
        if len(agent_tools) == 0 or tool.value in agent_tools:
            tool_options_str += f'- "{tool.value}": "{description}"\n'

    chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")

    if query_images:
        query = f"[placeholder for user attached images]\n{query}"

    personality_context = (
        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
    )

    # Ground the planner prompt in the current date and user context.
    today = datetime.today()
    location_data = f"{location}" if location else "Unknown"

    function_planning_prompt = prompts.plan_function_execution.format(
        tools=tool_options_str,
        chat_history=chat_history,
        personality_context=personality_context,
        current_date=today.strftime("%Y-%m-%d"),
        day_of_week=today.strftime("%A"),
        username=user_name or "Unknown",
        location=location_data,
        previous_iterations=previous_iterations_history,
        max_iterations=max_iterations,
    )

    with timer("Chat actor: Infer information sources to refer", logger):
        response = await send_message_to_model_wrapper(
            query=query,
            context=function_planning_prompt,
            response_type="json_object",
            user=user,
            query_images=query_images,
            tracer=tracer,
        )

    try:
        response = clean_json(response)
        response = json.loads(response)
        selected_tool = response.get("tool", None)
        generated_query = response.get("query", None)
        scratchpad = response.get("scratchpad", None)
        logger.info(f"Response for determining relevant tools: {response}")
        if send_status_func:
            determined_tool_message = "**Determined Tool**: "
            determined_tool_message += f"{selected_tool}({generated_query})." if selected_tool else "respond."
            determined_tool_message += f"\nReason: {scratchpad}" if scratchpad else ""
            # Fix: this composed status message was previously built and then
            # discarded — only the raw scratchpad was emitted.
            async for event in send_status_func(determined_tool_message):
                yield {ChatEvent.STATUS: event}

        yield InformationCollectionIteration(
            tool=selected_tool,
            query=generated_query,
        )

    except Exception as e:
        logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
        yield InformationCollectionIteration(
            tool=None,
            query=None,
        )
+
127
+
128
async def execute_information_collection(
    request: Request,
    user: KhojUser,
    query: str,
    conversation_id: str,
    conversation_history: dict,
    query_images: List[str],
    agent: Agent = None,
    send_status_func: Optional[Callable] = None,
    user_name: str = None,
    location: LocationData = None,
    file_filters: List[str] = [],
    tracer: dict = {},
):
    """Run the research loop: pick a tool, execute it, record its results.

    Async generator. Repeats until the planner selects no tool or
    MAX_ITERATIONS is reached. Yields status events (raw status results and
    unwrapped ChatEvent.STATUS payloads) along the way, plus one completed
    InformationCollectionIteration per loop pass for the caller to aggregate.
    """
    current_iteration = 0
    MAX_ITERATIONS = 5
    previous_iterations: List[InformationCollectionIteration] = []
    while current_iteration < MAX_ITERATIONS:
        # Per-iteration result buckets, folded into the iteration summary below.
        online_results: Dict = dict()
        code_results: Dict = dict()
        document_results: List[Dict[str, str]] = []
        summarize_files: str = ""
        this_iteration = InformationCollectionIteration(tool=None, query=query)
        previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)

        async for result in apick_next_tool(
            query,
            conversation_history,
            user,
            query_images,
            location,
            user_name,
            agent,
            previous_iterations_history,
            MAX_ITERATIONS,
            send_status_func,
            tracer=tracer,
        ):
            if isinstance(result, dict) and ChatEvent.STATUS in result:
                yield result[ChatEvent.STATUS]
            elif isinstance(result, InformationCollectionIteration):
                this_iteration = result

        if this_iteration.tool == ConversationCommand.Notes:
            this_iteration.context = []
            document_results = []
            async for result in extract_references_and_questions(
                request,
                construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
                this_iteration.query,
                7,
                None,
                conversation_id,
                [ConversationCommand.Default],
                location,
                send_status_func,
                query_images,
                agent=agent,
                tracer=tracer,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                elif isinstance(result, tuple):
                    document_results = result[0]
                    this_iteration.context += document_results

            if not is_none_or_empty(document_results):
                try:
                    distinct_files = {d["file"] for d in document_results}
                    distinct_headings = set([d["compiled"].split("\n")[0] for d in document_results if "compiled" in d])
                    # Strip only leading # from headings.
                    # Fix: `.replace("#", "")` removed *every* '#' in the
                    # heading text, contradicting the intent stated above.
                    headings_str = "\n- " + "\n- ".join(h.lstrip("#") for h in distinct_headings)
                    if send_status_func:
                        async for result in send_status_func(
                            f"**Found {len(distinct_headings)} Notes Across {len(distinct_files)} Files**: {headings_str}"
                        ):
                            yield result
                except Exception as e:
                    logger.error(f"Error extracting document references: {e}", exc_info=True)

        elif this_iteration.tool == ConversationCommand.Online:
            async for result in search_online(
                this_iteration.query,
                construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
                location,
                user,
                send_status_func,
                [],
                max_webpages_to_read=0,
                query_images=query_images,
                agent=agent,
                tracer=tracer,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                else:
                    online_results: Dict[str, Dict] = result  # type: ignore
                    this_iteration.onlineContext = online_results

        elif this_iteration.tool == ConversationCommand.Webpage:
            try:
                async for result in read_webpages(
                    this_iteration.query,
                    construct_tool_chat_history(previous_iterations, ConversationCommand.Webpage),
                    location,
                    user,
                    send_status_func,
                    query_images=query_images,
                    agent=agent,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        direct_web_pages: Dict[str, Dict] = result  # type: ignore

                        # Merge directly-read webpages into the online results
                        # so downstream consumers see one combined context.
                        webpages = []
                        for web_query in direct_web_pages:
                            if online_results.get(web_query):
                                online_results[web_query]["webpages"] = direct_web_pages[web_query]["webpages"]
                            else:
                                online_results[web_query] = {"webpages": direct_web_pages[web_query]["webpages"]}

                            for webpage in direct_web_pages[web_query]["webpages"]:
                                webpages.append(webpage["link"])
                        this_iteration.onlineContext = online_results
            except Exception as e:
                logger.error(f"Error reading webpages: {e}", exc_info=True)

        elif this_iteration.tool == ConversationCommand.Code:
            try:
                async for result in run_code(
                    this_iteration.query,
                    # Fix: history was built for the Webpage tool (copy-paste
                    # defect); use the Code tool's own history.
                    construct_tool_chat_history(previous_iterations, ConversationCommand.Code),
                    "",
                    location,
                    user,
                    send_status_func,
                    query_images=query_images,
                    agent=agent,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        code_results: Dict[str, Dict] = result  # type: ignore
                        this_iteration.codeContext = code_results
                if send_status_func:
                    async for result in send_status_func(f"**Ran code snippets**: {len(this_iteration.codeContext)}"):
                        yield result
            except ValueError as e:
                logger.warning(
                    f"Failed to use code tool: {e}. Attempting to respond without code results",
                    exc_info=True,
                )

        elif this_iteration.tool == ConversationCommand.Summarize:
            try:
                async for result in generate_summary_from_files(
                    this_iteration.query,
                    user,
                    file_filters,
                    construct_tool_chat_history(previous_iterations),
                    query_images=query_images,
                    agent=agent,
                    send_status_func=send_status_func,
                    # Fix: tracer was not forwarded here, unlike every other
                    # tool invocation in this loop.
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        summarize_files = result  # type: ignore
            except Exception as e:
                logger.error(f"Error generating summary: {e}", exc_info=True)

        else:
            # No valid tools. This is our exit condition.
            current_iteration = MAX_ITERATIONS

        current_iteration += 1

        if document_results or online_results or code_results or summarize_files:
            results_data = f"**Results**:\n"
            if document_results:
                results_data += f"**Document References**: {yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
            if online_results:
                results_data += f"**Online Results**: {yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
            if code_results:
                results_data += f"**Code Results**: {yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
            if summarize_files:
                results_data += f"**Summarized Files**: {yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"

            # intermediate_result = await extract_relevant_info(this_iteration.query, results_data, agent)
            this_iteration.summarizedResult = results_data

        previous_iterations.append(this_iteration)
        yield this_iteration
@@ -7,8 +7,6 @@ from math import inf
7
7
  from typing import List, Tuple
8
8
 
9
9
  import dateparser as dtparse
10
- from dateparser.search import search_dates
11
- from dateparser_data.settings import default_parsers
12
10
  from dateutil.relativedelta import relativedelta
13
11
 
14
12
  from khoj.search_filter.base_filter import BaseFilter
@@ -23,7 +21,7 @@ class DateFilter(BaseFilter):
23
21
  # - dt>="yesterday" dt<"tomorrow"
24
22
  # - dt>="last week"
25
23
  # - dt:"2 years ago"
26
- date_regex = r"dt([:><=]{1,2})[\"'](.*?)[\"']"
24
+ date_regex = r"dt([:><=]{1,2})[\"'‘’](.*?)[\"'‘’]"
27
25
 
28
26
  def __init__(self, entry_key="compiled"):
29
27
  self.entry_key = entry_key
@@ -1,11 +1,10 @@
1
- import fnmatch
2
1
  import logging
3
2
  import re
4
3
  from collections import defaultdict
5
4
  from typing import List
6
5
 
7
6
  from khoj.search_filter.base_filter import BaseFilter
8
- from khoj.utils.helpers import LRU, timer
7
+ from khoj.utils.helpers import LRU
9
8
 
10
9
  logger = logging.getLogger(__name__)
11
10