khoj 1.27.2.dev29__py3-none-any.whl → 1.28.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. khoj/configure.py +1 -1
  2. khoj/database/adapters/__init__.py +50 -12
  3. khoj/interface/compiled/404/index.html +1 -1
  4. khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
  5. khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/3423-ff7402ae1dd66592.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/8423-e80647edf6c92c27.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/agents/{page-5ae1e540bb5be8a9.js → page-2beaba7c9bb750bd.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/automations/{page-774ae3e033f938cd.js → page-9b5c77e0b0dd772c.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/chat/page-bfc70b16ba5e51b4.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-340bcf53abf6a2cc.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/app/{page-4dc472cf6d674004.js → page-f249666a0cbdaa0d.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/search/{page-9b64f61caa5bd7f9.js → page-ab2995529ece3140.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/settings/{page-7a8c382af2a7e870.js → page-89e6737b2cc9fb3a.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-eb9e282691858f2e.js → page-505b07bce608b34e.js} +1 -1
  17. khoj/interface/compiled/_next/static/chunks/{webpack-2b720658ccc746f2.js → webpack-878569182b3af4c6.js} +1 -1
  18. khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
  19. khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +1 -0
  20. khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +25 -0
  21. khoj/interface/compiled/_next/static/css/d738728883c68af8.css +1 -0
  22. khoj/interface/compiled/agents/index.html +1 -1
  23. khoj/interface/compiled/agents/index.txt +2 -2
  24. khoj/interface/compiled/automations/index.html +1 -1
  25. khoj/interface/compiled/automations/index.txt +2 -2
  26. khoj/interface/compiled/chat/index.html +1 -1
  27. khoj/interface/compiled/chat/index.txt +2 -2
  28. khoj/interface/compiled/factchecker/index.html +1 -1
  29. khoj/interface/compiled/factchecker/index.txt +2 -2
  30. khoj/interface/compiled/index.html +1 -1
  31. khoj/interface/compiled/index.txt +2 -2
  32. khoj/interface/compiled/search/index.html +1 -1
  33. khoj/interface/compiled/search/index.txt +2 -2
  34. khoj/interface/compiled/settings/index.html +1 -1
  35. khoj/interface/compiled/settings/index.txt +2 -2
  36. khoj/interface/compiled/share/chat/index.html +1 -1
  37. khoj/interface/compiled/share/chat/index.txt +2 -2
  38. khoj/processor/conversation/anthropic/anthropic_chat.py +14 -10
  39. khoj/processor/conversation/anthropic/utils.py +13 -2
  40. khoj/processor/conversation/google/gemini_chat.py +15 -11
  41. khoj/processor/conversation/offline/chat_model.py +18 -10
  42. khoj/processor/conversation/openai/gpt.py +11 -8
  43. khoj/processor/conversation/openai/utils.py +7 -0
  44. khoj/processor/conversation/prompts.py +156 -49
  45. khoj/processor/conversation/utils.py +146 -13
  46. khoj/processor/embeddings.py +4 -4
  47. khoj/processor/tools/online_search.py +13 -7
  48. khoj/processor/tools/run_code.py +144 -0
  49. khoj/routers/api.py +6 -6
  50. khoj/routers/api_chat.py +193 -112
  51. khoj/routers/helpers.py +107 -48
  52. khoj/routers/research.py +320 -0
  53. khoj/search_filter/date_filter.py +1 -3
  54. khoj/search_filter/file_filter.py +1 -2
  55. khoj/search_type/text_search.py +3 -3
  56. khoj/utils/helpers.py +24 -2
  57. khoj/utils/yaml.py +4 -0
  58. {khoj-1.27.2.dev29.dist-info → khoj-1.28.1.dev1.dist-info}/METADATA +3 -3
  59. {khoj-1.27.2.dev29.dist-info → khoj-1.28.1.dev1.dist-info}/RECORD +66 -63
  60. khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +0 -1
  61. khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
  62. khoj/interface/compiled/_next/static/chunks/3423-0b533af8bf6ac218.js +0 -1
  63. khoj/interface/compiled/_next/static/chunks/9479-ff7d8c4dae2014d1.js +0 -1
  64. khoj/interface/compiled/_next/static/chunks/app/chat/page-97f5b61aaf46d364.js +0 -1
  65. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d82403db2866bad8.js +0 -1
  66. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
  67. khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
  68. khoj/interface/compiled/_next/static/css/ddcc0cf73e062476.css +0 -1
  69. /khoj/interface/compiled/_next/static/{atzIseFarmC7TIwq2BgHC → K7ZigmRDrBfpIN7jxKQsA}/_buildManifest.js +0 -0
  70. /khoj/interface/compiled/_next/static/{atzIseFarmC7TIwq2BgHC → K7ZigmRDrBfpIN7jxKQsA}/_ssgManifest.js +0 -0
  71. /khoj/interface/compiled/_next/static/chunks/{1970-60c96aed937a4928.js → 1970-90dd510762d820ba.js} +0 -0
  72. /khoj/interface/compiled/_next/static/chunks/{9417-2ca87207387fc790.js → 9417-951f46451a8dd6d7.js} +0 -0
  73. {khoj-1.27.2.dev29.dist-info → khoj-1.28.1.dev1.dist-info}/WHEEL +0 -0
  74. {khoj-1.27.2.dev29.dist-info → khoj-1.28.1.dev1.dist-info}/entry_points.txt +0 -0
  75. {khoj-1.27.2.dev29.dist-info → khoj-1.28.1.dev1.dist-info}/licenses/LICENSE +0 -0
khoj/routers/helpers.py CHANGED
@@ -43,6 +43,7 @@ from khoj.database.adapters import (
43
43
  AutomationAdapters,
44
44
  ConversationAdapters,
45
45
  EntryAdapters,
46
+ FileObjectAdapters,
46
47
  ais_user_subscribed,
47
48
  create_khoj_token,
48
49
  get_khoj_tokens,
@@ -87,9 +88,11 @@ from khoj.processor.conversation.offline.chat_model import (
87
88
  )
88
89
  from khoj.processor.conversation.openai.gpt import converse, send_message_to_model
89
90
  from khoj.processor.conversation.utils import (
91
+ ChatEvent,
90
92
  ThreadedGenerator,
93
+ clean_json,
94
+ construct_chat_history,
91
95
  generate_chatml_messages_with_context,
92
- remove_json_codeblock,
93
96
  save_to_conversation_log,
94
97
  )
95
98
  from khoj.processor.speech.text_to_speech import is_eleven_labs_enabled
@@ -137,7 +140,7 @@ def validate_conversation_config(user: KhojUser):
137
140
  async def is_ready_to_chat(user: KhojUser):
138
141
  user_conversation_config = await ConversationAdapters.aget_user_conversation_config(user)
139
142
  if user_conversation_config == None:
140
- user_conversation_config = await ConversationAdapters.aget_default_conversation_config()
143
+ user_conversation_config = await ConversationAdapters.aget_default_conversation_config(user)
141
144
 
142
145
  if user_conversation_config and user_conversation_config.model_type == ChatModelOptions.ModelType.OFFLINE:
143
146
  chat_model = user_conversation_config.chat_model
@@ -210,21 +213,6 @@ def get_next_url(request: Request) -> str:
210
213
  return urljoin(str(request.base_url).rstrip("/"), next_path)
211
214
 
212
215
 
213
- def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
214
- chat_history = ""
215
- for chat in conversation_history.get("chat", [])[-n:]:
216
- if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]:
217
- chat_history += f"User: {chat['intent']['query']}\n"
218
- chat_history += f"{agent_name}: {chat['message']}\n"
219
- elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
220
- chat_history += f"User: {chat['intent']['query']}\n"
221
- chat_history += f"{agent_name}: [generated image redacted for space]\n"
222
- elif chat["by"] == "khoj" and ("excalidraw" in chat["intent"].get("type")):
223
- chat_history += f"User: {chat['intent']['query']}\n"
224
- chat_history += f"{agent_name}: {chat['intent']['inferred-queries'][0]}\n"
225
- return chat_history
226
-
227
-
228
216
  def get_conversation_command(query: str, any_references: bool = False) -> ConversationCommand:
229
217
  if query.startswith("/notes"):
230
218
  return ConversationCommand.Notes
@@ -244,6 +232,10 @@ def get_conversation_command(query: str, any_references: bool = False) -> Conver
244
232
  return ConversationCommand.Summarize
245
233
  elif query.startswith("/diagram"):
246
234
  return ConversationCommand.Diagram
235
+ elif query.startswith("/code"):
236
+ return ConversationCommand.Code
237
+ elif query.startswith("/research"):
238
+ return ConversationCommand.Research
247
239
  # If no relevant notes found for the given query
248
240
  elif not any_references:
249
241
  return ConversationCommand.General
@@ -342,8 +334,7 @@ async def aget_relevant_information_sources(
342
334
  )
343
335
 
344
336
  try:
345
- response = response.strip()
346
- response = remove_json_codeblock(response)
337
+ response = clean_json(response)
347
338
  response = json.loads(response)
348
339
  response = [q.strip() for q in response["source"] if q.strip()]
349
340
  if not isinstance(response, list) or not response or len(response) == 0:
@@ -421,8 +412,7 @@ async def aget_relevant_output_modes(
421
412
  )
422
413
 
423
414
  try:
424
- response = response.strip()
425
- response = remove_json_codeblock(response)
415
+ response = clean_json(response)
426
416
  response = json.loads(response)
427
417
 
428
418
  if is_none_or_empty(response):
@@ -483,11 +473,14 @@ async def infer_webpage_urls(
483
473
 
484
474
  # Validate that the response is a non-empty, JSON-serializable list of URLs
485
475
  try:
486
- response = response.strip()
476
+ response = clean_json(response)
487
477
  urls = json.loads(response)
488
478
  valid_unique_urls = {str(url).strip() for url in urls["links"] if is_valid_url(url)}
489
479
  if is_none_or_empty(valid_unique_urls):
490
480
  raise ValueError(f"Invalid list of urls: {response}")
481
+ if len(valid_unique_urls) == 0:
482
+ logger.error(f"No valid URLs found in response: {response}")
483
+ return []
491
484
  return list(valid_unique_urls)
492
485
  except Exception:
493
486
  raise ValueError(f"Invalid list of urls: {response}")
@@ -534,8 +527,7 @@ async def generate_online_subqueries(
534
527
 
535
528
  # Validate that the response is a non-empty, JSON-serializable list
536
529
  try:
537
- response = response.strip()
538
- response = remove_json_codeblock(response)
530
+ response = clean_json(response)
539
531
  response = json.loads(response)
540
532
  response = [q.strip() for q in response["queries"] if q.strip()]
541
533
  if not isinstance(response, list) or not response or len(response) == 0:
@@ -644,6 +636,53 @@ async def extract_relevant_summary(
644
636
  return response.strip()
645
637
 
646
638
 
639
async def generate_summary_from_files(
    q: str,
    user: KhojUser,
    file_filters: List[str],
    meta_log: dict,
    query_images: List[str] = None,
    agent: Agent = None,
    send_status_func: Optional[Callable] = None,
    tracer: dict = {},
):
    """
    Summarize the full text of a single file and stream the outcome.

    The file is looked up from the agent's entries when the agent has any. If
    `file_filters` is non-empty, the first filter is looked up for `user` and
    takes precedence over the agent's file.

    Args:
        q: What to extract from the file. Falls back to a general summary request when empty.
        user: User the file filter lookup and the summary are made for.
        file_filters: File names to summarize. Only the first entry is used.
        meta_log: Conversation history passed to the summarizer.
        query_images: Optional images forwarded to the summarizer.
        agent: Optional agent whose entries may provide the file.
        send_status_func: Optional callable returning an async iterator of status events.
        tracer: Tracing dict forwarded to the summarizer.

    Yields:
        `{ChatEvent.STATUS: event}` dicts for status updates, followed by one string:
        the summary, a "file not found" apology, or a generic error message.
    """
    try:
        file_object = None
        if await EntryAdapters.aagent_has_entries(agent):
            file_names = await EntryAdapters.aget_agent_entry_filepaths(agent)
            if len(file_names) > 0:
                file_object = await FileObjectAdapters.async_get_file_objects_by_name(None, file_names.pop(), agent)

        if file_filters:
            file_object = await FileObjectAdapters.async_get_file_objects_by_name(user, file_filters[0])

        # file_object stays None when neither the agent nor the filters gave a file to look up
        if file_object is None or len(file_object) == 0:
            yield "Sorry, I couldn't find the full text of this file."
            return

        contextual_data = " ".join([file.raw_text for file in file_object])
        if not q:
            q = "Create a general summary of the file"

        # Status reporting is optional; skip it when no callback was provided
        if send_status_func:
            async for result in send_status_func(f"**Constructing Summary Using:** {file_object[0].file_name}"):
                yield {ChatEvent.STATUS: result}

        response = await extract_relevant_summary(
            q,
            contextual_data,
            conversation_history=meta_log,
            query_images=query_images,
            user=user,
            agent=agent,
            tracer=tracer,
        )

        yield str(response)
    except Exception as e:
        response_log = "Error summarizing file. Please try again, or contact support."
        logger.error(f"Error summarizing file for {user.email}: {e}", exc_info=True)
        # Report the failure to the caller instead of a possibly unbound status event
        yield response_log
684
+
685
+
647
686
  async def generate_excalidraw_diagram(
648
687
  q: str,
649
688
  conversation_history: Dict[str, Any],
@@ -759,10 +798,9 @@ async def generate_excalidraw_diagram_from_description(
759
798
 
760
799
  with timer("Chat actor: Generate excalidraw diagram", logger):
761
800
  raw_response = await send_message_to_model_wrapper(
762
- message=excalidraw_diagram_generation, user=user, tracer=tracer
801
+ query=excalidraw_diagram_generation, user=user, tracer=tracer
763
802
  )
764
- raw_response = raw_response.strip()
765
- raw_response = remove_json_codeblock(raw_response)
803
+ raw_response = clean_json(raw_response)
766
804
  response: Dict[str, str] = json.loads(raw_response)
767
805
  if not response or not isinstance(response, List) or not isinstance(response[0], Dict):
768
806
  # TODO Some additional validation here that it's a valid Excalidraw diagram
@@ -839,11 +877,12 @@ async def generate_better_image_prompt(
839
877
 
840
878
 
841
879
  async def send_message_to_model_wrapper(
842
- message: str,
880
+ query: str,
843
881
  system_message: str = "",
844
882
  response_type: str = "text",
845
883
  user: KhojUser = None,
846
884
  query_images: List[str] = None,
885
+ context: str = "",
847
886
  tracer: dict = {},
848
887
  ):
849
888
  conversation_config: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config(user)
@@ -874,7 +913,8 @@ async def send_message_to_model_wrapper(
874
913
 
875
914
  loaded_model = state.offline_chat_processor_config.loaded_model
876
915
  truncated_messages = generate_chatml_messages_with_context(
877
- user_message=message,
916
+ user_message=query,
917
+ context_message=context,
878
918
  system_message=system_message,
879
919
  model_name=chat_model,
880
920
  loaded_model=loaded_model,
@@ -899,7 +939,8 @@ async def send_message_to_model_wrapper(
899
939
  api_key = openai_chat_config.api_key
900
940
  api_base_url = openai_chat_config.api_base_url
901
941
  truncated_messages = generate_chatml_messages_with_context(
902
- user_message=message,
942
+ user_message=query,
943
+ context_message=context,
903
944
  system_message=system_message,
904
945
  model_name=chat_model,
905
946
  max_prompt_size=max_tokens,
@@ -920,7 +961,8 @@ async def send_message_to_model_wrapper(
920
961
  elif model_type == ChatModelOptions.ModelType.ANTHROPIC:
921
962
  api_key = conversation_config.openai_config.api_key
922
963
  truncated_messages = generate_chatml_messages_with_context(
923
- user_message=message,
964
+ user_message=query,
965
+ context_message=context,
924
966
  system_message=system_message,
925
967
  model_name=chat_model,
926
968
  max_prompt_size=max_tokens,
@@ -934,12 +976,14 @@ async def send_message_to_model_wrapper(
934
976
  messages=truncated_messages,
935
977
  api_key=api_key,
936
978
  model=chat_model,
979
+ response_type=response_type,
937
980
  tracer=tracer,
938
981
  )
939
982
  elif model_type == ChatModelOptions.ModelType.GOOGLE:
940
983
  api_key = conversation_config.openai_config.api_key
941
984
  truncated_messages = generate_chatml_messages_with_context(
942
- user_message=message,
985
+ user_message=query,
986
+ context_message=context,
943
987
  system_message=system_message,
944
988
  model_name=chat_model,
945
989
  max_prompt_size=max_tokens,
@@ -1033,6 +1077,7 @@ def send_message_to_model_wrapper_sync(
1033
1077
  messages=truncated_messages,
1034
1078
  api_key=api_key,
1035
1079
  model=chat_model,
1080
+ response_type=response_type,
1036
1081
  tracer=tracer,
1037
1082
  )
1038
1083
 
@@ -1064,6 +1109,7 @@ def generate_chat_response(
1064
1109
  conversation: Conversation,
1065
1110
  compiled_references: List[Dict] = [],
1066
1111
  online_results: Dict[str, Dict] = {},
1112
+ code_results: Dict[str, Dict] = {},
1067
1113
  inferred_queries: List[str] = [],
1068
1114
  conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
1069
1115
  user: KhojUser = None,
@@ -1071,8 +1117,10 @@ def generate_chat_response(
1071
1117
  conversation_id: str = None,
1072
1118
  location_data: LocationData = None,
1073
1119
  user_name: Optional[str] = None,
1120
+ meta_research: str = "",
1074
1121
  query_images: Optional[List[str]] = None,
1075
1122
  tracer: dict = {},
1123
+ train_of_thought: List[Any] = [],
1076
1124
  ) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]:
1077
1125
  # Initialize Variables
1078
1126
  chat_response = None
@@ -1080,6 +1128,9 @@ def generate_chat_response(
1080
1128
 
1081
1129
  metadata = {}
1082
1130
  agent = AgentAdapters.get_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
1131
+ query_to_run = q
1132
+ if meta_research:
1133
+ query_to_run = f"AI Research: {meta_research} {q}"
1083
1134
  try:
1084
1135
  partial_completion = partial(
1085
1136
  save_to_conversation_log,
@@ -1088,11 +1139,13 @@ def generate_chat_response(
1088
1139
  meta_log=meta_log,
1089
1140
  compiled_references=compiled_references,
1090
1141
  online_results=online_results,
1142
+ code_results=code_results,
1091
1143
  inferred_queries=inferred_queries,
1092
1144
  client_application=client_application,
1093
1145
  conversation_id=conversation_id,
1094
1146
  query_images=query_images,
1095
1147
  tracer=tracer,
1148
+ train_of_thought=train_of_thought,
1096
1149
  )
1097
1150
 
1098
1151
  conversation_config = ConversationAdapters.get_valid_conversation_config(user, conversation)
@@ -1106,9 +1159,9 @@ def generate_chat_response(
1106
1159
  if conversation_config.model_type == "offline":
1107
1160
  loaded_model = state.offline_chat_processor_config.loaded_model
1108
1161
  chat_response = converse_offline(
1162
+ user_query=query_to_run,
1109
1163
  references=compiled_references,
1110
1164
  online_results=online_results,
1111
- user_query=q,
1112
1165
  loaded_model=loaded_model,
1113
1166
  conversation_log=meta_log,
1114
1167
  completion_func=partial_completion,
@@ -1128,9 +1181,10 @@ def generate_chat_response(
1128
1181
  chat_model = conversation_config.chat_model
1129
1182
  chat_response = converse(
1130
1183
  compiled_references,
1131
- q,
1184
+ query_to_run,
1132
1185
  query_images=query_images,
1133
1186
  online_results=online_results,
1187
+ code_results=code_results,
1134
1188
  conversation_log=meta_log,
1135
1189
  model=chat_model,
1136
1190
  api_key=api_key,
@@ -1150,9 +1204,10 @@ def generate_chat_response(
1150
1204
  api_key = conversation_config.openai_config.api_key
1151
1205
  chat_response = converse_anthropic(
1152
1206
  compiled_references,
1153
- q,
1207
+ query_to_run,
1154
1208
  query_images=query_images,
1155
1209
  online_results=online_results,
1210
+ code_results=code_results,
1156
1211
  conversation_log=meta_log,
1157
1212
  model=conversation_config.chat_model,
1158
1213
  api_key=api_key,
@@ -1170,10 +1225,10 @@ def generate_chat_response(
1170
1225
  api_key = conversation_config.openai_config.api_key
1171
1226
  chat_response = converse_gemini(
1172
1227
  compiled_references,
1173
- q,
1174
- query_images=query_images,
1175
- online_results=online_results,
1176
- conversation_log=meta_log,
1228
+ query_to_run,
1229
+ online_results,
1230
+ code_results,
1231
+ meta_log,
1177
1232
  model=conversation_config.chat_model,
1178
1233
  api_key=api_key,
1179
1234
  completion_func=partial_completion,
@@ -1203,6 +1258,7 @@ class ChatRequestBody(BaseModel):
1203
1258
  stream: Optional[bool] = False
1204
1259
  title: Optional[str] = None
1205
1260
  conversation_id: Optional[str] = None
1261
+ turn_id: Optional[str] = None
1206
1262
  city: Optional[str] = None
1207
1263
  region: Optional[str] = None
1208
1264
  country: Optional[str] = None
@@ -1212,6 +1268,17 @@ class ChatRequestBody(BaseModel):
1212
1268
  create_new: Optional[bool] = False
1213
1269
 
1214
1270
 
1271
# Request body model with the two identifiers sent when deleting a message.
# NOTE(review): presumably turn_id matches ChatRequestBody.turn_id - confirm against the endpoint using this model.
class DeleteMessageRequestBody(BaseModel):
    conversation_id: str
    turn_id: str
1274
+
1275
+
1276
# Request body model carrying three required string fields for feedback.
# NOTE(review): uquery/kquery look like the user's query and Khoj's reply - confirm against the endpoint using this model.
class FeedbackData(BaseModel):
    uquery: str
    kquery: str
    sentiment: str
1280
+
1281
+
1215
1282
  class ApiUserRateLimiter:
1216
1283
  def __init__(self, requests: int, subscribed_requests: int, window: int, slug: str):
1217
1284
  self.requests = requests
@@ -1314,7 +1381,7 @@ class ConversationCommandRateLimiter:
1314
1381
  self.slug = slug
1315
1382
  self.trial_rate_limit = trial_rate_limit
1316
1383
  self.subscribed_rate_limit = subscribed_rate_limit
1317
- self.restricted_commands = [ConversationCommand.Online, ConversationCommand.Image]
1384
+ self.restricted_commands = [ConversationCommand.Research]
1318
1385
 
1319
1386
  async def update_and_check_if_valid(self, request: Request, conversation_command: ConversationCommand):
1320
1387
  if state.billing_enabled is False:
@@ -1627,14 +1694,6 @@ Manage your automations [here](/automations).
1627
1694
  """.strip()
1628
1695
 
1629
1696
 
1630
- class ChatEvent(Enum):
1631
- START_LLM_RESPONSE = "start_llm_response"
1632
- END_LLM_RESPONSE = "end_llm_response"
1633
- MESSAGE = "message"
1634
- REFERENCES = "references"
1635
- STATUS = "status"
1636
-
1637
-
1638
1697
  class MessageProcessor:
1639
1698
  def __init__(self):
1640
1699
  self.references = {}
khoj/routers/research.py ADDED
@@ -0,0 +1,320 @@
1
+ import json
2
+ import logging
3
+ from datetime import datetime
4
+ from typing import Callable, Dict, List, Optional
5
+
6
+ import yaml
7
+ from fastapi import Request
8
+
9
+ from khoj.database.models import Agent, KhojUser
10
+ from khoj.processor.conversation import prompts
11
+ from khoj.processor.conversation.utils import (
12
+ InformationCollectionIteration,
13
+ clean_json,
14
+ construct_iteration_history,
15
+ construct_tool_chat_history,
16
+ )
17
+ from khoj.processor.tools.online_search import read_webpages, search_online
18
+ from khoj.processor.tools.run_code import run_code
19
+ from khoj.routers.api import extract_references_and_questions
20
+ from khoj.routers.helpers import (
21
+ ChatEvent,
22
+ construct_chat_history,
23
+ extract_relevant_info,
24
+ generate_summary_from_files,
25
+ send_message_to_model_wrapper,
26
+ )
27
+ from khoj.utils.helpers import (
28
+ ConversationCommand,
29
+ function_calling_description_for_llm,
30
+ is_none_or_empty,
31
+ timer,
32
+ )
33
+ from khoj.utils.rawconfig import LocationData
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
async def apick_next_tool(
    query: str,
    conversation_history: dict,
    user: KhojUser = None,
    query_images: List[str] = [],
    location: LocationData = None,
    user_name: str = None,
    agent: Agent = None,
    previous_iterations_history: str = None,
    max_iterations: int = 5,
    send_status_func: Optional[Callable] = None,
    tracer: dict = {},
):
    """
    Ask the chat model which tool to use next to answer the query.

    One tool is picked per call. The caller can invoke this repeatedly, passing the
    history of previous iterations, to refine the answer.

    Args:
        query: The user's query.
        conversation_history: Conversation log used to build the chat history for the prompt.
        user: User the chat model is invoked for.
        query_images: Images attached to the query. When present, a placeholder is prepended to the query.
        location: User location put into the prompt. "Unknown" is used when not set.
        user_name: User name put into the prompt. "Unknown" is used when not set.
        agent: Optional agent. Its input tools restrict the tools offered, and its
            personality and name are put into the prompt.
        previous_iterations_history: Text describing previous iterations, put into the prompt.
        max_iterations: Iteration budget put into the prompt.
        send_status_func: Optional callable returning an async iterator of status events.
        tracer: Tracing dict forwarded to the chat model call.

    Yields:
        `{ChatEvent.STATUS: event}` dicts when a status callback is set, then one
        `InformationCollectionIteration` with the selected tool and generated query.
        Both are None when the model response cannot be used.
    """
    agent_tools = agent.input_tools if agent else []

    # Offer every known tool unless the agent restricts its input tools
    tool_options_str = "".join(
        f'- "{tool.value}": "{description}"\n'
        for tool, description in function_calling_description_for_llm.items()
        if not agent_tools or tool.value in agent_tools
    )

    chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")

    if query_images:
        query = f"[placeholder for user attached images]\n{query}"

    personality_context = (
        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
    )

    today = datetime.today()
    location_data = f"{location}" if location else "Unknown"

    function_planning_prompt = prompts.plan_function_execution.format(
        tools=tool_options_str,
        chat_history=chat_history,
        personality_context=personality_context,
        current_date=today.strftime("%Y-%m-%d"),
        day_of_week=today.strftime("%A"),
        username=user_name or "Unknown",
        location=location_data,
        previous_iterations=previous_iterations_history,
        max_iterations=max_iterations,
    )

    with timer("Chat actor: Infer information sources to refer", logger):
        response = await send_message_to_model_wrapper(
            query=query,
            context=function_planning_prompt,
            response_type="json_object",
            user=user,
            query_images=query_images,
            tracer=tracer,
        )

    try:
        response = clean_json(response)
        response = json.loads(response)
        selected_tool = response.get("tool", None)
        generated_query = response.get("query", None)
        scratchpad = response.get("scratchpad", None)
        logger.info(f"Response for determining relevant tools: {response}")
        if send_status_func:
            # Show the model's reasoning when it gave one. Otherwise fall back to
            # naming the chosen tool, rather than showing the literal text "None".
            if scratchpad:
                status_message = f"{scratchpad}"
            else:
                status_message = "**Determined Tool**: "
                status_message += f"{selected_tool}({generated_query})." if selected_tool else "respond."
            async for event in send_status_func(status_message):
                yield {ChatEvent.STATUS: event}

        yield InformationCollectionIteration(
            tool=selected_tool,
            query=generated_query,
        )

    except Exception as e:
        logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
        # An iteration without a tool tells the caller to stop collecting information
        yield InformationCollectionIteration(
            tool=None,
            query=None,
        )
125
+
126
+
127
def _format_iteration_results(document_results, online_results, code_results, summarize_files) -> str:
    """Render the non-empty results of one iteration as titled YAML sections."""
    results_data = "**Results**:\n"
    sections = (
        ("Document References", document_results),
        ("Online Results", online_results),
        ("Code Results", code_results),
        ("Summarized Files", summarize_files),
    )
    for title, data in sections:
        if data:
            dumped = yaml.dump(data, allow_unicode=True, sort_keys=False, default_flow_style=False)
            results_data += f"**{title}**:\n{dumped}\n"
    return results_data


async def execute_information_collection(
    request: Request,
    user: KhojUser,
    query: str,
    conversation_id: str,
    conversation_history: dict,
    query_images: List[str],
    agent: Agent = None,
    send_status_func: Optional[Callable] = None,
    user_name: str = None,
    location: LocationData = None,
    file_filters: List[str] = [],
    tracer: dict = {},
):
    """
    Iteratively pick and run tools to collect information for answering the query.

    Each iteration asks `apick_next_tool` for the next tool, runs it (notes search,
    online search, webpage reading, code execution or file summarization) and stores
    what it found on the iteration. The loop ends after 5 iterations or as soon as
    the picked tool matches none of the handled tools.

    Yields:
        Status events from the tools as they arrive, and one
        `InformationCollectionIteration` at the end of every iteration.
    """
    current_iteration = 0
    MAX_ITERATIONS = 5
    previous_iterations: List[InformationCollectionIteration] = []
    while current_iteration < MAX_ITERATIONS:
        online_results: Dict = dict()
        code_results: Dict = dict()
        document_results: List[Dict[str, str]] = []
        summarize_files: str = ""
        this_iteration = InformationCollectionIteration(tool=None, query=query)
        previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)

        async for result in apick_next_tool(
            query,
            conversation_history,
            user,
            query_images,
            location,
            user_name,
            agent,
            previous_iterations_history,
            MAX_ITERATIONS,
            send_status_func,
            tracer=tracer,
        ):
            if isinstance(result, dict) and ChatEvent.STATUS in result:
                yield result[ChatEvent.STATUS]
            elif isinstance(result, InformationCollectionIteration):
                this_iteration = result

        if this_iteration.tool == ConversationCommand.Notes:
            this_iteration.context = []
            document_results = []
            async for result in extract_references_and_questions(
                request,
                construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
                this_iteration.query,
                7,
                None,
                conversation_id,
                [ConversationCommand.Default],
                location,
                send_status_func,
                query_images,
                agent=agent,
                tracer=tracer,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                elif isinstance(result, tuple):
                    document_results = result[0]
                    this_iteration.context += document_results

            # Status reporting is optional; only summarize the found notes when a callback is set
            if send_status_func and not is_none_or_empty(document_results):
                try:
                    distinct_files = {d["file"] for d in document_results}
                    distinct_headings = {d["compiled"].split("\n")[0] for d in document_results if "compiled" in d}
                    # Drop heading markers from the listed headings
                    headings_str = "\n- " + "\n- ".join(distinct_headings).replace("#", "")
                    async for result in send_status_func(
                        f"**Found {len(distinct_headings)} Notes Across {len(distinct_files)} Files**: {headings_str}"
                    ):
                        yield result
                except Exception as e:
                    logger.error(f"Error extracting document references: {e}", exc_info=True)

        elif this_iteration.tool == ConversationCommand.Online:
            async for result in search_online(
                this_iteration.query,
                construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
                location,
                user,
                send_status_func,
                [],
                max_webpages_to_read=0,
                query_images=query_images,
                agent=agent,
                tracer=tracer,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                else:
                    online_results = result  # type: ignore
                    this_iteration.onlineContext = online_results

        elif this_iteration.tool == ConversationCommand.Webpage:
            try:
                # Stays empty if the reader yields only status events
                direct_web_pages: Dict[str, Dict] = {}
                async for result in read_webpages(
                    this_iteration.query,
                    construct_tool_chat_history(previous_iterations, ConversationCommand.Webpage),
                    location,
                    user,
                    send_status_func,
                    query_images=query_images,
                    agent=agent,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        direct_web_pages = result  # type: ignore

                    for web_query in direct_web_pages:
                        if online_results.get(web_query):
                            online_results[web_query]["webpages"] = direct_web_pages[web_query]["webpages"]
                        else:
                            online_results[web_query] = {"webpages": direct_web_pages[web_query]["webpages"]}

                    this_iteration.onlineContext = online_results
            except Exception as e:
                logger.error(f"Error reading webpages: {e}", exc_info=True)

        elif this_iteration.tool == ConversationCommand.Code:
            try:
                async for result in run_code(
                    this_iteration.query,
                    # Build the history from previous code iterations, not webpage ones
                    construct_tool_chat_history(previous_iterations, ConversationCommand.Code),
                    "",
                    location,
                    user,
                    send_status_func,
                    query_images=query_images,
                    agent=agent,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        code_results = result  # type: ignore
                        this_iteration.codeContext = code_results
                if send_status_func:
                    async for result in send_status_func(f"**Ran code snippets**: {len(this_iteration.codeContext)}"):
                        yield result
            except ValueError as e:
                logger.warning(
                    f"Failed to use code tool: {e}. Attempting to respond without code results",
                    exc_info=True,
                )

        elif this_iteration.tool == ConversationCommand.Summarize:
            try:
                async for result in generate_summary_from_files(
                    this_iteration.query,
                    user,
                    file_filters,
                    construct_tool_chat_history(previous_iterations),
                    query_images=query_images,
                    agent=agent,
                    send_status_func=send_status_func,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        summarize_files = result  # type: ignore
            except Exception as e:
                logger.error(f"Error generating summary: {e}", exc_info=True)

        else:
            # No valid tools. This is our exit condition.
            current_iteration = MAX_ITERATIONS

        current_iteration += 1

        if document_results or online_results or code_results or summarize_files:
            this_iteration.summarizedResult = _format_iteration_results(
                document_results, online_results, code_results, summarize_files
            )

        previous_iterations.append(this_iteration)
        yield this_iteration