khoj 1.41.1.dev97__py3-none-any.whl → 1.41.1.dev142__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. khoj/database/adapters/__init__.py +20 -0
  2. khoj/database/models/__init__.py +3 -0
  3. khoj/interface/compiled/404/index.html +2 -2
  4. khoj/interface/compiled/_next/static/chunks/{2327-aa22697ed9c8d54a.js → 2327-f03b2a77f67b8f8c.js} +1 -1
  5. khoj/interface/compiled/_next/static/chunks/{8515-f305779d95dd5780.js → 5138-81457f7f59956b56.js} +9 -9
  6. khoj/interface/compiled/_next/static/chunks/5477-b91e9926cfc3095c.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/7127-d3199617463d45f0.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/app/agents/{page-ceeb9a91edea74ce.js → page-774c78ff0f55a228.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/automations/{page-e3cb78747ab98cc7.js → page-4454891c5007b870.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/app/chat/{page-7e780dc11eb5e5d3.js → page-5a2559825b4d5def.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/{page-a4053e1bb578b2ce.js → page-f7a0286dfc31ad6b.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/search/layout-f5881c7ae3ba0795.js +1 -0
  15. khoj/interface/compiled/_next/static/chunks/app/search/{page-8973da2f4c076fe1.js → page-f1a7f278c89e09b6.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/app/settings/{page-375136dbb400525b.js → page-5d9134d4a97f8834.js} +1 -1
  17. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-abb6c5f4239ad7be.js +1 -0
  18. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-384b54fc953b18f2.js → page-32cd0ceb9ffbd777.js} +1 -1
  19. khoj/interface/compiled/_next/static/chunks/{webpack-21f76f7f59582bc7.js → webpack-952bc0d41769db77.js} +1 -1
  20. khoj/interface/compiled/_next/static/css/{fca983d49c3dd1a3.css → 0db53bacf81896f5.css} +1 -1
  21. khoj/interface/compiled/_next/static/css/93eeacc43e261162.css +1 -0
  22. khoj/interface/compiled/agents/index.html +2 -2
  23. khoj/interface/compiled/agents/index.txt +2 -2
  24. khoj/interface/compiled/automations/index.html +2 -2
  25. khoj/interface/compiled/automations/index.txt +3 -3
  26. khoj/interface/compiled/chat/index.html +2 -2
  27. khoj/interface/compiled/chat/index.txt +2 -2
  28. khoj/interface/compiled/index.html +2 -2
  29. khoj/interface/compiled/index.txt +2 -2
  30. khoj/interface/compiled/search/index.html +2 -2
  31. khoj/interface/compiled/search/index.txt +2 -2
  32. khoj/interface/compiled/settings/index.html +2 -2
  33. khoj/interface/compiled/settings/index.txt +4 -4
  34. khoj/interface/compiled/share/chat/index.html +2 -2
  35. khoj/interface/compiled/share/chat/index.txt +2 -2
  36. khoj/processor/conversation/anthropic/anthropic_chat.py +9 -10
  37. khoj/processor/conversation/anthropic/utils.py +30 -7
  38. khoj/processor/conversation/google/gemini_chat.py +10 -10
  39. khoj/processor/conversation/google/utils.py +20 -12
  40. khoj/processor/conversation/offline/chat_model.py +2 -7
  41. khoj/processor/conversation/openai/gpt.py +9 -10
  42. khoj/processor/conversation/utils.py +177 -53
  43. khoj/processor/operator/README.md +59 -0
  44. khoj/processor/operator/{operate_browser.py → __init__.py} +98 -34
  45. khoj/processor/operator/grounding_agent.py +229 -175
  46. khoj/processor/operator/grounding_agent_uitars.py +61 -50
  47. khoj/processor/operator/operator_actions.py +48 -0
  48. khoj/processor/operator/operator_agent_anthropic.py +298 -90
  49. khoj/processor/operator/operator_agent_base.py +45 -14
  50. khoj/processor/operator/operator_agent_binary.py +125 -57
  51. khoj/processor/operator/operator_agent_openai.py +183 -75
  52. khoj/processor/operator/operator_environment_base.py +11 -1
  53. khoj/processor/operator/operator_environment_browser.py +5 -3
  54. khoj/processor/operator/operator_environment_computer.py +658 -0
  55. khoj/routers/api_chat.py +125 -43
  56. khoj/routers/api_model.py +3 -3
  57. khoj/routers/helpers.py +13 -18
  58. khoj/routers/research.py +57 -23
  59. khoj/utils/constants.py +4 -4
  60. khoj/utils/helpers.py +12 -15
  61. khoj/utils/rawconfig.py +1 -0
  62. {khoj-1.41.1.dev97.dist-info → khoj-1.41.1.dev142.dist-info}/METADATA +3 -1
  63. {khoj-1.41.1.dev97.dist-info → khoj-1.41.1.dev142.dist-info}/RECORD +74 -72
  64. khoj/interface/compiled/_next/static/chunks/4986-9ddd694756d03aa1.js +0 -1
  65. khoj/interface/compiled/_next/static/chunks/5477-77ce5c6f468d6c25.js +0 -1
  66. khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
  67. khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +0 -1
  68. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +0 -1
  69. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +0 -1
  70. khoj/interface/compiled/_next/static/css/f29752d6e1be7624.css +0 -1
  71. /khoj/interface/compiled/_next/static/{o6zlo73DbD2lS92jWHS8o → 4CIEX6Ko-Qehhb7L-ymZw}/_buildManifest.js +0 -0
  72. /khoj/interface/compiled/_next/static/{o6zlo73DbD2lS92jWHS8o → 4CIEX6Ko-Qehhb7L-ymZw}/_ssgManifest.js +0 -0
  73. /khoj/interface/compiled/_next/static/chunks/{1915-ab4353eaca76f690.js → 1915-1943ee8a628b893c.js} +0 -0
  74. /khoj/interface/compiled/_next/static/chunks/{2117-1c18aa2098982bf9.js → 2117-5a41630a2bd2eae8.js} +0 -0
  75. /khoj/interface/compiled/_next/static/chunks/{4363-4efaf12abe696251.js → 4363-e6ac2203564d1a3b.js} +0 -0
  76. /khoj/interface/compiled/_next/static/chunks/{4447-5d44807c40355b1a.js → 4447-e038b251d626c340.js} +0 -0
  77. /khoj/interface/compiled/_next/static/chunks/{8667-adbe6017a66cef10.js → 8667-8136f74e9a086fca.js} +0 -0
  78. /khoj/interface/compiled/_next/static/chunks/{9259-d8bcd9da9e80c81e.js → 9259-640fdd77408475df.js} +0 -0
  79. {khoj-1.41.1.dev97.dist-info → khoj-1.41.1.dev142.dist-info}/WHEEL +0 -0
  80. {khoj-1.41.1.dev97.dist-info → khoj-1.41.1.dev142.dist-info}/entry_points.txt +0 -0
  81. {khoj-1.41.1.dev97.dist-info → khoj-1.41.1.dev142.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api_chat.py CHANGED
@@ -26,12 +26,13 @@ from khoj.database.models import Agent, KhojUser
26
26
  from khoj.processor.conversation import prompts
27
27
  from khoj.processor.conversation.prompts import help_message, no_entries_found
28
28
  from khoj.processor.conversation.utils import (
29
+ OperatorRun,
29
30
  ResponseWithThought,
30
31
  defilter_query,
31
32
  save_to_conversation_log,
32
33
  )
33
34
  from khoj.processor.image.generate import text_to_image
34
- from khoj.processor.operator.operate_browser import operate_browser
35
+ from khoj.processor.operator import operate_environment
35
36
  from khoj.processor.speech.text_to_speech import generate_text_to_speech
36
37
  from khoj.processor.tools.online_search import (
37
38
  deduplicate_organic_results,
@@ -65,10 +66,7 @@ from khoj.routers.helpers import (
65
66
  update_telemetry_state,
66
67
  validate_chat_model,
67
68
  )
68
- from khoj.routers.research import (
69
- InformationCollectionIteration,
70
- execute_information_collection,
71
- )
69
+ from khoj.routers.research import ResearchIteration, research
72
70
  from khoj.routers.storage import upload_user_image_to_bucket
73
71
  from khoj.utils import state
74
72
  from khoj.utils.helpers import (
@@ -682,11 +680,13 @@ async def chat(
682
680
  timezone = body.timezone
683
681
  raw_images = body.images
684
682
  raw_query_files = body.files
683
+ interrupt_flag = body.interrupt
685
684
 
686
685
  async def event_generator(q: str, images: list[str]):
687
686
  start_time = time.perf_counter()
688
687
  ttft = None
689
688
  chat_metadata: dict = {}
689
+ conversation = None
690
690
  user: KhojUser = request.user.object
691
691
  is_subscribed = has_required_scope(request, ["premium"])
692
692
  q = unquote(q)
@@ -720,6 +720,20 @@ async def chat(
720
720
  for file in raw_query_files:
721
721
  query_files[file.name] = file.content
722
722
 
723
+ research_results: List[ResearchIteration] = []
724
+ online_results: Dict = dict()
725
+ code_results: Dict = dict()
726
+ operator_results: List[OperatorRun] = []
727
+ compiled_references: List[Any] = []
728
+ inferred_queries: List[Any] = []
729
+ attached_file_context = gather_raw_query_files(query_files)
730
+
731
+ generated_images: List[str] = []
732
+ generated_files: List[FileAttachment] = []
733
+ generated_mermaidjs_diagram: str = None
734
+ generated_asset_results: Dict = dict()
735
+ program_execution_context: List[str] = []
736
+
723
737
  # Create a task to monitor for disconnections
724
738
  disconnect_monitor_task = None
725
739
 
@@ -727,8 +741,34 @@ async def chat(
727
741
  try:
728
742
  msg = await request.receive()
729
743
  if msg["type"] == "http.disconnect":
730
- logger.debug(f"User {user} disconnected from {common.client} client.")
744
+ logger.debug(f"Request cancelled. User {user} disconnected from {common.client} client.")
731
745
  cancellation_event.set()
746
+ # ensure partial chat state saved on interrupt
747
+ # shield the save against task cancellation
748
+ if conversation:
749
+ await asyncio.shield(
750
+ save_to_conversation_log(
751
+ q,
752
+ chat_response="",
753
+ user=user,
754
+ meta_log=meta_log,
755
+ compiled_references=compiled_references,
756
+ online_results=online_results,
757
+ code_results=code_results,
758
+ operator_results=operator_results,
759
+ research_results=research_results,
760
+ inferred_queries=inferred_queries,
761
+ client_application=request.user.client_app,
762
+ conversation_id=conversation_id,
763
+ query_images=uploaded_images,
764
+ train_of_thought=train_of_thought,
765
+ raw_query_files=raw_query_files,
766
+ generated_images=generated_images,
767
+ raw_generated_files=generated_asset_results,
768
+ generated_mermaidjs_diagram=generated_mermaidjs_diagram,
769
+ tracer=tracer,
770
+ )
771
+ )
732
772
  except Exception as e:
733
773
  logger.error(f"Error in disconnect monitor: {e}")
734
774
 
@@ -746,7 +786,6 @@ async def chat(
746
786
  nonlocal ttft, train_of_thought
747
787
  event_delimiter = "␃🔚␗"
748
788
  if cancellation_event.is_set():
749
- logger.debug(f"User {user} disconnected from {common.client} client. Setting cancellation event.")
750
789
  return
751
790
  try:
752
791
  if event_type == ChatEvent.END_LLM_RESPONSE:
@@ -770,9 +809,6 @@ async def chat(
770
809
  yield data
771
810
  elif event_type == ChatEvent.REFERENCES or ChatEvent.METADATA or stream:
772
811
  yield json.dumps({"type": event_type.value, "data": data}, ensure_ascii=False)
773
- except asyncio.CancelledError as e:
774
- if cancellation_event.is_set():
775
- logger.debug(f"Request cancelled. User {user} disconnected from {common.client} client: {e}.")
776
812
  except Exception as e:
777
813
  if not cancellation_event.is_set():
778
814
  logger.error(
@@ -883,21 +919,52 @@ async def chat(
883
919
  user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
884
920
  meta_log = conversation.conversation_log
885
921
 
886
- researched_results = ""
887
- online_results: Dict = dict()
888
- code_results: Dict = dict()
889
- operator_results: Dict[str, str] = {}
890
- generated_asset_results: Dict = dict()
891
- ## Extract Document References
892
- compiled_references: List[Any] = []
893
- inferred_queries: List[Any] = []
894
- file_filters = conversation.file_filters if conversation and conversation.file_filters else []
895
- attached_file_context = gather_raw_query_files(query_files)
922
+ # If interrupt flag is set, wait for the previous turn to be saved before proceeding
923
+ if interrupt_flag:
924
+ max_wait_time = 20.0 # seconds
925
+ wait_interval = 0.3 # seconds
926
+ wait_start = wait_current = time.time()
927
+ while wait_current - wait_start < max_wait_time:
928
+ # Refresh conversation to check if interrupted message saved to DB
929
+ conversation = await ConversationAdapters.aget_conversation_by_user(
930
+ user,
931
+ client_application=request.user.client_app,
932
+ conversation_id=conversation_id,
933
+ )
934
+ if (
935
+ conversation
936
+ and conversation.messages
937
+ and conversation.messages[-1].by == "khoj"
938
+ and not conversation.messages[-1].message
939
+ ):
940
+ logger.info(f"Detected interrupted message save to conversation {conversation_id}.")
941
+ break
942
+ await asyncio.sleep(wait_interval)
943
+ wait_current = time.time()
896
944
 
897
- generated_images: List[str] = []
898
- generated_files: List[FileAttachment] = []
899
- generated_mermaidjs_diagram: str = None
900
- program_execution_context: List[str] = []
945
+ if wait_current - wait_start >= max_wait_time:
946
+ logger.warning(
947
+ f"Timeout waiting to load interrupted context from conversation {conversation_id}. Proceed without previous context."
948
+ )
949
+
950
+ # If interrupted message in DB
951
+ if (
952
+ conversation
953
+ and conversation.messages
954
+ and conversation.messages[-1].by == "khoj"
955
+ and not conversation.messages[-1].message
956
+ ):
957
+ # Populate context from interrupted message
958
+ last_message = conversation.messages[-1]
959
+ online_results = {key: val.model_dump() for key, val in last_message.onlineContext.items() or []}
960
+ code_results = {key: val.model_dump() for key, val in last_message.codeContext.items() or []}
961
+ compiled_references = [ref.model_dump() for ref in last_message.context or []]
962
+ research_results = [ResearchIteration(**iter_dict) for iter_dict in last_message.researchContext or []]
963
+ operator_results = [OperatorRun(**iter_dict) for iter_dict in last_message.operatorContext or []]
964
+ train_of_thought = [thought.model_dump() for thought in last_message.trainOfThought or []]
965
+ # Drop the interrupted message from conversation history
966
+ meta_log["chat"].pop()
967
+ logger.info(f"Loaded interrupted partial context from conversation {conversation_id}.")
901
968
 
902
969
  if conversation_commands == [ConversationCommand.Default]:
903
970
  try:
@@ -936,24 +1003,26 @@ async def chat(
936
1003
  return
937
1004
 
938
1005
  defiltered_query = defilter_query(q)
1006
+ file_filters = conversation.file_filters if conversation and conversation.file_filters else []
939
1007
 
940
1008
  if conversation_commands == [ConversationCommand.Research]:
941
- async for research_result in execute_information_collection(
1009
+ async for research_result in research(
942
1010
  user=user,
943
1011
  query=defiltered_query,
944
1012
  conversation_id=conversation_id,
945
1013
  conversation_history=meta_log,
1014
+ previous_iterations=list(research_results),
946
1015
  query_images=uploaded_images,
947
1016
  agent=agent,
948
1017
  send_status_func=partial(send_event, ChatEvent.STATUS),
949
1018
  user_name=user_name,
950
1019
  location=location,
951
- file_filters=conversation.file_filters if conversation else [],
1020
+ file_filters=file_filters,
952
1021
  query_files=attached_file_context,
953
1022
  tracer=tracer,
954
1023
  cancellation_event=cancellation_event,
955
1024
  ):
956
- if isinstance(research_result, InformationCollectionIteration):
1025
+ if isinstance(research_result, ResearchIteration):
957
1026
  if research_result.summarizedResult:
958
1027
  if research_result.onlineContext:
959
1028
  online_results.update(research_result.onlineContext)
@@ -961,19 +1030,31 @@ async def chat(
961
1030
  code_results.update(research_result.codeContext)
962
1031
  if research_result.context:
963
1032
  compiled_references.extend(research_result.context)
964
- if research_result.operatorContext:
965
- operator_results.update(research_result.operatorContext)
966
- researched_results += research_result.summarizedResult
967
-
1033
+ if not research_results or research_results[-1] is not research_result:
1034
+ research_results.append(research_result)
968
1035
  else:
969
1036
  yield research_result
970
1037
 
1038
+ # Track operator results across research and operator iterations
1039
+ # This relies on two conditions:
1040
+ # 1. Check to append new (partial) operator results
1041
+ # Relies on triggering this check on every status updates.
1042
+ # Status updates cascade up from operator to research to chat api on every step.
1043
+ # 2. Keep operator results in sync with each research operator step
1044
+ # Relies on python object references to ensure operator results
1045
+ # are implicitly kept in sync after the initial append
1046
+ if (
1047
+ research_results
1048
+ and research_results[-1].operatorContext
1049
+ and (not operator_results or operator_results[-1] is not research_results[-1].operatorContext)
1050
+ ):
1051
+ operator_results.append(research_results[-1].operatorContext)
1052
+
971
1053
  # researched_results = await extract_relevant_info(q, researched_results, agent)
972
1054
  if state.verbose > 1:
973
- logger.debug(f"Researched Results: {researched_results}")
1055
+ logger.debug(f'Researched Results: {"".join(r.summarizedResult for r in research_results)}')
974
1056
 
975
1057
  used_slash_summarize = conversation_commands == [ConversationCommand.Summarize]
976
- file_filters = conversation.file_filters if conversation else []
977
1058
  # Skip trying to summarize if
978
1059
  if (
979
1060
  # summarization intent was inferred
@@ -1221,11 +1302,12 @@ async def chat(
1221
1302
  )
1222
1303
  if ConversationCommand.Operator in conversation_commands:
1223
1304
  try:
1224
- async for result in operate_browser(
1305
+ async for result in operate_environment(
1225
1306
  defiltered_query,
1226
1307
  user,
1227
1308
  meta_log,
1228
1309
  location,
1310
+ list(operator_results)[-1] if operator_results else None,
1229
1311
  query_images=uploaded_images,
1230
1312
  query_files=attached_file_context,
1231
1313
  send_status_func=partial(send_event, ChatEvent.STATUS),
@@ -1235,16 +1317,17 @@ async def chat(
1235
1317
  ):
1236
1318
  if isinstance(result, dict) and ChatEvent.STATUS in result:
1237
1319
  yield result[ChatEvent.STATUS]
1238
- else:
1239
- operator_results = {result["query"]: result["result"]}
1320
+ elif isinstance(result, OperatorRun):
1321
+ if not operator_results or operator_results[-1] is not result:
1322
+ operator_results.append(result)
1240
1323
  # Add webpages visited while operating browser to references
1241
- if result.get("webpages"):
1324
+ if result.webpages:
1242
1325
  if not online_results.get(defiltered_query):
1243
- online_results[defiltered_query] = {"webpages": result["webpages"]}
1326
+ online_results[defiltered_query] = {"webpages": result.webpages}
1244
1327
  elif not online_results[defiltered_query].get("webpages"):
1245
- online_results[defiltered_query]["webpages"] = result["webpages"]
1328
+ online_results[defiltered_query]["webpages"] = result.webpages
1246
1329
  else:
1247
- online_results[defiltered_query]["webpages"] += result["webpages"]
1330
+ online_results[defiltered_query]["webpages"] += result.webpages
1248
1331
  except ValueError as e:
1249
1332
  program_execution_context.append(f"Browser operation error: {e}")
1250
1333
  logger.warning(f"Failed to operate browser with {e}", exc_info=True)
@@ -1262,7 +1345,6 @@ async def chat(
1262
1345
  "context": compiled_references,
1263
1346
  "onlineContext": unique_online_results,
1264
1347
  "codeContext": code_results,
1265
- "operatorContext": operator_results,
1266
1348
  },
1267
1349
  ):
1268
1350
  yield result
@@ -1362,7 +1444,7 @@ async def chat(
1362
1444
 
1363
1445
  # Check if the user has disconnected
1364
1446
  if cancellation_event.is_set():
1365
- logger.debug(f"User {user} disconnected from {common.client} client. Stopping LLM response.")
1447
+ logger.debug(f"Stopping LLM response to user {user} on {common.client} client.")
1366
1448
  # Cancel the disconnect monitor task if it is still running
1367
1449
  await cancel_disconnect_monitor()
1368
1450
  return
@@ -1379,13 +1461,13 @@ async def chat(
1379
1461
  online_results,
1380
1462
  code_results,
1381
1463
  operator_results,
1464
+ research_results,
1382
1465
  inferred_queries,
1383
1466
  conversation_commands,
1384
1467
  user,
1385
1468
  request.user.client_app,
1386
1469
  location,
1387
1470
  user_name,
1388
- researched_results,
1389
1471
  uploaded_images,
1390
1472
  train_of_thought,
1391
1473
  attached_file_context,
khoj/routers/api_model.py CHANGED
@@ -72,7 +72,7 @@ async def update_chat_model(
72
72
  if chat_model is None:
73
73
  return Response(status_code=404, content=json.dumps({"status": "error", "message": "Chat model not found"}))
74
74
  if not subscribed and chat_model.price_tier != PriceTier.FREE:
75
- raise Response(
75
+ return Response(
76
76
  status_code=403,
77
77
  content=json.dumps({"status": "error", "message": "Subscribe to switch to this chat model"}),
78
78
  )
@@ -108,7 +108,7 @@ async def update_voice_model(
108
108
  if voice_model is None:
109
109
  return Response(status_code=404, content=json.dumps({"status": "error", "message": "Voice model not found"}))
110
110
  if not subscribed and voice_model.price_tier != PriceTier.FREE:
111
- raise Response(
111
+ return Response(
112
112
  status_code=403,
113
113
  content=json.dumps({"status": "error", "message": "Subscribe to switch to this voice model"}),
114
114
  )
@@ -143,7 +143,7 @@ async def update_paint_model(
143
143
  if image_model is None:
144
144
  return Response(status_code=404, content=json.dumps({"status": "error", "message": "Image model not found"}))
145
145
  if not subscribed and image_model.price_tier != PriceTier.FREE:
146
- raise Response(
146
+ return Response(
147
147
  status_code=403,
148
148
  content=json.dumps({"status": "error", "message": "Subscribe to switch to this image model"}),
149
149
  )
khoj/routers/helpers.py CHANGED
@@ -94,6 +94,8 @@ from khoj.processor.conversation.openai.gpt import (
94
94
  )
95
95
  from khoj.processor.conversation.utils import (
96
96
  ChatEvent,
97
+ OperatorRun,
98
+ ResearchIteration,
97
99
  ResponseWithThought,
98
100
  clean_json,
99
101
  clean_mermaidjs,
@@ -384,7 +386,7 @@ async def aget_data_sources_and_output_format(
384
386
  if len(agent_outputs) == 0 or output.value in agent_outputs:
385
387
  output_options_str += f'- "{output.value}": "{description}"\n'
386
388
 
387
- chat_history = construct_chat_history(conversation_history)
389
+ chat_history = construct_chat_history(conversation_history, n=6)
388
390
 
389
391
  if query_images:
390
392
  query = f"[placeholder for {len(query_images)} user attached images]\n{query}"
@@ -1173,12 +1175,7 @@ async def send_message_to_model_wrapper(
1173
1175
  if vision_available and query_images:
1174
1176
  logger.info(f"Using {chat_model.name} model to understand {len(query_images)} images.")
1175
1177
 
1176
- subscribed = await ais_user_subscribed(user) if user else False
1177
- max_tokens = (
1178
- chat_model.subscribed_max_prompt_size
1179
- if subscribed and chat_model.subscribed_max_prompt_size
1180
- else chat_model.max_prompt_size
1181
- )
1178
+ max_tokens = await ConversationAdapters.aget_max_context_size(chat_model, user)
1182
1179
  chat_model_name = chat_model.name
1183
1180
  tokenizer = chat_model.tokenizer
1184
1181
  model_type = chat_model.model_type
@@ -1270,12 +1267,7 @@ def send_message_to_model_wrapper_sync(
1270
1267
  if chat_model is None:
1271
1268
  raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.")
1272
1269
 
1273
- subscribed = is_user_subscribed(user) if user else False
1274
- max_tokens = (
1275
- chat_model.subscribed_max_prompt_size
1276
- if subscribed and chat_model.subscribed_max_prompt_size
1277
- else chat_model.max_prompt_size
1278
- )
1270
+ max_tokens = ConversationAdapters.get_max_context_size(chat_model, user)
1279
1271
  chat_model_name = chat_model.name
1280
1272
  model_type = chat_model.model_type
1281
1273
  vision_available = chat_model.vision_enabled
@@ -1354,14 +1346,14 @@ async def agenerate_chat_response(
1354
1346
  compiled_references: List[Dict] = [],
1355
1347
  online_results: Dict[str, Dict] = {},
1356
1348
  code_results: Dict[str, Dict] = {},
1357
- operator_results: Dict[str, str] = {},
1349
+ operator_results: List[OperatorRun] = [],
1350
+ research_results: List[ResearchIteration] = [],
1358
1351
  inferred_queries: List[str] = [],
1359
1352
  conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
1360
1353
  user: KhojUser = None,
1361
1354
  client_application: ClientApplication = None,
1362
1355
  location_data: LocationData = None,
1363
1356
  user_name: Optional[str] = None,
1364
- meta_research: str = "",
1365
1357
  query_images: Optional[List[str]] = None,
1366
1358
  train_of_thought: List[Any] = [],
1367
1359
  query_files: str = None,
@@ -1391,6 +1383,7 @@ async def agenerate_chat_response(
1391
1383
  online_results=online_results,
1392
1384
  code_results=code_results,
1393
1385
  operator_results=operator_results,
1386
+ research_results=research_results,
1394
1387
  inferred_queries=inferred_queries,
1395
1388
  client_application=client_application,
1396
1389
  conversation_id=str(conversation.id),
@@ -1405,12 +1398,14 @@ async def agenerate_chat_response(
1405
1398
 
1406
1399
  query_to_run = q
1407
1400
  deepthought = False
1408
- if meta_research:
1409
- query_to_run = f"<query>{q}</query>\n<collected_research>\n{meta_research}\n</collected_research>"
1401
+ if research_results:
1402
+ compiled_research = "".join([r.summarizedResult for r in research_results if r.summarizedResult])
1403
+ if compiled_research:
1404
+ query_to_run = f"<query>{q}</query>\n<collected_research>\n{compiled_research}\n</collected_research>"
1410
1405
  compiled_references = []
1411
1406
  online_results = {}
1412
1407
  code_results = {}
1413
- operator_results = {}
1408
+ operator_results = []
1414
1409
  deepthought = True
1415
1410
 
1416
1411
  chat_model = await ConversationAdapters.aget_valid_chat_model(user, conversation, is_subscribed)
khoj/routers/research.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import asyncio
2
2
  import logging
3
3
  import os
4
+ from copy import deepcopy
4
5
  from datetime import datetime
5
6
  from enum import Enum
6
7
  from typing import Callable, Dict, List, Optional, Type
@@ -12,12 +13,13 @@ from khoj.database.adapters import AgentAdapters, EntryAdapters
12
13
  from khoj.database.models import Agent, KhojUser
13
14
  from khoj.processor.conversation import prompts
14
15
  from khoj.processor.conversation.utils import (
15
- InformationCollectionIteration,
16
+ OperatorRun,
17
+ ResearchIteration,
16
18
  construct_iteration_history,
17
19
  construct_tool_chat_history,
18
20
  load_complex_json,
19
21
  )
20
- from khoj.processor.operator.operate_browser import operate_browser
22
+ from khoj.processor.operator import operate_environment
21
23
  from khoj.processor.tools.online_search import read_webpages, search_online
22
24
  from khoj.processor.tools.run_code import run_code
23
25
  from khoj.routers.api import extract_references_and_questions
@@ -82,7 +84,7 @@ async def apick_next_tool(
82
84
  location: LocationData = None,
83
85
  user_name: str = None,
84
86
  agent: Agent = None,
85
- previous_iterations: List[InformationCollectionIteration] = [],
87
+ previous_iterations: List[ResearchIteration] = [],
86
88
  max_iterations: int = 5,
87
89
  query_images: List[str] = [],
88
90
  query_files: str = None,
@@ -94,6 +96,24 @@ async def apick_next_tool(
94
96
  ):
95
97
  """Given a query, determine which of the available tools the agent should use in order to answer appropriately."""
96
98
 
99
+ # Continue with previous iteration if a multi-step tool use is in progress
100
+ if (
101
+ previous_iterations
102
+ and previous_iterations[-1].tool == ConversationCommand.Operator
103
+ and not previous_iterations[-1].summarizedResult
104
+ ):
105
+ previous_iteration = previous_iterations[-1]
106
+ yield ResearchIteration(
107
+ tool=previous_iteration.tool,
108
+ query=query,
109
+ context=previous_iteration.context,
110
+ onlineContext=previous_iteration.onlineContext,
111
+ codeContext=previous_iteration.codeContext,
112
+ operatorContext=previous_iteration.operatorContext,
113
+ warning=previous_iteration.warning,
114
+ )
115
+ return
116
+
97
117
  # Construct tool options for the agent to choose from
98
118
  tool_options = dict()
99
119
  tool_options_str = ""
@@ -141,7 +161,7 @@ async def apick_next_tool(
141
161
  query = f"[placeholder for user attached images]\n{query}"
142
162
 
143
163
  # Construct chat history with user and iteration history with researcher agent for context
144
- previous_iterations_history = construct_iteration_history(query, previous_iterations, prompts.previous_iteration)
164
+ previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration, query)
145
165
  iteration_chat_log = {"chat": conversation_history.get("chat", []) + previous_iterations_history}
146
166
 
147
167
  # Plan function execution for the next tool
@@ -164,7 +184,7 @@ async def apick_next_tool(
164
184
  )
165
185
  except Exception as e:
166
186
  logger.error(f"Failed to infer information sources to refer: {e}", exc_info=True)
167
- yield InformationCollectionIteration(
187
+ yield ResearchIteration(
168
188
  tool=None,
169
189
  query=None,
170
190
  warning="Failed to infer information sources to refer. Skipping iteration. Try again.",
@@ -193,25 +213,26 @@ async def apick_next_tool(
193
213
  async for event in send_status_func(f"{scratchpad}"):
194
214
  yield {ChatEvent.STATUS: event}
195
215
 
196
- yield InformationCollectionIteration(
216
+ yield ResearchIteration(
197
217
  tool=selected_tool,
198
218
  query=generated_query,
199
219
  warning=warning,
200
220
  )
201
221
  except Exception as e:
202
222
  logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
203
- yield InformationCollectionIteration(
223
+ yield ResearchIteration(
204
224
  tool=None,
205
225
  query=None,
206
226
  warning=f"Invalid response for determining relevant tools: {response}. Skipping iteration. Fix error: {e}",
207
227
  )
208
228
 
209
229
 
210
- async def execute_information_collection(
230
+ async def research(
211
231
  user: KhojUser,
212
232
  query: str,
213
233
  conversation_id: str,
214
234
  conversation_history: dict,
235
+ previous_iterations: List[ResearchIteration],
215
236
  query_images: List[str],
216
237
  agent: Agent = None,
217
238
  send_status_func: Optional[Callable] = None,
@@ -227,23 +248,32 @@ async def execute_information_collection(
227
248
  max_webpages_to_read = 1
228
249
  current_iteration = 0
229
250
  MAX_ITERATIONS = int(os.getenv("KHOJ_RESEARCH_ITERATIONS", 5))
230
- previous_iterations: List[InformationCollectionIteration] = []
251
+
252
+ # Incorporate previous partial research into current research chat history
253
+ research_conversation_history = deepcopy(conversation_history)
254
+ if current_iteration := len(previous_iterations) > 0:
255
+ logger.info(f"Continuing research with the previous {len(previous_iterations)} iteration results.")
256
+ previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
257
+ research_conversation_history["chat"] = (
258
+ research_conversation_history.get("chat", []) + previous_iterations_history
259
+ )
260
+
231
261
  while current_iteration < MAX_ITERATIONS:
232
262
  # Check for cancellation at the start of each iteration
233
263
  if cancellation_event and cancellation_event.is_set():
234
- logger.debug(f"User {user} disconnected client. Research cancelled.")
264
+ logger.debug(f"Research cancelled. User {user} disconnected client.")
235
265
  break
236
266
 
237
267
  online_results: Dict = dict()
238
268
  code_results: Dict = dict()
239
269
  document_results: List[Dict[str, str]] = []
240
- operator_results: Dict[str, str] = {}
270
+ operator_results: OperatorRun = None
241
271
  summarize_files: str = ""
242
- this_iteration = InformationCollectionIteration(tool=None, query=query)
272
+ this_iteration = ResearchIteration(tool=None, query=query)
243
273
 
244
274
  async for result in apick_next_tool(
245
275
  query,
246
- conversation_history,
276
+ research_conversation_history,
247
277
  user,
248
278
  location,
249
279
  user_name,
@@ -260,8 +290,9 @@ async def execute_information_collection(
260
290
  ):
261
291
  if isinstance(result, dict) and ChatEvent.STATUS in result:
262
292
  yield result[ChatEvent.STATUS]
263
- elif isinstance(result, InformationCollectionIteration):
293
+ elif isinstance(result, ResearchIteration):
264
294
  this_iteration = result
295
+ yield this_iteration
265
296
 
266
297
  # Skip running iteration if warning present in iteration
267
298
  if this_iteration.warning:
@@ -406,12 +437,13 @@ async def execute_information_collection(
406
437
 
407
438
  elif this_iteration.tool == ConversationCommand.Operator:
408
439
  try:
409
- async for result in operate_browser(
440
+ async for result in operate_environment(
410
441
  this_iteration.query,
411
442
  user,
412
443
  construct_tool_chat_history(previous_iterations, ConversationCommand.Operator),
413
444
  location,
414
- send_status_func,
445
+ previous_iterations[-1].operatorContext if previous_iterations else None,
446
+ send_status_func=send_status_func,
415
447
  query_images=query_images,
416
448
  agent=agent,
417
449
  query_files=query_files,
@@ -420,17 +452,17 @@ async def execute_information_collection(
420
452
  ):
421
453
  if isinstance(result, dict) and ChatEvent.STATUS in result:
422
454
  yield result[ChatEvent.STATUS]
423
- else:
424
- operator_results = {result["query"]: result["result"]}
455
+ elif isinstance(result, OperatorRun):
456
+ operator_results = result
425
457
  this_iteration.operatorContext = operator_results
426
458
  # Add webpages visited while operating browser to references
427
- if result.get("webpages"):
459
+ if result.webpages:
428
460
  if not online_results.get(this_iteration.query):
429
- online_results[this_iteration.query] = {"webpages": result["webpages"]}
461
+ online_results[this_iteration.query] = {"webpages": result.webpages}
430
462
  elif not online_results[this_iteration.query].get("webpages"):
431
- online_results[this_iteration.query]["webpages"] = result["webpages"]
463
+ online_results[this_iteration.query]["webpages"] = result.webpages
432
464
  else:
433
- online_results[this_iteration.query]["webpages"] += result["webpages"]
465
+ online_results[this_iteration.query]["webpages"] += result.webpages
434
466
  this_iteration.onlineContext = online_results
435
467
  except Exception as e:
436
468
  this_iteration.warning = f"Error operating browser: {e}"
@@ -478,7 +510,9 @@ async def execute_information_collection(
478
510
  if code_results:
479
511
  results_data += f"\n<code_results>\n{yaml.dump(truncate_code_context(code_results), allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</code_results>"
480
512
  if operator_results:
481
- results_data += f"\n<browser_operator_results>\n{next(iter(operator_results.values()))}\n</browser_operator_results>"
513
+ results_data += (
514
+ f"\n<browser_operator_results>\n{operator_results.response}\n</browser_operator_results>"
515
+ )
482
516
  if summarize_files:
483
517
  results_data += f"\n<summarized_files>\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</summarized_files>"
484
518
  if this_iteration.warning:
khoj/utils/constants.py CHANGED
@@ -18,8 +18,8 @@ default_offline_chat_models = [
18
18
  "bartowski/Qwen2.5-14B-Instruct-GGUF",
19
19
  ]
20
20
  default_openai_chat_models = ["gpt-4o-mini", "gpt-4.1"]
21
- default_gemini_chat_models = ["gemini-2.0-flash", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]
22
- default_anthropic_chat_models = ["claude-3-7-sonnet-latest", "claude-3-5-haiku-latest"]
21
+ default_gemini_chat_models = ["gemini-2.0-flash", "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-05-06"]
22
+ default_anthropic_chat_models = ["claude-sonnet-4-0", "claude-3-5-haiku-latest"]
23
23
 
24
24
  empty_config = {
25
25
  "search-type": {
@@ -63,10 +63,10 @@ model_to_cost: Dict[str, Dict[str, float]] = {
63
63
  "claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
64
64
  "claude-3-7-sonnet@20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
65
65
  "claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
66
- "claude-sonnet-4": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
66
+ "claude-sonnet-4-0": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
67
67
  "claude-sonnet-4-20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
68
68
  "claude-sonnet-4@20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
69
- "claude-opus-4": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
69
+ "claude-opus-4-0": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
70
70
  "claude-opus-4-20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
71
71
  "claude-opus-4@20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
72
72
  # Grok pricing: https://docs.x.ai/docs/models