khoj 1.41.1.dev107__py3-none-any.whl → 1.41.1.dev144__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. khoj/database/adapters/__init__.py +20 -0
  2. khoj/database/models/__init__.py +1 -1
  3. khoj/interface/compiled/404/index.html +2 -2
  4. khoj/interface/compiled/_next/static/chunks/{8515-f305779d95dd5780.js → 5138-2cce449fd2454abf.js} +9 -9
  5. khoj/interface/compiled/_next/static/chunks/7127-d3199617463d45f0.js +1 -0
  6. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/app/agents/{page-c9ceb9b94e24b94a.js → page-e18e67cff45758c8.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/automations/{page-3dc59a0df3827dc7.js → page-768a0903c4b5b06d.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/chat/{page-2b27c7118d8d5a16.js → page-1153981cb9c4907f.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/{page-38f1f125d7aeb4c7.js → page-a4b97dd0c2a70cfb.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/search/{page-26d4492fb1200e0e.js → page-44072d929427ee56.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/app/settings/{page-bf1a4e488b29fceb.js → page-4e8fdd30a3238357.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-b3f7ae1ef8871d30.js +1 -0
  14. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-a1f10c96366c3a4f.js → page-6a4a9050c8bddae9.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/{webpack-c6bde5961098facd.js → webpack-34ac812e4e4e9a50.js} +1 -1
  16. khoj/interface/compiled/_next/static/css/1e9b757ee2a2b34b.css +1 -0
  17. khoj/interface/compiled/agents/index.html +2 -2
  18. khoj/interface/compiled/agents/index.txt +2 -2
  19. khoj/interface/compiled/automations/index.html +2 -2
  20. khoj/interface/compiled/automations/index.txt +2 -2
  21. khoj/interface/compiled/chat/index.html +2 -2
  22. khoj/interface/compiled/chat/index.txt +2 -2
  23. khoj/interface/compiled/index.html +2 -2
  24. khoj/interface/compiled/index.txt +2 -2
  25. khoj/interface/compiled/search/index.html +2 -2
  26. khoj/interface/compiled/search/index.txt +2 -2
  27. khoj/interface/compiled/settings/index.html +2 -2
  28. khoj/interface/compiled/settings/index.txt +2 -2
  29. khoj/interface/compiled/share/chat/index.html +2 -2
  30. khoj/interface/compiled/share/chat/index.txt +2 -2
  31. khoj/processor/conversation/anthropic/anthropic_chat.py +8 -9
  32. khoj/processor/conversation/anthropic/utils.py +30 -7
  33. khoj/processor/conversation/google/gemini_chat.py +10 -10
  34. khoj/processor/conversation/google/utils.py +20 -12
  35. khoj/processor/conversation/offline/chat_model.py +2 -7
  36. khoj/processor/conversation/openai/gpt.py +8 -9
  37. khoj/processor/conversation/utils.py +132 -21
  38. khoj/processor/operator/README.md +59 -0
  39. khoj/processor/operator/{operate_browser.py → __init__.py} +98 -34
  40. khoj/processor/operator/grounding_agent.py +229 -175
  41. khoj/processor/operator/grounding_agent_uitars.py +59 -48
  42. khoj/processor/operator/operator_actions.py +48 -0
  43. khoj/processor/operator/operator_agent_anthropic.py +298 -90
  44. khoj/processor/operator/operator_agent_base.py +45 -14
  45. khoj/processor/operator/operator_agent_binary.py +125 -57
  46. khoj/processor/operator/operator_agent_openai.py +183 -75
  47. khoj/processor/operator/operator_environment_base.py +11 -1
  48. khoj/processor/operator/operator_environment_browser.py +5 -3
  49. khoj/processor/operator/operator_environment_computer.py +658 -0
  50. khoj/routers/api_chat.py +36 -25
  51. khoj/routers/helpers.py +8 -17
  52. khoj/routers/research.py +43 -20
  53. khoj/utils/constants.py +4 -4
  54. khoj/utils/helpers.py +12 -15
  55. {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev144.dist-info}/METADATA +3 -1
  56. {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev144.dist-info}/RECORD +61 -59
  57. khoj/interface/compiled/_next/static/chunks/4986-9ddd694756d03aa1.js +0 -1
  58. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e49165209d2e406c.js +0 -1
  59. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-64a53f8ec4afa6b3.js +0 -1
  60. khoj/interface/compiled/_next/static/css/bb7ea98028b368f3.css +0 -1
  61. /khoj/interface/compiled/_next/static/{y_k1yn7bI1CgM5ZfW7jUq → aJZTO0gnTwX0Dca_dPw4r}/_buildManifest.js +0 -0
  62. /khoj/interface/compiled/_next/static/{y_k1yn7bI1CgM5ZfW7jUq → aJZTO0gnTwX0Dca_dPw4r}/_ssgManifest.js +0 -0
  63. {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev144.dist-info}/WHEEL +0 -0
  64. {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev144.dist-info}/entry_points.txt +0 -0
  65. {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev144.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api_chat.py CHANGED
@@ -26,12 +26,13 @@ from khoj.database.models import Agent, KhojUser
26
26
  from khoj.processor.conversation import prompts
27
27
  from khoj.processor.conversation.prompts import help_message, no_entries_found
28
28
  from khoj.processor.conversation.utils import (
29
+ OperatorRun,
29
30
  ResponseWithThought,
30
31
  defilter_query,
31
32
  save_to_conversation_log,
32
33
  )
33
34
  from khoj.processor.image.generate import text_to_image
34
- from khoj.processor.operator.operate_browser import operate_browser
35
+ from khoj.processor.operator import operate_environment
35
36
  from khoj.processor.speech.text_to_speech import generate_text_to_speech
36
37
  from khoj.processor.tools.online_search import (
37
38
  deduplicate_organic_results,
@@ -65,10 +66,7 @@ from khoj.routers.helpers import (
65
66
  update_telemetry_state,
66
67
  validate_chat_model,
67
68
  )
68
- from khoj.routers.research import (
69
- InformationCollectionIteration,
70
- execute_information_collection,
71
- )
69
+ from khoj.routers.research import ResearchIteration, research
72
70
  from khoj.routers.storage import upload_user_image_to_bucket
73
71
  from khoj.utils import state
74
72
  from khoj.utils.helpers import (
@@ -722,10 +720,10 @@ async def chat(
722
720
  for file in raw_query_files:
723
721
  query_files[file.name] = file.content
724
722
 
725
- research_results: List[InformationCollectionIteration] = []
723
+ research_results: List[ResearchIteration] = []
726
724
  online_results: Dict = dict()
727
725
  code_results: Dict = dict()
728
- operator_results: Dict[str, str] = {}
726
+ operator_results: List[OperatorRun] = []
729
727
  compiled_references: List[Any] = []
730
728
  inferred_queries: List[Any] = []
731
729
  attached_file_context = gather_raw_query_files(query_files)
@@ -960,11 +958,10 @@ async def chat(
960
958
  last_message = conversation.messages[-1]
961
959
  online_results = {key: val.model_dump() for key, val in last_message.onlineContext.items() or []}
962
960
  code_results = {key: val.model_dump() for key, val in last_message.codeContext.items() or []}
963
- operator_results = last_message.operatorContext or {}
964
961
  compiled_references = [ref.model_dump() for ref in last_message.context or []]
965
- research_results = [
966
- InformationCollectionIteration(**iter_dict) for iter_dict in last_message.researchContext or []
967
- ]
962
+ research_results = [ResearchIteration(**iter_dict) for iter_dict in last_message.researchContext or []]
963
+ operator_results = [OperatorRun(**iter_dict) for iter_dict in last_message.operatorContext or []]
964
+ train_of_thought = [thought.model_dump() for thought in last_message.trainOfThought or []]
968
965
  # Drop the interrupted message from conversation history
969
966
  meta_log["chat"].pop()
970
967
  logger.info(f"Loaded interrupted partial context from conversation {conversation_id}.")
@@ -1009,12 +1006,12 @@ async def chat(
1009
1006
  file_filters = conversation.file_filters if conversation and conversation.file_filters else []
1010
1007
 
1011
1008
  if conversation_commands == [ConversationCommand.Research]:
1012
- async for research_result in execute_information_collection(
1009
+ async for research_result in research(
1013
1010
  user=user,
1014
1011
  query=defiltered_query,
1015
1012
  conversation_id=conversation_id,
1016
1013
  conversation_history=meta_log,
1017
- previous_iterations=research_results,
1014
+ previous_iterations=list(research_results),
1018
1015
  query_images=uploaded_images,
1019
1016
  agent=agent,
1020
1017
  send_status_func=partial(send_event, ChatEvent.STATUS),
@@ -1025,7 +1022,7 @@ async def chat(
1025
1022
  tracer=tracer,
1026
1023
  cancellation_event=cancellation_event,
1027
1024
  ):
1028
- if isinstance(research_result, InformationCollectionIteration):
1025
+ if isinstance(research_result, ResearchIteration):
1029
1026
  if research_result.summarizedResult:
1030
1027
  if research_result.onlineContext:
1031
1028
  online_results.update(research_result.onlineContext)
@@ -1033,13 +1030,26 @@ async def chat(
1033
1030
  code_results.update(research_result.codeContext)
1034
1031
  if research_result.context:
1035
1032
  compiled_references.extend(research_result.context)
1036
- if research_result.operatorContext:
1037
- operator_results.update(research_result.operatorContext)
1033
+ if not research_results or research_results[-1] is not research_result:
1038
1034
  research_results.append(research_result)
1039
-
1040
1035
  else:
1041
1036
  yield research_result
1042
1037
 
1038
+ # Track operator results across research and operator iterations
1039
+ # This relies on two conditions:
1040
+ # 1. Check to append new (partial) operator results
1041
+ # Relies on triggering this check on every status updates.
1042
+ # Status updates cascade up from operator to research to chat api on every step.
1043
+ # 2. Keep operator results in sync with each research operator step
1044
+ # Relies on python object references to ensure operator results
1045
+ # are implicitly kept in sync after the initial append
1046
+ if (
1047
+ research_results
1048
+ and research_results[-1].operatorContext
1049
+ and (not operator_results or operator_results[-1] is not research_results[-1].operatorContext)
1050
+ ):
1051
+ operator_results.append(research_results[-1].operatorContext)
1052
+
1043
1053
  # researched_results = await extract_relevant_info(q, researched_results, agent)
1044
1054
  if state.verbose > 1:
1045
1055
  logger.debug(f'Researched Results: {"".join(r.summarizedResult for r in research_results)}')
@@ -1292,11 +1302,12 @@ async def chat(
1292
1302
  )
1293
1303
  if ConversationCommand.Operator in conversation_commands:
1294
1304
  try:
1295
- async for result in operate_browser(
1305
+ async for result in operate_environment(
1296
1306
  defiltered_query,
1297
1307
  user,
1298
1308
  meta_log,
1299
1309
  location,
1310
+ list(operator_results)[-1] if operator_results else None,
1300
1311
  query_images=uploaded_images,
1301
1312
  query_files=attached_file_context,
1302
1313
  send_status_func=partial(send_event, ChatEvent.STATUS),
@@ -1306,16 +1317,17 @@ async def chat(
1306
1317
  ):
1307
1318
  if isinstance(result, dict) and ChatEvent.STATUS in result:
1308
1319
  yield result[ChatEvent.STATUS]
1309
- else:
1310
- operator_results = {result["query"]: result["result"]}
1320
+ elif isinstance(result, OperatorRun):
1321
+ if not operator_results or operator_results[-1] is not result:
1322
+ operator_results.append(result)
1311
1323
  # Add webpages visited while operating browser to references
1312
- if result.get("webpages"):
1324
+ if result.webpages:
1313
1325
  if not online_results.get(defiltered_query):
1314
- online_results[defiltered_query] = {"webpages": result["webpages"]}
1326
+ online_results[defiltered_query] = {"webpages": result.webpages}
1315
1327
  elif not online_results[defiltered_query].get("webpages"):
1316
- online_results[defiltered_query]["webpages"] = result["webpages"]
1328
+ online_results[defiltered_query]["webpages"] = result.webpages
1317
1329
  else:
1318
- online_results[defiltered_query]["webpages"] += result["webpages"]
1330
+ online_results[defiltered_query]["webpages"] += result.webpages
1319
1331
  except ValueError as e:
1320
1332
  program_execution_context.append(f"Browser operation error: {e}")
1321
1333
  logger.warning(f"Failed to operate browser with {e}", exc_info=True)
@@ -1333,7 +1345,6 @@ async def chat(
1333
1345
  "context": compiled_references,
1334
1346
  "onlineContext": unique_online_results,
1335
1347
  "codeContext": code_results,
1336
- "operatorContext": operator_results,
1337
1348
  },
1338
1349
  ):
1339
1350
  yield result
khoj/routers/helpers.py CHANGED
@@ -94,7 +94,8 @@ from khoj.processor.conversation.openai.gpt import (
94
94
  )
95
95
  from khoj.processor.conversation.utils import (
96
96
  ChatEvent,
97
- InformationCollectionIteration,
97
+ OperatorRun,
98
+ ResearchIteration,
98
99
  ResponseWithThought,
99
100
  clean_json,
100
101
  clean_mermaidjs,
@@ -385,7 +386,7 @@ async def aget_data_sources_and_output_format(
385
386
  if len(agent_outputs) == 0 or output.value in agent_outputs:
386
387
  output_options_str += f'- "{output.value}": "{description}"\n'
387
388
 
388
- chat_history = construct_chat_history(conversation_history)
389
+ chat_history = construct_chat_history(conversation_history, n=6)
389
390
 
390
391
  if query_images:
391
392
  query = f"[placeholder for {len(query_images)} user attached images]\n{query}"
@@ -1174,12 +1175,7 @@ async def send_message_to_model_wrapper(
1174
1175
  if vision_available and query_images:
1175
1176
  logger.info(f"Using {chat_model.name} model to understand {len(query_images)} images.")
1176
1177
 
1177
- subscribed = await ais_user_subscribed(user) if user else False
1178
- max_tokens = (
1179
- chat_model.subscribed_max_prompt_size
1180
- if subscribed and chat_model.subscribed_max_prompt_size
1181
- else chat_model.max_prompt_size
1182
- )
1178
+ max_tokens = await ConversationAdapters.aget_max_context_size(chat_model, user)
1183
1179
  chat_model_name = chat_model.name
1184
1180
  tokenizer = chat_model.tokenizer
1185
1181
  model_type = chat_model.model_type
@@ -1271,12 +1267,7 @@ def send_message_to_model_wrapper_sync(
1271
1267
  if chat_model is None:
1272
1268
  raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.")
1273
1269
 
1274
- subscribed = is_user_subscribed(user) if user else False
1275
- max_tokens = (
1276
- chat_model.subscribed_max_prompt_size
1277
- if subscribed and chat_model.subscribed_max_prompt_size
1278
- else chat_model.max_prompt_size
1279
- )
1270
+ max_tokens = ConversationAdapters.get_max_context_size(chat_model, user)
1280
1271
  chat_model_name = chat_model.name
1281
1272
  model_type = chat_model.model_type
1282
1273
  vision_available = chat_model.vision_enabled
@@ -1355,8 +1346,8 @@ async def agenerate_chat_response(
1355
1346
  compiled_references: List[Dict] = [],
1356
1347
  online_results: Dict[str, Dict] = {},
1357
1348
  code_results: Dict[str, Dict] = {},
1358
- operator_results: Dict[str, str] = {},
1359
- research_results: List[InformationCollectionIteration] = [],
1349
+ operator_results: List[OperatorRun] = [],
1350
+ research_results: List[ResearchIteration] = [],
1360
1351
  inferred_queries: List[str] = [],
1361
1352
  conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
1362
1353
  user: KhojUser = None,
@@ -1414,7 +1405,7 @@ async def agenerate_chat_response(
1414
1405
  compiled_references = []
1415
1406
  online_results = {}
1416
1407
  code_results = {}
1417
- operator_results = {}
1408
+ operator_results = []
1418
1409
  deepthought = True
1419
1410
 
1420
1411
  chat_model = await ConversationAdapters.aget_valid_chat_model(user, conversation, is_subscribed)
khoj/routers/research.py CHANGED
@@ -13,12 +13,13 @@ from khoj.database.adapters import AgentAdapters, EntryAdapters
13
13
  from khoj.database.models import Agent, KhojUser
14
14
  from khoj.processor.conversation import prompts
15
15
  from khoj.processor.conversation.utils import (
16
- InformationCollectionIteration,
16
+ OperatorRun,
17
+ ResearchIteration,
17
18
  construct_iteration_history,
18
19
  construct_tool_chat_history,
19
20
  load_complex_json,
20
21
  )
21
- from khoj.processor.operator.operate_browser import operate_browser
22
+ from khoj.processor.operator import operate_environment
22
23
  from khoj.processor.tools.online_search import read_webpages, search_online
23
24
  from khoj.processor.tools.run_code import run_code
24
25
  from khoj.routers.api import extract_references_and_questions
@@ -83,7 +84,7 @@ async def apick_next_tool(
83
84
  location: LocationData = None,
84
85
  user_name: str = None,
85
86
  agent: Agent = None,
86
- previous_iterations: List[InformationCollectionIteration] = [],
87
+ previous_iterations: List[ResearchIteration] = [],
87
88
  max_iterations: int = 5,
88
89
  query_images: List[str] = [],
89
90
  query_files: str = None,
@@ -95,6 +96,24 @@ async def apick_next_tool(
95
96
  ):
96
97
  """Given a query, determine which of the available tools the agent should use in order to answer appropriately."""
97
98
 
99
+ # Continue with previous iteration if a multi-step tool use is in progress
100
+ if (
101
+ previous_iterations
102
+ and previous_iterations[-1].tool == ConversationCommand.Operator
103
+ and not previous_iterations[-1].summarizedResult
104
+ ):
105
+ previous_iteration = previous_iterations[-1]
106
+ yield ResearchIteration(
107
+ tool=previous_iteration.tool,
108
+ query=query,
109
+ context=previous_iteration.context,
110
+ onlineContext=previous_iteration.onlineContext,
111
+ codeContext=previous_iteration.codeContext,
112
+ operatorContext=previous_iteration.operatorContext,
113
+ warning=previous_iteration.warning,
114
+ )
115
+ return
116
+
98
117
  # Construct tool options for the agent to choose from
99
118
  tool_options = dict()
100
119
  tool_options_str = ""
@@ -165,7 +184,7 @@ async def apick_next_tool(
165
184
  )
166
185
  except Exception as e:
167
186
  logger.error(f"Failed to infer information sources to refer: {e}", exc_info=True)
168
- yield InformationCollectionIteration(
187
+ yield ResearchIteration(
169
188
  tool=None,
170
189
  query=None,
171
190
  warning="Failed to infer information sources to refer. Skipping iteration. Try again.",
@@ -194,26 +213,26 @@ async def apick_next_tool(
194
213
  async for event in send_status_func(f"{scratchpad}"):
195
214
  yield {ChatEvent.STATUS: event}
196
215
 
197
- yield InformationCollectionIteration(
216
+ yield ResearchIteration(
198
217
  tool=selected_tool,
199
218
  query=generated_query,
200
219
  warning=warning,
201
220
  )
202
221
  except Exception as e:
203
222
  logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
204
- yield InformationCollectionIteration(
223
+ yield ResearchIteration(
205
224
  tool=None,
206
225
  query=None,
207
226
  warning=f"Invalid response for determining relevant tools: {response}. Skipping iteration. Fix error: {e}",
208
227
  )
209
228
 
210
229
 
211
- async def execute_information_collection(
230
+ async def research(
212
231
  user: KhojUser,
213
232
  query: str,
214
233
  conversation_id: str,
215
234
  conversation_history: dict,
216
- previous_iterations: List[InformationCollectionIteration],
235
+ previous_iterations: List[ResearchIteration],
217
236
  query_images: List[str],
218
237
  agent: Agent = None,
219
238
  send_status_func: Optional[Callable] = None,
@@ -248,9 +267,9 @@ async def execute_information_collection(
248
267
  online_results: Dict = dict()
249
268
  code_results: Dict = dict()
250
269
  document_results: List[Dict[str, str]] = []
251
- operator_results: Dict[str, str] = {}
270
+ operator_results: OperatorRun = None
252
271
  summarize_files: str = ""
253
- this_iteration = InformationCollectionIteration(tool=None, query=query)
272
+ this_iteration = ResearchIteration(tool=None, query=query)
254
273
 
255
274
  async for result in apick_next_tool(
256
275
  query,
@@ -271,8 +290,9 @@ async def execute_information_collection(
271
290
  ):
272
291
  if isinstance(result, dict) and ChatEvent.STATUS in result:
273
292
  yield result[ChatEvent.STATUS]
274
- elif isinstance(result, InformationCollectionIteration):
293
+ elif isinstance(result, ResearchIteration):
275
294
  this_iteration = result
295
+ yield this_iteration
276
296
 
277
297
  # Skip running iteration if warning present in iteration
278
298
  if this_iteration.warning:
@@ -417,12 +437,13 @@ async def execute_information_collection(
417
437
 
418
438
  elif this_iteration.tool == ConversationCommand.Operator:
419
439
  try:
420
- async for result in operate_browser(
440
+ async for result in operate_environment(
421
441
  this_iteration.query,
422
442
  user,
423
443
  construct_tool_chat_history(previous_iterations, ConversationCommand.Operator),
424
444
  location,
425
- send_status_func,
445
+ previous_iterations[-1].operatorContext if previous_iterations else None,
446
+ send_status_func=send_status_func,
426
447
  query_images=query_images,
427
448
  agent=agent,
428
449
  query_files=query_files,
@@ -431,17 +452,17 @@ async def execute_information_collection(
431
452
  ):
432
453
  if isinstance(result, dict) and ChatEvent.STATUS in result:
433
454
  yield result[ChatEvent.STATUS]
434
- else:
435
- operator_results = {result["query"]: result["result"]}
455
+ elif isinstance(result, OperatorRun):
456
+ operator_results = result
436
457
  this_iteration.operatorContext = operator_results
437
458
  # Add webpages visited while operating browser to references
438
- if result.get("webpages"):
459
+ if result.webpages:
439
460
  if not online_results.get(this_iteration.query):
440
- online_results[this_iteration.query] = {"webpages": result["webpages"]}
461
+ online_results[this_iteration.query] = {"webpages": result.webpages}
441
462
  elif not online_results[this_iteration.query].get("webpages"):
442
- online_results[this_iteration.query]["webpages"] = result["webpages"]
463
+ online_results[this_iteration.query]["webpages"] = result.webpages
443
464
  else:
444
- online_results[this_iteration.query]["webpages"] += result["webpages"]
465
+ online_results[this_iteration.query]["webpages"] += result.webpages
445
466
  this_iteration.onlineContext = online_results
446
467
  except Exception as e:
447
468
  this_iteration.warning = f"Error operating browser: {e}"
@@ -489,7 +510,9 @@ async def execute_information_collection(
489
510
  if code_results:
490
511
  results_data += f"\n<code_results>\n{yaml.dump(truncate_code_context(code_results), allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</code_results>"
491
512
  if operator_results:
492
- results_data += f"\n<browser_operator_results>\n{next(iter(operator_results.values()))}\n</browser_operator_results>"
513
+ results_data += (
514
+ f"\n<browser_operator_results>\n{operator_results.response}\n</browser_operator_results>"
515
+ )
493
516
  if summarize_files:
494
517
  results_data += f"\n<summarized_files>\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</summarized_files>"
495
518
  if this_iteration.warning:
khoj/utils/constants.py CHANGED
@@ -18,8 +18,8 @@ default_offline_chat_models = [
18
18
  "bartowski/Qwen2.5-14B-Instruct-GGUF",
19
19
  ]
20
20
  default_openai_chat_models = ["gpt-4o-mini", "gpt-4.1"]
21
- default_gemini_chat_models = ["gemini-2.0-flash", "gemini-2.5-flash-preview-04-17", "gemini-2.5-pro-preview-03-25"]
22
- default_anthropic_chat_models = ["claude-3-7-sonnet-latest", "claude-3-5-haiku-latest"]
21
+ default_gemini_chat_models = ["gemini-2.0-flash", "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-05-06"]
22
+ default_anthropic_chat_models = ["claude-sonnet-4-0", "claude-3-5-haiku-latest"]
23
23
 
24
24
  empty_config = {
25
25
  "search-type": {
@@ -63,10 +63,10 @@ model_to_cost: Dict[str, Dict[str, float]] = {
63
63
  "claude-3-7-sonnet-20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
64
64
  "claude-3-7-sonnet@20250219": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
65
65
  "claude-3-7-sonnet-latest": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
66
- "claude-sonnet-4": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
66
+ "claude-sonnet-4-0": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
67
67
  "claude-sonnet-4-20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
68
68
  "claude-sonnet-4@20250514": {"input": 3.0, "output": 15.0, "cache_read": 0.3, "cache_write": 3.75},
69
- "claude-opus-4": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
69
+ "claude-opus-4-0": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
70
70
  "claude-opus-4-20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
71
71
  "claude-opus-4@20250514": {"input": 15.0, "output": 75.0, "cache_read": 1.50, "cache_write": 18.75},
72
72
  # Grok pricing: https://docs.x.ai/docs/models
khoj/utils/helpers.py CHANGED
@@ -46,6 +46,7 @@ if TYPE_CHECKING:
46
46
  from khoj.utils.models import BaseEncoder
47
47
  from khoj.utils.rawconfig import AppConfig
48
48
 
49
+ logger = logging.getLogger(__name__)
49
50
 
50
51
  # Initialize Magika for file type identification
51
52
  magika = Magika()
@@ -364,7 +365,7 @@ command_descriptions = {
364
365
  ConversationCommand.Summarize: "Get help with a question pertaining to an entire document.",
365
366
  ConversationCommand.Diagram: "Draw a flowchart, diagram, or any other visual representation best expressed with primitives like lines, rectangles, and text.",
366
367
  ConversationCommand.Research: "Do deep research on a topic. This will take longer than usual, but give a more detailed, comprehensive answer.",
367
- ConversationCommand.Operator: "Operate and perform tasks using a GUI web browser.",
368
+ ConversationCommand.Operator: "Operate and perform tasks using a computer.",
368
369
  }
369
370
 
370
371
  command_descriptions_for_agent = {
@@ -373,12 +374,12 @@ command_descriptions_for_agent = {
373
374
  ConversationCommand.Online: "Agent can search the internet for information.",
374
375
  ConversationCommand.Webpage: "Agent can read suggested web pages for information.",
375
376
  ConversationCommand.Research: "Agent can do deep research on a topic.",
376
- ConversationCommand.Code: "Agent can run Python code to parse information, run complex calculations, create documents and charts.",
377
- ConversationCommand.Operator: "Agent can operate and perform actions using a GUI web browser to complete a task.",
377
+ ConversationCommand.Code: "Agent can run a Python script to parse information, run complex calculations, create documents and charts.",
378
+ ConversationCommand.Operator: "Agent can operate a computer to complete tasks.",
378
379
  }
379
380
 
380
- e2b_tool_description = "To run Python code in a E2B sandbox with no network access. Helpful to parse complex information, run calculations, create text documents and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available."
381
- terrarium_tool_description = "To run Python code in a Terrarium, Pyodide sandbox with no network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. Only matplotlib, panda, numpy, scipy, bs4 and sympy external packages are available."
381
+ e2b_tool_description = "To run a Python script in a E2B sandbox with no network access. Helpful to parse complex information, run calculations, create text documents and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4, sympy, einops, biopython, shapely, plotly and rdkit external packages are available."
382
+ terrarium_tool_description = "To run a Python script in a Terrarium, Pyodide sandbox with no network access. Helpful to parse complex information, run complex calculations, create plaintext documents and create charts with quantitative data. Only matplotlib, panda, numpy, scipy, bs4 and sympy external packages are available."
382
383
 
383
384
  tool_descriptions_for_llm = {
384
385
  ConversationCommand.Default: "To use a mix of your internal knowledge and the user's personal knowledge, or if you don't entirely understand the query.",
@@ -387,7 +388,7 @@ tool_descriptions_for_llm = {
387
388
  ConversationCommand.Online: "To search for the latest, up-to-date information from the internet. Note: **Questions about Khoj should always use this data source**",
388
389
  ConversationCommand.Webpage: "To use if the user has directly provided the webpage urls or you are certain of the webpage urls to read.",
389
390
  ConversationCommand.Code: e2b_tool_description if is_e2b_code_sandbox_enabled() else terrarium_tool_description,
390
- ConversationCommand.Operator: "To use when you need to operate and take actions using a GUI web browser.",
391
+ ConversationCommand.Operator: "To use when you need to operate a computer to complete the task.",
391
392
  }
392
393
 
393
394
  tool_description_for_research_llm = {
@@ -396,7 +397,7 @@ tool_description_for_research_llm = {
396
397
  ConversationCommand.Webpage: "To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share upto {max_webpages_to_read} webpage links and what information to extract from them in your query.",
397
398
  ConversationCommand.Code: e2b_tool_description if is_e2b_code_sandbox_enabled() else terrarium_tool_description,
398
399
  ConversationCommand.Text: "To respond to the user once you've completed your research and have the required information.",
399
- ConversationCommand.Operator: "To operate and take actions using a GUI web browser.",
400
+ ConversationCommand.Operator: "To operate a computer to complete the task.",
400
401
  }
401
402
 
402
403
  mode_descriptions_for_llm = {
@@ -493,13 +494,7 @@ def is_promptrace_enabled():
493
494
  def is_operator_enabled():
494
495
  """Check if Khoj can operate GUI applications.
495
496
  Set KHOJ_OPERATOR_ENABLED env var to true and install playwright to enable it."""
496
- try:
497
- import playwright
498
-
499
- is_playwright_installed = True
500
- except ImportError:
501
- is_playwright_installed = False
502
- return is_env_var_true("KHOJ_OPERATOR_ENABLED") and is_playwright_installed
497
+ return is_env_var_true("KHOJ_OPERATOR_ENABLED")
503
498
 
504
499
 
505
500
  def is_valid_url(url: str) -> bool:
@@ -686,7 +681,7 @@ def get_chat_usage_metrics(
686
681
  "cache_write_tokens": 0,
687
682
  "cost": 0.0,
688
683
  }
689
- return {
684
+ current_usage = {
690
685
  "input_tokens": prev_usage["input_tokens"] + input_tokens,
691
686
  "output_tokens": prev_usage["output_tokens"] + output_tokens,
692
687
  "thought_tokens": prev_usage.get("thought_tokens", 0) + thought_tokens,
@@ -703,6 +698,8 @@ def get_chat_usage_metrics(
703
698
  prev_cost=prev_usage["cost"],
704
699
  ),
705
700
  }
701
+ logger.debug(f"AI API usage by {model_name}: {current_usage}")
702
+ return current_usage
706
703
 
707
704
 
708
705
  class AiApiInfo(NamedTuple):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: khoj
3
- Version: 1.41.1.dev107
3
+ Version: 1.41.1.dev144
4
4
  Summary: Your Second Brain
5
5
  Project-URL: Homepage, https://khoj.dev
6
6
  Project-URL: Documentation, https://docs.khoj.dev
@@ -91,6 +91,7 @@ Requires-Dist: pandas; extra == 'dev'
91
91
  Requires-Dist: pgserver==0.1.4; extra == 'dev'
92
92
  Requires-Dist: playwright>=1.49.0; extra == 'dev'
93
93
  Requires-Dist: pre-commit>=3.0.4; extra == 'dev'
94
+ Requires-Dist: pyautogui==0.9.54; extra == 'dev'
94
95
  Requires-Dist: pytest-asyncio==0.21.1; extra == 'dev'
95
96
  Requires-Dist: pytest-django==4.5.2; extra == 'dev'
96
97
  Requires-Dist: pytest-xdist[psutil]; extra == 'dev'
@@ -100,6 +101,7 @@ Requires-Dist: twilio==8.11; extra == 'dev'
100
101
  Provides-Extra: local
101
102
  Requires-Dist: pgserver==0.1.4; extra == 'local'
102
103
  Requires-Dist: playwright>=1.49.0; extra == 'local'
104
+ Requires-Dist: pyautogui==0.9.54; extra == 'local'
103
105
  Provides-Extra: prod
104
106
  Requires-Dist: boto3>=1.34.57; extra == 'prod'
105
107
  Requires-Dist: gunicorn==22.0.0; extra == 'prod'