khoj 1.41.1.dev107__py3-none-any.whl → 1.41.1.dev142__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. khoj/database/adapters/__init__.py +20 -0
  2. khoj/database/models/__init__.py +1 -1
  3. khoj/interface/compiled/404/index.html +2 -2
  4. khoj/interface/compiled/_next/static/chunks/{2327-aa22697ed9c8d54a.js → 2327-f03b2a77f67b8f8c.js} +1 -1
  5. khoj/interface/compiled/_next/static/chunks/{8515-f305779d95dd5780.js → 5138-81457f7f59956b56.js} +9 -9
  6. khoj/interface/compiled/_next/static/chunks/7127-d3199617463d45f0.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  8. khoj/interface/compiled/_next/static/chunks/app/agents/{page-c9ceb9b94e24b94a.js → page-774c78ff0f55a228.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/automations/{page-3dc59a0df3827dc7.js → page-4454891c5007b870.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/app/chat/{page-2b27c7118d8d5a16.js → page-5a2559825b4d5def.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/app/{page-38f1f125d7aeb4c7.js → page-f7a0286dfc31ad6b.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/search/{page-26d4492fb1200e0e.js → page-f1a7f278c89e09b6.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/settings/{page-bf1a4e488b29fceb.js → page-5d9134d4a97f8834.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-abb6c5f4239ad7be.js +1 -0
  16. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-a1f10c96366c3a4f.js → page-32cd0ceb9ffbd777.js} +1 -1
  17. khoj/interface/compiled/_next/static/chunks/{webpack-c6bde5961098facd.js → webpack-952bc0d41769db77.js} +1 -1
  18. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  19. khoj/interface/compiled/_next/static/css/93eeacc43e261162.css +1 -0
  20. khoj/interface/compiled/agents/index.html +2 -2
  21. khoj/interface/compiled/agents/index.txt +2 -2
  22. khoj/interface/compiled/automations/index.html +2 -2
  23. khoj/interface/compiled/automations/index.txt +3 -3
  24. khoj/interface/compiled/chat/index.html +2 -2
  25. khoj/interface/compiled/chat/index.txt +2 -2
  26. khoj/interface/compiled/index.html +2 -2
  27. khoj/interface/compiled/index.txt +2 -2
  28. khoj/interface/compiled/search/index.html +2 -2
  29. khoj/interface/compiled/search/index.txt +2 -2
  30. khoj/interface/compiled/settings/index.html +2 -2
  31. khoj/interface/compiled/settings/index.txt +4 -4
  32. khoj/interface/compiled/share/chat/index.html +2 -2
  33. khoj/interface/compiled/share/chat/index.txt +2 -2
  34. khoj/processor/conversation/anthropic/anthropic_chat.py +8 -9
  35. khoj/processor/conversation/anthropic/utils.py +30 -7
  36. khoj/processor/conversation/google/gemini_chat.py +10 -10
  37. khoj/processor/conversation/google/utils.py +20 -12
  38. khoj/processor/conversation/offline/chat_model.py +2 -7
  39. khoj/processor/conversation/openai/gpt.py +8 -9
  40. khoj/processor/conversation/utils.py +132 -21
  41. khoj/processor/operator/README.md +59 -0
  42. khoj/processor/operator/{operate_browser.py → __init__.py} +98 -34
  43. khoj/processor/operator/grounding_agent.py +229 -175
  44. khoj/processor/operator/grounding_agent_uitars.py +59 -48
  45. khoj/processor/operator/operator_actions.py +48 -0
  46. khoj/processor/operator/operator_agent_anthropic.py +298 -90
  47. khoj/processor/operator/operator_agent_base.py +45 -14
  48. khoj/processor/operator/operator_agent_binary.py +125 -57
  49. khoj/processor/operator/operator_agent_openai.py +183 -75
  50. khoj/processor/operator/operator_environment_base.py +11 -1
  51. khoj/processor/operator/operator_environment_browser.py +5 -3
  52. khoj/processor/operator/operator_environment_computer.py +658 -0
  53. khoj/routers/api_chat.py +36 -25
  54. khoj/routers/helpers.py +8 -17
  55. khoj/routers/research.py +43 -20
  56. khoj/utils/constants.py +4 -4
  57. khoj/utils/helpers.py +12 -15
  58. {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev142.dist-info}/METADATA +3 -1
  59. {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev142.dist-info}/RECORD +70 -68
  60. khoj/interface/compiled/_next/static/chunks/4986-9ddd694756d03aa1.js +0 -1
  61. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e49165209d2e406c.js +0 -1
  62. khoj/interface/compiled/_next/static/chunks/app/chat/layout-d5ae861e1ade9d08.js +0 -1
  63. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-64a53f8ec4afa6b3.js +0 -1
  64. khoj/interface/compiled/_next/static/css/bb7ea98028b368f3.css +0 -1
  65. khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +0 -1
  66. /khoj/interface/compiled/_next/static/{y_k1yn7bI1CgM5ZfW7jUq → 4CIEX6Ko-Qehhb7L-ymZw}/_buildManifest.js +0 -0
  67. /khoj/interface/compiled/_next/static/{y_k1yn7bI1CgM5ZfW7jUq → 4CIEX6Ko-Qehhb7L-ymZw}/_ssgManifest.js +0 -0
  68. /khoj/interface/compiled/_next/static/chunks/{1915-ab4353eaca76f690.js → 1915-1943ee8a628b893c.js} +0 -0
  69. /khoj/interface/compiled/_next/static/chunks/{2117-1c18aa2098982bf9.js → 2117-5a41630a2bd2eae8.js} +0 -0
  70. /khoj/interface/compiled/_next/static/chunks/{4363-4efaf12abe696251.js → 4363-e6ac2203564d1a3b.js} +0 -0
  71. /khoj/interface/compiled/_next/static/chunks/{4447-5d44807c40355b1a.js → 4447-e038b251d626c340.js} +0 -0
  72. /khoj/interface/compiled/_next/static/chunks/{8667-adbe6017a66cef10.js → 8667-8136f74e9a086fca.js} +0 -0
  73. /khoj/interface/compiled/_next/static/chunks/{9259-d8bcd9da9e80c81e.js → 9259-640fdd77408475df.js} +0 -0
  74. {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev142.dist-info}/WHEEL +0 -0
  75. {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev142.dist-info}/entry_points.txt +0 -0
  76. {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev142.dist-info}/licenses/LICENSE +0 -0
@@ -21,6 +21,7 @@ from tenacity import (
21
21
  )
22
22
 
23
23
  from khoj.processor.conversation.utils import (
24
+ ResponseWithThought,
24
25
  commit_conversation_trace,
25
26
  get_image_from_base64,
26
27
  get_image_from_url,
@@ -102,7 +103,7 @@ def gemini_completion_with_backoff(
102
103
  client = get_gemini_client(api_key, api_base_url)
103
104
  gemini_clients[api_key] = client
104
105
 
105
- formatted_messages, system_prompt = format_messages_for_gemini(messages, system_prompt)
106
+ formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
106
107
 
107
108
  # format model response schema
108
109
  response_schema = None
@@ -110,12 +111,12 @@ def gemini_completion_with_backoff(
110
111
  response_schema = clean_response_schema(model_kwargs["response_schema"])
111
112
 
112
113
  thinking_config = None
113
- if deepthought and model_name.startswith("gemini-2-5"):
114
+ if deepthought and model_name.startswith("gemini-2.5"):
114
115
  thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI)
115
116
 
116
117
  seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
117
118
  config = gtypes.GenerateContentConfig(
118
- system_instruction=system_prompt,
119
+ system_instruction=system_instruction,
119
120
  temperature=temperature,
120
121
  thinking_config=thinking_config,
121
122
  max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
@@ -178,21 +179,21 @@ async def gemini_chat_completion_with_backoff(
178
179
  model_kwargs=None,
179
180
  deepthought=False,
180
181
  tracer: dict = {},
181
- ) -> AsyncGenerator[str, None]:
182
+ ) -> AsyncGenerator[ResponseWithThought, None]:
182
183
  client = gemini_clients.get(api_key)
183
184
  if not client:
184
185
  client = get_gemini_client(api_key, api_base_url)
185
186
  gemini_clients[api_key] = client
186
187
 
187
- formatted_messages, system_prompt = format_messages_for_gemini(messages, system_prompt)
188
+ formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
188
189
 
189
190
  thinking_config = None
190
- if deepthought and model_name.startswith("gemini-2-5"):
191
- thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI)
191
+ if deepthought and model_name.startswith("gemini-2.5"):
192
+ thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True)
192
193
 
193
194
  seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
194
195
  config = gtypes.GenerateContentConfig(
195
- system_instruction=system_prompt,
196
+ system_instruction=system_instruction,
196
197
  temperature=temperature,
197
198
  thinking_config=thinking_config,
198
199
  max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
@@ -216,18 +217,25 @@ async def gemini_chat_completion_with_backoff(
216
217
  logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
217
218
  # Keep track of the last chunk for usage data
218
219
  final_chunk = chunk
219
- # Handle streamed response chunk
220
+
221
+ # handle safety, rate-limit, other finish reasons
220
222
  stop_message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
221
- message = stop_message or chunk.text
222
- aggregated_response += message
223
- yield message
224
223
  if stopped:
224
+ yield ResponseWithThought(response=stop_message)
225
225
  logger.warning(
226
226
  f"LLM Response Prevented for {model_name}: {stop_message}.\n"
227
227
  + f"Last Message by {messages[-1].role}: {messages[-1].content}"
228
228
  )
229
229
  break
230
230
 
231
+ # emit thought vs response parts
232
+ for part in chunk.candidates[0].content.parts:
233
+ if part.text:
234
+ aggregated_response += part.text
235
+ yield ResponseWithThought(response=part.text)
236
+ if part.thought:
237
+ yield ResponseWithThought(thought=part.text)
238
+
231
239
  # Calculate cost of chat
232
240
  input_tokens = final_chunk.usage_metadata.prompt_token_count or 0 if final_chunk else 0
233
241
  output_tokens = final_chunk.usage_metadata.candidates_token_count or 0 if final_chunk else 0
@@ -16,6 +16,7 @@ from khoj.processor.conversation.offline.utils import download_model
16
16
  from khoj.processor.conversation.utils import (
17
17
  clean_json,
18
18
  commit_conversation_trace,
19
+ construct_question_history,
19
20
  generate_chatml_messages_with_context,
20
21
  messages_to_print,
21
22
  )
@@ -64,13 +65,7 @@ def extract_questions_offline(
64
65
  username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
65
66
 
66
67
  # Extract Past User Message and Inferred Questions from Conversation Log
67
- chat_history = ""
68
-
69
- if use_history:
70
- for chat in conversation_log.get("chat", [])[-4:]:
71
- if chat["by"] == "khoj":
72
- chat_history += f"Q: {chat['intent']['query']}\n"
73
- chat_history += f"Khoj: {chat['message']}\n\n"
68
+ chat_history = construct_question_history(conversation_log, include_query=False) if use_history else ""
74
69
 
75
70
  # Get dates relative to today for prompt creation
76
71
  today = datetime.today()
@@ -17,8 +17,10 @@ from khoj.processor.conversation.openai.utils import (
17
17
  )
18
18
  from khoj.processor.conversation.utils import (
19
19
  JsonSupport,
20
+ OperatorRun,
20
21
  ResponseWithThought,
21
22
  clean_json,
23
+ construct_question_history,
22
24
  construct_structured_message,
23
25
  generate_chatml_messages_with_context,
24
26
  messages_to_print,
@@ -55,13 +57,7 @@ def extract_questions(
55
57
  username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
56
58
 
57
59
  # Extract Past User Message and Inferred Questions from Conversation Log
58
- chat_history = "".join(
59
- [
60
- f'Q: {chat["intent"]["query"]}\nKhoj: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
61
- for chat in conversation_log.get("chat", [])[-4:]
62
- if chat["by"] == "khoj" and "to-image" not in chat["intent"].get("type")
63
- ]
64
- )
60
+ chat_history = construct_question_history(conversation_log)
65
61
 
66
62
  # Get dates relative to today for prompt creation
67
63
  today = datetime.today()
@@ -169,7 +165,7 @@ async def converse_openai(
169
165
  references: list[dict],
170
166
  online_results: Optional[Dict[str, Dict]] = None,
171
167
  code_results: Optional[Dict[str, Dict]] = None,
172
- operator_results: Optional[Dict[str, str]] = None,
168
+ operator_results: Optional[List[OperatorRun]] = None,
173
169
  conversation_log={},
174
170
  model: str = "gpt-4o-mini",
175
171
  api_key: Optional[str] = None,
@@ -242,8 +238,11 @@ async def converse_openai(
242
238
  f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
243
239
  )
244
240
  if not is_none_or_empty(operator_results):
241
+ operator_content = [
242
+ {"query": oc.query, "response": oc.response, "webpages": oc.webpages} for oc in operator_results
243
+ ]
245
244
  context_message += (
246
- f"{prompts.operator_execution_context.format(operator_results=yaml_dump(operator_results))}\n\n"
245
+ f"{prompts.operator_execution_context.format(operator_results=yaml_dump(operator_content))}\n\n"
247
246
  )
248
247
 
249
248
  context_message = context_message.strip()
@@ -10,7 +10,7 @@ from dataclasses import dataclass
10
10
  from datetime import datetime
11
11
  from enum import Enum
12
12
  from io import BytesIO
13
- from typing import Any, Callable, Dict, List, Optional
13
+ from typing import Any, Callable, Dict, List, Literal, Optional, Union
14
14
 
15
15
  import PIL.Image
16
16
  import pyjson5
@@ -20,6 +20,7 @@ import yaml
20
20
  from langchain_core.messages.chat import ChatMessage
21
21
  from llama_cpp import LlamaTokenizer
22
22
  from llama_cpp.llama import Llama
23
+ from pydantic import BaseModel
23
24
  from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
24
25
 
25
26
  from khoj.database.adapters import ConversationAdapters
@@ -73,9 +74,9 @@ model_to_prompt_size = {
73
74
  "claude-3-7-sonnet-20250219": 60000,
74
75
  "claude-3-7-sonnet-latest": 60000,
75
76
  "claude-3-5-haiku-20241022": 60000,
76
- "claude-sonnet-4": 60000,
77
+ "claude-sonnet-4-0": 60000,
77
78
  "claude-sonnet-4-20250514": 60000,
78
- "claude-opus-4": 60000,
79
+ "claude-opus-4-0": 60000,
79
80
  "claude-opus-4-20250514": 60000,
80
81
  # Offline Models
81
82
  "bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,
@@ -87,7 +88,49 @@ model_to_prompt_size = {
87
88
  model_to_tokenizer: Dict[str, str] = {}
88
89
 
89
90
 
90
- class InformationCollectionIteration:
91
+ class AgentMessage(BaseModel):
92
+ role: Literal["user", "assistant", "system", "environment"]
93
+ content: Union[str, List]
94
+
95
+
96
+ class OperatorRun:
97
+ def __init__(
98
+ self,
99
+ query: str,
100
+ trajectory: list[AgentMessage] | list[dict] = None,
101
+ response: str = None,
102
+ webpages: list[dict] = None,
103
+ ):
104
+ self.query = query
105
+ self.response = response
106
+ self.webpages = webpages or []
107
+ self.trajectory: list[AgentMessage] = []
108
+ if trajectory:
109
+ for item in trajectory:
110
+ if isinstance(item, dict):
111
+ self.trajectory.append(AgentMessage(**item))
112
+ elif hasattr(item, "role") and hasattr(item, "content"): # Heuristic for AgentMessage like object
113
+ self.trajectory.append(item)
114
+ else:
115
+ logger.warning(f"Unexpected item type in trajectory: {type(item)}")
116
+
117
+ def to_dict(self) -> dict:
118
+ # Ensure AgentMessage instances in trajectory are also dicts
119
+ serialized_trajectory = []
120
+ for msg in self.trajectory:
121
+ if hasattr(msg, "model_dump"): # Check if it's a Pydantic model
122
+ serialized_trajectory.append(msg.model_dump())
123
+ elif isinstance(msg, dict):
124
+ serialized_trajectory.append(msg) # Already a dict
125
+ return {
126
+ "query": self.query,
127
+ "response": self.response,
128
+ "trajectory": serialized_trajectory,
129
+ "webpages": self.webpages,
130
+ }
131
+
132
+
133
+ class ResearchIteration:
91
134
  def __init__(
92
135
  self,
93
136
  tool: str,
@@ -95,7 +138,7 @@ class InformationCollectionIteration:
95
138
  context: list = None,
96
139
  onlineContext: dict = None,
97
140
  codeContext: dict = None,
98
- operatorContext: dict[str, str] = None,
141
+ operatorContext: dict | OperatorRun = None,
99
142
  summarizedResult: str = None,
100
143
  warning: str = None,
101
144
  ):
@@ -104,13 +147,18 @@ class InformationCollectionIteration:
104
147
  self.context = context
105
148
  self.onlineContext = onlineContext
106
149
  self.codeContext = codeContext
107
- self.operatorContext = operatorContext
150
+ self.operatorContext = OperatorRun(**operatorContext) if isinstance(operatorContext, dict) else operatorContext
108
151
  self.summarizedResult = summarizedResult
109
152
  self.warning = warning
110
153
 
154
+ def to_dict(self) -> dict:
155
+ data = vars(self).copy()
156
+ data["operatorContext"] = self.operatorContext.to_dict() if self.operatorContext else None
157
+ return data
158
+
111
159
 
112
160
  def construct_iteration_history(
113
- previous_iterations: List[InformationCollectionIteration],
161
+ previous_iterations: List[ResearchIteration],
114
162
  previous_iteration_prompt: str,
115
163
  query: str = None,
116
164
  ) -> list[dict]:
@@ -143,11 +191,8 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
143
191
  chat_history = ""
144
192
  for chat in conversation_history.get("chat", [])[-n:]:
145
193
  if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]:
146
- chat_history += f"User: {chat['intent']['query']}\n"
147
-
148
194
  if chat["intent"].get("inferred-queries"):
149
195
  chat_history += f'{agent_name}: {{"queries": {chat["intent"].get("inferred-queries")}}}\n'
150
-
151
196
  chat_history += f"{agent_name}: {chat['message']}\n\n"
152
197
  elif chat["by"] == "khoj" and chat.get("images"):
153
198
  chat_history += f"User: {chat['intent']['query']}\n"
@@ -156,6 +201,7 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
156
201
  chat_history += f"User: {chat['intent']['query']}\n"
157
202
  chat_history += f"{agent_name}: {chat['intent']['inferred-queries'][0]}\n"
158
203
  elif chat["by"] == "you":
204
+ chat_history += f"User: {chat['message']}\n"
159
205
  raw_query_files = chat.get("queryFiles")
160
206
  if raw_query_files:
161
207
  query_files: Dict[str, str] = {}
@@ -168,8 +214,74 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
168
214
  return chat_history
169
215
 
170
216
 
217
+ def construct_question_history(
218
+ conversation_log: dict,
219
+ include_query: bool = True,
220
+ lookback: int = 6,
221
+ query_prefix: str = "Q",
222
+ agent_name: str = "Khoj",
223
+ ) -> str:
224
+ """
225
+ Constructs a chat history string formatted for query extraction purposes.
226
+ """
227
+ history_parts = ""
228
+ original_query = None
229
+ for chat in conversation_log.get("chat", [])[-lookback:]:
230
+ if chat["by"] == "you":
231
+ original_query = chat.get("message")
232
+ history_parts += f"{query_prefix}: {original_query}\n"
233
+ if chat["by"] == "khoj":
234
+ if original_query is None:
235
+ continue
236
+
237
+ message = chat.get("message", "")
238
+ inferred_queries_list = chat.get("intent", {}).get("inferred-queries")
239
+
240
+ # Ensure inferred_queries_list is a list, defaulting to the original query in a list
241
+ if not inferred_queries_list:
242
+ inferred_queries_list = [original_query]
243
+ # If it's a string (though unlikely based on usage), wrap it in a list
244
+ elif isinstance(inferred_queries_list, str):
245
+ inferred_queries_list = [inferred_queries_list]
246
+
247
+ if include_query:
248
+ # Ensure 'type' exists and is a string before checking 'to-image'
249
+ intent_type = chat.get("intent", {}).get("type", "")
250
+ if "to-image" not in intent_type:
251
+ history_parts += f'{agent_name}: {{"queries": {inferred_queries_list}}}\n'
252
+ history_parts += f"A: {message}\n\n"
253
+ else:
254
+ history_parts += f"{agent_name}: {message}\n\n"
255
+
256
+ # Reset original_query for the next turn
257
+ original_query = None
258
+
259
+ return history_parts
260
+
261
+
262
+ def construct_chat_history_for_operator(conversation_history: dict, n: int = 6) -> list[AgentMessage]:
263
+ """
264
+ Construct chat history for operator agent in conversation log.
265
+ Only include last n completed turns (i.e with user and khoj message).
266
+ """
267
+ chat_history: list[AgentMessage] = []
268
+ user_message: Optional[AgentMessage] = None
269
+
270
+ for chat in conversation_history.get("chat", []):
271
+ if len(chat_history) >= n:
272
+ break
273
+ if chat["by"] == "you" and chat.get("message"):
274
+ content = [{"type": "text", "text": chat["message"]}]
275
+ for file in chat.get("queryFiles", []):
276
+ content += [{"type": "text", "text": f'## File: {file["name"]}\n\n{file["content"]}'}]
277
+ user_message = AgentMessage(role="user", content=content)
278
+ elif chat["by"] == "khoj" and chat.get("message"):
279
+ chat_history += [user_message, AgentMessage(role="assistant", content=chat["message"])]
280
+ return chat_history
281
+
282
+
171
283
  def construct_tool_chat_history(
172
- previous_iterations: List[InformationCollectionIteration], tool: ConversationCommand = None
284
+ previous_iterations: List[ResearchIteration], tool: ConversationCommand = None
173
285
  ) -> Dict[str, list]:
174
286
  """
175
287
  Construct chat history from previous iterations for a specific tool
@@ -178,8 +290,8 @@ def construct_tool_chat_history(
178
290
  If no tool is provided inferred query for all tools used are added.
179
291
  """
180
292
  chat_history: list = []
181
- base_extractor: Callable[[InformationCollectionIteration], List[str]] = lambda x: []
182
- extract_inferred_query_map: Dict[ConversationCommand, Callable[[InformationCollectionIteration], List[str]]] = {
293
+ base_extractor: Callable[[ResearchIteration], List[str]] = lambda iteration: []
294
+ extract_inferred_query_map: Dict[ConversationCommand, Callable[[ResearchIteration], List[str]]] = {
183
295
  ConversationCommand.Notes: (
184
296
  lambda iteration: [c["query"] for c in iteration.context] if iteration.context else []
185
297
  ),
@@ -192,9 +304,6 @@ def construct_tool_chat_history(
192
304
  ConversationCommand.Code: (
193
305
  lambda iteration: list(iteration.codeContext.keys()) if iteration.codeContext else []
194
306
  ),
195
- ConversationCommand.Operator: (
196
- lambda iteration: list(iteration.operatorContext.keys()) if iteration.operatorContext else []
197
- ),
198
307
  }
199
308
  for iteration in previous_iterations:
200
309
  # If a tool is provided use the inferred query extractor for that tool if available
@@ -273,7 +382,7 @@ async def save_to_conversation_log(
273
382
  compiled_references: List[Dict[str, Any]] = [],
274
383
  online_results: Dict[str, Any] = {},
275
384
  code_results: Dict[str, Any] = {},
276
- operator_results: Dict[str, str] = {},
385
+ operator_results: List[OperatorRun] = None,
277
386
  inferred_queries: List[str] = [],
278
387
  intent_type: str = "remember",
279
388
  client_application: ClientApplication = None,
@@ -284,7 +393,7 @@ async def save_to_conversation_log(
284
393
  generated_images: List[str] = [],
285
394
  raw_generated_files: List[FileAttachment] = [],
286
395
  generated_mermaidjs_diagram: str = None,
287
- research_results: Optional[List[InformationCollectionIteration]] = None,
396
+ research_results: Optional[List[ResearchIteration]] = None,
288
397
  train_of_thought: List[Any] = [],
289
398
  tracer: Dict[str, Any] = {},
290
399
  ):
@@ -301,8 +410,8 @@ async def save_to_conversation_log(
301
410
  "intent": {"inferred-queries": inferred_queries, "type": intent_type},
302
411
  "onlineContext": online_results,
303
412
  "codeContext": code_results,
304
- "operatorContext": operator_results,
305
- "researchContext": [vars(r) for r in research_results] if research_results and not chat_response else None,
413
+ "operatorContext": [o.to_dict() for o in operator_results] if operator_results and not chat_response else None,
414
+ "researchContext": [r.to_dict() for r in research_results] if research_results and not chat_response else None,
306
415
  "automationId": automation_id,
307
416
  "trainOfThought": train_of_thought,
308
417
  "turnId": turn_id,
@@ -459,10 +568,12 @@ def generate_chatml_messages_with_context(
459
568
  ]
460
569
 
461
570
  if not is_none_or_empty(chat.get("operatorContext")):
571
+ operator_context = chat.get("operatorContext")
572
+ operator_content = "\n\n".join([f'## Task: {oc["query"]}\n{oc["response"]}\n' for oc in operator_context])
462
573
  message_context += [
463
574
  {
464
575
  "type": "text",
465
- "text": f"{prompts.operator_execution_context.format(operator_results=chat.get('operatorContext'))}",
576
+ "text": f"{prompts.operator_execution_context.format(operator_results=operator_content)}",
466
577
  }
467
578
  ]
468
579
 
@@ -0,0 +1,59 @@
1
+ # Khoj Operator (Experimental)
2
+
3
+ ## Overview
4
+ Give Khoj its own computer to operate in a transparent, controlled manner. Accomplish tasks that require visual browsing, file editing and terminal access. Operator with research mode can work for 30+ minutes to accomplish more substantial tasks like feature development, travel planning, shopping etc.
5
+
6
+ ## Setup
7
+
8
+ ### Prerequisites
9
+ - Docker and Docker Compose installed
10
+ - Anthropic API key (required - only Anthropic models currently enabled)
11
+
12
+ ### Installation Steps
13
+ 1. Download the Khoj docker-compose.yml file
14
+ ```shell
15
+ mkdir ~/.khoj && cd ~/.khoj
16
+ wget https://raw.githubusercontent.com/khoj-ai/khoj/master/docker-compose.yml
17
+ ```
18
+
19
+ 2. Configure environment variables in `docker-compose.yml`
20
+ - Set `ANTHROPIC_API_KEY` to your [Anthropic API key](https://console.anthropic.com/settings/keys)
21
+ - Uncomment `KHOJ_OPERATOR_ENABLED=True` to enable the operator tool
22
+
23
+ 3. Start Khoj services
24
+ ```shell
25
+ docker-compose up
26
+ ```
27
+
28
+ 4. Access the web app at http://localhost:42110
29
+ Ensure you're using a Claude 3.7+ model on your [settings page](http://localhost:42110/settings)
30
+
31
+ ## Usage
32
+ Use the `/operator` command or ask Khoj in normal or research mode to use the operator tool to have it operate its computer:
33
+
34
+ **Examples:**
35
+ - `/operator Find flights from Bangkok to Mexico City with no US layover`
36
+ - `/research Clone the khoj repo and tell me how the operator tool is implemented`
37
+
38
+ ## Supported Models
39
+
40
+ Currently enables **only Anthropic models**:
41
+ - Claude Sonnet 4
42
+ - Claude 3.7 Sonnet
43
+ - Claude Opus 4
44
+
45
+ *Note: OpenAI and other operator models are disabled while in development.*
46
+
47
+ ## Capabilities
48
+
49
+ The operator can:
50
+ - **Computer Control**: Take screenshots, click, type, navigate desktop
51
+ - **File Operations**: Create, edit, and manage files
52
+ - **Terminal Access**: Execute bash commands and scripts
53
+ - **Web Browsing**: Navigate websites and documents, and extract information
54
+
55
+ ## Architecture
56
+
57
+ - **Environments**: Operator Computer and Browser environments
58
+ - **Models**: Enable Vision Language Models (VLM) to operate computer
59
+ - **Execution**: Containerize computer environment for security and isolation