khoj 1.41.1.dev107__py3-none-any.whl → 1.41.1.dev142__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +20 -0
- khoj/database/models/__init__.py +1 -1
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{2327-aa22697ed9c8d54a.js → 2327-f03b2a77f67b8f8c.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{8515-f305779d95dd5780.js → 5138-81457f7f59956b56.js} +9 -9
- khoj/interface/compiled/_next/static/chunks/7127-d3199617463d45f0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-c9ceb9b94e24b94a.js → page-774c78ff0f55a228.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-3dc59a0df3827dc7.js → page-4454891c5007b870.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-2b27c7118d8d5a16.js → page-5a2559825b4d5def.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-38f1f125d7aeb4c7.js → page-f7a0286dfc31ad6b.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-26d4492fb1200e0e.js → page-f1a7f278c89e09b6.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-bf1a4e488b29fceb.js → page-5d9134d4a97f8834.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-abb6c5f4239ad7be.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-a1f10c96366c3a4f.js → page-32cd0ceb9ffbd777.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-c6bde5961098facd.js → webpack-952bc0d41769db77.js} +1 -1
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
- khoj/interface/compiled/_next/static/css/93eeacc43e261162.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +4 -4
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +8 -9
- khoj/processor/conversation/anthropic/utils.py +30 -7
- khoj/processor/conversation/google/gemini_chat.py +10 -10
- khoj/processor/conversation/google/utils.py +20 -12
- khoj/processor/conversation/offline/chat_model.py +2 -7
- khoj/processor/conversation/openai/gpt.py +8 -9
- khoj/processor/conversation/utils.py +132 -21
- khoj/processor/operator/README.md +59 -0
- khoj/processor/operator/{operate_browser.py → __init__.py} +98 -34
- khoj/processor/operator/grounding_agent.py +229 -175
- khoj/processor/operator/grounding_agent_uitars.py +59 -48
- khoj/processor/operator/operator_actions.py +48 -0
- khoj/processor/operator/operator_agent_anthropic.py +298 -90
- khoj/processor/operator/operator_agent_base.py +45 -14
- khoj/processor/operator/operator_agent_binary.py +125 -57
- khoj/processor/operator/operator_agent_openai.py +183 -75
- khoj/processor/operator/operator_environment_base.py +11 -1
- khoj/processor/operator/operator_environment_browser.py +5 -3
- khoj/processor/operator/operator_environment_computer.py +658 -0
- khoj/routers/api_chat.py +36 -25
- khoj/routers/helpers.py +8 -17
- khoj/routers/research.py +43 -20
- khoj/utils/constants.py +4 -4
- khoj/utils/helpers.py +12 -15
- {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev142.dist-info}/METADATA +3 -1
- {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev142.dist-info}/RECORD +70 -68
- khoj/interface/compiled/_next/static/chunks/4986-9ddd694756d03aa1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e49165209d2e406c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-d5ae861e1ade9d08.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-64a53f8ec4afa6b3.js +0 -1
- khoj/interface/compiled/_next/static/css/bb7ea98028b368f3.css +0 -1
- khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +0 -1
- /khoj/interface/compiled/_next/static/{y_k1yn7bI1CgM5ZfW7jUq → 4CIEX6Ko-Qehhb7L-ymZw}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{y_k1yn7bI1CgM5ZfW7jUq → 4CIEX6Ko-Qehhb7L-ymZw}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-ab4353eaca76f690.js → 1915-1943ee8a628b893c.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-1c18aa2098982bf9.js → 2117-5a41630a2bd2eae8.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4363-4efaf12abe696251.js → 4363-e6ac2203564d1a3b.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-5d44807c40355b1a.js → 4447-e038b251d626c340.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-adbe6017a66cef10.js → 8667-8136f74e9a086fca.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9259-d8bcd9da9e80c81e.js → 9259-640fdd77408475df.js} +0 -0
- {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev142.dist-info}/WHEEL +0 -0
- {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev142.dist-info}/entry_points.txt +0 -0
- {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev142.dist-info}/licenses/LICENSE +0 -0
**khoj/processor/conversation/google/utils.py**

```diff
@@ -21,6 +21,7 @@ from tenacity import (
 )
 
 from khoj.processor.conversation.utils import (
+    ResponseWithThought,
     commit_conversation_trace,
     get_image_from_base64,
     get_image_from_url,
@@ -102,7 +103,7 @@ def gemini_completion_with_backoff(
         client = get_gemini_client(api_key, api_base_url)
         gemini_clients[api_key] = client
 
-    formatted_messages,
+    formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
 
     # format model response schema
     response_schema = None
@@ -110,12 +111,12 @@ def gemini_completion_with_backoff(
         response_schema = clean_response_schema(model_kwargs["response_schema"])
 
     thinking_config = None
-    if deepthought and model_name.startswith("gemini-2
+    if deepthought and model_name.startswith("gemini-2.5"):
         thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI)
 
     seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
     config = gtypes.GenerateContentConfig(
-        system_instruction=
+        system_instruction=system_instruction,
         temperature=temperature,
         thinking_config=thinking_config,
         max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
@@ -178,21 +179,21 @@ async def gemini_chat_completion_with_backoff(
     model_kwargs=None,
     deepthought=False,
     tracer: dict = {},
-) -> AsyncGenerator[
+) -> AsyncGenerator[ResponseWithThought, None]:
     client = gemini_clients.get(api_key)
     if not client:
         client = get_gemini_client(api_key, api_base_url)
         gemini_clients[api_key] = client
 
-    formatted_messages,
+    formatted_messages, system_instruction = format_messages_for_gemini(messages, system_prompt)
 
     thinking_config = None
-    if deepthought and model_name.startswith("gemini-2
-        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI)
+    if deepthought and model_name.startswith("gemini-2.5"):
+        thinking_config = gtypes.ThinkingConfig(thinking_budget=MAX_REASONING_TOKENS_GEMINI, include_thoughts=True)
 
     seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
     config = gtypes.GenerateContentConfig(
-        system_instruction=
+        system_instruction=system_instruction,
         temperature=temperature,
         thinking_config=thinking_config,
         max_output_tokens=MAX_OUTPUT_TOKENS_GEMINI,
@@ -216,18 +217,25 @@ async def gemini_chat_completion_with_backoff(
         logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
         # Keep track of the last chunk for usage data
         final_chunk = chunk
-
+
+        # handle safety, rate-limit, other finish reasons
         stop_message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
-        message = stop_message or chunk.text
-        aggregated_response += message
-        yield message
         if stopped:
+            yield ResponseWithThought(response=stop_message)
             logger.warning(
                 f"LLM Response Prevented for {model_name}: {stop_message}.\n"
                 + f"Last Message by {messages[-1].role}: {messages[-1].content}"
            )
             break
 
+        # emit thought vs response parts
+        for part in chunk.candidates[0].content.parts:
+            if part.text:
+                aggregated_response += part.text
+                yield ResponseWithThought(response=part.text)
+            if part.thought:
+                yield ResponseWithThought(thought=part.text)
+
     # Calculate cost of chat
     input_tokens = final_chunk.usage_metadata.prompt_token_count or 0 if final_chunk else 0
     output_tokens = final_chunk.usage_metadata.candidates_token_count or 0 if final_chunk else 0
```
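The streaming hunk above changes the generator's contract: it now yields structured `ResponseWithThought` objects, with the model's reasoning and the user-visible answer carried in separate fields, instead of plain strings. A minimal consumption sketch, assuming keyword arguments inferred from the hunks above (the full signature in `google/utils.py` may differ):

```python
from khoj.processor.conversation.google.utils import gemini_chat_completion_with_backoff

async def print_gemini_stream(messages, api_key):
    # Parameter names are inferred from the diff hunks, not the full source.
    async for chunk in gemini_chat_completion_with_backoff(
        messages=messages,
        system_prompt="You are a helpful assistant.",
        model_name="gemini-2.5-flash",  # thinking only activates for gemini-2.5* models
        api_key=api_key,
        api_base_url=None,
        deepthought=True,  # per this diff, also sets include_thoughts=True
    ):
        if chunk.thought:
            print(f"[thought] {chunk.thought}")  # model reasoning stream
        if chunk.response:
            print(chunk.response, end="")  # user-visible answer text
```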
**khoj/processor/conversation/offline/chat_model.py**

```diff
@@ -16,6 +16,7 @@ from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
     clean_json,
     commit_conversation_trace,
+    construct_question_history,
     generate_chatml_messages_with_context,
     messages_to_print,
 )
@@ -64,13 +65,7 @@ def extract_questions_offline(
     username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
 
     # Extract Past User Message and Inferred Questions from Conversation Log
-    chat_history = ""
-
-    if use_history:
-        for chat in conversation_log.get("chat", [])[-4:]:
-            if chat["by"] == "khoj":
-                chat_history += f"Q: {chat['intent']['query']}\n"
-                chat_history += f"Khoj: {chat['message']}\n\n"
+    chat_history = construct_question_history(conversation_log, include_query=False) if use_history else ""
 
     # Get dates relative to today for prompt creation
     today = datetime.today()
```
**khoj/processor/conversation/openai/gpt.py**

```diff
@@ -17,8 +17,10 @@ from khoj.processor.conversation.openai.utils import (
 )
 from khoj.processor.conversation.utils import (
     JsonSupport,
+    OperatorRun,
     ResponseWithThought,
     clean_json,
+    construct_question_history,
     construct_structured_message,
     generate_chatml_messages_with_context,
     messages_to_print,
@@ -55,13 +57,7 @@ def extract_questions(
     username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
 
     # Extract Past User Message and Inferred Questions from Conversation Log
-    chat_history =
-        [
-            f'Q: {chat["intent"]["query"]}\nKhoj: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
-            for chat in conversation_log.get("chat", [])[-4:]
-            if chat["by"] == "khoj" and "to-image" not in chat["intent"].get("type")
-        ]
-    )
+    chat_history = construct_question_history(conversation_log)
 
     # Get dates relative to today for prompt creation
     today = datetime.today()
@@ -169,7 +165,7 @@ async def converse_openai(
     references: list[dict],
     online_results: Optional[Dict[str, Dict]] = None,
     code_results: Optional[Dict[str, Dict]] = None,
-    operator_results: Optional[
+    operator_results: Optional[List[OperatorRun]] = None,
     conversation_log={},
     model: str = "gpt-4o-mini",
     api_key: Optional[str] = None,
@@ -242,8 +238,11 @@ async def converse_openai(
             f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
         )
     if not is_none_or_empty(operator_results):
+        operator_content = [
+            {"query": oc.query, "response": oc.response, "webpages": oc.webpages} for oc in operator_results
+        ]
         context_message += (
-            f"{prompts.operator_execution_context.format(operator_results=yaml_dump(
+            f"{prompts.operator_execution_context.format(operator_results=yaml_dump(operator_content))}\n\n"
        )
 
     context_message = context_message.strip()
```
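The `converse_openai` hunk above stops YAML-dumping `OperatorRun` objects directly and instead flattens each one to a plain dict first. A rough illustration of the shape that lands in the prompt context, using `yaml.safe_dump` as a stand-in for khoj's `yaml_dump` helper and invented values:

```python
import yaml

# Hypothetical flattened operator results, mirroring the dict built in converse_openai
operator_content = [
    {
        "query": "Find flights from Bangkok to Mexico City",
        "response": "Found 3 itineraries with no US layover ...",
        "webpages": [{"link": "https://example.com/flights", "title": "Flight search"}],
    }
]

# yaml.safe_dump approximates khoj's yaml_dump helper for illustration only
print(yaml.safe_dump(operator_content, sort_keys=False))
```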
**khoj/processor/conversation/utils.py**

```diff
@@ -10,7 +10,7 @@ from dataclasses import dataclass
 from datetime import datetime
 from enum import Enum
 from io import BytesIO
-from typing import Any, Callable, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Literal, Optional, Union
 
 import PIL.Image
 import pyjson5
@@ -20,6 +20,7 @@ import yaml
 from langchain_core.messages.chat import ChatMessage
 from llama_cpp import LlamaTokenizer
 from llama_cpp.llama import Llama
+from pydantic import BaseModel
 from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
 
 from khoj.database.adapters import ConversationAdapters
@@ -73,9 +74,9 @@ model_to_prompt_size = {
     "claude-3-7-sonnet-20250219": 60000,
     "claude-3-7-sonnet-latest": 60000,
     "claude-3-5-haiku-20241022": 60000,
-    "claude-sonnet-4": 60000,
+    "claude-sonnet-4-0": 60000,
     "claude-sonnet-4-20250514": 60000,
-    "claude-opus-4": 60000,
+    "claude-opus-4-0": 60000,
     "claude-opus-4-20250514": 60000,
     # Offline Models
     "bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,
@@ -87,7 +88,49 @@ model_to_prompt_size = {
 model_to_tokenizer: Dict[str, str] = {}
 
 
-class
+class AgentMessage(BaseModel):
+    role: Literal["user", "assistant", "system", "environment"]
+    content: Union[str, List]
+
+
+class OperatorRun:
+    def __init__(
+        self,
+        query: str,
+        trajectory: list[AgentMessage] | list[dict] = None,
+        response: str = None,
+        webpages: list[dict] = None,
+    ):
+        self.query = query
+        self.response = response
+        self.webpages = webpages or []
+        self.trajectory: list[AgentMessage] = []
+        if trajectory:
+            for item in trajectory:
+                if isinstance(item, dict):
+                    self.trajectory.append(AgentMessage(**item))
+                elif hasattr(item, "role") and hasattr(item, "content"):  # Heuristic for AgentMessage like object
+                    self.trajectory.append(item)
+                else:
+                    logger.warning(f"Unexpected item type in trajectory: {type(item)}")
+
+    def to_dict(self) -> dict:
+        # Ensure AgentMessage instances in trajectory are also dicts
+        serialized_trajectory = []
+        for msg in self.trajectory:
+            if hasattr(msg, "model_dump"):  # Check if it's a Pydantic model
+                serialized_trajectory.append(msg.model_dump())
+            elif isinstance(msg, dict):
+                serialized_trajectory.append(msg)  # Already a dict
+        return {
+            "query": self.query,
+            "response": self.response,
+            "trajectory": serialized_trajectory,
+            "webpages": self.webpages,
+        }
+
+
+class ResearchIteration:
     def __init__(
         self,
         tool: str,
```
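Together, `AgentMessage` and `OperatorRun` give operator trajectories a stable shape: dicts loaded from the conversation log are coerced back into `AgentMessage` models, and `to_dict()` serializes them again via `model_dump()`. A small round-trip sketch against the definitions in this hunk (values invented):

```python
from khoj.processor.conversation.utils import OperatorRun

# Round-trip sketch using the AgentMessage and OperatorRun definitions above
run = OperatorRun(
    query="Summarize the pricing page",
    trajectory=[
        {"role": "user", "content": "Summarize the pricing page"},  # dict coerced to AgentMessage
        {"role": "assistant", "content": "Opening the browser ..."},
    ],
    response="The pricing page lists three tiers ...",
)

data = run.to_dict()  # trajectory entries become plain dicts via model_dump()
restored = OperatorRun(**data)  # dicts are parsed back into AgentMessage objects
assert restored.trajectory[0].role == "user"
```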
**khoj/processor/conversation/utils.py** (continued)

```diff
@@ -95,7 +138,7 @@ class InformationCollectionIteration:
         context: list = None,
         onlineContext: dict = None,
         codeContext: dict = None,
-        operatorContext: dict
+        operatorContext: dict | OperatorRun = None,
         summarizedResult: str = None,
         warning: str = None,
     ):
@@ -104,13 +147,18 @@ class InformationCollectionIteration:
         self.context = context
         self.onlineContext = onlineContext
         self.codeContext = codeContext
-        self.operatorContext = operatorContext
+        self.operatorContext = OperatorRun(**operatorContext) if isinstance(operatorContext, dict) else operatorContext
         self.summarizedResult = summarizedResult
         self.warning = warning
 
+    def to_dict(self) -> dict:
+        data = vars(self).copy()
+        data["operatorContext"] = self.operatorContext.to_dict() if self.operatorContext else None
+        return data
+
 
 def construct_iteration_history(
-    previous_iterations: List[
+    previous_iterations: List[ResearchIteration],
     previous_iteration_prompt: str,
     query: str = None,
 ) -> list[dict]:
@@ -143,11 +191,8 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
     chat_history = ""
     for chat in conversation_history.get("chat", [])[-n:]:
         if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]:
-            chat_history += f"User: {chat['intent']['query']}\n"
-
             if chat["intent"].get("inferred-queries"):
                 chat_history += f'{agent_name}: {{"queries": {chat["intent"].get("inferred-queries")}}}\n'
-
             chat_history += f"{agent_name}: {chat['message']}\n\n"
         elif chat["by"] == "khoj" and chat.get("images"):
             chat_history += f"User: {chat['intent']['query']}\n"
@@ -156,6 +201,7 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
             chat_history += f"User: {chat['intent']['query']}\n"
             chat_history += f"{agent_name}: {chat['intent']['inferred-queries'][0]}\n"
         elif chat["by"] == "you":
+            chat_history += f"User: {chat['message']}\n"
             raw_query_files = chat.get("queryFiles")
             if raw_query_files:
                 query_files: Dict[str, str] = {}
@@ -168,8 +214,74 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
     return chat_history
 
 
+def construct_question_history(
+    conversation_log: dict,
+    include_query: bool = True,
+    lookback: int = 6,
+    query_prefix: str = "Q",
+    agent_name: str = "Khoj",
+) -> str:
+    """
+    Constructs a chat history string formatted for query extraction purposes.
+    """
+    history_parts = ""
+    original_query = None
+    for chat in conversation_log.get("chat", [])[-lookback:]:
+        if chat["by"] == "you":
+            original_query = chat.get("message")
+            history_parts += f"{query_prefix}: {original_query}\n"
+        if chat["by"] == "khoj":
+            if original_query is None:
+                continue
+
+            message = chat.get("message", "")
+            inferred_queries_list = chat.get("intent", {}).get("inferred-queries")
+
+            # Ensure inferred_queries_list is a list, defaulting to the original query in a list
+            if not inferred_queries_list:
+                inferred_queries_list = [original_query]
+            # If it's a string (though unlikely based on usage), wrap it in a list
+            elif isinstance(inferred_queries_list, str):
+                inferred_queries_list = [inferred_queries_list]
+
+            if include_query:
+                # Ensure 'type' exists and is a string before checking 'to-image'
+                intent_type = chat.get("intent", {}).get("type", "")
+                if "to-image" not in intent_type:
+                    history_parts += f'{agent_name}: {{"queries": {inferred_queries_list}}}\n'
+                    history_parts += f"A: {message}\n\n"
+            else:
+                history_parts += f"{agent_name}: {message}\n\n"
+
+            # Reset original_query for the next turn
+            original_query = None
+
+    return history_parts
+
+
+def construct_chat_history_for_operator(conversation_history: dict, n: int = 6) -> list[AgentMessage]:
+    """
+    Construct chat history for operator agent in conversation log.
+    Only include last n completed turns (i.e with user and khoj message).
+    """
+    chat_history: list[AgentMessage] = []
+    user_message: Optional[AgentMessage] = None
+
+    for chat in conversation_history.get("chat", []):
+        if len(chat_history) >= n:
+            break
+        if chat["by"] == "you" and chat.get("message"):
+            content = [{"type": "text", "text": chat["message"]}]
+            for file in chat.get("queryFiles", []):
+                content += [{"type": "text", "text": f'## File: {file["name"]}\n\n{file["content"]}'}]
+            user_message = AgentMessage(role="user", content=content)
+        elif chat["by"] == "khoj" and chat.get("message"):
+            chat_history += [user_message, AgentMessage(role="assistant", content=chat["message"])]
+    return chat_history
+
+
 def construct_tool_chat_history(
-    previous_iterations: List[
+    previous_iterations: List[ResearchIteration], tool: ConversationCommand = None
 ) -> Dict[str, list]:
     """
     Construct chat history from previous iterations for a specific tool
```
**khoj/processor/conversation/utils.py** (continued)

```diff
@@ -178,8 +290,8 @@ def construct_tool_chat_history(
     If no tool is provided inferred query for all tools used are added.
     """
     chat_history: list = []
-    base_extractor: Callable[[
-    extract_inferred_query_map: Dict[ConversationCommand, Callable[[
+    base_extractor: Callable[[ResearchIteration], List[str]] = lambda iteration: []
+    extract_inferred_query_map: Dict[ConversationCommand, Callable[[ResearchIteration], List[str]]] = {
         ConversationCommand.Notes: (
             lambda iteration: [c["query"] for c in iteration.context] if iteration.context else []
         ),
@@ -192,9 +304,6 @@ def construct_tool_chat_history(
         ConversationCommand.Code: (
             lambda iteration: list(iteration.codeContext.keys()) if iteration.codeContext else []
         ),
-        ConversationCommand.Operator: (
-            lambda iteration: list(iteration.operatorContext.keys()) if iteration.operatorContext else []
-        ),
     }
     for iteration in previous_iterations:
         # If a tool is provided use the inferred query extractor for that tool if available
@@ -273,7 +382,7 @@ async def save_to_conversation_log(
     compiled_references: List[Dict[str, Any]] = [],
     online_results: Dict[str, Any] = {},
     code_results: Dict[str, Any] = {},
-    operator_results:
+    operator_results: List[OperatorRun] = None,
     inferred_queries: List[str] = [],
     intent_type: str = "remember",
     client_application: ClientApplication = None,
@@ -284,7 +393,7 @@ async def save_to_conversation_log(
     generated_images: List[str] = [],
     raw_generated_files: List[FileAttachment] = [],
     generated_mermaidjs_diagram: str = None,
-    research_results: Optional[List[
+    research_results: Optional[List[ResearchIteration]] = None,
     train_of_thought: List[Any] = [],
     tracer: Dict[str, Any] = {},
 ):
@@ -301,8 +410,8 @@ async def save_to_conversation_log(
         "intent": {"inferred-queries": inferred_queries, "type": intent_type},
         "onlineContext": online_results,
         "codeContext": code_results,
-        "operatorContext": operator_results,
-        "researchContext": [
+        "operatorContext": [o.to_dict() for o in operator_results] if operator_results and not chat_response else None,
+        "researchContext": [r.to_dict() for r in research_results] if research_results and not chat_response else None,
         "automationId": automation_id,
         "trainOfThought": train_of_thought,
         "turnId": turn_id,
@@ -459,10 +568,12 @@ def generate_chatml_messages_with_context(
             ]
 
         if not is_none_or_empty(chat.get("operatorContext")):
+            operator_context = chat.get("operatorContext")
+            operator_content = "\n\n".join([f'## Task: {oc["query"]}\n{oc["response"]}\n' for oc in operator_context])
             message_context += [
                 {
                     "type": "text",
-                    "text": f"{prompts.operator_execution_context.format(operator_results=
+                    "text": f"{prompts.operator_execution_context.format(operator_results=operator_content)}",
                 }
             ]
 
```
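The last hunk rebuilds prompt context from the serialized `operatorContext` entries written by `save_to_conversation_log`, reducing each run to a task header plus its response. Roughly (values invented):

```python
# Illustrative serialized operatorContext, as stored by save_to_conversation_log
operator_context = [
    {"query": "Check PyPI for the latest khoj release", "response": "Latest release is 1.41.1", "webpages": []}
]

# Same join as in generate_chatml_messages_with_context
operator_content = "\n\n".join(f'## Task: {oc["query"]}\n{oc["response"]}\n' for oc in operator_context)
print(operator_content)
# ## Task: Check PyPI for the latest khoj release
# Latest release is 1.41.1
```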
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# Khoj Operator (Experimental)
|
2
|
+
|
3
|
+
## Overview
|
4
|
+
Give Khoj its own computer to operate in a transparent, controlled manner. Accomplish tasks that require visual browsing, file editing and terminal access. Operator with research mode can work for 30+ minutes to accomplish more substantial tasks like feature development, travel planning, shopping etc.
|
5
|
+
|
6
|
+
## Setup
|
7
|
+
|
8
|
+
### Prerequisites
|
9
|
+
- Docker and Docker Compose installed
|
10
|
+
- Anthropic API key (required - only Anthropic models currently enabled)
|
11
|
+
|
12
|
+
### Installation Steps
|
13
|
+
1. Download the Khoj docker-compose.yml file
|
14
|
+
```shell
|
15
|
+
mkdir ~/.khoj && cd ~/.khoj
|
16
|
+
wget https://raw.githubusercontent.com/khoj-ai/khoj/master/docker-compose.yml
|
17
|
+
```
|
18
|
+
|
19
|
+
2. Configure environment variables in `docker-compose.yml`
|
20
|
+
- Set `ANTHROPIC_API_KEY` to your [Anthropic API key](https://console.anthropic.com/settings/keys)
|
21
|
+
- Uncomment `KHOJ_OPERATOR_ENABLED=True` to enable the operator tool
|
22
|
+
|
23
|
+
3. Start Khoj services
|
24
|
+
```shell
|
25
|
+
docker-compose up
|
26
|
+
```
|
27
|
+
|
28
|
+
4. Access the web app at http://localhost:42110
|
29
|
+
Ensure you're using a claude 3.7+ models on your [settings page](http://localhost:42110/settings)
|
30
|
+
|
31
|
+
## Usage
|
32
|
+
Use the `/operator` command or ask Khoj in normal or research mode to use the operator tool to have it operate its computer:
|
33
|
+
|
34
|
+
**Examples:**
|
35
|
+
- `/operator Find flights from Bangkok to Mexico City with no US layover`
|
36
|
+
- `/research Clone the khoj repo and tell me how the operator tool is implemented`
|
37
|
+
|
38
|
+
## Supported Models
|
39
|
+
|
40
|
+
Currently enables **only Anthropic models**:
|
41
|
+
- Claude Sonnet 4
|
42
|
+
- Claude 3.7 Sonnet
|
43
|
+
- Claude Opus 4
|
44
|
+
|
45
|
+
*Note: OpenAI and other operator models are disabled while in developemnt.*
|
46
|
+
|
47
|
+
## Capabilities
|
48
|
+
|
49
|
+
The operator can:
|
50
|
+
- **Computer Control**: Take screenshots, click, type, navigate desktop
|
51
|
+
- **File Operations**: Create, edit, and manage files
|
52
|
+
- **Terminal Access**: Execute bash commands and scripts
|
53
|
+
- **Web Browsing**: Navigate websites, documents and extract information
|
54
|
+
|
55
|
+
## Architecture
|
56
|
+
|
57
|
+
- **Environments**: Operator Computer and Browser environments
|
58
|
+
- **Models**: Enable Vision Language Models (VLM) to operate computer
|
59
|
+
- **Execution**: Containerize computer environment for security and isolation
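Per the new README, the operator only activates when `KHOJ_OPERATOR_ENABLED` is set and an Anthropic key is present. A hypothetical pre-flight check, not khoj's actual startup code, covering just the two variables the README names:

```python
import os

# Hypothetical sanity check mirroring the README's required settings;
# the variable names come from the README, the check itself is illustrative.
def operator_preflight() -> list[str]:
    problems = []
    if os.getenv("KHOJ_OPERATOR_ENABLED", "").lower() not in ("true", "1"):
        problems.append("Set KHOJ_OPERATOR_ENABLED=True in docker-compose.yml")
    if not os.getenv("ANTHROPIC_API_KEY"):
        problems.append("Set ANTHROPIC_API_KEY to your Anthropic API key")
    return problems

if __name__ == "__main__":
    for problem in operator_preflight():
        print(problem)
```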
|