khoj 1.41.1.dev107__py3-none-any.whl → 1.41.1.dev144__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +20 -0
- khoj/database/models/__init__.py +1 -1
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{8515-f305779d95dd5780.js → 5138-2cce449fd2454abf.js} +9 -9
- khoj/interface/compiled/_next/static/chunks/7127-d3199617463d45f0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-c9ceb9b94e24b94a.js → page-e18e67cff45758c8.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-3dc59a0df3827dc7.js → page-768a0903c4b5b06d.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-2b27c7118d8d5a16.js → page-1153981cb9c4907f.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-38f1f125d7aeb4c7.js → page-a4b97dd0c2a70cfb.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-26d4492fb1200e0e.js → page-44072d929427ee56.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-bf1a4e488b29fceb.js → page-4e8fdd30a3238357.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-b3f7ae1ef8871d30.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-a1f10c96366c3a4f.js → page-6a4a9050c8bddae9.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-c6bde5961098facd.js → webpack-34ac812e4e4e9a50.js} +1 -1
- khoj/interface/compiled/_next/static/css/1e9b757ee2a2b34b.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +8 -9
- khoj/processor/conversation/anthropic/utils.py +30 -7
- khoj/processor/conversation/google/gemini_chat.py +10 -10
- khoj/processor/conversation/google/utils.py +20 -12
- khoj/processor/conversation/offline/chat_model.py +2 -7
- khoj/processor/conversation/openai/gpt.py +8 -9
- khoj/processor/conversation/utils.py +132 -21
- khoj/processor/operator/README.md +59 -0
- khoj/processor/operator/{operate_browser.py → __init__.py} +98 -34
- khoj/processor/operator/grounding_agent.py +229 -175
- khoj/processor/operator/grounding_agent_uitars.py +59 -48
- khoj/processor/operator/operator_actions.py +48 -0
- khoj/processor/operator/operator_agent_anthropic.py +298 -90
- khoj/processor/operator/operator_agent_base.py +45 -14
- khoj/processor/operator/operator_agent_binary.py +125 -57
- khoj/processor/operator/operator_agent_openai.py +183 -75
- khoj/processor/operator/operator_environment_base.py +11 -1
- khoj/processor/operator/operator_environment_browser.py +5 -3
- khoj/processor/operator/operator_environment_computer.py +658 -0
- khoj/routers/api_chat.py +36 -25
- khoj/routers/helpers.py +8 -17
- khoj/routers/research.py +43 -20
- khoj/utils/constants.py +4 -4
- khoj/utils/helpers.py +12 -15
- {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev144.dist-info}/METADATA +3 -1
- {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev144.dist-info}/RECORD +61 -59
- khoj/interface/compiled/_next/static/chunks/4986-9ddd694756d03aa1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e49165209d2e406c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-64a53f8ec4afa6b3.js +0 -1
- khoj/interface/compiled/_next/static/css/bb7ea98028b368f3.css +0 -1
- /khoj/interface/compiled/_next/static/{y_k1yn7bI1CgM5ZfW7jUq → aJZTO0gnTwX0Dca_dPw4r}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{y_k1yn7bI1CgM5ZfW7jUq → aJZTO0gnTwX0Dca_dPw4r}/_ssgManifest.js +0 -0
- {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev144.dist-info}/WHEEL +0 -0
- {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev144.dist-info}/entry_points.txt +0 -0
- {khoj-1.41.1.dev107.dist-info → khoj-1.41.1.dev144.dist-info}/licenses/LICENSE +0 -0
@@ -6,13 +6,23 @@ from typing import Callable, List, Optional
|
|
6
6
|
|
7
7
|
from khoj.database.adapters import AgentAdapters, ConversationAdapters
|
8
8
|
from khoj.database.models import Agent, ChatModel, KhojUser
|
9
|
+
from khoj.processor.conversation.utils import (
|
10
|
+
OperatorRun,
|
11
|
+
construct_chat_history,
|
12
|
+
construct_chat_history_for_operator,
|
13
|
+
)
|
9
14
|
from khoj.processor.operator.operator_actions import *
|
10
15
|
from khoj.processor.operator.operator_agent_anthropic import AnthropicOperatorAgent
|
11
16
|
from khoj.processor.operator.operator_agent_base import OperatorAgent
|
12
17
|
from khoj.processor.operator.operator_agent_binary import BinaryOperatorAgent
|
13
18
|
from khoj.processor.operator.operator_agent_openai import OpenAIOperatorAgent
|
14
|
-
from khoj.processor.operator.operator_environment_base import
|
19
|
+
from khoj.processor.operator.operator_environment_base import (
|
20
|
+
Environment,
|
21
|
+
EnvironmentType,
|
22
|
+
EnvStepResult,
|
23
|
+
)
|
15
24
|
from khoj.processor.operator.operator_environment_browser import BrowserEnvironment
|
25
|
+
from khoj.processor.operator.operator_environment_computer import ComputerEnvironment
|
16
26
|
from khoj.routers.helpers import ChatEvent
|
17
27
|
from khoj.utils.helpers import timer
|
18
28
|
from khoj.utils.rawconfig import LocationData
|
@@ -20,12 +30,14 @@ from khoj.utils.rawconfig import LocationData
|
|
20
30
|
logger = logging.getLogger(__name__)
|
21
31
|
|
22
32
|
|
23
|
-
# ---
|
24
|
-
async def
|
33
|
+
# --- Main Operator Entrypoint ---
|
34
|
+
async def operate_environment(
|
25
35
|
query: str,
|
26
36
|
user: KhojUser,
|
27
37
|
conversation_log: dict,
|
28
38
|
location_data: LocationData,
|
39
|
+
previous_trajectory: Optional[OperatorRun] = None,
|
40
|
+
environment_type: EnvironmentType = EnvironmentType.COMPUTER,
|
29
41
|
send_status_func: Optional[Callable] = None,
|
30
42
|
query_images: Optional[List[str]] = None, # TODO: Handle query images
|
31
43
|
agent: Agent = None,
|
@@ -33,8 +45,11 @@ async def operate_browser(
|
|
33
45
|
cancellation_event: Optional[asyncio.Event] = None,
|
34
46
|
tracer: dict = {},
|
35
47
|
):
|
36
|
-
response,
|
37
|
-
|
48
|
+
response, user_input_message = None, None
|
49
|
+
|
50
|
+
# Only use partial previous trajectories to continue existing task
|
51
|
+
if previous_trajectory and previous_trajectory.response:
|
52
|
+
previous_trajectory = None
|
38
53
|
|
39
54
|
# Get the agent chat model
|
40
55
|
agent_chat_model = await AgentAdapters.aget_agent_chat_model(agent, user) if agent else None
|
@@ -42,16 +57,40 @@ async def operate_browser(
|
|
42
57
|
if not reasoning_model or not reasoning_model.vision_enabled:
|
43
58
|
reasoning_model = await ConversationAdapters.aget_vision_enabled_config()
|
44
59
|
if not reasoning_model:
|
45
|
-
raise ValueError(f"No vision enabled chat model found. Configure a vision chat model to operate
|
60
|
+
raise ValueError(f"No vision enabled chat model found. Configure a vision chat model to operate environment.")
|
61
|
+
|
62
|
+
# Create conversation history from conversation log
|
63
|
+
chat_history = construct_chat_history_for_operator(conversation_log)
|
46
64
|
|
47
65
|
# Initialize Agent
|
48
|
-
|
66
|
+
max_context = await ConversationAdapters.aget_max_context_size(reasoning_model, user) or 20000
|
67
|
+
max_iterations = int(os.getenv("KHOJ_OPERATOR_ITERATIONS", 100))
|
49
68
|
operator_agent: OperatorAgent
|
50
|
-
if is_operator_model(reasoning_model.name) == ChatModel.ModelType.
|
51
|
-
operator_agent =
|
52
|
-
|
53
|
-
|
54
|
-
|
69
|
+
if is_operator_model(reasoning_model.name) == ChatModel.ModelType.ANTHROPIC:
|
70
|
+
operator_agent = AnthropicOperatorAgent(
|
71
|
+
query,
|
72
|
+
reasoning_model,
|
73
|
+
environment_type,
|
74
|
+
max_iterations,
|
75
|
+
max_context,
|
76
|
+
chat_history,
|
77
|
+
previous_trajectory,
|
78
|
+
tracer,
|
79
|
+
)
|
80
|
+
# TODO: Remove once OpenAI Operator Agent is useful
|
81
|
+
elif is_operator_model(reasoning_model.name) == ChatModel.ModelType.OPENAI and False:
|
82
|
+
operator_agent = OpenAIOperatorAgent(
|
83
|
+
query,
|
84
|
+
reasoning_model,
|
85
|
+
environment_type,
|
86
|
+
max_iterations,
|
87
|
+
max_context,
|
88
|
+
chat_history,
|
89
|
+
previous_trajectory,
|
90
|
+
tracer,
|
91
|
+
)
|
92
|
+
# TODO: Remove once Binary Operator Agent is useful
|
93
|
+
elif False:
|
55
94
|
grounding_model_name = "ui-tars-1.5"
|
56
95
|
grounding_model = await ConversationAdapters.aget_chat_model_by_name(grounding_model_name)
|
57
96
|
if (
|
@@ -59,41 +98,62 @@ async def operate_browser(
|
|
59
98
|
or not grounding_model.vision_enabled
|
60
99
|
or not grounding_model.model_type == ChatModel.ModelType.OPENAI
|
61
100
|
):
|
62
|
-
raise ValueError("
|
63
|
-
operator_agent = BinaryOperatorAgent(
|
101
|
+
raise ValueError("Binary operator agent needs ui-tars-1.5 served over an OpenAI compatible API.")
|
102
|
+
operator_agent = BinaryOperatorAgent(
|
103
|
+
query,
|
104
|
+
reasoning_model,
|
105
|
+
grounding_model,
|
106
|
+
environment_type,
|
107
|
+
max_iterations,
|
108
|
+
max_context,
|
109
|
+
chat_history,
|
110
|
+
previous_trajectory,
|
111
|
+
tracer,
|
112
|
+
)
|
113
|
+
else:
|
114
|
+
raise ValueError(
|
115
|
+
f"Unsupported operator model: {reasoning_model.name}. "
|
116
|
+
"Please use a supported operator model. Only Anthropic models are currently supported."
|
117
|
+
)
|
64
118
|
|
65
119
|
# Initialize Environment
|
66
120
|
if send_status_func:
|
67
|
-
async for event in send_status_func(f"**Launching
|
121
|
+
async for event in send_status_func(f"**Launching {environment_type.value}**"):
|
68
122
|
yield {ChatEvent.STATUS: event}
|
69
|
-
|
123
|
+
if environment_type == EnvironmentType.BROWSER:
|
124
|
+
environment: Environment = BrowserEnvironment()
|
125
|
+
else:
|
126
|
+
environment = ComputerEnvironment(provider="docker")
|
70
127
|
await environment.start(width=1024, height=768)
|
71
128
|
|
72
129
|
# Start Operator Loop
|
73
130
|
try:
|
74
|
-
summarize_prompt = f"Use the results of our research to provide a comprehensive, self-contained answer for the target query:\n{query}."
|
75
131
|
task_completed = False
|
76
132
|
iterations = 0
|
133
|
+
operator_run = OperatorRun(query=query, trajectory=operator_agent.messages, response=response)
|
134
|
+
yield operator_run
|
77
135
|
|
78
|
-
with timer(
|
136
|
+
with timer(
|
137
|
+
f"Operating {environment_type.value} with {reasoning_model.model_type} {reasoning_model.name}", logger
|
138
|
+
):
|
79
139
|
while iterations < max_iterations and not task_completed:
|
80
140
|
if cancellation_event and cancellation_event.is_set():
|
81
|
-
logger.debug(f"
|
141
|
+
logger.debug(f"{environment_type.value} operator cancelled by client disconnect")
|
82
142
|
break
|
83
143
|
|
84
144
|
iterations += 1
|
85
145
|
|
86
146
|
# 1. Get current environment state
|
87
|
-
|
147
|
+
env_state = await environment.get_state()
|
88
148
|
|
89
149
|
# 2. Agent decides action(s)
|
90
|
-
agent_result = await operator_agent.act(
|
150
|
+
agent_result = await operator_agent.act(env_state)
|
91
151
|
|
92
152
|
# 3. Execute actions in the environment
|
93
153
|
env_steps: List[EnvStepResult] = []
|
94
154
|
for action in agent_result.actions:
|
95
155
|
if cancellation_event and cancellation_event.is_set():
|
96
|
-
logger.debug(f"
|
156
|
+
logger.debug(f"{environment_type.value} operator cancelled by client disconnect")
|
97
157
|
break
|
98
158
|
# Handle request for user action and break the loop
|
99
159
|
if isinstance(action, RequestUserAction):
|
@@ -106,12 +166,14 @@ async def operate_browser(
|
|
106
166
|
env_steps.append(env_step)
|
107
167
|
|
108
168
|
# Render status update
|
109
|
-
latest_screenshot =
|
169
|
+
latest_screenshot = (
|
170
|
+
f"data:image/webp;base64,{env_steps[-1].screenshot_base64 if env_steps else env_state.screenshot}"
|
171
|
+
)
|
110
172
|
render_payload = agent_result.rendered_response
|
111
173
|
render_payload["image"] = latest_screenshot
|
112
174
|
render_content = f"**Action**: {json.dumps(render_payload)}"
|
113
175
|
if send_status_func:
|
114
|
-
async for event in send_status_func(f"**Operating
|
176
|
+
async for event in send_status_func(f"**Operating {environment_type.value}**:\n{render_content}"):
|
115
177
|
yield {ChatEvent.STATUS: event}
|
116
178
|
|
117
179
|
# Check if termination conditions are met
|
@@ -123,31 +185,33 @@ async def operate_browser(
|
|
123
185
|
if task_completed or trigger_iteration_limit:
|
124
186
|
# Summarize results of operator run on last iteration
|
125
187
|
operator_agent.add_action_results(env_steps, agent_result)
|
126
|
-
summary_message = await operator_agent.summarize(
|
188
|
+
summary_message = await operator_agent.summarize(env_state)
|
127
189
|
logger.info(f"Task completed: {task_completed}, Iteration limit: {trigger_iteration_limit}")
|
128
190
|
break
|
129
191
|
|
130
192
|
# 4. Update agent on the results of its action on the environment
|
131
193
|
operator_agent.add_action_results(env_steps, agent_result)
|
194
|
+
operator_run.trajectory = operator_agent.messages
|
132
195
|
|
133
196
|
# Determine final response message
|
134
197
|
if user_input_message:
|
135
|
-
response = user_input_message
|
198
|
+
operator_run.response = user_input_message
|
136
199
|
elif task_completed:
|
137
|
-
response = summary_message
|
200
|
+
operator_run.response = summary_message
|
201
|
+
elif cancellation_event and cancellation_event.is_set():
|
202
|
+
operator_run.response = None
|
138
203
|
else: # Hit iteration limit
|
139
|
-
response = f"Operator hit iteration limit ({max_iterations}). If the results seem incomplete try again, assign a smaller task or try a different approach.\nThese were the results till now:\n{summary_message}"
|
204
|
+
operator_run.response = f"Operator hit iteration limit ({max_iterations}). If the results seem incomplete try again, assign a smaller task or try a different approach.\nThese were the results till now:\n{summary_message}"
|
140
205
|
finally:
|
141
|
-
if environment and not user_input_message: # Don't close
|
206
|
+
if environment and not user_input_message: # Don't close environment if user input required
|
142
207
|
await environment.close()
|
143
208
|
if operator_agent:
|
144
209
|
operator_agent.reset()
|
145
210
|
|
146
|
-
|
147
|
-
"
|
148
|
-
|
149
|
-
|
150
|
-
}
|
211
|
+
if environment_type == EnvironmentType.BROWSER and hasattr(environment, "visited_urls"):
|
212
|
+
operator_run.webpages = [{"link": url, "snippet": ""} for url in environment.visited_urls]
|
213
|
+
|
214
|
+
yield operator_run
|
151
215
|
|
152
216
|
|
153
217
|
def is_operator_model(model: str) -> ChatModel.ModelType | None:
|