khoj 1.42.9.dev26__py3-none-any.whl → 1.42.10.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +0 -20
- khoj/database/models/__init__.py +0 -1
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/app/chat/page-4c6b873a4a5c7d2f.js +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +4 -4
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/content/markdown/markdown_to_entries.py +9 -38
- khoj/processor/content/org_mode/org_to_entries.py +2 -18
- khoj/processor/content/org_mode/orgnode.py +16 -18
- khoj/processor/content/text_to_entries.py +0 -30
- khoj/processor/conversation/anthropic/anthropic_chat.py +2 -11
- khoj/processor/conversation/anthropic/utils.py +103 -90
- khoj/processor/conversation/google/gemini_chat.py +1 -4
- khoj/processor/conversation/google/utils.py +18 -80
- khoj/processor/conversation/offline/chat_model.py +3 -3
- khoj/processor/conversation/openai/gpt.py +38 -13
- khoj/processor/conversation/openai/utils.py +12 -113
- khoj/processor/conversation/prompts.py +35 -17
- khoj/processor/conversation/utils.py +58 -129
- khoj/processor/operator/grounding_agent.py +1 -1
- khoj/processor/operator/operator_agent_binary.py +3 -4
- khoj/processor/tools/online_search.py +0 -18
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api_chat.py +1 -1
- khoj/routers/api_content.py +6 -6
- khoj/routers/helpers.py +27 -297
- khoj/routers/research.py +155 -169
- khoj/search_type/text_search.py +0 -2
- khoj/utils/helpers.py +8 -284
- khoj/utils/initialization.py +2 -0
- khoj/utils/rawconfig.py +0 -11
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/METADATA +1 -1
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/RECORD +57 -57
- khoj/interface/compiled/_next/static/chunks/app/chat/page-76fc915800aa90f4.js +0 -1
- /khoj/interface/compiled/_next/static/chunks/{1327-3b1a41af530fa8ee.js → 1327-1a9107b9a2a04a98.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{webpack-70e0762712341826.js → webpack-92ce8aaf95718ec4.js} +0 -0
- /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → cuzJcS32_a4L4a6gCZ63y}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → cuzJcS32_a4L4a6gCZ63y}/_ssgManifest.js +0 -0
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/WHEEL +0 -0
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/entry_points.txt +0 -0
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/licenses/LICENSE +0 -0
khoj/routers/research.py
CHANGED
@@ -3,9 +3,11 @@ import logging
|
|
3
3
|
import os
|
4
4
|
from copy import deepcopy
|
5
5
|
from datetime import datetime
|
6
|
-
from
|
6
|
+
from enum import Enum
|
7
|
+
from typing import Callable, Dict, List, Optional, Type
|
7
8
|
|
8
9
|
import yaml
|
10
|
+
from pydantic import BaseModel, Field
|
9
11
|
|
10
12
|
from khoj.database.adapters import AgentAdapters, EntryAdapters
|
11
13
|
from khoj.database.models import Agent, ChatMessageModel, KhojUser
|
@@ -13,31 +15,25 @@ from khoj.processor.conversation import prompts
|
|
13
15
|
from khoj.processor.conversation.utils import (
|
14
16
|
OperatorRun,
|
15
17
|
ResearchIteration,
|
16
|
-
ToolCall,
|
17
18
|
construct_iteration_history,
|
18
19
|
construct_tool_chat_history,
|
19
20
|
load_complex_json,
|
20
21
|
)
|
21
22
|
from khoj.processor.operator import operate_environment
|
22
|
-
from khoj.processor.tools.online_search import
|
23
|
+
from khoj.processor.tools.online_search import read_webpages, search_online
|
23
24
|
from khoj.processor.tools.run_code import run_code
|
24
25
|
from khoj.routers.helpers import (
|
25
26
|
ChatEvent,
|
26
27
|
generate_summary_from_files,
|
27
|
-
grep_files,
|
28
|
-
list_files,
|
29
28
|
search_documents,
|
30
29
|
send_message_to_model_wrapper,
|
31
|
-
view_file_content,
|
32
30
|
)
|
33
31
|
from khoj.utils.helpers import (
|
34
32
|
ConversationCommand,
|
35
|
-
ToolDefinition,
|
36
|
-
dict_to_tuple,
|
37
33
|
is_none_or_empty,
|
38
34
|
is_operator_enabled,
|
39
35
|
timer,
|
40
|
-
|
36
|
+
tool_description_for_research_llm,
|
41
37
|
truncate_code_context,
|
42
38
|
)
|
43
39
|
from khoj.utils.rawconfig import LocationData
|
@@ -45,6 +41,47 @@ from khoj.utils.rawconfig import LocationData
|
|
45
41
|
logger = logging.getLogger(__name__)
|
46
42
|
|
47
43
|
|
44
|
+
class PlanningResponse(BaseModel):
|
45
|
+
"""
|
46
|
+
Schema for the response from planning agent when deciding the next tool to pick.
|
47
|
+
"""
|
48
|
+
|
49
|
+
scratchpad: str = Field(..., description="Scratchpad to reason about which tool to use next")
|
50
|
+
|
51
|
+
class Config:
|
52
|
+
arbitrary_types_allowed = True
|
53
|
+
|
54
|
+
@classmethod
|
55
|
+
def create_model_with_enum(cls: Type["PlanningResponse"], tool_options: dict) -> Type["PlanningResponse"]:
|
56
|
+
"""
|
57
|
+
Factory method that creates a customized PlanningResponse model
|
58
|
+
with a properly typed tool field based on available tools.
|
59
|
+
|
60
|
+
The tool field is dynamically generated based on available tools.
|
61
|
+
The query field should be filled by the model after the tool field for a more logical reasoning flow.
|
62
|
+
|
63
|
+
Args:
|
64
|
+
tool_options: Dictionary mapping tool names to values
|
65
|
+
|
66
|
+
Returns:
|
67
|
+
A customized PlanningResponse class
|
68
|
+
"""
|
69
|
+
# Create dynamic enum from tool options
|
70
|
+
tool_enum = Enum("ToolEnum", tool_options) # type: ignore
|
71
|
+
|
72
|
+
# Create and return a customized response model with the enum
|
73
|
+
class PlanningResponseWithTool(PlanningResponse):
|
74
|
+
"""
|
75
|
+
Use the scratchpad to reason about which tool to use next and the query to send to the tool.
|
76
|
+
Pick tool from provided options and your query to send to the tool.
|
77
|
+
"""
|
78
|
+
|
79
|
+
tool: tool_enum = Field(..., description="Name of the tool to use")
|
80
|
+
query: str = Field(..., description="Detailed query for the selected tool")
|
81
|
+
|
82
|
+
return PlanningResponseWithTool
|
83
|
+
|
84
|
+
|
48
85
|
async def apick_next_tool(
|
49
86
|
query: str,
|
50
87
|
conversation_history: List[ChatMessageModel],
|
@@ -67,13 +104,12 @@ async def apick_next_tool(
|
|
67
104
|
# Continue with previous iteration if a multi-step tool use is in progress
|
68
105
|
if (
|
69
106
|
previous_iterations
|
70
|
-
and previous_iterations[-1].
|
71
|
-
and isinstance(previous_iterations[-1].query, ToolCall)
|
72
|
-
and previous_iterations[-1].query.name == ConversationCommand.Operator
|
107
|
+
and previous_iterations[-1].tool == ConversationCommand.Operator
|
73
108
|
and not previous_iterations[-1].summarizedResult
|
74
109
|
):
|
75
110
|
previous_iteration = previous_iterations[-1]
|
76
111
|
yield ResearchIteration(
|
112
|
+
tool=previous_iteration.tool,
|
77
113
|
query=query,
|
78
114
|
context=previous_iteration.context,
|
79
115
|
onlineContext=previous_iteration.onlineContext,
|
@@ -84,40 +120,30 @@ async def apick_next_tool(
|
|
84
120
|
return
|
85
121
|
|
86
122
|
# Construct tool options for the agent to choose from
|
87
|
-
|
123
|
+
tool_options = dict()
|
88
124
|
tool_options_str = ""
|
89
125
|
agent_tools = agent.input_tools if agent else []
|
90
126
|
user_has_entries = await EntryAdapters.auser_has_entries(user)
|
91
|
-
for tool,
|
127
|
+
for tool, description in tool_description_for_research_llm.items():
|
92
128
|
# Skip showing operator tool as an option if not enabled
|
93
129
|
if tool == ConversationCommand.Operator and not is_operator_enabled():
|
94
130
|
continue
|
95
|
-
# Skip showing
|
96
|
-
if
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
description = tool_data.description.format(max_search_queries=max_document_searches)
|
105
|
-
elif tool == ConversationCommand.Webpage:
|
106
|
-
description = tool_data.description.format(max_webpages_to_read=max_webpages_to_read)
|
107
|
-
elif tool == ConversationCommand.Online:
|
108
|
-
description = tool_data.description.format(max_search_queries=max_online_searches)
|
109
|
-
else:
|
110
|
-
description = tool_data.description
|
131
|
+
# Skip showing Notes tool as an option if user has no entries
|
132
|
+
if tool == ConversationCommand.Notes:
|
133
|
+
if not user_has_entries:
|
134
|
+
continue
|
135
|
+
description = description.format(max_search_queries=max_document_searches)
|
136
|
+
if tool == ConversationCommand.Webpage:
|
137
|
+
description = description.format(max_webpages_to_read=max_webpages_to_read)
|
138
|
+
if tool == ConversationCommand.Online:
|
139
|
+
description = description.format(max_search_queries=max_online_searches)
|
111
140
|
# Add tool if agent does not have any tools defined or the tool is supported by the agent.
|
112
141
|
if len(agent_tools) == 0 or tool.value in agent_tools:
|
142
|
+
tool_options[tool.name] = tool.value
|
113
143
|
tool_options_str += f'- "{tool.value}": "{description}"\n'
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
description=description,
|
118
|
-
schema=tool_data.schema,
|
119
|
-
)
|
120
|
-
)
|
144
|
+
|
145
|
+
# Create planning response model with dynamically populated tool enum class
|
146
|
+
planning_response_model = PlanningResponse.create_model_with_enum(tool_options)
|
121
147
|
|
122
148
|
today = datetime.today()
|
123
149
|
location_data = f"{location}" if location else "Unknown"
|
@@ -136,17 +162,24 @@ async def apick_next_tool(
|
|
136
162
|
max_iterations=max_iterations,
|
137
163
|
)
|
138
164
|
|
165
|
+
if query_images:
|
166
|
+
query = f"[placeholder for user attached images]\n{query}"
|
167
|
+
|
139
168
|
# Construct chat history with user and iteration history with researcher agent for context
|
140
|
-
iteration_chat_history = construct_iteration_history(previous_iterations,
|
169
|
+
iteration_chat_history = construct_iteration_history(previous_iterations, prompts.previous_iteration, query)
|
141
170
|
chat_and_research_history = conversation_history + iteration_chat_history
|
142
171
|
|
172
|
+
# Plan function execution for the next tool
|
173
|
+
query = prompts.plan_function_execution_next_tool.format(query=query) if iteration_chat_history else query
|
174
|
+
|
143
175
|
try:
|
144
176
|
with timer("Chat actor: Infer information sources to refer", logger):
|
145
177
|
response = await send_message_to_model_wrapper(
|
146
|
-
query=
|
178
|
+
query=query,
|
147
179
|
system_message=function_planning_prompt,
|
148
180
|
chat_history=chat_and_research_history,
|
149
|
-
|
181
|
+
response_type="json_object",
|
182
|
+
response_schema=planning_response_model,
|
150
183
|
deepthought=True,
|
151
184
|
user=user,
|
152
185
|
query_images=query_images,
|
@@ -157,38 +190,48 @@ async def apick_next_tool(
|
|
157
190
|
except Exception as e:
|
158
191
|
logger.error(f"Failed to infer information sources to refer: {e}", exc_info=True)
|
159
192
|
yield ResearchIteration(
|
193
|
+
tool=None,
|
160
194
|
query=None,
|
161
195
|
warning="Failed to infer information sources to refer. Skipping iteration. Try again.",
|
162
196
|
)
|
163
197
|
return
|
164
198
|
|
165
199
|
try:
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
async for event in send_status_func(response.thought):
|
189
|
-
yield {ChatEvent.STATUS: event}
|
200
|
+
response = load_complex_json(response)
|
201
|
+
if not isinstance(response, dict):
|
202
|
+
raise ValueError(f"Expected dict response, got {type(response).__name__}: {response}")
|
203
|
+
selected_tool = response.get("tool", None)
|
204
|
+
generated_query = response.get("query", None)
|
205
|
+
scratchpad = response.get("scratchpad", None)
|
206
|
+
warning = None
|
207
|
+
logger.info(f"Response for determining relevant tools: {response}")
|
208
|
+
|
209
|
+
# Detect selection of previously used query, tool combination.
|
210
|
+
previous_tool_query_combinations = {(i.tool, i.query) for i in previous_iterations if i.warning is None}
|
211
|
+
if (selected_tool, generated_query) in previous_tool_query_combinations:
|
212
|
+
warning = f"Repeated tool, query combination detected. Skipping iteration. Try something different."
|
213
|
+
# Only send client status updates if we'll execute this iteration
|
214
|
+
elif send_status_func:
|
215
|
+
determined_tool_message = "**Determined Tool**: "
|
216
|
+
determined_tool_message += (
|
217
|
+
f"{selected_tool}({generated_query})." if selected_tool != ConversationCommand.Text else "respond."
|
218
|
+
)
|
219
|
+
determined_tool_message += f"\nReason: {scratchpad}" if scratchpad else ""
|
220
|
+
async for event in send_status_func(f"{scratchpad}"):
|
221
|
+
yield {ChatEvent.STATUS: event}
|
190
222
|
|
191
|
-
|
223
|
+
yield ResearchIteration(
|
224
|
+
tool=selected_tool,
|
225
|
+
query=generated_query,
|
226
|
+
warning=warning,
|
227
|
+
)
|
228
|
+
except Exception as e:
|
229
|
+
logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
|
230
|
+
yield ResearchIteration(
|
231
|
+
tool=None,
|
232
|
+
query=None,
|
233
|
+
warning=f"Invalid response for determining relevant tools: {response}. Skipping iteration. Fix error: {e}",
|
234
|
+
)
|
192
235
|
|
193
236
|
|
194
237
|
async def research(
|
@@ -214,10 +257,10 @@ async def research(
|
|
214
257
|
MAX_ITERATIONS = int(os.getenv("KHOJ_RESEARCH_ITERATIONS", 5))
|
215
258
|
|
216
259
|
# Incorporate previous partial research into current research chat history
|
217
|
-
research_conversation_history =
|
260
|
+
research_conversation_history = deepcopy(conversation_history)
|
218
261
|
if current_iteration := len(previous_iterations) > 0:
|
219
262
|
logger.info(f"Continuing research with the previous {len(previous_iterations)} iteration results.")
|
220
|
-
previous_iterations_history = construct_iteration_history(previous_iterations)
|
263
|
+
previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
|
221
264
|
research_conversation_history += previous_iterations_history
|
222
265
|
|
223
266
|
while current_iteration < MAX_ITERATIONS:
|
@@ -230,7 +273,7 @@ async def research(
|
|
230
273
|
code_results: Dict = dict()
|
231
274
|
document_results: List[Dict[str, str]] = []
|
232
275
|
operator_results: OperatorRun = None
|
233
|
-
this_iteration = ResearchIteration(query=query)
|
276
|
+
this_iteration = ResearchIteration(tool=None, query=query)
|
234
277
|
|
235
278
|
async for result in apick_next_tool(
|
236
279
|
query,
|
@@ -260,30 +303,26 @@ async def research(
|
|
260
303
|
logger.warning(f"Research mode: {this_iteration.warning}.")
|
261
304
|
|
262
305
|
# Terminate research if selected text tool or query, tool not set for next iteration
|
263
|
-
elif
|
264
|
-
not this_iteration.query
|
265
|
-
or isinstance(this_iteration.query, str)
|
266
|
-
or this_iteration.query.name == ConversationCommand.Text
|
267
|
-
):
|
306
|
+
elif not this_iteration.query or not this_iteration.tool or this_iteration.tool == ConversationCommand.Text:
|
268
307
|
current_iteration = MAX_ITERATIONS
|
269
308
|
|
270
|
-
elif this_iteration.
|
309
|
+
elif this_iteration.tool == ConversationCommand.Notes:
|
271
310
|
this_iteration.context = []
|
272
311
|
document_results = []
|
273
312
|
previous_inferred_queries = {
|
274
313
|
c["query"] for iteration in previous_iterations if iteration.context for c in iteration.context
|
275
314
|
}
|
276
315
|
async for result in search_documents(
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
user
|
281
|
-
|
282
|
-
conversation_id
|
283
|
-
|
284
|
-
|
285
|
-
send_status_func
|
286
|
-
query_images
|
316
|
+
this_iteration.query,
|
317
|
+
max_document_searches,
|
318
|
+
None,
|
319
|
+
user,
|
320
|
+
construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
|
321
|
+
conversation_id,
|
322
|
+
[ConversationCommand.Default],
|
323
|
+
location,
|
324
|
+
send_status_func,
|
325
|
+
query_images,
|
287
326
|
previous_inferred_queries=previous_inferred_queries,
|
288
327
|
agent=agent,
|
289
328
|
tracer=tracer,
|
@@ -311,7 +350,7 @@ async def research(
|
|
311
350
|
else:
|
312
351
|
this_iteration.warning = "No matching document references found"
|
313
352
|
|
314
|
-
elif this_iteration.
|
353
|
+
elif this_iteration.tool == ConversationCommand.Online:
|
315
354
|
previous_subqueries = {
|
316
355
|
subquery
|
317
356
|
for iteration in previous_iterations
|
@@ -320,12 +359,12 @@ async def research(
|
|
320
359
|
}
|
321
360
|
try:
|
322
361
|
async for result in search_online(
|
323
|
-
|
324
|
-
|
325
|
-
location
|
326
|
-
user
|
327
|
-
send_status_func
|
328
|
-
|
362
|
+
this_iteration.query,
|
363
|
+
construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
|
364
|
+
location,
|
365
|
+
user,
|
366
|
+
send_status_func,
|
367
|
+
[],
|
329
368
|
max_online_searches=max_online_searches,
|
330
369
|
max_webpages_to_read=0,
|
331
370
|
query_images=query_images,
|
@@ -344,15 +383,19 @@ async def research(
|
|
344
383
|
this_iteration.warning = f"Error searching online: {e}"
|
345
384
|
logger.error(this_iteration.warning, exc_info=True)
|
346
385
|
|
347
|
-
elif this_iteration.
|
386
|
+
elif this_iteration.tool == ConversationCommand.Webpage:
|
348
387
|
try:
|
349
|
-
async for result in
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
388
|
+
async for result in read_webpages(
|
389
|
+
this_iteration.query,
|
390
|
+
construct_tool_chat_history(previous_iterations, ConversationCommand.Webpage),
|
391
|
+
location,
|
392
|
+
user,
|
393
|
+
send_status_func,
|
394
|
+
max_webpages_to_read=max_webpages_to_read,
|
395
|
+
query_images=query_images,
|
354
396
|
agent=agent,
|
355
397
|
tracer=tracer,
|
398
|
+
query_files=query_files,
|
356
399
|
):
|
357
400
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
358
401
|
yield result[ChatEvent.STATUS]
|
@@ -373,15 +416,15 @@ async def research(
|
|
373
416
|
this_iteration.warning = f"Error reading webpages: {e}"
|
374
417
|
logger.error(this_iteration.warning, exc_info=True)
|
375
418
|
|
376
|
-
elif this_iteration.
|
419
|
+
elif this_iteration.tool == ConversationCommand.Code:
|
377
420
|
try:
|
378
421
|
async for result in run_code(
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
user
|
384
|
-
send_status_func
|
422
|
+
this_iteration.query,
|
423
|
+
construct_tool_chat_history(previous_iterations, ConversationCommand.Code),
|
424
|
+
"",
|
425
|
+
location,
|
426
|
+
user,
|
427
|
+
send_status_func,
|
385
428
|
query_images=query_images,
|
386
429
|
agent=agent,
|
387
430
|
query_files=query_files,
|
@@ -398,14 +441,14 @@ async def research(
|
|
398
441
|
this_iteration.warning = f"Error running code: {e}"
|
399
442
|
logger.warning(this_iteration.warning, exc_info=True)
|
400
443
|
|
401
|
-
elif this_iteration.
|
444
|
+
elif this_iteration.tool == ConversationCommand.Operator:
|
402
445
|
try:
|
403
446
|
async for result in operate_environment(
|
404
|
-
|
405
|
-
user
|
406
|
-
|
407
|
-
|
408
|
-
|
447
|
+
this_iteration.query,
|
448
|
+
user,
|
449
|
+
construct_tool_chat_history(previous_iterations, ConversationCommand.Operator),
|
450
|
+
location,
|
451
|
+
previous_iterations[-1].operatorContext if previous_iterations else None,
|
409
452
|
send_status_func=send_status_func,
|
410
453
|
query_images=query_images,
|
411
454
|
agent=agent,
|
@@ -431,63 +474,6 @@ async def research(
|
|
431
474
|
this_iteration.warning = f"Error operating browser: {e}"
|
432
475
|
logger.error(this_iteration.warning, exc_info=True)
|
433
476
|
|
434
|
-
elif this_iteration.query.name == ConversationCommand.ViewFile:
|
435
|
-
try:
|
436
|
-
async for result in view_file_content(
|
437
|
-
**this_iteration.query.args,
|
438
|
-
user=user,
|
439
|
-
):
|
440
|
-
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
441
|
-
yield result[ChatEvent.STATUS]
|
442
|
-
else:
|
443
|
-
if this_iteration.context is None:
|
444
|
-
this_iteration.context = []
|
445
|
-
document_results: List[Dict[str, str]] = result # type: ignore
|
446
|
-
this_iteration.context += document_results
|
447
|
-
async for result in send_status_func(f"**Viewed file**: {this_iteration.query.args['path']}"):
|
448
|
-
yield result
|
449
|
-
except Exception as e:
|
450
|
-
this_iteration.warning = f"Error viewing file: {e}"
|
451
|
-
logger.error(this_iteration.warning, exc_info=True)
|
452
|
-
|
453
|
-
elif this_iteration.query.name == ConversationCommand.ListFiles:
|
454
|
-
try:
|
455
|
-
async for result in list_files(
|
456
|
-
**this_iteration.query.args,
|
457
|
-
user=user,
|
458
|
-
):
|
459
|
-
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
460
|
-
yield result[ChatEvent.STATUS]
|
461
|
-
else:
|
462
|
-
if this_iteration.context is None:
|
463
|
-
this_iteration.context = []
|
464
|
-
document_results: List[Dict[str, str]] = [result] # type: ignore
|
465
|
-
this_iteration.context += document_results
|
466
|
-
async for result in send_status_func(result["query"]):
|
467
|
-
yield result
|
468
|
-
except Exception as e:
|
469
|
-
this_iteration.warning = f"Error listing files: {e}"
|
470
|
-
logger.error(this_iteration.warning, exc_info=True)
|
471
|
-
|
472
|
-
elif this_iteration.query.name == ConversationCommand.RegexSearchFiles:
|
473
|
-
try:
|
474
|
-
async for result in grep_files(
|
475
|
-
**this_iteration.query.args,
|
476
|
-
user=user,
|
477
|
-
):
|
478
|
-
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
479
|
-
yield result[ChatEvent.STATUS]
|
480
|
-
else:
|
481
|
-
if this_iteration.context is None:
|
482
|
-
this_iteration.context = []
|
483
|
-
document_results: List[Dict[str, str]] = [result] # type: ignore
|
484
|
-
this_iteration.context += document_results
|
485
|
-
async for result in send_status_func(result["query"]):
|
486
|
-
yield result
|
487
|
-
except Exception as e:
|
488
|
-
this_iteration.warning = f"Error searching with regex: {e}"
|
489
|
-
logger.error(this_iteration.warning, exc_info=True)
|
490
|
-
|
491
477
|
else:
|
492
478
|
# No valid tools. This is our exit condition.
|
493
479
|
current_iteration = MAX_ITERATIONS
|
@@ -495,7 +481,7 @@ async def research(
|
|
495
481
|
current_iteration += 1
|
496
482
|
|
497
483
|
if document_results or online_results or code_results or operator_results or this_iteration.warning:
|
498
|
-
results_data = f"\n<
|
484
|
+
results_data = f"\n<iteration>{current_iteration}\n<tool>{this_iteration.tool}</tool>\n<query>{this_iteration.query}</query>\n<results>"
|
499
485
|
if document_results:
|
500
486
|
results_data += f"\n<document_references>\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</document_references>"
|
501
487
|
if online_results:
|
@@ -508,7 +494,7 @@ async def research(
|
|
508
494
|
)
|
509
495
|
if this_iteration.warning:
|
510
496
|
results_data += f"\n<warning>\n{this_iteration.warning}\n</warning>"
|
511
|
-
results_data +=
|
497
|
+
results_data += "\n</results>\n</iteration>"
|
512
498
|
|
513
499
|
# intermediate_result = await extract_relevant_info(this_iteration.query, results_data, agent)
|
514
500
|
this_iteration.summarizedResult = results_data
|
khoj/search_type/text_search.py
CHANGED
@@ -157,7 +157,6 @@ def collate_results(hits, dedupe=True):
|
|
157
157
|
"additional": {
|
158
158
|
"source": hit.file_source,
|
159
159
|
"file": hit.file_path,
|
160
|
-
"uri": hit.url,
|
161
160
|
"compiled": hit.compiled,
|
162
161
|
"heading": hit.heading,
|
163
162
|
},
|
@@ -181,7 +180,6 @@ def deduplicated_search_responses(hits: List[SearchResponse]):
|
|
181
180
|
"additional": {
|
182
181
|
"source": hit.additional["source"],
|
183
182
|
"file": hit.additional["file"],
|
184
|
-
"uri": hit.additional["uri"],
|
185
183
|
"query": hit.additional["query"],
|
186
184
|
"compiled": hit.additional["compiled"],
|
187
185
|
"heading": hit.additional["heading"],
|