khoj 1.42.9.dev26__py3-none-any.whl → 1.42.10.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. khoj/database/adapters/__init__.py +0 -20
  2. khoj/database/models/__init__.py +0 -1
  3. khoj/interface/compiled/404/index.html +2 -2
  4. khoj/interface/compiled/_next/static/chunks/app/chat/page-4c6b873a4a5c7d2f.js +1 -0
  5. khoj/interface/compiled/agents/index.html +2 -2
  6. khoj/interface/compiled/agents/index.txt +2 -2
  7. khoj/interface/compiled/automations/index.html +2 -2
  8. khoj/interface/compiled/automations/index.txt +3 -3
  9. khoj/interface/compiled/chat/index.html +2 -2
  10. khoj/interface/compiled/chat/index.txt +2 -2
  11. khoj/interface/compiled/index.html +2 -2
  12. khoj/interface/compiled/index.txt +2 -2
  13. khoj/interface/compiled/search/index.html +2 -2
  14. khoj/interface/compiled/search/index.txt +2 -2
  15. khoj/interface/compiled/settings/index.html +2 -2
  16. khoj/interface/compiled/settings/index.txt +4 -4
  17. khoj/interface/compiled/share/chat/index.html +2 -2
  18. khoj/interface/compiled/share/chat/index.txt +2 -2
  19. khoj/processor/content/markdown/markdown_to_entries.py +9 -38
  20. khoj/processor/content/org_mode/org_to_entries.py +2 -18
  21. khoj/processor/content/org_mode/orgnode.py +16 -18
  22. khoj/processor/content/text_to_entries.py +0 -30
  23. khoj/processor/conversation/anthropic/anthropic_chat.py +2 -11
  24. khoj/processor/conversation/anthropic/utils.py +103 -90
  25. khoj/processor/conversation/google/gemini_chat.py +1 -4
  26. khoj/processor/conversation/google/utils.py +18 -80
  27. khoj/processor/conversation/offline/chat_model.py +3 -3
  28. khoj/processor/conversation/openai/gpt.py +38 -13
  29. khoj/processor/conversation/openai/utils.py +12 -113
  30. khoj/processor/conversation/prompts.py +35 -17
  31. khoj/processor/conversation/utils.py +58 -129
  32. khoj/processor/operator/grounding_agent.py +1 -1
  33. khoj/processor/operator/operator_agent_binary.py +3 -4
  34. khoj/processor/tools/online_search.py +0 -18
  35. khoj/processor/tools/run_code.py +1 -1
  36. khoj/routers/api_chat.py +1 -1
  37. khoj/routers/api_content.py +6 -6
  38. khoj/routers/helpers.py +27 -297
  39. khoj/routers/research.py +155 -169
  40. khoj/search_type/text_search.py +0 -2
  41. khoj/utils/helpers.py +8 -284
  42. khoj/utils/initialization.py +2 -0
  43. khoj/utils/rawconfig.py +0 -11
  44. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/METADATA +1 -1
  45. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/RECORD +57 -57
  46. khoj/interface/compiled/_next/static/chunks/app/chat/page-76fc915800aa90f4.js +0 -1
  47. /khoj/interface/compiled/_next/static/chunks/{1327-3b1a41af530fa8ee.js → 1327-1a9107b9a2a04a98.js} +0 -0
  48. /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
  49. /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
  50. /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
  51. /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
  52. /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
  53. /khoj/interface/compiled/_next/static/chunks/{webpack-70e0762712341826.js → webpack-92ce8aaf95718ec4.js} +0 -0
  54. /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → cuzJcS32_a4L4a6gCZ63y}/_buildManifest.js +0 -0
  55. /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → cuzJcS32_a4L4a6gCZ63y}/_ssgManifest.js +0 -0
  56. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/WHEEL +0 -0
  57. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/entry_points.txt +0 -0
  58. {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/licenses/LICENSE +0 -0
khoj/routers/research.py CHANGED
@@ -3,9 +3,11 @@ import logging
3
3
  import os
4
4
  from copy import deepcopy
5
5
  from datetime import datetime
6
- from typing import Callable, Dict, List, Optional
6
+ from enum import Enum
7
+ from typing import Callable, Dict, List, Optional, Type
7
8
 
8
9
  import yaml
10
+ from pydantic import BaseModel, Field
9
11
 
10
12
  from khoj.database.adapters import AgentAdapters, EntryAdapters
11
13
  from khoj.database.models import Agent, ChatMessageModel, KhojUser
@@ -13,31 +15,25 @@ from khoj.processor.conversation import prompts
13
15
  from khoj.processor.conversation.utils import (
14
16
  OperatorRun,
15
17
  ResearchIteration,
16
- ToolCall,
17
18
  construct_iteration_history,
18
19
  construct_tool_chat_history,
19
20
  load_complex_json,
20
21
  )
21
22
  from khoj.processor.operator import operate_environment
22
- from khoj.processor.tools.online_search import read_webpages_content, search_online
23
+ from khoj.processor.tools.online_search import read_webpages, search_online
23
24
  from khoj.processor.tools.run_code import run_code
24
25
  from khoj.routers.helpers import (
25
26
  ChatEvent,
26
27
  generate_summary_from_files,
27
- grep_files,
28
- list_files,
29
28
  search_documents,
30
29
  send_message_to_model_wrapper,
31
- view_file_content,
32
30
  )
33
31
  from khoj.utils.helpers import (
34
32
  ConversationCommand,
35
- ToolDefinition,
36
- dict_to_tuple,
37
33
  is_none_or_empty,
38
34
  is_operator_enabled,
39
35
  timer,
40
- tools_for_research_llm,
36
+ tool_description_for_research_llm,
41
37
  truncate_code_context,
42
38
  )
43
39
  from khoj.utils.rawconfig import LocationData
@@ -45,6 +41,47 @@ from khoj.utils.rawconfig import LocationData
45
41
  logger = logging.getLogger(__name__)
46
42
 
47
43
 
44
+ class PlanningResponse(BaseModel):
45
+ """
46
+ Schema for the response from planning agent when deciding the next tool to pick.
47
+ """
48
+
49
+ scratchpad: str = Field(..., description="Scratchpad to reason about which tool to use next")
50
+
51
+ class Config:
52
+ arbitrary_types_allowed = True
53
+
54
+ @classmethod
55
+ def create_model_with_enum(cls: Type["PlanningResponse"], tool_options: dict) -> Type["PlanningResponse"]:
56
+ """
57
+ Factory method that creates a customized PlanningResponse model
58
+ with a properly typed tool field based on available tools.
59
+
60
+ The tool field is dynamically generated based on available tools.
61
+ The query field should be filled by the model after the tool field for a more logical reasoning flow.
62
+
63
+ Args:
64
+ tool_options: Dictionary mapping tool names to values
65
+
66
+ Returns:
67
+ A customized PlanningResponse class
68
+ """
69
+ # Create dynamic enum from tool options
70
+ tool_enum = Enum("ToolEnum", tool_options) # type: ignore
71
+
72
+ # Create and return a customized response model with the enum
73
+ class PlanningResponseWithTool(PlanningResponse):
74
+ """
75
+ Use the scratchpad to reason about which tool to use next and the query to send to the tool.
76
+ Pick tool from provided options and your query to send to the tool.
77
+ """
78
+
79
+ tool: tool_enum = Field(..., description="Name of the tool to use")
80
+ query: str = Field(..., description="Detailed query for the selected tool")
81
+
82
+ return PlanningResponseWithTool
83
+
84
+
48
85
  async def apick_next_tool(
49
86
  query: str,
50
87
  conversation_history: List[ChatMessageModel],
@@ -67,13 +104,12 @@ async def apick_next_tool(
67
104
  # Continue with previous iteration if a multi-step tool use is in progress
68
105
  if (
69
106
  previous_iterations
70
- and previous_iterations[-1].query
71
- and isinstance(previous_iterations[-1].query, ToolCall)
72
- and previous_iterations[-1].query.name == ConversationCommand.Operator
107
+ and previous_iterations[-1].tool == ConversationCommand.Operator
73
108
  and not previous_iterations[-1].summarizedResult
74
109
  ):
75
110
  previous_iteration = previous_iterations[-1]
76
111
  yield ResearchIteration(
112
+ tool=previous_iteration.tool,
77
113
  query=query,
78
114
  context=previous_iteration.context,
79
115
  onlineContext=previous_iteration.onlineContext,
@@ -84,40 +120,30 @@ async def apick_next_tool(
84
120
  return
85
121
 
86
122
  # Construct tool options for the agent to choose from
87
- tools = []
123
+ tool_options = dict()
88
124
  tool_options_str = ""
89
125
  agent_tools = agent.input_tools if agent else []
90
126
  user_has_entries = await EntryAdapters.auser_has_entries(user)
91
- for tool, tool_data in tools_for_research_llm.items():
127
+ for tool, description in tool_description_for_research_llm.items():
92
128
  # Skip showing operator tool as an option if not enabled
93
129
  if tool == ConversationCommand.Operator and not is_operator_enabled():
94
130
  continue
95
- # Skip showing document related tools if user has no documents
96
- if (
97
- tool == ConversationCommand.SemanticSearchFiles
98
- or tool == ConversationCommand.RegexSearchFiles
99
- or tool == ConversationCommand.ViewFile
100
- or tool == ConversationCommand.ListFiles
101
- ) and not user_has_entries:
102
- continue
103
- if tool == ConversationCommand.SemanticSearchFiles:
104
- description = tool_data.description.format(max_search_queries=max_document_searches)
105
- elif tool == ConversationCommand.Webpage:
106
- description = tool_data.description.format(max_webpages_to_read=max_webpages_to_read)
107
- elif tool == ConversationCommand.Online:
108
- description = tool_data.description.format(max_search_queries=max_online_searches)
109
- else:
110
- description = tool_data.description
131
+ # Skip showing Notes tool as an option if user has no entries
132
+ if tool == ConversationCommand.Notes:
133
+ if not user_has_entries:
134
+ continue
135
+ description = description.format(max_search_queries=max_document_searches)
136
+ if tool == ConversationCommand.Webpage:
137
+ description = description.format(max_webpages_to_read=max_webpages_to_read)
138
+ if tool == ConversationCommand.Online:
139
+ description = description.format(max_search_queries=max_online_searches)
111
140
  # Add tool if agent does not have any tools defined or the tool is supported by the agent.
112
141
  if len(agent_tools) == 0 or tool.value in agent_tools:
142
+ tool_options[tool.name] = tool.value
113
143
  tool_options_str += f'- "{tool.value}": "{description}"\n'
114
- tools.append(
115
- ToolDefinition(
116
- name=tool.value,
117
- description=description,
118
- schema=tool_data.schema,
119
- )
120
- )
144
+
145
+ # Create planning response model with dynamically populated tool enum class
146
+ planning_response_model = PlanningResponse.create_model_with_enum(tool_options)
121
147
 
122
148
  today = datetime.today()
123
149
  location_data = f"{location}" if location else "Unknown"
@@ -136,17 +162,24 @@ async def apick_next_tool(
136
162
  max_iterations=max_iterations,
137
163
  )
138
164
 
165
+ if query_images:
166
+ query = f"[placeholder for user attached images]\n{query}"
167
+
139
168
  # Construct chat history with user and iteration history with researcher agent for context
140
- iteration_chat_history = construct_iteration_history(previous_iterations, query, query_images, query_files)
169
+ iteration_chat_history = construct_iteration_history(previous_iterations, prompts.previous_iteration, query)
141
170
  chat_and_research_history = conversation_history + iteration_chat_history
142
171
 
172
+ # Plan function execution for the next tool
173
+ query = prompts.plan_function_execution_next_tool.format(query=query) if iteration_chat_history else query
174
+
143
175
  try:
144
176
  with timer("Chat actor: Infer information sources to refer", logger):
145
177
  response = await send_message_to_model_wrapper(
146
- query="",
178
+ query=query,
147
179
  system_message=function_planning_prompt,
148
180
  chat_history=chat_and_research_history,
149
- tools=tools,
181
+ response_type="json_object",
182
+ response_schema=planning_response_model,
150
183
  deepthought=True,
151
184
  user=user,
152
185
  query_images=query_images,
@@ -157,38 +190,48 @@ async def apick_next_tool(
157
190
  except Exception as e:
158
191
  logger.error(f"Failed to infer information sources to refer: {e}", exc_info=True)
159
192
  yield ResearchIteration(
193
+ tool=None,
160
194
  query=None,
161
195
  warning="Failed to infer information sources to refer. Skipping iteration. Try again.",
162
196
  )
163
197
  return
164
198
 
165
199
  try:
166
- # Try parse the response as function call response to infer next tool to use.
167
- # TODO: Handle multiple tool calls.
168
- response_text = response.text
169
- parsed_response = [ToolCall(**item) for item in load_complex_json(response_text)][0]
170
- except Exception as e:
171
- # Otherwise assume the model has decided to end the research run and respond to the user.
172
- parsed_response = ToolCall(name=ConversationCommand.Text, args={"response": response_text}, id=None)
173
-
174
- # If we have a valid response, extract the tool and query.
175
- warning = None
176
- logger.info(f"Response for determining relevant tools: {parsed_response.name}({parsed_response.args})")
177
-
178
- # Detect selection of previously used query, tool combination.
179
- previous_tool_query_combinations = {
180
- (i.query.name, dict_to_tuple(i.query.args))
181
- for i in previous_iterations
182
- if i.warning is None and isinstance(i.query, ToolCall)
183
- }
184
- if (parsed_response.name, dict_to_tuple(parsed_response.args)) in previous_tool_query_combinations:
185
- warning = f"Repeated tool, query combination detected. Skipping iteration. Try something different."
186
- # Only send client status updates if we'll execute this iteration and model has thoughts to share.
187
- elif send_status_func and not is_none_or_empty(response.thought):
188
- async for event in send_status_func(response.thought):
189
- yield {ChatEvent.STATUS: event}
200
+ response = load_complex_json(response)
201
+ if not isinstance(response, dict):
202
+ raise ValueError(f"Expected dict response, got {type(response).__name__}: {response}")
203
+ selected_tool = response.get("tool", None)
204
+ generated_query = response.get("query", None)
205
+ scratchpad = response.get("scratchpad", None)
206
+ warning = None
207
+ logger.info(f"Response for determining relevant tools: {response}")
208
+
209
+ # Detect selection of previously used query, tool combination.
210
+ previous_tool_query_combinations = {(i.tool, i.query) for i in previous_iterations if i.warning is None}
211
+ if (selected_tool, generated_query) in previous_tool_query_combinations:
212
+ warning = f"Repeated tool, query combination detected. Skipping iteration. Try something different."
213
+ # Only send client status updates if we'll execute this iteration
214
+ elif send_status_func:
215
+ determined_tool_message = "**Determined Tool**: "
216
+ determined_tool_message += (
217
+ f"{selected_tool}({generated_query})." if selected_tool != ConversationCommand.Text else "respond."
218
+ )
219
+ determined_tool_message += f"\nReason: {scratchpad}" if scratchpad else ""
220
+ async for event in send_status_func(f"{scratchpad}"):
221
+ yield {ChatEvent.STATUS: event}
190
222
 
191
- yield ResearchIteration(query=parsed_response, warning=warning, raw_response=response.raw_content)
223
+ yield ResearchIteration(
224
+ tool=selected_tool,
225
+ query=generated_query,
226
+ warning=warning,
227
+ )
228
+ except Exception as e:
229
+ logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
230
+ yield ResearchIteration(
231
+ tool=None,
232
+ query=None,
233
+ warning=f"Invalid response for determining relevant tools: {response}. Skipping iteration. Fix error: {e}",
234
+ )
192
235
 
193
236
 
194
237
  async def research(
@@ -214,10 +257,10 @@ async def research(
214
257
  MAX_ITERATIONS = int(os.getenv("KHOJ_RESEARCH_ITERATIONS", 5))
215
258
 
216
259
  # Incorporate previous partial research into current research chat history
217
- research_conversation_history = [chat for chat in deepcopy(conversation_history) if chat.message]
260
+ research_conversation_history = deepcopy(conversation_history)
218
261
  if current_iteration := len(previous_iterations) > 0:
219
262
  logger.info(f"Continuing research with the previous {len(previous_iterations)} iteration results.")
220
- previous_iterations_history = construct_iteration_history(previous_iterations)
263
+ previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
221
264
  research_conversation_history += previous_iterations_history
222
265
 
223
266
  while current_iteration < MAX_ITERATIONS:
@@ -230,7 +273,7 @@ async def research(
230
273
  code_results: Dict = dict()
231
274
  document_results: List[Dict[str, str]] = []
232
275
  operator_results: OperatorRun = None
233
- this_iteration = ResearchIteration(query=query)
276
+ this_iteration = ResearchIteration(tool=None, query=query)
234
277
 
235
278
  async for result in apick_next_tool(
236
279
  query,
@@ -260,30 +303,26 @@ async def research(
260
303
  logger.warning(f"Research mode: {this_iteration.warning}.")
261
304
 
262
305
  # Terminate research if selected text tool or query, tool not set for next iteration
263
- elif (
264
- not this_iteration.query
265
- or isinstance(this_iteration.query, str)
266
- or this_iteration.query.name == ConversationCommand.Text
267
- ):
306
+ elif not this_iteration.query or not this_iteration.tool or this_iteration.tool == ConversationCommand.Text:
268
307
  current_iteration = MAX_ITERATIONS
269
308
 
270
- elif this_iteration.query.name == ConversationCommand.SemanticSearchFiles:
309
+ elif this_iteration.tool == ConversationCommand.Notes:
271
310
  this_iteration.context = []
272
311
  document_results = []
273
312
  previous_inferred_queries = {
274
313
  c["query"] for iteration in previous_iterations if iteration.context for c in iteration.context
275
314
  }
276
315
  async for result in search_documents(
277
- **this_iteration.query.args,
278
- n=max_document_searches,
279
- d=None,
280
- user=user,
281
- chat_history=construct_tool_chat_history(previous_iterations, ConversationCommand.SemanticSearchFiles),
282
- conversation_id=conversation_id,
283
- conversation_commands=[ConversationCommand.Default],
284
- location_data=location,
285
- send_status_func=send_status_func,
286
- query_images=query_images,
316
+ this_iteration.query,
317
+ max_document_searches,
318
+ None,
319
+ user,
320
+ construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
321
+ conversation_id,
322
+ [ConversationCommand.Default],
323
+ location,
324
+ send_status_func,
325
+ query_images,
287
326
  previous_inferred_queries=previous_inferred_queries,
288
327
  agent=agent,
289
328
  tracer=tracer,
@@ -311,7 +350,7 @@ async def research(
311
350
  else:
312
351
  this_iteration.warning = "No matching document references found"
313
352
 
314
- elif this_iteration.query.name == ConversationCommand.SearchWeb:
353
+ elif this_iteration.tool == ConversationCommand.Online:
315
354
  previous_subqueries = {
316
355
  subquery
317
356
  for iteration in previous_iterations
@@ -320,12 +359,12 @@ async def research(
320
359
  }
321
360
  try:
322
361
  async for result in search_online(
323
- **this_iteration.query.args,
324
- conversation_history=construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
325
- location=location,
326
- user=user,
327
- send_status_func=send_status_func,
328
- custom_filters=[],
362
+ this_iteration.query,
363
+ construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
364
+ location,
365
+ user,
366
+ send_status_func,
367
+ [],
329
368
  max_online_searches=max_online_searches,
330
369
  max_webpages_to_read=0,
331
370
  query_images=query_images,
@@ -344,15 +383,19 @@ async def research(
344
383
  this_iteration.warning = f"Error searching online: {e}"
345
384
  logger.error(this_iteration.warning, exc_info=True)
346
385
 
347
- elif this_iteration.query.name == ConversationCommand.ReadWebpage:
386
+ elif this_iteration.tool == ConversationCommand.Webpage:
348
387
  try:
349
- async for result in read_webpages_content(
350
- **this_iteration.query.args,
351
- user=user,
352
- send_status_func=send_status_func,
353
- # max_webpages_to_read=max_webpages_to_read,
388
+ async for result in read_webpages(
389
+ this_iteration.query,
390
+ construct_tool_chat_history(previous_iterations, ConversationCommand.Webpage),
391
+ location,
392
+ user,
393
+ send_status_func,
394
+ max_webpages_to_read=max_webpages_to_read,
395
+ query_images=query_images,
354
396
  agent=agent,
355
397
  tracer=tracer,
398
+ query_files=query_files,
356
399
  ):
357
400
  if isinstance(result, dict) and ChatEvent.STATUS in result:
358
401
  yield result[ChatEvent.STATUS]
@@ -373,15 +416,15 @@ async def research(
373
416
  this_iteration.warning = f"Error reading webpages: {e}"
374
417
  logger.error(this_iteration.warning, exc_info=True)
375
418
 
376
- elif this_iteration.query.name == ConversationCommand.RunCode:
419
+ elif this_iteration.tool == ConversationCommand.Code:
377
420
  try:
378
421
  async for result in run_code(
379
- **this_iteration.query.args,
380
- conversation_history=construct_tool_chat_history(previous_iterations, ConversationCommand.Code),
381
- context="",
382
- location_data=location,
383
- user=user,
384
- send_status_func=send_status_func,
422
+ this_iteration.query,
423
+ construct_tool_chat_history(previous_iterations, ConversationCommand.Code),
424
+ "",
425
+ location,
426
+ user,
427
+ send_status_func,
385
428
  query_images=query_images,
386
429
  agent=agent,
387
430
  query_files=query_files,
@@ -398,14 +441,14 @@ async def research(
398
441
  this_iteration.warning = f"Error running code: {e}"
399
442
  logger.warning(this_iteration.warning, exc_info=True)
400
443
 
401
- elif this_iteration.query.name == ConversationCommand.OperateComputer:
444
+ elif this_iteration.tool == ConversationCommand.Operator:
402
445
  try:
403
446
  async for result in operate_environment(
404
- **this_iteration.query.args,
405
- user=user,
406
- conversation_log=construct_tool_chat_history(previous_iterations, ConversationCommand.Operator),
407
- location_data=location,
408
- previous_trajectory=previous_iterations[-1].operatorContext if previous_iterations else None,
447
+ this_iteration.query,
448
+ user,
449
+ construct_tool_chat_history(previous_iterations, ConversationCommand.Operator),
450
+ location,
451
+ previous_iterations[-1].operatorContext if previous_iterations else None,
409
452
  send_status_func=send_status_func,
410
453
  query_images=query_images,
411
454
  agent=agent,
@@ -431,63 +474,6 @@ async def research(
431
474
  this_iteration.warning = f"Error operating browser: {e}"
432
475
  logger.error(this_iteration.warning, exc_info=True)
433
476
 
434
- elif this_iteration.query.name == ConversationCommand.ViewFile:
435
- try:
436
- async for result in view_file_content(
437
- **this_iteration.query.args,
438
- user=user,
439
- ):
440
- if isinstance(result, dict) and ChatEvent.STATUS in result:
441
- yield result[ChatEvent.STATUS]
442
- else:
443
- if this_iteration.context is None:
444
- this_iteration.context = []
445
- document_results: List[Dict[str, str]] = result # type: ignore
446
- this_iteration.context += document_results
447
- async for result in send_status_func(f"**Viewed file**: {this_iteration.query.args['path']}"):
448
- yield result
449
- except Exception as e:
450
- this_iteration.warning = f"Error viewing file: {e}"
451
- logger.error(this_iteration.warning, exc_info=True)
452
-
453
- elif this_iteration.query.name == ConversationCommand.ListFiles:
454
- try:
455
- async for result in list_files(
456
- **this_iteration.query.args,
457
- user=user,
458
- ):
459
- if isinstance(result, dict) and ChatEvent.STATUS in result:
460
- yield result[ChatEvent.STATUS]
461
- else:
462
- if this_iteration.context is None:
463
- this_iteration.context = []
464
- document_results: List[Dict[str, str]] = [result] # type: ignore
465
- this_iteration.context += document_results
466
- async for result in send_status_func(result["query"]):
467
- yield result
468
- except Exception as e:
469
- this_iteration.warning = f"Error listing files: {e}"
470
- logger.error(this_iteration.warning, exc_info=True)
471
-
472
- elif this_iteration.query.name == ConversationCommand.RegexSearchFiles:
473
- try:
474
- async for result in grep_files(
475
- **this_iteration.query.args,
476
- user=user,
477
- ):
478
- if isinstance(result, dict) and ChatEvent.STATUS in result:
479
- yield result[ChatEvent.STATUS]
480
- else:
481
- if this_iteration.context is None:
482
- this_iteration.context = []
483
- document_results: List[Dict[str, str]] = [result] # type: ignore
484
- this_iteration.context += document_results
485
- async for result in send_status_func(result["query"]):
486
- yield result
487
- except Exception as e:
488
- this_iteration.warning = f"Error searching with regex: {e}"
489
- logger.error(this_iteration.warning, exc_info=True)
490
-
491
477
  else:
492
478
  # No valid tools. This is our exit condition.
493
479
  current_iteration = MAX_ITERATIONS
@@ -495,7 +481,7 @@ async def research(
495
481
  current_iteration += 1
496
482
 
497
483
  if document_results or online_results or code_results or operator_results or this_iteration.warning:
498
- results_data = f"\n<iteration_{current_iteration}_results>"
484
+ results_data = f"\n<iteration>{current_iteration}\n<tool>{this_iteration.tool}</tool>\n<query>{this_iteration.query}</query>\n<results>"
499
485
  if document_results:
500
486
  results_data += f"\n<document_references>\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</document_references>"
501
487
  if online_results:
@@ -508,7 +494,7 @@ async def research(
508
494
  )
509
495
  if this_iteration.warning:
510
496
  results_data += f"\n<warning>\n{this_iteration.warning}\n</warning>"
511
- results_data += f"\n</results>\n</iteration_{current_iteration}_results>"
497
+ results_data += "\n</results>\n</iteration>"
512
498
 
513
499
  # intermediate_result = await extract_relevant_info(this_iteration.query, results_data, agent)
514
500
  this_iteration.summarizedResult = results_data
khoj/search_type/text_search.py CHANGED
@@ -157,7 +157,6 @@ def collate_results(hits, dedupe=True):
157
157
  "additional": {
158
158
  "source": hit.file_source,
159
159
  "file": hit.file_path,
160
- "uri": hit.url,
161
160
  "compiled": hit.compiled,
162
161
  "heading": hit.heading,
163
162
  },
@@ -181,7 +180,6 @@ def deduplicated_search_responses(hits: List[SearchResponse]):
181
180
  "additional": {
182
181
  "source": hit.additional["source"],
183
182
  "file": hit.additional["file"],
184
- "uri": hit.additional["uri"],
185
183
  "query": hit.additional["query"],
186
184
  "compiled": hit.additional["compiled"],
187
185
  "heading": hit.additional["heading"],