khoj 1.42.2.dev1__py3-none-any.whl → 1.42.2.dev19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. khoj/configure.py +2 -0
  2. khoj/database/adapters/__init__.py +6 -6
  3. khoj/interface/compiled/404/index.html +2 -2
  4. khoj/interface/compiled/_next/static/chunks/{2117-1c18aa2098982bf9.js → 2117-056a00add390772b.js} +1 -1
  5. khoj/interface/compiled/_next/static/chunks/{2327-f03b2a77f67b8f8c.js → 2327-aa22697ed9c8d54a.js} +1 -1
  6. khoj/interface/compiled/_next/static/chunks/7127-79a3af5138960272.js +1 -0
  7. khoj/interface/compiled/_next/static/chunks/{5138-81457f7f59956b56.js → 7211-7fedd2ee3655239c.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/agents/{page-2fac1d5ac7192e73.js → page-774c78ff0f55a228.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/automations/page-fc4f5a3ca7201154.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/app/chat/page-14cf05b18e3c00ad.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/app/{page-45ae5e99e8a61821.js → page-f7a0286dfc31ad6b.js} +1 -1
  12. khoj/interface/compiled/_next/static/chunks/app/search/layout-f5881c7ae3ba0795.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/app/search/{page-afb5e7ed13d221c1.js → page-f1a7f278c89e09b6.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/settings/{page-8fb6cc97be8774a7.js → page-5d9134d4a97f8834.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-abb6c5f4239ad7be.js +1 -0
  16. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-9a167dc9b5fcd464.js → page-bcc8f41edcfbcb6c.js} +1 -1
  17. khoj/interface/compiled/_next/static/chunks/{main-876327ac335776ab.js → main-63d6432f34cdf74b.js} +1 -1
  18. khoj/interface/compiled/_next/static/chunks/{webpack-1c900156837baf90.js → webpack-f39eedae8f597e56.js} +1 -1
  19. khoj/interface/compiled/_next/static/css/{c34713c98384ee87.css → 4398afc6d5a37666.css} +1 -1
  20. khoj/interface/compiled/_next/static/css/{9c223d337a984468.css → 7017ee76c2f2cd87.css} +1 -1
  21. khoj/interface/compiled/_next/static/css/e1bf03aa79521f86.css +1 -0
  22. khoj/interface/compiled/agents/index.html +2 -2
  23. khoj/interface/compiled/agents/index.txt +2 -2
  24. khoj/interface/compiled/automations/index.html +2 -2
  25. khoj/interface/compiled/automations/index.txt +3 -3
  26. khoj/interface/compiled/chat/index.html +2 -2
  27. khoj/interface/compiled/chat/index.txt +2 -2
  28. khoj/interface/compiled/index.html +2 -2
  29. khoj/interface/compiled/index.txt +2 -2
  30. khoj/interface/compiled/search/index.html +2 -2
  31. khoj/interface/compiled/search/index.txt +2 -2
  32. khoj/interface/compiled/settings/index.html +2 -2
  33. khoj/interface/compiled/settings/index.txt +4 -4
  34. khoj/interface/compiled/share/chat/index.html +2 -2
  35. khoj/interface/compiled/share/chat/index.txt +2 -2
  36. khoj/processor/conversation/anthropic/anthropic_chat.py +17 -132
  37. khoj/processor/conversation/anthropic/utils.py +1 -1
  38. khoj/processor/conversation/google/gemini_chat.py +18 -139
  39. khoj/processor/conversation/offline/chat_model.py +21 -151
  40. khoj/processor/conversation/openai/gpt.py +12 -126
  41. khoj/processor/conversation/prompts.py +2 -63
  42. khoj/routers/api.py +5 -533
  43. khoj/routers/api_automation.py +243 -0
  44. khoj/routers/api_chat.py +35 -116
  45. khoj/routers/helpers.py +329 -80
  46. khoj/routers/research.py +3 -33
  47. khoj/utils/helpers.py +0 -6
  48. {khoj-1.42.2.dev1.dist-info → khoj-1.42.2.dev19.dist-info}/METADATA +2 -2
  49. {khoj-1.42.2.dev1.dist-info → khoj-1.42.2.dev19.dist-info}/RECORD +59 -58
  50. khoj/interface/compiled/_next/static/chunks/7127-d3199617463d45f0.js +0 -1
  51. khoj/interface/compiled/_next/static/chunks/app/automations/page-465741d9149dfd48.js +0 -1
  52. khoj/interface/compiled/_next/static/chunks/app/chat/page-1726184cf1c1b86e.js +0 -1
  53. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +0 -1
  54. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +0 -1
  55. khoj/interface/compiled/_next/static/css/fca983d49c3dd1a3.css +0 -1
  56. /khoj/interface/compiled/_next/static/chunks/{1915-ab4353eaca76f690.js → 1915-1943ee8a628b893c.js} +0 -0
  57. /khoj/interface/compiled/_next/static/chunks/{4363-4efaf12abe696251.js → 4363-e6ac2203564d1a3b.js} +0 -0
  58. /khoj/interface/compiled/_next/static/chunks/{4447-5d44807c40355b1a.js → 4447-e038b251d626c340.js} +0 -0
  59. /khoj/interface/compiled/_next/static/chunks/{8667-adbe6017a66cef10.js → 8667-8136f74e9a086fca.js} +0 -0
  60. /khoj/interface/compiled/_next/static/chunks/{9259-d8bcd9da9e80c81e.js → 9259-640fdd77408475df.js} +0 -0
  61. /khoj/interface/compiled/_next/static/{Dzg_ViqMwQEjqMgetZPRc → oXUhXGFp7bJNGngoyQu8D}/_buildManifest.js +0 -0
  62. /khoj/interface/compiled/_next/static/{Dzg_ViqMwQEjqMgetZPRc → oXUhXGFp7bJNGngoyQu8D}/_ssgManifest.js +0 -0
  63. {khoj-1.42.2.dev1.dist-info → khoj-1.42.2.dev19.dist-info}/WHEEL +0 -0
  64. {khoj-1.42.2.dev1.dist-info → khoj-1.42.2.dev19.dist-info}/entry_points.txt +0 -0
  65. {khoj-1.42.2.dev1.dist-info → khoj-1.42.2.dev19.dist-info}/licenses/LICENSE +0 -0
khoj/routers/helpers.py CHANGED
@@ -1,12 +1,13 @@
1
1
  import base64
2
+ import concurrent.futures
2
3
  import hashlib
3
4
  import json
4
5
  import logging
5
6
  import math
6
7
  import os
7
8
  import re
9
+ import time
8
10
  from datetime import datetime, timedelta, timezone
9
- from functools import partial
10
11
  from random import random
11
12
  from typing import (
12
13
  Annotated,
@@ -46,6 +47,7 @@ from khoj.database.adapters import (
46
47
  aget_user_by_email,
47
48
  ais_user_subscribed,
48
49
  create_khoj_token,
50
+ get_default_search_model,
49
51
  get_khoj_tokens,
50
52
  get_user_name,
51
53
  get_user_notion_config,
@@ -101,12 +103,16 @@ from khoj.processor.conversation.utils import (
101
103
  clean_json,
102
104
  clean_mermaidjs,
103
105
  construct_chat_history,
106
+ construct_question_history,
107
+ defilter_query,
104
108
  generate_chatml_messages_with_context,
105
- save_to_conversation_log,
106
109
  )
107
110
  from khoj.processor.speech.text_to_speech import is_eleven_labs_enabled
108
111
  from khoj.routers.email import is_resend_enabled, send_task_email
109
112
  from khoj.routers.twilio import is_twilio_enabled
113
+ from khoj.search_filter.date_filter import DateFilter
114
+ from khoj.search_filter.file_filter import FileFilter
115
+ from khoj.search_filter.word_filter import WordFilter
110
116
  from khoj.search_type import text_search
111
117
  from khoj.utils import state
112
118
  from khoj.utils.config import OfflineChatProcessorModel
@@ -123,7 +129,13 @@ from khoj.utils.helpers import (
123
129
  timer,
124
130
  tool_descriptions_for_llm,
125
131
  )
126
- from khoj.utils.rawconfig import ChatRequestBody, FileAttachment, FileData, LocationData
132
+ from khoj.utils.rawconfig import (
133
+ ChatRequestBody,
134
+ FileAttachment,
135
+ FileData,
136
+ LocationData,
137
+ SearchResponse,
138
+ )
127
139
 
128
140
  logger = logging.getLogger(__name__)
129
141
 
@@ -237,8 +249,6 @@ def get_next_url(request: Request) -> str:
237
249
  def get_conversation_command(query: str) -> ConversationCommand:
238
250
  if query.startswith("/notes"):
239
251
  return ConversationCommand.Notes
240
- elif query.startswith("/help"):
241
- return ConversationCommand.Help
242
252
  elif query.startswith("/general"):
243
253
  return ConversationCommand.General
244
254
  elif query.startswith("/online"):
@@ -249,8 +259,6 @@ def get_conversation_command(query: str) -> ConversationCommand:
249
259
  return ConversationCommand.Image
250
260
  elif query.startswith("/automated_task"):
251
261
  return ConversationCommand.AutomatedTask
252
- elif query.startswith("/summarize"):
253
- return ConversationCommand.Summarize
254
262
  elif query.startswith("/diagram"):
255
263
  return ConversationCommand.Diagram
256
264
  elif query.startswith("/code"):
@@ -380,9 +388,6 @@ async def aget_data_sources_and_output_format(
380
388
  agent_outputs = agent.output_modes if agent else []
381
389
 
382
390
  for output, description in mode_descriptions_for_llm.items():
383
- # Do not allow tasks to schedule another task
384
- if is_task and output == ConversationCommand.Automation:
385
- continue
386
391
  output_options[output.value] = description
387
392
  if len(agent_outputs) == 0 or output.value in agent_outputs:
388
393
  output_options_str += f'- "{output.value}": "{description}"\n'
@@ -1151,6 +1156,276 @@ async def generate_better_image_prompt(
1151
1156
  return response
1152
1157
 
1153
1158
 
1159
async def search_documents(
    user: KhojUser,
    chat_history: list[ChatMessageModel],
    q: str,
    n: int,
    d: float,
    conversation_id: str,
    conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
    location_data: LocationData = None,
    send_status_func: Optional[Callable] = None,
    query_images: Optional[List[str]] = None,
    previous_inferred_queries: Set = set(),
    agent: Agent = None,
    query_files: str = None,
    tracer: dict = {},
):
    """
    Find knowledge base entries relevant to the user message.

    This is an async generator. While searching it yields `{ChatEvent.STATUS: event}`
    dicts (only when `send_status_func` is given). Its last yield is always a
    `(compiled_references, inferred_queries, query)` tuple, where `query` is the user
    message with filter terms removed once filters have been extracted, or the raw
    message `q` on the early exits taken before that point.

    Args:
        user: User whose documents are searched.
        chat_history: Prior messages, forwarded to `extract_questions`.
        q: Raw user message, possibly containing search filter terms.
        n: Result count forwarded to `execute_search` for each inferred query.
        d: Maximum distance forwarded to `execute_search`.
        conversation_id: Conversation whose file filters are added to the search.
        conversation_commands: Commands active for this turn. Without Notes or Default,
            the search is restricted to the agent's knowledge base.
        location_data: User location, forwarded to `extract_questions`.
        send_status_func: Optional async generator function used to emit status events.
        query_images: Images attached to the message, forwarded to `extract_questions`.
        previous_inferred_queries: Queries already run; they are not searched again.
        agent: Agent whose knowledge base may also be searched.
        query_files: Attached file content, forwarded to `extract_questions`.
        tracer: Tracing metadata, forwarded to `extract_questions`.
    """
    # Initialize Variables
    compiled_references: List[dict[str, str]] = []
    inferred_queries: List[str] = []

    agent_has_entries = False
    if agent:
        agent_has_entries = await sync_to_async(EntryAdapters.agent_has_entries)(agent=agent)

    # If Notes or Default is not in the conversation command, then the search should be
    # restricted to the agent's knowledge base
    should_limit_to_agent_knowledge = (
        ConversationCommand.Notes not in conversation_commands
        and ConversationCommand.Default not in conversation_commands
    )

    # Notes were not requested and the agent has no knowledge base: nothing to search
    if should_limit_to_agent_knowledge and not agent_has_entries:
        yield compiled_references, inferred_queries, q
        return

    user_has_entries = await sync_to_async(EntryAdapters.user_has_entries)(user=user)
    if not user_has_entries and not agent_has_entries:
        logger.debug("No documents in knowledge base. Use a Khoj client to sync and chat with your docs.")
        yield compiled_references, inferred_queries, q
        return

    # Extract filter terms from user message
    defiltered_query = defilter_query(q)
    filters_in_query = q.replace(defiltered_query, "").strip()
    conversation = await sync_to_async(ConversationAdapters.get_conversation_by_id)(conversation_id)

    if not conversation:
        logger.error(f"Conversation with id {conversation_id} not found when extracting references.")
        yield compiled_references, inferred_queries, defiltered_query
        return

    # Join with spaces so filters typed in the message and the conversation's file
    # filters never fuse into a single malformed term
    conversation_file_filters = [f'file:"{file_filter}"' for file_filter in conversation.file_filters or []]
    filters_in_query = " ".join([filters_in_query, *conversation_file_filters]).strip()
    if not is_none_or_empty(filters_in_query):
        logger.debug(f"Filters in query: {filters_in_query}")

    personality_context = prompts.personality_context.format(personality=agent.personality) if agent else ""

    # Infer search queries from user message
    with timer("Extracting search queries took", logger):
        inferred_queries = await extract_questions(
            query=defiltered_query,
            user=user,
            personality_context=personality_context,
            chat_history=chat_history,
            location_data=location_data,
            query_images=query_images,
            query_files=query_files,
            tracer=tracer,
        )

    # Drop duplicate and previously run queries, keeping the order the model proposed them in
    inferred_queries = [
        inferred_query
        for inferred_query in dict.fromkeys(inferred_queries)
        if inferred_query not in previous_inferred_queries
    ]

    # Collate search results as context for the LLM
    with timer("Searching knowledge base took", logger):
        search_results = []
        logger.info(f"🔍 Searching knowledge base with queries: {inferred_queries}")
        if send_status_func:
            inferred_queries_str = "\n- " + "\n- ".join(inferred_queries)
            async for event in send_status_func(f"**Searching Documents for:** {inferred_queries_str}"):
                yield {ChatEvent.STATUS: event}

        # Searching without a user limits results to the agent's knowledge base
        search_user = None if should_limit_to_agent_knowledge else user
        for inferred_query in inferred_queries:
            search_results.extend(
                await execute_search(
                    search_user,
                    f"{inferred_query} {filters_in_query}",
                    n=n,
                    t=state.SearchType.All,
                    r=True,
                    max_distance=d,
                    dedupe=False,
                    agent=agent,
                )
            )
        search_results = text_search.deduplicated_search_responses(search_results)
        # NOTE(review): zip pairs the i-th deduplicated result with the i-th inferred query
        # and truncates to the shorter of the two. The attributed query may not be the one
        # that produced the result, and results beyond len(inferred_queries) are dropped.
        # Left unchanged as the originating query is not tracked here; confirm intent.
        compiled_references = [
            {"query": inferred_query, "compiled": item.additional["compiled"], "file": item.additional["file"]}
            for inferred_query, item in zip(inferred_queries, search_results)
        ]

    yield compiled_references, inferred_queries, defiltered_query
1262
+
1263
+
1264
async def extract_questions(
    query: str,
    user: KhojUser,
    personality_context: str = "",
    chat_history: List[ChatMessageModel] = [],
    location_data: LocationData = None,
    query_images: Optional[List[str]] = None,
    query_files: str = None,
    tracer: dict = {},
):
    """
    Infer document search queries from user message and provided context

    Asks the chat model for a JSON object of the form `{"queries": [...]}` and returns
    the non-empty, whitespace-stripped queries from it. Falls back to `[query]` when the
    model response cannot be parsed or holds no usable queries.

    Args:
        query: User message to derive search queries from.
        user: User making the request. Their full name, when set, is added to the prompt.
        personality_context: Agent personality text inserted into the system prompt.
        chat_history: Prior messages used to build the prompt and passed to the model.
        location_data: User location inserted into the system prompt ("N/A" when unset).
        query_images: Images attached to the message, passed to the model.
        query_files: Attached file content, passed to the model.
        tracer: Tracing metadata, passed to the model wrapper.

    Returns:
        List of search queries to run on the user's documents.
    """
    # Shared context setup
    location = f"{location_data}" if location_data else "N/A"
    username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""

    # Date variables for prompt formatting
    today = datetime.today()
    current_new_year = today.replace(month=1, day=1)
    last_new_year = current_new_year.replace(year=today.year - 1)
    yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")

    # Common prompt setup for API-based models (using Anthropic prompts for consistency)
    chat_history_str = construct_question_history(chat_history, query_prefix="User", agent_name="Assistant")

    system_prompt = prompts.extract_questions_system_prompt.format(
        current_date=today.strftime("%Y-%m-%d"),
        day_of_week=today.strftime("%A"),
        current_month=today.strftime("%Y-%m"),
        last_new_year=last_new_year.strftime("%Y"),
        last_new_year_date=last_new_year.strftime("%Y-%m-%d"),
        current_new_year_date=current_new_year.strftime("%Y-%m-%d"),
        yesterday_date=yesterday,
        location=location,
        username=username,
        personality_context=personality_context,
    )

    prompt = prompts.extract_questions_user_message.format(text=query, chat_history=chat_history_str)

    class DocumentQueries(BaseModel):
        """Choose searches to run on user documents."""

        queries: List[str] = Field(..., min_items=1, description="List of search queries to run on user documents.")

    raw_response = await send_message_to_model_wrapper(
        system_message=system_prompt,
        query=prompt,
        query_images=query_images,
        query_files=query_files,
        chat_history=chat_history,
        response_type="json_object",
        response_schema=DocumentQueries,
        user=user,
        tracer=tracer,
    )

    # Extract questions from the response
    try:
        response = pyjson5.loads(clean_json(raw_response))
        candidate_queries = response["queries"]
    except Exception:
        # Best effort: any parse or lookup failure falls back to the user's own message
        logger.warning("LLM returned invalid JSON. Falling back to using user message as search query.")
        return [query]

    # Validate the container type before iterating. Iterating a bare string would
    # silently yield one "query" per character.
    if not isinstance(candidate_queries, list):
        logger.error(f"Invalid response for constructing subqueries: {response}")
        return [query]

    queries = [
        candidate.strip() for candidate in candidate_queries if isinstance(candidate, str) and candidate.strip()
    ]
    if not queries:
        logger.error(f"Invalid response for constructing subqueries: {response}")
        return [query]
    return queries
1334
+
1335
+
1336
async def execute_search(
    user: KhojUser,
    q: str,
    n: Optional[int] = 5,
    t: Optional[state.SearchType] = None,
    r: Optional[bool] = False,
    max_distance: Optional[Union[float, None]] = None,
    dedupe: Optional[bool] = True,
    agent: Optional[Agent] = None,
):
    """
    Search the knowledge base for entries matching a query.

    Results are cached per user in `state.query_cache`. No caching happens when `user`
    is falsy.

    Args:
        user: User to search on behalf of. May be None, e.g. for agent-only search.
        q: Search query, optionally containing date, word or file filter terms.
        n: Maximum number of results to return. Falls back to 5 when falsy.
        t: Content type to search. Falls back to `state.SearchType.All` when falsy.
        r: Passed to `text_search.rerank_and_sort_results` as `rank_results`.
        max_distance: Passed through to `text_search.query`.
        dedupe: Passed through to `text_search.collate_results`.
        agent: Agent whose knowledge base should be included in the search.

    Returns:
        List of search responses, empty when the query is empty or the agent is not
        accessible by the user.
    """
    # Run validation checks
    results: List[SearchResponse] = []

    start_time = time.time()

    # Ensure the agent, if present, is accessible by the user
    if user and agent and not await AgentAdapters.ais_agent_accessible(agent, user):
        logger.error(f"Agent {agent.slug} is not accessible by user {user}")
        return results

    if q is None or q == "":
        logger.warning("No query param (q) passed in API call to initiate search")
        return results

    # initialize variables
    user_query = q.strip()
    results_count = n or 5
    t = t or state.SearchType.All
    search_futures: List[concurrent.futures.Future] = []

    # return cached results, if available
    if user:
        # The agent is part of the key because it is passed to the search below. Without
        # it, results cached for one agent would be served for another.
        agent_key = agent.slug if agent else None
        query_cache_key = f"{user_query}-{results_count}-{t}-{r}-{max_distance}-{dedupe}-{agent_key}"
        if query_cache_key in state.query_cache[user.uuid]:
            logger.debug("Return response from query cache")
            return state.query_cache[user.uuid][query_cache_key]

    # Encode query with filter terms removed
    defiltered_query = user_query
    for search_filter in [DateFilter(), WordFilter(), FileFilter()]:
        defiltered_query = search_filter.defilter(defiltered_query)

    # Load the search model for every search type. It is needed by the rerank step below
    # even when the query is not encoded.
    search_model = await sync_to_async(get_default_search_model)()

    encoded_asymmetric_query = None
    if t != state.SearchType.Image:
        with timer("Encoding query took", logger=logger):
            encoded_asymmetric_query = state.embeddings_model[search_model.name].embed_query(defiltered_query)

    with concurrent.futures.ThreadPoolExecutor() as executor:
        if t in [
            state.SearchType.All,
            state.SearchType.Org,
            state.SearchType.Markdown,
            state.SearchType.Github,
            state.SearchType.Notion,
            state.SearchType.Plaintext,
            state.SearchType.Pdf,
        ]:
            # query markdown notes
            search_futures += [
                executor.submit(
                    text_search.query,
                    user_query,
                    user,
                    t,
                    question_embedding=encoded_asymmetric_query,
                    max_distance=max_distance,
                    agent=agent,
                )
            ]

        # Query across each requested content types in parallel
        with timer("Query took", logger):
            for search_future in concurrent.futures.as_completed(search_futures):
                # NOTE(review): the future's result is awaited, so text_search.query presumably
                # returns an awaitable and the actual search runs here, not in the worker
                # thread. Confirm before relying on the executor for parallelism.
                hits = await search_future.result()
                # Collate results
                results += text_search.collate_results(hits, dedupe=dedupe)

            # Sort results across all content types and take top results
            results = text_search.rerank_and_sort_results(
                results, query=defiltered_query, rank_results=r, search_model_name=search_model.name
            )[:results_count]

    # Cache results
    if user:
        state.query_cache[user.uuid][query_cache_key] = results

    end_time = time.time()
    logger.debug(f"🔍 Search took: {end_time - start_time:.3f} seconds")

    return results
1427
+
1428
+
1154
1429
  async def send_message_to_model_wrapper(
1155
1430
  query: str,
1156
1431
  system_message: str = "",
@@ -1350,54 +1625,24 @@ async def agenerate_chat_response(
1350
1625
  code_results: Dict[str, Dict] = {},
1351
1626
  operator_results: List[OperatorRun] = [],
1352
1627
  research_results: List[ResearchIteration] = [],
1353
- inferred_queries: List[str] = [],
1354
- conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
1355
1628
  user: KhojUser = None,
1356
- client_application: ClientApplication = None,
1357
1629
  location_data: LocationData = None,
1358
1630
  user_name: Optional[str] = None,
1359
1631
  query_images: Optional[List[str]] = None,
1360
- train_of_thought: List[Any] = [],
1361
1632
  query_files: str = None,
1362
- raw_query_files: List[FileAttachment] = None,
1363
- generated_images: List[str] = None,
1364
1633
  raw_generated_files: List[FileAttachment] = [],
1365
- generated_mermaidjs_diagram: str = None,
1366
1634
  program_execution_context: List[str] = [],
1367
1635
  generated_asset_results: Dict[str, Dict] = {},
1368
1636
  is_subscribed: bool = False,
1369
1637
  tracer: dict = {},
1370
- ) -> Tuple[AsyncGenerator[str | ResponseWithThought, None], Dict[str, str]]:
1638
+ ) -> Tuple[AsyncGenerator[ResponseWithThought, None], Dict[str, str]]:
1371
1639
  # Initialize Variables
1372
- chat_response_generator: AsyncGenerator[str | ResponseWithThought, None] = None
1373
- logger.debug(f"Conversation Types: {conversation_commands}")
1640
+ chat_response_generator: AsyncGenerator[ResponseWithThought, None] = None
1374
1641
 
1375
1642
  metadata = {}
1376
1643
  agent = await AgentAdapters.aget_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
1377
1644
 
1378
1645
  try:
1379
- partial_completion = partial(
1380
- save_to_conversation_log,
1381
- q,
1382
- user=user,
1383
- chat_history=chat_history,
1384
- compiled_references=compiled_references,
1385
- online_results=online_results,
1386
- code_results=code_results,
1387
- operator_results=operator_results,
1388
- research_results=research_results,
1389
- inferred_queries=inferred_queries,
1390
- client_application=client_application,
1391
- conversation_id=str(conversation.id),
1392
- query_images=query_images,
1393
- train_of_thought=train_of_thought,
1394
- raw_query_files=raw_query_files,
1395
- generated_images=generated_images,
1396
- raw_generated_files=raw_generated_files,
1397
- generated_mermaidjs_diagram=generated_mermaidjs_diagram,
1398
- tracer=tracer,
1399
- )
1400
-
1401
1646
  query_to_run = q
1402
1647
  deepthought = False
1403
1648
  if research_results:
@@ -1421,22 +1666,23 @@ async def agenerate_chat_response(
1421
1666
  if chat_model.model_type == "offline":
1422
1667
  loaded_model = state.offline_chat_processor_config.loaded_model
1423
1668
  chat_response_generator = converse_offline(
1669
+ # Query
1424
1670
  user_query=query_to_run,
1671
+ # Context
1425
1672
  references=compiled_references,
1426
1673
  online_results=online_results,
1427
- loaded_model=loaded_model,
1674
+ generated_files=raw_generated_files,
1675
+ generated_asset_results=generated_asset_results,
1676
+ location_data=location_data,
1677
+ user_name=user_name,
1678
+ query_files=query_files,
1428
1679
  chat_history=chat_history,
1429
- completion_func=partial_completion,
1430
- conversation_commands=conversation_commands,
1680
+ # Model
1681
+ loaded_model=loaded_model,
1431
1682
  model_name=chat_model.name,
1432
1683
  max_prompt_size=chat_model.max_prompt_size,
1433
1684
  tokenizer_name=chat_model.tokenizer,
1434
- location_data=location_data,
1435
- user_name=user_name,
1436
1685
  agent=agent,
1437
- query_files=query_files,
1438
- generated_files=raw_generated_files,
1439
- generated_asset_results=generated_asset_results,
1440
1686
  tracer=tracer,
1441
1687
  )
1442
1688
 
@@ -1445,28 +1691,29 @@ async def agenerate_chat_response(
1445
1691
  api_key = openai_chat_config.api_key
1446
1692
  chat_model_name = chat_model.name
1447
1693
  chat_response_generator = converse_openai(
1694
+ # Query
1448
1695
  query_to_run,
1449
- compiled_references,
1450
- query_images=query_images,
1696
+ # Context
1697
+ references=compiled_references,
1451
1698
  online_results=online_results,
1452
1699
  code_results=code_results,
1453
1700
  operator_results=operator_results,
1701
+ query_images=query_images,
1702
+ query_files=query_files,
1703
+ generated_files=raw_generated_files,
1704
+ generated_asset_results=generated_asset_results,
1705
+ program_execution_context=program_execution_context,
1706
+ location_data=location_data,
1707
+ user_name=user_name,
1454
1708
  chat_history=chat_history,
1709
+ # Model
1455
1710
  model=chat_model_name,
1456
1711
  api_key=api_key,
1457
1712
  api_base_url=openai_chat_config.api_base_url,
1458
- completion_func=partial_completion,
1459
- conversation_commands=conversation_commands,
1460
1713
  max_prompt_size=chat_model.max_prompt_size,
1461
1714
  tokenizer_name=chat_model.tokenizer,
1462
- location_data=location_data,
1463
- user_name=user_name,
1464
1715
  agent=agent,
1465
1716
  vision_available=vision_available,
1466
- query_files=query_files,
1467
- generated_files=raw_generated_files,
1468
- generated_asset_results=generated_asset_results,
1469
- program_execution_context=program_execution_context,
1470
1717
  deepthought=deepthought,
1471
1718
  tracer=tracer,
1472
1719
  )
@@ -1475,28 +1722,29 @@ async def agenerate_chat_response(
1475
1722
  api_key = chat_model.ai_model_api.api_key
1476
1723
  api_base_url = chat_model.ai_model_api.api_base_url
1477
1724
  chat_response_generator = converse_anthropic(
1725
+ # Query
1478
1726
  query_to_run,
1479
- compiled_references,
1480
- query_images=query_images,
1727
+ # Context
1728
+ references=compiled_references,
1481
1729
  online_results=online_results,
1482
1730
  code_results=code_results,
1483
1731
  operator_results=operator_results,
1732
+ query_images=query_images,
1733
+ query_files=query_files,
1734
+ generated_files=raw_generated_files,
1735
+ generated_asset_results=generated_asset_results,
1736
+ program_execution_context=program_execution_context,
1737
+ location_data=location_data,
1738
+ user_name=user_name,
1484
1739
  chat_history=chat_history,
1740
+ # Model
1485
1741
  model=chat_model.name,
1486
1742
  api_key=api_key,
1487
1743
  api_base_url=api_base_url,
1488
- completion_func=partial_completion,
1489
- conversation_commands=conversation_commands,
1490
1744
  max_prompt_size=chat_model.max_prompt_size,
1491
1745
  tokenizer_name=chat_model.tokenizer,
1492
- location_data=location_data,
1493
- user_name=user_name,
1494
1746
  agent=agent,
1495
1747
  vision_available=vision_available,
1496
- query_files=query_files,
1497
- generated_files=raw_generated_files,
1498
- generated_asset_results=generated_asset_results,
1499
- program_execution_context=program_execution_context,
1500
1748
  deepthought=deepthought,
1501
1749
  tracer=tracer,
1502
1750
  )
@@ -1504,28 +1752,29 @@ async def agenerate_chat_response(
1504
1752
  api_key = chat_model.ai_model_api.api_key
1505
1753
  api_base_url = chat_model.ai_model_api.api_base_url
1506
1754
  chat_response_generator = converse_gemini(
1755
+ # Query
1507
1756
  query_to_run,
1508
- compiled_references,
1757
+ # Context
1758
+ references=compiled_references,
1509
1759
  online_results=online_results,
1510
1760
  code_results=code_results,
1511
1761
  operator_results=operator_results,
1762
+ query_images=query_images,
1763
+ query_files=query_files,
1764
+ generated_files=raw_generated_files,
1765
+ generated_asset_results=generated_asset_results,
1766
+ program_execution_context=program_execution_context,
1767
+ location_data=location_data,
1768
+ user_name=user_name,
1512
1769
  chat_history=chat_history,
1770
+ # Model
1513
1771
  model=chat_model.name,
1514
1772
  api_key=api_key,
1515
1773
  api_base_url=api_base_url,
1516
- completion_func=partial_completion,
1517
- conversation_commands=conversation_commands,
1518
1774
  max_prompt_size=chat_model.max_prompt_size,
1519
1775
  tokenizer_name=chat_model.tokenizer,
1520
- location_data=location_data,
1521
- user_name=user_name,
1522
1776
  agent=agent,
1523
- query_images=query_images,
1524
1777
  vision_available=vision_available,
1525
- query_files=query_files,
1526
- generated_files=raw_generated_files,
1527
- generated_asset_results=generated_asset_results,
1528
- program_execution_context=program_execution_context,
1529
1778
  deepthought=deepthought,
1530
1779
  tracer=tracer,
1531
1780
  )
khoj/routers/research.py CHANGED
@@ -22,10 +22,10 @@ from khoj.processor.conversation.utils import (
22
22
  from khoj.processor.operator import operate_environment
23
23
  from khoj.processor.tools.online_search import read_webpages, search_online
24
24
  from khoj.processor.tools.run_code import run_code
25
- from khoj.routers.api import extract_references_and_questions
26
25
  from khoj.routers.helpers import (
27
26
  ChatEvent,
28
27
  generate_summary_from_files,
28
+ search_documents,
29
29
  send_message_to_model_wrapper,
30
30
  )
31
31
  from khoj.utils.helpers import (
@@ -273,7 +273,6 @@ async def research(
273
273
  code_results: Dict = dict()
274
274
  document_results: List[Dict[str, str]] = []
275
275
  operator_results: OperatorRun = None
276
- summarize_files: str = ""
277
276
  this_iteration = ResearchIteration(tool=None, query=query)
278
277
 
279
278
  async for result in apick_next_tool(
@@ -313,7 +312,7 @@ async def research(
313
312
  previous_inferred_queries = {
314
313
  c["query"] for iteration in previous_iterations if iteration.context for c in iteration.context
315
314
  }
316
- async for result in extract_references_and_questions(
315
+ async for result in search_documents(
317
316
  user,
318
317
  construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
319
318
  this_iteration.query,
@@ -473,40 +472,13 @@ async def research(
473
472
  this_iteration.warning = f"Error operating browser: {e}"
474
473
  logger.error(this_iteration.warning, exc_info=True)
475
474
 
476
- elif this_iteration.tool == ConversationCommand.Summarize:
477
- try:
478
- async for result in generate_summary_from_files(
479
- this_iteration.query,
480
- user,
481
- file_filters,
482
- construct_tool_chat_history(previous_iterations, ConversationCommand.Summarize),
483
- query_images=query_images,
484
- agent=agent,
485
- send_status_func=send_status_func,
486
- query_files=query_files,
487
- ):
488
- if isinstance(result, dict) and ChatEvent.STATUS in result:
489
- yield result[ChatEvent.STATUS]
490
- else:
491
- summarize_files = result # type: ignore
492
- except Exception as e:
493
- this_iteration.warning = f"Error summarizing files: {e}"
494
- logger.error(this_iteration.warning, exc_info=True)
495
-
496
475
  else:
497
476
  # No valid tools. This is our exit condition.
498
477
  current_iteration = MAX_ITERATIONS
499
478
 
500
479
  current_iteration += 1
501
480
 
502
- if (
503
- document_results
504
- or online_results
505
- or code_results
506
- or operator_results
507
- or summarize_files
508
- or this_iteration.warning
509
- ):
481
+ if document_results or online_results or code_results or operator_results or this_iteration.warning:
510
482
  results_data = f"\n<iteration>{current_iteration}\n<tool>{this_iteration.tool}</tool>\n<query>{this_iteration.query}</query>\n<results>"
511
483
  if document_results:
512
484
  results_data += f"\n<document_references>\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</document_references>"
@@ -518,8 +490,6 @@ async def research(
518
490
  results_data += (
519
491
  f"\n<browser_operator_results>\n{operator_results.response}\n</browser_operator_results>"
520
492
  )
521
- if summarize_files:
522
- results_data += f"\n<summarized_files>\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</summarized_files>"
523
493
  if this_iteration.warning:
524
494
  results_data += f"\n<warning>\n{this_iteration.warning}\n</warning>"
525
495
  results_data += "\n</results>\n</iteration>"
khoj/utils/helpers.py CHANGED
@@ -338,15 +338,12 @@ class ConversationCommand(str, Enum):
338
338
  Default = "default"
339
339
  General = "general"
340
340
  Notes = "notes"
341
- Help = "help"
342
341
  Online = "online"
343
342
  Webpage = "webpage"
344
343
  Code = "code"
345
344
  Image = "image"
346
345
  Text = "text"
347
- Automation = "automation"
348
346
  AutomatedTask = "automated_task"
349
- Summarize = "summarize"
350
347
  Diagram = "diagram"
351
348
  Research = "research"
352
349
  Operator = "operator"
@@ -360,9 +357,6 @@ command_descriptions = {
360
357
  ConversationCommand.Webpage: "Get information from webpage suggested by you.",
361
358
  ConversationCommand.Code: "Run Python code to parse information, run complex calculations, create documents and charts.",
362
359
  ConversationCommand.Image: "Generate illustrative, creative images by describing your imagination in words.",
363
- ConversationCommand.Automation: "Automatically run your query at a specified time or interval.",
364
- ConversationCommand.Help: "Get help with how to use or setup Khoj from the documentation",
365
- ConversationCommand.Summarize: "Get help with a question pertaining to an entire document.",
366
360
  ConversationCommand.Diagram: "Draw a flowchart, diagram, or any other visual representation best expressed with primitives like lines, rectangles, and text.",
367
361
  ConversationCommand.Research: "Do deep research on a topic. This will take longer than usual, but give a more detailed, comprehensive answer.",
368
362
  ConversationCommand.Operator: "Operate and perform tasks using a computer.",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: khoj
3
- Version: 1.42.2.dev1
3
+ Version: 1.42.2.dev19
4
4
  Summary: Your Second Brain
5
5
  Project-URL: Homepage, https://khoj.dev
6
6
  Project-URL: Documentation, https://docs.khoj.dev
@@ -33,7 +33,7 @@ Requires-Dist: defusedxml==0.7.1
33
33
  Requires-Dist: django-apscheduler==0.7.0
34
34
  Requires-Dist: django-phonenumber-field==7.3.0
35
35
  Requires-Dist: django-unfold==0.42.0
36
- Requires-Dist: django==5.1.9
36
+ Requires-Dist: django==5.1.10
37
37
  Requires-Dist: docx2txt==0.8
38
38
  Requires-Dist: e2b-code-interpreter~=1.0.0
39
39
  Requires-Dist: einops==0.8.0