khoj 1.42.2.dev1__py3-none-any.whl → 1.42.2.dev16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +2 -0
- khoj/database/adapters/__init__.py +6 -6
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{2327-f03b2a77f67b8f8c.js → 2327-aa22697ed9c8d54a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/7127-79a3af5138960272.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{5138-81457f7f59956b56.js → 7211-7fedd2ee3655239c.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-ef89ac958e78aa81.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-db0fbea54ccea62f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-9a167dc9b5fcd464.js → page-da90c78180a86040.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-1c900156837baf90.js → webpack-0f15e6b51732b337.js} +1 -1
- khoj/interface/compiled/_next/static/css/{c34713c98384ee87.css → 2945c4a857922f3b.css} +1 -1
- khoj/interface/compiled/_next/static/css/{9c223d337a984468.css → 7017ee76c2f2cd87.css} +1 -1
- khoj/interface/compiled/_next/static/css/9a460202d29476e5.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +4 -4
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +17 -132
- khoj/processor/conversation/anthropic/utils.py +1 -1
- khoj/processor/conversation/google/gemini_chat.py +18 -139
- khoj/processor/conversation/offline/chat_model.py +21 -151
- khoj/processor/conversation/openai/gpt.py +12 -126
- khoj/processor/conversation/prompts.py +2 -63
- khoj/routers/api.py +5 -533
- khoj/routers/api_automation.py +243 -0
- khoj/routers/api_chat.py +35 -116
- khoj/routers/helpers.py +329 -80
- khoj/routers/research.py +3 -33
- khoj/utils/helpers.py +0 -6
- {khoj-1.42.2.dev1.dist-info → khoj-1.42.2.dev16.dist-info}/METADATA +1 -1
- {khoj-1.42.2.dev1.dist-info → khoj-1.42.2.dev16.dist-info}/RECORD +54 -53
- khoj/interface/compiled/_next/static/chunks/7127-d3199617463d45f0.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/page-465741d9149dfd48.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-1726184cf1c1b86e.js +0 -1
- khoj/interface/compiled/_next/static/css/fca983d49c3dd1a3.css +0 -1
- /khoj/interface/compiled/_next/static/{Dzg_ViqMwQEjqMgetZPRc → OTsOjbrtuaYMukpuJS4sy}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{Dzg_ViqMwQEjqMgetZPRc → OTsOjbrtuaYMukpuJS4sy}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-ab4353eaca76f690.js → 1915-1943ee8a628b893c.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-1c18aa2098982bf9.js → 2117-5a41630a2bd2eae8.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4363-4efaf12abe696251.js → 4363-e6ac2203564d1a3b.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-5d44807c40355b1a.js → 4447-e038b251d626c340.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-adbe6017a66cef10.js → 8667-8136f74e9a086fca.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9259-d8bcd9da9e80c81e.js → 9259-640fdd77408475df.js} +0 -0
- {khoj-1.42.2.dev1.dist-info → khoj-1.42.2.dev16.dist-info}/WHEEL +0 -0
- {khoj-1.42.2.dev1.dist-info → khoj-1.42.2.dev16.dist-info}/entry_points.txt +0 -0
- {khoj-1.42.2.dev1.dist-info → khoj-1.42.2.dev16.dist-info}/licenses/LICENSE +0 -0
khoj/routers/helpers.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
import base64
|
2
|
+
import concurrent.futures
|
2
3
|
import hashlib
|
3
4
|
import json
|
4
5
|
import logging
|
5
6
|
import math
|
6
7
|
import os
|
7
8
|
import re
|
9
|
+
import time
|
8
10
|
from datetime import datetime, timedelta, timezone
|
9
|
-
from functools import partial
|
10
11
|
from random import random
|
11
12
|
from typing import (
|
12
13
|
Annotated,
|
@@ -46,6 +47,7 @@ from khoj.database.adapters import (
|
|
46
47
|
aget_user_by_email,
|
47
48
|
ais_user_subscribed,
|
48
49
|
create_khoj_token,
|
50
|
+
get_default_search_model,
|
49
51
|
get_khoj_tokens,
|
50
52
|
get_user_name,
|
51
53
|
get_user_notion_config,
|
@@ -101,12 +103,16 @@ from khoj.processor.conversation.utils import (
|
|
101
103
|
clean_json,
|
102
104
|
clean_mermaidjs,
|
103
105
|
construct_chat_history,
|
106
|
+
construct_question_history,
|
107
|
+
defilter_query,
|
104
108
|
generate_chatml_messages_with_context,
|
105
|
-
save_to_conversation_log,
|
106
109
|
)
|
107
110
|
from khoj.processor.speech.text_to_speech import is_eleven_labs_enabled
|
108
111
|
from khoj.routers.email import is_resend_enabled, send_task_email
|
109
112
|
from khoj.routers.twilio import is_twilio_enabled
|
113
|
+
from khoj.search_filter.date_filter import DateFilter
|
114
|
+
from khoj.search_filter.file_filter import FileFilter
|
115
|
+
from khoj.search_filter.word_filter import WordFilter
|
110
116
|
from khoj.search_type import text_search
|
111
117
|
from khoj.utils import state
|
112
118
|
from khoj.utils.config import OfflineChatProcessorModel
|
@@ -123,7 +129,13 @@ from khoj.utils.helpers import (
|
|
123
129
|
timer,
|
124
130
|
tool_descriptions_for_llm,
|
125
131
|
)
|
126
|
-
from khoj.utils.rawconfig import
|
132
|
+
from khoj.utils.rawconfig import (
|
133
|
+
ChatRequestBody,
|
134
|
+
FileAttachment,
|
135
|
+
FileData,
|
136
|
+
LocationData,
|
137
|
+
SearchResponse,
|
138
|
+
)
|
127
139
|
|
128
140
|
logger = logging.getLogger(__name__)
|
129
141
|
|
@@ -237,8 +249,6 @@ def get_next_url(request: Request) -> str:
|
|
237
249
|
def get_conversation_command(query: str) -> ConversationCommand:
|
238
250
|
if query.startswith("/notes"):
|
239
251
|
return ConversationCommand.Notes
|
240
|
-
elif query.startswith("/help"):
|
241
|
-
return ConversationCommand.Help
|
242
252
|
elif query.startswith("/general"):
|
243
253
|
return ConversationCommand.General
|
244
254
|
elif query.startswith("/online"):
|
@@ -249,8 +259,6 @@ def get_conversation_command(query: str) -> ConversationCommand:
|
|
249
259
|
return ConversationCommand.Image
|
250
260
|
elif query.startswith("/automated_task"):
|
251
261
|
return ConversationCommand.AutomatedTask
|
252
|
-
elif query.startswith("/summarize"):
|
253
|
-
return ConversationCommand.Summarize
|
254
262
|
elif query.startswith("/diagram"):
|
255
263
|
return ConversationCommand.Diagram
|
256
264
|
elif query.startswith("/code"):
|
@@ -380,9 +388,6 @@ async def aget_data_sources_and_output_format(
|
|
380
388
|
agent_outputs = agent.output_modes if agent else []
|
381
389
|
|
382
390
|
for output, description in mode_descriptions_for_llm.items():
|
383
|
-
# Do not allow tasks to schedule another task
|
384
|
-
if is_task and output == ConversationCommand.Automation:
|
385
|
-
continue
|
386
391
|
output_options[output.value] = description
|
387
392
|
if len(agent_outputs) == 0 or output.value in agent_outputs:
|
388
393
|
output_options_str += f'- "{output.value}": "{description}"\n'
|
@@ -1151,6 +1156,276 @@ async def generate_better_image_prompt(
|
|
1151
1156
|
return response
|
1152
1157
|
|
1153
1158
|
|
1159
|
+
async def search_documents(
|
1160
|
+
user: KhojUser,
|
1161
|
+
chat_history: list[ChatMessageModel],
|
1162
|
+
q: str,
|
1163
|
+
n: int,
|
1164
|
+
d: float,
|
1165
|
+
conversation_id: str,
|
1166
|
+
conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
|
1167
|
+
location_data: LocationData = None,
|
1168
|
+
send_status_func: Optional[Callable] = None,
|
1169
|
+
query_images: Optional[List[str]] = None,
|
1170
|
+
previous_inferred_queries: Set = set(),
|
1171
|
+
agent: Agent = None,
|
1172
|
+
query_files: str = None,
|
1173
|
+
tracer: dict = {},
|
1174
|
+
):
|
1175
|
+
# Initialize Variables
|
1176
|
+
compiled_references: List[dict[str, str]] = []
|
1177
|
+
inferred_queries: List[str] = []
|
1178
|
+
|
1179
|
+
agent_has_entries = False
|
1180
|
+
|
1181
|
+
if agent:
|
1182
|
+
agent_has_entries = await sync_to_async(EntryAdapters.agent_has_entries)(agent=agent)
|
1183
|
+
|
1184
|
+
if (
|
1185
|
+
not ConversationCommand.Notes in conversation_commands
|
1186
|
+
and not ConversationCommand.Default in conversation_commands
|
1187
|
+
and not agent_has_entries
|
1188
|
+
):
|
1189
|
+
yield compiled_references, inferred_queries, q
|
1190
|
+
return
|
1191
|
+
|
1192
|
+
# If Notes or Default is not in the conversation command, then the search should be restricted to the agent's knowledge base
|
1193
|
+
should_limit_to_agent_knowledge = (
|
1194
|
+
ConversationCommand.Notes not in conversation_commands
|
1195
|
+
and ConversationCommand.Default not in conversation_commands
|
1196
|
+
)
|
1197
|
+
|
1198
|
+
if not await sync_to_async(EntryAdapters.user_has_entries)(user=user):
|
1199
|
+
if not agent_has_entries:
|
1200
|
+
logger.debug("No documents in knowledge base. Use a Khoj client to sync and chat with your docs.")
|
1201
|
+
yield compiled_references, inferred_queries, q
|
1202
|
+
return
|
1203
|
+
|
1204
|
+
# Extract filter terms from user message
|
1205
|
+
defiltered_query = defilter_query(q)
|
1206
|
+
filters_in_query = q.replace(defiltered_query, "").strip()
|
1207
|
+
conversation = await sync_to_async(ConversationAdapters.get_conversation_by_id)(conversation_id)
|
1208
|
+
|
1209
|
+
if not conversation:
|
1210
|
+
logger.error(f"Conversation with id {conversation_id} not found when extracting references.")
|
1211
|
+
yield compiled_references, inferred_queries, defiltered_query
|
1212
|
+
return
|
1213
|
+
|
1214
|
+
filters_in_query += " ".join([f'file:"{filter}"' for filter in conversation.file_filters])
|
1215
|
+
if is_none_or_empty(filters_in_query):
|
1216
|
+
logger.debug(f"Filters in query: {filters_in_query}")
|
1217
|
+
|
1218
|
+
personality_context = prompts.personality_context.format(personality=agent.personality) if agent else ""
|
1219
|
+
|
1220
|
+
# Infer search queries from user message
|
1221
|
+
with timer("Extracting search queries took", logger):
|
1222
|
+
inferred_queries = await extract_questions(
|
1223
|
+
query=defiltered_query,
|
1224
|
+
user=user,
|
1225
|
+
personality_context=personality_context,
|
1226
|
+
chat_history=chat_history,
|
1227
|
+
location_data=location_data,
|
1228
|
+
query_images=query_images,
|
1229
|
+
query_files=query_files,
|
1230
|
+
tracer=tracer,
|
1231
|
+
)
|
1232
|
+
|
1233
|
+
# Collate search results as context for the LLM
|
1234
|
+
inferred_queries = list(set(inferred_queries) - previous_inferred_queries)
|
1235
|
+
with timer("Searching knowledge base took", logger):
|
1236
|
+
search_results = []
|
1237
|
+
logger.info(f"🔍 Searching knowledge base with queries: {inferred_queries}")
|
1238
|
+
if send_status_func:
|
1239
|
+
inferred_queries_str = "\n- " + "\n- ".join(inferred_queries)
|
1240
|
+
async for event in send_status_func(f"**Searching Documents for:** {inferred_queries_str}"):
|
1241
|
+
yield {ChatEvent.STATUS: event}
|
1242
|
+
for query in inferred_queries:
|
1243
|
+
search_results.extend(
|
1244
|
+
await execute_search(
|
1245
|
+
user if not should_limit_to_agent_knowledge else None,
|
1246
|
+
f"{query} {filters_in_query}",
|
1247
|
+
n=n,
|
1248
|
+
t=state.SearchType.All,
|
1249
|
+
r=True,
|
1250
|
+
max_distance=d,
|
1251
|
+
dedupe=False,
|
1252
|
+
agent=agent,
|
1253
|
+
)
|
1254
|
+
)
|
1255
|
+
search_results = text_search.deduplicated_search_responses(search_results)
|
1256
|
+
compiled_references = [
|
1257
|
+
{"query": q, "compiled": item.additional["compiled"], "file": item.additional["file"]}
|
1258
|
+
for q, item in zip(inferred_queries, search_results)
|
1259
|
+
]
|
1260
|
+
|
1261
|
+
yield compiled_references, inferred_queries, defiltered_query
|
1262
|
+
|
1263
|
+
|
1264
|
+
async def extract_questions(
|
1265
|
+
query: str,
|
1266
|
+
user: KhojUser,
|
1267
|
+
personality_context: str = "",
|
1268
|
+
chat_history: List[ChatMessageModel] = [],
|
1269
|
+
location_data: LocationData = None,
|
1270
|
+
query_images: Optional[List[str]] = None,
|
1271
|
+
query_files: str = None,
|
1272
|
+
tracer: dict = {},
|
1273
|
+
):
|
1274
|
+
"""
|
1275
|
+
Infer document search queries from user message and provided context
|
1276
|
+
"""
|
1277
|
+
# Shared context setup
|
1278
|
+
location = f"{location_data}" if location_data else "N/A"
|
1279
|
+
username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
|
1280
|
+
|
1281
|
+
# Date variables for prompt formatting
|
1282
|
+
today = datetime.today()
|
1283
|
+
current_new_year = today.replace(month=1, day=1)
|
1284
|
+
last_new_year = current_new_year.replace(year=today.year - 1)
|
1285
|
+
yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")
|
1286
|
+
|
1287
|
+
# Common prompt setup for API-based models (using Anthropic prompts for consistency)
|
1288
|
+
chat_history_str = construct_question_history(chat_history, query_prefix="User", agent_name="Assistant")
|
1289
|
+
|
1290
|
+
system_prompt = prompts.extract_questions_system_prompt.format(
|
1291
|
+
current_date=today.strftime("%Y-%m-%d"),
|
1292
|
+
day_of_week=today.strftime("%A"),
|
1293
|
+
current_month=today.strftime("%Y-%m"),
|
1294
|
+
last_new_year=last_new_year.strftime("%Y"),
|
1295
|
+
last_new_year_date=last_new_year.strftime("%Y-%m-%d"),
|
1296
|
+
current_new_year_date=current_new_year.strftime("%Y-%m-%d"),
|
1297
|
+
yesterday_date=yesterday,
|
1298
|
+
location=location,
|
1299
|
+
username=username,
|
1300
|
+
personality_context=personality_context,
|
1301
|
+
)
|
1302
|
+
|
1303
|
+
prompt = prompts.extract_questions_user_message.format(text=query, chat_history=chat_history_str)
|
1304
|
+
|
1305
|
+
class DocumentQueries(BaseModel):
|
1306
|
+
"""Choose searches to run on user documents."""
|
1307
|
+
|
1308
|
+
queries: List[str] = Field(..., min_items=1, description="List of search queries to run on user documents.")
|
1309
|
+
|
1310
|
+
raw_response = await send_message_to_model_wrapper(
|
1311
|
+
system_message=system_prompt,
|
1312
|
+
query=prompt,
|
1313
|
+
query_images=query_images,
|
1314
|
+
query_files=query_files,
|
1315
|
+
chat_history=chat_history,
|
1316
|
+
response_type="json_object",
|
1317
|
+
response_schema=DocumentQueries,
|
1318
|
+
user=user,
|
1319
|
+
tracer=tracer,
|
1320
|
+
)
|
1321
|
+
|
1322
|
+
# Extract questions from the response
|
1323
|
+
try:
|
1324
|
+
response = clean_json(raw_response)
|
1325
|
+
response = pyjson5.loads(response)
|
1326
|
+
queries = [q.strip() for q in response["queries"] if q.strip()]
|
1327
|
+
if not isinstance(queries, list) or not queries:
|
1328
|
+
logger.error(f"Invalid response for constructing subqueries: {response}")
|
1329
|
+
return [query]
|
1330
|
+
return queries
|
1331
|
+
except:
|
1332
|
+
logger.warning(f"LLM returned invalid JSON. Falling back to using user message as search query.")
|
1333
|
+
return [query]
|
1334
|
+
|
1335
|
+
|
1336
|
+
async def execute_search(
|
1337
|
+
user: KhojUser,
|
1338
|
+
q: str,
|
1339
|
+
n: Optional[int] = 5,
|
1340
|
+
t: Optional[state.SearchType] = None,
|
1341
|
+
r: Optional[bool] = False,
|
1342
|
+
max_distance: Optional[Union[float, None]] = None,
|
1343
|
+
dedupe: Optional[bool] = True,
|
1344
|
+
agent: Optional[Agent] = None,
|
1345
|
+
):
|
1346
|
+
# Run validation checks
|
1347
|
+
results: List[SearchResponse] = []
|
1348
|
+
|
1349
|
+
start_time = time.time()
|
1350
|
+
|
1351
|
+
# Ensure the agent, if present, is accessible by the user
|
1352
|
+
if user and agent and not await AgentAdapters.ais_agent_accessible(agent, user):
|
1353
|
+
logger.error(f"Agent {agent.slug} is not accessible by user {user}")
|
1354
|
+
return results
|
1355
|
+
|
1356
|
+
if q is None or q == "":
|
1357
|
+
logger.warning(f"No query param (q) passed in API call to initiate search")
|
1358
|
+
return results
|
1359
|
+
|
1360
|
+
# initialize variables
|
1361
|
+
user_query = q.strip()
|
1362
|
+
results_count = n or 5
|
1363
|
+
t = t or state.SearchType.All
|
1364
|
+
search_futures: List[concurrent.futures.Future] = []
|
1365
|
+
|
1366
|
+
# return cached results, if available
|
1367
|
+
if user:
|
1368
|
+
query_cache_key = f"{user_query}-{n}-{t}-{r}-{max_distance}-{dedupe}"
|
1369
|
+
if query_cache_key in state.query_cache[user.uuid]:
|
1370
|
+
logger.debug(f"Return response from query cache")
|
1371
|
+
return state.query_cache[user.uuid][query_cache_key]
|
1372
|
+
|
1373
|
+
# Encode query with filter terms removed
|
1374
|
+
defiltered_query = user_query
|
1375
|
+
for filter in [DateFilter(), WordFilter(), FileFilter()]:
|
1376
|
+
defiltered_query = filter.defilter(defiltered_query)
|
1377
|
+
|
1378
|
+
encoded_asymmetric_query = None
|
1379
|
+
if t != state.SearchType.Image:
|
1380
|
+
with timer("Encoding query took", logger=logger):
|
1381
|
+
search_model = await sync_to_async(get_default_search_model)()
|
1382
|
+
encoded_asymmetric_query = state.embeddings_model[search_model.name].embed_query(defiltered_query)
|
1383
|
+
|
1384
|
+
with concurrent.futures.ThreadPoolExecutor() as executor:
|
1385
|
+
if t in [
|
1386
|
+
state.SearchType.All,
|
1387
|
+
state.SearchType.Org,
|
1388
|
+
state.SearchType.Markdown,
|
1389
|
+
state.SearchType.Github,
|
1390
|
+
state.SearchType.Notion,
|
1391
|
+
state.SearchType.Plaintext,
|
1392
|
+
state.SearchType.Pdf,
|
1393
|
+
]:
|
1394
|
+
# query markdown notes
|
1395
|
+
search_futures += [
|
1396
|
+
executor.submit(
|
1397
|
+
text_search.query,
|
1398
|
+
user_query,
|
1399
|
+
user,
|
1400
|
+
t,
|
1401
|
+
question_embedding=encoded_asymmetric_query,
|
1402
|
+
max_distance=max_distance,
|
1403
|
+
agent=agent,
|
1404
|
+
)
|
1405
|
+
]
|
1406
|
+
|
1407
|
+
# Query across each requested content types in parallel
|
1408
|
+
with timer("Query took", logger):
|
1409
|
+
for search_future in concurrent.futures.as_completed(search_futures):
|
1410
|
+
hits = await search_future.result()
|
1411
|
+
# Collate results
|
1412
|
+
results += text_search.collate_results(hits, dedupe=dedupe)
|
1413
|
+
|
1414
|
+
# Sort results across all content types and take top results
|
1415
|
+
results = text_search.rerank_and_sort_results(
|
1416
|
+
results, query=defiltered_query, rank_results=r, search_model_name=search_model.name
|
1417
|
+
)[:results_count]
|
1418
|
+
|
1419
|
+
# Cache results
|
1420
|
+
if user:
|
1421
|
+
state.query_cache[user.uuid][query_cache_key] = results
|
1422
|
+
|
1423
|
+
end_time = time.time()
|
1424
|
+
logger.debug(f"🔍 Search took: {end_time - start_time:.3f} seconds")
|
1425
|
+
|
1426
|
+
return results
|
1427
|
+
|
1428
|
+
|
1154
1429
|
async def send_message_to_model_wrapper(
|
1155
1430
|
query: str,
|
1156
1431
|
system_message: str = "",
|
@@ -1350,54 +1625,24 @@ async def agenerate_chat_response(
|
|
1350
1625
|
code_results: Dict[str, Dict] = {},
|
1351
1626
|
operator_results: List[OperatorRun] = [],
|
1352
1627
|
research_results: List[ResearchIteration] = [],
|
1353
|
-
inferred_queries: List[str] = [],
|
1354
|
-
conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
|
1355
1628
|
user: KhojUser = None,
|
1356
|
-
client_application: ClientApplication = None,
|
1357
1629
|
location_data: LocationData = None,
|
1358
1630
|
user_name: Optional[str] = None,
|
1359
1631
|
query_images: Optional[List[str]] = None,
|
1360
|
-
train_of_thought: List[Any] = [],
|
1361
1632
|
query_files: str = None,
|
1362
|
-
raw_query_files: List[FileAttachment] = None,
|
1363
|
-
generated_images: List[str] = None,
|
1364
1633
|
raw_generated_files: List[FileAttachment] = [],
|
1365
|
-
generated_mermaidjs_diagram: str = None,
|
1366
1634
|
program_execution_context: List[str] = [],
|
1367
1635
|
generated_asset_results: Dict[str, Dict] = {},
|
1368
1636
|
is_subscribed: bool = False,
|
1369
1637
|
tracer: dict = {},
|
1370
|
-
) -> Tuple[AsyncGenerator[
|
1638
|
+
) -> Tuple[AsyncGenerator[ResponseWithThought, None], Dict[str, str]]:
|
1371
1639
|
# Initialize Variables
|
1372
|
-
chat_response_generator: AsyncGenerator[
|
1373
|
-
logger.debug(f"Conversation Types: {conversation_commands}")
|
1640
|
+
chat_response_generator: AsyncGenerator[ResponseWithThought, None] = None
|
1374
1641
|
|
1375
1642
|
metadata = {}
|
1376
1643
|
agent = await AgentAdapters.aget_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
|
1377
1644
|
|
1378
1645
|
try:
|
1379
|
-
partial_completion = partial(
|
1380
|
-
save_to_conversation_log,
|
1381
|
-
q,
|
1382
|
-
user=user,
|
1383
|
-
chat_history=chat_history,
|
1384
|
-
compiled_references=compiled_references,
|
1385
|
-
online_results=online_results,
|
1386
|
-
code_results=code_results,
|
1387
|
-
operator_results=operator_results,
|
1388
|
-
research_results=research_results,
|
1389
|
-
inferred_queries=inferred_queries,
|
1390
|
-
client_application=client_application,
|
1391
|
-
conversation_id=str(conversation.id),
|
1392
|
-
query_images=query_images,
|
1393
|
-
train_of_thought=train_of_thought,
|
1394
|
-
raw_query_files=raw_query_files,
|
1395
|
-
generated_images=generated_images,
|
1396
|
-
raw_generated_files=raw_generated_files,
|
1397
|
-
generated_mermaidjs_diagram=generated_mermaidjs_diagram,
|
1398
|
-
tracer=tracer,
|
1399
|
-
)
|
1400
|
-
|
1401
1646
|
query_to_run = q
|
1402
1647
|
deepthought = False
|
1403
1648
|
if research_results:
|
@@ -1421,22 +1666,23 @@ async def agenerate_chat_response(
|
|
1421
1666
|
if chat_model.model_type == "offline":
|
1422
1667
|
loaded_model = state.offline_chat_processor_config.loaded_model
|
1423
1668
|
chat_response_generator = converse_offline(
|
1669
|
+
# Query
|
1424
1670
|
user_query=query_to_run,
|
1671
|
+
# Context
|
1425
1672
|
references=compiled_references,
|
1426
1673
|
online_results=online_results,
|
1427
|
-
|
1674
|
+
generated_files=raw_generated_files,
|
1675
|
+
generated_asset_results=generated_asset_results,
|
1676
|
+
location_data=location_data,
|
1677
|
+
user_name=user_name,
|
1678
|
+
query_files=query_files,
|
1428
1679
|
chat_history=chat_history,
|
1429
|
-
|
1430
|
-
|
1680
|
+
# Model
|
1681
|
+
loaded_model=loaded_model,
|
1431
1682
|
model_name=chat_model.name,
|
1432
1683
|
max_prompt_size=chat_model.max_prompt_size,
|
1433
1684
|
tokenizer_name=chat_model.tokenizer,
|
1434
|
-
location_data=location_data,
|
1435
|
-
user_name=user_name,
|
1436
1685
|
agent=agent,
|
1437
|
-
query_files=query_files,
|
1438
|
-
generated_files=raw_generated_files,
|
1439
|
-
generated_asset_results=generated_asset_results,
|
1440
1686
|
tracer=tracer,
|
1441
1687
|
)
|
1442
1688
|
|
@@ -1445,28 +1691,29 @@ async def agenerate_chat_response(
|
|
1445
1691
|
api_key = openai_chat_config.api_key
|
1446
1692
|
chat_model_name = chat_model.name
|
1447
1693
|
chat_response_generator = converse_openai(
|
1694
|
+
# Query
|
1448
1695
|
query_to_run,
|
1449
|
-
|
1450
|
-
|
1696
|
+
# Context
|
1697
|
+
references=compiled_references,
|
1451
1698
|
online_results=online_results,
|
1452
1699
|
code_results=code_results,
|
1453
1700
|
operator_results=operator_results,
|
1701
|
+
query_images=query_images,
|
1702
|
+
query_files=query_files,
|
1703
|
+
generated_files=raw_generated_files,
|
1704
|
+
generated_asset_results=generated_asset_results,
|
1705
|
+
program_execution_context=program_execution_context,
|
1706
|
+
location_data=location_data,
|
1707
|
+
user_name=user_name,
|
1454
1708
|
chat_history=chat_history,
|
1709
|
+
# Model
|
1455
1710
|
model=chat_model_name,
|
1456
1711
|
api_key=api_key,
|
1457
1712
|
api_base_url=openai_chat_config.api_base_url,
|
1458
|
-
completion_func=partial_completion,
|
1459
|
-
conversation_commands=conversation_commands,
|
1460
1713
|
max_prompt_size=chat_model.max_prompt_size,
|
1461
1714
|
tokenizer_name=chat_model.tokenizer,
|
1462
|
-
location_data=location_data,
|
1463
|
-
user_name=user_name,
|
1464
1715
|
agent=agent,
|
1465
1716
|
vision_available=vision_available,
|
1466
|
-
query_files=query_files,
|
1467
|
-
generated_files=raw_generated_files,
|
1468
|
-
generated_asset_results=generated_asset_results,
|
1469
|
-
program_execution_context=program_execution_context,
|
1470
1717
|
deepthought=deepthought,
|
1471
1718
|
tracer=tracer,
|
1472
1719
|
)
|
@@ -1475,28 +1722,29 @@ async def agenerate_chat_response(
|
|
1475
1722
|
api_key = chat_model.ai_model_api.api_key
|
1476
1723
|
api_base_url = chat_model.ai_model_api.api_base_url
|
1477
1724
|
chat_response_generator = converse_anthropic(
|
1725
|
+
# Query
|
1478
1726
|
query_to_run,
|
1479
|
-
|
1480
|
-
|
1727
|
+
# Context
|
1728
|
+
references=compiled_references,
|
1481
1729
|
online_results=online_results,
|
1482
1730
|
code_results=code_results,
|
1483
1731
|
operator_results=operator_results,
|
1732
|
+
query_images=query_images,
|
1733
|
+
query_files=query_files,
|
1734
|
+
generated_files=raw_generated_files,
|
1735
|
+
generated_asset_results=generated_asset_results,
|
1736
|
+
program_execution_context=program_execution_context,
|
1737
|
+
location_data=location_data,
|
1738
|
+
user_name=user_name,
|
1484
1739
|
chat_history=chat_history,
|
1740
|
+
# Model
|
1485
1741
|
model=chat_model.name,
|
1486
1742
|
api_key=api_key,
|
1487
1743
|
api_base_url=api_base_url,
|
1488
|
-
completion_func=partial_completion,
|
1489
|
-
conversation_commands=conversation_commands,
|
1490
1744
|
max_prompt_size=chat_model.max_prompt_size,
|
1491
1745
|
tokenizer_name=chat_model.tokenizer,
|
1492
|
-
location_data=location_data,
|
1493
|
-
user_name=user_name,
|
1494
1746
|
agent=agent,
|
1495
1747
|
vision_available=vision_available,
|
1496
|
-
query_files=query_files,
|
1497
|
-
generated_files=raw_generated_files,
|
1498
|
-
generated_asset_results=generated_asset_results,
|
1499
|
-
program_execution_context=program_execution_context,
|
1500
1748
|
deepthought=deepthought,
|
1501
1749
|
tracer=tracer,
|
1502
1750
|
)
|
@@ -1504,28 +1752,29 @@ async def agenerate_chat_response(
|
|
1504
1752
|
api_key = chat_model.ai_model_api.api_key
|
1505
1753
|
api_base_url = chat_model.ai_model_api.api_base_url
|
1506
1754
|
chat_response_generator = converse_gemini(
|
1755
|
+
# Query
|
1507
1756
|
query_to_run,
|
1508
|
-
|
1757
|
+
# Context
|
1758
|
+
references=compiled_references,
|
1509
1759
|
online_results=online_results,
|
1510
1760
|
code_results=code_results,
|
1511
1761
|
operator_results=operator_results,
|
1762
|
+
query_images=query_images,
|
1763
|
+
query_files=query_files,
|
1764
|
+
generated_files=raw_generated_files,
|
1765
|
+
generated_asset_results=generated_asset_results,
|
1766
|
+
program_execution_context=program_execution_context,
|
1767
|
+
location_data=location_data,
|
1768
|
+
user_name=user_name,
|
1512
1769
|
chat_history=chat_history,
|
1770
|
+
# Model
|
1513
1771
|
model=chat_model.name,
|
1514
1772
|
api_key=api_key,
|
1515
1773
|
api_base_url=api_base_url,
|
1516
|
-
completion_func=partial_completion,
|
1517
|
-
conversation_commands=conversation_commands,
|
1518
1774
|
max_prompt_size=chat_model.max_prompt_size,
|
1519
1775
|
tokenizer_name=chat_model.tokenizer,
|
1520
|
-
location_data=location_data,
|
1521
|
-
user_name=user_name,
|
1522
1776
|
agent=agent,
|
1523
|
-
query_images=query_images,
|
1524
1777
|
vision_available=vision_available,
|
1525
|
-
query_files=query_files,
|
1526
|
-
generated_files=raw_generated_files,
|
1527
|
-
generated_asset_results=generated_asset_results,
|
1528
|
-
program_execution_context=program_execution_context,
|
1529
1778
|
deepthought=deepthought,
|
1530
1779
|
tracer=tracer,
|
1531
1780
|
)
|
khoj/routers/research.py
CHANGED
@@ -22,10 +22,10 @@ from khoj.processor.conversation.utils import (
|
|
22
22
|
from khoj.processor.operator import operate_environment
|
23
23
|
from khoj.processor.tools.online_search import read_webpages, search_online
|
24
24
|
from khoj.processor.tools.run_code import run_code
|
25
|
-
from khoj.routers.api import extract_references_and_questions
|
26
25
|
from khoj.routers.helpers import (
|
27
26
|
ChatEvent,
|
28
27
|
generate_summary_from_files,
|
28
|
+
search_documents,
|
29
29
|
send_message_to_model_wrapper,
|
30
30
|
)
|
31
31
|
from khoj.utils.helpers import (
|
@@ -273,7 +273,6 @@ async def research(
|
|
273
273
|
code_results: Dict = dict()
|
274
274
|
document_results: List[Dict[str, str]] = []
|
275
275
|
operator_results: OperatorRun = None
|
276
|
-
summarize_files: str = ""
|
277
276
|
this_iteration = ResearchIteration(tool=None, query=query)
|
278
277
|
|
279
278
|
async for result in apick_next_tool(
|
@@ -313,7 +312,7 @@ async def research(
|
|
313
312
|
previous_inferred_queries = {
|
314
313
|
c["query"] for iteration in previous_iterations if iteration.context for c in iteration.context
|
315
314
|
}
|
316
|
-
async for result in
|
315
|
+
async for result in search_documents(
|
317
316
|
user,
|
318
317
|
construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
|
319
318
|
this_iteration.query,
|
@@ -473,40 +472,13 @@ async def research(
|
|
473
472
|
this_iteration.warning = f"Error operating browser: {e}"
|
474
473
|
logger.error(this_iteration.warning, exc_info=True)
|
475
474
|
|
476
|
-
elif this_iteration.tool == ConversationCommand.Summarize:
|
477
|
-
try:
|
478
|
-
async for result in generate_summary_from_files(
|
479
|
-
this_iteration.query,
|
480
|
-
user,
|
481
|
-
file_filters,
|
482
|
-
construct_tool_chat_history(previous_iterations, ConversationCommand.Summarize),
|
483
|
-
query_images=query_images,
|
484
|
-
agent=agent,
|
485
|
-
send_status_func=send_status_func,
|
486
|
-
query_files=query_files,
|
487
|
-
):
|
488
|
-
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
489
|
-
yield result[ChatEvent.STATUS]
|
490
|
-
else:
|
491
|
-
summarize_files = result # type: ignore
|
492
|
-
except Exception as e:
|
493
|
-
this_iteration.warning = f"Error summarizing files: {e}"
|
494
|
-
logger.error(this_iteration.warning, exc_info=True)
|
495
|
-
|
496
475
|
else:
|
497
476
|
# No valid tools. This is our exit condition.
|
498
477
|
current_iteration = MAX_ITERATIONS
|
499
478
|
|
500
479
|
current_iteration += 1
|
501
480
|
|
502
|
-
if
|
503
|
-
document_results
|
504
|
-
or online_results
|
505
|
-
or code_results
|
506
|
-
or operator_results
|
507
|
-
or summarize_files
|
508
|
-
or this_iteration.warning
|
509
|
-
):
|
481
|
+
if document_results or online_results or code_results or operator_results or this_iteration.warning:
|
510
482
|
results_data = f"\n<iteration>{current_iteration}\n<tool>{this_iteration.tool}</tool>\n<query>{this_iteration.query}</query>\n<results>"
|
511
483
|
if document_results:
|
512
484
|
results_data += f"\n<document_references>\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</document_references>"
|
@@ -518,8 +490,6 @@ async def research(
|
|
518
490
|
results_data += (
|
519
491
|
f"\n<browser_operator_results>\n{operator_results.response}\n</browser_operator_results>"
|
520
492
|
)
|
521
|
-
if summarize_files:
|
522
|
-
results_data += f"\n<summarized_files>\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</summarized_files>"
|
523
493
|
if this_iteration.warning:
|
524
494
|
results_data += f"\n<warning>\n{this_iteration.warning}\n</warning>"
|
525
495
|
results_data += "\n</results>\n</iteration>"
|
khoj/utils/helpers.py
CHANGED
@@ -338,15 +338,12 @@ class ConversationCommand(str, Enum):
|
|
338
338
|
Default = "default"
|
339
339
|
General = "general"
|
340
340
|
Notes = "notes"
|
341
|
-
Help = "help"
|
342
341
|
Online = "online"
|
343
342
|
Webpage = "webpage"
|
344
343
|
Code = "code"
|
345
344
|
Image = "image"
|
346
345
|
Text = "text"
|
347
|
-
Automation = "automation"
|
348
346
|
AutomatedTask = "automated_task"
|
349
|
-
Summarize = "summarize"
|
350
347
|
Diagram = "diagram"
|
351
348
|
Research = "research"
|
352
349
|
Operator = "operator"
|
@@ -360,9 +357,6 @@ command_descriptions = {
|
|
360
357
|
ConversationCommand.Webpage: "Get information from webpage suggested by you.",
|
361
358
|
ConversationCommand.Code: "Run Python code to parse information, run complex calculations, create documents and charts.",
|
362
359
|
ConversationCommand.Image: "Generate illustrative, creative images by describing your imagination in words.",
|
363
|
-
ConversationCommand.Automation: "Automatically run your query at a specified time or interval.",
|
364
|
-
ConversationCommand.Help: "Get help with how to use or setup Khoj from the documentation",
|
365
|
-
ConversationCommand.Summarize: "Get help with a question pertaining to an entire document.",
|
366
360
|
ConversationCommand.Diagram: "Draw a flowchart, diagram, or any other visual representation best expressed with primitives like lines, rectangles, and text.",
|
367
361
|
ConversationCommand.Research: "Do deep research on a topic. This will take longer than usual, but give a more detailed, comprehensive answer.",
|
368
362
|
ConversationCommand.Operator: "Operate and perform tasks using a computer.",
|