khoj 1.24.2.dev16__py3-none-any.whl → 1.25.1.dev34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +13 -4
- khoj/database/adapters/__init__.py +163 -49
- khoj/database/admin.py +18 -1
- khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
- khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
- khoj/database/models/__init__.py +78 -2
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1603-fa3ee48860b9dc5c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/7762-79f2205740622b5c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{layout-e71c8e913cccf792.js → layout-75636ab3a413fa8e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/page-fa282831808ee536.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-1688dead2f21270d.js → page-5480731341f34450.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/{layout-8102549127db3067.js → layout-96fcf62857bf8f30.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-91abcb71846922b7.js → page-702057ccbcf27881.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/{page-7ab093711c27041c.js → page-e7b34316ec6f44de.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{layout-f3e40d346da53112.js → layout-d0f0a9067427fb20.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-fada198096eab47f.js → page-10a5aad6e04f3cf8.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-a7e036689b6507ff.js → page-d56541c746fded7d.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{layout-6f9314b0d7a26046.js → layout-a8f33dfe92f997fb.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-fa11cafaec7ab39f.js → page-e044a999468a7c5d.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{layout-39f03f9e32399f0f.js → layout-2df56074e42adaa0.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-c5d2b9076e5390b2.js → page-fbbd66a4d4633438.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-f52083d548d804fa.js → webpack-c0cd5a6afb1f0798.js} +1 -1
- khoj/interface/compiled/_next/static/css/2de69f0be774c768.css +1 -0
- khoj/interface/compiled/_next/static/css/3e1f1fdd70775091.css +1 -0
- khoj/interface/compiled/_next/static/css/467a524c75e7d7c0.css +1 -0
- khoj/interface/compiled/_next/static/css/b9a6bf04305d98d7.css +25 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +3 -3
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/interface/web/assets/icons/agents.svg +1 -0
- khoj/interface/web/assets/icons/automation.svg +1 -0
- khoj/interface/web/assets/icons/chat.svg +24 -0
- khoj/interface/web/login.html +11 -22
- khoj/processor/conversation/google/gemini_chat.py +4 -19
- khoj/processor/conversation/google/utils.py +33 -15
- khoj/processor/conversation/prompts.py +14 -3
- khoj/processor/conversation/utils.py +3 -7
- khoj/processor/embeddings.py +6 -3
- khoj/processor/image/generate.py +1 -2
- khoj/processor/tools/online_search.py +135 -42
- khoj/routers/api.py +1 -1
- khoj/routers/api_agents.py +6 -3
- khoj/routers/api_chat.py +63 -520
- khoj/routers/api_model.py +1 -1
- khoj/routers/auth.py +9 -1
- khoj/routers/helpers.py +74 -61
- khoj/routers/subscription.py +18 -4
- khoj/search_type/text_search.py +7 -2
- khoj/utils/helpers.py +56 -13
- khoj/utils/initialization.py +0 -3
- {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/METADATA +19 -14
- {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/RECORD +71 -68
- khoj/interface/compiled/_next/static/chunks/1269-2e52d48e7d0e5c61.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1603-67a89278e2c5dbe6.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/page-df26b497b7356151.js +0 -1
- khoj/interface/compiled/_next/static/css/1538cedb321e3a97.css +0 -1
- khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
- khoj/interface/compiled/_next/static/css/50d972a8c787730b.css +0 -25
- khoj/interface/compiled/_next/static/css/dfb67a9287720a2b.css +0 -1
- /khoj/interface/compiled/_next/static/{MyYNlmGMz32TGV_-febR4 → Jid9q6Qg851ioDaaO_fth}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{MyYNlmGMz32TGV_-febR4 → Jid9q6Qg851ioDaaO_fth}/_ssgManifest.js +0 -0
- {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/WHEEL +0 -0
- {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/entry_points.txt +0 -0
- {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api_model.py
CHANGED
@@ -40,7 +40,7 @@ def get_user_chat_model(
|
|
40
40
|
chat_model = ConversationAdapters.get_conversation_config(user)
|
41
41
|
|
42
42
|
if chat_model is None:
|
43
|
-
chat_model = ConversationAdapters.get_default_conversation_config()
|
43
|
+
chat_model = ConversationAdapters.get_default_conversation_config(user)
|
44
44
|
|
45
45
|
return Response(status_code=200, content=json.dumps({"id": chat_model.id, "chat_model": chat_model.chat_model}))
|
46
46
|
|
khoj/routers/auth.py
CHANGED
@@ -80,11 +80,19 @@ async def login_magic_link(request: Request, form: MagicLinkForm):
|
|
80
80
|
request.session.pop("user", None)
|
81
81
|
|
82
82
|
email = form.email
|
83
|
-
user = await aget_or_create_user_by_email(email)
|
83
|
+
user, is_new = await aget_or_create_user_by_email(email)
|
84
84
|
unique_id = user.email_verification_code
|
85
85
|
|
86
86
|
if user:
|
87
87
|
await send_magic_link_email(email, unique_id, request.base_url)
|
88
|
+
if is_new:
|
89
|
+
update_telemetry_state(
|
90
|
+
request=request,
|
91
|
+
telemetry_type="api",
|
92
|
+
api="create_user",
|
93
|
+
metadata={"user_id": str(user.uuid)},
|
94
|
+
)
|
95
|
+
logger.log(logging.INFO, f"🥳 New User Created: {user.uuid}")
|
88
96
|
|
89
97
|
return Response(status_code=200)
|
90
98
|
|
khoj/routers/helpers.py
CHANGED
@@ -39,6 +39,7 @@ from khoj.database.adapters import (
|
|
39
39
|
AutomationAdapters,
|
40
40
|
ConversationAdapters,
|
41
41
|
EntryAdapters,
|
42
|
+
ais_user_subscribed,
|
42
43
|
create_khoj_token,
|
43
44
|
get_khoj_tokens,
|
44
45
|
get_user_name,
|
@@ -119,20 +120,20 @@ def is_query_empty(query: str) -> bool:
|
|
119
120
|
return is_none_or_empty(query.strip())
|
120
121
|
|
121
122
|
|
122
|
-
def validate_conversation_config():
|
123
|
-
default_config = ConversationAdapters.get_default_conversation_config()
|
123
|
+
def validate_conversation_config(user: KhojUser):
|
124
|
+
default_config = ConversationAdapters.get_default_conversation_config(user)
|
124
125
|
|
125
126
|
if default_config is None:
|
126
|
-
raise HTTPException(status_code=500, detail="Contact the server administrator to
|
127
|
+
raise HTTPException(status_code=500, detail="Contact the server administrator to add a chat model.")
|
127
128
|
|
128
129
|
if default_config.model_type == "openai" and not default_config.openai_config:
|
129
|
-
raise HTTPException(status_code=500, detail="Contact the server administrator to
|
130
|
+
raise HTTPException(status_code=500, detail="Contact the server administrator to add a chat model.")
|
130
131
|
|
131
132
|
|
132
133
|
async def is_ready_to_chat(user: KhojUser):
|
133
|
-
user_conversation_config =
|
134
|
-
|
135
|
-
|
134
|
+
user_conversation_config = await ConversationAdapters.aget_user_conversation_config(user)
|
135
|
+
if user_conversation_config == None:
|
136
|
+
user_conversation_config = await ConversationAdapters.aget_default_conversation_config()
|
136
137
|
|
137
138
|
if user_conversation_config and user_conversation_config.model_type == ChatModelOptions.ModelType.OFFLINE:
|
138
139
|
chat_model = user_conversation_config.chat_model
|
@@ -208,7 +209,7 @@ def get_next_url(request: Request) -> str:
|
|
208
209
|
def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
|
209
210
|
chat_history = ""
|
210
211
|
for chat in conversation_history.get("chat", [])[-n:]:
|
211
|
-
if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder"]:
|
212
|
+
if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]:
|
212
213
|
chat_history += f"User: {chat['intent']['query']}\n"
|
213
214
|
chat_history += f"{agent_name}: {chat['message']}\n"
|
214
215
|
elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
|
@@ -246,19 +247,19 @@ async def agenerate_chat_response(*args):
|
|
246
247
|
return await loop.run_in_executor(executor, generate_chat_response, *args)
|
247
248
|
|
248
249
|
|
249
|
-
async def acreate_title_from_query(query: str) -> str:
|
250
|
+
async def acreate_title_from_query(query: str, user: KhojUser = None) -> str:
|
250
251
|
"""
|
251
252
|
Create a title from the given query
|
252
253
|
"""
|
253
254
|
title_generation_prompt = prompts.subject_generation.format(query=query)
|
254
255
|
|
255
256
|
with timer("Chat actor: Generate title from query", logger):
|
256
|
-
response = await send_message_to_model_wrapper(title_generation_prompt)
|
257
|
+
response = await send_message_to_model_wrapper(title_generation_prompt, user=user)
|
257
258
|
|
258
259
|
return response.strip()
|
259
260
|
|
260
261
|
|
261
|
-
async def acheck_if_safe_prompt(system_prompt: str) -> Tuple[bool, str]:
|
262
|
+
async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None) -> Tuple[bool, str]:
|
262
263
|
"""
|
263
264
|
Check if the system prompt is safe to use
|
264
265
|
"""
|
@@ -267,7 +268,7 @@ async def acheck_if_safe_prompt(system_prompt: str) -> Tuple[bool, str]:
|
|
267
268
|
reason = ""
|
268
269
|
|
269
270
|
with timer("Chat actor: Check if safe prompt", logger):
|
270
|
-
response = await send_message_to_model_wrapper(safe_prompt_check)
|
271
|
+
response = await send_message_to_model_wrapper(safe_prompt_check, user=user)
|
271
272
|
|
272
273
|
response = response.strip()
|
273
274
|
try:
|
@@ -288,7 +289,7 @@ async def aget_relevant_information_sources(
|
|
288
289
|
query: str,
|
289
290
|
conversation_history: dict,
|
290
291
|
is_task: bool,
|
291
|
-
|
292
|
+
user: KhojUser,
|
292
293
|
uploaded_image_url: str = None,
|
293
294
|
agent: Agent = None,
|
294
295
|
):
|
@@ -326,7 +327,7 @@ async def aget_relevant_information_sources(
|
|
326
327
|
response = await send_message_to_model_wrapper(
|
327
328
|
relevant_tools_prompt,
|
328
329
|
response_type="json_object",
|
329
|
-
|
330
|
+
user=user,
|
330
331
|
)
|
331
332
|
|
332
333
|
try:
|
@@ -348,15 +349,26 @@ async def aget_relevant_information_sources(
|
|
348
349
|
final_response.append(ConversationCommand(llm_suggested_tool))
|
349
350
|
|
350
351
|
if is_none_or_empty(final_response):
|
351
|
-
|
352
|
-
|
353
|
-
|
352
|
+
if len(agent_tools) == 0:
|
353
|
+
final_response = [ConversationCommand.Default]
|
354
|
+
else:
|
355
|
+
final_response = [ConversationCommand.General]
|
356
|
+
except Exception:
|
354
357
|
logger.error(f"Invalid response for determining relevant tools: {response}")
|
355
|
-
|
358
|
+
if len(agent_tools) == 0:
|
359
|
+
final_response = [ConversationCommand.Default]
|
360
|
+
else:
|
361
|
+
final_response = agent_tools
|
362
|
+
return final_response
|
356
363
|
|
357
364
|
|
358
365
|
async def aget_relevant_output_modes(
|
359
|
-
query: str,
|
366
|
+
query: str,
|
367
|
+
conversation_history: dict,
|
368
|
+
is_task: bool = False,
|
369
|
+
user: KhojUser = None,
|
370
|
+
uploaded_image_url: str = None,
|
371
|
+
agent: Agent = None,
|
360
372
|
):
|
361
373
|
"""
|
362
374
|
Given a query, determine which of the available tools the agent should use in order to answer appropriately.
|
@@ -392,7 +404,7 @@ async def aget_relevant_output_modes(
|
|
392
404
|
)
|
393
405
|
|
394
406
|
with timer("Chat actor: Infer output mode for chat response", logger):
|
395
|
-
response = await send_message_to_model_wrapper(relevant_mode_prompt, response_type="json_object")
|
407
|
+
response = await send_message_to_model_wrapper(relevant_mode_prompt, response_type="json_object", user=user)
|
396
408
|
|
397
409
|
try:
|
398
410
|
response = response.strip()
|
@@ -447,7 +459,7 @@ async def infer_webpage_urls(
|
|
447
459
|
|
448
460
|
with timer("Chat actor: Infer webpage urls to read", logger):
|
449
461
|
response = await send_message_to_model_wrapper(
|
450
|
-
online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
|
462
|
+
online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object", user=user
|
451
463
|
)
|
452
464
|
|
453
465
|
# Validate that the response is a non-empty, JSON-serializable list of URLs
|
@@ -493,7 +505,7 @@ async def generate_online_subqueries(
|
|
493
505
|
|
494
506
|
with timer("Chat actor: Generate online search subqueries", logger):
|
495
507
|
response = await send_message_to_model_wrapper(
|
496
|
-
online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
|
508
|
+
online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object", user=user
|
497
509
|
)
|
498
510
|
|
499
511
|
# Validate that the response is a non-empty, JSON-serializable list
|
@@ -511,7 +523,9 @@ async def generate_online_subqueries(
|
|
511
523
|
return [q]
|
512
524
|
|
513
525
|
|
514
|
-
async def schedule_query(
|
526
|
+
async def schedule_query(
|
527
|
+
q: str, conversation_history: dict, user: KhojUser, uploaded_image_url: str = None
|
528
|
+
) -> Tuple[str, ...]:
|
515
529
|
"""
|
516
530
|
Schedule the date, time to run the query. Assume the server timezone is UTC.
|
517
531
|
"""
|
@@ -523,7 +537,7 @@ async def schedule_query(q: str, conversation_history: dict, uploaded_image_url:
|
|
523
537
|
)
|
524
538
|
|
525
539
|
raw_response = await send_message_to_model_wrapper(
|
526
|
-
crontime_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
|
540
|
+
crontime_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object", user=user
|
527
541
|
)
|
528
542
|
|
529
543
|
# Validate that the response is a non-empty, JSON-serializable list
|
@@ -537,12 +551,14 @@ async def schedule_query(q: str, conversation_history: dict, uploaded_image_url:
|
|
537
551
|
raise AssertionError(f"Invalid response for scheduling query: {raw_response}")
|
538
552
|
|
539
553
|
|
540
|
-
async def extract_relevant_info(
|
554
|
+
async def extract_relevant_info(
|
555
|
+
qs: set[str], corpus: str, user: KhojUser = None, agent: Agent = None
|
556
|
+
) -> Union[str, None]:
|
541
557
|
"""
|
542
558
|
Extract relevant information for a given query from the target corpus
|
543
559
|
"""
|
544
560
|
|
545
|
-
if is_none_or_empty(corpus) or is_none_or_empty(
|
561
|
+
if is_none_or_empty(corpus) or is_none_or_empty(qs):
|
546
562
|
return None
|
547
563
|
|
548
564
|
personality_context = (
|
@@ -550,25 +566,26 @@ async def extract_relevant_info(q: str, corpus: str, subscribed: bool, agent: Ag
|
|
550
566
|
)
|
551
567
|
|
552
568
|
extract_relevant_information = prompts.extract_relevant_information.format(
|
553
|
-
query=
|
569
|
+
query=", ".join(qs),
|
554
570
|
corpus=corpus.strip(),
|
555
571
|
personality_context=personality_context,
|
556
572
|
)
|
557
573
|
|
558
|
-
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
prompts.system_prompt_extract_relevant_information,
|
564
|
-
chat_model_option=chat_model,
|
565
|
-
subscribed=subscribed,
|
566
|
-
)
|
574
|
+
response = await send_message_to_model_wrapper(
|
575
|
+
extract_relevant_information,
|
576
|
+
prompts.system_prompt_extract_relevant_information,
|
577
|
+
user=user,
|
578
|
+
)
|
567
579
|
return response.strip()
|
568
580
|
|
569
581
|
|
570
582
|
async def extract_relevant_summary(
|
571
|
-
q: str,
|
583
|
+
q: str,
|
584
|
+
corpus: str,
|
585
|
+
conversation_history: dict,
|
586
|
+
uploaded_image_url: str = None,
|
587
|
+
user: KhojUser = None,
|
588
|
+
agent: Agent = None,
|
572
589
|
) -> Union[str, None]:
|
573
590
|
"""
|
574
591
|
Extract relevant information for a given query from the target corpus
|
@@ -581,20 +598,20 @@ async def extract_relevant_summary(
|
|
581
598
|
prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
|
582
599
|
)
|
583
600
|
|
601
|
+
chat_history = construct_chat_history(conversation_history)
|
602
|
+
|
584
603
|
extract_relevant_information = prompts.extract_relevant_summary.format(
|
585
604
|
query=q,
|
605
|
+
chat_history=chat_history,
|
586
606
|
corpus=corpus.strip(),
|
587
607
|
personality_context=personality_context,
|
588
608
|
)
|
589
609
|
|
590
|
-
chat_model: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config()
|
591
|
-
|
592
610
|
with timer("Chat actor: Extract relevant information from data", logger):
|
593
611
|
response = await send_message_to_model_wrapper(
|
594
612
|
extract_relevant_information,
|
595
613
|
prompts.system_prompt_extract_relevant_summary,
|
596
|
-
|
597
|
-
subscribed=subscribed,
|
614
|
+
user=user,
|
598
615
|
uploaded_image_url=uploaded_image_url,
|
599
616
|
)
|
600
617
|
return response.strip()
|
@@ -607,8 +624,8 @@ async def generate_better_image_prompt(
|
|
607
624
|
note_references: List[Dict[str, Any]],
|
608
625
|
online_results: Optional[dict] = None,
|
609
626
|
model_type: Optional[str] = None,
|
610
|
-
subscribed: bool = False,
|
611
627
|
uploaded_image_url: Optional[str] = None,
|
628
|
+
user: KhojUser = None,
|
612
629
|
agent: Agent = None,
|
613
630
|
) -> str:
|
614
631
|
"""
|
@@ -658,12 +675,8 @@ async def generate_better_image_prompt(
|
|
658
675
|
personality_context=personality_context,
|
659
676
|
)
|
660
677
|
|
661
|
-
chat_model: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config()
|
662
|
-
|
663
678
|
with timer("Chat actor: Generate contextual image prompt", logger):
|
664
|
-
response = await send_message_to_model_wrapper(
|
665
|
-
image_prompt, chat_model_option=chat_model, subscribed=subscribed, uploaded_image_url=uploaded_image_url
|
666
|
-
)
|
679
|
+
response = await send_message_to_model_wrapper(image_prompt, uploaded_image_url=uploaded_image_url, user=user)
|
667
680
|
response = response.strip()
|
668
681
|
if response.startswith(('"', "'")) and response.endswith(('"', "'")):
|
669
682
|
response = response[1:-1]
|
@@ -675,14 +688,10 @@ async def send_message_to_model_wrapper(
|
|
675
688
|
message: str,
|
676
689
|
system_message: str = "",
|
677
690
|
response_type: str = "text",
|
678
|
-
|
679
|
-
subscribed: bool = False,
|
691
|
+
user: KhojUser = None,
|
680
692
|
uploaded_image_url: str = None,
|
681
693
|
):
|
682
|
-
conversation_config: ChatModelOptions = (
|
683
|
-
chat_model_option or await ConversationAdapters.aget_default_conversation_config()
|
684
|
-
)
|
685
|
-
|
694
|
+
conversation_config: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config(user)
|
686
695
|
vision_available = conversation_config.vision_enabled
|
687
696
|
if not vision_available and uploaded_image_url:
|
688
697
|
vision_enabled_config = await ConversationAdapters.aget_vision_enabled_config()
|
@@ -690,6 +699,7 @@ async def send_message_to_model_wrapper(
|
|
690
699
|
conversation_config = vision_enabled_config
|
691
700
|
vision_available = True
|
692
701
|
|
702
|
+
subscribed = await ais_user_subscribed(user)
|
693
703
|
chat_model = conversation_config.chat_model
|
694
704
|
max_tokens = (
|
695
705
|
conversation_config.subscribed_max_prompt_size
|
@@ -788,8 +798,9 @@ def send_message_to_model_wrapper_sync(
|
|
788
798
|
message: str,
|
789
799
|
system_message: str = "",
|
790
800
|
response_type: str = "text",
|
801
|
+
user: KhojUser = None,
|
791
802
|
):
|
792
|
-
conversation_config: ChatModelOptions = ConversationAdapters.get_default_conversation_config()
|
803
|
+
conversation_config: ChatModelOptions = ConversationAdapters.get_default_conversation_config(user)
|
793
804
|
|
794
805
|
if conversation_config is None:
|
795
806
|
raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.")
|
@@ -1168,7 +1179,7 @@ class CommonQueryParamsClass:
|
|
1168
1179
|
CommonQueryParams = Annotated[CommonQueryParamsClass, Depends()]
|
1169
1180
|
|
1170
1181
|
|
1171
|
-
def should_notify(original_query: str, executed_query: str, ai_response: str) -> bool:
|
1182
|
+
def should_notify(original_query: str, executed_query: str, ai_response: str, user: KhojUser) -> bool:
|
1172
1183
|
"""
|
1173
1184
|
Decide whether to notify the user of the AI response.
|
1174
1185
|
Default to notifying the user for now.
|
@@ -1185,7 +1196,7 @@ def should_notify(original_query: str, executed_query: str, ai_response: str) ->
|
|
1185
1196
|
with timer("Chat actor: Decide to notify user of automation response", logger):
|
1186
1197
|
try:
|
1187
1198
|
# TODO Replace with async call so we don't have to maintain a sync version
|
1188
|
-
response = send_message_to_model_wrapper_sync(to_notify_or_not)
|
1199
|
+
response = send_message_to_model_wrapper_sync(to_notify_or_not, user)
|
1189
1200
|
should_notify_result = "no" not in response.lower()
|
1190
1201
|
logger.info(f'Decided to {"not " if not should_notify_result else ""}notify user of automation response.')
|
1191
1202
|
return should_notify_result
|
@@ -1277,7 +1288,9 @@ def scheduled_chat(
|
|
1277
1288
|
ai_response = raw_response.text
|
1278
1289
|
|
1279
1290
|
# Notify user if the AI response is satisfactory
|
1280
|
-
if should_notify(
|
1291
|
+
if should_notify(
|
1292
|
+
original_query=scheduling_request, executed_query=cleaned_query, ai_response=ai_response, user=user
|
1293
|
+
):
|
1281
1294
|
if is_resend_enabled():
|
1282
1295
|
send_task_email(user.get_short_name(), user.email, cleaned_query, ai_response, subject, is_image)
|
1283
1296
|
else:
|
@@ -1287,7 +1300,7 @@ def scheduled_chat(
|
|
1287
1300
|
async def create_automation(
|
1288
1301
|
q: str, timezone: str, user: KhojUser, calling_url: URL, meta_log: dict = {}, conversation_id: str = None
|
1289
1302
|
):
|
1290
|
-
crontime, query_to_run, subject = await schedule_query(q, meta_log)
|
1303
|
+
crontime, query_to_run, subject = await schedule_query(q, meta_log, user)
|
1291
1304
|
job = await schedule_automation(query_to_run, subject, crontime, timezone, q, user, calling_url, conversation_id)
|
1292
1305
|
return job, crontime, query_to_run, subject
|
1293
1306
|
|
@@ -1481,9 +1494,9 @@ def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False)
|
|
1481
1494
|
current_notion_config = get_user_notion_config(user)
|
1482
1495
|
notion_token = current_notion_config.token if current_notion_config else ""
|
1483
1496
|
|
1484
|
-
selected_chat_model_config = (
|
1485
|
-
|
1486
|
-
)
|
1497
|
+
selected_chat_model_config = ConversationAdapters.get_conversation_config(
|
1498
|
+
user
|
1499
|
+
) or ConversationAdapters.get_default_conversation_config(user)
|
1487
1500
|
chat_models = ConversationAdapters.get_conversation_processor_options().all()
|
1488
1501
|
chat_model_options = list()
|
1489
1502
|
for chat_model in chat_models:
|
khoj/routers/subscription.py
CHANGED
@@ -7,6 +7,7 @@ from fastapi import APIRouter, Request
|
|
7
7
|
from starlette.authentication import requires
|
8
8
|
|
9
9
|
from khoj.database import adapters
|
10
|
+
from khoj.routers.helpers import update_telemetry_state
|
10
11
|
from khoj.utils import state
|
11
12
|
|
12
13
|
# Stripe integration for Khoj Cloud Subscription
|
@@ -48,6 +49,8 @@ async def subscribe(request: Request):
|
|
48
49
|
customer_id = subscription["customer"]
|
49
50
|
customer = stripe.Customer.retrieve(customer_id)
|
50
51
|
customer_email = customer["email"]
|
52
|
+
user = None
|
53
|
+
is_new = False
|
51
54
|
|
52
55
|
# Handle valid stripe webhook events
|
53
56
|
success = True
|
@@ -55,7 +58,9 @@ async def subscribe(request: Request):
|
|
55
58
|
# Mark the user as subscribed and update the next renewal date on payment
|
56
59
|
subscription = stripe.Subscription.list(customer=customer_id).data[0]
|
57
60
|
renewal_date = datetime.fromtimestamp(subscription["current_period_end"], tz=timezone.utc)
|
58
|
-
user = await adapters.set_user_subscription(
|
61
|
+
user, is_new = await adapters.set_user_subscription(
|
62
|
+
customer_email, is_recurring=True, renewal_date=renewal_date
|
63
|
+
)
|
59
64
|
success = user is not None
|
60
65
|
elif event_type in {"customer.subscription.updated"}:
|
61
66
|
user_subscription = await sync_to_async(adapters.get_user_subscription)(customer_email)
|
@@ -63,15 +68,24 @@ async def subscribe(request: Request):
|
|
63
68
|
if user_subscription and user_subscription.renewal_date:
|
64
69
|
# Mark user as unsubscribed or resubscribed
|
65
70
|
is_recurring = not subscription["cancel_at_period_end"]
|
66
|
-
|
67
|
-
success =
|
71
|
+
user, is_new = await adapters.set_user_subscription(customer_email, is_recurring=is_recurring)
|
72
|
+
success = user is not None
|
68
73
|
elif event_type in {"customer.subscription.deleted"}:
|
69
74
|
# Reset the user to trial state
|
70
|
-
user = await adapters.set_user_subscription(
|
75
|
+
user, is_new = await adapters.set_user_subscription(
|
71
76
|
customer_email, is_recurring=False, renewal_date=False, type="trial"
|
72
77
|
)
|
73
78
|
success = user is not None
|
74
79
|
|
80
|
+
if user and is_new:
|
81
|
+
update_telemetry_state(
|
82
|
+
request=request,
|
83
|
+
telemetry_type="api",
|
84
|
+
api="create_user",
|
85
|
+
metadata={"user_id": str(user.user.uuid)},
|
86
|
+
)
|
87
|
+
logger.log(logging.INFO, f"🥳 New User Created: {user.user.uuid}")
|
88
|
+
|
75
89
|
logger.info(f'Stripe subscription {event["type"]} for {customer_email}')
|
76
90
|
return {"success": success}
|
77
91
|
|
khoj/search_type/text_search.py
CHANGED
@@ -3,6 +3,7 @@ import math
|
|
3
3
|
from pathlib import Path
|
4
4
|
from typing import List, Optional, Tuple, Type, Union
|
5
5
|
|
6
|
+
import requests
|
6
7
|
import torch
|
7
8
|
from asgiref.sync import sync_to_async
|
8
9
|
from sentence_transformers import util
|
@@ -231,8 +232,12 @@ def setup(
|
|
231
232
|
|
232
233
|
def cross_encoder_score(query: str, hits: List[SearchResponse], search_model_name: str) -> List[SearchResponse]:
|
233
234
|
"""Score all retrieved entries using the cross-encoder"""
|
234
|
-
|
235
|
-
|
235
|
+
try:
|
236
|
+
with timer("Cross-Encoder Predict Time", logger, state.device):
|
237
|
+
cross_scores = state.cross_encoder_model[search_model_name].predict(query, hits)
|
238
|
+
except requests.exceptions.HTTPError as e:
|
239
|
+
logger.error(f"Failed to rerank documents using the inference endpoint. Error: {e}.", exc_info=True)
|
240
|
+
cross_scores = [0.0] * len(hits)
|
236
241
|
|
237
242
|
# Convert cross-encoder scores to distances and pass in hits for reranking
|
238
243
|
for idx in range(len(cross_scores)):
|
khoj/utils/helpers.py
CHANGED
@@ -2,10 +2,12 @@ from __future__ import annotations # to avoid quoting type hints
|
|
2
2
|
|
3
3
|
import datetime
|
4
4
|
import io
|
5
|
+
import ipaddress
|
5
6
|
import logging
|
6
7
|
import os
|
7
8
|
import platform
|
8
9
|
import random
|
10
|
+
import urllib.parse
|
9
11
|
import uuid
|
10
12
|
from collections import OrderedDict
|
11
13
|
from enum import Enum
|
@@ -164,9 +166,9 @@ def get_class_by_name(name: str) -> object:
|
|
164
166
|
class timer:
|
165
167
|
"""Context manager to log time taken for a block of code to run"""
|
166
168
|
|
167
|
-
def __init__(self, message: str, logger: logging.Logger, device: torch.device = None):
|
169
|
+
def __init__(self, message: str, logger: logging.Logger, device: torch.device = None, log_level=logging.DEBUG):
|
168
170
|
self.message = message
|
169
|
-
self.logger = logger
|
171
|
+
self.logger = logger.debug if log_level == logging.DEBUG else logger.info
|
170
172
|
self.device = device
|
171
173
|
|
172
174
|
def __enter__(self):
|
@@ -176,9 +178,9 @@ class timer:
|
|
176
178
|
def __exit__(self, *_):
|
177
179
|
elapsed = perf_counter() - self.start
|
178
180
|
if self.device is None:
|
179
|
-
self.logger
|
181
|
+
self.logger(f"{self.message}: {elapsed:.3f} seconds")
|
180
182
|
else:
|
181
|
-
self.logger
|
183
|
+
self.logger(f"{self.message}: {elapsed:.3f} seconds on device: {self.device}")
|
182
184
|
|
183
185
|
|
184
186
|
class LRU(OrderedDict):
|
@@ -321,7 +323,7 @@ command_descriptions = {
|
|
321
323
|
ConversationCommand.Notes: "Only talk about information that is available in your knowledge base.",
|
322
324
|
ConversationCommand.Default: "The default command when no command specified. It intelligently auto-switches between general and notes mode.",
|
323
325
|
ConversationCommand.Online: "Search for information on the internet.",
|
324
|
-
ConversationCommand.Webpage: "Get information from webpage
|
326
|
+
ConversationCommand.Webpage: "Get information from webpage suggested by you.",
|
325
327
|
ConversationCommand.Image: "Generate images by describing your imagination in words.",
|
326
328
|
ConversationCommand.Automation: "Automatically run your query at a specified time or interval.",
|
327
329
|
ConversationCommand.Help: "Get help with how to use or setup Khoj from the documentation",
|
@@ -329,11 +331,11 @@ command_descriptions = {
|
|
329
331
|
}
|
330
332
|
|
331
333
|
command_descriptions_for_agent = {
|
332
|
-
ConversationCommand.General: "
|
333
|
-
ConversationCommand.Notes: "
|
334
|
-
ConversationCommand.Online: "
|
335
|
-
ConversationCommand.Webpage: "
|
336
|
-
ConversationCommand.Summarize: "
|
334
|
+
ConversationCommand.General: "Agent can use the agents knowledge base and general knowledge.",
|
335
|
+
ConversationCommand.Notes: "Agent can search the users knowledge base for information.",
|
336
|
+
ConversationCommand.Online: "Agent can search the internet for information.",
|
337
|
+
ConversationCommand.Webpage: "Agent can read suggested web pages for information.",
|
338
|
+
ConversationCommand.Summarize: "Agent can read an entire document. Agents knowledge base must be a single document.",
|
337
339
|
}
|
338
340
|
|
339
341
|
tool_descriptions_for_llm = {
|
@@ -347,13 +349,14 @@ tool_descriptions_for_llm = {
|
|
347
349
|
|
348
350
|
mode_descriptions_for_llm = {
|
349
351
|
ConversationCommand.Image: "Use this if the user is requesting you to generate a picture based on their description.",
|
350
|
-
ConversationCommand.Automation: "Use this if the user is requesting a response at a scheduled date
|
352
|
+
ConversationCommand.Automation: "Use this if you are confident the user is requesting a response at a scheduled date, time and frequency",
|
351
353
|
ConversationCommand.Text: "Use this if the other response modes don't seem to fit the query.",
|
352
354
|
}
|
353
355
|
|
354
356
|
mode_descriptions_for_agent = {
|
355
|
-
ConversationCommand.Image: "
|
356
|
-
ConversationCommand.
|
357
|
+
ConversationCommand.Image: "Agent can generate image in response.",
|
358
|
+
ConversationCommand.Automation: "Agent can schedule a task to run at a scheduled date, time and frequency in response.",
|
359
|
+
ConversationCommand.Text: "Agent can generate text in response.",
|
357
360
|
}
|
358
361
|
|
359
362
|
|
@@ -435,6 +438,46 @@ def is_internet_connected():
|
|
435
438
|
return False
|
436
439
|
|
437
440
|
|
441
|
+
def is_internal_url(url: str) -> bool:
|
442
|
+
"""
|
443
|
+
Check if a URL is likely to be internal/non-public.
|
444
|
+
|
445
|
+
Args:
|
446
|
+
url (str): The URL to check.
|
447
|
+
|
448
|
+
Returns:
|
449
|
+
bool: True if the URL is likely internal, False otherwise.
|
450
|
+
"""
|
451
|
+
try:
|
452
|
+
parsed_url = urllib.parse.urlparse(url)
|
453
|
+
hostname = parsed_url.hostname
|
454
|
+
|
455
|
+
# Check for localhost
|
456
|
+
if hostname in ["localhost", "127.0.0.1", "::1"]:
|
457
|
+
return True
|
458
|
+
|
459
|
+
# Check for IP addresses in private ranges
|
460
|
+
try:
|
461
|
+
ip = ipaddress.ip_address(hostname)
|
462
|
+
return ip.is_private
|
463
|
+
except ValueError:
|
464
|
+
pass # Not an IP address, continue with other checks
|
465
|
+
|
466
|
+
# Check for common internal TLDs
|
467
|
+
internal_tlds = [".local", ".internal", ".private", ".corp", ".home", ".lan"]
|
468
|
+
if any(hostname.endswith(tld) for tld in internal_tlds):
|
469
|
+
return True
|
470
|
+
|
471
|
+
# Check for URLs without a TLD
|
472
|
+
if "." not in hostname:
|
473
|
+
return True
|
474
|
+
|
475
|
+
return False
|
476
|
+
except Exception:
|
477
|
+
# If we can't parse the URL or something else goes wrong, assume it's not internal
|
478
|
+
return False
|
479
|
+
|
480
|
+
|
438
481
|
def convert_image_to_webp(image_bytes):
|
439
482
|
"""Convert image bytes to webp format for faster loading"""
|
440
483
|
image_io = io.BytesIO(image_bytes)
|
khoj/utils/initialization.py
CHANGED
@@ -129,9 +129,6 @@ def initialization(interactive: bool = True):
|
|
129
129
|
if user_chat_model_name and ChatModelOptions.objects.filter(chat_model=user_chat_model_name).exists():
|
130
130
|
default_chat_model_name = user_chat_model_name
|
131
131
|
|
132
|
-
# Create a server chat settings object with the default chat model
|
133
|
-
default_chat_model = ChatModelOptions.objects.filter(chat_model=default_chat_model_name).first()
|
134
|
-
ServerChatSettings.objects.create(chat_default=default_chat_model)
|
135
132
|
logger.info("🗣️ Chat model configuration complete")
|
136
133
|
|
137
134
|
# Set up offline speech to text model
|