khoj 1.24.2.dev16__py3-none-any.whl → 1.25.1.dev34__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in that registry.
Files changed (78)
  1. khoj/configure.py +13 -4
  2. khoj/database/adapters/__init__.py +163 -49
  3. khoj/database/admin.py +18 -1
  4. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  5. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  6. khoj/database/models/__init__.py +78 -2
  7. khoj/interface/compiled/404/index.html +1 -1
  8. khoj/interface/compiled/_next/static/chunks/1603-fa3ee48860b9dc5c.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/7762-79f2205740622b5c.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/app/agents/{layout-e71c8e913cccf792.js → layout-75636ab3a413fa8e.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/agents/page-fa282831808ee536.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/app/automations/{page-1688dead2f21270d.js → page-5480731341f34450.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/chat/{layout-8102549127db3067.js → layout-96fcf62857bf8f30.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/chat/{page-91abcb71846922b7.js → page-702057ccbcf27881.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/factchecker/{page-7ab093711c27041c.js → page-e7b34316ec6f44de.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/app/{layout-f3e40d346da53112.js → layout-d0f0a9067427fb20.js} +1 -1
  17. khoj/interface/compiled/_next/static/chunks/app/{page-fada198096eab47f.js → page-10a5aad6e04f3cf8.js} +1 -1
  18. khoj/interface/compiled/_next/static/chunks/app/search/{page-a7e036689b6507ff.js → page-d56541c746fded7d.js} +1 -1
  19. khoj/interface/compiled/_next/static/chunks/app/settings/{layout-6f9314b0d7a26046.js → layout-a8f33dfe92f997fb.js} +1 -1
  20. khoj/interface/compiled/_next/static/chunks/app/settings/{page-fa11cafaec7ab39f.js → page-e044a999468a7c5d.js} +1 -1
  21. khoj/interface/compiled/_next/static/chunks/app/share/chat/{layout-39f03f9e32399f0f.js → layout-2df56074e42adaa0.js} +1 -1
  22. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-c5d2b9076e5390b2.js → page-fbbd66a4d4633438.js} +1 -1
  23. khoj/interface/compiled/_next/static/chunks/{webpack-f52083d548d804fa.js → webpack-c0cd5a6afb1f0798.js} +1 -1
  24. khoj/interface/compiled/_next/static/css/2de69f0be774c768.css +1 -0
  25. khoj/interface/compiled/_next/static/css/3e1f1fdd70775091.css +1 -0
  26. khoj/interface/compiled/_next/static/css/467a524c75e7d7c0.css +1 -0
  27. khoj/interface/compiled/_next/static/css/b9a6bf04305d98d7.css +25 -0
  28. khoj/interface/compiled/agents/index.html +1 -1
  29. khoj/interface/compiled/agents/index.txt +2 -2
  30. khoj/interface/compiled/automations/index.html +1 -1
  31. khoj/interface/compiled/automations/index.txt +2 -2
  32. khoj/interface/compiled/chat/index.html +1 -1
  33. khoj/interface/compiled/chat/index.txt +2 -2
  34. khoj/interface/compiled/factchecker/index.html +1 -1
  35. khoj/interface/compiled/factchecker/index.txt +2 -2
  36. khoj/interface/compiled/index.html +1 -1
  37. khoj/interface/compiled/index.txt +2 -2
  38. khoj/interface/compiled/search/index.html +1 -1
  39. khoj/interface/compiled/search/index.txt +2 -2
  40. khoj/interface/compiled/settings/index.html +1 -1
  41. khoj/interface/compiled/settings/index.txt +3 -3
  42. khoj/interface/compiled/share/chat/index.html +1 -1
  43. khoj/interface/compiled/share/chat/index.txt +2 -2
  44. khoj/interface/web/assets/icons/agents.svg +1 -0
  45. khoj/interface/web/assets/icons/automation.svg +1 -0
  46. khoj/interface/web/assets/icons/chat.svg +24 -0
  47. khoj/interface/web/login.html +11 -22
  48. khoj/processor/conversation/google/gemini_chat.py +4 -19
  49. khoj/processor/conversation/google/utils.py +33 -15
  50. khoj/processor/conversation/prompts.py +14 -3
  51. khoj/processor/conversation/utils.py +3 -7
  52. khoj/processor/embeddings.py +6 -3
  53. khoj/processor/image/generate.py +1 -2
  54. khoj/processor/tools/online_search.py +135 -42
  55. khoj/routers/api.py +1 -1
  56. khoj/routers/api_agents.py +6 -3
  57. khoj/routers/api_chat.py +63 -520
  58. khoj/routers/api_model.py +1 -1
  59. khoj/routers/auth.py +9 -1
  60. khoj/routers/helpers.py +74 -61
  61. khoj/routers/subscription.py +18 -4
  62. khoj/search_type/text_search.py +7 -2
  63. khoj/utils/helpers.py +56 -13
  64. khoj/utils/initialization.py +0 -3
  65. {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/METADATA +19 -14
  66. {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/RECORD +71 -68
  67. khoj/interface/compiled/_next/static/chunks/1269-2e52d48e7d0e5c61.js +0 -1
  68. khoj/interface/compiled/_next/static/chunks/1603-67a89278e2c5dbe6.js +0 -1
  69. khoj/interface/compiled/_next/static/chunks/app/agents/page-df26b497b7356151.js +0 -1
  70. khoj/interface/compiled/_next/static/css/1538cedb321e3a97.css +0 -1
  71. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
  72. khoj/interface/compiled/_next/static/css/50d972a8c787730b.css +0 -25
  73. khoj/interface/compiled/_next/static/css/dfb67a9287720a2b.css +0 -1
  74. /khoj/interface/compiled/_next/static/{MyYNlmGMz32TGV_-febR4 → Jid9q6Qg851ioDaaO_fth}/_buildManifest.js +0 -0
  75. /khoj/interface/compiled/_next/static/{MyYNlmGMz32TGV_-febR4 → Jid9q6Qg851ioDaaO_fth}/_ssgManifest.js +0 -0
  76. {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/WHEEL +0 -0
  77. {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/entry_points.txt +0 -0
  78. {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api_model.py CHANGED
@@ -40,7 +40,7 @@ def get_user_chat_model(
     chat_model = ConversationAdapters.get_conversation_config(user)
 
     if chat_model is None:
-        chat_model = ConversationAdapters.get_default_conversation_config()
+        chat_model = ConversationAdapters.get_default_conversation_config(user)
 
     return Response(status_code=200, content=json.dumps({"id": chat_model.id, "chat_model": chat_model.chat_model}))
 
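Note: the get_default_conversation_config() → get_default_conversation_config(user) change above recurs throughout this release; the default chat model is now resolved per user. A minimal sketch of what such a user-aware lookup could look like; the chat_advanced field and the subscription check are illustrative assumptions, not code taken from this diff (the real logic lives in khoj/database/adapters/__init__.py, +163 -49 above):

def get_default_conversation_config(user: KhojUser = None):
    # Hypothetical sketch of a user-aware default model lookup.
    server_settings = ServerChatSettings.objects.first()
    if server_settings:
        # Assumption: subscribed users may be routed to a higher-tier model when one is configured.
        if user and is_user_subscribed(user) and server_settings.chat_advanced:
            return server_settings.chat_advanced
        if server_settings.chat_default:
            return server_settings.chat_default
    # Otherwise fall back to the first configured chat model.
    return ChatModelOptions.objects.first()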
khoj/routers/auth.py CHANGED
@@ -80,11 +80,19 @@ async def login_magic_link(request: Request, form: MagicLinkForm):
     request.session.pop("user", None)
 
     email = form.email
-    user = await aget_or_create_user_by_email(email)
+    user, is_new = await aget_or_create_user_by_email(email)
     unique_id = user.email_verification_code
 
     if user:
         await send_magic_link_email(email, unique_id, request.base_url)
+        if is_new:
+            update_telemetry_state(
+                request=request,
+                telemetry_type="api",
+                api="create_user",
+                metadata={"user_id": str(user.uuid)},
+            )
+            logger.log(logging.INFO, f"🥳 New User Created: {user.uuid}")
 
     return Response(status_code=200)
 
khoj/routers/helpers.py CHANGED
@@ -39,6 +39,7 @@ from khoj.database.adapters import (
     AutomationAdapters,
     ConversationAdapters,
     EntryAdapters,
+    ais_user_subscribed,
     create_khoj_token,
     get_khoj_tokens,
     get_user_name,
@@ -119,20 +120,20 @@ def is_query_empty(query: str) -> bool:
     return is_none_or_empty(query.strip())
 
 
-def validate_conversation_config():
-    default_config = ConversationAdapters.get_default_conversation_config()
+def validate_conversation_config(user: KhojUser):
+    default_config = ConversationAdapters.get_default_conversation_config(user)
 
     if default_config is None:
-        raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.")
+        raise HTTPException(status_code=500, detail="Contact the server administrator to add a chat model.")
 
     if default_config.model_type == "openai" and not default_config.openai_config:
-        raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.")
+        raise HTTPException(status_code=500, detail="Contact the server administrator to add a chat model.")
 
 
 async def is_ready_to_chat(user: KhojUser):
-    user_conversation_config = (await ConversationAdapters.aget_user_conversation_config(user)) or (
-        await ConversationAdapters.aget_default_conversation_config()
-    )
+    user_conversation_config = await ConversationAdapters.aget_user_conversation_config(user)
+    if user_conversation_config == None:
+        user_conversation_config = await ConversationAdapters.aget_default_conversation_config()
 
     if user_conversation_config and user_conversation_config.model_type == ChatModelOptions.ModelType.OFFLINE:
         chat_model = user_conversation_config.chat_model
@@ -208,7 +209,7 @@ def get_next_url(request: Request) -> str:
 def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
     chat_history = ""
     for chat in conversation_history.get("chat", [])[-n:]:
-        if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder"]:
+        if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]:
             chat_history += f"User: {chat['intent']['query']}\n"
             chat_history += f"{agent_name}: {chat['message']}\n"
         elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
@@ -246,19 +247,19 @@ async def agenerate_chat_response(*args):
     return await loop.run_in_executor(executor, generate_chat_response, *args)
 
 
-async def acreate_title_from_query(query: str) -> str:
+async def acreate_title_from_query(query: str, user: KhojUser = None) -> str:
     """
     Create a title from the given query
     """
     title_generation_prompt = prompts.subject_generation.format(query=query)
 
     with timer("Chat actor: Generate title from query", logger):
-        response = await send_message_to_model_wrapper(title_generation_prompt)
+        response = await send_message_to_model_wrapper(title_generation_prompt, user=user)
 
     return response.strip()
 
 
-async def acheck_if_safe_prompt(system_prompt: str) -> Tuple[bool, str]:
+async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None) -> Tuple[bool, str]:
     """
     Check if the system prompt is safe to use
     """
@@ -267,7 +268,7 @@ async def acheck_if_safe_prompt(system_prompt: str) -> Tuple[bool, str]:
     reason = ""
 
     with timer("Chat actor: Check if safe prompt", logger):
-        response = await send_message_to_model_wrapper(safe_prompt_check)
+        response = await send_message_to_model_wrapper(safe_prompt_check, user=user)
 
     response = response.strip()
     try:
@@ -288,7 +289,7 @@ async def aget_relevant_information_sources(
     query: str,
     conversation_history: dict,
     is_task: bool,
-    subscribed: bool,
+    user: KhojUser,
     uploaded_image_url: str = None,
     agent: Agent = None,
 ):
@@ -326,7 +327,7 @@ async def aget_relevant_information_sources(
         response = await send_message_to_model_wrapper(
             relevant_tools_prompt,
             response_type="json_object",
-            subscribed=subscribed,
+            user=user,
         )
 
     try:
@@ -348,15 +349,26 @@ async def aget_relevant_information_sources(
                 final_response.append(ConversationCommand(llm_suggested_tool))
 
         if is_none_or_empty(final_response):
-            final_response = [ConversationCommand.Default]
-        return final_response
-    except Exception as e:
+            if len(agent_tools) == 0:
+                final_response = [ConversationCommand.Default]
+            else:
+                final_response = [ConversationCommand.General]
+    except Exception:
         logger.error(f"Invalid response for determining relevant tools: {response}")
-        return [ConversationCommand.Default]
+        if len(agent_tools) == 0:
+            final_response = [ConversationCommand.Default]
+        else:
+            final_response = agent_tools
+    return final_response
 
 
 async def aget_relevant_output_modes(
-    query: str, conversation_history: dict, is_task: bool = False, uploaded_image_url: str = None, agent: Agent = None
+    query: str,
+    conversation_history: dict,
+    is_task: bool = False,
+    user: KhojUser = None,
+    uploaded_image_url: str = None,
+    agent: Agent = None,
 ):
     """
     Given a query, determine which of the available tools the agent should use in order to answer appropriately.
@@ -392,7 +404,7 @@ async def aget_relevant_output_modes(
     )
 
     with timer("Chat actor: Infer output mode for chat response", logger):
-        response = await send_message_to_model_wrapper(relevant_mode_prompt, response_type="json_object")
+        response = await send_message_to_model_wrapper(relevant_mode_prompt, response_type="json_object", user=user)
 
     try:
         response = response.strip()
@@ -447,7 +459,7 @@ async def infer_webpage_urls(
 
     with timer("Chat actor: Infer webpage urls to read", logger):
         response = await send_message_to_model_wrapper(
-            online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
+            online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object", user=user
         )
 
     # Validate that the response is a non-empty, JSON-serializable list of URLs
@@ -493,7 +505,7 @@ async def generate_online_subqueries(
 
     with timer("Chat actor: Generate online search subqueries", logger):
         response = await send_message_to_model_wrapper(
-            online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
+            online_queries_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object", user=user
         )
 
     # Validate that the response is a non-empty, JSON-serializable list
@@ -511,7 +523,9 @@ async def generate_online_subqueries(
         return [q]
 
 
-async def schedule_query(q: str, conversation_history: dict, uploaded_image_url: str = None) -> Tuple[str, ...]:
+async def schedule_query(
+    q: str, conversation_history: dict, user: KhojUser, uploaded_image_url: str = None
+) -> Tuple[str, ...]:
     """
     Schedule the date, time to run the query. Assume the server timezone is UTC.
     """
@@ -523,7 +537,7 @@ async def schedule_query(q: str, conversation_history: dict, uploaded_image_url:
     )
 
     raw_response = await send_message_to_model_wrapper(
-        crontime_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object"
+        crontime_prompt, uploaded_image_url=uploaded_image_url, response_type="json_object", user=user
    )
 
     # Validate that the response is a non-empty, JSON-serializable list
@@ -537,12 +551,14 @@ async def schedule_query(q: str, conversation_history: dict, uploaded_image_url:
         raise AssertionError(f"Invalid response for scheduling query: {raw_response}")
 
 
-async def extract_relevant_info(q: str, corpus: str, subscribed: bool, agent: Agent = None) -> Union[str, None]:
+async def extract_relevant_info(
+    qs: set[str], corpus: str, user: KhojUser = None, agent: Agent = None
+) -> Union[str, None]:
     """
     Extract relevant information for a given query from the target corpus
     """
 
-    if is_none_or_empty(corpus) or is_none_or_empty(q):
+    if is_none_or_empty(corpus) or is_none_or_empty(qs):
         return None
 
     personality_context = (
@@ -550,25 +566,26 @@ async def extract_relevant_info(q: str, corpus: str, subscribed: bool, agent: Ag
     )
 
     extract_relevant_information = prompts.extract_relevant_information.format(
-        query=q,
+        query=", ".join(qs),
         corpus=corpus.strip(),
         personality_context=personality_context,
     )
 
-    chat_model: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config()
-
-    with timer("Chat actor: Extract relevant information from data", logger):
-        response = await send_message_to_model_wrapper(
-            extract_relevant_information,
-            prompts.system_prompt_extract_relevant_information,
-            chat_model_option=chat_model,
-            subscribed=subscribed,
-        )
+    response = await send_message_to_model_wrapper(
+        extract_relevant_information,
+        prompts.system_prompt_extract_relevant_information,
+        user=user,
+    )
     return response.strip()
 
 
 async def extract_relevant_summary(
-    q: str, corpus: str, subscribed: bool = False, uploaded_image_url: str = None, agent: Agent = None
+    q: str,
+    corpus: str,
+    conversation_history: dict,
+    uploaded_image_url: str = None,
+    user: KhojUser = None,
+    agent: Agent = None,
 ) -> Union[str, None]:
     """
     Extract relevant information for a given query from the target corpus
@@ -581,20 +598,20 @@ async def extract_relevant_summary(
         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
     )
 
+    chat_history = construct_chat_history(conversation_history)
+
     extract_relevant_information = prompts.extract_relevant_summary.format(
         query=q,
+        chat_history=chat_history,
         corpus=corpus.strip(),
         personality_context=personality_context,
     )
 
-    chat_model: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config()
-
     with timer("Chat actor: Extract relevant information from data", logger):
         response = await send_message_to_model_wrapper(
             extract_relevant_information,
             prompts.system_prompt_extract_relevant_summary,
-            chat_model_option=chat_model,
-            subscribed=subscribed,
+            user=user,
             uploaded_image_url=uploaded_image_url,
         )
     return response.strip()
@@ -607,8 +624,8 @@ async def generate_better_image_prompt(
     note_references: List[Dict[str, Any]],
     online_results: Optional[dict] = None,
     model_type: Optional[str] = None,
-    subscribed: bool = False,
     uploaded_image_url: Optional[str] = None,
+    user: KhojUser = None,
     agent: Agent = None,
 ) -> str:
     """
@@ -658,12 +675,8 @@ async def generate_better_image_prompt(
         personality_context=personality_context,
     )
 
-    chat_model: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config()
-
     with timer("Chat actor: Generate contextual image prompt", logger):
-        response = await send_message_to_model_wrapper(
-            image_prompt, chat_model_option=chat_model, subscribed=subscribed, uploaded_image_url=uploaded_image_url
-        )
+        response = await send_message_to_model_wrapper(image_prompt, uploaded_image_url=uploaded_image_url, user=user)
     response = response.strip()
     if response.startswith(('"', "'")) and response.endswith(('"', "'")):
         response = response[1:-1]
@@ -675,14 +688,10 @@ async def send_message_to_model_wrapper(
     message: str,
     system_message: str = "",
     response_type: str = "text",
-    chat_model_option: ChatModelOptions = None,
-    subscribed: bool = False,
+    user: KhojUser = None,
     uploaded_image_url: str = None,
 ):
-    conversation_config: ChatModelOptions = (
-        chat_model_option or await ConversationAdapters.aget_default_conversation_config()
-    )
-
+    conversation_config: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config(user)
     vision_available = conversation_config.vision_enabled
     if not vision_available and uploaded_image_url:
         vision_enabled_config = await ConversationAdapters.aget_vision_enabled_config()
@@ -690,6 +699,7 @@ async def send_message_to_model_wrapper(
             conversation_config = vision_enabled_config
             vision_available = True
 
+    subscribed = await ais_user_subscribed(user)
     chat_model = conversation_config.chat_model
     max_tokens = (
         conversation_config.subscribed_max_prompt_size
@@ -788,8 +798,9 @@ def send_message_to_model_wrapper_sync(
     message: str,
     system_message: str = "",
     response_type: str = "text",
+    user: KhojUser = None,
 ):
-    conversation_config: ChatModelOptions = ConversationAdapters.get_default_conversation_config()
+    conversation_config: ChatModelOptions = ConversationAdapters.get_default_conversation_config(user)
 
     if conversation_config is None:
         raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.")
@@ -1168,7 +1179,7 @@ class CommonQueryParamsClass:
 CommonQueryParams = Annotated[CommonQueryParamsClass, Depends()]
 
 
-def should_notify(original_query: str, executed_query: str, ai_response: str) -> bool:
+def should_notify(original_query: str, executed_query: str, ai_response: str, user: KhojUser) -> bool:
     """
     Decide whether to notify the user of the AI response.
     Default to notifying the user for now.
@@ -1185,7 +1196,7 @@ def should_notify(original_query: str, executed_query: str, ai_response: str) ->
     with timer("Chat actor: Decide to notify user of automation response", logger):
         try:
             # TODO Replace with async call so we don't have to maintain a sync version
-            response = send_message_to_model_wrapper_sync(to_notify_or_not)
+            response = send_message_to_model_wrapper_sync(to_notify_or_not, user)
             should_notify_result = "no" not in response.lower()
             logger.info(f'Decided to {"not " if not should_notify_result else ""}notify user of automation response.')
             return should_notify_result
@@ -1277,7 +1288,9 @@ def scheduled_chat(
         ai_response = raw_response.text
 
     # Notify user if the AI response is satisfactory
-    if should_notify(original_query=scheduling_request, executed_query=cleaned_query, ai_response=ai_response):
+    if should_notify(
+        original_query=scheduling_request, executed_query=cleaned_query, ai_response=ai_response, user=user
+    ):
         if is_resend_enabled():
             send_task_email(user.get_short_name(), user.email, cleaned_query, ai_response, subject, is_image)
         else:
@@ -1287,7 +1300,7 @@ async def create_automation(
 async def create_automation(
     q: str, timezone: str, user: KhojUser, calling_url: URL, meta_log: dict = {}, conversation_id: str = None
 ):
-    crontime, query_to_run, subject = await schedule_query(q, meta_log)
+    crontime, query_to_run, subject = await schedule_query(q, meta_log, user)
     job = await schedule_automation(query_to_run, subject, crontime, timezone, q, user, calling_url, conversation_id)
     return job, crontime, query_to_run, subject
 
@@ -1481,9 +1494,9 @@ def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False)
     current_notion_config = get_user_notion_config(user)
     notion_token = current_notion_config.token if current_notion_config else ""
 
-    selected_chat_model_config = (
-        ConversationAdapters.get_conversation_config(user) or ConversationAdapters.get_default_conversation_config()
-    )
+    selected_chat_model_config = ConversationAdapters.get_conversation_config(
+        user
+    ) or ConversationAdapters.get_default_conversation_config(user)
     chat_models = ConversationAdapters.get_conversation_processor_options().all()
     chat_model_options = list()
     for chat_model in chat_models:
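Taken together, the helpers.py hunks above replace the subscribed/chat_model_option plumbing with a single user argument that send_message_to_model_wrapper resolves internally (model choice, vision support, and token budget via ais_user_subscribed). A condensed sketch of the resulting calling convention; summarize_notes and its prompt are illustrative, not part of this diff:

async def summarize_notes(query: str, notes: str, user: KhojUser) -> str:
    # Callers now just forward the user; the wrapper picks the model and token budget itself.
    prompt = f"Summarize these notes for the query '{query}':\n{notes}"
    response = await send_message_to_model_wrapper(prompt, response_type="text", user=user)
    return response.strip()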
khoj/routers/subscription.py CHANGED
@@ -7,6 +7,7 @@ from fastapi import APIRouter, Request
 from starlette.authentication import requires
 
 from khoj.database import adapters
+from khoj.routers.helpers import update_telemetry_state
 from khoj.utils import state
 
 # Stripe integration for Khoj Cloud Subscription
@@ -48,6 +49,8 @@ async def subscribe(request: Request):
     customer_id = subscription["customer"]
     customer = stripe.Customer.retrieve(customer_id)
     customer_email = customer["email"]
+    user = None
+    is_new = False
 
     # Handle valid stripe webhook events
     success = True
@@ -55,7 +58,9 @@
         # Mark the user as subscribed and update the next renewal date on payment
         subscription = stripe.Subscription.list(customer=customer_id).data[0]
         renewal_date = datetime.fromtimestamp(subscription["current_period_end"], tz=timezone.utc)
-        user = await adapters.set_user_subscription(customer_email, is_recurring=True, renewal_date=renewal_date)
+        user, is_new = await adapters.set_user_subscription(
+            customer_email, is_recurring=True, renewal_date=renewal_date
+        )
         success = user is not None
     elif event_type in {"customer.subscription.updated"}:
         user_subscription = await sync_to_async(adapters.get_user_subscription)(customer_email)
@@ -63,15 +68,24 @@
         if user_subscription and user_subscription.renewal_date:
             # Mark user as unsubscribed or resubscribed
             is_recurring = not subscription["cancel_at_period_end"]
-            updated_user = await adapters.set_user_subscription(customer_email, is_recurring=is_recurring)
-            success = updated_user is not None
+            user, is_new = await adapters.set_user_subscription(customer_email, is_recurring=is_recurring)
+            success = user is not None
     elif event_type in {"customer.subscription.deleted"}:
         # Reset the user to trial state
-        user = await adapters.set_user_subscription(
+        user, is_new = await adapters.set_user_subscription(
             customer_email, is_recurring=False, renewal_date=False, type="trial"
         )
         success = user is not None
 
+    if user and is_new:
+        update_telemetry_state(
+            request=request,
+            telemetry_type="api",
+            api="create_user",
+            metadata={"user_id": str(user.user.uuid)},
+        )
+        logger.log(logging.INFO, f"🥳 New User Created: {user.user.uuid}")
+
     logger.info(f'Stripe subscription {event["type"]} for {customer_email}')
     return {"success": success}
 
khoj/search_type/text_search.py CHANGED
@@ -3,6 +3,7 @@ import math
 from pathlib import Path
 from typing import List, Optional, Tuple, Type, Union
 
+import requests
 import torch
 from asgiref.sync import sync_to_async
 from sentence_transformers import util
@@ -231,8 +232,12 @@ def setup(
 
 def cross_encoder_score(query: str, hits: List[SearchResponse], search_model_name: str) -> List[SearchResponse]:
     """Score all retrieved entries using the cross-encoder"""
-    with timer("Cross-Encoder Predict Time", logger, state.device):
-        cross_scores = state.cross_encoder_model[search_model_name].predict(query, hits)
+    try:
+        with timer("Cross-Encoder Predict Time", logger, state.device):
+            cross_scores = state.cross_encoder_model[search_model_name].predict(query, hits)
+    except requests.exceptions.HTTPError as e:
+        logger.error(f"Failed to rerank documents using the inference endpoint. Error: {e}.", exc_info=True)
+        cross_scores = [0.0] * len(hits)
 
     # Convert cross-encoder scores to distances and pass in hits for reranking
     for idx in range(len(cross_scores)):
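The cross-encoder change above is a graceful-degradation pattern: if a remote reranking endpoint fails with an HTTP error, neutral scores preserve the pre-rerank ordering instead of failing the whole search. The same pattern in isolation, with an illustrative endpoint and payload shape (assumptions, not Khoj's API):

import logging
import requests

logger = logging.getLogger(__name__)

def rerank_scores(query: str, documents: list[str], endpoint: str) -> list[float]:
    """Score documents against a query via a remote cross-encoder, falling back to neutral scores."""
    try:
        reply = requests.post(endpoint, json={"query": query, "documents": documents}, timeout=10)
        reply.raise_for_status()
        return reply.json()["scores"]
    except requests.exceptions.HTTPError as error:
        logger.error(f"Failed to rerank documents: {error}", exc_info=True)
        return [0.0] * len(documents)  # neutral scores keep the original ordering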
khoj/utils/helpers.py CHANGED
@@ -2,10 +2,12 @@ from __future__ import annotations # to avoid quoting type hints
 
 import datetime
 import io
+import ipaddress
 import logging
 import os
 import platform
 import random
+import urllib.parse
 import uuid
 from collections import OrderedDict
 from enum import Enum
@@ -164,9 +166,9 @@ def get_class_by_name(name: str) -> object:
 class timer:
     """Context manager to log time taken for a block of code to run"""
 
-    def __init__(self, message: str, logger: logging.Logger, device: torch.device = None):
+    def __init__(self, message: str, logger: logging.Logger, device: torch.device = None, log_level=logging.DEBUG):
         self.message = message
-        self.logger = logger
+        self.logger = logger.debug if log_level == logging.DEBUG else logger.info
         self.device = device
 
     def __enter__(self):
@@ -176,9 +178,9 @@ class timer:
     def __exit__(self, *_):
         elapsed = perf_counter() - self.start
         if self.device is None:
-            self.logger.debug(f"{self.message}: {elapsed:.3f} seconds")
+            self.logger(f"{self.message}: {elapsed:.3f} seconds")
         else:
-            self.logger.debug(f"{self.message}: {elapsed:.3f} seconds on device: {self.device}")
+            self.logger(f"{self.message}: {elapsed:.3f} seconds on device: {self.device}")
 
 
 class LRU(OrderedDict):
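A short usage sketch for the extended timer context manager: behaviour is unchanged by default (DEBUG), while log_level=logging.INFO promotes a timing to the INFO log. The timed block here is a placeholder:

import logging
import time

logger = logging.getLogger(__name__)

with timer("Scrape webpages", logger, log_level=logging.INFO):
    time.sleep(0.5)  # stand-in for real work; logs "Scrape webpages: 0.5xx seconds" at INFO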
@@ -321,7 +323,7 @@ command_descriptions = {
     ConversationCommand.Notes: "Only talk about information that is available in your knowledge base.",
     ConversationCommand.Default: "The default command when no command specified. It intelligently auto-switches between general and notes mode.",
     ConversationCommand.Online: "Search for information on the internet.",
-    ConversationCommand.Webpage: "Get information from webpage links provided by you.",
+    ConversationCommand.Webpage: "Get information from webpage suggested by you.",
     ConversationCommand.Image: "Generate images by describing your imagination in words.",
     ConversationCommand.Automation: "Automatically run your query at a specified time or interval.",
     ConversationCommand.Help: "Get help with how to use or setup Khoj from the documentation",
@@ -329,11 +331,11 @@ }
 }
 
 command_descriptions_for_agent = {
-    ConversationCommand.General: "Respond without any outside information or personal knowledge.",
-    ConversationCommand.Notes: "Search through the knowledge base. Required if the agent expects context from the knowledge base.",
-    ConversationCommand.Online: "Search for the latest, up-to-date information from the internet.",
-    ConversationCommand.Webpage: "Scrape specific web pages for information.",
-    ConversationCommand.Summarize: "Retrieve an answer that depends on the entire document or a large text. Knowledge base must be a single document.",
+    ConversationCommand.General: "Agent can use the agents knowledge base and general knowledge.",
+    ConversationCommand.Notes: "Agent can search the users knowledge base for information.",
+    ConversationCommand.Online: "Agent can search the internet for information.",
+    ConversationCommand.Webpage: "Agent can read suggested web pages for information.",
+    ConversationCommand.Summarize: "Agent can read an entire document. Agents knowledge base must be a single document.",
 }
 
 tool_descriptions_for_llm = {
@@ -347,13 +349,14 @@ tool_descriptions_for_llm = {
 
 mode_descriptions_for_llm = {
     ConversationCommand.Image: "Use this if the user is requesting you to generate a picture based on their description.",
-    ConversationCommand.Automation: "Use this if the user is requesting a response at a scheduled date or time.",
+    ConversationCommand.Automation: "Use this if you are confident the user is requesting a response at a scheduled date, time and frequency",
     ConversationCommand.Text: "Use this if the other response modes don't seem to fit the query.",
 }
 
 mode_descriptions_for_agent = {
-    ConversationCommand.Image: "Allow the agent to generate images.",
-    ConversationCommand.Text: "Allow the agent to generate text.",
+    ConversationCommand.Image: "Agent can generate image in response.",
+    ConversationCommand.Automation: "Agent can schedule a task to run at a scheduled date, time and frequency in response.",
+    ConversationCommand.Text: "Agent can generate text in response.",
 }
 
 
@@ -435,6 +438,46 @@ def is_internet_connected():
         return False
 
 
+def is_internal_url(url: str) -> bool:
+    """
+    Check if a URL is likely to be internal/non-public.
+
+    Args:
+        url (str): The URL to check.
+
+    Returns:
+        bool: True if the URL is likely internal, False otherwise.
+    """
+    try:
+        parsed_url = urllib.parse.urlparse(url)
+        hostname = parsed_url.hostname
+
+        # Check for localhost
+        if hostname in ["localhost", "127.0.0.1", "::1"]:
+            return True
+
+        # Check for IP addresses in private ranges
+        try:
+            ip = ipaddress.ip_address(hostname)
+            return ip.is_private
+        except ValueError:
+            pass  # Not an IP address, continue with other checks
+
+        # Check for common internal TLDs
+        internal_tlds = [".local", ".internal", ".private", ".corp", ".home", ".lan"]
+        if any(hostname.endswith(tld) for tld in internal_tlds):
+            return True
+
+        # Check for URLs without a TLD
+        if "." not in hostname:
+            return True
+
+        return False
+    except Exception:
+        # If we can't parse the URL or something else goes wrong, assume it's not internal
+        return False
+
+
 def convert_image_to_webp(image_bytes):
     """Convert image bytes to webp format for faster loading"""
     image_io = io.BytesIO(image_bytes)
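A small usage sketch for the new is_internal_url guard, e.g. filtering user-suggested links before fetching them (the URLs are examples):

urls = [
    "https://example.com/blog/post",  # public: kept
    "http://192.168.1.10/admin",  # private IP range: filtered out
    "http://files.local/report.pdf",  # internal TLD: filtered out
]
safe_to_fetch = [url for url in urls if not is_internal_url(url)]
# safe_to_fetch == ["https://example.com/blog/post"]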
khoj/utils/initialization.py CHANGED
@@ -129,9 +129,6 @@ def initialization(interactive: bool = True):
         if user_chat_model_name and ChatModelOptions.objects.filter(chat_model=user_chat_model_name).exists():
             default_chat_model_name = user_chat_model_name
 
-        # Create a server chat settings object with the default chat model
-        default_chat_model = ChatModelOptions.objects.filter(chat_model=default_chat_model_name).first()
-        ServerChatSettings.objects.create(chat_default=default_chat_model)
         logger.info("🗣️ Chat model configuration complete")
 
         # Set up offline speech to text model