khoj 1.24.2.dev16__py3-none-any.whl → 1.25.1.dev34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. khoj/configure.py +13 -4
  2. khoj/database/adapters/__init__.py +163 -49
  3. khoj/database/admin.py +18 -1
  4. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  5. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  6. khoj/database/models/__init__.py +78 -2
  7. khoj/interface/compiled/404/index.html +1 -1
  8. khoj/interface/compiled/_next/static/chunks/1603-fa3ee48860b9dc5c.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/7762-79f2205740622b5c.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/app/agents/{layout-e71c8e913cccf792.js → layout-75636ab3a413fa8e.js} +1 -1
  11. khoj/interface/compiled/_next/static/chunks/app/agents/page-fa282831808ee536.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/app/automations/{page-1688dead2f21270d.js → page-5480731341f34450.js} +1 -1
  13. khoj/interface/compiled/_next/static/chunks/app/chat/{layout-8102549127db3067.js → layout-96fcf62857bf8f30.js} +1 -1
  14. khoj/interface/compiled/_next/static/chunks/app/chat/{page-91abcb71846922b7.js → page-702057ccbcf27881.js} +1 -1
  15. khoj/interface/compiled/_next/static/chunks/app/factchecker/{page-7ab093711c27041c.js → page-e7b34316ec6f44de.js} +1 -1
  16. khoj/interface/compiled/_next/static/chunks/app/{layout-f3e40d346da53112.js → layout-d0f0a9067427fb20.js} +1 -1
  17. khoj/interface/compiled/_next/static/chunks/app/{page-fada198096eab47f.js → page-10a5aad6e04f3cf8.js} +1 -1
  18. khoj/interface/compiled/_next/static/chunks/app/search/{page-a7e036689b6507ff.js → page-d56541c746fded7d.js} +1 -1
  19. khoj/interface/compiled/_next/static/chunks/app/settings/{layout-6f9314b0d7a26046.js → layout-a8f33dfe92f997fb.js} +1 -1
  20. khoj/interface/compiled/_next/static/chunks/app/settings/{page-fa11cafaec7ab39f.js → page-e044a999468a7c5d.js} +1 -1
  21. khoj/interface/compiled/_next/static/chunks/app/share/chat/{layout-39f03f9e32399f0f.js → layout-2df56074e42adaa0.js} +1 -1
  22. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-c5d2b9076e5390b2.js → page-fbbd66a4d4633438.js} +1 -1
  23. khoj/interface/compiled/_next/static/chunks/{webpack-f52083d548d804fa.js → webpack-c0cd5a6afb1f0798.js} +1 -1
  24. khoj/interface/compiled/_next/static/css/2de69f0be774c768.css +1 -0
  25. khoj/interface/compiled/_next/static/css/3e1f1fdd70775091.css +1 -0
  26. khoj/interface/compiled/_next/static/css/467a524c75e7d7c0.css +1 -0
  27. khoj/interface/compiled/_next/static/css/b9a6bf04305d98d7.css +25 -0
  28. khoj/interface/compiled/agents/index.html +1 -1
  29. khoj/interface/compiled/agents/index.txt +2 -2
  30. khoj/interface/compiled/automations/index.html +1 -1
  31. khoj/interface/compiled/automations/index.txt +2 -2
  32. khoj/interface/compiled/chat/index.html +1 -1
  33. khoj/interface/compiled/chat/index.txt +2 -2
  34. khoj/interface/compiled/factchecker/index.html +1 -1
  35. khoj/interface/compiled/factchecker/index.txt +2 -2
  36. khoj/interface/compiled/index.html +1 -1
  37. khoj/interface/compiled/index.txt +2 -2
  38. khoj/interface/compiled/search/index.html +1 -1
  39. khoj/interface/compiled/search/index.txt +2 -2
  40. khoj/interface/compiled/settings/index.html +1 -1
  41. khoj/interface/compiled/settings/index.txt +3 -3
  42. khoj/interface/compiled/share/chat/index.html +1 -1
  43. khoj/interface/compiled/share/chat/index.txt +2 -2
  44. khoj/interface/web/assets/icons/agents.svg +1 -0
  45. khoj/interface/web/assets/icons/automation.svg +1 -0
  46. khoj/interface/web/assets/icons/chat.svg +24 -0
  47. khoj/interface/web/login.html +11 -22
  48. khoj/processor/conversation/google/gemini_chat.py +4 -19
  49. khoj/processor/conversation/google/utils.py +33 -15
  50. khoj/processor/conversation/prompts.py +14 -3
  51. khoj/processor/conversation/utils.py +3 -7
  52. khoj/processor/embeddings.py +6 -3
  53. khoj/processor/image/generate.py +1 -2
  54. khoj/processor/tools/online_search.py +135 -42
  55. khoj/routers/api.py +1 -1
  56. khoj/routers/api_agents.py +6 -3
  57. khoj/routers/api_chat.py +63 -520
  58. khoj/routers/api_model.py +1 -1
  59. khoj/routers/auth.py +9 -1
  60. khoj/routers/helpers.py +74 -61
  61. khoj/routers/subscription.py +18 -4
  62. khoj/search_type/text_search.py +7 -2
  63. khoj/utils/helpers.py +56 -13
  64. khoj/utils/initialization.py +0 -3
  65. {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/METADATA +19 -14
  66. {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/RECORD +71 -68
  67. khoj/interface/compiled/_next/static/chunks/1269-2e52d48e7d0e5c61.js +0 -1
  68. khoj/interface/compiled/_next/static/chunks/1603-67a89278e2c5dbe6.js +0 -1
  69. khoj/interface/compiled/_next/static/chunks/app/agents/page-df26b497b7356151.js +0 -1
  70. khoj/interface/compiled/_next/static/css/1538cedb321e3a97.css +0 -1
  71. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
  72. khoj/interface/compiled/_next/static/css/50d972a8c787730b.css +0 -25
  73. khoj/interface/compiled/_next/static/css/dfb67a9287720a2b.css +0 -1
  74. /khoj/interface/compiled/_next/static/{MyYNlmGMz32TGV_-febR4 → Jid9q6Qg851ioDaaO_fth}/_buildManifest.js +0 -0
  75. /khoj/interface/compiled/_next/static/{MyYNlmGMz32TGV_-febR4 → Jid9q6Qg851ioDaaO_fth}/_ssgManifest.js +0 -0
  76. {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/WHEEL +0 -0
  77. {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/entry_points.txt +0 -0
  78. {khoj-1.24.2.dev16.dist-info → khoj-1.25.1.dev34.dist-info}/licenses/LICENSE +0 -0
khoj/configure.py CHANGED
@@ -42,6 +42,7 @@ from khoj.database.adapters import (
42
42
  from khoj.database.models import ClientApplication, KhojUser, ProcessLock, Subscription
43
43
  from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
44
44
  from khoj.routers.api_content import configure_content, configure_search
45
+ from khoj.routers.helpers import update_telemetry_state
45
46
  from khoj.routers.twilio import is_twilio_enabled
46
47
  from khoj.utils import constants, state
47
48
  from khoj.utils.config import SearchType
@@ -165,7 +166,15 @@ class UserAuthenticationBackend(AuthenticationBackend):
165
166
 
166
167
  create_if_not_exists = request.query_params.get("create_if_not_exists")
167
168
  if create_if_not_exists:
168
- user = await aget_or_create_user_by_phone_number(phone_number)
169
+ user, is_new = await aget_or_create_user_by_phone_number(phone_number)
170
+ if user and is_new:
171
+ update_telemetry_state(
172
+ request=request,
173
+ telemetry_type="api",
174
+ api="create_user",
175
+ metadata={"user_id": str(user.uuid)},
176
+ )
177
+ logger.log(logging.INFO, f"🥳 New User Created: {user.uuid}")
169
178
  else:
170
179
  user = await aget_user_by_phone_number(phone_number)
171
180
 
@@ -244,7 +253,7 @@ def configure_server(
244
253
 
245
254
  state.SearchType = configure_search_types()
246
255
  state.search_models = configure_search(state.search_models, state.config.search_type)
247
- setup_default_agent()
256
+ setup_default_agent(user)
248
257
 
249
258
  message = "📡 Telemetry disabled" if telemetry_disabled(state.config.app) else "📡 Telemetry enabled"
250
259
  logger.info(message)
@@ -256,8 +265,8 @@ def configure_server(
256
265
  raise e
257
266
 
258
267
 
259
- def setup_default_agent():
260
- AgentAdapters.create_default_agent()
268
+ def setup_default_agent(user: KhojUser):
269
+ AgentAdapters.create_default_agent(user)
261
270
 
262
271
 
263
272
  def initialize_content(regenerate: bool, search_type: Optional[SearchType] = None, user: KhojUser = None):
khoj/database/adapters/__init__.py CHANGED
@@ -1,6 +1,7 @@
1
1
  import json
2
2
  import logging
3
3
  import math
4
+ import os
4
5
  import random
5
6
  import re
6
7
  import secrets
@@ -10,7 +11,6 @@ from enum import Enum
10
11
  from typing import Callable, Iterable, List, Optional, Type
11
12
 
12
13
  import cron_descriptor
13
- import django
14
14
  from apscheduler.job import Job
15
15
  from asgiref.sync import sync_to_async
16
16
  from django.contrib.sessions.backends.db import SessionStore
@@ -52,6 +52,7 @@ from khoj.database.models import (
52
52
  UserTextToImageModelConfig,
53
53
  UserVoiceModelConfig,
54
54
  VoiceModelOption,
55
+ WebScraper,
55
56
  )
56
57
  from khoj.processor.conversation import prompts
57
58
  from khoj.search_filter.date_filter import DateFilter
@@ -59,7 +60,12 @@ from khoj.search_filter.file_filter import FileFilter
59
60
  from khoj.search_filter.word_filter import WordFilter
60
61
  from khoj.utils import state
61
62
  from khoj.utils.config import OfflineChatProcessorModel
62
- from khoj.utils.helpers import generate_random_name, is_none_or_empty, timer
63
+ from khoj.utils.helpers import (
64
+ generate_random_name,
65
+ in_debug_mode,
66
+ is_none_or_empty,
67
+ timer,
68
+ )
63
69
 
64
70
  logger = logging.getLogger(__name__)
65
71
 
@@ -113,13 +119,15 @@ async def get_or_create_user(token: dict) -> KhojUser:
113
119
  return user
114
120
 
115
121
 
116
- async def aget_or_create_user_by_phone_number(phone_number: str) -> KhojUser:
122
+ async def aget_or_create_user_by_phone_number(phone_number: str) -> tuple[KhojUser, bool]:
123
+ is_new = False
117
124
  if is_none_or_empty(phone_number):
118
- return None
125
+ return None, is_new
119
126
  user = await aget_user_by_phone_number(phone_number)
120
127
  if not user:
121
128
  user = await acreate_user_by_phone_number(phone_number)
122
- return user
129
+ is_new = True
130
+ return user, is_new
123
131
 
124
132
 
125
133
  async def aset_user_phone_number(user: KhojUser, phone_number: str) -> KhojUser:
@@ -165,8 +173,10 @@ async def acreate_user_by_phone_number(phone_number: str) -> KhojUser:
165
173
  return user
166
174
 
167
175
 
168
- async def aget_or_create_user_by_email(email: str) -> KhojUser:
169
- user, _ = await KhojUser.objects.filter(email=email).aupdate_or_create(defaults={"username": email, "email": email})
176
+ async def aget_or_create_user_by_email(email: str) -> tuple[KhojUser, bool]:
177
+ user, is_new = await KhojUser.objects.filter(email=email).aupdate_or_create(
178
+ defaults={"username": email, "email": email}
179
+ )
170
180
  await user.asave()
171
181
 
172
182
  if user:
@@ -177,7 +187,7 @@ async def aget_or_create_user_by_email(email: str) -> KhojUser:
177
187
  if not user_subscription:
178
188
  await Subscription.objects.acreate(user=user, type="trial")
179
189
 
180
- return user
190
+ return user, is_new
181
191
 
182
192
 
183
193
  async def aget_user_validated_by_email_verification_code(code: str) -> KhojUser:
@@ -248,9 +258,9 @@ def get_user_subscription(email: str) -> Optional[Subscription]:
248
258
 
249
259
  async def set_user_subscription(
250
260
  email: str, is_recurring=None, renewal_date=None, type="standard"
251
- ) -> Optional[Subscription]:
261
+ ) -> tuple[Optional[Subscription], bool]:
252
262
  # Get or create the user object and their subscription
253
- user = await aget_or_create_user_by_email(email)
263
+ user, is_new = await aget_or_create_user_by_email(email)
254
264
  user_subscription = await Subscription.objects.filter(user=user).afirst()
255
265
 
256
266
  # Update the user subscription state
@@ -262,7 +272,7 @@ async def set_user_subscription(
262
272
  elif renewal_date is not None:
263
273
  user_subscription.renewal_date = renewal_date
264
274
  await user_subscription.asave()
265
- return user_subscription
275
+ return user_subscription, is_new
266
276
 
267
277
 
268
278
  def subscription_to_state(subscription: Subscription) -> str:
@@ -556,18 +566,26 @@ class AgentAdapters:
556
566
 
557
567
  @staticmethod
558
568
  async def aget_readonly_agent_by_slug(agent_slug: str, user: KhojUser):
559
- return await Agent.objects.filter(
560
- (Q(slug__iexact=agent_slug.lower()))
561
- & (
562
- Q(privacy_level=Agent.PrivacyLevel.PUBLIC)
563
- | Q(privacy_level=Agent.PrivacyLevel.PROTECTED)
564
- | Q(creator=user)
569
+ return (
570
+ await Agent.objects.filter(
571
+ (Q(slug__iexact=agent_slug.lower()))
572
+ & (
573
+ Q(privacy_level=Agent.PrivacyLevel.PUBLIC)
574
+ | Q(privacy_level=Agent.PrivacyLevel.PROTECTED)
575
+ | Q(creator=user)
576
+ )
565
577
  )
566
- ).afirst()
578
+ .prefetch_related("creator", "chat_model", "fileobject_set")
579
+ .afirst()
580
+ )
567
581
 
568
582
  @staticmethod
569
583
  async def adelete_agent_by_slug(agent_slug: str, user: KhojUser):
570
584
  agent = await AgentAdapters.aget_agent_by_slug(agent_slug, user)
585
+
586
+ async for entry in Entry.objects.filter(agent=agent).aiterator():
587
+ await entry.adelete()
588
+
571
589
  if agent:
572
590
  await agent.adelete()
573
591
  return True
@@ -575,15 +593,23 @@ class AgentAdapters:
575
593
 
576
594
  @staticmethod
577
595
  async def aget_agent_by_slug(agent_slug: str, user: KhojUser):
578
- return await Agent.objects.filter(
579
- (Q(slug__iexact=agent_slug.lower())) & (Q(privacy_level=Agent.PrivacyLevel.PUBLIC) | Q(creator=user))
580
- ).afirst()
596
+ return (
597
+ await Agent.objects.filter(
598
+ (Q(slug__iexact=agent_slug.lower())) & (Q(privacy_level=Agent.PrivacyLevel.PUBLIC) | Q(creator=user))
599
+ )
600
+ .prefetch_related("creator", "chat_model", "fileobject_set")
601
+ .afirst()
602
+ )
581
603
 
582
604
  @staticmethod
583
605
  async def aget_agent_by_name(agent_name: str, user: KhojUser):
584
- return await Agent.objects.filter(
585
- (Q(name__iexact=agent_name.lower())) & (Q(privacy_level=Agent.PrivacyLevel.PUBLIC) | Q(creator=user))
586
- ).afirst()
606
+ return (
607
+ await Agent.objects.filter(
608
+ (Q(name__iexact=agent_name.lower())) & (Q(privacy_level=Agent.PrivacyLevel.PUBLIC) | Q(creator=user))
609
+ )
610
+ .prefetch_related("creator", "chat_model", "fileobject_set")
611
+ .afirst()
612
+ )
587
613
 
588
614
  @staticmethod
589
615
  def get_agent_by_slug(slug: str, user: KhojUser = None):
@@ -627,8 +653,8 @@ class AgentAdapters:
627
653
  return Agent.objects.filter(name=AgentAdapters.DEFAULT_AGENT_NAME).first()
628
654
 
629
655
  @staticmethod
630
- def create_default_agent():
631
- default_conversation_config = ConversationAdapters.get_default_conversation_config()
656
+ def create_default_agent(user: KhojUser):
657
+ default_conversation_config = ConversationAdapters.get_default_conversation_config(user)
632
658
  if default_conversation_config is None:
633
659
  logger.info("No default conversation config found, skipping default agent creation")
634
660
  return None
@@ -676,10 +702,12 @@ class AgentAdapters:
676
702
  files: List[str],
677
703
  input_tools: List[str],
678
704
  output_modes: List[str],
705
+ slug: Optional[str] = None,
679
706
  ):
680
707
  chat_model_option = await ChatModelOptions.objects.filter(chat_model=chat_model).afirst()
681
708
 
682
- agent, created = await Agent.objects.filter(name=name, creator=user).aupdate_or_create(
709
+ # Slug will be None for new agents, which will trigger a new agent creation with a generated, immutable slug
710
+ agent, created = await Agent.objects.filter(slug=slug, creator=user).aupdate_or_create(
683
711
  defaults={
684
712
  "name": name,
685
713
  "creator": user,
@@ -810,7 +838,7 @@ class ConversationAdapters:
810
838
  user: KhojUser, client_application: ClientApplication = None, agent_slug: str = None, title: str = None
811
839
  ):
812
840
  if agent_slug:
813
- agent = await AgentAdapters.aget_agent_by_slug(agent_slug, user)
841
+ agent = await AgentAdapters.aget_readonly_agent_by_slug(agent_slug, user)
814
842
  if agent is None:
815
843
  raise HTTPException(status_code=400, detail="No such agent currently exists.")
816
844
  return await Conversation.objects.acreate(user=user, client=client_application, agent=agent, title=title)
@@ -822,7 +850,7 @@ class ConversationAdapters:
822
850
  user: KhojUser, client_application: ClientApplication = None, agent_slug: str = None, title: str = None
823
851
  ):
824
852
  if agent_slug:
825
- agent = AgentAdapters.get_agent_by_slug(agent_slug, user)
853
+ agent = AgentAdapters.aget_readonly_agent_by_slug(agent_slug, user)
826
854
  if agent is None:
827
855
  raise HTTPException(status_code=400, detail="No such agent currently exists.")
828
856
  return Conversation.objects.create(user=user, client=client_application, agent=agent, title=title)
@@ -917,21 +945,21 @@ class ConversationAdapters:
917
945
  def get_conversation_config(user: KhojUser):
918
946
  subscribed = is_user_subscribed(user)
919
947
  if not subscribed:
920
- return ConversationAdapters.get_default_conversation_config()
948
+ return ConversationAdapters.get_default_conversation_config(user)
921
949
  config = UserConversationConfig.objects.filter(user=user).first()
922
950
  if config:
923
951
  return config.setting
924
- return ConversationAdapters.get_advanced_conversation_config()
952
+ return ConversationAdapters.get_advanced_conversation_config(user)
925
953
 
926
954
  @staticmethod
927
955
  async def aget_conversation_config(user: KhojUser):
928
956
  subscribed = await ais_user_subscribed(user)
929
957
  if not subscribed:
930
- return await ConversationAdapters.aget_default_conversation_config()
958
+ return await ConversationAdapters.aget_default_conversation_config(user)
931
959
  config = await UserConversationConfig.objects.filter(user=user).prefetch_related("setting").afirst()
932
960
  if config:
933
961
  return config.setting
934
- return ConversationAdapters.aget_advanced_conversation_config()
962
+ return ConversationAdapters.aget_advanced_conversation_config(user)
935
963
 
936
964
  @staticmethod
937
965
  async def aget_voice_model_config(user: KhojUser) -> Optional[VoiceModelOption]:
@@ -952,40 +980,126 @@ class ConversationAdapters:
952
980
  return VoiceModelOption.objects.first()
953
981
 
954
982
  @staticmethod
955
- def get_default_conversation_config():
983
+ def get_default_conversation_config(user: KhojUser = None):
984
+ """Get default conversation config. Prefer chat model by server admin > user > first created chat model"""
985
+ # Get the server chat settings
956
986
  server_chat_settings = ServerChatSettings.objects.first()
957
- if server_chat_settings is None or server_chat_settings.chat_default is None:
958
- return ChatModelOptions.objects.filter().first()
959
- return server_chat_settings.chat_default
987
+ if server_chat_settings is not None and server_chat_settings.chat_default is not None:
988
+ return server_chat_settings.chat_default
989
+
990
+ # Get the user's chat settings, if the server chat settings are not set
991
+ user_chat_settings = UserConversationConfig.objects.filter(user=user).first() if user else None
992
+ if user_chat_settings is not None and user_chat_settings.setting is not None:
993
+ return user_chat_settings.setting
994
+
995
+ # Get the first chat model if even the user chat settings are not set
996
+ return ChatModelOptions.objects.filter().first()
960
997
 
961
998
  @staticmethod
962
- async def aget_default_conversation_config():
999
+ async def aget_default_conversation_config(user: KhojUser = None):
1000
+ """Get default conversation config. Prefer chat model by server admin > user > first created chat model"""
1001
+ # Get the server chat settings
963
1002
  server_chat_settings: ServerChatSettings = (
964
1003
  await ServerChatSettings.objects.filter()
965
1004
  .prefetch_related("chat_default", "chat_default__openai_config")
966
1005
  .afirst()
967
1006
  )
968
- if server_chat_settings is None or server_chat_settings.chat_default is None:
969
- return await ChatModelOptions.objects.filter().prefetch_related("openai_config").afirst()
970
- return server_chat_settings.chat_default
1007
+ if server_chat_settings is not None and server_chat_settings.chat_default is not None:
1008
+ return server_chat_settings.chat_default
1009
+
1010
+ # Get the user's chat settings, if the server chat settings are not set
1011
+ user_chat_settings = (
1012
+ (await UserConversationConfig.objects.filter(user=user).prefetch_related("setting__openai_config").afirst())
1013
+ if user
1014
+ else None
1015
+ )
1016
+ if user_chat_settings is not None and user_chat_settings.setting is not None:
1017
+ return user_chat_settings.setting
1018
+
1019
+ # Get the first chat model if even the user chat settings are not set
1020
+ return await ChatModelOptions.objects.filter().prefetch_related("openai_config").afirst()
971
1021
 
972
1022
  @staticmethod
973
- def get_advanced_conversation_config():
1023
+ def get_advanced_conversation_config(user: KhojUser):
974
1024
  server_chat_settings = ServerChatSettings.objects.first()
975
- if server_chat_settings is None or server_chat_settings.chat_advanced is None:
976
- return ConversationAdapters.get_default_conversation_config()
977
- return server_chat_settings.chat_advanced
1025
+ if server_chat_settings is not None and server_chat_settings.chat_advanced is not None:
1026
+ return server_chat_settings.chat_advanced
1027
+ return ConversationAdapters.get_default_conversation_config(user)
978
1028
 
979
1029
  @staticmethod
980
- async def aget_advanced_conversation_config():
1030
+ async def aget_advanced_conversation_config(user: KhojUser = None):
981
1031
  server_chat_settings: ServerChatSettings = (
982
1032
  await ServerChatSettings.objects.filter()
983
1033
  .prefetch_related("chat_advanced", "chat_advanced__openai_config")
984
1034
  .afirst()
985
1035
  )
986
- if server_chat_settings is None or server_chat_settings.chat_advanced is None:
987
- return await ConversationAdapters.aget_default_conversation_config()
988
- return server_chat_settings.chat_advanced
1036
+ if server_chat_settings is not None and server_chat_settings.chat_advanced is not None:
1037
+ return server_chat_settings.chat_advanced
1038
+ return await ConversationAdapters.aget_default_conversation_config(user)
1039
+
1040
+ @staticmethod
1041
+ async def aget_server_webscraper():
1042
+ server_chat_settings = await ServerChatSettings.objects.filter().prefetch_related("web_scraper").afirst()
1043
+ if server_chat_settings is not None and server_chat_settings.web_scraper is not None:
1044
+ return server_chat_settings.web_scraper
1045
+ return None
1046
+
1047
+ @staticmethod
1048
+ async def aget_enabled_webscrapers() -> list[WebScraper]:
1049
+ enabled_scrapers: list[WebScraper] = []
1050
+ server_webscraper = await ConversationAdapters.aget_server_webscraper()
1051
+ if server_webscraper:
1052
+ # Only use the webscraper set in the server chat settings
1053
+ enabled_scrapers = [server_webscraper]
1054
+ if not enabled_scrapers:
1055
+ # Use the enabled web scrapers, ordered by priority, until get web page content
1056
+ enabled_scrapers = [scraper async for scraper in WebScraper.objects.all().order_by("priority").aiterator()]
1057
+ if not enabled_scrapers:
1058
+ # Use scrapers enabled via environment variables
1059
+ if os.getenv("FIRECRAWL_API_KEY"):
1060
+ api_url = os.getenv("FIRECRAWL_API_URL", "https://api.firecrawl.dev")
1061
+ enabled_scrapers.append(
1062
+ WebScraper(
1063
+ type=WebScraper.WebScraperType.FIRECRAWL,
1064
+ name=WebScraper.WebScraperType.FIRECRAWL.capitalize(),
1065
+ api_key=os.getenv("FIRECRAWL_API_KEY"),
1066
+ api_url=api_url,
1067
+ )
1068
+ )
1069
+ if os.getenv("OLOSTEP_API_KEY"):
1070
+ api_url = os.getenv("OLOSTEP_API_URL", "https://agent.olostep.com/olostep-p2p-incomingAPI")
1071
+ enabled_scrapers.append(
1072
+ WebScraper(
1073
+ type=WebScraper.WebScraperType.OLOSTEP,
1074
+ name=WebScraper.WebScraperType.OLOSTEP.capitalize(),
1075
+ api_key=os.getenv("OLOSTEP_API_KEY"),
1076
+ api_url=api_url,
1077
+ )
1078
+ )
1079
+ # Jina is the default fallback scrapers to use as it does not require an API key
1080
+ api_url = os.getenv("JINA_READER_API_URL", "https://r.jina.ai/")
1081
+ enabled_scrapers.append(
1082
+ WebScraper(
1083
+ type=WebScraper.WebScraperType.JINA,
1084
+ name=WebScraper.WebScraperType.JINA.capitalize(),
1085
+ api_key=os.getenv("JINA_API_KEY"),
1086
+ api_url=api_url,
1087
+ )
1088
+ )
1089
+
1090
+ # Only enable the direct web page scraper by default in self-hosted single user setups.
1091
+ # Useful for reading webpages on your intranet.
1092
+ if state.anonymous_mode or in_debug_mode():
1093
+ enabled_scrapers.append(
1094
+ WebScraper(
1095
+ type=WebScraper.WebScraperType.DIRECT,
1096
+ name=WebScraper.WebScraperType.DIRECT.capitalize(),
1097
+ api_key=None,
1098
+ api_url=None,
1099
+ )
1100
+ )
1101
+
1102
+ return enabled_scrapers
989
1103
 
990
1104
  @staticmethod
991
1105
  def create_conversation_from_public_conversation(
khoj/database/admin.py CHANGED
@@ -31,6 +31,7 @@ from khoj.database.models import (
31
31
  UserSearchModelConfig,
32
32
  UserVoiceModelConfig,
33
33
  VoiceModelOption,
34
+ WebScraper,
34
35
  )
35
36
  from khoj.utils.helpers import ImageIntentType
36
37
 
@@ -69,10 +70,11 @@ class KhojUserAdmin(UserAdmin):
69
70
  "id",
70
71
  "email",
71
72
  "username",
73
+ "phone_number",
72
74
  "is_active",
75
+ "uuid",
73
76
  "is_staff",
74
77
  "is_superuser",
75
- "phone_number",
76
78
  )
77
79
  search_fields = ("email", "username", "phone_number", "uuid")
78
80
  filter_horizontal = ("groups", "user_permissions")
@@ -197,7 +199,22 @@ class ServerChatSettingsAdmin(admin.ModelAdmin):
197
199
  list_display = (
198
200
  "chat_default",
199
201
  "chat_advanced",
202
+ "web_scraper",
203
+ )
204
+
205
+
206
+ @admin.register(WebScraper)
207
+ class WebScraperAdmin(admin.ModelAdmin):
208
+ list_display = (
209
+ "priority",
210
+ "name",
211
+ "type",
212
+ "api_key",
213
+ "api_url",
214
+ "created_at",
200
215
  )
216
+ search_fields = ("name", "api_key", "api_url", "type")
217
+ ordering = ("priority",)
201
218
 
202
219
 
203
220
  @admin.register(Conversation)
khoj/database/migrations/0068_alter_agent_output_modes.py ADDED
@@ -0,0 +1,24 @@
1
+ # Generated by Django 5.0.8 on 2024-10-17 18:13
2
+
3
+ import django.contrib.postgres.fields
4
+ from django.db import migrations, models
5
+
6
+
7
+ class Migration(migrations.Migration):
8
+ dependencies = [
9
+ ("database", "0067_alter_agent_style_icon"),
10
+ ]
11
+
12
+ operations = [
13
+ migrations.AlterField(
14
+ model_name="agent",
15
+ name="output_modes",
16
+ field=django.contrib.postgres.fields.ArrayField(
17
+ base_field=models.CharField(
18
+ choices=[("text", "Text"), ("image", "Image"), ("automation", "Automation")], max_length=200
19
+ ),
20
+ default=list,
21
+ size=None,
22
+ ),
23
+ ),
24
+ ]
khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py ADDED
@@ -0,0 +1,89 @@
1
+ # Generated by Django 5.0.8 on 2024-10-18 00:41
2
+
3
+ import django.db.models.deletion
4
+ from django.db import migrations, models
5
+
6
+
7
+ class Migration(migrations.Migration):
8
+ dependencies = [
9
+ ("database", "0068_alter_agent_output_modes"),
10
+ ]
11
+
12
+ operations = [
13
+ migrations.CreateModel(
14
+ name="WebScraper",
15
+ fields=[
16
+ ("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
17
+ ("created_at", models.DateTimeField(auto_now_add=True)),
18
+ ("updated_at", models.DateTimeField(auto_now=True)),
19
+ (
20
+ "name",
21
+ models.CharField(
22
+ blank=True,
23
+ default=None,
24
+ help_text="Friendly name. If not set, it will be set to the type of the scraper.",
25
+ max_length=200,
26
+ null=True,
27
+ unique=True,
28
+ ),
29
+ ),
30
+ (
31
+ "type",
32
+ models.CharField(
33
+ choices=[
34
+ ("Firecrawl", "Firecrawl"),
35
+ ("Olostep", "Olostep"),
36
+ ("Jina", "Jina"),
37
+ ("Direct", "Direct"),
38
+ ],
39
+ default="Jina",
40
+ max_length=20,
41
+ ),
42
+ ),
43
+ (
44
+ "api_key",
45
+ models.CharField(
46
+ blank=True,
47
+ default=None,
48
+ help_text="API key of the web scraper. Only set if scraper service requires an API key. Default is set from env var.",
49
+ max_length=200,
50
+ null=True,
51
+ ),
52
+ ),
53
+ (
54
+ "api_url",
55
+ models.URLField(
56
+ blank=True,
57
+ default=None,
58
+ help_text="API URL of the web scraper. Only set if scraper service on non-default URL.",
59
+ null=True,
60
+ ),
61
+ ),
62
+ (
63
+ "priority",
64
+ models.IntegerField(
65
+ blank=True,
66
+ default=None,
67
+ help_text="Priority of the web scraper. Lower numbers run first.",
68
+ null=True,
69
+ unique=True,
70
+ ),
71
+ ),
72
+ ],
73
+ options={
74
+ "abstract": False,
75
+ },
76
+ ),
77
+ migrations.AddField(
78
+ model_name="serverchatsettings",
79
+ name="web_scraper",
80
+ field=models.ForeignKey(
81
+ blank=True,
82
+ default=None,
83
+ null=True,
84
+ on_delete=django.db.models.deletion.CASCADE,
85
+ related_name="web_scraper",
86
+ to="database.webscraper",
87
+ ),
88
+ ),
89
+ ]