khoj 1.24.2.dev3__py3-none-any.whl → 1.25.1.dev34__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two package versions as they appear in their public registries.
Files changed (109)
  1. khoj/configure.py +13 -4
  2. khoj/database/adapters/__init__.py +289 -52
  3. khoj/database/admin.py +20 -1
  4. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  5. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  6. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  7. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  8. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  9. khoj/database/models/__init__.py +136 -18
  10. khoj/interface/compiled/404/index.html +1 -1
  11. khoj/interface/compiled/_next/static/chunks/1603-fa3ee48860b9dc5c.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/2697-a38d01981ad3bdf8.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/3110-ef2cacd1b8d79ad8.js +1 -0
  14. khoj/interface/compiled/_next/static/chunks/4086-2c74808ba38a5a0f.js +1 -0
  15. khoj/interface/compiled/_next/static/chunks/477-ec86e93db10571c1.js +1 -0
  16. khoj/interface/compiled/_next/static/chunks/51-e8f5bdb69b5ea421.js +1 -0
  17. khoj/interface/compiled/_next/static/chunks/7762-79f2205740622b5c.js +1 -0
  18. khoj/interface/compiled/_next/static/chunks/9178-899fe9a6b754ecfe.js +1 -0
  19. khoj/interface/compiled/_next/static/chunks/9417-29502e39c3e7d60c.js +1 -0
  20. khoj/interface/compiled/_next/static/chunks/9479-7eed36fc954ef804.js +1 -0
  21. khoj/interface/compiled/_next/static/chunks/app/agents/{layout-e71c8e913cccf792.js → layout-75636ab3a413fa8e.js} +1 -1
  22. khoj/interface/compiled/_next/static/chunks/app/agents/page-fa282831808ee536.js +1 -0
  23. khoj/interface/compiled/_next/static/chunks/app/automations/page-5480731341f34450.js +1 -0
  24. khoj/interface/compiled/_next/static/chunks/app/chat/{layout-8102549127db3067.js → layout-96fcf62857bf8f30.js} +1 -1
  25. khoj/interface/compiled/_next/static/chunks/app/chat/page-702057ccbcf27881.js +1 -0
  26. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-e7b34316ec6f44de.js +1 -0
  27. khoj/interface/compiled/_next/static/chunks/app/{layout-f3e40d346da53112.js → layout-d0f0a9067427fb20.js} +1 -1
  28. khoj/interface/compiled/_next/static/chunks/app/page-10a5aad6e04f3cf8.js +1 -0
  29. khoj/interface/compiled/_next/static/chunks/app/search/page-d56541c746fded7d.js +1 -0
  30. khoj/interface/compiled/_next/static/chunks/app/settings/{layout-6f9314b0d7a26046.js → layout-a8f33dfe92f997fb.js} +1 -1
  31. khoj/interface/compiled/_next/static/chunks/app/settings/page-e044a999468a7c5d.js +1 -0
  32. khoj/interface/compiled/_next/static/chunks/app/share/chat/{layout-39f03f9e32399f0f.js → layout-2df56074e42adaa0.js} +1 -1
  33. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-fbbd66a4d4633438.js +1 -0
  34. khoj/interface/compiled/_next/static/chunks/{webpack-d4781cada9b58e75.js → webpack-c0cd5a6afb1f0798.js} +1 -1
  35. khoj/interface/compiled/_next/static/css/2de69f0be774c768.css +1 -0
  36. khoj/interface/compiled/_next/static/css/467a524c75e7d7c0.css +1 -0
  37. khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +1 -0
  38. khoj/interface/compiled/_next/static/css/b9a6bf04305d98d7.css +25 -0
  39. khoj/interface/compiled/agents/index.html +1 -1
  40. khoj/interface/compiled/agents/index.txt +2 -2
  41. khoj/interface/compiled/automations/index.html +1 -1
  42. khoj/interface/compiled/automations/index.txt +2 -2
  43. khoj/interface/compiled/chat/index.html +1 -1
  44. khoj/interface/compiled/chat/index.txt +2 -2
  45. khoj/interface/compiled/factchecker/index.html +1 -1
  46. khoj/interface/compiled/factchecker/index.txt +2 -2
  47. khoj/interface/compiled/index.html +1 -1
  48. khoj/interface/compiled/index.txt +2 -2
  49. khoj/interface/compiled/search/index.html +1 -1
  50. khoj/interface/compiled/search/index.txt +2 -2
  51. khoj/interface/compiled/settings/index.html +1 -1
  52. khoj/interface/compiled/settings/index.txt +3 -3
  53. khoj/interface/compiled/share/chat/index.html +1 -1
  54. khoj/interface/compiled/share/chat/index.txt +2 -2
  55. khoj/interface/web/assets/icons/agents.svg +1 -0
  56. khoj/interface/web/assets/icons/automation.svg +1 -0
  57. khoj/interface/web/assets/icons/chat.svg +24 -0
  58. khoj/interface/web/login.html +11 -22
  59. khoj/processor/content/notion/notion_to_entries.py +2 -1
  60. khoj/processor/conversation/anthropic/anthropic_chat.py +2 -0
  61. khoj/processor/conversation/google/gemini_chat.py +6 -19
  62. khoj/processor/conversation/google/utils.py +33 -15
  63. khoj/processor/conversation/offline/chat_model.py +3 -1
  64. khoj/processor/conversation/openai/gpt.py +2 -0
  65. khoj/processor/conversation/prompts.py +67 -5
  66. khoj/processor/conversation/utils.py +3 -7
  67. khoj/processor/embeddings.py +6 -3
  68. khoj/processor/image/generate.py +4 -3
  69. khoj/processor/tools/online_search.py +139 -44
  70. khoj/routers/api.py +35 -6
  71. khoj/routers/api_agents.py +235 -4
  72. khoj/routers/api_chat.py +102 -530
  73. khoj/routers/api_content.py +14 -0
  74. khoj/routers/api_model.py +1 -1
  75. khoj/routers/auth.py +9 -1
  76. khoj/routers/helpers.py +181 -68
  77. khoj/routers/subscription.py +18 -4
  78. khoj/search_type/text_search.py +11 -3
  79. khoj/utils/helpers.py +64 -8
  80. khoj/utils/initialization.py +0 -3
  81. {khoj-1.24.2.dev3.dist-info → khoj-1.25.1.dev34.dist-info}/METADATA +19 -21
  82. {khoj-1.24.2.dev3.dist-info → khoj-1.25.1.dev34.dist-info}/RECORD +87 -81
  83. khoj/interface/compiled/_next/static/chunks/1603-3e2e1528e3b6ea1d.js +0 -1
  84. khoj/interface/compiled/_next/static/chunks/2697-a29cb9191a9e339c.js +0 -1
  85. khoj/interface/compiled/_next/static/chunks/6648-ee109f4ea33a74e2.js +0 -1
  86. khoj/interface/compiled/_next/static/chunks/7071-b4711cecca6619a8.js +0 -1
  87. khoj/interface/compiled/_next/static/chunks/743-1a64254447cda71f.js +0 -1
  88. khoj/interface/compiled/_next/static/chunks/8423-62ac6c832be2461b.js +0 -1
  89. khoj/interface/compiled/_next/static/chunks/9162-0be016519a18568b.js +0 -1
  90. khoj/interface/compiled/_next/static/chunks/9178-7e815211edcb3657.js +0 -1
  91. khoj/interface/compiled/_next/static/chunks/9417-5d14ac74aaab2c66.js +0 -1
  92. khoj/interface/compiled/_next/static/chunks/9984-e410179c6fac7cf1.js +0 -1
  93. khoj/interface/compiled/_next/static/chunks/app/agents/page-d302911777a3e027.js +0 -1
  94. khoj/interface/compiled/_next/static/chunks/app/automations/page-0a5de8c254c29a1c.js +0 -1
  95. khoj/interface/compiled/_next/static/chunks/app/chat/page-d96bf6a84bb05290.js +0 -1
  96. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-32e61af29e6b431d.js +0 -1
  97. khoj/interface/compiled/_next/static/chunks/app/page-96cab08c985716f4.js +0 -1
  98. khoj/interface/compiled/_next/static/chunks/app/search/page-b3193d46c65571c5.js +0 -1
  99. khoj/interface/compiled/_next/static/chunks/app/settings/page-0db9b708366606ec.js +0 -1
  100. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-f06ac16cfe5b5a16.js +0 -1
  101. khoj/interface/compiled/_next/static/css/1538cedb321e3a97.css +0 -1
  102. khoj/interface/compiled/_next/static/css/24f141a6e37cd204.css +0 -25
  103. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
  104. khoj/interface/compiled/_next/static/css/f768dddada62459d.css +0 -1
  105. /khoj/interface/compiled/_next/static/{_29ceahp81LhuIHo5QgOD → Jid9q6Qg851ioDaaO_fth}/_buildManifest.js +0 -0
  106. /khoj/interface/compiled/_next/static/{_29ceahp81LhuIHo5QgOD → Jid9q6Qg851ioDaaO_fth}/_ssgManifest.js +0 -0
  107. {khoj-1.24.2.dev3.dist-info → khoj-1.25.1.dev34.dist-info}/WHEEL +0 -0
  108. {khoj-1.24.2.dev3.dist-info → khoj-1.25.1.dev34.dist-info}/entry_points.txt +0 -0
  109. {khoj-1.24.2.dev3.dist-info → khoj-1.25.1.dev34.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api_chat.py CHANGED
@@ -3,7 +3,6 @@ import base64
 import json
 import logging
 import time
-import warnings
 from datetime import datetime
 from functools import partial
 from typing import Dict, Optional
@@ -17,13 +16,14 @@ from starlette.authentication import has_required_scope, requires
 
 from khoj.app.settings import ALLOWED_HOSTS
 from khoj.database.adapters import (
+    AgentAdapters,
     ConversationAdapters,
     EntryAdapters,
     FileObjectAdapters,
     PublicConversationAdapters,
     aget_user_name,
 )
-from khoj.database.models import KhojUser
+from khoj.database.models import Agent, KhojUser
 from khoj.processor.conversation.prompts import help_message, no_entries_found
 from khoj.processor.conversation.utils import save_to_conversation_log
 from khoj.processor.image.generate import text_to_image
@@ -65,7 +65,7 @@ from khoj.utils.rawconfig import FileFilterRequest, FilesFilterRequest, Location
 # Initialize Router
 logger = logging.getLogger(__name__)
 conversation_command_rate_limiter = ConversationCommandRateLimiter(
-    trial_rate_limit=100, subscribed_rate_limit=100, slug="command"
+    trial_rate_limit=100, subscribed_rate_limit=6000, slug="command"
 )
 
 
@@ -193,7 +193,7 @@ def chat_history(
     n: Optional[int] = None,
 ):
     user = request.user.object
-    validate_conversation_config()
+    validate_conversation_config(user)
 
     # Load Conversation History
     conversation = ConversationAdapters.get_conversation_by_user(
@@ -208,15 +208,17 @@ def chat_history(
 
     agent_metadata = None
     if conversation.agent:
-        agent_metadata = {
-            "slug": conversation.agent.slug,
-            "name": conversation.agent.name,
-            "avatar": conversation.agent.avatar,
-            "isCreator": conversation.agent.creator == user,
-            "color": conversation.agent.style_color,
-            "icon": conversation.agent.style_icon,
-            "persona": conversation.agent.personality,
-        }
+        if conversation.agent.privacy_level == Agent.PrivacyLevel.PRIVATE and conversation.agent.creator != user:
+            conversation.agent = None
+        else:
+            agent_metadata = {
+                "slug": conversation.agent.slug,
+                "name": conversation.agent.name,
+                "isCreator": conversation.agent.creator == user,
+                "color": conversation.agent.style_color,
+                "icon": conversation.agent.style_icon,
+                "persona": conversation.agent.personality,
+            }
 
     meta_log = conversation.conversation_log
     meta_log.update(
@@ -265,15 +267,17 @@ def get_shared_chat(
 
     agent_metadata = None
     if conversation.agent:
-        agent_metadata = {
-            "slug": conversation.agent.slug,
-            "name": conversation.agent.name,
-            "avatar": conversation.agent.avatar,
-            "isCreator": conversation.agent.creator == user,
-            "color": conversation.agent.style_color,
-            "icon": conversation.agent.style_icon,
-            "persona": conversation.agent.personality,
-        }
+        if conversation.agent.privacy_level == Agent.PrivacyLevel.PRIVATE:
+            conversation.agent = None
+        else:
+            agent_metadata = {
+                "slug": conversation.agent.slug,
+                "name": conversation.agent.name,
+                "isCreator": conversation.agent.creator == user,
+                "color": conversation.agent.style_color,
+                "icon": conversation.agent.style_icon,
+                "persona": conversation.agent.personality,
+            }
 
     meta_log = conversation.conversation_log
     scrubbed_title = conversation.title if conversation.title else conversation.slug
@@ -300,7 +304,7 @@ def get_shared_chat(
     update_telemetry_state(
         request=request,
         telemetry_type="api",
-        api="chat_history",
+        api="get_shared_chat_history",
         **common.__dict__,
     )
 
@@ -418,7 +422,7 @@ def chat_sessions(
     conversations = conversations[:8]
 
     sessions = conversations.values_list(
-        "id", "slug", "title", "agent__slug", "agent__name", "agent__avatar", "created_at", "updated_at"
+        "id", "slug", "title", "agent__slug", "agent__name", "created_at", "updated_at"
     )
 
     session_values = [
@@ -426,9 +430,8 @@ def chat_sessions(
             "conversation_id": str(session[0]),
             "slug": session[2] or session[1],
             "agent_name": session[4],
-            "agent_avatar": session[5],
-            "created": session[6].strftime("%Y-%m-%d %H:%M:%S"),
-            "updated": session[7].strftime("%Y-%m-%d %H:%M:%S"),
+            "created": session[5].strftime("%Y-%m-%d %H:%M:%S"),
+            "updated": session[6].strftime("%Y-%m-%d %H:%M:%S"),
         }
         for session in sessions
     ]
@@ -570,7 +573,6 @@ async def chat(
         chat_metadata: dict = {}
         connection_alive = True
         user: KhojUser = request.user.object
-        subscribed: bool = has_required_scope(request, ["premium"])
         event_delimiter = "␃🔚␗"
         q = unquote(q)
         nonlocal conversation_id
@@ -590,7 +592,7 @@ async def chat(
             nonlocal connection_alive, ttft
             if not connection_alive or await request.is_disconnected():
                 connection_alive = False
-                logger.warn(f"User {user} disconnected from {common.client} client")
+                logger.warning(f"User {user} disconnected from {common.client} client")
                 return
             try:
                 if event_type == ChatEvent.END_LLM_RESPONSE:
@@ -637,7 +639,7 @@ async def chat(
                 request=request,
                 telemetry_type="api",
                 api="chat",
-                client=request.user.client_app,
+                client=common.client,
                 user_agent=request.headers.get("user-agent"),
                 host=request.headers.get("host"),
                 metadata=chat_metadata,
@@ -658,6 +660,16 @@ async def chat(
             return
         conversation_id = conversation.id
 
+        agent: Agent | None = None
+        default_agent = await AgentAdapters.aget_default_agent()
+        if conversation.agent and conversation.agent != default_agent:
+            agent = conversation.agent
+
+        if not conversation.agent:
+            conversation.agent = default_agent
+            await conversation.asave()
+            agent = default_agent
+
         await is_ready_to_chat(user)
 
         user_name = await aget_user_name(user)
@@ -677,7 +689,12 @@ async def chat(
 
         if conversation_commands == [ConversationCommand.Default] or is_automated_task:
             conversation_commands = await aget_relevant_information_sources(
-                q, meta_log, is_automated_task, subscribed=subscribed, uploaded_image_url=uploaded_image_url
+                q,
+                meta_log,
+                is_automated_task,
+                user=user,
+                uploaded_image_url=uploaded_image_url,
+                agent=agent,
             )
             conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
             async for result in send_event(
@@ -685,7 +702,7 @@ async def chat(
             ):
                 yield result
 
-        mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, uploaded_image_url)
+        mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, user, uploaded_image_url, agent)
         async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"):
             yield result
         if mode not in conversation_commands:
@@ -709,19 +726,30 @@ async def chat(
             conversation_commands.remove(ConversationCommand.Summarize)
         elif ConversationCommand.Summarize in conversation_commands:
             response_log = ""
-            if len(file_filters) == 0:
+            agent_has_entries = await EntryAdapters.aagent_has_entries(agent)
+            if len(file_filters) == 0 and not agent_has_entries:
                 response_log = "No files selected for summarization. Please add files using the section on the left."
                 async for result in send_llm_response(response_log):
                     yield result
-            elif len(file_filters) > 1:
+            elif len(file_filters) > 1 and not agent_has_entries:
                 response_log = "Only one file can be selected for summarization."
                 async for result in send_llm_response(response_log):
                     yield result
             else:
                 try:
-                    file_object = await FileObjectAdapters.async_get_file_objects_by_name(user, file_filters[0])
+                    file_object = None
+                    if await EntryAdapters.aagent_has_entries(agent):
+                        file_names = await EntryAdapters.aget_agent_entry_filepaths(agent)
+                        if len(file_names) > 0:
+                            file_object = await FileObjectAdapters.async_get_file_objects_by_name(
+                                None, file_names[0], agent
+                            )
+
+                    if len(file_filters) > 0:
+                        file_object = await FileObjectAdapters.async_get_file_objects_by_name(user, file_filters[0])
+
                     if len(file_object) == 0:
-                        response_log = "Sorry, we couldn't find the full text of this file. Please re-upload the document and try again."
+                        response_log = "Sorry, I couldn't find the full text of this file. Please re-upload the document and try again."
                         async for result in send_llm_response(response_log):
                             yield result
                         return
@@ -734,13 +762,18 @@ async def chat(
                             yield result
 
                     response = await extract_relevant_summary(
-                        q, contextual_data, subscribed=subscribed, uploaded_image_url=uploaded_image_url
+                        q,
+                        contextual_data,
+                        conversation_history=meta_log,
+                        uploaded_image_url=uploaded_image_url,
+                        user=user,
+                        agent=agent,
                     )
                     response_log = str(response)
                     async for result in send_llm_response(response_log):
                         yield result
                 except Exception as e:
-                    response_log = "Error summarizing file."
+                    response_log = "Error summarizing file. Please try again, or contact support."
                     logger.error(f"Error summarizing file for {user.email}: {e}", exc_info=True)
                     async for result in send_llm_response(response_log):
                         yield result
@@ -805,500 +838,34 @@ async def chat(
         # Gather Context
         ## Extract Document References
         compiled_references, inferred_queries, defiltered_query = [], [], None
-        async for result in extract_references_and_questions(
-            request,
-            meta_log,
-            q,
-            (n or 7),
-            d,
-            conversation_id,
-            conversation_commands,
-            location,
-            partial(send_event, ChatEvent.STATUS),
-            uploaded_image_url=uploaded_image_url,
-        ):
-            if isinstance(result, dict) and ChatEvent.STATUS in result:
-                yield result[ChatEvent.STATUS]
-            else:
-                compiled_references.extend(result[0])
-                inferred_queries.extend(result[1])
-                defiltered_query = result[2]
-
-        if not is_none_or_empty(compiled_references):
-            headings = "\n- " + "\n- ".join(set([c.get("compiled", c).split("\n")[0] for c in compiled_references]))
-            # Strip only leading # from headings
-            headings = headings.replace("#", "")
-            async for result in send_event(ChatEvent.STATUS, f"**Found Relevant Notes**: {headings}"):
-                yield result
-
-        online_results: Dict = dict()
-
-        if conversation_commands == [ConversationCommand.Notes] and not await EntryAdapters.auser_has_entries(user):
-            async for result in send_llm_response(f"{no_entries_found.format()}"):
-                yield result
-            return
-
-        if ConversationCommand.Notes in conversation_commands and is_none_or_empty(compiled_references):
-            conversation_commands.remove(ConversationCommand.Notes)
-
-        ## Gather Online References
-        if ConversationCommand.Online in conversation_commands:
-            try:
-                async for result in search_online(
-                    defiltered_query,
-                    meta_log,
-                    location,
-                    user,
-                    subscribed,
-                    partial(send_event, ChatEvent.STATUS),
-                    custom_filters,
-                    uploaded_image_url=uploaded_image_url,
-                ):
-                    if isinstance(result, dict) and ChatEvent.STATUS in result:
-                        yield result[ChatEvent.STATUS]
-                    else:
-                        online_results = result
-            except ValueError as e:
-                error_message = f"Error searching online: {e}. Attempting to respond without online results"
-                logger.warning(error_message)
-                async for result in send_llm_response(error_message):
-                    yield result
-                return
-
-        ## Gather Webpage References
-        if ConversationCommand.Webpage in conversation_commands:
-            try:
-                async for result in read_webpages(
-                    defiltered_query,
-                    meta_log,
-                    location,
-                    user,
-                    subscribed,
-                    partial(send_event, ChatEvent.STATUS),
-                    uploaded_image_url=uploaded_image_url,
-                ):
-                    if isinstance(result, dict) and ChatEvent.STATUS in result:
-                        yield result[ChatEvent.STATUS]
-                    else:
-                        direct_web_pages = result
-                        webpages = []
-                        for query in direct_web_pages:
-                            if online_results.get(query):
-                                online_results[query]["webpages"] = direct_web_pages[query]["webpages"]
-                            else:
-                                online_results[query] = {"webpages": direct_web_pages[query]["webpages"]}
-
-                            for webpage in direct_web_pages[query]["webpages"]:
-                                webpages.append(webpage["link"])
-                        async for result in send_event(ChatEvent.STATUS, f"**Read web pages**: {webpages}"):
-                            yield result
-            except ValueError as e:
-                logger.warning(
-                    f"Error directly reading webpages: {e}. Attempting to respond without online results",
-                    exc_info=True,
-                )
-
-        ## Send Gathered References
-        async for result in send_event(
-            ChatEvent.REFERENCES,
-            {
-                "inferredQueries": inferred_queries,
-                "context": compiled_references,
-                "onlineContext": online_results,
-            },
-        ):
-            yield result
-
-        # Generate Output
-        ## Generate Image Output
-        if ConversationCommand.Image in conversation_commands:
-            async for result in text_to_image(
-                q,
-                user,
+        try:
+            async for result in extract_references_and_questions(
+                request,
                 meta_log,
-                location_data=location,
-                references=compiled_references,
-                online_results=online_results,
-                subscribed=subscribed,
-                send_status_func=partial(send_event, ChatEvent.STATUS),
+                q,
+                (n or 7),
+                d,
+                conversation_id,
+                conversation_commands,
+                location,
+                partial(send_event, ChatEvent.STATUS),
                 uploaded_image_url=uploaded_image_url,
+                agent=agent,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
                     yield result[ChatEvent.STATUS]
                 else:
-                    image, status_code, improved_image_prompt, intent_type = result
-
-                    if image is None or status_code != 200:
-                        content_obj = {
-                            "content-type": "application/json",
-                            "intentType": intent_type,
-                            "detail": improved_image_prompt,
-                            "image": image,
-                        }
-                        async for result in send_llm_response(json.dumps(content_obj)):
-                            yield result
-                        return
-
-                    await sync_to_async(save_to_conversation_log)(
-                        q,
-                        image,
-                        user,
-                        meta_log,
-                        user_message_time,
-                        intent_type=intent_type,
-                        inferred_queries=[improved_image_prompt],
-                        client_application=request.user.client_app,
-                        conversation_id=conversation_id,
-                        compiled_references=compiled_references,
-                        online_results=online_results,
-                        uploaded_image_url=uploaded_image_url,
-                    )
-                    content_obj = {
-                        "intentType": intent_type,
-                        "inferredQueries": [improved_image_prompt],
-                        "image": image,
-                    }
-                    async for result in send_llm_response(json.dumps(content_obj)):
-                        yield result
-                    return
-
-        ## Generate Text Output
-        async for result in send_event(ChatEvent.STATUS, f"**Generating a well-informed response**"):
-            yield result
-        llm_response, chat_metadata = await agenerate_chat_response(
-            defiltered_query,
-            meta_log,
-            conversation,
-            compiled_references,
-            online_results,
-            inferred_queries,
-            conversation_commands,
-            user,
-            request.user.client_app,
-            conversation_id,
-            location,
-            user_name,
-            uploaded_image_url,
-        )
-
-        # Send Response
-        async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""):
-            yield result
-
-        continue_stream = True
-        iterator = AsyncIteratorWrapper(llm_response)
-        async for item in iterator:
-            if item is None:
-                async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
-                    yield result
-                logger.debug("Finished streaming response")
-                return
-            if not connection_alive or not continue_stream:
-                continue
-            try:
-                async for result in send_event(ChatEvent.MESSAGE, f"{item}"):
-                    yield result
-            except Exception as e:
-                continue_stream = False
-                logger.info(f"User {user} disconnected. Emitting rest of responses to clear thread: {e}")
-
-    ## Stream Text Response
-    if stream:
-        return StreamingResponse(event_generator(q, image=image), media_type="text/plain")
-    ## Non-Streaming Text Response
-    else:
-        response_iterator = event_generator(q, image=image)
-        response_data = await read_chat_stream(response_iterator)
-        return Response(content=json.dumps(response_data), media_type="application/json", status_code=200)
-
-
-# Deprecated API. Remove by end of September 2024
-@api_chat.get("")
-@requires(["authenticated"])
-async def get_chat(
-    request: Request,
-    common: CommonQueryParams,
-    q: str,
-    n: int = 7,
-    d: float = None,
-    stream: Optional[bool] = False,
-    title: Optional[str] = None,
-    conversation_id: Optional[str] = None,
-    city: Optional[str] = None,
-    region: Optional[str] = None,
-    country: Optional[str] = None,
-    timezone: Optional[str] = None,
-    image: Optional[str] = None,
-    rate_limiter_per_minute=Depends(
-        ApiUserRateLimiter(requests=60, subscribed_requests=60, window=60, slug="chat_minute")
-    ),
-    rate_limiter_per_day=Depends(
-        ApiUserRateLimiter(requests=600, subscribed_requests=600, window=60 * 60 * 24, slug="chat_day")
-    ),
-):
-    # Issue a deprecation warning
-    warnings.warn(
-        "The 'get_chat' API endpoint is deprecated. It will be removed by the end of September 2024.",
-        DeprecationWarning,
-        stacklevel=2,
-    )
-
-    async def event_generator(q: str, image: str):
-        start_time = time.perf_counter()
-        ttft = None
-        chat_metadata: dict = {}
-        connection_alive = True
-        user: KhojUser = request.user.object
-        subscribed: bool = has_required_scope(request, ["premium"])
-        event_delimiter = "␃🔚␗"
-        q = unquote(q)
-        nonlocal conversation_id
-
-        uploaded_image_url = None
-        if image:
-            decoded_string = unquote(image)
-            base64_data = decoded_string.split(",", 1)[1]
-            image_bytes = base64.b64decode(base64_data)
-            webp_image_bytes = convert_image_to_webp(image_bytes)
-            try:
-                uploaded_image_url = upload_image_to_bucket(webp_image_bytes, request.user.object.id)
-            except:
-                uploaded_image_url = None
-
-        async def send_event(event_type: ChatEvent, data: str | dict):
-            nonlocal connection_alive, ttft
-            if not connection_alive or await request.is_disconnected():
-                connection_alive = False
-                logger.warn(f"User {user} disconnected from {common.client} client")
-                return
-            try:
-                if event_type == ChatEvent.END_LLM_RESPONSE:
-                    collect_telemetry()
-                if event_type == ChatEvent.START_LLM_RESPONSE:
-                    ttft = time.perf_counter() - start_time
-                if event_type == ChatEvent.MESSAGE:
-                    yield data
-                elif event_type == ChatEvent.REFERENCES or stream:
-                    yield json.dumps({"type": event_type.value, "data": data}, ensure_ascii=False)
-            except asyncio.CancelledError as e:
-                connection_alive = False
-                logger.warn(f"User {user} disconnected from {common.client} client: {e}")
-                return
-            except Exception as e:
-                connection_alive = False
-                logger.error(f"Failed to stream chat API response to {user} on {common.client}: {e}", exc_info=True)
-                return
-            finally:
-                yield event_delimiter
-
-        async def send_llm_response(response: str):
-            async for result in send_event(ChatEvent.START_LLM_RESPONSE, ""):
-                yield result
-            async for result in send_event(ChatEvent.MESSAGE, response):
-                yield result
-            async for result in send_event(ChatEvent.END_LLM_RESPONSE, ""):
-                yield result
-
-        def collect_telemetry():
-            # Gather chat response telemetry
-            nonlocal chat_metadata
-            latency = time.perf_counter() - start_time
-            cmd_set = set([cmd.value for cmd in conversation_commands])
-            chat_metadata = chat_metadata or {}
-            chat_metadata["conversation_command"] = cmd_set
-            chat_metadata["agent"] = conversation.agent.slug if conversation.agent else None
-            chat_metadata["latency"] = f"{latency:.3f}"
-            chat_metadata["ttft_latency"] = f"{ttft:.3f}"
-
-            logger.info(f"Chat response time to first token: {ttft:.3f} seconds")
-            logger.info(f"Chat response total time: {latency:.3f} seconds")
-            update_telemetry_state(
-                request=request,
-                telemetry_type="api",
-                api="chat",
-                client=request.user.client_app,
-                user_agent=request.headers.get("user-agent"),
-                host=request.headers.get("host"),
-                metadata=chat_metadata,
-            )
-
-        conversation_commands = [get_conversation_command(query=q, any_references=True)]
-
-        conversation = await ConversationAdapters.aget_conversation_by_user(
-            user, client_application=request.user.client_app, conversation_id=conversation_id, title=title
-        )
-        if not conversation:
-            async for result in send_llm_response(f"Conversation {conversation_id} not found"):
-                yield result
-            return
-        conversation_id = conversation.id
-
-        await is_ready_to_chat(user)
-
-        user_name = await aget_user_name(user)
-        location = None
-        if city or region or country:
-            location = LocationData(city=city, region=region, country=country)
-
-        if is_query_empty(q):
-            async for result in send_llm_response("Please ask your query to get started."):
-                yield result
-            return
-
-        user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-        meta_log = conversation.conversation_log
-        is_automated_task = conversation_commands == [ConversationCommand.AutomatedTask]
-
-        if conversation_commands == [ConversationCommand.Default] or is_automated_task:
-            conversation_commands = await aget_relevant_information_sources(
-                q, meta_log, is_automated_task, subscribed=subscribed, uploaded_image_url=uploaded_image_url
-            )
-            conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
+                    compiled_references.extend(result[0])
+                    inferred_queries.extend(result[1])
+                    defiltered_query = result[2]
+        except Exception as e:
+            error_message = f"Error searching knowledge base: {e}. Attempting to respond without document references."
+            logger.warning(error_message)
             async for result in send_event(
-                ChatEvent.STATUS, f"**Chose Data Sources to Search:** {conversation_commands_str}"
+                ChatEvent.STATUS, "Document search failed. I'll try respond without document references"
             ):
                 yield result
 
-        mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, uploaded_image_url)
-        async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"):
-            yield result
-        if mode not in conversation_commands:
-            conversation_commands.append(mode)
-
-        for cmd in conversation_commands:
-            await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
-            q = q.replace(f"/{cmd.value}", "").strip()
-
-        used_slash_summarize = conversation_commands == [ConversationCommand.Summarize]
-        file_filters = conversation.file_filters if conversation else []
-        # Skip trying to summarize if
-        if (
-            # summarization intent was inferred
-            ConversationCommand.Summarize in conversation_commands
-            # and not triggered via slash command
-            and not used_slash_summarize
-            # but we can't actually summarize
-            and len(file_filters) != 1
-        ):
-            conversation_commands.remove(ConversationCommand.Summarize)
-        elif ConversationCommand.Summarize in conversation_commands:
-            response_log = ""
-            if len(file_filters) == 0:
-                response_log = "No files selected for summarization. Please add files using the section on the left."
-                async for result in send_llm_response(response_log):
-                    yield result
-            elif len(file_filters) > 1:
-                response_log = "Only one file can be selected for summarization."
-                async for result in send_llm_response(response_log):
-                    yield result
-            else:
-                try:
-                    file_object = await FileObjectAdapters.async_get_file_objects_by_name(user, file_filters[0])
-                    if len(file_object) == 0:
-                        response_log = "Sorry, we couldn't find the full text of this file. Please re-upload the document and try again."
-                        async for result in send_llm_response(response_log):
-                            yield result
-                        return
-                    contextual_data = " ".join([file.raw_text for file in file_object])
-                    if not q:
-                        q = "Create a general summary of the file"
-                    async for result in send_event(
-                        ChatEvent.STATUS, f"**Constructing Summary Using:** {file_object[0].file_name}"
-                    ):
-                        yield result
-
-                    response = await extract_relevant_summary(
-                        q, contextual_data, subscribed=subscribed, uploaded_image_url=uploaded_image_url
-                    )
-                    response_log = str(response)
-                    async for result in send_llm_response(response_log):
-                        yield result
-                except Exception as e:
-                    response_log = "Error summarizing file."
-                    logger.error(f"Error summarizing file for {user.email}: {e}", exc_info=True)
-                    async for result in send_llm_response(response_log):
-                        yield result
-            await sync_to_async(save_to_conversation_log)(
-                q,
-                response_log,
-                user,
-                meta_log,
-                user_message_time,
-                intent_type="summarize",
-                client_application=request.user.client_app,
-                conversation_id=conversation_id,
-                uploaded_image_url=uploaded_image_url,
-            )
-            return
-
-        custom_filters = []
-        if conversation_commands == [ConversationCommand.Help]:
-            if not q:
-                conversation_config = await ConversationAdapters.aget_user_conversation_config(user)
-                if conversation_config == None:
-                    conversation_config = await ConversationAdapters.aget_default_conversation_config()
-                model_type = conversation_config.model_type
-                formatted_help = help_message.format(model=model_type, version=state.khoj_version, device=get_device())
-                async for result in send_llm_response(formatted_help):
-                    yield result
-                return
-            # Adding specification to search online specifically on khoj.dev pages.
-            custom_filters.append("site:khoj.dev")
-            conversation_commands.append(ConversationCommand.Online)
-
-        if ConversationCommand.Automation in conversation_commands:
-            try:
-                automation, crontime, query_to_run, subject = await create_automation(
-                    q, timezone, user, request.url, meta_log
-                )
-            except Exception as e:
-                logger.error(f"Error scheduling task {q} for {user.email}: {e}")
-                error_message = f"Unable to create automation. Ensure the automation doesn't already exist."
-                async for result in send_llm_response(error_message):
-                    yield result
-                return
-
-            llm_response = construct_automation_created_message(automation, crontime, query_to_run, subject)
-            await sync_to_async(save_to_conversation_log)(
-                q,
-                llm_response,
-                user,
-                meta_log,
-                user_message_time,
-                intent_type="automation",
-                client_application=request.user.client_app,
-                conversation_id=conversation_id,
-                inferred_queries=[query_to_run],
-                automation_id=automation.id,
-                uploaded_image_url=uploaded_image_url,
-            )
-            async for result in send_llm_response(llm_response):
-                yield result
-            return
-
-        # Gather Context
-        ## Extract Document References
-        compiled_references, inferred_queries, defiltered_query = [], [], None
-        async for result in extract_references_and_questions(
-            request,
-            meta_log,
-            q,
-            (n or 7),
-            d,
-            conversation_id,
-            conversation_commands,
-            location,
-            partial(send_event, ChatEvent.STATUS),
-            uploaded_image_url=uploaded_image_url,
-        ):
-            if isinstance(result, dict) and ChatEvent.STATUS in result:
-                yield result[ChatEvent.STATUS]
-            else:
-                compiled_references.extend(result[0])
-                inferred_queries.extend(result[1])
-                defiltered_query = result[2]
-
         if not is_none_or_empty(compiled_references):
             headings = "\n- " + "\n- ".join(set([c.get("compiled", c).split("\n")[0] for c in compiled_references]))
             # Strip only leading # from headings
@@ -1324,21 +891,22 @@ async def get_chat(
                     meta_log,
                     location,
                     user,
-                    subscribed,
                     partial(send_event, ChatEvent.STATUS),
                     custom_filters,
                     uploaded_image_url=uploaded_image_url,
+                    agent=agent,
                 ):
                     if isinstance(result, dict) and ChatEvent.STATUS in result:
                         yield result[ChatEvent.STATUS]
                     else:
                         online_results = result
-            except ValueError as e:
+            except Exception as e:
                 error_message = f"Error searching online: {e}. Attempting to respond without online results"
                 logger.warning(error_message)
-                async for result in send_llm_response(error_message):
+                async for result in send_event(
+                    ChatEvent.STATUS, "Online search failed. I'll try respond without online references"
+                ):
                     yield result
-                return
 
         ## Gather Webpage References
         if ConversationCommand.Webpage in conversation_commands:
@@ -1348,9 +916,9 @@ async def get_chat(
                     meta_log,
                     location,
                     user,
-                    subscribed,
                     partial(send_event, ChatEvent.STATUS),
                     uploaded_image_url=uploaded_image_url,
+                    agent=agent,
                 ):
                     if isinstance(result, dict) and ChatEvent.STATUS in result:
                         yield result[ChatEvent.STATUS]
@@ -1367,11 +935,15 @@ async def get_chat(
                                 webpages.append(webpage["link"])
                         async for result in send_event(ChatEvent.STATUS, f"**Read web pages**: {webpages}"):
                             yield result
-            except ValueError as e:
+            except Exception as e:
                 logger.warning(
-                    f"Error directly reading webpages: {e}. Attempting to respond without online results",
+                    f"Error reading webpages: {e}. Attempting to respond without webpage results",
                     exc_info=True,
                 )
+                async for result in send_event(
+                    ChatEvent.STATUS, "Webpage read failed. I'll try respond without webpage references"
+                ):
+                    yield result
 
         ## Send Gathered References
         async for result in send_event(
@@ -1394,9 +966,9 @@ async def get_chat(
                 location_data=location,
                 references=compiled_references,
                 online_results=online_results,
-                subscribed=subscribed,
                 send_status_func=partial(send_event, ChatEvent.STATUS),
                 uploaded_image_url=uploaded_image_url,
+                agent=agent,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
                     yield result[ChatEvent.STATUS]