khoj 1.28.3__py3-none-any.whl → 1.28.4.dev92__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. khoj/configure.py +10 -14
  2. khoj/database/adapters/__init__.py +128 -44
  3. khoj/database/admin.py +6 -3
  4. khoj/database/management/commands/change_default_model.py +7 -72
  5. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  6. khoj/database/models/__init__.py +4 -6
  7. khoj/interface/compiled/404/index.html +1 -1
  8. khoj/interface/compiled/_next/static/chunks/1603-dc5fd983dbcd070d.js +1 -0
  9. khoj/interface/compiled/_next/static/chunks/1970-c78f6acc8e16e30b.js +1 -0
  10. khoj/interface/compiled/_next/static/chunks/2261-748f7c327df3c8c1.js +1 -0
  11. khoj/interface/compiled/_next/static/chunks/3124-a4cea2eda163128d.js +1 -0
  12. khoj/interface/compiled/_next/static/chunks/3803-d74118a2d0182c52.js +1 -0
  13. khoj/interface/compiled/_next/static/chunks/5538-36aa824a75519c5b.js +1 -0
  14. khoj/interface/compiled/_next/static/chunks/5961-3c104d9736b7902b.js +3 -0
  15. khoj/interface/compiled/_next/static/chunks/8423-ebfa9bb9e2424ca3.js +1 -0
  16. khoj/interface/compiled/_next/static/chunks/9417-32c4db52ca42e681.js +1 -0
  17. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e9838b642913a071.js +1 -0
  18. khoj/interface/compiled/_next/static/chunks/app/agents/page-4353b1a532795ad1.js +1 -0
  19. khoj/interface/compiled/_next/static/chunks/app/automations/{page-d3edae545a1b5393.js → page-c9f13c865e739607.js} +1 -1
  20. khoj/interface/compiled/_next/static/chunks/app/chat/layout-b0e7ff4baa3b5265.js +1 -0
  21. khoj/interface/compiled/_next/static/chunks/app/chat/page-45720e1ed71e3ef5.js +1 -0
  22. khoj/interface/compiled/_next/static/chunks/app/{layout-d0f0a9067427fb20.js → layout-86561d2fac35a91a.js} +1 -1
  23. khoj/interface/compiled/_next/static/chunks/app/{page-ea462e20376b6dce.js → page-ecb8e1c192aa8834.js} +1 -1
  24. khoj/interface/compiled/_next/static/chunks/app/search/layout-ea6b73fdaf9b24ca.js +1 -0
  25. khoj/interface/compiled/_next/static/chunks/app/search/{page-a5c277eff207959e.js → page-8e28deacb61f75aa.js} +1 -1
  26. khoj/interface/compiled/_next/static/chunks/app/settings/{layout-a8f33dfe92f997fb.js → layout-254eaaf916449a60.js} +1 -1
  27. khoj/interface/compiled/_next/static/chunks/app/settings/page-2fab613a557d3cc5.js +1 -0
  28. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-cf7445cf0326bda3.js +1 -0
  29. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-30376aa7e9cfa342.js +1 -0
  30. khoj/interface/compiled/_next/static/chunks/{main-f84cd3c1873cd842.js → main-1ea5c2e0fdef4626.js} +1 -1
  31. khoj/interface/compiled/_next/static/chunks/{webpack-8beec5b51cabb39a.js → webpack-27cf153c35b1338d.js} +1 -1
  32. khoj/interface/compiled/_next/static/css/{467a524c75e7d7c0.css → 0e9d53dcd7f11342.css} +1 -1
  33. khoj/interface/compiled/_next/static/css/{26c1c33d0423a7d8.css → 1f293605f2871853.css} +1 -1
  34. khoj/interface/compiled/_next/static/css/2d097a35da6bfe8d.css +1 -0
  35. khoj/interface/compiled/_next/static/css/80bd6301fc657983.css +1 -0
  36. khoj/interface/compiled/_next/static/css/ed437164d77aa600.css +25 -0
  37. khoj/interface/compiled/_next/static/media/5455839c73f146e7-s.p.woff2 +0 -0
  38. khoj/interface/compiled/_next/static/media/5984b96ba4822821-s.woff2 +0 -0
  39. khoj/interface/compiled/_next/static/media/684adc3dde1b03f1-s.woff2 +0 -0
  40. khoj/interface/compiled/_next/static/media/82e3b9a1bdaf0c26-s.woff2 +0 -0
  41. khoj/interface/compiled/_next/static/media/8d1ea331386a0db8-s.woff2 +0 -0
  42. khoj/interface/compiled/_next/static/media/91475f6526542a4f-s.woff2 +0 -0
  43. khoj/interface/compiled/_next/static/media/b98b13dbc1c3b59c-s.woff2 +0 -0
  44. khoj/interface/compiled/_next/static/media/c824d7a20139e39d-s.woff2 +0 -0
  45. khoj/interface/compiled/agents/index.html +1 -1
  46. khoj/interface/compiled/agents/index.txt +2 -2
  47. khoj/interface/compiled/automations/index.html +1 -1
  48. khoj/interface/compiled/automations/index.txt +2 -2
  49. khoj/interface/compiled/chat/index.html +1 -1
  50. khoj/interface/compiled/chat/index.txt +2 -2
  51. khoj/interface/compiled/index.html +1 -1
  52. khoj/interface/compiled/index.txt +3 -3
  53. khoj/interface/compiled/search/index.html +1 -1
  54. khoj/interface/compiled/search/index.txt +2 -2
  55. khoj/interface/compiled/settings/index.html +1 -1
  56. khoj/interface/compiled/settings/index.txt +3 -3
  57. khoj/interface/compiled/share/chat/index.html +1 -1
  58. khoj/interface/compiled/share/chat/index.txt +3 -3
  59. khoj/processor/content/docx/docx_to_entries.py +27 -21
  60. khoj/processor/content/github/github_to_entries.py +2 -2
  61. khoj/processor/content/images/image_to_entries.py +2 -2
  62. khoj/processor/content/markdown/markdown_to_entries.py +2 -2
  63. khoj/processor/content/notion/notion_to_entries.py +2 -2
  64. khoj/processor/content/org_mode/org_to_entries.py +2 -2
  65. khoj/processor/content/org_mode/orgnode.py +1 -1
  66. khoj/processor/content/pdf/pdf_to_entries.py +37 -29
  67. khoj/processor/content/plaintext/plaintext_to_entries.py +2 -2
  68. khoj/processor/content/text_to_entries.py +3 -4
  69. khoj/processor/conversation/anthropic/anthropic_chat.py +9 -1
  70. khoj/processor/conversation/google/gemini_chat.py +15 -2
  71. khoj/processor/conversation/google/utils.py +3 -1
  72. khoj/processor/conversation/offline/chat_model.py +4 -0
  73. khoj/processor/conversation/openai/gpt.py +6 -1
  74. khoj/processor/conversation/prompts.py +72 -13
  75. khoj/processor/conversation/utils.py +80 -13
  76. khoj/processor/image/generate.py +2 -0
  77. khoj/processor/tools/online_search.py +68 -18
  78. khoj/processor/tools/run_code.py +54 -20
  79. khoj/routers/api.py +10 -4
  80. khoj/routers/api_agents.py +8 -10
  81. khoj/routers/api_chat.py +89 -24
  82. khoj/routers/api_content.py +80 -8
  83. khoj/routers/helpers.py +176 -60
  84. khoj/routers/notion.py +1 -1
  85. khoj/routers/research.py +73 -31
  86. khoj/routers/web_client.py +0 -10
  87. khoj/search_type/text_search.py +3 -7
  88. khoj/utils/cli.py +2 -2
  89. khoj/utils/fs_syncer.py +2 -1
  90. khoj/utils/helpers.py +6 -3
  91. khoj/utils/rawconfig.py +32 -0
  92. khoj/utils/state.py +2 -1
  93. {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/METADATA +3 -3
  94. {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/RECORD +99 -105
  95. {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/WHEEL +1 -1
  96. khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +0 -1
  97. khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +0 -1
  98. khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +0 -1
  99. khoj/interface/compiled/_next/static/chunks/1970-d44050bf658ae5cc.js +0 -1
  100. khoj/interface/compiled/_next/static/chunks/3110-ef2cacd1b8d79ad8.js +0 -1
  101. khoj/interface/compiled/_next/static/chunks/3423-f4b7df2f6f3362f7.js +0 -1
  102. khoj/interface/compiled/_next/static/chunks/394-6bcb8c429f168f21.js +0 -3
  103. khoj/interface/compiled/_next/static/chunks/7113-f2e114d7034a0835.js +0 -1
  104. khoj/interface/compiled/_next/static/chunks/8423-da57554315eebcbe.js +0 -1
  105. khoj/interface/compiled/_next/static/chunks/8840-b8d7b9f0923c6651.js +0 -1
  106. khoj/interface/compiled/_next/static/chunks/9417-0d0fc7eb49a86abb.js +0 -1
  107. khoj/interface/compiled/_next/static/chunks/app/agents/layout-75636ab3a413fa8e.js +0 -1
  108. khoj/interface/compiled/_next/static/chunks/app/agents/page-adbf3cd470da248f.js +0 -1
  109. khoj/interface/compiled/_next/static/chunks/app/chat/layout-96fcf62857bf8f30.js +0 -1
  110. khoj/interface/compiled/_next/static/chunks/app/chat/page-222d348681b848a5.js +0 -1
  111. khoj/interface/compiled/_next/static/chunks/app/factchecker/layout-7b30c541c05fb904.js +0 -1
  112. khoj/interface/compiled/_next/static/chunks/app/factchecker/page-bded0868a08ac4ba.js +0 -1
  113. khoj/interface/compiled/_next/static/chunks/app/search/layout-3720f1362310bebb.js +0 -1
  114. khoj/interface/compiled/_next/static/chunks/app/settings/page-210bd54db4841333.js +0 -1
  115. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-2df56074e42adaa0.js +0 -1
  116. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-a21b7e8890ed1209.js +0 -1
  117. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
  118. khoj/interface/compiled/_next/static/css/553f9cdcc7a2bcd6.css +0 -1
  119. khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +0 -25
  120. khoj/interface/compiled/_next/static/css/afd3d45cc65d55d8.css +0 -1
  121. khoj/interface/compiled/_next/static/media/0e790e04fd40ad16-s.p.woff2 +0 -0
  122. khoj/interface/compiled/_next/static/media/4221e1667cd19c7d-s.woff2 +0 -0
  123. khoj/interface/compiled/_next/static/media/6c276159aa0eb14b-s.woff2 +0 -0
  124. khoj/interface/compiled/_next/static/media/6cc0b9500e4f9168-s.woff2 +0 -0
  125. khoj/interface/compiled/_next/static/media/9d9319a7a2ac39c6-s.woff2 +0 -0
  126. khoj/interface/compiled/_next/static/media/a75c8ea86756d52d-s.woff2 +0 -0
  127. khoj/interface/compiled/_next/static/media/abce7c400ca31a51-s.woff2 +0 -0
  128. khoj/interface/compiled/_next/static/media/f759c939737fb668-s.woff2 +0 -0
  129. khoj/interface/compiled/factchecker/index.html +0 -1
  130. khoj/interface/compiled/factchecker/index.txt +0 -7
  131. /khoj/interface/compiled/_next/static/{EfnEiWDle86AUcxEdEFgO → t_2jovvUVve0Gvc3FqpT9}/_buildManifest.js +0 -0
  132. /khoj/interface/compiled/_next/static/{EfnEiWDle86AUcxEdEFgO → t_2jovvUVve0Gvc3FqpT9}/_ssgManifest.js +0 -0
  133. {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/entry_points.txt +0 -0
  134. {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/licenses/LICENSE +0 -0
khoj/routers/helpers.py CHANGED
@@ -20,6 +20,7 @@ from typing import (
20
20
  Iterator,
21
21
  List,
22
22
  Optional,
23
+ Set,
23
24
  Tuple,
24
25
  Union,
25
26
  )
@@ -104,6 +105,7 @@ from khoj.utils.config import OfflineChatProcessorModel
104
105
  from khoj.utils.helpers import (
105
106
  LRU,
106
107
  ConversationCommand,
108
+ get_file_type,
107
109
  is_none_or_empty,
108
110
  is_valid_url,
109
111
  log_telemetry,
@@ -111,7 +113,7 @@ from khoj.utils.helpers import (
111
113
  timer,
112
114
  tool_descriptions_for_llm,
113
115
  )
114
- from khoj.utils.rawconfig import LocationData
116
+ from khoj.utils.rawconfig import ChatRequestBody, FileAttachment, FileData, LocationData
115
117
 
116
118
  logger = logging.getLogger(__name__)
117
119
 
@@ -167,6 +169,12 @@ async def is_ready_to_chat(user: KhojUser):
167
169
  raise HTTPException(status_code=500, detail="Set your OpenAI API key or enable Local LLM via Khoj settings.")
168
170
 
169
171
 
172
+ def get_file_content(file: UploadFile):
173
+ file_content = file.file.read()
174
+ file_type, encoding = get_file_type(file.content_type, file_content)
175
+ return FileData(name=file.filename, content=file_content, file_type=file_type, encoding=encoding)
176
+
177
+
170
178
  def update_telemetry_state(
171
179
  request: Request,
172
180
  telemetry_type: str,
@@ -196,7 +204,12 @@ def update_telemetry_state(
196
204
 
197
205
  state.telemetry += [
198
206
  log_telemetry(
199
- telemetry_type=telemetry_type, api=api, client=client, app_config=state.config.app, properties=user_state
207
+ telemetry_type=telemetry_type,
208
+ api=api,
209
+ client=client,
210
+ app_config=state.config.app,
211
+ disable_telemetry_env=state.telemetry_disabled,
212
+ properties=user_state,
200
213
  )
201
214
  ]
202
215
 
@@ -248,6 +261,39 @@ async def agenerate_chat_response(*args):
248
261
  return await loop.run_in_executor(executor, generate_chat_response, *args)
249
262
 
250
263
 
264
+ def gather_raw_query_files(
265
+ query_files: Dict[str, str],
266
+ ):
267
+ """
268
+ Gather contextual data from the given (raw) files
269
+ """
270
+
271
+ if len(query_files) == 0:
272
+ return ""
273
+
274
+ contextual_data = " ".join(
275
+ [f"File: {file_name}\n\n{file_content}\n\n" for file_name, file_content in query_files.items()]
276
+ )
277
+ return f"I have attached the following files:\n\n{contextual_data}"
278
+
279
+
280
+ async def acreate_title_from_history(
281
+ user: KhojUser,
282
+ conversation: Conversation,
283
+ ):
284
+ """
285
+ Create a title from the given conversation history
286
+ """
287
+ chat_history = construct_chat_history(conversation.conversation_log)
288
+
289
+ title_generation_prompt = prompts.conversation_title_generation.format(chat_history=chat_history)
290
+
291
+ with timer("Chat actor: Generate title from conversation history", logger):
292
+ response = await send_message_to_model_wrapper(title_generation_prompt, user=user)
293
+
294
+ return response.strip()
295
+
296
+
251
297
  async def acreate_title_from_query(query: str, user: KhojUser = None) -> str:
252
298
  """
253
299
  Create a title from the given query
@@ -260,11 +306,15 @@ async def acreate_title_from_query(query: str, user: KhojUser = None) -> str:
260
306
  return response.strip()
261
307
 
262
308
 
263
- async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None) -> Tuple[bool, str]:
309
+ async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None, lax: bool = False) -> Tuple[bool, str]:
264
310
  """
265
311
  Check if the system prompt is safe to use
266
312
  """
267
- safe_prompt_check = prompts.personality_prompt_safety_expert.format(prompt=system_prompt)
313
+ safe_prompt_check = (
314
+ prompts.personality_prompt_safety_expert.format(prompt=system_prompt)
315
+ if not lax
316
+ else prompts.personality_prompt_safety_expert_lax.format(prompt=system_prompt)
317
+ )
268
318
  is_safe = True
269
319
  reason = ""
270
320
 
@@ -293,6 +343,7 @@ async def aget_relevant_information_sources(
293
343
  user: KhojUser,
294
344
  query_images: List[str] = None,
295
345
  agent: Agent = None,
346
+ query_files: str = None,
296
347
  tracer: dict = {},
297
348
  ):
298
349
  """
@@ -330,6 +381,7 @@ async def aget_relevant_information_sources(
330
381
  relevant_tools_prompt,
331
382
  response_type="json_object",
332
383
  user=user,
384
+ query_files=query_files,
333
385
  tracer=tracer,
334
386
  )
335
387
 
@@ -439,6 +491,7 @@ async def infer_webpage_urls(
439
491
  user: KhojUser,
440
492
  query_images: List[str] = None,
441
493
  agent: Agent = None,
494
+ query_files: str = None,
442
495
  tracer: dict = {},
443
496
  ) -> List[str]:
444
497
  """
@@ -468,6 +521,7 @@ async def infer_webpage_urls(
468
521
  query_images=query_images,
469
522
  response_type="json_object",
470
523
  user=user,
524
+ query_files=query_files,
471
525
  tracer=tracer,
472
526
  )
473
527
 
@@ -493,8 +547,9 @@ async def generate_online_subqueries(
493
547
  user: KhojUser,
494
548
  query_images: List[str] = None,
495
549
  agent: Agent = None,
550
+ query_files: str = None,
496
551
  tracer: dict = {},
497
- ) -> List[str]:
552
+ ) -> Set[str]:
498
553
  """
499
554
  Generate subqueries from the given query
500
555
  """
@@ -522,6 +577,7 @@ async def generate_online_subqueries(
522
577
  query_images=query_images,
523
578
  response_type="json_object",
524
579
  user=user,
580
+ query_files=query_files,
525
581
  tracer=tracer,
526
582
  )
527
583
 
@@ -529,14 +585,14 @@ async def generate_online_subqueries(
529
585
  try:
530
586
  response = clean_json(response)
531
587
  response = json.loads(response)
532
- response = [q.strip() for q in response["queries"] if q.strip()]
533
- if not isinstance(response, list) or not response or len(response) == 0:
588
+ response = {q.strip() for q in response["queries"] if q.strip()}
589
+ if not isinstance(response, set) or not response or len(response) == 0:
534
590
  logger.error(f"Invalid response for constructing subqueries: {response}. Returning original query: {q}")
535
- return [q]
591
+ return {q}
536
592
  return response
537
593
  except Exception as e:
538
594
  logger.error(f"Invalid response for constructing subqueries: {response}. Returning original query: {q}")
539
- return [q]
595
+ return {q}
540
596
 
541
597
 
542
598
  async def schedule_query(
@@ -644,26 +700,38 @@ async def generate_summary_from_files(
644
700
  query_images: List[str] = None,
645
701
  agent: Agent = None,
646
702
  send_status_func: Optional[Callable] = None,
703
+ query_files: str = None,
647
704
  tracer: dict = {},
648
705
  ):
649
706
  try:
650
- file_object = None
707
+ file_objects = None
651
708
  if await EntryAdapters.aagent_has_entries(agent):
652
709
  file_names = await EntryAdapters.aget_agent_entry_filepaths(agent)
653
710
  if len(file_names) > 0:
654
- file_object = await FileObjectAdapters.async_get_file_objects_by_name(None, file_names.pop(), agent)
711
+ file_objects = await FileObjectAdapters.aget_file_objects_by_name(None, file_names.pop(), agent)
655
712
 
656
- if len(file_filters) > 0:
657
- file_object = await FileObjectAdapters.async_get_file_objects_by_name(user, file_filters[0])
658
-
659
- if len(file_object) == 0:
660
- response_log = "Sorry, I couldn't find the full text of this file."
713
+ if (file_objects and len(file_objects) == 0 and not query_files) or (not file_objects and not query_files):
714
+ response_log = "Sorry, I couldn't find anything to summarize."
661
715
  yield response_log
662
716
  return
663
- contextual_data = " ".join([file.raw_text for file in file_object])
717
+
718
+ contextual_data = " ".join([f"File: {file.file_name}\n\n{file.raw_text}" for file in file_objects])
719
+
720
+ if query_files:
721
+ contextual_data += f"\n\n{query_files}"
722
+
664
723
  if not q:
665
724
  q = "Create a general summary of the file"
666
- async for result in send_status_func(f"**Constructing Summary Using:** {file_object[0].file_name}"):
725
+
726
+ file_names = [file.file_name for file in file_objects]
727
+ file_names.extend(file_filters)
728
+
729
+ all_file_names = ""
730
+
731
+ for file_name in file_names:
732
+ all_file_names += f"- {file_name}\n"
733
+
734
+ async for result in send_status_func(f"**Constructing Summary Using:**\n{all_file_names}"):
667
735
  yield {ChatEvent.STATUS: result}
668
736
 
669
737
  response = await extract_relevant_summary(
@@ -693,6 +761,7 @@ async def generate_excalidraw_diagram(
693
761
  user: KhojUser = None,
694
762
  agent: Agent = None,
695
763
  send_status_func: Optional[Callable] = None,
764
+ query_files: str = None,
696
765
  tracer: dict = {},
697
766
  ):
698
767
  if send_status_func:
@@ -708,19 +777,24 @@ async def generate_excalidraw_diagram(
708
777
  query_images=query_images,
709
778
  user=user,
710
779
  agent=agent,
780
+ query_files=query_files,
711
781
  tracer=tracer,
712
782
  )
713
783
 
714
784
  if send_status_func:
715
785
  async for event in send_status_func(f"**Diagram to Create:**:\n{better_diagram_description_prompt}"):
716
786
  yield {ChatEvent.STATUS: event}
717
-
718
- excalidraw_diagram_description = await generate_excalidraw_diagram_from_description(
719
- q=better_diagram_description_prompt,
720
- user=user,
721
- agent=agent,
722
- tracer=tracer,
723
- )
787
+ try:
788
+ excalidraw_diagram_description = await generate_excalidraw_diagram_from_description(
789
+ q=better_diagram_description_prompt,
790
+ user=user,
791
+ agent=agent,
792
+ tracer=tracer,
793
+ )
794
+ except Exception as e:
795
+ logger.error(f"Error generating Excalidraw diagram for {user.email}: {e}", exc_info=True)
796
+ yield None, None
797
+ return
724
798
 
725
799
  yield better_diagram_description_prompt, excalidraw_diagram_description
726
800
 
@@ -734,6 +808,7 @@ async def generate_better_diagram_description(
734
808
  query_images: List[str] = None,
735
809
  user: KhojUser = None,
736
810
  agent: Agent = None,
811
+ query_files: str = None,
737
812
  tracer: dict = {},
738
813
  ) -> str:
739
814
  """
@@ -772,7 +847,11 @@ async def generate_better_diagram_description(
772
847
 
773
848
  with timer("Chat actor: Generate better diagram description", logger):
774
849
  response = await send_message_to_model_wrapper(
775
- improve_diagram_description_prompt, query_images=query_images, user=user, tracer=tracer
850
+ improve_diagram_description_prompt,
851
+ query_images=query_images,
852
+ user=user,
853
+ query_files=query_files,
854
+ tracer=tracer,
776
855
  )
777
856
  response = response.strip()
778
857
  if response.startswith(('"', "'")) and response.endswith(('"', "'")):
@@ -801,7 +880,10 @@ async def generate_excalidraw_diagram_from_description(
801
880
  query=excalidraw_diagram_generation, user=user, tracer=tracer
802
881
  )
803
882
  raw_response = clean_json(raw_response)
804
- response: Dict[str, str] = json.loads(raw_response)
883
+ try:
884
+ response: Dict[str, str] = json.loads(raw_response)
885
+ except Exception:
886
+ raise AssertionError(f"Invalid response for generating Excalidraw diagram: {raw_response}")
805
887
  if not response or not isinstance(response, List) or not isinstance(response[0], Dict):
806
888
  # TODO Some additional validation here that it's a valid Excalidraw diagram
807
889
  raise AssertionError(f"Invalid response for improving diagram description: {response}")
@@ -819,6 +901,7 @@ async def generate_better_image_prompt(
819
901
  query_images: Optional[List[str]] = None,
820
902
  user: KhojUser = None,
821
903
  agent: Agent = None,
904
+ query_files: str = "",
822
905
  tracer: dict = {},
823
906
  ) -> str:
824
907
  """
@@ -867,7 +950,7 @@ async def generate_better_image_prompt(
867
950
 
868
951
  with timer("Chat actor: Generate contextual image prompt", logger):
869
952
  response = await send_message_to_model_wrapper(
870
- image_prompt, query_images=query_images, user=user, tracer=tracer
953
+ image_prompt, query_images=query_images, user=user, query_files=query_files, tracer=tracer
871
954
  )
872
955
  response = response.strip()
873
956
  if response.startswith(('"', "'")) and response.endswith(('"', "'")):
@@ -883,6 +966,7 @@ async def send_message_to_model_wrapper(
883
966
  user: KhojUser = None,
884
967
  query_images: List[str] = None,
885
968
  context: str = "",
969
+ query_files: str = None,
886
970
  tracer: dict = {},
887
971
  ):
888
972
  conversation_config: ChatModelOptions = await ConversationAdapters.aget_default_conversation_config(user)
@@ -922,6 +1006,7 @@ async def send_message_to_model_wrapper(
922
1006
  max_prompt_size=max_tokens,
923
1007
  vision_enabled=vision_available,
924
1008
  model_type=conversation_config.model_type,
1009
+ query_files=query_files,
925
1010
  )
926
1011
 
927
1012
  return send_message_to_model_offline(
@@ -948,6 +1033,7 @@ async def send_message_to_model_wrapper(
948
1033
  vision_enabled=vision_available,
949
1034
  query_images=query_images,
950
1035
  model_type=conversation_config.model_type,
1036
+ query_files=query_files,
951
1037
  )
952
1038
 
953
1039
  return send_message_to_model(
@@ -970,6 +1056,7 @@ async def send_message_to_model_wrapper(
970
1056
  vision_enabled=vision_available,
971
1057
  query_images=query_images,
972
1058
  model_type=conversation_config.model_type,
1059
+ query_files=query_files,
973
1060
  )
974
1061
 
975
1062
  return anthropic_send_message_to_model(
@@ -991,6 +1078,7 @@ async def send_message_to_model_wrapper(
991
1078
  vision_enabled=vision_available,
992
1079
  query_images=query_images,
993
1080
  model_type=conversation_config.model_type,
1081
+ query_files=query_files,
994
1082
  )
995
1083
 
996
1084
  return gemini_send_message_to_model(
@@ -1005,6 +1093,7 @@ def send_message_to_model_wrapper_sync(
1005
1093
  system_message: str = "",
1006
1094
  response_type: str = "text",
1007
1095
  user: KhojUser = None,
1096
+ query_files: str = "",
1008
1097
  tracer: dict = {},
1009
1098
  ):
1010
1099
  conversation_config: ChatModelOptions = ConversationAdapters.get_default_conversation_config(user)
@@ -1029,6 +1118,7 @@ def send_message_to_model_wrapper_sync(
1029
1118
  max_prompt_size=max_tokens,
1030
1119
  vision_enabled=vision_available,
1031
1120
  model_type=conversation_config.model_type,
1121
+ query_files=query_files,
1032
1122
  )
1033
1123
 
1034
1124
  return send_message_to_model_offline(
@@ -1050,6 +1140,7 @@ def send_message_to_model_wrapper_sync(
1050
1140
  max_prompt_size=max_tokens,
1051
1141
  vision_enabled=vision_available,
1052
1142
  model_type=conversation_config.model_type,
1143
+ query_files=query_files,
1053
1144
  )
1054
1145
 
1055
1146
  openai_response = send_message_to_model(
@@ -1071,6 +1162,7 @@ def send_message_to_model_wrapper_sync(
1071
1162
  max_prompt_size=max_tokens,
1072
1163
  vision_enabled=vision_available,
1073
1164
  model_type=conversation_config.model_type,
1165
+ query_files=query_files,
1074
1166
  )
1075
1167
 
1076
1168
  return anthropic_send_message_to_model(
@@ -1090,6 +1182,7 @@ def send_message_to_model_wrapper_sync(
1090
1182
  max_prompt_size=max_tokens,
1091
1183
  vision_enabled=vision_available,
1092
1184
  model_type=conversation_config.model_type,
1185
+ query_files=query_files,
1093
1186
  )
1094
1187
 
1095
1188
  return gemini_send_message_to_model(
@@ -1119,8 +1212,10 @@ def generate_chat_response(
1119
1212
  user_name: Optional[str] = None,
1120
1213
  meta_research: str = "",
1121
1214
  query_images: Optional[List[str]] = None,
1122
- tracer: dict = {},
1123
1215
  train_of_thought: List[Any] = [],
1216
+ query_files: str = None,
1217
+ raw_query_files: List[FileAttachment] = None,
1218
+ tracer: dict = {},
1124
1219
  ) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]:
1125
1220
  # Initialize Variables
1126
1221
  chat_response = None
@@ -1128,9 +1223,6 @@ def generate_chat_response(
1128
1223
 
1129
1224
  metadata = {}
1130
1225
  agent = AgentAdapters.get_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
1131
- query_to_run = q
1132
- if meta_research:
1133
- query_to_run = f"AI Research: {meta_research} {q}"
1134
1226
  try:
1135
1227
  partial_completion = partial(
1136
1228
  save_to_conversation_log,
@@ -1144,10 +1236,18 @@ def generate_chat_response(
1144
1236
  client_application=client_application,
1145
1237
  conversation_id=conversation_id,
1146
1238
  query_images=query_images,
1147
- tracer=tracer,
1148
1239
  train_of_thought=train_of_thought,
1240
+ raw_query_files=raw_query_files,
1241
+ tracer=tracer,
1149
1242
  )
1150
1243
 
1244
+ query_to_run = q
1245
+ if meta_research:
1246
+ query_to_run = f"<query>{q}</query>\n<collected_research>\n{meta_research}\n</collected_research>"
1247
+ compiled_references = []
1248
+ online_results = {}
1249
+ code_results = {}
1250
+
1151
1251
  conversation_config = ConversationAdapters.get_valid_conversation_config(user, conversation)
1152
1252
  vision_available = conversation_config.vision_enabled
1153
1253
  if not vision_available and query_images:
@@ -1172,6 +1272,7 @@ def generate_chat_response(
1172
1272
  location_data=location_data,
1173
1273
  user_name=user_name,
1174
1274
  agent=agent,
1275
+ query_files=query_files,
1175
1276
  tracer=tracer,
1176
1277
  )
1177
1278
 
@@ -1197,6 +1298,7 @@ def generate_chat_response(
1197
1298
  user_name=user_name,
1198
1299
  agent=agent,
1199
1300
  vision_available=vision_available,
1301
+ query_files=query_files,
1200
1302
  tracer=tracer,
1201
1303
  )
1202
1304
 
@@ -1219,6 +1321,7 @@ def generate_chat_response(
1219
1321
  user_name=user_name,
1220
1322
  agent=agent,
1221
1323
  vision_available=vision_available,
1324
+ query_files=query_files,
1222
1325
  tracer=tracer,
1223
1326
  )
1224
1327
  elif conversation_config.model_type == ChatModelOptions.ModelType.GOOGLE:
@@ -1238,7 +1341,9 @@ def generate_chat_response(
1238
1341
  location_data=location_data,
1239
1342
  user_name=user_name,
1240
1343
  agent=agent,
1344
+ query_images=query_images,
1241
1345
  vision_available=vision_available,
1346
+ query_files=query_files,
1242
1347
  tracer=tracer,
1243
1348
  )
1244
1349
 
@@ -1251,23 +1356,6 @@ def generate_chat_response(
1251
1356
  return chat_response, metadata
1252
1357
 
1253
1358
 
1254
- class ChatRequestBody(BaseModel):
1255
- q: str
1256
- n: Optional[int] = 7
1257
- d: Optional[float] = None
1258
- stream: Optional[bool] = False
1259
- title: Optional[str] = None
1260
- conversation_id: Optional[str] = None
1261
- turn_id: Optional[str] = None
1262
- city: Optional[str] = None
1263
- region: Optional[str] = None
1264
- country: Optional[str] = None
1265
- country_code: Optional[str] = None
1266
- timezone: Optional[str] = None
1267
- images: Optional[list[str]] = None
1268
- create_new: Optional[bool] = False
1269
-
1270
-
1271
1359
  class DeleteMessageRequestBody(BaseModel):
1272
1360
  conversation_id: str
1273
1361
  turn_id: str
@@ -1306,25 +1394,28 @@ class ApiUserRateLimiter:
1306
1394
  # Check if the user has exceeded the rate limit
1307
1395
  if subscribed and count_requests >= self.subscribed_requests:
1308
1396
  logger.info(
1309
- f"Rate limit: {count_requests} requests in {self.window} seconds for user: {user}. Limit is {self.subscribed_requests} requests."
1397
+ f"Rate limit: {count_requests}/{self.subscribed_requests} requests not allowed in {self.window} seconds for subscribed user: {user}."
1398
+ )
1399
+ raise HTTPException(
1400
+ status_code=429,
1401
+ detail="I'm glad you're enjoying interacting with me! You've unfortunately exceeded your usage limit for today. But let's chat more tomorrow?",
1310
1402
  )
1311
- raise HTTPException(status_code=429, detail="Slow down! Too Many Requests")
1312
1403
  if not subscribed and count_requests >= self.requests:
1313
1404
  if self.requests >= self.subscribed_requests:
1314
1405
  logger.info(
1315
- f"Rate limit: {count_requests} requests in {self.window} seconds for user: {user}. Limit is {self.subscribed_requests} requests."
1406
+ f"Rate limit: {count_requests}/{self.subscribed_requests} requests not allowed in {self.window} seconds for user: {user}."
1316
1407
  )
1317
1408
  raise HTTPException(
1318
1409
  status_code=429,
1319
- detail="Slow down! Too Many Requests",
1410
+ detail="I'm glad you're enjoying interacting with me! You've unfortunately exceeded your usage limit for today. But let's chat more tomorrow?",
1320
1411
  )
1321
1412
 
1322
1413
  logger.info(
1323
- f"Rate limit: {count_requests} requests in {self.window} seconds for user: {user}. Limit is {self.subscribed_requests} requests."
1414
+ f"Rate limit: {count_requests}/{self.requests} requests not allowed in {self.window} seconds for user: {user}."
1324
1415
  )
1325
1416
  raise HTTPException(
1326
1417
  status_code=429,
1327
- detail="I'm glad you're enjoying interacting with me! But you've exceeded your usage limit for today. Come back tomorrow or subscribe to increase your usage limit via [your settings](https://app.khoj.dev/settings).",
1418
+ detail="I'm glad you're enjoying interacting with me! You've unfortunately exceeded your usage limit for today. You can subscribe to increase your usage limit via [your settings](https://app.khoj.dev/settings) or we can continue our conversation tomorrow?",
1328
1419
  )
1329
1420
 
1330
1421
  # Add the current request to the cache
@@ -1350,6 +1441,7 @@ class ApiImageRateLimiter:
1350
1441
 
1351
1442
  # Check number of images
1352
1443
  if len(body.images) > self.max_images:
1444
+ logger.info(f"Rate limit: {len(body.images)}/{self.max_images} images not allowed per message.")
1353
1445
  raise HTTPException(
1354
1446
  status_code=429,
1355
1447
  detail=f"Those are way too many images for me! I can handle up to {self.max_images} images per message.",
@@ -1370,6 +1462,7 @@ class ApiImageRateLimiter:
1370
1462
  total_size_mb += len(image_bytes) / (1024 * 1024) # Convert bytes to MB
1371
1463
 
1372
1464
  if total_size_mb > self.max_combined_size_mb:
1465
+ logger.info(f"Data limit: {total_size_mb}MB/{self.max_combined_size_mb}MB size not allowed per message.")
1373
1466
  raise HTTPException(
1374
1467
  status_code=429,
1375
1468
  detail=f"Those images are way too large for me! I can handle up to {self.max_combined_size_mb}MB of images per message.",
@@ -1405,13 +1498,19 @@ class ConversationCommandRateLimiter:
1405
1498
 
1406
1499
  if subscribed and count_requests >= self.subscribed_rate_limit:
1407
1500
  logger.info(
1408
- f"Rate limit: {count_requests} requests in 24 hours for user: {user}. Limit is {self.subscribed_rate_limit} requests."
1501
+ f"Rate limit: {count_requests}/{self.subscribed_rate_limit} requests not allowed in 24 hours for subscribed user: {user}."
1502
+ )
1503
+ raise HTTPException(
1504
+ status_code=429,
1505
+ detail=f"I'm glad you're enjoying interacting with me! You've unfortunately exceeded your `/{conversation_command.value}` command usage limit for today. Maybe we can talk about something else for today?",
1409
1506
  )
1410
- raise HTTPException(status_code=429, detail="Slow down! Too Many Requests")
1411
1507
  if not subscribed and count_requests >= self.trial_rate_limit:
1508
+ logger.info(
1509
+ f"Rate limit: {count_requests}/{self.trial_rate_limit} requests not allowed in 24 hours for user: {user}."
1510
+ )
1412
1511
  raise HTTPException(
1413
1512
  status_code=429,
1414
- detail=f"We're glad you're enjoying Khoj! You've exceeded your `/{conversation_command.value}` command usage limit for today. Subscribe to increase your usage limit via [your settings](https://app.khoj.dev/settings).",
1513
+ detail=f"I'm glad you're enjoying interacting with me! You've unfortunately exceeded your `/{conversation_command.value}` command usage limit for today. You can subscribe to increase your usage limit via [your settings](https://app.khoj.dev/settings) or we can talk about something else for today?",
1415
1514
  )
1416
1515
  await UserRequests.objects.acreate(user=user, slug=command_slug)
1417
1516
  return
@@ -1457,16 +1556,28 @@ class ApiIndexedDataLimiter:
1457
1556
  logger.info(f"Deleted {num_deleted_entries} entries for user: {user}.")
1458
1557
 
1459
1558
  if subscribed and incoming_data_size_mb >= self.subscribed_num_entries_size:
1559
+ logger.info(
1560
+ f"Data limit: {incoming_data_size_mb}MB incoming will exceed {self.subscribed_num_entries_size}MB allowed for subscribed user: {user}."
1561
+ )
1460
1562
  raise HTTPException(status_code=429, detail="Too much data indexed.")
1461
1563
  if not subscribed and incoming_data_size_mb >= self.num_entries_size:
1564
+ logger.info(
1565
+ f"Data limit: {incoming_data_size_mb}MB incoming will exceed {self.num_entries_size}MB allowed for user: {user}."
1566
+ )
1462
1567
  raise HTTPException(
1463
1568
  status_code=429, detail="Too much data indexed. Subscribe to increase your data index limit."
1464
1569
  )
1465
1570
 
1466
1571
  user_size_data = EntryAdapters.get_size_of_indexed_data_in_mb(user)
1467
1572
  if subscribed and user_size_data + incoming_data_size_mb >= self.subscribed_total_entries_size:
1573
+ logger.info(
1574
+ f"Data limit: {incoming_data_size_mb}MB incoming + {user_size_data}MB existing will exceed {self.subscribed_total_entries_size}MB allowed for subscribed user: {user}."
1575
+ )
1468
1576
  raise HTTPException(status_code=429, detail="Too much data indexed.")
1469
1577
  if not subscribed and user_size_data + incoming_data_size_mb >= self.total_entries_size_limit:
1578
+ logger.info(
1579
+ f"Data limit: {incoming_data_size_mb}MB incoming + {user_size_data}MB existing will exceed {self.subscribed_total_entries_size}MB allowed for non subscribed user: {user}."
1580
+ )
1470
1581
  raise HTTPException(
1471
1582
  status_code=429, detail="Too much data indexed. Subscribe to increase your data index limit."
1472
1583
  )
@@ -1554,6 +1665,11 @@ def scheduled_chat(
1554
1665
  # encode the conversation_id to avoid any issues with special characters
1555
1666
  query_dict["conversation_id"] = [quote(str(conversation_id))]
1556
1667
 
1668
+ # validate that the conversation id exists. If not, delete the automation and exit.
1669
+ if not ConversationAdapters.get_conversation_by_id(conversation_id):
1670
+ AutomationAdapters.delete_automation(user, job_id)
1671
+ return
1672
+
1557
1673
  # Restructure the original query_dict into a valid JSON payload for the chat API
1558
1674
  json_payload = {key: values[0] for key, values in query_dict.items()}
1559
1675
 
@@ -1871,10 +1987,10 @@ def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False)
1871
1987
 
1872
1988
 
1873
1989
  def configure_content(
1990
+ user: KhojUser,
1874
1991
  files: Optional[dict[str, dict[str, str]]],
1875
1992
  regenerate: bool = False,
1876
1993
  t: Optional[state.SearchType] = state.SearchType.All,
1877
- user: KhojUser = None,
1878
1994
  ) -> bool:
1879
1995
  success = True
1880
1996
  if t == None:
khoj/routers/notion.py CHANGED
@@ -80,6 +80,6 @@ async def notion_auth_callback(request: Request, background_tasks: BackgroundTas
80
80
  notion_redirect = str(request.app.url_path_for("config_page"))
81
81
 
82
82
  # Trigger an async job to configure_content. Let it run without blocking the response.
83
- background_tasks.add_task(run_in_executor, configure_content, {}, False, SearchType.Notion, user)
83
+ background_tasks.add_task(run_in_executor, configure_content, user, {}, False, SearchType.Notion)
84
84
 
85
85
  return RedirectResponse(notion_redirect)