khoj-1.33.3.dev32-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
Files changed (393)
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +218 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +452 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1821 -0
  10. khoj/database/admin.py +417 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_default_model.py +116 -0
  15. khoj/database/management/commands/change_generated_images_url.py +61 -0
  16. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  17. khoj/database/migrations/0001_khojuser.py +98 -0
  18. khoj/database/migrations/0002_googleuser.py +32 -0
  19. khoj/database/migrations/0003_vector_extension.py +10 -0
  20. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  21. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  22. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  23. khoj/database/migrations/0007_add_conversation.py +27 -0
  24. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  25. khoj/database/migrations/0009_khojapiuser.py +24 -0
  26. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  27. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  28. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  29. khoj/database/migrations/0012_entry_file_source.py +21 -0
  30. khoj/database/migrations/0013_subscription.py +37 -0
  31. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  32. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  33. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  34. khoj/database/migrations/0017_searchmodel.py +32 -0
  35. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  36. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  37. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  38. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  39. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  40. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  41. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  42. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  43. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  45. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  46. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  47. khoj/database/migrations/0029_userrequests.py +27 -0
  48. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  49. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  50. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  51. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  52. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  53. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  54. khoj/database/migrations/0035_processlock.py +26 -0
  55. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  56. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  57. khoj/database/migrations/0036_publicconversation.py +42 -0
  58. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  59. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  60. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  61. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  62. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  63. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  64. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  65. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  66. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  67. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  68. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  69. khoj/database/migrations/0045_fileobject.py +37 -0
  70. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  71. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  72. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  73. khoj/database/migrations/0049_datastore.py +38 -0
  74. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  75. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  76. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  77. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  78. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  79. khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
  80. khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
  81. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  82. khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
  83. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  84. khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
  85. khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
  86. khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
  87. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  88. khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
  89. khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
  90. khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
  91. khoj/database/migrations/0063_conversation_temp_id.py +36 -0
  92. khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
  93. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  94. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  95. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  96. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  97. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  98. khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
  99. khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
  100. khoj/database/migrations/0072_entry_search_model.py +24 -0
  101. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  102. khoj/database/migrations/0074_alter_conversation_title.py +17 -0
  103. khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
  104. khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
  105. khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
  106. khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
  107. khoj/database/migrations/__init__.py +0 -0
  108. khoj/database/models/__init__.py +725 -0
  109. khoj/database/tests.py +3 -0
  110. khoj/interface/compiled/404/index.html +1 -0
  111. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
  112. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
  113. khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
  114. khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
  115. khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
  116. khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
  117. khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
  118. khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
  119. khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
  120. khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
  121. khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
  122. khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
  123. khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
  124. khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
  125. khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
  126. khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
  127. khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
  128. khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
  129. khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
  130. khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
  131. khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
  132. khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
  133. khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
  134. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  135. khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
  136. khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
  137. khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
  138. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  139. khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
  140. khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
  141. khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
  142. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  143. khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
  144. khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
  145. khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
  146. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  147. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
  148. khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
  149. khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
  150. khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
  151. khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
  152. khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
  153. khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
  154. khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
  155. khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  156. khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
  157. khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
  158. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  159. khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
  160. khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
  161. khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
  162. khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
  163. khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
  164. khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
  165. khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
  166. khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
  167. khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
  168. khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
  169. khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
  170. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
  171. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
  172. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
  173. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
  174. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
  175. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
  176. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
  177. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
  178. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
  179. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
  180. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
  181. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
  182. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
  183. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
  184. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
  185. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
  186. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
  187. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
  188. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
  189. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
  190. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
  191. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
  192. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
  193. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
  194. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
  195. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
  196. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
  197. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
  198. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
  199. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
  200. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
  201. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
  202. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
  203. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
  204. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
  205. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
  206. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
  207. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
  208. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
  209. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
  210. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
  211. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
  212. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
  213. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
  214. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
  215. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
  216. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
  217. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
  218. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
  219. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
  220. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
  221. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
  222. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
  223. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
  224. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
  225. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
  226. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
  227. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
  228. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
  229. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
  230. khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
  231. khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
  232. khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
  233. khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
  234. khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
  235. khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
  236. khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
  237. khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
  238. khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
  239. khoj/interface/compiled/agents/index.html +1 -0
  240. khoj/interface/compiled/agents/index.txt +7 -0
  241. khoj/interface/compiled/agents.svg +6 -0
  242. khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
  243. khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
  244. khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
  245. khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
  246. khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
  247. khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
  248. khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
  249. khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
  250. khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
  251. khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
  252. khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
  253. khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
  254. khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
  255. khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
  256. khoj/interface/compiled/automation.svg +37 -0
  257. khoj/interface/compiled/automations/index.html +1 -0
  258. khoj/interface/compiled/automations/index.txt +8 -0
  259. khoj/interface/compiled/chat/index.html +1 -0
  260. khoj/interface/compiled/chat/index.txt +7 -0
  261. khoj/interface/compiled/chat.svg +24 -0
  262. khoj/interface/compiled/close.svg +5 -0
  263. khoj/interface/compiled/copy-button-success.svg +6 -0
  264. khoj/interface/compiled/copy-button.svg +5 -0
  265. khoj/interface/compiled/index.html +1 -0
  266. khoj/interface/compiled/index.txt +7 -0
  267. khoj/interface/compiled/khoj.webmanifest +76 -0
  268. khoj/interface/compiled/logo.svg +24 -0
  269. khoj/interface/compiled/search/index.html +1 -0
  270. khoj/interface/compiled/search/index.txt +7 -0
  271. khoj/interface/compiled/send.svg +1 -0
  272. khoj/interface/compiled/settings/index.html +1 -0
  273. khoj/interface/compiled/settings/index.txt +9 -0
  274. khoj/interface/compiled/share/chat/index.html +1 -0
  275. khoj/interface/compiled/share/chat/index.txt +7 -0
  276. khoj/interface/compiled/share.svg +8 -0
  277. khoj/interface/compiled/thumbs-down.svg +6 -0
  278. khoj/interface/compiled/thumbs-up.svg +6 -0
  279. khoj/interface/email/feedback.html +34 -0
  280. khoj/interface/email/magic_link.html +40 -0
  281. khoj/interface/email/task.html +37 -0
  282. khoj/interface/email/welcome.html +90 -0
  283. khoj/interface/web/.well-known/assetlinks.json +11 -0
  284. khoj/interface/web/assets/icons/agents.svg +19 -0
  285. khoj/interface/web/assets/icons/automation.svg +43 -0
  286. khoj/interface/web/assets/icons/chat.svg +24 -0
  287. khoj/interface/web/assets/icons/github.svg +1 -0
  288. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  289. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  290. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
  291. khoj/interface/web/assets/icons/khoj.svg +26 -0
  292. khoj/interface/web/assets/icons/logotype.svg +1 -0
  293. khoj/interface/web/assets/icons/search.svg +57 -0
  294. khoj/interface/web/assets/icons/sync.svg +4 -0
  295. khoj/interface/web/assets/khoj.css +237 -0
  296. khoj/interface/web/assets/utils.js +33 -0
  297. khoj/interface/web/base_config.html +445 -0
  298. khoj/interface/web/content_source_github_input.html +208 -0
  299. khoj/interface/web/login.html +310 -0
  300. khoj/interface/web/utils.html +48 -0
  301. khoj/main.py +249 -0
  302. khoj/manage.py +22 -0
  303. khoj/migrations/__init__.py +0 -0
  304. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  305. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  306. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  307. khoj/migrations/migrate_offline_model.py +29 -0
  308. khoj/migrations/migrate_processor_config_openai.py +67 -0
  309. khoj/migrations/migrate_server_pg.py +132 -0
  310. khoj/migrations/migrate_version.py +17 -0
  311. khoj/processor/__init__.py +0 -0
  312. khoj/processor/content/__init__.py +0 -0
  313. khoj/processor/content/docx/__init__.py +0 -0
  314. khoj/processor/content/docx/docx_to_entries.py +111 -0
  315. khoj/processor/content/github/__init__.py +0 -0
  316. khoj/processor/content/github/github_to_entries.py +226 -0
  317. khoj/processor/content/images/__init__.py +0 -0
  318. khoj/processor/content/images/image_to_entries.py +117 -0
  319. khoj/processor/content/markdown/__init__.py +0 -0
  320. khoj/processor/content/markdown/markdown_to_entries.py +160 -0
  321. khoj/processor/content/notion/notion_to_entries.py +259 -0
  322. khoj/processor/content/org_mode/__init__.py +0 -0
  323. khoj/processor/content/org_mode/org_to_entries.py +226 -0
  324. khoj/processor/content/org_mode/orgnode.py +532 -0
  325. khoj/processor/content/pdf/__init__.py +0 -0
  326. khoj/processor/content/pdf/pdf_to_entries.py +119 -0
  327. khoj/processor/content/plaintext/__init__.py +0 -0
  328. khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
  329. khoj/processor/content/text_to_entries.py +296 -0
  330. khoj/processor/conversation/__init__.py +0 -0
  331. khoj/processor/conversation/anthropic/__init__.py +0 -0
  332. khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
  333. khoj/processor/conversation/anthropic/utils.py +217 -0
  334. khoj/processor/conversation/google/__init__.py +0 -0
  335. khoj/processor/conversation/google/gemini_chat.py +253 -0
  336. khoj/processor/conversation/google/utils.py +260 -0
  337. khoj/processor/conversation/offline/__init__.py +0 -0
  338. khoj/processor/conversation/offline/chat_model.py +308 -0
  339. khoj/processor/conversation/offline/utils.py +80 -0
  340. khoj/processor/conversation/offline/whisper.py +15 -0
  341. khoj/processor/conversation/openai/__init__.py +0 -0
  342. khoj/processor/conversation/openai/gpt.py +243 -0
  343. khoj/processor/conversation/openai/utils.py +232 -0
  344. khoj/processor/conversation/openai/whisper.py +13 -0
  345. khoj/processor/conversation/prompts.py +1188 -0
  346. khoj/processor/conversation/utils.py +867 -0
  347. khoj/processor/embeddings.py +122 -0
  348. khoj/processor/image/generate.py +215 -0
  349. khoj/processor/speech/__init__.py +0 -0
  350. khoj/processor/speech/text_to_speech.py +51 -0
  351. khoj/processor/tools/__init__.py +0 -0
  352. khoj/processor/tools/online_search.py +472 -0
  353. khoj/processor/tools/run_code.py +179 -0
  354. khoj/routers/__init__.py +0 -0
  355. khoj/routers/api.py +760 -0
  356. khoj/routers/api_agents.py +295 -0
  357. khoj/routers/api_chat.py +1273 -0
  358. khoj/routers/api_content.py +634 -0
  359. khoj/routers/api_model.py +123 -0
  360. khoj/routers/api_phone.py +86 -0
  361. khoj/routers/api_subscription.py +144 -0
  362. khoj/routers/auth.py +307 -0
  363. khoj/routers/email.py +135 -0
  364. khoj/routers/helpers.py +2333 -0
  365. khoj/routers/notion.py +85 -0
  366. khoj/routers/research.py +364 -0
  367. khoj/routers/storage.py +63 -0
  368. khoj/routers/twilio.py +36 -0
  369. khoj/routers/web_client.py +141 -0
  370. khoj/search_filter/__init__.py +0 -0
  371. khoj/search_filter/base_filter.py +15 -0
  372. khoj/search_filter/date_filter.py +215 -0
  373. khoj/search_filter/file_filter.py +32 -0
  374. khoj/search_filter/word_filter.py +29 -0
  375. khoj/search_type/__init__.py +0 -0
  376. khoj/search_type/text_search.py +255 -0
  377. khoj/utils/__init__.py +0 -0
  378. khoj/utils/cli.py +101 -0
  379. khoj/utils/config.py +81 -0
  380. khoj/utils/constants.py +51 -0
  381. khoj/utils/fs_syncer.py +252 -0
  382. khoj/utils/helpers.py +627 -0
  383. khoj/utils/initialization.py +301 -0
  384. khoj/utils/jsonl.py +43 -0
  385. khoj/utils/models.py +47 -0
  386. khoj/utils/rawconfig.py +208 -0
  387. khoj/utils/state.py +48 -0
  388. khoj/utils/yaml.py +47 -0
  389. khoj-1.33.3.dev32.dist-info/METADATA +190 -0
  390. khoj-1.33.3.dev32.dist-info/RECORD +393 -0
  391. khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
  392. khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
  393. khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
khoj/routers/helpers.py
@@ -0,0 +1,2333 @@
+ import asyncio
+ import base64
+ import hashlib
+ import json
+ import logging
+ import math
+ import os
+ import re
+ from concurrent.futures import ThreadPoolExecutor
+ from datetime import datetime, timedelta, timezone
+ from enum import Enum
+ from functools import partial
+ from random import random
+ from typing import (
+     Annotated,
+     Any,
+     AsyncGenerator,
+     Callable,
+     Dict,
+     Iterator,
+     List,
+     Optional,
+     Set,
+     Tuple,
+     Union,
+ )
+ from urllib.parse import parse_qs, quote, unquote, urljoin, urlparse
+
+ import cron_descriptor
+ import pyjson5
+ import pytz
+ import requests
+ from apscheduler.job import Job
+ from apscheduler.triggers.cron import CronTrigger
+ from asgiref.sync import sync_to_async
+ from fastapi import Depends, Header, HTTPException, Request, UploadFile
+ from pydantic import BaseModel
+ from starlette.authentication import has_required_scope
+ from starlette.requests import URL
+
+ from khoj.database import adapters
+ from khoj.database.adapters import (
+     LENGTH_OF_FREE_TRIAL,
+     AgentAdapters,
+     AutomationAdapters,
+     ConversationAdapters,
+     EntryAdapters,
+     FileObjectAdapters,
+     ais_user_subscribed,
+     create_khoj_token,
+     get_khoj_tokens,
+     get_user_by_email,
+     get_user_name,
+     get_user_notion_config,
+     get_user_subscription_state,
+     run_with_process_lock,
+ )
+ from khoj.database.models import (
+     Agent,
+     ChatModel,
+     ClientApplication,
+     Conversation,
+     GithubConfig,
+     KhojUser,
+     NotionConfig,
+     ProcessLock,
+     Subscription,
+     TextToImageModelConfig,
+     UserRequests,
+ )
+ from khoj.processor.content.docx.docx_to_entries import DocxToEntries
+ from khoj.processor.content.github.github_to_entries import GithubToEntries
+ from khoj.processor.content.images.image_to_entries import ImageToEntries
+ from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
+ from khoj.processor.content.notion.notion_to_entries import NotionToEntries
+ from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
+ from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
+ from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
+ from khoj.processor.conversation import prompts
+ from khoj.processor.conversation.anthropic.anthropic_chat import (
+     anthropic_send_message_to_model,
+     converse_anthropic,
+ )
+ from khoj.processor.conversation.google.gemini_chat import (
+     converse_gemini,
+     gemini_send_message_to_model,
+ )
+ from khoj.processor.conversation.offline.chat_model import (
+     converse_offline,
+     send_message_to_model_offline,
+ )
+ from khoj.processor.conversation.openai.gpt import (
+     converse_openai,
+     send_message_to_model,
+ )
+ from khoj.processor.conversation.utils import (
+     ChatEvent,
+     ThreadedGenerator,
+     clean_json,
+     construct_chat_history,
+     generate_chatml_messages_with_context,
+     save_to_conversation_log,
+ )
+ from khoj.processor.speech.text_to_speech import is_eleven_labs_enabled
+ from khoj.routers.email import is_resend_enabled, send_task_email
+ from khoj.routers.twilio import is_twilio_enabled
+ from khoj.search_type import text_search
+ from khoj.utils import state
+ from khoj.utils.config import OfflineChatProcessorModel
+ from khoj.utils.helpers import (
+     LRU,
+     ConversationCommand,
+     get_file_type,
+     is_none_or_empty,
+     is_valid_url,
+     log_telemetry,
+     mode_descriptions_for_llm,
+     timer,
+     tool_descriptions_for_llm,
+ )
+ from khoj.utils.rawconfig import ChatRequestBody, FileAttachment, FileData, LocationData
+
+ logger = logging.getLogger(__name__)
+
+ executor = ThreadPoolExecutor(max_workers=1)
+
+
+ NOTION_OAUTH_CLIENT_ID = os.getenv("NOTION_OAUTH_CLIENT_ID")
+ NOTION_OAUTH_CLIENT_SECRET = os.getenv("NOTION_OAUTH_CLIENT_SECRET")
+ NOTION_REDIRECT_URI = os.getenv("NOTION_REDIRECT_URI")
+
+
+ def is_query_empty(query: str) -> bool:
+     return is_none_or_empty(query.strip())
+
+
+ def validate_chat_model(user: KhojUser):
+     default_chat_model = ConversationAdapters.get_default_chat_model(user)
+
+     if default_chat_model is None:
+         raise HTTPException(status_code=500, detail="Contact the server administrator to add a chat model.")
+
+     if default_chat_model.model_type == "openai" and not default_chat_model.ai_model_api:
+         raise HTTPException(status_code=500, detail="Contact the server administrator to add a chat model.")
+
+
+ async def is_ready_to_chat(user: KhojUser):
+     user_chat_model = await ConversationAdapters.aget_user_chat_model(user)
+     if user_chat_model == None:
+         user_chat_model = await ConversationAdapters.aget_default_chat_model(user)
+
+     if user_chat_model and user_chat_model.model_type == ChatModel.ModelType.OFFLINE:
+         chat_model_name = user_chat_model.name
+         max_tokens = user_chat_model.max_prompt_size
+         if state.offline_chat_processor_config is None:
+             logger.info("Loading Offline Chat Model...")
+             state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model_name, max_tokens)
+         return True
+
+     if (
+         user_chat_model
+         and (
+             user_chat_model.model_type
+             in [
+                 ChatModel.ModelType.OPENAI,
+                 ChatModel.ModelType.ANTHROPIC,
+                 ChatModel.ModelType.GOOGLE,
+             ]
+         )
+         and user_chat_model.ai_model_api
+     ):
+         return True
+
+     raise HTTPException(status_code=500, detail="Set your OpenAI API key or enable Local LLM via Khoj settings.")
+
+
+ def get_file_content(file: UploadFile):
+     file_content = file.file.read()
+     file_type, encoding = get_file_type(file.content_type, file_content)
+     return FileData(name=file.filename, content=file_content, file_type=file_type, encoding=encoding)
+
+
+ def update_telemetry_state(
+     request: Request,
+     telemetry_type: str,
+     api: str,
+     client: Optional[str] = None,
+     user_agent: Optional[str] = None,
+     referer: Optional[str] = None,
+     host: Optional[str] = None,
+     metadata: Optional[dict] = None,
+ ):
+     user: KhojUser = request.user.object if request.user.is_authenticated else None
+     client_app: ClientApplication = request.user.client_app if request.user.is_authenticated else None
+     subscription: Subscription = user.subscription if user and hasattr(user, "subscription") else None
+     user_state = {
+         "client_host": request.client.host if request.client else None,
+         "user_agent": user_agent or "unknown",
+         "referer": referer or "unknown",
+         "host": host or "unknown",
+         "server_id": str(user.uuid) if user else None,
+         "subscription_type": subscription.type if subscription else None,
+         "is_recurring": subscription.is_recurring if subscription else None,
+         "client_id": str(client_app.name) if client_app else "default",
+     }
+
+     if metadata:
+         user_state.update(metadata)
+
+     state.telemetry += [
+         log_telemetry(
+             telemetry_type=telemetry_type,
+             api=api,
+             client=client,
+             app_config=state.config.app,
+             disable_telemetry_env=state.telemetry_disabled,
+             properties=user_state,
+         )
+     ]
+
+
+ def get_next_url(request: Request) -> str:
+     "Construct next url relative to current domain from request"
+     next_url_param = urlparse(request.query_params.get("next", "/"))
+     next_path = "/"  # default next path
+     # If relative path or absolute path to current domain
+     if is_none_or_empty(next_url_param.scheme) or next_url_param.netloc == request.base_url.netloc:
+         # Use path in next query param
+         next_path = next_url_param.path
+     # Construct absolute url using current domain and next path from request
+     return urljoin(str(request.base_url).rstrip("/"), next_path)
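For context, a minimal sketch (not part of the package diff) of the open-redirect guard get_next_url implements: only a relative path, or an absolute URL on the current domain, is honoured; anything else falls back to the site root. It reimplements the same checks on plain strings instead of a Starlette Request, so the helper name sketch_next_url is hypothetical.

    from urllib.parse import urljoin, urlparse

    def sketch_next_url(base_url: str, next_param: str) -> str:
        # Mirrors get_next_url: keep the path only if it is relative or on the current domain
        parsed_next = urlparse(next_param)
        next_path = "/"
        if not parsed_next.scheme or parsed_next.netloc == urlparse(base_url).netloc:
            next_path = parsed_next.path
        return urljoin(base_url.rstrip("/"), next_path)

    assert sketch_next_url("https://app.khoj.dev/", "/settings") == "https://app.khoj.dev/settings"
    assert sketch_next_url("https://app.khoj.dev/", "https://evil.example/phish") == "https://app.khoj.dev/"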
+
+
+ def get_conversation_command(query: str) -> ConversationCommand:
+     if query.startswith("/notes"):
+         return ConversationCommand.Notes
+     elif query.startswith("/help"):
+         return ConversationCommand.Help
+     elif query.startswith("/general"):
+         return ConversationCommand.General
+     elif query.startswith("/online"):
+         return ConversationCommand.Online
+     elif query.startswith("/webpage"):
+         return ConversationCommand.Webpage
+     elif query.startswith("/image"):
+         return ConversationCommand.Image
+     elif query.startswith("/automated_task"):
+         return ConversationCommand.AutomatedTask
+     elif query.startswith("/summarize"):
+         return ConversationCommand.Summarize
+     elif query.startswith("/diagram"):
+         return ConversationCommand.Diagram
+     elif query.startswith("/code"):
+         return ConversationCommand.Code
+     elif query.startswith("/research"):
+         return ConversationCommand.Research
+     else:
+         return ConversationCommand.Default
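A short usage sketch (assuming the khoj package above is installed) of how the slash-command prefix on a chat message maps to a ConversationCommand; unprefixed messages fall through to Default.

    from khoj.routers.helpers import get_conversation_command
    from khoj.utils.helpers import ConversationCommand

    assert get_conversation_command("/online latest khoj release") == ConversationCommand.Online
    assert get_conversation_command("/summarize") == ConversationCommand.Summarize
    assert get_conversation_command("what changed this week?") == ConversationCommand.Default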
+
+
+ async def agenerate_chat_response(*args):
+     loop = asyncio.get_event_loop()
+     return await loop.run_in_executor(executor, generate_chat_response, *args)
+
+
+ def gather_raw_query_files(
+     query_files: Dict[str, str],
+ ):
+     """
+     Gather contextual data from the given (raw) files
+     """
+
+     if len(query_files) == 0:
+         return ""
+
+     contextual_data = " ".join(
+         [f"File: {file_name}\n\n{file_content}\n\n" for file_name, file_content in query_files.items()]
+     )
+     return f"I have attached the following files:\n\n{contextual_data}"
+
+
+ async def acreate_title_from_history(
+     user: KhojUser,
+     conversation: Conversation,
+ ):
+     """
+     Create a title from the given conversation history
+     """
+     chat_history = construct_chat_history(conversation.conversation_log)
+
+     title_generation_prompt = prompts.conversation_title_generation.format(chat_history=chat_history)
+
+     with timer("Chat actor: Generate title from conversation history", logger):
+         response = await send_message_to_model_wrapper(title_generation_prompt, user=user)
+
+     return response.strip()
+
+
+ async def acreate_title_from_query(query: str, user: KhojUser = None) -> str:
+     """
+     Create a title from the given query
+     """
+     title_generation_prompt = prompts.subject_generation.format(query=query)
+
+     with timer("Chat actor: Generate title from query", logger):
+         response = await send_message_to_model_wrapper(title_generation_prompt, user=user)
+
+     return response.strip()
+
+
+ async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None, lax: bool = False) -> Tuple[bool, str]:
+     """
+     Check if the system prompt is safe to use
+     """
+     safe_prompt_check = (
+         prompts.personality_prompt_safety_expert.format(prompt=system_prompt)
+         if not lax
+         else prompts.personality_prompt_safety_expert_lax.format(prompt=system_prompt)
+     )
+     is_safe = True
+     reason = ""
+
+     with timer("Chat actor: Check if safe prompt", logger):
+         response = await send_message_to_model_wrapper(safe_prompt_check, user=user)
+
+         response = response.strip()
+         try:
+             response = json.loads(clean_json(response))
+             is_safe = response.get("safe", "True") == "True"
+             if not is_safe:
+                 reason = response.get("reason", "")
+         except Exception:
+             logger.error(f"Invalid response for checking safe prompt: {response}")
+
+     if not is_safe:
+         logger.error(f"Unsafe prompt: {system_prompt}. Reason: {reason}")
+
+     return is_safe, reason
+
+
+ async def aget_data_sources_and_output_format(
+     query: str,
+     conversation_history: dict,
+     is_task: bool,
+     user: KhojUser,
+     query_images: List[str] = None,
+     agent: Agent = None,
+     query_files: str = None,
+     tracer: dict = {},
+ ) -> Dict[str, Any]:
+     """
+     Given a query, determine which of the available data sources and output modes the agent should use to answer appropriately.
+     """
+
+     source_options = dict()
+     source_options_str = ""
+
+     agent_sources = agent.input_tools if agent else []
+
+     for source, description in tool_descriptions_for_llm.items():
+         source_options[source.value] = description
+         if len(agent_sources) == 0 or source.value in agent_sources:
+             source_options_str += f'- "{source.value}": "{description}"\n'
+
+     output_options = dict()
+     output_options_str = ""
+
+     agent_outputs = agent.output_modes if agent else []
+
+     for output, description in mode_descriptions_for_llm.items():
+         # Do not allow tasks to schedule another task
+         if is_task and output == ConversationCommand.Automation:
+             continue
+         output_options[output.value] = description
+         if len(agent_outputs) == 0 or output.value in agent_outputs:
+             output_options_str += f'- "{output.value}": "{description}"\n'
+
+     chat_history = construct_chat_history(conversation_history)
+
+     if query_images:
+         query = f"[placeholder for {len(query_images)} user attached images]\n{query}"
+
+     personality_context = (
+         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
+     )
+
+     relevant_tools_prompt = prompts.pick_relevant_tools.format(
+         query=query,
+         sources=source_options_str,
+         outputs=output_options_str,
+         chat_history=chat_history,
+         personality_context=personality_context,
+     )
+
+     with timer("Chat actor: Infer information sources to refer", logger):
+         response = await send_message_to_model_wrapper(
+             relevant_tools_prompt,
+             response_type="json_object",
+             user=user,
+             query_files=query_files,
+             tracer=tracer,
+         )
+
+     try:
+         response = clean_json(response)
+         response = json.loads(response)
+
+         chosen_sources = [s.strip() for s in response.get("source", []) if s.strip()]
+         chosen_output = response.get("output", "text").strip()  # Default to text output
+
+         if is_none_or_empty(chosen_sources) or not isinstance(chosen_sources, list):
+             raise ValueError(
+                 f"Invalid response for determining relevant tools: {chosen_sources}. Raw Response: {response}"
+             )
+
+         output_mode = ConversationCommand.Text
+         # Verify selected output mode is enabled for the agent, as the LLM can sometimes get confused by the tool options.
+         if chosen_output in output_options.keys() and (len(agent_outputs) == 0 or chosen_output in agent_outputs):
+             # Ensure that the chosen output mode exists as a valid ConversationCommand
+             output_mode = ConversationCommand(chosen_output)
+
+         data_sources = []
+         # Verify selected data sources are enabled for the agent, as the LLM can sometimes get confused by the tool options.
+         for chosen_source in chosen_sources:
+             # Ensure that the chosen data source exists as a valid ConversationCommand
+             if chosen_source in source_options.keys() and (len(agent_sources) == 0 or chosen_source in agent_sources):
+                 data_sources.append(ConversationCommand(chosen_source))
+
+         # Fallback to default sources if the inferred data sources are unset or invalid
+         if is_none_or_empty(data_sources):
+             if len(agent_sources) == 0:
+                 data_sources = [ConversationCommand.Default]
+             else:
+                 data_sources = [ConversationCommand.General]
+     except Exception as e:
+         logger.error(f"Invalid response for determining relevant tools: {response}. Error: {e}", exc_info=True)
+         data_sources = agent_sources if len(agent_sources) > 0 else [ConversationCommand.Default]
+         output_mode = agent_outputs[0] if len(agent_outputs) > 0 else ConversationCommand.Text
+
+     return {"sources": data_sources, "output": output_mode}
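Illustrative only: the JSON object shape this picker asks the model for, and the parsing it applies before mapping values onto ConversationCommand members. The example reply below is made up; the parsing lines mirror the function body above.

    import json

    raw_reply = '{"source": ["notes", "online"], "output": "text"}'  # hypothetical model reply
    reply = json.loads(raw_reply)
    chosen_sources = [s.strip() for s in reply.get("source", []) if s.strip()]
    chosen_output = reply.get("output", "text").strip()
    print(chosen_sources, chosen_output)  # ['notes', 'online'] text
    # The function then returns {"sources": [...ConversationCommand...], "output": ConversationCommand.Text}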
+
+
+ async def infer_webpage_urls(
+     q: str,
+     conversation_history: dict,
+     location_data: LocationData,
+     user: KhojUser,
+     query_images: List[str] = None,
+     agent: Agent = None,
+     query_files: str = None,
+     tracer: dict = {},
+ ) -> List[str]:
+     """
+     Infer webpage links from the given query
+     """
+     location = f"{location_data}" if location_data else "Unknown"
+     username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else ""
+     chat_history = construct_chat_history(conversation_history)
+
+     utc_date = datetime.utcnow().strftime("%Y-%m-%d")
+     personality_context = (
+         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
+     )
+
+     online_queries_prompt = prompts.infer_webpages_to_read.format(
+         current_date=utc_date,
+         query=q,
+         chat_history=chat_history,
+         location=location,
+         username=username,
+         personality_context=personality_context,
+     )
+
+     with timer("Chat actor: Infer webpage urls to read", logger):
+         response = await send_message_to_model_wrapper(
+             online_queries_prompt,
+             query_images=query_images,
+             response_type="json_object",
+             user=user,
+             query_files=query_files,
+             tracer=tracer,
+         )
+
+     # Validate that the response is a non-empty, JSON-serializable list of URLs
+     try:
+         response = clean_json(response)
+         urls = json.loads(response)
+         valid_unique_urls = {str(url).strip() for url in urls["links"] if is_valid_url(url)}
+         if is_none_or_empty(valid_unique_urls):
+             raise ValueError(f"Invalid list of urls: {response}")
+         if len(valid_unique_urls) == 0:
+             logger.error(f"No valid URLs found in response: {response}")
+             return []
+         return list(valid_unique_urls)
+     except Exception:
+         raise ValueError(f"Invalid list of urls: {response}")
+
+
+ async def generate_online_subqueries(
+     q: str,
+     conversation_history: dict,
+     location_data: LocationData,
+     user: KhojUser,
+     query_images: List[str] = None,
+     agent: Agent = None,
+     query_files: str = None,
+     tracer: dict = {},
+ ) -> Set[str]:
+     """
+     Generate subqueries from the given query
+     """
+     location = f"{location_data}" if location_data else "Unknown"
+     username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else ""
+     chat_history = construct_chat_history(conversation_history)
+
+     utc_date = datetime.utcnow().strftime("%Y-%m-%d")
+     personality_context = (
+         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
+     )
+
+     online_queries_prompt = prompts.online_search_conversation_subqueries.format(
+         current_date=utc_date,
+         query=q,
+         chat_history=chat_history,
+         location=location,
+         username=username,
+         personality_context=personality_context,
+     )
+
+     with timer("Chat actor: Generate online search subqueries", logger):
+         response = await send_message_to_model_wrapper(
+             online_queries_prompt,
+             query_images=query_images,
+             response_type="json_object",
+             user=user,
+             query_files=query_files,
+             tracer=tracer,
+         )
+
+     # Validate that the response is a non-empty, JSON-serializable list
+     try:
+         response = clean_json(response)
+         response = pyjson5.loads(response)
+         response = {q.strip() for q in response["queries"] if q.strip()}
+         if not isinstance(response, set) or not response or len(response) == 0:
+             logger.error(f"Invalid response for constructing subqueries: {response}. Returning original query: {q}")
+             return {q}
+         return response
+     except Exception as e:
+         logger.error(f"Invalid response for constructing subqueries: {response}. Returning original query: {q}")
+         return {q}
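Illustrative only: the reply shape the subquery actor expects from the model. Because it parses with pyjson5, JSON5 relaxations such as a trailing comma also parse; the example payload below is made up.

    import pyjson5

    raw_reply = '{"queries": ["khoj 1.33 release notes", "khoj self-host changelog",]}'
    subqueries = {q.strip() for q in pyjson5.loads(raw_reply)["queries"] if q.strip()}
    print(subqueries)  # a set of search subqueries, deduplicated and stripped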
+
+
+ def schedule_query(
+     q: str, conversation_history: dict, user: KhojUser, query_images: List[str] = None, tracer: dict = {}
+ ) -> Tuple[str, str, str]:
+     """
+     Schedule the date, time to run the query. Assume the server timezone is UTC.
+     """
+     chat_history = construct_chat_history(conversation_history)
+
+     crontime_prompt = prompts.crontime_prompt.format(
+         query=q,
+         chat_history=chat_history,
+     )
+
+     raw_response = send_message_to_model_wrapper_sync(
+         crontime_prompt, query_images=query_images, response_type="json_object", user=user, tracer=tracer
+     )
+
+     # Validate that the response is a non-empty, JSON-serializable list
+     try:
+         raw_response = raw_response.strip()
+         response: Dict[str, str] = json.loads(clean_json(raw_response))
+         if not response or not isinstance(response, Dict) or len(response) != 3:
+             raise AssertionError(f"Invalid response for scheduling query : {response}")
+         return response.get("crontime"), response.get("query"), response.get("subject")
+     except Exception:
+         raise AssertionError(f"Invalid response for scheduling query: {raw_response}")
+
+
+ async def aschedule_query(
+     q: str, conversation_history: dict, user: KhojUser, query_images: List[str] = None, tracer: dict = {}
+ ) -> Tuple[str, str, str]:
+     """
+     Schedule the date, time to run the query. Assume the server timezone is UTC.
+     """
+     chat_history = construct_chat_history(conversation_history)
+
+     crontime_prompt = prompts.crontime_prompt.format(
+         query=q,
+         chat_history=chat_history,
+     )
+
+     raw_response = await send_message_to_model_wrapper(
+         crontime_prompt, query_images=query_images, response_type="json_object", user=user, tracer=tracer
+     )
+
+     # Validate that the response is a non-empty, JSON-serializable list
+     try:
+         raw_response = raw_response.strip()
+         response: Dict[str, str] = json.loads(clean_json(raw_response))
+         if not response or not isinstance(response, Dict) or len(response) != 3:
+             raise AssertionError(f"Invalid response for scheduling query : {response}")
+         return response.get("crontime"), response.get("query"), response.get("subject")
+     except Exception:
+         raise AssertionError(f"Invalid response for scheduling query: {raw_response}")
+
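A sketch (not from the package) of a well-formed reply to these scheduling actors, which expect exactly the three keys crontime, query, and subject. The crontime value is a standard cron expression, so it can be inspected with cron_descriptor and apscheduler, both already imported at the top of this module; the example values are made up.

    import json

    import cron_descriptor
    from apscheduler.triggers.cron import CronTrigger

    raw_reply = '{"crontime": "30 9 * * 1", "query": "Summarize my week", "subject": "Weekly summary"}'
    reply = json.loads(raw_reply)
    crontime, query, subject = reply["crontime"], reply["query"], reply["subject"]

    print(cron_descriptor.get_description(crontime))  # e.g. "At 09:30 AM, only on Monday"
    trigger = CronTrigger.from_crontab(crontime, timezone="UTC")  # server timezone assumed UTC, as the docstring notes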
+
+
+ async def extract_relevant_info(
+     qs: set[str], corpus: str, user: KhojUser = None, agent: Agent = None, tracer: dict = {}
+ ) -> Union[str, None]:
+     """
+     Extract relevant information for a given query from the target corpus
+     """
+
+     if is_none_or_empty(corpus) or is_none_or_empty(qs):
+         return None
+
+     personality_context = (
+         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
+     )
+
+     extract_relevant_information = prompts.extract_relevant_information.format(
+         query=", ".join(qs),
+         corpus=corpus.strip(),
+         personality_context=personality_context,
+     )
+
+     response = await send_message_to_model_wrapper(
+         extract_relevant_information,
+         prompts.system_prompt_extract_relevant_information,
+         user=user,
+         tracer=tracer,
+     )
+     return response.strip()
+
+
+ async def extract_relevant_summary(
+     q: str,
+     corpus: str,
+     conversation_history: dict,
+     query_images: List[str] = None,
+     user: KhojUser = None,
+     agent: Agent = None,
+     tracer: dict = {},
+ ) -> Union[str, None]:
+     """
+     Extract relevant information for a given query from the target corpus
+     """
+
+     if is_none_or_empty(corpus) or is_none_or_empty(q):
+         return None
+
+     personality_context = (
+         prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
+     )
+
+     chat_history = construct_chat_history(conversation_history)
+
+     extract_relevant_information = prompts.extract_relevant_summary.format(
+         query=q,
+         chat_history=chat_history,
+         corpus=corpus.strip(),
+         personality_context=personality_context,
+     )
+
+     with timer("Chat actor: Extract relevant information from data", logger):
+         response = await send_message_to_model_wrapper(
+             extract_relevant_information,
+             prompts.system_prompt_extract_relevant_summary,
+             user=user,
+             query_images=query_images,
+             tracer=tracer,
+         )
+     return response.strip()
+
+
+ async def generate_summary_from_files(
+     q: str,
+     user: KhojUser,
+     file_filters: List[str],
+     meta_log: dict,
+     query_images: List[str] = None,
+     agent: Agent = None,
+     send_status_func: Optional[Callable] = None,
+     query_files: str = None,
+     tracer: dict = {},
+ ):
+     try:
+         file_objects = None
+         if await EntryAdapters.aagent_has_entries(agent):
+             file_names = await EntryAdapters.aget_agent_entry_filepaths(agent)
+             if len(file_names) > 0:
+                 file_objects = await FileObjectAdapters.aget_file_objects_by_name(None, file_names.pop(), agent)
+
+         if (file_objects and len(file_objects) == 0 and not query_files) or (not file_objects and not query_files):
+             response_log = "Sorry, I couldn't find anything to summarize."
+             yield response_log
+             return
+
+         contextual_data = " ".join([f"File: {file.file_name}\n\n{file.raw_text}" for file in file_objects])
+
+         if query_files:
+             contextual_data += f"\n\n{query_files}"
+
+         if not q:
+             q = "Create a general summary of the file"
+
+         file_names = [file.file_name for file in file_objects]
+         file_names.extend(file_filters)
+
+         all_file_names = ""
+
+         for file_name in file_names:
+             all_file_names += f"- {file_name}\n"
+
+         async for result in send_status_func(f"**Constructing Summary Using:**\n{all_file_names}"):
+             yield {ChatEvent.STATUS: result}
+
+         response = await extract_relevant_summary(
+             q,
+             contextual_data,
+             conversation_history=meta_log,
+             query_images=query_images,
+             user=user,
+             agent=agent,
+             tracer=tracer,
+         )
+
+         yield str(response)
+     except Exception as e:
+         response_log = "Error summarizing file. Please try again, or contact support."
+         logger.error(f"Error summarizing file for {user.email}: {e}", exc_info=True)
+         yield response_log
736
+
737
+
738
+ async def generate_excalidraw_diagram(
739
+ q: str,
740
+ conversation_history: Dict[str, Any],
741
+ location_data: LocationData,
742
+ note_references: List[Dict[str, Any]],
743
+ online_results: Optional[dict] = None,
744
+ query_images: List[str] = None,
745
+ user: KhojUser = None,
746
+ agent: Agent = None,
747
+ send_status_func: Optional[Callable] = None,
748
+ query_files: str = None,
749
+ tracer: dict = {},
750
+ ):
751
+ if send_status_func:
752
+ async for event in send_status_func("**Enhancing the Diagramming Prompt**"):
753
+ yield {ChatEvent.STATUS: event}
754
+
755
+ better_diagram_description_prompt = await generate_better_diagram_description(
756
+ q=q,
757
+ conversation_history=conversation_history,
758
+ location_data=location_data,
759
+ note_references=note_references,
760
+ online_results=online_results,
761
+ query_images=query_images,
762
+ user=user,
763
+ agent=agent,
764
+ query_files=query_files,
765
+ tracer=tracer,
766
+ )
767
+
768
+ if send_status_func:
769
+ async for event in send_status_func(f"**Diagram to Create:**:\n{better_diagram_description_prompt}"):
770
+ yield {ChatEvent.STATUS: event}
771
+ try:
772
+ excalidraw_diagram_description = await generate_excalidraw_diagram_from_description(
773
+ q=better_diagram_description_prompt,
774
+ user=user,
775
+ agent=agent,
776
+ tracer=tracer,
777
+ )
778
+ except Exception as e:
779
+ logger.error(f"Error generating Excalidraw diagram for {user.email}: {e}", exc_info=True)
780
+ yield better_diagram_description_prompt, None
781
+ return
782
+
783
+ scratchpad = excalidraw_diagram_description.get("scratchpad")
784
+
785
+ inferred_queries = f"Instruction: {better_diagram_description_prompt}\n\nScratchpad: {scratchpad}"
786
+
787
+ yield inferred_queries, excalidraw_diagram_description.get("elements")
788
+
789
+
790
+ async def generate_better_diagram_description(
791
+ q: str,
792
+ conversation_history: Dict[str, Any],
793
+ location_data: LocationData,
794
+ note_references: List[Dict[str, Any]],
795
+ online_results: Optional[dict] = None,
796
+ query_images: List[str] = None,
797
+ user: KhojUser = None,
798
+ agent: Agent = None,
799
+ query_files: str = None,
800
+ tracer: dict = {},
801
+ ) -> str:
802
+ """
803
+ Generate a diagram description from the given query and context
804
+ """
805
+
806
+ today_date = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d, %A")
807
+ personality_context = (
808
+ prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
809
+ )
810
+
811
+ location = f"{location_data}" if location_data else "Unknown"
812
+
813
+ user_references = "\n\n".join([f"# {item['compiled']}" for item in note_references])
814
+
815
+ chat_history = construct_chat_history(conversation_history)
816
+
817
+ simplified_online_results = {}
818
+
819
+ if online_results:
820
+ for result in online_results:
821
+ if online_results[result].get("answerBox"):
822
+ simplified_online_results[result] = online_results[result]["answerBox"]
823
+ elif online_results[result].get("webpages"):
824
+ simplified_online_results[result] = online_results[result]["webpages"]
825
+
826
+ improve_diagram_description_prompt = prompts.improve_diagram_description_prompt.format(
827
+ query=q,
828
+ chat_history=chat_history,
829
+ location=location,
830
+ current_date=today_date,
831
+ references=user_references,
832
+ online_results=simplified_online_results,
833
+ personality_context=personality_context,
834
+ )
835
+
836
+ with timer("Chat actor: Generate better diagram description", logger):
837
+ response = await send_message_to_model_wrapper(
838
+ improve_diagram_description_prompt,
839
+ query_images=query_images,
840
+ user=user,
841
+ query_files=query_files,
842
+ tracer=tracer,
843
+ )
844
+ response = response.strip()
845
+ if response.startswith(('"', "'")) and response.endswith(('"', "'")):
846
+ response = response[1:-1]
847
+
848
+ return response
849
+
850
+
851
+ async def generate_excalidraw_diagram_from_description(
852
+ q: str,
853
+ user: KhojUser = None,
854
+ agent: Agent = None,
855
+ tracer: dict = {},
856
+ ) -> Dict[str, Any]:
857
+ personality_context = (
858
+ prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
859
+ )
860
+
861
+ excalidraw_diagram_generation = prompts.excalidraw_diagram_generation_prompt.format(
862
+ personality_context=personality_context,
863
+ query=q,
864
+ )
865
+
866
+ with timer("Chat actor: Generate excalidraw diagram", logger):
867
+ raw_response = await send_message_to_model_wrapper(
868
+ query=excalidraw_diagram_generation, user=user, tracer=tracer
869
+ )
870
+ raw_response = clean_json(raw_response)
871
+ try:
872
+ # Expect response to have `elements` and `scratchpad` keys
873
+ response: Dict[str, Any] = json.loads(raw_response)
874
+ if (
875
+ not response
876
+ or not isinstance(response, Dict)
877
+ or not response.get("elements")
878
+ or not response.get("scratchpad")
879
+ ):
880
+ raise AssertionError(f"Invalid response for generating Excalidraw diagram: {response}")
881
+ except Exception:
882
+ raise AssertionError(f"Invalid response for generating Excalidraw diagram: {raw_response}")
883
+ if not response or not isinstance(response["elements"], List) or not isinstance(response["elements"][0], Dict):
884
+ # TODO Some additional validation here that it's a valid Excalidraw diagram
885
+ raise AssertionError(f"Invalid response for improving diagram description: {response}")
886
+
887
+ return response
888
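For reference, a response that passes the validation above could look like the following. This is an illustrative example rather than captured model output; the element fields shown (type, x, y, width, height) are typical Excalidraw element keys and are not themselves checked by this function.

# Hypothetical model output that satisfies the checks in
# generate_excalidraw_diagram_from_description: a dict with a non-empty
# "scratchpad" string and an "elements" list of dicts.
sample_response = {
    "scratchpad": "Plan: draw one labelled rectangle for the main component",
    "elements": [
        {"type": "rectangle", "x": 0, "y": 0, "width": 200, "height": 80},
    ],
}
assert sample_response.get("scratchpad") and sample_response.get("elements")
assert isinstance(sample_response["elements"], list)
assert isinstance(sample_response["elements"][0], dict)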
+
889
+
890
+ async def generate_better_image_prompt(
891
+ q: str,
892
+ conversation_history: str,
893
+ location_data: LocationData,
894
+ note_references: List[Dict[str, Any]],
895
+ online_results: Optional[dict] = None,
896
+ model_type: Optional[str] = None,
897
+ query_images: Optional[List[str]] = None,
898
+ user: KhojUser = None,
899
+ agent: Agent = None,
900
+ query_files: str = "",
901
+ tracer: dict = {},
902
+ ) -> str:
903
+ """
904
+ Generate a better image prompt from the given query
905
+ """
906
+
907
+ today_date = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d, %A")
908
+ personality_context = (
909
+ prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
910
+ )
911
+ model_type = model_type or TextToImageModelConfig.ModelType.OPENAI
912
+
913
+ location = f"{location_data}" if location_data else "Unknown"
914
+
915
+ user_references = "\n\n".join([f"# {item['compiled']}" for item in note_references])
916
+
917
+ simplified_online_results = {}
918
+
919
+ if online_results:
920
+ for result in online_results:
921
+ if online_results[result].get("answerBox"):
922
+ simplified_online_results[result] = online_results[result]["answerBox"]
923
+ elif online_results[result].get("webpages"):
924
+ simplified_online_results[result] = online_results[result]["webpages"]
925
+
926
+ if model_type == TextToImageModelConfig.ModelType.OPENAI:
927
+ image_prompt = prompts.image_generation_improve_prompt_dalle.format(
928
+ query=q,
929
+ chat_history=conversation_history,
930
+ location=location,
931
+ current_date=today_date,
932
+ references=user_references,
933
+ online_results=simplified_online_results,
934
+ personality_context=personality_context,
935
+ )
936
+ elif model_type in [TextToImageModelConfig.ModelType.STABILITYAI, TextToImageModelConfig.ModelType.REPLICATE]:
937
+ image_prompt = prompts.image_generation_improve_prompt_sd.format(
938
+ query=q,
939
+ chat_history=conversation_history,
940
+ location=location,
941
+ current_date=today_date,
942
+ references=user_references,
943
+ online_results=simplified_online_results,
944
+ personality_context=personality_context,
945
+ )
946
+
947
+ with timer("Chat actor: Generate contextual image prompt", logger):
948
+ response = await send_message_to_model_wrapper(
949
+ image_prompt, query_images=query_images, user=user, query_files=query_files, tracer=tracer
950
+ )
951
+ response = response.strip()
952
+ if response.startswith(('"', "'")) and response.endswith(('"', "'")):
953
+ response = response[1:-1]
954
+
955
+ return response
956
+
957
+
958
+ async def send_message_to_model_wrapper(
959
+ query: str,
960
+ system_message: str = "",
961
+ response_type: str = "text",
962
+ user: KhojUser = None,
963
+ query_images: List[str] = None,
964
+ context: str = "",
965
+ query_files: str = None,
966
+ tracer: dict = {},
967
+ ):
968
+ chat_model: ChatModel = await ConversationAdapters.aget_default_chat_model(user)
969
+ vision_available = chat_model.vision_enabled
970
+ if not vision_available and query_images:
971
+ logger.warning(f"Vision is not enabled for default model: {chat_model.name}.")
972
+ vision_enabled_config = await ConversationAdapters.aget_vision_enabled_config()
973
+ if vision_enabled_config:
974
+ chat_model = vision_enabled_config
975
+ vision_available = True
976
+ if vision_available and query_images:
977
+ logger.info(f"Using {chat_model.name} model to understand {len(query_images)} images.")
978
+
979
+ subscribed = await ais_user_subscribed(user)
980
+ chat_model_name = chat_model.name
981
+ max_tokens = (
982
+ chat_model.subscribed_max_prompt_size
983
+ if subscribed and chat_model.subscribed_max_prompt_size
984
+ else chat_model.max_prompt_size
985
+ )
986
+ tokenizer = chat_model.tokenizer
987
+ model_type = chat_model.model_type
988
+ vision_available = chat_model.vision_enabled
989
+
990
+ if model_type == ChatModel.ModelType.OFFLINE:
991
+ if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
992
+ state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model_name, max_tokens)
993
+
994
+ loaded_model = state.offline_chat_processor_config.loaded_model
995
+ truncated_messages = generate_chatml_messages_with_context(
996
+ user_message=query,
997
+ context_message=context,
998
+ system_message=system_message,
999
+ model_name=chat_model_name,
1000
+ loaded_model=loaded_model,
1001
+ tokenizer_name=tokenizer,
1002
+ max_prompt_size=max_tokens,
1003
+ vision_enabled=vision_available,
1004
+ model_type=chat_model.model_type,
1005
+ query_files=query_files,
1006
+ )
1007
+
1008
+ return send_message_to_model_offline(
1009
+ messages=truncated_messages,
1010
+ loaded_model=loaded_model,
1011
+ model_name=chat_model_name,
1012
+ max_prompt_size=max_tokens,
1013
+ streaming=False,
1014
+ response_type=response_type,
1015
+ tracer=tracer,
1016
+ )
1017
+
1018
+ elif model_type == ChatModel.ModelType.OPENAI:
1019
+ openai_chat_config = chat_model.ai_model_api
1020
+ api_key = openai_chat_config.api_key
1021
+ api_base_url = openai_chat_config.api_base_url
1022
+ truncated_messages = generate_chatml_messages_with_context(
1023
+ user_message=query,
1024
+ context_message=context,
1025
+ system_message=system_message,
1026
+ model_name=chat_model_name,
1027
+ max_prompt_size=max_tokens,
1028
+ tokenizer_name=tokenizer,
1029
+ vision_enabled=vision_available,
1030
+ query_images=query_images,
1031
+ model_type=chat_model.model_type,
1032
+ query_files=query_files,
1033
+ )
1034
+
1035
+ return send_message_to_model(
1036
+ messages=truncated_messages,
1037
+ api_key=api_key,
1038
+ model=chat_model_name,
1039
+ response_type=response_type,
1040
+ api_base_url=api_base_url,
1041
+ tracer=tracer,
1042
+ )
1043
+ elif model_type == ChatModel.ModelType.ANTHROPIC:
1044
+ api_key = chat_model.ai_model_api.api_key
1045
+ truncated_messages = generate_chatml_messages_with_context(
1046
+ user_message=query,
1047
+ context_message=context,
1048
+ system_message=system_message,
1049
+ model_name=chat_model_name,
1050
+ max_prompt_size=max_tokens,
1051
+ tokenizer_name=tokenizer,
1052
+ vision_enabled=vision_available,
1053
+ query_images=query_images,
1054
+ model_type=chat_model.model_type,
1055
+ query_files=query_files,
1056
+ )
1057
+
1058
+ return anthropic_send_message_to_model(
1059
+ messages=truncated_messages,
1060
+ api_key=api_key,
1061
+ model=chat_model_name,
1062
+ response_type=response_type,
1063
+ tracer=tracer,
1064
+ )
1065
+ elif model_type == ChatModel.ModelType.GOOGLE:
1066
+ api_key = chat_model.ai_model_api.api_key
1067
+ truncated_messages = generate_chatml_messages_with_context(
1068
+ user_message=query,
1069
+ context_message=context,
1070
+ system_message=system_message,
1071
+ model_name=chat_model_name,
1072
+ max_prompt_size=max_tokens,
1073
+ tokenizer_name=tokenizer,
1074
+ vision_enabled=vision_available,
1075
+ query_images=query_images,
1076
+ model_type=chat_model.model_type,
1077
+ query_files=query_files,
1078
+ )
1079
+
1080
+ return gemini_send_message_to_model(
1081
+ messages=truncated_messages,
1082
+ api_key=api_key,
1083
+ model=chat_model_name,
1084
+ response_type=response_type,
1085
+ tracer=tracer,
1086
+ )
1087
+ else:
1088
+ raise HTTPException(status_code=500, detail="Invalid conversation config")
1089
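The wrapper above resolves the user's default chat model and dispatches to the matching provider function. A minimal usage sketch follows, mirroring how other helpers in this module call it; `classify_sentiment` is a hypothetical caller, not part of the package.

# Hypothetical caller: ask the default chat model for a structured JSON answer,
# then parse it with the same clean_json/json.loads pattern used elsewhere here.
async def classify_sentiment(text: str, user: KhojUser) -> dict:
    raw_response = await send_message_to_model_wrapper(
        query=f"Classify the sentiment of this text as positive, negative or neutral: {text}",
        system_message="Respond with a JSON object containing a single 'sentiment' key.",
        response_type="json_object",
        user=user,
    )
    return json.loads(clean_json(raw_response))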
+
1090
+
1091
+ def send_message_to_model_wrapper_sync(
1092
+ message: str,
1093
+ system_message: str = "",
1094
+ response_type: str = "text",
1095
+ user: KhojUser = None,
1096
+ query_images: List[str] = None,
1097
+ query_files: str = "",
1098
+ tracer: dict = {},
1099
+ ):
1100
+ chat_model: ChatModel = ConversationAdapters.get_default_chat_model(user)
1101
+
1102
+ if chat_model is None:
1103
+ raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.")
1104
+
1105
+ chat_model_name = chat_model.name
1106
+ max_tokens = chat_model.max_prompt_size
1107
+ vision_available = chat_model.vision_enabled
1108
+
1109
+ if chat_model.model_type == ChatModel.ModelType.OFFLINE:
1110
+ if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
1111
+ state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model_name, max_tokens)
1112
+
1113
+ loaded_model = state.offline_chat_processor_config.loaded_model
1114
+ truncated_messages = generate_chatml_messages_with_context(
1115
+ user_message=message,
1116
+ system_message=system_message,
1117
+ model_name=chat_model_name,
1118
+ loaded_model=loaded_model,
1119
+ max_prompt_size=max_tokens,
1120
+ vision_enabled=vision_available,
1121
+ model_type=chat_model.model_type,
1122
+ query_images=query_images,
1123
+ query_files=query_files,
1124
+ )
1125
+
1126
+ return send_message_to_model_offline(
1127
+ messages=truncated_messages,
1128
+ loaded_model=loaded_model,
1129
+ model_name=chat_model_name,
1130
+ max_prompt_size=max_tokens,
1131
+ streaming=False,
1132
+ response_type=response_type,
1133
+ tracer=tracer,
1134
+ )
1135
+
1136
+ elif chat_model.model_type == ChatModel.ModelType.OPENAI:
1137
+ api_key = chat_model.ai_model_api.api_key
1138
+ api_base_url = chat_model.ai_model_api.api_base_url
1139
+ truncated_messages = generate_chatml_messages_with_context(
1140
+ user_message=message,
1141
+ system_message=system_message,
1142
+ model_name=chat_model_name,
1143
+ max_prompt_size=max_tokens,
1144
+ vision_enabled=vision_available,
1145
+ model_type=chat_model.model_type,
1146
+ query_images=query_images,
1147
+ query_files=query_files,
1148
+ )
1149
+
1150
+ openai_response = send_message_to_model(
1151
+ messages=truncated_messages,
1152
+ api_key=api_key,
1153
+ api_base_url=api_base_url,
1154
+ model=chat_model_name,
1155
+ response_type=response_type,
1156
+ tracer=tracer,
1157
+ )
1158
+
1159
+ return openai_response
1160
+
1161
+ elif chat_model.model_type == ChatModel.ModelType.ANTHROPIC:
1162
+ api_key = chat_model.ai_model_api.api_key
1163
+ truncated_messages = generate_chatml_messages_with_context(
1164
+ user_message=message,
1165
+ system_message=system_message,
1166
+ model_name=chat_model_name,
1167
+ max_prompt_size=max_tokens,
1168
+ vision_enabled=vision_available,
1169
+ model_type=chat_model.model_type,
1170
+ query_images=query_images,
1171
+ query_files=query_files,
1172
+ )
1173
+
1174
+ return anthropic_send_message_to_model(
1175
+ messages=truncated_messages,
1176
+ api_key=api_key,
1177
+ model=chat_model_name,
1178
+ response_type=response_type,
1179
+ tracer=tracer,
1180
+ )
1181
+
1182
+ elif chat_model.model_type == ChatModel.ModelType.GOOGLE:
1183
+ api_key = chat_model.ai_model_api.api_key
1184
+ truncated_messages = generate_chatml_messages_with_context(
1185
+ user_message=message,
1186
+ system_message=system_message,
1187
+ model_name=chat_model_name,
1188
+ max_prompt_size=max_tokens,
1189
+ vision_enabled=vision_available,
1190
+ model_type=chat_model.model_type,
1191
+ query_images=query_images,
1192
+ query_files=query_files,
1193
+ )
1194
+
1195
+ return gemini_send_message_to_model(
1196
+ messages=truncated_messages,
1197
+ api_key=api_key,
1198
+ model=chat_model_name,
1199
+ response_type=response_type,
1200
+ tracer=tracer,
1201
+ )
1202
+ else:
1203
+ raise HTTPException(status_code=500, detail="Invalid conversation config")
1204
+
1205
+
1206
+ def generate_chat_response(
1207
+ q: str,
1208
+ meta_log: dict,
1209
+ conversation: Conversation,
1210
+ compiled_references: List[Dict] = [],
1211
+ online_results: Dict[str, Dict] = {},
1212
+ code_results: Dict[str, Dict] = {},
1213
+ inferred_queries: List[str] = [],
1214
+ conversation_commands: List[ConversationCommand] = [ConversationCommand.Default],
1215
+ user: KhojUser = None,
1216
+ client_application: ClientApplication = None,
1217
+ conversation_id: str = None,
1218
+ location_data: LocationData = None,
1219
+ user_name: Optional[str] = None,
1220
+ meta_research: str = "",
1221
+ query_images: Optional[List[str]] = None,
1222
+ train_of_thought: List[Any] = [],
1223
+ query_files: str = None,
1224
+ raw_query_files: List[FileAttachment] = None,
1225
+ generated_images: List[str] = None,
1226
+ raw_generated_files: List[FileAttachment] = [],
1227
+ generated_excalidraw_diagram: str = None,
1228
+ program_execution_context: List[str] = [],
1229
+ generated_asset_results: Dict[str, Dict] = {},
1230
+ tracer: dict = {},
1231
+ ) -> Tuple[Union[ThreadedGenerator, Iterator[str]], Dict[str, str]]:
1232
+ # Initialize Variables
1233
+ chat_response = None
1234
+ logger.debug(f"Conversation Types: {conversation_commands}")
1235
+
1236
+ metadata = {}
1237
+ agent = AgentAdapters.get_conversation_agent_by_id(conversation.agent.id) if conversation.agent else None
1238
+ try:
1239
+ partial_completion = partial(
1240
+ save_to_conversation_log,
1241
+ q,
1242
+ user=user,
1243
+ meta_log=meta_log,
1244
+ compiled_references=compiled_references,
1245
+ online_results=online_results,
1246
+ code_results=code_results,
1247
+ inferred_queries=inferred_queries,
1248
+ client_application=client_application,
1249
+ conversation_id=conversation_id,
1250
+ query_images=query_images,
1251
+ train_of_thought=train_of_thought,
1252
+ raw_query_files=raw_query_files,
1253
+ generated_images=generated_images,
1254
+ raw_generated_files=raw_generated_files,
1255
+ generated_excalidraw_diagram=generated_excalidraw_diagram,
1256
+ tracer=tracer,
1257
+ )
1258
+
1259
+ query_to_run = q
1260
+ if meta_research:
1261
+ query_to_run = f"<query>{q}</query>\n<collected_research>\n{meta_research}\n</collected_research>"
1262
+ compiled_references = []
1263
+ online_results = {}
1264
+ code_results = {}
1265
+
1266
+ chat_model = ConversationAdapters.get_valid_chat_model(user, conversation)
1267
+ vision_available = chat_model.vision_enabled
1268
+ if not vision_available and query_images:
1269
+ vision_enabled_config = ConversationAdapters.get_vision_enabled_config()
1270
+ if vision_enabled_config:
1271
+ chat_model = vision_enabled_config
1272
+ vision_available = True
1273
+
1274
+ if chat_model.model_type == "offline":
1275
+ loaded_model = state.offline_chat_processor_config.loaded_model
1276
+ chat_response = converse_offline(
1277
+ user_query=query_to_run,
1278
+ references=compiled_references,
1279
+ online_results=online_results,
1280
+ loaded_model=loaded_model,
1281
+ conversation_log=meta_log,
1282
+ completion_func=partial_completion,
1283
+ conversation_commands=conversation_commands,
1284
+ model_name=chat_model.name,
1285
+ max_prompt_size=chat_model.max_prompt_size,
1286
+ tokenizer_name=chat_model.tokenizer,
1287
+ location_data=location_data,
1288
+ user_name=user_name,
1289
+ agent=agent,
1290
+ query_files=query_files,
1291
+ generated_files=raw_generated_files,
1292
+ generated_asset_results=generated_asset_results,
1293
+ tracer=tracer,
1294
+ )
1295
+
1296
+ elif chat_model.model_type == ChatModel.ModelType.OPENAI:
1297
+ openai_chat_config = chat_model.ai_model_api
1298
+ api_key = openai_chat_config.api_key
1299
+ chat_model_name = chat_model.name
1300
+ chat_response = converse_openai(
1301
+ compiled_references,
1302
+ query_to_run,
1303
+ query_images=query_images,
1304
+ online_results=online_results,
1305
+ code_results=code_results,
1306
+ conversation_log=meta_log,
1307
+ model=chat_model_name,
1308
+ api_key=api_key,
1309
+ api_base_url=openai_chat_config.api_base_url,
1310
+ completion_func=partial_completion,
1311
+ conversation_commands=conversation_commands,
1312
+ max_prompt_size=chat_model.max_prompt_size,
1313
+ tokenizer_name=chat_model.tokenizer,
1314
+ location_data=location_data,
1315
+ user_name=user_name,
1316
+ agent=agent,
1317
+ vision_available=vision_available,
1318
+ query_files=query_files,
1319
+ generated_files=raw_generated_files,
1320
+ generated_asset_results=generated_asset_results,
1321
+ program_execution_context=program_execution_context,
1322
+ tracer=tracer,
1323
+ )
1324
+
1325
+ elif chat_model.model_type == ChatModel.ModelType.ANTHROPIC:
1326
+ api_key = chat_model.ai_model_api.api_key
1327
+ chat_response = converse_anthropic(
1328
+ compiled_references,
1329
+ query_to_run,
1330
+ query_images=query_images,
1331
+ online_results=online_results,
1332
+ code_results=code_results,
1333
+ conversation_log=meta_log,
1334
+ model=chat_model.name,
1335
+ api_key=api_key,
1336
+ completion_func=partial_completion,
1337
+ conversation_commands=conversation_commands,
1338
+ max_prompt_size=chat_model.max_prompt_size,
1339
+ tokenizer_name=chat_model.tokenizer,
1340
+ location_data=location_data,
1341
+ user_name=user_name,
1342
+ agent=agent,
1343
+ vision_available=vision_available,
1344
+ query_files=query_files,
1345
+ generated_files=raw_generated_files,
1346
+ generated_asset_results=generated_asset_results,
1347
+ program_execution_context=program_execution_context,
1348
+ tracer=tracer,
1349
+ )
1350
+ elif chat_model.model_type == ChatModel.ModelType.GOOGLE:
1351
+ api_key = chat_model.ai_model_api.api_key
1352
+ chat_response = converse_gemini(
1353
+ compiled_references,
1354
+ query_to_run,
1355
+ online_results,
1356
+ code_results,
1357
+ meta_log,
1358
+ model=chat_model.name,
1359
+ api_key=api_key,
1360
+ completion_func=partial_completion,
1361
+ conversation_commands=conversation_commands,
1362
+ max_prompt_size=chat_model.max_prompt_size,
1363
+ tokenizer_name=chat_model.tokenizer,
1364
+ location_data=location_data,
1365
+ user_name=user_name,
1366
+ agent=agent,
1367
+ query_images=query_images,
1368
+ vision_available=vision_available,
1369
+ query_files=query_files,
1370
+ generated_files=raw_generated_files,
1371
+ generated_asset_results=generated_asset_results,
1372
+ program_execution_context=program_execution_context,
1373
+ tracer=tracer,
1374
+ )
1375
+
1376
+ metadata.update({"chat_model": chat_model.name})
1377
+
1378
+ except Exception as e:
1379
+ logger.error(e, exc_info=True)
1380
+ raise HTTPException(status_code=500, detail=str(e))
1381
+
1382
+ return chat_response, metadata
1383
+
1384
+
1385
+ class DeleteMessageRequestBody(BaseModel):
1386
+ conversation_id: str
1387
+ turn_id: str
1388
+
1389
+
1390
+ class FeedbackData(BaseModel):
1391
+ uquery: str
1392
+ kquery: str
1393
+ sentiment: str
1394
+
1395
+
1396
+ class EmailVerificationApiRateLimiter:
1397
+ def __init__(self, requests: int, window: int, slug: str):
1398
+ self.requests = requests
1399
+ self.window = window
1400
+ self.slug = slug
1401
+
1402
+ def __call__(self, request: Request):
1403
+ # Rate limiting disabled if billing is disabled
1404
+ if state.billing_enabled is False:
1405
+ return
1406
+
1407
+ # Extract the email query parameter
1408
+ email = request.query_params.get("email")
1409
+
1410
+ if email:
1411
+ logger.info(f"Email query parameter: {email}")
1412
+
1413
+ user: KhojUser = get_user_by_email(email)
1414
+
1415
+ if not user:
1416
+ raise HTTPException(
1417
+ status_code=404,
1418
+ detail="User not found.",
1419
+ )
1420
+
1421
+ # Remove requests outside of the time window
1422
+ cutoff = datetime.now(tz=timezone.utc) - timedelta(seconds=self.window)
1423
+ count_requests = UserRequests.objects.filter(user=user, created_at__gte=cutoff, slug=self.slug).count()
1424
+
1425
+ # Check if the user has exceeded the rate limit
1426
+ if count_requests >= self.requests:
1427
+ logger.info(
1428
+ f"Rate limit: {count_requests}/{self.requests} requests not allowed in {self.window} seconds for email: {email}."
1429
+ )
1430
+ raise HTTPException(
1431
+ status_code=429,
1432
+ detail="Ran out of login attempts",
1433
+ )
1434
+
1435
+ # Add the current request to the db
1436
+ UserRequests.objects.create(user=user, slug=self.slug)
1437
+
1438
+
1439
+ class ApiUserRateLimiter:
1440
+ def __init__(self, requests: int, subscribed_requests: int, window: int, slug: str):
1441
+ self.requests = requests
1442
+ self.subscribed_requests = subscribed_requests
1443
+ self.window = window
1444
+ self.slug = slug
1445
+
1446
+ def __call__(self, request: Request):
1447
+ # Rate limiting disabled if billing is disabled
1448
+ if state.billing_enabled is False:
1449
+ return
1450
+
1451
+ # Rate limiting is disabled if user unauthenticated.
1452
+ # Other systems handle authentication
1453
+ if not request.user.is_authenticated:
1454
+ return
1455
+
1456
+ user: KhojUser = request.user.object
1457
+ subscribed = has_required_scope(request, ["premium"])
1458
+
1459
+ # Remove requests outside of the time window
1460
+ cutoff = datetime.now(tz=timezone.utc) - timedelta(seconds=self.window)
1461
+ count_requests = UserRequests.objects.filter(user=user, created_at__gte=cutoff, slug=self.slug).count()
1462
+
1463
+ # Check if the user has exceeded the rate limit
1464
+ if subscribed and count_requests >= self.subscribed_requests:
1465
+ logger.info(
1466
+ f"Rate limit: {count_requests}/{self.subscribed_requests} requests not allowed in {self.window} seconds for subscribed user: {user}."
1467
+ )
1468
+ raise HTTPException(
1469
+ status_code=429,
1470
+ detail="I'm glad you're enjoying interacting with me! You've unfortunately exceeded your usage limit for today. But let's chat more tomorrow?",
1471
+ )
1472
+ if not subscribed and count_requests >= self.requests:
1473
+ if self.requests >= self.subscribed_requests:
1474
+ logger.info(
1475
+ f"Rate limit: {count_requests}/{self.subscribed_requests} requests not allowed in {self.window} seconds for user: {user}."
1476
+ )
1477
+ raise HTTPException(
1478
+ status_code=429,
1479
+ detail="I'm glad you're enjoying interacting with me! You've unfortunately exceeded your usage limit for today. But let's chat more tomorrow?",
1480
+ )
1481
+
1482
+ logger.info(
1483
+ f"Rate limit: {count_requests}/{self.requests} requests not allowed in {self.window} seconds for user: {user}."
1484
+ )
1485
+ raise HTTPException(
1486
+ status_code=429,
1487
+ detail="I'm glad you're enjoying interacting with me! You've unfortunately exceeded your usage limit for today. You can subscribe to increase your usage limit via [your settings](https://app.khoj.dev/settings) or we can continue our conversation tomorrow?",
1488
+ )
1489
+
1490
+ # Add the current request to the cache
1491
+ UserRequests.objects.create(user=user, slug=self.slug)
1492
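Limiters like the one above are designed to be attached to routes as callable dependencies. A sketch of that wiring follows; the route path, limits, and slug are made up for illustration and are not taken from the package.

# Illustrative wiring only: the limiter instance is passed to Depends(), so its
# __call__(request) runs before the endpoint and raises HTTP 429 when exceeded.
from fastapi import APIRouter, Depends

example_router = APIRouter()

@example_router.get("/api/example")
def example_endpoint(
    request: Request,
    rate_limiter=Depends(ApiUserRateLimiter(requests=5, subscribed_requests=100, window=60 * 60 * 24, slug="example")),
):
    return {"status": "ok"}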
+
1493
+
1494
+ class ApiImageRateLimiter:
1495
+ def __init__(self, max_images: int = 10, max_combined_size_mb: float = 10):
1496
+ self.max_images = max_images
1497
+ self.max_combined_size_mb = max_combined_size_mb
1498
+
1499
+ def __call__(self, request: Request, body: ChatRequestBody):
1500
+ if state.billing_enabled is False:
1501
+ return
1502
+
1503
+ # Rate limiting is disabled if user unauthenticated.
1504
+ # Other systems handle authentication
1505
+ if not request.user.is_authenticated:
1506
+ return
1507
+
1508
+ if not body.images:
1509
+ return
1510
+
1511
+ # Check number of images
1512
+ if len(body.images) > self.max_images:
1513
+ logger.info(f"Rate limit: {len(body.images)}/{self.max_images} images not allowed per message.")
1514
+ raise HTTPException(
1515
+ status_code=429,
1516
+ detail=f"Those are way too many images for me! I can handle up to {self.max_images} images per message.",
1517
+ )
1518
+
1519
+ # Check total size of images
1520
+ total_size_mb = 0.0
1521
+ for image in body.images:
1522
+ # Unquote the image in case it's URL encoded
1523
+ image = unquote(image)
1524
+ # Assuming the image is a base64 encoded string
1525
+ # Remove the data:image/jpeg;base64, part if present
1526
+ if "," in image:
1527
+ image = image.split(",", 1)[1]
1528
+
1529
+ # Decode base64 to get the actual size
1530
+ image_bytes = base64.b64decode(image)
1531
+ total_size_mb += len(image_bytes) / (1024 * 1024) # Convert bytes to MB
1532
+
1533
+ if total_size_mb > self.max_combined_size_mb:
1534
+ logger.info(f"Data limit: {total_size_mb}MB/{self.max_combined_size_mb}MB size not allowed per message.")
1535
+ raise HTTPException(
1536
+ status_code=429,
1537
+ detail=f"Those images are way too large for me! I can handle up to {self.max_combined_size_mb}MB of images per message.",
1538
+ )
+
1539
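The size check above measures the decoded payload, not the length of the base64 string. A quick self-contained check of that arithmetic, using made-up data:

# Standalone check (not package code): base64 inflates data by about 4/3, but the
# limiter counts decoded bytes, so a 4 MB image is charged as 4 MB, not ~5.3 MB.
import base64

raw = b"\xff" * (4 * 1024 * 1024)                      # pretend 4 MB image payload
encoded = base64.b64encode(raw).decode()               # ~5.33 MB of base64 text
decoded_mb = len(base64.b64decode(encoded)) / (1024 * 1024)
assert decoded_mb == 4.0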
+
1540
+
1541
+ class ConversationCommandRateLimiter:
1542
+ def __init__(self, trial_rate_limit: int, subscribed_rate_limit: int, slug: str):
1543
+ self.slug = slug
1544
+ self.trial_rate_limit = trial_rate_limit
1545
+ self.subscribed_rate_limit = subscribed_rate_limit
1546
+ self.restricted_commands = [ConversationCommand.Research]
1547
+
1548
+ async def update_and_check_if_valid(self, request: Request, conversation_command: ConversationCommand):
1549
+ if state.billing_enabled is False:
1550
+ return
1551
+
1552
+ if not request.user.is_authenticated:
1553
+ return
1554
+
1555
+ if conversation_command not in self.restricted_commands:
1556
+ return
1557
+
1558
+ user: KhojUser = request.user.object
1559
+ subscribed = has_required_scope(request, ["premium"])
1560
+
1561
+ # Remove requests outside of the 24-hr time window
1562
+ cutoff = datetime.now(tz=timezone.utc) - timedelta(seconds=60 * 60 * 24)
1563
+ command_slug = f"{self.slug}_{conversation_command.value}"
1564
+ count_requests = await UserRequests.objects.filter(
1565
+ user=user, created_at__gte=cutoff, slug=command_slug
1566
+ ).acount()
1567
+
1568
+ if subscribed and count_requests >= self.subscribed_rate_limit:
1569
+ logger.info(
1570
+ f"Rate limit: {count_requests}/{self.subscribed_rate_limit} requests not allowed in 24 hours for subscribed user: {user}."
1571
+ )
1572
+ raise HTTPException(
1573
+ status_code=429,
1574
+ detail=f"I'm glad you're enjoying interacting with me! You've unfortunately exceeded your `/{conversation_command.value}` command usage limit for today. Maybe we can talk about something else for today?",
1575
+ )
1576
+ if not subscribed and count_requests >= self.trial_rate_limit:
1577
+ logger.info(
1578
+ f"Rate limit: {count_requests}/{self.trial_rate_limit} requests not allowed in 24 hours for user: {user}."
1579
+ )
1580
+ raise HTTPException(
1581
+ status_code=429,
1582
+ detail=f"I'm glad you're enjoying interacting with me! You've unfortunately exceeded your `/{conversation_command.value}` command usage limit for today. You can subscribe to increase your usage limit via [your settings](https://app.khoj.dev/settings) or we can talk about something else for today?",
1583
+ )
1584
+ await UserRequests.objects.acreate(user=user, slug=command_slug)
1585
+ return
1586
+
1587
+
1588
+ class ApiIndexedDataLimiter:
1589
+ def __init__(
1590
+ self,
1591
+ incoming_entries_size_limit: float,
1592
+ subscribed_incoming_entries_size_limit: float,
1593
+ total_entries_size_limit: float,
1594
+ subscribed_total_entries_size_limit: float,
1595
+ ):
1596
+ self.num_entries_size = incoming_entries_size_limit
1597
+ self.subscribed_num_entries_size = subscribed_incoming_entries_size_limit
1598
+ self.total_entries_size_limit = total_entries_size_limit
1599
+ self.subscribed_total_entries_size = subscribed_total_entries_size_limit
1600
+
1601
+ def __call__(self, request: Request, files: List[UploadFile] = None):
1602
+ if state.billing_enabled is False:
1603
+ return
1604
+
1605
+ subscribed = has_required_scope(request, ["premium"])
1606
+ incoming_data_size_mb = 0.0
1607
+ deletion_file_names = set()
1608
+
1609
+ if not request.user.is_authenticated or not files:
1610
+ return
1611
+
1612
+ user: KhojUser = request.user.object
1613
+
1614
+ for file in files:
1615
+ if file.size == 0:
1616
+ deletion_file_names.add(file.filename)
1617
+
1618
+ incoming_data_size_mb += file.size / 1024 / 1024
1619
+
1620
+ num_deleted_entries = 0
1621
+ for file_path in deletion_file_names:
1622
+ deleted_count = EntryAdapters.delete_entry_by_file(user, file_path)
1623
+ num_deleted_entries += deleted_count
1624
+
1625
+ logger.info(f"Deleted {num_deleted_entries} entries for user: {user}.")
1626
+
1627
+ if subscribed and incoming_data_size_mb >= self.subscribed_num_entries_size:
1628
+ logger.info(
1629
+ f"Data limit: {incoming_data_size_mb}MB incoming will exceed {self.subscribed_num_entries_size}MB allowed for subscribed user: {user}."
1630
+ )
1631
+ raise HTTPException(status_code=429, detail="Too much data indexed.")
1632
+ if not subscribed and incoming_data_size_mb >= self.num_entries_size:
1633
+ logger.info(
1634
+ f"Data limit: {incoming_data_size_mb}MB incoming will exceed {self.num_entries_size}MB allowed for user: {user}."
1635
+ )
1636
+ raise HTTPException(
1637
+ status_code=429, detail="Too much data indexed. Subscribe to increase your data index limit."
1638
+ )
1639
+
1640
+ user_size_data = EntryAdapters.get_size_of_indexed_data_in_mb(user)
1641
+ if subscribed and user_size_data + incoming_data_size_mb >= self.subscribed_total_entries_size:
1642
+ logger.info(
1643
+ f"Data limit: {incoming_data_size_mb}MB incoming + {user_size_data}MB existing will exceed {self.subscribed_total_entries_size}MB allowed for subscribed user: {user}."
1644
+ )
1645
+ raise HTTPException(status_code=429, detail="Too much data indexed.")
1646
+ if not subscribed and user_size_data + incoming_data_size_mb >= self.total_entries_size_limit:
1647
+ logger.info(
1648
+ f"Data limit: {incoming_data_size_mb}MB incoming + {user_size_data}MB existing will exceed {self.subscribed_total_entries_size}MB allowed for non subscribed user: {user}."
1649
+ )
1650
+ raise HTTPException(
1651
+ status_code=429, detail="Too much data indexed. Subscribe to increase your data index limit."
1652
+ )
1653
+
1654
+
1655
+ class CommonQueryParamsClass:
1656
+ def __init__(
1657
+ self,
1658
+ client: Optional[str] = None,
1659
+ user_agent: Optional[str] = Header(None),
1660
+ referer: Optional[str] = Header(None),
1661
+ host: Optional[str] = Header(None),
1662
+ ):
1663
+ self.client = client
1664
+ self.user_agent = user_agent
1665
+ self.referer = referer
1666
+ self.host = host
1667
+
1668
+
1669
+ CommonQueryParams = Annotated[CommonQueryParamsClass, Depends()]
1670
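CommonQueryParams bundles the client query parameter and a few request headers into one annotated dependency. A hypothetical route using it might look like this (the path and handler are illustrative, not from the package):

# Illustrative route only: FastAPI resolves the Annotated dependency and injects
# a populated CommonQueryParamsClass instance as `common`.
from fastapi import APIRouter

demo_router = APIRouter()

@demo_router.get("/api/client-info")
def client_info(common: CommonQueryParams):
    return {"client": common.client, "user_agent": common.user_agent, "referer": common.referer}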
+
1671
+
1672
+ def format_automation_response(scheduling_request: str, executed_query: str, ai_response: str, user: KhojUser) -> str:
1673
+ """
1674
+ Format the AI response to send in automation email to user.
1675
+ """
1676
+ name = get_user_name(user)
1677
+ username = prompts.user_name.format(name=name) if name else ""
1678
+
1679
+ automation_format_prompt = prompts.automation_format_prompt.format(
1680
+ original_query=scheduling_request,
1681
+ executed_query=executed_query,
1682
+ response=ai_response,
1683
+ username=username,
1684
+ )
1685
+
1686
+ with timer("Chat actor: Format automation response", logger):
1687
+ return send_message_to_model_wrapper_sync(automation_format_prompt, user=user)
1688
+
1689
+
1690
+ def should_notify(original_query: str, executed_query: str, ai_response: str, user: KhojUser) -> bool:
1691
+ """
1692
+ Decide whether to notify the user of the AI response.
1693
+ Default to notifying the user for now.
1694
+ """
1695
+ if any(is_none_or_empty(message) for message in [original_query, executed_query, ai_response]):
1696
+ return False
1697
+
1698
+ to_notify_or_not = prompts.to_notify_or_not.format(
1699
+ original_query=original_query,
1700
+ executed_query=executed_query,
1701
+ response=ai_response,
1702
+ )
1703
+
1704
+ with timer("Chat actor: Decide to notify user of automation response", logger):
1705
+ try:
1706
+ # TODO Replace with async call so we don't have to maintain a sync version
1707
+ raw_response = send_message_to_model_wrapper_sync(to_notify_or_not, user=user, response_type="json_object")
1708
+ response = json.loads(clean_json(raw_response))
1709
+ should_notify_result = response["decision"] == "Yes"
1710
+ reason = response.get("reason", "unknown")
1711
+ logger.info(
1712
+ f'Decided to {"not " if not should_notify_result else ""}notify user of automation response because of reason: {reason}.'
1713
+ )
1714
+ return should_notify_result
1715
+ except Exception as e:
1716
+ logger.warning(
1717
+ f"Fallback to notify user of automation response as failed to infer should notify or not. {e}",
1718
+ exc_info=True,
1719
+ )
1720
+ return True
1721
+
1722
+
1723
+ def scheduled_chat(
1724
+ query_to_run: str,
1725
+ scheduling_request: str,
1726
+ subject: str,
1727
+ user: KhojUser,
1728
+ calling_url: URL,
1729
+ job_id: str = None,
1730
+ conversation_id: str = None,
1731
+ ):
1732
+ logger.info(f"Processing scheduled_chat: {query_to_run}")
1733
+ if job_id:
1734
+ # Get the job object and check whether the time is valid for it to run. This helps avoid race conditions that cause the same job to be run multiple times.
1735
+ job = AutomationAdapters.get_automation(user, job_id)
1736
+ last_run_time = AutomationAdapters.get_job_last_run(user, job)
1737
+
1738
+ # Convert last_run_time from %Y-%m-%d %I:%M %p %Z to datetime object
1739
+ if last_run_time:
1740
+ last_run_time = datetime.strptime(last_run_time, "%Y-%m-%d %I:%M %p %Z").replace(tzinfo=timezone.utc)
1741
+
1742
+ # If the last run time was within the last 6 hours, don't run it again. This helps avoid multithreading issues and rate limits.
1743
+ if (datetime.now(timezone.utc) - last_run_time).total_seconds() < 6 * 60 * 60:
1744
+ logger.info(f"Skipping scheduled chat {job_id} as the next run time is in the future.")
1745
+ return
1746
+
1747
+ # Extract relevant params from the original URL
1748
+ scheme = "http" if not calling_url.is_secure else "https"
1749
+ query_dict = parse_qs(calling_url.query)
1750
+
1751
+ # Pop the stream value from query_dict if it exists
1752
+ query_dict.pop("stream", None)
1753
+
1754
+ # Replace the original scheduling query with the scheduled query
1755
+ query_dict["q"] = [query_to_run]
1756
+
1757
+ # Replace the original conversation_id with the conversation_id
1758
+ if conversation_id:
1759
+ # encode the conversation_id to avoid any issues with special characters
1760
+ query_dict["conversation_id"] = [quote(str(conversation_id))]
1761
+
1762
+ # validate that the conversation id exists. If not, delete the automation and exit.
1763
+ if not ConversationAdapters.get_conversation_by_id(conversation_id):
1764
+ AutomationAdapters.delete_automation(user, job_id)
1765
+ return
1766
+
1767
+ # Restructure the original query_dict into a valid JSON payload for the chat API
1768
+ json_payload = {key: values[0] for key, values in query_dict.items()}
1769
+
1770
+ # Construct the URL to call the chat API with the scheduled query string
1771
+ url = f"{scheme}://{calling_url.netloc}/api/chat?client=khoj"
1772
+
1773
+ # Construct the Headers for the chat API
1774
+ headers = {"User-Agent": "Khoj", "Content-Type": "application/json"}
1775
+ if not state.anonymous_mode:
1776
+ # Add authorization request header in non-anonymous mode
1777
+ token = get_khoj_tokens(user)
1778
+ if is_none_or_empty(token):
1779
+ token = create_khoj_token(user).token
1780
+ else:
1781
+ token = token[0].token
1782
+ headers["Authorization"] = f"Bearer {token}"
1783
+
1784
+ # Call the chat API endpoint with authenticated user token and query
1785
+ raw_response = requests.post(url, headers=headers, json=json_payload, allow_redirects=False)
1786
+
1787
+ # Handle redirect manually if necessary
1788
+ if raw_response.status_code in [301, 302]:
1789
+ redirect_url = raw_response.headers["Location"]
1790
+ logger.info(f"Redirecting to {redirect_url}")
1791
+ raw_response = requests.post(redirect_url, headers=headers, json=json_payload)
1792
+
1793
+ # Stop if the chat API call was not successful
1794
+ if raw_response.status_code != 200:
1795
+ logger.error(f"Failed to run schedule chat: {raw_response.text}, user: {user}, query: {query_to_run}")
1796
+ return None
1797
+
1798
+ # Extract the AI response from the chat API response
1799
+ cleaned_query = re.sub(r"^/automated_task\s*", "", query_to_run).strip()
1800
+ is_image = False
1801
+ if raw_response.headers.get("Content-Type") == "application/json":
1802
+ response_map = raw_response.json()
1803
+ ai_response = response_map.get("response") or response_map.get("image")
1804
+ is_image = False
1805
+ if isinstance(ai_response, dict):
1806
+ is_image = ai_response.get("image") is not None
1807
+ else:
1808
+ ai_response = raw_response.text
1809
+
1810
+ # Notify user if the AI response is satisfactory
1811
+ if should_notify(
1812
+ original_query=scheduling_request, executed_query=cleaned_query, ai_response=ai_response, user=user
1813
+ ):
1814
+ formatted_response = format_automation_response(scheduling_request, cleaned_query, ai_response, user)
1815
+
1816
+ if is_resend_enabled():
1817
+ send_task_email(user.get_short_name(), user.email, cleaned_query, formatted_response, subject, is_image)
1818
+ else:
1819
+ return formatted_response
1820
+
1821
+
1822
+ async def create_automation(
1823
+ q: str,
1824
+ timezone: str,
1825
+ user: KhojUser,
1826
+ calling_url: URL,
1827
+ meta_log: dict = {},
1828
+ conversation_id: str = None,
1829
+ tracer: dict = {},
1830
+ ):
1831
+ crontime, query_to_run, subject = await aschedule_query(q, meta_log, user, tracer=tracer)
1832
+ job = await aschedule_automation(query_to_run, subject, crontime, timezone, q, user, calling_url, conversation_id)
1833
+ return job, crontime, query_to_run, subject
1834
+
1835
+
1836
+ def schedule_automation(
1837
+ query_to_run: str,
1838
+ subject: str,
1839
+ crontime: str,
1840
+ timezone: str,
1841
+ scheduling_request: str,
1842
+ user: KhojUser,
1843
+ calling_url: URL,
1844
+ conversation_id: str,
1845
+ ):
1846
+ # Disable minute level automation recurrence
1847
+ minute_value = crontime.split(" ")[0]
1848
+ if not minute_value.isdigit():
1849
+ # Run automation at some random minute (to distribute request load) instead of running every X minutes
1850
+ crontime = " ".join([str(math.floor(random() * 60))] + crontime.split(" ")[1:])
1851
+
1852
+ user_timezone = pytz.timezone(timezone)
1853
+ trigger = CronTrigger.from_crontab(crontime, user_timezone)
1854
+ trigger.jitter = 60
1855
+ # Generate id and metadata used by task scheduler and process locks for the task runs
1856
+ job_metadata = json.dumps(
1857
+ {
1858
+ "query_to_run": query_to_run,
1859
+ "scheduling_request": scheduling_request,
1860
+ "subject": subject,
1861
+ "crontime": crontime,
1862
+ "conversation_id": str(conversation_id),
1863
+ }
1864
+ )
1865
+ query_id = hashlib.md5(f"{query_to_run}_{crontime}".encode("utf-8")).hexdigest()
1866
+ job_id = f"automation_{user.uuid}_{query_id}"
1867
+ job = state.scheduler.add_job(
1868
+ run_with_process_lock,
1869
+ trigger=trigger,
1870
+ args=(
1871
+ scheduled_chat,
1872
+ f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}",
1873
+ ),
1874
+ kwargs={
1875
+ "query_to_run": query_to_run,
1876
+ "scheduling_request": scheduling_request,
1877
+ "subject": subject,
1878
+ "user": user,
1879
+ "calling_url": calling_url,
1880
+ "job_id": job_id,
1881
+ "conversation_id": conversation_id,
1882
+ },
1883
+ id=job_id,
1884
+ name=job_metadata,
1885
+ max_instances=2, # Allow second instance to kill any previous instance with stale lock
1886
+ )
1887
+ return job
1888
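schedule_automation above (and aschedule_automation below) rewrite a non-numeric minute field before registering the job, so sub-hourly schedules collapse to a single run at a fixed random minute. A standalone walk-through of that transformation with an example crontab:

# Standalone example (not package code) of the minute-field rewrite above.
import math
from random import random

crontime = "*/5 9 * * *"                  # requested: every 5 minutes during the 9am hour
minute_value = crontime.split(" ")[0]
if not minute_value.isdigit():            # "*/5" is not a plain minute, so rewrite it
    crontime = " ".join([str(math.floor(random() * 60))] + crontime.split(" ")[1:])
print(crontime)                           # e.g. "37 9 * * *": once a day at a random fixed minute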
+
1889
+
1890
+ async def aschedule_automation(
1891
+ query_to_run: str,
1892
+ subject: str,
1893
+ crontime: str,
1894
+ timezone: str,
1895
+ scheduling_request: str,
1896
+ user: KhojUser,
1897
+ calling_url: URL,
1898
+ conversation_id: str,
1899
+ ):
1900
+ # Disable minute level automation recurrence
1901
+ minute_value = crontime.split(" ")[0]
1902
+ if not minute_value.isdigit():
1903
+ # Run automation at some random minute (to distribute request load) instead of running every X minutes
1904
+ crontime = " ".join([str(math.floor(random() * 60))] + crontime.split(" ")[1:])
1905
+
1906
+ user_timezone = pytz.timezone(timezone)
1907
+ trigger = CronTrigger.from_crontab(crontime, user_timezone)
1908
+ trigger.jitter = 60
1909
+ # Generate id and metadata used by task scheduler and process locks for the task runs
1910
+ job_metadata = json.dumps(
1911
+ {
1912
+ "query_to_run": query_to_run,
1913
+ "scheduling_request": scheduling_request,
1914
+ "subject": subject,
1915
+ "crontime": crontime,
1916
+ "conversation_id": str(conversation_id),
1917
+ }
1918
+ )
1919
+ query_id = hashlib.md5(f"{query_to_run}_{crontime}".encode("utf-8")).hexdigest()
1920
+ job_id = f"automation_{user.uuid}_{query_id}"
1921
+ job = await sync_to_async(state.scheduler.add_job)(
1922
+ run_with_process_lock,
1923
+ trigger=trigger,
1924
+ args=(
1925
+ scheduled_chat,
1926
+ f"{ProcessLock.Operation.SCHEDULED_JOB}_{user.uuid}_{query_id}",
1927
+ ),
1928
+ kwargs={
1929
+ "query_to_run": query_to_run,
1930
+ "scheduling_request": scheduling_request,
1931
+ "subject": subject,
1932
+ "user": user,
1933
+ "calling_url": calling_url,
1934
+ "job_id": job_id,
1935
+ "conversation_id": conversation_id,
1936
+ },
1937
+ id=job_id,
1938
+ name=job_metadata,
1939
+ max_instances=2, # Allow second instance to kill any previous instance with stale lock
1940
+ )
1941
+ return job
1942
+
1943
+
1944
+ def construct_automation_created_message(automation: Job, crontime: str, query_to_run: str, subject: str):
1945
+ # Display next run time in user timezone instead of UTC
1946
+ schedule = f'{cron_descriptor.get_description(crontime)} {automation.next_run_time.strftime("%Z")}'
1947
+ next_run_time = automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z")
1948
+ # Remove /automated_task prefix from inferred_query
1949
+ unprefixed_query_to_run = re.sub(r"^\/automated_task\s*", "", query_to_run)
1950
+ # Create the automation response
1951
+ automation_icon_url = "/static/assets/icons/automation.svg"
1952
+ return f"""
1953
+ ### ![]({automation_icon_url}) Created Automation
1954
+ - Subject: **{subject}**
1955
+ - Query to Run: "{unprefixed_query_to_run}"
1956
+ - Schedule: `{schedule}`
1957
+ - Next Run At: {next_run_time}
1958
+
1959
+ Manage your automations [here](/automations).
1960
+ """.strip()
1961
+
1962
+
1963
+ class MessageProcessor:
1964
+ def __init__(self):
1965
+ self.references = {}
1966
+ self.usage = {}
1967
+ self.raw_response = ""
1968
+ self.generated_images = []
1969
+ self.generated_files = []
1970
+ self.generated_excalidraw_diagram = []
1971
+
1972
+ def convert_message_chunk_to_json(self, raw_chunk: str) -> Dict[str, Any]:
1973
+ if raw_chunk.startswith("{") and raw_chunk.endswith("}"):
1974
+ try:
1975
+ json_chunk = json.loads(raw_chunk)
1976
+ if "type" not in json_chunk:
1977
+ json_chunk = {"type": "message", "data": json_chunk}
1978
+ return json_chunk
1979
+ except json.JSONDecodeError:
1980
+ return {"type": "message", "data": raw_chunk}
1981
+ elif raw_chunk:
1982
+ return {"type": "message", "data": raw_chunk}
1983
+ return {"type": "", "data": ""}
1984
+
1985
+ def process_message_chunk(self, raw_chunk: str) -> None:
1986
+ chunk = self.convert_message_chunk_to_json(raw_chunk)
1987
+ if not chunk or not chunk["type"]:
1988
+ return
1989
+
1990
+ chunk_type = ChatEvent(chunk["type"])
1991
+ if chunk_type == ChatEvent.REFERENCES:
1992
+ self.references = chunk["data"]
1993
+ elif chunk_type == ChatEvent.USAGE:
1994
+ self.usage = chunk["data"]
1995
+ elif chunk_type == ChatEvent.MESSAGE:
1996
+ chunk_data = chunk["data"]
1997
+ if isinstance(chunk_data, dict):
1998
+ self.raw_response = self.handle_json_response(chunk_data)
1999
+ elif (
2000
+ isinstance(chunk_data, str) and chunk_data.strip().startswith("{") and chunk_data.strip().endswith("}")
2001
+ ):
2002
+ try:
2003
+ json_data = json.loads(chunk_data.strip())
2004
+ self.raw_response = self.handle_json_response(json_data)
2005
+ except json.JSONDecodeError:
2006
+ self.raw_response += chunk_data
2007
+ else:
2008
+ self.raw_response += chunk_data
2009
+ elif chunk_type == ChatEvent.GENERATED_ASSETS:
2010
+ chunk_data = chunk["data"]
2011
+ if isinstance(chunk_data, dict):
2012
+ for key in chunk_data:
2013
+ if key == "images":
2014
+ self.generated_images = chunk_data[key]
2015
+ elif key == "files":
2016
+ self.generated_files = chunk_data[key]
2017
+ elif key == "excalidrawDiagram":
2018
+ self.generated_excalidraw_diagram = chunk_data[key]
2019
+
2020
+ def handle_json_response(self, json_data: Dict[str, str]) -> str | Dict[str, str]:
2021
+ if "image" in json_data or "details" in json_data:
2022
+ return json_data
2023
+ if "response" in json_data:
2024
+ return json_data["response"]
2025
+ return json_data
2026
+
2027
+
2028
+ async def read_chat_stream(response_iterator: AsyncGenerator[str, None]) -> Dict[str, Any]:
2029
+ processor = MessageProcessor()
2030
+ event_delimiter = "␃🔚␗"
2031
+ buffer = ""
2032
+
2033
+ async for chunk in response_iterator:
2034
+ # Start buffering chunks until complete event is received
2035
+ buffer += chunk
2036
+
2037
+ # Once the buffer contains a complete event
2038
+ while event_delimiter in buffer:
2039
+ # Extract the event from the buffer
2040
+ event, buffer = buffer.split(event_delimiter, 1)
2041
+ # Process the event
2042
+ if event:
2043
+ processor.process_message_chunk(event)
2044
+
2045
+ # Process any remaining data in the buffer
2046
+ if buffer:
2047
+ processor.process_message_chunk(buffer)
2048
+
2049
+ return {
2050
+ "response": processor.raw_response,
2051
+ "references": processor.references,
2052
+ "usage": processor.usage,
2053
+ "images": processor.generated_images,
2054
+ "files": processor.generated_files,
2055
+ "excalidrawDiagram": processor.generated_excalidraw_diagram,
2056
+ }
2057
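read_chat_stream buffers incoming chunks until the ␃🔚␗ delimiter, then folds each typed event into a single response dict. Below is a synthetic end-to-end sketch, assuming ChatEvent uses the lowercase string values ("references", "message") suggested by the code above; the fake stream is invented for illustration.

# Illustrative only: feed read_chat_stream a hand-built, delimiter-separated stream.
import asyncio

async def fake_stream():
    delimiter = "␃🔚␗"
    yield json.dumps({"type": "references", "data": {"notes": []}}) + delimiter  # assumes ChatEvent.REFERENCES == "references"
    yield "Hello " + delimiter        # plain text chunks become "message" events
    yield "world"                     # trailing partial event is flushed after the loop

result = asyncio.run(read_chat_stream(fake_stream()))
# Expected: result["response"] == "Hello world", result["references"] == {"notes": []}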
+
2058
+
2059
+ def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False):
2060
+ user_picture = request.session.get("user", {}).get("picture")
2061
+ is_active = has_required_scope(request, ["premium"])
2062
+ has_documents = EntryAdapters.user_has_entries(user=user)
2063
+
2064
+ if not is_detailed:
2065
+ return {
2066
+ "request": request,
2067
+ "username": user.username if user else None,
2068
+ "user_photo": user_picture,
2069
+ "is_active": is_active,
2070
+ "has_documents": has_documents,
2071
+ "khoj_version": state.khoj_version,
2072
+ }
2073
+
2074
+ user_subscription_state = get_user_subscription_state(user.email)
2075
+ user_subscription = adapters.get_user_subscription(user.email)
2076
+
2077
+ subscription_renewal_date = (
2078
+ user_subscription.renewal_date.strftime("%d %b %Y")
2079
+ if user_subscription and user_subscription.renewal_date
2080
+ else None
2081
+ )
2082
+ subscription_enabled_trial_at = (
2083
+ user_subscription.enabled_trial_at.strftime("%d %b %Y")
2084
+ if user_subscription and user_subscription.enabled_trial_at
2085
+ else None
2086
+ )
2087
+ given_name = get_user_name(user)
2088
+
2089
+ enabled_content_sources_set = set(EntryAdapters.get_unique_file_sources(user))
2090
+ enabled_content_sources = {
2091
+ "computer": ("computer" in enabled_content_sources_set),
2092
+ "github": ("github" in enabled_content_sources_set),
2093
+ "notion": ("notion" in enabled_content_sources_set),
2094
+ }
2095
+
2096
+ notion_oauth_url = get_notion_auth_url(user)
2097
+ current_notion_config = get_user_notion_config(user)
2098
+ notion_token = current_notion_config.token if current_notion_config else ""
2099
+
2100
+ selected_chat_model_config = ConversationAdapters.get_chat_model(
2101
+ user
2102
+ ) or ConversationAdapters.get_default_chat_model(user)
2103
+ chat_models = ConversationAdapters.get_conversation_processor_options().all()
2104
+ chat_model_options = list()
2105
+ for chat_model in chat_models:
2106
+ chat_model_options.append({"name": chat_model.name, "id": chat_model.id})
2107
+
2108
+ selected_paint_model_config = ConversationAdapters.get_user_text_to_image_model_config(user)
2109
+ paint_model_options = ConversationAdapters.get_text_to_image_model_options().all()
2110
+ all_paint_model_options = list()
2111
+ for paint_model in paint_model_options:
2112
+ all_paint_model_options.append({"name": paint_model.model_name, "id": paint_model.id})
2113
+
2114
+ voice_models = ConversationAdapters.get_voice_model_options()
2115
+ voice_model_options = list()
2116
+ for voice_model in voice_models:
2117
+ voice_model_options.append({"name": voice_model.name, "id": voice_model.model_id})
2118
+
2119
+ if len(voice_model_options) == 0:
2120
+ eleven_labs_enabled = False
2121
+ else:
2122
+ eleven_labs_enabled = is_eleven_labs_enabled()
2123
+
2124
+ selected_voice_model_config = ConversationAdapters.get_voice_model_config(user)
2125
+
2126
+ return {
2127
+ "request": request,
2128
+ # user info
2129
+ "username": user.username if user else None,
2130
+ "user_photo": user_picture,
2131
+ "is_active": is_active,
2132
+ "given_name": given_name,
2133
+ "phone_number": str(user.phone_number) if user.phone_number else "",
2134
+ "is_phone_number_verified": user.verified_phone_number,
2135
+ # user content settings
2136
+ "enabled_content_source": enabled_content_sources,
2137
+ "has_documents": has_documents,
2138
+ "notion_token": notion_token,
2139
+ # user model settings
2140
+ "chat_model_options": chat_model_options,
2141
+ "selected_chat_model_config": selected_chat_model_config.id if selected_chat_model_config else None,
2142
+ "paint_model_options": all_paint_model_options,
2143
+ "selected_paint_model_config": selected_paint_model_config.id if selected_paint_model_config else None,
2144
+ "voice_model_options": voice_model_options,
2145
+ "selected_voice_model_config": selected_voice_model_config.model_id if selected_voice_model_config else None,
2146
+ # user billing info
2147
+ "subscription_state": user_subscription_state,
2148
+ "subscription_renewal_date": subscription_renewal_date,
2149
+ "subscription_enabled_trial_at": subscription_enabled_trial_at,
2150
+ # server settings
2151
+ "khoj_cloud_subscription_url": os.getenv("KHOJ_CLOUD_SUBSCRIPTION_URL"),
2152
+ "billing_enabled": state.billing_enabled,
2153
+ "is_eleven_labs_enabled": eleven_labs_enabled,
2154
+ "is_twilio_enabled": is_twilio_enabled(),
2155
+ "khoj_version": state.khoj_version,
2156
+ "anonymous_mode": state.anonymous_mode,
2157
+ "notion_oauth_url": notion_oauth_url,
2158
+ "length_of_free_trial": LENGTH_OF_FREE_TRIAL,
2159
+ }
2160
+
2161
+
2162
+ def configure_content(
2163
+ user: KhojUser,
2164
+ files: Optional[dict[str, dict[str, str]]],
2165
+ regenerate: bool = False,
2166
+ t: Optional[state.SearchType] = state.SearchType.All,
2167
+ ) -> bool:
2168
+ success = True
2169
+ if t is None:
2170
+ t = state.SearchType.All
2171
+
2172
+ if t is not None and t in [type.value for type in state.SearchType]:
2173
+ t = state.SearchType(t)
2174
+
2175
+ if t is not None and t.value not in [type.value for type in state.SearchType]:
2176
+ logger.warning(f"🚨 Invalid search type: {t}")
2177
+ return False
2178
+
2179
+ search_type = t.value if t else None
2180
+
2181
+ no_documents = all([not files.get(file_type) for file_type in files]) if files else True
2182
+
2183
+ if files is None:
2184
+ logger.warning(f"🚨 No files to process for {search_type} search.")
2185
+ return True
2186
+
2187
+ try:
2188
+ # Initialize Org Notes Search
2189
+ if (search_type == state.SearchType.All.value or search_type == state.SearchType.Org.value) and files["org"]:
2190
+ logger.info("🦄 Setting up search for orgmode notes")
2191
+ # Extract Entries, Generate Notes Embeddings
2192
+ text_search.setup(
2193
+ OrgToEntries,
2194
+ files.get("org"),
2195
+ regenerate=regenerate,
2196
+ user=user,
2197
+ )
2198
+ except Exception as e:
2199
+ logger.error(f"🚨 Failed to setup org: {e}", exc_info=True)
2200
+ success = False
2201
+
2202
+ try:
2203
+ # Initialize Markdown Search
2204
+ if (search_type == state.SearchType.All.value or search_type == state.SearchType.Markdown.value) and files[
2205
+ "markdown"
2206
+ ]:
2207
+ logger.info("💎 Setting up search for markdown notes")
2208
+ # Extract Entries, Generate Markdown Embeddings
2209
+ text_search.setup(
2210
+ MarkdownToEntries,
2211
+ files.get("markdown"),
2212
+ regenerate=regenerate,
2213
+ user=user,
2214
+ )
2215
+
2216
+ except Exception as e:
2217
+ logger.error(f"🚨 Failed to setup markdown: {e}", exc_info=True)
2218
+ success = False
2219
+
2220
+ try:
2221
+ # Initialize PDF Search
2222
+ if (search_type == state.SearchType.All.value or search_type == state.SearchType.Pdf.value) and files["pdf"]:
2223
+ logger.info("🖨️ Setting up search for pdf")
2224
+ # Extract Entries, Generate PDF Embeddings
2225
+ text_search.setup(
2226
+ PdfToEntries,
2227
+ files.get("pdf"),
2228
+ regenerate=regenerate,
2229
+ user=user,
2230
+ )
2231
+
2232
+ except Exception as e:
2233
+ logger.error(f"🚨 Failed to setup PDF: {e}", exc_info=True)
2234
+ success = False
2235
+
2236
+ try:
2237
+ # Initialize Plaintext Search
2238
+ if (search_type == state.SearchType.All.value or search_type == state.SearchType.Plaintext.value) and files[
2239
+ "plaintext"
2240
+ ]:
2241
+ logger.info("📄 Setting up search for plaintext")
2242
+ # Extract Entries, Generate Plaintext Embeddings
2243
+ text_search.setup(
2244
+ PlaintextToEntries,
2245
+ files.get("plaintext"),
2246
+ regenerate=regenerate,
2247
+ user=user,
2248
+ )
2249
+
2250
+ except Exception as e:
2251
+ logger.error(f"🚨 Failed to setup plaintext: {e}", exc_info=True)
2252
+ success = False
2253
+
2254
+ try:
2255
+ if no_documents:
2256
+ github_config = GithubConfig.objects.filter(user=user).prefetch_related("githubrepoconfig").first()
2257
+ if (
2258
+ search_type == state.SearchType.All.value or search_type == state.SearchType.Github.value
2259
+ ) and github_config is not None:
2260
+ logger.info("🐙 Setting up search for github")
2261
+ # Extract Entries, Generate Github Embeddings
2262
+ text_search.setup(
2263
+ GithubToEntries,
2264
+ None,
2265
+ regenerate=regenerate,
2266
+ user=user,
2267
+ config=github_config,
2268
+ )
2269
+
2270
+ except Exception as e:
2271
+ logger.error(f"🚨 Failed to setup GitHub: {e}", exc_info=True)
2272
+ success = False
2273
+
2274
+ try:
2275
+ if no_documents:
2276
+ # Initialize Notion Search
2277
+ notion_config = NotionConfig.objects.filter(user=user).first()
2278
+ if (
2279
+ search_type == state.SearchType.All.value or search_type == state.SearchType.Notion.value
2280
+ ) and notion_config:
2281
+ logger.info("🔌 Setting up search for notion")
2282
+ text_search.setup(
2283
+ NotionToEntries,
2284
+ None,
2285
+ regenerate=regenerate,
2286
+ user=user,
2287
+ config=notion_config,
2288
+ )
2289
+
2290
+ except Exception as e:
2291
+ logger.error(f"🚨 Failed to setup Notion: {e}", exc_info=True)
2292
+ success = False
2293
+
2294
+ try:
2295
+ # Initialize Image Search
2296
+ if (search_type == state.SearchType.All.value or search_type == state.SearchType.Image.value) and files[
2297
+ "image"
2298
+ ]:
2299
+ logger.info("🖼️ Setting up search for images")
2300
+ # Extract Entries, Generate Image Embeddings
2301
+ text_search.setup(
2302
+ ImageToEntries,
2303
+ files.get("image"),
2304
+ regenerate=regenerate,
2305
+ user=user,
2306
+ )
2307
+ except Exception as e:
2308
+ logger.error(f"🚨 Failed to setup images: {e}", exc_info=True)
2309
+ success = False
2310
+ try:
2311
+ if (search_type == state.SearchType.All.value or search_type == state.SearchType.Docx.value) and files["docx"]:
2312
+ logger.info("📄 Setting up search for docx")
2313
+ text_search.setup(
2314
+ DocxToEntries,
2315
+ files.get("docx"),
2316
+ regenerate=regenerate,
2317
+ user=user,
2318
+ )
2319
+ except Exception as e:
2320
+ logger.error(f"🚨 Failed to setup docx: {e}", exc_info=True)
2321
+ success = False
2322
+
2323
+ # Invalidate Query Cache
2324
+ if user:
2325
+ state.query_cache[user.uuid] = LRU()
2326
+
2327
+ return success
2328
+
2329
+
2330
+ def get_notion_auth_url(user: KhojUser):
2331
+ if not NOTION_OAUTH_CLIENT_ID or not NOTION_OAUTH_CLIENT_SECRET or not NOTION_REDIRECT_URI:
2332
+ return None
2333
+ return f"https://api.notion.com/v1/oauth/authorize?client_id={NOTION_OAUTH_CLIENT_ID}&redirect_uri={NOTION_REDIRECT_URI}&response_type=code&state={user.uuid}"