khoj 1.33.3.dev32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +218 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +452 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1821 -0
  10. khoj/database/admin.py +417 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_default_model.py +116 -0
  15. khoj/database/management/commands/change_generated_images_url.py +61 -0
  16. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  17. khoj/database/migrations/0001_khojuser.py +98 -0
  18. khoj/database/migrations/0002_googleuser.py +32 -0
  19. khoj/database/migrations/0003_vector_extension.py +10 -0
  20. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  21. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  22. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  23. khoj/database/migrations/0007_add_conversation.py +27 -0
  24. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  25. khoj/database/migrations/0009_khojapiuser.py +24 -0
  26. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  27. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  28. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  29. khoj/database/migrations/0012_entry_file_source.py +21 -0
  30. khoj/database/migrations/0013_subscription.py +37 -0
  31. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  32. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  33. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  34. khoj/database/migrations/0017_searchmodel.py +32 -0
  35. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  36. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  37. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  38. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  39. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  40. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  41. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  42. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  43. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  45. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  46. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  47. khoj/database/migrations/0029_userrequests.py +27 -0
  48. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  49. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  50. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  51. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  52. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  53. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  54. khoj/database/migrations/0035_processlock.py +26 -0
  55. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  56. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  57. khoj/database/migrations/0036_publicconversation.py +42 -0
  58. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  59. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  60. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  61. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  62. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  63. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  64. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  65. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  66. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  67. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  68. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  69. khoj/database/migrations/0045_fileobject.py +37 -0
  70. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  71. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  72. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  73. khoj/database/migrations/0049_datastore.py +38 -0
  74. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  75. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  76. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  77. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  78. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  79. khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
  80. khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
  81. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  82. khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
  83. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  84. khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
  85. khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
  86. khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
  87. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  88. khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
  89. khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
  90. khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
  91. khoj/database/migrations/0063_conversation_temp_id.py +36 -0
  92. khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
  93. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  94. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  95. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  96. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  97. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  98. khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
  99. khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
  100. khoj/database/migrations/0072_entry_search_model.py +24 -0
  101. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  102. khoj/database/migrations/0074_alter_conversation_title.py +17 -0
  103. khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
  104. khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
  105. khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
  106. khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
  107. khoj/database/migrations/__init__.py +0 -0
  108. khoj/database/models/__init__.py +725 -0
  109. khoj/database/tests.py +3 -0
  110. khoj/interface/compiled/404/index.html +1 -0
  111. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
  112. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
  113. khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
  114. khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
  115. khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
  116. khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
  117. khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
  118. khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
  119. khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
  120. khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
  121. khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
  122. khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
  123. khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
  124. khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
  125. khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
  126. khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
  127. khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
  128. khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
  129. khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
  130. khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
  131. khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
  132. khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
  133. khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
  134. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  135. khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
  136. khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
  137. khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
  138. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  139. khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
  140. khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
  141. khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
  142. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  143. khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
  144. khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
  145. khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
  146. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  147. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
  148. khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
  149. khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
  150. khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
  151. khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
  152. khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
  153. khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
  154. khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
  155. khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  156. khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
  157. khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
  158. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  159. khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
  160. khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
  161. khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
  162. khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
  163. khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
  164. khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
  165. khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
  166. khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
  167. khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
  168. khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
  169. khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
  170. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
  171. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
  172. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
  173. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
  174. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
  175. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
  176. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
  177. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
  178. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
  179. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
  180. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
  181. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
  182. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
  183. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
  184. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
  185. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
  186. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
  187. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
  188. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
  189. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
  190. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
  191. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
  192. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
  193. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
  194. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
  195. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
  196. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
  197. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
  198. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
  199. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
  200. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
  201. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
  202. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
  203. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
  204. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
  205. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
  206. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
  207. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
  208. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
  209. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
  210. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
  211. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
  212. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
  213. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
  214. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
  215. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
  216. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
  217. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
  218. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
  219. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
  220. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
  221. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
  222. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
  223. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
  224. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
  225. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
  226. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
  227. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
  228. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
  229. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
  230. khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
  231. khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
  232. khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
  233. khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
  234. khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
  235. khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
  236. khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
  237. khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
  238. khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
  239. khoj/interface/compiled/agents/index.html +1 -0
  240. khoj/interface/compiled/agents/index.txt +7 -0
  241. khoj/interface/compiled/agents.svg +6 -0
  242. khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
  243. khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
  244. khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
  245. khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
  246. khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
  247. khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
  248. khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
  249. khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
  250. khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
  251. khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
  252. khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
  253. khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
  254. khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
  255. khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
  256. khoj/interface/compiled/automation.svg +37 -0
  257. khoj/interface/compiled/automations/index.html +1 -0
  258. khoj/interface/compiled/automations/index.txt +8 -0
  259. khoj/interface/compiled/chat/index.html +1 -0
  260. khoj/interface/compiled/chat/index.txt +7 -0
  261. khoj/interface/compiled/chat.svg +24 -0
  262. khoj/interface/compiled/close.svg +5 -0
  263. khoj/interface/compiled/copy-button-success.svg +6 -0
  264. khoj/interface/compiled/copy-button.svg +5 -0
  265. khoj/interface/compiled/index.html +1 -0
  266. khoj/interface/compiled/index.txt +7 -0
  267. khoj/interface/compiled/khoj.webmanifest +76 -0
  268. khoj/interface/compiled/logo.svg +24 -0
  269. khoj/interface/compiled/search/index.html +1 -0
  270. khoj/interface/compiled/search/index.txt +7 -0
  271. khoj/interface/compiled/send.svg +1 -0
  272. khoj/interface/compiled/settings/index.html +1 -0
  273. khoj/interface/compiled/settings/index.txt +9 -0
  274. khoj/interface/compiled/share/chat/index.html +1 -0
  275. khoj/interface/compiled/share/chat/index.txt +7 -0
  276. khoj/interface/compiled/share.svg +8 -0
  277. khoj/interface/compiled/thumbs-down.svg +6 -0
  278. khoj/interface/compiled/thumbs-up.svg +6 -0
  279. khoj/interface/email/feedback.html +34 -0
  280. khoj/interface/email/magic_link.html +40 -0
  281. khoj/interface/email/task.html +37 -0
  282. khoj/interface/email/welcome.html +90 -0
  283. khoj/interface/web/.well-known/assetlinks.json +11 -0
  284. khoj/interface/web/assets/icons/agents.svg +19 -0
  285. khoj/interface/web/assets/icons/automation.svg +43 -0
  286. khoj/interface/web/assets/icons/chat.svg +24 -0
  287. khoj/interface/web/assets/icons/github.svg +1 -0
  288. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  289. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  290. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
  291. khoj/interface/web/assets/icons/khoj.svg +26 -0
  292. khoj/interface/web/assets/icons/logotype.svg +1 -0
  293. khoj/interface/web/assets/icons/search.svg +57 -0
  294. khoj/interface/web/assets/icons/sync.svg +4 -0
  295. khoj/interface/web/assets/khoj.css +237 -0
  296. khoj/interface/web/assets/utils.js +33 -0
  297. khoj/interface/web/base_config.html +445 -0
  298. khoj/interface/web/content_source_github_input.html +208 -0
  299. khoj/interface/web/login.html +310 -0
  300. khoj/interface/web/utils.html +48 -0
  301. khoj/main.py +249 -0
  302. khoj/manage.py +22 -0
  303. khoj/migrations/__init__.py +0 -0
  304. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  305. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  306. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  307. khoj/migrations/migrate_offline_model.py +29 -0
  308. khoj/migrations/migrate_processor_config_openai.py +67 -0
  309. khoj/migrations/migrate_server_pg.py +132 -0
  310. khoj/migrations/migrate_version.py +17 -0
  311. khoj/processor/__init__.py +0 -0
  312. khoj/processor/content/__init__.py +0 -0
  313. khoj/processor/content/docx/__init__.py +0 -0
  314. khoj/processor/content/docx/docx_to_entries.py +111 -0
  315. khoj/processor/content/github/__init__.py +0 -0
  316. khoj/processor/content/github/github_to_entries.py +226 -0
  317. khoj/processor/content/images/__init__.py +0 -0
  318. khoj/processor/content/images/image_to_entries.py +117 -0
  319. khoj/processor/content/markdown/__init__.py +0 -0
  320. khoj/processor/content/markdown/markdown_to_entries.py +160 -0
  321. khoj/processor/content/notion/notion_to_entries.py +259 -0
  322. khoj/processor/content/org_mode/__init__.py +0 -0
  323. khoj/processor/content/org_mode/org_to_entries.py +226 -0
  324. khoj/processor/content/org_mode/orgnode.py +532 -0
  325. khoj/processor/content/pdf/__init__.py +0 -0
  326. khoj/processor/content/pdf/pdf_to_entries.py +119 -0
  327. khoj/processor/content/plaintext/__init__.py +0 -0
  328. khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
  329. khoj/processor/content/text_to_entries.py +296 -0
  330. khoj/processor/conversation/__init__.py +0 -0
  331. khoj/processor/conversation/anthropic/__init__.py +0 -0
  332. khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
  333. khoj/processor/conversation/anthropic/utils.py +217 -0
  334. khoj/processor/conversation/google/__init__.py +0 -0
  335. khoj/processor/conversation/google/gemini_chat.py +253 -0
  336. khoj/processor/conversation/google/utils.py +260 -0
  337. khoj/processor/conversation/offline/__init__.py +0 -0
  338. khoj/processor/conversation/offline/chat_model.py +308 -0
  339. khoj/processor/conversation/offline/utils.py +80 -0
  340. khoj/processor/conversation/offline/whisper.py +15 -0
  341. khoj/processor/conversation/openai/__init__.py +0 -0
  342. khoj/processor/conversation/openai/gpt.py +243 -0
  343. khoj/processor/conversation/openai/utils.py +232 -0
  344. khoj/processor/conversation/openai/whisper.py +13 -0
  345. khoj/processor/conversation/prompts.py +1188 -0
  346. khoj/processor/conversation/utils.py +867 -0
  347. khoj/processor/embeddings.py +122 -0
  348. khoj/processor/image/generate.py +215 -0
  349. khoj/processor/speech/__init__.py +0 -0
  350. khoj/processor/speech/text_to_speech.py +51 -0
  351. khoj/processor/tools/__init__.py +0 -0
  352. khoj/processor/tools/online_search.py +472 -0
  353. khoj/processor/tools/run_code.py +179 -0
  354. khoj/routers/__init__.py +0 -0
  355. khoj/routers/api.py +760 -0
  356. khoj/routers/api_agents.py +295 -0
  357. khoj/routers/api_chat.py +1273 -0
  358. khoj/routers/api_content.py +634 -0
  359. khoj/routers/api_model.py +123 -0
  360. khoj/routers/api_phone.py +86 -0
  361. khoj/routers/api_subscription.py +144 -0
  362. khoj/routers/auth.py +307 -0
  363. khoj/routers/email.py +135 -0
  364. khoj/routers/helpers.py +2333 -0
  365. khoj/routers/notion.py +85 -0
  366. khoj/routers/research.py +364 -0
  367. khoj/routers/storage.py +63 -0
  368. khoj/routers/twilio.py +36 -0
  369. khoj/routers/web_client.py +141 -0
  370. khoj/search_filter/__init__.py +0 -0
  371. khoj/search_filter/base_filter.py +15 -0
  372. khoj/search_filter/date_filter.py +215 -0
  373. khoj/search_filter/file_filter.py +32 -0
  374. khoj/search_filter/word_filter.py +29 -0
  375. khoj/search_type/__init__.py +0 -0
  376. khoj/search_type/text_search.py +255 -0
  377. khoj/utils/__init__.py +0 -0
  378. khoj/utils/cli.py +101 -0
  379. khoj/utils/config.py +81 -0
  380. khoj/utils/constants.py +51 -0
  381. khoj/utils/fs_syncer.py +252 -0
  382. khoj/utils/helpers.py +627 -0
  383. khoj/utils/initialization.py +301 -0
  384. khoj/utils/jsonl.py +43 -0
  385. khoj/utils/models.py +47 -0
  386. khoj/utils/rawconfig.py +208 -0
  387. khoj/utils/state.py +48 -0
  388. khoj/utils/yaml.py +47 -0
  389. khoj-1.33.3.dev32.dist-info/METADATA +190 -0
  390. khoj-1.33.3.dev32.dist-info/RECORD +393 -0
  391. khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
  392. khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
  393. khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,117 @@
1
+ import logging
2
+ import re
3
+ from pathlib import Path
4
+ from typing import Dict, List, Tuple
5
+
6
+ import urllib3
7
+ from bs4 import BeautifulSoup
8
+
9
+ from khoj.database.models import Entry as DbEntry
10
+ from khoj.database.models import KhojUser
11
+ from khoj.processor.content.text_to_entries import TextToEntries
12
+ from khoj.utils.helpers import timer
13
+ from khoj.utils.rawconfig import Entry
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class PlaintextToEntries(TextToEntries):
    """Convert plaintext, HTML and XML files into entries and index them for a user."""

    # Lowercase file extensions (without the dot) whose markup is stripped before indexing
    _MARKUP_EXTENSIONS = ("html", "htm", "xml")

    def __init__(self):
        super().__init__()

    # Define Functions
    def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
        """Extract entries from the given files and update the user's embeddings.

        Args:
            files: Maps file name to file content. A file mapped to an empty
                string is passed on as a file name to delete.
            user: User whose entries are updated.
            regenerate: Forwarded unchanged to `update_embeddings`.

        Returns:
            The pair returned by `update_embeddings`, unpacked here as
            (num_new_embeddings, num_deleted_embeddings).
        """
        # Iterate the dict directly (not a set) so the file processing order stays deterministic
        deletion_file_names = {name for name, text in files.items() if text == ""}
        files = {name: text for name, text in files.items() if text != ""}

        # Extract Entries from specified plaintext files
        with timer("Extract entries from specified Plaintext files", logger):
            file_to_text_map, current_entries = PlaintextToEntries.extract_plaintext_entries(files)

        # Split entries by max tokens supported by model
        with timer("Split entries by max token size supported by model", logger):
            current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256, raw_is_compiled=True)

        # Identify, mark and merge any new entries with previous entries
        with timer("Identify new or updated entries", logger):
            num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
                user,
                current_entries,
                DbEntry.EntryType.PLAINTEXT,
                DbEntry.EntrySource.COMPUTER,
                key="compiled",
                logger=logger,
                deletion_filenames=deletion_file_names,
                regenerate=regenerate,
                file_to_text_map=file_to_text_map,
            )

        return num_new_embeddings, num_deleted_embeddings

    @staticmethod
    def extract_html_content(markup_content: str, markup_type: str):
        """Return the text of an HTML or XML document, one text node per line.

        Args:
            markup_content: The raw markup.
            markup_type: "xml" selects BeautifulSoup's XML parser; any other
                value falls back to the built-in "html.parser".
        """
        parser = "xml" if markup_type == "xml" else "html.parser"
        soup = BeautifulSoup(markup_content, parser)
        return soup.get_text(strip=True, separator="\n")

    @staticmethod
    def extract_plaintext_entries(text_files: Dict[str, str]) -> Tuple[Dict, List[Entry]]:
        """Build one entry per readable file.

        Args:
            text_files: Maps file name to raw file content.

        Returns:
            A tuple of (file name -> raw content for every file that was
            processed without error, list of entries built from those files).
            Files that raise during processing are logged and skipped.
        """
        entries: List[str] = []
        entry_to_file_map: List[Tuple[str, str]] = []
        file_to_text_map = dict()
        for text_file, text_content in text_files.items():
            try:
                entries, entry_to_file_map = PlaintextToEntries.process_single_plaintext_file(
                    text_content, text_file, entries, entry_to_file_map
                )
                # Keep the raw (pre markup-stripping) content for the file object store
                file_to_text_map[text_file] = text_content
            except Exception as e:
                logger.warning(f"Unable to read file: {text_file} as plaintext. Skipping file.")
                logger.warning(e, exc_info=True)

        # Build entries from the (text, file) pairs rather than a text-keyed dict.
        # A dict keyed by entry text collapses files with identical content onto
        # a single file name, so all but the last such file would be mislabelled.
        converted_entries = [
            PlaintextToEntries._build_entry(parsed_entry, raw_filename)
            for parsed_entry, raw_filename in entry_to_file_map
        ]
        logger.debug(f"Converted {len(converted_entries)} plaintext files to entries")
        return file_to_text_map, converted_entries

    @staticmethod
    def process_single_plaintext_file(
        text_content: str,
        text_file: str,
        entries: List[str],
        entry_to_file_map: List[Tuple[str, str]],
    ) -> Tuple[List[str], List[Tuple[str, str]]]:
        """Append one file's text to the running entry lists.

        Files with an html, htm or xml extension (matched case-insensitively)
        have their markup stripped first. Both lists are extended in place and
        also returned.
        """
        # Require a real ".ext" suffix: a bare endswith("html") also matches names like "dumphtml"
        extension = text_file.rsplit(".", 1)[-1].lower() if "." in text_file else ""
        if extension in PlaintextToEntries._MARKUP_EXTENSIONS:
            text_content = PlaintextToEntries.extract_html_content(text_content, extension)
        entry_to_file_map += [(text_content, text_file)]
        entries.extend([text_content])
        return entries, entry_to_file_map

    @staticmethod
    def convert_text_files_to_entries(parsed_entries: List[str], entry_to_file_map: dict[str, str]) -> List[Entry]:
        """Convert each plaintext file into an entry.

        Args:
            parsed_entries: Text of each file.
            entry_to_file_map: Maps an entry's text to its file name or URL.
                Being keyed by text, it cannot tell apart two files with the
                same content.

        Raises:
            KeyError: If an entry's text is missing from `entry_to_file_map`.
        """
        entries: List[Entry] = [
            PlaintextToEntries._build_entry(parsed_entry, entry_to_file_map[parsed_entry])
            for parsed_entry in parsed_entries
        ]

        logger.debug(f"Converted {len(parsed_entries)} plaintext files to entries")
        return entries

    @staticmethod
    def _build_entry(parsed_entry: str, raw_filename: str) -> Entry:
        """Create the entry for a single file's text and its file name or URL."""
        # Check if raw_filename is a URL. If so, normalise it via urllib3. If not, keep it as is.
        if isinstance(raw_filename, str) and re.search(r"^https?://", raw_filename):
            # Escape the URL to avoid issues with special characters
            entry_filename = urllib3.util.parse_url(raw_filename).url
        else:
            entry_filename = raw_filename

        # Prepend file name to compiled entry for context to model
        return Entry(
            raw=parsed_entry,
            file=f"{entry_filename}",
            compiled=f"{entry_filename}\n{parsed_entry}",
            heading=entry_filename,
        )
@@ -0,0 +1,296 @@
1
+ import hashlib
2
+ import logging
3
+ import re
4
+ import uuid
5
+ from abc import ABC, abstractmethod
6
+ from itertools import repeat
7
+ from typing import Any, Callable, List, Set, Tuple
8
+
9
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
10
+ from tqdm import tqdm
11
+
12
+ from khoj.database.adapters import (
13
+ EntryAdapters,
14
+ FileObjectAdapters,
15
+ get_default_search_model,
16
+ )
17
+ from khoj.database.models import Entry as DbEntry
18
+ from khoj.database.models import EntryDates, KhojUser
19
+ from khoj.search_filter.date_filter import DateFilter
20
+ from khoj.utils import state
21
+ from khoj.utils.helpers import batcher, is_none_or_empty, timer
22
+ from khoj.utils.rawconfig import Entry
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ class TextToEntries(ABC):
28
def __init__(self, config: Any = None):
    """Store the given config and set up the helpers shared by all processors."""
    self.config = config
    # Embedding models are read from the shared application state
    self.embeddings_model = state.embeddings_model
    self.date_filter = DateFilter()
31
+ self.date_filter = DateFilter()
32
+
33
@abstractmethod
def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
    """Process the given files for the user. Must be implemented by subclasses.

    NOTE(review): the returned pair is presumably the
    (added entries, deleted entries) counts produced by ``update_embeddings``;
    confirm against the concrete implementations.
    """
36
+
37
+ @staticmethod
38
+ def hash_func(key: str) -> Callable:
39
+ return lambda entry: hashlib.md5(bytes(getattr(entry, key), encoding="utf-8")).hexdigest()
40
+
41
+ @staticmethod
42
+ def remove_long_words(text: str, max_word_length: int = 500) -> str:
43
+ "Remove words longer than max_word_length from text."
44
+ # Split the string by words, keeping the delimiters
45
+ splits = re.split(r"(\s+)", text) + [""]
46
+ words_with_delimiters = list(zip(splits[::2], splits[1::2]))
47
+
48
+ # Filter out long words while preserving delimiters in text
49
+ filtered_text = [
50
+ f"{word}{delimiter}"
51
+ for word, delimiter in words_with_delimiters
52
+ if not word.strip() or len(word.strip()) <= max_word_length
53
+ ]
54
+
55
+ return "".join(filtered_text)
56
+
57
+ @staticmethod
58
+ def tokenizer(text: str) -> List[str]:
59
+ "Tokenize text into words."
60
+ return text.split()
61
+
62
@staticmethod
def split_entries_by_max_tokens(
    entries: List[Entry], max_tokens: int = 256, max_word_length: int = 500, raw_is_compiled: bool = False
) -> List[Entry]:
    """Split entries if compiled entry length exceeds the max tokens supported by the ML model.

    Args:
        entries: Entries to chunk. Entries with an empty compiled text are skipped.
        max_tokens: Maximum number of whitespace separated tokens per chunk.
        max_word_length: Words longer than this are dropped from each chunk.
        raw_is_compiled: When True, the raw text of each chunk is set to its compiled text.

    Returns:
        One new ``Entry`` per chunk. Chunks of the same source entry share a corpus_id.

    Note:
        The input entries are modified in place: their ``raw``, ``heading`` and
        ``file`` fields are overwritten with the cleaned values while chunking.
    """
    # Use chunking preference order: paragraphs > sentences > words > characters.
    # The splitter depends only on the arguments, so build it once instead of once per entry.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=max_tokens,
        separators=["\n\n", "\n", "!", "?", ".", " ", "\t", ""],
        keep_separator=True,
        length_function=lambda chunk: len(TextToEntries.tokenizer(chunk)),
        chunk_overlap=0,
    )

    chunked_entries: List[Entry] = []
    for entry in entries:
        if is_none_or_empty(entry.compiled):
            continue

        # Split entry into chunks of max_tokens
        chunked_entry_chunks = text_splitter.split_text(entry.compiled)
        corpus_id = uuid.uuid4()

        # Create heading prefixed entry from each chunk
        for chunk_index, compiled_entry_chunk in enumerate(chunked_entry_chunks):
            # Prepend heading to all other chunks, the first chunk already has heading from original entry
            if chunk_index > 0 and entry.heading:
                # Snip heading to avoid crossing max_tokens limit
                # Keep last 100 characters of heading as entry heading more important than filename
                snipped_heading = entry.heading[-100:]
                # Prepend snipped heading
                compiled_entry_chunk = f"{snipped_heading}\n{compiled_entry_chunk}"

            # Drop long words instead of having entry truncated to maintain quality of entry processed by models
            compiled_entry_chunk = TextToEntries.remove_long_words(compiled_entry_chunk, max_word_length)

            # Clean entry of unwanted characters like \0 character
            compiled_entry_chunk = TextToEntries.clean_field(compiled_entry_chunk)
            entry.raw = compiled_entry_chunk if raw_is_compiled else TextToEntries.clean_field(entry.raw)
            entry.heading = TextToEntries.clean_field(entry.heading)
            entry.file = TextToEntries.clean_field(entry.file)

            chunked_entries.append(
                Entry(
                    compiled=compiled_entry_chunk,
                    raw=entry.raw,
                    heading=entry.heading,
                    file=entry.file,
                    corpus_id=corpus_id,
                )
            )

    return chunked_entries
114
+
115
def update_embeddings(
    self,
    user: KhojUser,
    current_entries: List[Entry],
    file_type: str,
    file_source: str,
    key="compiled",
    logger: logging.Logger = None,
    deletion_filenames: Set[str] = None,
    regenerate: bool = False,
    file_to_text_map: dict[str, str] = None,
):
    """Sync the user's entries in the database with the given current entries.

    Steps, in order:
      1. Hash every current entry on its `key` attribute and group the hashes by file.
      2. When `regenerate` is set, delete all of the user's entries of `file_type`.
      3. Embed and bulk create the entries whose hash is not in the database yet.
      4. When `file_to_text_map` is given, create or update the stored raw text
         of every file that had entries added.
      5. Index the dates extracted from the compiled text of the added entries.
      6. For each current file, delete stored entries whose hash is no longer present.
      7. Delete the entries and file objects of every file in `deletion_filenames`.

    Args:
        user: Owner of the entries.
        current_entries: Entries extracted from the current content.
        file_type: File type stored on, and used to look up, the entries.
        file_source: File source stored on newly created entries.
        key: Name of the entry attribute that is hashed and embedded.
        logger: Logger for progress and timing messages. Direct log calls fall
            back to this module's logger when it is None.
        deletion_filenames: Files whose entries should be removed.
        regenerate: Whether to drop all existing entries of `file_type` first.
        file_to_text_map: Raw text of each file, keyed by file name.

    Returns:
        Tuple of (number of entries added, number of entries deleted).
    """
    # The `logger` parameter shadows the module logger and defaults to None.
    # Use a fallback for direct log calls so the default does not raise AttributeError.
    # `timer` still gets the parameter as passed, keeping its behaviour unchanged.
    log = logger if logger is not None else logging.getLogger(__name__)

    with timer("Constructed current entry hashes in", logger):
        hashes_by_file = dict[str, set[str]]()
        entry_hasher = TextToEntries.hash_func(key)
        # Hash each entry once and reuse the result for both lookups
        current_entry_hashes = [entry_hasher(entry) for entry in current_entries]
        hash_to_current_entries = dict(zip(current_entry_hashes, current_entries))
        for entry, entry_hash in tqdm(
            zip(current_entries, current_entry_hashes), total=len(current_entries), desc="Hashing Entries"
        ):
            hashes_by_file.setdefault(entry.file, set()).add(entry_hash)

    num_deleted_entries = 0
    if regenerate:
        with timer("Cleared existing dataset for regeneration in", logger):
            log.debug(f"Deleting all entries for file type {file_type}")
            num_deleted_entries = EntryAdapters.delete_all_entries(user, file_type=file_type)

    hashes_to_process = set()
    with timer("Identified entries to add to database in", logger):
        for file in tqdm(hashes_by_file, desc="Identify new entries"):
            hashes_for_file = hashes_by_file[file]
            existing_entries = DbEntry.objects.filter(
                user=user, hashed_value__in=hashes_for_file, file_type=file_type
            )
            existing_entry_hashes = {entry.hashed_value for entry in existing_entries}
            hashes_to_process |= hashes_for_file - existing_entry_hashes

    embeddings = []
    model = get_default_search_model()
    with timer("Generated embeddings for entries to add to database in", logger):
        entries_to_process = [hash_to_current_entries[hashed_val] for hashed_val in hashes_to_process]
        data_to_embed = [getattr(entry, key) for entry in entries_to_process]
        embeddings += self.embeddings_model[model.name].embed_documents(data_to_embed)

    added_entries: list[DbEntry] = []
    with timer("Added entries to database in", logger):
        num_items = len(hashes_to_process)
        assert num_items == len(embeddings)
        batch_size = min(200, num_items)
        # Embeddings were generated by iterating hashes_to_process, so zipping pairs them up again
        entry_batches = zip(hashes_to_process, embeddings)

        for entry_batch in tqdm(batcher(entry_batches, batch_size), desc="Add entries to database"):
            batch_embeddings_to_create: List[DbEntry] = []
            for entry_hash, new_entry in entry_batch:
                entry = hash_to_current_entries[entry_hash]
                batch_embeddings_to_create.append(
                    DbEntry(
                        user=user,
                        embeddings=new_entry,
                        raw=entry.raw,
                        compiled=entry.compiled,
                        heading=entry.heading[:1000],  # Truncate to max chars of field allowed
                        file_path=entry.file,
                        file_source=file_source,
                        file_type=file_type,
                        hashed_value=entry_hash,
                        corpus_id=entry.corpus_id,
                        search_model=model,
                    )
                )
            try:
                added_entries += DbEntry.objects.bulk_create(batch_embeddings_to_create)
            except Exception as e:
                # Best effort: log the failed batch and continue with the next one
                batch_indexing_error = "\n\n".join(
                    f"file: {db_entry.file_path}\nheading: {db_entry.heading}\ncompiled: {db_entry.compiled[:100]}\nraw: {db_entry.raw[:100]}"
                    for db_entry in batch_embeddings_to_create
                )
                log.error(f"Error adding entries to database:\n{batch_indexing_error}\n---\n{e}", exc_info=True)
        log.debug(f"Added {len(added_entries)} {file_type} entries to database")

    if file_to_text_map:
        with timer("Indexed text of modified file in", logger):
            # get the set of modified files from added_entries
            modified_files = {entry.file_path for entry in added_entries}
            # create or update text of each updated file indexed on DB
            for modified_file in modified_files:
                raw_text = file_to_text_map[modified_file]
                file_object = FileObjectAdapters.get_file_object_by_name(user, modified_file)
                if file_object:
                    FileObjectAdapters.update_raw_text(file_object, raw_text)
                else:
                    FileObjectAdapters.create_file_object(user, modified_file, raw_text)

    new_dates = []
    with timer("Indexed dates from added entries in", logger):
        for added_entry in added_entries:
            dates_to_create = [
                EntryDates(date=date, entry=added_entry)
                for date in self.date_filter.extract_dates(added_entry.compiled)
                if not is_none_or_empty(date)
            ]
            new_dates += EntryDates.objects.bulk_create(dates_to_create)
        log.debug(f"Indexed {len(new_dates)} dates from added {file_type} entries")

    with timer("Deleted entries identified by server from database in", logger):
        for file in hashes_by_file:
            existing_entry_hashes = EntryAdapters.get_existing_entry_hashes_by_file(user, file)
            to_delete_entry_hashes = set(existing_entry_hashes) - hashes_by_file[file]
            num_deleted_entries += len(to_delete_entry_hashes)
            EntryAdapters.delete_entry_by_hash(user, hashed_values=list(to_delete_entry_hashes))

    with timer("Deleted entries requested by clients from database in", logger):
        if deletion_filenames is not None:
            for file_path in deletion_filenames:
                deleted_count = EntryAdapters.delete_entry_by_file(user, file_path)
                num_deleted_entries += deleted_count
                FileObjectAdapters.delete_file_object_by_name(user, file_path)

    return len(added_entries), num_deleted_entries
233
+
234
@staticmethod
def mark_entries_for_update(
    current_entries: List[Entry],
    previous_entries: List[Entry],
    key="compiled",
    logger: logging.Logger = None,
    deletion_filenames: Set[str] = None,
):
    """Combine previous and current entries, marking which ones need new embeddings.

    Entries are compared by the hash of their `key` attribute.

    Args:
        current_entries: Entries extracted from the current content.
        previous_entries: Entries from the previous run.
        key: Name of the entry attribute to hash.
        logger: Logger passed on to `timer`.
        deletion_filenames: Files whose previous entries should be dropped.
            When None, only previous entries still present in the current set are kept.

    Returns:
        List of (id, entry) tuples. Preserved previous entries come first, with
        their position in `previous_entries` as id. New entries follow in the
        order they appear in `current_entries`, all with id -1.
    """
    # Hash all current and previous entries to identify new entries
    with timer("Hash previous, current entries", logger):
        entry_hasher = TextToEntries.hash_func(key)
        current_entry_hashes = list(map(entry_hasher, current_entries))
        previous_entry_hashes = list(map(entry_hasher, previous_entries))
        if deletion_filenames is not None:
            deletion_entries = [entry for entry in previous_entries if entry.file in deletion_filenames]
            deletion_entry_hashes = list(map(entry_hasher, deletion_entries))
        else:
            deletion_entry_hashes = []

    with timer("Identify, Mark, Combine new, existing entries", logger):
        hash_to_current_entries = dict(zip(current_entry_hashes, current_entries))
        hash_to_previous_entries = dict(zip(previous_entry_hashes, previous_entries))

        # Position of the first occurrence of each hash, matching what list.index returns.
        # Looking positions up in a dict avoids a linear scan of the list per entry.
        first_current_index: dict[str, int] = {}
        for index, entry_hash in enumerate(current_entry_hashes):
            first_current_index.setdefault(entry_hash, index)
        first_previous_index: dict[str, int] = {}
        for index, entry_hash in enumerate(previous_entry_hashes):
            first_previous_index.setdefault(entry_hash, index)

        current_hash_set = set(current_entry_hashes)
        previous_hash_set = set(previous_entry_hashes)

        # All entries that did not exist in the previous set are to be added
        new_entry_hashes = current_hash_set - previous_hash_set
        # All entries that exist in both current and previous sets are kept
        existing_entry_hashes = current_hash_set & previous_hash_set
        # All entries that exist in the previous set but not in the current set should be preserved
        remaining_entry_hashes = previous_hash_set - current_hash_set
        # All entries that exist in the previous set and also in the deletions set should be removed
        to_delete_entry_hashes = previous_hash_set & set(deletion_entry_hashes)

        preserving_entry_hashes = existing_entry_hashes

        if deletion_filenames is not None:
            preserving_entry_hashes = (
                (existing_entry_hashes | remaining_entry_hashes)
                if len(deletion_entry_hashes) == 0
                else (previous_hash_set - to_delete_entry_hashes)
            )

        # load new entries in the order in which they are processed for a stable sort
        new_entries = [
            (first_current_index[entry_hash], hash_to_current_entries[entry_hash])
            for entry_hash in new_entry_hashes
        ]
        new_entries_sorted = sorted(new_entries, key=lambda e: e[0])
        # Mark new entries with -1 id to flag for later embeddings generation
        new_entries_sorted = [(-1, entry[1]) for entry in new_entries_sorted]

        # Set id of existing entries to their previous ids to reuse their existing encoded embeddings
        existing_entries = [
            (first_previous_index[entry_hash], hash_to_previous_entries[entry_hash])
            for entry_hash in preserving_entry_hashes
        ]
        existing_entries_sorted = sorted(existing_entries, key=lambda e: e[0])

        entries_with_ids = existing_entries_sorted + new_entries_sorted

    return entries_with_ids
293
+
294
@staticmethod
def clean_field(field: str) -> str:
    """Return field without \\0 characters, or "" when is_none_or_empty reports it as empty."""
    if is_none_or_empty(field):
        return ""
    return field.replace("\0", "")
File without changes
File without changes
@@ -0,0 +1,243 @@
1
+ import logging
2
+ from datetime import datetime, timedelta
3
+ from typing import Dict, List, Optional
4
+
5
+ import pyjson5
6
+ from langchain.schema import ChatMessage
7
+
8
+ from khoj.database.models import Agent, ChatModel, KhojUser
9
+ from khoj.processor.conversation import prompts
10
+ from khoj.processor.conversation.anthropic.utils import (
11
+ anthropic_chat_completion_with_backoff,
12
+ anthropic_completion_with_backoff,
13
+ format_messages_for_anthropic,
14
+ )
15
+ from khoj.processor.conversation.utils import (
16
+ clean_json,
17
+ construct_structured_message,
18
+ generate_chatml_messages_with_context,
19
+ messages_to_print,
20
+ )
21
+ from khoj.utils.helpers import (
22
+ ConversationCommand,
23
+ is_none_or_empty,
24
+ truncate_code_context,
25
+ )
26
+ from khoj.utils.rawconfig import FileAttachment, LocationData
27
+ from khoj.utils.yaml import yaml_dump
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
def extract_questions_anthropic(
    text,
    model: Optional[str] = "claude-instant-1.2",
    conversation_log=None,
    api_key=None,
    temperature=0.7,
    location_data: LocationData = None,
    user: KhojUser = None,
    query_images: Optional[list[str]] = None,
    vision_enabled: bool = False,
    personality_context: Optional[str] = None,
    query_files: str = None,
    tracer: dict = None,
):
    """
    Infer search queries to retrieve relevant notes to answer user query

    Args:
        text: The user's message.
        model: Name of the Anthropic model to query.
        conversation_log: Conversation so far. Its last four "chat" items are
            considered, and those sent by khoj are added to the prompt.
            Treated as empty when None.
        api_key: Anthropic API key.
        temperature: Sampling temperature passed to the model.
        location_data: User location, shown as "Unknown" in the prompt when missing.
        user: User whose full name, when available, is added to the prompt.
        query_images: Images attached to the query.
        vision_enabled: Whether images may be passed to the model.
        personality_context: Personality text added to the system prompt.
        query_files: Attached file context added to the user message.
        tracer: Dictionary passed on to the completion call. A fresh one is
            created per call when None.

    Returns:
        List of non-empty search queries. Falls back to ``[text]`` when the model
        response has no usable queries or cannot be parsed.
    """
    # Use per-call containers instead of mutable defaults shared across calls
    conversation_log = {} if conversation_log is None else conversation_log
    tracer = {} if tracer is None else tracer

    # Describe the user's location and name for the system prompt
    location = f"{location_data}" if location_data else "Unknown"
    username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""

    # Extract Past User Message and Inferred Questions from Conversation Log
    chat_history = "".join(
        [
            f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
            for chat in conversation_log.get("chat", [])[-4:]
            if chat["by"] == "khoj"
        ]
    )

    # Get dates relative to today for prompt creation
    today = datetime.today()
    current_new_year = today.replace(month=1, day=1)
    last_new_year = current_new_year.replace(year=today.year - 1)

    system_prompt = prompts.extract_questions_anthropic_system_prompt.format(
        current_date=today.strftime("%Y-%m-%d"),
        day_of_week=today.strftime("%A"),
        current_month=today.strftime("%Y-%m"),
        last_new_year=last_new_year.strftime("%Y"),
        last_new_year_date=last_new_year.strftime("%Y-%m-%d"),
        current_new_year_date=current_new_year.strftime("%Y-%m-%d"),
        yesterday_date=(today - timedelta(days=1)).strftime("%Y-%m-%d"),
        location=location,
        username=username,
        personality_context=personality_context,
    )

    prompt = prompts.extract_questions_anthropic_user_message.format(
        chat_history=chat_history,
        text=text,
    )

    prompt = construct_structured_message(
        message=prompt,
        images=query_images,
        model_type=ChatModel.ModelType.ANTHROPIC,
        vision_enabled=vision_enabled,
        attached_file_context=query_files,
    )

    messages = [ChatMessage(content=prompt, role="user")]

    messages, system_prompt = format_messages_for_anthropic(messages, system_prompt)

    response = anthropic_completion_with_backoff(
        messages=messages,
        system_prompt=system_prompt,
        model_name=model,
        temperature=temperature,
        api_key=api_key,
        response_type="json_object",
        tracer=tracer,
    )

    # Extract, Clean Message from Claude's Response
    try:
        response = clean_json(response)
        response = pyjson5.loads(response)
        response = [q.strip() for q in response["queries"] if q.strip()]
        if not response:
            logger.error(f"Invalid response for constructing subqueries: {response}")
            return [text]
        return response
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and SystemExit still propagate
        logger.warning(f"Claude returned invalid JSON. Falling back to using user message as search query.\n{response}")
        questions = [text]
    logger.debug(f"Extracted Questions by Claude: {questions}")
    return questions
123
+
124
+
125
def anthropic_send_message_to_model(messages, api_key, model, response_type="text", tracer=None):
    """
    Send message to model

    Args:
        messages: Chat messages. They are converted with
            ``format_messages_for_anthropic``, which also yields the system prompt.
        api_key: Anthropic API key.
        model: Name of the Anthropic model to query.
        response_type: Response type passed on to the completion call.
        tracer: Dictionary passed on to the completion call. A fresh one is
            created per call when None.

    Returns:
        The result of ``anthropic_completion_with_backoff``.
    """
    # Use a per-call dictionary instead of a mutable default shared across calls
    tracer = {} if tracer is None else tracer

    messages, system_prompt = format_messages_for_anthropic(messages)

    # Get Response from Claude. The response_type is passed on to the completion call as is.
    return anthropic_completion_with_backoff(
        messages=messages,
        system_prompt=system_prompt,
        model_name=model,
        api_key=api_key,
        response_type=response_type,
        tracer=tracer,
    )
140
+
141
+
142
def converse_anthropic(
    references,
    user_query,
    online_results: Optional[Dict[str, Dict]] = None,
    code_results: Optional[Dict[str, Dict]] = None,
    conversation_log=None,
    model: Optional[str] = "claude-3-5-sonnet-20241022",
    api_key: Optional[str] = None,
    completion_func=None,
    conversation_commands=None,
    max_prompt_size=None,
    tokenizer_name=None,
    location_data: LocationData = None,
    user_name: str = None,
    agent: Agent = None,
    query_images: Optional[list[str]] = None,
    vision_available: bool = False,
    query_files: str = None,
    generated_files: List[FileAttachment] = None,
    program_execution_context: Optional[List[str]] = None,
    generated_asset_results: Dict[str, Dict] = None,
    tracer: dict = None,
):
    """
    Converse with user using Anthropic's Claude

    Builds the system prompt from the agent personality (or the default one),
    the current date and the optional user location and name. Notes, online
    results and code results are added to the context message before the
    model is called.

    Args left as None fall back as follows: ``conversation_log``,
    ``generated_asset_results`` and ``tracer`` become a fresh empty dict, and
    ``conversation_commands`` becomes ``[ConversationCommand.Default]``.

    Returns:
        An iterator over a single canned message when only notes (or only online
        results) were requested but none were found. Otherwise the result of
        ``anthropic_chat_completion_with_backoff``.
    """
    # Use per-call containers instead of mutable defaults shared across calls
    conversation_log = {} if conversation_log is None else conversation_log
    if conversation_commands is None:
        conversation_commands = [ConversationCommand.Default]
    generated_asset_results = {} if generated_asset_results is None else generated_asset_results
    tracer = {} if tracer is None else tracer

    # Initialize Variables
    current_date = datetime.now()

    if agent and agent.personality:
        system_prompt = prompts.custom_personality.format(
            name=agent.name,
            bio=agent.personality,
            current_date=current_date.strftime("%Y-%m-%d"),
            day_of_week=current_date.strftime("%A"),
        )
    else:
        system_prompt = prompts.personality.format(
            current_date=current_date.strftime("%Y-%m-%d"),
            day_of_week=current_date.strftime("%A"),
        )

    if location_data:
        location_prompt = prompts.user_location.format(location=f"{location_data}")
        system_prompt = f"{system_prompt}\n{location_prompt}"

    if user_name:
        user_name_prompt = prompts.user_name.format(name=user_name)
        system_prompt = f"{system_prompt}\n{user_name_prompt}"

    # Get Conversation Primer appropriate to Conversation Type
    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
        no_notes_message = prompts.no_notes_found.format()
        # completion_func is optional, so only call it when one was provided
        if completion_func:
            completion_func(chat_response=no_notes_message)
        return iter([no_notes_message])
    elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
        no_online_results_message = prompts.no_online_results_found.format()
        if completion_func:
            completion_func(chat_response=no_online_results_message)
        return iter([no_online_results_message])

    context_message = ""
    if not is_none_or_empty(references):
        context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
    if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
        context_message += (
            f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
        )
    context_message = context_message.strip()

    # Setup Prompt with Primer or Conversation History
    messages = generate_chatml_messages_with_context(
        user_query,
        context_message=context_message,
        conversation_log=conversation_log,
        model_name=model,
        max_prompt_size=max_prompt_size,
        tokenizer_name=tokenizer_name,
        query_images=query_images,
        vision_enabled=vision_available,
        model_type=ChatModel.ModelType.ANTHROPIC,
        query_files=query_files,
        generated_files=generated_files,
        generated_asset_results=generated_asset_results,
        program_execution_context=program_execution_context,
    )

    messages, system_prompt = format_messages_for_anthropic(messages, system_prompt)
    logger.debug(f"Conversation Context for Claude: {messages_to_print(messages)}")

    # Get Response from Claude
    return anthropic_chat_completion_with_backoff(
        messages=messages,
        compiled_references=references,
        online_results=online_results,
        model_name=model,
        temperature=0,
        api_key=api_key,
        system_prompt=system_prompt,
        completion_func=completion_func,
        max_prompt_size=max_prompt_size,
        tracer=tracer,
    )
+ )