khoj 1.33.3.dev32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393)
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +218 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +452 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1821 -0
  10. khoj/database/admin.py +417 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_default_model.py +116 -0
  15. khoj/database/management/commands/change_generated_images_url.py +61 -0
  16. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  17. khoj/database/migrations/0001_khojuser.py +98 -0
  18. khoj/database/migrations/0002_googleuser.py +32 -0
  19. khoj/database/migrations/0003_vector_extension.py +10 -0
  20. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  21. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  22. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  23. khoj/database/migrations/0007_add_conversation.py +27 -0
  24. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  25. khoj/database/migrations/0009_khojapiuser.py +24 -0
  26. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  27. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  28. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  29. khoj/database/migrations/0012_entry_file_source.py +21 -0
  30. khoj/database/migrations/0013_subscription.py +37 -0
  31. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  32. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  33. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  34. khoj/database/migrations/0017_searchmodel.py +32 -0
  35. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  36. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  37. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  38. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  39. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  40. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  41. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  42. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  43. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  45. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  46. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  47. khoj/database/migrations/0029_userrequests.py +27 -0
  48. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  49. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  50. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  51. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  52. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  53. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  54. khoj/database/migrations/0035_processlock.py +26 -0
  55. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  56. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  57. khoj/database/migrations/0036_publicconversation.py +42 -0
  58. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  59. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  60. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  61. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  62. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  63. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  64. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  65. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  66. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  67. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  68. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  69. khoj/database/migrations/0045_fileobject.py +37 -0
  70. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  71. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  72. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  73. khoj/database/migrations/0049_datastore.py +38 -0
  74. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  75. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  76. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  77. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  78. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  79. khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
  80. khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
  81. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  82. khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
  83. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  84. khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
  85. khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
  86. khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
  87. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  88. khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
  89. khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
  90. khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
  91. khoj/database/migrations/0063_conversation_temp_id.py +36 -0
  92. khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
  93. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  94. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  95. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  96. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  97. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  98. khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
  99. khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
  100. khoj/database/migrations/0072_entry_search_model.py +24 -0
  101. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  102. khoj/database/migrations/0074_alter_conversation_title.py +17 -0
  103. khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
  104. khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
  105. khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
  106. khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
  107. khoj/database/migrations/__init__.py +0 -0
  108. khoj/database/models/__init__.py +725 -0
  109. khoj/database/tests.py +3 -0
  110. khoj/interface/compiled/404/index.html +1 -0
  111. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
  112. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
  113. khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
  114. khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
  115. khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
  116. khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
  117. khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
  118. khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
  119. khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
  120. khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
  121. khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
  122. khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
  123. khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
  124. khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
  125. khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
  126. khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
  127. khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
  128. khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
  129. khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
  130. khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
  131. khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
  132. khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
  133. khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
  134. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  135. khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
  136. khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
  137. khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
  138. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  139. khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
  140. khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
  141. khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
  142. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  143. khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
  144. khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
  145. khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
  146. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  147. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
  148. khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
  149. khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
  150. khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
  151. khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
  152. khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
  153. khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
  154. khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
  155. khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  156. khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
  157. khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
  158. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  159. khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
  160. khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
  161. khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
  162. khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
  163. khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
  164. khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
  165. khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
  166. khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
  167. khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
  168. khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
  169. khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
  170. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
  171. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
  172. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
  173. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
  174. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
  175. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
  176. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
  177. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
  178. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
  179. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
  180. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
  181. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
  182. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
  183. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
  184. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
  185. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
  186. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
  187. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
  188. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
  189. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
  190. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
  191. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
  192. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
  193. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
  194. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
  195. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
  196. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
  197. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
  198. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
  199. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
  200. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
  201. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
  202. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
  203. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
  204. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
  205. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
  206. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
  207. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
  208. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
  209. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
  210. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
  211. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
  212. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
  213. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
  214. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
  215. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
  216. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
  217. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
  218. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
  219. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
  220. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
  221. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
  222. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
  223. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
  224. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
  225. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
  226. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
  227. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
  228. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
  229. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
  230. khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
  231. khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
  232. khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
  233. khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
  234. khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
  235. khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
  236. khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
  237. khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
  238. khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
  239. khoj/interface/compiled/agents/index.html +1 -0
  240. khoj/interface/compiled/agents/index.txt +7 -0
  241. khoj/interface/compiled/agents.svg +6 -0
  242. khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
  243. khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
  244. khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
  245. khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
  246. khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
  247. khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
  248. khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
  249. khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
  250. khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
  251. khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
  252. khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
  253. khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
  254. khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
  255. khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
  256. khoj/interface/compiled/automation.svg +37 -0
  257. khoj/interface/compiled/automations/index.html +1 -0
  258. khoj/interface/compiled/automations/index.txt +8 -0
  259. khoj/interface/compiled/chat/index.html +1 -0
  260. khoj/interface/compiled/chat/index.txt +7 -0
  261. khoj/interface/compiled/chat.svg +24 -0
  262. khoj/interface/compiled/close.svg +5 -0
  263. khoj/interface/compiled/copy-button-success.svg +6 -0
  264. khoj/interface/compiled/copy-button.svg +5 -0
  265. khoj/interface/compiled/index.html +1 -0
  266. khoj/interface/compiled/index.txt +7 -0
  267. khoj/interface/compiled/khoj.webmanifest +76 -0
  268. khoj/interface/compiled/logo.svg +24 -0
  269. khoj/interface/compiled/search/index.html +1 -0
  270. khoj/interface/compiled/search/index.txt +7 -0
  271. khoj/interface/compiled/send.svg +1 -0
  272. khoj/interface/compiled/settings/index.html +1 -0
  273. khoj/interface/compiled/settings/index.txt +9 -0
  274. khoj/interface/compiled/share/chat/index.html +1 -0
  275. khoj/interface/compiled/share/chat/index.txt +7 -0
  276. khoj/interface/compiled/share.svg +8 -0
  277. khoj/interface/compiled/thumbs-down.svg +6 -0
  278. khoj/interface/compiled/thumbs-up.svg +6 -0
  279. khoj/interface/email/feedback.html +34 -0
  280. khoj/interface/email/magic_link.html +40 -0
  281. khoj/interface/email/task.html +37 -0
  282. khoj/interface/email/welcome.html +90 -0
  283. khoj/interface/web/.well-known/assetlinks.json +11 -0
  284. khoj/interface/web/assets/icons/agents.svg +19 -0
  285. khoj/interface/web/assets/icons/automation.svg +43 -0
  286. khoj/interface/web/assets/icons/chat.svg +24 -0
  287. khoj/interface/web/assets/icons/github.svg +1 -0
  288. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  289. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  290. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
  291. khoj/interface/web/assets/icons/khoj.svg +26 -0
  292. khoj/interface/web/assets/icons/logotype.svg +1 -0
  293. khoj/interface/web/assets/icons/search.svg +57 -0
  294. khoj/interface/web/assets/icons/sync.svg +4 -0
  295. khoj/interface/web/assets/khoj.css +237 -0
  296. khoj/interface/web/assets/utils.js +33 -0
  297. khoj/interface/web/base_config.html +445 -0
  298. khoj/interface/web/content_source_github_input.html +208 -0
  299. khoj/interface/web/login.html +310 -0
  300. khoj/interface/web/utils.html +48 -0
  301. khoj/main.py +249 -0
  302. khoj/manage.py +22 -0
  303. khoj/migrations/__init__.py +0 -0
  304. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  305. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  306. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  307. khoj/migrations/migrate_offline_model.py +29 -0
  308. khoj/migrations/migrate_processor_config_openai.py +67 -0
  309. khoj/migrations/migrate_server_pg.py +132 -0
  310. khoj/migrations/migrate_version.py +17 -0
  311. khoj/processor/__init__.py +0 -0
  312. khoj/processor/content/__init__.py +0 -0
  313. khoj/processor/content/docx/__init__.py +0 -0
  314. khoj/processor/content/docx/docx_to_entries.py +111 -0
  315. khoj/processor/content/github/__init__.py +0 -0
  316. khoj/processor/content/github/github_to_entries.py +226 -0
  317. khoj/processor/content/images/__init__.py +0 -0
  318. khoj/processor/content/images/image_to_entries.py +117 -0
  319. khoj/processor/content/markdown/__init__.py +0 -0
  320. khoj/processor/content/markdown/markdown_to_entries.py +160 -0
  321. khoj/processor/content/notion/notion_to_entries.py +259 -0
  322. khoj/processor/content/org_mode/__init__.py +0 -0
  323. khoj/processor/content/org_mode/org_to_entries.py +226 -0
  324. khoj/processor/content/org_mode/orgnode.py +532 -0
  325. khoj/processor/content/pdf/__init__.py +0 -0
  326. khoj/processor/content/pdf/pdf_to_entries.py +119 -0
  327. khoj/processor/content/plaintext/__init__.py +0 -0
  328. khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
  329. khoj/processor/content/text_to_entries.py +296 -0
  330. khoj/processor/conversation/__init__.py +0 -0
  331. khoj/processor/conversation/anthropic/__init__.py +0 -0
  332. khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
  333. khoj/processor/conversation/anthropic/utils.py +217 -0
  334. khoj/processor/conversation/google/__init__.py +0 -0
  335. khoj/processor/conversation/google/gemini_chat.py +253 -0
  336. khoj/processor/conversation/google/utils.py +260 -0
  337. khoj/processor/conversation/offline/__init__.py +0 -0
  338. khoj/processor/conversation/offline/chat_model.py +308 -0
  339. khoj/processor/conversation/offline/utils.py +80 -0
  340. khoj/processor/conversation/offline/whisper.py +15 -0
  341. khoj/processor/conversation/openai/__init__.py +0 -0
  342. khoj/processor/conversation/openai/gpt.py +243 -0
  343. khoj/processor/conversation/openai/utils.py +232 -0
  344. khoj/processor/conversation/openai/whisper.py +13 -0
  345. khoj/processor/conversation/prompts.py +1188 -0
  346. khoj/processor/conversation/utils.py +867 -0
  347. khoj/processor/embeddings.py +122 -0
  348. khoj/processor/image/generate.py +215 -0
  349. khoj/processor/speech/__init__.py +0 -0
  350. khoj/processor/speech/text_to_speech.py +51 -0
  351. khoj/processor/tools/__init__.py +0 -0
  352. khoj/processor/tools/online_search.py +472 -0
  353. khoj/processor/tools/run_code.py +179 -0
  354. khoj/routers/__init__.py +0 -0
  355. khoj/routers/api.py +760 -0
  356. khoj/routers/api_agents.py +295 -0
  357. khoj/routers/api_chat.py +1273 -0
  358. khoj/routers/api_content.py +634 -0
  359. khoj/routers/api_model.py +123 -0
  360. khoj/routers/api_phone.py +86 -0
  361. khoj/routers/api_subscription.py +144 -0
  362. khoj/routers/auth.py +307 -0
  363. khoj/routers/email.py +135 -0
  364. khoj/routers/helpers.py +2333 -0
  365. khoj/routers/notion.py +85 -0
  366. khoj/routers/research.py +364 -0
  367. khoj/routers/storage.py +63 -0
  368. khoj/routers/twilio.py +36 -0
  369. khoj/routers/web_client.py +141 -0
  370. khoj/search_filter/__init__.py +0 -0
  371. khoj/search_filter/base_filter.py +15 -0
  372. khoj/search_filter/date_filter.py +215 -0
  373. khoj/search_filter/file_filter.py +32 -0
  374. khoj/search_filter/word_filter.py +29 -0
  375. khoj/search_type/__init__.py +0 -0
  376. khoj/search_type/text_search.py +255 -0
  377. khoj/utils/__init__.py +0 -0
  378. khoj/utils/cli.py +101 -0
  379. khoj/utils/config.py +81 -0
  380. khoj/utils/constants.py +51 -0
  381. khoj/utils/fs_syncer.py +252 -0
  382. khoj/utils/helpers.py +627 -0
  383. khoj/utils/initialization.py +301 -0
  384. khoj/utils/jsonl.py +43 -0
  385. khoj/utils/models.py +47 -0
  386. khoj/utils/rawconfig.py +208 -0
  387. khoj/utils/state.py +48 -0
  388. khoj/utils/yaml.py +47 -0
  389. khoj-1.33.3.dev32.dist-info/METADATA +190 -0
  390. khoj-1.33.3.dev32.dist-info/RECORD +393 -0
  391. khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
  392. khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
  393. khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,15 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List
3
+
4
+
5
class BaseFilter(ABC):
    """Abstract base class for query filters.

    Subclasses must implement `get_filter_terms` and `defilter`;
    `can_filter` is derived from `get_filter_terms`.
    """

    @abstractmethod
    def get_filter_terms(self, query: str) -> List[str]:
        """Return the list of filter terms found in `query`.

        Must be implemented by subclasses.
        """

    def can_filter(self, raw_query: str) -> bool:
        """Return True when `get_filter_terms` yields at least one term for `raw_query`."""
        found_terms = self.get_filter_terms(raw_query)
        return len(found_terms) > 0

    @abstractmethod
    def defilter(self, query: str) -> str:
        """Return a string derived from `query`.

        Must be implemented by subclasses.
        NOTE(review): presumably returns the query with this filter's terms
        stripped out; confirm against the concrete filters.
        """
@@ -0,0 +1,215 @@
1
+ import calendar
2
+ import logging
3
+ import re
4
+ from collections import defaultdict
5
+ from datetime import datetime, timedelta
6
+ from math import inf
7
+ from typing import List, Tuple
8
+
9
+ import dateparser as dtparse
10
+ from dateutil.relativedelta import relativedelta
11
+
12
+ from khoj.search_filter.base_filter import BaseFilter
13
+ from khoj.utils.helpers import LRU, merge_dicts, timer
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class DateFilter(BaseFilter):
    """Extract dates from content and date range filters from search queries."""

    # Date Range Filter Regexes
    # Example filter queries:
    # - dt>="yesterday" dt<"tomorrow"
    # - dt>="last week"
    # - dt:"2 years ago"
    date_regex = r"dt([:><=]{1,2})[\"'‘’](.*?)[\"'‘’]"

    def __init__(self, entry_key="compiled"):
        self.entry_key = entry_key
        self.date_to_entry_ids = defaultdict(set)
        self.cache = LRU()
        self.dtparser_regexes = self.compile_date_regexes()
        # Only strip an ordinal suffix that directly follows a digit (1st, 2nd, 3rd, 4th).
        # An unanchored (st|nd|rd|th) also removes the "st" in "August",
        # which makes strptime reject every August date written with a full month name.
        self.dtparser_ordinal_suffixes = re.compile(r"(?<=\d)(?:st|nd|rd|th)\b", re.IGNORECASE)
        self.dtparser_settings = {
            "PREFER_DAY_OF_MONTH": "first",
            "DATE_ORDER": "YMD",  # Prefer YMD and DMY over MDY when parsing ambiguous dates
        }

    def compile_date_regexes(self):
        """Build (strptime format, compiled regex) pairs used to find dates in content.

        Returns:
            List of tuples pairing a `datetime.strptime` format string with the
            regex that finds candidate date strings for that format.
        """
        full_months = "|".join(calendar.month_name[1:])
        abbr_months = "|".join(calendar.month_abbr[1:])
        day = r"\d{1,2}(?:st|nd|rd|th)?"

        def compile_pattern(body: str) -> "re.Pattern[str]":
            # Every date pattern is matched on word boundaries, ignoring case
            return re.compile(rf"\b{body}\b", re.IGNORECASE)

        # Extract natural dates from content like 1st April 1984, 31 April 84, Apr 4th 1984, 13 Apr 84
        dBY_regex = compile_pattern(rf"{day} (?:{full_months}) \d{{4}}")
        dBy_regex = compile_pattern(rf"{day} (?:{full_months}) \d{{2}}")
        BdY_regex = compile_pattern(rf"(?:{full_months}) {day} \d{{4}}")
        Bdy_regex = compile_pattern(rf"(?:{full_months}) {day} \d{{2}}")
        dbY_regex = compile_pattern(rf"{day} (?:{abbr_months}) \d{{4}}")
        dby_regex = compile_pattern(rf"{day} (?:{abbr_months}) \d{{2}}")
        bdY_regex = compile_pattern(rf"(?:{abbr_months}) {day} \d{{4}}")
        bdy_regex = compile_pattern(rf"(?:{abbr_months}) {day} \d{{2}}")
        # Extract natural dates of form Month, Year like January 2021, Jan 2021, Jan 21
        BY_regex = compile_pattern(rf"(?:{full_months}) \d{{4}}")
        By_regex = compile_pattern(rf"(?:{full_months}) \d{{2}}")
        bY_regex = compile_pattern(rf"(?:{abbr_months}) \d{{4}}")
        by_regex = compile_pattern(rf"(?:{abbr_months}) \d{{2}}")
        # Extract structured dates from content like 1984-04-01, 1984/04/01, 01-04-1984, 01/04/1984, 01.04.1984, 01-04-84, 01/04/84
        Ymd_date_regex = re.compile(r"\b\d{4}[-\/]\d{2}[-\/]\d{2}\b", re.IGNORECASE)
        dmY_date_regex = re.compile(r"\b\d{2}[-\/]\d{2}[-\/]\d{4}\b", re.IGNORECASE)
        dmy_date_regex = re.compile(r"\b\d{2}[-\/]\d{2}[-\/]\d{2}\b", re.IGNORECASE)
        dmY_dot_date_regex = re.compile(r"\b\d{2}[\.]\d{2}[\.]\d{4}\b", re.IGNORECASE)

        # Combine date formatter and date identifier regex pairs.
        # A regex that accepts both "-" and "/" is paired with each separator's format;
        # strptime rejects the candidate under the format that does not fit.
        dtparser_regexes: List[Tuple[str, re.Pattern[str]]] = [
            # Structured dates
            ("%Y-%m-%d", Ymd_date_regex),
            ("%Y/%m/%d", Ymd_date_regex),
            ("%d-%m-%Y", dmY_date_regex),
            ("%d/%m/%Y", dmY_date_regex),
            ("%d.%m.%Y", dmY_dot_date_regex),
            ("%d-%m-%y", dmy_date_regex),
            ("%d/%m/%y", dmy_date_regex),
            # Natural dates
            ("%d %B %Y", dBY_regex),
            ("%d %B %y", dBy_regex),
            ("%B %d %Y", BdY_regex),
            ("%B %d %y", Bdy_regex),
            ("%d %b %Y", dbY_regex),
            ("%d %b %y", dby_regex),
            ("%b %d %Y", bdY_regex),
            ("%b %d %y", bdy_regex),
            # Partial natural dates
            ("%B %Y", BY_regex),
            ("%B %y", By_regex),
            ("%b %Y", bY_regex),
            ("%b %y", by_regex),
        ]
        return dtparser_regexes

    def extract_dates(self, content):
        """Extract natural and structured dates from content.

        Returns:
            List of unique `datetime` objects, one per date string in `content`
            that matched a known pattern and parsed with its paired format.
        """
        valid_dates = set()
        for date_format, date_regex in self.dtparser_regexes:
            for date_str in date_regex.findall(content):
                # Remove ordinal suffixes to parse date
                date_str = self.dtparser_ordinal_suffixes.sub("", date_str)
                try:
                    valid_dates.add(datetime.strptime(date_str, date_format))
                except ValueError:
                    # Candidate matched the regex but not this format. Skip it
                    continue

        return list(valid_dates)

    def get_filter_terms(self, query: str) -> List[str]:
        """Get all date filter terms in query, normalised to the form dt<cmp>'<date>'."""
        return [f"dt{item[0]}'{item[1]}'" for item in re.findall(self.date_regex, query)]

    def get_query_date_range(self, query) -> List:
        """Return the effective date range requested by the date filters in query.

        Timed wrapper around `extract_date_range`.
        """
        with timer("Extract date range to filter from query", logger):
            query_daterange = self.extract_date_range(query)

        return query_daterange

    def defilter(self, query):
        """Return query with all date range filters removed and whitespace collapsed."""
        # Match a filter at the very start of the query as well as one preceded by whitespace.
        # Requiring leading whitespace left a filter that opens the query in place.
        query = re.sub(rf"(?:^|\s+){self.date_regex}", " ", query)
        query = re.sub(r"\s{2,}", " ", query).strip()  # remove multiple spaces
        return query

    def extract_date_range(self, query):
        """Combine all date filters in query into a single [start, end] timestamp range.

        Returns:
            An empty list when the query has no usable date filter or the filters
            contradict each other. Otherwise a two item list of timestamps, where
            `None` marks a side left open by the filters.
        """
        # find date range filter in query
        date_range_matches = re.findall(self.date_regex, query)
        if not date_range_matches:
            return []

        # extract, parse natural dates ranges from date range filter passed in query
        # e.g. today maps to (start_of_day, start_of_tomorrow)
        date_ranges_from_filter = []
        for cmp, date_str in date_range_matches:
            # Parse once per filter. Parsing twice repeats the work and can
            # resolve relative dates against two different values of "now"
            parsed_range = self.parse(date_str)
            if parsed_range:
                dt_start, dt_end = parsed_range
                date_ranges_from_filter.append((cmp, (dt_start.timestamp(), dt_end.timestamp())))

        # Combine dates with their comparators to form date range intervals
        # For e.g.
        #   >=yesterday maps to [start_of_yesterday, inf)
        #   <tomorrow maps to [0, start_of_tomorrow)
        # ---
        effective_date_range: List = [0, inf]
        date_range_considering_comparator = []
        for cmp, (dtrange_start, dtrange_end) in date_ranges_from_filter:
            if cmp == ">":
                date_range_considering_comparator.append([dtrange_end, inf])
            elif cmp == ">=":
                date_range_considering_comparator.append([dtrange_start, inf])
            elif cmp == "<":
                date_range_considering_comparator.append([0, dtrange_start])
            elif cmp == "<=":
                date_range_considering_comparator.append([0, dtrange_end])
            elif cmp == "=" or cmp == ":" or cmp == "==":
                date_range_considering_comparator.append([dtrange_start, dtrange_end])

        # Combine above intervals (via AND/intersect)
        # In the above example, this gives us [start_of_yesterday, start_of_tomorrow)
        # This is the effective date range to filter entries by
        # ---
        for date_range in date_range_considering_comparator:
            effective_date_range = [
                max(effective_date_range[0], date_range[0]),
                min(effective_date_range[1], date_range[1]),
            ]

        # No constraint was applied, or the constraints do not overlap
        if effective_date_range == [0, inf] or effective_date_range[0] > effective_date_range[1]:
            return []

        # Report an unbounded side of the range as None
        if effective_date_range[0] == 0:
            effective_date_range[0] = None
        if effective_date_range[1] == inf:
            effective_date_range[1] = None

        return effective_date_range

    def parse(self, date_str, relative_base=None):
        """Parse date string passed in date filter of query to a (start, end) datetime range.

        Args:
            date_str: Date text taken from a date filter, e.g. "last week".
            relative_base: Datetime that relative dates are resolved against.
                Defaults to the current time.

        Returns:
            Tuple of (range start, range end) datetimes, or `None` when the
            date string could not be parsed.
        """
        # clean date string to handle future date parsing by date parser
        future_strings = ["later", "from now", "from today"]
        prefer_dates_from = "future" if any(fstr in date_str for fstr in future_strings) else "past"
        dtquery_settings = {"RELATIVE_BASE": relative_base or datetime.now(), "PREFER_DATES_FROM": prefer_dates_from}
        dtparser_settings = merge_dicts(dtquery_settings, self.dtparser_settings)

        # parse date passed in query date filter
        clean_date_str = re.sub("|".join(future_strings), "", date_str)
        try:
            parsed_date = dtparse.parse(clean_date_str, settings=dtparser_settings)
        except Exception as e:
            logger.error(f"Failed to parse date string: {date_str} with error: {e}")
            return None

        if parsed_date is None:
            return None

        return self.date_to_daterange(parsed_date, date_str)

    def date_to_daterange(self, parsed_date, date_str):
        """Convert parsed date to date ranges at natural granularity (day, week, month or year).

        The granularity is picked from the words present in `date_str`.
        """
        start_of_day = parsed_date.replace(hour=0, minute=0, second=0, microsecond=0)

        if "year" in date_str:
            return (datetime(parsed_date.year, 1, 1, 0, 0, 0), datetime(parsed_date.year + 1, 1, 1, 0, 0, 0))
        if "month" in date_str:
            start_of_month = datetime(parsed_date.year, parsed_date.month, 1, 0, 0, 0)
            next_month = start_of_month + relativedelta(months=1)
            return (start_of_month, next_month)
        if "week" in date_str:
            # if week in date string, dateparser parses it to next week start
            # so today = end of this week
            start_of_week = start_of_day - timedelta(days=7)
            return (start_of_week, start_of_day)
        else:
            next_day = start_of_day + relativedelta(days=1)
            return (start_of_day, next_day)
@@ -0,0 +1,32 @@
1
+ import logging
2
+ import re
3
+ from collections import defaultdict
4
+ from typing import List
5
+
6
+ from khoj.search_filter.base_filter import BaseFilter
7
+ from khoj.utils.helpers import LRU
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class FileFilter(BaseFilter):
    """Parse `file:"..."` (include) and `-file:"..."` (exclude) filters in search queries."""

    # The negative lookbehind keeps the include pattern from matching exclude filters
    file_filter_regex = r'(?<!-)file:"(.+?)" ?'
    excluded_file_filter_regex = r'-file:"(.+?)" ?'

    def __init__(self, entry_key="file"):
        self.entry_key = entry_key
        self.file_to_entry_map = defaultdict(set)
        self.cache = LRU()

    def get_filter_terms(self, query: str) -> List[str]:
        """Get all file filter terms in query.

        Returns:
            Included file patterns as written, followed by excluded file
            patterns each prefixed with "-".
        """
        required_files = [f"{required_file}" for required_file in re.findall(self.file_filter_regex, query)]
        excluded_files = [f"-{excluded_file}" for excluded_file in re.findall(self.excluded_file_filter_regex, query)]
        return required_files + excluded_files

    def convert_to_regex(self, file_filter: str) -> str:
        """Convert file filter to regex: escape literal dots and expand `*` to `.*`."""
        return file_filter.replace(".", r"\.").replace("*", r".*")

    def defilter(self, query: str) -> str:
        """Return query with both included and excluded file filters removed."""
        # Strip exclude filters too. get_filter_terms reports them, so leaving
        # their text behind would feed `-file:"..."` into the search query.
        without_excluded = re.sub(self.excluded_file_filter_regex, "", query)
        return re.sub(self.file_filter_regex, "", without_excluded).strip()
@@ -0,0 +1,29 @@
1
+ import logging
2
+ import re
3
+ from collections import defaultdict
4
+ from typing import List
5
+
6
+ from khoj.search_filter.base_filter import BaseFilter
7
+ from khoj.utils.helpers import LRU
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class WordFilter(BaseFilter):
    """Parse `+"word"` (required) and `-"word"` (blocked) filters in search queries."""

    # Filter Regex
    required_regex = r'\+"([a-zA-Z0-9_-]+)" ?'
    blocked_regex = r'\-"([a-zA-Z0-9_-]+)" ?'

    def __init__(self, entry_key="raw"):
        self.entry_key = entry_key
        self.word_to_entry_index = defaultdict(set)
        self.cache = LRU()

    def get_filter_terms(self, query: str) -> List[str]:
        """Get all filter terms in query: required words as +word, then blocked words as -word."""
        must_have = ["+" + word for word in re.findall(self.required_regex, query)]
        must_not_have = ["-" + word for word in re.findall(self.blocked_regex, query)]
        return must_have + must_not_have

    def defilter(self, query: str) -> str:
        """Return query with required and blocked word filters removed."""
        without_required = re.sub(self.required_regex, "", query)
        without_blocked = re.sub(self.blocked_regex, "", without_required)
        return without_blocked.strip()
File without changes
@@ -0,0 +1,255 @@
1
+ import logging
2
+ import math
3
+ from pathlib import Path
4
+ from typing import List, Optional, Tuple, Type, Union
5
+
6
+ import requests
7
+ import torch
8
+ from asgiref.sync import sync_to_async
9
+ from sentence_transformers import util
10
+
11
+ from khoj.database.adapters import EntryAdapters, get_default_search_model
12
+ from khoj.database.models import Agent
13
+ from khoj.database.models import Entry as DbEntry
14
+ from khoj.database.models import KhojUser
15
+ from khoj.processor.content.text_to_entries import TextToEntries
16
+ from khoj.utils import state
17
+ from khoj.utils.helpers import get_absolute_path, timer
18
+ from khoj.utils.jsonl import load_jsonl
19
+ from khoj.utils.models import BaseEncoder
20
+ from khoj.utils.rawconfig import Entry, SearchResponse
21
+ from khoj.utils.state import SearchType
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
# Map each search type value to the entry type used to filter stored entries.
# SearchType.All maps to None.
search_type_to_embeddings_type = {
    SearchType.Org.value: DbEntry.EntryType.ORG,
    SearchType.Markdown.value: DbEntry.EntryType.MARKDOWN,
    SearchType.Plaintext.value: DbEntry.EntryType.PLAINTEXT,
    SearchType.Pdf.value: DbEntry.EntryType.PDF,
    SearchType.Github.value: DbEntry.EntryType.GITHUB,
    SearchType.Notion.value: DbEntry.EntryType.NOTION,
    SearchType.All.value: None,
}
34
+
35
+
36
def extract_entries(jsonl_file) -> List[Entry]:
    "Load entries from compressed jsonl"
    # Build one Entry per record loaded from the jsonl file
    return [Entry.from_dict(record) for record in load_jsonl(jsonl_file)]
39
+
40
+
41
def compute_embeddings(
    entries_with_ids: List[Tuple[int, Entry]],
    bi_encoder: BaseEncoder,
    embeddings_file: Path,
    regenerate=False,
    normalize=True,
):
    """Compute (and save) embeddings for entries, reusing pre-computed ones where possible.

    Entries paired with id -1 are treated as new and encoded with `bi_encoder`.
    Entries with any other id have their embedding picked from the previously
    saved embeddings at that index. The merged embeddings are written to
    `embeddings_file` and returned.
    """
    index_scratch_note = ""
    reuse_saved = embeddings_file.exists() and not regenerate
    if reuse_saved:
        # Start from the embeddings saved by an earlier run
        corpus_embeddings: torch.Tensor = torch.load(get_absolute_path(embeddings_file), map_location=state.device)
        logger.debug(f"Loaded {len(corpus_embeddings)} text embeddings from {embeddings_file}")
    else:
        corpus_embeddings = torch.tensor([], device=state.device)
        index_scratch_note = " Creating index from scratch."

    # Encode entries that have no previous embedding
    fresh_embeddings = torch.tensor([], device=state.device)
    fresh_texts = [entry.compiled for entry_id, entry in entries_with_ids if entry_id == -1]
    if fresh_texts:
        logger.info(f"📩 Indexing {len(fresh_texts)} text entries.{index_scratch_note}")
        fresh_embeddings = bi_encoder.encode(
            fresh_texts, convert_to_tensor=True, device=state.device, show_progress_bar=True
        )

    # Pick embeddings of retained entries out of the previous corpus embeddings
    retained_embeddings = torch.tensor([], device=state.device)
    retained_ids = [entry_id for entry_id, _ in entries_with_ids if entry_id != -1]
    if retained_ids:
        retained_index = torch.tensor(retained_ids, device=state.device)
        retained_embeddings = torch.index_select(corpus_embeddings, 0, retained_index)

    # Corpus embeddings are the retained embeddings followed by the fresh ones
    corpus_embeddings = torch.cat([retained_embeddings, fresh_embeddings], dim=0)
    if normalize:
        # Normalize embeddings for faster lookup via dot product when querying
        corpus_embeddings = util.normalize_embeddings(corpus_embeddings)

    # Persist the regenerated or updated embeddings
    torch.save(corpus_embeddings, embeddings_file)
    logger.info(f"📩 Saved computed text embeddings to {embeddings_file}")

    return corpus_embeddings
86
+
87
+
88
def load_embeddings(
    embeddings_file: Path,
):
    """Load and normalize pre-computed embeddings from file. Return None if the file is missing."""
    if not embeddings_file.exists():
        return None

    saved_embeddings: torch.Tensor = torch.load(get_absolute_path(embeddings_file), map_location=state.device)
    logger.debug(f"Loaded {len(saved_embeddings)} text embeddings from {embeddings_file}")
    return util.normalize_embeddings(saved_embeddings)
98
+
99
+
100
async def query(
    raw_query: str,
    user: KhojUser,
    type: SearchType = SearchType.All,
    question_embedding: Union[torch.Tensor, None] = None,
    max_distance: float = None,
    agent: Optional[Agent] = None,
) -> Tuple[List[dict], List[Entry]]:
    """Search for entries that answer the query.

    Embeds the query with the default search model unless `question_embedding`
    is supplied, then returns up to 10 matching entries for the user.
    """
    entry_type_filter = search_type_to_embeddings_type[type.value]
    search_model = await sync_to_async(get_default_search_model)()

    # Fall back to the search model's threshold, else apply no distance limit
    if not max_distance:
        max_distance = search_model.bi_encoder_confidence_threshold or math.inf

    # Encode the query using the bi-encoder, unless the caller already did
    if question_embedding is None:
        with timer("Query Encode Time", logger, state.device):
            question_embedding = state.embeddings_model[search_model.name].embed_query(raw_query)

    # Find relevant entries for the query
    max_results = 10
    with timer("Search Time", logger, state.device):
        matches = EntryAdapters.search_with_embeddings(
            raw_query=raw_query,
            embeddings=question_embedding,
            max_results=max_results,
            file_type_filter=entry_type_filter,
            max_distance=max_distance,
            user=user,
            agent=agent,
        ).all()
        # Materialize the results off the event loop thread
        matches = await sync_to_async(list)(matches)  # type: ignore[call-arg]

    return matches
140
+
141
+
142
def collate_results(hits, dedupe=True):
    """Yield a SearchResponse per hit, skipping repeated hits when `dedupe` is set.

    A hit counts as repeated when its hashed value or its corpus id
    has already been seen earlier in `hits`.
    """
    seen_corpus_ids = set()
    seen_hashes = set()
    for hit in hits:
        if dedupe and (hit.hashed_value in seen_hashes or hit.corpus_id in seen_corpus_ids):
            continue

        seen_hashes.add(hit.hashed_value)
        seen_corpus_ids.add(hit.corpus_id)
        response_fields = {
            "entry": hit.raw,
            "score": hit.distance,
            "corpus_id": str(hit.corpus_id),
            "additional": {
                "source": hit.file_source,
                "file": hit.file_path,
                "compiled": hit.compiled,
                "heading": hit.heading,
            },
        }
        yield SearchResponse.model_validate(response_fields)
165
+
166
+
167
def deduplicated_search_responses(hits: List[SearchResponse]):
    """Yield a fresh SearchResponse for the first hit seen with each corpus id."""
    seen_corpus_ids = set()
    for hit in hits:
        if hit.corpus_id in seen_corpus_ids:
            continue

        seen_corpus_ids.add(hit.corpus_id)
        response_fields = {
            "entry": hit.entry,
            "score": hit.score,
            "corpus_id": hit.corpus_id,
            "additional": {
                "source": hit.additional["source"],
                "file": hit.additional["file"],
                "compiled": hit.additional["compiled"],
                "heading": hit.additional["heading"],
            },
        }
        yield SearchResponse.model_validate(response_fields)
188
+
189
+
190
def rerank_and_sort_results(hits, query, rank_results, search_model_name):
    """Optionally rerank hits with the cross-encoder, then sort them.

    Args:
        hits: Search responses to order. May be a list or any other iterable.
        query: Query text passed to the cross-encoder.
        rank_results: Whether reranking was explicitly requested.
        search_model_name: Key of the cross-encoder to use in `state.cross_encoder_model`.

    Returns:
        The hits as a sorted list. A list passed in is sorted in place and returned.
    """
    # Materialize non-list input once. Counting via len(list(hits)) exhausts a
    # generator, leaving nothing to rerank, and sorting below needs a list.
    if not isinstance(hits, list):
        hits = list(hits)

    # Rerank results if explicitly requested, if can use inference server
    # AND if we have more than one result
    rank_results = (
        rank_results or state.cross_encoder_model[search_model_name].inference_server_enabled()
    ) and len(hits) > 1

    # Score all retrieved entries using the cross-encoder
    if rank_results:
        hits = cross_encoder_score(query, hits, search_model_name)

    # Sort results by cross-encoder score followed by bi-encoder score
    hits = sort_results(rank_results=rank_results, hits=hits)

    return hits
205
+
206
+
207
def setup(
    text_to_entries: Type[TextToEntries],
    files: dict[str, str],
    regenerate: bool,
    user: KhojUser,
    config=None,
) -> Tuple[int, int]:
    """Process files into entries for the user.

    Returns the count of newly created and of deleted entries.
    """
    # Pass config to the entry processor only when one was supplied
    processor = text_to_entries(config) if config else text_to_entries()
    num_new_embeddings, num_deleted_embeddings = processor.process(files=files, user=user, regenerate=regenerate)

    if files:
        file_names = list(files)

        logger.info(
            f"Deleted {num_deleted_embeddings} entries. Created {num_new_embeddings} new entries for user {user} from files {file_names[:10]} ..."
        )

    return num_new_embeddings, num_deleted_embeddings
231
+
232
+
233
def cross_encoder_score(query: str, hits: List[SearchResponse], search_model_name: str) -> List[SearchResponse]:
    """Attach a cross-encoder distance to every hit under the "cross_score" key.

    When the inference endpoint fails with an HTTP error, every hit is scored 0.0.
    """
    try:
        with timer("Cross-Encoder Predict Time", logger, state.device):
            relevance_scores = state.cross_encoder_model[search_model_name].predict(query, hits)
    except requests.exceptions.HTTPError as e:
        logger.error(f"Failed to rerank documents using the inference endpoint. Error: {e}.", exc_info=True)
        relevance_scores = [0.0] * len(hits)

    # Convert cross-encoder scores to distances and pass in hits for reranking
    for position, relevance in enumerate(relevance_scores):
        hits[position]["cross_score"] = 1 - relevance

    return hits
247
+
248
+
249
def sort_results(rank_results: bool, hits: List[dict]) -> List[dict]:
    """Order results in place by cross-encoder score followed by bi-encoder score"""
    # Sort by the bi-encoder score first. When reranked, sort again by the
    # cross-encoder score, which then takes precedence because sorting is stable.
    ordering_keys = ["score"]
    if rank_results:
        ordering_keys.append("cross_score")

    with timer("Rank Time", logger, state.device):
        for ordering_key in ordering_keys:
            hits.sort(key=lambda hit, field=ordering_key: hit[field])
    return hits
khoj/utils/__init__.py ADDED
File without changes
khoj/utils/cli.py ADDED
@@ -0,0 +1,101 @@
1
+ import argparse
2
+ import logging
3
+ import os
4
+ import pathlib
5
+ from importlib.metadata import version
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+ from khoj.migrations.migrate_offline_chat_default_model import (
10
+ migrate_offline_chat_default_model,
11
+ )
12
+ from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
13
+ from khoj.migrations.migrate_offline_model import migrate_offline_model
14
+ from khoj.migrations.migrate_processor_config_openai import (
15
+ migrate_processor_conversation_schema,
16
+ )
17
+ from khoj.migrations.migrate_server_pg import migrate_server_pg
18
+ from khoj.migrations.migrate_version import migrate_config_to_version
19
+ from khoj.utils.helpers import in_debug_mode, is_env_var_true, resolve_absolute_path
20
+ from khoj.utils.yaml import parse_config_from_file
21
+
22
+
23
def cli(args=None):
    """Parse Khoj's commandline arguments and load its configuration.

    Args:
        args: Argument strings to parse. When None, argparse reads `sys.argv`.

    Returns:
        The parsed argument namespace, extended with `chat_on_gpu`, `version_no`
        and `config`. `config` is None when the config file does not exist.

    Raises:
        SystemExit: With exit code 0 after printing the version when `--version` is passed.
    """
    # Setup Argument Parser for the Commandline Interface
    parser = argparse.ArgumentParser(description="Start Khoj; An AI personal assistant for your Digital Brain")
    parser.add_argument(
        "--config-file", default="~/.khoj/khoj.yml", type=pathlib.Path, help="YAML file to configure Khoj"
    )
    parser.add_argument(
        "--regenerate",
        action="store_true",
        default=False,
        help="Regenerate model embeddings from source files. Default: false",
    )
    parser.add_argument("--verbose", "-v", action="count", default=0, help="Show verbose conversion logs. Default: 0")
    parser.add_argument("--host", type=str, default="127.0.0.1", help="Host address of the server. Default: 127.0.0.1")
    parser.add_argument("--port", "-p", type=int, default=42110, help="Port of the server. Default: 42110")
    parser.add_argument(
        "--socket",
        type=pathlib.Path,
        help="Path to UNIX socket for server. Use to run server behind reverse proxy. Default: /tmp/uvicorn.sock",
    )
    parser.add_argument("--sslcert", type=str, help="Path to SSL certificate file")
    parser.add_argument("--sslkey", type=str, help="Path to SSL key file")
    parser.add_argument("--version", "-V", action="store_true", help="Print the installed Khoj version and exit")
    parser.add_argument(
        "--disable-chat-on-gpu", action="store_true", default=False, help="Disable using GPU for the offline chat model"
    )
    parser.add_argument(
        "--anonymous-mode",
        action="store_true",
        default=False,
        help="Run Khoj in anonymous mode. This does not require any login for connecting users.",
    )
    parser.add_argument(
        "--non-interactive",
        action="store_true",
        default=False,
        help="Start Khoj in non-interactive mode. Assumes interactive shell unavailable for config. E.g when run via Docker.",
    )

    # Tolerate unrecognised arguments instead of aborting
    args, remaining_args = parser.parse_known_args(args)

    if len(remaining_args) > 0:
        logger.info(f"⚠️  Ignoring unknown commandline args: {remaining_args}")

    # Set default values for arguments
    args.chat_on_gpu = not args.disable_chat_on_gpu

    args.version_no = version("khoj")
    if args.version:
        # Show version of khoj installed and exit
        print(args.version_no)
        # Raise SystemExit directly. The exit() helper is injected by the site
        # module for interactive use and is not guaranteed to exist in programs.
        raise SystemExit(0)

    # Normalize config_file path to absolute path
    args.config_file = resolve_absolute_path(args.config_file)

    if not args.config_file.exists():
        args.config = None
    else:
        # Bring the config file up to date before parsing it
        args = run_migrations(args)
        args.config = parse_config_from_file(args.config_file)
        if is_env_var_true("KHOJ_TELEMETRY_DISABLE") or in_debug_mode():
            args.config.app.should_log_telemetry = False

    return args
88
+
89
+
90
def run_migrations(args):
    """Apply each config migration in order, feeding every step the args returned by the previous one."""
    migration_steps = (
        migrate_config_to_version,
        migrate_processor_conversation_schema,
        migrate_offline_model,
        migrate_offline_chat_schema,
        migrate_offline_chat_default_model,
        migrate_server_pg,
    )
    for apply_migration in migration_steps:
        args = apply_migration(args)
    return args