khoj 1.33.3.dev32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393)
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +218 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +452 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1821 -0
  10. khoj/database/admin.py +417 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_default_model.py +116 -0
  15. khoj/database/management/commands/change_generated_images_url.py +61 -0
  16. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  17. khoj/database/migrations/0001_khojuser.py +98 -0
  18. khoj/database/migrations/0002_googleuser.py +32 -0
  19. khoj/database/migrations/0003_vector_extension.py +10 -0
  20. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  21. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  22. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  23. khoj/database/migrations/0007_add_conversation.py +27 -0
  24. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  25. khoj/database/migrations/0009_khojapiuser.py +24 -0
  26. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  27. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  28. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  29. khoj/database/migrations/0012_entry_file_source.py +21 -0
  30. khoj/database/migrations/0013_subscription.py +37 -0
  31. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  32. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  33. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  34. khoj/database/migrations/0017_searchmodel.py +32 -0
  35. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  36. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  37. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  38. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  39. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  40. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  41. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  42. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  43. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  45. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  46. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  47. khoj/database/migrations/0029_userrequests.py +27 -0
  48. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  49. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  50. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  51. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  52. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  53. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  54. khoj/database/migrations/0035_processlock.py +26 -0
  55. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  56. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  57. khoj/database/migrations/0036_publicconversation.py +42 -0
  58. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  59. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  60. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  61. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  62. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  63. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  64. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  65. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  66. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  67. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  68. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  69. khoj/database/migrations/0045_fileobject.py +37 -0
  70. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  71. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  72. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  73. khoj/database/migrations/0049_datastore.py +38 -0
  74. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  75. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  76. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  77. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  78. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  79. khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
  80. khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
  81. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  82. khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
  83. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  84. khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
  85. khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
  86. khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
  87. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  88. khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
  89. khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
  90. khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
  91. khoj/database/migrations/0063_conversation_temp_id.py +36 -0
  92. khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
  93. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  94. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  95. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  96. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  97. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  98. khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
  99. khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
  100. khoj/database/migrations/0072_entry_search_model.py +24 -0
  101. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  102. khoj/database/migrations/0074_alter_conversation_title.py +17 -0
  103. khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
  104. khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
  105. khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
  106. khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
  107. khoj/database/migrations/__init__.py +0 -0
  108. khoj/database/models/__init__.py +725 -0
  109. khoj/database/tests.py +3 -0
  110. khoj/interface/compiled/404/index.html +1 -0
  111. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
  112. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
  113. khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
  114. khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
  115. khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
  116. khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
  117. khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
  118. khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
  119. khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
  120. khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
  121. khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
  122. khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
  123. khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
  124. khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
  125. khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
  126. khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
  127. khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
  128. khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
  129. khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
  130. khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
  131. khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
  132. khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
  133. khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
  134. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  135. khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
  136. khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
  137. khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
  138. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  139. khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
  140. khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
  141. khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
  142. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  143. khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
  144. khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
  145. khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
  146. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  147. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
  148. khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
  149. khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
  150. khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
  151. khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
  152. khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
  153. khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
  154. khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
  155. khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  156. khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
  157. khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
  158. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  159. khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
  160. khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
  161. khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
  162. khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
  163. khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
  164. khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
  165. khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
  166. khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
  167. khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
  168. khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
  169. khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
  170. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
  171. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
  172. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
  173. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
  174. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
  175. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
  176. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
  177. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
  178. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
  179. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
  180. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
  181. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
  182. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
  183. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
  184. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
  185. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
  186. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
  187. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
  188. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
  189. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
  190. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
  191. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
  192. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
  193. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
  194. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
  195. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
  196. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
  197. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
  198. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
  199. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
  200. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
  201. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
  202. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
  203. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
  204. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
  205. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
  206. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
  207. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
  208. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
  209. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
  210. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
  211. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
  212. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
  213. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
  214. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
  215. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
  216. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
  217. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
  218. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
  219. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
  220. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
  221. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
  222. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
  223. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
  224. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
  225. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
  226. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
  227. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
  228. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
  229. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
  230. khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
  231. khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
  232. khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
  233. khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
  234. khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
  235. khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
  236. khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
  237. khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
  238. khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
  239. khoj/interface/compiled/agents/index.html +1 -0
  240. khoj/interface/compiled/agents/index.txt +7 -0
  241. khoj/interface/compiled/agents.svg +6 -0
  242. khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
  243. khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
  244. khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
  245. khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
  246. khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
  247. khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
  248. khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
  249. khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
  250. khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
  251. khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
  252. khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
  253. khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
  254. khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
  255. khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
  256. khoj/interface/compiled/automation.svg +37 -0
  257. khoj/interface/compiled/automations/index.html +1 -0
  258. khoj/interface/compiled/automations/index.txt +8 -0
  259. khoj/interface/compiled/chat/index.html +1 -0
  260. khoj/interface/compiled/chat/index.txt +7 -0
  261. khoj/interface/compiled/chat.svg +24 -0
  262. khoj/interface/compiled/close.svg +5 -0
  263. khoj/interface/compiled/copy-button-success.svg +6 -0
  264. khoj/interface/compiled/copy-button.svg +5 -0
  265. khoj/interface/compiled/index.html +1 -0
  266. khoj/interface/compiled/index.txt +7 -0
  267. khoj/interface/compiled/khoj.webmanifest +76 -0
  268. khoj/interface/compiled/logo.svg +24 -0
  269. khoj/interface/compiled/search/index.html +1 -0
  270. khoj/interface/compiled/search/index.txt +7 -0
  271. khoj/interface/compiled/send.svg +1 -0
  272. khoj/interface/compiled/settings/index.html +1 -0
  273. khoj/interface/compiled/settings/index.txt +9 -0
  274. khoj/interface/compiled/share/chat/index.html +1 -0
  275. khoj/interface/compiled/share/chat/index.txt +7 -0
  276. khoj/interface/compiled/share.svg +8 -0
  277. khoj/interface/compiled/thumbs-down.svg +6 -0
  278. khoj/interface/compiled/thumbs-up.svg +6 -0
  279. khoj/interface/email/feedback.html +34 -0
  280. khoj/interface/email/magic_link.html +40 -0
  281. khoj/interface/email/task.html +37 -0
  282. khoj/interface/email/welcome.html +90 -0
  283. khoj/interface/web/.well-known/assetlinks.json +11 -0
  284. khoj/interface/web/assets/icons/agents.svg +19 -0
  285. khoj/interface/web/assets/icons/automation.svg +43 -0
  286. khoj/interface/web/assets/icons/chat.svg +24 -0
  287. khoj/interface/web/assets/icons/github.svg +1 -0
  288. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  289. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  290. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
  291. khoj/interface/web/assets/icons/khoj.svg +26 -0
  292. khoj/interface/web/assets/icons/logotype.svg +1 -0
  293. khoj/interface/web/assets/icons/search.svg +57 -0
  294. khoj/interface/web/assets/icons/sync.svg +4 -0
  295. khoj/interface/web/assets/khoj.css +237 -0
  296. khoj/interface/web/assets/utils.js +33 -0
  297. khoj/interface/web/base_config.html +445 -0
  298. khoj/interface/web/content_source_github_input.html +208 -0
  299. khoj/interface/web/login.html +310 -0
  300. khoj/interface/web/utils.html +48 -0
  301. khoj/main.py +249 -0
  302. khoj/manage.py +22 -0
  303. khoj/migrations/__init__.py +0 -0
  304. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  305. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  306. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  307. khoj/migrations/migrate_offline_model.py +29 -0
  308. khoj/migrations/migrate_processor_config_openai.py +67 -0
  309. khoj/migrations/migrate_server_pg.py +132 -0
  310. khoj/migrations/migrate_version.py +17 -0
  311. khoj/processor/__init__.py +0 -0
  312. khoj/processor/content/__init__.py +0 -0
  313. khoj/processor/content/docx/__init__.py +0 -0
  314. khoj/processor/content/docx/docx_to_entries.py +111 -0
  315. khoj/processor/content/github/__init__.py +0 -0
  316. khoj/processor/content/github/github_to_entries.py +226 -0
  317. khoj/processor/content/images/__init__.py +0 -0
  318. khoj/processor/content/images/image_to_entries.py +117 -0
  319. khoj/processor/content/markdown/__init__.py +0 -0
  320. khoj/processor/content/markdown/markdown_to_entries.py +160 -0
  321. khoj/processor/content/notion/notion_to_entries.py +259 -0
  322. khoj/processor/content/org_mode/__init__.py +0 -0
  323. khoj/processor/content/org_mode/org_to_entries.py +226 -0
  324. khoj/processor/content/org_mode/orgnode.py +532 -0
  325. khoj/processor/content/pdf/__init__.py +0 -0
  326. khoj/processor/content/pdf/pdf_to_entries.py +119 -0
  327. khoj/processor/content/plaintext/__init__.py +0 -0
  328. khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
  329. khoj/processor/content/text_to_entries.py +296 -0
  330. khoj/processor/conversation/__init__.py +0 -0
  331. khoj/processor/conversation/anthropic/__init__.py +0 -0
  332. khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
  333. khoj/processor/conversation/anthropic/utils.py +217 -0
  334. khoj/processor/conversation/google/__init__.py +0 -0
  335. khoj/processor/conversation/google/gemini_chat.py +253 -0
  336. khoj/processor/conversation/google/utils.py +260 -0
  337. khoj/processor/conversation/offline/__init__.py +0 -0
  338. khoj/processor/conversation/offline/chat_model.py +308 -0
  339. khoj/processor/conversation/offline/utils.py +80 -0
  340. khoj/processor/conversation/offline/whisper.py +15 -0
  341. khoj/processor/conversation/openai/__init__.py +0 -0
  342. khoj/processor/conversation/openai/gpt.py +243 -0
  343. khoj/processor/conversation/openai/utils.py +232 -0
  344. khoj/processor/conversation/openai/whisper.py +13 -0
  345. khoj/processor/conversation/prompts.py +1188 -0
  346. khoj/processor/conversation/utils.py +867 -0
  347. khoj/processor/embeddings.py +122 -0
  348. khoj/processor/image/generate.py +215 -0
  349. khoj/processor/speech/__init__.py +0 -0
  350. khoj/processor/speech/text_to_speech.py +51 -0
  351. khoj/processor/tools/__init__.py +0 -0
  352. khoj/processor/tools/online_search.py +472 -0
  353. khoj/processor/tools/run_code.py +179 -0
  354. khoj/routers/__init__.py +0 -0
  355. khoj/routers/api.py +760 -0
  356. khoj/routers/api_agents.py +295 -0
  357. khoj/routers/api_chat.py +1273 -0
  358. khoj/routers/api_content.py +634 -0
  359. khoj/routers/api_model.py +123 -0
  360. khoj/routers/api_phone.py +86 -0
  361. khoj/routers/api_subscription.py +144 -0
  362. khoj/routers/auth.py +307 -0
  363. khoj/routers/email.py +135 -0
  364. khoj/routers/helpers.py +2333 -0
  365. khoj/routers/notion.py +85 -0
  366. khoj/routers/research.py +364 -0
  367. khoj/routers/storage.py +63 -0
  368. khoj/routers/twilio.py +36 -0
  369. khoj/routers/web_client.py +141 -0
  370. khoj/search_filter/__init__.py +0 -0
  371. khoj/search_filter/base_filter.py +15 -0
  372. khoj/search_filter/date_filter.py +215 -0
  373. khoj/search_filter/file_filter.py +32 -0
  374. khoj/search_filter/word_filter.py +29 -0
  375. khoj/search_type/__init__.py +0 -0
  376. khoj/search_type/text_search.py +255 -0
  377. khoj/utils/__init__.py +0 -0
  378. khoj/utils/cli.py +101 -0
  379. khoj/utils/config.py +81 -0
  380. khoj/utils/constants.py +51 -0
  381. khoj/utils/fs_syncer.py +252 -0
  382. khoj/utils/helpers.py +627 -0
  383. khoj/utils/initialization.py +301 -0
  384. khoj/utils/jsonl.py +43 -0
  385. khoj/utils/models.py +47 -0
  386. khoj/utils/rawconfig.py +208 -0
  387. khoj/utils/state.py +48 -0
  388. khoj/utils/yaml.py +47 -0
  389. khoj-1.33.3.dev32.dist-info/METADATA +190 -0
  390. khoj-1.33.3.dev32.dist-info/RECORD +393 -0
  391. khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
  392. khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
  393. khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
khoj/utils/initialization.py ADDED
@@ -0,0 +1,301 @@
1
+ import logging
2
+ import os
3
+ from typing import Tuple
4
+
5
+ import openai
6
+
7
+ from khoj.database.adapters import ConversationAdapters
8
+ from khoj.database.models import (
9
+ AiModelApi,
10
+ ChatModel,
11
+ KhojUser,
12
+ SpeechToTextModelOptions,
13
+ TextToImageModelConfig,
14
+ )
15
+ from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer
16
+ from khoj.utils.constants import (
17
+ default_anthropic_chat_models,
18
+ default_gemini_chat_models,
19
+ default_offline_chat_models,
20
+ default_openai_chat_models,
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
def initialization(interactive: bool = True):
    """Bootstrap the admin user and the chat model configuration of the server.

    On a server without any staff user, an admin user is created from the
    KHOJ_ADMIN_EMAIL / KHOJ_ADMIN_PASSWORD environment variables, or from
    console prompts when `interactive` is True. If additionally no default chat
    model exists, the chat, speech-to-text and text-to-image models are set up.
    Otherwise the chat models of whitelisted OpenAI-compatible providers are
    refreshed from their APIs.

    Args:
        interactive: Prompt on the console for values. When False, values come
            only from environment variables and built-in defaults.
    """

    def _is_yes(answer: str) -> bool:
        "Interpret a y/n console answer, ignoring case and surrounding whitespace"
        return answer.strip().lower() == "y"

    def _create_admin_user():
        "Create the superuser used to configure the server at /server/admin"
        logger.info(
            "👩‍✈️ Setting up admin user. These credentials will allow you to configure your server at /server/admin."
        )
        if not interactive and (not os.getenv("KHOJ_ADMIN_EMAIL") or not os.getenv("KHOJ_ADMIN_PASSWORD")):
            logger.error(
                "🚨 Admin user cannot be created. Please set the KHOJ_ADMIN_EMAIL, KHOJ_ADMIN_PASSWORD environment variables or start server in interactive mode."
            )
            # SystemExit is not an Exception subclass, so it escapes the retry loop of the caller.
            # Raised directly instead of via exit(), which is only available when the site module is loaded.
            raise SystemExit(1)
        email_addr = os.getenv("KHOJ_ADMIN_EMAIL") or input("Email: ")
        password = os.getenv("KHOJ_ADMIN_PASSWORD") or input("Password: ")
        admin_user = KhojUser.objects.create_superuser(email=email_addr, username=email_addr, password=password)
        logger.info(f"👩‍✈️ Created admin user: {admin_user.email}")

    def _create_chat_configuration():
        "Set up chat, speech to text and text to image models for every supported provider"
        logger.info(
            "🗣️ Configure chat models available to your server. You can always update these at /server/admin using your admin account"
        )

        openai_api_base = os.getenv("OPENAI_API_BASE")
        # Detect Ollama by its default port and OpenAI compatible path, with or without a trailing slash
        is_ollama = bool(openai_api_base) and openai_api_base.rstrip("/").endswith(":11434/v1")
        provider = "Ollama" if is_ollama else "OpenAI"
        openai_api_key = os.getenv("OPENAI_API_KEY", "placeholder" if openai_api_base else None)
        default_chat_models = default_openai_chat_models
        if openai_api_base:
            # Get available chat models from OpenAI compatible API
            try:
                openai_client = openai.OpenAI(api_key=openai_api_key, base_url=openai_api_base)
                default_chat_models = [model.id for model in openai_client.models.list()]
                # Put the available default OpenAI models at the top
                valid_default_models = [model for model in default_openai_chat_models if model in default_chat_models]
                other_available_models = [model for model in default_chat_models if model not in valid_default_models]
                default_chat_models = valid_default_models + other_available_models
            except Exception as e:
                logger.warning(
                    f"⚠️ Failed to fetch {provider} chat models. Fallback to default models. Error: {str(e)}"
                )

        # Set up OpenAI's online chat models
        openai_configured, openai_provider = _setup_chat_model_provider(
            ChatModel.ModelType.OPENAI,
            default_chat_models,
            default_api_key=openai_api_key,
            api_base_url=openai_api_base,
            vision_enabled=True,
            is_offline=False,
            interactive=interactive,
            provider_name=provider,
        )

        # Setup OpenAI speech to text model
        if openai_configured:
            default_speech2text_model = "whisper-1"
            if interactive:
                openai_speech2text_model = input(
                    f"Enter the OpenAI speech to text model you want to use (default: {default_speech2text_model}): "
                )
                openai_speech2text_model = openai_speech2text_model or default_speech2text_model
            else:
                openai_speech2text_model = default_speech2text_model
            SpeechToTextModelOptions.objects.create(
                model_name=openai_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OPENAI
            )

        # Setup OpenAI text to image model
        if openai_configured:
            default_text_to_image_model = "dall-e-3"
            if interactive:
                openai_text_to_image_model = input(
                    f"Enter the OpenAI text to image model you want to use (default: {default_text_to_image_model}): "
                )
                openai_text_to_image_model = openai_text_to_image_model or default_text_to_image_model
            else:
                openai_text_to_image_model = default_text_to_image_model
            TextToImageModelConfig.objects.create(
                model_name=openai_text_to_image_model,
                model_type=TextToImageModelConfig.ModelType.OPENAI,
                ai_model_api=openai_provider,
            )

        # Set up Google's Gemini online chat models
        _setup_chat_model_provider(
            ChatModel.ModelType.GOOGLE,
            default_gemini_chat_models,
            default_api_key=os.getenv("GEMINI_API_KEY"),
            vision_enabled=True,
            is_offline=False,
            interactive=interactive,
            provider_name="Google Gemini",
        )

        # Set up Anthropic's online chat models
        _setup_chat_model_provider(
            ChatModel.ModelType.ANTHROPIC,
            default_anthropic_chat_models,
            default_api_key=os.getenv("ANTHROPIC_API_KEY"),
            vision_enabled=True,
            is_offline=False,
            interactive=interactive,
        )

        # Set up offline chat models
        _setup_chat_model_provider(
            ChatModel.ModelType.OFFLINE,
            default_offline_chat_models,
            default_api_key=None,
            vision_enabled=False,
            is_offline=True,
            interactive=interactive,
        )

        # Explicitly set default chat model
        chat_models_configured = ChatModel.objects.count()
        if chat_models_configured > 0:
            default_chat_model_name = ChatModel.objects.first().name
            # If there are multiple chat models, ask the user to choose the default chat model
            if chat_models_configured > 1 and interactive:
                user_chat_model_name = input(
                    f"Enter the default chat model to use (default: {default_chat_model_name}): "
                )
            else:
                user_chat_model_name = None

            # If the user's choice is valid, set it as the default chat model
            if user_chat_model_name and ChatModel.objects.filter(name=user_chat_model_name).exists():
                default_chat_model_name = user_chat_model_name
            # NOTE(review): default_chat_model_name is computed but never persisted in this function.
            # Confirm where the chosen default chat model is meant to be stored.

        logger.info("🗣️ Chat model configuration complete")

        # Set up offline speech to text model
        use_offline_speech2text_model = "n" if not interactive else input("Use offline speech to text model? (y/n): ")
        if _is_yes(use_offline_speech2text_model):
            logger.info("🗣️ Setting up offline speech to text model")
            # Delete any existing speech to text model options. There can only be one.
            SpeechToTextModelOptions.objects.all().delete()

            default_offline_speech2text_model = "base"
            offline_speech2text_model = input(
                f"Enter the Whisper model to use Offline (default: {default_offline_speech2text_model}): "
            )
            offline_speech2text_model = offline_speech2text_model or default_offline_speech2text_model
            SpeechToTextModelOptions.objects.create(
                model_name=offline_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OFFLINE
            )

            logger.info(f"🗣️ Offline speech to text model configured to {offline_speech2text_model}")

    def _setup_chat_model_provider(
        model_type: ChatModel.ModelType,
        default_chat_models: list,
        default_api_key: str,
        interactive: bool,
        api_base_url: str = None,
        vision_enabled: bool = False,
        is_offline: bool = False,
        provider_name: str = None,
    ) -> Tuple[bool, AiModelApi]:
        """Create the API entry and chat models of a single chat model provider.

        Returns:
            (True, created AiModelApi or None for offline providers) when the provider was set up,
            (False, None) when the provider was skipped.
        """
        supported_vision_models = (
            default_openai_chat_models + default_anthropic_chat_models + default_gemini_chat_models
        )
        provider_name = provider_name or model_type.name.capitalize()
        # Non-interactive setups only add providers with an API key in the environment, or offline models
        default_use_model = "y" if (default_api_key is not None or is_offline) else "n"
        use_model_provider = (
            default_use_model if not interactive else input(f"Add {provider_name} chat models? (y/n): ")
        )

        if not _is_yes(use_model_provider):
            return False, None

        logger.info(f"️💬 Setting up your {provider_name} chat configuration")

        ai_model_api = None
        if not is_offline:
            if interactive:
                user_api_key = input(f"Enter your {provider_name} API key (default: {default_api_key}): ")
                api_key = user_api_key if user_api_key != "" else default_api_key
            else:
                api_key = default_api_key
            ai_model_api = AiModelApi.objects.create(api_key=api_key, name=provider_name, api_base_url=api_base_url)

        if interactive:
            user_chat_models = input(
                f"Enter the {provider_name} chat models you want to use (default: {','.join(default_chat_models)}): "
            )
            # Drop empty names left by stray commas. Fall back to the defaults if nothing usable was entered.
            chat_models = [name.strip() for name in user_chat_models.split(",") if name.strip()]
            chat_models = chat_models or default_chat_models
        else:
            chat_models = default_chat_models

        for chat_model in chat_models:
            default_max_tokens = model_to_prompt_size.get(chat_model)
            default_tokenizer = model_to_tokenizer.get(chat_model)
            # Decide vision support per model. Overwriting `vision_enabled` here would disable
            # vision for every model listed after the first one without vision support.
            model_vision_enabled = vision_enabled and chat_model in supported_vision_models

            chat_model_options = {
                "name": chat_model,
                "model_type": model_type,
                "max_prompt_size": default_max_tokens,
                "vision_enabled": model_vision_enabled,
                "tokenizer": default_tokenizer,
                "ai_model_api": ai_model_api,
            }

            ChatModel.objects.create(**chat_model_options)

        logger.info(f"🗣️ {provider_name} chat model configuration complete")
        return True, ai_model_api

    def _update_chat_model_options():
        """Update available chat models for OpenAI-compatible APIs"""
        try:
            # Get OpenAI configs with custom base URLs
            custom_configs = AiModelApi.objects.exclude(api_base_url__isnull=True)

            # Only enable for whitelisted provider names (i.e Ollama) for now
            # TODO: This is hacky. Will be replaced with more robust solution based on provider type enum
            custom_configs = custom_configs.filter(name__in=["Ollama"])

            for config in custom_configs:
                try:
                    # Create OpenAI client with custom base URL
                    openai_client = openai.OpenAI(api_key=config.api_key, base_url=config.api_base_url)

                    # Get available models
                    available_models = [model.id for model in openai_client.models.list()]

                    # Get existing chat model options for this config
                    existing_models = ChatModel.objects.filter(
                        ai_model_api=config, model_type=ChatModel.ModelType.OPENAI
                    )

                    # Add new models
                    for model_name in available_models:
                        if not existing_models.filter(name=model_name).exists():
                            ChatModel.objects.create(
                                name=model_name,
                                model_type=ChatModel.ModelType.OPENAI,
                                max_prompt_size=model_to_prompt_size.get(model_name),
                                vision_enabled=model_name in default_openai_chat_models,
                                tokenizer=model_to_tokenizer.get(model_name),
                                ai_model_api=config,
                            )

                    # Remove models that are no longer available
                    existing_models.exclude(name__in=available_models).delete()

                except Exception as e:
                    logger.warning(f"Failed to update models for {config.name}: {str(e)}")

        except Exception as e:
            logger.error(f"Failed to update chat model options: {str(e)}")

    # NOTE(review): the retry loops below are unbounded. In non-interactive mode a persistent
    # failure repeats with identical inputs; confirm whether a retry limit is wanted.
    admin_user = KhojUser.objects.filter(is_staff=True).first()
    if admin_user is None:
        while True:
            try:
                _create_admin_user()
                break
            except Exception as e:
                logger.error(f"🚨 Failed to create admin user: {e}", exc_info=True)

    chat_config = ConversationAdapters.get_default_chat_model()
    # `admin_user` is intentionally not refreshed after creation: it still being None marks a first run
    if admin_user is None and chat_config is None:
        while True:
            try:
                _create_chat_configuration()
                break
            # Some environments don't support interactive input. We catch the exception and return if that's the case.
            # The admin can still configure their settings from the admin page.
            except EOFError:
                return
            except Exception as e:
                logger.error(f"🚨 Failed to create chat configuration: {e}", exc_info=True)
    else:
        _update_chat_model_options()
        logger.info("🗣️ Chat model configuration updated")
khoj/utils/jsonl.py ADDED
@@ -0,0 +1,43 @@
1
+ import gzip
2
+ import json
3
+ import logging
4
+
5
+ from khoj.utils.constants import empty_escape_sequences
6
+ from khoj.utils.helpers import get_absolute_path
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
def load_jsonl(input_path):
    """Read List of JSON objects from JSON line file

    Args:
        input_path: Path to a JSON lines file. Files with a ".gz" suffix are read as gzip compressed text.

    Returns:
        List with one parsed JSON value per non-blank line of the file.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Pick the opener by file suffix. Both handle text mode with an explicit encoding.
    open_jsonl = gzip.open if input_path.suffix == ".gz" else open

    data = []
    # The context manager closes the file even when parsing a line fails
    with open_jsonl(get_absolute_path(input_path), "rt", encoding="utf-8") as jsonl_file:
        for line in jsonl_file:
            record = line.strip(empty_escape_sequences)
            # Skip blank lines, e.g. trailing newlines at the end of the file
            if record:
                data.append(json.loads(record))

    # Log JSONL entries loaded
    logger.debug(f"Loaded {len(data)} records from {input_path}")

    return data
34
+
35
+
36
def compress_jsonl_data(jsonl_data, output_path):
    "Write JSON lines text to a gzip compressed file at output_path"
    # Ensure the destination directory exists before writing
    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)

    with gzip.open(output_path, "wt", encoding="utf-8") as compressed_file:
        compressed_file.write(jsonl_data)

    logger.debug(f"Wrote jsonl data to gzip compressed jsonl at {output_path}")
khoj/utils/models.py ADDED
@@ -0,0 +1,47 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import List
3
+
4
+ import openai
5
+ import torch
6
+ from tqdm import trange
7
+
8
+
9
class BaseEncoder(ABC):
    "Abstract interface for encoders that turn text entries into embedding tensors"

    @abstractmethod
    def __init__(self, model_name: str, device: torch.device = None, **kwargs):
        "Set up the encoder for the named model. Implemented by subclasses."

    @abstractmethod
    def encode(self, entries: List[str], device: torch.device = None, **kwargs) -> torch.Tensor:
        "Encode the entries into a tensor. Implemented by subclasses."
17
+
18
+
19
class OpenAI(BaseEncoder):
    "Encoder that creates embeddings with an OpenAI embeddings model"

    def __init__(self, model_name, client: openai.OpenAI, device=None):
        """
        Args:
            model_name: Name of the embeddings model passed to the OpenAI client.
            client: OpenAI client used to create the embeddings.
            device: Accepted for interface compatibility. Not used here.
        """
        self.model_name = model_name
        self.openai_client = client
        # Size of the embedding vectors. Learnt from the first successful API response.
        self.embedding_dimensions = None

    def encode(self, entries, device=None, **kwargs):
        """Create one embedding per entry, in the order of the entries.

        Entries that fail to encode are reported on stdout and get a zero vector.

        Args:
            entries: List of strings to encode.
            device: Torch device to create the embedding tensors on.

        Returns:
            Tensor with the embeddings of all entries stacked along the first dimension.
        """
        # Holds None at the position of every entry that failed to encode
        embedding_tensors = []

        for index in trange(0, len(entries)):
            # OpenAI models create better embeddings for entries without newlines
            processed_entry = entries[index].replace("\n", " ")

            try:
                response = self.openai_client.embeddings.create(input=processed_entry, model=self.model_name)
                embedding = response.data[0].embedding
                embedding_tensors += [torch.tensor(embedding, device=device)]
                # Use current models embedding dimension, once available
                self.embedding_dimensions = len(embedding)
            except Exception as e:
                print(
                    f"Failed to encode entry {index} of length: {len(entries[index])}\n\n{entries[index][:1000]}...\n\n{e}"
                )
                embedding_tensors += [None]

        # Else default to embedding dimensions of the text-embedding-ada-002 model
        dimensions = self.embedding_dimensions or 1536

        # Use zero embedding vector for entries with failed embeddings
        # This ensures entry embeddings match the order of the source entries
        # And they have minimal similarity to other entries (as zero vectors are always orthogonal to other vector)
        # Filled in after the loop so their size matches the embeddings that did succeed
        return torch.stack(
            [
                tensor if tensor is not None else torch.zeros(dimensions, device=device)
                for tensor in embedding_tensors
            ]
        )
khoj/utils/rawconfig.py ADDED
@@ -0,0 +1,208 @@
1
+ # System Packages
2
+ import json
3
+ import uuid
4
+ from pathlib import Path
5
+ from typing import Dict, List, Optional
6
+
7
+ from pydantic import BaseModel
8
+
9
+ from khoj.utils.helpers import to_snake_case_from_dash
10
+
11
+
12
+ class ConfigBase(BaseModel):  # Shared pydantic base of the config models in this module; adds dict-style item access
13
+ class Config:  # Pydantic model configuration
14
+ alias_generator = to_snake_case_from_dash  # Field aliases are derived from field names by this helper
15
+ populate_by_name = True  # Allow populating fields by their Python name as well as by alias
16
+
17
+ def __getitem__(self, item):  # config["field"] reads the attribute named "field"
18
+ return getattr(self, item)
19
+
20
+ def __setitem__(self, key, value):  # config["field"] = value sets the attribute named "field"
21
+ return setattr(self, key, value)
22
+
23
+
24
class LocationData(BaseModel):
    """Location described by optional city, region, country and country code.

    Every field defaults to None, so a partially known location can be created
    without passing each field explicitly.
    """

    city: Optional[str] = None
    region: Optional[str] = None
    country: Optional[str] = None
    country_code: Optional[str] = None

    def __str__(self):
        "Render the known parts as 'city, region, country'. The country code is not included."
        known_parts = (part for part in (self.city, self.region, self.country) if part)
        return ", ".join(known_parts)
39
+
40
+
41
+ class FileFilterRequest(BaseModel):  # Pydantic model pairing a single filename with a conversation id
42
+ filename: str  # Required
43
+ conversation_id: str  # Required; kept as a string
44
+
45
+
46
+ class FilesFilterRequest(BaseModel):  # Pydantic model pairing a list of filenames with a conversation id
47
+ filenames: List[str]  # Required
48
+ conversation_id: str  # Required; kept as a string
49
+
50
+
51
+ class TextConfigBase(ConfigBase):  # Config model holding two required filesystem paths
52
+ compressed_jsonl: Path  # presumably the gzip compressed jsonl of entries - confirm against callers
53
+ embeddings_file: Path  # presumably the file the embeddings are stored in - confirm against callers
54
+
55
+
56
+ class TextContentConfig(ConfigBase):  # Config of a text content source; every field is optional
57
+ input_files: Optional[List[Path]] = None  # Explicit list of file paths
58
+ input_filter: Optional[List[str]] = None  # presumably glob patterns selecting files - confirm against the indexer
59
+ index_heading_entries: Optional[bool] = False  # Off by default
60
+
61
+
62
+ class GithubRepoConfig(ConfigBase):  # Identifies a single GitHub repository by name, owner and branch
63
+ name: str  # Required
64
+ owner: str  # Required
65
+ branch: Optional[str] = "master"  # Falls back to "master" when not given
66
+
67
+
68
+ class GithubContentConfig(ConfigBase):  # GitHub content source: an optional token and the repositories to use
69
+ pat_token: Optional[str] = None  # presumably a GitHub personal access token - confirm
70
+ repos: List[GithubRepoConfig]  # Required list of repository configs
71
+
72
+
73
+ class NotionContentConfig(ConfigBase):  # Notion content source config
74
+ token: str  # Required token string
75
+
76
+
77
+ class ContentConfig(ConfigBase):  # One optional config per content type; each is None unless configured
78
+ org: Optional[TextContentConfig] = None
79
+ markdown: Optional[TextContentConfig] = None
80
+ pdf: Optional[TextContentConfig] = None
81
+ plaintext: Optional[TextContentConfig] = None
82
+ github: Optional[GithubContentConfig] = None  # Uses its own config model
83
+ notion: Optional[NotionContentConfig] = None  # Uses its own config model
84
+ image: Optional[TextContentConfig] = None
85
+ docx: Optional[TextContentConfig] = None
86
+
87
+
88
+ class ImageSearchConfig(ConfigBase):  # Image search config: the encoder to use and optional details about it
89
+ encoder: str  # Required
90
+ encoder_type: Optional[str] = None
91
+ model_directory: Optional[Path] = None  # Field name starts with "model_", see protected_namespaces below
92
+
93
+ class Config:
94
+ protected_namespaces = ()  # Clear pydantic's protected namespaces so the "model_" prefixed field is accepted without a warning
95
+
96
+
97
+ class SearchConfig(ConfigBase):  # Search config; only image search is configurable here
98
+ image: Optional[ImageSearchConfig] = None
99
+
100
+
101
+ class OpenAIProcessorConfig(ConfigBase):  # OpenAI settings: API key and chat model name
102
+ api_key: str  # Required
103
+ chat_model: Optional[str] = "gpt-4o-mini"  # Model name used when none is given
104
+
105
+
106
+ class OfflineChatProcessorConfig(ConfigBase):  # Offline chat settings
107
+ chat_model: Optional[str] = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"  # Model name used when none is given
108
+
109
+
110
+ class ConversationProcessorConfig(ConfigBase):  # Conversation settings; every field is optional and unset by default
111
+ openai: Optional[OpenAIProcessorConfig] = None
112
+ offline_chat: Optional[OfflineChatProcessorConfig] = None
113
+ max_prompt_size: Optional[int] = None
114
+ tokenizer: Optional[str] = None
115
+
116
+
117
+ class ProcessorConfig(ConfigBase):  # Processor config; wraps the optional conversation settings
118
+ conversation: Optional[ConversationProcessorConfig] = None
119
+
120
+
121
+ class AppConfig(ConfigBase):  # Application level settings
122
+ should_log_telemetry: bool = True  # On unless explicitly set to False
123
+
124
+
125
+ class FullConfig(ConfigBase):  # Root config model combining the content, search, processor and app sections
126
+ content_type: Optional[ContentConfig] = None
127
+ search_type: Optional[SearchConfig] = None
128
+ processor: Optional[ProcessorConfig] = None
129
+ app: Optional[AppConfig] = AppConfig()  # The only section with a non-None default
130
+ version: Optional[str] = None
131
+
132
+
133
+ class SearchResponse(ConfigBase):  # A single search result
134
+ entry: str  # Required
135
+ score: float  # Required
136
+ cross_score: Optional[float] = None  # presumably the cross-encoder score, when available - confirm
137
+ additional: Optional[dict] = None  # Free-form extra data
138
+ corpus_id: str  # Required; kept as a string
139
+
140
+
141
+ class FileData(BaseModel):  # A file held as raw bytes
142
+ name: str
143
+ content: bytes  # Raw content
144
+ file_type: str
145
+ encoding: str | None = None  # Optional; None when not given
146
+
147
+
148
+ class FileAttachment(BaseModel):  # A file attachment held as text; all fields are required
149
+ name: str
150
+ content: str  # Text content (FileData holds bytes instead)
151
+ file_type: str
152
+ size: int  # NOTE(review): unit not visible here, presumably bytes - confirm
153
+
154
+
155
+ class ChatRequestBody(BaseModel):  # Body of a chat request; only `q` is required
156
+ q: str  # presumably the user's query text - confirm against the chat endpoint
157
+ n: Optional[int] = 7  # NOTE(review): meaning not visible here, presumably a result count - confirm
158
+ d: Optional[float] = None  # NOTE(review): meaning not visible here - confirm against the chat endpoint
159
+ stream: Optional[bool] = False
160
+ title: Optional[str] = None
161
+ conversation_id: Optional[str] = None
162
+ turn_id: Optional[str] = None
163
+ city: Optional[str] = None  # city, region, country and country_code mirror the LocationData fields
164
+ region: Optional[str] = None
165
+ country: Optional[str] = None
166
+ country_code: Optional[str] = None
167
+ timezone: Optional[str] = None
168
+ images: Optional[list[str]] = None
169
+ files: Optional[list[FileAttachment]] = []  # Defaults to an empty list, not None
170
+ create_new: Optional[bool] = False
171
+
172
+
173
class Entry:
    "An entry with its raw and compiled text, optional heading and file, and a corpus id kept as a string"

    raw: str
    compiled: str
    heading: Optional[str]
    file: Optional[str]
    corpus_id: str

    def __init__(
        self,
        raw: str = None,
        compiled: str = None,
        heading: Optional[str] = None,
        file: Optional[str] = None,
        corpus_id: uuid.UUID = None,
    ):
        # Attribute assignment order fixes the key order of to_json and __repr__
        self.raw, self.compiled = raw, compiled
        self.heading, self.file = heading, file
        # The corpus id is always stored as its string form
        self.corpus_id = str(corpus_id)

    def to_json(self) -> str:
        "Serialize the attributes of the entry to a JSON string, keeping non-ASCII characters as is"
        attributes = vars(self)
        return json.dumps(attributes, ensure_ascii=False)

    def __repr__(self) -> str:
        return repr(vars(self))

    @classmethod
    def from_dict(cls, dictionary: dict):
        "Build an entry from a dictionary. The raw and compiled keys are required, the rest default to None"
        raw, compiled = dictionary["raw"], dictionary["compiled"]
        optional_fields = {key: dictionary.get(key, None) for key in ("file", "heading", "corpus_id")}
        return cls(raw=raw, compiled=compiled, **optional_fields)
khoj/utils/state.py ADDED
@@ -0,0 +1,48 @@
1
+ import os
2
+ import threading
3
+ from collections import defaultdict
4
+ from pathlib import Path
5
+ from typing import Any, Dict, List
6
+
7
+ from apscheduler.schedulers.background import BackgroundScheduler
8
+ from openai import OpenAI
9
+ from whisper import Whisper
10
+
11
+ from khoj.database.models import ProcessLock
12
+ from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
13
+ from khoj.utils import config as utils_config
14
+ from khoj.utils.config import OfflineChatProcessorModel, SearchModels
15
+ from khoj.utils.helpers import LRU, get_device, is_env_var_true
16
+ from khoj.utils.rawconfig import FullConfig
17
+
18
+ # Application Global State
19
+ config = FullConfig()
20
+ search_models = SearchModels()
21
+ embeddings_model: Dict[str, EmbeddingsModel] = None
22
+ cross_encoder_model: Dict[str, CrossEncoderModel] = None
23
+ openai_client: OpenAI = None
24
+ offline_chat_processor_config: OfflineChatProcessorModel = None
25
+ whisper_model: Whisper = None
26
+ config_file: Path = None
27
+ verbose: int = 0
28
+ host: str = None
29
+ port: int = None
30
+ ssl_config: Dict[str, str] = None
31
+ cli_args: List[str] = None
32
+ query_cache: Dict[str, LRU] = defaultdict(LRU)
33
+ chat_lock = threading.Lock()
34
+ SearchType = utils_config.SearchType
35
+ scheduler: BackgroundScheduler = None
36
+ schedule_leader_process_lock: ProcessLock = None
37
+ telemetry: List[Dict[str, str]] = []
38
+ telemetry_disabled: bool = is_env_var_true("KHOJ_TELEMETRY_DISABLE")
39
+ khoj_version: str = None
40
+ device = get_device()
41
+ chat_on_gpu: bool = True
42
+ anonymous_mode: bool = False
43
+ pretrained_tokenizers: Dict[str, Any] = dict()
44
+ billing_enabled: bool = (
45
+ os.getenv("STRIPE_API_KEY") is not None
46
+ and os.getenv("STRIPE_SIGNING_SECRET") is not None
47
+ and os.getenv("KHOJ_CLOUD_SUBSCRIPTION_URL") is not None
48
+ )
khoj/utils/yaml.py ADDED
@@ -0,0 +1,47 @@
1
+ from pathlib import Path
2
+
3
+ import yaml
4
+
5
+ from khoj.utils import state
6
+ from khoj.utils.rawconfig import FullConfig
7
+
8
+ # Do not emit tags when dumping to YAML
9
+ yaml.emitter.Emitter.process_tag = lambda self, *args, **kwargs: None # type: ignore[assignment]
10
+
11
+
12
def save_config_to_file_updated_state():
    """Write the current application config to the config file as YAML.

    Serializes `state.config` by alias and writes it to `state.config_file`.

    Returns:
        The application config that was written.
    """
    # Round trip through JSON to reduce the config to plain YAML-serializable types.
    # Uses the pydantic v2 serializer, matching the model_validate call used to parse configs.
    config_json = state.config.model_dump_json(by_alias=True)

    # Write as UTF-8 to match how config files are read back. The context manager closes the file.
    with open(state.config_file, "w", encoding="utf-8") as outfile:
        yaml.dump(yaml.safe_load(config_json), outfile)
    return state.config
17
+
18
+
19
def save_config_to_file(yaml_config: dict, yaml_config_file: Path):
    "Write the config dictionary to the given YML file, creating its directory when missing"
    config_dir = yaml_config_file.parent
    config_dir.mkdir(parents=True, exist_ok=True)

    with open(yaml_config_file, "w", encoding="utf-8") as target:
        yaml.safe_dump(yaml_config, target, allow_unicode=True)
26
+
27
+
28
def load_config_from_file(yaml_config_file: Path) -> dict:
    "Load the YML file at the given path and return its parsed content"
    with open(yaml_config_file, "r", encoding="utf-8") as source:
        return yaml.safe_load(source)
34
+
35
+
36
def parse_config_from_string(yaml_config: dict) -> FullConfig:
    "Validate the already loaded config and return it as a FullConfig"
    validated_config = FullConfig.model_validate(yaml_config)
    return validated_config
39
+
40
+
41
def parse_config_from_file(yaml_config_file):
    "Load the YML config file, then parse and validate its content"
    loaded_config = load_config_from_file(yaml_config_file)
    return parse_config_from_string(loaded_config)
44
+
45
+
46
def yaml_dump(data):
    "Dump data to a YAML string in block style, keeping key order and unicode characters"
    dump_options = {"allow_unicode": True, "sort_keys": False, "default_flow_style": False}
    return yaml.dump(data, **dump_options)