khoj 1.33.3.dev32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +218 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +452 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1821 -0
  10. khoj/database/admin.py +417 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_default_model.py +116 -0
  15. khoj/database/management/commands/change_generated_images_url.py +61 -0
  16. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  17. khoj/database/migrations/0001_khojuser.py +98 -0
  18. khoj/database/migrations/0002_googleuser.py +32 -0
  19. khoj/database/migrations/0003_vector_extension.py +10 -0
  20. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  21. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  22. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  23. khoj/database/migrations/0007_add_conversation.py +27 -0
  24. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  25. khoj/database/migrations/0009_khojapiuser.py +24 -0
  26. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  27. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  28. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  29. khoj/database/migrations/0012_entry_file_source.py +21 -0
  30. khoj/database/migrations/0013_subscription.py +37 -0
  31. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  32. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  33. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  34. khoj/database/migrations/0017_searchmodel.py +32 -0
  35. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  36. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  37. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  38. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  39. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  40. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  41. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  42. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  43. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  45. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  46. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  47. khoj/database/migrations/0029_userrequests.py +27 -0
  48. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  49. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  50. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  51. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  52. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  53. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  54. khoj/database/migrations/0035_processlock.py +26 -0
  55. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  56. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  57. khoj/database/migrations/0036_publicconversation.py +42 -0
  58. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  59. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  60. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  61. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  62. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  63. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  64. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  65. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  66. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  67. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  68. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  69. khoj/database/migrations/0045_fileobject.py +37 -0
  70. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  71. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  72. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  73. khoj/database/migrations/0049_datastore.py +38 -0
  74. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  75. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  76. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  77. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  78. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  79. khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
  80. khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
  81. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  82. khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
  83. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  84. khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
  85. khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
  86. khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
  87. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  88. khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
  89. khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
  90. khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
  91. khoj/database/migrations/0063_conversation_temp_id.py +36 -0
  92. khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
  93. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  94. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  95. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  96. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  97. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  98. khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
  99. khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
  100. khoj/database/migrations/0072_entry_search_model.py +24 -0
  101. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  102. khoj/database/migrations/0074_alter_conversation_title.py +17 -0
  103. khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
  104. khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
  105. khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
  106. khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
  107. khoj/database/migrations/__init__.py +0 -0
  108. khoj/database/models/__init__.py +725 -0
  109. khoj/database/tests.py +3 -0
  110. khoj/interface/compiled/404/index.html +1 -0
  111. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
  112. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
  113. khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
  114. khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
  115. khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
  116. khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
  117. khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
  118. khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
  119. khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
  120. khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
  121. khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
  122. khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
  123. khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
  124. khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
  125. khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
  126. khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
  127. khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
  128. khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
  129. khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
  130. khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
  131. khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
  132. khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
  133. khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
  134. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  135. khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
  136. khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
  137. khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
  138. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  139. khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
  140. khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
  141. khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
  142. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  143. khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
  144. khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
  145. khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
  146. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  147. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
  148. khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
  149. khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
  150. khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
  151. khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
  152. khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
  153. khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
  154. khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
  155. khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  156. khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
  157. khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
  158. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  159. khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
  160. khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
  161. khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
  162. khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
  163. khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
  164. khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
  165. khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
  166. khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
  167. khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
  168. khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
  169. khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
  170. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
  171. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
  172. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
  173. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
  174. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
  175. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
  176. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
  177. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
  178. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
  179. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
  180. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
  181. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
  182. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
  183. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
  184. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
  185. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
  186. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
  187. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
  188. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
  189. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
  190. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
  191. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
  192. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
  193. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
  194. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
  195. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
  196. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
  197. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
  198. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
  199. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
  200. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
  201. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
  202. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
  203. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
  204. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
  205. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
  206. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
  207. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
  208. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
  209. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
  210. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
  211. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
  212. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
  213. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
  214. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
  215. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
  216. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
  217. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
  218. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
  219. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
  220. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
  221. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
  222. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
  223. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
  224. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
  225. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
  226. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
  227. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
  228. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
  229. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
  230. khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
  231. khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
  232. khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
  233. khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
  234. khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
  235. khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
  236. khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
  237. khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
  238. khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
  239. khoj/interface/compiled/agents/index.html +1 -0
  240. khoj/interface/compiled/agents/index.txt +7 -0
  241. khoj/interface/compiled/agents.svg +6 -0
  242. khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
  243. khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
  244. khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
  245. khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
  246. khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
  247. khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
  248. khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
  249. khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
  250. khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
  251. khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
  252. khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
  253. khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
  254. khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
  255. khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
  256. khoj/interface/compiled/automation.svg +37 -0
  257. khoj/interface/compiled/automations/index.html +1 -0
  258. khoj/interface/compiled/automations/index.txt +8 -0
  259. khoj/interface/compiled/chat/index.html +1 -0
  260. khoj/interface/compiled/chat/index.txt +7 -0
  261. khoj/interface/compiled/chat.svg +24 -0
  262. khoj/interface/compiled/close.svg +5 -0
  263. khoj/interface/compiled/copy-button-success.svg +6 -0
  264. khoj/interface/compiled/copy-button.svg +5 -0
  265. khoj/interface/compiled/index.html +1 -0
  266. khoj/interface/compiled/index.txt +7 -0
  267. khoj/interface/compiled/khoj.webmanifest +76 -0
  268. khoj/interface/compiled/logo.svg +24 -0
  269. khoj/interface/compiled/search/index.html +1 -0
  270. khoj/interface/compiled/search/index.txt +7 -0
  271. khoj/interface/compiled/send.svg +1 -0
  272. khoj/interface/compiled/settings/index.html +1 -0
  273. khoj/interface/compiled/settings/index.txt +9 -0
  274. khoj/interface/compiled/share/chat/index.html +1 -0
  275. khoj/interface/compiled/share/chat/index.txt +7 -0
  276. khoj/interface/compiled/share.svg +8 -0
  277. khoj/interface/compiled/thumbs-down.svg +6 -0
  278. khoj/interface/compiled/thumbs-up.svg +6 -0
  279. khoj/interface/email/feedback.html +34 -0
  280. khoj/interface/email/magic_link.html +40 -0
  281. khoj/interface/email/task.html +37 -0
  282. khoj/interface/email/welcome.html +90 -0
  283. khoj/interface/web/.well-known/assetlinks.json +11 -0
  284. khoj/interface/web/assets/icons/agents.svg +19 -0
  285. khoj/interface/web/assets/icons/automation.svg +43 -0
  286. khoj/interface/web/assets/icons/chat.svg +24 -0
  287. khoj/interface/web/assets/icons/github.svg +1 -0
  288. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  289. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  290. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
  291. khoj/interface/web/assets/icons/khoj.svg +26 -0
  292. khoj/interface/web/assets/icons/logotype.svg +1 -0
  293. khoj/interface/web/assets/icons/search.svg +57 -0
  294. khoj/interface/web/assets/icons/sync.svg +4 -0
  295. khoj/interface/web/assets/khoj.css +237 -0
  296. khoj/interface/web/assets/utils.js +33 -0
  297. khoj/interface/web/base_config.html +445 -0
  298. khoj/interface/web/content_source_github_input.html +208 -0
  299. khoj/interface/web/login.html +310 -0
  300. khoj/interface/web/utils.html +48 -0
  301. khoj/main.py +249 -0
  302. khoj/manage.py +22 -0
  303. khoj/migrations/__init__.py +0 -0
  304. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  305. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  306. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  307. khoj/migrations/migrate_offline_model.py +29 -0
  308. khoj/migrations/migrate_processor_config_openai.py +67 -0
  309. khoj/migrations/migrate_server_pg.py +132 -0
  310. khoj/migrations/migrate_version.py +17 -0
  311. khoj/processor/__init__.py +0 -0
  312. khoj/processor/content/__init__.py +0 -0
  313. khoj/processor/content/docx/__init__.py +0 -0
  314. khoj/processor/content/docx/docx_to_entries.py +111 -0
  315. khoj/processor/content/github/__init__.py +0 -0
  316. khoj/processor/content/github/github_to_entries.py +226 -0
  317. khoj/processor/content/images/__init__.py +0 -0
  318. khoj/processor/content/images/image_to_entries.py +117 -0
  319. khoj/processor/content/markdown/__init__.py +0 -0
  320. khoj/processor/content/markdown/markdown_to_entries.py +160 -0
  321. khoj/processor/content/notion/notion_to_entries.py +259 -0
  322. khoj/processor/content/org_mode/__init__.py +0 -0
  323. khoj/processor/content/org_mode/org_to_entries.py +226 -0
  324. khoj/processor/content/org_mode/orgnode.py +532 -0
  325. khoj/processor/content/pdf/__init__.py +0 -0
  326. khoj/processor/content/pdf/pdf_to_entries.py +119 -0
  327. khoj/processor/content/plaintext/__init__.py +0 -0
  328. khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
  329. khoj/processor/content/text_to_entries.py +296 -0
  330. khoj/processor/conversation/__init__.py +0 -0
  331. khoj/processor/conversation/anthropic/__init__.py +0 -0
  332. khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
  333. khoj/processor/conversation/anthropic/utils.py +217 -0
  334. khoj/processor/conversation/google/__init__.py +0 -0
  335. khoj/processor/conversation/google/gemini_chat.py +253 -0
  336. khoj/processor/conversation/google/utils.py +260 -0
  337. khoj/processor/conversation/offline/__init__.py +0 -0
  338. khoj/processor/conversation/offline/chat_model.py +308 -0
  339. khoj/processor/conversation/offline/utils.py +80 -0
  340. khoj/processor/conversation/offline/whisper.py +15 -0
  341. khoj/processor/conversation/openai/__init__.py +0 -0
  342. khoj/processor/conversation/openai/gpt.py +243 -0
  343. khoj/processor/conversation/openai/utils.py +232 -0
  344. khoj/processor/conversation/openai/whisper.py +13 -0
  345. khoj/processor/conversation/prompts.py +1188 -0
  346. khoj/processor/conversation/utils.py +867 -0
  347. khoj/processor/embeddings.py +122 -0
  348. khoj/processor/image/generate.py +215 -0
  349. khoj/processor/speech/__init__.py +0 -0
  350. khoj/processor/speech/text_to_speech.py +51 -0
  351. khoj/processor/tools/__init__.py +0 -0
  352. khoj/processor/tools/online_search.py +472 -0
  353. khoj/processor/tools/run_code.py +179 -0
  354. khoj/routers/__init__.py +0 -0
  355. khoj/routers/api.py +760 -0
  356. khoj/routers/api_agents.py +295 -0
  357. khoj/routers/api_chat.py +1273 -0
  358. khoj/routers/api_content.py +634 -0
  359. khoj/routers/api_model.py +123 -0
  360. khoj/routers/api_phone.py +86 -0
  361. khoj/routers/api_subscription.py +144 -0
  362. khoj/routers/auth.py +307 -0
  363. khoj/routers/email.py +135 -0
  364. khoj/routers/helpers.py +2333 -0
  365. khoj/routers/notion.py +85 -0
  366. khoj/routers/research.py +364 -0
  367. khoj/routers/storage.py +63 -0
  368. khoj/routers/twilio.py +36 -0
  369. khoj/routers/web_client.py +141 -0
  370. khoj/search_filter/__init__.py +0 -0
  371. khoj/search_filter/base_filter.py +15 -0
  372. khoj/search_filter/date_filter.py +215 -0
  373. khoj/search_filter/file_filter.py +32 -0
  374. khoj/search_filter/word_filter.py +29 -0
  375. khoj/search_type/__init__.py +0 -0
  376. khoj/search_type/text_search.py +255 -0
  377. khoj/utils/__init__.py +0 -0
  378. khoj/utils/cli.py +101 -0
  379. khoj/utils/config.py +81 -0
  380. khoj/utils/constants.py +51 -0
  381. khoj/utils/fs_syncer.py +252 -0
  382. khoj/utils/helpers.py +627 -0
  383. khoj/utils/initialization.py +301 -0
  384. khoj/utils/jsonl.py +43 -0
  385. khoj/utils/models.py +47 -0
  386. khoj/utils/rawconfig.py +208 -0
  387. khoj/utils/state.py +48 -0
  388. khoj/utils/yaml.py +47 -0
  389. khoj-1.33.3.dev32.dist-info/METADATA +190 -0
  390. khoj-1.33.3.dev32.dist-info/RECORD +393 -0
  391. khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
  392. khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
  393. khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,69 @@
1
+ """
2
+ Current format of khoj.yml
3
+ ---
4
+ app:
5
+ ...
6
+ content-type:
7
+ ...
8
+ processor:
9
+ conversation:
10
+ offline-chat:
11
+ enable-offline-chat: false
12
+ chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
13
+ ...
14
+ search-type:
15
+ ...
16
+
17
+ New format of khoj.yml
18
+ ---
19
+ app:
20
+ ...
21
+ content-type:
22
+ ...
23
+ processor:
24
+ conversation:
25
+ offline-chat:
26
+ enable-offline-chat: false
27
+ chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
28
+ ...
29
+ search-type:
30
+ ...
31
+ """
32
+ import logging
33
+
34
+ from packaging import version
35
+
36
+ from khoj.utils.yaml import load_config_from_file, save_config_to_file
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
def migrate_offline_chat_default_model(args):
    """Move a legacy ``.bin`` offline chat model to the Mistral GGUF default.

    Loads the config referenced by ``args.config_file``. When the config has a
    ``processor -> conversation -> offline-chat -> chat-model`` entry and its
    ``version`` is missing or older than 0.12.4, the config is stamped with
    version 0.12.4, a chat model name ending in ``.bin`` is replaced with
    ``mistral-7b-instruct-v0.1.Q4_0.gguf``, and the result is handed to
    ``save_config_to_file`` for the same path. In every other case the config
    is left untouched.

    Args:
        args: Object exposing ``config_file``, the path passed to the config
            load/save helpers.

    Returns:
        The ``args`` object that was passed in, on every path.
    """
    schema_version = "0.12.4"
    raw_config = load_config_from_file(args.config_file)
    # NOTE(review): the loader is not visible from here; presumably an empty
    # config file comes back as None - confirm. Nothing to migrate in that case.
    if raw_config is None:
        return args
    previous_version = raw_config.get("version")

    # Walk down to the offline chat section. A key that is present but left
    # empty holds None, so stop at any level that is missing or None instead
    # of failing on the next membership test.
    offline_chat = raw_config
    for key in ("processor", "conversation", "offline-chat"):
        if key not in offline_chat:
            return args
        offline_chat = offline_chat[key]
        if offline_chat is None:
            return args
    if "chat-model" not in offline_chat:
        return args

    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
        )
        raw_config["version"] = schema_version

        # Update offline chat model to mistral in GGUF format to use latest GPT4All.
        # A null chat-model has no suffix to inspect, so it is left as is.
        offline_chat_model = offline_chat["chat-model"]
        if isinstance(offline_chat_model, str) and offline_chat_model.endswith(".bin"):
            # offline_chat is the nested section inside raw_config, so this
            # assignment updates the config that gets saved below.
            offline_chat["chat-model"] = "mistral-7b-instruct-v0.1.Q4_0.gguf"

        save_config_to_file(raw_config, args.config_file)
    return args
@@ -0,0 +1,71 @@
1
+ """
2
+ Current format of khoj.yml
3
+ ---
4
+ app:
5
+ ...
6
+ content-type:
7
+ ...
8
+ processor:
9
+ conversation:
10
+ offline-chat:
11
+ enable-offline-chat: false
12
+ chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
13
+ ...
14
+ search-type:
15
+ ...
16
+
17
+ New format of khoj.yml
18
+ ---
19
+ app:
20
+ ...
21
+ content-type:
22
+ ...
23
+ processor:
24
+ conversation:
25
+ offline-chat:
26
+ enable-offline-chat: false
27
+ chat-model: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
28
+ ...
29
+ search-type:
30
+ ...
31
+ """
32
+ import logging
33
+
34
+ from packaging import version
35
+
36
+ from khoj.utils.yaml import load_config_from_file, save_config_to_file
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
def migrate_offline_chat_default_model(args):
    """Move the Mistral GGUF offline chat default to Hermes-2-Pro.

    Loads the config referenced by ``args.config_file``. When the config has a
    ``processor -> conversation -> offline-chat -> chat-model`` entry and its
    ``version`` is missing or older than 1.7.0, the config is stamped with
    version 1.7.0, a chat model equal to ``mistral-7b-instruct-v0.1.Q4_0.gguf``
    is replaced with ``NousResearch/Hermes-2-Pro-Mistral-7B-GGUF``, and the
    result is handed to ``save_config_to_file`` for the same path. In every
    other case the config is left untouched.

    Args:
        args: Object exposing ``config_file``, the path passed to the config
            load/save helpers.

    Returns:
        The ``args`` object that was passed in, on every path.
    """
    schema_version = "1.7.0"
    raw_config = load_config_from_file(args.config_file)
    # NOTE(review): the loader is not visible from here; presumably an empty
    # config file comes back as None - confirm. Nothing to migrate in that case.
    if raw_config is None:
        return args
    previous_version = raw_config.get("version")

    # Walk down to the offline chat section. A key that is present but left
    # empty holds None, so stop at any level that is missing or None instead
    # of failing on the next membership test.
    offline_chat = raw_config
    for key in ("processor", "conversation", "offline-chat"):
        if key not in offline_chat:
            return args
        offline_chat = offline_chat[key]
        if offline_chat is None:
            return args
    if "chat-model" not in offline_chat:
        return args

    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        # The message names the model this migration actually switches to.
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to Hermes-2-Pro GGUF"
        )
        raw_config["version"] = schema_version

        # Update offline chat model to use Nous Research's Hermes-2-Pro GGUF in path format suitable for llama-cpp
        if offline_chat["chat-model"] == "mistral-7b-instruct-v0.1.Q4_0.gguf":
            # offline_chat is the nested section inside raw_config, so this
            # assignment updates the config that gets saved below.
            offline_chat["chat-model"] = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF"

        save_config_to_file(raw_config, args.config_file)
    return args
@@ -0,0 +1,83 @@
1
+ """
2
+ Current format of khoj.yml
3
+ ---
4
+ app:
5
+ ...
6
+ content-type:
7
+ ...
8
+ processor:
9
+ conversation:
10
+ enable-offline-chat: false
11
+ conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
12
+ openai:
13
+ ...
14
+ search-type:
15
+ ...
16
+
17
+ New format of khoj.yml
18
+ ---
19
+ app:
20
+ ...
21
+ content-type:
22
+ ...
23
+ processor:
24
+ conversation:
25
+ offline-chat:
26
+ enable-offline-chat: false
27
+ chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
28
+ tokenizer: null
29
+ max-prompt-size: null
30
+ conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
31
+ openai:
32
+ ...
33
+ search-type:
34
+ ...
35
+ """
36
+ import logging
37
+
38
+ from packaging import version
39
+
40
+ from khoj.utils.yaml import load_config_from_file, save_config_to_file
41
+
42
+ logger = logging.getLogger(__name__)
43
+
44
+
45
def migrate_offline_chat_schema(args):
    """Move the flat ``enable-offline-chat`` flag into an ``offline-chat`` section (schema 0.12.3).

    Reads the khoj.yml at ``args.config_file``. If it has a
    ``processor.conversation`` section and its ``version`` is missing or older
    than 0.12.3, this:
      - sets ``version`` to 0.12.3,
      - adds ``max-prompt-size`` and ``tokenizer`` (both null) to the conversation section,
      - builds ``offline-chat`` from the old ``enable-offline-chat`` flag and any
        already configured chat model (default ``llama-2-7b-chat.ggmlv3.q4_0.bin``),
      - drops the old ``enable-offline-chat`` key,
    then saves the file. ``args`` is returned untouched.
    """
    schema_version = "0.12.3"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # Bail out unless the config has a processor section with a conversation entry
    processor = raw_config["processor"] if "processor" in raw_config else None
    if processor is None or "conversation" not in processor:
        return args

    needs_upgrade = previous_version is None or version.parse(previous_version) < version.parse(schema_version)
    if not needs_upgrade:
        return args

    logger.info(
        f"Upgrading config schema to {schema_version} from {previous_version} to make (offline) chat more configuration"
    )
    raw_config["version"] = schema_version

    conversation = processor["conversation"]

    # New, initially unset, prompt sizing fields
    conversation["max-prompt-size"] = None
    conversation["tokenizer"] = None

    # Carry over a previously configured chat model, else fall back to the default one
    chat_model = conversation.get("offline-chat", {}).get("chat-model", "llama-2-7b-chat.ggmlv3.q4_0.bin")
    offline_chat_enabled = conversation.get("enable-offline-chat", False)
    conversation["offline-chat"] = {
        "enable-offline-chat": offline_chat_enabled,
        "chat-model": chat_model,
    }

    # The flat flag now lives inside the offline-chat section
    conversation.pop("enable-offline-chat", None)

    save_config_to_file(raw_config, args.config_file)
    return args
@@ -0,0 +1,29 @@
1
+ import logging
2
+ import os
3
+
4
+ from packaging import version
5
+
6
+ from khoj.utils.yaml import load_config_from_file, save_config_to_file
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
def migrate_offline_model(args):
    """Bump the config to schema 0.10.1 and drop the old cached offline model file.

    Applies only when the ``version`` in the khoj.yml at ``args.config_file`` is
    missing or older than 0.10.1. In that case the version is set to 0.10.1,
    ``~/.cache/gpt4all/llama-2-7b-chat.ggmlv3.q4_K_S.bin`` is deleted if it
    exists, and the config is saved. ``args`` is returned either way.
    """
    schema_version = "0.10.1"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    up_to_date = previous_version is not None and not (
        version.parse(previous_version) < version.parse(schema_version)
    )
    if up_to_date:
        return args

    logger.info(
        f"Migrating offline model used for version {previous_version} to latest version for {args.version_no}"
    )
    raw_config["version"] = schema_version

    # Remove the previously downloaded model from the local cache, when present
    cached_model = os.path.expanduser("~/.cache/gpt4all/llama-2-7b-chat.ggmlv3.q4_K_S.bin")
    if os.path.exists(cached_model):
        os.remove(cached_model)

    save_config_to_file(raw_config, args.config_file)
    return args
@@ -0,0 +1,67 @@
1
+ """
2
+ Current format of khoj.yml
3
+ ---
4
+ app:
5
+ should-log-telemetry: true
6
+ content-type:
7
+ ...
8
+ processor:
9
+ conversation:
10
+ chat-model: gpt-3.5-turbo
11
+ conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
12
+ model: text-davinci-003
13
+ openai-api-key: sk-secret-key
14
+ search-type:
15
+ ...
16
+
17
+ New format of khoj.yml
18
+ ---
19
+ app:
20
+ should-log-telemetry: true
21
+ content-type:
22
+ ...
23
+ processor:
24
+ conversation:
25
+ openai:
26
+ chat-model: gpt-3.5-turbo
27
+ api-key: sk-secret-key
28
+ conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
29
+ enable-offline-chat: false
30
+ search-type:
31
+ ...
32
+ """
33
+ from khoj.utils.yaml import load_config_from_file, save_config_to_file
34
+
35
+
36
def migrate_processor_conversation_schema(args):
    """Nest the flat OpenAI settings of the conversation processor under ``openai`` (schema 0.10.0).

    Reads the khoj.yml at ``args.config_file``. A config is treated as being in
    the old format when ``processor.conversation`` carries a top-level
    ``openai-api-key`` or ``chat-model``. For such configs the conversation
    section is rewritten to hold only:
      - ``openai``: ``{"chat-model": ..., "api-key": ...}`` from the old flat keys,
      - ``conversation-logfile``: carried over (null if it was absent),
      - ``enable-offline-chat``: carried over, defaulting to ``False``,
    ``version`` is set to 0.10.0 and the file is saved.

    Args:
        args: Parsed CLI arguments; only ``args.config_file`` is read.

    Returns:
        The same ``args`` object, in every case.
    """
    schema_version = "0.10.0"
    raw_config = load_config_from_file(args.config_file)

    # The processor/conversation sections may be absent or left empty (null) in YAML.
    # An empty mapping then falls through to the "nothing to migrate" check below.
    conversation = (raw_config.get("processor") or {}).get("conversation") or {}

    current_openai_api_key = conversation.get("openai-api-key")
    current_chat_model = conversation.get("chat-model")
    if current_openai_api_key is None and current_chat_model is None:
        # Neither old-format key is present: not an old-format config
        return args

    raw_config["version"] = schema_version

    # Update conversation processor schema.
    # Keep an already configured enable-offline-chat value instead of resetting it to False.
    raw_config["processor"]["conversation"] = {
        "openai": {"chat-model": current_chat_model, "api-key": current_openai_api_key},
        "conversation-logfile": conversation.get("conversation-logfile"),
        "enable-offline-chat": conversation.get("enable-offline-chat", False),
    }

    save_config_to_file(raw_config, args.config_file)
    return args
@@ -0,0 +1,132 @@
1
+ """
2
+ The application config currently looks like this:
3
+ app:
4
+ should-log-telemetry: true
5
+ content-type:
6
+ ...
7
+ processor:
8
+ conversation:
9
+ conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
10
+ max-prompt-size: null
11
+ offline-chat:
12
+ chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
13
+ enable-offline-chat: false
14
+ openai:
15
+ api-key: sk-blah
16
+ chat-model: gpt-3.5-turbo
17
+ tokenizer: null
18
+ search-type:
19
+ asymmetric:
20
+ cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
21
+ encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
22
+ encoder-type: null
23
+ model-directory: /Users/si/.khoj/search/asymmetric
24
+ image:
25
+ encoder: sentence-transformers/clip-ViT-B-32
26
+ encoder-type: null
27
+ model-directory: /Users/si/.khoj/search/image
28
+ symmetric:
29
+ cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
30
+ encoder: sentence-transformers/all-MiniLM-L6-v2
31
+ encoder-type: null
32
+ model-directory: ~/.khoj/search/symmetric
33
+ version: 0.14.0
34
+
35
+
36
+ The new version will look like this:
37
+ app:
38
+ should-log-telemetry: true
39
+ processor:
40
+ conversation:
41
+ offline-chat:
42
+ enabled: false
43
+ openai:
44
+ api-key: sk-blah
45
+ chat-model-options:
46
+ - chat-model: gpt-3.5-turbo
47
+ tokenizer: null
48
+ type: openai
49
+ - chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
50
+ tokenizer: null
51
+ type: offline
52
+ search-type:
53
+ asymmetric:
54
+ cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
55
+ encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
56
+ version: 0.15.0
57
+ """
58
+
59
+ import logging
60
+
61
+ from packaging import version
62
+
63
+ from khoj.database.models import AiModelApi, ChatModel, SearchModelConfig
64
+ from khoj.utils.yaml import load_config_from_file, save_config_to_file
65
+
66
+ logger = logging.getLogger(__name__)
67
+
68
+
69
def migrate_server_pg(args):
    """Copy search and chat model settings from khoj.yml into the database (schema 0.15.0).

    Runs only when the ``version`` in the config at ``args.config_file`` is
    missing or older than 0.15.0. It then:
      - sets ``version`` to 0.15.0,
      - if ``search-type.asymmetric`` exists: deletes the existing text
        ``SearchModelConfig`` rows and creates a "default" one from its
        ``encoder`` and ``cross-encoder``,
      - if ``processor.conversation.offline-chat`` exists: creates an offline ``ChatModel``,
      - if ``processor.conversation.openai`` is set and has an ``api-key``: creates
        an ``AiModelApi`` plus an OpenAI ``ChatModel`` (chat model defaults to
        ``gpt-3.5-turbo``; the default is also written into the config),
      - saves the config.

    Args:
        args: Parsed CLI arguments; ``args.config_file`` and ``args.version_no`` are read.

    Returns:
        The same ``args`` object, in every case.
    """
    schema_version = "0.15.0"
    raw_config = load_config_from_file(args.config_file)

    # Must be checked before the config is first dereferenced below
    if raw_config is None:
        return args

    previous_version = raw_config.get("version")
    if previous_version is not None and version.parse(previous_version) >= version.parse(schema_version):
        return args

    logger.info(
        f"Migrating configuration used for version {previous_version} to latest version for server with postgres in {args.version_no}"
    )
    raw_config["version"] = schema_version

    search_type = raw_config.get("search-type")
    if search_type and "asymmetric" in search_type:
        # Delete all existing search models
        SearchModelConfig.objects.filter(model_type=SearchModelConfig.ModelType.TEXT).delete()
        # Create new search model from existing Khoj YAML config
        asymmetric_search = search_type["asymmetric"]
        SearchModelConfig.objects.create(
            name="default",
            model_type=SearchModelConfig.ModelType.TEXT,
            bi_encoder=asymmetric_search.get("encoder"),
            cross_encoder=asymmetric_search.get("cross-encoder"),
        )

    # Sections may be absent or left empty (null) in YAML; treat both as "no conversation config"
    processor_conversation = (raw_config.get("processor") or {}).get("conversation") or {}

    if "offline-chat" in processor_conversation:
        offline_chat = processor_conversation["offline-chat"]
        ChatModel.objects.create(
            name=offline_chat.get("chat-model"),
            tokenizer=processor_conversation.get("tokenizer"),
            max_prompt_size=processor_conversation.get("max-prompt-size"),
            model_type=ChatModel.ModelType.OFFLINE,
        )

    openai_config = processor_conversation.get("openai")
    if openai_config:
        if openai_config.get("api-key") is None:
            logger.error("OpenAI API Key is not set. Will not be migrating OpenAI config.")
        else:
            # Written into the config dict itself, so the default is persisted on save
            if openai_config.get("chat-model") is None:
                openai_config["chat-model"] = "gpt-3.5-turbo"

            openai_model_api = AiModelApi.objects.create(api_key=openai_config.get("api-key"), name="default")

            ChatModel.objects.create(
                name=openai_config.get("chat-model"),
                tokenizer=processor_conversation.get("tokenizer"),
                max_prompt_size=processor_conversation.get("max-prompt-size"),
                model_type=ChatModel.ModelType.OPENAI,
                ai_model_api=openai_model_api,
            )

    save_config_to_file(raw_config, args.config_file)

    return args
@@ -0,0 +1,17 @@
1
+ from khoj.utils.yaml import load_config_from_file, save_config_to_file
2
+
3
+
4
def migrate_config_to_version(args):
    """Stamp an unversioned khoj.yml with schema 0.9.0 and request an index rebuild.

    A config at ``args.config_file`` that already has a ``version`` key is left
    alone. Otherwise ``version`` is set to 0.9.0, the config is saved and
    ``args.regenerate`` is switched on. ``args`` is returned in both cases.
    """
    schema_version = "0.9.0"
    raw_config = load_config_from_file(args.config_file)

    # Already versioned: nothing to add
    if "version" in raw_config:
        return args

    # Add version to khoj config schema
    raw_config["version"] = schema_version
    save_config_to_file(raw_config, args.config_file)

    # First start on this version: regenerate the khoj index.
    # This should refresh the index and apply index corruption fixes from #325
    args.regenerate = True

    return args
File without changes
File without changes
File without changes
@@ -0,0 +1,111 @@
1
+ import logging
2
+ import tempfile
3
+ from typing import Dict, List, Tuple
4
+
5
+ from langchain_community.document_loaders import Docx2txtLoader
6
+
7
+ from khoj.database.models import Entry as DbEntry
8
+ from khoj.database.models import KhojUser
9
+ from khoj.processor.content.text_to_entries import TextToEntries
10
+ from khoj.utils.helpers import timer
11
+ from khoj.utils.rawconfig import Entry
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class DocxToEntries(TextToEntries):
    """Turn DOCX documents into text entries and sync them as a user's embeddings."""

    def __init__(self):
        super().__init__()

    # Define Functions
    def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
        """Extract, chunk and sync entries from the given DOCX files.

        Args:
            files: Map of file name to file content. Names whose content equals
                ``b""`` are not parsed; they are handed to ``update_embeddings``
                as ``deletion_file_names``.
            user: User the entries are synced for.
            regenerate: Forwarded to ``update_embeddings``.

        Returns:
            Tuple of (number of new embeddings, number of deleted embeddings),
            as returned by ``update_embeddings``.
        """
        # Extract required fields from config
        deletion_file_names = {file for file in files if files[file] == b""}
        # Keep the caller's file order, rather than depending on set iteration order
        files = {file: content for file, content in files.items() if file not in deletion_file_names}

        # Extract Entries from specified Docx files
        with timer("Extract entries from specified DOCX files", logger):
            file_to_text_map, current_entries = DocxToEntries.extract_docx_entries(files)

        # Split entries by max tokens supported by model
        with timer("Split entries by max token size supported by model", logger):
            current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256)

        # Identify, mark and merge any new entries with previous entries
        with timer("Identify new or updated entries", logger):
            num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
                user,
                current_entries,
                DbEntry.EntryType.DOCX,
                DbEntry.EntrySource.COMPUTER,
                "compiled",
                logger,
                deletion_file_names,
                regenerate=regenerate,
                file_to_text_map=file_to_text_map,
            )

        return num_new_embeddings, num_deleted_embeddings

    @staticmethod
    def extract_docx_entries(docx_files) -> Tuple[Dict, List[Entry]]:
        """Extract entries from specified DOCX files

        Args:
            docx_files: Map of file name to DOCX file content.

        Returns:
            Tuple of (map of file name to its extracted texts, list of entries
            across all files). Files that fail to extract are logged and skipped.
        """
        entries: List[Entry] = []
        file_to_text_map = dict()
        for docx_file, docx_content in docx_files.items():
            try:
                docx_texts = DocxToEntries.extract_text(docx_content)
                # Convert per file, so identical text in two different files
                # stays attributed to the file it actually came from
                entries.extend(
                    DocxToEntries.convert_docx_entries_to_maps(docx_texts, dict.fromkeys(docx_texts, docx_file))
                )
                file_to_text_map[docx_file] = docx_texts
            except Exception as e:
                logger.warning(f"Unable to extract entries from file: {docx_file}")
                logger.warning(e, exc_info=True)
        return file_to_text_map, entries

    @staticmethod
    def convert_docx_entries_to_maps(parsed_entries: List[str], entry_to_file_map) -> List[Entry]:
        """Convert each DOCX entry into an ``Entry``

        Args:
            parsed_entries: Extracted texts to convert.
            entry_to_file_map: Map of extracted text to the name of its source file.

        Returns:
            One ``Entry`` per text, with the file name as its heading and as a
            prefix of its compiled form.
        """
        entries = []
        for parsed_entry in parsed_entries:
            entry_filename = entry_to_file_map[parsed_entry]
            # Append base filename to compiled entry for context to model
            heading = f"{entry_filename}\n"
            compiled_entry = f"{heading}{parsed_entry}"
            entries.append(
                Entry(
                    compiled=compiled_entry,
                    raw=parsed_entry,
                    heading=heading,
                    file=f"{entry_filename}",
                )
            )

        logger.debug(f"Converted {len(parsed_entries)} DOCX entries to dictionaries")

        return entries

    @staticmethod
    def extract_text(docx_file):
        """Extract text from specified DOCX file

        Args:
            docx_file: Content of the DOCX file, as bytes.

        Returns:
            List with the ``page_content`` of each document returned by the
            loader. Empty list if extraction fails; the failure is logged.
        """
        docx_entry_by_pages = []
        try:
            # Create temp file with .docx extension that gets auto-deleted
            # NOTE(review): the loader reopens the file by name while it is still open here;
            # per the tempfile docs this is platform dependent (not possible on Windows) - confirm target platforms
            with tempfile.NamedTemporaryFile(suffix=".docx", delete=True) as tmp:
                tmp.write(docx_file)
                tmp.flush()  # Ensure all data is written

                # Load the content using Docx2txtLoader
                loader = Docx2txtLoader(tmp.name)
                docx_entries_per_file = loader.load()

                # Convert the loaded entries into the desired format
                docx_entry_by_pages = [page.page_content for page in docx_entries_per_file]
        except Exception as e:
            # docx_file holds the document's raw bytes. Log its size, not its content.
            content_size = len(docx_file) if isinstance(docx_file, (bytes, bytearray)) else "unknown"
            logger.warning(f"Unable to extract text from DOCX file content of size: {content_size} bytes")
            logger.warning(e, exc_info=True)

        return docx_entry_by_pages
File without changes