khoj 1.33.3.dev32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +218 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +452 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1821 -0
  10. khoj/database/admin.py +417 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_default_model.py +116 -0
  15. khoj/database/management/commands/change_generated_images_url.py +61 -0
  16. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  17. khoj/database/migrations/0001_khojuser.py +98 -0
  18. khoj/database/migrations/0002_googleuser.py +32 -0
  19. khoj/database/migrations/0003_vector_extension.py +10 -0
  20. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  21. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  22. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  23. khoj/database/migrations/0007_add_conversation.py +27 -0
  24. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  25. khoj/database/migrations/0009_khojapiuser.py +24 -0
  26. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  27. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  28. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  29. khoj/database/migrations/0012_entry_file_source.py +21 -0
  30. khoj/database/migrations/0013_subscription.py +37 -0
  31. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  32. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  33. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  34. khoj/database/migrations/0017_searchmodel.py +32 -0
  35. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  36. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  37. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  38. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  39. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  40. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  41. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  42. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  43. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  45. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  46. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  47. khoj/database/migrations/0029_userrequests.py +27 -0
  48. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  49. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  50. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  51. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  52. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  53. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  54. khoj/database/migrations/0035_processlock.py +26 -0
  55. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  56. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  57. khoj/database/migrations/0036_publicconversation.py +42 -0
  58. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  59. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  60. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  61. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  62. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  63. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  64. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  65. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  66. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  67. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  68. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  69. khoj/database/migrations/0045_fileobject.py +37 -0
  70. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  71. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  72. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  73. khoj/database/migrations/0049_datastore.py +38 -0
  74. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  75. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  76. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  77. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  78. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  79. khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
  80. khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
  81. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  82. khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
  83. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  84. khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
  85. khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
  86. khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
  87. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  88. khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
  89. khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
  90. khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
  91. khoj/database/migrations/0063_conversation_temp_id.py +36 -0
  92. khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
  93. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  94. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  95. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  96. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  97. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  98. khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
  99. khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
  100. khoj/database/migrations/0072_entry_search_model.py +24 -0
  101. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  102. khoj/database/migrations/0074_alter_conversation_title.py +17 -0
  103. khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
  104. khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
  105. khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
  106. khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
  107. khoj/database/migrations/__init__.py +0 -0
  108. khoj/database/models/__init__.py +725 -0
  109. khoj/database/tests.py +3 -0
  110. khoj/interface/compiled/404/index.html +1 -0
  111. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
  112. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
  113. khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
  114. khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
  115. khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
  116. khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
  117. khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
  118. khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
  119. khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
  120. khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
  121. khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
  122. khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
  123. khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
  124. khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
  125. khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
  126. khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
  127. khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
  128. khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
  129. khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
  130. khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
  131. khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
  132. khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
  133. khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
  134. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  135. khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
  136. khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
  137. khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
  138. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  139. khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
  140. khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
  141. khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
  142. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  143. khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
  144. khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
  145. khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
  146. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  147. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
  148. khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
  149. khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
  150. khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
  151. khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
  152. khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
  153. khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
  154. khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
  155. khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  156. khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
  157. khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
  158. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  159. khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
  160. khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
  161. khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
  162. khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
  163. khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
  164. khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
  165. khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
  166. khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
  167. khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
  168. khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
  169. khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
  170. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
  171. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
  172. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
  173. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
  174. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
  175. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
  176. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
  177. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
  178. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
  179. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
  180. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
  181. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
  182. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
  183. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
  184. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
  185. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
  186. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
  187. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
  188. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
  189. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
  190. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
  191. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
  192. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
  193. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
  194. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
  195. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
  196. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
  197. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
  198. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
  199. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
  200. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
  201. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
  202. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
  203. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
  204. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
  205. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
  206. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
  207. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
  208. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
  209. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
  210. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
  211. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
  212. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
  213. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
  214. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
  215. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
  216. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
  217. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
  218. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
  219. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
  220. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
  221. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
  222. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
  223. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
  224. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
  225. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
  226. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
  227. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
  228. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
  229. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
  230. khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
  231. khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
  232. khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
  233. khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
  234. khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
  235. khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
  236. khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
  237. khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
  238. khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
  239. khoj/interface/compiled/agents/index.html +1 -0
  240. khoj/interface/compiled/agents/index.txt +7 -0
  241. khoj/interface/compiled/agents.svg +6 -0
  242. khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
  243. khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
  244. khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
  245. khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
  246. khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
  247. khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
  248. khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
  249. khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
  250. khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
  251. khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
  252. khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
  253. khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
  254. khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
  255. khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
  256. khoj/interface/compiled/automation.svg +37 -0
  257. khoj/interface/compiled/automations/index.html +1 -0
  258. khoj/interface/compiled/automations/index.txt +8 -0
  259. khoj/interface/compiled/chat/index.html +1 -0
  260. khoj/interface/compiled/chat/index.txt +7 -0
  261. khoj/interface/compiled/chat.svg +24 -0
  262. khoj/interface/compiled/close.svg +5 -0
  263. khoj/interface/compiled/copy-button-success.svg +6 -0
  264. khoj/interface/compiled/copy-button.svg +5 -0
  265. khoj/interface/compiled/index.html +1 -0
  266. khoj/interface/compiled/index.txt +7 -0
  267. khoj/interface/compiled/khoj.webmanifest +76 -0
  268. khoj/interface/compiled/logo.svg +24 -0
  269. khoj/interface/compiled/search/index.html +1 -0
  270. khoj/interface/compiled/search/index.txt +7 -0
  271. khoj/interface/compiled/send.svg +1 -0
  272. khoj/interface/compiled/settings/index.html +1 -0
  273. khoj/interface/compiled/settings/index.txt +9 -0
  274. khoj/interface/compiled/share/chat/index.html +1 -0
  275. khoj/interface/compiled/share/chat/index.txt +7 -0
  276. khoj/interface/compiled/share.svg +8 -0
  277. khoj/interface/compiled/thumbs-down.svg +6 -0
  278. khoj/interface/compiled/thumbs-up.svg +6 -0
  279. khoj/interface/email/feedback.html +34 -0
  280. khoj/interface/email/magic_link.html +40 -0
  281. khoj/interface/email/task.html +37 -0
  282. khoj/interface/email/welcome.html +90 -0
  283. khoj/interface/web/.well-known/assetlinks.json +11 -0
  284. khoj/interface/web/assets/icons/agents.svg +19 -0
  285. khoj/interface/web/assets/icons/automation.svg +43 -0
  286. khoj/interface/web/assets/icons/chat.svg +24 -0
  287. khoj/interface/web/assets/icons/github.svg +1 -0
  288. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  289. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  290. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
  291. khoj/interface/web/assets/icons/khoj.svg +26 -0
  292. khoj/interface/web/assets/icons/logotype.svg +1 -0
  293. khoj/interface/web/assets/icons/search.svg +57 -0
  294. khoj/interface/web/assets/icons/sync.svg +4 -0
  295. khoj/interface/web/assets/khoj.css +237 -0
  296. khoj/interface/web/assets/utils.js +33 -0
  297. khoj/interface/web/base_config.html +445 -0
  298. khoj/interface/web/content_source_github_input.html +208 -0
  299. khoj/interface/web/login.html +310 -0
  300. khoj/interface/web/utils.html +48 -0
  301. khoj/main.py +249 -0
  302. khoj/manage.py +22 -0
  303. khoj/migrations/__init__.py +0 -0
  304. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  305. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  306. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  307. khoj/migrations/migrate_offline_model.py +29 -0
  308. khoj/migrations/migrate_processor_config_openai.py +67 -0
  309. khoj/migrations/migrate_server_pg.py +132 -0
  310. khoj/migrations/migrate_version.py +17 -0
  311. khoj/processor/__init__.py +0 -0
  312. khoj/processor/content/__init__.py +0 -0
  313. khoj/processor/content/docx/__init__.py +0 -0
  314. khoj/processor/content/docx/docx_to_entries.py +111 -0
  315. khoj/processor/content/github/__init__.py +0 -0
  316. khoj/processor/content/github/github_to_entries.py +226 -0
  317. khoj/processor/content/images/__init__.py +0 -0
  318. khoj/processor/content/images/image_to_entries.py +117 -0
  319. khoj/processor/content/markdown/__init__.py +0 -0
  320. khoj/processor/content/markdown/markdown_to_entries.py +160 -0
  321. khoj/processor/content/notion/notion_to_entries.py +259 -0
  322. khoj/processor/content/org_mode/__init__.py +0 -0
  323. khoj/processor/content/org_mode/org_to_entries.py +226 -0
  324. khoj/processor/content/org_mode/orgnode.py +532 -0
  325. khoj/processor/content/pdf/__init__.py +0 -0
  326. khoj/processor/content/pdf/pdf_to_entries.py +119 -0
  327. khoj/processor/content/plaintext/__init__.py +0 -0
  328. khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
  329. khoj/processor/content/text_to_entries.py +296 -0
  330. khoj/processor/conversation/__init__.py +0 -0
  331. khoj/processor/conversation/anthropic/__init__.py +0 -0
  332. khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
  333. khoj/processor/conversation/anthropic/utils.py +217 -0
  334. khoj/processor/conversation/google/__init__.py +0 -0
  335. khoj/processor/conversation/google/gemini_chat.py +253 -0
  336. khoj/processor/conversation/google/utils.py +260 -0
  337. khoj/processor/conversation/offline/__init__.py +0 -0
  338. khoj/processor/conversation/offline/chat_model.py +308 -0
  339. khoj/processor/conversation/offline/utils.py +80 -0
  340. khoj/processor/conversation/offline/whisper.py +15 -0
  341. khoj/processor/conversation/openai/__init__.py +0 -0
  342. khoj/processor/conversation/openai/gpt.py +243 -0
  343. khoj/processor/conversation/openai/utils.py +232 -0
  344. khoj/processor/conversation/openai/whisper.py +13 -0
  345. khoj/processor/conversation/prompts.py +1188 -0
  346. khoj/processor/conversation/utils.py +867 -0
  347. khoj/processor/embeddings.py +122 -0
  348. khoj/processor/image/generate.py +215 -0
  349. khoj/processor/speech/__init__.py +0 -0
  350. khoj/processor/speech/text_to_speech.py +51 -0
  351. khoj/processor/tools/__init__.py +0 -0
  352. khoj/processor/tools/online_search.py +472 -0
  353. khoj/processor/tools/run_code.py +179 -0
  354. khoj/routers/__init__.py +0 -0
  355. khoj/routers/api.py +760 -0
  356. khoj/routers/api_agents.py +295 -0
  357. khoj/routers/api_chat.py +1273 -0
  358. khoj/routers/api_content.py +634 -0
  359. khoj/routers/api_model.py +123 -0
  360. khoj/routers/api_phone.py +86 -0
  361. khoj/routers/api_subscription.py +144 -0
  362. khoj/routers/auth.py +307 -0
  363. khoj/routers/email.py +135 -0
  364. khoj/routers/helpers.py +2333 -0
  365. khoj/routers/notion.py +85 -0
  366. khoj/routers/research.py +364 -0
  367. khoj/routers/storage.py +63 -0
  368. khoj/routers/twilio.py +36 -0
  369. khoj/routers/web_client.py +141 -0
  370. khoj/search_filter/__init__.py +0 -0
  371. khoj/search_filter/base_filter.py +15 -0
  372. khoj/search_filter/date_filter.py +215 -0
  373. khoj/search_filter/file_filter.py +32 -0
  374. khoj/search_filter/word_filter.py +29 -0
  375. khoj/search_type/__init__.py +0 -0
  376. khoj/search_type/text_search.py +255 -0
  377. khoj/utils/__init__.py +0 -0
  378. khoj/utils/cli.py +101 -0
  379. khoj/utils/config.py +81 -0
  380. khoj/utils/constants.py +51 -0
  381. khoj/utils/fs_syncer.py +252 -0
  382. khoj/utils/helpers.py +627 -0
  383. khoj/utils/initialization.py +301 -0
  384. khoj/utils/jsonl.py +43 -0
  385. khoj/utils/models.py +47 -0
  386. khoj/utils/rawconfig.py +208 -0
  387. khoj/utils/state.py +48 -0
  388. khoj/utils/yaml.py +47 -0
  389. khoj-1.33.3.dev32.dist-info/METADATA +190 -0
  390. khoj-1.33.3.dev32.dist-info/RECORD +393 -0
  391. khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
  392. khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
  393. khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,260 @@
1
+ import logging
2
+ import random
3
+ from threading import Thread
4
+
5
+ import google.generativeai as genai
6
+ from google.generativeai.types.answer_types import FinishReason
7
+ from google.generativeai.types.generation_types import StopCandidateException
8
+ from google.generativeai.types.safety_types import (
9
+ HarmBlockThreshold,
10
+ HarmCategory,
11
+ HarmProbability,
12
+ )
13
+ from langchain.schema import ChatMessage
14
+ from tenacity import (
15
+ before_sleep_log,
16
+ retry,
17
+ stop_after_attempt,
18
+ wait_exponential,
19
+ wait_random_exponential,
20
+ )
21
+
22
+ from khoj.processor.conversation.utils import (
23
+ ThreadedGenerator,
24
+ commit_conversation_trace,
25
+ get_image_from_url,
26
+ )
27
+ from khoj.utils import state
28
+ from khoj.utils.helpers import (
29
+ get_chat_usage_metrics,
30
+ is_none_or_empty,
31
+ is_promptrace_enabled,
32
+ )
33
+
34
logger = logging.getLogger(__name__)


# Hard cap on tokens Gemini may generate per response; injected as
# `max_output_tokens` into every generation_config built in this module.
MAX_OUTPUT_TOKENS_GEMINI = 8192
38
+
39
+
40
@retry(
    wait=wait_random_exponential(min=1, max=10),
    stop=stop_after_attempt(2),
    before_sleep=before_sleep_log(logger, logging.DEBUG),
    reraise=True,
)
def gemini_completion_with_backoff(
    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, tracer=None
) -> str:
    """Generate a non-streaming chat response from a Gemini model, with retry/backoff.

    Args:
        messages: Chat history as objects with `.role` and `.content` attributes.
            All but the last message seed the chat session; the last is sent as the prompt.
        system_prompt: System instruction passed to the model.
        model_name: Gemini model identifier.
        temperature: Sampling temperature for generation.
        api_key: Gemini API key used to configure the client.
        model_kwargs: Extra generation-config options. Copied internally; the
            caller's dict is never mutated.
        tracer: Optional dict updated in place with usage and trace metadata.

    Returns:
        The model's response text, or the stop reason if generation was blocked.
    """
    genai.configure(api_key=api_key)
    # Copy so the temperature/max-token overrides below don't mutate the caller's dict.
    model_kwargs = dict(model_kwargs or {})
    model_kwargs["temperature"] = temperature
    model_kwargs["max_output_tokens"] = MAX_OUTPUT_TOKENS_GEMINI
    # Fresh dict per call. A `tracer={}` default would be a shared mutable
    # default: usage metrics written below would accumulate across unrelated calls.
    tracer = {} if tracer is None else tracer
    model = genai.GenerativeModel(
        model_name,
        generation_config=model_kwargs,
        system_instruction=system_prompt,
        safety_settings={
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
        },
    )

    formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]

    # Start chat session. All messages up to the last are considered to be part of the chat history
    chat_session = model.start_chat(history=formatted_messages[0:-1])

    try:
        # Generate the response. The last message is considered to be the current prompt
        response = chat_session.send_message(formatted_messages[-1]["parts"])
        response_text = response.text
    except StopCandidateException as e:
        response = None
        response_text, _ = handle_gemini_response(e.args)
        # Respond with reason for stopping
        logger.warning(
            f"LLM Response Prevented for {model_name}: {response_text}.\n"
            + f"Last Message by {messages[-1].role}: {messages[-1].content}"
        )

    # Aggregate cost of chat. Token counts are 0 when generation was blocked
    # (no usage metadata is available in that case).
    input_tokens = response.usage_metadata.prompt_token_count if response else 0
    output_tokens = response.usage_metadata.candidates_token_count if response else 0
    tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))

    # Save conversation trace
    tracer["chat_model"] = model_name
    tracer["temperature"] = temperature
    if is_promptrace_enabled():
        commit_conversation_trace(messages, response_text, tracer)

    return response_text
95
+
96
+
97
@retry(
    wait=wait_exponential(multiplier=1, min=4, max=10),
    stop=stop_after_attempt(2),
    before_sleep=before_sleep_log(logger, logging.DEBUG),
    reraise=True,
)
def gemini_chat_completion_with_backoff(
    messages,
    compiled_references,
    online_results,
    model_name,
    temperature,
    api_key,
    system_prompt,
    completion_func=None,
    model_kwargs=None,
    tracer: dict = None,
):
    """Stream a Gemini chat response through a ThreadedGenerator.

    Spawns a background thread running `gemini_llm_thread`, which pushes
    response chunks into the returned generator as they arrive.

    Args:
        messages: Chat history handed to the worker thread.
        compiled_references: Reference notes attached to the generator.
        online_results: Online search results attached to the generator.
        model_name: Gemini model identifier.
        temperature: Sampling temperature for generation.
        api_key: Gemini API key.
        system_prompt: System instruction for the model.
        completion_func: Optional callback invoked by the generator on completion.
        model_kwargs: Extra generation-config options for the worker thread.
        tracer: Optional dict the worker thread updates with usage/trace metadata.

    Returns:
        A ThreadedGenerator yielding response chunks as they are produced.
    """
    # Fresh dict per call. A `tracer={}` default would be shared — and mutated
    # by the worker thread — across unrelated conversations.
    tracer = {} if tracer is None else tracer
    g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
    t = Thread(
        target=gemini_llm_thread,
        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs, tracer),
    )
    t.start()
    return g
122
+
123
+
124
def gemini_llm_thread(
    g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None, tracer: dict = {}
):
    """Stream a Gemini chat response into the ThreadedGenerator `g`.

    Runs on a background thread: configures the Gemini client, streams the
    response chunk by chunk into `g`, records token usage in `tracer`, and
    always closes `g` so the consumer's iteration terminates.
    """
    try:
        genai.configure(api_key=api_key)
        model_kwargs = model_kwargs or dict()
        model_kwargs["temperature"] = temperature
        model_kwargs["max_output_tokens"] = MAX_OUTPUT_TOKENS_GEMINI
        # Stop generation before the model starts echoing notes context back
        model_kwargs["stop_sequences"] = ["Notes:\n["]
        model = genai.GenerativeModel(
            model_name,
            generation_config=model_kwargs,
            system_instruction=system_prompt,
            # Only block responses rated high-probability harmful
            safety_settings={
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_ONLY_HIGH,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_ONLY_HIGH,
            },
        )

        aggregated_response = ""
        formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]

        # all messages up to the last are considered to be part of the chat history
        chat_session = model.start_chat(history=formatted_messages[0:-1])
        # the last message is considered to be the current prompt
        for chunk in chat_session.send_message(formatted_messages[-1]["parts"], stream=True):
            message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
            message = message or chunk.text
            aggregated_response += message
            g.send(message)
            if stopped:
                # Surface the block reason via the StopCandidateException handler below
                raise StopCandidateException(message)

        # Calculate cost of chat
        # NOTE(review): assumes the stream yielded at least one chunk; an empty
        # stream would leave `chunk` unbound here — confirm upstream guarantees
        input_tokens = chunk.usage_metadata.prompt_token_count
        output_tokens = chunk.usage_metadata.candidates_token_count
        tracer["usage"] = get_chat_usage_metrics(model_name, input_tokens, output_tokens, tracer.get("usage"))

        # Save conversation trace
        tracer["chat_model"] = model_name
        tracer["temperature"] = temperature
        if is_promptrace_enabled():
            commit_conversation_trace(messages, aggregated_response, tracer)
    except StopCandidateException as e:
        # Response was blocked; log why instead of propagating to the consumer
        logger.warning(
            f"LLM Response Prevented for {model_name}: {e.args[0]}.\n"
            + f"Last Message by {messages[-1].role}: {messages[-1].content}"
        )
    except Exception as e:
        logger.error(f"Error in gemini_llm_thread: {e}", exc_info=True)
    finally:
        # Always close the generator so the consumer stops waiting
        g.close()
178
+
179
+
180
def handle_gemini_response(candidates, prompt_feedback=None):
    """Check if Gemini response was blocked and return an explanatory error message."""
    # No candidates at all: the prompt itself was rejected for safety reasons
    if len(candidates) == 0 and prompt_feedback:
        return (
            f"\nI'd prefer to not respond to that due to **{prompt_feedback.block_reason.name}** issues with your query.",
            True,
        )
    candidate = candidates[0]
    # The generated content tripped the safety filters
    if candidate.finish_reason == FinishReason.SAFETY:
        return generate_safety_response(candidate.safety_ratings), True
    # No finish reason yet: generation is still in progress
    if not candidate.finish_reason:
        return None, False
    # Stopped for a non-standard reason, e.g. reaching the maximum token limit
    if candidate.finish_reason != FinishReason.STOP:
        return f"\nI can't talk further about that because of **{candidate.finish_reason.name} issue.**", True
    # Normal completion: the response is valid and can be used
    return None, False
203
+
204
+
205
def generate_safety_response(safety_ratings):
    """Generate a conversational response based on the safety ratings of the response."""
    # Pick the safety rating with the highest probability to talk about
    top_rating = max(safety_ratings, key=lambda rating: rating.probability)
    # Drop the "HARM_CATEGORY_" prefix and title case the rest, e.g. "Hate Speech"
    category = " ".join(top_rating.category.name.split("_")[2:]).title()
    # Scale the expressed discomfort with how probable the harm was rated
    discomfort_by_probability = {
        HarmProbability.HARM_PROBABILITY_UNSPECIFIED: " ",
        HarmProbability.LOW: "a bit ",
        HarmProbability.MEDIUM: "moderately ",
        HarmProbability.HIGH: random.choice(["very ", "quite ", "fairly "]),
    }
    discomfort_level = discomfort_by_probability[top_rating.probability]
    # Respond with one of a few canned templates for variety
    templates = [
        "\nUmm, I'd rather not to respond to that. The conversation has some probability of going into **{category}** territory.",
        "\nI'd prefer not to talk about **{category}** related topics. It makes me {discomfort_level}uncomfortable.",
        "\nI feel {discomfort_level}squeamish talking about **{category}** related stuff! Can we talk about something less controversial?",
        "\nThat sounds {discomfort_level}outside the [Overtone Window](https://en.wikipedia.org/wiki/Overton_window) of acceptable conversation. Should we stick to something less {category} related?",
    ]
    return random.choice(templates).format(
        category=category, probability=top_rating.probability.name, discomfort_level=discomfort_level
    )
230
+
231
+
232
def format_messages_for_gemini(messages: list[ChatMessage], system_prompt: str = None) -> tuple[list[ChatMessage], str]:
    """Convert chatml messages into the structure Gemini expects.

    Merges any system messages into a single system prompt, converts each
    message's content into a list of parts (image parts first, then text),
    and maps the "assistant" role to Gemini's "model" role. Mutates
    `messages` in place and returns it with the combined system prompt
    (None if the prompt is empty).
    """
    # Extract system message
    system_prompt = system_prompt or ""
    for message in messages.copy():  # iterate a copy since we remove while looping
        if message.role == "system":
            system_prompt += message.content
            messages.remove(message)
    system_prompt = None if is_none_or_empty(system_prompt) else system_prompt

    for message in messages:
        # Convert message content to string list from chatml dictionary list
        if isinstance(message.content, list):
            # Convert image_urls to PIL.Image and place them at beginning of list (better for Gemini)
            message.content = [
                get_image_from_url(item["image_url"]["url"]).content
                if item["type"] == "image_url"
                else item.get("text", "")
                for item in sorted(message.content, key=lambda x: 0 if x["type"] == "image_url" else 1)
            ]
        elif isinstance(message.content, str):
            # Wrap plain string content in a list for a uniform parts format
            message.content = [message.content]

        if message.role == "assistant":
            # Gemini uses "model" for assistant turns
            message.role = "model"

    # A single-message conversation must start with a user turn
    if len(messages) == 1:
        messages[0].role = "user"

    return messages, system_prompt
File without changes
@@ -0,0 +1,308 @@
1
+ import json
2
+ import logging
3
+ import os
4
+ from datetime import datetime, timedelta
5
+ from threading import Thread
6
+ from typing import Any, Dict, Iterator, List, Optional, Union
7
+
8
+ import pyjson5
9
+ from langchain.schema import ChatMessage
10
+ from llama_cpp import Llama
11
+
12
+ from khoj.database.models import Agent, ChatModel, KhojUser
13
+ from khoj.processor.conversation import prompts
14
+ from khoj.processor.conversation.offline.utils import download_model
15
+ from khoj.processor.conversation.utils import (
16
+ ThreadedGenerator,
17
+ clean_json,
18
+ commit_conversation_trace,
19
+ generate_chatml_messages_with_context,
20
+ messages_to_print,
21
+ )
22
+ from khoj.utils import state
23
+ from khoj.utils.constants import empty_escape_sequences
24
+ from khoj.utils.helpers import (
25
+ ConversationCommand,
26
+ is_none_or_empty,
27
+ is_promptrace_enabled,
28
+ truncate_code_context,
29
+ )
30
+ from khoj.utils.rawconfig import FileAttachment, LocationData
31
+ from khoj.utils.yaml import yaml_dump
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+
36
def extract_questions_offline(
    text: str,
    model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
    loaded_model: Union[Any, None] = None,
    conversation_log={},
    use_history: bool = True,
    should_extract_questions: bool = True,
    location_data: LocationData = None,
    user: KhojUser = None,
    max_prompt_size: int = None,
    temperature: float = 0.7,
    personality_context: Optional[str] = None,
    query_files: str = None,
    tracer: dict = {},
) -> List[str]:
    """
    Infer search queries to retrieve relevant notes to answer user query.

    Falls back to splitting the raw query on "? " when extraction is disabled
    or the model returns invalid JSON.
    """
    # Naive fallback: split the raw query into individual questions
    all_questions = text.split("? ")
    all_questions = [q + "?" for q in all_questions[:-1]] + [all_questions[-1]]

    if not should_extract_questions:
        return all_questions

    assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
    offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)

    location = f"{location_data}" if location_data else "Unknown"
    username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""

    # Extract Past User Message and Inferred Questions from Conversation Log
    chat_history = ""

    if use_history:
        # Only the most recent khoj turns are relevant context
        for chat in conversation_log.get("chat", [])[-4:]:
            if chat["by"] == "khoj":
                chat_history += f"Q: {chat['intent']['query']}\n"
                chat_history += f"Khoj: {chat['message']}\n\n"

    # Get dates relative to today for prompt creation
    today = datetime.today()
    yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")
    last_year = today.year - 1
    example_questions = prompts.extract_questions_offline.format(
        query=text,
        chat_history=chat_history,
        current_date=today.strftime("%Y-%m-%d"),
        day_of_week=today.strftime("%A"),
        current_month=today.strftime("%Y-%m"),
        yesterday_date=yesterday,
        last_year=last_year,
        this_year=today.year,
        location=location,
        username=username,
        personality_context=personality_context,
    )

    messages = generate_chatml_messages_with_context(
        example_questions,
        model_name=model,
        loaded_model=offline_chat_model,
        max_prompt_size=max_prompt_size,
        model_type=ChatModel.ModelType.OFFLINE,
        query_files=query_files,
    )

    # Serialize access to the shared offline model
    state.chat_lock.acquire()
    try:
        response = send_message_to_model_offline(
            messages,
            loaded_model=offline_chat_model,
            model_name=model,
            max_prompt_size=max_prompt_size,
            temperature=temperature,
            response_type="json_object",
            tracer=tracer,
        )
    finally:
        state.chat_lock.release()

    # Extract and clean the chat model's response
    try:
        # Fix: clean the model's response itself. Previously the
        # `empty_escape_sequences` constant was passed in, which discarded
        # the model output and parsed the constant instead.
        response = clean_json(response)
        response = pyjson5.loads(response)
        questions = [q.strip() for q in response["queries"] if q.strip()]
        questions = filter_questions(questions)
    except Exception:
        logger.warning(f"Llama returned invalid JSON. Falling back to using user message as search query.\n{response}")
        return all_questions
    logger.debug(f"Questions extracted by {model}: {questions}")
    return questions
127
+
128
+
129
def filter_questions(questions: List[str]):
    """Drop apologetic non-answers and empty strings; deduplicate the rest."""
    # Questions containing these words usually mean the model apologized
    # instead of producing a search query
    hint_words = [
        "sorry",
        "apologize",
        "unable",
        "can't",
        "cannot",
        "don't know",
        "don't understand",
        "do not know",
        "do not understand",
    ]
    unique_questions = set()
    for question in questions:
        lowered = question.lower()
        is_apology = any(word in lowered for word in hint_words)
        if not is_apology and not is_none_or_empty(question):
            unique_questions.add(question)

    return list(unique_questions)
148
+
149
+
150
def converse_offline(
    user_query,
    references=[],
    online_results={},
    code_results={},
    conversation_log={},
    model_name: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
    loaded_model: Union[Any, None] = None,
    completion_func=None,
    conversation_commands=[ConversationCommand.Default],
    max_prompt_size=None,
    tokenizer_name=None,
    location_data: LocationData = None,
    user_name: str = None,
    agent: Agent = None,
    query_files: str = None,
    generated_files: List[FileAttachment] = None,
    additional_context: List[str] = None,
    generated_asset_results: Dict[str, Dict] = {},
    tracer: dict = {},
) -> Union[ThreadedGenerator, Iterator[str]]:
    """
    Converse with user using Llama.

    Builds the system prompt and context from notes, online and code results,
    then streams the model response via a ThreadedGenerator. Returns a plain
    iterator with a canned message when the requested context is missing.
    """
    # Initialize Variables
    assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
    offline_chat_model = loaded_model or download_model(model_name, max_tokens=max_prompt_size)
    tracer["chat_model"] = model_name
    current_date = datetime.now()

    # Use the agent's persona when one is configured, else the default persona
    if agent and agent.personality:
        system_prompt = prompts.custom_system_prompt_offline_chat.format(
            name=agent.name,
            bio=agent.personality,
            current_date=current_date.strftime("%Y-%m-%d"),
            day_of_week=current_date.strftime("%A"),
        )
    else:
        system_prompt = prompts.system_prompt_offline_chat.format(
            current_date=current_date.strftime("%Y-%m-%d"),
            day_of_week=current_date.strftime("%A"),
        )

    if location_data:
        location_prompt = prompts.user_location.format(location=f"{location_data}")
        system_prompt = f"{system_prompt}\n{location_prompt}"

    if user_name:
        user_name_prompt = prompts.user_name.format(name=user_name)
        system_prompt = f"{system_prompt}\n{user_name_prompt}"

    # Get Conversation Primer appropriate to Conversation Type
    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
        return iter([prompts.no_notes_found.format()])
    elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
        # Fix: completion_func defaults to None; only call it when provided
        if completion_func:
            completion_func(chat_response=prompts.no_online_results_found.format())
        return iter([prompts.no_online_results_found.format()])

    context_message = ""
    if not is_none_or_empty(references):
        context_message = f"{prompts.notes_conversation_offline.format(references=yaml_dump(references))}\n\n"
    if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
        # Flatten online results down to their webpages when available
        simplified_online_results = online_results.copy()
        for result in online_results:
            if online_results[result].get("webpages"):
                simplified_online_results[result] = online_results[result]["webpages"]

        context_message += f"{prompts.online_search_conversation_offline.format(online_results=yaml_dump(simplified_online_results))}\n\n"
    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
        context_message += (
            f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
        )
    context_message = context_message.strip()

    # Setup Prompt with Primer or Conversation History
    messages = generate_chatml_messages_with_context(
        user_query,
        system_prompt,
        conversation_log,
        context_message=context_message,
        model_name=model_name,
        loaded_model=offline_chat_model,
        max_prompt_size=max_prompt_size,
        tokenizer_name=tokenizer_name,
        model_type=ChatModel.ModelType.OFFLINE,
        query_files=query_files,
        generated_files=generated_files,
        generated_asset_results=generated_asset_results,
        program_execution_context=additional_context,
    )

    logger.debug(f"Conversation Context for {model_name}: {messages_to_print(messages)}")

    # Stream the response from a background thread
    g = ThreadedGenerator(references, online_results, completion_func=completion_func)
    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size, tracer))
    t.start()
    return g
247
+
248
+
249
def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None, tracer: dict = {}):
    """Stream offline chat completion deltas into the ThreadedGenerator `g`."""
    # Stop phrases prevent the model from echoing prompt scaffolding back
    stop_phrases = ["<s>", "INST]", "Notes:"]
    aggregated_response = ""

    # Serialize access to the shared offline model
    state.chat_lock.acquire()
    try:
        chunks = send_message_to_model_offline(
            messages, loaded_model=model, stop=stop_phrases, max_prompt_size=max_prompt_size, streaming=True
        )
        for chunk in chunks:
            delta = chunk["choices"][0]["delta"].get("content", "")
            aggregated_response += delta
            g.send(delta)

        # Save conversation trace
        if is_promptrace_enabled():
            commit_conversation_trace(messages, aggregated_response, tracer)

    finally:
        # Always release the lock and close the generator for the consumer
        state.chat_lock.release()
        g.close()
270
+
271
+
272
def send_message_to_model_offline(
    messages: List[ChatMessage],
    loaded_model=None,
    model_name="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
    temperature: float = 0.2,
    streaming=False,
    stop=[],
    max_prompt_size: int = None,
    response_type: str = "text",
    tracer: dict = None,
):
    """Send chat messages to the offline llama.cpp model.

    Returns the raw chunk iterator when `streaming` is True, otherwise the
    response text. Non-streaming responses also record a conversation trace;
    streamed responses must be traced by the caller.
    """
    assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
    # Fix: normalize tracer here instead of using a mutable default argument,
    # which leaked tracer state across calls that omitted it
    tracer = {} if tracer is None else tracer
    offline_chat_model = loaded_model or download_model(model_name, max_tokens=max_prompt_size)
    messages_dict = [{"role": message.role, "content": message.content} for message in messages]
    # Optional fixed seed for reproducible responses
    seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
    response = offline_chat_model.create_chat_completion(
        messages_dict,
        stop=stop,
        stream=streaming,
        temperature=temperature,
        response_format={"type": response_type},
        seed=seed,
    )

    if streaming:
        return response

    response_text = response["choices"][0]["message"].get("content", "")

    # Save conversation trace for non-streaming responses
    # Streamed responses need to be saved by the calling function
    tracer["chat_model"] = model_name
    tracer["temperature"] = temperature
    if is_promptrace_enabled():
        commit_conversation_trace(messages, response_text, tracer)

    return response_text
@@ -0,0 +1,80 @@
1
+ import glob
2
+ import logging
3
+ import math
4
+ import os
5
+ from typing import Any, Dict
6
+
7
+ from huggingface_hub.constants import HF_HUB_CACHE
8
+
9
+ from khoj.utils import state
10
+ from khoj.utils.helpers import get_device_memory
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ def download_model(repo_id: str, filename: str = "*Q4_K_M.gguf", max_tokens: int = None):
16
+ # Initialize Model Parameters
17
+ # Use n_ctx=0 to get context size from the model
18
+ kwargs: Dict[str, Any] = {"n_threads": 4, "n_ctx": 0, "verbose": False}
19
+
20
+ # Decide whether to load model to GPU or CPU
21
+ device = "gpu" if state.chat_on_gpu and state.device != "cpu" else "cpu"
22
+ kwargs["n_gpu_layers"] = -1 if device == "gpu" else 0
23
+
24
+ # Add chat format if known
25
+ if "llama-3" in repo_id.lower():
26
+ kwargs["chat_format"] = "llama-3"
27
+ elif "gemma-2" in repo_id.lower():
28
+ kwargs["chat_format"] = "gemma"
29
+
30
+ # Check if the model is already downloaded
31
+ model_path = load_model_from_cache(repo_id, filename)
32
+ chat_model = None
33
+ try:
34
+ chat_model = load_model(model_path, repo_id, filename, kwargs)
35
+ except:
36
+ # Load model on CPU if GPU is not available
37
+ kwargs["n_gpu_layers"], device = 0, "cpu"
38
+ chat_model = load_model(model_path, repo_id, filename, kwargs)
39
+
40
+ # Now load the model with context size set based on:
41
+ # 1. context size supported by model and
42
+ # 2. configured size or machine (V)RAM
43
+ kwargs["n_ctx"] = infer_max_tokens(chat_model.n_ctx(), max_tokens)
44
+ chat_model = load_model(model_path, repo_id, filename, kwargs)
45
+
46
+ logger.debug(
47
+ f"{'Loaded' if model_path else 'Downloaded'} chat model to {device.upper()} with {kwargs['n_ctx']} token context window."
48
+ )
49
+ return chat_model
50
+
51
+
52
+ def load_model(model_path: str, repo_id: str, filename: str = "*Q4_K_M.gguf", kwargs: dict = {}):
53
+ from llama_cpp.llama import Llama
54
+
55
+ if model_path:
56
+ return Llama(model_path, **kwargs)
57
+ else:
58
+ return Llama.from_pretrained(repo_id=repo_id, filename=filename, **kwargs)
59
+
60
+
61
+ def load_model_from_cache(repo_id: str, filename: str, repo_type="models"):
62
+ # Construct the path to the model file in the cache directory
63
+ repo_org, repo_name = repo_id.split("/")
64
+ object_id = "--".join([repo_type, repo_org, repo_name])
65
+ model_path = os.path.sep.join([HF_HUB_CACHE, object_id, "snapshots", "**", filename])
66
+
67
+ # Check if the model file exists
68
+ paths = glob.glob(model_path)
69
+ if paths:
70
+ return paths[0]
71
+ else:
72
+ return None
73
+
74
+
75
+ def infer_max_tokens(model_context_window: int, configured_max_tokens=None) -> int:
76
+ """Infer max prompt size based on device memory and max context window supported by the model"""
77
+ configured_max_tokens = math.inf if configured_max_tokens is None else configured_max_tokens
78
+ vram_based_n_ctx = int(get_device_memory() / 1e6) # based on heuristic
79
+ configured_max_tokens = configured_max_tokens or math.inf # do not use if set to None
80
+ return min(configured_max_tokens, vram_based_n_ctx, model_context_window)
@@ -0,0 +1,15 @@
1
+ import whisper
2
+ from asgiref.sync import sync_to_async
3
+
4
+ from khoj.utils import state
5
+
6
+
7
async def transcribe_audio_offline(audio_filename: str, model: str) -> str:
    """
    Transcribe audio file offline using Whisper
    """
    # Lazily load the Whisper model on first use and cache it in app state
    if not state.whisper_model:
        state.whisper_model = whisper.load_model(model)
    # Run the blocking transcription off the event loop
    result = await sync_to_async(state.whisper_model.transcribe)(audio_filename)
    return result["text"]
File without changes