khoj 1.33.3.dev32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393)
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +218 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +452 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1821 -0
  10. khoj/database/admin.py +417 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_default_model.py +116 -0
  15. khoj/database/management/commands/change_generated_images_url.py +61 -0
  16. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  17. khoj/database/migrations/0001_khojuser.py +98 -0
  18. khoj/database/migrations/0002_googleuser.py +32 -0
  19. khoj/database/migrations/0003_vector_extension.py +10 -0
  20. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  21. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  22. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  23. khoj/database/migrations/0007_add_conversation.py +27 -0
  24. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  25. khoj/database/migrations/0009_khojapiuser.py +24 -0
  26. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  27. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  28. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  29. khoj/database/migrations/0012_entry_file_source.py +21 -0
  30. khoj/database/migrations/0013_subscription.py +37 -0
  31. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  32. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  33. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  34. khoj/database/migrations/0017_searchmodel.py +32 -0
  35. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  36. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  37. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  38. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  39. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  40. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  41. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  42. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  43. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  45. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  46. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  47. khoj/database/migrations/0029_userrequests.py +27 -0
  48. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  49. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  50. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  51. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  52. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  53. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  54. khoj/database/migrations/0035_processlock.py +26 -0
  55. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  56. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  57. khoj/database/migrations/0036_publicconversation.py +42 -0
  58. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  59. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  60. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  61. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  62. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  63. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  64. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  65. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  66. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  67. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  68. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  69. khoj/database/migrations/0045_fileobject.py +37 -0
  70. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  71. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  72. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  73. khoj/database/migrations/0049_datastore.py +38 -0
  74. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  75. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  76. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  77. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  78. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  79. khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
  80. khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
  81. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  82. khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
  83. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  84. khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
  85. khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
  86. khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
  87. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  88. khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
  89. khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
  90. khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
  91. khoj/database/migrations/0063_conversation_temp_id.py +36 -0
  92. khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
  93. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  94. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  95. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  96. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  97. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  98. khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
  99. khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
  100. khoj/database/migrations/0072_entry_search_model.py +24 -0
  101. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  102. khoj/database/migrations/0074_alter_conversation_title.py +17 -0
  103. khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
  104. khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
  105. khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
  106. khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
  107. khoj/database/migrations/__init__.py +0 -0
  108. khoj/database/models/__init__.py +725 -0
  109. khoj/database/tests.py +3 -0
  110. khoj/interface/compiled/404/index.html +1 -0
  111. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
  112. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
  113. khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
  114. khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
  115. khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
  116. khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
  117. khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
  118. khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
  119. khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
  120. khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
  121. khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
  122. khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
  123. khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
  124. khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
  125. khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
  126. khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
  127. khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
  128. khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
  129. khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
  130. khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
  131. khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
  132. khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
  133. khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
  134. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  135. khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
  136. khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
  137. khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
  138. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  139. khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
  140. khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
  141. khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
  142. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  143. khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
  144. khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
  145. khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
  146. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  147. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
  148. khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
  149. khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
  150. khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
  151. khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
  152. khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
  153. khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
  154. khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
  155. khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  156. khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
  157. khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
  158. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  159. khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
  160. khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
  161. khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
  162. khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
  163. khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
  164. khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
  165. khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
  166. khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
  167. khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
  168. khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
  169. khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
  170. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
  171. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
  172. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
  173. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
  174. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
  175. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
  176. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
  177. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
  178. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
  179. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
  180. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
  181. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
  182. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
  183. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
  184. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
  185. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
  186. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
  187. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
  188. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
  189. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
  190. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
  191. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
  192. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
  193. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
  194. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
  195. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
  196. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
  197. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
  198. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
  199. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
  200. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
  201. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
  202. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
  203. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
  204. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
  205. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
  206. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
  207. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
  208. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
  209. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
  210. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
  211. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
  212. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
  213. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
  214. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
  215. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
  216. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
  217. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
  218. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
  219. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
  220. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
  221. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
  222. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
  223. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
  224. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
  225. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
  226. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
  227. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
  228. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
  229. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
  230. khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
  231. khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
  232. khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
  233. khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
  234. khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
  235. khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
  236. khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
  237. khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
  238. khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
  239. khoj/interface/compiled/agents/index.html +1 -0
  240. khoj/interface/compiled/agents/index.txt +7 -0
  241. khoj/interface/compiled/agents.svg +6 -0
  242. khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
  243. khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
  244. khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
  245. khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
  246. khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
  247. khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
  248. khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
  249. khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
  250. khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
  251. khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
  252. khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
  253. khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
  254. khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
  255. khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
  256. khoj/interface/compiled/automation.svg +37 -0
  257. khoj/interface/compiled/automations/index.html +1 -0
  258. khoj/interface/compiled/automations/index.txt +8 -0
  259. khoj/interface/compiled/chat/index.html +1 -0
  260. khoj/interface/compiled/chat/index.txt +7 -0
  261. khoj/interface/compiled/chat.svg +24 -0
  262. khoj/interface/compiled/close.svg +5 -0
  263. khoj/interface/compiled/copy-button-success.svg +6 -0
  264. khoj/interface/compiled/copy-button.svg +5 -0
  265. khoj/interface/compiled/index.html +1 -0
  266. khoj/interface/compiled/index.txt +7 -0
  267. khoj/interface/compiled/khoj.webmanifest +76 -0
  268. khoj/interface/compiled/logo.svg +24 -0
  269. khoj/interface/compiled/search/index.html +1 -0
  270. khoj/interface/compiled/search/index.txt +7 -0
  271. khoj/interface/compiled/send.svg +1 -0
  272. khoj/interface/compiled/settings/index.html +1 -0
  273. khoj/interface/compiled/settings/index.txt +9 -0
  274. khoj/interface/compiled/share/chat/index.html +1 -0
  275. khoj/interface/compiled/share/chat/index.txt +7 -0
  276. khoj/interface/compiled/share.svg +8 -0
  277. khoj/interface/compiled/thumbs-down.svg +6 -0
  278. khoj/interface/compiled/thumbs-up.svg +6 -0
  279. khoj/interface/email/feedback.html +34 -0
  280. khoj/interface/email/magic_link.html +40 -0
  281. khoj/interface/email/task.html +37 -0
  282. khoj/interface/email/welcome.html +90 -0
  283. khoj/interface/web/.well-known/assetlinks.json +11 -0
  284. khoj/interface/web/assets/icons/agents.svg +19 -0
  285. khoj/interface/web/assets/icons/automation.svg +43 -0
  286. khoj/interface/web/assets/icons/chat.svg +24 -0
  287. khoj/interface/web/assets/icons/github.svg +1 -0
  288. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  289. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  290. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
  291. khoj/interface/web/assets/icons/khoj.svg +26 -0
  292. khoj/interface/web/assets/icons/logotype.svg +1 -0
  293. khoj/interface/web/assets/icons/search.svg +57 -0
  294. khoj/interface/web/assets/icons/sync.svg +4 -0
  295. khoj/interface/web/assets/khoj.css +237 -0
  296. khoj/interface/web/assets/utils.js +33 -0
  297. khoj/interface/web/base_config.html +445 -0
  298. khoj/interface/web/content_source_github_input.html +208 -0
  299. khoj/interface/web/login.html +310 -0
  300. khoj/interface/web/utils.html +48 -0
  301. khoj/main.py +249 -0
  302. khoj/manage.py +22 -0
  303. khoj/migrations/__init__.py +0 -0
  304. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  305. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  306. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  307. khoj/migrations/migrate_offline_model.py +29 -0
  308. khoj/migrations/migrate_processor_config_openai.py +67 -0
  309. khoj/migrations/migrate_server_pg.py +132 -0
  310. khoj/migrations/migrate_version.py +17 -0
  311. khoj/processor/__init__.py +0 -0
  312. khoj/processor/content/__init__.py +0 -0
  313. khoj/processor/content/docx/__init__.py +0 -0
  314. khoj/processor/content/docx/docx_to_entries.py +111 -0
  315. khoj/processor/content/github/__init__.py +0 -0
  316. khoj/processor/content/github/github_to_entries.py +226 -0
  317. khoj/processor/content/images/__init__.py +0 -0
  318. khoj/processor/content/images/image_to_entries.py +117 -0
  319. khoj/processor/content/markdown/__init__.py +0 -0
  320. khoj/processor/content/markdown/markdown_to_entries.py +160 -0
  321. khoj/processor/content/notion/notion_to_entries.py +259 -0
  322. khoj/processor/content/org_mode/__init__.py +0 -0
  323. khoj/processor/content/org_mode/org_to_entries.py +226 -0
  324. khoj/processor/content/org_mode/orgnode.py +532 -0
  325. khoj/processor/content/pdf/__init__.py +0 -0
  326. khoj/processor/content/pdf/pdf_to_entries.py +119 -0
  327. khoj/processor/content/plaintext/__init__.py +0 -0
  328. khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
  329. khoj/processor/content/text_to_entries.py +296 -0
  330. khoj/processor/conversation/__init__.py +0 -0
  331. khoj/processor/conversation/anthropic/__init__.py +0 -0
  332. khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
  333. khoj/processor/conversation/anthropic/utils.py +217 -0
  334. khoj/processor/conversation/google/__init__.py +0 -0
  335. khoj/processor/conversation/google/gemini_chat.py +253 -0
  336. khoj/processor/conversation/google/utils.py +260 -0
  337. khoj/processor/conversation/offline/__init__.py +0 -0
  338. khoj/processor/conversation/offline/chat_model.py +308 -0
  339. khoj/processor/conversation/offline/utils.py +80 -0
  340. khoj/processor/conversation/offline/whisper.py +15 -0
  341. khoj/processor/conversation/openai/__init__.py +0 -0
  342. khoj/processor/conversation/openai/gpt.py +243 -0
  343. khoj/processor/conversation/openai/utils.py +232 -0
  344. khoj/processor/conversation/openai/whisper.py +13 -0
  345. khoj/processor/conversation/prompts.py +1188 -0
  346. khoj/processor/conversation/utils.py +867 -0
  347. khoj/processor/embeddings.py +122 -0
  348. khoj/processor/image/generate.py +215 -0
  349. khoj/processor/speech/__init__.py +0 -0
  350. khoj/processor/speech/text_to_speech.py +51 -0
  351. khoj/processor/tools/__init__.py +0 -0
  352. khoj/processor/tools/online_search.py +472 -0
  353. khoj/processor/tools/run_code.py +179 -0
  354. khoj/routers/__init__.py +0 -0
  355. khoj/routers/api.py +760 -0
  356. khoj/routers/api_agents.py +295 -0
  357. khoj/routers/api_chat.py +1273 -0
  358. khoj/routers/api_content.py +634 -0
  359. khoj/routers/api_model.py +123 -0
  360. khoj/routers/api_phone.py +86 -0
  361. khoj/routers/api_subscription.py +144 -0
  362. khoj/routers/auth.py +307 -0
  363. khoj/routers/email.py +135 -0
  364. khoj/routers/helpers.py +2333 -0
  365. khoj/routers/notion.py +85 -0
  366. khoj/routers/research.py +364 -0
  367. khoj/routers/storage.py +63 -0
  368. khoj/routers/twilio.py +36 -0
  369. khoj/routers/web_client.py +141 -0
  370. khoj/search_filter/__init__.py +0 -0
  371. khoj/search_filter/base_filter.py +15 -0
  372. khoj/search_filter/date_filter.py +215 -0
  373. khoj/search_filter/file_filter.py +32 -0
  374. khoj/search_filter/word_filter.py +29 -0
  375. khoj/search_type/__init__.py +0 -0
  376. khoj/search_type/text_search.py +255 -0
  377. khoj/utils/__init__.py +0 -0
  378. khoj/utils/cli.py +101 -0
  379. khoj/utils/config.py +81 -0
  380. khoj/utils/constants.py +51 -0
  381. khoj/utils/fs_syncer.py +252 -0
  382. khoj/utils/helpers.py +627 -0
  383. khoj/utils/initialization.py +301 -0
  384. khoj/utils/jsonl.py +43 -0
  385. khoj/utils/models.py +47 -0
  386. khoj/utils/rawconfig.py +208 -0
  387. khoj/utils/state.py +48 -0
  388. khoj/utils/yaml.py +47 -0
  389. khoj-1.33.3.dev32.dist-info/METADATA +190 -0
  390. khoj-1.33.3.dev32.dist-info/RECORD +393 -0
  391. khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
  392. khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
  393. khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,867 @@
1
+ import base64
2
+ import json
3
+ import logging
4
+ import math
5
+ import mimetypes
6
+ import os
7
+ import queue
8
+ import re
9
+ import uuid
10
+ from dataclasses import dataclass
11
+ from datetime import datetime
12
+ from enum import Enum
13
+ from io import BytesIO
14
+ from time import perf_counter
15
+ from typing import Any, Callable, Dict, List, Optional
16
+
17
+ import PIL.Image
18
+ import pyjson5
19
+ import requests
20
+ import tiktoken
21
+ import yaml
22
+ from langchain.schema import ChatMessage
23
+ from llama_cpp.llama import Llama
24
+ from transformers import AutoTokenizer
25
+
26
+ from khoj.database.adapters import ConversationAdapters
27
+ from khoj.database.models import ChatModel, ClientApplication, KhojUser
28
+ from khoj.processor.conversation import prompts
29
+ from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
30
+ from khoj.search_filter.base_filter import BaseFilter
31
+ from khoj.search_filter.date_filter import DateFilter
32
+ from khoj.search_filter.file_filter import FileFilter
33
+ from khoj.search_filter.word_filter import WordFilter
34
+ from khoj.utils import state
35
+ from khoj.utils.helpers import (
36
+ ConversationCommand,
37
+ is_none_or_empty,
38
+ is_promptrace_enabled,
39
+ merge_dicts,
40
+ )
41
+ from khoj.utils.rawconfig import FileAttachment
42
+ from khoj.utils.yaml import yaml_dump
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+ try:
47
+ from git import Repo
48
+ except ImportError:
49
+ if is_promptrace_enabled():
50
+ logger.warning("GitPython not installed. `pip install gitpython` to use prompt tracer.")
51
+
52
# Prompt size budget for each known chat model, keyed by model name.
# NOTE(review): values are presumably token counts; confirm against the code that reads this table.
model_to_prompt_size = {
    # OpenAI Models
    "gpt-4o": 60000,
    "gpt-4o-mini": 60000,
    "o1": 20000,
    "o1-mini": 60000,
    # Google Models
    "gemini-1.5-flash": 60000,
    "gemini-1.5-pro": 60000,
    # Anthropic Models
    "claude-3-5-sonnet-20241022": 60000,
    "claude-3-5-haiku-20241022": 60000,
    # Offline Models
    "Qwen/Qwen2.5-14B-Instruct-GGUF": 20000,
    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
    "bartowski/Llama-3.2-3B-Instruct-GGUF": 20000,
    "bartowski/gemma-2-9b-it-GGUF": 6000,
    "bartowski/gemma-2-2b-it-GGUF": 6000,
}
# String-to-string mapping that starts out empty.
# NOTE(review): presumably model name -> tokenizer name overrides; confirm against its users.
model_to_tokenizer: Dict[str, str] = {}
72
+
73
+
74
class ThreadedGenerator:
    """Queue-backed iterator that relays response chunks from a producer to a consumer.

    The producer calls `send` for every chunk and `close` when it is done.
    The consumer iterates over the instance; iteration ends once the end
    marker queued by `close` is read.
    """

    def __init__(self, compiled_references, online_results, completion_func=None):
        """Store the references, online results and optional completion callback.

        Also creates the internal queue, resets the aggregated response and
        records the start time used for the timing log messages.
        """
        self.queue = queue.Queue()
        self.compiled_references = compiled_references
        self.online_results = online_results
        self.completion_func = completion_func
        self.response = ""
        self.start_time = perf_counter()

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next queued chunk, waiting on the queue until one is available.

        When the end marker is read, the total streaming time is logged, the
        completion callback (if any) is invoked with the aggregated response,
        and StopIteration is raised.
        """
        chunk = self.queue.get()
        if chunk is not StopIteration:
            return chunk

        time_to_response = perf_counter() - self.start_time
        logger.info(f"Chat streaming took: {time_to_response:.3f} seconds")
        if self.completion_func:
            # The completion func effectively acts as a callback.
            # It adds the aggregated response to the conversation history.
            self.completion_func(chat_response=self.response)
        raise StopIteration

    def send(self, data):
        """Append `data` to the aggregated response and queue it for the consumer."""
        nothing_sent_yet = self.response == ""
        if nothing_sent_yet:
            # Log latency until the first chunk becomes available
            time_to_first_response = perf_counter() - self.start_time
            logger.info(f"First response took: {time_to_first_response:.3f} seconds")

        self.response += data
        self.queue.put(data)

    def close(self):
        """Queue the end marker that terminates iteration."""
        self.queue.put(StopIteration)
108
+
109
+
110
class InformationCollectionIteration:
    """Plain record of a single information collection step.

    Holds the tool and query of the step together with whatever the step
    produced. Every field except `tool` and `query` is optional and
    defaults to None.
    """

    def __init__(
        self,
        tool: str,
        query: str,
        context: Optional[list] = None,
        onlineContext: Optional[dict] = None,
        codeContext: Optional[dict] = None,
        summarizedResult: Optional[str] = None,
        warning: Optional[str] = None,
    ):
        """Store the given values on the instance unchanged.

        Args:
            tool: Tool associated with this step.
            query: Query associated with this step.
            context: Optional list of context items.
            onlineContext: Optional dict of online results.
            codeContext: Optional dict of code results.
            summarizedResult: Optional summary text for this step.
            warning: Optional warning text for this step.
        """
        # Parameters defaulting to None are annotated Optional explicitly (PEP 484).
        self.tool = tool
        self.query = query
        self.context = context
        self.onlineContext = onlineContext
        self.codeContext = codeContext
        self.summarizedResult = summarizedResult
        self.warning = warning
128
+
129
+
130
def construct_iteration_history(
    previous_iterations: List["InformationCollectionIteration"], previous_iteration_prompt: str
) -> str:
    """Render previous iterations into a single history string.

    Each iteration is formatted with `previous_iteration_prompt.format(...)`
    using the keyword fields `tool`, `query`, `result` (the iteration's
    `summarizedResult`) and `index` (1-based position). The formatted
    pieces are concatenated in order with no separator added.

    Args:
        previous_iterations: Iterations to render, oldest first.
        previous_iteration_prompt: `str.format` template applied to each iteration.

    Returns:
        The concatenated history, or an empty string when there are no iterations.
    """
    # Join once instead of growing a string with += inside the loop.
    return "".join(
        previous_iteration_prompt.format(
            tool=iteration.tool,
            query=iteration.query,
            result=iteration.summarizedResult,
            index=index,
        )
        for index, iteration in enumerate(previous_iterations, start=1)
    )
144
+
145
+
146
def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="AI") -> str:
    """Render the last `n` logged chat messages as a plain text transcript.

    Args:
        conversation_history: Conversation log; its "chat" key holds the list of message dicts.
        n: Number of trailing messages to consider.
        agent_name: Label used for the assistant's lines.

    Returns:
        The transcript text; empty when nothing qualifies.

    Notes:
        - The user's query is taken from the intent of the khoj message, so a
          "you" message only contributes text when it carries attached files.
        - khoj messages whose intent has no type (and which have no images) are
          skipped instead of raising a TypeError on the "excalidraw" check.
    """
    transcript = []
    for chat in conversation_history.get("chat", [])[-n:]:
        sender = chat["by"]

        if sender == "you":
            raw_query_files = chat.get("queryFiles")
            if raw_query_files:
                query_files = {file["name"]: file["content"] for file in raw_query_files}
                transcript.append(f"User: {gather_raw_query_files(query_files)}\n")
            continue

        if sender != "khoj":
            continue

        intent = chat.get("intent") or {}
        # Normalize a missing/None type so the membership and substring checks below are safe
        intent_type = intent.get("type") or ""

        if intent_type in ("remember", "reminder", "summarize"):
            transcript.append(f"User: {intent['query']}\n")
            inferred_queries = intent.get("inferred-queries")
            if inferred_queries:
                transcript.append(f'{agent_name}: {{"queries": {inferred_queries}}}\n')
            transcript.append(f"{agent_name}: {chat['message']}\n\n")
        elif chat.get("images"):
            transcript.append(f"User: {intent['query']}\n")
            transcript.append(f"{agent_name}: [generated image redacted for space]\n")
        elif "excalidraw" in intent_type:
            transcript.append(f"User: {intent['query']}\n")
            transcript.append(f"{agent_name}: {intent['inferred-queries'][0]}\n")

    return "".join(transcript)
173
+
174
+
175
def construct_tool_chat_history(
    previous_iterations: List[InformationCollectionIteration], tool: ConversationCommand = None
) -> Dict[str, list]:
    """Convert previous iterations into a chat-log shaped dict: {"chat": [...]}.

    Every iteration yields a user entry carrying its query followed by a khoj
    entry carrying its summarized result. The khoj entry's inferred queries
    depend on `tool`: the "query" of each context item for Notes, the keys of
    the online context for Online, the keys of the code context for Code, and
    an empty list for any other tool.
    """

    def no_queries(iteration):
        return []

    def notes_queries(iteration):
        return [c["query"] for c in iteration.context] if iteration.context else []

    def online_queries(iteration):
        return list(iteration.onlineContext.keys()) if iteration.onlineContext else []

    def code_queries(iteration):
        return list(iteration.codeContext.keys()) if iteration.codeContext else []

    # Pick the extractor once, up front
    extract_inferred_queries: Callable[[InformationCollectionIteration], List[str]] = no_queries
    if tool == ConversationCommand.Notes:
        extract_inferred_queries = notes_queries
    elif tool == ConversationCommand.Online:
        extract_inferred_queries = online_queries
    elif tool == ConversationCommand.Code:
        extract_inferred_queries = code_queries

    turns: list = []
    for iteration in previous_iterations:
        user_turn = {
            "by": "you",
            "message": iteration.query,
        }
        khoj_turn = {
            "by": "khoj",
            "intent": {
                "type": "remember",
                "inferred-queries": extract_inferred_queries(iteration),
                "query": iteration.query,
            },
            "message": iteration.summarizedResult,
        }
        turns.extend((user_turn, khoj_turn))

    return {"chat": turns}
208
+
209
+
210
class ChatEvent(Enum):
    """Named string constants identifying the kinds of chat events."""

    # Markers around the LLM response
    START_LLM_RESPONSE = "start_llm_response"
    END_LLM_RESPONSE = "end_llm_response"

    # Payload carrying events
    MESSAGE = "message"
    REFERENCES = "references"
    GENERATED_ASSETS = "generated_assets"
    STATUS = "status"
    METADATA = "metadata"
    USAGE = "usage"

    # Marker for the end of the whole response
    END_RESPONSE = "end_response"
220
+
221
+
222
def message_to_log(
    user_message,
    chat_response,
    user_message_metadata=None,
    khoj_message_metadata=None,
    conversation_log=None,
):
    """Create json logs from messages, metadata for conversation log.

    Args:
        user_message: The user's message.
        chat_response: The response to log for khoj.
        user_message_metadata: Extra fields for the user entry; fields set to None are dropped.
        khoj_message_metadata: Extra fields for the khoj entry; fields set to None are dropped.
        conversation_log: Existing list of log entries. When provided it is extended
            in place; when omitted a fresh list is created for this call.

    Returns:
        The conversation log with the user entry and the khoj entry appended.
    """
    # Use fresh containers per call. Mutable default arguments are shared between
    # calls, which made turns leak into later calls that omitted conversation_log.
    user_message_metadata = {} if user_message_metadata is None else user_message_metadata
    khoj_message_metadata = {} if khoj_message_metadata is None else khoj_message_metadata
    if conversation_log is None:
        conversation_log = []

    default_khoj_message_metadata = {
        "intent": {"type": "remember", "memory-type": "notes", "query": user_message},
        "trigger-emotion": "calm",
    }
    khoj_response_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Filter out any fields that are set to None
    user_message_metadata = {k: v for k, v in user_message_metadata.items() if v is not None}
    khoj_message_metadata = {k: v for k, v in khoj_message_metadata.items() if v is not None}

    # Create json log from Human's message
    human_log = merge_dicts({"message": user_message, "by": "you"}, user_message_metadata)

    # Create json log from GPT's response
    khoj_log = merge_dicts(khoj_message_metadata, default_khoj_message_metadata)
    khoj_log = merge_dicts({"message": chat_response, "by": "khoj", "created": khoj_response_time}, khoj_log)

    conversation_log.extend([human_log, khoj_log])
    return conversation_log
249
+
250
+
251
def save_to_conversation_log(
    q: str,
    chat_response: str,
    user: KhojUser,
    meta_log: Dict,
    user_message_time: str = None,
    compiled_references: List[Dict[str, Any]] = [],
    online_results: Dict[str, Any] = {},
    code_results: Dict[str, Any] = {},
    inferred_queries: List[str] = [],
    intent_type: str = "remember",
    client_application: ClientApplication = None,
    conversation_id: str = None,
    automation_id: str = None,
    query_images: List[str] = None,
    raw_query_files: List[FileAttachment] = [],
    generated_images: List[str] = [],
    raw_generated_files: List[FileAttachment] = [],
    generated_excalidraw_diagram: str = None,
    train_of_thought: List[Any] = [],
    tracer: Dict[str, Any] = {},
):
    """Append the user query and khoj response to the conversation log and save it.

    Builds the metadata for both sides of the turn, extends the "chat" list of
    `meta_log` through message_to_log, saves the result with
    ConversationAdapters.save_conversation, merges the prompt trace when
    promptrace is enabled and logs a summary of the saved turn.
    """
    if not user_message_time:
        user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Both entries of the turn share one id: the tracer's message id when set, else a new uuid
    turn_id = tracer.get("mid") or str(uuid.uuid4())

    # Metadata for the user side of the turn
    user_turn_metadata = {"created": user_message_time, "images": query_images, "turnId": turn_id}
    if raw_query_files and len(raw_query_files) > 0:
        user_turn_metadata["queryFiles"] = [attachment.model_dump(mode="json") for attachment in raw_query_files]

    # Metadata for the khoj side of the turn
    khoj_turn_metadata = {
        "context": compiled_references,
        "intent": {"inferred-queries": inferred_queries, "type": intent_type},
        "onlineContext": online_results,
        "codeContext": code_results,
        "automationId": automation_id,
        "trainOfThought": train_of_thought,
        "turnId": turn_id,
        "images": generated_images,
        "queryFiles": [attachment.model_dump(mode="json") for attachment in raw_generated_files],
    }
    if generated_excalidraw_diagram:
        khoj_turn_metadata["excalidrawDiagram"] = generated_excalidraw_diagram

    chat_log = message_to_log(
        user_message=q,
        chat_response=chat_response,
        user_message_metadata=user_turn_metadata,
        khoj_message_metadata=khoj_turn_metadata,
        conversation_log=meta_log.get("chat", []),
    )
    ConversationAdapters.save_conversation(
        user,
        {"chat": chat_log},
        client_application=client_application,
        conversation_id=conversation_id,
        user_message=q,
    )

    if is_promptrace_enabled():
        merge_message_into_conversation_trace(q, chat_response, tracer)

    saved_turn_summary = f"""
Saved Conversation Turn
You ({user.username}): "{q}"

Khoj: "{chat_response}"
""".strip()
    logger.info(saved_turn_summary)
322
+
323
+
324
def construct_structured_message(
    message: str, images: list[str], model_type: str, vision_enabled: bool, attached_file_context: str = None
):
    """
    Format messages into appropriate multimedia format for supported chat model types

    For OpenAI, Google and Anthropic model types the result is a list of content
    parts (message text, attached file context, https image urls) whenever there
    is file context or usable images, otherwise the plain message. For every
    other model type the result is a string, prefixed with the attached file
    context when there is any.
    """
    multipart_model_types = (
        ChatModel.ModelType.OPENAI,
        ChatModel.ModelType.GOOGLE,
        ChatModel.ModelType.ANTHROPIC,
    )

    # Text only model types: fold the attached files into the message text
    if model_type not in multipart_model_types:
        if is_none_or_empty(attached_file_context):
            return message
        return f"{attached_file_context}\n\n{message}"

    usable_images = vision_enabled and images
    if not attached_file_context and not usable_images:
        return message

    content_parts: List[Any] = [{"type": "text", "text": message}]
    if not is_none_or_empty(attached_file_context):
        content_parts.append({"type": "text", "text": attached_file_context})
    if usable_images:
        # Only images referenced by an https url are passed along
        content_parts.extend(
            {"type": "image_url", "image_url": {"url": image}} for image in images if image.startswith("https://")
        )
    return content_parts
352
+
353
+
354
def gather_raw_query_files(
    query_files: Dict[str, str],
):
    """
    Gather contextual data from the given (raw) files

    Maps file names to file contents. Returns an empty string for an empty
    mapping, otherwise an introduction line followed by one
    "File: <name>" section per file, with sections joined by a single space.
    """
    if not len(query_files):
        return ""

    file_sections = []
    for file_name, file_content in query_files.items():
        file_sections.append(f"File: {file_name}\n\n{file_content}\n\n")

    return "I have attached the following files:\n\n" + " ".join(file_sections)
368
+
369
+
370
def generate_chatml_messages_with_context(
    user_message,
    system_message=None,
    conversation_log={},
    model_name="gpt-4o-mini",
    loaded_model: Optional[Llama] = None,
    max_prompt_size=None,
    tokenizer_name=None,
    query_images=None,
    vision_enabled=False,
    model_type="",
    context_message="",
    query_files: str = None,
    generated_files: List[FileAttachment] = None,
    generated_asset_results: Dict[str, Dict] = {},
    program_execution_context: List[str] = [],
):
    """Generate chat messages with appropriate context from previous conversation to send to the chat model

    The message list is assembled newest-first: messages for the current turn,
    then messages reconstructed from `conversation_log`, then the system
    message. It is passed through truncate_messages and finally reversed, so
    the returned list is in chronological order.

    Args:
        user_message: Current user query.
        system_message: Optional system prompt; added with role "system".
        conversation_log: Dict whose "chat" key lists previously logged messages.
        model_name: Used to look up the default prompt size and passed to truncate_messages.
        loaded_model: Optional loaded model; when set, its context size bounds the prompt size.
        max_prompt_size: Explicit prompt size; inferred when falsy.
        tokenizer_name: Passed through to truncate_messages.
        query_images: Images of the current query, passed to construct_structured_message.
        vision_enabled: Passed to construct_structured_message.
        model_type: Passed to construct_structured_message.
        context_message: Extra context added as a user message for the current turn.
        query_files: Attached file context of the current query, passed to construct_structured_message.
        generated_files: Files added as an assistant message for the current turn.
        generated_asset_results: Generated assets of the current turn, added as a user message.
        program_execution_context: Lines appended to `context_message` via the additional program context prompt.

    Returns:
        The (possibly truncated) list of chat messages in chronological order.
    """
    # Set max prompt size from user config or based on pre-configured for model and machine specs
    if not max_prompt_size:
        if loaded_model:
            max_prompt_size = infer_max_tokens(loaded_model.n_ctx(), model_to_prompt_size.get(model_name, math.inf))
        else:
            max_prompt_size = model_to_prompt_size.get(model_name, 10000)

    # Scale lookback turns proportional to max prompt size supported by model
    lookback_turns = max_prompt_size // 750

    # Extract Chat History for Context
    # NOTE(review): iteration starts at the first logged message and stops once the cap below
    # is reached, so for long logs the earliest messages are the ones kept. Confirm this is intended.
    chatml_messages: List[ChatMessage] = []
    for chat in conversation_log.get("chat", []):
        message_context = ""
        message_attached_files = ""

        generated_assets = {}

        chat_message = chat.get("message")
        role = "user" if chat["by"] == "you" else "assistant"

        # Legacy code to handle excalidraw diagrams prior to Dec 2024
        if chat["by"] == "khoj" and "excalidraw" in chat["intent"].get("type", ""):
            chat_message = chat["intent"].get("inferred-queries")[0]

        if not is_none_or_empty(chat.get("context")):
            # Built from a set comprehension: identical reference entries collapse into one
            # and the order of the joined entries is not guaranteed
            references = "\n\n".join(
                {
                    f"# File: {item['file']}\n## {item['compiled']}\n"
                    for item in chat.get("context") or []
                    if isinstance(item, dict)
                }
            )
            message_context += f"{prompts.notes_conversation.format(references=references)}\n\n"

        if chat.get("queryFiles"):
            raw_query_files = chat.get("queryFiles")
            query_files_dict = dict()
            for file in raw_query_files:
                query_files_dict[file["name"]] = file["content"]

            message_attached_files = gather_raw_query_files(query_files_dict)
            # Appended to the end of the list, unlike the context and chat messages which are inserted at the front
            chatml_messages.append(ChatMessage(content=message_attached_files, role=role))

        if not is_none_or_empty(chat.get("onlineContext")):
            message_context += f"{prompts.online_search_conversation.format(online_results=chat.get('onlineContext'))}"

        # Notes and online context of this logged message are re-added as a single user message
        if not is_none_or_empty(message_context):
            reconstructed_context_message = ChatMessage(content=message_context, role="user")
            chatml_messages.insert(0, reconstructed_context_message)

        # For assistant messages with generated images or diagrams, keep only the query used to generate them.
        # Falls back to the current user message when the intent has no inferred queries key.
        if not is_none_or_empty(chat.get("images")) and role == "assistant":
            generated_assets["image"] = {
                "query": chat.get("intent", {}).get("inferred-queries", [user_message])[0],
            }

        if not is_none_or_empty(chat.get("excalidrawDiagram")) and role == "assistant":
            generated_assets["diagram"] = {
                "query": chat.get("intent", {}).get("inferred-queries", [user_message])[0],
            }

        if not is_none_or_empty(generated_assets):
            # Appended to the end of the list, like the attached files message above
            chatml_messages.append(
                ChatMessage(
                    content=f"{prompts.generated_assets_context.format(generated_assets=yaml_dump(generated_assets))}\n",
                    role="user",
                )
            )

        # Images are only passed along for user messages
        message_content = construct_structured_message(
            chat_message, chat.get("images") if role == "user" else [], model_type, vision_enabled
        )

        # Inserting at the front leaves later logged messages ahead of earlier ones
        reconstructed_message = ChatMessage(content=message_content, role=role)
        chatml_messages.insert(0, reconstructed_message)

        # Cap the number of reconstructed history messages
        if len(chatml_messages) >= 3 * lookback_turns:
            break

    # Messages for the current turn. These precede the history in the newest-first list.
    messages = []

    if not is_none_or_empty(generated_asset_results):
        messages.append(
            ChatMessage(
                content=f"{prompts.generated_assets_context.format(generated_assets=yaml_dump(generated_asset_results))}\n\n",
                role="user",
            )
        )

    if not is_none_or_empty(user_message):
        messages.append(
            ChatMessage(
                content=construct_structured_message(
                    user_message, query_images, model_type, vision_enabled, query_files
                ),
                role="user",
            )
        )

    if generated_files:
        message_attached_files = gather_raw_query_files({file.name: file.content for file in generated_files})
        messages.append(ChatMessage(content=message_attached_files, role="assistant"))

    if program_execution_context:
        program_context_text = "\n".join(program_execution_context)
        context_message += f"{prompts.additional_program_context.format(context=program_context_text)}\n"

    if not is_none_or_empty(context_message):
        messages.append(ChatMessage(content=context_message, role="user"))

    if len(chatml_messages) > 0:
        messages += chatml_messages

    # The system message goes last here, so it ends up first after the final reversal
    if not is_none_or_empty(system_message):
        messages.append(ChatMessage(content=system_message, role="system"))

    # Truncate oldest messages from conversation history until under max supported prompt size by model
    messages = truncate_messages(messages, max_prompt_size, model_name, loaded_model, tokenizer_name)

    # Return message in chronological order
    return messages[::-1]
509
+
510
+
511
def truncate_messages(
    messages: list[ChatMessage],
    max_prompt_size: int,
    model_name: str,
    loaded_model: Optional[Llama] = None,
    tokenizer_name=None,
) -> list[ChatMessage]:
    """Truncate messages to fit within max prompt size supported by model.

    Messages are dropped from the end of the list until the estimated token
    count fits. If the remaining first message is still too large, its text is
    truncated while trying to preserve its last line (treated as the original
    question). The system message, if any, is never dropped and is returned as
    the last element.

    Args:
        messages: Chat messages to fit. The list is modified in place while messages are dropped.
        max_prompt_size: Token budget for the whole prompt.
        model_name: Used to select the tokenizer and the role of the system message.
        loaded_model: Optional loaded model providing its own tokenizer.
        tokenizer_name: Optional name of a pretrained tokenizer to use.

    Returns:
        The messages that fit, followed by the system message when there is one.
    """
    default_tokenizer = "gpt-4o"

    try:
        if loaded_model:
            encoder = loaded_model.tokenizer()
        elif model_name.startswith("gpt-") or model_name.startswith("o1"):
            # as tiktoken doesn't recognize o1 model series yet
            encoder = tiktoken.encoding_for_model("gpt-4o" if model_name.startswith("o1") else model_name)
        elif tokenizer_name:
            if tokenizer_name in state.pretrained_tokenizers:
                encoder = state.pretrained_tokenizers[tokenizer_name]
            else:
                encoder = AutoTokenizer.from_pretrained(tokenizer_name)
                state.pretrained_tokenizers[tokenizer_name] = encoder
        else:
            encoder = download_model(model_name).tokenizer()
    except Exception:
        # Best effort: any failure to resolve a model specific tokenizer falls back to the default one.
        # Catch Exception rather than everything so KeyboardInterrupt and SystemExit still propagate.
        encoder = tiktoken.encoding_for_model(default_tokenizer)
        logger.debug(
            f"Fallback to default chat model tokenizer: {default_tokenizer}.\nConfigure tokenizer for model: {model_name} in Khoj settings to improve context stuffing."
        )

    def count_tokens(message):
        # TODO: Handle truncation of multi-part message.content, i.e when message.content is a list[dict] rather than a string
        # Until then, multi-part content counts as zero tokens.
        return len(encoder.encode(message.content)) if isinstance(message.content, str) else 0

    # Extract system message from messages
    system_message = None
    for idx, message in enumerate(messages):
        if message.role == "system":
            system_message = messages.pop(idx)
            break

    system_message_tokens = count_tokens(system_message) if system_message else 0
    tokens = sum(count_tokens(message) for message in messages)

    # Drop older messages until under max supported prompt size by model
    # Reserves 4 tokens to demarcate each message (e.g <|im_start|>user, <|im_end|>, <|endoftext|> etc.)
    while (tokens + system_message_tokens + 4 * len(messages)) > max_prompt_size and len(messages) > 1:
        # Subtract the dropped message's tokens instead of re-encoding every remaining message
        tokens -= count_tokens(messages.pop())

    # Truncate current message if still over max supported prompt size by model
    # Skipped when there is no message left to truncate (e.g. only a system message was passed)
    if messages and (tokens + system_message_tokens) > max_prompt_size:
        current = messages[0]
        content_lines = current.content.split("\n") if isinstance(current.content, str) else []
        # Everything but the last line is context that can be cut; the last line is kept as the question
        current_message = "\n".join(content_lines[:-1])
        original_question = "\n".join(content_lines[-1:])
        original_question = f"\n{original_question}"
        original_question_tokens = len(encoder.encode(original_question))
        remaining_tokens = max_prompt_size - system_message_tokens
        if remaining_tokens > original_question_tokens:
            remaining_tokens -= original_question_tokens
            truncated_message = encoder.decode(encoder.encode(current_message)[:remaining_tokens]).strip()
            messages = [ChatMessage(content=truncated_message + original_question, role=current.role)]
        else:
            truncated_message = encoder.decode(encoder.encode(original_question)[:remaining_tokens]).strip()
            messages = [ChatMessage(content=truncated_message, role=current.role)]
        logger.debug(
            f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {truncated_message[:1000]}..."
        )

    if system_message:
        # Default system message role is system.
        # Fallback to system message role of user for models that do not support this role like gemma-2 and openai's o1 model series.
        system_message.role = "user" if "gemma-2" in model_name or model_name.startswith("o1") else "system"
    return messages + [system_message] if system_message else messages
584
+
585
+
586
def reciprocal_conversation_to_chatml(message_pair):
    """Convert a single back and forth between user and assistant to chatml format

    The first item of `message_pair` gets the "user" role, the second the
    "assistant" role. Items beyond the second are ignored.
    """
    speaker_roles = ("user", "assistant")
    chatml = []
    for text, speaker in zip(message_pair, speaker_roles):
        chatml.append(ChatMessage(content=text, role=speaker))
    return chatml
589
+
590
+
591
def clean_json(response: str):
    """Remove any markdown json codeblock and newline formatting if present. Useful for non schema enforceable models

    Surrounding whitespace is stripped and every newline is removed before the
    ```json prefix and ``` suffix are dropped.
    """
    single_line = response.strip().replace("\n", "")
    without_opening_fence = single_line.removeprefix("```json")
    return without_opening_fence.removesuffix("```")
594
+
595
+
596
def clean_code_python(code: str):
    """Remove any markdown codeblock formatting if present. Useful for non schema enforceable models

    Surrounding whitespace is stripped before the ```python prefix and ```
    suffix are dropped. Newlines inside the code are kept.
    """
    trimmed = code.strip()
    without_opening_fence = trimmed.removeprefix("```python")
    return without_opening_fence.removesuffix("```")
599
+
600
+
601
def load_complex_json(json_str):
    """
    Preprocess a raw JSON string to escape unescaped double quotes within value strings,
    while preserving the JSON structure and already escaped quotes.

    The preprocessed string is parsed with json first and pyjson5 second; the
    first successful result is returned. Raises ValueError listing every
    parser's error when neither can load it.
    """
    # Match content between : and either , or }
    # This pattern looks for ': ' followed by any characters until , or }
    value_pattern = r':\s*"(.*?)(?<!\\)"(?=[,}])'

    def escape_quotes_in_value(match):
        # The content between the value's opening and closing double quote
        value = match.group(1)
        # Negative lookbehinds leave already escaped quotes alone
        # Replace " with \"
        value = re.sub(r'(?<!\\)"', '\\"', value)
        # Replace \' with \\'
        value = re.sub(r"(?<!\\)\\'", r"\\\\'", value)
        return f': "{value}"'

    # Strip markdown/newline formatting first, then fix up the quoted values
    processed = re.sub(value_pattern, escape_quotes_in_value, clean_json(rf"{json_str}"))

    # Try each loader in turn and keep what went wrong with each
    errors = []
    for loads in (json.loads, pyjson5.loads):
        try:
            return loads(processed)
        except (json.JSONDecodeError, pyjson5.Json5Exception) as e:
            errors.append(f"{type(e).__name__}: {str(e)}")

    # If all loaders fail, raise the aggregated error
    raise ValueError(
        f"Failed to load JSON with errors: {'; '.join(errors)}\n\n"
        f"While attempting to load this cleaned JSON:\n{processed}"
    )
640
+
641
+
642
def defilter_query(query: str):
    """Remove any query filters in query

    Runs the query through the defilter step of the word, file and date
    filters, in that order, and returns the result.
    """
    query_filters: List[BaseFilter] = [WordFilter(), FileFilter(), DateFilter()]
    remaining_query = query
    for query_filter in query_filters:
        remaining_query = query_filter.defilter(remaining_query)
    return remaining_query
649
+
650
+
651
@dataclass
class ImageWithType:
    """Pairs image content with a string describing its type."""

    # The image payload; left untyped as callers store different representations
    content: Any
    # Type of the image, stored as a string
    type: str
655
+
656
+
657
def get_image_from_url(image_url: str, type="pil", timeout: float = 30.0):
    """Download the image at `image_url` and return it in the requested format.

    Args:
        image_url: URL to fetch the image from.
        type: "pil" to get a PIL image, "b64" to get a base64 encoded string.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout an unresponsive server would block the caller indefinitely.

    Returns:
        ImageWithType with the converted image and its content type. When the
        request fails (including on timeout) both fields are None.

    Raises:
        ValueError: If `type` is neither "pil" nor "b64".
    """
    try:
        response = requests.get(image_url, timeout=timeout)
        response.raise_for_status()  # Check if the request was successful

        # Get content type from response or infer from URL
        content_type = response.headers.get("content-type") or mimetypes.guess_type(image_url)[0] or "image/webp"

        # Convert image to desired format
        if type == "b64":
            image_data = base64.b64encode(response.content).decode("utf-8")
        elif type == "pil":
            image_data = PIL.Image.open(BytesIO(response.content))
        else:
            raise ValueError(f"Invalid image type: {type}")

        return ImageWithType(content=image_data, type=content_type)
    except requests.exceptions.RequestException as e:
        # Timeouts are RequestExceptions too, so they take this best effort path
        logger.error(f"Failed to get image from URL {image_url}: {e}")
        return ImageWithType(content=None, type=None)
677
+
678
+
679
def commit_conversation_trace(
    session: list[ChatMessage],
    response: str | list[dict],
    tracer: dict,
    system_message: str | list[dict] = "",
    repo_path: str | None = None,
) -> str | None:
    """
    Save trace of conversation step using git. Useful to visualize, compare and debug traces.

    Writes the session, response and system prompt to the files "query",
    "response" and "system_prompt" in the repository and commits them on the
    message branch when the tracer has a message id, otherwise on the
    conversation branch.

    Args:
        session: Chat messages of this step; the content of the last one is used as the commit title.
        response: The response produced for the session.
        tracer: Metadata with the optional keys uid, cid and mid; also dumped into the commit message.
        system_message: System prompt used for the step.
        repo_path: Repository directory. Falls back to the PROMPTRACE_DIR environment variable.

    Returns:
        The path to the repository, or None when GitPython is not installed,
        no repository path is configured or the commit failed.
    """
    try:
        from git import Repo
    except ImportError:
        return None

    # Infer repository path from environment variable or provided path
    repo_path = repo_path if not is_none_or_empty(repo_path) else os.getenv("PROMPTRACE_DIR")
    if not repo_path:
        return None

    # Serialize session, system message and response to JSON
    # (the *_yaml variable names notwithstanding, json.dumps is what is used here)
    system_message_yaml = json.dumps(system_message, ensure_ascii=False, sort_keys=False)
    response_yaml = json.dumps(response, ensure_ascii=False, sort_keys=False)
    formatted_session = [{"role": message.role, "content": message.content} for message in session]
    session_yaml = json.dumps(formatted_session, ensure_ascii=False, sort_keys=False)
    query = (
        json.dumps(session[-1].content, ensure_ascii=False, sort_keys=False).strip().removeprefix("'").removesuffix("'")
    )  # Extract serialized query from chat session

    # Extract chat metadata for session
    # uid and cid default to "main"; mid has no default, so no message branch is used when it is missing
    uid, cid, mid = tracer.get("uid", "main"), tracer.get("cid", "main"), tracer.get("mid")

    try:
        # Prepare git repository
        os.makedirs(repo_path, exist_ok=True)
        repo = Repo.init(repo_path)

        # Remove post-commit hook if it exists
        hooks_dir = os.path.join(repo_path, ".git", "hooks")
        post_commit_hook = os.path.join(hooks_dir, "post-commit")
        if os.path.exists(post_commit_hook):
            os.remove(post_commit_hook)

        # Configure git user if not set
        if not repo.config_reader().has_option("user", "email"):
            repo.config_writer().set_value("user", "name", "Prompt Tracer").release()
            repo.config_writer().set_value("user", "email", "promptracer@khoj.dev").release()

        # Create an initial commit if the repository is newly created
        if not repo.head.is_valid():
            repo.index.commit("And then there was a trace")

        # Check out the initial commit
        # NOTE(review): "HEAD~0" resolves to the current HEAD commit, which is only the
        # initial commit for a newly created repository. Confirm against the intent.
        initial_commit = repo.commit("HEAD~0")
        repo.head.reference = initial_commit
        repo.head.reset(index=True, working_tree=True)

        # Create or switch to user branch from initial commit
        user_branch = f"u_{uid}"
        if user_branch not in repo.branches:
            repo.create_head(user_branch)
        repo.heads[user_branch].checkout()

        # Create or switch to conversation branch from user branch
        conv_branch = f"c_{cid}"
        if conv_branch not in repo.branches:
            repo.create_head(conv_branch)
        repo.heads[conv_branch].checkout()

        # Create or switch to message branch from conversation branch
        msg_branch = f"m_{mid}" if mid else None
        if msg_branch and msg_branch not in repo.branches:
            repo.create_head(msg_branch)
        if msg_branch:
            repo.heads[msg_branch].checkout()

        # Include file with content to commit
        files_to_commit = {"query": session_yaml, "response": response_yaml, "system_prompt": system_message_yaml}

        # Write files and stage them
        for filename, content in files_to_commit.items():
            file_path = os.path.join(repo_path, filename)
            # Unescape special characters in content for better readability
            content = content.strip().replace("\\n", "\n").replace("\\t", "\t")
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(content)
            repo.index.add([filename])

        # Create commit
        # Commit title is the start of the query; the body holds the start of the response and the tracer metadata
        metadata_yaml = yaml.dump(tracer, allow_unicode=True, sort_keys=False, default_flow_style=False)
        commit_message = f"""
{query[:250]}

Response:
---
{response[:500]}...

Metadata
---
{metadata_yaml}
""".strip()

        repo.index.commit(commit_message)

        logger.debug(f"Saved conversation trace to repo at {repo_path}")
        return repo_path
    except Exception as e:
        # Tracing is best effort: failures are logged and reported through the None return value
        logger.error(f"Failed to add conversation trace to repo: {str(e)}", exc_info=True)
        return None
789
+
790
+
791
def merge_message_into_conversation_trace(query: str, response: str, tracer: dict, repo_path=None) -> bool:
    """
    Merge the message branch into its parent conversation branch.

    Args:
        query: User query
        response: Assistant response
        tracer: Dictionary containing uid, cid and mid
        repo_path: Path to the git repository

    Returns:
        bool: True if merge was successful, False otherwise. This includes the cases where
        GitPython is not installed, the tracer has no message id or no repository path is configured.
    """
    try:
        from git import Repo
    except ImportError:
        return False

    # Resolve the branch names before entering the try block, so the failure log below can
    # always reference them. A tracer without a message id used to raise from inside the
    # except handler because the names were still unbound.
    mid = tracer.get("mid")
    if not mid:
        # commit_conversation_trace only creates a message branch when mid is set, so there is nothing to merge
        logger.debug("Skip merging conversation trace as tracer has no message id")
        return False
    msg_branch = f"m_{mid}"
    # Same default conversation id as commit_conversation_trace
    conv_branch = f"c_{tracer.get('cid', 'main')}"

    try:
        # Infer repository path from environment variable or provided path
        repo_path = repo_path if not is_none_or_empty(repo_path) else os.getenv("PROMPTRACE_DIR")
        if not repo_path:
            return False
        repo = Repo(repo_path)

        # Checkout conversation branch
        repo.heads[conv_branch].checkout()

        # Create commit message
        metadata_yaml = yaml.dump(tracer, allow_unicode=True, sort_keys=False, default_flow_style=False)
        commit_message = f"""
{query[:250]}

Response:
---
{response[:500]}...

Metadata
---
{metadata_yaml}
""".strip()

        # Merge message branch into conversation branch
        repo.git.merge(msg_branch, no_ff=True, m=commit_message)

        # Delete message branch after merge
        repo.delete_head(msg_branch, force=True)

        logger.debug(f"Successfully merged {msg_branch} into {conv_branch}")
        return True
    except Exception as e:
        # Tracing is best effort: failures are logged and reported through the return value
        logger.error(f"Failed to merge message {msg_branch} into conversation {conv_branch}: {str(e)}", exc_info=True)
        return False
847
+
848
+
849
def messages_to_print(messages: list[ChatMessage], max_length: int = 70) -> str:
    """
    Format and truncate messages to print, ensuring JSON serializable content

    Each message's content is rendered as JSON, cut to `max_length` characters
    and suffixed with "...". Messages are separated by newlines.
    """

    def to_json(content: Any) -> str:
        try:
            return json.dumps(content)
        except (TypeError, ValueError):
            # json.dumps raises TypeError for unsupported types and ValueError for
            # circular references. Fall back to a readable stand-in for both.
            if hasattr(content, "format") and content.format == "WEBP":
                stand_in = "[WebP Image]"
            elif hasattr(content, "__dict__"):
                stand_in = str(content.__dict__)
            else:
                stand_in = str(content)
            return json.dumps(stand_in)

    return "\n".join(f"{to_json(message.content)[:max_length]}..." for message in messages)