khoj 1.33.3.dev32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +218 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +452 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1821 -0
  10. khoj/database/admin.py +417 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_default_model.py +116 -0
  15. khoj/database/management/commands/change_generated_images_url.py +61 -0
  16. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  17. khoj/database/migrations/0001_khojuser.py +98 -0
  18. khoj/database/migrations/0002_googleuser.py +32 -0
  19. khoj/database/migrations/0003_vector_extension.py +10 -0
  20. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  21. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  22. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  23. khoj/database/migrations/0007_add_conversation.py +27 -0
  24. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  25. khoj/database/migrations/0009_khojapiuser.py +24 -0
  26. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  27. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  28. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  29. khoj/database/migrations/0012_entry_file_source.py +21 -0
  30. khoj/database/migrations/0013_subscription.py +37 -0
  31. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  32. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  33. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  34. khoj/database/migrations/0017_searchmodel.py +32 -0
  35. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  36. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  37. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  38. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  39. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  40. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  41. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  42. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  43. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  45. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  46. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  47. khoj/database/migrations/0029_userrequests.py +27 -0
  48. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  49. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  50. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  51. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  52. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  53. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  54. khoj/database/migrations/0035_processlock.py +26 -0
  55. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  56. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  57. khoj/database/migrations/0036_publicconversation.py +42 -0
  58. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  59. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  60. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  61. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  62. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  63. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  64. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  65. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  66. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  67. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  68. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  69. khoj/database/migrations/0045_fileobject.py +37 -0
  70. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  71. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  72. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  73. khoj/database/migrations/0049_datastore.py +38 -0
  74. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  75. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  76. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  77. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  78. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  79. khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
  80. khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
  81. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  82. khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
  83. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  84. khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
  85. khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
  86. khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
  87. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  88. khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
  89. khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
  90. khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
  91. khoj/database/migrations/0063_conversation_temp_id.py +36 -0
  92. khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
  93. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  94. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  95. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  96. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  97. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  98. khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
  99. khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
  100. khoj/database/migrations/0072_entry_search_model.py +24 -0
  101. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  102. khoj/database/migrations/0074_alter_conversation_title.py +17 -0
  103. khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
  104. khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
  105. khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
  106. khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
  107. khoj/database/migrations/__init__.py +0 -0
  108. khoj/database/models/__init__.py +725 -0
  109. khoj/database/tests.py +3 -0
  110. khoj/interface/compiled/404/index.html +1 -0
  111. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
  112. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
  113. khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
  114. khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
  115. khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
  116. khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
  117. khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
  118. khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
  119. khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
  120. khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
  121. khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
  122. khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
  123. khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
  124. khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
  125. khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
  126. khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
  127. khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
  128. khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
  129. khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
  130. khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
  131. khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
  132. khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
  133. khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
  134. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  135. khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
  136. khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
  137. khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
  138. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  139. khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
  140. khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
  141. khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
  142. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  143. khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
  144. khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
  145. khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
  146. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  147. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
  148. khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
  149. khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
  150. khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
  151. khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
  152. khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
  153. khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
  154. khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
  155. khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  156. khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
  157. khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
  158. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  159. khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
  160. khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
  161. khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
  162. khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
  163. khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
  164. khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
  165. khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
  166. khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
  167. khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
  168. khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
  169. khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
  170. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
  171. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
  172. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
  173. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
  174. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
  175. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
  176. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
  177. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
  178. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
  179. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
  180. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
  181. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
  182. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
  183. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
  184. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
  185. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
  186. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
  187. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
  188. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
  189. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
  190. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
  191. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
  192. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
  193. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
  194. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
  195. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
  196. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
  197. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
  198. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
  199. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
  200. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
  201. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
  202. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
  203. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
  204. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
  205. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
  206. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
  207. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
  208. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
  209. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
  210. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
  211. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
  212. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
  213. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
  214. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
  215. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
  216. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
  217. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
  218. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
  219. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
  220. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
  221. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
  222. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
  223. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
  224. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
  225. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
  226. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
  227. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
  228. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
  229. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
  230. khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
  231. khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
  232. khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
  233. khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
  234. khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
  235. khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
  236. khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
  237. khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
  238. khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
  239. khoj/interface/compiled/agents/index.html +1 -0
  240. khoj/interface/compiled/agents/index.txt +7 -0
  241. khoj/interface/compiled/agents.svg +6 -0
  242. khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
  243. khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
  244. khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
  245. khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
  246. khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
  247. khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
  248. khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
  249. khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
  250. khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
  251. khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
  252. khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
  253. khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
  254. khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
  255. khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
  256. khoj/interface/compiled/automation.svg +37 -0
  257. khoj/interface/compiled/automations/index.html +1 -0
  258. khoj/interface/compiled/automations/index.txt +8 -0
  259. khoj/interface/compiled/chat/index.html +1 -0
  260. khoj/interface/compiled/chat/index.txt +7 -0
  261. khoj/interface/compiled/chat.svg +24 -0
  262. khoj/interface/compiled/close.svg +5 -0
  263. khoj/interface/compiled/copy-button-success.svg +6 -0
  264. khoj/interface/compiled/copy-button.svg +5 -0
  265. khoj/interface/compiled/index.html +1 -0
  266. khoj/interface/compiled/index.txt +7 -0
  267. khoj/interface/compiled/khoj.webmanifest +76 -0
  268. khoj/interface/compiled/logo.svg +24 -0
  269. khoj/interface/compiled/search/index.html +1 -0
  270. khoj/interface/compiled/search/index.txt +7 -0
  271. khoj/interface/compiled/send.svg +1 -0
  272. khoj/interface/compiled/settings/index.html +1 -0
  273. khoj/interface/compiled/settings/index.txt +9 -0
  274. khoj/interface/compiled/share/chat/index.html +1 -0
  275. khoj/interface/compiled/share/chat/index.txt +7 -0
  276. khoj/interface/compiled/share.svg +8 -0
  277. khoj/interface/compiled/thumbs-down.svg +6 -0
  278. khoj/interface/compiled/thumbs-up.svg +6 -0
  279. khoj/interface/email/feedback.html +34 -0
  280. khoj/interface/email/magic_link.html +40 -0
  281. khoj/interface/email/task.html +37 -0
  282. khoj/interface/email/welcome.html +90 -0
  283. khoj/interface/web/.well-known/assetlinks.json +11 -0
  284. khoj/interface/web/assets/icons/agents.svg +19 -0
  285. khoj/interface/web/assets/icons/automation.svg +43 -0
  286. khoj/interface/web/assets/icons/chat.svg +24 -0
  287. khoj/interface/web/assets/icons/github.svg +1 -0
  288. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  289. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  290. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
  291. khoj/interface/web/assets/icons/khoj.svg +26 -0
  292. khoj/interface/web/assets/icons/logotype.svg +1 -0
  293. khoj/interface/web/assets/icons/search.svg +57 -0
  294. khoj/interface/web/assets/icons/sync.svg +4 -0
  295. khoj/interface/web/assets/khoj.css +237 -0
  296. khoj/interface/web/assets/utils.js +33 -0
  297. khoj/interface/web/base_config.html +445 -0
  298. khoj/interface/web/content_source_github_input.html +208 -0
  299. khoj/interface/web/login.html +310 -0
  300. khoj/interface/web/utils.html +48 -0
  301. khoj/main.py +249 -0
  302. khoj/manage.py +22 -0
  303. khoj/migrations/__init__.py +0 -0
  304. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  305. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  306. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  307. khoj/migrations/migrate_offline_model.py +29 -0
  308. khoj/migrations/migrate_processor_config_openai.py +67 -0
  309. khoj/migrations/migrate_server_pg.py +132 -0
  310. khoj/migrations/migrate_version.py +17 -0
  311. khoj/processor/__init__.py +0 -0
  312. khoj/processor/content/__init__.py +0 -0
  313. khoj/processor/content/docx/__init__.py +0 -0
  314. khoj/processor/content/docx/docx_to_entries.py +111 -0
  315. khoj/processor/content/github/__init__.py +0 -0
  316. khoj/processor/content/github/github_to_entries.py +226 -0
  317. khoj/processor/content/images/__init__.py +0 -0
  318. khoj/processor/content/images/image_to_entries.py +117 -0
  319. khoj/processor/content/markdown/__init__.py +0 -0
  320. khoj/processor/content/markdown/markdown_to_entries.py +160 -0
  321. khoj/processor/content/notion/notion_to_entries.py +259 -0
  322. khoj/processor/content/org_mode/__init__.py +0 -0
  323. khoj/processor/content/org_mode/org_to_entries.py +226 -0
  324. khoj/processor/content/org_mode/orgnode.py +532 -0
  325. khoj/processor/content/pdf/__init__.py +0 -0
  326. khoj/processor/content/pdf/pdf_to_entries.py +119 -0
  327. khoj/processor/content/plaintext/__init__.py +0 -0
  328. khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
  329. khoj/processor/content/text_to_entries.py +296 -0
  330. khoj/processor/conversation/__init__.py +0 -0
  331. khoj/processor/conversation/anthropic/__init__.py +0 -0
  332. khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
  333. khoj/processor/conversation/anthropic/utils.py +217 -0
  334. khoj/processor/conversation/google/__init__.py +0 -0
  335. khoj/processor/conversation/google/gemini_chat.py +253 -0
  336. khoj/processor/conversation/google/utils.py +260 -0
  337. khoj/processor/conversation/offline/__init__.py +0 -0
  338. khoj/processor/conversation/offline/chat_model.py +308 -0
  339. khoj/processor/conversation/offline/utils.py +80 -0
  340. khoj/processor/conversation/offline/whisper.py +15 -0
  341. khoj/processor/conversation/openai/__init__.py +0 -0
  342. khoj/processor/conversation/openai/gpt.py +243 -0
  343. khoj/processor/conversation/openai/utils.py +232 -0
  344. khoj/processor/conversation/openai/whisper.py +13 -0
  345. khoj/processor/conversation/prompts.py +1188 -0
  346. khoj/processor/conversation/utils.py +867 -0
  347. khoj/processor/embeddings.py +122 -0
  348. khoj/processor/image/generate.py +215 -0
  349. khoj/processor/speech/__init__.py +0 -0
  350. khoj/processor/speech/text_to_speech.py +51 -0
  351. khoj/processor/tools/__init__.py +0 -0
  352. khoj/processor/tools/online_search.py +472 -0
  353. khoj/processor/tools/run_code.py +179 -0
  354. khoj/routers/__init__.py +0 -0
  355. khoj/routers/api.py +760 -0
  356. khoj/routers/api_agents.py +295 -0
  357. khoj/routers/api_chat.py +1273 -0
  358. khoj/routers/api_content.py +634 -0
  359. khoj/routers/api_model.py +123 -0
  360. khoj/routers/api_phone.py +86 -0
  361. khoj/routers/api_subscription.py +144 -0
  362. khoj/routers/auth.py +307 -0
  363. khoj/routers/email.py +135 -0
  364. khoj/routers/helpers.py +2333 -0
  365. khoj/routers/notion.py +85 -0
  366. khoj/routers/research.py +364 -0
  367. khoj/routers/storage.py +63 -0
  368. khoj/routers/twilio.py +36 -0
  369. khoj/routers/web_client.py +141 -0
  370. khoj/search_filter/__init__.py +0 -0
  371. khoj/search_filter/base_filter.py +15 -0
  372. khoj/search_filter/date_filter.py +215 -0
  373. khoj/search_filter/file_filter.py +32 -0
  374. khoj/search_filter/word_filter.py +29 -0
  375. khoj/search_type/__init__.py +0 -0
  376. khoj/search_type/text_search.py +255 -0
  377. khoj/utils/__init__.py +0 -0
  378. khoj/utils/cli.py +101 -0
  379. khoj/utils/config.py +81 -0
  380. khoj/utils/constants.py +51 -0
  381. khoj/utils/fs_syncer.py +252 -0
  382. khoj/utils/helpers.py +627 -0
  383. khoj/utils/initialization.py +301 -0
  384. khoj/utils/jsonl.py +43 -0
  385. khoj/utils/models.py +47 -0
  386. khoj/utils/rawconfig.py +208 -0
  387. khoj/utils/state.py +48 -0
  388. khoj/utils/yaml.py +47 -0
  389. khoj-1.33.3.dev32.dist-info/METADATA +190 -0
  390. khoj-1.33.3.dev32.dist-info/RECORD +393 -0
  391. khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
  392. khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
  393. khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
khoj/utils/helpers.py ADDED
@@ -0,0 +1,627 @@
1
+ from __future__ import annotations # to avoid quoting type hints
2
+
3
+ import copy
4
+ import datetime
5
+ import io
6
+ import ipaddress
7
+ import logging
8
+ import os
9
+ import platform
10
+ import random
11
+ import urllib.parse
12
+ import uuid
13
+ from collections import OrderedDict
14
+ from enum import Enum
15
+ from functools import lru_cache
16
+ from importlib import import_module
17
+ from importlib.metadata import version
18
+ from itertools import islice
19
+ from os import path
20
+ from pathlib import Path
21
+ from time import perf_counter
22
+ from typing import TYPE_CHECKING, Any, Optional, Union
23
+ from urllib.parse import urlparse
24
+
25
+ import openai
26
+ import psutil
27
+ import requests
28
+ import torch
29
+ from asgiref.sync import sync_to_async
30
+ from email_validator import EmailNotValidError, EmailUndeliverableError, validate_email
31
+ from magika import Magika
32
+ from PIL import Image
33
+ from pytz import country_names, country_timezones
34
+
35
+ from khoj.utils import constants
36
+
37
+ if TYPE_CHECKING:
38
+ from sentence_transformers import CrossEncoder, SentenceTransformer
39
+
40
+ from khoj.utils.models import BaseEncoder
41
+ from khoj.utils.rawconfig import AppConfig
42
+
43
+
44
+ # Initialize Magika for file type identification
45
+ magika = Magika()
46
+
47
+
48
class AsyncIteratorWrapper:
    """Expose a synchronous iterable through the async iterator protocol.

    Items are pulled from the wrapped iterator through the ``sync_to_async`` decorated ``next_async``.
    Once the wrapped iterator is exhausted, ``__anext__`` returns ``None`` instead of raising
    ``StopAsyncIteration``. An ``async for`` loop over this wrapper therefore keeps receiving
    ``None`` items and the consumer has to stop on that value itself.
    """

    def __init__(self, obj):
        # Create the iterator once so every call continues where the previous one stopped
        self._it = iter(obj)

    def __aiter__(self):
        return self

    async def __anext__(self):
        try:
            return await self.next_async()
        except StopAsyncIteration:
            # Exhaustion is reported to the consumer as a None item
            return None

    @sync_to_async
    def next_async(self):
        """Return the next item of the wrapped iterator. Raise StopAsyncIteration once it is exhausted."""
        try:
            item = next(self._it)
        except StopIteration:
            raise StopAsyncIteration
        return item
68
+
69
+
70
def is_none_or_empty(item):
    """Check whether item is None, an empty container or an empty string.

    Args:
        item: Any object.

    Returns:
        A truthy value when item is None, is an iterable that reports a length of zero,
        or compares equal to the empty string. A falsy value otherwise.
        Iterables that do not support len(), like generators, are treated as non-empty.
    """
    # Identity check. Equality with None can be overridden by the type of item
    if item is None:
        return True

    if hasattr(item, "__iter__"):
        try:
            if len(item) == 0:
                return True
        except TypeError:
            # Iterators and generators have no length and cannot be sized without consuming them
            pass

    return item == ""
72
+
73
+
74
def to_snake_case_from_dash(item: str):
    """Return item with every underscore replaced by a hyphen.

    Note: despite what the function name suggests, the conversion goes from underscores to hyphens.
    """
    hyphenated = "-".join(item.split("_"))
    return hyphenated
76
+
77
+
78
def get_absolute_path(filepath: Union[str, Path]) -> str:
    """Return filepath as an absolute path string, with a leading ~ expanded to the user's home directory."""
    expanded_path = Path(filepath).expanduser()
    return str(expanded_path.absolute())
80
+
81
+
82
def resolve_absolute_path(filepath: Union[str, Optional[Path]], strict=False) -> Path:
    """Return filepath as an absolute, resolved Path with a leading ~ expanded.

    The strict flag is forwarded unchanged to Path.resolve.
    """
    absolute_path = Path(filepath).expanduser().absolute()
    return absolute_path.resolve(strict=strict)
84
+
85
+
86
def get_from_dict(dictionary, *args):
    """Null-aware get from a nested dictionary.

    Args:
        dictionary: The outermost container to look into.
        *args: Keys to follow, one per nesting level.

    Returns:
        dictionary[args[0]][args[1]]... or None if any key is missing or
        an intermediate value cannot be looked up with the given key.
    """
    current = dictionary
    for arg in args:
        if not hasattr(current, "__iter__"):
            return None
        try:
            if arg not in current:
                return None
            current = current[arg]
        except (KeyError, IndexError, TypeError):
            # E.g. current is a string or list that contains arg but cannot be indexed by it
            return None
    return current
95
+
96
+
97
def merge_dicts(priority_dict: dict, default_dict: dict):
    """Return a new dict holding priority_dict's entries, filled in with entries from default_dict.

    Keys missing from priority_dict take their value from default_dict.
    When both dicts hold a dict under the same key, those two dicts are merged recursively.
    In every other case the value from priority_dict wins.
    """
    merged = priority_dict.copy()
    for key, default_value in default_dict.items():
        if key not in priority_dict:
            merged[key] = default_value
            continue

        priority_value = priority_dict[key]
        if isinstance(priority_value, dict) and isinstance(default_value, dict):
            merged[key] = merge_dicts(priority_value, default_value)
    return merged
105
+
106
+
107
def fix_json_dict(json_dict: dict) -> dict:
    """Convert "True" and "False" string values to booleans, in place, at every nesting level.

    Nested dicts are processed recursively. The same json_dict object that was passed in is returned.
    """
    for key, value in json_dict.items():
        if value == "True":
            json_dict[key] = True
        elif value == "False":
            json_dict[key] = False

        if isinstance(value, dict):
            json_dict[key] = fix_json_dict(value)
    return json_dict
114
+
115
+
116
def get_file_type(file_type: str, file_content: bytes) -> tuple[str, Optional[str]]:
    """Get file type and encoding from the file mime type, falling back to the file content.

    Args:
        file_type: Mime type of the file, optionally followed by ";" separated parameters.
            E.g. "text/markdown; charset=utf-8".
        file_content: Raw bytes of the file. Only inspected when the mime type is not recognized.

    Returns:
        Tuple of (type, encoding).
        Type is one of "markdown", "org", "pdf", "docx", "image", "plaintext" or "other".
        Encoding is the lowercased value of the charset parameter. If there is no charset parameter
        it is the lowercased value of the first parameter, or None when there are no parameters.
    """
    known_types = {
        "text/markdown": "markdown",
        "text/org": "org",
        "application/pdf": "pdf",
        "application/msword": "docx",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
        "image/jpeg": "image",
        "image/png": "image",
        "image/webp": "image",
    }

    # Split the mime type from its optional parameters
    mime_type, *parameters = file_type.split(";")
    mime_type = mime_type.strip()

    # Extract encoding from the parameters. Parameters without a value are ignored
    encoding = None
    for parameter in parameters:
        key, separator, value = parameter.partition("=")
        if not separator:
            continue
        value = value.strip().lower()
        if key.strip().lower() == "charset":
            encoding = value
            break
        if encoding is None:
            # Keep the first parameter value as fallback when no charset parameter is given
            encoding = value

    known_type = known_types.get(mime_type)
    if known_type is not None:
        return known_type, encoding

    # Infer content type from reading file content
    try:
        content_group = magika.identify_bytes(file_content).output.group
    except Exception:
        # Fallback to using just file type if content type cannot be inferred
        content_group = "unknown"

    if content_group in ["code", "text"]:
        return "plaintext", encoding
    return "other", encoding
148
+
149
+
150
def load_model(
    model_name: str, model_type, model_dir=None, device: str = None
) -> Union[BaseEncoder, SentenceTransformer, CrossEncoder]:
    """Load model from disk or huggingface.

    Args:
        model_name: Name of the model. Any "/" is replaced with "_" to build
            the on-disk folder name.
        model_type: Model class, or dotted path string to the model class.
        model_dir: Directory models are cached in. When None, the model is
            always loaded by name and never saved to disk.
        device: Device passed through to the model class constructor.

    Returns:
        The instantiated model.
    """
    logger = logging.getLogger(__name__)

    # Construct model path
    model_path = path.join(model_dir, model_name.replace("/", "_")) if model_dir is not None else None
    model_type_class = get_class_by_name(model_type) if isinstance(model_type, str) else model_type

    # Load model from model_path if it exists there
    if model_path is not None and resolve_absolute_path(model_path).exists():
        logger.debug(f"Loading {model_name} model from disk")
        return model_type_class(get_absolute_path(model_path), device=device)

    # Else load the model from the model_name
    logger.info(f"🤖 Downloading {model_name} model from web")
    model = model_type_class(model_name, device=device)
    if model_path is not None:
        # Only report the save once it has actually succeeded
        model.save(model_path)
        logger.info(f"📩 Saved {model_name} model to disk")

    return model
172
+
173
+
174
def get_class_by_name(name: str) -> object:
    """Resolve a dotted "package.module.ClassName" string to the object it names.

    The text after the last "." is looked up as an attribute on the module
    named by the text before it.
    """
    parts = name.rsplit(".", 1)
    module_name, class_name = parts
    module = import_module(module_name)
    return getattr(module, class_name)
178
+
179
+
180
class timer:
    """Context manager that logs how long the wrapped block of code took to run."""

    def __init__(self, message: str, logger: logging.Logger, device: torch.device = None, log_level=logging.DEBUG):
        # Only DEBUG is handled specially. Every other level logs at INFO
        if log_level == logging.DEBUG:
            self.logger = logger.debug
        else:
            self.logger = logger.info
        self.message = message
        self.device = device

    def __enter__(self):
        self.start = perf_counter()
        return self

    def __exit__(self, *_):
        duration = perf_counter() - self.start
        report = f"{self.message}: {duration:.3f} seconds"
        # Mention the device only when one was given
        if self.device is not None:
            report += f" on device: {self.device}"
        self.logger(report)
198
+
199
+
200
class LRU(OrderedDict):
    """Dictionary that holds at most `capacity` items, evicting the least recently used.

    Reading an item with `lru[key]` or writing it with `lru[key] = value`
    marks it as most recently used. When an insert grows the dictionary past
    `capacity`, the least recently used item is removed.
    """

    def __init__(self, *args, capacity=128, **kwargs):
        # Set capacity first. It is read by __setitem__, which may run while
        # the initial items are being added
        self.capacity = capacity
        super().__init__(*args, **kwargs)

    def __getitem__(self, key):
        value = super().__getitem__(key)
        self.move_to_end(key)
        return value

    def __setitem__(self, key, value):
        # Overwriting an existing key counts as a use. Without this an entry
        # that was just updated could be the next one evicted
        if key in self:
            self.move_to_end(key)
        super().__setitem__(key, value)
        if len(self) > self.capacity:
            oldest = next(iter(self))
            del self[oldest]
215
+
216
+
217
def get_server_id():
    """Get, Generate Persistent, Random ID per server install.
    Helps count distinct khoj servers deployed.
    Maintains anonymity by using non-PII random id.

    Reads the id from the "server_id=<id>" line of the khoj env file.
    When the file or the line is missing, a new random id is generated
    and appended to the env file.

    Returns:
        The server id string.
    """
    # Expand path to the khoj env file. It contains persistent internal app data
    app_env_filename = path.expanduser(constants.app_env_filepath)

    # Extract the server_id from the env file, if it exists
    if path.exists(app_env_filename):
        with open(app_env_filename, "r") as f:
            for line in f:
                # partition tolerates blank lines and values containing "="
                key, separator, value = line.partition("=")
                if separator and key.strip() == "server_id" and value.strip():
                    return value.strip()

    # If server_id is not found, generate a new one
    server_id = str(uuid.uuid4())

    # Create khoj config directory if it doesn't exist
    app_env_dir = path.dirname(app_env_filename)
    if app_env_dir:
        os.makedirs(app_env_dir, exist_ok=True)

    # Append, so any other entries already in the env file are preserved
    with open(app_env_filename, "a") as f:
        f.write("server_id=" + server_id + "\n")

    return server_id
258
+
259
+
260
def telemetry_disabled(app_config: AppConfig, telemetry_disable_env) -> bool:
    """Check if telemetry logging is turned off.

    Telemetry is disabled when the env flag is exactly True, when no app
    config is given, or when the app config has should_log_telemetry unset.
    """
    return telemetry_disable_env is True or not (app_config and app_config.should_log_telemetry)
264
+
265
+
266
def log_telemetry(
    telemetry_type: str,
    api: str = None,
    client: Optional[str] = None,
    app_config: Optional[AppConfig] = None,
    disable_telemetry_env: bool = False,
    properties: dict = None,
):
    """Log basic app usage telemetry like client, os, api called.

    Args:
        telemetry_type: Kind of telemetry event being recorded.
        api: API endpoint on server called by client.
        client: Client from which the API was called. E.g. Emacs, Obsidian.
        app_config: App config, checked to see if telemetry is enabled.
        disable_telemetry_env: Set to True to turn telemetry off.
        properties: Extra fields to add to the telemetry data. A "server_id"
            entry is added to this dictionary when it has none.

    Returns:
        The telemetry data as a dictionary, or an empty list when telemetry
        is disabled.
    """
    # Do not log usage telemetry, if telemetry is disabled via app config
    if telemetry_disabled(app_config, disable_telemetry_env):
        return []

    # properties defaults to None. Normalize before reading from it
    if properties is None:
        properties = {}
    if properties.get("server_id") is None:
        properties["server_id"] = get_server_id()

    # Populate telemetry data to log
    request_body = {
        "telemetry_type": telemetry_type,
        "server_version": version("khoj"),
        "os": platform.system(),
        "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    request_body.update(properties)
    if api:
        # API endpoint on server called by client
        request_body["api"] = api
    if client:
        # Client from which the API was called. E.g. Emacs, Obsidian
        request_body["client"] = client

    # Return telemetry data for the caller to send on
    return request_body
299
+
300
+
301
def get_device_memory() -> int:
    """Get memory of the device models run on.

    Reports total CUDA device memory on cuda, memory allocated by the driver
    on mps, and total system memory otherwise.
    NOTE(review): the returned values look like byte counts, not GB - confirm.
    """
    device = get_device()
    device_kind = device.type
    if device_kind == "cuda":
        return torch.cuda.get_device_properties(device).total_memory
    if device_kind == "mps":
        return torch.mps.driver_allocated_memory()
    return psutil.virtual_memory().total
310
+
311
+
312
def get_device() -> torch.device:
    """Pick the device to run models on: CUDA GPU first, then Apple Metal (mps), else CPU."""
    if torch.cuda.is_available():
        # Use the first CUDA GPU
        device_name = "cuda:0"
    elif torch.backends.mps.is_available():
        # Use Apple Metal Acceleration
        device_name = "mps"
    else:
        device_name = "cpu"
    return torch.device(device_name)
322
+
323
+
324
class ConversationCommand(str, Enum):
    """Commands a conversation can use.

    Members are also `str` instances, so each one compares equal to its
    string value. The description dictionaries defined after this class
    are keyed by these members.
    """

    Default = "default"
    General = "general"
    Notes = "notes"
    Help = "help"
    Online = "online"
    Webpage = "webpage"
    Code = "code"
    Image = "image"
    Text = "text"
    Automation = "automation"
    AutomatedTask = "automated_task"
    Summarize = "summarize"
    Diagram = "diagram"
    Research = "research"
339
+
340
+
341
# Description of each conversation command, keyed by command.
# Wording addresses the reader as "you" - presumably shown to the user; verify against callers.
command_descriptions = {
    ConversationCommand.General: "Only talk about information that relies on Khoj's general knowledge, not your personal knowledge base.",
    ConversationCommand.Notes: "Only talk about information that is available in your knowledge base.",
    ConversationCommand.Default: "The default command when no command specified. It intelligently auto-switches between general and notes mode.",
    ConversationCommand.Online: "Search for information on the internet.",
    ConversationCommand.Webpage: "Get information from webpage suggested by you.",
    ConversationCommand.Code: "Run Python code to parse information, run complex calculations, create documents and charts.",
    ConversationCommand.Image: "Generate illustrative, creative images by describing your imagination in words.",
    ConversationCommand.Automation: "Automatically run your query at a specified time or interval.",
    ConversationCommand.Help: "Get help with how to use or setup Khoj from the documentation",
    ConversationCommand.Summarize: "Get help with a question pertaining to an entire document.",
    ConversationCommand.Diagram: "Draw a flowchart, diagram, or any other visual representation best expressed with primitives like lines, rectangles, and text.",
    ConversationCommand.Research: "Do deep research on a topic. This will take longer than usual, but give a more detailed, comprehensive answer.",
}
355
+
356
# Description of what an agent can do with each command, keyed by command.
# Only covers a subset of the ConversationCommand members.
command_descriptions_for_agent = {
    ConversationCommand.General: "Agent can use the agents knowledge base and general knowledge.",
    ConversationCommand.Notes: "Agent can search the users knowledge base for information.",
    ConversationCommand.Online: "Agent can search the internet for information.",
    ConversationCommand.Webpage: "Agent can read suggested web pages for information.",
    ConversationCommand.Summarize: "Agent can read an entire document. Agents knowledge base must be a single document.",
    ConversationCommand.Research: "Agent can do deep research on a topic.",
}
364
+
365
# Description of when to use each tool (data source), keyed by command.
# Written as instructions addressed to the LLM.
tool_descriptions_for_llm = {
    ConversationCommand.Default: "To use a mix of your internal knowledge and the user's personal knowledge, or if you don't entirely understand the query.",
    ConversationCommand.General: "To use when you can answer the question without any outside information or personal knowledge",
    ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
    ConversationCommand.Online: "To search for the latest, up-to-date information from the internet. Note: **Questions about Khoj should always use this data source**",
    ConversationCommand.Webpage: "To use if the user has directly provided the webpage urls or you are certain of the webpage urls to read.",
    # The package is named "pandas". Use the exact name so generated code imports the right module
    ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse complex information, run complex calculations, create plaintext documents, and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4 and sympy external packages are available.",
    ConversationCommand.Summarize: "To retrieve an answer that depends on the entire document or a large text.",
}
374
+
375
# Description of each tool for function calling, keyed by command.
# Written as instructions addressed to the LLM.
function_calling_description_for_llm = {
    ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
    ConversationCommand.Online: "To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed.",
    ConversationCommand.Webpage: "To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share the webpage links and information to extract in your query.",
    # The package is named "pandas". Use the exact name so generated code imports the right module
    ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse complex information, run complex calculations, create plaintext documents, and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4 and sympy external packages are available.",
}
381
+
382
# Description of when to pick each response mode (image, text, diagram), keyed by command.
# Written as instructions addressed to the LLM.
mode_descriptions_for_llm = {
    ConversationCommand.Image: "Use this if you are confident the user is requesting you to create a new picture based on their description. This DOES NOT support generating charts or graphs. It is for creative images.",
    ConversationCommand.Text: "Use this if a normal text response would be sufficient for accurately responding to the query or you don't feel strongly about the other modes.",
    ConversationCommand.Diagram: "Use this if the user is requesting a diagram or visual representation that requires primitives like lines, rectangles, and text. This does not work for charts, graphs, or quantitative data. It is for mind mapping, flowcharts, etc.",
}
387
+
388
# Description of each response mode an agent can use, keyed by command.
mode_descriptions_for_agent = {
    # Was "cannot not use": the double negative said the opposite of what is meant
    ConversationCommand.Image: "Agent can generate images in response. It cannot use this to generate charts and graphs.",
    ConversationCommand.Automation: "Agent can schedule a task to run at a scheduled date, time and frequency in response.",
    ConversationCommand.Text: "Agent can generate text in response.",
    ConversationCommand.Diagram: "Agent can generate a visual representation that requires primitives like lines, rectangles, and text.",
}
394
+
395
+
396
class ImageIntentType(Enum):
    """
    Chat message intent by Khoj for image responses.
    Marks the schema used to reference image in chat messages.

    Each member's value is the string tag for one schema.
    """

    # Images as Inline PNG
    TEXT_TO_IMAGE = "text-to-image"
    # Images as URLs
    TEXT_TO_IMAGE2 = "text-to-image2"
    # Images as Inline WebP
    TEXT_TO_IMAGE_V3 = "text-to-image-v3"
408
+
409
+
410
def generate_random_name():
    """Build a random "<adjective> <noun>" name, e.g. "calm falcon"."""
    # Words to choose from
    adjectives = (
        "happy",
        "serendipitous",
        "exuberant",
        "calm",
        "brave",
        "scared",
        "energetic",
        "chivalrous",
        "kind",
        "suave",
    )
    nouns = ("dog", "cat", "falcon", "whale", "turtle", "rabbit", "hamster", "snake", "spider", "elephant")

    # Pick the adjective first, then the noun, and join them with a space
    chosen_words = (random.choice(adjectives), random.choice(nouns))
    return " ".join(chosen_words)
434
+
435
+
436
def batcher(iterable, max_n):
    """Split an iterable into chunks of up to max_n items.

    Each chunk is yielded as a generator. None items are read from the
    iterable but left out of the yielded chunk.
    """
    source = iter(iterable)
    # Keep reading chunks until the source is exhausted, i.e. an empty chunk comes back
    for batch in iter(lambda: list(islice(source, max_n)), []):
        yield (item for item in batch if item is not None)
444
+
445
+
446
def is_env_var_true(env_var: str, default: str = "false") -> bool:
    """Check if an environment variable is set to "true", ignoring case.

    The `default` string is used when the variable is unset.
    """
    raw_value = os.getenv(env_var, default)
    return raw_value.lower() == "true"
449
+
450
+
451
def in_debug_mode():
    """Tell whether Khoj is running in debug mode.

    Debug mode is on when the KHOJ_DEBUG environment variable is set to true.
    """
    debug_enabled = is_env_var_true("KHOJ_DEBUG")
    return debug_enabled
455
+
456
+
457
def is_promptrace_enabled():
    """Tell whether Khoj is running with prompt tracing enabled.

    Prompt tracing is on when the PROMPTRACE_DIR environment variable is set
    to the prompt tracing path.
    """
    promptrace_dir = os.getenv("PROMPTRACE_DIR")
    return not is_none_or_empty(promptrace_dir)
461
+
462
+
463
def is_valid_url(url: str) -> bool:
    """Check if a string is a valid URL.

    A URL is treated as valid when it has both a scheme and a network
    location. Surrounding whitespace is ignored.

    Returns:
        True if valid, False otherwise. Non-string input returns False.
    """
    try:
        result = urlparse(url.strip())
    except (AttributeError, TypeError, ValueError):
        # Not a string, or unparseable (e.g. malformed IPv6 host). Catch only
        # these, so KeyboardInterrupt and SystemExit still propagate
        return False
    return bool(result.scheme and result.netloc)
470
+
471
+
472
def is_internet_connected():
    """Check for internet access by sending a HEAD request to google.com.

    Returns:
        True if the request returns status code 200. False on any other
        status, on a request error, or if no response arrives in time.
    """
    try:
        # Without a timeout the request can block forever on a stalled network
        response = requests.head("https://www.google.com", timeout=5)
    except requests.RequestException:
        return False
    return response.status_code == 200
478
+
479
+
480
def is_internal_url(url: str) -> bool:
    """
    Check if a URL is likely to be internal/non-public.

    A URL counts as internal when its host is localhost, an IP address in a
    private range, ends in a common internal TLD, or has no TLD at all.

    Args:
        url (str): The URL to check.

    Returns:
        bool: True if the URL is likely internal, False otherwise.
    """
    loopback_hosts = ("localhost", "127.0.0.1", "::1")
    internal_tlds = (".local", ".internal", ".private", ".corp", ".home", ".lan")

    try:
        host = urllib.parse.urlparse(url).hostname

        if host in loopback_hosts:
            return True

        # An IP address host is decided purely by its address range
        try:
            return ipaddress.ip_address(host).is_private
        except ValueError:
            pass  # Not an IP address, continue with the name based checks

        # Internal TLD, or a bare hostname without any TLD
        return host.endswith(internal_tlds) or "." not in host
    except Exception:
        # If we can't parse the URL or something else goes wrong, assume it's not internal
        return False
518
+
519
+
520
def convert_image_to_webp(image_bytes):
    """Re-encode image bytes as WebP for faster loading.

    Returns:
        The bytes of the WebP encoded image.
    """
    with io.BytesIO() as webp_buffer:
        with Image.open(io.BytesIO(image_bytes)) as source_image:
            source_image.save(webp_buffer, "WEBP")

        # Copy the encoded bytes out before the buffer is closed
        return webp_buffer.getvalue()
531
+
532
+
533
def truncate_code_context(original_code_results: dict[str, Any], max_chars=10000) -> dict[str, Any]:
    """
    Truncate large output files and drop image file data from code results.

    Args:
        original_code_results: Code results to shorten. Each value is read at
            ["results"]["output_files"], a list of dictionaries with
            "filename" and "b64_data" keys. This input is not modified.
        max_chars: Maximum length of "b64_data" to keep for non-image files.

    Returns:
        A deep copy of the code results, with image data replaced by a
        placeholder and long file data cut to max_chars followed by "...".
    """
    image_suffixes = {".png", ".jpg", ".jpeg", ".webp"}

    # Create a deep copy of the code results to avoid modifying the original data
    code_results = copy.deepcopy(original_code_results)
    for code_result in code_results.values():
        output_files = code_result["results"]["output_files"]
        for idx, output_file in enumerate(output_files):
            filename = output_file["filename"]
            # Drop image files from code results.
            # Compare suffix case-insensitively so files like "plot.PNG" are caught too
            if Path(filename).suffix.lower() in image_suffixes:
                output_files[idx] = {
                    "filename": filename,
                    "b64_data": "[placeholder for generated image data for brevity]",
                }
            # Truncate large output files
            elif len(output_file["b64_data"]) > max_chars:
                output_files[idx] = {
                    "filename": filename,
                    "b64_data": output_file["b64_data"][:max_chars] + "...",
                }
    return code_results
554
+
555
+
556
@lru_cache
def tz_to_cc_map() -> dict[str, str]:
    """Build a lookup from timezone to country code.

    The result is cached, so the mapping is only built on the first call.
    """
    # A timezone listed under several countries maps to the last one iterated
    return {
        timezone: countrycode
        for countrycode in country_timezones
        for timezone in country_timezones[countrycode]
    }
565
+
566
+
567
def get_country_code_from_timezone(tz: str) -> str:
    """Look up the country code of a timezone. Defaults to "US" for unknown timezones."""
    timezone_to_country_code = tz_to_cc_map()
    return timezone_to_country_code.get(tz, "US")
570
+
571
+
572
def get_country_name_from_timezone(tz: str) -> str:
    """Look up the country name of a timezone. Defaults to "United States" when the name is unknown."""
    country_code = get_country_code_from_timezone(tz)
    return country_names.get(country_code, "United States")
575
+
576
+
577
def get_cost_of_chat_message(model_name: str, input_tokens: int = 0, output_tokens: int = 0, prev_cost: float = 0.0):
    """
    Work out the running cost of a chat message from its token counts.

    Prices are read from constants.model_to_cost and are per million tokens.
    Models or price entries that are not listed are priced at 0.

    Returns:
        Cost of the input and output tokens, plus prev_cost.
    """
    model_pricing = constants.model_to_cost.get(model_name, {})

    # Prices are per million tokens
    input_cost = model_pricing.get("input", 0) * (input_tokens / 1e6)
    output_cost = model_pricing.get("output", 0) * (output_tokens / 1e6)

    return input_cost + output_cost + prev_cost
587
+
588
+
589
def get_chat_usage_metrics(
    model_name: str, input_tokens: int = 0, output_tokens: int = 0, usage: dict = None, cost: float = None
):
    """
    Get usage metrics for chat message based on input and output tokens and cost.

    Args:
        model_name: Name of the chat model, used to look up token prices.
        input_tokens: Input tokens used by this message.
        output_tokens: Output tokens generated by this message.
        usage: Usage metrics accumulated so far, with "input_tokens",
            "output_tokens" and "cost" keys. Missing or empty entries count
            as zero. This dictionary is not modified.
        cost: Cost to report as is. When unset (or zero), the cost is
            calculated from the token counts and added to the previous cost.

    Returns:
        Dictionary with the updated "input_tokens", "output_tokens" and "cost".
    """
    # Default is None rather than {} to avoid sharing one dict across calls
    prev_usage = usage or {}
    prev_input_tokens = prev_usage.get("input_tokens") or 0
    prev_output_tokens = prev_usage.get("output_tokens") or 0
    prev_cost = prev_usage.get("cost") or 0.0

    return {
        "input_tokens": prev_input_tokens + input_tokens,
        "output_tokens": prev_output_tokens + output_tokens,
        "cost": cost or get_cost_of_chat_message(model_name, input_tokens, output_tokens, prev_cost=prev_cost),
    }
601
+
602
+
603
def get_openai_client(api_key: str, api_base_url: str) -> Union[openai.OpenAI, openai.AzureOpenAI]:
    """Create an AzureOpenAI client for *.openai.azure.com base URLs, else an OpenAI client."""
    hostname = urlparse(api_base_url).hostname

    # Azure hosted endpoints need the Azure specific client
    if hostname and hostname.endswith(".openai.azure.com"):
        return openai.AzureOpenAI(
            api_key=api_key,
            azure_endpoint=api_base_url,
            api_version="2024-10-21",
        )

    return openai.OpenAI(
        api_key=api_key,
        base_url=api_base_url,
    )
618
+
619
+
620
def normalize_email(email: str, check_deliverability=False) -> tuple[str, bool]:
    """Lower-case and validate an email address, optionally checking deliverability.

    Returns:
        Tuple of (email, is valid). The email is the normalized address when
        validation passes, else just the lower-cased input.
    """
    lower_email = email.lower()
    try:
        validated = validate_email(lower_email, check_deliverability=check_deliverability)
    except (EmailNotValidError, EmailUndeliverableError):
        return lower_email, False
    return validated.normalized, True