khoj 1.33.3.dev32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +218 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +452 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1821 -0
  10. khoj/database/admin.py +417 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_default_model.py +116 -0
  15. khoj/database/management/commands/change_generated_images_url.py +61 -0
  16. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  17. khoj/database/migrations/0001_khojuser.py +98 -0
  18. khoj/database/migrations/0002_googleuser.py +32 -0
  19. khoj/database/migrations/0003_vector_extension.py +10 -0
  20. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  21. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  22. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  23. khoj/database/migrations/0007_add_conversation.py +27 -0
  24. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  25. khoj/database/migrations/0009_khojapiuser.py +24 -0
  26. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  27. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  28. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  29. khoj/database/migrations/0012_entry_file_source.py +21 -0
  30. khoj/database/migrations/0013_subscription.py +37 -0
  31. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  32. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  33. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  34. khoj/database/migrations/0017_searchmodel.py +32 -0
  35. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  36. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  37. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  38. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  39. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  40. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  41. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  42. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  43. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  45. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  46. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  47. khoj/database/migrations/0029_userrequests.py +27 -0
  48. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  49. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  50. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  51. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  52. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  53. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  54. khoj/database/migrations/0035_processlock.py +26 -0
  55. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  56. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  57. khoj/database/migrations/0036_publicconversation.py +42 -0
  58. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  59. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  60. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  61. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  62. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  63. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  64. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  65. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  66. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  67. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  68. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  69. khoj/database/migrations/0045_fileobject.py +37 -0
  70. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  71. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  72. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  73. khoj/database/migrations/0049_datastore.py +38 -0
  74. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  75. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  76. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  77. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  78. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  79. khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
  80. khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
  81. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  82. khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
  83. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  84. khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
  85. khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
  86. khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
  87. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  88. khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
  89. khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
  90. khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
  91. khoj/database/migrations/0063_conversation_temp_id.py +36 -0
  92. khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
  93. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  94. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  95. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  96. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  97. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  98. khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
  99. khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
  100. khoj/database/migrations/0072_entry_search_model.py +24 -0
  101. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  102. khoj/database/migrations/0074_alter_conversation_title.py +17 -0
  103. khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
  104. khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
  105. khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
  106. khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
  107. khoj/database/migrations/__init__.py +0 -0
  108. khoj/database/models/__init__.py +725 -0
  109. khoj/database/tests.py +3 -0
  110. khoj/interface/compiled/404/index.html +1 -0
  111. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
  112. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
  113. khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
  114. khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
  115. khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
  116. khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
  117. khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
  118. khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
  119. khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
  120. khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
  121. khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
  122. khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
  123. khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
  124. khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
  125. khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
  126. khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
  127. khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
  128. khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
  129. khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
  130. khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
  131. khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
  132. khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
  133. khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
  134. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  135. khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
  136. khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
  137. khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
  138. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  139. khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
  140. khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
  141. khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
  142. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  143. khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
  144. khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
  145. khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
  146. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  147. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
  148. khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
  149. khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
  150. khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
  151. khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
  152. khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
  153. khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
  154. khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
  155. khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  156. khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
  157. khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
  158. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  159. khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
  160. khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
  161. khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
  162. khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
  163. khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
  164. khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
  165. khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
  166. khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
  167. khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
  168. khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
  169. khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
  170. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
  171. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
  172. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
  173. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
  174. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
  175. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
  176. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
  177. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
  178. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
  179. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
  180. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
  181. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
  182. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
  183. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
  184. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
  185. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
  186. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
  187. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
  188. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
  189. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
  190. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
  191. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
  192. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
  193. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
  194. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
  195. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
  196. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
  197. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
  198. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
  199. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
  200. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
  201. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
  202. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
  203. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
  204. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
  205. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
  206. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
  207. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
  208. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
  209. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
  210. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
  211. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
  212. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
  213. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
  214. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
  215. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
  216. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
  217. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
  218. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
  219. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
  220. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
  221. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
  222. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
  223. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
  224. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
  225. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
  226. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
  227. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
  228. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
  229. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
  230. khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
  231. khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
  232. khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
  233. khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
  234. khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
  235. khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
  236. khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
  237. khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
  238. khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
  239. khoj/interface/compiled/agents/index.html +1 -0
  240. khoj/interface/compiled/agents/index.txt +7 -0
  241. khoj/interface/compiled/agents.svg +6 -0
  242. khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
  243. khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
  244. khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
  245. khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
  246. khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
  247. khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
  248. khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
  249. khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
  250. khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
  251. khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
  252. khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
  253. khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
  254. khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
  255. khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
  256. khoj/interface/compiled/automation.svg +37 -0
  257. khoj/interface/compiled/automations/index.html +1 -0
  258. khoj/interface/compiled/automations/index.txt +8 -0
  259. khoj/interface/compiled/chat/index.html +1 -0
  260. khoj/interface/compiled/chat/index.txt +7 -0
  261. khoj/interface/compiled/chat.svg +24 -0
  262. khoj/interface/compiled/close.svg +5 -0
  263. khoj/interface/compiled/copy-button-success.svg +6 -0
  264. khoj/interface/compiled/copy-button.svg +5 -0
  265. khoj/interface/compiled/index.html +1 -0
  266. khoj/interface/compiled/index.txt +7 -0
  267. khoj/interface/compiled/khoj.webmanifest +76 -0
  268. khoj/interface/compiled/logo.svg +24 -0
  269. khoj/interface/compiled/search/index.html +1 -0
  270. khoj/interface/compiled/search/index.txt +7 -0
  271. khoj/interface/compiled/send.svg +1 -0
  272. khoj/interface/compiled/settings/index.html +1 -0
  273. khoj/interface/compiled/settings/index.txt +9 -0
  274. khoj/interface/compiled/share/chat/index.html +1 -0
  275. khoj/interface/compiled/share/chat/index.txt +7 -0
  276. khoj/interface/compiled/share.svg +8 -0
  277. khoj/interface/compiled/thumbs-down.svg +6 -0
  278. khoj/interface/compiled/thumbs-up.svg +6 -0
  279. khoj/interface/email/feedback.html +34 -0
  280. khoj/interface/email/magic_link.html +40 -0
  281. khoj/interface/email/task.html +37 -0
  282. khoj/interface/email/welcome.html +90 -0
  283. khoj/interface/web/.well-known/assetlinks.json +11 -0
  284. khoj/interface/web/assets/icons/agents.svg +19 -0
  285. khoj/interface/web/assets/icons/automation.svg +43 -0
  286. khoj/interface/web/assets/icons/chat.svg +24 -0
  287. khoj/interface/web/assets/icons/github.svg +1 -0
  288. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  289. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  290. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
  291. khoj/interface/web/assets/icons/khoj.svg +26 -0
  292. khoj/interface/web/assets/icons/logotype.svg +1 -0
  293. khoj/interface/web/assets/icons/search.svg +57 -0
  294. khoj/interface/web/assets/icons/sync.svg +4 -0
  295. khoj/interface/web/assets/khoj.css +237 -0
  296. khoj/interface/web/assets/utils.js +33 -0
  297. khoj/interface/web/base_config.html +445 -0
  298. khoj/interface/web/content_source_github_input.html +208 -0
  299. khoj/interface/web/login.html +310 -0
  300. khoj/interface/web/utils.html +48 -0
  301. khoj/main.py +249 -0
  302. khoj/manage.py +22 -0
  303. khoj/migrations/__init__.py +0 -0
  304. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  305. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  306. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  307. khoj/migrations/migrate_offline_model.py +29 -0
  308. khoj/migrations/migrate_processor_config_openai.py +67 -0
  309. khoj/migrations/migrate_server_pg.py +132 -0
  310. khoj/migrations/migrate_version.py +17 -0
  311. khoj/processor/__init__.py +0 -0
  312. khoj/processor/content/__init__.py +0 -0
  313. khoj/processor/content/docx/__init__.py +0 -0
  314. khoj/processor/content/docx/docx_to_entries.py +111 -0
  315. khoj/processor/content/github/__init__.py +0 -0
  316. khoj/processor/content/github/github_to_entries.py +226 -0
  317. khoj/processor/content/images/__init__.py +0 -0
  318. khoj/processor/content/images/image_to_entries.py +117 -0
  319. khoj/processor/content/markdown/__init__.py +0 -0
  320. khoj/processor/content/markdown/markdown_to_entries.py +160 -0
  321. khoj/processor/content/notion/notion_to_entries.py +259 -0
  322. khoj/processor/content/org_mode/__init__.py +0 -0
  323. khoj/processor/content/org_mode/org_to_entries.py +226 -0
  324. khoj/processor/content/org_mode/orgnode.py +532 -0
  325. khoj/processor/content/pdf/__init__.py +0 -0
  326. khoj/processor/content/pdf/pdf_to_entries.py +119 -0
  327. khoj/processor/content/plaintext/__init__.py +0 -0
  328. khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
  329. khoj/processor/content/text_to_entries.py +296 -0
  330. khoj/processor/conversation/__init__.py +0 -0
  331. khoj/processor/conversation/anthropic/__init__.py +0 -0
  332. khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
  333. khoj/processor/conversation/anthropic/utils.py +217 -0
  334. khoj/processor/conversation/google/__init__.py +0 -0
  335. khoj/processor/conversation/google/gemini_chat.py +253 -0
  336. khoj/processor/conversation/google/utils.py +260 -0
  337. khoj/processor/conversation/offline/__init__.py +0 -0
  338. khoj/processor/conversation/offline/chat_model.py +308 -0
  339. khoj/processor/conversation/offline/utils.py +80 -0
  340. khoj/processor/conversation/offline/whisper.py +15 -0
  341. khoj/processor/conversation/openai/__init__.py +0 -0
  342. khoj/processor/conversation/openai/gpt.py +243 -0
  343. khoj/processor/conversation/openai/utils.py +232 -0
  344. khoj/processor/conversation/openai/whisper.py +13 -0
  345. khoj/processor/conversation/prompts.py +1188 -0
  346. khoj/processor/conversation/utils.py +867 -0
  347. khoj/processor/embeddings.py +122 -0
  348. khoj/processor/image/generate.py +215 -0
  349. khoj/processor/speech/__init__.py +0 -0
  350. khoj/processor/speech/text_to_speech.py +51 -0
  351. khoj/processor/tools/__init__.py +0 -0
  352. khoj/processor/tools/online_search.py +472 -0
  353. khoj/processor/tools/run_code.py +179 -0
  354. khoj/routers/__init__.py +0 -0
  355. khoj/routers/api.py +760 -0
  356. khoj/routers/api_agents.py +295 -0
  357. khoj/routers/api_chat.py +1273 -0
  358. khoj/routers/api_content.py +634 -0
  359. khoj/routers/api_model.py +123 -0
  360. khoj/routers/api_phone.py +86 -0
  361. khoj/routers/api_subscription.py +144 -0
  362. khoj/routers/auth.py +307 -0
  363. khoj/routers/email.py +135 -0
  364. khoj/routers/helpers.py +2333 -0
  365. khoj/routers/notion.py +85 -0
  366. khoj/routers/research.py +364 -0
  367. khoj/routers/storage.py +63 -0
  368. khoj/routers/twilio.py +36 -0
  369. khoj/routers/web_client.py +141 -0
  370. khoj/search_filter/__init__.py +0 -0
  371. khoj/search_filter/base_filter.py +15 -0
  372. khoj/search_filter/date_filter.py +215 -0
  373. khoj/search_filter/file_filter.py +32 -0
  374. khoj/search_filter/word_filter.py +29 -0
  375. khoj/search_type/__init__.py +0 -0
  376. khoj/search_type/text_search.py +255 -0
  377. khoj/utils/__init__.py +0 -0
  378. khoj/utils/cli.py +101 -0
  379. khoj/utils/config.py +81 -0
  380. khoj/utils/constants.py +51 -0
  381. khoj/utils/fs_syncer.py +252 -0
  382. khoj/utils/helpers.py +627 -0
  383. khoj/utils/initialization.py +301 -0
  384. khoj/utils/jsonl.py +43 -0
  385. khoj/utils/models.py +47 -0
  386. khoj/utils/rawconfig.py +208 -0
  387. khoj/utils/state.py +48 -0
  388. khoj/utils/yaml.py +47 -0
  389. khoj-1.33.3.dev32.dist-info/METADATA +190 -0
  390. khoj-1.33.3.dev32.dist-info/RECORD +393 -0
  391. khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
  392. khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
  393. khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,472 @@
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import os
5
+ import urllib.parse
6
+ from collections import defaultdict
7
+ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
8
+
9
+ import aiohttp
10
+ from bs4 import BeautifulSoup
11
+ from markdownify import markdownify
12
+
13
+ from khoj.database.adapters import ConversationAdapters
14
+ from khoj.database.models import Agent, KhojUser, ServerChatSettings, WebScraper
15
+ from khoj.processor.conversation import prompts
16
+ from khoj.routers.helpers import (
17
+ ChatEvent,
18
+ extract_relevant_info,
19
+ generate_online_subqueries,
20
+ infer_webpage_urls,
21
+ )
22
+ from khoj.utils.helpers import (
23
+ is_env_var_true,
24
+ is_internal_url,
25
+ is_internet_connected,
26
+ is_none_or_empty,
27
+ timer,
28
+ )
29
+ from khoj.utils.rawconfig import LocationData
30
+
31
logger = logging.getLogger(__name__)

# Search provider credentials and endpoints, read once at import time.
# search_online prefers Serper when its key is set, then Jina, then SearXNG.
SERPER_DEV_API_KEY = os.environ.get("SERPER_DEV_API_KEY")
SERPER_DEV_URL = "https://google.serper.dev/search"

JINA_SEARCH_API_URL = "https://s.jina.ai/"
JINA_API_KEY = os.environ.get("JINA_API_KEY")

# Feature flag taken from the FIRECRAWL_USE_LLM_EXTRACT environment variable.
FIRECRAWL_USE_LLM_EXTRACT = is_env_var_true("FIRECRAWL_USE_LLM_EXTRACT")

# Query parameters for Olostep scrape requests.
# NOTE(review): boolean options are deliberately the strings "True"/"False", not Python bools.
OLOSTEP_QUERY_PARAMS = dict(
    timeout=35,  # seconds
    waitBeforeScraping=0,  # seconds
    saveHtml="False",
    saveMarkdown="True",
    removeCSSselectors="default",
    htmlTransformer="none",
    removeImages="True",
    fastLane="True",
    # Similar to Stripe's API, the expand parameters avoid the need to make a second API call
    # to retrieve the dataset (from the dataset API) if you only need the markdown or html.
    expandMarkdown="True",
    expandHtml="False",
)

# Default number of organic results per subquery whose pages search_online reads.
DEFAULT_MAX_WEBPAGES_TO_READ = 1
MAX_WEBPAGES_TO_INFER = 10
58
+
59
+
60
async def search_online(
    query: str,
    conversation_history: dict,
    location: LocationData,
    user: KhojUser,
    send_status_func: Optional[Callable] = None,
    custom_filters: Optional[List[str]] = None,
    max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
    query_images: List[str] = None,
    previous_subqueries: Optional[Set] = None,
    agent: Agent = None,
    query_files: str = None,
    tracer: Optional[dict] = None,
):
    """Search the internet for a query and read the most relevant web pages.

    This is an async generator. While working it yields status events as
    ``{ChatEvent.STATUS: event}`` (only when ``send_status_func`` is given) and,
    as its last item, the search results keyed by subquery. It yields an empty
    dict and stops when there is no internet connection or when every generated
    subquery is already in ``previous_subqueries``.

    Args:
        query: The user query to search for.
        conversation_history: Chat history passed to the subquery generator.
        location: User location, passed to the subquery generator and search provider.
        user: The user on whose behalf the search runs.
        send_status_func: Optional async generator function used to emit status messages.
        custom_filters: Extra search terms (e.g. ``site:`` filters) appended to the query.
        max_webpages_to_read: Number of leading organic results per subquery to read.
        query_images: Optional images attached to the query.
        previous_subqueries: Subqueries already searched; these are not searched again.
        agent: Optional agent passed to the subquery generator and webpage reader.
        query_files: Optional attached file content passed to the subquery generator.
        tracer: Optional tracing dict forwarded to the helper calls.

    Yields:
        Status event dicts, then a dict mapping each subquery to its search result.
        Subqueries whose web page was read get a ``"webpages"`` entry holding the
        page ``link`` and extracted ``snippet``.
    """
    # Create fresh containers per call instead of sharing mutable default arguments.
    custom_filters = [] if custom_filters is None else custom_filters
    previous_subqueries = set() if previous_subqueries is None else previous_subqueries
    tracer = {} if tracer is None else tracer

    # Separate the query and each filter with a space so the first filter
    # is not glued onto the last word of the query.
    if custom_filters:
        query = " ".join([query, *custom_filters])

    if not is_internet_connected():
        logger.warning("Cannot search online as not connected to internet")
        yield {}
        return

    # Breakdown the query into subqueries to get the correct answer
    new_subqueries = await generate_online_subqueries(
        query,
        conversation_history,
        location,
        user,
        query_images=query_images,
        agent=agent,
        tracer=tracer,
        query_files=query_files,
    )
    subqueries = list(new_subqueries - previous_subqueries)
    response_dict: Dict[str, Dict[str, List[Dict] | Dict]] = {}

    if is_none_or_empty(subqueries):
        logger.info("No new subqueries to search online")
        yield response_dict
        return

    logger.info(f"🌐 Searching the Internet for {subqueries}")
    if send_status_func:
        subqueries_str = "\n- " + "\n- ".join(subqueries)
        async for event in send_status_func(f"**Searching the Internet for**: {subqueries_str}"):
            yield {ChatEvent.STATUS: event}

    # Pick the search provider based on which API keys are configured.
    if SERPER_DEV_API_KEY:
        search_func = search_with_serper
    elif JINA_API_KEY:
        search_func = search_with_jina
    else:
        search_func = search_with_searxng

    # Run all subquery searches concurrently. Each returns a (subquery, result) pair.
    with timer(f"Internet searches for {subqueries} took", logger):
        search_tasks = [search_func(subquery, location) for subquery in subqueries]
        search_results = await asyncio.gather(*search_tasks)
        response_dict = {subquery: search_result for subquery, search_result in search_results}

    # Gather distinct web pages from organic results for subqueries without an instant answer.
    webpages: Dict[str, Dict] = {}
    for subquery, search_result in response_dict.items():
        if "answerBox" in search_result:
            continue
        for idx, organic in enumerate(search_result.get("organic", [])):
            if idx < max_webpages_to_read:
                link = organic.get("link")
                if link in webpages:
                    webpages[link]["queries"].add(subquery)
                else:
                    # Content of web pages is directly available when Jina is used for search.
                    webpages[link] = {"queries": {subquery}, "content": organic.get("content")}
            elif not is_none_or_empty(organic.get("content")):
                # Only keep webpage content for up to max_webpages_to_read organic results.
                # The result dict is mutated in place, so no reassignment is needed.
                organic["content"] = None

    # Read, extract relevant info from the retrieved web pages
    if webpages:
        logger.info(f"Reading web pages at: {webpages.keys()}")
        if send_status_func:
            webpage_links_str = "\n- " + "\n- ".join(webpages.keys())
            async for event in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
                yield {ChatEvent.STATUS: event}
        tasks = [
            read_webpage_and_extract_content(
                data["queries"], link, data.get("content"), user=user, agent=agent, tracer=tracer
            )
            for link, data in webpages.items()
        ]
        results = await asyncio.gather(*tasks)

        # Collect extracted info from the retrieved web pages
        # NOTE(review): the extract is attached to only one of the page's subqueries, and a later
        # page for the same subquery overwrites an earlier one. Kept as-is to preserve the
        # response shape consumers expect.
        for page_subqueries, url, webpage_extract in results:
            if webpage_extract is not None:
                response_dict[page_subqueries.pop()]["webpages"] = {"link": url, "snippet": webpage_extract}

    yield response_dict
155
+
156
+
157
async def search_with_searxng(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
    """
    Search the web for the query through a SearXNG instance.

    The instance base URL is read from the KHOJ_SEARXNG_URL environment variable
    and defaults to http://localhost:42113. The HTML result page is scraped for
    result articles.

    Returns the query paired with {"organic": [...]} where each entry has a
    title, link and description. Returns the query paired with an empty dict
    when the request does not return HTTP 200 or any error is raised.
    """
    base_url = os.getenv("KHOJ_SEARXNG_URL", "http://localhost:42113")
    endpoint = f"{base_url}/search"

    # Fall back to the US when the user's country is unknown
    if location and location.country_code:
        country = location.country_code.lower()
    else:
        country = "us"

    request_params = {
        "q": query,
        "format": "html",
        "language": "en",
        "country": country,
        "categories": "general",
    }

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(endpoint, params=request_params) as response:
                if response.status != 200:
                    logger.error(f"SearXNG search failed to call {base_url}: {await response.text()}")
                    return query, {}

                page = BeautifulSoup(await response.text(), "html.parser")

                hits = []
                for article in page.find_all("article", class_="result"):
                    anchor = article.find("a", rel="noreferrer")
                    # Skip result articles without a title link
                    if not anchor:
                        continue
                    title = anchor.text.strip()
                    link = anchor["href"]
                    summary = article.find("p", class_="content")
                    description = summary.text.strip() if summary else None
                    hits.append({"title": title, "link": link, "description": description})

                return query, {"organic": hits}
        except Exception as e:
            # Best effort search: log and report no results on any failure
            logger.error(f"Error searching with SearXNG: {str(e)}")
            return query, {}
196
+
197
+
198
async def search_with_serper(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
    """
    Search the web for the query using the Serper.dev API.

    Returns the query paired with the non-empty "organic", "answerBox",
    "peopleAlsoAsk" and "knowledgeGraph" fields of the API response.
    Returns the query paired with an empty dict when the API does not
    respond with HTTP 200.
    """
    # Fall back to the US when the user's country is unknown
    if location and location.country_code:
        country = location.country_code.lower()
    else:
        country = "us"

    request_body = json.dumps({"q": query, "gl": country})
    request_headers = {"X-API-KEY": SERPER_DEV_API_KEY, "Content-Type": "application/json"}

    async with aiohttp.ClientSession() as session, session.post(
        SERPER_DEV_URL, headers=request_headers, data=request_body
    ) as response:
        if response.status != 200:
            logger.error(await response.text())
            return query, {}

        search_response = await response.json()

        # Keep only the fields of interest that actually carry data
        wanted_fields = ["organic", "answerBox", "peopleAlsoAsk", "knowledgeGraph"]
        search_result = {}
        for field in wanted_fields:
            if is_none_or_empty(search_response.get(field)):
                continue
            search_result[field] = search_response[field]

        return query, search_result
217
+
218
+
219
async def read_webpages(
    query: str,
    conversation_history: dict,
    location: LocationData,
    user: KhojUser,
    send_status_func: Optional[Callable] = None,
    query_images: List[str] = None,
    agent: Agent = None,
    max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
    query_files: str = None,
    tracer: dict = {},
):
    """
    Infer web pages to read from the query and extract relevant information from them.

    Yields {ChatEvent.STATUS: event} for every event produced by send_status_func
    (when provided), followed by a final dict mapping the query to
    {"webpages": [{"query", "link", "snippet"}, ...]}. Pages from which nothing
    could be extracted are left out.
    """
    logger.info("Inferring web pages to read")
    inferred_urls = await infer_webpage_urls(
        query,
        conversation_history,
        location,
        user,
        query_images,
        agent=agent,
        query_files=query_files,
        tracer=tracer,
    )

    # Read at most max_webpages_to_read of the inferred web pages
    urls = inferred_urls[:max_webpages_to_read]

    logger.info(f"Reading web pages at: {urls}")
    if send_status_func:
        bullet = "\n- "
        links_listing = bullet + bullet.join(list(urls))
        async for event in send_status_func(f"**Reading web pages**: {links_listing}"):
            yield {ChatEvent.STATUS: event}

    # Read all web pages concurrently
    extraction_results = await asyncio.gather(
        *[read_webpage_and_extract_content({query}, url, user=user, agent=agent, tracer=tracer) for url in urls]
    )

    # Keep only the pages that yielded an extract
    webpages = []
    for matched_queries, url, extract in extraction_results:
        if extract is None:
            continue
        webpages.append({"query": matched_queries.pop(), "link": url, "snippet": extract})

    response: Dict[str, Dict] = defaultdict(dict)
    response[query]["webpages"] = webpages
    yield response
260
+
261
+
262
async def read_webpage(
    url, scraper_type=None, api_key=None, api_url=None, subqueries=None, agent=None
) -> Tuple[str | None, str | None]:
    """
    Read the web page at url with the web scraper of the given type.

    Returns a (content, extracted_info) pair. Only Firecrawl with LLM extraction
    enabled fills extracted_info (and leaves content as None); every other
    scraper fills content and leaves extracted_info as None. Unrecognized
    scraper types read the page directly.
    """
    scraper_types = WebScraper.WebScraperType

    if scraper_type == scraper_types.FIRECRAWL:
        if FIRECRAWL_USE_LLM_EXTRACT:
            return None, await query_webpage_with_firecrawl(url, subqueries, api_key, api_url, agent)
        return await read_webpage_with_firecrawl(url, api_key, api_url), None

    if scraper_type == scraper_types.OLOSTEP:
        return await read_webpage_with_olostep(url, api_key, api_url), None

    if scraper_type == scraper_types.JINA:
        return await read_webpage_with_jina(url, api_key, api_url), None

    return await read_webpage_at_url(url), None
275
+
276
+
277
async def read_webpage_and_extract_content(
    subqueries: set[str],
    url: str,
    content: str = None,
    user: KhojUser = None,
    agent: Agent = None,
    tracer: dict = {},
) -> Tuple[set[str], str, Union[None, str]]:
    """
    Read the web page at url and extract the information relevant to the subqueries.

    Enabled web scrapers are tried in turn until one yields an extract. The page
    is only fetched when no content was passed in or read by an earlier scraper.

    Returns the (subqueries, url, extracted_info) triple. extracted_info stays
    None when no scraper produced an extract.
    """
    scrapers = await ConversationAdapters.aget_enabled_webscrapers()
    # Internal URLs may only be read with the direct web scraper
    if is_internal_url(url):
        direct_type = WebScraper.WebScraperType.DIRECT
        scrapers = [candidate for candidate in scrapers if candidate.type == direct_type]

    relevant_info = None
    for candidate in scrapers:
        try:
            # Fetch the page unless its content is already at hand
            if is_none_or_empty(content):
                with timer(f"Reading web page with {candidate.type} at '{url}' took", logger, log_level=logging.INFO):
                    content, relevant_info = await read_webpage(
                        url, candidate.type, candidate.api_key, candidate.api_url, subqueries, agent
                    )

            # Extract relevant information unless the scraper already did so
            if is_none_or_empty(relevant_info):
                with timer(f"Extracting relevant information from web page at '{url}' took", logger):
                    relevant_info = await extract_relevant_info(
                        subqueries, content, user=user, agent=agent, tracer=tracer
                    )

            # Try the next scraper when nothing could be extracted
            if is_none_or_empty(relevant_info):
                continue
            break
        except Exception as e:
            logger.warning(f"Failed to read web page with {candidate.type} at '{url}' with {e}")
            # Escalate to an error once the last scraper in the list has failed
            if candidate.name == scrapers[-1].name:
                logger.error(f"All web scrapers failed for '{url}'")

    return subqueries, url, relevant_info
319
+
320
+
321
async def read_webpage_at_url(web_url: str) -> str:
    """
    Fetch the web page at web_url directly and return its text content.

    The request is sent with a desktop browser User-Agent and a 30 second total
    timeout. The text is taken from the document's <body> when one exists and
    from the whole document otherwise, then passed through markdownify.

    Raises the aiohttp error for non-success HTTP statuses (via raise_for_status).
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
    }
    # Use an explicit ClientTimeout rather than the legacy bare number form
    timeout = aiohttp.ClientTimeout(total=30)

    async with aiohttp.ClientSession() as session:
        async with session.get(web_url, headers=headers, timeout=timeout) as response:
            response.raise_for_status()
            html = await response.text()

    parsed_html = BeautifulSoup(html, "html.parser")
    # html.parser does not synthesize a <body>, so fall back to the whole
    # document for fragments and body-less pages instead of failing on None
    text_root = parsed_html.body if parsed_html.body is not None else parsed_html
    body = text_root.get_text(separator="\n", strip=True)
    return markdownify(body)
333
+
334
+
335
async def read_webpage_with_olostep(web_url: str, api_key: str, api_url: str) -> str:
    """
    Read the web page at web_url through the Olostep API.

    Returns the "markdown_content" field of the API response.
    Raises the aiohttp error for non-success HTTP statuses.
    """
    auth_headers = {"Authorization": f"Bearer {api_key}"}
    # Start from the shared default query parameters and point them at this page
    request_params: Dict[str, Union[str, int, bool]] = OLOSTEP_QUERY_PARAMS.copy()  # type: ignore
    request_params["url"] = web_url

    async with aiohttp.ClientSession() as session, session.get(
        api_url, params=request_params, headers=auth_headers
    ) as response:
        response.raise_for_status()
        payload = await response.json()
        return payload["markdown_content"]
345
+
346
+
347
async def read_webpage_with_jina(web_url: str, api_key: str, api_url: str) -> str:
    """
    Read the web page at web_url through the Jina reader API.

    The Authorization header is only sent when an API key is given.
    Returns the "data" -> "content" field of the API response.
    Raises the aiohttp error for non-success HTTP statuses.
    """
    reader_url = f"{api_url}/{web_url}"
    request_headers = {"Accept": "application/json", "X-Timeout": "30"}
    if api_key:
        request_headers["Authorization"] = f"Bearer {api_key}"

    async with aiohttp.ClientSession() as session, session.get(reader_url, headers=request_headers) as response:
        response.raise_for_status()
        payload = await response.json()
        return payload["data"]["content"]
358
+
359
+
360
async def read_webpage_with_firecrawl(web_url: str, api_key: str, api_url: str) -> str:
    """
    Read the web page at web_url as markdown through the Firecrawl scrape API.

    Returns the "data" -> "markdown" field of the API response.
    Raises the aiohttp error for non-success HTTP statuses.
    """
    scrape_endpoint = f"{api_url}/v1/scrape"
    request_headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
    # Request markdown output, excluding script tags and elements with the .ad class
    scrape_request = {"url": web_url, "formats": ["markdown"], "excludeTags": ["script", ".ad"]}

    async with aiohttp.ClientSession() as session, session.post(
        scrape_endpoint, json=scrape_request, headers=request_headers
    ) as response:
        response.raise_for_status()
        payload = await response.json()
        return payload["data"]["markdown"]
370
+
371
+
372
async def query_webpage_with_firecrawl(
    web_url: str, queries: set[str], api_key: str, api_url: str, agent: Agent = None
) -> str:
    """
    Ask the Firecrawl scrape API to extract the information relevant to the queries from web_url.

    The extraction is steered by a system prompt (including the agent's
    personality when it has one) and a JSON schema with a single required
    "relevant_extract" string.

    Returns the "data" -> "extract" -> "relevant_extract" field of the API response.
    Raises the aiohttp error for non-success HTTP statuses.
    """
    scrape_endpoint = f"{api_url}/v1/scrape"
    request_headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

    # Shape of the structured extract requested from Firecrawl
    extract_schema = {
        "type": "object",
        "properties": {
            "relevant_extract": {"type": "string"},
        },
        "required": [
            "relevant_extract",
        ],
    }

    if agent and agent.personality:
        personality_context = prompts.personality_context.format(personality=agent.personality)
    else:
        personality_context = ""

    system_prompt = f"""
{prompts.system_prompt_extract_relevant_information}

{personality_context}
User Query: {", ".join(queries)}

Collate only relevant information from the website to answer the target query and in the provided JSON schema.
""".strip()

    scrape_request = {
        "url": web_url,
        "formats": ["extract"],
        "extract": {"systemPrompt": system_prompt, "schema": extract_schema},
    }

    async with aiohttp.ClientSession() as session, session.post(
        scrape_endpoint, json=scrape_request, headers=request_headers
    ) as response:
        response.raise_for_status()
        payload = await response.json()
        return payload["data"]["extract"]["relevant_extract"]
406
+
407
+
408
async def search_with_jina(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
    """
    Search the web for the query using the Jina search API.

    The API key is taken from the Jina web scraper configured in the database,
    preferring the one referenced by the server chat settings, and falls back
    to JINA_API_KEY. The location argument is accepted but not used.

    Returns the query paired with {"organic": [...]} where each entry has a
    title, content, snippet and link. Returns the query paired with an empty
    dict when the API does not respond with HTTP 200.
    """
    jina_type = WebScraper.WebScraperType.JINA

    # Prefer the Jina scraper referenced by the server chat settings
    server_settings = (
        await ServerChatSettings.objects.filter()
        .prefetch_related("web_scraper")
        .filter(web_scraper__type=jina_type)
        .afirst()
    )
    if server_settings and server_settings.web_scraper:
        jina_scraper = server_settings.web_scraper
    else:
        # Otherwise use the first Jina scraper configured in the database, if any
        jina_scraper = await WebScraper.objects.filter(type=jina_type).afirst()

    # The database scraper's API key wins over the environment provided one
    api_key = JINA_API_KEY
    if jina_scraper and jina_scraper.api_key:
        api_key = jina_scraper.api_key

    request_headers = {"Accept": "application/json"}
    if api_key:
        request_headers["Authorization"] = f"Bearer {api_key}"

    search_url = f"{JINA_SEARCH_API_URL}/{urllib.parse.quote(query)}"

    async with aiohttp.ClientSession() as session, session.get(search_url, headers=request_headers) as response:
        if response.status != 200:
            error_text = await response.text()
            logger.error(f"Jina search failed: {error_text}")
            return query, {}

        payload = await response.json()

        organic_results = []
        for item in payload["data"]:
            organic_results.append(
                {
                    "title": item["title"],
                    "content": item.get("content"),
                    # description is renamed to snippet for consistency
                    "snippet": item["description"],
                    # url is renamed to link for consistency
                    "link": item["url"],
                }
            )
        return query, {"organic": organic_results}
451
+
452
+
453
def deduplicate_organic_results(online_results: dict) -> dict:
    """
    Drop organic search results whose link was already seen in this or an earlier query.

    Queries are processed in iteration order, so the first occurrence of a link
    wins. Results without a link are dropped. All other fields of each query's
    results are carried over and the input is left unmodified.
    """
    visited_links = set()
    unique_results = {}

    for query, results in online_results.items():
        kept = []
        for entry in results.get("organic", []):
            url = entry.get("link")
            # Skip entries without a link and links seen before
            if not url or url in visited_links:
                continue
            visited_links.add(url)
            kept.append(entry)

        # Copy the query's results, swapping in the deduplicated organic entries
        query_results = dict(results)
        query_results["organic"] = kept
        unique_results[query] = query_results

    return unique_results
@@ -0,0 +1,179 @@
1
+ import base64
2
+ import datetime
3
+ import logging
4
+ import mimetypes
5
+ import os
6
+ from pathlib import Path
7
+ from typing import Any, Callable, List, NamedTuple, Optional
8
+
9
+ import aiohttp
10
+
11
+ from khoj.database.adapters import FileObjectAdapters
12
+ from khoj.database.models import Agent, FileObject, KhojUser
13
+ from khoj.processor.conversation import prompts
14
+ from khoj.processor.conversation.utils import (
15
+ ChatEvent,
16
+ clean_code_python,
17
+ construct_chat_history,
18
+ load_complex_json,
19
+ )
20
+ from khoj.routers.helpers import send_message_to_model_wrapper
21
+ from khoj.utils.helpers import is_none_or_empty, timer, truncate_code_context
22
+ from khoj.utils.rawconfig import LocationData
23
+
24
# Logger for this module
logger = logging.getLogger(__name__)


# URL of the Terrarium code sandbox. Override with the KHOJ_TERRARIUM_URL environment variable.
SANDBOX_URL = os.environ.get("KHOJ_TERRARIUM_URL", "http://localhost:8080")
28
+
29
+
30
class GeneratedCode(NamedTuple):
    """Code generated by the chat model together with the inputs it asked for."""

    # Python source code to execute
    code: str
    # Names of the user's files to make available to the code
    input_files: List[str]
    # Links listed by the model as inputs (presumably URLs; not consumed in this part of the file)
    input_links: List[str]
34
+
35
+
36
async def run_code(
    query: str,
    conversation_history: dict,
    context: str,
    location_data: LocationData,
    user: KhojUser,
    send_status_func: Optional[Callable] = None,
    query_images: List[str] = None,
    agent: Agent = None,
    sandbox_url: str = SANDBOX_URL,
    query_files: str = None,
    tracer: dict = {},
):
    """
    Generate Python code for the query, run it in the sandbox and stream the outcome.

    Yields {ChatEvent.STATUS: event} for every event produced by send_status_func
    (when provided), followed by a final {query: {"code": ..., "results": ...}} dict.

    Raises:
        ValueError: when code generation or code execution fails. The underlying
            error is chained as the cause.
    """
    # Generate Code
    if send_status_func:
        async for event in send_status_func(f"**Generate code snippet** for {query}"):
            yield {ChatEvent.STATUS: event}
    try:
        with timer("Chat actor: Generate programs to execute", logger):
            generated_code = await generate_python_code(
                query,
                conversation_history,
                context,
                location_data,
                user,
                query_images,
                agent,
                tracer,
                query_files,
            )
    except Exception as e:
        raise ValueError(f"Failed to generate code for {query} with error: {e}") from e

    # Prepare Input Data
    user_input_files: List[FileObject] = []
    for input_file in generated_code.input_files:
        user_input_files += await FileObjectAdapters.aget_file_objects_by_name(user, input_file)
    # The sandbox receives each file as a base name plus its base64 encoded text
    input_data = [
        {
            "filename": os.path.basename(f.file_name),
            "b64_data": base64.b64encode(f.raw_text.encode("utf-8")).decode("utf-8"),
        }
        for f in user_input_files
    ]

    # Run Code
    if send_status_func:
        async for event in send_status_func("**Running code snippet**"):
            yield {ChatEvent.STATUS: event}
    try:
        # Time only the sandbox call itself
        with timer("Chat actor: Execute generated program", logger, log_level=logging.INFO):
            result = await execute_sandboxed_python(generated_code.code, input_data, sandbox_url)
        code = result.pop("code")
        cleaned_result = truncate_code_context({"cleaned": {"results": result}})["cleaned"]["results"]
        logger.info(f"Executed Code\n----\n{code}\n----\nResult\n----\n{cleaned_result}\n----")
    except Exception as e:
        raise ValueError(f"Failed to run code for {query} with error: {e}") from e

    # Yield outside the try block so consumer-side errors are not reported as execution failures
    yield {query: {"code": code, "results": result}}
95
+
96
+
97
async def generate_python_code(
    q: str,
    conversation_history: dict,
    context: str,
    location_data: LocationData,
    user: KhojUser,
    query_images: list[str] = None,
    agent: Agent = None,
    tracer: dict = {},
    query_files: str = None,
) -> GeneratedCode:
    """
    Ask the chat model to write Python code for the query.

    The prompt is built from the query, chat history, context, user location,
    user name, current UTC date and the agent's personality (when it has one).
    The model is asked for a JSON object with "code", "input_files" and
    "input_links" fields.

    Returns:
        The generated code with its requested input files and links.

    Raises:
        ValueError: when the model response is not a JSON object or carries no code.
    """
    location = f"{location_data}" if location_data else "Unknown"
    username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else ""
    chat_history = construct_chat_history(conversation_history)

    utc_date = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
    personality_context = (
        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
    )

    code_generation_prompt = prompts.python_code_generation_prompt.format(
        current_date=utc_date,
        query=q,
        chat_history=chat_history,
        context=context,
        location=location,
        username=username,
        personality_context=personality_context,
    )

    response = await send_message_to_model_wrapper(
        code_generation_prompt,
        query_images=query_images,
        response_type="json_object",
        user=user,
        tracer=tracer,
        query_files=query_files,
    )

    # Validate that the response is a JSON object carrying non-empty code
    response = load_complex_json(response)
    if not isinstance(response, dict):
        raise ValueError(f"Expected a JSON object from the code generation model, got {type(response).__name__}")

    # Check the type before stripping so a null or non-string code field is rejected cleanly
    raw_code = response.get("code")
    code = raw_code.strip() if isinstance(raw_code, str) else ""
    if is_none_or_empty(code):
        raise ValueError("Code generation model response did not include any code")

    # Treat missing and null input fields alike so callers can always iterate them
    input_files = response.get("input_files") or []
    input_links = response.get("input_links") or []

    return GeneratedCode(code, input_files, input_links)
145
+
146
+
147
async def execute_sandboxed_python(code: str, input_data: list[dict], sandbox_url: str = SANDBOX_URL) -> dict[str, Any]:
    """
    Takes code to run as a string and calls the terrarium API to execute it.
    Returns the result of the code execution as a dictionary.

    The returned dictionary always carries the cleaned "code" and an
    "output_files" list. Output files with a text mime type or a .org, .md or
    .json suffix have their "b64_data" decoded to a UTF-8 string. When the
    sandbox does not respond with HTTP 200, a dictionary with "success" set to
    False and the status in "std_err" is returned instead.

    Reference data i/o format based on Terrarium example client code at:
    https://github.com/cohere-ai/cohere-terrarium/blob/main/example-clients/python/terrarium_client.py
    """
    headers = {"Content-Type": "application/json"}
    cleaned_code = clean_code_python(code)
    data = {"code": cleaned_code, "files": input_data}

    async with aiohttp.ClientSession() as session:
        async with session.post(sandbox_url, json=data, headers=headers) as response:
            if response.status != 200:
                return {
                    "code": cleaned_code,
                    "success": False,
                    "std_err": f"Failed to execute code with {response.status}",
                    "output_files": [],
                }

            result: dict[str, Any] = await response.json()
            result["code"] = cleaned_code
            # Store decoded output files. Treat a missing or null list as no output files
            result["output_files"] = result.get("output_files") or []
            for output_file in result["output_files"]:
                filename = output_file["filename"]
                # guess_type returns None for unknown extensions (e.g. .org), so default
                # to an empty mime type to let the suffix check below still apply
                mime_type = mimetypes.guess_type(filename)[0] or ""
                # Decode text files as UTF-8
                if mime_type.startswith("text/") or Path(filename).suffix in [".org", ".md", ".json"]:
                    output_file["b64_data"] = base64.b64decode(output_file["b64_data"]).decode("utf-8")
            return result
File without changes