khoj 1.33.3.dev32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +218 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +452 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1821 -0
  10. khoj/database/admin.py +417 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_default_model.py +116 -0
  15. khoj/database/management/commands/change_generated_images_url.py +61 -0
  16. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  17. khoj/database/migrations/0001_khojuser.py +98 -0
  18. khoj/database/migrations/0002_googleuser.py +32 -0
  19. khoj/database/migrations/0003_vector_extension.py +10 -0
  20. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  21. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  22. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  23. khoj/database/migrations/0007_add_conversation.py +27 -0
  24. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  25. khoj/database/migrations/0009_khojapiuser.py +24 -0
  26. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  27. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  28. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  29. khoj/database/migrations/0012_entry_file_source.py +21 -0
  30. khoj/database/migrations/0013_subscription.py +37 -0
  31. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  32. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  33. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  34. khoj/database/migrations/0017_searchmodel.py +32 -0
  35. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  36. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  37. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  38. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  39. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  40. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  41. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  42. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  43. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  45. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  46. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  47. khoj/database/migrations/0029_userrequests.py +27 -0
  48. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  49. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  50. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  51. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  52. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  53. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  54. khoj/database/migrations/0035_processlock.py +26 -0
  55. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  56. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  57. khoj/database/migrations/0036_publicconversation.py +42 -0
  58. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  59. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  60. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  61. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  62. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  63. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  64. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  65. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  66. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  67. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  68. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  69. khoj/database/migrations/0045_fileobject.py +37 -0
  70. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  71. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  72. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  73. khoj/database/migrations/0049_datastore.py +38 -0
  74. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  75. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  76. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  77. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  78. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  79. khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
  80. khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
  81. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  82. khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
  83. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  84. khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
  85. khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
  86. khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
  87. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  88. khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
  89. khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
  90. khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
  91. khoj/database/migrations/0063_conversation_temp_id.py +36 -0
  92. khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
  93. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  94. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  95. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  96. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  97. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  98. khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
  99. khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
  100. khoj/database/migrations/0072_entry_search_model.py +24 -0
  101. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  102. khoj/database/migrations/0074_alter_conversation_title.py +17 -0
  103. khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
  104. khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
  105. khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
  106. khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
  107. khoj/database/migrations/__init__.py +0 -0
  108. khoj/database/models/__init__.py +725 -0
  109. khoj/database/tests.py +3 -0
  110. khoj/interface/compiled/404/index.html +1 -0
  111. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
  112. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
  113. khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
  114. khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
  115. khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
  116. khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
  117. khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
  118. khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
  119. khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
  120. khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
  121. khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
  122. khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
  123. khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
  124. khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
  125. khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
  126. khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
  127. khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
  128. khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
  129. khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
  130. khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
  131. khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
  132. khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
  133. khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
  134. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  135. khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
  136. khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
  137. khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
  138. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  139. khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
  140. khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
  141. khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
  142. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  143. khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
  144. khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
  145. khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
  146. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  147. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
  148. khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
  149. khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
  150. khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
  151. khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
  152. khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
  153. khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
  154. khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
  155. khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  156. khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
  157. khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
  158. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  159. khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
  160. khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
  161. khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
  162. khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
  163. khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
  164. khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
  165. khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
  166. khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
  167. khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
  168. khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
  169. khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
  170. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
  171. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
  172. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
  173. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
  174. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
  175. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
  176. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
  177. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
  178. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
  179. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
  180. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
  181. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
  182. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
  183. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
  184. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
  185. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
  186. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
  187. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
  188. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
  189. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
  190. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
  191. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
  192. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
  193. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
  194. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
  195. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
  196. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
  197. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
  198. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
  199. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
  200. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
  201. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
  202. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
  203. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
  204. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
  205. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
  206. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
  207. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
  208. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
  209. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
  210. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
  211. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
  212. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
  213. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
  214. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
  215. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
  216. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
  217. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
  218. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
  219. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
  220. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
  221. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
  222. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
  223. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
  224. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
  225. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
  226. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
  227. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
  228. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
  229. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
  230. khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
  231. khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
  232. khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
  233. khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
  234. khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
  235. khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
  236. khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
  237. khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
  238. khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
  239. khoj/interface/compiled/agents/index.html +1 -0
  240. khoj/interface/compiled/agents/index.txt +7 -0
  241. khoj/interface/compiled/agents.svg +6 -0
  242. khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
  243. khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
  244. khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
  245. khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
  246. khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
  247. khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
  248. khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
  249. khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
  250. khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
  251. khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
  252. khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
  253. khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
  254. khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
  255. khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
  256. khoj/interface/compiled/automation.svg +37 -0
  257. khoj/interface/compiled/automations/index.html +1 -0
  258. khoj/interface/compiled/automations/index.txt +8 -0
  259. khoj/interface/compiled/chat/index.html +1 -0
  260. khoj/interface/compiled/chat/index.txt +7 -0
  261. khoj/interface/compiled/chat.svg +24 -0
  262. khoj/interface/compiled/close.svg +5 -0
  263. khoj/interface/compiled/copy-button-success.svg +6 -0
  264. khoj/interface/compiled/copy-button.svg +5 -0
  265. khoj/interface/compiled/index.html +1 -0
  266. khoj/interface/compiled/index.txt +7 -0
  267. khoj/interface/compiled/khoj.webmanifest +76 -0
  268. khoj/interface/compiled/logo.svg +24 -0
  269. khoj/interface/compiled/search/index.html +1 -0
  270. khoj/interface/compiled/search/index.txt +7 -0
  271. khoj/interface/compiled/send.svg +1 -0
  272. khoj/interface/compiled/settings/index.html +1 -0
  273. khoj/interface/compiled/settings/index.txt +9 -0
  274. khoj/interface/compiled/share/chat/index.html +1 -0
  275. khoj/interface/compiled/share/chat/index.txt +7 -0
  276. khoj/interface/compiled/share.svg +8 -0
  277. khoj/interface/compiled/thumbs-down.svg +6 -0
  278. khoj/interface/compiled/thumbs-up.svg +6 -0
  279. khoj/interface/email/feedback.html +34 -0
  280. khoj/interface/email/magic_link.html +40 -0
  281. khoj/interface/email/task.html +37 -0
  282. khoj/interface/email/welcome.html +90 -0
  283. khoj/interface/web/.well-known/assetlinks.json +11 -0
  284. khoj/interface/web/assets/icons/agents.svg +19 -0
  285. khoj/interface/web/assets/icons/automation.svg +43 -0
  286. khoj/interface/web/assets/icons/chat.svg +24 -0
  287. khoj/interface/web/assets/icons/github.svg +1 -0
  288. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  289. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  290. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
  291. khoj/interface/web/assets/icons/khoj.svg +26 -0
  292. khoj/interface/web/assets/icons/logotype.svg +1 -0
  293. khoj/interface/web/assets/icons/search.svg +57 -0
  294. khoj/interface/web/assets/icons/sync.svg +4 -0
  295. khoj/interface/web/assets/khoj.css +237 -0
  296. khoj/interface/web/assets/utils.js +33 -0
  297. khoj/interface/web/base_config.html +445 -0
  298. khoj/interface/web/content_source_github_input.html +208 -0
  299. khoj/interface/web/login.html +310 -0
  300. khoj/interface/web/utils.html +48 -0
  301. khoj/main.py +249 -0
  302. khoj/manage.py +22 -0
  303. khoj/migrations/__init__.py +0 -0
  304. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  305. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  306. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  307. khoj/migrations/migrate_offline_model.py +29 -0
  308. khoj/migrations/migrate_processor_config_openai.py +67 -0
  309. khoj/migrations/migrate_server_pg.py +132 -0
  310. khoj/migrations/migrate_version.py +17 -0
  311. khoj/processor/__init__.py +0 -0
  312. khoj/processor/content/__init__.py +0 -0
  313. khoj/processor/content/docx/__init__.py +0 -0
  314. khoj/processor/content/docx/docx_to_entries.py +111 -0
  315. khoj/processor/content/github/__init__.py +0 -0
  316. khoj/processor/content/github/github_to_entries.py +226 -0
  317. khoj/processor/content/images/__init__.py +0 -0
  318. khoj/processor/content/images/image_to_entries.py +117 -0
  319. khoj/processor/content/markdown/__init__.py +0 -0
  320. khoj/processor/content/markdown/markdown_to_entries.py +160 -0
  321. khoj/processor/content/notion/notion_to_entries.py +259 -0
  322. khoj/processor/content/org_mode/__init__.py +0 -0
  323. khoj/processor/content/org_mode/org_to_entries.py +226 -0
  324. khoj/processor/content/org_mode/orgnode.py +532 -0
  325. khoj/processor/content/pdf/__init__.py +0 -0
  326. khoj/processor/content/pdf/pdf_to_entries.py +119 -0
  327. khoj/processor/content/plaintext/__init__.py +0 -0
  328. khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
  329. khoj/processor/content/text_to_entries.py +296 -0
  330. khoj/processor/conversation/__init__.py +0 -0
  331. khoj/processor/conversation/anthropic/__init__.py +0 -0
  332. khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
  333. khoj/processor/conversation/anthropic/utils.py +217 -0
  334. khoj/processor/conversation/google/__init__.py +0 -0
  335. khoj/processor/conversation/google/gemini_chat.py +253 -0
  336. khoj/processor/conversation/google/utils.py +260 -0
  337. khoj/processor/conversation/offline/__init__.py +0 -0
  338. khoj/processor/conversation/offline/chat_model.py +308 -0
  339. khoj/processor/conversation/offline/utils.py +80 -0
  340. khoj/processor/conversation/offline/whisper.py +15 -0
  341. khoj/processor/conversation/openai/__init__.py +0 -0
  342. khoj/processor/conversation/openai/gpt.py +243 -0
  343. khoj/processor/conversation/openai/utils.py +232 -0
  344. khoj/processor/conversation/openai/whisper.py +13 -0
  345. khoj/processor/conversation/prompts.py +1188 -0
  346. khoj/processor/conversation/utils.py +867 -0
  347. khoj/processor/embeddings.py +122 -0
  348. khoj/processor/image/generate.py +215 -0
  349. khoj/processor/speech/__init__.py +0 -0
  350. khoj/processor/speech/text_to_speech.py +51 -0
  351. khoj/processor/tools/__init__.py +0 -0
  352. khoj/processor/tools/online_search.py +472 -0
  353. khoj/processor/tools/run_code.py +179 -0
  354. khoj/routers/__init__.py +0 -0
  355. khoj/routers/api.py +760 -0
  356. khoj/routers/api_agents.py +295 -0
  357. khoj/routers/api_chat.py +1273 -0
  358. khoj/routers/api_content.py +634 -0
  359. khoj/routers/api_model.py +123 -0
  360. khoj/routers/api_phone.py +86 -0
  361. khoj/routers/api_subscription.py +144 -0
  362. khoj/routers/auth.py +307 -0
  363. khoj/routers/email.py +135 -0
  364. khoj/routers/helpers.py +2333 -0
  365. khoj/routers/notion.py +85 -0
  366. khoj/routers/research.py +364 -0
  367. khoj/routers/storage.py +63 -0
  368. khoj/routers/twilio.py +36 -0
  369. khoj/routers/web_client.py +141 -0
  370. khoj/search_filter/__init__.py +0 -0
  371. khoj/search_filter/base_filter.py +15 -0
  372. khoj/search_filter/date_filter.py +215 -0
  373. khoj/search_filter/file_filter.py +32 -0
  374. khoj/search_filter/word_filter.py +29 -0
  375. khoj/search_type/__init__.py +0 -0
  376. khoj/search_type/text_search.py +255 -0
  377. khoj/utils/__init__.py +0 -0
  378. khoj/utils/cli.py +101 -0
  379. khoj/utils/config.py +81 -0
  380. khoj/utils/constants.py +51 -0
  381. khoj/utils/fs_syncer.py +252 -0
  382. khoj/utils/helpers.py +627 -0
  383. khoj/utils/initialization.py +301 -0
  384. khoj/utils/jsonl.py +43 -0
  385. khoj/utils/models.py +47 -0
  386. khoj/utils/rawconfig.py +208 -0
  387. khoj/utils/state.py +48 -0
  388. khoj/utils/yaml.py +47 -0
  389. khoj-1.33.3.dev32.dist-info/METADATA +190 -0
  390. khoj-1.33.3.dev32.dist-info/RECORD +393 -0
  391. khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
  392. khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
  393. khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,634 @@
1
+ import asyncio
2
+ import json
3
+ import logging
4
+ import math
5
+ from concurrent.futures import ThreadPoolExecutor
6
+ from typing import Dict, List, Optional, Union
7
+
8
+ from asgiref.sync import sync_to_async
9
+ from fastapi import (
10
+ APIRouter,
11
+ BackgroundTasks,
12
+ Depends,
13
+ Header,
14
+ HTTPException,
15
+ Request,
16
+ Response,
17
+ UploadFile,
18
+ )
19
+ from pydantic import BaseModel
20
+ from starlette.authentication import requires
21
+
22
+ from khoj.database import adapters
23
+ from khoj.database.adapters import (
24
+ EntryAdapters,
25
+ get_user_github_config,
26
+ get_user_notion_config,
27
+ )
28
+ from khoj.database.models import Entry as DbEntry
29
+ from khoj.database.models import (
30
+ GithubConfig,
31
+ GithubRepoConfig,
32
+ KhojUser,
33
+ LocalMarkdownConfig,
34
+ LocalOrgConfig,
35
+ LocalPdfConfig,
36
+ LocalPlaintextConfig,
37
+ NotionConfig,
38
+ )
39
+ from khoj.processor.content.docx.docx_to_entries import DocxToEntries
40
+ from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
41
+ from khoj.routers.helpers import (
42
+ ApiIndexedDataLimiter,
43
+ CommonQueryParams,
44
+ configure_content,
45
+ get_file_content,
46
+ get_user_config,
47
+ update_telemetry_state,
48
+ )
49
+ from khoj.utils import constants, state
50
+ from khoj.utils.config import SearchModels
51
+ from khoj.utils.rawconfig import (
52
+ ContentConfig,
53
+ FullConfig,
54
+ GithubContentConfig,
55
+ NotionContentConfig,
56
+ SearchConfig,
57
+ )
58
+ from khoj.utils.state import SearchType
59
+ from khoj.utils.yaml import save_config_to_file_updated_state
60
+
61
+ logger = logging.getLogger(__name__)
62
+
63
+ api_content = APIRouter()
64
+
65
+ executor = ThreadPoolExecutor()
66
+
67
+
68
class File(BaseModel):
    """One file payload: a path together with its content."""

    # Path string identifying the file.
    path: str
    # File content; both text (str) and raw bytes are accepted.
    content: Union[str, bytes]
71
+
72
+
73
class IndexBatchRequest(BaseModel):
    """Container model holding a batch of ``File`` objects."""

    # The files that make up this batch.
    files: List[File]
75
+
76
+
77
class IndexerInput(BaseModel):
    """Optional per-content-type mappings handed to the indexer.

    Every field defaults to ``None``. The org, markdown and plaintext
    mappings hold ``str`` values; the pdf, image and docx mappings hold
    ``bytes`` values.
    """

    # NOTE(review): the string keys are presumably file names/paths — confirm
    # against the code that builds this model.
    org: Optional[Dict[str, str]] = None
    markdown: Optional[Dict[str, str]] = None
    pdf: Optional[Dict[str, bytes]] = None
    plaintext: Optional[Dict[str, str]] = None
    image: Optional[Dict[str, bytes]] = None
    docx: Optional[Dict[str, bytes]] = None
84
+
85
+
86
async def run_in_executor(func, *args):
    """Run ``func(*args)`` on the module-level thread pool and await the result.

    Args:
        func: Callable to execute on the ``executor`` thread pool.
        *args: Positional arguments forwarded to ``func``.

    Returns:
        The value returned by ``func(*args)``.
    """
    # This coroutine only ever executes while an event loop is running, so ask
    # for that loop directly instead of going through get_event_loop().
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(executor, func, *args)
89
+
90
+
91
@api_content.put("")
@requires(["authenticated"])
async def put_content(
    request: Request,
    files: List[UploadFile] = [],
    t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
    client: Optional[str] = None,
    user_agent: Optional[str] = Header(None),
    referer: Optional[str] = Header(None),
    host: Optional[str] = Header(None),
    indexed_data_limiter: ApiIndexedDataLimiter = Depends(
        ApiIndexedDataLimiter(
            incoming_entries_size_limit=10,
            subscribed_incoming_entries_size_limit=75,
            total_entries_size_limit=10,
            subscribed_total_entries_size_limit=200,
        )
    ),
):
    """Handle an authenticated PUT of uploaded files by delegating to ``indexer``.

    ``indexed_data_limiter`` is declared as a dependency only; it is never
    referenced in the body.

    Returns:
        Whatever ``indexer`` returns.
    """
    # PUT passes True as indexer's fourth positional argument, whereas
    # patch_content passes False.
    # NOTE(review): presumably a "regenerate/replace existing content" flag —
    # confirm against indexer's signature.
    outcome = await indexer(request, files, t, True, client, user_agent, referer, host)
    return outcome
111
+
112
+
113
@api_content.patch("")
@requires(["authenticated"])
async def patch_content(
    request: Request,
    files: List[UploadFile] = [],
    t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
    client: Optional[str] = None,
    user_agent: Optional[str] = Header(None),
    referer: Optional[str] = Header(None),
    host: Optional[str] = Header(None),
    indexed_data_limiter: ApiIndexedDataLimiter = Depends(
        ApiIndexedDataLimiter(
            incoming_entries_size_limit=10,
            subscribed_incoming_entries_size_limit=75,
            total_entries_size_limit=10,
            subscribed_total_entries_size_limit=200,
        )
    ),
):
    """Handle an authenticated PATCH of uploaded files by delegating to ``indexer``.

    ``indexed_data_limiter`` is declared as a dependency only; it is never
    referenced in the body.

    Returns:
        Whatever ``indexer`` returns.
    """
    # PATCH passes False as indexer's fourth positional argument, whereas
    # put_content passes True.
    # NOTE(review): presumably a "regenerate/replace existing content" flag —
    # confirm against indexer's signature.
    outcome = await indexer(request, files, t, False, client, user_agent, referer, host)
    return outcome
133
+
134
+
135
@api_content.get("/github", response_class=Response)
@requires(["authenticated"])
def get_content_github(request: Request) -> Response:
    """Return the user's config plus their current GitHub settings as JSON.

    The result of ``get_user_config`` has its ``"request"`` entry removed and
    gains a ``"current_config"`` entry: the serialized GitHub content config
    when the user has one, otherwise an empty dict.

    Args:
        request: Incoming request; the user is read from ``request.user.object``.

    Returns:
        A 200 ``Response`` with media type ``application/json``.
    """
    user = request.user.object
    user_config = get_user_config(user, request)
    # Drop the request entry before the config is JSON-serialized below.
    del user_config["request"]

    current_github_config = get_user_github_config(user)

    if current_github_config:
        # NOTE(review): GithubRepoConfig is imported from khoj.database.models
        # at the top of this file — confirm GithubContentConfig accepts these
        # instances for its repos field.
        repos = [
            GithubRepoConfig(name=repo.name, owner=repo.owner, branch=repo.branch)
            for repo in current_github_config.githubrepoconfig.all()
        ]
        github_config = GithubContentConfig(
            pat_token=current_github_config.pat_token,
            repos=repos,
        )
        # model_dump_json() is the same serializer get_content_notion uses and
        # replaces the deprecated .json() call.
        current_config = json.loads(github_config.model_dump_json())
    else:
        current_config = {}

    user_config["current_config"] = current_config

    # Return config data as a JSON response
    return Response(content=json.dumps(user_config), media_type="application/json", status_code=200)
167
+
168
+
169
@api_content.get("/notion", response_class=Response)
@requires(["authenticated"])
def get_content_notion(request: Request) -> Response:
    """Return the user's config plus their current Notion content config as JSON.

    The "current_config" key of the response carries the stored Notion token,
    or an empty token when the user has no Notion config.
    """
    user = request.user.object
    user_config = get_user_config(user, request)
    del user_config["request"]

    notion_config = get_user_notion_config(user)
    if notion_config:
        token = notion_config.token
    else:
        token = ""

    serialized_config = NotionContentConfig(token=token).model_dump_json()
    user_config["current_config"] = json.loads(serialized_config)

    # Return config data as a JSON response
    return Response(content=json.dumps(user_config), media_type="application/json", status_code=200)
185
+
186
+
187
@api_content.post("/github", status_code=200)
@requires(["authenticated"])
async def set_content_github(
    request: Request,
    updated_config: Union[GithubContentConfig, None],
    client: Optional[str] = None,
):
    """Store the submitted Github content config for the authenticated user.

    Raises:
        HTTPException: with status 500 when saving the config fails for any reason.
    """
    _initialize_config()

    khoj_user = request.user.object

    try:
        await adapters.set_user_github_config(
            user=khoj_user,
            pat_token=updated_config.pat_token,
            repos=updated_config.repos,
        )
    except Exception as error:
        logger.error(error, exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to set Github config")

    telemetry_metadata = {"content_type": "github"}
    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="set_content_config",
        client=client,
        metadata=telemetry_metadata,
    )

    return {"status": "ok"}
217
+
218
+
219
@api_content.post("/notion", status_code=200)
@requires(["authenticated"])
async def set_content_notion(
    request: Request,
    background_tasks: BackgroundTasks,
    updated_config: Union[NotionContentConfig, None],
    client: Optional[str] = None,
):
    """Store the submitted Notion token and, when one is set, schedule Notion indexing.

    Raises:
        HTTPException: with status 500 when saving the config fails for any reason.
    """
    _initialize_config()

    khoj_user = request.user.object

    try:
        await adapters.set_notion_config(user=khoj_user, token=updated_config.token)
    except Exception as error:
        logger.error(error, exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to set Notion config")

    has_token = bool(updated_config.token)
    if has_token:
        # Trigger an async job to configure_content. Let it run without blocking the response.
        background_tasks.add_task(run_in_executor, configure_content, khoj_user, {}, False, SearchType.Notion)

    telemetry_metadata = {"content_type": "notion"}
    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="set_content_config",
        client=client,
        metadata=telemetry_metadata,
    )

    return {"status": "ok"}
253
+
254
+
255
@api_content.delete("/file", status_code=201)
@requires(["authenticated"])
async def delete_content_files(
    request: Request,
    filename: str,
    client: Optional[str] = None,
):
    """Delete the authenticated user's indexed entries for a single file."""
    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="delete_file",
        client=client,
    )

    owner = request.user.object
    await EntryAdapters.adelete_entry_by_file(owner, filename)

    return {"status": "ok"}
274
+
275
+
276
class DeleteFilesRequest(BaseModel):
    """Request body for the bulk file deletion endpoint."""

    # Names of the files whose indexed entries should be deleted.
    files: List[str]
278
+
279
+
280
@api_content.delete("/files", status_code=201)
@requires(["authenticated"])
async def delete_content_file(
    request: Request,
    files: DeleteFilesRequest,
    client: Optional[str] = None,
):
    """Delete the authenticated user's indexed entries for several files.

    Returns a status together with the count reported by the entry adapter.
    """
    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="delete_file",
        client=client,
    )

    owner = request.user.object
    filenames = files.files
    deleted_count = await EntryAdapters.adelete_entries_by_filenames(owner, filenames)

    return {"status": "ok", "deleted_count": deleted_count}
299
+
300
+
301
@api_content.get("/size", response_model=Dict[str, int])
@requires(["authenticated"])
async def get_content_size(request: Request, common: CommonQueryParams, client: Optional[str] = None):
    """Return the size of the user's indexed data in MB, rounded up, as JSON."""
    owner = request.user.object
    fetch_size_in_mb = sync_to_async(EntryAdapters.get_size_of_indexed_data_in_mb)
    size_in_mb = await fetch_size_in_mb(owner)

    payload = {"indexed_data_size_in_mb": math.ceil(size_in_mb)}
    return Response(
        content=json.dumps(payload),
        media_type="application/json",
        status_code=200,
    )
311
+
312
+
313
@api_content.get("/types", response_model=List[str])
@requires(["authenticated"])
def get_content_types(request: Request, client: Optional[str] = None):
    """List the content types available to the user.

    Combines the file types found in the user's entries, the "all" type and
    any content types set in the server config, then keeps only those that
    are valid `SearchType` values.
    """
    owner = request.user.object
    known_types = {search_type.value for search_type in SearchType}
    available_types = set(EntryAdapters.get_unique_file_types(owner))
    available_types |= {"all"}

    server_config = state.config
    if server_config and server_config.content_type:
        for configured_type in server_config.content_type.model_dump(exclude_none=True):
            available_types.add(configured_type)

    return list(available_types.intersection(known_types))
326
+
327
+
328
@api_content.get("/{content_source}", response_model=List[str])
@requires(["authenticated"])
async def get_content_source(
    request: Request,
    content_source: str,
    client: Optional[str] = None,
):
    """Return the filenames the user has indexed from the given content source."""
    owner = request.user.object

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="get_all_filenames",
        client=client,
    )

    filenames = EntryAdapters.get_all_filenames_by_source(owner, content_source)
    # Materialize the result via sync_to_async rather than directly in the event loop.
    return await sync_to_async(list)(filenames)  # type: ignore[call-arg]
345
+
346
+
347
@api_content.delete("/{content_source}", status_code=200)
@requires(["authenticated"])
async def delete_content_source(
    request: Request,
    content_source: str,
    client: Optional[str] = None,
):
    """Delete the user's config and indexed entries for a content source.

    Raises:
        ValueError: when `content_source` does not map to a known source.
    """
    user = request.user.object

    content_object = map_config_to_object(content_source)
    if content_object is None:
        raise ValueError(f"Invalid content source: {content_source}")
    elif content_object != "Computer":
        # For Github and Notion this removes the user's stored config rows.
        # The former second delete of NotionConfig/GithubConfig repeated this
        # exact query, so it has been dropped.
        await content_object.objects.filter(user=user).adelete()
    await sync_to_async(EntryAdapters.delete_all_entries)(user, file_source=content_source)

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="delete_content_config",
        client=client,
        metadata={"content_source": content_source},
    )

    # The previous lookup of the remaining file types was never used, so it is gone.
    return {"status": "ok"}
378
+
379
+
380
@api_content.post("/convert", status_code=200)
@requires(["authenticated"])
async def convert_documents(
    request: Request,
    files: List[UploadFile],
    client: Optional[str] = None,
):
    """Convert uploaded documents to text and return them as a JSON list.

    Files larger than 10MB or of an unsupported type are skipped with a
    warning. Each converted item holds the file's name, extracted text
    content, file type and content size in bytes.
    """
    MAX_FILE_SIZE_MB = 10  # 10MB limit
    MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024

    converted_files = []
    supported_files = ["org", "markdown", "pdf", "plaintext", "docx"]
    # File types whose text is extracted page by page by a dedicated extractor.
    page_extractors = {"docx": DocxToEntries, "pdf": PdfToEntries}

    for file in files:
        # Check file size first
        content = await file.read()
        file_size = len(content)
        await file.seek(0)  # Reset file pointer

        if file_size > MAX_FILE_SIZE_BYTES:
            logger.warning(
                f"Skipped converting oversized file ({file_size / 1024 / 1024:.1f}MB) sent by {client} client: {file.filename}"
            )
            continue

        file_data = get_file_content(file)
        if file_data.file_type not in supported_files:
            logger.warning(f"Skipped converting unsupported file type sent by {client} client: {file.filename}")
            continue

        extractor = page_extractors.get(file_data.file_type)
        if extractor is not None:
            entries_per_page = extractor.extract_text(file_data.content)
            # NOTE(review): page numbers start at 0 here — confirm this is intended.
            annotated_pages = [
                f"Page {index} of {file_data.name}:\n\n{entry}" for index, entry in enumerate(entries_per_page)
            ]
            extracted_content = "\n".join(annotated_pages)
        else:
            extracted_content = (
                file_data.content.decode(file_data.encoding) if file_data.encoding else file_data.content
            )
            # Convert content to string. Only decode when it is still bytes:
            # content decoded above is already a str, and str has no .decode().
            if isinstance(extracted_content, bytes):
                extracted_content = extracted_content.decode("utf-8")

        # Calculate size in bytes. Some of the content might be in bytes, some in str.
        if isinstance(extracted_content, str):
            size_in_bytes = len(extracted_content.encode("utf-8"))
        elif isinstance(extracted_content, bytes):
            size_in_bytes = len(extracted_content)
        else:
            size_in_bytes = 0
            logger.warning(f"Unexpected content type: {type(extracted_content)}")

        converted_files.append(
            {
                "name": file_data.name,
                "content": extracted_content,
                "file_type": file_data.file_type,
                "size": size_in_bytes,
            }
        )

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="convert_documents",
        client=client,
    )

    return Response(content=json.dumps(converted_files), media_type="application/json", status_code=200)
457
+
458
+
459
async def indexer(
    request: Request,
    files: list[UploadFile],
    t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
    regenerate: bool = False,
    client: Optional[str] = None,
    user_agent: Optional[str] = Header(None),
    referer: Optional[str] = Header(None),
    host: Optional[str] = Header(None),
):
    """Index the uploaded files into the requesting user's content index.

    Args:
        request: Incoming request; the authenticated user is read from it.
        files: Uploaded files to index. Unsupported file types are skipped.
        t: Search/content type passed through to `configure_content`.
        regenerate: Passed through to `configure_content`; also selects the
            "regenerate" or "sync" wording in log messages.
        client: Name of the calling client, used in logs and telemetry.
        user_agent, referer, host: Request headers forwarded to telemetry.

    Returns:
        A 200 response whose body is the comma-separated names of the indexed
        files, or a 500 response with body "Failed" when indexing fails.
    """
    user = request.user.object
    method = "regenerate" if regenerate else "sync"
    index_files: Dict[str, Dict[str, str]] = {
        "org": {},
        "markdown": {},
        "pdf": {},
        "plaintext": {},
        "image": {},
        "docx": {},
    }
    try:
        logger.info(f"📬 Updating content index via API call by {client} client")
        for file in files:
            file_data = get_file_content(file)
            if file_data.file_type in index_files:
                index_files[file_data.file_type][file_data.name] = (
                    file_data.content.decode(file_data.encoding) if file_data.encoding else file_data.content
                )
            else:
                logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file_data.name}")

        indexer_input = IndexerInput(
            org=index_files["org"],
            markdown=index_files["markdown"],
            pdf=index_files["pdf"],
            plaintext=index_files["plaintext"],
            image=index_files["image"],
            docx=index_files["docx"],
        )

        if state.config is None:
            logger.info("📬 Initializing content index on first run.")
            default_full_config = FullConfig(
                content_type=None,
                search_type=SearchConfig.model_validate(constants.default_config["search-type"]),
                processor=None,
            )
            state.config = default_full_config
            default_content_config = ContentConfig(
                org=None,
                markdown=None,
                pdf=None,
                docx=None,
                image=None,
                github=None,
                notion=None,
                plaintext=None,
            )
            state.config.content_type = default_content_config
            save_config_to_file_updated_state()
            configure_search(state.search_models, state.config.search_type)

        # Run the blocking indexing job in the default executor so the event loop stays free.
        loop = asyncio.get_running_loop()
        success = await loop.run_in_executor(
            None,
            configure_content,
            user,
            indexer_input.model_dump(),
            regenerate,
            t,
        )
        if not success:
            raise RuntimeError(f"Failed to {method} {t} data sent by {client} client into content index")
        logger.info(f"Finished {method} {t} data sent by {client} client into content index")
    except Exception as e:
        # Log the failure once; it used to be logged twice with the same traceback.
        logger.error(
            f"🚨 Failed to {method} {t} data sent by {client} client into content index: {e}",
            exc_info=True,
        )
        return Response(content="Failed", status_code=500)

    indexing_metadata = {
        "num_org": len(index_files["org"]),
        "num_markdown": len(index_files["markdown"]),
        "num_pdf": len(index_files["pdf"]),
        "num_plaintext": len(index_files["plaintext"]),
        "num_image": len(index_files["image"]),
        "num_docx": len(index_files["docx"]),
    }

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="index/update",
        client=client,
        user_agent=user_agent,
        referer=referer,
        host=host,
        metadata=indexing_metadata,
    )

    logger.info(f"📪 Content index updated via API call by {client} client")

    # Joining an empty sequence already yields "", so no fallback is needed.
    indexed_filenames = ",".join(file for ctype in index_files for file in index_files[ctype])
    return Response(content=indexed_filenames, status_code=200)
565
+
566
+
567
def configure_search(search_models: SearchModels, search_config: Optional[SearchConfig]) -> Optional[SearchModels]:
    """Return `search_models`, or a fresh `SearchModels` when none was given.

    `search_config` is accepted but not used in the body.
    """
    # Run Validation Checks
    return SearchModels() if search_models is None else search_models
573
+
574
+
575
def map_config_to_object(content_source: str):
    """Map a content source to its config model.

    Returns `GithubConfig` or `NotionConfig` for those sources, the string
    "Computer" for the computer source, and None for anything else.
    """
    if content_source == DbEntry.EntrySource.GITHUB:
        config_object = GithubConfig
    elif content_source == DbEntry.EntrySource.NOTION:
        config_object = NotionConfig
    elif content_source == DbEntry.EntrySource.COMPUTER:
        config_object = "Computer"
    else:
        config_object = None
    return config_object
582
+
583
+
584
async def map_config_to_db(config: FullConfig, user: KhojUser):
    """Persist the content sections of `config` to the database for `user`.

    For each configured local source (org, markdown, pdf, plaintext) the
    user's existing rows are deleted and a new row is created. Github and
    Notion settings are saved through their adapter functions.
    """
    content = config.content_type
    if not content:
        return

    # Local file based sources share the same fields, so handle them uniformly.
    local_sources = (
        (LocalOrgConfig, "org"),
        (LocalMarkdownConfig, "markdown"),
        (LocalPdfConfig, "pdf"),
        (LocalPlaintextConfig, "plaintext"),
    )
    for db_model, field_name in local_sources:
        source_config = getattr(content, field_name)
        if not source_config:
            continue
        await db_model.objects.filter(user=user).adelete()
        await db_model.objects.acreate(
            input_files=source_config.input_files,
            input_filter=source_config.input_filter,
            index_heading_entries=source_config.index_heading_entries,
            user=user,
        )

    github_config = content.github
    if github_config:
        await adapters.set_user_github_config(
            user=user,
            pat_token=github_config.pat_token,
            repos=github_config.repos,
        )

    notion_config = content.notion
    if notion_config:
        await adapters.set_notion_config(
            user=user,
            token=notion_config.token,
        )
629
+
630
+
631
def _initialize_config():
    """Create the global config with the default search type if it is not set yet."""
    if state.config is None:
        fresh_config = FullConfig()
        state.config = fresh_config
        fresh_config.search_type = SearchConfig.model_validate(constants.default_config["search-type"])