khoj 1.33.3.dev32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/__init__.py +0 -0
- khoj/app/README.md +94 -0
- khoj/app/__init__.py +0 -0
- khoj/app/asgi.py +16 -0
- khoj/app/settings.py +218 -0
- khoj/app/urls.py +25 -0
- khoj/configure.py +452 -0
- khoj/database/__init__.py +0 -0
- khoj/database/adapters/__init__.py +1821 -0
- khoj/database/admin.py +417 -0
- khoj/database/apps.py +6 -0
- khoj/database/management/__init__.py +0 -0
- khoj/database/management/commands/__init__.py +0 -0
- khoj/database/management/commands/change_default_model.py +116 -0
- khoj/database/management/commands/change_generated_images_url.py +61 -0
- khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
- khoj/database/migrations/0001_khojuser.py +98 -0
- khoj/database/migrations/0002_googleuser.py +32 -0
- khoj/database/migrations/0003_vector_extension.py +10 -0
- khoj/database/migrations/0004_content_types_and_more.py +181 -0
- khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
- khoj/database/migrations/0006_embeddingsdates.py +33 -0
- khoj/database/migrations/0007_add_conversation.py +27 -0
- khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
- khoj/database/migrations/0009_khojapiuser.py +24 -0
- khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
- khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
- khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
- khoj/database/migrations/0012_entry_file_source.py +21 -0
- khoj/database/migrations/0013_subscription.py +37 -0
- khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
- khoj/database/migrations/0015_alter_subscription_user.py +21 -0
- khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
- khoj/database/migrations/0017_searchmodel.py +32 -0
- khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
- khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
- khoj/database/migrations/0020_reflectivequestion.py +36 -0
- khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
- khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
- khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
- khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
- khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
- khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
- khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
- khoj/database/migrations/0029_userrequests.py +27 -0
- khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
- khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
- khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
- khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
- khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
- khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
- khoj/database/migrations/0035_processlock.py +26 -0
- khoj/database/migrations/0036_alter_processlock_name.py +19 -0
- khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
- khoj/database/migrations/0036_publicconversation.py +42 -0
- khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
- khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
- khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
- khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
- khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
- khoj/database/migrations/0040_alter_processlock_name.py +26 -0
- khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
- khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
- khoj/database/migrations/0042_serverchatsettings.py +46 -0
- khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
- khoj/database/migrations/0044_conversation_file_filters.py +17 -0
- khoj/database/migrations/0045_fileobject.py +37 -0
- khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
- khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
- khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
- khoj/database/migrations/0049_datastore.py +38 -0
- khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
- khoj/database/migrations/0050_alter_processlock_name.py +25 -0
- khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
- khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
- khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
- khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
- khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
- khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
- khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
- khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
- khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
- khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
- khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
- khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
- khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
- khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
- khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
- khoj/database/migrations/0063_conversation_temp_id.py +36 -0
- khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
- khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
- khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
- khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
- khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
- khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
- khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
- khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
- khoj/database/migrations/0072_entry_search_model.py +24 -0
- khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
- khoj/database/migrations/0074_alter_conversation_title.py +17 -0
- khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
- khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
- khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
- khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
- khoj/database/migrations/__init__.py +0 -0
- khoj/database/models/__init__.py +725 -0
- khoj/database/tests.py +3 -0
- khoj/interface/compiled/404/index.html +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
- khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
- khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
- khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
- khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
- khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
- khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
- khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
- khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
- khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
- khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
- khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
- khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
- khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
- khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
- khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
- khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
- khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
- khoj/interface/compiled/agents/index.html +1 -0
- khoj/interface/compiled/agents/index.txt +7 -0
- khoj/interface/compiled/agents.svg +6 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
- khoj/interface/compiled/automation.svg +37 -0
- khoj/interface/compiled/automations/index.html +1 -0
- khoj/interface/compiled/automations/index.txt +8 -0
- khoj/interface/compiled/chat/index.html +1 -0
- khoj/interface/compiled/chat/index.txt +7 -0
- khoj/interface/compiled/chat.svg +24 -0
- khoj/interface/compiled/close.svg +5 -0
- khoj/interface/compiled/copy-button-success.svg +6 -0
- khoj/interface/compiled/copy-button.svg +5 -0
- khoj/interface/compiled/index.html +1 -0
- khoj/interface/compiled/index.txt +7 -0
- khoj/interface/compiled/khoj.webmanifest +76 -0
- khoj/interface/compiled/logo.svg +24 -0
- khoj/interface/compiled/search/index.html +1 -0
- khoj/interface/compiled/search/index.txt +7 -0
- khoj/interface/compiled/send.svg +1 -0
- khoj/interface/compiled/settings/index.html +1 -0
- khoj/interface/compiled/settings/index.txt +9 -0
- khoj/interface/compiled/share/chat/index.html +1 -0
- khoj/interface/compiled/share/chat/index.txt +7 -0
- khoj/interface/compiled/share.svg +8 -0
- khoj/interface/compiled/thumbs-down.svg +6 -0
- khoj/interface/compiled/thumbs-up.svg +6 -0
- khoj/interface/email/feedback.html +34 -0
- khoj/interface/email/magic_link.html +40 -0
- khoj/interface/email/task.html +37 -0
- khoj/interface/email/welcome.html +90 -0
- khoj/interface/web/.well-known/assetlinks.json +11 -0
- khoj/interface/web/assets/icons/agents.svg +19 -0
- khoj/interface/web/assets/icons/automation.svg +43 -0
- khoj/interface/web/assets/icons/chat.svg +24 -0
- khoj/interface/web/assets/icons/github.svg +1 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
- khoj/interface/web/assets/icons/khoj.svg +26 -0
- khoj/interface/web/assets/icons/logotype.svg +1 -0
- khoj/interface/web/assets/icons/search.svg +57 -0
- khoj/interface/web/assets/icons/sync.svg +4 -0
- khoj/interface/web/assets/khoj.css +237 -0
- khoj/interface/web/assets/utils.js +33 -0
- khoj/interface/web/base_config.html +445 -0
- khoj/interface/web/content_source_github_input.html +208 -0
- khoj/interface/web/login.html +310 -0
- khoj/interface/web/utils.html +48 -0
- khoj/main.py +249 -0
- khoj/manage.py +22 -0
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +69 -0
- khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
- khoj/migrations/migrate_offline_chat_schema.py +83 -0
- khoj/migrations/migrate_offline_model.py +29 -0
- khoj/migrations/migrate_processor_config_openai.py +67 -0
- khoj/migrations/migrate_server_pg.py +132 -0
- khoj/migrations/migrate_version.py +17 -0
- khoj/processor/__init__.py +0 -0
- khoj/processor/content/__init__.py +0 -0
- khoj/processor/content/docx/__init__.py +0 -0
- khoj/processor/content/docx/docx_to_entries.py +111 -0
- khoj/processor/content/github/__init__.py +0 -0
- khoj/processor/content/github/github_to_entries.py +226 -0
- khoj/processor/content/images/__init__.py +0 -0
- khoj/processor/content/images/image_to_entries.py +117 -0
- khoj/processor/content/markdown/__init__.py +0 -0
- khoj/processor/content/markdown/markdown_to_entries.py +160 -0
- khoj/processor/content/notion/notion_to_entries.py +259 -0
- khoj/processor/content/org_mode/__init__.py +0 -0
- khoj/processor/content/org_mode/org_to_entries.py +226 -0
- khoj/processor/content/org_mode/orgnode.py +532 -0
- khoj/processor/content/pdf/__init__.py +0 -0
- khoj/processor/content/pdf/pdf_to_entries.py +119 -0
- khoj/processor/content/plaintext/__init__.py +0 -0
- khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
- khoj/processor/content/text_to_entries.py +296 -0
- khoj/processor/conversation/__init__.py +0 -0
- khoj/processor/conversation/anthropic/__init__.py +0 -0
- khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
- khoj/processor/conversation/anthropic/utils.py +217 -0
- khoj/processor/conversation/google/__init__.py +0 -0
- khoj/processor/conversation/google/gemini_chat.py +253 -0
- khoj/processor/conversation/google/utils.py +260 -0
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +308 -0
- khoj/processor/conversation/offline/utils.py +80 -0
- khoj/processor/conversation/offline/whisper.py +15 -0
- khoj/processor/conversation/openai/__init__.py +0 -0
- khoj/processor/conversation/openai/gpt.py +243 -0
- khoj/processor/conversation/openai/utils.py +232 -0
- khoj/processor/conversation/openai/whisper.py +13 -0
- khoj/processor/conversation/prompts.py +1188 -0
- khoj/processor/conversation/utils.py +867 -0
- khoj/processor/embeddings.py +122 -0
- khoj/processor/image/generate.py +215 -0
- khoj/processor/speech/__init__.py +0 -0
- khoj/processor/speech/text_to_speech.py +51 -0
- khoj/processor/tools/__init__.py +0 -0
- khoj/processor/tools/online_search.py +472 -0
- khoj/processor/tools/run_code.py +179 -0
- khoj/routers/__init__.py +0 -0
- khoj/routers/api.py +760 -0
- khoj/routers/api_agents.py +295 -0
- khoj/routers/api_chat.py +1273 -0
- khoj/routers/api_content.py +634 -0
- khoj/routers/api_model.py +123 -0
- khoj/routers/api_phone.py +86 -0
- khoj/routers/api_subscription.py +144 -0
- khoj/routers/auth.py +307 -0
- khoj/routers/email.py +135 -0
- khoj/routers/helpers.py +2333 -0
- khoj/routers/notion.py +85 -0
- khoj/routers/research.py +364 -0
- khoj/routers/storage.py +63 -0
- khoj/routers/twilio.py +36 -0
- khoj/routers/web_client.py +141 -0
- khoj/search_filter/__init__.py +0 -0
- khoj/search_filter/base_filter.py +15 -0
- khoj/search_filter/date_filter.py +215 -0
- khoj/search_filter/file_filter.py +32 -0
- khoj/search_filter/word_filter.py +29 -0
- khoj/search_type/__init__.py +0 -0
- khoj/search_type/text_search.py +255 -0
- khoj/utils/__init__.py +0 -0
- khoj/utils/cli.py +101 -0
- khoj/utils/config.py +81 -0
- khoj/utils/constants.py +51 -0
- khoj/utils/fs_syncer.py +252 -0
- khoj/utils/helpers.py +627 -0
- khoj/utils/initialization.py +301 -0
- khoj/utils/jsonl.py +43 -0
- khoj/utils/models.py +47 -0
- khoj/utils/rawconfig.py +208 -0
- khoj/utils/state.py +48 -0
- khoj/utils/yaml.py +47 -0
- khoj-1.33.3.dev32.dist-info/METADATA +190 -0
- khoj-1.33.3.dev32.dist-info/RECORD +393 -0
- khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
- khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
- khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,634 @@
|
|
1
|
+
import asyncio
|
2
|
+
import json
|
3
|
+
import logging
|
4
|
+
import math
|
5
|
+
from concurrent.futures import ThreadPoolExecutor
|
6
|
+
from typing import Dict, List, Optional, Union
|
7
|
+
|
8
|
+
from asgiref.sync import sync_to_async
|
9
|
+
from fastapi import (
|
10
|
+
APIRouter,
|
11
|
+
BackgroundTasks,
|
12
|
+
Depends,
|
13
|
+
Header,
|
14
|
+
HTTPException,
|
15
|
+
Request,
|
16
|
+
Response,
|
17
|
+
UploadFile,
|
18
|
+
)
|
19
|
+
from pydantic import BaseModel
|
20
|
+
from starlette.authentication import requires
|
21
|
+
|
22
|
+
from khoj.database import adapters
|
23
|
+
from khoj.database.adapters import (
|
24
|
+
EntryAdapters,
|
25
|
+
get_user_github_config,
|
26
|
+
get_user_notion_config,
|
27
|
+
)
|
28
|
+
from khoj.database.models import Entry as DbEntry
|
29
|
+
from khoj.database.models import (
|
30
|
+
GithubConfig,
|
31
|
+
GithubRepoConfig,
|
32
|
+
KhojUser,
|
33
|
+
LocalMarkdownConfig,
|
34
|
+
LocalOrgConfig,
|
35
|
+
LocalPdfConfig,
|
36
|
+
LocalPlaintextConfig,
|
37
|
+
NotionConfig,
|
38
|
+
)
|
39
|
+
from khoj.processor.content.docx.docx_to_entries import DocxToEntries
|
40
|
+
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
|
41
|
+
from khoj.routers.helpers import (
|
42
|
+
ApiIndexedDataLimiter,
|
43
|
+
CommonQueryParams,
|
44
|
+
configure_content,
|
45
|
+
get_file_content,
|
46
|
+
get_user_config,
|
47
|
+
update_telemetry_state,
|
48
|
+
)
|
49
|
+
from khoj.utils import constants, state
|
50
|
+
from khoj.utils.config import SearchModels
|
51
|
+
from khoj.utils.rawconfig import (
|
52
|
+
ContentConfig,
|
53
|
+
FullConfig,
|
54
|
+
GithubContentConfig,
|
55
|
+
NotionContentConfig,
|
56
|
+
SearchConfig,
|
57
|
+
)
|
58
|
+
from khoj.utils.state import SearchType
|
59
|
+
from khoj.utils.yaml import save_config_to_file_updated_state
|
60
|
+
|
61
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Router that the content endpoints in this file are registered on.
api_content = APIRouter()

# Thread pool shared by run_in_executor for running callables off the event loop.
executor = ThreadPoolExecutor()
|
66
|
+
|
67
|
+
|
68
|
+
class File(BaseModel):
    # Request model describing one file by its path and its content.
    # Documented with comments rather than a docstring so the generated
    # Pydantic schema description stays unchanged.
    path: str  # path identifying the file
    content: Union[str, bytes]  # file content, either text or raw bytes
|
71
|
+
|
72
|
+
|
73
|
+
class IndexBatchRequest(BaseModel):
    # Request model carrying a batch of File items.
    files: list[File]  # the files in this batch
|
75
|
+
|
76
|
+
|
77
|
+
class IndexerInput(BaseModel):
    # One optional mapping per content type; every field defaults to None.
    # Keys are presumably file names/paths and values the file content
    # (text for org/markdown/plaintext, bytes for pdf/image/docx) - TODO confirm
    # against the code that builds this model.
    org: Optional[dict[str, str]] = None
    markdown: Optional[dict[str, str]] = None
    pdf: Optional[dict[str, bytes]] = None
    plaintext: Optional[dict[str, str]] = None
    image: Optional[dict[str, bytes]] = None
    docx: Optional[dict[str, bytes]] = None
|
84
|
+
|
85
|
+
|
86
|
+
async def run_in_executor(func, *args):
    """Run a callable on the module's thread pool and await its result.

    Args:
        func: Callable to execute in the shared ``executor`` thread pool.
        *args: Positional arguments forwarded to ``func``.

    Returns:
        Whatever ``func(*args)`` returns. An exception raised by ``func``
        propagates to the awaiting caller.
    """
    # This is a coroutine, so a loop is always running when we get here.
    # get_running_loop() is the documented call for that situation, whereas
    # get_event_loop() is discouraged inside coroutines.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(executor, func, *args)
|
89
|
+
|
90
|
+
|
91
|
+
@api_content.put("")
@requires(["authenticated"])
async def put_content(
    request: Request,
    files: List[UploadFile] = [],
    t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
    client: Optional[str] = None,
    user_agent: Optional[str] = Header(None),
    referer: Optional[str] = Header(None),
    host: Optional[str] = Header(None),
    indexed_data_limiter: ApiIndexedDataLimiter = Depends(
        ApiIndexedDataLimiter(
            incoming_entries_size_limit=10,
            subscribed_incoming_entries_size_limit=75,
            total_entries_size_limit=10,
            subscribed_total_entries_size_limit=200,
        )
    ),
):
    # PUT handler on the router root for authenticated users. It hands the
    # uploaded files and request metadata to `indexer` and returns its result.
    #
    # `indexed_data_limiter` is never read in the body; declaring it through
    # Depends is what attaches the limiter to this route.
    #
    # The fourth positional argument to `indexer` is True here and False in
    # patch_content - presumably a "regenerate" switch; TODO confirm against
    # indexer's signature.
    fourth_arg = True
    outcome = await indexer(request, files, t, fourth_arg, client, user_agent, referer, host)
    return outcome
|
111
|
+
|
112
|
+
|
113
|
+
@api_content.patch("")
@requires(["authenticated"])
async def patch_content(
    request: Request,
    files: List[UploadFile] = [],
    t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
    client: Optional[str] = None,
    user_agent: Optional[str] = Header(None),
    referer: Optional[str] = Header(None),
    host: Optional[str] = Header(None),
    indexed_data_limiter: ApiIndexedDataLimiter = Depends(
        ApiIndexedDataLimiter(
            incoming_entries_size_limit=10,
            subscribed_incoming_entries_size_limit=75,
            total_entries_size_limit=10,
            subscribed_total_entries_size_limit=200,
        )
    ),
):
    # PATCH handler on the router root for authenticated users. It hands the
    # uploaded files and request metadata to `indexer` and returns its result.
    #
    # `indexed_data_limiter` is never read in the body; declaring it through
    # Depends is what attaches the limiter to this route.
    #
    # The fourth positional argument to `indexer` is False here and True in
    # put_content - presumably a "regenerate" switch; TODO confirm against
    # indexer's signature.
    fourth_arg = False
    outcome = await indexer(request, files, t, fourth_arg, client, user_agent, referer, host)
    return outcome
|
133
|
+
|
134
|
+
|
135
|
+
@api_content.get("/github", response_class=Response)
@requires(["authenticated"])
def get_content_github(request: Request) -> Response:
    # Return the authenticated user's config as JSON, with their stored Github
    # settings (PAT token and repo list) under the "current_config" key.
    # "current_config" is an empty dict when the user has no Github config.
    user = request.user.object
    user_config = get_user_config(user, request)
    # Drop the "request" entry before serializing - presumably it holds the
    # Request object, which json.dumps cannot encode.
    del user_config["request"]

    github_config = get_user_github_config(user)

    current_config: dict = {}
    if github_config:
        repos = [
            GithubRepoConfig(name=repo.name, owner=repo.owner, branch=repo.branch)
            for repo in github_config.githubrepoconfig.all()
        ]
        content_config = GithubContentConfig(pat_token=github_config.pat_token, repos=repos)
        # model_dump_json() is the Pydantic v2 replacement for the deprecated
        # .json(); get_content_notion in this file already uses it.
        current_config = json.loads(content_config.model_dump_json())

    user_config["current_config"] = current_config

    # Return config data as a JSON response
    return Response(content=json.dumps(user_config), media_type="application/json", status_code=200)
|
167
|
+
|
168
|
+
|
169
|
+
@api_content.get("/notion", response_class=Response)
@requires(["authenticated"])
def get_content_notion(request: Request) -> Response:
    # Return the authenticated user's config as JSON, with their stored Notion
    # token under the "current_config" key. The token is an empty string when
    # the user has no Notion config.
    user = request.user.object
    user_config = get_user_config(user, request)
    # Drop the "request" entry before serializing - presumably it holds the
    # Request object, which json.dumps cannot encode.
    del user_config["request"]

    notion_config = get_user_notion_config(user)
    if notion_config:
        token = notion_config.token
    else:
        token = ""

    serialized_config = NotionContentConfig(token=token).model_dump_json()
    user_config["current_config"] = json.loads(serialized_config)

    # Return config data as a JSON response
    payload = json.dumps(user_config)
    return Response(content=payload, media_type="application/json", status_code=200)
|
185
|
+
|
186
|
+
|
187
|
+
@api_content.post("/github", status_code=200)
@requires(["authenticated"])
async def set_content_github(
    request: Request,
    updated_config: Union[GithubContentConfig, None],
    client: Optional[str] = None,
):
    """Store the Github content config submitted by the authenticated user.

    Returns ``{"status": "ok"}`` on success. Any exception raised while reading
    or saving the config (including ``updated_config`` being ``None``) is logged
    and reported as an HTTP 500.
    """
    _initialize_config()

    khoj_user = request.user.object

    try:
        # Read the fields inside the try so a missing config is reported like any other failure.
        github_token = updated_config.pat_token
        github_repos = updated_config.repos
        await adapters.set_user_github_config(user=khoj_user, pat_token=github_token, repos=github_repos)
    except Exception as error:
        logger.error(error, exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to set Github config")

    telemetry_metadata = {"content_type": "github"}
    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="set_content_config",
        client=client,
        metadata=telemetry_metadata,
    )

    return {"status": "ok"}
|
217
|
+
|
218
|
+
|
219
|
+
@api_content.post("/notion", status_code=200)
@requires(["authenticated"])
async def set_content_notion(
    request: Request,
    background_tasks: BackgroundTasks,
    updated_config: Union[NotionContentConfig, None],
    client: Optional[str] = None,
):
    """Store the Notion token submitted by the authenticated user.

    When the submitted token is non-empty, a background task is queued to run
    ``configure_content`` for the Notion search type. Returns ``{"status": "ok"}``
    on success. Any exception raised while saving is logged and reported as an
    HTTP 500.
    """
    _initialize_config()

    khoj_user = request.user.object

    try:
        notion_token = updated_config.token
        await adapters.set_notion_config(user=khoj_user, token=notion_token)
    except Exception as error:
        logger.error(error, exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to set Notion config")

    has_token = bool(updated_config.token)
    if has_token:
        # Queue content configuration as a background job so the response is not held up.
        background_tasks.add_task(run_in_executor, configure_content, khoj_user, {}, False, SearchType.Notion)

    telemetry_metadata = {"content_type": "notion"}
    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="set_content_config",
        client=client,
        metadata=telemetry_metadata,
    )

    return {"status": "ok"}
|
253
|
+
|
254
|
+
|
255
|
+
@api_content.delete("/file", status_code=201)
@requires(["authenticated"])
async def delete_content_files(
    request: Request,
    filename: str,
    client: Optional[str] = None,
):
    """Delete the authenticated user's entries for a single file.

    Records a ``delete_file`` telemetry event, then asks ``EntryAdapters`` to
    delete the entries for ``filename``. Returns ``{"status": "ok"}``.
    """
    khoj_user = request.user.object

    telemetry_fields = {
        "request": request,
        "telemetry_type": "api",
        "api": "delete_file",
        "client": client,
    }
    update_telemetry_state(**telemetry_fields)

    await EntryAdapters.adelete_entry_by_file(khoj_user, filename)

    return {"status": "ok"}
|
274
|
+
|
275
|
+
|
276
|
+
# Request body accepted by the bulk DELETE "/files" endpoint.
class DeleteFilesRequest(BaseModel):
    # Filenames passed on to EntryAdapters.adelete_entries_by_filenames
    files: List[str]
|
278
|
+
|
279
|
+
|
280
|
+
@api_content.delete("/files", status_code=201)
@requires(["authenticated"])
async def delete_content_file(
    request: Request,
    files: DeleteFilesRequest,
    client: Optional[str] = None,
):
    """Delete the authenticated user's entries for several files at once.

    Records a ``delete_file`` telemetry event, then deletes the entries for
    every filename in ``files.files``. Returns ``{"status": "ok"}`` along with
    the ``deleted_count`` reported by ``EntryAdapters``.
    """
    khoj_user = request.user.object

    telemetry_fields = {
        "request": request,
        "telemetry_type": "api",
        "api": "delete_file",
        "client": client,
    }
    update_telemetry_state(**telemetry_fields)

    filenames = files.files
    removed_count = await EntryAdapters.adelete_entries_by_filenames(khoj_user, filenames)

    return {"status": "ok", "deleted_count": removed_count}
|
299
|
+
|
300
|
+
|
301
|
+
@api_content.get("/size", response_model=Dict[str, int])
@requires(["authenticated"])
async def get_content_size(request: Request, common: CommonQueryParams, client: Optional[str] = None):
    """Report the size of the authenticated user's indexed data.

    Returns a JSON response of the form ``{"indexed_data_size_in_mb": <int>}``,
    where the size reported by ``EntryAdapters`` is rounded up with ``math.ceil``.
    """
    khoj_user = request.user.object

    # The adapter call is synchronous, so run it through sync_to_async.
    fetch_size_in_mb = sync_to_async(EntryAdapters.get_size_of_indexed_data_in_mb)
    size_in_mb = await fetch_size_in_mb(khoj_user)

    payload = {"indexed_data_size_in_mb": math.ceil(size_in_mb)}
    return Response(content=json.dumps(payload), media_type="application/json", status_code=200)
|
311
|
+
|
312
|
+
|
313
|
+
@api_content.get("/types", response_model=List[str])
@requires(["authenticated"])
def get_content_types(request: Request, client: Optional[str] = None):
    """List the content types available to the authenticated user.

    Starts from the user's unique indexed file types plus ``"all"``, adds every
    non-None content type key from ``state.config`` when one is set, and keeps
    only the names that are valid ``SearchType`` values.
    """
    khoj_user = request.user.object
    known_types = {search_type.value for search_type in SearchType}

    enabled_types = set(EntryAdapters.get_unique_file_types(khoj_user))
    enabled_types.add("all")

    server_config = state.config
    if server_config and server_config.content_type:
        # Updating a set from a dict adds the dict's keys, i.e. the configured type names.
        enabled_types.update(server_config.content_type.model_dump(exclude_none=True))

    return list(enabled_types & known_types)
|
326
|
+
|
327
|
+
|
328
|
+
@api_content.get("/{content_source}", response_model=List[str])
@requires(["authenticated"])
async def get_content_source(
    request: Request,
    content_source: str,
    client: Optional[str] = None,
):
    """List the authenticated user's indexed filenames for a content source.

    Records a ``get_all_filenames`` telemetry event and returns the filenames
    provided by ``EntryAdapters.get_all_filenames_by_source`` as a list.
    """
    khoj_user = request.user.object

    telemetry_fields = {
        "request": request,
        "telemetry_type": "api",
        "api": "get_all_filenames",
        "client": client,
    }
    update_telemetry_state(**telemetry_fields)

    filenames = EntryAdapters.get_all_filenames_by_source(khoj_user, content_source)
    # Materialize the result into a list via sync_to_async.
    to_list = sync_to_async(list)
    return await to_list(filenames)  # type: ignore[call-arg]
|
345
|
+
|
346
|
+
|
347
|
+
@api_content.delete("/{content_source}", status_code=200)
@requires(["authenticated"])
async def delete_content_source(
    request: Request,
    content_source: str,
    client: Optional[str] = None,
):
    """Delete the authenticated user's config and indexed entries for a content source.

    Records a ``delete_content_config`` telemetry event and returns
    ``{"status": "ok"}``.

    Raises:
        ValueError: If ``content_source`` is not recognized by ``map_config_to_object``.
    """
    user = request.user.object

    content_object = map_config_to_object(content_source)
    if content_object is None:
        raise ValueError(f"Invalid content source: {content_source}")
    if content_object != "Computer":
        # map_config_to_object returns the GithubConfig / NotionConfig model for those
        # sources, so this one delete removes the user's stored config for either of them.
        await content_object.objects.filter(user=user).adelete()
    await sync_to_async(EntryAdapters.delete_all_entries)(user, file_source=content_source)

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="delete_content_config",
        client=client,
        metadata={"content_source": content_source},
    )

    return {"status": "ok"}
|
378
|
+
|
379
|
+
|
380
|
+
@api_content.post("/convert", status_code=200)
@requires(["authenticated"])
async def convert_documents(
    request: Request,
    files: List[UploadFile],
    client: Optional[str] = None,
):
    """Extract the text of uploaded documents and return it as a JSON list.

    Files larger than 10MB, or whose detected type is not one of org, markdown,
    pdf, plaintext or docx, are skipped with a warning. Every converted file is
    reported as a dict with ``name``, ``content``, ``file_type`` and ``size``
    keys, where ``size`` is the length in bytes of the UTF-8 encoded content.
    """
    MAX_FILE_SIZE_MB = 10  # 10MB limit
    MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024

    converted_files = []
    supported_files = ["org", "markdown", "pdf", "plaintext", "docx"]
    # Document types whose text is extracted by a dedicated extractor
    page_extractors = {"docx": DocxToEntries, "pdf": PdfToEntries}

    for file in files:
        # Check file size first
        content = await file.read()
        file_size = len(content)
        await file.seek(0)  # Reset file pointer

        if file_size > MAX_FILE_SIZE_BYTES:
            logger.warning(
                f"Skipped converting oversized file ({file_size / 1024 / 1024:.1f}MB) sent by {client} client: {file.filename}"
            )
            continue

        file_data = get_file_content(file)
        if file_data.file_type not in supported_files:
            logger.warning(f"Skipped converting unsupported file type sent by {client} client: {file.filename}")
            continue

        if file_data.file_type in page_extractors:
            # Extractors work on the raw content, so it is not decoded for these types.
            entries_per_page = page_extractors[file_data.file_type].extract_text(file_data.content)
            annotated_pages = [
                f"Page {index} of {file_data.name}:\n\n{entry}" for index, entry in enumerate(entries_per_page)
            ]
            extracted_content = "\n".join(annotated_pages)
        else:
            # Convert content to string. Decode only bytes, and only once: calling
            # .decode() on content that is already a str raises AttributeError.
            extracted_content = file_data.content
            if isinstance(extracted_content, bytes):
                extracted_content = extracted_content.decode(file_data.encoding or "utf-8")

        # Calculate size in bytes of the extracted text
        if isinstance(extracted_content, str):
            size_in_bytes = len(extracted_content.encode("utf-8"))
        else:
            size_in_bytes = 0
            logger.warning(f"Unexpected content type: {type(extracted_content)}")

        converted_files.append(
            {
                "name": file_data.name,
                "content": extracted_content,
                "file_type": file_data.file_type,
                "size": size_in_bytes,
            }
        )

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="convert_documents",
        client=client,
    )

    return Response(content=json.dumps(converted_files), media_type="application/json", status_code=200)
|
457
|
+
|
458
|
+
|
459
|
+
async def indexer(
    request: Request,
    files: list[UploadFile],
    t: Optional[Union[state.SearchType, str]] = state.SearchType.All,
    regenerate: bool = False,
    client: Optional[str] = None,
    user_agent: Optional[str] = Header(None),
    referer: Optional[str] = Header(None),
    host: Optional[str] = Header(None),
):
    """Index the uploaded files into the authenticated user's content index.

    Files are grouped by detected type (org, markdown, pdf, plaintext, image,
    docx); files of any other type are skipped with a warning. A default server
    config is created first when ``state.config`` is unset. ``configure_content``
    then runs in the default executor so it does not block the event loop.

    Returns a 200 response whose body is the comma-separated names of the
    indexed files, or a 500 response with body ``"Failed"`` when anything in the
    process raises or ``configure_content`` reports failure.
    """
    user = request.user.object
    method = "regenerate" if regenerate else "sync"
    index_files: Dict[str, Dict[str, str]] = {
        "org": {},
        "markdown": {},
        "pdf": {},
        "plaintext": {},
        "image": {},
        "docx": {},
    }
    try:
        logger.info(f"📬 Updating content index via API call by {client} client")
        for file in files:
            file_data = get_file_content(file)
            if file_data.file_type in index_files:
                # Decode the content when an encoding is known, else keep it as is
                index_files[file_data.file_type][file_data.name] = (
                    file_data.content.decode(file_data.encoding) if file_data.encoding else file_data.content
                )
            else:
                logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file_data.name}")

        indexer_input = IndexerInput(
            org=index_files["org"],
            markdown=index_files["markdown"],
            pdf=index_files["pdf"],
            plaintext=index_files["plaintext"],
            image=index_files["image"],
            docx=index_files["docx"],
        )

        if state.config is None:
            logger.info("📬 Initializing content index on first run.")
            default_full_config = FullConfig(
                content_type=None,
                search_type=SearchConfig.model_validate(constants.default_config["search-type"]),
                processor=None,
            )
            state.config = default_full_config
            default_content_config = ContentConfig(
                org=None,
                markdown=None,
                pdf=None,
                docx=None,
                image=None,
                github=None,
                notion=None,
                plaintext=None,
            )
            state.config.content_type = default_content_config
            save_config_to_file_updated_state()
            configure_search(state.search_models, state.config.search_type)

        # Inside a coroutine the running loop is the one to schedule executor work on.
        loop = asyncio.get_running_loop()
        success = await loop.run_in_executor(
            None,
            configure_content,
            user,
            indexer_input.model_dump(),
            regenerate,
            t,
        )
        if not success:
            raise RuntimeError(f"Failed to {method} {t} data sent by {client} client into content index")
        logger.info(f"Finished {method} {t} data sent by {client} client into content index")
    except Exception as e:
        # Log the failure once, with the traceback attached.
        logger.error(
            f"🚨 Failed to {method} {t} data sent by {client} client into content index: {e}",
            exc_info=True,
        )
        return Response(content="Failed", status_code=500)

    indexing_metadata = {
        "num_org": len(index_files["org"]),
        "num_markdown": len(index_files["markdown"]),
        "num_pdf": len(index_files["pdf"]),
        "num_plaintext": len(index_files["plaintext"]),
        "num_image": len(index_files["image"]),
        "num_docx": len(index_files["docx"]),
    }

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="index/update",
        client=client,
        user_agent=user_agent,
        referer=referer,
        host=host,
        metadata=indexing_metadata,
    )

    logger.info(f"📪 Content index updated via API call by {client} client")

    indexed_filenames = ",".join(file for ctype in index_files for file in index_files[ctype]) or ""
    return Response(content=indexed_filenames, status_code=200)
|
565
|
+
|
566
|
+
|
567
|
+
def configure_search(search_models: SearchModels, search_config: Optional[SearchConfig]) -> Optional[SearchModels]:
    """Return ``search_models``, creating a fresh ``SearchModels`` when it is ``None``.

    ``search_config`` is accepted but not read.
    """
    # Only a missing container is replaced; any existing one is handed back as is.
    return SearchModels() if search_models is None else search_models
|
573
|
+
|
574
|
+
|
575
|
+
def map_config_to_object(content_source: str):
    """Map a content source name to the object that represents its config.

    Returns ``GithubConfig`` or ``NotionConfig`` for the Github and Notion entry
    sources, the string ``"Computer"`` for the computer entry source, and
    ``None`` for anything else.
    """
    entry_source = DbEntry.EntrySource

    if content_source == entry_source.GITHUB:
        config_object = GithubConfig
    elif content_source == entry_source.NOTION:
        config_object = NotionConfig
    elif content_source == entry_source.COMPUTER:
        config_object = "Computer"
    else:
        config_object = None

    return config_object
|
582
|
+
|
583
|
+
|
584
|
+
async def map_config_to_db(config: FullConfig, user: KhojUser):
    """Persist the content sections of ``config`` to the database for ``user``.

    Does nothing when ``config.content_type`` is unset. For each of the org,
    markdown, pdf and plaintext sections that is set, the user's existing rows
    of the matching local config model are deleted and a new row is created.
    Github and Notion sections, when set, are stored through ``adapters``.
    """
    content_config = config.content_type
    if not content_config:
        return

    # Local file based sources share the same fields; handled in the original order.
    local_sources = (
        (content_config.org, LocalOrgConfig),
        (content_config.markdown, LocalMarkdownConfig),
        (content_config.pdf, LocalPdfConfig),
        (content_config.plaintext, LocalPlaintextConfig),
    )
    for source_config, config_model in local_sources:
        if not source_config:
            continue
        # Replace rather than update: remove the user's old rows, then create the new one.
        await config_model.objects.filter(user=user).adelete()
        await config_model.objects.acreate(
            input_files=source_config.input_files,
            input_filter=source_config.input_filter,
            index_heading_entries=source_config.index_heading_entries,
            user=user,
        )

    github_config = content_config.github
    if github_config:
        await adapters.set_user_github_config(
            user=user,
            pat_token=github_config.pat_token,
            repos=github_config.repos,
        )

    notion_config = content_config.notion
    if notion_config:
        await adapters.set_notion_config(user=user, token=notion_config.token)
|
629
|
+
|
630
|
+
|
631
|
+
def _initialize_config():
|
632
|
+
if state.config is None:
|
633
|
+
state.config = FullConfig()
|
634
|
+
state.config.search_type = SearchConfig.model_validate(constants.default_config["search-type"])
|