khoj 1.33.3.dev32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/__init__.py +0 -0
- khoj/app/README.md +94 -0
- khoj/app/__init__.py +0 -0
- khoj/app/asgi.py +16 -0
- khoj/app/settings.py +218 -0
- khoj/app/urls.py +25 -0
- khoj/configure.py +452 -0
- khoj/database/__init__.py +0 -0
- khoj/database/adapters/__init__.py +1821 -0
- khoj/database/admin.py +417 -0
- khoj/database/apps.py +6 -0
- khoj/database/management/__init__.py +0 -0
- khoj/database/management/commands/__init__.py +0 -0
- khoj/database/management/commands/change_default_model.py +116 -0
- khoj/database/management/commands/change_generated_images_url.py +61 -0
- khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
- khoj/database/migrations/0001_khojuser.py +98 -0
- khoj/database/migrations/0002_googleuser.py +32 -0
- khoj/database/migrations/0003_vector_extension.py +10 -0
- khoj/database/migrations/0004_content_types_and_more.py +181 -0
- khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
- khoj/database/migrations/0006_embeddingsdates.py +33 -0
- khoj/database/migrations/0007_add_conversation.py +27 -0
- khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
- khoj/database/migrations/0009_khojapiuser.py +24 -0
- khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
- khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
- khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
- khoj/database/migrations/0012_entry_file_source.py +21 -0
- khoj/database/migrations/0013_subscription.py +37 -0
- khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
- khoj/database/migrations/0015_alter_subscription_user.py +21 -0
- khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
- khoj/database/migrations/0017_searchmodel.py +32 -0
- khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
- khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
- khoj/database/migrations/0020_reflectivequestion.py +36 -0
- khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
- khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
- khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
- khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
- khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
- khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
- khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
- khoj/database/migrations/0029_userrequests.py +27 -0
- khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
- khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
- khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
- khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
- khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
- khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
- khoj/database/migrations/0035_processlock.py +26 -0
- khoj/database/migrations/0036_alter_processlock_name.py +19 -0
- khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
- khoj/database/migrations/0036_publicconversation.py +42 -0
- khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
- khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
- khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
- khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
- khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
- khoj/database/migrations/0040_alter_processlock_name.py +26 -0
- khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
- khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
- khoj/database/migrations/0042_serverchatsettings.py +46 -0
- khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
- khoj/database/migrations/0044_conversation_file_filters.py +17 -0
- khoj/database/migrations/0045_fileobject.py +37 -0
- khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
- khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
- khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
- khoj/database/migrations/0049_datastore.py +38 -0
- khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
- khoj/database/migrations/0050_alter_processlock_name.py +25 -0
- khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
- khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
- khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
- khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
- khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
- khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
- khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
- khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
- khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
- khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
- khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
- khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
- khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
- khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
- khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
- khoj/database/migrations/0063_conversation_temp_id.py +36 -0
- khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
- khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
- khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
- khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
- khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
- khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
- khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
- khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
- khoj/database/migrations/0072_entry_search_model.py +24 -0
- khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
- khoj/database/migrations/0074_alter_conversation_title.py +17 -0
- khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
- khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
- khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
- khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
- khoj/database/migrations/__init__.py +0 -0
- khoj/database/models/__init__.py +725 -0
- khoj/database/tests.py +3 -0
- khoj/interface/compiled/404/index.html +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
- khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
- khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
- khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
- khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
- khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
- khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
- khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
- khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
- khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
- khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
- khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
- khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
- khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
- khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
- khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
- khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
- khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
- khoj/interface/compiled/agents/index.html +1 -0
- khoj/interface/compiled/agents/index.txt +7 -0
- khoj/interface/compiled/agents.svg +6 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
- khoj/interface/compiled/automation.svg +37 -0
- khoj/interface/compiled/automations/index.html +1 -0
- khoj/interface/compiled/automations/index.txt +8 -0
- khoj/interface/compiled/chat/index.html +1 -0
- khoj/interface/compiled/chat/index.txt +7 -0
- khoj/interface/compiled/chat.svg +24 -0
- khoj/interface/compiled/close.svg +5 -0
- khoj/interface/compiled/copy-button-success.svg +6 -0
- khoj/interface/compiled/copy-button.svg +5 -0
- khoj/interface/compiled/index.html +1 -0
- khoj/interface/compiled/index.txt +7 -0
- khoj/interface/compiled/khoj.webmanifest +76 -0
- khoj/interface/compiled/logo.svg +24 -0
- khoj/interface/compiled/search/index.html +1 -0
- khoj/interface/compiled/search/index.txt +7 -0
- khoj/interface/compiled/send.svg +1 -0
- khoj/interface/compiled/settings/index.html +1 -0
- khoj/interface/compiled/settings/index.txt +9 -0
- khoj/interface/compiled/share/chat/index.html +1 -0
- khoj/interface/compiled/share/chat/index.txt +7 -0
- khoj/interface/compiled/share.svg +8 -0
- khoj/interface/compiled/thumbs-down.svg +6 -0
- khoj/interface/compiled/thumbs-up.svg +6 -0
- khoj/interface/email/feedback.html +34 -0
- khoj/interface/email/magic_link.html +40 -0
- khoj/interface/email/task.html +37 -0
- khoj/interface/email/welcome.html +90 -0
- khoj/interface/web/.well-known/assetlinks.json +11 -0
- khoj/interface/web/assets/icons/agents.svg +19 -0
- khoj/interface/web/assets/icons/automation.svg +43 -0
- khoj/interface/web/assets/icons/chat.svg +24 -0
- khoj/interface/web/assets/icons/github.svg +1 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
- khoj/interface/web/assets/icons/khoj.svg +26 -0
- khoj/interface/web/assets/icons/logotype.svg +1 -0
- khoj/interface/web/assets/icons/search.svg +57 -0
- khoj/interface/web/assets/icons/sync.svg +4 -0
- khoj/interface/web/assets/khoj.css +237 -0
- khoj/interface/web/assets/utils.js +33 -0
- khoj/interface/web/base_config.html +445 -0
- khoj/interface/web/content_source_github_input.html +208 -0
- khoj/interface/web/login.html +310 -0
- khoj/interface/web/utils.html +48 -0
- khoj/main.py +249 -0
- khoj/manage.py +22 -0
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +69 -0
- khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
- khoj/migrations/migrate_offline_chat_schema.py +83 -0
- khoj/migrations/migrate_offline_model.py +29 -0
- khoj/migrations/migrate_processor_config_openai.py +67 -0
- khoj/migrations/migrate_server_pg.py +132 -0
- khoj/migrations/migrate_version.py +17 -0
- khoj/processor/__init__.py +0 -0
- khoj/processor/content/__init__.py +0 -0
- khoj/processor/content/docx/__init__.py +0 -0
- khoj/processor/content/docx/docx_to_entries.py +111 -0
- khoj/processor/content/github/__init__.py +0 -0
- khoj/processor/content/github/github_to_entries.py +226 -0
- khoj/processor/content/images/__init__.py +0 -0
- khoj/processor/content/images/image_to_entries.py +117 -0
- khoj/processor/content/markdown/__init__.py +0 -0
- khoj/processor/content/markdown/markdown_to_entries.py +160 -0
- khoj/processor/content/notion/notion_to_entries.py +259 -0
- khoj/processor/content/org_mode/__init__.py +0 -0
- khoj/processor/content/org_mode/org_to_entries.py +226 -0
- khoj/processor/content/org_mode/orgnode.py +532 -0
- khoj/processor/content/pdf/__init__.py +0 -0
- khoj/processor/content/pdf/pdf_to_entries.py +119 -0
- khoj/processor/content/plaintext/__init__.py +0 -0
- khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
- khoj/processor/content/text_to_entries.py +296 -0
- khoj/processor/conversation/__init__.py +0 -0
- khoj/processor/conversation/anthropic/__init__.py +0 -0
- khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
- khoj/processor/conversation/anthropic/utils.py +217 -0
- khoj/processor/conversation/google/__init__.py +0 -0
- khoj/processor/conversation/google/gemini_chat.py +253 -0
- khoj/processor/conversation/google/utils.py +260 -0
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +308 -0
- khoj/processor/conversation/offline/utils.py +80 -0
- khoj/processor/conversation/offline/whisper.py +15 -0
- khoj/processor/conversation/openai/__init__.py +0 -0
- khoj/processor/conversation/openai/gpt.py +243 -0
- khoj/processor/conversation/openai/utils.py +232 -0
- khoj/processor/conversation/openai/whisper.py +13 -0
- khoj/processor/conversation/prompts.py +1188 -0
- khoj/processor/conversation/utils.py +867 -0
- khoj/processor/embeddings.py +122 -0
- khoj/processor/image/generate.py +215 -0
- khoj/processor/speech/__init__.py +0 -0
- khoj/processor/speech/text_to_speech.py +51 -0
- khoj/processor/tools/__init__.py +0 -0
- khoj/processor/tools/online_search.py +472 -0
- khoj/processor/tools/run_code.py +179 -0
- khoj/routers/__init__.py +0 -0
- khoj/routers/api.py +760 -0
- khoj/routers/api_agents.py +295 -0
- khoj/routers/api_chat.py +1273 -0
- khoj/routers/api_content.py +634 -0
- khoj/routers/api_model.py +123 -0
- khoj/routers/api_phone.py +86 -0
- khoj/routers/api_subscription.py +144 -0
- khoj/routers/auth.py +307 -0
- khoj/routers/email.py +135 -0
- khoj/routers/helpers.py +2333 -0
- khoj/routers/notion.py +85 -0
- khoj/routers/research.py +364 -0
- khoj/routers/storage.py +63 -0
- khoj/routers/twilio.py +36 -0
- khoj/routers/web_client.py +141 -0
- khoj/search_filter/__init__.py +0 -0
- khoj/search_filter/base_filter.py +15 -0
- khoj/search_filter/date_filter.py +215 -0
- khoj/search_filter/file_filter.py +32 -0
- khoj/search_filter/word_filter.py +29 -0
- khoj/search_type/__init__.py +0 -0
- khoj/search_type/text_search.py +255 -0
- khoj/utils/__init__.py +0 -0
- khoj/utils/cli.py +101 -0
- khoj/utils/config.py +81 -0
- khoj/utils/constants.py +51 -0
- khoj/utils/fs_syncer.py +252 -0
- khoj/utils/helpers.py +627 -0
- khoj/utils/initialization.py +301 -0
- khoj/utils/jsonl.py +43 -0
- khoj/utils/models.py +47 -0
- khoj/utils/rawconfig.py +208 -0
- khoj/utils/state.py +48 -0
- khoj/utils/yaml.py +47 -0
- khoj-1.33.3.dev32.dist-info/METADATA +190 -0
- khoj-1.33.3.dev32.dist-info/RECORD +393 -0
- khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
- khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
- khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,472 @@
|
|
1
|
+
import asyncio
|
2
|
+
import json
|
3
|
+
import logging
|
4
|
+
import os
|
5
|
+
import urllib.parse
|
6
|
+
from collections import defaultdict
|
7
|
+
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
|
8
|
+
|
9
|
+
import aiohttp
|
10
|
+
from bs4 import BeautifulSoup
|
11
|
+
from markdownify import markdownify
|
12
|
+
|
13
|
+
from khoj.database.adapters import ConversationAdapters
|
14
|
+
from khoj.database.models import Agent, KhojUser, ServerChatSettings, WebScraper
|
15
|
+
from khoj.processor.conversation import prompts
|
16
|
+
from khoj.routers.helpers import (
|
17
|
+
ChatEvent,
|
18
|
+
extract_relevant_info,
|
19
|
+
generate_online_subqueries,
|
20
|
+
infer_webpage_urls,
|
21
|
+
)
|
22
|
+
from khoj.utils.helpers import (
|
23
|
+
is_env_var_true,
|
24
|
+
is_internal_url,
|
25
|
+
is_internet_connected,
|
26
|
+
is_none_or_empty,
|
27
|
+
timer,
|
28
|
+
)
|
29
|
+
from khoj.utils.rawconfig import LocationData
|
30
|
+
|
31
|
+
# Logger for this module.
logger = logging.getLogger(__name__)

# Serper.dev web search. The API key is read from the environment and may be unset.
SERPER_DEV_URL = "https://google.serper.dev/search"
SERPER_DEV_API_KEY = os.getenv("SERPER_DEV_API_KEY")

# Jina search. The API key is read from the environment and may be unset.
JINA_SEARCH_API_URL = "https://s.jina.ai/"
JINA_API_KEY = os.getenv("JINA_API_KEY")

# Feature flag read from the FIRECRAWL_USE_LLM_EXTRACT environment variable.
FIRECRAWL_USE_LLM_EXTRACT = is_env_var_true("FIRECRAWL_USE_LLM_EXTRACT")

# Query parameters for Olostep requests. Boolean options are passed as the strings "True"/"False".
OLOSTEP_QUERY_PARAMS = dict(
    timeout=35,  # seconds
    waitBeforeScraping=0,  # seconds
    saveHtml="False",
    saveMarkdown="True",
    removeCSSselectors="default",
    htmlTransformer="none",
    removeImages="True",
    fastLane="True",
    # Similar to Stripe's API, the expand parameters avoid the need to make a second API call
    # to retrieve the dataset (from the dataset API) if you only need the markdown or html.
    expandMarkdown="True",
    expandHtml="False",
)

# Default for the `max_webpages_to_read` parameter of `search_online`.
DEFAULT_MAX_WEBPAGES_TO_READ = 1
MAX_WEBPAGES_TO_INFER = 10
|
58
|
+
|
59
|
+
|
60
|
+
async def search_online(
    query: str,
    conversation_history: dict,
    location: LocationData,
    user: KhojUser,
    send_status_func: Optional[Callable] = None,
    custom_filters: Optional[List[str]] = None,
    max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
    query_images: List[str] = None,
    previous_subqueries: Optional[Set] = None,
    agent: Agent = None,
    query_files: str = None,
    tracer: Optional[dict] = None,
):
    """Search the internet for a query and read the top web pages found.

    This is an async generator. It yields:
      - `{}` and stops, if there is no internet connection.
      - `{ChatEvent.STATUS: event}` for every event produced by `send_status_func`.
      - Finally, a dict keyed by subquery. Each value is the search result for that
        subquery, with a "webpages" entry added when a web page was read for it.

    Args:
        query: The user query to search for.
        conversation_history: Passed to `generate_online_subqueries`.
        location: Passed to `generate_online_subqueries` and to the search backend.
        user: Passed to the subquery generator and the web page reader.
        send_status_func: Optional callable returning an async iterable of status events.
        custom_filters: Extra search terms appended to the query, space separated.
        max_webpages_to_read: Number of leading organic results to read per subquery.
        query_images: Passed to `generate_online_subqueries`.
        previous_subqueries: Subqueries that should not be searched again.
        agent: Passed to the subquery generator and the web page reader.
        query_files: Passed to `generate_online_subqueries`.
        tracer: Dict passed to downstream helpers. A new dict is used when omitted.
    """
    # Normalize optional arguments. Fresh containers avoid sharing state across calls.
    custom_filters = [] if custom_filters is None else custom_filters
    previous_subqueries = set() if previous_subqueries is None else previous_subqueries
    tracer = {} if tracer is None else tracer

    # Separate the filters from the query so the first filter is not glued to the last word.
    if custom_filters:
        query = f"{query} {' '.join(custom_filters)}"

    if not is_internet_connected():
        logger.warning("Cannot search online as not connected to internet")
        yield {}
        return

    # Breakdown the query into subqueries to get the correct answer
    new_subqueries = await generate_online_subqueries(
        query,
        conversation_history,
        location,
        user,
        query_images=query_images,
        agent=agent,
        tracer=tracer,
        query_files=query_files,
    )
    subqueries = list(new_subqueries - previous_subqueries)
    response_dict: Dict[str, Dict[str, List[Dict] | Dict]] = {}

    if is_none_or_empty(subqueries):
        logger.info("No new subqueries to search online")
        yield response_dict
        return

    logger.info(f"🌐 Searching the Internet for {subqueries}")
    if send_status_func:
        subqueries_str = "\n- " + "\n- ".join(subqueries)
        async for event in send_status_func(f"**Searching the Internet for**: {subqueries_str}"):
            yield {ChatEvent.STATUS: event}

    # Pick the search backend based on which API keys are configured.
    if SERPER_DEV_API_KEY:
        search_func = search_with_serper
    elif JINA_API_KEY:
        search_func = search_with_jina
    else:
        search_func = search_with_searxng

    with timer(f"Internet searches for {subqueries} took", logger):
        search_tasks = [search_func(subquery, location) for subquery in subqueries]
        search_results = await asyncio.gather(*search_tasks)
        response_dict = {subquery: search_result for subquery, search_result in search_results}

    # Gather distinct web pages from organic results for subqueries without an instant answer.
    webpages: Dict[str, Dict] = {}
    for subquery, search_result in response_dict.items():
        if "answerBox" in search_result:
            continue
        for idx, organic in enumerate(search_result.get("organic", [])):
            if idx >= max_webpages_to_read:
                # Only keep webpage content for up to max_webpages_to_read organic results.
                if not is_none_or_empty(organic.get("content")):
                    organic["content"] = None
                continue

            link = organic.get("link")
            if not link:
                # A result without a link cannot be read or listed in the status message.
                continue

            if link in webpages:
                webpages[link]["queries"].add(subquery)
            else:
                # Content of web pages is directly available when Jina is used for search.
                webpages[link] = {"queries": {subquery}, "content": organic.get("content")}

    # Read, extract relevant info from the retrieved web pages
    if webpages:
        logger.info(f"Reading web pages at: {webpages.keys()}")
        if send_status_func:
            webpage_links_str = "\n- " + "\n- ".join(webpages.keys())
            async for event in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
                yield {ChatEvent.STATUS: event}
        tasks = [
            read_webpage_and_extract_content(
                data["queries"], link, data.get("content"), user=user, agent=agent, tracer=tracer
            )
            for link, data in webpages.items()
        ]
        results = await asyncio.gather(*tasks)

        # Collect extracted info from the retrieved web pages.
        # The extract is attached to one of the subqueries that surfaced the page.
        for page_subqueries, url, webpage_extract in results:
            if webpage_extract is not None:
                response_dict[page_subqueries.pop()]["webpages"] = {"link": url, "snippet": webpage_extract}

    yield response_dict
|
155
|
+
|
156
|
+
|
157
|
+
async def search_with_searxng(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
    """
    Search the web through a SearXNG instance by scraping its HTML results page.

    The instance base URL is read from the KHOJ_SEARXNG_URL environment variable
    and defaults to http://localhost:42113.

    Returns a (query, results) tuple. On success, results is {"organic": [...]} where
    every entry carries "title", "link" and "description" keys. On a non-200 response
    or any exception, results is an empty dict.
    """
    base_url = os.getenv("KHOJ_SEARXNG_URL", "http://localhost:42113")
    endpoint = f"{base_url}/search"

    # Default to the US when the user's country is not known
    if location and location.country_code:
        country = location.country_code.lower()
    else:
        country = "us"

    request_params = {
        "q": query,
        "format": "html",
        "language": "en",
        "country": country,
        "categories": "general",
    }

    async with aiohttp.ClientSession() as session:
        try:
            async with session.get(endpoint, params=request_params) as response:
                if response.status != 200:
                    logger.error(f"SearXNG search failed to call {base_url}: {await response.text()}")
                    return query, {}

                page = BeautifulSoup(await response.text(), "html.parser")

                hits = []
                for article in page.find_all("article", class_="result"):
                    anchor = article.find("a", rel="noreferrer")
                    # Results without a title anchor are skipped
                    if not anchor:
                        continue
                    summary = article.find("p", class_="content")
                    hits.append(
                        {
                            "title": anchor.text.strip(),
                            "link": anchor["href"],
                            "description": summary.text.strip() if summary else None,
                        }
                    )

                return query, {"organic": hits}

        except Exception as e:
            # Any failure (network, parsing, missing attributes) yields an empty result
            logger.error(f"Error searching with SearXNG: {str(e)}")
            return query, {}
|
196
|
+
|
197
|
+
|
198
|
+
async def search_with_serper(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
    """
    Search the web by POSTing the query to SERPER_DEV_URL.

    The user's lower-cased country code (or "us" when unknown) is sent as the "gl" field.

    Returns a (query, results) tuple. results only contains those of the "organic",
    "answerBox", "peopleAlsoAsk" and "knowledgeGraph" fields that are non-empty in the
    response. On a non-200 response, results is an empty dict.
    """
    if location and location.country_code:
        country = location.country_code.lower()
    else:
        country = "us"

    request_body = json.dumps({"q": query, "gl": country})
    request_headers = {"X-API-KEY": SERPER_DEV_API_KEY, "Content-Type": "application/json"}

    async with aiohttp.ClientSession() as session:
        async with session.post(SERPER_DEV_URL, headers=request_headers, data=request_body) as response:
            if response.status != 200:
                logger.error(await response.text())
                return query, {}

            payload = await response.json()

            # Keep only the known result sections that actually have content
            search_result = {}
            for section in ("organic", "answerBox", "peopleAlsoAsk", "knowledgeGraph"):
                if is_none_or_empty(payload.get(section)):
                    continue
                search_result[section] = payload[section]

            return query, search_result
|
217
|
+
|
218
|
+
|
219
|
+
async def read_webpages(
    query: str,
    conversation_history: dict,
    location: LocationData,
    user: KhojUser,
    send_status_func: Optional[Callable] = None,
    query_images: List[str] = None,
    agent: Agent = None,
    max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
    query_files: str = None,
    tracer: dict = {},
):
    """
    Infer web pages to read from the query and extract relevant information from them.

    This is an async generator. When send_status_func is given, it first yields
    {ChatEvent.STATUS: event} items. Its final item is a dict mapping the query to
    {"webpages": [...]}, with one {"query", "link", "snippet"} entry per web page
    that produced an extract.
    """
    logger.info("Inferring web pages to read")
    inferred_urls = await infer_webpage_urls(
        query,
        conversation_history,
        location,
        user,
        query_images,
        agent=agent,
        query_files=query_files,
        tracer=tracer,
    )

    # Read at most max_webpages_to_read of the inferred web pages
    urls = inferred_urls[:max_webpages_to_read]

    logger.info(f"Reading web pages at: {urls}")
    if send_status_func:
        bullet = "\n- "
        webpage_links_str = bullet + bullet.join(urls)
        async for status in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
            yield {ChatEvent.STATUS: status}

    # Read all the web pages concurrently. Each task gets its own single-item query set
    results = await asyncio.gather(
        *[read_webpage_and_extract_content({query}, url, user=user, agent=agent, tracer=tracer) for url in urls]
    )

    # Only keep web pages from which information could be extracted
    extracts = []
    for subqueries, url, extract in results:
        if extract is None:
            continue
        extracts.append({"query": subqueries.pop(), "link": url, "snippet": extract})

    response: Dict[str, Dict] = defaultdict(dict)
    response[query]["webpages"] = extracts
    yield response
|
260
|
+
|
261
|
+
|
262
|
+
async def read_webpage(
    url, scraper_type=None, api_key=None, api_url=None, subqueries=None, agent=None
) -> Tuple[str | None, str | None]:
    """
    Read the web page at url with the web scraper selected by scraper_type.

    Returns a (content, extracted_info) tuple. Only Firecrawl with FIRECRAWL_USE_LLM_EXTRACT
    enabled fills extracted_info (and leaves content as None). Every other scraper returns
    the page content and leaves extracted_info as None. Unrecognized scraper types read the
    url directly.
    """
    scraper_types = WebScraper.WebScraperType

    if scraper_type == scraper_types.FIRECRAWL:
        # Firecrawl can answer the subqueries itself instead of returning the raw page
        if FIRECRAWL_USE_LLM_EXTRACT:
            return None, await query_webpage_with_firecrawl(url, subqueries, api_key, api_url, agent)
        return await read_webpage_with_firecrawl(url, api_key, api_url), None

    if scraper_type == scraper_types.OLOSTEP:
        return await read_webpage_with_olostep(url, api_key, api_url), None

    if scraper_type == scraper_types.JINA:
        return await read_webpage_with_jina(url, api_key, api_url), None

    return await read_webpage_at_url(url), None
|
275
|
+
|
276
|
+
|
277
|
+
async def read_webpage_and_extract_content(
    subqueries: set[str],
    url: str,
    content: str = None,
    user: KhojUser = None,
    agent: Agent = None,
    tracer: dict = {},
) -> Tuple[set[str], str, Union[None, str]]:
    """
    Read the web page at url and extract the information relevant to the subqueries.

    Enabled web scrapers are tried in order until one yields a non-empty extract.
    Internal urls are only read with the direct web scraper. The page is only fetched
    while content is empty, so content passed in by the caller (or read by an earlier
    scraper) is reused.

    Returns a (subqueries, url, extracted_info) tuple. extracted_info stays None or empty
    when no scraper produced an extract.
    """
    # Pick the web scrapers that may be used to read this web page
    web_scrapers = await ConversationAdapters.aget_enabled_webscrapers()
    if is_internal_url(url):
        direct_type = WebScraper.WebScraperType.DIRECT
        web_scrapers = [candidate for candidate in web_scrapers if candidate.type == direct_type]

    # Try each web scraper in turn until information is successfully extracted
    extracted_info = None
    for web_scraper in web_scrapers:
        try:
            # Fetch the web page, unless its content is already available
            if is_none_or_empty(content):
                with timer(
                    f"Reading web page with {web_scraper.type} at '{url}' took", logger, log_level=logging.INFO
                ):
                    content, extracted_info = await read_webpage(
                        url, web_scraper.type, web_scraper.api_key, web_scraper.api_url, subqueries, agent
                    )

            # Extract the relevant information, unless the scraper already did
            if is_none_or_empty(extracted_info):
                with timer(f"Extracting relevant information from web page at '{url}' took", logger):
                    extracted_info = await extract_relevant_info(
                        subqueries, content, user=user, agent=agent, tracer=tracer
                    )

            # Stop at the first scraper that yields an extract
            if not is_none_or_empty(extracted_info):
                break
        except Exception as err:
            logger.warning(f"Failed to read web page with {web_scraper.type} at '{url}' with {err}")
            # Escalate to an error once the last web scraper in the list has failed too
            if web_scraper.name == web_scrapers[-1].name:
                logger.error(f"All web scrapers failed for '{url}'")

    return subqueries, url, extracted_info
|
319
|
+
|
320
|
+
|
321
|
+
async def read_webpage_at_url(web_url: str) -> str:
    """
    Fetch the web page at web_url directly and return its text content.

    The request is sent with a desktop browser User-Agent and a 30 second total timeout.
    The text of the document body is extracted with newline separators and passed through
    markdownify. Documents without a <body> tag (e.g. HTML fragments or plain text) fall
    back to the text of the whole document instead of failing.

    Raises:
        aiohttp.ClientResponseError: when the response has an error status.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
    }
    # Use an explicit ClientTimeout rather than a bare number for the request timeout
    request_timeout = aiohttp.ClientTimeout(total=30)

    async with aiohttp.ClientSession() as session:
        async with session.get(web_url, headers=headers, timeout=request_timeout) as response:
            response.raise_for_status()
            html = await response.text()
            parsed_html = BeautifulSoup(html, "html.parser")
            # html.parser does not synthesize a <body>, so .body is None for documents without one
            text_root = parsed_html.body if parsed_html.body is not None else parsed_html
            body = text_root.get_text(separator="\n", strip=True)
            return markdownify(body)
|
333
|
+
|
334
|
+
|
335
|
+
async def read_webpage_with_olostep(web_url: str, api_key: str, api_url: str) -> str:
    """
    Read the web page at web_url through the Olostep API at api_url.

    Sends a copy of OLOSTEP_QUERY_PARAMS plus the target url as query parameters and
    returns the "markdown_content" field of the JSON response.

    Raises:
        aiohttp.ClientResponseError: when the response has an error status.
    """
    # Copy the shared default parameters so the module-level constant is not modified
    request_params: Dict[str, Union[str, int, bool]] = OLOSTEP_QUERY_PARAMS.copy()  # type: ignore
    request_params["url"] = web_url
    auth_headers = {"Authorization": f"Bearer {api_key}"}

    async with aiohttp.ClientSession() as session:
        async with session.get(api_url, params=request_params, headers=auth_headers) as response:
            response.raise_for_status()
            payload = await response.json()
            return payload["markdown_content"]
|
345
|
+
|
346
|
+
|
347
|
+
async def read_webpage_with_jina(web_url: str, api_key: str, api_url: str) -> str:
    """
    Read the web page at web_url through the Jina reader API at api_url.

    The target url is appended to api_url as a path. The Authorization header is only
    sent when an api_key is provided. Returns the "content" field under "data" in the
    JSON response.

    Raises:
        aiohttp.ClientResponseError: when the response has an error status.
    """
    request_headers = {"Accept": "application/json", "X-Timeout": "30"}
    if api_key:
        request_headers["Authorization"] = f"Bearer {api_key}"
    reader_url = f"{api_url}/{web_url}"

    async with aiohttp.ClientSession() as session:
        async with session.get(reader_url, headers=request_headers) as response:
            response.raise_for_status()
            payload = await response.json()
            return payload["data"]["content"]
|
358
|
+
|
359
|
+
|
360
|
+
async def read_webpage_with_firecrawl(web_url: str, api_key: str, api_url: str) -> str:
    """
    Read the web page at web_url as markdown through the Firecrawl scrape endpoint.

    POSTs to {api_url}/v1/scrape asking for the "markdown" format, with "script" and ".ad"
    tags excluded. Returns the "markdown" field under "data" in the JSON response.

    Raises:
        aiohttp.ClientResponseError: when the response has an error status.
    """
    scrape_endpoint = f"{api_url}/v1/scrape"
    request_headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
    request_body = {
        "url": web_url,
        "formats": ["markdown"],
        "excludeTags": ["script", ".ad"],
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(scrape_endpoint, json=request_body, headers=request_headers) as response:
            response.raise_for_status()
            payload = await response.json()
            return payload["data"]["markdown"]
|
370
|
+
|
371
|
+
|
372
|
+
async def query_webpage_with_firecrawl(
    web_url: str, queries: set[str], api_key: str, api_url: str, agent: Agent = None
) -> str:
    """
    Ask the Firecrawl scrape endpoint to extract the information relevant to the queries from web_url.

    POSTs to {api_url}/v1/scrape asking for the "extract" format, with a system prompt built
    from the extraction prompt, the agent's personality (when it has one) and the comma
    separated queries. The JSON schema sent requires a single "relevant_extract" string,
    which is what this function returns.

    Raises:
        aiohttp.ClientResponseError: when the response has an error status.
    """
    scrape_endpoint = f"{api_url}/v1/scrape"
    request_headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}

    # Schema of the structured output requested from Firecrawl
    extract_schema = {
        "type": "object",
        "properties": {"relevant_extract": {"type": "string"}},
        "required": ["relevant_extract"],
    }

    # Only agents with a personality contribute to the prompt
    if agent and agent.personality:
        personality_context = prompts.personality_context.format(personality=agent.personality)
    else:
        personality_context = ""
    joined_queries = ", ".join(queries)

    system_prompt = f"""
{prompts.system_prompt_extract_relevant_information}

{personality_context}
User Query: {joined_queries}

Collate only relevant information from the website to answer the target query and in the provided JSON schema.
""".strip()

    request_body = {
        "url": web_url,
        "formats": ["extract"],
        "extract": {"systemPrompt": system_prompt, "schema": extract_schema},
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(scrape_endpoint, json=request_body, headers=request_headers) as response:
            response.raise_for_status()
            payload = await response.json()
            return payload["data"]["extract"]["relevant_extract"]
|
406
|
+
|
407
|
+
|
408
|
+
async def search_with_jina(query: str, location: LocationData) -> Tuple[str, Dict[str, List[Dict]]]:
    """
    Search the web through the Jina search API at JINA_SEARCH_API_URL.

    The API key is taken from the Jina web scraper configured in the database (the one
    referenced by the server chat settings, else the first Jina scraper found), falling
    back to JINA_API_KEY. The Authorization header is only sent when a key is available.

    Returns a (query, results) tuple. On success, results is {"organic": [...]} where every
    entry carries "title", "content", "snippet" and "link" keys. On a non-200 response,
    results is an empty dict.
    """
    search_url = f"{JINA_SEARCH_API_URL}/{urllib.parse.quote(query)}"
    request_headers = {"Accept": "application/json"}

    # Prefer the Jina scraper referenced by the server chat settings
    server_settings = (
        await ServerChatSettings.objects.filter()
        .prefetch_related("web_scraper")
        .filter(web_scraper__type=WebScraper.WebScraperType.JINA)
        .afirst()
    )
    if server_settings and server_settings.web_scraper:
        jina_scraper = server_settings.web_scraper
    else:
        # Otherwise use the first Jina scraper configured in the database, if any
        jina_scraper = await WebScraper.objects.filter(type=WebScraper.WebScraperType.JINA).afirst()

    # Use the API key of the configured scraper when it has one, else the module-level key
    if jina_scraper and jina_scraper.api_key:
        api_key = jina_scraper.api_key
    else:
        api_key = JINA_API_KEY

    if api_key:
        request_headers["Authorization"] = f"Bearer {api_key}"

    async with aiohttp.ClientSession() as session:
        async with session.get(search_url, headers=request_headers) as response:
            if response.status != 200:
                error_text = await response.text()
                logger.error(f"Jina search failed: {error_text}")
                return query, {}

            payload = await response.json()

            organic_results = []
            for hit in payload["data"]:
                organic_results.append(
                    {
                        "title": hit["title"],
                        "content": hit.get("content"),
                        # description is exposed as snippet for consistency
                        "snippet": hit["description"],
                        # url is exposed as link for consistency
                        "link": hit["url"],
                    }
                )
            return query, {"organic": organic_results}
|
451
|
+
|
452
|
+
|
453
|
+
def deduplicate_organic_results(online_results: dict) -> dict:
    """
    Remove organic search results whose link already appeared earlier, across all queries.

    Queries are processed in iteration order and only the first occurrence of a link is kept.
    Organic results without a link are dropped. Every query in the returned dict gets an
    "organic" list (possibly empty); its other fields are carried over unchanged.
    The input is not modified.
    """
    visited_links = set()
    unique_results = {}

    for subquery, search_result in online_results.items():
        kept_entries = []
        for entry in search_result.get("organic", []):
            url = entry.get("link")
            # Skip entries without a link or with a link seen before
            if not url or url in visited_links:
                continue
            visited_links.add(url)
            kept_entries.append(entry)

        # Shallow copy the query's results, swapping in the filtered organic entries
        merged = dict(search_result)
        merged["organic"] = kept_entries
        unique_results[subquery] = merged

    return unique_results
|
@@ -0,0 +1,179 @@
|
|
1
|
+
import base64
|
2
|
+
import datetime
|
3
|
+
import logging
|
4
|
+
import mimetypes
|
5
|
+
import os
|
6
|
+
from pathlib import Path
|
7
|
+
from typing import Any, Callable, List, NamedTuple, Optional
|
8
|
+
|
9
|
+
import aiohttp
|
10
|
+
|
11
|
+
from khoj.database.adapters import FileObjectAdapters
|
12
|
+
from khoj.database.models import Agent, FileObject, KhojUser
|
13
|
+
from khoj.processor.conversation import prompts
|
14
|
+
from khoj.processor.conversation.utils import (
|
15
|
+
ChatEvent,
|
16
|
+
clean_code_python,
|
17
|
+
construct_chat_history,
|
18
|
+
load_complex_json,
|
19
|
+
)
|
20
|
+
from khoj.routers.helpers import send_message_to_model_wrapper
|
21
|
+
from khoj.utils.helpers import is_none_or_empty, timer, truncate_code_context
|
22
|
+
from khoj.utils.rawconfig import LocationData
|
23
|
+
|
24
|
+
# Module-level logger, named after this module
logger = logging.getLogger(__name__)


# URL of the Terrarium code sandbox. Override it with the KHOJ_TERRARIUM_URL environment variable
SANDBOX_URL = os.environ.get("KHOJ_TERRARIUM_URL", "http://localhost:8080")
|
28
|
+
|
29
|
+
|
30
|
+
class GeneratedCode(NamedTuple):
    """Program generated by the chat model, along with the inputs it declared."""

    # Source of the program to execute
    code: str
    # Names of the user's files to make available to the program
    input_files: List[str]
    # Value of the "input_links" field of the model response
    input_links: List[str]
|
34
|
+
|
35
|
+
|
36
|
+
async def run_code(
    query: str,
    conversation_history: dict,
    context: str,
    location_data: LocationData,
    user: KhojUser,
    send_status_func: Optional[Callable] = None,
    query_images: List[str] = None,
    agent: Agent = None,
    sandbox_url: str = SANDBOX_URL,
    query_files: str = None,
    tracer: dict = {},
):
    """
    Generate a Python program for the query, run it in the sandbox and yield its result.

    This is an async generator. When send_status_func is given, it yields
    {ChatEvent.STATUS: event} items before generating and before running the program.
    Its final item is {query: {"code": <executed code>, "results": <sandbox result>}}.

    Raises:
        ValueError: when generating or running the program fails.
    """
    # Step 1: Ask the chat model to write the program
    if send_status_func:
        async for status in send_status_func(f"**Generate code snippet** for {query}"):
            yield {ChatEvent.STATUS: status}
    try:
        with timer("Chat actor: Generate programs to execute", logger):
            program = await generate_python_code(
                query,
                conversation_history,
                context,
                location_data,
                user,
                query_images,
                agent,
                tracer,
                query_files,
            )
    except Exception as e:
        raise ValueError(f"Failed to generate code for {query} with error: {e}")

    # Step 2: Collect the user's files requested by the program and base64 encode their text
    requested_files: List[FileObject] = []
    for requested_name in program.input_files:
        requested_files += await FileObjectAdapters.aget_file_objects_by_name(user, requested_name)
    input_data = [
        {
            "filename": os.path.basename(file_object.file_name),
            "b64_data": base64.b64encode(file_object.raw_text.encode("utf-8")).decode("utf-8"),
        }
        for file_object in requested_files
    ]

    # Step 3: Execute the program in the sandbox
    if send_status_func:
        async for status in send_status_func("**Running code snippet**"):
            yield {ChatEvent.STATUS: status}
    try:
        with timer("Chat actor: Execute generated program", logger, log_level=logging.INFO):
            result = await execute_sandboxed_python(program.code, input_data, sandbox_url)
        # The executed code is reported separately from the rest of the result
        code = result.pop("code")
        # Only the logged copy of the result is truncated
        cleaned_result = truncate_code_context({"cleaned": {"results": result}})["cleaned"]["results"]
        logger.info(f"Executed Code\n----\n{code}\n----\nResult\n----\n{cleaned_result}\n----")
        yield {query: {"code": code, "results": result}}
    except Exception as e:
        raise ValueError(f"Failed to run code for {query} with error: {e}")
|
95
|
+
|
96
|
+
|
97
|
+
async def generate_python_code(
    q: str,
    conversation_history: dict,
    context: str,
    location_data: LocationData,
    user: KhojUser,
    query_images: list[str] = None,
    agent: Agent = None,
    tracer: dict = {},
    query_files: str = None,
) -> GeneratedCode:
    """
    Ask the chat model to write a Python program that answers the query.

    The prompt is built from the query, chat history, context, user location and name,
    the current UTC date and the agent's personality (when it has one). The model is
    asked for a JSON object, from which the "code", "input_files" and "input_links"
    fields are read.

    Returns:
        The generated program with surrounding whitespace stripped, and the input files
        and links declared by the model (empty lists when absent).

    Raises:
        ValueError: when the response is not a JSON object or has no non-empty "code" string.
    """
    location = f"{location_data}" if location_data else "Unknown"
    full_name = user.get_full_name()
    username = prompts.user_name.format(name=full_name) if full_name else ""
    chat_history = construct_chat_history(conversation_history)

    utc_date = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
    personality_context = (
        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
    )

    code_generation_prompt = prompts.python_code_generation_prompt.format(
        current_date=utc_date,
        query=q,
        chat_history=chat_history,
        context=context,
        location=location,
        username=username,
        personality_context=personality_context,
    )

    response = await send_message_to_model_wrapper(
        code_generation_prompt,
        query_images=query_images,
        response_type="json_object",
        user=user,
        tracer=tracer,
        query_files=query_files,
    )

    # Validate that the response is a JSON object with a non-empty code string
    response = load_complex_json(response)
    if not isinstance(response, dict):
        raise ValueError(f"Expected a JSON object from the code generation model, got {type(response).__name__}")

    code = response.get("code", "")
    input_files = response.get("input_files", [])
    input_links = response.get("input_links", [])

    # Check the type before stripping, so a non-string code fails with a clear ValueError
    if not isinstance(code, str):
        raise ValueError(f"Expected generated code to be a string, got {type(code).__name__}")
    code = code.strip()
    if is_none_or_empty(code):
        raise ValueError("Code generation model returned no code")
    return GeneratedCode(code, input_files, input_links)
|
145
|
+
|
146
|
+
|
147
|
+
async def execute_sandboxed_python(code: str, input_data: list[dict], sandbox_url: str = SANDBOX_URL) -> dict[str, Any]:
    """
    Takes code to run as a string and calls the terrarium API to execute it.
    Returns the result of the code execution as a dictionary.

    The returned dictionary always has the cleaned "code" that was sent and an
    "output_files" list. Output files that look like text (text/* mime type, or a
    .org, .md or .json suffix) have their "b64_data" decoded to a UTF-8 string in place.
    On a non-200 response, the dictionary has "success" set to False and a "std_err" message.

    Reference data i/o format based on Terrarium example client code at:
    https://github.com/cohere-ai/cohere-terrarium/blob/main/example-clients/python/terrarium_client.py
    """
    headers = {"Content-Type": "application/json"}
    cleaned_code = clean_code_python(code)
    data = {"code": cleaned_code, "files": input_data}
    text_suffixes = (".org", ".md", ".json")

    async with aiohttp.ClientSession() as session:
        async with session.post(sandbox_url, json=data, headers=headers) as response:
            if response.status != 200:
                return {
                    "code": cleaned_code,
                    "success": False,
                    "std_err": f"Failed to execute code with {response.status}",
                    "output_files": [],
                }

            result: dict[str, Any] = await response.json()
            result["code"] = cleaned_code
            # Store decoded output files
            result["output_files"] = result.get("output_files", [])
            for output_file in result["output_files"]:
                filename = output_file["filename"]
                # guess_type returns None for unknown extensions (e.g. .org),
                # so guard it to let the suffix check below still apply
                mime_type = mimetypes.guess_type(filename)[0] or ""
                # Decode text files as UTF-8
                if mime_type.startswith("text/") or Path(filename).suffix in text_suffixes:
                    output_file["b64_data"] = base64.b64decode(output_file["b64_data"]).decode("utf-8")
            return result
|
khoj/routers/__init__.py
ADDED
File without changes
|