khoj 1.33.3.dev32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/__init__.py +0 -0
- khoj/app/README.md +94 -0
- khoj/app/__init__.py +0 -0
- khoj/app/asgi.py +16 -0
- khoj/app/settings.py +218 -0
- khoj/app/urls.py +25 -0
- khoj/configure.py +452 -0
- khoj/database/__init__.py +0 -0
- khoj/database/adapters/__init__.py +1821 -0
- khoj/database/admin.py +417 -0
- khoj/database/apps.py +6 -0
- khoj/database/management/__init__.py +0 -0
- khoj/database/management/commands/__init__.py +0 -0
- khoj/database/management/commands/change_default_model.py +116 -0
- khoj/database/management/commands/change_generated_images_url.py +61 -0
- khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
- khoj/database/migrations/0001_khojuser.py +98 -0
- khoj/database/migrations/0002_googleuser.py +32 -0
- khoj/database/migrations/0003_vector_extension.py +10 -0
- khoj/database/migrations/0004_content_types_and_more.py +181 -0
- khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
- khoj/database/migrations/0006_embeddingsdates.py +33 -0
- khoj/database/migrations/0007_add_conversation.py +27 -0
- khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
- khoj/database/migrations/0009_khojapiuser.py +24 -0
- khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
- khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
- khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
- khoj/database/migrations/0012_entry_file_source.py +21 -0
- khoj/database/migrations/0013_subscription.py +37 -0
- khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
- khoj/database/migrations/0015_alter_subscription_user.py +21 -0
- khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
- khoj/database/migrations/0017_searchmodel.py +32 -0
- khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
- khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
- khoj/database/migrations/0020_reflectivequestion.py +36 -0
- khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
- khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
- khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
- khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
- khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
- khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
- khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
- khoj/database/migrations/0029_userrequests.py +27 -0
- khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
- khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
- khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
- khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
- khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
- khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
- khoj/database/migrations/0035_processlock.py +26 -0
- khoj/database/migrations/0036_alter_processlock_name.py +19 -0
- khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
- khoj/database/migrations/0036_publicconversation.py +42 -0
- khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
- khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
- khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
- khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
- khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
- khoj/database/migrations/0040_alter_processlock_name.py +26 -0
- khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
- khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
- khoj/database/migrations/0042_serverchatsettings.py +46 -0
- khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
- khoj/database/migrations/0044_conversation_file_filters.py +17 -0
- khoj/database/migrations/0045_fileobject.py +37 -0
- khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
- khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
- khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
- khoj/database/migrations/0049_datastore.py +38 -0
- khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
- khoj/database/migrations/0050_alter_processlock_name.py +25 -0
- khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
- khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
- khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
- khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
- khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
- khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
- khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
- khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
- khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
- khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
- khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
- khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
- khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
- khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
- khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
- khoj/database/migrations/0063_conversation_temp_id.py +36 -0
- khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
- khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
- khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
- khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
- khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
- khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
- khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
- khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
- khoj/database/migrations/0072_entry_search_model.py +24 -0
- khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
- khoj/database/migrations/0074_alter_conversation_title.py +17 -0
- khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
- khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
- khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
- khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
- khoj/database/migrations/__init__.py +0 -0
- khoj/database/models/__init__.py +725 -0
- khoj/database/tests.py +3 -0
- khoj/interface/compiled/404/index.html +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
- khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
- khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
- khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
- khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
- khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
- khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
- khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
- khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
- khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
- khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
- khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
- khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
- khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
- khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
- khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
- khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
- khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
- khoj/interface/compiled/agents/index.html +1 -0
- khoj/interface/compiled/agents/index.txt +7 -0
- khoj/interface/compiled/agents.svg +6 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
- khoj/interface/compiled/automation.svg +37 -0
- khoj/interface/compiled/automations/index.html +1 -0
- khoj/interface/compiled/automations/index.txt +8 -0
- khoj/interface/compiled/chat/index.html +1 -0
- khoj/interface/compiled/chat/index.txt +7 -0
- khoj/interface/compiled/chat.svg +24 -0
- khoj/interface/compiled/close.svg +5 -0
- khoj/interface/compiled/copy-button-success.svg +6 -0
- khoj/interface/compiled/copy-button.svg +5 -0
- khoj/interface/compiled/index.html +1 -0
- khoj/interface/compiled/index.txt +7 -0
- khoj/interface/compiled/khoj.webmanifest +76 -0
- khoj/interface/compiled/logo.svg +24 -0
- khoj/interface/compiled/search/index.html +1 -0
- khoj/interface/compiled/search/index.txt +7 -0
- khoj/interface/compiled/send.svg +1 -0
- khoj/interface/compiled/settings/index.html +1 -0
- khoj/interface/compiled/settings/index.txt +9 -0
- khoj/interface/compiled/share/chat/index.html +1 -0
- khoj/interface/compiled/share/chat/index.txt +7 -0
- khoj/interface/compiled/share.svg +8 -0
- khoj/interface/compiled/thumbs-down.svg +6 -0
- khoj/interface/compiled/thumbs-up.svg +6 -0
- khoj/interface/email/feedback.html +34 -0
- khoj/interface/email/magic_link.html +40 -0
- khoj/interface/email/task.html +37 -0
- khoj/interface/email/welcome.html +90 -0
- khoj/interface/web/.well-known/assetlinks.json +11 -0
- khoj/interface/web/assets/icons/agents.svg +19 -0
- khoj/interface/web/assets/icons/automation.svg +43 -0
- khoj/interface/web/assets/icons/chat.svg +24 -0
- khoj/interface/web/assets/icons/github.svg +1 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
- khoj/interface/web/assets/icons/khoj.svg +26 -0
- khoj/interface/web/assets/icons/logotype.svg +1 -0
- khoj/interface/web/assets/icons/search.svg +57 -0
- khoj/interface/web/assets/icons/sync.svg +4 -0
- khoj/interface/web/assets/khoj.css +237 -0
- khoj/interface/web/assets/utils.js +33 -0
- khoj/interface/web/base_config.html +445 -0
- khoj/interface/web/content_source_github_input.html +208 -0
- khoj/interface/web/login.html +310 -0
- khoj/interface/web/utils.html +48 -0
- khoj/main.py +249 -0
- khoj/manage.py +22 -0
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +69 -0
- khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
- khoj/migrations/migrate_offline_chat_schema.py +83 -0
- khoj/migrations/migrate_offline_model.py +29 -0
- khoj/migrations/migrate_processor_config_openai.py +67 -0
- khoj/migrations/migrate_server_pg.py +132 -0
- khoj/migrations/migrate_version.py +17 -0
- khoj/processor/__init__.py +0 -0
- khoj/processor/content/__init__.py +0 -0
- khoj/processor/content/docx/__init__.py +0 -0
- khoj/processor/content/docx/docx_to_entries.py +111 -0
- khoj/processor/content/github/__init__.py +0 -0
- khoj/processor/content/github/github_to_entries.py +226 -0
- khoj/processor/content/images/__init__.py +0 -0
- khoj/processor/content/images/image_to_entries.py +117 -0
- khoj/processor/content/markdown/__init__.py +0 -0
- khoj/processor/content/markdown/markdown_to_entries.py +160 -0
- khoj/processor/content/notion/notion_to_entries.py +259 -0
- khoj/processor/content/org_mode/__init__.py +0 -0
- khoj/processor/content/org_mode/org_to_entries.py +226 -0
- khoj/processor/content/org_mode/orgnode.py +532 -0
- khoj/processor/content/pdf/__init__.py +0 -0
- khoj/processor/content/pdf/pdf_to_entries.py +119 -0
- khoj/processor/content/plaintext/__init__.py +0 -0
- khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
- khoj/processor/content/text_to_entries.py +296 -0
- khoj/processor/conversation/__init__.py +0 -0
- khoj/processor/conversation/anthropic/__init__.py +0 -0
- khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
- khoj/processor/conversation/anthropic/utils.py +217 -0
- khoj/processor/conversation/google/__init__.py +0 -0
- khoj/processor/conversation/google/gemini_chat.py +253 -0
- khoj/processor/conversation/google/utils.py +260 -0
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +308 -0
- khoj/processor/conversation/offline/utils.py +80 -0
- khoj/processor/conversation/offline/whisper.py +15 -0
- khoj/processor/conversation/openai/__init__.py +0 -0
- khoj/processor/conversation/openai/gpt.py +243 -0
- khoj/processor/conversation/openai/utils.py +232 -0
- khoj/processor/conversation/openai/whisper.py +13 -0
- khoj/processor/conversation/prompts.py +1188 -0
- khoj/processor/conversation/utils.py +867 -0
- khoj/processor/embeddings.py +122 -0
- khoj/processor/image/generate.py +215 -0
- khoj/processor/speech/__init__.py +0 -0
- khoj/processor/speech/text_to_speech.py +51 -0
- khoj/processor/tools/__init__.py +0 -0
- khoj/processor/tools/online_search.py +472 -0
- khoj/processor/tools/run_code.py +179 -0
- khoj/routers/__init__.py +0 -0
- khoj/routers/api.py +760 -0
- khoj/routers/api_agents.py +295 -0
- khoj/routers/api_chat.py +1273 -0
- khoj/routers/api_content.py +634 -0
- khoj/routers/api_model.py +123 -0
- khoj/routers/api_phone.py +86 -0
- khoj/routers/api_subscription.py +144 -0
- khoj/routers/auth.py +307 -0
- khoj/routers/email.py +135 -0
- khoj/routers/helpers.py +2333 -0
- khoj/routers/notion.py +85 -0
- khoj/routers/research.py +364 -0
- khoj/routers/storage.py +63 -0
- khoj/routers/twilio.py +36 -0
- khoj/routers/web_client.py +141 -0
- khoj/search_filter/__init__.py +0 -0
- khoj/search_filter/base_filter.py +15 -0
- khoj/search_filter/date_filter.py +215 -0
- khoj/search_filter/file_filter.py +32 -0
- khoj/search_filter/word_filter.py +29 -0
- khoj/search_type/__init__.py +0 -0
- khoj/search_type/text_search.py +255 -0
- khoj/utils/__init__.py +0 -0
- khoj/utils/cli.py +101 -0
- khoj/utils/config.py +81 -0
- khoj/utils/constants.py +51 -0
- khoj/utils/fs_syncer.py +252 -0
- khoj/utils/helpers.py +627 -0
- khoj/utils/initialization.py +301 -0
- khoj/utils/jsonl.py +43 -0
- khoj/utils/models.py +47 -0
- khoj/utils/rawconfig.py +208 -0
- khoj/utils/state.py +48 -0
- khoj/utils/yaml.py +47 -0
- khoj-1.33.3.dev32.dist-info/METADATA +190 -0
- khoj-1.33.3.dev32.dist-info/RECORD +393 -0
- khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
- khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
- khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
khoj/utils/helpers.py
ADDED
@@ -0,0 +1,627 @@
|
|
1
|
+
from __future__ import annotations # to avoid quoting type hints
|
2
|
+
|
3
|
+
import copy
|
4
|
+
import datetime
|
5
|
+
import io
|
6
|
+
import ipaddress
|
7
|
+
import logging
|
8
|
+
import os
|
9
|
+
import platform
|
10
|
+
import random
|
11
|
+
import urllib.parse
|
12
|
+
import uuid
|
13
|
+
from collections import OrderedDict
|
14
|
+
from enum import Enum
|
15
|
+
from functools import lru_cache
|
16
|
+
from importlib import import_module
|
17
|
+
from importlib.metadata import version
|
18
|
+
from itertools import islice
|
19
|
+
from os import path
|
20
|
+
from pathlib import Path
|
21
|
+
from time import perf_counter
|
22
|
+
from typing import TYPE_CHECKING, Any, Optional, Union
|
23
|
+
from urllib.parse import urlparse
|
24
|
+
|
25
|
+
import openai
|
26
|
+
import psutil
|
27
|
+
import requests
|
28
|
+
import torch
|
29
|
+
from asgiref.sync import sync_to_async
|
30
|
+
from email_validator import EmailNotValidError, EmailUndeliverableError, validate_email
|
31
|
+
from magika import Magika
|
32
|
+
from PIL import Image
|
33
|
+
from pytz import country_names, country_timezones
|
34
|
+
|
35
|
+
from khoj.utils import constants
|
36
|
+
|
37
|
+
if TYPE_CHECKING:
|
38
|
+
from sentence_transformers import CrossEncoder, SentenceTransformer
|
39
|
+
|
40
|
+
from khoj.utils.models import BaseEncoder
|
41
|
+
from khoj.utils.rawconfig import AppConfig
|
42
|
+
|
43
|
+
|
44
|
+
# Initialize Magika for file type identification
|
45
|
+
# Created once at import time; get_file_type reuses this instance on every call
magika = Magika()
|
46
|
+
|
47
|
+
|
48
|
+
class AsyncIteratorWrapper:
    """Adapt a synchronous iterable so it can be consumed with `async for`.

    Items are pulled from the wrapped iterator one at a time through a
    `sync_to_async` wrapped call to `next`.

    NOTE: When the wrapped iterator is exhausted, `__anext__` returns None
    instead of propagating StopAsyncIteration. Exhaustion is therefore
    signalled to the consumer as a None item, not by the loop ending.
    """

    def __init__(self, obj):
        self._it = iter(obj)

    def __aiter__(self):
        return self

    async def __anext__(self):
        try:
            return await self.next_async()
        except StopAsyncIteration:
            # Swallow the end-of-iteration signal; consumer receives None instead
            return None

    @sync_to_async
    def next_async(self):
        "Fetch the next item from the wrapped iterator, translating StopIteration to its async counterpart"
        try:
            item = next(self._it)
        except StopIteration:
            raise StopAsyncIteration
        return item
|
68
|
+
|
69
|
+
|
70
|
+
def is_none_or_empty(item):
    """Check whether item is None, an empty sized iterable or the empty string.

    Iterables that do not support len(), like generators, cannot be judged
    empty without consuming them. So they are treated as non-empty rather
    than raising a TypeError.
    """
    # Identity check avoids invoking custom or element-wise __eq__ implementations
    if item is None:
        return True

    if hasattr(item, "__iter__"):
        try:
            if len(item) == 0:
                return True
        except TypeError:
            # Unsized iterable. Fall through to the empty string comparison
            pass

    return item == ""
|
72
|
+
|
73
|
+
|
74
|
+
def to_snake_case_from_dash(item: str):
    """Return item with every underscore replaced by a dash.

    NOTE(review): Despite its name, this maps snake_case to dash-case,
    not the reverse. Behavior is left as is since callers may rely on it.
    Confirm against callers before renaming or inverting it.
    """
    underscore_separated_parts = item.split("_")
    return "-".join(underscore_separated_parts)
|
76
|
+
|
77
|
+
|
78
|
+
def get_absolute_path(filepath: Union[str, Path]) -> str:
    "Expand the user home directory in filepath and return it as an absolute path string"
    expanded_path = Path(filepath).expanduser()
    absolute_path = expanded_path.absolute()
    return str(absolute_path)
|
80
|
+
|
81
|
+
|
82
|
+
def resolve_absolute_path(filepath: Union[str, Optional[Path]], strict=False) -> Path:
    """Expand the user home directory in filepath, make it absolute and resolve it.

    The strict flag is passed through to Path.resolve.
    """
    absolute_path = Path(filepath).expanduser().absolute()
    return absolute_path.resolve(strict=strict)
|
84
|
+
|
85
|
+
|
86
|
+
def get_from_dict(dictionary, *args):
    """Null-aware lookup into a nested dictionary.

    Walks dictionary[args[0]][args[1]]... one key at a time.
    Returns: the value reached, or None as soon as the current level
    is not iterable or does not contain the next key.
    With no keys passed, the dictionary itself is returned."""
    node = dictionary
    for key in args:
        if hasattr(node, "__iter__") and key in node:
            node = node[key]
        else:
            return None
    return node
|
95
|
+
|
96
|
+
|
97
|
+
def merge_dicts(priority_dict: dict, default_dict: dict):
    """Merge two dictionaries, preferring values from priority_dict.

    Keys missing from priority_dict are filled in from default_dict.
    When both dictionaries hold a dict under the same key, those are merged recursively.
    Returns a new dictionary built from a shallow copy of priority_dict.
    """
    merged = priority_dict.copy()
    for key, fallback in default_dict.items():
        if key not in priority_dict:
            merged[key] = fallback
            continue
        preferred = priority_dict[key]
        if isinstance(preferred, dict) and isinstance(fallback, dict):
            merged[key] = merge_dicts(preferred, fallback)
    return merged
|
105
|
+
|
106
|
+
|
107
|
+
def fix_json_dict(json_dict: dict) -> dict:
    """Convert "True" and "False" string values in json_dict to booleans.

    Nested dictionaries are processed recursively.
    The passed dictionary is updated in place and also returned.
    """
    for key, value in json_dict.items():
        if isinstance(value, dict):
            json_dict[key] = fix_json_dict(value)
        elif value == "True":
            json_dict[key] = True
        elif value == "False":
            json_dict[key] = False
    return json_dict
|
114
|
+
|
115
|
+
|
116
|
+
def get_file_type(file_type: str, file_content: bytes) -> tuple[str, Optional[str]]:
    """Get file type from file mime type, falling back to the file content

    Args:
        file_type: Mime type of the file. May carry parameters, e.g "text/plain; charset=utf-8"
        file_content: Raw file content. Used to infer the content group when mime type is not recognized

    Returns:
        Tuple of the file type and its declared encoding.
        File type is one of "markdown", "org", "pdf", "docx", "image", "plaintext" or "other".
        Encoding is lower-cased. It is None when the mime type declares no parameter value.
    """
    # Extract bare mime type and encoding from file_type
    file_type, encoding = _parse_mime_type(file_type)

    # Infer content type from reading file content
    try:
        content_group = magika.identify_bytes(file_content).output.group
    except Exception:
        # Fallback to using just file type if content type cannot be inferred
        content_group = "unknown"

    # Recognized mime types take precedence over the inferred content group
    if file_type in _FILE_TYPE_BY_MIME_TYPE:
        return _FILE_TYPE_BY_MIME_TYPE[file_type], encoding
    elif content_group in ["code", "text"]:
        return "plaintext", encoding
    else:
        return "other", encoding


# Mime types that map directly onto a supported file type
_FILE_TYPE_BY_MIME_TYPE = {
    "text/markdown": "markdown",
    "text/org": "org",
    "application/pdf": "pdf",
    "application/msword": "docx",
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
    "image/jpeg": "image",
    "image/png": "image",
    "image/webp": "image",
}


def _parse_mime_type(file_type: str) -> tuple[str, Optional[str]]:
    "Split a mime type string into its bare mime type and declared encoding, if any"
    mime_type, _, raw_params = file_type.partition(";")
    mime_type = mime_type.strip()

    encoding = None
    for raw_param in raw_params.split(";"):
        name, has_value, value = raw_param.partition("=")
        if not has_value:
            # Skip empty or valueless parameters, e.g the trailing ";" in "text/plain;"
            continue
        value = value.strip().lower()
        # Prefer an explicit charset parameter when one is declared
        if name.strip().lower() == "charset":
            return mime_type, value
        # Else use the first parameter value, as was done before
        if encoding is None:
            encoding = value

    return mime_type, encoding
|
148
|
+
|
149
|
+
|
150
|
+
def load_model(
    model_name: str, model_type, model_dir=None, device: str = None
) -> Union[BaseEncoder, SentenceTransformer, CrossEncoder]:
    """Load model from disk or huggingface

    Args:
        model_name: Name of the model. Any "/" in it is replaced with "_" to build its on-disk directory name
        model_type: Model class to instantiate, or its dotted import path as a string
        model_dir: Directory to load the model from and save it to. Disk is not used when this is None
        device: Passed as the device keyword argument to the model class

    Returns:
        The instantiated model
    """
    logger = logging.getLogger(__name__)

    # Construct model path
    model_path = path.join(model_dir, model_name.replace("/", "_")) if model_dir is not None else None
    model_type_class = get_class_by_name(model_type) if isinstance(model_type, str) else model_type

    # Load model from model_path if it exists there
    if model_path is not None and resolve_absolute_path(model_path).exists():
        logger.debug(f"Loading {model_name} model from disk")
        return model_type_class(get_absolute_path(model_path), device=device)

    # Else load the model from the model_name
    logger.info(f"🤖 Downloading {model_name} model from web")
    model = model_type_class(model_name, device=device)
    if model_path is not None:
        # Log only once the save has completed, so the message is not misleading if save fails
        model.save(model_path)
        logger.info(f"📩 Saved {model_name} model to disk")

    return model
|
172
|
+
|
173
|
+
|
174
|
+
def get_class_by_name(name: str) -> object:
    """Resolve a dotted path such as "package.module.Name" to the attribute it names.

    The text after the last "." is the attribute; everything before it is the module to import.
    """
    module_name, class_name = name.rsplit(".", 1)
    module = import_module(module_name)
    return getattr(module, class_name)
|
178
|
+
|
179
|
+
|
180
|
+
class timer:
    """Context manager that logs how long the wrapped block of code took to run."""

    def __init__(self, message: str, logger: logging.Logger, device: torch.device = None, log_level=logging.DEBUG):
        self.message = message
        # Only DEBUG is special-cased; any other level is logged via logger.info
        if log_level == logging.DEBUG:
            self.logger = logger.debug
        else:
            self.logger = logger.info
        self.device = device

    def __enter__(self):
        self.start = perf_counter()
        return self

    def __exit__(self, *_):
        elapsed = perf_counter() - self.start
        report = f"{self.message}: {elapsed:.3f} seconds"
        # Mention the device only when one was given
        if self.device is not None:
            report = f"{report} on device: {self.device}"
        self.logger(report)
|
198
|
+
|
199
|
+
|
200
|
+
class LRU(OrderedDict):
    """Ordered dictionary holding at most `capacity` entries, evicting the least recently used.

    Reading a key with `lru[key]` or assigning it with `lru[key] = value` marks it as
    the most recently used entry. When an assignment grows the dictionary past
    `capacity`, the entry at the front of the order (the least recently used) is removed.
    """

    def __init__(self, *args, capacity=128, **kwargs):
        # Set capacity before the parent initializer, in case it inserts items via __setitem__
        self.capacity = capacity
        super().__init__(*args, **kwargs)

    def __getitem__(self, key):
        value = super().__getitem__(key)
        self.move_to_end(key)
        return value

    def __setitem__(self, key, value):
        # Overwriting an existing key counts as a use; without this the key keeps its old
        # position and can be evicted right after being updated
        if key in self:
            self.move_to_end(key)
        super().__setitem__(key, value)
        if len(self) > self.capacity:
            oldest = next(iter(self))
            del self[oldest]
|
215
|
+
|
216
|
+
|
217
|
+
def get_server_id():
    """Get, Generate Persistent, Random ID per server install.
    Helps count distinct khoj servers deployed.
    Maintains anonymity by using non-PII random id.

    Reads `server_id=<value>` from the khoj env file when present. Otherwise a new
    UUID4 string is generated, written to the env file and returned.

    Returns:
        The server id string.
    """
    # Expand path to the khoj env file. It contains persistent internal app data
    app_env_filename = path.expanduser(constants.app_env_filepath)

    server_id = None
    env_file_exists = path.exists(app_env_filename)
    if env_file_exists:
        with open(app_env_filename, "r") as f:
            for line in f:
                # partition never raises: blank lines, lines without "=" and
                # values containing "=" are all tolerated
                key, _, value = line.strip().partition("=")
                if key.strip() == "server_id":
                    server_id = value.strip()
                    break

    # If server_id is not found, generate and write to env file
    if server_id is None:
        server_id = str(uuid.uuid4())
        if not env_file_exists:
            # Create khoj config directory if it doesn't exist
            os.makedirs(path.dirname(app_env_filename), exist_ok=True)
        # Append mode creates the file when missing and keeps other entries when present
        with open(app_env_filename, "a") as f:
            f.write("server_id=" + server_id + "\n")

    return server_id
|
258
|
+
|
259
|
+
|
260
|
+
def telemetry_disabled(app_config: AppConfig, telemetry_disable_env) -> bool:
    """Tell whether telemetry is turned off by the environment flag or the app config.

    Telemetry counts as disabled when the flag is exactly True, when no app config
    is available, or when the app config has `should_log_telemetry` falsy.
    """
    # Identity check: only the literal True disables telemetry via the flag
    if telemetry_disable_env is True:
        return True
    if not app_config:
        return True
    return not app_config.should_log_telemetry
|
264
|
+
|
265
|
+
|
266
|
+
def log_telemetry(
    telemetry_type: str,
    api: str = None,
    client: Optional[str] = None,
    app_config: Optional[AppConfig] = None,
    disable_telemetry_env: bool = False,
    properties: dict = None,
):
    """Build the basic app usage telemetry record (client, os, api called).

    Args:
        telemetry_type: Kind of telemetry event being recorded.
        api: API endpoint on the server called by the client, if any.
        client: Client from which the API was called. E.g. Emacs, Obsidian.
        app_config: App config consulted to decide whether telemetry is enabled.
        disable_telemetry_env: When True, telemetry is disabled regardless of app config.
        properties: Extra fields merged into the record. When given, a "server_id"
            entry is added to this dict if it has none.

    Returns:
        An empty list when telemetry is disabled, else the telemetry record as a dict.
    """
    # Do not log usage telemetry, if telemetry is disabled via app config
    if telemetry_disabled(app_config, disable_telemetry_env):
        return []

    # properties defaults to None; use an empty dict so the lookups below cannot fail
    if properties is None:
        properties = {}
    if properties.get("server_id") is None:
        properties["server_id"] = get_server_id()

    # Populate telemetry data to log
    request_body = {
        "telemetry_type": telemetry_type,
        "server_version": version("khoj"),
        "os": platform.system(),
        "timestamp": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    request_body.update(properties)
    if api:
        # API endpoint on server called by client
        request_body["api"] = api
    if client:
        # Client from which the API was called. E.g. Emacs, Obsidian
        request_body["client"] = client

    # The record is only built here; nothing is sent from this function
    return request_body
|
299
|
+
|
300
|
+
|
301
|
+
def get_device_memory() -> int:
    """Get the memory figure for the device that models run on.

    The raw value reported by torch or psutil is returned without any unit conversion.
    NOTE(review): these APIs appear to report byte counts, not GB - confirm callers convert.
    """
    device = get_device()
    device_kind = device.type
    if device_kind == "cuda":
        return torch.cuda.get_device_properties(device).total_memory
    if device_kind == "mps":
        # NOTE(review): this looks like memory allocated by the driver rather than total memory - confirm
        return torch.mps.driver_allocated_memory()
    return psutil.virtual_memory().total
|
310
|
+
|
311
|
+
|
312
|
+
def get_device() -> torch.device:
    """Pick the torch device to run models on: CUDA GPU first, then Apple MPS, else CPU."""
    if torch.cuda.is_available():
        # Use CUDA GPU
        device_name = "cuda:0"
    elif torch.backends.mps.is_available():
        # Use Apple Metal acceleration
        device_name = "mps"
    else:
        device_name = "cpu"
    return torch.device(device_name)
|
322
|
+
|
323
|
+
|
324
|
+
class ConversationCommand(str, Enum):
    """Names of the conversation commands, tools and response modes.

    The class mixes in str, so each member is also a string equal to its value.
    """

    Default = "default"
    General = "general"
    Notes = "notes"
    Help = "help"
    Online = "online"
    Webpage = "webpage"
    Code = "code"
    Image = "image"
    Text = "text"
    Automation = "automation"
    AutomatedTask = "automated_task"
    Summarize = "summarize"
    Diagram = "diagram"
    Research = "research"
|
339
|
+
|
340
|
+
|
341
|
+
# Description of each conversation command, worded for the user (second person).
command_descriptions = {
    ConversationCommand.General: "Only talk about information that relies on Khoj's general knowledge, not your personal knowledge base.",
    ConversationCommand.Notes: "Only talk about information that is available in your knowledge base.",
    ConversationCommand.Default: "The default command when no command specified. It intelligently auto-switches between general and notes mode.",
    ConversationCommand.Online: "Search for information on the internet.",
    ConversationCommand.Webpage: "Get information from webpage suggested by you.",
    ConversationCommand.Code: "Run Python code to parse information, run complex calculations, create documents and charts.",
    ConversationCommand.Image: "Generate illustrative, creative images by describing your imagination in words.",
    ConversationCommand.Automation: "Automatically run your query at a specified time or interval.",
    ConversationCommand.Help: "Get help with how to use or setup Khoj from the documentation",
    ConversationCommand.Summarize: "Get help with a question pertaining to an entire document.",
    ConversationCommand.Diagram: "Draw a flowchart, diagram, or any other visual representation best expressed with primitives like lines, rectangles, and text.",
    ConversationCommand.Research: "Do deep research on a topic. This will take longer than usual, but give a more detailed, comprehensive answer.",
}
|
355
|
+
|
356
|
+
# Description of what each command lets an agent do. Covers a subset of the commands.
command_descriptions_for_agent = {
    ConversationCommand.General: "Agent can use the agents knowledge base and general knowledge.",
    ConversationCommand.Notes: "Agent can search the users knowledge base for information.",
    ConversationCommand.Online: "Agent can search the internet for information.",
    ConversationCommand.Webpage: "Agent can read suggested web pages for information.",
    ConversationCommand.Summarize: "Agent can read an entire document. Agents knowledge base must be a single document.",
    ConversationCommand.Research: "Agent can do deep research on a topic.",
}
|
364
|
+
|
365
|
+
# Guidance, worded for the LLM, on when to use each command as a tool or data source.
tool_descriptions_for_llm = {
    ConversationCommand.Default: "To use a mix of your internal knowledge and the user's personal knowledge, or if you don't entirely understand the query.",
    ConversationCommand.General: "To use when you can answer the question without any outside information or personal knowledge",
    ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
    ConversationCommand.Online: "To search for the latest, up-to-date information from the internet. Note: **Questions about Khoj should always use this data source**",
    ConversationCommand.Webpage: "To use if the user has directly provided the webpage urls or you are certain of the webpage urls to read.",
    # Package list names "pandas" (was misspelled "panda") so the model is told the real package name
    ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse complex information, run complex calculations, create plaintext documents, and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4 and sympy external packages are available.",
    ConversationCommand.Summarize: "To retrieve an answer that depends on the entire document or a large text.",
}
|
374
|
+
|
375
|
+
# Per-tool descriptions worded for the LLM. Covers a subset of the commands.
function_calling_description_for_llm = {
    ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
    ConversationCommand.Online: "To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed.",
    ConversationCommand.Webpage: "To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share the webpage links and information to extract in your query.",
    # Package list names "pandas" (was misspelled "panda") so the model is told the real package name
    ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse complex information, run complex calculations, create plaintext documents, and create charts with quantitative data. Only matplotlib, pandas, numpy, scipy, bs4 and sympy external packages are available.",
}
|
381
|
+
|
382
|
+
# Guidance, worded for the LLM, on when to pick each response mode (image, text or diagram).
mode_descriptions_for_llm = {
    ConversationCommand.Image: "Use this if you are confident the user is requesting you to create a new picture based on their description. This DOES NOT support generating charts or graphs. It is for creative images.",
    ConversationCommand.Text: "Use this if a normal text response would be sufficient for accurately responding to the query or you don't feel strongly about the other modes.",
    ConversationCommand.Diagram: "Use this if the user is requesting a diagram or visual representation that requires primitives like lines, rectangles, and text. This does not work for charts, graphs, or quantitative data. It is for mind mapping, flowcharts, etc.",
}
|
387
|
+
|
388
|
+
# Description of the response modes an agent can produce output in.
mode_descriptions_for_agent = {
    # Reads "cannot use" (was the double negative "cannot not use", which inverted the meaning)
    ConversationCommand.Image: "Agent can generate images in response. It cannot use this to generate charts and graphs.",
    ConversationCommand.Automation: "Agent can schedule a task to run at a scheduled date, time and frequency in response.",
    ConversationCommand.Text: "Agent can generate text in response.",
    ConversationCommand.Diagram: "Agent can generate a visual representation that requires primitives like lines, rectangles, and text.",
}
|
394
|
+
|
395
|
+
|
396
|
+
class ImageIntentType(Enum):
    """
    Chat message intent by Khoj for image responses.
    Marks the schema used to reference image in chat messages.

    Each member names one way the image is carried in the message:
    inline PNG, URL, or inline WebP.
    """

    # Images as Inline PNG
    TEXT_TO_IMAGE = "text-to-image"
    # Images as URLs
    TEXT_TO_IMAGE2 = "text-to-image2"
    # Images as Inline WebP
    TEXT_TO_IMAGE_V3 = "text-to-image-v3"
|
408
|
+
|
409
|
+
|
410
|
+
def generate_random_name():
    """Return a random two-word name of the form "<adjective> <noun>", e.g. "calm falcon"."""
    # Word pools to draw from
    adjectives = [
        "happy",
        "serendipitous",
        "exuberant",
        "calm",
        "brave",
        "scared",
        "energetic",
        "chivalrous",
        "kind",
        "suave",
    ]
    nouns = ["dog", "cat", "falcon", "whale", "turtle", "rabbit", "hamster", "snake", "spider", "elephant"]

    # Draw the adjective first, then the noun, and join them with a single space
    picked_words = [random.choice(pool) for pool in (adjectives, nouns)]
    return " ".join(picked_words)
|
434
|
+
|
435
|
+
|
436
|
+
def batcher(iterable, max_n):
    """Yield successive batches of up to max_n items drawn from iterable.

    Each batch is yielded as a generator over that batch's items with None items
    skipped. Iteration ends once no more items can be drawn.
    """
    source = iter(iterable)
    # An empty list means the source is exhausted (or max_n is 0), which ends the loop
    while batch := list(islice(source, max_n)):
        yield (item for item in batch if item is not None)
|
444
|
+
|
445
|
+
|
446
|
+
def is_env_var_true(env_var: str, default: str = "false") -> bool:
    """Tell whether an environment variable is set to "true" (case-insensitive).

    `default` is the text used when the variable is not set.
    """
    raw_value = os.getenv(env_var, default)
    return raw_value.lower() == "true"
|
449
|
+
|
450
|
+
|
451
|
+
def in_debug_mode():
    """Tell whether Khoj is running in debug mode.

    Debug mode is on when the KHOJ_DEBUG environment variable is set to true.
    """
    debug_enabled = is_env_var_true("KHOJ_DEBUG")
    return debug_enabled
|
455
|
+
|
456
|
+
|
457
|
+
def is_promptrace_enabled():
    """Tell whether prompt tracing is enabled.

    Tracing is on when the PROMPTRACE_DIR environment variable holds a non-empty value.
    """
    promptrace_dir = os.getenv("PROMPTRACE_DIR")
    return not is_none_or_empty(promptrace_dir)
|
461
|
+
|
462
|
+
|
463
|
+
def is_valid_url(url: str) -> bool:
    """Check if a string is a valid URL.

    A URL counts as valid when, after stripping surrounding whitespace, it parses
    with both a scheme and a network location.

    Args:
        url: The text to check. Values that cannot be parsed yield False.

    Returns:
        True when the URL has both a scheme and a netloc, False otherwise.
    """
    try:
        result = urlparse(url.strip())
    except (AttributeError, TypeError, ValueError):
        # Non-string input or a malformed URL. Unlike a bare except, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return False
    return all([result.scheme, result.netloc])
|
470
|
+
|
471
|
+
|
472
|
+
def is_internet_connected(timeout: float = 5):
    """Check for internet access by sending a HEAD request to https://www.google.com.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a timeout
            the request can block indefinitely on a stalled connection.

    Returns:
        True when the request returns status 200, False on any other status or
        when the request fails.
    """
    try:
        response = requests.head("https://www.google.com", timeout=timeout)
    except requests.RequestException:
        # Connection errors and timeouts mean no usable connection
        return False
    return response.status_code == 200
|
478
|
+
|
479
|
+
|
480
|
+
def is_internal_url(url: str) -> bool:
    """
    Check if a URL is likely to be internal/non-public.

    A URL is treated as internal when its host is localhost, an IP address that
    `ipaddress` reports as private, a name ending in a common internal TLD, or a
    name without any dot. A trailing dot on the host (fully-qualified form such as
    "localhost.") is ignored so it cannot bypass these checks.

    Args:
        url (str): The URL to check.

    Returns:
        bool: True if the URL is likely internal, False otherwise.
    """
    try:
        hostname = urllib.parse.urlparse(url).hostname
        if not hostname:
            # No host could be extracted, e.g. the URL has no scheme
            return False

        # "localhost." and "localhost" name the same host
        hostname = hostname.rstrip(".")
        if not hostname:
            return False

        # Check for localhost
        if hostname in ("localhost", "127.0.0.1", "::1"):
            return True

        # Check for IP addresses in private ranges
        try:
            return ipaddress.ip_address(hostname).is_private
        except ValueError:
            pass  # Not an IP address, continue with other checks

        # Check for common internal TLDs
        internal_tlds = (".local", ".internal", ".private", ".corp", ".home", ".lan")
        if hostname.endswith(internal_tlds):
            return True

        # Check for URLs without a TLD
        return "." not in hostname
    except Exception:
        # If we can't parse the URL or something else goes wrong, assume it's not internal
        return False
|
518
|
+
|
519
|
+
|
520
|
+
def convert_image_to_webp(image_bytes):
    """Re-encode image bytes in WebP format and return the encoded bytes."""
    source_buffer = io.BytesIO(image_bytes)
    with Image.open(source_buffer) as original_image, io.BytesIO() as webp_buffer:
        original_image.save(webp_buffer, "WEBP")
        # Copy the encoded bytes out before the buffer is closed
        return webp_buffer.getvalue()
|
531
|
+
|
532
|
+
|
533
|
+
def truncate_code_context(original_code_results: dict[str, Any], max_chars=10000) -> dict[str, Any]:
    """
    Return a copy of the code results with image data dropped and large outputs truncated.

    For every output file: data of files with an image suffix is replaced by a
    placeholder, and data longer than max_chars is cut to max_chars followed by "...".
    Rewritten entries keep only "filename" and "b64_data". The input is not modified.
    """
    image_suffixes = {".png", ".jpg", ".jpeg", ".webp"}

    # Work on a deep copy to avoid modifying the original data
    code_results = copy.deepcopy(original_code_results)
    for code_result in code_results.values():
        output_files = code_result["results"]["output_files"]
        for idx, output_file in enumerate(output_files):
            filename = output_file["filename"]
            if Path(filename).suffix in image_suffixes:
                # Drop image files from code results
                b64_data = "[placeholder for generated image data for brevity]"
            elif len(output_file["b64_data"]) > max_chars:
                # Truncate large output files
                b64_data = output_file["b64_data"][:max_chars] + "..."
            else:
                continue
            output_files[idx] = {"filename": filename, "b64_data": b64_data}
    return code_results
|
554
|
+
|
555
|
+
|
556
|
+
@lru_cache
def tz_to_cc_map() -> dict[str, str]:
    """Build the mapping from timezone name to country code.

    When a timezone is listed under several countries, the last country iterated wins.
    """
    return {
        timezone: countrycode
        for countrycode in country_timezones
        for timezone in country_timezones[countrycode]
    }
|
565
|
+
|
566
|
+
|
567
|
+
def get_country_code_from_timezone(tz: str) -> str:
    """Look up the country code for a timezone, falling back to "US" when unknown."""
    timezone_to_country = tz_to_cc_map()
    return timezone_to_country.get(tz, "US")
|
570
|
+
|
571
|
+
|
572
|
+
def get_country_name_from_timezone(tz: str) -> str:
    """Look up the country name for a timezone, falling back to "United States" when unknown."""
    country_code = get_country_code_from_timezone(tz)
    return country_names.get(country_code, "United States")
|
575
|
+
|
576
|
+
|
577
|
+
def get_cost_of_chat_message(model_name: str, input_tokens: int = 0, output_tokens: int = 0, prev_cost: float = 0.0):
    """
    Calculate the running cost of a chat message from its input and output tokens.

    Prices are looked up per model and are per million tokens. A model or price
    that is not listed contributes 0. The result includes prev_cost.
    """
    pricing = constants.model_to_cost.get(model_name, {})

    # Costs are per million tokens
    input_cost = pricing.get("input", 0) * (input_tokens / 1e6)
    output_cost = pricing.get("output", 0) * (output_tokens / 1e6)

    return input_cost + output_cost + prev_cost
|
587
|
+
|
588
|
+
|
589
|
+
def get_chat_usage_metrics(
    model_name: str, input_tokens: int = 0, output_tokens: int = 0, usage: dict = None, cost: float = None
):
    """
    Get usage metrics for chat message based on input and output tokens and cost.

    Args:
        model_name: Model used, for looking up token prices.
        input_tokens: Input tokens used by this message.
        output_tokens: Output tokens used by this message.
        usage: Previously accumulated usage with "input_tokens", "output_tokens"
            and "cost" keys. Missing keys, an empty dict or None count as zero.
        cost: Cost to report as-is. When None or 0, the cost is computed from the
            token counts and added to the previous cost.

    Returns:
        Dict with the accumulated "input_tokens", "output_tokens" and "cost".
    """
    # A None default avoids a shared mutable default; .get tolerates partial usage dicts
    prev_usage = usage or {}
    prev_input_tokens = prev_usage.get("input_tokens", 0)
    prev_output_tokens = prev_usage.get("output_tokens", 0)
    prev_cost = prev_usage.get("cost", 0.0)

    return {
        "input_tokens": prev_input_tokens + input_tokens,
        "output_tokens": prev_output_tokens + output_tokens,
        # A falsy cost (None or 0) falls through to the computed cost
        "cost": cost or get_cost_of_chat_message(model_name, input_tokens, output_tokens, prev_cost=prev_cost),
    }
|
601
|
+
|
602
|
+
|
603
|
+
def get_openai_client(api_key: str, api_base_url: str) -> Union[openai.OpenAI, openai.AzureOpenAI]:
    """Create an OpenAI or AzureOpenAI client, chosen by the host of the API base URL.

    Hosts ending in ".openai.azure.com" get an AzureOpenAI client; all other
    base URLs get a regular OpenAI client.
    """
    hostname = urlparse(api_base_url).hostname
    if hostname and hostname.endswith(".openai.azure.com"):
        return openai.AzureOpenAI(
            api_key=api_key,
            azure_endpoint=api_base_url,
            api_version="2024-10-21",
        )
    return openai.OpenAI(
        api_key=api_key,
        base_url=api_base_url,
    )
|
618
|
+
|
619
|
+
|
620
|
+
def normalize_email(email: str, check_deliverability=False) -> tuple[str, bool]:
    """Lowercase and validate an email address, optionally checking deliverability.

    Returns:
        (normalized email, True) when validation passes, else (lowercased email, False).
    """
    lowered = email.lower()
    try:
        validated = validate_email(lowered, check_deliverability=check_deliverability)
    except (EmailNotValidError, EmailUndeliverableError):
        return lowered, False
    return validated.normalized, True
|