khoj 1.33.3.dev32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/__init__.py +0 -0
- khoj/app/README.md +94 -0
- khoj/app/__init__.py +0 -0
- khoj/app/asgi.py +16 -0
- khoj/app/settings.py +218 -0
- khoj/app/urls.py +25 -0
- khoj/configure.py +452 -0
- khoj/database/__init__.py +0 -0
- khoj/database/adapters/__init__.py +1821 -0
- khoj/database/admin.py +417 -0
- khoj/database/apps.py +6 -0
- khoj/database/management/__init__.py +0 -0
- khoj/database/management/commands/__init__.py +0 -0
- khoj/database/management/commands/change_default_model.py +116 -0
- khoj/database/management/commands/change_generated_images_url.py +61 -0
- khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
- khoj/database/migrations/0001_khojuser.py +98 -0
- khoj/database/migrations/0002_googleuser.py +32 -0
- khoj/database/migrations/0003_vector_extension.py +10 -0
- khoj/database/migrations/0004_content_types_and_more.py +181 -0
- khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
- khoj/database/migrations/0006_embeddingsdates.py +33 -0
- khoj/database/migrations/0007_add_conversation.py +27 -0
- khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
- khoj/database/migrations/0009_khojapiuser.py +24 -0
- khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
- khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
- khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
- khoj/database/migrations/0012_entry_file_source.py +21 -0
- khoj/database/migrations/0013_subscription.py +37 -0
- khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
- khoj/database/migrations/0015_alter_subscription_user.py +21 -0
- khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
- khoj/database/migrations/0017_searchmodel.py +32 -0
- khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
- khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
- khoj/database/migrations/0020_reflectivequestion.py +36 -0
- khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
- khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
- khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
- khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
- khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
- khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
- khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
- khoj/database/migrations/0029_userrequests.py +27 -0
- khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
- khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
- khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
- khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
- khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
- khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
- khoj/database/migrations/0035_processlock.py +26 -0
- khoj/database/migrations/0036_alter_processlock_name.py +19 -0
- khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
- khoj/database/migrations/0036_publicconversation.py +42 -0
- khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
- khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
- khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
- khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
- khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
- khoj/database/migrations/0040_alter_processlock_name.py +26 -0
- khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
- khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
- khoj/database/migrations/0042_serverchatsettings.py +46 -0
- khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
- khoj/database/migrations/0044_conversation_file_filters.py +17 -0
- khoj/database/migrations/0045_fileobject.py +37 -0
- khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
- khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
- khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
- khoj/database/migrations/0049_datastore.py +38 -0
- khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
- khoj/database/migrations/0050_alter_processlock_name.py +25 -0
- khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
- khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
- khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
- khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
- khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
- khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
- khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
- khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
- khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
- khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
- khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
- khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
- khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
- khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
- khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
- khoj/database/migrations/0063_conversation_temp_id.py +36 -0
- khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
- khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
- khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
- khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
- khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
- khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
- khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
- khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
- khoj/database/migrations/0072_entry_search_model.py +24 -0
- khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
- khoj/database/migrations/0074_alter_conversation_title.py +17 -0
- khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
- khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
- khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
- khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
- khoj/database/migrations/__init__.py +0 -0
- khoj/database/models/__init__.py +725 -0
- khoj/database/tests.py +3 -0
- khoj/interface/compiled/404/index.html +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
- khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
- khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
- khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
- khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
- khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
- khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
- khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
- khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
- khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
- khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
- khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
- khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
- khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
- khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
- khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
- khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
- khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
- khoj/interface/compiled/agents/index.html +1 -0
- khoj/interface/compiled/agents/index.txt +7 -0
- khoj/interface/compiled/agents.svg +6 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
- khoj/interface/compiled/automation.svg +37 -0
- khoj/interface/compiled/automations/index.html +1 -0
- khoj/interface/compiled/automations/index.txt +8 -0
- khoj/interface/compiled/chat/index.html +1 -0
- khoj/interface/compiled/chat/index.txt +7 -0
- khoj/interface/compiled/chat.svg +24 -0
- khoj/interface/compiled/close.svg +5 -0
- khoj/interface/compiled/copy-button-success.svg +6 -0
- khoj/interface/compiled/copy-button.svg +5 -0
- khoj/interface/compiled/index.html +1 -0
- khoj/interface/compiled/index.txt +7 -0
- khoj/interface/compiled/khoj.webmanifest +76 -0
- khoj/interface/compiled/logo.svg +24 -0
- khoj/interface/compiled/search/index.html +1 -0
- khoj/interface/compiled/search/index.txt +7 -0
- khoj/interface/compiled/send.svg +1 -0
- khoj/interface/compiled/settings/index.html +1 -0
- khoj/interface/compiled/settings/index.txt +9 -0
- khoj/interface/compiled/share/chat/index.html +1 -0
- khoj/interface/compiled/share/chat/index.txt +7 -0
- khoj/interface/compiled/share.svg +8 -0
- khoj/interface/compiled/thumbs-down.svg +6 -0
- khoj/interface/compiled/thumbs-up.svg +6 -0
- khoj/interface/email/feedback.html +34 -0
- khoj/interface/email/magic_link.html +40 -0
- khoj/interface/email/task.html +37 -0
- khoj/interface/email/welcome.html +90 -0
- khoj/interface/web/.well-known/assetlinks.json +11 -0
- khoj/interface/web/assets/icons/agents.svg +19 -0
- khoj/interface/web/assets/icons/automation.svg +43 -0
- khoj/interface/web/assets/icons/chat.svg +24 -0
- khoj/interface/web/assets/icons/github.svg +1 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
- khoj/interface/web/assets/icons/khoj.svg +26 -0
- khoj/interface/web/assets/icons/logotype.svg +1 -0
- khoj/interface/web/assets/icons/search.svg +57 -0
- khoj/interface/web/assets/icons/sync.svg +4 -0
- khoj/interface/web/assets/khoj.css +237 -0
- khoj/interface/web/assets/utils.js +33 -0
- khoj/interface/web/base_config.html +445 -0
- khoj/interface/web/content_source_github_input.html +208 -0
- khoj/interface/web/login.html +310 -0
- khoj/interface/web/utils.html +48 -0
- khoj/main.py +249 -0
- khoj/manage.py +22 -0
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +69 -0
- khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
- khoj/migrations/migrate_offline_chat_schema.py +83 -0
- khoj/migrations/migrate_offline_model.py +29 -0
- khoj/migrations/migrate_processor_config_openai.py +67 -0
- khoj/migrations/migrate_server_pg.py +132 -0
- khoj/migrations/migrate_version.py +17 -0
- khoj/processor/__init__.py +0 -0
- khoj/processor/content/__init__.py +0 -0
- khoj/processor/content/docx/__init__.py +0 -0
- khoj/processor/content/docx/docx_to_entries.py +111 -0
- khoj/processor/content/github/__init__.py +0 -0
- khoj/processor/content/github/github_to_entries.py +226 -0
- khoj/processor/content/images/__init__.py +0 -0
- khoj/processor/content/images/image_to_entries.py +117 -0
- khoj/processor/content/markdown/__init__.py +0 -0
- khoj/processor/content/markdown/markdown_to_entries.py +160 -0
- khoj/processor/content/notion/notion_to_entries.py +259 -0
- khoj/processor/content/org_mode/__init__.py +0 -0
- khoj/processor/content/org_mode/org_to_entries.py +226 -0
- khoj/processor/content/org_mode/orgnode.py +532 -0
- khoj/processor/content/pdf/__init__.py +0 -0
- khoj/processor/content/pdf/pdf_to_entries.py +119 -0
- khoj/processor/content/plaintext/__init__.py +0 -0
- khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
- khoj/processor/content/text_to_entries.py +296 -0
- khoj/processor/conversation/__init__.py +0 -0
- khoj/processor/conversation/anthropic/__init__.py +0 -0
- khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
- khoj/processor/conversation/anthropic/utils.py +217 -0
- khoj/processor/conversation/google/__init__.py +0 -0
- khoj/processor/conversation/google/gemini_chat.py +253 -0
- khoj/processor/conversation/google/utils.py +260 -0
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +308 -0
- khoj/processor/conversation/offline/utils.py +80 -0
- khoj/processor/conversation/offline/whisper.py +15 -0
- khoj/processor/conversation/openai/__init__.py +0 -0
- khoj/processor/conversation/openai/gpt.py +243 -0
- khoj/processor/conversation/openai/utils.py +232 -0
- khoj/processor/conversation/openai/whisper.py +13 -0
- khoj/processor/conversation/prompts.py +1188 -0
- khoj/processor/conversation/utils.py +867 -0
- khoj/processor/embeddings.py +122 -0
- khoj/processor/image/generate.py +215 -0
- khoj/processor/speech/__init__.py +0 -0
- khoj/processor/speech/text_to_speech.py +51 -0
- khoj/processor/tools/__init__.py +0 -0
- khoj/processor/tools/online_search.py +472 -0
- khoj/processor/tools/run_code.py +179 -0
- khoj/routers/__init__.py +0 -0
- khoj/routers/api.py +760 -0
- khoj/routers/api_agents.py +295 -0
- khoj/routers/api_chat.py +1273 -0
- khoj/routers/api_content.py +634 -0
- khoj/routers/api_model.py +123 -0
- khoj/routers/api_phone.py +86 -0
- khoj/routers/api_subscription.py +144 -0
- khoj/routers/auth.py +307 -0
- khoj/routers/email.py +135 -0
- khoj/routers/helpers.py +2333 -0
- khoj/routers/notion.py +85 -0
- khoj/routers/research.py +364 -0
- khoj/routers/storage.py +63 -0
- khoj/routers/twilio.py +36 -0
- khoj/routers/web_client.py +141 -0
- khoj/search_filter/__init__.py +0 -0
- khoj/search_filter/base_filter.py +15 -0
- khoj/search_filter/date_filter.py +215 -0
- khoj/search_filter/file_filter.py +32 -0
- khoj/search_filter/word_filter.py +29 -0
- khoj/search_type/__init__.py +0 -0
- khoj/search_type/text_search.py +255 -0
- khoj/utils/__init__.py +0 -0
- khoj/utils/cli.py +101 -0
- khoj/utils/config.py +81 -0
- khoj/utils/constants.py +51 -0
- khoj/utils/fs_syncer.py +252 -0
- khoj/utils/helpers.py +627 -0
- khoj/utils/initialization.py +301 -0
- khoj/utils/jsonl.py +43 -0
- khoj/utils/models.py +47 -0
- khoj/utils/rawconfig.py +208 -0
- khoj/utils/state.py +48 -0
- khoj/utils/yaml.py +47 -0
- khoj-1.33.3.dev32.dist-info/METADATA +190 -0
- khoj-1.33.3.dev32.dist-info/RECORD +393 -0
- khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
- khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
- khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,301 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
from typing import Tuple
|
4
|
+
|
5
|
+
import openai
|
6
|
+
|
7
|
+
from khoj.database.adapters import ConversationAdapters
|
8
|
+
from khoj.database.models import (
|
9
|
+
AiModelApi,
|
10
|
+
ChatModel,
|
11
|
+
KhojUser,
|
12
|
+
SpeechToTextModelOptions,
|
13
|
+
TextToImageModelConfig,
|
14
|
+
)
|
15
|
+
from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer
|
16
|
+
from khoj.utils.constants import (
|
17
|
+
default_anthropic_chat_models,
|
18
|
+
default_gemini_chat_models,
|
19
|
+
default_offline_chat_models,
|
20
|
+
default_openai_chat_models,
|
21
|
+
)
|
22
|
+
|
23
|
+
logger = logging.getLogger(__name__)
|
24
|
+
|
25
|
+
|
26
|
+
def initialization(interactive: bool = True):
|
27
|
+
def _create_admin_user():
    """Create the server's admin (superuser) account.

    Credentials are taken from the KHOJ_ADMIN_EMAIL and KHOJ_ADMIN_PASSWORD
    environment variables. Any value that is unset or empty is prompted for
    on stdin instead, which is only possible when `interactive` (captured
    from the enclosing `initialization` call) is true.

    Raises:
        SystemExit: With status 1 when running non-interactively and either
            environment variable is unset or empty.
    """
    logger.info(
        "👩✈️ Setting up admin user. These credentials will allow you to configure your server at /server/admin."
    )

    env_email = os.getenv("KHOJ_ADMIN_EMAIL")
    env_password = os.getenv("KHOJ_ADMIN_PASSWORD")

    if not interactive and not (env_email and env_password):
        logger.error(
            "🚨 Admin user cannot be created. Please set the KHOJ_ADMIN_EMAIL, KHOJ_ADMIN_PASSWORD environment variables or start server in interactive mode."
        )
        # Raise SystemExit directly rather than calling `exit()`: that builtin
        # is injected by the `site` module for interactive shells and is not
        # guaranteed to exist in every runtime.
        raise SystemExit(1)

    # NOTE(review): the password prompt uses input(), so typed characters are
    # echoed to the terminal.
    email_addr = env_email or input("Email: ")
    password = env_password or input("Password: ")

    # The email doubles as the username of the superuser account.
    admin_user = KhojUser.objects.create_superuser(email=email_addr, username=email_addr, password=password)
    logger.info(f"👩✈️ Created admin user: {admin_user.email}")
|
40
|
+
|
41
|
+
def _create_chat_configuration():
    """Register the chat, speech-to-text and text-to-image models for the server.

    Runs the setup for each supported chat model provider in turn (OpenAI or
    an OpenAI-compatible API, Google Gemini, Anthropic, offline models) via
    `_setup_chat_model_provider`. When the OpenAI provider gets configured,
    an OpenAI speech-to-text and an OpenAI text-to-image model are created
    as well. Finally the user may opt into an offline speech-to-text model,
    which replaces any speech-to-text model created earlier.

    Prompts are only shown when `interactive` (captured from the enclosing
    `initialization` call) is true; otherwise defaults are used.

    Environment variables read: OPENAI_API_BASE, OPENAI_API_KEY,
    GEMINI_API_KEY, ANTHROPIC_API_KEY.
    """
    logger.info(
        "🗣️ Configure chat models available to your server. You can always update these at /server/admin using your admin account"
    )

    openai_api_base = os.getenv("OPENAI_API_BASE")
    # Label the provider "Ollama" when the API base points at port 11434 under
    # /v1. Trailing slashes are ignored so ".../v1" and ".../v1/" both match.
    is_ollama = bool(openai_api_base) and openai_api_base.rstrip("/").endswith(":11434/v1")
    provider = "Ollama" if is_ollama else "OpenAI"
    # With a custom API base, fall back to a dummy key when none is set
    # (presumably because such servers do not validate it - TODO confirm).
    openai_api_key = os.getenv("OPENAI_API_KEY", "placeholder" if openai_api_base else None)

    default_chat_models = default_openai_chat_models
    if openai_api_base:
        # Get available chat models from OpenAI compatible API
        try:
            openai_client = openai.OpenAI(api_key=openai_api_key, base_url=openai_api_base)
            default_chat_models = [model.id for model in openai_client.models.list()]
            # Put the available default OpenAI models at the top
            valid_default_models = [model for model in default_openai_chat_models if model in default_chat_models]
            other_available_models = [model for model in default_chat_models if model not in valid_default_models]
            default_chat_models = valid_default_models + other_available_models
        except Exception as e:
            # Best effort: keep whatever default list is currently set and carry on.
            logger.warning(
                f"⚠️ Failed to fetch {provider} chat models. Fallback to default models. Error: {str(e)}"
            )

    # Set up OpenAI's online chat models
    openai_configured, openai_provider = _setup_chat_model_provider(
        ChatModel.ModelType.OPENAI,
        default_chat_models,
        default_api_key=openai_api_key,
        api_base_url=openai_api_base,
        vision_enabled=True,
        is_offline=False,
        interactive=interactive,
        provider_name=provider,
    )

    if openai_configured:
        # Setup OpenAI speech to text model
        default_speech2text_model = "whisper-1"
        if interactive:
            openai_speech2text_model = input(
                f"Enter the OpenAI speech to text model you want to use (default: {default_speech2text_model}): "
            )
            # An empty answer selects the default.
            openai_speech2text_model = openai_speech2text_model or default_speech2text_model
        else:
            openai_speech2text_model = default_speech2text_model
        SpeechToTextModelOptions.objects.create(
            model_name=openai_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OPENAI
        )

        # Setup OpenAI text to image model
        default_text_to_image_model = "dall-e-3"
        if interactive:
            openai_text_to_image_model = input(
                f"Enter the OpenAI text to image model you want to use (default: {default_text_to_image_model}): "
            )
            # An empty answer selects the default.
            openai_text_to_image_model = openai_text_to_image_model or default_text_to_image_model
        else:
            openai_text_to_image_model = default_text_to_image_model
        TextToImageModelConfig.objects.create(
            model_name=openai_text_to_image_model,
            model_type=TextToImageModelConfig.ModelType.OPENAI,
            ai_model_api=openai_provider,
        )

    # Set up Google's Gemini online chat models
    _setup_chat_model_provider(
        ChatModel.ModelType.GOOGLE,
        default_gemini_chat_models,
        default_api_key=os.getenv("GEMINI_API_KEY"),
        vision_enabled=True,
        is_offline=False,
        interactive=interactive,
        provider_name="Google Gemini",
    )

    # Set up Anthropic's online chat models
    _setup_chat_model_provider(
        ChatModel.ModelType.ANTHROPIC,
        default_anthropic_chat_models,
        default_api_key=os.getenv("ANTHROPIC_API_KEY"),
        vision_enabled=True,
        is_offline=False,
        interactive=interactive,
    )

    # Set up offline chat models
    _setup_chat_model_provider(
        ChatModel.ModelType.OFFLINE,
        default_offline_chat_models,
        default_api_key=None,
        vision_enabled=False,
        is_offline=True,
        interactive=interactive,
    )

    # Explicitly set default chat model
    chat_models_configured = ChatModel.objects.count()
    if chat_models_configured > 0:
        default_chat_model_name = ChatModel.objects.first().name
        # If there are multiple chat models, ask the user to choose the default chat model
        if chat_models_configured > 1 and interactive:
            user_chat_model_name = input(
                f"Enter the default chat model to use (default: {default_chat_model_name}): "
            )
        else:
            user_chat_model_name = None

        # If the user's choice is valid, set it as the default chat model
        if user_chat_model_name and ChatModel.objects.filter(name=user_chat_model_name).exists():
            default_chat_model_name = user_chat_model_name
        # NOTE(review): default_chat_model_name is not used after this point in
        # this function - confirm whether the choice should be persisted.

        logger.info("🗣️ Chat model configuration complete")

    # Set up offline speech to text model
    use_offline_speech2text_model = "n" if not interactive else input("Use offline speech to text model? (y/n): ")
    if use_offline_speech2text_model == "y":
        logger.info("🗣️ Setting up offline speech to text model")
        # Delete any existing speech to text model options. There can only be one.
        SpeechToTextModelOptions.objects.all().delete()

        default_offline_speech2text_model = "base"
        offline_speech2text_model = input(
            f"Enter the Whisper model to use Offline (default: {default_offline_speech2text_model}): "
        )
        # An empty answer selects the default.
        offline_speech2text_model = offline_speech2text_model or default_offline_speech2text_model
        SpeechToTextModelOptions.objects.create(
            model_name=offline_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OFFLINE
        )

        logger.info(f"🗣️ Offline speech to text model configured to {offline_speech2text_model}")
|
172
|
+
|
173
|
+
def _setup_chat_model_provider(
    model_type: ChatModel.ModelType,
    default_chat_models: list,
    default_api_key: str,
    interactive: bool,
    api_base_url: str = None,
    vision_enabled: bool = False,
    is_offline: bool = False,
    provider_name: str = None,
) -> Tuple[bool, AiModelApi]:
    """
    Create the chat models, and the API record for online providers, of one provider.

    Args:
        model_type: Model type stored on every ChatModel created here.
        default_chat_models: Model names used when the user supplies none.
        default_api_key: API key used when the user supplies none.
        interactive: Prompt on stdin when True; otherwise fall back to the defaults.
        api_base_url: Base URL stored on the created AiModelApi.
        vision_enabled: Allow vision for models found in the default OpenAI,
            Anthropic and Gemini model lists.
        is_offline: When True, no AiModelApi is created for the provider.
        provider_name: Display name; defaults to the capitalized model type name.

    Returns:
        (False, None) when the provider is not set up. Otherwise (True, ai_model_api),
        where ai_model_api is None for offline providers.
    """
    supported_vision_models = (
        default_openai_chat_models + default_anthropic_chat_models + default_gemini_chat_models
    )
    provider_name = provider_name or model_type.name.capitalize()

    # Without a prompt, only set up providers that have an API key or run offline
    default_use_model = "y" if (default_api_key is not None or is_offline) else "n"
    use_model_provider = (
        default_use_model if not interactive else input(f"Add {provider_name} chat models? (y/n): ")
    )

    if use_model_provider != "y":
        return False, None

    logger.info(f"️💬 Setting up your {provider_name} chat configuration")

    ai_model_api = None
    if not is_offline:
        if interactive:
            user_api_key = input(f"Enter your {provider_name} API key (default: {default_api_key}): ")
            api_key = user_api_key if user_api_key != "" else default_api_key
        else:
            api_key = default_api_key
        ai_model_api = AiModelApi.objects.create(api_key=api_key, name=provider_name, api_base_url=api_base_url)

    if interactive:
        user_chat_models = input(
            f"Enter the {provider_name} chat models you want to use (default: {','.join(default_chat_models)}): "
        )
        chat_models = user_chat_models.split(",") if user_chat_models != "" else default_chat_models
        chat_models = [model.strip() for model in chat_models]
    else:
        chat_models = default_chat_models

    for chat_model in chat_models:
        # Decide vision support per model. Reassigning the `vision_enabled` argument here
        # would disable vision for every model listed after the first unsupported one.
        model_vision_enabled = vision_enabled and chat_model in supported_vision_models

        ChatModel.objects.create(
            name=chat_model,
            model_type=model_type,
            max_prompt_size=model_to_prompt_size.get(chat_model),
            vision_enabled=model_vision_enabled,
            tokenizer=model_to_tokenizer.get(chat_model),
            ai_model_api=ai_model_api,
        )

    logger.info(f"🗣️ {provider_name} chat model configuration complete")
    return True, ai_model_api
|
233
|
+
|
234
|
+
def _update_chat_model_options():
    """Sync stored chat models with the models served by whitelisted OpenAI-compatible APIs."""
    try:
        # Only APIs with a custom base URL and a whitelisted provider name (i.e Ollama) for now
        # TODO: This is hacky. Will be replaced with more robust solution based on provider type enum
        custom_configs = AiModelApi.objects.exclude(api_base_url__isnull=True).filter(name__in=["Ollama"])

        for config in custom_configs:
            try:
                # Ask the API server which models it currently serves
                client = openai.OpenAI(api_key=config.api_key, base_url=config.api_base_url)
                served_model_names = [served_model.id for served_model in client.models.list()]

                # Chat models already stored for this API
                stored_models = ChatModel.objects.filter(
                    ai_model_api=config, model_type=ChatModel.ModelType.OPENAI
                )

                # Store the models that are served but not yet known
                for served_name in served_model_names:
                    if stored_models.filter(name=served_name).exists():
                        continue
                    ChatModel.objects.create(
                        name=served_name,
                        model_type=ChatModel.ModelType.OPENAI,
                        max_prompt_size=model_to_prompt_size.get(served_name),
                        vision_enabled=served_name in default_openai_chat_models,
                        tokenizer=model_to_tokenizer.get(served_name),
                        ai_model_api=config,
                    )

                # Drop the stored models that are no longer served
                stored_models.exclude(name__in=served_model_names).delete()

            except Exception as e:
                # A failure for one API should not block updating the others
                logger.warning(f"Failed to update models for {config.name}: {str(e)}")

    except Exception as e:
        logger.error(f"Failed to update chat model options: {str(e)}")
|
277
|
+
|
278
|
+
admin_user = KhojUser.objects.filter(is_staff=True).first()
|
279
|
+
if admin_user is None:
|
280
|
+
while True:
|
281
|
+
try:
|
282
|
+
_create_admin_user()
|
283
|
+
break
|
284
|
+
except Exception as e:
|
285
|
+
logger.error(f"🚨 Failed to create admin user: {e}", exc_info=True)
|
286
|
+
|
287
|
+
chat_config = ConversationAdapters.get_default_chat_model()
|
288
|
+
if admin_user is None and chat_config is None:
|
289
|
+
while True:
|
290
|
+
try:
|
291
|
+
_create_chat_configuration()
|
292
|
+
break
|
293
|
+
# Some environments don't support interactive input. We catch the exception and return if that's the case.
|
294
|
+
# The admin can still configure their settings from the admin page.
|
295
|
+
except EOFError:
|
296
|
+
return
|
297
|
+
except Exception as e:
|
298
|
+
logger.error(f"🚨 Failed to create chat configuration: {e}", exc_info=True)
|
299
|
+
else:
|
300
|
+
_update_chat_model_options()
|
301
|
+
logger.info("🗣️ Chat model configuration updated")
|
khoj/utils/jsonl.py
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
import gzip
|
2
|
+
import json
|
3
|
+
import logging
|
4
|
+
|
5
|
+
from khoj.utils.constants import empty_escape_sequences
|
6
|
+
from khoj.utils.helpers import get_absolute_path
|
7
|
+
|
8
|
+
logger = logging.getLogger(__name__)
|
9
|
+
|
10
|
+
|
11
|
+
def load_jsonl(input_path):
    """
    Read list of JSON objects from JSON line file.

    Args:
        input_path: Path-like object with a `suffix` attribute. Files with a
            ".gz" suffix are read as gzip-compressed text.

    Returns:
        List with one decoded JSON value per line of the file.

    Raises:
        json.JSONDecodeError: If a line is not valid JSON.
    """
    # gzip.open and the builtin open accept the same text-mode arguments used below
    open_jsonl = gzip.open if input_path.suffix == ".gz" else open

    # The context manager closes the file even when a line fails to parse
    with open_jsonl(get_absolute_path(input_path), "rt", encoding="utf-8") as jsonl_file:
        data = [json.loads(line.strip(empty_escape_sequences)) for line in jsonl_file]

    # Log JSONL entries loaded
    logger.debug(f"Loaded {len(data)} records from {input_path}")

    return data
|
34
|
+
|
35
|
+
|
36
|
+
def compress_jsonl_data(jsonl_data, output_path):
    """Write the `jsonl_data` string to `output_path` as UTF-8, gzip-compressed text."""
    # Make sure the destination directory exists before writing
    destination_dir = output_path.parent
    destination_dir.mkdir(parents=True, exist_ok=True)

    with gzip.open(output_path, mode="wt", encoding="utf-8") as compressed_file:
        compressed_file.write(jsonl_data)

    logger.debug(f"Wrote jsonl data to gzip compressed jsonl at {output_path}")
|
khoj/utils/models.py
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import List
|
3
|
+
|
4
|
+
import openai
|
5
|
+
import torch
|
6
|
+
from tqdm import trange
|
7
|
+
|
8
|
+
|
9
|
+
class BaseEncoder(ABC):
    """Abstract interface for encoders that turn text entries into a tensor."""

    @abstractmethod
    def __init__(self, model_name: str, device: torch.device = None, **kwargs):
        """Set up the encoder for the model named `model_name`."""

    @abstractmethod
    def encode(self, entries: List[str], device: torch.device = None, **kwargs) -> torch.Tensor:
        """Encode `entries` and return the result as a tensor."""
|
17
|
+
|
18
|
+
|
19
|
+
class OpenAI(BaseEncoder):
    """Encoder that embeds text entries with the embeddings endpoint of an OpenAI client."""

    # Used only when no entry could be embedded, so the real dimension is unknown.
    # Matches the embedding dimensions of the text-embedding-ada-002 model.
    _fallback_embedding_dimensions = 1536

    def __init__(self, model_name, client: openai.OpenAI, device=None):
        """
        Args:
            model_name: Embedding model name passed to the embeddings endpoint.
            client: OpenAI client used to request embeddings.
            device: Unused here; accepted to match the BaseEncoder signature.
        """
        self.model_name = model_name
        self.openai_client = client
        # Learnt from the first successful embedding response
        self.embedding_dimensions = None

    def encode(self, entries, device=None, **kwargs):
        """
        Embed each entry and stack the embeddings in the order of `entries`.

        Entries that fail to embed get a zero vector, so row i of the result
        always corresponds to entries[i].

        Args:
            entries: List of strings to embed.
            device: Torch device to create the tensors on.

        Returns:
            Tensor with one row per entry. Empty input yields a tensor with zero rows.
        """
        embedding_tensors = [None] * len(entries)
        failed_indices = []

        for index in trange(0, len(entries)):
            # OpenAI models create better embeddings for entries without newlines
            processed_entry = entries[index].replace("\n", " ")

            try:
                response = self.openai_client.embeddings.create(input=processed_entry, model=self.model_name)
                embedding = response.data[0].embedding
                embedding_tensors[index] = torch.tensor(embedding, device=device)
                # Remember the dimension the current model actually produces
                self.embedding_dimensions = len(embedding)
            except Exception as e:
                print(
                    f"Failed to encode entry {index} of length: {len(entries[index])}\n\n{entries[index][:1000]}...\n\n{e}"
                )
                # Fill in after the loop, once the embedding dimension is known
                failed_indices.append(index)

        dimensions = self.embedding_dimensions or self._fallback_embedding_dimensions

        # Use zero embedding vector for entries with failed embeddings
        # This ensures entry embeddings match the order of the source entries
        # and that every row has the same size, which torch.stack requires
        for index in failed_indices:
            embedding_tensors[index] = torch.zeros(dimensions, device=device)

        # torch.stack rejects an empty list, so return an empty tensor for empty input
        if not embedding_tensors:
            return torch.zeros((0, dimensions), device=device)

        return torch.stack(embedding_tensors)
|
khoj/utils/rawconfig.py
ADDED
@@ -0,0 +1,208 @@
|
|
1
|
+
# System Packages
|
2
|
+
import json
|
3
|
+
import uuid
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Dict, List, Optional
|
6
|
+
|
7
|
+
from pydantic import BaseModel
|
8
|
+
|
9
|
+
from khoj.utils.helpers import to_snake_case_from_dash
|
10
|
+
|
11
|
+
|
12
|
+
class ConfigBase(BaseModel):
    """Base config model that also supports dict-style `obj[key]` read and write access."""

    class Config:
        # Field aliases are generated by to_snake_case_from_dash
        alias_generator = to_snake_case_from_dash
        # Accept the field name as well as its alias when populating
        populate_by_name = True

    def __getitem__(self, item):
        """Return the attribute named `item`."""
        return getattr(self, item)

    def __setitem__(self, key, value):
        """Set the attribute named `key` to `value`."""
        setattr(self, key, value)
|
22
|
+
|
23
|
+
|
24
|
+
class LocationData(BaseModel):
    """Location described by optional city, region, country and country code."""

    city: Optional[str]
    region: Optional[str]
    country: Optional[str]
    country_code: Optional[str]

    def __str__(self):
        """Join the non-empty city, region and country with ", ". The country code is left out."""
        return ", ".join(part for part in (self.city, self.region, self.country) if part)
|
39
|
+
|
40
|
+
|
41
|
+
class FileFilterRequest(BaseModel):
    """Request body naming a single file and a conversation id."""

    filename: str
    conversation_id: str
|
44
|
+
|
45
|
+
|
46
|
+
class FilesFilterRequest(BaseModel):
    """Request body naming several files and a conversation id."""

    filenames: list[str]
    conversation_id: str
|
49
|
+
|
50
|
+
|
51
|
+
class TextConfigBase(ConfigBase):
    """Paths to a compressed JSONL file and an embeddings file."""

    compressed_jsonl: Path
    embeddings_file: Path
|
54
|
+
|
55
|
+
|
56
|
+
class TextContentConfig(ConfigBase):
    """Optional input files, input filters and heading-indexing flag for a text content type."""

    input_files: list[Path] | None = None
    input_filter: list[str] | None = None
    index_heading_entries: bool | None = False
|
60
|
+
|
61
|
+
|
62
|
+
class GithubRepoConfig(ConfigBase):
    """A GitHub repository given by name, owner and branch (default "master")."""

    name: str
    owner: str
    branch: str | None = "master"
|
66
|
+
|
67
|
+
|
68
|
+
class GithubContentConfig(ConfigBase):
    """GitHub content config: an optional token and the list of repositories."""

    # NOTE(review): presumably a GitHub personal access token; confirm against the caller
    pat_token: str | None = None
    repos: list[GithubRepoConfig]
|
71
|
+
|
72
|
+
|
73
|
+
class NotionContentConfig(ConfigBase):
    """Notion content config holding the required token."""

    token: str
|
75
|
+
|
76
|
+
|
77
|
+
class ContentConfig(ConfigBase):
    """Optional config for each content type. Every entry defaults to None."""

    # Text based content types
    org: TextContentConfig | None = None
    markdown: TextContentConfig | None = None
    pdf: TextContentConfig | None = None
    plaintext: TextContentConfig | None = None
    # Content types with their own config model
    github: GithubContentConfig | None = None
    notion: NotionContentConfig | None = None
    # Remaining content types reuse the text content config
    image: TextContentConfig | None = None
    docx: TextContentConfig | None = None
|
86
|
+
|
87
|
+
|
88
|
+
class ImageSearchConfig(ConfigBase):
    """Image search config: encoder name, optional encoder type and model directory."""

    encoder: str
    encoder_type: str | None = None
    model_directory: Path | None = None

    class Config:
        # Empty tuple clears the protected namespaces, so the `model_` prefixed field is accepted
        protected_namespaces = ()
|
95
|
+
|
96
|
+
|
97
|
+
class SearchConfig(ConfigBase):
    """Search config holding the optional image search config."""

    image: ImageSearchConfig | None = None
|
99
|
+
|
100
|
+
|
101
|
+
class OpenAIProcessorConfig(ConfigBase):
    """OpenAI processor config: required API key and chat model (default "gpt-4o-mini")."""

    api_key: str
    chat_model: str | None = "gpt-4o-mini"
|
104
|
+
|
105
|
+
|
106
|
+
class OfflineChatProcessorConfig(ConfigBase):
    """Offline chat processor config holding the chat model name and its default."""

    chat_model: str | None = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
|
108
|
+
|
109
|
+
|
110
|
+
class ConversationProcessorConfig(ConfigBase):
    """Conversation processor config. Every field is optional and defaults to None."""

    # Provider specific settings
    openai: OpenAIProcessorConfig | None = None
    offline_chat: OfflineChatProcessorConfig | None = None
    # Settings declared alongside the providers
    max_prompt_size: int | None = None
    tokenizer: str | None = None
|
115
|
+
|
116
|
+
|
117
|
+
class ProcessorConfig(ConfigBase):
    """Processor config holding the optional conversation processor config."""

    conversation: ConversationProcessorConfig | None = None
|
119
|
+
|
120
|
+
|
121
|
+
class AppConfig(ConfigBase):
    """App config holding the telemetry logging flag, which defaults to True."""

    should_log_telemetry: bool = True
|
123
|
+
|
124
|
+
|
125
|
+
class FullConfig(ConfigBase):
    """Top-level config combining the content, search, processor and app configs with a version."""

    content_type: ContentConfig | None = None
    search_type: SearchConfig | None = None
    processor: ProcessorConfig | None = None
    # Unlike the other sections, app defaults to a populated AppConfig
    app: AppConfig | None = AppConfig()
    version: str | None = None
|
131
|
+
|
132
|
+
|
133
|
+
class SearchResponse(ConfigBase):
    """One search result: the entry text, its score and the id of its corpus entry."""

    entry: str
    score: float
    # NOTE(review): presumably the cross-encoder re-ranking score; confirm against the search code
    cross_score: float | None = None
    additional: dict | None = None
    corpus_id: str
|
139
|
+
|
140
|
+
|
141
|
+
class FileData(BaseModel):
    """A file given by name, raw byte content, file type and optional encoding."""

    name: str
    content: bytes
    file_type: str
    encoding: str | None = None
|
146
|
+
|
147
|
+
|
148
|
+
class FileAttachment(BaseModel):
    """A file attachment given by name, text content, file type and size."""

    name: str
    content: str
    file_type: str
    size: int
|
153
|
+
|
154
|
+
|
155
|
+
class ChatRequestBody(BaseModel):
    """Body of a chat request. Only `q` is required."""

    # NOTE(review): q is presumably the user query; the meaning of n and d is not visible here
    q: str
    n: int | None = 7
    d: float | None = None
    stream: bool | None = False
    title: str | None = None
    # Conversation and turn identifiers
    conversation_id: str | None = None
    turn_id: str | None = None
    # Location fields sent with the request
    city: str | None = None
    region: str | None = None
    country: str | None = None
    country_code: str | None = None
    timezone: str | None = None
    # Attachments
    images: list[str] | None = None
    files: list[FileAttachment] | None = []
    create_new: bool | None = False
|
171
|
+
|
172
|
+
|
173
|
+
class Entry:
    """An entry with raw and compiled text, optional heading and file, and a corpus id."""

    raw: str
    compiled: str
    heading: Optional[str]
    file: Optional[str]
    corpus_id: str

    def __init__(
        self,
        raw: str = None,
        compiled: str = None,
        heading: Optional[str] = None,
        file: Optional[str] = None,
        corpus_id: uuid.UUID = None,
    ):
        """Store the fields as given, except the corpus id, which is stored as a string."""
        self.raw, self.compiled = raw, compiled
        self.heading, self.file = heading, file
        # The id is always stringified, so a missing id is stored as the string "None"
        self.corpus_id = str(corpus_id)

    def to_json(self) -> str:
        """Serialize the instance attributes to a JSON string, keeping non-ASCII characters."""
        return json.dumps(vars(self), ensure_ascii=False)

    def __repr__(self) -> str:
        """Return the repr of the instance attribute dictionary."""
        return repr(vars(self))

    @classmethod
    def from_dict(cls, dictionary: dict):
        """Build an entry from a dict. "raw" and "compiled" are required, the other keys optional."""
        optional_fields = {key: dictionary.get(key, None) for key in ("file", "heading", "corpus_id")}
        return cls(raw=dictionary["raw"], compiled=dictionary["compiled"], **optional_fields)
|
khoj/utils/state.py
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
import os
|
2
|
+
import threading
|
3
|
+
from collections import defaultdict
|
4
|
+
from pathlib import Path
|
5
|
+
from typing import Any, Dict, List
|
6
|
+
|
7
|
+
from apscheduler.schedulers.background import BackgroundScheduler
|
8
|
+
from openai import OpenAI
|
9
|
+
from whisper import Whisper
|
10
|
+
|
11
|
+
from khoj.database.models import ProcessLock
|
12
|
+
from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
|
13
|
+
from khoj.utils import config as utils_config
|
14
|
+
from khoj.utils.config import OfflineChatProcessorModel, SearchModels
|
15
|
+
from khoj.utils.helpers import LRU, get_device, is_env_var_true
|
16
|
+
from khoj.utils.rawconfig import FullConfig
|
17
|
+
|
18
|
+
# Application Global State
# Module-level values below are the initial state; most start unset (None)
config = FullConfig()

# Models and clients
search_models = SearchModels()
embeddings_model: Dict[str, EmbeddingsModel] = None
cross_encoder_model: Dict[str, CrossEncoderModel] = None
openai_client: OpenAI = None
offline_chat_processor_config: OfflineChatProcessorModel = None
whisper_model: Whisper = None

# Server and command line settings
config_file: Path = None
verbose: int = 0
host: str = None
port: int = None
ssl_config: Dict[str, str] = None
cli_args: List[str] = None

# Caches, locks and scheduling
query_cache: Dict[str, LRU] = defaultdict(LRU)
chat_lock = threading.Lock()
SearchType = utils_config.SearchType
scheduler: BackgroundScheduler = None
schedule_leader_process_lock: ProcessLock = None

# Telemetry
telemetry: List[Dict[str, str]] = []
telemetry_disabled: bool = is_env_var_true("KHOJ_TELEMETRY_DISABLE")

# Runtime environment
khoj_version: str = None
device = get_device()
chat_on_gpu: bool = True
anonymous_mode: bool = False
pretrained_tokenizers: Dict[str, Any] = {}

# Billing is enabled only when all of these environment variables are set
billing_enabled: bool = all(
    os.getenv(env_var) is not None
    for env_var in ("STRIPE_API_KEY", "STRIPE_SIGNING_SECRET", "KHOJ_CLOUD_SUBSCRIPTION_URL")
)
|
khoj/utils/yaml.py
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
from pathlib import Path
|
2
|
+
|
3
|
+
import yaml
|
4
|
+
|
5
|
+
from khoj.utils import state
|
6
|
+
from khoj.utils.rawconfig import FullConfig
|
7
|
+
|
8
|
+
# Do not emit tags when dumping to YAML
def _skip_process_tag(self, *args, **kwargs):
    """No-op replacement for the YAML emitter's tag processing."""
    return None


yaml.emitter.Emitter.process_tag = _skip_process_tag  # type: ignore[assignment]
|
10
|
+
|
11
|
+
|
12
|
+
def save_config_to_file_updated_state():
    """
    Write the current `state.config` to `state.config_file` as YAML.

    Returns:
        The `state.config` object that was written.
    """
    # Serialize before opening the file, so a serialization error cannot leave a truncated config file.
    # Round-trip through JSON so only plain, YAML-serializable values reach the dumper.
    # model_dump_json is the pydantic v2 replacement for the deprecated .json()
    config_json = state.config.model_dump_json(by_alias=True)
    plain_config = yaml.safe_load(config_json)

    # Explicit UTF-8 to match the other config file readers and writers in this module.
    # The with-block closes the file; no explicit close() is needed.
    with open(state.config_file, "w", encoding="utf-8") as outfile:
        yaml.dump(plain_config, outfile)

    return state.config
|
17
|
+
|
18
|
+
|
19
|
+
def save_config_to_file(yaml_config: dict, yaml_config_file: Path):
    """Write the `yaml_config` dict to `yaml_config_file` as UTF-8 YAML, creating parent directories."""
    # Make sure the destination directory exists before writing
    config_dir = yaml_config_file.parent
    config_dir.mkdir(parents=True, exist_ok=True)

    with open(yaml_config_file, mode="w", encoding="utf-8") as yaml_file:
        yaml.safe_dump(yaml_config, yaml_file, allow_unicode=True)
|
26
|
+
|
27
|
+
|
28
|
+
def load_config_from_file(yaml_config_file: Path) -> dict:
    """Read `yaml_config_file` as UTF-8 and return the safely loaded YAML content."""
    with open(yaml_config_file, mode="r", encoding="utf-8") as yaml_file:
        return yaml.safe_load(yaml_file)
|
34
|
+
|
35
|
+
|
36
|
+
def parse_config_from_string(yaml_config: dict) -> FullConfig:
    """Validate an already-loaded config dict and return it as a FullConfig."""
    validated_config = FullConfig.model_validate(yaml_config)
    return validated_config
|
39
|
+
|
40
|
+
|
41
|
+
def parse_config_from_file(yaml_config_file):
    """Load the YAML config file, then parse and validate its content."""
    loaded_config = load_config_from_file(yaml_config_file)
    return parse_config_from_string(loaded_config)
|
44
|
+
|
45
|
+
|
46
|
+
def yaml_dump(data):
    """Return `data` as block-style YAML text, keeping key order and unicode characters."""
    dump_options = {"allow_unicode": True, "sort_keys": False, "default_flow_style": False}
    return yaml.dump(data, **dump_options)
|