khoj 1.33.3.dev32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/__init__.py +0 -0
- khoj/app/README.md +94 -0
- khoj/app/__init__.py +0 -0
- khoj/app/asgi.py +16 -0
- khoj/app/settings.py +218 -0
- khoj/app/urls.py +25 -0
- khoj/configure.py +452 -0
- khoj/database/__init__.py +0 -0
- khoj/database/adapters/__init__.py +1821 -0
- khoj/database/admin.py +417 -0
- khoj/database/apps.py +6 -0
- khoj/database/management/__init__.py +0 -0
- khoj/database/management/commands/__init__.py +0 -0
- khoj/database/management/commands/change_default_model.py +116 -0
- khoj/database/management/commands/change_generated_images_url.py +61 -0
- khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
- khoj/database/migrations/0001_khojuser.py +98 -0
- khoj/database/migrations/0002_googleuser.py +32 -0
- khoj/database/migrations/0003_vector_extension.py +10 -0
- khoj/database/migrations/0004_content_types_and_more.py +181 -0
- khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
- khoj/database/migrations/0006_embeddingsdates.py +33 -0
- khoj/database/migrations/0007_add_conversation.py +27 -0
- khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
- khoj/database/migrations/0009_khojapiuser.py +24 -0
- khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
- khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
- khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
- khoj/database/migrations/0012_entry_file_source.py +21 -0
- khoj/database/migrations/0013_subscription.py +37 -0
- khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
- khoj/database/migrations/0015_alter_subscription_user.py +21 -0
- khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
- khoj/database/migrations/0017_searchmodel.py +32 -0
- khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
- khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
- khoj/database/migrations/0020_reflectivequestion.py +36 -0
- khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
- khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
- khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
- khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
- khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
- khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
- khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
- khoj/database/migrations/0029_userrequests.py +27 -0
- khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
- khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
- khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
- khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
- khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
- khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
- khoj/database/migrations/0035_processlock.py +26 -0
- khoj/database/migrations/0036_alter_processlock_name.py +19 -0
- khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
- khoj/database/migrations/0036_publicconversation.py +42 -0
- khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
- khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
- khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
- khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
- khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
- khoj/database/migrations/0040_alter_processlock_name.py +26 -0
- khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
- khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
- khoj/database/migrations/0042_serverchatsettings.py +46 -0
- khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
- khoj/database/migrations/0044_conversation_file_filters.py +17 -0
- khoj/database/migrations/0045_fileobject.py +37 -0
- khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
- khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
- khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
- khoj/database/migrations/0049_datastore.py +38 -0
- khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
- khoj/database/migrations/0050_alter_processlock_name.py +25 -0
- khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
- khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
- khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
- khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
- khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
- khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
- khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
- khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
- khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
- khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
- khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
- khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
- khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
- khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
- khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
- khoj/database/migrations/0063_conversation_temp_id.py +36 -0
- khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
- khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
- khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
- khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
- khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
- khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
- khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
- khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
- khoj/database/migrations/0072_entry_search_model.py +24 -0
- khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
- khoj/database/migrations/0074_alter_conversation_title.py +17 -0
- khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
- khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
- khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
- khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
- khoj/database/migrations/__init__.py +0 -0
- khoj/database/models/__init__.py +725 -0
- khoj/database/tests.py +3 -0
- khoj/interface/compiled/404/index.html +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
- khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
- khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
- khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
- khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
- khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
- khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
- khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
- khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
- khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
- khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
- khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
- khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
- khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
- khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
- khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
- khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
- khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
- khoj/interface/compiled/agents/index.html +1 -0
- khoj/interface/compiled/agents/index.txt +7 -0
- khoj/interface/compiled/agents.svg +6 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
- khoj/interface/compiled/automation.svg +37 -0
- khoj/interface/compiled/automations/index.html +1 -0
- khoj/interface/compiled/automations/index.txt +8 -0
- khoj/interface/compiled/chat/index.html +1 -0
- khoj/interface/compiled/chat/index.txt +7 -0
- khoj/interface/compiled/chat.svg +24 -0
- khoj/interface/compiled/close.svg +5 -0
- khoj/interface/compiled/copy-button-success.svg +6 -0
- khoj/interface/compiled/copy-button.svg +5 -0
- khoj/interface/compiled/index.html +1 -0
- khoj/interface/compiled/index.txt +7 -0
- khoj/interface/compiled/khoj.webmanifest +76 -0
- khoj/interface/compiled/logo.svg +24 -0
- khoj/interface/compiled/search/index.html +1 -0
- khoj/interface/compiled/search/index.txt +7 -0
- khoj/interface/compiled/send.svg +1 -0
- khoj/interface/compiled/settings/index.html +1 -0
- khoj/interface/compiled/settings/index.txt +9 -0
- khoj/interface/compiled/share/chat/index.html +1 -0
- khoj/interface/compiled/share/chat/index.txt +7 -0
- khoj/interface/compiled/share.svg +8 -0
- khoj/interface/compiled/thumbs-down.svg +6 -0
- khoj/interface/compiled/thumbs-up.svg +6 -0
- khoj/interface/email/feedback.html +34 -0
- khoj/interface/email/magic_link.html +40 -0
- khoj/interface/email/task.html +37 -0
- khoj/interface/email/welcome.html +90 -0
- khoj/interface/web/.well-known/assetlinks.json +11 -0
- khoj/interface/web/assets/icons/agents.svg +19 -0
- khoj/interface/web/assets/icons/automation.svg +43 -0
- khoj/interface/web/assets/icons/chat.svg +24 -0
- khoj/interface/web/assets/icons/github.svg +1 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
- khoj/interface/web/assets/icons/khoj.svg +26 -0
- khoj/interface/web/assets/icons/logotype.svg +1 -0
- khoj/interface/web/assets/icons/search.svg +57 -0
- khoj/interface/web/assets/icons/sync.svg +4 -0
- khoj/interface/web/assets/khoj.css +237 -0
- khoj/interface/web/assets/utils.js +33 -0
- khoj/interface/web/base_config.html +445 -0
- khoj/interface/web/content_source_github_input.html +208 -0
- khoj/interface/web/login.html +310 -0
- khoj/interface/web/utils.html +48 -0
- khoj/main.py +249 -0
- khoj/manage.py +22 -0
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +69 -0
- khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
- khoj/migrations/migrate_offline_chat_schema.py +83 -0
- khoj/migrations/migrate_offline_model.py +29 -0
- khoj/migrations/migrate_processor_config_openai.py +67 -0
- khoj/migrations/migrate_server_pg.py +132 -0
- khoj/migrations/migrate_version.py +17 -0
- khoj/processor/__init__.py +0 -0
- khoj/processor/content/__init__.py +0 -0
- khoj/processor/content/docx/__init__.py +0 -0
- khoj/processor/content/docx/docx_to_entries.py +111 -0
- khoj/processor/content/github/__init__.py +0 -0
- khoj/processor/content/github/github_to_entries.py +226 -0
- khoj/processor/content/images/__init__.py +0 -0
- khoj/processor/content/images/image_to_entries.py +117 -0
- khoj/processor/content/markdown/__init__.py +0 -0
- khoj/processor/content/markdown/markdown_to_entries.py +160 -0
- khoj/processor/content/notion/notion_to_entries.py +259 -0
- khoj/processor/content/org_mode/__init__.py +0 -0
- khoj/processor/content/org_mode/org_to_entries.py +226 -0
- khoj/processor/content/org_mode/orgnode.py +532 -0
- khoj/processor/content/pdf/__init__.py +0 -0
- khoj/processor/content/pdf/pdf_to_entries.py +119 -0
- khoj/processor/content/plaintext/__init__.py +0 -0
- khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
- khoj/processor/content/text_to_entries.py +296 -0
- khoj/processor/conversation/__init__.py +0 -0
- khoj/processor/conversation/anthropic/__init__.py +0 -0
- khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
- khoj/processor/conversation/anthropic/utils.py +217 -0
- khoj/processor/conversation/google/__init__.py +0 -0
- khoj/processor/conversation/google/gemini_chat.py +253 -0
- khoj/processor/conversation/google/utils.py +260 -0
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +308 -0
- khoj/processor/conversation/offline/utils.py +80 -0
- khoj/processor/conversation/offline/whisper.py +15 -0
- khoj/processor/conversation/openai/__init__.py +0 -0
- khoj/processor/conversation/openai/gpt.py +243 -0
- khoj/processor/conversation/openai/utils.py +232 -0
- khoj/processor/conversation/openai/whisper.py +13 -0
- khoj/processor/conversation/prompts.py +1188 -0
- khoj/processor/conversation/utils.py +867 -0
- khoj/processor/embeddings.py +122 -0
- khoj/processor/image/generate.py +215 -0
- khoj/processor/speech/__init__.py +0 -0
- khoj/processor/speech/text_to_speech.py +51 -0
- khoj/processor/tools/__init__.py +0 -0
- khoj/processor/tools/online_search.py +472 -0
- khoj/processor/tools/run_code.py +179 -0
- khoj/routers/__init__.py +0 -0
- khoj/routers/api.py +760 -0
- khoj/routers/api_agents.py +295 -0
- khoj/routers/api_chat.py +1273 -0
- khoj/routers/api_content.py +634 -0
- khoj/routers/api_model.py +123 -0
- khoj/routers/api_phone.py +86 -0
- khoj/routers/api_subscription.py +144 -0
- khoj/routers/auth.py +307 -0
- khoj/routers/email.py +135 -0
- khoj/routers/helpers.py +2333 -0
- khoj/routers/notion.py +85 -0
- khoj/routers/research.py +364 -0
- khoj/routers/storage.py +63 -0
- khoj/routers/twilio.py +36 -0
- khoj/routers/web_client.py +141 -0
- khoj/search_filter/__init__.py +0 -0
- khoj/search_filter/base_filter.py +15 -0
- khoj/search_filter/date_filter.py +215 -0
- khoj/search_filter/file_filter.py +32 -0
- khoj/search_filter/word_filter.py +29 -0
- khoj/search_type/__init__.py +0 -0
- khoj/search_type/text_search.py +255 -0
- khoj/utils/__init__.py +0 -0
- khoj/utils/cli.py +101 -0
- khoj/utils/config.py +81 -0
- khoj/utils/constants.py +51 -0
- khoj/utils/fs_syncer.py +252 -0
- khoj/utils/helpers.py +627 -0
- khoj/utils/initialization.py +301 -0
- khoj/utils/jsonl.py +43 -0
- khoj/utils/models.py +47 -0
- khoj/utils/rawconfig.py +208 -0
- khoj/utils/state.py +48 -0
- khoj/utils/yaml.py +47 -0
- khoj-1.33.3.dev32.dist-info/METADATA +190 -0
- khoj-1.33.3.dev32.dist-info/RECORD +393 -0
- khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
- khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
- khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
from abc import ABC, abstractmethod
|
2
|
+
from typing import List
|
3
|
+
|
4
|
+
|
5
|
+
class BaseFilter(ABC):
    """Abstract interface for a query filter.

    A concrete filter reports which filter terms a query contains and knows
    how to remove those terms from the query text.
    """

    @abstractmethod
    def get_filter_terms(self, query: str) -> List[str]:
        """Return the filter terms found in ``query``."""

    def can_filter(self, raw_query: str) -> bool:
        """Return True when ``raw_query`` contains at least one filter term."""
        found_terms = self.get_filter_terms(raw_query)
        return len(found_terms) > 0

    @abstractmethod
    def defilter(self, query: str) -> str:
        """Return ``query`` with this filter's terms removed."""
|
@@ -0,0 +1,215 @@
|
|
1
|
+
import calendar
|
2
|
+
import logging
|
3
|
+
import re
|
4
|
+
from collections import defaultdict
|
5
|
+
from datetime import datetime, timedelta
|
6
|
+
from math import inf
|
7
|
+
from typing import List, Tuple
|
8
|
+
|
9
|
+
import dateparser as dtparse
|
10
|
+
from dateutil.relativedelta import relativedelta
|
11
|
+
|
12
|
+
from khoj.search_filter.base_filter import BaseFilter
|
13
|
+
from khoj.utils.helpers import LRU, merge_dicts, timer
|
14
|
+
|
15
|
+
logger = logging.getLogger(__name__)
|
16
|
+
|
17
|
+
|
18
|
+
class DateFilter(BaseFilter):
|
19
|
+
# Date Range Filter Regexes
|
20
|
+
# Example filter queries:
|
21
|
+
# - dt>="yesterday" dt<"tomorrow"
|
22
|
+
# - dt>="last week"
|
23
|
+
# - dt:"2 years ago"
|
24
|
+
date_regex = r"dt([:><=]{1,2})[\"'‘’](.*?)[\"'‘’]"
|
25
|
+
|
26
|
+
def __init__(self, entry_key="compiled"):
    """Set up the state used for date extraction and date filtering.

    Args:
        entry_key: Stored as ``self.entry_key``; defaults to "compiled".
    """
    self.entry_key = entry_key

    # Bookkeeping containers: a fresh LRU cache and a date -> set mapping
    self.cache = LRU()
    self.date_to_entry_ids = defaultdict(set)

    # Parser options kept on the instance (consumers are outside this method)
    self.dtparser_settings = {
        "PREFER_DAY_OF_MONTH": "first",
        "DATE_ORDER": "YMD",  # Prefer YMD and DMY over MDY when parsing ambiguous dates
    }

    # Pattern for ordinal suffixes such as the "st" in "1st"
    ordinal_suffix_pattern = r"(st|nd|rd|th)"
    self.dtparser_ordinal_suffixes = re.compile(ordinal_suffix_pattern)

    # (strptime format, regex) pairs used to find dates in content
    self.dtparser_regexes = self.compile_date_regexes()
|
36
|
+
|
37
|
+
def compile_date_regexes(self):
    """Build the (strptime format, compiled regex) pairs used to find dates.

    Returns:
        A list of 19 ``(date_format, pattern)`` tuples ordered as structured
        dates, then natural dates, then partial (month and year only) natural
        dates. Every pattern is compiled with ``re.IGNORECASE`` and wrapped
        in word boundaries.
    """

    def bounded(*parts, sep=" "):
        # Join the fragments with `sep` and require word boundaries around them
        return re.compile(r"\b" + sep.join(parts) + r"\b", re.IGNORECASE)

    # Reusable pattern fragments
    day = r"\d{1,2}(?:st|nd|rd|th)?"
    two_digits, four_digits = r"\d{2}", r"\d{4}"
    month_styles = [
        ("%B", "(?:" + "|".join(calendar.month_name[1:]) + ")"),
        ("%b", "(?:" + "|".join(calendar.month_abbr[1:]) + ")"),
    ]
    year_styles = [("%Y", four_digits), ("%y", two_digits)]

    # Structured dates like 1984-04-01, 1984/04/01, 01-04-1984, 01/04/1984, 01.04.1984, 01-04-84, 01/04/84
    dash_or_slash = r"[-\/]"
    ymd = bounded(four_digits, two_digits, two_digits, sep=dash_or_slash)
    dmy_long_year = bounded(two_digits, two_digits, four_digits, sep=dash_or_slash)
    dmy_short_year = bounded(two_digits, two_digits, two_digits, sep=dash_or_slash)
    dmy_dotted = bounded(two_digits, two_digits, four_digits, sep=r"[\.]")

    dtparser_regexes: List[Tuple[str, re.Pattern[str]]] = [
        ("%Y-%m-%d", ymd),
        ("%Y/%m/%d", ymd),
        ("%d-%m-%Y", dmy_long_year),
        ("%d/%m/%Y", dmy_long_year),
        ("%d.%m.%Y", dmy_dotted),
        ("%d-%m-%y", dmy_short_year),
        ("%d/%m/%y", dmy_short_year),
    ]

    # Natural dates like 1st April 1984, 30 April 84, Apr 4th 1984, 13 Apr 84
    for month_fmt, month in month_styles:
        for day_first in (True, False):
            for year_fmt, year in year_styles:
                if day_first:
                    pair = (f"%d {month_fmt} {year_fmt}", bounded(day, month, year))
                else:
                    pair = (f"{month_fmt} %d {year_fmt}", bounded(month, day, year))
                dtparser_regexes.append(pair)

    # Partial natural dates of the form Month Year like January 2021, Jan 2021, Jan 21
    for month_fmt, month in month_styles:
        for year_fmt, year in year_styles:
            dtparser_regexes.append((f"{month_fmt} {year_fmt}", bounded(month, year)))

    return dtparser_regexes
|
86
|
+
|
87
|
+
def extract_dates(self, content):
    """Extract natural and structured dates from content.

    Every (strptime format, regex) pair in ``self.dtparser_regexes`` is run
    over ``content``. Each match is parsed with its paired format; matches
    that fail to parse are skipped.

    Args:
        content: Text to scan for dates.

    Returns:
        A list of the distinct ``datetime`` values found, in no particular
        order.
    """
    # Strip an ordinal suffix only when it directly follows the day number.
    # The broader ``self.dtparser_ordinal_suffixes`` pattern also removes the
    # "st" inside "August", turning "1st August 1984" into "1 Augu 1984",
    # which strptime rejects and the date is silently lost. Ignore case
    # because the date regexes themselves match case-insensitively.
    ordinal_suffix = re.compile(r"(?<=\d)(?:st|nd|rd|th)\b", re.IGNORECASE)

    valid_dates = set()
    for date_format, date_regex in self.dtparser_regexes:
        for date_str in date_regex.findall(content):
            cleaned_date_str = ordinal_suffix.sub("", date_str)
            try:
                valid_dates.add(datetime.strptime(cleaned_date_str, date_format))
            except ValueError:
                # Text matched the pattern but is not a real date in this format
                continue

    return list(valid_dates)
|
101
|
+
|
102
|
+
def get_filter_terms(self, query: str) -> List[str]:
    """Get all date filter terms in query, rewritten as dt<comparator>'<date>'."""
    filter_terms = []
    for comparator, date_text in re.findall(self.date_regex, query):
        filter_terms.append(f"dt{comparator}'{date_text}'")
    return filter_terms
|
105
|
+
|
106
|
+
def get_query_date_range(self, query) -> List:
    """Return the result of ``extract_date_range(query)``, timing the call."""
    # The timer wraps only the extraction; the message is logged via `logger`
    with timer("Extract date range to filter from query", logger):
        effective_range = self.extract_date_range(query)

    return effective_range
|
111
|
+
|
112
|
+
def defilter(self, query: str) -> str:
    """Return query with every date filter term removed.

    Each date filter term is replaced by a single space, runs of whitespace
    are then collapsed to one space, and the result is stripped.

    Args:
        query: Raw query that may contain date filter terms.

    Returns:
        The query text without date filter terms.
    """
    # Accept start-of-string as well as leading whitespace so that a filter
    # opening the query (or making up the whole query) is removed too
    without_filters = re.sub(rf"(?:^|\s+){self.date_regex}", " ", query)
    # remove multiple spaces
    return re.sub(r"\s{2,}", " ", without_filters).strip()
|
117
|
+
|
118
|
+
def extract_date_range(self, query):
    """Resolve the date filters in ``query`` into one effective timestamp range.

    Every match of ``self.date_regex`` yields a comparator and a date string.
    The date string is parsed with ``self.parse`` into a ``(start, end)``
    datetime pair, converted to an interval according to its comparator, and
    all intervals are intersected (AND).

    For e.g. `>=yesterday <tomorrow` gives [start_of_yesterday, start_of_tomorrow).

    Returns:
        ``[]`` when the query has no date filter, no filter produced an
        interval, or the filters contradict each other. Otherwise a two item
        list ``[start, end]`` of timestamps where an unbounded side is ``None``.
    """
    # find date range filter in query
    date_range_matches = re.findall(self.date_regex, query)
    if not date_range_matches:
        return []

    effective_date_range: List = [0, inf]
    for cmp, date_str in date_range_matches:
        # Parse each filter exactly once. Calling parse a second time just to
        # unpack its result repeats the work and may resolve relative dates
        # against a different "now" than the first call.
        parsed_range = self.parse(date_str)
        if not parsed_range:
            continue
        dt_start, dt_end = parsed_range
        dtrange_start, dtrange_end = dt_start.timestamp(), dt_end.timestamp()

        # Combine date with its comparator to form a date range interval
        # e.g. >=yesterday maps to [start_of_yesterday, inf)
        #      <tomorrow maps to [0, start_of_tomorrow)
        if cmp == ">":
            date_range = [dtrange_end, inf]
        elif cmp == ">=":
            date_range = [dtrange_start, inf]
        elif cmp == "<":
            date_range = [0, dtrange_start]
        elif cmp == "<=":
            date_range = [0, dtrange_end]
        elif cmp in ("=", ":", "=="):
            date_range = [dtrange_start, dtrange_end]
        else:
            # Comparators outside the supported set contribute no interval
            continue

        # Intersect with the range accumulated so far
        effective_date_range = [
            max(effective_date_range[0], date_range[0]),
            min(effective_date_range[1], date_range[1]),
        ]

    if effective_date_range == [0, inf] or effective_date_range[0] > effective_date_range[1]:
        # Nothing constrained the range, or the filters exclude each other
        return []

    # Report unbounded sides as None instead of the 0 / inf sentinels
    if effective_date_range[0] == 0:
        effective_date_range[0] = None
    if effective_date_range[1] == inf:
        effective_date_range[1] = None

    return effective_date_range
|
175
|
+
|
176
|
+
def parse(self, date_str, relative_base=None):
    """Parse the date string of a query date filter into a date range.

    The phrases "later", "from now" and "from today" make the parser prefer
    future dates and are removed from the string before parsing; otherwise past
    dates are preferred. Dates are resolved relative to ``relative_base``, or
    the current time when it is not given.

    Returns:
        ``None`` when the date parser raises or finds no date, otherwise the
        result of ``self.date_to_daterange`` for the parsed date.
    """
    # clean date string to handle future date parsing by date parser
    future_strings = ["later", "from now", "from today"]
    mentions_future = any(fstr in date_str for fstr in future_strings)
    dtquery_settings = {
        "RELATIVE_BASE": relative_base or datetime.now(),
        "PREFER_DATES_FROM": "future" if mentions_future else "past",
    }
    dtparser_settings = merge_dicts(dtquery_settings, self.dtparser_settings)

    # parse date passed in query date filter
    clean_date_str = re.sub("|".join(future_strings), "", date_str)
    try:
        parsed_date = dtparse.parse(clean_date_str, settings=dtparser_settings)
    except Exception as e:
        logger.error(f"Failed to parse date string: {date_str} with error: {e}")
        return None

    return None if parsed_date is None else self.date_to_daterange(parsed_date, date_str)
|
196
|
+
|
197
|
+
def date_to_daterange(self, parsed_date, date_str):
    """Convert parsed date to a (start, end) range at natural granularity.

    The granularity is picked from the words in ``date_str``, checked in this
    order: "year", "month", "week", and a single day when none of them occur.
    """
    day_start = parsed_date.replace(hour=0, minute=0, second=0, microsecond=0)

    if "year" in date_str:
        year_start = datetime(parsed_date.year, 1, 1, 0, 0, 0)
        next_year_start = datetime(parsed_date.year + 1, 1, 1, 0, 0, 0)
        return (year_start, next_year_start)

    if "month" in date_str:
        month_start = datetime(parsed_date.year, parsed_date.month, 1, 0, 0, 0)
        return (month_start, month_start + relativedelta(months=1))

    if "week" in date_str:
        # if week in date string, dateparser parses it to next week start
        # so today = end of this week
        return (day_start - timedelta(days=7), day_start)

    return (day_start, day_start + timedelta(days=1))
|
@@ -0,0 +1,32 @@
|
|
1
|
+
import logging
|
2
|
+
import re
|
3
|
+
from collections import defaultdict
|
4
|
+
from typing import List
|
5
|
+
|
6
|
+
from khoj.search_filter.base_filter import BaseFilter
|
7
|
+
from khoj.utils.helpers import LRU
|
8
|
+
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
|
12
|
+
class FileFilter(BaseFilter):
    """Search filter for `file:"<pattern>"` and `-file:"<pattern>"` query terms."""

    # Matches file:"..." terms not preceded by "-"; captures the quoted text
    file_filter_regex = r'(?<!-)file:"(.+?)" ?'
    # Matches -file:"..." terms; captures the quoted text
    excluded_file_filter_regex = r'-file:"(.+?)" ?'

    def __init__(self, entry_key="file"):
        self.entry_key = entry_key
        self.file_to_entry_map = defaultdict(set)
        self.cache = LRU()

    def get_filter_terms(self, query: str) -> List[str]:
        """Get all filter terms in query.

        Required files are returned as written in the query; excluded files are
        returned with a leading "-". Required files come first.
        """
        required_files = re.findall(self.file_filter_regex, query)
        excluded_files = [f"-{excluded_file}" for excluded_file in re.findall(self.excluded_file_filter_regex, query)]
        return required_files + excluded_files

    def convert_to_regex(self, file_filter: str) -> str:
        """Convert file filter to regex.

        Literal dots are escaped and each "*" becomes ".*". Other characters
        are passed through unchanged.
        """
        return file_filter.replace(".", r"\.").replace("*", r".*")

    def defilter(self, query: str) -> str:
        """Return ``query`` with both required and excluded file filters removed.

        Excluded (`-file:"..."`) terms have to be stripped too, otherwise they
        stay behind in the text of the defiltered query.
        """
        without_excluded = re.sub(self.excluded_file_filter_regex, "", query)
        return re.sub(self.file_filter_regex, "", without_excluded).strip()
|
@@ -0,0 +1,29 @@
|
|
1
|
+
import logging
|
2
|
+
import re
|
3
|
+
from collections import defaultdict
|
4
|
+
from typing import List
|
5
|
+
|
6
|
+
from khoj.search_filter.base_filter import BaseFilter
|
7
|
+
from khoj.utils.helpers import LRU
|
8
|
+
|
9
|
+
logger = logging.getLogger(__name__)
|
10
|
+
|
11
|
+
|
12
|
+
class WordFilter(BaseFilter):
    """Search filter for `+"word"` (required) and `-"word"` (blocked) query terms."""

    # Filter Regex
    required_regex = r'\+"([a-zA-Z0-9_-]+)" ?'
    blocked_regex = r'\-"([a-zA-Z0-9_-]+)" ?'

    def __init__(self, entry_key="raw"):
        self.entry_key = entry_key
        self.word_to_entry_index = defaultdict(set)
        self.cache = LRU()

    def get_filter_terms(self, query: str) -> List[str]:
        """Get all filter terms in query.

        Required words are prefixed with "+", blocked words with "-".
        Required words come first.
        """
        terms = []
        for required_term in re.findall(self.required_regex, query):
            terms.append(f"+{required_term}")
        for blocked_term in re.findall(self.blocked_regex, query):
            terms.append(f"-{blocked_term}")
        return terms

    def defilter(self, query: str) -> str:
        """Return ``query`` with required and blocked word filters removed."""
        without_required = re.sub(self.required_regex, "", query)
        without_blocked = re.sub(self.blocked_regex, "", without_required)
        return without_blocked.strip()
|
File without changes
|
@@ -0,0 +1,255 @@
|
|
1
|
+
import logging
|
2
|
+
import math
|
3
|
+
from pathlib import Path
|
4
|
+
from typing import List, Optional, Tuple, Type, Union
|
5
|
+
|
6
|
+
import requests
|
7
|
+
import torch
|
8
|
+
from asgiref.sync import sync_to_async
|
9
|
+
from sentence_transformers import util
|
10
|
+
|
11
|
+
from khoj.database.adapters import EntryAdapters, get_default_search_model
|
12
|
+
from khoj.database.models import Agent
|
13
|
+
from khoj.database.models import Entry as DbEntry
|
14
|
+
from khoj.database.models import KhojUser
|
15
|
+
from khoj.processor.content.text_to_entries import TextToEntries
|
16
|
+
from khoj.utils import state
|
17
|
+
from khoj.utils.helpers import get_absolute_path, timer
|
18
|
+
from khoj.utils.jsonl import load_jsonl
|
19
|
+
from khoj.utils.models import BaseEncoder
|
20
|
+
from khoj.utils.rawconfig import Entry, SearchResponse
|
21
|
+
from khoj.utils.state import SearchType
|
22
|
+
|
23
|
+
logger = logging.getLogger(__name__)
|
24
|
+
|
25
|
+
# Maps each SearchType value to the matching DbEntry.EntryType.
# SearchType.All maps to None.
search_type_to_embeddings_type = {
    SearchType.Org.value: DbEntry.EntryType.ORG,
    SearchType.Markdown.value: DbEntry.EntryType.MARKDOWN,
    SearchType.Plaintext.value: DbEntry.EntryType.PLAINTEXT,
    SearchType.Pdf.value: DbEntry.EntryType.PDF,
    SearchType.Github.value: DbEntry.EntryType.GITHUB,
    SearchType.Notion.value: DbEntry.EntryType.NOTION,
    SearchType.All.value: None,
}
|
34
|
+
|
35
|
+
|
36
|
+
def extract_entries(jsonl_file) -> List[Entry]:
    """Load entries from compressed jsonl.

    Each record returned by ``load_jsonl`` is converted with ``Entry.from_dict``.
    """
    return [Entry.from_dict(record) for record in load_jsonl(jsonl_file)]
|
39
|
+
|
40
|
+
|
41
|
+
def compute_embeddings(
    entries_with_ids: List[Tuple[int, Entry]],
    bi_encoder: BaseEncoder,
    embeddings_file: Path,
    regenerate=False,
    normalize=True,
):
    """Compute (and save) embeddings or load pre-computed embeddings.

    Entries paired with id ``-1`` are encoded with ``bi_encoder``. Every other
    id is used as a row index into the embeddings loaded from
    ``embeddings_file`` (an empty tensor when the file is missing or
    ``regenerate`` is set). The selected rows followed by the newly encoded
    rows are optionally normalized, saved back to ``embeddings_file`` and
    returned.
    """
    # Load pre-computed embeddings from file if exists and update them if required
    if embeddings_file.exists() and not regenerate:
        create_index_msg = ""
        corpus_embeddings: torch.Tensor = torch.load(get_absolute_path(embeddings_file), map_location=state.device)
        logger.debug(f"Loaded {len(corpus_embeddings)} text embeddings from {embeddings_file}")
    else:
        create_index_msg = " Creating index from scratch."
        corpus_embeddings = torch.tensor([], device=state.device)

    # Encode any new entries in the corpus and update corpus embeddings
    new_entries = [entry.compiled for entry_id, entry in entries_with_ids if entry_id == -1]
    if new_entries:
        logger.info(f"📩 Indexing {len(new_entries)} text entries.{create_index_msg}")
        new_embeddings = bi_encoder.encode(
            new_entries, convert_to_tensor=True, device=state.device, show_progress_bar=True
        )
    else:
        new_embeddings = torch.tensor([], device=state.device)

    # Extract existing embeddings from previous corpus embeddings
    existing_entry_ids = [entry_id for entry_id, _ in entries_with_ids if entry_id != -1]
    if existing_entry_ids:
        row_indices = torch.tensor(existing_entry_ids, device=state.device)
        existing_embeddings = torch.index_select(corpus_embeddings, 0, row_indices)
    else:
        existing_embeddings = torch.tensor([], device=state.device)

    # Set corpus embeddings to merger of existing and new embeddings
    corpus_embeddings = torch.cat([existing_embeddings, new_embeddings], dim=0)
    if normalize:
        # Normalize embeddings for faster lookup via dot product when querying
        corpus_embeddings = util.normalize_embeddings(corpus_embeddings)

    # Save regenerated or updated embeddings to file
    torch.save(corpus_embeddings, embeddings_file)
    logger.info(f"📩 Saved computed text embeddings to {embeddings_file}")

    return corpus_embeddings
|
86
|
+
|
87
|
+
|
88
|
+
def load_embeddings(
    embeddings_file: Path,
):
    """Load pre-computed embeddings from file, if it exists.

    Returns the normalized embeddings, or ``None`` when ``embeddings_file``
    does not exist.
    """
    if not embeddings_file.exists():
        return None

    corpus_embeddings: torch.Tensor = torch.load(get_absolute_path(embeddings_file), map_location=state.device)
    logger.debug(f"Loaded {len(corpus_embeddings)} text embeddings from {embeddings_file}")
    return util.normalize_embeddings(corpus_embeddings)
|
98
|
+
|
99
|
+
|
100
|
+
async def query(
    raw_query: str,
    user: KhojUser,
    type: SearchType = SearchType.All,
    question_embedding: Union[torch.Tensor, None] = None,
    max_distance: float = None,
    agent: Optional[Agent] = None,
) -> Tuple[List[dict], List[Entry]]:
    """Search for entries that answer the query.

    When ``max_distance`` is not given, the default search model's
    ``bi_encoder_confidence_threshold`` is used, falling back to infinity.
    When ``question_embedding`` is not given, ``raw_query`` is embedded with
    the embeddings model registered under the search model's name.

    Returns the matching entries as a list, limited to 10 results.
    """
    file_type = search_type_to_embeddings_type[type.value]
    search_model = await sync_to_async(get_default_search_model)()

    if not max_distance:
        max_distance = search_model.bi_encoder_confidence_threshold or math.inf

    # Encode the query using the bi-encoder
    if question_embedding is None:
        with timer("Query Encode Time", logger, state.device):
            question_embedding = state.embeddings_model[search_model.name].embed_query(raw_query)

    # Find relevant entries for the query
    top_k = 10
    with timer("Search Time", logger, state.device):
        matching_entries = EntryAdapters.search_with_embeddings(
            raw_query=raw_query,
            embeddings=question_embedding,
            max_results=top_k,
            file_type_filter=file_type,
            max_distance=max_distance,
            user=user,
            agent=agent,
        ).all()
        hits = await sync_to_async(list)(matching_entries)  # type: ignore[call-arg]

    return hits
|
140
|
+
|
141
|
+
|
142
|
+
def collate_results(hits, dedupe=True):
    """Yield a ``SearchResponse`` for each hit, optionally dropping duplicates.

    With ``dedupe`` set, a hit is skipped when its ``hashed_value`` or its
    ``corpus_id`` was already seen on an earlier hit.
    """
    seen_ids = set()
    seen_hashes = set()
    for hit in hits:
        if dedupe and (hit.hashed_value in seen_hashes or hit.corpus_id in seen_ids):
            continue

        seen_hashes.add(hit.hashed_value)
        seen_ids.add(hit.corpus_id)

        additional = {
            "source": hit.file_source,
            "file": hit.file_path,
            "compiled": hit.compiled,
            "heading": hit.heading,
        }
        response = {
            "entry": hit.raw,
            "score": hit.distance,
            "corpus_id": str(hit.corpus_id),
            "additional": additional,
        }
        yield SearchResponse.model_validate(response)
|
165
|
+
|
166
|
+
|
167
|
+
def deduplicated_search_responses(hits: List[SearchResponse]):
    """Yield one ``SearchResponse`` per distinct ``corpus_id``, keeping the first seen.

    Each yielded response is rebuilt from the hit's entry, score, corpus id and
    the "source", "file", "compiled" and "heading" items of its ``additional``.
    """
    seen_ids = set()
    for hit in hits:
        if hit.corpus_id in seen_ids:
            continue
        seen_ids.add(hit.corpus_id)

        additional = {
            "source": hit.additional["source"],
            "file": hit.additional["file"],
            "compiled": hit.additional["compiled"],
            "heading": hit.additional["heading"],
        }
        response = {
            "entry": hit.entry,
            "score": hit.score,
            "corpus_id": hit.corpus_id,
            "additional": additional,
        }
        yield SearchResponse.model_validate(response)
|
188
|
+
|
189
|
+
|
190
|
+
def rerank_and_sort_results(hits, query, rank_results, search_model_name):
    """Optionally rerank ``hits`` with the cross-encoder, then sort them.

    Reranking happens when it is explicitly requested via ``rank_results`` or
    the cross-encoder for ``search_model_name`` reports its inference server is
    enabled, AND there is more than one hit.

    Returns the hits after ``sort_results`` has ordered them.
    """
    # Counting used to go through `len(list(hits))`, which copies a list just
    # to measure it and silently exhausts an iterator before it is scored or
    # sorted. Materialize non-list input once instead; a list is left as-is so
    # the caller's list object is still the one that gets passed on.
    if not isinstance(hits, list):
        hits = list(hits)

    # Rerank results if explicitly requested, if can use inference server
    # AND if we have more than one result
    rank_results = (
        rank_results or state.cross_encoder_model[search_model_name].inference_server_enabled()
    ) and len(hits) > 1

    # Score all retrieved entries using the cross-encoder
    if rank_results:
        hits = cross_encoder_score(query, hits, search_model_name)

    # Sort results by cross-encoder score followed by bi-encoder score
    hits = sort_results(rank_results=rank_results, hits=hits)

    return hits
|
205
|
+
|
206
|
+
|
207
|
+
def setup(
    text_to_entries: Type[TextToEntries],
    files: dict[str, str],
    regenerate: bool,
    user: KhojUser,
    config=None,
) -> Tuple[int, int]:
    """Process ``files`` for ``user`` with the given ``text_to_entries`` class.

    The class is instantiated with ``config`` when one is given, otherwise with
    no arguments, and its ``process`` method is run. When ``files`` is not
    empty, the outcome is logged with up to the first ten file names.

    Returns the pair ``(num_new_embeddings, num_deleted_embeddings)`` reported
    by ``process``.
    """
    processor = text_to_entries(config) if config else text_to_entries()
    num_new_embeddings, num_deleted_embeddings = processor.process(files=files, user=user, regenerate=regenerate)

    if files:
        file_names = list(files)
        logger.info(
            f"Deleted {num_deleted_embeddings} entries. Created {num_new_embeddings} new entries for user {user} from files {file_names[:10]} ..."
        )

    return num_new_embeddings, num_deleted_embeddings
|
231
|
+
|
232
|
+
|
233
|
+
def cross_encoder_score(query: str, hits: List[SearchResponse], search_model_name: str) -> List[SearchResponse]:
    """Score all retrieved entries using the cross-encoder.

    Each hit gets a "cross_score" item set to ``1 - score``. If the prediction
    raises ``requests.exceptions.HTTPError`` the failure is logged and every
    hit is scored as 0.0 instead.
    """
    try:
        with timer("Cross-Encoder Predict Time", logger, state.device):
            cross_scores = state.cross_encoder_model[search_model_name].predict(query, hits)
    except requests.exceptions.HTTPError as e:
        logger.error(f"Failed to rerank documents using the inference endpoint. Error: {e}.", exc_info=True)
        cross_scores = [0.0] * len(hits)

    # Convert cross-encoder scores to distances and pass in hits for reranking
    for position, score in enumerate(cross_scores):
        hits[position]["cross_score"] = 1 - score

    return hits
|
247
|
+
|
248
|
+
|
249
|
+
def sort_results(rank_results: bool, hits: List[dict]) -> List[dict]:
    """Order results by cross-encoder score followed by bi-encoder score.

    ``hits`` is sorted in place, ascending, and also returned. The
    cross-encoder pass only runs when ``rank_results`` is set.
    """
    # Sort by bi-encoder score first, then (optionally) by cross-encoder score
    sort_fields = ("score", "cross_score") if rank_results else ("score",)
    with timer("Rank Time", logger, state.device):
        for field in sort_fields:
            hits.sort(key=lambda hit, field=field: hit[field])
    return hits
|
khoj/utils/__init__.py
ADDED
File without changes
|
khoj/utils/cli.py
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
import argparse
|
2
|
+
import logging
|
3
|
+
import os
|
4
|
+
import pathlib
|
5
|
+
from importlib.metadata import version
|
6
|
+
|
7
|
+
logger = logging.getLogger(__name__)
|
8
|
+
|
9
|
+
from khoj.migrations.migrate_offline_chat_default_model import (
|
10
|
+
migrate_offline_chat_default_model,
|
11
|
+
)
|
12
|
+
from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
|
13
|
+
from khoj.migrations.migrate_offline_model import migrate_offline_model
|
14
|
+
from khoj.migrations.migrate_processor_config_openai import (
|
15
|
+
migrate_processor_conversation_schema,
|
16
|
+
)
|
17
|
+
from khoj.migrations.migrate_server_pg import migrate_server_pg
|
18
|
+
from khoj.migrations.migrate_version import migrate_config_to_version
|
19
|
+
from khoj.utils.helpers import in_debug_mode, is_env_var_true, resolve_absolute_path
|
20
|
+
from khoj.utils.yaml import parse_config_from_file
|
21
|
+
|
22
|
+
|
23
|
+
def cli(args=None):
    """Parse Khoj's command line arguments and load its config file.

    Args:
        args: Argument list to parse. ``None`` makes argparse read the
            process's command line.

    Returns:
        The parsed argument namespace, extended with ``chat_on_gpu``,
        ``version_no`` and ``config``. ``config`` is ``None`` when the config
        file does not exist; otherwise migrations are run and the file is
        parsed.

    Raises:
        SystemExit: After printing the version when ``--version`` is passed.
    """
    # Setup Argument Parser for the Commandline Interface
    parser = argparse.ArgumentParser(description="Start Khoj; An AI personal assistant for your Digital Brain")
    parser.add_argument(
        "--config-file", default="~/.khoj/khoj.yml", type=pathlib.Path, help="YAML file to configure Khoj"
    )
    parser.add_argument(
        "--regenerate",
        action="store_true",
        default=False,
        help="Regenerate model embeddings from source files. Default: false",
    )
    parser.add_argument("--verbose", "-v", action="count", default=0, help="Show verbose conversion logs. Default: 0")
    parser.add_argument("--host", type=str, default="127.0.0.1", help="Host address of the server. Default: 127.0.0.1")
    parser.add_argument("--port", "-p", type=int, default=42110, help="Port of the server. Default: 42110")
    parser.add_argument(
        "--socket",
        type=pathlib.Path,
        help="Path to UNIX socket for server. Use to run server behind reverse proxy. Default: /tmp/uvicorn.sock",
    )
    parser.add_argument("--sslcert", type=str, help="Path to SSL certificate file")
    parser.add_argument("--sslkey", type=str, help="Path to SSL key file")
    parser.add_argument("--version", "-V", action="store_true", help="Print the installed Khoj version and exit")
    parser.add_argument(
        "--disable-chat-on-gpu", action="store_true", default=False, help="Disable using GPU for the offline chat model"
    )
    parser.add_argument(
        "--anonymous-mode",
        action="store_true",
        default=False,
        help="Run Khoj in anonymous mode. This does not require any login for connecting users.",
    )
    parser.add_argument(
        "--non-interactive",
        action="store_true",
        default=False,
        help="Start Khoj in non-interactive mode. Assumes interactive shell unavailable for config. E.g when run via Docker.",
    )

    args, remaining_args = parser.parse_known_args(args)

    if remaining_args:
        logger.info(f"⚠️ Ignoring unknown commandline args: {remaining_args}")

    # Set default values for arguments
    args.chat_on_gpu = not args.disable_chat_on_gpu

    args.version_no = version("khoj")
    if args.version:
        # Show version of khoj installed and exit
        print(args.version_no)
        # Raise SystemExit directly: the `exit` builtin is injected by the
        # `site` module for interactive use and is not guaranteed to exist
        # (e.g. under `python -S` or in frozen builds).
        raise SystemExit(0)

    # Normalize config_file path to absolute path
    args.config_file = resolve_absolute_path(args.config_file)

    if not args.config_file.exists():
        args.config = None
    else:
        args = run_migrations(args)
        args.config = parse_config_from_file(args.config_file)
        if is_env_var_true("KHOJ_TELEMETRY_DISABLE") or in_debug_mode():
            args.config.app.should_log_telemetry = False

    return args
|
88
|
+
|
89
|
+
|
90
|
+
def run_migrations(args):
    """Run every config migration on ``args`` in a fixed order.

    Each migration receives the result of the previous one. The result of the
    last migration is returned.
    """
    ordered_migrations = (
        migrate_config_to_version,
        migrate_processor_conversation_schema,
        migrate_offline_model,
        migrate_offline_chat_schema,
        migrate_offline_chat_default_model,
        migrate_server_pg,
    )
    migrated_args = args
    for apply_migration in ordered_migrations:
        migrated_args = apply_migration(migrated_args)
    return migrated_args
|