khoj 1.33.3.dev32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/__init__.py +0 -0
- khoj/app/README.md +94 -0
- khoj/app/__init__.py +0 -0
- khoj/app/asgi.py +16 -0
- khoj/app/settings.py +218 -0
- khoj/app/urls.py +25 -0
- khoj/configure.py +452 -0
- khoj/database/__init__.py +0 -0
- khoj/database/adapters/__init__.py +1821 -0
- khoj/database/admin.py +417 -0
- khoj/database/apps.py +6 -0
- khoj/database/management/__init__.py +0 -0
- khoj/database/management/commands/__init__.py +0 -0
- khoj/database/management/commands/change_default_model.py +116 -0
- khoj/database/management/commands/change_generated_images_url.py +61 -0
- khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
- khoj/database/migrations/0001_khojuser.py +98 -0
- khoj/database/migrations/0002_googleuser.py +32 -0
- khoj/database/migrations/0003_vector_extension.py +10 -0
- khoj/database/migrations/0004_content_types_and_more.py +181 -0
- khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
- khoj/database/migrations/0006_embeddingsdates.py +33 -0
- khoj/database/migrations/0007_add_conversation.py +27 -0
- khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
- khoj/database/migrations/0009_khojapiuser.py +24 -0
- khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
- khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
- khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
- khoj/database/migrations/0012_entry_file_source.py +21 -0
- khoj/database/migrations/0013_subscription.py +37 -0
- khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
- khoj/database/migrations/0015_alter_subscription_user.py +21 -0
- khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
- khoj/database/migrations/0017_searchmodel.py +32 -0
- khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
- khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
- khoj/database/migrations/0020_reflectivequestion.py +36 -0
- khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
- khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
- khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
- khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
- khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
- khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
- khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
- khoj/database/migrations/0029_userrequests.py +27 -0
- khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
- khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
- khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
- khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
- khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
- khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
- khoj/database/migrations/0035_processlock.py +26 -0
- khoj/database/migrations/0036_alter_processlock_name.py +19 -0
- khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
- khoj/database/migrations/0036_publicconversation.py +42 -0
- khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
- khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
- khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
- khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
- khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
- khoj/database/migrations/0040_alter_processlock_name.py +26 -0
- khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
- khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
- khoj/database/migrations/0042_serverchatsettings.py +46 -0
- khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
- khoj/database/migrations/0044_conversation_file_filters.py +17 -0
- khoj/database/migrations/0045_fileobject.py +37 -0
- khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
- khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
- khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
- khoj/database/migrations/0049_datastore.py +38 -0
- khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
- khoj/database/migrations/0050_alter_processlock_name.py +25 -0
- khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
- khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
- khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
- khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
- khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
- khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
- khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
- khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
- khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
- khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
- khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
- khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
- khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
- khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
- khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
- khoj/database/migrations/0063_conversation_temp_id.py +36 -0
- khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
- khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
- khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
- khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
- khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
- khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
- khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
- khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
- khoj/database/migrations/0072_entry_search_model.py +24 -0
- khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
- khoj/database/migrations/0074_alter_conversation_title.py +17 -0
- khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
- khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
- khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
- khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
- khoj/database/migrations/__init__.py +0 -0
- khoj/database/models/__init__.py +725 -0
- khoj/database/tests.py +3 -0
- khoj/interface/compiled/404/index.html +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
- khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
- khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
- khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
- khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
- khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
- khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
- khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
- khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
- khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
- khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
- khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
- khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
- khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
- khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
- khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
- khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
- khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
- khoj/interface/compiled/agents/index.html +1 -0
- khoj/interface/compiled/agents/index.txt +7 -0
- khoj/interface/compiled/agents.svg +6 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
- khoj/interface/compiled/automation.svg +37 -0
- khoj/interface/compiled/automations/index.html +1 -0
- khoj/interface/compiled/automations/index.txt +8 -0
- khoj/interface/compiled/chat/index.html +1 -0
- khoj/interface/compiled/chat/index.txt +7 -0
- khoj/interface/compiled/chat.svg +24 -0
- khoj/interface/compiled/close.svg +5 -0
- khoj/interface/compiled/copy-button-success.svg +6 -0
- khoj/interface/compiled/copy-button.svg +5 -0
- khoj/interface/compiled/index.html +1 -0
- khoj/interface/compiled/index.txt +7 -0
- khoj/interface/compiled/khoj.webmanifest +76 -0
- khoj/interface/compiled/logo.svg +24 -0
- khoj/interface/compiled/search/index.html +1 -0
- khoj/interface/compiled/search/index.txt +7 -0
- khoj/interface/compiled/send.svg +1 -0
- khoj/interface/compiled/settings/index.html +1 -0
- khoj/interface/compiled/settings/index.txt +9 -0
- khoj/interface/compiled/share/chat/index.html +1 -0
- khoj/interface/compiled/share/chat/index.txt +7 -0
- khoj/interface/compiled/share.svg +8 -0
- khoj/interface/compiled/thumbs-down.svg +6 -0
- khoj/interface/compiled/thumbs-up.svg +6 -0
- khoj/interface/email/feedback.html +34 -0
- khoj/interface/email/magic_link.html +40 -0
- khoj/interface/email/task.html +37 -0
- khoj/interface/email/welcome.html +90 -0
- khoj/interface/web/.well-known/assetlinks.json +11 -0
- khoj/interface/web/assets/icons/agents.svg +19 -0
- khoj/interface/web/assets/icons/automation.svg +43 -0
- khoj/interface/web/assets/icons/chat.svg +24 -0
- khoj/interface/web/assets/icons/github.svg +1 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
- khoj/interface/web/assets/icons/khoj.svg +26 -0
- khoj/interface/web/assets/icons/logotype.svg +1 -0
- khoj/interface/web/assets/icons/search.svg +57 -0
- khoj/interface/web/assets/icons/sync.svg +4 -0
- khoj/interface/web/assets/khoj.css +237 -0
- khoj/interface/web/assets/utils.js +33 -0
- khoj/interface/web/base_config.html +445 -0
- khoj/interface/web/content_source_github_input.html +208 -0
- khoj/interface/web/login.html +310 -0
- khoj/interface/web/utils.html +48 -0
- khoj/main.py +249 -0
- khoj/manage.py +22 -0
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +69 -0
- khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
- khoj/migrations/migrate_offline_chat_schema.py +83 -0
- khoj/migrations/migrate_offline_model.py +29 -0
- khoj/migrations/migrate_processor_config_openai.py +67 -0
- khoj/migrations/migrate_server_pg.py +132 -0
- khoj/migrations/migrate_version.py +17 -0
- khoj/processor/__init__.py +0 -0
- khoj/processor/content/__init__.py +0 -0
- khoj/processor/content/docx/__init__.py +0 -0
- khoj/processor/content/docx/docx_to_entries.py +111 -0
- khoj/processor/content/github/__init__.py +0 -0
- khoj/processor/content/github/github_to_entries.py +226 -0
- khoj/processor/content/images/__init__.py +0 -0
- khoj/processor/content/images/image_to_entries.py +117 -0
- khoj/processor/content/markdown/__init__.py +0 -0
- khoj/processor/content/markdown/markdown_to_entries.py +160 -0
- khoj/processor/content/notion/notion_to_entries.py +259 -0
- khoj/processor/content/org_mode/__init__.py +0 -0
- khoj/processor/content/org_mode/org_to_entries.py +226 -0
- khoj/processor/content/org_mode/orgnode.py +532 -0
- khoj/processor/content/pdf/__init__.py +0 -0
- khoj/processor/content/pdf/pdf_to_entries.py +119 -0
- khoj/processor/content/plaintext/__init__.py +0 -0
- khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
- khoj/processor/content/text_to_entries.py +296 -0
- khoj/processor/conversation/__init__.py +0 -0
- khoj/processor/conversation/anthropic/__init__.py +0 -0
- khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
- khoj/processor/conversation/anthropic/utils.py +217 -0
- khoj/processor/conversation/google/__init__.py +0 -0
- khoj/processor/conversation/google/gemini_chat.py +253 -0
- khoj/processor/conversation/google/utils.py +260 -0
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +308 -0
- khoj/processor/conversation/offline/utils.py +80 -0
- khoj/processor/conversation/offline/whisper.py +15 -0
- khoj/processor/conversation/openai/__init__.py +0 -0
- khoj/processor/conversation/openai/gpt.py +243 -0
- khoj/processor/conversation/openai/utils.py +232 -0
- khoj/processor/conversation/openai/whisper.py +13 -0
- khoj/processor/conversation/prompts.py +1188 -0
- khoj/processor/conversation/utils.py +867 -0
- khoj/processor/embeddings.py +122 -0
- khoj/processor/image/generate.py +215 -0
- khoj/processor/speech/__init__.py +0 -0
- khoj/processor/speech/text_to_speech.py +51 -0
- khoj/processor/tools/__init__.py +0 -0
- khoj/processor/tools/online_search.py +472 -0
- khoj/processor/tools/run_code.py +179 -0
- khoj/routers/__init__.py +0 -0
- khoj/routers/api.py +760 -0
- khoj/routers/api_agents.py +295 -0
- khoj/routers/api_chat.py +1273 -0
- khoj/routers/api_content.py +634 -0
- khoj/routers/api_model.py +123 -0
- khoj/routers/api_phone.py +86 -0
- khoj/routers/api_subscription.py +144 -0
- khoj/routers/auth.py +307 -0
- khoj/routers/email.py +135 -0
- khoj/routers/helpers.py +2333 -0
- khoj/routers/notion.py +85 -0
- khoj/routers/research.py +364 -0
- khoj/routers/storage.py +63 -0
- khoj/routers/twilio.py +36 -0
- khoj/routers/web_client.py +141 -0
- khoj/search_filter/__init__.py +0 -0
- khoj/search_filter/base_filter.py +15 -0
- khoj/search_filter/date_filter.py +215 -0
- khoj/search_filter/file_filter.py +32 -0
- khoj/search_filter/word_filter.py +29 -0
- khoj/search_type/__init__.py +0 -0
- khoj/search_type/text_search.py +255 -0
- khoj/utils/__init__.py +0 -0
- khoj/utils/cli.py +101 -0
- khoj/utils/config.py +81 -0
- khoj/utils/constants.py +51 -0
- khoj/utils/fs_syncer.py +252 -0
- khoj/utils/helpers.py +627 -0
- khoj/utils/initialization.py +301 -0
- khoj/utils/jsonl.py +43 -0
- khoj/utils/models.py +47 -0
- khoj/utils/rawconfig.py +208 -0
- khoj/utils/state.py +48 -0
- khoj/utils/yaml.py +47 -0
- khoj-1.33.3.dev32.dist-info/METADATA +190 -0
- khoj-1.33.3.dev32.dist-info/RECORD +393 -0
- khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
- khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
- khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,69 @@
|
|
1
|
+
"""
|
2
|
+
Current format of khoj.yml
|
3
|
+
---
|
4
|
+
app:
|
5
|
+
...
|
6
|
+
content-type:
|
7
|
+
...
|
8
|
+
processor:
|
9
|
+
conversation:
|
10
|
+
offline-chat:
|
11
|
+
enable-offline-chat: false
|
12
|
+
chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
|
13
|
+
...
|
14
|
+
search-type:
|
15
|
+
...
|
16
|
+
|
17
|
+
New format of khoj.yml
|
18
|
+
---
|
19
|
+
app:
|
20
|
+
...
|
21
|
+
content-type:
|
22
|
+
...
|
23
|
+
processor:
|
24
|
+
conversation:
|
25
|
+
offline-chat:
|
26
|
+
enable-offline-chat: false
|
27
|
+
chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
|
28
|
+
...
|
29
|
+
search-type:
|
30
|
+
...
|
31
|
+
"""
|
32
|
+
import logging
|
33
|
+
|
34
|
+
from packaging import version
|
35
|
+
|
36
|
+
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
37
|
+
|
38
|
+
logger = logging.getLogger(__name__)
|
39
|
+
|
40
|
+
|
41
|
+
def migrate_offline_chat_default_model(args):
    """Upgrade khoj.yml to schema 0.12.4, swapping a GGML offline chat model for the mistral GGUF model.

    The config at ``args.config_file`` is only touched when it has a
    ``processor.conversation.offline-chat.chat-model`` entry and its recorded
    ``version`` is missing or older than the target schema version. In that case the
    version is bumped, a chat model name ending in ``.bin`` is replaced with
    ``mistral-7b-instruct-v0.1.Q4_0.gguf`` and the config is written back.

    Args:
        args: Object exposing a ``config_file`` attribute, which is handed to the
            config load/save helpers.

    Returns:
        The ``args`` object that was passed in.
    """
    schema_version = "0.12.4"
    raw_config = load_config_from_file(args.config_file)

    # Walk down to the offline chat section. Any level may be absent or an explicit
    # YAML null, so treat everything that is not a mapping as "nothing to migrate".
    offline_chat = raw_config
    for section in ("processor", "conversation", "offline-chat"):
        offline_chat = offline_chat.get(section) if isinstance(offline_chat, dict) else None
    if not isinstance(offline_chat, dict) or "chat-model" not in offline_chat:
        return args

    # Config is already at (or past) this schema version: leave it untouched.
    # str() guards against a version that YAML parsed as a number (e.g. 0.12).
    previous_version = raw_config.get("version")
    if previous_version is not None and version.parse(str(previous_version)) >= version.parse(schema_version):
        return args

    logger.info(
        f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
    )
    raw_config["version"] = schema_version

    # Update offline chat model to mistral in GGUF format to use latest GPT4All.
    # A null chat-model is left as is; only old ".bin" model names are rewritten.
    offline_chat_model = offline_chat["chat-model"]
    if isinstance(offline_chat_model, str) and offline_chat_model.endswith(".bin"):
        offline_chat["chat-model"] = "mistral-7b-instruct-v0.1.Q4_0.gguf"

    save_config_to_file(raw_config, args.config_file)
    return args
|
@@ -0,0 +1,71 @@
|
|
1
|
+
"""
|
2
|
+
Current format of khoj.yml
|
3
|
+
---
|
4
|
+
app:
|
5
|
+
...
|
6
|
+
content-type:
|
7
|
+
...
|
8
|
+
processor:
|
9
|
+
conversation:
|
10
|
+
offline-chat:
|
11
|
+
enable-offline-chat: false
|
12
|
+
chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
|
13
|
+
...
|
14
|
+
search-type:
|
15
|
+
...
|
16
|
+
|
17
|
+
New format of khoj.yml
|
18
|
+
---
|
19
|
+
app:
|
20
|
+
...
|
21
|
+
content-type:
|
22
|
+
...
|
23
|
+
processor:
|
24
|
+
conversation:
|
25
|
+
offline-chat:
|
26
|
+
enable-offline-chat: false
|
27
|
+
chat-model: NousResearch/Hermes-2-Pro-Mistral-7B-GGUF
|
28
|
+
...
|
29
|
+
search-type:
|
30
|
+
...
|
31
|
+
"""
|
32
|
+
import logging
|
33
|
+
|
34
|
+
from packaging import version
|
35
|
+
|
36
|
+
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
37
|
+
|
38
|
+
logger = logging.getLogger(__name__)
|
39
|
+
|
40
|
+
|
41
|
+
def migrate_offline_chat_default_model(args):
    """Upgrade khoj.yml to schema 1.7.0, swapping the mistral GGUF offline chat model for Hermes-2-Pro.

    The config at ``args.config_file`` is only touched when it has a
    ``processor.conversation.offline-chat.chat-model`` entry and its recorded
    ``version`` is missing or older than the target schema version. In that case the
    version is bumped, a chat model equal to ``mistral-7b-instruct-v0.1.Q4_0.gguf`` is
    replaced with ``NousResearch/Hermes-2-Pro-Mistral-7B-GGUF`` and the config is
    written back. Any other chat model value is kept.

    Args:
        args: Object exposing a ``config_file`` attribute, which is handed to the
            config load/save helpers.

    Returns:
        The ``args`` object that was passed in.
    """
    schema_version = "1.7.0"
    raw_config = load_config_from_file(args.config_file)

    # Walk down to the offline chat section. Any level may be absent or an explicit
    # YAML null, so treat everything that is not a mapping as "nothing to migrate".
    offline_chat = raw_config
    for section in ("processor", "conversation", "offline-chat"):
        offline_chat = offline_chat.get(section) if isinstance(offline_chat, dict) else None
    if not isinstance(offline_chat, dict) or "chat-model" not in offline_chat:
        return args

    # Config is already at (or past) this schema version: leave it untouched.
    # str() guards against a version that YAML parsed as a number (e.g. 1.7).
    previous_version = raw_config.get("version")
    if previous_version is not None and version.parse(str(previous_version)) >= version.parse(schema_version):
        return args

    logger.info(
        f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
    )
    raw_config["version"] = schema_version

    # Update offline chat model to use Nous Research's Hermes-2-Pro GGUF in path format suitable for llama-cpp
    if offline_chat["chat-model"] == "mistral-7b-instruct-v0.1.Q4_0.gguf":
        offline_chat["chat-model"] = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF"

    save_config_to_file(raw_config, args.config_file)
    return args
|
@@ -0,0 +1,83 @@
|
|
1
|
+
"""
|
2
|
+
Current format of khoj.yml
|
3
|
+
---
|
4
|
+
app:
|
5
|
+
...
|
6
|
+
content-type:
|
7
|
+
...
|
8
|
+
processor:
|
9
|
+
conversation:
|
10
|
+
enable-offline-chat: false
|
11
|
+
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
12
|
+
openai:
|
13
|
+
...
|
14
|
+
search-type:
|
15
|
+
...
|
16
|
+
|
17
|
+
New format of khoj.yml
|
18
|
+
---
|
19
|
+
app:
|
20
|
+
...
|
21
|
+
content-type:
|
22
|
+
...
|
23
|
+
processor:
|
24
|
+
conversation:
|
25
|
+
offline-chat:
|
26
|
+
enable-offline-chat: false
|
27
|
+
chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
|
28
|
+
tokenizer: null
|
29
|
+
max_prompt_size: null
|
30
|
+
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
31
|
+
openai:
|
32
|
+
...
|
33
|
+
search-type:
|
34
|
+
...
|
35
|
+
"""
|
36
|
+
import logging
|
37
|
+
|
38
|
+
from packaging import version
|
39
|
+
|
40
|
+
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
41
|
+
|
42
|
+
logger = logging.getLogger(__name__)
|
43
|
+
|
44
|
+
|
45
|
+
def migrate_offline_chat_schema(args):
    """Move the flat `enable-offline-chat` flag into a nested `offline-chat` section.

    Loads the YAML config at `args.config_file`. If it has no conversation
    processor section (missing or null), nothing is changed and nothing is
    written. Otherwise, when the config has no `version` or one older than
    0.12.3, the conversation processor section is upgraded:

    * `version` is set to 0.12.3
    * `max-prompt-size` and `tokenizer` are added with value None
    * `offline-chat` is (re)built with `enable-offline-chat` taken from the old
      flat flag (default False) and `chat-model` taken from any existing
      `offline-chat` section (default llama-2-7b-chat.ggmlv3.q4_0.bin)
    * the old flat `enable-offline-chat` key is removed

    The config is then written back to `args.config_file`.

    Args:
        args: Parsed CLI arguments; only `config_file` is read.

    Returns:
        The same `args` object, unmodified.
    """
    schema_version = "0.12.3"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    # Nothing to migrate without a conversation processor section
    processor = raw_config.get("processor")
    if processor is None or "conversation" not in processor:
        return args
    conversation = processor["conversation"]
    # A key that is present but null (e.g. `conversation:` with no body) has nothing to migrate either
    if conversation is None:
        return args

    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to make (offline) chat more configuration"
        )
        raw_config["version"] = schema_version

        # Create max-prompt-size and tokenizer fields in conversation processor schema
        conversation["max-prompt-size"] = None
        conversation["tokenizer"] = None

        # Create offline chat schema based on existing enable-offline-chat field in khoj config schema.
        # `or {}` covers an `offline-chat` key that exists but is null.
        existing_offline_chat = conversation.get("offline-chat") or {}
        offline_chat_model = existing_offline_chat.get("chat-model", "llama-2-7b-chat.ggmlv3.q4_0.bin")
        conversation["offline-chat"] = {
            "enable-offline-chat": conversation.get("enable-offline-chat", False),
            "chat-model": offline_chat_model,
        }

        # Delete old enable-offline-chat field from conversation processor schema
        conversation.pop("enable-offline-chat", None)

    save_config_to_file(raw_config, args.config_file)
    return args
|
@@ -0,0 +1,29 @@
|
|
1
|
+
import logging
|
2
|
+
import os
|
3
|
+
|
4
|
+
from packaging import version
|
5
|
+
|
6
|
+
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
7
|
+
|
8
|
+
logger = logging.getLogger(__name__)
|
9
|
+
|
10
|
+
|
11
|
+
def migrate_offline_model(args):
    """Bump the config to schema 0.10.1 and drop the cached offline chat model.

    Reads the YAML config at `args.config_file`. When it carries no `version`
    or one older than 0.10.1, the version is set to 0.10.1, the file
    ~/.cache/gpt4all/llama-2-7b-chat.ggmlv3.q4_K_S.bin is deleted if present,
    and the config is written back. Otherwise nothing is touched.

    Args:
        args: Parsed CLI arguments; `config_file` and `version_no` are read.

    Returns:
        The same `args` object, unmodified.
    """
    schema_version = "0.10.1"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    is_outdated = previous_version is None or version.parse(previous_version) < version.parse(schema_version)
    if not is_outdated:
        return args

    logger.info(
        f"Migrating offline model used for version {previous_version} to latest version for {args.version_no}"
    )
    raw_config["version"] = schema_version

    # If the user has downloaded the offline model, remove it from the cache.
    cached_model = os.path.expanduser("~/.cache/gpt4all/llama-2-7b-chat.ggmlv3.q4_K_S.bin")
    if os.path.exists(cached_model):
        os.remove(cached_model)

    save_config_to_file(raw_config, args.config_file)
    return args
|
@@ -0,0 +1,67 @@
|
|
1
|
+
"""
|
2
|
+
Current format of khoj.yml
|
3
|
+
---
|
4
|
+
app:
|
5
|
+
should-log-telemetry: true
|
6
|
+
content-type:
|
7
|
+
...
|
8
|
+
processor:
|
9
|
+
conversation:
|
10
|
+
chat-model: gpt-3.5-turbo
|
11
|
+
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
12
|
+
model: text-davinci-003
|
13
|
+
openai-api-key: sk-secret-key
|
14
|
+
search-type:
|
15
|
+
...
|
16
|
+
|
17
|
+
New format of khoj.yml
|
18
|
+
---
|
19
|
+
app:
|
20
|
+
should-log-telemetry: true
|
21
|
+
content-type:
|
22
|
+
...
|
23
|
+
processor:
|
24
|
+
conversation:
|
25
|
+
openai:
|
26
|
+
chat-model: gpt-3.5-turbo
|
27
|
+
api-key: sk-secret-key
|
28
|
+
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
29
|
+
enable-offline-chat: false
|
30
|
+
search-type:
|
31
|
+
...
|
32
|
+
"""
|
33
|
+
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
34
|
+
|
35
|
+
|
36
|
+
def migrate_processor_conversation_schema(args):
    """Nest the flat OpenAI settings of the conversation processor under `openai`.

    Loads the YAML config at `args.config_file`. The migration is skipped,
    and nothing is written, when the config has no conversation processor
    section (missing or null) or when that section has neither
    `openai-api-key` nor `chat-model` at its top level.

    Otherwise `version` is set to 0.10.0 and the conversation section is
    replaced by a dict holding only:

    * `openai`: `{"chat-model": ..., "api-key": ...}` from the old flat keys
    * `conversation-logfile`: carried over (None if absent)
    * `enable-offline-chat`: carried over if already present, else False

    Any other key in the old section is dropped. The config is then written
    back to `args.config_file`.

    Args:
        args: Parsed CLI arguments; only `config_file` is read.

    Returns:
        The same `args` object, unmodified.
    """
    schema_version = "0.10.0"
    raw_config = load_config_from_file(args.config_file)

    # Nothing to migrate without a conversation processor section
    processor = raw_config.get("processor")
    if processor is None or "conversation" not in processor:
        return args
    conversation = processor["conversation"]
    # A key that is present but null has no flat OpenAI settings to migrate
    if conversation is None:
        return args

    current_openai_api_key = conversation.get("openai-api-key")
    current_chat_model = conversation.get("chat-model")
    if current_openai_api_key is None and current_chat_model is None:
        return args

    raw_config["version"] = schema_version

    # Update conversation processor schema.
    # Keep an enable-offline-chat value that is already set instead of resetting it to False.
    processor["conversation"] = {
        "openai": {"chat-model": current_chat_model, "api-key": current_openai_api_key},
        "conversation-logfile": conversation.get("conversation-logfile"),
        "enable-offline-chat": conversation.get("enable-offline-chat", False),
    }

    save_config_to_file(raw_config, args.config_file)
    return args
|
@@ -0,0 +1,132 @@
|
|
1
|
+
"""
|
2
|
+
The application config currently looks like this:
|
3
|
+
app:
|
4
|
+
should-log-telemetry: true
|
5
|
+
content-type:
|
6
|
+
...
|
7
|
+
processor:
|
8
|
+
conversation:
|
9
|
+
conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
|
10
|
+
max-prompt-size: null
|
11
|
+
offline-chat:
|
12
|
+
chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
|
13
|
+
enable-offline-chat: false
|
14
|
+
openai:
|
15
|
+
api-key: sk-blah
|
16
|
+
chat-model: gpt-3.5-turbo
|
17
|
+
tokenizer: null
|
18
|
+
search-type:
|
19
|
+
asymmetric:
|
20
|
+
cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
|
21
|
+
encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
|
22
|
+
encoder-type: null
|
23
|
+
model-directory: /Users/si/.khoj/search/asymmetric
|
24
|
+
image:
|
25
|
+
encoder: sentence-transformers/clip-ViT-B-32
|
26
|
+
encoder-type: null
|
27
|
+
model-directory: /Users/si/.khoj/search/image
|
28
|
+
symmetric:
|
29
|
+
cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
|
30
|
+
encoder: sentence-transformers/all-MiniLM-L6-v2
|
31
|
+
encoder-type: null
|
32
|
+
model-directory: ~/.khoj/search/symmetric
|
33
|
+
version: 0.14.0
|
34
|
+
|
35
|
+
|
36
|
+
The new version will look like this:
|
37
|
+
app:
|
38
|
+
should-log-telemetry: true
|
39
|
+
processor:
|
40
|
+
conversation:
|
41
|
+
offline-chat:
|
42
|
+
enabled: false
|
43
|
+
openai:
|
44
|
+
api-key: sk-blah
|
45
|
+
chat-model-options:
|
46
|
+
- chat-model: gpt-3.5-turbo
|
47
|
+
tokenizer: null
|
48
|
+
type: openai
|
49
|
+
- chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
|
50
|
+
tokenizer: null
|
51
|
+
type: offline
|
52
|
+
search-type:
|
53
|
+
asymmetric:
|
54
|
+
cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
|
55
|
+
encoder: sentence-transformers/multi-qa-MiniLM-L6-cos-v1
|
56
|
+
version: 0.15.0
|
57
|
+
"""
|
58
|
+
|
59
|
+
import logging
|
60
|
+
|
61
|
+
from packaging import version
|
62
|
+
|
63
|
+
from khoj.database.models import AiModelApi, ChatModel, SearchModelConfig
|
64
|
+
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
65
|
+
|
66
|
+
logger = logging.getLogger(__name__)
|
67
|
+
|
68
|
+
|
69
|
+
def migrate_server_pg(args):
    """Copy search and chat model settings from the YAML config into the database.

    Loads the YAML config at `args.config_file`. An empty config (None) is
    left alone. When the config has no `version` or one older than 0.15.0:

    * `version` is set to 0.15.0
    * if `search-type.asymmetric` exists, all TEXT `SearchModelConfig` rows are
      deleted and a new "default" one is created from its `encoder` and
      `cross-encoder` values
    * if `processor.conversation.offline-chat` exists, an OFFLINE `ChatModel`
      is created from it
    * if `processor.conversation.openai` is non-empty and has an `api-key`,
      an `AiModelApi` named "default" and an OPENAI `ChatModel` are created
      (chat model defaults to gpt-3.5-turbo); without an API key an error is
      logged and the OpenAI config is not migrated
    * the config is written back to `args.config_file`

    Both chat models take `tokenizer` and `max-prompt-size` from the
    conversation section.

    Args:
        args: Parsed CLI arguments; `config_file` and `version_no` are read.

    Returns:
        The same `args` object, unmodified.
    """
    schema_version = "0.15.0"
    raw_config = load_config_from_file(args.config_file)

    # Check for an empty config before touching it; `.get` on None would raise
    if raw_config is None:
        return args

    previous_version = raw_config.get("version")

    if previous_version is None or version.parse(previous_version) < version.parse(schema_version):
        logger.info(
            f"Migrating configuration used for version {previous_version} to latest version for server with postgres in {args.version_no}"
        )
        raw_config["version"] = schema_version

        if "search-type" in raw_config and raw_config["search-type"]:
            if "asymmetric" in raw_config["search-type"]:
                # Delete all existing search models
                SearchModelConfig.objects.filter(model_type=SearchModelConfig.ModelType.TEXT).delete()
                # Create new search model from existing Khoj YAML config
                asymmetric_search = raw_config["search-type"]["asymmetric"]
                SearchModelConfig.objects.create(
                    name="default",
                    model_type=SearchModelConfig.ModelType.TEXT,
                    bi_encoder=asymmetric_search.get("encoder"),
                    cross_encoder=asymmetric_search.get("cross-encoder"),
                )

        # A null or empty conversation section has nothing to migrate
        processor_conversation = (raw_config.get("processor") or {}).get("conversation")
        if processor_conversation:
            if "offline-chat" in processor_conversation:
                offline_chat = processor_conversation["offline-chat"]
                ChatModel.objects.create(
                    name=offline_chat.get("chat-model"),
                    tokenizer=processor_conversation.get("tokenizer"),
                    max_prompt_size=processor_conversation.get("max-prompt-size"),
                    model_type=ChatModel.ModelType.OFFLINE,
                )

            if "openai" in processor_conversation and processor_conversation["openai"]:
                openai = processor_conversation["openai"]

                if openai.get("api-key") is None:
                    logger.error("OpenAI API Key is not set. Will not be migrating OpenAI config.")
                else:
                    # The default is written into the config too, so it is part of what gets saved below
                    if openai.get("chat-model") is None:
                        openai["chat-model"] = "gpt-3.5-turbo"

                    openai_model_api = AiModelApi.objects.create(api_key=openai.get("api-key"), name="default")

                    ChatModel.objects.create(
                        name=openai.get("chat-model"),
                        tokenizer=processor_conversation.get("tokenizer"),
                        max_prompt_size=processor_conversation.get("max-prompt-size"),
                        model_type=ChatModel.ModelType.OPENAI,
                        ai_model_api=openai_model_api,
                    )

        save_config_to_file(raw_config, args.config_file)

    return args
|
@@ -0,0 +1,17 @@
|
|
1
|
+
from khoj.utils.yaml import load_config_from_file, save_config_to_file
|
2
|
+
|
3
|
+
|
4
|
+
def migrate_config_to_version(args):
    """Stamp an unversioned config with schema 0.9.0 and request an index rebuild.

    Reads the YAML config at `args.config_file`. A config that already has a
    `version` key is left alone. Otherwise the version is set to 0.9.0, the
    config is written back, and `args.regenerate` is set to True.

    Args:
        args: Parsed CLI arguments; `config_file` is read and `regenerate`
            may be set.

    Returns:
        The same `args` object.
    """
    config = load_config_from_file(args.config_file)

    # Already versioned: nothing to add
    if "version" in config:
        return args

    # Add version to khoj config schema
    config["version"] = "0.9.0"
    save_config_to_file(config, args.config_file)

    # regenerate khoj index on first start of this version
    # this should refresh index and apply index corruption fixes from #325
    args.regenerate = True
    return args
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,111 @@
|
|
1
|
+
import logging
|
2
|
+
import tempfile
|
3
|
+
from typing import Dict, List, Tuple
|
4
|
+
|
5
|
+
from langchain_community.document_loaders import Docx2txtLoader
|
6
|
+
|
7
|
+
from khoj.database.models import Entry as DbEntry
|
8
|
+
from khoj.database.models import KhojUser
|
9
|
+
from khoj.processor.content.text_to_entries import TextToEntries
|
10
|
+
from khoj.utils.helpers import timer
|
11
|
+
from khoj.utils.rawconfig import Entry
|
12
|
+
|
13
|
+
logger = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
class DocxToEntries(TextToEntries):
    """Turn DOCX file contents into `Entry` objects and hand them to `update_embeddings`."""

    def __init__(self):
        super().__init__()

    # Define Functions
    def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
        """Extract entries from the given DOCX files and update the user's embeddings.

        Args:
            files: Maps file name to file content. Content equal to `b""`
                marks the file for deletion.
                NOTE(review): the content is compared against bytes and written to a
                binary temp file, so values look like bytes despite the annotation.
            user: User passed through to `update_embeddings`.
            regenerate: Passed through to `update_embeddings`.

        Returns:
            The `(num_new_embeddings, num_deleted_embeddings)` pair returned by
            `update_embeddings`.
        """
        # Files sent with empty content are to be deleted, not processed
        deletion_file_names = {file for file in files if files[file] == b""}
        # Filter in place of a set difference so files keep the order they were passed in
        files = {file: content for file, content in files.items() if file not in deletion_file_names}

        # Extract Entries from specified Docx files
        with timer("Extract entries from specified DOCX files", logger):
            file_to_text_map, current_entries = DocxToEntries.extract_docx_entries(files)

        # Split entries by max tokens supported by model
        with timer("Split entries by max token size supported by model", logger):
            current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256)

        # Identify, mark and merge any new entries with previous entries
        with timer("Identify new or updated entries", logger):
            num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
                user,
                current_entries,
                DbEntry.EntryType.DOCX,
                DbEntry.EntrySource.COMPUTER,
                "compiled",
                logger,
                deletion_file_names,
                regenerate=regenerate,
                file_to_text_map=file_to_text_map,
            )

        return num_new_embeddings, num_deleted_embeddings

    @staticmethod
    def extract_docx_entries(docx_files) -> Tuple[Dict, List[Entry]]:
        """Extract entries from specified DOCX files.

        Args:
            docx_files: Maps file name to DOCX file content.

        Returns:
            A tuple of (file name -> list of extracted texts, list of entries).
            A file whose extraction raises is logged and left out of both.
        """
        entries: List[Entry] = []
        file_to_text_map = dict()
        for docx_file in docx_files:
            try:
                docx_texts = DocxToEntries.extract_text(docx_files[docx_file])
                # Convert per file so identical text in two files keeps its own file name.
                # A single text -> file map shared by all files would keep only the last file.
                entries.extend(
                    DocxToEntries.convert_docx_entries_to_maps(docx_texts, dict.fromkeys(docx_texts, docx_file))
                )
                file_to_text_map[docx_file] = docx_texts
            except Exception as e:
                logger.warning(f"Unable to extract entries from file: {docx_file}")
                logger.warning(e, exc_info=True)
        return file_to_text_map, entries

    @staticmethod
    def convert_docx_entries_to_maps(parsed_entries: List[str], entry_to_file_map) -> List[Entry]:
        """Convert each extracted DOCX text into an `Entry`.

        Args:
            parsed_entries: Extracted texts.
            entry_to_file_map: Maps each text in `parsed_entries` to its file name.

        Returns:
            One `Entry` per text, in the same order.
        """
        entries = []
        for parsed_entry in parsed_entries:
            entry_filename = entry_to_file_map[parsed_entry]
            # Prefix the compiled entry with its filename for context to model
            heading = f"{entry_filename}\n"
            compiled_entry = f"{heading}{parsed_entry}"
            entries.append(
                Entry(
                    compiled=compiled_entry,
                    raw=parsed_entry,
                    heading=heading,
                    file=f"{entry_filename}",
                )
            )

        logger.debug(f"Converted {len(parsed_entries)} DOCX entries to dictionaries")

        return entries

    @staticmethod
    def extract_text(docx_file):
        """Extract text from the content of a DOCX file.

        Args:
            docx_file: Content of the DOCX file, written to a temporary file in binary mode.

        Returns:
            The `page_content` of each document loaded by `Docx2txtLoader`,
            or an empty list if extraction fails (the failure is logged).
        """
        import os

        docx_entry_by_pages = []
        try:
            # Write to a file inside a temp directory and close it before loading.
            # A still-open NamedTemporaryFile cannot be reopened by name on Windows.
            with tempfile.TemporaryDirectory() as tmp_dir:
                tmp_path = os.path.join(tmp_dir, "document.docx")
                with open(tmp_path, "wb") as tmp:
                    tmp.write(docx_file)

                # Load the content using Docx2txtLoader
                loader = Docx2txtLoader(tmp_path)
                docx_entries_per_file = loader.load()

            # Convert the loaded entries into the desired format
            docx_entry_by_pages = [page.page_content for page in docx_entries_per_file]
        except Exception as e:
            # Do not interpolate docx_file here: it is the raw file content, not a name
            logger.warning("Unable to extract text from DOCX file")
            logger.warning(e, exc_info=True)

        return docx_entry_by_pages
|
File without changes
|