khoj 1.33.3.dev32__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/__init__.py +0 -0
- khoj/app/README.md +94 -0
- khoj/app/__init__.py +0 -0
- khoj/app/asgi.py +16 -0
- khoj/app/settings.py +218 -0
- khoj/app/urls.py +25 -0
- khoj/configure.py +452 -0
- khoj/database/__init__.py +0 -0
- khoj/database/adapters/__init__.py +1821 -0
- khoj/database/admin.py +417 -0
- khoj/database/apps.py +6 -0
- khoj/database/management/__init__.py +0 -0
- khoj/database/management/commands/__init__.py +0 -0
- khoj/database/management/commands/change_default_model.py +116 -0
- khoj/database/management/commands/change_generated_images_url.py +61 -0
- khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
- khoj/database/migrations/0001_khojuser.py +98 -0
- khoj/database/migrations/0002_googleuser.py +32 -0
- khoj/database/migrations/0003_vector_extension.py +10 -0
- khoj/database/migrations/0004_content_types_and_more.py +181 -0
- khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
- khoj/database/migrations/0006_embeddingsdates.py +33 -0
- khoj/database/migrations/0007_add_conversation.py +27 -0
- khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
- khoj/database/migrations/0009_khojapiuser.py +24 -0
- khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
- khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
- khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
- khoj/database/migrations/0012_entry_file_source.py +21 -0
- khoj/database/migrations/0013_subscription.py +37 -0
- khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
- khoj/database/migrations/0015_alter_subscription_user.py +21 -0
- khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
- khoj/database/migrations/0017_searchmodel.py +32 -0
- khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
- khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
- khoj/database/migrations/0020_reflectivequestion.py +36 -0
- khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
- khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
- khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
- khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
- khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
- khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
- khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
- khoj/database/migrations/0029_userrequests.py +27 -0
- khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
- khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
- khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
- khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
- khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
- khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
- khoj/database/migrations/0035_processlock.py +26 -0
- khoj/database/migrations/0036_alter_processlock_name.py +19 -0
- khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
- khoj/database/migrations/0036_publicconversation.py +42 -0
- khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
- khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
- khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
- khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
- khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
- khoj/database/migrations/0040_alter_processlock_name.py +26 -0
- khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
- khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
- khoj/database/migrations/0042_serverchatsettings.py +46 -0
- khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
- khoj/database/migrations/0044_conversation_file_filters.py +17 -0
- khoj/database/migrations/0045_fileobject.py +37 -0
- khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
- khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
- khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
- khoj/database/migrations/0049_datastore.py +38 -0
- khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
- khoj/database/migrations/0050_alter_processlock_name.py +25 -0
- khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
- khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
- khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
- khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
- khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
- khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
- khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
- khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
- khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
- khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
- khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
- khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
- khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
- khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
- khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
- khoj/database/migrations/0063_conversation_temp_id.py +36 -0
- khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
- khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
- khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
- khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
- khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
- khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
- khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
- khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
- khoj/database/migrations/0072_entry_search_model.py +24 -0
- khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
- khoj/database/migrations/0074_alter_conversation_title.py +17 -0
- khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
- khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
- khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
- khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
- khoj/database/migrations/__init__.py +0 -0
- khoj/database/models/__init__.py +725 -0
- khoj/database/tests.py +3 -0
- khoj/interface/compiled/404/index.html +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
- khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
- khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
- khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
- khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
- khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
- khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
- khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
- khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
- khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
- khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
- khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
- khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
- khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
- khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
- khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
- khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
- khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
- khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
- khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
- khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
- khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
- khoj/interface/compiled/agents/index.html +1 -0
- khoj/interface/compiled/agents/index.txt +7 -0
- khoj/interface/compiled/agents.svg +6 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
- khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
- khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
- khoj/interface/compiled/automation.svg +37 -0
- khoj/interface/compiled/automations/index.html +1 -0
- khoj/interface/compiled/automations/index.txt +8 -0
- khoj/interface/compiled/chat/index.html +1 -0
- khoj/interface/compiled/chat/index.txt +7 -0
- khoj/interface/compiled/chat.svg +24 -0
- khoj/interface/compiled/close.svg +5 -0
- khoj/interface/compiled/copy-button-success.svg +6 -0
- khoj/interface/compiled/copy-button.svg +5 -0
- khoj/interface/compiled/index.html +1 -0
- khoj/interface/compiled/index.txt +7 -0
- khoj/interface/compiled/khoj.webmanifest +76 -0
- khoj/interface/compiled/logo.svg +24 -0
- khoj/interface/compiled/search/index.html +1 -0
- khoj/interface/compiled/search/index.txt +7 -0
- khoj/interface/compiled/send.svg +1 -0
- khoj/interface/compiled/settings/index.html +1 -0
- khoj/interface/compiled/settings/index.txt +9 -0
- khoj/interface/compiled/share/chat/index.html +1 -0
- khoj/interface/compiled/share/chat/index.txt +7 -0
- khoj/interface/compiled/share.svg +8 -0
- khoj/interface/compiled/thumbs-down.svg +6 -0
- khoj/interface/compiled/thumbs-up.svg +6 -0
- khoj/interface/email/feedback.html +34 -0
- khoj/interface/email/magic_link.html +40 -0
- khoj/interface/email/task.html +37 -0
- khoj/interface/email/welcome.html +90 -0
- khoj/interface/web/.well-known/assetlinks.json +11 -0
- khoj/interface/web/assets/icons/agents.svg +19 -0
- khoj/interface/web/assets/icons/automation.svg +43 -0
- khoj/interface/web/assets/icons/chat.svg +24 -0
- khoj/interface/web/assets/icons/github.svg +1 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
- khoj/interface/web/assets/icons/khoj.svg +26 -0
- khoj/interface/web/assets/icons/logotype.svg +1 -0
- khoj/interface/web/assets/icons/search.svg +57 -0
- khoj/interface/web/assets/icons/sync.svg +4 -0
- khoj/interface/web/assets/khoj.css +237 -0
- khoj/interface/web/assets/utils.js +33 -0
- khoj/interface/web/base_config.html +445 -0
- khoj/interface/web/content_source_github_input.html +208 -0
- khoj/interface/web/login.html +310 -0
- khoj/interface/web/utils.html +48 -0
- khoj/main.py +249 -0
- khoj/manage.py +22 -0
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +69 -0
- khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
- khoj/migrations/migrate_offline_chat_schema.py +83 -0
- khoj/migrations/migrate_offline_model.py +29 -0
- khoj/migrations/migrate_processor_config_openai.py +67 -0
- khoj/migrations/migrate_server_pg.py +132 -0
- khoj/migrations/migrate_version.py +17 -0
- khoj/processor/__init__.py +0 -0
- khoj/processor/content/__init__.py +0 -0
- khoj/processor/content/docx/__init__.py +0 -0
- khoj/processor/content/docx/docx_to_entries.py +111 -0
- khoj/processor/content/github/__init__.py +0 -0
- khoj/processor/content/github/github_to_entries.py +226 -0
- khoj/processor/content/images/__init__.py +0 -0
- khoj/processor/content/images/image_to_entries.py +117 -0
- khoj/processor/content/markdown/__init__.py +0 -0
- khoj/processor/content/markdown/markdown_to_entries.py +160 -0
- khoj/processor/content/notion/notion_to_entries.py +259 -0
- khoj/processor/content/org_mode/__init__.py +0 -0
- khoj/processor/content/org_mode/org_to_entries.py +226 -0
- khoj/processor/content/org_mode/orgnode.py +532 -0
- khoj/processor/content/pdf/__init__.py +0 -0
- khoj/processor/content/pdf/pdf_to_entries.py +119 -0
- khoj/processor/content/plaintext/__init__.py +0 -0
- khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
- khoj/processor/content/text_to_entries.py +296 -0
- khoj/processor/conversation/__init__.py +0 -0
- khoj/processor/conversation/anthropic/__init__.py +0 -0
- khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
- khoj/processor/conversation/anthropic/utils.py +217 -0
- khoj/processor/conversation/google/__init__.py +0 -0
- khoj/processor/conversation/google/gemini_chat.py +253 -0
- khoj/processor/conversation/google/utils.py +260 -0
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +308 -0
- khoj/processor/conversation/offline/utils.py +80 -0
- khoj/processor/conversation/offline/whisper.py +15 -0
- khoj/processor/conversation/openai/__init__.py +0 -0
- khoj/processor/conversation/openai/gpt.py +243 -0
- khoj/processor/conversation/openai/utils.py +232 -0
- khoj/processor/conversation/openai/whisper.py +13 -0
- khoj/processor/conversation/prompts.py +1188 -0
- khoj/processor/conversation/utils.py +867 -0
- khoj/processor/embeddings.py +122 -0
- khoj/processor/image/generate.py +215 -0
- khoj/processor/speech/__init__.py +0 -0
- khoj/processor/speech/text_to_speech.py +51 -0
- khoj/processor/tools/__init__.py +0 -0
- khoj/processor/tools/online_search.py +472 -0
- khoj/processor/tools/run_code.py +179 -0
- khoj/routers/__init__.py +0 -0
- khoj/routers/api.py +760 -0
- khoj/routers/api_agents.py +295 -0
- khoj/routers/api_chat.py +1273 -0
- khoj/routers/api_content.py +634 -0
- khoj/routers/api_model.py +123 -0
- khoj/routers/api_phone.py +86 -0
- khoj/routers/api_subscription.py +144 -0
- khoj/routers/auth.py +307 -0
- khoj/routers/email.py +135 -0
- khoj/routers/helpers.py +2333 -0
- khoj/routers/notion.py +85 -0
- khoj/routers/research.py +364 -0
- khoj/routers/storage.py +63 -0
- khoj/routers/twilio.py +36 -0
- khoj/routers/web_client.py +141 -0
- khoj/search_filter/__init__.py +0 -0
- khoj/search_filter/base_filter.py +15 -0
- khoj/search_filter/date_filter.py +215 -0
- khoj/search_filter/file_filter.py +32 -0
- khoj/search_filter/word_filter.py +29 -0
- khoj/search_type/__init__.py +0 -0
- khoj/search_type/text_search.py +255 -0
- khoj/utils/__init__.py +0 -0
- khoj/utils/cli.py +101 -0
- khoj/utils/config.py +81 -0
- khoj/utils/constants.py +51 -0
- khoj/utils/fs_syncer.py +252 -0
- khoj/utils/helpers.py +627 -0
- khoj/utils/initialization.py +301 -0
- khoj/utils/jsonl.py +43 -0
- khoj/utils/models.py +47 -0
- khoj/utils/rawconfig.py +208 -0
- khoj/utils/state.py +48 -0
- khoj/utils/yaml.py +47 -0
- khoj-1.33.3.dev32.dist-info/METADATA +190 -0
- khoj-1.33.3.dev32.dist-info/RECORD +393 -0
- khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
- khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
- khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,532 @@
|
|
1
|
+
# Copyright (c) 2010 Charles Cave
|
2
|
+
#
|
3
|
+
# Permission is hereby granted, free of charge, to any person
|
4
|
+
# obtaining a copy of this software and associated documentation
|
5
|
+
# files (the "Software"), to deal in the Software without
|
6
|
+
# restriction, including without limitation the rights to use, copy,
|
7
|
+
# modify, merge, publish, distribute, sublicense, and/or sell copies
|
8
|
+
# of the Software, and to permit persons to whom the Software is
|
9
|
+
# furnished to do so, subject to the following conditions:
|
10
|
+
#
|
11
|
+
# The above copyright notice and this permission notice shall be
|
12
|
+
# included in all copies or substantial portions of the Software.
|
13
|
+
#
|
14
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
18
|
+
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
19
|
+
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
# SOFTWARE.
|
22
|
+
|
23
|
+
# Program written by Charles Cave (charlesweb@optusnet.com.au)
|
24
|
+
# February - March 2009
|
25
|
+
# Version 2 - June 2009
|
26
|
+
# Added support for all tags, TODO priority and checking existence of a tag
|
27
|
+
# More information at
|
28
|
+
# http://members.optusnet.com.au/~charles57/GTD
|
29
|
+
|
30
|
+
"""
|
31
|
+
The Orgnode module consists of the Orgnode class for representing a
|
32
|
+
headline and associated text from an org-mode file, and routines for
|
33
|
+
constructing data structures of these classes.
|
34
|
+
"""
|
35
|
+
|
36
|
+
import datetime
|
37
|
+
import re
|
38
|
+
from os.path import relpath
|
39
|
+
from pathlib import Path
|
40
|
+
from typing import Dict, List, Tuple
|
41
|
+
|
42
|
+
# Precompiled pattern matching the (possibly empty) run of space characters at the start of a string
indent_regex = re.compile(r"^ *")
|
43
|
+
|
44
|
+
|
45
|
+
def normalize_filename(filename):
    """Return `filename` prepared for rendering, with square brackets escaped.

    An absolute filename is kept as given. Any other filename is rewritten as
    a "~/"-prefixed path computed relative to the user's home directory.
    In both cases every "[" and "]" in the result is backslash-escaped.
    """
    if Path(filename).is_absolute():
        display_name = filename
    else:
        # Non-absolute path: express it relative to the home directory
        display_name = f"~/{relpath(filename, start=Path.home())}"

    # Backslash-escape both square brackets in a single pass
    bracket_escapes = str.maketrans({"[": r"\[", "]": r"\]"})
    return f"{display_name}".translate(bracket_escapes)
|
54
|
+
|
55
|
+
|
56
|
+
def makelist_with_filepath(filename):
    """Open the file at `filename` for reading and parse it with `makelist`.

    The opened file object and `filename` are both passed to `makelist`, and
    its result is returned unchanged.
    """
    # Open via a context manager so the file handle is closed once makelist
    # has returned, instead of being left open as before.
    with open(filename, "r") as org_file:
        return makelist(org_file, filename)
|
59
|
+
|
60
|
+
|
61
|
+
def makelist(file, filename) -> List["Orgnode"]:
    """
    Read org-mode content and return the list of Orgnode objects parsed from it.

    Args:
        file: Org-mode content, either as a single string (split on newlines)
            or as an iterable of lines such as an open text file.
        filename: Name of the source file. Used as the default file title and
            to build the LINE and SOURCE link properties of the nodes.

    Returns:
        Nodes in document order. Text found before the first heading is
        returned as a leading level 0 node titled with the #+TITLE value(s),
        or with the filename when no title is set.
    """
    line_number = 0
    lines = file.split("\n") if isinstance(file, str) else file

    # TODO keywords recognised in headings; extended by any #+SEQ_TODO line
    todo_keywords = {"TODO", "WAITING", "ACTIVE", "DONE", "CANCELLED", "FAILED"}
    level = ""
    heading = ""
    ancestor_headings: List[str] = []
    bodytext = ""
    introtext = ""
    tags: List[str] = []  # all tags in the current headline
    closed_date = None  # datetime.date once a CLOSED entry is seen
    sched_date = None  # datetime.date once a SCHEDULED entry is seen
    deadline_date = None  # datetime.date once a DEADLINE entry is seen
    logbook: List[Tuple[datetime.datetime, datetime.datetime]] = []
    nodelist: List["Orgnode"] = []
    property_map: Dict[str, str] = {}
    in_properties_drawer = False
    in_logbook_drawer = False
    file_title = f"{filename}"

    for line in lines:
        line_number += 1
        heading_search = re.search(r"^(\*+)\s(.*?)\s*$", line)
        if heading_search:  # we are processing a heading line
            if heading:  # a previous heading is pending: emit its node before starting the new one
                thisNode = Orgnode(level, heading, bodytext, tags, ancestor_headings)
                if closed_date:
                    thisNode.closed = closed_date
                    closed_date = None
                if sched_date:
                    thisNode.scheduled = sched_date
                    sched_date = None
                if deadline_date:
                    thisNode.deadline = deadline_date
                    deadline_date = None
                if logbook:
                    thisNode.logbook = logbook
                    logbook = []
                thisNode.properties = property_map
                nodelist.append(thisNode)

            # Start collecting state for the new heading
            property_map = {"LINE": f"file:{normalize_filename(filename)}::{line_number}"}
            previous_level = level
            previous_heading = heading
            level = heading_search.group(1)
            heading = heading_search.group(2)
            bodytext = ""
            tags = []

            # Split trailing :tag1:tag2: off the heading text
            tag_search = re.search(r"(.*?)\s*:([a-zA-Z0-9].*?):$", heading)
            if tag_search:
                heading = tag_search.group(1)
                parsedtags = tag_search.group(2)
                if parsedtags:
                    tags.extend(parsedtag for parsedtag in parsedtags.split(":") if parsedtag != "")

            # Add previous heading to ancestors if current heading is deeper than previous level
            if len(level) > len(previous_level) and previous_heading:
                ancestor_headings.append(previous_heading)
            # Remove last ancestor(s) if current heading is shallower than previous level
            elif len(level) < len(previous_level):
                for _ in range(len(level), len(previous_level)):
                    if not ancestor_headings:
                        break
                    ancestor_headings.pop()

        else:  # we are processing a non-heading line
            if line[:10] == "#+SEQ_TODO":
                todo_keywords.update(re.findall(r"([A-Z]+)\(", line))

            # Set file title to TITLE property, if it exists
            title_search = re.search(r"^#\+TITLE:\s*(.*)$", line)
            if title_search and title_search.group(1).strip() != "":
                title_text = title_search.group(1).strip()
                if file_title == f"{filename}":
                    file_title = title_text
                else:
                    # Collate multiple title lines into a single title
                    file_title += f" {title_text}"
                continue

            # Ignore Properties Drawer Start, End Lines
            if re.search(":PROPERTIES:", line):
                in_properties_drawer = True
                continue
            if in_properties_drawer and re.search(":END:", line):
                in_properties_drawer = False
                continue

            # Ignore Logbook Drawer Start, End Lines
            if re.search(":LOGBOOK:", line):
                in_logbook_drawer = True
                continue
            if in_logbook_drawer and re.search(":END:", line):
                in_logbook_drawer = False
                continue

            # Extract Clocking Lines
            clocked_re = re.search(
                r"CLOCK:\s*\[([0-9]{4}-[0-9]{2}-[0-9]{2} [a-zA-Z]{3} [0-9]{2}:[0-9]{2})\]--\[([0-9]{4}-[0-9]{2}-[0-9]{2} [a-zA-Z]{3} [0-9]{2}:[0-9]{2})\]",
                line,
            )
            if clocked_re:
                # convert clock in, clock out strings to datetime objects
                clocked_in = datetime.datetime.strptime(clocked_re.group(1), "%Y-%m-%d %a %H:%M")
                clocked_out = datetime.datetime.strptime(clocked_re.group(2), "%Y-%m-%d %a %H:%M")
                # add clocked time to the entries logbook list
                logbook += [(clocked_in, clocked_out)]
                # blank the line so the remaining matchers below ignore it
                line = ""

            property_search = re.search(r"^\s*:([a-zA-Z0-9]+):\s*(.*?)\s*$", line)
            if property_search:
                # Set ID property to an id based org-mode link to the entry
                if property_search.group(1) == "ID":
                    property_map["ID"] = f"id:{property_search.group(2)}"
                else:
                    property_map[property_search.group(1)] = property_search.group(2)
                continue

            cd_re = re.search(r"CLOSED:\s*\[([0-9]{4})-([0-9]{2})-([0-9]{2})", line)
            if cd_re:
                closed_date = datetime.date(int(cd_re.group(1)), int(cd_re.group(2)), int(cd_re.group(3)))
            sd_re = re.search(r"SCHEDULED:\s*<([0-9]+)\-([0-9]+)\-([0-9]+)", line)
            if sd_re:
                sched_date = datetime.date(int(sd_re.group(1)), int(sd_re.group(2)), int(sd_re.group(3)))
            dd_re = re.search(r"DEADLINE:\s*<(\d+)\-(\d+)\-(\d+)", line)
            if dd_re:
                deadline_date = datetime.date(int(dd_re.group(1)), int(dd_re.group(2)), int(dd_re.group(3)))

            # Ignore property drawer, scheduled, closed, deadline, logbook entries and # lines from body
            if (
                not in_properties_drawer
                and not cd_re
                and not sd_re
                and not dd_re
                and not clocked_re
                and line[:1] != "#"
            ):
                if heading:
                    # inside a heading: add non-blank lines to the body, each followed by a blank line
                    bodytext += (line.rstrip() + "\n\n") if line.strip() else ""
                elif line.strip():
                    # before the first heading: add non-blank lines to the intro text
                    introtext += line

    # write out intro node before headings
    # this is done at the end to allow collating all title lines
    if introtext:
        # The intro text precedes every heading, so it gets level 0 and no tags
        # rather than inheriting the level and tags of the last heading parsed
        intro_node = Orgnode("", file_title, introtext, [])
        nodelist.insert(0, intro_node)

    # write out last heading node
    if heading:
        thisNode = Orgnode(level, heading, bodytext, tags, ancestor_headings)
        thisNode.properties = property_map
        if sched_date:
            thisNode.scheduled = sched_date
        if deadline_date:
            thisNode.deadline = deadline_date
        if closed_date:
            thisNode.closed = closed_date
        if logbook:
            thisNode.logbook = logbook
        nodelist.append(thisNode)

    # using the set of TODO keywords found in the file
    # process the headings searching for TODO keywords
    for node in nodelist:
        # Anchor at the start of the heading so a keyword appearing later in
        # the heading text (e.g. "Review TODO list") is not mistaken for its state
        todo_search = re.search(r"^\s*([A-Z]+)\s(.*?)$", node.heading)
        if todo_search and todo_search.group(1) in todo_keywords:
            node.heading = todo_search.group(2)
            node.todo = todo_search.group(1)

        # extract, set priority from heading, update heading if necessary
        priority_search = re.search(r"^\[\#(A|B|C)\] (.*?)$", node.heading)
        if priority_search:
            node.priority = priority_search.group(1)
            node.heading = priority_search.group(2)

        # Prefix filepath/title to ancestors
        node.ancestors = [file_title] + node.ancestors

        # Set SOURCE property to a file+heading based org-mode link to the entry
        if node.level == 0:
            node.properties["LINE"] = f"file:{normalize_filename(filename)}::0"
            node.properties["SOURCE"] = f"[[file:{normalize_filename(filename)}]]"
        else:
            escaped_heading = node.heading.replace("[", "\\[").replace("]", "\\]")
            node.properties["SOURCE"] = f"[[file:{normalize_filename(filename)}::*{escaped_heading}]]"

    return nodelist
|
272
|
+
|
273
|
+
|
274
|
+
######################
|
275
|
+
class Orgnode(object):
    """
    Orgnode class represents a headline, tags and text associated
    with the headline.
    """

    def __init__(self, level, headline, body, tags, ancestor_headings=None):
        """
        Create an Orgnode object given the parameters of level (as the
        raw asterisks), headline text (including the TODO tag), body text
        and the list of tags. The makelist routine postprocesses the list
        to identify TODO tags and updates headline and todo fields.

        ancestor_headings is copied, so later changes to the caller's
        list do not affect this node. It defaults to no ancestors.
        """
        self._level = len(level)
        self._heading = headline
        self._body = body
        self._tags = tags  # All tags in the headline
        self._todo = ""
        self._priority = ""  # empty or A, B or C
        self._scheduled = ""  # Scheduled date
        self._deadline = ""  # Deadline date
        self._closed = ""  # Closed date
        self._properties = dict()
        self._logbook = list()  # List of clock-in, clock-out tuples representing logbook entries
        # None instead of a mutable [] default; always store a private copy
        self._ancestor_headings = list(ancestor_headings) if ancestor_headings else []

    @property
    def ancestors(self) -> List[str]:
        """
        Return the ancestor headings of the node
        """
        return self._ancestor_headings

    @ancestors.setter
    def ancestors(self, new_ancestors):
        """
        Update the ancestor headings of the node
        """
        self._ancestor_headings = new_ancestors

    @property
    def heading(self):
        """
        Return the Heading text of the node without the TODO tag
        """
        return self._heading

    @heading.setter
    def heading(self, newhdng):
        """
        Change the heading to the supplied string
        """
        self._heading = newhdng

    @property
    def body(self):
        """
        Returns all lines of text of the body of this node except the
        Property Drawer
        """
        return self._body

    @property
    def hasBody(self):
        """
        Returns True if the body contains any character other than
        spaces, tabs, carriage returns and newlines, else False
        """
        # Always return a real bool, never the empty body itself
        return bool(self._body) and bool(self._body.strip("\n\t\r "))

    @property
    def level(self):
        """
        Returns an integer corresponding to the level of the node.
        Top level (one asterisk) has a level of 1.
        """
        return self._level

    @property
    def priority(self):
        """
        Returns the priority of this headline: 'A', 'B', 'C' or empty
        string if priority has not been set.
        """
        return self._priority

    @priority.setter
    def priority(self, new_priority):
        """
        Change the value of the priority of this headline.
        Valid values are '', 'A', 'B', 'C'
        """
        self._priority = new_priority

    @property
    def tags(self):
        """
        Returns the list of all tags
        For example, :HOME:COMPUTER: would return ['HOME', 'COMPUTER']
        """
        return self._tags

    @tags.setter
    def tags(self, newtags):
        """
        Store all the tags found in the headline.
        """
        self._tags = newtags

    def hasTag(self, tag):
        """
        Returns True if the supplied tag is present in this headline
        For example, hasTag('COMPUTER') on headline containing
        :HOME:COMPUTER: would return True.
        """
        return tag in self._tags

    @property
    def todo(self):
        """
        Return the value of the TODO tag
        """
        return self._todo

    @todo.setter
    def todo(self, new_todo):
        """
        Set the value of the TODO tag to the supplied string
        """
        self._todo = new_todo

    @property
    def properties(self):
        """
        Return the dictionary of properties
        """
        return self._properties

    @properties.setter
    def properties(self, new_properties):
        """
        Sets all properties using the supplied dictionary of
        name/value pairs
        """
        self._properties = new_properties

    def Property(self, property_key):
        """
        Returns the value of the requested property or an empty string
        if the property does not exist.
        """
        return self._properties.get(property_key, "")

    @property
    def scheduled(self):
        """
        Return the scheduled date
        """
        return self._scheduled

    @scheduled.setter
    def scheduled(self, new_scheduled):
        """
        Set the scheduled date to the new scheduled date
        """
        self._scheduled = new_scheduled

    @property
    def deadline(self):
        """
        Return the deadline date
        """
        return self._deadline

    @deadline.setter
    def deadline(self, new_deadline):
        """
        Set the deadline (due) date to the new deadline date
        """
        self._deadline = new_deadline

    @property
    def closed(self):
        """
        Return the closed date
        """
        return self._closed

    @closed.setter
    def closed(self, new_closed):
        """
        Set the closed date to the new closed date
        """
        self._closed = new_closed

    @property
    def logbook(self):
        """
        Return the logbook with all clocked-in, clocked-out date object pairs or empty list if nonexistent
        """
        return self._logbook

    @logbook.setter
    def logbook(self, new_logbook):
        """
        Set the logbook with list of clocked-in, clocked-out tuples for the entry
        """
        self._logbook = new_logbook

    def __repr__(self):
        """
        Render the node as org-mode text: the heading line (level, TODO
        keyword, priority, heading text and tags), the closed, scheduled
        and deadline dates when set, the property drawer and the body.
        """
        # Output heading line
        heading_line = "*" * self._level + " "
        if self._todo:
            heading_line += self._todo + " "
        if self._priority:
            heading_line += "[#" + self._priority + "] "
        heading_line += self._heading
        if self._tags:
            heading_line = "%-60s " % heading_line  # hack - tags will start in column 62
            heading_line += ":" + ":".join(self._tags) + ":"
        parts = [heading_line + "\n"]

        # Get body indentation from first line of body
        indent = indent_regex.match(self._body).group()

        # Output Closed Date, Scheduled Date, Deadline Date
        planning = ""
        if self._closed:
            planning += f'CLOSED: [{self._closed.strftime("%Y-%m-%d %a")}] '
        if self._scheduled:
            planning += f'SCHEDULED: <{self._scheduled.strftime("%Y-%m-%d %a")}> '
        if self._deadline:
            planning += f'DEADLINE: <{self._deadline.strftime("%Y-%m-%d %a")}> '
        if planning:
            parts.append(indent + planning + "\n")

        # Output Property Drawer
        parts.append(indent + ":PROPERTIES:\n")
        parts.extend(indent + f":{key}: {value}\n" for key, value in self._properties.items())
        parts.append(indent + ":END:\n")

        # Output Body
        if self.hasBody:
            parts.append(self._body)

        return "".join(parts)
|
File without changes
|
@@ -0,0 +1,119 @@
|
|
1
|
+
import logging
|
2
|
+
import tempfile
|
3
|
+
from typing import Dict, Final, List, Tuple
|
4
|
+
|
5
|
+
from langchain_community.document_loaders import PyMuPDFLoader
|
6
|
+
|
7
|
+
from khoj.database.models import Entry as DbEntry
|
8
|
+
from khoj.database.models import KhojUser
|
9
|
+
from khoj.processor.content.text_to_entries import TextToEntries
|
10
|
+
from khoj.utils.helpers import timer
|
11
|
+
from khoj.utils.rawconfig import Entry
|
12
|
+
|
13
|
+
# Logger named after this module; used for timing and extraction warnings below
logger = logging.getLogger(__name__)
|
14
|
+
|
15
|
+
|
16
|
+
class PdfToEntries(TextToEntries):
    """Convert the text of PDF files into entries and update their embeddings."""

    # Class-level constant translation table that deletes null characters
    NULL_TRANSLATOR: Final = str.maketrans("", "", "\x00")

    def __init__(self):
        super().__init__()

    # Define Functions
    def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
        """
        Extract entries from the given PDF files and update the user's embeddings.

        Args:
            files: Map of file name to file content. Files whose content equals
                b"" are treated as deleted and are not parsed.
            user: User whose entries are updated.
            regenerate: Passed through to update_embeddings.

        Returns:
            Tuple of (number of new embeddings, number of deleted embeddings).
        """
        # Files with empty content are marked for deletion rather than parsed
        deletion_file_names = {file for file in files if files[file] == b""}
        files_to_process = set(files) - deletion_file_names
        files = {file: files[file] for file in files_to_process}

        # Extract Entries from specified Pdf files
        with timer("Extract entries from specified PDF files", logger):
            file_to_text_map, current_entries = PdfToEntries.extract_pdf_entries(files)

        # Split entries by max tokens supported by model
        with timer("Split entries by max token size supported by model", logger):
            current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=256)

        # Identify, mark and merge any new entries with previous entries
        with timer("Identify new or updated entries", logger):
            num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
                user,
                current_entries,
                DbEntry.EntryType.PDF,
                DbEntry.EntrySource.COMPUTER,
                "compiled",
                logger,
                deletion_file_names,
                regenerate=regenerate,
                file_to_text_map=file_to_text_map,
            )

        return num_new_embeddings, num_deleted_embeddings

    @staticmethod
    def extract_pdf_entries(pdf_files) -> Tuple[Dict, List[Entry]]:  # important function
        """
        Extract entries by page from specified PDF files

        Returns a tuple of the map from file name to its list of extracted
        texts and the list of entries built from those texts. Files that
        fail to load, or yield no text items, are logged and skipped.
        """
        file_to_text_map = dict()
        entries: List[str] = []
        entry_to_location_map: List[Tuple[str, str]] = []
        for pdf_file in pdf_files:
            try:
                pdf_entries_per_file = PdfToEntries.extract_text(pdf_files[pdf_file])
                if not pdf_entries_per_file:
                    # extract_text returns an empty list when loading failed;
                    # name the file here since extract_text only sees its content
                    logger.warning(f"Unable to extract entries from file: {pdf_file}")
                    continue
                entry_to_location_map += zip(pdf_entries_per_file, [pdf_file] * len(pdf_entries_per_file))
                entries.extend(pdf_entries_per_file)
                file_to_text_map[pdf_file] = pdf_entries_per_file
            except Exception as e:
                logger.warning(f"Unable to extract entries from file: {pdf_file}")
                logger.warning(e, exc_info=True)

        # NOTE(review): the map below is keyed by entry text, so identical text
        # found in several files is attributed to the last file processed
        return file_to_text_map, PdfToEntries.convert_pdf_entries_to_maps(entries, dict(entry_to_location_map))

    @staticmethod
    def convert_pdf_entries_to_maps(parsed_entries: List[str], entry_to_file_map) -> List[Entry]:
        """
        Convert each extracted PDF text into an Entry

        entry_to_file_map maps each entry text to the name of its source file.
        """
        entries = []
        for parsed_entry in parsed_entries:
            entry_filename = entry_to_file_map[parsed_entry]
            # Prepend filename to compiled entry for context to model
            heading = f"{entry_filename}\n"
            compiled_entry = f"{heading}{parsed_entry}"
            entries.append(
                Entry(
                    compiled=compiled_entry,
                    raw=parsed_entry,
                    heading=heading,
                    file=f"{entry_filename}",
                )
            )

        logger.debug(f"Converted {len(parsed_entries)} PDF entries to dictionaries")

        return entries

    @staticmethod
    def extract_text(pdf_file):
        """
        Extract text from the content of a single PDF file

        pdf_file is the raw file content; it is written to a temporary .pdf
        file and loaded with PyMuPDFLoader. Returns the cleaned text of each
        loaded item, or an empty list when the content cannot be processed.
        """
        # Default to no text so a failed load returns an empty list instead of
        # raising UnboundLocalError at the return statement
        pdf_entry_by_pages: List[str] = []
        try:
            # Create temp file with .pdf extension that gets auto-deleted
            with tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) as tmpf:
                tmpf.write(pdf_file)
                tmpf.flush()  # Ensure all data is written

                # Load the content using PyMuPDFLoader
                loader = PyMuPDFLoader(tmpf.name)
                pdf_entries_per_file = loader.load()

                # Convert the loaded entries into the desired format
                pdf_entry_by_pages = [PdfToEntries.clean_text(page.page_content) for page in pdf_entries_per_file]
        except Exception as e:
            # pdf_file is the raw file content, so it is deliberately kept out of the log message
            logger.warning("Unable to process PDF file. This file will not be indexed.")
            logger.warning(e, exc_info=True)

        return pdf_entry_by_pages

    @staticmethod
    def clean_text(text: str) -> str:
        """Clean PDF text by removing null characters."""
        # Use faster translation table instead of replace
        return text.translate(PdfToEntries.NULL_TRANSLATOR)
|
File without changes
|