khoj 1.33.3.dev32__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (393) hide show
  1. khoj/__init__.py +0 -0
  2. khoj/app/README.md +94 -0
  3. khoj/app/__init__.py +0 -0
  4. khoj/app/asgi.py +16 -0
  5. khoj/app/settings.py +218 -0
  6. khoj/app/urls.py +25 -0
  7. khoj/configure.py +452 -0
  8. khoj/database/__init__.py +0 -0
  9. khoj/database/adapters/__init__.py +1821 -0
  10. khoj/database/admin.py +417 -0
  11. khoj/database/apps.py +6 -0
  12. khoj/database/management/__init__.py +0 -0
  13. khoj/database/management/commands/__init__.py +0 -0
  14. khoj/database/management/commands/change_default_model.py +116 -0
  15. khoj/database/management/commands/change_generated_images_url.py +61 -0
  16. khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
  17. khoj/database/migrations/0001_khojuser.py +98 -0
  18. khoj/database/migrations/0002_googleuser.py +32 -0
  19. khoj/database/migrations/0003_vector_extension.py +10 -0
  20. khoj/database/migrations/0004_content_types_and_more.py +181 -0
  21. khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
  22. khoj/database/migrations/0006_embeddingsdates.py +33 -0
  23. khoj/database/migrations/0007_add_conversation.py +27 -0
  24. khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
  25. khoj/database/migrations/0009_khojapiuser.py +24 -0
  26. khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
  27. khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
  28. khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
  29. khoj/database/migrations/0012_entry_file_source.py +21 -0
  30. khoj/database/migrations/0013_subscription.py +37 -0
  31. khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
  32. khoj/database/migrations/0015_alter_subscription_user.py +21 -0
  33. khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
  34. khoj/database/migrations/0017_searchmodel.py +32 -0
  35. khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
  36. khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
  37. khoj/database/migrations/0020_reflectivequestion.py +36 -0
  38. khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
  39. khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
  40. khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
  41. khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
  42. khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
  43. khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
  44. khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
  45. khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
  46. khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
  47. khoj/database/migrations/0029_userrequests.py +27 -0
  48. khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
  49. khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
  50. khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
  51. khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
  52. khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
  53. khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
  54. khoj/database/migrations/0035_processlock.py +26 -0
  55. khoj/database/migrations/0036_alter_processlock_name.py +19 -0
  56. khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
  57. khoj/database/migrations/0036_publicconversation.py +42 -0
  58. khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
  59. khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
  60. khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
  61. khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
  62. khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
  63. khoj/database/migrations/0040_alter_processlock_name.py +26 -0
  64. khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
  65. khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
  66. khoj/database/migrations/0042_serverchatsettings.py +46 -0
  67. khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
  68. khoj/database/migrations/0044_conversation_file_filters.py +17 -0
  69. khoj/database/migrations/0045_fileobject.py +37 -0
  70. khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
  71. khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
  72. khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
  73. khoj/database/migrations/0049_datastore.py +38 -0
  74. khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
  75. khoj/database/migrations/0050_alter_processlock_name.py +25 -0
  76. khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
  77. khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
  78. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  79. khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
  80. khoj/database/migrations/0055_alter_agent_style_icon.py +37 -0
  81. khoj/database/migrations/0056_chatmodeloptions_vision_enabled.py +17 -0
  82. khoj/database/migrations/0056_searchmodelconfig_cross_encoder_model_config.py +17 -0
  83. khoj/database/migrations/0057_merge_20240816_1409.py +13 -0
  84. khoj/database/migrations/0057_remove_serverchatsettings_default_model_and_more.py +51 -0
  85. khoj/database/migrations/0058_alter_chatmodeloptions_chat_model.py +17 -0
  86. khoj/database/migrations/0059_searchmodelconfig_bi_encoder_confidence_threshold.py +17 -0
  87. khoj/database/migrations/0060_merge_20240905_1828.py +14 -0
  88. khoj/database/migrations/0061_alter_chatmodeloptions_model_type.py +26 -0
  89. khoj/database/migrations/0061_alter_texttoimagemodelconfig_model_type.py +21 -0
  90. khoj/database/migrations/0062_merge_20240913_0222.py +14 -0
  91. khoj/database/migrations/0063_conversation_temp_id.py +36 -0
  92. khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +86 -0
  93. khoj/database/migrations/0065_remove_agent_avatar_remove_agent_public_and_more.py +49 -0
  94. khoj/database/migrations/0066_remove_agent_tools_agent_input_tools_and_more.py +69 -0
  95. khoj/database/migrations/0067_alter_agent_style_icon.py +50 -0
  96. khoj/database/migrations/0068_alter_agent_output_modes.py +24 -0
  97. khoj/database/migrations/0069_webscraper_serverchatsettings_web_scraper.py +89 -0
  98. khoj/database/migrations/0070_alter_agent_input_tools_alter_agent_output_modes.py +46 -0
  99. khoj/database/migrations/0071_subscription_enabled_trial_at_and_more.py +32 -0
  100. khoj/database/migrations/0072_entry_search_model.py +24 -0
  101. khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
  102. khoj/database/migrations/0074_alter_conversation_title.py +17 -0
  103. khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +85 -0
  104. khoj/database/migrations/0076_rename_openaiprocessorconversationconfig_aimodelapi_and_more.py +26 -0
  105. khoj/database/migrations/0077_chatmodel_alter_agent_chat_model_and_more.py +62 -0
  106. khoj/database/migrations/0078_khojuser_email_verification_code_expiry.py +17 -0
  107. khoj/database/migrations/__init__.py +0 -0
  108. khoj/database/models/__init__.py +725 -0
  109. khoj/database/tests.py +3 -0
  110. khoj/interface/compiled/404/index.html +1 -0
  111. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_buildManifest.js +1 -0
  112. khoj/interface/compiled/_next/static/Tg-vU1p1B-YKT5Qv8KSHt/_ssgManifest.js +1 -0
  113. khoj/interface/compiled/_next/static/chunks/1010-8f39bb4648b5ba10.js +1 -0
  114. khoj/interface/compiled/_next/static/chunks/182-f1c48a203dc91e0e.js +20 -0
  115. khoj/interface/compiled/_next/static/chunks/1915-d3c36ad6ce697ce7.js +1 -0
  116. khoj/interface/compiled/_next/static/chunks/2117-165ef4747a5b836b.js +2 -0
  117. khoj/interface/compiled/_next/static/chunks/2581-455000f8aeb08fc3.js +1 -0
  118. khoj/interface/compiled/_next/static/chunks/3727.dcea8f2193111552.js +1 -0
  119. khoj/interface/compiled/_next/static/chunks/3789-a09e37a819171a9d.js +1 -0
  120. khoj/interface/compiled/_next/static/chunks/4124-6c28322ce218d2d5.js +1 -0
  121. khoj/interface/compiled/_next/static/chunks/5427-b52d95253e692bfa.js +1 -0
  122. khoj/interface/compiled/_next/static/chunks/5473-b1cf56dedac6577a.js +1 -0
  123. khoj/interface/compiled/_next/static/chunks/5477-0bbddb79c25a54a7.js +1 -0
  124. khoj/interface/compiled/_next/static/chunks/6065-64db9ad305ba0bcd.js +1 -0
  125. khoj/interface/compiled/_next/static/chunks/6293-469dd16402ea8a6f.js +3 -0
  126. khoj/interface/compiled/_next/static/chunks/688-b5b4391bbc0376f1.js +1 -0
  127. khoj/interface/compiled/_next/static/chunks/8667-b6bf63c72b2d76eb.js +1 -0
  128. khoj/interface/compiled/_next/static/chunks/9259-1172dbaca0515237.js +1 -0
  129. khoj/interface/compiled/_next/static/chunks/94ca1967.1d9b42d929a1ee8c.js +1 -0
  130. khoj/interface/compiled/_next/static/chunks/9597.83583248dfbf6e73.js +1 -0
  131. khoj/interface/compiled/_next/static/chunks/964ecbae.51d6faf8801d15e6.js +1 -0
  132. khoj/interface/compiled/_next/static/chunks/9665-391df1e5c51c960a.js +1 -0
  133. khoj/interface/compiled/_next/static/chunks/app/_not-found/page-a834eddae3e235df.js +1 -0
  134. khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
  135. khoj/interface/compiled/_next/static/chunks/app/agents/page-28ce086a1129bca2.js +1 -0
  136. khoj/interface/compiled/_next/static/chunks/app/automations/layout-1fe1537449f43496.js +1 -0
  137. khoj/interface/compiled/_next/static/chunks/app/automations/page-bf365a60829d347f.js +1 -0
  138. khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
  139. khoj/interface/compiled/_next/static/chunks/app/chat/page-0e476e57eb2015e3.js +1 -0
  140. khoj/interface/compiled/_next/static/chunks/app/layout-30e7fda7262713ce.js +1 -0
  141. khoj/interface/compiled/_next/static/chunks/app/page-a5515ea71aec5ef0.js +1 -0
  142. khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
  143. khoj/interface/compiled/_next/static/chunks/app/search/page-9140541e67ea307d.js +1 -0
  144. khoj/interface/compiled/_next/static/chunks/app/settings/layout-d09d6510a45cd4bd.js +1 -0
  145. khoj/interface/compiled/_next/static/chunks/app/settings/page-951ba40b5b94b23a.js +1 -0
  146. khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
  147. khoj/interface/compiled/_next/static/chunks/app/share/chat/page-1beb80d8d741c932.js +1 -0
  148. khoj/interface/compiled/_next/static/chunks/d3ac728e-44ebd2a0c99b12a0.js +1 -0
  149. khoj/interface/compiled/_next/static/chunks/fd9d1056-4482b99a36fd1673.js +1 -0
  150. khoj/interface/compiled/_next/static/chunks/framework-8e0e0f4a6b83a956.js +1 -0
  151. khoj/interface/compiled/_next/static/chunks/main-app-de1f09df97a3cfc7.js +1 -0
  152. khoj/interface/compiled/_next/static/chunks/main-db4bfac6b0a8d00b.js +1 -0
  153. khoj/interface/compiled/_next/static/chunks/pages/_app-3c9ca398d360b709.js +1 -0
  154. khoj/interface/compiled/_next/static/chunks/pages/_error-cf5ca766ac8f493f.js +1 -0
  155. khoj/interface/compiled/_next/static/chunks/polyfills-42372ed130431b0a.js +1 -0
  156. khoj/interface/compiled/_next/static/chunks/webpack-a03962458328b163.js +1 -0
  157. khoj/interface/compiled/_next/static/css/089de1d8526b96e9.css +1 -0
  158. khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +1 -0
  159. khoj/interface/compiled/_next/static/css/4e4e6a4a1c920d06.css +1 -0
  160. khoj/interface/compiled/_next/static/css/8d02837c730f8d13.css +25 -0
  161. khoj/interface/compiled/_next/static/css/8e6a3ca11a60b189.css +1 -0
  162. khoj/interface/compiled/_next/static/css/9c164d9727dd8092.css +1 -0
  163. khoj/interface/compiled/_next/static/css/dac88c17aaee5fcf.css +1 -0
  164. khoj/interface/compiled/_next/static/css/df4b47a2d0d85eae.css +1 -0
  165. khoj/interface/compiled/_next/static/css/e4eb883b5265d372.css +1 -0
  166. khoj/interface/compiled/_next/static/media/1d8a05b60287ae6c-s.p.woff2 +0 -0
  167. khoj/interface/compiled/_next/static/media/6f22fce21a7c433c-s.woff2 +0 -0
  168. khoj/interface/compiled/_next/static/media/77c207b095007c34-s.p.woff2 +0 -0
  169. khoj/interface/compiled/_next/static/media/82ef96de0e8f4d8c-s.p.woff2 +0 -0
  170. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.1608a09b.woff +0 -0
  171. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.4aafdb68.ttf +0 -0
  172. khoj/interface/compiled/_next/static/media/KaTeX_AMS-Regular.a79f1c31.woff2 +0 -0
  173. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.b6770918.woff +0 -0
  174. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.cce5b8ec.ttf +0 -0
  175. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Bold.ec17d132.woff2 +0 -0
  176. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.07ef19e7.ttf +0 -0
  177. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.55fac258.woff2 +0 -0
  178. khoj/interface/compiled/_next/static/media/KaTeX_Caligraphic-Regular.dad44a7f.woff +0 -0
  179. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.9f256b85.woff +0 -0
  180. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.b18f59e1.ttf +0 -0
  181. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Bold.d42a5579.woff2 +0 -0
  182. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.7c187121.woff +0 -0
  183. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.d3c882a6.woff2 +0 -0
  184. khoj/interface/compiled/_next/static/media/KaTeX_Fraktur-Regular.ed38e79f.ttf +0 -0
  185. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.b74a1a8b.ttf +0 -0
  186. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.c3fb5ac2.woff2 +0 -0
  187. khoj/interface/compiled/_next/static/media/KaTeX_Main-Bold.d181c465.woff +0 -0
  188. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.6f2bb1df.woff2 +0 -0
  189. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.70d8b0a5.ttf +0 -0
  190. khoj/interface/compiled/_next/static/media/KaTeX_Main-BoldItalic.e3f82f9d.woff +0 -0
  191. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.47373d1e.ttf +0 -0
  192. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.8916142b.woff2 +0 -0
  193. khoj/interface/compiled/_next/static/media/KaTeX_Main-Italic.9024d815.woff +0 -0
  194. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.0462f03b.woff2 +0 -0
  195. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.7f51fe03.woff +0 -0
  196. khoj/interface/compiled/_next/static/media/KaTeX_Main-Regular.b7f8fe9b.ttf +0 -0
  197. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.572d331f.woff2 +0 -0
  198. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.a879cf83.ttf +0 -0
  199. khoj/interface/compiled/_next/static/media/KaTeX_Math-BoldItalic.f1035d8d.woff +0 -0
  200. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.5295ba48.woff +0 -0
  201. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.939bc644.ttf +0 -0
  202. khoj/interface/compiled/_next/static/media/KaTeX_Math-Italic.f28c23ac.woff2 +0 -0
  203. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.8c5b5494.woff2 +0 -0
  204. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.94e1e8dc.ttf +0 -0
  205. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Bold.bf59d231.woff +0 -0
  206. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.3b1e59b3.woff2 +0 -0
  207. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.7c9bc82b.woff +0 -0
  208. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Italic.b4c20c84.ttf +0 -0
  209. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.74048478.woff +0 -0
  210. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.ba21ed5f.woff2 +0 -0
  211. khoj/interface/compiled/_next/static/media/KaTeX_SansSerif-Regular.d4d7ba48.ttf +0 -0
  212. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.03e9641d.woff2 +0 -0
  213. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.07505710.woff +0 -0
  214. khoj/interface/compiled/_next/static/media/KaTeX_Script-Regular.fe9cbbe1.ttf +0 -0
  215. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.e1e279cb.woff +0 -0
  216. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.eae34984.woff2 +0 -0
  217. khoj/interface/compiled/_next/static/media/KaTeX_Size1-Regular.fabc004a.ttf +0 -0
  218. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.57727022.woff +0 -0
  219. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.5916a24f.woff2 +0 -0
  220. khoj/interface/compiled/_next/static/media/KaTeX_Size2-Regular.d6b476ec.ttf +0 -0
  221. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.9acaf01c.woff +0 -0
  222. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.a144ef58.ttf +0 -0
  223. khoj/interface/compiled/_next/static/media/KaTeX_Size3-Regular.b4230e7e.woff2 +0 -0
  224. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.10d95fd3.woff2 +0 -0
  225. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.7a996c9d.woff +0 -0
  226. khoj/interface/compiled/_next/static/media/KaTeX_Size4-Regular.fbccdabe.ttf +0 -0
  227. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.6258592b.woff +0 -0
  228. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.a8709e36.woff2 +0 -0
  229. khoj/interface/compiled/_next/static/media/KaTeX_Typewriter-Regular.d97aaf4a.ttf +0 -0
  230. khoj/interface/compiled/_next/static/media/a6ecd16fa044d500-s.p.woff2 +0 -0
  231. khoj/interface/compiled/_next/static/media/bd82c78e5b7b3fe9-s.p.woff2 +0 -0
  232. khoj/interface/compiled/_next/static/media/c32c8052c071fc42-s.woff2 +0 -0
  233. khoj/interface/compiled/_next/static/media/c4250770ab8708b6-s.p.woff2 +0 -0
  234. khoj/interface/compiled/_next/static/media/e098aaaecc9cfbb2-s.p.woff2 +0 -0
  235. khoj/interface/compiled/_next/static/media/flags.3afdda2f.webp +0 -0
  236. khoj/interface/compiled/_next/static/media/flags@2x.5fbe9fc1.webp +0 -0
  237. khoj/interface/compiled/_next/static/media/globe.98e105ca.webp +0 -0
  238. khoj/interface/compiled/_next/static/media/globe@2x.974df6f8.webp +0 -0
  239. khoj/interface/compiled/agents/index.html +1 -0
  240. khoj/interface/compiled/agents/index.txt +7 -0
  241. khoj/interface/compiled/agents.svg +6 -0
  242. khoj/interface/compiled/assets/icons/khoj_lantern.ico +0 -0
  243. khoj/interface/compiled/assets/icons/khoj_lantern.svg +100 -0
  244. khoj/interface/compiled/assets/icons/khoj_lantern_1200x1200.png +0 -0
  245. khoj/interface/compiled/assets/icons/khoj_lantern_128x128.png +0 -0
  246. khoj/interface/compiled/assets/icons/khoj_lantern_128x128_dark.png +0 -0
  247. khoj/interface/compiled/assets/icons/khoj_lantern_256x256.png +0 -0
  248. khoj/interface/compiled/assets/icons/khoj_lantern_512x512.png +0 -0
  249. khoj/interface/compiled/assets/icons/khoj_lantern_logomarktype_1200x630.png +0 -0
  250. khoj/interface/compiled/assets/samples/desktop-browse-draw-sample.png +0 -0
  251. khoj/interface/compiled/assets/samples/desktop-plain-chat-sample.png +0 -0
  252. khoj/interface/compiled/assets/samples/desktop-remember-plan-sample.png +0 -0
  253. khoj/interface/compiled/assets/samples/phone-browse-draw-sample.png +0 -0
  254. khoj/interface/compiled/assets/samples/phone-plain-chat-sample.png +0 -0
  255. khoj/interface/compiled/assets/samples/phone-remember-plan-sample.png +0 -0
  256. khoj/interface/compiled/automation.svg +37 -0
  257. khoj/interface/compiled/automations/index.html +1 -0
  258. khoj/interface/compiled/automations/index.txt +8 -0
  259. khoj/interface/compiled/chat/index.html +1 -0
  260. khoj/interface/compiled/chat/index.txt +7 -0
  261. khoj/interface/compiled/chat.svg +24 -0
  262. khoj/interface/compiled/close.svg +5 -0
  263. khoj/interface/compiled/copy-button-success.svg +6 -0
  264. khoj/interface/compiled/copy-button.svg +5 -0
  265. khoj/interface/compiled/index.html +1 -0
  266. khoj/interface/compiled/index.txt +7 -0
  267. khoj/interface/compiled/khoj.webmanifest +76 -0
  268. khoj/interface/compiled/logo.svg +24 -0
  269. khoj/interface/compiled/search/index.html +1 -0
  270. khoj/interface/compiled/search/index.txt +7 -0
  271. khoj/interface/compiled/send.svg +1 -0
  272. khoj/interface/compiled/settings/index.html +1 -0
  273. khoj/interface/compiled/settings/index.txt +9 -0
  274. khoj/interface/compiled/share/chat/index.html +1 -0
  275. khoj/interface/compiled/share/chat/index.txt +7 -0
  276. khoj/interface/compiled/share.svg +8 -0
  277. khoj/interface/compiled/thumbs-down.svg +6 -0
  278. khoj/interface/compiled/thumbs-up.svg +6 -0
  279. khoj/interface/email/feedback.html +34 -0
  280. khoj/interface/email/magic_link.html +40 -0
  281. khoj/interface/email/task.html +37 -0
  282. khoj/interface/email/welcome.html +90 -0
  283. khoj/interface/web/.well-known/assetlinks.json +11 -0
  284. khoj/interface/web/assets/icons/agents.svg +19 -0
  285. khoj/interface/web/assets/icons/automation.svg +43 -0
  286. khoj/interface/web/assets/icons/chat.svg +24 -0
  287. khoj/interface/web/assets/icons/github.svg +1 -0
  288. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  289. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  290. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +32 -0
  291. khoj/interface/web/assets/icons/khoj.svg +26 -0
  292. khoj/interface/web/assets/icons/logotype.svg +1 -0
  293. khoj/interface/web/assets/icons/search.svg +57 -0
  294. khoj/interface/web/assets/icons/sync.svg +4 -0
  295. khoj/interface/web/assets/khoj.css +237 -0
  296. khoj/interface/web/assets/utils.js +33 -0
  297. khoj/interface/web/base_config.html +445 -0
  298. khoj/interface/web/content_source_github_input.html +208 -0
  299. khoj/interface/web/login.html +310 -0
  300. khoj/interface/web/utils.html +48 -0
  301. khoj/main.py +249 -0
  302. khoj/manage.py +22 -0
  303. khoj/migrations/__init__.py +0 -0
  304. khoj/migrations/migrate_offline_chat_default_model.py +69 -0
  305. khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
  306. khoj/migrations/migrate_offline_chat_schema.py +83 -0
  307. khoj/migrations/migrate_offline_model.py +29 -0
  308. khoj/migrations/migrate_processor_config_openai.py +67 -0
  309. khoj/migrations/migrate_server_pg.py +132 -0
  310. khoj/migrations/migrate_version.py +17 -0
  311. khoj/processor/__init__.py +0 -0
  312. khoj/processor/content/__init__.py +0 -0
  313. khoj/processor/content/docx/__init__.py +0 -0
  314. khoj/processor/content/docx/docx_to_entries.py +111 -0
  315. khoj/processor/content/github/__init__.py +0 -0
  316. khoj/processor/content/github/github_to_entries.py +226 -0
  317. khoj/processor/content/images/__init__.py +0 -0
  318. khoj/processor/content/images/image_to_entries.py +117 -0
  319. khoj/processor/content/markdown/__init__.py +0 -0
  320. khoj/processor/content/markdown/markdown_to_entries.py +160 -0
  321. khoj/processor/content/notion/notion_to_entries.py +259 -0
  322. khoj/processor/content/org_mode/__init__.py +0 -0
  323. khoj/processor/content/org_mode/org_to_entries.py +226 -0
  324. khoj/processor/content/org_mode/orgnode.py +532 -0
  325. khoj/processor/content/pdf/__init__.py +0 -0
  326. khoj/processor/content/pdf/pdf_to_entries.py +119 -0
  327. khoj/processor/content/plaintext/__init__.py +0 -0
  328. khoj/processor/content/plaintext/plaintext_to_entries.py +117 -0
  329. khoj/processor/content/text_to_entries.py +296 -0
  330. khoj/processor/conversation/__init__.py +0 -0
  331. khoj/processor/conversation/anthropic/__init__.py +0 -0
  332. khoj/processor/conversation/anthropic/anthropic_chat.py +243 -0
  333. khoj/processor/conversation/anthropic/utils.py +217 -0
  334. khoj/processor/conversation/google/__init__.py +0 -0
  335. khoj/processor/conversation/google/gemini_chat.py +253 -0
  336. khoj/processor/conversation/google/utils.py +260 -0
  337. khoj/processor/conversation/offline/__init__.py +0 -0
  338. khoj/processor/conversation/offline/chat_model.py +308 -0
  339. khoj/processor/conversation/offline/utils.py +80 -0
  340. khoj/processor/conversation/offline/whisper.py +15 -0
  341. khoj/processor/conversation/openai/__init__.py +0 -0
  342. khoj/processor/conversation/openai/gpt.py +243 -0
  343. khoj/processor/conversation/openai/utils.py +232 -0
  344. khoj/processor/conversation/openai/whisper.py +13 -0
  345. khoj/processor/conversation/prompts.py +1188 -0
  346. khoj/processor/conversation/utils.py +867 -0
  347. khoj/processor/embeddings.py +122 -0
  348. khoj/processor/image/generate.py +215 -0
  349. khoj/processor/speech/__init__.py +0 -0
  350. khoj/processor/speech/text_to_speech.py +51 -0
  351. khoj/processor/tools/__init__.py +0 -0
  352. khoj/processor/tools/online_search.py +472 -0
  353. khoj/processor/tools/run_code.py +179 -0
  354. khoj/routers/__init__.py +0 -0
  355. khoj/routers/api.py +760 -0
  356. khoj/routers/api_agents.py +295 -0
  357. khoj/routers/api_chat.py +1273 -0
  358. khoj/routers/api_content.py +634 -0
  359. khoj/routers/api_model.py +123 -0
  360. khoj/routers/api_phone.py +86 -0
  361. khoj/routers/api_subscription.py +144 -0
  362. khoj/routers/auth.py +307 -0
  363. khoj/routers/email.py +135 -0
  364. khoj/routers/helpers.py +2333 -0
  365. khoj/routers/notion.py +85 -0
  366. khoj/routers/research.py +364 -0
  367. khoj/routers/storage.py +63 -0
  368. khoj/routers/twilio.py +36 -0
  369. khoj/routers/web_client.py +141 -0
  370. khoj/search_filter/__init__.py +0 -0
  371. khoj/search_filter/base_filter.py +15 -0
  372. khoj/search_filter/date_filter.py +215 -0
  373. khoj/search_filter/file_filter.py +32 -0
  374. khoj/search_filter/word_filter.py +29 -0
  375. khoj/search_type/__init__.py +0 -0
  376. khoj/search_type/text_search.py +255 -0
  377. khoj/utils/__init__.py +0 -0
  378. khoj/utils/cli.py +101 -0
  379. khoj/utils/config.py +81 -0
  380. khoj/utils/constants.py +51 -0
  381. khoj/utils/fs_syncer.py +252 -0
  382. khoj/utils/helpers.py +627 -0
  383. khoj/utils/initialization.py +301 -0
  384. khoj/utils/jsonl.py +43 -0
  385. khoj/utils/models.py +47 -0
  386. khoj/utils/rawconfig.py +208 -0
  387. khoj/utils/state.py +48 -0
  388. khoj/utils/yaml.py +47 -0
  389. khoj-1.33.3.dev32.dist-info/METADATA +190 -0
  390. khoj-1.33.3.dev32.dist-info/RECORD +393 -0
  391. khoj-1.33.3.dev32.dist-info/WHEEL +4 -0
  392. khoj-1.33.3.dev32.dist-info/entry_points.txt +2 -0
  393. khoj-1.33.3.dev32.dist-info/licenses/LICENSE +661 -0
@@ -0,0 +1,259 @@
1
+ import logging
2
+ from enum import Enum
3
+ from typing import Tuple
4
+
5
+ import requests
6
+
7
+ from khoj.database.models import Entry as DbEntry
8
+ from khoj.database.models import KhojUser, NotionConfig
9
+ from khoj.processor.content.text_to_entries import TextToEntries
10
+ from khoj.utils.helpers import timer
11
+ from khoj.utils.rawconfig import Entry, NotionContentConfig
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class NotionBlockType(Enum):
    """String identifiers for the kinds of Notion blocks this module knows about.

    Each member's ``value`` is the lowercase identifier string; NotionToEntries
    builds its block-type lists from these values. Member order is kept as
    declared because Enum iteration follows definition order.
    """

    # Text-bearing blocks
    PARAGRAPH = "paragraph"
    HEADING_1 = "heading_1"
    HEADING_2 = "heading_2"
    HEADING_3 = "heading_3"
    BULLETED_LIST_ITEM = "bulleted_list_item"
    NUMBERED_LIST_ITEM = "numbered_list_item"
    TO_DO = "to_do"
    TOGGLE = "toggle"
    CHILD_PAGE = "child_page"
    UNSUPPORTED = "unsupported"
    BOOKMARK = "bookmark"
    DIVIDER = "divider"
    # Media and file blocks
    PDF = "pdf"
    IMAGE = "image"
    EMBED = "embed"
    VIDEO = "video"
    FILE = "file"
    # Layout, reference and miscellaneous blocks
    SYNCED_BLOCK = "synced_block"
    TABLE_OF_CONTENTS = "table_of_contents"
    COLUMN = "column"
    EQUATION = "equation"
    LINK_PREVIEW = "link_preview"
    COLUMN_LIST = "column_list"
    QUOTE = "quote"
    BREADCRUMB = "breadcrumb"
    LINK_TO_PAGE = "link_to_page"
    CHILD_DATABASE = "child_database"
    TEMPLATE = "template"
    CALLOUT = "callout"
46
+
47
+
48
+ class NotionToEntries(TextToEntries):
49
def __init__(self, config: NotionConfig):
    """Prepare the Notion HTTP session and the block-type lookup lists.

    Args:
        config: Notion configuration; only its ``token`` attribute is read here.
    """
    super().__init__(config)

    api_token = config.token
    self.config = NotionContentConfig(token=api_token)

    # Auth headers are attached only when a token is configured.
    self.session = requests.Session()
    if api_token:
        auth_headers = {"Authorization": f"Bearer {api_token}", "Notion-Version": "2022-02-22"}
        self.session.headers.update(auth_headers)

    # Block type identifiers stored as plain strings (the enum members' values).
    self.unsupported_block_types = [
        kind.value
        for kind in (
            NotionBlockType.BOOKMARK,
            NotionBlockType.DIVIDER,
            NotionBlockType.CHILD_DATABASE,
            NotionBlockType.TEMPLATE,
            NotionBlockType.CALLOUT,
            NotionBlockType.UNSUPPORTED,
        )
    ]

    self.display_block_block_types = [
        kind.value
        for kind in (
            NotionBlockType.PARAGRAPH,
            NotionBlockType.HEADING_1,
            NotionBlockType.HEADING_2,
            NotionBlockType.HEADING_3,
            NotionBlockType.BULLETED_LIST_ITEM,
            NotionBlockType.NUMBERED_LIST_ITEM,
            NotionBlockType.TO_DO,
            NotionBlockType.TOGGLE,
            NotionBlockType.CHILD_PAGE,
            NotionBlockType.BOOKMARK,
            NotionBlockType.DIVIDER,
        )
    ]

    # Base request body for the search endpoint.
    self.body_params = {"page_size": 100}
81
+
82
def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
    """Fetch everything the Notion search endpoint returns and index the pages.

    All result batches are collected first by following the pagination cursor,
    then every ``page`` object is converted to entries via ``process_page``.
    ``database`` objects are skipped for now.

    Args:
        files: Not used by this method.
        user: Passed through to ``update_entries_with_ids``.
        regenerate: Not used by this method.

    Returns:
        Whatever ``update_entries_with_ids`` returns for the collected entries.
    """
    # Paginate with a per-call copy of the request body. Writing the cursor into
    # self.body_params would make a later process() call on the same instance
    # start from a stale cursor and silently skip results.
    search_params = dict(self.body_params)

    responses = []
    with timer("Getting all pages via search endpoint", logger=logger):
        while True:
            result = self.session.post(
                "https://api.notion.com/v1/search",
                json=search_params,
            ).json()
            responses.append(result)

            # Stop on the last batch, and also when the cursor is absent
            # (e.g. an error payload), instead of raising KeyError.
            next_cursor = result.get("next_cursor")
            if not result.get("has_more") or not next_cursor:
                break
            search_params["start_cursor"] = next_cursor

    current_entries = []
    for response in responses:
        with timer("Processing response", logger=logger):
            # Get all pages content
            for p_or_d in response.get("results", []):
                with timer(f"Processing {p_or_d['object']} {p_or_d['id']}", logger=logger):
                    # TODO: Handle databases
                    if p_or_d["object"] == "page":
                        current_entries.extend(self.process_page(p_or_d))

    current_entries = TextToEntries.split_entries_by_max_tokens(current_entries, max_tokens=256)

    return self.update_entries_with_ids(current_entries, user=user)
117
+
118
def process_page(self, page):
    """Turn one Notion page into a list of entries, one per text-bearing block.

    Heading blocks do not produce entries themselves; the most recent heading
    text is prepended (via ``process_heading``) to each following block.

    Args:
        page: Page object; ``page["id"]`` is always read and ``page["url"]``
            is read when at least one entry is produced.

    Returns:
        A list of ``Entry`` objects, empty when the page title or content
        could not be fetched or no block carries text.
    """
    title, content = self.get_page_content(page["id"])
    if title is None or content is None:
        return []

    current_entries = []
    curr_heading = ""
    for block in content.get("results", []):
        block_type = block.get("type")
        if block_type is None:
            continue

        rich_text = block[block_type].get("rich_text")
        if not rich_text:
            # There's no text to handle here.
            continue

        if block_type in ("heading_1", "heading_2", "heading_3"):
            # A heading only updates the context used for the blocks after it.
            curr_heading = rich_text[0]["plain_text"]
            continue

        # Add the last known heading to the content for additional context
        raw_content = self.process_heading(curr_heading) if curr_heading != "" else ""
        for text in rich_text:
            raw_content += self.process_text(text)

        # Children are fetched when the key is absent as well (default True),
        # matching the prior behaviour of this method.
        if block.get("has_children", True):
            raw_content += "\n"
            raw_content = self.process_nested_children(
                self.get_block_children(block["id"]), raw_content, block_type
            )

        if raw_content != "":
            current_entries.append(
                Entry(
                    compiled=raw_content,
                    raw=raw_content,
                    heading=title,
                    file=page["url"],
                )
            )
    return current_entries
175
+
176
def process_heading(self, heading):
    """Return the heading wrapped in bold tags and surrounded by newlines."""
    formatted_heading = f"\n<b>{heading}</b>\n"
    return formatted_heading
178
+
179
+ def process_nested_children(self, children, raw_content, block_type=None):
180
+ results = children.get("results", [])
181
+ for child in results:
182
+ child_type = child.get("type")
183
+ if child_type == None:
184
+ continue
185
+ child_data = child[child_type]
186
+ if child_data.get("rich_text") and len(child_data["rich_text"]) > 0:
187
+ for text in child_data["rich_text"]:
188
+ raw_content += self.process_text(text, block_type)
189
+ if child_data.get("has_children", True):
190
+ return self.process_nested_children(self.get_block_children(child["id"]), raw_content, block_type)
191
+
192
+ return raw_content
193
+
194
def process_text(self, text, block_type=None):
    """Render one rich text item as a string.

    Items of an unsupported type render as an empty string, items with a
    link render as an anchor tag, and items whose own type or enclosing
    ``block_type`` is a display block are surrounded by newlines. Anything
    else is returned as its plain text.
    """
    text_type = text.get("type", None)
    if text_type in self.unsupported_block_types:
        return ""

    has_link = bool(text.get("href", None))
    if has_link:
        return f"<a href='{text['href']}'>{text['plain_text']}</a>"

    raw_text = text["plain_text"]
    display_types = self.display_block_block_types
    is_display_block = text_type in display_types or block_type in display_types
    if is_display_block:
        return f"\n{raw_text}\n"
    return raw_text
204
+
205
def get_block_children(self, block_id):
    """Fetch the children of a block, returning ``{}`` if the request fails.

    Any exception raised while requesting or decoding the response is
    logged and swallowed so one bad block does not abort processing.
    """
    try:
        response = self.session.get(f"https://api.notion.com/v1/blocks/{block_id}/children")
        children = response.json()
    except Exception as e:
        logger.error(f"Error getting children for block {block_id}: {e}")
        return {}
    return children
211
+
212
def get_page(self, page_id):
    """Request the page object for ``page_id`` and return the decoded JSON."""
    response = self.session.get(f"https://api.notion.com/v1/pages/{page_id}")
    return response.json()
214
+
215
def get_page_children(self, page_id):
    """Request the child blocks of ``page_id`` and return the decoded JSON."""
    response = self.session.get(f"https://api.notion.com/v1/blocks/{page_id}/children")
    return response.json()
217
+
218
def get_page_content(self, page_id):
    """Return ``(title, content)`` for a page.

    ``content`` is the page's block-children response. ``title`` is read
    from the first of the properties "Title", "Name", "Page", "Event" or
    "title" that exists on the page. It is ``None`` when no such property
    exists or the title cannot be extracted. When fetching the page or its
    children raises, ``(None, None)`` is returned.
    """
    try:
        page = self.get_page(page_id)
        content = self.get_page_children(page_id)
    except Exception as e:
        logger.error(f"Error getting page {page_id}: {e}", exc_info=True)
        return None, None

    properties = page.get("properties", {})

    # Property names that may hold the title, in order of precedence
    candidate_fields = ("Title", "Name", "Page", "Event", "title")
    title_field = next((name for name in candidate_fields if name in properties), None)
    if title_field is None:
        logger.debug(f"Title field not found for page {page_id}. Setting title as None...")
        return None, content

    try:
        title = page["properties"][title_field]["title"][0]["text"]["content"]
    except Exception as e:
        logger.warning(f"Error getting title for page {page_id}: {e}. Setting title as None...")
        title = None
    return title, content
246
+
247
def update_entries_with_ids(self, current_entries, user: KhojUser = None):
    """Pass the Notion entries to ``update_embeddings`` and return its counts.

    Entries are registered with the Notion entry type and source and keyed
    on their ``compiled`` field. Returns the pair
    ``(num_new_embeddings, num_deleted_embeddings)``.
    """
    # Identify, mark and merge any new entries with previous entries
    with timer("Identify new or updated entries", logger):
        new_count, deleted_count = self.update_embeddings(
            user,
            current_entries,
            DbEntry.EntryType.NOTION,
            DbEntry.EntrySource.NOTION,
            key="compiled",
            logger=logger,
        )

    return new_count, deleted_count
File without changes
@@ -0,0 +1,226 @@
1
+ import logging
2
+ import re
3
+ from pathlib import Path
4
+ from typing import Dict, List, Tuple
5
+
6
+ from khoj.database.models import Entry as DbEntry
7
+ from khoj.database.models import KhojUser
8
+ from khoj.processor.content.org_mode import orgnode
9
+ from khoj.processor.content.org_mode.orgnode import Orgnode
10
+ from khoj.processor.content.text_to_entries import TextToEntries
11
+ from khoj.utils import state
12
+ from khoj.utils.helpers import timer
13
+ from khoj.utils.rawconfig import Entry
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class OrgToEntries(TextToEntries):
    """Convert Org-mode files into entries and pass them on for indexing."""

    def __init__(self):
        super().__init__()

    # Define Functions
    def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
        """Extract entries from the given Org files and update the user's embeddings.

        ``files`` maps file names to their text. Files mapped to an empty
        string are not parsed; their names are passed to
        ``update_embeddings`` as the deletion set.

        Returns ``(num_new_embeddings, num_deleted_embeddings)``.
        """
        deletion_file_names = {file for file in files if files[file] == ""}
        files_to_process = set(files) - deletion_file_names
        files = {file: files[file] for file in files_to_process}

        # Extract Entries from specified Org files
        max_tokens = 256
        with timer("Extract entries from specified Org files", logger):
            file_to_text_map, current_entries = self.extract_org_entries(files, max_tokens=max_tokens)

        with timer("Split entries by max token size supported by model", logger):
            current_entries = self.split_entries_by_max_tokens(current_entries, max_tokens=max_tokens)

        # Identify, mark and merge any new entries with previous entries
        with timer("Identify new or updated entries", logger):
            num_new_embeddings, num_deleted_embeddings = self.update_embeddings(
                user,
                current_entries,
                DbEntry.EntryType.ORG,
                DbEntry.EntrySource.COMPUTER,
                "compiled",
                logger,
                deletion_file_names,
                regenerate=regenerate,
                file_to_text_map=file_to_text_map,
            )

        return num_new_embeddings, num_deleted_embeddings

    @staticmethod
    def extract_org_entries(
        org_files: dict[str, str], index_heading_entries: bool = False, max_tokens=256
    ) -> Tuple[Dict, List[Entry]]:
        """Extract entries from specified Org files.

        Returns the map of successfully parsed file names to their text,
        and the list of entries built from their org nodes.
        """
        file_to_text_map, entries, entry_to_file_map = OrgToEntries.extract_org_nodes(org_files, max_tokens)
        return file_to_text_map, OrgToEntries.convert_org_nodes_to_entries(
            entries, entry_to_file_map, index_heading_entries
        )

    @staticmethod
    def extract_org_nodes(
        org_files: dict[str, str], max_tokens
    ) -> Tuple[Dict, List[List[Orgnode]], Dict[Orgnode, str]]:
        """Extract org nodes from specified org files.

        A file that raises while being parsed is logged and skipped; it is
        then absent from the returned file to text map.
        """
        entries: List[List[Orgnode]] = []
        entry_to_file_map: List[Tuple[Orgnode, str]] = []
        file_to_text_map = {}
        for org_file in org_files:
            try:
                org_content = org_files[org_file]
                entries, entry_to_file_map = OrgToEntries.process_single_org_file(
                    org_content, org_file, entries, entry_to_file_map, max_tokens
                )
                file_to_text_map[org_file] = org_content
            except Exception as e:
                logger.error(f"Unable to process file: {org_file}. Skipped indexing it.\nError; {e}", exc_info=True)

        return file_to_text_map, entries, dict(entry_to_file_map)

    @staticmethod
    def process_single_org_file(
        org_content: str,
        org_file: str,
        entries: List[List[Orgnode]],
        entry_to_file_map: List[Tuple[Orgnode, str]],
        max_tokens=256,
        ancestry: Dict[int, str] = None,
    ) -> Tuple[List[List[Orgnode]], List[Tuple[Orgnode, str]]]:
        """Parse org_content from org_file into OrgNode entries

        Recurse down org file entries, one heading level at a time,
        until reach a leaf entry or the current entry tree fits max_tokens.

        Parse recursion terminating entry (trees) into (a list of) OrgNode objects.

        ``entries`` and ``entry_to_file_map`` are extended in place and also
        returned. ``ancestry`` maps heading level to heading title for the
        headings above ``org_content``; omit it for top-level file content.
        """
        # Use a None sentinel rather than a shared mutable default dict
        if ancestry is None:
            ancestry = {}

        # Prepend the org section's heading ancestry
        ancestry_string = "\n".join([f"{'*' * key} {ancestry[key]}" for key in sorted(ancestry.keys())])
        org_content_with_ancestry = f"{ancestry_string}{org_content}"

        # If content is small or content has no children headings, save it as a single entry
        # Note: This is the terminating condition for this recursive function
        if len(TextToEntries.tokenizer(org_content_with_ancestry)) <= max_tokens or not re.search(
            rf"^\*{{{len(ancestry)+1},}}\s", org_content, re.MULTILINE
        ):
            orgnode_content_with_ancestry = orgnode.makelist(org_content_with_ancestry, org_file)
            # Mutate the caller's lists in place; recursive calls rely on this
            # because they do not use the returned values.
            entry_to_file_map.extend(
                zip(orgnode_content_with_ancestry, [org_file] * len(orgnode_content_with_ancestry))
            )
            entries.append(orgnode_content_with_ancestry)
            return entries, entry_to_file_map

        # Split this entry tree into sections by the next heading level in it
        # Increment heading level until able to split entry into sections or reach max heading level
        # A successful split will result in at least 2 sections
        max_heading_level = 100
        next_heading_level = len(ancestry)
        sections: List[str] = []
        while len(sections) < 2 and next_heading_level < max_heading_level:
            next_heading_level += 1
            sections = re.split(rf"(\n|^)(?=[*]{{{next_heading_level}}} .+\n?)", org_content, flags=re.MULTILINE)

        # If unable to split entry into sections, log error and skip indexing it
        if next_heading_level == max_heading_level:
            logger.error(f"Unable to split current entry chunk: {org_content_with_ancestry[:20]}. Skip indexing it.")
            return entries, entry_to_file_map

        # Recurse down each non-empty section after parsing its body, heading and ancestry
        for section in sections:
            # Skip empty sections (this also drops the separators captured by the split)
            if section.strip() == "":
                continue

            # Extract the section body and (when present) the heading
            current_ancestry = ancestry.copy()
            first_non_empty_line = [line for line in section.split("\n") if line.strip() != ""][0]
            # If first non-empty line is a heading with expected heading level
            if re.search(rf"^\*{{{next_heading_level}}}\s", first_non_empty_line):
                # Extract the section body without the heading
                current_section_body = "\n".join(section.split(first_non_empty_line, 1)[1:])
                # Parse the section heading into current section ancestry
                current_section_title = first_non_empty_line[next_heading_level:].strip()
                current_ancestry[next_heading_level] = current_section_title
            # Else process the section as just body text
            else:
                current_section_body = section

            # Recurse down children of the current entry
            OrgToEntries.process_single_org_file(
                current_section_body,
                org_file,
                entries,
                entry_to_file_map,
                max_tokens,
                current_ancestry,
            )

        return entries, entry_to_file_map

    @staticmethod
    def convert_org_nodes_to_entries(
        parsed_entries: List[List[Orgnode]],
        entry_to_file_map: Dict[Orgnode, str],
        index_heading_entries: bool = False,
    ) -> List[Entry]:
        """
        Convert OrgNode lists into list of Entry objects

        Each list of OrgNodes is a parsed parent org tree or leaf node.
        Convert each list of these OrgNodes into a single Entry.

        Nodes without a body are skipped unless ``index_heading_entries``
        is set. A group that yields no compiled text produces no Entry.
        """
        entries: List[Entry] = []
        for entry_group in parsed_entries:
            entry_heading, entry_compiled, entry_raw = "", "", ""
            # Heading level of the first indexed heading node in this group.
            # Reset per group so a group that opens with a level 0 node never
            # reads an unset level, or one left over from a previous group.
            base_level = None
            for parsed_entry in entry_group:
                if not parsed_entry.hasBody and not index_heading_entries:
                    # Ignore title notes i.e notes with just headings and empty body
                    continue

                todo_str = f"{parsed_entry.todo} " if parsed_entry.todo else ""

                # Set base level to current org-node tree's root heading level
                if base_level is None and parsed_entry.level > 0:
                    base_level = parsed_entry.level
                # Indent entry by 1 heading level as ancestry is prepended as top level heading
                heading = f"{'*' * (parsed_entry.level-base_level+2)} {todo_str}" if parsed_entry.level > 0 else ""
                if parsed_entry.heading:
                    heading += f"{parsed_entry.heading}."

                # Prepend ancestor headings, filename as top heading to root parent entry for context
                # Children nodes do not need ancestors trail as root parent node will have it
                if not entry_heading:
                    ancestors_trail = " / ".join(parsed_entry.ancestors) or Path(entry_to_file_map[parsed_entry])
                    heading = f"* {ancestors_trail}\n{heading}" if heading else f"* {ancestors_trail}."

                compiled = heading

                if parsed_entry.tags:
                    tags_str = " ".join(parsed_entry.tags)
                    compiled += f"\t {tags_str}."

                if parsed_entry.closed:
                    compiled += f'\n Closed on {parsed_entry.closed.strftime("%Y-%m-%d")}.'

                if parsed_entry.scheduled:
                    compiled += f'\n Scheduled for {parsed_entry.scheduled.strftime("%Y-%m-%d")}.'

                if parsed_entry.hasBody:
                    compiled += f"\n {parsed_entry.body}"

                # Add the sub-entry contents to the entry
                entry_compiled += compiled
                entry_raw += f"{parsed_entry}"
                if not entry_heading:
                    entry_heading = heading

            if entry_compiled:
                entries.append(
                    Entry(
                        compiled=entry_compiled,
                        raw=entry_raw,
                        heading=entry_heading,
                        # Look the file up via the group's last node, stated
                        # explicitly instead of via the leaked loop variable.
                        file=entry_to_file_map[entry_group[-1]],
                    )
                )

        return entries