khoj 1.16.1.dev15__py3-none-any.whl → 1.17.1.dev220__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +6 -6
- khoj/database/adapters/__init__.py +56 -12
- khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
- khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
- khoj/database/models/__init__.py +35 -0
- khoj/interface/web/assets/icons/favicon-128x128.png +0 -0
- khoj/interface/web/assets/icons/favicon-256x256.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways.svg +31 -5384
- khoj/interface/web/assets/icons/khoj.svg +26 -0
- khoj/interface/web/chat.html +191 -301
- khoj/interface/web/content_source_computer_input.html +3 -3
- khoj/interface/web/content_source_github_input.html +1 -1
- khoj/interface/web/content_source_notion_input.html +1 -1
- khoj/interface/web/public_conversation.html +1 -1
- khoj/interface/web/search.html +2 -2
- khoj/interface/web/{config.html → settings.html} +30 -30
- khoj/interface/web/utils.html +1 -1
- khoj/processor/content/docx/docx_to_entries.py +4 -9
- khoj/processor/content/github/github_to_entries.py +1 -3
- khoj/processor/content/images/image_to_entries.py +4 -9
- khoj/processor/content/markdown/markdown_to_entries.py +4 -9
- khoj/processor/content/notion/notion_to_entries.py +1 -3
- khoj/processor/content/org_mode/org_to_entries.py +4 -9
- khoj/processor/content/pdf/pdf_to_entries.py +4 -9
- khoj/processor/content/plaintext/plaintext_to_entries.py +4 -9
- khoj/processor/content/text_to_entries.py +1 -3
- khoj/processor/conversation/anthropic/anthropic_chat.py +10 -4
- khoj/processor/conversation/offline/chat_model.py +19 -7
- khoj/processor/conversation/offline/utils.py +2 -0
- khoj/processor/conversation/openai/gpt.py +9 -3
- khoj/processor/conversation/prompts.py +56 -25
- khoj/processor/conversation/utils.py +5 -6
- khoj/processor/tools/online_search.py +13 -7
- khoj/routers/api.py +60 -10
- khoj/routers/api_agents.py +3 -1
- khoj/routers/api_chat.py +335 -562
- khoj/routers/api_content.py +538 -0
- khoj/routers/api_model.py +156 -0
- khoj/routers/helpers.py +339 -26
- khoj/routers/notion.py +2 -8
- khoj/routers/web_client.py +43 -256
- khoj/search_type/text_search.py +5 -4
- khoj/utils/fs_syncer.py +4 -2
- khoj/utils/rawconfig.py +6 -1
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev220.dist-info}/METADATA +3 -3
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev220.dist-info}/RECORD +51 -48
- khoj/routers/api_config.py +0 -434
- khoj/routers/indexer.py +0 -349
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev220.dist-info}/WHEEL +0 -0
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev220.dist-info}/entry_points.txt +0 -0
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev220.dist-info}/licenses/LICENSE +0 -0
khoj/routers/helpers.py
CHANGED
|
@@ -5,9 +5,11 @@ import io
|
|
|
5
5
|
import json
|
|
6
6
|
import logging
|
|
7
7
|
import math
|
|
8
|
+
import os
|
|
8
9
|
import re
|
|
9
10
|
from concurrent.futures import ThreadPoolExecutor
|
|
10
11
|
from datetime import datetime, timedelta, timezone
|
|
12
|
+
from enum import Enum
|
|
11
13
|
from functools import partial
|
|
12
14
|
from random import random
|
|
13
15
|
from typing import (
|
|
@@ -35,6 +37,7 @@ from PIL import Image
|
|
|
35
37
|
from starlette.authentication import has_required_scope
|
|
36
38
|
from starlette.requests import URL
|
|
37
39
|
|
|
40
|
+
from khoj.database import adapters
|
|
38
41
|
from khoj.database.adapters import (
|
|
39
42
|
AgentAdapters,
|
|
40
43
|
AutomationAdapters,
|
|
@@ -42,18 +45,31 @@ from khoj.database.adapters import (
|
|
|
42
45
|
EntryAdapters,
|
|
43
46
|
create_khoj_token,
|
|
44
47
|
get_khoj_tokens,
|
|
48
|
+
get_user_name,
|
|
49
|
+
get_user_notion_config,
|
|
50
|
+
get_user_subscription_state,
|
|
45
51
|
run_with_process_lock,
|
|
46
52
|
)
|
|
47
53
|
from khoj.database.models import (
|
|
48
54
|
ChatModelOptions,
|
|
49
55
|
ClientApplication,
|
|
50
56
|
Conversation,
|
|
57
|
+
GithubConfig,
|
|
51
58
|
KhojUser,
|
|
59
|
+
NotionConfig,
|
|
52
60
|
ProcessLock,
|
|
53
61
|
Subscription,
|
|
54
62
|
TextToImageModelConfig,
|
|
55
63
|
UserRequests,
|
|
56
64
|
)
|
|
65
|
+
from khoj.processor.content.docx.docx_to_entries import DocxToEntries
|
|
66
|
+
from khoj.processor.content.github.github_to_entries import GithubToEntries
|
|
67
|
+
from khoj.processor.content.images.image_to_entries import ImageToEntries
|
|
68
|
+
from khoj.processor.content.markdown.markdown_to_entries import MarkdownToEntries
|
|
69
|
+
from khoj.processor.content.notion.notion_to_entries import NotionToEntries
|
|
70
|
+
from khoj.processor.content.org_mode.org_to_entries import OrgToEntries
|
|
71
|
+
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
|
|
72
|
+
from khoj.processor.content.plaintext.plaintext_to_entries import PlaintextToEntries
|
|
57
73
|
from khoj.processor.conversation import prompts
|
|
58
74
|
from khoj.processor.conversation.anthropic.anthropic_chat import (
|
|
59
75
|
anthropic_send_message_to_model,
|
|
@@ -69,11 +85,15 @@ from khoj.processor.conversation.utils import (
|
|
|
69
85
|
generate_chatml_messages_with_context,
|
|
70
86
|
save_to_conversation_log,
|
|
71
87
|
)
|
|
88
|
+
from khoj.processor.speech.text_to_speech import is_eleven_labs_enabled
|
|
72
89
|
from khoj.routers.email import is_resend_enabled, send_task_email
|
|
73
90
|
from khoj.routers.storage import upload_image
|
|
91
|
+
from khoj.routers.twilio import is_twilio_enabled
|
|
92
|
+
from khoj.search_type import text_search
|
|
74
93
|
from khoj.utils import state
|
|
75
94
|
from khoj.utils.config import OfflineChatProcessorModel
|
|
76
95
|
from khoj.utils.helpers import (
|
|
96
|
+
LRU,
|
|
77
97
|
ConversationCommand,
|
|
78
98
|
ImageIntentType,
|
|
79
99
|
is_none_or_empty,
|
|
@@ -90,6 +110,11 @@ logger = logging.getLogger(__name__)
|
|
|
90
110
|
executor = ThreadPoolExecutor(max_workers=1)
|
|
91
111
|
|
|
92
112
|
|
|
113
|
+
NOTION_OAUTH_CLIENT_ID = os.getenv("NOTION_OAUTH_CLIENT_ID")
|
|
114
|
+
NOTION_OAUTH_CLIENT_SECRET = os.getenv("NOTION_OAUTH_CLIENT_SECRET")
|
|
115
|
+
NOTION_REDIRECT_URI = os.getenv("NOTION_REDIRECT_URI")
|
|
116
|
+
|
|
117
|
+
|
|
93
118
|
def is_query_empty(query: str) -> bool:
|
|
94
119
|
return is_none_or_empty(query.strip())
|
|
95
120
|
|
|
@@ -298,7 +323,7 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_
|
|
|
298
323
|
response = await send_message_to_model_wrapper(relevant_mode_prompt)
|
|
299
324
|
|
|
300
325
|
try:
|
|
301
|
-
response = response.strip()
|
|
326
|
+
response = response.strip().strip('"')
|
|
302
327
|
|
|
303
328
|
if is_none_or_empty(response):
|
|
304
329
|
return ConversationCommand.Text
|
|
@@ -307,6 +332,7 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_
|
|
|
307
332
|
# Check whether the tool exists as a valid ConversationCommand
|
|
308
333
|
return ConversationCommand(response)
|
|
309
334
|
|
|
335
|
+
logger.error(f"Invalid output mode selected: {response}. Defaulting to text.")
|
|
310
336
|
return ConversationCommand.Text
|
|
311
337
|
except Exception:
|
|
312
338
|
logger.error(f"Invalid response for determining relevant mode: {response}")
|
|
@@ -519,9 +545,6 @@ async def send_message_to_model_wrapper(
|
|
|
519
545
|
chat_model_option or await ConversationAdapters.aget_default_conversation_config()
|
|
520
546
|
)
|
|
521
547
|
|
|
522
|
-
if conversation_config is None:
|
|
523
|
-
raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.")
|
|
524
|
-
|
|
525
548
|
chat_model = conversation_config.chat_model
|
|
526
549
|
max_tokens = conversation_config.max_prompt_size
|
|
527
550
|
tokenizer = conversation_config.tokenizer
|
|
@@ -755,7 +778,7 @@ async def text_to_image(
|
|
|
755
778
|
references: List[Dict[str, Any]],
|
|
756
779
|
online_results: Dict[str, Any],
|
|
757
780
|
send_status_func: Optional[Callable] = None,
|
|
758
|
-
)
|
|
781
|
+
):
|
|
759
782
|
status_code = 200
|
|
760
783
|
image = None
|
|
761
784
|
response = None
|
|
@@ -767,7 +790,8 @@ async def text_to_image(
|
|
|
767
790
|
# If the user has not configured a text to image model, return an unsupported on server error
|
|
768
791
|
status_code = 501
|
|
769
792
|
message = "Failed to generate image. Setup image generation on the server."
|
|
770
|
-
|
|
793
|
+
yield image_url or image, status_code, message, intent_type.value
|
|
794
|
+
return
|
|
771
795
|
|
|
772
796
|
text2image_model = text_to_image_config.model_name
|
|
773
797
|
chat_history = ""
|
|
@@ -779,20 +803,21 @@ async def text_to_image(
|
|
|
779
803
|
chat_history += f"Q: Prompt: {chat['intent']['query']}\n"
|
|
780
804
|
chat_history += f"A: Improved Prompt: {chat['intent']['inferred-queries'][0]}\n"
|
|
781
805
|
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
806
|
+
if send_status_func:
|
|
807
|
+
async for event in send_status_func("**Enhancing the Painting Prompt**"):
|
|
808
|
+
yield {ChatEvent.STATUS: event}
|
|
809
|
+
improved_image_prompt = await generate_better_image_prompt(
|
|
810
|
+
message,
|
|
811
|
+
chat_history,
|
|
812
|
+
location_data=location_data,
|
|
813
|
+
note_references=references,
|
|
814
|
+
online_results=online_results,
|
|
815
|
+
model_type=text_to_image_config.model_type,
|
|
816
|
+
)
|
|
793
817
|
|
|
794
818
|
if send_status_func:
|
|
795
|
-
|
|
819
|
+
async for event in send_status_func(f"**Painting to Imagine**:\n{improved_image_prompt}"):
|
|
820
|
+
yield {ChatEvent.STATUS: event}
|
|
796
821
|
|
|
797
822
|
if text_to_image_config.model_type == TextToImageModelConfig.ModelType.OPENAI:
|
|
798
823
|
with timer("Generate image with OpenAI", logger):
|
|
@@ -817,12 +842,14 @@ async def text_to_image(
|
|
|
817
842
|
logger.error(f"Image Generation blocked by OpenAI: {e}")
|
|
818
843
|
status_code = e.status_code # type: ignore
|
|
819
844
|
message = f"Image generation blocked by OpenAI: {e.message}" # type: ignore
|
|
820
|
-
|
|
845
|
+
yield image_url or image, status_code, message, intent_type.value
|
|
846
|
+
return
|
|
821
847
|
else:
|
|
822
848
|
logger.error(f"Image Generation failed with {e}", exc_info=True)
|
|
823
849
|
message = f"Image generation failed with OpenAI error: {e.message}" # type: ignore
|
|
824
850
|
status_code = e.status_code # type: ignore
|
|
825
|
-
|
|
851
|
+
yield image_url or image, status_code, message, intent_type.value
|
|
852
|
+
return
|
|
826
853
|
|
|
827
854
|
elif text_to_image_config.model_type == TextToImageModelConfig.ModelType.STABILITYAI:
|
|
828
855
|
with timer("Generate image with Stability AI", logger):
|
|
@@ -844,7 +871,8 @@ async def text_to_image(
|
|
|
844
871
|
logger.error(f"Image Generation failed with {e}", exc_info=True)
|
|
845
872
|
message = f"Image generation failed with Stability AI error: {e}"
|
|
846
873
|
status_code = e.status_code # type: ignore
|
|
847
|
-
|
|
874
|
+
yield image_url or image, status_code, message, intent_type.value
|
|
875
|
+
return
|
|
848
876
|
|
|
849
877
|
with timer("Convert image to webp", logger):
|
|
850
878
|
# Convert png to webp for faster loading
|
|
@@ -864,7 +892,7 @@ async def text_to_image(
|
|
|
864
892
|
intent_type = ImageIntentType.TEXT_TO_IMAGE_V3
|
|
865
893
|
image = base64.b64encode(webp_image_bytes).decode("utf-8")
|
|
866
894
|
|
|
867
|
-
|
|
895
|
+
yield image_url or image, status_code, improved_image_prompt, intent_type.value
|
|
868
896
|
|
|
869
897
|
|
|
870
898
|
class ApiUserRateLimiter:
|
|
@@ -902,7 +930,7 @@ class ApiUserRateLimiter:
|
|
|
902
930
|
)
|
|
903
931
|
raise HTTPException(
|
|
904
932
|
status_code=429,
|
|
905
|
-
detail="We're glad you're enjoying Khoj! You've exceeded your usage limit for today. Come back tomorrow or subscribe to increase your usage limit via [your settings](https://app.khoj.dev/
|
|
933
|
+
detail="We're glad you're enjoying Khoj! You've exceeded your usage limit for today. Come back tomorrow or subscribe to increase your usage limit via [your settings](https://app.khoj.dev/settings).",
|
|
906
934
|
)
|
|
907
935
|
|
|
908
936
|
# Add the current request to the cache
|
|
@@ -941,7 +969,7 @@ class ConversationCommandRateLimiter:
|
|
|
941
969
|
if not subscribed and count_requests >= self.trial_rate_limit:
|
|
942
970
|
raise HTTPException(
|
|
943
971
|
status_code=429,
|
|
944
|
-
detail=f"We're glad you're enjoying Khoj! You've exceeded your `/{conversation_command.value}` command usage limit for today. Subscribe to increase your usage limit via [your settings](https://app.khoj.dev/
|
|
972
|
+
detail=f"We're glad you're enjoying Khoj! You've exceeded your `/{conversation_command.value}` command usage limit for today. Subscribe to increase your usage limit via [your settings](https://app.khoj.dev/settings).",
|
|
945
973
|
)
|
|
946
974
|
await UserRequests.objects.acreate(user=user, slug=command_slug)
|
|
947
975
|
return
|
|
@@ -960,14 +988,15 @@ class ApiIndexedDataLimiter:
|
|
|
960
988
|
self.total_entries_size_limit = total_entries_size_limit
|
|
961
989
|
self.subscribed_total_entries_size = subscribed_total_entries_size_limit
|
|
962
990
|
|
|
963
|
-
def __call__(self, request: Request, files: List[UploadFile]):
|
|
991
|
+
def __call__(self, request: Request, files: List[UploadFile] = None):
|
|
964
992
|
if state.billing_enabled is False:
|
|
965
993
|
return
|
|
994
|
+
|
|
966
995
|
subscribed = has_required_scope(request, ["premium"])
|
|
967
996
|
incoming_data_size_mb = 0.0
|
|
968
997
|
deletion_file_names = set()
|
|
969
998
|
|
|
970
|
-
if not request.user.is_authenticated:
|
|
999
|
+
if not request.user.is_authenticated or not files:
|
|
971
1000
|
return
|
|
972
1001
|
|
|
973
1002
|
user: KhojUser = request.user.object
|
|
@@ -1186,3 +1215,287 @@ def construct_automation_created_message(automation: Job, crontime: str, query_t
|
|
|
1186
1215
|
|
|
1187
1216
|
Manage your automations [here](/automations).
|
|
1188
1217
|
""".strip()
|
|
1218
|
+
|
|
1219
|
+
|
|
1220
|
+
class ChatEvent(Enum):
|
|
1221
|
+
START_LLM_RESPONSE = "start_llm_response"
|
|
1222
|
+
END_LLM_RESPONSE = "end_llm_response"
|
|
1223
|
+
MESSAGE = "message"
|
|
1224
|
+
REFERENCES = "references"
|
|
1225
|
+
STATUS = "status"
|
|
1226
|
+
|
|
1227
|
+
|
|
1228
|
+
def get_user_config(user: KhojUser, request: Request, is_detailed: bool = False):
|
|
1229
|
+
user_picture = request.session.get("user", {}).get("picture")
|
|
1230
|
+
is_active = has_required_scope(request, ["premium"])
|
|
1231
|
+
has_documents = EntryAdapters.user_has_entries(user=user)
|
|
1232
|
+
|
|
1233
|
+
if not is_detailed:
|
|
1234
|
+
return {
|
|
1235
|
+
"request": request,
|
|
1236
|
+
"username": user.username if user else None,
|
|
1237
|
+
"user_photo": user_picture,
|
|
1238
|
+
"is_active": is_active,
|
|
1239
|
+
"has_documents": has_documents,
|
|
1240
|
+
"khoj_version": state.khoj_version,
|
|
1241
|
+
}
|
|
1242
|
+
|
|
1243
|
+
user_subscription_state = get_user_subscription_state(user.email)
|
|
1244
|
+
user_subscription = adapters.get_user_subscription(user.email)
|
|
1245
|
+
subscription_renewal_date = (
|
|
1246
|
+
user_subscription.renewal_date.strftime("%d %b %Y")
|
|
1247
|
+
if user_subscription and user_subscription.renewal_date
|
|
1248
|
+
else (user_subscription.created_at + timedelta(days=7)).strftime("%d %b %Y")
|
|
1249
|
+
)
|
|
1250
|
+
given_name = get_user_name(user)
|
|
1251
|
+
|
|
1252
|
+
enabled_content_sources_set = set(EntryAdapters.get_unique_file_sources(user))
|
|
1253
|
+
enabled_content_sources = {
|
|
1254
|
+
"computer": ("computer" in enabled_content_sources_set),
|
|
1255
|
+
"github": ("github" in enabled_content_sources_set),
|
|
1256
|
+
"notion": ("notion" in enabled_content_sources_set),
|
|
1257
|
+
}
|
|
1258
|
+
|
|
1259
|
+
notion_oauth_url = get_notion_auth_url(user)
|
|
1260
|
+
current_notion_config = get_user_notion_config(user)
|
|
1261
|
+
notion_token = current_notion_config.token if current_notion_config else ""
|
|
1262
|
+
|
|
1263
|
+
selected_chat_model_config = ConversationAdapters.get_conversation_config(user)
|
|
1264
|
+
chat_models = ConversationAdapters.get_conversation_processor_options().all()
|
|
1265
|
+
chat_model_options = list()
|
|
1266
|
+
for chat_model in chat_models:
|
|
1267
|
+
chat_model_options.append({"name": chat_model.chat_model, "id": chat_model.id})
|
|
1268
|
+
|
|
1269
|
+
search_model_options = adapters.get_or_create_search_models().all()
|
|
1270
|
+
all_search_model_options = list()
|
|
1271
|
+
for search_model_option in search_model_options:
|
|
1272
|
+
all_search_model_options.append({"name": search_model_option.name, "id": search_model_option.id})
|
|
1273
|
+
|
|
1274
|
+
current_search_model_option = adapters.get_user_search_model_or_default(user)
|
|
1275
|
+
|
|
1276
|
+
selected_paint_model_config = ConversationAdapters.get_user_text_to_image_model_config(user)
|
|
1277
|
+
paint_model_options = ConversationAdapters.get_text_to_image_model_options().all()
|
|
1278
|
+
all_paint_model_options = list()
|
|
1279
|
+
for paint_model in paint_model_options:
|
|
1280
|
+
all_paint_model_options.append({"name": paint_model.model_name, "id": paint_model.id})
|
|
1281
|
+
|
|
1282
|
+
voice_models = ConversationAdapters.get_voice_model_options()
|
|
1283
|
+
voice_model_options = list()
|
|
1284
|
+
for voice_model in voice_models:
|
|
1285
|
+
voice_model_options.append({"name": voice_model.name, "id": voice_model.model_id})
|
|
1286
|
+
|
|
1287
|
+
if len(voice_model_options) == 0:
|
|
1288
|
+
eleven_labs_enabled = False
|
|
1289
|
+
else:
|
|
1290
|
+
eleven_labs_enabled = is_eleven_labs_enabled()
|
|
1291
|
+
|
|
1292
|
+
selected_voice_model_config = ConversationAdapters.get_voice_model_config(user)
|
|
1293
|
+
|
|
1294
|
+
return {
|
|
1295
|
+
"request": request,
|
|
1296
|
+
# user info
|
|
1297
|
+
"username": user.username if user else None,
|
|
1298
|
+
"user_photo": user_picture,
|
|
1299
|
+
"is_active": is_active,
|
|
1300
|
+
"given_name": given_name,
|
|
1301
|
+
"phone_number": str(user.phone_number) if user.phone_number else "",
|
|
1302
|
+
"is_phone_number_verified": user.verified_phone_number,
|
|
1303
|
+
# user content settings
|
|
1304
|
+
"enabled_content_source": enabled_content_sources,
|
|
1305
|
+
"has_documents": has_documents,
|
|
1306
|
+
"notion_token": notion_token,
|
|
1307
|
+
# user model settings
|
|
1308
|
+
"search_model_options": all_search_model_options,
|
|
1309
|
+
"selected_search_model_config": current_search_model_option.id,
|
|
1310
|
+
"chat_model_options": chat_model_options,
|
|
1311
|
+
"selected_chat_model_config": selected_chat_model_config.id if selected_chat_model_config else None,
|
|
1312
|
+
"paint_model_options": all_paint_model_options,
|
|
1313
|
+
"selected_paint_model_config": selected_paint_model_config.id if selected_paint_model_config else None,
|
|
1314
|
+
"voice_model_options": voice_model_options,
|
|
1315
|
+
"selected_voice_model_config": selected_voice_model_config.model_id if selected_voice_model_config else None,
|
|
1316
|
+
# user billing info
|
|
1317
|
+
"subscription_state": user_subscription_state,
|
|
1318
|
+
"subscription_renewal_date": subscription_renewal_date,
|
|
1319
|
+
# server settings
|
|
1320
|
+
"khoj_cloud_subscription_url": os.getenv("KHOJ_CLOUD_SUBSCRIPTION_URL"),
|
|
1321
|
+
"billing_enabled": state.billing_enabled,
|
|
1322
|
+
"is_eleven_labs_enabled": eleven_labs_enabled,
|
|
1323
|
+
"is_twilio_enabled": is_twilio_enabled(),
|
|
1324
|
+
"khoj_version": state.khoj_version,
|
|
1325
|
+
"anonymous_mode": state.anonymous_mode,
|
|
1326
|
+
"notion_oauth_url": notion_oauth_url,
|
|
1327
|
+
}
|
|
1328
|
+
|
|
1329
|
+
|
|
1330
|
+
def configure_content(
|
|
1331
|
+
files: Optional[dict[str, dict[str, str]]],
|
|
1332
|
+
regenerate: bool = False,
|
|
1333
|
+
t: Optional[state.SearchType] = state.SearchType.All,
|
|
1334
|
+
user: KhojUser = None,
|
|
1335
|
+
) -> bool:
|
|
1336
|
+
success = True
|
|
1337
|
+
if t == None:
|
|
1338
|
+
t = state.SearchType.All
|
|
1339
|
+
|
|
1340
|
+
if t is not None and t in [type.value for type in state.SearchType]:
|
|
1341
|
+
t = state.SearchType(t)
|
|
1342
|
+
|
|
1343
|
+
if t is not None and not t.value in [type.value for type in state.SearchType]:
|
|
1344
|
+
logger.warning(f"🚨 Invalid search type: {t}")
|
|
1345
|
+
return False
|
|
1346
|
+
|
|
1347
|
+
search_type = t.value if t else None
|
|
1348
|
+
|
|
1349
|
+
no_documents = all([not files.get(file_type) for file_type in files])
|
|
1350
|
+
|
|
1351
|
+
if files is None:
|
|
1352
|
+
logger.warning(f"🚨 No files to process for {search_type} search.")
|
|
1353
|
+
return True
|
|
1354
|
+
|
|
1355
|
+
try:
|
|
1356
|
+
# Initialize Org Notes Search
|
|
1357
|
+
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Org.value) and files["org"]:
|
|
1358
|
+
logger.info("🦄 Setting up search for orgmode notes")
|
|
1359
|
+
# Extract Entries, Generate Notes Embeddings
|
|
1360
|
+
text_search.setup(
|
|
1361
|
+
OrgToEntries,
|
|
1362
|
+
files.get("org"),
|
|
1363
|
+
regenerate=regenerate,
|
|
1364
|
+
user=user,
|
|
1365
|
+
)
|
|
1366
|
+
except Exception as e:
|
|
1367
|
+
logger.error(f"🚨 Failed to setup org: {e}", exc_info=True)
|
|
1368
|
+
success = False
|
|
1369
|
+
|
|
1370
|
+
try:
|
|
1371
|
+
# Initialize Markdown Search
|
|
1372
|
+
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Markdown.value) and files[
|
|
1373
|
+
"markdown"
|
|
1374
|
+
]:
|
|
1375
|
+
logger.info("💎 Setting up search for markdown notes")
|
|
1376
|
+
# Extract Entries, Generate Markdown Embeddings
|
|
1377
|
+
text_search.setup(
|
|
1378
|
+
MarkdownToEntries,
|
|
1379
|
+
files.get("markdown"),
|
|
1380
|
+
regenerate=regenerate,
|
|
1381
|
+
user=user,
|
|
1382
|
+
)
|
|
1383
|
+
|
|
1384
|
+
except Exception as e:
|
|
1385
|
+
logger.error(f"🚨 Failed to setup markdown: {e}", exc_info=True)
|
|
1386
|
+
success = False
|
|
1387
|
+
|
|
1388
|
+
try:
|
|
1389
|
+
# Initialize PDF Search
|
|
1390
|
+
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Pdf.value) and files["pdf"]:
|
|
1391
|
+
logger.info("🖨️ Setting up search for pdf")
|
|
1392
|
+
# Extract Entries, Generate PDF Embeddings
|
|
1393
|
+
text_search.setup(
|
|
1394
|
+
PdfToEntries,
|
|
1395
|
+
files.get("pdf"),
|
|
1396
|
+
regenerate=regenerate,
|
|
1397
|
+
user=user,
|
|
1398
|
+
)
|
|
1399
|
+
|
|
1400
|
+
except Exception as e:
|
|
1401
|
+
logger.error(f"🚨 Failed to setup PDF: {e}", exc_info=True)
|
|
1402
|
+
success = False
|
|
1403
|
+
|
|
1404
|
+
try:
|
|
1405
|
+
# Initialize Plaintext Search
|
|
1406
|
+
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Plaintext.value) and files[
|
|
1407
|
+
"plaintext"
|
|
1408
|
+
]:
|
|
1409
|
+
logger.info("📄 Setting up search for plaintext")
|
|
1410
|
+
# Extract Entries, Generate Plaintext Embeddings
|
|
1411
|
+
text_search.setup(
|
|
1412
|
+
PlaintextToEntries,
|
|
1413
|
+
files.get("plaintext"),
|
|
1414
|
+
regenerate=regenerate,
|
|
1415
|
+
user=user,
|
|
1416
|
+
)
|
|
1417
|
+
|
|
1418
|
+
except Exception as e:
|
|
1419
|
+
logger.error(f"🚨 Failed to setup plaintext: {e}", exc_info=True)
|
|
1420
|
+
success = False
|
|
1421
|
+
|
|
1422
|
+
try:
|
|
1423
|
+
if no_documents:
|
|
1424
|
+
github_config = GithubConfig.objects.filter(user=user).prefetch_related("githubrepoconfig").first()
|
|
1425
|
+
if (
|
|
1426
|
+
search_type == state.SearchType.All.value or search_type == state.SearchType.Github.value
|
|
1427
|
+
) and github_config is not None:
|
|
1428
|
+
logger.info("🐙 Setting up search for github")
|
|
1429
|
+
# Extract Entries, Generate Github Embeddings
|
|
1430
|
+
text_search.setup(
|
|
1431
|
+
GithubToEntries,
|
|
1432
|
+
None,
|
|
1433
|
+
regenerate=regenerate,
|
|
1434
|
+
user=user,
|
|
1435
|
+
config=github_config,
|
|
1436
|
+
)
|
|
1437
|
+
|
|
1438
|
+
except Exception as e:
|
|
1439
|
+
logger.error(f"🚨 Failed to setup GitHub: {e}", exc_info=True)
|
|
1440
|
+
success = False
|
|
1441
|
+
|
|
1442
|
+
try:
|
|
1443
|
+
if no_documents:
|
|
1444
|
+
# Initialize Notion Search
|
|
1445
|
+
notion_config = NotionConfig.objects.filter(user=user).first()
|
|
1446
|
+
if (
|
|
1447
|
+
search_type == state.SearchType.All.value or search_type == state.SearchType.Notion.value
|
|
1448
|
+
) and notion_config:
|
|
1449
|
+
logger.info("🔌 Setting up search for notion")
|
|
1450
|
+
text_search.setup(
|
|
1451
|
+
NotionToEntries,
|
|
1452
|
+
None,
|
|
1453
|
+
regenerate=regenerate,
|
|
1454
|
+
user=user,
|
|
1455
|
+
config=notion_config,
|
|
1456
|
+
)
|
|
1457
|
+
|
|
1458
|
+
except Exception as e:
|
|
1459
|
+
logger.error(f"🚨 Failed to setup Notion: {e}", exc_info=True)
|
|
1460
|
+
success = False
|
|
1461
|
+
|
|
1462
|
+
try:
|
|
1463
|
+
# Initialize Image Search
|
|
1464
|
+
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Image.value) and files[
|
|
1465
|
+
"image"
|
|
1466
|
+
]:
|
|
1467
|
+
logger.info("🖼️ Setting up search for images")
|
|
1468
|
+
# Extract Entries, Generate Image Embeddings
|
|
1469
|
+
text_search.setup(
|
|
1470
|
+
ImageToEntries,
|
|
1471
|
+
files.get("image"),
|
|
1472
|
+
regenerate=regenerate,
|
|
1473
|
+
user=user,
|
|
1474
|
+
)
|
|
1475
|
+
except Exception as e:
|
|
1476
|
+
logger.error(f"🚨 Failed to setup images: {e}", exc_info=True)
|
|
1477
|
+
success = False
|
|
1478
|
+
try:
|
|
1479
|
+
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Docx.value) and files["docx"]:
|
|
1480
|
+
logger.info("📄 Setting up search for docx")
|
|
1481
|
+
text_search.setup(
|
|
1482
|
+
DocxToEntries,
|
|
1483
|
+
files.get("docx"),
|
|
1484
|
+
regenerate=regenerate,
|
|
1485
|
+
user=user,
|
|
1486
|
+
)
|
|
1487
|
+
except Exception as e:
|
|
1488
|
+
logger.error(f"🚨 Failed to setup docx: {e}", exc_info=True)
|
|
1489
|
+
success = False
|
|
1490
|
+
|
|
1491
|
+
# Invalidate Query Cache
|
|
1492
|
+
if user:
|
|
1493
|
+
state.query_cache[user.uuid] = LRU()
|
|
1494
|
+
|
|
1495
|
+
return success
|
|
1496
|
+
|
|
1497
|
+
|
|
1498
|
+
def get_notion_auth_url(user: KhojUser):
|
|
1499
|
+
if not NOTION_OAUTH_CLIENT_ID or not NOTION_OAUTH_CLIENT_SECRET or not NOTION_REDIRECT_URI:
|
|
1500
|
+
return None
|
|
1501
|
+
return f"https://api.notion.com/v1/oauth/authorize?client_id={NOTION_OAUTH_CLIENT_ID}&redirect_uri={NOTION_REDIRECT_URI}&response_type=code&state={user.uuid}"
|
khoj/routers/notion.py
CHANGED
|
@@ -11,7 +11,7 @@ from starlette.responses import RedirectResponse
|
|
|
11
11
|
|
|
12
12
|
from khoj.database.adapters import aget_user_by_uuid
|
|
13
13
|
from khoj.database.models import KhojUser, NotionConfig
|
|
14
|
-
from khoj.routers.
|
|
14
|
+
from khoj.routers.helpers import configure_content
|
|
15
15
|
from khoj.utils.state import SearchType
|
|
16
16
|
|
|
17
17
|
NOTION_OAUTH_CLIENT_ID = os.getenv("NOTION_OAUTH_CLIENT_ID")
|
|
@@ -25,12 +25,6 @@ executor = ThreadPoolExecutor()
|
|
|
25
25
|
logger = logging.getLogger(__name__)
|
|
26
26
|
|
|
27
27
|
|
|
28
|
-
def get_notion_auth_url(user: KhojUser):
|
|
29
|
-
if not NOTION_OAUTH_CLIENT_ID or not NOTION_OAUTH_CLIENT_SECRET or not NOTION_REDIRECT_URI:
|
|
30
|
-
return None
|
|
31
|
-
return f"https://api.notion.com/v1/oauth/authorize?client_id={NOTION_OAUTH_CLIENT_ID}&redirect_uri={NOTION_REDIRECT_URI}&response_type=code&state={user.uuid}"
|
|
32
|
-
|
|
33
|
-
|
|
34
28
|
async def run_in_executor(func, *args):
|
|
35
29
|
loop = asyncio.get_event_loop()
|
|
36
30
|
return await loop.run_in_executor(executor, func, *args)
|
|
@@ -86,6 +80,6 @@ async def notion_auth_callback(request: Request, background_tasks: BackgroundTas
|
|
|
86
80
|
notion_redirect = str(request.app.url_path_for("notion_config_page"))
|
|
87
81
|
|
|
88
82
|
# Trigger an async job to configure_content. Let it run without blocking the response.
|
|
89
|
-
background_tasks.add_task(run_in_executor, configure_content, {}, False, SearchType.Notion,
|
|
83
|
+
background_tasks.add_task(run_in_executor, configure_content, {}, False, SearchType.Notion, user)
|
|
90
84
|
|
|
91
85
|
return RedirectResponse(notion_redirect)
|