khoj 1.28.4.dev23__py3-none-any.whl → 1.28.4.dev71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +4 -0
- khoj/database/models/__init__.py +4 -0
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1603-2418b11d8e8dacb9.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3124-a4cea2eda163128d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5538-5c4f2271e9377b74.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-a87e3671c4217ab6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9417-7a8a6da918d37750.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-36da67f03a173e52.js → page-ee4f0da14df15091.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-774ae3e033f938cd.js → page-da59a2b9ec07da16.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-04313ed6d8f38904.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-322c37514a3a613a.js → page-5c06dadacb1b5945.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-9b64f61caa5bd7f9.js → page-4f44549ba3807021.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/page-88dbd5c184dcd1e3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-9257e8817dcd6af3.js +1 -0
- khoj/interface/compiled/_next/static/css/{9d45de78fba367c1.css → 2ff098d0815fdbc1.css} +1 -1
- khoj/interface/compiled/_next/static/css/af0f36f71f368260.css +25 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/content/docx/docx_to_entries.py +25 -19
- khoj/processor/content/pdf/pdf_to_entries.py +34 -26
- khoj/processor/conversation/anthropic/anthropic_chat.py +7 -1
- khoj/processor/conversation/google/gemini_chat.py +15 -2
- khoj/processor/conversation/offline/chat_model.py +4 -0
- khoj/processor/conversation/openai/gpt.py +6 -1
- khoj/processor/conversation/prompts.py +15 -4
- khoj/processor/conversation/utils.py +69 -11
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +19 -3
- khoj/processor/tools/run_code.py +4 -0
- khoj/routers/api.py +5 -0
- khoj/routers/api_chat.py +66 -13
- khoj/routers/api_content.py +78 -6
- khoj/routers/helpers.py +98 -31
- khoj/routers/research.py +9 -2
- khoj/utils/rawconfig.py +32 -0
- {khoj-1.28.4.dev23.dist-info → khoj-1.28.4.dev71.dist-info}/METADATA +1 -1
- {khoj-1.28.4.dev23.dist-info → khoj-1.28.4.dev71.dist-info}/RECORD +56 -56
- khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5538-bf582517a8dd3faa.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8423-a1f432e4a8d9a6b0.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8840-b8d7b9f0923c6651.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9417-0d0fc7eb49a86abb.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-a369e2bda9897794.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/page-10b288c103f19468.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-959d5f097cf38c93.js +0 -1
- khoj/interface/compiled/_next/static/css/d2bc549245313f26.css +0 -25
- /khoj/interface/compiled/_next/static/{s_mKS5kELaw2v4a7_yWNP → I1jjXZh1lBQiY837mKXbn}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{s_mKS5kELaw2v4a7_yWNP → I1jjXZh1lBQiY837mKXbn}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1970-d44050bf658ae5cc.js → 1970-30985763f1451fa2.js} +0 -0
- {khoj-1.28.4.dev23.dist-info → khoj-1.28.4.dev71.dist-info}/WHEEL +0 -0
- {khoj-1.28.4.dev23.dist-info → khoj-1.28.4.dev71.dist-info}/entry_points.txt +0 -0
- {khoj-1.28.4.dev23.dist-info → khoj-1.28.4.dev71.dist-info}/licenses/LICENSE +0 -0
@@ -36,6 +36,7 @@ from khoj.utils.helpers import (
|
|
36
36
|
is_none_or_empty,
|
37
37
|
merge_dicts,
|
38
38
|
)
|
39
|
+
from khoj.utils.rawconfig import FileAttachment
|
39
40
|
|
40
41
|
logger = logging.getLogger(__name__)
|
41
42
|
|
@@ -146,7 +147,7 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
|
|
146
147
|
chat_history += f"User: {chat['intent']['query']}\n"
|
147
148
|
|
148
149
|
if chat["intent"].get("inferred-queries"):
|
149
|
-
chat_history += f'
|
150
|
+
chat_history += f'{agent_name}: {{"queries": {chat["intent"].get("inferred-queries")}}}\n'
|
150
151
|
|
151
152
|
chat_history += f"{agent_name}: {chat['message']}\n\n"
|
152
153
|
elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
|
@@ -155,6 +156,16 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
|
|
155
156
|
elif chat["by"] == "khoj" and ("excalidraw" in chat["intent"].get("type")):
|
156
157
|
chat_history += f"User: {chat['intent']['query']}\n"
|
157
158
|
chat_history += f"{agent_name}: {chat['intent']['inferred-queries'][0]}\n"
|
159
|
+
elif chat["by"] == "you":
|
160
|
+
raw_query_files = chat.get("queryFiles")
|
161
|
+
if raw_query_files:
|
162
|
+
query_files: Dict[str, str] = {}
|
163
|
+
for file in raw_query_files:
|
164
|
+
query_files[file["name"]] = file["content"]
|
165
|
+
|
166
|
+
query_file_context = gather_raw_query_files(query_files)
|
167
|
+
chat_history += f"User: {query_file_context}\n"
|
168
|
+
|
158
169
|
return chat_history
|
159
170
|
|
160
171
|
|
@@ -243,8 +254,9 @@ def save_to_conversation_log(
|
|
243
254
|
conversation_id: str = None,
|
244
255
|
automation_id: str = None,
|
245
256
|
query_images: List[str] = None,
|
246
|
-
|
257
|
+
raw_query_files: List[FileAttachment] = [],
|
247
258
|
train_of_thought: List[Any] = [],
|
259
|
+
tracer: Dict[str, Any] = {},
|
248
260
|
):
|
249
261
|
user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
250
262
|
turn_id = tracer.get("mid") or str(uuid.uuid4())
|
@@ -255,6 +267,7 @@ def save_to_conversation_log(
|
|
255
267
|
"created": user_message_time,
|
256
268
|
"images": query_images,
|
257
269
|
"turnId": turn_id,
|
270
|
+
"queryFiles": [file.model_dump(mode="json") for file in raw_query_files],
|
258
271
|
},
|
259
272
|
khoj_message_metadata={
|
260
273
|
"context": compiled_references,
|
@@ -289,25 +302,50 @@ Khoj: "{inferred_queries if ("text-to-image" in intent_type) else chat_response}
|
|
289
302
|
)
|
290
303
|
|
291
304
|
|
292
|
-
def construct_structured_message(
|
305
|
+
def construct_structured_message(
|
306
|
+
message: str, images: list[str], model_type: str, vision_enabled: bool, attached_file_context: str
|
307
|
+
):
|
293
308
|
"""
|
294
309
|
Format messages into appropriate multimedia format for supported chat model types
|
295
310
|
"""
|
296
|
-
if not images or not vision_enabled:
|
297
|
-
return message
|
298
|
-
|
299
311
|
if model_type in [
|
300
312
|
ChatModelOptions.ModelType.OPENAI,
|
301
313
|
ChatModelOptions.ModelType.GOOGLE,
|
302
314
|
ChatModelOptions.ModelType.ANTHROPIC,
|
303
315
|
]:
|
304
|
-
|
316
|
+
constructed_messages: List[Any] = [
|
305
317
|
{"type": "text", "text": message},
|
306
|
-
*[{"type": "image_url", "image_url": {"url": image}} for image in images],
|
307
318
|
]
|
319
|
+
|
320
|
+
if not is_none_or_empty(attached_file_context):
|
321
|
+
constructed_messages.append({"type": "text", "text": attached_file_context})
|
322
|
+
if vision_enabled and images:
|
323
|
+
for image in images:
|
324
|
+
constructed_messages.append({"type": "image_url", "image_url": {"url": image}})
|
325
|
+
return constructed_messages
|
326
|
+
|
327
|
+
if not is_none_or_empty(attached_file_context):
|
328
|
+
return f"{attached_file_context}\n\n{message}"
|
329
|
+
|
308
330
|
return message
|
309
331
|
|
310
332
|
|
333
|
+
def gather_raw_query_files(
    query_files: Dict[str, str],
):
    """
    Gather contextual data from the given (raw) files

    Args:
        query_files: Mapping of file name to that file's text content.
            May be empty or None.

    Returns:
        An empty string when no files are given. Otherwise a single message
        that introduces the attachments and lists each file's name followed
        by its content.
    """
    # Truthiness check covers both an empty mapping and None
    if not query_files:
        return ""

    # Entries are joined with a single space to keep the message format unchanged
    contextual_data = " ".join(
        f"File: {file_name}\n\n{file_content}\n\n" for file_name, file_content in query_files.items()
    )
    return f"I have attached the following files:\n\n{contextual_data}"
|
347
|
+
|
348
|
+
|
311
349
|
def generate_chatml_messages_with_context(
|
312
350
|
user_message,
|
313
351
|
system_message=None,
|
@@ -320,6 +358,7 @@ def generate_chatml_messages_with_context(
|
|
320
358
|
vision_enabled=False,
|
321
359
|
model_type="",
|
322
360
|
context_message="",
|
361
|
+
query_files: str = None,
|
323
362
|
):
|
324
363
|
"""Generate chat messages with appropriate context from previous conversation to send to the chat model"""
|
325
364
|
# Set max prompt size from user config or based on pre-configured for model and machine specs
|
@@ -336,6 +375,8 @@ def generate_chatml_messages_with_context(
|
|
336
375
|
chatml_messages: List[ChatMessage] = []
|
337
376
|
for chat in conversation_log.get("chat", []):
|
338
377
|
message_context = ""
|
378
|
+
message_attached_files = ""
|
379
|
+
|
339
380
|
if chat["by"] == "khoj" and "excalidraw" in chat["intent"].get("type", ""):
|
340
381
|
message_context += chat.get("intent").get("inferred-queries")[0]
|
341
382
|
if not is_none_or_empty(chat.get("context")):
|
@@ -347,14 +388,27 @@ def generate_chatml_messages_with_context(
|
|
347
388
|
}
|
348
389
|
)
|
349
390
|
message_context += f"{prompts.notes_conversation.format(references=references)}\n\n"
|
391
|
+
|
392
|
+
if chat.get("queryFiles"):
|
393
|
+
raw_query_files = chat.get("queryFiles")
|
394
|
+
query_files_dict = dict()
|
395
|
+
for file in raw_query_files:
|
396
|
+
query_files_dict[file["name"]] = file["content"]
|
397
|
+
|
398
|
+
message_attached_files = gather_raw_query_files(query_files_dict)
|
399
|
+
chatml_messages.append(ChatMessage(content=message_attached_files, role="user"))
|
400
|
+
|
350
401
|
if not is_none_or_empty(chat.get("onlineContext")):
|
351
402
|
message_context += f"{prompts.online_search_conversation.format(online_results=chat.get('onlineContext'))}"
|
403
|
+
|
352
404
|
if not is_none_or_empty(message_context):
|
353
405
|
reconstructed_context_message = ChatMessage(content=message_context, role="user")
|
354
406
|
chatml_messages.insert(0, reconstructed_context_message)
|
355
407
|
|
356
408
|
role = "user" if chat["by"] == "you" else "assistant"
|
357
|
-
message_content = construct_structured_message(
|
409
|
+
message_content = construct_structured_message(
|
410
|
+
chat["message"], chat.get("images"), model_type, vision_enabled, attached_file_context=query_files
|
411
|
+
)
|
358
412
|
|
359
413
|
reconstructed_message = ChatMessage(content=message_content, role=role)
|
360
414
|
chatml_messages.insert(0, reconstructed_message)
|
@@ -366,14 +420,18 @@ def generate_chatml_messages_with_context(
|
|
366
420
|
if not is_none_or_empty(user_message):
|
367
421
|
messages.append(
|
368
422
|
ChatMessage(
|
369
|
-
content=construct_structured_message(
|
423
|
+
content=construct_structured_message(
|
424
|
+
user_message, query_images, model_type, vision_enabled, query_files
|
425
|
+
),
|
370
426
|
role="user",
|
371
427
|
)
|
372
428
|
)
|
373
429
|
if not is_none_or_empty(context_message):
|
374
430
|
messages.append(ChatMessage(content=context_message, role="user"))
|
431
|
+
|
375
432
|
if len(chatml_messages) > 0:
|
376
433
|
messages += chatml_messages
|
434
|
+
|
377
435
|
if not is_none_or_empty(system_message):
|
378
436
|
messages.append(ChatMessage(content=system_message, role="system"))
|
379
437
|
|
@@ -449,7 +507,7 @@ def truncate_messages(
|
|
449
507
|
truncated_message = encoder.decode(encoder.encode(original_question)[:remaining_tokens]).strip()
|
450
508
|
messages = [ChatMessage(content=truncated_message, role=messages[0].role)]
|
451
509
|
logger.debug(
|
452
|
-
f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {truncated_message}"
|
510
|
+
f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {truncated_message[:1000]}..."
|
453
511
|
)
|
454
512
|
|
455
513
|
if system_message:
|
khoj/processor/image/generate.py
CHANGED
@@ -28,6 +28,7 @@ async def text_to_image(
|
|
28
28
|
send_status_func: Optional[Callable] = None,
|
29
29
|
query_images: Optional[List[str]] = None,
|
30
30
|
agent: Agent = None,
|
31
|
+
query_files: str = None,
|
31
32
|
tracer: dict = {},
|
32
33
|
):
|
33
34
|
status_code = 200
|
@@ -69,6 +70,7 @@ async def text_to_image(
|
|
69
70
|
query_images=query_images,
|
70
71
|
user=user,
|
71
72
|
agent=agent,
|
73
|
+
query_files=query_files,
|
72
74
|
tracer=tracer,
|
73
75
|
)
|
74
76
|
|
@@ -68,6 +68,7 @@ async def search_online(
|
|
68
68
|
query_images: List[str] = None,
|
69
69
|
previous_subqueries: Set = set(),
|
70
70
|
agent: Agent = None,
|
71
|
+
query_files: str = None,
|
71
72
|
tracer: dict = {},
|
72
73
|
):
|
73
74
|
query += " ".join(custom_filters)
|
@@ -78,7 +79,14 @@ async def search_online(
|
|
78
79
|
|
79
80
|
# Breakdown the query into subqueries to get the correct answer
|
80
81
|
new_subqueries = await generate_online_subqueries(
|
81
|
-
query,
|
82
|
+
query,
|
83
|
+
conversation_history,
|
84
|
+
location,
|
85
|
+
user,
|
86
|
+
query_images=query_images,
|
87
|
+
agent=agent,
|
88
|
+
tracer=tracer,
|
89
|
+
query_files=query_files,
|
82
90
|
)
|
83
91
|
subqueries = list(new_subqueries - previous_subqueries)
|
84
92
|
response_dict: Dict[str, Dict[str, List[Dict] | Dict]] = {}
|
@@ -169,13 +177,21 @@ async def read_webpages(
|
|
169
177
|
send_status_func: Optional[Callable] = None,
|
170
178
|
query_images: List[str] = None,
|
171
179
|
agent: Agent = None,
|
172
|
-
tracer: dict = {},
|
173
180
|
max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
|
181
|
+
query_files: str = None,
|
182
|
+
tracer: dict = {},
|
174
183
|
):
|
175
184
|
"Infer web pages to read from the query and extract relevant information from them"
|
176
185
|
logger.info(f"Inferring web pages to read")
|
177
186
|
urls = await infer_webpage_urls(
|
178
|
-
query,
|
187
|
+
query,
|
188
|
+
conversation_history,
|
189
|
+
location,
|
190
|
+
user,
|
191
|
+
query_images,
|
192
|
+
agent=agent,
|
193
|
+
query_files=query_files,
|
194
|
+
tracer=tracer,
|
179
195
|
)
|
180
196
|
|
181
197
|
# Get the top 10 web pages to read
|
khoj/processor/tools/run_code.py
CHANGED
@@ -36,6 +36,7 @@ async def run_code(
|
|
36
36
|
query_images: List[str] = None,
|
37
37
|
agent: Agent = None,
|
38
38
|
sandbox_url: str = SANDBOX_URL,
|
39
|
+
query_files: str = None,
|
39
40
|
tracer: dict = {},
|
40
41
|
):
|
41
42
|
# Generate Code
|
@@ -53,6 +54,7 @@ async def run_code(
|
|
53
54
|
query_images,
|
54
55
|
agent,
|
55
56
|
tracer,
|
57
|
+
query_files,
|
56
58
|
)
|
57
59
|
except Exception as e:
|
58
60
|
raise ValueError(f"Failed to generate code for {query} with error: {e}")
|
@@ -82,6 +84,7 @@ async def generate_python_code(
|
|
82
84
|
query_images: List[str] = None,
|
83
85
|
agent: Agent = None,
|
84
86
|
tracer: dict = {},
|
87
|
+
query_files: str = None,
|
85
88
|
) -> List[str]:
|
86
89
|
location = f"{location_data}" if location_data else "Unknown"
|
87
90
|
username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else ""
|
@@ -109,6 +112,7 @@ async def generate_python_code(
|
|
109
112
|
response_type="json_object",
|
110
113
|
user=user,
|
111
114
|
tracer=tracer,
|
115
|
+
query_files=query_files,
|
112
116
|
)
|
113
117
|
|
114
118
|
# Validate that the response is a non-empty, JSON-serializable list
|
khoj/routers/api.py
CHANGED
@@ -351,6 +351,7 @@ async def extract_references_and_questions(
|
|
351
351
|
query_images: Optional[List[str]] = None,
|
352
352
|
previous_inferred_queries: Set = set(),
|
353
353
|
agent: Agent = None,
|
354
|
+
query_files: str = None,
|
354
355
|
tracer: dict = {},
|
355
356
|
):
|
356
357
|
user = request.user.object if request.user.is_authenticated else None
|
@@ -425,6 +426,7 @@ async def extract_references_and_questions(
|
|
425
426
|
user=user,
|
426
427
|
max_prompt_size=conversation_config.max_prompt_size,
|
427
428
|
personality_context=personality_context,
|
429
|
+
query_files=query_files,
|
428
430
|
tracer=tracer,
|
429
431
|
)
|
430
432
|
elif conversation_config.model_type == ChatModelOptions.ModelType.OPENAI:
|
@@ -443,6 +445,7 @@ async def extract_references_and_questions(
|
|
443
445
|
query_images=query_images,
|
444
446
|
vision_enabled=vision_enabled,
|
445
447
|
personality_context=personality_context,
|
448
|
+
query_files=query_files,
|
446
449
|
tracer=tracer,
|
447
450
|
)
|
448
451
|
elif conversation_config.model_type == ChatModelOptions.ModelType.ANTHROPIC:
|
@@ -458,6 +461,7 @@ async def extract_references_and_questions(
|
|
458
461
|
user=user,
|
459
462
|
vision_enabled=vision_enabled,
|
460
463
|
personality_context=personality_context,
|
464
|
+
query_files=query_files,
|
461
465
|
tracer=tracer,
|
462
466
|
)
|
463
467
|
elif conversation_config.model_type == ChatModelOptions.ModelType.GOOGLE:
|
@@ -474,6 +478,7 @@ async def extract_references_and_questions(
|
|
474
478
|
user=user,
|
475
479
|
vision_enabled=vision_enabled,
|
476
480
|
personality_context=personality_context,
|
481
|
+
query_files=query_files,
|
477
482
|
tracer=tracer,
|
478
483
|
)
|
479
484
|
|
khoj/routers/api_chat.py
CHANGED
@@ -19,7 +19,6 @@ from khoj.database.adapters import (
|
|
19
19
|
AgentAdapters,
|
20
20
|
ConversationAdapters,
|
21
21
|
EntryAdapters,
|
22
|
-
FileObjectAdapters,
|
23
22
|
PublicConversationAdapters,
|
24
23
|
aget_user_name,
|
25
24
|
)
|
@@ -45,12 +44,13 @@ from khoj.routers.helpers import (
|
|
45
44
|
ConversationCommandRateLimiter,
|
46
45
|
DeleteMessageRequestBody,
|
47
46
|
FeedbackData,
|
47
|
+
acreate_title_from_history,
|
48
48
|
agenerate_chat_response,
|
49
49
|
aget_relevant_information_sources,
|
50
50
|
aget_relevant_output_modes,
|
51
51
|
construct_automation_created_message,
|
52
52
|
create_automation,
|
53
|
-
|
53
|
+
gather_raw_query_files,
|
54
54
|
generate_excalidraw_diagram,
|
55
55
|
generate_summary_from_files,
|
56
56
|
get_conversation_command,
|
@@ -76,7 +76,12 @@ from khoj.utils.helpers import (
|
|
76
76
|
get_device,
|
77
77
|
is_none_or_empty,
|
78
78
|
)
|
79
|
-
from khoj.utils.rawconfig import
|
79
|
+
from khoj.utils.rawconfig import (
|
80
|
+
ChatRequestBody,
|
81
|
+
FileFilterRequest,
|
82
|
+
FilesFilterRequest,
|
83
|
+
LocationData,
|
84
|
+
)
|
80
85
|
|
81
86
|
# Initialize Router
|
82
87
|
logger = logging.getLogger(__name__)
|
@@ -374,7 +379,7 @@ def fork_public_conversation(
|
|
374
379
|
{
|
375
380
|
"status": "ok",
|
376
381
|
"next_url": redirect_uri,
|
377
|
-
"conversation_id": new_conversation.id,
|
382
|
+
"conversation_id": str(new_conversation.id),
|
378
383
|
}
|
379
384
|
),
|
380
385
|
)
|
@@ -530,6 +535,32 @@ async def set_conversation_title(
|
|
530
535
|
)
|
531
536
|
|
532
537
|
|
538
|
+
@api_chat.post("/title")
@requires(["authenticated"])
async def generate_chat_title(
    request: Request,
    common: CommonQueryParams,
    conversation_id: str,
):
    """
    Return a title for the requesting user's conversation, generating one if needed.

    If the conversation already has a title, that title is returned unchanged.
    Otherwise a title is created from the conversation history, stored in the
    conversation's slug and returned.

    Raises:
        HTTPException: 404 when no conversation is found for this user and id.
    """
    user: KhojUser = request.user.object
    conversation = await ConversationAdapters.aget_conversation_by_user(user=user, conversation_id=conversation_id)

    # Check for a missing conversation before touching any of its attributes
    if not conversation:
        raise HTTPException(status_code=404, detail="Conversation not found")

    # Conversation.title is explicitly set by the user. Do not override.
    if conversation.title:
        return {"status": "ok", "title": conversation.title}

    new_title = await acreate_title_from_history(user, conversation=conversation)

    conversation.slug = new_title

    # asave() is a coroutine; without await the save never runs
    await conversation.asave()

    return {"status": "ok", "title": new_title}
|
562
|
+
|
563
|
+
|
533
564
|
@api_chat.delete("/conversation/message", response_class=Response)
|
534
565
|
@requires(["authenticated"])
|
535
566
|
def delete_message(request: Request, delete_request: DeleteMessageRequestBody) -> Response:
|
@@ -571,6 +602,7 @@ async def chat(
|
|
571
602
|
country_code = body.country_code or get_country_code_from_timezone(body.timezone)
|
572
603
|
timezone = body.timezone
|
573
604
|
raw_images = body.images
|
605
|
+
raw_query_files = body.files
|
574
606
|
|
575
607
|
async def event_generator(q: str, images: list[str]):
|
576
608
|
start_time = time.perf_counter()
|
@@ -582,6 +614,7 @@ async def chat(
|
|
582
614
|
q = unquote(q)
|
583
615
|
train_of_thought = []
|
584
616
|
nonlocal conversation_id
|
617
|
+
nonlocal raw_query_files
|
585
618
|
|
586
619
|
tracer: dict = {
|
587
620
|
"mid": turn_id,
|
@@ -601,6 +634,11 @@ async def chat(
|
|
601
634
|
if uploaded_image:
|
602
635
|
uploaded_images.append(uploaded_image)
|
603
636
|
|
637
|
+
query_files: Dict[str, str] = {}
|
638
|
+
if raw_query_files:
|
639
|
+
for file in raw_query_files:
|
640
|
+
query_files[file.name] = file.content
|
641
|
+
|
604
642
|
async def send_event(event_type: ChatEvent, data: str | dict):
|
605
643
|
nonlocal connection_alive, ttft, train_of_thought
|
606
644
|
if not connection_alive or await request.is_disconnected():
|
@@ -711,6 +749,8 @@ async def chat(
|
|
711
749
|
## Extract Document References
|
712
750
|
compiled_references: List[Any] = []
|
713
751
|
inferred_queries: List[Any] = []
|
752
|
+
file_filters = conversation.file_filters if conversation and conversation.file_filters else []
|
753
|
+
attached_file_context = gather_raw_query_files(query_files)
|
714
754
|
|
715
755
|
if conversation_commands == [ConversationCommand.Default] or is_automated_task:
|
716
756
|
conversation_commands = await aget_relevant_information_sources(
|
@@ -720,6 +760,7 @@ async def chat(
|
|
720
760
|
user=user,
|
721
761
|
query_images=uploaded_images,
|
722
762
|
agent=agent,
|
763
|
+
query_files=attached_file_context,
|
723
764
|
tracer=tracer,
|
724
765
|
)
|
725
766
|
|
@@ -765,6 +806,7 @@ async def chat(
|
|
765
806
|
user_name=user_name,
|
766
807
|
location=location,
|
767
808
|
file_filters=conversation.file_filters if conversation else [],
|
809
|
+
query_files=attached_file_context,
|
768
810
|
tracer=tracer,
|
769
811
|
):
|
770
812
|
if isinstance(research_result, InformationCollectionIteration):
|
@@ -804,10 +846,6 @@ async def chat(
|
|
804
846
|
response_log = "No files selected for summarization. Please add files using the section on the left."
|
805
847
|
async for result in send_llm_response(response_log):
|
806
848
|
yield result
|
807
|
-
elif len(file_filters) > 1 and not agent_has_entries:
|
808
|
-
response_log = "Only one file can be selected for summarization."
|
809
|
-
async for result in send_llm_response(response_log):
|
810
|
-
yield result
|
811
849
|
else:
|
812
850
|
async for response in generate_summary_from_files(
|
813
851
|
q=q,
|
@@ -817,6 +855,7 @@ async def chat(
|
|
817
855
|
query_images=uploaded_images,
|
818
856
|
agent=agent,
|
819
857
|
send_status_func=partial(send_event, ChatEvent.STATUS),
|
858
|
+
query_files=attached_file_context,
|
820
859
|
tracer=tracer,
|
821
860
|
):
|
822
861
|
if isinstance(response, dict) and ChatEvent.STATUS in response:
|
@@ -837,8 +876,9 @@ async def chat(
|
|
837
876
|
client_application=request.user.client_app,
|
838
877
|
conversation_id=conversation_id,
|
839
878
|
query_images=uploaded_images,
|
840
|
-
tracer=tracer,
|
841
879
|
train_of_thought=train_of_thought,
|
880
|
+
raw_query_files=raw_query_files,
|
881
|
+
tracer=tracer,
|
842
882
|
)
|
843
883
|
return
|
844
884
|
|
@@ -882,8 +922,9 @@ async def chat(
|
|
882
922
|
inferred_queries=[query_to_run],
|
883
923
|
automation_id=automation.id,
|
884
924
|
query_images=uploaded_images,
|
885
|
-
tracer=tracer,
|
886
925
|
train_of_thought=train_of_thought,
|
926
|
+
raw_query_files=raw_query_files,
|
927
|
+
tracer=tracer,
|
887
928
|
)
|
888
929
|
async for result in send_llm_response(llm_response):
|
889
930
|
yield result
|
@@ -905,6 +946,7 @@ async def chat(
|
|
905
946
|
partial(send_event, ChatEvent.STATUS),
|
906
947
|
query_images=uploaded_images,
|
907
948
|
agent=agent,
|
949
|
+
query_files=attached_file_context,
|
908
950
|
tracer=tracer,
|
909
951
|
):
|
910
952
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
@@ -950,6 +992,7 @@ async def chat(
|
|
950
992
|
custom_filters,
|
951
993
|
query_images=uploaded_images,
|
952
994
|
agent=agent,
|
995
|
+
query_files=attached_file_context,
|
953
996
|
tracer=tracer,
|
954
997
|
):
|
955
998
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
@@ -975,6 +1018,7 @@ async def chat(
|
|
975
1018
|
partial(send_event, ChatEvent.STATUS),
|
976
1019
|
query_images=uploaded_images,
|
977
1020
|
agent=agent,
|
1021
|
+
query_files=attached_file_context,
|
978
1022
|
tracer=tracer,
|
979
1023
|
):
|
980
1024
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
@@ -1015,6 +1059,7 @@ async def chat(
|
|
1015
1059
|
partial(send_event, ChatEvent.STATUS),
|
1016
1060
|
query_images=uploaded_images,
|
1017
1061
|
agent=agent,
|
1062
|
+
query_files=attached_file_context,
|
1018
1063
|
tracer=tracer,
|
1019
1064
|
):
|
1020
1065
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
@@ -1055,6 +1100,7 @@ async def chat(
|
|
1055
1100
|
send_status_func=partial(send_event, ChatEvent.STATUS),
|
1056
1101
|
query_images=uploaded_images,
|
1057
1102
|
agent=agent,
|
1103
|
+
query_files=attached_file_context,
|
1058
1104
|
tracer=tracer,
|
1059
1105
|
):
|
1060
1106
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
@@ -1086,8 +1132,10 @@ async def chat(
|
|
1086
1132
|
compiled_references=compiled_references,
|
1087
1133
|
online_results=online_results,
|
1088
1134
|
query_images=uploaded_images,
|
1089
|
-
tracer=tracer,
|
1090
1135
|
train_of_thought=train_of_thought,
|
1136
|
+
attached_file_context=attached_file_context,
|
1137
|
+
raw_query_files=raw_query_files,
|
1138
|
+
tracer=tracer,
|
1091
1139
|
)
|
1092
1140
|
content_obj = {
|
1093
1141
|
"intentType": intent_type,
|
@@ -1116,6 +1164,7 @@ async def chat(
|
|
1116
1164
|
user=user,
|
1117
1165
|
agent=agent,
|
1118
1166
|
send_status_func=partial(send_event, ChatEvent.STATUS),
|
1167
|
+
query_files=attached_file_context,
|
1119
1168
|
tracer=tracer,
|
1120
1169
|
):
|
1121
1170
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
@@ -1144,8 +1193,10 @@ async def chat(
|
|
1144
1193
|
compiled_references=compiled_references,
|
1145
1194
|
online_results=online_results,
|
1146
1195
|
query_images=uploaded_images,
|
1147
|
-
tracer=tracer,
|
1148
1196
|
train_of_thought=train_of_thought,
|
1197
|
+
attached_file_context=attached_file_context,
|
1198
|
+
raw_query_files=raw_query_files,
|
1199
|
+
tracer=tracer,
|
1149
1200
|
)
|
1150
1201
|
|
1151
1202
|
async for result in send_llm_response(json.dumps(content_obj)):
|
@@ -1171,8 +1222,10 @@ async def chat(
|
|
1171
1222
|
user_name,
|
1172
1223
|
researched_results,
|
1173
1224
|
uploaded_images,
|
1174
|
-
tracer,
|
1175
1225
|
train_of_thought,
|
1226
|
+
attached_file_context,
|
1227
|
+
raw_query_files,
|
1228
|
+
tracer,
|
1176
1229
|
)
|
1177
1230
|
|
1178
1231
|
# Send Response
|
khoj/routers/api_content.py
CHANGED
@@ -36,16 +36,18 @@ from khoj.database.models import (
|
|
36
36
|
LocalPlaintextConfig,
|
37
37
|
NotionConfig,
|
38
38
|
)
|
39
|
+
from khoj.processor.content.docx.docx_to_entries import DocxToEntries
|
40
|
+
from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
|
39
41
|
from khoj.routers.helpers import (
|
40
42
|
ApiIndexedDataLimiter,
|
41
43
|
CommonQueryParams,
|
42
44
|
configure_content,
|
45
|
+
get_file_content,
|
43
46
|
get_user_config,
|
44
47
|
update_telemetry_state,
|
45
48
|
)
|
46
49
|
from khoj.utils import constants, state
|
47
50
|
from khoj.utils.config import SearchModels
|
48
|
-
from khoj.utils.helpers import get_file_type
|
49
51
|
from khoj.utils.rawconfig import (
|
50
52
|
ContentConfig,
|
51
53
|
FullConfig,
|
@@ -375,6 +377,75 @@ async def delete_content_source(
|
|
375
377
|
return {"status": "ok"}
|
376
378
|
|
377
379
|
|
380
|
+
@api_content.post("/convert", status_code=200)
@requires(["authenticated"])
async def convert_documents(
    request: Request,
    files: List[UploadFile],
    client: Optional[str] = None,
):
    """
    Convert uploaded files to text and return the results as a JSON list.

    Files larger than 10MB and files of an unsupported type are skipped with
    a warning. For docx and pdf files the text is extracted page by page and
    each page is prefixed with a "Page N of <name>" header, numbered from 1.

    Args:
        request: Incoming request, passed on to telemetry.
        files: Uploaded files to convert.
        client: Optional name of the calling client, used in logs and telemetry.

    Returns:
        JSON response holding one object per converted file with the keys
        "name", "content", "file_type" and "size" (UTF-8 byte length of content).
    """
    MAX_FILE_SIZE_MB = 10  # 10MB limit
    MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024

    supported_files = ["org", "markdown", "pdf", "plaintext", "docx"]
    # File types whose text is extracted page by page from the raw content
    paged_extractors = {
        "docx": DocxToEntries.extract_text,
        "pdf": PdfToEntries.extract_text,
    }
    converted_files = []

    for file in files:
        # Measure the upload, then rewind so the content can be read again below
        file_size = len(await file.read())
        await file.seek(0)

        if file_size > MAX_FILE_SIZE_BYTES:
            logger.warning(
                f"Skipped converting oversized file ({file_size / 1024 / 1024:.1f}MB) sent by {client} client: {file.filename}"
            )
            continue

        file_data = get_file_content(file)
        if file_data.file_type not in supported_files:
            logger.warning(f"Skipped converting unsupported file type sent by {client} client: {file.filename}")
            continue

        extract_pages = paged_extractors.get(file_data.file_type)
        if extract_pages is not None:
            entries_per_page = extract_pages(file_data.content)
            # Number pages from 1 so the first page is not labelled "Page 0"
            annotated_pages = [
                f"Page {index} of {file_data.name}:\n\n{entry}"
                for index, entry in enumerate(entries_per_page, start=1)
            ]
            extracted_content = "\n".join(annotated_pages)
        elif file_data.encoding:
            extracted_content = file_data.content.decode(file_data.encoding)
        else:
            extracted_content = file_data.content

        size_in_bytes = len(extracted_content.encode("utf-8"))

        converted_files.append(
            {
                "name": file_data.name,
                "content": extracted_content,
                "file_type": file_data.file_type,
                "size": size_in_bytes,
            }
        )

    update_telemetry_state(
        request=request,
        telemetry_type="api",
        api="convert_documents",
        client=client,
    )

    return Response(content=json.dumps(converted_files), media_type="application/json", status_code=200)
|
447
|
+
|
448
|
+
|
378
449
|
async def indexer(
|
379
450
|
request: Request,
|
380
451
|
files: list[UploadFile],
|
@@ -398,12 +469,13 @@ async def indexer(
|
|
398
469
|
try:
|
399
470
|
logger.info(f"📬 Updating content index via API call by {client} client")
|
400
471
|
for file in files:
|
401
|
-
|
402
|
-
file_type
|
403
|
-
|
404
|
-
|
472
|
+
file_data = get_file_content(file)
|
473
|
+
if file_data.file_type in index_files:
|
474
|
+
index_files[file_data.file_type][file_data.name] = (
|
475
|
+
file_data.content.decode(file_data.encoding) if file_data.encoding else file_data.content
|
476
|
+
)
|
405
477
|
else:
|
406
|
-
logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {
|
478
|
+
logger.warning(f"Skipped indexing unsupported file type sent by {client} client: {file_data.name}")
|
407
479
|
|
408
480
|
indexer_input = IndexerInput(
|
409
481
|
org=index_files["org"],
|