khoj 1.27.2.dev130__py3-none-any.whl → 1.27.2.dev167__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +1 -1
- khoj/database/adapters/__init__.py +16 -2
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3423-ff7402ae1dd66592.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-e80647edf6c92c27.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-bfc70b16ba5e51b4.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-340bcf53abf6a2cc.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{webpack-8ae5ce45161bd98e.js → webpack-878569182b3af4c6.js} +1 -1
- khoj/interface/compiled/_next/static/css/{e9c5fe555dd3050b.css → a795ee88875f4853.css} +1 -1
- khoj/interface/compiled/_next/static/css/{b70402177a7c3207.css → d738728883c68af8.css} +1 -1
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/offline/chat_model.py +8 -1
- khoj/processor/conversation/openai/utils.py +7 -0
- khoj/processor/conversation/prompts.py +36 -38
- khoj/processor/conversation/utils.py +15 -8
- khoj/processor/embeddings.py +4 -4
- khoj/processor/tools/online_search.py +8 -4
- khoj/routers/api_chat.py +38 -25
- khoj/routers/helpers.py +16 -1
- khoj/routers/research.py +17 -18
- khoj/utils/helpers.py +12 -3
- {khoj-1.27.2.dev130.dist-info → khoj-1.27.2.dev167.dist-info}/METADATA +2 -2
- {khoj-1.27.2.dev130.dist-info → khoj-1.27.2.dev167.dist-info}/RECORD +48 -48
- khoj/interface/compiled/_next/static/chunks/1467-5a191c1cd5bf0b83.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1603-5d70d9dfcdcb1f10.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-fa918f4e5365a35e.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8423-3ad0bfb299801220.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-7dc98df9c88828f0.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d887f55fe6d4f35d.js +0 -1
- /khoj/interface/compiled/_next/static/chunks/{1970-444843bea1d17d61.js → 1970-90dd510762d820ba.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9417-19cfd1a9cb758e71.js → 9417-951f46451a8dd6d7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/app/{page-d46244282af16509.js → page-f249666a0cbdaa0d.js} +0 -0
- /khoj/interface/compiled/_next/static/{N19uqHAJYqRAVxvuVwHfE → vUFFjGuewOr_h39o6kbfT}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{N19uqHAJYqRAVxvuVwHfE → vUFFjGuewOr_h39o6kbfT}/_ssgManifest.js +0 -0
- {khoj-1.27.2.dev130.dist-info → khoj-1.27.2.dev167.dist-info}/WHEEL +0 -0
- {khoj-1.27.2.dev130.dist-info → khoj-1.27.2.dev167.dist-info}/entry_points.txt +0 -0
- {khoj-1.27.2.dev130.dist-info → khoj-1.27.2.dev167.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +1,11 @@
|
|
1
1
|
import base64
|
2
|
+
import json
|
2
3
|
import logging
|
3
4
|
import math
|
4
5
|
import mimetypes
|
5
6
|
import os
|
6
7
|
import queue
|
8
|
+
import uuid
|
7
9
|
from dataclasses import dataclass
|
8
10
|
from datetime import datetime
|
9
11
|
from enum import Enum
|
@@ -134,7 +136,11 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A
|
|
134
136
|
for chat in conversation_history.get("chat", [])[-n:]:
|
135
137
|
if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]:
|
136
138
|
chat_history += f"User: {chat['intent']['query']}\n"
|
137
|
-
|
139
|
+
|
140
|
+
if chat["intent"].get("inferred-queries"):
|
141
|
+
chat_history += f'Khoj: {{"queries": {chat["intent"].get("inferred-queries")}}}\n'
|
142
|
+
|
143
|
+
chat_history += f"{agent_name}: {chat['message']}\n\n"
|
138
144
|
elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")):
|
139
145
|
chat_history += f"User: {chat['intent']['query']}\n"
|
140
146
|
chat_history += f"{agent_name}: [generated image redacted for space]\n"
|
@@ -185,6 +191,7 @@ class ChatEvent(Enum):
|
|
185
191
|
MESSAGE = "message"
|
186
192
|
REFERENCES = "references"
|
187
193
|
STATUS = "status"
|
194
|
+
METADATA = "metadata"
|
188
195
|
|
189
196
|
|
190
197
|
def message_to_log(
|
@@ -232,12 +239,14 @@ def save_to_conversation_log(
|
|
232
239
|
train_of_thought: List[Any] = [],
|
233
240
|
):
|
234
241
|
user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
242
|
+
turn_id = tracer.get("mid") or str(uuid.uuid4())
|
235
243
|
updated_conversation = message_to_log(
|
236
244
|
user_message=q,
|
237
245
|
chat_response=chat_response,
|
238
246
|
user_message_metadata={
|
239
247
|
"created": user_message_time,
|
240
248
|
"images": query_images,
|
249
|
+
"turnId": turn_id,
|
241
250
|
},
|
242
251
|
khoj_message_metadata={
|
243
252
|
"context": compiled_references,
|
@@ -246,6 +255,7 @@ def save_to_conversation_log(
|
|
246
255
|
"codeContext": code_results,
|
247
256
|
"automationId": automation_id,
|
248
257
|
"trainOfThought": train_of_thought,
|
258
|
+
"turnId": turn_id,
|
249
259
|
},
|
250
260
|
conversation_log=meta_log.get("chat", []),
|
251
261
|
train_of_thought=train_of_thought,
|
@@ -501,15 +511,12 @@ def commit_conversation_trace(
|
|
501
511
|
Returns the path to the repository.
|
502
512
|
"""
|
503
513
|
# Serialize session, system message and response to yaml
|
504
|
-
system_message_yaml =
|
505
|
-
response_yaml =
|
514
|
+
system_message_yaml = json.dumps(system_message, ensure_ascii=False, sort_keys=False)
|
515
|
+
response_yaml = json.dumps(response, ensure_ascii=False, sort_keys=False)
|
506
516
|
formatted_session = [{"role": message.role, "content": message.content} for message in session]
|
507
|
-
session_yaml =
|
517
|
+
session_yaml = json.dumps(formatted_session, ensure_ascii=False, sort_keys=False)
|
508
518
|
query = (
|
509
|
-
|
510
|
-
.strip()
|
511
|
-
.removeprefix("'")
|
512
|
-
.removesuffix("'")
|
519
|
+
json.dumps(session[-1].content, ensure_ascii=False, sort_keys=False).strip().removeprefix("'").removesuffix("'")
|
513
520
|
) # Extract serialized query from chat session
|
514
521
|
|
515
522
|
# Extract chat metadata for session
|
khoj/processor/embeddings.py
CHANGED
@@ -13,7 +13,7 @@ from tenacity import (
|
|
13
13
|
)
|
14
14
|
from torch import nn
|
15
15
|
|
16
|
-
from khoj.utils.helpers import get_device, merge_dicts, timer
|
16
|
+
from khoj.utils.helpers import fix_json_dict, get_device, merge_dicts, timer
|
17
17
|
from khoj.utils.rawconfig import SearchResponse
|
18
18
|
|
19
19
|
logger = logging.getLogger(__name__)
|
@@ -31,9 +31,9 @@ class EmbeddingsModel:
|
|
31
31
|
):
|
32
32
|
default_query_encode_kwargs = {"show_progress_bar": False, "normalize_embeddings": True}
|
33
33
|
default_docs_encode_kwargs = {"show_progress_bar": True, "normalize_embeddings": True}
|
34
|
-
self.query_encode_kwargs = merge_dicts(query_encode_kwargs, default_query_encode_kwargs)
|
35
|
-
self.docs_encode_kwargs = merge_dicts(docs_encode_kwargs, default_docs_encode_kwargs)
|
36
|
-
self.model_kwargs = merge_dicts(model_kwargs, {"device": get_device()})
|
34
|
+
self.query_encode_kwargs = merge_dicts(fix_json_dict(query_encode_kwargs), default_query_encode_kwargs)
|
35
|
+
self.docs_encode_kwargs = merge_dicts(fix_json_dict(docs_encode_kwargs), default_docs_encode_kwargs)
|
36
|
+
self.model_kwargs = merge_dicts(fix_json_dict(model_kwargs), {"device": get_device()})
|
37
37
|
self.model_name = model_name
|
38
38
|
self.inference_endpoint = embeddings_inference_endpoint
|
39
39
|
self.api_key = embeddings_inference_endpoint_api_key
|
@@ -54,6 +54,7 @@ OLOSTEP_QUERY_PARAMS = {
|
|
54
54
|
}
|
55
55
|
|
56
56
|
DEFAULT_MAX_WEBPAGES_TO_READ = 1
|
57
|
+
MAX_WEBPAGES_TO_INFER = 10
|
57
58
|
|
58
59
|
|
59
60
|
async def search_online(
|
@@ -157,13 +158,16 @@ async def read_webpages(
|
|
157
158
|
query_images: List[str] = None,
|
158
159
|
agent: Agent = None,
|
159
160
|
tracer: dict = {},
|
161
|
+
max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
|
160
162
|
):
|
161
163
|
"Infer web pages to read from the query and extract relevant information from them"
|
162
164
|
logger.info(f"Inferring web pages to read")
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
165
|
+
urls = await infer_webpage_urls(
|
166
|
+
query, conversation_history, location, user, query_images, agent=agent, tracer=tracer
|
167
|
+
)
|
168
|
+
|
169
|
+
# Get the top 10 web pages to read
|
170
|
+
urls = urls[:max_webpages_to_read]
|
167
171
|
|
168
172
|
logger.info(f"Reading web pages at: {urls}")
|
169
173
|
if send_status_func:
|
khoj/routers/api_chat.py
CHANGED
@@ -31,6 +31,7 @@ from khoj.processor.speech.text_to_speech import generate_text_to_speech
|
|
31
31
|
from khoj.processor.tools.online_search import read_webpages, search_online
|
32
32
|
from khoj.processor.tools.run_code import run_code
|
33
33
|
from khoj.routers.api import extract_references_and_questions
|
34
|
+
from khoj.routers.email import send_query_feedback
|
34
35
|
from khoj.routers.helpers import (
|
35
36
|
ApiImageRateLimiter,
|
36
37
|
ApiUserRateLimiter,
|
@@ -38,13 +39,14 @@ from khoj.routers.helpers import (
|
|
38
39
|
ChatRequestBody,
|
39
40
|
CommonQueryParams,
|
40
41
|
ConversationCommandRateLimiter,
|
42
|
+
DeleteMessageRequestBody,
|
43
|
+
FeedbackData,
|
41
44
|
agenerate_chat_response,
|
42
45
|
aget_relevant_information_sources,
|
43
46
|
aget_relevant_output_modes,
|
44
47
|
construct_automation_created_message,
|
45
48
|
create_automation,
|
46
49
|
extract_relevant_info,
|
47
|
-
extract_relevant_summary,
|
48
50
|
generate_excalidraw_diagram,
|
49
51
|
generate_summary_from_files,
|
50
52
|
get_conversation_command,
|
@@ -75,16 +77,12 @@ from khoj.utils.rawconfig import FileFilterRequest, FilesFilterRequest, Location
|
|
75
77
|
# Initialize Router
|
76
78
|
logger = logging.getLogger(__name__)
|
77
79
|
conversation_command_rate_limiter = ConversationCommandRateLimiter(
|
78
|
-
trial_rate_limit=
|
80
|
+
trial_rate_limit=20, subscribed_rate_limit=75, slug="command"
|
79
81
|
)
|
80
82
|
|
81
83
|
|
82
84
|
api_chat = APIRouter()
|
83
85
|
|
84
|
-
from pydantic import BaseModel
|
85
|
-
|
86
|
-
from khoj.routers.email import send_query_feedback
|
87
|
-
|
88
86
|
|
89
87
|
@api_chat.get("/conversation/file-filters/{conversation_id}", response_class=Response)
|
90
88
|
@requires(["authenticated"])
|
@@ -146,12 +144,6 @@ def remove_file_filter(request: Request, filter: FileFilterRequest) -> Response:
|
|
146
144
|
return Response(content=json.dumps(file_filters), media_type="application/json", status_code=200)
|
147
145
|
|
148
146
|
|
149
|
-
class FeedbackData(BaseModel):
|
150
|
-
uquery: str
|
151
|
-
kquery: str
|
152
|
-
sentiment: str
|
153
|
-
|
154
|
-
|
155
147
|
@api_chat.post("/feedback")
|
156
148
|
@requires(["authenticated"])
|
157
149
|
async def sendfeedback(request: Request, data: FeedbackData):
|
@@ -166,10 +158,10 @@ async def text_to_speech(
|
|
166
158
|
common: CommonQueryParams,
|
167
159
|
text: str,
|
168
160
|
rate_limiter_per_minute=Depends(
|
169
|
-
ApiUserRateLimiter(requests=
|
161
|
+
ApiUserRateLimiter(requests=30, subscribed_requests=30, window=60, slug="chat_minute")
|
170
162
|
),
|
171
163
|
rate_limiter_per_day=Depends(
|
172
|
-
ApiUserRateLimiter(requests=
|
164
|
+
ApiUserRateLimiter(requests=100, subscribed_requests=600, window=60 * 60 * 24, slug="chat_day")
|
173
165
|
),
|
174
166
|
) -> Response:
|
175
167
|
voice_model = await ConversationAdapters.aget_voice_model_config(request.user.object)
|
@@ -534,6 +526,19 @@ async def set_conversation_title(
|
|
534
526
|
)
|
535
527
|
|
536
528
|
|
529
|
+
@api_chat.delete("/conversation/message", response_class=Response)
|
530
|
+
@requires(["authenticated"])
|
531
|
+
def delete_message(request: Request, delete_request: DeleteMessageRequestBody) -> Response:
|
532
|
+
user = request.user.object
|
533
|
+
success = ConversationAdapters.delete_message_by_turn_id(
|
534
|
+
user, delete_request.conversation_id, delete_request.turn_id
|
535
|
+
)
|
536
|
+
if success:
|
537
|
+
return Response(content=json.dumps({"status": "ok"}), media_type="application/json", status_code=200)
|
538
|
+
else:
|
539
|
+
return Response(content=json.dumps({"status": "error", "message": "Message not found"}), status_code=404)
|
540
|
+
|
541
|
+
|
537
542
|
@api_chat.post("")
|
538
543
|
@requires(["authenticated"])
|
539
544
|
async def chat(
|
@@ -541,10 +546,10 @@ async def chat(
|
|
541
546
|
common: CommonQueryParams,
|
542
547
|
body: ChatRequestBody,
|
543
548
|
rate_limiter_per_minute=Depends(
|
544
|
-
ApiUserRateLimiter(requests=
|
549
|
+
ApiUserRateLimiter(requests=20, subscribed_requests=20, window=60, slug="chat_minute")
|
545
550
|
),
|
546
551
|
rate_limiter_per_day=Depends(
|
547
|
-
ApiUserRateLimiter(requests=
|
552
|
+
ApiUserRateLimiter(requests=100, subscribed_requests=600, window=60 * 60 * 24, slug="chat_day")
|
548
553
|
),
|
549
554
|
image_rate_limiter=Depends(ApiImageRateLimiter(max_images=10, max_combined_size_mb=20)),
|
550
555
|
):
|
@@ -555,6 +560,7 @@ async def chat(
|
|
555
560
|
stream = body.stream
|
556
561
|
title = body.title
|
557
562
|
conversation_id = body.conversation_id
|
563
|
+
turn_id = str(body.turn_id or uuid.uuid4())
|
558
564
|
city = body.city
|
559
565
|
region = body.region
|
560
566
|
country = body.country or get_country_name_from_timezone(body.timezone)
|
@@ -574,7 +580,7 @@ async def chat(
|
|
574
580
|
nonlocal conversation_id
|
575
581
|
|
576
582
|
tracer: dict = {
|
577
|
-
"mid":
|
583
|
+
"mid": turn_id,
|
578
584
|
"cid": conversation_id,
|
579
585
|
"uid": user.id,
|
580
586
|
"khoj_version": state.khoj_version,
|
@@ -607,7 +613,7 @@ async def chat(
|
|
607
613
|
|
608
614
|
if event_type == ChatEvent.MESSAGE:
|
609
615
|
yield data
|
610
|
-
elif event_type == ChatEvent.REFERENCES or stream:
|
616
|
+
elif event_type == ChatEvent.REFERENCES or ChatEvent.METADATA or stream:
|
611
617
|
yield json.dumps({"type": event_type.value, "data": data}, ensure_ascii=False)
|
612
618
|
except asyncio.CancelledError as e:
|
613
619
|
connection_alive = False
|
@@ -651,6 +657,11 @@ async def chat(
|
|
651
657
|
metadata=chat_metadata,
|
652
658
|
)
|
653
659
|
|
660
|
+
if is_query_empty(q):
|
661
|
+
async for result in send_llm_response("Please ask your query to get started."):
|
662
|
+
yield result
|
663
|
+
return
|
664
|
+
|
654
665
|
conversation_commands = [get_conversation_command(query=q, any_references=True)]
|
655
666
|
|
656
667
|
conversation = await ConversationAdapters.aget_conversation_by_user(
|
@@ -666,6 +677,9 @@ async def chat(
|
|
666
677
|
return
|
667
678
|
conversation_id = conversation.id
|
668
679
|
|
680
|
+
async for event in send_event(ChatEvent.METADATA, {"conversationId": str(conversation_id), "turnId": turn_id}):
|
681
|
+
yield event
|
682
|
+
|
669
683
|
agent: Agent | None = None
|
670
684
|
default_agent = await AgentAdapters.aget_default_agent()
|
671
685
|
if conversation.agent and conversation.agent != default_agent:
|
@@ -677,17 +691,11 @@ async def chat(
|
|
677
691
|
agent = default_agent
|
678
692
|
|
679
693
|
await is_ready_to_chat(user)
|
680
|
-
|
681
694
|
user_name = await aget_user_name(user)
|
682
695
|
location = None
|
683
696
|
if city or region or country or country_code:
|
684
697
|
location = LocationData(city=city, region=region, country=country, country_code=country_code)
|
685
698
|
|
686
|
-
if is_query_empty(q):
|
687
|
-
async for result in send_llm_response("Please ask your query to get started."):
|
688
|
-
yield result
|
689
|
-
return
|
690
|
-
|
691
699
|
user_message_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
692
700
|
|
693
701
|
meta_log = conversation.conversation_log
|
@@ -699,7 +707,6 @@ async def chat(
|
|
699
707
|
## Extract Document References
|
700
708
|
compiled_references: List[Any] = []
|
701
709
|
inferred_queries: List[Any] = []
|
702
|
-
defiltered_query = defilter_query(q)
|
703
710
|
|
704
711
|
if conversation_commands == [ConversationCommand.Default] or is_automated_task:
|
705
712
|
conversation_commands = await aget_relevant_information_sources(
|
@@ -730,6 +737,12 @@ async def chat(
|
|
730
737
|
if mode not in conversation_commands:
|
731
738
|
conversation_commands.append(mode)
|
732
739
|
|
740
|
+
for cmd in conversation_commands:
|
741
|
+
await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
|
742
|
+
q = q.replace(f"/{cmd.value}", "").strip()
|
743
|
+
|
744
|
+
defiltered_query = defilter_query(q)
|
745
|
+
|
733
746
|
if conversation_commands == [ConversationCommand.Research]:
|
734
747
|
async for research_result in execute_information_collection(
|
735
748
|
request=request,
|
khoj/routers/helpers.py
CHANGED
@@ -478,6 +478,9 @@ async def infer_webpage_urls(
|
|
478
478
|
valid_unique_urls = {str(url).strip() for url in urls["links"] if is_valid_url(url)}
|
479
479
|
if is_none_or_empty(valid_unique_urls):
|
480
480
|
raise ValueError(f"Invalid list of urls: {response}")
|
481
|
+
if len(valid_unique_urls) == 0:
|
482
|
+
logger.error(f"No valid URLs found in response: {response}")
|
483
|
+
return []
|
481
484
|
return list(valid_unique_urls)
|
482
485
|
except Exception:
|
483
486
|
raise ValueError(f"Invalid list of urls: {response}")
|
@@ -1255,6 +1258,7 @@ class ChatRequestBody(BaseModel):
|
|
1255
1258
|
stream: Optional[bool] = False
|
1256
1259
|
title: Optional[str] = None
|
1257
1260
|
conversation_id: Optional[str] = None
|
1261
|
+
turn_id: Optional[str] = None
|
1258
1262
|
city: Optional[str] = None
|
1259
1263
|
region: Optional[str] = None
|
1260
1264
|
country: Optional[str] = None
|
@@ -1264,6 +1268,17 @@ class ChatRequestBody(BaseModel):
|
|
1264
1268
|
create_new: Optional[bool] = False
|
1265
1269
|
|
1266
1270
|
|
1271
|
+
class DeleteMessageRequestBody(BaseModel):
|
1272
|
+
conversation_id: str
|
1273
|
+
turn_id: str
|
1274
|
+
|
1275
|
+
|
1276
|
+
class FeedbackData(BaseModel):
|
1277
|
+
uquery: str
|
1278
|
+
kquery: str
|
1279
|
+
sentiment: str
|
1280
|
+
|
1281
|
+
|
1267
1282
|
class ApiUserRateLimiter:
|
1268
1283
|
def __init__(self, requests: int, subscribed_requests: int, window: int, slug: str):
|
1269
1284
|
self.requests = requests
|
@@ -1366,7 +1381,7 @@ class ConversationCommandRateLimiter:
|
|
1366
1381
|
self.slug = slug
|
1367
1382
|
self.trial_rate_limit = trial_rate_limit
|
1368
1383
|
self.subscribed_rate_limit = subscribed_rate_limit
|
1369
|
-
self.restricted_commands = [ConversationCommand.
|
1384
|
+
self.restricted_commands = [ConversationCommand.Research]
|
1370
1385
|
|
1371
1386
|
async def update_and_check_if_valid(self, request: Request, conversation_command: ConversationCommand):
|
1372
1387
|
if state.billing_enabled is False:
|
khoj/routers/research.py
CHANGED
@@ -1,12 +1,11 @@
|
|
1
1
|
import json
|
2
2
|
import logging
|
3
3
|
from datetime import datetime
|
4
|
-
from typing import
|
4
|
+
from typing import Callable, Dict, List, Optional
|
5
5
|
|
6
6
|
import yaml
|
7
7
|
from fastapi import Request
|
8
8
|
|
9
|
-
from khoj.database.adapters import ConversationAdapters, EntryAdapters
|
10
9
|
from khoj.database.models import Agent, KhojUser
|
11
10
|
from khoj.processor.conversation import prompts
|
12
11
|
from khoj.processor.conversation.utils import (
|
@@ -191,18 +190,18 @@ async def execute_information_collection(
|
|
191
190
|
document_results = result[0]
|
192
191
|
this_iteration.context += document_results
|
193
192
|
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
193
|
+
if not is_none_or_empty(document_results):
|
194
|
+
try:
|
195
|
+
distinct_files = {d["file"] for d in document_results}
|
196
|
+
distinct_headings = set([d["compiled"].split("\n")[0] for d in document_results if "compiled" in d])
|
197
|
+
# Strip only leading # from headings
|
198
|
+
headings_str = "\n- " + "\n- ".join(distinct_headings).replace("#", "")
|
199
|
+
async for result in send_status_func(
|
200
|
+
f"**Found {len(distinct_headings)} Notes Across {len(distinct_files)} Files**: {headings_str}"
|
201
|
+
):
|
202
|
+
yield result
|
203
|
+
except Exception as e:
|
204
|
+
logger.error(f"Error extracting document references: {e}", exc_info=True)
|
206
205
|
|
207
206
|
elif this_iteration.tool == ConversationCommand.Online:
|
208
207
|
async for result in search_online(
|
@@ -306,13 +305,13 @@ async def execute_information_collection(
|
|
306
305
|
if document_results or online_results or code_results or summarize_files:
|
307
306
|
results_data = f"**Results**:\n"
|
308
307
|
if document_results:
|
309
|
-
results_data += f"**Document References
|
308
|
+
results_data += f"**Document References**:\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
|
310
309
|
if online_results:
|
311
|
-
results_data += f"**Online Results
|
310
|
+
results_data += f"**Online Results**:\n{yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
|
312
311
|
if code_results:
|
313
|
-
results_data += f"**Code Results
|
312
|
+
results_data += f"**Code Results**:\n{yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
|
314
313
|
if summarize_files:
|
315
|
-
results_data += f"**Summarized Files
|
314
|
+
results_data += f"**Summarized Files**:\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
|
316
315
|
|
317
316
|
# intermediate_result = await extract_relevant_info(this_iteration.query, results_data, agent)
|
318
317
|
this_iteration.summarizedResult = results_data
|
khoj/utils/helpers.py
CHANGED
@@ -101,6 +101,15 @@ def merge_dicts(priority_dict: dict, default_dict: dict):
|
|
101
101
|
return merged_dict
|
102
102
|
|
103
103
|
|
104
|
+
def fix_json_dict(json_dict: dict) -> dict:
|
105
|
+
for k, v in json_dict.items():
|
106
|
+
if v == "True" or v == "False":
|
107
|
+
json_dict[k] = v == "True"
|
108
|
+
if isinstance(v, dict):
|
109
|
+
json_dict[k] = fix_json_dict(v)
|
110
|
+
return json_dict
|
111
|
+
|
112
|
+
|
104
113
|
def get_file_type(file_type: str, file_content: bytes) -> tuple[str, str]:
|
105
114
|
"Get file type from file mime type"
|
106
115
|
|
@@ -359,9 +368,9 @@ tool_descriptions_for_llm = {
|
|
359
368
|
|
360
369
|
function_calling_description_for_llm = {
|
361
370
|
ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
|
362
|
-
ConversationCommand.Online: "To search the internet for information. Provide all relevant context to ensure new searches, not
|
363
|
-
ConversationCommand.Webpage: "To extract information from
|
364
|
-
ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create
|
371
|
+
ConversationCommand.Online: "To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed.",
|
372
|
+
ConversationCommand.Webpage: "To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share the webpage links and information to extract in your query.",
|
373
|
+
ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create charts for user. Matplotlib, bs4, pandas, numpy, etc. are available.",
|
365
374
|
}
|
366
375
|
|
367
376
|
mode_descriptions_for_llm = {
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: khoj
|
3
|
-
Version: 1.27.2.
|
3
|
+
Version: 1.27.2.dev167
|
4
4
|
Summary: Your Second Brain
|
5
5
|
Project-URL: Homepage, https://khoj.dev
|
6
6
|
Project-URL: Documentation, https://docs.khoj.dev
|
@@ -36,7 +36,7 @@ Requires-Dist: django==5.0.9
|
|
36
36
|
Requires-Dist: docx2txt==0.8
|
37
37
|
Requires-Dist: einops==0.8.0
|
38
38
|
Requires-Dist: fastapi>=0.110.0
|
39
|
-
Requires-Dist: google-generativeai==0.
|
39
|
+
Requires-Dist: google-generativeai==0.8.3
|
40
40
|
Requires-Dist: httpx==0.25.0
|
41
41
|
Requires-Dist: huggingface-hub>=0.22.2
|
42
42
|
Requires-Dist: itsdangerous==2.1.2
|