khoj 1.42.1.dev10__py3-none-any.whl → 1.42.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +2 -0
- khoj/database/adapters/__init__.py +9 -7
- khoj/database/models/__init__.py +9 -9
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{2117-5a41630a2bd2eae8.js → 2117-056a00add390772b.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/7127-79a3af5138960272.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{5138-2cce449fd2454abf.js → 7211-7fedd2ee3655239c.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-1b6273baddb72146.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-ef89ac958e78aa81.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-d71351493e1f7c2b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-45ae5e99e8a61821.js → page-4bbe55de8b080c1f.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-4505b79deb734a30.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-9a167dc9b5fcd464.js → page-e3f49c25480e3be4.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{main-876327ac335776ab.js → main-63d6432f34cdf74b.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-964e8ed3380daff1.js → webpack-e4c73eaddc365142.js} +1 -1
- khoj/interface/compiled/_next/static/css/2b1cdb68b799b876.css +1 -0
- khoj/interface/compiled/_next/static/css/440ae0f0f650dc35.css +1 -0
- khoj/interface/compiled/_next/static/css/{9c223d337a984468.css → 7017ee76c2f2cd87.css} +1 -1
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +1 -1
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +1 -1
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +1 -1
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +19 -134
- khoj/processor/conversation/anthropic/utils.py +1 -1
- khoj/processor/conversation/google/gemini_chat.py +20 -141
- khoj/processor/conversation/offline/chat_model.py +23 -153
- khoj/processor/conversation/openai/gpt.py +14 -128
- khoj/processor/conversation/prompts.py +2 -63
- khoj/processor/conversation/utils.py +94 -89
- khoj/processor/image/generate.py +16 -11
- khoj/processor/operator/__init__.py +2 -3
- khoj/processor/operator/operator_agent_binary.py +11 -11
- khoj/processor/operator/operator_environment_computer.py +2 -2
- khoj/processor/tools/online_search.py +9 -3
- khoj/processor/tools/run_code.py +5 -5
- khoj/routers/api.py +5 -527
- khoj/routers/api_automation.py +243 -0
- khoj/routers/api_chat.py +48 -129
- khoj/routers/helpers.py +373 -121
- khoj/routers/research.py +11 -43
- khoj/utils/helpers.py +0 -6
- {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dist-info}/METADATA +3 -3
- {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dist-info}/RECORD +58 -57
- khoj/interface/compiled/_next/static/chunks/7127-d3199617463d45f0.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/page-465741d9149dfd48.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-898079bcea5376f4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +0 -1
- khoj/interface/compiled/_next/static/css/76c658ee459140a9.css +0 -1
- khoj/interface/compiled/_next/static/css/fca983d49c3dd1a3.css +0 -1
- /khoj/interface/compiled/_next/static/{2niR8lV9_OpGs1vdb2yMp → BDHACq0ud8EERJ3YZ4aWo}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{2niR8lV9_OpGs1vdb2yMp → BDHACq0ud8EERJ3YZ4aWo}/_ssgManifest.js +0 -0
- {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dist-info}/WHEEL +0 -0
- {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dist-info}/entry_points.txt +0 -0
- {khoj-1.42.1.dev10.dist-info → khoj-1.42.2.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/openai/gpt.py

@@ -1,14 +1,11 @@
-import asyncio
 import logging
-from datetime import datetime, timedelta
+from datetime import datetime
 from typing import AsyncGenerator, Dict, List, Optional
 
-import pyjson5
-from langchain_core.messages.chat import ChatMessage
 from openai.lib._pydantic import _ensure_strict_json_schema
 from pydantic import BaseModel
 
-from khoj.database.models import Agent, ChatModel, KhojUser
+from khoj.database.models import Agent, ChatMessageModel, ChatModel
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.openai.utils import (
     chat_completion_with_backoff,
@@ -19,105 +16,16 @@ from khoj.processor.conversation.utils import (
     JsonSupport,
     OperatorRun,
     ResponseWithThought,
-    clean_json,
-    construct_question_history,
-    construct_structured_message,
     generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import (
-    ConversationCommand,
-    is_none_or_empty,
-    truncate_code_context,
-)
+from khoj.utils.helpers import is_none_or_empty, truncate_code_context
 from khoj.utils.rawconfig import FileAttachment, LocationData
 from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
 
-def extract_questions(
-    text,
-    model: Optional[str] = "gpt-4o-mini",
-    conversation_log={},
-    api_key=None,
-    api_base_url=None,
-    location_data: LocationData = None,
-    user: KhojUser = None,
-    query_images: Optional[list[str]] = None,
-    vision_enabled: bool = False,
-    personality_context: Optional[str] = None,
-    query_files: str = None,
-    tracer: dict = {},
-):
-    """
-    Infer search queries to retrieve relevant notes to answer user query
-    """
-    location = f"{location_data}" if location_data else "Unknown"
-    username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
-
-    # Extract Past User Message and Inferred Questions from Conversation Log
-    chat_history = construct_question_history(conversation_log)
-
-    # Get dates relative to today for prompt creation
-    today = datetime.today()
-    current_new_year = today.replace(month=1, day=1)
-    last_new_year = current_new_year.replace(year=today.year - 1)
-
-    prompt = prompts.extract_questions.format(
-        current_date=today.strftime("%Y-%m-%d"),
-        day_of_week=today.strftime("%A"),
-        current_month=today.strftime("%Y-%m"),
-        last_new_year=last_new_year.strftime("%Y"),
-        last_new_year_date=last_new_year.strftime("%Y-%m-%d"),
-        current_new_year_date=current_new_year.strftime("%Y-%m-%d"),
-        bob_tom_age_difference={current_new_year.year - 1984 - 30},
-        bob_age={current_new_year.year - 1984},
-        chat_history=chat_history,
-        text=text,
-        yesterday_date=(today - timedelta(days=1)).strftime("%Y-%m-%d"),
-        location=location,
-        username=username,
-        personality_context=personality_context,
-    )
-
-    prompt = construct_structured_message(
-        message=prompt,
-        images=query_images,
-        model_type=ChatModel.ModelType.OPENAI,
-        vision_enabled=vision_enabled,
-        attached_file_context=query_files,
-    )
-
-    messages = []
-    messages.append(ChatMessage(content=prompt, role="user"))
-
-    response = send_message_to_model(
-        messages,
-        api_key,
-        model,
-        response_type="json_object",
-        api_base_url=api_base_url,
-        tracer=tracer,
-    )
-
-    # Extract, Clean Message from GPT's Response
-    try:
-        response = clean_json(response)
-        response = pyjson5.loads(response)
-        response = [q.strip() for q in response["queries"] if q.strip()]
-        if not isinstance(response, list) or not response:
-            logger.error(f"Invalid response for constructing subqueries: {response}")
-            return [text]
-        return response
-    except:
-        logger.warning(f"GPT returned invalid JSON. Falling back to using user message as search query.\n{response}")
-        questions = [text]
-
-    logger.debug(f"Extracted Questions by GPT: {questions}")
-    return questions
-
-
 def send_message_to_model(
     messages,
     api_key,
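The provider-specific extract_questions helper is deleted here, and the file list above shows the same removal in the Anthropic, Gemini, and offline modules, so question extraction presumably routes through the shared send_message_to_model path and the consolidated prompt templates further below. As a standalone illustration of the JSON-cleanup fallback the removed code performed, here is a minimal sketch; parse_search_queries is a hypothetical stand-in, and plain json.loads replaces the removed clean_json + pyjson5 pair:

```python
import json
from typing import List

def parse_search_queries(raw_response: str, fallback_query: str) -> List[str]:
    """Parse a {"queries": [...]} model reply, falling back to the user's
    original query on malformed JSON, as the removed code did."""
    try:
        # Strip markdown code fences the model may wrap around its JSON.
        cleaned = raw_response.strip().removeprefix("```json").removesuffix("```").strip()
        queries = [q.strip() for q in json.loads(cleaned)["queries"] if q.strip()]
        return queries or [fallback_query]
    except (ValueError, KeyError, TypeError):
        return [fallback_query]

print(parse_search_queries('{"queries": ["Angkor Wat visit", "Cambodia expenses"]}', "How was my trip?"))
print(parse_search_queries("not json", "How was my trip?"))
```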
@@ -161,32 +69,32 @@ def send_message_to_model(
 
 
 async def converse_openai(
+    # Query
     user_query: str,
+    # Context
     references: list[dict],
     online_results: Optional[Dict[str, Dict]] = None,
     code_results: Optional[Dict[str, Dict]] = None,
     operator_results: Optional[List[OperatorRun]] = None,
-
+    query_images: Optional[list[str]] = None,
+    query_files: str = None,
+    generated_files: List[FileAttachment] = None,
+    generated_asset_results: Dict[str, Dict] = {},
+    program_execution_context: List[str] = None,
+    location_data: LocationData = None,
+    chat_history: list[ChatMessageModel] = [],
     model: str = "gpt-4o-mini",
     api_key: Optional[str] = None,
     api_base_url: Optional[str] = None,
     temperature: float = 0.4,
-    completion_func=None,
-    conversation_commands=[ConversationCommand.Default],
     max_prompt_size=None,
     tokenizer_name=None,
-    location_data: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
-    query_images: Optional[list[str]] = None,
     vision_available: bool = False,
-    query_files: str = None,
-    generated_files: List[FileAttachment] = None,
-    generated_asset_results: Dict[str, Dict] = {},
-    program_execution_context: List[str] = None,
     deepthought: Optional[bool] = False,
     tracer: dict = {},
-) -> AsyncGenerator[
+) -> AsyncGenerator[ResponseWithThought, None]:
     """
     Converse with user using OpenAI's ChatGPT
     """
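The rewritten converse_openai signature groups parameters by role (query, then context, then model settings), swaps the untyped conversation log for a list of ChatMessageModel, and commits to yielding ResponseWithThought chunks. A rough sketch of the typed history a caller would now assemble, using dataclass stand-ins rather than khoj's actual models:

```python
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class Intent:  # stand-in for khoj.database.models.Intent
    type: str = "remember"
    query: Optional[str] = None
    inferred_queries: Optional[List[str]] = None

@dataclass
class ChatMessageModel:  # stand-in for khoj.database.models.ChatMessageModel
    by: str  # "you" or "khoj"
    message: str = ""
    intent: Intent = field(default_factory=Intent)

# The chat_history argument is now a typed list instead of a conversation_log dict.
chat_history = [
    ChatMessageModel(by="you", message="Summarize my notes on Rust."),
    ChatMessageModel(
        by="khoj",
        message="Your notes cover ownership and lifetimes.",
        intent=Intent(query="Summarize my notes on Rust.", inferred_queries=["Rust notes"]),
    ),
]
print(len(chat_history))
```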
@@ -214,20 +122,6 @@ async def converse_openai(
         user_name_prompt = prompts.user_name.format(name=user_name)
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
-    # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
-        response = prompts.no_notes_found.format()
-        if completion_func:
-            asyncio.create_task(completion_func(chat_response=response))
-        yield response
-        return
-    elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
-        response = prompts.no_online_results_found.format()
-        if completion_func:
-            asyncio.create_task(completion_func(chat_response=response))
-        yield response
-        return
-
     context_message = ""
     if not is_none_or_empty(references):
         context_message = f"{prompts.notes_conversation.format(references=yaml_dump(references))}\n\n"
@@ -251,7 +145,7 @@ async def converse_openai(
     messages = generate_chatml_messages_with_context(
         user_query,
         system_prompt,
-
+        chat_history,
         context_message=context_message,
         model_name=model,
         max_prompt_size=max_prompt_size,
@@ -267,7 +161,6 @@ async def converse_openai(
     logger.debug(f"Conversation Context for GPT: {messages_to_print(messages)}")
 
     # Get Response from GPT
-    full_response = ""
     async for chunk in chat_completion_with_backoff(
         messages=messages,
         model_name=model,
@@ -275,17 +168,10 @@ async def converse_openai(
         openai_api_key=api_key,
         api_base_url=api_base_url,
         deepthought=deepthought,
-        model_kwargs={"stop": ["Notes:\n["]},
         tracer=tracer,
     ):
-        if chunk.response:
-            full_response += chunk.response
         yield chunk
 
-    # Call completion_func once finish streaming and we have the full response
-    if completion_func:
-        asyncio.create_task(completion_func(chat_response=full_response))
-
 
 def clean_response_schema(schema: BaseModel | dict) -> dict:
     """
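With completion_func gone, converse_openai no longer accumulates the streamed text or schedules a completion callback; both responsibilities move to the caller. A minimal sketch of the new consumption pattern, with fake_converse standing in for the real generator:

```python
import asyncio
from dataclasses import dataclass
from typing import AsyncGenerator

@dataclass
class ResponseWithThought:  # stand-in for khoj's streamed chunk type
    response: str = ""

async def fake_converse(query: str) -> AsyncGenerator[ResponseWithThought, None]:
    # Stand-in for converse_openai: yields typed chunks as they stream in.
    for token in ["Rust ", "notes ", "summarized."]:
        yield ResponseWithThought(response=token)

async def main() -> None:
    full_response = ""
    async for chunk in fake_converse("summarize"):
        if chunk.response:
            full_response += chunk.response  # accumulation now happens at the call site
    print(full_response)  # e.g. persist afterwards via the conversation log

asyncio.run(main())
```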
khoj/processor/conversation/prompts.py

@@ -549,68 +549,7 @@ Q: {query}
 )
 
 
-extract_questions = PromptTemplate.from_template(
-    """
-You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes and documents.
-Construct search queries to retrieve relevant information to answer the user's question.
-- You will be provided example and actual past user questions(Q), search queries(Khoj) and answers(A) for context.
-- Add as much context from the previous questions and answers as required into your search queries.
-- Break your search down into multiple search queries from a diverse set of lenses to retrieve all related documents.
-- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
-- When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
-{personality_context}
-What searches will you perform to answer the user's question? Respond with search queries as list of strings in a JSON object.
-Current Date: {day_of_week}, {current_date}
-User's Location: {location}
-{username}
-
-Examples
----
-Q: How was my trip to Cambodia?
-Khoj: {{"queries": ["How was my trip to Cambodia?", "Angkor Wat temple visit", "Flight to Phnom Penh", "Expenses in Cambodia", "Stay in Cambodia"]}}
-A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.
-
-Q: Who did i visit that temple with?
-Khoj: {{"queries": ["Who did I visit the Angkor Wat Temple in Cambodia with?"]}}
-A: You visited the Angkor Wat Temple in Cambodia with Pablo, Namita and Xi.
-
-Q: What national parks did I go to last year?
-Khoj: {{"queries": ["National park I visited in {last_new_year} dt>='{last_new_year_date}' dt<'{current_new_year_date}'"]}}
-A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year}.
-
-Q: How can you help me?
-Khoj: {{"queries": ["Social relationships", "Physical and mental health", "Education and career", "Personal life goals and habits"]}}
-A: I can help you live healthier and happier across work and personal life
-
-Q: How many tennis balls fit in the back of a 2002 Honda Civic?
-Khoj: {{"queries": ["What is the size of a tennis ball?", "What is the trunk size of a 2002 Honda Civic?"]}}
-A: 1085 tennis balls will fit in the trunk of a Honda Civic
-
-Q: Share some random, interesting experiences from this month
-Khoj: {{"queries": ["Exciting travel adventures from {current_month}", "Fun social events dt>='{current_month}-01' dt<'{current_date}'", "Intense emotional experiences in {current_month}"]}}
-A: You had a great time at the local beach with your friends, attended a music concert and had a deep conversation with your friend, Khalid.
-
-Q: Is Bob older than Tom?
-Khoj: {{"queries": ["When was Bob born?", "What is Tom's age?"]}}
-A: Yes, Bob is older than Tom. As Bob was born on 1984-01-01 and Tom is 30 years old.
-
-Q: What is their age difference?
-Khoj: {{"queries": ["What is Bob's age?", "What is Tom's age?"]}}
-A: Bob is {bob_tom_age_difference} years older than Tom. As Bob is {bob_age} years old and Tom is 30 years old.
-
-Q: Who all did I meet here yesterday?
-Khoj: {{"queries": ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
-A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.
-
-Actual
----
-{chat_history}
-Q: {text}
-Khoj:
-""".strip()
-)
-
-extract_questions_anthropic_system_prompt = PromptTemplate.from_template(
+extract_questions_system_prompt = PromptTemplate.from_template(
     """
 You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes.
 Construct search queries to retrieve relevant information to answer the user's question.
@@ -651,7 +590,7 @@ A: You had a great time at the local beach with your friends, attended a music c
 """.strip()
 )
 
-extract_questions_anthropic_user_message = PromptTemplate.from_template(
+extract_questions_user_message = PromptTemplate.from_template(
     """
 Here's our most recent chat history:
 {chat_history}
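The OpenAI-specific extract_questions template and the Anthropic-specific pair collapse into one provider-agnostic system/user prompt pair. A cut-down sketch of rendering such a pair with PromptTemplate; the template text here is abbreviated, not the full prompt from prompts.py:

```python
from langchain_core.prompts import PromptTemplate

system_prompt = PromptTemplate.from_template(
    """
You are Khoj, a document search assistant. Construct search queries to answer the user's question.
Current Date: {day_of_week}, {current_date}
User's Location: {location}
""".strip()
)
user_message = PromptTemplate.from_template(
    """
Here's our most recent chat history:
{chat_history}
Q: {text}
""".strip()
)

print(system_prompt.format(day_of_week="Monday", current_date="2025-06-02", location="Berlin"))
print(user_message.format(chat_history="Q: hi\nKhoj: hello", text="What did I do yesterday?"))
```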
khoj/processor/conversation/utils.py

@@ -24,7 +24,13 @@ from pydantic import BaseModel
 from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
 
 from khoj.database.adapters import ConversationAdapters
-from khoj.database.models import ChatModel, ClientApplication, KhojUser
+from khoj.database.models import (
+    ChatMessageModel,
+    ChatModel,
+    ClientApplication,
+    Intent,
+    KhojUser,
+)
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
 from khoj.search_filter.base_filter import BaseFilter
@@ -161,8 +167,8 @@ def construct_iteration_history(
     previous_iterations: List[ResearchIteration],
     previous_iteration_prompt: str,
     query: str = None,
-) -> list[
-    iteration_history: list[
+) -> list[ChatMessageModel]:
+    iteration_history: list[ChatMessageModel] = []
     previous_iteration_messages: list[dict] = []
     for idx, iteration in enumerate(previous_iterations):
         iteration_data = previous_iteration_prompt.format(
@@ -176,46 +182,46 @@ def construct_iteration_history(
 
     if previous_iteration_messages:
         if query:
-            iteration_history.append(
+            iteration_history.append(ChatMessageModel(by="you", message=query))
         iteration_history.append(
-
-
-
-
-
+            ChatMessageModel(
+                by="khoj",
+                intent={"type": "remember", "query": query},
+                message=previous_iteration_messages,
+            )
         )
     return iteration_history
 
 
-def construct_chat_history(
-
-    for chat in
-        if chat
-            if chat
-
-
-        elif chat
-
-
-        elif chat
-
-
-        elif chat
-
-            raw_query_files = chat.
+def construct_chat_history(chat_history: list[ChatMessageModel], n: int = 4, agent_name="AI") -> str:
+    chat_history_str = ""
+    for chat in chat_history[-n:]:
+        if chat.by == "khoj" and chat.intent.type in ["remember", "reminder", "summarize"]:
+            if chat.intent.inferred_queries:
+                chat_history_str += f'{agent_name}: {{"queries": {chat.intent.inferred_queries}}}\n'
+            chat_history_str += f"{agent_name}: {chat.message}\n\n"
+        elif chat.by == "khoj" and chat.images:
+            chat_history_str += f"User: {chat.intent.query}\n"
+            chat_history_str += f"{agent_name}: [generated image redacted for space]\n"
+        elif chat.by == "khoj" and ("excalidraw" in chat.intent.type):
+            chat_history_str += f"User: {chat.intent.query}\n"
+            chat_history_str += f"{agent_name}: {chat.intent.inferred_queries[0]}\n"
+        elif chat.by == "you":
+            chat_history_str += f"User: {chat.message}\n"
+            raw_query_files = chat.queryFiles
             if raw_query_files:
                 query_files: Dict[str, str] = {}
                 for file in raw_query_files:
                     query_files[file["name"]] = file["content"]
 
                 query_file_context = gather_raw_query_files(query_files)
-
+                chat_history_str += f"User: {query_file_context}\n"
 
-    return
+    return chat_history_str
 
 
 def construct_question_history(
-    conversation_log:
+    conversation_log: list[ChatMessageModel],
     include_query: bool = True,
     lookback: int = 6,
     query_prefix: str = "Q",
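construct_chat_history now flattens typed ChatMessageModel entries into a transcript string via attribute access instead of dict lookups. A standalone mirror of the core traversal (abridged branches, stand-in types, not khoj's API):

```python
from dataclasses import dataclass, field
from typing import List, Optional

@dataclass
class Intent:
    type: str = "remember"
    inferred_queries: Optional[List[str]] = None

@dataclass
class Chat:  # stand-in for ChatMessageModel
    by: str
    message: str = ""
    intent: Intent = field(default_factory=Intent)

def flatten_history(history: List[Chat], n: int = 4, agent_name: str = "AI") -> str:
    transcript = ""
    for chat in history[-n:]:  # only the last n messages, as in the diff
        if chat.by == "you":
            transcript += f"User: {chat.message}\n"
        elif chat.by == "khoj" and chat.intent.type in ["remember", "reminder", "summarize"]:
            if chat.intent.inferred_queries:
                transcript += f'{agent_name}: {{"queries": {chat.intent.inferred_queries}}}\n'
            transcript += f"{agent_name}: {chat.message}\n\n"
    return transcript

history = [Chat(by="you", message="Plan my week"),
           Chat(by="khoj", message="Here is a plan.", intent=Intent(inferred_queries=["weekly plan"]))]
print(flatten_history(history))
```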
@@ -226,16 +232,16 @@ def construct_question_history(
     """
     history_parts = ""
     original_query = None
-    for chat in conversation_log
-        if chat
-            original_query = chat.
+    for chat in conversation_log[-lookback:]:
+        if chat.by == "you":
+            original_query = json.dumps(chat.message)
             history_parts += f"{query_prefix}: {original_query}\n"
-        if chat
+        if chat.by == "khoj":
             if original_query is None:
                 continue
 
-            message = chat.
-            inferred_queries_list = chat.
+            message = chat.message
+            inferred_queries_list = chat.intent.inferred_queries or []
 
             # Ensure inferred_queries_list is a list, defaulting to the original query in a list
             if not inferred_queries_list:
@@ -246,7 +252,7 @@ def construct_question_history(
 
             if include_query:
                 # Ensure 'type' exists and is a string before checking 'to-image'
-                intent_type = chat.
+                intent_type = chat.intent.type if chat.intent and chat.intent.type else ""
                 if "to-image" not in intent_type:
                     history_parts += f'{agent_name}: {{"queries": {inferred_queries_list}}}\n'
                     history_parts += f"A: {message}\n\n"
@@ -259,7 +265,7 @@ def construct_question_history(
     return history_parts
 
 
-def construct_chat_history_for_operator(conversation_history: dict, n: int = 6) -> list[AgentMessage]:
+def construct_chat_history_for_operator(conversation_history: List[ChatMessageModel], n: int = 6) -> list[AgentMessage]:
     """
     Construct chat history for operator agent in conversation log.
     Only include last n completed turns (i.e with user and khoj message).
@@ -267,22 +273,22 @@ def construct_chat_history_for_operator(conversation_history: dict, n: int = 6)
     chat_history: list[AgentMessage] = []
     user_message: Optional[AgentMessage] = None
 
-    for chat in conversation_history
+    for chat in conversation_history:
         if len(chat_history) >= n:
             break
-        if chat
-            content = [{"type": "text", "text": chat
-            for file in chat.
+        if chat.by == "you" and chat.message:
+            content = [{"type": "text", "text": chat.message}]
+            for file in chat.queryFiles or []:
                 content += [{"type": "text", "text": f'## File: {file["name"]}\n\n{file["content"]}'}]
             user_message = AgentMessage(role="user", content=content)
-        elif chat
-            chat_history += [user_message, AgentMessage(role="assistant", content=chat
+        elif chat.by == "khoj" and chat.message:
+            chat_history += [user_message, AgentMessage(role="assistant", content=chat.message)]
     return chat_history
 
 
 def construct_tool_chat_history(
     previous_iterations: List[ResearchIteration], tool: ConversationCommand = None
-) ->
+) -> List[ChatMessageModel]:
     """
     Construct chat history from previous iterations for a specific tool
 
@@ -313,22 +319,23 @@ def construct_tool_chat_history(
         tool or ConversationCommand(iteration.tool), base_extractor
     )
     chat_history += [
-
-
-
-
-
-
-
-
-
-
-
-
-
+        ChatMessageModel(
+            by="you",
+            message=iteration.query,
+        ),
+        ChatMessageModel(
+            by="khoj",
+            intent=Intent(
+                type="remember",
+                query=iteration.query,
+                inferred_queries=inferred_query_extractor(iteration),
+                memory_type="notes",
+            ),
+            message=iteration.summarizedResult,
+        ),
     ]
 
-    return
+    return chat_history
 
 
 class ChatEvent(Enum):
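construct_tool_chat_history now replays each research iteration as a completed turn: the query as a "you" message and the summarized tool result as a "khoj" message whose Intent carries the tool's inferred queries. A sketch of that shape under stand-in types:

```python
from dataclasses import dataclass
from typing import List, Optional

@dataclass
class Intent:  # stand-in for khoj's Intent
    type: str
    query: str
    inferred_queries: List[str]
    memory_type: str

@dataclass
class ChatMessageModel:  # stand-in for khoj's ChatMessageModel
    by: str
    message: str = ""
    intent: Optional[Intent] = None

def iteration_to_turn(query: str, inferred: List[str], summary: str) -> List[ChatMessageModel]:
    # One iteration becomes a user/assistant pair, mirroring the added lines above.
    return [
        ChatMessageModel(by="you", message=query),
        ChatMessageModel(
            by="khoj",
            intent=Intent(type="remember", query=query, inferred_queries=inferred, memory_type="notes"),
            message=summary,
        ),
    ]

turn = iteration_to_turn("rust notes", ["rust notes dt>='2025-01-01'"], "Found 3 notes.")
print([m.by for m in turn])
```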
@@ -349,8 +356,8 @@ def message_to_log(
     chat_response,
     user_message_metadata={},
     khoj_message_metadata={},
-
-):
+    chat_history: List[ChatMessageModel] = [],
+) -> List[ChatMessageModel]:
     """Create json logs from messages, metadata for conversation log"""
     default_khoj_message_metadata = {
         "intent": {"type": "remember", "memory-type": "notes", "query": user_message},
@@ -369,15 +376,17 @@ def message_to_log(
     khoj_log = merge_dicts(khoj_message_metadata, default_khoj_message_metadata)
     khoj_log = merge_dicts({"message": chat_response, "by": "khoj", "created": khoj_response_time}, khoj_log)
 
-
-
+    human_message = ChatMessageModel(**human_log)
+    khoj_message = ChatMessageModel(**khoj_log)
+    chat_history.extend([human_message, khoj_message])
+    return chat_history
 
 
 async def save_to_conversation_log(
     q: str,
     chat_response: str,
     user: KhojUser,
-
+    chat_history: List[ChatMessageModel],
     user_message_time: str = None,
     compiled_references: List[Dict[str, Any]] = [],
     online_results: Dict[str, Any] = {},
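message_to_log now promotes the raw log dicts into ChatMessageModel instances and returns the typed history rather than extending a list of dicts. A standalone sketch of that promotion step, with a stand-in model built the same way (ChatMessageModel(**log_dict)):

```python
from dataclasses import dataclass
from typing import List

@dataclass
class ChatMessageModel:  # stand-in for khoj's model
    message: str
    by: str

def message_to_log(user_message: str, chat_response: str,
                   chat_history: List[ChatMessageModel]) -> List[ChatMessageModel]:
    human_log = {"message": user_message, "by": "you"}
    khoj_log = {"message": chat_response, "by": "khoj"}
    # Promote dicts to typed models before extending the history.
    chat_history.extend([ChatMessageModel(**human_log), ChatMessageModel(**khoj_log)])
    return chat_history

history: List[ChatMessageModel] = []
print(message_to_log("hi", "hello!", history))
```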
@@ -427,11 +436,11 @@ async def save_to_conversation_log(
         chat_response=chat_response,
         user_message_metadata=user_message_metadata,
         khoj_message_metadata=khoj_message_metadata,
-
+        chat_history=chat_history,
     )
     await ConversationAdapters.save_conversation(
         user,
-
+        updated_conversation,
         client_application=client_application,
         conversation_id=conversation_id,
         user_message=q,
@@ -502,7 +511,7 @@ def gather_raw_query_files(
 def generate_chatml_messages_with_context(
     user_message: str,
     system_message: str = None,
-
+    chat_history: list[ChatMessageModel] = [],
     model_name="gpt-4o-mini",
     loaded_model: Optional[Llama] = None,
     max_prompt_size=None,
@@ -529,21 +538,21 @@ def generate_chatml_messages_with_context(
 
     # Extract Chat History for Context
     chatml_messages: List[ChatMessage] = []
-    for chat in
+    for chat in chat_history:
         message_context = []
         message_attached_files = ""
 
         generated_assets = {}
 
-        chat_message = chat.
-        role = "user" if chat
+        chat_message = chat.message
+        role = "user" if chat.by == "you" else "assistant"
 
         # Legacy code to handle excalidraw diagrams prior to Dec 2024
-        if chat
-            chat_message = chat[
+        if chat.by == "khoj" and "excalidraw" in (chat.intent.type or ""):
+            chat_message = (chat.intent.inferred_queries or [])[0]
 
-        if chat.
-            raw_query_files = chat.
+        if chat.queryFiles:
+            raw_query_files = chat.queryFiles
             query_files_dict = dict()
             for file in raw_query_files:
                 query_files_dict[file["name"]] = file["content"]
@@ -551,24 +560,24 @@ def generate_chatml_messages_with_context(
             message_attached_files = gather_raw_query_files(query_files_dict)
             chatml_messages.append(ChatMessage(content=message_attached_files, role=role))
 
-        if not is_none_or_empty(chat.
+        if not is_none_or_empty(chat.onlineContext):
             message_context += [
                 {
                     "type": "text",
-                    "text": f"{prompts.online_search_conversation.format(online_results=chat.
+                    "text": f"{prompts.online_search_conversation.format(online_results=chat.onlineContext)}",
                 }
             ]
 
-        if not is_none_or_empty(chat.
+        if not is_none_or_empty(chat.codeContext):
             message_context += [
                 {
                     "type": "text",
-                    "text": f"{prompts.code_executed_context.format(code_results=chat.
+                    "text": f"{prompts.code_executed_context.format(code_results=chat.codeContext)}",
                 }
             ]
 
-        if not is_none_or_empty(chat.
-            operator_context = chat.
+        if not is_none_or_empty(chat.operatorContext):
+            operator_context = chat.operatorContext
             operator_content = "\n\n".join([f'## Task: {oc["query"]}\n{oc["response"]}\n' for oc in operator_context])
             message_context += [
                 {
@@ -577,13 +586,9 @@ def generate_chatml_messages_with_context(
                 }
             ]
 
-        if not is_none_or_empty(chat.
+        if not is_none_or_empty(chat.context):
             references = "\n\n".join(
-                {
-                    f"# File: {item['file']}\n## {item['compiled']}\n"
-                    for item in chat.get("context") or []
-                    if isinstance(item, dict)
-                }
+                {f"# File: {item.file}\n## {item.compiled}\n" for item in chat.context or [] if isinstance(item, dict)}
             )
             message_context += [{"type": "text", "text": f"{prompts.notes_conversation.format(references=references)}"}]
 
@@ -591,14 +596,14 @@ def generate_chatml_messages_with_context(
             reconstructed_context_message = ChatMessage(content=message_context, role="user")
             chatml_messages.insert(0, reconstructed_context_message)
 
-        if not is_none_or_empty(chat.
+        if not is_none_or_empty(chat.images) and role == "assistant":
             generated_assets["image"] = {
-                "query": chat.
+                "query": (chat.intent.inferred_queries or [user_message])[0],
             }
 
-        if not is_none_or_empty(chat.
+        if not is_none_or_empty(chat.mermaidjsDiagram) and role == "assistant":
             generated_assets["diagram"] = {
-                "query": chat.
+                "query": (chat.intent.inferred_queries or [user_message])[0],
             }
 
         if not is_none_or_empty(generated_assets):
@@ -610,7 +615,7 @@ def generate_chatml_messages_with_context(
         )
 
         message_content = construct_structured_message(
-            chat_message, chat.
+            chat_message, chat.images if role == "user" else [], model_type, vision_enabled
         )
 
         reconstructed_message = ChatMessage(content=message_content, role=role)
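The rest of generate_chatml_messages_with_context follows the same pattern: dict lookups such as chat.get("context") become attribute reads (chat.context, chat.onlineContext, chat.queryFiles, chat.images). A before/after sketch with a stand-in model:

```python
from dataclasses import dataclass, field
from typing import Dict, List, Optional

@dataclass
class ChatMessageModel:  # stand-in exposing the fields this diff reads
    by: str
    message: str = ""
    onlineContext: Optional[Dict] = None
    queryFiles: List[Dict[str, str]] = field(default_factory=list)

legacy = {"by": "you", "queryFiles": [{"name": "notes.md", "content": "..."}]}  # old dict shape
chat = ChatMessageModel(by="you", queryFiles=[{"name": "notes.md", "content": "..."}])  # new typed shape

files_before = legacy.get("queryFiles") or []   # dict-style access, as removed
files_after = chat.queryFiles or []             # attribute access, as added
assert [f["name"] for f in files_before] == [f["name"] for f in files_after]
print(files_after)
```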