khoj 1.16.1.dev16__py3-none-any.whl → 1.16.1.dev26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +21 -6
- khoj/processor/conversation/anthropic/anthropic_chat.py +10 -4
- khoj/processor/conversation/offline/chat_model.py +19 -7
- khoj/processor/conversation/offline/utils.py +2 -0
- khoj/processor/conversation/openai/gpt.py +9 -3
- khoj/processor/conversation/prompts.py +56 -25
- khoj/processor/conversation/utils.py +5 -2
- khoj/routers/api.py +2 -1
- khoj/routers/helpers.py +1 -3
- khoj/routers/indexer.py +1 -1
- {khoj-1.16.1.dev16.dist-info → khoj-1.16.1.dev26.dist-info}/METADATA +3 -3
- {khoj-1.16.1.dev16.dist-info → khoj-1.16.1.dev26.dist-info}/RECORD +15 -15
- {khoj-1.16.1.dev16.dist-info → khoj-1.16.1.dev26.dist-info}/WHEEL +0 -0
- {khoj-1.16.1.dev16.dist-info → khoj-1.16.1.dev26.dist-info}/entry_points.txt +0 -0
- {khoj-1.16.1.dev16.dist-info → khoj-1.16.1.dev26.dist-info}/licenses/LICENSE +0 -0
|
@@ -559,7 +559,7 @@ class AgentAdapters:
|
|
|
559
559
|
if default_conversation_config is None:
|
|
560
560
|
logger.info("No default conversation config found, skipping default agent creation")
|
|
561
561
|
return None
|
|
562
|
-
default_personality = prompts.personality.format(current_date="placeholder")
|
|
562
|
+
default_personality = prompts.personality.format(current_date="placeholder", day_of_week="placeholder")
|
|
563
563
|
|
|
564
564
|
agent = Agent.objects.filter(name=AgentAdapters.DEFAULT_AGENT_NAME).first()
|
|
565
565
|
|
|
@@ -681,18 +681,33 @@ class ConversationAdapters:
|
|
|
681
681
|
user: KhojUser, client_application: ClientApplication = None, conversation_id: int = None, title: str = None
|
|
682
682
|
) -> Optional[Conversation]:
|
|
683
683
|
if conversation_id:
|
|
684
|
-
return
|
|
684
|
+
return (
|
|
685
|
+
await Conversation.objects.filter(user=user, client=client_application, id=conversation_id)
|
|
686
|
+
.prefetch_related("agent")
|
|
687
|
+
.afirst()
|
|
688
|
+
)
|
|
685
689
|
elif title:
|
|
686
|
-
return
|
|
690
|
+
return (
|
|
691
|
+
await Conversation.objects.filter(user=user, client=client_application, title=title)
|
|
692
|
+
.prefetch_related("agent")
|
|
693
|
+
.afirst()
|
|
694
|
+
)
|
|
687
695
|
else:
|
|
688
|
-
conversation =
|
|
696
|
+
conversation = (
|
|
697
|
+
Conversation.objects.filter(user=user, client=client_application)
|
|
698
|
+
.prefetch_related("agent")
|
|
699
|
+
.order_by("-updated_at")
|
|
700
|
+
)
|
|
689
701
|
|
|
690
702
|
if await conversation.aexists():
|
|
691
703
|
return await conversation.prefetch_related("agent").afirst()
|
|
692
704
|
|
|
693
705
|
return await (
|
|
694
|
-
Conversation.objects.filter(user=user, client=client_application)
|
|
695
|
-
|
|
706
|
+
Conversation.objects.filter(user=user, client=client_application)
|
|
707
|
+
.prefetch_related("agent")
|
|
708
|
+
.order_by("-updated_at")
|
|
709
|
+
.afirst()
|
|
710
|
+
) or await Conversation.objects.prefetch_related("agent").acreate(user=user, client=client_application)
|
|
696
711
|
|
|
697
712
|
@staticmethod
|
|
698
713
|
async def adelete_conversation_by_user(
|
|
@@ -36,7 +36,7 @@ def extract_questions_anthropic(
|
|
|
36
36
|
# Extract Past User Message and Inferred Questions from Conversation Log
|
|
37
37
|
chat_history = "".join(
|
|
38
38
|
[
|
|
39
|
-
f'
|
|
39
|
+
f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
|
|
40
40
|
for chat in conversation_log.get("chat", [])[-4:]
|
|
41
41
|
if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type")
|
|
42
42
|
]
|
|
@@ -135,17 +135,23 @@ def converse_anthropic(
|
|
|
135
135
|
Converse with user using Anthropic's Claude
|
|
136
136
|
"""
|
|
137
137
|
# Initialize Variables
|
|
138
|
-
current_date = datetime.now()
|
|
138
|
+
current_date = datetime.now()
|
|
139
139
|
compiled_references = "\n\n".join({f"# {item}" for item in references})
|
|
140
140
|
|
|
141
141
|
conversation_primer = prompts.query_prompt.format(query=user_query)
|
|
142
142
|
|
|
143
143
|
if agent and agent.personality:
|
|
144
144
|
system_prompt = prompts.custom_personality.format(
|
|
145
|
-
name=agent.name,
|
|
145
|
+
name=agent.name,
|
|
146
|
+
bio=agent.personality,
|
|
147
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
148
|
+
day_of_week=current_date.strftime("%A"),
|
|
146
149
|
)
|
|
147
150
|
else:
|
|
148
|
-
system_prompt = prompts.personality.format(
|
|
151
|
+
system_prompt = prompts.personality.format(
|
|
152
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
153
|
+
day_of_week=current_date.strftime("%A"),
|
|
154
|
+
)
|
|
149
155
|
|
|
150
156
|
if location_data:
|
|
151
157
|
location = f"{location_data.city}, {location_data.region}, {location_data.country}"
|
|
@@ -55,6 +55,7 @@ def extract_questions_offline(
|
|
|
55
55
|
chat_history += f"Q: {chat['intent']['query']}\n"
|
|
56
56
|
chat_history += f"Khoj: {chat['message']}\n\n"
|
|
57
57
|
|
|
58
|
+
# Get dates relative to today for prompt creation
|
|
58
59
|
today = datetime.today()
|
|
59
60
|
yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")
|
|
60
61
|
last_year = today.year - 1
|
|
@@ -62,11 +63,13 @@ def extract_questions_offline(
|
|
|
62
63
|
query=text,
|
|
63
64
|
chat_history=chat_history,
|
|
64
65
|
current_date=today.strftime("%Y-%m-%d"),
|
|
66
|
+
day_of_week=today.strftime("%A"),
|
|
65
67
|
yesterday_date=yesterday,
|
|
66
68
|
last_year=last_year,
|
|
67
69
|
this_year=today.year,
|
|
68
70
|
location=location,
|
|
69
71
|
)
|
|
72
|
+
|
|
70
73
|
messages = generate_chatml_messages_with_context(
|
|
71
74
|
example_questions, model_name=model, loaded_model=offline_chat_model, max_prompt_size=max_prompt_size
|
|
72
75
|
)
|
|
@@ -74,7 +77,7 @@ def extract_questions_offline(
|
|
|
74
77
|
state.chat_lock.acquire()
|
|
75
78
|
try:
|
|
76
79
|
response = send_message_to_model_offline(
|
|
77
|
-
messages, loaded_model=offline_chat_model, max_prompt_size=max_prompt_size
|
|
80
|
+
messages, loaded_model=offline_chat_model, model=model, max_prompt_size=max_prompt_size
|
|
78
81
|
)
|
|
79
82
|
finally:
|
|
80
83
|
state.chat_lock.release()
|
|
@@ -96,7 +99,7 @@ def extract_questions_offline(
|
|
|
96
99
|
except:
|
|
97
100
|
logger.warning(f"Llama returned invalid JSON. Falling back to using user message as search query.\n{response}")
|
|
98
101
|
return all_questions
|
|
99
|
-
logger.debug(f"
|
|
102
|
+
logger.debug(f"Questions extracted by {model}: {questions}")
|
|
100
103
|
return questions
|
|
101
104
|
|
|
102
105
|
|
|
@@ -144,14 +147,20 @@ def converse_offline(
|
|
|
144
147
|
offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
|
|
145
148
|
compiled_references_message = "\n\n".join({f"{item['compiled']}" for item in references})
|
|
146
149
|
|
|
147
|
-
current_date = datetime.now()
|
|
150
|
+
current_date = datetime.now()
|
|
148
151
|
|
|
149
152
|
if agent and agent.personality:
|
|
150
153
|
system_prompt = prompts.custom_system_prompt_offline_chat.format(
|
|
151
|
-
name=agent.name,
|
|
154
|
+
name=agent.name,
|
|
155
|
+
bio=agent.personality,
|
|
156
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
157
|
+
day_of_week=current_date.strftime("%A"),
|
|
152
158
|
)
|
|
153
159
|
else:
|
|
154
|
-
system_prompt = prompts.system_prompt_offline_chat.format(
|
|
160
|
+
system_prompt = prompts.system_prompt_offline_chat.format(
|
|
161
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
162
|
+
day_of_week=current_date.strftime("%A"),
|
|
163
|
+
)
|
|
155
164
|
|
|
156
165
|
conversation_primer = prompts.query_prompt.format(query=user_query)
|
|
157
166
|
|
|
@@ -177,9 +186,9 @@ def converse_offline(
|
|
|
177
186
|
if online_results[result].get("webpages"):
|
|
178
187
|
simplified_online_results[result] = online_results[result]["webpages"]
|
|
179
188
|
|
|
180
|
-
conversation_primer = f"{prompts.
|
|
189
|
+
conversation_primer = f"{prompts.online_search_conversation_offline.format(online_results=str(simplified_online_results))}\n{conversation_primer}"
|
|
181
190
|
if not is_none_or_empty(compiled_references_message):
|
|
182
|
-
conversation_primer = f"{prompts.notes_conversation_offline.format(references=compiled_references_message)}\n{conversation_primer}"
|
|
191
|
+
conversation_primer = f"{prompts.notes_conversation_offline.format(references=compiled_references_message)}\n\n{conversation_primer}"
|
|
183
192
|
|
|
184
193
|
# Setup Prompt with Primer or Conversation History
|
|
185
194
|
messages = generate_chatml_messages_with_context(
|
|
@@ -192,6 +201,9 @@ def converse_offline(
|
|
|
192
201
|
tokenizer_name=tokenizer_name,
|
|
193
202
|
)
|
|
194
203
|
|
|
204
|
+
truncated_messages = "\n".join({f"{message.content[:70]}..." for message in messages})
|
|
205
|
+
logger.debug(f"Conversation Context for {model}: {truncated_messages}")
|
|
206
|
+
|
|
195
207
|
g = ThreadedGenerator(references, online_results, completion_func=completion_func)
|
|
196
208
|
t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size))
|
|
197
209
|
t.start()
|
|
@@ -24,6 +24,8 @@ def download_model(repo_id: str, filename: str = "*Q4_K_M.gguf", max_tokens: int
|
|
|
24
24
|
# Add chat format if known
|
|
25
25
|
if "llama-3" in repo_id.lower():
|
|
26
26
|
kwargs["chat_format"] = "llama-3"
|
|
27
|
+
elif "gemma-2" in repo_id.lower():
|
|
28
|
+
kwargs["chat_format"] = "gemma"
|
|
27
29
|
|
|
28
30
|
# Check if the model is already downloaded
|
|
29
31
|
model_path = load_model_from_cache(repo_id, filename)
|
|
@@ -125,17 +125,23 @@ def converse(
|
|
|
125
125
|
Converse with user using OpenAI's ChatGPT
|
|
126
126
|
"""
|
|
127
127
|
# Initialize Variables
|
|
128
|
-
current_date = datetime.now()
|
|
128
|
+
current_date = datetime.now()
|
|
129
129
|
compiled_references = "\n\n".join({f"# {item['compiled']}" for item in references})
|
|
130
130
|
|
|
131
131
|
conversation_primer = prompts.query_prompt.format(query=user_query)
|
|
132
132
|
|
|
133
133
|
if agent and agent.personality:
|
|
134
134
|
system_prompt = prompts.custom_personality.format(
|
|
135
|
-
name=agent.name,
|
|
135
|
+
name=agent.name,
|
|
136
|
+
bio=agent.personality,
|
|
137
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
138
|
+
day_of_week=current_date.strftime("%A"),
|
|
136
139
|
)
|
|
137
140
|
else:
|
|
138
|
-
system_prompt = prompts.personality.format(
|
|
141
|
+
system_prompt = prompts.personality.format(
|
|
142
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
143
|
+
day_of_week=current_date.strftime("%A"),
|
|
144
|
+
)
|
|
139
145
|
|
|
140
146
|
if location_data:
|
|
141
147
|
location = f"{location_data.city}, {location_data.region}, {location_data.country}"
|
|
@@ -19,8 +19,8 @@ You were created by Khoj Inc. with the following capabilities:
|
|
|
19
19
|
- Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".
|
|
20
20
|
- Provide inline references to quotes from the user's notes or any web pages you refer to in your responses in markdown format. For example, "The farmer had ten sheep. [1](https://example.com)". *ALWAYS CITE YOUR SOURCES AND PROVIDE REFERENCES*. Add them inline to directly support your claim.
|
|
21
21
|
|
|
22
|
-
Note: More information about you, the company or Khoj apps
|
|
23
|
-
Today is {current_date} in UTC.
|
|
22
|
+
Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
|
|
23
|
+
Today is {day_of_week}, {current_date} in UTC.
|
|
24
24
|
""".strip()
|
|
25
25
|
)
|
|
26
26
|
|
|
@@ -39,7 +39,7 @@ You were created by Khoj Inc. with the following capabilities:
|
|
|
39
39
|
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations.
|
|
40
40
|
- Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".
|
|
41
41
|
|
|
42
|
-
Today is {current_date} in UTC.
|
|
42
|
+
Today is {day_of_week}, {current_date} in UTC.
|
|
43
43
|
|
|
44
44
|
Instructions:\n{bio}
|
|
45
45
|
""".strip()
|
|
@@ -79,10 +79,12 @@ You are Khoj, a smart, inquisitive and helpful personal assistant.
|
|
|
79
79
|
- Use your general knowledge and past conversation with the user as context to inform your responses.
|
|
80
80
|
- If you do not know the answer, say 'I don't know.'
|
|
81
81
|
- Think step-by-step and ask questions to get the necessary information to answer the user's question.
|
|
82
|
+
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided information or past conversations.
|
|
82
83
|
- Do not print verbatim Notes unless necessary.
|
|
83
84
|
|
|
84
|
-
|
|
85
|
-
|
|
85
|
+
Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
|
|
86
|
+
Today is {day_of_week}, {current_date} in UTC.
|
|
87
|
+
""".strip()
|
|
86
88
|
)
|
|
87
89
|
|
|
88
90
|
custom_system_prompt_offline_chat = PromptTemplate.from_template(
|
|
@@ -91,12 +93,14 @@ You are {name}, a personal agent on Khoj.
|
|
|
91
93
|
- Use your general knowledge and past conversation with the user as context to inform your responses.
|
|
92
94
|
- If you do not know the answer, say 'I don't know.'
|
|
93
95
|
- Think step-by-step and ask questions to get the necessary information to answer the user's question.
|
|
96
|
+
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided information or past conversations.
|
|
94
97
|
- Do not print verbatim Notes unless necessary.
|
|
95
98
|
|
|
96
|
-
|
|
99
|
+
Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
|
|
100
|
+
Today is {day_of_week}, {current_date} in UTC.
|
|
97
101
|
|
|
98
102
|
Instructions:\n{bio}
|
|
99
|
-
|
|
103
|
+
""".strip()
|
|
100
104
|
)
|
|
101
105
|
|
|
102
106
|
## Notes Conversation
|
|
@@ -106,13 +110,15 @@ notes_conversation = PromptTemplate.from_template(
|
|
|
106
110
|
Use my personal notes and our past conversations to inform your response.
|
|
107
111
|
Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the provided notes or past conversations.
|
|
108
112
|
|
|
109
|
-
Notes:
|
|
113
|
+
User's Notes:
|
|
110
114
|
{references}
|
|
111
115
|
""".strip()
|
|
112
116
|
)
|
|
113
117
|
|
|
114
118
|
notes_conversation_offline = PromptTemplate.from_template(
|
|
115
119
|
"""
|
|
120
|
+
Use my personal notes and our past conversations to inform your response.
|
|
121
|
+
|
|
116
122
|
User's Notes:
|
|
117
123
|
{references}
|
|
118
124
|
""".strip()
|
|
@@ -174,6 +180,15 @@ Information from the internet:
|
|
|
174
180
|
""".strip()
|
|
175
181
|
)
|
|
176
182
|
|
|
183
|
+
online_search_conversation_offline = PromptTemplate.from_template(
|
|
184
|
+
"""
|
|
185
|
+
Use this up-to-date information from the internet to inform your response.
|
|
186
|
+
|
|
187
|
+
Information from the internet:
|
|
188
|
+
{online_results}
|
|
189
|
+
""".strip()
|
|
190
|
+
)
|
|
191
|
+
|
|
177
192
|
## Query prompt
|
|
178
193
|
## --
|
|
179
194
|
query_prompt = PromptTemplate.from_template(
|
|
@@ -186,15 +201,16 @@ Query: {query}""".strip()
|
|
|
186
201
|
## --
|
|
187
202
|
extract_questions_offline = PromptTemplate.from_template(
|
|
188
203
|
"""
|
|
189
|
-
You are Khoj, an extremely smart and helpful search assistant with the ability to retrieve information from the user's notes.
|
|
190
|
-
|
|
204
|
+
You are Khoj, an extremely smart and helpful search assistant with the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
205
|
+
Construct search queries to retrieve relevant information to answer the user's question.
|
|
206
|
+
- You will be provided past questions(Q) and answers(Khoj) for context.
|
|
191
207
|
- Try to be as specific as possible. Instead of saying "they" or "it" or "he", use proper nouns like name of the person or thing you are referring to.
|
|
192
208
|
- Add as much context from the previous questions and answers as required into your search queries.
|
|
193
209
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
|
194
210
|
- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
|
|
195
211
|
- Share relevant search queries as a JSON list of strings. Do not say anything else.
|
|
196
212
|
|
|
197
|
-
Current Date: {current_date}
|
|
213
|
+
Current Date: {day_of_week}, {current_date}
|
|
198
214
|
User's Location: {location}
|
|
199
215
|
|
|
200
216
|
Examples:
|
|
@@ -232,7 +248,8 @@ Q: {query}
|
|
|
232
248
|
|
|
233
249
|
extract_questions = PromptTemplate.from_template(
|
|
234
250
|
"""
|
|
235
|
-
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
251
|
+
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
252
|
+
Construct search queries to retrieve relevant information to answer the user's question.
|
|
236
253
|
- You will be provided past questions(Q) and answers(A) for context.
|
|
237
254
|
- Add as much context from the previous questions and answers as required into your search queries.
|
|
238
255
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
|
@@ -282,8 +299,9 @@ Khoj:
|
|
|
282
299
|
|
|
283
300
|
extract_questions_anthropic_system_prompt = PromptTemplate.from_template(
|
|
284
301
|
"""
|
|
285
|
-
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
286
|
-
|
|
302
|
+
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
303
|
+
Construct search queries to retrieve relevant information to answer the user's question.
|
|
304
|
+
- You will be provided past questions(User), extracted queries(Assistant) and answers(A) for context.
|
|
287
305
|
- Add as much context from the previous questions and answers as required into your search queries.
|
|
288
306
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
|
289
307
|
- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
|
|
@@ -297,15 +315,19 @@ Here are some examples of how you can construct search queries to answer the use
|
|
|
297
315
|
|
|
298
316
|
User: How was my trip to Cambodia?
|
|
299
317
|
Assistant: {{"queries": ["How was my trip to Cambodia?"]}}
|
|
318
|
+
A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.
|
|
300
319
|
|
|
301
320
|
User: What national parks did I go to last year?
|
|
302
321
|
Assistant: {{"queries": ["National park I visited in {last_new_year} dt>='{last_new_year_date}' dt<'{current_new_year_date}'"]}}
|
|
322
|
+
A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year}.
|
|
303
323
|
|
|
304
324
|
User: How can you help me?
|
|
305
325
|
Assistant: {{"queries": ["Social relationships", "Physical and mental health", "Education and career", "Personal life goals and habits"]}}
|
|
326
|
+
A: I can help you live healthier and happier across work and personal life
|
|
306
327
|
|
|
307
328
|
User: Who all did I meet here yesterday?
|
|
308
329
|
Assistant: {{"queries": ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
|
|
330
|
+
A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.
|
|
309
331
|
""".strip()
|
|
310
332
|
)
|
|
311
333
|
|
|
@@ -319,7 +341,11 @@ Assistant:
|
|
|
319
341
|
""".strip()
|
|
320
342
|
)
|
|
321
343
|
|
|
322
|
-
system_prompt_extract_relevant_information = """
|
|
344
|
+
system_prompt_extract_relevant_information = """
|
|
345
|
+
As a professional analyst, create a comprehensive report of the most relevant information from a web page in response to a user's query.
|
|
346
|
+
The text provided is directly from within the web page.
|
|
347
|
+
The report you create should be multiple paragraphs, and it should represent the content of the website.
|
|
348
|
+
Tell the user exactly what the website says in response to their query, while adhering to these guidelines:
|
|
323
349
|
|
|
324
350
|
1. Answer the user's query as specifically as possible. Include many supporting details from the website.
|
|
325
351
|
2. Craft a report that is detailed, thorough, in-depth, and complex, while maintaining clarity.
|
|
@@ -340,7 +366,11 @@ Collate only relevant information from the website to answer the target query.
|
|
|
340
366
|
""".strip()
|
|
341
367
|
)
|
|
342
368
|
|
|
343
|
-
system_prompt_extract_relevant_summary = """
|
|
369
|
+
system_prompt_extract_relevant_summary = """
|
|
370
|
+
As a professional analyst, create a comprehensive report of the most relevant information from the document in response to a user's query.
|
|
371
|
+
The text provided is directly from within the document.
|
|
372
|
+
The report you create should be multiple paragraphs, and it should represent the content of the document.
|
|
373
|
+
Tell the user exactly what the document says in response to their query, while adhering to these guidelines:
|
|
344
374
|
|
|
345
375
|
1. Answer the user's query as specifically as possible. Include many supporting details from the document.
|
|
346
376
|
2. Craft a report that is detailed, thorough, in-depth, and complex, while maintaining clarity.
|
|
@@ -363,11 +393,13 @@ Collate only relevant information from the document to answer the target query.
|
|
|
363
393
|
|
|
364
394
|
pick_relevant_output_mode = PromptTemplate.from_template(
|
|
365
395
|
"""
|
|
366
|
-
You are Khoj, an excellent analyst for selecting the correct way to respond to a user's query.
|
|
396
|
+
You are Khoj, an excellent analyst for selecting the correct way to respond to a user's query.
|
|
397
|
+
You have access to a limited set of modes for your response.
|
|
398
|
+
You can only use one of these modes.
|
|
367
399
|
|
|
368
400
|
{modes}
|
|
369
401
|
|
|
370
|
-
Here are some
|
|
402
|
+
Here are some examples:
|
|
371
403
|
|
|
372
404
|
Example:
|
|
373
405
|
Chat History:
|
|
@@ -383,7 +415,7 @@ User: I'm having trouble deciding which laptop to get. I want something with at
|
|
|
383
415
|
AI: I can help with that. I see online that there is a new model of the Dell XPS 15 that meets your requirements.
|
|
384
416
|
|
|
385
417
|
Q: What are the specs of the new Dell XPS 15?
|
|
386
|
-
Khoj:
|
|
418
|
+
Khoj: text
|
|
387
419
|
|
|
388
420
|
Example:
|
|
389
421
|
Chat History:
|
|
@@ -391,7 +423,7 @@ User: Where did I go on my last vacation?
|
|
|
391
423
|
AI: You went to Jordan and visited Petra, the Dead Sea, and Wadi Rum.
|
|
392
424
|
|
|
393
425
|
Q: Remind me who did I go with on that trip?
|
|
394
|
-
Khoj:
|
|
426
|
+
Khoj: text
|
|
395
427
|
|
|
396
428
|
Example:
|
|
397
429
|
Chat History:
|
|
@@ -399,7 +431,7 @@ User: How's the weather outside? Current Location: Bali, Indonesia
|
|
|
399
431
|
AI: It's currently 28°C and partly cloudy in Bali.
|
|
400
432
|
|
|
401
433
|
Q: Share a painting using the weather for Bali every morning.
|
|
402
|
-
Khoj:
|
|
434
|
+
Khoj: automation
|
|
403
435
|
|
|
404
436
|
Now it's your turn to pick the mode you would like to use to answer the user's question. Provide your response as a string.
|
|
405
437
|
|
|
@@ -422,7 +454,7 @@ Which of the data sources listed below you would use to answer the user's questi
|
|
|
422
454
|
|
|
423
455
|
{tools}
|
|
424
456
|
|
|
425
|
-
Here are some
|
|
457
|
+
Here are some examples:
|
|
426
458
|
|
|
427
459
|
Example:
|
|
428
460
|
Chat History:
|
|
@@ -533,10 +565,10 @@ You are Khoj, an advanced google search assistant. You are tasked with construct
|
|
|
533
565
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
|
534
566
|
- Use site: google search operators when appropriate
|
|
535
567
|
- You have access to the the whole internet to retrieve information.
|
|
536
|
-
- Official, up-to-date information about you, Khoj, is available at site:khoj.dev
|
|
568
|
+
- Official, up-to-date information about you, Khoj, is available at site:khoj.dev, github or pypi.
|
|
537
569
|
|
|
538
570
|
What Google searches, if any, will you need to perform to answer the user's question?
|
|
539
|
-
Provide search queries as a list of strings in a JSON object.
|
|
571
|
+
Provide search queries as a list of strings in a JSON object. Do not wrap the json in a codeblock.
|
|
540
572
|
Current Date: {current_date}
|
|
541
573
|
User's Location: {location}
|
|
542
574
|
|
|
@@ -589,7 +621,6 @@ Q: How many oranges would fit in NASA's Saturn V rocket?
|
|
|
589
621
|
Khoj: {{"queries": ["volume of an orange", "volume of saturn v rocket"]}}
|
|
590
622
|
|
|
591
623
|
Now it's your turn to construct Google search queries to answer the user's question. Provide them as a list of strings in a JSON object. Do not say anything else.
|
|
592
|
-
Now it's your turn to construct a search query for Google to answer the user's question.
|
|
593
624
|
History:
|
|
594
625
|
{chat_history}
|
|
595
626
|
|
|
@@ -186,7 +186,7 @@ def generate_chatml_messages_with_context(
|
|
|
186
186
|
|
|
187
187
|
def truncate_messages(
|
|
188
188
|
messages: list[ChatMessage],
|
|
189
|
-
max_prompt_size,
|
|
189
|
+
max_prompt_size: int,
|
|
190
190
|
model_name: str,
|
|
191
191
|
loaded_model: Optional[Llama] = None,
|
|
192
192
|
tokenizer_name=None,
|
|
@@ -232,7 +232,8 @@ def truncate_messages(
|
|
|
232
232
|
tokens = sum([len(encoder.encode(message.content)) for message in messages if type(message.content) == str])
|
|
233
233
|
|
|
234
234
|
# Drop older messages until under max supported prompt size by model
|
|
235
|
-
|
|
235
|
+
# Reserves 4 tokens to demarcate each message (e.g <|im_start|>user, <|im_end|>, <|endoftext|> etc.)
|
|
236
|
+
while (tokens + system_message_tokens + 4 * len(messages)) > max_prompt_size and len(messages) > 1:
|
|
236
237
|
messages.pop()
|
|
237
238
|
tokens = sum([len(encoder.encode(message.content)) for message in messages if type(message.content) == str])
|
|
238
239
|
|
|
@@ -254,6 +255,8 @@ def truncate_messages(
|
|
|
254
255
|
f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {truncated_message}"
|
|
255
256
|
)
|
|
256
257
|
|
|
258
|
+
if system_message:
|
|
259
|
+
system_message.role = "user" if "gemma-2" in model_name else "system"
|
|
257
260
|
return messages + [system_message] if system_message else messages
|
|
258
261
|
|
|
259
262
|
|
khoj/routers/api.py
CHANGED
|
@@ -317,7 +317,7 @@ async def extract_references_and_questions(
|
|
|
317
317
|
|
|
318
318
|
filters_in_query += " ".join([f'file:"{filter}"' for filter in conversation.file_filters])
|
|
319
319
|
using_offline_chat = False
|
|
320
|
-
|
|
320
|
+
logger.debug(f"Filters in query: {filters_in_query}")
|
|
321
321
|
|
|
322
322
|
# Infer search queries from user message
|
|
323
323
|
with timer("Extracting search queries took", logger):
|
|
@@ -335,6 +335,7 @@ async def extract_references_and_questions(
|
|
|
335
335
|
|
|
336
336
|
inferred_queries = extract_questions_offline(
|
|
337
337
|
defiltered_query,
|
|
338
|
+
model=chat_model,
|
|
338
339
|
loaded_model=loaded_model,
|
|
339
340
|
conversation_log=meta_log,
|
|
340
341
|
should_extract_questions=True,
|
khoj/routers/helpers.py
CHANGED
|
@@ -307,6 +307,7 @@ async def aget_relevant_output_modes(query: str, conversation_history: dict, is_
|
|
|
307
307
|
# Check whether the tool exists as a valid ConversationCommand
|
|
308
308
|
return ConversationCommand(response)
|
|
309
309
|
|
|
310
|
+
logger.error(f"Invalid output mode selected: {response}. Defaulting to text.")
|
|
310
311
|
return ConversationCommand.Text
|
|
311
312
|
except Exception:
|
|
312
313
|
logger.error(f"Invalid response for determining relevant mode: {response}")
|
|
@@ -519,9 +520,6 @@ async def send_message_to_model_wrapper(
|
|
|
519
520
|
chat_model_option or await ConversationAdapters.aget_default_conversation_config()
|
|
520
521
|
)
|
|
521
522
|
|
|
522
|
-
if conversation_config is None:
|
|
523
|
-
raise HTTPException(status_code=500, detail="Contact the server administrator to set a default chat model.")
|
|
524
|
-
|
|
525
523
|
chat_model = conversation_config.chat_model
|
|
526
524
|
max_tokens = conversation_config.max_prompt_size
|
|
527
525
|
tokenizer = conversation_config.tokenizer
|
khoj/routers/indexer.py
CHANGED
|
@@ -60,7 +60,7 @@ async def update(
|
|
|
60
60
|
indexed_data_limiter: ApiIndexedDataLimiter = Depends(
|
|
61
61
|
ApiIndexedDataLimiter(
|
|
62
62
|
incoming_entries_size_limit=10,
|
|
63
|
-
subscribed_incoming_entries_size_limit=
|
|
63
|
+
subscribed_incoming_entries_size_limit=75,
|
|
64
64
|
total_entries_size_limit=10,
|
|
65
65
|
subscribed_total_entries_size_limit=100,
|
|
66
66
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: khoj
|
|
3
|
-
Version: 1.16.1.
|
|
3
|
+
Version: 1.16.1.dev26
|
|
4
4
|
Summary: An AI copilot for your Second Brain
|
|
5
5
|
Project-URL: Homepage, https://khoj.dev
|
|
6
6
|
Project-URL: Documentation, https://docs.khoj.dev
|
|
@@ -43,7 +43,7 @@ Requires-Dist: jinja2==3.1.4
|
|
|
43
43
|
Requires-Dist: langchain-community==0.2.5
|
|
44
44
|
Requires-Dist: langchain-openai==0.1.7
|
|
45
45
|
Requires-Dist: langchain==0.2.5
|
|
46
|
-
Requires-Dist: llama-cpp-python==0.2.
|
|
46
|
+
Requires-Dist: llama-cpp-python==0.2.82
|
|
47
47
|
Requires-Dist: lxml==4.9.3
|
|
48
48
|
Requires-Dist: magika~=0.5.1
|
|
49
49
|
Requires-Dist: markdown-it-py~=3.0.0
|
|
@@ -104,7 +104,7 @@ Description-Content-Type: text/markdown
|
|
|
104
104
|
|
|
105
105
|
[](https://github.com/khoj-ai/khoj/actions/workflows/test.yml)
|
|
106
106
|
[](https://github.com/khoj-ai/khoj/pkgs/container/khoj)
|
|
107
|
-
[](https://pypi.org/project/khoj
|
|
107
|
+
[](https://pypi.org/project/khoj/)
|
|
108
108
|

|
|
109
109
|
|
|
110
110
|
</div>
|
|
@@ -11,7 +11,7 @@ khoj/database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
11
11
|
khoj/database/admin.py,sha256=P1zguxSlqnpCvF7CcF8wS9tGCsO3SNDnl6-C_9eyYFA,8814
|
|
12
12
|
khoj/database/apps.py,sha256=pM4tkX5Odw4YW_hLLKK8Nd5kqGddf1en0oMCea44RZw,153
|
|
13
13
|
khoj/database/tests.py,sha256=mrbGGRNg5jwbTJtWWa7zSKdDyeB4vmgZCRc2nk6VY-g,60
|
|
14
|
-
khoj/database/adapters/__init__.py,sha256=
|
|
14
|
+
khoj/database/adapters/__init__.py,sha256=8vLKZkTMaXXRCv4bOzZNyu_Ihk5EFfJET8y0TjCcx00,48134
|
|
15
15
|
khoj/database/management/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
khoj/database/management/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
17
|
khoj/database/management/commands/change_generated_images_url.py,sha256=w52FwRlyOL4YRpp9O6jJUjSIuGLxVhaS2w1D7gtQgOE,2644
|
|
@@ -184,17 +184,17 @@ khoj/processor/content/pdf/pdf_to_entries.py,sha256=wGXSmxifrjRE9eJWoFokI0G66kyM
|
|
|
184
184
|
khoj/processor/content/plaintext/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
185
185
|
khoj/processor/content/plaintext/plaintext_to_entries.py,sha256=4JpWWulSjzrYsGNLPt7Z0qZykjU6isF58vx-0RJjQRw,5127
|
|
186
186
|
khoj/processor/conversation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
187
|
-
khoj/processor/conversation/prompts.py,sha256=
|
|
188
|
-
khoj/processor/conversation/utils.py,sha256=
|
|
187
|
+
khoj/processor/conversation/prompts.py,sha256=8X0FzJ8iZR0lTdNtCv0WmGPTm4EyP-YZiKJsluZkC9g,32086
|
|
188
|
+
khoj/processor/conversation/utils.py,sha256=3sivb-99M8keN-YdZ81Nj-_RJoRh3KmcwwOhJ_nvN7I,10909
|
|
189
189
|
khoj/processor/conversation/anthropic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
190
|
-
khoj/processor/conversation/anthropic/anthropic_chat.py,sha256=
|
|
190
|
+
khoj/processor/conversation/anthropic/anthropic_chat.py,sha256=7yBlY26PNI4wzwHph4TTQ_tJlLmoNjiXyhzHPJeUVmI,7887
|
|
191
191
|
khoj/processor/conversation/anthropic/utils.py,sha256=uc9d_gIk4Ux2NRlkw3FP9L9KeLRoUI7nC_qb2Qp6d_4,3253
|
|
192
192
|
khoj/processor/conversation/offline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
193
|
-
khoj/processor/conversation/offline/chat_model.py,sha256=
|
|
194
|
-
khoj/processor/conversation/offline/utils.py,sha256
|
|
193
|
+
khoj/processor/conversation/offline/chat_model.py,sha256=7OIexLDXfdyF2jk-JFMqojNW-grlc2jtCt1zxCO-VAY,9349
|
|
194
|
+
khoj/processor/conversation/offline/utils.py,sha256=n2T3vwAIZnSe9-UN1VORLPrLEUcamXXE9isL2ie-9R8,3033
|
|
195
195
|
khoj/processor/conversation/offline/whisper.py,sha256=DJI-8y8DULO2cQ49m2VOvRyIZ2TxBypc15gM8O3HuMI,470
|
|
196
196
|
khoj/processor/conversation/openai/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
197
|
-
khoj/processor/conversation/openai/gpt.py,sha256=
|
|
197
|
+
khoj/processor/conversation/openai/gpt.py,sha256=bbEyiOeRgko65bLsFHNlkMw1cvbiawQsjmDipCpy0E4,7111
|
|
198
198
|
khoj/processor/conversation/openai/utils.py,sha256=UCfu-dHnkgcKxMajUaWnlxW5Zdidqecv5jIdmimie6o,4067
|
|
199
199
|
khoj/processor/conversation/openai/whisper.py,sha256=RuwDtxSJrVWYdZz4aVnk0XiMQy9w8W9lFcVfE0hMiFY,432
|
|
200
200
|
khoj/processor/speech/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -202,15 +202,15 @@ khoj/processor/speech/text_to_speech.py,sha256=Q7sapi5Hv6woXOumtrGqR0t6izZrFBkWX
|
|
|
202
202
|
khoj/processor/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
203
203
|
khoj/processor/tools/online_search.py,sha256=FGAu-FmD158ILXoVo3j2fbqM5SnaWusKV39o68GhvQY,9373
|
|
204
204
|
khoj/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
205
|
-
khoj/routers/api.py,sha256=
|
|
205
|
+
khoj/routers/api.py,sha256=pklt2YjV8H68jViAa_kvW8_ImlRHT16SytGRRcBhojs,23731
|
|
206
206
|
khoj/routers/api_agents.py,sha256=A8gS-LIgg5cjV9Ec7P-TgCmmylaNtT43TqXcxIia3PQ,1443
|
|
207
207
|
khoj/routers/api_chat.py,sha256=SdBkCveilzM75uyBc0RAJEk-pOolQdl_Xa7L2B3ceQA,45409
|
|
208
208
|
khoj/routers/api_config.py,sha256=qK4he5cH-Rdx5WagOOfoNRpHuJUoD8ja4zbl0ksBh9M,13446
|
|
209
209
|
khoj/routers/api_phone.py,sha256=p9yfc4WeMHDC0hg3aQk60a2VBy8rZPdEnz9wdJ7DzkU,2208
|
|
210
210
|
khoj/routers/auth.py,sha256=pCOLSRihJWcn097DRPxLjPdlejsjHJFRs9jHIzLujZU,6247
|
|
211
211
|
khoj/routers/email.py,sha256=hhxyLdmgSUfeHTXTco7g4aWDAEaVzF82mV-hshxzHsg,3695
|
|
212
|
-
khoj/routers/helpers.py,sha256=
|
|
213
|
-
khoj/routers/indexer.py,sha256=
|
|
212
|
+
khoj/routers/helpers.py,sha256=kM3dQcGDFY7q5BXy3ygflAW3NllhrBmiqPZg-2qzUCc,47351
|
|
213
|
+
khoj/routers/indexer.py,sha256=CnXbxJ5ZCBBuNvJalGyMNFa4FBa47loEjSiPz_KjMhQ,12560
|
|
214
214
|
khoj/routers/notion.py,sha256=DaEvbBLqg0F7UArRalKZqzvl0fxW7snShByo-BOyRO8,3137
|
|
215
215
|
khoj/routers/storage.py,sha256=9ZfBsr_omxdFV-Lcj6p30xTQcF_7wwCZ9XFJukzjITE,1429
|
|
216
216
|
khoj/routers/subscription.py,sha256=qEyV7m7mrY6MGtaij8W3v61tpzX2a7ydm2B-E8h_R-M,4285
|
|
@@ -235,8 +235,8 @@ khoj/utils/models.py,sha256=Q5tcC9-z25sCiub048fLnvZ6_IIO1bcPNxt5payekk0,2009
|
|
|
235
235
|
khoj/utils/rawconfig.py,sha256=iqyidXyM4AM6QAPLLMFrMuYR6dYplj374UBNFDOBwkY,3892
|
|
236
236
|
khoj/utils/state.py,sha256=x4GTewP1YhOA6c_32N4wOjnV-3AA3xG_qbY1-wC2Uxc,1559
|
|
237
237
|
khoj/utils/yaml.py,sha256=H0mfw0ZvBFUvFmCQn8pWkfxdmIebsrSykza7D8Wv6wQ,1430
|
|
238
|
-
khoj-1.16.1.
|
|
239
|
-
khoj-1.16.1.
|
|
240
|
-
khoj-1.16.1.
|
|
241
|
-
khoj-1.16.1.
|
|
242
|
-
khoj-1.16.1.
|
|
238
|
+
khoj-1.16.1.dev26.dist-info/METADATA,sha256=4mUyWvFUpTjeqLtPDArx7UK8DHMihvBS2ibt8cLty5o,6894
|
|
239
|
+
khoj-1.16.1.dev26.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
240
|
+
khoj-1.16.1.dev26.dist-info/entry_points.txt,sha256=KBIcez5N_jCgq_ER4Uxf-e1lxTBMTE_BBjMwwfeZyAg,39
|
|
241
|
+
khoj-1.16.1.dev26.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
242
|
+
khoj-1.16.1.dev26.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|