khoj 1.16.1.dev15__py3-none-any.whl → 1.16.1.dev47__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +9 -10
- khoj/interface/web/chat.html +186 -296
- khoj/processor/conversation/anthropic/anthropic_chat.py +10 -4
- khoj/processor/conversation/offline/chat_model.py +19 -7
- khoj/processor/conversation/offline/utils.py +2 -0
- khoj/processor/conversation/openai/gpt.py +9 -3
- khoj/processor/conversation/prompts.py +56 -25
- khoj/processor/conversation/utils.py +5 -6
- khoj/processor/tools/online_search.py +13 -7
- khoj/routers/api.py +12 -7
- khoj/routers/api_chat.py +264 -483
- khoj/routers/helpers.py +33 -21
- khoj/routers/indexer.py +1 -1
- khoj/utils/fs_syncer.py +1 -1
- {khoj-1.16.1.dev15.dist-info → khoj-1.16.1.dev47.dist-info}/METADATA +2 -2
- {khoj-1.16.1.dev15.dist-info → khoj-1.16.1.dev47.dist-info}/RECORD +19 -19
- {khoj-1.16.1.dev15.dist-info → khoj-1.16.1.dev47.dist-info}/WHEEL +0 -0
- {khoj-1.16.1.dev15.dist-info → khoj-1.16.1.dev47.dist-info}/entry_points.txt +0 -0
- {khoj-1.16.1.dev15.dist-info → khoj-1.16.1.dev47.dist-info}/licenses/LICENSE +0 -0
|
@@ -36,7 +36,7 @@ def extract_questions_anthropic(
|
|
|
36
36
|
# Extract Past User Message and Inferred Questions from Conversation Log
|
|
37
37
|
chat_history = "".join(
|
|
38
38
|
[
|
|
39
|
-
f'
|
|
39
|
+
f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
|
|
40
40
|
for chat in conversation_log.get("chat", [])[-4:]
|
|
41
41
|
if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type")
|
|
42
42
|
]
|
|
@@ -135,17 +135,23 @@ def converse_anthropic(
|
|
|
135
135
|
Converse with user using Anthropic's Claude
|
|
136
136
|
"""
|
|
137
137
|
# Initialize Variables
|
|
138
|
-
current_date = datetime.now()
|
|
138
|
+
current_date = datetime.now()
|
|
139
139
|
compiled_references = "\n\n".join({f"# {item}" for item in references})
|
|
140
140
|
|
|
141
141
|
conversation_primer = prompts.query_prompt.format(query=user_query)
|
|
142
142
|
|
|
143
143
|
if agent and agent.personality:
|
|
144
144
|
system_prompt = prompts.custom_personality.format(
|
|
145
|
-
name=agent.name,
|
|
145
|
+
name=agent.name,
|
|
146
|
+
bio=agent.personality,
|
|
147
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
148
|
+
day_of_week=current_date.strftime("%A"),
|
|
146
149
|
)
|
|
147
150
|
else:
|
|
148
|
-
system_prompt = prompts.personality.format(
|
|
151
|
+
system_prompt = prompts.personality.format(
|
|
152
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
153
|
+
day_of_week=current_date.strftime("%A"),
|
|
154
|
+
)
|
|
149
155
|
|
|
150
156
|
if location_data:
|
|
151
157
|
location = f"{location_data.city}, {location_data.region}, {location_data.country}"
|
|
@@ -55,6 +55,7 @@ def extract_questions_offline(
|
|
|
55
55
|
chat_history += f"Q: {chat['intent']['query']}\n"
|
|
56
56
|
chat_history += f"Khoj: {chat['message']}\n\n"
|
|
57
57
|
|
|
58
|
+
# Get dates relative to today for prompt creation
|
|
58
59
|
today = datetime.today()
|
|
59
60
|
yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")
|
|
60
61
|
last_year = today.year - 1
|
|
@@ -62,11 +63,13 @@ def extract_questions_offline(
|
|
|
62
63
|
query=text,
|
|
63
64
|
chat_history=chat_history,
|
|
64
65
|
current_date=today.strftime("%Y-%m-%d"),
|
|
66
|
+
day_of_week=today.strftime("%A"),
|
|
65
67
|
yesterday_date=yesterday,
|
|
66
68
|
last_year=last_year,
|
|
67
69
|
this_year=today.year,
|
|
68
70
|
location=location,
|
|
69
71
|
)
|
|
72
|
+
|
|
70
73
|
messages = generate_chatml_messages_with_context(
|
|
71
74
|
example_questions, model_name=model, loaded_model=offline_chat_model, max_prompt_size=max_prompt_size
|
|
72
75
|
)
|
|
@@ -74,7 +77,7 @@ def extract_questions_offline(
|
|
|
74
77
|
state.chat_lock.acquire()
|
|
75
78
|
try:
|
|
76
79
|
response = send_message_to_model_offline(
|
|
77
|
-
messages, loaded_model=offline_chat_model, max_prompt_size=max_prompt_size
|
|
80
|
+
messages, loaded_model=offline_chat_model, model=model, max_prompt_size=max_prompt_size
|
|
78
81
|
)
|
|
79
82
|
finally:
|
|
80
83
|
state.chat_lock.release()
|
|
@@ -96,7 +99,7 @@ def extract_questions_offline(
|
|
|
96
99
|
except:
|
|
97
100
|
logger.warning(f"Llama returned invalid JSON. Falling back to using user message as search query.\n{response}")
|
|
98
101
|
return all_questions
|
|
99
|
-
logger.debug(f"
|
|
102
|
+
logger.debug(f"Questions extracted by {model}: {questions}")
|
|
100
103
|
return questions
|
|
101
104
|
|
|
102
105
|
|
|
@@ -144,14 +147,20 @@ def converse_offline(
|
|
|
144
147
|
offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
|
|
145
148
|
compiled_references_message = "\n\n".join({f"{item['compiled']}" for item in references})
|
|
146
149
|
|
|
147
|
-
current_date = datetime.now()
|
|
150
|
+
current_date = datetime.now()
|
|
148
151
|
|
|
149
152
|
if agent and agent.personality:
|
|
150
153
|
system_prompt = prompts.custom_system_prompt_offline_chat.format(
|
|
151
|
-
name=agent.name,
|
|
154
|
+
name=agent.name,
|
|
155
|
+
bio=agent.personality,
|
|
156
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
157
|
+
day_of_week=current_date.strftime("%A"),
|
|
152
158
|
)
|
|
153
159
|
else:
|
|
154
|
-
system_prompt = prompts.system_prompt_offline_chat.format(
|
|
160
|
+
system_prompt = prompts.system_prompt_offline_chat.format(
|
|
161
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
162
|
+
day_of_week=current_date.strftime("%A"),
|
|
163
|
+
)
|
|
155
164
|
|
|
156
165
|
conversation_primer = prompts.query_prompt.format(query=user_query)
|
|
157
166
|
|
|
@@ -177,9 +186,9 @@ def converse_offline(
|
|
|
177
186
|
if online_results[result].get("webpages"):
|
|
178
187
|
simplified_online_results[result] = online_results[result]["webpages"]
|
|
179
188
|
|
|
180
|
-
conversation_primer = f"{prompts.
|
|
189
|
+
conversation_primer = f"{prompts.online_search_conversation_offline.format(online_results=str(simplified_online_results))}\n{conversation_primer}"
|
|
181
190
|
if not is_none_or_empty(compiled_references_message):
|
|
182
|
-
conversation_primer = f"{prompts.notes_conversation_offline.format(references=compiled_references_message)}\n{conversation_primer}"
|
|
191
|
+
conversation_primer = f"{prompts.notes_conversation_offline.format(references=compiled_references_message)}\n\n{conversation_primer}"
|
|
183
192
|
|
|
184
193
|
# Setup Prompt with Primer or Conversation History
|
|
185
194
|
messages = generate_chatml_messages_with_context(
|
|
@@ -192,6 +201,9 @@ def converse_offline(
|
|
|
192
201
|
tokenizer_name=tokenizer_name,
|
|
193
202
|
)
|
|
194
203
|
|
|
204
|
+
truncated_messages = "\n".join({f"{message.content[:70]}..." for message in messages})
|
|
205
|
+
logger.debug(f"Conversation Context for {model}: {truncated_messages}")
|
|
206
|
+
|
|
195
207
|
g = ThreadedGenerator(references, online_results, completion_func=completion_func)
|
|
196
208
|
t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size))
|
|
197
209
|
t.start()
|
|
@@ -24,6 +24,8 @@ def download_model(repo_id: str, filename: str = "*Q4_K_M.gguf", max_tokens: int
|
|
|
24
24
|
# Add chat format if known
|
|
25
25
|
if "llama-3" in repo_id.lower():
|
|
26
26
|
kwargs["chat_format"] = "llama-3"
|
|
27
|
+
elif "gemma-2" in repo_id.lower():
|
|
28
|
+
kwargs["chat_format"] = "gemma"
|
|
27
29
|
|
|
28
30
|
# Check if the model is already downloaded
|
|
29
31
|
model_path = load_model_from_cache(repo_id, filename)
|
|
@@ -125,17 +125,23 @@ def converse(
|
|
|
125
125
|
Converse with user using OpenAI's ChatGPT
|
|
126
126
|
"""
|
|
127
127
|
# Initialize Variables
|
|
128
|
-
current_date = datetime.now()
|
|
128
|
+
current_date = datetime.now()
|
|
129
129
|
compiled_references = "\n\n".join({f"# {item['compiled']}" for item in references})
|
|
130
130
|
|
|
131
131
|
conversation_primer = prompts.query_prompt.format(query=user_query)
|
|
132
132
|
|
|
133
133
|
if agent and agent.personality:
|
|
134
134
|
system_prompt = prompts.custom_personality.format(
|
|
135
|
-
name=agent.name,
|
|
135
|
+
name=agent.name,
|
|
136
|
+
bio=agent.personality,
|
|
137
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
138
|
+
day_of_week=current_date.strftime("%A"),
|
|
136
139
|
)
|
|
137
140
|
else:
|
|
138
|
-
system_prompt = prompts.personality.format(
|
|
141
|
+
system_prompt = prompts.personality.format(
|
|
142
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
143
|
+
day_of_week=current_date.strftime("%A"),
|
|
144
|
+
)
|
|
139
145
|
|
|
140
146
|
if location_data:
|
|
141
147
|
location = f"{location_data.city}, {location_data.region}, {location_data.country}"
|
|
@@ -19,8 +19,8 @@ You were created by Khoj Inc. with the following capabilities:
|
|
|
19
19
|
- Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".
|
|
20
20
|
- Provide inline references to quotes from the user's notes or any web pages you refer to in your responses in markdown format. For example, "The farmer had ten sheep. [1](https://example.com)". *ALWAYS CITE YOUR SOURCES AND PROVIDE REFERENCES*. Add them inline to directly support your claim.
|
|
21
21
|
|
|
22
|
-
Note: More information about you, the company or Khoj apps
|
|
23
|
-
Today is {current_date} in UTC.
|
|
22
|
+
Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
|
|
23
|
+
Today is {day_of_week}, {current_date} in UTC.
|
|
24
24
|
""".strip()
|
|
25
25
|
)
|
|
26
26
|
|
|
@@ -39,7 +39,7 @@ You were created by Khoj Inc. with the following capabilities:
|
|
|
39
39
|
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided notes or past conversations.
|
|
40
40
|
- Sometimes the user will share personal information that needs to be remembered, like an account ID or a residential address. These can be acknowledged with a simple "Got it" or "Okay".
|
|
41
41
|
|
|
42
|
-
Today is {current_date} in UTC.
|
|
42
|
+
Today is {day_of_week}, {current_date} in UTC.
|
|
43
43
|
|
|
44
44
|
Instructions:\n{bio}
|
|
45
45
|
""".strip()
|
|
@@ -79,10 +79,12 @@ You are Khoj, a smart, inquisitive and helpful personal assistant.
|
|
|
79
79
|
- Use your general knowledge and past conversation with the user as context to inform your responses.
|
|
80
80
|
- If you do not know the answer, say 'I don't know.'
|
|
81
81
|
- Think step-by-step and ask questions to get the necessary information to answer the user's question.
|
|
82
|
+
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided information or past conversations.
|
|
82
83
|
- Do not print verbatim Notes unless necessary.
|
|
83
84
|
|
|
84
|
-
|
|
85
|
-
|
|
85
|
+
Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
|
|
86
|
+
Today is {day_of_week}, {current_date} in UTC.
|
|
87
|
+
""".strip()
|
|
86
88
|
)
|
|
87
89
|
|
|
88
90
|
custom_system_prompt_offline_chat = PromptTemplate.from_template(
|
|
@@ -91,12 +93,14 @@ You are {name}, a personal agent on Khoj.
|
|
|
91
93
|
- Use your general knowledge and past conversation with the user as context to inform your responses.
|
|
92
94
|
- If you do not know the answer, say 'I don't know.'
|
|
93
95
|
- Think step-by-step and ask questions to get the necessary information to answer the user's question.
|
|
96
|
+
- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided information or past conversations.
|
|
94
97
|
- Do not print verbatim Notes unless necessary.
|
|
95
98
|
|
|
96
|
-
|
|
99
|
+
Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
|
|
100
|
+
Today is {day_of_week}, {current_date} in UTC.
|
|
97
101
|
|
|
98
102
|
Instructions:\n{bio}
|
|
99
|
-
|
|
103
|
+
""".strip()
|
|
100
104
|
)
|
|
101
105
|
|
|
102
106
|
## Notes Conversation
|
|
@@ -106,13 +110,15 @@ notes_conversation = PromptTemplate.from_template(
|
|
|
106
110
|
Use my personal notes and our past conversations to inform your response.
|
|
107
111
|
Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the provided notes or past conversations.
|
|
108
112
|
|
|
109
|
-
Notes:
|
|
113
|
+
User's Notes:
|
|
110
114
|
{references}
|
|
111
115
|
""".strip()
|
|
112
116
|
)
|
|
113
117
|
|
|
114
118
|
notes_conversation_offline = PromptTemplate.from_template(
|
|
115
119
|
"""
|
|
120
|
+
Use my personal notes and our past conversations to inform your response.
|
|
121
|
+
|
|
116
122
|
User's Notes:
|
|
117
123
|
{references}
|
|
118
124
|
""".strip()
|
|
@@ -174,6 +180,15 @@ Information from the internet:
|
|
|
174
180
|
""".strip()
|
|
175
181
|
)
|
|
176
182
|
|
|
183
|
+
online_search_conversation_offline = PromptTemplate.from_template(
|
|
184
|
+
"""
|
|
185
|
+
Use this up-to-date information from the internet to inform your response.
|
|
186
|
+
|
|
187
|
+
Information from the internet:
|
|
188
|
+
{online_results}
|
|
189
|
+
""".strip()
|
|
190
|
+
)
|
|
191
|
+
|
|
177
192
|
## Query prompt
|
|
178
193
|
## --
|
|
179
194
|
query_prompt = PromptTemplate.from_template(
|
|
@@ -186,15 +201,16 @@ Query: {query}""".strip()
|
|
|
186
201
|
## --
|
|
187
202
|
extract_questions_offline = PromptTemplate.from_template(
|
|
188
203
|
"""
|
|
189
|
-
You are Khoj, an extremely smart and helpful search assistant with the ability to retrieve information from the user's notes.
|
|
190
|
-
|
|
204
|
+
You are Khoj, an extremely smart and helpful search assistant with the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
205
|
+
Construct search queries to retrieve relevant information to answer the user's question.
|
|
206
|
+
- You will be provided past questions(Q) and answers(Khoj) for context.
|
|
191
207
|
- Try to be as specific as possible. Instead of saying "they" or "it" or "he", use proper nouns like name of the person or thing you are referring to.
|
|
192
208
|
- Add as much context from the previous questions and answers as required into your search queries.
|
|
193
209
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
|
194
210
|
- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
|
|
195
211
|
- Share relevant search queries as a JSON list of strings. Do not say anything else.
|
|
196
212
|
|
|
197
|
-
Current Date: {current_date}
|
|
213
|
+
Current Date: {day_of_week}, {current_date}
|
|
198
214
|
User's Location: {location}
|
|
199
215
|
|
|
200
216
|
Examples:
|
|
@@ -232,7 +248,8 @@ Q: {query}
|
|
|
232
248
|
|
|
233
249
|
extract_questions = PromptTemplate.from_template(
|
|
234
250
|
"""
|
|
235
|
-
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
251
|
+
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
252
|
+
Construct search queries to retrieve relevant information to answer the user's question.
|
|
236
253
|
- You will be provided past questions(Q) and answers(A) for context.
|
|
237
254
|
- Add as much context from the previous questions and answers as required into your search queries.
|
|
238
255
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
|
@@ -282,8 +299,9 @@ Khoj:
|
|
|
282
299
|
|
|
283
300
|
extract_questions_anthropic_system_prompt = PromptTemplate.from_template(
|
|
284
301
|
"""
|
|
285
|
-
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
286
|
-
|
|
302
|
+
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes. Disregard online search requests.
|
|
303
|
+
Construct search queries to retrieve relevant information to answer the user's question.
|
|
304
|
+
- You will be provided past questions(User), extracted queries(Assistant) and answers(A) for context.
|
|
287
305
|
- Add as much context from the previous questions and answers as required into your search queries.
|
|
288
306
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
|
289
307
|
- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
|
|
@@ -297,15 +315,19 @@ Here are some examples of how you can construct search queries to answer the use
|
|
|
297
315
|
|
|
298
316
|
User: How was my trip to Cambodia?
|
|
299
317
|
Assistant: {{"queries": ["How was my trip to Cambodia?"]}}
|
|
318
|
+
A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.
|
|
300
319
|
|
|
301
320
|
User: What national parks did I go to last year?
|
|
302
321
|
Assistant: {{"queries": ["National park I visited in {last_new_year} dt>='{last_new_year_date}' dt<'{current_new_year_date}'"]}}
|
|
322
|
+
A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year}.
|
|
303
323
|
|
|
304
324
|
User: How can you help me?
|
|
305
325
|
Assistant: {{"queries": ["Social relationships", "Physical and mental health", "Education and career", "Personal life goals and habits"]}}
|
|
326
|
+
A: I can help you live healthier and happier across work and personal life
|
|
306
327
|
|
|
307
328
|
User: Who all did I meet here yesterday?
|
|
308
329
|
Assistant: {{"queries": ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
|
|
330
|
+
A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.
|
|
309
331
|
""".strip()
|
|
310
332
|
)
|
|
311
333
|
|
|
@@ -319,7 +341,11 @@ Assistant:
|
|
|
319
341
|
""".strip()
|
|
320
342
|
)
|
|
321
343
|
|
|
322
|
-
system_prompt_extract_relevant_information = """
|
|
344
|
+
system_prompt_extract_relevant_information = """
|
|
345
|
+
As a professional analyst, create a comprehensive report of the most relevant information from a web page in response to a user's query.
|
|
346
|
+
The text provided is directly from within the web page.
|
|
347
|
+
The report you create should be multiple paragraphs, and it should represent the content of the website.
|
|
348
|
+
Tell the user exactly what the website says in response to their query, while adhering to these guidelines:
|
|
323
349
|
|
|
324
350
|
1. Answer the user's query as specifically as possible. Include many supporting details from the website.
|
|
325
351
|
2. Craft a report that is detailed, thorough, in-depth, and complex, while maintaining clarity.
|
|
@@ -340,7 +366,11 @@ Collate only relevant information from the website to answer the target query.
|
|
|
340
366
|
""".strip()
|
|
341
367
|
)
|
|
342
368
|
|
|
343
|
-
system_prompt_extract_relevant_summary = """
|
|
369
|
+
system_prompt_extract_relevant_summary = """
|
|
370
|
+
As a professional analyst, create a comprehensive report of the most relevant information from the document in response to a user's query.
|
|
371
|
+
The text provided is directly from within the document.
|
|
372
|
+
The report you create should be multiple paragraphs, and it should represent the content of the document.
|
|
373
|
+
Tell the user exactly what the document says in response to their query, while adhering to these guidelines:
|
|
344
374
|
|
|
345
375
|
1. Answer the user's query as specifically as possible. Include many supporting details from the document.
|
|
346
376
|
2. Craft a report that is detailed, thorough, in-depth, and complex, while maintaining clarity.
|
|
@@ -363,11 +393,13 @@ Collate only relevant information from the document to answer the target query.
|
|
|
363
393
|
|
|
364
394
|
pick_relevant_output_mode = PromptTemplate.from_template(
|
|
365
395
|
"""
|
|
366
|
-
You are Khoj, an excellent analyst for selecting the correct way to respond to a user's query.
|
|
396
|
+
You are Khoj, an excellent analyst for selecting the correct way to respond to a user's query.
|
|
397
|
+
You have access to a limited set of modes for your response.
|
|
398
|
+
You can only use one of these modes.
|
|
367
399
|
|
|
368
400
|
{modes}
|
|
369
401
|
|
|
370
|
-
Here are some
|
|
402
|
+
Here are some examples:
|
|
371
403
|
|
|
372
404
|
Example:
|
|
373
405
|
Chat History:
|
|
@@ -383,7 +415,7 @@ User: I'm having trouble deciding which laptop to get. I want something with at
|
|
|
383
415
|
AI: I can help with that. I see online that there is a new model of the Dell XPS 15 that meets your requirements.
|
|
384
416
|
|
|
385
417
|
Q: What are the specs of the new Dell XPS 15?
|
|
386
|
-
Khoj:
|
|
418
|
+
Khoj: text
|
|
387
419
|
|
|
388
420
|
Example:
|
|
389
421
|
Chat History:
|
|
@@ -391,7 +423,7 @@ User: Where did I go on my last vacation?
|
|
|
391
423
|
AI: You went to Jordan and visited Petra, the Dead Sea, and Wadi Rum.
|
|
392
424
|
|
|
393
425
|
Q: Remind me who did I go with on that trip?
|
|
394
|
-
Khoj:
|
|
426
|
+
Khoj: text
|
|
395
427
|
|
|
396
428
|
Example:
|
|
397
429
|
Chat History:
|
|
@@ -399,7 +431,7 @@ User: How's the weather outside? Current Location: Bali, Indonesia
|
|
|
399
431
|
AI: It's currently 28°C and partly cloudy in Bali.
|
|
400
432
|
|
|
401
433
|
Q: Share a painting using the weather for Bali every morning.
|
|
402
|
-
Khoj:
|
|
434
|
+
Khoj: automation
|
|
403
435
|
|
|
404
436
|
Now it's your turn to pick the mode you would like to use to answer the user's question. Provide your response as a string.
|
|
405
437
|
|
|
@@ -422,7 +454,7 @@ Which of the data sources listed below you would use to answer the user's questi
|
|
|
422
454
|
|
|
423
455
|
{tools}
|
|
424
456
|
|
|
425
|
-
Here are some
|
|
457
|
+
Here are some examples:
|
|
426
458
|
|
|
427
459
|
Example:
|
|
428
460
|
Chat History:
|
|
@@ -533,10 +565,10 @@ You are Khoj, an advanced google search assistant. You are tasked with construct
|
|
|
533
565
|
- Break messages into multiple search queries when required to retrieve the relevant information.
|
|
534
566
|
- Use site: google search operators when appropriate
|
|
535
567
|
- You have access to the the whole internet to retrieve information.
|
|
536
|
-
- Official, up-to-date information about you, Khoj, is available at site:khoj.dev
|
|
568
|
+
- Official, up-to-date information about you, Khoj, is available at site:khoj.dev, github or pypi.
|
|
537
569
|
|
|
538
570
|
What Google searches, if any, will you need to perform to answer the user's question?
|
|
539
|
-
Provide search queries as a list of strings in a JSON object.
|
|
571
|
+
Provide search queries as a list of strings in a JSON object. Do not wrap the json in a codeblock.
|
|
540
572
|
Current Date: {current_date}
|
|
541
573
|
User's Location: {location}
|
|
542
574
|
|
|
@@ -589,7 +621,6 @@ Q: How many oranges would fit in NASA's Saturn V rocket?
|
|
|
589
621
|
Khoj: {{"queries": ["volume of an orange", "volume of saturn v rocket"]}}
|
|
590
622
|
|
|
591
623
|
Now it's your turn to construct Google search queries to answer the user's question. Provide them as a list of strings in a JSON object. Do not say anything else.
|
|
592
|
-
Now it's your turn to construct a search query for Google to answer the user's question.
|
|
593
624
|
History:
|
|
594
625
|
{chat_history}
|
|
595
626
|
|
|
@@ -62,10 +62,6 @@ class ThreadedGenerator:
|
|
|
62
62
|
self.queue.put(data)
|
|
63
63
|
|
|
64
64
|
def close(self):
|
|
65
|
-
if self.compiled_references and len(self.compiled_references) > 0:
|
|
66
|
-
self.queue.put(f"### compiled references:{json.dumps(self.compiled_references)}")
|
|
67
|
-
if self.online_results and len(self.online_results) > 0:
|
|
68
|
-
self.queue.put(f"### compiled references:{json.dumps(self.online_results)}")
|
|
69
65
|
self.queue.put(StopIteration)
|
|
70
66
|
|
|
71
67
|
|
|
@@ -186,7 +182,7 @@ def generate_chatml_messages_with_context(
|
|
|
186
182
|
|
|
187
183
|
def truncate_messages(
|
|
188
184
|
messages: list[ChatMessage],
|
|
189
|
-
max_prompt_size,
|
|
185
|
+
max_prompt_size: int,
|
|
190
186
|
model_name: str,
|
|
191
187
|
loaded_model: Optional[Llama] = None,
|
|
192
188
|
tokenizer_name=None,
|
|
@@ -232,7 +228,8 @@ def truncate_messages(
|
|
|
232
228
|
tokens = sum([len(encoder.encode(message.content)) for message in messages if type(message.content) == str])
|
|
233
229
|
|
|
234
230
|
# Drop older messages until under max supported prompt size by model
|
|
235
|
-
|
|
231
|
+
# Reserves 4 tokens to demarcate each message (e.g <|im_start|>user, <|im_end|>, <|endoftext|> etc.)
|
|
232
|
+
while (tokens + system_message_tokens + 4 * len(messages)) > max_prompt_size and len(messages) > 1:
|
|
236
233
|
messages.pop()
|
|
237
234
|
tokens = sum([len(encoder.encode(message.content)) for message in messages if type(message.content) == str])
|
|
238
235
|
|
|
@@ -254,6 +251,8 @@ def truncate_messages(
|
|
|
254
251
|
f"Truncate current message to fit within max prompt size of {max_prompt_size} supported by {model_name} model:\n {truncated_message}"
|
|
255
252
|
)
|
|
256
253
|
|
|
254
|
+
if system_message:
|
|
255
|
+
system_message.role = "user" if "gemma-2" in model_name else "system"
|
|
257
256
|
return messages + [system_message] if system_message else messages
|
|
258
257
|
|
|
259
258
|
|
|
@@ -11,6 +11,7 @@ from bs4 import BeautifulSoup
|
|
|
11
11
|
from markdownify import markdownify
|
|
12
12
|
|
|
13
13
|
from khoj.routers.helpers import (
|
|
14
|
+
ChatEvent,
|
|
14
15
|
extract_relevant_info,
|
|
15
16
|
generate_online_subqueries,
|
|
16
17
|
infer_webpage_urls,
|
|
@@ -56,7 +57,8 @@ async def search_online(
|
|
|
56
57
|
query += " ".join(custom_filters)
|
|
57
58
|
if not is_internet_connected():
|
|
58
59
|
logger.warn("Cannot search online as not connected to internet")
|
|
59
|
-
|
|
60
|
+
yield {}
|
|
61
|
+
return
|
|
60
62
|
|
|
61
63
|
# Breakdown the query into subqueries to get the correct answer
|
|
62
64
|
subqueries = await generate_online_subqueries(query, conversation_history, location)
|
|
@@ -66,7 +68,8 @@ async def search_online(
|
|
|
66
68
|
logger.info(f"🌐 Searching the Internet for {list(subqueries)}")
|
|
67
69
|
if send_status_func:
|
|
68
70
|
subqueries_str = "\n- " + "\n- ".join(list(subqueries))
|
|
69
|
-
|
|
71
|
+
async for event in send_status_func(f"**🌐 Searching the Internet for**: {subqueries_str}"):
|
|
72
|
+
yield {ChatEvent.STATUS: event}
|
|
70
73
|
|
|
71
74
|
with timer(f"Internet searches for {list(subqueries)} took", logger):
|
|
72
75
|
search_func = search_with_google if SERPER_DEV_API_KEY else search_with_jina
|
|
@@ -89,7 +92,8 @@ async def search_online(
|
|
|
89
92
|
logger.info(f"🌐👀 Reading web pages at: {list(webpage_links)}")
|
|
90
93
|
if send_status_func:
|
|
91
94
|
webpage_links_str = "\n- " + "\n- ".join(list(webpage_links))
|
|
92
|
-
|
|
95
|
+
async for event in send_status_func(f"**📖 Reading web pages**: {webpage_links_str}"):
|
|
96
|
+
yield {ChatEvent.STATUS: event}
|
|
93
97
|
tasks = [read_webpage_and_extract_content(subquery, link, content) for link, subquery, content in webpages]
|
|
94
98
|
results = await asyncio.gather(*tasks)
|
|
95
99
|
|
|
@@ -98,7 +102,7 @@ async def search_online(
|
|
|
98
102
|
if webpage_extract is not None:
|
|
99
103
|
response_dict[subquery]["webpages"] = {"link": url, "snippet": webpage_extract}
|
|
100
104
|
|
|
101
|
-
|
|
105
|
+
yield response_dict
|
|
102
106
|
|
|
103
107
|
|
|
104
108
|
async def search_with_google(query: str) -> Tuple[str, Dict[str, List[Dict]]]:
|
|
@@ -127,13 +131,15 @@ async def read_webpages(
|
|
|
127
131
|
"Infer web pages to read from the query and extract relevant information from them"
|
|
128
132
|
logger.info(f"Inferring web pages to read")
|
|
129
133
|
if send_status_func:
|
|
130
|
-
|
|
134
|
+
async for event in send_status_func(f"**🧐 Inferring web pages to read**"):
|
|
135
|
+
yield {ChatEvent.STATUS: event}
|
|
131
136
|
urls = await infer_webpage_urls(query, conversation_history, location)
|
|
132
137
|
|
|
133
138
|
logger.info(f"Reading web pages at: {urls}")
|
|
134
139
|
if send_status_func:
|
|
135
140
|
webpage_links_str = "\n- " + "\n- ".join(list(urls))
|
|
136
|
-
|
|
141
|
+
async for event in send_status_func(f"**📖 Reading web pages**: {webpage_links_str}"):
|
|
142
|
+
yield {ChatEvent.STATUS: event}
|
|
137
143
|
tasks = [read_webpage_and_extract_content(query, url) for url in urls]
|
|
138
144
|
results = await asyncio.gather(*tasks)
|
|
139
145
|
|
|
@@ -141,7 +147,7 @@ async def read_webpages(
|
|
|
141
147
|
response[query]["webpages"] = [
|
|
142
148
|
{"query": q, "link": url, "snippet": web_extract} for q, web_extract, url in results if web_extract is not None
|
|
143
149
|
]
|
|
144
|
-
|
|
150
|
+
yield response
|
|
145
151
|
|
|
146
152
|
|
|
147
153
|
async def read_webpage_and_extract_content(
|
khoj/routers/api.py
CHANGED
|
@@ -6,7 +6,6 @@ import os
|
|
|
6
6
|
import threading
|
|
7
7
|
import time
|
|
8
8
|
import uuid
|
|
9
|
-
from random import random
|
|
10
9
|
from typing import Any, Callable, List, Optional, Union
|
|
11
10
|
|
|
12
11
|
import cron_descriptor
|
|
@@ -37,6 +36,7 @@ from khoj.processor.conversation.openai.gpt import extract_questions
|
|
|
37
36
|
from khoj.processor.conversation.openai.whisper import transcribe_audio
|
|
38
37
|
from khoj.routers.helpers import (
|
|
39
38
|
ApiUserRateLimiter,
|
|
39
|
+
ChatEvent,
|
|
40
40
|
CommonQueryParams,
|
|
41
41
|
ConversationCommandRateLimiter,
|
|
42
42
|
acreate_title_from_query,
|
|
@@ -298,11 +298,13 @@ async def extract_references_and_questions(
|
|
|
298
298
|
not ConversationCommand.Notes in conversation_commands
|
|
299
299
|
and not ConversationCommand.Default in conversation_commands
|
|
300
300
|
):
|
|
301
|
-
|
|
301
|
+
yield compiled_references, inferred_queries, q
|
|
302
|
+
return
|
|
302
303
|
|
|
303
304
|
if not await sync_to_async(EntryAdapters.user_has_entries)(user=user):
|
|
304
305
|
logger.debug("No documents in knowledge base. Use a Khoj client to sync and chat with your docs.")
|
|
305
|
-
|
|
306
|
+
yield compiled_references, inferred_queries, q
|
|
307
|
+
return
|
|
306
308
|
|
|
307
309
|
# Extract filter terms from user message
|
|
308
310
|
defiltered_query = q
|
|
@@ -313,11 +315,12 @@ async def extract_references_and_questions(
|
|
|
313
315
|
|
|
314
316
|
if not conversation:
|
|
315
317
|
logger.error(f"Conversation with id {conversation_id} not found.")
|
|
316
|
-
|
|
318
|
+
yield compiled_references, inferred_queries, defiltered_query
|
|
319
|
+
return
|
|
317
320
|
|
|
318
321
|
filters_in_query += " ".join([f'file:"{filter}"' for filter in conversation.file_filters])
|
|
319
322
|
using_offline_chat = False
|
|
320
|
-
|
|
323
|
+
logger.debug(f"Filters in query: {filters_in_query}")
|
|
321
324
|
|
|
322
325
|
# Infer search queries from user message
|
|
323
326
|
with timer("Extracting search queries took", logger):
|
|
@@ -335,6 +338,7 @@ async def extract_references_and_questions(
|
|
|
335
338
|
|
|
336
339
|
inferred_queries = extract_questions_offline(
|
|
337
340
|
defiltered_query,
|
|
341
|
+
model=chat_model,
|
|
338
342
|
loaded_model=loaded_model,
|
|
339
343
|
conversation_log=meta_log,
|
|
340
344
|
should_extract_questions=True,
|
|
@@ -372,7 +376,8 @@ async def extract_references_and_questions(
|
|
|
372
376
|
logger.info(f"🔍 Searching knowledge base with queries: {inferred_queries}")
|
|
373
377
|
if send_status_func:
|
|
374
378
|
inferred_queries_str = "\n- " + "\n- ".join(inferred_queries)
|
|
375
|
-
|
|
379
|
+
async for event in send_status_func(f"**🔍 Searching Documents for:** {inferred_queries_str}"):
|
|
380
|
+
yield {ChatEvent.STATUS: event}
|
|
376
381
|
for query in inferred_queries:
|
|
377
382
|
n_items = min(n, 3) if using_offline_chat else n
|
|
378
383
|
search_results.extend(
|
|
@@ -391,7 +396,7 @@ async def extract_references_and_questions(
|
|
|
391
396
|
{"compiled": item.additional["compiled"], "file": item.additional["file"]} for item in search_results
|
|
392
397
|
]
|
|
393
398
|
|
|
394
|
-
|
|
399
|
+
yield compiled_references, inferred_queries, defiltered_query
|
|
395
400
|
|
|
396
401
|
|
|
397
402
|
@api.get("/health", response_class=Response)
|