khoj 1.27.2.dev12__py3-none-any.whl → 1.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +1 -1
- khoj/database/adapters/__init__.py +55 -12
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3423-ff7402ae1dd66592.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-e80647edf6c92c27.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-2beaba7c9bb750bd.js → page-fc492762298e975e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-9b5c77e0b0dd772c.js → page-416ee13a00575c39.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-c70f5b0c722d7627.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-1541d90140794f63.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-8f22b790e50dd722.js → page-b269e444fc067759.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-ab2995529ece3140.js → page-7d431ce8e565c7c3.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-95f56e53f48f0289.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-4eba6154f7bb9771.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-17202cfae517c5de.js → webpack-33a82ccca02cd2b8.js} +1 -1
- khoj/interface/compiled/_next/static/css/2196fae09c2f906e.css +1 -0
- khoj/interface/compiled/_next/static/css/6bde1f2045622ef7.css +1 -0
- khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +25 -0
- khoj/interface/compiled/_next/static/css/ebef43da1c0651d5.css +1 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
- khoj/processor/conversation/anthropic/utils.py +37 -6
- khoj/processor/conversation/google/gemini_chat.py +23 -13
- khoj/processor/conversation/google/utils.py +34 -10
- khoj/processor/conversation/offline/chat_model.py +48 -16
- khoj/processor/conversation/openai/gpt.py +25 -10
- khoj/processor/conversation/openai/utils.py +50 -9
- khoj/processor/conversation/prompts.py +156 -65
- khoj/processor/conversation/utils.py +306 -6
- khoj/processor/embeddings.py +4 -4
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +27 -12
- khoj/processor/tools/run_code.py +144 -0
- khoj/routers/api.py +11 -6
- khoj/routers/api_chat.py +213 -111
- khoj/routers/helpers.py +171 -60
- khoj/routers/research.py +320 -0
- khoj/search_filter/date_filter.py +1 -3
- khoj/search_filter/file_filter.py +1 -2
- khoj/search_type/text_search.py +3 -3
- khoj/utils/helpers.py +25 -3
- khoj/utils/yaml.py +4 -0
- {khoj-1.27.2.dev12.dist-info → khoj-1.28.1.dist-info}/METADATA +3 -2
- {khoj-1.27.2.dev12.dist-info → khoj-1.28.1.dist-info}/RECORD +68 -65
- khoj/interface/compiled/_next/static/chunks/1603-b9d95833e0e025e8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-8e9c420574a9fbe3.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9479-4b443fdcc99141c9.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
- khoj/interface/compiled/_next/static/css/2272c73fc7a3b571.css +0 -1
- khoj/interface/compiled/_next/static/css/553f9cdcc7a2bcd6.css +0 -1
- khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
- khoj/interface/compiled/_next/static/css/b70402177a7c3207.css +0 -1
- /khoj/interface/compiled/_next/static/{kul3DNllWR6eaUDc4X0eU → JcTomiF3o0dIo4RxHR9Vu}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{kul3DNllWR6eaUDc4X0eU → JcTomiF3o0dIo4RxHR9Vu}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1970-1d6d0c1b00b4f343.js → 1970-90dd510762d820ba.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9417-759984ad62caa3dc.js → 9417-951f46451a8dd6d7.js} +0 -0
- {khoj-1.27.2.dev12.dist-info → khoj-1.28.1.dist-info}/WHEEL +0 -0
- {khoj-1.27.2.dev12.dist-info → khoj-1.28.1.dist-info}/entry_points.txt +0 -0
- {khoj-1.27.2.dev12.dist-info → khoj-1.28.1.dist-info}/licenses/LICENSE +0 -0
```diff
diff --git a/khoj/processor/conversation/openai/gpt.py b/khoj/processor/conversation/openai/gpt.py
@@ -12,12 +12,13 @@ from khoj.processor.conversation.openai.utils import (
     completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
-    remove_json_codeblock,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -33,6 +34,7 @@ def extract_questions(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -82,13 +84,18 @@ def extract_questions(
     messages = [ChatMessage(content=prompt, role="user")]
 
     response = send_message_to_model(
-        messages, api_key, model, response_type="json_object", api_base_url=api_base_url, temperature=temperature
+        messages,
+        api_key,
+        model,
+        response_type="json_object",
+        api_base_url=api_base_url,
+        temperature=temperature,
+        tracer=tracer,
     )
 
     # Extract, Clean Message from GPT's Response
     try:
-        response = response.strip()
-        response = remove_json_codeblock(response)
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -103,7 +110,9 @@ def extract_questions(
     return questions
 
 
-def send_message_to_model(messages, api_key, model, response_type="text", api_base_url=None, temperature=0):
+def send_message_to_model(
+    messages, api_key, model, response_type="text", api_base_url=None, temperature=0, tracer: dict = {}
+):
     """
     Send message to model
     """
@@ -116,6 +125,7 @@ def send_message_to_model(messages, api_key, model, response_type="text", api_base_url=None, temperature=0):
         temperature=temperature,
         api_base_url=api_base_url,
         model_kwargs={"response_format": {"type": response_type}},
+        tracer=tracer,
     )
 
 
@@ -123,6 +133,7 @@ def converse(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: str = "gpt-4o-mini",
     api_key: Optional[str] = None,
@@ -137,13 +148,13 @@ def converse(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer: dict = {},
 ):
     """
     Converse with user using OpenAI's ChatGPT
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -167,7 +178,7 @@ def converse(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -175,10 +186,13 @@ def converse(
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message = f"{prompts.notes_conversation.format(references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(references=yaml_dump(references))}\n\n"
     if not is_none_or_empty(online_results):
-        context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}"
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -207,4 +221,5 @@ def converse(
         api_base_url=api_base_url,
         completion_func=completion_func,
         model_kwargs={"stop": ["Notes:\n["]},
+        tracer=tracer,
     )
```
```diff
diff --git a/khoj/processor/conversation/openai/utils.py b/khoj/processor/conversation/openai/utils.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from threading import Thread
 from typing import Dict
 
@@ -12,7 +13,12 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from khoj.processor.conversation.utils import ThreadedGenerator
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +39,7 @@ openai_clients: Dict[str, openai.OpenAI] = {}
     reraise=True,
 )
 def completion_with_backoff(
-    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None
+    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None, tracer: dict = {}
 ) -> str:
     client_key = f"{openai_api_key}--{api_base_url}"
     client: openai.OpenAI | None = openai_clients.get(client_key)
@@ -55,6 +61,9 @@ def completion_with_backoff(
         model_kwargs.pop("stop", None)
         model_kwargs.pop("response_format", None)
 
+    if os.getenv("KHOJ_LLM_SEED"):
+        model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
+
     chat = client.chat.completions.create(
         stream=stream,
         messages=formatted_messages,  # type: ignore
@@ -77,6 +86,12 @@ def completion_with_backoff(
         elif delta_chunk.content:
             aggregated_response += delta_chunk.content
 
+    # Save conversation trace
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, aggregated_response, tracer)
+
     return aggregated_response
 
 
@@ -103,26 +118,37 @@ def chat_completion_with_backoff(
     api_base_url=None,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
-        target=llm_thread, args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs)
+        target=llm_thread,
+        args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
-def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_base_url=None, model_kwargs=None):
+def llm_thread(
+    g,
+    messages,
+    model_name,
+    temperature,
+    openai_api_key=None,
+    api_base_url=None,
+    model_kwargs=None,
+    tracer: dict = {},
+):
     try:
         client_key = f"{openai_api_key}--{api_base_url}"
         if client_key not in openai_clients:
-            client: openai.OpenAI = openai.OpenAI(
+            client = openai.OpenAI(
                 api_key=openai_api_key,
                 base_url=api_base_url,
            )
            openai_clients[client_key] = client
        else:
-            client: openai.OpenAI = openai_clients[client_key]
+            client = openai_clients[client_key]
 
         formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
         stream = True
@@ -135,6 +161,9 @@ def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_base_url=None, model_kwargs=None):
         model_kwargs.pop("stop", None)
         model_kwargs.pop("response_format", None)
 
+        if os.getenv("KHOJ_LLM_SEED"):
+            model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
+
         chat = client.chat.completions.create(
             stream=stream,
             messages=formatted_messages,
@@ -144,17 +173,29 @@ def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_base_url=None, model_kwargs=None):
             **(model_kwargs or dict()),
         )
 
+        aggregated_response = ""
         if not stream:
-            g.send(chat.choices[0].message.content)
+            aggregated_response = chat.choices[0].message.content
+            g.send(aggregated_response)
         else:
             for chunk in chat:
                 if len(chunk.choices) == 0:
                     continue
                 delta_chunk = chunk.choices[0].delta
+                text_chunk = ""
                 if isinstance(delta_chunk, str):
-                    g.send(delta_chunk)
+                    text_chunk = delta_chunk
                 elif delta_chunk.content:
-                    g.send(delta_chunk.content)
+                    text_chunk = delta_chunk.content
+                if text_chunk:
+                    aggregated_response += text_chunk
+                    g.send(text_chunk)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except Exception as e:
         logger.error(f"Error in llm_thread: {e}", exc_info=True)
     finally:
```
```diff
diff --git a/khoj/processor/conversation/prompts.py b/khoj/processor/conversation/prompts.py
@@ -193,7 +193,6 @@ you need to convert the user's query to a description format that the novice artist
 - ellipse
 - line
 - arrow
-- frame
 
 use these primitives to describe what sort of diagram the drawer should create. the artist must recreate the diagram every time, so include all relevant prior information in your description.
 
@@ -284,21 +283,6 @@ For text, you must use the `text` property to specify the text to be rendered.
 text: string,
 }}
 
-For frames, use the `children` property to specify the elements that are inside the frame by their ids.
-
-{{
-    type: "frame",
-    id: string,
-    x: number,
-    y: number,
-    width: number,
-    height: number,
-    name: string,
-    children: [
-        string
-    ]
-}}
-
 Here's an example of a valid diagram:
 
 Design Description: Create a diagram describing a circular development process with 3 stages: design, implementation and feedback. The design stage is connected to the implementation stage and the implementation stage is connected to the feedback stage and the feedback stage is connected to the design stage. Each stage should be labeled with the stage name.
@@ -410,21 +394,23 @@ Q: {query}
 
 extract_questions = PromptTemplate.from_template(
     """
-You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes.
+You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes and documents.
 Construct search queries to retrieve relevant information to answer the user's question.
-- You will be provided past questions(Q) and answers(A) for context.
+- You will be provided example and actual past user questions(Q), search queries(Khoj) and answers(A) for context.
 - Add as much context from the previous questions and answers as required into your search queries.
-- Break messages into multiple search queries when required to retrieve the relevant information.
+- Break your search down into multiple search queries from a diverse set of lenses to retrieve all related documents.
 - Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
 - When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
 {personality_context}
-What searches will you perform to answer the users question? Respond with search queries as list of strings in a JSON object.
+What searches will you perform to answer the user's question? Respond with search queries as list of strings in a JSON object.
 Current Date: {day_of_week}, {current_date}
 User's Location: {location}
 {username}
 
+Examples
+---
 Q: How was my trip to Cambodia?
-Khoj: {{"queries": ["How was my trip to Cambodia?"]}}
+Khoj: {{"queries": ["How was my trip to Cambodia?", "Angkor Wat temple visit", "Flight to Phnom Penh", "Expenses in Cambodia", "Stay in Cambodia"]}}
 A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.
 
 Q: Who did i visit that temple with?
@@ -459,6 +445,8 @@ Q: Who all did I meet here yesterday?
 Khoj: {{"queries": ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
 A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.
 
+Actual
+---
 {chat_history}
 Q: {text}
 Khoj:
@@ -467,11 +455,11 @@ Khoj:
 
 extract_questions_anthropic_system_prompt = PromptTemplate.from_template(
     """
-You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes.
+You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes.
 Construct search queries to retrieve relevant information to answer the user's question.
-- You will be provided past questions(User), search queries(Assistant) and answers(A) for context.
+- You will be provided past questions(User), search queries(Assistant) and answers(A) for context.
 - Add as much context from the previous questions and answers as required into your search queries.
-- Break messages into multiple search queries when required to retrieve the relevant information.
+- Break your search down into multiple search queries from a diverse set of lenses to retrieve all related documents.
 - Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
 - When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
 {personality_context}
@@ -484,7 +472,7 @@ User's Location: {location}
 Here are some examples of how you can construct search queries to answer the user's question:
 
 User: How was my trip to Cambodia?
-Assistant: {{"queries": ["How was my trip to Cambodia?"]}}
+Assistant: {{"queries": ["How was my trip to Cambodia?", "Angkor Wat temple visit", "Flight to Phnom Penh", "Expenses in Cambodia", "Stay in Cambodia"]}}
 A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.
 
 User: What national parks did I go to last year?
@@ -517,17 +505,14 @@ Assistant:
 )
 
 system_prompt_extract_relevant_information = """
-As a professional analyst, create a comprehensive report of the most relevant information from a web page in response to a user's query.
-The text provided is directly from within the web page.
-The report you create should be multiple paragraphs, and it should represent the content of the website.
-Tell the user exactly what the website says in response to their query, while adhering to these guidelines:
-
-1. Answer the user's query as specifically as possible. Include many supporting details from the website.
-2. Craft a report that is detailed, thorough, in-depth, and complex, while maintaining clarity.
-3. Rely strictly on the provided text, without including external information.
-4. Format the report in multiple paragraphs with a clear structure.
-5. Be as specific as possible in your answer to the user's query.
-6. Reproduce as much of the provided text as possible, while maintaining readability.
+As a professional analyst, your job is to extract all pertinent information from documents to help answer user's query.
+You will be provided raw text directly from within the document.
+Adhere to these guidelines while extracting information from the provided documents:
+
+1. Extract all relevant text and links from the document that can assist with further research or answer the user's query.
+2. Craft a comprehensive but compact report with all the necessary data from the document to generate an informed response.
+3. Rely strictly on the provided text to generate your summary, without including external information.
+4. Provide specific, important snippets from the document in your report to establish trust in your summary.
 """.strip()
 
 extract_relevant_information = PromptTemplate.from_template(
@@ -535,10 +520,10 @@ extract_relevant_information = PromptTemplate.from_template(
 {personality_context}
 Target Query: {query}
 
-Web Pages:
+Document:
 {corpus}
 
-Collate only relevant information from the website to answer the target query.
+Collate only relevant information from the document to answer the target query.
 """.strip()
 )
 
@@ -633,6 +618,67 @@ Khoj:
 """.strip()
 )
 
+plan_function_execution = PromptTemplate.from_template(
+    """
+You are Khoj, a smart, creative and methodical researcher. Use the provided tool AIs to investigate information to answer query.
+Create a multi-step plan and intelligently iterate on the plan based on the retrieved information to find the requested information.
+{personality_context}
+
+# Instructions
+- Ask highly diverse, detailed queries to the tool AIs, one tool AI at a time, to discover required information or run calculations. Their response will be shown to you in the next iteration.
+- Break down your research process into independent, self-contained steps that can be executed sequentially using the available tool AIs to answer the user's query. Write your step-by-step plan in the scratchpad.
+- Always ask a new query that was not asked to the tool AI in a previous iteration. Build on the results of the previous iterations.
+- Ensure that all required context is passed to the tool AIs for successful execution. They only know the context provided in your query.
+- Think step by step to come up with creative strategies when the previous iteration did not yield useful results.
+- You are allowed upto {max_iterations} iterations to use the help of the provided tool AIs to answer the user's question.
+- Stop when you have the required information by returning a JSON object with an empty "tool" field. E.g., {{scratchpad: "I have all I need", tool: "", query: ""}}
+
+# Examples
+Assuming you can search the user's notes and the internet.
+- When the user asks for the population of their hometown
+  1. Try look up their hometown in their notes. Ask the note search AI to search for their birth certificate, childhood memories, school, resume etc.
+  2. If not found in their notes, try infer their hometown from their online social media profiles. Ask the online search AI to look for {username}'s biography, school, resume on linkedin, facebook, website etc.
+  3. Only then try find the latest population of their hometown by reading official websites with the help of the online search and web page reading AI.
+- When the user asks for their computer's specs
+  1. Try find their computer model in their notes.
+  2. Now find webpages with their computer model's spec online.
+  3. Ask the the webpage tool AI to extract the required information from the relevant webpages.
+- When the user asks what clothes to carry for their upcoming trip
+  1. Find the itinerary of their upcoming trip in their notes.
+  2. Next find the weather forecast at the destination online.
+  3. Then find if they mentioned what clothes they own in their notes.
+
+# Background Context
+- Current Date: {day_of_week}, {current_date}
+- User Location: {location}
+- User Name: {username}
+
+# Available Tool AIs
+Which of the tool AIs listed below would you use to answer the user's question? You **only** have access to the following tool AIs:
+
+{tools}
+
+# Previous Iterations
+{previous_iterations}
+
+# Chat History:
+{chat_history}
+
+Return the next tool AI to use and the query to ask it. Your response should always be a valid JSON object. Do not say anything else.
+Response format:
+{{"scratchpad": "<your_scratchpad_to_reason_about_which_tool_to_use>", "query": "<your_detailed_query_for_the_tool_ai>", "tool": "<name_of_tool_ai>"}}
+""".strip()
+)
+
+previous_iteration = PromptTemplate.from_template(
+    """
+## Iteration {index}:
+- tool: {tool}
+- query: {query}
+- result: {result}
+"""
+)
+
 pick_relevant_information_collection_tools = PromptTemplate.from_template(
     """
 You are Khoj, an extremely smart and helpful search assistant.
```
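The new `plan_function_execution` and `previous_iteration` templates back the iterative research mode added in `khoj/routers/research.py` (+320 lines in this release): on each iteration the model must return a single JSON object with `scratchpad`, `query` and `tool` fields, and an empty `tool` value ends the loop. A sketch of how a caller might consume that contract; the fence-stripping below only approximates khoj's `clean_json` helper, and the field handling is an assumption rather than the exact parser in research.py:

```python
import json

def parse_planner_response(raw: str) -> tuple:
    # Parse one {"scratchpad", "query", "tool"} planner iteration.
    # Stripping a markdown json code fence approximates khoj's clean_json
    # helper; the exact cleanup khoj performs may differ.
    cleaned = raw.strip().removeprefix("```json").removesuffix("```").strip()
    response = json.loads(cleaned)
    return response.get("scratchpad", ""), response.get("tool", ""), response.get("query", "")

scratchpad, tool, query = parse_planner_response(
    '{"scratchpad": "I have all I need", "tool": "", "query": ""}'
)
if not tool:  # an empty "tool" field signals the research loop should stop
    print("planner finished:", scratchpad)
```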
```diff
diff --git a/khoj/processor/conversation/prompts.py b/khoj/processor/conversation/prompts.py
@@ -752,8 +798,8 @@ Khoj:
 online_search_conversation_subqueries = PromptTemplate.from_template(
     """
 You are Khoj, an advanced web search assistant. You are tasked with constructing **up to three** google search queries to answer the user's question.
-- You will receive the conversation history as context.
-- Add as much context from the previous questions and answers as required into your search queries.
+- You will receive the actual chat history as context.
+- Add as much context from the chat history as required into your search queries.
 - Break messages into multiple search queries when required to retrieve the relevant information.
 - Use site: google search operator when appropriate
 - You have access to the the whole internet to retrieve information.
@@ -766,62 +812,107 @@ User's Location: {location}
 {username}
 
 Here are some examples:
-History:
+Example Chat History:
 User: I like to use Hacker News to get my tech news.
+Khoj: {{queries: ["what is Hacker News?", "Hacker News website for tech news"]}}
 AI: Hacker News is an online forum for sharing and discussing the latest tech news. It is a great place to learn about new technologies and startups.
 
-Q: Summarize the top posts on HackerNews
+User: Summarize the top posts on HackerNews
 Khoj: {{"queries": ["top posts on HackerNews"]}}
 
-History:
-
-Q: Tell me the latest news about the farmers protest in Colombia and China on Reuters
+Example Chat History:
+User: Tell me the latest news about the farmers protest in Colombia and China on Reuters
 Khoj: {{"queries": ["site:reuters.com farmers protest Colombia", "site:reuters.com farmers protest China"]}}
 
-History:
+Example Chat History:
 User: I'm currently living in New York but I'm thinking about moving to San Francisco.
+Khoj: {{"queries": ["New York city vs San Francisco life", "San Francisco living cost", "New York city living cost"]}}
 AI: New York is a great city to live in. It has a lot of great restaurants and museums. San Francisco is also a great city to live in. It has good access to nature and a great tech scene.
 
-Q: What is the climate like in those cities?
-Khoj: {{"queries": ["climate in New York city", "climate in San Francisco"]}}
+User: What is the climate like in those cities?
+Khoj: {{"queries": ["climate in New York city", "climate in San Francisco"]}}
 
-History:
-
-User: Hey, Ananya is in town tonight!
+Example Chat History:
+User: Hey, Ananya is in town tonight!
+Khoj: {{"queries": ["events in {location} tonight", "best restaurants in {location}", "places to visit in {location}"]}}
 AI: Oh that's awesome! What are your plans for the evening?
 
-Q: She wants to see a movie. Any decent sci-fi movies playing at the local theater?
+User: She wants to see a movie. Any decent sci-fi movies playing at the local theater?
 Khoj: {{"queries": ["new sci-fi movies in theaters near {location}"]}}
 
-History:
+Example Chat History:
 User: Can I chat with you over WhatsApp?
-AI: Yes, you can chat with me using WhatsApp.
-
-Q: How
 Khoj: {{"queries": ["site:khoj.dev chat with Khoj on Whatsapp"]}}
+AI: Yes, you can chat with me using WhatsApp.
 
-History:
-
-
-Q: How do I share my files with you?
+Example Chat History:
+User: How do I share my files with Khoj?
 Khoj: {{"queries": ["site:khoj.dev sync files with Khoj"]}}
 
-History:
+Example Chat History:
 User: I need to transport a lot of oranges to the moon. Are there any rockets that can fit a lot of oranges?
+Khoj: {{"queries": ["current rockets with large cargo capacity", "rocket rideshare cost by cargo capacity"]}}
 AI: NASA's Saturn V rocket frequently makes lunar trips and has a large cargo capacity.
 
-Q: How many oranges would fit in NASA's Saturn V rocket?
-Khoj: {{"queries": ["volume of an orange", "volume of Saturn V rocket"]}}
+User: How many oranges would fit in NASA's Saturn V rocket?
+Khoj: {{"queries": ["volume of an orange", "volume of Saturn V rocket"]}}
 
 Now it's your turn to construct Google search queries to answer the user's question. Provide them as a list of strings in a JSON object. Do not say anything else.
-History:
+Actual Chat History:
 {chat_history}
 
-Q: {query}
+User: {query}
 Khoj:
 """.strip()
 )
 
+# Code Generation
+# --
+python_code_generation_prompt = PromptTemplate.from_template(
+    """
+You are Khoj, an advanced python programmer. You are tasked with constructing **up to three** python programs to best answer the user query.
+- The python program will run in a pyodide python sandbox with no network access.
+- You can write programs to run complex calculations, analyze data, create charts, generate documents to meticulously answer the query
+- The sandbox has access to the standard library, matplotlib, panda, numpy, scipy, bs4, sympy, brotli, cryptography, fast-parquet
+- Do not try display images or plots in the code directly. The code should save the image or plot to a file instead.
+- Write any document, charts etc. to be shared with the user to file. These files can be seen by the user.
+- Use as much context from the previous questions and answers as required to generate your code.
+{personality_context}
+What code will you need to write, if any, to answer the user's question?
+Provide code programs as a list of strings in a JSON object with key "codes".
+Current Date: {current_date}
+User's Location: {location}
+{username}
+
+The JSON schema is of the form {{"codes": ["code1", "code2", "code3"]}}
+For example:
+{{"codes": ["print('Hello, World!')", "print('Goodbye, World!')"]}}
+
+Now it's your turn to construct python programs to answer the user's question. Provide them as a list of strings in a JSON object. Do not say anything else.
+Context:
+---
+{context}
+
+Chat History:
+---
+{chat_history}
+
+User: {query}
+Khoj:
+""".strip()
+)
+
+code_executed_context = PromptTemplate.from_template(
+    """
+Use the provided code executions to inform your response.
+Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the provided code execution results or past conversations.
+
+Code Execution Results:
+{code_results}
+""".strip()
+)
+
+
 # Automations
 # --
 crontime_prompt = PromptTemplate.from_template(
```
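The new `python_code_generation_prompt` pins the model's reply to a `{"codes": [...]}` JSON object, which the new `khoj/processor/tools/run_code.py` (+144 lines) executes in a pyodide sandbox with no network access before `code_executed_context` folds the results back into the chat. A minimal sketch of consuming that schema; the sandbox runner here is a stub, not khoj's executor:

```python
import json

def extract_code_programs(raw_response: str) -> list:
    # The prompt fixes the response schema to {"codes": ["code1", ...]}.
    response = json.loads(raw_response)
    return [code for code in response.get("codes", []) if isinstance(code, str) and code.strip()]

def run_in_sandbox(code: str) -> str:
    # Stub: khoj executes these programs in a pyodide sandbox without
    # network access (see khoj/processor/tools/run_code.py).
    return f"would execute {len(code)} bytes of python"

for program in extract_code_programs('{"codes": ["print(1 + 1)"]}'):
    print(run_in_sandbox(program))
```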
|