khoj 1.27.2.dev18__py3-none-any.whl → 1.27.2.dev130__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +34 -10
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1467-5a191c1cd5bf0b83.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-5d70d9dfcdcb1f10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3423-fa918f4e5365a35e.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-3ad0bfb299801220.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-7dc98df9c88828f0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d887f55fe6d4f35d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-8f22b790e50dd722.js → page-d46244282af16509.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-505b07bce608b34e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-31239d193815e49e.js → webpack-8ae5ce45161bd98e.js} +1 -1
- khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
- khoj/interface/compiled/_next/static/css/e9c5fe555dd3050b.css +25 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
- khoj/processor/conversation/anthropic/utils.py +37 -6
- khoj/processor/conversation/google/gemini_chat.py +23 -13
- khoj/processor/conversation/google/utils.py +34 -10
- khoj/processor/conversation/offline/chat_model.py +40 -15
- khoj/processor/conversation/openai/gpt.py +25 -10
- khoj/processor/conversation/openai/utils.py +43 -9
- khoj/processor/conversation/prompts.py +131 -22
- khoj/processor/conversation/utils.py +299 -6
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +19 -8
- khoj/processor/tools/run_code.py +144 -0
- khoj/routers/api.py +11 -6
- khoj/routers/api_chat.py +177 -88
- khoj/routers/helpers.py +155 -59
- khoj/routers/research.py +321 -0
- khoj/search_filter/date_filter.py +1 -3
- khoj/search_filter/file_filter.py +1 -2
- khoj/search_type/text_search.py +3 -3
- khoj/utils/helpers.py +15 -2
- khoj/utils/yaml.py +4 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/METADATA +2 -1
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/RECORD +61 -58
- khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +0 -1
- khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-8e9c420574a9fbe3.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9479-a5e7ff4c7d1d7ee7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
- khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
- /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1970-1d6d0c1b00b4f343.js → 1970-444843bea1d17d61.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9417-759984ad62caa3dc.js → 9417-19cfd1a9cb758e71.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-89e6737b2cc9fb3a.js} +0 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/WHEEL +0 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/entry_points.txt +0 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/gemini_chat.py

```diff
@@ -14,11 +14,13 @@ from khoj.processor.conversation.google.utils import (
     gemini_completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump

 logger = logging.getLogger(__name__)

@@ -35,6 +37,7 @@ def extract_questions_gemini(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -85,15 +88,12 @@ def extract_questions_gemini(
     messages = [ChatMessage(content=prompt, role="user"), ChatMessage(content=system_prompt, role="system")]

     response = gemini_send_message_to_model(
-        messages, api_key, model, response_type="json_object", temperature=temperature
+        messages, api_key, model, response_type="json_object", temperature=temperature, tracer=tracer
     )

     # Extract, Clean Message from Gemini's Response
     try:
-        response = response
-        match = re.search(r"\{.*?\}", response)
-        if match:
-            response = match.group()
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -107,15 +107,19 @@ def extract_questions_gemini(
     return questions


-def gemini_send_message_to_model(messages, api_key, model, response_type="text", temperature=0, model_kwargs=None):
+def gemini_send_message_to_model(
+    messages, api_key, model, response_type="text", temperature=0, model_kwargs=None, tracer={}
+):
     """
     Send message to model
     """
     messages, system_prompt = format_messages_for_gemini(messages)

     model_kwargs = {}
-    if response_type == "json_object":
-        model_kwargs["response_mime_type"] = "application/json"
+
+    # Sometimes, this causes unwanted behavior and terminates response early. Disable for now while it's flaky.
+    # if response_type == "json_object":
+    #     model_kwargs["response_mime_type"] = "application/json"

     # Get Response from Gemini
     return gemini_completion_with_backoff(
@@ -125,6 +129,7 @@ def gemini_send_message_to_model(messages, api_key, model, response_type="text",
         api_key=api_key,
         temperature=temperature,
         model_kwargs=model_kwargs,
+        tracer=tracer,
     )


@@ -132,6 +137,7 @@ def converse_gemini(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: Optional[str] = "gemini-1.5-flash",
     api_key: Optional[str] = None,
@@ -145,13 +151,13 @@ def converse_gemini(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer={},
 ):
     """
     Converse with user using Google's Gemini
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})

     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -176,7 +182,7 @@ def converse_gemini(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"

     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -184,10 +190,13 @@ def converse_gemini(
         return iter([prompts.no_online_results_found.format()])

     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
-        context_message += f"{prompts.online_search_conversation.format(online_results=
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()

     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -217,4 +226,5 @@ def converse_gemini(
         api_key=api_key,
         system_prompt=system_prompt,
         completion_func=completion_func,
+        tracer=tracer,
     )
```
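The gemini_chat.py hunks above thread an optional `tracer` dict from the public entrypoints (`extract_questions_gemini`, `converse_gemini`) down into `gemini_send_message_to_model`, replace the regex-based JSON extraction with `clean_json`, and serialize notes and online results with `yaml_dump`. A minimal, illustrative sketch of the tracer-threading pattern follows; the functions are stand-ins, not khoj's code, and only the `tracer` keyword mirrors the diffed signatures:

```python
# Illustrative sketch: a mutable tracer dict is passed by reference through each
# layer, so the lowest layer can record which model settings were actually used.
def send_message_to_model(messages, model, temperature=0, tracer: dict = {}):
    response = f"echo: {messages[-1]}"   # stand-in for the real model call
    tracer["chat_model"] = model         # lowest layer annotates the shared trace
    tracer["temperature"] = temperature
    return response

def extract_questions(query, model="gemini-1.5-flash", tracer: dict = {}):
    # higher layers simply forward the same dict object
    return send_message_to_model([query], model, temperature=0.2, tracer=tracer)

tracer = {}
extract_questions("What did I write about gardening?", tracer=tracer)
print(tracer)  # {'chat_model': 'gemini-1.5-flash', 'temperature': 0.2}
```

Because the same dict object travels through every call, whatever the lowest layer records is visible to the caller that created the tracer.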
khoj/processor/conversation/google/utils.py

```diff
@@ -19,8 +19,13 @@ from tenacity import (
     wait_random_exponential,
 )

-from khoj.processor.conversation.utils import ThreadedGenerator, get_image_from_url
-from khoj.utils.helpers import is_none_or_empty
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+    get_image_from_url,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode, is_none_or_empty

 logger = logging.getLogger(__name__)

@@ -35,7 +40,7 @@ MAX_OUTPUT_TOKENS_GEMINI = 8192
     reraise=True,
 )
 def gemini_completion_with_backoff(
-    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None
+    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, tracer={}
 ) -> str:
     genai.configure(api_key=api_key)
     model_kwargs = model_kwargs or dict()
@@ -60,16 +65,23 @@ def gemini_completion_with_backoff(

     try:
         # Generate the response. The last message is considered to be the current prompt
-
-
+        response = chat_session.send_message(formatted_messages[-1]["parts"])
+        response_text = response.text
     except StopCandidateException as e:
-
+        response_text, _ = handle_gemini_response(e.args)
         # Respond with reason for stopping
         logger.warning(
-            f"LLM Response Prevented for {model_name}: {
+            f"LLM Response Prevented for {model_name}: {response_text}.\n"
             + f"Last Message by {messages[-1].role}: {messages[-1].content}"
         )
-
+
+    # Save conversation trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text


 @retry(
@@ -88,17 +100,20 @@ def gemini_chat_completion_with_backoff(
     system_prompt,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
         target=gemini_llm_thread,
-        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs),
+        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs, tracer),
     )
     t.start()
     return g


-def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None):
+def gemini_llm_thread(
+    g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None, tracer: dict = {}
+):
     try:
         genai.configure(api_key=api_key)
         model_kwargs = model_kwargs or dict()
@@ -117,16 +132,25 @@ def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_k
             },
         )

+        aggregated_response = ""
         formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]
+
         # all messages up to the last are considered to be part of the chat history
         chat_session = model.start_chat(history=formatted_messages[0:-1])
         # the last message is considered to be the current prompt
         for chunk in chat_session.send_message(formatted_messages[-1]["parts"], stream=True):
             message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
             message = message or chunk.text
+            aggregated_response += message
             g.send(message)
             if stopped:
                 raise StopCandidateException(message)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except StopCandidateException as e:
         logger.warning(
             f"LLM Response Prevented for {model_name}: {e.args[0]}.\n"
```
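In utils.py the Gemini completion helpers now aggregate the full response and commit a conversation trace, but only when the server runs in debug or verbose mode. A rough sketch of that guard is below, with stand-ins for `state`, `in_debug_mode`, and `commit_conversation_trace` (the real helpers live in `khoj.utils` and `khoj.processor.conversation.utils`; the environment-variable check is an assumption made for this sketch):

```python
import os

class State:                         # stand-in for khoj.utils.state
    verbose = 0

state = State()

def in_debug_mode() -> bool:         # stand-in; assumes a KHOJ_DEBUG-style env flag
    return os.getenv("KHOJ_DEBUG", "false").lower() == "true"

def commit_conversation_trace(messages, response, tracer) -> None:
    # stand-in: the real helper persists the trace for later inspection
    print(f"trace: {tracer} ({len(messages)} messages, {len(response)} chars)")

def complete(messages, model_name, temperature=0.2, tracer: dict = {}):
    response_text = "stubbed model reply"      # stand-in for the Gemini API call
    tracer["chat_model"] = model_name
    tracer["temperature"] = temperature
    if in_debug_mode() or state.verbose > 1:   # only persist traces in debug/verbose mode
        commit_conversation_trace(messages, response_text, tracer)
    return response_text

complete(["hello"], "gemini-1.5-flash")
```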
khoj/processor/conversation/offline/chat_model.py

```diff
@@ -12,12 +12,14 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
     ThreadedGenerator,
+    commit_conversation_trace,
     generate_chatml_messages_with_context,
 )
 from khoj.utils import state
 from khoj.utils.constants import empty_escape_sequences
-from khoj.utils.helpers import ConversationCommand, is_none_or_empty
+from khoj.utils.helpers import ConversationCommand, in_debug_mode, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump

 logger = logging.getLogger(__name__)

@@ -34,6 +36,7 @@ def extract_questions_offline(
     max_prompt_size: int = None,
     temperature: float = 0.7,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ) -> List[str]:
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -94,6 +97,7 @@ def extract_questions_offline(
             max_prompt_size=max_prompt_size,
             temperature=temperature,
             response_type="json_object",
+            tracer=tracer,
         )
     finally:
         state.chat_lock.release()
@@ -135,7 +139,8 @@ def filter_questions(questions: List[str]):
 def converse_offline(
     user_query,
     references=[],
-    online_results=
+    online_results={},
+    code_results={},
     conversation_log={},
     model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
     loaded_model: Union[Any, None] = None,
@@ -146,6 +151,7 @@ def converse_offline(
     location_data: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
+    tracer: dict = {},
 ) -> Union[ThreadedGenerator, Iterator[str]]:
     """
     Converse with user using Llama
@@ -153,8 +159,7 @@ def converse_offline(
     # Initialize Variables
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
-
-
+    tracer["chat_model"] = model
     current_date = datetime.now()

     if agent and agent.personality:
@@ -179,24 +184,25 @@ def converse_offline(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"

     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
         completion_func(chat_response=prompts.no_online_results_found.format())
         return iter([prompts.no_online_results_found.format()])

     context_message = ""
-    if not is_none_or_empty(
-        context_message
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation_offline.format(references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
         simplified_online_results = online_results.copy()
         for result in online_results:
             if online_results[result].get("webpages"):
                 simplified_online_results[result] = online_results[result]["webpages"]

-        context_message += (
-
-        )
+        context_message += f"{prompts.online_search_conversation_offline.format(online_results=yaml_dump(simplified_online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()

     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -215,13 +221,14 @@ def converse_offline(
     logger.debug(f"Conversation Context for {model}: {truncated_messages}")

     g = ThreadedGenerator(references, online_results, completion_func=completion_func)
-    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size))
+    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size, tracer))
     t.start()
     return g


-def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None):
+def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None, tracer: dict = {}):
     stop_phrases = ["<s>", "INST]", "Notes:"]
+    aggregated_response = ""

     state.chat_lock.acquire()
     try:
@@ -229,7 +236,14 @@ def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int
             messages, loaded_model=model, stop=stop_phrases, max_prompt_size=max_prompt_size, streaming=True
         )
         for response in response_iterator:
-
+            response_delta = response["choices"][0]["delta"].get("content", "")
+            aggregated_response += response_delta
+            g.send(response_delta)
+
+        # Save conversation trace
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
+
     finally:
         state.chat_lock.release()
         g.close()
@@ -244,6 +258,7 @@ def send_message_to_model_offline(
     stop=[],
     max_prompt_size: int = None,
     response_type: str = "text",
+    tracer: dict = {},
 ):
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
@@ -251,7 +266,17 @@ def send_message_to_model_offline(
     response = offline_chat_model.create_chat_completion(
         messages_dict, stop=stop, stream=streaming, temperature=temperature, response_format={"type": response_type}
     )
+
     if streaming:
         return response
-
-
+
+    response_text = response["choices"][0]["message"].get("content", "")
+
+    # Save conversation trace for non-streaming responses
+    # Streamed responses need to be saved by the calling function
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text
```
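The offline model path applies the same idea to a streamed response: each delta is still sent to the `ThreadedGenerator`, but it is also accumulated so the complete reply is available for tracing once the stream finishes. A generic sketch of that accumulate-while-streaming pattern, using fake chunks rather than real llama-cpp output:

```python
from typing import Callable, Iterable

def stream_and_aggregate(chunks: Iterable[dict], send: Callable[[str], None]) -> str:
    # Forward each delta to the consumer while also building the full reply,
    # mirroring the llm_thread change above.
    aggregated_response = ""
    for chunk in chunks:
        delta = chunk["choices"][0]["delta"].get("content", "")
        aggregated_response += delta
        send(delta)                      # stream to the client as before
    return aggregated_response           # now usable for trace saving

# Example with fabricated llama.cpp-style chunks:
fake_chunks = [{"choices": [{"delta": {"content": w}}]} for w in ("Hello", " ", "world")]
print(stream_and_aggregate(fake_chunks, send=lambda s: None))  # -> "Hello world"
```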
khoj/processor/conversation/openai/gpt.py

```diff
@@ -12,12 +12,13 @@ from khoj.processor.conversation.openai.utils import (
     completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
-    remove_json_codeblock,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump

 logger = logging.getLogger(__name__)

@@ -33,6 +34,7 @@ def extract_questions(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -82,13 +84,18 @@ def extract_questions(
     messages = [ChatMessage(content=prompt, role="user")]

     response = send_message_to_model(
-        messages,
+        messages,
+        api_key,
+        model,
+        response_type="json_object",
+        api_base_url=api_base_url,
+        temperature=temperature,
+        tracer=tracer,
     )

     # Extract, Clean Message from GPT's Response
     try:
-        response = response
-        response = remove_json_codeblock(response)
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -103,7 +110,9 @@ def extract_questions(
     return questions


-def send_message_to_model(messages, api_key, model, response_type="text", api_base_url=None, temperature=0):
+def send_message_to_model(
+    messages, api_key, model, response_type="text", api_base_url=None, temperature=0, tracer: dict = {}
+):
     """
     Send message to model
     """
@@ -116,6 +125,7 @@ def send_message_to_model(messages, api_key, model, response_type="text", api_ba
         temperature=temperature,
         api_base_url=api_base_url,
         model_kwargs={"response_format": {"type": response_type}},
+        tracer=tracer,
     )


@@ -123,6 +133,7 @@ def converse(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: str = "gpt-4o-mini",
     api_key: Optional[str] = None,
@@ -137,13 +148,13 @@ def converse(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer: dict = {},
 ):
     """
     Converse with user using OpenAI's ChatGPT
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})

     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -167,7 +178,7 @@ def converse(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"

     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -175,10 +186,13 @@ def converse(
         return iter([prompts.no_online_results_found.format()])

     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message = f"{prompts.notes_conversation.format(references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(references=yaml_dump(references))}\n\n"
     if not is_none_or_empty(online_results):
-        context_message += f"{prompts.online_search_conversation.format(online_results=
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()

     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -207,4 +221,5 @@ def converse(
         api_base_url=api_base_url,
         completion_func=completion_func,
         model_kwargs={"stop": ["Notes:\n["]},
+        tracer=tracer,
     )
```
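As in the Gemini module, gpt.py now builds the context block from notes, online results, and the new `code_results` argument, serializing structured data with `yaml_dump` before it is interpolated into the prompts. A simplified sketch of that assembly, using PyYAML directly and plain labels in place of khoj's prompt templates:

```python
import yaml  # PyYAML stands in for khoj.utils.yaml.yaml_dump in this sketch

def build_context(references: list, online_results: dict, code_results: dict) -> str:
    # Assemble the context in the same order as converse(): notes, then online
    # results, then executed-code results.
    context_message = ""
    if references:
        context_message = f"Notes:\n{yaml.dump(references, sort_keys=False)}\n\n"
    if online_results:
        context_message += f"Online results:\n{yaml.dump(online_results, sort_keys=False)}\n\n"
    if code_results:
        context_message += f"Code results:\n{code_results}\n\n"
    return context_message.strip()

print(build_context([{"file": "garden.md", "compiled": "Planted tomatoes in May"}], {}, {}))
```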
khoj/processor/conversation/openai/utils.py

```diff
@@ -12,7 +12,12 @@ from tenacity import (
     wait_random_exponential,
 )

-from khoj.processor.conversation.utils import ThreadedGenerator
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode

 logger = logging.getLogger(__name__)

@@ -33,7 +38,7 @@ openai_clients: Dict[str, openai.OpenAI] = {}
     reraise=True,
 )
 def completion_with_backoff(
-    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None
+    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None, tracer: dict = {}
 ) -> str:
     client_key = f"{openai_api_key}--{api_base_url}"
     client: openai.OpenAI | None = openai_clients.get(client_key)
@@ -77,6 +82,12 @@ def completion_with_backoff(
         elif delta_chunk.content:
             aggregated_response += delta_chunk.content

+    # Save conversation trace
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, aggregated_response, tracer)
+
     return aggregated_response


@@ -103,26 +114,37 @@ def chat_completion_with_backoff(
     api_base_url=None,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
-        target=llm_thread,
+        target=llm_thread,
+        args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs, tracer),
     )
     t.start()
     return g


-def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_base_url=None, model_kwargs=None):
+def llm_thread(
+    g,
+    messages,
+    model_name,
+    temperature,
+    openai_api_key=None,
+    api_base_url=None,
+    model_kwargs=None,
+    tracer: dict = {},
+):
     try:
         client_key = f"{openai_api_key}--{api_base_url}"
         if client_key not in openai_clients:
-            client
+            client = openai.OpenAI(
                 api_key=openai_api_key,
                 base_url=api_base_url,
             )
             openai_clients[client_key] = client
         else:
-            client
+            client = openai_clients[client_key]

         formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
         stream = True
@@ -144,17 +166,29 @@ def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_ba
             **(model_kwargs or dict()),
         )

+        aggregated_response = ""
         if not stream:
-
+            aggregated_response = chat.choices[0].message.content
+            g.send(aggregated_response)
         else:
             for chunk in chat:
                 if len(chunk.choices) == 0:
                     continue
                 delta_chunk = chunk.choices[0].delta
+                text_chunk = ""
                 if isinstance(delta_chunk, str):
-
+                    text_chunk = delta_chunk
                 elif delta_chunk.content:
-
+                    text_chunk = delta_chunk.content
+                if text_chunk:
+                    aggregated_response += text_chunk
+                    g.send(text_chunk)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except Exception as e:
         logger.error(f"Error in llm_thread: {e}", exc_info=True)
     finally:
```
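The `llm_thread` hunk above also shows the client-caching scheme used for OpenAI-compatible endpoints: one client per API key and base URL pair, keyed on `f"{openai_api_key}--{api_base_url}"`. A small sketch of that cache, assuming the openai>=1.x Python SDK and an obviously fake key:

```python
from typing import Dict, Optional

import openai

# One client per (api key, base url) pair, mirroring the openai_clients dict above.
openai_clients: Dict[str, openai.OpenAI] = {}

def get_client(api_key: str, base_url: Optional[str] = None) -> openai.OpenAI:
    client_key = f"{api_key}--{base_url}"
    if client_key not in openai_clients:
        openai_clients[client_key] = openai.OpenAI(api_key=api_key, base_url=base_url)
    return openai_clients[client_key]

# Repeated calls with the same credentials reuse the cached client.
client = get_client("sk-example", "https://api.openai.com/v1")
assert client is get_client("sk-example", "https://api.openai.com/v1")
```

Reusing one client per endpoint avoids rebuilding connection pools on every chat turn, which matters once requests stream from a background thread as they do here.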