khoj-1.27.2.dev15-py3-none-any.whl → khoj-1.28.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +1 -1
- khoj/database/adapters/__init__.py +50 -12
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3423-f4b7df2f6f3362f7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-da57554315eebcbe.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-2beaba7c9bb750bd.js → page-5ae1e540bb5be8a9.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-9b5c77e0b0dd772c.js → page-774ae3e033f938cd.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-d8f4c107ad78e9e9.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-1cc42ee55f89fb2e.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-4b6008223ea79955.js → page-07e54186b066f5ce.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-ab2995529ece3140.js → page-9b64f61caa5bd7f9.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-10b288c103f19468.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-db775d42e820afb2.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-878569182b3af4c6.js → webpack-8f2abab7b11aa120.js} +1 -1
- khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
- khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +1 -0
- khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +25 -0
- khoj/interface/compiled/_next/static/css/ddcc0cf73e062476.css +1 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
- khoj/processor/conversation/anthropic/utils.py +37 -6
- khoj/processor/conversation/google/gemini_chat.py +23 -13
- khoj/processor/conversation/google/utils.py +34 -10
- khoj/processor/conversation/offline/chat_model.py +48 -16
- khoj/processor/conversation/openai/gpt.py +25 -10
- khoj/processor/conversation/openai/utils.py +50 -9
- khoj/processor/conversation/prompts.py +156 -65
- khoj/processor/conversation/utils.py +306 -6
- khoj/processor/embeddings.py +4 -4
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +27 -12
- khoj/processor/tools/run_code.py +144 -0
- khoj/routers/api.py +11 -6
- khoj/routers/api_chat.py +213 -111
- khoj/routers/helpers.py +171 -60
- khoj/routers/research.py +320 -0
- khoj/search_filter/date_filter.py +1 -3
- khoj/search_filter/file_filter.py +1 -2
- khoj/search_type/text_search.py +3 -3
- khoj/utils/helpers.py +24 -2
- khoj/utils/yaml.py +4 -0
- {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/METADATA +3 -2
- {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/RECORD +68 -65
- khoj/interface/compiled/_next/static/chunks/1603-b9d95833e0e025e8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-0b533af8bf6ac218.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9479-ff7d8c4dae2014d1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
- khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +0 -1
- khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
- khoj/interface/compiled/_next/static/css/d738728883c68af8.css +0 -1
- /khoj/interface/compiled/_next/static/{vcyFRDGArOFXwUVotHIuv → cC7ahn2y_DddSVovjlztj}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{vcyFRDGArOFXwUVotHIuv → cC7ahn2y_DddSVovjlztj}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1970-60c96aed937a4928.js → 1970-d44050bf658ae5cc.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9417-2ca87207387fc790.js → 9417-0d0fc7eb49a86abb.js} +0 -0
- {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/WHEEL +0 -0
- {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/entry_points.txt +0 -0
- {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/licenses/LICENSE +0 -0
```diff
--- a/khoj/processor/conversation/google/gemini_chat.py
+++ b/khoj/processor/conversation/google/gemini_chat.py
@@ -14,11 +14,13 @@ from khoj.processor.conversation.google.utils import (
     gemini_completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -35,6 +37,7 @@ def extract_questions_gemini(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -85,15 +88,12 @@ def extract_questions_gemini(
     messages = [ChatMessage(content=prompt, role="user"), ChatMessage(content=system_prompt, role="system")]
 
     response = gemini_send_message_to_model(
-        messages, api_key, model, response_type="json_object", temperature=temperature
+        messages, api_key, model, response_type="json_object", temperature=temperature, tracer=tracer
     )
 
     # Extract, Clean Message from Gemini's Response
     try:
-        response = response
-        match = re.search(r"\{.*?\}", response)
-        if match:
-            response = match.group()
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -107,15 +107,19 @@ def extract_questions_gemini(
     return questions
 
 
-def gemini_send_message_to_model(messages, api_key, model, response_type="text", temperature=0, model_kwargs=None):
+def gemini_send_message_to_model(
+    messages, api_key, model, response_type="text", temperature=0, model_kwargs=None, tracer={}
+):
     """
     Send message to model
     """
     messages, system_prompt = format_messages_for_gemini(messages)
 
     model_kwargs = {}
-    if response_type == "json_object":
-        model_kwargs["response_mime_type"] = "application/json"
+
+    # Sometimes, this causes unwanted behavior and terminates response early. Disable for now while it's flaky.
+    # if response_type == "json_object":
+    #     model_kwargs["response_mime_type"] = "application/json"
 
     # Get Response from Gemini
     return gemini_completion_with_backoff(
@@ -125,6 +129,7 @@ def gemini_send_message_to_model(messages, api_key, model, response_type="text",
         api_key=api_key,
         temperature=temperature,
         model_kwargs=model_kwargs,
+        tracer=tracer,
     )
 
 
@@ -132,6 +137,7 @@ def converse_gemini(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: Optional[str] = "gemini-1.5-flash",
     api_key: Optional[str] = None,
@@ -145,13 +151,13 @@ def converse_gemini(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer={},
 ):
     """
     Converse with user using Google's Gemini
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -176,7 +182,7 @@ def converse_gemini(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -184,10 +190,13 @@ def converse_gemini(
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
-        context_message += f"{prompts.online_search_conversation.format(online_results=
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -217,4 +226,5 @@ def converse_gemini(
         api_key=api_key,
         system_prompt=system_prompt,
         completion_func=completion_func,
+        tracer=tracer,
     )
```
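
The Gemini (and OpenAI) chat modules above drop their ad-hoc regex and codeblock JSON extraction in favor of a shared `clean_json` helper imported from `khoj.processor.conversation.utils`. That helper's implementation is not part of this diff; the following is only a minimal sketch, assuming it strips code fences and surrounding whitespace before the response is handed to `json.loads`:

```python
import json


def clean_json(response: str) -> str:
    # Hypothetical sketch: normalize an LLM reply so it parses as JSON.
    # The real helper lives in khoj/processor/conversation/utils.py and may differ.
    cleaned = response.strip()
    cleaned = cleaned.removeprefix("```json").removeprefix("```").removesuffix("```")
    return cleaned.strip()


# Usage mirroring the updated extract_questions_gemini flow
raw = '```json\n{"queries": ["what did I write about gemini?"]}\n```'
queries = json.loads(clean_json(raw))["queries"]
print(queries)  # ['what did I write about gemini?']
```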
```diff
--- a/khoj/processor/conversation/google/utils.py
+++ b/khoj/processor/conversation/google/utils.py
@@ -19,8 +19,13 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from khoj.processor.conversation.utils import ThreadedGenerator, get_image_from_url
-from khoj.utils.helpers import is_none_or_empty
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+    get_image_from_url,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode, is_none_or_empty
 
 logger = logging.getLogger(__name__)
 
@@ -35,7 +40,7 @@ MAX_OUTPUT_TOKENS_GEMINI = 8192
     reraise=True,
 )
 def gemini_completion_with_backoff(
-    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None
+    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, tracer={}
 ) -> str:
     genai.configure(api_key=api_key)
     model_kwargs = model_kwargs or dict()
@@ -60,16 +65,23 @@ def gemini_completion_with_backoff(
 
     try:
         # Generate the response. The last message is considered to be the current prompt
-
-
+        response = chat_session.send_message(formatted_messages[-1]["parts"])
+        response_text = response.text
     except StopCandidateException as e:
-
+        response_text, _ = handle_gemini_response(e.args)
         # Respond with reason for stopping
         logger.warning(
-            f"LLM Response Prevented for {model_name}: {
+            f"LLM Response Prevented for {model_name}: {response_text}.\n"
             + f"Last Message by {messages[-1].role}: {messages[-1].content}"
         )
-
+
+    # Save conversation trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text
 
 
 @retry(
@@ -88,17 +100,20 @@ def gemini_chat_completion_with_backoff(
     system_prompt,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
         target=gemini_llm_thread,
-        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs),
+        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
-def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None):
+def gemini_llm_thread(
+    g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None, tracer: dict = {}
+):
     try:
         genai.configure(api_key=api_key)
         model_kwargs = model_kwargs or dict()
@@ -117,16 +132,25 @@ def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_k
             },
         )
 
+        aggregated_response = ""
         formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]
+
         # all messages up to the last are considered to be part of the chat history
         chat_session = model.start_chat(history=formatted_messages[0:-1])
         # the last message is considered to be the current prompt
         for chunk in chat_session.send_message(formatted_messages[-1]["parts"], stream=True):
             message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
             message = message or chunk.text
+            aggregated_response += message
             g.send(message)
             if stopped:
                 raise StopCandidateException(message)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except StopCandidateException as e:
         logger.warning(
             f"LLM Response Prevented for {model_name}: {e.args[0]}.\n"
```
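
`google/utils.py` now records `chat_model` and `temperature` on a caller-supplied `tracer` dict and calls `commit_conversation_trace(messages, response, tracer)` only when debug mode is on or verbosity exceeds 1. A self-contained sketch of that guard pattern follows; the `in_debug_mode` stub keyed off a `KHOJ_DEBUG` environment variable is an assumption, not the real helper:

```python
import logging
import os

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def in_debug_mode() -> bool:
    # Assumption: stand-in for khoj.utils.helpers.in_debug_mode(); the real check may differ.
    return os.getenv("KHOJ_DEBUG", "false").lower() in ("1", "true", "yes")


def record_and_maybe_commit(
    messages: list, response_text: str, tracer: dict, model_name: str, temperature: float, verbose: int = 0
) -> None:
    # Mirrors the bookkeeping added to gemini_completion_with_backoff / gemini_llm_thread:
    # note model metadata on the tracer, then persist the trace only in debug or verbose runs.
    tracer["chat_model"] = model_name
    tracer["temperature"] = temperature
    if in_debug_mode() or verbose > 1:
        # Stand-in for commit_conversation_trace(messages, response_text, tracer)
        logger.info("Committing conversation trace: %s", tracer)


record_and_maybe_commit(["hello"], "hi there!", tracer={}, model_name="gemini-1.5-flash", temperature=0, verbose=2)
```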
```diff
--- a/khoj/processor/conversation/offline/chat_model.py
+++ b/khoj/processor/conversation/offline/chat_model.py
@@ -1,5 +1,6 @@
 import json
 import logging
+import os
 from datetime import datetime, timedelta
 from threading import Thread
 from typing import Any, Iterator, List, Optional, Union
@@ -12,12 +13,14 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
     ThreadedGenerator,
+    commit_conversation_trace,
     generate_chatml_messages_with_context,
 )
 from khoj.utils import state
 from khoj.utils.constants import empty_escape_sequences
-from khoj.utils.helpers import ConversationCommand, is_none_or_empty
+from khoj.utils.helpers import ConversationCommand, in_debug_mode, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -34,6 +37,7 @@ def extract_questions_offline(
     max_prompt_size: int = None,
     temperature: float = 0.7,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ) -> List[str]:
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -94,6 +98,7 @@ def extract_questions_offline(
             max_prompt_size=max_prompt_size,
             temperature=temperature,
             response_type="json_object",
+            tracer=tracer,
         )
     finally:
         state.chat_lock.release()
@@ -135,7 +140,8 @@ def filter_questions(questions: List[str]):
 def converse_offline(
     user_query,
     references=[],
-    online_results=
+    online_results={},
+    code_results={},
     conversation_log={},
     model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
     loaded_model: Union[Any, None] = None,
@@ -146,6 +152,7 @@ def converse_offline(
     location_data: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
+    tracer: dict = {},
 ) -> Union[ThreadedGenerator, Iterator[str]]:
     """
     Converse with user using Llama
@@ -153,8 +160,7 @@ def converse_offline(
     # Initialize Variables
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
-
-
+    tracer["chat_model"] = model
     current_date = datetime.now()
 
     if agent and agent.personality:
@@ -179,24 +185,25 @@ def converse_offline(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
         completion_func(chat_response=prompts.no_online_results_found.format())
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(
-        context_message
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation_offline.format(references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
         simplified_online_results = online_results.copy()
         for result in online_results:
             if online_results[result].get("webpages"):
                 simplified_online_results[result] = online_results[result]["webpages"]
 
-        context_message += (
-
-        )
+        context_message += f"{prompts.online_search_conversation_offline.format(online_results=yaml_dump(simplified_online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -215,13 +222,14 @@ def converse_offline(
     logger.debug(f"Conversation Context for {model}: {truncated_messages}")
 
     g = ThreadedGenerator(references, online_results, completion_func=completion_func)
-    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size))
+    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size, tracer))
    t.start()
     return g
 
 
-def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None):
+def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None, tracer: dict = {}):
     stop_phrases = ["<s>", "INST]", "Notes:"]
+    aggregated_response = ""
 
     state.chat_lock.acquire()
     try:
@@ -229,7 +237,14 @@ def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int
             messages, loaded_model=model, stop=stop_phrases, max_prompt_size=max_prompt_size, streaming=True
         )
         for response in response_iterator:
-
+            response_delta = response["choices"][0]["delta"].get("content", "")
+            aggregated_response += response_delta
+            g.send(response_delta)
+
+        # Save conversation trace
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
+
     finally:
         state.chat_lock.release()
         g.close()
@@ -244,14 +259,31 @@ def send_message_to_model_offline(
     stop=[],
     max_prompt_size: int = None,
     response_type: str = "text",
+    tracer: dict = {},
 ):
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
     messages_dict = [{"role": message.role, "content": message.content} for message in messages]
+    seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
     response = offline_chat_model.create_chat_completion(
-        messages_dict,
+        messages_dict,
+        stop=stop,
+        stream=streaming,
+        temperature=temperature,
+        response_format={"type": response_type},
+        seed=seed,
     )
+
     if streaming:
         return response
-
-
+
+    response_text = response["choices"][0]["message"].get("content", "")
+
+    # Save conversation trace for non-streaming responses
+    # Streamed responses need to be saved by the calling function
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text
```
```diff
--- a/khoj/processor/conversation/openai/gpt.py
+++ b/khoj/processor/conversation/openai/gpt.py
@@ -12,12 +12,13 @@ from khoj.processor.conversation.openai.utils import (
     completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
-    remove_json_codeblock,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump
 
 logger = logging.getLogger(__name__)
 
@@ -33,6 +34,7 @@ def extract_questions(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -82,13 +84,18 @@ def extract_questions(
     messages = [ChatMessage(content=prompt, role="user")]
 
     response = send_message_to_model(
-        messages,
+        messages,
+        api_key,
+        model,
+        response_type="json_object",
+        api_base_url=api_base_url,
+        temperature=temperature,
+        tracer=tracer,
     )
 
     # Extract, Clean Message from GPT's Response
     try:
-        response = response
-        response = remove_json_codeblock(response)
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -103,7 +110,9 @@ def extract_questions(
     return questions
 
 
-def send_message_to_model(messages, api_key, model, response_type="text", api_base_url=None, temperature=0):
+def send_message_to_model(
+    messages, api_key, model, response_type="text", api_base_url=None, temperature=0, tracer: dict = {}
+):
     """
     Send message to model
     """
@@ -116,6 +125,7 @@ def send_message_to_model(messages, api_key, model, response_type="text", api_ba
         temperature=temperature,
         api_base_url=api_base_url,
         model_kwargs={"response_format": {"type": response_type}},
+        tracer=tracer,
     )
 
 
@@ -123,6 +133,7 @@ def converse(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: str = "gpt-4o-mini",
     api_key: Optional[str] = None,
@@ -137,13 +148,13 @@ def converse(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer: dict = {},
 ):
     """
     Converse with user using OpenAI's ChatGPT
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -167,7 +178,7 @@ def converse(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"
 
     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -175,10 +186,13 @@ def converse(
         return iter([prompts.no_online_results_found.format()])
 
     context_message = ""
-    if not is_none_or_empty(compiled_references):
-        context_message = f"{prompts.notes_conversation.format(references=compiled_references)}\n\n"
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(references=yaml_dump(references))}\n\n"
     if not is_none_or_empty(online_results):
-        context_message += f"{prompts.online_search_conversation.format(online_results=
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()
 
     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -207,4 +221,5 @@ def converse(
         api_base_url=api_base_url,
         completion_func=completion_func,
         model_kwargs={"stop": ["Notes:\n["]},
+        tracer=tracer,
     )
```
```diff
--- a/khoj/processor/conversation/openai/utils.py
+++ b/khoj/processor/conversation/openai/utils.py
@@ -1,4 +1,5 @@
 import logging
+import os
 from threading import Thread
 from typing import Dict
 
@@ -12,7 +13,12 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from khoj.processor.conversation.utils import ThreadedGenerator
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode
 
 logger = logging.getLogger(__name__)
 
@@ -33,7 +39,7 @@ openai_clients: Dict[str, openai.OpenAI] = {}
     reraise=True,
 )
 def completion_with_backoff(
-    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None
+    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None, tracer: dict = {}
 ) -> str:
     client_key = f"{openai_api_key}--{api_base_url}"
     client: openai.OpenAI | None = openai_clients.get(client_key)
@@ -55,6 +61,9 @@ def completion_with_backoff(
         model_kwargs.pop("stop", None)
         model_kwargs.pop("response_format", None)
 
+    if os.getenv("KHOJ_LLM_SEED"):
+        model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
+
     chat = client.chat.completions.create(
         stream=stream,
         messages=formatted_messages,  # type: ignore
@@ -77,6 +86,12 @@ def completion_with_backoff(
         elif delta_chunk.content:
             aggregated_response += delta_chunk.content
 
+    # Save conversation trace
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, aggregated_response, tracer)
+
     return aggregated_response
 
 
@@ -103,26 +118,37 @@ def chat_completion_with_backoff(
     api_base_url=None,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
-        target=llm_thread,
+        target=llm_thread,
+        args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
-def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_base_url=None, model_kwargs=None):
+def llm_thread(
+    g,
+    messages,
+    model_name,
+    temperature,
+    openai_api_key=None,
+    api_base_url=None,
+    model_kwargs=None,
+    tracer: dict = {},
+):
     try:
         client_key = f"{openai_api_key}--{api_base_url}"
         if client_key not in openai_clients:
-            client
+            client = openai.OpenAI(
                 api_key=openai_api_key,
                 base_url=api_base_url,
             )
             openai_clients[client_key] = client
         else:
-            client
+            client = openai_clients[client_key]
 
         formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
         stream = True
@@ -135,6 +161,9 @@ def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_ba
         model_kwargs.pop("stop", None)
         model_kwargs.pop("response_format", None)
 
+        if os.getenv("KHOJ_LLM_SEED"):
+            model_kwargs["seed"] = int(os.getenv("KHOJ_LLM_SEED"))
+
         chat = client.chat.completions.create(
             stream=stream,
             messages=formatted_messages,
@@ -144,17 +173,29 @@ def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_ba
             **(model_kwargs or dict()),
         )
 
+        aggregated_response = ""
         if not stream:
-
+            aggregated_response = chat.choices[0].message.content
+            g.send(aggregated_response)
         else:
             for chunk in chat:
                 if len(chunk.choices) == 0:
                     continue
                 delta_chunk = chunk.choices[0].delta
+                text_chunk = ""
                 if isinstance(delta_chunk, str):
-
+                    text_chunk = delta_chunk
                 elif delta_chunk.content:
-
+                    text_chunk = delta_chunk.content
+                if text_chunk:
+                    aggregated_response += text_chunk
+                    g.send(text_chunk)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except Exception as e:
         logger.error(f"Error in llm_thread: {e}", exc_info=True)
     finally:
```
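
The streaming branch of `llm_thread` now keeps a running `aggregated_response` alongside the chunks it forwards to the `ThreadedGenerator`, so the complete reply is available for the conversation trace once the stream ends. A standalone sketch of that accumulate-while-forwarding pattern, with a plain callback standing in for khoj's generator:

```python
from typing import Callable, Iterable


def forward_and_aggregate(chunks: Iterable[str], send: Callable[[str], None]) -> str:
    # Mirrors the aggregated_response bookkeeping added above: forward each
    # non-empty text chunk to the consumer while keeping the full reply.
    aggregated_response = ""
    for text_chunk in chunks:
        if text_chunk:
            aggregated_response += text_chunk
            send(text_chunk)
    return aggregated_response


received: list[str] = []
full_reply = forward_and_aggregate(["Hel", "lo ", "", "world"], received.append)
print(full_reply)  # Hello world
print(received)    # ['Hel', 'lo ', 'world']
```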