khoj 1.27.2.dev12__py3-none-any.whl → 1.28.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +1 -1
- khoj/database/adapters/__init__.py +55 -12
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3423-ff7402ae1dd66592.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-e80647edf6c92c27.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-2beaba7c9bb750bd.js → page-fc492762298e975e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-9b5c77e0b0dd772c.js → page-416ee13a00575c39.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-c70f5b0c722d7627.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-1541d90140794f63.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-8f22b790e50dd722.js → page-b269e444fc067759.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-ab2995529ece3140.js → page-7d431ce8e565c7c3.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-95f56e53f48f0289.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-4eba6154f7bb9771.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-17202cfae517c5de.js → webpack-33a82ccca02cd2b8.js} +1 -1
- khoj/interface/compiled/_next/static/css/2196fae09c2f906e.css +1 -0
- khoj/interface/compiled/_next/static/css/6bde1f2045622ef7.css +1 -0
- khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +25 -0
- khoj/interface/compiled/_next/static/css/ebef43da1c0651d5.css +1 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
- khoj/processor/conversation/anthropic/utils.py +37 -6
- khoj/processor/conversation/google/gemini_chat.py +23 -13
- khoj/processor/conversation/google/utils.py +34 -10
- khoj/processor/conversation/offline/chat_model.py +48 -16
- khoj/processor/conversation/openai/gpt.py +25 -10
- khoj/processor/conversation/openai/utils.py +50 -9
- khoj/processor/conversation/prompts.py +156 -65
- khoj/processor/conversation/utils.py +306 -6
- khoj/processor/embeddings.py +4 -4
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +27 -12
- khoj/processor/tools/run_code.py +144 -0
- khoj/routers/api.py +11 -6
- khoj/routers/api_chat.py +213 -111
- khoj/routers/helpers.py +171 -60
- khoj/routers/research.py +320 -0
- khoj/search_filter/date_filter.py +1 -3
- khoj/search_filter/file_filter.py +1 -2
- khoj/search_type/text_search.py +3 -3
- khoj/utils/helpers.py +25 -3
- khoj/utils/yaml.py +4 -0
- {khoj-1.27.2.dev12.dist-info → khoj-1.28.1.dist-info}/METADATA +3 -2
- {khoj-1.27.2.dev12.dist-info → khoj-1.28.1.dist-info}/RECORD +68 -65
- khoj/interface/compiled/_next/static/chunks/1603-b9d95833e0e025e8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-8e9c420574a9fbe3.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9479-4b443fdcc99141c9.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
- khoj/interface/compiled/_next/static/css/2272c73fc7a3b571.css +0 -1
- khoj/interface/compiled/_next/static/css/553f9cdcc7a2bcd6.css +0 -1
- khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
- khoj/interface/compiled/_next/static/css/b70402177a7c3207.css +0 -1
- /khoj/interface/compiled/_next/static/{kul3DNllWR6eaUDc4X0eU → JcTomiF3o0dIo4RxHR9Vu}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{kul3DNllWR6eaUDc4X0eU → JcTomiF3o0dIo4RxHR9Vu}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1970-1d6d0c1b00b4f343.js → 1970-90dd510762d820ba.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9417-759984ad62caa3dc.js → 9417-951f46451a8dd6d7.js} +0 -0
- {khoj-1.27.2.dev12.dist-info → khoj-1.28.1.dist-info}/WHEEL +0 -0
- {khoj-1.27.2.dev12.dist-info → khoj-1.28.1.dist-info}/entry_points.txt +0 -0
- {khoj-1.27.2.dev12.dist-info → khoj-1.28.1.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/anthropic/anthropic_chat.py

@@ -14,11 +14,13 @@ from khoj.processor.conversation.anthropic.utils import (
     format_messages_for_anthropic,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump

 logger = logging.getLogger(__name__)

@@ -34,6 +36,7 @@ def extract_questions_anthropic(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -89,14 +92,13 @@ def extract_questions_anthropic(
         model_name=model,
         temperature=temperature,
         api_key=api_key,
+        response_type="json_object",
+        tracer=tracer,
     )

     # Extract, Clean Message from Claude's Response
     try:
-        response = response
-        match = re.search(r"\{.*?\}", response)
-        if match:
-            response = match.group()
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -110,7 +112,7 @@ def extract_questions_anthropic(
     return questions


-def anthropic_send_message_to_model(messages, api_key, model):
+def anthropic_send_message_to_model(messages, api_key, model, response_type="text", tracer={}):
     """
     Send message to model
     """
@@ -122,6 +124,8 @@ def anthropic_send_message_to_model(messages, api_key, model):
         system_prompt=system_prompt,
         model_name=model,
         api_key=api_key,
+        response_type=response_type,
+        tracer=tracer,
     )


@@ -129,6 +133,7 @@ def converse_anthropic(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: Optional[str] = "claude-3-5-sonnet-20241022",
     api_key: Optional[str] = None,
@@ -141,13 +146,13 @@ def converse_anthropic(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer: dict = {},
 ):
     """
     Converse with user using Anthropic's Claude
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})

     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -171,7 +176,7 @@ def converse_anthropic(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"

     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -179,10 +184,13 @@ def converse_anthropic(
         return iter([prompts.no_online_results_found.format()])

     context_message = ""
-    if not is_none_or_empty(
-        context_message = f"{prompts.notes_conversation.format(query=user_query, references=
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
-        context_message += f"{prompts.online_search_conversation.format(online_results=
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()

     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -213,4 +221,5 @@ def converse_anthropic(
         system_prompt=system_prompt,
         completion_func=completion_func,
         max_prompt_size=max_prompt_size,
+        tracer=tracer,
     )
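Across the Anthropic, Gemini, and offline chat modules in this release, the brittle regex-based JSON extraction is replaced by a shared `clean_json` helper, and references and online results are serialized with `yaml_dump` before being injected into the prompt. `clean_json` itself is defined in khoj/processor/conversation/utils.py (+306 lines in this diff, not excerpted here); the sketch below is a hypothetical stand-in for orientation only, not khoj's actual implementation.

```python
# Hypothetical sketch only; the real clean_json lives in khoj/processor/conversation/utils.py
# and may handle more cases (stray prose around the payload, partial objects, etc.).
import json


def clean_json(response: str) -> str:
    """Strip code fences and surrounding whitespace so the payload parses as JSON."""
    cleaned = response.strip()
    cleaned = cleaned.removeprefix("```json").removeprefix("```").removesuffix("```")
    return cleaned.strip()


raw = '```json\n{"queries": ["what did I note about yaml?"]}\n```'
print(json.loads(clean_json(raw))["queries"])  # ['what did I note about yaml?']
```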
khoj/processor/conversation/anthropic/utils.py

@@ -12,8 +12,13 @@ from tenacity import (
     wait_random_exponential,
 )

-from khoj.processor.conversation.utils import
-
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+    get_image_from_url,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode, is_none_or_empty

 logger = logging.getLogger(__name__)

@@ -30,7 +35,15 @@ DEFAULT_MAX_TOKENS_ANTHROPIC = 3000
     reraise=True,
 )
 def anthropic_completion_with_backoff(
-    messages,
+    messages,
+    system_prompt,
+    model_name,
+    temperature=0,
+    api_key=None,
+    model_kwargs=None,
+    max_tokens=None,
+    response_type="text",
+    tracer={},
 ) -> str:
     if api_key not in anthropic_clients:
         client: anthropic.Anthropic = anthropic.Anthropic(api_key=api_key)
@@ -39,8 +52,11 @@ def anthropic_completion_with_backoff(
         client = anthropic_clients[api_key]

     formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
+    if response_type == "json_object":
+        # Prefill model response with '{' to make it output a valid JSON object
+        formatted_messages += [{"role": "assistant", "content": "{"}]

-    aggregated_response = ""
+    aggregated_response = "{" if response_type == "json_object" else ""
     max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC

     model_kwargs = model_kwargs or dict()
@@ -58,6 +74,12 @@ def anthropic_completion_with_backoff(
         for text in stream.text_stream:
             aggregated_response += text

+    # Save conversation trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, aggregated_response, tracer)
+
     return aggregated_response


@@ -78,18 +100,19 @@ def anthropic_chat_completion_with_backoff(
     max_prompt_size=None,
     completion_func=None,
     model_kwargs=None,
+    tracer={},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
         target=anthropic_llm_thread,
-        args=(g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size, model_kwargs),
+        args=(g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size, model_kwargs, tracer),
     )
     t.start()
     return g


 def anthropic_llm_thread(
-    g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size=None, model_kwargs=None
+    g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size=None, model_kwargs=None, tracer={}
 ):
     try:
         if api_key not in anthropic_clients:
@@ -102,6 +125,7 @@ def anthropic_llm_thread(
             anthropic.types.MessageParam(role=message.role, content=message.content) for message in messages
         ]

+        aggregated_response = ""
         with client.messages.stream(
             messages=formatted_messages,
             model=model_name,  # type: ignore
@@ -112,7 +136,14 @@ def anthropic_llm_thread(
             **(model_kwargs or dict()),
         ) as stream:
             for text in stream.text_stream:
+                aggregated_response += text
                 g.send(text)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except Exception as e:
         logger.error(f"Error in anthropic_llm_thread: {e}", exc_info=True)
     finally:
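The new `response_type="json_object"` path relies on Anthropic's assistant-prefill behavior: the request ends with an assistant turn containing only `{`, Claude continues from that brace, and the aggregated response is seeded with `"{"` so the final string is a complete JSON object. A minimal sketch of the message shaping used above, with client setup and streaming omitted; `prefill_for_json` is an illustrative name, not a khoj helper.

```python
# Sketch of the prefill pattern from anthropic_completion_with_backoff; not a drop-in helper.
def prefill_for_json(formatted_messages: list[dict], response_type: str) -> tuple[list[dict], str]:
    """Return (messages with an assistant '{' prefill, seed for the aggregated response)."""
    if response_type == "json_object":
        # Claude continues a trailing assistant turn, so its completion starts after '{'
        # and seed + streamed text parses as one JSON object.
        return formatted_messages + [{"role": "assistant", "content": "{"}], "{"
    return formatted_messages, ""


messages, seed = prefill_for_json([{"role": "user", "content": "Reply as JSON."}], "json_object")
# aggregated_response = seed + "".join(stream.text_stream)  # as in the function above
```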
khoj/processor/conversation/google/gemini_chat.py

@@ -14,11 +14,13 @@ from khoj.processor.conversation.google.utils import (
     gemini_completion_with_backoff,
 )
 from khoj.processor.conversation.utils import (
+    clean_json,
     construct_structured_message,
     generate_chatml_messages_with_context,
 )
 from khoj.utils.helpers import ConversationCommand, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump

 logger = logging.getLogger(__name__)

@@ -35,6 +37,7 @@ def extract_questions_gemini(
     query_images: Optional[list[str]] = None,
     vision_enabled: bool = False,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ):
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -85,15 +88,12 @@ def extract_questions_gemini(
     messages = [ChatMessage(content=prompt, role="user"), ChatMessage(content=system_prompt, role="system")]

     response = gemini_send_message_to_model(
-        messages, api_key, model, response_type="json_object", temperature=temperature
+        messages, api_key, model, response_type="json_object", temperature=temperature, tracer=tracer
     )

     # Extract, Clean Message from Gemini's Response
     try:
-        response = response
-        match = re.search(r"\{.*?\}", response)
-        if match:
-            response = match.group()
+        response = clean_json(response)
         response = json.loads(response)
         response = [q.strip() for q in response["queries"] if q.strip()]
         if not isinstance(response, list) or not response:
@@ -107,15 +107,19 @@ def extract_questions_gemini(
     return questions


-def gemini_send_message_to_model(
+def gemini_send_message_to_model(
+    messages, api_key, model, response_type="text", temperature=0, model_kwargs=None, tracer={}
+):
     """
     Send message to model
     """
     messages, system_prompt = format_messages_for_gemini(messages)

     model_kwargs = {}
-    if response_type == "json_object":
-        model_kwargs["response_mime_type"] = "application/json"
+
+    # Sometimes, this causes unwanted behavior and terminates response early. Disable for now while it's flaky.
+    # if response_type == "json_object":
+    #     model_kwargs["response_mime_type"] = "application/json"

     # Get Response from Gemini
     return gemini_completion_with_backoff(
@@ -125,6 +129,7 @@ def gemini_send_message_to_model(messages, api_key, model, response_type="text",
         api_key=api_key,
         temperature=temperature,
         model_kwargs=model_kwargs,
+        tracer=tracer,
     )


@@ -132,6 +137,7 @@ def converse_gemini(
     references,
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
+    code_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
     model: Optional[str] = "gemini-1.5-flash",
     api_key: Optional[str] = None,
@@ -145,13 +151,13 @@ def converse_gemini(
     agent: Agent = None,
     query_images: Optional[list[str]] = None,
     vision_available: bool = False,
+    tracer={},
 ):
     """
     Converse with user using Google's Gemini
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})

     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
@@ -176,7 +182,7 @@ def converse_gemini(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"

     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         completion_func(chat_response=prompts.no_notes_found.format())
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
@@ -184,10 +190,13 @@ def converse_gemini(
         return iter([prompts.no_online_results_found.format()])

     context_message = ""
-    if not is_none_or_empty(
-        context_message = f"{prompts.notes_conversation.format(query=user_query, references=
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
-        context_message += f"{prompts.online_search_conversation.format(online_results=
+        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()

     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -217,4 +226,5 @@ def converse_gemini(
         api_key=api_key,
         system_prompt=system_prompt,
         completion_func=completion_func,
+        tracer=tracer,
     )
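converse_anthropic, converse_gemini, and converse_offline (below) now assemble the context block identically: YAML-serialized references and online results, stringified code results, concatenated and stripped. A standalone sketch of that assembly follows; the prompt text here is a placeholder for the real templates in khoj/processor/conversation/prompts.py, and the exact options of yaml_dump in khoj/utils/yaml.py (+4 lines) are assumed.

```python
# Standalone sketch of the context_message assembly pattern; templates and yaml_dump options assumed.
import yaml


def yaml_dump(data) -> str:
    return yaml.dump(data, allow_unicode=True, sort_keys=False, default_flow_style=False)


def build_context_message(references, online_results, code_results) -> str:
    context_message = ""
    if references:
        context_message = f"Notes:\n{yaml_dump(references)}\n\n"
    if online_results:
        context_message += f"Online results:\n{yaml_dump(online_results)}\n\n"
    if code_results:
        context_message += f"Code results:\n{str(code_results)}\n\n"
    return context_message.strip()


print(build_context_message([{"file": "todo.org", "compiled": "Ship 1.28.1"}], {}, {}))
```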
khoj/processor/conversation/google/utils.py

@@ -19,8 +19,13 @@ from tenacity import (
     wait_random_exponential,
 )

-from khoj.processor.conversation.utils import
-
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+    get_image_from_url,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode, is_none_or_empty

 logger = logging.getLogger(__name__)

@@ -35,7 +40,7 @@ MAX_OUTPUT_TOKENS_GEMINI = 8192
     reraise=True,
 )
 def gemini_completion_with_backoff(
-    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None
+    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, tracer={}
 ) -> str:
     genai.configure(api_key=api_key)
     model_kwargs = model_kwargs or dict()
@@ -60,16 +65,23 @@ def gemini_completion_with_backoff(

     try:
         # Generate the response. The last message is considered to be the current prompt
-
-
+        response = chat_session.send_message(formatted_messages[-1]["parts"])
+        response_text = response.text
     except StopCandidateException as e:
-
+        response_text, _ = handle_gemini_response(e.args)
         # Respond with reason for stopping
         logger.warning(
-            f"LLM Response Prevented for {model_name}: {
+            f"LLM Response Prevented for {model_name}: {response_text}.\n"
             + f"Last Message by {messages[-1].role}: {messages[-1].content}"
         )
-
+
+    # Save conversation trace
+    tracer["chat_model"] = model_name
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text


 @retry(
@@ -88,17 +100,20 @@ def gemini_chat_completion_with_backoff(
     system_prompt,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
         target=gemini_llm_thread,
-        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs),
+        args=(g, messages, system_prompt, model_name, temperature, api_key, model_kwargs, tracer),
     )
     t.start()
     return g


-def gemini_llm_thread(
+def gemini_llm_thread(
+    g, messages, system_prompt, model_name, temperature, api_key, model_kwargs=None, tracer: dict = {}
+):
     try:
         genai.configure(api_key=api_key)
         model_kwargs = model_kwargs or dict()
@@ -117,16 +132,25 @@ def gemini_llm_thread(g, messages, system_prompt, model_name, temperature, api_k
             },
         )

+        aggregated_response = ""
         formatted_messages = [{"role": message.role, "parts": message.content} for message in messages]
+
         # all messages up to the last are considered to be part of the chat history
        chat_session = model.start_chat(history=formatted_messages[0:-1])
         # the last message is considered to be the current prompt
         for chunk in chat_session.send_message(formatted_messages[-1]["parts"], stream=True):
             message, stopped = handle_gemini_response(chunk.candidates, chunk.prompt_feedback)
             message = message or chunk.text
+            aggregated_response += message
             g.send(message)
             if stopped:
                 raise StopCandidateException(message)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except StopCandidateException as e:
         logger.warning(
             f"LLM Response Prevented for {model_name}: {e.args[0]}.\n"
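Every completion path above stamps the same mutable `tracer` dict with the chat model and temperature, then hands the messages and aggregated response to `commit_conversation_trace` when debug or verbose mode is on. Because the dict is passed by reference from the routers down through `*_send_message_to_model` and `*_completion_with_backoff`, one dict accumulates metadata for the whole request. A small sketch of that flow with a stand-in trace sink; the real helper is added in khoj/processor/conversation/utils.py and is not shown here.

```python
# Sketch of the shared-tracer pattern; commit_conversation_trace is a stand-in here.
def commit_conversation_trace(messages, response, tracer: dict) -> None:
    print(f"trace: model={tracer.get('chat_model')} temp={tracer.get('temperature')} turns={len(messages)}")


def completion_with_backoff(messages, model_name, temperature=0.2, tracer: dict = {}) -> str:
    response = "stubbed model reply"
    tracer["chat_model"] = model_name  # recorded on the caller's dict, shared by reference
    tracer["temperature"] = temperature
    commit_conversation_trace(messages, response, tracer)
    return response


tracer = {"uid": "user-1"}
completion_with_backoff([{"role": "user", "content": "hi"}], "gemini-1.5-flash", tracer=tracer)
print(tracer)  # {'uid': 'user-1', 'chat_model': 'gemini-1.5-flash', 'temperature': 0.2}
```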
khoj/processor/conversation/offline/chat_model.py

@@ -1,5 +1,6 @@
 import json
 import logging
+import os
 from datetime import datetime, timedelta
 from threading import Thread
 from typing import Any, Iterator, List, Optional, Union
@@ -12,12 +13,14 @@ from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
     ThreadedGenerator,
+    commit_conversation_trace,
     generate_chatml_messages_with_context,
 )
 from khoj.utils import state
 from khoj.utils.constants import empty_escape_sequences
-from khoj.utils.helpers import ConversationCommand, is_none_or_empty
+from khoj.utils.helpers import ConversationCommand, in_debug_mode, is_none_or_empty
 from khoj.utils.rawconfig import LocationData
+from khoj.utils.yaml import yaml_dump

 logger = logging.getLogger(__name__)

@@ -34,6 +37,7 @@ def extract_questions_offline(
     max_prompt_size: int = None,
     temperature: float = 0.7,
     personality_context: Optional[str] = None,
+    tracer: dict = {},
 ) -> List[str]:
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -94,6 +98,7 @@ def extract_questions_offline(
             max_prompt_size=max_prompt_size,
             temperature=temperature,
             response_type="json_object",
+            tracer=tracer,
         )
     finally:
         state.chat_lock.release()
@@ -135,7 +140,8 @@ def filter_questions(questions: List[str]):
 def converse_offline(
     user_query,
     references=[],
-    online_results=
+    online_results={},
+    code_results={},
     conversation_log={},
     model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
     loaded_model: Union[Any, None] = None,
@@ -146,6 +152,7 @@ def converse_offline(
     location_data: LocationData = None,
     user_name: str = None,
     agent: Agent = None,
+    tracer: dict = {},
 ) -> Union[ThreadedGenerator, Iterator[str]]:
     """
     Converse with user using Llama
@@ -153,8 +160,7 @@ def converse_offline(
     # Initialize Variables
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
-
-
+    tracer["chat_model"] = model
     current_date = datetime.now()

     if agent and agent.personality:
@@ -179,24 +185,25 @@ def converse_offline(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"

     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
         return iter([prompts.no_notes_found.format()])
     elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
         completion_func(chat_response=prompts.no_online_results_found.format())
         return iter([prompts.no_online_results_found.format()])

     context_message = ""
-    if not is_none_or_empty(
-        context_message
+    if not is_none_or_empty(references):
+        context_message = f"{prompts.notes_conversation_offline.format(references=yaml_dump(references))}\n\n"
     if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
         simplified_online_results = online_results.copy()
         for result in online_results:
             if online_results[result].get("webpages"):
                 simplified_online_results[result] = online_results[result]["webpages"]

-        context_message += (
-
-        )
+        context_message += f"{prompts.online_search_conversation_offline.format(online_results=yaml_dump(simplified_online_results))}\n\n"
+    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
+    context_message = context_message.strip()

     # Setup Prompt with Primer or Conversation History
     messages = generate_chatml_messages_with_context(
@@ -215,13 +222,14 @@ def converse_offline(
     logger.debug(f"Conversation Context for {model}: {truncated_messages}")

     g = ThreadedGenerator(references, online_results, completion_func=completion_func)
-    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size))
+    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size, tracer))
     t.start()
     return g


-def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None):
+def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None, tracer: dict = {}):
     stop_phrases = ["<s>", "INST]", "Notes:"]
+    aggregated_response = ""

     state.chat_lock.acquire()
     try:
@@ -229,7 +237,14 @@ def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int
             messages, loaded_model=model, stop=stop_phrases, max_prompt_size=max_prompt_size, streaming=True
         )
         for response in response_iterator:
-
+            response_delta = response["choices"][0]["delta"].get("content", "")
+            aggregated_response += response_delta
+            g.send(response_delta)
+
+        # Save conversation trace
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
+
     finally:
         state.chat_lock.release()
         g.close()
@@ -244,14 +259,31 @@ def send_message_to_model_offline(
     stop=[],
     max_prompt_size: int = None,
     response_type: str = "text",
+    tracer: dict = {},
 ):
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
     messages_dict = [{"role": message.role, "content": message.content} for message in messages]
+    seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
     response = offline_chat_model.create_chat_completion(
-        messages_dict,
+        messages_dict,
+        stop=stop,
+        stream=streaming,
+        temperature=temperature,
+        response_format={"type": response_type},
+        seed=seed,
     )
+
     if streaming:
         return response
-
-
+
+    response_text = response["choices"][0]["message"].get("content", "")
+
+    # Save conversation trace for non-streaming responses
+    # Streamed responses need to be saved by the calling function
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, response_text, tracer)
+
+    return response_text
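The offline path also gains a `KHOJ_LLM_SEED` hook: when the environment variable is set, its value is forwarded as the llama-cpp `seed`, making sampling reproducible across runs. A hedged usage sketch follows; the GGUF model path is a placeholder and llama-cpp-python must be installed for it to run.

```python
# Usage sketch for the KHOJ_LLM_SEED hook; the model path is a placeholder, not shipped with khoj.
import os

os.environ["KHOJ_LLM_SEED"] = "42"  # same seed + same prompt -> same completion

from llama_cpp import Llama

seed = int(os.getenv("KHOJ_LLM_SEED")) if os.getenv("KHOJ_LLM_SEED") else None
llm = Llama(model_path="Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", seed=seed or -1, verbose=False)
response = llm.create_chat_completion(
    [{"role": "user", "content": "Say hello in one word."}],
    temperature=0.2,
    seed=seed,
)
print(response["choices"][0]["message"]["content"])
```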