khoj 1.42.1.dev8__py3-none-any.whl → 1.42.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +2 -0
- khoj/database/adapters/__init__.py +9 -7
- khoj/database/models/__init__.py +9 -9
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{2117-5a41630a2bd2eae8.js → 2117-056a00add390772b.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/7127-79a3af5138960272.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{5138-2cce449fd2454abf.js → 7211-7fedd2ee3655239c.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-1b6273baddb72146.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-774c78ff0f55a228.js → page-2fac1d5ac7192e73.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/page-ef89ac958e78aa81.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-d71351493e1f7c2b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-f7a0286dfc31ad6b.js → page-4bbe55de8b080c1f.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-4505b79deb734a30.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-f1a7f278c89e09b6.js → page-afb5e7ed13d221c1.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-5d9134d4a97f8834.js → page-8fb6cc97be8774a7.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-32cd0ceb9ffbd777.js → page-e3f49c25480e3be4.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{main-876327ac335776ab.js → main-63d6432f34cdf74b.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-97e712397e673897.js → webpack-e4c73eaddc365142.js} +1 -1
- khoj/interface/compiled/_next/static/css/2945c4a857922f3b.css +1 -0
- khoj/interface/compiled/_next/static/css/2b1cdb68b799b876.css +1 -0
- khoj/interface/compiled/_next/static/css/440ae0f0f650dc35.css +1 -0
- khoj/interface/compiled/_next/static/css/{9c223d337a984468.css → 7017ee76c2f2cd87.css} +1 -1
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +19 -134
- khoj/processor/conversation/anthropic/utils.py +1 -1
- khoj/processor/conversation/google/gemini_chat.py +20 -141
- khoj/processor/conversation/offline/chat_model.py +23 -153
- khoj/processor/conversation/openai/gpt.py +14 -128
- khoj/processor/conversation/prompts.py +2 -63
- khoj/processor/conversation/utils.py +94 -89
- khoj/processor/image/generate.py +16 -11
- khoj/processor/operator/__init__.py +2 -3
- khoj/processor/operator/operator_agent_binary.py +11 -11
- khoj/processor/operator/operator_environment_computer.py +2 -2
- khoj/processor/tools/online_search.py +9 -3
- khoj/processor/tools/run_code.py +5 -5
- khoj/routers/api.py +5 -527
- khoj/routers/api_automation.py +243 -0
- khoj/routers/api_chat.py +48 -129
- khoj/routers/helpers.py +373 -121
- khoj/routers/research.py +13 -43
- khoj/utils/helpers.py +0 -6
- {khoj-1.42.1.dev8.dist-info → khoj-1.42.2.dist-info}/METADATA +3 -3
- {khoj-1.42.1.dev8.dist-info → khoj-1.42.2.dist-info}/RECORD +63 -62
- khoj/interface/compiled/_next/static/chunks/7127-d3199617463d45f0.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/page-4454891c5007b870.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-3c299bf8e6b1afd3.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-f5881c7ae3ba0795.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-abb6c5f4239ad7be.js +0 -1
- khoj/interface/compiled/_next/static/css/0db53bacf81896f5.css +0 -1
- khoj/interface/compiled/_next/static/css/76c658ee459140a9.css +0 -1
- khoj/interface/compiled/_next/static/css/93eeacc43e261162.css +0 -1
- /khoj/interface/compiled/_next/static/{TrHI4J6qnG7RYFl2Irnqj → BDHACq0ud8EERJ3YZ4aWo}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{TrHI4J6qnG7RYFl2Irnqj → BDHACq0ud8EERJ3YZ4aWo}/_ssgManifest.js +0 -0
- {khoj-1.42.1.dev8.dist-info → khoj-1.42.2.dist-info}/WHEEL +0 -0
- {khoj-1.42.1.dev8.dist-info → khoj-1.42.2.dist-info}/entry_points.txt +0 -0
- {khoj-1.42.1.dev8.dist-info → khoj-1.42.2.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/google/gemini_chat.py

@@ -1,13 +1,8 @@
-import asyncio
 import logging
-from datetime import datetime, timedelta
+from datetime import datetime
 from typing import AsyncGenerator, Dict, List, Optional

-import pyjson5
-from langchain_core.messages.chat import ChatMessage
-from pydantic import BaseModel, Field
-
-from khoj.database.models import Agent, ChatModel, KhojUser
+from khoj.database.models import Agent, ChatMessageModel, ChatModel
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.google.utils import (
     gemini_chat_completion_with_backoff,
@@ -15,113 +10,17 @@ from khoj.processor.conversation.google.utils import (
 )
 from khoj.processor.conversation.utils import (
     OperatorRun,
-    clean_json,
-    construct_question_history,
-    construct_structured_message,
+    ResponseWithThought,
     generate_chatml_messages_with_context,
     messages_to_print,
 )
-from khoj.utils.helpers import (
-    ConversationCommand,
-    is_none_or_empty,
-    truncate_code_context,
-)
+from khoj.utils.helpers import is_none_or_empty, truncate_code_context
 from khoj.utils.rawconfig import FileAttachment, LocationData
 from khoj.utils.yaml import yaml_dump

 logger = logging.getLogger(__name__)


-def extract_questions_gemini(
-    text,
-    model: Optional[str] = "gemini-2.0-flash",
-    conversation_log={},
-    api_key=None,
-    api_base_url=None,
-    max_tokens=None,
-    location_data: LocationData = None,
-    user: KhojUser = None,
-    query_images: Optional[list[str]] = None,
-    vision_enabled: bool = False,
-    personality_context: Optional[str] = None,
-    query_files: str = None,
-    tracer: dict = {},
-):
-    """
-    Infer search queries to retrieve relevant notes to answer user query
-    """
-    # Extract Past User Message and Inferred Questions from Conversation Log
-    location = f"{location_data}" if location_data else "Unknown"
-    username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
-
-    # Extract Past User Message and Inferred Questions from Conversation Log
-    chat_history = construct_question_history(conversation_log, query_prefix="User", agent_name="Assistant")
-
-    # Get dates relative to today for prompt creation
-    today = datetime.today()
-    current_new_year = today.replace(month=1, day=1)
-    last_new_year = current_new_year.replace(year=today.year - 1)
-
-    system_prompt = prompts.extract_questions_anthropic_system_prompt.format(
-        current_date=today.strftime("%Y-%m-%d"),
-        day_of_week=today.strftime("%A"),
-        current_month=today.strftime("%Y-%m"),
-        last_new_year=last_new_year.strftime("%Y"),
-        last_new_year_date=last_new_year.strftime("%Y-%m-%d"),
-        current_new_year_date=current_new_year.strftime("%Y-%m-%d"),
-        yesterday_date=(today - timedelta(days=1)).strftime("%Y-%m-%d"),
-        location=location,
-        username=username,
-        personality_context=personality_context,
-    )
-
-    prompt = prompts.extract_questions_anthropic_user_message.format(
-        chat_history=chat_history,
-        text=text,
-    )
-
-    prompt = construct_structured_message(
-        message=prompt,
-        images=query_images,
-        model_type=ChatModel.ModelType.GOOGLE,
-        vision_enabled=vision_enabled,
-        attached_file_context=query_files,
-    )
-
-    messages = []
-
-    messages.append(ChatMessage(content=prompt, role="user"))
-    messages.append(ChatMessage(content=system_prompt, role="system"))
-
-    class DocumentQueries(BaseModel):
-        queries: List[str] = Field(..., min_items=1)
-
-    response = gemini_send_message_to_model(
-        messages,
-        api_key,
-        model,
-        api_base_url=api_base_url,
-        response_type="json_object",
-        response_schema=DocumentQueries,
-        tracer=tracer,
-    )
-
-    # Extract, Clean Message from Gemini's Response
-    try:
-        response = clean_json(response)
-        response = pyjson5.loads(response)
-        response = [q.strip() for q in response["queries"] if q.strip()]
-        if not isinstance(response, list) or not response:
-            logger.error(f"Invalid response for constructing subqueries: {response}")
-            return [text]
-        return response
-    except:
-        logger.warning(f"Gemini returned invalid JSON. Falling back to using user message as search query.\n{response}")
-        questions = [text]
-    logger.debug(f"Extracted Questions by Gemini: {questions}")
-    return questions
-
-
 def gemini_send_message_to_model(
     messages,
     api_key,
@@ -158,32 +57,33 @@ def gemini_send_message_to_model(


 async def converse_gemini(
+    # Query
     user_query: str,
+    # Context
     references: list[dict],
     online_results: Optional[Dict[str, Dict]] = None,
     code_results: Optional[Dict[str, Dict]] = None,
     operator_results: Optional[List[OperatorRun]] = None,
-
+    query_images: Optional[list[str]] = None,
+    query_files: str = None,
+    generated_files: List[FileAttachment] = None,
+    generated_asset_results: Dict[str, Dict] = {},
+    program_execution_context: List[str] = None,
+    location_data: LocationData = None,
+    user_name: str = None,
+    chat_history: List[ChatMessageModel] = [],
+    # Model
     model: Optional[str] = "gemini-2.0-flash",
     api_key: Optional[str] = None,
     api_base_url: Optional[str] = None,
     temperature: float = 1.0,
-    completion_func=None,
-    conversation_commands=[ConversationCommand.Default],
     max_prompt_size=None,
     tokenizer_name=None,
-    location_data: LocationData = None,
-    user_name: str = None,
     agent: Agent = None,
-    query_images: Optional[list[str]] = None,
     vision_available: bool = False,
-    query_files: str = None,
-    generated_files: List[FileAttachment] = None,
-    generated_asset_results: Dict[str, Dict] = {},
-    program_execution_context: List[str] = None,
     deepthought: Optional[bool] = False,
     tracer={},
-) -> AsyncGenerator[
+) -> AsyncGenerator[ResponseWithThought, None]:
     """
     Converse with user using Google's Gemini
     """
@@ -212,30 +112,16 @@ async def converse_gemini(
         user_name_prompt = prompts.user_name.format(name=user_name)
         system_prompt = f"{system_prompt}\n{user_name_prompt}"

-    # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
-        response = prompts.no_notes_found.format()
-        if completion_func:
-            asyncio.create_task(completion_func(chat_response=response))
-        yield response
-        return
-    elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
-        response = prompts.no_online_results_found.format()
-        if completion_func:
-            asyncio.create_task(completion_func(chat_response=response))
-        yield response
-        return
-
     context_message = ""
     if not is_none_or_empty(references):
         context_message = f"{prompts.notes_conversation.format(query=user_query, references=yaml_dump(references))}\n\n"
-    if
+    if not is_none_or_empty(online_results):
         context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
-    if
+    if not is_none_or_empty(code_results):
         context_message += (
             f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
         )
-    if
+    if not is_none_or_empty(operator_results):
         operator_content = [
             {"query": oc.query, "response": oc.response, "webpages": oc.webpages} for oc in operator_results
         ]
@@ -248,7 +134,7 @@ async def converse_gemini(
     messages = generate_chatml_messages_with_context(
         user_query,
         context_message=context_message,
-
+        chat_history=chat_history,
         model_name=model,
         max_prompt_size=max_prompt_size,
         tokenizer_name=tokenizer_name,
@@ -264,7 +150,6 @@ async def converse_gemini(
     logger.debug(f"Conversation Context for Gemini: {messages_to_print(messages)}")

     # Get Response from Google AI
-    full_response = ""
     async for chunk in gemini_chat_completion_with_backoff(
         messages=messages,
         model_name=model,
@@ -275,10 +160,4 @@ async def converse_gemini(
         deepthought=deepthought,
         tracer=tracer,
     ):
-        if chunk.response:
-            full_response += chunk.response
         yield chunk
-
-    # Call completion_func once finish streaming and we have the full response
-    if completion_func:
-        asyncio.create_task(completion_func(chat_response=full_response))
khoj/processor/conversation/offline/chat_model.py

@@ -1,29 +1,25 @@
 import asyncio
 import logging
 import os
-from datetime import datetime, timedelta
+from datetime import datetime
 from threading import Thread
 from time import perf_counter
-from typing import Any, AsyncGenerator, Dict, List,
+from typing import Any, AsyncGenerator, Dict, List, Union

-import pyjson5
 from langchain_core.messages.chat import ChatMessage
 from llama_cpp import Llama

-from khoj.database.models import Agent,
+from khoj.database.models import Agent, ChatMessageModel, ChatModel
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import (
-    clean_json,
+    ResponseWithThought,
     commit_conversation_trace,
-    construct_question_history,
     generate_chatml_messages_with_context,
     messages_to_print,
 )
 from khoj.utils import state
-from khoj.utils.constants import empty_escape_sequences
 from khoj.utils.helpers import (
-    ConversationCommand,
     is_none_or_empty,
     is_promptrace_enabled,
     truncate_code_context,
@@ -34,135 +30,28 @@ from khoj.utils.yaml import yaml_dump
 logger = logging.getLogger(__name__)


-def extract_questions_offline(
-    text: str,
-    model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
-    loaded_model: Union[Any, None] = None,
-    conversation_log={},
-    use_history: bool = True,
-    should_extract_questions: bool = True,
-    location_data: LocationData = None,
-    user: KhojUser = None,
-    max_prompt_size: int = None,
-    temperature: float = 0.7,
-    personality_context: Optional[str] = None,
-    query_files: str = None,
-    tracer: dict = {},
-) -> List[str]:
-    """
-    Infer search queries to retrieve relevant notes to answer user query
-    """
-    all_questions = text.split("? ")
-    all_questions = [q + "?" for q in all_questions[:-1]] + [all_questions[-1]]
-
-    if not should_extract_questions:
-        return all_questions
-
-    assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
-    offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
-
-    location = f"{location_data}" if location_data else "Unknown"
-    username = prompts.user_name.format(name=user.get_full_name()) if user and user.get_full_name() else ""
-
-    # Extract Past User Message and Inferred Questions from Conversation Log
-    chat_history = construct_question_history(conversation_log, include_query=False) if use_history else ""
-
-    # Get dates relative to today for prompt creation
-    today = datetime.today()
-    yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")
-    last_year = today.year - 1
-    example_questions = prompts.extract_questions_offline.format(
-        query=text,
-        chat_history=chat_history,
-        current_date=today.strftime("%Y-%m-%d"),
-        day_of_week=today.strftime("%A"),
-        current_month=today.strftime("%Y-%m"),
-        yesterday_date=yesterday,
-        last_year=last_year,
-        this_year=today.year,
-        location=location,
-        username=username,
-        personality_context=personality_context,
-    )
-
-    messages = generate_chatml_messages_with_context(
-        example_questions,
-        model_name=model,
-        loaded_model=offline_chat_model,
-        max_prompt_size=max_prompt_size,
-        model_type=ChatModel.ModelType.OFFLINE,
-        query_files=query_files,
-    )
-
-    state.chat_lock.acquire()
-    try:
-        response = send_message_to_model_offline(
-            messages,
-            loaded_model=offline_chat_model,
-            model_name=model,
-            max_prompt_size=max_prompt_size,
-            temperature=temperature,
-            response_type="json_object",
-            tracer=tracer,
-        )
-    finally:
-        state.chat_lock.release()
-
-    # Extract and clean the chat model's response
-    try:
-        response = clean_json(empty_escape_sequences)
-        response = pyjson5.loads(response)
-        questions = [q.strip() for q in response["queries"] if q.strip()]
-        questions = filter_questions(questions)
-    except:
-        logger.warning(f"Llama returned invalid JSON. Falling back to using user message as search query.\n{response}")
-        return all_questions
-    logger.debug(f"Questions extracted by {model}: {questions}")
-    return questions
-
-
-def filter_questions(questions: List[str]):
-    # Skip questions that seem to be apologizing for not being able to answer the question
-    hint_words = [
-        "sorry",
-        "apologize",
-        "unable",
-        "can't",
-        "cannot",
-        "don't know",
-        "don't understand",
-        "do not know",
-        "do not understand",
-    ]
-    filtered_questions = set()
-    for q in questions:
-        if not any([word in q.lower() for word in hint_words]) and not is_none_or_empty(q):
-            filtered_questions.add(q)
-
-    return list(filtered_questions)
-
-
 async def converse_offline(
+    # Query
     user_query: str,
+    # Context
     references: list[dict] = [],
     online_results={},
     code_results={},
-
+    query_files: str = None,
+    generated_files: List[FileAttachment] = None,
+    additional_context: List[str] = None,
+    generated_asset_results: Dict[str, Dict] = {},
+    location_data: LocationData = None,
+    user_name: str = None,
+    chat_history: list[ChatMessageModel] = [],
+    # Model
     model_name: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
     loaded_model: Union[Any, None] = None,
-    completion_func=None,
-    conversation_commands=[ConversationCommand.Default],
     max_prompt_size=None,
     tokenizer_name=None,
-    location_data: LocationData = None,
-    user_name: str = None,
     agent: Agent = None,
-    query_files: str = None,
-    generated_files: List[FileAttachment] = None,
-    additional_context: List[str] = None,
-    generated_asset_results: Dict[str, Dict] = {},
     tracer: dict = {},
-) -> AsyncGenerator[
+) -> AsyncGenerator[ResponseWithThought, None]:
     """
     Converse with user using Llama (Async Version)
     """
@@ -194,30 +83,17 @@ async def converse_offline(
         system_prompt = f"{system_prompt}\n{user_name_prompt}"

     # Get Conversation Primer appropriate to Conversation Type
-    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(references):
-        response = prompts.no_notes_found.format()
-        if completion_func:
-            asyncio.create_task(completion_func(chat_response=response))
-        yield response
-        return
-    elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
-        response = prompts.no_online_results_found.format()
-        if completion_func:
-            asyncio.create_task(completion_func(chat_response=response))
-        yield response
-        return
-
     context_message = ""
     if not is_none_or_empty(references):
         context_message = f"{prompts.notes_conversation_offline.format(references=yaml_dump(references))}\n\n"
-    if
+    if not is_none_or_empty(online_results):
         simplified_online_results = online_results.copy()
         for result in online_results:
             if online_results[result].get("webpages"):
                 simplified_online_results[result] = online_results[result]["webpages"]

         context_message += f"{prompts.online_search_conversation_offline.format(online_results=yaml_dump(simplified_online_results))}\n\n"
-    if
+    if not is_none_or_empty(code_results):
         context_message += (
             f"{prompts.code_executed_context.format(code_results=truncate_code_context(code_results))}\n\n"
         )
@@ -227,7 +103,7 @@ async def converse_offline(
     messages = generate_chatml_messages_with_context(
         user_query,
         system_prompt,
-
+        chat_history,
         context_message=context_message,
         model_name=model_name,
         loaded_model=offline_chat_model,
@@ -243,9 +119,8 @@ async def converse_offline(
     logger.debug(f"Conversation Context for {model_name}: {messages_to_print(messages)}")

     # Use asyncio.Queue and a thread to bridge sync iterator
-    queue: asyncio.Queue = asyncio.Queue()
+    queue: asyncio.Queue[ResponseWithThought] = asyncio.Queue()
     stop_phrases = ["<s>", "INST]", "Notes:"]
-    aggregated_response_container = {"response": ""}

     def _sync_llm_thread():
         """Synchronous function to run in a separate thread."""
@@ -262,7 +137,7 @@ async def converse_offline(
                 tracer=tracer,
             )
             for response in response_iterator:
-                response_delta = response["choices"][0]["delta"].get("content", "")
+                response_delta: str = response["choices"][0]["delta"].get("content", "")
                 # Log the time taken to start response
                 if aggregated_response == "" and response_delta != "":
                     logger.info(f"First response took: {perf_counter() - start_time:.3f} seconds")
@@ -270,12 +145,12 @@ async def converse_offline(
                 aggregated_response += response_delta
                 # Put chunk into the asyncio queue (non-blocking)
                 try:
-                    queue.put_nowait(response_delta)
+                    queue.put_nowait(ResponseWithThought(response=response_delta))
                 except asyncio.QueueFull:
                     # Should not happen with default queue size unless consumer is very slow
                     logger.warning("Asyncio queue full during offline LLM streaming.")
                     # Potentially block here or handle differently if needed
-                    asyncio.run(queue.put(response_delta))
+                    asyncio.run(queue.put(ResponseWithThought(response=response_delta)))

             # Log the time taken to stream the entire response
             logger.info(f"Chat streaming took: {perf_counter() - start_time:.3f} seconds")
@@ -291,7 +166,6 @@ async def converse_offline(
             state.chat_lock.release()
         # Signal end of stream
         queue.put_nowait(None)
-        aggregated_response_container["response"] = aggregated_response

     # Start the synchronous thread
     thread = Thread(target=_sync_llm_thread)
@@ -310,10 +184,6 @@ async def converse_offline(
     loop = asyncio.get_running_loop()
     await loop.run_in_executor(None, thread.join)

-    # Call the completion function after streaming is done
-    if completion_func:
-        asyncio.create_task(completion_func(chat_response=aggregated_response_container["response"]))
-

 def send_message_to_model_offline(
     messages: List[ChatMessage],
@@ -342,7 +212,7 @@ def send_message_to_model_offline(
     if streaming:
         return response

-    response_text = response["choices"][0]["message"].get("content", "")
+    response_text: str = response["choices"][0]["message"].get("content", "")

     # Save conversation trace for non-streaming responses
     # Streamed responses need to be saved by the calling function