khoj 1.27.2.dev13__py3-none-any.whl → 1.27.2.dev29__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry, and is provided for informational purposes only. The bulk of the change threads an optional `tracer` dict through the conversation pipeline and adds git-backed prompt-tracing utilities in `khoj/processor/conversation/utils.py`.
- khoj/database/adapters/__init__.py +5 -0
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{webpack-70a1cd2ad6a1952c.js → webpack-2b720658ccc746f2.js} +1 -1
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +6 -1
- khoj/processor/conversation/anthropic/utils.py +25 -5
- khoj/processor/conversation/google/gemini_chat.py +8 -2
- khoj/processor/conversation/google/utils.py +34 -10
- khoj/processor/conversation/offline/chat_model.py +31 -7
- khoj/processor/conversation/openai/gpt.py +14 -2
- khoj/processor/conversation/openai/utils.py +43 -9
- khoj/processor/conversation/prompts.py +0 -16
- khoj/processor/conversation/utils.py +168 -1
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +14 -5
- khoj/routers/api.py +5 -0
- khoj/routers/api_chat.py +23 -2
- khoj/routers/helpers.py +65 -13
- khoj/utils/helpers.py +1 -1
- {khoj-1.27.2.dev13.dist-info → khoj-1.27.2.dev29.dist-info}/METADATA +2 -1
- {khoj-1.27.2.dev13.dist-info → khoj-1.27.2.dev29.dist-info}/RECORD +42 -42
- khoj/interface/compiled/_next/static/chunks/1603-b9d95833e0e025e8.js +0 -1
- /khoj/interface/compiled/_next/static/{rYy0jmrIYv76V-iyorQim → atzIseFarmC7TIwq2BgHC}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{rYy0jmrIYv76V-iyorQim → atzIseFarmC7TIwq2BgHC}/_ssgManifest.js +0 -0
- {khoj-1.27.2.dev13.dist-info → khoj-1.27.2.dev29.dist-info}/WHEEL +0 -0
- {khoj-1.27.2.dev13.dist-info → khoj-1.27.2.dev29.dist-info}/entry_points.txt +0 -0
- {khoj-1.27.2.dev13.dist-info → khoj-1.27.2.dev29.dist-info}/licenses/LICENSE +0 -0
khoj/processor/conversation/openai/utils.py
CHANGED
```diff
@@ -12,7 +12,12 @@ from tenacity import (
     wait_random_exponential,
 )
 
-from khoj.processor.conversation.utils import ThreadedGenerator
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    commit_conversation_trace,
+)
+from khoj.utils import state
+from khoj.utils.helpers import in_debug_mode
 
 logger = logging.getLogger(__name__)
 
```
```diff
@@ -33,7 +38,7 @@ openai_clients: Dict[str, openai.OpenAI] = {}
     reraise=True,
 )
 def completion_with_backoff(
-    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None
+    messages, model, temperature=0, openai_api_key=None, api_base_url=None, model_kwargs=None, tracer: dict = {}
 ) -> str:
     client_key = f"{openai_api_key}--{api_base_url}"
     client: openai.OpenAI | None = openai_clients.get(client_key)
```
```diff
@@ -77,6 +82,12 @@ def completion_with_backoff(
         elif delta_chunk.content:
             aggregated_response += delta_chunk.content
 
+    # Save conversation trace
+    tracer["chat_model"] = model
+    tracer["temperature"] = temperature
+    if in_debug_mode() or state.verbose > 1:
+        commit_conversation_trace(messages, aggregated_response, tracer)
+
     return aggregated_response
 
 
```
```diff
@@ -103,26 +114,37 @@ def chat_completion_with_backoff(
     api_base_url=None,
     completion_func=None,
     model_kwargs=None,
+    tracer: dict = {},
 ):
     g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
     t = Thread(
-        target=llm_thread, args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs)
+        target=llm_thread,
+        args=(g, messages, model_name, temperature, openai_api_key, api_base_url, model_kwargs, tracer),
     )
     t.start()
     return g
 
 
-def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_base_url=None, model_kwargs=None):
+def llm_thread(
+    g,
+    messages,
+    model_name,
+    temperature,
+    openai_api_key=None,
+    api_base_url=None,
+    model_kwargs=None,
+    tracer: dict = {},
+):
     try:
         client_key = f"{openai_api_key}--{api_base_url}"
         if client_key not in openai_clients:
-            client: openai.OpenAI = openai.OpenAI(
+            client = openai.OpenAI(
                 api_key=openai_api_key,
                 base_url=api_base_url,
             )
             openai_clients[client_key] = client
         else:
-            client: openai.OpenAI = openai_clients[client_key]
+            client = openai_clients[client_key]
 
         formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
         stream = True
```
```diff
@@ -144,17 +166,29 @@ def llm_thread(g, messages, model_name, temperature, openai_api_key=None, api_base_url=None, model_kwargs=None):
             **(model_kwargs or dict()),
         )
 
+        aggregated_response = ""
         if not stream:
-            g.send(chat.choices[0].message.content)
+            aggregated_response = chat.choices[0].message.content
+            g.send(aggregated_response)
         else:
             for chunk in chat:
                 if len(chunk.choices) == 0:
                     continue
                 delta_chunk = chunk.choices[0].delta
+                text_chunk = ""
                 if isinstance(delta_chunk, str):
-                    g.send(delta_chunk)
+                    text_chunk = delta_chunk
                 elif delta_chunk.content:
-                    g.send(delta_chunk.content)
+                    text_chunk = delta_chunk.content
+                if text_chunk:
+                    aggregated_response += text_chunk
+                    g.send(text_chunk)
+
+        # Save conversation trace
+        tracer["chat_model"] = model_name
+        tracer["temperature"] = temperature
+        if in_debug_mode() or state.verbose > 1:
+            commit_conversation_trace(messages, aggregated_response, tracer)
     except Exception as e:
         logger.error(f"Error in llm_thread: {e}", exc_info=True)
     finally:
```
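The same tracing contract recurs in the Anthropic, Google, and offline chat utilities changed in this release: the caller owns a mutable `tracer` dict, the completion function stamps the model settings it actually used onto that dict, and a trace is only committed in debug or verbose mode. A minimal, self-contained sketch of the contract (the `completion` callable and `debug` flag here are stand-ins, not khoj APIs):

```python
from typing import Callable, Dict, List

def traced_completion(
    completion: Callable[[List[dict], str, float], str],  # stand-in for a provider call
    messages: List[dict],
    model: str,
    temperature: float = 0.0,
    tracer: Dict = {},
    debug: bool = False,  # stand-in for in_debug_mode() or state.verbose > 1
) -> str:
    response = completion(messages, model, temperature)
    # Stamp the settings actually used onto the caller's shared tracer dict
    tracer["chat_model"] = model
    tracer["temperature"] = temperature
    if debug:
        # In khoj this is where commit_conversation_trace() records the turn
        print(f"trace {tracer}: {response[:60]!r}")
    return response

# The same dict instance flows through every step that serves one message
tracer = {"mid": "example-mid", "cid": "example-cid", "uid": "example-uid"}
traced_completion(lambda m, mdl, t: "ok", [{"role": "user", "content": "hi"}], "gpt-4o-mini", tracer=tracer, debug=True)
```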
khoj/processor/conversation/prompts.py
CHANGED
```diff
@@ -193,7 +193,6 @@ you need to convert the user's query to a description format that the novice artist
 - ellipse
 - line
 - arrow
-- frame
 
 use these primitives to describe what sort of diagram the drawer should create. the artist must recreate the diagram every time, so include all relevant prior information in your description.
 
@@ -284,21 +283,6 @@ For text, you must use the `text` property to specify the text to be rendered.
     text: string,
 }}
 
-For frames, use the `children` property to specify the elements that are inside the frame by their ids.
-
-{{
-    type: "frame",
-    id: string,
-    x: number,
-    y: number,
-    width: number,
-    height: number,
-    name: string,
-    children: [
-        string
-    ]
-}}
-
 Here's an example of a valid diagram:
 
 Design Description: Create a diagram describing a circular development process with 3 stages: design, implementation and feedback. The design stage is connected to the implementation stage and the implementation stage is connected to the feedback stage and the feedback stage is connected to the design stage. Each stage should be labeled with the stage name.
```
khoj/processor/conversation/utils.py
CHANGED
```diff
@@ -2,6 +2,7 @@ import base64
 import logging
 import math
 import mimetypes
+import os
 import queue
 from dataclasses import dataclass
 from datetime import datetime
```
```diff
@@ -12,6 +13,8 @@ from typing import Any, Dict, List, Optional
 import PIL.Image
 import requests
 import tiktoken
+import yaml
+from git import Repo
 from langchain.schema import ChatMessage
 from llama_cpp.llama import Llama
 from transformers import AutoTokenizer
```
```diff
@@ -21,7 +24,7 @@ from khoj.database.models import ChatModelOptions, ClientApplication, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
 from khoj.utils import state
-from khoj.utils.helpers import is_none_or_empty, merge_dicts
+from khoj.utils.helpers import in_debug_mode, is_none_or_empty, merge_dicts
 
 logger = logging.getLogger(__name__)
 model_to_prompt_size = {
```
```diff
@@ -117,6 +120,7 @@ def save_to_conversation_log(
     conversation_id: str = None,
     automation_id: str = None,
     query_images: List[str] = None,
+    tracer: Dict[str, Any] = {},
 ):
     user_message_time = user_message_time or datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     updated_conversation = message_to_log(
```
```diff
@@ -142,6 +146,9 @@ def save_to_conversation_log(
         user_message=q,
     )
 
+    if in_debug_mode() or state.verbose > 1:
+        merge_message_into_conversation_trace(q, chat_response, tracer)
+
     logger.info(
         f"""
 Saved Conversation Turn
```
```diff
@@ -354,3 +361,163 @@ def get_image_from_url(image_url: str, type="pil"):
     except requests.exceptions.RequestException as e:
         logger.error(f"Failed to get image from URL {image_url}: {e}")
         return ImageWithType(content=None, type=None)
+
+
+def commit_conversation_trace(
+    session: list[ChatMessage],
+    response: str | list[dict],
+    tracer: dict,
+    system_message: str | list[dict] = "",
+    repo_path: str = "/tmp/promptrace",
+) -> str:
+    """
+    Save trace of conversation step using git. Useful to visualize, compare and debug traces.
+    Returns the path to the repository.
+    """
+    # Serialize session, system message and response to yaml
+    system_message_yaml = yaml.dump(system_message, allow_unicode=True, sort_keys=False, default_flow_style=False)
+    response_yaml = yaml.dump(response, allow_unicode=True, sort_keys=False, default_flow_style=False)
+    formatted_session = [{"role": message.role, "content": message.content} for message in session]
+    session_yaml = yaml.dump(formatted_session, allow_unicode=True, sort_keys=False, default_flow_style=False)
+    query = (
+        yaml.dump(session[-1].content, allow_unicode=True, sort_keys=False, default_flow_style=False)
+        .strip()
+        .removeprefix("'")
+        .removesuffix("'")
+    )  # Extract serialized query from chat session
+
+    # Extract chat metadata for session
+    uid, cid, mid = tracer.get("uid", "main"), tracer.get("cid", "main"), tracer.get("mid")
+
+    # Infer repository path from environment variable or provided path
+    repo_path = os.getenv("PROMPTRACE_DIR", repo_path)
+
+    try:
+        # Prepare git repository
+        os.makedirs(repo_path, exist_ok=True)
+        repo = Repo.init(repo_path)
+
+        # Remove post-commit hook if it exists
+        hooks_dir = os.path.join(repo_path, ".git", "hooks")
+        post_commit_hook = os.path.join(hooks_dir, "post-commit")
+        if os.path.exists(post_commit_hook):
+            os.remove(post_commit_hook)
+
+        # Configure git user if not set
+        if not repo.config_reader().has_option("user", "email"):
+            repo.config_writer().set_value("user", "name", "Prompt Tracer").release()
+            repo.config_writer().set_value("user", "email", "promptracer@khoj.dev").release()
+
+        # Create an initial commit if the repository is newly created
+        if not repo.head.is_valid():
+            repo.index.commit("And then there was a trace")
+
+        # Check out the initial commit
+        initial_commit = repo.commit("HEAD~0")
+        repo.head.reference = initial_commit
+        repo.head.reset(index=True, working_tree=True)
+
+        # Create or switch to user branch from initial commit
+        user_branch = f"u_{uid}"
+        if user_branch not in repo.branches:
+            repo.create_head(user_branch)
+        repo.heads[user_branch].checkout()
+
+        # Create or switch to conversation branch from user branch
+        conv_branch = f"c_{cid}"
+        if conv_branch not in repo.branches:
+            repo.create_head(conv_branch)
+        repo.heads[conv_branch].checkout()
+
+        # Create or switch to message branch from conversation branch
+        msg_branch = f"m_{mid}" if mid else None
+        if msg_branch and msg_branch not in repo.branches:
+            repo.create_head(msg_branch)
+        if msg_branch:
+            repo.heads[msg_branch].checkout()
+
+        # Include file with content to commit
+        files_to_commit = {"query": session_yaml, "response": response_yaml, "system_prompt": system_message_yaml}
+
+        # Write files and stage them
+        for filename, content in files_to_commit.items():
+            file_path = os.path.join(repo_path, filename)
+            # Unescape special characters in content for better readability
+            content = content.strip().replace("\\n", "\n").replace("\\t", "\t")
+            with open(file_path, "w", encoding="utf-8") as f:
+                f.write(content)
+            repo.index.add([filename])
+
+        # Create commit
+        metadata_yaml = yaml.dump(tracer, allow_unicode=True, sort_keys=False, default_flow_style=False)
+        commit_message = f"""
+{query[:250]}
+
+Response:
+---
+{response[:500]}...
+
+Metadata
+---
+{metadata_yaml}
+""".strip()
+
+        repo.index.commit(commit_message)
+
+        logger.debug(f"Saved conversation trace to repo at {repo_path}")
+        return repo_path
+    except Exception as e:
+        logger.error(f"Failed to add conversation trace to repo: {str(e)}", exc_info=True)
+        return None
+
+
+def merge_message_into_conversation_trace(query: str, response: str, tracer: dict, repo_path="/tmp/promptrace") -> bool:
+    """
+    Merge the message branch into its parent conversation branch.
+
+    Args:
+        query: User query
+        response: Assistant response
+        tracer: Dictionary containing uid, cid and mid
+        repo_path: Path to the git repository
+
+    Returns:
+        bool: True if merge was successful, False otherwise
+    """
+    try:
+        # Extract branch names
+        msg_branch = f"m_{tracer['mid']}"
+        conv_branch = f"c_{tracer['cid']}"
+
+        # Infer repository path from environment variable or provided path
+        repo_path = os.getenv("PROMPTRACE_DIR", repo_path)
+        repo = Repo(repo_path)
+
+        # Checkout conversation branch
+        repo.heads[conv_branch].checkout()
+
+        # Create commit message
+        metadata_yaml = yaml.dump(tracer, allow_unicode=True, sort_keys=False, default_flow_style=False)
+        commit_message = f"""
+{query[:250]}
+
+Response:
+---
+{response[:500]}...
+
+Metadata
+---
+{metadata_yaml}
+""".strip()
+
+        # Merge message branch into conversation branch
+        repo.git.merge(msg_branch, no_ff=True, m=commit_message)
+
+        # Delete message branch after merge
+        repo.delete_head(msg_branch, force=True)
+
+        logger.debug(f"Successfully merged {msg_branch} into {conv_branch}")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to merge message {msg_branch} into conversation {conv_branch}: {str(e)}", exc_info=True)
+        return False
```
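The resulting trace repository is plain git, so it can be inspected with any git tooling. A small sketch (assuming a populated trace repo at the default path) that walks the `u_<uid>` / `c_<cid>` branch hierarchy these functions create and prints one line per trace commit:

```python
import os

from git import Repo

# Open the trace repo, honoring the same PROMPTRACE_DIR override as above
repo = Repo(os.getenv("PROMPTRACE_DIR", "/tmp/promptrace"))
for branch in repo.branches:
    if branch.name.startswith("c_"):  # one branch per conversation
        print(f"\n== {branch.name} ==")
        for commit in repo.iter_commits(branch.name):
            # The first line of each trace commit message is the truncated query
            print(f"{commit.hexsha[:8]} {commit.message.splitlines()[0][:60]}")
```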
khoj/processor/image/generate.py
CHANGED
```diff
@@ -28,6 +28,7 @@ async def text_to_image(
     send_status_func: Optional[Callable] = None,
     query_images: Optional[List[str]] = None,
     agent: Agent = None,
+    tracer: dict = {},
 ):
     status_code = 200
     image = None
```
```diff
@@ -68,6 +69,7 @@ async def text_to_image(
         query_images=query_images,
         user=user,
         agent=agent,
+        tracer=tracer,
     )
 
     if send_status_func:
```
khoj/processor/tools/online_search.py
CHANGED
```diff
@@ -64,6 +64,7 @@ async def search_online(
     custom_filters: List[str] = [],
     query_images: List[str] = None,
     agent: Agent = None,
+    tracer: dict = {},
 ):
     query += " ".join(custom_filters)
     if not is_internet_connected():
```
```diff
@@ -73,7 +74,7 @@ async def search_online(
 
     # Breakdown the query into subqueries to get the correct answer
     subqueries = await generate_online_subqueries(
-        query, conversation_history, location, user, query_images=query_images, agent=agent
+        query, conversation_history, location, user, query_images=query_images, agent=agent, tracer=tracer
     )
     response_dict = {}
 
```
```diff
@@ -111,7 +112,7 @@ async def search_online(
         async for event in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
             yield {ChatEvent.STATUS: event}
     tasks = [
-        read_webpage_and_extract_content(data["queries"], link, data["content"], user=user, agent=agent)
+        read_webpage_and_extract_content(data["queries"], link, data["content"], user=user, agent=agent, tracer=tracer)
         for link, data in webpages.items()
     ]
     results = await asyncio.gather(*tasks)
```
```diff
@@ -153,6 +154,7 @@ async def read_webpages(
     send_status_func: Optional[Callable] = None,
     query_images: List[str] = None,
     agent: Agent = None,
+    tracer: dict = {},
 ):
     "Infer web pages to read from the query and extract relevant information from them"
     logger.info(f"Inferring web pages to read")
```
```diff
@@ -166,7 +168,7 @@ async def read_webpages(
         webpage_links_str = "\n- " + "\n- ".join(list(urls))
         async for event in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
             yield {ChatEvent.STATUS: event}
-    tasks = [read_webpage_and_extract_content({query}, url, user=user, agent=agent) for url in urls]
+    tasks = [read_webpage_and_extract_content({query}, url, user=user, agent=agent, tracer=tracer) for url in urls]
     results = await asyncio.gather(*tasks)
 
     response: Dict[str, Dict] = defaultdict(dict)
```
```diff
@@ -192,7 +194,12 @@ async def read_webpage(
 
 
 async def read_webpage_and_extract_content(
-    subqueries: set[str], url: str, content: str = None, user: KhojUser = None, agent: Agent = None
+    subqueries: set[str],
+    url: str,
+    content: str = None,
+    user: KhojUser = None,
+    agent: Agent = None,
+    tracer: dict = {},
 ) -> Tuple[set[str], str, Union[None, str]]:
     # Select the web scrapers to use for reading the web page
     web_scrapers = await ConversationAdapters.aget_enabled_webscrapers()
```
```diff
@@ -214,7 +221,9 @@ async def read_webpage_and_extract_content(
             # Extract relevant information from the web page
             if is_none_or_empty(extracted_info):
                 with timer(f"Extracting relevant information from web page at '{url}' took", logger):
-                    extracted_info = await extract_relevant_info(subqueries, content, user=user, agent=agent)
+                    extracted_info = await extract_relevant_info(
+                        subqueries, content, user=user, agent=agent, tracer=tracer
+                    )
 
             # If we successfully extracted information, break the loop
             if not is_none_or_empty(extracted_info):
```
khoj/routers/api.py
CHANGED
```diff
@@ -350,6 +350,7 @@ async def extract_references_and_questions(
     send_status_func: Optional[Callable] = None,
     query_images: Optional[List[str]] = None,
     agent: Agent = None,
+    tracer: dict = {},
 ):
     user = request.user.object if request.user.is_authenticated else None
 
```
```diff
@@ -425,6 +426,7 @@ async def extract_references_and_questions(
             user=user,
             max_prompt_size=conversation_config.max_prompt_size,
             personality_context=personality_context,
+            tracer=tracer,
         )
     elif conversation_config.model_type == ChatModelOptions.ModelType.OPENAI:
         openai_chat_config = conversation_config.openai_config
```
```diff
@@ -442,6 +444,7 @@ async def extract_references_and_questions(
             query_images=query_images,
             vision_enabled=vision_enabled,
             personality_context=personality_context,
+            tracer=tracer,
         )
     elif conversation_config.model_type == ChatModelOptions.ModelType.ANTHROPIC:
         api_key = conversation_config.openai_config.api_key
```
```diff
@@ -456,6 +459,7 @@ async def extract_references_and_questions(
             user=user,
             vision_enabled=vision_enabled,
             personality_context=personality_context,
+            tracer=tracer,
         )
     elif conversation_config.model_type == ChatModelOptions.ModelType.GOOGLE:
         api_key = conversation_config.openai_config.api_key
```
```diff
@@ -471,6 +475,7 @@ async def extract_references_and_questions(
             user=user,
             vision_enabled=vision_enabled,
             personality_context=personality_context,
+            tracer=tracer,
         )
 
     # Collate search results as context for GPT
```
khoj/routers/api_chat.py
CHANGED
```diff
@@ -3,6 +3,7 @@ import base64
 import json
 import logging
 import time
+import uuid
 from datetime import datetime
 from functools import partial
 from typing import Dict, Optional
```
```diff
@@ -563,6 +564,12 @@ async def chat(
         event_delimiter = "␃🔚␗"
         q = unquote(q)
         nonlocal conversation_id
+        tracer: dict = {
+            "mid": f"{uuid.uuid4()}",
+            "cid": conversation_id,
+            "uid": user.id,
+            "khoj_version": state.khoj_version,
+        }
 
         uploaded_images: list[str] = []
         if images:
```
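Every chat request mints a fresh message id while reusing the conversation and user ids, and `commit_conversation_trace` (above) turns those three ids into the trace repo's branch hierarchy. A hedged sketch of that mapping, with example id values:

```python
import uuid

# Sketch: how the per-request tracer ids map onto trace-repo branch names
# (scheme taken from commit_conversation_trace; id values are examples).
tracer = {
    "mid": f"{uuid.uuid4()}",   # fresh per message
    "cid": "conversation-123",  # stable per conversation
    "uid": 7,                   # stable per user
    "khoj_version": "1.27.2.dev29",
}
user_branch = f"u_{tracer['uid']}"  # branched from the initial commit
conv_branch = f"c_{tracer['cid']}"  # branched from the user branch
msg_branch = f"m_{tracer['mid']}"   # merged back into conv branch on save
print(user_branch, conv_branch, msg_branch)
```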
```diff
@@ -682,6 +689,7 @@ async def chat(
                 user=user,
                 query_images=uploaded_images,
                 agent=agent,
+                tracer=tracer,
             )
             conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
             async for result in send_event(
```
```diff
@@ -689,7 +697,9 @@ async def chat(
             ):
                 yield result
 
-            mode = await aget_relevant_output_modes(q, meta_log, is_automated_task, user, uploaded_images, agent)
+            mode = await aget_relevant_output_modes(
+                q, meta_log, is_automated_task, user, uploaded_images, agent, tracer=tracer
+            )
             async for result in send_event(ChatEvent.STATUS, f"**Decided Response Mode:** {mode.value}"):
                 yield result
             if mode not in conversation_commands:
```
```diff
@@ -755,6 +765,7 @@ async def chat(
                 query_images=uploaded_images,
                 user=user,
                 agent=agent,
+                tracer=tracer,
             )
             response_log = str(response)
             async for result in send_llm_response(response_log):
```
```diff
@@ -774,6 +785,7 @@ async def chat(
                 client_application=request.user.client_app,
                 conversation_id=conversation_id,
                 query_images=uploaded_images,
+                tracer=tracer,
             )
             return
 
```
```diff
@@ -795,7 +807,7 @@ async def chat(
         if ConversationCommand.Automation in conversation_commands:
             try:
                 automation, crontime, query_to_run, subject = await create_automation(
-                    q, timezone, user, request.url, meta_log
+                    q, timezone, user, request.url, meta_log, tracer=tracer
                 )
             except Exception as e:
                 logger.error(f"Error scheduling task {q} for {user.email}: {e}")
```
```diff
@@ -817,6 +829,7 @@ async def chat(
                 inferred_queries=[query_to_run],
                 automation_id=automation.id,
                 query_images=uploaded_images,
+                tracer=tracer,
             )
             async for result in send_llm_response(llm_response):
                 yield result
```
```diff
@@ -838,6 +851,7 @@ async def chat(
                 partial(send_event, ChatEvent.STATUS),
                 query_images=uploaded_images,
                 agent=agent,
+                tracer=tracer,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
                     yield result[ChatEvent.STATUS]
```
```diff
@@ -882,6 +896,7 @@ async def chat(
                 custom_filters,
                 query_images=uploaded_images,
                 agent=agent,
+                tracer=tracer,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
                     yield result[ChatEvent.STATUS]
```
```diff
@@ -906,6 +921,7 @@ async def chat(
                 partial(send_event, ChatEvent.STATUS),
                 query_images=uploaded_images,
                 agent=agent,
+                tracer=tracer,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
                     yield result[ChatEvent.STATUS]
```
```diff
@@ -956,6 +972,7 @@ async def chat(
                 send_status_func=partial(send_event, ChatEvent.STATUS),
                 query_images=uploaded_images,
                 agent=agent,
+                tracer=tracer,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
                     yield result[ChatEvent.STATUS]
```
```diff
@@ -986,6 +1003,7 @@ async def chat(
                 compiled_references=compiled_references,
                 online_results=online_results,
                 query_images=uploaded_images,
+                tracer=tracer,
             )
             content_obj = {
                 "intentType": intent_type,
```
```diff
@@ -1014,6 +1032,7 @@ async def chat(
                 user=user,
                 agent=agent,
                 send_status_func=partial(send_event, ChatEvent.STATUS),
+                tracer=tracer,
             ):
                 if isinstance(result, dict) and ChatEvent.STATUS in result:
                     yield result[ChatEvent.STATUS]
```
```diff
@@ -1041,6 +1060,7 @@ async def chat(
                 compiled_references=compiled_references,
                 online_results=online_results,
                 query_images=uploaded_images,
+                tracer=tracer,
             )
 
             async for result in send_llm_response(json.dumps(content_obj)):
```
```diff
@@ -1064,6 +1084,7 @@ async def chat(
                 location,
                 user_name,
                 uploaded_images,
+                tracer,
             )
 
             # Send Response
```