khoj 1.27.2.dev18__py3-none-any.whl → 1.27.2.dev130__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +34 -10
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1467-5a191c1cd5bf0b83.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-5d70d9dfcdcb1f10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3423-fa918f4e5365a35e.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-3ad0bfb299801220.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-7dc98df9c88828f0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d887f55fe6d4f35d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-8f22b790e50dd722.js → page-d46244282af16509.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-505b07bce608b34e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-31239d193815e49e.js → webpack-8ae5ce45161bd98e.js} +1 -1
- khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
- khoj/interface/compiled/_next/static/css/e9c5fe555dd3050b.css +25 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
- khoj/processor/conversation/anthropic/utils.py +37 -6
- khoj/processor/conversation/google/gemini_chat.py +23 -13
- khoj/processor/conversation/google/utils.py +34 -10
- khoj/processor/conversation/offline/chat_model.py +40 -15
- khoj/processor/conversation/openai/gpt.py +25 -10
- khoj/processor/conversation/openai/utils.py +43 -9
- khoj/processor/conversation/prompts.py +131 -22
- khoj/processor/conversation/utils.py +299 -6
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +19 -8
- khoj/processor/tools/run_code.py +144 -0
- khoj/routers/api.py +11 -6
- khoj/routers/api_chat.py +177 -88
- khoj/routers/helpers.py +155 -59
- khoj/routers/research.py +321 -0
- khoj/search_filter/date_filter.py +1 -3
- khoj/search_filter/file_filter.py +1 -2
- khoj/search_type/text_search.py +3 -3
- khoj/utils/helpers.py +15 -2
- khoj/utils/yaml.py +4 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/METADATA +2 -1
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/RECORD +61 -58
- khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +0 -1
- khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-8e9c420574a9fbe3.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9479-a5e7ff4c7d1d7ee7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
- khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
- /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1970-1d6d0c1b00b4f343.js → 1970-444843bea1d17d61.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9417-759984ad62caa3dc.js → 9417-19cfd1a9cb758e71.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-89e6737b2cc9fb3a.js} +0 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/WHEEL +0 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/entry_points.txt +0 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/licenses/LICENSE +0 -0
khoj/processor/tools/online_search.py
CHANGED

```diff
@@ -4,7 +4,7 @@ import logging
 import os
 import urllib.parse
 from collections import defaultdict
-from typing import Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import aiohttp
 from bs4 import BeautifulSoup
@@ -52,7 +52,8 @@ OLOSTEP_QUERY_PARAMS = {
     "expandMarkdown": "True",
     "expandHtml": "False",
 }
-
+
+DEFAULT_MAX_WEBPAGES_TO_READ = 1
 
 
 async def search_online(
@@ -62,8 +63,10 @@ async def search_online(
     user: KhojUser,
     send_status_func: Optional[Callable] = None,
     custom_filters: List[str] = [],
+    max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
     query_images: List[str] = None,
     agent: Agent = None,
+    tracer: dict = {},
 ):
     query += " ".join(custom_filters)
     if not is_internet_connected():
@@ -73,7 +76,7 @@ async def search_online(
 
     # Breakdown the query into subqueries to get the correct answer
     subqueries = await generate_online_subqueries(
-        query, conversation_history, location, user, query_images=query_images, agent=agent
+        query, conversation_history, location, user, query_images=query_images, agent=agent, tracer=tracer
     )
     response_dict = {}
 
@@ -96,7 +99,7 @@ async def search_online(
     for subquery in response_dict:
         if "answerBox" in response_dict[subquery]:
             continue
-        for organic in response_dict[subquery].get("organic", [])[:
+        for organic in response_dict[subquery].get("organic", [])[:max_webpages_to_read]:
            link = organic.get("link")
            if link in webpages:
                webpages[link]["queries"].add(subquery)
@@ -111,7 +114,7 @@ async def search_online(
        async for event in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
            yield {ChatEvent.STATUS: event}
    tasks = [
-        read_webpage_and_extract_content(data["queries"], link, data["content"], user=user, agent=agent)
+        read_webpage_and_extract_content(data["queries"], link, data["content"], user=user, agent=agent, tracer=tracer)
        for link, data in webpages.items()
    ]
    results = await asyncio.gather(*tasks)
@@ -153,6 +156,7 @@ async def read_webpages(
     send_status_func: Optional[Callable] = None,
     query_images: List[str] = None,
     agent: Agent = None,
+    tracer: dict = {},
 ):
     "Infer web pages to read from the query and extract relevant information from them"
     logger.info(f"Inferring web pages to read")
@@ -166,7 +170,7 @@ async def read_webpages(
        webpage_links_str = "\n- " + "\n- ".join(list(urls))
        async for event in send_status_func(f"**Reading web pages**: {webpage_links_str}"):
            yield {ChatEvent.STATUS: event}
-    tasks = [read_webpage_and_extract_content({query}, url, user=user, agent=agent) for url in urls]
+    tasks = [read_webpage_and_extract_content({query}, url, user=user, agent=agent, tracer=tracer) for url in urls]
    results = await asyncio.gather(*tasks)
 
    response: Dict[str, Dict] = defaultdict(dict)
@@ -192,7 +196,12 @@ async def read_webpage(
 
 
 async def read_webpage_and_extract_content(
-    subqueries: set[str],
+    subqueries: set[str],
+    url: str,
+    content: str = None,
+    user: KhojUser = None,
+    agent: Agent = None,
+    tracer: dict = {},
 ) -> Tuple[set[str], str, Union[None, str]]:
     # Select the web scrapers to use for reading the web page
     web_scrapers = await ConversationAdapters.aget_enabled_webscrapers()
@@ -214,7 +223,9 @@ async def read_webpage_and_extract_content(
        # Extract relevant information from the web page
        if is_none_or_empty(extracted_info):
            with timer(f"Extracting relevant information from web page at '{url}' took", logger):
-                extracted_info = await extract_relevant_info(
+                extracted_info = await extract_relevant_info(
+                    subqueries, content, user=user, agent=agent, tracer=tracer
+                )
 
        # If we successfully extracted information, break the loop
        if not is_none_or_empty(extracted_info):
```
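The common thread across these hunks is a new `tracer: dict = {}` keyword argument passed through every chat actor (`generate_online_subqueries`, `read_webpage_and_extract_content`, `extract_relevant_info`). A minimal sketch of the pattern with hypothetical actor names, not Khoj's actual functions, showing why threading one mutable dict through nested async calls lets the top-level caller collect trace metadata without changing any return types:

```python
import asyncio


async def extract_info(query: str, tracer: dict = {}) -> str:
    # Each actor appends a record of what it did to the shared dict.
    tracer.setdefault("calls", []).append({"actor": "extract_info", "query": query})
    return f"info for {query}"


async def search(query: str, tracer: dict = {}) -> str:
    # Passing the same dict down means nested calls land in the same trace.
    tracer.setdefault("calls", []).append({"actor": "search", "query": query})
    return await extract_info(query, tracer=tracer)


async def main():
    tracer: dict = {}
    await search("khoj release notes", tracer=tracer)
    # The caller now sees both actors' records, in call order.
    print(tracer["calls"])


asyncio.run(main())
```

Note the `tracer: dict = {}` mutable default is created once and shared by all calls that omit the argument; that appears acceptable here because callers thread an explicit dict through the whole chain.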
@@ -0,0 +1,144 @@
|
|
1
|
+
import asyncio
|
2
|
+
import datetime
|
3
|
+
import json
|
4
|
+
import logging
|
5
|
+
import os
|
6
|
+
from typing import Any, Callable, List, Optional
|
7
|
+
|
8
|
+
import aiohttp
|
9
|
+
|
10
|
+
from khoj.database.adapters import ais_user_subscribed
|
11
|
+
from khoj.database.models import Agent, KhojUser
|
12
|
+
from khoj.processor.conversation import prompts
|
13
|
+
from khoj.processor.conversation.utils import (
|
14
|
+
ChatEvent,
|
15
|
+
clean_code_python,
|
16
|
+
clean_json,
|
17
|
+
construct_chat_history,
|
18
|
+
)
|
19
|
+
from khoj.routers.helpers import send_message_to_model_wrapper
|
20
|
+
from khoj.utils.helpers import timer
|
21
|
+
from khoj.utils.rawconfig import LocationData
|
22
|
+
|
23
|
+
logger = logging.getLogger(__name__)
|
24
|
+
|
25
|
+
|
26
|
+
SANDBOX_URL = os.getenv("KHOJ_TERRARIUM_URL", "http://localhost:8080")
|
27
|
+
|
28
|
+
|
29
|
+
async def run_code(
|
30
|
+
query: str,
|
31
|
+
conversation_history: dict,
|
32
|
+
context: str,
|
33
|
+
location_data: LocationData,
|
34
|
+
user: KhojUser,
|
35
|
+
send_status_func: Optional[Callable] = None,
|
36
|
+
query_images: List[str] = None,
|
37
|
+
agent: Agent = None,
|
38
|
+
sandbox_url: str = SANDBOX_URL,
|
39
|
+
tracer: dict = {},
|
40
|
+
):
|
41
|
+
# Generate Code
|
42
|
+
if send_status_func:
|
43
|
+
async for event in send_status_func(f"**Generate code snippets** for {query}"):
|
44
|
+
yield {ChatEvent.STATUS: event}
|
45
|
+
try:
|
46
|
+
with timer("Chat actor: Generate programs to execute", logger):
|
47
|
+
codes = await generate_python_code(
|
48
|
+
query,
|
49
|
+
conversation_history,
|
50
|
+
context,
|
51
|
+
location_data,
|
52
|
+
user,
|
53
|
+
query_images,
|
54
|
+
agent,
|
55
|
+
tracer,
|
56
|
+
)
|
57
|
+
except Exception as e:
|
58
|
+
raise ValueError(f"Failed to generate code for {query} with error: {e}")
|
59
|
+
|
60
|
+
# Run Code
|
61
|
+
if send_status_func:
|
62
|
+
async for event in send_status_func(f"**Running {len(codes)} code snippets**"):
|
63
|
+
yield {ChatEvent.STATUS: event}
|
64
|
+
try:
|
65
|
+
tasks = [execute_sandboxed_python(code, sandbox_url) for code in codes]
|
66
|
+
with timer("Chat actor: Execute generated programs", logger):
|
67
|
+
results = await asyncio.gather(*tasks)
|
68
|
+
for result in results:
|
69
|
+
code = result.pop("code")
|
70
|
+
logger.info(f"Executed Code:\n--@@--\n{code}\n--@@--Result:\n--@@--\n{result}\n--@@--")
|
71
|
+
yield {query: {"code": code, "results": result}}
|
72
|
+
except Exception as e:
|
73
|
+
raise ValueError(f"Failed to run code for {query} with error: {e}")
|
74
|
+
|
75
|
+
|
76
|
+
async def generate_python_code(
|
77
|
+
q: str,
|
78
|
+
conversation_history: dict,
|
79
|
+
context: str,
|
80
|
+
location_data: LocationData,
|
81
|
+
user: KhojUser,
|
82
|
+
query_images: List[str] = None,
|
83
|
+
agent: Agent = None,
|
84
|
+
tracer: dict = {},
|
85
|
+
) -> List[str]:
|
86
|
+
location = f"{location_data}" if location_data else "Unknown"
|
87
|
+
username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else ""
|
88
|
+
subscribed = await ais_user_subscribed(user)
|
89
|
+
chat_history = construct_chat_history(conversation_history)
|
90
|
+
|
91
|
+
utc_date = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
|
92
|
+
personality_context = (
|
93
|
+
prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
|
94
|
+
)
|
95
|
+
|
96
|
+
code_generation_prompt = prompts.python_code_generation_prompt.format(
|
97
|
+
current_date=utc_date,
|
98
|
+
query=q,
|
99
|
+
chat_history=chat_history,
|
100
|
+
context=context,
|
101
|
+
location=location,
|
102
|
+
username=username,
|
103
|
+
personality_context=personality_context,
|
104
|
+
)
|
105
|
+
|
106
|
+
response = await send_message_to_model_wrapper(
|
107
|
+
code_generation_prompt,
|
108
|
+
query_images=query_images,
|
109
|
+
response_type="json_object",
|
110
|
+
user=user,
|
111
|
+
tracer=tracer,
|
112
|
+
)
|
113
|
+
|
114
|
+
# Validate that the response is a non-empty, JSON-serializable list
|
115
|
+
response = clean_json(response)
|
116
|
+
response = json.loads(response)
|
117
|
+
codes = [code.strip() for code in response["codes"] if code.strip()]
|
118
|
+
|
119
|
+
if not isinstance(codes, list) or not codes or len(codes) == 0:
|
120
|
+
raise ValueError
|
121
|
+
return codes
|
122
|
+
|
123
|
+
|
124
|
+
async def execute_sandboxed_python(code: str, sandbox_url: str = SANDBOX_URL) -> dict[str, Any]:
|
125
|
+
"""
|
126
|
+
Takes code to run as a string and calls the terrarium API to execute it.
|
127
|
+
Returns the result of the code execution as a dictionary.
|
128
|
+
"""
|
129
|
+
headers = {"Content-Type": "application/json"}
|
130
|
+
cleaned_code = clean_code_python(code)
|
131
|
+
data = {"code": cleaned_code}
|
132
|
+
|
133
|
+
async with aiohttp.ClientSession() as session:
|
134
|
+
async with session.post(sandbox_url, json=data, headers=headers) as response:
|
135
|
+
if response.status == 200:
|
136
|
+
result: dict[str, Any] = await response.json()
|
137
|
+
result["code"] = cleaned_code
|
138
|
+
return result
|
139
|
+
else:
|
140
|
+
return {
|
141
|
+
"code": cleaned_code,
|
142
|
+
"success": False,
|
143
|
+
"std_err": f"Failed to execute code with {response.status}",
|
144
|
+
}
|
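`execute_sandboxed_python` is a thin JSON POST to the terrarium sandbox service. A self-contained sketch of the same round trip (`run_in_sandbox` is a stand-in name, not part of the package), assuming a terrarium instance is listening on the default `KHOJ_TERRARIUM_URL` of `http://localhost:8080`:

```python
import asyncio

import aiohttp

SANDBOX_URL = "http://localhost:8080"  # default KHOJ_TERRARIUM_URL above


async def run_in_sandbox(code: str, sandbox_url: str = SANDBOX_URL) -> dict:
    # Mirror execute_sandboxed_python: POST {"code": ...} and return the
    # JSON result, or a synthetic error dict on a non-200 response.
    async with aiohttp.ClientSession() as session:
        async with session.post(sandbox_url, json={"code": code}) as response:
            if response.status == 200:
                return await response.json()
            return {"success": False, "std_err": f"Failed to execute code with {response.status}"}


print(asyncio.run(run_in_sandbox("print(21 * 2)")))
```

Also note that `run_code` is an async generator: it yields `ChatEvent.STATUS` events before the final `{query: {"code": ..., "results": ...}}` payload, so callers drive it with `async for` rather than awaiting it.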
khoj/routers/api.py
CHANGED

```diff
@@ -44,6 +44,7 @@ from khoj.processor.conversation.offline.chat_model import extract_questions_offline
 from khoj.processor.conversation.offline.whisper import transcribe_audio_offline
 from khoj.processor.conversation.openai.gpt import extract_questions
 from khoj.processor.conversation.openai.whisper import transcribe_audio
+from khoj.processor.conversation.utils import defilter_query
 from khoj.routers.helpers import (
     ApiUserRateLimiter,
     ChatEvent,
@@ -167,8 +168,8 @@ async def execute_search(
        search_futures += [
            executor.submit(
                text_search.query,
-                user,
                user_query,
+                user,
                t,
                question_embedding=encoded_asymmetric_query,
                max_distance=max_distance,
@@ -350,11 +351,12 @@ async def extract_references_and_questions(
     send_status_func: Optional[Callable] = None,
     query_images: Optional[List[str]] = None,
     agent: Agent = None,
+    tracer: dict = {},
 ):
     user = request.user.object if request.user.is_authenticated else None
 
     # Initialize Variables
-    compiled_references: List[
+    compiled_references: List[dict[str, str]] = []
     inferred_queries: List[str] = []
 
     agent_has_entries = False
@@ -383,9 +385,7 @@ async def extract_references_and_questions(
        return
 
    # Extract filter terms from user message
-    defiltered_query = q
-    for filter in [DateFilter(), WordFilter(), FileFilter()]:
-        defiltered_query = filter.defilter(defiltered_query)
+    defiltered_query = defilter_query(q)
    filters_in_query = q.replace(defiltered_query, "").strip()
    conversation = await sync_to_async(ConversationAdapters.get_conversation_by_id)(conversation_id)
 
@@ -425,6 +425,7 @@ async def extract_references_and_questions(
            user=user,
            max_prompt_size=conversation_config.max_prompt_size,
            personality_context=personality_context,
+            tracer=tracer,
        )
    elif conversation_config.model_type == ChatModelOptions.ModelType.OPENAI:
        openai_chat_config = conversation_config.openai_config
@@ -442,6 +443,7 @@ async def extract_references_and_questions(
            query_images=query_images,
            vision_enabled=vision_enabled,
            personality_context=personality_context,
+            tracer=tracer,
        )
    elif conversation_config.model_type == ChatModelOptions.ModelType.ANTHROPIC:
        api_key = conversation_config.openai_config.api_key
@@ -456,6 +458,7 @@ async def extract_references_and_questions(
            user=user,
            vision_enabled=vision_enabled,
            personality_context=personality_context,
+            tracer=tracer,
        )
    elif conversation_config.model_type == ChatModelOptions.ModelType.GOOGLE:
        api_key = conversation_config.openai_config.api_key
@@ -471,6 +474,7 @@ async def extract_references_and_questions(
            user=user,
            vision_enabled=vision_enabled,
            personality_context=personality_context,
+            tracer=tracer,
        )
 
    # Collate search results as context for GPT
@@ -497,7 +501,8 @@ async def extract_references_and_questions(
    )
    search_results = text_search.deduplicated_search_responses(search_results)
    compiled_references = [
-        {"compiled": item.additional["compiled"], "file": item.additional["file"]}
+        {"query": q, "compiled": item.additional["compiled"], "file": item.additional["file"]}
+        for q, item in zip(inferred_queries, search_results)
    ]
 
    yield compiled_references, inferred_queries, defiltered_query
```
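The three-line filter-stripping loop removed in the `@@ -383,9 +385,7 @@` hunk is consolidated into `defilter_query`, newly imported from `khoj.processor.conversation.utils`. Based on the removed lines, a sketch of what the helper presumably does; the filter import paths are an assumption that follows the `khoj/search_filter/` files touched in this release:

```python
# Assumed import paths, mirroring khoj/search_filter/{date,file}_filter.py above.
from khoj.search_filter.date_filter import DateFilter
from khoj.search_filter.file_filter import FileFilter
from khoj.search_filter.word_filter import WordFilter


def defilter_query(query: str) -> str:
    # Strip date/word/file filter terms from the raw user message,
    # exactly as the inlined loop removed in the hunk above did.
    defiltered_query = query
    for filter in [DateFilter(), WordFilter(), FileFilter()]:
        defiltered_query = filter.defilter(defiltered_query)
    return defiltered_query
```

Centralizing the loop lets other call sites reuse the same defiltering without importing the three filter classes individually.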