khoj 1.28.3__py3-none-any.whl → 1.28.4.dev92__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +10 -14
- khoj/database/adapters/__init__.py +128 -44
- khoj/database/admin.py +6 -3
- khoj/database/management/commands/change_default_model.py +7 -72
- khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
- khoj/database/models/__init__.py +4 -6
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1603-dc5fd983dbcd070d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1970-c78f6acc8e16e30b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/2261-748f7c327df3c8c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3124-a4cea2eda163128d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3803-d74118a2d0182c52.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5538-36aa824a75519c5b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5961-3c104d9736b7902b.js +3 -0
- khoj/interface/compiled/_next/static/chunks/8423-ebfa9bb9e2424ca3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9417-32c4db52ca42e681.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e9838b642913a071.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-4353b1a532795ad1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-d3edae545a1b5393.js → page-c9f13c865e739607.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-b0e7ff4baa3b5265.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-45720e1ed71e3ef5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{layout-d0f0a9067427fb20.js → layout-86561d2fac35a91a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-ea462e20376b6dce.js → page-ecb8e1c192aa8834.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-ea6b73fdaf9b24ca.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-a5c277eff207959e.js → page-8e28deacb61f75aa.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{layout-a8f33dfe92f997fb.js → layout-254eaaf916449a60.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/page-2fab613a557d3cc5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-cf7445cf0326bda3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-30376aa7e9cfa342.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{main-f84cd3c1873cd842.js → main-1ea5c2e0fdef4626.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-8beec5b51cabb39a.js → webpack-27cf153c35b1338d.js} +1 -1
- khoj/interface/compiled/_next/static/css/{467a524c75e7d7c0.css → 0e9d53dcd7f11342.css} +1 -1
- khoj/interface/compiled/_next/static/css/{26c1c33d0423a7d8.css → 1f293605f2871853.css} +1 -1
- khoj/interface/compiled/_next/static/css/2d097a35da6bfe8d.css +1 -0
- khoj/interface/compiled/_next/static/css/80bd6301fc657983.css +1 -0
- khoj/interface/compiled/_next/static/css/ed437164d77aa600.css +25 -0
- khoj/interface/compiled/_next/static/media/5455839c73f146e7-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/5984b96ba4822821-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/684adc3dde1b03f1-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/82e3b9a1bdaf0c26-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/8d1ea331386a0db8-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/91475f6526542a4f-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/b98b13dbc1c3b59c-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c824d7a20139e39d-s.woff2 +0 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +3 -3
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +3 -3
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +3 -3
- khoj/processor/content/docx/docx_to_entries.py +27 -21
- khoj/processor/content/github/github_to_entries.py +2 -2
- khoj/processor/content/images/image_to_entries.py +2 -2
- khoj/processor/content/markdown/markdown_to_entries.py +2 -2
- khoj/processor/content/notion/notion_to_entries.py +2 -2
- khoj/processor/content/org_mode/org_to_entries.py +2 -2
- khoj/processor/content/org_mode/orgnode.py +1 -1
- khoj/processor/content/pdf/pdf_to_entries.py +37 -29
- khoj/processor/content/plaintext/plaintext_to_entries.py +2 -2
- khoj/processor/content/text_to_entries.py +3 -4
- khoj/processor/conversation/anthropic/anthropic_chat.py +9 -1
- khoj/processor/conversation/google/gemini_chat.py +15 -2
- khoj/processor/conversation/google/utils.py +3 -1
- khoj/processor/conversation/offline/chat_model.py +4 -0
- khoj/processor/conversation/openai/gpt.py +6 -1
- khoj/processor/conversation/prompts.py +72 -13
- khoj/processor/conversation/utils.py +80 -13
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +68 -18
- khoj/processor/tools/run_code.py +54 -20
- khoj/routers/api.py +10 -4
- khoj/routers/api_agents.py +8 -10
- khoj/routers/api_chat.py +89 -24
- khoj/routers/api_content.py +80 -8
- khoj/routers/helpers.py +176 -60
- khoj/routers/notion.py +1 -1
- khoj/routers/research.py +73 -31
- khoj/routers/web_client.py +0 -10
- khoj/search_type/text_search.py +3 -7
- khoj/utils/cli.py +2 -2
- khoj/utils/fs_syncer.py +2 -1
- khoj/utils/helpers.py +6 -3
- khoj/utils/rawconfig.py +32 -0
- khoj/utils/state.py +2 -1
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/METADATA +3 -3
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/RECORD +99 -105
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/WHEEL +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1970-d44050bf658ae5cc.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3110-ef2cacd1b8d79ad8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-f4b7df2f6f3362f7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/394-6bcb8c429f168f21.js +0 -3
- khoj/interface/compiled/_next/static/chunks/7113-f2e114d7034a0835.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8423-da57554315eebcbe.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8840-b8d7b9f0923c6651.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9417-0d0fc7eb49a86abb.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-75636ab3a413fa8e.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/page-adbf3cd470da248f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-96fcf62857bf8f30.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-222d348681b848a5.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/layout-7b30c541c05fb904.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-bded0868a08ac4ba.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-3720f1362310bebb.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/page-210bd54db4841333.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-2df56074e42adaa0.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-a21b7e8890ed1209.js +0 -1
- khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
- khoj/interface/compiled/_next/static/css/553f9cdcc7a2bcd6.css +0 -1
- khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +0 -25
- khoj/interface/compiled/_next/static/css/afd3d45cc65d55d8.css +0 -1
- khoj/interface/compiled/_next/static/media/0e790e04fd40ad16-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/4221e1667cd19c7d-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6c276159aa0eb14b-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6cc0b9500e4f9168-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/9d9319a7a2ac39c6-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/a75c8ea86756d52d-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/abce7c400ca31a51-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/f759c939737fb668-s.woff2 +0 -0
- khoj/interface/compiled/factchecker/index.html +0 -1
- khoj/interface/compiled/factchecker/index.txt +0 -7
- /khoj/interface/compiled/_next/static/{EfnEiWDle86AUcxEdEFgO → t_2jovvUVve0Gvc3FqpT9}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{EfnEiWDle86AUcxEdEFgO → t_2jovvUVve0Gvc3FqpT9}/_ssgManifest.js +0 -0
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/entry_points.txt +0 -0
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/licenses/LICENSE +0 -0
khoj/processor/tools/run_code.py
CHANGED
@@ -1,14 +1,16 @@
|
|
1
|
-
import
|
1
|
+
import base64
|
2
2
|
import datetime
|
3
3
|
import json
|
4
4
|
import logging
|
5
|
+
import mimetypes
|
5
6
|
import os
|
6
|
-
from
|
7
|
+
from pathlib import Path
|
8
|
+
from typing import Any, Callable, List, NamedTuple, Optional
|
7
9
|
|
8
10
|
import aiohttp
|
9
11
|
|
10
|
-
from khoj.database.adapters import
|
11
|
-
from khoj.database.models import Agent, KhojUser
|
12
|
+
from khoj.database.adapters import FileObjectAdapters
|
13
|
+
from khoj.database.models import Agent, FileObject, KhojUser
|
12
14
|
from khoj.processor.conversation import prompts
|
13
15
|
from khoj.processor.conversation.utils import (
|
14
16
|
ChatEvent,
|
@@ -17,7 +19,7 @@ from khoj.processor.conversation.utils import (
|
|
17
19
|
construct_chat_history,
|
18
20
|
)
|
19
21
|
from khoj.routers.helpers import send_message_to_model_wrapper
|
20
|
-
from khoj.utils.helpers import timer
|
22
|
+
from khoj.utils.helpers import is_none_or_empty, timer
|
21
23
|
from khoj.utils.rawconfig import LocationData
|
22
24
|
|
23
25
|
logger = logging.getLogger(__name__)
|
@@ -26,6 +28,12 @@ logger = logging.getLogger(__name__)
|
|
26
28
|
SANDBOX_URL = os.getenv("KHOJ_TERRARIUM_URL", "http://localhost:8080")
|
27
29
|
|
28
30
|
|
31
|
+
class GeneratedCode(NamedTuple):
|
32
|
+
code: str
|
33
|
+
input_files: List[str]
|
34
|
+
input_links: List[str]
|
35
|
+
|
36
|
+
|
29
37
|
async def run_code(
|
30
38
|
query: str,
|
31
39
|
conversation_history: dict,
|
@@ -36,15 +44,16 @@ async def run_code(
|
|
36
44
|
query_images: List[str] = None,
|
37
45
|
agent: Agent = None,
|
38
46
|
sandbox_url: str = SANDBOX_URL,
|
47
|
+
query_files: str = None,
|
39
48
|
tracer: dict = {},
|
40
49
|
):
|
41
50
|
# Generate Code
|
42
51
|
if send_status_func:
|
43
|
-
async for event in send_status_func(f"**Generate code
|
52
|
+
async for event in send_status_func(f"**Generate code snippet** for {query}"):
|
44
53
|
yield {ChatEvent.STATUS: event}
|
45
54
|
try:
|
46
55
|
with timer("Chat actor: Generate programs to execute", logger):
|
47
|
-
|
56
|
+
generated_code = await generate_python_code(
|
48
57
|
query,
|
49
58
|
conversation_history,
|
50
59
|
context,
|
@@ -53,19 +62,31 @@ async def run_code(
|
|
53
62
|
query_images,
|
54
63
|
agent,
|
55
64
|
tracer,
|
65
|
+
query_files,
|
56
66
|
)
|
57
67
|
except Exception as e:
|
58
68
|
raise ValueError(f"Failed to generate code for {query} with error: {e}")
|
59
69
|
|
70
|
+
# Prepare Input Data
|
71
|
+
input_data = []
|
72
|
+
user_input_files: List[FileObject] = []
|
73
|
+
for input_file in generated_code.input_files:
|
74
|
+
user_input_files += await FileObjectAdapters.aget_file_objects_by_name(user, input_file)
|
75
|
+
for f in user_input_files:
|
76
|
+
input_data.append(
|
77
|
+
{
|
78
|
+
"filename": os.path.basename(f.file_name),
|
79
|
+
"b64_data": base64.b64encode(f.raw_text.encode("utf-8")).decode("utf-8"),
|
80
|
+
}
|
81
|
+
)
|
82
|
+
|
60
83
|
# Run Code
|
61
84
|
if send_status_func:
|
62
|
-
async for event in send_status_func(f"**Running
|
85
|
+
async for event in send_status_func(f"**Running code snippet**"):
|
63
86
|
yield {ChatEvent.STATUS: event}
|
64
87
|
try:
|
65
|
-
|
66
|
-
|
67
|
-
results = await asyncio.gather(*tasks)
|
68
|
-
for result in results:
|
88
|
+
with timer("Chat actor: Execute generated program", logger, log_level=logging.INFO):
|
89
|
+
result = await execute_sandboxed_python(generated_code.code, input_data, sandbox_url)
|
69
90
|
code = result.pop("code")
|
70
91
|
logger.info(f"Executed Code:\n--@@--\n{code}\n--@@--Result:\n--@@--\n{result}\n--@@--")
|
71
92
|
yield {query: {"code": code, "results": result}}
|
@@ -79,13 +100,13 @@ async def generate_python_code(
|
|
79
100
|
context: str,
|
80
101
|
location_data: LocationData,
|
81
102
|
user: KhojUser,
|
82
|
-
query_images:
|
103
|
+
query_images: list[str] = None,
|
83
104
|
agent: Agent = None,
|
84
105
|
tracer: dict = {},
|
85
|
-
|
106
|
+
query_files: str = None,
|
107
|
+
) -> GeneratedCode:
|
86
108
|
location = f"{location_data}" if location_data else "Unknown"
|
87
109
|
username = prompts.user_name.format(name=user.get_full_name()) if user.get_full_name() else ""
|
88
|
-
subscribed = await ais_user_subscribed(user)
|
89
110
|
chat_history = construct_chat_history(conversation_history)
|
90
111
|
|
91
112
|
utc_date = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
|
@@ -109,32 +130,45 @@ async def generate_python_code(
|
|
109
130
|
response_type="json_object",
|
110
131
|
user=user,
|
111
132
|
tracer=tracer,
|
133
|
+
query_files=query_files,
|
112
134
|
)
|
113
135
|
|
114
136
|
# Validate that the response is a non-empty, JSON-serializable list
|
115
137
|
response = clean_json(response)
|
116
138
|
response = json.loads(response)
|
117
|
-
|
139
|
+
code = response.get("code", "").strip()
|
140
|
+
input_files = response.get("input_files", [])
|
141
|
+
input_links = response.get("input_links", [])
|
118
142
|
|
119
|
-
if not isinstance(
|
143
|
+
if not isinstance(code, str) or is_none_or_empty(code):
|
120
144
|
raise ValueError
|
121
|
-
return
|
145
|
+
return GeneratedCode(code, input_files, input_links)
|
122
146
|
|
123
147
|
|
124
|
-
async def execute_sandboxed_python(code: str, sandbox_url: str = SANDBOX_URL) -> dict[str, Any]:
|
148
|
+
async def execute_sandboxed_python(code: str, input_data: list[dict], sandbox_url: str = SANDBOX_URL) -> dict[str, Any]:
|
125
149
|
"""
|
126
150
|
Takes code to run as a string and calls the terrarium API to execute it.
|
127
151
|
Returns the result of the code execution as a dictionary.
|
152
|
+
|
153
|
+
Reference data i/o format based on Terrarium example client code at:
|
154
|
+
https://github.com/cohere-ai/cohere-terrarium/blob/main/example-clients/python/terrarium_client.py
|
128
155
|
"""
|
129
156
|
headers = {"Content-Type": "application/json"}
|
130
157
|
cleaned_code = clean_code_python(code)
|
131
|
-
data = {"code": cleaned_code}
|
158
|
+
data = {"code": cleaned_code, "files": input_data}
|
132
159
|
|
133
160
|
async with aiohttp.ClientSession() as session:
|
134
161
|
async with session.post(sandbox_url, json=data, headers=headers) as response:
|
135
162
|
if response.status == 200:
|
136
163
|
result: dict[str, Any] = await response.json()
|
137
164
|
result["code"] = cleaned_code
|
165
|
+
# Store decoded output files
|
166
|
+
for output_file in result.get("output_files", []):
|
167
|
+
# Decode text files as UTF-8
|
168
|
+
if mimetypes.guess_type(output_file["filename"])[0].startswith("text/") or Path(
|
169
|
+
output_file["filename"]
|
170
|
+
).suffix in [".org", ".md", ".json"]:
|
171
|
+
output_file["b64_data"] = base64.b64decode(output_file["b64_data"]).decode("utf-8")
|
138
172
|
return result
|
139
173
|
else:
|
140
174
|
return {
|
khoj/routers/api.py
CHANGED
@@ -6,7 +6,7 @@ import os
|
|
6
6
|
import threading
|
7
7
|
import time
|
8
8
|
import uuid
|
9
|
-
from typing import Any, Callable, List, Optional, Union
|
9
|
+
from typing import Any, Callable, List, Optional, Set, Union
|
10
10
|
|
11
11
|
import cron_descriptor
|
12
12
|
import pytz
|
@@ -26,7 +26,6 @@ from khoj.database.adapters import (
|
|
26
26
|
ConversationAdapters,
|
27
27
|
EntryAdapters,
|
28
28
|
get_default_search_model,
|
29
|
-
get_user_default_search_model,
|
30
29
|
get_user_photo,
|
31
30
|
)
|
32
31
|
from khoj.database.models import (
|
@@ -151,7 +150,7 @@ async def execute_search(
|
|
151
150
|
encoded_asymmetric_query = None
|
152
151
|
if t != SearchType.Image:
|
153
152
|
with timer("Encoding query took", logger=logger):
|
154
|
-
search_model = await sync_to_async(
|
153
|
+
search_model = await sync_to_async(get_default_search_model)()
|
155
154
|
encoded_asymmetric_query = state.embeddings_model[search_model.name].embed_query(defiltered_query)
|
156
155
|
|
157
156
|
with concurrent.futures.ThreadPoolExecutor() as executor:
|
@@ -213,7 +212,7 @@ def update(
|
|
213
212
|
logger.warning(error_msg)
|
214
213
|
raise HTTPException(status_code=500, detail=error_msg)
|
215
214
|
try:
|
216
|
-
initialize_content(regenerate=force, search_type=t
|
215
|
+
initialize_content(user=user, regenerate=force, search_type=t)
|
217
216
|
except Exception as e:
|
218
217
|
error_msg = f"🚨 Failed to update server via API: {e}"
|
219
218
|
logger.error(error_msg, exc_info=True)
|
@@ -350,7 +349,9 @@ async def extract_references_and_questions(
|
|
350
349
|
location_data: LocationData = None,
|
351
350
|
send_status_func: Optional[Callable] = None,
|
352
351
|
query_images: Optional[List[str]] = None,
|
352
|
+
previous_inferred_queries: Set = set(),
|
353
353
|
agent: Agent = None,
|
354
|
+
query_files: str = None,
|
354
355
|
tracer: dict = {},
|
355
356
|
):
|
356
357
|
user = request.user.object if request.user.is_authenticated else None
|
@@ -425,6 +426,7 @@ async def extract_references_and_questions(
|
|
425
426
|
user=user,
|
426
427
|
max_prompt_size=conversation_config.max_prompt_size,
|
427
428
|
personality_context=personality_context,
|
429
|
+
query_files=query_files,
|
428
430
|
tracer=tracer,
|
429
431
|
)
|
430
432
|
elif conversation_config.model_type == ChatModelOptions.ModelType.OPENAI:
|
@@ -443,6 +445,7 @@ async def extract_references_and_questions(
|
|
443
445
|
query_images=query_images,
|
444
446
|
vision_enabled=vision_enabled,
|
445
447
|
personality_context=personality_context,
|
448
|
+
query_files=query_files,
|
446
449
|
tracer=tracer,
|
447
450
|
)
|
448
451
|
elif conversation_config.model_type == ChatModelOptions.ModelType.ANTHROPIC:
|
@@ -458,6 +461,7 @@ async def extract_references_and_questions(
|
|
458
461
|
user=user,
|
459
462
|
vision_enabled=vision_enabled,
|
460
463
|
personality_context=personality_context,
|
464
|
+
query_files=query_files,
|
461
465
|
tracer=tracer,
|
462
466
|
)
|
463
467
|
elif conversation_config.model_type == ChatModelOptions.ModelType.GOOGLE:
|
@@ -474,10 +478,12 @@ async def extract_references_and_questions(
|
|
474
478
|
user=user,
|
475
479
|
vision_enabled=vision_enabled,
|
476
480
|
personality_context=personality_context,
|
481
|
+
query_files=query_files,
|
477
482
|
tracer=tracer,
|
478
483
|
)
|
479
484
|
|
480
485
|
# Collate search results as context for GPT
|
486
|
+
inferred_queries = list(set(inferred_queries) - previous_inferred_queries)
|
481
487
|
with timer("Searching knowledge base took", logger):
|
482
488
|
search_results = []
|
483
489
|
logger.info(f"🔍 Searching knowledge base with queries: {inferred_queries}")
|
khoj/routers/api_agents.py
CHANGED
@@ -183,7 +183,7 @@ async def delete_agent(
|
|
183
183
|
|
184
184
|
|
185
185
|
@api_agents.post("", response_class=Response)
|
186
|
-
@requires(["authenticated"
|
186
|
+
@requires(["authenticated"])
|
187
187
|
async def create_agent(
|
188
188
|
request: Request,
|
189
189
|
common: CommonQueryParams,
|
@@ -191,10 +191,9 @@ async def create_agent(
|
|
191
191
|
) -> Response:
|
192
192
|
user: KhojUser = request.user.object
|
193
193
|
|
194
|
-
is_safe_prompt, reason =
|
195
|
-
|
196
|
-
|
197
|
-
is_safe_prompt, reason = await acheck_if_safe_prompt(body.persona)
|
194
|
+
is_safe_prompt, reason = await acheck_if_safe_prompt(
|
195
|
+
body.persona, user, lax=body.privacy_level == Agent.PrivacyLevel.PRIVATE
|
196
|
+
)
|
198
197
|
|
199
198
|
if not is_safe_prompt:
|
200
199
|
return Response(
|
@@ -236,7 +235,7 @@ async def create_agent(
|
|
236
235
|
|
237
236
|
|
238
237
|
@api_agents.patch("", response_class=Response)
|
239
|
-
@requires(["authenticated"
|
238
|
+
@requires(["authenticated"])
|
240
239
|
async def update_agent(
|
241
240
|
request: Request,
|
242
241
|
common: CommonQueryParams,
|
@@ -244,10 +243,9 @@ async def update_agent(
|
|
244
243
|
) -> Response:
|
245
244
|
user: KhojUser = request.user.object
|
246
245
|
|
247
|
-
is_safe_prompt, reason =
|
248
|
-
|
249
|
-
|
250
|
-
is_safe_prompt, reason = await acheck_if_safe_prompt(body.persona)
|
246
|
+
is_safe_prompt, reason = await acheck_if_safe_prompt(
|
247
|
+
body.persona, user, lax=body.privacy_level == Agent.PrivacyLevel.PRIVATE
|
248
|
+
)
|
251
249
|
|
252
250
|
if not is_safe_prompt:
|
253
251
|
return Response(
|
khoj/routers/api_chat.py
CHANGED
@@ -19,7 +19,6 @@ from khoj.database.adapters import (
|
|
19
19
|
AgentAdapters,
|
20
20
|
ConversationAdapters,
|
21
21
|
EntryAdapters,
|
22
|
-
FileObjectAdapters,
|
23
22
|
PublicConversationAdapters,
|
24
23
|
aget_user_name,
|
25
24
|
)
|
@@ -28,7 +27,11 @@ from khoj.processor.conversation.prompts import help_message, no_entries_found
|
|
28
27
|
from khoj.processor.conversation.utils import defilter_query, save_to_conversation_log
|
29
28
|
from khoj.processor.image.generate import text_to_image
|
30
29
|
from khoj.processor.speech.text_to_speech import generate_text_to_speech
|
31
|
-
from khoj.processor.tools.online_search import
|
30
|
+
from khoj.processor.tools.online_search import (
|
31
|
+
deduplicate_organic_results,
|
32
|
+
read_webpages,
|
33
|
+
search_online,
|
34
|
+
)
|
32
35
|
from khoj.processor.tools.run_code import run_code
|
33
36
|
from khoj.routers.api import extract_references_and_questions
|
34
37
|
from khoj.routers.email import send_query_feedback
|
@@ -41,12 +44,13 @@ from khoj.routers.helpers import (
|
|
41
44
|
ConversationCommandRateLimiter,
|
42
45
|
DeleteMessageRequestBody,
|
43
46
|
FeedbackData,
|
47
|
+
acreate_title_from_history,
|
44
48
|
agenerate_chat_response,
|
45
49
|
aget_relevant_information_sources,
|
46
50
|
aget_relevant_output_modes,
|
47
51
|
construct_automation_created_message,
|
48
52
|
create_automation,
|
49
|
-
|
53
|
+
gather_raw_query_files,
|
50
54
|
generate_excalidraw_diagram,
|
51
55
|
generate_summary_from_files,
|
52
56
|
get_conversation_command,
|
@@ -72,7 +76,12 @@ from khoj.utils.helpers import (
|
|
72
76
|
get_device,
|
73
77
|
is_none_or_empty,
|
74
78
|
)
|
75
|
-
from khoj.utils.rawconfig import
|
79
|
+
from khoj.utils.rawconfig import (
|
80
|
+
ChatRequestBody,
|
81
|
+
FileFilterRequest,
|
82
|
+
FilesFilterRequest,
|
83
|
+
LocationData,
|
84
|
+
)
|
76
85
|
|
77
86
|
# Initialize Router
|
78
87
|
logger = logging.getLogger(__name__)
|
@@ -370,7 +379,7 @@ def fork_public_conversation(
|
|
370
379
|
{
|
371
380
|
"status": "ok",
|
372
381
|
"next_url": redirect_uri,
|
373
|
-
"conversation_id": new_conversation.id,
|
382
|
+
"conversation_id": str(new_conversation.id),
|
374
383
|
}
|
375
384
|
),
|
376
385
|
)
|
@@ -526,6 +535,32 @@ async def set_conversation_title(
|
|
526
535
|
)
|
527
536
|
|
528
537
|
|
538
|
+
@api_chat.post("/title")
|
539
|
+
@requires(["authenticated"])
|
540
|
+
async def generate_chat_title(
|
541
|
+
request: Request,
|
542
|
+
common: CommonQueryParams,
|
543
|
+
conversation_id: str,
|
544
|
+
):
|
545
|
+
user: KhojUser = request.user.object
|
546
|
+
conversation = await ConversationAdapters.aget_conversation_by_user(user=user, conversation_id=conversation_id)
|
547
|
+
|
548
|
+
# Conversation.title is explicitly set by the user. Do not override.
|
549
|
+
if conversation.title:
|
550
|
+
return {"status": "ok", "title": conversation.title}
|
551
|
+
|
552
|
+
if not conversation:
|
553
|
+
raise HTTPException(status_code=404, detail="Conversation not found")
|
554
|
+
|
555
|
+
new_title = await acreate_title_from_history(request.user.object, conversation=conversation)
|
556
|
+
|
557
|
+
conversation.slug = new_title
|
558
|
+
|
559
|
+
await conversation.asave()
|
560
|
+
|
561
|
+
return {"status": "ok", "title": new_title}
|
562
|
+
|
563
|
+
|
529
564
|
@api_chat.delete("/conversation/message", response_class=Response)
|
530
565
|
@requires(["authenticated"])
|
531
566
|
def delete_message(request: Request, delete_request: DeleteMessageRequestBody) -> Response:
|
@@ -567,6 +602,7 @@ async def chat(
|
|
567
602
|
country_code = body.country_code or get_country_code_from_timezone(body.timezone)
|
568
603
|
timezone = body.timezone
|
569
604
|
raw_images = body.images
|
605
|
+
raw_query_files = body.files
|
570
606
|
|
571
607
|
async def event_generator(q: str, images: list[str]):
|
572
608
|
start_time = time.perf_counter()
|
@@ -578,6 +614,7 @@ async def chat(
|
|
578
614
|
q = unquote(q)
|
579
615
|
train_of_thought = []
|
580
616
|
nonlocal conversation_id
|
617
|
+
nonlocal raw_query_files
|
581
618
|
|
582
619
|
tracer: dict = {
|
583
620
|
"mid": turn_id,
|
@@ -597,6 +634,11 @@ async def chat(
|
|
597
634
|
if uploaded_image:
|
598
635
|
uploaded_images.append(uploaded_image)
|
599
636
|
|
637
|
+
query_files: Dict[str, str] = {}
|
638
|
+
if raw_query_files:
|
639
|
+
for file in raw_query_files:
|
640
|
+
query_files[file.name] = file.content
|
641
|
+
|
600
642
|
async def send_event(event_type: ChatEvent, data: str | dict):
|
601
643
|
nonlocal connection_alive, ttft, train_of_thought
|
602
644
|
if not connection_alive or await request.is_disconnected():
|
@@ -707,6 +749,8 @@ async def chat(
|
|
707
749
|
## Extract Document References
|
708
750
|
compiled_references: List[Any] = []
|
709
751
|
inferred_queries: List[Any] = []
|
752
|
+
file_filters = conversation.file_filters if conversation and conversation.file_filters else []
|
753
|
+
attached_file_context = gather_raw_query_files(query_files)
|
710
754
|
|
711
755
|
if conversation_commands == [ConversationCommand.Default] or is_automated_task:
|
712
756
|
conversation_commands = await aget_relevant_information_sources(
|
@@ -716,6 +760,7 @@ async def chat(
|
|
716
760
|
user=user,
|
717
761
|
query_images=uploaded_images,
|
718
762
|
agent=agent,
|
763
|
+
query_files=attached_file_context,
|
719
764
|
tracer=tracer,
|
720
765
|
)
|
721
766
|
|
@@ -738,8 +783,13 @@ async def chat(
|
|
738
783
|
conversation_commands.append(mode)
|
739
784
|
|
740
785
|
for cmd in conversation_commands:
|
741
|
-
|
742
|
-
|
786
|
+
try:
|
787
|
+
await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
|
788
|
+
q = q.replace(f"/{cmd.value}", "").strip()
|
789
|
+
except HTTPException as e:
|
790
|
+
async for result in send_llm_response(str(e.detail)):
|
791
|
+
yield result
|
792
|
+
return
|
743
793
|
|
744
794
|
defiltered_query = defilter_query(q)
|
745
795
|
|
@@ -756,6 +806,7 @@ async def chat(
|
|
756
806
|
user_name=user_name,
|
757
807
|
location=location,
|
758
808
|
file_filters=conversation.file_filters if conversation else [],
|
809
|
+
query_files=attached_file_context,
|
759
810
|
tracer=tracer,
|
760
811
|
):
|
761
812
|
if isinstance(research_result, InformationCollectionIteration):
|
@@ -773,11 +824,8 @@ async def chat(
|
|
773
824
|
yield research_result
|
774
825
|
|
775
826
|
# researched_results = await extract_relevant_info(q, researched_results, agent)
|
776
|
-
|
777
|
-
|
778
|
-
for cmd in conversation_commands:
|
779
|
-
await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
|
780
|
-
q = q.replace(f"/{cmd.value}", "").strip()
|
827
|
+
if state.verbose > 1:
|
828
|
+
logger.debug(f"Researched Results: {researched_results}")
|
781
829
|
|
782
830
|
used_slash_summarize = conversation_commands == [ConversationCommand.Summarize]
|
783
831
|
file_filters = conversation.file_filters if conversation else []
|
@@ -798,10 +846,6 @@ async def chat(
|
|
798
846
|
response_log = "No files selected for summarization. Please add files using the section on the left."
|
799
847
|
async for result in send_llm_response(response_log):
|
800
848
|
yield result
|
801
|
-
elif len(file_filters) > 1 and not agent_has_entries:
|
802
|
-
response_log = "Only one file can be selected for summarization."
|
803
|
-
async for result in send_llm_response(response_log):
|
804
|
-
yield result
|
805
849
|
else:
|
806
850
|
async for response in generate_summary_from_files(
|
807
851
|
q=q,
|
@@ -811,6 +855,7 @@ async def chat(
|
|
811
855
|
query_images=uploaded_images,
|
812
856
|
agent=agent,
|
813
857
|
send_status_func=partial(send_event, ChatEvent.STATUS),
|
858
|
+
query_files=attached_file_context,
|
814
859
|
tracer=tracer,
|
815
860
|
):
|
816
861
|
if isinstance(response, dict) and ChatEvent.STATUS in response:
|
@@ -831,8 +876,9 @@ async def chat(
|
|
831
876
|
client_application=request.user.client_app,
|
832
877
|
conversation_id=conversation_id,
|
833
878
|
query_images=uploaded_images,
|
834
|
-
tracer=tracer,
|
835
879
|
train_of_thought=train_of_thought,
|
880
|
+
raw_query_files=raw_query_files,
|
881
|
+
tracer=tracer,
|
836
882
|
)
|
837
883
|
return
|
838
884
|
|
@@ -876,8 +922,9 @@ async def chat(
|
|
876
922
|
inferred_queries=[query_to_run],
|
877
923
|
automation_id=automation.id,
|
878
924
|
query_images=uploaded_images,
|
879
|
-
tracer=tracer,
|
880
925
|
train_of_thought=train_of_thought,
|
926
|
+
raw_query_files=raw_query_files,
|
927
|
+
tracer=tracer,
|
881
928
|
)
|
882
929
|
async for result in send_llm_response(llm_response):
|
883
930
|
yield result
|
@@ -899,6 +946,7 @@ async def chat(
|
|
899
946
|
partial(send_event, ChatEvent.STATUS),
|
900
947
|
query_images=uploaded_images,
|
901
948
|
agent=agent,
|
949
|
+
query_files=attached_file_context,
|
902
950
|
tracer=tracer,
|
903
951
|
):
|
904
952
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
@@ -944,6 +992,7 @@ async def chat(
|
|
944
992
|
custom_filters,
|
945
993
|
query_images=uploaded_images,
|
946
994
|
agent=agent,
|
995
|
+
query_files=attached_file_context,
|
947
996
|
tracer=tracer,
|
948
997
|
):
|
949
998
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
@@ -969,6 +1018,7 @@ async def chat(
|
|
969
1018
|
partial(send_event, ChatEvent.STATUS),
|
970
1019
|
query_images=uploaded_images,
|
971
1020
|
agent=agent,
|
1021
|
+
query_files=attached_file_context,
|
972
1022
|
tracer=tracer,
|
973
1023
|
):
|
974
1024
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
@@ -1009,6 +1059,7 @@ async def chat(
|
|
1009
1059
|
partial(send_event, ChatEvent.STATUS),
|
1010
1060
|
query_images=uploaded_images,
|
1011
1061
|
agent=agent,
|
1062
|
+
query_files=attached_file_context,
|
1012
1063
|
tracer=tracer,
|
1013
1064
|
):
|
1014
1065
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
@@ -1024,12 +1075,13 @@ async def chat(
|
|
1024
1075
|
)
|
1025
1076
|
|
1026
1077
|
## Send Gathered References
|
1078
|
+
unique_online_results = deduplicate_organic_results(online_results)
|
1027
1079
|
async for result in send_event(
|
1028
1080
|
ChatEvent.REFERENCES,
|
1029
1081
|
{
|
1030
1082
|
"inferredQueries": inferred_queries,
|
1031
1083
|
"context": compiled_references,
|
1032
|
-
"onlineContext":
|
1084
|
+
"onlineContext": unique_online_results,
|
1033
1085
|
"codeContext": code_results,
|
1034
1086
|
},
|
1035
1087
|
):
|
@@ -1048,6 +1100,7 @@ async def chat(
|
|
1048
1100
|
send_status_func=partial(send_event, ChatEvent.STATUS),
|
1049
1101
|
query_images=uploaded_images,
|
1050
1102
|
agent=agent,
|
1103
|
+
query_files=attached_file_context,
|
1051
1104
|
tracer=tracer,
|
1052
1105
|
):
|
1053
1106
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
@@ -1078,9 +1131,11 @@ async def chat(
|
|
1078
1131
|
conversation_id=conversation_id,
|
1079
1132
|
compiled_references=compiled_references,
|
1080
1133
|
online_results=online_results,
|
1134
|
+
code_results=code_results,
|
1081
1135
|
query_images=uploaded_images,
|
1082
|
-
tracer=tracer,
|
1083
1136
|
train_of_thought=train_of_thought,
|
1137
|
+
raw_query_files=raw_query_files,
|
1138
|
+
tracer=tracer,
|
1084
1139
|
)
|
1085
1140
|
content_obj = {
|
1086
1141
|
"intentType": intent_type,
|
@@ -1109,14 +1164,20 @@ async def chat(
|
|
1109
1164
|
user=user,
|
1110
1165
|
agent=agent,
|
1111
1166
|
send_status_func=partial(send_event, ChatEvent.STATUS),
|
1167
|
+
query_files=attached_file_context,
|
1112
1168
|
tracer=tracer,
|
1113
1169
|
):
|
1114
1170
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
1115
1171
|
yield result[ChatEvent.STATUS]
|
1116
1172
|
else:
|
1117
1173
|
better_diagram_description_prompt, excalidraw_diagram_description = result
|
1118
|
-
|
1119
|
-
|
1174
|
+
if better_diagram_description_prompt and excalidraw_diagram_description:
|
1175
|
+
inferred_queries.append(better_diagram_description_prompt)
|
1176
|
+
diagram_description = excalidraw_diagram_description
|
1177
|
+
else:
|
1178
|
+
async for result in send_llm_response(f"Failed to generate diagram. Please try again later."):
|
1179
|
+
yield result
|
1180
|
+
return
|
1120
1181
|
|
1121
1182
|
content_obj = {
|
1122
1183
|
"intentType": intent_type,
|
@@ -1136,9 +1197,11 @@ async def chat(
|
|
1136
1197
|
conversation_id=conversation_id,
|
1137
1198
|
compiled_references=compiled_references,
|
1138
1199
|
online_results=online_results,
|
1200
|
+
code_results=code_results,
|
1139
1201
|
query_images=uploaded_images,
|
1140
|
-
tracer=tracer,
|
1141
1202
|
train_of_thought=train_of_thought,
|
1203
|
+
raw_query_files=raw_query_files,
|
1204
|
+
tracer=tracer,
|
1142
1205
|
)
|
1143
1206
|
|
1144
1207
|
async for result in send_llm_response(json.dumps(content_obj)):
|
@@ -1164,8 +1227,10 @@ async def chat(
|
|
1164
1227
|
user_name,
|
1165
1228
|
researched_results,
|
1166
1229
|
uploaded_images,
|
1167
|
-
tracer,
|
1168
1230
|
train_of_thought,
|
1231
|
+
attached_file_context,
|
1232
|
+
raw_query_files,
|
1233
|
+
tracer,
|
1169
1234
|
)
|
1170
1235
|
|
1171
1236
|
# Send Response
|