khoj 1.27.2.dev15__py3-none-any.whl → 1.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +1 -1
- khoj/database/adapters/__init__.py +50 -12
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3423-f4b7df2f6f3362f7.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-da57554315eebcbe.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-2beaba7c9bb750bd.js → page-5ae1e540bb5be8a9.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-9b5c77e0b0dd772c.js → page-774ae3e033f938cd.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-d8f4c107ad78e9e9.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-1cc42ee55f89fb2e.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-4b6008223ea79955.js → page-07e54186b066f5ce.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/{page-ab2995529ece3140.js → page-9b64f61caa5bd7f9.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-10b288c103f19468.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-db775d42e820afb2.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-878569182b3af4c6.js → webpack-8f2abab7b11aa120.js} +1 -1
- khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
- khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +1 -0
- khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +25 -0
- khoj/interface/compiled/_next/static/css/ddcc0cf73e062476.css +1 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
- khoj/processor/conversation/anthropic/utils.py +37 -6
- khoj/processor/conversation/google/gemini_chat.py +23 -13
- khoj/processor/conversation/google/utils.py +34 -10
- khoj/processor/conversation/offline/chat_model.py +48 -16
- khoj/processor/conversation/openai/gpt.py +25 -10
- khoj/processor/conversation/openai/utils.py +50 -9
- khoj/processor/conversation/prompts.py +156 -65
- khoj/processor/conversation/utils.py +306 -6
- khoj/processor/embeddings.py +4 -4
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +27 -12
- khoj/processor/tools/run_code.py +144 -0
- khoj/routers/api.py +11 -6
- khoj/routers/api_chat.py +213 -111
- khoj/routers/helpers.py +171 -60
- khoj/routers/research.py +320 -0
- khoj/search_filter/date_filter.py +1 -3
- khoj/search_filter/file_filter.py +1 -2
- khoj/search_type/text_search.py +3 -3
- khoj/utils/helpers.py +24 -2
- khoj/utils/yaml.py +4 -0
- {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/METADATA +3 -2
- {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/RECORD +68 -65
- khoj/interface/compiled/_next/static/chunks/1603-b9d95833e0e025e8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-0b533af8bf6ac218.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9479-ff7d8c4dae2014d1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
- khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +0 -1
- khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
- khoj/interface/compiled/_next/static/css/d738728883c68af8.css +0 -1
- /khoj/interface/compiled/_next/static/{vcyFRDGArOFXwUVotHIuv → cC7ahn2y_DddSVovjlztj}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{vcyFRDGArOFXwUVotHIuv → cC7ahn2y_DddSVovjlztj}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1970-60c96aed937a4928.js → 1970-d44050bf658ae5cc.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9417-2ca87207387fc790.js → 9417-0d0fc7eb49a86abb.js} +0 -0
- {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/WHEEL +0 -0
- {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/entry_points.txt +0 -0
- {khoj-1.27.2.dev15.dist-info → khoj-1.28.0.dist-info}/licenses/LICENSE +0 -0
khoj/routers/research.py
ADDED
@@ -0,0 +1,320 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
from datetime import datetime
|
4
|
+
from typing import Callable, Dict, List, Optional
|
5
|
+
|
6
|
+
import yaml
|
7
|
+
from fastapi import Request
|
8
|
+
|
9
|
+
from khoj.database.models import Agent, KhojUser
|
10
|
+
from khoj.processor.conversation import prompts
|
11
|
+
from khoj.processor.conversation.utils import (
|
12
|
+
InformationCollectionIteration,
|
13
|
+
clean_json,
|
14
|
+
construct_iteration_history,
|
15
|
+
construct_tool_chat_history,
|
16
|
+
)
|
17
|
+
from khoj.processor.tools.online_search import read_webpages, search_online
|
18
|
+
from khoj.processor.tools.run_code import run_code
|
19
|
+
from khoj.routers.api import extract_references_and_questions
|
20
|
+
from khoj.routers.helpers import (
|
21
|
+
ChatEvent,
|
22
|
+
construct_chat_history,
|
23
|
+
extract_relevant_info,
|
24
|
+
generate_summary_from_files,
|
25
|
+
send_message_to_model_wrapper,
|
26
|
+
)
|
27
|
+
from khoj.utils.helpers import (
|
28
|
+
ConversationCommand,
|
29
|
+
function_calling_description_for_llm,
|
30
|
+
is_none_or_empty,
|
31
|
+
timer,
|
32
|
+
)
|
33
|
+
from khoj.utils.rawconfig import LocationData
|
34
|
+
|
35
|
+
logger = logging.getLogger(__name__)
|
36
|
+
|
37
|
+
|
38
|
+
async def apick_next_tool(
    query: str,
    conversation_history: dict,
    user: KhojUser = None,
    query_images: Optional[List[str]] = None,
    location: LocationData = None,
    user_name: str = None,
    agent: Agent = None,
    previous_iterations_history: str = None,
    max_iterations: int = 5,
    send_status_func: Optional[Callable] = None,
    tracer: Optional[dict] = None,
):
    """
    Ask the chat model which tool to use next to answer the query.

    One tool is picked per call; the caller can invoke this again with the
    history of previous iterations to refine the answer.

    Args:
        query: The user's query.
        conversation_history: Conversation log passed to construct_chat_history.
        user: User on whose behalf the chat model is called.
        query_images: Images attached to the query. Defaults to no images.
        location: Location of the user, rendered into the planning prompt.
        user_name: Name of the user, rendered into the planning prompt.
        agent: Agent whose input tools and personality constrain the plan.
        previous_iterations_history: Rendered history of earlier iterations.
        max_iterations: Iteration budget, rendered into the planning prompt.
        send_status_func: Optional async generator function used to emit status events.
        tracer: Tracing metadata forwarded to the chat model call.

    Yields:
        A `{ChatEvent.STATUS: event}` dict for every event emitted by send_status_func,
        followed by one InformationCollectionIteration holding the picked tool and query.
        If the model response cannot be processed, an InformationCollectionIteration
        with tool and query set to None is yielded instead.
    """
    # Create fresh containers per call instead of sharing mutable defaults across calls
    query_images = [] if query_images is None else query_images
    tracer = {} if tracer is None else tracer

    # Offer every tool when the agent has no tool restrictions, else only the agent's tools
    agent_tools = agent.input_tools if agent else []
    tool_options_str = "".join(
        f'- "{tool.value}": "{description}"\n'
        for tool, description in function_calling_description_for_llm.items()
        if not agent_tools or tool.value in agent_tools
    )

    chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")

    if query_images:
        query = f"[placeholder for user attached images]\n{query}"

    personality_context = (
        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
    )

    # Ground the planning prompt in the current date and the user's location
    today = datetime.today()
    location_data = f"{location}" if location else "Unknown"

    function_planning_prompt = prompts.plan_function_execution.format(
        tools=tool_options_str,
        chat_history=chat_history,
        personality_context=personality_context,
        current_date=today.strftime("%Y-%m-%d"),
        day_of_week=today.strftime("%A"),
        username=user_name or "Unknown",
        location=location_data,
        previous_iterations=previous_iterations_history,
        max_iterations=max_iterations,
    )

    with timer("Chat actor: Infer information sources to refer", logger):
        response = await send_message_to_model_wrapper(
            query=query,
            context=function_planning_prompt,
            response_type="json_object",
            user=user,
            query_images=query_images,
            tracer=tracer,
        )

    try:
        response = clean_json(response)
        response = json.loads(response)
        selected_tool = response.get("tool", None)
        generated_query = response.get("query", None)
        scratchpad = response.get("scratchpad", None)
        logger.info(f"Response for determining relevant tools: {response}")
        if send_status_func:
            # Only the model's reasoning is surfaced to the user as a status update.
            # NOTE(review): a missing scratchpad is sent as the literal "None" - confirm this is intended.
            async for event in send_status_func(f"{scratchpad}"):
                yield {ChatEvent.STATUS: event}

        yield InformationCollectionIteration(
            tool=selected_tool,
            query=generated_query,
        )

    except Exception as e:
        # Fall back to "no tool" so the caller can stop collecting information
        logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
        yield InformationCollectionIteration(
            tool=None,
            query=None,
        )
|
125
|
+
|
126
|
+
|
127
|
+
async def execute_information_collection(
    request: Request,
    user: KhojUser,
    query: str,
    conversation_id: str,
    conversation_history: dict,
    query_images: List[str],
    agent: Agent = None,
    send_status_func: Optional[Callable] = None,
    user_name: str = None,
    location: LocationData = None,
    file_filters: Optional[List[str]] = None,
    tracer: Optional[dict] = None,
):
    """
    Iteratively pick and run tools to collect information for answering the query.

    Each iteration asks apick_next_tool for the next tool, runs it (notes search,
    online search, webpage read, code execution or file summarization) and records
    the results on the iteration. The loop stops after MAX_ITERATIONS iterations or
    as soon as the picked tool matches none of the supported tools.

    Args:
        request: Incoming request, forwarded to the notes search.
        user: User on whose behalf tools are run.
        query: The user's query.
        conversation_id: Id of the conversation, forwarded to the notes search.
        conversation_history: Conversation log used when picking the next tool.
        query_images: Images attached to the query.
        agent: Agent to use when picking and running tools.
        send_status_func: Optional async generator function used to emit status events.
        user_name: Name of the user, used when picking the next tool.
        location: Location of the user.
        file_filters: Files to restrict summarization to. Defaults to no filters.
        tracer: Tracing metadata forwarded to the tools.

    Yields:
        Status events emitted while picking and running tools and, at the end of
        every iteration, the InformationCollectionIteration for that iteration.
    """
    # Create fresh containers per call instead of sharing mutable defaults across calls
    file_filters = [] if file_filters is None else file_filters
    tracer = {} if tracer is None else tracer

    current_iteration = 0
    MAX_ITERATIONS = 5
    previous_iterations: List[InformationCollectionIteration] = []
    while current_iteration < MAX_ITERATIONS:
        online_results: Dict = dict()
        code_results: Dict = dict()
        document_results: List[Dict[str, str]] = []
        summarize_files: str = ""
        this_iteration = InformationCollectionIteration(tool=None, query=query)
        previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)

        async for result in apick_next_tool(
            query,
            conversation_history,
            user,
            query_images,
            location,
            user_name,
            agent,
            previous_iterations_history,
            MAX_ITERATIONS,
            send_status_func,
            tracer=tracer,
        ):
            if isinstance(result, dict) and ChatEvent.STATUS in result:
                yield result[ChatEvent.STATUS]
            elif isinstance(result, InformationCollectionIteration):
                this_iteration = result

        if this_iteration.tool == ConversationCommand.Notes:
            this_iteration.context = []
            document_results = []
            async for result in extract_references_and_questions(
                request,
                construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
                this_iteration.query,
                7,
                None,
                conversation_id,
                [ConversationCommand.Default],
                location,
                send_status_func,
                query_images,
                agent=agent,
                tracer=tracer,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                elif isinstance(result, tuple):
                    document_results = result[0]
                    this_iteration.context += document_results

            if send_status_func and not is_none_or_empty(document_results):
                try:
                    distinct_files = {d["file"] for d in document_results}
                    # The first line of each compiled entry is treated as its heading
                    distinct_headings = {d["compiled"].split("\n")[0] for d in document_results if "compiled" in d}
                    # Drop every "#" character from the joined headings before showing them
                    headings_str = "\n- " + "\n- ".join(distinct_headings).replace("#", "")
                    async for result in send_status_func(
                        f"**Found {len(distinct_headings)} Notes Across {len(distinct_files)} Files**: {headings_str}"
                    ):
                        yield result
                except Exception as e:
                    logger.error(f"Error extracting document references: {e}", exc_info=True)

        elif this_iteration.tool == ConversationCommand.Online:
            async for result in search_online(
                this_iteration.query,
                construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
                location,
                user,
                send_status_func,
                [],
                max_webpages_to_read=0,
                query_images=query_images,
                agent=agent,
                tracer=tracer,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                else:
                    online_results = result  # type: ignore
                    this_iteration.onlineContext = online_results

        elif this_iteration.tool == ConversationCommand.Webpage:
            try:
                # Start empty so a run that only emits status events does not hit an unbound name
                direct_web_pages: Dict[str, Dict] = {}
                async for result in read_webpages(
                    this_iteration.query,
                    construct_tool_chat_history(previous_iterations, ConversationCommand.Webpage),
                    location,
                    user,
                    send_status_func,
                    query_images=query_images,
                    agent=agent,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        direct_web_pages = result  # type: ignore

                # online_results is empty at this point, so each query gets a fresh entry
                for web_query, web_result in direct_web_pages.items():
                    online_results[web_query] = {"webpages": web_result["webpages"]}
                this_iteration.onlineContext = online_results
            except Exception as e:
                logger.error(f"Error reading webpages: {e}", exc_info=True)

        elif this_iteration.tool == ConversationCommand.Code:
            try:
                async for result in run_code(
                    this_iteration.query,
                    # Build the history for the code tool itself, like every other branch does for its tool
                    construct_tool_chat_history(previous_iterations, ConversationCommand.Code),
                    "",
                    location,
                    user,
                    send_status_func,
                    query_images=query_images,
                    agent=agent,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        code_results = result  # type: ignore
                        this_iteration.codeContext = code_results
                if send_status_func:
                    # code_results equals the iteration's code context whenever results were produced
                    async for result in send_status_func(f"**Ran code snippets**: {len(code_results)}"):
                        yield result
            except ValueError as e:
                logger.warning(
                    f"Failed to use code tool: {e}. Attempting to respond without code results",
                    exc_info=True,
                )

        elif this_iteration.tool == ConversationCommand.Summarize:
            try:
                async for result in generate_summary_from_files(
                    this_iteration.query,
                    user,
                    file_filters,
                    construct_tool_chat_history(previous_iterations),
                    query_images=query_images,
                    agent=agent,
                    send_status_func=send_status_func,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        summarize_files = result  # type: ignore
            except Exception as e:
                logger.error(f"Error generating summary: {e}", exc_info=True)

        else:
            # No valid tools. This is our exit condition.
            current_iteration = MAX_ITERATIONS

        current_iteration += 1

        # Render whatever this iteration collected into a single text summary
        result_sections = (
            ("Document References", document_results),
            ("Online Results", online_results),
            ("Code Results", code_results),
            ("Summarized Files", summarize_files),
        )
        if any(section_data for _, section_data in result_sections):
            results_data = "**Results**:\n"
            for section_title, section_data in result_sections:
                if section_data:
                    results_data += f"**{section_title}**:\n{yaml.dump(section_data, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
            this_iteration.summarizedResult = results_data

        previous_iterations.append(this_iteration)
        yield this_iteration
|
@@ -7,8 +7,6 @@ from math import inf
|
|
7
7
|
from typing import List, Tuple
|
8
8
|
|
9
9
|
import dateparser as dtparse
|
10
|
-
from dateparser.search import search_dates
|
11
|
-
from dateparser_data.settings import default_parsers
|
12
10
|
from dateutil.relativedelta import relativedelta
|
13
11
|
|
14
12
|
from khoj.search_filter.base_filter import BaseFilter
|
@@ -23,7 +21,7 @@ class DateFilter(BaseFilter):
|
|
23
21
|
# - dt>="yesterday" dt<"tomorrow"
|
24
22
|
# - dt>="last week"
|
25
23
|
# - dt:"2 years ago"
|
26
|
-
date_regex = r"dt([:><=]{1,2})[\"'](.*?)[\"']"
|
24
|
+
date_regex = r"dt([:><=]{1,2})[\"'‘’](.*?)[\"'‘’]"
|
27
25
|
|
28
26
|
def __init__(self, entry_key="compiled"):
|
29
27
|
self.entry_key = entry_key
|
@@ -1,11 +1,10 @@
|
|
1
|
-
import fnmatch
|
2
1
|
import logging
|
3
2
|
import re
|
4
3
|
from collections import defaultdict
|
5
4
|
from typing import List
|
6
5
|
|
7
6
|
from khoj.search_filter.base_filter import BaseFilter
|
8
|
-
from khoj.utils.helpers import LRU
|
7
|
+
from khoj.utils.helpers import LRU
|
9
8
|
|
10
9
|
logger = logging.getLogger(__name__)
|
11
10
|
|
khoj/search_type/text_search.py
CHANGED
@@ -102,8 +102,8 @@ def load_embeddings(
|
|
102
102
|
|
103
103
|
|
104
104
|
async def query(
|
105
|
-
user: KhojUser,
|
106
105
|
raw_query: str,
|
106
|
+
user: KhojUser,
|
107
107
|
type: SearchType = SearchType.All,
|
108
108
|
question_embedding: Union[torch.Tensor, None] = None,
|
109
109
|
max_distance: float = None,
|
@@ -130,12 +130,12 @@ async def query(
|
|
130
130
|
top_k = 10
|
131
131
|
with timer("Search Time", logger, state.device):
|
132
132
|
hits = EntryAdapters.search_with_embeddings(
|
133
|
-
|
133
|
+
raw_query=raw_query,
|
134
134
|
embeddings=question_embedding,
|
135
135
|
max_results=top_k,
|
136
136
|
file_type_filter=file_type,
|
137
|
-
raw_query=raw_query,
|
138
137
|
max_distance=max_distance,
|
138
|
+
user=user,
|
139
139
|
agent=agent,
|
140
140
|
).all()
|
141
141
|
hits = await sync_to_async(list)(hits) # type: ignore[call-arg]
|
khoj/utils/helpers.py
CHANGED
@@ -101,6 +101,15 @@ def merge_dicts(priority_dict: dict, default_dict: dict):
|
|
101
101
|
return merged_dict
|
102
102
|
|
103
103
|
|
104
|
+
def fix_json_dict(json_dict: dict) -> dict:
    """
    Convert the strings "True" and "False" in a dict to booleans, in place.

    Nested dicts are processed recursively. Values of any other type,
    including lists, are left untouched.

    Returns the same dict object that was passed in.
    """
    for key in list(json_dict):
        value = json_dict[key]
        if isinstance(value, dict):
            json_dict[key] = fix_json_dict(value)
        elif value in ("True", "False"):
            json_dict[key] = value == "True"
    return json_dict
|
111
|
+
|
112
|
+
|
104
113
|
def get_file_type(file_type: str, file_content: bytes) -> tuple[str, str]:
|
105
114
|
"Get file type from file mime type"
|
106
115
|
|
@@ -313,12 +322,14 @@ class ConversationCommand(str, Enum):
|
|
313
322
|
Help = "help"
|
314
323
|
Online = "online"
|
315
324
|
Webpage = "webpage"
|
325
|
+
Code = "code"
|
316
326
|
Image = "image"
|
317
327
|
Text = "text"
|
318
328
|
Automation = "automation"
|
319
329
|
AutomatedTask = "automated_task"
|
320
330
|
Summarize = "summarize"
|
321
331
|
Diagram = "diagram"
|
332
|
+
Research = "research"
|
322
333
|
|
323
334
|
|
324
335
|
command_descriptions = {
|
@@ -327,11 +338,13 @@ command_descriptions = {
|
|
327
338
|
ConversationCommand.Default: "The default command when no command specified. It intelligently auto-switches between general and notes mode.",
|
328
339
|
ConversationCommand.Online: "Search for information on the internet.",
|
329
340
|
ConversationCommand.Webpage: "Get information from webpage suggested by you.",
|
341
|
+
ConversationCommand.Code: "Run Python code to parse information, run complex calculations, create documents and charts.",
|
330
342
|
ConversationCommand.Image: "Generate illustrative, creative images by describing your imagination in words.",
|
331
343
|
ConversationCommand.Automation: "Automatically run your query at a specified time or interval.",
|
332
344
|
ConversationCommand.Help: "Get help with how to use or setup Khoj from the documentation",
|
333
345
|
ConversationCommand.Summarize: "Get help with a question pertaining to an entire document.",
|
334
346
|
ConversationCommand.Diagram: "Draw a flowchart, diagram, or any other visual representation best expressed with primitives like lines, rectangles, and text.",
|
347
|
+
ConversationCommand.Research: "Do deep research on a topic. This will take longer than usual, but give a more detailed, comprehensive answer.",
|
335
348
|
}
|
336
349
|
|
337
350
|
command_descriptions_for_agent = {
|
@@ -340,6 +353,7 @@ command_descriptions_for_agent = {
|
|
340
353
|
ConversationCommand.Online: "Agent can search the internet for information.",
|
341
354
|
ConversationCommand.Webpage: "Agent can read suggested web pages for information.",
|
342
355
|
ConversationCommand.Summarize: "Agent can read an entire document. Agents knowledge base must be a single document.",
|
356
|
+
ConversationCommand.Research: "Agent can do deep research on a topic.",
|
343
357
|
}
|
344
358
|
|
345
359
|
tool_descriptions_for_llm = {
|
@@ -348,18 +362,26 @@ tool_descriptions_for_llm = {
|
|
348
362
|
ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
|
349
363
|
ConversationCommand.Online: "To search for the latest, up-to-date information from the internet. Note: **Questions about Khoj should always use this data source**",
|
350
364
|
ConversationCommand.Webpage: "To use if the user has directly provided the webpage urls or you are certain of the webpage urls to read.",
|
365
|
+
ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create documents and charts for user. Matplotlib, bs4, pandas, numpy, etc. are available.",
|
351
366
|
ConversationCommand.Summarize: "To retrieve an answer that depends on the entire document or a large text.",
|
352
367
|
}
|
353
368
|
|
369
|
+
function_calling_description_for_llm = {
|
370
|
+
ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
|
371
|
+
ConversationCommand.Online: "To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed.",
|
372
|
+
ConversationCommand.Webpage: "To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share the webpage links and information to extract in your query.",
|
373
|
+
ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create charts for user. Matplotlib, bs4, pandas, numpy, etc. are available.",
|
374
|
+
}
|
375
|
+
|
354
376
|
mode_descriptions_for_llm = {
|
355
|
-
ConversationCommand.Image: "Use this if you are confident the user is requesting you to create a new picture based on their description.",
|
377
|
+
ConversationCommand.Image: "Use this if you are confident the user is requesting you to create a new picture based on their description. This does not support generating charts or graphs.",
|
356
378
|
ConversationCommand.Automation: "Use this if you are confident the user is requesting a response at a scheduled date, time and frequency",
|
357
379
|
ConversationCommand.Text: "Use this if a normal text response would be sufficient for accurately responding to the query.",
|
358
380
|
ConversationCommand.Diagram: "Use this if the user is requesting a diagram or visual representation that requires primitives like lines, rectangles, and text.",
|
359
381
|
}
|
360
382
|
|
361
383
|
mode_descriptions_for_agent = {
|
362
|
-
ConversationCommand.Image: "Agent can generate
|
384
|
+
ConversationCommand.Image: "Agent can generate images in response. It cannot not use this to generate charts and graphs.",
|
363
385
|
ConversationCommand.Automation: "Agent can schedule a task to run at a scheduled date, time and frequency in response.",
|
364
386
|
ConversationCommand.Text: "Agent can generate text in response.",
|
365
387
|
ConversationCommand.Diagram: "Agent can generate a visual representation that requires primitives like lines, rectangles, and text.",
|
khoj/utils/yaml.py
CHANGED
@@ -41,3 +41,7 @@ def parse_config_from_string(yaml_config: dict) -> FullConfig:
|
|
41
41
|
def parse_config_from_file(yaml_config_file):
|
42
42
|
"Parse and validate config in YML file"
|
43
43
|
return parse_config_from_string(load_config_from_file(yaml_config_file))
|
44
|
+
|
45
|
+
|
46
|
+
def yaml_dump(data):
    "Dump data to YAML in block style, keeping unicode characters and the original key order"
    dump_options = {"allow_unicode": True, "sort_keys": False, "default_flow_style": False}
    return yaml.dump(data, **dump_options)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: khoj
|
3
|
-
Version: 1.
|
3
|
+
Version: 1.28.0
|
4
4
|
Summary: Your Second Brain
|
5
5
|
Project-URL: Homepage, https://khoj.dev
|
6
6
|
Project-URL: Documentation, https://docs.khoj.dev
|
@@ -36,7 +36,7 @@ Requires-Dist: django==5.0.9
|
|
36
36
|
Requires-Dist: docx2txt==0.8
|
37
37
|
Requires-Dist: einops==0.8.0
|
38
38
|
Requires-Dist: fastapi>=0.110.0
|
39
|
-
Requires-Dist: google-generativeai==0.
|
39
|
+
Requires-Dist: google-generativeai==0.8.3
|
40
40
|
Requires-Dist: httpx==0.25.0
|
41
41
|
Requires-Dist: huggingface-hub>=0.22.2
|
42
42
|
Requires-Dist: itsdangerous==2.1.2
|
@@ -78,6 +78,7 @@ Requires-Dist: black>=23.1.0; extra == 'dev'
|
|
78
78
|
Requires-Dist: boto3>=1.34.57; extra == 'dev'
|
79
79
|
Requires-Dist: factory-boy>=3.2.1; extra == 'dev'
|
80
80
|
Requires-Dist: freezegun>=1.2.0; extra == 'dev'
|
81
|
+
Requires-Dist: gitpython~=3.1.43; extra == 'dev'
|
81
82
|
Requires-Dist: google-auth==2.23.3; extra == 'dev'
|
82
83
|
Requires-Dist: gunicorn==22.0.0; extra == 'dev'
|
83
84
|
Requires-Dist: mypy>=1.0.1; extra == 'dev'
|