khoj 1.27.2.dev18__py3-none-any.whl → 1.27.2.dev130__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +34 -10
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1467-5a191c1cd5bf0b83.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1603-5d70d9dfcdcb1f10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3423-fa918f4e5365a35e.js +1 -0
- khoj/interface/compiled/_next/static/chunks/8423-3ad0bfb299801220.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-7dc98df9c88828f0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-d887f55fe6d4f35d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{page-8f22b790e50dd722.js → page-d46244282af16509.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-6a01e07fb244c10c.js → page-505b07bce608b34e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-31239d193815e49e.js → webpack-8ae5ce45161bd98e.js} +1 -1
- khoj/interface/compiled/_next/static/css/{2272c73fc7a3b571.css → 26c1c33d0423a7d8.css} +1 -1
- khoj/interface/compiled/_next/static/css/e9c5fe555dd3050b.css +25 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/factchecker/index.html +1 -1
- khoj/interface/compiled/factchecker/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +19 -10
- khoj/processor/conversation/anthropic/utils.py +37 -6
- khoj/processor/conversation/google/gemini_chat.py +23 -13
- khoj/processor/conversation/google/utils.py +34 -10
- khoj/processor/conversation/offline/chat_model.py +40 -15
- khoj/processor/conversation/openai/gpt.py +25 -10
- khoj/processor/conversation/openai/utils.py +43 -9
- khoj/processor/conversation/prompts.py +131 -22
- khoj/processor/conversation/utils.py +299 -6
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +19 -8
- khoj/processor/tools/run_code.py +144 -0
- khoj/routers/api.py +11 -6
- khoj/routers/api_chat.py +177 -88
- khoj/routers/helpers.py +155 -59
- khoj/routers/research.py +321 -0
- khoj/search_filter/date_filter.py +1 -3
- khoj/search_filter/file_filter.py +1 -2
- khoj/search_type/text_search.py +3 -3
- khoj/utils/helpers.py +15 -2
- khoj/utils/yaml.py +4 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/METADATA +2 -1
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/RECORD +61 -58
- khoj/interface/compiled/_next/static/chunks/1603-5138bb7c8035d9a6.js +0 -1
- khoj/interface/compiled/_next/static/chunks/2697-61fcba89fd87eab4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-8e9c420574a9fbe3.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9479-a5e7ff4c7d1d7ee7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-151232d8417a1ea1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-798904432c2417c4.js +0 -1
- khoj/interface/compiled/_next/static/css/76d55eb435962b19.css +0 -25
- /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{_gBBcNbs4wMKxKXhQs5E4 → N19uqHAJYqRAVxvuVwHfE}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1970-1d6d0c1b00b4f343.js → 1970-444843bea1d17d61.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9417-759984ad62caa3dc.js → 9417-19cfd1a9cb758e71.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/app/settings/{page-7946cabb9c54e22d.js → page-89e6737b2cc9fb3a.js} +0 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/WHEEL +0 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/entry_points.txt +0 -0
- {khoj-1.27.2.dev18.dist-info → khoj-1.27.2.dev130.dist-info}/licenses/LICENSE +0 -0
khoj/routers/research.py
ADDED
@@ -0,0 +1,321 @@
|
|
1
|
+
import json
|
2
|
+
import logging
|
3
|
+
from datetime import datetime
|
4
|
+
from typing import Any, Callable, Dict, List, Optional
|
5
|
+
|
6
|
+
import yaml
|
7
|
+
from fastapi import Request
|
8
|
+
|
9
|
+
from khoj.database.adapters import ConversationAdapters, EntryAdapters
|
10
|
+
from khoj.database.models import Agent, KhojUser
|
11
|
+
from khoj.processor.conversation import prompts
|
12
|
+
from khoj.processor.conversation.utils import (
|
13
|
+
InformationCollectionIteration,
|
14
|
+
clean_json,
|
15
|
+
construct_iteration_history,
|
16
|
+
construct_tool_chat_history,
|
17
|
+
)
|
18
|
+
from khoj.processor.tools.online_search import read_webpages, search_online
|
19
|
+
from khoj.processor.tools.run_code import run_code
|
20
|
+
from khoj.routers.api import extract_references_and_questions
|
21
|
+
from khoj.routers.helpers import (
|
22
|
+
ChatEvent,
|
23
|
+
construct_chat_history,
|
24
|
+
extract_relevant_info,
|
25
|
+
generate_summary_from_files,
|
26
|
+
send_message_to_model_wrapper,
|
27
|
+
)
|
28
|
+
from khoj.utils.helpers import (
|
29
|
+
ConversationCommand,
|
30
|
+
function_calling_description_for_llm,
|
31
|
+
is_none_or_empty,
|
32
|
+
timer,
|
33
|
+
)
|
34
|
+
from khoj.utils.rawconfig import LocationData
|
35
|
+
|
36
|
+
logger = logging.getLogger(__name__)
|
37
|
+
|
38
|
+
|
39
|
+
async def apick_next_tool(
    query: str,
    conversation_history: dict,
    user: KhojUser = None,
    query_images: Optional[List[str]] = None,
    location: LocationData = None,
    user_name: str = None,
    agent: Agent = None,
    previous_iterations_history: str = None,
    max_iterations: int = 5,
    send_status_func: Optional[Callable] = None,
    tracer: Optional[dict] = None,
):
    """
    Ask the chat model which tool to use next to answer the query.

    Only one tool is picked per call. The caller is expected to call this again,
    passing the accumulated `previous_iterations_history`, to refine the answer.

    This is an async generator. It yields:
    - `{ChatEvent.STATUS: event}` for each event produced by `send_status_func`, if one is provided
    - exactly one `InformationCollectionIteration` holding the selected tool and the query generated for it.
      Both are None when the model response cannot be parsed.
    """
    # Create fresh containers per call instead of sharing mutable default arguments across calls
    query_images = [] if query_images is None else query_images
    tracer = {} if tracer is None else tracer

    # Offer every tool when the agent has no input tools configured, else only the agent's tools
    agent_tools = (agent.input_tools if agent else None) or []
    tool_options_str = "".join(
        f'- "{tool.value}": "{description}"\n'
        for tool, description in function_calling_description_for_llm.items()
        if not agent_tools or tool.value in agent_tools
    )

    chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")

    if query_images:
        query = f"[placeholder for user attached images]\n{query}"

    personality_context = (
        prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
    )

    today = datetime.today()
    location_data = f"{location}" if location else "Unknown"

    function_planning_prompt = prompts.plan_function_execution.format(
        tools=tool_options_str,
        chat_history=chat_history,
        personality_context=personality_context,
        current_date=today.strftime("%Y-%m-%d"),
        day_of_week=today.strftime("%A"),
        username=user_name or "Unknown",
        location=location_data,
        previous_iterations=previous_iterations_history,
        max_iterations=max_iterations,
    )

    with timer("Chat actor: Infer information sources to refer", logger):
        response = await send_message_to_model_wrapper(
            query=query,
            context=function_planning_prompt,
            response_type="json_object",
            user=user,
            query_images=query_images,
            tracer=tracer,
        )

    try:
        response = json.loads(clean_json(response))
        selected_tool = response.get("tool", None)
        generated_query = response.get("query", None)
        scratchpad = response.get("scratchpad", None)
        logger.info(f"Response for determining relevant tools: {response}")

        # Share the model's reasoning as a status update.
        # Skip it when no reasoning was returned, to avoid showing the literal text "None".
        if send_status_func and scratchpad:
            async for event in send_status_func(f"{scratchpad}"):
                yield {ChatEvent.STATUS: event}

        yield InformationCollectionIteration(
            tool=selected_tool,
            query=generated_query,
        )

    except Exception as e:
        # Best effort: an unusable response yields an empty iteration rather than raising
        logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
        yield InformationCollectionIteration(
            tool=None,
            query=None,
        )
|
126
|
+
|
127
|
+
|
128
|
+
async def execute_information_collection(
    request: Request,
    user: KhojUser,
    query: str,
    conversation_id: str,
    conversation_history: dict,
    query_images: List[str],
    agent: Agent = None,
    send_status_func: Optional[Callable] = None,
    user_name: str = None,
    location: LocationData = None,
    file_filters: Optional[List[str]] = None,
    tracer: Optional[dict] = None,
):
    """
    Iteratively pick and run tools to collect information for answering the query.

    Each iteration asks `apick_next_tool` for the next tool, runs that tool
    (notes search, online search, webpage read, code run or file summary) and
    records what it returned. The loop stops after MAX_ITERATIONS iterations
    or as soon as no known tool is selected.

    This is an async generator. It yields:
    - status events forwarded from the tools and from `send_status_func`
    - one `InformationCollectionIteration` per iteration, with the collected results attached
    """
    # Create fresh containers per call instead of sharing mutable default arguments across calls
    file_filters = [] if file_filters is None else file_filters
    tracer = {} if tracer is None else tracer

    current_iteration = 0
    MAX_ITERATIONS = 5
    previous_iterations: List[InformationCollectionIteration] = []
    while current_iteration < MAX_ITERATIONS:
        online_results: Dict = dict()
        code_results: Dict = dict()
        document_results: List[Dict[str, str]] = []
        summarize_files: str = ""
        this_iteration = InformationCollectionIteration(tool=None, query=query)
        previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)

        async for result in apick_next_tool(
            query,
            conversation_history,
            user,
            query_images,
            location,
            user_name,
            agent,
            previous_iterations_history,
            MAX_ITERATIONS,
            send_status_func,
            tracer=tracer,
        ):
            if isinstance(result, dict) and ChatEvent.STATUS in result:
                yield result[ChatEvent.STATUS]
            elif isinstance(result, InformationCollectionIteration):
                this_iteration = result

        if this_iteration.tool == ConversationCommand.Notes:
            this_iteration.context = []
            async for result in extract_references_and_questions(
                request,
                construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
                this_iteration.query,
                7,
                None,
                conversation_id,
                [ConversationCommand.Default],
                location,
                send_status_func,
                query_images,
                agent=agent,
                tracer=tracer,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                elif isinstance(result, tuple):
                    document_results = result[0]
                    this_iteration.context += document_results

            # Summarize the notes found as a status update. Only relevant when a status callback is set
            if send_status_func and not is_none_or_empty(document_results):
                try:
                    distinct_files = {d["file"] for d in document_results}
                    distinct_headings = {d["compiled"].split("\n")[0] for d in document_results if "compiled" in d}
                    # Strip only leading # from headings
                    headings_str = "\n- " + "\n- ".join(heading.lstrip("#") for heading in distinct_headings)
                    async for status_event in send_status_func(
                        f"**Found {len(distinct_headings)} Notes Across {len(distinct_files)} Files**: {headings_str}"
                    ):
                        yield status_event
                except Exception as e:
                    logger.error(f"Error extracting document references: {e}", exc_info=True)

        elif this_iteration.tool == ConversationCommand.Online:
            async for result in search_online(
                this_iteration.query,
                construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
                location,
                user,
                send_status_func,
                [],
                max_webpages_to_read=0,
                query_images=query_images,
                agent=agent,
                tracer=tracer,
            ):
                if isinstance(result, dict) and ChatEvent.STATUS in result:
                    yield result[ChatEvent.STATUS]
                else:
                    online_results = result  # type: ignore
                    this_iteration.onlineContext = online_results

        elif this_iteration.tool == ConversationCommand.Webpage:
            try:
                async for result in read_webpages(
                    this_iteration.query,
                    construct_tool_chat_history(previous_iterations, ConversationCommand.Webpage),
                    location,
                    user,
                    send_status_func,
                    query_images=query_images,
                    agent=agent,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        direct_web_pages: Dict[str, Dict] = result  # type: ignore

                        # Merge the webpages read for each query into this iteration's online results
                        for web_query in direct_web_pages:
                            if online_results.get(web_query):
                                online_results[web_query]["webpages"] = direct_web_pages[web_query]["webpages"]
                            else:
                                online_results[web_query] = {"webpages": direct_web_pages[web_query]["webpages"]}
                        this_iteration.onlineContext = online_results
            except Exception as e:
                logger.error(f"Error reading webpages: {e}", exc_info=True)

        elif this_iteration.tool == ConversationCommand.Code:
            try:
                async for result in run_code(
                    this_iteration.query,
                    # Build the tool chat history from past code iterations, not past webpage iterations
                    construct_tool_chat_history(previous_iterations, ConversationCommand.Code),
                    "",
                    location,
                    user,
                    send_status_func,
                    query_images=query_images,
                    agent=agent,
                    tracer=tracer,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        code_results = result  # type: ignore
                        this_iteration.codeContext = code_results
                if send_status_func:
                    # code_results is what was stored on codeContext above. It stays empty if the tool returned nothing
                    async for status_event in send_status_func(f"**Ran code snippets**: {len(code_results)}"):
                        yield status_event
            except ValueError as e:
                logger.warning(
                    f"Failed to use code tool: {e}. Attempting to respond without code results",
                    exc_info=True,
                )

        elif this_iteration.tool == ConversationCommand.Summarize:
            try:
                async for result in generate_summary_from_files(
                    this_iteration.query,
                    user,
                    file_filters,
                    construct_tool_chat_history(previous_iterations),
                    query_images=query_images,
                    agent=agent,
                    send_status_func=send_status_func,
                ):
                    if isinstance(result, dict) and ChatEvent.STATUS in result:
                        yield result[ChatEvent.STATUS]
                    else:
                        summarize_files = result  # type: ignore
            except Exception as e:
                logger.error(f"Error generating summary: {e}", exc_info=True)

        else:
            # No valid tools. This is our exit condition.
            current_iteration = MAX_ITERATIONS

        current_iteration += 1

        # Attach a YAML rendering of whatever this iteration collected, for use in later iterations
        collected_results = [
            ("Document References", document_results),
            ("Online Results", online_results),
            ("Code Results", code_results),
            ("Summarized Files", summarize_files),
        ]
        if any(data for _, data in collected_results):
            results_data = "**Results**:\n"
            for label, data in collected_results:
                if data:
                    dumped = yaml.dump(data, allow_unicode=True, sort_keys=False, default_flow_style=False)
                    results_data += f"**{label}**: {dumped}\n"
            this_iteration.summarizedResult = results_data

        previous_iterations.append(this_iteration)
        yield this_iteration
|
@@ -7,8 +7,6 @@ from math import inf
|
|
7
7
|
from typing import List, Tuple
|
8
8
|
|
9
9
|
import dateparser as dtparse
|
10
|
-
from dateparser.search import search_dates
|
11
|
-
from dateparser_data.settings import default_parsers
|
12
10
|
from dateutil.relativedelta import relativedelta
|
13
11
|
|
14
12
|
from khoj.search_filter.base_filter import BaseFilter
|
@@ -23,7 +21,7 @@ class DateFilter(BaseFilter):
|
|
23
21
|
# - dt>="yesterday" dt<"tomorrow"
|
24
22
|
# - dt>="last week"
|
25
23
|
# - dt:"2 years ago"
|
26
|
-
date_regex = r"dt([:><=]{1,2})[\"'](.*?)[\"']"
|
24
|
+
date_regex = r"dt([:><=]{1,2})[\"'‘’](.*?)[\"'‘’]"
|
27
25
|
|
28
26
|
def __init__(self, entry_key="compiled"):
|
29
27
|
self.entry_key = entry_key
|
@@ -1,11 +1,10 @@
|
|
1
|
-
import fnmatch
|
2
1
|
import logging
|
3
2
|
import re
|
4
3
|
from collections import defaultdict
|
5
4
|
from typing import List
|
6
5
|
|
7
6
|
from khoj.search_filter.base_filter import BaseFilter
|
8
|
-
from khoj.utils.helpers import LRU
|
7
|
+
from khoj.utils.helpers import LRU
|
9
8
|
|
10
9
|
logger = logging.getLogger(__name__)
|
11
10
|
|
khoj/search_type/text_search.py
CHANGED
@@ -102,8 +102,8 @@ def load_embeddings(
|
|
102
102
|
|
103
103
|
|
104
104
|
async def query(
|
105
|
-
user: KhojUser,
|
106
105
|
raw_query: str,
|
106
|
+
user: KhojUser,
|
107
107
|
type: SearchType = SearchType.All,
|
108
108
|
question_embedding: Union[torch.Tensor, None] = None,
|
109
109
|
max_distance: float = None,
|
@@ -130,12 +130,12 @@ async def query(
|
|
130
130
|
top_k = 10
|
131
131
|
with timer("Search Time", logger, state.device):
|
132
132
|
hits = EntryAdapters.search_with_embeddings(
|
133
|
-
|
133
|
+
raw_query=raw_query,
|
134
134
|
embeddings=question_embedding,
|
135
135
|
max_results=top_k,
|
136
136
|
file_type_filter=file_type,
|
137
|
-
raw_query=raw_query,
|
138
137
|
max_distance=max_distance,
|
138
|
+
user=user,
|
139
139
|
agent=agent,
|
140
140
|
).all()
|
141
141
|
hits = await sync_to_async(list)(hits) # type: ignore[call-arg]
|
khoj/utils/helpers.py
CHANGED
@@ -313,12 +313,14 @@ class ConversationCommand(str, Enum):
|
|
313
313
|
Help = "help"
|
314
314
|
Online = "online"
|
315
315
|
Webpage = "webpage"
|
316
|
+
Code = "code"
|
316
317
|
Image = "image"
|
317
318
|
Text = "text"
|
318
319
|
Automation = "automation"
|
319
320
|
AutomatedTask = "automated_task"
|
320
321
|
Summarize = "summarize"
|
321
322
|
Diagram = "diagram"
|
323
|
+
Research = "research"
|
322
324
|
|
323
325
|
|
324
326
|
command_descriptions = {
|
@@ -327,11 +329,13 @@ command_descriptions = {
|
|
327
329
|
ConversationCommand.Default: "The default command when no command specified. It intelligently auto-switches between general and notes mode.",
|
328
330
|
ConversationCommand.Online: "Search for information on the internet.",
|
329
331
|
ConversationCommand.Webpage: "Get information from webpage suggested by you.",
|
332
|
+
ConversationCommand.Code: "Run Python code to parse information, run complex calculations, create documents and charts.",
|
330
333
|
ConversationCommand.Image: "Generate illustrative, creative images by describing your imagination in words.",
|
331
334
|
ConversationCommand.Automation: "Automatically run your query at a specified time or interval.",
|
332
335
|
ConversationCommand.Help: "Get help with how to use or setup Khoj from the documentation",
|
333
336
|
ConversationCommand.Summarize: "Get help with a question pertaining to an entire document.",
|
334
337
|
ConversationCommand.Diagram: "Draw a flowchart, diagram, or any other visual representation best expressed with primitives like lines, rectangles, and text.",
|
338
|
+
ConversationCommand.Research: "Do deep research on a topic. This will take longer than usual, but give a more detailed, comprehensive answer.",
|
335
339
|
}
|
336
340
|
|
337
341
|
command_descriptions_for_agent = {
|
@@ -340,6 +344,7 @@ command_descriptions_for_agent = {
|
|
340
344
|
ConversationCommand.Online: "Agent can search the internet for information.",
|
341
345
|
ConversationCommand.Webpage: "Agent can read suggested web pages for information.",
|
342
346
|
ConversationCommand.Summarize: "Agent can read an entire document. Agents knowledge base must be a single document.",
|
347
|
+
ConversationCommand.Research: "Agent can do deep research on a topic.",
|
343
348
|
}
|
344
349
|
|
345
350
|
tool_descriptions_for_llm = {
|
@@ -348,18 +353,26 @@ tool_descriptions_for_llm = {
|
|
348
353
|
ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
|
349
354
|
ConversationCommand.Online: "To search for the latest, up-to-date information from the internet. Note: **Questions about Khoj should always use this data source**",
|
350
355
|
ConversationCommand.Webpage: "To use if the user has directly provided the webpage urls or you are certain of the webpage urls to read.",
|
356
|
+
ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create documents and charts for user. Matplotlib, bs4, pandas, numpy, etc. are available.",
|
351
357
|
ConversationCommand.Summarize: "To retrieve an answer that depends on the entire document or a large text.",
|
352
358
|
}
|
353
359
|
|
360
|
+
function_calling_description_for_llm = {
|
361
|
+
ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.",
|
362
|
+
ConversationCommand.Online: "To search the internet for information. Provide all relevant context to ensure new searches, not previously run, are performed.",
|
363
|
+
ConversationCommand.Webpage: "To extract information from a webpage. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share the webpage link and information to extract in your query.",
|
364
|
+
ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create documents and charts for user. Matplotlib, bs4, pandas, numpy, etc. are available.",
|
365
|
+
}
|
366
|
+
|
354
367
|
mode_descriptions_for_llm = {
|
355
|
-
ConversationCommand.Image: "Use this if you are confident the user is requesting you to create a new picture based on their description.",
|
368
|
+
ConversationCommand.Image: "Use this if you are confident the user is requesting you to create a new picture based on their description. This does not support generating charts or graphs.",
|
356
369
|
ConversationCommand.Automation: "Use this if you are confident the user is requesting a response at a scheduled date, time and frequency",
|
357
370
|
ConversationCommand.Text: "Use this if a normal text response would be sufficient for accurately responding to the query.",
|
358
371
|
ConversationCommand.Diagram: "Use this if the user is requesting a diagram or visual representation that requires primitives like lines, rectangles, and text.",
|
359
372
|
}
|
360
373
|
|
361
374
|
mode_descriptions_for_agent = {
|
362
|
-
ConversationCommand.Image: "Agent can generate
|
375
|
+
ConversationCommand.Image: "Agent can generate images in response. It cannot not use this to generate charts and graphs.",
|
363
376
|
ConversationCommand.Automation: "Agent can schedule a task to run at a scheduled date, time and frequency in response.",
|
364
377
|
ConversationCommand.Text: "Agent can generate text in response.",
|
365
378
|
ConversationCommand.Diagram: "Agent can generate a visual representation that requires primitives like lines, rectangles, and text.",
|
khoj/utils/yaml.py
CHANGED
@@ -41,3 +41,7 @@ def parse_config_from_string(yaml_config: dict) -> FullConfig:
|
|
41
41
|
def parse_config_from_file(yaml_config_file):
|
42
42
|
"Parse and validate config in YML file"
|
43
43
|
return parse_config_from_string(load_config_from_file(yaml_config_file))
|
44
|
+
|
45
|
+
|
46
|
+
def yaml_dump(data):
    "Serialize data to a YAML string in block style, keeping key order and unicode characters as is"
    dump_options = {"allow_unicode": True, "sort_keys": False, "default_flow_style": False}
    return yaml.dump(data, **dump_options)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: khoj
|
3
|
-
Version: 1.27.2.
|
3
|
+
Version: 1.27.2.dev130
|
4
4
|
Summary: Your Second Brain
|
5
5
|
Project-URL: Homepage, https://khoj.dev
|
6
6
|
Project-URL: Documentation, https://docs.khoj.dev
|
@@ -78,6 +78,7 @@ Requires-Dist: black>=23.1.0; extra == 'dev'
|
|
78
78
|
Requires-Dist: boto3>=1.34.57; extra == 'dev'
|
79
79
|
Requires-Dist: factory-boy>=3.2.1; extra == 'dev'
|
80
80
|
Requires-Dist: freezegun>=1.2.0; extra == 'dev'
|
81
|
+
Requires-Dist: gitpython~=3.1.43; extra == 'dev'
|
81
82
|
Requires-Dist: google-auth==2.23.3; extra == 'dev'
|
82
83
|
Requires-Dist: gunicorn==22.0.0; extra == 'dev'
|
83
84
|
Requires-Dist: mypy>=1.0.1; extra == 'dev'
|