khoj 1.28.3__py3-none-any.whl → 1.28.4.dev92__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +10 -14
- khoj/database/adapters/__init__.py +128 -44
- khoj/database/admin.py +6 -3
- khoj/database/management/commands/change_default_model.py +7 -72
- khoj/database/migrations/0073_delete_usersearchmodelconfig.py +15 -0
- khoj/database/models/__init__.py +4 -6
- khoj/interface/compiled/404/index.html +1 -1
- khoj/interface/compiled/_next/static/chunks/1603-dc5fd983dbcd070d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/1970-c78f6acc8e16e30b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/2261-748f7c327df3c8c1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3124-a4cea2eda163128d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3803-d74118a2d0182c52.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5538-36aa824a75519c5b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5961-3c104d9736b7902b.js +3 -0
- khoj/interface/compiled/_next/static/chunks/8423-ebfa9bb9e2424ca3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9417-32c4db52ca42e681.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e9838b642913a071.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-4353b1a532795ad1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-d3edae545a1b5393.js → page-c9f13c865e739607.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-b0e7ff4baa3b5265.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-45720e1ed71e3ef5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/{layout-d0f0a9067427fb20.js → layout-86561d2fac35a91a.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-ea462e20376b6dce.js → page-ecb8e1c192aa8834.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-ea6b73fdaf9b24ca.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-a5c277eff207959e.js → page-8e28deacb61f75aa.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{layout-a8f33dfe92f997fb.js → layout-254eaaf916449a60.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/page-2fab613a557d3cc5.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-cf7445cf0326bda3.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-30376aa7e9cfa342.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{main-f84cd3c1873cd842.js → main-1ea5c2e0fdef4626.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-8beec5b51cabb39a.js → webpack-27cf153c35b1338d.js} +1 -1
- khoj/interface/compiled/_next/static/css/{467a524c75e7d7c0.css → 0e9d53dcd7f11342.css} +1 -1
- khoj/interface/compiled/_next/static/css/{26c1c33d0423a7d8.css → 1f293605f2871853.css} +1 -1
- khoj/interface/compiled/_next/static/css/2d097a35da6bfe8d.css +1 -0
- khoj/interface/compiled/_next/static/css/80bd6301fc657983.css +1 -0
- khoj/interface/compiled/_next/static/css/ed437164d77aa600.css +25 -0
- khoj/interface/compiled/_next/static/media/5455839c73f146e7-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/5984b96ba4822821-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/684adc3dde1b03f1-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/82e3b9a1bdaf0c26-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/8d1ea331386a0db8-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/91475f6526542a4f-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/b98b13dbc1c3b59c-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/c824d7a20139e39d-s.woff2 +0 -0
- khoj/interface/compiled/agents/index.html +1 -1
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +1 -1
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +1 -1
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +1 -1
- khoj/interface/compiled/index.txt +3 -3
- khoj/interface/compiled/search/index.html +1 -1
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +1 -1
- khoj/interface/compiled/settings/index.txt +3 -3
- khoj/interface/compiled/share/chat/index.html +1 -1
- khoj/interface/compiled/share/chat/index.txt +3 -3
- khoj/processor/content/docx/docx_to_entries.py +27 -21
- khoj/processor/content/github/github_to_entries.py +2 -2
- khoj/processor/content/images/image_to_entries.py +2 -2
- khoj/processor/content/markdown/markdown_to_entries.py +2 -2
- khoj/processor/content/notion/notion_to_entries.py +2 -2
- khoj/processor/content/org_mode/org_to_entries.py +2 -2
- khoj/processor/content/org_mode/orgnode.py +1 -1
- khoj/processor/content/pdf/pdf_to_entries.py +37 -29
- khoj/processor/content/plaintext/plaintext_to_entries.py +2 -2
- khoj/processor/content/text_to_entries.py +3 -4
- khoj/processor/conversation/anthropic/anthropic_chat.py +9 -1
- khoj/processor/conversation/google/gemini_chat.py +15 -2
- khoj/processor/conversation/google/utils.py +3 -1
- khoj/processor/conversation/offline/chat_model.py +4 -0
- khoj/processor/conversation/openai/gpt.py +6 -1
- khoj/processor/conversation/prompts.py +72 -13
- khoj/processor/conversation/utils.py +80 -13
- khoj/processor/image/generate.py +2 -0
- khoj/processor/tools/online_search.py +68 -18
- khoj/processor/tools/run_code.py +54 -20
- khoj/routers/api.py +10 -4
- khoj/routers/api_agents.py +8 -10
- khoj/routers/api_chat.py +89 -24
- khoj/routers/api_content.py +80 -8
- khoj/routers/helpers.py +176 -60
- khoj/routers/notion.py +1 -1
- khoj/routers/research.py +73 -31
- khoj/routers/web_client.py +0 -10
- khoj/search_type/text_search.py +3 -7
- khoj/utils/cli.py +2 -2
- khoj/utils/fs_syncer.py +2 -1
- khoj/utils/helpers.py +6 -3
- khoj/utils/rawconfig.py +32 -0
- khoj/utils/state.py +2 -1
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/METADATA +3 -3
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/RECORD +99 -105
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/WHEEL +1 -1
- khoj/interface/compiled/_next/static/chunks/1034-da58b679fcbb79c1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1467-b331e469fe411347.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1603-c1568f45947e9f2c.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1970-d44050bf658ae5cc.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3110-ef2cacd1b8d79ad8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/3423-f4b7df2f6f3362f7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/394-6bcb8c429f168f21.js +0 -3
- khoj/interface/compiled/_next/static/chunks/7113-f2e114d7034a0835.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8423-da57554315eebcbe.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8840-b8d7b9f0923c6651.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9417-0d0fc7eb49a86abb.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-75636ab3a413fa8e.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/page-adbf3cd470da248f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-96fcf62857bf8f30.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-222d348681b848a5.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/layout-7b30c541c05fb904.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/factchecker/page-bded0868a08ac4ba.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-3720f1362310bebb.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/page-210bd54db4841333.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-2df56074e42adaa0.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/page-a21b7e8890ed1209.js +0 -1
- khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +0 -1
- khoj/interface/compiled/_next/static/css/553f9cdcc7a2bcd6.css +0 -1
- khoj/interface/compiled/_next/static/css/a795ee88875f4853.css +0 -25
- khoj/interface/compiled/_next/static/css/afd3d45cc65d55d8.css +0 -1
- khoj/interface/compiled/_next/static/media/0e790e04fd40ad16-s.p.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/4221e1667cd19c7d-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6c276159aa0eb14b-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/6cc0b9500e4f9168-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/9d9319a7a2ac39c6-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/a75c8ea86756d52d-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/abce7c400ca31a51-s.woff2 +0 -0
- khoj/interface/compiled/_next/static/media/f759c939737fb668-s.woff2 +0 -0
- khoj/interface/compiled/factchecker/index.html +0 -1
- khoj/interface/compiled/factchecker/index.txt +0 -7
- /khoj/interface/compiled/_next/static/{EfnEiWDle86AUcxEdEFgO → t_2jovvUVve0Gvc3FqpT9}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{EfnEiWDle86AUcxEdEFgO → t_2jovvUVve0Gvc3FqpT9}/_ssgManifest.js +0 -0
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/entry_points.txt +0 -0
- {khoj-1.28.3.dist-info → khoj-1.28.4.dev92.dist-info}/licenses/LICENSE +0 -0
khoj/routers/research.py
CHANGED
@@ -11,6 +11,7 @@ from khoj.processor.conversation import prompts
|
|
11
11
|
from khoj.processor.conversation.utils import (
|
12
12
|
InformationCollectionIteration,
|
13
13
|
clean_json,
|
14
|
+
construct_chat_history,
|
14
15
|
construct_iteration_history,
|
15
16
|
construct_tool_chat_history,
|
16
17
|
)
|
@@ -19,8 +20,6 @@ from khoj.processor.tools.run_code import run_code
|
|
19
20
|
from khoj.routers.api import extract_references_and_questions
|
20
21
|
from khoj.routers.helpers import (
|
21
22
|
ChatEvent,
|
22
|
-
construct_chat_history,
|
23
|
-
extract_relevant_info,
|
24
23
|
generate_summary_from_files,
|
25
24
|
send_message_to_model_wrapper,
|
26
25
|
)
|
@@ -43,38 +42,36 @@ async def apick_next_tool(
|
|
43
42
|
location: LocationData = None,
|
44
43
|
user_name: str = None,
|
45
44
|
agent: Agent = None,
|
46
|
-
|
45
|
+
previous_iterations: List[InformationCollectionIteration] = [],
|
47
46
|
max_iterations: int = 5,
|
48
47
|
send_status_func: Optional[Callable] = None,
|
49
48
|
tracer: dict = {},
|
49
|
+
query_files: str = None,
|
50
50
|
):
|
51
|
-
"""
|
52
|
-
Given a query, determine which of the available tools the agent should use in order to answer appropriately. One at a time, and it's able to use subsequent iterations to refine the answer.
|
53
|
-
"""
|
51
|
+
"""Given a query, determine which of the available tools the agent should use in order to answer appropriately."""
|
54
52
|
|
53
|
+
# Construct tool options for the agent to choose from
|
55
54
|
tool_options = dict()
|
56
55
|
tool_options_str = ""
|
57
|
-
|
58
56
|
agent_tools = agent.input_tools if agent else []
|
59
|
-
|
60
57
|
for tool, description in function_calling_description_for_llm.items():
|
61
58
|
tool_options[tool.value] = description
|
62
59
|
if len(agent_tools) == 0 or tool.value in agent_tools:
|
63
60
|
tool_options_str += f'- "{tool.value}": "{description}"\n'
|
64
61
|
|
62
|
+
# Construct chat history with user and iteration history with researcher agent for context
|
65
63
|
chat_history = construct_chat_history(conversation_history, agent_name=agent.name if agent else "Khoj")
|
64
|
+
previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
|
66
65
|
|
67
66
|
if query_images:
|
68
67
|
query = f"[placeholder for user attached images]\n{query}"
|
69
68
|
|
69
|
+
today = datetime.today()
|
70
|
+
location_data = f"{location}" if location else "Unknown"
|
70
71
|
personality_context = (
|
71
72
|
prompts.personality_context.format(personality=agent.personality) if agent and agent.personality else ""
|
72
73
|
)
|
73
74
|
|
74
|
-
# Extract Past User Message and Inferred Questions from Conversation Log
|
75
|
-
today = datetime.today()
|
76
|
-
location_data = f"{location}" if location else "Unknown"
|
77
|
-
|
78
75
|
function_planning_prompt = prompts.plan_function_execution.format(
|
79
76
|
tools=tool_options_str,
|
80
77
|
chat_history=chat_history,
|
@@ -87,15 +84,25 @@ async def apick_next_tool(
|
|
87
84
|
max_iterations=max_iterations,
|
88
85
|
)
|
89
86
|
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
87
|
+
try:
|
88
|
+
with timer("Chat actor: Infer information sources to refer", logger):
|
89
|
+
response = await send_message_to_model_wrapper(
|
90
|
+
query=query,
|
91
|
+
context=function_planning_prompt,
|
92
|
+
response_type="json_object",
|
93
|
+
user=user,
|
94
|
+
query_images=query_images,
|
95
|
+
query_files=query_files,
|
96
|
+
tracer=tracer,
|
97
|
+
)
|
98
|
+
except Exception as e:
|
99
|
+
logger.error(f"Failed to infer information sources to refer: {e}", exc_info=True)
|
100
|
+
yield InformationCollectionIteration(
|
101
|
+
tool=None,
|
102
|
+
query=None,
|
103
|
+
warning="Failed to infer information sources to refer. Skipping iteration. Try again.",
|
98
104
|
)
|
105
|
+
return
|
99
106
|
|
100
107
|
try:
|
101
108
|
response = clean_json(response)
|
@@ -103,8 +110,15 @@ async def apick_next_tool(
|
|
103
110
|
selected_tool = response.get("tool", None)
|
104
111
|
generated_query = response.get("query", None)
|
105
112
|
scratchpad = response.get("scratchpad", None)
|
113
|
+
warning = None
|
106
114
|
logger.info(f"Response for determining relevant tools: {response}")
|
107
|
-
|
115
|
+
|
116
|
+
# Detect selection of previously used query, tool combination.
|
117
|
+
previous_tool_query_combinations = {(i.tool, i.query) for i in previous_iterations}
|
118
|
+
if (selected_tool, generated_query) in previous_tool_query_combinations:
|
119
|
+
warning = f"Repeated tool, query combination detected. Skipping iteration. Try something different."
|
120
|
+
# Only send client status updates if we'll execute this iteration
|
121
|
+
elif send_status_func:
|
108
122
|
determined_tool_message = "**Determined Tool**: "
|
109
123
|
determined_tool_message += f"{selected_tool}({generated_query})." if selected_tool else "respond."
|
110
124
|
determined_tool_message += f"\nReason: {scratchpad}" if scratchpad else ""
|
@@ -114,13 +128,14 @@ async def apick_next_tool(
|
|
114
128
|
yield InformationCollectionIteration(
|
115
129
|
tool=selected_tool,
|
116
130
|
query=generated_query,
|
131
|
+
warning=warning,
|
117
132
|
)
|
118
|
-
|
119
133
|
except Exception as e:
|
120
134
|
logger.error(f"Invalid response for determining relevant tools: {response}. {e}", exc_info=True)
|
121
135
|
yield InformationCollectionIteration(
|
122
136
|
tool=None,
|
123
137
|
query=None,
|
138
|
+
warning=f"Invalid response for determining relevant tools: {response}. Skipping iteration. Fix error: {e}",
|
124
139
|
)
|
125
140
|
|
126
141
|
|
@@ -137,6 +152,7 @@ async def execute_information_collection(
|
|
137
152
|
location: LocationData = None,
|
138
153
|
file_filters: List[str] = [],
|
139
154
|
tracer: dict = {},
|
155
|
+
query_files: str = None,
|
140
156
|
):
|
141
157
|
current_iteration = 0
|
142
158
|
MAX_ITERATIONS = 5
|
@@ -147,7 +163,6 @@ async def execute_information_collection(
|
|
147
163
|
document_results: List[Dict[str, str]] = []
|
148
164
|
summarize_files: str = ""
|
149
165
|
this_iteration = InformationCollectionIteration(tool=None, query=query)
|
150
|
-
previous_iterations_history = construct_iteration_history(previous_iterations, prompts.previous_iteration)
|
151
166
|
|
152
167
|
async for result in apick_next_tool(
|
153
168
|
query,
|
@@ -157,19 +172,27 @@ async def execute_information_collection(
|
|
157
172
|
location,
|
158
173
|
user_name,
|
159
174
|
agent,
|
160
|
-
|
175
|
+
previous_iterations,
|
161
176
|
MAX_ITERATIONS,
|
162
177
|
send_status_func,
|
163
178
|
tracer=tracer,
|
179
|
+
query_files=query_files,
|
164
180
|
):
|
165
181
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
166
182
|
yield result[ChatEvent.STATUS]
|
167
183
|
elif isinstance(result, InformationCollectionIteration):
|
168
184
|
this_iteration = result
|
169
185
|
|
170
|
-
if this_iteration.tool == ConversationCommand.Notes:
|
186
|
+
# Skip running iteration if warning present in iteration
|
187
|
+
if this_iteration.warning:
|
188
|
+
logger.warning(f"Research mode: {this_iteration.warning}.")
|
189
|
+
|
190
|
+
elif this_iteration.tool == ConversationCommand.Notes:
|
171
191
|
this_iteration.context = []
|
172
192
|
document_results = []
|
193
|
+
previous_inferred_queries = {
|
194
|
+
c["query"] for iteration in previous_iterations if iteration.context for c in iteration.context
|
195
|
+
}
|
173
196
|
async for result in extract_references_and_questions(
|
174
197
|
request,
|
175
198
|
construct_tool_chat_history(previous_iterations, ConversationCommand.Notes),
|
@@ -181,8 +204,10 @@ async def execute_information_collection(
|
|
181
204
|
location,
|
182
205
|
send_status_func,
|
183
206
|
query_images,
|
207
|
+
previous_inferred_queries=previous_inferred_queries,
|
184
208
|
agent=agent,
|
185
209
|
tracer=tracer,
|
210
|
+
query_files=query_files,
|
186
211
|
):
|
187
212
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
188
213
|
yield result[ChatEvent.STATUS]
|
@@ -204,6 +229,12 @@ async def execute_information_collection(
|
|
204
229
|
logger.error(f"Error extracting document references: {e}", exc_info=True)
|
205
230
|
|
206
231
|
elif this_iteration.tool == ConversationCommand.Online:
|
232
|
+
previous_subqueries = {
|
233
|
+
subquery
|
234
|
+
for iteration in previous_iterations
|
235
|
+
if iteration.onlineContext
|
236
|
+
for subquery in iteration.onlineContext.keys()
|
237
|
+
}
|
207
238
|
async for result in search_online(
|
208
239
|
this_iteration.query,
|
209
240
|
construct_tool_chat_history(previous_iterations, ConversationCommand.Online),
|
@@ -213,11 +244,16 @@ async def execute_information_collection(
|
|
213
244
|
[],
|
214
245
|
max_webpages_to_read=0,
|
215
246
|
query_images=query_images,
|
247
|
+
previous_subqueries=previous_subqueries,
|
216
248
|
agent=agent,
|
217
249
|
tracer=tracer,
|
218
250
|
):
|
219
251
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
220
252
|
yield result[ChatEvent.STATUS]
|
253
|
+
elif is_none_or_empty(result):
|
254
|
+
this_iteration.warning = (
|
255
|
+
"Detected previously run online search queries. Skipping iteration. Try something different."
|
256
|
+
)
|
221
257
|
else:
|
222
258
|
online_results: Dict[str, Dict] = result # type: ignore
|
223
259
|
this_iteration.onlineContext = online_results
|
@@ -233,6 +269,7 @@ async def execute_information_collection(
|
|
233
269
|
query_images=query_images,
|
234
270
|
agent=agent,
|
235
271
|
tracer=tracer,
|
272
|
+
query_files=query_files,
|
236
273
|
):
|
237
274
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
238
275
|
yield result[ChatEvent.STATUS]
|
@@ -263,6 +300,7 @@ async def execute_information_collection(
|
|
263
300
|
send_status_func,
|
264
301
|
query_images=query_images,
|
265
302
|
agent=agent,
|
303
|
+
query_files=query_files,
|
266
304
|
tracer=tracer,
|
267
305
|
):
|
268
306
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
@@ -288,6 +326,7 @@ async def execute_information_collection(
|
|
288
326
|
query_images=query_images,
|
289
327
|
agent=agent,
|
290
328
|
send_status_func=send_status_func,
|
329
|
+
query_files=query_files,
|
291
330
|
):
|
292
331
|
if isinstance(result, dict) and ChatEvent.STATUS in result:
|
293
332
|
yield result[ChatEvent.STATUS]
|
@@ -302,16 +341,19 @@ async def execute_information_collection(
|
|
302
341
|
|
303
342
|
current_iteration += 1
|
304
343
|
|
305
|
-
if document_results or online_results or code_results or summarize_files:
|
306
|
-
results_data = f"
|
344
|
+
if document_results or online_results or code_results or summarize_files or this_iteration.warning:
|
345
|
+
results_data = f"\n<iteration>{current_iteration}\n<tool>{this_iteration.tool}</tool>\n<query>{this_iteration.query}</query>\n<results>"
|
307
346
|
if document_results:
|
308
|
-
results_data += f"
|
347
|
+
results_data += f"\n<document_references>\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</document_references>"
|
309
348
|
if online_results:
|
310
|
-
results_data += f"
|
349
|
+
results_data += f"\n<online_results>\n{yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</online_results>"
|
311
350
|
if code_results:
|
312
|
-
results_data += f"
|
351
|
+
results_data += f"\n<code_results>\n{yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</code_results>"
|
313
352
|
if summarize_files:
|
314
|
-
results_data += f"
|
353
|
+
results_data += f"\n<summarized_files>\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n</summarized_files>"
|
354
|
+
if this_iteration.warning:
|
355
|
+
results_data += f"\n<warning>\n{this_iteration.warning}\n</warning>"
|
356
|
+
results_data += "\n</results>\n</iteration>"
|
315
357
|
|
316
358
|
# intermediate_result = await extract_relevant_info(this_iteration.query, results_data, agent)
|
317
359
|
this_iteration.summarizedResult = results_data
|
khoj/routers/web_client.py
CHANGED
@@ -51,16 +51,6 @@ def chat_page(request: Request):
|
|
51
51
|
)
|
52
52
|
|
53
53
|
|
54
|
-
@web_client.get("/factchecker", response_class=FileResponse)
|
55
|
-
def fact_checker_page(request: Request):
|
56
|
-
return templates.TemplateResponse(
|
57
|
-
"factchecker/index.html",
|
58
|
-
context={
|
59
|
-
"request": request,
|
60
|
-
},
|
61
|
-
)
|
62
|
-
|
63
|
-
|
64
54
|
@web_client.get("/login", response_class=FileResponse)
|
65
55
|
def login_page(request: Request):
|
66
56
|
next_url = get_next_url(request)
|
khoj/search_type/text_search.py
CHANGED
@@ -8,11 +8,7 @@ import torch
|
|
8
8
|
from asgiref.sync import sync_to_async
|
9
9
|
from sentence_transformers import util
|
10
10
|
|
11
|
-
from khoj.database.adapters import (
|
12
|
-
EntryAdapters,
|
13
|
-
get_default_search_model,
|
14
|
-
get_user_default_search_model,
|
15
|
-
)
|
11
|
+
from khoj.database.adapters import EntryAdapters, get_default_search_model
|
16
12
|
from khoj.database.models import Agent
|
17
13
|
from khoj.database.models import Entry as DbEntry
|
18
14
|
from khoj.database.models import KhojUser
|
@@ -114,7 +110,7 @@ async def query(
|
|
114
110
|
file_type = search_type_to_embeddings_type[type.value]
|
115
111
|
|
116
112
|
query = raw_query
|
117
|
-
search_model = await sync_to_async(
|
113
|
+
search_model = await sync_to_async(get_default_search_model)()
|
118
114
|
if not max_distance:
|
119
115
|
if search_model.bi_encoder_confidence_threshold:
|
120
116
|
max_distance = search_model.bi_encoder_confidence_threshold
|
@@ -212,7 +208,7 @@ def setup(
|
|
212
208
|
text_to_entries: Type[TextToEntries],
|
213
209
|
files: dict[str, str],
|
214
210
|
regenerate: bool,
|
215
|
-
user: KhojUser
|
211
|
+
user: KhojUser,
|
216
212
|
config=None,
|
217
213
|
) -> Tuple[int, int]:
|
218
214
|
if config:
|
khoj/utils/cli.py
CHANGED
@@ -16,7 +16,7 @@ from khoj.migrations.migrate_processor_config_openai import (
|
|
16
16
|
)
|
17
17
|
from khoj.migrations.migrate_server_pg import migrate_server_pg
|
18
18
|
from khoj.migrations.migrate_version import migrate_config_to_version
|
19
|
-
from khoj.utils.helpers import in_debug_mode, resolve_absolute_path
|
19
|
+
from khoj.utils.helpers import in_debug_mode, is_env_var_true, resolve_absolute_path
|
20
20
|
from khoj.utils.yaml import parse_config_from_file
|
21
21
|
|
22
22
|
|
@@ -79,7 +79,7 @@ def cli(args=None):
|
|
79
79
|
else:
|
80
80
|
args = run_migrations(args)
|
81
81
|
args.config = parse_config_from_file(args.config_file)
|
82
|
-
if in_debug_mode():
|
82
|
+
if is_env_var_true("KHOJ_TELEMETRY_DISABLE") or in_debug_mode():
|
83
83
|
args.config.app.should_log_telemetry = False
|
84
84
|
|
85
85
|
return args
|
khoj/utils/fs_syncer.py
CHANGED
@@ -8,6 +8,7 @@ from bs4 import BeautifulSoup
|
|
8
8
|
from magika import Magika
|
9
9
|
|
10
10
|
from khoj.database.models import (
|
11
|
+
KhojUser,
|
11
12
|
LocalMarkdownConfig,
|
12
13
|
LocalOrgConfig,
|
13
14
|
LocalPdfConfig,
|
@@ -21,7 +22,7 @@ logger = logging.getLogger(__name__)
|
|
21
22
|
magika = Magika()
|
22
23
|
|
23
24
|
|
24
|
-
def collect_files(search_type: Optional[SearchType] = SearchType.All
|
25
|
+
def collect_files(user: KhojUser, search_type: Optional[SearchType] = SearchType.All) -> dict:
|
25
26
|
files: dict[str, dict] = {"docx": {}, "image": {}}
|
26
27
|
|
27
28
|
if search_type == SearchType.All or search_type == SearchType.Org:
|
khoj/utils/helpers.py
CHANGED
@@ -254,8 +254,10 @@ def get_server_id():
|
|
254
254
|
return server_id
|
255
255
|
|
256
256
|
|
257
|
-
def telemetry_disabled(app_config: AppConfig):
|
258
|
-
return
|
257
|
+
def telemetry_disabled(app_config: AppConfig, telemetry_disable_env) -> bool:
|
258
|
+
return (
|
259
|
+
not app_config.should_log_telemetry if app_config and app_config.should_log_telemetry else telemetry_disable_env
|
260
|
+
)
|
259
261
|
|
260
262
|
|
261
263
|
def log_telemetry(
|
@@ -263,11 +265,12 @@ def log_telemetry(
|
|
263
265
|
api: str = None,
|
264
266
|
client: Optional[str] = None,
|
265
267
|
app_config: Optional[AppConfig] = None,
|
268
|
+
disable_telemetry_env: bool = False,
|
266
269
|
properties: dict = None,
|
267
270
|
):
|
268
271
|
"""Log basic app usage telemetry like client, os, api called"""
|
269
272
|
# Do not log usage telemetry, if telemetry is disabled via app config
|
270
|
-
if telemetry_disabled(app_config):
|
273
|
+
if telemetry_disabled(app_config, disable_telemetry_env):
|
271
274
|
return []
|
272
275
|
|
273
276
|
if properties.get("server_id") is None:
|
khoj/utils/rawconfig.py
CHANGED
@@ -138,6 +138,38 @@ class SearchResponse(ConfigBase):
|
|
138
138
|
corpus_id: str
|
139
139
|
|
140
140
|
|
141
|
+
class FileData(BaseModel):
|
142
|
+
name: str
|
143
|
+
content: bytes
|
144
|
+
file_type: str
|
145
|
+
encoding: str | None = None
|
146
|
+
|
147
|
+
|
148
|
+
class FileAttachment(BaseModel):
|
149
|
+
name: str
|
150
|
+
content: str
|
151
|
+
file_type: str
|
152
|
+
size: int
|
153
|
+
|
154
|
+
|
155
|
+
class ChatRequestBody(BaseModel):
|
156
|
+
q: str
|
157
|
+
n: Optional[int] = 7
|
158
|
+
d: Optional[float] = None
|
159
|
+
stream: Optional[bool] = False
|
160
|
+
title: Optional[str] = None
|
161
|
+
conversation_id: Optional[str] = None
|
162
|
+
turn_id: Optional[str] = None
|
163
|
+
city: Optional[str] = None
|
164
|
+
region: Optional[str] = None
|
165
|
+
country: Optional[str] = None
|
166
|
+
country_code: Optional[str] = None
|
167
|
+
timezone: Optional[str] = None
|
168
|
+
images: Optional[list[str]] = None
|
169
|
+
files: Optional[list[FileAttachment]] = []
|
170
|
+
create_new: Optional[bool] = False
|
171
|
+
|
172
|
+
|
141
173
|
class Entry:
|
142
174
|
raw: str
|
143
175
|
compiled: str
|
khoj/utils/state.py
CHANGED
@@ -12,7 +12,7 @@ from khoj.database.models import ProcessLock
|
|
12
12
|
from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
|
13
13
|
from khoj.utils import config as utils_config
|
14
14
|
from khoj.utils.config import OfflineChatProcessorModel, SearchModels
|
15
|
-
from khoj.utils.helpers import LRU, get_device
|
15
|
+
from khoj.utils.helpers import LRU, get_device, is_env_var_true
|
16
16
|
from khoj.utils.rawconfig import FullConfig
|
17
17
|
|
18
18
|
# Application Global State
|
@@ -34,6 +34,7 @@ SearchType = utils_config.SearchType
|
|
34
34
|
scheduler: BackgroundScheduler = None
|
35
35
|
schedule_leader_process_lock: ProcessLock = None
|
36
36
|
telemetry: List[Dict[str, str]] = []
|
37
|
+
telemetry_disabled: bool = is_env_var_true("KHOJ_TELEMETRY_DISABLE")
|
37
38
|
khoj_version: str = None
|
38
39
|
device = get_device()
|
39
40
|
chat_on_gpu: bool = True
|
@@ -1,13 +1,11 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: khoj
|
3
|
-
Version: 1.28.3
|
3
|
+
Version: 1.28.4.dev92
|
4
4
|
Summary: Your Second Brain
|
5
5
|
Project-URL: Homepage, https://khoj.dev
|
6
6
|
Project-URL: Documentation, https://docs.khoj.dev
|
7
7
|
Project-URL: Code, https://github.com/khoj-ai/khoj
|
8
8
|
Author: Debanjum Singh Solanky, Saba Imran
|
9
|
-
License-Expression: AGPL-3.0-or-later
|
10
|
-
License-File: LICENSE
|
11
9
|
Keywords: AI,NLP,images,markdown,org-mode,pdf,productivity,search,semantic-search
|
12
10
|
Classifier: Development Status :: 5 - Production/Stable
|
13
11
|
Classifier: Intended Audience :: Information Technology
|
@@ -76,12 +74,14 @@ Requires-Dist: websockets==12.0
|
|
76
74
|
Provides-Extra: dev
|
77
75
|
Requires-Dist: black>=23.1.0; extra == 'dev'
|
78
76
|
Requires-Dist: boto3>=1.34.57; extra == 'dev'
|
77
|
+
Requires-Dist: datasets; extra == 'dev'
|
79
78
|
Requires-Dist: factory-boy>=3.2.1; extra == 'dev'
|
80
79
|
Requires-Dist: freezegun>=1.2.0; extra == 'dev'
|
81
80
|
Requires-Dist: gitpython~=3.1.43; extra == 'dev'
|
82
81
|
Requires-Dist: google-auth==2.23.3; extra == 'dev'
|
83
82
|
Requires-Dist: gunicorn==22.0.0; extra == 'dev'
|
84
83
|
Requires-Dist: mypy>=1.0.1; extra == 'dev'
|
84
|
+
Requires-Dist: pandas; extra == 'dev'
|
85
85
|
Requires-Dist: pre-commit>=3.0.4; extra == 'dev'
|
86
86
|
Requires-Dist: pytest-asyncio==0.21.1; extra == 'dev'
|
87
87
|
Requires-Dist: pytest-django==4.5.2; extra == 'dev'
|