khoj 2.0.0b13.dev19__py3-none-any.whl → 2.0.0b14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/admin.py +2 -2
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{2327-fe87dd989d71d0eb.js → 2327-438aaec1657c5ada.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{3260-43d3019b92c315bb.js → 3260-82d2521fab032ff1.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/9808-c0742b05e1ef29ba.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-0114c87d7ccf6d9b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/layout-8639ff99d6c2fec6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-198b26df6e09bbb0.js → page-1047097af99d31c7.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-2ff3e18a6feae92a.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/page-ac7ed0a1aff1b145.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/layout-78dd7cdd97510485.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-8addeb8079c3215b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-e0dcb1762f8c8f88.js → page-819c6536c15e3d31.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-d60b0c57a6c38d0f.js → webpack-5393aad3d824e0cb.js} +1 -1
- khoj/interface/compiled/_next/static/css/5c7a72bad47e50b3.css +25 -0
- khoj/interface/compiled/_next/static/css/821d0d60b0b6871d.css +1 -0
- khoj/interface/compiled/_next/static/css/ecea704005ba630c.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +5 -4
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +4 -4
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/google/gemini_chat.py +1 -1
- khoj/processor/conversation/google/utils.py +62 -19
- khoj/processor/conversation/openai/gpt.py +65 -28
- khoj/processor/conversation/openai/utils.py +401 -28
- khoj/processor/conversation/prompts.py +48 -30
- khoj/processor/conversation/utils.py +5 -1
- khoj/processor/tools/run_code.py +15 -22
- khoj/routers/api_chat.py +8 -3
- khoj/routers/api_content.py +1 -1
- khoj/routers/helpers.py +62 -42
- khoj/routers/research.py +7 -5
- khoj/utils/constants.py +9 -1
- khoj/utils/helpers.py +55 -15
- {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b14.dist-info}/METADATA +1 -1
- {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b14.dist-info}/RECORD +58 -58
- khoj/interface/compiled/_next/static/chunks/7127-97b83757db125ba6.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/layout-63603d2cb33279f7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/page-9a75d7369f2a7cd2.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +0 -1
- khoj/interface/compiled/_next/static/css/23b26df423cd8a9c.css +0 -1
- khoj/interface/compiled/_next/static/css/2945c4a857922f3b.css +0 -1
- khoj/interface/compiled/_next/static/css/3090706713c12a32.css +0 -25
- /khoj/interface/compiled/_next/static/{N-GdBSXoYe-DuObnbXVRO → Qn_2XyeVWxjaIRks7rzM-}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{N-GdBSXoYe-DuObnbXVRO → Qn_2XyeVWxjaIRks7rzM-}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1327-511bb0a862efce80.js → 1327-e254819a9172cfa7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9139-ce1ae935dac9c871.js → 9139-8ac4d9feb10f8869.js} +0 -0
- {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b14.dist-info}/WHEEL +0 -0
- {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b14.dist-info}/entry_points.txt +0 -0
- {khoj-2.0.0b13.dev19.dist-info → khoj-2.0.0b14.dist-info}/licenses/LICENSE +0 -0
@@ -4,21 +4,27 @@ from langchain_core.prompts import PromptTemplate
|
|
4
4
|
## --
|
5
5
|
personality = PromptTemplate.from_template(
|
6
6
|
"""
|
7
|
-
You are Khoj, a smart,
|
7
|
+
You are Khoj, a smart, curious, empathetic and helpful personal assistant.
|
8
8
|
Use your general knowledge and past conversation with the user as context to inform your responses.
|
9
|
-
You were created by Khoj Inc. with the following capabilities:
|
10
9
|
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
You were created by Khoj Inc. More information about you, the company or Khoj apps can be found at https://khoj.dev.
|
11
|
+
|
12
|
+
Today is {day_of_week}, {current_date} in UTC.
|
13
|
+
|
14
|
+
# Capabilities
|
15
|
+
- Users can share files and other information with you using the Khoj Web, Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
|
16
|
+
- You can look up information from the user's notes and documents synced via the Khoj apps.
|
17
|
+
- You can generate images, look-up real-time information from the internet, analyze data and answer questions based on the user's notes.
|
18
|
+
|
19
|
+
# Style
|
20
|
+
- Your responses should be helpful, conversational and tuned to the user's communication style.
|
14
21
|
- Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
|
15
22
|
- inline math mode : \\( and \\)
|
16
23
|
- display math mode: insert linebreak after opening $$, \\[ and before closing $$, \\]
|
17
|
-
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
Today is {day_of_week}, {current_date} in UTC.
|
24
|
+
- Provide inline citations to documents and websites referenced. Add them inline in markdown format to directly support your claim.
|
25
|
+
For example: "The weather today is sunny [1](https://weather.com)."
|
26
|
+
- Mention generated assets like images by reference, e.g . Do not manually output raw, b64 encoded bytes in your response.
|
27
|
+
- Do not respond with raw programs or scripts in your final response unless you know the user is a programmer or has explicitly requested code.
|
22
28
|
""".strip()
|
23
29
|
)
|
24
30
|
|
@@ -26,18 +32,23 @@ custom_personality = PromptTemplate.from_template(
|
|
26
32
|
"""
|
27
33
|
You are {name}, a personal agent on Khoj.
|
28
34
|
Use your general knowledge and past conversation with the user as context to inform your responses.
|
29
|
-
You were created by Khoj Inc. with the following capabilities:
|
30
35
|
|
31
|
-
|
32
|
-
|
36
|
+
You were created on the Khoj platform. More information about you, the company or Khoj apps can be found at https://khoj.dev.
|
37
|
+
|
38
|
+
Today is {day_of_week}, {current_date} in UTC.
|
39
|
+
|
40
|
+
# Base Capabilities
|
41
|
+
- Users can share files and other information with you using the Khoj Web, Desktop, Obsidian or Emacs app. They can also drag and drop their files into the chat window.
|
42
|
+
|
43
|
+
# Style
|
33
44
|
- Make sure to use the specific LaTeX math mode delimiters for your response. LaTex math mode specific delimiters as following
|
34
45
|
- inline math mode : `\\(` and `\\)`
|
35
46
|
- display math mode: insert linebreak after opening `$$`, `\\[` and before closing `$$`, `\\]`
|
36
|
-
-
|
37
|
-
|
38
|
-
|
47
|
+
- Provide inline citations to documents and websites referenced. Add them inline in markdown format to directly support your claim.
|
48
|
+
For example: "The weather today is sunny [1](https://weather.com)."
|
49
|
+
- Mention generated assets like images by reference, e.g . Do not manually output raw, b64 encoded bytes in your response.
|
39
50
|
|
40
|
-
Instructions:\n{bio}
|
51
|
+
# Instructions:\n{bio}
|
41
52
|
""".strip()
|
42
53
|
)
|
43
54
|
|
@@ -519,12 +530,13 @@ Q: {query}
|
|
519
530
|
|
520
531
|
extract_questions_system_prompt = PromptTemplate.from_template(
|
521
532
|
"""
|
522
|
-
You are Khoj, an extremely smart and helpful document search assistant with only the ability to retrieve information from the user's notes.
|
523
|
-
Construct search queries to retrieve relevant information to answer the user's question.
|
533
|
+
You are Khoj, an extremely smart and helpful document search assistant with only the ability to use natural language semantic search to retrieve information from the user's notes.
|
534
|
+
Construct upto {max_queries} search queries to retrieve relevant information to answer the user's question.
|
524
535
|
- You will be provided past questions(User), search queries(Assistant) and answers(A) for context.
|
525
|
-
-
|
526
|
-
- Break your search
|
527
|
-
- Add date filters to your search queries
|
536
|
+
- You can use context from previous questions and answers to improve your search queries.
|
537
|
+
- Break down your search into multiple search queries from a diverse set of lenses to retrieve all related documents. E.g who, what, where, when, why, how.
|
538
|
+
- Add date filters to your search queries when required to retrieve the relevant information. This is the only structured query filter you can use.
|
539
|
+
- Output 1 concept per query. Do not use boolean operators (OR/AND) to combine queries. They do not work and degrade search quality.
|
528
540
|
- When asked a meta, vague or random questions, search for a variety of broad topics to answer the user's question.
|
529
541
|
{personality_context}
|
530
542
|
What searches will you perform to answer the users question? Respond with a JSON object with the key "queries" mapping to a list of searches you would perform on the user's knowledge base. Just return the queries and nothing else.
|
@@ -535,22 +547,27 @@ User's Location: {location}
|
|
535
547
|
|
536
548
|
Here are some examples of how you can construct search queries to answer the user's question:
|
537
549
|
|
550
|
+
Illustrate - Using diverse perspectives to retrieve all relevant documents
|
538
551
|
User: How was my trip to Cambodia?
|
539
552
|
Assistant: {{"queries": ["How was my trip to Cambodia?", "Angkor Wat temple visit", "Flight to Phnom Penh", "Expenses in Cambodia", "Stay in Cambodia"]}}
|
540
553
|
A: The trip was amazing. You went to the Angkor Wat temple and it was beautiful.
|
541
554
|
|
555
|
+
Illustrate - Combining date filters with natural language queries to retrieve documents in relevant date range
|
542
556
|
User: What national parks did I go to last year?
|
543
557
|
Assistant: {{"queries": ["National park I visited in {last_new_year} dt>='{last_new_year_date}' dt<'{current_new_year_date}'"]}}
|
544
558
|
A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year}.
|
545
559
|
|
560
|
+
Illustrate - Using broad topics to answer meta or vague questions
|
546
561
|
User: How can you help me?
|
547
562
|
Assistant: {{"queries": ["Social relationships", "Physical and mental health", "Education and career", "Personal life goals and habits"]}}
|
548
563
|
A: I can help you live healthier and happier across work and personal life
|
549
564
|
|
565
|
+
Illustrate - Combining location and date in natural language queries with date filters to retrieve relevant documents
|
550
566
|
User: Who all did I meet here yesterday?
|
551
567
|
Assistant: {{"queries": ["Met in {location} on {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]}}
|
552
568
|
A: Yesterday's note mentions your visit to your local beach with Ram and Shyam.
|
553
569
|
|
570
|
+
Illustrate - Combining broad, diverse topics with date filters to answer meta or vague questions
|
554
571
|
User: Share some random, interesting experiences from this month
|
555
572
|
Assistant: {{"queries": ["Exciting travel adventures from {current_month}", "Fun social events dt>='{current_month}-01' dt<'{current_date}'", "Intense emotional experiences in {current_month}"]}}
|
556
573
|
A: You had a great time at the local beach with your friends, attended a music concert and had a deep conversation with your friend, Khalid.
|
@@ -635,16 +652,17 @@ Here's some additional context about you:
|
|
635
652
|
|
636
653
|
plan_function_execution = PromptTemplate.from_template(
|
637
654
|
"""
|
638
|
-
You are Khoj, a smart, creative and meticulous researcher.
|
655
|
+
You are Khoj, a smart, creative and meticulous researcher.
|
639
656
|
Create a multi-step plan and intelligently iterate on the plan to complete the task.
|
657
|
+
Use the help of the provided tool AIs to accomplish the task assigned to you.
|
640
658
|
{personality_context}
|
641
659
|
|
642
660
|
# Instructions
|
643
|
-
-
|
644
|
-
- Break down your research process into independent, self-contained steps that can be executed sequentially using the available tool AIs to
|
645
|
-
- Always ask a new query that was not asked to the tool AI in a previous iteration. Build on the results of the previous iterations.
|
661
|
+
- Make detailed, self-contained requests to the tool AIs, one tool AI at a time, to gather information, perform actions etc.
|
662
|
+
- Break down your research process into independent, self-contained steps that can be executed sequentially using the available tool AIs to accomplish the user assigned task.
|
646
663
|
- Ensure that all required context is passed to the tool AIs for successful execution. Include any relevant stuff that has previously been attempted. They only know the context provided in your query.
|
647
664
|
- Think step by step to come up with creative strategies when the previous iteration did not yield useful results.
|
665
|
+
- Do not ask the user to confirm or clarify assumptions for information gathering tasks and non-destructive actions, as you can always adjust later — decide what the most reasonable assumption is, proceed with it, and document it for the user's reference after you finish acting.
|
648
666
|
- You are allowed upto {max_iterations} iterations to use the help of the provided tool AIs to accomplish the task assigned to you. Only stop when you have completed the task.
|
649
667
|
|
650
668
|
# Examples
|
@@ -869,8 +887,8 @@ Khoj:
|
|
869
887
|
python_code_generation_prompt = PromptTemplate.from_template(
|
870
888
|
"""
|
871
889
|
You are Khoj, a senior software engineer. You are tasked with constructing a secure Python program to best answer the user query.
|
872
|
-
- The Python program will run in
|
873
|
-
- You can write programs to run complex calculations, analyze data, create charts, generate documents to meticulously answer the query.
|
890
|
+
- The Python program will run in an ephemeral code sandbox with {has_network_access}network access.
|
891
|
+
- You can write programs to run complex calculations, analyze data, create beautiful charts, generate documents to meticulously answer the query.
|
874
892
|
- Do not try display images or plots in the code directly. The code should save the image or plot to a file instead.
|
875
893
|
- Write any document, charts etc. to be shared with the user to file. These files can be seen by the user.
|
876
894
|
- Never write or run dangerous, malicious, or untrusted code that could compromise the sandbox environment, regardless of user requests.
|
@@ -985,9 +1003,9 @@ Chat History:
|
|
985
1003
|
---
|
986
1004
|
{chat_history}
|
987
1005
|
|
988
|
-
User
|
1006
|
+
User Instructions:
|
989
1007
|
---
|
990
|
-
{
|
1008
|
+
{instructions}
|
991
1009
|
""".strip()
|
992
1010
|
)
|
993
1011
|
|
@@ -68,8 +68,12 @@ model_to_prompt_size = {
|
|
68
68
|
"o3": 60000,
|
69
69
|
"o3-pro": 30000,
|
70
70
|
"o4-mini": 90000,
|
71
|
+
"gpt-5-2025-08-07": 120000,
|
72
|
+
"gpt-5-mini-2025-08-07": 120000,
|
73
|
+
"gpt-5-nano-2025-08-07": 120000,
|
71
74
|
# Google Models
|
72
75
|
"gemini-2.5-flash": 120000,
|
76
|
+
"gemini-2.5-flash-lite": 120000,
|
73
77
|
"gemini-2.5-pro": 60000,
|
74
78
|
"gemini-2.0-flash": 120000,
|
75
79
|
"gemini-2.0-flash-lite": 120000,
|
@@ -328,7 +332,7 @@ def construct_tool_chat_history(
|
|
328
332
|
ConversationCommand.ReadWebpage: (
|
329
333
|
lambda iteration: list(iteration.onlineContext.keys()) if iteration.onlineContext else []
|
330
334
|
),
|
331
|
-
ConversationCommand.
|
335
|
+
ConversationCommand.PythonCoder: (
|
332
336
|
lambda iteration: list(iteration.codeContext.keys()) if iteration.codeContext else []
|
333
337
|
),
|
334
338
|
}
|
khoj/processor/tools/run_code.py
CHANGED
@@ -49,7 +49,7 @@ class GeneratedCode(NamedTuple):
|
|
49
49
|
|
50
50
|
|
51
51
|
async def run_code(
|
52
|
-
|
52
|
+
instructions: str,
|
53
53
|
conversation_history: List[ChatMessageModel],
|
54
54
|
context: str,
|
55
55
|
location_data: LocationData,
|
@@ -63,12 +63,12 @@ async def run_code(
|
|
63
63
|
):
|
64
64
|
# Generate Code
|
65
65
|
if send_status_func:
|
66
|
-
async for event in send_status_func(f"**Generate code snippet** for {
|
66
|
+
async for event in send_status_func(f"**Generate code snippet** for {instructions}"):
|
67
67
|
yield {ChatEvent.STATUS: event}
|
68
68
|
try:
|
69
69
|
with timer("Chat actor: Generate programs to execute", logger):
|
70
70
|
generated_code = await generate_python_code(
|
71
|
-
|
71
|
+
instructions,
|
72
72
|
conversation_history,
|
73
73
|
context,
|
74
74
|
location_data,
|
@@ -79,7 +79,7 @@ async def run_code(
|
|
79
79
|
query_files,
|
80
80
|
)
|
81
81
|
except Exception as e:
|
82
|
-
raise ValueError(f"Failed to generate code for {
|
82
|
+
raise ValueError(f"Failed to generate code for {instructions} with error: {e}")
|
83
83
|
|
84
84
|
# Prepare Input Data
|
85
85
|
input_data = []
|
@@ -101,21 +101,21 @@ async def run_code(
|
|
101
101
|
code = result.pop("code")
|
102
102
|
cleaned_result = truncate_code_context({"cleaned": {"results": result}})["cleaned"]["results"]
|
103
103
|
logger.info(f"Executed Code\n----\n{code}\n----\nResult\n----\n{cleaned_result}\n----")
|
104
|
-
yield {
|
104
|
+
yield {instructions: {"code": code, "results": result}}
|
105
105
|
except asyncio.TimeoutError as e:
|
106
106
|
# Call the sandbox_url/stop GET API endpoint to stop the code sandbox
|
107
|
-
error = f"Failed to run code for {
|
107
|
+
error = f"Failed to run code for {instructions} with Timeout error: {e}"
|
108
108
|
try:
|
109
109
|
await aiohttp.ClientSession().get(f"{sandbox_url}/stop", timeout=5)
|
110
110
|
except Exception as e:
|
111
111
|
error += f"\n\nFailed to stop code sandbox with error: {e}"
|
112
112
|
raise ValueError(error)
|
113
113
|
except Exception as e:
|
114
|
-
raise ValueError(f"Failed to run code for {
|
114
|
+
raise ValueError(f"Failed to run code for {instructions} with error: {e}")
|
115
115
|
|
116
116
|
|
117
117
|
async def generate_python_code(
|
118
|
-
|
118
|
+
instructions: str,
|
119
119
|
chat_history: List[ChatMessageModel],
|
120
120
|
context: str,
|
121
121
|
location_data: LocationData,
|
@@ -142,7 +142,7 @@ async def generate_python_code(
|
|
142
142
|
network_access_context = "**NO** " if not is_e2b_code_sandbox_enabled() else ""
|
143
143
|
|
144
144
|
code_generation_prompt = prompts.python_code_generation_prompt.format(
|
145
|
-
|
145
|
+
instructions=instructions,
|
146
146
|
chat_history=chat_history_str,
|
147
147
|
context=context,
|
148
148
|
has_network_access=network_access_context,
|
@@ -252,8 +252,12 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
|
|
252
252
|
|
253
253
|
# Identify new files created during execution
|
254
254
|
new_files = set(E2bFile(f.name, f.path) for f in await sandbox.files.list("~")) - original_files
|
255
|
+
|
255
256
|
# Read newly created files in parallel
|
256
|
-
|
257
|
+
def read_format(f):
|
258
|
+
return "bytes" if Path(f.name).suffix in image_file_ext else "text"
|
259
|
+
|
260
|
+
download_tasks = [sandbox.files.read(f.path, format=read_format(f), request_timeout=30) for f in new_files]
|
257
261
|
downloaded_files = await asyncio.gather(*download_tasks)
|
258
262
|
for f, content in zip(new_files, downloaded_files):
|
259
263
|
if isinstance(content, bytes):
|
@@ -261,23 +265,12 @@ async def execute_e2b(code: str, input_files: list[dict]) -> dict[str, Any]:
|
|
261
265
|
b64_data = base64.b64encode(content).decode("utf-8")
|
262
266
|
elif Path(f.name).suffix in image_file_ext:
|
263
267
|
# Ignore image files as they are extracted from execution results below for inline display
|
264
|
-
|
268
|
+
b64_data = base64.b64encode(content).decode("utf-8")
|
265
269
|
else:
|
266
270
|
# Text files - encode utf-8 string as base64
|
267
271
|
b64_data = content
|
268
272
|
output_files.append({"filename": f.name, "b64_data": b64_data})
|
269
273
|
|
270
|
-
# Collect output files from execution results
|
271
|
-
# Repect ordering of output result types to disregard text output associated with images
|
272
|
-
output_result_types = ["png", "jpeg", "svg", "text", "markdown", "json"]
|
273
|
-
for idx, result in enumerate(execution.results):
|
274
|
-
if getattr(result, "chart", None):
|
275
|
-
continue
|
276
|
-
for result_type in output_result_types:
|
277
|
-
if b64_data := getattr(result, result_type, None):
|
278
|
-
output_files.append({"filename": f"{idx}.{result_type}", "b64_data": b64_data})
|
279
|
-
break
|
280
|
-
|
281
274
|
# collect logs
|
282
275
|
success = not execution.error and not execution.logs.stderr
|
283
276
|
stdout = "\n".join(execution.logs.stdout)
|
khoj/routers/api_chat.py
CHANGED
@@ -786,6 +786,9 @@ async def event_generator(
|
|
786
786
|
if interrupt_query == ChatEvent.END_EVENT.value:
|
787
787
|
cancellation_event.set()
|
788
788
|
logger.debug(f"Chat cancelled by user {user} via interrupt queue.")
|
789
|
+
elif interrupt_query == ChatEvent.INTERRUPT.value:
|
790
|
+
cancellation_event.set()
|
791
|
+
logger.debug("Chat interrupted.")
|
789
792
|
else:
|
790
793
|
# Pass the interrupt query to child tasks
|
791
794
|
logger.info(f"Continuing chat with the new instruction: {interrupt_query}")
|
@@ -995,7 +998,7 @@ async def event_generator(
|
|
995
998
|
)
|
996
999
|
except ValueError as e:
|
997
1000
|
logger.error(f"Error getting data sources and output format: {e}. Falling back to default.")
|
998
|
-
|
1001
|
+
chosen_io = {"sources": [ConversationCommand.General], "output": ConversationCommand.Text}
|
999
1002
|
|
1000
1003
|
conversation_commands = chosen_io.get("sources") + [chosen_io.get("output")]
|
1001
1004
|
|
@@ -1523,6 +1526,8 @@ async def chat_ws(
|
|
1523
1526
|
ack_type = "interrupt_acknowledged"
|
1524
1527
|
await websocket.send_text(json.dumps({"type": ack_type}))
|
1525
1528
|
else:
|
1529
|
+
ack_type = "interrupt_acknowledged"
|
1530
|
+
await websocket.send_text(json.dumps({"type": ack_type}))
|
1526
1531
|
logger.info(f"No ongoing task to interrupt for user {websocket.scope['user'].object.id}")
|
1527
1532
|
continue
|
1528
1533
|
|
@@ -1556,7 +1561,7 @@ async def chat_ws(
|
|
1556
1561
|
except WebSocketDisconnect:
|
1557
1562
|
logger.info(f"WebSocket disconnected for user {websocket.scope['user'].object.id}")
|
1558
1563
|
if current_task and not current_task.done():
|
1559
|
-
|
1564
|
+
interrupt_queue.put_nowait(ChatEvent.INTERRUPT.value)
|
1560
1565
|
except Exception as e:
|
1561
1566
|
logger.error(f"Error in websocket chat: {e}", exc_info=True)
|
1562
1567
|
if current_task and not current_task.done():
|
@@ -1701,8 +1706,8 @@ async def process_chat_request(
|
|
1701
1706
|
logger.debug(f"Chat request cancelled for user {websocket.scope['user'].object.id}")
|
1702
1707
|
raise
|
1703
1708
|
except Exception as e:
|
1704
|
-
logger.error(f"Error processing chat request: {e}", exc_info=True)
|
1705
1709
|
await websocket.send_text(json.dumps({"error": "Internal server error"}))
|
1710
|
+
logger.error(f"Error processing chat request: {e}", exc_info=True)
|
1706
1711
|
raise
|
1707
1712
|
|
1708
1713
|
|
khoj/routers/api_content.py
CHANGED
@@ -570,7 +570,7 @@ async def indexer(
|
|
570
570
|
file_data.content.decode(file_data.encoding) if file_data.encoding else file_data.content
|
571
571
|
)
|
572
572
|
else:
|
573
|
-
logger.
|
573
|
+
logger.debug(f"Skipped indexing unsupported file type sent by {client} client: {file_data.name}")
|
574
574
|
|
575
575
|
indexer_input = IndexerInput(
|
576
576
|
org=index_files["org"],
|
khoj/routers/helpers.py
CHANGED
@@ -1264,6 +1264,7 @@ async def extract_questions(
|
|
1264
1264
|
location_data: LocationData = None,
|
1265
1265
|
query_images: Optional[List[str]] = None,
|
1266
1266
|
query_files: str = None,
|
1267
|
+
max_queries: int = 5,
|
1267
1268
|
tracer: dict = {},
|
1268
1269
|
):
|
1269
1270
|
"""
|
@@ -1293,14 +1294,20 @@ async def extract_questions(
|
|
1293
1294
|
location=location,
|
1294
1295
|
username=username,
|
1295
1296
|
personality_context=personality_context,
|
1297
|
+
max_queries=max_queries,
|
1296
1298
|
)
|
1297
1299
|
|
1298
1300
|
prompt = prompts.extract_questions_user_message.format(text=query, chat_history=chat_history_str)
|
1299
1301
|
|
1300
1302
|
class DocumentQueries(BaseModel):
|
1301
|
-
"""Choose
|
1303
|
+
"""Choose semantic search queries to run on user documents."""
|
1302
1304
|
|
1303
|
-
queries: List[str] = Field(
|
1305
|
+
queries: List[str] = Field(
|
1306
|
+
...,
|
1307
|
+
min_length=1,
|
1308
|
+
max_length=max_queries,
|
1309
|
+
description="List of semantic search queries to run on user documents.",
|
1310
|
+
)
|
1304
1311
|
|
1305
1312
|
raw_response = await send_message_to_model_wrapper(
|
1306
1313
|
system_message=system_prompt,
|
@@ -1618,6 +1625,7 @@ async def agenerate_chat_response(
|
|
1618
1625
|
deepthought = True
|
1619
1626
|
|
1620
1627
|
chat_model = await ConversationAdapters.aget_valid_chat_model(user, conversation, is_subscribed)
|
1628
|
+
max_prompt_size = await ConversationAdapters.aget_max_context_size(chat_model, user)
|
1621
1629
|
vision_available = chat_model.vision_enabled
|
1622
1630
|
if not vision_available and query_images:
|
1623
1631
|
vision_enabled_config = await ConversationAdapters.aget_vision_enabled_config()
|
@@ -1649,7 +1657,7 @@ async def agenerate_chat_response(
|
|
1649
1657
|
model=chat_model_name,
|
1650
1658
|
api_key=api_key,
|
1651
1659
|
api_base_url=openai_chat_config.api_base_url,
|
1652
|
-
max_prompt_size=
|
1660
|
+
max_prompt_size=max_prompt_size,
|
1653
1661
|
tokenizer_name=chat_model.tokenizer,
|
1654
1662
|
agent=agent,
|
1655
1663
|
vision_available=vision_available,
|
@@ -1680,7 +1688,7 @@ async def agenerate_chat_response(
|
|
1680
1688
|
model=chat_model.name,
|
1681
1689
|
api_key=api_key,
|
1682
1690
|
api_base_url=api_base_url,
|
1683
|
-
max_prompt_size=
|
1691
|
+
max_prompt_size=max_prompt_size,
|
1684
1692
|
tokenizer_name=chat_model.tokenizer,
|
1685
1693
|
agent=agent,
|
1686
1694
|
vision_available=vision_available,
|
@@ -1710,7 +1718,7 @@ async def agenerate_chat_response(
|
|
1710
1718
|
model=chat_model.name,
|
1711
1719
|
api_key=api_key,
|
1712
1720
|
api_base_url=api_base_url,
|
1713
|
-
max_prompt_size=
|
1721
|
+
max_prompt_size=max_prompt_size,
|
1714
1722
|
tokenizer_name=chat_model.tokenizer,
|
1715
1723
|
agent=agent,
|
1716
1724
|
vision_available=vision_available,
|
@@ -2731,7 +2739,9 @@ def configure_content(
|
|
2731
2739
|
|
2732
2740
|
try:
|
2733
2741
|
# Initialize Org Notes Search
|
2734
|
-
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Org.value) and files
|
2742
|
+
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Org.value) and files.get(
|
2743
|
+
"org"
|
2744
|
+
):
|
2735
2745
|
logger.info("🦄 Setting up search for orgmode notes")
|
2736
2746
|
# Extract Entries, Generate Notes Embeddings
|
2737
2747
|
text_search.setup(
|
@@ -2746,9 +2756,9 @@ def configure_content(
|
|
2746
2756
|
|
2747
2757
|
try:
|
2748
2758
|
# Initialize Markdown Search
|
2749
|
-
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Markdown.value) and files
|
2759
|
+
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Markdown.value) and files.get(
|
2750
2760
|
"markdown"
|
2751
|
-
|
2761
|
+
):
|
2752
2762
|
logger.info("💎 Setting up search for markdown notes")
|
2753
2763
|
# Extract Entries, Generate Markdown Embeddings
|
2754
2764
|
text_search.setup(
|
@@ -2764,7 +2774,9 @@ def configure_content(
|
|
2764
2774
|
|
2765
2775
|
try:
|
2766
2776
|
# Initialize PDF Search
|
2767
|
-
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Pdf.value) and files
|
2777
|
+
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Pdf.value) and files.get(
|
2778
|
+
"pdf"
|
2779
|
+
):
|
2768
2780
|
logger.info("🖨️ Setting up search for pdf")
|
2769
2781
|
# Extract Entries, Generate PDF Embeddings
|
2770
2782
|
text_search.setup(
|
@@ -2780,9 +2792,9 @@ def configure_content(
|
|
2780
2792
|
|
2781
2793
|
try:
|
2782
2794
|
# Initialize Plaintext Search
|
2783
|
-
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Plaintext.value) and files
|
2795
|
+
if (search_type == state.SearchType.All.value or search_type == state.SearchType.Plaintext.value) and files.get(
|
2784
2796
|
"plaintext"
|
2785
|
-
|
2797
|
+
):
|
2786
2798
|
logger.info("📄 Setting up search for plaintext")
|
2787
2799
|
# Extract Entries, Generate Plaintext Embeddings
|
2788
2800
|
text_search.setup(
|
@@ -2908,35 +2920,34 @@ async def view_file_content(
|
|
2908
2920
|
raw_text = file_object.raw_text
|
2909
2921
|
|
2910
2922
|
# Apply line range filtering if specified
|
2911
|
-
|
2912
|
-
|
2913
|
-
|
2914
|
-
lines = raw_text.split("\n")
|
2915
|
-
start_line = start_line or 1
|
2916
|
-
end_line = end_line or len(lines)
|
2917
|
-
|
2918
|
-
# Validate line range
|
2919
|
-
if start_line < 1 or end_line < 1 or start_line > end_line:
|
2920
|
-
error_msg = f"Invalid line range: {start_line}-{end_line}"
|
2921
|
-
logger.warning(error_msg)
|
2922
|
-
yield [{"query": query, "file": path, "compiled": error_msg}]
|
2923
|
-
return
|
2924
|
-
if start_line > len(lines):
|
2925
|
-
error_msg = f"Start line {start_line} exceeds total number of lines {len(lines)}"
|
2926
|
-
logger.warning(error_msg)
|
2927
|
-
yield [{"query": query, "file": path, "compiled": error_msg}]
|
2928
|
-
return
|
2923
|
+
lines = raw_text.split("\n")
|
2924
|
+
start_line = start_line or 1
|
2925
|
+
end_line = end_line or len(lines)
|
2929
2926
|
|
2930
|
-
|
2931
|
-
|
2932
|
-
|
2927
|
+
# Validate line range
|
2928
|
+
if start_line < 1 or end_line < 1 or start_line > end_line:
|
2929
|
+
error_msg = f"Invalid line range: {start_line}-{end_line}"
|
2930
|
+
logger.warning(error_msg)
|
2931
|
+
yield [{"query": query, "file": path, "compiled": error_msg}]
|
2932
|
+
return
|
2933
|
+
if start_line > len(lines):
|
2934
|
+
error_msg = f"Start line {start_line} exceeds total number of lines {len(lines)}"
|
2935
|
+
logger.warning(error_msg)
|
2936
|
+
yield [{"query": query, "file": path, "compiled": error_msg}]
|
2937
|
+
return
|
2938
|
+
|
2939
|
+
# Convert from 1-based to 0-based indexing and ensure bounds
|
2940
|
+
start_idx = max(0, start_line - 1)
|
2941
|
+
end_idx = min(len(lines), end_line)
|
2933
2942
|
|
2934
|
-
|
2935
|
-
|
2943
|
+
# Limit to first 50 lines if more than 50 lines are requested
|
2944
|
+
truncation_message = ""
|
2945
|
+
if end_idx - start_idx > 50:
|
2946
|
+
truncation_message = "\n\n[Truncated after 50 lines! Use narrower line range to view complete section.]"
|
2947
|
+
end_idx = start_idx + 50
|
2936
2948
|
|
2937
|
-
|
2938
|
-
|
2939
|
-
filtered_text = filtered_text[:10000] + "\n\n[Truncated. Use line numbers to view specific sections.]"
|
2949
|
+
selected_lines = lines[start_idx:end_idx]
|
2950
|
+
filtered_text = "\n".join(selected_lines) + truncation_message
|
2940
2951
|
|
2941
2952
|
# Format the result as a document reference
|
2942
2953
|
document_results = [
|
@@ -2995,7 +3006,7 @@ async def grep_files(
|
|
2995
3006
|
lines_after = lines_after or 0
|
2996
3007
|
|
2997
3008
|
try:
|
2998
|
-
regex = re.compile(regex_pattern, re.IGNORECASE)
|
3009
|
+
regex = re.compile(regex_pattern, re.IGNORECASE | re.MULTILINE)
|
2999
3010
|
except re.error as e:
|
3000
3011
|
yield {
|
3001
3012
|
"query": _generate_query(0, 0, path_prefix, regex_pattern, lines_before, lines_after),
|
@@ -3005,9 +3016,17 @@ async def grep_files(
|
|
3005
3016
|
return
|
3006
3017
|
|
3007
3018
|
try:
|
3008
|
-
|
3019
|
+
# Make db pushdown filters more permissive by removing line anchors
|
3020
|
+
# The precise line-anchored matching will be done in Python stage
|
3021
|
+
db_pattern = regex_pattern
|
3022
|
+
db_pattern = re.sub(r"\(\?\w*\)", "", db_pattern) # Remove inline flags like (?i), (?m), (?im)
|
3023
|
+
db_pattern = re.sub(r"^\^", "", db_pattern) # Remove ^ at regex pattern start
|
3024
|
+
db_pattern = re.sub(r"\$$", "", db_pattern) # Remove $ at regex pattern end
|
3025
|
+
|
3026
|
+
file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, db_pattern, path_prefix)
|
3009
3027
|
|
3010
3028
|
line_matches = []
|
3029
|
+
line_matches_count = 0
|
3011
3030
|
for file_object in file_matches:
|
3012
3031
|
lines = file_object.raw_text.split("\n")
|
3013
3032
|
matched_line_numbers = []
|
@@ -3016,6 +3035,7 @@ async def grep_files(
|
|
3016
3035
|
for i, line in enumerate(lines, 1):
|
3017
3036
|
if regex.search(line):
|
3018
3037
|
matched_line_numbers.append(i)
|
3038
|
+
line_matches_count += len(matched_line_numbers)
|
3019
3039
|
|
3020
3040
|
# Build context for each match
|
3021
3041
|
for line_num in matched_line_numbers:
|
@@ -3032,10 +3052,10 @@ async def grep_files(
|
|
3032
3052
|
|
3033
3053
|
if current_line_num == line_num:
|
3034
3054
|
# This is the matching line, mark it
|
3035
|
-
context_lines.append(f"{file_object.file_name}:{current_line_num}
|
3055
|
+
context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
|
3036
3056
|
else:
|
3037
3057
|
# This is a context line
|
3038
|
-
context_lines.append(f"{file_object.file_name}
|
3058
|
+
context_lines.append(f"{file_object.file_name}-{current_line_num}- {line_content}")
|
3039
3059
|
|
3040
3060
|
# Add separator between matches if showing context
|
3041
3061
|
if lines_before > 0 or lines_after > 0:
|
@@ -3050,7 +3070,7 @@ async def grep_files(
|
|
3050
3070
|
# Check if no results found
|
3051
3071
|
max_results = 1000
|
3052
3072
|
query = _generate_query(
|
3053
|
-
|
3073
|
+
line_matches_count,
|
3054
3074
|
len(file_matches),
|
3055
3075
|
path_prefix,
|
3056
3076
|
regex_pattern,
|
khoj/routers/research.py
CHANGED
@@ -100,7 +100,7 @@ async def apick_next_tool(
|
|
100
100
|
ConversationCommand.Notes.value: [tool.value for tool in document_research_tools],
|
101
101
|
ConversationCommand.Webpage.value: [ConversationCommand.ReadWebpage.value],
|
102
102
|
ConversationCommand.Online.value: [ConversationCommand.SearchWeb.value],
|
103
|
-
ConversationCommand.Code.value: [ConversationCommand.
|
103
|
+
ConversationCommand.Code.value: [ConversationCommand.PythonCoder.value],
|
104
104
|
ConversationCommand.Operator.value: [ConversationCommand.OperateComputer.value],
|
105
105
|
}
|
106
106
|
for input_tool, research_tools in input_tools_to_research_tools.items():
|
@@ -197,7 +197,7 @@ async def apick_next_tool(
|
|
197
197
|
if i.warning is None and isinstance(i.query, ToolCall)
|
198
198
|
}
|
199
199
|
if (parsed_response.name, dict_to_tuple(parsed_response.args)) in previous_tool_query_combinations:
|
200
|
-
warning = "Repeated tool, query combination detected.
|
200
|
+
warning = f"Repeated tool, query combination detected. You've already called {parsed_response.name} with args: {parsed_response.args}. Try something different."
|
201
201
|
# Only send client status updates if we'll execute this iteration and model has thoughts to share.
|
202
202
|
elif send_status_func and not is_none_or_empty(response.thought):
|
203
203
|
async for event in send_status_func(response.thought):
|
@@ -412,11 +412,13 @@ async def research(
|
|
412
412
|
this_iteration.warning = f"Error reading webpages: {e}"
|
413
413
|
logger.error(this_iteration.warning, exc_info=True)
|
414
414
|
|
415
|
-
elif this_iteration.query.name == ConversationCommand.
|
415
|
+
elif this_iteration.query.name == ConversationCommand.PythonCoder:
|
416
416
|
try:
|
417
417
|
async for result in run_code(
|
418
418
|
**this_iteration.query.args,
|
419
|
-
conversation_history=construct_tool_chat_history(
|
419
|
+
conversation_history=construct_tool_chat_history(
|
420
|
+
previous_iterations, ConversationCommand.PythonCoder
|
421
|
+
),
|
420
422
|
context="",
|
421
423
|
location_data=location,
|
422
424
|
user=user,
|
@@ -433,7 +435,7 @@ async def research(
|
|
433
435
|
this_iteration.codeContext = code_results
|
434
436
|
async for result in send_status_func(f"**Ran code snippets**: {len(this_iteration.codeContext)}"):
|
435
437
|
yield result
|
436
|
-
except ValueError as e:
|
438
|
+
except (ValueError, TypeError) as e:
|
437
439
|
this_iteration.warning = f"Error running code: {e}"
|
438
440
|
logger.warning(this_iteration.warning, exc_info=True)
|
439
441
|
|