khoj 1.42.8.dev6__py3-none-any.whl → 1.42.9.dev16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +20 -0
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-2e626327abfbe612.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-9a4610474cd59a71.js → page-0006674668eb5a4d.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-f7bb9d777b7745d4.js → page-4c465cde2d14cb52.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-d6acbba22ccac0ff.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-ef738950ea1babc3.js → page-9967631715682f3c.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-2b3056cba8aa96ce.js → page-6e91caf9bc0c8aba.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-94c76c3a41db42a2.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-4885df3cd175c957.js → page-883b7d8d2e3abe3e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-8be3b35178abf2ec.js → page-95e994ddac31473f.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-95998f0bdc22bb13.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-4a4b0c0f4749c2b2.js → page-8c8c175f7f212b03.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-15412ee214acd999.js → webpack-4bf3eab7681a1206.js} +1 -1
- khoj/interface/compiled/_next/static/css/1e9b757ee2a2b34b.css +1 -0
- khoj/interface/compiled/_next/static/css/440ae0f0f650dc35.css +1 -0
- khoj/interface/compiled/_next/static/css/bd2071cad2ecf293.css +1 -0
- khoj/interface/compiled/_next/static/css/ee66643a6a5bf71c.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +4 -4
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/conversation/anthropic/anthropic_chat.py +11 -2
- khoj/processor/conversation/anthropic/utils.py +90 -103
- khoj/processor/conversation/google/gemini_chat.py +4 -1
- khoj/processor/conversation/google/utils.py +80 -18
- khoj/processor/conversation/offline/chat_model.py +3 -3
- khoj/processor/conversation/openai/gpt.py +13 -38
- khoj/processor/conversation/openai/utils.py +113 -12
- khoj/processor/conversation/prompts.py +17 -35
- khoj/processor/conversation/utils.py +128 -57
- khoj/processor/operator/grounding_agent.py +1 -1
- khoj/processor/operator/operator_agent_binary.py +4 -3
- khoj/processor/tools/online_search.py +18 -0
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api_chat.py +1 -1
- khoj/routers/helpers.py +293 -26
- khoj/routers/research.py +169 -155
- khoj/utils/helpers.py +284 -8
- {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev16.dist-info}/METADATA +1 -1
- {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev16.dist-info}/RECORD +62 -62
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +0 -1
- khoj/interface/compiled/_next/static/css/37a73b87f02df402.css +0 -1
- khoj/interface/compiled/_next/static/css/76c658ee459140a9.css +0 -1
- khoj/interface/compiled/_next/static/css/821d0d60b0b6871d.css +0 -1
- khoj/interface/compiled/_next/static/css/e6da1287d41f5409.css +0 -1
- /khoj/interface/compiled/_next/static/chunks/{1327-1a9107b9a2a04a98.js → 1327-3b1a41af530fa8ee.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-5c6508f6ebb62a30.js → 1915-fbfe167c84ad60c5.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-080746c8e170c81a.js → 2117-e78b6902ad6f75ec.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4af3fd24b8ffc9ad.js → 2939-4d4084c5b888b960.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-cd95608f8e93e711.js → 4447-d6cf93724d57e34b.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-50b03a89e82e0ba7.js → 8667-4b7790573b08c50d.js} +0 -0
- /khoj/interface/compiled/_next/static/{cJdFAXV3MR9BSimUwQ40G → w19FJJa9p2AFJB6DEektd}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{cJdFAXV3MR9BSimUwQ40G → w19FJJa9p2AFJB6DEektd}/_ssgManifest.js +0 -0
- {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev16.dist-info}/WHEEL +0 -0
- {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev16.dist-info}/entry_points.txt +0 -0
- {khoj-1.42.8.dev6.dist-info → khoj-1.42.9.dev16.dist-info}/licenses/LICENSE +0 -0
@@ -121,7 +121,7 @@ class BinaryOperatorAgent(OperatorAgent):
|
|
121
121
|
# Construct input for visual reasoner history
|
122
122
|
visual_reasoner_history = self._format_message_for_api(self.messages)
|
123
123
|
try:
|
124
|
-
|
124
|
+
raw_response = await send_message_to_model_wrapper(
|
125
125
|
query=query_text,
|
126
126
|
query_images=query_screenshot,
|
127
127
|
system_message=reasoning_system_prompt,
|
@@ -129,6 +129,7 @@ class BinaryOperatorAgent(OperatorAgent):
|
|
129
129
|
agent_chat_model=self.reasoning_model,
|
130
130
|
tracer=self.tracer,
|
131
131
|
)
|
132
|
+
natural_language_action = raw_response.text
|
132
133
|
|
133
134
|
if not isinstance(natural_language_action, str) or not natural_language_action.strip():
|
134
135
|
raise ValueError(f"Natural language action is empty or not a string. Got {natural_language_action}")
|
@@ -255,10 +256,10 @@ class BinaryOperatorAgent(OperatorAgent):
|
|
255
256
|
|
256
257
|
# Append summary messages to history
|
257
258
|
trigger_summary = AgentMessage(role="user", content=summarize_prompt)
|
258
|
-
summary_message = AgentMessage(role="assistant", content=summary)
|
259
|
+
summary_message = AgentMessage(role="assistant", content=summary.text)
|
259
260
|
self.messages.extend([trigger_summary, summary_message])
|
260
261
|
|
261
|
-
return summary
|
262
|
+
return summary.text
|
262
263
|
|
263
264
|
def _compile_response(self, response_content: str | List) -> str:
|
264
265
|
"""Compile response content into a string, handling OpenAI message structures."""
|
@@ -390,7 +390,25 @@ async def read_webpages(
|
|
390
390
|
query_files=query_files,
|
391
391
|
tracer=tracer,
|
392
392
|
)
|
393
|
+
async for result in read_webpages_content(
|
394
|
+
query,
|
395
|
+
urls,
|
396
|
+
user,
|
397
|
+
send_status_func=send_status_func,
|
398
|
+
agent=agent,
|
399
|
+
tracer=tracer,
|
400
|
+
):
|
401
|
+
yield result
|
402
|
+
|
393
403
|
|
404
|
+
async def read_webpages_content(
|
405
|
+
query: str,
|
406
|
+
urls: List[str],
|
407
|
+
user: KhojUser,
|
408
|
+
send_status_func: Optional[Callable] = None,
|
409
|
+
agent: Agent = None,
|
410
|
+
tracer: dict = {},
|
411
|
+
):
|
394
412
|
logger.info(f"Reading web pages at: {urls}")
|
395
413
|
if send_status_func:
|
396
414
|
webpage_links_str = "\n- " + "\n- ".join(list(urls))
|
khoj/processor/tools/run_code.py
CHANGED
@@ -161,7 +161,7 @@ async def generate_python_code(
|
|
161
161
|
)
|
162
162
|
|
163
163
|
# Extract python code wrapped in markdown code blocks from the response
|
164
|
-
code_blocks = re.findall(r"```(?:python)?\n(.*?)```", response, re.DOTALL)
|
164
|
+
code_blocks = re.findall(r"```(?:python)?\n(.*?)```", response.text, re.DOTALL)
|
165
165
|
|
166
166
|
if not code_blocks:
|
167
167
|
raise ValueError("No Python code blocks found in response")
|
khoj/routers/api_chat.py
CHANGED
@@ -1390,7 +1390,7 @@ async def chat(
|
|
1390
1390
|
continue
|
1391
1391
|
if cancellation_event.is_set():
|
1392
1392
|
break
|
1393
|
-
message = item.
|
1393
|
+
message = item.text
|
1394
1394
|
full_response += message if message else ""
|
1395
1395
|
if item.thought:
|
1396
1396
|
async for result in send_event(ChatEvent.THOUGHT, item.thought):
|
khoj/routers/helpers.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
import asyncio
|
2
2
|
import base64
|
3
3
|
import concurrent.futures
|
4
|
+
import fnmatch
|
4
5
|
import hashlib
|
5
6
|
import json
|
6
7
|
import logging
|
@@ -120,6 +121,7 @@ from khoj.utils.config import OfflineChatProcessorModel
|
|
120
121
|
from khoj.utils.helpers import (
|
121
122
|
LRU,
|
122
123
|
ConversationCommand,
|
124
|
+
ToolDefinition,
|
123
125
|
get_file_type,
|
124
126
|
in_debug_mode,
|
125
127
|
is_none_or_empty,
|
@@ -303,7 +305,7 @@ async def acreate_title_from_history(
|
|
303
305
|
with timer("Chat actor: Generate title from conversation history", logger):
|
304
306
|
response = await send_message_to_model_wrapper(title_generation_prompt, user=user)
|
305
307
|
|
306
|
-
return response.strip()
|
308
|
+
return response.text.strip()
|
307
309
|
|
308
310
|
|
309
311
|
async def acreate_title_from_query(query: str, user: KhojUser = None) -> str:
|
@@ -315,7 +317,7 @@ async def acreate_title_from_query(query: str, user: KhojUser = None) -> str:
|
|
315
317
|
with timer("Chat actor: Generate title from query", logger):
|
316
318
|
response = await send_message_to_model_wrapper(title_generation_prompt, user=user)
|
317
319
|
|
318
|
-
return response.strip()
|
320
|
+
return response.text.strip()
|
319
321
|
|
320
322
|
|
321
323
|
async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None, lax: bool = False) -> Tuple[bool, str]:
|
@@ -339,7 +341,7 @@ async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None, lax:
|
|
339
341
|
safe_prompt_check, user=user, response_type="json_object", response_schema=SafetyCheck
|
340
342
|
)
|
341
343
|
|
342
|
-
response = response.strip()
|
344
|
+
response = response.text.strip()
|
343
345
|
try:
|
344
346
|
response = json.loads(clean_json(response))
|
345
347
|
is_safe = str(response.get("safe", "true")).lower() == "true"
|
@@ -418,7 +420,7 @@ async def aget_data_sources_and_output_format(
|
|
418
420
|
output: str
|
419
421
|
|
420
422
|
with timer("Chat actor: Infer information sources to refer", logger):
|
421
|
-
|
423
|
+
raw_response = await send_message_to_model_wrapper(
|
422
424
|
relevant_tools_prompt,
|
423
425
|
response_type="json_object",
|
424
426
|
response_schema=PickTools,
|
@@ -429,7 +431,7 @@ async def aget_data_sources_and_output_format(
|
|
429
431
|
)
|
430
432
|
|
431
433
|
try:
|
432
|
-
response = clean_json(
|
434
|
+
response = clean_json(raw_response.text)
|
433
435
|
response = json.loads(response)
|
434
436
|
|
435
437
|
chosen_sources = [s.strip() for s in response.get("source", []) if s.strip()]
|
@@ -506,7 +508,7 @@ async def infer_webpage_urls(
|
|
506
508
|
links: List[str] = Field(..., min_items=1, max_items=max_webpages)
|
507
509
|
|
508
510
|
with timer("Chat actor: Infer webpage urls to read", logger):
|
509
|
-
|
511
|
+
raw_response = await send_message_to_model_wrapper(
|
510
512
|
online_queries_prompt,
|
511
513
|
query_images=query_images,
|
512
514
|
response_type="json_object",
|
@@ -519,7 +521,7 @@ async def infer_webpage_urls(
|
|
519
521
|
|
520
522
|
# Validate that the response is a non-empty, JSON-serializable list of URLs
|
521
523
|
try:
|
522
|
-
response = clean_json(
|
524
|
+
response = clean_json(raw_response.text)
|
523
525
|
urls = json.loads(response)
|
524
526
|
valid_unique_urls = {str(url).strip() for url in urls["links"] if is_valid_url(url)}
|
525
527
|
if is_none_or_empty(valid_unique_urls):
|
@@ -571,7 +573,7 @@ async def generate_online_subqueries(
|
|
571
573
|
queries: List[str] = Field(..., min_items=1, max_items=max_queries)
|
572
574
|
|
573
575
|
with timer("Chat actor: Generate online search subqueries", logger):
|
574
|
-
|
576
|
+
raw_response = await send_message_to_model_wrapper(
|
575
577
|
online_queries_prompt,
|
576
578
|
query_images=query_images,
|
577
579
|
response_type="json_object",
|
@@ -584,7 +586,7 @@ async def generate_online_subqueries(
|
|
584
586
|
|
585
587
|
# Validate that the response is a non-empty, JSON-serializable list
|
586
588
|
try:
|
587
|
-
response = clean_json(
|
589
|
+
response = clean_json(raw_response.text)
|
588
590
|
response = pyjson5.loads(response)
|
589
591
|
response = {q.strip() for q in response["queries"] if q.strip()}
|
590
592
|
if not isinstance(response, set) or not response or len(response) == 0:
|
@@ -645,7 +647,7 @@ async def aschedule_query(
|
|
645
647
|
|
646
648
|
# Validate that the response is a non-empty, JSON-serializable list
|
647
649
|
try:
|
648
|
-
raw_response = raw_response.strip()
|
650
|
+
raw_response = raw_response.text.strip()
|
649
651
|
response: Dict[str, str] = json.loads(clean_json(raw_response))
|
650
652
|
if not response or not isinstance(response, Dict) or len(response) != 3:
|
651
653
|
raise AssertionError(f"Invalid response for scheduling query : {response}")
|
@@ -683,7 +685,7 @@ async def extract_relevant_info(
|
|
683
685
|
agent_chat_model=agent_chat_model,
|
684
686
|
tracer=tracer,
|
685
687
|
)
|
686
|
-
return response.strip()
|
688
|
+
return response.text.strip()
|
687
689
|
|
688
690
|
|
689
691
|
async def extract_relevant_summary(
|
@@ -726,7 +728,7 @@ async def extract_relevant_summary(
|
|
726
728
|
agent_chat_model=agent_chat_model,
|
727
729
|
tracer=tracer,
|
728
730
|
)
|
729
|
-
return response.strip()
|
731
|
+
return response.text.strip()
|
730
732
|
|
731
733
|
|
732
734
|
async def generate_summary_from_files(
|
@@ -897,7 +899,7 @@ async def generate_better_diagram_description(
|
|
897
899
|
agent_chat_model=agent_chat_model,
|
898
900
|
tracer=tracer,
|
899
901
|
)
|
900
|
-
response = response.strip()
|
902
|
+
response = response.text.strip()
|
901
903
|
if response.startswith(('"', "'")) and response.endswith(('"', "'")):
|
902
904
|
response = response[1:-1]
|
903
905
|
|
@@ -925,10 +927,10 @@ async def generate_excalidraw_diagram_from_description(
|
|
925
927
|
raw_response = await send_message_to_model_wrapper(
|
926
928
|
query=excalidraw_diagram_generation, user=user, agent_chat_model=agent_chat_model, tracer=tracer
|
927
929
|
)
|
928
|
-
|
930
|
+
raw_response_text = clean_json(raw_response.text)
|
929
931
|
try:
|
930
932
|
# Expect response to have `elements` and `scratchpad` keys
|
931
|
-
response: Dict[str, str] = json.loads(
|
933
|
+
response: Dict[str, str] = json.loads(raw_response_text)
|
932
934
|
if (
|
933
935
|
not response
|
934
936
|
or not isinstance(response, Dict)
|
@@ -937,7 +939,7 @@ async def generate_excalidraw_diagram_from_description(
|
|
937
939
|
):
|
938
940
|
raise AssertionError(f"Invalid response for generating Excalidraw diagram: {response}")
|
939
941
|
except Exception:
|
940
|
-
raise AssertionError(f"Invalid response for generating Excalidraw diagram: {
|
942
|
+
raise AssertionError(f"Invalid response for generating Excalidraw diagram: {raw_response_text}")
|
941
943
|
if not response or not isinstance(response["elements"], List) or not isinstance(response["elements"][0], Dict):
|
942
944
|
# TODO Some additional validation here that it's a valid Excalidraw diagram
|
943
945
|
raise AssertionError(f"Invalid response for improving diagram description: {response}")
|
@@ -1048,11 +1050,11 @@ async def generate_better_mermaidjs_diagram_description(
|
|
1048
1050
|
agent_chat_model=agent_chat_model,
|
1049
1051
|
tracer=tracer,
|
1050
1052
|
)
|
1051
|
-
|
1052
|
-
if
|
1053
|
-
|
1053
|
+
response_text = response.text.strip()
|
1054
|
+
if response_text.startswith(('"', "'")) and response_text.endswith(('"', "'")):
|
1055
|
+
response_text = response_text[1:-1]
|
1054
1056
|
|
1055
|
-
return
|
1057
|
+
return response_text
|
1056
1058
|
|
1057
1059
|
|
1058
1060
|
async def generate_mermaidjs_diagram_from_description(
|
@@ -1076,7 +1078,7 @@ async def generate_mermaidjs_diagram_from_description(
|
|
1076
1078
|
raw_response = await send_message_to_model_wrapper(
|
1077
1079
|
query=mermaidjs_diagram_generation, user=user, agent_chat_model=agent_chat_model, tracer=tracer
|
1078
1080
|
)
|
1079
|
-
return clean_mermaidjs(raw_response.strip())
|
1081
|
+
return clean_mermaidjs(raw_response.text.strip())
|
1080
1082
|
|
1081
1083
|
|
1082
1084
|
async def generate_better_image_prompt(
|
@@ -1151,11 +1153,11 @@ async def generate_better_image_prompt(
|
|
1151
1153
|
agent_chat_model=agent_chat_model,
|
1152
1154
|
tracer=tracer,
|
1153
1155
|
)
|
1154
|
-
|
1155
|
-
if
|
1156
|
-
|
1156
|
+
response_text = response.text.strip()
|
1157
|
+
if response_text.startswith(('"', "'")) and response_text.endswith(('"', "'")):
|
1158
|
+
response_text = response_text[1:-1]
|
1157
1159
|
|
1158
|
-
return
|
1160
|
+
return response_text
|
1159
1161
|
|
1160
1162
|
|
1161
1163
|
async def search_documents(
|
@@ -1329,7 +1331,7 @@ async def extract_questions(
|
|
1329
1331
|
|
1330
1332
|
# Extract questions from the response
|
1331
1333
|
try:
|
1332
|
-
response = clean_json(raw_response)
|
1334
|
+
response = clean_json(raw_response.text)
|
1333
1335
|
response = pyjson5.loads(response)
|
1334
1336
|
queries = [q.strip() for q in response["queries"] if q.strip()]
|
1335
1337
|
if not isinstance(queries, list) or not queries:
|
@@ -1439,6 +1441,7 @@ async def send_message_to_model_wrapper(
|
|
1439
1441
|
system_message: str = "",
|
1440
1442
|
response_type: str = "text",
|
1441
1443
|
response_schema: BaseModel = None,
|
1444
|
+
tools: List[ToolDefinition] = None,
|
1442
1445
|
deepthought: bool = False,
|
1443
1446
|
user: KhojUser = None,
|
1444
1447
|
query_images: List[str] = None,
|
@@ -1506,6 +1509,7 @@ async def send_message_to_model_wrapper(
|
|
1506
1509
|
model=chat_model_name,
|
1507
1510
|
response_type=response_type,
|
1508
1511
|
response_schema=response_schema,
|
1512
|
+
tools=tools,
|
1509
1513
|
deepthought=deepthought,
|
1510
1514
|
api_base_url=api_base_url,
|
1511
1515
|
tracer=tracer,
|
@@ -1517,6 +1521,7 @@ async def send_message_to_model_wrapper(
|
|
1517
1521
|
model=chat_model_name,
|
1518
1522
|
response_type=response_type,
|
1519
1523
|
response_schema=response_schema,
|
1524
|
+
tools=tools,
|
1520
1525
|
deepthought=deepthought,
|
1521
1526
|
api_base_url=api_base_url,
|
1522
1527
|
tracer=tracer,
|
@@ -1528,6 +1533,7 @@ async def send_message_to_model_wrapper(
|
|
1528
1533
|
model=chat_model_name,
|
1529
1534
|
response_type=response_type,
|
1530
1535
|
response_schema=response_schema,
|
1536
|
+
tools=tools,
|
1531
1537
|
deepthought=deepthought,
|
1532
1538
|
api_base_url=api_base_url,
|
1533
1539
|
tracer=tracer,
|
@@ -2796,3 +2802,264 @@ def get_notion_auth_url(user: KhojUser):
|
|
2796
2802
|
if not NOTION_OAUTH_CLIENT_ID or not NOTION_OAUTH_CLIENT_SECRET or not NOTION_REDIRECT_URI:
|
2797
2803
|
return None
|
2798
2804
|
return f"https://api.notion.com/v1/oauth/authorize?client_id={NOTION_OAUTH_CLIENT_ID}&redirect_uri={NOTION_REDIRECT_URI}&response_type=code&state={user.uuid}"
|
2805
|
+
|
2806
|
+
|
2807
|
+
async def view_file_content(
    path: str,
    start_line: Optional[int] = None,
    end_line: Optional[int] = None,
    user: KhojUser = None,
):
    """
    Yield the text of a user's file, optionally limited to a 1-based inclusive line range.

    Exactly one value is yielded: a single-item list holding a dict with the keys
    "query", "file" and "compiled". "compiled" carries either the (possibly truncated)
    file text or a human readable error message. Failures are logged and reported
    through the yielded value instead of being raised.

    Args:
        path: Name of the file to look up via FileObjectAdapters.aget_file_objects_by_name.
        start_line: First line to include. Falls back to 1 when falsy and a range is requested.
        end_line: Last line to include. Falls back to the last line when falsy and a range is requested.
        user: User passed through to the file lookup.
    """
    query = f"View file: {path}"
    if start_line and end_line:
        query += f" (lines {start_line}-{end_line})"

    def _reference(text):
        # Every outcome, success or error, is reported in the same document reference shape
        return [{"query": query, "file": path, "compiled": text}]

    try:
        matches = await FileObjectAdapters.aget_file_objects_by_name(user, path)

        if not matches:
            error_msg = f"File '{path}' not found in user documents"
            logger.warning(error_msg)
            yield _reference(error_msg)
            return

        # When several file objects share the name, the first one is used
        content = matches[0].raw_text

        # Only slice the text when the caller asked for some line range
        if start_line is not None or end_line is not None:
            all_lines = content.split("\n")
            total = len(all_lines)
            first = start_line or 1
            last = end_line or total

            if first < 1 or last < 1 or first > last:
                error_msg = f"Invalid line range: {first}-{last}"
                logger.warning(error_msg)
                yield _reference(error_msg)
                return
            if first > total:
                error_msg = f"Start line {first} exceeds total number of lines {total}"
                logger.warning(error_msg)
                yield _reference(error_msg)
                return

            # 1-based inclusive range -> 0-based half-open slice, clamped to the file length
            content = "\n".join(all_lines[max(0, first - 1) : min(total, last)])

        # Cap the returned text at 10000 characters
        if len(content) > 10000:
            content = content[:10000] + "\n\n[Truncated. Use line numbers to view specific sections.]"

        yield _reference(content)

    except Exception as e:
        error_msg = f"Error viewing file {path}: {str(e)}"
        logger.error(error_msg, exc_info=True)

        # Report the failure in the same shape as a successful result
        yield _reference(error_msg)
|
2882
|
+
|
2883
|
+
|
2884
|
+
async def grep_files(
    regex_pattern: str,
    path_prefix: Optional[str] = None,
    lines_before: Optional[int] = None,
    lines_after: Optional[int] = None,
    user: KhojUser = None,
):
    """
    Search for a regex pattern in files with an optional path prefix and context lines.

    Exactly one dict with the keys "query", "file" and "compiled" is yielded on every
    path, including the error paths. "compiled" holds the matching lines rendered as
    "<file_name>:<line_no>:> <text>", context lines rendered as "<file_name>:<line_no>: <text>",
    "No matches found.", or an error message. Errors are logged and reported through the
    yielded value instead of being raised.

    Args:
        regex_pattern: Pattern to search for. Lines are matched case-insensitively.
        path_prefix: Passed to FileObjectAdapters.aget_file_objects_by_regex to narrow the files searched.
        lines_before: Number of context lines to show before each match. Negative values are treated as 0.
        lines_after: Number of context lines to show after each match. Negative values are treated as 0.
        user: User passed through to the file lookup.
    """

    # Construct the query string based on provided parameters
    def _generate_query(line_count, doc_count, path, pattern, lines_before, lines_after, max_results=1000):
        query = f"**Found {line_count} matches for '{pattern}' in {doc_count} documents**"
        if path:
            query += f" in {path}"
        if lines_before or lines_after or line_count > max_results:
            query += " Showing"
        if lines_before or lines_after:
            context_info = []
            if lines_before:
                context_info.append(f"{lines_before} lines before")
            if lines_after:
                context_info.append(f"{lines_after} lines after")
            query += f" {' and '.join(context_info)}"
        if line_count > max_results:
            if lines_before or lines_after:
                query += " for"
            query += f" first {max_results} results"
        return query

    # Normalize optional parameters.
    # A negative context size would slice the matching line itself out of the output, so clamp to 0.
    path_prefix = path_prefix or ""
    lines_before = max(0, lines_before or 0)
    lines_after = max(0, lines_after or 0)

    # Validate regex pattern
    try:
        regex = re.compile(regex_pattern, re.IGNORECASE)
    except re.error as e:
        yield {
            "query": _generate_query(0, 0, path_prefix, regex_pattern, lines_before, lines_after),
            "file": path_prefix,
            "compiled": f"Invalid regex pattern: {e}",
        }
        return

    try:
        file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, regex_pattern, path_prefix)

        show_context = lines_before > 0 or lines_after > 0
        line_matches = []
        # Count matches directly. Scanning rendered lines for ":>" would also count
        # context lines whose own text happens to contain ":>".
        match_count = 0
        for file_object in file_matches:
            lines = file_object.raw_text.split("\n")

            # Find all matching line numbers (1-based) first
            matched_line_numbers = [i for i, line in enumerate(lines, 1) if regex.search(line)]
            match_count += len(matched_line_numbers)

            # Build context for each match
            for line_num in matched_line_numbers:
                # Calculate start and end indices for context (0-based, half-open)
                start_idx = max(0, line_num - 1 - lines_before)
                end_idx = min(len(lines), line_num + lines_after)

                for idx in range(start_idx, end_idx):
                    current_line_num = idx + 1
                    # ":>" marks the matching line, ":" marks a context line
                    marker = ":>" if current_line_num == line_num else ":"
                    line_matches.append(f"{file_object.file_name}:{current_line_num}{marker} {lines[idx]}")

                # Add separator between matches if showing context
                if show_context:
                    line_matches.append("--")

        # Remove the last separator if it exists
        if line_matches and line_matches[-1] == "--":
            line_matches.pop()

        max_results = 1000
        query = _generate_query(
            match_count,
            len(file_matches),
            path_prefix,
            regex_pattern,
            lines_before,
            lines_after,
            max_results,
        )

        # Check if no results found
        if not line_matches:
            yield {"query": query, "file": path_prefix, "compiled": "No matches found."}
            return

        # Truncate matched lines list if too long
        if len(line_matches) > max_results:
            line_matches = line_matches[:max_results] + [
                f"... {len(line_matches) - max_results} more results found. Use stricter regex or path to narrow down results."
            ]

        yield {"query": query, "file": path_prefix, "compiled": "\n".join(line_matches)}

    except Exception as e:
        error_msg = f"Error using grep files tool: {str(e)}"
        logger.error(error_msg, exc_info=True)
        # Yield a bare dict, like every other path of this generator, so consumers see one shape
        yield {
            "query": _generate_query(0, 0, path_prefix, regex_pattern, lines_before, lines_after),
            "file": path_prefix,
            "compiled": error_msg,
        }
|
3006
|
+
|
3007
|
+
|
3008
|
+
async def list_files(
    path: Optional[str] = None,
    pattern: Optional[str] = None,
    user: KhojUser = None,
):
    """
    List files under a given path or glob pattern from the user's document database.

    Exactly one dict with the keys "query", "file" and "compiled" is yielded.
    "compiled" holds the file names joined with "\\n- ", "No files found.", or an
    error message. Errors are logged and reported through the yielded value instead
    of being raised.

    Args:
        path: Path prefix to list. An empty value or one of "/", ".", "./", "~", "~/"
            lists all of the user's files (capped at 10000 by the lookup call).
        pattern: Optional glob applied with fnmatch to the listed (relative) names.
        user: User passed through to the file lookup.
    """

    # Construct the query string based on provided parameters
    def _generate_query(doc_count, path, pattern):
        query = f"**Found {doc_count} files**"
        if path:
            query += f" in {path}"
        if pattern:
            query += f" filtered by {pattern}"
        return query

    path = path or ""
    # Bind query up front so the except handler can always build a result.
    # Otherwise a failing lookup raises UnboundLocalError from the handler and hides the real error.
    query = _generate_query(0, path, pattern)

    try:
        # Get user files by path prefix when specified
        if path in ["", "/", ".", "./", "~", "~/"]:
            file_objects = await FileObjectAdapters.aget_all_file_objects(user, limit=10000)
        else:
            file_objects = await FileObjectAdapters.aget_file_objects_by_path_prefix(user, path)

        if not file_objects:
            yield {"query": _generate_query(0, path, pattern), "file": path, "compiled": "No files found."}
            return

        # Extract file names from file objects
        files = [f.file_name for f in file_objects]
        # Convert to relative file path (similar to ls).
        # Only strip the prefix from names that actually start with it: root aliases like "."
        # or "~/" fetch every file, and blindly cutting len(path) characters would corrupt them.
        if path:
            files = [f[len(path) :] if f.startswith(path) else f for f in files]

        # Apply glob pattern filtering if specified
        if pattern:
            files = [f for f in files if fnmatch.fnmatch(f, pattern)]

        query = _generate_query(len(files), path, pattern)
        if not files:
            yield {"query": query, "file": path, "compiled": "No files found."}
            return

        # Truncate the list if it's too long
        max_files = 100
        if len(files) > max_files:
            files = files[:max_files] + [
                f"... {len(files) - max_files} more files found. Use glob pattern to narrow down results."
            ]

        yield {"query": query, "file": path, "compiled": "\n- ".join(files)}

    except Exception as e:
        error_msg = f"Error listing files in {path}: {str(e)}"
        logger.error(error_msg, exc_info=True)
        yield {"query": query, "file": path, "compiled": error_msg}
|