khoj 1.42.9.dev26__py3-none-any.whl → 1.42.10.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/database/adapters/__init__.py +0 -20
- khoj/database/models/__init__.py +0 -1
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/app/chat/page-4c6b873a4a5c7d2f.js +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +4 -4
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/processor/content/markdown/markdown_to_entries.py +9 -38
- khoj/processor/content/org_mode/org_to_entries.py +2 -18
- khoj/processor/content/org_mode/orgnode.py +16 -18
- khoj/processor/content/text_to_entries.py +0 -30
- khoj/processor/conversation/anthropic/anthropic_chat.py +2 -11
- khoj/processor/conversation/anthropic/utils.py +103 -90
- khoj/processor/conversation/google/gemini_chat.py +1 -4
- khoj/processor/conversation/google/utils.py +18 -80
- khoj/processor/conversation/offline/chat_model.py +3 -3
- khoj/processor/conversation/openai/gpt.py +38 -13
- khoj/processor/conversation/openai/utils.py +12 -113
- khoj/processor/conversation/prompts.py +35 -17
- khoj/processor/conversation/utils.py +58 -129
- khoj/processor/operator/grounding_agent.py +1 -1
- khoj/processor/operator/operator_agent_binary.py +3 -4
- khoj/processor/tools/online_search.py +0 -18
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api_chat.py +1 -1
- khoj/routers/api_content.py +6 -6
- khoj/routers/helpers.py +27 -297
- khoj/routers/research.py +155 -169
- khoj/search_type/text_search.py +0 -2
- khoj/utils/helpers.py +8 -284
- khoj/utils/initialization.py +2 -0
- khoj/utils/rawconfig.py +0 -11
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/METADATA +1 -1
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/RECORD +57 -57
- khoj/interface/compiled/_next/static/chunks/app/chat/page-76fc915800aa90f4.js +0 -1
- /khoj/interface/compiled/_next/static/chunks/{1327-3b1a41af530fa8ee.js → 1327-1a9107b9a2a04a98.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{webpack-70e0762712341826.js → webpack-92ce8aaf95718ec4.js} +0 -0
- /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → cuzJcS32_a4L4a6gCZ63y}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{IYGyer2N7GdUJ7QHFghtY → cuzJcS32_a4L4a6gCZ63y}/_ssgManifest.js +0 -0
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/WHEEL +0 -0
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/entry_points.txt +0 -0
- {khoj-1.42.9.dev26.dist-info → khoj-1.42.10.dev2.dist-info}/licenses/LICENSE +0 -0
khoj/routers/helpers.py
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
import asyncio
|
2
2
|
import base64
|
3
3
|
import concurrent.futures
|
4
|
-
import fnmatch
|
5
4
|
import hashlib
|
6
5
|
import json
|
7
6
|
import logging
|
@@ -121,7 +120,6 @@ from khoj.utils.config import OfflineChatProcessorModel
|
|
121
120
|
from khoj.utils.helpers import (
|
122
121
|
LRU,
|
123
122
|
ConversationCommand,
|
124
|
-
ToolDefinition,
|
125
123
|
get_file_type,
|
126
124
|
in_debug_mode,
|
127
125
|
is_none_or_empty,
|
@@ -305,7 +303,7 @@ async def acreate_title_from_history(
|
|
305
303
|
with timer("Chat actor: Generate title from conversation history", logger):
|
306
304
|
response = await send_message_to_model_wrapper(title_generation_prompt, user=user)
|
307
305
|
|
308
|
-
return response.
|
306
|
+
return response.strip()
|
309
307
|
|
310
308
|
|
311
309
|
async def acreate_title_from_query(query: str, user: KhojUser = None) -> str:
|
@@ -317,7 +315,7 @@ async def acreate_title_from_query(query: str, user: KhojUser = None) -> str:
|
|
317
315
|
with timer("Chat actor: Generate title from query", logger):
|
318
316
|
response = await send_message_to_model_wrapper(title_generation_prompt, user=user)
|
319
317
|
|
320
|
-
return response.
|
318
|
+
return response.strip()
|
321
319
|
|
322
320
|
|
323
321
|
async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None, lax: bool = False) -> Tuple[bool, str]:
|
@@ -341,7 +339,7 @@ async def acheck_if_safe_prompt(system_prompt: str, user: KhojUser = None, lax:
|
|
341
339
|
safe_prompt_check, user=user, response_type="json_object", response_schema=SafetyCheck
|
342
340
|
)
|
343
341
|
|
344
|
-
response = response.
|
342
|
+
response = response.strip()
|
345
343
|
try:
|
346
344
|
response = json.loads(clean_json(response))
|
347
345
|
is_safe = str(response.get("safe", "true")).lower() == "true"
|
@@ -420,7 +418,7 @@ async def aget_data_sources_and_output_format(
|
|
420
418
|
output: str
|
421
419
|
|
422
420
|
with timer("Chat actor: Infer information sources to refer", logger):
|
423
|
-
|
421
|
+
response = await send_message_to_model_wrapper(
|
424
422
|
relevant_tools_prompt,
|
425
423
|
response_type="json_object",
|
426
424
|
response_schema=PickTools,
|
@@ -431,7 +429,7 @@ async def aget_data_sources_and_output_format(
|
|
431
429
|
)
|
432
430
|
|
433
431
|
try:
|
434
|
-
response = clean_json(
|
432
|
+
response = clean_json(response)
|
435
433
|
response = json.loads(response)
|
436
434
|
|
437
435
|
chosen_sources = [s.strip() for s in response.get("source", []) if s.strip()]
|
@@ -508,7 +506,7 @@ async def infer_webpage_urls(
|
|
508
506
|
links: List[str] = Field(..., min_items=1, max_items=max_webpages)
|
509
507
|
|
510
508
|
with timer("Chat actor: Infer webpage urls to read", logger):
|
511
|
-
|
509
|
+
response = await send_message_to_model_wrapper(
|
512
510
|
online_queries_prompt,
|
513
511
|
query_images=query_images,
|
514
512
|
response_type="json_object",
|
@@ -521,7 +519,7 @@ async def infer_webpage_urls(
|
|
521
519
|
|
522
520
|
# Validate that the response is a non-empty, JSON-serializable list of URLs
|
523
521
|
try:
|
524
|
-
response = clean_json(
|
522
|
+
response = clean_json(response)
|
525
523
|
urls = json.loads(response)
|
526
524
|
valid_unique_urls = {str(url).strip() for url in urls["links"] if is_valid_url(url)}
|
527
525
|
if is_none_or_empty(valid_unique_urls):
|
@@ -573,7 +571,7 @@ async def generate_online_subqueries(
|
|
573
571
|
queries: List[str] = Field(..., min_items=1, max_items=max_queries)
|
574
572
|
|
575
573
|
with timer("Chat actor: Generate online search subqueries", logger):
|
576
|
-
|
574
|
+
response = await send_message_to_model_wrapper(
|
577
575
|
online_queries_prompt,
|
578
576
|
query_images=query_images,
|
579
577
|
response_type="json_object",
|
@@ -586,7 +584,7 @@ async def generate_online_subqueries(
|
|
586
584
|
|
587
585
|
# Validate that the response is a non-empty, JSON-serializable list
|
588
586
|
try:
|
589
|
-
response = clean_json(
|
587
|
+
response = clean_json(response)
|
590
588
|
response = pyjson5.loads(response)
|
591
589
|
response = {q.strip() for q in response["queries"] if q.strip()}
|
592
590
|
if not isinstance(response, set) or not response or len(response) == 0:
|
@@ -647,7 +645,7 @@ async def aschedule_query(
|
|
647
645
|
|
648
646
|
# Validate that the response is a non-empty, JSON-serializable list
|
649
647
|
try:
|
650
|
-
raw_response = raw_response.
|
648
|
+
raw_response = raw_response.strip()
|
651
649
|
response: Dict[str, str] = json.loads(clean_json(raw_response))
|
652
650
|
if not response or not isinstance(response, Dict) or len(response) != 3:
|
653
651
|
raise AssertionError(f"Invalid response for scheduling query : {response}")
|
@@ -685,7 +683,7 @@ async def extract_relevant_info(
|
|
685
683
|
agent_chat_model=agent_chat_model,
|
686
684
|
tracer=tracer,
|
687
685
|
)
|
688
|
-
return response.
|
686
|
+
return response.strip()
|
689
687
|
|
690
688
|
|
691
689
|
async def extract_relevant_summary(
|
@@ -728,7 +726,7 @@ async def extract_relevant_summary(
|
|
728
726
|
agent_chat_model=agent_chat_model,
|
729
727
|
tracer=tracer,
|
730
728
|
)
|
731
|
-
return response.
|
729
|
+
return response.strip()
|
732
730
|
|
733
731
|
|
734
732
|
async def generate_summary_from_files(
|
@@ -899,7 +897,7 @@ async def generate_better_diagram_description(
|
|
899
897
|
agent_chat_model=agent_chat_model,
|
900
898
|
tracer=tracer,
|
901
899
|
)
|
902
|
-
response = response.
|
900
|
+
response = response.strip()
|
903
901
|
if response.startswith(('"', "'")) and response.endswith(('"', "'")):
|
904
902
|
response = response[1:-1]
|
905
903
|
|
@@ -927,10 +925,10 @@ async def generate_excalidraw_diagram_from_description(
|
|
927
925
|
raw_response = await send_message_to_model_wrapper(
|
928
926
|
query=excalidraw_diagram_generation, user=user, agent_chat_model=agent_chat_model, tracer=tracer
|
929
927
|
)
|
930
|
-
|
928
|
+
raw_response = clean_json(raw_response)
|
931
929
|
try:
|
932
930
|
# Expect response to have `elements` and `scratchpad` keys
|
933
|
-
response: Dict[str, str] = json.loads(
|
931
|
+
response: Dict[str, str] = json.loads(raw_response)
|
934
932
|
if (
|
935
933
|
not response
|
936
934
|
or not isinstance(response, Dict)
|
@@ -939,7 +937,7 @@ async def generate_excalidraw_diagram_from_description(
|
|
939
937
|
):
|
940
938
|
raise AssertionError(f"Invalid response for generating Excalidraw diagram: {response}")
|
941
939
|
except Exception:
|
942
|
-
raise AssertionError(f"Invalid response for generating Excalidraw diagram: {
|
940
|
+
raise AssertionError(f"Invalid response for generating Excalidraw diagram: {raw_response}")
|
943
941
|
if not response or not isinstance(response["elements"], List) or not isinstance(response["elements"][0], Dict):
|
944
942
|
# TODO Some additional validation here that it's a valid Excalidraw diagram
|
945
943
|
raise AssertionError(f"Invalid response for improving diagram description: {response}")
|
@@ -1050,11 +1048,11 @@ async def generate_better_mermaidjs_diagram_description(
|
|
1050
1048
|
agent_chat_model=agent_chat_model,
|
1051
1049
|
tracer=tracer,
|
1052
1050
|
)
|
1053
|
-
|
1054
|
-
if
|
1055
|
-
|
1051
|
+
response = response.strip()
|
1052
|
+
if response.startswith(('"', "'")) and response.endswith(('"', "'")):
|
1053
|
+
response = response[1:-1]
|
1056
1054
|
|
1057
|
-
return
|
1055
|
+
return response
|
1058
1056
|
|
1059
1057
|
|
1060
1058
|
async def generate_mermaidjs_diagram_from_description(
|
@@ -1078,7 +1076,7 @@ async def generate_mermaidjs_diagram_from_description(
|
|
1078
1076
|
raw_response = await send_message_to_model_wrapper(
|
1079
1077
|
query=mermaidjs_diagram_generation, user=user, agent_chat_model=agent_chat_model, tracer=tracer
|
1080
1078
|
)
|
1081
|
-
return clean_mermaidjs(raw_response.
|
1079
|
+
return clean_mermaidjs(raw_response.strip())
|
1082
1080
|
|
1083
1081
|
|
1084
1082
|
async def generate_better_image_prompt(
|
@@ -1153,11 +1151,11 @@ async def generate_better_image_prompt(
|
|
1153
1151
|
agent_chat_model=agent_chat_model,
|
1154
1152
|
tracer=tracer,
|
1155
1153
|
)
|
1156
|
-
|
1157
|
-
if
|
1158
|
-
|
1154
|
+
response = response.strip()
|
1155
|
+
if response.startswith(('"', "'")) and response.endswith(('"', "'")):
|
1156
|
+
response = response[1:-1]
|
1159
1157
|
|
1160
|
-
return
|
1158
|
+
return response
|
1161
1159
|
|
1162
1160
|
|
1163
1161
|
async def search_documents(
|
@@ -1263,9 +1261,8 @@ async def search_documents(
|
|
1263
1261
|
compiled_references = [
|
1264
1262
|
{
|
1265
1263
|
"query": item.additional["query"],
|
1266
|
-
"compiled": item["
|
1264
|
+
"compiled": item.additional["compiled"],
|
1267
1265
|
"file": item.additional["file"],
|
1268
|
-
"uri": item.additional["uri"],
|
1269
1266
|
}
|
1270
1267
|
for item in search_results
|
1271
1268
|
]
|
@@ -1332,7 +1329,7 @@ async def extract_questions(
|
|
1332
1329
|
|
1333
1330
|
# Extract questions from the response
|
1334
1331
|
try:
|
1335
|
-
response = clean_json(raw_response
|
1332
|
+
response = clean_json(raw_response)
|
1336
1333
|
response = pyjson5.loads(response)
|
1337
1334
|
queries = [q.strip() for q in response["queries"] if q.strip()]
|
1338
1335
|
if not isinstance(queries, list) or not queries:
|
@@ -1442,7 +1439,6 @@ async def send_message_to_model_wrapper(
|
|
1442
1439
|
system_message: str = "",
|
1443
1440
|
response_type: str = "text",
|
1444
1441
|
response_schema: BaseModel = None,
|
1445
|
-
tools: List[ToolDefinition] = None,
|
1446
1442
|
deepthought: bool = False,
|
1447
1443
|
user: KhojUser = None,
|
1448
1444
|
query_images: List[str] = None,
|
@@ -1510,7 +1506,6 @@ async def send_message_to_model_wrapper(
|
|
1510
1506
|
model=chat_model_name,
|
1511
1507
|
response_type=response_type,
|
1512
1508
|
response_schema=response_schema,
|
1513
|
-
tools=tools,
|
1514
1509
|
deepthought=deepthought,
|
1515
1510
|
api_base_url=api_base_url,
|
1516
1511
|
tracer=tracer,
|
@@ -1522,7 +1517,6 @@ async def send_message_to_model_wrapper(
|
|
1522
1517
|
model=chat_model_name,
|
1523
1518
|
response_type=response_type,
|
1524
1519
|
response_schema=response_schema,
|
1525
|
-
tools=tools,
|
1526
1520
|
deepthought=deepthought,
|
1527
1521
|
api_base_url=api_base_url,
|
1528
1522
|
tracer=tracer,
|
@@ -1534,7 +1528,6 @@ async def send_message_to_model_wrapper(
|
|
1534
1528
|
model=chat_model_name,
|
1535
1529
|
response_type=response_type,
|
1536
1530
|
response_schema=response_schema,
|
1537
|
-
tools=tools,
|
1538
1531
|
deepthought=deepthought,
|
1539
1532
|
api_base_url=api_base_url,
|
1540
1533
|
tracer=tracer,
|
@@ -2803,266 +2796,3 @@ def get_notion_auth_url(user: KhojUser):
|
|
2803
2796
|
if not NOTION_OAUTH_CLIENT_ID or not NOTION_OAUTH_CLIENT_SECRET or not NOTION_REDIRECT_URI:
|
2804
2797
|
return None
|
2805
2798
|
return f"https://api.notion.com/v1/oauth/authorize?client_id={NOTION_OAUTH_CLIENT_ID}&redirect_uri={NOTION_REDIRECT_URI}&response_type=code&state={user.uuid}"
|
2806
|
-
|
2807
|
-
|
2808
|
-
async def view_file_content(
|
2809
|
-
path: str,
|
2810
|
-
start_line: Optional[int] = None,
|
2811
|
-
end_line: Optional[int] = None,
|
2812
|
-
user: KhojUser = None,
|
2813
|
-
):
|
2814
|
-
"""
|
2815
|
-
View the contents of a file from the user's document database with optional line range specification.
|
2816
|
-
"""
|
2817
|
-
query = f"View file: {path}"
|
2818
|
-
if start_line and end_line:
|
2819
|
-
query += f" (lines {start_line}-{end_line})"
|
2820
|
-
|
2821
|
-
try:
|
2822
|
-
# Get the file object from the database by name
|
2823
|
-
file_objects = await FileObjectAdapters.aget_file_objects_by_name(user, path)
|
2824
|
-
|
2825
|
-
if not file_objects:
|
2826
|
-
error_msg = f"File '{path}' not found in user documents"
|
2827
|
-
logger.warning(error_msg)
|
2828
|
-
yield [{"query": query, "file": path, "compiled": error_msg}]
|
2829
|
-
return
|
2830
|
-
|
2831
|
-
# Use the first file object if multiple exist
|
2832
|
-
file_object = file_objects[0]
|
2833
|
-
raw_text = file_object.raw_text
|
2834
|
-
|
2835
|
-
# Apply line range filtering if specified
|
2836
|
-
if start_line is None and end_line is None:
|
2837
|
-
filtered_text = raw_text
|
2838
|
-
else:
|
2839
|
-
lines = raw_text.split("\n")
|
2840
|
-
start_line = start_line or 1
|
2841
|
-
end_line = end_line or len(lines)
|
2842
|
-
|
2843
|
-
# Validate line range
|
2844
|
-
if start_line < 1 or end_line < 1 or start_line > end_line:
|
2845
|
-
error_msg = f"Invalid line range: {start_line}-{end_line}"
|
2846
|
-
logger.warning(error_msg)
|
2847
|
-
yield [{"query": query, "file": path, "compiled": error_msg}]
|
2848
|
-
return
|
2849
|
-
if start_line > len(lines):
|
2850
|
-
error_msg = f"Start line {start_line} exceeds total number of lines {len(lines)}"
|
2851
|
-
logger.warning(error_msg)
|
2852
|
-
yield [{"query": query, "file": path, "compiled": error_msg}]
|
2853
|
-
return
|
2854
|
-
|
2855
|
-
# Convert from 1-based to 0-based indexing and ensure bounds
|
2856
|
-
start_idx = max(0, start_line - 1)
|
2857
|
-
end_idx = min(len(lines), end_line)
|
2858
|
-
|
2859
|
-
selected_lines = lines[start_idx:end_idx]
|
2860
|
-
filtered_text = "\n".join(selected_lines)
|
2861
|
-
|
2862
|
-
# Truncate the text if it's too long
|
2863
|
-
if len(filtered_text) > 10000:
|
2864
|
-
filtered_text = filtered_text[:10000] + "\n\n[Truncated. Use line numbers to view specific sections.]"
|
2865
|
-
|
2866
|
-
# Format the result as a document reference
|
2867
|
-
document_results = [
|
2868
|
-
{
|
2869
|
-
"query": query,
|
2870
|
-
"file": path,
|
2871
|
-
"uri": path,
|
2872
|
-
"compiled": filtered_text,
|
2873
|
-
}
|
2874
|
-
]
|
2875
|
-
|
2876
|
-
yield document_results
|
2877
|
-
|
2878
|
-
except Exception as e:
|
2879
|
-
error_msg = f"Error viewing file {path}: {str(e)}"
|
2880
|
-
logger.error(error_msg, exc_info=True)
|
2881
|
-
|
2882
|
-
# Return an error result in the expected format
|
2883
|
-
yield [{"query": query, "file": path, "uri": path, "compiled": error_msg}]
|
2884
|
-
|
2885
|
-
|
2886
|
-
async def grep_files(
|
2887
|
-
regex_pattern: str,
|
2888
|
-
path_prefix: Optional[str] = None,
|
2889
|
-
lines_before: Optional[int] = None,
|
2890
|
-
lines_after: Optional[int] = None,
|
2891
|
-
user: KhojUser = None,
|
2892
|
-
):
|
2893
|
-
"""
|
2894
|
-
Search for a regex pattern in files with an optional path prefix and context lines.
|
2895
|
-
"""
|
2896
|
-
|
2897
|
-
# Construct the query string based on provided parameters
|
2898
|
-
def _generate_query(line_count, doc_count, path, pattern, lines_before, lines_after, max_results=1000):
|
2899
|
-
query = f"**Found {line_count} matches for '{pattern}' in {doc_count} documents**"
|
2900
|
-
if path:
|
2901
|
-
query += f" in {path}"
|
2902
|
-
if lines_before or lines_after or line_count > max_results:
|
2903
|
-
query += " Showing"
|
2904
|
-
if lines_before or lines_after:
|
2905
|
-
context_info = []
|
2906
|
-
if lines_before:
|
2907
|
-
context_info.append(f"{lines_before} lines before")
|
2908
|
-
if lines_after:
|
2909
|
-
context_info.append(f"{lines_after} lines after")
|
2910
|
-
query += f" {' and '.join(context_info)}"
|
2911
|
-
if line_count > max_results:
|
2912
|
-
if lines_before or lines_after:
|
2913
|
-
query += f" for"
|
2914
|
-
query += f" first {max_results} results"
|
2915
|
-
return query
|
2916
|
-
|
2917
|
-
# Validate regex pattern
|
2918
|
-
path_prefix = path_prefix or ""
|
2919
|
-
lines_before = lines_before or 0
|
2920
|
-
lines_after = lines_after or 0
|
2921
|
-
|
2922
|
-
try:
|
2923
|
-
regex = re.compile(regex_pattern, re.IGNORECASE)
|
2924
|
-
except re.error as e:
|
2925
|
-
yield {
|
2926
|
-
"query": _generate_query(0, 0, path_prefix, regex_pattern, lines_before, lines_after),
|
2927
|
-
"file": path_prefix,
|
2928
|
-
"compiled": f"Invalid regex pattern: {e}",
|
2929
|
-
}
|
2930
|
-
return
|
2931
|
-
|
2932
|
-
try:
|
2933
|
-
file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, regex_pattern, path_prefix)
|
2934
|
-
|
2935
|
-
line_matches = []
|
2936
|
-
for file_object in file_matches:
|
2937
|
-
lines = file_object.raw_text.split("\n")
|
2938
|
-
matched_line_numbers = []
|
2939
|
-
|
2940
|
-
# Find all matching line numbers first
|
2941
|
-
for i, line in enumerate(lines, 1):
|
2942
|
-
if regex.search(line):
|
2943
|
-
matched_line_numbers.append(i)
|
2944
|
-
|
2945
|
-
# Build context for each match
|
2946
|
-
for line_num in matched_line_numbers:
|
2947
|
-
context_lines = []
|
2948
|
-
|
2949
|
-
# Calculate start and end indices for context (0-based)
|
2950
|
-
start_idx = max(0, line_num - 1 - lines_before)
|
2951
|
-
end_idx = min(len(lines), line_num + lines_after)
|
2952
|
-
|
2953
|
-
# Add context lines with line numbers
|
2954
|
-
for idx in range(start_idx, end_idx):
|
2955
|
-
current_line_num = idx + 1
|
2956
|
-
line_content = lines[idx]
|
2957
|
-
|
2958
|
-
if current_line_num == line_num:
|
2959
|
-
# This is the matching line, mark it
|
2960
|
-
context_lines.append(f"{file_object.file_name}:{current_line_num}:> {line_content}")
|
2961
|
-
else:
|
2962
|
-
# This is a context line
|
2963
|
-
context_lines.append(f"{file_object.file_name}:{current_line_num}: {line_content}")
|
2964
|
-
|
2965
|
-
# Add separator between matches if showing context
|
2966
|
-
if lines_before > 0 or lines_after > 0:
|
2967
|
-
context_lines.append("--")
|
2968
|
-
|
2969
|
-
line_matches.extend(context_lines)
|
2970
|
-
|
2971
|
-
# Remove the last separator if it exists
|
2972
|
-
if line_matches and line_matches[-1] == "--":
|
2973
|
-
line_matches.pop()
|
2974
|
-
|
2975
|
-
# Check if no results found
|
2976
|
-
max_results = 1000
|
2977
|
-
query = _generate_query(
|
2978
|
-
len([m for m in line_matches if ":>" in m]),
|
2979
|
-
len(file_matches),
|
2980
|
-
path_prefix,
|
2981
|
-
regex_pattern,
|
2982
|
-
lines_before,
|
2983
|
-
lines_after,
|
2984
|
-
max_results,
|
2985
|
-
)
|
2986
|
-
if not line_matches:
|
2987
|
-
yield {"query": query, "file": path_prefix, "uri": path_prefix, "compiled": "No matches found."}
|
2988
|
-
return
|
2989
|
-
|
2990
|
-
# Truncate matched lines list if too long
|
2991
|
-
if len(line_matches) > max_results:
|
2992
|
-
line_matches = line_matches[:max_results] + [
|
2993
|
-
f"... {len(line_matches) - max_results} more results found. Use stricter regex or path to narrow down results."
|
2994
|
-
]
|
2995
|
-
|
2996
|
-
yield {"query": query, "file": path_prefix, "uri": path_prefix, "compiled": "\n".join(line_matches)}
|
2997
|
-
|
2998
|
-
except Exception as e:
|
2999
|
-
error_msg = f"Error using grep files tool: {str(e)}"
|
3000
|
-
logger.error(error_msg, exc_info=True)
|
3001
|
-
yield [
|
3002
|
-
{
|
3003
|
-
"query": _generate_query(0, 0, path_prefix or "", regex_pattern, lines_before, lines_after),
|
3004
|
-
"file": path_prefix,
|
3005
|
-
"uri": path_prefix,
|
3006
|
-
"compiled": error_msg,
|
3007
|
-
}
|
3008
|
-
]
|
3009
|
-
|
3010
|
-
|
3011
|
-
async def list_files(
|
3012
|
-
path: Optional[str] = None,
|
3013
|
-
pattern: Optional[str] = None,
|
3014
|
-
user: KhojUser = None,
|
3015
|
-
):
|
3016
|
-
"""
|
3017
|
-
List files under a given path or glob pattern from the user's document database.
|
3018
|
-
"""
|
3019
|
-
|
3020
|
-
# Construct the query string based on provided parameters
|
3021
|
-
def _generate_query(doc_count, path, pattern):
|
3022
|
-
query = f"**Found {doc_count} files**"
|
3023
|
-
if path:
|
3024
|
-
query += f" in {path}"
|
3025
|
-
if pattern:
|
3026
|
-
query += f" filtered by {pattern}"
|
3027
|
-
return query
|
3028
|
-
|
3029
|
-
try:
|
3030
|
-
# Get user files by path prefix when specified
|
3031
|
-
path = path or ""
|
3032
|
-
if path in ["", "/", ".", "./", "~", "~/"]:
|
3033
|
-
file_objects = await FileObjectAdapters.aget_all_file_objects(user, limit=10000)
|
3034
|
-
else:
|
3035
|
-
file_objects = await FileObjectAdapters.aget_file_objects_by_path_prefix(user, path)
|
3036
|
-
|
3037
|
-
if not file_objects:
|
3038
|
-
yield {"query": _generate_query(0, path, pattern), "file": path, "uri": path, "compiled": "No files found."}
|
3039
|
-
return
|
3040
|
-
|
3041
|
-
# Extract file names from file objects
|
3042
|
-
files = [f.file_name for f in file_objects]
|
3043
|
-
# Convert to relative file path (similar to ls)
|
3044
|
-
if path:
|
3045
|
-
files = [f[len(path) :] for f in files]
|
3046
|
-
|
3047
|
-
# Apply glob pattern filtering if specified
|
3048
|
-
if pattern:
|
3049
|
-
files = [f for f in files if fnmatch.fnmatch(f, pattern)]
|
3050
|
-
|
3051
|
-
query = _generate_query(len(files), path, pattern)
|
3052
|
-
if not files:
|
3053
|
-
yield {"query": query, "file": path, "uri": path, "compiled": "No files found."}
|
3054
|
-
return
|
3055
|
-
|
3056
|
-
# Truncate the list if it's too long
|
3057
|
-
max_files = 100
|
3058
|
-
if len(files) > max_files:
|
3059
|
-
files = files[:max_files] + [
|
3060
|
-
f"... {len(files) - max_files} more files found. Use glob pattern to narrow down results."
|
3061
|
-
]
|
3062
|
-
|
3063
|
-
yield {"query": query, "file": path, "uri": path, "compiled": "\n- ".join(files)}
|
3064
|
-
|
3065
|
-
except Exception as e:
|
3066
|
-
error_msg = f"Error listing files in {path}: {str(e)}"
|
3067
|
-
logger.error(error_msg, exc_info=True)
|
3068
|
-
yield {"query": query, "file": path, "uri": path, "compiled": error_msg}
|