khoj 2.0.0b13.dev5__py3-none-any.whl → 2.0.0b13.dev23__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/app/README.md +1 -1
- khoj/app/urls.py +1 -0
- khoj/database/adapters/__init__.py +4 -4
- khoj/database/management/commands/delete_orphaned_fileobjects.py +0 -1
- khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +1 -1
- khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +1 -1
- khoj/database/models/__init__.py +6 -6
- khoj/database/tests.py +0 -2
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{9245.a04e92d034540234.js → 1225.ecac11e7421504c4.js} +3 -3
- khoj/interface/compiled/_next/static/chunks/1320.ae930ad00affe685.js +5 -0
- khoj/interface/compiled/_next/static/chunks/{1327-3b1a41af530fa8ee.js → 1327-e254819a9172cfa7.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/1626.15a8acc0d6639ec6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{3489.c523fe96a2eee74f.js → 1940.d082758bd04e08ae.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{2327-ea623ca2d22f78e9.js → 2327-438aaec1657c5ada.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/2475.57a0d0fd93d07af0.js +93 -0
- khoj/interface/compiled/_next/static/chunks/2481.5ce6524ba0a73f90.js +55 -0
- khoj/interface/compiled/_next/static/chunks/297.4c4c823ff6e3255b.js +174 -0
- khoj/interface/compiled/_next/static/chunks/{5639-09e2009a2adedf8b.js → 3260-82d2521fab032ff1.js} +68 -23
- khoj/interface/compiled/_next/static/chunks/3353.1c6d553216a1acae.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3855.f7b8131f78af046e.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3973.dc54a39586ab48be.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4241.c1cd170f7f37ac59.js +24 -0
- khoj/interface/compiled/_next/static/chunks/{4327.8d2a1b8f1ea78208.js → 4327.f3704dc398c67113.js} +19 -19
- khoj/interface/compiled/_next/static/chunks/4505.f09454a346269c3f.js +117 -0
- khoj/interface/compiled/_next/static/chunks/4801.96a152d49742b644.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5427-a95ec748e52abb75.js +1 -0
- khoj/interface/compiled/_next/static/chunks/549.2bd27f59a91a9668.js +148 -0
- khoj/interface/compiled/_next/static/chunks/5765.71b1e1207b76b03f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/584.d7ce3505f169b706.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6240.34f7c1fa692edd61.js +24 -0
- khoj/interface/compiled/_next/static/chunks/6d3fe5a5-f9f3c16e0bc0cdf9.js +10 -0
- khoj/interface/compiled/_next/static/chunks/{7127-0f4a2a77d97fb5fa.js → 7127-97b83757db125ba6.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/7200-93ab0072359b8028.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{2612.bcf5a623b3da209e.js → 7553.f5ad54b1f6e92c49.js} +2 -2
- khoj/interface/compiled/_next/static/chunks/7626-1b630f1654172341.js +1 -0
- khoj/interface/compiled/_next/static/chunks/764.dadd316e8e16d191.js +63 -0
- khoj/interface/compiled/_next/static/chunks/78.08169ab541abab4f.js +43 -0
- khoj/interface/compiled/_next/static/chunks/784.e03acf460df213d1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{9537-d9ab442ce15d1e20.js → 8072-e1440cb482a0940e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{3265.924139c4146ee344.js → 8086.8d39887215807fcd.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8168.f074ab8c7c16d82d.js +59 -0
- khoj/interface/compiled/_next/static/chunks/{8694.2bd9c2f65d8c5847.js → 8223.1705878fa7a09292.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8483.94f6c9e2bee86f50.js +215 -0
- khoj/interface/compiled/_next/static/chunks/{8888.ebe0e552b59e7fed.js → 8810.fc0e479de78c7c61.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8828.bc74dc4ce94e78f6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{7303.d0612f812a967a08.js → 8909.14ac3f43d0070cf1.js} +5 -5
- khoj/interface/compiled/_next/static/chunks/90542734.b1a1629065ba199b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9167.098534184f03fe92.js +56 -0
- khoj/interface/compiled/_next/static/chunks/{4980.63500d68b3bb1222.js → 9537.e934ce37bf314509.js} +5 -5
- khoj/interface/compiled/_next/static/chunks/9574.3fe8e26e95bf1c34.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9599.ec50b5296c27dae9.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9643.b34248df52ffc77c.js +262 -0
- khoj/interface/compiled/_next/static/chunks/9747.2fd9065b1435abb1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9922.98f2b2a9959b4ebe.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-e291b49977f43880.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-198b26df6e09bbb0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-8e1c4f2af3c9429e.js → page-dfcc1e8e2ad62873.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-2b3056cba8aa96ce.js → page-1567cac7b79a7c59.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-8be3b35178abf2ec.js → page-6081362437c82470.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-4a4b0c0f4749c2b2.js → page-e0dcb1762f8c8f88.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/webpack-5393aad3d824e0cb.js +1 -0
- khoj/interface/compiled/_next/static/css/{2945c4a857922f3b.css → c34713c98384ee87.css} +1 -1
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +3 -3
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +4 -4
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +3 -3
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +3 -3
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +3 -3
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +5 -5
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +3 -3
- khoj/main.py +3 -3
- khoj/manage.py +1 -0
- khoj/processor/content/github/github_to_entries.py +6 -6
- khoj/processor/content/images/image_to_entries.py +0 -1
- khoj/processor/content/markdown/markdown_to_entries.py +2 -3
- khoj/processor/content/notion/notion_to_entries.py +5 -5
- khoj/processor/content/org_mode/org_to_entries.py +4 -5
- khoj/processor/content/org_mode/orgnode.py +4 -4
- khoj/processor/content/plaintext/plaintext_to_entries.py +1 -2
- khoj/processor/content/text_to_entries.py +1 -2
- khoj/processor/conversation/google/utils.py +3 -3
- khoj/processor/conversation/openai/gpt.py +65 -28
- khoj/processor/conversation/openai/utils.py +358 -22
- khoj/processor/conversation/prompts.py +11 -5
- khoj/processor/conversation/utils.py +20 -11
- khoj/processor/embeddings.py +0 -2
- khoj/processor/image/generate.py +3 -3
- khoj/processor/operator/__init__.py +2 -2
- khoj/processor/operator/grounding_agent.py +15 -2
- khoj/processor/operator/grounding_agent_uitars.py +34 -23
- khoj/processor/operator/operator_agent_anthropic.py +29 -4
- khoj/processor/operator/operator_agent_base.py +1 -1
- khoj/processor/operator/operator_agent_binary.py +4 -4
- khoj/processor/operator/operator_agent_openai.py +21 -6
- khoj/processor/operator/operator_environment_browser.py +1 -1
- khoj/processor/operator/operator_environment_computer.py +1 -1
- khoj/processor/speech/text_to_speech.py +0 -1
- khoj/processor/tools/online_search.py +1 -1
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api.py +1 -2
- khoj/routers/api_agents.py +1 -2
- khoj/routers/api_automation.py +1 -1
- khoj/routers/api_chat.py +10 -16
- khoj/routers/api_model.py +0 -1
- khoj/routers/api_subscription.py +1 -1
- khoj/routers/email.py +4 -4
- khoj/routers/helpers.py +35 -24
- khoj/routers/research.py +2 -4
- khoj/search_filter/base_filter.py +2 -4
- khoj/search_type/text_search.py +1 -2
- khoj/utils/constants.py +3 -0
- khoj/utils/helpers.py +4 -4
- khoj/utils/initialization.py +1 -3
- khoj/utils/models.py +2 -4
- khoj/utils/rawconfig.py +1 -2
- khoj/utils/state.py +1 -1
- {khoj-2.0.0b13.dev5.dist-info → khoj-2.0.0b13.dev23.dist-info}/METADATA +3 -2
- {khoj-2.0.0b13.dev5.dist-info → khoj-2.0.0b13.dev23.dist-info}/RECORD +139 -137
- khoj/interface/compiled/_next/static/chunks/1191.b547ec13349b4aed.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1588.f0558a0bdffc4761.js +0 -117
- khoj/interface/compiled/_next/static/chunks/1918.925cb4a35518d258.js +0 -43
- khoj/interface/compiled/_next/static/chunks/2849.dc00ae5ba7219cfc.js +0 -1
- khoj/interface/compiled/_next/static/chunks/303.fe76de943e930fbd.js +0 -1
- khoj/interface/compiled/_next/static/chunks/4533.586e74b45a2bde25.js +0 -55
- khoj/interface/compiled/_next/static/chunks/4551.82ce1476b5516bc2.js +0 -5
- khoj/interface/compiled/_next/static/chunks/4748.0edd37cba3ea2809.js +0 -59
- khoj/interface/compiled/_next/static/chunks/5210.cd35a1c1ec594a20.js +0 -93
- khoj/interface/compiled/_next/static/chunks/5329.f8b3c5b3d16159cd.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5427-13d6ffd380fdfab7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/558-c14e76cff03f6a60.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5830.8876eccb82da9b7d.js +0 -262
- khoj/interface/compiled/_next/static/chunks/6230.88a71d8145347b3f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7161.77e0530a40ad5ca8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7200-ac3b2e37ff30e126.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7505.c31027a3695bdebb.js +0 -148
- khoj/interface/compiled/_next/static/chunks/7760.35649cc21d9585bd.js +0 -56
- khoj/interface/compiled/_next/static/chunks/83.48e2db193a940052.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8427.844694e06133fb51.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8665.4db7e6b2e8933497.js +0 -174
- khoj/interface/compiled/_next/static/chunks/872.caf84cc1a39ae59f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8890.6e8a59e4de6978bc.js +0 -215
- khoj/interface/compiled/_next/static/chunks/8950.5f2272e0ac923f9e.js +0 -1
- khoj/interface/compiled/_next/static/chunks/90542734.2c21f16f18b22411.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9202.c703864fcedc8d1f.js +0 -63
- khoj/interface/compiled/_next/static/chunks/9320.6aca4885d541aa44.js +0 -24
- khoj/interface/compiled/_next/static/chunks/9535.f78cd92d03331e55.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9968.b111fc002796da81.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/page-9a4610474cd59a71.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/page-f7bb9d777b7745d4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +0 -1
- khoj/interface/compiled/_next/static/chunks/f3e3247b-1758d4651e4457c2.js +0 -10
- khoj/interface/compiled/_next/static/chunks/webpack-ee14d29b64c5ab47.js +0 -1
- /khoj/interface/compiled/_next/static/{XfWrWDAk5VXeZ88OdP652 → Q7tm150g44Fs4H1CGytNf}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{XfWrWDAk5VXeZ88OdP652 → Q7tm150g44Fs4H1CGytNf}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9139-ce1ae935dac9c871.js → 9139-8ac4d9feb10f8869.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/app/search/{page-4885df3cd175c957.js → page-3639e50ec3e9acfd.js} +0 -0
- {khoj-2.0.0b13.dev5.dist-info → khoj-2.0.0b13.dev23.dist-info}/WHEEL +0 -0
- {khoj-2.0.0b13.dev5.dist-info → khoj-2.0.0b13.dev23.dist-info}/entry_points.txt +0 -0
- {khoj-2.0.0b13.dev5.dist-info → khoj-2.0.0b13.dev23.dist-info}/licenses/LICENSE +0 -0
@@ -1,7 +1,6 @@
|
|
1
1
|
import base64
|
2
2
|
import json
|
3
3
|
import logging
|
4
|
-
import math
|
5
4
|
import mimetypes
|
6
5
|
import os
|
7
6
|
import re
|
@@ -18,7 +17,7 @@ import requests
|
|
18
17
|
import tiktoken
|
19
18
|
import yaml
|
20
19
|
from langchain_core.messages.chat import ChatMessage
|
21
|
-
from pydantic import BaseModel, ConfigDict, ValidationError
|
20
|
+
from pydantic import BaseModel, ConfigDict, ValidationError
|
22
21
|
from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
|
23
22
|
|
24
23
|
from khoj.database.adapters import ConversationAdapters
|
@@ -47,7 +46,11 @@ from khoj.utils.yaml import yaml_dump
|
|
47
46
|
logger = logging.getLogger(__name__)
|
48
47
|
|
49
48
|
try:
|
50
|
-
|
49
|
+
import importlib.util
|
50
|
+
|
51
|
+
git_spec = importlib.util.find_spec("git")
|
52
|
+
if git_spec is None:
|
53
|
+
raise ImportError
|
51
54
|
except ImportError:
|
52
55
|
if is_promptrace_enabled():
|
53
56
|
logger.warning("GitPython not installed. `pip install gitpython` to use prompt tracer.")
|
@@ -65,6 +68,9 @@ model_to_prompt_size = {
|
|
65
68
|
"o3": 60000,
|
66
69
|
"o3-pro": 30000,
|
67
70
|
"o4-mini": 90000,
|
71
|
+
"gpt-5-2025-08-07": 120000,
|
72
|
+
"gpt-5-mini-2025-08-07": 120000,
|
73
|
+
"gpt-5-nano-2025-08-07": 120000,
|
68
74
|
# Google Models
|
69
75
|
"gemini-2.5-flash": 120000,
|
70
76
|
"gemini-2.5-pro": 60000,
|
@@ -294,7 +300,7 @@ def construct_chat_history_for_operator(conversation_history: List[ChatMessageMo
|
|
294
300
|
if chat.by == "you" and chat.message:
|
295
301
|
content = [{"type": "text", "text": chat.message}]
|
296
302
|
for file in chat.queryFiles or []:
|
297
|
-
content += [{"type": "text", "text": f
|
303
|
+
content += [{"type": "text", "text": f"## File: {file['name']}\n\n{file['content']}"}]
|
298
304
|
user_message = AgentMessage(role="user", content=content)
|
299
305
|
elif chat.by == "khoj" and chat.message:
|
300
306
|
chat_history += [user_message, AgentMessage(role="assistant", content=chat.message)]
|
@@ -311,7 +317,10 @@ def construct_tool_chat_history(
|
|
311
317
|
If no tool is provided inferred query for all tools used are added.
|
312
318
|
"""
|
313
319
|
chat_history: list = []
|
314
|
-
|
320
|
+
|
321
|
+
def base_extractor(iteration: ResearchIteration) -> List[str]:
|
322
|
+
return []
|
323
|
+
|
315
324
|
extract_inferred_query_map: Dict[ConversationCommand, Callable[[ResearchIteration], List[str]]] = {
|
316
325
|
ConversationCommand.SemanticSearchFiles: (
|
317
326
|
lambda iteration: [c["query"] for c in iteration.context] if iteration.context else []
|
@@ -498,7 +507,7 @@ async def save_to_conversation_log(
|
|
498
507
|
|
499
508
|
logger.info(
|
500
509
|
f"""
|
501
|
-
Saved Conversation Turn ({db_conversation.id if db_conversation else
|
510
|
+
Saved Conversation Turn ({db_conversation.id if db_conversation else "N/A"}):
|
502
511
|
You ({user.username}): "{q}"
|
503
512
|
|
504
513
|
Khoj: "{chat_response}"
|
@@ -625,7 +634,7 @@ def generate_chatml_messages_with_context(
|
|
625
634
|
|
626
635
|
if not is_none_or_empty(chat.operatorContext):
|
627
636
|
operator_context = chat.operatorContext
|
628
|
-
operator_content = "\n\n".join([f
|
637
|
+
operator_content = "\n\n".join([f"## Task: {oc['query']}\n{oc['response']}\n" for oc in operator_context])
|
629
638
|
message_context += [
|
630
639
|
{
|
631
640
|
"type": "text",
|
@@ -744,7 +753,7 @@ def get_encoder(
|
|
744
753
|
else:
|
745
754
|
# as tiktoken doesn't recognize o1 model series yet
|
746
755
|
encoder = tiktoken.encoding_for_model("gpt-4o" if model_name.startswith("o1") else model_name)
|
747
|
-
except:
|
756
|
+
except Exception:
|
748
757
|
encoder = tiktoken.encoding_for_model(default_tokenizer)
|
749
758
|
if state.verbose > 2:
|
750
759
|
logger.debug(
|
@@ -846,9 +855,9 @@ def truncate_messages(
|
|
846
855
|
total_tokens, _ = count_total_tokens(messages, encoder, system_message)
|
847
856
|
if total_tokens > max_prompt_size:
|
848
857
|
# At this point, a single message with a single content part of type dict should remain
|
849
|
-
assert (
|
850
|
-
|
851
|
-
)
|
858
|
+
assert len(messages) == 1 and len(messages[0].content) == 1 and isinstance(messages[0].content[0], dict), (
|
859
|
+
"Expected a single message with a single content part remaining at this point in truncation"
|
860
|
+
)
|
852
861
|
|
853
862
|
# Collate message content into single string to ease truncation
|
854
863
|
part = messages[0].content[0]
|
khoj/processor/embeddings.py
CHANGED
khoj/processor/image/generate.py
CHANGED
@@ -108,12 +108,12 @@ async def text_to_image(
|
|
108
108
|
if "content_policy_violation" in e.message:
|
109
109
|
logger.error(f"Image Generation blocked by OpenAI: {e}")
|
110
110
|
status_code = e.status_code # type: ignore
|
111
|
-
message =
|
111
|
+
message = "Image generation blocked by OpenAI due to policy violation" # type: ignore
|
112
112
|
yield image_url or image, status_code, message
|
113
113
|
return
|
114
114
|
else:
|
115
115
|
logger.error(f"Image Generation failed with {e}", exc_info=True)
|
116
|
-
message =
|
116
|
+
message = "Image generation failed using OpenAI" # type: ignore
|
117
117
|
status_code = e.status_code # type: ignore
|
118
118
|
yield image_url or image, status_code, message
|
119
119
|
return
|
@@ -199,7 +199,7 @@ def generate_image_with_stability(
|
|
199
199
|
|
200
200
|
# Call Stability AI API to generate image
|
201
201
|
response = requests.post(
|
202
|
-
|
202
|
+
"https://api.stability.ai/v2beta/stable-image/generate/sd3",
|
203
203
|
headers={"authorization": f"Bearer {text_to_image_config.api_key}", "accept": "image/*"},
|
204
204
|
files={"none": ""},
|
205
205
|
data={
|
@@ -11,7 +11,7 @@ from khoj.processor.conversation.utils import (
|
|
11
11
|
OperatorRun,
|
12
12
|
construct_chat_history_for_operator,
|
13
13
|
)
|
14
|
-
from khoj.processor.operator.operator_actions import
|
14
|
+
from khoj.processor.operator.operator_actions import RequestUserAction
|
15
15
|
from khoj.processor.operator.operator_agent_anthropic import AnthropicOperatorAgent
|
16
16
|
from khoj.processor.operator.operator_agent_base import OperatorAgent
|
17
17
|
from khoj.processor.operator.operator_agent_binary import BinaryOperatorAgent
|
@@ -59,7 +59,7 @@ async def operate_environment(
|
|
59
59
|
if not reasoning_model or not reasoning_model.vision_enabled:
|
60
60
|
reasoning_model = await ConversationAdapters.aget_vision_enabled_config()
|
61
61
|
if not reasoning_model:
|
62
|
-
raise ValueError(
|
62
|
+
raise ValueError("No vision enabled chat model found. Configure a vision chat model to operate environment.")
|
63
63
|
|
64
64
|
# Create conversation history from conversation log
|
65
65
|
chat_history = construct_chat_history_for_operator(conversation_log)
|
@@ -1,14 +1,27 @@
|
|
1
1
|
import json
|
2
2
|
import logging
|
3
3
|
from textwrap import dedent
|
4
|
+
from typing import List, Optional
|
4
5
|
|
5
6
|
from openai import AzureOpenAI, OpenAI
|
6
7
|
from openai.types.chat import ChatCompletion, ChatCompletionMessage
|
7
8
|
|
8
9
|
from khoj.database.models import ChatModel
|
9
10
|
from khoj.processor.conversation.utils import construct_structured_message
|
10
|
-
from khoj.processor.operator.operator_actions import
|
11
|
-
|
11
|
+
from khoj.processor.operator.operator_actions import (
|
12
|
+
BackAction,
|
13
|
+
ClickAction,
|
14
|
+
DoubleClickAction,
|
15
|
+
DragAction,
|
16
|
+
GotoAction,
|
17
|
+
KeypressAction,
|
18
|
+
OperatorAction,
|
19
|
+
Point,
|
20
|
+
ScreenshotAction,
|
21
|
+
ScrollAction,
|
22
|
+
TypeAction,
|
23
|
+
WaitAction,
|
24
|
+
)
|
12
25
|
from khoj.processor.operator.operator_environment_base import EnvironmentType, EnvState
|
13
26
|
from khoj.utils.helpers import get_chat_usage_metrics
|
14
27
|
|
@@ -18,7 +18,22 @@ from openai import AsyncAzureOpenAI, AsyncOpenAI
|
|
18
18
|
from openai.types.chat import ChatCompletion
|
19
19
|
from PIL import Image
|
20
20
|
|
21
|
-
from khoj.processor.operator.operator_actions import
|
21
|
+
from khoj.processor.operator.operator_actions import (
|
22
|
+
BackAction,
|
23
|
+
ClickAction,
|
24
|
+
DoubleClickAction,
|
25
|
+
DragAction,
|
26
|
+
GotoAction,
|
27
|
+
KeyDownAction,
|
28
|
+
KeypressAction,
|
29
|
+
KeyUpAction,
|
30
|
+
MoveAction,
|
31
|
+
OperatorAction,
|
32
|
+
RequestUserAction,
|
33
|
+
ScrollAction,
|
34
|
+
TypeAction,
|
35
|
+
WaitAction,
|
36
|
+
)
|
22
37
|
from khoj.processor.operator.operator_environment_base import EnvironmentType, EnvState
|
23
38
|
from khoj.utils.helpers import get_chat_usage_metrics
|
24
39
|
|
@@ -122,11 +137,10 @@ class GroundingAgentUitars:
|
|
122
137
|
)
|
123
138
|
|
124
139
|
temperature = self.temperature
|
125
|
-
top_k = self.top_k
|
126
140
|
try_times = 3
|
127
141
|
while not parsed_responses:
|
128
142
|
if try_times <= 0:
|
129
|
-
logger.warning(
|
143
|
+
logger.warning("Reach max retry times to fetch response from client, as error flag.")
|
130
144
|
return "client error\nFAIL", []
|
131
145
|
try:
|
132
146
|
message_content = "\n".join([msg["content"][0].get("text") or "[image]" for msg in messages])
|
@@ -163,7 +177,6 @@ class GroundingAgentUitars:
|
|
163
177
|
prediction = None
|
164
178
|
try_times -= 1
|
165
179
|
temperature = 1
|
166
|
-
top_k = -1
|
167
180
|
|
168
181
|
if prediction is None:
|
169
182
|
return "client error\nFAIL", []
|
@@ -264,9 +277,9 @@ class GroundingAgentUitars:
|
|
264
277
|
raise ValueError(f"Unsupported environment type: {environment_type}")
|
265
278
|
|
266
279
|
def _format_messages_for_api(self, instruction: str, current_state: EnvState):
|
267
|
-
assert len(self.observations) == len(self.actions) and len(self.actions) == len(
|
268
|
-
|
269
|
-
)
|
280
|
+
assert len(self.observations) == len(self.actions) and len(self.actions) == len(self.thoughts), (
|
281
|
+
"The number of observations and actions should be the same."
|
282
|
+
)
|
270
283
|
|
271
284
|
self.history_images.append(base64.b64decode(current_state.screenshot))
|
272
285
|
self.observations.append({"screenshot": current_state.screenshot, "accessibility_tree": None})
|
@@ -524,7 +537,7 @@ class GroundingAgentUitars:
|
|
524
537
|
parsed_actions = [self.parse_action_string(action.replace("\n", "\\n").lstrip()) for action in all_action]
|
525
538
|
actions: list[dict] = []
|
526
539
|
for action_instance, raw_str in zip(parsed_actions, all_action):
|
527
|
-
if action_instance
|
540
|
+
if action_instance is None:
|
528
541
|
print(f"Action can't parse: {raw_str}")
|
529
542
|
raise ValueError(f"Action can't parse: {raw_str}")
|
530
543
|
action_type = action_instance["function"]
|
@@ -756,7 +769,7 @@ class GroundingAgentUitars:
|
|
756
769
|
The pyautogui code string
|
757
770
|
"""
|
758
771
|
|
759
|
-
pyautogui_code =
|
772
|
+
pyautogui_code = "import pyautogui\nimport time\n"
|
760
773
|
actions = []
|
761
774
|
if isinstance(responses, dict):
|
762
775
|
responses = [responses]
|
@@ -774,7 +787,7 @@ class GroundingAgentUitars:
|
|
774
787
|
if response_id == 0:
|
775
788
|
pyautogui_code += f"'''\nObservation:\n{observation}\n\nThought:\n{thought}\n'''\n"
|
776
789
|
else:
|
777
|
-
pyautogui_code +=
|
790
|
+
pyautogui_code += "\ntime.sleep(1)\n"
|
778
791
|
|
779
792
|
action_dict = response
|
780
793
|
action_type = action_dict.get("action_type")
|
@@ -846,17 +859,17 @@ class GroundingAgentUitars:
|
|
846
859
|
if content:
|
847
860
|
if input_swap:
|
848
861
|
actions += TypeAction()
|
849
|
-
pyautogui_code +=
|
862
|
+
pyautogui_code += "\nimport pyperclip"
|
850
863
|
pyautogui_code += f"\npyperclip.copy('{stripped_content}')"
|
851
|
-
pyautogui_code +=
|
852
|
-
pyautogui_code +=
|
864
|
+
pyautogui_code += "\npyautogui.hotkey('ctrl', 'v')"
|
865
|
+
pyautogui_code += "\ntime.sleep(0.5)\n"
|
853
866
|
if content.endswith("\n") or content.endswith("\\n"):
|
854
|
-
pyautogui_code +=
|
867
|
+
pyautogui_code += "\npyautogui.press('enter')"
|
855
868
|
else:
|
856
869
|
pyautogui_code += f"\npyautogui.write('{stripped_content}', interval=0.1)"
|
857
|
-
pyautogui_code +=
|
870
|
+
pyautogui_code += "\ntime.sleep(0.5)\n"
|
858
871
|
if content.endswith("\n") or content.endswith("\\n"):
|
859
|
-
pyautogui_code +=
|
872
|
+
pyautogui_code += "\npyautogui.press('enter')"
|
860
873
|
|
861
874
|
elif action_type in ["drag", "select"]:
|
862
875
|
# Parsing drag or select action based on start and end_boxes
|
@@ -869,9 +882,7 @@ class GroundingAgentUitars:
|
|
869
882
|
x1, y1, x2, y2 = eval(end_box) # Assuming box is in [x1, y1, x2, y2]
|
870
883
|
ex = round(float((x1 + x2) / 2) * image_width, 3)
|
871
884
|
ey = round(float((y1 + y2) / 2) * image_height, 3)
|
872
|
-
pyautogui_code += (
|
873
|
-
f"\npyautogui.moveTo({sx}, {sy})\n" f"\npyautogui.dragTo({ex}, {ey}, duration=1.0)\n"
|
874
|
-
)
|
885
|
+
pyautogui_code += f"\npyautogui.moveTo({sx}, {sy})\n\npyautogui.dragTo({ex}, {ey}, duration=1.0)\n"
|
875
886
|
|
876
887
|
elif action_type == "scroll":
|
877
888
|
# Parsing scroll action
|
@@ -888,11 +899,11 @@ class GroundingAgentUitars:
|
|
888
899
|
y = None
|
889
900
|
direction = action_inputs.get("direction", "")
|
890
901
|
|
891
|
-
if x
|
902
|
+
if x is None:
|
892
903
|
if "up" in direction.lower():
|
893
|
-
pyautogui_code +=
|
904
|
+
pyautogui_code += "\npyautogui.scroll(5)"
|
894
905
|
elif "down" in direction.lower():
|
895
|
-
pyautogui_code +=
|
906
|
+
pyautogui_code += "\npyautogui.scroll(-5)"
|
896
907
|
else:
|
897
908
|
if "up" in direction.lower():
|
898
909
|
pyautogui_code += f"\npyautogui.scroll(5, x={x}, y={y})"
|
@@ -923,7 +934,7 @@ class GroundingAgentUitars:
|
|
923
934
|
pyautogui_code += f"\npyautogui.moveTo({x}, {y})"
|
924
935
|
|
925
936
|
elif action_type in ["finished"]:
|
926
|
-
pyautogui_code =
|
937
|
+
pyautogui_code = "DONE"
|
927
938
|
|
928
939
|
else:
|
929
940
|
pyautogui_code += f"\n# Unrecognized action type: {action_type}"
|
@@ -11,7 +11,32 @@ from anthropic.types.beta import BetaContentBlock, BetaTextBlock, BetaToolUseBlo
|
|
11
11
|
from khoj.database.models import ChatModel
|
12
12
|
from khoj.processor.conversation.anthropic.utils import is_reasoning_model
|
13
13
|
from khoj.processor.conversation.utils import AgentMessage
|
14
|
-
from khoj.processor.operator.operator_actions import
|
14
|
+
from khoj.processor.operator.operator_actions import (
|
15
|
+
BackAction,
|
16
|
+
ClickAction,
|
17
|
+
CursorPositionAction,
|
18
|
+
DoubleClickAction,
|
19
|
+
DragAction,
|
20
|
+
GotoAction,
|
21
|
+
HoldKeyAction,
|
22
|
+
KeypressAction,
|
23
|
+
MouseDownAction,
|
24
|
+
MouseUpAction,
|
25
|
+
MoveAction,
|
26
|
+
NoopAction,
|
27
|
+
OperatorAction,
|
28
|
+
Point,
|
29
|
+
ScreenshotAction,
|
30
|
+
ScrollAction,
|
31
|
+
TerminalAction,
|
32
|
+
TextEditorCreateAction,
|
33
|
+
TextEditorInsertAction,
|
34
|
+
TextEditorStrReplaceAction,
|
35
|
+
TextEditorViewAction,
|
36
|
+
TripleClickAction,
|
37
|
+
TypeAction,
|
38
|
+
WaitAction,
|
39
|
+
)
|
15
40
|
from khoj.processor.operator.operator_agent_base import AgentActResult, OperatorAgent
|
16
41
|
from khoj.processor.operator.operator_environment_base import (
|
17
42
|
EnvironmentType,
|
@@ -518,7 +543,7 @@ class AnthropicOperatorAgent(OperatorAgent):
|
|
518
543
|
def model_default_headers(self) -> list[str]:
|
519
544
|
"""Get the default computer use headers for the given model."""
|
520
545
|
if self.vision_model.name.startswith("claude-3-7-sonnet"):
|
521
|
-
return [
|
546
|
+
return ["computer-use-2025-01-24", "token-efficient-tools-2025-02-19"]
|
522
547
|
elif self.vision_model.name.startswith("claude-sonnet-4") or self.vision_model.name.startswith("claude-opus-4"):
|
523
548
|
return ["computer-use-2025-01-24"]
|
524
549
|
else:
|
@@ -538,7 +563,7 @@ class AnthropicOperatorAgent(OperatorAgent):
|
|
538
563
|
* When viewing a webpage it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
|
539
564
|
* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
|
540
565
|
* Perform web searches using DuckDuckGo. Don't use Google even if requested as the query will fail.
|
541
|
-
* The current date is {datetime.today().strftime(
|
566
|
+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
|
542
567
|
* The current URL is {current_state.url}.
|
543
568
|
</SYSTEM_CAPABILITY>
|
544
569
|
|
@@ -563,7 +588,7 @@ class AnthropicOperatorAgent(OperatorAgent):
|
|
563
588
|
</SYSTEM_CAPABILITY>
|
564
589
|
|
565
590
|
<CONTEXT>
|
566
|
-
* The current date is {datetime.today().strftime(
|
591
|
+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
|
567
592
|
</CONTEXT>
|
568
593
|
"""
|
569
594
|
).lstrip()
|
@@ -12,7 +12,7 @@ from khoj.processor.conversation.utils import (
|
|
12
12
|
)
|
13
13
|
from khoj.processor.operator.grounding_agent import GroundingAgent
|
14
14
|
from khoj.processor.operator.grounding_agent_uitars import GroundingAgentUitars
|
15
|
-
from khoj.processor.operator.operator_actions import
|
15
|
+
from khoj.processor.operator.operator_actions import OperatorAction, WaitAction
|
16
16
|
from khoj.processor.operator.operator_agent_base import AgentActResult, OperatorAgent
|
17
17
|
from khoj.processor.operator.operator_environment_base import (
|
18
18
|
EnvironmentType,
|
@@ -181,7 +181,7 @@ class BinaryOperatorAgent(OperatorAgent):
|
|
181
181
|
elif action.type == "key_down":
|
182
182
|
rendered_parts += [f'**Action**: Press Key "{action.key}"']
|
183
183
|
elif action.type == "screenshot" and not current_state.screenshot:
|
184
|
-
rendered_parts += [
|
184
|
+
rendered_parts += ["**Error**: Failed to take screenshot"]
|
185
185
|
elif action.type == "goto":
|
186
186
|
rendered_parts += [f"**Action**: Open URL {action.url}"]
|
187
187
|
else:
|
@@ -317,7 +317,7 @@ class BinaryOperatorAgent(OperatorAgent):
|
|
317
317
|
# Introduction
|
318
318
|
* You are Khoj, a smart and resourceful web browsing assistant. You help the user accomplish their task using a web browser.
|
319
319
|
* You are given the user's query and screenshots of the browser's state transitions.
|
320
|
-
* The current date is {datetime.today().strftime(
|
320
|
+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
|
321
321
|
* The current URL is {env_state.url}.
|
322
322
|
|
323
323
|
# Your Task
|
@@ -362,7 +362,7 @@ class BinaryOperatorAgent(OperatorAgent):
|
|
362
362
|
# Introduction
|
363
363
|
* You are Khoj, a smart and resourceful computer assistant. You help the user accomplish their task using a computer.
|
364
364
|
* You are given the user's query and screenshots of the computer's state transitions.
|
365
|
-
* The current date is {datetime.today().strftime(
|
365
|
+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
|
366
366
|
|
367
367
|
# Your Task
|
368
368
|
* First look at the screenshots carefully to notice all pertinent information.
|
@@ -1,6 +1,5 @@
|
|
1
1
|
import json
|
2
2
|
import logging
|
3
|
-
import platform
|
4
3
|
from copy import deepcopy
|
5
4
|
from datetime import datetime
|
6
5
|
from textwrap import dedent
|
@@ -10,7 +9,23 @@ from openai.types.responses import Response, ResponseOutputItem
|
|
10
9
|
|
11
10
|
from khoj.database.models import ChatModel
|
12
11
|
from khoj.processor.conversation.utils import AgentMessage
|
13
|
-
from khoj.processor.operator.operator_actions import
|
12
|
+
from khoj.processor.operator.operator_actions import (
|
13
|
+
BackAction,
|
14
|
+
ClickAction,
|
15
|
+
DoubleClickAction,
|
16
|
+
DragAction,
|
17
|
+
GotoAction,
|
18
|
+
KeypressAction,
|
19
|
+
MoveAction,
|
20
|
+
NoopAction,
|
21
|
+
OperatorAction,
|
22
|
+
Point,
|
23
|
+
RequestUserAction,
|
24
|
+
ScreenshotAction,
|
25
|
+
ScrollAction,
|
26
|
+
TypeAction,
|
27
|
+
WaitAction,
|
28
|
+
)
|
14
29
|
from khoj.processor.operator.operator_agent_base import AgentActResult, OperatorAgent
|
15
30
|
from khoj.processor.operator.operator_environment_base import (
|
16
31
|
EnvironmentType,
|
@@ -152,7 +167,7 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|
152
167
|
# Add screenshot data in openai message format
|
153
168
|
action_result["output"] = {
|
154
169
|
"type": "input_image",
|
155
|
-
"image_url": f
|
170
|
+
"image_url": f"data:image/webp;base64,{result_content['image']}",
|
156
171
|
"current_url": result_content["url"],
|
157
172
|
}
|
158
173
|
elif action_result["type"] == "computer_call_output" and idx == len(env_steps) - 1:
|
@@ -311,7 +326,7 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|
311
326
|
elif block.type == "function_call":
|
312
327
|
if block.name == "goto":
|
313
328
|
args = json.loads(block.arguments)
|
314
|
-
render_texts = [f
|
329
|
+
render_texts = [f"Open URL: {args.get('url', '[Missing URL]')}"]
|
315
330
|
else:
|
316
331
|
render_texts += [block.name]
|
317
332
|
elif block.type == "computer_call":
|
@@ -351,7 +366,7 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|
351
366
|
* When viewing a webpage it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
|
352
367
|
* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
|
353
368
|
* Perform web searches using DuckDuckGo. Don't use Google even if requested as the query will fail.
|
354
|
-
* The current date is {datetime.today().strftime(
|
369
|
+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
|
355
370
|
* The current URL is {current_state.url}.
|
356
371
|
</SYSTEM_CAPABILITY>
|
357
372
|
|
@@ -374,7 +389,7 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|
374
389
|
</SYSTEM_CAPABILITY>
|
375
390
|
|
376
391
|
<CONTEXT>
|
377
|
-
* The current date is {datetime.today().strftime(
|
392
|
+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
|
378
393
|
</CONTEXT>
|
379
394
|
"""
|
380
395
|
).lstrip()
|
@@ -247,7 +247,7 @@ class BrowserEnvironment(Environment):
|
|
247
247
|
|
248
248
|
case "drag":
|
249
249
|
if not isinstance(action, DragAction):
|
250
|
-
raise TypeError(
|
250
|
+
raise TypeError("Invalid action type for drag")
|
251
251
|
path = action.path
|
252
252
|
if not path:
|
253
253
|
error = "Missing path for drag action"
|
@@ -532,7 +532,7 @@ class ComputerEnvironment(Environment):
|
|
532
532
|
else:
|
533
533
|
return {"success": False, "output": process.stdout, "error": process.stderr}
|
534
534
|
except asyncio.TimeoutError:
|
535
|
-
return {"success": False, "output": "", "error":
|
535
|
+
return {"success": False, "output": "", "error": "Command timed out after 120 seconds."}
|
536
536
|
except Exception as e:
|
537
537
|
return {"success": False, "output": "", "error": str(e)}
|
538
538
|
|
@@ -385,7 +385,7 @@ async def read_webpages(
|
|
385
385
|
tracer: dict = {},
|
386
386
|
):
|
387
387
|
"Infer web pages to read from the query and extract relevant information from them"
|
388
|
-
logger.info(
|
388
|
+
logger.info("Inferring web pages to read")
|
389
389
|
urls = await infer_webpage_urls(
|
390
390
|
query,
|
391
391
|
max_webpages_to_read,
|
khoj/processor/tools/run_code.py
CHANGED
@@ -93,7 +93,7 @@ async def run_code(
|
|
93
93
|
|
94
94
|
# Run Code
|
95
95
|
if send_status_func:
|
96
|
-
async for event in send_status_func(
|
96
|
+
async for event in send_status_func("**Running code snippet**"):
|
97
97
|
yield {ChatEvent.STATUS: event}
|
98
98
|
try:
|
99
99
|
with timer("Chat actor: Execute generated program", logger, log_level=logging.INFO):
|
khoj/routers/api.py
CHANGED
@@ -7,7 +7,6 @@ from typing import List, Optional, Union
|
|
7
7
|
|
8
8
|
import openai
|
9
9
|
from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile
|
10
|
-
from fastapi.requests import Request
|
11
10
|
from fastapi.responses import Response
|
12
11
|
from starlette.authentication import has_required_scope, requires
|
13
12
|
|
@@ -94,7 +93,7 @@ def update(
|
|
94
93
|
logger.error(error_msg, exc_info=True)
|
95
94
|
raise HTTPException(status_code=500, detail=error_msg)
|
96
95
|
else:
|
97
|
-
logger.info(
|
96
|
+
logger.info("📪 Server indexed content updated via API")
|
98
97
|
|
99
98
|
update_telemetry_state(
|
100
99
|
request=request,
|
khoj/routers/api_agents.py
CHANGED
@@ -6,12 +6,11 @@ from typing import Dict, List, Optional
|
|
6
6
|
|
7
7
|
from asgiref.sync import sync_to_async
|
8
8
|
from fastapi import APIRouter, Request
|
9
|
-
from fastapi.requests import Request
|
10
9
|
from fastapi.responses import Response
|
11
10
|
from pydantic import BaseModel
|
12
11
|
from starlette.authentication import has_required_scope, requires
|
13
12
|
|
14
|
-
from khoj.database.adapters import AgentAdapters, ConversationAdapters
|
13
|
+
from khoj.database.adapters import AgentAdapters, ConversationAdapters
|
15
14
|
from khoj.database.models import Agent, Conversation, KhojUser, PriceTier
|
16
15
|
from khoj.routers.helpers import CommonQueryParams, acheck_if_safe_prompt
|
17
16
|
from khoj.utils.helpers import (
|
khoj/routers/api_automation.py
CHANGED
@@ -109,7 +109,7 @@ def post_automation(
|
|
109
109
|
except Exception as e:
|
110
110
|
logger.error(f"Error creating automation {q} for {user.email}: {e}", exc_info=True)
|
111
111
|
return Response(
|
112
|
-
content=
|
112
|
+
content="Unable to create automation. Ensure the automation doesn't already exist.",
|
113
113
|
media_type="text/plain",
|
114
114
|
status_code=500,
|
115
115
|
)
|