khoj 2.0.0b12__py3-none-any.whl → 2.0.0b13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/app/README.md +1 -1
- khoj/app/urls.py +1 -0
- khoj/configure.py +21 -54
- khoj/database/adapters/__init__.py +6 -15
- khoj/database/management/commands/delete_orphaned_fileobjects.py +0 -1
- khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +1 -1
- khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +1 -1
- khoj/database/migrations/0092_alter_chatmodel_model_type_alter_chatmodel_name_and_more.py +36 -0
- khoj/database/migrations/0093_remove_localorgconfig_user_and_more.py +36 -0
- khoj/database/models/__init__.py +10 -40
- khoj/database/tests.py +0 -2
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{9245.a04e92d034540234.js → 1225.ecac11e7421504c4.js} +3 -3
- khoj/interface/compiled/_next/static/chunks/1320.ae930ad00affe685.js +5 -0
- khoj/interface/compiled/_next/static/chunks/{1327-3b1a41af530fa8ee.js → 1327-e254819a9172cfa7.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/1626.15a8acc0d6639ec6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{3489.c523fe96a2eee74f.js → 1940.d082758bd04e08ae.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{2327-ea623ca2d22f78e9.js → 2327-438aaec1657c5ada.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/2475.57a0d0fd93d07af0.js +93 -0
- khoj/interface/compiled/_next/static/chunks/2481.5ce6524ba0a73f90.js +55 -0
- khoj/interface/compiled/_next/static/chunks/297.4c4c823ff6e3255b.js +174 -0
- khoj/interface/compiled/_next/static/chunks/{5639-09e2009a2adedf8b.js → 3260-82d2521fab032ff1.js} +68 -23
- khoj/interface/compiled/_next/static/chunks/3353.1c6d553216a1acae.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3855.f7b8131f78af046e.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3973.dc54a39586ab48be.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4241.c1cd170f7f37ac59.js +24 -0
- khoj/interface/compiled/_next/static/chunks/{4327.8d2a1b8f1ea78208.js → 4327.f3704dc398c67113.js} +19 -19
- khoj/interface/compiled/_next/static/chunks/4505.f09454a346269c3f.js +117 -0
- khoj/interface/compiled/_next/static/chunks/4801.96a152d49742b644.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5427-a95ec748e52abb75.js +1 -0
- khoj/interface/compiled/_next/static/chunks/549.2bd27f59a91a9668.js +148 -0
- khoj/interface/compiled/_next/static/chunks/5765.71b1e1207b76b03f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/584.d7ce3505f169b706.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6240.34f7c1fa692edd61.js +24 -0
- khoj/interface/compiled/_next/static/chunks/6d3fe5a5-f9f3c16e0bc0cdf9.js +10 -0
- khoj/interface/compiled/_next/static/chunks/{7127-0f4a2a77d97fb5fa.js → 7127-97b83757db125ba6.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/7200-93ab0072359b8028.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{2612.bcf5a623b3da209e.js → 7553.f5ad54b1f6e92c49.js} +2 -2
- khoj/interface/compiled/_next/static/chunks/7626-1b630f1654172341.js +1 -0
- khoj/interface/compiled/_next/static/chunks/764.dadd316e8e16d191.js +63 -0
- khoj/interface/compiled/_next/static/chunks/78.08169ab541abab4f.js +43 -0
- khoj/interface/compiled/_next/static/chunks/784.e03acf460df213d1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{9537-d9ab442ce15d1e20.js → 8072-e1440cb482a0940e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{3265.924139c4146ee344.js → 8086.8d39887215807fcd.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8168.f074ab8c7c16d82d.js +59 -0
- khoj/interface/compiled/_next/static/chunks/{8694.2bd9c2f65d8c5847.js → 8223.1705878fa7a09292.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8483.94f6c9e2bee86f50.js +215 -0
- khoj/interface/compiled/_next/static/chunks/{8888.ebe0e552b59e7fed.js → 8810.fc0e479de78c7c61.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8828.bc74dc4ce94e78f6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{7303.d0612f812a967a08.js → 8909.14ac3f43d0070cf1.js} +5 -5
- khoj/interface/compiled/_next/static/chunks/90542734.b1a1629065ba199b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9167.098534184f03fe92.js +56 -0
- khoj/interface/compiled/_next/static/chunks/{4980.63500d68b3bb1222.js → 9537.e934ce37bf314509.js} +5 -5
- khoj/interface/compiled/_next/static/chunks/9574.3fe8e26e95bf1c34.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9599.ec50b5296c27dae9.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9643.b34248df52ffc77c.js +262 -0
- khoj/interface/compiled/_next/static/chunks/9747.2fd9065b1435abb1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9922.98f2b2a9959b4ebe.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e49165209d2e406c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-e291b49977f43880.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-198b26df6e09bbb0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-4bc2938df5d57981.js → page-dfcc1e8e2ad62873.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-a19a597629e87fb8.js → page-1567cac7b79a7c59.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-fa366ac14b228688.js → page-3639e50ec3e9acfd.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-8f9a85f96088c18b.js → page-6081362437c82470.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-6fb51c5c80f8ec67.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-ed7787cf4938b8e3.js → page-e0dcb1762f8c8f88.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/webpack-5393aad3d824e0cb.js +1 -0
- khoj/interface/compiled/_next/static/css/{a0c2fd63bb396f04.css → 23b26df423cd8a9c.css} +1 -1
- khoj/interface/compiled/_next/static/css/{93eeacc43e261162.css → c34713c98384ee87.css} +1 -1
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +3 -3
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +4 -4
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +3 -3
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +3 -3
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +3 -3
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +5 -5
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +3 -3
- khoj/main.py +7 -9
- khoj/manage.py +1 -0
- khoj/processor/content/github/github_to_entries.py +6 -7
- khoj/processor/content/images/image_to_entries.py +0 -1
- khoj/processor/content/markdown/markdown_to_entries.py +2 -3
- khoj/processor/content/notion/notion_to_entries.py +5 -6
- khoj/processor/content/org_mode/org_to_entries.py +4 -5
- khoj/processor/content/org_mode/orgnode.py +4 -4
- khoj/processor/content/plaintext/plaintext_to_entries.py +1 -2
- khoj/processor/content/text_to_entries.py +1 -3
- khoj/processor/conversation/google/utils.py +3 -3
- khoj/processor/conversation/openai/gpt.py +65 -28
- khoj/processor/conversation/openai/utils.py +359 -28
- khoj/processor/conversation/prompts.py +16 -41
- khoj/processor/conversation/utils.py +29 -39
- khoj/processor/embeddings.py +0 -2
- khoj/processor/image/generate.py +3 -3
- khoj/processor/operator/__init__.py +2 -3
- khoj/processor/operator/grounding_agent.py +15 -2
- khoj/processor/operator/grounding_agent_uitars.py +34 -23
- khoj/processor/operator/operator_agent_anthropic.py +29 -4
- khoj/processor/operator/operator_agent_base.py +1 -1
- khoj/processor/operator/operator_agent_binary.py +4 -4
- khoj/processor/operator/operator_agent_openai.py +21 -6
- khoj/processor/operator/operator_environment_browser.py +1 -1
- khoj/processor/operator/operator_environment_computer.py +1 -1
- khoj/processor/speech/text_to_speech.py +0 -1
- khoj/processor/tools/online_search.py +1 -1
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api.py +2 -15
- khoj/routers/api_agents.py +1 -2
- khoj/routers/api_automation.py +1 -1
- khoj/routers/api_chat.py +10 -16
- khoj/routers/api_content.py +3 -111
- khoj/routers/api_model.py +0 -1
- khoj/routers/api_subscription.py +1 -1
- khoj/routers/email.py +4 -4
- khoj/routers/helpers.py +44 -103
- khoj/routers/research.py +8 -8
- khoj/search_filter/base_filter.py +2 -4
- khoj/search_type/text_search.py +1 -2
- khoj/utils/cli.py +5 -53
- khoj/utils/config.py +0 -65
- khoj/utils/constants.py +6 -7
- khoj/utils/helpers.py +10 -18
- khoj/utils/initialization.py +7 -48
- khoj/utils/models.py +2 -4
- khoj/utils/rawconfig.py +1 -69
- khoj/utils/state.py +2 -8
- khoj/utils/yaml.py +0 -39
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dist-info}/METADATA +3 -3
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dist-info}/RECORD +149 -158
- khoj/interface/compiled/_next/static/chunks/1191.b547ec13349b4aed.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1588.f0558a0bdffc4761.js +0 -117
- khoj/interface/compiled/_next/static/chunks/1918.925cb4a35518d258.js +0 -43
- khoj/interface/compiled/_next/static/chunks/2849.dc00ae5ba7219cfc.js +0 -1
- khoj/interface/compiled/_next/static/chunks/303.fe76de943e930fbd.js +0 -1
- khoj/interface/compiled/_next/static/chunks/4533.586e74b45a2bde25.js +0 -55
- khoj/interface/compiled/_next/static/chunks/4551.82ce1476b5516bc2.js +0 -5
- khoj/interface/compiled/_next/static/chunks/4748.0edd37cba3ea2809.js +0 -59
- khoj/interface/compiled/_next/static/chunks/5210.cd35a1c1ec594a20.js +0 -93
- khoj/interface/compiled/_next/static/chunks/5329.f8b3c5b3d16159cd.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5427-13d6ffd380fdfab7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/558-c14e76cff03f6a60.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5830.8876eccb82da9b7d.js +0 -262
- khoj/interface/compiled/_next/static/chunks/6230.88a71d8145347b3f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7161.77e0530a40ad5ca8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7200-ac3b2e37ff30e126.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7505.c31027a3695bdebb.js +0 -148
- khoj/interface/compiled/_next/static/chunks/7760.35649cc21d9585bd.js +0 -56
- khoj/interface/compiled/_next/static/chunks/83.48e2db193a940052.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8427.844694e06133fb51.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8665.4db7e6b2e8933497.js +0 -174
- khoj/interface/compiled/_next/static/chunks/872.caf84cc1a39ae59f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8890.6e8a59e4de6978bc.js +0 -215
- khoj/interface/compiled/_next/static/chunks/8950.5f2272e0ac923f9e.js +0 -1
- khoj/interface/compiled/_next/static/chunks/90542734.2c21f16f18b22411.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9202.c703864fcedc8d1f.js +0 -63
- khoj/interface/compiled/_next/static/chunks/9320.6aca4885d541aa44.js +0 -24
- khoj/interface/compiled/_next/static/chunks/9535.f78cd92d03331e55.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9968.b111fc002796da81.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/page-5db6ad18da10d353.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/page-6271e2e31c7571d1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-f5881c7ae3ba0795.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-abb6c5f4239ad7be.js +0 -1
- khoj/interface/compiled/_next/static/chunks/f3e3247b-1758d4651e4457c2.js +0 -10
- khoj/interface/compiled/_next/static/chunks/webpack-4b00e5a0da4a9dae.js +0 -1
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +0 -69
- khoj/migrations/migrate_offline_chat_default_model_2.py +0 -71
- khoj/migrations/migrate_offline_chat_schema.py +0 -83
- khoj/migrations/migrate_offline_model.py +0 -29
- khoj/migrations/migrate_processor_config_openai.py +0 -67
- khoj/migrations/migrate_server_pg.py +0 -132
- khoj/migrations/migrate_version.py +0 -17
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +0 -224
- khoj/processor/conversation/offline/utils.py +0 -80
- khoj/processor/conversation/offline/whisper.py +0 -15
- khoj/utils/fs_syncer.py +0 -252
- /khoj/interface/compiled/_next/static/{TTch40tYWOfh0SzwjwZXV → RYbQvo3AvgOR0bEVVfxF4}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{TTch40tYWOfh0SzwjwZXV → RYbQvo3AvgOR0bEVVfxF4}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9139-ce1ae935dac9c871.js → 9139-8ac4d9feb10f8869.js} +0 -0
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dist-info}/WHEEL +0 -0
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dist-info}/entry_points.txt +0 -0
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dist-info}/licenses/LICENSE +0 -0
@@ -1,7 +1,6 @@
|
|
1
1
|
import base64
|
2
2
|
import json
|
3
3
|
import logging
|
4
|
-
import math
|
5
4
|
import mimetypes
|
6
5
|
import os
|
7
6
|
import re
|
@@ -18,9 +17,7 @@ import requests
|
|
18
17
|
import tiktoken
|
19
18
|
import yaml
|
20
19
|
from langchain_core.messages.chat import ChatMessage
|
21
|
-
from
|
22
|
-
from llama_cpp.llama import Llama
|
23
|
-
from pydantic import BaseModel, ConfigDict, ValidationError, create_model
|
20
|
+
from pydantic import BaseModel, ConfigDict, ValidationError
|
24
21
|
from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
|
25
22
|
|
26
23
|
from khoj.database.adapters import ConversationAdapters
|
@@ -32,7 +29,6 @@ from khoj.database.models import (
|
|
32
29
|
KhojUser,
|
33
30
|
)
|
34
31
|
from khoj.processor.conversation import prompts
|
35
|
-
from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
|
36
32
|
from khoj.search_filter.base_filter import BaseFilter
|
37
33
|
from khoj.search_filter.date_filter import DateFilter
|
38
34
|
from khoj.search_filter.file_filter import FileFilter
|
@@ -50,7 +46,11 @@ from khoj.utils.yaml import yaml_dump
|
|
50
46
|
logger = logging.getLogger(__name__)
|
51
47
|
|
52
48
|
try:
|
53
|
-
|
49
|
+
import importlib.util
|
50
|
+
|
51
|
+
git_spec = importlib.util.find_spec("git")
|
52
|
+
if git_spec is None:
|
53
|
+
raise ImportError
|
54
54
|
except ImportError:
|
55
55
|
if is_promptrace_enabled():
|
56
56
|
logger.warning("GitPython not installed. `pip install gitpython` to use prompt tracer.")
|
@@ -68,6 +68,9 @@ model_to_prompt_size = {
|
|
68
68
|
"o3": 60000,
|
69
69
|
"o3-pro": 30000,
|
70
70
|
"o4-mini": 90000,
|
71
|
+
"gpt-5-2025-08-07": 120000,
|
72
|
+
"gpt-5-mini-2025-08-07": 120000,
|
73
|
+
"gpt-5-nano-2025-08-07": 120000,
|
71
74
|
# Google Models
|
72
75
|
"gemini-2.5-flash": 120000,
|
73
76
|
"gemini-2.5-pro": 60000,
|
@@ -85,12 +88,6 @@ model_to_prompt_size = {
|
|
85
88
|
"claude-sonnet-4-20250514": 60000,
|
86
89
|
"claude-opus-4-0": 60000,
|
87
90
|
"claude-opus-4-20250514": 60000,
|
88
|
-
# Offline Models
|
89
|
-
"bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,
|
90
|
-
"bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
|
91
|
-
"bartowski/Llama-3.2-3B-Instruct-GGUF": 20000,
|
92
|
-
"bartowski/gemma-2-9b-it-GGUF": 6000,
|
93
|
-
"bartowski/gemma-2-2b-it-GGUF": 6000,
|
94
91
|
}
|
95
92
|
model_to_tokenizer: Dict[str, str] = {}
|
96
93
|
|
@@ -303,7 +300,7 @@ def construct_chat_history_for_operator(conversation_history: List[ChatMessageMo
|
|
303
300
|
if chat.by == "you" and chat.message:
|
304
301
|
content = [{"type": "text", "text": chat.message}]
|
305
302
|
for file in chat.queryFiles or []:
|
306
|
-
content += [{"type": "text", "text": f
|
303
|
+
content += [{"type": "text", "text": f"## File: {file['name']}\n\n{file['content']}"}]
|
307
304
|
user_message = AgentMessage(role="user", content=content)
|
308
305
|
elif chat.by == "khoj" and chat.message:
|
309
306
|
chat_history += [user_message, AgentMessage(role="assistant", content=chat.message)]
|
@@ -320,7 +317,10 @@ def construct_tool_chat_history(
|
|
320
317
|
If no tool is provided inferred query for all tools used are added.
|
321
318
|
"""
|
322
319
|
chat_history: list = []
|
323
|
-
|
320
|
+
|
321
|
+
def base_extractor(iteration: ResearchIteration) -> List[str]:
|
322
|
+
return []
|
323
|
+
|
324
324
|
extract_inferred_query_map: Dict[ConversationCommand, Callable[[ResearchIteration], List[str]]] = {
|
325
325
|
ConversationCommand.SemanticSearchFiles: (
|
326
326
|
lambda iteration: [c["query"] for c in iteration.context] if iteration.context else []
|
@@ -331,7 +331,7 @@ def construct_tool_chat_history(
|
|
331
331
|
ConversationCommand.ReadWebpage: (
|
332
332
|
lambda iteration: list(iteration.onlineContext.keys()) if iteration.onlineContext else []
|
333
333
|
),
|
334
|
-
ConversationCommand.
|
334
|
+
ConversationCommand.PythonCoder: (
|
335
335
|
lambda iteration: list(iteration.codeContext.keys()) if iteration.codeContext else []
|
336
336
|
),
|
337
337
|
}
|
@@ -507,7 +507,7 @@ async def save_to_conversation_log(
|
|
507
507
|
|
508
508
|
logger.info(
|
509
509
|
f"""
|
510
|
-
Saved Conversation Turn ({db_conversation.id if db_conversation else
|
510
|
+
Saved Conversation Turn ({db_conversation.id if db_conversation else "N/A"}):
|
511
511
|
You ({user.username}): "{q}"
|
512
512
|
|
513
513
|
Khoj: "{chat_response}"
|
@@ -573,7 +573,6 @@ def generate_chatml_messages_with_context(
|
|
573
573
|
system_message: str = None,
|
574
574
|
chat_history: list[ChatMessageModel] = [],
|
575
575
|
model_name="gpt-4o-mini",
|
576
|
-
loaded_model: Optional[Llama] = None,
|
577
576
|
max_prompt_size=None,
|
578
577
|
tokenizer_name=None,
|
579
578
|
query_images=None,
|
@@ -588,10 +587,7 @@ def generate_chatml_messages_with_context(
|
|
588
587
|
"""Generate chat messages with appropriate context from previous conversation to send to the chat model"""
|
589
588
|
# Set max prompt size from user config or based on pre-configured for model and machine specs
|
590
589
|
if not max_prompt_size:
|
591
|
-
|
592
|
-
max_prompt_size = infer_max_tokens(loaded_model.n_ctx(), model_to_prompt_size.get(model_name, math.inf))
|
593
|
-
else:
|
594
|
-
max_prompt_size = model_to_prompt_size.get(model_name, 10000)
|
590
|
+
max_prompt_size = model_to_prompt_size.get(model_name, 10000)
|
595
591
|
|
596
592
|
# Scale lookback turns proportional to max prompt size supported by model
|
597
593
|
lookback_turns = max_prompt_size // 750
|
@@ -638,7 +634,7 @@ def generate_chatml_messages_with_context(
|
|
638
634
|
|
639
635
|
if not is_none_or_empty(chat.operatorContext):
|
640
636
|
operator_context = chat.operatorContext
|
641
|
-
operator_content = "\n\n".join([f
|
637
|
+
operator_content = "\n\n".join([f"## Task: {oc['query']}\n{oc['response']}\n" for oc in operator_context])
|
642
638
|
message_context += [
|
643
639
|
{
|
644
640
|
"type": "text",
|
@@ -735,7 +731,7 @@ def generate_chatml_messages_with_context(
|
|
735
731
|
message.content = [{"type": "text", "text": message.content}]
|
736
732
|
|
737
733
|
# Truncate oldest messages from conversation history until under max supported prompt size by model
|
738
|
-
messages = truncate_messages(messages, max_prompt_size, model_name,
|
734
|
+
messages = truncate_messages(messages, max_prompt_size, model_name, tokenizer_name)
|
739
735
|
|
740
736
|
# Return message in chronological order
|
741
737
|
return messages[::-1]
|
@@ -743,26 +739,21 @@ def generate_chatml_messages_with_context(
|
|
743
739
|
|
744
740
|
def get_encoder(
|
745
741
|
model_name: str,
|
746
|
-
loaded_model: Optional[Llama] = None,
|
747
742
|
tokenizer_name=None,
|
748
|
-
) -> tiktoken.Encoding | PreTrainedTokenizer | PreTrainedTokenizerFast
|
743
|
+
) -> tiktoken.Encoding | PreTrainedTokenizer | PreTrainedTokenizerFast:
|
749
744
|
default_tokenizer = "gpt-4o"
|
750
745
|
|
751
746
|
try:
|
752
|
-
if
|
753
|
-
encoder = loaded_model.tokenizer()
|
754
|
-
elif model_name.startswith("gpt-") or model_name.startswith("o1"):
|
755
|
-
# as tiktoken doesn't recognize o1 model series yet
|
756
|
-
encoder = tiktoken.encoding_for_model("gpt-4o" if model_name.startswith("o1") else model_name)
|
757
|
-
elif tokenizer_name:
|
747
|
+
if tokenizer_name:
|
758
748
|
if tokenizer_name in state.pretrained_tokenizers:
|
759
749
|
encoder = state.pretrained_tokenizers[tokenizer_name]
|
760
750
|
else:
|
761
751
|
encoder = AutoTokenizer.from_pretrained(tokenizer_name)
|
762
752
|
state.pretrained_tokenizers[tokenizer_name] = encoder
|
763
753
|
else:
|
764
|
-
|
765
|
-
|
754
|
+
# as tiktoken doesn't recognize o1 model series yet
|
755
|
+
encoder = tiktoken.encoding_for_model("gpt-4o" if model_name.startswith("o1") else model_name)
|
756
|
+
except Exception:
|
766
757
|
encoder = tiktoken.encoding_for_model(default_tokenizer)
|
767
758
|
if state.verbose > 2:
|
768
759
|
logger.debug(
|
@@ -773,7 +764,7 @@ def get_encoder(
|
|
773
764
|
|
774
765
|
def count_tokens(
|
775
766
|
message_content: str | list[str | dict],
|
776
|
-
encoder: PreTrainedTokenizer | PreTrainedTokenizerFast |
|
767
|
+
encoder: PreTrainedTokenizer | PreTrainedTokenizerFast | tiktoken.Encoding,
|
777
768
|
) -> int:
|
778
769
|
"""
|
779
770
|
Count the total number of tokens in a list of messages.
|
@@ -825,11 +816,10 @@ def truncate_messages(
|
|
825
816
|
messages: list[ChatMessage],
|
826
817
|
max_prompt_size: int,
|
827
818
|
model_name: str,
|
828
|
-
loaded_model: Optional[Llama] = None,
|
829
819
|
tokenizer_name=None,
|
830
820
|
) -> list[ChatMessage]:
|
831
821
|
"""Truncate messages to fit within max prompt size supported by model"""
|
832
|
-
encoder = get_encoder(model_name,
|
822
|
+
encoder = get_encoder(model_name, tokenizer_name)
|
833
823
|
|
834
824
|
# Extract system message from messages
|
835
825
|
system_message = None
|
@@ -865,9 +855,9 @@ def truncate_messages(
|
|
865
855
|
total_tokens, _ = count_total_tokens(messages, encoder, system_message)
|
866
856
|
if total_tokens > max_prompt_size:
|
867
857
|
# At this point, a single message with a single content part of type dict should remain
|
868
|
-
assert (
|
869
|
-
|
870
|
-
)
|
858
|
+
assert len(messages) == 1 and len(messages[0].content) == 1 and isinstance(messages[0].content[0], dict), (
|
859
|
+
"Expected a single message with a single content part remaining at this point in truncation"
|
860
|
+
)
|
871
861
|
|
872
862
|
# Collate message content into single string to ease truncation
|
873
863
|
part = messages[0].content[0]
|
khoj/processor/embeddings.py
CHANGED
khoj/processor/image/generate.py
CHANGED
@@ -108,12 +108,12 @@ async def text_to_image(
|
|
108
108
|
if "content_policy_violation" in e.message:
|
109
109
|
logger.error(f"Image Generation blocked by OpenAI: {e}")
|
110
110
|
status_code = e.status_code # type: ignore
|
111
|
-
message =
|
111
|
+
message = "Image generation blocked by OpenAI due to policy violation" # type: ignore
|
112
112
|
yield image_url or image, status_code, message
|
113
113
|
return
|
114
114
|
else:
|
115
115
|
logger.error(f"Image Generation failed with {e}", exc_info=True)
|
116
|
-
message =
|
116
|
+
message = "Image generation failed using OpenAI" # type: ignore
|
117
117
|
status_code = e.status_code # type: ignore
|
118
118
|
yield image_url or image, status_code, message
|
119
119
|
return
|
@@ -199,7 +199,7 @@ def generate_image_with_stability(
|
|
199
199
|
|
200
200
|
# Call Stability AI API to generate image
|
201
201
|
response = requests.post(
|
202
|
-
|
202
|
+
"https://api.stability.ai/v2beta/stable-image/generate/sd3",
|
203
203
|
headers={"authorization": f"Bearer {text_to_image_config.api_key}", "accept": "image/*"},
|
204
204
|
files={"none": ""},
|
205
205
|
data={
|
@@ -11,7 +11,7 @@ from khoj.processor.conversation.utils import (
|
|
11
11
|
OperatorRun,
|
12
12
|
construct_chat_history_for_operator,
|
13
13
|
)
|
14
|
-
from khoj.processor.operator.operator_actions import
|
14
|
+
from khoj.processor.operator.operator_actions import RequestUserAction
|
15
15
|
from khoj.processor.operator.operator_agent_anthropic import AnthropicOperatorAgent
|
16
16
|
from khoj.processor.operator.operator_agent_base import OperatorAgent
|
17
17
|
from khoj.processor.operator.operator_agent_binary import BinaryOperatorAgent
|
@@ -59,7 +59,7 @@ async def operate_environment(
|
|
59
59
|
if not reasoning_model or not reasoning_model.vision_enabled:
|
60
60
|
reasoning_model = await ConversationAdapters.aget_vision_enabled_config()
|
61
61
|
if not reasoning_model:
|
62
|
-
raise ValueError(
|
62
|
+
raise ValueError("No vision enabled chat model found. Configure a vision chat model to operate environment.")
|
63
63
|
|
64
64
|
# Create conversation history from conversation log
|
65
65
|
chat_history = construct_chat_history_for_operator(conversation_log)
|
@@ -235,7 +235,6 @@ def is_operator_model(model: str) -> ChatModel.ModelType | None:
|
|
235
235
|
"claude-3-7-sonnet": ChatModel.ModelType.ANTHROPIC,
|
236
236
|
"claude-sonnet-4": ChatModel.ModelType.ANTHROPIC,
|
237
237
|
"claude-opus-4": ChatModel.ModelType.ANTHROPIC,
|
238
|
-
"ui-tars-1.5": ChatModel.ModelType.OFFLINE,
|
239
238
|
}
|
240
239
|
for operator_model in operator_models:
|
241
240
|
if model.startswith(operator_model):
|
@@ -1,14 +1,27 @@
|
|
1
1
|
import json
|
2
2
|
import logging
|
3
3
|
from textwrap import dedent
|
4
|
+
from typing import List, Optional
|
4
5
|
|
5
6
|
from openai import AzureOpenAI, OpenAI
|
6
7
|
from openai.types.chat import ChatCompletion, ChatCompletionMessage
|
7
8
|
|
8
9
|
from khoj.database.models import ChatModel
|
9
10
|
from khoj.processor.conversation.utils import construct_structured_message
|
10
|
-
from khoj.processor.operator.operator_actions import
|
11
|
-
|
11
|
+
from khoj.processor.operator.operator_actions import (
|
12
|
+
BackAction,
|
13
|
+
ClickAction,
|
14
|
+
DoubleClickAction,
|
15
|
+
DragAction,
|
16
|
+
GotoAction,
|
17
|
+
KeypressAction,
|
18
|
+
OperatorAction,
|
19
|
+
Point,
|
20
|
+
ScreenshotAction,
|
21
|
+
ScrollAction,
|
22
|
+
TypeAction,
|
23
|
+
WaitAction,
|
24
|
+
)
|
12
25
|
from khoj.processor.operator.operator_environment_base import EnvironmentType, EnvState
|
13
26
|
from khoj.utils.helpers import get_chat_usage_metrics
|
14
27
|
|
@@ -18,7 +18,22 @@ from openai import AsyncAzureOpenAI, AsyncOpenAI
|
|
18
18
|
from openai.types.chat import ChatCompletion
|
19
19
|
from PIL import Image
|
20
20
|
|
21
|
-
from khoj.processor.operator.operator_actions import
|
21
|
+
from khoj.processor.operator.operator_actions import (
|
22
|
+
BackAction,
|
23
|
+
ClickAction,
|
24
|
+
DoubleClickAction,
|
25
|
+
DragAction,
|
26
|
+
GotoAction,
|
27
|
+
KeyDownAction,
|
28
|
+
KeypressAction,
|
29
|
+
KeyUpAction,
|
30
|
+
MoveAction,
|
31
|
+
OperatorAction,
|
32
|
+
RequestUserAction,
|
33
|
+
ScrollAction,
|
34
|
+
TypeAction,
|
35
|
+
WaitAction,
|
36
|
+
)
|
22
37
|
from khoj.processor.operator.operator_environment_base import EnvironmentType, EnvState
|
23
38
|
from khoj.utils.helpers import get_chat_usage_metrics
|
24
39
|
|
@@ -122,11 +137,10 @@ class GroundingAgentUitars:
|
|
122
137
|
)
|
123
138
|
|
124
139
|
temperature = self.temperature
|
125
|
-
top_k = self.top_k
|
126
140
|
try_times = 3
|
127
141
|
while not parsed_responses:
|
128
142
|
if try_times <= 0:
|
129
|
-
logger.warning(
|
143
|
+
logger.warning("Reach max retry times to fetch response from client, as error flag.")
|
130
144
|
return "client error\nFAIL", []
|
131
145
|
try:
|
132
146
|
message_content = "\n".join([msg["content"][0].get("text") or "[image]" for msg in messages])
|
@@ -163,7 +177,6 @@ class GroundingAgentUitars:
|
|
163
177
|
prediction = None
|
164
178
|
try_times -= 1
|
165
179
|
temperature = 1
|
166
|
-
top_k = -1
|
167
180
|
|
168
181
|
if prediction is None:
|
169
182
|
return "client error\nFAIL", []
|
@@ -264,9 +277,9 @@ class GroundingAgentUitars:
|
|
264
277
|
raise ValueError(f"Unsupported environment type: {environment_type}")
|
265
278
|
|
266
279
|
def _format_messages_for_api(self, instruction: str, current_state: EnvState):
|
267
|
-
assert len(self.observations) == len(self.actions) and len(self.actions) == len(
|
268
|
-
|
269
|
-
)
|
280
|
+
assert len(self.observations) == len(self.actions) and len(self.actions) == len(self.thoughts), (
|
281
|
+
"The number of observations and actions should be the same."
|
282
|
+
)
|
270
283
|
|
271
284
|
self.history_images.append(base64.b64decode(current_state.screenshot))
|
272
285
|
self.observations.append({"screenshot": current_state.screenshot, "accessibility_tree": None})
|
@@ -524,7 +537,7 @@ class GroundingAgentUitars:
|
|
524
537
|
parsed_actions = [self.parse_action_string(action.replace("\n", "\\n").lstrip()) for action in all_action]
|
525
538
|
actions: list[dict] = []
|
526
539
|
for action_instance, raw_str in zip(parsed_actions, all_action):
|
527
|
-
if action_instance
|
540
|
+
if action_instance is None:
|
528
541
|
print(f"Action can't parse: {raw_str}")
|
529
542
|
raise ValueError(f"Action can't parse: {raw_str}")
|
530
543
|
action_type = action_instance["function"]
|
@@ -756,7 +769,7 @@ class GroundingAgentUitars:
|
|
756
769
|
The pyautogui code string
|
757
770
|
"""
|
758
771
|
|
759
|
-
pyautogui_code =
|
772
|
+
pyautogui_code = "import pyautogui\nimport time\n"
|
760
773
|
actions = []
|
761
774
|
if isinstance(responses, dict):
|
762
775
|
responses = [responses]
|
@@ -774,7 +787,7 @@ class GroundingAgentUitars:
|
|
774
787
|
if response_id == 0:
|
775
788
|
pyautogui_code += f"'''\nObservation:\n{observation}\n\nThought:\n{thought}\n'''\n"
|
776
789
|
else:
|
777
|
-
pyautogui_code +=
|
790
|
+
pyautogui_code += "\ntime.sleep(1)\n"
|
778
791
|
|
779
792
|
action_dict = response
|
780
793
|
action_type = action_dict.get("action_type")
|
@@ -846,17 +859,17 @@ class GroundingAgentUitars:
|
|
846
859
|
if content:
|
847
860
|
if input_swap:
|
848
861
|
actions += TypeAction()
|
849
|
-
pyautogui_code +=
|
862
|
+
pyautogui_code += "\nimport pyperclip"
|
850
863
|
pyautogui_code += f"\npyperclip.copy('{stripped_content}')"
|
851
|
-
pyautogui_code +=
|
852
|
-
pyautogui_code +=
|
864
|
+
pyautogui_code += "\npyautogui.hotkey('ctrl', 'v')"
|
865
|
+
pyautogui_code += "\ntime.sleep(0.5)\n"
|
853
866
|
if content.endswith("\n") or content.endswith("\\n"):
|
854
|
-
pyautogui_code +=
|
867
|
+
pyautogui_code += "\npyautogui.press('enter')"
|
855
868
|
else:
|
856
869
|
pyautogui_code += f"\npyautogui.write('{stripped_content}', interval=0.1)"
|
857
|
-
pyautogui_code +=
|
870
|
+
pyautogui_code += "\ntime.sleep(0.5)\n"
|
858
871
|
if content.endswith("\n") or content.endswith("\\n"):
|
859
|
-
pyautogui_code +=
|
872
|
+
pyautogui_code += "\npyautogui.press('enter')"
|
860
873
|
|
861
874
|
elif action_type in ["drag", "select"]:
|
862
875
|
# Parsing drag or select action based on start and end_boxes
|
@@ -869,9 +882,7 @@ class GroundingAgentUitars:
|
|
869
882
|
x1, y1, x2, y2 = eval(end_box) # Assuming box is in [x1, y1, x2, y2]
|
870
883
|
ex = round(float((x1 + x2) / 2) * image_width, 3)
|
871
884
|
ey = round(float((y1 + y2) / 2) * image_height, 3)
|
872
|
-
pyautogui_code += (
|
873
|
-
f"\npyautogui.moveTo({sx}, {sy})\n" f"\npyautogui.dragTo({ex}, {ey}, duration=1.0)\n"
|
874
|
-
)
|
885
|
+
pyautogui_code += f"\npyautogui.moveTo({sx}, {sy})\n\npyautogui.dragTo({ex}, {ey}, duration=1.0)\n"
|
875
886
|
|
876
887
|
elif action_type == "scroll":
|
877
888
|
# Parsing scroll action
|
@@ -888,11 +899,11 @@ class GroundingAgentUitars:
|
|
888
899
|
y = None
|
889
900
|
direction = action_inputs.get("direction", "")
|
890
901
|
|
891
|
-
if x
|
902
|
+
if x is None:
|
892
903
|
if "up" in direction.lower():
|
893
|
-
pyautogui_code +=
|
904
|
+
pyautogui_code += "\npyautogui.scroll(5)"
|
894
905
|
elif "down" in direction.lower():
|
895
|
-
pyautogui_code +=
|
906
|
+
pyautogui_code += "\npyautogui.scroll(-5)"
|
896
907
|
else:
|
897
908
|
if "up" in direction.lower():
|
898
909
|
pyautogui_code += f"\npyautogui.scroll(5, x={x}, y={y})"
|
@@ -923,7 +934,7 @@ class GroundingAgentUitars:
|
|
923
934
|
pyautogui_code += f"\npyautogui.moveTo({x}, {y})"
|
924
935
|
|
925
936
|
elif action_type in ["finished"]:
|
926
|
-
pyautogui_code =
|
937
|
+
pyautogui_code = "DONE"
|
927
938
|
|
928
939
|
else:
|
929
940
|
pyautogui_code += f"\n# Unrecognized action type: {action_type}"
|
@@ -11,7 +11,32 @@ from anthropic.types.beta import BetaContentBlock, BetaTextBlock, BetaToolUseBlo
|
|
11
11
|
from khoj.database.models import ChatModel
|
12
12
|
from khoj.processor.conversation.anthropic.utils import is_reasoning_model
|
13
13
|
from khoj.processor.conversation.utils import AgentMessage
|
14
|
-
from khoj.processor.operator.operator_actions import
|
14
|
+
from khoj.processor.operator.operator_actions import (
|
15
|
+
BackAction,
|
16
|
+
ClickAction,
|
17
|
+
CursorPositionAction,
|
18
|
+
DoubleClickAction,
|
19
|
+
DragAction,
|
20
|
+
GotoAction,
|
21
|
+
HoldKeyAction,
|
22
|
+
KeypressAction,
|
23
|
+
MouseDownAction,
|
24
|
+
MouseUpAction,
|
25
|
+
MoveAction,
|
26
|
+
NoopAction,
|
27
|
+
OperatorAction,
|
28
|
+
Point,
|
29
|
+
ScreenshotAction,
|
30
|
+
ScrollAction,
|
31
|
+
TerminalAction,
|
32
|
+
TextEditorCreateAction,
|
33
|
+
TextEditorInsertAction,
|
34
|
+
TextEditorStrReplaceAction,
|
35
|
+
TextEditorViewAction,
|
36
|
+
TripleClickAction,
|
37
|
+
TypeAction,
|
38
|
+
WaitAction,
|
39
|
+
)
|
15
40
|
from khoj.processor.operator.operator_agent_base import AgentActResult, OperatorAgent
|
16
41
|
from khoj.processor.operator.operator_environment_base import (
|
17
42
|
EnvironmentType,
|
@@ -518,7 +543,7 @@ class AnthropicOperatorAgent(OperatorAgent):
|
|
518
543
|
def model_default_headers(self) -> list[str]:
|
519
544
|
"""Get the default computer use headers for the given model."""
|
520
545
|
if self.vision_model.name.startswith("claude-3-7-sonnet"):
|
521
|
-
return [
|
546
|
+
return ["computer-use-2025-01-24", "token-efficient-tools-2025-02-19"]
|
522
547
|
elif self.vision_model.name.startswith("claude-sonnet-4") or self.vision_model.name.startswith("claude-opus-4"):
|
523
548
|
return ["computer-use-2025-01-24"]
|
524
549
|
else:
|
@@ -538,7 +563,7 @@ class AnthropicOperatorAgent(OperatorAgent):
|
|
538
563
|
* When viewing a webpage it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
|
539
564
|
* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
|
540
565
|
* Perform web searches using DuckDuckGo. Don't use Google even if requested as the query will fail.
|
541
|
-
* The current date is {datetime.today().strftime(
|
566
|
+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
|
542
567
|
* The current URL is {current_state.url}.
|
543
568
|
</SYSTEM_CAPABILITY>
|
544
569
|
|
@@ -563,7 +588,7 @@ class AnthropicOperatorAgent(OperatorAgent):
|
|
563
588
|
</SYSTEM_CAPABILITY>
|
564
589
|
|
565
590
|
<CONTEXT>
|
566
|
-
* The current date is {datetime.today().strftime(
|
591
|
+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
|
567
592
|
</CONTEXT>
|
568
593
|
"""
|
569
594
|
).lstrip()
|
@@ -12,7 +12,7 @@ from khoj.processor.conversation.utils import (
|
|
12
12
|
)
|
13
13
|
from khoj.processor.operator.grounding_agent import GroundingAgent
|
14
14
|
from khoj.processor.operator.grounding_agent_uitars import GroundingAgentUitars
|
15
|
-
from khoj.processor.operator.operator_actions import
|
15
|
+
from khoj.processor.operator.operator_actions import OperatorAction, WaitAction
|
16
16
|
from khoj.processor.operator.operator_agent_base import AgentActResult, OperatorAgent
|
17
17
|
from khoj.processor.operator.operator_environment_base import (
|
18
18
|
EnvironmentType,
|
@@ -181,7 +181,7 @@ class BinaryOperatorAgent(OperatorAgent):
|
|
181
181
|
elif action.type == "key_down":
|
182
182
|
rendered_parts += [f'**Action**: Press Key "{action.key}"']
|
183
183
|
elif action.type == "screenshot" and not current_state.screenshot:
|
184
|
-
rendered_parts += [
|
184
|
+
rendered_parts += ["**Error**: Failed to take screenshot"]
|
185
185
|
elif action.type == "goto":
|
186
186
|
rendered_parts += [f"**Action**: Open URL {action.url}"]
|
187
187
|
else:
|
@@ -317,7 +317,7 @@ class BinaryOperatorAgent(OperatorAgent):
|
|
317
317
|
# Introduction
|
318
318
|
* You are Khoj, a smart and resourceful web browsing assistant. You help the user accomplish their task using a web browser.
|
319
319
|
* You are given the user's query and screenshots of the browser's state transitions.
|
320
|
-
* The current date is {datetime.today().strftime(
|
320
|
+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
|
321
321
|
* The current URL is {env_state.url}.
|
322
322
|
|
323
323
|
# Your Task
|
@@ -362,7 +362,7 @@ class BinaryOperatorAgent(OperatorAgent):
|
|
362
362
|
# Introduction
|
363
363
|
* You are Khoj, a smart and resourceful computer assistant. You help the user accomplish their task using a computer.
|
364
364
|
* You are given the user's query and screenshots of the computer's state transitions.
|
365
|
-
* The current date is {datetime.today().strftime(
|
365
|
+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
|
366
366
|
|
367
367
|
# Your Task
|
368
368
|
* First look at the screenshots carefully to notice all pertinent information.
|
@@ -1,6 +1,5 @@
|
|
1
1
|
import json
|
2
2
|
import logging
|
3
|
-
import platform
|
4
3
|
from copy import deepcopy
|
5
4
|
from datetime import datetime
|
6
5
|
from textwrap import dedent
|
@@ -10,7 +9,23 @@ from openai.types.responses import Response, ResponseOutputItem
|
|
10
9
|
|
11
10
|
from khoj.database.models import ChatModel
|
12
11
|
from khoj.processor.conversation.utils import AgentMessage
|
13
|
-
from khoj.processor.operator.operator_actions import
|
12
|
+
from khoj.processor.operator.operator_actions import (
|
13
|
+
BackAction,
|
14
|
+
ClickAction,
|
15
|
+
DoubleClickAction,
|
16
|
+
DragAction,
|
17
|
+
GotoAction,
|
18
|
+
KeypressAction,
|
19
|
+
MoveAction,
|
20
|
+
NoopAction,
|
21
|
+
OperatorAction,
|
22
|
+
Point,
|
23
|
+
RequestUserAction,
|
24
|
+
ScreenshotAction,
|
25
|
+
ScrollAction,
|
26
|
+
TypeAction,
|
27
|
+
WaitAction,
|
28
|
+
)
|
14
29
|
from khoj.processor.operator.operator_agent_base import AgentActResult, OperatorAgent
|
15
30
|
from khoj.processor.operator.operator_environment_base import (
|
16
31
|
EnvironmentType,
|
@@ -152,7 +167,7 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|
152
167
|
# Add screenshot data in openai message format
|
153
168
|
action_result["output"] = {
|
154
169
|
"type": "input_image",
|
155
|
-
"image_url": f
|
170
|
+
"image_url": f"data:image/webp;base64,{result_content['image']}",
|
156
171
|
"current_url": result_content["url"],
|
157
172
|
}
|
158
173
|
elif action_result["type"] == "computer_call_output" and idx == len(env_steps) - 1:
|
@@ -311,7 +326,7 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|
311
326
|
elif block.type == "function_call":
|
312
327
|
if block.name == "goto":
|
313
328
|
args = json.loads(block.arguments)
|
314
|
-
render_texts = [f
|
329
|
+
render_texts = [f"Open URL: {args.get('url', '[Missing URL]')}"]
|
315
330
|
else:
|
316
331
|
render_texts += [block.name]
|
317
332
|
elif block.type == "computer_call":
|
@@ -351,7 +366,7 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|
351
366
|
* When viewing a webpage it can be helpful to zoom out so that you can see everything on the page. Either that, or make sure you scroll down to see everything before deciding something isn't available.
|
352
367
|
* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
|
353
368
|
* Perform web searches using DuckDuckGo. Don't use Google even if requested as the query will fail.
|
354
|
-
* The current date is {datetime.today().strftime(
|
369
|
+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
|
355
370
|
* The current URL is {current_state.url}.
|
356
371
|
</SYSTEM_CAPABILITY>
|
357
372
|
|
@@ -374,7 +389,7 @@ class OpenAIOperatorAgent(OperatorAgent):
|
|
374
389
|
</SYSTEM_CAPABILITY>
|
375
390
|
|
376
391
|
<CONTEXT>
|
377
|
-
* The current date is {datetime.today().strftime(
|
392
|
+
* The current date is {datetime.today().strftime("%A, %B %-d, %Y")}.
|
378
393
|
</CONTEXT>
|
379
394
|
"""
|
380
395
|
).lstrip()
|
@@ -247,7 +247,7 @@ class BrowserEnvironment(Environment):
|
|
247
247
|
|
248
248
|
case "drag":
|
249
249
|
if not isinstance(action, DragAction):
|
250
|
-
raise TypeError(
|
250
|
+
raise TypeError("Invalid action type for drag")
|
251
251
|
path = action.path
|
252
252
|
if not path:
|
253
253
|
error = "Missing path for drag action"
|
@@ -532,7 +532,7 @@ class ComputerEnvironment(Environment):
|
|
532
532
|
else:
|
533
533
|
return {"success": False, "output": process.stdout, "error": process.stderr}
|
534
534
|
except asyncio.TimeoutError:
|
535
|
-
return {"success": False, "output": "", "error":
|
535
|
+
return {"success": False, "output": "", "error": "Command timed out after 120 seconds."}
|
536
536
|
except Exception as e:
|
537
537
|
return {"success": False, "output": "", "error": str(e)}
|
538
538
|
|
@@ -385,7 +385,7 @@ async def read_webpages(
|
|
385
385
|
tracer: dict = {},
|
386
386
|
):
|
387
387
|
"Infer web pages to read from the query and extract relevant information from them"
|
388
|
-
logger.info(
|
388
|
+
logger.info("Inferring web pages to read")
|
389
389
|
urls = await infer_webpage_urls(
|
390
390
|
query,
|
391
391
|
max_webpages_to_read,
|
khoj/processor/tools/run_code.py
CHANGED
@@ -93,7 +93,7 @@ async def run_code(
|
|
93
93
|
|
94
94
|
# Run Code
|
95
95
|
if send_status_func:
|
96
|
-
async for event in send_status_func(
|
96
|
+
async for event in send_status_func("**Running code snippet**"):
|
97
97
|
yield {ChatEvent.STATUS: event}
|
98
98
|
try:
|
99
99
|
with timer("Chat actor: Execute generated program", logger, log_level=logging.INFO):
|