khoj 2.0.0b12.dev5__py3-none-any.whl → 2.0.0b13.dev19__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
- khoj/app/README.md +1 -1
- khoj/app/urls.py +1 -0
- khoj/configure.py +21 -54
- khoj/database/adapters/__init__.py +6 -15
- khoj/database/management/commands/delete_orphaned_fileobjects.py +0 -1
- khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +1 -1
- khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +1 -1
- khoj/database/migrations/0092_alter_chatmodel_model_type_alter_chatmodel_name_and_more.py +36 -0
- khoj/database/migrations/0093_remove_localorgconfig_user_and_more.py +36 -0
- khoj/database/models/__init__.py +10 -40
- khoj/database/tests.py +0 -2
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{9245.a04e92d034540234.js → 1225.ecac11e7421504c4.js} +3 -3
- khoj/interface/compiled/_next/static/chunks/1320.ae930ad00affe685.js +5 -0
- khoj/interface/compiled/_next/static/chunks/{1327-1a9107b9a2a04a98.js → 1327-511bb0a862efce80.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/1626.15a8acc0d6639ec6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{3489.c523fe96a2eee74f.js → 1940.d082758bd04e08ae.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{2327-ea623ca2d22f78e9.js → 2327-fe87dd989d71d0eb.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/2475.57a0d0fd93d07af0.js +93 -0
- khoj/interface/compiled/_next/static/chunks/2481.5ce6524ba0a73f90.js +55 -0
- khoj/interface/compiled/_next/static/chunks/297.4c4c823ff6e3255b.js +174 -0
- khoj/interface/compiled/_next/static/chunks/{5639-09e2009a2adedf8b.js → 3260-43d3019b92c315bb.js} +68 -23
- khoj/interface/compiled/_next/static/chunks/3353.1c6d553216a1acae.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3855.f7b8131f78af046e.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3973.dc54a39586ab48be.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4241.c1cd170f7f37ac59.js +24 -0
- khoj/interface/compiled/_next/static/chunks/{4327.8d2a1b8f1ea78208.js → 4327.f3704dc398c67113.js} +19 -19
- khoj/interface/compiled/_next/static/chunks/4505.f09454a346269c3f.js +117 -0
- khoj/interface/compiled/_next/static/chunks/4801.96a152d49742b644.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5427-a95ec748e52abb75.js +1 -0
- khoj/interface/compiled/_next/static/chunks/549.2bd27f59a91a9668.js +148 -0
- khoj/interface/compiled/_next/static/chunks/5765.71b1e1207b76b03f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/584.d7ce3505f169b706.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6240.34f7c1fa692edd61.js +24 -0
- khoj/interface/compiled/_next/static/chunks/6d3fe5a5-f9f3c16e0bc0cdf9.js +10 -0
- khoj/interface/compiled/_next/static/chunks/{7127-0f4a2a77d97fb5fa.js → 7127-97b83757db125ba6.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/7200-93ab0072359b8028.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{2612.bcf5a623b3da209e.js → 7553.f5ad54b1f6e92c49.js} +2 -2
- khoj/interface/compiled/_next/static/chunks/7626-1b630f1654172341.js +1 -0
- khoj/interface/compiled/_next/static/chunks/764.dadd316e8e16d191.js +63 -0
- khoj/interface/compiled/_next/static/chunks/78.08169ab541abab4f.js +43 -0
- khoj/interface/compiled/_next/static/chunks/784.e03acf460df213d1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{9537-d9ab442ce15d1e20.js → 8072-e1440cb482a0940e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{3265.924139c4146ee344.js → 8086.8d39887215807fcd.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8168.f074ab8c7c16d82d.js +59 -0
- khoj/interface/compiled/_next/static/chunks/{8694.2bd9c2f65d8c5847.js → 8223.1705878fa7a09292.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8483.94f6c9e2bee86f50.js +215 -0
- khoj/interface/compiled/_next/static/chunks/{8888.ebe0e552b59e7fed.js → 8810.fc0e479de78c7c61.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8828.bc74dc4ce94e78f6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{7303.d0612f812a967a08.js → 8909.14ac3f43d0070cf1.js} +5 -5
- khoj/interface/compiled/_next/static/chunks/90542734.b1a1629065ba199b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9167.098534184f03fe92.js +56 -0
- khoj/interface/compiled/_next/static/chunks/{4980.63500d68b3bb1222.js → 9537.e934ce37bf314509.js} +5 -5
- khoj/interface/compiled/_next/static/chunks/9574.3fe8e26e95bf1c34.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9599.ec50b5296c27dae9.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9643.b34248df52ffc77c.js +262 -0
- khoj/interface/compiled/_next/static/chunks/9747.2fd9065b1435abb1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9922.98f2b2a9959b4ebe.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-e291b49977f43880.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-198b26df6e09bbb0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-8e1c4f2af3c9429e.js → page-9a75d7369f2a7cd2.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-2b3056cba8aa96ce.js → page-1567cac7b79a7c59.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-8be3b35178abf2ec.js → page-6081362437c82470.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-4a4b0c0f4749c2b2.js → page-e0dcb1762f8c8f88.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/webpack-d60b0c57a6c38d0f.js +1 -0
- khoj/interface/compiled/_next/static/css/{c34713c98384ee87.css → 2945c4a857922f3b.css} +1 -1
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +3 -3
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +4 -4
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +3 -3
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +3 -3
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +3 -3
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +5 -5
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +3 -3
- khoj/main.py +7 -9
- khoj/manage.py +1 -0
- khoj/processor/content/github/github_to_entries.py +6 -7
- khoj/processor/content/images/image_to_entries.py +0 -1
- khoj/processor/content/markdown/markdown_to_entries.py +2 -3
- khoj/processor/content/notion/notion_to_entries.py +5 -6
- khoj/processor/content/org_mode/org_to_entries.py +4 -5
- khoj/processor/content/org_mode/orgnode.py +4 -4
- khoj/processor/content/plaintext/plaintext_to_entries.py +1 -2
- khoj/processor/content/text_to_entries.py +1 -3
- khoj/processor/conversation/google/utils.py +3 -3
- khoj/processor/conversation/openai/utils.py +3 -4
- khoj/processor/conversation/prompts.py +0 -32
- khoj/processor/conversation/utils.py +25 -38
- khoj/processor/embeddings.py +0 -2
- khoj/processor/image/generate.py +3 -3
- khoj/processor/operator/__init__.py +2 -3
- khoj/processor/operator/grounding_agent.py +15 -2
- khoj/processor/operator/grounding_agent_uitars.py +34 -23
- khoj/processor/operator/operator_agent_anthropic.py +29 -4
- khoj/processor/operator/operator_agent_base.py +1 -1
- khoj/processor/operator/operator_agent_binary.py +4 -4
- khoj/processor/operator/operator_agent_openai.py +21 -6
- khoj/processor/operator/operator_environment_browser.py +1 -1
- khoj/processor/operator/operator_environment_computer.py +1 -1
- khoj/processor/speech/text_to_speech.py +0 -1
- khoj/processor/tools/online_search.py +1 -1
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api.py +2 -15
- khoj/routers/api_agents.py +1 -2
- khoj/routers/api_automation.py +1 -1
- khoj/routers/api_chat.py +10 -16
- khoj/routers/api_content.py +3 -111
- khoj/routers/api_model.py +0 -1
- khoj/routers/api_subscription.py +1 -1
- khoj/routers/email.py +4 -4
- khoj/routers/helpers.py +26 -99
- khoj/routers/research.py +2 -4
- khoj/search_filter/base_filter.py +2 -4
- khoj/search_type/text_search.py +1 -2
- khoj/utils/cli.py +5 -53
- khoj/utils/config.py +0 -65
- khoj/utils/constants.py +0 -7
- khoj/utils/helpers.py +5 -13
- khoj/utils/initialization.py +7 -48
- khoj/utils/models.py +2 -4
- khoj/utils/rawconfig.py +1 -69
- khoj/utils/state.py +2 -8
- khoj/utils/yaml.py +0 -39
- {khoj-2.0.0b12.dev5.dist-info → khoj-2.0.0b13.dev19.dist-info}/METADATA +3 -3
- {khoj-2.0.0b12.dev5.dist-info → khoj-2.0.0b13.dev19.dist-info}/RECORD +145 -154
- khoj/interface/compiled/_next/static/chunks/1191.b547ec13349b4aed.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1588.f0558a0bdffc4761.js +0 -117
- khoj/interface/compiled/_next/static/chunks/1918.925cb4a35518d258.js +0 -43
- khoj/interface/compiled/_next/static/chunks/2849.dc00ae5ba7219cfc.js +0 -1
- khoj/interface/compiled/_next/static/chunks/303.fe76de943e930fbd.js +0 -1
- khoj/interface/compiled/_next/static/chunks/4533.586e74b45a2bde25.js +0 -55
- khoj/interface/compiled/_next/static/chunks/4551.82ce1476b5516bc2.js +0 -5
- khoj/interface/compiled/_next/static/chunks/4748.0edd37cba3ea2809.js +0 -59
- khoj/interface/compiled/_next/static/chunks/5210.cd35a1c1ec594a20.js +0 -93
- khoj/interface/compiled/_next/static/chunks/5329.f8b3c5b3d16159cd.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5427-13d6ffd380fdfab7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/558-c14e76cff03f6a60.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5830.8876eccb82da9b7d.js +0 -262
- khoj/interface/compiled/_next/static/chunks/6230.88a71d8145347b3f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7161.77e0530a40ad5ca8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7200-ac3b2e37ff30e126.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7505.c31027a3695bdebb.js +0 -148
- khoj/interface/compiled/_next/static/chunks/7760.35649cc21d9585bd.js +0 -56
- khoj/interface/compiled/_next/static/chunks/83.48e2db193a940052.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8427.844694e06133fb51.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8665.4db7e6b2e8933497.js +0 -174
- khoj/interface/compiled/_next/static/chunks/872.caf84cc1a39ae59f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8890.6e8a59e4de6978bc.js +0 -215
- khoj/interface/compiled/_next/static/chunks/8950.5f2272e0ac923f9e.js +0 -1
- khoj/interface/compiled/_next/static/chunks/90542734.2c21f16f18b22411.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9202.c703864fcedc8d1f.js +0 -63
- khoj/interface/compiled/_next/static/chunks/9320.6aca4885d541aa44.js +0 -24
- khoj/interface/compiled/_next/static/chunks/9535.f78cd92d03331e55.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9968.b111fc002796da81.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/page-9a4610474cd59a71.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/page-f7bb9d777b7745d4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +0 -1
- khoj/interface/compiled/_next/static/chunks/f3e3247b-1758d4651e4457c2.js +0 -10
- khoj/interface/compiled/_next/static/chunks/webpack-338a5000c912cc94.js +0 -1
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +0 -69
- khoj/migrations/migrate_offline_chat_default_model_2.py +0 -71
- khoj/migrations/migrate_offline_chat_schema.py +0 -83
- khoj/migrations/migrate_offline_model.py +0 -29
- khoj/migrations/migrate_processor_config_openai.py +0 -67
- khoj/migrations/migrate_server_pg.py +0 -132
- khoj/migrations/migrate_version.py +0 -17
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +0 -224
- khoj/processor/conversation/offline/utils.py +0 -80
- khoj/processor/conversation/offline/whisper.py +0 -15
- khoj/utils/fs_syncer.py +0 -252
- /khoj/interface/compiled/_next/static/{7GoMcE8WpP9fbfYZXv4Nv → N-GdBSXoYe-DuObnbXVRO}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{7GoMcE8WpP9fbfYZXv4Nv → N-GdBSXoYe-DuObnbXVRO}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-5c6508f6ebb62a30.js → 1915-fbfe167c84ad60c5.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-080746c8e170c81a.js → 2117-e78b6902ad6f75ec.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4af3fd24b8ffc9ad.js → 2939-4d4084c5b888b960.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-cd95608f8e93e711.js → 4447-d6cf93724d57e34b.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-50b03a89e82e0ba7.js → 8667-4b7790573b08c50d.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9139-8ac4d9feb10f8869.js → 9139-ce1ae935dac9c871.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/app/search/{page-4885df3cd175c957.js → page-3639e50ec3e9acfd.js} +0 -0
- {khoj-2.0.0b12.dev5.dist-info → khoj-2.0.0b13.dev19.dist-info}/WHEEL +0 -0
- {khoj-2.0.0b12.dev5.dist-info → khoj-2.0.0b13.dev19.dist-info}/entry_points.txt +0 -0
- {khoj-2.0.0b12.dev5.dist-info → khoj-2.0.0b13.dev19.dist-info}/licenses/LICENSE +0 -0
khoj/processor/content/org_mode/org_to_entries.py
CHANGED
@@ -8,7 +8,6 @@ from khoj.database.models import KhojUser
 from khoj.processor.content.org_mode import orgnode
 from khoj.processor.content.org_mode.orgnode import Orgnode
 from khoj.processor.content.text_to_entries import TextToEntries
-from khoj.utils import state
 from khoj.utils.helpers import timer
 from khoj.utils.rawconfig import Entry
 
@@ -103,7 +102,7 @@ class OrgToEntries(TextToEntries):
         # If content is small or content has no children headings, save it as a single entry
         # Note: This is the terminating condition for this recursive function
         if len(TextToEntries.tokenizer(org_content_with_ancestry)) <= max_tokens or not re.search(
-            rf"^\*{{{len(ancestry)+1},}}\s", org_content, re.MULTILINE
+            rf"^\*{{{len(ancestry) + 1},}}\s", org_content, re.MULTILINE
         ):
             orgnode_content_with_ancestry = orgnode.makelist(
                 org_content_with_ancestry, org_file, start_line=start_line, ancestry_lines=len(ancestry)
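Note: the regex above gates the recursion by checking for org headings nested deeper than the entry's ancestry. A minimal sketch of how that pattern behaves (the sample content is illustrative, not from the package):

    import re

    # One ancestor heading, so look for headings at depth >= 2:
    # lines starting with two or more '*' followed by whitespace.
    ancestry = {0: "Root Heading"}
    pattern = rf"^\*{{{len(ancestry) + 1},}}\s"  # -> r"^\*{2,}\s"

    org_content = "* Top heading\nBody text\n** Child heading\nMore text"

    # re.MULTILINE makes '^' match at the start of every line.
    print(bool(re.search(pattern, org_content, re.MULTILINE)))  # True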
@@ -195,7 +194,7 @@ class OrgToEntries(TextToEntries):
         if not entry_heading and parsed_entry.level > 0:
             base_level = parsed_entry.level
         # Indent entry by 1 heading level as ancestry is prepended as top level heading
-        heading = f"{'*' * (parsed_entry.level-base_level+2)} {todo_str}" if parsed_entry.level > 0 else ""
+        heading = f"{'*' * (parsed_entry.level - base_level + 2)} {todo_str}" if parsed_entry.level > 0 else ""
         if parsed_entry.heading:
             heading += f"{parsed_entry.heading}."
 
@@ -212,10 +211,10 @@ class OrgToEntries(TextToEntries):
             compiled += f"\t {tags_str}."
 
         if parsed_entry.closed:
-            compiled += f
+            compiled += f"\n Closed on {parsed_entry.closed.strftime('%Y-%m-%d')}."
 
         if parsed_entry.scheduled:
-            compiled += f
+            compiled += f"\n Scheduled for {parsed_entry.scheduled.strftime('%Y-%m-%d')}."
 
         if parsed_entry.hasBody:
             compiled += f"\n {parsed_entry.body}"
khoj/processor/content/org_mode/orgnode.py
CHANGED
@@ -65,7 +65,7 @@ def makelist(file, filename, start_line: int = 1, ancestry_lines: int = 0) -> Li
     """
     ctr = 0
 
-    if
+    if isinstance(file, str):
         f = file.splitlines()
     else:
         f = file
@@ -512,11 +512,11 @@ class Orgnode(object):
         if self._closed or self._scheduled or self._deadline:
             n = n + indent
         if self._closed:
-            n = n + f
+            n = n + f"CLOSED: [{self._closed.strftime('%Y-%m-%d %a')}] "
         if self._scheduled:
-            n = n + f
+            n = n + f"SCHEDULED: <{self._scheduled.strftime('%Y-%m-%d %a')}> "
         if self._deadline:
-            n = n + f
+            n = n + f"DEADLINE: <{self._deadline.strftime('%Y-%m-%d %a')}> "
         if self._closed or self._scheduled or self._deadline:
             n = n + "\n"
 
khoj/processor/content/plaintext/plaintext_to_entries.py
CHANGED
@@ -1,6 +1,5 @@
 import logging
 import re
-from pathlib import Path
 from typing import Dict, List, Tuple
 
 import urllib3
@@ -97,7 +96,7 @@ class PlaintextToEntries(TextToEntries):
         for parsed_entry in parsed_entries:
             raw_filename = entry_to_file_map[parsed_entry]
             # Check if raw_filename is a URL. If so, save it as is. If not, convert it to a Path.
-            if
+            if isinstance(raw_filename, str) and re.search(r"^https?://", raw_filename):
                 # Escape the URL to avoid issues with special characters
                 entry_filename = urllib3.util.parse_url(raw_filename).url
            else:
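Note: this branch treats URL-like filenames specially before falling back to path handling. A self-contained sketch of the check (the sample filename is hypothetical):

    import re

    import urllib3

    raw_filename = "https://example.com/some page"  # hypothetical input

    if isinstance(raw_filename, str) and re.search(r"^https?://", raw_filename):
        # parse_url(...).url re-serializes the URL, escaping unsafe characters
        entry_filename = urllib3.util.parse_url(raw_filename).url
    else:
        entry_filename = raw_filename

    print(entry_filename)  # the escaped form of the URL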
khoj/processor/content/text_to_entries.py
CHANGED
@@ -27,12 +27,10 @@ logger = logging.getLogger(__name__)
 class TextToEntries(ABC):
     def __init__(self, config: Any = None):
         self.embeddings_model = state.embeddings_model
-        self.config = config
         self.date_filter = DateFilter()
 
     @abstractmethod
-    def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]:
-        ...
+    def process(self, files: dict[str, str], user: KhojUser, regenerate: bool = False) -> Tuple[int, int]: ...
 
     @staticmethod
     def hash_func(key: str) -> Callable:
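Note: `def process(...) -> Tuple[int, int]: ...` is the one-line spelling of an abstract method with an ellipsis body; both forms are equivalent. A minimal sketch of the pattern with a hypothetical subclass:

    from abc import ABC, abstractmethod
    from typing import Tuple


    class Processor(ABC):  # hypothetical stand-in for TextToEntries
        @abstractmethod
        def process(self, files: dict[str, str]) -> Tuple[int, int]: ...


    class CountingProcessor(Processor):
        def process(self, files: dict[str, str]) -> Tuple[int, int]:
            # Pretend nothing changed; report zero new entries and the total seen.
            return (0, len(files))


    print(CountingProcessor().process({"a.txt": "hello"}))  # (0, 1)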
khoj/processor/conversation/google/utils.py
CHANGED
@@ -194,7 +194,7 @@ def gemini_completion_with_backoff(
         or not response.candidates[0].content
         or response.candidates[0].content.parts is None
     ):
-        raise ValueError(
+        raise ValueError("Failed to get response from model.")
     raw_content = [part.model_dump() for part in response.candidates[0].content.parts]
     if response.function_calls:
         function_calls = [
@@ -212,7 +212,7 @@ def gemini_completion_with_backoff(
         response = None
         # Handle 429 rate limit errors directly
         if e.code == 429:
-            response_text =
+            response_text = "My brain is exhausted. Can you please try again in a bit?"
             # Log the full error details for debugging
             logger.error(f"Gemini ClientError: {e.code} {e.status}. Details: {e.details}")
         # Handle other errors
@@ -361,7 +361,7 @@ def handle_gemini_response(
 
     # Ensure we have a proper list of candidates
     if not isinstance(candidates, list):
-        message =
+        message = "\nUnexpected response format. Try again."
         stopped = True
         return message, stopped
 
khoj/processor/conversation/openai/utils.py
CHANGED
@@ -2,7 +2,6 @@ import json
 import logging
 import os
 from copy import deepcopy
-from functools import partial
 from time import perf_counter
 from typing import AsyncGenerator, Dict, Generator, List, Literal, Optional, Union
 from urllib.parse import urlparse
@@ -284,9 +283,9 @@ async def chat_completion_with_backoff(
         if len(system_messages) > 0:
             first_system_message_index, first_system_message = system_messages[0]
             first_system_message_content = first_system_message["content"]
-            formatted_messages[first_system_message_index][
-                "
-
+            formatted_messages[first_system_message_index]["content"] = (
+                f"{first_system_message_content}\nFormatting re-enabled"
+            )
     elif is_twitter_reasoning_model(model_name, api_base_url):
         reasoning_effort = "high" if deepthought else "low"
         # Grok-4 models do not support reasoning_effort parameter
khoj/processor/conversation/prompts.py
CHANGED
@@ -78,38 +78,6 @@ no_entries_found = PromptTemplate.from_template(
 """.strip()
 )
 
-## Conversation Prompts for Offline Chat Models
-## --
-system_prompt_offline_chat = PromptTemplate.from_template(
-    """
-You are Khoj, a smart, inquisitive and helpful personal assistant.
-- Use your general knowledge and past conversation with the user as context to inform your responses.
-- If you do not know the answer, say 'I don't know.'
-- Think step-by-step and ask questions to get the necessary information to answer the user's question.
-- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided information or past conversations.
-- Do not print verbatim Notes unless necessary.
-
-Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
-Today is {day_of_week}, {current_date} in UTC.
-""".strip()
-)
-
-custom_system_prompt_offline_chat = PromptTemplate.from_template(
-    """
-You are {name}, a personal agent on Khoj.
-- Use your general knowledge and past conversation with the user as context to inform your responses.
-- If you do not know the answer, say 'I don't know.'
-- Think step-by-step and ask questions to get the necessary information to answer the user's question.
-- Ask crisp follow-up questions to get additional context, when the answer cannot be inferred from the provided information or past conversations.
-- Do not print verbatim Notes unless necessary.
-
-Note: More information about you, the company or Khoj apps can be found at https://khoj.dev.
-Today is {day_of_week}, {current_date} in UTC.
-
-Instructions:\n{bio}
-""".strip()
-)
-
 ## Notes Conversation
 ## --
 notes_conversation = PromptTemplate.from_template(
khoj/processor/conversation/utils.py
CHANGED
@@ -1,7 +1,6 @@
 import base64
 import json
 import logging
-import math
 import mimetypes
 import os
 import re
@@ -18,9 +17,7 @@ import requests
 import tiktoken
 import yaml
 from langchain_core.messages.chat import ChatMessage
-from
-from llama_cpp.llama import Llama
-from pydantic import BaseModel, ConfigDict, ValidationError, create_model
+from pydantic import BaseModel, ConfigDict, ValidationError
 from transformers import AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast
 
 from khoj.database.adapters import ConversationAdapters
@@ -32,7 +29,6 @@ from khoj.database.models import (
     KhojUser,
 )
 from khoj.processor.conversation import prompts
-from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
 from khoj.search_filter.base_filter import BaseFilter
 from khoj.search_filter.date_filter import DateFilter
 from khoj.search_filter.file_filter import FileFilter
@@ -50,7 +46,11 @@ from khoj.utils.yaml import yaml_dump
 logger = logging.getLogger(__name__)
 
 try:
-
+    import importlib.util
+
+    git_spec = importlib.util.find_spec("git")
+    if git_spec is None:
+        raise ImportError
 except ImportError:
     if is_promptrace_enabled():
         logger.warning("GitPython not installed. `pip install gitpython` to use prompt tracer.")
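Note: the rewritten try block detects GitPython with `importlib.util.find_spec` instead of importing it outright. A minimal sketch of that optional-dependency pattern:

    import importlib.util

    # find_spec returns None when a module isn't installed, without
    # running the module's import-time code.
    if importlib.util.find_spec("git") is None:
        print("GitPython not installed. `pip install gitpython` to use prompt tracer.")
    else:
        import git  # safe to import now

        print("GitPython available")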
@@ -85,12 +85,6 @@ model_to_prompt_size = {
     "claude-sonnet-4-20250514": 60000,
     "claude-opus-4-0": 60000,
     "claude-opus-4-20250514": 60000,
-    # Offline Models
-    "bartowski/Qwen2.5-14B-Instruct-GGUF": 20000,
-    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
-    "bartowski/Llama-3.2-3B-Instruct-GGUF": 20000,
-    "bartowski/gemma-2-9b-it-GGUF": 6000,
-    "bartowski/gemma-2-2b-it-GGUF": 6000,
 }
 model_to_tokenizer: Dict[str, str] = {}
 
@@ -303,7 +297,7 @@ def construct_chat_history_for_operator(conversation_history: List[ChatMessageMo
         if chat.by == "you" and chat.message:
             content = [{"type": "text", "text": chat.message}]
             for file in chat.queryFiles or []:
-                content += [{"type": "text", "text": f
+                content += [{"type": "text", "text": f"## File: {file['name']}\n\n{file['content']}"}]
             user_message = AgentMessage(role="user", content=content)
         elif chat.by == "khoj" and chat.message:
             chat_history += [user_message, AgentMessage(role="assistant", content=chat.message)]
@@ -320,7 +314,10 @@ def construct_tool_chat_history(
     If no tool is provided inferred query for all tools used are added.
     """
     chat_history: list = []
-
+
+    def base_extractor(iteration: ResearchIteration) -> List[str]:
+        return []
+
     extract_inferred_query_map: Dict[ConversationCommand, Callable[[ResearchIteration], List[str]]] = {
         ConversationCommand.SemanticSearchFiles: (
             lambda iteration: [c["query"] for c in iteration.context] if iteration.context else []
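Note: the new `base_extractor` gives the extractor map an explicit do-nothing default. A standalone sketch of the dispatch pattern, with hypothetical command names and iteration shape:

    from typing import Callable, Dict, List


    def base_extractor(iteration: dict) -> List[str]:
        # Default: tools without a dedicated extractor yield no inferred queries.
        return []


    extractors: Dict[str, Callable[[dict], List[str]]] = {
        "semantic_search_files": lambda it: [c["query"] for c in it.get("context") or []],
    }

    iteration = {"context": [{"query": "notes on org-mode"}]}
    print(extractors.get("semantic_search_files", base_extractor)(iteration))  # ['notes on org-mode']
    print(extractors.get("unknown_tool", base_extractor)(iteration))           # []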
@@ -507,7 +504,7 @@ async def save_to_conversation_log(
 
     logger.info(
         f"""
-Saved Conversation Turn ({db_conversation.id if db_conversation else
+Saved Conversation Turn ({db_conversation.id if db_conversation else "N/A"}):
 You ({user.username}): "{q}"
 
 Khoj: "{chat_response}"
@@ -573,7 +570,6 @@ def generate_chatml_messages_with_context(
     system_message: str = None,
     chat_history: list[ChatMessageModel] = [],
     model_name="gpt-4o-mini",
-    loaded_model: Optional[Llama] = None,
     max_prompt_size=None,
     tokenizer_name=None,
     query_images=None,
@@ -588,10 +584,7 @@ def generate_chatml_messages_with_context(
     """Generate chat messages with appropriate context from previous conversation to send to the chat model"""
     # Set max prompt size from user config or based on pre-configured for model and machine specs
     if not max_prompt_size:
-
-        max_prompt_size = infer_max_tokens(loaded_model.n_ctx(), model_to_prompt_size.get(model_name, math.inf))
-    else:
-        max_prompt_size = model_to_prompt_size.get(model_name, 10000)
+        max_prompt_size = model_to_prompt_size.get(model_name, 10000)
 
     # Scale lookback turns proportional to max prompt size supported by model
     lookback_turns = max_prompt_size // 750
@@ -638,7 +631,7 @@ def generate_chatml_messages_with_context(
 
         if not is_none_or_empty(chat.operatorContext):
             operator_context = chat.operatorContext
-            operator_content = "\n\n".join([f
+            operator_content = "\n\n".join([f"## Task: {oc['query']}\n{oc['response']}\n" for oc in operator_context])
             message_context += [
                 {
                     "type": "text",
|
|
735
728
|
message.content = [{"type": "text", "text": message.content}]
|
736
729
|
|
737
730
|
# Truncate oldest messages from conversation history until under max supported prompt size by model
|
738
|
-
messages = truncate_messages(messages, max_prompt_size, model_name,
|
731
|
+
messages = truncate_messages(messages, max_prompt_size, model_name, tokenizer_name)
|
739
732
|
|
740
733
|
# Return message in chronological order
|
741
734
|
return messages[::-1]
|
@@ -743,26 +736,21 @@ def generate_chatml_messages_with_context(
 
 def get_encoder(
     model_name: str,
-    loaded_model: Optional[Llama] = None,
     tokenizer_name=None,
-) -> tiktoken.Encoding | PreTrainedTokenizer | PreTrainedTokenizerFast
+) -> tiktoken.Encoding | PreTrainedTokenizer | PreTrainedTokenizerFast:
     default_tokenizer = "gpt-4o"
 
     try:
-        if
-            encoder = loaded_model.tokenizer()
-        elif model_name.startswith("gpt-") or model_name.startswith("o1"):
-            # as tiktoken doesn't recognize o1 model series yet
-            encoder = tiktoken.encoding_for_model("gpt-4o" if model_name.startswith("o1") else model_name)
-        elif tokenizer_name:
+        if tokenizer_name:
             if tokenizer_name in state.pretrained_tokenizers:
                 encoder = state.pretrained_tokenizers[tokenizer_name]
             else:
                 encoder = AutoTokenizer.from_pretrained(tokenizer_name)
                 state.pretrained_tokenizers[tokenizer_name] = encoder
         else:
-
-
+            # as tiktoken doesn't recognize o1 model series yet
+            encoder = tiktoken.encoding_for_model("gpt-4o" if model_name.startswith("o1") else model_name)
+    except Exception:
         encoder = tiktoken.encoding_for_model(default_tokenizer)
         if state.verbose > 2:
             logger.debug(
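Note: after this change `get_encoder` prefers an explicitly named Hugging Face tokenizer and otherwise uses tiktoken, with a gpt-4o fallback on any failure. A rough standalone sketch of that selection order (caching and logging omitted):

    import tiktoken
    from transformers import AutoTokenizer


    def pick_encoder(model_name: str, tokenizer_name: str | None = None):
        """Prefer a named Hugging Face tokenizer; else fall back to tiktoken."""
        try:
            if tokenizer_name:
                return AutoTokenizer.from_pretrained(tokenizer_name)
            # tiktoken doesn't recognize the o1 series yet, so map it to gpt-4o
            return tiktoken.encoding_for_model("gpt-4o" if model_name.startswith("o1") else model_name)
        except Exception:
            return tiktoken.encoding_for_model("gpt-4o")


    encoder = pick_encoder("gpt-4o-mini")
    print(len(encoder.encode("How many tokens is this?")))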
@@ -773,7 +761,7 @@ def get_encoder(
 
 def count_tokens(
     message_content: str | list[str | dict],
-    encoder: PreTrainedTokenizer | PreTrainedTokenizerFast |
+    encoder: PreTrainedTokenizer | PreTrainedTokenizerFast | tiktoken.Encoding,
 ) -> int:
     """
     Count the total number of tokens in a list of messages.
@@ -825,11 +813,10 @@ def truncate_messages(
     messages: list[ChatMessage],
     max_prompt_size: int,
     model_name: str,
-    loaded_model: Optional[Llama] = None,
     tokenizer_name=None,
 ) -> list[ChatMessage]:
     """Truncate messages to fit within max prompt size supported by model"""
-    encoder = get_encoder(model_name,
+    encoder = get_encoder(model_name, tokenizer_name)
 
     # Extract system message from messages
     system_message = None
@@ -865,9 +852,9 @@ def truncate_messages(
     total_tokens, _ = count_total_tokens(messages, encoder, system_message)
     if total_tokens > max_prompt_size:
         # At this point, a single message with a single content part of type dict should remain
-        assert (
-
-        )
+        assert len(messages) == 1 and len(messages[0].content) == 1 and isinstance(messages[0].content[0], dict), (
+            "Expected a single message with a single content part remaining at this point in truncation"
+        )
 
         # Collate message content into single string to ease truncation
         part = messages[0].content[0]
khoj/processor/embeddings.py
CHANGED
khoj/processor/image/generate.py
CHANGED
@@ -108,12 +108,12 @@ async def text_to_image(
         if "content_policy_violation" in e.message:
             logger.error(f"Image Generation blocked by OpenAI: {e}")
             status_code = e.status_code  # type: ignore
-            message =
+            message = "Image generation blocked by OpenAI due to policy violation"  # type: ignore
             yield image_url or image, status_code, message
             return
         else:
             logger.error(f"Image Generation failed with {e}", exc_info=True)
-            message =
+            message = "Image generation failed using OpenAI"  # type: ignore
             status_code = e.status_code  # type: ignore
             yield image_url or image, status_code, message
             return
@@ -199,7 +199,7 @@ def generate_image_with_stability(
 
     # Call Stability AI API to generate image
     response = requests.post(
-
+        "https://api.stability.ai/v2beta/stable-image/generate/sd3",
         headers={"authorization": f"Bearer {text_to_image_config.api_key}", "accept": "image/*"},
         files={"none": ""},
         data={
khoj/processor/operator/__init__.py
CHANGED
@@ -11,7 +11,7 @@ from khoj.processor.conversation.utils import (
     OperatorRun,
     construct_chat_history_for_operator,
 )
-from khoj.processor.operator.operator_actions import
+from khoj.processor.operator.operator_actions import RequestUserAction
 from khoj.processor.operator.operator_agent_anthropic import AnthropicOperatorAgent
 from khoj.processor.operator.operator_agent_base import OperatorAgent
 from khoj.processor.operator.operator_agent_binary import BinaryOperatorAgent
@@ -59,7 +59,7 @@ async def operate_environment(
     if not reasoning_model or not reasoning_model.vision_enabled:
         reasoning_model = await ConversationAdapters.aget_vision_enabled_config()
     if not reasoning_model:
-        raise ValueError(
+        raise ValueError("No vision enabled chat model found. Configure a vision chat model to operate environment.")
 
     # Create conversation history from conversation log
     chat_history = construct_chat_history_for_operator(conversation_log)
@@ -235,7 +235,6 @@ def is_operator_model(model: str) -> ChatModel.ModelType | None:
         "claude-3-7-sonnet": ChatModel.ModelType.ANTHROPIC,
         "claude-sonnet-4": ChatModel.ModelType.ANTHROPIC,
         "claude-opus-4": ChatModel.ModelType.ANTHROPIC,
-        "ui-tars-1.5": ChatModel.ModelType.OFFLINE,
     }
     for operator_model in operator_models:
         if model.startswith(operator_model):
khoj/processor/operator/grounding_agent.py
CHANGED
@@ -1,14 +1,27 @@
 import json
 import logging
 from textwrap import dedent
+from typing import List, Optional
 
 from openai import AzureOpenAI, OpenAI
 from openai.types.chat import ChatCompletion, ChatCompletionMessage
 
 from khoj.database.models import ChatModel
 from khoj.processor.conversation.utils import construct_structured_message
-from khoj.processor.operator.operator_actions import
-
+from khoj.processor.operator.operator_actions import (
+    BackAction,
+    ClickAction,
+    DoubleClickAction,
+    DragAction,
+    GotoAction,
+    KeypressAction,
+    OperatorAction,
+    Point,
+    ScreenshotAction,
+    ScrollAction,
+    TypeAction,
+    WaitAction,
+)
 from khoj.processor.operator.operator_environment_base import EnvironmentType, EnvState
 from khoj.utils.helpers import get_chat_usage_metrics
 
khoj/processor/operator/grounding_agent_uitars.py
CHANGED
@@ -18,7 +18,22 @@ from openai import AsyncAzureOpenAI, AsyncOpenAI
 from openai.types.chat import ChatCompletion
 from PIL import Image
 
-from khoj.processor.operator.operator_actions import
+from khoj.processor.operator.operator_actions import (
+    BackAction,
+    ClickAction,
+    DoubleClickAction,
+    DragAction,
+    GotoAction,
+    KeyDownAction,
+    KeypressAction,
+    KeyUpAction,
+    MoveAction,
+    OperatorAction,
+    RequestUserAction,
+    ScrollAction,
+    TypeAction,
+    WaitAction,
+)
 from khoj.processor.operator.operator_environment_base import EnvironmentType, EnvState
 from khoj.utils.helpers import get_chat_usage_metrics
 
@@ -122,11 +137,10 @@ class GroundingAgentUitars:
         )
 
         temperature = self.temperature
-        top_k = self.top_k
         try_times = 3
         while not parsed_responses:
             if try_times <= 0:
-                logger.warning(
+                logger.warning("Reach max retry times to fetch response from client, as error flag.")
                 return "client error\nFAIL", []
             try:
                 message_content = "\n".join([msg["content"][0].get("text") or "[image]" for msg in messages])
@@ -163,7 +177,6 @@ class GroundingAgentUitars:
                 prediction = None
                 try_times -= 1
                 temperature = 1
-                top_k = -1
 
         if prediction is None:
             return "client error\nFAIL", []
@@ -264,9 +277,9 @@ class GroundingAgentUitars:
             raise ValueError(f"Unsupported environment type: {environment_type}")
 
     def _format_messages_for_api(self, instruction: str, current_state: EnvState):
-        assert len(self.observations) == len(self.actions) and len(self.actions) == len(
-
-        )
+        assert len(self.observations) == len(self.actions) and len(self.actions) == len(self.thoughts), (
+            "The number of observations and actions should be the same."
+        )
 
         self.history_images.append(base64.b64decode(current_state.screenshot))
         self.observations.append({"screenshot": current_state.screenshot, "accessibility_tree": None})
@@ -524,7 +537,7 @@ class GroundingAgentUitars:
         parsed_actions = [self.parse_action_string(action.replace("\n", "\\n").lstrip()) for action in all_action]
         actions: list[dict] = []
         for action_instance, raw_str in zip(parsed_actions, all_action):
-            if action_instance
+            if action_instance is None:
                 print(f"Action can't parse: {raw_str}")
                 raise ValueError(f"Action can't parse: {raw_str}")
             action_type = action_instance["function"]
@@ -756,7 +769,7 @@ class GroundingAgentUitars:
         The pyautogui code string
         """
 
-        pyautogui_code =
+        pyautogui_code = "import pyautogui\nimport time\n"
         actions = []
         if isinstance(responses, dict):
             responses = [responses]
@@ -774,7 +787,7 @@ class GroundingAgentUitars:
            if response_id == 0:
                pyautogui_code += f"'''\nObservation:\n{observation}\n\nThought:\n{thought}\n'''\n"
            else:
-                pyautogui_code +=
+                pyautogui_code += "\ntime.sleep(1)\n"
 
            action_dict = response
            action_type = action_dict.get("action_type")
@@ -846,17 +859,17 @@ class GroundingAgentUitars:
                if content:
                    if input_swap:
                        actions += TypeAction()
-                        pyautogui_code +=
+                        pyautogui_code += "\nimport pyperclip"
                        pyautogui_code += f"\npyperclip.copy('{stripped_content}')"
-                        pyautogui_code +=
-                        pyautogui_code +=
+                        pyautogui_code += "\npyautogui.hotkey('ctrl', 'v')"
+                        pyautogui_code += "\ntime.sleep(0.5)\n"
                        if content.endswith("\n") or content.endswith("\\n"):
-                            pyautogui_code +=
+                            pyautogui_code += "\npyautogui.press('enter')"
                    else:
                        pyautogui_code += f"\npyautogui.write('{stripped_content}', interval=0.1)"
-                        pyautogui_code +=
+                        pyautogui_code += "\ntime.sleep(0.5)\n"
                        if content.endswith("\n") or content.endswith("\\n"):
-                            pyautogui_code +=
+                            pyautogui_code += "\npyautogui.press('enter')"
 
            elif action_type in ["drag", "select"]:
                # Parsing drag or select action based on start and end_boxes
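Note: this branch assembles pyautogui source as a plain string. For a type action with `input_swap` enabled and content ending in a newline, the accumulated string would look roughly like this, pieced together from the fragments above (the content value is hypothetical):

    generated = (
        "import pyautogui\nimport time\n"
        "\nimport pyperclip"
        "\npyperclip.copy('hello world')"
        "\npyautogui.hotkey('ctrl', 'v')"
        "\ntime.sleep(0.5)\n"
        "\npyautogui.press('enter')"
    )
    print(generated)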
@@ -869,9 +882,7 @@ class GroundingAgentUitars:
                    x1, y1, x2, y2 = eval(end_box)  # Assuming box is in [x1, y1, x2, y2]
                    ex = round(float((x1 + x2) / 2) * image_width, 3)
                    ey = round(float((y1 + y2) / 2) * image_height, 3)
-                    pyautogui_code += (
-                        f"\npyautogui.moveTo({sx}, {sy})\n" f"\npyautogui.dragTo({ex}, {ey}, duration=1.0)\n"
-                    )
+                    pyautogui_code += f"\npyautogui.moveTo({sx}, {sy})\n\npyautogui.dragTo({ex}, {ey}, duration=1.0)\n"
 
            elif action_type == "scroll":
                # Parsing scroll action
@@ -888,11 +899,11 @@ class GroundingAgentUitars:
                    y = None
                direction = action_inputs.get("direction", "")
 
-                if x
+                if x is None:
                    if "up" in direction.lower():
-                        pyautogui_code +=
+                        pyautogui_code += "\npyautogui.scroll(5)"
                    elif "down" in direction.lower():
-                        pyautogui_code +=
+                        pyautogui_code += "\npyautogui.scroll(-5)"
                else:
                    if "up" in direction.lower():
                        pyautogui_code += f"\npyautogui.scroll(5, x={x}, y={y})"
@@ -923,7 +934,7 @@ class GroundingAgentUitars:
                pyautogui_code += f"\npyautogui.moveTo({x}, {y})"
 
            elif action_type in ["finished"]:
-                pyautogui_code =
+                pyautogui_code = "DONE"
 
            else:
                pyautogui_code += f"\n# Unrecognized action type: {action_type}"