khoj 2.0.0b12__py3-none-any.whl → 2.0.0b13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/app/README.md +1 -1
- khoj/app/urls.py +1 -0
- khoj/configure.py +21 -54
- khoj/database/adapters/__init__.py +6 -15
- khoj/database/management/commands/delete_orphaned_fileobjects.py +0 -1
- khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +1 -1
- khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +1 -1
- khoj/database/migrations/0092_alter_chatmodel_model_type_alter_chatmodel_name_and_more.py +36 -0
- khoj/database/migrations/0093_remove_localorgconfig_user_and_more.py +36 -0
- khoj/database/models/__init__.py +10 -40
- khoj/database/tests.py +0 -2
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{9245.a04e92d034540234.js → 1225.ecac11e7421504c4.js} +3 -3
- khoj/interface/compiled/_next/static/chunks/1320.ae930ad00affe685.js +5 -0
- khoj/interface/compiled/_next/static/chunks/{1327-3b1a41af530fa8ee.js → 1327-e254819a9172cfa7.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/1626.15a8acc0d6639ec6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{3489.c523fe96a2eee74f.js → 1940.d082758bd04e08ae.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{2327-ea623ca2d22f78e9.js → 2327-438aaec1657c5ada.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/2475.57a0d0fd93d07af0.js +93 -0
- khoj/interface/compiled/_next/static/chunks/2481.5ce6524ba0a73f90.js +55 -0
- khoj/interface/compiled/_next/static/chunks/297.4c4c823ff6e3255b.js +174 -0
- khoj/interface/compiled/_next/static/chunks/{5639-09e2009a2adedf8b.js → 3260-82d2521fab032ff1.js} +68 -23
- khoj/interface/compiled/_next/static/chunks/3353.1c6d553216a1acae.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3855.f7b8131f78af046e.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3973.dc54a39586ab48be.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4241.c1cd170f7f37ac59.js +24 -0
- khoj/interface/compiled/_next/static/chunks/{4327.8d2a1b8f1ea78208.js → 4327.f3704dc398c67113.js} +19 -19
- khoj/interface/compiled/_next/static/chunks/4505.f09454a346269c3f.js +117 -0
- khoj/interface/compiled/_next/static/chunks/4801.96a152d49742b644.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5427-a95ec748e52abb75.js +1 -0
- khoj/interface/compiled/_next/static/chunks/549.2bd27f59a91a9668.js +148 -0
- khoj/interface/compiled/_next/static/chunks/5765.71b1e1207b76b03f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/584.d7ce3505f169b706.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6240.34f7c1fa692edd61.js +24 -0
- khoj/interface/compiled/_next/static/chunks/6d3fe5a5-f9f3c16e0bc0cdf9.js +10 -0
- khoj/interface/compiled/_next/static/chunks/{7127-0f4a2a77d97fb5fa.js → 7127-97b83757db125ba6.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/7200-93ab0072359b8028.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{2612.bcf5a623b3da209e.js → 7553.f5ad54b1f6e92c49.js} +2 -2
- khoj/interface/compiled/_next/static/chunks/7626-1b630f1654172341.js +1 -0
- khoj/interface/compiled/_next/static/chunks/764.dadd316e8e16d191.js +63 -0
- khoj/interface/compiled/_next/static/chunks/78.08169ab541abab4f.js +43 -0
- khoj/interface/compiled/_next/static/chunks/784.e03acf460df213d1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{9537-d9ab442ce15d1e20.js → 8072-e1440cb482a0940e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{3265.924139c4146ee344.js → 8086.8d39887215807fcd.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8168.f074ab8c7c16d82d.js +59 -0
- khoj/interface/compiled/_next/static/chunks/{8694.2bd9c2f65d8c5847.js → 8223.1705878fa7a09292.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8483.94f6c9e2bee86f50.js +215 -0
- khoj/interface/compiled/_next/static/chunks/{8888.ebe0e552b59e7fed.js → 8810.fc0e479de78c7c61.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8828.bc74dc4ce94e78f6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{7303.d0612f812a967a08.js → 8909.14ac3f43d0070cf1.js} +5 -5
- khoj/interface/compiled/_next/static/chunks/90542734.b1a1629065ba199b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9167.098534184f03fe92.js +56 -0
- khoj/interface/compiled/_next/static/chunks/{4980.63500d68b3bb1222.js → 9537.e934ce37bf314509.js} +5 -5
- khoj/interface/compiled/_next/static/chunks/9574.3fe8e26e95bf1c34.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9599.ec50b5296c27dae9.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9643.b34248df52ffc77c.js +262 -0
- khoj/interface/compiled/_next/static/chunks/9747.2fd9065b1435abb1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9922.98f2b2a9959b4ebe.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e49165209d2e406c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-e291b49977f43880.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-198b26df6e09bbb0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-33934fc2d6ae6838.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-4bc2938df5d57981.js → page-dfcc1e8e2ad62873.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-a19a597629e87fb8.js → page-1567cac7b79a7c59.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-fa366ac14b228688.js → page-3639e50ec3e9acfd.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-8f9a85f96088c18b.js → page-6081362437c82470.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-6fb51c5c80f8ec67.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-ed7787cf4938b8e3.js → page-e0dcb1762f8c8f88.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/webpack-5393aad3d824e0cb.js +1 -0
- khoj/interface/compiled/_next/static/css/{a0c2fd63bb396f04.css → 23b26df423cd8a9c.css} +1 -1
- khoj/interface/compiled/_next/static/css/{93eeacc43e261162.css → c34713c98384ee87.css} +1 -1
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +3 -3
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +4 -4
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +3 -3
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +3 -3
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +3 -3
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +5 -5
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +3 -3
- khoj/main.py +7 -9
- khoj/manage.py +1 -0
- khoj/processor/content/github/github_to_entries.py +6 -7
- khoj/processor/content/images/image_to_entries.py +0 -1
- khoj/processor/content/markdown/markdown_to_entries.py +2 -3
- khoj/processor/content/notion/notion_to_entries.py +5 -6
- khoj/processor/content/org_mode/org_to_entries.py +4 -5
- khoj/processor/content/org_mode/orgnode.py +4 -4
- khoj/processor/content/plaintext/plaintext_to_entries.py +1 -2
- khoj/processor/content/text_to_entries.py +1 -3
- khoj/processor/conversation/google/utils.py +3 -3
- khoj/processor/conversation/openai/gpt.py +65 -28
- khoj/processor/conversation/openai/utils.py +359 -28
- khoj/processor/conversation/prompts.py +16 -41
- khoj/processor/conversation/utils.py +29 -39
- khoj/processor/embeddings.py +0 -2
- khoj/processor/image/generate.py +3 -3
- khoj/processor/operator/__init__.py +2 -3
- khoj/processor/operator/grounding_agent.py +15 -2
- khoj/processor/operator/grounding_agent_uitars.py +34 -23
- khoj/processor/operator/operator_agent_anthropic.py +29 -4
- khoj/processor/operator/operator_agent_base.py +1 -1
- khoj/processor/operator/operator_agent_binary.py +4 -4
- khoj/processor/operator/operator_agent_openai.py +21 -6
- khoj/processor/operator/operator_environment_browser.py +1 -1
- khoj/processor/operator/operator_environment_computer.py +1 -1
- khoj/processor/speech/text_to_speech.py +0 -1
- khoj/processor/tools/online_search.py +1 -1
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api.py +2 -15
- khoj/routers/api_agents.py +1 -2
- khoj/routers/api_automation.py +1 -1
- khoj/routers/api_chat.py +10 -16
- khoj/routers/api_content.py +3 -111
- khoj/routers/api_model.py +0 -1
- khoj/routers/api_subscription.py +1 -1
- khoj/routers/email.py +4 -4
- khoj/routers/helpers.py +44 -103
- khoj/routers/research.py +8 -8
- khoj/search_filter/base_filter.py +2 -4
- khoj/search_type/text_search.py +1 -2
- khoj/utils/cli.py +5 -53
- khoj/utils/config.py +0 -65
- khoj/utils/constants.py +6 -7
- khoj/utils/helpers.py +10 -18
- khoj/utils/initialization.py +7 -48
- khoj/utils/models.py +2 -4
- khoj/utils/rawconfig.py +1 -69
- khoj/utils/state.py +2 -8
- khoj/utils/yaml.py +0 -39
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dist-info}/METADATA +3 -3
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dist-info}/RECORD +149 -158
- khoj/interface/compiled/_next/static/chunks/1191.b547ec13349b4aed.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1588.f0558a0bdffc4761.js +0 -117
- khoj/interface/compiled/_next/static/chunks/1918.925cb4a35518d258.js +0 -43
- khoj/interface/compiled/_next/static/chunks/2849.dc00ae5ba7219cfc.js +0 -1
- khoj/interface/compiled/_next/static/chunks/303.fe76de943e930fbd.js +0 -1
- khoj/interface/compiled/_next/static/chunks/4533.586e74b45a2bde25.js +0 -55
- khoj/interface/compiled/_next/static/chunks/4551.82ce1476b5516bc2.js +0 -5
- khoj/interface/compiled/_next/static/chunks/4748.0edd37cba3ea2809.js +0 -59
- khoj/interface/compiled/_next/static/chunks/5210.cd35a1c1ec594a20.js +0 -93
- khoj/interface/compiled/_next/static/chunks/5329.f8b3c5b3d16159cd.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5427-13d6ffd380fdfab7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/558-c14e76cff03f6a60.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5830.8876eccb82da9b7d.js +0 -262
- khoj/interface/compiled/_next/static/chunks/6230.88a71d8145347b3f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7161.77e0530a40ad5ca8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7200-ac3b2e37ff30e126.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7505.c31027a3695bdebb.js +0 -148
- khoj/interface/compiled/_next/static/chunks/7760.35649cc21d9585bd.js +0 -56
- khoj/interface/compiled/_next/static/chunks/83.48e2db193a940052.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8427.844694e06133fb51.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8665.4db7e6b2e8933497.js +0 -174
- khoj/interface/compiled/_next/static/chunks/872.caf84cc1a39ae59f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8890.6e8a59e4de6978bc.js +0 -215
- khoj/interface/compiled/_next/static/chunks/8950.5f2272e0ac923f9e.js +0 -1
- khoj/interface/compiled/_next/static/chunks/90542734.2c21f16f18b22411.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9202.c703864fcedc8d1f.js +0 -63
- khoj/interface/compiled/_next/static/chunks/9320.6aca4885d541aa44.js +0 -24
- khoj/interface/compiled/_next/static/chunks/9535.f78cd92d03331e55.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9968.b111fc002796da81.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-4e2a134ec26aa606.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/page-5db6ad18da10d353.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/page-6271e2e31c7571d1.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/layout-ad4d1792ab1a4108.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-f5881c7ae3ba0795.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-abb6c5f4239ad7be.js +0 -1
- khoj/interface/compiled/_next/static/chunks/f3e3247b-1758d4651e4457c2.js +0 -10
- khoj/interface/compiled/_next/static/chunks/webpack-4b00e5a0da4a9dae.js +0 -1
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +0 -69
- khoj/migrations/migrate_offline_chat_default_model_2.py +0 -71
- khoj/migrations/migrate_offline_chat_schema.py +0 -83
- khoj/migrations/migrate_offline_model.py +0 -29
- khoj/migrations/migrate_processor_config_openai.py +0 -67
- khoj/migrations/migrate_server_pg.py +0 -132
- khoj/migrations/migrate_version.py +0 -17
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +0 -224
- khoj/processor/conversation/offline/utils.py +0 -80
- khoj/processor/conversation/offline/whisper.py +0 -15
- khoj/utils/fs_syncer.py +0 -252
- /khoj/interface/compiled/_next/static/{TTch40tYWOfh0SzwjwZXV → RYbQvo3AvgOR0bEVVfxF4}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{TTch40tYWOfh0SzwjwZXV → RYbQvo3AvgOR0bEVVfxF4}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/{1915-fbfe167c84ad60c5.js → 1915-5c6508f6ebb62a30.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2117-e78b6902ad6f75ec.js → 2117-080746c8e170c81a.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{2939-4d4084c5b888b960.js → 2939-4af3fd24b8ffc9ad.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{4447-d6cf93724d57e34b.js → 4447-cd95608f8e93e711.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{8667-4b7790573b08c50d.js → 8667-50b03a89e82e0ba7.js} +0 -0
- /khoj/interface/compiled/_next/static/chunks/{9139-ce1ae935dac9c871.js → 9139-8ac4d9feb10f8869.js} +0 -0
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dist-info}/WHEEL +0 -0
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dist-info}/entry_points.txt +0 -0
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dist-info}/licenses/LICENSE +0 -0
khoj/routers/helpers.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
import asyncio
|
2
2
|
import base64
|
3
|
-
import concurrent.futures
|
4
3
|
import fnmatch
|
5
4
|
import hashlib
|
6
5
|
import json
|
@@ -47,14 +46,12 @@ from khoj.database.adapters import (
|
|
47
46
|
EntryAdapters,
|
48
47
|
FileObjectAdapters,
|
49
48
|
aget_user_by_email,
|
50
|
-
ais_user_subscribed,
|
51
49
|
create_khoj_token,
|
52
50
|
get_default_search_model,
|
53
51
|
get_khoj_tokens,
|
54
52
|
get_user_name,
|
55
53
|
get_user_notion_config,
|
56
54
|
get_user_subscription_state,
|
57
|
-
is_user_subscribed,
|
58
55
|
run_with_process_lock,
|
59
56
|
)
|
60
57
|
from khoj.database.models import (
|
@@ -89,10 +86,6 @@ from khoj.processor.conversation.google.gemini_chat import (
|
|
89
86
|
converse_gemini,
|
90
87
|
gemini_send_message_to_model,
|
91
88
|
)
|
92
|
-
from khoj.processor.conversation.offline.chat_model import (
|
93
|
-
converse_offline,
|
94
|
-
send_message_to_model_offline,
|
95
|
-
)
|
96
89
|
from khoj.processor.conversation.openai.gpt import (
|
97
90
|
converse_openai,
|
98
91
|
send_message_to_model,
|
@@ -117,7 +110,6 @@ from khoj.search_filter.file_filter import FileFilter
|
|
117
110
|
from khoj.search_filter.word_filter import WordFilter
|
118
111
|
from khoj.search_type import text_search
|
119
112
|
from khoj.utils import state
|
120
|
-
from khoj.utils.config import OfflineChatProcessorModel
|
121
113
|
from khoj.utils.helpers import (
|
122
114
|
LRU,
|
123
115
|
ConversationCommand,
|
@@ -165,17 +157,9 @@ def validate_chat_model(user: KhojUser):
|
|
165
157
|
|
166
158
|
async def is_ready_to_chat(user: KhojUser):
|
167
159
|
user_chat_model = await ConversationAdapters.aget_user_chat_model(user)
|
168
|
-
if user_chat_model
|
160
|
+
if user_chat_model is None:
|
169
161
|
user_chat_model = await ConversationAdapters.aget_default_chat_model(user)
|
170
162
|
|
171
|
-
if user_chat_model and user_chat_model.model_type == ChatModel.ModelType.OFFLINE:
|
172
|
-
chat_model_name = user_chat_model.name
|
173
|
-
max_tokens = user_chat_model.max_prompt_size
|
174
|
-
if state.offline_chat_processor_config is None:
|
175
|
-
logger.info("Loading Offline Chat Model...")
|
176
|
-
state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model_name, max_tokens)
|
177
|
-
return True
|
178
|
-
|
179
163
|
if (
|
180
164
|
user_chat_model
|
181
165
|
and (
|
@@ -231,7 +215,6 @@ def update_telemetry_state(
|
|
231
215
|
telemetry_type=telemetry_type,
|
232
216
|
api=api,
|
233
217
|
client=client,
|
234
|
-
app_config=state.config.app,
|
235
218
|
disable_telemetry_env=state.telemetry_disabled,
|
236
219
|
properties=user_state,
|
237
220
|
)
|
@@ -595,7 +578,7 @@ async def generate_online_subqueries(
|
|
595
578
|
)
|
596
579
|
return {q}
|
597
580
|
return response
|
598
|
-
except Exception
|
581
|
+
except Exception:
|
599
582
|
logger.error(f"Invalid response for constructing online subqueries: {response}. Returning original query: {q}")
|
600
583
|
return {q}
|
601
584
|
|
@@ -1186,8 +1169,8 @@ async def search_documents(
|
|
1186
1169
|
agent_has_entries = await sync_to_async(EntryAdapters.agent_has_entries)(agent=agent)
|
1187
1170
|
|
1188
1171
|
if (
|
1189
|
-
|
1190
|
-
and
|
1172
|
+
ConversationCommand.Notes not in conversation_commands
|
1173
|
+
and ConversationCommand.Default not in conversation_commands
|
1191
1174
|
and not agent_has_entries
|
1192
1175
|
):
|
1193
1176
|
yield compiled_references, inferred_queries, q
|
@@ -1281,6 +1264,7 @@ async def extract_questions(
|
|
1281
1264
|
location_data: LocationData = None,
|
1282
1265
|
query_images: Optional[List[str]] = None,
|
1283
1266
|
query_files: str = None,
|
1267
|
+
max_queries: int = 5,
|
1284
1268
|
tracer: dict = {},
|
1285
1269
|
):
|
1286
1270
|
"""
|
@@ -1310,14 +1294,20 @@ async def extract_questions(
|
|
1310
1294
|
location=location,
|
1311
1295
|
username=username,
|
1312
1296
|
personality_context=personality_context,
|
1297
|
+
max_queries=max_queries,
|
1313
1298
|
)
|
1314
1299
|
|
1315
1300
|
prompt = prompts.extract_questions_user_message.format(text=query, chat_history=chat_history_str)
|
1316
1301
|
|
1317
1302
|
class DocumentQueries(BaseModel):
|
1318
|
-
"""Choose
|
1303
|
+
"""Choose semantic search queries to run on user documents."""
|
1319
1304
|
|
1320
|
-
queries: List[str] = Field(
|
1305
|
+
queries: List[str] = Field(
|
1306
|
+
...,
|
1307
|
+
min_length=1,
|
1308
|
+
max_length=max_queries,
|
1309
|
+
description="List of semantic search queries to run on user documents.",
|
1310
|
+
)
|
1321
1311
|
|
1322
1312
|
raw_response = await send_message_to_model_wrapper(
|
1323
1313
|
system_message=system_prompt,
|
@@ -1339,8 +1329,8 @@ async def extract_questions(
|
|
1339
1329
|
logger.error(f"Invalid response for constructing subqueries: {response}")
|
1340
1330
|
return [query]
|
1341
1331
|
return queries
|
1342
|
-
except:
|
1343
|
-
logger.warning(
|
1332
|
+
except Exception:
|
1333
|
+
logger.warning("LLM returned invalid JSON. Falling back to using user message as search query.")
|
1344
1334
|
return [query]
|
1345
1335
|
|
1346
1336
|
|
@@ -1365,7 +1355,7 @@ async def execute_search(
|
|
1365
1355
|
return results
|
1366
1356
|
|
1367
1357
|
if q is None or q == "":
|
1368
|
-
logger.warning(
|
1358
|
+
logger.warning("No query param (q) passed in API call to initiate search")
|
1369
1359
|
return results
|
1370
1360
|
|
1371
1361
|
# initialize variables
|
@@ -1378,7 +1368,7 @@ async def execute_search(
|
|
1378
1368
|
if user:
|
1379
1369
|
query_cache_key = f"{user_query}-{n}-{t}-{r}-{max_distance}-{dedupe}"
|
1380
1370
|
if query_cache_key in state.query_cache[user.uuid]:
|
1381
|
-
logger.debug(
|
1371
|
+
logger.debug("Return response from query cache")
|
1382
1372
|
return state.query_cache[user.uuid][query_cache_key]
|
1383
1373
|
|
1384
1374
|
# Encode query with filter terms removed
|
@@ -1470,12 +1460,6 @@ async def send_message_to_model_wrapper(
|
|
1470
1460
|
vision_available = chat_model.vision_enabled
|
1471
1461
|
api_key = chat_model.ai_model_api.api_key
|
1472
1462
|
api_base_url = chat_model.ai_model_api.api_base_url
|
1473
|
-
loaded_model = None
|
1474
|
-
|
1475
|
-
if model_type == ChatModel.ModelType.OFFLINE:
|
1476
|
-
if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
|
1477
|
-
state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model_name, max_tokens)
|
1478
|
-
loaded_model = state.offline_chat_processor_config.loaded_model
|
1479
1463
|
|
1480
1464
|
truncated_messages = generate_chatml_messages_with_context(
|
1481
1465
|
user_message=query,
|
@@ -1483,7 +1467,6 @@ async def send_message_to_model_wrapper(
|
|
1483
1467
|
system_message=system_message,
|
1484
1468
|
chat_history=chat_history,
|
1485
1469
|
model_name=chat_model_name,
|
1486
|
-
loaded_model=loaded_model,
|
1487
1470
|
tokenizer_name=tokenizer,
|
1488
1471
|
max_prompt_size=max_tokens,
|
1489
1472
|
vision_enabled=vision_available,
|
@@ -1492,18 +1475,7 @@ async def send_message_to_model_wrapper(
|
|
1492
1475
|
query_files=query_files,
|
1493
1476
|
)
|
1494
1477
|
|
1495
|
-
if model_type == ChatModel.ModelType.
|
1496
|
-
return send_message_to_model_offline(
|
1497
|
-
messages=truncated_messages,
|
1498
|
-
loaded_model=loaded_model,
|
1499
|
-
model_name=chat_model_name,
|
1500
|
-
max_prompt_size=max_tokens,
|
1501
|
-
streaming=False,
|
1502
|
-
response_type=response_type,
|
1503
|
-
tracer=tracer,
|
1504
|
-
)
|
1505
|
-
|
1506
|
-
elif model_type == ChatModel.ModelType.OPENAI:
|
1478
|
+
if model_type == ChatModel.ModelType.OPENAI:
|
1507
1479
|
return send_message_to_model(
|
1508
1480
|
messages=truncated_messages,
|
1509
1481
|
api_key=api_key,
|
@@ -1565,19 +1537,12 @@ def send_message_to_model_wrapper_sync(
|
|
1565
1537
|
vision_available = chat_model.vision_enabled
|
1566
1538
|
api_key = chat_model.ai_model_api.api_key
|
1567
1539
|
api_base_url = chat_model.ai_model_api.api_base_url
|
1568
|
-
loaded_model = None
|
1569
|
-
|
1570
|
-
if model_type == ChatModel.ModelType.OFFLINE:
|
1571
|
-
if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
|
1572
|
-
state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model_name, max_tokens)
|
1573
|
-
loaded_model = state.offline_chat_processor_config.loaded_model
|
1574
1540
|
|
1575
1541
|
truncated_messages = generate_chatml_messages_with_context(
|
1576
1542
|
user_message=message,
|
1577
1543
|
system_message=system_message,
|
1578
1544
|
chat_history=chat_history,
|
1579
1545
|
model_name=chat_model_name,
|
1580
|
-
loaded_model=loaded_model,
|
1581
1546
|
max_prompt_size=max_tokens,
|
1582
1547
|
vision_enabled=vision_available,
|
1583
1548
|
model_type=model_type,
|
@@ -1585,18 +1550,7 @@ def send_message_to_model_wrapper_sync(
|
|
1585
1550
|
query_files=query_files,
|
1586
1551
|
)
|
1587
1552
|
|
1588
|
-
if model_type == ChatModel.ModelType.
|
1589
|
-
return send_message_to_model_offline(
|
1590
|
-
messages=truncated_messages,
|
1591
|
-
loaded_model=loaded_model,
|
1592
|
-
model_name=chat_model_name,
|
1593
|
-
max_prompt_size=max_tokens,
|
1594
|
-
streaming=False,
|
1595
|
-
response_type=response_type,
|
1596
|
-
tracer=tracer,
|
1597
|
-
)
|
1598
|
-
|
1599
|
-
elif model_type == ChatModel.ModelType.OPENAI:
|
1553
|
+
if model_type == ChatModel.ModelType.OPENAI:
|
1600
1554
|
return send_message_to_model(
|
1601
1555
|
messages=truncated_messages,
|
1602
1556
|
api_key=api_key,
|
@@ -1678,30 +1632,7 @@ async def agenerate_chat_response(
|
|
1678
1632
|
chat_model = vision_enabled_config
|
1679
1633
|
vision_available = True
|
1680
1634
|
|
1681
|
-
if chat_model.model_type ==
|
1682
|
-
loaded_model = state.offline_chat_processor_config.loaded_model
|
1683
|
-
chat_response_generator = converse_offline(
|
1684
|
-
# Query
|
1685
|
-
user_query=query_to_run,
|
1686
|
-
# Context
|
1687
|
-
references=compiled_references,
|
1688
|
-
online_results=online_results,
|
1689
|
-
generated_files=raw_generated_files,
|
1690
|
-
generated_asset_results=generated_asset_results,
|
1691
|
-
location_data=location_data,
|
1692
|
-
user_name=user_name,
|
1693
|
-
query_files=query_files,
|
1694
|
-
chat_history=chat_history,
|
1695
|
-
# Model
|
1696
|
-
loaded_model=loaded_model,
|
1697
|
-
model_name=chat_model.name,
|
1698
|
-
max_prompt_size=chat_model.max_prompt_size,
|
1699
|
-
tokenizer_name=chat_model.tokenizer,
|
1700
|
-
agent=agent,
|
1701
|
-
tracer=tracer,
|
1702
|
-
)
|
1703
|
-
|
1704
|
-
elif chat_model.model_type == ChatModel.ModelType.OPENAI:
|
1635
|
+
if chat_model.model_type == ChatModel.ModelType.OPENAI:
|
1705
1636
|
openai_chat_config = chat_model.ai_model_api
|
1706
1637
|
api_key = openai_chat_config.api_key
|
1707
1638
|
chat_model_name = chat_model.name
|
@@ -1948,8 +1879,8 @@ class ApiUserRateLimiter:
|
|
1948
1879
|
|
1949
1880
|
user: KhojUser = websocket.scope["user"].object
|
1950
1881
|
subscribed = has_required_scope(websocket, ["premium"])
|
1951
|
-
current_window = "today" if self.window == 60 * 60 * 24 else
|
1952
|
-
next_window = "tomorrow" if self.window == 60 * 60 * 24 else
|
1882
|
+
current_window = "today" if self.window == 60 * 60 * 24 else "now"
|
1883
|
+
next_window = "tomorrow" if self.window == 60 * 60 * 24 else "in a bit"
|
1953
1884
|
common_message_prefix = f"I'm glad you're enjoying interacting with me! You've unfortunately exceeded your usage limit for {current_window}."
|
1954
1885
|
|
1955
1886
|
# Remove requests outside of the time window
|
@@ -2292,7 +2223,7 @@ def should_notify(original_query: str, executed_query: str, ai_response: str, us
|
|
2292
2223
|
should_notify_result = response["decision"] == "Yes"
|
2293
2224
|
reason = response.get("reason", "unknown")
|
2294
2225
|
logger.info(
|
2295
|
-
f
|
2226
|
+
f"Decided to {'not ' if not should_notify_result else ''}notify user of automation response because of reason: {reason}."
|
2296
2227
|
)
|
2297
2228
|
return should_notify_result
|
2298
2229
|
except Exception as e:
|
@@ -2386,7 +2317,7 @@ def scheduled_chat(
|
|
2386
2317
|
response_map = raw_response.json()
|
2387
2318
|
ai_response = response_map.get("response") or response_map.get("image")
|
2388
2319
|
is_image = False
|
2389
|
-
if
|
2320
|
+
if isinstance(ai_response, dict):
|
2390
2321
|
is_image = ai_response.get("image") is not None
|
2391
2322
|
else:
|
2392
2323
|
ai_response = raw_response.text
|
@@ -2533,12 +2464,12 @@ async def aschedule_automation(
|
|
2533
2464
|
|
2534
2465
|
def construct_automation_created_message(automation: Job, crontime: str, query_to_run: str, subject: str):
|
2535
2466
|
# Display next run time in user timezone instead of UTC
|
2536
|
-
schedule = f
|
2467
|
+
schedule = f"{cron_descriptor.get_description(crontime)} {automation.next_run_time.strftime('%Z')}"
|
2537
2468
|
next_run_time = automation.next_run_time.strftime("%Y-%m-%d %I:%M %p %Z")
|
2538
2469
|
# Remove /automated_task prefix from inferred_query
|
2539
2470
|
unprefixed_query_to_run = re.sub(r"^\/automated_task\s*", "", query_to_run)
|
2540
2471
|
# Create the automation response
|
2541
|
-
automation_icon_url =
|
2472
|
+
automation_icon_url = "/static/assets/icons/automation.svg"
|
2542
2473
|
return f"""
|
2543
2474
|
###  Created Automation
|
2544
2475
|
- Subject: **{subject}**
|
@@ -2786,19 +2717,20 @@ def configure_content(
|
|
2786
2717
|
t: Optional[state.SearchType] = state.SearchType.All,
|
2787
2718
|
) -> bool:
|
2788
2719
|
success = True
|
2789
|
-
if t
|
2720
|
+
if t is None:
|
2790
2721
|
t = state.SearchType.All
|
2791
2722
|
|
2792
2723
|
if t is not None and t in [type.value for type in state.SearchType]:
|
2793
2724
|
t = state.SearchType(t)
|
2794
2725
|
|
2795
|
-
if t is not None and
|
2726
|
+
if t is not None and t.value not in [type.value for type in state.SearchType]:
|
2796
2727
|
logger.warning(f"🚨 Invalid search type: {t}")
|
2797
2728
|
return False
|
2798
2729
|
|
2799
2730
|
search_type = t.value if t else None
|
2800
2731
|
|
2801
|
-
|
2732
|
+
# Check if client sent any documents of the supported types
|
2733
|
+
no_client_sent_documents = all([not files.get(file_type) for file_type in files])
|
2802
2734
|
|
2803
2735
|
if files is None:
|
2804
2736
|
logger.warning(f"🚨 No files to process for {search_type} search.")
|
@@ -2872,7 +2804,8 @@ def configure_content(
|
|
2872
2804
|
success = False
|
2873
2805
|
|
2874
2806
|
try:
|
2875
|
-
if
|
2807
|
+
# Run server side indexing of user Github docs if no client sent documents
|
2808
|
+
if no_client_sent_documents:
|
2876
2809
|
github_config = GithubConfig.objects.filter(user=user).prefetch_related("githubrepoconfig").first()
|
2877
2810
|
if (
|
2878
2811
|
search_type == state.SearchType.All.value or search_type == state.SearchType.Github.value
|
@@ -2892,7 +2825,8 @@ def configure_content(
|
|
2892
2825
|
success = False
|
2893
2826
|
|
2894
2827
|
try:
|
2895
|
-
if
|
2828
|
+
# Run server side indexing of user Notion docs if no client sent documents
|
2829
|
+
if no_client_sent_documents:
|
2896
2830
|
# Initialize Notion Search
|
2897
2831
|
notion_config = NotionConfig.objects.filter(user=user).first()
|
2898
2832
|
if (
|
@@ -3058,7 +2992,7 @@ async def grep_files(
|
|
3058
2992
|
query += f" {' and '.join(context_info)}"
|
3059
2993
|
if line_count > max_results:
|
3060
2994
|
if lines_before or lines_after:
|
3061
|
-
query +=
|
2995
|
+
query += " for"
|
3062
2996
|
query += f" first {max_results} results"
|
3063
2997
|
return query
|
3064
2998
|
|
@@ -3068,7 +3002,7 @@ async def grep_files(
|
|
3068
3002
|
lines_after = lines_after or 0
|
3069
3003
|
|
3070
3004
|
try:
|
3071
|
-
regex = re.compile(regex_pattern, re.IGNORECASE)
|
3005
|
+
regex = re.compile(regex_pattern, re.IGNORECASE | re.MULTILINE)
|
3072
3006
|
except re.error as e:
|
3073
3007
|
yield {
|
3074
3008
|
"query": _generate_query(0, 0, path_prefix, regex_pattern, lines_before, lines_after),
|
@@ -3078,7 +3012,14 @@ async def grep_files(
|
|
3078
3012
|
return
|
3079
3013
|
|
3080
3014
|
try:
|
3081
|
-
|
3015
|
+
# Make db pushdown filters more permissive by removing line anchors
|
3016
|
+
# The precise line-anchored matching will be done in Python stage
|
3017
|
+
db_pattern = regex_pattern
|
3018
|
+
db_pattern = re.sub(r"\(\?\w*\)", "", db_pattern) # Remove inline flags like (?i), (?m), (?im)
|
3019
|
+
db_pattern = re.sub(r"^\^", "", db_pattern) # Remove ^ at regex pattern start
|
3020
|
+
db_pattern = re.sub(r"\$$", "", db_pattern) # Remove $ at regex pattern end
|
3021
|
+
|
3022
|
+
file_matches = await FileObjectAdapters.aget_file_objects_by_regex(user, db_pattern, path_prefix)
|
3082
3023
|
|
3083
3024
|
line_matches = []
|
3084
3025
|
for file_object in file_matches:
|
khoj/routers/research.py
CHANGED
@@ -15,7 +15,6 @@ from khoj.processor.conversation.utils import (
|
|
15
15
|
ResearchIteration,
|
16
16
|
ToolCall,
|
17
17
|
construct_iteration_history,
|
18
|
-
construct_structured_message,
|
19
18
|
construct_tool_chat_history,
|
20
19
|
load_complex_json,
|
21
20
|
)
|
@@ -24,7 +23,6 @@ from khoj.processor.tools.online_search import read_webpages_content, search_onl
|
|
24
23
|
from khoj.processor.tools.run_code import run_code
|
25
24
|
from khoj.routers.helpers import (
|
26
25
|
ChatEvent,
|
27
|
-
generate_summary_from_files,
|
28
26
|
get_message_from_queue,
|
29
27
|
grep_files,
|
30
28
|
list_files,
|
@@ -102,7 +100,7 @@ async def apick_next_tool(
|
|
102
100
|
ConversationCommand.Notes.value: [tool.value for tool in document_research_tools],
|
103
101
|
ConversationCommand.Webpage.value: [ConversationCommand.ReadWebpage.value],
|
104
102
|
ConversationCommand.Online.value: [ConversationCommand.SearchWeb.value],
|
105
|
-
ConversationCommand.Code.value: [ConversationCommand.
|
103
|
+
ConversationCommand.Code.value: [ConversationCommand.PythonCoder.value],
|
106
104
|
ConversationCommand.Operator.value: [ConversationCommand.OperateComputer.value],
|
107
105
|
}
|
108
106
|
for input_tool, research_tools in input_tools_to_research_tools.items():
|
@@ -184,7 +182,7 @@ async def apick_next_tool(
|
|
184
182
|
# TODO: Handle multiple tool calls.
|
185
183
|
response_text = response.text
|
186
184
|
parsed_response = [ToolCall(**item) for item in load_complex_json(response_text)][0]
|
187
|
-
except Exception
|
185
|
+
except Exception:
|
188
186
|
# Otherwise assume the model has decided to end the research run and respond to the user.
|
189
187
|
parsed_response = ToolCall(name=ConversationCommand.Text, args={"response": response_text}, id=None)
|
190
188
|
|
@@ -199,7 +197,7 @@ async def apick_next_tool(
|
|
199
197
|
if i.warning is None and isinstance(i.query, ToolCall)
|
200
198
|
}
|
201
199
|
if (parsed_response.name, dict_to_tuple(parsed_response.args)) in previous_tool_query_combinations:
|
202
|
-
warning =
|
200
|
+
warning = "Repeated tool, query combination detected. Skipping iteration. Try something different."
|
203
201
|
# Only send client status updates if we'll execute this iteration and model has thoughts to share.
|
204
202
|
elif send_status_func and not is_none_or_empty(response.thought):
|
205
203
|
async for event in send_status_func(response.thought):
|
@@ -414,11 +412,13 @@ async def research(
|
|
414
412
|
this_iteration.warning = f"Error reading webpages: {e}"
|
415
413
|
logger.error(this_iteration.warning, exc_info=True)
|
416
414
|
|
417
|
-
elif this_iteration.query.name == ConversationCommand.
|
415
|
+
elif this_iteration.query.name == ConversationCommand.PythonCoder:
|
418
416
|
try:
|
419
417
|
async for result in run_code(
|
420
418
|
**this_iteration.query.args,
|
421
|
-
conversation_history=construct_tool_chat_history(
|
419
|
+
conversation_history=construct_tool_chat_history(
|
420
|
+
previous_iterations, ConversationCommand.PythonCoder
|
421
|
+
),
|
422
422
|
context="",
|
423
423
|
location_data=location,
|
424
424
|
user=user,
|
@@ -435,7 +435,7 @@ async def research(
|
|
435
435
|
this_iteration.codeContext = code_results
|
436
436
|
async for result in send_status_func(f"**Ran code snippets**: {len(this_iteration.codeContext)}"):
|
437
437
|
yield result
|
438
|
-
except ValueError as e:
|
438
|
+
except (ValueError, TypeError) as e:
|
439
439
|
this_iteration.warning = f"Error running code: {e}"
|
440
440
|
logger.warning(this_iteration.warning, exc_info=True)
|
441
441
|
|
@@ -4,12 +4,10 @@ from typing import List
|
|
4
4
|
|
5
5
|
class BaseFilter(ABC):
|
6
6
|
@abstractmethod
|
7
|
-
def get_filter_terms(self, query: str) -> List[str]:
|
8
|
-
...
|
7
|
+
def get_filter_terms(self, query: str) -> List[str]: ...
|
9
8
|
|
10
9
|
def can_filter(self, raw_query: str) -> bool:
|
11
10
|
return len(self.get_filter_terms(raw_query)) > 0
|
12
11
|
|
13
12
|
@abstractmethod
|
14
|
-
def defilter(self, query: str) -> str:
|
15
|
-
...
|
13
|
+
def defilter(self, query: str) -> str: ...
|
khoj/search_type/text_search.py
CHANGED
@@ -9,9 +9,8 @@ from asgiref.sync import sync_to_async
|
|
9
9
|
from sentence_transformers import util
|
10
10
|
|
11
11
|
from khoj.database.adapters import EntryAdapters, get_default_search_model
|
12
|
-
from khoj.database.models import Agent
|
12
|
+
from khoj.database.models import Agent, KhojUser
|
13
13
|
from khoj.database.models import Entry as DbEntry
|
14
|
-
from khoj.database.models import KhojUser
|
15
14
|
from khoj.processor.content.text_to_entries import TextToEntries
|
16
15
|
from khoj.utils import state
|
17
16
|
from khoj.utils.helpers import get_absolute_path, timer
|
khoj/utils/cli.py
CHANGED
@@ -1,36 +1,19 @@
|
|
1
1
|
import argparse
|
2
2
|
import logging
|
3
|
-
import os
|
4
3
|
import pathlib
|
5
4
|
from importlib.metadata import version
|
6
5
|
|
7
6
|
logger = logging.getLogger(__name__)
|
8
7
|
|
9
|
-
from khoj.migrations.migrate_offline_chat_default_model import (
|
10
|
-
migrate_offline_chat_default_model,
|
11
|
-
)
|
12
|
-
from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
|
13
|
-
from khoj.migrations.migrate_offline_model import migrate_offline_model
|
14
|
-
from khoj.migrations.migrate_processor_config_openai import (
|
15
|
-
migrate_processor_conversation_schema,
|
16
|
-
)
|
17
|
-
from khoj.migrations.migrate_server_pg import migrate_server_pg
|
18
|
-
from khoj.migrations.migrate_version import migrate_config_to_version
|
19
|
-
from khoj.utils.helpers import is_env_var_true, resolve_absolute_path
|
20
|
-
from khoj.utils.yaml import parse_config_from_file
|
21
|
-
|
22
8
|
|
23
9
|
def cli(args=None):
|
24
10
|
# Setup Argument Parser for the Commandline Interface
|
25
11
|
parser = argparse.ArgumentParser(description="Start Khoj; An AI personal assistant for your Digital Brain")
|
26
12
|
parser.add_argument(
|
27
|
-
"--
|
28
|
-
|
29
|
-
|
30
|
-
"
|
31
|
-
action="store_true",
|
32
|
-
default=False,
|
33
|
-
help="Regenerate model embeddings from source files. Default: false",
|
13
|
+
"--log-file",
|
14
|
+
default="~/.khoj/khoj.log",
|
15
|
+
type=pathlib.Path,
|
16
|
+
help="File path for server logs. Default: ~/.khoj/khoj.log",
|
34
17
|
)
|
35
18
|
parser.add_argument("--verbose", "-v", action="count", default=0, help="Show verbose conversion logs. Default: 0")
|
36
19
|
parser.add_argument("--host", type=str, default="127.0.0.1", help="Host address of the server. Default: 127.0.0.1")
|
@@ -43,14 +26,11 @@ def cli(args=None):
|
|
43
26
|
parser.add_argument("--sslcert", type=str, help="Path to SSL certificate file")
|
44
27
|
parser.add_argument("--sslkey", type=str, help="Path to SSL key file")
|
45
28
|
parser.add_argument("--version", "-V", action="store_true", help="Print the installed Khoj version and exit")
|
46
|
-
parser.add_argument(
|
47
|
-
"--disable-chat-on-gpu", action="store_true", default=False, help="Disable using GPU for the offline chat model"
|
48
|
-
)
|
49
29
|
parser.add_argument(
|
50
30
|
"--anonymous-mode",
|
51
31
|
action="store_true",
|
52
32
|
default=False,
|
53
|
-
help="Run Khoj in
|
33
|
+
help="Run Khoj in single user mode with no login required. Useful for personal use or testing.",
|
54
34
|
)
|
55
35
|
parser.add_argument(
|
56
36
|
"--non-interactive",
|
@@ -64,38 +44,10 @@ def cli(args=None):
|
|
64
44
|
if len(remaining_args) > 0:
|
65
45
|
logger.info(f"⚠️ Ignoring unknown commandline args: {remaining_args}")
|
66
46
|
|
67
|
-
# Set default values for arguments
|
68
|
-
args.chat_on_gpu = not args.disable_chat_on_gpu
|
69
|
-
|
70
47
|
args.version_no = version("khoj")
|
71
48
|
if args.version:
|
72
49
|
# Show version of khoj installed and exit
|
73
50
|
print(args.version_no)
|
74
51
|
exit(0)
|
75
52
|
|
76
|
-
# Normalize config_file path to absolute path
|
77
|
-
args.config_file = resolve_absolute_path(args.config_file)
|
78
|
-
|
79
|
-
if not args.config_file.exists():
|
80
|
-
args.config = None
|
81
|
-
else:
|
82
|
-
args = run_migrations(args)
|
83
|
-
args.config = parse_config_from_file(args.config_file)
|
84
|
-
if is_env_var_true("KHOJ_TELEMETRY_DISABLE"):
|
85
|
-
args.config.app.should_log_telemetry = False
|
86
|
-
|
87
|
-
return args
|
88
|
-
|
89
|
-
|
90
|
-
def run_migrations(args):
|
91
|
-
migrations = [
|
92
|
-
migrate_config_to_version,
|
93
|
-
migrate_processor_conversation_schema,
|
94
|
-
migrate_offline_model,
|
95
|
-
migrate_offline_chat_schema,
|
96
|
-
migrate_offline_chat_default_model,
|
97
|
-
migrate_server_pg,
|
98
|
-
]
|
99
|
-
for migration in migrations:
|
100
|
-
args = migration(args)
|
101
53
|
return args
|
khoj/utils/config.py
CHANGED
@@ -1,22 +1,7 @@
|
|
1
1
|
# System Packages
|
2
2
|
from __future__ import annotations # to avoid quoting type hints
|
3
3
|
|
4
|
-
import logging
|
5
|
-
from dataclasses import dataclass
|
6
4
|
from enum import Enum
|
7
|
-
from typing import TYPE_CHECKING, Any, List, Optional, Union
|
8
|
-
|
9
|
-
import torch
|
10
|
-
|
11
|
-
from khoj.processor.conversation.offline.utils import download_model
|
12
|
-
|
13
|
-
logger = logging.getLogger(__name__)
|
14
|
-
|
15
|
-
|
16
|
-
if TYPE_CHECKING:
|
17
|
-
from sentence_transformers import CrossEncoder
|
18
|
-
|
19
|
-
from khoj.utils.models import BaseEncoder
|
20
5
|
|
21
6
|
|
22
7
|
class SearchType(str, Enum):
|
@@ -29,53 +14,3 @@ class SearchType(str, Enum):
|
|
29
14
|
Notion = "notion"
|
30
15
|
Plaintext = "plaintext"
|
31
16
|
Docx = "docx"
|
32
|
-
|
33
|
-
|
34
|
-
class ProcessorType(str, Enum):
|
35
|
-
Conversation = "conversation"
|
36
|
-
|
37
|
-
|
38
|
-
@dataclass
|
39
|
-
class TextContent:
|
40
|
-
enabled: bool
|
41
|
-
|
42
|
-
|
43
|
-
@dataclass
|
44
|
-
class ImageContent:
|
45
|
-
image_names: List[str]
|
46
|
-
image_embeddings: torch.Tensor
|
47
|
-
image_metadata_embeddings: torch.Tensor
|
48
|
-
|
49
|
-
|
50
|
-
@dataclass
|
51
|
-
class TextSearchModel:
|
52
|
-
bi_encoder: BaseEncoder
|
53
|
-
cross_encoder: Optional[CrossEncoder] = None
|
54
|
-
top_k: Optional[int] = 15
|
55
|
-
|
56
|
-
|
57
|
-
@dataclass
|
58
|
-
class ImageSearchModel:
|
59
|
-
image_encoder: BaseEncoder
|
60
|
-
|
61
|
-
|
62
|
-
@dataclass
|
63
|
-
class SearchModels:
|
64
|
-
text_search: Optional[TextSearchModel] = None
|
65
|
-
|
66
|
-
|
67
|
-
@dataclass
|
68
|
-
class OfflineChatProcessorConfig:
|
69
|
-
loaded_model: Union[Any, None] = None
|
70
|
-
|
71
|
-
|
72
|
-
class OfflineChatProcessorModel:
|
73
|
-
def __init__(self, chat_model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", max_tokens: int = None):
|
74
|
-
self.chat_model = chat_model
|
75
|
-
self.loaded_model = None
|
76
|
-
try:
|
77
|
-
self.loaded_model = download_model(self.chat_model, max_tokens=max_tokens)
|
78
|
-
except ValueError as e:
|
79
|
-
self.loaded_model = None
|
80
|
-
logger.error(f"Error while loading offline chat model: {e}", exc_info=True)
|
81
|
-
raise e
|
khoj/utils/constants.py
CHANGED
@@ -10,13 +10,6 @@ empty_escape_sequences = "\n|\r|\t| "
|
|
10
10
|
app_env_filepath = "~/.khoj/env"
|
11
11
|
telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
|
12
12
|
content_directory = "~/.khoj/content/"
|
13
|
-
default_offline_chat_models = [
|
14
|
-
"bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
|
15
|
-
"bartowski/Llama-3.2-3B-Instruct-GGUF",
|
16
|
-
"bartowski/gemma-2-9b-it-GGUF",
|
17
|
-
"bartowski/gemma-2-2b-it-GGUF",
|
18
|
-
"bartowski/Qwen2.5-14B-Instruct-GGUF",
|
19
|
-
]
|
20
13
|
default_openai_chat_models = ["gpt-4o-mini", "gpt-4.1", "o3", "o4-mini"]
|
21
14
|
default_gemini_chat_models = ["gemini-2.0-flash", "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05"]
|
22
15
|
default_anthropic_chat_models = ["claude-sonnet-4-0", "claude-3-5-haiku-latest"]
|
@@ -47,6 +40,9 @@ model_to_cost: Dict[str, Dict[str, float]] = {
|
|
47
40
|
"o3": {"input": 2.0, "output": 8.00},
|
48
41
|
"o3-pro": {"input": 20.0, "output": 80.00},
|
49
42
|
"o4-mini": {"input": 1.10, "output": 4.40},
|
43
|
+
"gpt-5-2025-08-07": {"input": 1.25, "output": 10.00, "cache_read": 0.125},
|
44
|
+
"gpt-5-mini-2025-08-07": {"input": 0.25, "output": 2.00, "cache_read": 0.025},
|
45
|
+
"gpt-5-nano-2025-08-07": {"input": 0.05, "output": 0.40, "cache_read": 0.005},
|
50
46
|
# Gemini Pricing: https://ai.google.dev/pricing
|
51
47
|
"gemini-1.5-flash": {"input": 0.075, "output": 0.30},
|
52
48
|
"gemini-1.5-flash-002": {"input": 0.075, "output": 0.30},
|
@@ -75,4 +71,7 @@ model_to_cost: Dict[str, Dict[str, float]] = {
|
|
75
71
|
"grok-3-latest": {"input": 3.0, "output": 15.0},
|
76
72
|
"grok-3-mini": {"input": 0.30, "output": 0.50},
|
77
73
|
"grok-3-mini-latest": {"input": 0.30, "output": 0.50},
|
74
|
+
# Groq pricing
|
75
|
+
"moonshotai/kimi-k2-instruct": {"input": 1.00, "output": 3.00},
|
76
|
+
"openai/gpt-oss-120b": {"input": 0.15, "output": 0.75},
|
78
77
|
}
|