khoj 2.0.0b12__py3-none-any.whl → 2.0.0b13.dev5__py3-none-any.whl
This diff shows the changes between publicly released versions of this package, as published to a supported public registry. It is provided for informational purposes only.
- khoj/configure.py +21 -54
- khoj/database/adapters/__init__.py +2 -11
- khoj/database/migrations/0092_alter_chatmodel_model_type_alter_chatmodel_name_and_more.py +36 -0
- khoj/database/migrations/0093_remove_localorgconfig_user_and_more.py +36 -0
- khoj/database/models/__init__.py +4 -34
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/app/agents/{page-5db6ad18da10d353.js → page-9a4610474cd59a71.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/{page-6271e2e31c7571d1.js → page-f7bb9d777b7745d4.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-4bc2938df5d57981.js → page-8e1c4f2af3c9429e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-a19a597629e87fb8.js → page-2b3056cba8aa96ce.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/search/layout-c02531d586972d7d.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/search/{page-fa366ac14b228688.js → page-4885df3cd175c957.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-8f9a85f96088c18b.js → page-8be3b35178abf2ec.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-ed7787cf4938b8e3.js → page-4a4b0c0f4749c2b2.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{webpack-4b00e5a0da4a9dae.js → webpack-ee14d29b64c5ab47.js} +1 -1
- khoj/interface/compiled/_next/static/css/{a0c2fd63bb396f04.css → 23b26df423cd8a9c.css} +1 -1
- khoj/interface/compiled/_next/static/css/2945c4a857922f3b.css +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +2 -2
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +2 -2
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +2 -2
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +2 -2
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +2 -2
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +2 -2
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +2 -2
- khoj/main.py +4 -6
- khoj/processor/content/github/github_to_entries.py +0 -1
- khoj/processor/content/notion/notion_to_entries.py +0 -1
- khoj/processor/content/text_to_entries.py +0 -1
- khoj/processor/conversation/prompts.py +0 -32
- khoj/processor/conversation/utils.py +8 -27
- khoj/processor/operator/__init__.py +0 -1
- khoj/routers/api.py +2 -14
- khoj/routers/api_content.py +3 -111
- khoj/routers/helpers.py +9 -79
- khoj/utils/cli.py +5 -53
- khoj/utils/config.py +0 -65
- khoj/utils/constants.py +0 -7
- khoj/utils/helpers.py +1 -9
- khoj/utils/initialization.py +6 -45
- khoj/utils/rawconfig.py +0 -67
- khoj/utils/state.py +1 -7
- khoj/utils/yaml.py +0 -39
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dev5.dist-info}/METADATA +1 -2
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dev5.dist-info}/RECORD +57 -68
- khoj/interface/compiled/_next/static/chunks/app/search/layout-f5881c7ae3ba0795.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-abb6c5f4239ad7be.js +0 -1
- khoj/interface/compiled/_next/static/css/93eeacc43e261162.css +0 -1
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +0 -69
- khoj/migrations/migrate_offline_chat_default_model_2.py +0 -71
- khoj/migrations/migrate_offline_chat_schema.py +0 -83
- khoj/migrations/migrate_offline_model.py +0 -29
- khoj/migrations/migrate_processor_config_openai.py +0 -67
- khoj/migrations/migrate_server_pg.py +0 -132
- khoj/migrations/migrate_version.py +0 -17
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +0 -224
- khoj/processor/conversation/offline/utils.py +0 -80
- khoj/processor/conversation/offline/whisper.py +0 -15
- khoj/utils/fs_syncer.py +0 -252
- /khoj/interface/compiled/_next/static/{TTch40tYWOfh0SzwjwZXV → XfWrWDAk5VXeZ88OdP652}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{TTch40tYWOfh0SzwjwZXV → XfWrWDAk5VXeZ88OdP652}/_ssgManifest.js +0 -0
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dev5.dist-info}/WHEEL +0 -0
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dev5.dist-info}/entry_points.txt +0 -0
- {khoj-2.0.0b12.dist-info → khoj-2.0.0b13.dev5.dist-info}/licenses/LICENSE +0 -0
khoj/routers/api.py
CHANGED
@@ -15,7 +15,6 @@ from khoj.configure import initialize_content
 from khoj.database import adapters
 from khoj.database.adapters import ConversationAdapters, EntryAdapters, get_user_photo
 from khoj.database.models import KhojUser, SpeechToTextModelOptions
-from khoj.processor.conversation.offline.whisper import transcribe_audio_offline
 from khoj.processor.conversation.openai.whisper import transcribe_audio
 from khoj.routers.helpers import (
     ApiUserRateLimiter,
@@ -88,22 +87,14 @@ def update(
     force: Optional[bool] = False,
 ):
     user = request.user.object
-    if not state.config:
-        error_msg = f"🚨 Khoj is not configured.\nConfigure it via http://localhost:42110/settings, plugins or by editing {state.config_file}."
-        logger.warning(error_msg)
-        raise HTTPException(status_code=500, detail=error_msg)
     try:
         initialize_content(user=user, regenerate=force, search_type=t)
     except Exception as e:
-        error_msg = f"🚨 Failed to update server via API: {e}"
+        error_msg = f"🚨 Failed to update server indexed content via API: {e}"
         logger.error(error_msg, exc_info=True)
         raise HTTPException(status_code=500, detail=error_msg)
     else:
-
-        if state.search_models:
-            components.append("Search models")
-        components_msg = ", ".join(components)
-        logger.info(f"📪 {components_msg} updated via API")
+        logger.info(f"📪 Server indexed content updated via API")

     update_telemetry_state(
         request=request,
@@ -150,9 +141,6 @@ async def transcribe(
     if not speech_to_text_config:
         # If the user has not configured a speech to text model, return an unsupported on server error
         status_code = 501
-    elif speech_to_text_config.model_type == SpeechToTextModelOptions.ModelType.OFFLINE:
-        speech2text_model = speech_to_text_config.model_name
-        user_message = await transcribe_audio_offline(audio_filename, speech2text_model)
     elif speech_to_text_config.model_type == SpeechToTextModelOptions.ModelType.OPENAI:
         speech2text_model = speech_to_text_config.model_name
         if speech_to_text_config.ai_model_api:
khoj/routers/api_content.py
CHANGED
@@ -27,16 +27,7 @@ from khoj.database.adapters import (
     get_user_notion_config,
 )
 from khoj.database.models import Entry as DbEntry
-from khoj.database.models import (
-    GithubConfig,
-    GithubRepoConfig,
-    KhojUser,
-    LocalMarkdownConfig,
-    LocalOrgConfig,
-    LocalPdfConfig,
-    LocalPlaintextConfig,
-    NotionConfig,
-)
+from khoj.database.models import GithubConfig, GithubRepoConfig, NotionConfig
 from khoj.processor.content.docx.docx_to_entries import DocxToEntries
 from khoj.processor.content.pdf.pdf_to_entries import PdfToEntries
 from khoj.routers.helpers import (
@@ -47,17 +38,9 @@ from khoj.routers.helpers import (
     get_user_config,
     update_telemetry_state,
 )
-from khoj.utils import constants, state
-from khoj.utils.config import SearchModels
-from khoj.utils.rawconfig import (
-    ContentConfig,
-    FullConfig,
-    GithubContentConfig,
-    NotionContentConfig,
-    SearchConfig,
-)
+from khoj.utils import state
+from khoj.utils.rawconfig import GithubContentConfig, NotionContentConfig
 from khoj.utils.state import SearchType
-from khoj.utils.yaml import save_config_to_file_updated_state

 logger = logging.getLogger(__name__)

@@ -192,8 +175,6 @@ async def set_content_github(
     updated_config: Union[GithubContentConfig, None],
     client: Optional[str] = None,
 ):
-    _initialize_config()
-
     user = request.user.object

     try:
@@ -225,8 +206,6 @@ async def set_content_notion(
     updated_config: Union[NotionContentConfig, None],
     client: Optional[str] = None,
 ):
-    _initialize_config()
-
     user = request.user.object

     try:
@@ -323,10 +302,6 @@ def get_content_types(request: Request, client: Optional[str] = None):
     configured_content_types = set(EntryAdapters.get_unique_file_types(user))
     configured_content_types |= {"all"}

-    if state.config and state.config.content_type:
-        for ctype in state.config.content_type.model_dump(exclude_none=True):
-            configured_content_types.add(ctype)
-
     return list(configured_content_types & all_content_types)


@@ -606,28 +581,6 @@ async def indexer(
         docx=index_files["docx"],
     )

-    if state.config == None:
-        logger.info("📬 Initializing content index on first run.")
-        default_full_config = FullConfig(
-            content_type=None,
-            search_type=SearchConfig.model_validate(constants.default_config["search-type"]),
-            processor=None,
-        )
-        state.config = default_full_config
-        default_content_config = ContentConfig(
-            org=None,
-            markdown=None,
-            pdf=None,
-            docx=None,
-            image=None,
-            github=None,
-            notion=None,
-            plaintext=None,
-        )
-        state.config.content_type = default_content_config
-        save_config_to_file_updated_state()
-        configure_search(state.search_models, state.config.search_type)
-
     loop = asyncio.get_event_loop()
     success = await loop.run_in_executor(
         None,
@@ -674,14 +627,6 @@ async def indexer(
     return Response(content=indexed_filenames, status_code=200)


-def configure_search(search_models: SearchModels, search_config: Optional[SearchConfig]) -> Optional[SearchModels]:
-    # Run Validation Checks
-    if search_models is None:
-        search_models = SearchModels()
-
-    return search_models
-
-
 def map_config_to_object(content_source: str):
     if content_source == DbEntry.EntrySource.GITHUB:
         return GithubConfig
@@ -689,56 +634,3 @@ def map_config_to_object(content_source: str):
         return NotionConfig
     if content_source == DbEntry.EntrySource.COMPUTER:
         return "Computer"
-
-
-async def map_config_to_db(config: FullConfig, user: KhojUser):
-    if config.content_type:
-        if config.content_type.org:
-            await LocalOrgConfig.objects.filter(user=user).adelete()
-            await LocalOrgConfig.objects.acreate(
-                input_files=config.content_type.org.input_files,
-                input_filter=config.content_type.org.input_filter,
-                index_heading_entries=config.content_type.org.index_heading_entries,
-                user=user,
-            )
-        if config.content_type.markdown:
-            await LocalMarkdownConfig.objects.filter(user=user).adelete()
-            await LocalMarkdownConfig.objects.acreate(
-                input_files=config.content_type.markdown.input_files,
-                input_filter=config.content_type.markdown.input_filter,
-                index_heading_entries=config.content_type.markdown.index_heading_entries,
-                user=user,
-            )
-        if config.content_type.pdf:
-            await LocalPdfConfig.objects.filter(user=user).adelete()
-            await LocalPdfConfig.objects.acreate(
-                input_files=config.content_type.pdf.input_files,
-                input_filter=config.content_type.pdf.input_filter,
-                index_heading_entries=config.content_type.pdf.index_heading_entries,
-                user=user,
-            )
-        if config.content_type.plaintext:
-            await LocalPlaintextConfig.objects.filter(user=user).adelete()
-            await LocalPlaintextConfig.objects.acreate(
-                input_files=config.content_type.plaintext.input_files,
-                input_filter=config.content_type.plaintext.input_filter,
-                index_heading_entries=config.content_type.plaintext.index_heading_entries,
-                user=user,
-            )
-        if config.content_type.github:
-            await adapters.set_user_github_config(
-                user=user,
-                pat_token=config.content_type.github.pat_token,
-                repos=config.content_type.github.repos,
-            )
-        if config.content_type.notion:
-            await adapters.set_notion_config(
-                user=user,
-                token=config.content_type.notion.token,
-            )
-
-
-def _initialize_config():
-    if state.config is None:
-        state.config = FullConfig()
-        state.config.search_type = SearchConfig.model_validate(constants.default_config["search-type"])
khoj/routers/helpers.py
CHANGED
@@ -89,10 +89,6 @@ from khoj.processor.conversation.google.gemini_chat import (
     converse_gemini,
     gemini_send_message_to_model,
 )
-from khoj.processor.conversation.offline.chat_model import (
-    converse_offline,
-    send_message_to_model_offline,
-)
 from khoj.processor.conversation.openai.gpt import (
     converse_openai,
     send_message_to_model,
@@ -117,7 +113,6 @@ from khoj.search_filter.file_filter import FileFilter
 from khoj.search_filter.word_filter import WordFilter
 from khoj.search_type import text_search
 from khoj.utils import state
-from khoj.utils.config import OfflineChatProcessorModel
 from khoj.utils.helpers import (
     LRU,
     ConversationCommand,
@@ -168,14 +163,6 @@ async def is_ready_to_chat(user: KhojUser):
     if user_chat_model == None:
         user_chat_model = await ConversationAdapters.aget_default_chat_model(user)

-    if user_chat_model and user_chat_model.model_type == ChatModel.ModelType.OFFLINE:
-        chat_model_name = user_chat_model.name
-        max_tokens = user_chat_model.max_prompt_size
-        if state.offline_chat_processor_config is None:
-            logger.info("Loading Offline Chat Model...")
-            state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model_name, max_tokens)
-        return True
-
     if (
         user_chat_model
         and (
@@ -231,7 +218,6 @@ def update_telemetry_state(
         telemetry_type=telemetry_type,
         api=api,
         client=client,
-        app_config=state.config.app,
         disable_telemetry_env=state.telemetry_disabled,
         properties=user_state,
     )
@@ -1470,12 +1456,6 @@ async def send_message_to_model_wrapper(
     vision_available = chat_model.vision_enabled
     api_key = chat_model.ai_model_api.api_key
     api_base_url = chat_model.ai_model_api.api_base_url
-    loaded_model = None
-
-    if model_type == ChatModel.ModelType.OFFLINE:
-        if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
-            state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model_name, max_tokens)
-        loaded_model = state.offline_chat_processor_config.loaded_model

     truncated_messages = generate_chatml_messages_with_context(
         user_message=query,
@@ -1483,7 +1463,6 @@ async def send_message_to_model_wrapper(
         system_message=system_message,
         chat_history=chat_history,
         model_name=chat_model_name,
-        loaded_model=loaded_model,
         tokenizer_name=tokenizer,
         max_prompt_size=max_tokens,
         vision_enabled=vision_available,
@@ -1492,18 +1471,7 @@ async def send_message_to_model_wrapper(
         query_files=query_files,
     )

-    if model_type == ChatModel.ModelType.OFFLINE:
-        return send_message_to_model_offline(
-            messages=truncated_messages,
-            loaded_model=loaded_model,
-            model_name=chat_model_name,
-            max_prompt_size=max_tokens,
-            streaming=False,
-            response_type=response_type,
-            tracer=tracer,
-        )
-
-    elif model_type == ChatModel.ModelType.OPENAI:
+    if model_type == ChatModel.ModelType.OPENAI:
         return send_message_to_model(
             messages=truncated_messages,
             api_key=api_key,
@@ -1565,19 +1533,12 @@ def send_message_to_model_wrapper_sync(
     vision_available = chat_model.vision_enabled
     api_key = chat_model.ai_model_api.api_key
     api_base_url = chat_model.ai_model_api.api_base_url
-    loaded_model = None
-
-    if model_type == ChatModel.ModelType.OFFLINE:
-        if state.offline_chat_processor_config is None or state.offline_chat_processor_config.loaded_model is None:
-            state.offline_chat_processor_config = OfflineChatProcessorModel(chat_model_name, max_tokens)
-        loaded_model = state.offline_chat_processor_config.loaded_model

     truncated_messages = generate_chatml_messages_with_context(
         user_message=message,
         system_message=system_message,
         chat_history=chat_history,
         model_name=chat_model_name,
-        loaded_model=loaded_model,
         max_prompt_size=max_tokens,
         vision_enabled=vision_available,
         model_type=model_type,
@@ -1585,18 +1546,7 @@ def send_message_to_model_wrapper_sync(
         query_files=query_files,
     )

-    if model_type == ChatModel.ModelType.OFFLINE:
-        return send_message_to_model_offline(
-            messages=truncated_messages,
-            loaded_model=loaded_model,
-            model_name=chat_model_name,
-            max_prompt_size=max_tokens,
-            streaming=False,
-            response_type=response_type,
-            tracer=tracer,
-        )
-
-    elif model_type == ChatModel.ModelType.OPENAI:
+    if model_type == ChatModel.ModelType.OPENAI:
         return send_message_to_model(
             messages=truncated_messages,
             api_key=api_key,
@@ -1678,30 +1628,7 @@ async def agenerate_chat_response(
         chat_model = vision_enabled_config
         vision_available = True

-    if chat_model.model_type == ChatModel.ModelType.OFFLINE:
-        loaded_model = state.offline_chat_processor_config.loaded_model
-        chat_response_generator = converse_offline(
-            # Query
-            user_query=query_to_run,
-            # Context
-            references=compiled_references,
-            online_results=online_results,
-            generated_files=raw_generated_files,
-            generated_asset_results=generated_asset_results,
-            location_data=location_data,
-            user_name=user_name,
-            query_files=query_files,
-            chat_history=chat_history,
-            # Model
-            loaded_model=loaded_model,
-            model_name=chat_model.name,
-            max_prompt_size=chat_model.max_prompt_size,
-            tokenizer_name=chat_model.tokenizer,
-            agent=agent,
-            tracer=tracer,
-        )
-
-    elif chat_model.model_type == ChatModel.ModelType.OPENAI:
+    if chat_model.model_type == ChatModel.ModelType.OPENAI:
         openai_chat_config = chat_model.ai_model_api
         api_key = openai_chat_config.api_key
         chat_model_name = chat_model.name
@@ -2798,7 +2725,8 @@ def configure_content(

     search_type = t.value if t else None

-
+    # Check if client sent any documents of the supported types
+    no_client_sent_documents = all([not files.get(file_type) for file_type in files])

     if files is None:
         logger.warning(f"🚨 No files to process for {search_type} search.")
@@ -2872,7 +2800,8 @@ def configure_content(
         success = False

     try:
-        if
+        # Run server side indexing of user Github docs if no client sent documents
+        if no_client_sent_documents:
             github_config = GithubConfig.objects.filter(user=user).prefetch_related("githubrepoconfig").first()
             if (
                 search_type == state.SearchType.All.value or search_type == state.SearchType.Github.value
@@ -2892,7 +2821,8 @@ def configure_content(
         success = False

     try:
-        if
+        # Run server side indexing of user Notion docs if no client sent documents
+        if no_client_sent_documents:
             # Initialize Notion Search
             notion_config = NotionConfig.objects.filter(user=user).first()
             if (
khoj/utils/cli.py
CHANGED
@@ -1,36 +1,19 @@
 import argparse
 import logging
-import os
 import pathlib
 from importlib.metadata import version

 logger = logging.getLogger(__name__)

-from khoj.migrations.migrate_offline_chat_default_model import (
-    migrate_offline_chat_default_model,
-)
-from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
-from khoj.migrations.migrate_offline_model import migrate_offline_model
-from khoj.migrations.migrate_processor_config_openai import (
-    migrate_processor_conversation_schema,
-)
-from khoj.migrations.migrate_server_pg import migrate_server_pg
-from khoj.migrations.migrate_version import migrate_config_to_version
-from khoj.utils.helpers import is_env_var_true, resolve_absolute_path
-from khoj.utils.yaml import parse_config_from_file
-

 def cli(args=None):
     # Setup Argument Parser for the Commandline Interface
     parser = argparse.ArgumentParser(description="Start Khoj; An AI personal assistant for your Digital Brain")
     parser.add_argument(
-        "--
-
-
-        "
-        action="store_true",
-        default=False,
-        help="Regenerate model embeddings from source files. Default: false",
+        "--log-file",
+        default="~/.khoj/khoj.log",
+        type=pathlib.Path,
+        help="File path for server logs. Default: ~/.khoj/khoj.log",
     )
     parser.add_argument("--verbose", "-v", action="count", default=0, help="Show verbose conversion logs. Default: 0")
     parser.add_argument("--host", type=str, default="127.0.0.1", help="Host address of the server. Default: 127.0.0.1")
@@ -43,14 +26,11 @@ def cli(args=None):
     parser.add_argument("--sslcert", type=str, help="Path to SSL certificate file")
     parser.add_argument("--sslkey", type=str, help="Path to SSL key file")
     parser.add_argument("--version", "-V", action="store_true", help="Print the installed Khoj version and exit")
-    parser.add_argument(
-        "--disable-chat-on-gpu", action="store_true", default=False, help="Disable using GPU for the offline chat model"
-    )
     parser.add_argument(
         "--anonymous-mode",
         action="store_true",
         default=False,
-        help="Run Khoj in
+        help="Run Khoj in single user mode with no login required. Useful for personal use or testing.",
     )
     parser.add_argument(
         "--non-interactive",
@@ -64,38 +44,10 @@ def cli(args=None):
     if len(remaining_args) > 0:
         logger.info(f"⚠️ Ignoring unknown commandline args: {remaining_args}")

-    # Set default values for arguments
-    args.chat_on_gpu = not args.disable_chat_on_gpu
-
     args.version_no = version("khoj")
     if args.version:
         # Show version of khoj installed and exit
         print(args.version_no)
         exit(0)

-    # Normalize config_file path to absolute path
-    args.config_file = resolve_absolute_path(args.config_file)
-
-    if not args.config_file.exists():
-        args.config = None
-    else:
-        args = run_migrations(args)
-        args.config = parse_config_from_file(args.config_file)
-        if is_env_var_true("KHOJ_TELEMETRY_DISABLE"):
-            args.config.app.should_log_telemetry = False
-
-    return args
-
-
-def run_migrations(args):
-    migrations = [
-        migrate_config_to_version,
-        migrate_processor_conversation_schema,
-        migrate_offline_model,
-        migrate_offline_chat_schema,
-        migrate_offline_chat_default_model,
-        migrate_server_pg,
-    ]
-    for migration in migrations:
-        args = migration(args)
     return args
khoj/utils/config.py
CHANGED
@@ -1,22 +1,7 @@
 # System Packages
 from __future__ import annotations  # to avoid quoting type hints

-import logging
-from dataclasses import dataclass
 from enum import Enum
-from typing import TYPE_CHECKING, Any, List, Optional, Union
-
-import torch
-
-from khoj.processor.conversation.offline.utils import download_model
-
-logger = logging.getLogger(__name__)
-
-
-if TYPE_CHECKING:
-    from sentence_transformers import CrossEncoder
-
-    from khoj.utils.models import BaseEncoder


 class SearchType(str, Enum):
@@ -29,53 +14,3 @@ class SearchType(str, Enum):
     Notion = "notion"
     Plaintext = "plaintext"
     Docx = "docx"
-
-
-class ProcessorType(str, Enum):
-    Conversation = "conversation"
-
-
-@dataclass
-class TextContent:
-    enabled: bool
-
-
-@dataclass
-class ImageContent:
-    image_names: List[str]
-    image_embeddings: torch.Tensor
-    image_metadata_embeddings: torch.Tensor
-
-
-@dataclass
-class TextSearchModel:
-    bi_encoder: BaseEncoder
-    cross_encoder: Optional[CrossEncoder] = None
-    top_k: Optional[int] = 15
-
-
-@dataclass
-class ImageSearchModel:
-    image_encoder: BaseEncoder
-
-
-@dataclass
-class SearchModels:
-    text_search: Optional[TextSearchModel] = None
-
-
-@dataclass
-class OfflineChatProcessorConfig:
-    loaded_model: Union[Any, None] = None
-
-
-class OfflineChatProcessorModel:
-    def __init__(self, chat_model: str = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", max_tokens: int = None):
-        self.chat_model = chat_model
-        self.loaded_model = None
-        try:
-            self.loaded_model = download_model(self.chat_model, max_tokens=max_tokens)
-        except ValueError as e:
-            self.loaded_model = None
-            logger.error(f"Error while loading offline chat model: {e}", exc_info=True)
-            raise e
khoj/utils/constants.py
CHANGED
@@ -10,13 +10,6 @@ empty_escape_sequences = "\n|\r|\t| "
 app_env_filepath = "~/.khoj/env"
 telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
 content_directory = "~/.khoj/content/"
-default_offline_chat_models = [
-    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
-    "bartowski/Llama-3.2-3B-Instruct-GGUF",
-    "bartowski/gemma-2-9b-it-GGUF",
-    "bartowski/gemma-2-2b-it-GGUF",
-    "bartowski/Qwen2.5-14B-Instruct-GGUF",
-]
 default_openai_chat_models = ["gpt-4o-mini", "gpt-4.1", "o3", "o4-mini"]
 default_gemini_chat_models = ["gemini-2.0-flash", "gemini-2.5-flash-preview-05-20", "gemini-2.5-pro-preview-06-05"]
 default_anthropic_chat_models = ["claude-sonnet-4-0", "claude-3-5-haiku-latest"]
khoj/utils/helpers.py
CHANGED
@@ -47,7 +47,6 @@ if TYPE_CHECKING:
     from sentence_transformers import CrossEncoder, SentenceTransformer

     from khoj.utils.models import BaseEncoder
-    from khoj.utils.rawconfig import AppConfig

 logger = logging.getLogger(__name__)

@@ -267,23 +266,16 @@ def get_server_id():
     return server_id


-def telemetry_disabled(app_config: AppConfig, telemetry_disable_env) -> bool:
-    if telemetry_disable_env is True:
-        return True
-    return not app_config or not app_config.should_log_telemetry
-
-
 def log_telemetry(
     telemetry_type: str,
     api: str = None,
     client: Optional[str] = None,
-    app_config: Optional[AppConfig] = None,
     disable_telemetry_env: bool = False,
     properties: dict = None,
 ):
     """Log basic app usage telemetry like client, os, api called"""
     # Do not log usage telemetry, if telemetry is disabled via app config
-    if
+    if disable_telemetry_env:
         return []

     if properties.get("server_id") is None:
|