khoj 2.0.0b12.dev5__py3-none-any.whl → 2.0.0b13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/app/README.md +1 -1
- khoj/app/urls.py +1 -0
- khoj/configure.py +21 -54
- khoj/database/adapters/__init__.py +6 -15
- khoj/database/management/commands/delete_orphaned_fileobjects.py +0 -1
- khoj/database/migrations/0064_remove_conversation_temp_id_alter_conversation_id.py +1 -1
- khoj/database/migrations/0075_migrate_generated_assets_and_validate.py +1 -1
- khoj/database/migrations/0092_alter_chatmodel_model_type_alter_chatmodel_name_and_more.py +36 -0
- khoj/database/migrations/0093_remove_localorgconfig_user_and_more.py +36 -0
- khoj/database/models/__init__.py +10 -40
- khoj/database/tests.py +0 -2
- khoj/interface/compiled/404/index.html +2 -2
- khoj/interface/compiled/_next/static/chunks/{9245.a04e92d034540234.js → 1225.ecac11e7421504c4.js} +3 -3
- khoj/interface/compiled/_next/static/chunks/1320.ae930ad00affe685.js +5 -0
- khoj/interface/compiled/_next/static/chunks/{1327-1a9107b9a2a04a98.js → 1327-e254819a9172cfa7.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/1626.15a8acc0d6639ec6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{3489.c523fe96a2eee74f.js → 1940.d082758bd04e08ae.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{2327-ea623ca2d22f78e9.js → 2327-438aaec1657c5ada.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/2475.57a0d0fd93d07af0.js +93 -0
- khoj/interface/compiled/_next/static/chunks/2481.5ce6524ba0a73f90.js +55 -0
- khoj/interface/compiled/_next/static/chunks/297.4c4c823ff6e3255b.js +174 -0
- khoj/interface/compiled/_next/static/chunks/{5639-09e2009a2adedf8b.js → 3260-82d2521fab032ff1.js} +68 -23
- khoj/interface/compiled/_next/static/chunks/3353.1c6d553216a1acae.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3855.f7b8131f78af046e.js +1 -0
- khoj/interface/compiled/_next/static/chunks/3973.dc54a39586ab48be.js +1 -0
- khoj/interface/compiled/_next/static/chunks/4241.c1cd170f7f37ac59.js +24 -0
- khoj/interface/compiled/_next/static/chunks/{4327.8d2a1b8f1ea78208.js → 4327.f3704dc398c67113.js} +19 -19
- khoj/interface/compiled/_next/static/chunks/4505.f09454a346269c3f.js +117 -0
- khoj/interface/compiled/_next/static/chunks/4801.96a152d49742b644.js +1 -0
- khoj/interface/compiled/_next/static/chunks/5427-a95ec748e52abb75.js +1 -0
- khoj/interface/compiled/_next/static/chunks/549.2bd27f59a91a9668.js +148 -0
- khoj/interface/compiled/_next/static/chunks/5765.71b1e1207b76b03f.js +1 -0
- khoj/interface/compiled/_next/static/chunks/584.d7ce3505f169b706.js +1 -0
- khoj/interface/compiled/_next/static/chunks/6240.34f7c1fa692edd61.js +24 -0
- khoj/interface/compiled/_next/static/chunks/6d3fe5a5-f9f3c16e0bc0cdf9.js +10 -0
- khoj/interface/compiled/_next/static/chunks/{7127-0f4a2a77d97fb5fa.js → 7127-97b83757db125ba6.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/7200-93ab0072359b8028.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{2612.bcf5a623b3da209e.js → 7553.f5ad54b1f6e92c49.js} +2 -2
- khoj/interface/compiled/_next/static/chunks/7626-1b630f1654172341.js +1 -0
- khoj/interface/compiled/_next/static/chunks/764.dadd316e8e16d191.js +63 -0
- khoj/interface/compiled/_next/static/chunks/78.08169ab541abab4f.js +43 -0
- khoj/interface/compiled/_next/static/chunks/784.e03acf460df213d1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{9537-d9ab442ce15d1e20.js → 8072-e1440cb482a0940e.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/{3265.924139c4146ee344.js → 8086.8d39887215807fcd.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8168.f074ab8c7c16d82d.js +59 -0
- khoj/interface/compiled/_next/static/chunks/{8694.2bd9c2f65d8c5847.js → 8223.1705878fa7a09292.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8483.94f6c9e2bee86f50.js +215 -0
- khoj/interface/compiled/_next/static/chunks/{8888.ebe0e552b59e7fed.js → 8810.fc0e479de78c7c61.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/8828.bc74dc4ce94e78f6.js +1 -0
- khoj/interface/compiled/_next/static/chunks/{7303.d0612f812a967a08.js → 8909.14ac3f43d0070cf1.js} +5 -5
- khoj/interface/compiled/_next/static/chunks/90542734.b1a1629065ba199b.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9167.098534184f03fe92.js +56 -0
- khoj/interface/compiled/_next/static/chunks/{4980.63500d68b3bb1222.js → 9537.e934ce37bf314509.js} +5 -5
- khoj/interface/compiled/_next/static/chunks/9574.3fe8e26e95bf1c34.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9599.ec50b5296c27dae9.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9643.b34248df52ffc77c.js +262 -0
- khoj/interface/compiled/_next/static/chunks/9747.2fd9065b1435abb1.js +1 -0
- khoj/interface/compiled/_next/static/chunks/9922.98f2b2a9959b4ebe.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e49165209d2e406c.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/agents/page-e291b49977f43880.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/automations/page-198b26df6e09bbb0.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/chat/{page-8e1c4f2af3c9429e.js → page-dfcc1e8e2ad62873.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/{page-2b3056cba8aa96ce.js → page-1567cac7b79a7c59.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/settings/{page-8be3b35178abf2ec.js → page-6081362437c82470.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-6fb51c5c80f8ec67.js +1 -0
- khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-4a4b0c0f4749c2b2.js → page-e0dcb1762f8c8f88.js} +1 -1
- khoj/interface/compiled/_next/static/chunks/webpack-5393aad3d824e0cb.js +1 -0
- khoj/interface/compiled/agents/index.html +2 -2
- khoj/interface/compiled/agents/index.txt +3 -3
- khoj/interface/compiled/automations/index.html +2 -2
- khoj/interface/compiled/automations/index.txt +3 -3
- khoj/interface/compiled/chat/index.html +2 -2
- khoj/interface/compiled/chat/index.txt +3 -3
- khoj/interface/compiled/index.html +2 -2
- khoj/interface/compiled/index.txt +3 -3
- khoj/interface/compiled/search/index.html +2 -2
- khoj/interface/compiled/search/index.txt +3 -3
- khoj/interface/compiled/settings/index.html +2 -2
- khoj/interface/compiled/settings/index.txt +3 -3
- khoj/interface/compiled/share/chat/index.html +2 -2
- khoj/interface/compiled/share/chat/index.txt +3 -3
- khoj/main.py +7 -9
- khoj/manage.py +1 -0
- khoj/processor/content/github/github_to_entries.py +6 -7
- khoj/processor/content/images/image_to_entries.py +0 -1
- khoj/processor/content/markdown/markdown_to_entries.py +2 -3
- khoj/processor/content/notion/notion_to_entries.py +5 -6
- khoj/processor/content/org_mode/org_to_entries.py +4 -5
- khoj/processor/content/org_mode/orgnode.py +4 -4
- khoj/processor/content/plaintext/plaintext_to_entries.py +1 -2
- khoj/processor/content/text_to_entries.py +1 -3
- khoj/processor/conversation/google/utils.py +3 -3
- khoj/processor/conversation/openai/gpt.py +65 -28
- khoj/processor/conversation/openai/utils.py +359 -28
- khoj/processor/conversation/prompts.py +16 -41
- khoj/processor/conversation/utils.py +29 -39
- khoj/processor/embeddings.py +0 -2
- khoj/processor/image/generate.py +3 -3
- khoj/processor/operator/__init__.py +2 -3
- khoj/processor/operator/grounding_agent.py +15 -2
- khoj/processor/operator/grounding_agent_uitars.py +34 -23
- khoj/processor/operator/operator_agent_anthropic.py +29 -4
- khoj/processor/operator/operator_agent_base.py +1 -1
- khoj/processor/operator/operator_agent_binary.py +4 -4
- khoj/processor/operator/operator_agent_openai.py +21 -6
- khoj/processor/operator/operator_environment_browser.py +1 -1
- khoj/processor/operator/operator_environment_computer.py +1 -1
- khoj/processor/speech/text_to_speech.py +0 -1
- khoj/processor/tools/online_search.py +1 -1
- khoj/processor/tools/run_code.py +1 -1
- khoj/routers/api.py +2 -15
- khoj/routers/api_agents.py +1 -2
- khoj/routers/api_automation.py +1 -1
- khoj/routers/api_chat.py +10 -16
- khoj/routers/api_content.py +3 -111
- khoj/routers/api_model.py +0 -1
- khoj/routers/api_subscription.py +1 -1
- khoj/routers/email.py +4 -4
- khoj/routers/helpers.py +44 -103
- khoj/routers/research.py +8 -8
- khoj/search_filter/base_filter.py +2 -4
- khoj/search_type/text_search.py +1 -2
- khoj/utils/cli.py +5 -53
- khoj/utils/config.py +0 -65
- khoj/utils/constants.py +6 -7
- khoj/utils/helpers.py +10 -18
- khoj/utils/initialization.py +7 -48
- khoj/utils/models.py +2 -4
- khoj/utils/rawconfig.py +1 -69
- khoj/utils/state.py +2 -8
- khoj/utils/yaml.py +0 -39
- {khoj-2.0.0b12.dev5.dist-info → khoj-2.0.0b13.dist-info}/METADATA +3 -3
- {khoj-2.0.0b12.dev5.dist-info → khoj-2.0.0b13.dist-info}/RECORD +139 -148
- khoj/interface/compiled/_next/static/chunks/1191.b547ec13349b4aed.js +0 -1
- khoj/interface/compiled/_next/static/chunks/1588.f0558a0bdffc4761.js +0 -117
- khoj/interface/compiled/_next/static/chunks/1918.925cb4a35518d258.js +0 -43
- khoj/interface/compiled/_next/static/chunks/2849.dc00ae5ba7219cfc.js +0 -1
- khoj/interface/compiled/_next/static/chunks/303.fe76de943e930fbd.js +0 -1
- khoj/interface/compiled/_next/static/chunks/4533.586e74b45a2bde25.js +0 -55
- khoj/interface/compiled/_next/static/chunks/4551.82ce1476b5516bc2.js +0 -5
- khoj/interface/compiled/_next/static/chunks/4748.0edd37cba3ea2809.js +0 -59
- khoj/interface/compiled/_next/static/chunks/5210.cd35a1c1ec594a20.js +0 -93
- khoj/interface/compiled/_next/static/chunks/5329.f8b3c5b3d16159cd.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5427-13d6ffd380fdfab7.js +0 -1
- khoj/interface/compiled/_next/static/chunks/558-c14e76cff03f6a60.js +0 -1
- khoj/interface/compiled/_next/static/chunks/5830.8876eccb82da9b7d.js +0 -262
- khoj/interface/compiled/_next/static/chunks/6230.88a71d8145347b3f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7161.77e0530a40ad5ca8.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7200-ac3b2e37ff30e126.js +0 -1
- khoj/interface/compiled/_next/static/chunks/7505.c31027a3695bdebb.js +0 -148
- khoj/interface/compiled/_next/static/chunks/7760.35649cc21d9585bd.js +0 -56
- khoj/interface/compiled/_next/static/chunks/83.48e2db193a940052.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8427.844694e06133fb51.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8665.4db7e6b2e8933497.js +0 -174
- khoj/interface/compiled/_next/static/chunks/872.caf84cc1a39ae59f.js +0 -1
- khoj/interface/compiled/_next/static/chunks/8890.6e8a59e4de6978bc.js +0 -215
- khoj/interface/compiled/_next/static/chunks/8950.5f2272e0ac923f9e.js +0 -1
- khoj/interface/compiled/_next/static/chunks/90542734.2c21f16f18b22411.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9202.c703864fcedc8d1f.js +0 -63
- khoj/interface/compiled/_next/static/chunks/9320.6aca4885d541aa44.js +0 -24
- khoj/interface/compiled/_next/static/chunks/9535.f78cd92d03331e55.js +0 -1
- khoj/interface/compiled/_next/static/chunks/9968.b111fc002796da81.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/layout-e00fb81dca656a10.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/agents/page-9a4610474cd59a71.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/automations/page-f7bb9d777b7745d4.js +0 -1
- khoj/interface/compiled/_next/static/chunks/app/share/chat/layout-e8e5db7830bf3f47.js +0 -1
- khoj/interface/compiled/_next/static/chunks/f3e3247b-1758d4651e4457c2.js +0 -10
- khoj/interface/compiled/_next/static/chunks/webpack-338a5000c912cc94.js +0 -1
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +0 -69
- khoj/migrations/migrate_offline_chat_default_model_2.py +0 -71
- khoj/migrations/migrate_offline_chat_schema.py +0 -83
- khoj/migrations/migrate_offline_model.py +0 -29
- khoj/migrations/migrate_processor_config_openai.py +0 -67
- khoj/migrations/migrate_server_pg.py +0 -132
- khoj/migrations/migrate_version.py +0 -17
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +0 -224
- khoj/processor/conversation/offline/utils.py +0 -80
- khoj/processor/conversation/offline/whisper.py +0 -15
- khoj/utils/fs_syncer.py +0 -252
- /khoj/interface/compiled/_next/static/{7GoMcE8WpP9fbfYZXv4Nv → RYbQvo3AvgOR0bEVVfxF4}/_buildManifest.js +0 -0
- /khoj/interface/compiled/_next/static/{7GoMcE8WpP9fbfYZXv4Nv → RYbQvo3AvgOR0bEVVfxF4}/_ssgManifest.js +0 -0
- /khoj/interface/compiled/_next/static/chunks/app/search/{page-4885df3cd175c957.js → page-3639e50ec3e9acfd.js} +0 -0
- {khoj-2.0.0b12.dev5.dist-info → khoj-2.0.0b13.dist-info}/WHEEL +0 -0
- {khoj-2.0.0b12.dev5.dist-info → khoj-2.0.0b13.dist-info}/entry_points.txt +0 -0
- {khoj-2.0.0b12.dev5.dist-info → khoj-2.0.0b13.dist-info}/licenses/LICENSE +0 -0
khoj/utils/helpers.py
CHANGED
@@ -47,7 +47,6 @@ if TYPE_CHECKING:
|
|
47
47
|
from sentence_transformers import CrossEncoder, SentenceTransformer
|
48
48
|
|
49
49
|
from khoj.utils.models import BaseEncoder
|
50
|
-
from khoj.utils.rawconfig import AppConfig
|
51
50
|
|
52
51
|
logger = logging.getLogger(__name__)
|
53
52
|
|
@@ -78,7 +77,7 @@ class AsyncIteratorWrapper:
|
|
78
77
|
|
79
78
|
|
80
79
|
def is_none_or_empty(item):
|
81
|
-
return item == None or (hasattr(item, "__iter__") and len(item) == 0) or item == ""
|
80
|
+
return item is None or (hasattr(item, "__iter__") and len(item) == 0) or item == ""
|
82
81
|
|
83
82
|
|
84
83
|
def to_snake_case_from_dash(item: str):
|
@@ -98,7 +97,7 @@ def get_from_dict(dictionary, *args):
|
|
98
97
|
Returns: dictionary[args[0]][args[1]]... or None if any keys missing"""
|
99
98
|
current = dictionary
|
100
99
|
for arg in args:
|
101
|
-
if not hasattr(current, "__iter__") or not arg in current:
|
100
|
+
if not hasattr(current, "__iter__") or arg not in current:
|
102
101
|
return None
|
103
102
|
current = current[arg]
|
104
103
|
return current
|
@@ -267,23 +266,16 @@ def get_server_id():
|
|
267
266
|
return server_id
|
268
267
|
|
269
268
|
|
270
|
-
def telemetry_disabled(app_config: AppConfig, telemetry_disable_env) -> bool:
|
271
|
-
if telemetry_disable_env is True:
|
272
|
-
return True
|
273
|
-
return not app_config or not app_config.should_log_telemetry
|
274
|
-
|
275
|
-
|
276
269
|
def log_telemetry(
|
277
270
|
telemetry_type: str,
|
278
271
|
api: str = None,
|
279
272
|
client: Optional[str] = None,
|
280
|
-
app_config: Optional[AppConfig] = None,
|
281
273
|
disable_telemetry_env: bool = False,
|
282
274
|
properties: dict = None,
|
283
275
|
):
|
284
276
|
"""Log basic app usage telemetry like client, os, api called"""
|
285
277
|
# Do not log usage telemetry, if telemetry is disabled via app config
|
286
|
-
if telemetry_disabled(app_config, disable_telemetry_env):
|
278
|
+
if disable_telemetry_env:
|
287
279
|
return []
|
288
280
|
|
289
281
|
if properties.get("server_id") is None:
|
@@ -435,7 +427,7 @@ class ConversationCommand(str, Enum):
|
|
435
427
|
SemanticSearchFiles = "semantic_search_files"
|
436
428
|
SearchWeb = "search_web"
|
437
429
|
ReadWebpage = "read_webpage"
|
438
|
-
|
430
|
+
PythonCoder = "run_code"
|
439
431
|
OperateComputer = "operate_computer"
|
440
432
|
|
441
433
|
|
@@ -511,15 +503,15 @@ tools_for_research_llm = {
|
|
511
503
|
"required": ["urls", "query"],
|
512
504
|
},
|
513
505
|
),
|
514
|
-
ConversationCommand.
|
515
|
-
name="
|
516
|
-
description=e2b_tool_description if is_e2b_code_sandbox_enabled() else terrarium_tool_description,
|
506
|
+
ConversationCommand.PythonCoder: ToolDefinition(
|
507
|
+
name="python_coder",
|
508
|
+
description="Ask them " + e2b_tool_description if is_e2b_code_sandbox_enabled() else terrarium_tool_description,
|
517
509
|
schema={
|
518
510
|
"type": "object",
|
519
511
|
"properties": {
|
520
512
|
"query": {
|
521
513
|
"type": "string",
|
522
|
-
"description": "Detailed query and all input data required to generate, execute code in the sandbox.",
|
514
|
+
"description": "Detailed query and all input data required for the Python Coder to generate, execute code in the sandbox.",
|
523
515
|
},
|
524
516
|
},
|
525
517
|
"required": ["query"],
|
@@ -759,7 +751,7 @@ def is_valid_url(url: str) -> bool:
|
|
759
751
|
try:
|
760
752
|
result = urlparse(url.strip())
|
761
753
|
return all([result.scheme, result.netloc])
|
762
|
-
except:
|
754
|
+
except Exception:
|
763
755
|
return False
|
764
756
|
|
765
757
|
|
@@ -767,7 +759,7 @@ def is_internet_connected():
|
|
767
759
|
try:
|
768
760
|
response = requests.head("https://www.google.com")
|
769
761
|
return response.status_code == 200
|
770
|
-
except:
|
762
|
+
except Exception:
|
771
763
|
return False
|
772
764
|
|
773
765
|
|
khoj/utils/initialization.py
CHANGED
@@ -16,7 +16,6 @@ from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tok
|
|
16
16
|
from khoj.utils.constants import (
|
17
17
|
default_anthropic_chat_models,
|
18
18
|
default_gemini_chat_models,
|
19
|
-
default_offline_chat_models,
|
20
19
|
default_openai_chat_models,
|
21
20
|
)
|
22
21
|
|
@@ -61,9 +60,7 @@ def initialization(interactive: bool = True):
|
|
61
60
|
]
|
62
61
|
default_chat_models = known_available_models + other_available_models
|
63
62
|
except Exception as e:
|
64
|
-
logger.warning(
|
65
|
-
f"⚠️ Failed to fetch {provider} chat models. Fallback to default models. Error: {str(e)}"
|
66
|
-
)
|
63
|
+
logger.warning(f"⚠️ Failed to fetch {provider} chat models. Fallback to default models. Error: {str(e)}")
|
67
64
|
|
68
65
|
# Set up OpenAI's online chat models
|
69
66
|
openai_configured, openai_provider = _setup_chat_model_provider(
|
@@ -72,7 +69,6 @@ def initialization(interactive: bool = True):
|
|
72
69
|
default_api_key=openai_api_key,
|
73
70
|
api_base_url=openai_base_url,
|
74
71
|
vision_enabled=True,
|
75
|
-
is_offline=False,
|
76
72
|
interactive=interactive,
|
77
73
|
provider_name=provider,
|
78
74
|
)
|
@@ -118,7 +114,6 @@ def initialization(interactive: bool = True):
|
|
118
114
|
default_gemini_chat_models,
|
119
115
|
default_api_key=os.getenv("GEMINI_API_KEY"),
|
120
116
|
vision_enabled=True,
|
121
|
-
is_offline=False,
|
122
117
|
interactive=interactive,
|
123
118
|
provider_name="Google Gemini",
|
124
119
|
)
|
@@ -145,40 +140,11 @@ def initialization(interactive: bool = True):
|
|
145
140
|
default_anthropic_chat_models,
|
146
141
|
default_api_key=os.getenv("ANTHROPIC_API_KEY"),
|
147
142
|
vision_enabled=True,
|
148
|
-
is_offline=False,
|
149
|
-
interactive=interactive,
|
150
|
-
)
|
151
|
-
|
152
|
-
# Set up offline chat models
|
153
|
-
_setup_chat_model_provider(
|
154
|
-
ChatModel.ModelType.OFFLINE,
|
155
|
-
default_offline_chat_models,
|
156
|
-
default_api_key=None,
|
157
|
-
vision_enabled=False,
|
158
|
-
is_offline=True,
|
159
143
|
interactive=interactive,
|
160
144
|
)
|
161
145
|
|
162
146
|
logger.info("🗣️ Chat model configuration complete")
|
163
147
|
|
164
|
-
# Set up offline speech to text model
|
165
|
-
use_offline_speech2text_model = "n" if not interactive else input("Use offline speech to text model? (y/n): ")
|
166
|
-
if use_offline_speech2text_model == "y":
|
167
|
-
logger.info("🗣️ Setting up offline speech to text model")
|
168
|
-
# Delete any existing speech to text model options. There can only be one.
|
169
|
-
SpeechToTextModelOptions.objects.all().delete()
|
170
|
-
|
171
|
-
default_offline_speech2text_model = "base"
|
172
|
-
offline_speech2text_model = input(
|
173
|
-
f"Enter the Whisper model to use Offline (default: {default_offline_speech2text_model}): "
|
174
|
-
)
|
175
|
-
offline_speech2text_model = offline_speech2text_model or default_offline_speech2text_model
|
176
|
-
SpeechToTextModelOptions.objects.create(
|
177
|
-
model_name=offline_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OFFLINE
|
178
|
-
)
|
179
|
-
|
180
|
-
logger.info(f"🗣️ Offline speech to text model configured to {offline_speech2text_model}")
|
181
|
-
|
182
148
|
def _setup_chat_model_provider(
|
183
149
|
model_type: ChatModel.ModelType,
|
184
150
|
default_chat_models: list,
|
@@ -186,7 +152,6 @@ def initialization(interactive: bool = True):
|
|
186
152
|
interactive: bool,
|
187
153
|
api_base_url: str = None,
|
188
154
|
vision_enabled: bool = False,
|
189
|
-
is_offline: bool = False,
|
190
155
|
provider_name: str = None,
|
191
156
|
) -> Tuple[bool, AiModelApi]:
|
192
157
|
supported_vision_models = (
|
@@ -195,11 +160,6 @@ def initialization(interactive: bool = True):
|
|
195
160
|
provider_name = provider_name or model_type.name.capitalize()
|
196
161
|
|
197
162
|
default_use_model = default_api_key is not None
|
198
|
-
# If not in interactive mode & in the offline setting, it's most likely that we're running in a containerized environment.
|
199
|
-
# This usually means there's not enough RAM to load offline models directly within the application.
|
200
|
-
# In such cases, we default to not using the model -- it's recommended to use another service like Ollama to host the model locally in that case.
|
201
|
-
if is_offline:
|
202
|
-
default_use_model = False
|
203
163
|
|
204
164
|
use_model_provider = (
|
205
165
|
default_use_model if not interactive else input(f"Add {provider_name} chat models? (y/n): ") == "y"
|
@@ -211,13 +171,12 @@ def initialization(interactive: bool = True):
|
|
211
171
|
logger.info(f"️💬 Setting up your {provider_name} chat configuration")
|
212
172
|
|
213
173
|
ai_model_api = None
|
214
|
-
if
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
ai_model_api = AiModelApi.objects.create(api_key=api_key, name=provider_name, api_base_url=api_base_url)
|
174
|
+
if interactive:
|
175
|
+
user_api_key = input(f"Enter your {provider_name} API key (default: {default_api_key}): ")
|
176
|
+
api_key = user_api_key if user_api_key != "" else default_api_key
|
177
|
+
else:
|
178
|
+
api_key = default_api_key
|
179
|
+
ai_model_api = AiModelApi.objects.create(api_key=api_key, name=provider_name, api_base_url=api_base_url)
|
221
180
|
|
222
181
|
if interactive:
|
223
182
|
user_chat_models = input(
|
khoj/utils/models.py
CHANGED
@@ -8,12 +8,10 @@ from tqdm import trange
|
|
8
8
|
|
9
9
|
class BaseEncoder(ABC):
|
10
10
|
@abstractmethod
|
11
|
-
def __init__(self, model_name: str, device: torch.device = None, **kwargs):
|
12
|
-
...
|
11
|
+
def __init__(self, model_name: str, device: torch.device = None, **kwargs): ...
|
13
12
|
|
14
13
|
@abstractmethod
|
15
|
-
def encode(self, entries: List[str], device: torch.device = None, **kwargs) -> torch.Tensor:
|
16
|
-
...
|
14
|
+
def encode(self, entries: List[str], device: torch.device = None, **kwargs) -> torch.Tensor: ...
|
17
15
|
|
18
16
|
|
19
17
|
class OpenAI(BaseEncoder):
|
khoj/utils/rawconfig.py
CHANGED
@@ -1,8 +1,7 @@
|
|
1
1
|
# System Packages
|
2
2
|
import json
|
3
3
|
import uuid
|
4
|
-
from pathlib import Path
|
5
|
-
from typing import Dict, List, Optional
|
4
|
+
from typing import List, Optional
|
6
5
|
|
7
6
|
from pydantic import BaseModel
|
8
7
|
|
@@ -48,17 +47,6 @@ class FilesFilterRequest(BaseModel):
|
|
48
47
|
conversation_id: str
|
49
48
|
|
50
49
|
|
51
|
-
class TextConfigBase(ConfigBase):
|
52
|
-
compressed_jsonl: Path
|
53
|
-
embeddings_file: Path
|
54
|
-
|
55
|
-
|
56
|
-
class TextContentConfig(ConfigBase):
|
57
|
-
input_files: Optional[List[Path]] = None
|
58
|
-
input_filter: Optional[List[str]] = None
|
59
|
-
index_heading_entries: Optional[bool] = False
|
60
|
-
|
61
|
-
|
62
50
|
class GithubRepoConfig(ConfigBase):
|
63
51
|
name: str
|
64
52
|
owner: str
|
@@ -74,62 +62,6 @@ class NotionContentConfig(ConfigBase):
|
|
74
62
|
token: str
|
75
63
|
|
76
64
|
|
77
|
-
class ContentConfig(ConfigBase):
|
78
|
-
org: Optional[TextContentConfig] = None
|
79
|
-
markdown: Optional[TextContentConfig] = None
|
80
|
-
pdf: Optional[TextContentConfig] = None
|
81
|
-
plaintext: Optional[TextContentConfig] = None
|
82
|
-
github: Optional[GithubContentConfig] = None
|
83
|
-
notion: Optional[NotionContentConfig] = None
|
84
|
-
image: Optional[TextContentConfig] = None
|
85
|
-
docx: Optional[TextContentConfig] = None
|
86
|
-
|
87
|
-
|
88
|
-
class ImageSearchConfig(ConfigBase):
|
89
|
-
encoder: str
|
90
|
-
encoder_type: Optional[str] = None
|
91
|
-
model_directory: Optional[Path] = None
|
92
|
-
|
93
|
-
class Config:
|
94
|
-
protected_namespaces = ()
|
95
|
-
|
96
|
-
|
97
|
-
class SearchConfig(ConfigBase):
|
98
|
-
image: Optional[ImageSearchConfig] = None
|
99
|
-
|
100
|
-
|
101
|
-
class OpenAIProcessorConfig(ConfigBase):
|
102
|
-
api_key: str
|
103
|
-
chat_model: Optional[str] = "gpt-4o-mini"
|
104
|
-
|
105
|
-
|
106
|
-
class OfflineChatProcessorConfig(ConfigBase):
|
107
|
-
chat_model: Optional[str] = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
|
108
|
-
|
109
|
-
|
110
|
-
class ConversationProcessorConfig(ConfigBase):
|
111
|
-
openai: Optional[OpenAIProcessorConfig] = None
|
112
|
-
offline_chat: Optional[OfflineChatProcessorConfig] = None
|
113
|
-
max_prompt_size: Optional[int] = None
|
114
|
-
tokenizer: Optional[str] = None
|
115
|
-
|
116
|
-
|
117
|
-
class ProcessorConfig(ConfigBase):
|
118
|
-
conversation: Optional[ConversationProcessorConfig] = None
|
119
|
-
|
120
|
-
|
121
|
-
class AppConfig(ConfigBase):
|
122
|
-
should_log_telemetry: bool = True
|
123
|
-
|
124
|
-
|
125
|
-
class FullConfig(ConfigBase):
|
126
|
-
content_type: Optional[ContentConfig] = None
|
127
|
-
search_type: Optional[SearchConfig] = None
|
128
|
-
processor: Optional[ProcessorConfig] = None
|
129
|
-
app: Optional[AppConfig] = AppConfig()
|
130
|
-
version: Optional[str] = None
|
131
|
-
|
132
|
-
|
133
65
|
class SearchResponse(ConfigBase):
|
134
66
|
entry: str
|
135
67
|
score: float
|
khoj/utils/state.py
CHANGED
@@ -2,7 +2,7 @@ import os
|
|
2
2
|
import threading
|
3
3
|
from collections import defaultdict
|
4
4
|
from pathlib import Path
|
5
|
-
from typing import
|
5
|
+
from typing import Dict, List
|
6
6
|
|
7
7
|
from apscheduler.schedulers.background import BackgroundScheduler
|
8
8
|
from openai import OpenAI
|
@@ -12,19 +12,14 @@ from whisper import Whisper
|
|
12
12
|
from khoj.database.models import ProcessLock
|
13
13
|
from khoj.processor.embeddings import CrossEncoderModel, EmbeddingsModel
|
14
14
|
from khoj.utils import config as utils_config
|
15
|
-
from khoj.utils.config import OfflineChatProcessorModel, SearchModels
|
16
15
|
from khoj.utils.helpers import LRU, get_device, is_env_var_true
|
17
|
-
from khoj.utils.rawconfig import FullConfig
|
18
16
|
|
19
17
|
# Application Global State
|
20
|
-
config = FullConfig()
|
21
|
-
search_models = SearchModels()
|
22
18
|
embeddings_model: Dict[str, EmbeddingsModel] = None
|
23
19
|
cross_encoder_model: Dict[str, CrossEncoderModel] = None
|
24
20
|
openai_client: OpenAI = None
|
25
|
-
offline_chat_processor_config: OfflineChatProcessorModel = None
|
26
21
|
whisper_model: Whisper = None
|
27
|
-
|
22
|
+
log_file: Path = None
|
28
23
|
verbose: int = 0
|
29
24
|
host: str = None
|
30
25
|
port: int = None
|
@@ -39,7 +34,6 @@ telemetry: List[Dict[str, str]] = []
|
|
39
34
|
telemetry_disabled: bool = is_env_var_true("KHOJ_TELEMETRY_DISABLE")
|
40
35
|
khoj_version: str = None
|
41
36
|
device = get_device()
|
42
|
-
chat_on_gpu: bool = True
|
43
37
|
anonymous_mode: bool = False
|
44
38
|
pretrained_tokenizers: Dict[str, PreTrainedTokenizer | PreTrainedTokenizerFast] = dict()
|
45
39
|
billing_enabled: bool = (
|
khoj/utils/yaml.py
CHANGED
@@ -1,47 +1,8 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
|
3
1
|
import yaml
|
4
2
|
|
5
|
-
from khoj.utils import state
|
6
|
-
from khoj.utils.rawconfig import FullConfig
|
7
|
-
|
8
3
|
# Do not emit tags when dumping to YAML
|
9
4
|
yaml.emitter.Emitter.process_tag = lambda self, *args, **kwargs: None # type: ignore[assignment]
|
10
5
|
|
11
6
|
|
12
|
-
def save_config_to_file_updated_state():
|
13
|
-
with open(state.config_file, "w") as outfile:
|
14
|
-
yaml.dump(yaml.safe_load(state.config.json(by_alias=True)), outfile)
|
15
|
-
outfile.close()
|
16
|
-
return state.config
|
17
|
-
|
18
|
-
|
19
|
-
def save_config_to_file(yaml_config: dict, yaml_config_file: Path):
|
20
|
-
"Write config to YML file"
|
21
|
-
# Create output directory, if it doesn't exist
|
22
|
-
yaml_config_file.parent.mkdir(parents=True, exist_ok=True)
|
23
|
-
|
24
|
-
with open(yaml_config_file, "w", encoding="utf-8") as config_file:
|
25
|
-
yaml.safe_dump(yaml_config, config_file, allow_unicode=True)
|
26
|
-
|
27
|
-
|
28
|
-
def load_config_from_file(yaml_config_file: Path) -> dict:
|
29
|
-
"Read config from YML file"
|
30
|
-
config_from_file = None
|
31
|
-
with open(yaml_config_file, "r", encoding="utf-8") as config_file:
|
32
|
-
config_from_file = yaml.safe_load(config_file)
|
33
|
-
return config_from_file
|
34
|
-
|
35
|
-
|
36
|
-
def parse_config_from_string(yaml_config: dict) -> FullConfig:
|
37
|
-
"Parse and validate config in YML string"
|
38
|
-
return FullConfig.model_validate(yaml_config)
|
39
|
-
|
40
|
-
|
41
|
-
def parse_config_from_file(yaml_config_file):
|
42
|
-
"Parse and validate config in YML file"
|
43
|
-
return parse_config_from_string(load_config_from_file(yaml_config_file))
|
44
|
-
|
45
|
-
|
46
7
|
def yaml_dump(data):
|
47
8
|
return yaml.dump(data, allow_unicode=True, sort_keys=False, default_flow_style=False)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: khoj
|
3
|
-
Version: 2.0.0b12.dev5
|
3
|
+
Version: 2.0.0b13
|
4
4
|
Summary: Your Second Brain
|
5
5
|
Project-URL: Homepage, https://khoj.dev
|
6
6
|
Project-URL: Documentation, https://docs.khoj.dev
|
@@ -27,6 +27,7 @@ Requires-Dist: anyio~=4.8.0
|
|
27
27
|
Requires-Dist: apscheduler~=3.10.0
|
28
28
|
Requires-Dist: authlib==1.2.1
|
29
29
|
Requires-Dist: beautifulsoup4~=4.12.3
|
30
|
+
Requires-Dist: click<8.2.2
|
30
31
|
Requires-Dist: cron-descriptor==1.4.3
|
31
32
|
Requires-Dist: dateparser>=1.1.1
|
32
33
|
Requires-Dist: defusedxml==0.7.1
|
@@ -47,7 +48,6 @@ Requires-Dist: itsdangerous==2.1.2
|
|
47
48
|
Requires-Dist: jinja2==3.1.6
|
48
49
|
Requires-Dist: langchain-community==0.3.3
|
49
50
|
Requires-Dist: langchain-text-splitters==0.3.1
|
50
|
-
Requires-Dist: llama-cpp-python==0.2.88
|
51
51
|
Requires-Dist: lxml==4.9.3
|
52
52
|
Requires-Dist: magika~=0.5.1
|
53
53
|
Requires-Dist: markdown-it-py~=3.0.0
|
@@ -79,7 +79,6 @@ Requires-Dist: tzdata==2023.3
|
|
79
79
|
Requires-Dist: uvicorn==0.30.6
|
80
80
|
Requires-Dist: websockets==13.0
|
81
81
|
Provides-Extra: dev
|
82
|
-
Requires-Dist: black>=23.1.0; extra == 'dev'
|
83
82
|
Requires-Dist: boto3>=1.34.57; extra == 'dev'
|
84
83
|
Requires-Dist: datasets; extra == 'dev'
|
85
84
|
Requires-Dist: factory-boy>=3.2.1; extra == 'dev'
|
@@ -94,6 +93,7 @@ Requires-Dist: pytest-asyncio==0.21.1; extra == 'dev'
|
|
94
93
|
Requires-Dist: pytest-django==4.5.2; extra == 'dev'
|
95
94
|
Requires-Dist: pytest-xdist[psutil]; extra == 'dev'
|
96
95
|
Requires-Dist: pytest>=7.1.2; extra == 'dev'
|
96
|
+
Requires-Dist: ruff>=0.12.0; extra == 'dev'
|
97
97
|
Requires-Dist: stripe==7.3.0; extra == 'dev'
|
98
98
|
Requires-Dist: twilio==8.11; extra == 'dev'
|
99
99
|
Provides-Extra: local
|