khoj 1.23.2__py3-none-any.whl → 1.23.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. khoj/interface/compiled/404/index.html +1 -1
  2. khoj/interface/compiled/_next/static/chunks/app/agents/{page-6ade083d5e27a023.js → page-922694b75f1fb67b.js} +1 -1
  3. khoj/interface/compiled/_next/static/chunks/app/automations/{page-6ea3381528603372.js → page-3f4b6ff0261e19b7.js} +1 -1
  4. khoj/interface/compiled/_next/static/chunks/app/chat/{page-4534a3104d24ddc9.js → page-6fac068cc1cca546.js} +1 -1
  5. khoj/interface/compiled/_next/static/chunks/app/factchecker/{page-04a19ab1a988976f.js → page-828cf3c5b8e3af79.js} +1 -1
  6. khoj/interface/compiled/_next/static/chunks/app/{page-8465c90401833c39.js → page-b9e0500234c59a3e.js} +1 -1
  7. khoj/interface/compiled/_next/static/chunks/app/search/{page-fa15807b1ad7e30b.js → page-dcd385f03255ef36.js} +1 -1
  8. khoj/interface/compiled/_next/static/chunks/app/settings/{page-1a2acc46cdabaf4a.js → page-ddcd51147d18c694.js} +1 -1
  9. khoj/interface/compiled/_next/static/chunks/app/share/chat/{page-e20f54450d3ce6c0.js → page-a84001b4724b5463.js} +1 -1
  10. khoj/interface/compiled/_next/static/chunks/{webpack-14f504dff756f79c.js → webpack-56df4667546b1c60.js} +1 -1
  11. khoj/interface/compiled/_next/static/css/4cae6c0e5c72fb2d.css +1 -0
  12. khoj/interface/compiled/_next/static/css/92c48eece0b102b9.css +1 -0
  13. khoj/interface/compiled/agents/index.html +1 -1
  14. khoj/interface/compiled/agents/index.txt +2 -2
  15. khoj/interface/compiled/automations/index.html +1 -1
  16. khoj/interface/compiled/automations/index.txt +2 -2
  17. khoj/interface/compiled/chat/index.html +1 -1
  18. khoj/interface/compiled/chat/index.txt +2 -2
  19. khoj/interface/compiled/factchecker/index.html +1 -1
  20. khoj/interface/compiled/factchecker/index.txt +2 -2
  21. khoj/interface/compiled/index.html +1 -1
  22. khoj/interface/compiled/index.txt +2 -2
  23. khoj/interface/compiled/search/index.html +1 -1
  24. khoj/interface/compiled/search/index.txt +2 -2
  25. khoj/interface/compiled/settings/index.html +1 -1
  26. khoj/interface/compiled/settings/index.txt +2 -2
  27. khoj/interface/compiled/share/chat/index.html +1 -1
  28. khoj/interface/compiled/share/chat/index.txt +2 -2
  29. khoj/interface/email/magic_link.html +1 -1
  30. khoj/interface/email/task.html +31 -34
  31. khoj/interface/email/welcome.html +82 -53
  32. khoj/main.py +1 -1
  33. khoj/processor/content/images/image_to_entries.py +6 -4
  34. khoj/processor/conversation/utils.py +12 -7
  35. khoj/routers/helpers.py +4 -0
  36. khoj/utils/cli.py +6 -0
  37. khoj/utils/constants.py +9 -2
  38. khoj/utils/initialization.py +158 -71
  39. {khoj-1.23.2.dist-info → khoj-1.23.3.dist-info}/METADATA +2 -2
  40. {khoj-1.23.2.dist-info → khoj-1.23.3.dist-info}/RECORD +48 -48
  41. khoj/interface/compiled/_next/static/css/43939edc2f9b2043.css +0 -1
  42. khoj/interface/compiled/_next/static/css/592ca99f5122e75a.css +0 -1
  43. /khoj/interface/compiled/_next/static/{9uwb5t7DoQqI3yWXNbS71 → H8WZzL_OHSsjoAIEAIo8M}/_buildManifest.js +0 -0
  44. /khoj/interface/compiled/_next/static/{9uwb5t7DoQqI3yWXNbS71 → H8WZzL_OHSsjoAIEAIo8M}/_ssgManifest.js +0 -0
  45. /khoj/interface/compiled/_next/static/chunks/{8423-14fc72aec9104ce9.js → 8423-ce22327cf2d2edae.js} +0 -0
  46. /khoj/interface/compiled/_next/static/chunks/{9178-c153fc402c970365.js → 9178-3a0baad1c172d515.js} +0 -0
  47. /khoj/interface/compiled/_next/static/chunks/{9417-5d14ac74aaab2c66.js → 9417-2e54c6fd056982d8.js} +0 -0
  48. {khoj-1.23.2.dist-info → khoj-1.23.3.dist-info}/WHEEL +0 -0
  49. {khoj-1.23.2.dist-info → khoj-1.23.3.dist-info}/entry_points.txt +0 -0
  50. {khoj-1.23.2.dist-info → khoj-1.23.3.dist-info}/licenses/LICENSE +0 -0
@@ -1,61 +1,90 @@
1
1
  <!DOCTYPE html>
2
- <html>
3
- <head>
4
- <title>Welcome to Khoj</title>
5
- </head>
6
- <body>
7
- <body style="font-family: 'Verdana', sans-serif; font-weight: 400; font-style: normal; padding: 0; text-align: left; width: 600px; margin: 20px auto;">
8
- <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=no">
9
- <a class="logo" href="https://khoj.dev" target="_blank" style="text-decoration: none; text-decoration: underline dotted;">
10
- <img src="https://khoj.dev/khoj-logo-sideways-500.png" alt="Khoj Logo" style="width: 100px;">
11
- </a>
12
- <div class="calls-to-action" style="margin-top: 20px;">
13
- <div>
14
- <h1 style="color: #333; font-size: large; font-weight: bold; margin: 0; line-height: 1.5; background-color: #fee285; padding: 8px; box-shadow: 6px 6px rgba(0, 0, 0, 1.5);">Merge AI with your brain</h1>
15
- <p style="color: #333; font-size: medium; margin-top: 20px; padding: 0; line-height: 1.5;">Hi {{name}}! We are psyched to be part of your journey with personal AI. To better help you, we're committed to staying transparent, accessible, and completely open-source.</p>
16
- <a class="button" href="https://app.khoj.dev" target="_blank" style="display: block; width: 200px; text-align: center; padding: 10px; margin-top: 20px; color: #333; background-color: #fee285; text-decoration: none; border-radius: 5px; font-weight: bold; transition: background-color 0.3s ease; box-shadow: 6px 6px rgba(0, 0, 0, 1.0); padding: 4px; font-size: large; text-transform: uppercase;">Get Started</a>
17
- <p style="color: #333; font-size: medium; margin-top: 20px; padding: 0; line-height: 1.5;">You're about to get a whole lot more productive.</p>
18
-
19
- <div style="display: grid; grid-template-columns: 1fr 1fr; grid-gap: 12px; margin-top: 20px;">
20
- <div style="border: 1px solid black; border-radius: 8px; padding: 8px; box-shadow: 6px 6px rgba(0, 0, 0, 1.0); margin-top: 20px;">
21
- <a href="https://docs.khoj.dev/features/online_search" style="text-decoration: none; text-decoration: underline dotted;">
22
- <h3 style="color: #333; font-size: large; margin: 0; padding: 0; line-height: 2.0; background-color: #b8f1c7; padding: 8px; ">Ditch the search bar</h3>
23
- </a>
24
- <p style="color: #333; font-size: medium; margin-top: 20px; padding: 0; line-height: 1.5;">You don't need to click around Google results and sift through information yourself, because Khoj is connected to the internet.</p>
25
- </div>
26
- <div style="border: 1px solid black; border-radius: 8px; padding: 8px; box-shadow: 6px 6px rgba(0, 0, 0, 1.0); margin-top: 20px;">
27
- <a href="https://app.khoj.dev/agents" style="text-decoration: none; text-decoration: underline dotted;">
28
- <h3 style="color: #333; font-size: large; margin: 0; padding: 0; line-height: 2.0; background-color: #b8f1c7; padding: 8px;">Get a village, not just an agent</h3>
29
- </a>
30
- <p style="color: #333; font-size: medium; margin-top: 20px; padding: 0; line-height: 1.5;">Khoj can fill the need for more specialized assistance, <a href="https://blog.khoj.dev/posts/using-khoj-for-studying/">such as tutoring</a>, with its curated agents. You get a whole team, always available.</p>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Welcome to Khoj</title>
8
+ </head>
9
+
10
+ <body
11
+ style="font-family: 'Arial', sans-serif; line-height: 1.6; color: #333; max-width: 600px; margin: 0 auto; padding: 20px; background-color: #f5f5f5;">
12
+ <div
13
+ style="background-color: #ffffff; border-radius: 10px; box-shadow: 0 0 20px rgba(0, 0, 0, 0.1); padding: 30px;">
14
+ <a href="https://khoj.dev" target="_blank"
15
+ style="display: block; text-align: center; margin-bottom: 20px; text-decoration: none;">
16
+ <img src="https://assets.khoj.dev/khoj_logo.png" alt="Khoj Logo" style="width: 120px;">
17
+ </a>
18
+
19
+ <h1
20
+ style="font-size: 24px; color: #2c3e50; margin-bottom: 20px; text-align: center; border-bottom: 2px solid #FFA07A; padding-bottom: 10px;">
21
+ Merge AI with your brain</h1>
22
+
23
+ <p style="font-size: 16px; color: #333; margin-bottom: 20px;">Hi {{name}}! We are psyched to be part of your
24
+ journey with personal AI. To better help you, we're committed to staying transparent, accessible, and
25
+ completely open-source.</p>
26
+
27
+ <a href="https://app.khoj.dev" target="_blank"
28
+ style="display: block; width: 200px; text-align: center; padding: 10px; margin: 20px auto; background-color: #FFA07A; color: #ffffff; text-decoration: none; border-radius: 5px; font-weight: bold; font-size: 16px; text-transform: uppercase;">Get
29
+ Started</a>
30
+
31
+ <p style="font-size: 16px; color: #333; margin-bottom: 20px;">You're about to get a whole lot more productive.
32
+ </p>
33
+ <a href="https://docs.khoj.dev/features/online_search"
34
+ style="color: #FFA07A; text-decoration: none; font-weight: bold; font-size: 14px;">
35
+ <div style="display: grid; grid-template-columns: 1fr 1fr; grid-gap: 20px; margin-bottom: 20px;">
36
+ <div style="background-color: #f8f9fa; border-left: 4px solid #FFA07A; padding: 15px;">
37
+ <h3 style="color: #2c3e50; margin-top: 0; font-size: 18px;">Ditch the search bar</h3>
38
+ <p style="font-size: 14px; color: #666; margin-bottom: 0;">You don't need to click around Google
39
+ results
40
+ and sift through information yourself, because Khoj is connected to the internet.</p>
41
+ </div>
42
+ </a>
43
+ <a href="https://app.khoj.dev/agents"
44
+ style="color: #FFA07A; text-decoration: none; font-weight: bold; font-size: 14px;">
45
+ <div style="background-color: #f8f9fa; border-left: 4px solid #FFA07A; padding: 15px;">
46
+ <h3 style="color: #2c3e50; margin-top: 0; font-size: 18px;">Get a village, not just an agent</h3>
47
+ <p style="font-size: 14px; color: #666; margin-bottom: 0;">Khoj can fill the need for more specialized
48
+ assistance, such as tutoring, with its curated agents. You get a whole team, always available.</p>
31
49
  </div>
32
- <div style="border: 1px solid black; border-radius: 8px; padding: 8px; box-shadow: 6px 6px rgba(0, 0, 0, 1.0); margin-top: 20px;">
33
- <a href="https://docs.khoj.dev/category/clients" style="text-decoration: none; text-decoration: underline dotted;">
34
- <h3 style="color: #333; font-size: large; margin: 0; padding: 0; line-height: 2.0; background-color: #b8f1c7; padding: 8px;">Available where you are</h3>
35
- </a>
36
- <p style="color: #333; font-size: medium; margin-top: 20px; padding: 0; line-height: 1.5;">Build on top of your digital brain. Khoj stores whatever data you share with it, so you can get answers from your personal notes and documents in your native language. You can engage from your desktop, Obsidian, WhatsApp, or the web.</p>
50
+ </a>
51
+ <a href="https://docs.khoj.dev/category/clients"
52
+ style="color: #FFA07A; text-decoration: none; font-weight: bold; font-size: 14px;">
53
+ <div style="background-color: #f8f9fa; border-left: 4px solid #FFA07A; padding: 15px;">
54
+ <h3 style="color: #2c3e50; margin-top: 0; font-size: 18px;">Activate your data</h3>
55
+ <p style="font-size: 14px; color: #666; margin-bottom: 0;">Build on top of your digital brain. Khoj
56
+ stores whatever data you share with it, so you can get answers from your personal notes and
57
+ documents in your native language.</p>
37
58
  </div>
38
- <div style="border: 1px solid black; border-radius: 8px; padding: 8px; box-shadow: 6px 6px rgba(0, 0, 0, 1.0); margin-top: 20px;">
39
- <a href="https://blog.khoj.dev/posts/how-khoj-generates-images/" style="text-decoration: none; text-decoration: underline dotted;">
40
- <h3 style="color: #333; font-size: large; margin: 0; padding: 0; line-height: 2.0; background-color: #b8f1c7; padding: 8px;">Create rich, contextual images</h3>
41
- </a>
42
- <p style="color: #333; font-size: medium; margin-top: 20px; padding: 0; line-height: 1.5;">With your shared data, Khoj can help you create astoundingly personal images depicting scenes of what's important to you.</p>
59
+ </a>
60
+ <a href="https://blog.khoj.dev/posts/how-khoj-generates-images/"
61
+ style="color: #FFA07A; text-decoration: none; font-weight: bold; font-size: 14px;">
62
+ <div style="background-color: #f8f9fa; border-left: 4px solid #FFA07A; padding: 15px;">
63
+ <h3 style="color: #2c3e50; margin-top: 0; font-size: 18px;">Create rich, contextual images</h3>
64
+ <p style="font-size: 14px; color: #666; margin-bottom: 0;">With your shared data, Khoj can help you
65
+ create astoundingly personal images depicting scenes of what's important to you.</p>
43
66
  </div>
44
- </div>
67
+ </a>
45
68
  </div>
46
- </div>
47
- <p style="color: #333; font-size: medium; margin-top: 20px; padding: 0; line-height: 1.5;">Like something? Dislike something? Searching for some other magical feature? Our inbox is always open for feedback! Reply to this email and say hi to introduce yourself 👋🏽.</p>
48
-
49
- <p style="color: #333; font-size: large; margin-top: 20px; padding: 0; line-height: 1.5;">- The Khoj Team</p>
50
- <table style="width: 100%; margin-top: 20px;">
51
- <tr>
52
- <td style="text-align: center;"><a href="https://docs.khoj.dev" target="_blank" style="padding: 8px; color: #333; background-color: #fee285; border-radius: 8px; box-shadow: 6px 6px rgba(0, 0, 0, 1.0);">Docs</a></td>
53
- <td style="text-align: center;"><a href="https://github.com/khoj-ai/khoj" target="_blank" style="padding: 8px; color: #333; background-color: #fee285; border-radius: 8px; box-shadow: 6px 6px rgba(0, 0, 0, 1.0);">GitHub</a></td>
54
- <td style="text-align: center;"><a href="https://twitter.com/khoj_ai" target="_blank" style="padding: 8px; color: #333; background-color: #fee285; border-radius: 8px; box-shadow: 6px 6px rgba(0, 0, 0, 1.0);">Twitter</a></td>
55
- <td style="text-align: center;"><a href="https://www.linkedin.com/company/khoj-ai" target="_blank" style="padding: 8px; color: #333; background-color: #fee285; border-radius: 8px; box-shadow: 6px 6px rgba(0, 0, 0, 1.0);">LinkedIn</a></td>
56
- <td style="text-align: center;"><a href="https://discord.gg/BDgyabRM6e" target="_blank" style="padding: 8px; color: #333; background-color: #fee285; border-radius: 8px; box-shadow: 6px 6px rgba(0, 0, 0, 1.0);">Discord</a></td>
57
- </tr>
58
- </table>
59
69
 
70
+ <p style="font-size: 16px; color: #333; margin-bottom: 20px;">Like something? Dislike something? Searching for
71
+ some other magical feature? Our inbox is always open for feedback! Reply to this email and say hi to
72
+ introduce yourself 👋🏽.</p>
73
+ <div style="font-size: 18px; font-weight: bold; margin-top: 30px; text-align: right;">- The Khoj Team</div>
74
+
75
+ <div style="margin-top: 30px; text-align: center;">
76
+ <a href="https://docs.khoj.dev" target="_blank"
77
+ style="display: inline-block; margin: 0 10px; padding: 8px 15px; background-color: #FFA07A; color: #ffffff; text-decoration: none; border-radius: 5px;">Docs</a>
78
+ <a href="https://github.com/khoj-ai/khoj" target="_blank"
79
+ style="display: inline-block; margin: 0 10px; padding: 8px 15px; background-color: #FFA07A; color: #ffffff; text-decoration: none; border-radius: 5px;">GitHub</a>
80
+ <a href="https://twitter.com/khoj_ai" target="_blank"
81
+ style="display: inline-block; margin: 0 10px; padding: 8px 15px; background-color: #FFA07A; color: #ffffff; text-decoration: none; border-radius: 5px;">Twitter</a>
82
+ <a href="https://www.linkedin.com/company/khoj-ai" target="_blank"
83
+ style="display: inline-block; margin: 0 10px; padding: 8px 15px; background-color: #FFA07A; color: #ffffff; text-decoration: none; border-radius: 5px;">LinkedIn</a>
84
+ <a href="https://discord.gg/BDgyabRM6e" target="_blank"
85
+ style="display: inline-block; margin: 0 10px; padding: 8px 15px; background-color: #FFA07A; color: #ffffff; text-decoration: none; border-radius: 5px;">Discord</a>
86
+ </div>
87
+ </div>
60
88
  </body>
89
+
61
90
  </html>
khoj/main.py CHANGED
@@ -131,7 +131,7 @@ def run(should_start_server=True):
131
131
  logger.info(f"📦 Initializing DB:\n{db_migrate_output.getvalue().strip()}")
132
132
  logger.debug(f"🌍 Initializing Web Client:\n{collectstatic_output.getvalue().strip()}")
133
133
 
134
- initialization()
134
+ initialization(not args.non_interactive)
135
135
 
136
136
  # Create app directory, if it doesn't exist
137
137
  state.config_file.parent.mkdir(parents=True, exist_ok=True)
@@ -4,8 +4,6 @@ import os
4
4
  from datetime import datetime
5
5
  from typing import Dict, List, Tuple
6
6
 
7
- from rapidocr_onnxruntime import RapidOCR
8
-
9
7
  from khoj.database.models import Entry as DbEntry
10
8
  from khoj.database.models import KhojUser
11
9
  from khoj.processor.content.text_to_entries import TextToEntries
@@ -58,7 +56,6 @@ class ImageToEntries(TextToEntries):
58
56
  entry_to_location_map: List[Tuple[str, str]] = []
59
57
  for image_file in image_files:
60
58
  try:
61
- loader = RapidOCR()
62
59
  bytes = image_files[image_file]
63
60
  # write the image to a temporary file
64
61
  timestamp_now = datetime.utcnow().timestamp()
@@ -71,13 +68,18 @@ class ImageToEntries(TextToEntries):
71
68
  bytes = image_files[image_file]
72
69
  f.write(bytes)
73
70
  try:
71
+ from rapidocr_onnxruntime import RapidOCR
72
+
73
+ loader = RapidOCR()
74
74
  image_entries_per_file = ""
75
75
  result, _ = loader(tmp_file)
76
76
  if result:
77
77
  expanded_entries = [text[1] for text in result]
78
78
  image_entries_per_file = " ".join(expanded_entries)
79
79
  except ImportError:
80
- logger.warning(f"Unable to process file: {image_file}. This file will not be indexed.")
80
+ logger.warning(
81
+ f"Unable to process image or scanned file for text: {image_file}. This file will not be indexed."
82
+ )
81
83
  continue
82
84
  entry_to_location_map.append((image_entries_per_file, image_file))
83
85
  entries.extend([image_entries_per_file])
@@ -18,13 +18,20 @@ from khoj.utils.helpers import is_none_or_empty, merge_dicts
18
18
 
19
19
  logger = logging.getLogger(__name__)
20
20
  model_to_prompt_size = {
21
+ # OpenAI Models
21
22
  "gpt-3.5-turbo": 12000,
22
- "gpt-3.5-turbo-0125": 12000,
23
- "gpt-4-0125-preview": 20000,
24
23
  "gpt-4-turbo-preview": 20000,
24
+ "gpt-4o": 20000,
25
25
  "gpt-4o-mini": 20000,
26
26
  "o1-preview": 20000,
27
27
  "o1-mini": 20000,
28
+ # Google Models
29
+ "gemini-1.5-flash": 20000,
30
+ "gemini-1.5-pro": 20000,
31
+ # Anthropic Models
32
+ "claude-3-5-sonnet-20240620": 20000,
33
+ "claude-3-opus-20240229": 20000,
34
+ # Offline Models
28
35
  "TheBloke/Mistral-7B-Instruct-v0.2-GGUF": 3500,
29
36
  "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF": 3500,
30
37
  "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
@@ -163,7 +170,7 @@ def generate_chatml_messages_with_context(
163
170
  if loaded_model:
164
171
  max_prompt_size = infer_max_tokens(loaded_model.n_ctx(), model_to_prompt_size.get(model_name, math.inf))
165
172
  else:
166
- max_prompt_size = model_to_prompt_size.get(model_name, 2000)
173
+ max_prompt_size = model_to_prompt_size.get(model_name, 10000)
167
174
 
168
175
  # Scale lookback turns proportional to max prompt size supported by model
169
176
  lookback_turns = max_prompt_size // 750
@@ -291,8 +298,6 @@ def reciprocal_conversation_to_chatml(message_pair):
291
298
  return [ChatMessage(content=message, role=role) for message, role in zip(message_pair, ["user", "assistant"])]
292
299
 
293
300
 
294
- def remove_json_codeblock(response):
301
+ def remove_json_codeblock(response: str):
295
302
  """Remove any markdown json codeblock formatting if present. Useful for non schema enforceable models"""
296
- if response.startswith("```json") and response.endswith("```"):
297
- response = response[7:-3]
298
- return response
303
+ return response.removeprefix("```json").removesuffix("```")
khoj/routers/helpers.py CHANGED
@@ -632,6 +632,7 @@ async def send_message_to_model_wrapper(
632
632
  messages=truncated_messages,
633
633
  loaded_model=loaded_model,
634
634
  model=chat_model,
635
+ max_prompt_size=max_tokens,
635
636
  streaming=False,
636
637
  response_type=response_type,
637
638
  )
@@ -721,6 +722,7 @@ def send_message_to_model_wrapper_sync(
721
722
  system_message=system_message,
722
723
  model_name=chat_model,
723
724
  loaded_model=loaded_model,
725
+ max_prompt_size=max_tokens,
724
726
  vision_enabled=vision_available,
725
727
  model_type=conversation_config.model_type,
726
728
  )
@@ -729,6 +731,7 @@ def send_message_to_model_wrapper_sync(
729
731
  messages=truncated_messages,
730
732
  loaded_model=loaded_model,
731
733
  model=chat_model,
734
+ max_prompt_size=max_tokens,
732
735
  streaming=False,
733
736
  response_type=response_type,
734
737
  )
@@ -739,6 +742,7 @@ def send_message_to_model_wrapper_sync(
739
742
  user_message=message,
740
743
  system_message=system_message,
741
744
  model_name=chat_model,
745
+ max_prompt_size=max_tokens,
742
746
  vision_enabled=vision_available,
743
747
  model_type=conversation_config.model_type,
744
748
  )
khoj/utils/cli.py CHANGED
@@ -50,6 +50,12 @@ def cli(args=None):
50
50
  default=False,
51
51
  help="Run Khoj in anonymous mode. This does not require any login for connecting users.",
52
52
  )
53
+ parser.add_argument(
54
+ "--non-interactive",
55
+ action="store_true",
56
+ default=False,
57
+ help="Start Khoj in non-interactive mode. Assumes interactive shell unavailable for config. E.g when run via Docker.",
58
+ )
53
59
 
54
60
  args, remaining_args = parser.parse_known_args(args)
55
61
 
khoj/utils/constants.py CHANGED
@@ -8,8 +8,15 @@ empty_escape_sequences = "\n|\r|\t| "
8
8
  app_env_filepath = "~/.khoj/env"
9
9
  telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
10
10
  content_directory = "~/.khoj/content/"
11
- default_offline_chat_model = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
12
- default_online_chat_model = "gpt-4o-mini"
11
+ default_offline_chat_models = [
12
+ "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
13
+ "bartowski/gemma-2-9b-it-GGUF",
14
+ "bartowski/gemma-2-2b-it-GGUF",
15
+ "bartowski/Phi-3.5-mini-instruct-GGUF",
16
+ ]
17
+ default_openai_chat_models = ["gpt-4o-mini", "gpt-4o"]
18
+ default_gemini_chat_models = ["gemini-1.5-flash", "gemini-1.5-pro"]
19
+ default_anthropic_chat_models = ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229"]
13
20
 
14
21
  empty_config = {
15
22
  "search-type": {
@@ -1,25 +1,37 @@
1
1
  import logging
2
2
  import os
3
+ from typing import Tuple
3
4
 
4
5
  from khoj.database.adapters import ConversationAdapters
5
6
  from khoj.database.models import (
6
7
  ChatModelOptions,
7
8
  KhojUser,
8
9
  OpenAIProcessorConversationConfig,
10
+ ServerChatSettings,
9
11
  SpeechToTextModelOptions,
10
12
  TextToImageModelConfig,
11
13
  )
12
14
  from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer
13
- from khoj.utils.constants import default_offline_chat_model, default_online_chat_model
15
+ from khoj.utils.constants import (
16
+ default_anthropic_chat_models,
17
+ default_gemini_chat_models,
18
+ default_offline_chat_models,
19
+ default_openai_chat_models,
20
+ )
14
21
 
15
22
  logger = logging.getLogger(__name__)
16
23
 
17
24
 
18
- def initialization():
25
+ def initialization(interactive: bool = True):
19
26
  def _create_admin_user():
20
27
  logger.info(
21
28
  "👩‍✈️ Setting up admin user. These credentials will allow you to configure your server at /server/admin."
22
29
  )
30
+ if not interactive and (not os.getenv("KHOJ_ADMIN_EMAIL") or not os.getenv("KHOJ_ADMIN_PASSWORD")):
31
+ logger.error(
32
+ "🚨 Admin user cannot be created. Please set the KHOJ_ADMIN_EMAIL, KHOJ_ADMIN_PASSWORD environment variables or start server in interactive mode."
33
+ )
34
+ exit(1)
23
35
  email_addr = os.getenv("KHOJ_ADMIN_EMAIL") or input("Email: ")
24
36
  password = os.getenv("KHOJ_ADMIN_PASSWORD") or input("Password: ")
25
37
  admin_user = KhojUser.objects.create_superuser(email=email_addr, username=email_addr, password=password)
@@ -27,87 +39,103 @@ def initialization():
27
39
 
28
40
  def _create_chat_configuration():
29
41
  logger.info(
30
- "🗣️ Configure chat models available to your server. You can always update these at /server/admin using the credentials of your admin account"
42
+ "🗣️ Configure chat models available to your server. You can always update these at /server/admin using your admin account"
31
43
  )
32
44
 
33
- try:
34
- use_offline_model = input("Use offline chat model? (y/n): ")
35
- if use_offline_model == "y":
36
- logger.info("🗣️ Setting up offline chat model")
37
-
38
- offline_chat_model = input(
39
- f"Enter the offline chat model you want to use. See HuggingFace for available GGUF models (default: {default_offline_chat_model}): "
40
- )
41
- if offline_chat_model == "":
42
- ChatModelOptions.objects.create(
43
- chat_model=default_offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE
44
- )
45
- else:
46
- default_max_tokens = model_to_prompt_size.get(offline_chat_model, 2000)
47
- max_tokens = input(
48
- f"Enter the maximum number of tokens to use for the offline chat model (default {default_max_tokens}):"
49
- )
50
- max_tokens = max_tokens or default_max_tokens
51
-
52
- default_tokenizer = model_to_tokenizer.get(
53
- offline_chat_model, "hf-internal-testing/llama-tokenizer"
54
- )
55
- tokenizer = input(
56
- f"Enter the tokenizer to use for the offline chat model (default: {default_tokenizer}):"
57
- )
58
- tokenizer = tokenizer or default_tokenizer
59
-
60
- ChatModelOptions.objects.create(
61
- chat_model=offline_chat_model,
62
- model_type=ChatModelOptions.ModelType.OFFLINE,
63
- max_prompt_size=max_tokens,
64
- tokenizer=tokenizer,
65
- )
66
- except ModuleNotFoundError as e:
67
- logger.warning("Offline models are not supported on this device.")
68
-
69
- use_openai_model = input("Use OpenAI models? (y/n): ")
70
- if use_openai_model == "y":
71
- logger.info("🗣️ Setting up your OpenAI configuration")
72
- api_key = input("Enter your OpenAI API key: ")
73
- OpenAIProcessorConversationConfig.objects.create(api_key=api_key)
74
-
75
- openai_chat_model = input(
76
- f"Enter the OpenAI chat model you want to use (default: {default_online_chat_model}): "
77
- )
78
- openai_chat_model = openai_chat_model or default_online_chat_model
79
-
80
- default_max_tokens = model_to_prompt_size.get(openai_chat_model, 2000)
81
- max_tokens = input(
82
- f"Enter the maximum number of tokens to use for the OpenAI chat model (default: {default_max_tokens}): "
83
- )
84
- max_tokens = max_tokens or default_max_tokens
85
- ChatModelOptions.objects.create(
86
- chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_prompt_size=max_tokens
87
- )
45
+ # Set up OpenAI's online chat models
46
+ openai_configured, openai_provider = _setup_chat_model_provider(
47
+ ChatModelOptions.ModelType.OPENAI,
48
+ default_openai_chat_models,
49
+ default_api_key=os.getenv("OPENAI_API_KEY"),
50
+ vision_enabled=True,
51
+ is_offline=False,
52
+ interactive=interactive,
53
+ )
88
54
 
55
+ # Setup OpenAI speech to text model
56
+ if openai_configured:
89
57
  default_speech2text_model = "whisper-1"
90
- openai_speech2text_model = input(
91
- f"Enter the OpenAI speech to text model you want to use (default: {default_speech2text_model}): "
92
- )
93
- openai_speech2text_model = openai_speech2text_model or default_speech2text_model
58
+ if interactive:
59
+ openai_speech2text_model = input(
60
+ f"Enter the OpenAI speech to text model you want to use (default: {default_speech2text_model}): "
61
+ )
62
+ openai_speech2text_model = openai_speech2text_model or default_speech2text_model
63
+ else:
64
+ openai_speech2text_model = default_speech2text_model
94
65
  SpeechToTextModelOptions.objects.create(
95
66
  model_name=openai_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OPENAI
96
67
  )
97
68
 
69
+ # Setup OpenAI text to image model
70
+ if openai_configured:
98
71
  default_text_to_image_model = "dall-e-3"
99
- openai_text_to_image_model = input(
100
- f"Enter the OpenAI text to image model you want to use (default: {default_text_to_image_model}): "
101
- )
102
- openai_speech2text_model = openai_text_to_image_model or default_text_to_image_model
72
+ if interactive:
73
+ openai_text_to_image_model = input(
74
+ f"Enter the OpenAI text to image model you want to use (default: {default_text_to_image_model}): "
75
+ )
76
+ openai_text_to_image_model = openai_text_to_image_model or default_text_to_image_model
77
+ else:
78
+ openai_text_to_image_model = default_text_to_image_model
103
79
  TextToImageModelConfig.objects.create(
104
- model_name=openai_text_to_image_model, model_type=TextToImageModelConfig.ModelType.OPENAI
80
+ model_name=openai_text_to_image_model,
81
+ model_type=TextToImageModelConfig.ModelType.OPENAI,
82
+ openai_config=openai_provider,
105
83
  )
106
84
 
107
- if use_offline_model == "y" or use_openai_model == "y":
108
- logger.info("🗣️ Chat model configuration complete")
85
+ # Set up Google's Gemini online chat models
86
+ _setup_chat_model_provider(
87
+ ChatModelOptions.ModelType.GOOGLE,
88
+ default_gemini_chat_models,
89
+ default_api_key=os.getenv("GEMINI_API_KEY"),
90
+ vision_enabled=False,
91
+ is_offline=False,
92
+ interactive=interactive,
93
+ provider_name="Google Gemini",
94
+ )
109
95
 
110
- use_offline_speech2text_model = input("Use offline speech to text model? (y/n): ")
96
+ # Set up Anthropic's online chat models
97
+ _setup_chat_model_provider(
98
+ ChatModelOptions.ModelType.ANTHROPIC,
99
+ default_anthropic_chat_models,
100
+ default_api_key=os.getenv("ANTHROPIC_API_KEY"),
101
+ vision_enabled=False,
102
+ is_offline=False,
103
+ interactive=interactive,
104
+ )
105
+
106
+ # Set up offline chat models
107
+ _setup_chat_model_provider(
108
+ ChatModelOptions.ModelType.OFFLINE,
109
+ default_offline_chat_models,
110
+ default_api_key=None,
111
+ vision_enabled=False,
112
+ is_offline=True,
113
+ interactive=interactive,
114
+ )
115
+
116
+ # Explicitly set default chat model
117
+ chat_models_configured = ChatModelOptions.objects.count()
118
+ if chat_models_configured > 0:
119
+ default_chat_model_name = ChatModelOptions.objects.first().chat_model
120
+ # If there are multiple chat models, ask the user to choose the default chat model
121
+ if chat_models_configured > 1 and interactive:
122
+ user_chat_model_name = input(
123
+ f"Enter the default chat model to use (default: {default_chat_model_name}): "
124
+ )
125
+ else:
126
+ user_chat_model_name = None
127
+
128
+ # If the user's choice is valid, set it as the default chat model
129
+ if user_chat_model_name and ChatModelOptions.objects.filter(chat_model=user_chat_model_name).exists():
130
+ default_chat_model_name = user_chat_model_name
131
+
132
+ # Create a server chat settings object with the default chat model
133
+ default_chat_model = ChatModelOptions.objects.filter(chat_model=default_chat_model_name).first()
134
+ ServerChatSettings.objects.create(chat_default=default_chat_model)
135
+ logger.info("🗣️ Chat model configuration complete")
136
+
137
+ # Set up offline speech to text model
138
+ use_offline_speech2text_model = "n" if not interactive else input("Use offline speech to text model? (y/n): ")
111
139
  if use_offline_speech2text_model == "y":
112
140
  logger.info("🗣️ Setting up offline speech to text model")
113
141
  # Delete any existing speech to text model options. There can only be one.
@@ -124,6 +152,64 @@ def initialization():
124
152
 
125
153
  logger.info(f"🗣️ Offline speech to text model configured to {offline_speech2text_model}")
126
154
 
155
+ def _setup_chat_model_provider(
156
+ model_type: ChatModelOptions.ModelType,
157
+ default_chat_models: list,
158
+ default_api_key: str,
159
+ interactive: bool,
160
+ vision_enabled: bool = False,
161
+ is_offline: bool = False,
162
+ provider_name: str = None,
163
+ ) -> Tuple[bool, OpenAIProcessorConversationConfig]:
164
+ supported_vision_models = ["gpt-4o-mini", "gpt-4o"]
165
+ provider_name = provider_name or model_type.name.capitalize()
166
+ default_use_model = {True: "y", False: "n"}[default_api_key is not None or is_offline]
167
+ use_model_provider = (
168
+ default_use_model if not interactive else input(f"Add {provider_name} chat models? (y/n): ")
169
+ )
170
+
171
+ if use_model_provider != "y":
172
+ return False, None
173
+
174
+ logger.info(f"️💬 Setting up your {provider_name} chat configuration")
175
+
176
+ chat_model_provider = None
177
+ if not is_offline:
178
+ if interactive:
179
+ user_api_key = input(f"Enter your {provider_name} API key (default: {default_api_key}): ")
180
+ api_key = user_api_key if user_api_key != "" else default_api_key
181
+ else:
182
+ api_key = default_api_key
183
+ chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name=provider_name)
184
+
185
+ if interactive:
186
+ chat_model_names = input(
187
+ f"Enter the {provider_name} chat models you want to use (default: {','.join(default_chat_models)}): "
188
+ )
189
+ chat_models = chat_model_names.split(",") if chat_model_names != "" else default_chat_models
190
+ chat_models = [model.strip() for model in chat_models]
191
+ else:
192
+ chat_models = default_chat_models
193
+
194
+ for chat_model in chat_models:
195
+ default_max_tokens = model_to_prompt_size.get(chat_model)
196
+ default_tokenizer = model_to_tokenizer.get(chat_model)
197
+ vision_enabled = vision_enabled and chat_model in supported_vision_models
198
+
199
+ chat_model_options = {
200
+ "chat_model": chat_model,
201
+ "model_type": model_type,
202
+ "max_prompt_size": default_max_tokens,
203
+ "vision_enabled": vision_enabled,
204
+ "tokenizer": default_tokenizer,
205
+ "openai_config": chat_model_provider,
206
+ }
207
+
208
+ ChatModelOptions.objects.create(**chat_model_options)
209
+
210
+ logger.info(f"🗣️ {provider_name} chat model configuration complete")
211
+ return True, chat_model_provider
212
+
127
213
  admin_user = KhojUser.objects.filter(is_staff=True).first()
128
214
  if admin_user is None:
129
215
  while True:
@@ -139,7 +225,8 @@ def initialization():
139
225
  try:
140
226
  _create_chat_configuration()
141
227
  break
142
- # Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page.
228
+ # Some environments don't support interactive input. We catch the exception and return if that's the case.
229
+ # The admin can still configure their settings from the admin page.
143
230
  except EOFError:
144
231
  return
145
232
  except Exception as e:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: khoj
3
- Version: 1.23.2
3
+ Version: 1.23.3
4
4
  Summary: Your Second Brain
5
5
  Project-URL: Homepage, https://khoj.dev
6
6
  Project-URL: Documentation, https://docs.khoj.dev
@@ -61,7 +61,7 @@ Requires-Dist: pymupdf>=1.23.5
61
61
  Requires-Dist: python-multipart>=0.0.7
62
62
  Requires-Dist: pytz~=2024.1
63
63
  Requires-Dist: pyyaml~=6.0
64
- Requires-Dist: rapidocr-onnxruntime==1.3.22
64
+ Requires-Dist: rapidocr-onnxruntime==1.3.24
65
65
  Requires-Dist: requests>=2.26.0
66
66
  Requires-Dist: rich>=13.3.1
67
67
  Requires-Dist: schedule==1.1.0