khoj 1.16.1.dev15__py3-none-any.whl → 1.17.1.dev220__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. khoj/configure.py +6 -6
  2. khoj/database/adapters/__init__.py +56 -12
  3. khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
  4. khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
  5. khoj/database/models/__init__.py +35 -0
  6. khoj/interface/web/assets/icons/favicon-128x128.png +0 -0
  7. khoj/interface/web/assets/icons/favicon-256x256.png +0 -0
  8. khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
  9. khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
  10. khoj/interface/web/assets/icons/khoj-logo-sideways.svg +31 -5384
  11. khoj/interface/web/assets/icons/khoj.svg +26 -0
  12. khoj/interface/web/chat.html +191 -301
  13. khoj/interface/web/content_source_computer_input.html +3 -3
  14. khoj/interface/web/content_source_github_input.html +1 -1
  15. khoj/interface/web/content_source_notion_input.html +1 -1
  16. khoj/interface/web/public_conversation.html +1 -1
  17. khoj/interface/web/search.html +2 -2
  18. khoj/interface/web/{config.html → settings.html} +30 -30
  19. khoj/interface/web/utils.html +1 -1
  20. khoj/processor/content/docx/docx_to_entries.py +4 -9
  21. khoj/processor/content/github/github_to_entries.py +1 -3
  22. khoj/processor/content/images/image_to_entries.py +4 -9
  23. khoj/processor/content/markdown/markdown_to_entries.py +4 -9
  24. khoj/processor/content/notion/notion_to_entries.py +1 -3
  25. khoj/processor/content/org_mode/org_to_entries.py +4 -9
  26. khoj/processor/content/pdf/pdf_to_entries.py +4 -9
  27. khoj/processor/content/plaintext/plaintext_to_entries.py +4 -9
  28. khoj/processor/content/text_to_entries.py +1 -3
  29. khoj/processor/conversation/anthropic/anthropic_chat.py +10 -4
  30. khoj/processor/conversation/offline/chat_model.py +19 -7
  31. khoj/processor/conversation/offline/utils.py +2 -0
  32. khoj/processor/conversation/openai/gpt.py +9 -3
  33. khoj/processor/conversation/prompts.py +56 -25
  34. khoj/processor/conversation/utils.py +5 -6
  35. khoj/processor/tools/online_search.py +13 -7
  36. khoj/routers/api.py +60 -10
  37. khoj/routers/api_agents.py +3 -1
  38. khoj/routers/api_chat.py +335 -562
  39. khoj/routers/api_content.py +538 -0
  40. khoj/routers/api_model.py +156 -0
  41. khoj/routers/helpers.py +339 -26
  42. khoj/routers/notion.py +2 -8
  43. khoj/routers/web_client.py +43 -256
  44. khoj/search_type/text_search.py +5 -4
  45. khoj/utils/fs_syncer.py +4 -2
  46. khoj/utils/rawconfig.py +6 -1
  47. {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev220.dist-info}/METADATA +3 -3
  48. {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev220.dist-info}/RECORD +51 -48
  49. khoj/routers/api_config.py +0 -434
  50. khoj/routers/indexer.py +0 -349
  51. {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev220.dist-info}/WHEEL +0 -0
  52. {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev220.dist-info}/entry_points.txt +0 -0
  53. {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev220.dist-info}/licenses/LICENSE +0 -0
@@ -165,7 +165,7 @@
165
165
 
166
166
  // Save Github config on server
167
167
  const csrfToken = document.cookie.split('; ').find(row => row.startsWith('csrftoken'))?.split('=')[1];
168
- fetch('/api/config/data/content-source/github', {
168
+ fetch('/api/content/github', {
169
169
  method: 'POST',
170
170
  headers: {
171
171
  'Content-Type': 'application/json',
@@ -45,7 +45,7 @@
45
45
 
46
46
  // Save Notion config on server
47
47
  const csrfToken = document.cookie.split('; ').find(row => row.startsWith('csrftoken'))?.split('=')[1];
48
- fetch('/api/config/data/content-source/notion', {
48
+ fetch('/api/content/notion', {
49
49
  method: 'POST',
50
50
  headers: {
51
51
  'Content-Type': 'application/json',
@@ -34,7 +34,7 @@ Hi, I am Khoj, your open, personal AI 👋🏽. I can:
34
34
  - 📚 Understand files you drag & drop here
35
35
  - 👩🏾‍🚀 Be tuned to your conversation needs via [agents](./agents)
36
36
 
37
- Get the Khoj [Desktop](https://khoj.dev/downloads), [Obsidian](https://docs.khoj.dev/clients/obsidian#setup), [Emacs](https://docs.khoj.dev/clients/emacs#setup) apps to search, chat with your 🖥️ computer docs. You can manage all the files you've shared with me at any time by going to [your settings](/config/content-source/computer/).
37
+ Get the Khoj [Desktop](https://khoj.dev/downloads), [Obsidian](https://docs.khoj.dev/clients/obsidian#setup), [Emacs](https://docs.khoj.dev/clients/emacs#setup) apps to search, chat with your 🖥️ computer docs. You can manage all the files you've shared with me at any time by going to [your settings](/settings/content/computer/).
38
38
 
39
39
  To get started, just start typing below. You can also type / to see a list of commands.
40
40
  `.trim()
@@ -209,12 +209,12 @@
209
209
 
210
210
  function populate_type_dropdown() {
211
211
  // Populate type dropdown field with enabled content types only
212
- fetch("/api/config/types")
212
+ fetch("/api/content/types")
213
213
  .then(response => response.json())
214
214
  .then(enabled_types => {
215
215
  // Show warning if no content types are enabled, or just one ("all")
216
216
  if (enabled_types[0] === "all" && enabled_types.length === 1) {
217
- document.getElementById("results").innerHTML = "<div id='results-error'>To use Khoj search, setup your content plugins on the Khoj <a class='inline-chat-link' href='/config'>settings page</a>.</div>";
217
+ document.getElementById("results").innerHTML = "<div id='results-error'>To use Khoj search, setup your content plugins on the Khoj <a class='inline-chat-link' href='/settings'>settings page</a>.</div>";
218
218
  document.getElementById("query").setAttribute("disabled", "disabled");
219
219
  document.getElementById("query").setAttribute("placeholder", "Configure Khoj to enable search");
220
220
  return [];
@@ -34,7 +34,7 @@
34
34
  <h3 id="card-title-computer" class="card-title">
35
35
  <span>Files</span>
36
36
  <img id="configured-icon-computer"
37
- style="display: {% if not current_model_state.computer %}none{% endif %}"
37
+ style="display: {% if not enabled_content_source.computer %}none{% endif %}"
38
38
  class="configured-icon"
39
39
  src="/static/assets/icons/confirm-icon.svg"
40
40
  alt="Configured">
@@ -44,8 +44,8 @@
44
44
  <p class="card-description">Manage files from your computer</p>
45
45
  </div>
46
46
  <div class="card-action-row">
47
- <a class="card-button" href="/config/content-source/computer">
48
- {% if current_model_state.computer %}
47
+ <a class="card-button" href="/settings/content/computer">
48
+ {% if enabled_content_source.computer %}
49
49
  Update
50
50
  {% else %}
51
51
  Setup
@@ -53,7 +53,7 @@
53
53
  <svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
54
54
  </a>
55
55
  <div id="clear-computer" class="card-action-row"
56
- style="display: {% if not current_model_state.computer %}none{% endif %}">
56
+ style="display: {% if not enabled_content_source.computer %}none{% endif %}">
57
57
  <button class="card-button" onclick="clearContentType('computer')">
58
58
  Disable
59
59
  </button>
@@ -69,15 +69,15 @@
69
69
  class="configured-icon"
70
70
  src="/static/assets/icons/confirm-icon.svg"
71
71
  alt="Configured"
72
- style="display: {% if not current_model_state.github %}none{% endif %}">
72
+ style="display: {% if not enabled_content_source.github %}none{% endif %}">
73
73
  </h3>
74
74
  </div>
75
75
  <div class="card-description-row">
76
76
  <p class="card-description">Set repositories to index</p>
77
77
  </div>
78
78
  <div class="card-action-row">
79
- <a class="card-button" href="/config/content-source/github">
80
- {% if current_model_state.github %}
79
+ <a class="card-button" href="/settings/content/github">
80
+ {% if enabled_content_source.github %}
81
81
  Update
82
82
  {% else %}
83
83
  Setup
@@ -86,7 +86,7 @@
86
86
  </a>
87
87
  <div id="clear-github"
88
88
  class="card-action-row"
89
- style="display: {% if not current_model_state.github %}none{% endif %}">
89
+ style="display: {% if not enabled_content_source.github %}none{% endif %}">
90
90
  <button class="card-button" onclick="clearContentType('github')">
91
91
  Disable
92
92
  </button>
@@ -102,15 +102,15 @@
102
102
  class="configured-icon"
103
103
  src="/static/assets/icons/confirm-icon.svg"
104
104
  alt="Configured"
105
- style="display: {% if not current_model_state.notion %}none{% endif %}">
105
+ style="display: {% if not enabled_content_source.notion %}none{% endif %}">
106
106
  </h3>
107
107
  </div>
108
108
  <div class="card-description-row">
109
109
  <p class="card-description">Sync your Notion pages</p>
110
110
  </div>
111
111
  <div class="card-action-row">
112
- {% if current_model_state.notion %}
113
- <a class="card-button" href="/config/content-source/notion">
112
+ {% if enabled_content_source.notion %}
113
+ <a class="card-button" href="/settings/content/notion">
114
114
  Update
115
115
  <svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
116
116
  </a>
@@ -120,7 +120,7 @@
120
120
  <svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
121
121
  </a>
122
122
  {% else %}
123
- <a class="card-button" href="/config/content-source/notion">
123
+ <a class="card-button" href="/settings/content/notion">
124
124
  Setup
125
125
  <svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
126
126
  </a>
@@ -128,7 +128,7 @@
128
128
 
129
129
  <div id="clear-notion"
130
130
  class="card-action-row"
131
- style="display: {% if not current_model_state.notion %}none{% endif %}">
131
+ style="display: {% if not enabled_content_source.notion %}none{% endif %}">
132
132
  <button class="card-button" onclick="clearContentType('notion')">
133
133
  Disable
134
134
  </button>
@@ -181,8 +181,8 @@
181
181
  </div>
182
182
  <div class="card-description-row">
183
183
  <select id="chat-models">
184
- {% for option in conversation_options %}
185
- <option value="{{ option.id }}" {% if option.id == selected_conversation_config %}selected{% endif %}>{{ option.chat_model }}</option>
184
+ {% for option in chat_model_options %}
185
+ <option value="{{ option.id }}" {% if option.id == selected_chat_model_config %}selected{% endif %}>{{ option.name }}</option>
186
186
  {% endfor %}
187
187
  </select>
188
188
  </div>
@@ -208,7 +208,7 @@
208
208
  <div class="card-description-row">
209
209
  <select id="paint-models">
210
210
  {% for option in paint_model_options %}
211
- <option value="{{ option.id }}" {% if option.id == selected_paint_model_config %}selected{% endif %}>{{ option.model_name }}</option>
211
+ <option value="{{ option.id }}" {% if option.id == selected_paint_model_config %}selected{% endif %}>{{ option.name }}</option>
212
212
  {% endfor %}
213
213
  </select>
214
214
  </div>
@@ -235,7 +235,7 @@
235
235
  <div class="card-description-row">
236
236
  <select id="voice-models">
237
237
  {% for option in voice_model_options %}
238
- <option value="{{ option.id }}" {% if option.id == selected_voice_config %}selected{% endif %}>{{ option.name }}</option>
238
+ <option value="{{ option.id }}" {% if option.id == selected_voice_model_config %}selected{% endif %}>{{ option.name }}</option>
239
239
  {% endfor %}
240
240
  </select>
241
241
  </div>
@@ -394,8 +394,8 @@
394
394
 
395
395
  function saveProfileGivenName() {
396
396
  const givenName = document.getElementById("profile_given_name").value;
397
- fetch('/api/config/user/name?name=' + givenName, {
398
- method: 'POST',
397
+ fetch('/api/user/name?name=' + givenName, {
398
+ method: 'PATCH',
399
399
  headers: {
400
400
  'Content-Type': 'application/json',
401
401
  }
@@ -421,7 +421,7 @@
421
421
  saveVoiceModelButton.disabled = true;
422
422
  saveVoiceModelButton.textContent = "Saving...";
423
423
 
424
- fetch('/api/config/data/voice/model?id=' + voiceModel, {
424
+ fetch('/api/model/voice?id=' + voiceModel, {
425
425
  method: 'POST',
426
426
  headers: {
427
427
  'Content-Type': 'application/json',
@@ -455,7 +455,7 @@
455
455
  saveModelButton.innerHTML = "";
456
456
  saveModelButton.textContent = "Saving...";
457
457
 
458
- fetch('/api/config/data/conversation/model?id=' + chatModel, {
458
+ fetch('/api/model/chat?id=' + chatModel, {
459
459
  method: 'POST',
460
460
  headers: {
461
461
  'Content-Type': 'application/json',
@@ -494,7 +494,7 @@
494
494
  saveSearchModelButton.disabled = true;
495
495
  saveSearchModelButton.textContent = "Saving...";
496
496
 
497
- fetch('/api/config/data/search/model?id=' + searchModel, {
497
+ fetch('/api/model/search?id=' + searchModel, {
498
498
  method: 'POST',
499
499
  headers: {
500
500
  'Content-Type': 'application/json',
@@ -526,7 +526,7 @@
526
526
  saveModelButton.disabled = true;
527
527
  saveModelButton.innerHTML = "Saving...";
528
528
 
529
- fetch('/api/config/data/paint/model?id=' + paintModel, {
529
+ fetch('/api/model/paint?id=' + paintModel, {
530
530
  method: 'POST',
531
531
  headers: {
532
532
  'Content-Type': 'application/json',
@@ -553,7 +553,7 @@
553
553
  };
554
554
 
555
555
  function clearContentType(content_source) {
556
- fetch('/api/config/data/content-source/' + content_source, {
556
+ fetch('/api/content/' + content_source, {
557
557
  method: 'DELETE',
558
558
  headers: {
559
559
  'Content-Type': 'application/json',
@@ -676,7 +676,7 @@
676
676
 
677
677
  content_sources = ["computer", "github", "notion"];
678
678
  content_sources.forEach(content_source => {
679
- fetch(`/api/config/data/${content_source}`, {
679
+ fetch(`/api/content/${content_source}`, {
680
680
  method: 'GET',
681
681
  headers: {
682
682
  'Content-Type': 'application/json',
@@ -807,7 +807,7 @@
807
807
 
808
808
  function getIndexedDataSize() {
809
809
  document.getElementById("indexed-data-size").textContent = "Calculating...";
810
- fetch('/api/config/index/size')
810
+ fetch('/api/content/size')
811
811
  .then(response => response.json())
812
812
  .then(data => {
813
813
  document.getElementById("indexed-data-size").textContent = data.indexed_data_size_in_mb + " MB used";
@@ -815,7 +815,7 @@
815
815
  }
816
816
 
817
817
  function removeFile(path) {
818
- fetch('/api/config/data/file?filename=' + path, {
818
+ fetch('/api/content/file?filename=' + path, {
819
819
  method: 'DELETE',
820
820
  headers: {
821
821
  'Content-Type': 'application/json',
@@ -890,7 +890,7 @@
890
890
  })
891
891
 
892
892
  phonenumberRemoveButton.addEventListener("click", () => {
893
- fetch('/api/config/phone', {
893
+ fetch('/api/phone', {
894
894
  method: 'DELETE',
895
895
  headers: {
896
896
  'Content-Type': 'application/json',
@@ -917,7 +917,7 @@
917
917
  }, 5000);
918
918
  } else {
919
919
  const mobileNumber = iti.getNumber();
920
- fetch('/api/config/phone?phone_number=' + mobileNumber, {
920
+ fetch('/api/phone?phone_number=' + mobileNumber, {
921
921
  method: 'POST',
922
922
  headers: {
923
923
  'Content-Type': 'application/json',
@@ -970,7 +970,7 @@
970
970
  return;
971
971
  }
972
972
 
973
- fetch('/api/config/phone/verify?code=' + otp, {
973
+ fetch('/api/phone/verify?code=' + otp, {
974
974
  method: 'POST',
975
975
  headers: {
976
976
  'Content-Type': 'application/json',
@@ -36,7 +36,7 @@
36
36
  {% endif %}
37
37
  <div id="khoj-nav-menu" class="khoj-nav-dropdown-content">
38
38
  <div class="khoj-nav-username"> {{ username }} </div>
39
- <a id="settings-nav" class="khoj-nav" href="/config">Settings</a>
39
+ <a id="settings-nav" class="khoj-nav" href="/settings">Settings</a>
40
40
  <a id="github-nav" class="khoj-nav" href="https://github.com/khoj-ai/khoj">GitHub</a>
41
41
  <a id="help-nav" class="khoj-nav" href="https://docs.khoj.dev" target="_blank">Help</a>
42
42
  <a class="khoj-nav" href="/auth/logout">Logout</a>
@@ -19,16 +19,11 @@ class DocxToEntries(TextToEntries):
19
19
  super().__init__()
20
20
 
21
21
  # Define Functions
22
- def process(
23
- self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
24
- ) -> Tuple[int, int]:
22
+ def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
25
23
  # Extract required fields from config
26
- if not full_corpus:
27
- deletion_file_names = set([file for file in files if files[file] == b""])
28
- files_to_process = set(files) - deletion_file_names
29
- files = {file: files[file] for file in files_to_process}
30
- else:
31
- deletion_file_names = None
24
+ deletion_file_names = set([file for file in files if files[file] == b""])
25
+ files_to_process = set(files) - deletion_file_names
26
+ files = {file: files[file] for file in files_to_process}
32
27
 
33
28
  # Extract Entries from specified Docx files
34
29
  with timer("Extract entries from specified DOCX files", logger):
@@ -48,9 +48,7 @@ class GithubToEntries(TextToEntries):
48
48
  else:
49
49
  return
50
50
 
51
- def process(
52
- self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
53
- ) -> Tuple[int, int]:
51
+ def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
54
52
  if self.config.pat_token is None or self.config.pat_token == "":
55
53
  logger.error(f"Github PAT token is not set. Skipping github content")
56
54
  raise ValueError("Github PAT token is not set. Skipping github content")
@@ -20,16 +20,11 @@ class ImageToEntries(TextToEntries):
20
20
  super().__init__()
21
21
 
22
22
  # Define Functions
23
- def process(
24
- self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
25
- ) -> Tuple[int, int]:
23
+ def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
26
24
  # Extract required fields from config
27
- if not full_corpus:
28
- deletion_file_names = set([file for file in files if files[file] == b""])
29
- files_to_process = set(files) - deletion_file_names
30
- files = {file: files[file] for file in files_to_process}
31
- else:
32
- deletion_file_names = None
25
+ deletion_file_names = set([file for file in files if files[file] == b""])
26
+ files_to_process = set(files) - deletion_file_names
27
+ files = {file: files[file] for file in files_to_process}
33
28
 
34
29
  # Extract Entries from specified image files
35
30
  with timer("Extract entries from specified Image files", logger):
@@ -19,16 +19,11 @@ class MarkdownToEntries(TextToEntries):
19
19
  super().__init__()
20
20
 
21
21
  # Define Functions
22
- def process(
23
- self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
24
- ) -> Tuple[int, int]:
22
+ def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
25
23
  # Extract required fields from config
26
- if not full_corpus:
27
- deletion_file_names = set([file for file in files if files[file] == ""])
28
- files_to_process = set(files) - deletion_file_names
29
- files = {file: files[file] for file in files_to_process}
30
- else:
31
- deletion_file_names = None
24
+ deletion_file_names = set([file for file in files if files[file] == ""])
25
+ files_to_process = set(files) - deletion_file_names
26
+ files = {file: files[file] for file in files_to_process}
32
27
 
33
28
  max_tokens = 256
34
29
  # Extract Entries from specified Markdown files
@@ -78,9 +78,7 @@ class NotionToEntries(TextToEntries):
78
78
 
79
79
  self.body_params = {"page_size": 100}
80
80
 
81
- def process(
82
- self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
83
- ) -> Tuple[int, int]:
81
+ def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
84
82
  current_entries = []
85
83
 
86
84
  # Get all pages
@@ -20,15 +20,10 @@ class OrgToEntries(TextToEntries):
20
20
  super().__init__()
21
21
 
22
22
  # Define Functions
23
- def process(
24
- self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
25
- ) -> Tuple[int, int]:
26
- if not full_corpus:
27
- deletion_file_names = set([file for file in files if files[file] == ""])
28
- files_to_process = set(files) - deletion_file_names
29
- files = {file: files[file] for file in files_to_process}
30
- else:
31
- deletion_file_names = None
23
+ def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
24
+ deletion_file_names = set([file for file in files if files[file] == ""])
25
+ files_to_process = set(files) - deletion_file_names
26
+ files = {file: files[file] for file in files_to_process}
32
27
 
33
28
  # Extract Entries from specified Org files
34
29
  max_tokens = 256
@@ -22,16 +22,11 @@ class PdfToEntries(TextToEntries):
22
22
  super().__init__()
23
23
 
24
24
  # Define Functions
25
- def process(
26
- self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
27
- ) -> Tuple[int, int]:
25
+ def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
28
26
  # Extract required fields from config
29
- if not full_corpus:
30
- deletion_file_names = set([file for file in files if files[file] == b""])
31
- files_to_process = set(files) - deletion_file_names
32
- files = {file: files[file] for file in files_to_process}
33
- else:
34
- deletion_file_names = None
27
+ deletion_file_names = set([file for file in files if files[file] == b""])
28
+ files_to_process = set(files) - deletion_file_names
29
+ files = {file: files[file] for file in files_to_process}
35
30
 
36
31
  # Extract Entries from specified Pdf files
37
32
  with timer("Extract entries from specified PDF files", logger):
@@ -20,15 +20,10 @@ class PlaintextToEntries(TextToEntries):
20
20
  super().__init__()
21
21
 
22
22
  # Define Functions
23
- def process(
24
- self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
25
- ) -> Tuple[int, int]:
26
- if not full_corpus:
27
- deletion_file_names = set([file for file in files if files[file] == ""])
28
- files_to_process = set(files) - deletion_file_names
29
- files = {file: files[file] for file in files_to_process}
30
- else:
31
- deletion_file_names = None
23
+ def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
24
+ deletion_file_names = set([file for file in files if files[file] == ""])
25
+ files_to_process = set(files) - deletion_file_names
26
+ files = {file: files[file] for file in files_to_process}
32
27
 
33
28
  # Extract Entries from specified plaintext files
34
29
  with timer("Extract entries from specified Plaintext files", logger):
@@ -31,9 +31,7 @@ class TextToEntries(ABC):
31
31
  self.date_filter = DateFilter()
32
32
 
33
33
  @abstractmethod
34
- def process(
35
- self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
36
- ) -> Tuple[int, int]:
34
+ def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
37
35
  ...
38
36
 
39
37
  @staticmethod
@@ -36,7 +36,7 @@ def extract_questions_anthropic(
36
36
  # Extract Past User Message and Inferred Questions from Conversation Log
37
37
  chat_history = "".join(
38
38
  [
39
- f'Q: {chat["intent"]["query"]}\nKhoj: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
39
+ f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
40
40
  for chat in conversation_log.get("chat", [])[-4:]
41
41
  if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type")
42
42
  ]
@@ -135,17 +135,23 @@ def converse_anthropic(
135
135
  Converse with user using Anthropic's Claude
136
136
  """
137
137
  # Initialize Variables
138
- current_date = datetime.now().strftime("%Y-%m-%d")
138
+ current_date = datetime.now()
139
139
  compiled_references = "\n\n".join({f"# {item}" for item in references})
140
140
 
141
141
  conversation_primer = prompts.query_prompt.format(query=user_query)
142
142
 
143
143
  if agent and agent.personality:
144
144
  system_prompt = prompts.custom_personality.format(
145
- name=agent.name, bio=agent.personality, current_date=current_date
145
+ name=agent.name,
146
+ bio=agent.personality,
147
+ current_date=current_date.strftime("%Y-%m-%d"),
148
+ day_of_week=current_date.strftime("%A"),
146
149
  )
147
150
  else:
148
- system_prompt = prompts.personality.format(current_date=current_date)
151
+ system_prompt = prompts.personality.format(
152
+ current_date=current_date.strftime("%Y-%m-%d"),
153
+ day_of_week=current_date.strftime("%A"),
154
+ )
149
155
 
150
156
  if location_data:
151
157
  location = f"{location_data.city}, {location_data.region}, {location_data.country}"
@@ -55,6 +55,7 @@ def extract_questions_offline(
55
55
  chat_history += f"Q: {chat['intent']['query']}\n"
56
56
  chat_history += f"Khoj: {chat['message']}\n\n"
57
57
 
58
+ # Get dates relative to today for prompt creation
58
59
  today = datetime.today()
59
60
  yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")
60
61
  last_year = today.year - 1
@@ -62,11 +63,13 @@ def extract_questions_offline(
62
63
  query=text,
63
64
  chat_history=chat_history,
64
65
  current_date=today.strftime("%Y-%m-%d"),
66
+ day_of_week=today.strftime("%A"),
65
67
  yesterday_date=yesterday,
66
68
  last_year=last_year,
67
69
  this_year=today.year,
68
70
  location=location,
69
71
  )
72
+
70
73
  messages = generate_chatml_messages_with_context(
71
74
  example_questions, model_name=model, loaded_model=offline_chat_model, max_prompt_size=max_prompt_size
72
75
  )
@@ -74,7 +77,7 @@ def extract_questions_offline(
74
77
  state.chat_lock.acquire()
75
78
  try:
76
79
  response = send_message_to_model_offline(
77
- messages, loaded_model=offline_chat_model, max_prompt_size=max_prompt_size
80
+ messages, loaded_model=offline_chat_model, model=model, max_prompt_size=max_prompt_size
78
81
  )
79
82
  finally:
80
83
  state.chat_lock.release()
@@ -96,7 +99,7 @@ def extract_questions_offline(
96
99
  except:
97
100
  logger.warning(f"Llama returned invalid JSON. Falling back to using user message as search query.\n{response}")
98
101
  return all_questions
99
- logger.debug(f"Extracted Questions by Llama: {questions}")
102
+ logger.debug(f"Questions extracted by {model}: {questions}")
100
103
  return questions
101
104
 
102
105
 
@@ -144,14 +147,20 @@ def converse_offline(
144
147
  offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
145
148
  compiled_references_message = "\n\n".join({f"{item['compiled']}" for item in references})
146
149
 
147
- current_date = datetime.now().strftime("%Y-%m-%d")
150
+ current_date = datetime.now()
148
151
 
149
152
  if agent and agent.personality:
150
153
  system_prompt = prompts.custom_system_prompt_offline_chat.format(
151
- name=agent.name, bio=agent.personality, current_date=current_date
154
+ name=agent.name,
155
+ bio=agent.personality,
156
+ current_date=current_date.strftime("%Y-%m-%d"),
157
+ day_of_week=current_date.strftime("%A"),
152
158
  )
153
159
  else:
154
- system_prompt = prompts.system_prompt_offline_chat.format(current_date=current_date)
160
+ system_prompt = prompts.system_prompt_offline_chat.format(
161
+ current_date=current_date.strftime("%Y-%m-%d"),
162
+ day_of_week=current_date.strftime("%A"),
163
+ )
155
164
 
156
165
  conversation_primer = prompts.query_prompt.format(query=user_query)
157
166
 
@@ -177,9 +186,9 @@ def converse_offline(
177
186
  if online_results[result].get("webpages"):
178
187
  simplified_online_results[result] = online_results[result]["webpages"]
179
188
 
180
- conversation_primer = f"{prompts.online_search_conversation.format(online_results=str(simplified_online_results))}\n{conversation_primer}"
189
+ conversation_primer = f"{prompts.online_search_conversation_offline.format(online_results=str(simplified_online_results))}\n{conversation_primer}"
181
190
  if not is_none_or_empty(compiled_references_message):
182
- conversation_primer = f"{prompts.notes_conversation_offline.format(references=compiled_references_message)}\n{conversation_primer}"
191
+ conversation_primer = f"{prompts.notes_conversation_offline.format(references=compiled_references_message)}\n\n{conversation_primer}"
183
192
 
184
193
  # Setup Prompt with Primer or Conversation History
185
194
  messages = generate_chatml_messages_with_context(
@@ -192,6 +201,9 @@ def converse_offline(
192
201
  tokenizer_name=tokenizer_name,
193
202
  )
194
203
 
204
+ truncated_messages = "\n".join({f"{message.content[:70]}..." for message in messages})
205
+ logger.debug(f"Conversation Context for {model}: {truncated_messages}")
206
+
195
207
  g = ThreadedGenerator(references, online_results, completion_func=completion_func)
196
208
  t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size))
197
209
  t.start()
@@ -24,6 +24,8 @@ def download_model(repo_id: str, filename: str = "*Q4_K_M.gguf", max_tokens: int
24
24
  # Add chat format if known
25
25
  if "llama-3" in repo_id.lower():
26
26
  kwargs["chat_format"] = "llama-3"
27
+ elif "gemma-2" in repo_id.lower():
28
+ kwargs["chat_format"] = "gemma"
27
29
 
28
30
  # Check if the model is already downloaded
29
31
  model_path = load_model_from_cache(repo_id, filename)
@@ -125,17 +125,23 @@ def converse(
125
125
  Converse with user using OpenAI's ChatGPT
126
126
  """
127
127
  # Initialize Variables
128
- current_date = datetime.now().strftime("%Y-%m-%d")
128
+ current_date = datetime.now()
129
129
  compiled_references = "\n\n".join({f"# {item['compiled']}" for item in references})
130
130
 
131
131
  conversation_primer = prompts.query_prompt.format(query=user_query)
132
132
 
133
133
  if agent and agent.personality:
134
134
  system_prompt = prompts.custom_personality.format(
135
- name=agent.name, bio=agent.personality, current_date=current_date
135
+ name=agent.name,
136
+ bio=agent.personality,
137
+ current_date=current_date.strftime("%Y-%m-%d"),
138
+ day_of_week=current_date.strftime("%A"),
136
139
  )
137
140
  else:
138
- system_prompt = prompts.personality.format(current_date=current_date)
141
+ system_prompt = prompts.personality.format(
142
+ current_date=current_date.strftime("%Y-%m-%d"),
143
+ day_of_week=current_date.strftime("%A"),
144
+ )
139
145
 
140
146
  if location_data:
141
147
  location = f"{location_data.city}, {location_data.region}, {location_data.country}"