khoj 1.16.1.dev15__py3-none-any.whl → 1.17.1.dev229__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/configure.py +6 -6
- khoj/database/adapters/__init__.py +56 -12
- khoj/database/migrations/0053_agent_style_color_agent_style_icon.py +61 -0
- khoj/database/migrations/0054_alter_agent_style_color.py +38 -0
- khoj/database/models/__init__.py +35 -0
- khoj/interface/web/assets/icons/favicon-128x128.png +0 -0
- khoj/interface/web/assets/icons/favicon-256x256.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways.svg +31 -5384
- khoj/interface/web/assets/icons/khoj.svg +26 -0
- khoj/interface/web/chat.html +191 -301
- khoj/interface/web/content_source_computer_input.html +3 -3
- khoj/interface/web/content_source_github_input.html +1 -1
- khoj/interface/web/content_source_notion_input.html +1 -1
- khoj/interface/web/public_conversation.html +1 -1
- khoj/interface/web/search.html +2 -2
- khoj/interface/web/{config.html → settings.html} +30 -30
- khoj/interface/web/utils.html +1 -1
- khoj/processor/content/docx/docx_to_entries.py +4 -9
- khoj/processor/content/github/github_to_entries.py +1 -3
- khoj/processor/content/images/image_to_entries.py +4 -9
- khoj/processor/content/markdown/markdown_to_entries.py +4 -9
- khoj/processor/content/notion/notion_to_entries.py +1 -3
- khoj/processor/content/org_mode/org_to_entries.py +4 -9
- khoj/processor/content/pdf/pdf_to_entries.py +4 -9
- khoj/processor/content/plaintext/plaintext_to_entries.py +4 -9
- khoj/processor/content/text_to_entries.py +1 -3
- khoj/processor/conversation/anthropic/anthropic_chat.py +10 -4
- khoj/processor/conversation/offline/chat_model.py +19 -7
- khoj/processor/conversation/offline/utils.py +2 -0
- khoj/processor/conversation/openai/gpt.py +9 -3
- khoj/processor/conversation/prompts.py +56 -25
- khoj/processor/conversation/utils.py +5 -6
- khoj/processor/tools/online_search.py +13 -7
- khoj/routers/api.py +60 -10
- khoj/routers/api_agents.py +3 -1
- khoj/routers/api_chat.py +335 -562
- khoj/routers/api_content.py +538 -0
- khoj/routers/api_model.py +156 -0
- khoj/routers/helpers.py +339 -26
- khoj/routers/notion.py +2 -8
- khoj/routers/web_client.py +43 -256
- khoj/search_type/text_search.py +5 -4
- khoj/utils/fs_syncer.py +4 -2
- khoj/utils/rawconfig.py +6 -1
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev229.dist-info}/METADATA +3 -3
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev229.dist-info}/RECORD +51 -49
- khoj/interface/web/assets/icons/favicon.icns +0 -0
- khoj/routers/api_config.py +0 -434
- khoj/routers/indexer.py +0 -349
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev229.dist-info}/WHEEL +0 -0
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev229.dist-info}/entry_points.txt +0 -0
- {khoj-1.16.1.dev15.dist-info → khoj-1.17.1.dev229.dist-info}/licenses/LICENSE +0 -0
|
@@ -165,7 +165,7 @@
|
|
|
165
165
|
|
|
166
166
|
// Save Github config on server
|
|
167
167
|
const csrfToken = document.cookie.split('; ').find(row => row.startsWith('csrftoken'))?.split('=')[1];
|
|
168
|
-
fetch('/api/
|
|
168
|
+
fetch('/api/content/github', {
|
|
169
169
|
method: 'POST',
|
|
170
170
|
headers: {
|
|
171
171
|
'Content-Type': 'application/json',
|
|
@@ -45,7 +45,7 @@
|
|
|
45
45
|
|
|
46
46
|
// Save Notion config on server
|
|
47
47
|
const csrfToken = document.cookie.split('; ').find(row => row.startsWith('csrftoken'))?.split('=')[1];
|
|
48
|
-
fetch('/api/
|
|
48
|
+
fetch('/api/content/notion', {
|
|
49
49
|
method: 'POST',
|
|
50
50
|
headers: {
|
|
51
51
|
'Content-Type': 'application/json',
|
|
@@ -34,7 +34,7 @@ Hi, I am Khoj, your open, personal AI 👋🏽. I can:
|
|
|
34
34
|
- 📚 Understand files you drag & drop here
|
|
35
35
|
- 👩🏾🚀 Be tuned to your conversation needs via [agents](./agents)
|
|
36
36
|
|
|
37
|
-
Get the Khoj [Desktop](https://khoj.dev/downloads), [Obsidian](https://docs.khoj.dev/clients/obsidian#setup), [Emacs](https://docs.khoj.dev/clients/emacs#setup) apps to search, chat with your 🖥️ computer docs. You can manage all the files you've shared with me at any time by going to [your settings](/
|
|
37
|
+
Get the Khoj [Desktop](https://khoj.dev/downloads), [Obsidian](https://docs.khoj.dev/clients/obsidian#setup), [Emacs](https://docs.khoj.dev/clients/emacs#setup) apps to search, chat with your 🖥️ computer docs. You can manage all the files you've shared with me at any time by going to [your settings](/settings/content/computer/).
|
|
38
38
|
|
|
39
39
|
To get started, just start typing below. You can also type / to see a list of commands.
|
|
40
40
|
`.trim()
|
khoj/interface/web/search.html
CHANGED
|
@@ -209,12 +209,12 @@
|
|
|
209
209
|
|
|
210
210
|
function populate_type_dropdown() {
|
|
211
211
|
// Populate type dropdown field with enabled content types only
|
|
212
|
-
fetch("/api/
|
|
212
|
+
fetch("/api/content/types")
|
|
213
213
|
.then(response => response.json())
|
|
214
214
|
.then(enabled_types => {
|
|
215
215
|
// Show warning if no content types are enabled, or just one ("all")
|
|
216
216
|
if (enabled_types[0] === "all" && enabled_types.length === 1) {
|
|
217
|
-
document.getElementById("results").innerHTML = "<div id='results-error'>To use Khoj search, setup your content plugins on the Khoj <a class='inline-chat-link' href='/
|
|
217
|
+
document.getElementById("results").innerHTML = "<div id='results-error'>To use Khoj search, setup your content plugins on the Khoj <a class='inline-chat-link' href='/settings'>settings page</a>.</div>";
|
|
218
218
|
document.getElementById("query").setAttribute("disabled", "disabled");
|
|
219
219
|
document.getElementById("query").setAttribute("placeholder", "Configure Khoj to enable search");
|
|
220
220
|
return [];
|
|
@@ -34,7 +34,7 @@
|
|
|
34
34
|
<h3 id="card-title-computer" class="card-title">
|
|
35
35
|
<span>Files</span>
|
|
36
36
|
<img id="configured-icon-computer"
|
|
37
|
-
style="display: {% if not
|
|
37
|
+
style="display: {% if not enabled_content_source.computer %}none{% endif %}"
|
|
38
38
|
class="configured-icon"
|
|
39
39
|
src="/static/assets/icons/confirm-icon.svg"
|
|
40
40
|
alt="Configured">
|
|
@@ -44,8 +44,8 @@
|
|
|
44
44
|
<p class="card-description">Manage files from your computer</p>
|
|
45
45
|
</div>
|
|
46
46
|
<div class="card-action-row">
|
|
47
|
-
<a class="card-button" href="/
|
|
48
|
-
{% if
|
|
47
|
+
<a class="card-button" href="/settings/content/computer">
|
|
48
|
+
{% if enabled_content_source.computer %}
|
|
49
49
|
Update
|
|
50
50
|
{% else %}
|
|
51
51
|
Setup
|
|
@@ -53,7 +53,7 @@
|
|
|
53
53
|
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
|
|
54
54
|
</a>
|
|
55
55
|
<div id="clear-computer" class="card-action-row"
|
|
56
|
-
style="display: {% if not
|
|
56
|
+
style="display: {% if not enabled_content_source.computer %}none{% endif %}">
|
|
57
57
|
<button class="card-button" onclick="clearContentType('computer')">
|
|
58
58
|
Disable
|
|
59
59
|
</button>
|
|
@@ -69,15 +69,15 @@
|
|
|
69
69
|
class="configured-icon"
|
|
70
70
|
src="/static/assets/icons/confirm-icon.svg"
|
|
71
71
|
alt="Configured"
|
|
72
|
-
style="display: {% if not
|
|
72
|
+
style="display: {% if not enabled_content_source.github %}none{% endif %}">
|
|
73
73
|
</h3>
|
|
74
74
|
</div>
|
|
75
75
|
<div class="card-description-row">
|
|
76
76
|
<p class="card-description">Set repositories to index</p>
|
|
77
77
|
</div>
|
|
78
78
|
<div class="card-action-row">
|
|
79
|
-
<a class="card-button" href="/
|
|
80
|
-
{% if
|
|
79
|
+
<a class="card-button" href="/settings/content/github">
|
|
80
|
+
{% if enabled_content_source.github %}
|
|
81
81
|
Update
|
|
82
82
|
{% else %}
|
|
83
83
|
Setup
|
|
@@ -86,7 +86,7 @@
|
|
|
86
86
|
</a>
|
|
87
87
|
<div id="clear-github"
|
|
88
88
|
class="card-action-row"
|
|
89
|
-
style="display: {% if not
|
|
89
|
+
style="display: {% if not enabled_content_source.github %}none{% endif %}">
|
|
90
90
|
<button class="card-button" onclick="clearContentType('github')">
|
|
91
91
|
Disable
|
|
92
92
|
</button>
|
|
@@ -102,15 +102,15 @@
|
|
|
102
102
|
class="configured-icon"
|
|
103
103
|
src="/static/assets/icons/confirm-icon.svg"
|
|
104
104
|
alt="Configured"
|
|
105
|
-
style="display: {% if not
|
|
105
|
+
style="display: {% if not enabled_content_source.notion %}none{% endif %}">
|
|
106
106
|
</h3>
|
|
107
107
|
</div>
|
|
108
108
|
<div class="card-description-row">
|
|
109
109
|
<p class="card-description">Sync your Notion pages</p>
|
|
110
110
|
</div>
|
|
111
111
|
<div class="card-action-row">
|
|
112
|
-
{% if
|
|
113
|
-
<a class="card-button" href="/
|
|
112
|
+
{% if enabled_content_source.notion %}
|
|
113
|
+
<a class="card-button" href="/settings/content/notion">
|
|
114
114
|
Update
|
|
115
115
|
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
|
|
116
116
|
</a>
|
|
@@ -120,7 +120,7 @@
|
|
|
120
120
|
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
|
|
121
121
|
</a>
|
|
122
122
|
{% else %}
|
|
123
|
-
<a class="card-button" href="/
|
|
123
|
+
<a class="card-button" href="/settings/content/notion">
|
|
124
124
|
Setup
|
|
125
125
|
<svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M5 12h14M12 5l7 7-7 7"></path></svg>
|
|
126
126
|
</a>
|
|
@@ -128,7 +128,7 @@
|
|
|
128
128
|
|
|
129
129
|
<div id="clear-notion"
|
|
130
130
|
class="card-action-row"
|
|
131
|
-
style="display: {% if not
|
|
131
|
+
style="display: {% if not enabled_content_source.notion %}none{% endif %}">
|
|
132
132
|
<button class="card-button" onclick="clearContentType('notion')">
|
|
133
133
|
Disable
|
|
134
134
|
</button>
|
|
@@ -181,8 +181,8 @@
|
|
|
181
181
|
</div>
|
|
182
182
|
<div class="card-description-row">
|
|
183
183
|
<select id="chat-models">
|
|
184
|
-
{% for option in
|
|
185
|
-
<option value="{{ option.id }}" {% if option.id ==
|
|
184
|
+
{% for option in chat_model_options %}
|
|
185
|
+
<option value="{{ option.id }}" {% if option.id == selected_chat_model_config %}selected{% endif %}>{{ option.name }}</option>
|
|
186
186
|
{% endfor %}
|
|
187
187
|
</select>
|
|
188
188
|
</div>
|
|
@@ -208,7 +208,7 @@
|
|
|
208
208
|
<div class="card-description-row">
|
|
209
209
|
<select id="paint-models">
|
|
210
210
|
{% for option in paint_model_options %}
|
|
211
|
-
<option value="{{ option.id }}" {% if option.id == selected_paint_model_config %}selected{% endif %}>{{ option.
|
|
211
|
+
<option value="{{ option.id }}" {% if option.id == selected_paint_model_config %}selected{% endif %}>{{ option.name }}</option>
|
|
212
212
|
{% endfor %}
|
|
213
213
|
</select>
|
|
214
214
|
</div>
|
|
@@ -235,7 +235,7 @@
|
|
|
235
235
|
<div class="card-description-row">
|
|
236
236
|
<select id="voice-models">
|
|
237
237
|
{% for option in voice_model_options %}
|
|
238
|
-
<option value="{{ option.id }}" {% if option.id ==
|
|
238
|
+
<option value="{{ option.id }}" {% if option.id == selected_voice_model_config %}selected{% endif %}>{{ option.name }}</option>
|
|
239
239
|
{% endfor %}
|
|
240
240
|
</select>
|
|
241
241
|
</div>
|
|
@@ -394,8 +394,8 @@
|
|
|
394
394
|
|
|
395
395
|
function saveProfileGivenName() {
|
|
396
396
|
const givenName = document.getElementById("profile_given_name").value;
|
|
397
|
-
fetch('/api/
|
|
398
|
-
method: '
|
|
397
|
+
fetch('/api/user/name?name=' + givenName, {
|
|
398
|
+
method: 'PATCH',
|
|
399
399
|
headers: {
|
|
400
400
|
'Content-Type': 'application/json',
|
|
401
401
|
}
|
|
@@ -421,7 +421,7 @@
|
|
|
421
421
|
saveVoiceModelButton.disabled = true;
|
|
422
422
|
saveVoiceModelButton.textContent = "Saving...";
|
|
423
423
|
|
|
424
|
-
fetch('/api/
|
|
424
|
+
fetch('/api/model/voice?id=' + voiceModel, {
|
|
425
425
|
method: 'POST',
|
|
426
426
|
headers: {
|
|
427
427
|
'Content-Type': 'application/json',
|
|
@@ -455,7 +455,7 @@
|
|
|
455
455
|
saveModelButton.innerHTML = "";
|
|
456
456
|
saveModelButton.textContent = "Saving...";
|
|
457
457
|
|
|
458
|
-
fetch('/api/
|
|
458
|
+
fetch('/api/model/chat?id=' + chatModel, {
|
|
459
459
|
method: 'POST',
|
|
460
460
|
headers: {
|
|
461
461
|
'Content-Type': 'application/json',
|
|
@@ -494,7 +494,7 @@
|
|
|
494
494
|
saveSearchModelButton.disabled = true;
|
|
495
495
|
saveSearchModelButton.textContent = "Saving...";
|
|
496
496
|
|
|
497
|
-
fetch('/api/
|
|
497
|
+
fetch('/api/model/search?id=' + searchModel, {
|
|
498
498
|
method: 'POST',
|
|
499
499
|
headers: {
|
|
500
500
|
'Content-Type': 'application/json',
|
|
@@ -526,7 +526,7 @@
|
|
|
526
526
|
saveModelButton.disabled = true;
|
|
527
527
|
saveModelButton.innerHTML = "Saving...";
|
|
528
528
|
|
|
529
|
-
fetch('/api/
|
|
529
|
+
fetch('/api/model/paint?id=' + paintModel, {
|
|
530
530
|
method: 'POST',
|
|
531
531
|
headers: {
|
|
532
532
|
'Content-Type': 'application/json',
|
|
@@ -553,7 +553,7 @@
|
|
|
553
553
|
};
|
|
554
554
|
|
|
555
555
|
function clearContentType(content_source) {
|
|
556
|
-
fetch('/api/
|
|
556
|
+
fetch('/api/content/' + content_source, {
|
|
557
557
|
method: 'DELETE',
|
|
558
558
|
headers: {
|
|
559
559
|
'Content-Type': 'application/json',
|
|
@@ -676,7 +676,7 @@
|
|
|
676
676
|
|
|
677
677
|
content_sources = ["computer", "github", "notion"];
|
|
678
678
|
content_sources.forEach(content_source => {
|
|
679
|
-
fetch(`/api/
|
|
679
|
+
fetch(`/api/content/${content_source}`, {
|
|
680
680
|
method: 'GET',
|
|
681
681
|
headers: {
|
|
682
682
|
'Content-Type': 'application/json',
|
|
@@ -807,7 +807,7 @@
|
|
|
807
807
|
|
|
808
808
|
function getIndexedDataSize() {
|
|
809
809
|
document.getElementById("indexed-data-size").textContent = "Calculating...";
|
|
810
|
-
fetch('/api/
|
|
810
|
+
fetch('/api/content/size')
|
|
811
811
|
.then(response => response.json())
|
|
812
812
|
.then(data => {
|
|
813
813
|
document.getElementById("indexed-data-size").textContent = data.indexed_data_size_in_mb + " MB used";
|
|
@@ -815,7 +815,7 @@
|
|
|
815
815
|
}
|
|
816
816
|
|
|
817
817
|
function removeFile(path) {
|
|
818
|
-
fetch('/api/
|
|
818
|
+
fetch('/api/content/file?filename=' + path, {
|
|
819
819
|
method: 'DELETE',
|
|
820
820
|
headers: {
|
|
821
821
|
'Content-Type': 'application/json',
|
|
@@ -890,7 +890,7 @@
|
|
|
890
890
|
})
|
|
891
891
|
|
|
892
892
|
phonenumberRemoveButton.addEventListener("click", () => {
|
|
893
|
-
fetch('/api/
|
|
893
|
+
fetch('/api/phone', {
|
|
894
894
|
method: 'DELETE',
|
|
895
895
|
headers: {
|
|
896
896
|
'Content-Type': 'application/json',
|
|
@@ -917,7 +917,7 @@
|
|
|
917
917
|
}, 5000);
|
|
918
918
|
} else {
|
|
919
919
|
const mobileNumber = iti.getNumber();
|
|
920
|
-
fetch('/api/
|
|
920
|
+
fetch('/api/phone?phone_number=' + mobileNumber, {
|
|
921
921
|
method: 'POST',
|
|
922
922
|
headers: {
|
|
923
923
|
'Content-Type': 'application/json',
|
|
@@ -970,7 +970,7 @@
|
|
|
970
970
|
return;
|
|
971
971
|
}
|
|
972
972
|
|
|
973
|
-
fetch('/api/
|
|
973
|
+
fetch('/api/phone/verify?code=' + otp, {
|
|
974
974
|
method: 'POST',
|
|
975
975
|
headers: {
|
|
976
976
|
'Content-Type': 'application/json',
|
khoj/interface/web/utils.html
CHANGED
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
{% endif %}
|
|
37
37
|
<div id="khoj-nav-menu" class="khoj-nav-dropdown-content">
|
|
38
38
|
<div class="khoj-nav-username"> {{ username }} </div>
|
|
39
|
-
<a id="settings-nav" class="khoj-nav" href="/
|
|
39
|
+
<a id="settings-nav" class="khoj-nav" href="/settings">Settings</a>
|
|
40
40
|
<a id="github-nav" class="khoj-nav" href="https://github.com/khoj-ai/khoj">GitHub</a>
|
|
41
41
|
<a id="help-nav" class="khoj-nav" href="https://docs.khoj.dev" target="_blank">Help</a>
|
|
42
42
|
<a class="khoj-nav" href="/auth/logout">Logout</a>
|
|
@@ -19,16 +19,11 @@ class DocxToEntries(TextToEntries):
|
|
|
19
19
|
super().__init__()
|
|
20
20
|
|
|
21
21
|
# Define Functions
|
|
22
|
-
def process(
|
|
23
|
-
self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
|
|
24
|
-
) -> Tuple[int, int]:
|
|
22
|
+
def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
|
|
25
23
|
# Extract required fields from config
|
|
26
|
-
if
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
files = {file: files[file] for file in files_to_process}
|
|
30
|
-
else:
|
|
31
|
-
deletion_file_names = None
|
|
24
|
+
deletion_file_names = set([file for file in files if files[file] == b""])
|
|
25
|
+
files_to_process = set(files) - deletion_file_names
|
|
26
|
+
files = {file: files[file] for file in files_to_process}
|
|
32
27
|
|
|
33
28
|
# Extract Entries from specified Docx files
|
|
34
29
|
with timer("Extract entries from specified DOCX files", logger):
|
|
@@ -48,9 +48,7 @@ class GithubToEntries(TextToEntries):
|
|
|
48
48
|
else:
|
|
49
49
|
return
|
|
50
50
|
|
|
51
|
-
def process(
|
|
52
|
-
self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
|
|
53
|
-
) -> Tuple[int, int]:
|
|
51
|
+
def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
|
|
54
52
|
if self.config.pat_token is None or self.config.pat_token == "":
|
|
55
53
|
logger.error(f"Github PAT token is not set. Skipping github content")
|
|
56
54
|
raise ValueError("Github PAT token is not set. Skipping github content")
|
|
@@ -20,16 +20,11 @@ class ImageToEntries(TextToEntries):
|
|
|
20
20
|
super().__init__()
|
|
21
21
|
|
|
22
22
|
# Define Functions
|
|
23
|
-
def process(
|
|
24
|
-
self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
|
|
25
|
-
) -> Tuple[int, int]:
|
|
23
|
+
def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
|
|
26
24
|
# Extract required fields from config
|
|
27
|
-
if
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
files = {file: files[file] for file in files_to_process}
|
|
31
|
-
else:
|
|
32
|
-
deletion_file_names = None
|
|
25
|
+
deletion_file_names = set([file for file in files if files[file] == b""])
|
|
26
|
+
files_to_process = set(files) - deletion_file_names
|
|
27
|
+
files = {file: files[file] for file in files_to_process}
|
|
33
28
|
|
|
34
29
|
# Extract Entries from specified image files
|
|
35
30
|
with timer("Extract entries from specified Image files", logger):
|
|
@@ -19,16 +19,11 @@ class MarkdownToEntries(TextToEntries):
|
|
|
19
19
|
super().__init__()
|
|
20
20
|
|
|
21
21
|
# Define Functions
|
|
22
|
-
def process(
|
|
23
|
-
self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
|
|
24
|
-
) -> Tuple[int, int]:
|
|
22
|
+
def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
|
|
25
23
|
# Extract required fields from config
|
|
26
|
-
if
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
files = {file: files[file] for file in files_to_process}
|
|
30
|
-
else:
|
|
31
|
-
deletion_file_names = None
|
|
24
|
+
deletion_file_names = set([file for file in files if files[file] == ""])
|
|
25
|
+
files_to_process = set(files) - deletion_file_names
|
|
26
|
+
files = {file: files[file] for file in files_to_process}
|
|
32
27
|
|
|
33
28
|
max_tokens = 256
|
|
34
29
|
# Extract Entries from specified Markdown files
|
|
@@ -78,9 +78,7 @@ class NotionToEntries(TextToEntries):
|
|
|
78
78
|
|
|
79
79
|
self.body_params = {"page_size": 100}
|
|
80
80
|
|
|
81
|
-
def process(
|
|
82
|
-
self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
|
|
83
|
-
) -> Tuple[int, int]:
|
|
81
|
+
def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
|
|
84
82
|
current_entries = []
|
|
85
83
|
|
|
86
84
|
# Get all pages
|
|
@@ -20,15 +20,10 @@ class OrgToEntries(TextToEntries):
|
|
|
20
20
|
super().__init__()
|
|
21
21
|
|
|
22
22
|
# Define Functions
|
|
23
|
-
def process(
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
deletion_file_names = set([file for file in files if files[file] == ""])
|
|
28
|
-
files_to_process = set(files) - deletion_file_names
|
|
29
|
-
files = {file: files[file] for file in files_to_process}
|
|
30
|
-
else:
|
|
31
|
-
deletion_file_names = None
|
|
23
|
+
def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
|
|
24
|
+
deletion_file_names = set([file for file in files if files[file] == ""])
|
|
25
|
+
files_to_process = set(files) - deletion_file_names
|
|
26
|
+
files = {file: files[file] for file in files_to_process}
|
|
32
27
|
|
|
33
28
|
# Extract Entries from specified Org files
|
|
34
29
|
max_tokens = 256
|
|
@@ -22,16 +22,11 @@ class PdfToEntries(TextToEntries):
|
|
|
22
22
|
super().__init__()
|
|
23
23
|
|
|
24
24
|
# Define Functions
|
|
25
|
-
def process(
|
|
26
|
-
self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
|
|
27
|
-
) -> Tuple[int, int]:
|
|
25
|
+
def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
|
|
28
26
|
# Extract required fields from config
|
|
29
|
-
if
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
files = {file: files[file] for file in files_to_process}
|
|
33
|
-
else:
|
|
34
|
-
deletion_file_names = None
|
|
27
|
+
deletion_file_names = set([file for file in files if files[file] == b""])
|
|
28
|
+
files_to_process = set(files) - deletion_file_names
|
|
29
|
+
files = {file: files[file] for file in files_to_process}
|
|
35
30
|
|
|
36
31
|
# Extract Entries from specified Pdf files
|
|
37
32
|
with timer("Extract entries from specified PDF files", logger):
|
|
@@ -20,15 +20,10 @@ class PlaintextToEntries(TextToEntries):
|
|
|
20
20
|
super().__init__()
|
|
21
21
|
|
|
22
22
|
# Define Functions
|
|
23
|
-
def process(
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
deletion_file_names = set([file for file in files if files[file] == ""])
|
|
28
|
-
files_to_process = set(files) - deletion_file_names
|
|
29
|
-
files = {file: files[file] for file in files_to_process}
|
|
30
|
-
else:
|
|
31
|
-
deletion_file_names = None
|
|
23
|
+
def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
|
|
24
|
+
deletion_file_names = set([file for file in files if files[file] == ""])
|
|
25
|
+
files_to_process = set(files) - deletion_file_names
|
|
26
|
+
files = {file: files[file] for file in files_to_process}
|
|
32
27
|
|
|
33
28
|
# Extract Entries from specified plaintext files
|
|
34
29
|
with timer("Extract entries from specified Plaintext files", logger):
|
|
@@ -31,9 +31,7 @@ class TextToEntries(ABC):
|
|
|
31
31
|
self.date_filter = DateFilter()
|
|
32
32
|
|
|
33
33
|
@abstractmethod
|
|
34
|
-
def process(
|
|
35
|
-
self, files: dict[str, str] = None, full_corpus: bool = True, user: KhojUser = None, regenerate: bool = False
|
|
36
|
-
) -> Tuple[int, int]:
|
|
34
|
+
def process(self, files: dict[str, str] = None, user: KhojUser = None, regenerate: bool = False) -> Tuple[int, int]:
|
|
37
35
|
...
|
|
38
36
|
|
|
39
37
|
@staticmethod
|
|
@@ -36,7 +36,7 @@ def extract_questions_anthropic(
|
|
|
36
36
|
# Extract Past User Message and Inferred Questions from Conversation Log
|
|
37
37
|
chat_history = "".join(
|
|
38
38
|
[
|
|
39
|
-
f'
|
|
39
|
+
f'User: {chat["intent"]["query"]}\nAssistant: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
|
|
40
40
|
for chat in conversation_log.get("chat", [])[-4:]
|
|
41
41
|
if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type")
|
|
42
42
|
]
|
|
@@ -135,17 +135,23 @@ def converse_anthropic(
|
|
|
135
135
|
Converse with user using Anthropic's Claude
|
|
136
136
|
"""
|
|
137
137
|
# Initialize Variables
|
|
138
|
-
current_date = datetime.now()
|
|
138
|
+
current_date = datetime.now()
|
|
139
139
|
compiled_references = "\n\n".join({f"# {item}" for item in references})
|
|
140
140
|
|
|
141
141
|
conversation_primer = prompts.query_prompt.format(query=user_query)
|
|
142
142
|
|
|
143
143
|
if agent and agent.personality:
|
|
144
144
|
system_prompt = prompts.custom_personality.format(
|
|
145
|
-
name=agent.name,
|
|
145
|
+
name=agent.name,
|
|
146
|
+
bio=agent.personality,
|
|
147
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
148
|
+
day_of_week=current_date.strftime("%A"),
|
|
146
149
|
)
|
|
147
150
|
else:
|
|
148
|
-
system_prompt = prompts.personality.format(
|
|
151
|
+
system_prompt = prompts.personality.format(
|
|
152
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
153
|
+
day_of_week=current_date.strftime("%A"),
|
|
154
|
+
)
|
|
149
155
|
|
|
150
156
|
if location_data:
|
|
151
157
|
location = f"{location_data.city}, {location_data.region}, {location_data.country}"
|
|
@@ -55,6 +55,7 @@ def extract_questions_offline(
|
|
|
55
55
|
chat_history += f"Q: {chat['intent']['query']}\n"
|
|
56
56
|
chat_history += f"Khoj: {chat['message']}\n\n"
|
|
57
57
|
|
|
58
|
+
# Get dates relative to today for prompt creation
|
|
58
59
|
today = datetime.today()
|
|
59
60
|
yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")
|
|
60
61
|
last_year = today.year - 1
|
|
@@ -62,11 +63,13 @@ def extract_questions_offline(
|
|
|
62
63
|
query=text,
|
|
63
64
|
chat_history=chat_history,
|
|
64
65
|
current_date=today.strftime("%Y-%m-%d"),
|
|
66
|
+
day_of_week=today.strftime("%A"),
|
|
65
67
|
yesterday_date=yesterday,
|
|
66
68
|
last_year=last_year,
|
|
67
69
|
this_year=today.year,
|
|
68
70
|
location=location,
|
|
69
71
|
)
|
|
72
|
+
|
|
70
73
|
messages = generate_chatml_messages_with_context(
|
|
71
74
|
example_questions, model_name=model, loaded_model=offline_chat_model, max_prompt_size=max_prompt_size
|
|
72
75
|
)
|
|
@@ -74,7 +77,7 @@ def extract_questions_offline(
|
|
|
74
77
|
state.chat_lock.acquire()
|
|
75
78
|
try:
|
|
76
79
|
response = send_message_to_model_offline(
|
|
77
|
-
messages, loaded_model=offline_chat_model, max_prompt_size=max_prompt_size
|
|
80
|
+
messages, loaded_model=offline_chat_model, model=model, max_prompt_size=max_prompt_size
|
|
78
81
|
)
|
|
79
82
|
finally:
|
|
80
83
|
state.chat_lock.release()
|
|
@@ -96,7 +99,7 @@ def extract_questions_offline(
|
|
|
96
99
|
except:
|
|
97
100
|
logger.warning(f"Llama returned invalid JSON. Falling back to using user message as search query.\n{response}")
|
|
98
101
|
return all_questions
|
|
99
|
-
logger.debug(f"
|
|
102
|
+
logger.debug(f"Questions extracted by {model}: {questions}")
|
|
100
103
|
return questions
|
|
101
104
|
|
|
102
105
|
|
|
@@ -144,14 +147,20 @@ def converse_offline(
|
|
|
144
147
|
offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
|
|
145
148
|
compiled_references_message = "\n\n".join({f"{item['compiled']}" for item in references})
|
|
146
149
|
|
|
147
|
-
current_date = datetime.now()
|
|
150
|
+
current_date = datetime.now()
|
|
148
151
|
|
|
149
152
|
if agent and agent.personality:
|
|
150
153
|
system_prompt = prompts.custom_system_prompt_offline_chat.format(
|
|
151
|
-
name=agent.name,
|
|
154
|
+
name=agent.name,
|
|
155
|
+
bio=agent.personality,
|
|
156
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
157
|
+
day_of_week=current_date.strftime("%A"),
|
|
152
158
|
)
|
|
153
159
|
else:
|
|
154
|
-
system_prompt = prompts.system_prompt_offline_chat.format(
|
|
160
|
+
system_prompt = prompts.system_prompt_offline_chat.format(
|
|
161
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
162
|
+
day_of_week=current_date.strftime("%A"),
|
|
163
|
+
)
|
|
155
164
|
|
|
156
165
|
conversation_primer = prompts.query_prompt.format(query=user_query)
|
|
157
166
|
|
|
@@ -177,9 +186,9 @@ def converse_offline(
|
|
|
177
186
|
if online_results[result].get("webpages"):
|
|
178
187
|
simplified_online_results[result] = online_results[result]["webpages"]
|
|
179
188
|
|
|
180
|
-
conversation_primer = f"{prompts.
|
|
189
|
+
conversation_primer = f"{prompts.online_search_conversation_offline.format(online_results=str(simplified_online_results))}\n{conversation_primer}"
|
|
181
190
|
if not is_none_or_empty(compiled_references_message):
|
|
182
|
-
conversation_primer = f"{prompts.notes_conversation_offline.format(references=compiled_references_message)}\n{conversation_primer}"
|
|
191
|
+
conversation_primer = f"{prompts.notes_conversation_offline.format(references=compiled_references_message)}\n\n{conversation_primer}"
|
|
183
192
|
|
|
184
193
|
# Setup Prompt with Primer or Conversation History
|
|
185
194
|
messages = generate_chatml_messages_with_context(
|
|
@@ -192,6 +201,9 @@ def converse_offline(
|
|
|
192
201
|
tokenizer_name=tokenizer_name,
|
|
193
202
|
)
|
|
194
203
|
|
|
204
|
+
truncated_messages = "\n".join({f"{message.content[:70]}..." for message in messages})
|
|
205
|
+
logger.debug(f"Conversation Context for {model}: {truncated_messages}")
|
|
206
|
+
|
|
195
207
|
g = ThreadedGenerator(references, online_results, completion_func=completion_func)
|
|
196
208
|
t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size))
|
|
197
209
|
t.start()
|
|
@@ -24,6 +24,8 @@ def download_model(repo_id: str, filename: str = "*Q4_K_M.gguf", max_tokens: int
|
|
|
24
24
|
# Add chat format if known
|
|
25
25
|
if "llama-3" in repo_id.lower():
|
|
26
26
|
kwargs["chat_format"] = "llama-3"
|
|
27
|
+
elif "gemma-2" in repo_id.lower():
|
|
28
|
+
kwargs["chat_format"] = "gemma"
|
|
27
29
|
|
|
28
30
|
# Check if the model is already downloaded
|
|
29
31
|
model_path = load_model_from_cache(repo_id, filename)
|
|
@@ -125,17 +125,23 @@ def converse(
|
|
|
125
125
|
Converse with user using OpenAI's ChatGPT
|
|
126
126
|
"""
|
|
127
127
|
# Initialize Variables
|
|
128
|
-
current_date = datetime.now()
|
|
128
|
+
current_date = datetime.now()
|
|
129
129
|
compiled_references = "\n\n".join({f"# {item['compiled']}" for item in references})
|
|
130
130
|
|
|
131
131
|
conversation_primer = prompts.query_prompt.format(query=user_query)
|
|
132
132
|
|
|
133
133
|
if agent and agent.personality:
|
|
134
134
|
system_prompt = prompts.custom_personality.format(
|
|
135
|
-
name=agent.name,
|
|
135
|
+
name=agent.name,
|
|
136
|
+
bio=agent.personality,
|
|
137
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
138
|
+
day_of_week=current_date.strftime("%A"),
|
|
136
139
|
)
|
|
137
140
|
else:
|
|
138
|
-
system_prompt = prompts.personality.format(
|
|
141
|
+
system_prompt = prompts.personality.format(
|
|
142
|
+
current_date=current_date.strftime("%Y-%m-%d"),
|
|
143
|
+
day_of_week=current_date.strftime("%A"),
|
|
144
|
+
)
|
|
139
145
|
|
|
140
146
|
if location_data:
|
|
141
147
|
location = f"{location_data.city}, {location_data.region}, {location_data.country}"
|