khoj-1.16.1.dev15-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- khoj/__init__.py +0 -0
- khoj/app/README.md +94 -0
- khoj/app/__init__.py +0 -0
- khoj/app/asgi.py +16 -0
- khoj/app/settings.py +192 -0
- khoj/app/urls.py +25 -0
- khoj/configure.py +424 -0
- khoj/database/__init__.py +0 -0
- khoj/database/adapters/__init__.py +1234 -0
- khoj/database/admin.py +290 -0
- khoj/database/apps.py +6 -0
- khoj/database/management/__init__.py +0 -0
- khoj/database/management/commands/__init__.py +0 -0
- khoj/database/management/commands/change_generated_images_url.py +61 -0
- khoj/database/management/commands/convert_images_png_to_webp.py +99 -0
- khoj/database/migrations/0001_khojuser.py +98 -0
- khoj/database/migrations/0002_googleuser.py +32 -0
- khoj/database/migrations/0003_vector_extension.py +10 -0
- khoj/database/migrations/0004_content_types_and_more.py +181 -0
- khoj/database/migrations/0005_embeddings_corpus_id.py +19 -0
- khoj/database/migrations/0006_embeddingsdates.py +33 -0
- khoj/database/migrations/0007_add_conversation.py +27 -0
- khoj/database/migrations/0008_alter_conversation_conversation_log.py +17 -0
- khoj/database/migrations/0009_khojapiuser.py +24 -0
- khoj/database/migrations/0010_chatmodeloptions_and_more.py +83 -0
- khoj/database/migrations/0010_rename_embeddings_entry_and_more.py +30 -0
- khoj/database/migrations/0011_merge_20231102_0138.py +14 -0
- khoj/database/migrations/0012_entry_file_source.py +21 -0
- khoj/database/migrations/0013_subscription.py +37 -0
- khoj/database/migrations/0014_alter_googleuser_picture.py +17 -0
- khoj/database/migrations/0015_alter_subscription_user.py +21 -0
- khoj/database/migrations/0016_alter_subscription_renewal_date.py +17 -0
- khoj/database/migrations/0017_searchmodel.py +32 -0
- khoj/database/migrations/0018_searchmodelconfig_delete_searchmodel.py +30 -0
- khoj/database/migrations/0019_alter_googleuser_family_name_and_more.py +27 -0
- khoj/database/migrations/0020_reflectivequestion.py +36 -0
- khoj/database/migrations/0021_speechtotextmodeloptions_and_more.py +42 -0
- khoj/database/migrations/0022_texttoimagemodelconfig.py +25 -0
- khoj/database/migrations/0023_usersearchmodelconfig.py +33 -0
- khoj/database/migrations/0024_alter_entry_embeddings.py +18 -0
- khoj/database/migrations/0025_clientapplication_khojuser_phone_number_and_more.py +46 -0
- khoj/database/migrations/0025_searchmodelconfig_embeddings_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0026_searchmodelconfig_cross_encoder_inference_endpoint_and_more.py +22 -0
- khoj/database/migrations/0027_merge_20240118_1324.py +13 -0
- khoj/database/migrations/0028_khojuser_verified_phone_number.py +17 -0
- khoj/database/migrations/0029_userrequests.py +27 -0
- khoj/database/migrations/0030_conversation_slug_and_title.py +38 -0
- khoj/database/migrations/0031_agent_conversation_agent.py +53 -0
- khoj/database/migrations/0031_alter_googleuser_locale.py +30 -0
- khoj/database/migrations/0032_merge_20240322_0427.py +14 -0
- khoj/database/migrations/0033_rename_tuning_agent_personality.py +17 -0
- khoj/database/migrations/0034_alter_chatmodeloptions_chat_model.py +32 -0
- khoj/database/migrations/0035_processlock.py +26 -0
- khoj/database/migrations/0036_alter_processlock_name.py +19 -0
- khoj/database/migrations/0036_delete_offlinechatprocessorconversationconfig.py +15 -0
- khoj/database/migrations/0036_publicconversation.py +42 -0
- khoj/database/migrations/0037_chatmodeloptions_openai_config_and_more.py +51 -0
- khoj/database/migrations/0037_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +32 -0
- khoj/database/migrations/0038_merge_20240425_0857.py +14 -0
- khoj/database/migrations/0038_merge_20240426_1640.py +12 -0
- khoj/database/migrations/0039_merge_20240501_0301.py +12 -0
- khoj/database/migrations/0040_alter_processlock_name.py +26 -0
- khoj/database/migrations/0040_merge_20240504_1010.py +14 -0
- khoj/database/migrations/0041_merge_20240505_1234.py +14 -0
- khoj/database/migrations/0042_serverchatsettings.py +46 -0
- khoj/database/migrations/0043_alter_chatmodeloptions_model_type.py +21 -0
- khoj/database/migrations/0044_conversation_file_filters.py +17 -0
- khoj/database/migrations/0045_fileobject.py +37 -0
- khoj/database/migrations/0046_khojuser_email_verification_code_and_more.py +22 -0
- khoj/database/migrations/0047_alter_entry_file_type.py +31 -0
- khoj/database/migrations/0048_voicemodeloption_uservoicemodelconfig.py +52 -0
- khoj/database/migrations/0049_datastore.py +38 -0
- khoj/database/migrations/0049_texttoimagemodelconfig_api_key_and_more.py +58 -0
- khoj/database/migrations/0050_alter_processlock_name.py +25 -0
- khoj/database/migrations/0051_merge_20240702_1220.py +14 -0
- khoj/database/migrations/0052_alter_searchmodelconfig_bi_encoder_docs_encode_config_and_more.py +27 -0
- khoj/database/migrations/__init__.py +0 -0
- khoj/database/models/__init__.py +402 -0
- khoj/database/tests.py +3 -0
- khoj/interface/email/feedback.html +34 -0
- khoj/interface/email/magic_link.html +17 -0
- khoj/interface/email/task.html +40 -0
- khoj/interface/email/welcome.html +61 -0
- khoj/interface/web/404.html +56 -0
- khoj/interface/web/agent.html +312 -0
- khoj/interface/web/agents.html +276 -0
- khoj/interface/web/assets/icons/agents.svg +6 -0
- khoj/interface/web/assets/icons/automation.svg +37 -0
- khoj/interface/web/assets/icons/cancel.svg +3 -0
- khoj/interface/web/assets/icons/chat.svg +24 -0
- khoj/interface/web/assets/icons/collapse.svg +17 -0
- khoj/interface/web/assets/icons/computer.png +0 -0
- khoj/interface/web/assets/icons/confirm-icon.svg +1 -0
- khoj/interface/web/assets/icons/copy-button-success.svg +6 -0
- khoj/interface/web/assets/icons/copy-button.svg +5 -0
- khoj/interface/web/assets/icons/credit-card.png +0 -0
- khoj/interface/web/assets/icons/delete.svg +26 -0
- khoj/interface/web/assets/icons/docx.svg +7 -0
- khoj/interface/web/assets/icons/edit.svg +4 -0
- khoj/interface/web/assets/icons/favicon-128x128.ico +0 -0
- khoj/interface/web/assets/icons/favicon-128x128.png +0 -0
- khoj/interface/web/assets/icons/favicon-256x256.png +0 -0
- khoj/interface/web/assets/icons/favicon.icns +0 -0
- khoj/interface/web/assets/icons/github.svg +1 -0
- khoj/interface/web/assets/icons/key.svg +4 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-200.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways-500.png +0 -0
- khoj/interface/web/assets/icons/khoj-logo-sideways.svg +5385 -0
- khoj/interface/web/assets/icons/logotype.svg +1 -0
- khoj/interface/web/assets/icons/markdown.svg +1 -0
- khoj/interface/web/assets/icons/new.svg +23 -0
- khoj/interface/web/assets/icons/notion.svg +4 -0
- khoj/interface/web/assets/icons/openai-logomark.svg +1 -0
- khoj/interface/web/assets/icons/org.svg +1 -0
- khoj/interface/web/assets/icons/pdf.svg +23 -0
- khoj/interface/web/assets/icons/pencil-edit.svg +5 -0
- khoj/interface/web/assets/icons/plaintext.svg +1 -0
- khoj/interface/web/assets/icons/question-mark-icon.svg +1 -0
- khoj/interface/web/assets/icons/search.svg +25 -0
- khoj/interface/web/assets/icons/send.svg +1 -0
- khoj/interface/web/assets/icons/share.svg +8 -0
- khoj/interface/web/assets/icons/speaker.svg +4 -0
- khoj/interface/web/assets/icons/stop-solid.svg +37 -0
- khoj/interface/web/assets/icons/sync.svg +4 -0
- khoj/interface/web/assets/icons/thumbs-down-svgrepo-com.svg +6 -0
- khoj/interface/web/assets/icons/thumbs-up-svgrepo-com.svg +6 -0
- khoj/interface/web/assets/icons/user-silhouette.svg +4 -0
- khoj/interface/web/assets/icons/voice.svg +8 -0
- khoj/interface/web/assets/icons/web.svg +2 -0
- khoj/interface/web/assets/icons/whatsapp.svg +17 -0
- khoj/interface/web/assets/khoj.css +237 -0
- khoj/interface/web/assets/markdown-it.min.js +8476 -0
- khoj/interface/web/assets/natural-cron.min.js +1 -0
- khoj/interface/web/assets/org.min.js +1823 -0
- khoj/interface/web/assets/pico.min.css +5 -0
- khoj/interface/web/assets/purify.min.js +3 -0
- khoj/interface/web/assets/samples/desktop-browse-draw-sample.png +0 -0
- khoj/interface/web/assets/samples/desktop-plain-chat-sample.png +0 -0
- khoj/interface/web/assets/samples/desktop-remember-plan-sample.png +0 -0
- khoj/interface/web/assets/samples/phone-browse-draw-sample.png +0 -0
- khoj/interface/web/assets/samples/phone-plain-chat-sample.png +0 -0
- khoj/interface/web/assets/samples/phone-remember-plan-sample.png +0 -0
- khoj/interface/web/assets/utils.js +33 -0
- khoj/interface/web/base_config.html +445 -0
- khoj/interface/web/chat.html +3546 -0
- khoj/interface/web/config.html +1011 -0
- khoj/interface/web/config_automation.html +1103 -0
- khoj/interface/web/content_source_computer_input.html +139 -0
- khoj/interface/web/content_source_github_input.html +216 -0
- khoj/interface/web/content_source_notion_input.html +94 -0
- khoj/interface/web/khoj.webmanifest +51 -0
- khoj/interface/web/login.html +219 -0
- khoj/interface/web/public_conversation.html +2006 -0
- khoj/interface/web/search.html +470 -0
- khoj/interface/web/utils.html +48 -0
- khoj/main.py +241 -0
- khoj/manage.py +22 -0
- khoj/migrations/__init__.py +0 -0
- khoj/migrations/migrate_offline_chat_default_model.py +69 -0
- khoj/migrations/migrate_offline_chat_default_model_2.py +71 -0
- khoj/migrations/migrate_offline_chat_schema.py +83 -0
- khoj/migrations/migrate_offline_model.py +29 -0
- khoj/migrations/migrate_processor_config_openai.py +67 -0
- khoj/migrations/migrate_server_pg.py +138 -0
- khoj/migrations/migrate_version.py +17 -0
- khoj/processor/__init__.py +0 -0
- khoj/processor/content/__init__.py +0 -0
- khoj/processor/content/docx/__init__.py +0 -0
- khoj/processor/content/docx/docx_to_entries.py +110 -0
- khoj/processor/content/github/__init__.py +0 -0
- khoj/processor/content/github/github_to_entries.py +224 -0
- khoj/processor/content/images/__init__.py +0 -0
- khoj/processor/content/images/image_to_entries.py +118 -0
- khoj/processor/content/markdown/__init__.py +0 -0
- khoj/processor/content/markdown/markdown_to_entries.py +165 -0
- khoj/processor/content/notion/notion_to_entries.py +260 -0
- khoj/processor/content/org_mode/__init__.py +0 -0
- khoj/processor/content/org_mode/org_to_entries.py +231 -0
- khoj/processor/content/org_mode/orgnode.py +532 -0
- khoj/processor/content/pdf/__init__.py +0 -0
- khoj/processor/content/pdf/pdf_to_entries.py +116 -0
- khoj/processor/content/plaintext/__init__.py +0 -0
- khoj/processor/content/plaintext/plaintext_to_entries.py +122 -0
- khoj/processor/content/text_to_entries.py +297 -0
- khoj/processor/conversation/__init__.py +0 -0
- khoj/processor/conversation/anthropic/__init__.py +0 -0
- khoj/processor/conversation/anthropic/anthropic_chat.py +206 -0
- khoj/processor/conversation/anthropic/utils.py +114 -0
- khoj/processor/conversation/offline/__init__.py +0 -0
- khoj/processor/conversation/offline/chat_model.py +231 -0
- khoj/processor/conversation/offline/utils.py +78 -0
- khoj/processor/conversation/offline/whisper.py +15 -0
- khoj/processor/conversation/openai/__init__.py +0 -0
- khoj/processor/conversation/openai/gpt.py +187 -0
- khoj/processor/conversation/openai/utils.py +129 -0
- khoj/processor/conversation/openai/whisper.py +13 -0
- khoj/processor/conversation/prompts.py +758 -0
- khoj/processor/conversation/utils.py +262 -0
- khoj/processor/embeddings.py +117 -0
- khoj/processor/speech/__init__.py +0 -0
- khoj/processor/speech/text_to_speech.py +51 -0
- khoj/processor/tools/__init__.py +0 -0
- khoj/processor/tools/online_search.py +225 -0
- khoj/routers/__init__.py +0 -0
- khoj/routers/api.py +626 -0
- khoj/routers/api_agents.py +43 -0
- khoj/routers/api_chat.py +1180 -0
- khoj/routers/api_config.py +434 -0
- khoj/routers/api_phone.py +86 -0
- khoj/routers/auth.py +181 -0
- khoj/routers/email.py +133 -0
- khoj/routers/helpers.py +1188 -0
- khoj/routers/indexer.py +349 -0
- khoj/routers/notion.py +91 -0
- khoj/routers/storage.py +35 -0
- khoj/routers/subscription.py +104 -0
- khoj/routers/twilio.py +36 -0
- khoj/routers/web_client.py +471 -0
- khoj/search_filter/__init__.py +0 -0
- khoj/search_filter/base_filter.py +15 -0
- khoj/search_filter/date_filter.py +217 -0
- khoj/search_filter/file_filter.py +30 -0
- khoj/search_filter/word_filter.py +29 -0
- khoj/search_type/__init__.py +0 -0
- khoj/search_type/text_search.py +241 -0
- khoj/utils/__init__.py +0 -0
- khoj/utils/cli.py +93 -0
- khoj/utils/config.py +81 -0
- khoj/utils/constants.py +24 -0
- khoj/utils/fs_syncer.py +249 -0
- khoj/utils/helpers.py +418 -0
- khoj/utils/initialization.py +146 -0
- khoj/utils/jsonl.py +43 -0
- khoj/utils/models.py +47 -0
- khoj/utils/rawconfig.py +160 -0
- khoj/utils/state.py +46 -0
- khoj/utils/yaml.py +43 -0
- khoj-1.16.1.dev15.dist-info/METADATA +178 -0
- khoj-1.16.1.dev15.dist-info/RECORD +242 -0
- khoj-1.16.1.dev15.dist-info/WHEEL +4 -0
- khoj-1.16.1.dev15.dist-info/entry_points.txt +2 -0
- khoj-1.16.1.dev15.dist-info/licenses/LICENSE +661 -0
khoj/processor/conversation/anthropic/utils.py
@@ -0,0 +1,114 @@
+import logging
+from threading import Thread
+from typing import Dict, List
+
+import anthropic
+from tenacity import (
+    before_sleep_log,
+    retry,
+    stop_after_attempt,
+    wait_exponential,
+    wait_random_exponential,
+)
+
+from khoj.processor.conversation.utils import ThreadedGenerator
+
+logger = logging.getLogger(__name__)
+
+anthropic_clients: Dict[str, anthropic.Anthropic] = {}
+
+
+DEFAULT_MAX_TOKENS_ANTHROPIC = 3000
+
+
+@retry(
+    wait=wait_random_exponential(min=1, max=10),
+    stop=stop_after_attempt(2),
+    before_sleep=before_sleep_log(logger, logging.DEBUG),
+    reraise=True,
+)
+def anthropic_completion_with_backoff(
+    messages, system_prompt, model_name, temperature=0, api_key=None, model_kwargs=None, max_tokens=None
+) -> str:
+    if api_key not in anthropic_clients:
+        client: anthropic.Anthropic = anthropic.Anthropic(api_key=api_key)
+        anthropic_clients[api_key] = client
+    else:
+        client = anthropic_clients[api_key]
+
+    formatted_messages = [{"role": message.role, "content": message.content} for message in messages]
+
+    aggregated_response = ""
+    max_tokens = max_tokens or DEFAULT_MAX_TOKENS_ANTHROPIC
+
+    model_kwargs = model_kwargs or dict()
+    if system_prompt:
+        model_kwargs["system"] = system_prompt
+
+    with client.messages.stream(
+        messages=formatted_messages,
+        model=model_name,  # type: ignore
+        temperature=temperature,
+        timeout=20,
+        max_tokens=max_tokens,
+        **(model_kwargs),
+    ) as stream:
+        for text in stream.text_stream:
+            aggregated_response += text
+
+    return aggregated_response
+
+
+@retry(
+    wait=wait_exponential(multiplier=1, min=4, max=10),
+    stop=stop_after_attempt(2),
+    before_sleep=before_sleep_log(logger, logging.DEBUG),
+    reraise=True,
+)
+def anthropic_chat_completion_with_backoff(
+    messages,
+    compiled_references,
+    online_results,
+    model_name,
+    temperature,
+    api_key,
+    system_prompt,
+    max_prompt_size=None,
+    completion_func=None,
+    model_kwargs=None,
+):
+    g = ThreadedGenerator(compiled_references, online_results, completion_func=completion_func)
+    t = Thread(
+        target=anthropic_llm_thread,
+        args=(g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size, model_kwargs),
+    )
+    t.start()
+    return g
+
+
+def anthropic_llm_thread(
+    g, messages, system_prompt, model_name, temperature, api_key, max_prompt_size=None, model_kwargs=None
+):
+    if api_key not in anthropic_clients:
+        client: anthropic.Anthropic = anthropic.Anthropic(api_key=api_key)
+        anthropic_clients[api_key] = client
+    else:
+        client: anthropic.Anthropic = anthropic_clients[api_key]
+
+    formatted_messages: List[anthropic.types.MessageParam] = [
+        anthropic.types.MessageParam(role=message.role, content=message.content) for message in messages
+    ]
+
+    with client.messages.stream(
+        messages=formatted_messages,
+        model=model_name,  # type: ignore
+        temperature=temperature,
+        system=system_prompt,
+        timeout=20,
+        max_tokens=DEFAULT_MAX_TOKENS_ANTHROPIC,
+        **(model_kwargs or dict()),
+    ) as stream:
+        for text in stream.text_stream:
+            g.send(text)
+
+    g.close()
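For orientation, here is a minimal usage sketch of the non-streaming helper above. It is an illustration, not part of the package: the model id and API key are placeholders, and the only requirement on the message objects is that they expose `.role` and `.content`, which the `ChatMessage` class from langchain (used by khoj's other conversation processors) satisfies.

```python
# Minimal usage sketch for anthropic_completion_with_backoff.
# Assumptions: a valid Anthropic API key; model id is a placeholder.
from langchain.schema import ChatMessage

from khoj.processor.conversation.anthropic.utils import (
    anthropic_completion_with_backoff,
)

messages = [ChatMessage(role="user", content="Summarize my notes on graph theory.")]

# Blocks until the stream completes, retrying once on transient errors,
# then returns the aggregated response text.
response = anthropic_completion_with_backoff(
    messages=messages,
    system_prompt="You are Khoj, a helpful personal assistant.",
    model_name="claude-3-haiku-20240307",  # placeholder model id
    api_key="sk-ant-...",  # placeholder
)
print(response)
```

Note the module-level `anthropic_clients` dict: repeated calls with the same API key reuse a single HTTP client across both the blocking and threaded helpers.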
khoj/processor/conversation/offline/__init__.py
File without changes

khoj/processor/conversation/offline/chat_model.py
@@ -0,0 +1,231 @@
+import json
+import logging
+from datetime import datetime, timedelta
+from threading import Thread
+from typing import Any, Iterator, List, Union
+
+from langchain.schema import ChatMessage
+from llama_cpp import Llama
+
+from khoj.database.models import Agent
+from khoj.processor.conversation import prompts
+from khoj.processor.conversation.offline.utils import download_model
+from khoj.processor.conversation.utils import (
+    ThreadedGenerator,
+    generate_chatml_messages_with_context,
+)
+from khoj.utils import state
+from khoj.utils.constants import empty_escape_sequences
+from khoj.utils.helpers import ConversationCommand, is_none_or_empty
+from khoj.utils.rawconfig import LocationData
+
+logger = logging.getLogger(__name__)
+
+
+def extract_questions_offline(
+    text: str,
+    model: str = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
+    loaded_model: Union[Any, None] = None,
+    conversation_log={},
+    use_history: bool = True,
+    should_extract_questions: bool = True,
+    location_data: LocationData = None,
+    max_prompt_size: int = None,
+) -> List[str]:
+    """
+    Infer search queries to retrieve relevant notes to answer user query
+    """
+    all_questions = text.split("? ")
+    all_questions = [q + "?" for q in all_questions[:-1]] + [all_questions[-1]]
+
+    if not should_extract_questions:
+        return all_questions
+
+    assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
+    offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
+
+    location = f"{location_data.city}, {location_data.region}, {location_data.country}" if location_data else "Unknown"
+
+    # Extract Past User Message and Inferred Questions from Conversation Log
+    chat_history = ""
+
+    if use_history:
+        for chat in conversation_log.get("chat", [])[-4:]:
+            if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type"):
+                chat_history += f"Q: {chat['intent']['query']}\n"
+                chat_history += f"Khoj: {chat['message']}\n\n"
+
+    today = datetime.today()
+    yesterday = (today - timedelta(days=1)).strftime("%Y-%m-%d")
+    last_year = today.year - 1
+    example_questions = prompts.extract_questions_offline.format(
+        query=text,
+        chat_history=chat_history,
+        current_date=today.strftime("%Y-%m-%d"),
+        yesterday_date=yesterday,
+        last_year=last_year,
+        this_year=today.year,
+        location=location,
+    )
+    messages = generate_chatml_messages_with_context(
+        example_questions, model_name=model, loaded_model=offline_chat_model, max_prompt_size=max_prompt_size
+    )
+
+    state.chat_lock.acquire()
+    try:
+        response = send_message_to_model_offline(
+            messages, loaded_model=offline_chat_model, max_prompt_size=max_prompt_size
+        )
+    finally:
+        state.chat_lock.release()
+
+    # Extract, Clean Message from GPT's Response
+    try:
+        # This will expect to be a list with a single string with a list of questions
+        questions_str = (
+            str(response)
+            .strip(empty_escape_sequences)
+            .replace("['", '["')
+            .replace("<s>", "")
+            .replace("</s>", "")
+            .replace("']", '"]')
+            .replace("', '", '", "')
+        )
+        questions: List[str] = json.loads(questions_str)
+        questions = filter_questions(questions)
+    except:
+        logger.warning(f"Llama returned invalid JSON. Falling back to using user message as search query.\n{response}")
+        return all_questions
+    logger.debug(f"Extracted Questions by Llama: {questions}")
+    return questions
+
+
+def filter_questions(questions: List[str]):
+    # Skip questions that seem to be apologizing for not being able to answer the question
+    hint_words = [
+        "sorry",
+        "apologize",
+        "unable",
+        "can't",
+        "cannot",
+        "don't know",
+        "don't understand",
+        "do not know",
+        "do not understand",
+    ]
+    filtered_questions = set()
+    for q in questions:
+        if not any([word in q.lower() for word in hint_words]) and not is_none_or_empty(q):
+            filtered_questions.add(q)
+
+    return list(filtered_questions)
+
+
+def converse_offline(
+    user_query,
+    references=[],
+    online_results=[],
+    conversation_log={},
+    model: str = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
+    loaded_model: Union[Any, None] = None,
+    completion_func=None,
+    conversation_commands=[ConversationCommand.Default],
+    max_prompt_size=None,
+    tokenizer_name=None,
+    location_data: LocationData = None,
+    user_name: str = None,
+    agent: Agent = None,
+) -> Union[ThreadedGenerator, Iterator[str]]:
+    """
+    Converse with user using Llama
+    """
+    # Initialize Variables
+    assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
+    offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
+    compiled_references_message = "\n\n".join({f"{item['compiled']}" for item in references})
+
+    current_date = datetime.now().strftime("%Y-%m-%d")
+
+    if agent and agent.personality:
+        system_prompt = prompts.custom_system_prompt_offline_chat.format(
+            name=agent.name, bio=agent.personality, current_date=current_date
+        )
+    else:
+        system_prompt = prompts.system_prompt_offline_chat.format(current_date=current_date)
+
+    conversation_primer = prompts.query_prompt.format(query=user_query)
+
+    if location_data:
+        location = f"{location_data.city}, {location_data.region}, {location_data.country}"
+        location_prompt = prompts.user_location.format(location=location)
+        system_prompt = f"{system_prompt}\n{location_prompt}"
+
+    if user_name:
+        user_name_prompt = prompts.user_name.format(name=user_name)
+        system_prompt = f"{system_prompt}\n{user_name_prompt}"
+
+    # Get Conversation Primer appropriate to Conversation Type
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references_message):
+        return iter([prompts.no_notes_found.format()])
+    elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
+        completion_func(chat_response=prompts.no_online_results_found.format())
+        return iter([prompts.no_online_results_found.format()])
+
+    if ConversationCommand.Online in conversation_commands:
+        simplified_online_results = online_results.copy()
+        for result in online_results:
+            if online_results[result].get("webpages"):
+                simplified_online_results[result] = online_results[result]["webpages"]
+
+        conversation_primer = f"{prompts.online_search_conversation.format(online_results=str(simplified_online_results))}\n{conversation_primer}"
+    if not is_none_or_empty(compiled_references_message):
+        conversation_primer = f"{prompts.notes_conversation_offline.format(references=compiled_references_message)}\n{conversation_primer}"
+
+    # Setup Prompt with Primer or Conversation History
+    messages = generate_chatml_messages_with_context(
+        conversation_primer,
+        system_prompt,
+        conversation_log,
+        model_name=model,
+        loaded_model=offline_chat_model,
+        max_prompt_size=max_prompt_size,
+        tokenizer_name=tokenizer_name,
+    )
+
+    g = ThreadedGenerator(references, online_results, completion_func=completion_func)
+    t = Thread(target=llm_thread, args=(g, messages, offline_chat_model, max_prompt_size))
+    t.start()
+    return g
+
+
+def llm_thread(g, messages: List[ChatMessage], model: Any, max_prompt_size: int = None):
+    stop_phrases = ["<s>", "INST]", "Notes:"]
+
+    state.chat_lock.acquire()
+    try:
+        response_iterator = send_message_to_model_offline(
+            messages, loaded_model=model, stop=stop_phrases, max_prompt_size=max_prompt_size, streaming=True
+        )
+        for response in response_iterator:
+            g.send(response["choices"][0]["delta"].get("content", ""))
+    finally:
+        state.chat_lock.release()
+    g.close()
+
+
+def send_message_to_model_offline(
+    messages: List[ChatMessage],
+    loaded_model=None,
+    model="NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
+    streaming=False,
+    stop=[],
+    max_prompt_size: int = None,
+):
+    assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
+    offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
+    messages_dict = [{"role": message.role, "content": message.content} for message in messages]
+    response = offline_chat_model.create_chat_completion(messages_dict, stop=stop, stream=streaming)
+    if streaming:
+        return response
+    else:
+        return response["choices"][0]["message"].get("content", "")
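As a rough sketch of how the offline pipeline above fits together (assumptions: `llama-cpp-python` is installed, downloading the default GGUF weights on first run is acceptable, and the query string is invented):

```python
# Usage sketch for the offline chat module. Not part of the package;
# the first run downloads the default GGUF model weights.
from khoj.processor.conversation.offline.chat_model import extract_questions_offline
from khoj.processor.conversation.offline.utils import download_model

# Load (or fetch) the default offline chat model once, then reuse it so
# later calls skip the download/model-load step.
chat_model = download_model("NousResearch/Hermes-2-Pro-Mistral-7B-GGUF")

# Infer search queries for a user question. If the model emits invalid
# JSON, the function falls back to splitting the raw question text.
queries = extract_questions_offline(
    "What did I write about spaced repetition?",  # invented query
    loaded_model=chat_model,
)
print(queries)
```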
khoj/processor/conversation/offline/utils.py
@@ -0,0 +1,78 @@
+import glob
+import logging
+import math
+import os
+from typing import Any, Dict
+
+from huggingface_hub.constants import HF_HUB_CACHE
+
+from khoj.utils import state
+from khoj.utils.helpers import get_device_memory
+
+logger = logging.getLogger(__name__)
+
+
+def download_model(repo_id: str, filename: str = "*Q4_K_M.gguf", max_tokens: int = None):
+    # Initialize Model Parameters
+    # Use n_ctx=0 to get context size from the model
+    kwargs: Dict[str, Any] = {"n_threads": 4, "n_ctx": 0, "verbose": False}
+
+    # Decide whether to load model to GPU or CPU
+    device = "gpu" if state.chat_on_gpu and state.device != "cpu" else "cpu"
+    kwargs["n_gpu_layers"] = -1 if device == "gpu" else 0
+
+    # Add chat format if known
+    if "llama-3" in repo_id.lower():
+        kwargs["chat_format"] = "llama-3"
+
+    # Check if the model is already downloaded
+    model_path = load_model_from_cache(repo_id, filename)
+    chat_model = None
+    try:
+        chat_model = load_model(model_path, repo_id, filename, kwargs)
+    except:
+        # Load model on CPU if GPU is not available
+        kwargs["n_gpu_layers"], device = 0, "cpu"
+        chat_model = load_model(model_path, repo_id, filename, kwargs)
+
+    # Now load the model with context size set based on:
+    # 1. context size supported by model and
+    # 2. configured size or machine (V)RAM
+    kwargs["n_ctx"] = infer_max_tokens(chat_model.n_ctx(), max_tokens)
+    chat_model = load_model(model_path, repo_id, filename, kwargs)
+
+    logger.debug(
+        f"{'Loaded' if model_path else 'Downloaded'} chat model to {device.upper()} with {kwargs['n_ctx']} token context window."
+    )
+    return chat_model
+
+
+def load_model(model_path: str, repo_id: str, filename: str = "*Q4_K_M.gguf", kwargs: dict = {}):
+    from llama_cpp.llama import Llama
+
+    if model_path:
+        return Llama(model_path, **kwargs)
+    else:
+        return Llama.from_pretrained(repo_id=repo_id, filename=filename, **kwargs)
+
+
+def load_model_from_cache(repo_id: str, filename: str, repo_type="models"):
+    # Construct the path to the model file in the cache directory
+    repo_org, repo_name = repo_id.split("/")
+    object_id = "--".join([repo_type, repo_org, repo_name])
+    model_path = os.path.sep.join([HF_HUB_CACHE, object_id, "snapshots", "**", filename])
+
+    # Check if the model file exists
+    paths = glob.glob(model_path)
+    if paths:
+        return paths[0]
+    else:
+        return None
+
+
+def infer_max_tokens(model_context_window: int, configured_max_tokens=None) -> int:
+    """Infer max prompt size based on device memory and max context window supported by the model"""
+    configured_max_tokens = math.inf if configured_max_tokens is None else configured_max_tokens
+    vram_based_n_ctx = int(get_device_memory() / 2e6)  # based on heuristic
+    configured_max_tokens = configured_max_tokens or math.inf  # do not use if set to None
+    return min(configured_max_tokens, vram_based_n_ctx, model_context_window)
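The context-window negotiation in `infer_max_tokens` is just a three-way minimum over the configured limit, a (V)RAM heuristic of roughly 2 MB per token, and the model's own window. Below is a self-contained mirror of that arithmetic, with device memory passed in explicitly so it runs without khoj's `get_device_memory` helper; the memory figures are invented for illustration.

```python
# Worked sketch of the infer_max_tokens heuristic (values are invented).
import math

def infer_max_tokens(model_context_window, configured_max_tokens=None,
                     device_memory_bytes=8e9):
    # Mirror of the packaged function, with device memory injected so the
    # arithmetic is visible in isolation.
    configured_max_tokens = math.inf if configured_max_tokens is None else configured_max_tokens
    vram_based_n_ctx = int(device_memory_bytes / 2e6)  # ~2 MB of (V)RAM per token
    return min(configured_max_tokens, vram_based_n_ctx, model_context_window)

print(infer_max_tokens(32768))        # 4000: the 8 GB VRAM heuristic wins
print(infer_max_tokens(2048))         # 2048: the model's context window wins
print(infer_max_tokens(32768, 1024))  # 1024: the configured limit wins
```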
khoj/processor/conversation/offline/whisper.py
@@ -0,0 +1,15 @@
+import whisper
+from asgiref.sync import sync_to_async
+
+from khoj.utils import state
+
+
+async def transcribe_audio_offline(audio_filename: str, model: str) -> str:
+    """
+    Transcribe audio file offline using Whisper
+    """
+    # Send the audio data to the Whisper API
+    if not state.whisper_model:
+        state.whisper_model = whisper.load_model(model)
+    response = await sync_to_async(state.whisper_model.transcribe)(audio_filename)
+    return response["text"]
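A minimal sketch of driving the coroutine above, assuming the `openai-whisper` package is installed; the `sample.wav` filename and the `"base"` model size are placeholders.

```python
# Usage sketch for transcribe_audio_offline. Assumes openai-whisper is
# installed and "sample.wav" is a real audio file on disk.
import asyncio

from khoj.processor.conversation.offline.whisper import transcribe_audio_offline

async def main():
    # "base" is one of the standard Whisper model sizes; first use downloads it.
    text = await transcribe_audio_offline("sample.wav", model="base")
    print(text)

asyncio.run(main())
```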
khoj/processor/conversation/openai/__init__.py
File without changes

khoj/processor/conversation/openai/gpt.py
@@ -0,0 +1,187 @@
+import json
+import logging
+from datetime import datetime, timedelta
+from typing import Dict, Optional
+
+from langchain.schema import ChatMessage
+
+from khoj.database.models import Agent
+from khoj.processor.conversation import prompts
+from khoj.processor.conversation.openai.utils import (
+    chat_completion_with_backoff,
+    completion_with_backoff,
+)
+from khoj.processor.conversation.utils import generate_chatml_messages_with_context
+from khoj.utils.helpers import ConversationCommand, is_none_or_empty
+from khoj.utils.rawconfig import LocationData
+
+logger = logging.getLogger(__name__)
+
+
+def extract_questions(
+    text,
+    model: Optional[str] = "gpt-4-turbo-preview",
+    conversation_log={},
+    api_key=None,
+    api_base_url=None,
+    temperature=0,
+    max_tokens=100,
+    location_data: LocationData = None,
+):
+    """
+    Infer search queries to retrieve relevant notes to answer user query
+    """
+    location = f"{location_data.city}, {location_data.region}, {location_data.country}" if location_data else "Unknown"
+
+    # Extract Past User Message and Inferred Questions from Conversation Log
+    chat_history = "".join(
+        [
+            f'Q: {chat["intent"]["query"]}\nKhoj: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\nA: {chat["message"]}\n\n'
+            for chat in conversation_log.get("chat", [])[-4:]
+            if chat["by"] == "khoj" and "text-to-image" not in chat["intent"].get("type")
+        ]
+    )
+
+    # Get dates relative to today for prompt creation
+    today = datetime.today()
+    current_new_year = today.replace(month=1, day=1)
+    last_new_year = current_new_year.replace(year=today.year - 1)
+
+    prompt = prompts.extract_questions.format(
+        current_date=today.strftime("%Y-%m-%d"),
+        day_of_week=today.strftime("%A"),
+        last_new_year=last_new_year.strftime("%Y"),
+        last_new_year_date=last_new_year.strftime("%Y-%m-%d"),
+        current_new_year_date=current_new_year.strftime("%Y-%m-%d"),
+        bob_tom_age_difference={current_new_year.year - 1984 - 30},
+        bob_age={current_new_year.year - 1984},
+        chat_history=chat_history,
+        text=text,
+        yesterday_date=(today - timedelta(days=1)).strftime("%Y-%m-%d"),
+        location=location,
+    )
+    messages = [ChatMessage(content=prompt, role="user")]
+
+    # Get Response from GPT
+    response = completion_with_backoff(
+        messages=messages,
+        model=model,
+        temperature=temperature,
+        api_base_url=api_base_url,
+        model_kwargs={"response_format": {"type": "json_object"}},
+        openai_api_key=api_key,
+    )
+
+    # Extract, Clean Message from GPT's Response
+    try:
+        response = response.strip()
+        response = json.loads(response)
+        response = [q.strip() for q in response["queries"] if q.strip()]
+        if not isinstance(response, list) or not response:
+            logger.error(f"Invalid response for constructing subqueries: {response}")
+            return [text]
+        return response
+    except:
+        logger.warning(f"GPT returned invalid JSON. Falling back to using user message as search query.\n{response}")
+        questions = [text]
+
+    logger.debug(f"Extracted Questions by GPT: {questions}")
+    return questions
+
+
+def send_message_to_model(messages, api_key, model, response_type="text", api_base_url=None):
+    """
+    Send message to model
+    """
+
+    # Get Response from GPT
+    return completion_with_backoff(
+        messages=messages,
+        model=model,
+        openai_api_key=api_key,
+        api_base_url=api_base_url,
+        model_kwargs={"response_format": {"type": response_type}},
+    )
+
+
+def converse(
+    references,
+    user_query,
+    online_results: Optional[Dict[str, Dict]] = None,
+    conversation_log={},
+    model: str = "gpt-3.5-turbo",
+    api_key: Optional[str] = None,
+    api_base_url: Optional[str] = None,
+    temperature: float = 0.2,
+    completion_func=None,
+    conversation_commands=[ConversationCommand.Default],
+    max_prompt_size=None,
+    tokenizer_name=None,
+    location_data: LocationData = None,
+    user_name: str = None,
+    agent: Agent = None,
+):
+    """
+    Converse with user using OpenAI's ChatGPT
+    """
+    # Initialize Variables
+    current_date = datetime.now().strftime("%Y-%m-%d")
+    compiled_references = "\n\n".join({f"# {item['compiled']}" for item in references})
+
+    conversation_primer = prompts.query_prompt.format(query=user_query)
+
+    if agent and agent.personality:
+        system_prompt = prompts.custom_personality.format(
+            name=agent.name, bio=agent.personality, current_date=current_date
+        )
+    else:
+        system_prompt = prompts.personality.format(current_date=current_date)
+
+    if location_data:
+        location = f"{location_data.city}, {location_data.region}, {location_data.country}"
+        location_prompt = prompts.user_location.format(location=location)
+        system_prompt = f"{system_prompt}\n{location_prompt}"
+
+    if user_name:
+        user_name_prompt = prompts.user_name.format(name=user_name)
+        system_prompt = f"{system_prompt}\n{user_name_prompt}"
+
+    # Get Conversation Primer appropriate to Conversation Type
+    if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references):
+        completion_func(chat_response=prompts.no_notes_found.format())
+        return iter([prompts.no_notes_found.format()])
+    elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results):
+        completion_func(chat_response=prompts.no_online_results_found.format())
+        return iter([prompts.no_online_results_found.format()])
+
+    if not is_none_or_empty(online_results):
+        conversation_primer = (
+            f"{prompts.online_search_conversation.format(online_results=str(online_results))}\n{conversation_primer}"
+        )
+    if not is_none_or_empty(compiled_references):
+        conversation_primer = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n{conversation_primer}"
+
+    # Setup Prompt with Primer or Conversation History
+    messages = generate_chatml_messages_with_context(
+        conversation_primer,
+        system_prompt,
+        conversation_log,
+        model_name=model,
+        max_prompt_size=max_prompt_size,
+        tokenizer_name=tokenizer_name,
+    )
+    truncated_messages = "\n".join({f"{message.content[:70]}..." for message in messages})
+    logger.debug(f"Conversation Context for GPT: {truncated_messages}")
+
+    # Get Response from GPT
+    return chat_completion_with_backoff(
+        messages=messages,
+        compiled_references=references,
+        online_results=online_results,
+        model_name=model,
+        temperature=temperature,
+        openai_api_key=api_key,
+        api_base_url=api_base_url,
+        completion_func=completion_func,
+        model_kwargs={"stop": ["Notes:\n["]},
+    )
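For comparison with the offline variant, here is a sketch of the OpenAI extraction path. The API key is a placeholder and the `LocationData` values are invented; its `city`, `region`, and `country` fields are the ones the module reads above.

```python
# Usage sketch for the OpenAI question extractor (API key is a placeholder).
from khoj.processor.conversation.openai.gpt import extract_questions
from khoj.utils.rawconfig import LocationData

queries = extract_questions(
    "Where did I travel last summer?",  # invented query
    model="gpt-4-turbo-preview",
    api_key="sk-...",  # placeholder
    location_data=LocationData(city="Berlin", region="Berlin", country="Germany"),
)
print(queries)
```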