ai-parrot 0.8.3__cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-parrot might be problematic.
- ai_parrot-0.8.3.dist-info/LICENSE +21 -0
- ai_parrot-0.8.3.dist-info/METADATA +306 -0
- ai_parrot-0.8.3.dist-info/RECORD +128 -0
- ai_parrot-0.8.3.dist-info/WHEEL +6 -0
- ai_parrot-0.8.3.dist-info/top_level.txt +2 -0
- parrot/__init__.py +30 -0
- parrot/bots/__init__.py +5 -0
- parrot/bots/abstract.py +1115 -0
- parrot/bots/agent.py +492 -0
- parrot/bots/basic.py +9 -0
- parrot/bots/bose.py +17 -0
- parrot/bots/chatbot.py +271 -0
- parrot/bots/cody.py +17 -0
- parrot/bots/copilot.py +117 -0
- parrot/bots/data.py +730 -0
- parrot/bots/dataframe.py +103 -0
- parrot/bots/hrbot.py +15 -0
- parrot/bots/interfaces/__init__.py +1 -0
- parrot/bots/interfaces/retrievers.py +12 -0
- parrot/bots/notebook.py +619 -0
- parrot/bots/odoo.py +17 -0
- parrot/bots/prompts/__init__.py +41 -0
- parrot/bots/prompts/agents.py +91 -0
- parrot/bots/prompts/data.py +214 -0
- parrot/bots/retrievals/__init__.py +1 -0
- parrot/bots/retrievals/constitutional.py +19 -0
- parrot/bots/retrievals/multi.py +122 -0
- parrot/bots/retrievals/retrieval.py +610 -0
- parrot/bots/tools/__init__.py +7 -0
- parrot/bots/tools/eda.py +325 -0
- parrot/bots/tools/pdf.py +50 -0
- parrot/bots/tools/plot.py +48 -0
- parrot/bots/troc.py +16 -0
- parrot/conf.py +170 -0
- parrot/crew/__init__.py +3 -0
- parrot/crew/tools/__init__.py +22 -0
- parrot/crew/tools/bing.py +13 -0
- parrot/crew/tools/config.py +43 -0
- parrot/crew/tools/duckgo.py +62 -0
- parrot/crew/tools/file.py +24 -0
- parrot/crew/tools/google.py +168 -0
- parrot/crew/tools/gtrends.py +16 -0
- parrot/crew/tools/md2pdf.py +25 -0
- parrot/crew/tools/rag.py +42 -0
- parrot/crew/tools/search.py +32 -0
- parrot/crew/tools/url.py +21 -0
- parrot/exceptions.cpython-311-x86_64-linux-gnu.so +0 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/agents.py +292 -0
- parrot/handlers/bots.py +196 -0
- parrot/handlers/chat.py +192 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/database.py +27 -0
- parrot/interfaces/http.py +805 -0
- parrot/interfaces/images/__init__.py +0 -0
- parrot/interfaces/images/plugins/__init__.py +18 -0
- parrot/interfaces/images/plugins/abstract.py +58 -0
- parrot/interfaces/images/plugins/exif.py +709 -0
- parrot/interfaces/images/plugins/hash.py +52 -0
- parrot/interfaces/images/plugins/vision.py +104 -0
- parrot/interfaces/images/plugins/yolo.py +66 -0
- parrot/interfaces/images/plugins/zerodetect.py +197 -0
- parrot/llms/__init__.py +1 -0
- parrot/llms/abstract.py +69 -0
- parrot/llms/anthropic.py +58 -0
- parrot/llms/gemma.py +15 -0
- parrot/llms/google.py +44 -0
- parrot/llms/groq.py +67 -0
- parrot/llms/hf.py +45 -0
- parrot/llms/openai.py +61 -0
- parrot/llms/pipes.py +114 -0
- parrot/llms/vertex.py +89 -0
- parrot/loaders/__init__.py +9 -0
- parrot/loaders/abstract.py +628 -0
- parrot/loaders/files/__init__.py +0 -0
- parrot/loaders/files/abstract.py +39 -0
- parrot/loaders/files/text.py +63 -0
- parrot/loaders/txt.py +26 -0
- parrot/manager.py +333 -0
- parrot/models.py +504 -0
- parrot/py.typed +0 -0
- parrot/stores/__init__.py +11 -0
- parrot/stores/abstract.py +248 -0
- parrot/stores/chroma.py +188 -0
- parrot/stores/duck.py +162 -0
- parrot/stores/embeddings/__init__.py +10 -0
- parrot/stores/embeddings/abstract.py +46 -0
- parrot/stores/embeddings/base.py +52 -0
- parrot/stores/embeddings/bge.py +20 -0
- parrot/stores/embeddings/fastembed.py +17 -0
- parrot/stores/embeddings/google.py +18 -0
- parrot/stores/embeddings/huggingface.py +20 -0
- parrot/stores/embeddings/ollama.py +14 -0
- parrot/stores/embeddings/openai.py +26 -0
- parrot/stores/embeddings/transformers.py +21 -0
- parrot/stores/embeddings/vertexai.py +17 -0
- parrot/stores/empty.py +10 -0
- parrot/stores/faiss.py +160 -0
- parrot/stores/milvus.py +397 -0
- parrot/stores/postgres.py +653 -0
- parrot/stores/qdrant.py +170 -0
- parrot/tools/__init__.py +23 -0
- parrot/tools/abstract.py +68 -0
- parrot/tools/asknews.py +33 -0
- parrot/tools/basic.py +51 -0
- parrot/tools/bby.py +359 -0
- parrot/tools/bing.py +13 -0
- parrot/tools/docx.py +343 -0
- parrot/tools/duck.py +62 -0
- parrot/tools/execute.py +56 -0
- parrot/tools/gamma.py +28 -0
- parrot/tools/google.py +170 -0
- parrot/tools/gvoice.py +301 -0
- parrot/tools/results.py +278 -0
- parrot/tools/stack.py +27 -0
- parrot/tools/weather.py +70 -0
- parrot/tools/wikipedia.py +58 -0
- parrot/tools/zipcode.py +198 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.cpython-311-x86_64-linux-gnu.so +0 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpython-311-x86_64-linux-gnu.so +0 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- resources/users/__init__.py +5 -0
- resources/users/handlers.py +13 -0
- resources/users/models.py +205 -0
parrot/bots/retrievals/retrieval.py
@@ -0,0 +1,610 @@
from collections.abc import Callable
from typing import Any
import uuid
import asyncio
from aiohttp import web
from langchain_core.vectorstores import VectorStoreRetriever
from langchain.memory import (
    ConversationBufferMemory
)
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain.chains.conversational_retrieval.base import (
    ConversationalRetrievalChain
)
from langchain.retrievers import (
    EnsembleRetriever,
)
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
    PromptTemplate
)
from langchain_community.retrievers import BM25Retriever
from datamodel.exceptions import ValidationError  # pylint: disable=E0611
from asyncdb import AsyncDB
from navconfig.logging import logging
from navigator_session import get_session
from parrot.conf import (
    BIGQUERY_CREDENTIALS,
    BIGQUERY_PROJECT_ID,
    BIGQUERY_DATASET
)
try:
    from ...llms import VertexLLM
    VERTEX_ENABLED = True
except ImportError:
    VERTEX_ENABLED = False

try:
    from ...llms import Anthropic
    ANTHROPIC_ENABLED = True
except ImportError:
    ANTHROPIC_ENABLED = False
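
# The VERTEX_ENABLED / ANTHROPIC_ENABLED flags above record, at import time,
# whether the optional VertexAI and Anthropic wrappers are importable, so
# conversation() and qa() can fail fast when use_llm requests a provider
# that is not installed.
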
from ...utils import SafeDict
from ...models import ChatResponse, ChatbotUsage


class RetrievalManager:
    """Managing the Chain Retrieval, answers and sources.
    """
    def __init__(
        self,
        chatbot_id: uuid.UUID,
        chatbot_name: str,
        model: Callable,
        store: Callable,
        system_prompt: str = None,
        human_prompt: str = None,
        memory: ConversationBufferMemory = None,
        source_path: str = 'web',
        request: web.Request = None,
        kb: Any = None,
        **kwargs
    ):
        # Chatbot ID:
        self.chatbot_id: uuid.UUID = chatbot_id
        # Chatbot Name:
        self.chatbot_name: str = chatbot_name
        # Source Path:
        self.source_path: str = source_path
        # Vector Store
        self.store = store
        # Memory Manager
        self.memory = memory
        # LLM Model
        self.model = model
        # template prompt
        # TODO: if none, create a basic template
        self.system_prompt = system_prompt
        self.human_prompt = human_prompt
        # Knowledge-base
        self.kb = kb
        # Logger:
        self.logger = logging.getLogger('Parrot.Retrieval')
        # Web Request:
        self.request = request
        # Test Vector Retriever:
        self._test_vector: bool = kwargs.get('test_vector', False)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    async def __aenter__(self):
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        pass

    def create_memory(
        self,
        key: str = 'chat_history',
        input_key: str = 'question',
        output_key: str = 'answer'
    ):
        return ConversationBufferMemory(
            memory_key=key,
            return_messages=True,
            input_key=input_key,
            output_key=output_key
        )

    def test_retriever(self, question, retriever):
        if self._test_vector is True:
            docs = retriever.get_relevant_documents(question)
            self.logger.notice(
                f":: Question: {question}"
            )
            # Print the retrieved documents
            for doc in docs:
                self.logger.debug(
                    f":: Document: {doc.page_content}"
                )
                print("---")

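    # The entry points below differ in chain construction: conversation()
    # keeps chat history via ConversationalRetrievalChain, qa() runs a
    # single-turn RetrievalQA chain, and question() builds and invokes a
    # conversational chain in a single call.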
    ### Different types of Retrieval
    async def conversation(
        self,
        question: str = None,
        chain_type: str = 'stuff',
        search_type: str = 'similarity',
        search_kwargs: dict = {"k": 4, "fetch_k": 10, "lambda_mult": 0.89},
        return_docs: bool = True,
        metric_type: str = None,
        memory: Any = None,
        use_llm: str = None,
        **kwargs
    ):
        # Question:
        self._question = question
        # Memory:
        self.memory = memory
        # Get a Vector Retriever:
        vector = self.store.get_vector(
            metric_type=metric_type
        )
        simil_retriever = VectorStoreRetriever(
            vectorstore=vector,
            search_type='similarity',
            chain_type=chain_type,
            search_kwargs=search_kwargs
        )
        retriever = vector.as_retriever(
            search_type=search_type,
            search_kwargs=search_kwargs
        )
        if self.kb:
            # Get a BM25 Retriever:
            b25_retriever = BM25Retriever.from_documents(self.kb)
            retriever = EnsembleRetriever(
                retrievers=[simil_retriever, retriever, b25_retriever],
                weights=[0.6, 0.3, 0.1]
            )
        else:
            retriever = EnsembleRetriever(
                retrievers=[simil_retriever, retriever],
                weights=[0.6, 0.4]
            )

        # TEST THE VECTOR RETRIEVER:
        self.test_retriever(question, retriever)

        # Create prompt templates
        system_prompt = SystemMessagePromptTemplate.from_template(
            self.system_prompt
        )
        human_prompt = HumanMessagePromptTemplate.from_template(
            self.human_prompt,
            input_variables=['question', 'chat_history']
        )
        chat_prompt = ChatPromptTemplate.from_messages([
            system_prompt,
            human_prompt
        ])
        if use_llm is not None:
            if use_llm == 'claude':
                if ANTHROPIC_ENABLED is True:
                    llm = Anthropic(
                        model='claude-3-opus-20240229',
                        temperature=0.2,
                        top_p=0.4,
                        top_k=20
                    )
                else:
                    raise ValueError(
                        "No Anthropic Claude was installed."
                    )
            elif use_llm == 'vertex':
                if VERTEX_ENABLED is True:
                    llm = VertexLLM(
                        model='gemini-pro-1.5',
                        temperature=0.2,
                        top_p=0.4,
                        top_k=20
                    )
                else:
                    raise ValueError(
                        "No VertexAI was installed."
                    )
            else:
                raise ValueError(
                    "Only Claude and Vertex are supported now."
                )
            _model = llm.get_llm()
        else:
            _model = self.model
        # Conversational Chain:
        self.chain = ConversationalRetrievalChain.from_llm(
            llm=_model,
            retriever=retriever,
            chain_type=chain_type,
            verbose=True,
            memory=self.memory,
            return_source_documents=return_docs,
            return_generated_question=True,
            combine_docs_chain_kwargs={"prompt": chat_prompt},
        )
        return self

    def qa(
        self,
        question: str = None,
        chain_type: str = 'stuff',
        search_type: str = 'mmr',
        search_kwargs: dict = {"k": 4, "fetch_k": 10, "lambda_mult": 0.89},
        return_docs: bool = True,
        metric_type: str = None,
        use_llm: str = None
    ):
        # Question:
        self._question = question
        # Get a Vector Retriever:
        vector = self.store.get_vector(
            metric_type=metric_type
        )
        simil_retriever = VectorStoreRetriever(
            vectorstore=vector,
            search_type='similarity',
            chain_type=chain_type,
            search_kwargs=search_kwargs
        )
        retriever = vector.as_retriever(
            search_type=search_type,
            search_kwargs=search_kwargs
        )
        if self.kb:
            # Get a BM25 Retriever:
            b25_retriever = BM25Retriever.from_documents(self.kb)
            retriever = EnsembleRetriever(
                retrievers=[simil_retriever, retriever, b25_retriever],
                weights=[0.6, 0.3, 0.1]
            )
        else:
            retriever = EnsembleRetriever(
                retrievers=[simil_retriever, retriever],
                weights=[0.7, 0.3]
            )
        # TEST THE VECTOR RETRIEVER:
        self.test_retriever(question, retriever)
        human_prompt = self.human_prompt.replace(
            '**Chat History:**', ''
        )
        human_prompt = human_prompt.format_map(
            SafeDict(
                chat_history=''
            )
        )
        if use_llm is not None:
            if use_llm == 'claude':
                if ANTHROPIC_ENABLED is True:
                    llm = Anthropic(
                        model='claude-3-opus-20240229',
                        temperature=0.2,
                        top_p=0.4,
                        top_k=20
                    )
                else:
                    raise ValueError(
                        "No Anthropic Claude was installed."
                    )
            elif use_llm == 'vertex':
                if VERTEX_ENABLED is True:
                    llm = VertexLLM(
                        model='gemini-pro',
                        temperature=0.2,
                        top_p=0.4,
                        top_k=20
                    )
                else:
                    raise ValueError(
                        "No VertexAI was installed."
                    )
            else:
                raise ValueError(
                    "Only Claude and Vertex are supported now."
                )
            self.model = llm.get_llm()

        self.chain = RetrievalQA.from_chain_type(
            llm=self.model,
            chain_type=chain_type,
            retriever=retriever,
            return_source_documents=return_docs,
            verbose=True,
            chain_type_kwargs={
                "prompt": PromptTemplate(
                    template=self.system_prompt + '\n' + human_prompt,
                    input_variables=['context', 'question']
                )
            },
        )
        # Debug Code ::
        # print('=====================')
        # print(custom_template)
        # response = self.chain.invoke(question)
        # print('Q > ', response['result'])
        # docs = vector.similarity_search(
        #     self._question, k=10
        # )
        # print(" LENGTH DOCS > ", len(docs))
        # print(docs)
        # print(' ========================== ')

        # try:
        #     distance = self.evaluate_distance(
        #         self.store.embedding_name, question, docs
        #     )
        #     print('DISTANCE > ', distance)
        # except Exception as e:
        #     distance = 'EMPTY'
        #     print('DISTANCE > ', distance)
        # print('CHAIN > ', self.chain)

        return self

    def get_current_context(self):
        if self.memory:
            # buffer_as_str is a property on ConversationBufferMemory
            return self.memory.buffer_as_str
        return None

    def as_markdown(self, response: ChatResponse, return_sources: bool = True) -> str:
        markdown_output = f"**Question**: {response.question} \n"
        markdown_output += f"**Answer**: {response.answer} \n"
        if return_sources is True and response.source_documents:
            source_documents = response.source_documents
            current_sources = []
            block_sources = []
            count = 0
            d = {}
            for source in source_documents:
                if count >= 20:
                    break  # cap the number of listed source documents at 20
                metadata = source.metadata
                if 'url' in metadata:
                    src = metadata.get('url')
                elif 'filename' in metadata:
                    src = metadata.get('filename')
                else:
                    src = metadata.get('source', 'unknown')
                if src == 'knowledge-base':
                    continue  # avoid attaching kb documents
                source_title = metadata.get('title', src)
                if source_title in current_sources:
                    continue
                current_sources.append(source_title)
                if src:
                    d[src] = metadata.get('document_meta', {})
                source_filename = metadata.get('filename', src)
                if src:
                    block_sources.append(f"- [{source_title}]({src})")
                else:
                    if 'page_number' in metadata:
                        block_sources.append(
                            f"- {source_filename} (Page {metadata.get('page_number')})"
                        )
                    else:
                        block_sources.append(f"- {source_filename}")
                count += 1
            if block_sources:
                markdown_output += "**Sources**: \n"
                markdown_output += "\n".join(block_sources)
            if d:
                response.documents = d
        return markdown_output

    # def evaluate_distance(self, model, question, source_documents):
    #     tokenizer = SentenceTransformer(model)
    #     query_embedding = tokenizer.encode(question)
    #     document_embeddings = [
    #         tokenizer.encode(doc.page_content) for doc in source_documents
    #     ]
    #     distances = util.cos_sim(query_embedding, document_embeddings)
    #     result = []
    #     for doc, distance in zip(source_documents, distances):
    #         result.append({
    #             "document": doc,
    #             "distance": distance
    #         })
    #     return result

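    # Telemetry: each answered question is persisted to BigQuery through
    # AsyncDB; question() and invoke() enqueue this coroutine on the app's
    # service_queue when a request is attached, or fire it as a background
    # task otherwise.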
    async def log_usage(self, response: ChatResponse, request: web.Request = None):
        params = {
            "credentials": BIGQUERY_CREDENTIALS,
            "project_id": BIGQUERY_PROJECT_ID,
        }
        db = AsyncDB(
            'bigquery',
            params=params
        )
        origin = {
            "user_agent": 'script'
        }
        user_id = 0
        if request:
            origin = {
                "origin": request.remote,
                "user_agent": request.headers.get('User-Agent')
            }
            session = await get_session(request)
            if session:
                user_id = session.user_id
        async with await db.connection() as conn:  # pylint: disable=E1101
            # set connection to model:
            ChatbotUsage.Meta.connection = conn
            # Add a new record of chatbot usage:
            record = {
                "chatbot_id": str(self.chatbot_id),
                "user_id": user_id,  # TODO: add session information
                "source_path": self.source_path,
                "platform": 'web',
                "sid": str(response.sid),
                "used_at": response.at,
                "question": response.question,
                "response": response.answer,
                **origin
            }
            try:
                log = ChatbotUsage(**record)
                data = log.to_dict()
                # convert to string (bigquery uses json.dumps to convert to string)
                data['sid'] = str(data['sid'])
                data['chatbot_id'] = str(data['chatbot_id'])
                data['event_timestamp'] = str(data['event_timestamp'])
                # writing directly to bigquery
                await conn.write(
                    [data],
                    table_id=ChatbotUsage.Meta.name,
                    dataset_id=ChatbotUsage.Meta.schema,
                    use_streams=False,
                    use_pandas=False
                )
                # await log.insert()
            except Exception as exc:
                self.logger.error(
                    f"Error inserting log: {exc}"
                )

    async def question(
        self,
        question: str = None,
        chain_type: str = 'stuff',
        search_type: str = 'similarity',
        search_kwargs: dict = {"k": 4, "fetch_k": 10, "lambda_mult": 0.89},
        return_docs: bool = True,
        metric_type: str = None,
        memory: Any = None,
        **kwargs
    ):
        # Generating Vector:
        async with self.store as store:  # pylint: disable=E1101
            vector = store.get_vector(metric_type=metric_type)
            retriever = VectorStoreRetriever(
                vectorstore=vector,
                search_type=search_type,
                chain_type=chain_type,
                search_kwargs=search_kwargs
            )
            # TEST THE VECTOR RETRIEVER:
            self.test_retriever(question, retriever)
            system_prompt = SystemMessagePromptTemplate.from_template(
                self.system_prompt
            )
            human_prompt = HumanMessagePromptTemplate.from_template(
                self.human_prompt,
                input_variables=['question', 'chat_history']
            )
            # Combine into a ChatPromptTemplate
            chat_prompt = ChatPromptTemplate.from_messages([
                system_prompt,
                human_prompt
            ])
            response = None
            try:
                chain = ConversationalRetrievalChain.from_llm(
                    llm=self.model,
                    retriever=retriever,
                    chain_type=chain_type,
                    verbose=False,
                    memory=memory,
                    return_source_documents=return_docs,
                    return_generated_question=True,
                    combine_docs_chain_kwargs={"prompt": chat_prompt},
                    **kwargs
                )
                response = chain.invoke(
                    {"question": question}
                )
            except Exception as exc:
                self.logger.error(
                    f"Error invoking chain: {exc}"
                )
                return {
                    "question": question,
                    "error": str(exc)
                }
            try:
                qa_response = ChatResponse(**response)
            except (ValueError, TypeError) as exc:
                self.logger.error(
                    f"Error validating response: {exc}"
                )
                return response
            except ValidationError as exc:
                self.logger.error(
                    f"Error on response: {exc.payload}"
                )
                return response
            try:
                qa_response.response = self.as_markdown(
                    qa_response
                )
                # saving question to Usage Log
                if self.request:
                    tasker = self.request.app['service_queue']
                    await tasker.put(
                        self.log_usage,
                        response=qa_response,
                        request=self.request
                    )
                else:
                    asyncio.create_task(
                        self.log_usage(response=qa_response)
                    )
                return qa_response
            except Exception as exc:
                self.logger.exception(
                    f"Error on response: {exc}"
                )
                return None

    async def invoke(self, question):
        # Invoke the chain with the given question
        try:
            response = self.chain.invoke(
                question
            )
        except Exception as exc:
            self.logger.error(
                f"Error invoking chain: {exc}"
            )
            return {
                "question": question,
                "error": str(exc)
            }
        try:
            qa_response = ChatResponse(**response)
        except (ValueError, TypeError) as exc:
            self.logger.error(
                f"Error validating response: {exc}"
            )
            return response
        except ValidationError as exc:
            self.logger.error(
                f"Error on response: {exc.payload}"
            )
            return response
        try:
            qa_response.response = self.as_markdown(
                qa_response
            )
            # saving question to Usage Log
            if self.request:
                tasker = self.request.app['service_queue']
                await tasker.put(
                    self.log_usage,
                    response=qa_response,
                    request=self.request
                )
            else:
                asyncio.create_task(self.log_usage(response=qa_response))
            return qa_response
        except Exception as exc:
            self.logger.exception(
                f"Error on response: {exc}"
            )
            return response
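
A minimal usage sketch of the class above, assuming a LangChain-compatible chat model is supplied as llm: conversation() assembles self.chain, then invoke() runs it and normalizes the output into a ChatResponse. The Store shim and FakeEmbeddings here are stand-ins for the package's real parrot.stores backends and embedding wrappers; only RetrievalManager itself comes from this diff.

import asyncio
import uuid

from langchain_community.embeddings import FakeEmbeddings
from langchain_community.vectorstores import FAISS

from parrot.bots.retrievals.retrieval import RetrievalManager


class Store:
    """Shim exposing the get_vector() hook that RetrievalManager calls."""
    def __init__(self, vector):
        self._vector = vector

    def get_vector(self, metric_type=None):
        return self._vector


async def ask(llm, text: str):
    # A tiny in-memory vector store standing in for a real backend.
    vector = FAISS.from_texts(
        ["ai-parrot is a framework for building chatbots."],
        FakeEmbeddings(size=16),
    )
    rm = RetrievalManager(
        chatbot_id=uuid.uuid4(),
        chatbot_name='demo',
        model=llm,              # any LangChain chat model (assumption)
        store=Store(vector),
        system_prompt="Answer using the context.\n{context}",
        human_prompt="{question}\n**Chat History:**\n{chat_history}",
    )
    memory = rm.create_memory()
    # Step 1: build the ConversationalRetrievalChain on the instance.
    await rm.conversation(question=text, memory=memory)
    # Step 2: run it; invoke() returns a ChatResponse on success,
    # or the raw chain output if validation fails.
    result = await rm.invoke({"question": text})
    return getattr(result, 'response', result)

# Example driver (my_chat_model is a placeholder for a configured model):
# print(asyncio.run(ask(my_chat_model, "What is ai-parrot?")))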