ai-parrot 0.3.4__cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ai-parrot might be problematic. Click here for more details.

Files changed (109) hide show
  1. ai_parrot-0.3.4.dist-info/LICENSE +21 -0
  2. ai_parrot-0.3.4.dist-info/METADATA +319 -0
  3. ai_parrot-0.3.4.dist-info/RECORD +109 -0
  4. ai_parrot-0.3.4.dist-info/WHEEL +6 -0
  5. ai_parrot-0.3.4.dist-info/top_level.txt +3 -0
  6. parrot/__init__.py +21 -0
  7. parrot/chatbots/__init__.py +7 -0
  8. parrot/chatbots/abstract.py +728 -0
  9. parrot/chatbots/asktroc.py +16 -0
  10. parrot/chatbots/base.py +366 -0
  11. parrot/chatbots/basic.py +9 -0
  12. parrot/chatbots/bose.py +17 -0
  13. parrot/chatbots/cody.py +17 -0
  14. parrot/chatbots/copilot.py +83 -0
  15. parrot/chatbots/dataframe.py +103 -0
  16. parrot/chatbots/hragents.py +15 -0
  17. parrot/chatbots/odoo.py +17 -0
  18. parrot/chatbots/retrievals/__init__.py +578 -0
  19. parrot/chatbots/retrievals/constitutional.py +19 -0
  20. parrot/conf.py +110 -0
  21. parrot/crew/__init__.py +3 -0
  22. parrot/crew/tools/__init__.py +22 -0
  23. parrot/crew/tools/bing.py +13 -0
  24. parrot/crew/tools/config.py +43 -0
  25. parrot/crew/tools/duckgo.py +62 -0
  26. parrot/crew/tools/file.py +24 -0
  27. parrot/crew/tools/google.py +168 -0
  28. parrot/crew/tools/gtrends.py +16 -0
  29. parrot/crew/tools/md2pdf.py +25 -0
  30. parrot/crew/tools/rag.py +42 -0
  31. parrot/crew/tools/search.py +32 -0
  32. parrot/crew/tools/url.py +21 -0
  33. parrot/exceptions.cpython-312-x86_64-linux-gnu.so +0 -0
  34. parrot/handlers/__init__.py +4 -0
  35. parrot/handlers/bots.py +196 -0
  36. parrot/handlers/chat.py +162 -0
  37. parrot/interfaces/__init__.py +6 -0
  38. parrot/interfaces/database.py +29 -0
  39. parrot/llms/__init__.py +137 -0
  40. parrot/llms/abstract.py +47 -0
  41. parrot/llms/anthropic.py +42 -0
  42. parrot/llms/google.py +42 -0
  43. parrot/llms/groq.py +45 -0
  44. parrot/llms/hf.py +45 -0
  45. parrot/llms/openai.py +59 -0
  46. parrot/llms/pipes.py +114 -0
  47. parrot/llms/vertex.py +78 -0
  48. parrot/loaders/__init__.py +20 -0
  49. parrot/loaders/abstract.py +456 -0
  50. parrot/loaders/audio.py +106 -0
  51. parrot/loaders/basepdf.py +102 -0
  52. parrot/loaders/basevideo.py +280 -0
  53. parrot/loaders/csv.py +42 -0
  54. parrot/loaders/dir.py +37 -0
  55. parrot/loaders/excel.py +349 -0
  56. parrot/loaders/github.py +65 -0
  57. parrot/loaders/handlers/__init__.py +5 -0
  58. parrot/loaders/handlers/data.py +213 -0
  59. parrot/loaders/image.py +119 -0
  60. parrot/loaders/json.py +52 -0
  61. parrot/loaders/pdf.py +437 -0
  62. parrot/loaders/pdfchapters.py +142 -0
  63. parrot/loaders/pdffn.py +112 -0
  64. parrot/loaders/pdfimages.py +207 -0
  65. parrot/loaders/pdfmark.py +88 -0
  66. parrot/loaders/pdftables.py +145 -0
  67. parrot/loaders/ppt.py +30 -0
  68. parrot/loaders/qa.py +81 -0
  69. parrot/loaders/repo.py +103 -0
  70. parrot/loaders/rtd.py +65 -0
  71. parrot/loaders/txt.py +92 -0
  72. parrot/loaders/utils/__init__.py +1 -0
  73. parrot/loaders/utils/models.py +25 -0
  74. parrot/loaders/video.py +96 -0
  75. parrot/loaders/videolocal.py +120 -0
  76. parrot/loaders/vimeo.py +106 -0
  77. parrot/loaders/web.py +216 -0
  78. parrot/loaders/web_base.py +112 -0
  79. parrot/loaders/word.py +125 -0
  80. parrot/loaders/youtube.py +192 -0
  81. parrot/manager.py +166 -0
  82. parrot/models.py +372 -0
  83. parrot/py.typed +0 -0
  84. parrot/stores/__init__.py +48 -0
  85. parrot/stores/abstract.py +171 -0
  86. parrot/stores/milvus.py +632 -0
  87. parrot/stores/qdrant.py +153 -0
  88. parrot/tools/__init__.py +12 -0
  89. parrot/tools/abstract.py +53 -0
  90. parrot/tools/asknews.py +32 -0
  91. parrot/tools/bing.py +13 -0
  92. parrot/tools/duck.py +62 -0
  93. parrot/tools/google.py +170 -0
  94. parrot/tools/stack.py +26 -0
  95. parrot/tools/weather.py +70 -0
  96. parrot/tools/wikipedia.py +59 -0
  97. parrot/tools/zipcode.py +179 -0
  98. parrot/utils/__init__.py +2 -0
  99. parrot/utils/parsers/__init__.py +5 -0
  100. parrot/utils/parsers/toml.cpython-312-x86_64-linux-gnu.so +0 -0
  101. parrot/utils/toml.py +11 -0
  102. parrot/utils/types.cpython-312-x86_64-linux-gnu.so +0 -0
  103. parrot/utils/uv.py +11 -0
  104. parrot/version.py +10 -0
  105. resources/users/__init__.py +5 -0
  106. resources/users/handlers.py +13 -0
  107. resources/users/models.py +205 -0
  108. settings/__init__.py +0 -0
  109. settings/settings.py +51 -0
@@ -0,0 +1,578 @@
1
+ from collections.abc import Callable
2
+ from typing import Any
3
+ import uuid
4
+ import asyncio
5
+ from aiohttp import web
6
+ from sentence_transformers import SentenceTransformer, util
7
+ from langchain.memory import (
8
+ ConversationSummaryMemory,
9
+ ConversationBufferMemory
10
+ )
11
+ from langchain.chains.retrieval_qa.base import RetrievalQA
12
+ from langchain.chains.conversational_retrieval.base import (
13
+ ConversationalRetrievalChain
14
+ )
15
+ from langchain.retrievers import (
16
+ EnsembleRetriever,
17
+ ContextualCompressionRetriever
18
+ )
19
+ from langchain_core.vectorstores import VectorStoreRetriever
20
+ from langchain_core.prompts import (
21
+ PromptTemplate,
22
+ ChatPromptTemplate
23
+ )
24
+ from langchain_community.retrievers import BM25Retriever
25
+ from datamodel.exceptions import ValidationError # pylint: disable=E0611
26
+ from asyncdb import AsyncDB
27
+ from navconfig.logging import logging
28
+ from navigator_session import get_session
29
+ from parrot.conf import (
30
+ BIGQUERY_CREDENTIALS,
31
+ BIGQUERY_PROJECT_ID,
32
+ BIGQUERY_DATASET
33
+ )
34
+ try:
35
+ from ...llms import VertexLLM
36
+ VERTEX_ENABLED = True
37
+ except ImportError:
38
+ VERTEX_ENABLED = False
39
+
40
+ try:
41
+ from ...llms import Anthropic
42
+ ANTHROPIC_ENABLED = True
43
+ except ImportError:
44
+ ANTHROPIC_ENABLED = False
45
+
46
+ from ...utils import SafeDict
47
+ from ...models import ChatResponse, ChatbotUsage
48
+
49
+
50
class RetrievalManager:
    """Build retrieval chains over a vector store and format their answers.

    The manager combines an LLM, a vector store and an optional knowledge
    base into LangChain retrieval chains, renders answers (with their
    sources) as Markdown and records every answered question as a
    ``ChatbotUsage`` row in BigQuery.

    It can be used as a sync or async context manager; entering opens a
    store connection under a random alias, leaving closes it again.
    """

    # Default retriever search arguments. Copied on every call so that two
    # calls never share (and accidentally mutate) the same dict object.
    _DEFAULT_SEARCH_KWARGS = {"k": 4, "fetch_k": 10, "lambda_mult": 0.89}

    # Strong references to fire-and-forget tasks. The event loop only keeps
    # weak references to tasks, so an otherwise unreferenced task may be
    # garbage-collected before it finishes.
    _background_tasks: set = set()

    def __init__(
        self,
        chatbot_id: uuid.UUID,
        chatbot_name: str,
        model: Callable,
        store: Callable,
        memory: ConversationBufferMemory = None,
        template: str = None,
        source_path: str = 'web',
        request: web.Request = None,
        kb: Any = None
    ):
        """Store the collaborators used to build and run the chains.

        Args:
            chatbot_id: Identifier written to the usage log.
            chatbot_name: Human readable name of the chatbot.
            model: LLM handed to the LangChain chains.
            store: Vector store wrapper (must offer ``connect``, ``close``,
                ``get_vector`` and ``connection``).
            memory: Optional conversation memory.
            template: Prompt template; formatted with ``format_map`` when a
                chain is built, so it must not be ``None`` at that point.
            source_path: Origin label written to the usage log.
            request: Current web request, if any; used for session lookup
                and to reach the application's ``service_queue``.
            kb: Optional knowledge-base documents for the BM25 retriever.
        """
        # Chatbot ID:
        self.chatbot_id: uuid.UUID = chatbot_id
        # Chatbot Name:
        self.chatbot_name: str = chatbot_name
        # Source Path:
        self.source_path: str = source_path
        # Vector Store
        self.store = store
        # Memory Manager
        self.memory = memory
        # LLM Model
        self.model = model
        # template prompt
        # TODO: if none, create a basic template
        self.template = template
        # Knowledge-base
        self.kb = kb
        # Logger:
        self.logger = logging.getLogger('Parrot.Retrieval')
        # Web Request:
        self.request = request

    # -- connection handling -------------------------------------------------
    def _open_client(self):
        """Open a store connection registered under a fresh random alias."""
        self.client_id: str = str(uuid.uuid4())
        self.client, _ = self.store.connect(alias=self.client_id)

    def _close_client(self):
        """Close the client and always release the store alias afterwards."""
        try:
            self.client.close()  # closing database connection
        finally:
            # release the alias even when closing the client failed:
            self.store.close(alias=self.client_id)

    def __enter__(self):
        self._open_client()
        return self

    async def __aenter__(self):
        self._open_client()
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        self._close_client()

    def __exit__(self, exc_type, exc_value, traceback):
        self._close_client()

    def create_memory(
        self,
        key: str = 'chat_history',
        input_key: str = 'question',
        output_key: str = 'answer'
    ):
        """Return a new ``ConversationBufferMemory`` that returns messages."""
        return ConversationBufferMemory(
            memory_key=key,
            return_messages=True,
            input_key=input_key,
            output_key=output_key
        )

    # -- private building blocks ---------------------------------------------
    def _search_kwargs(self, search_kwargs):
        """Return the caller's search kwargs or a private copy of the defaults."""
        if search_kwargs is None:
            return dict(self._DEFAULT_SEARCH_KWARGS)
        return search_kwargs

    def _build_ensemble_retriever(
        self, chain_type, search_type, search_kwargs, metric_type
    ):
        """Combine similarity, ``search_type`` and (optional) BM25 retrievers."""
        vector = self.store.get_vector(
            metric_type=metric_type
        )
        simil_retriever = VectorStoreRetriever(
            vectorstore=vector,
            search_type='similarity',
            chain_type=chain_type,
            search_kwargs=search_kwargs
        )
        retriever = vector.as_retriever(
            search_type=search_type,
            search_kwargs=search_kwargs
        )
        if self.kb:
            # Knowledge base available: add a keyword (BM25) retriever.
            b25_retriever = BM25Retriever.from_documents(self.kb)
            return EnsembleRetriever(
                retrievers=[simil_retriever, retriever, b25_retriever],
                weights=[0.6, 0.3, 0.1]
            )
        return EnsembleRetriever(
            retrievers=[simil_retriever, retriever],
            weights=[0.7, 0.3]
        )

    def _resolve_llm(self, use_llm):
        """Return the model to use for one chain.

        ``None`` selects the manager's own model; ``'claude'`` and
        ``'vertex'`` build a dedicated LLM when the backend was importable.

        Raises:
            ValueError: the backend is not installed or the name is unknown.
        """
        if use_llm is None:
            return self.model
        if use_llm == 'claude':
            if ANTHROPIC_ENABLED is not True:
                raise ValueError(
                    "No Anthropic Claude was installed."
                )
            llm = Anthropic(
                model='claude-3-opus-20240229',
                temperature=0.2,
                top_p=0.4,
                top_k=20
            )
        elif use_llm == 'vertex':
            if VERTEX_ENABLED is not True:
                raise ValueError(
                    "No VertexAI was installed."
                )
            llm = VertexLLM(
                model='gemini-pro',
                temperature=0.2,
                top_p=0.4,
                top_k=20
            )
        else:
            raise ValueError(
                "Only Claude and Vertex are Supported Now."
            )
        return llm.get_llm()

    def _conversation_prompt(self):
        """Build the prompt used by the conversational chains."""
        custom_template = self.template.format_map(
            SafeDict(
                summaries=''
            )
        )
        return PromptTemplate(
            template=custom_template,
            input_variables=["context", "chat_history", "question"],
        )

    def _validate_response(self, response):
        """Convert a raw chain result into a ``ChatResponse``.

        Returns ``None`` (after logging) when the result cannot be validated.
        """
        try:
            return ChatResponse(**response)
        except (ValueError, TypeError) as exc:
            self.logger.error(
                f"Error validating response: {exc}"
            )
        except ValidationError as exc:
            self.logger.error(
                f"Error on response: {exc.payload}"
            )
        return None

    def _spawn_background(self, coro):
        """Schedule ``coro`` as a task and keep it referenced until it is done."""
        task = asyncio.create_task(coro)
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return task

    async def _publish_response(self, qa_response):
        """Render the Markdown answer and hand the usage record to a logger."""
        qa_response.response = self.as_markdown(
            qa_response
        )
        # saving question to Usage Log
        if self.request:
            tasker = self.request.app['service_queue']
            await tasker.put(
                self.log_usage,
                response=qa_response,
                request=self.request
            )
        else:
            self._spawn_background(
                self.log_usage(response=qa_response)
            )
        return qa_response

    ### Different types of Retrieval
    def conversation(
        self,
        question: str = None,
        chain_type: str = 'stuff',
        search_type: str = 'similarity',
        search_kwargs: dict = None,
        return_docs: bool = True,
        metric_type: str = None,
        memory: Any = None,
        use_llm: str = None,
        **kwargs
    ):
        """Prepare ``self.chain`` as a ``ConversationalRetrievalChain``.

        Args:
            question: Stored on the instance; the chain is run by ``invoke``.
            chain_type: Combine-documents chain type.
            search_type: Search type of the second vector retriever.
            search_kwargs: Retriever search arguments; ``None`` selects
                ``{"k": 4, "fetch_k": 10, "lambda_mult": 0.89}``.
            return_docs: Whether the chain returns its source documents.
            metric_type: Forwarded to ``store.get_vector``.
            memory: Conversation memory for the chain. NOTE: this replaces
                the memory given to the constructor, even when ``None``.
            use_llm: ``'claude'`` or ``'vertex'`` to override the model for
                this chain only; ``None`` uses the manager's model.
            **kwargs: Accepted for compatibility; not used.

        Returns:
            The manager itself, so calls can be chained.

        Raises:
            ValueError: ``use_llm`` is unknown or its backend is missing.
        """
        search_kwargs = self._search_kwargs(search_kwargs)
        # Question:
        self._question = question
        # Memory:
        self.memory = memory
        retriever = self._build_ensemble_retriever(
            chain_type, search_type, search_kwargs, metric_type
        )
        _prompt = self._conversation_prompt()
        _model = self._resolve_llm(use_llm)
        # Conversational Chain:
        self.chain = ConversationalRetrievalChain.from_llm(
            llm=_model,
            retriever=retriever,
            chain_type=chain_type,
            verbose=True,
            memory=self.memory,
            return_source_documents=return_docs,
            return_generated_question=True,
            combine_docs_chain_kwargs={"prompt": _prompt},
        )
        return self

    def qa(
        self,
        question: str = None,
        chain_type: str = 'stuff',
        search_type: str = 'mmr',
        search_kwargs: dict = None,
        return_docs: bool = True,
        metric_type: str = None,
        use_llm: str = None
    ):
        """Prepare ``self.chain`` as a ``RetrievalQA`` chain.

        Parameters have the same meaning as in ``conversation``. As there,
        ``use_llm`` only selects the model of the chain being built; it no
        longer overwrites ``self.model`` for later calls.

        Returns:
            The manager itself, so calls can be chained.

        Raises:
            ValueError: ``use_llm`` is unknown or its backend is missing.
        """
        search_kwargs = self._search_kwargs(search_kwargs)
        # Question:
        self._question = question
        retriever = self._build_ensemble_retriever(
            chain_type, search_type, search_kwargs, metric_type
        )
        custom_template = self.template.format_map(
            SafeDict(
                chat_history=''
            )
        )
        _model = self._resolve_llm(use_llm)
        self.chain = RetrievalQA.from_chain_type(
            llm=_model,
            chain_type=chain_type,
            retriever=retriever,
            return_source_documents=return_docs,
            verbose=True,
            chain_type_kwargs={
                "prompt": PromptTemplate(
                    template=custom_template,
                    input_variables=['context', 'question']
                )
            },
        )
        return self

    def get_current_context(self):
        """Return the conversation memory rendered as text, or ``None``.

        ``buffer_as_str`` is read as an attribute first (calling it, as the
        previous code did, fails when it is a property) and is only called
        when it turns out to be a method. Memories without ``buffer_as_str``
        fall back to their ``buffer`` attribute.
        """
        if not self.memory:
            return None
        context = getattr(self.memory, 'buffer_as_str', None)
        if context is None:
            context = getattr(self.memory, 'buffer', None)
        return context() if callable(context) else context

    def as_markdown(self, response: ChatResponse, max_sources: int = 20) -> str:
        """Render question, answer and de-duplicated sources as Markdown.

        Side effect: when at least one linkable source was listed,
        ``response.documents`` is set to ``{source: document_meta}``.

        Args:
            response: Object exposing ``question``, ``answer`` and
                ``source_documents`` (items with a ``metadata`` mapping).
            max_sources: Maximum number of source lines to list.

        Returns:
            The Markdown text.
        """
        markdown_output = f"**Question**: {response.question} \n"
        markdown_output += f"**Answer**: {response.answer} \n"
        if not response.source_documents:
            return markdown_output
        current_sources = []
        block_sources = []
        documents = {}
        for source in response.source_documents:
            # Every document that survives the filters below adds exactly one
            # line, so the number of lines is the number of listed sources.
            if len(block_sources) >= max_sources:
                break
            metadata = source.metadata
            if 'url' in metadata:
                src = metadata.get('url')
            elif 'filename' in metadata:
                src = metadata.get('filename')
            else:
                src = metadata.get('source', 'unknown')
            if src == 'knowledge-base':
                continue  # avoid attaching kb documents
            source_title = metadata.get('title', src)
            if source_title in current_sources:
                continue  # one line per distinct title
            current_sources.append(source_title)
            if src:
                documents[src] = metadata.get('document_meta', {})
                block_sources.append(f"- [{source_title}]({src})")
                continue
            # No usable link target: list the file name (and page, if known).
            source_filename = metadata.get('filename', src)
            if 'page_number' in metadata:
                block_sources.append(
                    f"- {source_filename} (Page {metadata.get('page_number')})"
                )
            else:
                block_sources.append(f"- {source_filename}")
        if block_sources:
            markdown_output += "**Sources**: \n"
            markdown_output += "\n".join(block_sources)
        if documents:
            response.documents = documents
        return markdown_output

    def evaluate_distance(self, model, question, source_documents):
        """Score every source document against the question.

        Args:
            model: Name passed to ``SentenceTransformer``.
            question: Text of the question.
            source_documents: Documents exposing ``page_content``.

        Returns:
            One ``{"document": ..., "distance": ...}`` dict per document,
            where ``distance`` is the value computed by ``util.cos_sim``
            (a cosine similarity, despite the key name). An empty list
            when there are no documents.
        """
        if not source_documents:
            return []
        tokenizer = SentenceTransformer(model)
        query_embedding = tokenizer.encode(question)
        document_embeddings = [
            tokenizer.encode(doc.page_content) for doc in source_documents
        ]
        # cos_sim returns a matrix with one row per query; with a single
        # query the scores of all documents are in row 0. Zipping the matrix
        # itself would pair the whole row with the first document only.
        distances = util.cos_sim(query_embedding, document_embeddings)[0]
        return [
            {"document": doc, "distance": distance}
            for doc, distance in zip(source_documents, distances)
        ]

    async def log_usage(self, response: ChatResponse, request: web.Request = None):
        """Write one ``ChatbotUsage`` record for ``response`` to BigQuery.

        Failures while building or writing the record are logged, not raised.

        Args:
            response: The answered question.
            request: Web request used to fill origin, user agent and user id;
                without it the origin is ``{"user_agent": "script"}`` and the
                user id is ``0``.
        """
        params = {
            "credentials": BIGQUERY_CREDENTIALS,
            "project_id": BIGQUERY_PROJECT_ID,
        }
        db = AsyncDB(
            'bigquery',
            params=params
        )
        origin = {
            "user_agent": 'script'
        }
        user_id = 0
        if request:
            origin = {
                "origin": request.remote,
                "user_agent": request.headers.get('User-Agent')
            }
            session = await get_session(request)
            if session:
                user_id = session.user_id
        async with await db.connection() as conn:  # pylint: disable=E1101
            # set connection to model:
            ChatbotUsage.Meta.connection = conn
            # Add a new record of chatbot usage:
            record = {
                "chatbot_id": str(self.chatbot_id),
                "user_id": user_id,  # TODO: add session information
                "source_path": self.source_path,
                "platform": 'web',
                "sid": str(response.sid),
                "used_at": response.at,
                "question": response.question,
                "response": response.answer,
                **origin
            }
            try:
                log = ChatbotUsage(**record)
                data = log.to_dict()
                # convert to string (bigquery uses json.dumps to convert to string)
                for field in ('sid', 'chatbot_id', 'event_timestamp'):
                    data[field] = str(data[field])
                # writing directly to bigquery
                await conn.write(
                    [data],
                    table_id=ChatbotUsage.Meta.name,
                    dataset_id=ChatbotUsage.Meta.schema,
                    use_streams=False,
                    use_pandas=False
                )
            except Exception as exc:
                self.logger.error(
                    f"Error inserting log: {exc}"
                )

    async def question(
        self,
        question: str = None,
        chain_type: str = 'stuff',
        search_type: str = 'similarity',
        search_kwargs: dict = None,
        return_docs: bool = True,
        metric_type: str = None,
        memory: Any = None,
        **kwargs
    ):
        """Answer ``question`` with a one-off conversational retrieval chain.

        The chain is built on a dedicated store connection, invoked, and its
        result is rendered and logged.

        Args:
            question: Input passed to the chain.
            chain_type: Combine-documents chain type.
            search_type: Search type of the vector retriever.
            search_kwargs: Retriever search arguments; ``None`` selects
                ``{"k": 4, "fetch_k": 10, "lambda_mult": 0.89}``.
            return_docs: Whether the chain returns its source documents.
            metric_type: Forwarded to ``store.get_vector``.
            memory: Conversation memory for the chain.
            **kwargs: Forwarded to ``ConversationalRetrievalChain.from_llm``.

        Returns:
            The ``ChatResponse`` on success. NOTE(review): failures are
            reported as ``(None, None)`` when the chain or the validation
            fails and as ``None`` when rendering/logging fails; this mixed
            shape is kept unchanged for existing callers.
        """
        search_kwargs = self._search_kwargs(search_kwargs)
        # Generating Vector:
        async with self.store.connection() as store:  # pylint: disable=E1101
            vector = store.get_vector(metric_type=metric_type)
            retriever = VectorStoreRetriever(
                vectorstore=vector,
                search_type=search_type,
                chain_type=chain_type,
                search_kwargs=search_kwargs
            )
            _prompt = self._conversation_prompt()
            try:
                chain = ConversationalRetrievalChain.from_llm(
                    llm=self.model,
                    retriever=retriever,
                    chain_type=chain_type,
                    verbose=False,
                    memory=memory,
                    return_source_documents=return_docs,
                    return_generated_question=True,
                    combine_docs_chain_kwargs={"prompt": _prompt},
                    **kwargs
                )
                response = chain.invoke(
                    question
                )
            except Exception as exc:
                self.logger.error(
                    f"Error invoking chain: {exc}"
                )
                return None, None
            qa_response = self._validate_response(response)
            if qa_response is None:
                return None, None
            try:
                return await self._publish_response(qa_response)
            except Exception as exc:
                self.logger.exception(
                    f"Error on response: {exc}"
                )
                return None

    async def invoke(self, question):
        """Run the chain prepared by ``conversation()`` or ``qa()``.

        Returns:
            The ``ChatResponse`` on success, ``(None, None)`` on any failure
            (including a missing chain); the failure is logged.
        """
        # Invoke the chain with the given question
        try:
            response = self.chain.invoke(
                question
            )
        except Exception as exc:
            self.logger.error(
                f"Error invoking chain: {exc}"
            )
            return None, None
        qa_response = self._validate_response(response)
        if qa_response is None:
            return None, None
        try:
            return await self._publish_response(qa_response)
        except Exception as exc:
            self.logger.exception(
                f"Error on response: {exc}"
            )
            return None, None
@@ -0,0 +1,19 @@
1
+ from langchain.chains.constitutional_ai.base import ConstitutionalChain
2
+ from langchain.chains.constitutional_ai.models import ConstitutionalPrinciple
3
+ from ...conf import ETHICAL_PRINCIPLE
4
+
5
+
6
# Principle applied by the constitutional chain: the critique text comes from
# the ETHICAL_PRINCIPLE setting, the revision request is fixed.
ethical_principle = ConstitutionalPrinciple(
    critique_request=ETHICAL_PRINCIPLE,
    revision_request="Rewrite the model's output to be both ethical and legal.",
    name="Ethical Principle",
)
11
+
12
+
13
def get_constitutional_chain(llm, qa_chain):
    """Wrap ``qa_chain`` in a ``ConstitutionalChain`` driven by ``llm``.

    The only principle applied is the module-level ``ethical_principle``;
    the chain is created in verbose mode.
    """
    principles = [ethical_principle]
    return ConstitutionalChain.from_llm(
        llm=llm,
        chain=qa_chain,
        constitutional_principles=principles,
        verbose=True,
    )
parrot/conf.py ADDED
@@ -0,0 +1,110 @@
1
+ from pathlib import Path
2
+ from navconfig import config, BASE_DIR
3
+ from navconfig.logging import logging
4
+ from navigator.conf import default_dsn, CACHE_HOST, CACHE_PORT
5
+
6
+
7
# Raise the log level of chatty third-party libraries.
# Order matters: 'selenium.webdriver' is configured before its parent.
for _logger_name, _logger_level in (
    ('httpcore', logging.INFO),
    ('httpx', logging.INFO),
    ('groq', logging.INFO),
    ('h5py', logging.INFO),
    ('tensorflow', logging.INFO),
    ('selenium.webdriver', logging.WARNING),
    ('selenium', logging.INFO),
):
    logging.getLogger(name=_logger_name).setLevel(_logger_level)
del _logger_name, _logger_level  # keep the module namespace unchanged


# Static directory (always exposed as a Path, even when configured as text)
STATIC_DIR = config.get(
    'STATIC_DIR',
    fallback=BASE_DIR.joinpath('static')
)
if isinstance(STATIC_DIR, str):
    STATIC_DIR = Path(STATIC_DIR)

# Default LLM model name
DEFAULT_LLM_MODEL_NAME = config.get(
    'LLM_MODEL_NAME',
    fallback='gemini-pro'
)


## Milvus vector database ##
MILVUS_HOST = config.get('MILVUS_HOST', fallback='localhost')
MILVUS_PROTOCOL = config.get('MILVUS_PROTOCOL', fallback='http')
# NOTE(review): unlike SCYLLADB_PORT this value is not passed through int();
# presumably it is a string whenever it is configured -- confirm with users.
MILVUS_PORT = config.get('MILVUS_PORT', fallback=19530)
MILVUS_URL = config.get('MILVUS_URL')
MILVUS_TOKEN = config.get('MILVUS_TOKEN')
MILVUS_USER = config.get('MILVUS_USER')
MILVUS_PASSWORD = config.get('MILVUS_PASSWORD')
MILVUS_SECURE = config.getboolean('MILVUS_SECURE', fallback=False)
MILVUS_SERVER_NAME = config.get('MILVUS_SERVER_NAME')
# TLS material; the paths are normalised further down in this module.
MILVUS_CA_CERT = config.get('MILVUS_CA_CERT', fallback=None)
MILVUS_SERVER_CERT = config.get('MILVUS_SERVER_CERT', fallback=None)
MILVUS_SERVER_KEY = config.get('MILVUS_SERVER_KEY', fallback=None)
MILVUS_USE_TLSv2 = config.getboolean('MILVUS_USE_TLSv2', fallback=False)

# ScyllaDB database
SCYLLADB_DRIVER = config.get('SCYLLADB_DRIVER', fallback='scylladb')
SCYLLADB_HOST = config.get('SCYLLADB_HOST', fallback='localhost')
SCYLLADB_PORT = int(
    config.get('SCYLLADB_PORT', fallback=9042)
)
SCYLLADB_USERNAME = config.get('SCYLLADB_USERNAME', fallback='navigator')
SCYLLADB_PASSWORD = config.get('SCYLLADB_PASSWORD', fallback='navigator')
SCYLLADB_KEYSPACE = config.get('SCYLLADB_KEYSPACE', fallback='navigator')


# BigQuery configuration
BIGQUERY_CREDENTIALS = config.get('BIGQUERY_CREDENTIALS')
BIGQUERY_PROJECT_ID = config.get('BIGQUERY_PROJECT_ID', fallback='navigator')
BIGQUERY_DATASET = config.get('BIGQUERY_DATASET', fallback='navigator')

# Redis chat-history configuration (database number on the cache server)
REDIS_HISTORY_DB = config.get('REDIS_HISTORY_DB', fallback=3)
REDIS_HISTORY_URL = f"redis://{CACHE_HOST}:{CACHE_PORT}/{REDIS_HISTORY_DB}"
60
+
61
def resolve_cert(crt):
    """Return the location of a certificate file as a ``Path``.

    Args:
        crt: Path of the certificate, as text or path-like object.

    Returns:
        ``BASE_DIR / crt`` for a relative path; the resolved path
        (``Path.resolve()``) for an absolute one.
    """
    cert = Path(crt)
    if not cert.is_absolute():
        return BASE_DIR.joinpath(cert)
    # Path.resolve() returns a new object instead of changing ``cert`` in
    # place, so its result has to be returned (it used to be discarded).
    return cert.resolve()
68
+
69
# Turn the configured Milvus TLS file locations into path strings;
# settings that were left empty stay untouched.
if MILVUS_SERVER_CERT:
    MILVUS_SERVER_CERT = str(
        resolve_cert(MILVUS_SERVER_CERT)
    )
if MILVUS_CA_CERT:
    MILVUS_CA_CERT = str(
        resolve_cert(MILVUS_CA_CERT)
    )
if MILVUS_SERVER_KEY:
    MILVUS_SERVER_KEY = str(
        resolve_cert(MILVUS_SERVER_KEY)
    )

## Qdrant vector database ##
QDRANT_PROTOCOL = config.get('QDRANT_PROTOCOL', fallback='http')
QDRANT_HOST = config.get('QDRANT_HOST', fallback='localhost')
QDRANT_PORT = config.get('QDRANT_PORT', fallback=6333)
QDRANT_USE_HTTPS = config.getboolean('QDRANT_USE_HTTPS', fallback=False)
QDRANT_URL = config.get('QDRANT_URL')
# Connection type: 'server' or 'cloud'
QDRANT_CONN_TYPE = config.get('QDRANT_CONN_TYPE', fallback='server')


# Embeddings: device, default model and resource limits
EMBEDDING_DEVICE = config.get('EMBEDDING_DEVICE', fallback='cpu')
EMBEDDING_DEFAULT_MODEL = config.get('EMBEDDING_DEFAULT_MODEL', fallback='thenlper/gte-base')
# NOTE(review): units of the two memory limits are not stated here -- confirm.
MAX_VRAM_AVAILABLE = config.get('MAX_VRAM_AVAILABLE', fallback=20000)
RAM_AVAILABLE = config.get('RAM_AVAILABLE', fallback=819200)
CUDA_DEFAULT_DEVICE = config.get('CUDA_DEFAULT_DEVICE', fallback=0)
MAX_BATCH_SIZE = config.get('MAX_BATCH_SIZE', fallback=768)

# Enable Teams Bot (on unless configured otherwise)
ENABLE_AZURE_BOT = config.getboolean('ENABLE_AZURE_BOT', fallback=True)

## Google API key
GOOGLE_API_KEY = config.get('GOOGLE_API_KEY')
### Google service credentials: file name and the directory holding it
GA_SERVICE_ACCOUNT_NAME = config.get(
    'GA_SERVICE_ACCOUNT_NAME',
    fallback="google.json"
)
GA_SERVICE_PATH = config.get(
    'GA_SERVICE_PATH',
    fallback="env/google/"
)

# Ethical principle used as the critique request of the constitutional chain
ETHICAL_PRINCIPLE = config.get(
    'ETHICAL_PRINCIPLE',
    fallback='The model should only talk about ethical and legal things.'
)