ai-parrot 0.1.0 (cp311-cp311-manylinux_2_28_x86_64.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-parrot might be problematic.
- ai_parrot-0.1.0.dist-info/LICENSE +21 -0
- ai_parrot-0.1.0.dist-info/METADATA +299 -0
- ai_parrot-0.1.0.dist-info/RECORD +108 -0
- ai_parrot-0.1.0.dist-info/WHEEL +5 -0
- ai_parrot-0.1.0.dist-info/top_level.txt +3 -0
- parrot/__init__.py +18 -0
- parrot/chatbots/__init__.py +7 -0
- parrot/chatbots/abstract.py +965 -0
- parrot/chatbots/asktroc.py +16 -0
- parrot/chatbots/base.py +257 -0
- parrot/chatbots/basic.py +9 -0
- parrot/chatbots/bose.py +17 -0
- parrot/chatbots/cody.py +17 -0
- parrot/chatbots/copilot.py +100 -0
- parrot/chatbots/dataframe.py +103 -0
- parrot/chatbots/hragents.py +15 -0
- parrot/chatbots/oddie.py +17 -0
- parrot/chatbots/retrievals/__init__.py +515 -0
- parrot/chatbots/retrievals/constitutional.py +19 -0
- parrot/conf.py +108 -0
- parrot/crew/__init__.py +3 -0
- parrot/crew/tools/__init__.py +22 -0
- parrot/crew/tools/bing.py +13 -0
- parrot/crew/tools/config.py +43 -0
- parrot/crew/tools/duckgo.py +62 -0
- parrot/crew/tools/file.py +24 -0
- parrot/crew/tools/google.py +168 -0
- parrot/crew/tools/gtrends.py +16 -0
- parrot/crew/tools/md2pdf.py +25 -0
- parrot/crew/tools/rag.py +42 -0
- parrot/crew/tools/search.py +32 -0
- parrot/crew/tools/url.py +21 -0
- parrot/exceptions.cpython-311-x86_64-linux-gnu.so +0 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/bots.py +196 -0
- parrot/handlers/chat.py +169 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/database.py +29 -0
- parrot/llms/__init__.py +0 -0
- parrot/llms/abstract.py +41 -0
- parrot/llms/anthropic.py +36 -0
- parrot/llms/google.py +37 -0
- parrot/llms/groq.py +33 -0
- parrot/llms/hf.py +39 -0
- parrot/llms/openai.py +49 -0
- parrot/llms/pipes.py +103 -0
- parrot/llms/vertex.py +68 -0
- parrot/loaders/__init__.py +20 -0
- parrot/loaders/abstract.py +456 -0
- parrot/loaders/basepdf.py +102 -0
- parrot/loaders/basevideo.py +280 -0
- parrot/loaders/csv.py +42 -0
- parrot/loaders/dir.py +37 -0
- parrot/loaders/excel.py +349 -0
- parrot/loaders/github.py +65 -0
- parrot/loaders/handlers/__init__.py +5 -0
- parrot/loaders/handlers/data.py +213 -0
- parrot/loaders/image.py +119 -0
- parrot/loaders/json.py +52 -0
- parrot/loaders/pdf.py +187 -0
- parrot/loaders/pdfchapters.py +142 -0
- parrot/loaders/pdffn.py +112 -0
- parrot/loaders/pdfimages.py +207 -0
- parrot/loaders/pdfmark.py +88 -0
- parrot/loaders/pdftables.py +145 -0
- parrot/loaders/ppt.py +30 -0
- parrot/loaders/qa.py +81 -0
- parrot/loaders/repo.py +103 -0
- parrot/loaders/rtd.py +65 -0
- parrot/loaders/txt.py +92 -0
- parrot/loaders/utils/__init__.py +1 -0
- parrot/loaders/utils/models.py +25 -0
- parrot/loaders/video.py +96 -0
- parrot/loaders/videolocal.py +107 -0
- parrot/loaders/vimeo.py +106 -0
- parrot/loaders/web.py +216 -0
- parrot/loaders/web_base.py +112 -0
- parrot/loaders/word.py +125 -0
- parrot/loaders/youtube.py +192 -0
- parrot/manager.py +152 -0
- parrot/models.py +347 -0
- parrot/py.typed +0 -0
- parrot/stores/__init__.py +0 -0
- parrot/stores/abstract.py +170 -0
- parrot/stores/milvus.py +540 -0
- parrot/stores/qdrant.py +153 -0
- parrot/tools/__init__.py +16 -0
- parrot/tools/abstract.py +53 -0
- parrot/tools/asknews.py +32 -0
- parrot/tools/bing.py +13 -0
- parrot/tools/duck.py +62 -0
- parrot/tools/google.py +170 -0
- parrot/tools/stack.py +26 -0
- parrot/tools/weather.py +70 -0
- parrot/tools/wikipedia.py +59 -0
- parrot/tools/zipcode.py +179 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.cpython-311-x86_64-linux-gnu.so +0 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpython-311-x86_64-linux-gnu.so +0 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- resources/users/__init__.py +5 -0
- resources/users/handlers.py +13 -0
- resources/users/models.py +205 -0
- settings/__init__.py +0 -0
- settings/settings.py +51 -0
parrot/manager.py
ADDED
@@ -0,0 +1,152 @@
"""
Chatbot Manager.

Tool for instantiating, managing, and interacting with Chatbots through APIs.
"""
from typing import Any, Dict, Type
from importlib import import_module
from aiohttp import web
from navconfig.logging import logging
from .chatbots import (
    AbstractChatbot,
    Chatbot,
    HRAgent,
    AskTROC,
    OddieBot,
    Cody,
    BoseBot
)
from .handlers.chat import ChatHandler  # , BotHandler
from .handlers import ChatbotHandler
from .models import ChatbotModel


logging.getLogger(name='selenium.webdriver').setLevel(logging.WARNING)


class ChatbotManager:
    """ChatbotManager.

    Manage chatbots and interact with them via an aiohttp App.
    """
    app: web.Application = None
    chatbots: Dict[str, AbstractChatbot] = {}

    def __init__(self) -> None:
        self.app = None
        self.chatbots = {}
        self.logger = logging.getLogger(name='Parrot.Manager')

    def get_chatbot_class(self, class_name: str) -> Type[AbstractChatbot]:
        """
        Dynamically import a chatbot class based on the class name from the relative module '.chatbots'.
        Args:
            class_name (str): The name of the chatbot class to be imported.
        Returns:
            Type[AbstractChatbot]: A chatbot class derived from AbstractChatbot.
        """
        module = import_module('.chatbots', __package__)
        try:
            return getattr(module, class_name)
        except AttributeError:
            raise ImportError(f"No class named '{class_name}' found in the module 'chatbots'.")

    async def load_bots(self, app: web.Application) -> None:
        """Load all chatbots from DB."""
        self.logger.info("Loading chatbots from DB...")
        db = app['database']
        async with await db.acquire() as conn:
            ChatbotModel.Meta.connection = conn
            bots = await ChatbotModel.all()
            for bot in bots:
                self.logger.notice(
                    f"Loading chatbot '{bot.name}'..."
                )
                cls_name = bot.custom_class
                if cls_name is None:
                    class_name = Chatbot
                else:
                    class_name = self.get_chatbot_class(cls_name)
                chatbot = class_name(
                    chatbot_id=bot.chatbot_id,
                    name=bot.name
                )
                try:
                    await chatbot.configure()
                    self.add_chatbot(chatbot)
                except Exception as e:
                    self.logger.error(
                        f"Failed to configure chatbot '{chatbot.name}': {e}"
                    )
        self.logger.info(
            ":: Chatbots loaded successfully."
        )

    def create_chatbot(self, class_name: Any = None, name: str = None, **kwargs) -> AbstractChatbot:
        """Create a chatbot and add it to the manager."""
        if class_name is None:
            class_name = Chatbot
        chatbot = class_name(**kwargs)
        chatbot.name = name
        self.add_chatbot(chatbot)
        if 'llm' in kwargs:
            llm = kwargs['llm']
            llm_name = llm.pop('name')
            model = llm.pop('model')
            llm = chatbot.load_llm(
                llm_name, model=model, **llm
            )
            chatbot.llm = llm
        return chatbot

    def add_chatbot(self, chatbot: AbstractChatbot) -> None:
        """Add a chatbot to the manager."""
        self.chatbots[chatbot.name] = chatbot

    def get_chatbot(self, name: str) -> AbstractChatbot:
        """Get a chatbot by name."""
        return self.chatbots.get(name)

    def remove_chatbot(self, name: str) -> None:
        """Remove a chatbot by name."""
        del self.chatbots[name]

    def get_chatbots(self) -> Dict[str, AbstractChatbot]:
        """Get all chatbots."""
        return self.chatbots

    def get_app(self) -> web.Application:
        """Get the app."""
        if self.app is None:
            raise RuntimeError("App is not set.")
        return self.app

    def setup(self, app: web.Application) -> web.Application:
        if isinstance(app, web.Application):
            self.app = app  # register the app into the Extension
        else:
            self.app = app.get_app()  # Nav Application
        # register signals for startup and shutdown
        self.app.on_startup.append(self.on_startup)
        self.app.on_shutdown.append(self.on_shutdown)
        # Add Manager to main Application:
        self.app['chatbot_manager'] = self
        ## Configure Routes
        router = self.app.router
        router.add_view('/api/v1/chat', ChatHandler)
        router.add_view('/api/v1/chat/{chatbot_name}', ChatHandler)
        ChatbotHandler.configure(self.app, '/api/v1/bots')
        # router.add_view('/api/v1/bots', ChatbotHandler)
        # router.add_view('/api/v1/bots/{chatbot_name}', ChatbotHandler)
        return self.app

    async def on_startup(self, app: web.Application) -> None:
        """On startup."""
        # configure all pre-configured chatbots:
        await self.load_bots(app)

    async def on_shutdown(self, app: web.Application) -> None:
        """On shutdown."""
        pass
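A minimal wiring sketch (not part of the wheel) showing how the setup() flow above is meant to be used: ChatbotManager registers the /api/v1/chat routes and its startup/shutdown hooks on an aiohttp application. The 'database' key and an asyncdb-style pool with acquire() are assumptions inferred from load_bots(); the run_app call is left commented because a real DB connection would be needed at startup.

# Hypothetical usage sketch, assuming ai-parrot is installed and exposes
# ChatbotManager as shown in this diff.
from aiohttp import web
from parrot.manager import ChatbotManager

app = web.Application()
app['database'] = None  # placeholder: load_bots() expects a DB pool under app['database']

manager = ChatbotManager()
manager.setup(app)  # adds /api/v1/chat views, /api/v1/bots handlers, startup/shutdown signals

# web.run_app(app, port=8080)  # on startup, load_bots() would read ChatbotModel rows from the DB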
parrot/models.py
ADDED
@@ -0,0 +1,347 @@
from typing import Union, Optional
import uuid
import time
from datetime import datetime
from pathlib import Path, PurePath
from enum import Enum
from datamodel import BaseModel, Field
from datamodel.types import Text  # pylint: disable=no-name-in-module
from asyncdb.models import Model

def created_at(*args, **kwargs) -> int:
    return int(time.time()) * 1000


class AgentResponse(BaseModel):
    """AgentResponse.
    dict_keys(
        ['input', 'chat_history', 'output', 'intermediate_steps']
    )

    Response from Chatbots.
    """
    question: str = Field(required=False)
    input: str = Field(required=False)
    output: str = Field(required=False)
    intermediate_steps: list = Field(default_factory=list)

class ChatResponse(BaseModel):
    """ChatResponse.
    dict_keys(
        ['question', 'chat_history', 'answer', 'source_documents', 'generated_question']
    )

    Response from Chatbots.
    """
    query: str = Field(required=False)
    result: str = Field(required=False)
    question: str = Field(required=False)
    generated_question: str = Field(required=False)
    answer: str = Field(required=False)
    response: str = Field(required=False)
    chat_history: list = Field(repr=True, default_factory=list)
    source_documents: list = Field(required=False, default_factory=list)
    documents: dict = Field(required=False, default_factory=dict)
    sid: uuid.UUID = Field(primary_key=True, required=False, default=uuid.uuid4)
    at: int = Field(default=created_at)

    def __post_init__(self) -> None:
        if self.result and not self.answer:
            self.answer = self.result
        if self.question and not self.generated_question:
            self.generated_question = self.question
        return super().__post_init__()


# Chatbot Model:
class ChatbotModel(Model):
    """Chatbot.
    CREATE TABLE IF NOT EXISTS navigator.chatbots (
        chatbot_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
        name VARCHAR NOT NULL DEFAULT 'Nav',
        description VARCHAR,
        config_file VARCHAR DEFAULT 'config.toml',
        company_information JSONB DEFAULT '{}'::JSONB,
        contact_information VARCHAR,
        contact_form VARCHAR,
        contact_email VARCHAR,
        company_website VARCHAR,
        avatar TEXT,
        enabled BOOLEAN NOT NULL DEFAULT TRUE,
        timezone VARCHAR DEFAULT 'UTC',
        attributes JSONB DEFAULT '{}'::JSONB,
        role VARCHAR DEFAULT 'a Human Resources Assistant',
        goal VARCHAR NOT NULL DEFAULT 'Bring useful information to Users.',
        backstory VARCHAR NOT NULL DEFAULT 'I was created by a team of developers to assist with users tasks.',
        rationale VARCHAR NOT NULL DEFAULT 'Remember to maintain a professional tone. Please provide accurate and relevant information.',
        language VARCHAR DEFAULT 'en',
        template_prompt VARCHAR,
        pre_instructions JSONB DEFAULT '[]'::JSONB,
        llm VARCHAR DEFAULT 'VertexLLM',
        model_name VARCHAR DEFAULT 'gemini-pro',
        model_config JSONB DEFAULT '{}'::JSONB,
        embedding_name VARCHAR DEFAULT 'thenlper/gte-base',
        tokenizer VARCHAR DEFAULT 'thenlper/gte-base',
        summarize_model VARCHAR DEFAULT 'facebook/bart-large-cnn',
        classification_model VARCHAR DEFAULT 'facebook/bart-large-cnn',
        database JSONB DEFAULT '{"vector_database": "MilvusStore", "database": "TROC", "collection_name": "troc_information"}'::JSONB,
        created_at TIMESTAMPTZ DEFAULT NOW(),
        created_by INTEGER,
        updated_at TIMESTAMPTZ DEFAULT NOW()
    );
    """
    chatbot_id: uuid.UUID = Field(primary_key=True, required=False, default_factory=uuid.uuid4)
    name: str = Field(default='Nav', required=True)
    description: str = Field(default='Nav Chatbot', required=False)
    config_file: str = Field(default='config.toml', required=False)
    custom_class: str = Field(required=False)
    company_information: dict = Field(default_factory=dict, required=False)
    avatar: Text
    enabled: bool = Field(required=True, default=True)
    timezone: str = Field(required=False, max=75, default="UTC", repr=False)
    attributes: Optional[dict] = Field(required=False, default_factory=dict)
    # Chatbot Configuration
    role: str = Field(
        default="a Human Resources Assistant",
        required=False
    )
    goal: str = Field(
        default="Bring useful information to Users.",
        required=True
    )
    backstory: str = Field(
        default="I was created by a team of developers to assist with users tasks.",
        required=True
    )
    rationale: str = Field(
        default=(
            "Remember to maintain a professional tone."
            " Please provide accurate and relevant information."
        ),
        required=True
    )
    language: str = Field(default='en', required=False)
    template_prompt: Union[str, PurePath] = Field(
        default=None,
        required=False
    )
    pre_instructions: list = Field(
        default_factory=list,
        required=False
    )
    # Model Configuration:
    llm: str = Field(default='VertexLLM', required=False)
    llm_config: dict = Field(default_factory=dict, required=False)
    embedding_name: str = Field(default="thenlper/gte-base", required=False)
    tokenizer: str = Field(default='thenlper/gte-base', required=False)
    summarize_model: str = Field(default="facebook/bart-large-cnn", required=False)
    classification_model: str = Field(default="facebook/bart-large-cnn", required=False)
    # Database Configuration
    database: dict = Field(default='TROC', required=False, default_factory=dict)
    # When created
    created_at: datetime = Field(required=False, default=datetime.now())
    created_by: int = Field(required=False)
    updated_at: datetime = Field(required=False, default=datetime.now())

    def __post_init__(self) -> None:
        super(ChatbotModel, self).__post_init__()
        if isinstance(self.config_file, str):
            self.config_file = Path(self.config_file).resolve()
        if isinstance(self.config_file, PurePath):
            self.config_file = str(self.config_file)

    class Meta:
        """Meta Chatbot."""
        driver = 'pg'
        name = "chatbots"
        schema = "navigator"
        strict = True
        frozen = False


class ChatbotUsage(Model):
    """ChatbotUsage.

    Saving information about Chatbot Usage.

    -- ScyllaDB CREATE TABLE Syntax --
    CREATE TABLE IF NOT EXISTS navigator.chatbots_usage (
        chatbot_id TEXT,
        user_id SMALLINT,
        sid TEXT,
        source_path TEXT,
        platform TEXT,
        origin inet,
        user_agent TEXT,
        question TEXT,
        response TEXT,
        used_at BIGINT,
        at TEXT,
        PRIMARY KEY ((chatbot_id, sid, at), used_at)
    ) WITH CLUSTERING ORDER BY (used_at DESC)
    AND default_time_to_live = 10368000;

    """
    chatbot_id: uuid.UUID = Field(primary_key=True, required=False)
    user_id: int = Field(primary_key=True, required=False)
    sid: uuid.UUID = Field(primary_key=True, required=False, default=uuid.uuid4)
    source_path: str = Field(required=False, default='web')
    platform: str = Field(required=False, default='web')
    origin: str = Field(required=False)
    user_agent: str = Field(required=False)
    question: str = Field(required=False)
    response: str = Field(required=False)
    used_at: int = Field(required=False, default=created_at)
    event_timestamp: datetime = Field(required=False, default=datetime.now)
    _at: str = Field(primary_key=True, required=False)

    class Meta:
        """Meta Chatbot."""
        driver = 'bigquery'
        name = "chatbots_usage"
        schema = "navigator"
        ttl = 10368000  # 120 days in seconds
        strict = True
        frozen = False

    def __post_init__(self) -> None:
        if not self._at:
            # Generate a unique session id
            self._at = f'{self.sid}:{self.used_at}'
        super(ChatbotUsage, self).__post_init__()


class FeedbackType(Enum):
    """FeedbackType."""
    # Good Feedback
    GOOD_COMPLETE = "Completeness"
    GOOD_CORRECT = "Correct"
    GOOD_FOLLOW = "Follow the instructions"
    GOOD_UNDERSTAND = "Understandable"
    GOOD_USEFUL = "very useful"
    GOOD_OTHER = "Please Explain"
    # Bad Feedback
    BAD_DONTLIKE = "Don't like the style"
    BAD_INCORRECT = "Incorrect"
    BAD_NOTFOLLOW = "Didn't follow the instructions"
    BAD_LAZY = "Being lazy"
    BAD_NOTUSEFUL = "Not useful"
    BAD_UNSAFE = "Unsafe or problematic"
    BAD_OTHER = "Other"

    @classmethod
    def list_feedback(cls, feedback_category):
        """Return a list of feedback types based on the given category (Good or Bad)."""
        prefix = feedback_category.upper() + "_"
        return [feedback for feedback in cls if feedback.name.startswith(prefix)]

class ChatbotFeedback(Model):
    """ChatbotFeedback.

    Saving information about Chatbot Feedback.

    -- ScyllaDB CREATE TABLE Syntax --
    CREATE TABLE IF NOT EXISTS navigator.chatbots_feedback (
        chatbot_id UUID,
        user_id INT,
        sid UUID,
        at TEXT,
        rating TINYINT,
        like BOOLEAN,
        dislike BOOLEAN,
        feedback_type TEXT,
        feedback TEXT,
        created_at BIGINT,
        PRIMARY KEY ((chatbot_id, user_id, sid), created_at)
    ) WITH CLUSTERING ORDER BY (created_at DESC)
    AND default_time_to_live = 7776000;

    """
    chatbot_id: uuid.UUID = Field(primary_key=True, required=False)
    user_id: int = Field(required=False)
    sid: uuid.UUID = Field(primary_key=True, required=False)
    _at: str = Field(primary_key=True, required=False)
    # feedback information:
    rating: int = Field(required=False, default=0)
    _like: bool = Field(required=False, default=False)
    _dislike: bool = Field(required=False, default=False)
    feedback_type: FeedbackType = Field(required=False)
    feedback: str = Field(required=False)
    created_at: int = Field(required=False, default=created_at)
    expiration_timestamp: datetime = Field(required=False, default=datetime.now)

    class Meta:
        """Meta Chatbot."""
        driver = 'bigquery'
        name = "chatbots_feedback"
        schema = "navigator"
        ttl = 7776000  # 3 months in seconds
        strict = True
        frozen = False

    def __post_init__(self) -> None:
        if not self._at:
            # Generate a unique session id
            if not self.created_at:
                self.created_at = created_at()
            self._at = f'{self.sid}:{self.created_at}'
        super(ChatbotFeedback, self).__post_init__()


## Prompt Library:

class PromptCategory(Enum):
    """
    Prompt Category.

    Categorization of Prompts, as "tech", "tech-or-explain", "idea", "explain", "action", "command", "other".
    """
    TECH = "tech"
    TECH_OR_EXPLAIN = "tech-or-explain"
    IDEA = "idea"
    EXPLAIN = "explain"
    ACTION = "action"
    COMMAND = "command"
    OTHER = "other"

class PromptLibrary(Model):
    """PromptLibrary.

    Saving information about Prompt Library.

    -- PostgreSQL CREATE TABLE Syntax --
    CREATE TABLE IF NOT EXISTS navigator.prompt_library (
        prompt_id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
        chatbot_id UUID,
        title varchar,
        query varchar,
        description TEXT,
        prompt_category varchar,
        prompt_tags varchar[],
        created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
        created_by INTEGER,
        updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
    );
    """
    prompt_id: uuid.UUID = Field(primary_key=True, required=False, default_factory=uuid.uuid4)
    chatbot_id: uuid.UUID = Field(required=True)
    title: str = Field(required=True)
    query: str = Field(required=True)
    description: str = Field(required=False)
    prompt_category: str = Field(required=False, default=PromptCategory.OTHER)
    prompt_tags: list = Field(required=False, default_factory=list)
    created_at: datetime = Field(required=False, default=datetime.now)
    created_by: int = Field(required=False)
    updated_at: datetime = Field(required=False, default=datetime.now)

    class Meta:
        """Meta Prompt Library."""
        driver = 'pg'
        name = "prompt_library"
        schema = "navigator"
        strict = True
        frozen = False

    def __post_init__(self) -> None:
        super(PromptLibrary, self).__post_init__()
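A minimal usage sketch (not from the package) of the ChatResponse normalization shown above: __post_init__ copies result into answer and question into generated_question when those fields are empty. It assumes ai-parrot and its datamodel dependency are installed and that keyword construction and callable defaults behave as the model declarations suggest.

# Illustrative only; field values are made up.
from parrot.models import ChatResponse

resp = ChatResponse(query="what is the PTO policy?", result="You accrue 15 days per year.")
print(resp.answer)        # "You accrue 15 days per year." (copied from result by __post_init__)
print(resp.sid, resp.at)  # auto-generated session UUID and millisecond timestamp defaults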
parrot/py.typed
ADDED
File without changes
parrot/stores/__init__.py
ADDED
File without changes
parrot/stores/abstract.py
ADDED
@@ -0,0 +1,170 @@
from abc import ABC, abstractmethod
from typing import Union, Any
from collections.abc import Callable
import torch
from langchain_huggingface import (
    HuggingFaceEmbeddings
)
from langchain_community.embeddings import (
    HuggingFaceBgeEmbeddings
)
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from navconfig.logging import logging
from ..conf import (
    EMBEDDING_DEVICE,
    EMBEDDING_DEFAULT_MODEL,
    CUDA_DEFAULT_DEVICE,
    MAX_BATCH_SIZE
)


class AbstractStore(ABC):
    """AbstractStore class.

    Args:
        embeddings (str): Embeddings.
    """

    def __init__(self, embeddings: Union[str, Callable] = None, **kwargs):
        self.client: Callable = None
        self.vector: Callable = None
        self._embed_: Callable = None
        self._connected: bool = False
        self.use_bge: bool = kwargs.pop("use_bge", False)
        self.fastembed: bool = kwargs.pop("use_fastembed", False)
        self.embedding_name: str = EMBEDDING_DEFAULT_MODEL
        self.dimension: int = kwargs.pop("dimension", 768)
        self._metric_type: str = kwargs.pop("metric_type", 'COSINE')
        self._index_type: str = kwargs.pop("index_type", 'IVF_FLAT')
        self.database: str = kwargs.pop('database', '')
        self.collection = kwargs.pop("collection_name", "my_collection")
        self.index_name = kwargs.pop("index_name", "my_index")
        if embeddings is not None:
            if isinstance(embeddings, str):
                self.embedding_name = embeddings
            else:
                self._embed_ = embeddings
        self.logger = logging.getLogger(f"Store.{__name__}")
        # client
        self._client = None

    @property
    def connected(self) -> bool:
        return self._connected

    async def __aenter__(self):
        try:
            self.tensor = torch.randn(1000, 1000).cuda()
        except RuntimeError:
            self.tensor = None
        if self._embed_ is None:
            self._embed_ = self.create_embedding(
                model_name=self.embedding_name
            )
        self._client, self.client_id = self.connect()
        return self

    @abstractmethod
    def connect(self):
        pass

    def __enter__(self):
        if self._embed_ is None:
            self._embed_ = self.create_embedding(
                model_name=self.embedding_name
            )
        return self

    async def __aexit__(self, exc_type, exc_value, traceback):
        # closing Embedding
        self._embed_ = None
        del self.tensor
        try:
            torch.cuda.empty_cache()
        except RuntimeError:
            pass

    def __exit__(self, exc_type, exc_value, traceback):
        # closing Embedding
        self._embed_ = None
        try:
            torch.cuda.empty_cache()
        except RuntimeError:
            pass

    @abstractmethod
    def get_vector(self):
        pass

    @abstractmethod
    async def load_documents(
        self,
        documents: list,
        collection: str = None
    ):
        pass

    @abstractmethod
    def upsert(self, payload: dict, collection_name: str = None) -> None:
        pass

    @abstractmethod
    def search(self, payload: dict, collection_name: str = None) -> dict:
        pass

    @abstractmethod
    async def delete_collection(self, collection_name: str = None) -> dict:
        pass

    @abstractmethod
    async def create_collection(self, collection_name: str, document: Any) -> dict:
        pass

    def create_embedding(
        self,
        model_name: str = None
    ):
        encode_kwargs: dict = {
            'normalize_embeddings': True,
            "batch_size": MAX_BATCH_SIZE
        }
        if torch.backends.mps.is_available():
            # Use Apple Metal Performance Shaders (MPS) if available
            device = torch.device("mps")
        elif torch.cuda.is_available():
            # Use CUDA GPU if available
            device = torch.device(
                f'cuda:{CUDA_DEFAULT_DEVICE}'
            )
        elif EMBEDDING_DEVICE == 'cuda':
            device = torch.device(
                f'cuda:{CUDA_DEFAULT_DEVICE}'
            )
        else:
            device = torch.device(EMBEDDING_DEVICE)
        model_kwargs: dict = {'device': device}
        if model_name is None:
            model_name = EMBEDDING_DEFAULT_MODEL
        if self.use_bge is True:
            return HuggingFaceBgeEmbeddings(
                model_name=model_name,
                model_kwargs=model_kwargs,
                encode_kwargs=encode_kwargs
            )
        if self.fastembed is True:
            return FastEmbedEmbeddings(
                model_name=model_name,
                max_length=1024,
                threads=4
            )
        return HuggingFaceEmbeddings(
            model_name=model_name,
            model_kwargs=model_kwargs,
            encode_kwargs=encode_kwargs
        )

    def get_default_embedding(
        self,
        model_name: str = EMBEDDING_DEFAULT_MODEL
    ):
        return self.create_embedding(model_name=model_name)
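A hypothetical subclass (not shipped in this wheel) showing the extension points AbstractStore defines; the real MilvusStore and QdrantStore in parrot/stores fill these in against their respective backends. The in-memory behavior below is invented purely to keep the sketch self-contained.

from typing import Any
from parrot.stores.abstract import AbstractStore


class InMemoryStore(AbstractStore):
    """Toy store: keeps documents in a dict keyed by collection name."""

    def __init__(self, embeddings=None, **kwargs):
        super().__init__(embeddings=embeddings, **kwargs)
        self._docs: dict = {}  # collection name -> list of documents/payloads

    def connect(self):
        self._connected = True
        return {}, "in-memory"  # (client, client_id) pair, as consumed by __aenter__

    def get_vector(self):
        return self.vector

    async def load_documents(self, documents: list, collection: str = None):
        self._docs.setdefault(collection or self.collection, []).extend(documents)

    def upsert(self, payload: dict, collection_name: str = None) -> None:
        self._docs.setdefault(collection_name or self.collection, []).append(payload)

    def search(self, payload: dict, collection_name: str = None) -> dict:
        # No real vector similarity here; just return everything in the collection.
        return {"matches": self._docs.get(collection_name or self.collection, [])}

    async def delete_collection(self, collection_name: str = None) -> dict:
        return {"deleted": self._docs.pop(collection_name or self.collection, [])}

    async def create_collection(self, collection_name: str, document: Any) -> dict:
        self._docs[collection_name] = []
        return {"created": collection_name}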