ai-parrot 0.3.4__cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-parrot might be problematic. Click here for more details.
- ai_parrot-0.3.4.dist-info/LICENSE +21 -0
- ai_parrot-0.3.4.dist-info/METADATA +319 -0
- ai_parrot-0.3.4.dist-info/RECORD +109 -0
- ai_parrot-0.3.4.dist-info/WHEEL +6 -0
- ai_parrot-0.3.4.dist-info/top_level.txt +3 -0
- parrot/__init__.py +21 -0
- parrot/chatbots/__init__.py +7 -0
- parrot/chatbots/abstract.py +728 -0
- parrot/chatbots/asktroc.py +16 -0
- parrot/chatbots/base.py +366 -0
- parrot/chatbots/basic.py +9 -0
- parrot/chatbots/bose.py +17 -0
- parrot/chatbots/cody.py +17 -0
- parrot/chatbots/copilot.py +83 -0
- parrot/chatbots/dataframe.py +103 -0
- parrot/chatbots/hragents.py +15 -0
- parrot/chatbots/odoo.py +17 -0
- parrot/chatbots/retrievals/__init__.py +578 -0
- parrot/chatbots/retrievals/constitutional.py +19 -0
- parrot/conf.py +110 -0
- parrot/crew/__init__.py +3 -0
- parrot/crew/tools/__init__.py +22 -0
- parrot/crew/tools/bing.py +13 -0
- parrot/crew/tools/config.py +43 -0
- parrot/crew/tools/duckgo.py +62 -0
- parrot/crew/tools/file.py +24 -0
- parrot/crew/tools/google.py +168 -0
- parrot/crew/tools/gtrends.py +16 -0
- parrot/crew/tools/md2pdf.py +25 -0
- parrot/crew/tools/rag.py +42 -0
- parrot/crew/tools/search.py +32 -0
- parrot/crew/tools/url.py +21 -0
- parrot/exceptions.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/handlers/__init__.py +4 -0
- parrot/handlers/bots.py +196 -0
- parrot/handlers/chat.py +162 -0
- parrot/interfaces/__init__.py +6 -0
- parrot/interfaces/database.py +29 -0
- parrot/llms/__init__.py +137 -0
- parrot/llms/abstract.py +47 -0
- parrot/llms/anthropic.py +42 -0
- parrot/llms/google.py +42 -0
- parrot/llms/groq.py +45 -0
- parrot/llms/hf.py +45 -0
- parrot/llms/openai.py +59 -0
- parrot/llms/pipes.py +114 -0
- parrot/llms/vertex.py +78 -0
- parrot/loaders/__init__.py +20 -0
- parrot/loaders/abstract.py +456 -0
- parrot/loaders/audio.py +106 -0
- parrot/loaders/basepdf.py +102 -0
- parrot/loaders/basevideo.py +280 -0
- parrot/loaders/csv.py +42 -0
- parrot/loaders/dir.py +37 -0
- parrot/loaders/excel.py +349 -0
- parrot/loaders/github.py +65 -0
- parrot/loaders/handlers/__init__.py +5 -0
- parrot/loaders/handlers/data.py +213 -0
- parrot/loaders/image.py +119 -0
- parrot/loaders/json.py +52 -0
- parrot/loaders/pdf.py +437 -0
- parrot/loaders/pdfchapters.py +142 -0
- parrot/loaders/pdffn.py +112 -0
- parrot/loaders/pdfimages.py +207 -0
- parrot/loaders/pdfmark.py +88 -0
- parrot/loaders/pdftables.py +145 -0
- parrot/loaders/ppt.py +30 -0
- parrot/loaders/qa.py +81 -0
- parrot/loaders/repo.py +103 -0
- parrot/loaders/rtd.py +65 -0
- parrot/loaders/txt.py +92 -0
- parrot/loaders/utils/__init__.py +1 -0
- parrot/loaders/utils/models.py +25 -0
- parrot/loaders/video.py +96 -0
- parrot/loaders/videolocal.py +120 -0
- parrot/loaders/vimeo.py +106 -0
- parrot/loaders/web.py +216 -0
- parrot/loaders/web_base.py +112 -0
- parrot/loaders/word.py +125 -0
- parrot/loaders/youtube.py +192 -0
- parrot/manager.py +166 -0
- parrot/models.py +372 -0
- parrot/py.typed +0 -0
- parrot/stores/__init__.py +48 -0
- parrot/stores/abstract.py +171 -0
- parrot/stores/milvus.py +632 -0
- parrot/stores/qdrant.py +153 -0
- parrot/tools/__init__.py +12 -0
- parrot/tools/abstract.py +53 -0
- parrot/tools/asknews.py +32 -0
- parrot/tools/bing.py +13 -0
- parrot/tools/duck.py +62 -0
- parrot/tools/google.py +170 -0
- parrot/tools/stack.py +26 -0
- parrot/tools/weather.py +70 -0
- parrot/tools/wikipedia.py +59 -0
- parrot/tools/zipcode.py +179 -0
- parrot/utils/__init__.py +2 -0
- parrot/utils/parsers/__init__.py +5 -0
- parrot/utils/parsers/toml.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/toml.py +11 -0
- parrot/utils/types.cpython-310-x86_64-linux-gnu.so +0 -0
- parrot/utils/uv.py +11 -0
- parrot/version.py +10 -0
- resources/users/__init__.py +5 -0
- resources/users/handlers.py +13 -0
- resources/users/models.py +205 -0
- settings/__init__.py +0 -0
- settings/settings.py +51 -0
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
from typing import Optional, Union
|
|
2
|
+
from pytube import YouTube
|
|
3
|
+
from youtube_transcript_api import NoTranscriptFound
|
|
4
|
+
import torch
|
|
5
|
+
from langchain.docstore.document import Document
|
|
6
|
+
from langchain_community.document_loaders.parsers.audio import (
|
|
7
|
+
OpenAIWhisperParserLocal
|
|
8
|
+
)
|
|
9
|
+
from langchain_community.document_loaders import YoutubeLoader as YTLoader
|
|
10
|
+
from langchain_community.document_loaders.generic import (
|
|
11
|
+
GenericLoader
|
|
12
|
+
)
|
|
13
|
+
from langchain_community.document_loaders.blob_loaders.youtube_audio import (
|
|
14
|
+
YoutubeAudioLoader
|
|
15
|
+
)
|
|
16
|
+
from .video import VideoLoader
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def extract_video_id(url):
    """Return the YouTube video id contained in *url*.

    Supported URL shapes:

    * watch URLs with a ``v`` query parameter in any position
      (``https://www.youtube.com/watch?t=10&v=<id>``),
    * short links (``https://youtu.be/<id>``),
    * path based links (``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``,
      ``/v/<id>``).

    Args:
        url: The video URL as a string.

    Returns:
        The video id as a string.

    Raises:
        IndexError: If no video id can be located. ``IndexError`` is kept
            (rather than ``ValueError``) because that is what the previous
            implementation raised for unsupported URLs.
    """
    # Local import keeps this function self-contained.
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)

    # Regular watch URLs: the id is the "v" query parameter, wherever it sits.
    values = parse_qs(parsed.query).get("v")
    if values and values[0]:
        return values[0]

    segments = [segment for segment in parsed.path.split("/") if segment]
    host = (parsed.hostname or "").lower()

    # Short links: https://youtu.be/<id>
    if segments and (host == "youtu.be" or host.endswith(".youtu.be")):
        return segments[0]

    # Path based links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    # Legacy behaviour: plain substring search, so any input the previous
    # implementation accepted (e.g. "?v=" inside a fragment) still works.
    if "?v=" in url:
        return url.split("?v=")[1].split("&")[0]

    raise IndexError(f"No YouTube video id found in URL: {url!r}")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class YoutubeLoader(VideoLoader):
    """
    Loader for Youtube videos.

    Turns a YouTube URL into a list of LangChain ``Document`` objects.
    Three strategies are used, in this order:

    1. A transcript file supplied by the caller is read as-is.
    2. Otherwise the video is downloaded, its audio extracted and
       transcribed with Whisper (helpers presumably provided by
       ``VideoLoader`` -- they are not defined in this module).
    3. If that pipeline fails for any reason, the captions published on
       YouTube are loaded, falling back to a local Whisper parser when the
       video has no captions.
    """
    _extension = ['.youtube']
    encoding = 'utf-8'
    chunk_size = 768

    def get_video_info(self, url: str) -> dict:
        """Collect basic metadata of the video at *url* through pytube.

        Args:
            url: URL of the YouTube video.

        Returns:
            A dict with url, ids, title, description, view count, publish
            date (formatted ``%Y-%m-%d %H:%M:%S``) and author. Missing text
            values are reported as ``"Unknown"`` and a missing view count
            as ``0``.
        """
        yt = YouTube(url)
        # NOTE: 'thumbnail_url' and 'length' are left out on purpose of
        # keeping parity with the previous implementation, where both
        # entries were commented out.
        return {
            "url": url,
            "video_id": yt.video_id,
            "watch_url": yt.watch_url,
            "embed_url": yt.embed_url,
            "title": yt.title or "Unknown",
            "description": yt.description or "Unknown",
            "view_count": yt.views or 0,
            "publish_date": (
                yt.publish_date.strftime("%Y-%m-%d %H:%M:%S")
                if yt.publish_date else "Unknown"
            ),
            "author": yt.author or "Unknown",
        }

    def load_video(self, url: str, video_title: str, transcript: Optional[str] = None) -> list:
        """Build the documents for a single YouTube video.

        Args:
            url: URL of the YouTube video.
            video_title: Title stored in the document metadata.
            transcript: Optional path to an existing transcript file. When
                given, the file content is used instead of transcribing.

        Returns:
            A list of ``Document`` objects; empty when nothing could be
            loaded.
        """
        # Metadata is fetched first in every case, so an unusable URL fails
        # early exactly as before.
        video_info = self.get_video_info(url)
        if transcript is not None:
            return self._load_from_transcript_file(url, video_title, transcript)
        try:
            return self._load_from_whisper(url, video_title, video_info)
        except Exception as err:
            # Best effort: any failure of the download/transcription
            # pipeline triggers the captions fallback, but the reason is
            # no longer discarded silently.
            self.logger.warning(
                f"Whisper transcription failed for {url}, "
                f"falling back to Youtube captions: {err}"
            )
        return self._load_from_captions(url, video_info)

    def _load_from_whisper(self, url: str, video_title: str, video_info: dict) -> list:
        """Download the video, transcribe its audio and build the documents."""
        # first: download video
        file_path = self.download_video(url, self._video_path)
        audio_path = file_path.with_suffix('.mp3')
        transcript_path = file_path.with_suffix('.vtt')
        # second: extract audio and transcribe it
        self.extract_audio(file_path, audio_path)
        transcript_whisper = self.get_whisper_transcript(audio_path)
        text = transcript_whisper['text']
        # Summarize the transcript; a failing summary must not discard
        # the transcript itself.
        try:
            summary = self.get_summary_from_text(text)
        except Exception:
            summary = ''
        metadata = {
            "url": f"{url}",
            "source": f"{url}",
            "index": video_title,
            "filename": video_title,
            "question": '',
            "answer": '',
            "source_type": self._source_type,
            'type': 'video_transcript',
            "summary": f"{summary!s}",
            "document_meta": {
                "language": self._language,
                "title": video_title,
                "docinfo": video_info
            }
        }
        if self.topics:
            metadata["document_meta"]['topic_tags'] = self.topics
        # Document 1: the plain transcript.
        documents = [Document(page_content=text, metadata=metadata)]
        # Document 2: the VTT version (only when one was produced).
        vtt = self.transcript_to_vtt(transcript_whisper, transcript_path)
        if vtt:
            documents.append(Document(page_content=vtt, metadata=metadata))
        # Every dialog chunk is additionally stored as its own document.
        for chunk in self.transcript_to_blocks(transcript_whisper):
            chunk_meta = {
                "index": f"{video_title}:{chunk['id']}",
                "document_meta": {
                    "start": f"{chunk['start_time']}",
                    "end": f"{chunk['end_time']}",
                    "id": f"{chunk['id']}",
                    "language": self._language,
                    "title": video_title,
                    "topic_tags": ""
                }
            }
            # chunk_meta replaces the top-level "index" and
            # "document_meta" entries of the shared metadata.
            documents.append(
                Document(
                    page_content=chunk['text'],
                    metadata={**metadata, **chunk_meta}
                )
            )
        return documents

    def _load_from_captions(self, url: str, video_info: dict) -> list:
        """Load the captions published on YouTube, or transcribe the audio locally."""
        try:
            loader = YTLoader.from_youtube_url(
                url,
                add_video_info=True,
                language=[self._language],
            )
            docs = loader.load()
        except NoTranscriptFound:
            # No captions available: download the audio track and parse it
            # with a local Whisper model.
            loader = GenericLoader(
                YoutubeAudioLoader([url], str(self._video_path)),
                OpenAIWhisperParserLocal(
                    lang_model='openai/whisper-small.en',
                    device=self._get_device()
                )
            )
            docs = loader.load()
        if not docs:
            self.logger.warning(
                f"Unable to load Youtube Video {url}"
            )
            return []
        summary = self.get_summary(docs)
        for doc in docs:
            doc.metadata['source_type'] = self._source_type
            doc.metadata['summary'] = f"{summary!s}"
            doc.metadata['index'] = ''
            doc.metadata['filename'] = ''
            doc.metadata['question'] = ''
            doc.metadata['answer'] = ''
            doc.metadata['type'] = 'video_transcript'
            doc.metadata['document_meta'] = {}
            if self.topics:
                doc.metadata['document_meta']['topic_tags'] = self.topics
            # add video metadata to document metadata (values as strings):
            for key, value in video_info.items():
                doc.metadata['document_meta'][key] = f"{value!s}"
        return docs

    def _load_from_transcript_file(self, url: str, video_title: str, transcript: str) -> list:
        """Build a single document from the transcript file at *transcript*."""
        # Read with the encoding declared on the class instead of the
        # platform default.
        with open(transcript, 'r', encoding=self.encoding) as f:
            text = f.read()
        if not text:
            # Previously an empty file left ``summary`` unassigned and the
            # metadata construction failed with UnboundLocalError.
            self.logger.warning(
                f"Transcript file {transcript} is empty for Youtube Video {url}"
            )
            return []
        summary = self.get_summary_from_text(text)
        metadata = {
            "source": url,
            "url": url,
            "index": '',
            "filename": '',
            "question": '',
            "answer": '',
            "source_type": self._source_type,
            'type': 'video_transcript',
            'summary': f"{summary!s}",
            "document_meta": {
                "language": self._language,
                "title": video_title
            },
        }
        if self.topics:
            metadata['document_meta']['topic_tags'] = self.topics
        return [Document(page_content=text, metadata=metadata)]
|
parrot/manager.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Chatbot Manager.
|
|
3
|
+
|
|
4
|
+
Tool for instantiating, managing and interacting with Chatbots through APIs.
|
|
5
|
+
"""
|
|
6
|
+
from typing import Any, Dict, Type
|
|
7
|
+
from importlib import import_module
|
|
8
|
+
from aiohttp import web
|
|
9
|
+
from navconfig.logging import logging
|
|
10
|
+
from .chatbots import (
|
|
11
|
+
AbstractChatbot,
|
|
12
|
+
Chatbot
|
|
13
|
+
)
|
|
14
|
+
from .handlers.chat import ChatHandler # , BotHandler
|
|
15
|
+
from .handlers import ChatbotHandler
|
|
16
|
+
from .models import ChatbotModel
|
|
17
|
+
# Manual Load of Copilot Agent:
|
|
18
|
+
from parrot.chatbots.copilot import CopilotAgent
|
|
19
|
+
|
|
20
|
+
class ChatbotManager:
    """ChatbotManager.

    Manage chatbots and interact with them via an aiohttp App.

    The manager keeps a name -> chatbot registry, loads the bots stored in
    the database when the application starts and registers the chat/bot
    HTTP routes on the application router.
    """
    app: web.Application = None
    # Class-level default only; __init__ gives every instance its own dict.
    chatbots: Dict[str, AbstractChatbot] = {}

    def __init__(self) -> None:
        self.app = None
        self.chatbots = {}
        self.logger = logging.getLogger(name='Parrot.Manager')

    def get_chatbot_class(self, class_name: str) -> Type[AbstractChatbot]:
        """
        Dynamically import a chatbot class based on the class name from the relative module '.chatbots'.
        Args:
            class_name (str): The name of the chatbot class to be imported.
        Returns:
            Type[AbstractChatbot]: A chatbot class derived from AbstractChatbot.
        Raises:
            ImportError: If the module has no attribute named *class_name*.
        """
        module = import_module('.chatbots', __package__)
        try:
            return getattr(module, class_name)
        except AttributeError as err:
            # Chain the original error so the traceback shows the lookup.
            raise ImportError(
                f"No class named '{class_name}' found in the module 'chatbots'."
            ) from err

    async def load_bots(self, app: web.Application) -> None:
        """Load all chatbots from DB.

        Rows with ``bot_type == 'chatbot'`` are built from their custom
        class (or ``Chatbot`` when none is set) and configured; rows with
        ``bot_type == 'agent'`` become ``CopilotAgent`` instances. Bots
        that cannot be built, or whose type is unknown, are skipped.
        """
        self.logger.info("Loading chatbots from DB...")
        db = app['database']
        async with await db.acquire() as conn:
            ChatbotModel.Meta.connection = conn
            bots = await ChatbotModel.all()
            for bot in bots:
                if bot.bot_type == 'chatbot':
                    self.logger.notice(
                        f"Loading chatbot '{bot.name}'..."
                    )
                    cls_name = bot.custom_class
                    if cls_name is None:
                        bot_class = Chatbot
                    else:
                        bot_class = self.get_chatbot_class(cls_name)
                    chatbot = bot_class(
                        chatbot_id=bot.chatbot_id,
                        name=bot.name
                    )
                    try:
                        await chatbot.configure()
                    except Exception as e:
                        # As before, a chatbot whose configuration failed
                        # is still registered below; only the error is
                        # reported.
                        self.logger.error(
                            f"Failed to configure chatbot '{chatbot.name}': {e}"
                        )
                elif bot.bot_type == 'agent':
                    self.logger.notice(
                        f"Loading Agent '{bot.name}'..."
                    )
                    # TODO: extract the list of tools from Agent config
                    try:
                        tools = CopilotAgent.default_tools()
                        chatbot = CopilotAgent(
                            name=bot.name,
                            llm=bot.llm,
                            tools=tools
                        )
                    except Exception as e:
                        self.logger.error(
                            f"Failed to configure Agent '{bot.name}': {e}"
                        )
                        # No agent was built: skip registration instead of
                        # re-adding the previous bot (or hitting an
                        # unbound ``chatbot`` on the first row).
                        continue
                else:
                    self.logger.warning(
                        f"Unknown bot type '{bot.bot_type}' for '{bot.name}', skipping."
                    )
                    continue
                self.add_chatbot(chatbot)
        self.logger.info(
            ":: Chatbots loaded successfully."
        )

    def create_chatbot(self, class_name: Any = None, name: str = None, **kwargs) -> AbstractChatbot:
        """Create a chatbot and add it to the manager.

        Args:
            class_name: Chatbot class to instantiate; ``Chatbot`` when None.
            name: Name assigned to the chatbot and used as registry key.
            **kwargs: Forwarded to the chatbot constructor. An optional
                ``llm`` mapping must contain ``name`` and ``model``; its
                remaining entries are passed to ``chatbot.load_llm``.

        Returns:
            The newly created (and registered) chatbot.
        """
        if class_name is None:
            class_name = Chatbot
        chatbot = class_name(**kwargs)
        chatbot.name = name
        self.add_chatbot(chatbot)
        if 'llm' in kwargs:
            # Work on a copy: popping from the caller's dict would strip
            # 'name' and 'model' from an object the caller still owns.
            llm_config = dict(kwargs['llm'])
            llm_name = llm_config.pop('name')
            model = llm_config.pop('model')
            chatbot.llm = chatbot.load_llm(
                llm_name, model=model, **llm_config
            )
        return chatbot

    def add_chatbot(self, chatbot: AbstractChatbot) -> None:
        """Add a chatbot to the manager, keyed by its name."""
        self.chatbots[chatbot.name] = chatbot

    def get_chatbot(self, name: str) -> AbstractChatbot:
        """Get a chatbot by name (None when it is not registered)."""
        return self.chatbots.get(name)

    def remove_chatbot(self, name: str) -> None:
        """Remove a chatbot by name.

        Raises:
            KeyError: If no chatbot is registered under *name*.
        """
        del self.chatbots[name]

    def get_chatbots(self) -> Dict[str, AbstractChatbot]:
        """Get all chatbots."""
        return self.chatbots

    def get_app(self) -> web.Application:
        """Get the app.

        Raises:
            RuntimeError: If ``setup`` has not registered an application.
        """
        if self.app is None:
            raise RuntimeError("App is not set.")
        return self.app

    def setup(self, app: web.Application) -> web.Application:
        """Attach the manager to *app* and register signals and routes.

        Args:
            app: An aiohttp ``web.Application``, or a wrapper object
                exposing ``get_app()`` that returns one.

        Returns:
            The underlying aiohttp application.
        """
        if isinstance(app, web.Application):
            self.app = app  # register the app into the Extension
        else:
            self.app = app.get_app()  # Nav Application
        # register signals for startup and shutdown
        self.app.on_startup.append(self.on_startup)
        self.app.on_shutdown.append(self.on_shutdown)
        # Add Manager to main Application:
        self.app['chatbot_manager'] = self
        # Configure Routes
        router = self.app.router
        router.add_view(
            '/api/v1/chat',
            ChatHandler
        )
        router.add_view(
            '/api/v1/chat/{chatbot_name}',
            ChatHandler
        )
        ChatbotHandler.configure(self.app, '/api/v1/bots')
        return self.app

    async def on_startup(self, app: web.Application) -> None:
        """On startup."""
        # configure all pre-configured chatbots:
        await self.load_bots(app)

    async def on_shutdown(self, app: web.Application) -> None:
        """On shutdown."""
        pass
|