ai-parrot 0.3.4__cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ai-parrot might be problematic. Click here for more details.

Files changed (109) hide show
  1. ai_parrot-0.3.4.dist-info/LICENSE +21 -0
  2. ai_parrot-0.3.4.dist-info/METADATA +319 -0
  3. ai_parrot-0.3.4.dist-info/RECORD +109 -0
  4. ai_parrot-0.3.4.dist-info/WHEEL +6 -0
  5. ai_parrot-0.3.4.dist-info/top_level.txt +3 -0
  6. parrot/__init__.py +21 -0
  7. parrot/chatbots/__init__.py +7 -0
  8. parrot/chatbots/abstract.py +728 -0
  9. parrot/chatbots/asktroc.py +16 -0
  10. parrot/chatbots/base.py +366 -0
  11. parrot/chatbots/basic.py +9 -0
  12. parrot/chatbots/bose.py +17 -0
  13. parrot/chatbots/cody.py +17 -0
  14. parrot/chatbots/copilot.py +83 -0
  15. parrot/chatbots/dataframe.py +103 -0
  16. parrot/chatbots/hragents.py +15 -0
  17. parrot/chatbots/odoo.py +17 -0
  18. parrot/chatbots/retrievals/__init__.py +578 -0
  19. parrot/chatbots/retrievals/constitutional.py +19 -0
  20. parrot/conf.py +110 -0
  21. parrot/crew/__init__.py +3 -0
  22. parrot/crew/tools/__init__.py +22 -0
  23. parrot/crew/tools/bing.py +13 -0
  24. parrot/crew/tools/config.py +43 -0
  25. parrot/crew/tools/duckgo.py +62 -0
  26. parrot/crew/tools/file.py +24 -0
  27. parrot/crew/tools/google.py +168 -0
  28. parrot/crew/tools/gtrends.py +16 -0
  29. parrot/crew/tools/md2pdf.py +25 -0
  30. parrot/crew/tools/rag.py +42 -0
  31. parrot/crew/tools/search.py +32 -0
  32. parrot/crew/tools/url.py +21 -0
  33. parrot/exceptions.cpython-39-x86_64-linux-gnu.so +0 -0
  34. parrot/handlers/__init__.py +4 -0
  35. parrot/handlers/bots.py +196 -0
  36. parrot/handlers/chat.py +162 -0
  37. parrot/interfaces/__init__.py +6 -0
  38. parrot/interfaces/database.py +29 -0
  39. parrot/llms/__init__.py +137 -0
  40. parrot/llms/abstract.py +47 -0
  41. parrot/llms/anthropic.py +42 -0
  42. parrot/llms/google.py +42 -0
  43. parrot/llms/groq.py +45 -0
  44. parrot/llms/hf.py +45 -0
  45. parrot/llms/openai.py +59 -0
  46. parrot/llms/pipes.py +114 -0
  47. parrot/llms/vertex.py +78 -0
  48. parrot/loaders/__init__.py +20 -0
  49. parrot/loaders/abstract.py +456 -0
  50. parrot/loaders/audio.py +106 -0
  51. parrot/loaders/basepdf.py +102 -0
  52. parrot/loaders/basevideo.py +280 -0
  53. parrot/loaders/csv.py +42 -0
  54. parrot/loaders/dir.py +37 -0
  55. parrot/loaders/excel.py +349 -0
  56. parrot/loaders/github.py +65 -0
  57. parrot/loaders/handlers/__init__.py +5 -0
  58. parrot/loaders/handlers/data.py +213 -0
  59. parrot/loaders/image.py +119 -0
  60. parrot/loaders/json.py +52 -0
  61. parrot/loaders/pdf.py +437 -0
  62. parrot/loaders/pdfchapters.py +142 -0
  63. parrot/loaders/pdffn.py +112 -0
  64. parrot/loaders/pdfimages.py +207 -0
  65. parrot/loaders/pdfmark.py +88 -0
  66. parrot/loaders/pdftables.py +145 -0
  67. parrot/loaders/ppt.py +30 -0
  68. parrot/loaders/qa.py +81 -0
  69. parrot/loaders/repo.py +103 -0
  70. parrot/loaders/rtd.py +65 -0
  71. parrot/loaders/txt.py +92 -0
  72. parrot/loaders/utils/__init__.py +1 -0
  73. parrot/loaders/utils/models.py +25 -0
  74. parrot/loaders/video.py +96 -0
  75. parrot/loaders/videolocal.py +120 -0
  76. parrot/loaders/vimeo.py +106 -0
  77. parrot/loaders/web.py +216 -0
  78. parrot/loaders/web_base.py +112 -0
  79. parrot/loaders/word.py +125 -0
  80. parrot/loaders/youtube.py +192 -0
  81. parrot/manager.py +166 -0
  82. parrot/models.py +372 -0
  83. parrot/py.typed +0 -0
  84. parrot/stores/__init__.py +48 -0
  85. parrot/stores/abstract.py +171 -0
  86. parrot/stores/milvus.py +632 -0
  87. parrot/stores/qdrant.py +153 -0
  88. parrot/tools/__init__.py +12 -0
  89. parrot/tools/abstract.py +53 -0
  90. parrot/tools/asknews.py +32 -0
  91. parrot/tools/bing.py +13 -0
  92. parrot/tools/duck.py +62 -0
  93. parrot/tools/google.py +170 -0
  94. parrot/tools/stack.py +26 -0
  95. parrot/tools/weather.py +70 -0
  96. parrot/tools/wikipedia.py +59 -0
  97. parrot/tools/zipcode.py +179 -0
  98. parrot/utils/__init__.py +2 -0
  99. parrot/utils/parsers/__init__.py +5 -0
  100. parrot/utils/parsers/toml.cpython-39-x86_64-linux-gnu.so +0 -0
  101. parrot/utils/toml.py +11 -0
  102. parrot/utils/types.cpython-39-x86_64-linux-gnu.so +0 -0
  103. parrot/utils/uv.py +11 -0
  104. parrot/version.py +10 -0
  105. resources/users/__init__.py +5 -0
  106. resources/users/handlers.py +13 -0
  107. resources/users/models.py +205 -0
  108. settings/__init__.py +0 -0
  109. settings/settings.py +51 -0
@@ -0,0 +1,192 @@
1
+ from typing import Optional, Union
2
+ from pytube import YouTube
3
+ from youtube_transcript_api import NoTranscriptFound
4
+ import torch
5
+ from langchain.docstore.document import Document
6
+ from langchain_community.document_loaders.parsers.audio import (
7
+ OpenAIWhisperParserLocal
8
+ )
9
+ from langchain_community.document_loaders import YoutubeLoader as YTLoader
10
+ from langchain_community.document_loaders.generic import (
11
+ GenericLoader
12
+ )
13
+ from langchain_community.document_loaders.blob_loaders.youtube_audio import (
14
+ YoutubeAudioLoader
15
+ )
16
+ from .video import VideoLoader
17
+
18
+
19
def extract_video_id(url):
    """Return the YouTube video ID embedded in *url*.

    Supported URL shapes:

    * watch URLs, with ``v`` at any position in the query string
      (``https://www.youtube.com/watch?feature=share&v=ID``);
    * short links (``https://youtu.be/ID``);
    * path based links (``/embed/ID``, ``/shorts/ID``, ``/live/ID``, ``/v/ID``).

    Args:
        url: The video URL as a string.

    Returns:
        The video ID as a string, without any trailing query parameters or
        URL fragment.

    Raises:
        IndexError: If no video ID can be found in *url*. ``IndexError`` is
            kept (rather than ``ValueError``) because that is what the
            previous ``split``-based implementation raised on unsupported
            URLs, so existing callers keep working.
    """
    # Local import: keeps this function self-contained without touching the
    # module-level import block.
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)

    # Regular watch URLs: the ID is the ``v`` query parameter, wherever it
    # appears in the query string.
    values = parse_qs(parsed.query).get("v")
    if values and values[0]:
        return values[0]

    segments = [segment for segment in parsed.path.split("/") if segment]
    host = (parsed.hostname or "").lower()

    # Short links carry the ID as the first path segment.
    if segments and (host == "youtu.be" or host.endswith(".youtu.be")):
        return segments[0]

    # Embed / shorts / live style links: ``/<kind>/<id>``.
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    raise IndexError(f"No YouTube video id found in URL: {url!r}")
23
+
24
+
25
class YoutubeLoader(VideoLoader):
    """Loader that turns a YouTube video into LangChain ``Document`` objects.

    ``load_video`` tries three strategies, in this order:

    1. a transcript file supplied by the caller;
    2. download the video, extract its audio and transcribe it locally;
    3. if step 2 raises, fall back to ``langchain_community``'s
       ``YoutubeLoader`` and, when that reports ``NoTranscriptFound``, to
       ``YoutubeAudioLoader`` + ``OpenAIWhisperParserLocal``.
    """
    _extension = ['.youtube']
    encoding = 'utf-8'
    chunk_size = 768

    def get_video_info(self, url: str) -> dict:
        """Collect descriptive metadata for the video at *url* using pytube.

        Args:
            url: URL of the YouTube video.

        Returns:
            A dict with the keys ``url``, ``video_id``, ``watch_url``,
            ``embed_url``, ``title``, ``description``, ``view_count``,
            ``publish_date`` and ``author``. Missing text fields are
            replaced by ``"Unknown"`` and a missing view count by ``0``.
        """
        yt = YouTube(url)
        return {
            "url": url,
            "video_id": yt.video_id,
            "watch_url": yt.watch_url,
            "embed_url": yt.embed_url,
            "title": yt.title or "Unknown",
            "description": yt.description or "Unknown",
            "view_count": yt.views or 0,
            "publish_date": (
                yt.publish_date.strftime("%Y-%m-%d %H:%M:%S")
                if yt.publish_date else "Unknown"
            ),
            "author": yt.author or "Unknown",
        }

    def load_video(self, url: str, video_title: str, transcript: Optional[str] = None) -> list:
        """Build the documents for one YouTube video.

        Args:
            url: URL of the YouTube video.
            video_title: Title stored in the document metadata (also used as
                ``index``/``filename`` on the locally transcribed path).
            transcript: Optional path to an existing transcript file. When
                given, the file content is used instead of transcribing.

        Returns:
            A list of ``Document`` objects. The list is empty when the
            transcript file is empty or when no fallback loader produced
            any document.
        """
        # Fetched up-front on every path, as before; the transcript-file
        # branch does not use it.
        video_info = self.get_video_info(url)

        if transcript is not None:
            return self._documents_from_transcript_file(
                url, video_title, transcript
            )

        try:
            return self._documents_from_local_transcription(
                url, video_title, video_info
            )
        except Exception as err:
            # Any failure while downloading/transcribing locally triggers
            # the fallback loaders; log the reason instead of hiding it.
            self.logger.warning(
                f"Local transcription failed for {url} ({err}), "
                "falling back to YouTube loaders"
            )
            return self._documents_from_fallback_loaders(url, video_info)

    def _documents_from_transcript_file(self, url: str, video_title: str, transcript: str) -> list:
        """Create a single document from the transcript file at *transcript*."""
        # Read with the loader's declared encoding rather than the platform
        # default, so non-ASCII transcripts decode consistently.
        with open(transcript, 'r', encoding=self.encoding) as f:
            content = f.read()
        if not content:
            # Previously this path fell through and returned None, breaking
            # the "returns a list" contract.
            self.logger.warning(
                f"Transcript file {transcript} is empty for video {url}"
            )
            return []
        summary = self.get_summary_from_text(content)
        metadata = {
            "source": url,
            "url": url,
            "index": '',
            "filename": '',
            "question": '',
            "answer": '',
            "source_type": self._source_type,
            'type': 'video_transcript',
            'summary': f"{summary!s}",
            "document_meta": {
                "language": self._language,
                "title": video_title
            },
        }
        if self.topics:
            metadata['document_meta']['topic_tags'] = self.topics
        return [Document(page_content=content, metadata=metadata)]

    def _documents_from_local_transcription(self, url: str, video_title: str, video_info: dict) -> list:
        """Download the video, transcribe its audio and build the documents.

        Produces one document with the full text, one with the VTT
        rendering (when ``transcript_to_vtt`` returns something truthy)
        and one document per block returned by ``transcript_to_blocks``.
        """
        documents = []
        # first: download video
        file_path = self.download_video(url, self._video_path)
        audio_path = file_path.with_suffix('.mp3')
        transcript_path = file_path.with_suffix('.vtt')
        # second: extract audio and transcribe it
        self.extract_audio(file_path, audio_path)
        transcript_whisper = self.get_whisper_transcript(audio_path)
        text = transcript_whisper['text']
        # Summarizing is best-effort: a failure yields an empty summary.
        try:
            summary = self.get_summary_from_text(text)
        except Exception:
            summary = ''
        metadata = {
            "url": f"{url}",
            "source": f"{url}",
            "index": video_title,
            "filename": video_title,
            "question": '',
            "answer": '',
            "source_type": self._source_type,
            'type': 'video_transcript',
            "summary": f"{summary!s}",
            "document_meta": {
                "language": self._language,
                "title": video_title,
                "docinfo": video_info
            }
        }
        if self.topics:
            metadata["document_meta"]['topic_tags'] = self.topics
        # Full plain-text transcript:
        documents.append(Document(page_content=text, metadata=metadata))
        # VTT version:
        vtt = self.transcript_to_vtt(transcript_whisper, transcript_path)
        if vtt:
            documents.append(Document(page_content=vtt, metadata=metadata))
        # Every dialog chunk becomes a separate document; its metadata
        # overrides "index" and "document_meta" of the shared metadata.
        for chunk in self.transcript_to_blocks(transcript_whisper):
            chunk_meta = {
                "index": f"{video_title}:{chunk['id']}",
                "document_meta": {
                    "start": f"{chunk['start_time']}",
                    "end": f"{chunk['end_time']}",
                    "id": f"{chunk['id']}",
                    "language": self._language,
                    "title": video_title,
                    "topic_tags": ""
                }
            }
            documents.append(
                Document(
                    page_content=chunk['text'],
                    metadata={**metadata, **chunk_meta}
                )
            )
        return documents

    def _documents_from_fallback_loaders(self, url: str, video_info: dict) -> list:
        """Load the video through the ``langchain_community`` loaders.

        ``YoutubeLoader`` is tried first; when it raises
        ``NoTranscriptFound`` the audio is loaded with
        ``YoutubeAudioLoader`` and parsed with ``OpenAIWhisperParserLocal``.
        """
        try:
            loader = YTLoader.from_youtube_url(
                url,
                add_video_info=True,
                language=[self._language],
            )
            docs = loader.load()
        except NoTranscriptFound:
            loader = GenericLoader(
                YoutubeAudioLoader([url], str(self._video_path)),
                OpenAIWhisperParserLocal(
                    lang_model='openai/whisper-small.en',
                    device=self._get_device()
                )
            )
            docs = loader.load()
        if not docs:
            self.logger.warning(
                f"Unable to load Youtube Video {url}"
            )
            return []
        summary = self.get_summary(docs)
        for doc in docs:
            doc.metadata['source_type'] = self._source_type
            doc.metadata['summary'] = f"{summary!s}"
            doc.metadata['index'] = ''
            doc.metadata['filename'] = ''
            doc.metadata['question'] = ''
            doc.metadata['answer'] = ''
            doc.metadata['type'] = 'video_transcript'
            doc.metadata['document_meta'] = {}
            if self.topics:
                doc.metadata['document_meta']['topic_tags'] = self.topics
            # add video metadata (stringified) to document metadata:
            for key, value in video_info.items():
                doc.metadata['document_meta'][key] = f"{value!s}"
        return docs
parrot/manager.py ADDED
@@ -0,0 +1,166 @@
1
+ """
2
+ Chatbot Manager.
3
+
4
+ Tool for instantiating, managing and interacting with Chatbots through APIs.
5
+ """
6
+ from typing import Any, Dict, Type
7
+ from importlib import import_module
8
+ from aiohttp import web
9
+ from navconfig.logging import logging
10
+ from .chatbots import (
11
+ AbstractChatbot,
12
+ Chatbot
13
+ )
14
+ from .handlers.chat import ChatHandler # , BotHandler
15
+ from .handlers import ChatbotHandler
16
+ from .models import ChatbotModel
17
+ # Manual Load of Copilot Agent:
18
+ from parrot.chatbots.copilot import CopilotAgent
19
+
20
class ChatbotManager:
    """Registry of chatbots exposed through an aiohttp application.

    The manager keeps chatbots indexed by name, loads the ones stored in
    the database when the application starts, and registers the chat and
    bot HTTP routes on the application.
    """
    app: web.Application = None
    chatbots: Dict[str, AbstractChatbot] = {}

    def __init__(self) -> None:
        self.app = None
        # Per-instance registry; shadows the class-level default dict.
        self.chatbots = {}
        self.logger = logging.getLogger(name='Parrot.Manager')

    def get_chatbot_class(self, class_name: str) -> Type[AbstractChatbot]:
        """Look up a chatbot class by name in the relative module '.chatbots'.

        Args:
            class_name (str): The name of the chatbot class to be imported.

        Returns:
            Type[AbstractChatbot]: The attribute named *class_name* of the
            ``chatbots`` module.

        Raises:
            ImportError: If the module has no attribute named *class_name*.
        """
        module = import_module('.chatbots', __package__)
        try:
            return getattr(module, class_name)
        except AttributeError as err:
            # Chain the original error so the traceback shows the cause.
            raise ImportError(
                f"No class named '{class_name}' found in the module 'chatbots'."
            ) from err

    async def load_bots(self, app: web.Application) -> None:
        """Load all chatbots from DB and register them in the manager.

        Rows with ``bot_type == 'chatbot'`` are instantiated (using
        ``custom_class`` when set, ``Chatbot`` otherwise) and configured;
        rows with ``bot_type == 'agent'`` become ``CopilotAgent`` instances.
        Rows whose agent cannot be built, or whose ``bot_type`` is not
        recognised, are logged and skipped.

        Args:
            app: Application holding the database under ``app['database']``.
        """
        self.logger.info("Loading chatbots from DB...")
        db = app['database']
        async with await db.acquire() as conn:
            ChatbotModel.Meta.connection = conn
            bots = await ChatbotModel.all()
            for bot in bots:
                if bot.bot_type == 'chatbot':
                    self.logger.notice(
                        f"Loading chatbot '{bot.name}'..."
                    )
                    cls_name = bot.custom_class
                    if cls_name is None:
                        bot_class = Chatbot
                    else:
                        bot_class = self.get_chatbot_class(cls_name)
                    chatbot = bot_class(
                        chatbot_id=bot.chatbot_id,
                        name=bot.name
                    )
                    try:
                        await chatbot.configure()
                    except Exception as e:
                        # The instance exists, so it is still registered
                        # below (unchanged behavior); only log the failure.
                        self.logger.error(
                            f"Failed to configure chatbot '{chatbot.name}': {e}"
                        )
                elif bot.bot_type == 'agent':
                    self.logger.notice(
                        f"Loading Agent '{bot.name}'..."
                    )
                    # TODO: extract the list of tools from Agent config
                    try:
                        tools = CopilotAgent.default_tools()
                        chatbot = CopilotAgent(
                            name=bot.name,
                            llm=bot.llm,
                            tools=tools
                        )
                    except Exception as e:
                        self.logger.error(
                            f"Failed to configure Agent '{bot.name}': {e}"
                        )
                        # No agent was created: skip registration instead of
                        # re-adding the previous bot or hitting an unbound
                        # local variable.
                        continue
                else:
                    self.logger.warning(
                        f"Skipping bot '{bot.name}': "
                        f"unsupported bot_type '{bot.bot_type}'"
                    )
                    continue
                self.add_chatbot(chatbot)
        self.logger.info(
            ":: Chatbots loaded successfully."
        )

    def create_chatbot(self, class_name: Any = None, name: str = None, **kwargs) -> AbstractChatbot:
        """Create a chatbot and add it to the manager.

        Args:
            class_name: Chatbot class to instantiate; defaults to ``Chatbot``.
            name: Name assigned to the new chatbot and used as registry key.
            **kwargs: Passed to the chatbot constructor. When it contains an
                ``llm`` mapping, its ``name`` and ``model`` entries (both
                required) and any remaining entries are handed to
                ``chatbot.load_llm`` and the result is stored in
                ``chatbot.llm``.

        Returns:
            The newly created chatbot.
        """
        if class_name is None:
            class_name = Chatbot
        chatbot = class_name(**kwargs)
        chatbot.name = name
        self.add_chatbot(chatbot)
        if 'llm' in kwargs:
            # Work on a copy so the caller's configuration dict is not
            # stripped of its 'name' and 'model' entries.
            llm_config = dict(kwargs['llm'])
            llm_name = llm_config.pop('name')
            model = llm_config.pop('model')
            chatbot.llm = chatbot.load_llm(
                llm_name, model=model, **llm_config
            )
        return chatbot

    def add_chatbot(self, chatbot: AbstractChatbot) -> None:
        """Add a chatbot to the manager, keyed by ``chatbot.name``."""
        self.chatbots[chatbot.name] = chatbot

    def get_chatbot(self, name: str) -> AbstractChatbot:
        """Get a chatbot by name, or ``None`` when it is not registered."""
        return self.chatbots.get(name)

    def remove_chatbot(self, name: str) -> None:
        """Remove a chatbot by name.

        Raises:
            KeyError: If no chatbot is registered under *name*.
        """
        del self.chatbots[name]

    def get_chatbots(self) -> Dict[str, AbstractChatbot]:
        """Get all chatbots as the manager's own name -> chatbot dict."""
        return self.chatbots

    def get_app(self) -> web.Application:
        """Get the app.

        Raises:
            RuntimeError: If ``setup`` has not stored an application yet.
        """
        if self.app is None:
            raise RuntimeError("App is not set.")
        return self.app

    def setup(self, app: web.Application) -> web.Application:
        """Attach the manager to *app* and register its routes and signals.

        Args:
            app: An aiohttp ``web.Application``, or any object exposing
                ``get_app()`` that returns one.

        Returns:
            The aiohttp application the manager was attached to.
        """
        if isinstance(app, web.Application):
            self.app = app  # register the app into the Extension
        else:
            self.app = app.get_app()  # Nav Application
        # register signals for startup and shutdown
        self.app.on_startup.append(self.on_startup)
        self.app.on_shutdown.append(self.on_shutdown)
        # Add Manager to main Application:
        self.app['chatbot_manager'] = self
        # Configure Routes
        router = self.app.router
        router.add_view(
            '/api/v1/chat',
            ChatHandler
        )
        router.add_view(
            '/api/v1/chat/{chatbot_name}',
            ChatHandler
        )
        ChatbotHandler.configure(self.app, '/api/v1/bots')
        return self.app

    async def on_startup(self, app: web.Application) -> None:
        """On startup: load all chatbots stored in the database."""
        await self.load_bots(app)

    async def on_shutdown(self, app: web.Application) -> None:
        """On shutdown: nothing to release at the moment."""