PyPI - ai-parrot - Version diff - 0.3.11__cp311-cp311-manylinux_2_28_x86_64.whl → 0.3.16__cp311-cp311-manylinux_2_28_x86_64.whl - Mend

ai-parrot 0.3.11__cp311-cp311-manylinux_2_28_x86_64.whl → 0.3.16__cp311-cp311-manylinux_2_28_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ai-parrot might be problematic. Click here for more details.

Files changed (16)

{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.16.dist-info}/METADATA +17 -16
{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.16.dist-info}/RECORD +16 -15
parrot/chatbots/base.py +3 -0
parrot/chatbots/copilot.py +36 -3
parrot/conf.py +2 -0
parrot/loaders/__init__.py +0 -20
parrot/loaders/abstract.py +44 -1
parrot/loaders/basevideo.py +48 -3
parrot/loaders/video.py +1 -1
parrot/loaders/videolocal.py +149 -32
parrot/loaders/youtube.py +50 -1
parrot/tools/execute.py +56 -0
parrot/version.py +1 -1
{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.16.dist-info}/LICENSE +0 -0
{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.16.dist-info}/WHEEL +0 -0
{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.16.dist-info}/top_level.txt +0 -0

{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.16.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-parrot
-Version: 0.3.11
+Version: 0.3.16
 Summary: Live Chatbots based on Langchain chatbots and Agents     Integrated into Navigator Framework or used into aiohttp applications.
 Home-page: https://github.com/phenobarbital/ai-parrot
 Author: Jesus Lara
@@ -88,6 +88,22 @@ Requires-Dist: streamlit==1.37.1; extra == "analytics"
 Provides-Extra: anthropic
 Requires-Dist: langchain-anthropic==0.1.11; extra == "anthropic"
 Requires-Dist: anthropic==0.25.2; extra == "anthropic"
+Provides-Extra: basic_loaders
+Requires-Dist: youtube-transcript-api==0.6.2; extra == "basic-loaders"
+Requires-Dist: pymupdf==1.24.4; extra == "basic-loaders"
+Requires-Dist: pymupdf4llm==0.0.1; extra == "basic-loaders"
+Requires-Dist: pdf4llm==0.0.6; extra == "basic-loaders"
+Requires-Dist: pytube==15.0.0; extra == "basic-loaders"
+Requires-Dist: pydub==0.25.1; extra == "basic-loaders"
+Requires-Dist: markdownify==0.12.1; extra == "basic-loaders"
+Requires-Dist: yt-dlp==2024.4.9; extra == "basic-loaders"
+Requires-Dist: moviepy==1.0.3; extra == "basic-loaders"
+Requires-Dist: rapidocr-onnxruntime==1.3.15; extra == "basic-loaders"
+Requires-Dist: pytesseract==0.3.10; extra == "basic-loaders"
+Requires-Dist: python-docx==1.1.0; extra == "basic-loaders"
+Requires-Dist: python-pptx==0.6.23; extra == "basic-loaders"
+Requires-Dist: docx2txt==0.8; extra == "basic-loaders"
+Requires-Dist: mammoth==1.7.1; extra == "basic-loaders"
 Provides-Extra: crew
 Requires-Dist: colbert-ai==0.2.19; extra == "crew"
 Requires-Dist: vanna==0.3.4; extra == "crew"
@@ -104,26 +120,11 @@ Requires-Dist: llama-index-llms-huggingface==0.2.7; extra == "hunggingfaces"
 Provides-Extra: loaders
 Requires-Dist: unstructured==0.14.3; extra == "loaders"
 Requires-Dist: unstructured-client==0.18.0; extra == "loaders"
-Requires-Dist: youtube-transcript-api==0.6.2; extra == "loaders"
-Requires-Dist: pymupdf==1.24.4; extra == "loaders"
-Requires-Dist: pymupdf4llm==0.0.1; extra == "loaders"
-Requires-Dist: pdf4llm==0.0.6; extra == "loaders"
 Requires-Dist: PyPDF2==3.0.1; extra == "loaders"
 Requires-Dist: pdfminer.six==20231228; extra == "loaders"
 Requires-Dist: pdfplumber==0.11.0; extra == "loaders"
 Requires-Dist: GitPython==3.1.42; extra == "loaders"
 Requires-Dist: opentelemetry-sdk==1.24.0; extra == "loaders"
-Requires-Dist: rapidocr-onnxruntime==1.3.15; extra == "loaders"
-Requires-Dist: pytesseract==0.3.10; extra == "loaders"
-Requires-Dist: python-docx==1.1.0; extra == "loaders"
-Requires-Dist: python-pptx==0.6.23; extra == "loaders"
-Requires-Dist: docx2txt==0.8; extra == "loaders"
-Requires-Dist: pytube==15.0.0; extra == "loaders"
-Requires-Dist: pydub==0.25.1; extra == "loaders"
-Requires-Dist: markdownify==0.12.1; extra == "loaders"
-Requires-Dist: yt-dlp==2024.4.9; extra == "loaders"
-Requires-Dist: moviepy==1.0.3; extra == "loaders"
-Requires-Dist: mammoth==1.7.1; extra == "loaders"
 Requires-Dist: paddlepaddle==2.6.1; extra == "loaders"
 Requires-Dist: paddlepaddle-gpu==2.6.1; extra == "loaders"
 Requires-Dist: paddleocr==2.8.1; extra == "loaders"

{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.16.dist-info}/RECORD RENAMED Viewed

@@ -1,18 +1,18 @@
 parrot/__init__.py,sha256=eTkAkHeJ5BBDG2fxrXA4M37ODBJoS1DQYpeBAWL2xeI,387
-parrot/conf.py,sha256=-9bVGC7Rf-6wpIg6-ojvU4S_G1wBLUCVDt46KEGHEhM,4257
+parrot/conf.py,sha256=andrPREuR_BHiXA_Q0Utyb5xSb1ct_uKnjMzEOa1ftE,4373
 parrot/exceptions.cpython-311-x86_64-linux-gnu.so,sha256=VNyBh3uLxGQgB0l1bkWjQDqYUN2ZAvRmV12AqQijV9Q,361184
 parrot/manager.py,sha256=NhzXoWxSgtoWHpmYP8cV2Ujq_SlvCbQYQBaohAeL2TM,5935
 parrot/models.py,sha256=RsVQCqhSXBKRPcu-BCga9Y1wyvENFXDCuq3_ObIKvAo,13452
 parrot/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-parrot/version.py,sha256=iXWwg_05dEqR8n13PRWz5QqlmSq6_og3HT3moP37RAg,374
+parrot/version.py,sha256=gt_HAcyWEN5lqHTmPKg0Nn-BgX6aq3F6Ah64q2Dfc7s,374
 parrot/chatbots/__init__.py,sha256=ypskCnME0xUv6psBEGCEyXCrD0J0ULHSllpVmSxqb4A,200
 parrot/chatbots/abstract.py,sha256=CmDn3k4r9uKImOZRN4L9zxLbCdC-1MPUAorDlfZT-kA,26421
 parrot/chatbots/asktroc.py,sha256=gyWzyvpAnmXwXd-3DEKoIJtAxt6NnP5mUZdZbkFky8s,604
-parrot/chatbots/base.py,sha256=cRw7k5FRKOfLdXQJeQvACVE5ZgE1NUWf3IY7OsSsxuo,12912
+parrot/chatbots/base.py,sha256=5QX5-VPAOM-I8o0ktBt3_JEDPRQ_-iK0fFLRMUlvs_s,13396
 parrot/chatbots/basic.py,sha256=DIMTPoGc90BRSlokeOdnjlEXAAfZlIFqxXWaMyAX9uk,232
 parrot/chatbots/bose.py,sha256=z8rm8G_tAwHjDUodXfrAKnhaMzufQyf-GrhxwHeHle4,757
 parrot/chatbots/cody.py,sha256=Z0LNiNtZjEe7bA3hwexclBZK5zEF9m2ODVmrzZjC3Bw,623
-parrot/chatbots/copilot.py,sha256=JTnc-fdszwZ2nLmpNu-tVe6Al8z9PNIYHxv8fd42YQU,2051
+parrot/chatbots/copilot.py,sha256=Q_CwoPm1M0loa7N3DLSLK8eq4m99z1CeU5FI9iqF9XI,2767
 parrot/chatbots/dataframe.py,sha256=CfZiLKIwnaku52nl2PNjciqRlH8m2lM4buO6xI7P408,3914
 parrot/chatbots/hragents.py,sha256=PyNIBJ2OH5CtfVydccgpY50V6GI3cLKuVdOMaa7sQz0,574
 parrot/chatbots/oddie.py,sha256=RMbANmJZP1_vLVGKRNBKmA8otyAiWPkvpA0rJ0U3tZk,796
@@ -45,11 +45,11 @@ parrot/llms/hf.py,sha256=f2HhHCICaSHp0y3KRhqNcYXNO-amYTxDXJ_2_9L5Bk8,1594
 parrot/llms/openai.py,sha256=NgWv6IwJ1DborlYhTyureBBdgHfAPc_lGHQRGt80ca8,1759
 parrot/llms/pipes.py,sha256=Ns_wh-alkKocZKlbQyQLKOSBxqfRC_hCbz34vpOOyP8,3798
 parrot/llms/vertex.py,sha256=a0UsH9sa_GiMkg31E52cWE8pXFZjyMtIanr7eAA7iyE,2615
-parrot/loaders/__init__.py,sha256=LGEaj54DP3FA5-C2IDaA8u-MF4lj-Lbd_Mx5R19qHYY,665
-parrot/loaders/abstract.py,sha256=_tsGDb7TracwkL20J2VYd5hC9MR262c2mmS9VvYB4vM,15870
+parrot/loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+parrot/loaders/abstract.py,sha256=Mx6BtihuwvFkl-Ali_84949BfVXFB0JZjmSKnJ-gXSg,17272
 parrot/loaders/audio.py,sha256=P2tWYKxWLM5TMLMm-5qR35cD_pGQWmf8-UZUTiK4R0o,3698
 parrot/loaders/basepdf.py,sha256=Qh_hzR0JArQEVP31SgWt9utt7qWmbfwVoCzUDyBHcXw,3243
-parrot/loaders/basevideo.py,sha256=WcX-q0Rn_E1dYurbA1eH5NOcUBdOye2iWiFTCY_DVgo,10292
+parrot/loaders/basevideo.py,sha256=xLAMfIhjGR10T3-Pdx8MLq5Bp6woCOuq5Jr6yUZ6LCU,11992
 parrot/loaders/csv.py,sha256=DLcFK3z9boMNH3y9Qca5BWDfYXgXjXsGkzxVN1_2wyo,1103
 parrot/loaders/dir.py,sha256=_CU9kWGCpHnZplUamXLs2yEizA1aCRBASn3F6MggitQ,866
 parrot/loaders/excel.py,sha256=9cTsMfxR_YOpBHz9Ru0LJsxBXDVBh52XM8hHV63QgYo,12445
@@ -67,13 +67,13 @@ parrot/loaders/qa.py,sha256=3K_2yBxUzj-ifDpAbUsIc-v66004fKPzGavUqrhc3Kc,2646
 parrot/loaders/repo.py,sha256=vBqBAnwU6p3_DCvI9DVhi1Bs8iCDYHwFGp0P9zvGRyw,3737
 parrot/loaders/rtd.py,sha256=O0h7LDntP_0IBT8LDQi09u-gYVUO5cuvmGsfZLZ4CoU,1990
 parrot/loaders/txt.py,sha256=-xXVSuvkC2LQ2XZ44Nqwk3V8nE4F6UgXylosMCNgeFo,2804
-parrot/loaders/video.py,sha256=pl5Ho69bp5vrWMqg5tLbsnHUus1LByTDoL6NPk57Ays,2929
-parrot/loaders/videolocal.py,sha256=NwFB6n9sQZxh01L6YKIISpG1tuRsg-ME_qXCDS7Vtkk,5143
+parrot/loaders/video.py,sha256=9zKUFFROSIbWjWFOvxDrW4uOewrMzD7-xADmszOpP4k,2930
+parrot/loaders/videolocal.py,sha256=cRYv3KvKKHltMY4QbnvEMCOLHlEY9ZmWeXTL23fy-gA,9669
 parrot/loaders/vimeo.py,sha256=Cs7FkL2Cr8yV44-Tv5wWkveKzqhOeAIP6kF93SCr_Lk,4118
 parrot/loaders/web.py,sha256=kTi-NtAsbQLKi3wD_2o15Z0HHnYzsEEEGjH0RdvyQqQ,8869
 parrot/loaders/web_base.py,sha256=ZwSFXtJR71cpFGN1WCLUC2W6JjEUV865tRKf8isbJ5M,4382
 parrot/loaders/word.py,sha256=jZdHSL5CtAEn1otBYLNSqKLtO3BNcTObDPgqhzk5-4M,4533
-parrot/loaders/youtube.py,sha256=fVnBBw4IfK6NWP7mO66TgxOzJEcGwE3-3S1WMUApJYg,7751
+parrot/loaders/youtube.py,sha256=DzH9bD5ZrLaTG_6GMjHsy1cHoTBR712yUC8tJiAYbNM,9607
 parrot/loaders/handlers/__init__.py,sha256=ksEDtUOEJELmyCIi0KNv7tR2fCUyADBVkwCcyqN_sVE,70
 parrot/loaders/handlers/data.py,sha256=olZ2p-wyUMGoazah7tgHY7V9buGX1FOeJ-cv2vGEoH8,7386
 parrot/loaders/utils/__init__.py,sha256=SkDyK3MuPGhp0NM6kHvaxQDe97Gcl3n9t5A741OVh1c,28
@@ -87,6 +87,7 @@ parrot/tools/abstract.py,sha256=pVSZw8MDpbVcQ-CHaGwP6CpqXHIs8hH8Oy1AqUuMmrw,1706
 parrot/tools/asknews.py,sha256=hEpPJMyNBVfj2maHbqnumn3VkY45oFvrjkE3Rq8EdGA,1039
 parrot/tools/bing.py,sha256=BtmFD66OIuCaOue5U2_yIqtjWf24IhEgNOX1LAVvHtA,464
 parrot/tools/duck.py,sha256=UAAZzlF-Q0sZh0_IcS96dwSgCuBPdeepkwRrMM5cJPY,1920
+parrot/tools/execute.py,sha256=fTMQAsXuUzVyIWmZxL22LrSj2eQ-Rh-ncyUZ9gY-d-A,1687
 parrot/tools/google.py,sha256=NjijcUWH6Crk5Uty_x3FstjDTGZV8JXfBFDQEtMHhac,6236
 parrot/tools/stack.py,sha256=M-VRWjIDa18bl5p88dSKtxMj4Kn21YB76to0u6yXA30,942
 parrot/tools/weather.py,sha256=4v9Ft5lkVzb9Pg7afNs7BK5T3WEcsZbHPlBrF9oXSo8,2541
@@ -103,8 +104,8 @@ resources/users/handlers.py,sha256=BGzqBvPY_OaIF_nONWX4b_B5OyyBrdGuSihIsdlFwjk,2
 resources/users/models.py,sha256=glk7Emv7QCi6i32xRFDrGc8UwK23_LPg0XUOJoHnwRU,6799
 settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 settings/settings.py,sha256=9ueEvyLNurUX-AaIeRPV8GKX1c4YjDLbksUAeqEq6Ck,1854
-ai_parrot-0.3.11.dist-info/LICENSE,sha256=vRKOoa7onTsLNvSzJtGtMaNhWWh8B3YAT733Tlu6M4o,1070
-ai_parrot-0.3.11.dist-info/METADATA,sha256=zOYFernNOneZdQH-EIXCsmT83ATrzxGA4x9WmOhciX8,9838
-ai_parrot-0.3.11.dist-info/WHEEL,sha256=UQ-0qXN3LQUffjrV43_e_ZXj2pgORBqTmXipnkj0E8I,113
-ai_parrot-0.3.11.dist-info/top_level.txt,sha256=qHoO4BhYDfeTkyKnciZSQtn5FSLN3Q-P5xCTkyvbuxg,26
-ai_parrot-0.3.11.dist-info/RECORD,,
+ai_parrot-0.3.16.dist-info/LICENSE,sha256=vRKOoa7onTsLNvSzJtGtMaNhWWh8B3YAT733Tlu6M4o,1070
+ai_parrot-0.3.16.dist-info/METADATA,sha256=wD3EGhVYnjrQtP_j4853HjspuqFjDQvJB_-dvnBTK_A,9958
+ai_parrot-0.3.16.dist-info/WHEEL,sha256=UQ-0qXN3LQUffjrV43_e_ZXj2pgORBqTmXipnkj0E8I,113
+ai_parrot-0.3.16.dist-info/top_level.txt,sha256=qHoO4BhYDfeTkyKnciZSQtn5FSLN3Q-P5xCTkyvbuxg,26
+ai_parrot-0.3.16.dist-info/RECORD,,

parrot/chatbots/base.py CHANGED Viewed

@@ -89,6 +89,9 @@ Whether you need help with a specific question or just want to have a conversati
 - OpenWeatherMap: Get weather information about a location.
 - yahoo_finance_news: Retrieve the latest financial news from Yahoo Finance.
 - python_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, sometimes output is abbreviated - make sure it does not look abbreviated before using it in your answer.
+- executable_python_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, whenever you generate a visual output (like charts with matplotlib), instead of using plt.show(), render the image as a base64-encoded HTML string. Do this by saving the plot to a buffer and encoding it in base64, then return the result as a JSON object formatted as follows: "image": "format": "png", "base64": "base64-encoded-string".
 - youtube_search: Search for videos on YouTube based on specific keywords.

parrot/chatbots/copilot.py CHANGED Viewed

@@ -4,17 +4,43 @@ from .base import BaseAgent
 from ..tools import (
     ZipcodeAPIToolkit,
     WikipediaTool,
-    WikidataTool,
+    # WikidataTool,
     GoogleSearchTool,
     GoogleLocationFinder,
     BingSearchTool,
-    AskNewsTool,
+    # AskNewsTool,
     DuckDuckGoSearchTool,
     YouTubeSearchTool,
     OpenWeatherMapTool,
     StackExchangeTool,
 )
+from ..tools.execute import ExecutablePythonREPLTool
+# ZipCode API Toolkit
+zpt = ZipcodeAPIToolkit()
+zpt_tools = zpt.get_tools()
+wk1 = WikipediaTool()
+# wk12 = WikidataTool()
+g1 = GoogleSearchTool()
+g2 = GoogleLocationFinder()
+b = BingSearchTool()
+d = DuckDuckGoSearchTool()
+# ask = AskNewsTool()
+yt = YouTubeSearchTool()
+stackexchange = StackExchangeTool()
+weather = OpenWeatherMapTool()
+tooling = [
+    wk1,
+    g1, g2,
+    b, d, yt,
+    weather,
+    stackexchange
+] + zpt_tools
 class CopilotAgent(BaseAgent):
     """CopilotAgent Agent.
@@ -30,17 +56,24 @@ class CopilotAgent(BaseAgent):
         **kwargs
     ):
         super().__init__(name, llm, tools, prompt_template, **kwargs)
+        if not tools:
+            tools = tooling
         self.tools = [
                 PythonAstREPLTool(
                     name='python_repl_ast',
                     globals={},
                     locals={}
+                ),
+                ExecutablePythonREPLTool(
+                    name='executable_python_repl_ast',
+                    globals={},
+                    locals={}
                 )
             ] + list(tools)
         self.prompt = self.get_prompt(
             self.prompt_template
         )
-        # print('PROMPT > ', self.prompt)
+        print('PROMPT > ', self.prompt)
     @classmethod
     def default_tools(cls) -> list:

parrot/conf.py CHANGED Viewed

@@ -12,6 +12,8 @@ logging.getLogger(name='h5py').setLevel(logging.INFO)
 logging.getLogger(name='tensorflow').setLevel(logging.INFO)
 logging.getLogger(name='selenium.webdriver').setLevel(logging.WARNING)
 logging.getLogger(name='selenium').setLevel(logging.INFO)
+logging.getLogger(name='matplotlib').setLevel(logging.WARNING)
+logging.getLogger(name='PIL').setLevel(logging.INFO)
 # Static directory

parrot/loaders/__init__.py CHANGED Viewed

@@ -1,20 +0,0 @@
-from .dir import load_directory
-from .pdf import PDFLoader
-from .web import WebLoader
-from .youtube import YoutubeLoader
-from .vimeo import VimeoLoader
-from .word import MSWordLoader
-from .ppt import PPTXLoader
-from .repo import RepositoryLoader
-from .github import GithubLoader
-from .json import JSONLoader
-from .excel import ExcelLoader
-from .web_base import WebBaseLoader
-from .pdfmark import PDFMarkdownLoader
-from .pdfimages import PDFImageLoader
-from .pdftables import PDFTablesLoader
-from .pdfchapters import PDFChapterLoader
-from .txt import TXTLoader
-from .qa import QAFileLoader
-from .rtd import ReadTheDocsLoader
-from .videolocal import VideoLocalLoader

parrot/loaders/abstract.py CHANGED Viewed

@@ -74,6 +74,7 @@ class AbstractLoader(ABC):
         self,
         tokenizer: Union[str, Callable] = None,
         text_splitter: Union[str, Callable] = None,
+        translation: Optional[str] = None,
         source_type: str = 'file',
         **kwargs
     ):
@@ -114,6 +115,15 @@ class AbstractLoader(ABC):
             )
         # JSON encoder:
         self._encoder = JSONContent()
+        # Traslation
+        self._translation = translation
+        self.translator = None
+        if self._translation:
+            mdl = kwargs.get(
+                'translation_model',
+                f"Helsinki-NLP/opus-mt-en-{self._translation}"
+            )
+            self.translator = self.get_translator(mdl)
     def __enter__(self):
@@ -159,6 +169,27 @@ class AbstractLoader(ABC):
             use_memory_efficient_attention=True,
         ).to(self._device)
+    def get_translator(self, model_name: str = 'Helsinki-NLP/opus-mt-en-es'):
+        if not self._translation:
+            return None
+        trans_model = AutoModelForSeq2SeqLM.from_pretrained(
+            model_name,
+            device_map="auto",
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True
+        )
+        trans_tokenizer = AutoTokenizer.from_pretrained(model_name)
+        translator = pipeline(
+            "translation",
+            model=trans_model,
+            tokenizer=trans_tokenizer,
+            batch_size=True,
+            max_new_tokens=500,
+            min_new_tokens=300,
+            use_fast=True
+        )
+        return translator
     def get_summarization_model(self, model_name: str = 'facebook/bart-large-cnn'):
         if self._no_summarization is True:
             return None
@@ -216,7 +247,7 @@ class AbstractLoader(ABC):
             return ''
         try:
             splitter = TokenTextSplitter(
-                chunk_size=5000,
+                chunk_size=6144,
                 chunk_overlap=100,
             )
             prompt_template = """Write a summary of the following, please also identify the main theme:
@@ -454,3 +485,15 @@ class AbstractLoader(ABC):
         for url in urls:
             documents += cls.load(url, **kwargs)
         return documents
+    def saving_file(self, filename: PurePath, data: Any):
+        """Save data to a file.
+        Args:
+            filename (PurePath): The path to the file.
+            data (Any): The data to save.
+        """
+        with open(filename, 'wb') as f:
+            f.write(data)
+            f.flush()
+        print(f':: Saved File on {filename}')

parrot/loaders/basevideo.py CHANGED Viewed

@@ -1,8 +1,9 @@
 from collections.abc import Callable
-from typing import Any, Union, List
+from typing import Any, Union, List, Optional
 from abc import abstractmethod
 from pathlib import Path
 from moviepy.editor import VideoFileClip
+from pydub import AudioSegment
 from transformers import (
     pipeline,
     AutoModelForSeq2SeqLM,
@@ -193,25 +194,69 @@ class BaseVideoLoader(AbstractLoader):
             print('ERROR in summarization:', e)
             return ""
-    def extract_audio(self, video_path, audio_path):
+    def extract_audio(
+        self,
+        video_path: Path,
+        audio_path: Path,
+        compress_speed: bool = False,
+        output_path: Optional[Path] = None,
+        speed_factor: float = 1.5
+    ):
         """
-        Extracts the audio from a video file and saves it as an audio file.
+        Extracts the audio from a video file and optionally compresses the audio speed.
         Args:
             video_path (str): Path to the video file.
             audio_path (str): Path where the extracted audio file will be saved.
+            compress_speed (bool): Whether to compress the audio speed.
+            speed_factor (float): The factor by which to speed up the audio.
         """
+        # Ensure that the paths are valid Path objects
+        video_path = Path(video_path)
+        audio_path = Path(audio_path)
+        # Check if the audio file already exists
         if audio_path.exists():
             print(f"Audio already extracted: {audio_path}")
             return
+        # Load the video and extract the audio
         video_clip = VideoFileClip(str(video_path))
         audio_clip = video_clip.audio
         if not audio_clip:
+            print("No audio found in video.")
             return
+        # Write the extracted audio to the specified path
+        print(f"Extracting audio to: {audio_path}")
         audio_clip.write_audiofile(str(audio_path))
         audio_clip.close()
         video_clip.close()
+        # Optionally compress the audio speed
+        if compress_speed:
+            print(f"Compressing audio speed by factor: {speed_factor}")
+            # Load the audio file with pydub
+            audio = AudioSegment.from_file(audio_path)
+            # Adjust the playback speed by modifying the frame rate
+            sped_up_audio = audio._spawn(audio.raw_data, overrides={
+                "frame_rate": int(audio.frame_rate * speed_factor)
+            })
+            # Restore the original frame rate to maintain proper playback speed
+            sped_up_audio = sped_up_audio.set_frame_rate(audio.frame_rate)
+            # Overwrite the original file with the sped-up version
+            if not output_path:
+                output_path = audio_path
+            sped_up_audio.export(output_path, format="mp3")
+            print(f"Compressed audio saved to: {audio_path}")
+        else:
+            print(f"Audio extracted: {audio_path}")
     def get_whisper_transcript(self, audio_path: Path, chunk_length: int = 30):
         # Initialize the Whisper parser
         if self._model_name == 'whisper':

parrot/loaders/video.py CHANGED Viewed

@@ -12,7 +12,7 @@ class VideoLoader(BaseVideoLoader):
     """
     _extension = ['.youtube']
     encoding = 'utf-8'
-    chunk_size = 768
+    chunk_size = 2048
     def __init__(
         self,

parrot/loaders/videolocal.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from typing import Any
 from collections.abc import Callable
+import re
 import math
 from pathlib import PurePath
 from langchain.docstore.document import Document
@@ -8,16 +9,35 @@ from .basevideo import BaseVideoLoader
 def split_text(text, max_length):
     """Split text into chunks of a maximum length, ensuring not to break words."""
+    # Split the transcript into paragraphs
+    paragraphs = text.split('\n\n')
     chunks = []
-    while len(text) > max_length:
-        # Find the last space before the max_length
-        split_point = text.rfind(' ', 0, max_length)
-        # If no space found, split at max_length
-        if split_point == -1:
-            split_point = max_length
-        chunks.append(text[:split_point])
-        text = text[split_point:].strip()
-    chunks.append(text)
+    current_chunk = ""
+    for paragraph in paragraphs:
+        # If the paragraph is too large, split it into sentences
+        if len(paragraph) > max_length:
+            # Split paragraph into sentences
+            sentences = re.split(r'(?<=[.!?]) +', paragraph)
+            for sentence in sentences:
+                if len(current_chunk) + len(sentence) + 1 > max_length:
+                    # Save the current chunk and start a new one
+                    chunks.append(current_chunk.strip())
+                    current_chunk = sentence
+                else:
+                    # Add sentence to the current chunk
+                    current_chunk += " " + sentence
+        else:
+            # If adding the paragraph exceeds max size, start a new chunk
+            if len(current_chunk) + len(paragraph) + 2 > max_length:
+                chunks.append(current_chunk.strip())
+                current_chunk = paragraph
+            else:
+                # Add paragraph to the current chunk
+                current_chunk += "\n\n" + paragraph
+    # Add any remaining text to chunks
+    if current_chunk.strip():
+        chunks.append(current_chunk.strip())
     return chunks
@@ -37,15 +57,23 @@ class VideoLocalLoader(BaseVideoLoader):
         origin: str = '',
         **kwargs
     ):
-        super().__init__(tokenizer, text_splitter, source_type=source_type, **kwargs)
+        super().__init__(
+            tokenizer,
+            text_splitter,
+            source_type=source_type,
+            **kwargs
+        )
+        self.extract_frames: bool = kwargs.pop('extract_frames', False)
+        self.seconds_per_frame: int = kwargs.pop('seconds_per_frame', 1)
+        self.compress_speed: bool = kwargs.pop('compress_speed', False)
+        self.speed_factor: float = kwargs.pop('speed_factor', 1.5)
         self.path = path
     def load_video(self, path: PurePath) -> list:
         metadata = {
-            "url": f"{path.name}",
+            "url": f"{path}",
             "source": f"{path}",
-            "filename": f"{path}",
-            # "index": path.stem,
+            "filename": f"{path.name}",
             "question": '',
             "answer": '',
             'type': 'video_transcript',
@@ -58,10 +86,17 @@ class VideoLocalLoader(BaseVideoLoader):
             }
         }
         documents = []
-        transcript_path = path.with_suffix('.vtt')
+        transcript_path = path.with_suffix('.txt')
+        vtt_path = path.with_suffix('.vtt')
+        summary_path = path.with_suffix('.summary')
         audio_path = path.with_suffix('.mp3')
         # second: extract audio from File
-        self.extract_audio(path, audio_path)
+        self.extract_audio(
+            path,
+            audio_path,
+            compress_speed=self.compress_speed,
+            speed_factor=self.speed_factor
+        )
         # get the Whisper parser
         transcript_whisper = self.get_whisper_transcript(audio_path)
         if transcript_whisper:
@@ -70,35 +105,71 @@ class VideoLocalLoader(BaseVideoLoader):
             transcript = ''
         # Summarize the transcript
         if transcript:
-            # Split transcript into chunks
-            transcript_chunks = split_text(transcript, 32767)
+            # first: extract summary, saving summary as a document:
             summary = self.get_summary_from_text(transcript)
-            # Create Two Documents, one is for transcript, second is VTT:
-            metadata['summary'] = summary
-            for chunk in transcript_chunks:
+            self.saving_file(summary_path, summary.encode('utf-8'))
+            # second: saving transcript to a file:
+            self.saving_file(transcript_path, transcript.encode('utf-8'))
+            # Create Three Documents:
+            # one is for transcript
+            # split document only if size > 65.534
+            if len(transcript) > 65534:
+                # Split transcript into chunks
+                transcript_chunks = split_text(transcript, 32767)
+                for chunk in transcript_chunks:
+                    doc = Document(
+                        page_content=chunk,
+                        metadata=metadata
+                    )
+                    documents.append(doc)
+            else:
                 doc = Document(
-                    page_content=chunk,
+                    page_content=transcript,
                     metadata=metadata
                 )
                 documents.append(doc)
+            # second is Summary
+            if summary:
+                _meta = {
+                    **metadata,
+                    "type": 'video summary'
+                }
+                doc = Document(
+                    page_content=summary,
+                    metadata=_meta
+                )
+            # Third is VTT:
         if transcript_whisper:
             # VTT version:
-            transcript = self.transcript_to_vtt(transcript_whisper, transcript_path)
-            transcript_chunks = split_text(transcript, 65535)
-            for chunk in transcript_chunks:
+            transcript = self.transcript_to_vtt(transcript_whisper, vtt_path)
+            _meta = {
+                **metadata,
+                "type": 'video subte vtt'
+            }
+            if len(transcript) > 65535:
+                transcript_chunks = split_text(transcript, 65535)
+                for chunk in transcript_chunks:
+                    doc = Document(
+                        page_content=chunk,
+                        metadata=_meta
+                    )
+                    documents.append(doc)
+            else:
                 doc = Document(
-                    page_content=chunk,
-                    metadata=metadata
+                    page_content=transcript,
+                    metadata=_meta
                 )
                 documents.append(doc)
             # Saving every dialog chunk as a separate document
             dialogs = self.transcript_to_blocks(transcript_whisper)
             docs = []
             for chunk in dialogs:
+                start_time = chunk['start_time']
                 _meta = {
-                    # "index": f"{path.stem}:{chunk['id']}",
+                    "source": f"{path.name}: min. {start_time}",
+                    "type": "video dialog",
                     "document_meta": {
-                        "start": f"{chunk['start_time']}",
+                        "start": f"{start_time}",
                         "end": f"{chunk['end_time']}",
                         "id": f"{chunk['id']}",
                         "language": self._language,
@@ -128,15 +199,61 @@ class VideoLocalLoader(BaseVideoLoader):
                         documents.extend(self.load_video(item))
         return self.split_documents(documents)
+    def extract_video(self, path: PurePath) -> list:
+        metadata = {
+            "url": f"{path}",
+            "source": f"{path}",
+            "filename": f"{path.name}",
+            'type': 'video_transcript',
+            "source_type": self._source_type,
+            "transcript": None,
+            "summary": None,
+            "vtt": None
+        }
+        transcript_path = path.with_suffix('.txt')
+        vtt_path = path.with_suffix('.vtt')
+        summary_path = path.with_suffix('.summary')
+        audio_path = path.with_suffix('.mp3')
+        # second: extract audio from File
+        self.extract_audio(
+            path,
+            audio_path,
+            compress_speed=self.compress_speed,
+            speed_factor=self.speed_factor
+        )
+        # get the Whisper parser
+        transcript_whisper = self.get_whisper_transcript(audio_path)
+        if transcript_whisper:
+            transcript = transcript_whisper['text']
+        else:
+            transcript = ''
+        # Summarize the transcript
+        if transcript:
+            # first: extract summary, saving summary as a document:
+            summary = self.get_summary_from_text(transcript)
+            self.saving_file(summary_path, summary.encode('utf-8'))
+            # second: saving transcript to a file:
+            self.saving_file(transcript_path, transcript.encode('utf-8'))
+            metadata['transcript'] = transcript_path
+            metadata["summary"] = summary
+            metadata['summary_file'] = summary_path
+            metadata["vtt"] = vtt_path
+            # Third is VTT:
+        if transcript_whisper:
+            # VTT version:
+            transcript = self.transcript_to_vtt(transcript_whisper, vtt_path)
+        return metadata
     def extract(self) -> list:
+        # Adding also Translation to other language.
         documents = []
         if self.path.is_file():
-            docs = self.load_video(self.path)
-            documents.extend(docs)
-        if self.path.is_dir():
+            doc = self.extract_video(self.path)
+            documents.append(doc)
+        elif self.path.is_dir():
             # iterate over the files in the directory
             for ext in self._extension:
                 for item in self.path.glob(f'*{ext}'):
                     if set(item.parts).isdisjoint(self.skip_directories):
-                        documents.extend(self.load_video(item))
+                        documents.append(self.extract_video(item))
         return documents

parrot/loaders/youtube.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from typing import Optional, Union
 from pytube import YouTube
 from youtube_transcript_api import NoTranscriptFound
-import torch
 from langchain.docstore.document import Document
 from langchain_community.document_loaders.parsers.audio import (
     OpenAIWhisperParserLocal
@@ -190,3 +189,53 @@ class YoutubeLoader(VideoLoader):
                     metadata=metadata
                 )
                 return [doc]
+    def extract_video(
+        self,
+        url: str
+    ) -> dict:
+        """
+        Download a video and build the metadata describing its transcript.
+
+        Steps: fetch the video info, download the file, extract the audio
+        track, transcribe it, summarize the transcript and save the
+        summary, the plain-text transcript and the VTT captions using the
+        downloaded file name with a different suffix.
+
+        Args:
+            url (str): URL of the video to process.
+        Returns:
+            dict: metadata with the keys ``url``, ``source``,
+            ``source_type``, ``type``, ``summary``, ``video_info``,
+            ``transcript``, ``summary_file``, ``vtt`` and ``audio``.
+        """
+        # first: load video metadata:
+        video_info = self.get_video_info(url)
+        # second: download video
+        file_path = self.download_video(url, self._video_path)
+        audio_path = file_path.with_suffix('.mp3')
+        transcript_path = file_path.with_suffix('.txt')
+        vtt_path = file_path.with_suffix('.vtt')
+        summary_path = file_path.with_suffix('.summary')
+        # third: extract audio and transcribe it
+        self.extract_audio(file_path, audio_path)
+        transcript_whisper = self.get_whisper_transcript(audio_path)
+        # The transcription may come back empty; fall back to an empty
+        # transcript instead of failing on the missing result.
+        transcript = transcript_whisper['text'] if transcript_whisper else ''
+        # Summarize the transcript. This is best effort: a failed summary
+        # must not abort the extraction, so it degrades to an empty string.
+        summary = ''
+        if transcript:
+            try:
+                summary = self.get_summary_from_text(transcript)
+                self.saving_file(summary_path, summary.encode('utf-8'))
+            except Exception:
+                summary = ''
+        # fourth: saving the plain-text transcript to a file. The text
+        # itself is stored here, not the value returned by the VTT step.
+        self.saving_file(transcript_path, transcript.encode('utf-8'))
+        # VTT version (presumably written to vtt_path by transcript_to_vtt):
+        if transcript_whisper:
+            self.transcript_to_vtt(transcript_whisper, vtt_path)
+        # Create Meta of Video Document
+        return {
+            "url": f"{url}",
+            "source": f"{url}",
+            "source_type": self._source_type,
+            'type': 'video_transcript',
+            "summary": summary,
+            "video_info": video_info,
+            'transcript': transcript_path,
+            'summary_file': summary_path,
+            "vtt": vtt_path,
+            'audio': audio_path,
+        }
+    def extract(self) -> list:
+        """
+        Run ``extract_video`` on every URL in ``self.urls``.
+
+        Returns:
+            list: the ``extract_video`` results, in the order of the URLs.
+        """
+        return [self.extract_video(video_url) for video_url in self.urls]

parrot/tools/execute.py ADDED Viewed

@@ -0,0 +1,56 @@
+"""
+Executable Python REPL Tool.
+"""
+import io
+import base64
+import json
+import matplotlib.pyplot as plt
+from langchain_experimental.tools.python.tool import PythonAstREPLTool
+class ExecutablePythonREPLTool(PythonAstREPLTool):
+    """
+    Executable Python REPL Tool.
+
+    Runs a snippet of Python code and, when the snippet leaves a
+    Matplotlib figure open, returns that figure as a base64-encoded PNG.
+    """
+    def execute_code(self, code: str) -> str:
+        """
+        Execute the provided Python code and report the result.
+
+        Args:
+            code (str): The Python code to execute.
+        Returns:
+            str: A Markdown string echoing the executed code, followed by
+            a JSON document holding the rendered figure when the code
+            drew one. If execution fails, an error message is returned
+            instead of raising. Text printed by the code is not captured.
+        """
+        # SECURITY: exec() runs the snippet with the full privileges of
+        # this process. Only feed it code from a trusted source, or run
+        # the tool inside a sandbox.
+        try:
+            # Set up a namespace for execution
+            namespace = {}
+            exec(code, namespace)
+            # Check if a plot was created: ask pyplot for open figures
+            # rather than looking for a variable named "plt", so figures
+            # are found whatever name the snippet imported pyplot under,
+            # and no blank image is produced when nothing was drawn.
+            if not plt.get_fignums():
+                return f"**Code Executed**:\n```python\n{code}\n```\n\n"
+            buf = io.BytesIO()
+            plt.savefig(buf, format='png')
+            # Encode the image in base64
+            img_str = base64.b64encode(buf.getvalue()).decode('utf-8')
+            # Prepare the JSON output
+            result = {
+                "image": {
+                    "format": "png",
+                    "base64": img_str
+                }
+            }
+            # Return both the code and the JSON result
+            return f"**Code Executed**:\n```python\n{code}\n```\n\n**Result**:\n{json.dumps(result)}"
+        except Exception as e:
+            return f"Error executing code: {e}"
+        finally:
+            # pyplot keeps figures in global state: always release them,
+            # even on failure, so they cannot leak into the next call.
+            plt.close('all')
+    def __call__(self, code: str) -> str:
+        """Shortcut for ``execute_code(code)``."""
+        return self.execute_code(code)

parrot/version.py CHANGED Viewed

@@ -3,7 +3,7 @@
 __title__ = "ai-parrot"
 __description__ = "Live Chatbots based on Langchain chatbots and Agents \
     Integrated into Navigator Framework or used into aiohttp applications."
-__version__ = "0.3.11"
+__version__ = "0.3.16"
 __author__ = "Jesus Lara"
 __author_email__ = "jesuslarag@gmail.com"
 __license__ = "MIT"

{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.16.dist-info}/LICENSE RENAMED Viewed

File without changes

{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.16.dist-info}/WHEEL RENAMED Viewed

File without changes

{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.16.dist-info}/top_level.txt RENAMED Viewed

File without changes