ai-parrot 0.3.11__cp311-cp311-manylinux_2_28_x86_64.whl → 0.3.17__cp311-cp311-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {ai_parrot-0.3.11.dist-info → ai_parrot-0.3.17.dist-info}/METADATA +17 -16
- {ai_parrot-0.3.11.dist-info → ai_parrot-0.3.17.dist-info}/RECORD +16 -15
- parrot/chatbots/base.py +3 -0
- parrot/chatbots/copilot.py +36 -3
- parrot/conf.py +2 -0
- parrot/loaders/__init__.py +0 -20
- parrot/loaders/abstract.py +44 -1
- parrot/loaders/basevideo.py +48 -3
- parrot/loaders/video.py +1 -1
- parrot/loaders/videolocal.py +149 -32
- parrot/loaders/youtube.py +50 -1
- parrot/tools/execute.py +56 -0
- parrot/version.py +1 -1
- {ai_parrot-0.3.11.dist-info → ai_parrot-0.3.17.dist-info}/LICENSE +0 -0
- {ai_parrot-0.3.11.dist-info → ai_parrot-0.3.17.dist-info}/WHEEL +0 -0
- {ai_parrot-0.3.11.dist-info → ai_parrot-0.3.17.dist-info}/top_level.txt +0 -0
{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.17.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-parrot
-Version: 0.3.11
+Version: 0.3.17
 Summary: Live Chatbots based on Langchain chatbots and Agents Integrated into Navigator Framework or used into aiohttp applications.
 Home-page: https://github.com/phenobarbital/ai-parrot
 Author: Jesus Lara
@@ -88,6 +88,22 @@ Requires-Dist: streamlit==1.37.1; extra == "analytics"
 Provides-Extra: anthropic
 Requires-Dist: langchain-anthropic==0.1.11; extra == "anthropic"
 Requires-Dist: anthropic==0.25.2; extra == "anthropic"
+Provides-Extra: basic_loaders
+Requires-Dist: youtube-transcript-api==0.6.2; extra == "basic-loaders"
+Requires-Dist: pymupdf==1.24.4; extra == "basic-loaders"
+Requires-Dist: pymupdf4llm==0.0.1; extra == "basic-loaders"
+Requires-Dist: pdf4llm==0.0.6; extra == "basic-loaders"
+Requires-Dist: pytube==15.0.0; extra == "basic-loaders"
+Requires-Dist: pydub==0.25.1; extra == "basic-loaders"
+Requires-Dist: markdownify==0.12.1; extra == "basic-loaders"
+Requires-Dist: yt-dlp==2024.4.9; extra == "basic-loaders"
+Requires-Dist: moviepy==1.0.3; extra == "basic-loaders"
+Requires-Dist: rapidocr-onnxruntime==1.3.15; extra == "basic-loaders"
+Requires-Dist: pytesseract==0.3.10; extra == "basic-loaders"
+Requires-Dist: python-docx==1.1.0; extra == "basic-loaders"
+Requires-Dist: python-pptx==0.6.23; extra == "basic-loaders"
+Requires-Dist: docx2txt==0.8; extra == "basic-loaders"
+Requires-Dist: mammoth==1.7.1; extra == "basic-loaders"
 Provides-Extra: crew
 Requires-Dist: colbert-ai==0.2.19; extra == "crew"
 Requires-Dist: vanna==0.3.4; extra == "crew"
@@ -104,26 +120,11 @@ Requires-Dist: llama-index-llms-huggingface==0.2.7; extra == "hunggingfaces"
 Provides-Extra: loaders
 Requires-Dist: unstructured==0.14.3; extra == "loaders"
 Requires-Dist: unstructured-client==0.18.0; extra == "loaders"
-Requires-Dist: youtube-transcript-api==0.6.2; extra == "loaders"
-Requires-Dist: pymupdf==1.24.4; extra == "loaders"
-Requires-Dist: pymupdf4llm==0.0.1; extra == "loaders"
-Requires-Dist: pdf4llm==0.0.6; extra == "loaders"
 Requires-Dist: PyPDF2==3.0.1; extra == "loaders"
 Requires-Dist: pdfminer.six==20231228; extra == "loaders"
 Requires-Dist: pdfplumber==0.11.0; extra == "loaders"
 Requires-Dist: GitPython==3.1.42; extra == "loaders"
 Requires-Dist: opentelemetry-sdk==1.24.0; extra == "loaders"
-Requires-Dist: rapidocr-onnxruntime==1.3.15; extra == "loaders"
-Requires-Dist: pytesseract==0.3.10; extra == "loaders"
-Requires-Dist: python-docx==1.1.0; extra == "loaders"
-Requires-Dist: python-pptx==0.6.23; extra == "loaders"
-Requires-Dist: docx2txt==0.8; extra == "loaders"
-Requires-Dist: pytube==15.0.0; extra == "loaders"
-Requires-Dist: pydub==0.25.1; extra == "loaders"
-Requires-Dist: markdownify==0.12.1; extra == "loaders"
-Requires-Dist: yt-dlp==2024.4.9; extra == "loaders"
-Requires-Dist: moviepy==1.0.3; extra == "loaders"
-Requires-Dist: mammoth==1.7.1; extra == "loaders"
 Requires-Dist: paddlepaddle==2.6.1; extra == "loaders"
 Requires-Dist: paddlepaddle-gpu==2.6.1; extra == "loaders"
 Requires-Dist: paddleocr==2.8.1; extra == "loaders"
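The new basic_loaders extra collects the lighter document/audio/video dependencies (pytube, pydub, yt-dlp, moviepy, pymupdf, pytesseract, python-docx, and friends) that the heavier loaders extra previously carried, so they can be installed without paddlepaddle/paddleocr. Note that the extra is declared as basic_loaders while its markers read extra == "basic-loaders"; pip normalizes underscores and hyphens in extra names, so `pip install "ai-parrot[basic_loaders]"` (or the hyphenated spelling) should resolve the same set — that normalization behaviour is an assumption about the installer, not something stated in the metadata itself.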
{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.17.dist-info}/RECORD
CHANGED

@@ -1,18 +1,18 @@
 parrot/__init__.py,sha256=eTkAkHeJ5BBDG2fxrXA4M37ODBJoS1DQYpeBAWL2xeI,387
-parrot/conf.py,sha256
+parrot/conf.py,sha256=andrPREuR_BHiXA_Q0Utyb5xSb1ct_uKnjMzEOa1ftE,4373
 parrot/exceptions.cpython-311-x86_64-linux-gnu.so,sha256=VNyBh3uLxGQgB0l1bkWjQDqYUN2ZAvRmV12AqQijV9Q,361184
 parrot/manager.py,sha256=NhzXoWxSgtoWHpmYP8cV2Ujq_SlvCbQYQBaohAeL2TM,5935
 parrot/models.py,sha256=RsVQCqhSXBKRPcu-BCga9Y1wyvENFXDCuq3_ObIKvAo,13452
 parrot/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-parrot/version.py,sha256=
+parrot/version.py,sha256=zllacxT5drrjYejdjA1kZgUHRXjCtORHFYx4kK_6c1g,374
 parrot/chatbots/__init__.py,sha256=ypskCnME0xUv6psBEGCEyXCrD0J0ULHSllpVmSxqb4A,200
 parrot/chatbots/abstract.py,sha256=CmDn3k4r9uKImOZRN4L9zxLbCdC-1MPUAorDlfZT-kA,26421
 parrot/chatbots/asktroc.py,sha256=gyWzyvpAnmXwXd-3DEKoIJtAxt6NnP5mUZdZbkFky8s,604
-parrot/chatbots/base.py,sha256=
+parrot/chatbots/base.py,sha256=5QX5-VPAOM-I8o0ktBt3_JEDPRQ_-iK0fFLRMUlvs_s,13396
 parrot/chatbots/basic.py,sha256=DIMTPoGc90BRSlokeOdnjlEXAAfZlIFqxXWaMyAX9uk,232
 parrot/chatbots/bose.py,sha256=z8rm8G_tAwHjDUodXfrAKnhaMzufQyf-GrhxwHeHle4,757
 parrot/chatbots/cody.py,sha256=Z0LNiNtZjEe7bA3hwexclBZK5zEF9m2ODVmrzZjC3Bw,623
-parrot/chatbots/copilot.py,sha256=
+parrot/chatbots/copilot.py,sha256=Q_CwoPm1M0loa7N3DLSLK8eq4m99z1CeU5FI9iqF9XI,2767
 parrot/chatbots/dataframe.py,sha256=CfZiLKIwnaku52nl2PNjciqRlH8m2lM4buO6xI7P408,3914
 parrot/chatbots/hragents.py,sha256=PyNIBJ2OH5CtfVydccgpY50V6GI3cLKuVdOMaa7sQz0,574
 parrot/chatbots/oddie.py,sha256=RMbANmJZP1_vLVGKRNBKmA8otyAiWPkvpA0rJ0U3tZk,796
@@ -45,11 +45,11 @@ parrot/llms/hf.py,sha256=f2HhHCICaSHp0y3KRhqNcYXNO-amYTxDXJ_2_9L5Bk8,1594
 parrot/llms/openai.py,sha256=NgWv6IwJ1DborlYhTyureBBdgHfAPc_lGHQRGt80ca8,1759
 parrot/llms/pipes.py,sha256=Ns_wh-alkKocZKlbQyQLKOSBxqfRC_hCbz34vpOOyP8,3798
 parrot/llms/vertex.py,sha256=a0UsH9sa_GiMkg31E52cWE8pXFZjyMtIanr7eAA7iyE,2615
-parrot/loaders/__init__.py,sha256=
-parrot/loaders/abstract.py,sha256=
+parrot/loaders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+parrot/loaders/abstract.py,sha256=Mx6BtihuwvFkl-Ali_84949BfVXFB0JZjmSKnJ-gXSg,17272
 parrot/loaders/audio.py,sha256=P2tWYKxWLM5TMLMm-5qR35cD_pGQWmf8-UZUTiK4R0o,3698
 parrot/loaders/basepdf.py,sha256=Qh_hzR0JArQEVP31SgWt9utt7qWmbfwVoCzUDyBHcXw,3243
-parrot/loaders/basevideo.py,sha256=
+parrot/loaders/basevideo.py,sha256=xLAMfIhjGR10T3-Pdx8MLq5Bp6woCOuq5Jr6yUZ6LCU,11992
 parrot/loaders/csv.py,sha256=DLcFK3z9boMNH3y9Qca5BWDfYXgXjXsGkzxVN1_2wyo,1103
 parrot/loaders/dir.py,sha256=_CU9kWGCpHnZplUamXLs2yEizA1aCRBASn3F6MggitQ,866
 parrot/loaders/excel.py,sha256=9cTsMfxR_YOpBHz9Ru0LJsxBXDVBh52XM8hHV63QgYo,12445
@@ -67,13 +67,13 @@ parrot/loaders/qa.py,sha256=3K_2yBxUzj-ifDpAbUsIc-v66004fKPzGavUqrhc3Kc,2646
 parrot/loaders/repo.py,sha256=vBqBAnwU6p3_DCvI9DVhi1Bs8iCDYHwFGp0P9zvGRyw,3737
 parrot/loaders/rtd.py,sha256=O0h7LDntP_0IBT8LDQi09u-gYVUO5cuvmGsfZLZ4CoU,1990
 parrot/loaders/txt.py,sha256=-xXVSuvkC2LQ2XZ44Nqwk3V8nE4F6UgXylosMCNgeFo,2804
-parrot/loaders/video.py,sha256=
-parrot/loaders/videolocal.py,sha256=
+parrot/loaders/video.py,sha256=9zKUFFROSIbWjWFOvxDrW4uOewrMzD7-xADmszOpP4k,2930
+parrot/loaders/videolocal.py,sha256=cRYv3KvKKHltMY4QbnvEMCOLHlEY9ZmWeXTL23fy-gA,9669
 parrot/loaders/vimeo.py,sha256=Cs7FkL2Cr8yV44-Tv5wWkveKzqhOeAIP6kF93SCr_Lk,4118
 parrot/loaders/web.py,sha256=kTi-NtAsbQLKi3wD_2o15Z0HHnYzsEEEGjH0RdvyQqQ,8869
 parrot/loaders/web_base.py,sha256=ZwSFXtJR71cpFGN1WCLUC2W6JjEUV865tRKf8isbJ5M,4382
 parrot/loaders/word.py,sha256=jZdHSL5CtAEn1otBYLNSqKLtO3BNcTObDPgqhzk5-4M,4533
-parrot/loaders/youtube.py,sha256=
+parrot/loaders/youtube.py,sha256=DzH9bD5ZrLaTG_6GMjHsy1cHoTBR712yUC8tJiAYbNM,9607
 parrot/loaders/handlers/__init__.py,sha256=ksEDtUOEJELmyCIi0KNv7tR2fCUyADBVkwCcyqN_sVE,70
 parrot/loaders/handlers/data.py,sha256=olZ2p-wyUMGoazah7tgHY7V9buGX1FOeJ-cv2vGEoH8,7386
 parrot/loaders/utils/__init__.py,sha256=SkDyK3MuPGhp0NM6kHvaxQDe97Gcl3n9t5A741OVh1c,28
@@ -87,6 +87,7 @@ parrot/tools/abstract.py,sha256=pVSZw8MDpbVcQ-CHaGwP6CpqXHIs8hH8Oy1AqUuMmrw,1706
 parrot/tools/asknews.py,sha256=hEpPJMyNBVfj2maHbqnumn3VkY45oFvrjkE3Rq8EdGA,1039
 parrot/tools/bing.py,sha256=BtmFD66OIuCaOue5U2_yIqtjWf24IhEgNOX1LAVvHtA,464
 parrot/tools/duck.py,sha256=UAAZzlF-Q0sZh0_IcS96dwSgCuBPdeepkwRrMM5cJPY,1920
+parrot/tools/execute.py,sha256=fTMQAsXuUzVyIWmZxL22LrSj2eQ-Rh-ncyUZ9gY-d-A,1687
 parrot/tools/google.py,sha256=NjijcUWH6Crk5Uty_x3FstjDTGZV8JXfBFDQEtMHhac,6236
 parrot/tools/stack.py,sha256=M-VRWjIDa18bl5p88dSKtxMj4Kn21YB76to0u6yXA30,942
 parrot/tools/weather.py,sha256=4v9Ft5lkVzb9Pg7afNs7BK5T3WEcsZbHPlBrF9oXSo8,2541
@@ -103,8 +104,8 @@ resources/users/handlers.py,sha256=BGzqBvPY_OaIF_nONWX4b_B5OyyBrdGuSihIsdlFwjk,2
 resources/users/models.py,sha256=glk7Emv7QCi6i32xRFDrGc8UwK23_LPg0XUOJoHnwRU,6799
 settings/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 settings/settings.py,sha256=9ueEvyLNurUX-AaIeRPV8GKX1c4YjDLbksUAeqEq6Ck,1854
-ai_parrot-0.3.
-ai_parrot-0.3.
-ai_parrot-0.3.
-ai_parrot-0.3.
-ai_parrot-0.3.
+ai_parrot-0.3.17.dist-info/LICENSE,sha256=vRKOoa7onTsLNvSzJtGtMaNhWWh8B3YAT733Tlu6M4o,1070
+ai_parrot-0.3.17.dist-info/METADATA,sha256=Imgx1G3UJwSQnl-eZM8g2BaDrE7HnYypF_18hKOTKg4,9958
+ai_parrot-0.3.17.dist-info/WHEEL,sha256=UQ-0qXN3LQUffjrV43_e_ZXj2pgORBqTmXipnkj0E8I,113
+ai_parrot-0.3.17.dist-info/top_level.txt,sha256=qHoO4BhYDfeTkyKnciZSQtn5FSLN3Q-P5xCTkyvbuxg,26
+ai_parrot-0.3.17.dist-info/RECORD,,
parrot/chatbots/base.py
CHANGED
@@ -89,6 +89,9 @@ Whether you need help with a specific question or just want to have a conversati
 - OpenWeatherMap: Get weather information about a location.
 - yahoo_finance_news: Retrieve the latest financial news from Yahoo Finance.
 - python_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, sometimes output is abbreviated - make sure it does not look abbreviated before using it in your answer.
+- executable_python_repl_ast: A Python shell. Use this to execute python commands. Input should be a valid python command. When using this tool, whenever you generate a visual output (like charts with matplotlib), instead of using plt.show(), render the image as a base64-encoded HTML string. Do this by saving the plot to a buffer and encoding it in base64, then return the result as a JSON object formatted as follows: "image": "format": "png", "base64": "base64-encoded-string".
+
+
 - youtube_search: Search for videos on YouTube based on specific keywords.
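The new executable_python_repl_ast instruction asks the model to render charts to a base64-encoded PNG wrapped in JSON instead of calling plt.show(). A minimal sketch of the kind of snippet the agent is expected to emit (the plotted values are arbitrary and the headless Agg backend is an assumption):

```python
import base64
import io
import json

import matplotlib
matplotlib.use("Agg")  # assumption: render off-screen, no display needed
import matplotlib.pyplot as plt

plt.plot([1, 2, 3], [2, 4, 8])   # placeholder chart
buf = io.BytesIO()
plt.savefig(buf, format="png")   # save to a buffer instead of plt.show()
plt.close()
buf.seek(0)

# JSON shape described in the prompt: {"image": {"format": "png", "base64": ...}}
payload = {
    "image": {
        "format": "png",
        "base64": base64.b64encode(buf.read()).decode("utf-8"),
    }
}
print(json.dumps(payload)[:120])
```

This mirrors what the new parrot/tools/execute.py tool (diffed below) does on the agent's behalf.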
parrot/chatbots/copilot.py
CHANGED
@@ -4,17 +4,43 @@ from .base import BaseAgent
 from ..tools import (
     ZipcodeAPIToolkit,
     WikipediaTool,
-    WikidataTool,
+    # WikidataTool,
     GoogleSearchTool,
     GoogleLocationFinder,
     BingSearchTool,
-    AskNewsTool,
+    # AskNewsTool,
     DuckDuckGoSearchTool,
     YouTubeSearchTool,
     OpenWeatherMapTool,
     StackExchangeTool,
 )
+from ..tools.execute import ExecutablePythonREPLTool

+# ZipCode API Toolkit
+zpt = ZipcodeAPIToolkit()
+zpt_tools = zpt.get_tools()
+
+wk1 = WikipediaTool()
+# wk12 = WikidataTool()
+
+g1 = GoogleSearchTool()
+g2 = GoogleLocationFinder()
+
+b = BingSearchTool()
+d = DuckDuckGoSearchTool()
+# ask = AskNewsTool()
+
+yt = YouTubeSearchTool()
+stackexchange = StackExchangeTool()
+weather = OpenWeatherMapTool()
+
+tooling = [
+    wk1,
+    g1, g2,
+    b, d, yt,
+    weather,
+    stackexchange
+] + zpt_tools

 class CopilotAgent(BaseAgent):
     """CopilotAgent Agent.
@@ -30,17 +56,24 @@ class CopilotAgent(BaseAgent):
         **kwargs
     ):
         super().__init__(name, llm, tools, prompt_template, **kwargs)
+        if not tools:
+            tools = tooling
         self.tools = [
             PythonAstREPLTool(
                 name='python_repl_ast',
                 globals={},
                 locals={}
+            ),
+            ExecutablePythonREPLTool(
+                name='executable_python_repl_ast',
+                globals={},
+                locals={}
             )
         ] + list(tools)
         self.prompt = self.get_prompt(
             self.prompt_template
         )
-
+        print('PROMPT > ', self.prompt)

     @classmethod
     def default_tools(cls) -> list:
CHANGED
|
@@ -12,6 +12,8 @@ logging.getLogger(name='h5py').setLevel(logging.INFO)
|
|
|
12
12
|
logging.getLogger(name='tensorflow').setLevel(logging.INFO)
|
|
13
13
|
logging.getLogger(name='selenium.webdriver').setLevel(logging.WARNING)
|
|
14
14
|
logging.getLogger(name='selenium').setLevel(logging.INFO)
|
|
15
|
+
logging.getLogger(name='matplotlib').setLevel(logging.WARNING)
|
|
16
|
+
logging.getLogger(name='PIL').setLevel(logging.INFO)
|
|
15
17
|
|
|
16
18
|
|
|
17
19
|
# Static directory
|
parrot/loaders/__init__.py
CHANGED
@@ -1,20 +0,0 @@
-from .dir import load_directory
-from .pdf import PDFLoader
-from .web import WebLoader
-from .youtube import YoutubeLoader
-from .vimeo import VimeoLoader
-from .word import MSWordLoader
-from .ppt import PPTXLoader
-from .repo import RepositoryLoader
-from .github import GithubLoader
-from .json import JSONLoader
-from .excel import ExcelLoader
-from .web_base import WebBaseLoader
-from .pdfmark import PDFMarkdownLoader
-from .pdfimages import PDFImageLoader
-from .pdftables import PDFTablesLoader
-from .pdfchapters import PDFChapterLoader
-from .txt import TXTLoader
-from .qa import QAFileLoader
-from .rtd import ReadTheDocsLoader
-from .videolocal import VideoLocalLoader
parrot/loaders/abstract.py
CHANGED
@@ -74,6 +74,7 @@ class AbstractLoader(ABC):
         self,
         tokenizer: Union[str, Callable] = None,
         text_splitter: Union[str, Callable] = None,
+        translation: Optional[str] = None,
         source_type: str = 'file',
         **kwargs
     ):
@@ -114,6 +115,15 @@
         )
         # JSON encoder:
         self._encoder = JSONContent()
+        # Traslation
+        self._translation = translation
+        self.translator = None
+        if self._translation:
+            mdl = kwargs.get(
+                'translation_model',
+                f"Helsinki-NLP/opus-mt-en-{self._translation}"
+            )
+            self.translator = self.get_translator(mdl)


     def __enter__(self):
@@ -159,6 +169,27 @@
             use_memory_efficient_attention=True,
         ).to(self._device)

+    def get_translator(self, model_name: str = 'Helsinki-NLP/opus-mt-en-es'):
+        if not self._translation:
+            return None
+        trans_model = AutoModelForSeq2SeqLM.from_pretrained(
+            model_name,
+            device_map="auto",
+            torch_dtype=torch.bfloat16,
+            trust_remote_code=True
+        )
+        trans_tokenizer = AutoTokenizer.from_pretrained(model_name)
+        translator = pipeline(
+            "translation",
+            model=trans_model,
+            tokenizer=trans_tokenizer,
+            batch_size=True,
+            max_new_tokens=500,
+            min_new_tokens=300,
+            use_fast=True
+        )
+        return translator
+
     def get_summarization_model(self, model_name: str = 'facebook/bart-large-cnn'):
         if self._no_summarization is True:
             return None
@@ -216,7 +247,7 @@
             return ''
         try:
             splitter = TokenTextSplitter(
-                chunk_size=
+                chunk_size=6144,
                 chunk_overlap=100,
             )
             prompt_template = """Write a summary of the following, please also identify the main theme:
@@ -454,3 +485,15 @@
         for url in urls:
             documents += cls.load(url, **kwargs)
         return documents
+
+    def saving_file(self, filename: PurePath, data: Any):
+        """Save data to a file.
+
+        Args:
+            filename (PurePath): The path to the file.
+            data (Any): The data to save.
+        """
+        with open(filename, 'wb') as f:
+            f.write(data)
+            f.flush()
+        print(f':: Saved File on {filename}')
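The new get_translator() builds a Hugging Face translation pipeline around an Opus-MT checkpoint derived from the target language (Helsinki-NLP/opus-mt-en-<lang> by default). A stripped-down sketch of the same pattern, leaving out the loader's device, dtype and token-limit arguments (the sample sentence is illustrative):

```python
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

model_name = "Helsinki-NLP/opus-mt-en-es"  # English -> Spanish, the default above
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
translator = pipeline("translation", model=model, tokenizer=tokenizer)

result = translator("The transcript will be translated before indexing.")
print(result[0]["translation_text"])
```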
parrot/loaders/basevideo.py
CHANGED
@@ -1,8 +1,9 @@
 from collections.abc import Callable
-from typing import Any, Union, List
+from typing import Any, Union, List, Optional
 from abc import abstractmethod
 from pathlib import Path
 from moviepy.editor import VideoFileClip
+from pydub import AudioSegment
 from transformers import (
     pipeline,
     AutoModelForSeq2SeqLM,
@@ -193,25 +194,69 @@ class BaseVideoLoader(AbstractLoader):
             print('ERROR in summarization:', e)
             return ""

-    def extract_audio(
+    def extract_audio(
+        self,
+        video_path: Path,
+        audio_path: Path,
+        compress_speed: bool = False,
+        output_path: Optional[Path] = None,
+        speed_factor: float = 1.5
+    ):
         """
-        Extracts the audio from a video file and
+        Extracts the audio from a video file and optionally compresses the audio speed.

         Args:
             video_path (str): Path to the video file.
             audio_path (str): Path where the extracted audio file will be saved.
+            compress_speed (bool): Whether to compress the audio speed.
+            speed_factor (float): The factor by which to speed up the audio.
         """
+        # Ensure that the paths are valid Path objects
+        video_path = Path(video_path)
+        audio_path = Path(audio_path)
+
+        # Check if the audio file already exists
         if audio_path.exists():
             print(f"Audio already extracted: {audio_path}")
             return
+
+        # Load the video and extract the audio
         video_clip = VideoFileClip(str(video_path))
         audio_clip = video_clip.audio
         if not audio_clip:
+            print("No audio found in video.")
             return
+
+        # Write the extracted audio to the specified path
+        print(f"Extracting audio to: {audio_path}")
         audio_clip.write_audiofile(str(audio_path))
         audio_clip.close()
         video_clip.close()

+        # Optionally compress the audio speed
+        if compress_speed:
+            print(f"Compressing audio speed by factor: {speed_factor}")
+
+            # Load the audio file with pydub
+            audio = AudioSegment.from_file(audio_path)
+
+            # Adjust the playback speed by modifying the frame rate
+            sped_up_audio = audio._spawn(audio.raw_data, overrides={
+                "frame_rate": int(audio.frame_rate * speed_factor)
+            })
+
+            # Restore the original frame rate to maintain proper playback speed
+            sped_up_audio = sped_up_audio.set_frame_rate(audio.frame_rate)
+
+            # Overwrite the original file with the sped-up version
+            if not output_path:
+                output_path = audio_path
+            sped_up_audio.export(output_path, format="mp3")
+            print(f"Compressed audio saved to: {audio_path}")
+        else:
+            print(f"Audio extracted: {audio_path}")
+
+
     def get_whisper_transcript(self, audio_path: Path, chunk_length: int = 30):
         # Initialize the Whisper parser
         if self._model_name == 'whisper':
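The compress_speed branch relies on a common pydub idiom: re-tag the raw samples with a higher frame rate via _spawn(), then set_frame_rate() back so players read them at the original rate, which shortens playback (and raises pitch); presumably the point is to cut Whisper transcription time. In isolation the idiom looks like this (the input file name is hypothetical, and ffmpeg must be available to pydub):

```python
from pydub import AudioSegment

speed_factor = 1.5
audio = AudioSegment.from_file("audio.mp3")  # hypothetical input file

# Pretend the samples were recorded at a higher rate...
faster = audio._spawn(
    audio.raw_data,
    overrides={"frame_rate": int(audio.frame_rate * speed_factor)},
)
# ...then restore the frame rate so the file plays back normally, just shorter.
faster = faster.set_frame_rate(audio.frame_rate)
faster.export("audio_fast.mp3", format="mp3")
```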
parrot/loaders/video.py
CHANGED
parrot/loaders/videolocal.py
CHANGED
@@ -1,5 +1,6 @@
 from typing import Any
 from collections.abc import Callable
+import re
 import math
 from pathlib import PurePath
 from langchain.docstore.document import Document
@@ -8,16 +9,35 @@ from .basevideo import BaseVideoLoader

 def split_text(text, max_length):
     """Split text into chunks of a maximum length, ensuring not to break words."""
+    # Split the transcript into paragraphs
+    paragraphs = text.split('\n\n')
     chunks = []
-
-
-
-
-
-
-
-
-
+    current_chunk = ""
+    for paragraph in paragraphs:
+        # If the paragraph is too large, split it into sentences
+        if len(paragraph) > max_length:
+            # Split paragraph into sentences
+            sentences = re.split(r'(?<=[.!?]) +', paragraph)
+            for sentence in sentences:
+                if len(current_chunk) + len(sentence) + 1 > max_length:
+                    # Save the current chunk and start a new one
+                    chunks.append(current_chunk.strip())
+                    current_chunk = sentence
+                else:
+                    # Add sentence to the current chunk
+                    current_chunk += " " + sentence
+        else:
+            # If adding the paragraph exceeds max size, start a new chunk
+            if len(current_chunk) + len(paragraph) + 2 > max_length:
+                chunks.append(current_chunk.strip())
+                current_chunk = paragraph
+            else:
+                # Add paragraph to the current chunk
+                current_chunk += "\n\n" + paragraph
+    # Add any remaining text to chunks
+    if current_chunk.strip():
+        chunks.append(current_chunk.strip())
+
     return chunks

@@ -37,15 +57,23 @@ class VideoLocalLoader(BaseVideoLoader):
         origin: str = '',
         **kwargs
     ):
-        super().__init__(
+        super().__init__(
+            tokenizer,
+            text_splitter,
+            source_type=source_type,
+            **kwargs
+        )
+        self.extract_frames: bool = kwargs.pop('extract_frames', False)
+        self.seconds_per_frame: int = kwargs.pop('seconds_per_frame', 1)
+        self.compress_speed: bool = kwargs.pop('compress_speed', False)
+        self.speed_factor: float = kwargs.pop('speed_factor', 1.5)
         self.path = path

     def load_video(self, path: PurePath) -> list:
         metadata = {
-            "url": f"{path
+            "url": f"{path}",
             "source": f"{path}",
-            "filename": f"{path}",
-            # "index": path.stem,
+            "filename": f"{path.name}",
             "question": '',
             "answer": '',
             'type': 'video_transcript',
@@ -58,10 +86,17 @@
             }
         }
         documents = []
-        transcript_path = path.with_suffix('.
+        transcript_path = path.with_suffix('.txt')
+        vtt_path = path.with_suffix('.vtt')
+        summary_path = path.with_suffix('.summary')
         audio_path = path.with_suffix('.mp3')
         # second: extract audio from File
-        self.extract_audio(
+        self.extract_audio(
+            path,
+            audio_path,
+            compress_speed=self.compress_speed,
+            speed_factor=self.speed_factor
+        )
         # get the Whisper parser
         transcript_whisper = self.get_whisper_transcript(audio_path)
         if transcript_whisper:
@@ -70,35 +105,71 @@
             transcript = ''
         # Summarize the transcript
         if transcript:
-            #
-            transcript_chunks = split_text(transcript, 32767)
+            # first: extract summary, saving summary as a document:
             summary = self.get_summary_from_text(transcript)
-
-
-
+            self.saving_file(summary_path, summary.encode('utf-8'))
+            # second: saving transcript to a file:
+            self.saving_file(transcript_path, transcript.encode('utf-8'))
+            # Create Three Documents:
+            # one is for transcript
+            # split document only if size > 65.534
+            if len(transcript) > 65534:
+                # Split transcript into chunks
+                transcript_chunks = split_text(transcript, 32767)
+                for chunk in transcript_chunks:
+                    doc = Document(
+                        page_content=chunk,
+                        metadata=metadata
+                    )
+                    documents.append(doc)
+            else:
                 doc = Document(
-                    page_content=
+                    page_content=transcript,
                     metadata=metadata
                 )
                 documents.append(doc)
+            # second is Summary
+            if summary:
+                _meta = {
+                    **metadata,
+                    "type": 'video summary'
+                }
+                doc = Document(
+                    page_content=summary,
+                    metadata=_meta
+                )
+            # Third is VTT:
         if transcript_whisper:
             # VTT version:
-            transcript = self.transcript_to_vtt(transcript_whisper,
-
-
+            transcript = self.transcript_to_vtt(transcript_whisper, vtt_path)
+            _meta = {
+                **metadata,
+                "type": 'video subte vtt'
+            }
+            if len(transcript) > 65535:
+                transcript_chunks = split_text(transcript, 65535)
+                for chunk in transcript_chunks:
+                    doc = Document(
+                        page_content=chunk,
+                        metadata=_meta
+                    )
+                    documents.append(doc)
+            else:
                 doc = Document(
-                    page_content=
-                    metadata=
+                    page_content=transcript,
+                    metadata=_meta
                 )
                 documents.append(doc)
             # Saving every dialog chunk as a separate document
             dialogs = self.transcript_to_blocks(transcript_whisper)
             docs = []
             for chunk in dialogs:
+                start_time = chunk['start_time']
                 _meta = {
-
+                    "source": f"{path.name}: min. {start_time}",
+                    "type": "video dialog",
                     "document_meta": {
-                        "start": f"{
+                        "start": f"{start_time}",
                         "end": f"{chunk['end_time']}",
                         "id": f"{chunk['id']}",
                         "language": self._language,
@@ -128,15 +199,61 @@
             documents.extend(self.load_video(item))
         return self.split_documents(documents)

+    def extract_video(self, path: PurePath) -> list:
+        metadata = {
+            "url": f"{path}",
+            "source": f"{path}",
+            "filename": f"{path.name}",
+            'type': 'video_transcript',
+            "source_type": self._source_type,
+            "transcript": None,
+            "summary": None,
+            "vtt": None
+        }
+        transcript_path = path.with_suffix('.txt')
+        vtt_path = path.with_suffix('.vtt')
+        summary_path = path.with_suffix('.summary')
+        audio_path = path.with_suffix('.mp3')
+        # second: extract audio from File
+        self.extract_audio(
+            path,
+            audio_path,
+            compress_speed=self.compress_speed,
+            speed_factor=self.speed_factor
+        )
+        # get the Whisper parser
+        transcript_whisper = self.get_whisper_transcript(audio_path)
+        if transcript_whisper:
+            transcript = transcript_whisper['text']
+        else:
+            transcript = ''
+        # Summarize the transcript
+        if transcript:
+            # first: extract summary, saving summary as a document:
+            summary = self.get_summary_from_text(transcript)
+            self.saving_file(summary_path, summary.encode('utf-8'))
+            # second: saving transcript to a file:
+            self.saving_file(transcript_path, transcript.encode('utf-8'))
+            metadata['transcript'] = transcript_path
+            metadata["summary"] = summary
+            metadata['summary_file'] = summary_path
+            metadata["vtt"] = vtt_path
+        # Third is VTT:
+        if transcript_whisper:
+            # VTT version:
+            transcript = self.transcript_to_vtt(transcript_whisper, vtt_path)
+        return metadata
+
     def extract(self) -> list:
+        # Adding also Translation to other language.
         documents = []
         if self.path.is_file():
-
-            documents.
-
+            doc = self.extract_video(self.path)
+            documents.append(doc)
+        elif self.path.is_dir():
             # iterate over the files in the directory
             for ext in self._extension:
                 for item in self.path.glob(f'*{ext}'):
                     if set(item.parts).isdisjoint(self.skip_directories):
-                        documents.
+                        documents.append(self.extract_video(item))
         return documents
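A quick usage sketch for the rewritten chunker: load_video() only splits a transcript that exceeds 65,534 characters, passing 32,767 as the budget (the VTT branch uses 65,535). The transcript file name here is hypothetical:

```python
from parrot.loaders.videolocal import split_text

with open("talk_transcript.txt", encoding="utf-8") as fh:  # hypothetical file
    transcript = fh.read()

for i, chunk in enumerate(split_text(transcript, 32767)):
    print(f"chunk {i}: {len(chunk)} characters")
```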
parrot/loaders/youtube.py
CHANGED
@@ -1,7 +1,6 @@
 from typing import Optional, Union
 from pytube import YouTube
 from youtube_transcript_api import NoTranscriptFound
-import torch
 from langchain.docstore.document import Document
 from langchain_community.document_loaders.parsers.audio import (
     OpenAIWhisperParserLocal
@@ -190,3 +189,53 @@ class YoutubeLoader(VideoLoader):
             metadata=metadata
         )
         return [doc]
+
+    def extract_video(
+        self,
+        url: str
+    ) -> list:
+        # first: load video metadata:
+        video_info = self.get_video_info(url)
+        # first: download video
+        file_path = self.download_video(url, self._video_path)
+        audio_path = file_path.with_suffix('.mp3')
+        transcript_path = file_path.with_suffix('.txt')
+        vtt_path = file_path.with_suffix('.vtt')
+        summary_path = file_path.with_suffix('.summary')
+        # second: extract audio
+        self.extract_audio(file_path, audio_path)
+        transcript_whisper = self.get_whisper_transcript(audio_path)
+        transcript = transcript_whisper['text']
+        # Summarize the transcript
+        try:
+            summary = self.get_summary_from_text(transcript)
+            self.saving_file(summary_path, summary.encode('utf-8'))
+        except Exception:
+            summary = ''
+        # Create Meta of Video Document
+        metadata = {
+            "url": f"{url}",
+            "source": f"{url}",
+            "source_type": self._source_type,
+            'type': 'video_transcript',
+            "summary": f"{summary!s}",
+            "video_info": video_info
+        }
+        # VTT version:
+        transcript = self.transcript_to_vtt(transcript_whisper, vtt_path)
+        # second: saving transcript to a file:
+        self.saving_file(transcript_path, transcript.encode('utf-8'))
+        metadata['transcript'] = transcript_path
+        metadata["summary"] = summary
+        metadata['summary_file'] = summary_path
+        metadata["vtt"] = vtt_path
+        metadata['audio'] = audio_path
+        return metadata
+
+    def extract(self) -> list:
+        # Adding also Translation to other language.
+        documents = []
+        for url in self.urls:
+            doc = self.extract_video(url)
+            documents.append(doc)
+        return documents
parrot/tools/execute.py
ADDED
@@ -0,0 +1,56 @@
+"""
+Executable Python REPL Tool.
+"""
+import io
+import base64
+import json
+import matplotlib.pyplot as plt
+from langchain_experimental.tools.python.tool import PythonAstREPLTool
+
+
+class ExecutablePythonREPLTool(PythonAstREPLTool):
+    """
+    Executable Python REPL Tool.
+    """
+    def execute_code(self, code: str) -> str:
+        """
+        Execute the provided Python code and return the output.
+
+        Args:
+            code (str): The Python code to execute.
+
+        Returns:
+            str: The output of the executed code.
+        """
+        try:
+            # Set up a namespace for execution
+            namespace = {}
+            exec(code, namespace)
+
+            # Check if a plot was created
+            if 'plt' in namespace:
+                buf = io.BytesIO()
+                plt.savefig(buf, format='png')
+                plt.close()
+                buf.seek(0)
+                # Encode the image in base64
+                # Encode the image in base64
+                img_str = base64.b64encode(buf.read()).decode('utf-8')
+
+                # Prepare the JSON output
+                result = {
+                    "image": {
+                        "format": "png",
+                        "base64": img_str
+                    }
+                }
+                # Return both the code and the JSON result
+                return f"**Code Executed**:\n```python\n{code}\n```\n\n**Result**:\n{json.dumps(result)}"
+            else:
+                return f"**Code Executed**:\n```python\n{code}\n```\n\n"
+
+        except Exception as e:
+            return f"Error executing code: {e}"
+
+    def __call__(self, code: str) -> str:
+        return self.execute_code(code)
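A hedged usage sketch for the new tool, calling execute_code() directly with the same constructor arguments CopilotAgent passes; the plotting snippet is arbitrary. When the executed code binds matplotlib.pyplot as plt, the return value embeds the base64 PNG payload shown above; otherwise only the echoed code comes back.

```python
from parrot.tools.execute import ExecutablePythonREPLTool

tool = ExecutablePythonREPLTool(
    name="executable_python_repl_ast",
    globals={},
    locals={},
)

snippet = (
    "import matplotlib.pyplot as plt\n"
    "plt.plot([1, 2, 3], [4, 5, 6])\n"
)
print(tool.execute_code(snippet)[:200])
```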
parrot/version.py
CHANGED
@@ -3,7 +3,7 @@
 __title__ = "ai-parrot"
 __description__ = "Live Chatbots based on Langchain chatbots and Agents \
     Integrated into Navigator Framework or used into aiohttp applications."
-__version__ = "0.3.11"
+__version__ = "0.3.17"
 __author__ = "Jesus Lara"
 __author_email__ = "jesuslarag@gmail.com"
 __license__ = "MIT"
{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.17.dist-info}/LICENSE
File without changes

{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.17.dist-info}/WHEEL
File without changes

{ai_parrot-0.3.11.dist-info → ai_parrot-0.3.17.dist-info}/top_level.txt
File without changes