ai-parrot 0.3.8__tar.gz → 0.3.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ai-parrot might be problematic. Click here for more details.
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/PKG-INFO +4 -3
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/ai_parrot.egg-info/PKG-INFO +4 -3
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/ai_parrot.egg-info/requires.txt +3 -2
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/pdf.py +205 -124
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/version.py +1 -1
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/setup.py +4 -3
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/.flake8 +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/.github/dependabot.yml +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/.github/workflows/codeql-analysis.yml +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/.github/workflows/release.yml +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/.gitignore +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/.isort.cfg +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/.pylintrc +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/INSTALL +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/LICENSE +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/Makefile +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/README.md +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/SECURITY.md +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/ai_parrot.egg-info/SOURCES.txt +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/ai_parrot.egg-info/dependency_links.txt +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/ai_parrot.egg-info/top_level.txt +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/app.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/documents/AR_Certification_Skill_Practice_Scorecard_EXAMPLE.pdf +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/documents/Day 1_Essentials_AR_PPT.pdf +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/documents/ex-code-loaders.txt +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/documents/video_2024-09-11_19-43-58.mp3 +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/documents/video_2024-09-11_19-43-58.mp4 +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/documents/video_2024-09-11_19-43-58.vtt +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/etc/navigator-ssl.ini +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/etc/navigator.ini +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/etc/ssl/domain.ext +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/etc/ssl/navigator.local.crt +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/etc/ssl/navigator.local.csr +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/etc/ssl/navigator.local.key +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/etc/ssl/rootCA.crt +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/etc/ssl/rootCA.key +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/etc/ssl/rootCA.srl +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/examples/analyze_video.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/examples/askbuddy/create_bot.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/examples/check_bot.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/examples/extract_frames.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/examples/load_pdf.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/examples/test_bot.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/examples/test_question.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/mypy.ini +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/abstract.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/asktroc.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/base.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/basic.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/bose.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/cody.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/copilot.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/dataframe.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/hragents.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/odoo.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/retrievals/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/chatbots/retrievals/constitutional.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/conf.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/crew/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/crew/tools/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/crew/tools/bing.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/crew/tools/config.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/crew/tools/duckgo.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/crew/tools/file.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/crew/tools/google.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/crew/tools/gtrends.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/crew/tools/md2pdf.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/crew/tools/rag.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/crew/tools/search.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/crew/tools/url.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/exceptions.c +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/exceptions.pyx +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/handlers/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/handlers/bots.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/handlers/chat.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/interfaces/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/interfaces/database.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/llms/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/llms/abstract.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/llms/anthropic.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/llms/google.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/llms/groq.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/llms/hf.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/llms/openai.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/llms/pipes.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/llms/vertex.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/abstract.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/audio.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/basepdf.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/basevideo.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/csv.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/dir.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/excel.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/github.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/handlers/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/handlers/data.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/image.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/json.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/pdfchapters.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/pdffn.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/pdfimages.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/pdfmark.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/pdftables.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/ppt.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/qa.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/repo.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/rtd.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/txt.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/utils/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/utils/models.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/video.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/videolocal.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/vimeo.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/web.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/web_base.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/word.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/loaders/youtube.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/manager.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/models.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/py.typed +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/stores/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/stores/abstract.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/stores/milvus.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/stores/qdrant.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/tools/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/tools/abstract.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/tools/asknews.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/tools/bing.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/tools/duck.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/tools/google.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/tools/stack.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/tools/weather.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/tools/wikipedia.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/tools/zipcode.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/utils/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/utils/parsers/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/utils/parsers/toml.c +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/utils/parsers/toml.pyx +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/utils/toml.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/utils/types.cpp +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/utils/types.pyx +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/parrot/utils/uv.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/pyproject.toml +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/pytest.ini +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/requirements/requirements-dev.txt +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/resources/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/resources/quick.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/resources/users/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/resources/users/handlers.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/resources/users/models.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/run.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/settings/__init__.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/settings/settings.py +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/setup.cfg +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/templates/.compiled +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/templates/README.md +0 -0
- {ai_parrot-0.3.8 → ai_parrot-0.3.10}/tox.ini +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ai-parrot
|
|
3
|
-
Version: 0.3.8
|
|
3
|
+
Version: 0.3.10
|
|
4
4
|
Summary: Live Chatbots based on Langchain chatbots and Agents Integrated into Navigator Framework or used into aiohttp applications.
|
|
5
5
|
Home-page: https://github.com/phenobarbital/ai-parrot
|
|
6
6
|
Author: Jesus Lara
|
|
@@ -78,6 +78,8 @@ Requires-Dist: O365==2.0.35
|
|
|
78
78
|
Requires-Dist: stackapi==0.3.1
|
|
79
79
|
Requires-Dist: torchvision==0.19.1
|
|
80
80
|
Requires-Dist: tf-keras==2.17.0
|
|
81
|
+
Requires-Dist: simsimd==4.3.1
|
|
82
|
+
Requires-Dist: opencv-python==4.10.0.84
|
|
81
83
|
Provides-Extra: loaders
|
|
82
84
|
Requires-Dist: unstructured==0.14.3; extra == "loaders"
|
|
83
85
|
Requires-Dist: unstructured-client==0.18.0; extra == "loaders"
|
|
@@ -108,6 +110,7 @@ Requires-Dist: ftfy==6.2.3; extra == "loaders"
|
|
|
108
110
|
Requires-Dist: librosa==0.10.1; extra == "loaders"
|
|
109
111
|
Requires-Dist: XlsxWriter==3.2.0; extra == "loaders"
|
|
110
112
|
Requires-Dist: timm==1.0.9; extra == "loaders"
|
|
113
|
+
Requires-Dist: easyocr==1.7.1; extra == "loaders"
|
|
111
114
|
Provides-Extra: anthropic
|
|
112
115
|
Requires-Dist: langchain-anthropic==0.1.11; extra == "anthropic"
|
|
113
116
|
Requires-Dist: anthropic==0.25.2; extra == "anthropic"
|
|
@@ -140,8 +143,6 @@ Requires-Dist: annoy==1.17.3; extra == "analytics"
|
|
|
140
143
|
Requires-Dist: gradio_tools==0.0.9; extra == "analytics"
|
|
141
144
|
Requires-Dist: gradio-client==0.2.9; extra == "analytics"
|
|
142
145
|
Requires-Dist: streamlit==1.37.1; extra == "analytics"
|
|
143
|
-
Requires-Dist: simsimd==4.3.1; extra == "analytics"
|
|
144
|
-
Requires-Dist: opencv-python==4.10.0.84; extra == "analytics"
|
|
145
146
|
|
|
146
147
|
# AI Parrot: Python package for creating Chatbots
|
|
147
148
|
This is an open-source Python package for creating Chatbots based on Langchain and Navigator.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ai-parrot
|
|
3
|
-
Version: 0.3.8
|
|
3
|
+
Version: 0.3.10
|
|
4
4
|
Summary: Live Chatbots based on Langchain chatbots and Agents Integrated into Navigator Framework or used into aiohttp applications.
|
|
5
5
|
Home-page: https://github.com/phenobarbital/ai-parrot
|
|
6
6
|
Author: Jesus Lara
|
|
@@ -78,6 +78,8 @@ Requires-Dist: O365==2.0.35
|
|
|
78
78
|
Requires-Dist: stackapi==0.3.1
|
|
79
79
|
Requires-Dist: torchvision==0.19.1
|
|
80
80
|
Requires-Dist: tf-keras==2.17.0
|
|
81
|
+
Requires-Dist: simsimd==4.3.1
|
|
82
|
+
Requires-Dist: opencv-python==4.10.0.84
|
|
81
83
|
Provides-Extra: loaders
|
|
82
84
|
Requires-Dist: unstructured==0.14.3; extra == "loaders"
|
|
83
85
|
Requires-Dist: unstructured-client==0.18.0; extra == "loaders"
|
|
@@ -108,6 +110,7 @@ Requires-Dist: ftfy==6.2.3; extra == "loaders"
|
|
|
108
110
|
Requires-Dist: librosa==0.10.1; extra == "loaders"
|
|
109
111
|
Requires-Dist: XlsxWriter==3.2.0; extra == "loaders"
|
|
110
112
|
Requires-Dist: timm==1.0.9; extra == "loaders"
|
|
113
|
+
Requires-Dist: easyocr==1.7.1; extra == "loaders"
|
|
111
114
|
Provides-Extra: anthropic
|
|
112
115
|
Requires-Dist: langchain-anthropic==0.1.11; extra == "anthropic"
|
|
113
116
|
Requires-Dist: anthropic==0.25.2; extra == "anthropic"
|
|
@@ -140,8 +143,6 @@ Requires-Dist: annoy==1.17.3; extra == "analytics"
|
|
|
140
143
|
Requires-Dist: gradio_tools==0.0.9; extra == "analytics"
|
|
141
144
|
Requires-Dist: gradio-client==0.2.9; extra == "analytics"
|
|
142
145
|
Requires-Dist: streamlit==1.37.1; extra == "analytics"
|
|
143
|
-
Requires-Dist: simsimd==4.3.1; extra == "analytics"
|
|
144
|
-
Requires-Dist: opencv-python==4.10.0.84; extra == "analytics"
|
|
145
146
|
|
|
146
147
|
# AI Parrot: Python package for creating Chatbots
|
|
147
148
|
This is an open-source Python package for creating Chatbots based on Langchain and Navigator.
|
|
@@ -47,14 +47,14 @@ O365==2.0.35
|
|
|
47
47
|
stackapi==0.3.1
|
|
48
48
|
torchvision==0.19.1
|
|
49
49
|
tf-keras==2.17.0
|
|
50
|
+
simsimd==4.3.1
|
|
51
|
+
opencv-python==4.10.0.84
|
|
50
52
|
|
|
51
53
|
[analytics]
|
|
52
54
|
annoy==1.17.3
|
|
53
55
|
gradio_tools==0.0.9
|
|
54
56
|
gradio-client==0.2.9
|
|
55
57
|
streamlit==1.37.1
|
|
56
|
-
simsimd==4.3.1
|
|
57
|
-
opencv-python==4.10.0.84
|
|
58
58
|
|
|
59
59
|
[anthropic]
|
|
60
60
|
langchain-anthropic==0.1.11
|
|
@@ -107,6 +107,7 @@ ftfy==6.2.3
|
|
|
107
107
|
librosa==0.10.1
|
|
108
108
|
XlsxWriter==3.2.0
|
|
109
109
|
timm==1.0.9
|
|
110
|
+
easyocr==1.7.1
|
|
110
111
|
|
|
111
112
|
[milvus]
|
|
112
113
|
langchain-milvus>=0.1.4
|
|
@@ -6,6 +6,7 @@ import re
|
|
|
6
6
|
import ftfy
|
|
7
7
|
import fitz
|
|
8
8
|
import pytesseract
|
|
9
|
+
from pytesseract import Output
|
|
9
10
|
from paddleocr import PaddleOCR
|
|
10
11
|
import torch
|
|
11
12
|
import cv2
|
|
@@ -15,16 +16,38 @@ from transformers import (
|
|
|
15
16
|
# VisionEncoderDecoderConfig,
|
|
16
17
|
# ViTImageProcessor,
|
|
17
18
|
# AutoTokenizer,
|
|
19
|
+
LayoutLMv3FeatureExtractor,
|
|
20
|
+
LayoutLMv3TokenizerFast,
|
|
18
21
|
LayoutLMv3ForTokenClassification,
|
|
19
22
|
LayoutLMv3Processor
|
|
20
23
|
)
|
|
21
24
|
from pdf4llm import to_markdown
|
|
22
25
|
from PIL import Image
|
|
23
26
|
from langchain.docstore.document import Document
|
|
24
|
-
from navconfig import
|
|
27
|
+
from navconfig.logging import logging
|
|
25
28
|
from .basepdf import BasePDF
|
|
26
29
|
|
|
27
30
|
|
|
31
|
+
logging.getLogger(name='ppocr').setLevel(logging.INFO)
|
|
32
|
+
|
|
33
|
+
# Function to rescale bounding boxes
|
|
34
|
+
def rescale_bounding_boxes(bboxes, image_width, image_height, target_size=1000):
|
|
35
|
+
"""Rescale bounding boxes to fit within the target size for LayoutLMv3."""
|
|
36
|
+
rescaled_bboxes = []
|
|
37
|
+
for bbox in bboxes:
|
|
38
|
+
x1, y1 = bbox[0]
|
|
39
|
+
x2, y2 = bbox[2]
|
|
40
|
+
# Rescale based on the image dimensions
|
|
41
|
+
rescaled_bbox = [
|
|
42
|
+
int(x1 / image_width * target_size),
|
|
43
|
+
int(y1 / image_height * target_size),
|
|
44
|
+
int(x2 / image_width * target_size),
|
|
45
|
+
int(y2 / image_height * target_size)
|
|
46
|
+
]
|
|
47
|
+
rescaled_bboxes.append(rescaled_bbox)
|
|
48
|
+
return rescaled_bboxes
|
|
49
|
+
|
|
50
|
+
|
|
28
51
|
class PDFLoader(BasePDF):
|
|
29
52
|
"""
|
|
30
53
|
Loader for PDF files.
|
|
@@ -50,13 +73,22 @@ class PDFLoader(BasePDF):
|
|
|
50
73
|
self.page_as_images = kwargs.get('page_as_images', False)
|
|
51
74
|
if self.page_as_images is True:
|
|
52
75
|
# Load the processor and model from Hugging Face
|
|
76
|
+
# self.feature_extractor = LayoutLMv3FeatureExtractor(apply_ocr=False)
|
|
77
|
+
# self.image_tokenizer = LayoutLMv3TokenizerFast.from_pretrained(
|
|
78
|
+
# "microsoft/layoutlmv3-base"
|
|
79
|
+
# )
|
|
80
|
+
# self.image_processor = LayoutLMv3Processor(
|
|
81
|
+
# self.feature_extractor,
|
|
82
|
+
# self.image_tokenizer
|
|
83
|
+
# )
|
|
53
84
|
self.image_processor = LayoutLMv3Processor.from_pretrained(
|
|
54
85
|
"microsoft/layoutlmv3-base",
|
|
55
|
-
apply_ocr=
|
|
86
|
+
apply_ocr=False
|
|
56
87
|
)
|
|
88
|
+
# LayoutLMv3ForSequenceClassification.from_pretrained
|
|
57
89
|
self.image_model = LayoutLMv3ForTokenClassification.from_pretrained(
|
|
58
|
-
|
|
59
|
-
"HYPJUDY/layoutlmv3-base-finetuned-funsd"
|
|
90
|
+
"microsoft/layoutlmv3-base"
|
|
91
|
+
# "HYPJUDY/layoutlmv3-base-finetuned-funsd"
|
|
60
92
|
)
|
|
61
93
|
# Set device to GPU if available
|
|
62
94
|
self.image_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
@@ -73,63 +105,63 @@ class PDFLoader(BasePDF):
|
|
|
73
105
|
if table_settings:
|
|
74
106
|
self.table_settings.update(table_settings)
|
|
75
107
|
|
|
76
|
-
def explain_image(self, image_path):
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
108
|
+
# def explain_image(self, image_path):
|
|
109
|
+
# """Function to explain the image."""
|
|
110
|
+
# # with open(image_path, "rb") as image_file:
|
|
111
|
+
# # image_content = image_file.read()
|
|
112
|
+
|
|
113
|
+
# # Open the image
|
|
114
|
+
# image = cv2.imread(image_path)
|
|
115
|
+
# task_prompt = "<s_docvqa><s_question>{user_input}</s_question><s_answer>"
|
|
116
|
+
# question = "Extract Questions about Happily Greet"
|
|
117
|
+
# prompt = task_prompt.replace("{user_input}", question)
|
|
118
|
+
|
|
119
|
+
# decoder_input_ids = self.image_processor.tokenizer(
|
|
120
|
+
# prompt,
|
|
121
|
+
# add_special_tokens=False,
|
|
122
|
+
# return_tensors="pt",
|
|
123
|
+
# ).input_ids
|
|
124
|
+
|
|
125
|
+
# pixel_values = self.image_processor(
|
|
126
|
+
# image,
|
|
127
|
+
# return_tensors="pt"
|
|
128
|
+
# ).pixel_values
|
|
129
|
+
|
|
130
|
+
# # Send inputs to the appropriate device
|
|
131
|
+
# pixel_values = pixel_values.to(self.image_device)
|
|
132
|
+
# decoder_input_ids = decoder_input_ids.to(self.image_device)
|
|
133
|
+
|
|
134
|
+
# outputs = self.image_model.generate(
|
|
135
|
+
# pixel_values,
|
|
136
|
+
# decoder_input_ids=decoder_input_ids,
|
|
137
|
+
# max_length=self.image_model.decoder.config.max_position_embeddings,
|
|
138
|
+
# pad_token_id=self.image_processor.tokenizer.pad_token_id,
|
|
139
|
+
# eos_token_id=self.image_processor.tokenizer.eos_token_id,
|
|
140
|
+
# bad_words_ids=[[self.image_processor.tokenizer.unk_token_id]],
|
|
141
|
+
# # use_cache=True
|
|
142
|
+
# return_dict_in_generate=True,
|
|
143
|
+
# )
|
|
144
|
+
|
|
145
|
+
# sequence = self.image_processor.batch_decode(outputs.sequences)[0]
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
# sequence = sequence.replace(
|
|
149
|
+
# self.image_processor.tokenizer.eos_token, ""
|
|
150
|
+
# ).replace(
|
|
151
|
+
# self.image_processor.tokenizer.pad_token, ""
|
|
152
|
+
# )
|
|
153
|
+
# # remove first task start token
|
|
154
|
+
# sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()
|
|
155
|
+
# # Print the extracted sequence
|
|
156
|
+
# print("Extracted Text:", sequence)
|
|
157
|
+
|
|
158
|
+
# print(self.image_processor.token2json(sequence))
|
|
159
|
+
|
|
160
|
+
# # Format the output as Markdown (optional step)
|
|
161
|
+
# markdown_text = self.format_as_markdown(sequence)
|
|
162
|
+
# print("Markdown Format:\n", markdown_text)
|
|
163
|
+
|
|
164
|
+
# return None
|
|
133
165
|
|
|
134
166
|
def convert_to_markdown(self, text):
|
|
135
167
|
"""
|
|
@@ -141,7 +173,7 @@ class PDFLoader(BasePDF):
|
|
|
141
173
|
# Detect headings and bold them
|
|
142
174
|
markdown_text = re.sub(r"(^.*Scorecard.*$)", r"## \1", markdown_text)
|
|
143
175
|
# Convert lines with ":" to a list item (rough approach)
|
|
144
|
-
markdown_text = re.sub(r"(\w+):", r"- **\1**:", markdown_text)
|
|
176
|
+
# markdown_text = re.sub(r"(\w+):", r"- **\1**:", markdown_text)
|
|
145
177
|
# Return the markdown formatted text
|
|
146
178
|
return markdown_text
|
|
147
179
|
|
|
@@ -164,40 +196,77 @@ class PDFLoader(BasePDF):
|
|
|
164
196
|
|
|
165
197
|
return cleaned_text.strip()
|
|
166
198
|
|
|
167
|
-
def
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
encoding = {k: v.to(self.image_device) for k, v in encoding.items()}
|
|
199
|
+
def create_bounding_box(self, bbox_data):
|
|
200
|
+
xs = []
|
|
201
|
+
ys = []
|
|
202
|
+
for x, y in bbox_data:
|
|
203
|
+
xs.append(x)
|
|
204
|
+
ys.append(y)
|
|
174
205
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
206
|
+
left = int(min(xs))
|
|
207
|
+
top = int(min(ys))
|
|
208
|
+
right = int(max(xs))
|
|
209
|
+
bottom = int(max(ys))
|
|
178
210
|
|
|
179
|
-
|
|
180
|
-
predictions = logits.argmax(-1).squeeze().tolist()
|
|
181
|
-
labels = [self.image_model.config.id2label[pred] for pred in predictions]
|
|
211
|
+
return [left, top, right, bottom]
|
|
182
212
|
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
213
|
+
def extract_page_text(self, image_path) -> str:
|
|
214
|
+
# Open the image
|
|
215
|
+
image = Image.open(image_path).convert("RGB")
|
|
216
|
+
image_width, image_height = image.size
|
|
217
|
+
|
|
218
|
+
# Initialize PaddleOCR with English language
|
|
219
|
+
ocr = PaddleOCR(use_angle_cls=True, lang='en')
|
|
220
|
+
ocr_result = ocr.ocr(str(image_path), cls=True)
|
|
221
|
+
|
|
222
|
+
# Collect the text and bounding boxes
|
|
223
|
+
text_with_boxes = []
|
|
224
|
+
for line in ocr_result[0]:
|
|
225
|
+
text = line[1][0] # Extract the text
|
|
226
|
+
bbox = line[0] # Extract the bounding box
|
|
227
|
+
text_with_boxes.append((text, bbox))
|
|
228
|
+
|
|
229
|
+
# Step 2: Sort text based on y-coordinate (top-down order)
|
|
230
|
+
def average_y(bbox):
|
|
231
|
+
return sum([point[1] for point in bbox]) / len(bbox)
|
|
232
|
+
|
|
233
|
+
text_with_boxes.sort(key=lambda x: average_y(x[1]))
|
|
234
|
+
|
|
235
|
+
# Insert line breaks based on y-coordinate differences
|
|
236
|
+
words_with_newlines = []
|
|
237
|
+
last_y = None
|
|
238
|
+
threshold = 20 # You can adjust this value based on the document's layout
|
|
239
|
+
|
|
240
|
+
for _, (word, bbox) in enumerate(text_with_boxes):
|
|
241
|
+
current_y = average_y(bbox)
|
|
242
|
+
if last_y is not None and current_y - last_y > threshold:
|
|
243
|
+
words_with_newlines.append("\n") # Insert a line break
|
|
244
|
+
words_with_newlines.append(word)
|
|
245
|
+
last_y = current_y
|
|
246
|
+
|
|
247
|
+
# # Step 3: Extract words and bounding boxes after sorting
|
|
248
|
+
# words = [item[0] for item in text_with_boxes]
|
|
249
|
+
# bounding_boxes = [item[1] for item in text_with_boxes]
|
|
250
|
+
|
|
251
|
+
# # Step 4: Rescale bounding boxes to the 0-1000 range for LayoutLMv3
|
|
252
|
+
# boxes = rescale_bounding_boxes(
|
|
253
|
+
# bounding_boxes,
|
|
254
|
+
# image_width,
|
|
255
|
+
# image_height
|
|
256
|
+
# )
|
|
257
|
+
|
|
258
|
+
# # Print extracted text and bounding boxes
|
|
259
|
+
# # for word, bbox in zip(words, boxes):
|
|
260
|
+
# # print(f"Word: {word}, Bounding Box: {bbox}")
|
|
261
|
+
|
|
262
|
+
# # Processor handles the OCR internally, no need for words or boxes
|
|
263
|
+
# encoded_inputs = self.image_processor(image, words, boxes=boxes, return_tensors="pt")
|
|
264
|
+
# outputs = self.image_model(**encoded_inputs)
|
|
265
|
+
|
|
266
|
+
# Step 7: Join the sorted words into a paragraph
|
|
267
|
+
paragraph = " ".join(words_with_newlines)
|
|
268
|
+
|
|
269
|
+
cleaned_text = self.clean_tokenized_text(paragraph)
|
|
201
270
|
markdown_text = self.convert_to_markdown(cleaned_text)
|
|
202
271
|
return markdown_text
|
|
203
272
|
|
|
@@ -217,6 +286,10 @@ class PDFLoader(BasePDF):
|
|
|
217
286
|
docs = []
|
|
218
287
|
try:
|
|
219
288
|
md_text = to_markdown(pdf) # get markdown for all pages
|
|
289
|
+
try:
|
|
290
|
+
summary_document = self.get_summary_from_text(md_text)
|
|
291
|
+
except Exception:
|
|
292
|
+
summary_document = ''
|
|
220
293
|
_meta = {
|
|
221
294
|
"url": f'{path}',
|
|
222
295
|
"source": f"{path.name}",
|
|
@@ -226,11 +299,11 @@ class PDFLoader(BasePDF):
|
|
|
226
299
|
"answer": '',
|
|
227
300
|
"source_type": self._source_type,
|
|
228
301
|
"data": {},
|
|
229
|
-
"summary": '',
|
|
302
|
+
"summary": '-',
|
|
230
303
|
"document_meta": {
|
|
231
|
-
"title": pdf.metadata.get("title", ""),
|
|
232
|
-
"creationDate": pdf.metadata.get("creationDate", ""),
|
|
233
|
-
"author": pdf.metadata.get("author", ""),
|
|
304
|
+
"title": pdf.metadata.get("title", ""), # pylint: disable=E1101
|
|
305
|
+
"creationDate": pdf.metadata.get("creationDate", ""), # pylint: disable=E1101
|
|
306
|
+
"author": pdf.metadata.get("author", ""), # pylint: disable=E1101
|
|
234
307
|
}
|
|
235
308
|
}
|
|
236
309
|
docs.append(
|
|
@@ -239,6 +312,14 @@ class PDFLoader(BasePDF):
|
|
|
239
312
|
metadata=_meta
|
|
240
313
|
)
|
|
241
314
|
)
|
|
315
|
+
if summary_document:
|
|
316
|
+
summary_document = f"**Summary**\n{path.name}\n" + summary_document
|
|
317
|
+
docs.append(
|
|
318
|
+
Document(
|
|
319
|
+
page_content=summary_document,
|
|
320
|
+
metadata=_meta
|
|
321
|
+
)
|
|
322
|
+
)
|
|
242
323
|
except Exception:
|
|
243
324
|
pass
|
|
244
325
|
for page_number in range(pdf.page_count):
|
|
@@ -250,9 +331,9 @@ class PDFLoader(BasePDF):
|
|
|
250
331
|
try:
|
|
251
332
|
summary = self.get_summary_from_text(text)
|
|
252
333
|
except Exception:
|
|
253
|
-
summary = ''
|
|
334
|
+
summary = '-'
|
|
254
335
|
metadata = {
|
|
255
|
-
"url":
|
|
336
|
+
"url": f"{path}:#{page_num}",
|
|
256
337
|
"source": f"{path.name} Page.#{page_num}",
|
|
257
338
|
"filename": path.name,
|
|
258
339
|
"index": f"{page_num}",
|
|
@@ -261,11 +342,10 @@ class PDFLoader(BasePDF):
|
|
|
261
342
|
"answer": '',
|
|
262
343
|
"source_type": self._source_type,
|
|
263
344
|
"data": {},
|
|
264
|
-
"summary":
|
|
345
|
+
"summary": '',
|
|
265
346
|
"document_meta": {
|
|
266
|
-
"title": pdf.metadata.get("title", ""),
|
|
267
|
-
"
|
|
268
|
-
"author": pdf.metadata.get("author", ""),
|
|
347
|
+
"title": pdf.metadata.get("title", ""), # pylint: disable=E1101
|
|
348
|
+
"author": pdf.metadata.get("author", ""), # pylint: disable=E1101
|
|
269
349
|
}
|
|
270
350
|
}
|
|
271
351
|
docs.append(
|
|
@@ -274,6 +354,15 @@ class PDFLoader(BasePDF):
|
|
|
274
354
|
metadata=metadata
|
|
275
355
|
)
|
|
276
356
|
)
|
|
357
|
+
# And Summary Document:
|
|
358
|
+
if summary:
|
|
359
|
+
sm = f"**Summary**\n{path.name} Page.#{page_num}\n" + summary
|
|
360
|
+
docs.append(
|
|
361
|
+
Document(
|
|
362
|
+
page_content=sm,
|
|
363
|
+
metadata=metadata
|
|
364
|
+
)
|
|
365
|
+
)
|
|
277
366
|
# Extract images and use OCR to get text from each image
|
|
278
367
|
# second: images
|
|
279
368
|
file_name = path.stem.replace(' ', '_').replace('.', '').lower()
|
|
@@ -338,7 +427,7 @@ class PDFLoader(BasePDF):
|
|
|
338
427
|
df = df.dropna(axis=1, how='all')
|
|
339
428
|
df = df.dropna(how='all', axis=0) # Drop empty rows
|
|
340
429
|
table_meta = {
|
|
341
|
-
"url":
|
|
430
|
+
"url": f"{path.name} Page.#{page_num} Table.#{tab_idx}",
|
|
342
431
|
"source": f"{path.name} Page.#{page_num} Table.#{tab_idx}",
|
|
343
432
|
"filename": path.name,
|
|
344
433
|
"index": f"{path.name}:{page_num}",
|
|
@@ -346,7 +435,7 @@ class PDFLoader(BasePDF):
|
|
|
346
435
|
"answer": '',
|
|
347
436
|
"type": 'table',
|
|
348
437
|
"data": {},
|
|
349
|
-
"summary": '',
|
|
438
|
+
"summary": '-',
|
|
350
439
|
"document_meta": {
|
|
351
440
|
"table_index": tab_idx,
|
|
352
441
|
"table_shape": df.shape,
|
|
@@ -366,9 +455,10 @@ class PDFLoader(BasePDF):
|
|
|
366
455
|
if self.page_as_images is True:
|
|
367
456
|
# Convert the page to a Pixmap (which is an image)
|
|
368
457
|
mat = fitz.Matrix(2, 2)
|
|
369
|
-
pix = page.get_pixmap(dpi=
|
|
458
|
+
pix = page.get_pixmap(dpi=600, matrix=mat) # Increase DPI for better resolution
|
|
370
459
|
img_name = f'{file_name}_page_{page_num}.png'
|
|
371
460
|
img_path = self._imgdir.joinpath(img_name)
|
|
461
|
+
print('IMAGE > ', img_path)
|
|
372
462
|
if img_path.exists():
|
|
373
463
|
img_path.unlink(missing_ok=True)
|
|
374
464
|
self.logger.notice(
|
|
@@ -380,7 +470,7 @@ class PDFLoader(BasePDF):
|
|
|
380
470
|
# TODO passing the image to a AI visual to get explanation
|
|
381
471
|
# Get the extracted text from the image
|
|
382
472
|
text = self.extract_page_text(img_path)
|
|
383
|
-
print('TEXT EXTRACTED >> ', text)
|
|
473
|
+
# print('TEXT EXTRACTED >> ', text)
|
|
384
474
|
url = f'/static/images/{img_name}'
|
|
385
475
|
image_meta = {
|
|
386
476
|
"url": url,
|
|
@@ -391,7 +481,7 @@ class PDFLoader(BasePDF):
|
|
|
391
481
|
"answer": '',
|
|
392
482
|
"type": 'page',
|
|
393
483
|
"data": {},
|
|
394
|
-
"summary": '',
|
|
484
|
+
"summary": '-',
|
|
395
485
|
"document_meta": {
|
|
396
486
|
"image_name": img_name,
|
|
397
487
|
"page_number": f"{page_number}"
|
|
@@ -406,25 +496,16 @@ class PDFLoader(BasePDF):
|
|
|
406
496
|
else:
|
|
407
497
|
return []
|
|
408
498
|
|
|
409
|
-
def
|
|
410
|
-
# Initialize PaddleOCR
|
|
411
|
-
|
|
499
|
+
def get_paddleocr(self, img_path) -> list:
|
|
500
|
+
# Initialize PaddleOCR
|
|
501
|
+
ocr_model = PaddleOCR(
|
|
412
502
|
lang='en',
|
|
413
503
|
det_model_dir=None,
|
|
414
504
|
rec_model_dir=None,
|
|
415
505
|
rec_char_dict_path=None,
|
|
416
|
-
table=True,
|
|
417
|
-
|
|
506
|
+
# table=True,
|
|
507
|
+
use_angle_cls=True,
|
|
418
508
|
# use_gpu=True
|
|
419
509
|
)
|
|
420
|
-
result =
|
|
421
|
-
|
|
422
|
-
# extract tables:
|
|
423
|
-
# The result contains the table structure and content
|
|
424
|
-
tables = []
|
|
425
|
-
for line in result:
|
|
426
|
-
if 'html' in line[1]:
|
|
427
|
-
html_table = line[1]['html']
|
|
428
|
-
tables.append(html_table)
|
|
429
|
-
|
|
430
|
-
print('TABLES > ', tables)
|
|
510
|
+
result = ocr_model.ocr(img_path, cls=True)
|
|
511
|
+
return result
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
__title__ = "ai-parrot"
|
|
4
4
|
__description__ = "Live Chatbots based on Langchain chatbots and Agents \
|
|
5
5
|
Integrated into Navigator Framework or used into aiohttp applications."
|
|
6
|
-
__version__ = "0.3.
|
|
6
|
+
__version__ = "0.3.10"
|
|
7
7
|
__author__ = "Jesus Lara"
|
|
8
8
|
__author_email__ = "jesuslarag@gmail.com"
|
|
9
9
|
__license__ = "MIT"
|
|
@@ -190,7 +190,9 @@ setup(
|
|
|
190
190
|
"O365==2.0.35",
|
|
191
191
|
"stackapi==0.3.1",
|
|
192
192
|
"torchvision==0.19.1",
|
|
193
|
-
"tf-keras==2.17.0"
|
|
193
|
+
"tf-keras==2.17.0",
|
|
194
|
+
"simsimd==4.3.1",
|
|
195
|
+
"opencv-python==4.10.0.84"
|
|
194
196
|
],
|
|
195
197
|
extras_require={
|
|
196
198
|
"loaders": [
|
|
@@ -224,6 +226,7 @@ setup(
|
|
|
224
226
|
"XlsxWriter==3.2.0",
|
|
225
227
|
# "xformers==0.0.27.post2",
|
|
226
228
|
"timm==1.0.9",
|
|
229
|
+
"easyocr==1.7.1"
|
|
227
230
|
],
|
|
228
231
|
"anthropic": [
|
|
229
232
|
"langchain-anthropic==0.1.11",
|
|
@@ -265,8 +268,6 @@ setup(
|
|
|
265
268
|
"gradio_tools==0.0.9",
|
|
266
269
|
"gradio-client==0.2.9",
|
|
267
270
|
"streamlit==1.37.1",
|
|
268
|
-
"simsimd==4.3.1",
|
|
269
|
-
"opencv-python==4.10.0.84"
|
|
270
271
|
# "timm==0.9.16", # image-processor
|
|
271
272
|
# "ultralytics==8.2.4", # image-processor
|
|
272
273
|
# "albumentations-1.4.4",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai_parrot-0.3.8 → ai_parrot-0.3.10}/documents/AR_Certification_Skill_Practice_Scorecard_EXAMPLE.pdf
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|