ai-parrot 0.3.6__tar.gz → 0.3.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ai-parrot has been flagged in the registry as possibly problematic.
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/Makefile +4 -3
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/PKG-INFO +2 -1
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/ai_parrot.egg-info/PKG-INFO +2 -1
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/ai_parrot.egg-info/SOURCES.txt +1 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/ai_parrot.egg-info/requires.txt +1 -0
- ai_parrot-0.3.9/examples/askbuddy/create_bot.py +32 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/pdf.py +199 -118
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/videolocal.py +7 -5
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/stores/milvus.py +22 -18
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/version.py +1 -1
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/setup.py +1 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/.flake8 +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/.github/dependabot.yml +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/.github/workflows/codeql-analysis.yml +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/.github/workflows/release.yml +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/.gitignore +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/.isort.cfg +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/.pylintrc +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/INSTALL +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/LICENSE +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/README.md +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/SECURITY.md +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/ai_parrot.egg-info/dependency_links.txt +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/ai_parrot.egg-info/top_level.txt +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/app.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/documents/AR_Certification_Skill_Practice_Scorecard_EXAMPLE.pdf +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/documents/Day 1_Essentials_AR_PPT.pdf +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/documents/ex-code-loaders.txt +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/documents/video_2024-09-11_19-43-58.mp3 +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/documents/video_2024-09-11_19-43-58.mp4 +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/documents/video_2024-09-11_19-43-58.vtt +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/etc/navigator-ssl.ini +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/etc/navigator.ini +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/etc/ssl/domain.ext +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/etc/ssl/navigator.local.crt +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/etc/ssl/navigator.local.csr +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/etc/ssl/navigator.local.key +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/etc/ssl/rootCA.crt +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/etc/ssl/rootCA.key +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/etc/ssl/rootCA.srl +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/examples/analyze_video.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/examples/check_bot.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/examples/extract_frames.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/examples/load_pdf.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/examples/test_bot.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/examples/test_question.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/mypy.ini +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/abstract.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/asktroc.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/base.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/basic.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/bose.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/cody.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/copilot.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/dataframe.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/hragents.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/odoo.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/retrievals/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/chatbots/retrievals/constitutional.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/conf.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/crew/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/crew/tools/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/crew/tools/bing.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/crew/tools/config.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/crew/tools/duckgo.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/crew/tools/file.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/crew/tools/google.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/crew/tools/gtrends.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/crew/tools/md2pdf.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/crew/tools/rag.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/crew/tools/search.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/crew/tools/url.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/exceptions.c +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/exceptions.pyx +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/handlers/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/handlers/bots.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/handlers/chat.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/interfaces/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/interfaces/database.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/llms/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/llms/abstract.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/llms/anthropic.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/llms/google.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/llms/groq.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/llms/hf.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/llms/openai.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/llms/pipes.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/llms/vertex.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/abstract.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/audio.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/basepdf.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/basevideo.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/csv.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/dir.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/excel.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/github.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/handlers/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/handlers/data.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/image.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/json.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/pdfchapters.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/pdffn.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/pdfimages.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/pdfmark.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/pdftables.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/ppt.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/qa.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/repo.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/rtd.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/txt.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/utils/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/utils/models.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/video.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/vimeo.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/web.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/web_base.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/word.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/youtube.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/manager.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/models.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/py.typed +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/stores/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/stores/abstract.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/stores/qdrant.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/tools/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/tools/abstract.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/tools/asknews.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/tools/bing.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/tools/duck.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/tools/google.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/tools/stack.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/tools/weather.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/tools/wikipedia.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/tools/zipcode.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/utils/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/utils/parsers/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/utils/parsers/toml.c +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/utils/parsers/toml.pyx +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/utils/toml.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/utils/types.cpp +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/utils/types.pyx +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/utils/uv.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/pyproject.toml +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/pytest.ini +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/requirements/requirements-dev.txt +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/resources/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/resources/quick.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/resources/users/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/resources/users/handlers.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/resources/users/models.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/run.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/settings/__init__.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/settings/settings.py +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/setup.cfg +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/templates/.compiled +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/templates/README.md +0 -0
- {ai_parrot-0.3.6 → ai_parrot-0.3.9}/tox.ini +0 -0
{ai_parrot-0.3.6 → ai_parrot-0.3.9}/Makefile

```diff
@@ -5,14 +5,15 @@ venv:
 install:
     # Install Parrot
    pip install --upgrade python-datamodel
-   pip install --upgrade asyncdb[
+   pip install --upgrade asyncdb[default,bigquery]
    pip install --upgrade navconfig[default]
-   pip install --upgrade navigator-api[locale]
+   pip install --upgrade navigator-api[uvloop,locale]
    # Nav requirements:
    pip install --upgrade navigator-session
    pip install --upgrade navigator-auth
    # QS requirements
-   pip install --upgrade querysource[analytics]
+   # pip install --upgrade querysource[analytics]
+   pip install --upgrade querysource
    # and Parrot:
    # pip install -e .[google,milvus,groq,analytics]
 
```
{ai_parrot-0.3.6 → ai_parrot-0.3.9}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-parrot
-Version: 0.3.6
+Version: 0.3.9
 Summary: Live Chatbots based on Langchain chatbots and Agents Integrated into Navigator Framework or used into aiohttp applications.
 Home-page: https://github.com/phenobarbital/ai-parrot
 Author: Jesus Lara
@@ -108,6 +108,7 @@ Requires-Dist: ftfy==6.2.3; extra == "loaders"
 Requires-Dist: librosa==0.10.1; extra == "loaders"
 Requires-Dist: XlsxWriter==3.2.0; extra == "loaders"
 Requires-Dist: timm==1.0.9; extra == "loaders"
+Requires-Dist: easyocr==1.7.1; extra == "loaders"
 Provides-Extra: anthropic
 Requires-Dist: langchain-anthropic==0.1.11; extra == "anthropic"
 Requires-Dist: anthropic==0.25.2; extra == "anthropic"
```
{ai_parrot-0.3.6 → ai_parrot-0.3.9}/ai_parrot.egg-info/PKG-INFO (identical change)

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ai-parrot
-Version: 0.3.6
+Version: 0.3.9
 Summary: Live Chatbots based on Langchain chatbots and Agents Integrated into Navigator Framework or used into aiohttp applications.
 Home-page: https://github.com/phenobarbital/ai-parrot
 Author: Jesus Lara
@@ -108,6 +108,7 @@ Requires-Dist: ftfy==6.2.3; extra == "loaders"
 Requires-Dist: librosa==0.10.1; extra == "loaders"
 Requires-Dist: XlsxWriter==3.2.0; extra == "loaders"
 Requires-Dist: timm==1.0.9; extra == "loaders"
+Requires-Dist: easyocr==1.7.1; extra == "loaders"
 Provides-Extra: anthropic
 Requires-Dist: langchain-anthropic==0.1.11; extra == "anthropic"
 Requires-Dist: anthropic==0.25.2; extra == "anthropic"
```
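The only dependency added in this release is `easyocr==1.7.1` under the `loaders` extra. Nothing in the diffs below wires it in yet, so purely as a hedged illustration of what the new dependency provides (the image path is a placeholder, not a file from this package):

```python
# Hedged sketch of the easyocr entry point the loaders could call;
# 'page.png' is a placeholder image path.
import easyocr

reader = easyocr.Reader(['en'])        # downloads detection/recognition models on first use
results = reader.readtext('page.png')  # -> list of (bbox, text, confidence)
for bbox, text, confidence in results:
    print(f"{confidence:.2f} {text} @ {bbox}")
```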
ai_parrot-0.3.9/examples/askbuddy/create_bot.py (new file)

```diff
@@ -0,0 +1,32 @@
+import asyncio
+from parrot.chatbots.basic import Chatbot
+from parrot.llms.vertex import VertexLLM
+
+
+async def get_agent():
+    """Return the New Agent.
+    """
+    llm = VertexLLM(
+        model='gemini-1.5-pro',
+        temperature=0.1,
+        top_k=30,
+        Top_p=0.6,
+    )
+    agent = Chatbot(
+        name='AskBuddy',
+        llm=llm
+    )
+    await agent.configure()
+    # Create the Collection
+    if agent.store.collection_exists('employee_information'):
+        await agent.store.delete_collection('employee_information')
+    await agent.store.create_collection(  # pylint: disable=E1120
+        collection_name='employee_information',
+        dimension=768,
+        index_type="IVF_SQ8",
+        metric_type='L2'
+    )
+
+
+if __name__ == "__main__":
+    agent = asyncio.run(get_agent())
```
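As committed, the example has two rough edges: `VertexLLM` is passed `Top_p` (capitalized) where `top_p` was presumably intended, and `get_agent()` never returns the agent it builds, so `asyncio.run(get_agent())` yields `None`. A minimal corrected sketch, assuming those were the intent (parameter names follow the file itself, not a verified `VertexLLM` signature):

```python
# Hedged rewrite of examples/askbuddy/create_bot.py; assumes 'top_p' was the
# intended keyword and that the caller wants the configured agent back.
import asyncio
from parrot.chatbots.basic import Chatbot
from parrot.llms.vertex import VertexLLM


async def get_agent() -> Chatbot:
    llm = VertexLLM(model='gemini-1.5-pro', temperature=0.1, top_k=30, top_p=0.6)
    agent = Chatbot(name='AskBuddy', llm=llm)
    await agent.configure()
    return agent  # the committed version drops the agent here


if __name__ == "__main__":
    agent = asyncio.run(get_agent())
```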
{ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/pdf.py

```diff
@@ -6,6 +6,7 @@ import re
 import ftfy
 import fitz
 import pytesseract
+from pytesseract import Output
 from paddleocr import PaddleOCR
 import torch
 import cv2
@@ -15,16 +16,38 @@ from transformers import (
     # VisionEncoderDecoderConfig,
     # ViTImageProcessor,
     # AutoTokenizer,
+    LayoutLMv3FeatureExtractor,
+    LayoutLMv3TokenizerFast,
     LayoutLMv3ForTokenClassification,
     LayoutLMv3Processor
 )
 from pdf4llm import to_markdown
 from PIL import Image
 from langchain.docstore.document import Document
-from navconfig import logging
+from navconfig.logging import logging
 from .basepdf import BasePDF
 
 
+logging.getLogger(name='ppocr').setLevel(logging.INFO)
+
+# Function to rescale bounding boxes
+def rescale_bounding_boxes(bboxes, image_width, image_height, target_size=1000):
+    """Rescale bounding boxes to fit within the target size for LayoutLMv3."""
+    rescaled_bboxes = []
+    for bbox in bboxes:
+        x1, y1 = bbox[0]
+        x2, y2 = bbox[2]
+        # Rescale based on the image dimensions
+        rescaled_bbox = [
+            int(x1 / image_width * target_size),
+            int(y1 / image_height * target_size),
+            int(x2 / image_width * target_size),
+            int(y2 / image_height * target_size)
+        ]
+        rescaled_bboxes.append(rescaled_bbox)
+    return rescaled_bboxes
+
+
 class PDFLoader(BasePDF):
     """
     Loader for PDF files.
```
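The new module-level `rescale_bounding_boxes` helper normalizes pixel coordinates into LayoutLMv3's expected 0–1000 box space, using the opposite corners of a 4-point box. A quick worked example against the function above (the sample box values are invented):

```python
# PaddleOCR returns quadrilaterals as four (x, y) points; the helper reads
# point [0] (top-left) and point [2] (bottom-right). Sample values are made up.
bboxes = [[(100, 50), (300, 50), (300, 120), (100, 120)]]
boxes = rescale_bounding_boxes(bboxes, image_width=1000, image_height=800)
print(boxes)  # [[100, 62, 300, 150]] -- e.g. 50 / 800 * 1000 = 62 after int() truncation
```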
```diff
@@ -50,13 +73,22 @@ class PDFLoader(BasePDF):
         self.page_as_images = kwargs.get('page_as_images', False)
         if self.page_as_images is True:
             # Load the processor and model from Hugging Face
+            # self.feature_extractor = LayoutLMv3FeatureExtractor(apply_ocr=False)
+            # self.image_tokenizer = LayoutLMv3TokenizerFast.from_pretrained(
+            #     "microsoft/layoutlmv3-base"
+            # )
+            # self.image_processor = LayoutLMv3Processor(
+            #     self.feature_extractor,
+            #     self.image_tokenizer
+            # )
             self.image_processor = LayoutLMv3Processor.from_pretrained(
                 "microsoft/layoutlmv3-base",
-                apply_ocr=True
+                apply_ocr=False
             )
+            # LayoutLMv3ForSequenceClassification.from_pretrained
             self.image_model = LayoutLMv3ForTokenClassification.from_pretrained(
-
-                "HYPJUDY/layoutlmv3-base-finetuned-funsd"
+                "microsoft/layoutlmv3-base"
+                # "HYPJUDY/layoutlmv3-base-finetuned-funsd"
             )
             # Set device to GPU if available
             self.image_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
@@ -73,63 +105,63 @@ class PDFLoader(BasePDF):
         if table_settings:
             self.table_settings.update(table_settings)
 
-    def explain_image(self, image_path):
-        """Function to explain the image."""
-        # with open(image_path, "rb") as image_file:
-        #     image_content = image_file.read()
-
-        # Open the image
-        image = cv2.imread(image_path)
-        task_prompt = "<s_docvqa><s_question>{user_input}</s_question><s_answer>"
-        question = "Extract Questions about Happily Greet"
-        prompt = task_prompt.replace("{user_input}", question)
-
-        decoder_input_ids = self.image_processor.tokenizer(
-            prompt,
-            add_special_tokens=False,
-            return_tensors="pt",
-        ).input_ids
-
-        pixel_values = self.image_processor(
-            image,
-            return_tensors="pt"
-        ).pixel_values
-
-        # Send inputs to the appropriate device
-        pixel_values = pixel_values.to(self.image_device)
-        decoder_input_ids = decoder_input_ids.to(self.image_device)
-
-        outputs = self.image_model.generate(
-            pixel_values,
-            decoder_input_ids=decoder_input_ids,
-            max_length=self.image_model.decoder.config.max_position_embeddings,
-            pad_token_id=self.image_processor.tokenizer.pad_token_id,
-            eos_token_id=self.image_processor.tokenizer.eos_token_id,
-            bad_words_ids=[[self.image_processor.tokenizer.unk_token_id]],
-            # use_cache=True
-            return_dict_in_generate=True,
-        )
-
-        sequence = self.image_processor.batch_decode(outputs.sequences)[0]
-
-
-        sequence = sequence.replace(
-            self.image_processor.tokenizer.eos_token, ""
-        ).replace(
-            self.image_processor.tokenizer.pad_token, ""
-        )
-        # remove first task start token
-        sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()
-        # Print the extracted sequence
-        print("Extracted Text:", sequence)
-
-        print(self.image_processor.token2json(sequence))
-
-        # Format the output as Markdown (optional step)
-        markdown_text = self.format_as_markdown(sequence)
-        print("Markdown Format:\n", markdown_text)
-
-        return None
+    # def explain_image(self, image_path):
+    #     """Function to explain the image."""
+    #     # with open(image_path, "rb") as image_file:
+    #     #     image_content = image_file.read()
+
+    #     # Open the image
+    #     image = cv2.imread(image_path)
+    #     task_prompt = "<s_docvqa><s_question>{user_input}</s_question><s_answer>"
+    #     question = "Extract Questions about Happily Greet"
+    #     prompt = task_prompt.replace("{user_input}", question)
+
+    #     decoder_input_ids = self.image_processor.tokenizer(
+    #         prompt,
+    #         add_special_tokens=False,
+    #         return_tensors="pt",
+    #     ).input_ids
+
+    #     pixel_values = self.image_processor(
+    #         image,
+    #         return_tensors="pt"
+    #     ).pixel_values
+
+    #     # Send inputs to the appropriate device
+    #     pixel_values = pixel_values.to(self.image_device)
+    #     decoder_input_ids = decoder_input_ids.to(self.image_device)
+
+    #     outputs = self.image_model.generate(
+    #         pixel_values,
+    #         decoder_input_ids=decoder_input_ids,
+    #         max_length=self.image_model.decoder.config.max_position_embeddings,
+    #         pad_token_id=self.image_processor.tokenizer.pad_token_id,
+    #         eos_token_id=self.image_processor.tokenizer.eos_token_id,
+    #         bad_words_ids=[[self.image_processor.tokenizer.unk_token_id]],
+    #         # use_cache=True
+    #         return_dict_in_generate=True,
+    #     )
+
+    #     sequence = self.image_processor.batch_decode(outputs.sequences)[0]
+
+
+    #     sequence = sequence.replace(
+    #         self.image_processor.tokenizer.eos_token, ""
+    #     ).replace(
+    #         self.image_processor.tokenizer.pad_token, ""
+    #     )
+    #     # remove first task start token
+    #     sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()
+    #     # Print the extracted sequence
+    #     print("Extracted Text:", sequence)
+
+    #     print(self.image_processor.token2json(sequence))
+
+    #     # Format the output as Markdown (optional step)
+    #     markdown_text = self.format_as_markdown(sequence)
+    #     print("Markdown Format:\n", markdown_text)
+
+    #     return None
 
     def convert_to_markdown(self, text):
         """
@@ -141,7 +173,7 @@ class PDFLoader(BasePDF):
         # Detect headings and bold them
         markdown_text = re.sub(r"(^.*Scorecard.*$)", r"## \1", markdown_text)
         # Convert lines with ":" to a list item (rough approach)
-        markdown_text = re.sub(r"(\w+):", r"- **\1**:", markdown_text)
+        # markdown_text = re.sub(r"(\w+):", r"- **\1**:", markdown_text)
         # Return the markdown formatted text
         return markdown_text
 
```
```diff
@@ -164,40 +196,77 @@ class PDFLoader(BasePDF):
 
         return cleaned_text.strip()
 
-    def …
-        …
-        encoding = {k: v.to(self.image_device) for k, v in encoding.items()}
+    def create_bounding_box(self, bbox_data):
+        xs = []
+        ys = []
+        for x, y in bbox_data:
+            xs.append(x)
+            ys.append(y)
 
-        …
+        left = int(min(xs))
+        top = int(min(ys))
+        right = int(max(xs))
+        bottom = int(max(ys))
 
-        …
-        predictions = logits.argmax(-1).squeeze().tolist()
-        labels = [self.image_model.config.id2label[pred] for pred in predictions]
+        return [left, top, right, bottom]
 
-        …
+    def extract_page_text(self, image_path) -> str:
+        # Open the image
+        image = Image.open(image_path).convert("RGB")
+        image_width, image_height = image.size
+
+        # Initialize PaddleOCR with English language
+        ocr = PaddleOCR(use_angle_cls=True, lang='en')
+        ocr_result = ocr.ocr(str(image_path), cls=True)
+
+        # Collect the text and bounding boxes
+        text_with_boxes = []
+        for line in ocr_result[0]:
+            text = line[1][0]  # Extract the text
+            bbox = line[0]  # Extract the bounding box
+            text_with_boxes.append((text, bbox))
+
+        # Step 2: Sort text based on y-coordinate (top-down order)
+        def average_y(bbox):
+            return sum([point[1] for point in bbox]) / len(bbox)
+
+        text_with_boxes.sort(key=lambda x: average_y(x[1]))
+
+        # Insert line breaks based on y-coordinate differences
+        words_with_newlines = []
+        last_y = None
+        threshold = 20  # You can adjust this value based on the document's layout
+
+        for _, (word, bbox) in enumerate(text_with_boxes):
+            current_y = average_y(bbox)
+            if last_y is not None and current_y - last_y > threshold:
+                words_with_newlines.append("\n")  # Insert a line break
+            words_with_newlines.append(word)
+            last_y = current_y
+
+        # # Step 3: Extract words and bounding boxes after sorting
+        # words = [item[0] for item in text_with_boxes]
+        # bounding_boxes = [item[1] for item in text_with_boxes]
+
+        # # Step 4: Rescale bounding boxes to the 0-1000 range for LayoutLMv3
+        # boxes = rescale_bounding_boxes(
+        #     bounding_boxes,
+        #     image_width,
+        #     image_height
+        # )
+
+        # # Print extracted text and bounding boxes
+        # # for word, bbox in zip(words, boxes):
+        # #     print(f"Word: {word}, Bounding Box: {bbox}")
+
+        # # Processor handles the OCR internally, no need for words or boxes
+        # encoded_inputs = self.image_processor(image, words, boxes=boxes, return_tensors="pt")
+        # outputs = self.image_model(**encoded_inputs)
+
+        # Step 7: Join the sorted words into a paragraph
+        paragraph = " ".join(words_with_newlines)
+
+        cleaned_text = self.clean_tokenized_text(paragraph)
         markdown_text = self.convert_to_markdown(cleaned_text)
         return markdown_text
 
```
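The new `extract_page_text` replaces LayoutLMv3 token classification with plain PaddleOCR plus a y-coordinate heuristic: fragments are sorted top-to-bottom and a newline is inserted whenever the vertical gap exceeds `threshold` pixels. A standalone sketch of that heuristic on made-up OCR output:

```python
# Minimal sketch of the line-break heuristic from extract_page_text; the
# fragments below are invented, not real PaddleOCR output (real boxes are
# four (x, y) corner points per fragment).
fragments = [
    ("Name:", [(10, 12), (60, 12), (60, 28), (10, 28)]),
    ("Jane", [(70, 13), (110, 13), (110, 29), (70, 29)]),
    ("Score:", [(10, 60), (62, 60), (62, 76), (10, 76)]),  # 47px lower -> new line
]

def average_y(bbox):
    return sum(point[1] for point in bbox) / len(bbox)

fragments.sort(key=lambda item: average_y(item[1]))
words, last_y, threshold = [], None, 20
for word, bbox in fragments:
    current_y = average_y(bbox)
    if last_y is not None and current_y - last_y > threshold:
        words.append("\n")  # vertical gap too large: start a new line
    words.append(word)
    last_y = current_y
print(" ".join(words))  # "Name: Jane \n Score:"
```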
```diff
@@ -217,6 +286,10 @@ class PDFLoader(BasePDF):
         docs = []
         try:
             md_text = to_markdown(pdf)  # get markdown for all pages
+            try:
+                summary_document = self.get_summary_from_text(md_text)
+            except Exception:
+                summary_document = ''
             _meta = {
                 "url": f'{path}',
                 "source": f"{path.name}",
@@ -228,9 +301,9 @@ class PDFLoader(BasePDF):
                 "data": {},
                 "summary": '',
                 "document_meta": {
-                    "title": pdf.metadata.get("title", ""),
-                    "creationDate": pdf.metadata.get("creationDate", ""),
-                    "author": pdf.metadata.get("author", ""),
+                    "title": pdf.metadata.get("title", ""),  # pylint: disable=E1101
+                    "creationDate": pdf.metadata.get("creationDate", ""),  # pylint: disable=E1101
+                    "author": pdf.metadata.get("author", ""),  # pylint: disable=E1101
                 }
             }
             docs.append(
@@ -239,6 +312,14 @@ class PDFLoader(BasePDF):
                     metadata=_meta
                 )
             )
+            if summary_document:
+                summary_document = f"**Summary**\n{path.name}\n" + summary_document
+                docs.append(
+                    Document(
+                        page_content=summary_document,
+                        metadata=_meta
+                    )
+                )
         except Exception:
             pass
         for page_number in range(pdf.page_count):
@@ -261,11 +342,10 @@ class PDFLoader(BasePDF):
                 "answer": '',
                 "source_type": self._source_type,
                 "data": {},
-                "summary": …
+                "summary": '',
                 "document_meta": {
-                    "title": pdf.metadata.get("title", ""),
-                    "creationDate": pdf.metadata.get("creationDate", ""),
-                    "author": pdf.metadata.get("author", ""),
+                    "title": pdf.metadata.get("title", ""),  # pylint: disable=E1101
+                    "author": pdf.metadata.get("author", ""),  # pylint: disable=E1101
                 }
             }
             docs.append(
@@ -274,6 +354,15 @@ class PDFLoader(BasePDF):
                     metadata=metadata
                 )
             )
+            # And Summary Document:
+            if summary:
+                sm = f"**Summary**\n{path.name} Page.#{page_num}\n" + summary
+                docs.append(
+                    Document(
+                        page_content=sm,
+                        metadata=metadata
+                    )
+                )
             # Extract images and use OCR to get text from each image
             # second: images
             file_name = path.stem.replace(' ', '_').replace('.', '').lower()
@@ -366,9 +455,10 @@ class PDFLoader(BasePDF):
             if self.page_as_images is True:
                 # Convert the page to a Pixmap (which is an image)
                 mat = fitz.Matrix(2, 2)
-                pix = page.get_pixmap(dpi=…
+                pix = page.get_pixmap(dpi=600, matrix=mat)  # Increase DPI for better resolution
                 img_name = f'{file_name}_page_{page_num}.png'
                 img_path = self._imgdir.joinpath(img_name)
+                print('IMAGE > ', img_path)
                 if img_path.exists():
                     img_path.unlink(missing_ok=True)
                 self.logger.notice(
@@ -380,7 +470,7 @@ class PDFLoader(BasePDF):
                 # TODO passing the image to a AI visual to get explanation
                 # Get the extracted text from the image
                 text = self.extract_page_text(img_path)
-                print('TEXT EXTRACTED >> ', text)
+                # print('TEXT EXTRACTED >> ', text)
                 url = f'/static/images/{img_name}'
                 image_meta = {
                     "url": url,
@@ -406,25 +496,16 @@ class PDFLoader(BasePDF):
         else:
             return []
 
-    def …
-        # Initialize PaddleOCR
-        …
+    def get_paddleocr(self, img_path) -> list:
+        # Initialize PaddleOCR
+        ocr_model = PaddleOCR(
             lang='en',
             det_model_dir=None,
             rec_model_dir=None,
             rec_char_dict_path=None,
-            table=True,
-            …
+            # table=True,
+            use_angle_cls=True,
             # use_gpu=True
         )
-        result = …
-        …
-        # extract tables:
-        # The result contains the table structure and content
-        tables = []
-        for line in result:
-            if 'html' in line[1]:
-                html_table = line[1]['html']
-                tables.append(html_table)
-        …
-        print('TABLES > ', tables)
+        result = ocr_model.ocr(img_path, cls=True)
+        return result
```
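The old table-extraction helper, which mined `html` tables out of the OCR result, is gone; the new `get_paddleocr` simply returns the raw PaddleOCR result list. A hedged sketch of consuming that shape (PaddleOCR returns, per image, a list of `[bbox, (text, confidence)]` entries, as the file's own parsing code assumes; the path and `loader` instance are placeholders):

```python
# Assumes an already-constructed PDFLoader instance 'loader' and a rendered
# page image; the [bbox, (text, confidence)] layout follows PaddleOCR's
# documented result convention, matching how pdf.py itself indexes it.
result = loader.get_paddleocr('some_page.png')
for line in result[0]:  # result[0] holds the entries for the first image
    bbox, (text, confidence) = line
    print(f"{confidence:.2f} {text!r} at {bbox}")
```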
{ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/loaders/videolocal.py

```diff
@@ -84,11 +84,13 @@ class VideoLocalLoader(BaseVideoLoader):
         if transcript_whisper:
             # VTT version:
             transcript = self.transcript_to_vtt(transcript_whisper, transcript_path)
-            …
+            transcript_chunks = split_text(transcript, 65535)
+            for chunk in transcript_chunks:
+                doc = Document(
+                    page_content=chunk,
+                    metadata=metadata
+                )
+                documents.append(doc)
             # Saving every dialog chunk as a separate document
             dialogs = self.transcript_to_blocks(transcript_whisper)
             docs = []
```
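The rewritten branch now stores the full VTT transcript as `Document` chunks instead of one oversized blob, splitting at 65,535 characters (a common VARCHAR/field-size ceiling in vector stores). `split_text`'s actual implementation is not shown in this diff, so here is a minimal hypothetical stand-in with the same contract (split a string into pieces no longer than `max_length`):

```python
# Hypothetical stand-in for the split_text helper used above; the real one
# lives elsewhere in parrot and may split on sentence or word boundaries.
def split_text(text: str, max_length: int) -> list[str]:
    return [text[i:i + max_length] for i in range(0, len(text), max_length)]

chunks = split_text("abcdef", 4)
print(chunks)  # ['abcd', 'ef']
```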
{ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/stores/milvus.py

```diff
@@ -1,12 +1,12 @@
 from typing import Optional, Union, Any
 import asyncio
-import uuid
-import torch
+# import uuid
+# import torch
 from pymilvus import (
     MilvusClient,
-    Collection,
-    FieldSchema,
-    CollectionSchema,
+    # Collection,
+    # FieldSchema,
+    # CollectionSchema,
     DataType,
     connections,
     db
@@ -148,10 +148,10 @@ class MilvusStore(AbstractStore):
         )
 
     async def __aenter__(self):
-        try:
-            self.tensor = torch.randn(1000, 1000).cuda()
-        except RuntimeError:
-            self.tensor = None
+        # try:
+        #     self.tensor = torch.randn(1000, 1000).cuda()
+        # except RuntimeError:
+        #     self.tensor = None
         if self._embed_ is None:
             self._embed_ = self.create_embedding(
                 model_name=self.embedding_name
@@ -161,10 +161,10 @@ class MilvusStore(AbstractStore):
     async def __aexit__(self, exc_type, exc_value, traceback):
         # closing Embedding
         self._embed_ = None
-        del self.tensor
+        # del self.tensor
         try:
             self.close(alias=self._client_id)
-            torch.cuda.empty_cache()
+            # torch.cuda.empty_cache()
         except RuntimeError:
             pass
 
@@ -199,7 +199,10 @@ class MilvusStore(AbstractStore):
         connections.disconnect(alias=alias)
         try:
             self._client.close()
+        except AttributeError:
+            pass
         finally:
+            self._client = None
             self._connected = False
 
     def create_db(self, db_name: str, alias: str = 'default', **kwargs) -> bool:
@@ -269,8 +272,9 @@ class MilvusStore(AbstractStore):
         return self.get_vector()
 
     def collection_exists(self, collection_name: str) -> bool:
-        …
+        with self.connection():
+            if collection_name in self._client.list_collections():
+                return True
         return False
 
     def check_state(self, collection_name: str) -> dict:
@@ -478,10 +482,10 @@ class MilvusStore(AbstractStore):
     ):
         if not collection:
             collection = self.collection
-        try:
-            tensor = torch.randn(1000, 1000).cuda()
-        except Exception:
-            tensor = None
+        # try:
+        #     tensor = torch.randn(1000, 1000).cuda()
+        # except Exception:
+        #     tensor = None
         if upsert is True:
             # get first document
             doc = documents[0]
@@ -510,7 +514,7 @@ class MilvusStore(AbstractStore):
             vector_field='vector',
             **kwargs
         )
-        del tensor
+        # del tensor
         return docstore
 
     def upsert(self, payload: dict, collection: str = None) -> None:
```
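`collection_exists` now goes through the store's connection context and asks the client directly, which maps onto pymilvus's `MilvusClient.list_collections()`. A hedged standalone equivalent using raw pymilvus (the URI is a placeholder for a real Milvus endpoint):

```python
# Standalone equivalent of the new collection_exists check, done with
# pymilvus directly; the URI below is a placeholder.
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")
exists = "employee_information" in client.list_collections()
print(exists)
```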
{ai_parrot-0.3.6 → ai_parrot-0.3.9}/parrot/version.py

```diff
@@ -3,7 +3,7 @@
 __title__ = "ai-parrot"
 __description__ = "Live Chatbots based on Langchain chatbots and Agents \
     Integrated into Navigator Framework or used into aiohttp applications."
-__version__ = "0.3.6"
+__version__ = "0.3.9"
 __author__ = "Jesus Lara"
 __author_email__ = "jesuslarag@gmail.com"
 __license__ = "MIT"
```