pydatamax 0.1.13__py3-none-any.whl → 0.1.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamax/__init__.py +1 -1
- datamax/loader/core.py +118 -118
- datamax/loader/{MinioHandler.py → minio_handler.py} +171 -171
- datamax/loader/{OssHandler.py → oss_handler.py} +191 -191
- datamax/parser/__init__.py +2 -4
- datamax/parser/base.py +76 -76
- datamax/parser/core.py +406 -288
- datamax/parser/csv_parser.py +31 -10
- datamax/parser/doc_parser.py +525 -61
- datamax/parser/docx_parser.py +512 -62
- datamax/parser/epub_parser.py +41 -41
- datamax/parser/html_parser.py +37 -37
- datamax/parser/image_parser.py +34 -34
- datamax/parser/json_parser.py +32 -10
- datamax/parser/md_parser.py +72 -72
- datamax/parser/pdf_parser.py +101 -101
- datamax/parser/ppt_parser.py +70 -20
- datamax/parser/pptx_parser.py +45 -45
- datamax/parser/txt_parser.py +45 -45
- datamax/parser/xls_parser.py +26 -26
- datamax/parser/xlsx_parser.py +212 -208
- datamax/utils/__init__.py +23 -2
- datamax/utils/constants.py +58 -58
- datamax/utils/data_cleaner.py +275 -237
- datamax/utils/env_setup.py +79 -79
- datamax/utils/gotocr_pdf.py +265 -265
- datamax/utils/mineru_operator.py +62 -62
- datamax/utils/paddleocr_pdf_operator.py +90 -90
- datamax/utils/ppt_extract.py +140 -140
- datamax/utils/qa_generator.py +369 -376
- datamax/utils/tokenizer.py +21 -21
- datamax/utils/uno_handler.py +426 -0
- pydatamax-0.1.15.dist-info/METADATA +340 -0
- pydatamax-0.1.15.dist-info/RECORD +38 -0
- {pydatamax-0.1.13.dist-info → pydatamax-0.1.15.dist-info}/licenses/LICENSE +21 -21
- {pydatamax-0.1.13.dist-info → pydatamax-0.1.15.dist-info}/top_level.txt +0 -1
- pydatamax-0.1.13.dist-info/METADATA +0 -280
- pydatamax-0.1.13.dist-info/RECORD +0 -39
- tests/__init__.py +0 -0
- tests/test_basic.py +0 -20
- {pydatamax-0.1.13.dist-info → pydatamax-0.1.15.dist-info}/WHEEL +0 -0
datamax/utils/env_setup.py
CHANGED
@@ -1,80 +1,80 @@
|
|
1
|
-
import subprocess
|
2
|
-
import sys
|
3
|
-
import os
|
4
|
-
import importlib.metadata
|
5
|
-
|
6
|
-
class EnvironmentSetup:
|
7
|
-
""" Responsible for setting up the correct environment,
|
8
|
-
including checking GPU support and installing the necessary packages
|
9
|
-
"""
|
10
|
-
|
11
|
-
def __init__(self, use_gpu: bool = False):
|
12
|
-
self._gpu_available = None
|
13
|
-
self._setup_completed = False
|
14
|
-
self.use_gpu = use_gpu # Use GPU if True, otherwise use CPU
|
15
|
-
|
16
|
-
def is_gpu_available(self):
|
17
|
-
"""Check whether the system supports Gpus"""
|
18
|
-
if self._gpu_available is None:
|
19
|
-
try:
|
20
|
-
# Check whether CUDA is available
|
21
|
-
subprocess.check_output(['nvcc', '--version'], stderr=subprocess.STDOUT)
|
22
|
-
self._gpu_available = True
|
23
|
-
except (subprocess.CalledProcessError, FileNotFoundError):
|
24
|
-
self._gpu_available = False
|
25
|
-
return self._gpu_available
|
26
|
-
|
27
|
-
def is_conda(self):
|
28
|
-
""" Check whether the current environment is a Conda environment """
|
29
|
-
return os.path.exists(os.path.join(sys.prefix, 'conda-meta'))
|
30
|
-
|
31
|
-
def install_package(self, package_name):
|
32
|
-
""" Select pip or conda or other installation specified package according to the environment """
|
33
|
-
installer = 'conda' if self.is_conda() else 'pip'
|
34
|
-
if installer == 'conda':
|
35
|
-
print(f"Detected Conda environment. Installing {package_name} with conda.")
|
36
|
-
try:
|
37
|
-
subprocess.check_call(['pip', 'install', package_name])
|
38
|
-
print(f"Successfully installed {package_name} with conda.")
|
39
|
-
except subprocess.CalledProcessError as e:
|
40
|
-
print(f"Failed to install {package_name} with conda: {e}")
|
41
|
-
elif installer == 'pip':
|
42
|
-
print(f"Using pip to install {package_name}.")
|
43
|
-
try:
|
44
|
-
# Invoke the pip installation package using the Python interpreter
|
45
|
-
subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
|
46
|
-
print(f"Successfully installed {package_name} with pip.")
|
47
|
-
except subprocess.CalledProcessError as e:
|
48
|
-
print(f"Failed to install {package_name} with pip: {e}")
|
49
|
-
else:
|
50
|
-
print("Unable to determine the package manager. Please install the package manually.")
|
51
|
-
|
52
|
-
def check_and_install(self):
|
53
|
-
"""Check and install appropriate packages based on user's choice and GPU availability"""
|
54
|
-
if self._setup_completed:
|
55
|
-
return
|
56
|
-
|
57
|
-
# Override GPU detection with the use_gpu parameter
|
58
|
-
if self.use_gpu:
|
59
|
-
pkg_name = 'paddlepaddle-gpu' if self.is_gpu_available() else 'paddlepaddle'
|
60
|
-
else:
|
61
|
-
pkg_name = 'paddlepaddle'
|
62
|
-
|
63
|
-
try:
|
64
|
-
_ = importlib.metadata.version(pkg_name.split()[0]) # Check if paddlepaddle is installed
|
65
|
-
# print(f"{pkg_name} version {1} is already installed.")
|
66
|
-
except importlib.metadata.PackageNotFoundError:
|
67
|
-
print(f"{pkg_name} is not installed. Installing now...")
|
68
|
-
self.install_package(pkg_name)
|
69
|
-
|
70
|
-
self._setup_completed = True
|
71
|
-
|
72
|
-
|
73
|
-
# Create an instance of EnvironmentSetup with the desired GPU option and call check_and_install when the program initializes
|
74
|
-
env_setup = EnvironmentSetup() # Set this flag as needed
|
75
|
-
|
76
|
-
|
77
|
-
def setup_environment(use_gpu: bool = False):
|
78
|
-
"""Used to set the environment when the program starts"""
|
79
|
-
env_setup.use_gpu = use_gpu
|
1
|
+
import subprocess
|
2
|
+
import sys
|
3
|
+
import os
|
4
|
+
import importlib.metadata
|
5
|
+
|
6
|
+
class EnvironmentSetup:
    """Make sure a PaddlePaddle distribution matching the requested device is installed.

    The class probes for CUDA, picks the distribution name, checks whether it
    is already installed and, if not, installs it with pip into the running
    interpreter's environment.
    """

    # Distribution names passed to pip / looked up in the installed metadata.
    _CPU_PACKAGE = 'paddlepaddle'
    _GPU_PACKAGE = 'paddlepaddle-gpu'

    def __init__(self, use_gpu: bool = False):
        """Store the device preference; nothing is probed or installed yet.

        Args:
            use_gpu: Request the GPU distribution when CUDA is detected.
                When False the CPU distribution is always targeted.
        """
        self._gpu_available = None  # cached nvcc probe result; None = not probed yet
        self._setup_completed = False  # True once a check/install has succeeded
        self.use_gpu = use_gpu

    def is_gpu_available(self) -> bool:
        """Return True when ``nvcc --version`` runs successfully.

        The presence of the CUDA compiler is used as the signal for GPU
        support. The probe is executed once and its result is cached.
        """
        if self._gpu_available is None:
            try:
                subprocess.check_output(['nvcc', '--version'], stderr=subprocess.STDOUT)
            except (subprocess.CalledProcessError, OSError):
                # OSError covers a missing binary (FileNotFoundError) as well
                # as one that exists but cannot be executed.
                self._gpu_available = False
            else:
                self._gpu_available = True
        return self._gpu_available

    def is_conda(self) -> bool:
        """Return True when the active prefix contains a ``conda-meta`` directory."""
        return os.path.exists(os.path.join(sys.prefix, 'conda-meta'))

    def install_package(self, package_name) -> bool:
        """Install ``package_name`` with pip into the running interpreter's environment.

        pip is always invoked as ``<sys.executable> -m pip`` so the package
        lands in the environment this process runs in, rather than whichever
        ``pip`` happens to be first on PATH. Failures are reported on stdout
        and never raised.

        Args:
            package_name: Requirement string handed to ``pip install``.

        Returns:
            True if pip exited successfully, False otherwise.
        """
        if self.is_conda():
            # The package is still installed with pip; the message only
            # records that the target environment is a Conda one.
            print(f"Detected Conda environment. Installing {package_name} with pip inside it.")
        else:
            print(f"Using pip to install {package_name}.")

        try:
            subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
        except (subprocess.CalledProcessError, OSError) as e:
            print(f"Failed to install {package_name} with pip: {e}")
            return False

        print(f"Successfully installed {package_name} with pip.")
        return True

    @staticmethod
    def _is_installed(distribution_name) -> bool:
        """Return True when metadata for ``distribution_name`` is installed."""
        try:
            importlib.metadata.version(distribution_name)
        except importlib.metadata.PackageNotFoundError:
            return False
        return True

    def check_and_install(self):
        """Check for, and if needed install, the PaddlePaddle distribution.

        The GPU distribution is targeted only when ``use_gpu`` is set and
        CUDA is detected; otherwise the CPU distribution is targeted. The
        method becomes a no-op once a check or install has succeeded; a
        failed install leaves it armed so that a later call retries.
        """
        if self._setup_completed:
            return

        if self.use_gpu and self.is_gpu_available():
            pkg_name = self._GPU_PACKAGE
            acceptable = (self._GPU_PACKAGE,)
        else:
            pkg_name = self._CPU_PACKAGE
            # NOTE(review): an installed GPU build is assumed to be usable for
            # CPU work as well, so it is not overwritten with the CPU build --
            # confirm against the PaddlePaddle installation guide.
            acceptable = (self._CPU_PACKAGE, self._GPU_PACKAGE)

        if any(self._is_installed(name) for name in acceptable):
            self._setup_completed = True
            return

        print(f"{pkg_name} is not installed. Installing now...")
        outcome = self.install_package(pkg_name)
        # Only an explicit False counts as failure, so an override of
        # install_package that returns None keeps the previous behaviour.
        self._setup_completed = outcome is not False
|
71
|
+
|
72
|
+
|
73
|
+
# Module-level instance shared by every caller of setup_environment(). It is
# created in CPU mode and checks/installs nothing until setup_environment()
# is called.
env_setup = EnvironmentSetup()


def setup_environment(use_gpu: bool = False):
    """Run the package check/installation for the requested device.

    Args:
        use_gpu: Forwarded to the shared ``env_setup`` instance before its
            ``check_and_install()`` is invoked.
    """
    if env_setup.use_gpu != use_gpu:
        # A check that already completed was made for the other device
        # preference; clear the flag so this call is not silently skipped.
        env_setup._setup_completed = False
    env_setup.use_gpu = use_gpu
    env_setup.check_and_install()
|