pydatamax 0.1.14__py3-none-any.whl → 0.1.15.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. datamax/__init__.py +1 -1
  2. datamax/loader/core.py +118 -118
  3. datamax/loader/minio_handler.py +171 -171
  4. datamax/loader/oss_handler.py +191 -191
  5. datamax/parser/__init__.py +2 -4
  6. datamax/parser/base.py +76 -76
  7. datamax/parser/core.py +406 -288
  8. datamax/parser/csv_parser.py +31 -10
  9. datamax/parser/doc_parser.py +466 -10
  10. datamax/parser/docx_parser.py +449 -11
  11. datamax/parser/epub_parser.py +41 -41
  12. datamax/parser/html_parser.py +37 -37
  13. datamax/parser/image_parser.py +34 -34
  14. datamax/parser/json_parser.py +32 -10
  15. datamax/parser/md_parser.py +72 -72
  16. datamax/parser/pdf_parser.py +101 -101
  17. datamax/parser/ppt_parser.py +70 -20
  18. datamax/parser/pptx_parser.py +45 -45
  19. datamax/parser/txt_parser.py +45 -45
  20. datamax/parser/xls_parser.py +26 -26
  21. datamax/parser/xlsx_parser.py +212 -215
  22. datamax/utils/__init__.py +23 -2
  23. datamax/utils/constants.py +58 -58
  24. datamax/utils/data_cleaner.py +275 -237
  25. datamax/utils/env_setup.py +79 -79
  26. datamax/utils/gotocr_pdf.py +265 -265
  27. datamax/utils/mineru_operator.py +62 -62
  28. datamax/utils/paddleocr_pdf_operator.py +90 -90
  29. datamax/utils/ppt_extract.py +140 -140
  30. datamax/utils/qa_generator.py +369 -376
  31. datamax/utils/tokenizer.py +21 -21
  32. datamax/utils/uno_handler.py +426 -0
  33. {pydatamax-0.1.14.dist-info → pydatamax-0.1.15.post2.dist-info}/METADATA +117 -5
  34. pydatamax-0.1.15.post2.dist-info/RECORD +38 -0
  35. {pydatamax-0.1.14.dist-info → pydatamax-0.1.15.post2.dist-info}/licenses/LICENSE +21 -21
  36. {pydatamax-0.1.14.dist-info → pydatamax-0.1.15.post2.dist-info}/top_level.txt +0 -1
  37. pydatamax-0.1.14.dist-info/RECORD +0 -39
  38. tests/__init__.py +0 -0
  39. tests/test_basic.py +0 -20
  40. {pydatamax-0.1.14.dist-info → pydatamax-0.1.15.post2.dist-info}/WHEEL +0 -0
@@ -1,80 +1,80 @@
1
- import subprocess
2
- import sys
3
- import os
4
- import importlib.metadata
5
-
6
- class EnvironmentSetup:
7
- """ Responsible for setting up the correct environment,
8
- including checking GPU support and installing the necessary packages
9
- """
10
-
11
- def __init__(self, use_gpu: bool = False):
12
- self._gpu_available = None
13
- self._setup_completed = False
14
- self.use_gpu = use_gpu # Use GPU if True, otherwise use CPU
15
-
16
- def is_gpu_available(self):
17
- """Check whether the system supports Gpus"""
18
- if self._gpu_available is None:
19
- try:
20
- # Check whether CUDA is available
21
- subprocess.check_output(['nvcc', '--version'], stderr=subprocess.STDOUT)
22
- self._gpu_available = True
23
- except (subprocess.CalledProcessError, FileNotFoundError):
24
- self._gpu_available = False
25
- return self._gpu_available
26
-
27
- def is_conda(self):
28
- """ Check whether the current environment is a Conda environment """
29
- return os.path.exists(os.path.join(sys.prefix, 'conda-meta'))
30
-
31
- def install_package(self, package_name):
32
- """ Select pip or conda or other installation specified package according to the environment """
33
- installer = 'conda' if self.is_conda() else 'pip'
34
- if installer == 'conda':
35
- print(f"Detected Conda environment. Installing {package_name} with conda.")
36
- try:
37
- subprocess.check_call(['pip', 'install', package_name])
38
- print(f"Successfully installed {package_name} with conda.")
39
- except subprocess.CalledProcessError as e:
40
- print(f"Failed to install {package_name} with conda: {e}")
41
- elif installer == 'pip':
42
- print(f"Using pip to install {package_name}.")
43
- try:
44
- # Invoke the pip installation package using the Python interpreter
45
- subprocess.check_call([sys.executable, "-m", "pip", "install", package_name])
46
- print(f"Successfully installed {package_name} with pip.")
47
- except subprocess.CalledProcessError as e:
48
- print(f"Failed to install {package_name} with pip: {e}")
49
- else:
50
- print("Unable to determine the package manager. Please install the package manually.")
51
-
52
- def check_and_install(self):
53
- """Check and install appropriate packages based on user's choice and GPU availability"""
54
- if self._setup_completed:
55
- return
56
-
57
- # Override GPU detection with the use_gpu parameter
58
- if self.use_gpu:
59
- pkg_name = 'paddlepaddle-gpu' if self.is_gpu_available() else 'paddlepaddle'
60
- else:
61
- pkg_name = 'paddlepaddle'
62
-
63
- try:
64
- _ = importlib.metadata.version(pkg_name.split()[0]) # Check if paddlepaddle is installed
65
- # print(f"{pkg_name} version {1} is already installed.")
66
- except importlib.metadata.PackageNotFoundError:
67
- print(f"{pkg_name} is not installed. Installing now...")
68
- self.install_package(pkg_name)
69
-
70
- self._setup_completed = True
71
-
72
-
73
- # Create an instance of EnvironmentSetup with the desired GPU option and call check_and_install when the program initializes
74
- env_setup = EnvironmentSetup() # Set this flag as needed
75
-
76
-
77
- def setup_environment(use_gpu: bool = False):
78
- """Used to set the environment when the program starts"""
79
- env_setup.use_gpu = use_gpu
1
+ import subprocess
2
+ import sys
3
+ import os
4
+ import importlib.metadata
5
+
6
class EnvironmentSetup:
    """Prepare the Python environment for PaddlePaddle.

    Probes for the CUDA compiler, detects a Conda prefix, and installs the
    matching ``paddlepaddle`` distribution with pip when it is missing.
    """

    def __init__(self, use_gpu: bool = False):
        # Cached result of the nvcc probe; None means "not probed yet".
        self._gpu_available = None
        # Flipped to True after check_and_install() has run once.
        self._setup_completed = False
        self.use_gpu = use_gpu  # Use GPU if True, otherwise use CPU

    def is_gpu_available(self) -> bool:
        """Return True when ``nvcc --version`` runs successfully.

        The probe is executed at most once per instance; later calls return
        the cached result.
        """
        if self._gpu_available is None:
            try:
                subprocess.check_output(['nvcc', '--version'], stderr=subprocess.STDOUT)
            except (subprocess.CalledProcessError, OSError):
                # OSError covers a missing binary (FileNotFoundError) as well
                # as one that exists but cannot be executed (PermissionError).
                self._gpu_available = False
            else:
                self._gpu_available = True
        return self._gpu_available

    def is_conda(self) -> bool:
        """Return True when ``sys.prefix`` contains a ``conda-meta`` directory."""
        return os.path.exists(os.path.join(sys.prefix, 'conda-meta'))

    def _pip_install_command(self, package_name: str) -> list:
        """Build the pip command bound to the running interpreter."""
        # "python -m pip" targets this interpreter's site-packages; a bare
        # "pip" found on PATH may belong to a different Python installation.
        return [sys.executable, "-m", "pip", "install", package_name]

    def install_package(self, package_name: str) -> None:
        """Install ``package_name`` with pip for the running interpreter.

        pip is used in Conda environments as well. A failed installation is
        reported on stdout and is not raised to the caller.
        """
        if self.is_conda():
            print(f"Detected Conda environment. Installing {package_name} with pip.")
        else:
            print(f"Using pip to install {package_name}.")

        try:
            subprocess.check_call(self._pip_install_command(package_name))
        except (subprocess.CalledProcessError, OSError) as e:
            print(f"Failed to install {package_name} with pip: {e}")
        else:
            print(f"Successfully installed {package_name} with pip.")

    def _required_package(self) -> str:
        """Return the distribution name matching ``use_gpu`` and the nvcc probe."""
        # The GPU build is chosen only when it was requested AND nvcc responds;
        # the probe is not run at all when use_gpu is False.
        if self.use_gpu and self.is_gpu_available():
            return 'paddlepaddle-gpu'
        return 'paddlepaddle'

    def check_and_install(self) -> None:
        """Install the required PaddlePaddle distribution if it is missing.

        Runs its check once per instance: after the first call, later calls
        return immediately, whether or not an installation succeeded.
        """
        if self._setup_completed:
            return

        pkg_name = self._required_package()
        try:
            importlib.metadata.version(pkg_name)  # raises if not installed
        except importlib.metadata.PackageNotFoundError:
            print(f"{pkg_name} is not installed. Installing now...")
            self.install_package(pkg_name)

        self._setup_completed = True
71
+
72
+
73
# Shared module-level EnvironmentSetup instance. Nothing is checked or
# installed here; setup_environment() below configures it and triggers
# check_and_install().
env_setup = EnvironmentSetup()  # constructed with the default use_gpu=False


def setup_environment(use_gpu: bool = False):
    """Set the GPU preference on the shared instance and run its package check."""
    shared = env_setup
    shared.use_gpu = use_gpu
    shared.check_and_install()