pydatamax 0.1.16__py3-none-any.whl → 0.1.16.post2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datamax/loader/core.py +67 -42
- datamax/loader/minio_handler.py +38 -19
- datamax/parser/__init__.py +2 -1
- datamax/parser/base.py +46 -22
- datamax/parser/core.py +215 -126
- datamax/parser/csv_parser.py +25 -5
- datamax/parser/doc_parser.py +230 -141
- datamax/parser/docx_parser.py +275 -186
- datamax/parser/epub_parser.py +49 -13
- datamax/parser/html_parser.py +36 -16
- datamax/parser/image_parser.py +52 -14
- datamax/parser/json_parser.py +26 -5
- datamax/parser/md_parser.py +40 -21
- datamax/parser/pdf_parser.py +69 -29
- datamax/parser/ppt_parser.py +41 -9
- datamax/parser/pptx_parser.py +49 -21
- datamax/parser/txt_parser.py +45 -14
- datamax/parser/xls_parser.py +34 -6
- datamax/parser/xlsx_parser.py +58 -51
- datamax/utils/__init__.py +2 -1
- datamax/utils/data_cleaner.py +36 -22
- datamax/utils/env_setup.py +25 -18
- datamax/utils/gotocr_pdf.py +13 -13
- datamax/utils/lifecycle_types.py +18 -0
- datamax/utils/mineru_operator.py +17 -15
- datamax/utils/paddleocr_pdf_operator.py +34 -19
- datamax/utils/ppt_extract.py +34 -11
- datamax/utils/qa_generator.py +332 -44
- datamax/utils/tokenizer.py +10 -9
- datamax/utils/uno_handler.py +91 -68
- {pydatamax-0.1.16.dist-info → pydatamax-0.1.16.post2.dist-info}/METADATA +54 -2
- pydatamax-0.1.16.post2.dist-info/RECORD +39 -0
- pydatamax-0.1.16.dist-info/RECORD +0 -38
- {pydatamax-0.1.16.dist-info → pydatamax-0.1.16.post2.dist-info}/WHEEL +0 -0
- {pydatamax-0.1.16.dist-info → pydatamax-0.1.16.post2.dist-info}/licenses/LICENSE +0 -0
- {pydatamax-0.1.16.dist-info → pydatamax-0.1.16.post2.dist-info}/top_level.txt +0 -0
datamax/parser/pptx_parser.py
CHANGED
```diff
@@ -1,11 +1,13 @@
-import os
 from typing import Union
+
+from loguru import logger
 from pptx import Presentation
-
-from datamax.parser.base import MarkdownOutputVo
+
+from datamax.parser.base import BaseLife, MarkdownOutputVo
+from datamax.utils.lifecycle_types import LifeType
 
 
-class PPtxParser(BaseLife):
+class PptxParser(BaseLife):
     def __init__(self, file_path: Union[str, list]):
         super().__init__()
         self.file_path = file_path
@@ -13,33 +15,59 @@ class PPtxParser(BaseLife):
     @staticmethod
     def read_ppt_file(file_path: str):
         try:
-            content =
+            content = ""
             prs = Presentation(file_path)
             for slide in prs.slides:
                 for shape in slide.shapes:
                     if shape.has_text_frame:
-                        content += shape.text +
-                    # if shape.shape_type == 13:
-                    #     if not os.path.exists("extracted_images"):
-                    #         os.makedirs("extracted_images")
-                    #     image = shape.image
-                    #     image_filename = f'extracted_images/image_{shape.shape_id}.{image.ext}'
-                    #     with open(image_filename, 'wb') as img_file:
-                    #         img_file.write(image.blob)
-                    #     content += ('[' + image_filename + ']')
+                        content += shape.text + "\n"
             return content
         except Exception:
             raise
 
     def parse(self, file_path: str) -> MarkdownOutputVo:
+        # —— Lifecycle: start processing the PPTX —— #
+        lc_start = self.generate_lifecycle(
+            source_file=file_path,
+            domain="Technology",
+            usage_purpose="Documentation",
+            life_type=LifeType.DATA_PROCESSING,
+        )
+        logger.debug("⚙️ DATA_PROCESSING lifecycle generated")
+
         try:
-
+            extension = self.get_file_extension(file_path)
             content = self.read_ppt_file(file_path=file_path)
             mk_content = content
-
-
-
-
+
+            # —— Lifecycle: processing finished —— #
+            lc_end = self.generate_lifecycle(
+                source_file=file_path,
+                domain="Technology",
+                usage_purpose="Documentation",
+                life_type=LifeType.DATA_PROCESSED,
+            )
+            logger.debug("⚙️ DATA_PROCESSED lifecycle generated")
+
+            output_vo = MarkdownOutputVo(extension, mk_content)
+            output_vo.add_lifecycle(lc_start)
+            output_vo.add_lifecycle(lc_end)
             return output_vo.to_dict()
-
-
+
+        except Exception as e:
+            # —— Lifecycle: processing failed —— #
+            lc_fail = self.generate_lifecycle(
+                source_file=file_path,
+                domain="Technology",
+                usage_purpose="Documentation",
+                life_type=LifeType.DATA_PROCESS_FAILED,
+            )
+            logger.debug("⚙️ DATA_PROCESS_FAILED lifecycle generated")
+
+            raise Exception(
+                {
+                    "error": str(e),
+                    "file_path": file_path,
+                    "lifecycle": [lc_fail.to_dict()],
+                }
+            )
```
datamax/parser/txt_parser.py
CHANGED
```diff
@@ -1,8 +1,10 @@
-import chardet
 from typing import Union
-
-
-
+
+import chardet
+
+from datamax.parser.base import BaseLife, MarkdownOutputVo
+from datamax.utils.lifecycle_types import LifeType
+
 
 class TxtParser(BaseLife):
     def __init__(self, file_path: Union[str, list]):
@@ -12,9 +14,9 @@ class TxtParser(BaseLife):
     @staticmethod
     def detect_encoding(file_path: str):
         try:
-            with open(file_path,
+            with open(file_path, "rb") as f:
                 result = chardet.detect(f.read())
-            return result[
+            return result["encoding"]
         except Exception as e:
             raise e
 
@@ -27,20 +29,49 @@
         """
         try:
             encoding = TxtParser.detect_encoding(file_path)
-            with open(file_path,
+            with open(file_path, "r", encoding=encoding) as file:
                 return file.read()
         except Exception as e:
             raise e
 
     def parse(self, file_path: str) -> MarkdownOutputVo:
        try:
-
-
+            extension = self.get_file_extension(file_path)
+
+            # 1) Start processing
+            lc_start = self.generate_lifecycle(
+                source_file=file_path,
+                domain="Technology",
+                usage_purpose="Documentation",
+                life_type=LifeType.DATA_PROCESSING,
+            )
+
+            # 2) Read the file content
+            content = self.read_txt_file(file_path=file_path)
             mk_content = content
-
-
-            output_vo = MarkdownOutputVo(
-            output_vo.add_lifecycle(
+
+            # 3) Build the output object and attach the start lifecycle
+            output_vo = MarkdownOutputVo(extension, mk_content)
+            output_vo.add_lifecycle(lc_start)
+
+            # 4) Processing finished
+            lc_end = self.generate_lifecycle(
+                source_file=file_path,
+                domain="Technology",
+                usage_purpose="Documentation",
+                life_type=LifeType.DATA_PROCESSED,
+            )
+            output_vo.add_lifecycle(lc_end)
+
             return output_vo.to_dict()
+
         except Exception as e:
-
+            # 5) Processing failed
+            lc_fail = self.generate_lifecycle(
+                source_file=file_path,
+                domain="Technology",
+                usage_purpose="Documentation",
+                life_type=LifeType.DATA_PROCESS_FAILED,
+            )
+            # (Optional) To also return a VO on failure, build one here with empty content and attach lc_fail
+            raise
```
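
`TxtParser` keeps its two-pass approach: sniff the encoding with `chardet` on the raw bytes, then reopen the file with the detected encoding. A condensed sketch of the same idea; the `utf-8` fallback is an assumption for the case where `chardet.detect` returns `{"encoding": None}`:

```python
import chardet


def read_text_autodetect(path: str) -> str:
    # Read raw bytes once, let chardet guess the encoding, then decode.
    with open(path, "rb") as f:
        raw = f.read()
    encoding = chardet.detect(raw)["encoding"] or "utf-8"
    return raw.decode(encoding)
```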
datamax/parser/xls_parser.py
CHANGED
```diff
@@ -1,8 +1,10 @@
-from datamax.parser.base import MarkdownOutputVo
-from datamax.parser.base import BaseLife
-import pandas as pd
 import warnings
 
+import pandas as pd
+
+from datamax.parser.base import BaseLife, MarkdownOutputVo
+from datamax.utils.lifecycle_types import LifeType
+
 warnings.filterwarnings("ignore")
 
 
@@ -15,12 +17,38 @@ class XlsParser(BaseLife):
 
     def parse(self, file_path: str) -> MarkdownOutputVo:
         try:
+            # 🏷️ Parsing started
+            lc_start = self.generate_lifecycle(
+                source_file=file_path,
+                domain="Technology",
+                usage_purpose="Documentation",
+                life_type=LifeType.DATA_PROCESSING,
+            )
+
+            # 📊 Read the Excel file and generate Markdown
             df = pd.read_excel(file_path)
             mk_content = df.to_markdown(index=False)
-
-
+
+            # 🏷️ Parsing finished
+            lc_end = self.generate_lifecycle(
+                source_file=file_path,
+                domain="Technology",
+                usage_purpose="Documentation",
+                life_type=LifeType.DATA_PROCESSED,
+            )
+
             output_vo = MarkdownOutputVo(self.get_file_extension(file_path), mk_content)
-            output_vo.add_lifecycle(
+            output_vo.add_lifecycle(lc_start)
+            output_vo.add_lifecycle(lc_end)
             return output_vo.to_dict()
+
         except Exception as e:
+            # ❌ Parsing failed
+            lc_fail = self.generate_lifecycle(
+                source_file=file_path,
+                domain="Technology",
+                usage_purpose="Documentation",
+                life_type=LifeType.DATA_PROCESS_FAILED,
+            )
+            # Do not return an empty VO here; raise so the framework can catch and report it
            raise e
```
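
The conversion itself in `XlsParser` is a single pandas call. Note that `DataFrame.to_markdown` delegates to the optional `tabulate` package, which must be installed for this parser to work:

```python
import pandas as pd

df = pd.DataFrame({"sheet": ["a", "b"], "rows": [10, 20]})
print(df.to_markdown(index=False))
# Prints a pipe-style Markdown table, roughly:
# | sheet   |   rows |
# |:--------|-------:|
# | a       |     10 |
# | b       |     20 |
```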
datamax/parser/xlsx_parser.py
CHANGED
```diff
@@ -1,4 +1,3 @@
-from loguru import logger
 import multiprocessing
 import os
 import time
@@ -6,8 +5,10 @@ import warnings
 from multiprocessing import Queue
 
 import pandas as pd
+from loguru import logger
 
 from datamax.parser.base import BaseLife, MarkdownOutputVo
+from datamax.utils.lifecycle_types import LifeType
 
 warnings.filterwarnings("ignore")
 
@@ -15,11 +16,10 @@ warnings.filterwarnings("ignore")
 class XlsxParser(BaseLife):
     """XLSX parser - reads with pandas, converts to Markdown, and supports multiprocessing"""
 
-    def __init__(self, file_path
+    def __init__(self, file_path):
         super().__init__()
         self.file_path = file_path
-
-        logger.info(f"🚀 XlsxParser initialized - file path: {file_path}, timeout: {timeout}s")
+        logger.info(f"🚀 XlsxParser initialized - file path: {file_path}")
 
     def _parse_with_pandas(self, file_path: str) -> str:
         """Read the Excel file with pandas and convert it to Markdown"""
@@ -85,7 +85,9 @@ class XlsxParser(BaseLife):
             markdown_content = "*Worksheet is empty*"
             logger.warning("⚠️ Worksheet is empty")
 
-        logger.info(
+        logger.info(
+            f"🎊 pandas conversion finished, markdown content length: {len(markdown_content)} characters"
+        )
         logger.debug(f"👀 Preview of the first 200 characters: {markdown_content[:200]}...")
 
         return markdown_content
@@ -107,6 +109,15 @@ class XlsxParser(BaseLife):
        """Core method that parses the Excel file"""
        logger.info(f"🎬 Start parsing the Excel file: {file_path}")
 
+        # —— Lifecycle: processing started —— #
+        lc_start = self.generate_lifecycle(
+            source_file=file_path,
+            domain="Technology",
+            usage_purpose="Documentation",
+            life_type=LifeType.DATA_PROCESSING,
+        )
+        logger.debug("⚙️ DATA_PROCESSING lifecycle generated")
+
        try:
            # Parse the Excel file with pandas
            logger.info("🐼 Parsing Excel in pandas mode")
@@ -119,19 +130,20 @@ class XlsxParser(BaseLife):
 
             logger.info(f"🎊 File content parsed, final content length: {len(mk_content)} characters")
 
-            #
-
+            # —— Lifecycle: processing finished —— #
+            lc_end = self.generate_lifecycle(
                 source_file=file_path,
                 domain="Technology",
                 usage_purpose="Documentation",
-                life_type=
+                life_type=LifeType.DATA_PROCESSED,
             )
-            logger.debug("⚙️
+            logger.debug("⚙️ DATA_PROCESSED lifecycle generated")
 
-            #
-
-            output_vo = MarkdownOutputVo(
-            output_vo.add_lifecycle(
+            # Create the output object and attach both lifecycle entries
+            extension = self.get_file_extension(file_path)
+            output_vo = MarkdownOutputVo(extension, mk_content)
+            output_vo.add_lifecycle(lc_start)
+            output_vo.add_lifecycle(lc_end)
 
             result = output_vo.to_dict()
             result_queue.put(result)
@@ -142,15 +154,46 @@ class XlsxParser(BaseLife):
             return result
 
         except Exception as e:
+            # —— Lifecycle: processing failed —— #
+            try:
+                lc_fail = self.generate_lifecycle(
+                    source_file=file_path,
+                    domain="Technology",
+                    usage_purpose="Documentation",
+                    life_type=LifeType.DATA_PROCESS_FAILED,
+                )
+                logger.debug("⚙️ DATA_PROCESS_FAILED lifecycle generated")
+                # If needed, it can also be attached to error_result:
+                # error_result = {"error": str(e), "file_path": file_path, "lifecycle":[lc_fail.to_dict()]}
+            except Exception:
+                pass
+
+            # —— Lifecycle: processing failed —— #
+            try:
+                lc_fail = self.generate_lifecycle(
+                    source_file=file_path,
+                    domain="Technology",
+                    usage_purpose="Documentation",
+                    life_type=LifeType.DATA_PROCESS_FAILED,
+                )
+                logger.debug("⚙️ DATA_PROCESS_FAILED lifecycle generated")
+            except Exception:
+                pass
+
             logger.error(f"💀 Failed to parse the Excel file: {file_path}, error: {str(e)}")
             # Put the error into the queue as well
-            error_result = {
+            error_result = {
+                "error": str(e),
+                "file_path": file_path,
+                # Also return the failed lifecycle; tests can optionally verify it
+                "lifecycle": [lc_fail.to_dict()] if "lc_fail" in locals() else [],
+            }
             result_queue.put(error_result)
             raise
 
     def parse(self, file_path: str) -> dict:
         """Parse the Excel file - supports multiprocessing and timeout control"""
-        logger.info(f"🚀 Starting the Excel parsing process - file: {file_path}
+        logger.info(f"🚀 Starting the Excel parsing process - file: {file_path}")
 
         try:
             # Verify that the file exists
@@ -169,42 +212,6 @@ class XlsxParser(BaseLife):
             process.start()
             logger.debug(f"⚡ Child process started, PID: {process.pid}")
 
-            start_time = time.time()
-
-            # Wait for parsing to finish or time out
-            while time.time() - start_time < self.timeout:
-                elapsed_time = int(time.time() - start_time)
-                logger.debug(f"⏱️ Waiting for parsing to finish... {elapsed_time}s")
-
-                if not process.is_alive():
-                    logger.debug("✅ Child process finished")
-                    break
-
-                if not result_queue.empty():
-                    result = result_queue.get()
-                    process.join()  # Wait for the process to end normally
-
-                    # Check whether the result is an error
-                    if "error" in result:
-                        logger.error(f"💥 Child process returned an error: {result['error']}")
-                        raise Exception(result["error"])
-
-                    logger.info(f"🎉 Excel parsing completed successfully, took {elapsed_time}s")
-                    return result
-
-                time.sleep(1)
-            else:
-                # Timeout handling
-                logger.error(f"⏰ Parsing timed out ({self.timeout}s), terminating the process")
-                process.terminate()
-                process.join(timeout=5)  # Give the process 5 seconds to exit gracefully
-
-                if process.is_alive():
-                    logger.error("💀 Force-killing the process")
-                    process.kill()
-
-                raise TimeoutError(f"Excel parsing timed out: {file_path}")
-
         except Exception as e:
             logger.error(
                 f"💀 Excel parsing failed: {file_path}, error type: {type(e).__name__}, error message: {str(e)}"
```
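
Besides the lifecycle additions, this diff removes the one-second polling loop (and the `timeout` constructor parameter) that previously watched the worker process. A generic sketch, not the package's code, of the same watchdog expressed with `Process.join(timeout=...)` instead of a sleep loop:

```python
import multiprocessing


def run_with_timeout(target, file_path: str, timeout: float) -> dict:
    result_queue: multiprocessing.Queue = multiprocessing.Queue()
    process = multiprocessing.Process(target=target, args=(file_path, result_queue))
    process.start()
    process.join(timeout)           # block until the child finishes or the timeout expires
    if process.is_alive():          # timed out: terminate, then force-kill if needed
        process.terminate()
        process.join(timeout=5)     # allow a graceful exit
        if process.is_alive():
            process.kill()
        raise TimeoutError(f"parse timed out: {file_path}")
    result = result_queue.get()
    if "error" in result:           # the child reported a failure via the queue
        raise Exception(result["error"])
    return result
```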
datamax/utils/__init__.py
CHANGED
```diff
@@ -5,7 +5,7 @@ from datamax.utils.data_cleaner import (
 )
 from datamax.utils.env_setup import setup_environment
 
-#
+# Conditionally import the UNO processor
 try:
     from datamax.utils.uno_handler import (
         HAS_UNO,
@@ -23,6 +23,7 @@ except ImportError:
     cleanup_uno_manager = None
     uno_manager_context = None
 
+
 def clean_original_text(text):
     """
     Clean the original text.
```
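
The newly labeled block is the optional-dependency pattern: the UNO imports sit inside a `try`, and callers feature-test `HAS_UNO` instead of importing `uno_handler` directly. A sketch of a guarded call site; `convert_document` is a hypothetical consumer, only `HAS_UNO` comes from the diff:

```python
try:
    from datamax.utils.uno_handler import HAS_UNO
except ImportError:
    HAS_UNO = False


def convert_document(path: str):
    # Hypothetical caller: fail fast when the optional UNO stack is absent.
    if not HAS_UNO:
        raise RuntimeError("UNO support unavailable; install LibreOffice/python-uno")
    ...  # delegate to the uno_handler-based conversion
```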
datamax/utils/data_cleaner.py
CHANGED
```diff
@@ -31,15 +31,15 @@ class AbnormalCleaner:
         """
         Extract reference entries and assign to self.parsed_data
         (Original text will be replaced with extracted references, each item on a separate line)
-
+
         Returns:
             str: Extracted reference text (same as self.parsed_data)
         """
         patterns = [
-            r
-            r
-            r
-            r
+            r"([A-Z][a-z]+(?:, [A-Z](?:\.[a-z]*)?)+(?: et al\.)? \(\d{4}\)[^\n]+)",  # APA format
+            r"(\[\d+\][^\n]+)",  # Numbered references like [1]
+            r"(DOI:\s?\S+|https?://\S+)",  # DOI/URL
+            r"([A-Z][a-z]+, [A-Z]\.?,? & [A-Z][a-z]+, [A-Z]\. \d{4}[^\n]+)",  # Multi-author APA
         ]
         references = []
         for pattern in patterns:
@@ -47,9 +47,11 @@
                 references.extend(re.findall(pattern, self.parsed_data))
             except re.error as e:
                 print(f"Regex error {pattern}: {e}")
-
+
         # Assign extraction results to parsed_data (each item on a separate line)
-        self.parsed_data = "\n".join(
+        self.parsed_data = "\n".join(
+            list(set(references))
+        )  # Deduplicate and merge into string
         return self.parsed_data
 
 # Exception cleaning class
@@ -164,19 +166,19 @@ class TextFilter:
         """Filter by word repetition rate"""
         if not isinstance(self.parsed_data, str):
             return False
-
+
         text = str(self.parsed_data)
-        bi_grams = [text[i:i+2] for i in range(0, len(text)-1, 2)]
+        bi_grams = [text[i : i + 2] for i in range(0, len(text) - 1, 2)]
         word_count = len(bi_grams)
         if word_count == 0:
             print("No words found.")
             return False
-
+
         word_freq = Counter(bi_grams)
         most_common_word, most_common_count = word_freq.most_common(1)[0]
         repetition_rate = most_common_count / word_count
         print(f"Word repetition rate: {repetition_rate}")
-
+
         return repetition_rate <= threshold
 
     def filter_by_char_count(self, min_chars=30, max_chars=500000):
@@ -227,22 +229,34 @@ class PrivacyDesensitization:
         # Customer service hotlines are not easy to match and are not considered private data
         self.parsed_data = re.sub(r"\d+-\d+-\d+", token, self.parsed_data)
         return self.parsed_data
-
+
     def replace_bank_id(self, token="COSCO_NUMBER"):
         # Match bank card numbers and replace
-
-
-        )
+        BANK_ID_PATTERN = r"\b(?:(?:\d{4}[ -]?){4}\d{3}|(?:\d{4}[ -]?){3}\d{4}|(?:4\d{3}|5[1-5]\d{2}|6[045]\d{2})(?:[ -]?\d{4}){3}|3[47]\d{2}[ -]?\d{6}[ -]?\d{5})\b"
+
+        def luhn_check(card_number):
+            digits = [int(d) for d in card_number if d.isdigit()]
+            if len(digits) not in (13, 15, 16, 19):
+                return False
+            checksum = sum(digits[-1::-2])
+            checksum += sum(sum(divmod(d * 2, 10)) for d in digits[-2::-2])
+            return checksum % 10 == 0
+
+        bank_card_numbers = re.findall(BANK_ID_PATTERN, self.parsed_data)
+
+        for card_number in bank_card_numbers:
+            if luhn_check(card_number):
+                self.parsed_data = re.sub(card_number, token, self.parsed_data)
         return self.parsed_data
-
+
     def replace_phone_number(self, token="COSCO_NUMBER"):
         # Match phone numbers and replace
         self.parsed_data = jio.replace_phone_number(self.parsed_data, token)
         return self.parsed_data
-
+
     def replace_qq(self, token="COSCO_NUMBER"):
         # Match QQ numbers and replace
-        self.parsed_data = jio.replace_qq(self.parsed_data,token)
+        self.parsed_data = jio.replace_qq(self.parsed_data, token)
         return self.parsed_data
 
     def replace_id_card(self, token="COSCO_NUMBER"):
@@ -252,6 +266,10 @@ class PrivacyDesensitization:
 
     def replace_number(self):
         # Replace all types of numeric private data
+        # Bank card
+        self.parsed_data = self.replace_bank_id(
+            token="BANK_ID"
+        )  # nosec B106 - this is a data-masking token, not a password
 
         # Landline + mobile phone
         self.parsed_data = jio.replace_phone_number(self.parsed_data, "COSCO_NUMBER")
@@ -259,10 +277,6 @@ class PrivacyDesensitization:
         self.parsed_data = jio.replace_qq(self.parsed_data, "COSCO_NUMBER")
         # ID card
         self.parsed_data = jio.replace_id_card(self.parsed_data, "COSCO_NUMBER")
-        # Bank card
-        self.parsed_data = self.replace_bank_id(
-            self.parsed_data, token="COSCO_NUMBER"
-        )  # nosec B106 - this is a data-masking token, not a password
 
         return self.parsed_data
 
```
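
The rewritten `replace_bank_id` no longer masks every regex hit: a candidate must also pass a Luhn checksum, which filters out arbitrary digit runs that merely look like card numbers. A worked example of the same check on the canonical Visa test number:

```python
def luhn_check(card_number: str) -> bool:
    digits = [int(d) for d in card_number if d.isdigit()]
    if len(digits) not in (13, 15, 16, 19):  # common card lengths
        return False
    checksum = sum(digits[-1::-2])  # digits in odd positions from the right
    # Double every second digit from the right; divmod sums the two digits of d*2
    checksum += sum(sum(divmod(d * 2, 10)) for d in digits[-2::-2])
    return checksum % 10 == 0


print(luhn_check("4111 1111 1111 1111"))  # True  -> would be masked as BANK_ID
print(luhn_check("4111 1111 1111 1112"))  # False -> left untouched
```

Because the doubling scheme changes the checksum for any single-digit alteration, a one-digit typo in an otherwise well-formed number fails validation, which keeps false positives low.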
datamax/utils/env_setup.py
CHANGED
```diff
@@ -1,11 +1,12 @@
+import importlib.metadata
+import os
 import subprocess
 import sys
-
-import importlib.metadata
+
 
 class EnvironmentSetup:
-    """
-
+    """Responsible for setting up the correct environment,
+    including checking GPU support and installing the necessary packages
     """
 
     def __init__(self, use_gpu: bool = False):
@@ -18,36 +19,40 @@ class EnvironmentSetup:
         if self._gpu_available is None:
             try:
                 # Check whether CUDA is available
-                subprocess.check_output([
+                subprocess.check_output(["nvcc", "--version"], stderr=subprocess.STDOUT)
                 self._gpu_available = True
             except (subprocess.CalledProcessError, FileNotFoundError):
                 self._gpu_available = False
         return self._gpu_available
 
     def is_conda(self):
-        """
-        return os.path.exists(os.path.join(sys.prefix,
+        """Check whether the current environment is a Conda environment"""
+        return os.path.exists(os.path.join(sys.prefix, "conda-meta"))
 
     def install_package(self, package_name):
-        """
-        installer =
-        if installer ==
+        """Select pip or conda or other installation specified package according to the environment"""
+        installer = "conda" if self.is_conda() else "pip"
+        if installer == "conda":
             print(f"Detected Conda environment. Installing {package_name} with conda.")
             try:
-                subprocess.check_call([
+                subprocess.check_call(["pip", "install", package_name])
                 print(f"Successfully installed {package_name} with conda.")
             except subprocess.CalledProcessError as e:
                 print(f"Failed to install {package_name} with conda: {e}")
-        elif installer ==
+        elif installer == "pip":
             print(f"Using pip to install {package_name}.")
             try:
                 # Invoke the pip installation package using the Python interpreter
-                subprocess.check_call(
+                subprocess.check_call(
+                    [sys.executable, "-m", "pip", "install", package_name]
+                )
                 print(f"Successfully installed {package_name} with pip.")
             except subprocess.CalledProcessError as e:
                 print(f"Failed to install {package_name} with pip: {e}")
         else:
-            print(
+            print(
+                "Unable to determine the package manager. Please install the package manually."
+            )
 
     def check_and_install(self):
         """Check and install appropriate packages based on user's choice and GPU availability"""
@@ -56,12 +61,14 @@ class EnvironmentSetup:
 
         # Override GPU detection with the use_gpu parameter
         if self.use_gpu:
-            pkg_name =
+            pkg_name = "paddlepaddle-gpu" if self.is_gpu_available() else "paddlepaddle"
         else:
-            pkg_name =
+            pkg_name = "paddlepaddle"
 
         try:
-            _ = importlib.metadata.version(
+            _ = importlib.metadata.version(
+                pkg_name.split()[0]
+            )  # Check if paddlepaddle is installed
             # print(f"{pkg_name} version {1} is already installed.")
         except importlib.metadata.PackageNotFoundError:
             print(f"{pkg_name} is not installed. Installing now...")
@@ -77,4 +84,4 @@ env_setup = EnvironmentSetup()  # Set this flag as needed
 def setup_environment(use_gpu: bool = False):
     """Used to set the environment when the program starts"""
     env_setup.use_gpu = use_gpu
-    env_setup.check_and_install()
\ No newline at end of file
+    env_setup.check_and_install()
```
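
Entry-point usage is unchanged: `setup_environment` sets the flag and runs the check-and-install step, choosing `paddlepaddle-gpu` only when `use_gpu=True` and `nvcc` is detected, and falling back to the CPU build otherwise:

```python
from datamax.utils.env_setup import setup_environment

# Install/verify the appropriate paddlepaddle build at program startup.
setup_environment(use_gpu=True)
```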