PyPI - pydatamax - Versions diffs - 0.1.16.post1__py3-none-any.whl → 0.1.16.post2__py3-none-any.whl - Mend

pydatamax 0.1.16.post1__py3-none-any.whl → 0.1.16.post2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

datamax/loader/core.py +67 -42
datamax/loader/minio_handler.py +38 -19
datamax/parser/__init__.py +2 -1
datamax/parser/base.py +46 -22
datamax/parser/core.py +215 -126
datamax/parser/csv_parser.py +25 -5
datamax/parser/doc_parser.py +230 -141
datamax/parser/docx_parser.py +275 -186
datamax/parser/epub_parser.py +49 -13
datamax/parser/html_parser.py +36 -16
datamax/parser/image_parser.py +52 -14
datamax/parser/json_parser.py +26 -5
datamax/parser/md_parser.py +40 -21
datamax/parser/pdf_parser.py +69 -29
datamax/parser/ppt_parser.py +41 -9
datamax/parser/pptx_parser.py +49 -21
datamax/parser/txt_parser.py +45 -14
datamax/parser/xls_parser.py +34 -6
datamax/parser/xlsx_parser.py +58 -51
datamax/utils/__init__.py +2 -1
datamax/utils/data_cleaner.py +36 -22
datamax/utils/env_setup.py +25 -18
datamax/utils/gotocr_pdf.py +13 -13
datamax/utils/lifecycle_types.py +18 -0
datamax/utils/mineru_operator.py +17 -15
datamax/utils/paddleocr_pdf_operator.py +34 -19
datamax/utils/ppt_extract.py +34 -11
datamax/utils/qa_generator.py +332 -44
datamax/utils/tokenizer.py +10 -9
datamax/utils/uno_handler.py +84 -72
{pydatamax-0.1.16.post1.dist-info → pydatamax-0.1.16.post2.dist-info}/METADATA +54 -2
pydatamax-0.1.16.post2.dist-info/RECORD +39 -0
pydatamax-0.1.16.post1.dist-info/RECORD +0 -38
{pydatamax-0.1.16.post1.dist-info → pydatamax-0.1.16.post2.dist-info}/WHEEL +0 -0
{pydatamax-0.1.16.post1.dist-info → pydatamax-0.1.16.post2.dist-info}/licenses/LICENSE +0 -0
{pydatamax-0.1.16.post1.dist-info → pydatamax-0.1.16.post2.dist-info}/top_level.txt +0 -0

datamax/utils/uno_handler.py CHANGED Viewed

@@ -1,4 +1,3 @@
-from loguru import logger
 import os
 import subprocess
 import threading
@@ -7,7 +6,9 @@ from contextlib import contextmanager
 from pathlib import Path
 from typing import Optional
-# 延迟导入标志和锁
+from loguru import logger
+# Lazy-import flag and lock
 _uno_imported = False
 _import_error = None
 _import_lock = threading.Lock()
@@ -16,23 +17,23 @@ _import_lock = threading.Lock()
 def _lazy_import_uno():
     """延迟导入 UNO 模块，避免与其他库冲突（线程安全）"""
     global _uno_imported, _import_error
-    # 快速检查，避免不必要的锁获取
+    # Quick check to avoid acquiring the lock unnecessarily
     if _uno_imported:
         return True
     with _import_lock:
-        # 双重检查锁定模式
+        # Double-checked locking pattern
         if _uno_imported:
             return True
         try:
-            # 在这里导入所有 UNO 相关的模块
+            # Import all UNO-related modules here
             global uno, PropertyValue, NoConnectException
             import uno
             from com.sun.star.beans import PropertyValue
             from com.sun.star.connection import NoConnectException
             _uno_imported = True
             logger.info("✅ UNO模块导入成功")
             return True
@@ -53,11 +54,12 @@ def ensure_uno_imported():
         )
-# 检查 UNO 是否可用（但不立即导入）
+# Check whether UNO is available (without importing it immediately)
 def check_uno_available():
     """检查 UNO 是否可用（不会真正导入）"""
     try:
         import importlib.util
         spec = importlib.util.find_spec("uno")
         return spec is not None
     except:
@@ -72,7 +74,7 @@ class UnoManager:
     UNO管理器，用于管理LibreOffice服务实例和文档转换
     单线程版本，适合稳定高效的文档处理
     """
     def __init__(self, host: str = "localhost", port: int = 2002, timeout: int = 30):
         """
         初始化UNO管理器
@@ -82,9 +84,9 @@ class UnoManager:
             port: LibreOffice服务端口
             timeout: 连接超时时间（秒）
         """
-        # 确保UNO已导入（使用线程安全的方式）
+        # Ensure that UNO has been imported (in a thread-safe manner)
         ensure_uno_imported()
         self.host = host
         self.port = port
         self.timeout = timeout
@@ -102,12 +104,12 @@ class UnoManager:
         """启动LibreOffice服务"""
         logger.info(f"🌟 启动LibreOffice服务，监听端口 {self.port}...")
-        # 检查是否已有服务在运行
+        # Check whether a soffice service is already running
         if self._check_soffice_running():
             logger.info("✅ LibreOffice服务已在运行")
             return
-        # 启动新的服务实例
+        # Start a new soffice service instance
         cmd = [
             "soffice",
             "--headless",
@@ -125,22 +127,24 @@ class UnoManager:
                 cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL
             )
             logger.info(f"⏳ 等待LibreOffice服务启动...")
-            # 智能等待：轮询检查服务状态，给不同性能机器弹性时间
+            # Intelligent waiting: Polling to check service status, providing flexible time for machines of different performance levels.
             start_time = time.time()
-            check_interval = 1  # 每1秒检查一次
-            max_wait_time = 30    # 最大等待30秒
+            check_interval = 1  # checking every sec
+            max_wait_time = 30  # wait for max 30 sec
             while time.time() - start_time < max_wait_time:
                 if self._check_soffice_running():
                     elapsed = time.time() - start_time
                     logger.info(f"✅ LibreOffice服务启动成功 (耗时 {elapsed:.1f}秒)")
                     return
-                logger.debug(f"🔄 服务未就绪，继续等待... (已等待 {time.time() - start_time:.1f}秒)")
+                logger.debug(
+                    f"🔄 服务未就绪，继续等待... (已等待 {time.time() - start_time:.1f}秒)"
+                )
                 time.sleep(check_interval)
-            # 超时仍未启动
+            # Timed out: the service still has not started
             raise Exception(f"LibreOffice服务启动超时 (等待了{max_wait_time}秒)")
         except Exception as e:
@@ -168,38 +172,38 @@ class UnoManager:
         """连接到LibreOffice服务"""
         with self._lock:
             if self._connected and self._desktop is not None:
-                return  # 已连接
+                return  # connected
             self._start_soffice_service()
             logger.info(f"🔌 连接到LibreOffice服务...")
             start_time = time.time()
             while time.time() - start_time < self.timeout:
                 try:
-                    # 获取组件上下文
+                    # get context
                     local_ctx = uno.getComponentContext()
                     resolver = local_ctx.ServiceManager.createInstanceWithContext(
                         "com.sun.star.bridge.UnoUrlResolver", local_ctx
                     )
-                    # 连接到LibreOffice
+                    # connect to LibreOffice
                     self._ctx = resolver.resolve(f"uno:{self.connection_string}")
                     self._desktop = self._ctx.ServiceManager.createInstanceWithContext(
                         "com.sun.star.frame.Desktop", self._ctx
                     )
                     self._connected = True
                     logger.info("✅ 成功连接到LibreOffice服务")
                     return
                 except NoConnectException:
                     logger.debug("⏳ 等待LibreOffice服务就绪...")
                     time.sleep(1)
                 except Exception as e:
                     logger.error(f"❌ 连接失败: {str(e)}")
                     time.sleep(1)
             raise TimeoutError(f"连接LibreOffice服务超时（{self.timeout}秒）")
     def disconnect(self):
@@ -240,10 +244,10 @@ class UnoManager:
         """
         self.connect()
-        # 将路径转换为URL格式
+        # Convert the path to a file URL
         file_url = uno.systemPathToFileUrl(os.path.abspath(file_path))
-        # 打开文档
+        # open file
         properties = []
         properties.append(self._make_property("Hidden", True))
         properties.append(self._make_property("ReadOnly", True))
@@ -285,53 +289,63 @@ class UnoManager:
             if document is None:
                 raise Exception(f"无法打开文档: {input_path}")
-            # 准备输出属性
+            # Prepare the output properties
             properties = []
-            # 设置过滤器
+            # set filter
             if filter_name:
                 properties.append(self._make_property("FilterName", filter_name))
             else:
-                # 根据格式自动选择过滤器
+                # choose filter by format
                 if output_format == "txt":
-                    # 对于文本格式，尝试多个过滤器
+                    # For text output, try several filters in turn
                     filter_options = [
                         ("Text (encoded)", "UTF8"),
                         ("Text", None),
-                        ("HTML (StarWriter)", None)
+                        ("HTML (StarWriter)", None),
                     ]
                     success = False
                     for filter_name, filter_option in filter_options:
                         try:
                             properties = []
-                            properties.append(self._make_property("FilterName", filter_name))
+                            properties.append(
+                                self._make_property("FilterName", filter_name)
+                            )
                             if filter_option:
-                                properties.append(self._make_property("FilterOptions", filter_option))
-                            # 确保输出目录存在
+                                properties.append(
+                                    self._make_property("FilterOptions", filter_option)
+                                )
+                            # ensuring that the output directory exists.
                             output_dir = os.path.dirname(output_path)
                             if output_dir and not os.path.exists(output_dir):
                                 os.makedirs(output_dir)
-                            # 转换为URL格式
-                            output_url = uno.systemPathToFileUrl(os.path.abspath(output_path))
+                            # Convert to a file URL
+                            output_url = uno.systemPathToFileUrl(
+                                os.path.abspath(output_path)
+                            )
-                            # 执行转换
+                            # Perform the conversion
                             document.storeToURL(output_url, properties)
-                            logger.info(f"✅ 文档转换成功 (使用过滤器: {filter_name}): {output_path}")
+                            logger.info(
+                                f"✅ 文档转换成功 (使用过滤器: {filter_name}): {output_path}"
+                            )
                             success = True
                             break
                         except Exception as e:
                             logger.debug(f"🔄 过滤器 {filter_name} 失败: {str(e)}")
                             continue
                     if not success:
-                        raise Exception(f"所有文本过滤器都失败，无法转换文档: {input_path}")
-                    return  # 已经完成转换，直接返回
+                        raise Exception(
+                            f"所有文本过滤器都失败，无法转换文档: {input_path}"
+                        )
+                    return  # conversion already done; return directly
                 else:
-                    # 其他格式使用默认过滤器
+                    # Other formats use the default filter
                     filter_map = {
                         "pdf": "writer_pdf_Export",
                         "docx": "MS Word 2007 XML",
@@ -343,15 +357,15 @@ class UnoManager:
                             self._make_property("FilterName", filter_map[output_format])
                         )
-            # 确保输出目录存在
+            # ensuring that the output directory exists
             output_dir = os.path.dirname(output_path)
             if output_dir and not os.path.exists(output_dir):
                 os.makedirs(output_dir)
-            # 转换为URL格式
+            # Convert to a file URL
             output_url = uno.systemPathToFileUrl(os.path.abspath(output_path))
-            # 执行转换
+            # Perform the conversion
             document.storeToURL(output_url, properties)
             logger.info(f"✅ 文档转换成功: {output_path}")
@@ -363,7 +377,7 @@ class UnoManager:
         return prop
-# 全局单例UnoManager
+# global Singleton UnoManager
 _global_uno_manager: Optional[UnoManager] = None
 _manager_lock = threading.Lock()
@@ -371,20 +385,20 @@ _manager_lock = threading.Lock()
 def get_uno_manager() -> UnoManager:
     """获取全局单例UNO管理器"""
     global _global_uno_manager
     if _global_uno_manager is None:
         with _manager_lock:
             if _global_uno_manager is None:
                 _global_uno_manager = UnoManager()
                 logger.info("🎯 创建全局单例UnoManager (单线程模式)")
     return _global_uno_manager
 def cleanup_uno_manager():
     """清理全局UNO管理器"""
     global _global_uno_manager
     with _manager_lock:
         if _global_uno_manager is not None:
             try:
@@ -402,36 +416,34 @@ def uno_manager_context():
     try:
         yield manager
     finally:
-        # 在单线程模式下，保持连接以提高效率
+        # Maintain connections to improve efficiency in single-threaded mode
         pass
 def convert_with_uno(
-    input_path: str,
-    output_format: str,
-    output_dir: Optional[str] = None
+    input_path: str, output_format: str, output_dir: Optional[str] = None
 ) -> str:
     """
     使用UNO转换文档格式（便捷函数）
     Args:
         input_path: 输入文件路径
         output_format: 输出格式
         output_dir: 输出目录（可选，默认为输入文件所在目录）
     Returns:
         输出文件路径
     """
     input_path = Path(input_path)
     if output_dir is None:
         output_dir = input_path.parent
     else:
         output_dir = Path(output_dir)
     output_path = output_dir / f"{input_path.stem}.{output_format}"
     with uno_manager_context() as manager:
         manager.convert_document(str(input_path), str(output_path), output_format)
     return str(output_path)

{pydatamax-0.1.16.post1.dist-info → pydatamax-0.1.16.post2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydatamax
-Version: 0.1.16.post1
+Version: 0.1.16.post2
 Summary: A library for parsing and converting various file formats.
 Home-page: https://github.com/Hi-Dolphin/datamax
 Author: ccy
@@ -113,7 +113,7 @@ qa_data = dm.get_pre_label(
     question_number=5,     # 每块生成问题数
     max_workers=5          # 并发数
 )
-dm.save_label_data(res)
+dm.save_label_data(qa_data)
 ```
 ## 📖 Detailed Documentation
@@ -316,6 +316,58 @@ pip install -r requirements.txt
 python setup.py install
 ```
+### Developer Mode
+For developers who want to contribute to the project or make modifications, we recommend using developer mode for a better development experience.
+#### Setup Developer Mode
+```bash
+# Clone the repository
+git clone https://github.com/Hi-Dolphin/datamax.git
+cd datamax
+# Create virtual environment (recommended)
+python -m venv venv
+source venv/bin/activate  # On Windows: venv\Scripts\activate
+# Install in developer mode
+pip install -e .
+```
+#### Benefits of Developer Mode
+- **Live Updates**: Changes to source code are immediately reflected without reinstallation
+- **Easy Testing**: Test your modifications instantly
+- **Debugging**: Better debugging experience with direct access to source code
+- **Development Workflow**: Seamless integration with your development environment
+#### Development Commands
+```bash
+# Run tests
+pytest
+# Install development dependencies
+pip install -r requirements-dev.txt  # if available
+# Check code style
+flake8 datamax/
+black datamax/
+# Build package
+python setup.py sdist bdist_wheel
+```
+#### Making Changes
+After installing in developer mode, you can:
+1. Edit source code in the `datamax/` directory
+2. Changes are automatically available when you import the module
+3. Test your changes immediately without reinstalling
+4. Submit pull requests with your improvements
 ## 📋 System Requirements
 - Python >= 3.10

pydatamax-0.1.16.post2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,39 @@
+datamax/__init__.py,sha256=IGJxWkFpUj1xuHfwtPTrNqsRdLB4jBZIweAVHzDKrvU,29
+datamax/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datamax/loader/core.py,sha256=Ld4PmMcbKbsFyU_ynqXxpB9x3IJ34c3hfJBcUiSthrA,5370
+datamax/loader/minio_handler.py,sha256=VpQ5EHZfLaw0e2JXflAbgPK_plmM_VkPXEiKtIZlQL0,6876
+datamax/loader/oss_handler.py,sha256=ZO8ZbbA1oyuNN4Z7iVgSbMArYgJ1gvpqADkXDNDw4y0,7648
+datamax/parser/__init__.py,sha256=3tCt1bmTjJ0sroivt60AoQuZyHH8AtCvn664Qtoh-60,56
+datamax/parser/base.py,sha256=yEPdk3K-vTf2JnIcTczxEDoMQtVKva9tp2nSACeXOB0,3153
+datamax/parser/core.py,sha256=eglNe4Vk6U3XEUYT2oLovWfuL1XeFDc0KnRahYN24Mk,19208
+datamax/parser/csv_parser.py,sha256=PPRqL4MKDymBgFYo0xrgrO8HB3jFcrXTizG27fXVEag,1698
+datamax/parser/doc_parser.py,sha256=x6aJMQmNCUR2WA0hMvsro1atz6kWK3pRLPzlLfwCUw0,32273
+datamax/parser/docx_parser.py,sha256=40xq86jLI3nayg0dthBnWSMN2qYQeyUNWh5wWJ8Lar8,37658
+datamax/parser/epub_parser.py,sha256=zjLp1ha_oQBGIxvgzAyHzO0ZZJQt1JfoAZ9TM8liZ0o,2708
+datamax/parser/html_parser.py,sha256=b5Rvonj0cScYI2gYfKAfBplp6C8kT2ataf3J0lO40Ok,2017
+datamax/parser/image_parser.py,sha256=HYNN0oqA1LjI8XBQN09nnWfnVrLcDHcXHOytf8z6NAk,2536
+datamax/parser/json_parser.py,sha256=wzUKv1lH35PtM8uXunxNZ6ykHEdI_m02SIW1Y7y-wxc,1826
+datamax/parser/md_parser.py,sha256=62vBHAsHotCC1bNquh7jt8EZuoPyANrbkRTF7YlEOMs,3019
+datamax/parser/pdf_parser.py,sha256=pm6WVNe2nP5K-XHwNfa3BoS_oNDDdJ5kOo5QERICfkI,5245
+datamax/parser/ppt_parser.py,sha256=7KmSrxyPRYlYqclMcsxQmLTD3eR1mbED4qgm1mjF4Mg,5808
+datamax/parser/pptx_parser.py,sha256=huMbv9JMGI7Nvs3v-TGn7MOpicb8-z3L5G24PxBVDzw,2546
+datamax/parser/txt_parser.py,sha256=GyPFuYQ00mI1shmMFi_gtDJ8B-C2rMI9rI0sjay32Hs,2630
+datamax/parser/xls_parser.py,sha256=J-Eumrh3oxwr06YHHfUplJlskwBlsEoj3sQ9OVXBFCo,1819
+datamax/parser/xlsx_parser.py,sha256=Uj1OisEVAzO8mMRcTHpitBstS0M7aSS4UehnE78pvxU,9468
+datamax/utils/__init__.py,sha256=elPbB7MSk5VfcmKmhaXCTUsVXP9vxd8C-DAMG3JqbDU,1491
+datamax/utils/constants.py,sha256=1hzHnYsm43Q36Czc7OnC-zJVTunThx82d_ZZAZBErHw,4565
+datamax/utils/data_cleaner.py,sha256=2sfjXkDaEXavr98Ezj1BWG4uJQPUzeR99172tH43-Yk,10454
+datamax/utils/env_setup.py,sha256=lXPAL6WGkjOBgqTa2A0li5YS2TQ96cvAR4OhJjQP4pA,3638
+datamax/utils/gotocr_pdf.py,sha256=LHQ4nIFNC47b7hLWzMGkk_UsLmIxMLnUhRa48iwJo48,8796
+datamax/utils/lifecycle_types.py,sha256=rvHB4zwzS_nlWKUtWA37L9dJNvx6ol5F2-x2eEf6zJk,625
+datamax/utils/mineru_operator.py,sha256=mBw9xuCwJZmmOLaUFhw2c3JPDB7KMjWqSlEzbKCRXc8,2276
+datamax/utils/paddleocr_pdf_operator.py,sha256=5l7P7wCGd4-Qph3NMTDdHR6nStjafDMNpX4sSCFv5qQ,3637
+datamax/utils/ppt_extract.py,sha256=Sf4H3TKdK6BnKRv0sw5JnfKSQH9l6u5XUwLTd78KB94,6619
+datamax/utils/qa_generator.py,sha256=q7pzZ3DWItRQLBQH1jab2TBkjJvcKfkBuzlN0wxZ5Rs,24353
+datamax/utils/tokenizer.py,sha256=j93Uky4bYDKZKT-MOtenZb36MoRPNnYk8sP9t_FSQqk,860
+datamax/utils/uno_handler.py,sha256=xITU8FGeeBtHRc-Aj4lbKHGvKVslWEwWZOIUZiP_ghY,15447
+pydatamax-0.1.16.post2.dist-info/licenses/LICENSE,sha256=RltoeMa9c1956S08ThvZE2yJSPbnem68Y8cmiIfOgco,1088
+pydatamax-0.1.16.post2.dist-info/METADATA,sha256=pTi_avX8RBNYxHcPS6CmZnESFqGlX-TwvqMzF6Ilx0Q,11145
+pydatamax-0.1.16.post2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pydatamax-0.1.16.post2.dist-info/top_level.txt,sha256=N9TrwI3GKnWW07RRXHr0xX5Bm8dIM_sahfAnf9j8J9M,8
+pydatamax-0.1.16.post2.dist-info/RECORD,,

pydatamax-0.1.16.post1.dist-info/RECORD DELETED Viewed

@@ -1,38 +0,0 @@
-datamax/__init__.py,sha256=IGJxWkFpUj1xuHfwtPTrNqsRdLB4jBZIweAVHzDKrvU,29
-datamax/loader/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datamax/loader/core.py,sha256=NGnK2m59GRBauYxZST0kyX5f4zhvAOk4Z5bVoF0CjGo,5218
-datamax/loader/minio_handler.py,sha256=e7ZUlwoStVe5iQfAVfNgEwRLxen4NbxwokpJZl6AR0U,6557
-datamax/loader/oss_handler.py,sha256=ZO8ZbbA1oyuNN4Z7iVgSbMArYgJ1gvpqADkXDNDw4y0,7648
-datamax/parser/__init__.py,sha256=sIB1N4B_fVguSBN-Uw9tGxAO6s0oi4Tq4kRZ59YlUKo,52
-datamax/parser/base.py,sha256=FamDV6csc3aXVbobMR1lPNtVpvYMO19koRJW9poj_gE,2590
-datamax/parser/core.py,sha256=pySissrF6kVVAzT5abIlQ-4cUliFu1HBWjcD6psNkYA,16845
-datamax/parser/csv_parser.py,sha256=lHQs1MHK9WM4Vl0p9nsE3fFhewF0EoXZUhtk8ixznRw,1028
-datamax/parser/doc_parser.py,sha256=qPKpZy_p1veV2AodqEQU6LzqmT7y1PANlPtt0CYoHeg,30837
-datamax/parser/docx_parser.py,sha256=wdDGgeYIDg1Se493XZhlduxKjtYZ58Uqxltm2vt9Dy4,36691
-datamax/parser/epub_parser.py,sha256=K4eCS4wIXJzDicvtVAfQT8yt1gFHeibZN5-EdQZfJe8,1621
-datamax/parser/html_parser.py,sha256=5ACrVc03Q9pJqWI_b0EtRgOYy0eMYJq4podgHGD68Z8,1453
-datamax/parser/image_parser.py,sha256=UH3duPvB7Xu6CFlEeAukX5uJ8VlqnMR89hcLsW2O-aU,1281
-datamax/parser/json_parser.py,sha256=2Ns2Lm6sei9TnDaFFYvl-xhyhse12sMJBwjKNACw4po,1072
-datamax/parser/md_parser.py,sha256=rHJqtRV78XgQuKtDdwn1LcgRSUEuhGBqN5uaHG6oPT4,2251
-datamax/parser/pdf_parser.py,sha256=YOJFOEC8DxWLAE6yNd2x6qMvYJl3sWVcM1eA8R4uA30,4116
-datamax/parser/ppt_parser.py,sha256=0OlsIrzZZJnYZqLmQkUz4J_Hiv-rQHLHJnHIsw345c8,4631
-datamax/parser/pptx_parser.py,sha256=yWajVd8kpyTdFavR8XcbwxOj94uNBswDoMHfSOycn0o,1870
-datamax/parser/txt_parser.py,sha256=NXs7aNpm1PUwiUSlN1RU23ittuuQSBaBNI3KeQjJFXs,1750
-datamax/parser/xls_parser.py,sha256=iNMx8iPakjE984dkaFL-oUBYWpQwxbWoDnQdwfAeeGM,980
-datamax/parser/xlsx_parser.py,sha256=hUOFqkqkI0XPcwwrp2cs4PFKbChpZtb8orGsZc9kxJ0,9089
-datamax/utils/__init__.py,sha256=75D4WFE_FVG9MyT8qWtvtlgzuuRelTC7ObSqqfjDKIY,1476
-datamax/utils/constants.py,sha256=1hzHnYsm43Q36Czc7OnC-zJVTunThx82d_ZZAZBErHw,4565
-datamax/utils/data_cleaner.py,sha256=TrrxC1r0__wuOhrQSJZcJKoEIyB4eNKWZkA1IoBYhyQ,9937
-datamax/utils/env_setup.py,sha256=p_7sqHwyXroeOI_yFZpUOK6wOGmPVmf-gBa6M3351O4,3539
-datamax/utils/gotocr_pdf.py,sha256=A7sn77EQBDbAe-4edCBUlYXKuE2mY7JcsFGm8U3-xbE,8744
-datamax/utils/mineru_operator.py,sha256=4i4FtDkDE61FWPyRoDjPujHYJq_kDUAkwlowmFWdEOA,2303
-datamax/utils/paddleocr_pdf_operator.py,sha256=SW06dts1SxDnUvyf5zWYpAN_6t9PLtJXUSsYhSS317I,3645
-datamax/utils/ppt_extract.py,sha256=jBVGYEsBGPjHqyq7drHTOM8MnFOwqKyHhbkKmEAryAk,6307
-datamax/utils/qa_generator.py,sha256=pXxdFm_EnWgMuilfmLKgy2c6NDexQZN8nWxT-bYBt74,12548
-datamax/utils/tokenizer.py,sha256=o78GPmeJ3vs3-SF0b2nMm35XtbrCKbrhDW0gI9gqGl4,880
-datamax/utils/uno_handler.py,sha256=ehUyk3I8dxMzjK8IzNO5nKcmc-t97ERMUqmSbYPeABc,15435
-pydatamax-0.1.16.post1.dist-info/licenses/LICENSE,sha256=RltoeMa9c1956S08ThvZE2yJSPbnem68Y8cmiIfOgco,1088
-pydatamax-0.1.16.post1.dist-info/METADATA,sha256=6I4bYRn8noQbBVURScRDut0fFksMDiU3wAXSNgpavDg,9801
-pydatamax-0.1.16.post1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-pydatamax-0.1.16.post1.dist-info/top_level.txt,sha256=N9TrwI3GKnWW07RRXHr0xX5Bm8dIM_sahfAnf9j8J9M,8
-pydatamax-0.1.16.post1.dist-info/RECORD,,

{pydatamax-0.1.16.post1.dist-info → pydatamax-0.1.16.post2.dist-info}/WHEEL RENAMED Viewed

File without changes

{pydatamax-0.1.16.post1.dist-info → pydatamax-0.1.16.post2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{pydatamax-0.1.16.post1.dist-info → pydatamax-0.1.16.post2.dist-info}/top_level.txt RENAMED Viewed

File without changes

pydatamax 0.1.16.post1__py3-none-any.whl → 0.1.16.post2__py3-none-any.whl

pydatamax 0.1.16.post1__py3-none-any.whl → 0.1.16.post2__py3-none-any.whl