PyPI - hjxdl - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl - Mend

hjxdl 0.1.4-py3-none-any.whl → 0.1.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

hdl/_version.py CHANGED Viewed

@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
-__version__ = version = '0.1.4'
-__version_tuple__ = version_tuple = (0, 1, 4)
+__version__ = version = '0.1.5'
+__version_tuple__ = version_tuple = (0, 1, 5)

hdl/utils/llm/extract.py CHANGED Viewed

@@ -13,13 +13,22 @@ class DocExtractor():
         ltp_model_path: str = None,
         lang: str = "chi_sim"
     ) -> None:
+        """Initialize the object with the specified LTP model path and language.
+        Args:
+            ltp_model_path (str): The file path to the LTP model. Default is None.
+            lang (str): The language to be used for processing. Default is "chi_sim".
+        Returns:
+            None
+        """
         self.ltp_model_path = ltp_model_path
         self.lang = lang
         self.split = None
         if self.ltp_model_path is not None:
             from ltp import StnSplit, LTP
-            ltp  = LTP(ltp_model)
+            ltp  = LTP(self.ltp_model_path)
             self.split = StnSplit()
             # sents = self.split.split(text)
@@ -38,6 +47,14 @@ class DocExtractor():
     def text_from_plain(
         txt_path
     ):
+        """Reads and returns the text content from a plain text file.
+            Args:
+                txt_path (str): The path to the plain text file.
+            Returns:
+                str: The text content read from the file.
+        """
         with open(txt_path, "r") as f:
             text = f.read()
         return text
@@ -46,13 +63,29 @@ class DocExtractor():
     def extract_text_from_image(
         image: Image.Image,
     ) -> str:
+        """Extracts text from the given image using pytesseract.
+        Args:
+            image (PIL.Image.Image): The input image from which text needs to be extracted.
+        Returns:
+            str: The extracted text from the image.
+        """
         return pytesseract.image_to_string(image, lang=self.lang)
     @staticmethod
     def is_within_bbox(
         bbox1, bbox2
     ):
-        """Check if bbox1 is within bbox2."""
+        """Check if bbox1 is within bbox2.
+        Args:
+            bbox1 (list): List of 4 integers representing the bounding box coordinates [x_min, y_min, x_max, y_max].
+            bbox2 (list): List of 4 integers representing the bounding box coordinates [x_min, y_min, x_max, y_max].
+        Returns:
+            bool: True if bbox1 is within bbox2, False otherwise.
+        """
         return bbox1[0] >= bbox2[0] and bbox1[1] >= bbox2[1] and bbox1[2] <= bbox2[2] and bbox1[3] <= bbox2[3]
     def text_tables_from_pdf(
@@ -60,6 +93,15 @@ class DocExtractor():
         pdf_path,
         table_from_pic: bool = False
     ):
+        """Extract text and tables from a PDF file.
+        Args:
+            pdf_path (str): Path to the PDF file.
+            table_from_pic (bool, optional): Whether to extract tables from images in the PDF. Defaults to False.
+        Returns:
+            tuple: A tuple containing a list of extracted texts and a list of extracted tables as DataFrames.
+        """
         all_tables = []
         all_texts = []
         with pdfplumber.open(pdf_path) as pdf:

{hjxdl-0.1.4.dist-info → hjxdl-0.1.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hjxdl
-Version: 0.1.4
+Version: 0.1.5
 Summary: A collection of functions for Jupyter notebooks
 Home-page: https://github.com/huluxiaohuowa/hdl
 Author: Jianxing Hu

{hjxdl-0.1.4.dist-info → hjxdl-0.1.5.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 hdl/__init__.py,sha256=5sZZNySv08wwfzJcSDssGTqUn9wlmDsR6R4XB8J8mFM,70
-hdl/_version.py,sha256=9GTNkADgEYZ6fEjCvZZUdKyqxiPIgtskLFZNJz7nq_U,411
+hdl/_version.py,sha256=zBVX2byWL6NrFlwjvahpnvSqDsdtebZW0K9WM_cj20U,411
 hdl/args/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hdl/args/loss_args.py,sha256=s7YzSdd7IjD24rZvvOrxLLFqMZQb9YylxKeyelSdrTk,70
 hdl/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -86,10 +86,10 @@ hdl/utils/general/glob.py,sha256=8-RCnt6L297wMIfn34ZAMCsGCZUjHG3MGglGZI1cX0g,491
 hdl/utils/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hdl/utils/llm/chat.py,sha256=H2c8assJlSdZQKIfPkYrVZHqv66TsdsxtaLXv0kNe1w,11565
 hdl/utils/llm/embs.py,sha256=sC8tga7HgDwPI2m7TDWKp9kkxEIMxEyMtgmEhfRi4vI,6362
-hdl/utils/llm/extract.py,sha256=qlthQiFQm5DfHDzimjQKotzLB7oPk5UTODsw22pzs80,4891
+hdl/utils/llm/extract.py,sha256=WbTlQmcPNfrKmzSZSKSdWLA0LqLAgoa4J_IcjxBXACI,6506
 hdl/utils/schedulers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hdl/utils/schedulers/norm_lr.py,sha256=bDwCmdEK-WkgxQMFBiMuchv8Mm7C0-GZJ6usm-PQk14,4461
-hjxdl-0.1.4.dist-info/METADATA,sha256=SCG5RpSG11LK0MmcU22aJOOV5Dmh_D7cCgi6kYaSnd0,542
-hjxdl-0.1.4.dist-info/WHEEL,sha256=-oYQCr74JF3a37z2nRlQays_SX2MqOANoqVjBBAP2yE,91
-hjxdl-0.1.4.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
-hjxdl-0.1.4.dist-info/RECORD,,
+hjxdl-0.1.5.dist-info/METADATA,sha256=bcWpSx6Y2t3rb8n8Ry--JImcj0wIJh6CcZRDdE3xzc8,542
+hjxdl-0.1.5.dist-info/WHEEL,sha256=-oYQCr74JF3a37z2nRlQays_SX2MqOANoqVjBBAP2yE,91
+hjxdl-0.1.5.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
+hjxdl-0.1.5.dist-info/RECORD,,

{hjxdl-0.1.4.dist-info → hjxdl-0.1.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{hjxdl-0.1.4.dist-info → hjxdl-0.1.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

hjxdl 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

hjxdl 0.1.4-py3-none-any.whl → 0.1.5-py3-none-any.whl