hjxdl 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their public registries, and is provided for informational purposes only.
- hdl/_version.py +2 -2
- hdl/utils/llm/embs.py +119 -4
- {hjxdl-0.0.17.dist-info → hjxdl-0.0.19.dist-info}/METADATA +1 -1
- {hjxdl-0.0.17.dist-info → hjxdl-0.0.19.dist-info}/RECORD +6 -6
- {hjxdl-0.0.17.dist-info → hjxdl-0.0.19.dist-info}/WHEEL +0 -0
- {hjxdl-0.0.17.dist-info → hjxdl-0.0.19.dist-info}/top_level.txt +0 -0
hdl/_version.py
CHANGED
hdl/utils/llm/embs.py
CHANGED
@@ -1,8 +1,14 @@
-class HFEmbedder():
+from sentence_transformers import SentenceTransformer
+
+
+class BEEmbedder():
     def __init__(
         self,
         emb_name: str = "bge",
-        emb_dir: str = None
+        emb_dir: str = None,
+        device: str = 'cuda',
+        batch_size: int = 16,
+        max_length: int = 1024,
     ) -> None:
         """Initializes the object with the specified embedding name and directory.
 
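This first hunk renames the FlagEmbedding-backed wrapper (formerly HFEmbedder) to BEEmbedder and adds device, batch_size, and max_length keyword arguments. A minimal construction sketch follows; the checkpoint path is an assumption for illustration, not part of the diff:

from hdl.utils.llm.embs import BEEmbedder

# Hypothetical local BGE-M3 checkpoint directory; substitute your own.
emb = BEEmbedder(
    emb_name="bge",
    emb_dir="/models/bge-m3",
    device="cuda",
    batch_size=16,
    max_length=1024,
)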
@@ -15,6 +21,14 @@ class HFEmbedder():
         """
         self.emb_name = emb_name
         self.emb_dir = emb_dir
+        self.batch_size = batch_size
+
+        self.model_kwargs = {'device': device}
+        self.encode_kwargs = {
+            'batch_size': self.batch_size,
+            'normalize_embeddings': True,
+            'show_progress_bar': False
+        }
 
         if "bge" in emb_name.lower():
             from FlagEmbedding import BGEM3FlagModel
@@ -31,7 +45,7 @@ class HFEmbedder():
 
     def encode(
         self,
-        sentences
+        sentences,
     ):
         """Encode the input sentences using the model.
 
@@ -47,7 +61,9 @@ class HFEmbedder():
             sentences,
             return_dense=True,
             return_sparse=True,
-            return_colbert_vecs=False
+            return_colbert_vecs=False,
+            batch_size=self.batch_size,
+            max_length=self.max_length
         )
         if "bge" in self.emb_name.lower():
             return output["dense_vecs"]
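This hunk threads the new per-instance batch_size and max_length values into FlagEmbedding's encoder. For reference, a standalone sketch of the underlying call; the model path and use_fp16 flag are assumptions:

from FlagEmbedding import BGEM3FlagModel

# Load a BGE-M3 model (the path here is illustrative).
model = BGEM3FlagModel("BAAI/bge-m3", use_fp16=True)
output = model.encode(
    ["a sample sentence"],
    return_dense=True,
    return_sparse=True,
    return_colbert_vecs=False,
    batch_size=16,    # previously hard-wired, now configurable per instance
    max_length=1024,  # token cap applied when encoding each sentence
)
dense = output["dense_vecs"]  # dense embeddings, matching the return path above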
@@ -71,3 +87,102 @@ class HFEmbedder():
         output_2 = self.encode(sentences_2)
         similarity = output_1 @ output_2.T
         return similarity
+
+
+class HFEmbedder():
+    def __init__(
+        self,
+        emb_dir: str = None,
+        device: str = 'cuda',
+        trust_remote_code: bool = True,
+        *args, **kwargs
+    ) -> None:
+        """Initialize the class with the specified parameters.
+
+        Args:
+            emb_dir (str): Directory path to the embeddings.
+            device (str): Device to be used for computation (default is 'cuda').
+            trust_remote_code (bool): Whether to trust remote code (default is True).
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+                - modules: Optional[Iterable[torch.nn.modules.module.Module]] = None,
+                - device: Optional[str] = None,
+                - prompts: Optional[Dict[str, str]] = None,
+                - default_prompt_name: Optional[str] = None,
+                - cache_folder: Optional[str] = None,
+                - revision: Optional[str] = None,
+                - token: Union[str, bool, NoneType] = None,
+                - use_auth_token: Union[str, bool, NoneType] = None,
+                - truncate_dim: Optional[int] = None,
+
+        Returns:
+            None
+        """
+
+        self.device = device
+        self.emb_dir = emb_dir
+
+        self.model = SentenceTransformer(
+            emb_dir,
+            device=device,
+            trust_remote_code=trust_remote_code,
+            *args, **kwargs
+        ).half()
+        # self.model = model.half()
+
+    def encode(
+        self,
+        sentences: list[str],
+        *args, **kwargs
+    ):
+        """Encode the input sentences using the model.
+
+        Args:
+            sentences (list[str]): List of input sentences to encode.
+            *args: Variable length argument list.
+            **kwargs: Arbitrary keyword arguments.
+                - prompt_name: Optional[str] = None,
+                - prompt: Optional[str] = None,
+                - batch_size: int = 32,
+                - show_progress_bar: bool = None,
+                - output_value: Optional[Literal['sentence_embedding', 'token_embeddings']] = 'sentence_embedding',
+                - precision: Literal['float32', 'int8', 'uint8', 'binary', 'ubinary'] = 'float32',
+                - convert_to_numpy: bool = True,
+                - convert_to_tensor: bool = False,
+                - device: str = None,
+                - normalize_embeddings: bool = False,
+
+        Returns:
+            output: Encoded representation of the input sentences.
+        """
+        if isinstance(sentences, str):
+            sentences = [sentences]
+        if kwargs.get("convert_to_tensor", False) is True:
+            kwargs["device"] = self.device
+        output = self.model.encode(
+            sentences,
+            *args, **kwargs
+        )
+        return output
+
+    def sim(
+        self,
+        sentences_1,
+        sentences_2,
+        *args, **kwargs
+    ):
+        """Calculate the similarity between two sets of sentences.
+
+        Args:
+            sentences_1 (list): List of sentences for the first set.
+            sentences_2 (list): List of sentences for the second set.
+            *args: Additional positional arguments to be passed to the encode function.
+            **kwargs: Additional keyword arguments to be passed to the encode function.
+
+        Returns:
+            numpy.ndarray: Similarity matrix between the two sets of sentences.
+        """
+        output_1 = self.encode(sentences_1, *args, **kwargs)
+        output_2 = self.encode(sentences_2, *args, **kwargs)
+        similarity = output_1 @ output_2.T
+        return similarity
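The bulk of the release is this new SentenceTransformer-backed HFEmbedder, added alongside the renamed BGE wrapper. A hedged usage sketch; the checkpoint directory is an assumption, and any sentence-transformers-compatible model should work:

from hdl.utils.llm.embs import HFEmbedder

# Hypothetical model directory; the constructor loads the model in fp16
# via .half(), so a CUDA device is the practical target.
emb = HFEmbedder(emb_dir="/models/my-st-model", device="cuda")
vecs = emb.encode(["hello world"], normalize_embeddings=True)  # numpy array by default
sim = emb.sim(["query"], ["doc one", "doc two"])  # similarity via output_1 @ output_2.T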
{hjxdl-0.0.17.dist-info → hjxdl-0.0.19.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 hdl/__init__.py,sha256=5sZZNySv08wwfzJcSDssGTqUn9wlmDsR6R4XB8J8mFM,70
-hdl/_version.py,sha256=
+hdl/_version.py,sha256=CYabGzkNwriz1Zjt5kNvBOZD6wtqQ_twYh4s5xzmT-I,413
 hdl/args/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hdl/args/loss_args.py,sha256=s7YzSdd7IjD24rZvvOrxLLFqMZQb9YylxKeyelSdrTk,70
 hdl/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -85,10 +85,10 @@ hdl/utils/general/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
 hdl/utils/general/glob.py,sha256=8-RCnt6L297wMIfn34ZAMCsGCZUjHG3MGglGZI1cX0g,491
 hdl/utils/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hdl/utils/llm/chat.py,sha256=H2c8assJlSdZQKIfPkYrVZHqv66TsdsxtaLXv0kNe1w,11565
-hdl/utils/llm/embs.py,sha256=
+hdl/utils/llm/embs.py,sha256=sC8tga7HgDwPI2m7TDWKp9kkxEIMxEyMtgmEhfRi4vI,6362
 hdl/utils/schedulers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 hdl/utils/schedulers/norm_lr.py,sha256=bDwCmdEK-WkgxQMFBiMuchv8Mm7C0-GZJ6usm-PQk14,4461
-hjxdl-0.0.
-hjxdl-0.0.
-hjxdl-0.0.
-hjxdl-0.0.
+hjxdl-0.0.19.dist-info/METADATA,sha256=W1TN19HaXAt3kP6BvCpHWNam7w9lhe3LlVql4QCh5jw,543
+hjxdl-0.0.19.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
+hjxdl-0.0.19.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
+hjxdl-0.0.19.dist-info/RECORD,,
{hjxdl-0.0.17.dist-info → hjxdl-0.0.19.dist-info}/WHEEL
File without changes
{hjxdl-0.0.17.dist-info → hjxdl-0.0.19.dist-info}/top_level.txt
File without changes