hjxdl 0.0.16__py3-none-any.whl → 0.0.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hdl/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.0.16'
16
- __version_tuple__ = version_tuple = (0, 0, 16)
15
+ __version__ = version = '0.0.18'
16
+ __version_tuple__ = version_tuple = (0, 0, 18)
hdl/utils/llm/chat.py CHANGED
@@ -18,6 +18,20 @@ class GGUF_M(Llama):
18
18
  *args,
19
19
  **kwargs
20
20
  ):
21
+ """Initialize the model with the specified parameters.
22
+
23
+ Args:
24
+ model_path (str): The path to the model.
25
+ device (str, optional): The device to use, either 'gpu' or 'cpu'. Defaults to 'gpu'.
26
+ generation_kwargs (dict, optional): Additional generation keyword arguments. Defaults to {}.
27
+ server_ip (str, optional): The IP address of the server. Defaults to "127.0.0.1".
28
+ server_port (int, optional): The port of the server. Defaults to 8000.
29
+ *args: Variable length argument list.
30
+ **kwargs: Arbitrary keyword arguments.
31
+
32
+ Raises:
33
+ KeyError: If 'num_threads' or 'max_context_length' is missing in generation_kwargs.
34
+ """
21
35
  print("正在从本地加载模型...")
22
36
  if device.lower() == 'cpu':
23
37
  super().__init__(
hdl/utils/llm/embs.py CHANGED
@@ -1,8 +1,14 @@
1
- class HFEmbedder():
1
+ from sentence_transformers import SentenceTransformer
2
+
3
+
4
+ class BEEmbedder():
2
5
  def __init__(
3
6
  self,
4
7
  emb_name: str = "bge",
5
- emb_dir: str = None
8
+ emb_dir: str = None,
9
+ device: str = 'cuda',
10
+ batch_size: int = 16,
11
+ max_length: int = 1024,
6
12
  ) -> None:
7
13
  """Initializes the object with the specified embedding name and directory.
8
14
 
@@ -15,6 +21,14 @@ class HFEmbedder():
15
21
  """
16
22
  self.emb_name = emb_name
17
23
  self.emb_dir = emb_dir
24
+ self.batch_size = batch_size
25
+
26
+ self.model_kwargs = {'device': device}
27
+ self.encode_kwargs = {
28
+ 'batch_size': self.batch_size,
29
+ 'normalize_embeddings': True,
30
+ 'show_progress_bar': False
31
+ }
18
32
 
19
33
  if "bge" in emb_name.lower():
20
34
  from FlagEmbedding import BGEM3FlagModel
@@ -31,7 +45,7 @@ class HFEmbedder():
31
45
 
32
46
  def encode(
33
47
  self,
34
- sentences
48
+ sentences,
35
49
  ):
36
50
  """Encode the input sentences using the model.
37
51
 
@@ -47,7 +61,9 @@ class HFEmbedder():
47
61
  sentences,
48
62
  return_dense=True,
49
63
  return_sparse=True,
50
- return_colbert_vecs=False
64
+ return_colbert_vecs=False,
65
+ batch_size=self.batch_size,
66
+ max_length=self.max_length
51
67
  )
52
68
  if "bge" in self.emb_name.lower():
53
69
  return output["dense_vecs"]
@@ -71,3 +87,102 @@ class HFEmbedder():
71
87
  output_2 = self.encode(sentences_2)
72
88
  similarity = output_1 @ output_2.T
73
89
  return similarity
90
+
91
+
92
+ class HFEmbedder():
93
+ def __init__(
94
+ self,
95
+ emb_dir: str = None,
96
+ device: str = 'cuda',
97
+ trust_remote_code: bool = True,
98
+ *args, **kwargs
99
+ ) -> None:
100
+ """Initialize the class with the specified parameters.
101
+
102
+ Args:
103
+ emb_dir (str): Directory path to the embeddings.
104
+ device (str): Device to be used for computation (default is 'cuda').
105
+ trust_remote_code (bool): Whether to trust remote code (default is True).
106
+ *args: Variable length argument list.
107
+ **kwargs: Arbitrary keyword arguments.
108
+ - modules: Optional[Iterable[torch.nn.modules.module.Module]] = None,
109
+ - device: Optional[str] = None,
110
+ - prompts: Optional[Dict[str, str]] = None,
111
+ - default_prompt_name: Optional[str] = None,
112
+ - cache_folder: Optional[str] = None,
113
+ - revision: Optional[str] = None,
114
+ - token: Union[str, bool, NoneType] = None,
115
+ - use_auth_token: Union[str, bool, NoneType] = None,
116
+ - truncate_dim: Optional[int] = None,
117
+
118
+ Returns:
119
+ None
120
+ """
121
+
122
+ self.device = device
123
+ self.emb_dir = emb_dir
124
+
125
+ self.model = SentenceTransformer(
126
+ emb_dir,
127
+ device=device,
128
+ trust_remote_code=trust_remote_code,
129
+ *args, **kwargs
130
+ ).half()
131
+ # self.model = model.half()
132
+
133
+ def encode(
134
+ self,
135
+ sentences: list[str],
136
+ *args, **kwargs
137
+ ):
138
+ """Encode the input sentences using the model.
139
+
140
+ Args:
141
+ sentences (list[str]): List of input sentences to encode.
142
+ *args: Variable length argument list.
143
+ **kwargs: Arbitrary keyword arguments.
144
+ - prompt_name: Optional[str] = None,
145
+ - prompt: Optional[str] = None,
146
+ - batch_size: int = 32,
147
+ - show_progress_bar: bool = None,
148
+ - output_value: Optional[Literal['sentence_embedding', 'token_embeddings']] = 'sentence_embedding',
149
+ - precision: Literal['float32', 'int8', 'uint8', 'binary', 'ubinary'] = 'float32',
150
+ - convert_to_numpy: bool = True,
151
+ - convert_to_tensor: bool = False,
152
+ - device: str = None,
153
+ - normalize_embeddings: bool = False,
154
+
155
+ Returns:
156
+ output: Encoded representation of the input sentences.
157
+ """
158
+ if isinstance(sentences, str):
159
+ sentences = [sentences]
160
+ if kwargs.get("convert_to_tensor") is True:
161
+ kwargs["device"] = self.device
162
+ output = self.model.encode(
163
+ sentences,
164
+ *args, **kwargs
165
+ )
166
+ return output
167
+
168
+ def sim(
169
+ self,
170
+ sentences_1,
171
+ sentences_2,
172
+ *args, **kwargs
173
+ ):
174
+ """Calculate the similarity between two sets of sentences.
175
+
176
+ Args:
177
+ sentences_1 (list): List of sentences for the first set.
178
+ sentences_2 (list): List of sentences for the second set.
179
+ *args: Additional positional arguments to be passed to the encode function.
180
+ **kwargs: Additional keyword arguments to be passed to the encode function.
181
+
182
+ Returns:
183
+ numpy.ndarray: Similarity matrix between the two sets of sentences.
184
+ """
185
+ output_1 = self.encode(sentences_1, *args, **kwargs)
186
+ output_2 = self.encode(sentences_2, *args, **kwargs)
187
+ similarity = output_1 @ output_2.T
188
+ return similarity
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hjxdl
3
- Version: 0.0.16
3
+ Version: 0.0.18
4
4
  Summary: A collection of functions for Jupyter notebooks
5
5
  Home-page: https://github.com/huluxiaohuowa/hdl
6
6
  Author: Jianxing Hu
@@ -1,5 +1,5 @@
1
1
  hdl/__init__.py,sha256=5sZZNySv08wwfzJcSDssGTqUn9wlmDsR6R4XB8J8mFM,70
2
- hdl/_version.py,sha256=C1uSJtiL3eBiXPR4mc0kyqGBqfZj8NitaI9frAY9h7I,413
2
+ hdl/_version.py,sha256=k900Q8XjzRKO6ZOHY0wFLzfzTGArI0sGircauDDJhu0,413
3
3
  hdl/args/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  hdl/args/loss_args.py,sha256=s7YzSdd7IjD24rZvvOrxLLFqMZQb9YylxKeyelSdrTk,70
5
5
  hdl/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -84,11 +84,11 @@ hdl/utils/database_tools/connect.py,sha256=KUnVG-8raifEJ_N0b3c8LkTTIfn9NIyw8LX6q
84
84
  hdl/utils/general/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
85
85
  hdl/utils/general/glob.py,sha256=8-RCnt6L297wMIfn34ZAMCsGCZUjHG3MGglGZI1cX0g,491
86
86
  hdl/utils/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
87
- hdl/utils/llm/chat.py,sha256=q5644lQ-oGoCZVMMVJp4qPUe4oGf7XZ-nuLItC8CeSw,10813
88
- hdl/utils/llm/embs.py,sha256=yCFtc25gUFas6kwgOGBFydeaHNyQMq5y1Chxl8TNEUQ,2190
87
+ hdl/utils/llm/chat.py,sha256=H2c8assJlSdZQKIfPkYrVZHqv66TsdsxtaLXv0kNe1w,11565
88
+ hdl/utils/llm/embs.py,sha256=MqOD58OhgA-rDeBSwnyYrklowxSNGHpUkm-FwisqW44,6355
89
89
  hdl/utils/schedulers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
90
90
  hdl/utils/schedulers/norm_lr.py,sha256=bDwCmdEK-WkgxQMFBiMuchv8Mm7C0-GZJ6usm-PQk14,4461
91
- hjxdl-0.0.16.dist-info/METADATA,sha256=_xC63dyfzZes4vmGeFZbRM0bDBxAmLWcc_BzcsFqB40,543
92
- hjxdl-0.0.16.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
93
- hjxdl-0.0.16.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
94
- hjxdl-0.0.16.dist-info/RECORD,,
91
+ hjxdl-0.0.18.dist-info/METADATA,sha256=AeINDjqXiZ84ZsmNc4TQsSJo_D27yijt_iTg3nhx778,543
92
+ hjxdl-0.0.18.dist-info/WHEEL,sha256=Z4pYXqR_rTB7OWNDYFOm1qRk0RX6GFP2o8LgvP453Hk,91
93
+ hjxdl-0.0.18.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
94
+ hjxdl-0.0.18.dist-info/RECORD,,
File without changes