hjxdl 0.1.7__py3-none-any.whl → 0.1.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hdl/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.1.7'
16
- __version_tuple__ = version_tuple = (0, 1, 7)
15
+ __version__ = version = '0.1.8'
16
+ __version_tuple__ = version_tuple = (0, 1, 8)
hdl/utils/llm/embs.py CHANGED
@@ -1,4 +1,4 @@
1
- from sentence_transformers import SentenceTransformer
1
+ import re
2
2
 
3
3
 
4
4
  class BEEmbedder():
@@ -118,6 +118,8 @@ class HFEmbedder():
118
118
  Returns:
119
119
  None
120
120
  """
121
+
122
+ from sentence_transformers import SentenceTransformer
121
123
 
122
124
  self.device = device
123
125
  self.emb_dir = emb_dir
@@ -190,7 +192,7 @@ class HFEmbedder():
190
192
 
191
193
  def get_n_tokens(
192
194
  paragraph,
193
- model: str = None
195
+ model: str = ""
194
196
  ):
195
197
  """Get the number of tokens in a paragraph using a specified model.
196
198
 
@@ -201,7 +203,7 @@ def get_n_tokens(
201
203
  Returns:
202
204
  int: The number of tokens in the paragraph based on the specified model or default CJK tokenization.
203
205
  """
204
- if model is None:
206
+ if model == "":
205
207
  cjk_regex = re.compile(u'[\u1100-\uFFFDh]+?')
206
208
  trimed_cjk = cjk_regex.sub( ' a ', paragraph, 0)
207
209
  return len(trimed_cjk.split())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hjxdl
3
- Version: 0.1.7
3
+ Version: 0.1.8
4
4
  Summary: A collection of functions for Jupyter notebooks
5
5
  Home-page: https://github.com/huluxiaohuowa/hdl
6
6
  Author: Jianxing Hu
@@ -1,5 +1,5 @@
1
1
  hdl/__init__.py,sha256=5sZZNySv08wwfzJcSDssGTqUn9wlmDsR6R4XB8J8mFM,70
2
- hdl/_version.py,sha256=0A08Kvw-SYs_CkLPEV1KmD8lb9IPH1psSxb5iQEGtI8,411
2
+ hdl/_version.py,sha256=PdJ7dZoz_SyEgX0MdrMfQYBFlGcwpemv6ibF8NKALBY,411
3
3
  hdl/args/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  hdl/args/loss_args.py,sha256=s7YzSdd7IjD24rZvvOrxLLFqMZQb9YylxKeyelSdrTk,70
5
5
  hdl/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -85,11 +85,11 @@ hdl/utils/general/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
85
85
  hdl/utils/general/glob.py,sha256=8-RCnt6L297wMIfn34ZAMCsGCZUjHG3MGglGZI1cX0g,491
86
86
  hdl/utils/llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
87
87
  hdl/utils/llm/chat.py,sha256=gsbqWh8fTcJUENU6ZuMClZAuSOLFnD5VP8kXOxGh3Zw,13776
88
- hdl/utils/llm/embs.py,sha256=ntT9Noax4ao7Rm1kjVMtXa3j0ykbRq0cABAci147Bu0,7159
88
+ hdl/utils/llm/embs.py,sha256=Tf0FOYrOFZp7qQpEPiSCXzlgyHH0X9HVTUtsup74a9E,7174
89
89
  hdl/utils/llm/extract.py,sha256=2sK_WJzmYIc8iuWaM9DA6Nw3_6q1O4lJ5pKpcZo-bBA,6512
90
90
  hdl/utils/schedulers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
91
  hdl/utils/schedulers/norm_lr.py,sha256=bDwCmdEK-WkgxQMFBiMuchv8Mm7C0-GZJ6usm-PQk14,4461
92
- hjxdl-0.1.7.dist-info/METADATA,sha256=MMDp3uVtFocfPk5vDF056atHLM0dBDzPu6GrLZXbGDg,542
93
- hjxdl-0.1.7.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
94
- hjxdl-0.1.7.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
95
- hjxdl-0.1.7.dist-info/RECORD,,
92
+ hjxdl-0.1.8.dist-info/METADATA,sha256=a9BaE0EGy5G9EM3Tbsi4LMmIrCMFJUuDjFnmmu_nBW4,542
93
+ hjxdl-0.1.8.dist-info/WHEEL,sha256=R0nc6qTxuoLk7ShA2_Y-UWkN8ZdfDBG2B6Eqpz2WXbs,91
94
+ hjxdl-0.1.8.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
95
+ hjxdl-0.1.8.dist-info/RECORD,,
File without changes