hjxdl 0.1.51__py3-none-any.whl → 0.1.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hdl/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.1.51'
16
- __version_tuple__ = version_tuple = (0, 1, 51)
15
+ __version__ = version = '0.1.53'
16
+ __version_tuple__ = version_tuple = (0, 1, 53)
@@ -12,6 +12,7 @@ import sys
12
12
  import importlib
13
13
  import subprocess
14
14
  import re
15
+ from pathlib import Path
15
16
 
16
17
  import multiprocess as mp
17
18
 
@@ -21,7 +22,7 @@ import json
21
22
 
22
23
  def in_jupyter():
23
24
  """Check if the code is running in a Jupyter notebook.
24
-
25
+
25
26
  Returns:
26
27
  bool: True if running in Jupyter notebook, False otherwise.
27
28
  """
@@ -32,7 +33,7 @@ def in_jupyter():
32
33
 
33
34
  def in_docker():
34
35
  """Check if the code is running inside a Docker container.
35
-
36
+
36
37
  Returns:
37
38
  bool: True if running inside a Docker container, False otherwise.
38
39
  """
@@ -44,10 +45,10 @@ def get_files(
44
45
  file_types: list = ["txt"]
45
46
  ):
46
47
  """Get a list of files with specific file extensions in the given directory path.
47
-
48
+
48
49
  Args:
49
50
  dir_path (str): The path to the target directory.
50
-
51
+
51
52
  Returns:
52
53
  list: A list of absolute file paths that have file extensions such as .md, .doc, .docx, .pdf, .csv, or .txt.
53
54
  """
@@ -56,7 +57,7 @@ def get_files(
56
57
  for filepath, dirnames, filenames in os.walk(dir_path):
57
58
  # os.walk recursively traverses the given directory
58
59
  filenames = [f for f in filenames if not f[0] == '.']
59
- dirnames[:] = [d for d in dirnames if not d[0] == '.']
60
+ dirnames[:] = [d for d in dirnames if not d[0] == '.']
60
61
  for filename in filenames:
61
62
  # Use the file extension to decide whether the file type qualifies
62
63
  if filename.endswith(file_types):
@@ -67,10 +68,10 @@ def get_files(
67
68
 
68
69
  def get_dataset_file(filename):
69
70
  """Get dataset file.
70
-
71
+
71
72
  Args:
72
73
  filename (str): The name of the dataset file.
73
-
74
+
74
75
  Returns:
75
76
  dict: The data loaded from the dataset file.
76
77
  """
@@ -82,11 +83,11 @@ def get_dataset_file(filename):
82
83
 
83
84
  def recursive_glob(treeroot, pattern):
84
85
  """Recursively searches for files matching a specified pattern starting from the given directory.
85
-
86
+
86
87
  Args:
87
88
  treeroot (str): The root directory to start the search from.
88
89
  pattern (str): The pattern to match the files against.
89
-
90
+
90
91
  Returns:
91
92
  list: A list of file paths that match the specified pattern.
92
93
  """
@@ -104,9 +105,9 @@ def makedirs(path: str, isfile: bool = False) -> None:
104
105
 
105
106
 
106
107
  Args:
107
- path (str): Path to a directory or file.
108
+ path (str): Path to a directory or file.
108
109
  isfile (bool, optional): Whether the provided path points to a file (True) or a directory (False). Defaults to False.
109
- """
110
+ """
110
111
  if isfile:
111
112
  path = os.path.dirname(path)
112
113
  if path != '':
@@ -120,10 +121,10 @@ def get_current_dir():
120
121
 
121
122
  def get_num_lines(file):
122
123
  """Get the number of lines in a file.
123
-
124
+
124
125
  Args:
125
126
  file (str): The path to the file.
126
-
127
+
127
128
  Returns:
128
129
  int: The number of lines in the file.
129
130
  """
@@ -143,11 +144,11 @@ def chunkify_file(
143
144
  """
144
145
  function to divide a large text file into chunks each having size ~= size so that the chunks are line aligned
145
146
 
146
- Params :
147
+ Params :
147
148
  fname : path to the file to be chunked
148
149
  size : size of each chunk is ~> this
149
150
  skiplines : number of lines in the beginning to skip, -1 means don't skip any lines
150
- Returns :
151
+ Returns :
151
152
  start and end position of chunks in Bytes
152
153
  """
153
154
  chunks = []
@@ -177,7 +178,7 @@ def parallel_apply_line_by_line_chunk(chunk_data):
177
178
  function to apply a function to each line in a chunk
178
179
 
179
180
  Params :
180
- chunk_data : the data for this chunk
181
+ chunk_data : the data for this chunk
181
182
  Returns :
182
183
  list of the non-None results for this chunk
183
184
  """
@@ -262,10 +263,10 @@ def parallel_apply_line_by_line(
262
263
 
263
264
  def get_func_from_dir(score_dir: str) -> t.Tuple[t.Callable, str]:
264
265
  """Get function and mode from directory.
265
-
266
+
266
267
  Args:
267
268
  score_dir (str): The directory path containing the function file.
268
-
269
+
269
270
  Returns:
270
271
  Tuple[Callable, str]: A tuple containing the main function and the mode.
271
272
  """
@@ -277,9 +278,17 @@ def get_func_from_dir(score_dir: str) -> t.Tuple[t.Callable, str]:
277
278
  file_name = "main"
278
279
 
279
280
  sys.path.append(func_dir)
280
- module = importlib.import_module(file_name)
281
+ module = importlib.import_module(file_name)
281
282
  try:
282
283
  mode = module.MODE
283
284
  except Exception as _:
284
285
  mode = 'batch'
285
- return module.main, mode
286
+ return module.main, mode
287
+
288
+
289
def find_images_recursive(
    directory,
    extensions=(".jpg", ".jpeg", ".png", ".gif", ".bmp", ".tiff")
):
    """Recursively collect the paths of image files below a directory.

    Args:
        directory (str): Root directory to search.
        extensions (tuple | str, optional): File suffixes (including the
            leading dot) to accept. Matching is case-insensitive. A single
            string is treated as one suffix.

    Returns:
        list: Paths (as ``str``) of every regular file under ``directory``
            whose suffix is one of ``extensions``, in ``Path.rglob`` order.
    """
    if isinstance(extensions, str):
        # A bare string would otherwise be tested by substring containment.
        extensions = (extensions,)
    # Lower-case the wanted suffixes once so ".JPG" behaves like ".jpg";
    # the candidate suffix is lower-cased below as well.
    wanted = frozenset(ext.lower() for ext in extensions)
    return [
        str(entry)
        for entry in Path(directory).rglob("*")
        # is_file() keeps directories such as "album.png/" out of the result.
        if entry.suffix.lower() in wanted and entry.is_file()
    ]
@@ -1,16 +1,17 @@
1
1
  import psycopg
2
+ import redis
2
3
 
3
4
 
4
5
  def connect_by_infofile(info_file: str) -> psycopg.Connection:
5
6
  """Create a postgres connection
6
7
 
7
8
  Args:
8
- info_file (str):
9
+ info_file (str):
9
10
  the path of the connection info like
10
11
  host=127.0.0.1 dbname=dbname port=5432 user=postgres password=lala
11
12
 
12
13
  Returns:
13
- psycopg.Connection:
14
+ psycopg.Connection:
14
15
  the connection instance should be closed after committing.
15
16
  """
16
17
  conn = psycopg.connect(
@@ -25,4 +26,17 @@ def connect_by_infofile(info_file: str) -> psycopg.Connection:
25
26
  # for record in cur:
26
27
  # print(record)
27
28
  # conn.commit()
28
- # conn.close()
29
+ # conn.close()
30
+
31
def conn_redis(
    host: str,
    port: int
):
    """Create a Redis client and ping the server once.

    Args:
        host (str): Host passed to ``redis.Redis``.
        port (int): Port passed to ``redis.Redis``.

    Returns:
        redis.Redis: The client, created with ``decode_responses=True``.
    """
    options = {
        "host": host,
        "port": port,
        "decode_responses": True,
    }
    connection = redis.Redis(**options)
    # Ping immediately and print the server's reply.
    print(connection.ping())
    return connection
hdl/utils/llm/vis.py ADDED
@@ -0,0 +1,79 @@
1
+ import requests
2
+
3
+ import torch
4
+ import numpy as np
5
+ from PIL import Image
6
+ import redis
7
+ from transformers import ChineseCLIPProcessor, ChineseCLIPModel
8
+
9
+ from ..database_tools.connect import conn_redis
10
+
11
+
12
+ # url = "https://clip-cn-beijing.oss-cn-beijing.aliyuncs.com/pokemon.jpeg"
13
+ # image = Image.open(requests.get(url, stream=True).raw)
14
+
15
+
16
class ImgHandler:
    """Wrapper around a Chinese-CLIP model for image/text features.

    The model and processor are loaded eagerly from ``model_path``; the
    Redis connection is created lazily on first access of ``redis_conn``.
    All feature/similarity methods run under ``torch.no_grad()`` so the
    returned tensors do not carry an autograd graph.
    """

    def __init__(
        self,
        model_path,
        redis_host,
        redis_port,
    ) -> None:
        """Load the model/processor and remember the Redis endpoint.

        Args:
            model_path: Identifier or path handed to ``from_pretrained``
                for both ``ChineseCLIPModel`` and ``ChineseCLIPProcessor``.
            redis_host: Host used when the Redis connection is opened.
            redis_port: Port used when the Redis connection is opened.
        """
        self.model = ChineseCLIPModel.from_pretrained(model_path)
        self.processor = ChineseCLIPProcessor.from_pretrained(model_path)
        self.redis_host = redis_host
        self.redis_port = redis_port
        # Filled in by the ``redis_conn`` property on first use.
        self._redis_conn = None

    @property
    def redis_conn(self):
        """Return the Redis client, connecting on first access."""
        if self._redis_conn is None:
            self._redis_conn = conn_redis(self.redis_host, self.redis_port)
        return self._redis_conn

    @torch.no_grad()
    def get_img_features(self, images, **kwargs):
        """Compute L2-normalised image features.

        Args:
            images: Image input(s) forwarded to the processor as ``images``.
            **kwargs: Extra keyword arguments forwarded to the processor.

        Returns:
            torch.Tensor: Output of ``model.get_image_features`` divided by
                its L2 norm along the last dimension (no gradient tracking).
        """
        inputs = self.processor(
            images=images,
            return_tensors="pt",
            **kwargs
        )
        image_features = self.model.get_image_features(**inputs)
        # Scale each feature vector to unit L2 length.
        image_features = image_features / \
            image_features.norm(p=2, dim=-1, keepdim=True)
        return image_features

    @torch.no_grad()
    def get_text_features(
        self,
        texts,
        **kwargs
    ):
        """Compute L2-normalised text features.

        Args:
            texts: Text input(s) forwarded to the processor as ``text``
                (with ``padding=True``).
            **kwargs: Extra keyword arguments forwarded to the processor.

        Returns:
            torch.Tensor: Output of ``model.get_text_features`` divided by
                its L2 norm along the last dimension (no gradient tracking).
        """
        inputs = self.processor(
            text=texts,
            padding=True,
            return_tensors="pt",
            **kwargs
        )
        text_features = self.model.get_text_features(**inputs)
        # Scale each feature vector to unit L2 length.
        text_features = text_features / \
            text_features.norm(p=2, dim=-1, keepdim=True)
        return text_features

    @torch.no_grad()
    def get_text_img_sims(
        self,
        texts,
        images,
        **kwargs
    ):
        """Score texts against images with the joint model.

        Args:
            texts: Text input(s) forwarded to the processor as ``text``.
            images: Image input(s) forwarded to the processor as ``images``.
            **kwargs: Extra keyword arguments forwarded to the processor.

        Returns:
            torch.Tensor: ``outputs.logits_per_image`` after a softmax over
                dim 1 (presumably the text axis -- confirm against the
                Transformers documentation), without gradient tracking.
        """
        inputs = self.processor(
            text=texts,
            images=images,
            return_tensors="pt",
            padding=True,
            **kwargs
        )
        outputs = self.model(**inputs)
        # Image-text similarity scores as produced by the model.
        logits_per_image = outputs.logits_per_image
        probs = logits_per_image.softmax(dim=1)
        return probs
79
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hjxdl
3
- Version: 0.1.51
3
+ Version: 0.1.53
4
4
  Summary: A collection of functions for Jupyter notebooks
5
5
  Home-page: https://github.com/huluxiaohuowa/hdl
6
6
  Author: Jianxing Hu
@@ -18,6 +18,9 @@ Requires-Dist: geopy
18
18
  Requires-Dist: timezonefinder
19
19
  Requires-Dist: pytz
20
20
  Requires-Dist: duckduckgo-search[lxml]
21
+ Requires-Dist: opencv-python
22
+ Requires-Dist: redis[hiredis]
23
+ Requires-Dist: Pillow
21
24
 
22
25
  # DL framework by Jianxing
23
26
 
@@ -1,5 +1,5 @@
1
1
  hdl/__init__.py,sha256=GffnD0jLJdhkd-vo989v40N90sQbofkayRBwxc6TVhQ,72
2
- hdl/_version.py,sha256=f9ICF0HnULXyn5fGbc4yAmBNgRL7m8zPXBZHmrIl7ZQ,413
2
+ hdl/_version.py,sha256=bPb2Oc5wemZHZQ36bFj8MSRaDTXfjYMCh-GfQfIz_eE,413
3
3
  hdl/args/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  hdl/args/loss_args.py,sha256=s7YzSdd7IjD24rZvvOrxLLFqMZQb9YylxKeyelSdrTk,70
5
5
  hdl/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -76,7 +76,7 @@ hdl/jupyfuncs/llm/openapi.py,sha256=pNBW0Jzt0JAZP8ZexgoQZVF118jrhKnBYheClcMu9bU,
76
76
  hdl/jupyfuncs/network/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
77
  hdl/jupyfuncs/network/proxy.py,sha256=foZm3gGFTPLeMNRfWs4QKNUEmlhtNTr_1GQvn0IgDVw,545
78
78
  hdl/jupyfuncs/path/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
79
- hdl/jupyfuncs/path/glob.py,sha256=fl0YDLDS9QI2WgDlBzDGlALbvkmPGcXp4UnbbQ17BOM,8300
79
+ hdl/jupyfuncs/path/glob.py,sha256=W4sxFmCLgUUfvYJR7axlEKfwOohDS0fEggLQoTI2Xi0,8491
80
80
  hdl/jupyfuncs/path/strings.py,sha256=eZCXElh7pT0xwy6ZBqSu3frq3Xx8CN5TMuQzsxb0Sbw,2009
81
81
  hdl/jupyfuncs/show/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
82
  hdl/jupyfuncs/show/pbar.py,sha256=QzyHV9XEyk9U5oyonxLSwYb5pD09iVUw_atVlzxYBNQ,1005
@@ -118,7 +118,7 @@ hdl/utils/chemical_tools/__init__.py,sha256=_QRNtVx0ieZZgSEsHndPFKm6XU3WXfRb7GYq
118
118
  hdl/utils/chemical_tools/query_info.py,sha256=wyQXwKSY_gBGVUNvYggHpYBtOLAtpYKq3PN5wqDb7Co,4204
119
119
  hdl/utils/chemical_tools/sdf.py,sha256=71PEqU0H885L6IeGHEa6n7ZLZThvMsZOVLuFG2wnoyM,542
120
120
  hdl/utils/database_tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
121
- hdl/utils/database_tools/connect.py,sha256=KUnVG-8raifEJ_N0b3c8LkTTIfn9NIyw8LX6qvpA3YU,723
121
+ hdl/utils/database_tools/connect.py,sha256=1CKK1VbgbvYMlA6iBSPpQM6bDt0fxwKwC7cxoppIHoU,940
122
122
  hdl/utils/database_tools/datetime.py,sha256=xqE2xNiOpADzX-R8_bM0bioJRF3Ay9Jp1CAG6dy6uVI,1202
123
123
  hdl/utils/database_tools/web.py,sha256=vZJYWA02QbyUUMGLDX710cexn7RlfWeHa8YwK3UsDZ0,1934
124
124
  hdl/utils/desc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -131,11 +131,12 @@ hdl/utils/llm/chat.py,sha256=sk7Lw5Oa30k-l2fnJknkMmTc5zkBeEKsR981aeFhH5s,11907
131
131
  hdl/utils/llm/embs.py,sha256=Tf0FOYrOFZp7qQpEPiSCXzlgyHH0X9HVTUtsup74a9E,7174
132
132
  hdl/utils/llm/extract.py,sha256=2sK_WJzmYIc8iuWaM9DA6Nw3_6q1O4lJ5pKpcZo-bBA,6512
133
133
  hdl/utils/llm/llama_chat.py,sha256=watcHGOaz-bv3x-yDucYlGk5f8FiqfFhwWogrl334fk,4387
134
+ hdl/utils/llm/vis.py,sha256=J7wSvizcsAOdGi6MyzzyQ1K5iINYhCXShDw1yG7QriI,2158
134
135
  hdl/utils/schedulers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
135
136
  hdl/utils/schedulers/norm_lr.py,sha256=bDwCmdEK-WkgxQMFBiMuchv8Mm7C0-GZJ6usm-PQk14,4461
136
137
  hdl/utils/weather/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
137
138
  hdl/utils/weather/weather.py,sha256=k11o6wM15kF8b9NMlEfrg68ak-SfSYLN3nOOflFUv-I,4381
138
- hjxdl-0.1.51.dist-info/METADATA,sha256=syqwDtpatUKLAZTxw0RqV4GFDHAqP-Hh840LZtLgfZE,737
139
- hjxdl-0.1.51.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
140
- hjxdl-0.1.51.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
141
- hjxdl-0.1.51.dist-info/RECORD,,
139
+ hjxdl-0.1.53.dist-info/METADATA,sha256=uGSeHnUI-OHFWnechzweGcSlxeRTEWACTLZObCHQrI8,818
140
+ hjxdl-0.1.53.dist-info/WHEEL,sha256=cVxcB9AmuTcXqmwrtPhNK88dr7IR_b6qagTj0UvIEbY,91
141
+ hjxdl-0.1.53.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
142
+ hjxdl-0.1.53.dist-info/RECORD,,
File without changes