hjxdl 0.2.23__py3-none-any.whl → 0.2.25__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
hdl/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.2.23'
16
- __version_tuple__ = version_tuple = (0, 2, 23)
15
+ __version__ = version = '0.2.25'
16
+ __version_tuple__ = version_tuple = (0, 2, 25)
hdl/utils/llm/vis.py CHANGED
@@ -4,6 +4,7 @@ import base64
4
4
  from io import BytesIO
5
5
  import requests
6
6
  import uuid
7
+ import hashlib
7
8
 
8
9
  import torch
9
10
  import numpy as np
@@ -132,8 +133,8 @@ class ImgHandler:
132
133
  def __init__(
133
134
  self,
134
135
  model_path,
135
- db_host,
136
- db_port,
136
+ db_host = "127.0.0.1",
137
+ db_port = 8888,
137
138
  conn=None,
138
139
  model_name: str = None,
139
140
  device: str = "cpu",
@@ -353,7 +354,7 @@ class ImgHandler:
353
354
  conn=None,
354
355
  print_idx_info: bool = False,
355
356
  ):
356
- """Save image features to a Redis database.
357
+ """Save image features to a Redis database, avoiding duplicates.
357
358
 
358
359
  Args:
359
360
  images (list): A list of image file paths.
@@ -370,10 +371,8 @@ class ImgHandler:
370
371
  if conn is None:
371
372
  conn = self.db_conn
372
373
  pipeline = conn.pipeline()
373
- for img_file, emb in tqdm(zip(sorted_imgs, img_feats)):
374
- # 初始化 Redis,先使用 img 文件名作为 Key 和 Value,后续再更新为图片特征向量
375
- # pipeline.json().set(img_file, "$", img_file)
376
374
 
375
+ for img_file, emb in tqdm(zip(sorted_imgs, img_feats)):
377
376
  if img_file.startswith("data:"):
378
377
  img_data = img_file
379
378
  img_idx = f"pic-{str(uuid.uuid4())}"
@@ -381,39 +380,58 @@ class ImgHandler:
381
380
  img_data = imgfile_to_base64(img_file)
382
381
  img_idx = f"pic-{img_file}"
383
382
 
383
+ # 使用图片特征生成唯一哈希值
384
+ emb_hash = hashlib.sha256(emb.tobytes()).hexdigest()
385
+
386
+ # 检查该哈希值是否已存在,避免重复存储
387
+ if conn.exists(f"pic-hash-{emb_hash}"):
388
+ print(f"Image {img_file} already exists, skipping.")
389
+ continue
390
+
391
+ # 存储新图片的特征和数据
384
392
  emb = emb.astype(np.float32).tolist()
385
393
  emb_json = {
386
394
  "emb": emb,
387
395
  "data": img_data
388
396
  }
389
397
  pipeline.json().set(img_idx, "$", emb_json)
390
- res = pipeline.execute()
391
- # print('redis set:', res)
392
398
 
399
+ # 将哈希值作为键存储,以便后续检查
400
+ pipeline.set(f"pic-hash-{emb_hash}", img_idx)
401
+
402
+ res = pipeline.execute()
403
+
404
+ # 定义向量索引的schema
393
405
  schema = (
394
406
  VectorField(
395
- "$.emb", # 这是 JSON 中存储向量的路径
396
- "FLAT", # 使用 FLAT 索引类型
407
+ "$.emb",
408
+ "FLAT",
397
409
  {
398
- "TYPE": "FLOAT32", # 向量类型
399
- "DIM": self.num_vec_dim, # 向量维度,必须与实际数据的维度一致
400
- "DISTANCE_METRIC": "COSINE", # 余弦相似度作为距离度量
410
+ "TYPE": "FLOAT32",
411
+ "DIM": self.num_vec_dim,
412
+ "DISTANCE_METRIC": "COSINE",
401
413
  },
402
- as_name="vector", # 给这个字段定义一个别名,后续可以使用
414
+ as_name="vector",
403
415
  ),
404
416
  )
405
- # vector_idx_name = "idx:pic_idx"
417
+ # 定义索引的配置
406
418
  definition = IndexDefinition(
407
419
  prefix=["pic-"],
408
420
  index_type=IndexType.JSON
409
421
  )
410
- res = conn.ft(
411
- self.pic_idx_name
412
- ).create_index(
413
- fields=schema,
414
- definition=definition
415
- )
416
- print("create_index:", res)
422
+
423
+ # 检查索引是否已经存在
424
+ try:
425
+ conn.ft(self.pic_idx_name).info() # 检查索引信息
426
+ print("Index already exists, skipping creation.")
427
+ except Exception:
428
+ # 如果索引不存在,创建新的索引
429
+ res = conn.ft(self.pic_idx_name).create_index(
430
+ fields=schema,
431
+ definition=definition
432
+ )
433
+ print("create_index:", res)
434
+
417
435
  if print_idx_info:
418
436
  print(self.pic_idx_info)
419
437
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: hjxdl
3
- Version: 0.2.23
3
+ Version: 0.2.25
4
4
  Summary: A collection of functions for Jupyter notebooks
5
5
  Home-page: https://github.com/huluxiaohuowa/hdl
6
6
  Author: Jianxing Hu
@@ -1,5 +1,5 @@
1
1
  hdl/__init__.py,sha256=GffnD0jLJdhkd-vo989v40N90sQbofkayRBwxc6TVhQ,72
2
- hdl/_version.py,sha256=-hljAU21MXFeuL9fV1MtjsvUKcUWxJn4DlS0PdRjDcs,413
2
+ hdl/_version.py,sha256=UmsAAHFuqYBzGCBQpGNQPt7zCgBErX1ViQbYB7gdPPg,413
3
3
  hdl/args/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  hdl/args/loss_args.py,sha256=s7YzSdd7IjD24rZvvOrxLLFqMZQb9YylxKeyelSdrTk,70
5
5
  hdl/controllers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -132,13 +132,13 @@ hdl/utils/llm/chatgr.py,sha256=GO2G7g6YybduA5VCUuGjvEsJfC_6L7rycSnPeHMcxyM,2820
132
132
  hdl/utils/llm/embs.py,sha256=Tf0FOYrOFZp7qQpEPiSCXzlgyHH0X9HVTUtsup74a9E,7174
133
133
  hdl/utils/llm/extract.py,sha256=2sK_WJzmYIc8iuWaM9DA6Nw3_6q1O4lJ5pKpcZo-bBA,6512
134
134
  hdl/utils/llm/llama_chat.py,sha256=watcHGOaz-bv3x-yDucYlGk5f8FiqfFhwWogrl334fk,4387
135
- hdl/utils/llm/vis.py,sha256=aeTVX-jKXJBnvQXaOWiHpI9Nc7IneSCqm_g7N4spJUc,16446
135
+ hdl/utils/llm/vis.py,sha256=skeoBgFNes8swPBeWIsn3oKZ4Jv4Cl5GGrxsRDNupac,16854
136
136
  hdl/utils/llm/visrag.py,sha256=_PuKtmQIXD5bnmXwDWhTLdzOhgC42JiqdMNb1uKA7n8,9190
137
137
  hdl/utils/schedulers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
138
138
  hdl/utils/schedulers/norm_lr.py,sha256=bDwCmdEK-WkgxQMFBiMuchv8Mm7C0-GZJ6usm-PQk14,4461
139
139
  hdl/utils/weather/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
140
140
  hdl/utils/weather/weather.py,sha256=k11o6wM15kF8b9NMlEfrg68ak-SfSYLN3nOOflFUv-I,4381
141
- hjxdl-0.2.23.dist-info/METADATA,sha256=xFu32son2_PThYfRcnKAh5-Kc1bUoRgyAhoo9hsHixc,836
142
- hjxdl-0.2.23.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
143
- hjxdl-0.2.23.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
144
- hjxdl-0.2.23.dist-info/RECORD,,
141
+ hjxdl-0.2.25.dist-info/METADATA,sha256=i8tbHzSQf9nal7CU2P7Whe3AChbtkBHJPUIIKQaVqQo,836
142
+ hjxdl-0.2.25.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
143
+ hjxdl-0.2.25.dist-info/top_level.txt,sha256=-kxwTM5JPhylp06z3zAVO3w6_h7wtBfBo2zgM6YZoTk,4
144
+ hjxdl-0.2.25.dist-info/RECORD,,
File without changes