re-common 10.0.26__py3-none-any.whl → 10.0.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,100 @@
1
+ import base64
2
+ import hashlib
3
+
4
+ """
5
+ VIP编码lngid生成
6
+ """
7
+
8
+
9
class BaseLngid(object):
    """Build and reverse VIP-style ``lngid`` identifiers.

    Short raw ids are encoded reversibly: Base32, then the ``=`` padding is
    replaced by a single marker character, then every character is swapped
    through a reversed-alphabet substitution. Long ids are replaced by an
    upper-case MD5 hex digest, which cannot be reversed.
    """

    # Characters covered by the substitution; each one is swapped with its
    # mirror in the reversed string, so the same table encodes and decodes.
    _ALPHABET = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    _SWAP_TABLE = str.maketrans(_ALPHABET, _ALPHABET[::-1])

    # (Base32 padding run, one-character marker), longest padding first so
    # that '======' is never mistaken for '===='/'==='/'='. The markers are
    # digits that never occur in Base32 output (its digits are 2-7).
    _PADDING_MARKERS = (
        ('======', '0'),
        ('====', '1'),
        ('===', '8'),
        ('=', '9'),
    )

    def __init__(self):
        # The original spelled this ``__int__`` (a typo); there is no state
        # to initialise either way.
        pass

    def BaseEncodeID(self, strRaw):
        """Encode ``strRaw`` with the custom Base32-derived scheme.

        :param strRaw: text to encode (encoded as UTF-8 first).
        :return: encoded text made of the characters ``0-9A-Z``.
        """
        strEncode = base64.b32encode(strRaw.encode('utf8')).decode('utf8')

        # Collapse the trailing '=' padding into a single marker character.
        for padding, marker in BaseLngid._PADDING_MARKERS:
            if strEncode.endswith(padding):
                strEncode = strEncode[:-len(padding)] + marker
                break

        return strEncode.translate(BaseLngid._SWAP_TABLE)

    def BaseDecodeID(self, strEncode):
        """Reverse :meth:`BaseEncodeID`.

        :param strEncode: text produced by :meth:`BaseEncodeID`.
        :return: the original text.
        :raises Exception: whatever ``base64.b32decode`` or the UTF-8 decode
            raises when the input is not a valid encoding.
        """
        strEncode = strEncode.translate(BaseLngid._SWAP_TABLE)

        # Expand the padding marker back into the '=' run Base32 expects.
        for padding, marker in BaseLngid._PADDING_MARKERS:
            if strEncode.endswith(marker):
                strEncode = strEncode[:-1] + padding
                break

        return base64.b32decode(strEncode.encode('utf8')).decode('utf8')

    def GetLngid(self, sub_db_id, rawid, case_insensitive=False):
        """Build the lngid for ``rawid`` within ``sub_db_id``.

        :param sub_db_id: prefix placed in front of the encoded id.
        :param rawid: raw id from the source site.
        :param case_insensitive: despite its name, pass True when the source
            site's raw ids are case-sensitive. Each character that changes
            under ``upper()`` is then upper-cased and followed by ``'_'`` so
            the case can be restored by :meth:`GetRawid`. When False the raw
            id is simply upper-cased.
        :return: ``sub_db_id`` followed by the encoded id, or by the
            upper-case MD5 hex digest when the normalised id is longer than
            20 characters.
        """
        if case_insensitive:
            uppercase_rawid = ''.join(
                ch if ch.upper() == ch else ch.upper() + '_' for ch in rawid
            )
        else:
            uppercase_rawid = rawid.upper()

        if len(uppercase_rawid) > 20:
            # Too long for the reversible form: fall back to a digest.
            limited_id = hashlib.md5(uppercase_rawid.encode('utf8')).hexdigest().upper()
        else:
            limited_id = self.BaseEncodeID(uppercase_rawid)

        return sub_db_id + limited_id

    def getDoiid(self, doi):
        """Return the encoded id for a DOI.

        The DOI is upper-cased and encoded with :meth:`BaseEncodeID`. If the
        encoded text is longer than 240 characters, the upper-case MD5 hex
        digest of the upper-cased DOI is returned instead.
        """
        doi_upper = doi.upper()
        limited_id = self.BaseEncodeID(doi_upper)
        if len(limited_id) > 240:
            limited_id = hashlib.md5(doi_upper.encode('utf8')).hexdigest().upper()
        return limited_id

    def GetRawid(self, limited_id, case_insensitive=False):
        """Recover the raw id from an encoded id.

        :param limited_id: the encoded part only; it is passed straight to
            :meth:`BaseDecodeID`, so it must not carry the ``sub_db_id``
            prefix.
        :param case_insensitive: must match the flag given to
            :meth:`GetLngid`. When True, a cased character followed by
            ``'_'`` is turned back into its lower-case form.
        :return: the decoded raw id (upper-cased unless ``case_insensitive``).
        :raises Exception: when ``limited_id`` cannot be decoded, e.g. it is
            an MD5 digest produced for an id longer than 20 characters.
        """
        try:
            uppercase_rawid = self.BaseDecodeID(limited_id)
        except Exception as e:
            # Same type and message as before; the cause is now chained.
            raise Exception("长度超过20,不可逆") from e

        if not case_insensitive:
            return uppercase_rawid

        # Single left-to-right pass. Only a cased character can own a '_'
        # marker; an underscore after a digit, another underscore, or at the
        # start of the id is literal and kept as-is.
        # NOTE(review): an upper-case letter followed by a literal '_' in the
        # raw id is indistinguishable from a marked lower-case letter; that
        # ambiguity comes from the encoding and cannot be resolved here.
        restored = []
        idx = 0
        size = len(uppercase_rawid)
        while idx < size:
            ch = uppercase_rawid[idx]
            has_marker = idx + 1 < size and uppercase_rawid[idx + 1] == '_'
            if has_marker and ch != ch.lower():
                restored.append(ch.lower())
                idx += 2
            else:
                restored.append(ch)
                idx += 1

        return ''.join(restored)
100
+
@@ -0,0 +1,116 @@
1
+ import base64
2
+ import hashlib
3
+ import os
4
+
5
+ from re_common.v2.baselibrary.business_utils.baseencodeid import BaseLngid
6
+
7
+ import os
8
+ import base64
9
+ import hashlib
10
+
11
+ """
12
+ DOI-文件路径 转换工具
13
+
14
+ 设计目标:
15
+ 1. 将任意DOI字符串转换为可逆、稳定的文件路径
16
+ 2. 提供高效的目录分散方案(65,536个子目录)
17
+ 3. 支持带文件扩展名的存储
18
+ 4. 完全可逆转换
19
+
20
+ 工作原理:
21
+ 1. DOI编码:
22
+ - 使用URL安全的Base64编码(RFC 3548)
23
+ - 移除Base64填充的'='字符
24
+ - 文件名长度 ≈ 原始DOI长度 × 4/3
25
+
26
+ 2. 目录分散:
27
+ - 使用MD5哈希创建两级目录结构
28
+ - 目录层级:/MD5[0:2]/MD5[2:4]/
29
+ - 支持65,536个目录(256×256),每目录约1,525个文件(假设10亿文件)
30
+
31
+ 3. 扩展名处理:
32
+ - 保持原始扩展名不变
33
+ - 解码时自动忽略扩展名
34
+
35
+ 典型转换示例:
36
+ DOI: "10.1000/xyz123" -> 路径: "a1/b2/MTAuMTAwMC94eXoxMjM.pdf"
37
+ 路径: "a1/b2/MTAuMTAwMC94eXoxMjM.pdf" -> DOI: "10.1000/xyz123"
38
+ """
39
+
40
+ base_lngid = BaseLngid()  # Module-level BaseLngid instance; get_doi_path() calls its getDoiid().
41
+
42
+
43
# To be enabled later.
def doi_to_path(doi: str, ext: str = "") -> str:
    """Convert a DOI into a reversible, relative storage path.

    The file name is the URL-safe Base64 encoding of the UTF-8 DOI with its
    trailing ``=`` padding stripped, followed by ``ext`` verbatim. The two
    parent directories are characters 0-1 and 2-3 of the DOI's MD5 hex
    digest.

    Args:
        doi: DOI identifying the file.
        ext: File extension appended as-is (e.g. ``'.pdf'``).

    Returns:
        Relative path built with ``os.path.join``; for example
        ``doi_to_path('a', '.pdf')`` gives ``'0c/c1/YQ.pdf'`` on POSIX.
    """
    raw = doi.encode("utf-8")

    # Directory levels come from the digest, which spreads files evenly.
    digest = hashlib.md5(raw).hexdigest()
    level_one, level_two = digest[:2], digest[2:4]

    # Reversible file name: Base64 without padding, then the extension.
    stem = base64.urlsafe_b64encode(raw).decode("ascii").rstrip("=")
    file_name = "{}{}".format(stem, ext)

    return os.path.join(level_one, level_two, file_name)
68
+
69
+
70
# To be enabled later.
def path_to_doi(path: str) -> str:
    """Recover the original DOI from a path built by ``doi_to_path``.

    Only the final path component is used; its extension (as split by
    ``os.path.splitext``) is discarded before decoding.

    Args:
        path: File path such as ``'a1/b2/MTAuMTAwMC94eXoxMjM.pdf'``.

    Returns:
        The decoded DOI string.
    """
    stem, _extension = os.path.splitext(os.path.basename(path))

    # Base64 input must be a multiple of four characters long; restore the
    # '=' padding that was stripped when the name was generated.
    missing = -len(stem) % 4
    padded = stem + "=" * missing

    decoded = base64.urlsafe_b64decode(padded.encode("ascii"))
    return decoded.decode("utf-8")
91
+
92
+
93
def doi_to_dir(doi):
    """Return the two-level storage directory for a DOI.

    Args:
        doi (str): DOI identifying the file.

    Returns:
        str: ``"AB/CD"``, where ``AB`` and ``CD`` are the first and second
        pair of characters of the DOI's MD5 hex digest in upper case. No
        file name is included.
    """
    digest = hashlib.md5(doi.encode('utf-8')).hexdigest().lower()

    # Characters 0-1 form the first level, characters 2-3 the second.
    levels = (digest[:2].upper(), digest[2:4].upper())
    return "/".join(levels)
110
+
111
+
112
def get_doi_path(doi):
    """Return the storage path currently used for a DOI's PDF.

    The path is ``doi_to_dir(doi)``, a ``/``, the id returned by
    ``base_lngid.getDoiid(doi)`` and a fixed ``.pdf`` suffix.
    """
    # This is the scheme currently in use.
    # NOTE(review): the directory is hashed from the DOI as given, while
    # getDoiid() upper-cases it first, so two spellings of one DOI that
    # differ only in case share a file name but land in different
    # directories. Confirm this is intended.
    directory = doi_to_dir(doi)
    encoded_name = base_lngid.getDoiid(doi)
    return "/".join((directory, encoded_name + ".pdf"))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: re_common
3
- Version: 10.0.26
3
+ Version: 10.0.27
4
4
  Summary: a library about all python projects
5
5
  Home-page: https://gitee.com/xujiangios/re-common
6
6
  Author: vic
@@ -165,6 +165,8 @@ re_common/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
165
165
  re_common/v2/baselibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
166
166
  re_common/v2/baselibrary/business_utils/BusinessStringUtil.py,sha256=njPcRgeBWpnZr5u2cPAO4qdWBq-CgTn99rJuvWFcChk,6788
167
167
  re_common/v2/baselibrary/business_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
168
+ re_common/v2/baselibrary/business_utils/baseencodeid.py,sha256=J2Gs_F3-aAwIYjO06T1IIKHE6jen3mFfQVoVW3HywyI,3548
169
+ re_common/v2/baselibrary/business_utils/full_doi_path.py,sha256=PaMIrgDWWt_fzSFyvvDD-8CcYZJTNo6Pj-uR0WafNbY,3319
168
170
  re_common/v2/baselibrary/business_utils/rel_tools.py,sha256=LfnGFCkUSxg1SHvOMOQdP1PiHxIKqk7Syuk5YYpjJag,295
169
171
  re_common/v2/baselibrary/decorators/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
170
172
  re_common/v2/baselibrary/decorators/utils.py,sha256=Q4D6KKCQxvNBXZkPQQn14keKKJpGtg8TUSakjJU40s0,2056
@@ -229,8 +231,8 @@ re_common/vip/title/transform/TransformRegulationTitleToZt.py,sha256=LKRdIsWKues
229
231
  re_common/vip/title/transform/TransformStandardTitleToZt.py,sha256=-fCKAbSBzXVyQDCE61CalvR9E_QzQMA08QOO_NePFNI,5563
230
232
  re_common/vip/title/transform/TransformThesisTitleToZt.py,sha256=QS-uV0cQrpUFAcKucuJQ9Ue2VRQH-inmfn_X3IplfRo,5488
231
233
  re_common/vip/title/transform/__init__.py,sha256=m83-CWyRq_VHPYHaALEQlmXrkTdrZ3e4B_kCfBYE-uc,239
232
- re_common-10.0.26.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
233
- re_common-10.0.26.dist-info/METADATA,sha256=kHLVPF-e0PjpnUL7dN9pAMqK_pw4yHwZGKxbJ_zlAY0,582
234
- re_common-10.0.26.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
235
- re_common-10.0.26.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
236
- re_common-10.0.26.dist-info/RECORD,,
234
+ re_common-10.0.27.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
235
+ re_common-10.0.27.dist-info/METADATA,sha256=P-xatKifINopvPpQFudSbxCKEpgqzLyw5Pesn-7_VvU,582
236
+ re_common-10.0.27.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
237
+ re_common-10.0.27.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
238
+ re_common-10.0.27.dist-info/RECORD,,