re-common 10.0.39__py3-none-any.whl → 10.0.41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. re_common/baselibrary/__init__.py +4 -4
  2. re_common/baselibrary/baseabs/__init__.py +6 -6
  3. re_common/baselibrary/baseabs/baseabs.py +26 -26
  4. re_common/baselibrary/database/mbuilder.py +132 -132
  5. re_common/baselibrary/database/moudle.py +93 -93
  6. re_common/baselibrary/database/msqlite3.py +194 -194
  7. re_common/baselibrary/database/mysql.py +169 -169
  8. re_common/baselibrary/database/sql_factory.py +26 -26
  9. re_common/baselibrary/mthread/MThreadingRun.py +486 -486
  10. re_common/baselibrary/mthread/MThreadingRunEvent.py +349 -349
  11. re_common/baselibrary/mthread/__init__.py +2 -2
  12. re_common/baselibrary/mthread/mythreading.py +695 -695
  13. re_common/baselibrary/pakge_other/socks.py +404 -404
  14. re_common/baselibrary/readconfig/config_factory.py +18 -18
  15. re_common/baselibrary/readconfig/ini_config.py +317 -317
  16. re_common/baselibrary/readconfig/toml_config.py +49 -49
  17. re_common/baselibrary/temporary/envdata.py +36 -36
  18. re_common/baselibrary/tools/all_requests/aiohttp_request.py +118 -118
  19. re_common/baselibrary/tools/all_requests/httpx_requet.py +102 -102
  20. re_common/baselibrary/tools/all_requests/mrequest.py +412 -412
  21. re_common/baselibrary/tools/all_requests/requests_request.py +81 -81
  22. re_common/baselibrary/tools/batch_compre/bijiao_batch.py +31 -31
  23. re_common/baselibrary/tools/contrast_db3.py +123 -123
  24. re_common/baselibrary/tools/copy_file.py +39 -39
  25. re_common/baselibrary/tools/db3_2_sizedb3.py +102 -102
  26. re_common/baselibrary/tools/foreachgz.py +39 -39
  27. re_common/baselibrary/tools/get_attr.py +10 -10
  28. re_common/baselibrary/tools/image_to_pdf.py +61 -61
  29. re_common/baselibrary/tools/java_code_deal.py +139 -139
  30. re_common/baselibrary/tools/javacode.py +79 -79
  31. re_common/baselibrary/tools/mdb_db3.py +48 -48
  32. re_common/baselibrary/tools/merge_file.py +171 -171
  33. re_common/baselibrary/tools/merge_gz_file.py +165 -165
  34. re_common/baselibrary/tools/mhdfstools/down_hdfs_files.py +42 -42
  35. re_common/baselibrary/tools/mhdfstools/hdfst.py +42 -42
  36. re_common/baselibrary/tools/mhdfstools/up_hdfs_files.py +38 -38
  37. re_common/baselibrary/tools/mongo_tools.py +50 -50
  38. re_common/baselibrary/tools/move_file.py +170 -170
  39. re_common/baselibrary/tools/move_mongo/mongo_table_to_file.py +63 -63
  40. re_common/baselibrary/tools/move_mongo/move_mongo_table.py +354 -354
  41. re_common/baselibrary/tools/move_mongo/use_mttf.py +18 -18
  42. re_common/baselibrary/tools/move_mongo/use_mv.py +93 -93
  43. re_common/baselibrary/tools/mpandas/mpandasreadexcel.py +125 -125
  44. re_common/baselibrary/tools/mpandas/pandas_visualization.py +7 -7
  45. re_common/baselibrary/tools/myparsel.py +104 -104
  46. re_common/baselibrary/tools/rename_dir_file.py +37 -37
  47. re_common/baselibrary/tools/sequoiadb_utils.py +398 -398
  48. re_common/baselibrary/tools/split_line_to_many.py +25 -25
  49. re_common/baselibrary/tools/stringtodicts.py +33 -33
  50. re_common/baselibrary/tools/workwechant_bot.py +84 -84
  51. re_common/baselibrary/utils/baseaiohttp.py +296 -296
  52. re_common/baselibrary/utils/baseaiomysql.py +87 -87
  53. re_common/baselibrary/utils/baseallstep.py +191 -191
  54. re_common/baselibrary/utils/baseavro.py +19 -19
  55. re_common/baselibrary/utils/baseboto3.py +291 -291
  56. re_common/baselibrary/utils/basecsv.py +32 -32
  57. re_common/baselibrary/utils/basedict.py +133 -133
  58. re_common/baselibrary/utils/basedir.py +241 -241
  59. re_common/baselibrary/utils/baseencode.py +351 -351
  60. re_common/baselibrary/utils/baseencoding.py +28 -28
  61. re_common/baselibrary/utils/baseesdsl.py +86 -86
  62. re_common/baselibrary/utils/baseexcel.py +264 -264
  63. re_common/baselibrary/utils/baseexcept.py +109 -109
  64. re_common/baselibrary/utils/basefile.py +654 -654
  65. re_common/baselibrary/utils/baseftp.py +214 -214
  66. re_common/baselibrary/utils/basegzip.py +60 -60
  67. re_common/baselibrary/utils/basehdfs.py +135 -135
  68. re_common/baselibrary/utils/basehttpx.py +268 -268
  69. re_common/baselibrary/utils/baseip.py +87 -87
  70. re_common/baselibrary/utils/basejson.py +2 -2
  71. re_common/baselibrary/utils/baselist.py +32 -32
  72. re_common/baselibrary/utils/basemotor.py +190 -190
  73. re_common/baselibrary/utils/basemssql.py +98 -98
  74. re_common/baselibrary/utils/baseodbc.py +113 -113
  75. re_common/baselibrary/utils/basepandas.py +302 -302
  76. re_common/baselibrary/utils/basepeewee.py +11 -11
  77. re_common/baselibrary/utils/basepika.py +180 -180
  78. re_common/baselibrary/utils/basepydash.py +143 -143
  79. re_common/baselibrary/utils/basepymongo.py +230 -230
  80. re_common/baselibrary/utils/basequeue.py +22 -22
  81. re_common/baselibrary/utils/baserar.py +57 -57
  82. re_common/baselibrary/utils/baserequest.py +279 -279
  83. re_common/baselibrary/utils/baseset.py +8 -8
  84. re_common/baselibrary/utils/basesmb.py +403 -403
  85. re_common/baselibrary/utils/basestring.py +382 -382
  86. re_common/baselibrary/utils/basetime.py +320 -320
  87. re_common/baselibrary/utils/baseurl.py +121 -121
  88. re_common/baselibrary/utils/basezip.py +57 -57
  89. re_common/baselibrary/utils/core/__init__.py +7 -7
  90. re_common/baselibrary/utils/core/bottomutils.py +18 -18
  91. re_common/baselibrary/utils/core/mdeprecated.py +327 -327
  92. re_common/baselibrary/utils/core/mlamada.py +16 -16
  93. re_common/baselibrary/utils/core/msginfo.py +25 -25
  94. re_common/baselibrary/utils/core/requests_core.py +103 -103
  95. re_common/baselibrary/utils/fateadm.py +429 -429
  96. re_common/baselibrary/utils/importfun.py +123 -123
  97. re_common/baselibrary/utils/mfaker.py +57 -57
  98. re_common/baselibrary/utils/my_abc/__init__.py +3 -3
  99. re_common/baselibrary/utils/my_abc/better_abc.py +32 -32
  100. re_common/baselibrary/utils/mylogger.py +414 -414
  101. re_common/baselibrary/utils/myredisclient.py +861 -861
  102. re_common/baselibrary/utils/pipupgrade.py +21 -21
  103. re_common/baselibrary/utils/ringlist.py +85 -85
  104. re_common/baselibrary/utils/version_compare.py +36 -36
  105. re_common/baselibrary/utils/ydmhttp.py +126 -126
  106. re_common/facade/lazy_import.py +11 -11
  107. re_common/facade/loggerfacade.py +25 -25
  108. re_common/facade/mysqlfacade.py +467 -467
  109. re_common/facade/now.py +31 -31
  110. re_common/facade/sqlite3facade.py +257 -257
  111. re_common/facade/use/mq_use_facade.py +83 -83
  112. re_common/facade/use/proxy_use_facade.py +19 -19
  113. re_common/libtest/base_dict_test.py +19 -19
  114. re_common/libtest/baseavro_test.py +13 -13
  115. re_common/libtest/basefile_test.py +14 -14
  116. re_common/libtest/basemssql_test.py +77 -77
  117. re_common/libtest/baseodbc_test.py +7 -7
  118. re_common/libtest/basepandas_test.py +38 -38
  119. re_common/libtest/get_attr_test/get_attr_test_settings.py +14 -14
  120. re_common/libtest/get_attr_test/settings.py +54 -54
  121. re_common/libtest/idencode_test.py +53 -53
  122. re_common/libtest/iniconfig_test.py +35 -35
  123. re_common/libtest/ip_test.py +34 -34
  124. re_common/libtest/merge_file_test.py +20 -20
  125. re_common/libtest/mfaker_test.py +8 -8
  126. re_common/libtest/mm3_test.py +31 -31
  127. re_common/libtest/mylogger_test.py +88 -88
  128. re_common/libtest/myparsel_test.py +27 -27
  129. re_common/libtest/mysql_test.py +151 -151
  130. re_common/libtest/pymongo_test.py +21 -21
  131. re_common/libtest/split_test.py +11 -11
  132. re_common/libtest/sqlite3_merge_test.py +5 -5
  133. re_common/libtest/sqlite3_test.py +34 -34
  134. re_common/libtest/tomlconfig_test.py +30 -30
  135. re_common/libtest/use_tools_test/__init__.py +2 -2
  136. re_common/libtest/user/__init__.py +4 -4
  137. re_common/studio/__init__.py +4 -4
  138. re_common/studio/assignment_expressions.py +36 -36
  139. re_common/studio/mydash/test1.py +18 -18
  140. re_common/studio/pydashstudio/first.py +9 -9
  141. re_common/studio/streamlitstudio/first_app.py +65 -65
  142. re_common/studio/streamlitstudio/uber_pickups.py +23 -23
  143. re_common/studio/test.py +18 -18
  144. re_common/v2/baselibrary/business_utils/BusinessStringUtil.py +235 -220
  145. re_common/v2/baselibrary/business_utils/baseencodeid.py +100 -100
  146. re_common/v2/baselibrary/business_utils/full_doi_path.py +116 -116
  147. re_common/v2/baselibrary/business_utils/rel_tools.py +6 -6
  148. re_common/v2/baselibrary/decorators/utils.py +59 -59
  149. re_common/v2/baselibrary/helpers/search_packge/NearestNeighbors_test.py +105 -105
  150. re_common/v2/baselibrary/helpers/search_packge/fit_text_match.py +253 -253
  151. re_common/v2/baselibrary/helpers/search_packge/scikit_learn_text_matcher.py +260 -260
  152. re_common/v2/baselibrary/helpers/search_packge/test.py +1 -1
  153. re_common/v2/baselibrary/s3object/baseboto3.py +230 -230
  154. re_common/v2/baselibrary/tools/WeChatRobot.py +95 -95
  155. re_common/v2/baselibrary/tools/ac_ahocorasick.py +75 -75
  156. re_common/v2/baselibrary/tools/concurrency.py +35 -35
  157. re_common/v2/baselibrary/tools/data_processer/base.py +53 -53
  158. re_common/v2/baselibrary/tools/data_processer/data_processer.py +497 -508
  159. re_common/v2/baselibrary/tools/data_processer/data_reader.py +187 -187
  160. re_common/v2/baselibrary/tools/data_processer/data_writer.py +38 -38
  161. re_common/v2/baselibrary/tools/dict_tools.py +44 -44
  162. re_common/v2/baselibrary/tools/dolphinscheduler.py +187 -187
  163. re_common/v2/baselibrary/tools/hdfs_base_processor.py +204 -204
  164. re_common/v2/baselibrary/tools/hdfs_bulk_processor.py +67 -67
  165. re_common/v2/baselibrary/tools/hdfs_data_processer.py +338 -338
  166. re_common/v2/baselibrary/tools/hdfs_line_processor.py +74 -74
  167. re_common/v2/baselibrary/tools/list_tools.py +69 -69
  168. re_common/v2/baselibrary/tools/resume_tracker.py +94 -94
  169. re_common/v2/baselibrary/tools/search_hash_tools.py +54 -54
  170. re_common/v2/baselibrary/tools/text_matcher.py +326 -326
  171. re_common/v2/baselibrary/tools/tree_processor/__init__.py +0 -0
  172. re_common/v2/baselibrary/tools/tree_processor/builder.py +25 -0
  173. re_common/v2/baselibrary/tools/tree_processor/node.py +13 -0
  174. re_common/v2/baselibrary/tools/unionfind_tools.py +60 -60
  175. re_common/v2/baselibrary/utils/BusinessStringUtil.py +196 -196
  176. re_common/v2/baselibrary/utils/api_net_utils.py +270 -270
  177. re_common/v2/baselibrary/utils/author_smi.py +361 -361
  178. re_common/v2/baselibrary/utils/base_string_similarity.py +158 -158
  179. re_common/v2/baselibrary/utils/basedict.py +37 -37
  180. re_common/v2/baselibrary/utils/basehdfs.py +163 -163
  181. re_common/v2/baselibrary/utils/basepika.py +180 -180
  182. re_common/v2/baselibrary/utils/basetime.py +94 -77
  183. re_common/v2/baselibrary/utils/db.py +174 -156
  184. re_common/v2/baselibrary/utils/elasticsearch.py +46 -0
  185. re_common/v2/baselibrary/utils/json_cls.py +16 -16
  186. re_common/v2/baselibrary/utils/mq.py +83 -83
  187. re_common/v2/baselibrary/utils/n_ary_expression_tree.py +243 -243
  188. re_common/v2/baselibrary/utils/string_bool.py +187 -186
  189. re_common/v2/baselibrary/utils/string_clear.py +246 -246
  190. re_common/v2/baselibrary/utils/string_smi.py +18 -18
  191. re_common/v2/baselibrary/utils/stringutils.py +312 -271
  192. re_common/vip/base_step_process.py +11 -11
  193. re_common/vip/baseencodeid.py +90 -90
  194. re_common/vip/changetaskname.py +28 -28
  195. re_common/vip/core_var.py +24 -24
  196. re_common/vip/mmh3Hash.py +89 -89
  197. re_common/vip/proxy/allproxys.py +127 -127
  198. re_common/vip/proxy/allproxys_thread.py +159 -159
  199. re_common/vip/proxy/cnki_proxy.py +153 -153
  200. re_common/vip/proxy/kuaidaili.py +87 -87
  201. re_common/vip/proxy/proxy_all.py +113 -113
  202. re_common/vip/proxy/update_kuaidaili_0.py +42 -42
  203. re_common/vip/proxy/wanfang_proxy.py +152 -152
  204. re_common/vip/proxy/wp_proxy_all.py +181 -181
  205. re_common/vip/read_rawid_to_txt.py +91 -91
  206. re_common/vip/title/__init__.py +5 -5
  207. re_common/vip/title/transform/TransformBookTitleToZt.py +125 -125
  208. re_common/vip/title/transform/TransformConferenceTitleToZt.py +139 -139
  209. re_common/vip/title/transform/TransformCstadTitleToZt.py +195 -195
  210. re_common/vip/title/transform/TransformJournalTitleToZt.py +203 -203
  211. re_common/vip/title/transform/TransformPatentTitleToZt.py +132 -132
  212. re_common/vip/title/transform/TransformRegulationTitleToZt.py +114 -114
  213. re_common/vip/title/transform/TransformStandardTitleToZt.py +135 -135
  214. re_common/vip/title/transform/TransformThesisTitleToZt.py +135 -135
  215. re_common/vip/title/transform/__init__.py +10 -10
  216. {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/LICENSE +201 -201
  217. {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/METADATA +16 -16
  218. re_common-10.0.41.dist-info/RECORD +252 -0
  219. {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/WHEEL +1 -1
  220. re_common-10.0.39.dist-info/RECORD +0 -248
  221. {re_common-10.0.39.dist-info → re_common-10.0.41.dist-info}/top_level.txt +0 -0
@@ -1,100 +1,100 @@
1
- import base64
2
- import hashlib
3
-
4
- """
5
- VIP编码lngid生成
6
- """
7
-
8
-
9
- class BaseLngid(object):
10
- def __int__(self):
11
- pass
12
-
13
- def BaseEncodeID(self, strRaw):
14
- r""" 自定义base编码 """
15
-
16
- strEncode = base64.b32encode(strRaw.encode('utf8')).decode('utf8')
17
-
18
- if strEncode.endswith('======'):
19
- strEncode = '%s%s' % (strEncode[0:-6], '0')
20
- elif strEncode.endswith('===='):
21
- strEncode = '%s%s' % (strEncode[0:-4], '1')
22
- elif strEncode.endswith('==='):
23
- strEncode = '%s%s' % (strEncode[0:-3], '8')
24
- elif strEncode.endswith('='):
25
- strEncode = '%s%s' % (strEncode[0:-1], '9')
26
-
27
- table = str.maketrans('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'ZYXWVUTSRQPONMLKJIHGFEDCBA9876543210')
28
- strEncode = strEncode.translate(table)
29
-
30
- return strEncode
31
-
32
- def BaseDecodeID(self, strEncode):
33
- r""" 自定义base解码 """
34
-
35
- table = str.maketrans('ZYXWVUTSRQPONMLKJIHGFEDCBA9876543210', '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')
36
- strEncode = strEncode.translate(table)
37
-
38
- if strEncode.endswith('0'):
39
- strEncode = '%s%s' % (strEncode[0:-1], '======')
40
- elif strEncode.endswith('1'):
41
- strEncode = '%s%s' % (strEncode[0:-1], '====')
42
- elif strEncode.endswith('8'):
43
- strEncode = '%s%s' % (strEncode[0:-1], '===')
44
- elif strEncode.endswith('9'):
45
- strEncode = '%s%s' % (strEncode[0:-1], '=')
46
-
47
- strRaw = base64.b32decode(strEncode.encode('utf8')).decode('utf8')
48
-
49
- return strRaw
50
-
51
- def GetLngid(self, sub_db_id, rawid, case_insensitive=False):
52
- """
53
- :param sub_db_id:
54
- :param rawid:
55
- 由 sub_db_id 和 rawid 得到 lngid。
56
- :param case_insensitive: 标识源网站的 rawid 是否区分大小写
57
- :return: lngid
58
- """
59
- uppercase_rawid = '' # 大写版 rawid
60
- if case_insensitive: # 源网站的 rawid 区分大小写
61
- for ch in rawid:
62
- if ch.upper() == ch:
63
- uppercase_rawid += ch
64
- else:
65
- uppercase_rawid += ch.upper() + '_'
66
- else:
67
- uppercase_rawid = rawid.upper()
68
-
69
- limited_id = uppercase_rawid # 限长ID
70
- if len(uppercase_rawid) > 20:
71
- limited_id = hashlib.md5(uppercase_rawid.encode('utf8')).hexdigest().upper()
72
- else:
73
- limited_id = self.BaseEncodeID(uppercase_rawid)
74
-
75
- lngid = sub_db_id + limited_id
76
-
77
- return lngid
78
-
79
- def getDoiid(self, doi, case_insensitive=False):
80
- if case_insensitive is False:
81
- doi = doi.upper()
82
- limited_id = self.BaseEncodeID(doi)
83
- if len(limited_id) > 240:
84
- limited_id = hashlib.md5(doi.encode('utf8')).hexdigest().upper()
85
- return limited_id
86
-
87
- def GetRawid(self, limited_id, case_insensitive=False):
88
- try:
89
- uppercase_rawid = self.BaseDecodeID(limited_id)
90
- if case_insensitive:
91
- str_ = "_"
92
- uppercase_rawid_list = list(uppercase_rawid)
93
- for num, li in enumerate(uppercase_rawid_list):
94
- if li == str_:
95
- old_str = "".join(uppercase_rawid_list[num - 1:num + 1])
96
- uppercase_rawid = uppercase_rawid.replace(old_str, uppercase_rawid_list[num - 1].lower())
97
- except Exception as e:
98
- raise Exception("长度超过20,不可逆")
99
-
100
- return uppercase_rawid
1
+ import base64
2
+ import hashlib
3
+
4
+ """
5
+ VIP编码lngid生成
6
+ """
7
+
8
+
9
class BaseLngid(object):
    """Build and reverse VIP ``lngid`` identifiers.

    An identifier is ``sub_db_id`` followed by a "limited id".  The limited
    id is a customised Base32 rendering of the raw id: the ``=`` padding run
    is collapsed into a single digit and the whole alphabet is then mirrored.
    When the raw id is too long an irreversible MD5 hex digest is used
    instead.
    """

    # Base32 only emits A-Z and 2-7, so the digits 0, 1, 8 and 9 are free to
    # stand in for the four padding runs Base32 can produce.  Ordered longest
    # first so that ``endswith`` matches the complete run.
    _PADDING_DIGITS = (('======', '0'), ('====', '1'), ('===', '8'), ('=', '9'))

    # The mirrored alphabet: each character maps to the one at the same
    # position counted from the other end.
    _PLAIN = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    _MIRRORED = 'ZYXWVUTSRQPONMLKJIHGFEDCBA9876543210'
    _ENCODE_TABLE = str.maketrans(_PLAIN, _MIRRORED)
    _DECODE_TABLE = str.maketrans(_MIRRORED, _PLAIN)

    def __init__(self):
        # Previously misspelled ``__int__``, which defined an integer
        # conversion hook returning None instead of a constructor.
        pass

    def BaseEncodeID(self, strRaw):
        r"""Encode ``strRaw`` with the customised Base32 scheme.

        :param strRaw: text to encode (encoded as UTF-8 first).
        :return: mirrored Base32 text with the padding folded into one digit.
        """
        strEncode = base64.b32encode(strRaw.encode('utf8')).decode('utf8')

        for padding, digit in self._PADDING_DIGITS:
            if strEncode.endswith(padding):
                strEncode = strEncode[:-len(padding)] + digit
                break

        return strEncode.translate(self._ENCODE_TABLE)

    def BaseDecodeID(self, strEncode):
        r"""Decode text produced by :meth:`BaseEncodeID`.

        :param strEncode: encoded text.
        :return: the original text.
        :raises Exception: whatever ``base64.b32decode`` or the UTF-8 decode
            raises when the input is not valid encoder output.
        """
        strEncode = strEncode.translate(self._DECODE_TABLE)

        # Expand the trailing marker digit back into its padding run.
        for padding, digit in self._PADDING_DIGITS:
            if strEncode.endswith(digit):
                strEncode = strEncode[:-1] + padding
                break

        return base64.b32decode(strEncode.encode('utf8')).decode('utf8')

    def GetLngid(self, sub_db_id, rawid, case_insensitive=False):
        """Derive the lngid for ``rawid`` inside ``sub_db_id``.

        :param sub_db_id: prefix prepended to the limited id.
        :param rawid: raw identifier from the source site.
        :param case_insensitive: despite the name, pass ``True`` when the
            source site's raw ids are case-sensitive.  Each character whose
            upper-case form differs from itself is then written as its
            upper-case form followed by ``_`` so the case can be restored;
            otherwise the raw id is simply upper-cased.
        :return: ``sub_db_id`` + limited id.
        """
        if case_insensitive:
            uppercase_rawid = ''.join(
                ch if ch.upper() == ch else ch.upper() + '_' for ch in rawid
            )
        else:
            uppercase_rawid = rawid.upper()

        if len(uppercase_rawid) > 20:
            # Too long for the reversible form: fall back to an MD5 digest.
            limited_id = hashlib.md5(uppercase_rawid.encode('utf8')).hexdigest().upper()
        else:
            limited_id = self.BaseEncodeID(uppercase_rawid)

        return sub_db_id + limited_id

    def getDoiid(self, doi, case_insensitive=False):
        """Return the limited id for a DOI.

        :param doi: DOI text.
        :param case_insensitive: when ``False`` the DOI is upper-cased before
            encoding; when ``True`` it is encoded exactly as given.
        :return: the customised Base32 text, or an upper-case MD5 hex digest
            when that text would be longer than 240 characters.
        """
        if case_insensitive is False:
            doi = doi.upper()
        limited_id = self.BaseEncodeID(doi)
        if len(limited_id) > 240:
            limited_id = hashlib.md5(doi.encode('utf8')).hexdigest().upper()
        return limited_id

    def GetRawid(self, limited_id, case_insensitive=False):
        """Recover the raw id from a limited id (no ``sub_db_id`` prefix).

        :param limited_id: value produced by :meth:`BaseEncodeID`.
        :param case_insensitive: pass the same value that was given to
            :meth:`GetLngid`; when true the ``_`` case markers are removed
            and the marked characters are lower-cased again.
        :return: the raw id (upper-cased unless case markers were used).
        :raises Exception: when ``limited_id`` cannot be decoded, e.g. because
            it is an MD5 digest of an over-long raw id.

        A literal ``_`` that directly follows an upper-case letter in the raw
        id cannot be told apart from a case marker; that ambiguity is part of
        the encoding itself.
        """
        try:
            uppercase_rawid = self.BaseDecodeID(limited_id)
        except Exception as e:
            # Message kept verbatim; the cause is chained for debugging.
            raise Exception("长度超过20,不可逆") from e

        if not case_insensitive:
            return uppercase_rawid

        # Single left-to-right pass.  A "_" is a case marker only when the
        # character before it has a distinct lower-case form, because the
        # encoder only ever appends "_" after such characters.  This also
        # keeps a leading "_" or one after a digit as a literal.
        restored = []
        index = 0
        total = len(uppercase_rawid)
        while index < total:
            ch = uppercase_rawid[index]
            marked = (
                index + 1 < total
                and uppercase_rawid[index + 1] == '_'
                and ch.lower() != ch
            )
            if marked:
                restored.append(ch.lower())
                index += 2
            else:
                restored.append(ch)
                index += 1
        return ''.join(restored)
@@ -1,116 +1,116 @@
1
- import base64
2
- import hashlib
3
- import os
4
-
5
- from re_common.v2.baselibrary.business_utils.baseencodeid import BaseLngid
6
-
7
- import os
8
- import base64
9
- import hashlib
10
-
11
- """
12
- DOI-文件路径 转换工具
13
-
14
- 设计目标:
15
- 1. 将任意DOI字符串转换为可逆、稳定的文件路径
16
- 2. 提供高效的目录分散方案(65,536个子目录)
17
- 3. 支持带文件扩展名的存储
18
- 4. 完全可逆转换
19
-
20
- 工作原理:
21
- 1. DOI编码:
22
- - 使用URL安全的Base64编码(RFC 3548)
23
- - 移除Base64填充的'='字符
24
- - 文件名长度 ≈ 原始DOI长度 × 4/3
25
-
26
- 2. 目录分散:
27
- - 使用MD5哈希创建两级目录结构
28
- - 目录层级:/MD5[0:2]/MD5[2:4]/
29
- - 支持65,536个目录(256×256),每目录约1,525个文件(假设10亿文件)
30
-
31
- 3. 扩展名处理:
32
- - 保持原始扩展名不变
33
- - 解码时自动忽略扩展名
34
-
35
- 典型转换示例:
36
- DOI: "10.1000/xyz123" -> 路径: "a1/b2/QTMuMTAwMC94eXoxMjM.pdf"
37
- 路径: "a1/b2/QTMuMTAwMC94eXoxMjM.pdf" -> DOI: "10.1000/xyz123"
38
- """
39
-
40
- base_lngid = BaseLngid()
41
-
42
-
43
- # 以后需要启用
44
- def doi_to_path(doi: str, ext: str = "") -> str:
45
- """
46
- 将 DOI 转换为可逆的存储路径:
47
- 1. 对 DOI 进行 URL 安全的 Base64 编码(可逆)
48
- 2. 生成 DOI 的 MD5 哈希用于目录分散
49
- 3. 目录结构:MD5前2字符/次2字符/
50
- 4. 文件名:Base64编码的DOI + 扩展名
51
-
52
- Args:
53
- doi: 文件 DOI 标识符
54
- ext: 文件扩展名(如 '.pdf')
55
-
56
- Returns:
57
- 相对文件路径(如 'a1/b2/QTMuMTAwMC94eXoxMjM=.pdf')
58
- """
59
- # URL安全的Base64编码(可逆)
60
- doi_b64 = base64.urlsafe_b64encode(doi.encode("utf-8")).decode("ascii").rstrip("=")
61
-
62
- # 生成MD5哈希用于目录分配
63
- hash_md5 = hashlib.md5(doi.encode("utf-8")).hexdigest()
64
- dir_level1 = hash_md5[0:2]
65
- dir_level2 = hash_md5[2:4]
66
-
67
- return os.path.join(dir_level1, dir_level2, f"{doi_b64}{ext}")
68
-
69
-
70
- # 以后需要启用
71
- def path_to_doi(path: str) -> str:
72
- """
73
- 从文件路径反推原始DOI
74
- Args:
75
- path: 文件路径(如 'a1/b2/QTMuMTAwMC94eXoxMjM=.pdf')
76
-
77
- Returns:
78
- 原始DOI字符串
79
- """
80
- # 提取文件名并移除扩展名
81
- filename = os.path.basename(path)
82
- base_name = os.path.splitext(filename)[0]
83
-
84
- # 补齐Base64填充字符
85
- padding = 4 - (len(base_name) % 4)
86
- if padding != 4: # 不需要补齐
87
- base_name += "=" * padding
88
-
89
- # Base64解码还原DOI
90
- return base64.urlsafe_b64decode(base_name.encode("ascii")).decode("utf-8")
91
-
92
-
93
- def doi_to_dir(doi):
94
- """生成文件的存储路径和可解码的文件名
95
-
96
- Args:
97
- doi (str): 文件的唯一DOI标识
98
-
99
- Returns:
100
- str: 文件相对路径,如 "ab/cd/Base64EncodedFileName"
101
- """
102
- # 计算DOI的MD5哈希
103
- hash_md5 = hashlib.md5(doi.encode('utf-8')).hexdigest().lower()
104
-
105
- # 提取目录层级:前2位作为一级目录,3-4位作为二级目录
106
- first_dir = hash_md5[0:2].upper()
107
- second_dir = hash_md5[2:4].upper()
108
-
109
- return first_dir + "/" + second_dir
110
-
111
-
112
- def get_doi_path(doi, case_insensitive=False):
113
- # 目前使用
114
- dir_path = doi_to_dir(doi)
115
- file_name = base_lngid.getDoiid(doi, case_insensitive=case_insensitive) + ".pdf"
116
- return dir_path + "/" + file_name
1
+ import base64
2
+ import hashlib
3
+ import os
4
+
5
+ from re_common.v2.baselibrary.business_utils.baseencodeid import BaseLngid
6
+
7
+ import os
8
+ import base64
9
+ import hashlib
10
+
11
+ """
12
+ DOI-文件路径 转换工具
13
+
14
+ 设计目标:
15
+ 1. 将任意DOI字符串转换为可逆、稳定的文件路径
16
+ 2. 提供高效的目录分散方案(65,536个子目录)
17
+ 3. 支持带文件扩展名的存储
18
+ 4. 完全可逆转换
19
+
20
+ 工作原理:
21
+ 1. DOI编码:
22
+ - 使用URL安全的Base64编码(RFC 3548)
23
+ - 移除Base64填充的'='字符
24
+ - 文件名长度 ≈ 原始DOI长度 × 4/3
25
+
26
+ 2. 目录分散:
27
+ - 使用MD5哈希创建两级目录结构
28
+ - 目录层级:/MD5[0:2]/MD5[2:4]/
29
+ - 支持65,536个目录(256×256),每目录约1,525个文件(假设10亿文件)
30
+
31
+ 3. 扩展名处理:
32
+ - 保持原始扩展名不变
33
+ - 解码时自动忽略扩展名
34
+
35
+ 典型转换示例:
36
+ DOI: "10.1000/xyz123" -> 路径: "a1/b2/MTAuMTAwMC94eXoxMjM.pdf"
37
+ 路径: "a1/b2/MTAuMTAwMC94eXoxMjM.pdf" -> DOI: "10.1000/xyz123"
38
+ """
39
+
40
+ base_lngid = BaseLngid()
41
+
42
+
43
+ # 以后需要启用
44
def doi_to_path(doi: str, ext: str = "") -> str:
    """Map a DOI to a reversible relative storage path.

    The file name is the URL-safe Base64 form of the DOI with the ``=``
    padding stripped, followed by ``ext``.  The two directory levels are
    the first and second pairs of hex characters of the DOI's MD5 digest.

    Args:
        doi: DOI identifying the file.
        ext: file extension to append, e.g. ``'.pdf'``.

    Returns:
        Relative path joined with ``os.path.join``, shaped like
        ``'<h0h1>/<h2h3>/MTAuMTAwMC94eXoxMjM.pdf'`` for ``"10.1000/xyz123"``.
    """
    doi_bytes = doi.encode("utf-8")

    # Reversible part: the DOI itself, Base64-encoded without padding.
    encoded_name = base64.urlsafe_b64encode(doi_bytes).decode("ascii").rstrip("=")

    # Spreading part: four hex characters of the MD5 digest.
    digest = hashlib.md5(doi_bytes).hexdigest()

    return os.path.join(digest[:2], digest[2:4], f"{encoded_name}{ext}")
68
+
69
+
70
+ # 以后需要启用
71
def path_to_doi(path: str) -> str:
    """Recover the DOI from a path whose file name is its Base64 form.

    Args:
        path: file path such as ``'a1/b2/MTAuMTAwMC94eXoxMjM.pdf'``; only
            the final component is used and its extension is ignored.

    Returns:
        The decoded DOI string.
    """
    stem, _extension = os.path.splitext(os.path.basename(path))

    # Base64 input must be a multiple of four characters long; put back the
    # "=" padding that was stripped when the name was created.
    missing = -len(stem) % 4
    padded = stem + "=" * missing

    return base64.urlsafe_b64decode(padded.encode("ascii")).decode("utf-8")
91
+
92
+
93
def doi_to_dir(doi):
    """Return the two-level storage directory for a DOI.

    Args:
        doi (str): DOI identifying the file.

    Returns:
        str: ``"AB/CD"`` where ``AB`` and ``CD`` are the first and second
        pairs of hex characters of the DOI's MD5 digest, upper-cased and
        joined with ``/``.  No file name is included.
    """
    digest = hashlib.md5(doi.encode('utf-8')).hexdigest()
    return "/".join((digest[:2], digest[2:4])).upper()
110
+
111
+
112
def get_doi_path(doi, case_insensitive=False):
    """Return the storage path currently in use for a DOI's PDF.

    The directory comes from ``doi_to_dir(doi)`` and the file name is
    ``base_lngid.getDoiid(doi, ...)`` with ``.pdf`` appended, joined by ``/``.

    Args:
        doi: DOI identifying the file.
        case_insensitive: forwarded unchanged to ``getDoiid``.

    Returns:
        str: ``"<directory>/<encoded doi>.pdf"``.
    """
    # NOTE(review): the directory is derived from the DOI exactly as passed
    # in, while getDoiid may upper-case it first; confirm this is intended.
    directory = doi_to_dir(doi)
    encoded_name = base_lngid.getDoiid(doi, case_insensitive=case_insensitive)
    return directory + "/" + encoded_name + ".pdf"
@@ -1,6 +1,6 @@
1
- def assign_group_id(rows: list, sub_db_order: list):
2
- subdb_keyid_map = {row.sub_db_id: row.keyid for row in rows}
3
- for sub_db_id in sub_db_order:
4
- if keyid := subdb_keyid_map.get(sub_db_id):
5
- return keyid, len(rows), rows
6
- return rows[0].keyid, len(rows), rows
1
def assign_group_id(rows: list, sub_db_order: list):
    """Choose the group key id for a set of rows.

    Args:
        rows: objects exposing ``sub_db_id`` and ``keyid`` attributes.  When
            several rows share a ``sub_db_id`` the last one wins.
        sub_db_order: ``sub_db_id`` values in order of preference.

    Returns:
        tuple: ``(keyid, len(rows), rows)``.  ``keyid`` belongs to the first
        entry of ``sub_db_order`` that has a truthy key id among ``rows``;
        when none does, the key id of ``rows[0]`` is used.
    """
    keyid_by_sub_db = {}
    for row in rows:
        sub_db = row.sub_db_id
        keyid_by_sub_db[sub_db] = row.keyid

    row_count = len(rows)
    for preferred in sub_db_order:
        candidate = keyid_by_sub_db.get(preferred)
        if candidate:
            return candidate, row_count, rows

    # Nothing in the preference list matched: fall back to the first row.
    return rows[0].keyid, row_count, rows
@@ -1,59 +1,59 @@
1
- import warnings
2
- import functools
3
-
4
- # 全局集合,用于记录已警告的函数或类
5
- _warned_once = set()
6
-
7
-
8
- def deprecated(message=None):
9
- """
10
- 装饰器:标记函数或类为已废弃,整个进程只发出一次警告。
11
-
12
- Args:
13
- message (str): 自定义警告信息,默认为 None。
14
- """
15
-
16
- def decorator(obj):
17
- # 如果是函数
18
- if isinstance(obj, type(lambda: None)):
19
- @functools.wraps(obj)
20
- def wrapper(*args, **kwargs):
21
- obj_id = id(obj) # 使用对象的内存地址作为唯一标识
22
- if obj_id not in _warned_once:
23
- default_msg = f"函数 {obj.__name__} 已不建议使用。"
24
- warn_msg = f"{default_msg} {message}" if message else default_msg
25
- warnings.warn(
26
- warn_msg,
27
- category=DeprecationWarning,
28
- stacklevel=2
29
- )
30
- _warned_once.add(obj_id) # 记录已警告
31
- return obj(*args, **kwargs)
32
-
33
- return wrapper
34
-
35
- # 如果是类
36
- elif isinstance(obj, type):
37
- orig_init = obj.__init__
38
-
39
- @functools.wraps(orig_init)
40
- def new_init(self, *args, **kwargs):
41
- obj_id = id(obj)
42
- if obj_id not in _warned_once:
43
- default_msg = f"类 {obj.__name__} 已不建议使用。"
44
- warn_msg = f"{default_msg} {message}" if message else default_msg
45
- warnings.warn(
46
- warn_msg,
47
- category=DeprecationWarning,
48
- stacklevel=2
49
- )
50
- _warned_once.add(obj_id) # 记录已警告
51
- orig_init(self, *args, **kwargs)
52
-
53
- obj.__init__ = new_init
54
- return obj
55
-
56
- else:
57
- raise TypeError("此装饰器仅适用于函数和类")
58
-
59
- return decorator
1
+ import warnings
2
+ import functools
3
+
4
# Process-wide registry holding the id() of every function or class that has
# already triggered its deprecation warning.
_warned_once = set()


def deprecated(message=None):
    """Mark a function or class as deprecated, warning once per process.

    A decorated function warns on its first call; a decorated class warns on
    its first instantiation (its ``__init__`` is replaced in place and the
    same class object is returned).

    Args:
        message (str): extra text appended to the default warning; defaults
            to None.

    Raises:
        TypeError: at decoration time, when the target is neither a plain
            Python function nor a class.
    """

    def decorator(obj):
        def announce(kind):
            # Keyed on id(obj); obj stays alive through this closure.
            key = id(obj)
            if key in _warned_once:
                return
            text = f"{kind} {obj.__name__} 已不建议使用。"
            if message:
                text = f"{text} {message}"
            # stacklevel=3 skips this helper and the wrapper that called it,
            # so the warning is attributed to the user's call site.
            warnings.warn(text, category=DeprecationWarning, stacklevel=3)
            _warned_once.add(key)

        # Classes: wrap the constructor and hand back the same class.
        if isinstance(obj, type):
            original_init = obj.__init__

            @functools.wraps(original_init)
            def new_init(self, *args, **kwargs):
                announce("类")
                original_init(self, *args, **kwargs)

            obj.__init__ = new_init
            return obj

        # Plain functions: return a wrapper that warns and then delegates.
        if isinstance(obj, type(lambda: None)):
            @functools.wraps(obj)
            def wrapper(*args, **kwargs):
                announce("函数")
                return obj(*args, **kwargs)

            return wrapper

        raise TypeError("此装饰器仅适用于函数和类")

    return decorator