re-common 10.0.5__py3-none-any.whl → 10.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- re_common/v2/baselibrary/helpers/__init__.py +0 -0
- re_common/v2/baselibrary/utils/author_smi.py +1 -0
- re_common/v2/baselibrary/utils/n_ary_expression_tree.py +66 -0
- re_common/v2/baselibrary/utils/string_clear.py +53 -0
- {re_common-10.0.5.dist-info → re_common-10.0.7.dist-info}/METADATA +1 -1
- {re_common-10.0.5.dist-info → re_common-10.0.7.dist-info}/RECORD +9 -8
- {re_common-10.0.5.dist-info → re_common-10.0.7.dist-info}/LICENSE +0 -0
- {re_common-10.0.5.dist-info → re_common-10.0.7.dist-info}/WHEEL +0 -0
- {re_common-10.0.5.dist-info → re_common-10.0.7.dist-info}/top_level.txt +0 -0
|
File without changes
|
|
@@ -299,6 +299,7 @@ def AuthorRatio(
|
|
|
299
299
|
if len(sort_l1) == len(sort_l2) and (is_same_or_initials_match(sort_l1, sort_l2) or set(sort_l1) == set(sort_l2)):
|
|
300
300
|
return 0.99
|
|
301
301
|
|
|
302
|
+
|
|
302
303
|
##############################################################
|
|
303
304
|
# 以上为情况穷举情况,以下为其他情况的相似率计算
|
|
304
305
|
##############################################################
|
|
@@ -157,6 +157,70 @@ def parse_expression(tokens):
|
|
|
157
157
|
return tree
|
|
158
158
|
|
|
159
159
|
|
|
160
|
+
def flatten_tree(node):
    """Build a cleaned copy of a syntax tree with nested and/or nodes merged.

    When an 'and' (or 'or') node has a child carrying the same value, that
    child is replaced by its own children, e.g. ``or(or(a, b), c)`` becomes
    ``or(a, b, c)``. Nodes with any other value (for example 'not') keep
    their structure; only their children are rebuilt.

    Args:
        node (Node): Root of the syntax (sub)tree to clean.

    Returns:
        Node: Root of the newly constructed, flattened tree.
    """
    # A node without children (a condition node) is simply copied.
    if not node.children:
        return Node(value=node.value, children=[])

    # Clean bottom-up: every child is already flattened before it is
    # inspected, so lifting grandchildren by one level is sufficient.
    rebuilt = [flatten_tree(sub) for sub in node.children]

    # Anything that is not 'and'/'or' keeps its shape with the new children.
    if node.value not in ('and', 'or'):
        return Node(value=node.value, children=rebuilt)

    merged = []
    for sub in rebuilt:
        if sub.value == node.value:
            # Same operator as the parent: promote its children.
            merged.extend(sub.children)
        else:
            merged.append(sub)
    return Node(value=node.value, children=merged)
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def pretty_print_tree(node, indent=0, prefix=""):
    """Render a syntax tree as an indented, multi-line string.

    A node without children is rendered on a single line. A node with
    children is rendered as an opening line, one rendering per child
    (indented four more columns), and a closing ``])`` line.

    Args:
        node (Node): Tree node to render; only ``value`` and ``children``
            are read.
        indent (int): Number of leading spaces for this node. Defaults to 0.
        prefix (str): Text placed between the indentation and the node text
            on the node's first line.

    Returns:
        str: The formatted tree, lines joined with "\n".
    """
    pad = " " * indent

    # Childless node: single-line form.
    if not node.children:
        return f"{pad}{prefix}Node(value='{node.value}', children=[])"

    lines = [f"{pad}{prefix}Node(value='{node.value}', children=["]

    # Every child receives the same one-space prefix regardless of its
    # position, so no first/last distinction is needed here.
    lines.extend(
        pretty_print_tree(child, indent + 4, " ") for child in node.children
    )

    # The closing bracket is aligned to `indent` and does not repeat `prefix`.
    lines.append(f"{pad}])")

    return "\n".join(lines)
|
|
223
|
+
|
|
160
224
|
# 测试代码
|
|
161
225
|
expressions = [
|
|
162
226
|
"not A=1 and B= 2",
|
|
@@ -173,6 +237,8 @@ for expr in expressions:
|
|
|
173
237
|
tokens = tokenize(expr)
|
|
174
238
|
print("Tokens:", tokens)
|
|
175
239
|
tree = parse_expression(tokens)
|
|
240
|
+
tree = flatten_tree(tree)
|
|
241
|
+
tree = pretty_print_tree(tree)
|
|
176
242
|
print("Tree:", tree)
|
|
177
243
|
except ValueError as e:
|
|
178
244
|
print(f"Error: {e}")
|
|
@@ -67,9 +67,15 @@ class StringClear(object):
|
|
|
67
67
|
return self
|
|
68
68
|
|
|
69
69
|
def replace_dash_with_space(self):
    """Replace every "-" in ``self.obj_str`` with a space; return ``self``."""
    without_dashes = self.obj_str.replace("-", " ")
    self.obj_str = without_dashes
    return self
|
|
72
73
|
|
|
74
|
+
def strip_quotes(self):
    """Remove every double-quote character from ``self.obj_str``; return ``self``."""
    unquoted = self.obj_str.replace('"', "")
    self.obj_str = unquoted
    return self
|
|
78
|
+
|
|
73
79
|
def remove_diacritics(self):
|
|
74
80
|
# 去除音标 转换成字母
|
|
75
81
|
self.obj_str = get_diacritic_variant(self.obj_str)
|
|
@@ -110,3 +116,50 @@ def rel_clear(str_obj):
|
|
|
110
116
|
.lower() # 小写
|
|
111
117
|
.get_str() # 获取str
|
|
112
118
|
.strip()) # 去掉空格
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def clear_au_organ(str_obj):
    """Clean a string for author/organization data.

    Unlike the cleaner defined above it in this file, this one does not
    lowercase the text, and it applies a few extra special-case cleanups.
    The step descriptions below follow the file's own per-step notes.
    """
    cleaner = StringClear(str_obj)
    cleaner = cleaner.None_to_str()      # None -> empty string
    cleaner = cleaner.to_str()           # guard against non-str input (e.g. int, double)
    cleaner = cleaner.qj_to_bj()         # full-width -> half-width characters
    cleaner = cleaner.strip_quotes()     # remove double quotes
    cleaner = cleaner.clean_symbols()    # remove known symbols
    cleaner = cleaner.collapse_spaces()  # collapse runs of spaces into one
    text = cleaner.get_str().strip()     # extract the str and trim it

    # Special requirement: drop this literal marker, then trim again.
    return text.replace("lt正gt", "").strip()
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def ref_clear(str_obj):
    """Clean a string for citation (reference) data.

    The step descriptions below follow the file's own per-step notes.
    """
    cleaner = StringClear(str_obj)
    cleaner = cleaner.None_to_str()      # None -> empty string
    cleaner = cleaner.to_str()           # guard against non-str input (e.g. int, double)
    cleaner = cleaner.qj_to_bj()         # full-width -> half-width characters
    cleaner = cleaner.strip_quotes()     # remove double quotes
    cleaner = cleaner.clean_symbols()    # remove known symbols
    cleaner = cleaner.collapse_spaces()  # collapse runs of spaces into one
    cleaner = cleaner.lower()            # lowercase
    # Extract the str and trim surrounding whitespace.
    return cleaner.get_str().strip()
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def clear_obj(str_obj):
    """Clean a string for object-style normalization.

    Runs ``clear_au_organ`` first, rewrites "ß" as "SS", then removes
    diacritics, uppercases, and trims the result.
    """
    base = clear_au_organ(str_obj).replace("ß", "SS")
    cleaner = StringClear(base).remove_diacritics()  # strip diacritics
    cleaner = cleaner.upper()
    # Extract the str and trim surrounding whitespace.
    return cleaner.get_str().strip()
|
|
@@ -163,6 +163,7 @@ re_common/studio/streamlitstudio/first_app.py,sha256=t7Fw8YDlub7G9q99GgVo_3sPZXU
|
|
|
163
163
|
re_common/studio/streamlitstudio/uber_pickups.py,sha256=cvrV5e8vRBM2_CpVDBE-f3V4mGFK9SqpRPZK8TEqr6U,785
|
|
164
164
|
re_common/v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
165
165
|
re_common/v2/baselibrary/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
166
|
+
re_common/v2/baselibrary/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
166
167
|
re_common/v2/baselibrary/s3object/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
167
168
|
re_common/v2/baselibrary/s3object/baseboto3.py,sha256=mXuIFx99pnrPGQ4LJCZwlN1HLbaU-OWLwck0cVzW6hc,11203
|
|
168
169
|
re_common/v2/baselibrary/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -173,13 +174,13 @@ re_common/v2/baselibrary/tools/text_matcher.py,sha256=F4WtLO-b7H6V9TIvOntCD9ZXSQ
|
|
|
173
174
|
re_common/v2/baselibrary/tools/unionfind_tools.py,sha256=VYHZZPXwBYljsm7TjV1B6iCgDn3O3btzNf9hMvQySVU,2965
|
|
174
175
|
re_common/v2/baselibrary/utils/BusinessStringUtil.py,sha256=tzjVr_-6iPAKTt14hR-BhRshdRgeT_MPJpUQkxcTXns,4084
|
|
175
176
|
re_common/v2/baselibrary/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
176
|
-
re_common/v2/baselibrary/utils/author_smi.py,sha256=
|
|
177
|
+
re_common/v2/baselibrary/utils/author_smi.py,sha256=1ebH3AHv19jtJWdlqNdwu6t58HNVLCotuCB6ff1SWiw,13666
|
|
177
178
|
re_common/v2/baselibrary/utils/basedict.py,sha256=tSV85pARe8ZQDY77_h_heS81EWwcgJW076DcA9WQyjY,1161
|
|
178
179
|
re_common/v2/baselibrary/utils/basehdfs.py,sha256=NVV5Q0OMPlM_zTrs9ZDoPJv29GQv5wi9-AP1us5dBrQ,4651
|
|
179
180
|
re_common/v2/baselibrary/utils/json_cls.py,sha256=dHOkWafG9lbQDoub9cbDwT2fDjMKtblQnjFLeA4hECA,286
|
|
180
|
-
re_common/v2/baselibrary/utils/n_ary_expression_tree.py,sha256
|
|
181
|
+
re_common/v2/baselibrary/utils/n_ary_expression_tree.py,sha256=-05kO6G2Rth7CEK-5lfFrthFZ1Q0-0a7cni7mWZ-2gg,9172
|
|
181
182
|
re_common/v2/baselibrary/utils/string_bool.py,sha256=4VCr1g8pX5YnzZSKctQgQfmhSQ0aw7a8ruhWdiRmBFU,641
|
|
182
|
-
re_common/v2/baselibrary/utils/string_clear.py,sha256=
|
|
183
|
+
re_common/v2/baselibrary/utils/string_clear.py,sha256=k0QyD2FNPDpDvBd91Qhz69K81ydDpcHzHq1HSwaeG_w,5434
|
|
183
184
|
re_common/v2/baselibrary/utils/stringutils.py,sha256=GLXHAm8IulC_8hWrN2aiFQjsoOpjczvcVozmTJj86-A,3864
|
|
184
185
|
re_common/vip/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
185
186
|
re_common/vip/base_step_process.py,sha256=VXXiNj0I5CpzXIMCgOPU86bzDJkSBkUS-9CpZIl_GOk,205
|
|
@@ -207,8 +208,8 @@ re_common/vip/title/transform/TransformRegulationTitleToZt.py,sha256=LKRdIsWKues
|
|
|
207
208
|
re_common/vip/title/transform/TransformStandardTitleToZt.py,sha256=-fCKAbSBzXVyQDCE61CalvR9E_QzQMA08QOO_NePFNI,5563
|
|
208
209
|
re_common/vip/title/transform/TransformThesisTitleToZt.py,sha256=QS-uV0cQrpUFAcKucuJQ9Ue2VRQH-inmfn_X3IplfRo,5488
|
|
209
210
|
re_common/vip/title/transform/__init__.py,sha256=m83-CWyRq_VHPYHaALEQlmXrkTdrZ3e4B_kCfBYE-uc,239
|
|
210
|
-
re_common-10.0.
|
|
211
|
-
re_common-10.0.
|
|
212
|
-
re_common-10.0.
|
|
213
|
-
re_common-10.0.
|
|
214
|
-
re_common-10.0.
|
|
211
|
+
re_common-10.0.7.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
212
|
+
re_common-10.0.7.dist-info/METADATA,sha256=4WDIp2GrlSKpykFERfN59eNiEE0CgOJppUVg_wvEjHk,581
|
|
213
|
+
re_common-10.0.7.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
214
|
+
re_common-10.0.7.dist-info/top_level.txt,sha256=_H9H23zoLIalm1AIY_KYTVh_H0ZnmjxQIxsvXtLv45o,10
|
|
215
|
+
re_common-10.0.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|