pyeasyphd 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyeasyphd might be problematic. Click here for more details.
- pyeasyphd/.python-version +1 -1
- pyeasyphd/main/__init__.py +0 -4
- pyeasyphd/main/basic_input.py +7 -63
- pyeasyphd/main/python_run_md.py +3 -3
- pyeasyphd/main/python_run_tex.py +1 -1
- pyeasyphd/pyeasyphd.sublime-settings +2 -160
- pyeasyphd/tools/__init__.py +1 -16
- pyeasyphd/tools/generate/generate_from_bibs.py +54 -330
- pyeasyphd/tools/generate/generate_html.py +122 -0
- pyeasyphd/tools/generate/generate_library.py +188 -0
- pyeasyphd/tools/generate/generate_links.py +13 -4
- pyeasyphd/tools/py_run_bib_md_tex.py +12 -13
- pyeasyphd/tools/search/search_base.py +8 -5
- pyeasyphd/tools/search/search_core.py +4 -3
- pyeasyphd/tools/search/search_keywords.py +1 -1
- pyeasyphd/tools/search/search_writers.py +8 -5
- {pyeasyphd-0.1.0.dist-info → pyeasyphd-0.1.2.dist-info}/METADATA +3 -6
- pyeasyphd-0.1.2.dist-info/RECORD +27 -0
- pyeasyphd/bib/__init__.py +0 -1
- pyeasyphd/bib/bibtexbase/__init__.py +0 -7
- pyeasyphd/bib/bibtexbase/standardize/_base.py +0 -36
- pyeasyphd/bib/bibtexbase/standardize/default_data.py +0 -97
- pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +0 -54
- pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +0 -38
- pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +0 -310
- pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +0 -35
- pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +0 -34
- pyeasyphd/bib/bibtexbase/standardize_bib.py +0 -75
- pyeasyphd/bib/bibtexparser/__init__.py +0 -47
- pyeasyphd/bib/bibtexparser/bibtex_format.py +0 -87
- pyeasyphd/bib/bibtexparser/exceptions.py +0 -64
- pyeasyphd/bib/bibtexparser/library.py +0 -207
- pyeasyphd/bib/bibtexparser/middlewares/block/add.py +0 -94
- pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +0 -22
- pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +0 -62
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +0 -47
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +0 -31
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +0 -222
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +0 -34
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +0 -33
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +0 -70
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +0 -15
- pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +0 -113
- pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +0 -34
- pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +0 -21
- pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +0 -28
- pyeasyphd/bib/bibtexparser/middlewares/block/title.py +0 -20
- pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +0 -98
- pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +0 -29
- pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +0 -124
- pyeasyphd/bib/bibtexparser/middlewares/middleware.py +0 -222
- pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +0 -13
- pyeasyphd/bib/bibtexparser/middlewares/utils.py +0 -226
- pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +0 -414
- pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +0 -42
- pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +0 -35
- pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +0 -29
- pyeasyphd/bib/bibtexparser/model.py +0 -481
- pyeasyphd/bib/bibtexparser/splitter.py +0 -151
- pyeasyphd/bib/core/__init__.py +0 -18
- pyeasyphd/bib/core/convert_library_to_library.py +0 -31
- pyeasyphd/bib/core/convert_library_to_str.py +0 -199
- pyeasyphd/bib/core/convert_str_to_library.py +0 -34
- pyeasyphd/bib/core/convert_str_to_str.py +0 -27
- pyeasyphd/main/python_run_bib.py +0 -73
- pyeasyphd/main/python_writers.py +0 -212
- pyeasyphd/tools/compare/compare_bibs.py +0 -234
- pyeasyphd/tools/experiments_base.py +0 -203
- pyeasyphd/tools/format_save_bibs.py +0 -178
- pyeasyphd/tools/replace/replace.py +0 -81
- pyeasyphd/tools/spider/process_spider_bib.py +0 -247
- pyeasyphd/tools/spider/process_spider_url.py +0 -75
- pyeasyphd/tools/spider/process_spider_url_bib.py +0 -62
- pyeasyphd-0.1.0.dist-info/RECORD +0 -80
- {pyeasyphd-0.1.0.dist-info → pyeasyphd-0.1.2.dist-info}/WHEEL +0 -0
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
from typing import List, Tuple
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class StandardizeCommentBlock(object):
    """Standardize an explicit ``@comment{...}`` block."""

    def __init__(self) -> None:
        pass

    def standardize(self, block: List[str]) -> Tuple[List[str], List[List[str]]]:
        """Normalize a comment block to a single ``@comment{...}`` line.

        Args:
            block (List[str]): Lines of the candidate comment block.

        Returns:
            Tuple[List[str], List[List[str]]]: The standardized block (empty
            when nothing usable was found) and a list of
            ``[text, class_name]`` implicit comments holding whatever text
            could not be kept inside the block.
        """
        # Keep the raw text: it is what must be reported when parsing fails.
        content = "".join(block)
        origin = __class__.__name__

        regex_comment = re.compile(r"@comment{" + r"(.*)", re.DOTALL)
        mch = regex_comment.match(content)
        if mch is None:
            return [], [[content, origin]]

        body = mch.group(1).strip()

        # Balance braces; the "+ 1" accounts for the opening "@comment{".
        n_open = body.count("{") + 1
        n_close = body.count("}")
        if n_open > n_close:
            body += "}" * (n_open - n_close)
        elif n_open < n_close:
            body = "{" * (n_close - n_open) + body

        # NOTE: no DOTALL here, so "." stops at a newline and a body whose
        # first line has no closing brace is reported as unparsable.
        sub_mch = re.match(r"(.*)" + "}" + r"(.*)(\n*)", body)
        if sub_mch is None:
            return [], [[content, origin]]

        inside, trailing, newlines = sub_mch.groups()

        new_block: List[str] = []
        if inside.strip():
            new_block = ["@comment{" + inside.replace("\n", " ").strip() + "}\n"]

        implicit_comments: List[List[str]] = []
        if trailing.strip():
            # Text after the closing brace does not belong to the comment.
            implicit_comments = [[trailing + newlines, origin]]

        return new_block, implicit_comments
|
|
@@ -1,310 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
from typing import Dict, List, Tuple
|
|
3
|
-
|
|
4
|
-
from ._base import split_data_list
|
|
5
|
-
from .default_data import DEFAULT_FIELDS_LIST, FIELD_FORMAT_FLAG
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class StandardizeEntryBlock(object):
    """Standardize an entry block.

    Args:
        default_additional_field_list (List[str] = []): Additional default fields.

    Attributes:
        default_fields_list (List[str]): Default fields (lower-cased, stripped,
            de-duplicated union of the built-in and additional fields).
    """

    def __init__(self, default_additional_field_list: List[str] = []) -> None:
        builtin_fields = [name.lower().strip() for name in DEFAULT_FIELDS_LIST]
        extra_fields = [name.lower().strip() for name in default_additional_field_list]
        merged = set(builtin_fields).union(set(extra_fields))
        self.default_fields_list = list(merged)

    def standardize(self, block: List[str]) -> Tuple[List[str], List[List[str]]]:
        """Run the entry pipeline: detect delimiters, split, append, extract, check.

        Args:
            block (List[str]): Lines of one entry block.

        Returns:
            Tuple[List[str], List[List[str]]]: The standardized block (empty on
            failure) and the collected ``[text, message]`` implicit comments.
        """
        implicit_comments = []
        raw_text = "".join(block)

        # Step 1: which delimiter pair does this entry use?
        pre, post = EntryBase().obtain_braces_or_quotes(block)
        if not (len(pre) != 0 and len(post) != 0):
            message = f"Obtain braces or quotes: No standard `pre - {pre}` and `post - {post}`"
            implicit_comments.append([raw_text, message])
            return [], implicit_comments

        # Step 2: which known fields occur (abbreviated values are supported)?
        field_list = EntryBase().obtain_fields(block, self.default_fields_list)
        if len(field_list) == 0:
            implicit_comments.append([raw_text, "`Obtain fields`: No fields found"])
            return [], implicit_comments

        # Step 3: split on the "field = <pre>" pattern.
        # Not support abbreviations: ['year = {2019}, journal = ECJ,']
        # TODO Support abbreviations
        pattern = r"\b((?:{})\s*=\s*(?:{}))".format("|".join(field_list), pre)  # compulsory
        delimiters = (pre, post)
        block = SplitEntry().split_fields(pattern, block)

        # Step 4: glue continuation lines onto their field (supports abbreviations).
        block = AppendEntry().append_field(field_list, delimiters, block)

        # Step 5: separate each field from trailing junk (supports abbreviations).
        block, redundant_list = ExtractEntry().extract(field_list, delimiters, block)
        if redundant_list:
            implicit_comments.append(["".join(redundant_list), "`Extract`: Redundant content"])

        # Step 6: final structural check (supports abbreviations).
        error_dict, block, is_standard_bib_flag = CheckEntry().check(field_list, delimiters, block)
        for key, lines in error_dict.items():
            implicit_comments.append(["".join(lines), f"`Check`: {key}"])

        if is_standard_bib_flag:
            return block, implicit_comments

        implicit_comments.append(["".join(block), "`Check`: Not standard bib"])
        return [], implicit_comments
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
class EntryBase(object):
    """Helpers that inspect an entry block before it is rewritten."""

    def __init__(self) -> None:
        pass

    @staticmethod
    def obtain_braces_or_quotes(block: List[str]) -> Tuple[str, str]:
        """Obtain braces or quotes in block.

        The opening delimiter is probed after the ``title``, ``author``,
        ``year``, ``pages`` and ``url`` fields (case-insensitive, trailing
        ``s`` characters allowed).

        Args:
            block (List[str]): block.

        Returns:
            Tuple[str, str]: ``("{", "}")`` or ``('"', '"')`` when every
            probed field uses the same single delimiter, otherwise
            ``("", "")`` (nothing found, or braces and quotes are mixed).
        """
        content = "".join(block)
        regex_list = [
            re.compile(r'\btitles*\s*=\s*([{"])', flags=re.I),
            re.compile(r'\bauthors*\s*=\s*([{"])', flags=re.I),
            re.compile(r'\byears*\s*=\s*([{"])', flags=re.I),
            re.compile(r'\bpages*\s*=\s*([{"])', flags=re.I),
            re.compile(r'\burls*\s*=\s*([{"])', flags=re.I),
        ]

        # One sorted list of distinct opening delimiters per probed field,
        # dropping fields that do not occur at all.
        found = [sorted(set(regex.findall(content))) for regex in regex_list]
        found = [flags for flags in found if flags]
        if not found:
            return "", ""

        reference = found[0]
        # Two delimiters for one field, or disagreement between fields,
        # means the entry is not consistently delimited.
        if any(len(flags) == 2 or flags != reference for flags in found):
            return "", ""

        if reference[0] == "{":
            return "{", "}"
        return '"', '"'

    def obtain_fields(
        self,
        block: List[str],
        default_fields_list: List[str],
        field_pattern: str = r'[\w\-]+'
    ) -> List[str]:
        r"""Obtain fields in block.

        Args:
            block (List[str]): block.
            default_fields_list (List[str]): Accepted field names; candidates
                are compared against it after lower-casing.
            field_pattern (str = r'[\w\-]+'): field pattern.

        Returns:
            List[str]: sorted, de-duplicated field names as written in block.
        """
        # FIELD_FORMAT_FLAG supplies the alternatives allowed right after
        # "=" (support for abbreviation).
        value_start = "|".join(FIELD_FORMAT_FLAG)
        regex = re.compile(rf"({field_pattern})\s*=\s*(?:{value_start})")

        candidates = set(regex.findall("".join(block)))
        accepted = set(default_fields_list)
        return sorted(field for field in candidates if field.lower() in accepted)
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
class SplitEntry(object):
    """Split the lines of an entry block on a field pattern."""

    def __init__(self) -> None:
        super().__init__()

    def split_fields(self, field_pattern: str, block: List[str], last_next: str = "next") -> List[str]:
        """Delegate to ``split_data_list`` with the arguments unchanged.

        Args:
            field_pattern (str): Regular expression marking where to split.
            block (List[str]): Lines of the entry block.
            last_next (str = "next"): Forwarded as-is to ``split_data_list``.

        Returns:
            List[str]: Whatever ``split_data_list`` returns for these arguments.
        """
        result = split_data_list(field_pattern, block, last_next)
        return result
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
class AppendEntry(object):
    """Append Patch Bib."""

    def __init__(self) -> None:
        pass

    @staticmethod
    def append_field(field_list: List[str], braces_or_quotes: Tuple[str, str], block: List[str]) -> List[str]:
        """Join continuation lines onto the field or entry line they follow.

        A line "starts an item" when it begins a known field
        (``name = <pre>``), an abbreviated field (``journal = EJC``) or an
        entry header (``@type{``). Every following non-blank line that does
        not start an item is appended to it, separated by a single space.
        Lines preceding the first item are kept unchanged.

        Args:
            field_list (List[str]): Append field list.
            braces_or_quotes (Tuple[str, str]): Brace or quote.
            block (List[str]): Lines of the entry block.

        Returns:
            List[str]: new patch bib after appending.
        """
        pre, _ = braces_or_quotes

        # Escape interpolated text so names or delimiters are always matched
        # literally, never interpreted as regex syntax.
        names = "|".join(re.escape(name) for name in field_list)
        opening = re.escape(pre)
        prefix = rf"[%\s]*(?:{names})"
        regex_field = re.compile(rf"{prefix}\s*=\s*{opening}", flags=re.I)
        regex_field_abbr = re.compile(rf"{prefix}\s*=\s*\w+[\w\-]*", flags=re.I)  # journal = EJC,
        regex_termination = re.compile(r"\s*@[a-zA-Z]*{", flags=re.I)

        def starts_item(text: str) -> bool:
            return bool(
                regex_field.match(text) or regex_termination.match(text) or regex_field_abbr.match(text)
            )

        new_block: List[str] = []
        index, total = 0, len(block)
        while index < total:
            line = block[index]
            index += 1
            if not starts_item(line):
                new_block.append(line)
                continue

            # Absorb continuation lines until the next item begins.
            merged = line
            while index < total and not starts_item(block[index]):
                continuation = block[index].lstrip()
                if continuation:
                    merged = merged.rstrip() + " " + continuation
                index += 1
            new_block.append(merged)
        return new_block
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
class ExtractEntry(object):
    """Separate each entry line into its canonical part and redundant text."""

    def __init__(self) -> None:
        pass

    def extract(
        self, field_list: List[str], brace_or_quote: Tuple[str, str], block: List[str]
    ) -> Tuple[List[str], List[str]]:
        """Extract.

        Each line is classified, in this order, as an entry header, an
        abbreviated field, a field closed on the same line, a field left
        open, or a lone closing brace. Any other line aborts the extraction.

        Args:
            field_list (List[str]): field list
            brace_or_quote (Tuple[str, str]): (", ") or ({, })
            block (List[str]): the block

        Returns:
            Tuple[List[str], List[str]]: main block, redundant part. When a
            line cannot be classified the result is ``([], block)``.
        """
        pre, post = brace_or_quote

        # Escape interpolated text so it is always matched literally.
        names = "|".join(re.escape(name) for name in field_list)
        opening, closing = re.escape(pre), re.escape(post)
        temp = rf"[%\s]*(?:{names})"
        regex_field_two = re.compile(rf"({temp}\s*=\s*{opening})(.*)(\n*)", flags=re.I)
        regex_field_one = re.compile(rf"({temp}\s*=\s*{opening}.*{closing})(.*)(\n*)", flags=re.I)
        regex_field_abbr = re.compile(rf"({temp}\s*=\s*\w+[\w\-]*)(.*)(\n*)", flags=re.I)
        regex_termination = re.compile(r"(\s*@[a-zA-Z]*{\s*[\w\-:/\\.\']*)(.*)(\n*)", flags=re.I)
        # A tail made only of these characters is punctuation, not content.
        regex_ignorable = re.compile(r"[\s,\n\}]+")

        main_list: List[str] = []
        redundant_list: List[str] = []

        for line in block:
            new_line, redundant = "", ""

            if mch := (regex_termination.match(line) or regex_field_abbr.match(line)):
                # Entry header or abbreviated value: keep the head verbatim.
                head, tail, newlines = mch.groups()
                new_line = head + ",\n"
                if regex_ignorable.sub("", tail):
                    redundant = tail + newlines

            elif mch := regex_field_one.match(line):
                # Field closed on this line: rebalance its delimiters.
                head, tail, newlines = mch.groups()
                new_line = self._resub_brace_or_quote(pre, post, head + ",\n")
                if regex_ignorable.sub("", tail):
                    redundant = tail + newlines

            elif mch := regex_field_two.match(line):
                # Field never closed: close it and keep the whole value.
                head, tail, _ = mch.groups()
                new_line = self._resub_brace_or_quote(pre, post, head + tail.strip() + post + ",\n")

            elif line.strip() == "}":
                # Closing brace of the entry; re-added once at the end.
                pass

            else:
                return [], block

            if new_line:
                main_list.append(new_line)
            if redundant:
                redundant_list.append(redundant)

        # for enclosing "@[a-zA-Z]{"
        if main_list:
            main_list.append("}\n")
        return main_list, redundant_list

    def _resub_brace_or_quote(self, pre, post, line: str) -> str:
        """Collapse surplus trailing closers, then balance the delimiters.

        Lines whose ``post`` is neither ``}`` nor ``"`` are returned unchanged.
        """
        if post == "}":
            trailing, replacement = r'(}[}\s\n,]*)$', '},\n'
        elif post == '"':
            # With identical pre/post the counts are always equal, so this
            # branch leaves the line as it is.
            trailing, replacement = r'("["\s\n,]*)$', '",\n'
        else:
            return line

        if line.count(post) > line.count(pre):
            line = re.sub(trailing, replacement, line)
        return add_brace_or_quote(pre, post, line)
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
def add_brace_or_quote(pre, post, line: str):
    """Balance the ``pre``/``post`` delimiters of a single line.

    Missing closers are inserted right after the last ``post``; missing
    openers are inserted right before the first ``pre``. A line that is
    already balanced (including any line where ``pre == post``) is returned
    unchanged.

    When the delimiter to anchor on is absent altogether, a fallback
    position is used instead of raising ``ValueError``: closers go before
    the trailing commas/whitespace, openers go after the first ``=`` (or at
    the first non-blank character when there is no ``=``).

    Args:
        pre: Opening delimiter.
        post: Closing delimiter.
        line (str): The line to balance.

    Returns:
        str: The balanced line.
    """
    n_open, n_close = line.count(pre), line.count(post)
    if n_open == n_close:
        return line

    if n_open > n_close:
        padding = post * (n_open - n_close)
        last_close = line.rfind(post)
        if last_close == -1:
            # No closer to anchor on: close the value before the separators.
            cut = len(line.rstrip(", \t\r\n"))
        else:
            cut = last_close + len(post)
        return line[:cut] + padding + line[cut:]

    padding = pre * (n_close - n_open)
    cut = line.find(pre)
    if cut == -1:
        # No opener to anchor on: open the value where it starts.
        equals = line.find("=")
        if equals == -1:
            cut = len(line) - len(line.lstrip())
        else:
            cut = equals + 1
            while cut < len(line) and line[cut] in " \t":
                cut += 1
    return line[:cut] + padding + line[cut:]
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
class CheckEntry(object):
    """Final structural check of an extracted entry block."""

    @staticmethod
    def check(
        field_list: List[str], brace_or_quote: Tuple[str, str], block: List[str]
    ) -> Tuple[Dict[str, List[str]], List[str], bool]:
        """Check.

        Args:
            field_list (List[str]): Field names expected in the block.
            brace_or_quote (Tuple[str, str]): Opening and closing delimiter.
            block (List[str]): Lines of the entry block.

        Returns:
            Tuple[Dict[str, List[str]], List[str], bool]: rejected lines
            grouped by reason, the accepted lines, and whether the block has
            both a valid entry header and a closing brace.
        """
        pre, post = brace_or_quote

        regex_entry = re.compile(r"\s*@[a-zA-Z]+{")
        # Escape the names so they are always matched literally.
        names = "|".join(re.escape(name) for name in field_list)
        regex_field = re.compile(rf"\s*(?:{names})" + r"\s*=")

        entry_flag, brace_flag = False, False  # minimal conditions
        error_dict: Dict[str, List[str]] = {}
        new_block: List[str] = []

        for line in block:
            if regex_entry.match(line) and (not entry_flag):
                # Only the first valid header counts; it must hold exactly
                # one "{" and one ",".
                if (line.count("{") != 1) or (line.count(",") != 1):
                    error_dict.setdefault("Failed entry_type", []).append(line)
                else:
                    entry_flag = True
                    new_block.append(line)

            elif regex_field.match(line):
                new_block.append(add_brace_or_quote(pre, post, line))

            elif (line.strip() == "}") and (not brace_flag):
                # Only the first closing brace counts.
                brace_flag = True
                new_block.append(line)

            else:
                error_dict.setdefault("Redundant content", []).append(line)

        return error_dict, new_block, entry_flag and brace_flag
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
if __name__ == "__main__":
|
|
310
|
-
pass
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
from typing import List, Tuple
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class StandardizePreambleBlock(object):
    """Standardize a ``@preamble{ "..." }`` block."""

    def __init__(self) -> None:
        pass

    def standardize(self, block: List[str]) -> Tuple[List[str], List[List[str]]]:
        """Normalize a preamble block to a single line.

        Args:
            block (List[str]): Lines of the candidate preamble block.

        Returns:
            Tuple[List[str], List[List[str]]]: The standardized block (empty
            when the text is not a quoted preamble) and a list of
            ``[text, class_name]`` implicit comments holding the text that
            was left over or could not be parsed.
        """
        # @preamble{ "\providecommand{\noopsort}[1]{} " }
        content = "".join(block)
        origin = __class__.__name__

        regex_preamble = re.compile(
            r"@preamble{" + r'\s*(")' + r"([\w\-\\\[\]\{\}\s]+)" + r'(")\s*' + r"(.*)(\n*)", re.DOTALL
        )
        mch = regex_preamble.match(content)
        if mch is None:
            # Report the original text, not the already-emptied block.
            return [], [[content, origin]]

        # Groups 1 and 3 can only ever be the literal double quote.
        quote_open, value, quote_close, rest, newlines = mch.groups()
        new_block = ["@preamble{ " + quote_open + value.replace("\n", " ").strip() + quote_close + " }\n"]

        # Drop the brace that closes the preamble itself.
        if rest.lstrip().startswith("}"):
            rest = rest.lstrip()[1:].lstrip()

        implicit_comments: List[List[str]] = []
        if rest.strip():
            implicit_comments = [[rest + newlines, origin]]
        return new_block, implicit_comments
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
from typing import List, Tuple
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class StandardizeStringBlock(object):
    """Standardize a ``@string{key = value}`` block."""

    def __init__(self) -> None:
        pass

    def standardize(self, block: List[str]) -> Tuple[List[str], List[List[str]]]:
        """Normalize a string block to a single ``@string{key = value}`` line.

        Args:
            block (List[str]): Lines of the candidate string block.

        Returns:
            Tuple[List[str], List[List[str]]]: The standardized block (empty
            when the text cannot be parsed or its delimiters do not pair up)
            and a list of ``[text, class_name]`` implicit comments holding
            the text that was left over or could not be parsed.
        """
        content = "".join(block)
        origin = __class__.__name__

        regex = re.compile(
            r"@string{" + r"\s*([\w]+)\s*=\s*" + r'(["{])' + r"([\w\-\n]+)" + r'(["}])' + r'(.*)(\n*)', re.DOTALL
        )
        mch = regex.match(content)
        if mch is None:
            # Report the original text, not the already-emptied block.
            return [], [[content, origin]]

        key, opening, value, closing, rest, newlines = mch.groups()
        if (opening, closing) not in (('"', '"'), ("{", "}")):
            # Mixed delimiters such as "...} are not a valid value.
            return [], [[content, origin]]

        new_block = ["@string{" + key + " = " + opening + value.replace("\n", " ").strip() + closing + "}\n"]

        # Drop the brace that closes the string definition itself.
        if rest.lstrip().startswith("}"):
            rest = rest.lstrip()[1:].lstrip()

        implicit_comments: List[List[str]] = []
        if rest.strip():
            implicit_comments = [[rest + newlines, origin]]
        return new_block, implicit_comments
|
|
@@ -1,75 +0,0 @@
|
|
|
1
|
-
from typing import List, Tuple
|
|
2
|
-
|
|
3
|
-
from .standardize.do_on_bib import ObtainMarkBlocksDict, SplitBibAccordingToMark
|
|
4
|
-
from .standardize.do_on_comment_block import StandardizeCommentBlock
|
|
5
|
-
from .standardize.do_on_entry_block import StandardizeEntryBlock
|
|
6
|
-
from .standardize.do_on_preamble_block import StandardizePreambleBlock
|
|
7
|
-
from .standardize.do_on_string_block import StandardizeStringBlock
|
|
8
|
-
|
|
9
|
-
MARKS_FLAGS = [
|
|
10
|
-
["comment", "comment", "C"], # comment
|
|
11
|
-
["string", "string", "S"], # string
|
|
12
|
-
["preamble", "preamble", "P"], # preamble
|
|
13
|
-
["article", "entry", "J"], # entry
|
|
14
|
-
["inproceedings", "entry", "C"], # entry
|
|
15
|
-
["proceedings", "entry", "B"], # entry
|
|
16
|
-
["book", "entry", "B"], # entry
|
|
17
|
-
["incollection", "entry", "BS"], # entry
|
|
18
|
-
["misc", "entry", "D"], # entry
|
|
19
|
-
["unpublished", "entry", "M"], # entry
|
|
20
|
-
["techreport", "entry", "R"], # entry
|
|
21
|
-
["phdthesis", "entry", "T_D"], # entry
|
|
22
|
-
["mastersthesis", "entry", "T_M"], # entry
|
|
23
|
-
]
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class StandardizeBib(object):
    """Standardize bib.

    Args:
        default_additional_field_list (List[str] = []): Additional default fields.
    """

    def __init__(self, default_additional_field_list: List[str] = []) -> None:
        self._standardize_comment_block = StandardizeCommentBlock()
        self._standardize_entry_block = StandardizeEntryBlock(default_additional_field_list)
        self._standardize_preamble_block = StandardizePreambleBlock()
        self._standardize_string_block = StandardizeStringBlock()

    def standardize(self, data_list: List[str]) -> Tuple[List[str], List[List[str]]]:
        """Generate standard bib.

        Args:
            data_list (List[str]): Bib data.

        Returns:
            Tuple[List[str], List[List[str]]]: Standard bib lines and the
            implicit comments collected while standardizing.
        """
        # Initialize: re-split into physical lines and drop blank ones.
        data_list = "".join(data_list).splitlines(keepends=True)
        data_list = [line for line in data_list if line.strip()]

        # Split data according to mark pattern
        data_list = SplitBibAccordingToMark().split_marks(data_list)

        new_data_list: List[str] = []
        implicit_comment_list: List[List[str]] = []

        # Generate dict
        mark_blocks_dict, temp_implicit_comment_list = ObtainMarkBlocksDict().obtain_dict(data_list, True)
        implicit_comment_list.extend(temp_implicit_comment_list)

        # mark -> block kind; the first row wins if a mark were ever repeated.
        flag_by_mark: dict = {}
        for row in MARKS_FLAGS:
            flag_by_mark.setdefault(row[0], row[1])

        if not_in := {k: v for k, v in mark_blocks_dict.items() if k not in flag_by_mark}:
            print(f"Warning: Not standard parts - {not_in}")

        for mark, blocks in mark_blocks_dict.items():
            if mark not in flag_by_mark:
                continue

            # Look the handler up by attribute name instead of eval().
            handler = getattr(self, f"_standardize_{flag_by_mark[mark]}_block")
            for block in blocks:
                block, temp = handler.standardize(block)
                new_data_list.extend(block)
                implicit_comment_list.extend(temp)

        return new_data_list, implicit_comment_list
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
"""Initialization.
|
|
2
|
-
|
|
3
|
-
This submodule incorporates modified source code from the python-bibtexparser project
|
|
4
|
-
(https://github.com/sciunto-org/python-bibtexparser), which is licensed under the MIT License.
|
|
5
|
-
The original copyright notice and license terms have been preserved in accordance with the license requirements.
|
|
6
|
-
"""
|
|
7
|
-
|
|
8
|
-
__all__ = [
|
|
9
|
-
"Block",
|
|
10
|
-
"Field",
|
|
11
|
-
"Entry",
|
|
12
|
-
"ImplicitComment",
|
|
13
|
-
"ExplicitComment",
|
|
14
|
-
"String",
|
|
15
|
-
"Preamble",
|
|
16
|
-
"ParsingFailedBlock",
|
|
17
|
-
"DuplicateBlockKeyBlock",
|
|
18
|
-
"Library",
|
|
19
|
-
|
|
20
|
-
"MiddlewaresStrToStr",
|
|
21
|
-
"MiddlewaresStrToLibrary",
|
|
22
|
-
"MiddlewaresLibraryToLibrary",
|
|
23
|
-
"MiddlewaresLibraryToStr",
|
|
24
|
-
|
|
25
|
-
"Splitter",
|
|
26
|
-
|
|
27
|
-
"BibtexFormat",
|
|
28
|
-
]
|
|
29
|
-
|
|
30
|
-
from .bibtex_format import BibtexFormat
|
|
31
|
-
from .library import Library
|
|
32
|
-
from .middlewares_library_to_library import MiddlewaresLibraryToLibrary
|
|
33
|
-
from .middlewares_library_to_str import MiddlewaresLibraryToStr
|
|
34
|
-
from .middlewares_str_to_library import MiddlewaresStrToLibrary
|
|
35
|
-
from .middlewares_str_to_str import MiddlewaresStrToStr
|
|
36
|
-
from .model import (
|
|
37
|
-
Block,
|
|
38
|
-
DuplicateBlockKeyBlock,
|
|
39
|
-
Entry,
|
|
40
|
-
ExplicitComment,
|
|
41
|
-
Field,
|
|
42
|
-
ImplicitComment,
|
|
43
|
-
ParsingFailedBlock,
|
|
44
|
-
Preamble,
|
|
45
|
-
String,
|
|
46
|
-
)
|
|
47
|
-
from .splitter import Splitter
|
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
from typing import Union
|
|
2
|
-
|
|
3
|
-
PARSING_FAILED_COMMENT = "% WARNING Parsing failed for the following {n} lines."
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class BibtexFormat(object):
    """Definition of formatting (alignment, ...) when writing a BibTeX file.

    Hint: For more manual, GUI-based formatting, see the `bibtex-tidy` tool:
    https://flamingtempura.github.io/bibtex-tidy/
    """

    def __init__(self):
        self._indent: str = " "  # "\t"
        self._align_field_values: Union[int, str] = "auto"
        self._block_separator: str = ""  # "\n\n"
        self._trailing_comma: bool = True
        self._parsing_failed_comment: str = PARSING_FAILED_COMMENT

    @property
    def indent(self) -> str:
        """Character(s) for indenting BibTeX field-value pairs. Default: single space."""
        return self._indent

    @indent.setter
    def indent(self, indent: str) -> None:
        self._indent = indent

    @property
    def value_column(self) -> Union[int, str]:
        """Controls the alignment of field- and string-values. Default: "auto".

        This impacts String and Entry blocks.

        An integer value x specifies that spaces should be added before the " = ",
        such that, if possible, the value is written at column `len(self.indent) + x`.
        Note that for long keys, the value may be written at a later column.

        Thus, a value of 0 means that the value is written directly after the " = ".

        The special value "auto" specifies that the bibtex field value should be aligned
        based on the longest key in the library.
        """
        return self._align_field_values

    @value_column.setter
    def value_column(self, align_values: Union[int, str]) -> None:
        """Set the alignment.

        Raises:
            ValueError: If the value is a negative integer, or is neither an
                integer nor the string "auto".
        """
        if isinstance(align_values, int):
            if align_values < 0:
                raise ValueError("align_field_values must be >= 0")
        elif align_values != "auto":
            raise ValueError("align_field_values must be an integer or 'auto'")
        self._align_field_values = align_values

    @property
    def block_separator(self) -> str:
        """Character(s) for separating BibTeX entries.

        Default: the empty string, i.e., nothing is inserted between blocks.
        """
        return self._block_separator

    @block_separator.setter
    def block_separator(self, entry_separator: str) -> None:
        self._block_separator = entry_separator

    @property
    def trailing_comma(self) -> bool:
        """Use the trailing comma syntax for BibTeX entries. Default: True.

        BibTeX syntax allows an optional comma at the end
        of the last field in an entry.
        """
        return self._trailing_comma

    @trailing_comma.setter
    def trailing_comma(self, trailing_comma: bool) -> None:
        self._trailing_comma = trailing_comma

    @property
    def parsing_failed_comment(self) -> str:
        """Comment to use for blocks that could not be parsed."""
        return self._parsing_failed_comment

    @parsing_failed_comment.setter
    def parsing_failed_comment(self, parsing_failed_comment: str) -> None:
        self._parsing_failed_comment = parsing_failed_comment
|