pyeasyphd 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of pyeasyphd might be problematic. Click here for more details.
- pyeasyphd/.python-version +1 -1
- pyeasyphd/main/__init__.py +0 -4
- pyeasyphd/main/basic_input.py +7 -63
- pyeasyphd/main/python_run_md.py +3 -3
- pyeasyphd/main/python_run_tex.py +1 -1
- pyeasyphd/pyeasyphd.sublime-settings +2 -160
- pyeasyphd/tools/__init__.py +1 -16
- pyeasyphd/tools/generate/generate_from_bibs.py +54 -330
- pyeasyphd/tools/generate/generate_html.py +122 -0
- pyeasyphd/tools/generate/generate_library.py +188 -0
- pyeasyphd/tools/generate/generate_links.py +13 -4
- pyeasyphd/tools/py_run_bib_md_tex.py +12 -13
- pyeasyphd/tools/search/search_base.py +8 -5
- pyeasyphd/tools/search/search_core.py +4 -3
- pyeasyphd/tools/search/search_keywords.py +1 -1
- pyeasyphd/tools/search/search_writers.py +8 -5
- {pyeasyphd-0.1.0.dist-info → pyeasyphd-0.1.2.dist-info}/METADATA +3 -6
- pyeasyphd-0.1.2.dist-info/RECORD +27 -0
- pyeasyphd/bib/__init__.py +0 -1
- pyeasyphd/bib/bibtexbase/__init__.py +0 -7
- pyeasyphd/bib/bibtexbase/standardize/_base.py +0 -36
- pyeasyphd/bib/bibtexbase/standardize/default_data.py +0 -97
- pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +0 -54
- pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +0 -38
- pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +0 -310
- pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +0 -35
- pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +0 -34
- pyeasyphd/bib/bibtexbase/standardize_bib.py +0 -75
- pyeasyphd/bib/bibtexparser/__init__.py +0 -47
- pyeasyphd/bib/bibtexparser/bibtex_format.py +0 -87
- pyeasyphd/bib/bibtexparser/exceptions.py +0 -64
- pyeasyphd/bib/bibtexparser/library.py +0 -207
- pyeasyphd/bib/bibtexparser/middlewares/block/add.py +0 -94
- pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +0 -22
- pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +0 -62
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +0 -47
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +0 -31
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +0 -222
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +0 -34
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +0 -33
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +0 -70
- pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +0 -15
- pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +0 -113
- pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +0 -34
- pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +0 -21
- pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +0 -28
- pyeasyphd/bib/bibtexparser/middlewares/block/title.py +0 -20
- pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +0 -98
- pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +0 -29
- pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +0 -124
- pyeasyphd/bib/bibtexparser/middlewares/middleware.py +0 -222
- pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +0 -13
- pyeasyphd/bib/bibtexparser/middlewares/utils.py +0 -226
- pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +0 -414
- pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +0 -42
- pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +0 -35
- pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +0 -29
- pyeasyphd/bib/bibtexparser/model.py +0 -481
- pyeasyphd/bib/bibtexparser/splitter.py +0 -151
- pyeasyphd/bib/core/__init__.py +0 -18
- pyeasyphd/bib/core/convert_library_to_library.py +0 -31
- pyeasyphd/bib/core/convert_library_to_str.py +0 -199
- pyeasyphd/bib/core/convert_str_to_library.py +0 -34
- pyeasyphd/bib/core/convert_str_to_str.py +0 -27
- pyeasyphd/main/python_run_bib.py +0 -73
- pyeasyphd/main/python_writers.py +0 -212
- pyeasyphd/tools/compare/compare_bibs.py +0 -234
- pyeasyphd/tools/experiments_base.py +0 -203
- pyeasyphd/tools/format_save_bibs.py +0 -178
- pyeasyphd/tools/replace/replace.py +0 -81
- pyeasyphd/tools/spider/process_spider_bib.py +0 -247
- pyeasyphd/tools/spider/process_spider_url.py +0 -75
- pyeasyphd/tools/spider/process_spider_url_bib.py +0 -62
- pyeasyphd-0.1.0.dist-info/RECORD +0 -80
- {pyeasyphd-0.1.0.dist-info → pyeasyphd-0.1.2.dist-info}/WHEEL +0 -0
|
@@ -1,222 +0,0 @@
|
|
|
1
|
-
import logging
|
|
2
|
-
import re
|
|
3
|
-
from typing import Dict, List, Set
|
|
4
|
-
|
|
5
|
-
from ...library import Library
|
|
6
|
-
from ...model import Block, Entry, Field
|
|
7
|
-
from ..middleware import BlockMiddleware
|
|
8
|
-
from ..utils import SKIP_WORD_IN_CITATION_KEY
|
|
9
|
-
|
|
10
|
-
# Module-level logger; used below to warn when two field keys collapse to the same normalized key.
logger = logging.getLogger(__name__)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class NormalizeEntryFieldValues(BlockMiddleware):
    """Normalize the letter case of every field key of an entry.

    Despite the class name, ``transform_entry`` rewrites field *keys*, not
    field values: each key is lower-cased when ``title_lower_upper`` is
    ``"lower"`` and upper-cased for any other setting.
    """

    def __init__(
        self,
        field_keys: List[str] = ["journal", "booktitle"],
        title_lower_upper: str = 'upper',
        allow_inplace_modification: bool = True
    ):
        """Store the configuration.

        Args:
            field_keys: Stored on the instance as ``_field_keys``. It is not
                consulted by ``transform_entry`` in this class.
            title_lower_upper: ``"lower"`` selects lower-case keys; any other
                value selects upper-case keys.
            allow_inplace_modification: Forwarded to ``BlockMiddleware``.
        """
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        # Copy the list so that neither the shared default object nor a
        # caller-owned list is aliased by this instance.
        self._field_keys = list(field_keys)
        self.title_lower_upper = title_lower_upper

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Entry:
        # The choice does not depend on the field, so make it once.
        # Every setting other than "lower" means upper case.
        change_case = str.lower if self.title_lower_upper == "lower" else str.upper

        # A dict keeps the position of the first occurrence of a key while
        # letting a later duplicate overwrite the value ("last one wins").
        new_fields_dict: Dict[str, Field] = {}
        for field in entry.fields:
            normalized_key: str = change_case(field.key)
            if normalized_key in new_fields_dict:
                logger.warning(
                    f"NormalizeFieldKeys: in entry '{entry.key}': "
                    + f"duplicate normalized key '{normalized_key}' "
                    + f"(original '{field.key}'); overriding previous value"
                )
            field.key = normalized_key
            new_fields_dict[normalized_key] = field

        entry.fields = list(new_fields_dict.values())
        return entry
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
class AddUrlToFieldValueInEntry(BlockMiddleware):
    r"""Wrap one field value of an entry in a LaTeX ``\href{url}{value}``.

    The link target is taken from the ``doi`` field when it is non-empty
    (prefixed with ``https://doi.org/`` unless it already starts with
    ``http://`` or ``https://``), otherwise from the ``url`` field.
    """

    # docstr-coverage: inherited
    def __init__(self, field_key: str, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        # Key of the field whose value gets wrapped in the hyperlink.
        self.field_key = field_key

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        url = entry["doi"] if "doi" in entry else ""
        if url:
            # A bare DOI is turned into a resolver link. `https?` accepts
            # exactly "http" or "https" (the previous `https*` also accepted
            # "httpss", "httpsss", ...).
            if not re.match(r"https?://", url):
                url = f"https://doi.org/{url}"
        elif "url" in entry:
            # Reached when there is no DOI or the DOI field is empty.
            url = entry["url"]

        # Leave the entry untouched when no link target was found or the
        # configured field does not exist.
        if url and self.field_key in entry:
            entry[self.field_key] = r"\href{" + str(url) + "}" + "{" + entry[self.field_key] + "}"
        return entry
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
class NormalizeFieldValuesInEntry(BlockMiddleware):
    """Rewrite one field of an entry in sentence case or in title case."""

    # docstr-coverage: inherited
    def __init__(self, field_key: str, sentence_title: str, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        # Key of the field to rewrite.
        self.field_key = field_key
        # "sentence" or "title"; any other value leaves the field unchanged.
        self.sentence_title = sentence_title

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if self.field_key in entry:
            if self.sentence_title == "sentence":
                entry[self.field_key] = self.generate_standard_sentence_case(entry[self.field_key])
            elif self.sentence_title == "title":
                entry[self.field_key] = self.generate_standard_title_case(entry[self.field_key])
        return entry

    @staticmethod
    def __upper_or_lower_first_letter(input_str: str, flag: str) -> str:
        """Change the case of the first character when it is an ASCII letter.

        The input is stripped first. A string that is empty or does not start
        with ``a-z``/``A-Z`` is returned stripped but otherwise unchanged.

        flag = upper
            Input: about; $food; About; aBout
            Output: About; $food; About; ABout
        flag = lower
            Input: About; $food; about; ABout
            Output: about; $food; about; aBout

        Any other flag returns ``input_str`` as given.
        """
        stripped = input_str.strip()
        if not stripped or not re.search(r"[a-zA-Z]", stripped[0]):
            return stripped
        if flag == "lower":
            return stripped[0].lower() + stripped[1:]
        if flag == "upper":
            return stripped[0].upper() + stripped[1:]
        return input_str

    def __lower_first_letter_and_others_not_contain_uppers(self, input_str: str) -> str:
        """Lower-case the first letter unless a later character is upper case.

        Input: About; A; $about; ABOUT; aBOUT
        Output: about; a; $about; ABOUT; aBOUT
        """
        stripped = input_str.strip()
        # An upper-case letter after the first character marks an acronym or
        # a deliberately capitalised word; such words are left alone.
        if stripped and not re.search(r"[A-Z]", stripped[1:]):
            return self.__upper_or_lower_first_letter(input_str, "lower")
        return input_str

    def __upper_first_letter_and_others_not_contain_uppers(self, input_str: str) -> str:
        """Upper-case the first letter unless a later character is upper case.

        Words found (case-insensitively) in ``SKIP_WORD_IN_CITATION_KEY`` are
        returned stripped and lower-cased instead.

        Input: about; a; $about; ABOUT; abOUT
        Output: About; A; $about; ABOUT; abOUT
        """
        stripped = input_str.strip()
        if stripped.lower() in SKIP_WORD_IN_CITATION_KEY:
            return stripped.lower()

        if stripped and not re.search(r"[A-Z]", stripped[1:]):
            return self.__upper_or_lower_first_letter(input_str, "upper")
        return input_str

    def __generate_new_case_title(self, old_title: str, flag: str) -> str:
        """Re-case every whitespace-separated word of one title segment.

        The first word always gets an upper-case first letter. Later words,
        and the hyphen-separated parts of a word, are re-cased according to
        ``flag`` ("sentence" or "title"); single-character hyphen parts are
        kept as they are. Words are re-joined with single spaces.
        """
        if flag == "sentence":
            recase = self.__lower_first_letter_and_others_not_contain_uppers
        elif flag == "title":
            recase = self.__upper_first_letter_and_others_not_contain_uppers
        else:
            # Kept for backward compatibility: with an unknown flag only the
            # first word and single-character hyphen parts are emitted.
            recase = None

        new_words = []
        for index, word in enumerate(re.split(r"\s+", old_title)):
            is_first = index == 0
            if "-" in word:
                parts, new_parts = word.split("-"), []
                if is_first:
                    new_parts.append(self.__upper_or_lower_first_letter(parts[0], "upper"))
                    parts = parts[1:]
                for part in parts:
                    if len(part.strip()) == 1:
                        new_parts.append(part)  # single letters stay as written
                    elif recase is not None:
                        new_parts.append(recase(part))
                new_words.append("-".join(new_parts))
            elif is_first:
                new_words.append(self.__upper_or_lower_first_letter(word, "upper"))
            elif recase is not None:
                new_words.append(recase(word))
        return " ".join(new_words)

    def _generate_standard_title(self, title_content: str, flag: str) -> str:
        """Re-case a title segment by segment.

        The title is split at a colon followed by whitespace and at
        whitespace-surrounded hyphens/dashes. Every segment is re-cased on its
        own, so the first word after such a separator is capitalised, and the
        segments are joined again with the separator that split them (its
        surrounding whitespace reduced to single spaces).

        Args:
            title_content: The title text.
            flag: "sentence" or "title".

        Returns:
            The re-cased title.
        """
        patterns = [r":\s+", r"\s+-\s+", r"\s+--\s+", r"\s+—\s+", r"\s+——\s+", r"\s+–\s+", r"\s+––\s+"]

        # Invariant: separators[k] is the text between segments[k] and
        # segments[k + 1]. Both lists are rebuilt together on every pass so a
        # separator can never end up next to the wrong segment, which happened
        # when several segments were split during the same pass.
        segments, separators = [title_content], []
        for pattern in patterns:
            separator_text = pattern.replace(r"\s+", " ")
            next_segments, next_separators = [], []
            for index, segment in enumerate(segments):
                pieces = re.split(pattern, segment)
                next_segments.extend(pieces)
                next_separators.extend([separator_text] * (len(pieces) - 1))
                if index < len(separators):
                    # Separator that already followed this segment.
                    next_separators.append(separators[index])
            segments, separators = next_segments, next_separators

        recased = [self.__generate_new_case_title(segment.strip(), flag) for segment in segments]
        # There is one separator fewer than segments; pad with "" for the last.
        return "".join(text + separator for text, separator in zip(recased, separators + [""]))

    # --------- --------- --------- --------- --------- --------- --------- --------- --------- #
    def generate_standard_sentence_case(self, title_content: str) -> str:
        """Return ``title_content`` in sentence case.

        The first word of the title, and of every segment after a colon or a
        spaced dash, gets an upper-case first letter. Other words get a
        lower-case first letter unless they contain further upper-case
        letters.
        """
        return self._generate_standard_title(title_content, "sentence")

    # --------- --------- --------- --------- --------- --------- --------- --------- --------- #
    def generate_standard_title_case(self, title_content: str) -> str:
        """Return ``title_content`` in title case.

        Words get an upper-case first letter unless they contain further
        upper-case letters. Words listed in ``SKIP_WORD_IN_CITATION_KEY`` are
        lower-cased, except when they start the title or a segment.
        """
        return self._generate_standard_title(title_content, "title")
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
from typing import List, Optional
|
|
2
|
-
|
|
3
|
-
from ...library import Library
|
|
4
|
-
from ...model import Block, Entry
|
|
5
|
-
from ..middleware import BlockMiddleware
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class DeleteFieldsInEntry(BlockMiddleware):
    """Remove a caller-supplied list of fields from entries.

    With ``entry_type`` set, only entries whose ``entry_type`` equals it are
    touched; with ``entry_type=None`` every entry is processed.
    """

    def __init__(
        self, delete_field_keys: List[str], entry_type: Optional[str] = None, allow_inplace_modification: bool = True
    ):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        self.entry_type = entry_type
        self.delete_field_keys = delete_field_keys

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        # No configured type means "applies to every entry".
        applies = self.entry_type is None or self.entry_type == entry.entry_type
        if not applies:
            return entry

        for field_key in self.delete_field_keys:
            del entry[field_key]
        return entry

    # docstr-coverage: inherited
    @classmethod
    def metadata_key(cls) -> str:
        return "delete_custom_fields"
|
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
import copy
|
|
2
|
-
from typing import List
|
|
3
|
-
|
|
4
|
-
from ...library import Library
|
|
5
|
-
from ...model import Block, Entry
|
|
6
|
-
from ..middleware import BlockMiddleware
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class KeepFieldsInEntry(BlockMiddleware):
    """Drop every field that is not on a caller-supplied keep-list.

    Only entries whose ``entry_type`` equals the configured one are changed.
    """

    def __init__(self, entry_type: str, keep_field_keys: List[str], allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        self.entry_type = entry_type
        self.keep_field_keys = keep_field_keys

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if not (self.entry_type == entry.entry_type):
            return entry

        # Work on a copy so the configured keep-list is never modified.
        allowed = copy.deepcopy(self.keep_field_keys)

        # An entry with an editor but no author keeps the editor in place of
        # the author, provided the author was meant to be kept.
        has_editor_only = ("editor" in entry) and ("author" not in entry)
        if has_editor_only and ("author" in self.keep_field_keys):
            allowed.append("editor")

        # Collect first, delete afterwards: fields are not removed while the
        # key view is being iterated.
        unwanted = [name for name in entry.fields_dict.keys() if name not in allowed]
        for name in unwanted:
            del entry[name]
        return entry

    # docstr-coverage: inherited
    @classmethod
    def metadata_key(cls) -> str:
        return "keep_fields_custom"
|
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
from typing import Tuple
|
|
2
|
-
|
|
3
|
-
from ...library import Library
|
|
4
|
-
from ...model import Block, Entry
|
|
5
|
-
from ..middleware import BlockMiddleware
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class SortFieldsAlphabeticallyMiddleware(BlockMiddleware):
    """Order the fields of an entry by their key, using plain string comparison."""

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        def _by_key(field):
            return field.key

        ordered = list(entry.fields)
        ordered.sort(key=_by_key)
        entry.fields = ordered

        # Record on the entry that this middleware has been applied.
        entry.parser_metadata[self.metadata_key()] = True
        return entry

    # docstr-coverage: inherited
    @classmethod
    def metadata_key(cls) -> str:
        return "sorted_fields_alphabetically"
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
class SortFieldsCustomMiddleware(BlockMiddleware):
    """Order the fields of an entry by a caller-supplied sequence of keys.

    Fields whose key is listed come first, in the listed order. All other
    fields follow, keeping their relative order.
    """

    def __init__(
        self,
        order: Tuple[str, ...],
        case_sensitive: bool = False,
        allow_inplace_modification: bool = True,
    ):
        """Store the order and reject duplicates.

        Raises:
            ValueError: If ``order`` lists the same key twice (after
                lower-casing, unless ``case_sensitive`` is true).
        """
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)
        self._case_sensitive = case_sensitive
        # Keys are compared lower-cased unless case sensitivity was requested.
        self._order = order if case_sensitive else [x.lower() for x in order]

        has_duplicates = len(set(self._order)) != len(self._order)
        if has_duplicates:
            duplicate_keys = {x for x in self._order if self._order.count(x) > 1}
            message = (
                "Order list must not contain duplicates. "
                "The following keys are duplicated: "
                f"{', '.join(duplicate_keys)}"
            )
            raise ValueError(message)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        # Rank shared by all unlisted keys: one past the last listed key.
        unlisted_rank = len(self._order)

        def _rank(field):
            name = field.key if self._case_sensitive else field.key.lower()
            if name in self._order:
                return self._order.index(name)
            return unlisted_rank

        entry.fields = sorted(entry.fields, key=_rank)
        entry.parser_metadata[self.metadata_key()] = self._order
        return entry

    # docstr-coverage: inherited
    @classmethod
    def metadata_key(cls) -> str:
        return "sorted_fields_custom"
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
from ...library import Library
|
|
2
|
-
from ...model import Block, Entry
|
|
3
|
-
from ..middleware import BlockMiddleware
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class NormalizeEntryTypes(BlockMiddleware):
    """Lower-case the entry type of every entry."""

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        lowered_type = entry.entry_type.lower()
        entry.entry_type = lowered_type
        return entry
|
|
@@ -1,113 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
|
|
3
|
-
from ...library import Library
|
|
4
|
-
from ...model import Block, Entry
|
|
5
|
-
from ..middleware import BlockMiddleware
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class AbbreviateJournalBooktitle(BlockMiddleware):
    """Replace the `journal` or `booktitle` value of an entry by an abbreviation.

    `article` entries use the `journal` field and the article table;
    `inproceedings` entries use the `booktitle` field and the inproceedings
    table. All other entry types are returned unchanged.
    """

    def __init__(
        self,
        full_abbr_article_dict: dict,
        full_abbr_inproceedings_dict: dict,
        abbr_index_article_for_abbr: int,
        abbr_index_inproceedings_for_abbr: int,
        full_names_in_json: str,
        abbr_names_in_json: str,
        allow_inplace_modification: bool = True
    ):
        """Store the abbreviation tables and the selection settings.

        Args:
            full_abbr_article_dict: Nested mapping publisher -> abbreviation ->
                record, used for `article` entries.
            full_abbr_inproceedings_dict: The same for `inproceedings` entries.
            abbr_index_article_for_abbr: 1 selects the long abbreviation, 2 the
                short one; any other value disables the rewrite.
            abbr_index_inproceedings_for_abbr: The same for `inproceedings`.
            full_names_in_json: Key under which a record lists full names.
            abbr_names_in_json: Key under which a record lists long
                abbreviations.
            allow_inplace_modification: Forwarded to ``BlockMiddleware``.
        """
        super().__init__(allow_inplace_modification=allow_inplace_modification)

        self.full_abbr_article_dict = full_abbr_article_dict
        self.full_abbr_inproceedings_dict = full_abbr_inproceedings_dict
        self.abbr_index_article_for_abbr = abbr_index_article_for_abbr
        self.abbr_index_inproceedings_for_abbr = abbr_index_inproceedings_for_abbr
        self.full_names_in_json = full_names_in_json
        self.abbr_names_in_json = abbr_names_in_json

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        return self.abbreviate_journal_booktitle(entry)

    def abbreviate_journal_booktitle(self, entry: Entry) -> Entry:
        """Abbreviate the venue field of ``entry`` and return the entry.

        The field is rewritten only when exactly one abbreviation matches;
        with several distinct matches a notice is printed and the field is
        left as it is.
        """
        if entry.entry_type.lower() == "article":
            full_abbr_dict = self.full_abbr_article_dict
            field_key, abbr_index = "journal", self.abbr_index_article_for_abbr
        elif entry.entry_type.lower() == "inproceedings":
            full_abbr_dict = self.full_abbr_inproceedings_dict
            field_key, abbr_index = "booktitle", self.abbr_index_inproceedings_for_abbr
        else:
            return entry

        if abbr_index not in [1, 2]:
            return entry

        # Case 1: for the short form, a citation key shaped like
        # "<letter>_<abbr>_<rest>" whose letter is j or c already carries the
        # abbreviation, so it is taken straight from the key.
        # NOTE(review): the nesting of this early return was reconstructed
        # from a source that had lost its indentation - confirm.
        if abbr_index == 2:
            key_match = re.search(r"([a-zA-Z])_([\w\-]+)_(.*)", entry.key)
            if key_match and key_match.group(1).lower() in ["j", "c"]:
                entry[field_key] = key_match.group(2)
                return entry

        # Case 2: look the field content up in the abbreviation table.
        # Flatten publisher -> abbr -> record into abbr -> record.
        records_by_abbr = {}
        for publisher in full_abbr_dict:
            publisher_table = full_abbr_dict[publisher]
            for abbr in publisher_table:
                records_by_abbr[abbr] = publisher_table[abbr]

        field_content = entry[field_key] if field_key in entry else ""
        # Parenthesised text is removed before matching.
        field_content = re.sub(r"\(.*\)", "", field_content).strip()
        if not field_content:
            return entry

        # Both the pattern and the content are wrapped in braces before
        # re.match is applied to them.
        braced_content = "{" + field_content + "}"

        matches = []
        for abbr, record in records_by_abbr.items():
            full_name_list = record.get(self.full_names_in_json, [])
            long_abbr_name_list = record.get(self.abbr_names_in_json, [])

            if abbr_index == 1:
                # Long abbreviation: the i-th full name maps to the i-th long
                # abbreviation.
                for full, long_abbr in zip(full_name_list, long_abbr_name_list):
                    if re.match("{" + full + "}", braced_content, re.I):
                        matches.append(long_abbr)
            elif abbr_index == 2:
                # Short abbreviation: any full name or long abbreviation of
                # the record maps to the record's key.
                alternatives = [*full_name_list, *long_abbr_name_list]
                pattern = "{" + "(" + "|".join(alternatives) + ")" + "}"
                if re.match(pattern, braced_content, flags=re.I):
                    matches.append(abbr)

        content_list = list(set(matches))
        if len(content_list) == 1:
            entry[field_key] = content_list[0]
        elif len(content_list) > 1:
            print(f"Multiple match: {content_list} for {field_content}.")
        return entry
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
class DeleteRedundantInJournalBooktitle(BlockMiddleware):
    """Strip parenthesised text such as `(CEC)` from the `journal` and `booktitle` values.

    Only `article` and `inproceedings` entries are changed.
    """

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if entry.entry_type.lower() not in ["article", "inproceedings"]:
            return entry

        for field_name in ["journal", "booktitle"]:
            if field_name not in entry:
                continue
            current = entry[field_name]
            if not current:
                continue
            # The pattern is greedy: everything from the first "(" to the
            # last ")" is removed.
            entry[field_name] = re.sub(r"\(.*\)", "", current).strip()
        return entry
|
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
from pyadvtools.core.convert import convert_str_month_to_number_month
|
|
2
|
-
|
|
3
|
-
from ...library import Library
|
|
4
|
-
from ...model import Block, Entry
|
|
5
|
-
from ..middleware import BlockMiddleware
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class ConvertStrMonthToInt(BlockMiddleware):
    """Pass the `month` value of an entry through ``convert_str_month_to_number_month``."""

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        # Entries without a month are returned untouched.
        if "month" not in entry:
            return entry

        # NOTE(review): presumably maps month names to month numbers; the
        # helper lives in pyadvtools - confirm its handling of unknown input.
        converted = convert_str_month_to_number_month(entry["month"])
        entry["month"] = converted
        return entry
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
class ExtractYear(BlockMiddleware):
    """Reduce the field `year` of an entry to a single value.

    The value is split at `/` and `-` (for example a date such as
    `2020-05-17` or `2020/05`); the longest part is kept.
    """

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        year = entry["year"] if "year" in entry else ""
        if year:
            # Surrounding whitespace must not make a part count as longer.
            parts = [part.strip() for chunk in year.split("/") for part in chunk.split("-")]
            # max() returns the first of several equally long parts, so the
            # result is the same on every run (e.g. "2019-2020" -> "2019").
            # Sorting a set left the choice to string hash order.
            entry["year"] = max(parts, key=len)
        return entry
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
from ...library import Library
|
|
2
|
-
from ...model import Block, Entry
|
|
3
|
-
from ..middleware import BlockMiddleware
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class ConvertStrNumberVolumeToInt(BlockMiddleware):
    """Rewrite the `number` and `volume` values of an entry as plain integers where possible.

    A value that ``int()`` accepts is replaced by the integer's string form
    (so `"007"` becomes `"7"`); any other value is left as it is.
    """

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        for field_name in ["number", "volume"]:
            if field_name not in entry:
                continue
            raw = entry[field_name]
            if not raw:
                continue
            try:
                entry[field_name] = f"{int(raw)}"
            except ValueError:
                # Not an integer (e.g. "12A" or "3-4"): keep the original text.
                continue
        return entry
|
|
@@ -1,28 +0,0 @@
|
|
|
1
|
-
from ...library import Library
|
|
2
|
-
from ...model import Block, Entry
|
|
3
|
-
from ..middleware import BlockMiddleware
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class NormalizePagesInEntry(BlockMiddleware):
    """Normalize field `pages` of an entry, or build it when it is missing.

    An existing value is split at hyphens and dashes, empty and repeated
    parts are dropped, and the rest is joined with `-`
    (`5-10-5-10` -> `5-10`, `5--10` -> `5-10`). Without a `pages` field, one
    is built as `<articleno>:1-<numpages>` when both of those fields exist.
    """

    # Dash characters treated like the ASCII hyphen: em dash and en dash.
    _DASHES = ("\u2014", "\u2013")

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if "pages" in entry:
            pages = entry["pages"]
            for dash in self._DASHES:
                pages = pages.replace(dash, "-")

            parts = (part.strip() for part in pages.split("-"))
            # dict.fromkeys drops repeats while keeping first-seen order.
            unique_parts = dict.fromkeys(part for part in parts if part)
            entry["pages"] = "-".join(unique_parts)
        elif "articleno" in entry and "numpages" in entry:
            # pages = {12:1-37}
            entry["pages"] = f'{entry["articleno"]}:1-{entry["numpages"]}'
        return entry
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
import re
|
|
2
|
-
|
|
3
|
-
from ...library import Library
|
|
4
|
-
from ...model import Block, Entry
|
|
5
|
-
from ..middleware import BlockMiddleware
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class NormalizeTitleInEntry(BlockMiddleware):
    r"""Replace a `title` that contains \href{...}{...} by the link text alone."""

    def __init__(self, allow_inplace_modification: bool = True):
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    # docstr-coverage: inherited
    def transform_entry(self, entry: Entry, library: Library) -> Block:
        if "title" not in entry:
            return entry

        href = re.search(r"\\href{(.*)}{(.*)}", entry["title"])
        if href is not None:
            # Group 2 is the visible link text; the target (group 1) and any
            # text around the \href command are discarded.
            entry["title"] = href.group(2)
        return entry
|