pyeasyphd 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyeasyphd might be problematic. Click here for more details.

Files changed (75) hide show
  1. pyeasyphd/.python-version +1 -1
  2. pyeasyphd/main/__init__.py +0 -4
  3. pyeasyphd/main/basic_input.py +7 -63
  4. pyeasyphd/main/python_run_md.py +3 -3
  5. pyeasyphd/main/python_run_tex.py +1 -1
  6. pyeasyphd/pyeasyphd.sublime-settings +2 -160
  7. pyeasyphd/tools/__init__.py +1 -16
  8. pyeasyphd/tools/generate/generate_from_bibs.py +54 -330
  9. pyeasyphd/tools/generate/generate_html.py +122 -0
  10. pyeasyphd/tools/generate/generate_library.py +188 -0
  11. pyeasyphd/tools/generate/generate_links.py +13 -4
  12. pyeasyphd/tools/py_run_bib_md_tex.py +12 -13
  13. pyeasyphd/tools/search/search_base.py +8 -5
  14. pyeasyphd/tools/search/search_core.py +4 -3
  15. pyeasyphd/tools/search/search_keywords.py +1 -1
  16. pyeasyphd/tools/search/search_writers.py +8 -5
  17. {pyeasyphd-0.1.0.dist-info → pyeasyphd-0.1.2.dist-info}/METADATA +3 -6
  18. pyeasyphd-0.1.2.dist-info/RECORD +27 -0
  19. pyeasyphd/bib/__init__.py +0 -1
  20. pyeasyphd/bib/bibtexbase/__init__.py +0 -7
  21. pyeasyphd/bib/bibtexbase/standardize/_base.py +0 -36
  22. pyeasyphd/bib/bibtexbase/standardize/default_data.py +0 -97
  23. pyeasyphd/bib/bibtexbase/standardize/do_on_bib.py +0 -54
  24. pyeasyphd/bib/bibtexbase/standardize/do_on_comment_block.py +0 -38
  25. pyeasyphd/bib/bibtexbase/standardize/do_on_entry_block.py +0 -310
  26. pyeasyphd/bib/bibtexbase/standardize/do_on_preamble_block.py +0 -35
  27. pyeasyphd/bib/bibtexbase/standardize/do_on_string_block.py +0 -34
  28. pyeasyphd/bib/bibtexbase/standardize_bib.py +0 -75
  29. pyeasyphd/bib/bibtexparser/__init__.py +0 -47
  30. pyeasyphd/bib/bibtexparser/bibtex_format.py +0 -87
  31. pyeasyphd/bib/bibtexparser/exceptions.py +0 -64
  32. pyeasyphd/bib/bibtexparser/library.py +0 -207
  33. pyeasyphd/bib/bibtexparser/middlewares/block/add.py +0 -94
  34. pyeasyphd/bib/bibtexparser/middlewares/block/authors.py +0 -22
  35. pyeasyphd/bib/bibtexparser/middlewares/block/doi_url.py +0 -62
  36. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_normalize.py +0 -47
  37. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_keys_replace.py +0 -31
  38. pyeasyphd/bib/bibtexparser/middlewares/block/entry_field_values_normalize.py +0 -222
  39. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_delete.py +0 -34
  40. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_keep.py +0 -33
  41. pyeasyphd/bib/bibtexparser/middlewares/block/entry_fields_sort.py +0 -70
  42. pyeasyphd/bib/bibtexparser/middlewares/block/entry_types.py +0 -15
  43. pyeasyphd/bib/bibtexparser/middlewares/block/journal_booktitle.py +0 -113
  44. pyeasyphd/bib/bibtexparser/middlewares/block/month_year.py +0 -34
  45. pyeasyphd/bib/bibtexparser/middlewares/block/number_volume.py +0 -21
  46. pyeasyphd/bib/bibtexparser/middlewares/block/pages.py +0 -28
  47. pyeasyphd/bib/bibtexparser/middlewares/block/title.py +0 -20
  48. pyeasyphd/bib/bibtexparser/middlewares/library/generating_entrykeys.py +0 -98
  49. pyeasyphd/bib/bibtexparser/middlewares/library/keeping_blocks.py +0 -29
  50. pyeasyphd/bib/bibtexparser/middlewares/library/sorting_blocks.py +0 -124
  51. pyeasyphd/bib/bibtexparser/middlewares/middleware.py +0 -222
  52. pyeasyphd/bib/bibtexparser/middlewares/parsestack.py +0 -13
  53. pyeasyphd/bib/bibtexparser/middlewares/utils.py +0 -226
  54. pyeasyphd/bib/bibtexparser/middlewares_library_to_library.py +0 -414
  55. pyeasyphd/bib/bibtexparser/middlewares_library_to_str.py +0 -42
  56. pyeasyphd/bib/bibtexparser/middlewares_str_to_library.py +0 -35
  57. pyeasyphd/bib/bibtexparser/middlewares_str_to_str.py +0 -29
  58. pyeasyphd/bib/bibtexparser/model.py +0 -481
  59. pyeasyphd/bib/bibtexparser/splitter.py +0 -151
  60. pyeasyphd/bib/core/__init__.py +0 -18
  61. pyeasyphd/bib/core/convert_library_to_library.py +0 -31
  62. pyeasyphd/bib/core/convert_library_to_str.py +0 -199
  63. pyeasyphd/bib/core/convert_str_to_library.py +0 -34
  64. pyeasyphd/bib/core/convert_str_to_str.py +0 -27
  65. pyeasyphd/main/python_run_bib.py +0 -73
  66. pyeasyphd/main/python_writers.py +0 -212
  67. pyeasyphd/tools/compare/compare_bibs.py +0 -234
  68. pyeasyphd/tools/experiments_base.py +0 -203
  69. pyeasyphd/tools/format_save_bibs.py +0 -178
  70. pyeasyphd/tools/replace/replace.py +0 -81
  71. pyeasyphd/tools/spider/process_spider_bib.py +0 -247
  72. pyeasyphd/tools/spider/process_spider_url.py +0 -75
  73. pyeasyphd/tools/spider/process_spider_url_bib.py +0 -62
  74. pyeasyphd-0.1.0.dist-info/RECORD +0 -80
  75. {pyeasyphd-0.1.0.dist-info → pyeasyphd-0.1.2.dist-info}/WHEEL +0 -0
@@ -1,222 +0,0 @@
1
- import logging
2
- import re
3
- from typing import Dict, List, Set
4
-
5
- from ...library import Library
6
- from ...model import Block, Entry, Field
7
- from ..middleware import BlockMiddleware
8
- from ..utils import SKIP_WORD_IN_CITATION_KEY
9
-
10
- logger = logging.getLogger(__name__)
11
-
12
-
13
class NormalizeEntryFieldValues(BlockMiddleware):
    """Normalize the case of every field key of an entry.

    NOTE(review): the class name and the ``field_keys`` parameter suggest that
    the *values* of ``journal``/``booktitle`` were meant to be normalized, but
    ``transform_entry`` only rewrites field *keys* and never reads
    ``field_keys``. The runtime behaviour is kept as-is here; confirm the
    intent with the callers before changing it.
    """

    def __init__(
        self,
        field_keys: List[str] = ["journal", "booktitle"],
        title_lower_upper: str = 'upper',
        allow_inplace_modification: bool = True
    ):
        """Store the configuration.

        Args:
            field_keys: Field keys of interest (currently unused by
                ``transform_entry``).
            title_lower_upper: ``"lower"`` lower-cases the keys; any other
                value (including the default ``"upper"``) upper-cases them.
            allow_inplace_modification: Forwarded to ``BlockMiddleware``.
        """
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        # Copy so that neither the shared default list nor a caller-owned list
        # is aliased by this instance.
        self._field_keys = list(field_keys)
        self.title_lower_upper = title_lower_upper

    def transform_entry(self, entry: Entry, library: Library) -> Entry:
        """Re-key all fields of ``entry``; on key collisions the last field wins.

        A colliding key keeps the position of its first occurrence, and a
        warning is logged for every overwritten field.
        """
        to_lower = self.title_lower_upper == "lower"
        fields_by_key: Dict[str, Field] = {}
        for field in entry.fields:
            normalized_key: str = field.key.lower() if to_lower else field.key.upper()
            if normalized_key in fields_by_key:
                logger.warning(
                    "NormalizeFieldKeys: in entry '%s': duplicate normalized key '%s' "
                    "(original '%s'); overriding previous value",
                    entry.key,
                    normalized_key,
                    field.key,
                )
            field.key = normalized_key
            fields_by_key[normalized_key] = field

        entry.fields = list(fields_by_key.values())
        return entry
57
-
58
-
59
class AddUrlToFieldValueInEntry(BlockMiddleware):
    r"""Wrap a field value in ``\href{<url>}{<value>}`` using the entry's DOI or URL."""

    def __init__(self, field_key: str, allow_inplace_modification: bool = True):
        """Remember which field (``field_key``) should receive the hyperlink."""
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        self.field_key = field_key

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Add the hyperlink when both a link target and the target field exist.

        The ``doi`` field is preferred; a bare DOI is prefixed with
        ``https://doi.org/``. When ``doi`` is missing or empty the ``url``
        field is used instead. Entries without a usable link are returned
        unchanged.
        """
        url = ""
        if "doi" in entry:
            url = entry["doi"]
            # "https?" (optional "s"): the previous "https*" also accepted "httpsss://".
            if url and not re.match(r"https?://", url):
                url = f"https://doi.org/{url}"
        # Fall back to the url field when no DOI is available, including an empty one.
        if not url and "url" in entry:
            url = entry["url"]

        if url and self.field_key in entry:
            entry[self.field_key] = r"\href{" + str(url) + "}" + "{" + entry[self.field_key] + "}"
        return entry
81
-
82
-
83
class NormalizeFieldValuesInEntry(BlockMiddleware):
    """Rewrite one field value of an entry in sentence case or title case."""

    # Sub-title separators, tried in this order. Each "\s+" is rendered as a
    # single space when the title is re-assembled.
    _SEPARATOR_PATTERNS = (r":\s+", r"\s+-\s+", r"\s+--\s+", r"\s+—\s+", r"\s+——\s+", r"\s+–\s+", r"\s+––\s+")

    def __init__(self, field_key: str, sentence_title: str, allow_inplace_modification: bool = True):
        """Store the target field and the requested style.

        Args:
            field_key: Key of the field whose value is rewritten.
            sentence_title: ``"sentence"`` or ``"title"``; any other value
                leaves the field untouched.
            allow_inplace_modification: Forwarded to ``BlockMiddleware``.
        """
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        self.field_key = field_key
        self.sentence_title = sentence_title

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Apply the configured case style to ``entry[self.field_key]`` if present."""
        if self.field_key in entry:
            if self.sentence_title == "sentence":
                entry[self.field_key] = self.generate_standard_sentence_case(entry[self.field_key])
            elif self.sentence_title == "title":
                entry[self.field_key] = self.generate_standard_title_case(entry[self.field_key])
        return entry

    @staticmethod
    def __upper_or_lower_first_letter(input_str: str, flag: str) -> str:
        """Upper- or lower-case the first character when it is an ASCII letter.

        flag = upper
            Input: about; $food; About; aBout
            Output: About; $food; About; ABout
        flag = lower
            Input: About; $food; about; ABout
            Output: about; $food; about; aBout

        When the first character is a letter the stripped string is returned
        (unchanged for any other ``flag``); otherwise ``input_str`` is returned
        exactly as given.
        """
        stripped = input_str.strip()
        if not (stripped and re.search(r"[a-zA-Z]", stripped[0])):
            return input_str
        if flag == "lower":
            return stripped[0].lower() + stripped[1:]
        if flag == "upper":
            return stripped[0].upper() + stripped[1:]
        return stripped

    def __lower_first_letter_and_others_not_contain_uppers(self, input_str: str) -> str:
        """Lower-case the first letter unless a later character is upper case.

        Input: About; A; $about; ABOUT; aBOUT
        Output: about; a; $about; ABOUT; aBOUT
        """
        stripped = input_str.strip()
        if stripped and not re.search(r"[A-Z]", stripped[1:]):
            return self.__upper_or_lower_first_letter(input_str, "lower")
        return input_str

    def __upper_first_letter_and_others_not_contain_uppers(self, input_str: str) -> str:
        """Upper-case the first letter unless a later character is upper case.

        Words listed in ``SKIP_WORD_IN_CITATION_KEY`` are returned lower-cased.

        Input: about; a; $about; ABOUT; abOUT
        Output: About; A; $about; ABOUT; abOUT
        """
        stripped = input_str.strip()
        if stripped.lower() in SKIP_WORD_IN_CITATION_KEY:
            return stripped.lower()

        if stripped and not re.search(r"[A-Z]", stripped[1:]):
            return self.__upper_or_lower_first_letter(input_str, "upper")
        return input_str

    def __generate_new_case_title(self, old_title: str, flag: str) -> str:
        """Re-case one separator-free title segment word by word.

        The first word always gets an upper-case first letter. Every other
        word, and every part of a hyphenated word except single-character
        parts, is re-cased according to ``flag``. With an unknown ``flag``
        those words are kept unchanged (they used to be dropped).
        """
        if flag == "sentence":
            recase = self.__lower_first_letter_and_others_not_contain_uppers
        elif flag == "title":
            recase = self.__upper_first_letter_and_others_not_contain_uppers
        else:
            recase = None

        new_words = []
        for i, word in enumerate(re.split(r"\s+", old_title)):
            if "-" in word:
                parts, new_parts = word.split("-"), []
                if i == 0:  # first part of the first word is always capitalized
                    new_parts.append(self.__upper_or_lower_first_letter(parts[0], "upper"))
                    parts = parts[1:]
                for part in parts:
                    if len(part.strip()) == 1 or recase is None:
                        new_parts.append(part)  # not change
                    else:
                        new_parts.append(recase(part))
                new_words.append("-".join(new_parts))
            elif i == 0:
                new_words.append(self.__upper_or_lower_first_letter(word, "upper"))
            elif recase is None:
                new_words.append(word)
            else:
                new_words.append(recase(word))
        return " ".join(new_words)

    def _generate_standard_title(self, title_content: str, flag: str) -> str:
        """Split the title at sub-title separators, re-case each part, and re-join.

        ``separators[k]`` always sits between ``segments[k]`` and
        ``segments[k + 1]``. The separator list is rebuilt on every pass so
        that this stays true even when several segments are split by the same
        pattern; inserting at the pre-split index scrambled the separator
        order in that case.
        """
        segments, separators = [title_content], []
        for pattern in self._SEPARATOR_PATTERNS:
            rendered = pattern.replace(r"\s+", " ")
            next_segments, next_separators = [], []
            for idx, segment in enumerate(segments):
                pieces = re.split(pattern, segment)
                next_segments.extend(pieces)
                next_separators.extend([rendered] * (len(pieces) - 1))
                if idx < len(separators):
                    # Separator that already followed this segment.
                    next_separators.append(separators[idx])
            segments, separators = next_segments, next_separators

        recased = [self.__generate_new_case_title(s.strip(), flag) for s in segments]
        chunks = [recased[0]]
        for separator, segment in zip(separators, recased[1:]):
            chunks.append(separator)
            chunks.append(segment)
        return "".join(chunks)

    # --------- --------- --------- --------- --------- --------- --------- --------- --------- #
    def generate_standard_sentence_case(self, title_content: str) -> str:
        """Return ``title_content`` in sentence case.

        The first word of the title and of each sub-title is capitalized;
        other words are lower-cased unless they contain an upper-case letter
        after their first character.
        """
        return self._generate_standard_title(title_content, "sentence")

    # --------- --------- --------- --------- --------- --------- --------- --------- --------- #
    def generate_standard_title_case(self, title_content: str) -> str:
        """Return ``title_content`` in title case.

        Words are capitalized, except that words in
        ``SKIP_WORD_IN_CITATION_KEY`` are lower-cased and words with an
        upper-case letter after their first character are left alone.
        """
        return self._generate_standard_title(title_content, "title")
@@ -1,34 +0,0 @@
1
- from typing import List, Optional
2
-
3
- from ...library import Library
4
- from ...model import Block, Entry
5
- from ..middleware import BlockMiddleware
6
-
7
-
8
class DeleteFieldsInEntry(BlockMiddleware):
    """Remove a user-supplied list of fields from entries."""

    def __init__(
        self, delete_field_keys: List[str], entry_type: Optional[str] = None, allow_inplace_modification: bool = True
    ):
        """Store the keys to delete and the optional entry-type filter.

        Args:
            delete_field_keys: Keys of the fields to delete.
            entry_type: When given, only entries of exactly this type are
                touched; ``None`` applies the deletion to every entry.
            allow_inplace_modification: Forwarded to ``BlockMiddleware``.
        """
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        self.delete_field_keys = delete_field_keys
        self.entry_type = entry_type

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Delete the configured fields when the entry passes the type filter."""
        applies = self.entry_type is None or self.entry_type == entry.entry_type
        if applies:
            for field_key in self.delete_field_keys:
                del entry[field_key]
        return entry

    @classmethod
    def metadata_key(cls) -> str:
        """Return the metadata key identifying this middleware."""
        return "delete_custom_fields"
@@ -1,33 +0,0 @@
1
- import copy
2
- from typing import List
3
-
4
- from ...library import Library
5
- from ...model import Block, Entry
6
- from ..middleware import BlockMiddleware
7
-
8
-
9
class KeepFieldsInEntry(BlockMiddleware):
    """Drop every field of an entry that is not on a user-supplied keep list."""

    def __init__(self, entry_type: str, keep_field_keys: List[str], allow_inplace_modification: bool = True):
        """Store the entry type this rule applies to and the keys to keep."""
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

        self.keep_field_keys = keep_field_keys
        self.entry_type = entry_type

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Prune the entry's fields when its type matches ``self.entry_type``.

        If ``author`` is on the keep list but the entry only has ``editor``,
        the ``editor`` field is kept as well.
        """
        if self.entry_type != entry.entry_type:
            return entry

        # Work on a copy so the configured list is never modified.
        allowed = copy.deepcopy(self.keep_field_keys)
        if "editor" in entry and "author" not in entry and "author" in self.keep_field_keys:
            allowed.append("editor")

        # Collect first, delete afterwards: the fields are not mutated while iterating.
        unwanted = [name for name in entry.fields_dict if name not in allowed]
        for name in unwanted:
            del entry[name]
        return entry

    @classmethod
    def metadata_key(cls) -> str:
        """Return the metadata key identifying this middleware."""
        return "keep_fields_custom"
@@ -1,70 +0,0 @@
1
- from typing import Tuple
2
-
3
- from ...library import Library
4
- from ...model import Block, Entry
5
- from ..middleware import BlockMiddleware
6
-
7
-
8
class SortFieldsAlphabeticallyMiddleware(BlockMiddleware):
    """Order the fields of an entry by their key."""

    def __init__(self, allow_inplace_modification: bool = True):
        """Forward ``allow_inplace_modification`` to ``BlockMiddleware``."""
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Sort ``entry.fields`` by key and flag the entry in its parser metadata."""
        ordered = list(entry.fields)
        ordered.sort(key=lambda field: field.key)
        entry.fields = ordered
        entry.parser_metadata[self.metadata_key()] = True
        return entry

    @classmethod
    def metadata_key(cls) -> str:
        """Return the metadata key identifying this middleware."""
        return "sorted_fields_alphabetically"
24
-
25
-
26
class SortFieldsCustomMiddleware(BlockMiddleware):
    """Sort the fields of an entry according to a custom order provided by user.

    The order is a list of field keys. Fields not in the list are put at the end.
    """

    def __init__(
        self,
        order: Tuple[str, ...],
        case_sensitive: bool = False,
        allow_inplace_modification: bool = True,
    ):
        """Validate and store the requested order.

        Args:
            order: Field keys in the desired order.
            case_sensitive: When false, keys are compared lower-cased.
            allow_inplace_modification: Forwarded to ``BlockMiddleware``.

        Raises:
            ValueError: If ``order`` contains the same key more than once
                (after lower-casing when ``case_sensitive`` is false).
        """
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)
        self._case_sensitive = case_sensitive
        if not case_sensitive:
            self._order = [x.lower() for x in order]
        else:
            self._order = order

        # A single pass builds the key -> position table and finds duplicates,
        # replacing the repeated list.count()/list.index() scans.
        rank = {}
        duplicate_keys = set()
        for position, key in enumerate(self._order):
            if key in rank:
                duplicate_keys.add(key)
            else:
                rank[key] = position

        if duplicate_keys:
            raise ValueError(
                "Order list must not contain duplicates. "
                "The following keys are duplicated: "
                # Sorted so the message does not depend on set iteration order.
                f"{', '.join(sorted(duplicate_keys))}"
            )
        self._rank = rank

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Reorder ``entry.fields`` and record the order used in the parser metadata."""
        # Keys missing from the order list all share this rank; the stable
        # sort keeps them at the end in their existing relative order.
        unlisted_rank = len(self._order)

        def _sort_key(field):
            key = field.key if self._case_sensitive else field.key.lower()
            return self._rank.get(key, unlisted_rank)

        entry.fields = sorted(entry.fields, key=_sort_key)
        entry.parser_metadata[self.metadata_key()] = self._order
        return entry

    @classmethod
    def metadata_key(cls) -> str:
        """Return the metadata key identifying this middleware."""
        return "sorted_fields_custom"
@@ -1,15 +0,0 @@
1
- from ...library import Library
2
- from ...model import Block, Entry
3
- from ..middleware import BlockMiddleware
4
-
5
-
6
class NormalizeEntryTypes(BlockMiddleware):
    """Lower-case the entry type of every entry."""

    def __init__(self, allow_inplace_modification: bool = True):
        """Forward ``allow_inplace_modification`` to ``BlockMiddleware``."""
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Replace ``entry.entry_type`` with its lower-case form."""
        lowered_type = entry.entry_type.lower()
        entry.entry_type = lowered_type
        return entry
@@ -1,113 +0,0 @@
1
- import re
2
-
3
- from ...library import Library
4
- from ...model import Block, Entry
5
- from ..middleware import BlockMiddleware
6
-
7
-
8
class AbbreviateJournalBooktitle(BlockMiddleware):
    """Replace the `journal` (article) or `booktitle` (inproceedings) value by an abbreviation."""

    def __init__(
        self,
        full_abbr_article_dict: dict,
        full_abbr_inproceedings_dict: dict,
        abbr_index_article_for_abbr: int,
        abbr_index_inproceedings_for_abbr: int,
        full_names_in_json: str,
        abbr_names_in_json: str,
        allow_inplace_modification: bool = True
    ):
        """Store the lookup tables and abbreviation levels.

        Args:
            full_abbr_article_dict: Outer key -> {abbreviation -> record}, used
                for ``article`` entries.
            full_abbr_inproceedings_dict: Same shape, used for
                ``inproceedings`` entries.
            abbr_index_article_for_abbr: Abbreviation level for articles;
                only 1 and 2 have an effect.
            abbr_index_inproceedings_for_abbr: Same for inproceedings.
            full_names_in_json: Record key holding the list of full names.
            abbr_names_in_json: Record key holding the list of long
                abbreviations.
            allow_inplace_modification: Forwarded to ``BlockMiddleware``.
        """
        super().__init__(allow_inplace_modification=allow_inplace_modification)

        self.full_names_in_json = full_names_in_json
        self.abbr_names_in_json = abbr_names_in_json
        self.full_abbr_article_dict = full_abbr_article_dict
        self.full_abbr_inproceedings_dict = full_abbr_inproceedings_dict
        self.abbr_index_article_for_abbr = abbr_index_article_for_abbr
        self.abbr_index_inproceedings_for_abbr = abbr_index_inproceedings_for_abbr

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Delegate to ``abbreviate_journal_booktitle``."""
        return self.abbreviate_journal_booktitle(entry)

    def abbreviate_journal_booktitle(self, entry: Entry) -> Entry:
        """Abbreviate the venue field of ``entry`` when exactly one record matches.

        Level 1 writes the long abbreviation paired with the matching full
        name; level 2 writes the record's short abbreviation (its dict key).
        At level 2 an entry key shaped like ``j_<abbr>_...`` or
        ``c_<abbr>_...`` supplies the abbreviation directly. Several distinct
        matches are only reported with ``print`` and change nothing.
        """
        kind = entry.entry_type.lower()
        if kind == "article":
            venue_table = self.full_abbr_article_dict
            field_key = "journal"
            abbr_index = self.abbr_index_article_for_abbr
        elif kind == "inproceedings":
            venue_table = self.full_abbr_inproceedings_dict
            field_key = "booktitle"
            abbr_index = self.abbr_index_inproceedings_for_abbr
        else:
            return entry

        if abbr_index not in [1, 2]:
            return entry

        # Shortcut: take the abbreviation straight from the entry key.
        if abbr_index == 2:
            key_match = re.search(r"([a-zA-Z])_([\w\-]+)_(.*)", entry.key)
            if key_match and key_match.group(1).lower() in ["j", "c"]:
                entry[field_key] = key_match.group(2)
                return entry

        # Flatten the two-level table into abbreviation -> record.
        records_by_abbr = {}
        for outer_key in venue_table:
            inner = venue_table[outer_key]
            for abbr in inner:
                records_by_abbr[abbr] = inner[abbr]

        raw_value = entry[field_key] if field_key in entry else ""
        field_content = re.sub(r"\(.*\)", "", raw_value).strip()
        if not field_content:
            return entry

        # The surrounding braces force the pattern to cover the whole value.
        braced_content = "{" + field_content + "}"

        candidates = []
        for abbr, record in records_by_abbr.items():
            full_names = record.get(self.full_names_in_json, [])
            long_abbrs = record.get(self.abbr_names_in_json, [])

            if abbr_index == 1:
                # long abbreviation
                candidates.extend(
                    long_abbr
                    for full, long_abbr in zip(full_names, long_abbrs)
                    if re.match("{" + full + "}", braced_content, re.I)
                )
            elif abbr_index == 2:
                # short abbreviation
                alternatives = [*full_names, *long_abbrs]
                if re.match("{(" + "|".join(alternatives) + ")}", braced_content, flags=re.I):
                    candidates.append(abbr)

        content_list = list(set(candidates))
        if len(content_list) == 1:
            entry[field_key] = content_list[0]
        elif len(content_list) > 1:
            print(f"Multiple match: {content_list} for {field_content}.")
        return entry
98
-
99
-
100
class DeleteRedundantInJournalBooktitle(BlockMiddleware):
    """Strip parenthesised parts such as `(CEC)` from the `journal` or `booktitle` value."""

    def __init__(self, allow_inplace_modification: bool = True):
        """Forward ``allow_inplace_modification`` to ``BlockMiddleware``."""
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Clean both venue fields of ``article`` and ``inproceedings`` entries."""
        if entry.entry_type.lower() not in ["article", "inproceedings"]:
            return entry

        for field_key in ["journal", "booktitle"]:
            current = entry[field_key] if field_key in entry else ""
            if not current:
                continue
            # Greedy on purpose: removes from the first "(" to the last ")".
            entry[field_key] = re.sub(r"\(.*\)", "", current).strip()
        return entry
@@ -1,34 +0,0 @@
1
- from pyadvtools.core.convert import convert_str_month_to_number_month
2
-
3
- from ...library import Library
4
- from ...model import Block, Entry
5
- from ..middleware import BlockMiddleware
6
-
7
-
8
class ConvertStrMonthToInt(BlockMiddleware):
    """Pass the `month` value of an entry through `convert_str_month_to_number_month`."""

    def __init__(self, allow_inplace_modification: bool = True):
        """Forward ``allow_inplace_modification`` to ``BlockMiddleware``."""
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Convert the `month` field when the entry has one; otherwise do nothing."""
        if "month" not in entry:
            return entry

        converted = convert_str_month_to_number_month(entry["month"])
        entry["month"] = converted
        return entry
19
-
20
-
21
class ExtractYear(BlockMiddleware):
    """Reduce a compound `year` value such as ``2020-01-15`` or ``2019/2020`` to one token."""

    def __init__(self, allow_inplace_modification: bool = True):
        """Forward ``allow_inplace_modification`` to ``BlockMiddleware``."""
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Keep the longest ``/``- or ``-``-separated token of the `year` field.

        When several tokens share the maximal length, the first one wins.
        The previous implementation went through a ``set``, so the winner of
        such a tie depended on hash ordering and could differ between runs.
        """
        year = entry["year"] if "year" in entry else ""
        if year:
            tokens = [token for chunk in year.split("/") for token in chunk.split("-")]
            # max() returns the first maximal element, which makes ties deterministic.
            entry["year"] = max(tokens, key=len)
        return entry
@@ -1,21 +0,0 @@
1
- from ...library import Library
2
- from ...model import Block, Entry
3
- from ..middleware import BlockMiddleware
4
-
5
-
6
class ConvertStrNumberVolumeToInt(BlockMiddleware):
    """Rewrite the `number` and `volume` values as plain integers when they parse as such."""

    def __init__(self, allow_inplace_modification: bool = True):
        """Forward ``allow_inplace_modification`` to ``BlockMiddleware``."""
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Normalize both fields; values that ``int()`` rejects are left untouched."""
        for field_key in ["number", "volume"]:
            raw = entry[field_key] if field_key in entry else ""
            if not raw:
                continue
            try:
                entry[field_key] = str(int(raw))
            except ValueError:
                # Not an integer (e.g. "12A"): keep the original text.
                pass
        return entry
@@ -1,28 +0,0 @@
1
- from ...library import Library
2
- from ...model import Block, Entry
3
- from ..middleware import BlockMiddleware
4
-
5
-
6
class NormalizePagesInEntry(BlockMiddleware):
    """Tidy the `pages` field of an entry, or build it from `articleno` and `numpages`."""

    def __init__(self, allow_inplace_modification: bool = True):
        """Forward ``allow_inplace_modification`` to ``BlockMiddleware``."""
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        """Normalize or synthesize the `pages` field.

        An existing value is split at ``-`` and ``—``, empty and repeated
        parts are dropped (``5-10-5-10`` becomes ``5-10``), and the rest is
        joined with ``-``. Without `pages`, ``<articleno>:1-<numpages>`` is
        written when both of those fields exist.
        """
        if "pages" in entry:
            parts = [
                piece.strip()
                for chunk in entry["pages"].split("-")  # English hyphen
                for piece in chunk.strip().split("—")  # Chinese hyphen
                if piece.strip()
            ]
            # dict.fromkeys removes repeats while keeping first-occurrence order.
            entry["pages"] = "-".join(dict.fromkeys(parts))
        elif "articleno" in entry and "numpages" in entry:
            # pages = {12:1-37}
            entry["pages"] = f'{entry["articleno"]}:1-{entry["numpages"]}'
        return entry
@@ -1,20 +0,0 @@
1
- import re
2
-
3
- from ...library import Library
4
- from ...model import Block, Entry
5
- from ..middleware import BlockMiddleware
6
-
7
-
8
class NormalizeTitleInEntry(BlockMiddleware):
    r"""Normalize field `title` of an entry by deleting \href{}{} if existed."""

    # Compiled once instead of on every call. The URL group is non-greedy so
    # that it stops at the first "}{": the greedy form ran to the last "}{"
    # and swallowed part of any title that itself contains "}{".
    _HREF_REGEX = re.compile(r"\\href{(.*?)}{(.*)}")

    def __init__(self, allow_inplace_modification: bool = True):
        """Forward ``allow_inplace_modification`` to ``BlockMiddleware``."""
        super().__init__(allow_inplace_modification=allow_inplace_modification, allow_parallel_execution=True)

    def transform_entry(self, entry: Entry, library: Library) -> Block:
        r"""Replace a title containing ``\href{url}{text}`` by ``text``.

        Titles without such a link, and entries without a title, are returned
        unchanged.
        """
        if "title" in entry:
            if mch := self._HREF_REGEX.search(entry["title"]):
                entry["title"] = mch.group(2)
        return entry